| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 1.9969650986342944, |
| "eval_steps": 500, |
| "global_step": 878, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.002276176024279211, |
| "grad_norm": 5.864941596984863, |
| "learning_rate": 5.0000000000000004e-08, |
| "loss": 1.982, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.004552352048558422, |
| "grad_norm": 6.175215244293213, |
| "learning_rate": 1.0000000000000001e-07, |
| "loss": 2.0217, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.006828528072837633, |
| "grad_norm": 6.1325860023498535, |
| "learning_rate": 1.5000000000000002e-07, |
| "loss": 2.0283, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.009104704097116844, |
| "grad_norm": 6.438838481903076, |
| "learning_rate": 2.0000000000000002e-07, |
| "loss": 2.0133, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.011380880121396054, |
| "grad_norm": 6.120014190673828, |
| "learning_rate": 2.5000000000000004e-07, |
| "loss": 1.9788, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.013657056145675266, |
| "grad_norm": 6.399510860443115, |
| "learning_rate": 3.0000000000000004e-07, |
| "loss": 2.0115, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.015933232169954476, |
| "grad_norm": 6.267389297485352, |
| "learning_rate": 3.5000000000000004e-07, |
| "loss": 2.034, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.018209408194233688, |
| "grad_norm": 6.195969581604004, |
| "learning_rate": 4.0000000000000003e-07, |
| "loss": 2.0221, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.0204855842185129, |
| "grad_norm": 6.281792163848877, |
| "learning_rate": 4.5000000000000003e-07, |
| "loss": 2.034, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.02276176024279211, |
| "grad_norm": 6.259925365447998, |
| "learning_rate": 5.000000000000001e-07, |
| "loss": 1.9919, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.02503793626707132, |
| "grad_norm": 6.189306259155273, |
| "learning_rate": 5.5e-07, |
| "loss": 1.9989, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.027314112291350532, |
| "grad_norm": 6.382223606109619, |
| "learning_rate": 6.000000000000001e-07, |
| "loss": 2.0004, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.02959028831562974, |
| "grad_norm": 6.581198215484619, |
| "learning_rate": 6.5e-07, |
| "loss": 1.9606, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.03186646433990895, |
| "grad_norm": 6.698477268218994, |
| "learning_rate": 7.000000000000001e-07, |
| "loss": 1.9986, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.03414264036418816, |
| "grad_norm": 6.462113857269287, |
| "learning_rate": 7.5e-07, |
| "loss": 1.9435, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.036418816388467376, |
| "grad_norm": 6.667123794555664, |
| "learning_rate": 8.000000000000001e-07, |
| "loss": 1.9262, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.038694992412746584, |
| "grad_norm": 6.812009334564209, |
| "learning_rate": 8.500000000000001e-07, |
| "loss": 1.9341, |
| "step": 17 |
| }, |
| { |
| "epoch": 0.0409711684370258, |
| "grad_norm": 6.460822582244873, |
| "learning_rate": 9.000000000000001e-07, |
| "loss": 1.8857, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.04324734446130501, |
| "grad_norm": 5.623890399932861, |
| "learning_rate": 9.500000000000001e-07, |
| "loss": 1.8256, |
| "step": 19 |
| }, |
| { |
| "epoch": 0.04552352048558422, |
| "grad_norm": 4.976780414581299, |
| "learning_rate": 1.0000000000000002e-06, |
| "loss": 1.8312, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.04779969650986343, |
| "grad_norm": 4.3025383949279785, |
| "learning_rate": 1.0500000000000001e-06, |
| "loss": 1.8263, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.05007587253414264, |
| "grad_norm": 3.7881436347961426, |
| "learning_rate": 1.1e-06, |
| "loss": 1.7652, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.05235204855842185, |
| "grad_norm": 3.4925425052642822, |
| "learning_rate": 1.1500000000000002e-06, |
| "loss": 1.7603, |
| "step": 23 |
| }, |
| { |
| "epoch": 0.054628224582701064, |
| "grad_norm": 3.0760865211486816, |
| "learning_rate": 1.2000000000000002e-06, |
| "loss": 1.7599, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.05690440060698027, |
| "grad_norm": 2.7170724868774414, |
| "learning_rate": 1.25e-06, |
| "loss": 1.7725, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.05918057663125948, |
| "grad_norm": 2.0981554985046387, |
| "learning_rate": 1.3e-06, |
| "loss": 1.6781, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.061456752655538696, |
| "grad_norm": 1.9057221412658691, |
| "learning_rate": 1.3500000000000002e-06, |
| "loss": 1.6897, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.0637329286798179, |
| "grad_norm": 1.678957223892212, |
| "learning_rate": 1.4000000000000001e-06, |
| "loss": 1.7124, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.06600910470409711, |
| "grad_norm": 1.594223141670227, |
| "learning_rate": 1.45e-06, |
| "loss": 1.6953, |
| "step": 29 |
| }, |
| { |
| "epoch": 0.06828528072837632, |
| "grad_norm": 1.5038321018218994, |
| "learning_rate": 1.5e-06, |
| "loss": 1.6392, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.07056145675265554, |
| "grad_norm": 1.5202770233154297, |
| "learning_rate": 1.5500000000000002e-06, |
| "loss": 1.6756, |
| "step": 31 |
| }, |
| { |
| "epoch": 0.07283763277693475, |
| "grad_norm": 1.4849720001220703, |
| "learning_rate": 1.6000000000000001e-06, |
| "loss": 1.6587, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.07511380880121396, |
| "grad_norm": 1.4973641633987427, |
| "learning_rate": 1.6500000000000003e-06, |
| "loss": 1.6222, |
| "step": 33 |
| }, |
| { |
| "epoch": 0.07738998482549317, |
| "grad_norm": 1.4055628776550293, |
| "learning_rate": 1.7000000000000002e-06, |
| "loss": 1.6318, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.07966616084977238, |
| "grad_norm": 1.365734338760376, |
| "learning_rate": 1.75e-06, |
| "loss": 1.5656, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.0819423368740516, |
| "grad_norm": 1.2574050426483154, |
| "learning_rate": 1.8000000000000001e-06, |
| "loss": 1.602, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.08421851289833081, |
| "grad_norm": 1.2459263801574707, |
| "learning_rate": 1.85e-06, |
| "loss": 1.571, |
| "step": 37 |
| }, |
| { |
| "epoch": 0.08649468892261002, |
| "grad_norm": 1.1563637256622314, |
| "learning_rate": 1.9000000000000002e-06, |
| "loss": 1.5968, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.08877086494688922, |
| "grad_norm": 1.0916545391082764, |
| "learning_rate": 1.9500000000000004e-06, |
| "loss": 1.5493, |
| "step": 39 |
| }, |
| { |
| "epoch": 0.09104704097116843, |
| "grad_norm": 1.0802186727523804, |
| "learning_rate": 2.0000000000000003e-06, |
| "loss": 1.529, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.09332321699544764, |
| "grad_norm": 1.0635664463043213, |
| "learning_rate": 2.05e-06, |
| "loss": 1.4784, |
| "step": 41 |
| }, |
| { |
| "epoch": 0.09559939301972686, |
| "grad_norm": 0.985824465751648, |
| "learning_rate": 2.1000000000000002e-06, |
| "loss": 1.5508, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.09787556904400607, |
| "grad_norm": 1.036191701889038, |
| "learning_rate": 2.15e-06, |
| "loss": 1.5465, |
| "step": 43 |
| }, |
| { |
| "epoch": 0.10015174506828528, |
| "grad_norm": 1.0564978122711182, |
| "learning_rate": 2.2e-06, |
| "loss": 1.503, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.10242792109256449, |
| "grad_norm": 1.1553199291229248, |
| "learning_rate": 2.25e-06, |
| "loss": 1.4578, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.1047040971168437, |
| "grad_norm": 1.1265777349472046, |
| "learning_rate": 2.3000000000000004e-06, |
| "loss": 1.4497, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.10698027314112292, |
| "grad_norm": 0.9469030499458313, |
| "learning_rate": 2.35e-06, |
| "loss": 1.4676, |
| "step": 47 |
| }, |
| { |
| "epoch": 0.10925644916540213, |
| "grad_norm": 0.649141252040863, |
| "learning_rate": 2.4000000000000003e-06, |
| "loss": 1.455, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.11153262518968134, |
| "grad_norm": 0.6022727489471436, |
| "learning_rate": 2.4500000000000003e-06, |
| "loss": 1.4814, |
| "step": 49 |
| }, |
| { |
| "epoch": 0.11380880121396054, |
| "grad_norm": 0.7700338363647461, |
| "learning_rate": 2.5e-06, |
| "loss": 1.4786, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.11608497723823975, |
| "grad_norm": 0.924614429473877, |
| "learning_rate": 2.55e-06, |
| "loss": 1.4338, |
| "step": 51 |
| }, |
| { |
| "epoch": 0.11836115326251896, |
| "grad_norm": 0.8892627954483032, |
| "learning_rate": 2.6e-06, |
| "loss": 1.441, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.12063732928679818, |
| "grad_norm": 0.7454217076301575, |
| "learning_rate": 2.6500000000000005e-06, |
| "loss": 1.4016, |
| "step": 53 |
| }, |
| { |
| "epoch": 0.12291350531107739, |
| "grad_norm": 0.5784000754356384, |
| "learning_rate": 2.7000000000000004e-06, |
| "loss": 1.4222, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.1251896813353566, |
| "grad_norm": 0.5783917903900146, |
| "learning_rate": 2.7500000000000004e-06, |
| "loss": 1.4087, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.1274658573596358, |
| "grad_norm": 0.5947427153587341, |
| "learning_rate": 2.8000000000000003e-06, |
| "loss": 1.4008, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.12974203338391502, |
| "grad_norm": 0.6172689199447632, |
| "learning_rate": 2.85e-06, |
| "loss": 1.4292, |
| "step": 57 |
| }, |
| { |
| "epoch": 0.13201820940819423, |
| "grad_norm": 0.6890118718147278, |
| "learning_rate": 2.9e-06, |
| "loss": 1.4215, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.13429438543247343, |
| "grad_norm": 0.5748654007911682, |
| "learning_rate": 2.95e-06, |
| "loss": 1.4402, |
| "step": 59 |
| }, |
| { |
| "epoch": 0.13657056145675264, |
| "grad_norm": 0.5015429258346558, |
| "learning_rate": 3e-06, |
| "loss": 1.4338, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.13884673748103188, |
| "grad_norm": 0.4844941794872284, |
| "learning_rate": 3.05e-06, |
| "loss": 1.3846, |
| "step": 61 |
| }, |
| { |
| "epoch": 0.1411229135053111, |
| "grad_norm": 0.48353612422943115, |
| "learning_rate": 3.1000000000000004e-06, |
| "loss": 1.3864, |
| "step": 62 |
| }, |
| { |
| "epoch": 0.1433990895295903, |
| "grad_norm": 0.47880005836486816, |
| "learning_rate": 3.1500000000000003e-06, |
| "loss": 1.3764, |
| "step": 63 |
| }, |
| { |
| "epoch": 0.1456752655538695, |
| "grad_norm": 0.5600204467773438, |
| "learning_rate": 3.2000000000000003e-06, |
| "loss": 1.398, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.1479514415781487, |
| "grad_norm": 0.4868157207965851, |
| "learning_rate": 3.2500000000000002e-06, |
| "loss": 1.3959, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.15022761760242792, |
| "grad_norm": 0.4253179430961609, |
| "learning_rate": 3.3000000000000006e-06, |
| "loss": 1.3695, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.15250379362670713, |
| "grad_norm": 0.4152253270149231, |
| "learning_rate": 3.3500000000000005e-06, |
| "loss": 1.428, |
| "step": 67 |
| }, |
| { |
| "epoch": 0.15477996965098634, |
| "grad_norm": 0.43653807044029236, |
| "learning_rate": 3.4000000000000005e-06, |
| "loss": 1.4244, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.15705614567526555, |
| "grad_norm": 0.4184909164905548, |
| "learning_rate": 3.45e-06, |
| "loss": 1.413, |
| "step": 69 |
| }, |
| { |
| "epoch": 0.15933232169954475, |
| "grad_norm": 0.4401929974555969, |
| "learning_rate": 3.5e-06, |
| "loss": 1.3769, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.16160849772382396, |
| "grad_norm": 0.42470934987068176, |
| "learning_rate": 3.5500000000000003e-06, |
| "loss": 1.328, |
| "step": 71 |
| }, |
| { |
| "epoch": 0.1638846737481032, |
| "grad_norm": 0.43167445063591003, |
| "learning_rate": 3.6000000000000003e-06, |
| "loss": 1.3585, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.1661608497723824, |
| "grad_norm": 0.39305731654167175, |
| "learning_rate": 3.65e-06, |
| "loss": 1.3635, |
| "step": 73 |
| }, |
| { |
| "epoch": 0.16843702579666162, |
| "grad_norm": 0.3937039077281952, |
| "learning_rate": 3.7e-06, |
| "loss": 1.3583, |
| "step": 74 |
| }, |
| { |
| "epoch": 0.17071320182094082, |
| "grad_norm": 0.4098603129386902, |
| "learning_rate": 3.7500000000000005e-06, |
| "loss": 1.3651, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.17298937784522003, |
| "grad_norm": 0.41061389446258545, |
| "learning_rate": 3.8000000000000005e-06, |
| "loss": 1.4184, |
| "step": 76 |
| }, |
| { |
| "epoch": 0.17526555386949924, |
| "grad_norm": 0.3926120698451996, |
| "learning_rate": 3.85e-06, |
| "loss": 1.3693, |
| "step": 77 |
| }, |
| { |
| "epoch": 0.17754172989377845, |
| "grad_norm": 0.41317838430404663, |
| "learning_rate": 3.900000000000001e-06, |
| "loss": 1.3354, |
| "step": 78 |
| }, |
| { |
| "epoch": 0.17981790591805766, |
| "grad_norm": 0.37922877073287964, |
| "learning_rate": 3.95e-06, |
| "loss": 1.364, |
| "step": 79 |
| }, |
| { |
| "epoch": 0.18209408194233687, |
| "grad_norm": 0.3894996643066406, |
| "learning_rate": 4.000000000000001e-06, |
| "loss": 1.3495, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.18437025796661607, |
| "grad_norm": 0.4024641513824463, |
| "learning_rate": 4.05e-06, |
| "loss": 1.3604, |
| "step": 81 |
| }, |
| { |
| "epoch": 0.18664643399089528, |
| "grad_norm": 0.38427308201789856, |
| "learning_rate": 4.1e-06, |
| "loss": 1.3734, |
| "step": 82 |
| }, |
| { |
| "epoch": 0.18892261001517452, |
| "grad_norm": 0.38881292939186096, |
| "learning_rate": 4.15e-06, |
| "loss": 1.3235, |
| "step": 83 |
| }, |
| { |
| "epoch": 0.19119878603945373, |
| "grad_norm": 0.4112228453159332, |
| "learning_rate": 4.2000000000000004e-06, |
| "loss": 1.3714, |
| "step": 84 |
| }, |
| { |
| "epoch": 0.19347496206373294, |
| "grad_norm": 0.3790343999862671, |
| "learning_rate": 4.25e-06, |
| "loss": 1.3508, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.19575113808801214, |
| "grad_norm": 0.38511818647384644, |
| "learning_rate": 4.3e-06, |
| "loss": 1.3726, |
| "step": 86 |
| }, |
| { |
| "epoch": 0.19802731411229135, |
| "grad_norm": 0.3809172213077545, |
| "learning_rate": 4.350000000000001e-06, |
| "loss": 1.3978, |
| "step": 87 |
| }, |
| { |
| "epoch": 0.20030349013657056, |
| "grad_norm": 0.39862319827079773, |
| "learning_rate": 4.4e-06, |
| "loss": 1.3402, |
| "step": 88 |
| }, |
| { |
| "epoch": 0.20257966616084977, |
| "grad_norm": 0.3779354989528656, |
| "learning_rate": 4.450000000000001e-06, |
| "loss": 1.3585, |
| "step": 89 |
| }, |
| { |
| "epoch": 0.20485584218512898, |
| "grad_norm": 0.3755280375480652, |
| "learning_rate": 4.5e-06, |
| "loss": 1.3809, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.2071320182094082, |
| "grad_norm": 0.4072270691394806, |
| "learning_rate": 4.5500000000000005e-06, |
| "loss": 1.337, |
| "step": 91 |
| }, |
| { |
| "epoch": 0.2094081942336874, |
| "grad_norm": 0.3852587938308716, |
| "learning_rate": 4.600000000000001e-06, |
| "loss": 1.3239, |
| "step": 92 |
| }, |
| { |
| "epoch": 0.2116843702579666, |
| "grad_norm": 0.3857567012310028, |
| "learning_rate": 4.65e-06, |
| "loss": 1.3676, |
| "step": 93 |
| }, |
| { |
| "epoch": 0.21396054628224584, |
| "grad_norm": 0.39954471588134766, |
| "learning_rate": 4.7e-06, |
| "loss": 1.372, |
| "step": 94 |
| }, |
| { |
| "epoch": 0.21623672230652505, |
| "grad_norm": 0.3801283836364746, |
| "learning_rate": 4.75e-06, |
| "loss": 1.3636, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.21851289833080426, |
| "grad_norm": 0.37748953700065613, |
| "learning_rate": 4.800000000000001e-06, |
| "loss": 1.3298, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.22078907435508346, |
| "grad_norm": 0.3678078055381775, |
| "learning_rate": 4.85e-06, |
| "loss": 1.3267, |
| "step": 97 |
| }, |
| { |
| "epoch": 0.22306525037936267, |
| "grad_norm": 0.3928042948246002, |
| "learning_rate": 4.9000000000000005e-06, |
| "loss": 1.3705, |
| "step": 98 |
| }, |
| { |
| "epoch": 0.22534142640364188, |
| "grad_norm": 0.3824443817138672, |
| "learning_rate": 4.95e-06, |
| "loss": 1.3536, |
| "step": 99 |
| }, |
| { |
| "epoch": 0.2276176024279211, |
| "grad_norm": 0.38775718212127686, |
| "learning_rate": 5e-06, |
| "loss": 1.3366, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.2298937784522003, |
| "grad_norm": 0.39415422081947327, |
| "learning_rate": 4.999998078694254e-06, |
| "loss": 1.3369, |
| "step": 101 |
| }, |
| { |
| "epoch": 0.2321699544764795, |
| "grad_norm": 0.3640560507774353, |
| "learning_rate": 4.999992314779968e-06, |
| "loss": 1.3548, |
| "step": 102 |
| }, |
| { |
| "epoch": 0.23444613050075871, |
| "grad_norm": 0.38077881932258606, |
| "learning_rate": 4.999982708266002e-06, |
| "loss": 1.322, |
| "step": 103 |
| }, |
| { |
| "epoch": 0.23672230652503792, |
| "grad_norm": 0.3910675346851349, |
| "learning_rate": 4.999969259167121e-06, |
| "loss": 1.3568, |
| "step": 104 |
| }, |
| { |
| "epoch": 0.23899848254931716, |
| "grad_norm": 0.3724777102470398, |
| "learning_rate": 4.999951967503998e-06, |
| "loss": 1.3657, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.24127465857359637, |
| "grad_norm": 0.39835065603256226, |
| "learning_rate": 4.9999308333032095e-06, |
| "loss": 1.3728, |
| "step": 106 |
| }, |
| { |
| "epoch": 0.24355083459787558, |
| "grad_norm": 0.3887874186038971, |
| "learning_rate": 4.999905856597241e-06, |
| "loss": 1.3269, |
| "step": 107 |
| }, |
| { |
| "epoch": 0.24582701062215478, |
| "grad_norm": 0.37291401624679565, |
| "learning_rate": 4.999877037424482e-06, |
| "loss": 1.3522, |
| "step": 108 |
| }, |
| { |
| "epoch": 0.248103186646434, |
| "grad_norm": 0.3793584406375885, |
| "learning_rate": 4.999844375829229e-06, |
| "loss": 1.3459, |
| "step": 109 |
| }, |
| { |
| "epoch": 0.2503793626707132, |
| "grad_norm": 0.38437148928642273, |
| "learning_rate": 4.999807871861686e-06, |
| "loss": 1.3419, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.2526555386949924, |
| "grad_norm": 0.37772583961486816, |
| "learning_rate": 4.999767525577958e-06, |
| "loss": 1.3349, |
| "step": 111 |
| }, |
| { |
| "epoch": 0.2549317147192716, |
| "grad_norm": 0.3829944133758545, |
| "learning_rate": 4.999723337040062e-06, |
| "loss": 1.3193, |
| "step": 112 |
| }, |
| { |
| "epoch": 0.2572078907435508, |
| "grad_norm": 0.38355737924575806, |
| "learning_rate": 4.999675306315917e-06, |
| "loss": 1.3457, |
| "step": 113 |
| }, |
| { |
| "epoch": 0.25948406676783003, |
| "grad_norm": 0.39071688055992126, |
| "learning_rate": 4.999623433479346e-06, |
| "loss": 1.3401, |
| "step": 114 |
| }, |
| { |
| "epoch": 0.26176024279210924, |
| "grad_norm": 0.3796067535877228, |
| "learning_rate": 4.9995677186100835e-06, |
| "loss": 1.3593, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.26403641881638845, |
| "grad_norm": 0.3870932459831238, |
| "learning_rate": 4.9995081617937635e-06, |
| "loss": 1.3678, |
| "step": 116 |
| }, |
| { |
| "epoch": 0.26631259484066766, |
| "grad_norm": 0.3870759606361389, |
| "learning_rate": 4.999444763121928e-06, |
| "loss": 1.331, |
| "step": 117 |
| }, |
| { |
| "epoch": 0.26858877086494687, |
| "grad_norm": 0.37003180384635925, |
| "learning_rate": 4.999377522692023e-06, |
| "loss": 1.3242, |
| "step": 118 |
| }, |
| { |
| "epoch": 0.2708649468892261, |
| "grad_norm": 0.3826284408569336, |
| "learning_rate": 4.999306440607401e-06, |
| "loss": 1.2921, |
| "step": 119 |
| }, |
| { |
| "epoch": 0.2731411229135053, |
| "grad_norm": 0.3886045515537262, |
| "learning_rate": 4.999231516977318e-06, |
| "loss": 1.2971, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.27541729893778455, |
| "grad_norm": 0.3992857336997986, |
| "learning_rate": 4.999152751916936e-06, |
| "loss": 1.2872, |
| "step": 121 |
| }, |
| { |
| "epoch": 0.27769347496206376, |
| "grad_norm": 0.4303230941295624, |
| "learning_rate": 4.999070145547318e-06, |
| "loss": 1.3562, |
| "step": 122 |
| }, |
| { |
| "epoch": 0.27996965098634297, |
| "grad_norm": 0.40188783407211304, |
| "learning_rate": 4.998983697995435e-06, |
| "loss": 1.3251, |
| "step": 123 |
| }, |
| { |
| "epoch": 0.2822458270106222, |
| "grad_norm": 0.41683951020240784, |
| "learning_rate": 4.998893409394162e-06, |
| "loss": 1.3279, |
| "step": 124 |
| }, |
| { |
| "epoch": 0.2845220030349014, |
| "grad_norm": 0.4539605379104614, |
| "learning_rate": 4.9987992798822745e-06, |
| "loss": 1.3133, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.2867981790591806, |
| "grad_norm": 0.40195104479789734, |
| "learning_rate": 4.998701309604454e-06, |
| "loss": 1.3372, |
| "step": 126 |
| }, |
| { |
| "epoch": 0.2890743550834598, |
| "grad_norm": 0.40602678060531616, |
| "learning_rate": 4.998599498711287e-06, |
| "loss": 1.3008, |
| "step": 127 |
| }, |
| { |
| "epoch": 0.291350531107739, |
| "grad_norm": 0.37955862283706665, |
| "learning_rate": 4.99849384735926e-06, |
| "loss": 1.2919, |
| "step": 128 |
| }, |
| { |
| "epoch": 0.2936267071320182, |
| "grad_norm": 0.38034912943840027, |
| "learning_rate": 4.9983843557107635e-06, |
| "loss": 1.3307, |
| "step": 129 |
| }, |
| { |
| "epoch": 0.2959028831562974, |
| "grad_norm": 0.3922058641910553, |
| "learning_rate": 4.9982710239340915e-06, |
| "loss": 1.3211, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.29817905918057663, |
| "grad_norm": 0.4012414515018463, |
| "learning_rate": 4.998153852203441e-06, |
| "loss": 1.3762, |
| "step": 131 |
| }, |
| { |
| "epoch": 0.30045523520485584, |
| "grad_norm": 0.41045159101486206, |
| "learning_rate": 4.998032840698909e-06, |
| "loss": 1.3384, |
| "step": 132 |
| }, |
| { |
| "epoch": 0.30273141122913505, |
| "grad_norm": 0.3880952298641205, |
| "learning_rate": 4.997907989606495e-06, |
| "loss": 1.2976, |
| "step": 133 |
| }, |
| { |
| "epoch": 0.30500758725341426, |
| "grad_norm": 0.39358070492744446, |
| "learning_rate": 4.997779299118102e-06, |
| "loss": 1.3036, |
| "step": 134 |
| }, |
| { |
| "epoch": 0.30728376327769347, |
| "grad_norm": 0.400647908449173, |
| "learning_rate": 4.997646769431532e-06, |
| "loss": 1.3573, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.3095599393019727, |
| "grad_norm": 0.40589869022369385, |
| "learning_rate": 4.99751040075049e-06, |
| "loss": 1.3462, |
| "step": 136 |
| }, |
| { |
| "epoch": 0.3118361153262519, |
| "grad_norm": 0.420673131942749, |
| "learning_rate": 4.997370193284581e-06, |
| "loss": 1.317, |
| "step": 137 |
| }, |
| { |
| "epoch": 0.3141122913505311, |
| "grad_norm": 0.3844830393791199, |
| "learning_rate": 4.997226147249309e-06, |
| "loss": 1.3437, |
| "step": 138 |
| }, |
| { |
| "epoch": 0.3163884673748103, |
| "grad_norm": 0.37681150436401367, |
| "learning_rate": 4.9970782628660794e-06, |
| "loss": 1.3216, |
| "step": 139 |
| }, |
| { |
| "epoch": 0.3186646433990895, |
| "grad_norm": 0.40281322598457336, |
| "learning_rate": 4.996926540362198e-06, |
| "loss": 1.3578, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.3209408194233687, |
| "grad_norm": 0.3950099050998688, |
| "learning_rate": 4.9967709799708675e-06, |
| "loss": 1.3472, |
| "step": 141 |
| }, |
| { |
| "epoch": 0.3232169954476479, |
| "grad_norm": 0.3890508711338043, |
| "learning_rate": 4.9966115819311926e-06, |
| "loss": 1.3112, |
| "step": 142 |
| }, |
| { |
| "epoch": 0.3254931714719272, |
| "grad_norm": 0.3960939347743988, |
| "learning_rate": 4.996448346488175e-06, |
| "loss": 1.331, |
| "step": 143 |
| }, |
| { |
| "epoch": 0.3277693474962064, |
| "grad_norm": 0.394761323928833, |
| "learning_rate": 4.9962812738927135e-06, |
| "loss": 1.3265, |
| "step": 144 |
| }, |
| { |
| "epoch": 0.3300455235204856, |
| "grad_norm": 0.4139835238456726, |
| "learning_rate": 4.996110364401607e-06, |
| "loss": 1.3423, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.3323216995447648, |
| "grad_norm": 0.40223428606987, |
| "learning_rate": 4.9959356182775525e-06, |
| "loss": 1.3213, |
| "step": 146 |
| }, |
| { |
| "epoch": 0.334597875569044, |
| "grad_norm": 0.41239285469055176, |
| "learning_rate": 4.9957570357891406e-06, |
| "loss": 1.3488, |
| "step": 147 |
| }, |
| { |
| "epoch": 0.33687405159332323, |
| "grad_norm": 0.41569817066192627, |
| "learning_rate": 4.995574617210861e-06, |
| "loss": 1.3373, |
| "step": 148 |
| }, |
| { |
| "epoch": 0.33915022761760244, |
| "grad_norm": 0.40224048495292664, |
| "learning_rate": 4.9953883628231e-06, |
| "loss": 1.3086, |
| "step": 149 |
| }, |
| { |
| "epoch": 0.34142640364188165, |
| "grad_norm": 0.4080573618412018, |
| "learning_rate": 4.995198272912137e-06, |
| "loss": 1.3221, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.34370257966616086, |
| "grad_norm": 0.41279059648513794, |
| "learning_rate": 4.9950043477701505e-06, |
| "loss": 1.3336, |
| "step": 151 |
| }, |
| { |
| "epoch": 0.34597875569044007, |
| "grad_norm": 0.4138430655002594, |
| "learning_rate": 4.994806587695212e-06, |
| "loss": 1.3245, |
| "step": 152 |
| }, |
| { |
| "epoch": 0.3482549317147193, |
| "grad_norm": 0.4141685664653778, |
| "learning_rate": 4.994604992991287e-06, |
| "loss": 1.3459, |
| "step": 153 |
| }, |
| { |
| "epoch": 0.3505311077389985, |
| "grad_norm": 0.4655224680900574, |
| "learning_rate": 4.994399563968235e-06, |
| "loss": 1.307, |
| "step": 154 |
| }, |
| { |
| "epoch": 0.3528072837632777, |
| "grad_norm": 0.40181776881217957, |
| "learning_rate": 4.99419030094181e-06, |
| "loss": 1.2951, |
| "step": 155 |
| }, |
| { |
| "epoch": 0.3550834597875569, |
| "grad_norm": 0.4349536597728729, |
| "learning_rate": 4.99397720423366e-06, |
| "loss": 1.3346, |
| "step": 156 |
| }, |
| { |
| "epoch": 0.3573596358118361, |
| "grad_norm": 0.47389090061187744, |
| "learning_rate": 4.993760274171322e-06, |
| "loss": 1.2918, |
| "step": 157 |
| }, |
| { |
| "epoch": 0.3596358118361153, |
| "grad_norm": 0.43464231491088867, |
| "learning_rate": 4.993539511088228e-06, |
| "loss": 1.3469, |
| "step": 158 |
| }, |
| { |
| "epoch": 0.3619119878603945, |
| "grad_norm": 0.43050721287727356, |
| "learning_rate": 4.993314915323701e-06, |
| "loss": 1.2993, |
| "step": 159 |
| }, |
| { |
| "epoch": 0.36418816388467373, |
| "grad_norm": 0.4154967665672302, |
| "learning_rate": 4.9930864872229555e-06, |
| "loss": 1.301, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.36646433990895294, |
| "grad_norm": 0.4043583869934082, |
| "learning_rate": 4.992854227137094e-06, |
| "loss": 1.3357, |
| "step": 161 |
| }, |
| { |
| "epoch": 0.36874051593323215, |
| "grad_norm": 0.4242326617240906, |
| "learning_rate": 4.992618135423111e-06, |
| "loss": 1.3139, |
| "step": 162 |
| }, |
| { |
| "epoch": 0.37101669195751136, |
| "grad_norm": 0.4029645621776581, |
| "learning_rate": 4.992378212443891e-06, |
| "loss": 1.2773, |
| "step": 163 |
| }, |
| { |
| "epoch": 0.37329286798179057, |
| "grad_norm": 0.3948841989040375, |
| "learning_rate": 4.992134458568205e-06, |
| "loss": 1.3267, |
| "step": 164 |
| }, |
| { |
| "epoch": 0.37556904400606983, |
| "grad_norm": 0.4325512647628784, |
| "learning_rate": 4.991886874170715e-06, |
| "loss": 1.2986, |
| "step": 165 |
| }, |
| { |
| "epoch": 0.37784522003034904, |
| "grad_norm": 0.4292261600494385, |
| "learning_rate": 4.991635459631968e-06, |
| "loss": 1.3383, |
| "step": 166 |
| }, |
| { |
| "epoch": 0.38012139605462825, |
| "grad_norm": 0.407819539308548, |
| "learning_rate": 4.991380215338399e-06, |
| "loss": 1.2798, |
| "step": 167 |
| }, |
| { |
| "epoch": 0.38239757207890746, |
| "grad_norm": 0.41592007875442505, |
| "learning_rate": 4.991121141682332e-06, |
| "loss": 1.3161, |
| "step": 168 |
| }, |
| { |
| "epoch": 0.38467374810318666, |
| "grad_norm": 0.4135512411594391, |
| "learning_rate": 4.990858239061973e-06, |
| "loss": 1.3221, |
| "step": 169 |
| }, |
| { |
| "epoch": 0.38694992412746587, |
| "grad_norm": 0.4168025851249695, |
| "learning_rate": 4.990591507881416e-06, |
| "loss": 1.3094, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.3892261001517451, |
| "grad_norm": 0.42845603823661804, |
| "learning_rate": 4.990320948550638e-06, |
| "loss": 1.3086, |
| "step": 171 |
| }, |
| { |
| "epoch": 0.3915022761760243, |
| "grad_norm": 0.4117361009120941, |
| "learning_rate": 4.9900465614855e-06, |
| "loss": 1.3074, |
| "step": 172 |
| }, |
| { |
| "epoch": 0.3937784522003035, |
| "grad_norm": 0.40385058522224426, |
| "learning_rate": 4.989768347107749e-06, |
| "loss": 1.3015, |
| "step": 173 |
| }, |
| { |
| "epoch": 0.3960546282245827, |
| "grad_norm": 0.42507070302963257, |
| "learning_rate": 4.989486305845012e-06, |
| "loss": 1.303, |
| "step": 174 |
| }, |
| { |
| "epoch": 0.3983308042488619, |
| "grad_norm": 0.4167408347129822, |
| "learning_rate": 4.989200438130799e-06, |
| "loss": 1.3246, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.4006069802731411, |
| "grad_norm": 0.4459727108478546, |
| "learning_rate": 4.988910744404501e-06, |
| "loss": 1.3082, |
| "step": 176 |
| }, |
| { |
| "epoch": 0.40288315629742033, |
| "grad_norm": 0.41572514176368713, |
| "learning_rate": 4.988617225111392e-06, |
| "loss": 1.329, |
| "step": 177 |
| }, |
| { |
| "epoch": 0.40515933232169954, |
| "grad_norm": 0.40346917510032654, |
| "learning_rate": 4.988319880702621e-06, |
| "loss": 1.3204, |
| "step": 178 |
| }, |
| { |
| "epoch": 0.40743550834597875, |
| "grad_norm": 0.49305301904678345, |
| "learning_rate": 4.988018711635223e-06, |
| "loss": 1.3174, |
| "step": 179 |
| }, |
| { |
| "epoch": 0.40971168437025796, |
| "grad_norm": 0.4136899411678314, |
| "learning_rate": 4.987713718372106e-06, |
| "loss": 1.3153, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.41198786039453716, |
| "grad_norm": 0.4320002794265747, |
| "learning_rate": 4.98740490138206e-06, |
| "loss": 1.3233, |
| "step": 181 |
| }, |
| { |
| "epoch": 0.4142640364188164, |
| "grad_norm": 0.40051817893981934, |
| "learning_rate": 4.9870922611397484e-06, |
| "loss": 1.3298, |
| "step": 182 |
| }, |
| { |
| "epoch": 0.4165402124430956, |
| "grad_norm": 0.43490317463874817, |
| "learning_rate": 4.986775798125715e-06, |
| "loss": 1.2924, |
| "step": 183 |
| }, |
| { |
| "epoch": 0.4188163884673748, |
| "grad_norm": 0.41733044385910034, |
| "learning_rate": 4.986455512826377e-06, |
| "loss": 1.3407, |
| "step": 184 |
| }, |
| { |
| "epoch": 0.421092564491654, |
| "grad_norm": 0.45686185359954834, |
| "learning_rate": 4.986131405734027e-06, |
| "loss": 1.3002, |
| "step": 185 |
| }, |
| { |
| "epoch": 0.4233687405159332, |
| "grad_norm": 0.4178033173084259, |
| "learning_rate": 4.985803477346832e-06, |
| "loss": 1.2707, |
| "step": 186 |
| }, |
| { |
| "epoch": 0.42564491654021247, |
| "grad_norm": 0.44030341506004333, |
| "learning_rate": 4.985471728168832e-06, |
| "loss": 1.3522, |
| "step": 187 |
| }, |
| { |
| "epoch": 0.4279210925644917, |
| "grad_norm": 0.4167434573173523, |
| "learning_rate": 4.985136158709942e-06, |
| "loss": 1.2952, |
| "step": 188 |
| }, |
| { |
| "epoch": 0.4301972685887709, |
| "grad_norm": 0.43799030780792236, |
| "learning_rate": 4.984796769485946e-06, |
| "loss": 1.3204, |
| "step": 189 |
| }, |
| { |
| "epoch": 0.4324734446130501, |
| "grad_norm": 0.3963024914264679, |
| "learning_rate": 4.984453561018501e-06, |
| "loss": 1.2852, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.4347496206373293, |
| "grad_norm": 0.4606306850910187, |
| "learning_rate": 4.984106533835132e-06, |
| "loss": 1.3, |
| "step": 191 |
| }, |
| { |
| "epoch": 0.4370257966616085, |
| "grad_norm": 0.43703702092170715, |
| "learning_rate": 4.9837556884692374e-06, |
| "loss": 1.2865, |
| "step": 192 |
| }, |
| { |
| "epoch": 0.4393019726858877, |
| "grad_norm": 0.419226735830307, |
| "learning_rate": 4.9834010254600814e-06, |
| "loss": 1.3212, |
| "step": 193 |
| }, |
| { |
| "epoch": 0.44157814871016693, |
| "grad_norm": 0.4051378071308136, |
| "learning_rate": 4.983042545352796e-06, |
| "loss": 1.3102, |
| "step": 194 |
| }, |
| { |
| "epoch": 0.44385432473444614, |
| "grad_norm": 0.44308584928512573, |
| "learning_rate": 4.982680248698383e-06, |
| "loss": 1.2753, |
| "step": 195 |
| }, |
| { |
| "epoch": 0.44613050075872535, |
| "grad_norm": 0.48592913150787354, |
| "learning_rate": 4.982314136053707e-06, |
| "loss": 1.3468, |
| "step": 196 |
| }, |
| { |
| "epoch": 0.44840667678300455, |
| "grad_norm": 0.4361239969730377, |
| "learning_rate": 4.981944207981499e-06, |
| "loss": 1.2345, |
| "step": 197 |
| }, |
| { |
| "epoch": 0.45068285280728376, |
| "grad_norm": 0.4420235753059387, |
| "learning_rate": 4.981570465050357e-06, |
| "loss": 1.308, |
| "step": 198 |
| }, |
| { |
| "epoch": 0.45295902883156297, |
| "grad_norm": 0.4724012315273285, |
| "learning_rate": 4.98119290783474e-06, |
| "loss": 1.3451, |
| "step": 199 |
| }, |
| { |
| "epoch": 0.4552352048558422, |
| "grad_norm": 0.4347815215587616, |
| "learning_rate": 4.980811536914968e-06, |
| "loss": 1.2926, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.4575113808801214, |
| "grad_norm": 0.4243141710758209, |
| "learning_rate": 4.980426352877228e-06, |
| "loss": 1.2863, |
| "step": 201 |
| }, |
| { |
| "epoch": 0.4597875569044006, |
| "grad_norm": 0.41129249334335327, |
| "learning_rate": 4.980037356313563e-06, |
| "loss": 1.3017, |
| "step": 202 |
| }, |
| { |
| "epoch": 0.4620637329286798, |
| "grad_norm": 0.4349686801433563, |
| "learning_rate": 4.979644547821879e-06, |
| "loss": 1.3655, |
| "step": 203 |
| }, |
| { |
| "epoch": 0.464339908952959, |
| "grad_norm": 0.438151478767395, |
| "learning_rate": 4.97924792800594e-06, |
| "loss": 1.304, |
| "step": 204 |
| }, |
| { |
| "epoch": 0.4666160849772382, |
| "grad_norm": 0.46755126118659973, |
| "learning_rate": 4.978847497475369e-06, |
| "loss": 1.3282, |
| "step": 205 |
| }, |
| { |
| "epoch": 0.46889226100151743, |
| "grad_norm": 0.42544615268707275, |
| "learning_rate": 4.9784432568456445e-06, |
| "loss": 1.3524, |
| "step": 206 |
| }, |
| { |
| "epoch": 0.47116843702579664, |
| "grad_norm": 0.4163425862789154, |
| "learning_rate": 4.9780352067381024e-06, |
| "loss": 1.3303, |
| "step": 207 |
| }, |
| { |
| "epoch": 0.47344461305007585, |
| "grad_norm": 0.4662051498889923, |
| "learning_rate": 4.977623347779935e-06, |
| "loss": 1.2723, |
| "step": 208 |
| }, |
| { |
| "epoch": 0.4757207890743551, |
| "grad_norm": 0.4841192662715912, |
| "learning_rate": 4.977207680604187e-06, |
| "loss": 1.3281, |
| "step": 209 |
| }, |
| { |
| "epoch": 0.4779969650986343, |
| "grad_norm": 0.47023245692253113, |
| "learning_rate": 4.976788205849758e-06, |
| "loss": 1.2983, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.4802731411229135, |
| "grad_norm": 0.4251156449317932, |
| "learning_rate": 4.9763649241613985e-06, |
| "loss": 1.3215, |
| "step": 211 |
| }, |
| { |
| "epoch": 0.48254931714719274, |
| "grad_norm": 0.436788409948349, |
| "learning_rate": 4.975937836189712e-06, |
| "loss": 1.3006, |
| "step": 212 |
| }, |
| { |
| "epoch": 0.48482549317147194, |
| "grad_norm": 0.46025222539901733, |
| "learning_rate": 4.975506942591152e-06, |
| "loss": 1.3121, |
| "step": 213 |
| }, |
| { |
| "epoch": 0.48710166919575115, |
| "grad_norm": 0.43663930892944336, |
| "learning_rate": 4.97507224402802e-06, |
| "loss": 1.3133, |
| "step": 214 |
| }, |
| { |
| "epoch": 0.48937784522003036, |
| "grad_norm": 0.48787179589271545, |
| "learning_rate": 4.974633741168469e-06, |
| "loss": 1.266, |
| "step": 215 |
| }, |
| { |
| "epoch": 0.49165402124430957, |
| "grad_norm": 0.4265913665294647, |
| "learning_rate": 4.974191434686496e-06, |
| "loss": 1.3035, |
| "step": 216 |
| }, |
| { |
| "epoch": 0.4939301972685888, |
| "grad_norm": 0.4345017373561859, |
| "learning_rate": 4.973745325261946e-06, |
| "loss": 1.2987, |
| "step": 217 |
| }, |
| { |
| "epoch": 0.496206373292868, |
| "grad_norm": 0.47078996896743774, |
| "learning_rate": 4.973295413580509e-06, |
| "loss": 1.3176, |
| "step": 218 |
| }, |
| { |
| "epoch": 0.4984825493171472, |
| "grad_norm": 0.4349548816680908, |
| "learning_rate": 4.97284170033372e-06, |
| "loss": 1.2829, |
| "step": 219 |
| }, |
| { |
| "epoch": 0.5007587253414264, |
| "grad_norm": 0.4705260694026947, |
| "learning_rate": 4.9723841862189555e-06, |
| "loss": 1.2847, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.5030349013657056, |
| "grad_norm": 0.4285137951374054, |
| "learning_rate": 4.971922871939436e-06, |
| "loss": 1.2774, |
| "step": 221 |
| }, |
| { |
| "epoch": 0.5053110773899848, |
| "grad_norm": 0.46022048592567444, |
| "learning_rate": 4.971457758204221e-06, |
| "loss": 1.3006, |
| "step": 222 |
| }, |
| { |
| "epoch": 0.507587253414264, |
| "grad_norm": 0.4904478192329407, |
| "learning_rate": 4.970988845728213e-06, |
| "loss": 1.3032, |
| "step": 223 |
| }, |
| { |
| "epoch": 0.5098634294385432, |
| "grad_norm": 0.4171503484249115, |
| "learning_rate": 4.9705161352321496e-06, |
| "loss": 1.3118, |
| "step": 224 |
| }, |
| { |
| "epoch": 0.5121396054628224, |
| "grad_norm": 0.4424084722995758, |
| "learning_rate": 4.970039627442608e-06, |
| "loss": 1.2342, |
| "step": 225 |
| }, |
| { |
| "epoch": 0.5144157814871017, |
| "grad_norm": 0.45744988322257996, |
| "learning_rate": 4.969559323092004e-06, |
| "loss": 1.2975, |
| "step": 226 |
| }, |
| { |
| "epoch": 0.5166919575113809, |
| "grad_norm": 0.4306228756904602, |
| "learning_rate": 4.969075222918583e-06, |
| "loss": 1.2791, |
| "step": 227 |
| }, |
| { |
| "epoch": 0.5189681335356601, |
| "grad_norm": 0.43930479884147644, |
| "learning_rate": 4.9685873276664324e-06, |
| "loss": 1.2952, |
| "step": 228 |
| }, |
| { |
| "epoch": 0.5212443095599393, |
| "grad_norm": 0.4268686771392822, |
| "learning_rate": 4.968095638085467e-06, |
| "loss": 1.2902, |
| "step": 229 |
| }, |
| { |
| "epoch": 0.5235204855842185, |
| "grad_norm": 0.4320680499076843, |
| "learning_rate": 4.9676001549314356e-06, |
| "loss": 1.2941, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.5257966616084977, |
| "grad_norm": 0.4509009122848511, |
| "learning_rate": 4.967100878965918e-06, |
| "loss": 1.3353, |
| "step": 231 |
| }, |
| { |
| "epoch": 0.5280728376327769, |
| "grad_norm": 0.4458315670490265, |
| "learning_rate": 4.966597810956325e-06, |
| "loss": 1.2918, |
| "step": 232 |
| }, |
| { |
| "epoch": 0.5303490136570561, |
| "grad_norm": 0.4613376259803772, |
| "learning_rate": 4.966090951675893e-06, |
| "loss": 1.3085, |
| "step": 233 |
| }, |
| { |
| "epoch": 0.5326251896813353, |
| "grad_norm": 0.4486188590526581, |
| "learning_rate": 4.9655803019036875e-06, |
| "loss": 1.2783, |
| "step": 234 |
| }, |
| { |
| "epoch": 0.5349013657056145, |
| "grad_norm": 0.44070056080818176, |
| "learning_rate": 4.9650658624246e-06, |
| "loss": 1.2969, |
| "step": 235 |
| }, |
| { |
| "epoch": 0.5371775417298937, |
| "grad_norm": 0.45442667603492737, |
| "learning_rate": 4.9645476340293474e-06, |
| "loss": 1.273, |
| "step": 236 |
| }, |
| { |
| "epoch": 0.539453717754173, |
| "grad_norm": 0.4485810697078705, |
| "learning_rate": 4.96402561751447e-06, |
| "loss": 1.2524, |
| "step": 237 |
| }, |
| { |
| "epoch": 0.5417298937784522, |
| "grad_norm": 0.43408727645874023, |
| "learning_rate": 4.96349981368233e-06, |
| "loss": 1.3, |
| "step": 238 |
| }, |
| { |
| "epoch": 0.5440060698027314, |
| "grad_norm": 0.45317673683166504, |
| "learning_rate": 4.962970223341112e-06, |
| "loss": 1.2959, |
| "step": 239 |
| }, |
| { |
| "epoch": 0.5462822458270106, |
| "grad_norm": 0.45147350430488586, |
| "learning_rate": 4.962436847304818e-06, |
| "loss": 1.2588, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.5485584218512898, |
| "grad_norm": 0.4372202157974243, |
| "learning_rate": 4.961899686393273e-06, |
| "loss": 1.2472, |
| "step": 241 |
| }, |
| { |
| "epoch": 0.5508345978755691, |
| "grad_norm": 0.4300381541252136, |
| "learning_rate": 4.961358741432116e-06, |
| "loss": 1.2892, |
| "step": 242 |
| }, |
| { |
| "epoch": 0.5531107738998483, |
| "grad_norm": 0.4326576888561249, |
| "learning_rate": 4.9608140132528045e-06, |
| "loss": 1.2873, |
| "step": 243 |
| }, |
| { |
| "epoch": 0.5553869499241275, |
| "grad_norm": 0.42891374230384827, |
| "learning_rate": 4.960265502692609e-06, |
| "loss": 1.3159, |
| "step": 244 |
| }, |
| { |
| "epoch": 0.5576631259484067, |
| "grad_norm": 0.44637322425842285, |
| "learning_rate": 4.959713210594616e-06, |
| "loss": 1.2964, |
| "step": 245 |
| }, |
| { |
| "epoch": 0.5599393019726859, |
| "grad_norm": 0.4534567892551422, |
| "learning_rate": 4.959157137807721e-06, |
| "loss": 1.2811, |
| "step": 246 |
| }, |
| { |
| "epoch": 0.5622154779969651, |
| "grad_norm": 0.4480896294116974, |
| "learning_rate": 4.958597285186635e-06, |
| "loss": 1.2887, |
| "step": 247 |
| }, |
| { |
| "epoch": 0.5644916540212443, |
| "grad_norm": 0.42966964840888977, |
| "learning_rate": 4.958033653591874e-06, |
| "loss": 1.2927, |
| "step": 248 |
| }, |
| { |
| "epoch": 0.5667678300455236, |
| "grad_norm": 0.4520474076271057, |
| "learning_rate": 4.9574662438897675e-06, |
| "loss": 1.334, |
| "step": 249 |
| }, |
| { |
| "epoch": 0.5690440060698028, |
| "grad_norm": 0.4476149082183838, |
| "learning_rate": 4.956895056952448e-06, |
| "loss": 1.2813, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.571320182094082, |
| "grad_norm": 0.4495325982570648, |
| "learning_rate": 4.956320093657855e-06, |
| "loss": 1.3455, |
| "step": 251 |
| }, |
| { |
| "epoch": 0.5735963581183612, |
| "grad_norm": 0.4634062945842743, |
| "learning_rate": 4.955741354889734e-06, |
| "loss": 1.3009, |
| "step": 252 |
| }, |
| { |
| "epoch": 0.5758725341426404, |
| "grad_norm": 0.43844589591026306, |
| "learning_rate": 4.955158841537632e-06, |
| "loss": 1.2775, |
| "step": 253 |
| }, |
| { |
| "epoch": 0.5781487101669196, |
| "grad_norm": 0.4297947585582733, |
| "learning_rate": 4.954572554496897e-06, |
| "loss": 1.3005, |
| "step": 254 |
| }, |
| { |
| "epoch": 0.5804248861911988, |
| "grad_norm": 0.45026981830596924, |
| "learning_rate": 4.953982494668679e-06, |
| "loss": 1.2829, |
| "step": 255 |
| }, |
| { |
| "epoch": 0.582701062215478, |
| "grad_norm": 0.4508177936077118, |
| "learning_rate": 4.953388662959926e-06, |
| "loss": 1.3249, |
| "step": 256 |
| }, |
| { |
| "epoch": 0.5849772382397572, |
| "grad_norm": 0.4628501236438751, |
| "learning_rate": 4.952791060283384e-06, |
| "loss": 1.2772, |
| "step": 257 |
| }, |
| { |
| "epoch": 0.5872534142640364, |
| "grad_norm": 0.47145721316337585, |
| "learning_rate": 4.952189687557595e-06, |
| "loss": 1.2843, |
| "step": 258 |
| }, |
| { |
| "epoch": 0.5895295902883156, |
| "grad_norm": 0.44380298256874084, |
| "learning_rate": 4.951584545706896e-06, |
| "loss": 1.3169, |
| "step": 259 |
| }, |
| { |
| "epoch": 0.5918057663125948, |
| "grad_norm": 0.45627689361572266, |
| "learning_rate": 4.950975635661416e-06, |
| "loss": 1.2855, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.5940819423368741, |
| "grad_norm": 0.43097957968711853, |
| "learning_rate": 4.950362958357078e-06, |
| "loss": 1.2802, |
| "step": 261 |
| }, |
| { |
| "epoch": 0.5963581183611533, |
| "grad_norm": 0.4480797052383423, |
| "learning_rate": 4.949746514735594e-06, |
| "loss": 1.2845, |
| "step": 262 |
| }, |
| { |
| "epoch": 0.5986342943854325, |
| "grad_norm": 0.4356028139591217, |
| "learning_rate": 4.949126305744466e-06, |
| "loss": 1.2559, |
| "step": 263 |
| }, |
| { |
| "epoch": 0.6009104704097117, |
| "grad_norm": 0.45533114671707153, |
| "learning_rate": 4.948502332336982e-06, |
| "loss": 1.333, |
| "step": 264 |
| }, |
| { |
| "epoch": 0.6031866464339909, |
| "grad_norm": 0.43486839532852173, |
| "learning_rate": 4.947874595472216e-06, |
| "loss": 1.299, |
| "step": 265 |
| }, |
| { |
| "epoch": 0.6054628224582701, |
| "grad_norm": 0.45472636818885803, |
| "learning_rate": 4.947243096115028e-06, |
| "loss": 1.2853, |
| "step": 266 |
| }, |
| { |
| "epoch": 0.6077389984825493, |
| "grad_norm": 0.448030024766922, |
| "learning_rate": 4.946607835236064e-06, |
| "loss": 1.2549, |
| "step": 267 |
| }, |
| { |
| "epoch": 0.6100151745068285, |
| "grad_norm": 0.46248579025268555, |
| "learning_rate": 4.945968813811743e-06, |
| "loss": 1.2845, |
| "step": 268 |
| }, |
| { |
| "epoch": 0.6122913505311077, |
| "grad_norm": 0.47284016013145447, |
| "learning_rate": 4.9453260328242735e-06, |
| "loss": 1.274, |
| "step": 269 |
| }, |
| { |
| "epoch": 0.6145675265553869, |
| "grad_norm": 0.46916916966438293, |
| "learning_rate": 4.944679493261637e-06, |
| "loss": 1.272, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.6168437025796661, |
| "grad_norm": 0.4469199776649475, |
| "learning_rate": 4.944029196117594e-06, |
| "loss": 1.273, |
| "step": 271 |
| }, |
| { |
| "epoch": 0.6191198786039454, |
| "grad_norm": 0.4460132420063019, |
| "learning_rate": 4.943375142391679e-06, |
| "loss": 1.2749, |
| "step": 272 |
| }, |
| { |
| "epoch": 0.6213960546282246, |
| "grad_norm": 0.45281344652175903, |
| "learning_rate": 4.942717333089204e-06, |
| "loss": 1.2858, |
| "step": 273 |
| }, |
| { |
| "epoch": 0.6236722306525038, |
| "grad_norm": 0.4766104221343994, |
| "learning_rate": 4.942055769221249e-06, |
| "loss": 1.3047, |
| "step": 274 |
| }, |
| { |
| "epoch": 0.625948406676783, |
| "grad_norm": 0.4342869818210602, |
| "learning_rate": 4.941390451804668e-06, |
| "loss": 1.258, |
| "step": 275 |
| }, |
| { |
| "epoch": 0.6282245827010622, |
| "grad_norm": 0.44943931698799133, |
| "learning_rate": 4.940721381862083e-06, |
| "loss": 1.2714, |
| "step": 276 |
| }, |
| { |
| "epoch": 0.6305007587253414, |
| "grad_norm": 0.4642450213432312, |
| "learning_rate": 4.940048560421887e-06, |
| "loss": 1.2883, |
| "step": 277 |
| }, |
| { |
| "epoch": 0.6327769347496206, |
| "grad_norm": 0.530925989151001, |
| "learning_rate": 4.9393719885182335e-06, |
| "loss": 1.2869, |
| "step": 278 |
| }, |
| { |
| "epoch": 0.6350531107738998, |
| "grad_norm": 0.44706323742866516, |
| "learning_rate": 4.938691667191044e-06, |
| "loss": 1.2912, |
| "step": 279 |
| }, |
| { |
| "epoch": 0.637329286798179, |
| "grad_norm": 0.46952497959136963, |
| "learning_rate": 4.938007597486005e-06, |
| "loss": 1.3293, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.6396054628224582, |
| "grad_norm": 0.45387259125709534, |
| "learning_rate": 4.937319780454559e-06, |
| "loss": 1.2328, |
| "step": 281 |
| }, |
| { |
| "epoch": 0.6418816388467374, |
| "grad_norm": 0.4683968126773834, |
| "learning_rate": 4.936628217153914e-06, |
| "loss": 1.3101, |
| "step": 282 |
| }, |
| { |
| "epoch": 0.6441578148710166, |
| "grad_norm": 0.4984208941459656, |
| "learning_rate": 4.935932908647033e-06, |
| "loss": 1.3078, |
| "step": 283 |
| }, |
| { |
| "epoch": 0.6464339908952959, |
| "grad_norm": 0.47393515706062317, |
| "learning_rate": 4.935233856002635e-06, |
| "loss": 1.2667, |
| "step": 284 |
| }, |
| { |
| "epoch": 0.6487101669195751, |
| "grad_norm": 0.4559146761894226, |
| "learning_rate": 4.9345310602951964e-06, |
| "loss": 1.2816, |
| "step": 285 |
| }, |
| { |
| "epoch": 0.6509863429438544, |
| "grad_norm": 0.4612574279308319, |
| "learning_rate": 4.933824522604945e-06, |
| "loss": 1.3009, |
| "step": 286 |
| }, |
| { |
| "epoch": 0.6532625189681336, |
| "grad_norm": 0.4839983880519867, |
| "learning_rate": 4.933114244017861e-06, |
| "loss": 1.2762, |
| "step": 287 |
| }, |
| { |
| "epoch": 0.6555386949924128, |
| "grad_norm": 0.47950032353401184, |
| "learning_rate": 4.932400225625674e-06, |
| "loss": 1.2639, |
| "step": 288 |
| }, |
| { |
| "epoch": 0.657814871016692, |
| "grad_norm": 0.46797841787338257, |
| "learning_rate": 4.931682468525863e-06, |
| "loss": 1.3116, |
| "step": 289 |
| }, |
| { |
| "epoch": 0.6600910470409712, |
| "grad_norm": 0.46507689356803894, |
| "learning_rate": 4.93096097382165e-06, |
| "loss": 1.2795, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.6623672230652504, |
| "grad_norm": 0.4672064781188965, |
| "learning_rate": 4.9302357426220086e-06, |
| "loss": 1.2769, |
| "step": 291 |
| }, |
| { |
| "epoch": 0.6646433990895296, |
| "grad_norm": 0.469881147146225, |
| "learning_rate": 4.929506776041648e-06, |
| "loss": 1.246, |
| "step": 292 |
| }, |
| { |
| "epoch": 0.6669195751138088, |
| "grad_norm": 0.49012723565101624, |
| "learning_rate": 4.928774075201024e-06, |
| "loss": 1.3308, |
| "step": 293 |
| }, |
| { |
| "epoch": 0.669195751138088, |
| "grad_norm": 0.47186344861984253, |
| "learning_rate": 4.9280376412263295e-06, |
| "loss": 1.2685, |
| "step": 294 |
| }, |
| { |
| "epoch": 0.6714719271623673, |
| "grad_norm": 0.4914249777793884, |
| "learning_rate": 4.9272974752494974e-06, |
| "loss": 1.3029, |
| "step": 295 |
| }, |
| { |
| "epoch": 0.6737481031866465, |
| "grad_norm": 0.4709179699420929, |
| "learning_rate": 4.9265535784081965e-06, |
| "loss": 1.2459, |
| "step": 296 |
| }, |
| { |
| "epoch": 0.6760242792109257, |
| "grad_norm": 0.46568986773490906, |
| "learning_rate": 4.925805951845826e-06, |
| "loss": 1.2713, |
| "step": 297 |
| }, |
| { |
| "epoch": 0.6783004552352049, |
| "grad_norm": 0.46113038063049316, |
| "learning_rate": 4.925054596711526e-06, |
| "loss": 1.2787, |
| "step": 298 |
| }, |
| { |
| "epoch": 0.6805766312594841, |
| "grad_norm": 0.49636346101760864, |
| "learning_rate": 4.92429951416016e-06, |
| "loss": 1.2787, |
| "step": 299 |
| }, |
| { |
| "epoch": 0.6828528072837633, |
| "grad_norm": 0.4823263883590698, |
| "learning_rate": 4.9235407053523235e-06, |
| "loss": 1.3029, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.6851289833080425, |
| "grad_norm": 0.45272234082221985, |
| "learning_rate": 4.92277817145434e-06, |
| "loss": 1.3053, |
| "step": 301 |
| }, |
| { |
| "epoch": 0.6874051593323217, |
| "grad_norm": 0.4724232256412506, |
| "learning_rate": 4.922011913638258e-06, |
| "loss": 1.2594, |
| "step": 302 |
| }, |
| { |
| "epoch": 0.6896813353566009, |
| "grad_norm": 0.5244677066802979, |
| "learning_rate": 4.92124193308185e-06, |
| "loss": 1.305, |
| "step": 303 |
| }, |
| { |
| "epoch": 0.6919575113808801, |
| "grad_norm": 0.4562852382659912, |
| "learning_rate": 4.92046823096861e-06, |
| "loss": 1.283, |
| "step": 304 |
| }, |
| { |
| "epoch": 0.6942336874051593, |
| "grad_norm": 0.460565447807312, |
| "learning_rate": 4.919690808487754e-06, |
| "loss": 1.3004, |
| "step": 305 |
| }, |
| { |
| "epoch": 0.6965098634294385, |
| "grad_norm": 0.4588528871536255, |
| "learning_rate": 4.918909666834214e-06, |
| "loss": 1.2745, |
| "step": 306 |
| }, |
| { |
| "epoch": 0.6987860394537178, |
| "grad_norm": 0.4980691075325012, |
| "learning_rate": 4.91812480720864e-06, |
| "loss": 1.2802, |
| "step": 307 |
| }, |
| { |
| "epoch": 0.701062215477997, |
| "grad_norm": 0.5080570578575134, |
| "learning_rate": 4.917336230817396e-06, |
| "loss": 1.286, |
| "step": 308 |
| }, |
| { |
| "epoch": 0.7033383915022762, |
| "grad_norm": 0.46659743785858154, |
| "learning_rate": 4.9165439388725585e-06, |
| "loss": 1.3093, |
| "step": 309 |
| }, |
| { |
| "epoch": 0.7056145675265554, |
| "grad_norm": 0.4846821129322052, |
| "learning_rate": 4.915747932591916e-06, |
| "loss": 1.2904, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.7078907435508346, |
| "grad_norm": 0.4945422112941742, |
| "learning_rate": 4.914948213198966e-06, |
| "loss": 1.2592, |
| "step": 311 |
| }, |
| { |
| "epoch": 0.7101669195751138, |
| "grad_norm": 0.49606069922447205, |
| "learning_rate": 4.9141447819229125e-06, |
| "loss": 1.2699, |
| "step": 312 |
| }, |
| { |
| "epoch": 0.712443095599393, |
| "grad_norm": 0.48810863494873047, |
| "learning_rate": 4.913337639998666e-06, |
| "loss": 1.2993, |
| "step": 313 |
| }, |
| { |
| "epoch": 0.7147192716236722, |
| "grad_norm": 0.4933323562145233, |
| "learning_rate": 4.912526788666838e-06, |
| "loss": 1.2514, |
| "step": 314 |
| }, |
| { |
| "epoch": 0.7169954476479514, |
| "grad_norm": 0.4674908220767975, |
| "learning_rate": 4.911712229173745e-06, |
| "loss": 1.2602, |
| "step": 315 |
| }, |
| { |
| "epoch": 0.7192716236722306, |
| "grad_norm": 0.5178641676902771, |
| "learning_rate": 4.9108939627714e-06, |
| "loss": 1.312, |
| "step": 316 |
| }, |
| { |
| "epoch": 0.7215477996965098, |
| "grad_norm": 0.4949224293231964, |
| "learning_rate": 4.910071990717516e-06, |
| "loss": 1.2787, |
| "step": 317 |
| }, |
| { |
| "epoch": 0.723823975720789, |
| "grad_norm": 0.4700353443622589, |
| "learning_rate": 4.909246314275499e-06, |
| "loss": 1.251, |
| "step": 318 |
| }, |
| { |
| "epoch": 0.7261001517450683, |
| "grad_norm": 0.4828815758228302, |
| "learning_rate": 4.908416934714452e-06, |
| "loss": 1.2967, |
| "step": 319 |
| }, |
| { |
| "epoch": 0.7283763277693475, |
| "grad_norm": 0.47781631350517273, |
| "learning_rate": 4.907583853309168e-06, |
| "loss": 1.3108, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.7306525037936267, |
| "grad_norm": 0.4467979073524475, |
| "learning_rate": 4.90674707134013e-06, |
| "loss": 1.2332, |
| "step": 321 |
| }, |
| { |
| "epoch": 0.7329286798179059, |
| "grad_norm": 0.4529818892478943, |
| "learning_rate": 4.90590659009351e-06, |
| "loss": 1.2958, |
| "step": 322 |
| }, |
| { |
| "epoch": 0.7352048558421851, |
| "grad_norm": 0.4782491624355316, |
| "learning_rate": 4.905062410861164e-06, |
| "loss": 1.2754, |
| "step": 323 |
| }, |
| { |
| "epoch": 0.7374810318664643, |
| "grad_norm": 0.4517338275909424, |
| "learning_rate": 4.9042145349406335e-06, |
| "loss": 1.3098, |
| "step": 324 |
| }, |
| { |
| "epoch": 0.7397572078907435, |
| "grad_norm": 0.4599636197090149, |
| "learning_rate": 4.903362963635142e-06, |
| "loss": 1.2843, |
| "step": 325 |
| }, |
| { |
| "epoch": 0.7420333839150227, |
| "grad_norm": 0.4922712743282318, |
| "learning_rate": 4.902507698253593e-06, |
| "loss": 1.2987, |
| "step": 326 |
| }, |
| { |
| "epoch": 0.7443095599393019, |
| "grad_norm": 0.47610870003700256, |
| "learning_rate": 4.901648740110566e-06, |
| "loss": 1.2739, |
| "step": 327 |
| }, |
| { |
| "epoch": 0.7465857359635811, |
| "grad_norm": 0.46494367718696594, |
| "learning_rate": 4.900786090526319e-06, |
| "loss": 1.2579, |
| "step": 328 |
| }, |
| { |
| "epoch": 0.7488619119878603, |
| "grad_norm": 0.46867313981056213, |
| "learning_rate": 4.899919750826784e-06, |
| "loss": 1.2838, |
| "step": 329 |
| }, |
| { |
| "epoch": 0.7511380880121397, |
| "grad_norm": 0.49616602063179016, |
| "learning_rate": 4.899049722343561e-06, |
| "loss": 1.3108, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.7534142640364189, |
| "grad_norm": 0.46307483315467834, |
| "learning_rate": 4.898176006413925e-06, |
| "loss": 1.3047, |
| "step": 331 |
| }, |
| { |
| "epoch": 0.7556904400606981, |
| "grad_norm": 0.47475141286849976, |
| "learning_rate": 4.897298604380816e-06, |
| "loss": 1.2416, |
| "step": 332 |
| }, |
| { |
| "epoch": 0.7579666160849773, |
| "grad_norm": 0.468184232711792, |
| "learning_rate": 4.896417517592838e-06, |
| "loss": 1.2904, |
| "step": 333 |
| }, |
| { |
| "epoch": 0.7602427921092565, |
| "grad_norm": 0.47171875834465027, |
| "learning_rate": 4.895532747404263e-06, |
| "loss": 1.2641, |
| "step": 334 |
| }, |
| { |
| "epoch": 0.7625189681335357, |
| "grad_norm": 0.45646342635154724, |
| "learning_rate": 4.8946442951750215e-06, |
| "loss": 1.285, |
| "step": 335 |
| }, |
| { |
| "epoch": 0.7647951441578149, |
| "grad_norm": 0.48363035917282104, |
| "learning_rate": 4.893752162270704e-06, |
| "loss": 1.2507, |
| "step": 336 |
| }, |
| { |
| "epoch": 0.7670713201820941, |
| "grad_norm": 0.4761241674423218, |
| "learning_rate": 4.892856350062558e-06, |
| "loss": 1.2628, |
| "step": 337 |
| }, |
| { |
| "epoch": 0.7693474962063733, |
| "grad_norm": 0.47408172488212585, |
| "learning_rate": 4.891956859927489e-06, |
| "loss": 1.2919, |
| "step": 338 |
| }, |
| { |
| "epoch": 0.7716236722306525, |
| "grad_norm": 0.48075783252716064, |
| "learning_rate": 4.89105369324805e-06, |
| "loss": 1.282, |
| "step": 339 |
| }, |
| { |
| "epoch": 0.7738998482549317, |
| "grad_norm": 0.45937585830688477, |
| "learning_rate": 4.890146851412452e-06, |
| "loss": 1.2823, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.776176024279211, |
| "grad_norm": 0.5253570675849915, |
| "learning_rate": 4.889236335814549e-06, |
| "loss": 1.2657, |
| "step": 341 |
| }, |
| { |
| "epoch": 0.7784522003034902, |
| "grad_norm": 0.47888922691345215, |
| "learning_rate": 4.888322147853846e-06, |
| "loss": 1.3003, |
| "step": 342 |
| }, |
| { |
| "epoch": 0.7807283763277694, |
| "grad_norm": 0.4705219566822052, |
| "learning_rate": 4.887404288935488e-06, |
| "loss": 1.2822, |
| "step": 343 |
| }, |
| { |
| "epoch": 0.7830045523520486, |
| "grad_norm": 0.5236004590988159, |
| "learning_rate": 4.8864827604702675e-06, |
| "loss": 1.2338, |
| "step": 344 |
| }, |
| { |
| "epoch": 0.7852807283763278, |
| "grad_norm": 0.4856922924518585, |
| "learning_rate": 4.885557563874614e-06, |
| "loss": 1.2394, |
| "step": 345 |
| }, |
| { |
| "epoch": 0.787556904400607, |
| "grad_norm": 0.48127493262290955, |
| "learning_rate": 4.884628700570595e-06, |
| "loss": 1.2827, |
| "step": 346 |
| }, |
| { |
| "epoch": 0.7898330804248862, |
| "grad_norm": 0.46932077407836914, |
| "learning_rate": 4.883696171985917e-06, |
| "loss": 1.2608, |
| "step": 347 |
| }, |
| { |
| "epoch": 0.7921092564491654, |
| "grad_norm": 0.5052128434181213, |
| "learning_rate": 4.882759979553916e-06, |
| "loss": 1.2727, |
| "step": 348 |
| }, |
| { |
| "epoch": 0.7943854324734446, |
| "grad_norm": 0.5077352523803711, |
| "learning_rate": 4.881820124713562e-06, |
| "loss": 1.2364, |
| "step": 349 |
| }, |
| { |
| "epoch": 0.7966616084977238, |
| "grad_norm": 0.5095151662826538, |
| "learning_rate": 4.880876608909454e-06, |
| "loss": 1.2788, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.798937784522003, |
| "grad_norm": 0.4920441806316376, |
| "learning_rate": 4.8799294335918185e-06, |
| "loss": 1.2944, |
| "step": 351 |
| }, |
| { |
| "epoch": 0.8012139605462822, |
| "grad_norm": 0.4824545085430145, |
| "learning_rate": 4.8789786002165055e-06, |
| "loss": 1.2669, |
| "step": 352 |
| }, |
| { |
| "epoch": 0.8034901365705615, |
| "grad_norm": 0.49492961168289185, |
| "learning_rate": 4.878024110244988e-06, |
| "loss": 1.3021, |
| "step": 353 |
| }, |
| { |
| "epoch": 0.8057663125948407, |
| "grad_norm": 0.5213160514831543, |
| "learning_rate": 4.877065965144361e-06, |
| "loss": 1.2832, |
| "step": 354 |
| }, |
| { |
| "epoch": 0.8080424886191199, |
| "grad_norm": 0.4782240390777588, |
| "learning_rate": 4.8761041663873345e-06, |
| "loss": 1.2812, |
| "step": 355 |
| }, |
| { |
| "epoch": 0.8103186646433991, |
| "grad_norm": 0.4901832938194275, |
| "learning_rate": 4.875138715452237e-06, |
| "loss": 1.289, |
| "step": 356 |
| }, |
| { |
| "epoch": 0.8125948406676783, |
| "grad_norm": 0.48875507712364197, |
| "learning_rate": 4.87416961382301e-06, |
| "loss": 1.2876, |
| "step": 357 |
| }, |
| { |
| "epoch": 0.8148710166919575, |
| "grad_norm": 0.49773871898651123, |
| "learning_rate": 4.873196862989205e-06, |
| "loss": 1.2766, |
| "step": 358 |
| }, |
| { |
| "epoch": 0.8171471927162367, |
| "grad_norm": 0.5069698691368103, |
| "learning_rate": 4.872220464445983e-06, |
| "loss": 1.284, |
| "step": 359 |
| }, |
| { |
| "epoch": 0.8194233687405159, |
| "grad_norm": 0.4725041389465332, |
| "learning_rate": 4.871240419694115e-06, |
| "loss": 1.2183, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.8216995447647951, |
| "grad_norm": 0.4846250116825104, |
| "learning_rate": 4.8702567302399705e-06, |
| "loss": 1.2851, |
| "step": 361 |
| }, |
| { |
| "epoch": 0.8239757207890743, |
| "grad_norm": 0.4825296998023987, |
| "learning_rate": 4.869269397595525e-06, |
| "loss": 1.2621, |
| "step": 362 |
| }, |
| { |
| "epoch": 0.8262518968133535, |
| "grad_norm": 0.4880293905735016, |
| "learning_rate": 4.8682784232783535e-06, |
| "loss": 1.2684, |
| "step": 363 |
| }, |
| { |
| "epoch": 0.8285280728376327, |
| "grad_norm": 0.4805878698825836, |
| "learning_rate": 4.867283808811626e-06, |
| "loss": 1.2604, |
| "step": 364 |
| }, |
| { |
| "epoch": 0.830804248861912, |
| "grad_norm": 0.5031499266624451, |
| "learning_rate": 4.86628555572411e-06, |
| "loss": 1.2701, |
| "step": 365 |
| }, |
| { |
| "epoch": 0.8330804248861912, |
| "grad_norm": 0.49856945872306824, |
| "learning_rate": 4.865283665550167e-06, |
| "loss": 1.266, |
| "step": 366 |
| }, |
| { |
| "epoch": 0.8353566009104704, |
| "grad_norm": 0.49834373593330383, |
| "learning_rate": 4.864278139829745e-06, |
| "loss": 1.254, |
| "step": 367 |
| }, |
| { |
| "epoch": 0.8376327769347496, |
| "grad_norm": 0.47436273097991943, |
| "learning_rate": 4.863268980108381e-06, |
| "loss": 1.308, |
| "step": 368 |
| }, |
| { |
| "epoch": 0.8399089529590288, |
| "grad_norm": 0.4866158962249756, |
| "learning_rate": 4.8622561879372e-06, |
| "loss": 1.2565, |
| "step": 369 |
| }, |
| { |
| "epoch": 0.842185128983308, |
| "grad_norm": 0.46591049432754517, |
| "learning_rate": 4.861239764872909e-06, |
| "loss": 1.2528, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.8444613050075872, |
| "grad_norm": 0.5084807872772217, |
| "learning_rate": 4.860219712477795e-06, |
| "loss": 1.2727, |
| "step": 371 |
| }, |
| { |
| "epoch": 0.8467374810318664, |
| "grad_norm": 0.49390751123428345, |
| "learning_rate": 4.859196032319724e-06, |
| "loss": 1.2544, |
| "step": 372 |
| }, |
| { |
| "epoch": 0.8490136570561456, |
| "grad_norm": 0.4931376576423645, |
| "learning_rate": 4.8581687259721375e-06, |
| "loss": 1.2728, |
| "step": 373 |
| }, |
| { |
| "epoch": 0.8512898330804249, |
| "grad_norm": 0.4991268813610077, |
| "learning_rate": 4.857137795014051e-06, |
| "loss": 1.2382, |
| "step": 374 |
| }, |
| { |
| "epoch": 0.8535660091047041, |
| "grad_norm": 0.48629266023635864, |
| "learning_rate": 4.856103241030054e-06, |
| "loss": 1.2464, |
| "step": 375 |
| }, |
| { |
| "epoch": 0.8558421851289834, |
| "grad_norm": 0.4945109188556671, |
| "learning_rate": 4.855065065610298e-06, |
| "loss": 1.2592, |
| "step": 376 |
| }, |
| { |
| "epoch": 0.8581183611532626, |
| "grad_norm": 0.4683839678764343, |
| "learning_rate": 4.8540232703505085e-06, |
| "loss": 1.2795, |
| "step": 377 |
| }, |
| { |
| "epoch": 0.8603945371775418, |
| "grad_norm": 0.4917154610157013, |
| "learning_rate": 4.8529778568519695e-06, |
| "loss": 1.297, |
| "step": 378 |
| }, |
| { |
| "epoch": 0.862670713201821, |
| "grad_norm": 0.4950079917907715, |
| "learning_rate": 4.851928826721528e-06, |
| "loss": 1.2424, |
| "step": 379 |
| }, |
| { |
| "epoch": 0.8649468892261002, |
| "grad_norm": 0.49165982007980347, |
| "learning_rate": 4.850876181571592e-06, |
| "loss": 1.2442, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.8672230652503794, |
| "grad_norm": 0.47863882780075073, |
| "learning_rate": 4.849819923020121e-06, |
| "loss": 1.2946, |
| "step": 381 |
| }, |
| { |
| "epoch": 0.8694992412746586, |
| "grad_norm": 0.5066231489181519, |
| "learning_rate": 4.848760052690635e-06, |
| "loss": 1.2658, |
| "step": 382 |
| }, |
| { |
| "epoch": 0.8717754172989378, |
| "grad_norm": 0.46788156032562256, |
| "learning_rate": 4.847696572212199e-06, |
| "loss": 1.2787, |
| "step": 383 |
| }, |
| { |
| "epoch": 0.874051593323217, |
| "grad_norm": 0.5010194182395935, |
| "learning_rate": 4.846629483219431e-06, |
| "loss": 1.2645, |
| "step": 384 |
| }, |
| { |
| "epoch": 0.8763277693474962, |
| "grad_norm": 0.480258584022522, |
| "learning_rate": 4.845558787352495e-06, |
| "loss": 1.2535, |
| "step": 385 |
| }, |
| { |
| "epoch": 0.8786039453717754, |
| "grad_norm": 0.5160472393035889, |
| "learning_rate": 4.844484486257097e-06, |
| "loss": 1.2838, |
| "step": 386 |
| }, |
| { |
| "epoch": 0.8808801213960546, |
| "grad_norm": 0.5098587870597839, |
| "learning_rate": 4.843406581584487e-06, |
| "loss": 1.2834, |
| "step": 387 |
| }, |
| { |
| "epoch": 0.8831562974203339, |
| "grad_norm": 0.5033400058746338, |
| "learning_rate": 4.8423250749914515e-06, |
| "loss": 1.2959, |
| "step": 388 |
| }, |
| { |
| "epoch": 0.8854324734446131, |
| "grad_norm": 0.506367564201355, |
| "learning_rate": 4.841239968140316e-06, |
| "loss": 1.2757, |
| "step": 389 |
| }, |
| { |
| "epoch": 0.8877086494688923, |
| "grad_norm": 0.47980019450187683, |
| "learning_rate": 4.8401512626989354e-06, |
| "loss": 1.2683, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.8899848254931715, |
| "grad_norm": 0.48923107981681824, |
| "learning_rate": 4.8390589603407005e-06, |
| "loss": 1.2325, |
| "step": 391 |
| }, |
| { |
| "epoch": 0.8922610015174507, |
| "grad_norm": 0.4891837537288666, |
| "learning_rate": 4.8379630627445286e-06, |
| "loss": 1.2508, |
| "step": 392 |
| }, |
| { |
| "epoch": 0.8945371775417299, |
| "grad_norm": 0.4819527566432953, |
| "learning_rate": 4.836863571594863e-06, |
| "loss": 1.2655, |
| "step": 393 |
| }, |
| { |
| "epoch": 0.8968133535660091, |
| "grad_norm": 0.5067424178123474, |
| "learning_rate": 4.83576048858167e-06, |
| "loss": 1.2477, |
| "step": 394 |
| }, |
| { |
| "epoch": 0.8990895295902883, |
| "grad_norm": 0.5201086401939392, |
| "learning_rate": 4.8346538154004386e-06, |
| "loss": 1.249, |
| "step": 395 |
| }, |
| { |
| "epoch": 0.9013657056145675, |
| "grad_norm": 0.5033949017524719, |
| "learning_rate": 4.833543553752173e-06, |
| "loss": 1.2882, |
| "step": 396 |
| }, |
| { |
| "epoch": 0.9036418816388467, |
| "grad_norm": 0.4921282231807709, |
| "learning_rate": 4.8324297053433975e-06, |
| "loss": 1.2355, |
| "step": 397 |
| }, |
| { |
| "epoch": 0.9059180576631259, |
| "grad_norm": 0.49898359179496765, |
| "learning_rate": 4.831312271886145e-06, |
| "loss": 1.24, |
| "step": 398 |
| }, |
| { |
| "epoch": 0.9081942336874052, |
| "grad_norm": 0.4932885468006134, |
| "learning_rate": 4.83019125509796e-06, |
| "loss": 1.2651, |
| "step": 399 |
| }, |
| { |
| "epoch": 0.9104704097116844, |
| "grad_norm": 0.5081654191017151, |
| "learning_rate": 4.829066656701897e-06, |
| "loss": 1.2846, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.9127465857359636, |
| "grad_norm": 0.4848720133304596, |
| "learning_rate": 4.8279384784265124e-06, |
| "loss": 1.2834, |
| "step": 401 |
| }, |
| { |
| "epoch": 0.9150227617602428, |
| "grad_norm": 0.47641217708587646, |
| "learning_rate": 4.826806722005868e-06, |
| "loss": 1.2556, |
| "step": 402 |
| }, |
| { |
| "epoch": 0.917298937784522, |
| "grad_norm": 0.5004164576530457, |
| "learning_rate": 4.825671389179522e-06, |
| "loss": 1.2852, |
| "step": 403 |
| }, |
| { |
| "epoch": 0.9195751138088012, |
| "grad_norm": 0.5069151520729065, |
| "learning_rate": 4.824532481692533e-06, |
| "loss": 1.2468, |
| "step": 404 |
| }, |
| { |
| "epoch": 0.9218512898330804, |
| "grad_norm": 0.5043609738349915, |
| "learning_rate": 4.823390001295453e-06, |
| "loss": 1.2602, |
| "step": 405 |
| }, |
| { |
| "epoch": 0.9241274658573596, |
| "grad_norm": 0.47922301292419434, |
| "learning_rate": 4.822243949744324e-06, |
| "loss": 1.2909, |
| "step": 406 |
| }, |
| { |
| "epoch": 0.9264036418816388, |
| "grad_norm": 0.5012561082839966, |
| "learning_rate": 4.821094328800678e-06, |
| "loss": 1.3058, |
| "step": 407 |
| }, |
| { |
| "epoch": 0.928679817905918, |
| "grad_norm": 0.5232773423194885, |
| "learning_rate": 4.8199411402315356e-06, |
| "loss": 1.2689, |
| "step": 408 |
| }, |
| { |
| "epoch": 0.9309559939301972, |
| "grad_norm": 0.5023229718208313, |
| "learning_rate": 4.8187843858093975e-06, |
| "loss": 1.2623, |
| "step": 409 |
| }, |
| { |
| "epoch": 0.9332321699544764, |
| "grad_norm": 0.5061272382736206, |
| "learning_rate": 4.817624067312247e-06, |
| "loss": 1.2771, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.9355083459787557, |
| "grad_norm": 0.47715064883232117, |
| "learning_rate": 4.816460186523547e-06, |
| "loss": 1.266, |
| "step": 411 |
| }, |
| { |
| "epoch": 0.9377845220030349, |
| "grad_norm": 0.5037026405334473, |
| "learning_rate": 4.815292745232233e-06, |
| "loss": 1.2812, |
| "step": 412 |
| }, |
| { |
| "epoch": 0.9400606980273141, |
| "grad_norm": 0.47421544790267944, |
| "learning_rate": 4.814121745232714e-06, |
| "loss": 1.2349, |
| "step": 413 |
| }, |
| { |
| "epoch": 0.9423368740515933, |
| "grad_norm": 0.5214923620223999, |
| "learning_rate": 4.812947188324868e-06, |
| "loss": 1.2986, |
| "step": 414 |
| }, |
| { |
| "epoch": 0.9446130500758725, |
| "grad_norm": 0.5169025659561157, |
| "learning_rate": 4.811769076314044e-06, |
| "loss": 1.2687, |
| "step": 415 |
| }, |
| { |
| "epoch": 0.9468892261001517, |
| "grad_norm": 0.5028119087219238, |
| "learning_rate": 4.8105874110110516e-06, |
| "loss": 1.2666, |
| "step": 416 |
| }, |
| { |
| "epoch": 0.9491654021244309, |
| "grad_norm": 0.5233621597290039, |
| "learning_rate": 4.809402194232163e-06, |
| "loss": 1.2817, |
| "step": 417 |
| }, |
| { |
| "epoch": 0.9514415781487102, |
| "grad_norm": 0.5662165880203247, |
| "learning_rate": 4.808213427799108e-06, |
| "loss": 1.212, |
| "step": 418 |
| }, |
| { |
| "epoch": 0.9537177541729894, |
| "grad_norm": 0.5214280486106873, |
| "learning_rate": 4.807021113539077e-06, |
| "loss": 1.2659, |
| "step": 419 |
| }, |
| { |
| "epoch": 0.9559939301972686, |
| "grad_norm": 0.5059605240821838, |
| "learning_rate": 4.805825253284706e-06, |
| "loss": 1.2417, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.9582701062215478, |
| "grad_norm": 0.48347723484039307, |
| "learning_rate": 4.804625848874088e-06, |
| "loss": 1.279, |
| "step": 421 |
| }, |
| { |
| "epoch": 0.960546282245827, |
| "grad_norm": 0.5225522518157959, |
| "learning_rate": 4.803422902150762e-06, |
| "loss": 1.2555, |
| "step": 422 |
| }, |
| { |
| "epoch": 0.9628224582701063, |
| "grad_norm": 0.49709466099739075, |
| "learning_rate": 4.802216414963708e-06, |
| "loss": 1.2956, |
| "step": 423 |
| }, |
| { |
| "epoch": 0.9650986342943855, |
| "grad_norm": 0.500357985496521, |
| "learning_rate": 4.801006389167352e-06, |
| "loss": 1.2748, |
| "step": 424 |
| }, |
| { |
| "epoch": 0.9673748103186647, |
| "grad_norm": 0.504552960395813, |
| "learning_rate": 4.799792826621559e-06, |
| "loss": 1.2939, |
| "step": 425 |
| }, |
| { |
| "epoch": 0.9696509863429439, |
| "grad_norm": 0.4881986379623413, |
| "learning_rate": 4.7985757291916264e-06, |
| "loss": 1.2827, |
| "step": 426 |
| }, |
| { |
| "epoch": 0.9719271623672231, |
| "grad_norm": 0.517511785030365, |
| "learning_rate": 4.797355098748289e-06, |
| "loss": 1.2668, |
| "step": 427 |
| }, |
| { |
| "epoch": 0.9742033383915023, |
| "grad_norm": 0.49534812569618225, |
| "learning_rate": 4.796130937167709e-06, |
| "loss": 1.2878, |
| "step": 428 |
| }, |
| { |
| "epoch": 0.9764795144157815, |
| "grad_norm": 0.4725462794303894, |
| "learning_rate": 4.794903246331477e-06, |
| "loss": 1.2612, |
| "step": 429 |
| }, |
| { |
| "epoch": 0.9787556904400607, |
| "grad_norm": 0.49760913848876953, |
| "learning_rate": 4.79367202812661e-06, |
| "loss": 1.284, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.9810318664643399, |
| "grad_norm": 0.5361410975456238, |
| "learning_rate": 4.792437284445545e-06, |
| "loss": 1.2517, |
| "step": 431 |
| }, |
| { |
| "epoch": 0.9833080424886191, |
| "grad_norm": 0.5160269141197205, |
| "learning_rate": 4.791199017186137e-06, |
| "loss": 1.2422, |
| "step": 432 |
| }, |
| { |
| "epoch": 0.9855842185128983, |
| "grad_norm": 0.5418286919593811, |
| "learning_rate": 4.7899572282516596e-06, |
| "loss": 1.2697, |
| "step": 433 |
| }, |
| { |
| "epoch": 0.9878603945371776, |
| "grad_norm": 0.5236756801605225, |
| "learning_rate": 4.788711919550796e-06, |
| "loss": 1.2546, |
| "step": 434 |
| }, |
| { |
| "epoch": 0.9901365705614568, |
| "grad_norm": 0.4919045567512512, |
| "learning_rate": 4.787463092997643e-06, |
| "loss": 1.2478, |
| "step": 435 |
| }, |
| { |
| "epoch": 0.992412746585736, |
| "grad_norm": 0.4918051064014435, |
| "learning_rate": 4.786210750511701e-06, |
| "loss": 1.2522, |
| "step": 436 |
| }, |
| { |
| "epoch": 0.9946889226100152, |
| "grad_norm": 0.5032536387443542, |
| "learning_rate": 4.784954894017878e-06, |
| "loss": 1.2924, |
| "step": 437 |
| }, |
| { |
| "epoch": 0.9969650986342944, |
| "grad_norm": 0.5253746509552002, |
| "learning_rate": 4.78369552544648e-06, |
| "loss": 1.258, |
| "step": 438 |
| }, |
| { |
| "epoch": 0.9992412746585736, |
| "grad_norm": 0.5097838044166565, |
| "learning_rate": 4.782432646733214e-06, |
| "loss": 1.2479, |
| "step": 439 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 0.5097838044166565, |
| "learning_rate": 4.781166259819179e-06, |
| "loss": 1.2895, |
| "step": 440 |
| }, |
| { |
| "epoch": 1.0022761760242793, |
| "grad_norm": 1.0558606386184692, |
| "learning_rate": 4.77989636665087e-06, |
| "loss": 1.2707, |
| "step": 441 |
| }, |
| { |
| "epoch": 1.0045523520485584, |
| "grad_norm": 0.47916215658187866, |
| "learning_rate": 4.778622969180167e-06, |
| "loss": 1.2364, |
| "step": 442 |
| }, |
| { |
| "epoch": 1.0068285280728377, |
| "grad_norm": 0.5158357620239258, |
| "learning_rate": 4.777346069364343e-06, |
| "loss": 1.2421, |
| "step": 443 |
| }, |
| { |
| "epoch": 1.0091047040971168, |
| "grad_norm": 0.4970231354236603, |
| "learning_rate": 4.776065669166045e-06, |
| "loss": 1.2534, |
| "step": 444 |
| }, |
| { |
| "epoch": 1.0113808801213962, |
| "grad_norm": 0.529381513595581, |
| "learning_rate": 4.774781770553309e-06, |
| "loss": 1.2429, |
| "step": 445 |
| }, |
| { |
| "epoch": 1.0136570561456753, |
| "grad_norm": 0.5027406811714172, |
| "learning_rate": 4.773494375499543e-06, |
| "loss": 1.2427, |
| "step": 446 |
| }, |
| { |
| "epoch": 1.0159332321699546, |
| "grad_norm": 0.5164632797241211, |
| "learning_rate": 4.772203485983531e-06, |
| "loss": 1.273, |
| "step": 447 |
| }, |
| { |
| "epoch": 1.0182094081942337, |
| "grad_norm": 0.5203757882118225, |
| "learning_rate": 4.770909103989426e-06, |
| "loss": 1.2261, |
| "step": 448 |
| }, |
| { |
| "epoch": 1.020485584218513, |
| "grad_norm": 0.518552839756012, |
| "learning_rate": 4.769611231506753e-06, |
| "loss": 1.2404, |
| "step": 449 |
| }, |
| { |
| "epoch": 1.022761760242792, |
| "grad_norm": 0.5020595788955688, |
| "learning_rate": 4.7683098705303995e-06, |
| "loss": 1.2722, |
| "step": 450 |
| }, |
| { |
| "epoch": 1.0250379362670714, |
| "grad_norm": 0.508852481842041, |
| "learning_rate": 4.767005023060615e-06, |
| "loss": 1.2344, |
| "step": 451 |
| }, |
| { |
| "epoch": 1.0273141122913505, |
| "grad_norm": 0.5240857005119324, |
| "learning_rate": 4.765696691103008e-06, |
| "loss": 1.2553, |
| "step": 452 |
| }, |
| { |
| "epoch": 1.0295902883156298, |
| "grad_norm": 0.5548052787780762, |
| "learning_rate": 4.764384876668542e-06, |
| "loss": 1.3039, |
| "step": 453 |
| }, |
| { |
| "epoch": 1.031866464339909, |
| "grad_norm": 0.5021058917045593, |
| "learning_rate": 4.763069581773537e-06, |
| "loss": 1.2636, |
| "step": 454 |
| }, |
| { |
| "epoch": 1.0341426403641882, |
| "grad_norm": 0.5170218348503113, |
| "learning_rate": 4.761750808439658e-06, |
| "loss": 1.2584, |
| "step": 455 |
| }, |
| { |
| "epoch": 1.0364188163884673, |
| "grad_norm": 0.5254265069961548, |
| "learning_rate": 4.760428558693919e-06, |
| "loss": 1.2578, |
| "step": 456 |
| }, |
| { |
| "epoch": 1.0386949924127467, |
| "grad_norm": 0.5046964883804321, |
| "learning_rate": 4.7591028345686765e-06, |
| "loss": 1.253, |
| "step": 457 |
| }, |
| { |
| "epoch": 1.0409711684370258, |
| "grad_norm": 0.5212562084197998, |
| "learning_rate": 4.757773638101629e-06, |
| "loss": 1.2453, |
| "step": 458 |
| }, |
| { |
| "epoch": 1.043247344461305, |
| "grad_norm": 0.5397632718086243, |
| "learning_rate": 4.7564409713358075e-06, |
| "loss": 1.2612, |
| "step": 459 |
| }, |
| { |
| "epoch": 1.0455235204855842, |
| "grad_norm": 0.5086544752120972, |
| "learning_rate": 4.755104836319583e-06, |
| "loss": 1.27, |
| "step": 460 |
| }, |
| { |
| "epoch": 1.0477996965098635, |
| "grad_norm": 0.4974862337112427, |
| "learning_rate": 4.7537652351066545e-06, |
| "loss": 1.1955, |
| "step": 461 |
| }, |
| { |
| "epoch": 1.0500758725341426, |
| "grad_norm": 0.5382196307182312, |
| "learning_rate": 4.752422169756048e-06, |
| "loss": 1.2996, |
| "step": 462 |
| }, |
| { |
| "epoch": 1.052352048558422, |
| "grad_norm": 0.5093661546707153, |
| "learning_rate": 4.751075642332116e-06, |
| "loss": 1.2671, |
| "step": 463 |
| }, |
| { |
| "epoch": 1.054628224582701, |
| "grad_norm": 0.53044593334198, |
| "learning_rate": 4.749725654904529e-06, |
| "loss": 1.2572, |
| "step": 464 |
| }, |
| { |
| "epoch": 1.0569044006069803, |
| "grad_norm": 0.5372816920280457, |
| "learning_rate": 4.74837220954828e-06, |
| "loss": 1.2215, |
| "step": 465 |
| }, |
| { |
| "epoch": 1.0591805766312594, |
| "grad_norm": 0.5148317217826843, |
| "learning_rate": 4.747015308343673e-06, |
| "loss": 1.2636, |
| "step": 466 |
| }, |
| { |
| "epoch": 1.0614567526555387, |
| "grad_norm": 0.5267722010612488, |
| "learning_rate": 4.745654953376327e-06, |
| "loss": 1.2786, |
| "step": 467 |
| }, |
| { |
| "epoch": 1.0637329286798178, |
| "grad_norm": 0.5123690366744995, |
| "learning_rate": 4.744291146737169e-06, |
| "loss": 1.2217, |
| "step": 468 |
| }, |
| { |
| "epoch": 1.0660091047040972, |
| "grad_norm": 0.5397908687591553, |
| "learning_rate": 4.74292389052243e-06, |
| "loss": 1.2353, |
| "step": 469 |
| }, |
| { |
| "epoch": 1.0682852807283763, |
| "grad_norm": 0.5311163067817688, |
| "learning_rate": 4.741553186833642e-06, |
| "loss": 1.2307, |
| "step": 470 |
| }, |
| { |
| "epoch": 1.0705614567526556, |
| "grad_norm": 0.5108172297477722, |
| "learning_rate": 4.740179037777639e-06, |
| "loss": 1.2526, |
| "step": 471 |
| }, |
| { |
| "epoch": 1.0728376327769347, |
| "grad_norm": 0.5670639276504517, |
| "learning_rate": 4.7388014454665495e-06, |
| "loss": 1.214, |
| "step": 472 |
| }, |
| { |
| "epoch": 1.075113808801214, |
| "grad_norm": 0.5621855854988098, |
| "learning_rate": 4.737420412017795e-06, |
| "loss": 1.2202, |
| "step": 473 |
| }, |
| { |
| "epoch": 1.077389984825493, |
| "grad_norm": 0.5175919532775879, |
| "learning_rate": 4.736035939554084e-06, |
| "loss": 1.2295, |
| "step": 474 |
| }, |
| { |
| "epoch": 1.0796661608497724, |
| "grad_norm": 0.510009765625, |
| "learning_rate": 4.7346480302034144e-06, |
| "loss": 1.2489, |
| "step": 475 |
| }, |
| { |
| "epoch": 1.0819423368740515, |
| "grad_norm": 0.5198955535888672, |
| "learning_rate": 4.733256686099063e-06, |
| "loss": 1.2148, |
| "step": 476 |
| }, |
| { |
| "epoch": 1.0842185128983308, |
| "grad_norm": 0.5157918334007263, |
| "learning_rate": 4.731861909379588e-06, |
| "loss": 1.2858, |
| "step": 477 |
| }, |
| { |
| "epoch": 1.08649468892261, |
| "grad_norm": 0.5016840100288391, |
| "learning_rate": 4.730463702188824e-06, |
| "loss": 1.2137, |
| "step": 478 |
| }, |
| { |
| "epoch": 1.0887708649468892, |
| "grad_norm": 0.5427749156951904, |
| "learning_rate": 4.729062066675877e-06, |
| "loss": 1.2616, |
| "step": 479 |
| }, |
| { |
| "epoch": 1.0910470409711683, |
| "grad_norm": 0.5368303656578064, |
| "learning_rate": 4.727657004995124e-06, |
| "loss": 1.22, |
| "step": 480 |
| }, |
| { |
| "epoch": 1.0933232169954477, |
| "grad_norm": 0.5127097964286804, |
| "learning_rate": 4.726248519306208e-06, |
| "loss": 1.1953, |
| "step": 481 |
| }, |
| { |
| "epoch": 1.095599393019727, |
| "grad_norm": 0.5109656453132629, |
| "learning_rate": 4.724836611774032e-06, |
| "loss": 1.2483, |
| "step": 482 |
| }, |
| { |
| "epoch": 1.097875569044006, |
| "grad_norm": 0.5445286631584167, |
| "learning_rate": 4.723421284568764e-06, |
| "loss": 1.242, |
| "step": 483 |
| }, |
| { |
| "epoch": 1.1001517450682852, |
| "grad_norm": 0.5462026000022888, |
| "learning_rate": 4.722002539865823e-06, |
| "loss": 1.2475, |
| "step": 484 |
| }, |
| { |
| "epoch": 1.1024279210925645, |
| "grad_norm": 0.5589436292648315, |
| "learning_rate": 4.720580379845884e-06, |
| "loss": 1.2511, |
| "step": 485 |
| }, |
| { |
| "epoch": 1.1047040971168438, |
| "grad_norm": 0.5450273752212524, |
| "learning_rate": 4.719154806694869e-06, |
| "loss": 1.2843, |
| "step": 486 |
| }, |
| { |
| "epoch": 1.106980273141123, |
| "grad_norm": 0.5322884321212769, |
| "learning_rate": 4.717725822603948e-06, |
| "loss": 1.2159, |
| "step": 487 |
| }, |
| { |
| "epoch": 1.1092564491654022, |
| "grad_norm": 0.5098543763160706, |
| "learning_rate": 4.716293429769534e-06, |
| "loss": 1.2818, |
| "step": 488 |
| }, |
| { |
| "epoch": 1.1115326251896813, |
| "grad_norm": 0.5248117446899414, |
| "learning_rate": 4.7148576303932784e-06, |
| "loss": 1.2497, |
| "step": 489 |
| }, |
| { |
| "epoch": 1.1138088012139606, |
| "grad_norm": 0.5317633748054504, |
| "learning_rate": 4.7134184266820675e-06, |
| "loss": 1.2174, |
| "step": 490 |
| }, |
| { |
| "epoch": 1.1160849772382397, |
| "grad_norm": 0.5104670524597168, |
| "learning_rate": 4.711975820848024e-06, |
| "loss": 1.2492, |
| "step": 491 |
| }, |
| { |
| "epoch": 1.118361153262519, |
| "grad_norm": 0.5210446715354919, |
| "learning_rate": 4.710529815108496e-06, |
| "loss": 1.2478, |
| "step": 492 |
| }, |
| { |
| "epoch": 1.1206373292867982, |
| "grad_norm": 0.5357753038406372, |
| "learning_rate": 4.7090804116860574e-06, |
| "loss": 1.2533, |
| "step": 493 |
| }, |
| { |
| "epoch": 1.1229135053110775, |
| "grad_norm": 0.5544043779373169, |
| "learning_rate": 4.707627612808509e-06, |
| "loss": 1.2315, |
| "step": 494 |
| }, |
| { |
| "epoch": 1.1251896813353566, |
| "grad_norm": 0.5387628674507141, |
| "learning_rate": 4.706171420708866e-06, |
| "loss": 1.2492, |
| "step": 495 |
| }, |
| { |
| "epoch": 1.127465857359636, |
| "grad_norm": 0.5289620757102966, |
| "learning_rate": 4.704711837625361e-06, |
| "loss": 1.1865, |
| "step": 496 |
| }, |
| { |
| "epoch": 1.129742033383915, |
| "grad_norm": 0.5673317909240723, |
| "learning_rate": 4.703248865801436e-06, |
| "loss": 1.1963, |
| "step": 497 |
| }, |
| { |
| "epoch": 1.1320182094081943, |
| "grad_norm": 0.5180116295814514, |
| "learning_rate": 4.701782507485747e-06, |
| "loss": 1.2431, |
| "step": 498 |
| }, |
| { |
| "epoch": 1.1342943854324734, |
| "grad_norm": 0.5326710343360901, |
| "learning_rate": 4.700312764932151e-06, |
| "loss": 1.2543, |
| "step": 499 |
| }, |
| { |
| "epoch": 1.1365705614567527, |
| "grad_norm": 0.536686360836029, |
| "learning_rate": 4.698839640399707e-06, |
| "loss": 1.2664, |
| "step": 500 |
| }, |
| { |
| "epoch": 1.1388467374810318, |
| "grad_norm": 0.5708869695663452, |
| "learning_rate": 4.6973631361526745e-06, |
| "loss": 1.2445, |
| "step": 501 |
| }, |
| { |
| "epoch": 1.1411229135053111, |
| "grad_norm": 0.5445765852928162, |
| "learning_rate": 4.695883254460505e-06, |
| "loss": 1.2111, |
| "step": 502 |
| }, |
| { |
| "epoch": 1.1433990895295902, |
| "grad_norm": 0.5529754161834717, |
| "learning_rate": 4.6943999975978445e-06, |
| "loss": 1.2346, |
| "step": 503 |
| }, |
| { |
| "epoch": 1.1456752655538696, |
| "grad_norm": 0.5409250855445862, |
| "learning_rate": 4.692913367844523e-06, |
| "loss": 1.2338, |
| "step": 504 |
| }, |
| { |
| "epoch": 1.1479514415781487, |
| "grad_norm": 0.5459516644477844, |
| "learning_rate": 4.691423367485558e-06, |
| "loss": 1.2487, |
| "step": 505 |
| }, |
| { |
| "epoch": 1.150227617602428, |
| "grad_norm": 0.5377400517463684, |
| "learning_rate": 4.689929998811145e-06, |
| "loss": 1.2719, |
| "step": 506 |
| }, |
| { |
| "epoch": 1.152503793626707, |
| "grad_norm": 0.5768429636955261, |
| "learning_rate": 4.68843326411666e-06, |
| "loss": 1.2106, |
| "step": 507 |
| }, |
| { |
| "epoch": 1.1547799696509864, |
| "grad_norm": 0.5586393475532532, |
| "learning_rate": 4.686933165702651e-06, |
| "loss": 1.2469, |
| "step": 508 |
| }, |
| { |
| "epoch": 1.1570561456752655, |
| "grad_norm": 0.5209569334983826, |
| "learning_rate": 4.685429705874834e-06, |
| "loss": 1.2453, |
| "step": 509 |
| }, |
| { |
| "epoch": 1.1593323216995448, |
| "grad_norm": 0.5145371556282043, |
| "learning_rate": 4.6839228869440965e-06, |
| "loss": 1.2484, |
| "step": 510 |
| }, |
| { |
| "epoch": 1.161608497723824, |
| "grad_norm": 0.5463981032371521, |
| "learning_rate": 4.682412711226485e-06, |
| "loss": 1.2691, |
| "step": 511 |
| }, |
| { |
| "epoch": 1.1638846737481032, |
| "grad_norm": 0.5128470659255981, |
| "learning_rate": 4.680899181043206e-06, |
| "loss": 1.2579, |
| "step": 512 |
| }, |
| { |
| "epoch": 1.1661608497723823, |
| "grad_norm": 0.5277767777442932, |
| "learning_rate": 4.679382298720625e-06, |
| "loss": 1.2247, |
| "step": 513 |
| }, |
| { |
| "epoch": 1.1684370257966616, |
| "grad_norm": 0.5547785758972168, |
| "learning_rate": 4.6778620665902566e-06, |
| "loss": 1.2492, |
| "step": 514 |
| }, |
| { |
| "epoch": 1.1707132018209407, |
| "grad_norm": 0.5689957737922668, |
| "learning_rate": 4.676338486988765e-06, |
| "loss": 1.2384, |
| "step": 515 |
| }, |
| { |
| "epoch": 1.17298937784522, |
| "grad_norm": 0.5139868259429932, |
| "learning_rate": 4.674811562257961e-06, |
| "loss": 1.2562, |
| "step": 516 |
| }, |
| { |
| "epoch": 1.1752655538694992, |
| "grad_norm": 0.5729711055755615, |
| "learning_rate": 4.673281294744796e-06, |
| "loss": 1.2833, |
| "step": 517 |
| }, |
| { |
| "epoch": 1.1775417298937785, |
| "grad_norm": 0.5735371708869934, |
| "learning_rate": 4.671747686801358e-06, |
| "loss": 1.2481, |
| "step": 518 |
| }, |
| { |
| "epoch": 1.1798179059180576, |
| "grad_norm": 0.5259848833084106, |
| "learning_rate": 4.670210740784872e-06, |
| "loss": 1.2496, |
| "step": 519 |
| }, |
| { |
| "epoch": 1.182094081942337, |
| "grad_norm": 0.5374155640602112, |
| "learning_rate": 4.668670459057693e-06, |
| "loss": 1.2484, |
| "step": 520 |
| }, |
| { |
| "epoch": 1.184370257966616, |
| "grad_norm": 0.5365428328514099, |
| "learning_rate": 4.667126843987301e-06, |
| "loss": 1.2651, |
| "step": 521 |
| }, |
| { |
| "epoch": 1.1866464339908953, |
| "grad_norm": 0.5263276100158691, |
| "learning_rate": 4.665579897946303e-06, |
| "loss": 1.19, |
| "step": 522 |
| }, |
| { |
| "epoch": 1.1889226100151746, |
| "grad_norm": 0.5412886142730713, |
| "learning_rate": 4.664029623312422e-06, |
| "loss": 1.2551, |
| "step": 523 |
| }, |
| { |
| "epoch": 1.1911987860394537, |
| "grad_norm": 0.5376629829406738, |
| "learning_rate": 4.662476022468503e-06, |
| "loss": 1.2541, |
| "step": 524 |
| }, |
| { |
| "epoch": 1.1934749620637328, |
| "grad_norm": 0.5543259382247925, |
| "learning_rate": 4.660919097802495e-06, |
| "loss": 1.2745, |
| "step": 525 |
| }, |
| { |
| "epoch": 1.1957511380880121, |
| "grad_norm": 0.5453343987464905, |
| "learning_rate": 4.659358851707464e-06, |
| "loss": 1.238, |
| "step": 526 |
| }, |
| { |
| "epoch": 1.1980273141122915, |
| "grad_norm": 0.5588712692260742, |
| "learning_rate": 4.657795286581576e-06, |
| "loss": 1.1767, |
| "step": 527 |
| }, |
| { |
| "epoch": 1.2003034901365706, |
| "grad_norm": 0.5432548522949219, |
| "learning_rate": 4.656228404828102e-06, |
| "loss": 1.2243, |
| "step": 528 |
| }, |
| { |
| "epoch": 1.2025796661608497, |
| "grad_norm": 0.5616108179092407, |
| "learning_rate": 4.654658208855408e-06, |
| "loss": 1.1937, |
| "step": 529 |
| }, |
| { |
| "epoch": 1.204855842185129, |
| "grad_norm": 0.5578548908233643, |
| "learning_rate": 4.653084701076955e-06, |
| "loss": 1.2454, |
| "step": 530 |
| }, |
| { |
| "epoch": 1.2071320182094083, |
| "grad_norm": 0.5913681983947754, |
| "learning_rate": 4.651507883911296e-06, |
| "loss": 1.2717, |
| "step": 531 |
| }, |
| { |
| "epoch": 1.2094081942336874, |
| "grad_norm": 0.5625573992729187, |
| "learning_rate": 4.649927759782068e-06, |
| "loss": 1.2619, |
| "step": 532 |
| }, |
| { |
| "epoch": 1.2116843702579665, |
| "grad_norm": 0.5766717195510864, |
| "learning_rate": 4.648344331117992e-06, |
| "loss": 1.2748, |
| "step": 533 |
| }, |
| { |
| "epoch": 1.2139605462822458, |
| "grad_norm": 0.529719889163971, |
| "learning_rate": 4.64675760035287e-06, |
| "loss": 1.2443, |
| "step": 534 |
| }, |
| { |
| "epoch": 1.2162367223065251, |
| "grad_norm": 0.5937225222587585, |
| "learning_rate": 4.645167569925577e-06, |
| "loss": 1.253, |
| "step": 535 |
| }, |
| { |
| "epoch": 1.2185128983308042, |
| "grad_norm": 0.6403617262840271, |
| "learning_rate": 4.64357424228006e-06, |
| "loss": 1.1932, |
| "step": 536 |
| }, |
| { |
| "epoch": 1.2207890743550835, |
| "grad_norm": 0.5702269077301025, |
| "learning_rate": 4.6419776198653365e-06, |
| "loss": 1.2498, |
| "step": 537 |
| }, |
| { |
| "epoch": 1.2230652503793626, |
| "grad_norm": 0.5545888543128967, |
| "learning_rate": 4.640377705135485e-06, |
| "loss": 1.2517, |
| "step": 538 |
| }, |
| { |
| "epoch": 1.225341426403642, |
| "grad_norm": 0.5598457455635071, |
| "learning_rate": 4.638774500549645e-06, |
| "loss": 1.2503, |
| "step": 539 |
| }, |
| { |
| "epoch": 1.227617602427921, |
| "grad_norm": 0.5853296518325806, |
| "learning_rate": 4.637168008572016e-06, |
| "loss": 1.2418, |
| "step": 540 |
| }, |
| { |
| "epoch": 1.2298937784522004, |
| "grad_norm": 0.5423877239227295, |
| "learning_rate": 4.635558231671846e-06, |
| "loss": 1.2295, |
| "step": 541 |
| }, |
| { |
| "epoch": 1.2321699544764795, |
| "grad_norm": 0.5638657808303833, |
| "learning_rate": 4.633945172323434e-06, |
| "loss": 1.2934, |
| "step": 542 |
| }, |
| { |
| "epoch": 1.2344461305007588, |
| "grad_norm": 0.5612449645996094, |
| "learning_rate": 4.6323288330061244e-06, |
| "loss": 1.2624, |
| "step": 543 |
| }, |
| { |
| "epoch": 1.236722306525038, |
| "grad_norm": 0.5534572601318359, |
| "learning_rate": 4.630709216204303e-06, |
| "loss": 1.2488, |
| "step": 544 |
| }, |
| { |
| "epoch": 1.2389984825493172, |
| "grad_norm": 0.5525970458984375, |
| "learning_rate": 4.629086324407393e-06, |
| "loss": 1.231, |
| "step": 545 |
| }, |
| { |
| "epoch": 1.2412746585735963, |
| "grad_norm": 0.5725768804550171, |
| "learning_rate": 4.6274601601098505e-06, |
| "loss": 1.2959, |
| "step": 546 |
| }, |
| { |
| "epoch": 1.2435508345978756, |
| "grad_norm": 0.582775354385376, |
| "learning_rate": 4.625830725811164e-06, |
| "loss": 1.2554, |
| "step": 547 |
| }, |
| { |
| "epoch": 1.2458270106221547, |
| "grad_norm": 0.5522809028625488, |
| "learning_rate": 4.624198024015845e-06, |
| "loss": 1.2487, |
| "step": 548 |
| }, |
| { |
| "epoch": 1.248103186646434, |
| "grad_norm": 0.5601561069488525, |
| "learning_rate": 4.622562057233431e-06, |
| "loss": 1.2489, |
| "step": 549 |
| }, |
| { |
| "epoch": 1.2503793626707131, |
| "grad_norm": 0.5581909418106079, |
| "learning_rate": 4.620922827978475e-06, |
| "loss": 1.205, |
| "step": 550 |
| }, |
| { |
| "epoch": 1.2526555386949925, |
| "grad_norm": 0.5560769438743591, |
| "learning_rate": 4.619280338770545e-06, |
| "loss": 1.2253, |
| "step": 551 |
| }, |
| { |
| "epoch": 1.2549317147192716, |
| "grad_norm": 0.5541017651557922, |
| "learning_rate": 4.617634592134221e-06, |
| "loss": 1.2476, |
| "step": 552 |
| }, |
| { |
| "epoch": 1.2572078907435509, |
| "grad_norm": 0.5714686512947083, |
| "learning_rate": 4.615985590599088e-06, |
| "loss": 1.2274, |
| "step": 553 |
| }, |
| { |
| "epoch": 1.25948406676783, |
| "grad_norm": 0.5909372568130493, |
| "learning_rate": 4.6143333366997354e-06, |
| "loss": 1.2481, |
| "step": 554 |
| }, |
| { |
| "epoch": 1.2617602427921093, |
| "grad_norm": 0.5704237818717957, |
| "learning_rate": 4.612677832975751e-06, |
| "loss": 1.2607, |
| "step": 555 |
| }, |
| { |
| "epoch": 1.2640364188163884, |
| "grad_norm": 0.5494899749755859, |
| "learning_rate": 4.611019081971719e-06, |
| "loss": 1.2171, |
| "step": 556 |
| }, |
| { |
| "epoch": 1.2663125948406677, |
| "grad_norm": 0.5628857612609863, |
| "learning_rate": 4.609357086237213e-06, |
| "loss": 1.2185, |
| "step": 557 |
| }, |
| { |
| "epoch": 1.2685887708649468, |
| "grad_norm": 0.5746468305587769, |
| "learning_rate": 4.607691848326793e-06, |
| "loss": 1.2485, |
| "step": 558 |
| }, |
| { |
| "epoch": 1.2708649468892261, |
| "grad_norm": 0.5731273889541626, |
| "learning_rate": 4.606023370800006e-06, |
| "loss": 1.2302, |
| "step": 559 |
| }, |
| { |
| "epoch": 1.2731411229135052, |
| "grad_norm": 0.5782604217529297, |
| "learning_rate": 4.604351656221374e-06, |
| "loss": 1.2281, |
| "step": 560 |
| }, |
| { |
| "epoch": 1.2754172989377845, |
| "grad_norm": 0.5706422328948975, |
| "learning_rate": 4.6026767071604e-06, |
| "loss": 1.2145, |
| "step": 561 |
| }, |
| { |
| "epoch": 1.2776934749620636, |
| "grad_norm": 0.5888031125068665, |
| "learning_rate": 4.6009985261915536e-06, |
| "loss": 1.1982, |
| "step": 562 |
| }, |
| { |
| "epoch": 1.279969650986343, |
| "grad_norm": 0.543771505355835, |
| "learning_rate": 4.599317115894273e-06, |
| "loss": 1.2439, |
| "step": 563 |
| }, |
| { |
| "epoch": 1.2822458270106223, |
| "grad_norm": 0.5837553143501282, |
| "learning_rate": 4.597632478852963e-06, |
| "loss": 1.22, |
| "step": 564 |
| }, |
| { |
| "epoch": 1.2845220030349014, |
| "grad_norm": 0.5469195246696472, |
| "learning_rate": 4.595944617656984e-06, |
| "loss": 1.2161, |
| "step": 565 |
| }, |
| { |
| "epoch": 1.2867981790591805, |
| "grad_norm": 0.5544828772544861, |
| "learning_rate": 4.594253534900656e-06, |
| "loss": 1.22, |
| "step": 566 |
| }, |
| { |
| "epoch": 1.2890743550834598, |
| "grad_norm": 0.5594440698623657, |
| "learning_rate": 4.592559233183246e-06, |
| "loss": 1.2088, |
| "step": 567 |
| }, |
| { |
| "epoch": 1.2913505311077391, |
| "grad_norm": 0.541545569896698, |
| "learning_rate": 4.590861715108972e-06, |
| "loss": 1.2185, |
| "step": 568 |
| }, |
| { |
| "epoch": 1.2936267071320182, |
| "grad_norm": 0.5520378947257996, |
| "learning_rate": 4.5891609832869964e-06, |
| "loss": 1.2268, |
| "step": 569 |
| }, |
| { |
| "epoch": 1.2959028831562973, |
| "grad_norm": 0.5583465695381165, |
| "learning_rate": 4.587457040331419e-06, |
| "loss": 1.2225, |
| "step": 570 |
| }, |
| { |
| "epoch": 1.2981790591805766, |
| "grad_norm": 0.5398393869400024, |
| "learning_rate": 4.5857498888612755e-06, |
| "loss": 1.2479, |
| "step": 571 |
| }, |
| { |
| "epoch": 1.300455235204856, |
| "grad_norm": 0.5736100673675537, |
| "learning_rate": 4.584039531500535e-06, |
| "loss": 1.2572, |
| "step": 572 |
| }, |
| { |
| "epoch": 1.302731411229135, |
| "grad_norm": 0.5614636540412903, |
| "learning_rate": 4.582325970878092e-06, |
| "loss": 1.2221, |
| "step": 573 |
| }, |
| { |
| "epoch": 1.3050075872534141, |
| "grad_norm": 0.5580296516418457, |
| "learning_rate": 4.580609209627766e-06, |
| "loss": 1.232, |
| "step": 574 |
| }, |
| { |
| "epoch": 1.3072837632776935, |
| "grad_norm": 0.5606446266174316, |
| "learning_rate": 4.578889250388296e-06, |
| "loss": 1.2214, |
| "step": 575 |
| }, |
| { |
| "epoch": 1.3095599393019728, |
| "grad_norm": 0.5508303642272949, |
| "learning_rate": 4.577166095803336e-06, |
| "loss": 1.244, |
| "step": 576 |
| }, |
| { |
| "epoch": 1.3118361153262519, |
| "grad_norm": 0.557896614074707, |
| "learning_rate": 4.5754397485214505e-06, |
| "loss": 1.2668, |
| "step": 577 |
| }, |
| { |
| "epoch": 1.314112291350531, |
| "grad_norm": 0.5473496317863464, |
| "learning_rate": 4.573710211196113e-06, |
| "loss": 1.2265, |
| "step": 578 |
| }, |
| { |
| "epoch": 1.3163884673748103, |
| "grad_norm": 0.5576569437980652, |
| "learning_rate": 4.5719774864857e-06, |
| "loss": 1.2626, |
| "step": 579 |
| }, |
| { |
| "epoch": 1.3186646433990896, |
| "grad_norm": 0.5799663662910461, |
| "learning_rate": 4.570241577053486e-06, |
| "loss": 1.2573, |
| "step": 580 |
| }, |
| { |
| "epoch": 1.3209408194233687, |
| "grad_norm": 0.555438756942749, |
| "learning_rate": 4.568502485567641e-06, |
| "loss": 1.2775, |
| "step": 581 |
| }, |
| { |
| "epoch": 1.3232169954476478, |
| "grad_norm": 0.5486553907394409, |
| "learning_rate": 4.566760214701227e-06, |
| "loss": 1.2588, |
| "step": 582 |
| }, |
| { |
| "epoch": 1.3254931714719271, |
| "grad_norm": 0.5853822231292725, |
| "learning_rate": 4.565014767132191e-06, |
| "loss": 1.2185, |
| "step": 583 |
| }, |
| { |
| "epoch": 1.3277693474962065, |
| "grad_norm": 0.569977879524231, |
| "learning_rate": 4.563266145543364e-06, |
| "loss": 1.2387, |
| "step": 584 |
| }, |
| { |
| "epoch": 1.3300455235204856, |
| "grad_norm": 0.5845345258712769, |
| "learning_rate": 4.5615143526224555e-06, |
| "loss": 1.2935, |
| "step": 585 |
| }, |
| { |
| "epoch": 1.3323216995447649, |
| "grad_norm": 0.5513466000556946, |
| "learning_rate": 4.559759391062051e-06, |
| "loss": 1.2347, |
| "step": 586 |
| }, |
| { |
| "epoch": 1.334597875569044, |
| "grad_norm": 0.5497938990592957, |
| "learning_rate": 4.558001263559602e-06, |
| "loss": 1.2266, |
| "step": 587 |
| }, |
| { |
| "epoch": 1.3368740515933233, |
| "grad_norm": 0.5504549145698547, |
| "learning_rate": 4.556239972817429e-06, |
| "loss": 1.2535, |
| "step": 588 |
| }, |
| { |
| "epoch": 1.3391502276176024, |
| "grad_norm": 0.5670903325080872, |
| "learning_rate": 4.5544755215427175e-06, |
| "loss": 1.261, |
| "step": 589 |
| }, |
| { |
| "epoch": 1.3414264036418817, |
| "grad_norm": 0.5838532447814941, |
| "learning_rate": 4.552707912447504e-06, |
| "loss": 1.2487, |
| "step": 590 |
| }, |
| { |
| "epoch": 1.3437025796661608, |
| "grad_norm": 0.5291898250579834, |
| "learning_rate": 4.550937148248685e-06, |
| "loss": 1.2528, |
| "step": 591 |
| }, |
| { |
| "epoch": 1.3459787556904401, |
| "grad_norm": 0.5700204968452454, |
| "learning_rate": 4.549163231668004e-06, |
| "loss": 1.2657, |
| "step": 592 |
| }, |
| { |
| "epoch": 1.3482549317147192, |
| "grad_norm": 0.5522517561912537, |
| "learning_rate": 4.547386165432048e-06, |
| "loss": 1.2542, |
| "step": 593 |
| }, |
| { |
| "epoch": 1.3505311077389985, |
| "grad_norm": 0.5714395046234131, |
| "learning_rate": 4.545605952272249e-06, |
| "loss": 1.2343, |
| "step": 594 |
| }, |
| { |
| "epoch": 1.3528072837632776, |
| "grad_norm": 0.5690736174583435, |
| "learning_rate": 4.543822594924874e-06, |
| "loss": 1.2462, |
| "step": 595 |
| }, |
| { |
| "epoch": 1.355083459787557, |
| "grad_norm": 0.5521000027656555, |
| "learning_rate": 4.54203609613102e-06, |
| "loss": 1.2512, |
| "step": 596 |
| }, |
| { |
| "epoch": 1.357359635811836, |
| "grad_norm": 0.5685454607009888, |
| "learning_rate": 4.540246458636619e-06, |
| "loss": 1.2296, |
| "step": 597 |
| }, |
| { |
| "epoch": 1.3596358118361154, |
| "grad_norm": 0.5521453022956848, |
| "learning_rate": 4.538453685192421e-06, |
| "loss": 1.2533, |
| "step": 598 |
| }, |
| { |
| "epoch": 1.3619119878603945, |
| "grad_norm": 0.545840322971344, |
| "learning_rate": 4.536657778554e-06, |
| "loss": 1.2456, |
| "step": 599 |
| }, |
| { |
| "epoch": 1.3641881638846738, |
| "grad_norm": 0.5703026056289673, |
| "learning_rate": 4.534858741481745e-06, |
| "loss": 1.2293, |
| "step": 600 |
| }, |
| { |
| "epoch": 1.3664643399089529, |
| "grad_norm": 0.5508074760437012, |
| "learning_rate": 4.5330565767408555e-06, |
| "loss": 1.2657, |
| "step": 601 |
| }, |
| { |
| "epoch": 1.3687405159332322, |
| "grad_norm": 0.5637306571006775, |
| "learning_rate": 4.531251287101338e-06, |
| "loss": 1.2199, |
| "step": 602 |
| }, |
| { |
| "epoch": 1.3710166919575113, |
| "grad_norm": 0.5585516095161438, |
| "learning_rate": 4.529442875338005e-06, |
| "loss": 1.2331, |
| "step": 603 |
| }, |
| { |
| "epoch": 1.3732928679817906, |
| "grad_norm": 0.5738129019737244, |
| "learning_rate": 4.527631344230466e-06, |
| "loss": 1.215, |
| "step": 604 |
| }, |
| { |
| "epoch": 1.37556904400607, |
| "grad_norm": 0.5905203223228455, |
| "learning_rate": 4.525816696563123e-06, |
| "loss": 1.2322, |
| "step": 605 |
| }, |
| { |
| "epoch": 1.377845220030349, |
| "grad_norm": 0.5772601366043091, |
| "learning_rate": 4.523998935125173e-06, |
| "loss": 1.2344, |
| "step": 606 |
| }, |
| { |
| "epoch": 1.3801213960546281, |
| "grad_norm": 0.6194104552268982, |
| "learning_rate": 4.5221780627105945e-06, |
| "loss": 1.2647, |
| "step": 607 |
| }, |
| { |
| "epoch": 1.3823975720789075, |
| "grad_norm": 0.5779480934143066, |
| "learning_rate": 4.520354082118151e-06, |
| "loss": 1.2148, |
| "step": 608 |
| }, |
| { |
| "epoch": 1.3846737481031868, |
| "grad_norm": 0.5630953907966614, |
| "learning_rate": 4.518526996151381e-06, |
| "loss": 1.2647, |
| "step": 609 |
| }, |
| { |
| "epoch": 1.3869499241274659, |
| "grad_norm": 0.5726267099380493, |
| "learning_rate": 4.516696807618598e-06, |
| "loss": 1.2741, |
| "step": 610 |
| }, |
| { |
| "epoch": 1.389226100151745, |
| "grad_norm": 0.5838750600814819, |
| "learning_rate": 4.514863519332882e-06, |
| "loss": 1.1919, |
| "step": 611 |
| }, |
| { |
| "epoch": 1.3915022761760243, |
| "grad_norm": 0.5766186714172363, |
| "learning_rate": 4.5130271341120805e-06, |
| "loss": 1.2359, |
| "step": 612 |
| }, |
| { |
| "epoch": 1.3937784522003036, |
| "grad_norm": 0.5568646192550659, |
| "learning_rate": 4.511187654778798e-06, |
| "loss": 1.2107, |
| "step": 613 |
| }, |
| { |
| "epoch": 1.3960546282245827, |
| "grad_norm": 0.5602480173110962, |
| "learning_rate": 4.509345084160397e-06, |
| "loss": 1.2276, |
| "step": 614 |
| }, |
| { |
| "epoch": 1.3983308042488618, |
| "grad_norm": 0.5605113506317139, |
| "learning_rate": 4.507499425088991e-06, |
| "loss": 1.2259, |
| "step": 615 |
| }, |
| { |
| "epoch": 1.4006069802731411, |
| "grad_norm": 0.5589579939842224, |
| "learning_rate": 4.505650680401441e-06, |
| "loss": 1.2212, |
| "step": 616 |
| }, |
| { |
| "epoch": 1.4028831562974204, |
| "grad_norm": 0.5683750510215759, |
| "learning_rate": 4.503798852939347e-06, |
| "loss": 1.2313, |
| "step": 617 |
| }, |
| { |
| "epoch": 1.4051593323216995, |
| "grad_norm": 0.5655199885368347, |
| "learning_rate": 4.501943945549054e-06, |
| "loss": 1.2199, |
| "step": 618 |
| }, |
| { |
| "epoch": 1.4074355083459786, |
| "grad_norm": 0.5633233785629272, |
| "learning_rate": 4.500085961081635e-06, |
| "loss": 1.2305, |
| "step": 619 |
| }, |
| { |
| "epoch": 1.409711684370258, |
| "grad_norm": 0.5716864466667175, |
| "learning_rate": 4.498224902392896e-06, |
| "loss": 1.2135, |
| "step": 620 |
| }, |
| { |
| "epoch": 1.4119878603945373, |
| "grad_norm": 0.5524502992630005, |
| "learning_rate": 4.496360772343367e-06, |
| "loss": 1.221, |
| "step": 621 |
| }, |
| { |
| "epoch": 1.4142640364188164, |
| "grad_norm": 0.5607890486717224, |
| "learning_rate": 4.494493573798299e-06, |
| "loss": 1.2243, |
| "step": 622 |
| }, |
| { |
| "epoch": 1.4165402124430955, |
| "grad_norm": 0.5746079683303833, |
| "learning_rate": 4.49262330962766e-06, |
| "loss": 1.2064, |
| "step": 623 |
| }, |
| { |
| "epoch": 1.4188163884673748, |
| "grad_norm": 0.5607832670211792, |
| "learning_rate": 4.490749982706128e-06, |
| "loss": 1.2248, |
| "step": 624 |
| }, |
| { |
| "epoch": 1.421092564491654, |
| "grad_norm": 0.5688823461532593, |
| "learning_rate": 4.488873595913092e-06, |
| "loss": 1.232, |
| "step": 625 |
| }, |
| { |
| "epoch": 1.4233687405159332, |
| "grad_norm": 0.5820784568786621, |
| "learning_rate": 4.48699415213264e-06, |
| "loss": 1.2485, |
| "step": 626 |
| }, |
| { |
| "epoch": 1.4256449165402125, |
| "grad_norm": 0.56890869140625, |
| "learning_rate": 4.4851116542535625e-06, |
| "loss": 1.2286, |
| "step": 627 |
| }, |
| { |
| "epoch": 1.4279210925644916, |
| "grad_norm": 0.6012819409370422, |
| "learning_rate": 4.483226105169341e-06, |
| "loss": 1.2343, |
| "step": 628 |
| }, |
| { |
| "epoch": 1.430197268588771, |
| "grad_norm": 0.570756733417511, |
| "learning_rate": 4.481337507778151e-06, |
| "loss": 1.2447, |
| "step": 629 |
| }, |
| { |
| "epoch": 1.43247344461305, |
| "grad_norm": 0.5640760660171509, |
| "learning_rate": 4.47944586498285e-06, |
| "loss": 1.2298, |
| "step": 630 |
| }, |
| { |
| "epoch": 1.4347496206373294, |
| "grad_norm": 0.5836703777313232, |
| "learning_rate": 4.477551179690977e-06, |
| "loss": 1.2099, |
| "step": 631 |
| }, |
| { |
| "epoch": 1.4370257966616085, |
| "grad_norm": 0.5838893055915833, |
| "learning_rate": 4.475653454814746e-06, |
| "loss": 1.2437, |
| "step": 632 |
| }, |
| { |
| "epoch": 1.4393019726858878, |
| "grad_norm": 0.5973705053329468, |
| "learning_rate": 4.473752693271048e-06, |
| "loss": 1.2872, |
| "step": 633 |
| }, |
| { |
| "epoch": 1.4415781487101669, |
| "grad_norm": 0.5992927551269531, |
| "learning_rate": 4.471848897981437e-06, |
| "loss": 1.2072, |
| "step": 634 |
| }, |
| { |
| "epoch": 1.4438543247344462, |
| "grad_norm": 0.566234827041626, |
| "learning_rate": 4.46994207187213e-06, |
| "loss": 1.2181, |
| "step": 635 |
| }, |
| { |
| "epoch": 1.4461305007587253, |
| "grad_norm": 0.5693137645721436, |
| "learning_rate": 4.4680322178740056e-06, |
| "loss": 1.1862, |
| "step": 636 |
| }, |
| { |
| "epoch": 1.4484066767830046, |
| "grad_norm": 0.5798976421356201, |
| "learning_rate": 4.466119338922593e-06, |
| "loss": 1.2225, |
| "step": 637 |
| }, |
| { |
| "epoch": 1.4506828528072837, |
| "grad_norm": 0.575389564037323, |
| "learning_rate": 4.464203437958075e-06, |
| "loss": 1.2257, |
| "step": 638 |
| }, |
| { |
| "epoch": 1.452959028831563, |
| "grad_norm": 0.6053541302680969, |
| "learning_rate": 4.4622845179252735e-06, |
| "loss": 1.241, |
| "step": 639 |
| }, |
| { |
| "epoch": 1.4552352048558421, |
| "grad_norm": 0.5716749429702759, |
| "learning_rate": 4.460362581773656e-06, |
| "loss": 1.2278, |
| "step": 640 |
| }, |
| { |
| "epoch": 1.4575113808801214, |
| "grad_norm": 0.5863229036331177, |
| "learning_rate": 4.458437632457325e-06, |
| "loss": 1.2238, |
| "step": 641 |
| }, |
| { |
| "epoch": 1.4597875569044005, |
| "grad_norm": 0.6117021441459656, |
| "learning_rate": 4.456509672935011e-06, |
| "loss": 1.2318, |
| "step": 642 |
| }, |
| { |
| "epoch": 1.4620637329286799, |
| "grad_norm": 0.6031973361968994, |
| "learning_rate": 4.454578706170075e-06, |
| "loss": 1.2309, |
| "step": 643 |
| }, |
| { |
| "epoch": 1.464339908952959, |
| "grad_norm": 0.6449349522590637, |
| "learning_rate": 4.4526447351304995e-06, |
| "loss": 1.2357, |
| "step": 644 |
| }, |
| { |
| "epoch": 1.4666160849772383, |
| "grad_norm": 0.5698959231376648, |
| "learning_rate": 4.450707762788884e-06, |
| "loss": 1.2064, |
| "step": 645 |
| }, |
| { |
| "epoch": 1.4688922610015174, |
| "grad_norm": 0.6145030856132507, |
| "learning_rate": 4.44876779212244e-06, |
| "loss": 1.1837, |
| "step": 646 |
| }, |
| { |
| "epoch": 1.4711684370257967, |
| "grad_norm": 0.6202698349952698, |
| "learning_rate": 4.446824826112992e-06, |
| "loss": 1.2459, |
| "step": 647 |
| }, |
| { |
| "epoch": 1.4734446130500758, |
| "grad_norm": 0.5868430137634277, |
| "learning_rate": 4.444878867746962e-06, |
| "loss": 1.1797, |
| "step": 648 |
| }, |
| { |
| "epoch": 1.475720789074355, |
| "grad_norm": 0.6009106636047363, |
| "learning_rate": 4.442929920015377e-06, |
| "loss": 1.2008, |
| "step": 649 |
| }, |
| { |
| "epoch": 1.4779969650986344, |
| "grad_norm": 0.6000754237174988, |
| "learning_rate": 4.440977985913856e-06, |
| "loss": 1.199, |
| "step": 650 |
| }, |
| { |
| "epoch": 1.4802731411229135, |
| "grad_norm": 0.5801194310188293, |
| "learning_rate": 4.439023068442608e-06, |
| "loss": 1.2806, |
| "step": 651 |
| }, |
| { |
| "epoch": 1.4825493171471926, |
| "grad_norm": 0.6096365451812744, |
| "learning_rate": 4.43706517060643e-06, |
| "loss": 1.2434, |
| "step": 652 |
| }, |
| { |
| "epoch": 1.484825493171472, |
| "grad_norm": 0.6116917133331299, |
| "learning_rate": 4.435104295414697e-06, |
| "loss": 1.2262, |
| "step": 653 |
| }, |
| { |
| "epoch": 1.4871016691957513, |
| "grad_norm": 0.5588528513908386, |
| "learning_rate": 4.4331404458813615e-06, |
| "loss": 1.2373, |
| "step": 654 |
| }, |
| { |
| "epoch": 1.4893778452200304, |
| "grad_norm": 0.5834910869598389, |
| "learning_rate": 4.431173625024948e-06, |
| "loss": 1.2766, |
| "step": 655 |
| }, |
| { |
| "epoch": 1.4916540212443095, |
| "grad_norm": 0.623333215713501, |
| "learning_rate": 4.429203835868549e-06, |
| "loss": 1.2375, |
| "step": 656 |
| }, |
| { |
| "epoch": 1.4939301972685888, |
| "grad_norm": 0.6033525466918945, |
| "learning_rate": 4.427231081439817e-06, |
| "loss": 1.2, |
| "step": 657 |
| }, |
| { |
| "epoch": 1.496206373292868, |
| "grad_norm": 0.5829868912696838, |
| "learning_rate": 4.4252553647709635e-06, |
| "loss": 1.2349, |
| "step": 658 |
| }, |
| { |
| "epoch": 1.4984825493171472, |
| "grad_norm": 0.5703787803649902, |
| "learning_rate": 4.423276688898754e-06, |
| "loss": 1.2213, |
| "step": 659 |
| }, |
| { |
| "epoch": 1.5007587253414263, |
| "grad_norm": 0.5715304017066956, |
| "learning_rate": 4.421295056864501e-06, |
| "loss": 1.2394, |
| "step": 660 |
| }, |
| { |
| "epoch": 1.5030349013657056, |
| "grad_norm": 0.6249496340751648, |
| "learning_rate": 4.419310471714061e-06, |
| "loss": 1.2027, |
| "step": 661 |
| }, |
| { |
| "epoch": 1.505311077389985, |
| "grad_norm": 0.5828440189361572, |
| "learning_rate": 4.417322936497831e-06, |
| "loss": 1.2442, |
| "step": 662 |
| }, |
| { |
| "epoch": 1.507587253414264, |
| "grad_norm": 0.5692103505134583, |
| "learning_rate": 4.415332454270741e-06, |
| "loss": 1.1791, |
| "step": 663 |
| }, |
| { |
| "epoch": 1.5098634294385431, |
| "grad_norm": 0.595786988735199, |
| "learning_rate": 4.41333902809225e-06, |
| "loss": 1.231, |
| "step": 664 |
| }, |
| { |
| "epoch": 1.5121396054628224, |
| "grad_norm": 0.5955888032913208, |
| "learning_rate": 4.411342661026342e-06, |
| "loss": 1.2206, |
| "step": 665 |
| }, |
| { |
| "epoch": 1.5144157814871018, |
| "grad_norm": 0.582911491394043, |
| "learning_rate": 4.409343356141525e-06, |
| "loss": 1.2169, |
| "step": 666 |
| }, |
| { |
| "epoch": 1.5166919575113809, |
| "grad_norm": 0.585781455039978, |
| "learning_rate": 4.407341116510818e-06, |
| "loss": 1.2345, |
| "step": 667 |
| }, |
| { |
| "epoch": 1.51896813353566, |
| "grad_norm": 0.5766403675079346, |
| "learning_rate": 4.405335945211754e-06, |
| "loss": 1.2307, |
| "step": 668 |
| }, |
| { |
| "epoch": 1.5212443095599393, |
| "grad_norm": 0.5894457101821899, |
| "learning_rate": 4.4033278453263685e-06, |
| "loss": 1.2445, |
| "step": 669 |
| }, |
| { |
| "epoch": 1.5235204855842186, |
| "grad_norm": 0.5737869143486023, |
| "learning_rate": 4.401316819941203e-06, |
| "loss": 1.2311, |
| "step": 670 |
| }, |
| { |
| "epoch": 1.5257966616084977, |
| "grad_norm": 0.5908883213996887, |
| "learning_rate": 4.399302872147292e-06, |
| "loss": 1.2381, |
| "step": 671 |
| }, |
| { |
| "epoch": 1.5280728376327768, |
| "grad_norm": 0.6145277619361877, |
| "learning_rate": 4.397286005040162e-06, |
| "loss": 1.2394, |
| "step": 672 |
| }, |
| { |
| "epoch": 1.5303490136570561, |
| "grad_norm": 0.5731965899467468, |
| "learning_rate": 4.395266221719829e-06, |
| "loss": 1.2369, |
| "step": 673 |
| }, |
| { |
| "epoch": 1.5326251896813354, |
| "grad_norm": 0.5849004983901978, |
| "learning_rate": 4.3932435252907914e-06, |
| "loss": 1.2308, |
| "step": 674 |
| }, |
| { |
| "epoch": 1.5349013657056145, |
| "grad_norm": 0.5686678290367126, |
| "learning_rate": 4.391217918862021e-06, |
| "loss": 1.259, |
| "step": 675 |
| }, |
| { |
| "epoch": 1.5371775417298936, |
| "grad_norm": 0.580635666847229, |
| "learning_rate": 4.389189405546966e-06, |
| "loss": 1.2359, |
| "step": 676 |
| }, |
| { |
| "epoch": 1.539453717754173, |
| "grad_norm": 0.5722584128379822, |
| "learning_rate": 4.387157988463544e-06, |
| "loss": 1.231, |
| "step": 677 |
| }, |
| { |
| "epoch": 1.5417298937784523, |
| "grad_norm": 0.5868629813194275, |
| "learning_rate": 4.38512367073413e-06, |
| "loss": 1.2363, |
| "step": 678 |
| }, |
| { |
| "epoch": 1.5440060698027314, |
| "grad_norm": 0.5766255259513855, |
| "learning_rate": 4.383086455485564e-06, |
| "loss": 1.2556, |
| "step": 679 |
| }, |
| { |
| "epoch": 1.5462822458270105, |
| "grad_norm": 0.5849782824516296, |
| "learning_rate": 4.381046345849136e-06, |
| "loss": 1.2189, |
| "step": 680 |
| }, |
| { |
| "epoch": 1.5485584218512898, |
| "grad_norm": 0.6070932149887085, |
| "learning_rate": 4.379003344960585e-06, |
| "loss": 1.2351, |
| "step": 681 |
| }, |
| { |
| "epoch": 1.550834597875569, |
| "grad_norm": 0.6085125803947449, |
| "learning_rate": 4.376957455960094e-06, |
| "loss": 1.2218, |
| "step": 682 |
| }, |
| { |
| "epoch": 1.5531107738998484, |
| "grad_norm": 0.5707188844680786, |
| "learning_rate": 4.374908681992287e-06, |
| "loss": 1.2501, |
| "step": 683 |
| }, |
| { |
| "epoch": 1.5553869499241275, |
| "grad_norm": 0.6099936366081238, |
| "learning_rate": 4.37285702620622e-06, |
| "loss": 1.2436, |
| "step": 684 |
| }, |
| { |
| "epoch": 1.5576631259484066, |
| "grad_norm": 0.603273332118988, |
| "learning_rate": 4.37080249175538e-06, |
| "loss": 1.239, |
| "step": 685 |
| }, |
| { |
| "epoch": 1.559939301972686, |
| "grad_norm": 0.5822923183441162, |
| "learning_rate": 4.368745081797678e-06, |
| "loss": 1.22, |
| "step": 686 |
| }, |
| { |
| "epoch": 1.5622154779969653, |
| "grad_norm": 0.5922508835792542, |
| "learning_rate": 4.3666847994954445e-06, |
| "loss": 1.2138, |
| "step": 687 |
| }, |
| { |
| "epoch": 1.5644916540212443, |
| "grad_norm": 0.585437536239624, |
| "learning_rate": 4.364621648015426e-06, |
| "loss": 1.207, |
| "step": 688 |
| }, |
| { |
| "epoch": 1.5667678300455234, |
| "grad_norm": 0.5693568587303162, |
| "learning_rate": 4.362555630528776e-06, |
| "loss": 1.2036, |
| "step": 689 |
| }, |
| { |
| "epoch": 1.5690440060698028, |
| "grad_norm": 0.5950521230697632, |
| "learning_rate": 4.360486750211059e-06, |
| "loss": 1.2682, |
| "step": 690 |
| }, |
| { |
| "epoch": 1.571320182094082, |
| "grad_norm": 0.5919183492660522, |
| "learning_rate": 4.358415010242234e-06, |
| "loss": 1.2082, |
| "step": 691 |
| }, |
| { |
| "epoch": 1.5735963581183612, |
| "grad_norm": 0.6143742203712463, |
| "learning_rate": 4.356340413806658e-06, |
| "loss": 1.1925, |
| "step": 692 |
| }, |
| { |
| "epoch": 1.5758725341426403, |
| "grad_norm": 0.6028359532356262, |
| "learning_rate": 4.354262964093079e-06, |
| "loss": 1.2196, |
| "step": 693 |
| }, |
| { |
| "epoch": 1.5781487101669196, |
| "grad_norm": 0.6061824560165405, |
| "learning_rate": 4.35218266429463e-06, |
| "loss": 1.2266, |
| "step": 694 |
| }, |
| { |
| "epoch": 1.580424886191199, |
| "grad_norm": 0.6007355451583862, |
| "learning_rate": 4.3500995176088235e-06, |
| "loss": 1.2104, |
| "step": 695 |
| }, |
| { |
| "epoch": 1.582701062215478, |
| "grad_norm": 0.6342191100120544, |
| "learning_rate": 4.348013527237549e-06, |
| "loss": 1.2197, |
| "step": 696 |
| }, |
| { |
| "epoch": 1.5849772382397571, |
| "grad_norm": 0.5949456095695496, |
| "learning_rate": 4.345924696387067e-06, |
| "loss": 1.2258, |
| "step": 697 |
| }, |
| { |
| "epoch": 1.5872534142640364, |
| "grad_norm": 0.6161270141601562, |
| "learning_rate": 4.343833028268004e-06, |
| "loss": 1.2299, |
| "step": 698 |
| }, |
| { |
| "epoch": 1.5895295902883158, |
| "grad_norm": 0.5942959785461426, |
| "learning_rate": 4.341738526095348e-06, |
| "loss": 1.2594, |
| "step": 699 |
| }, |
| { |
| "epoch": 1.5918057663125948, |
| "grad_norm": 0.5933099389076233, |
| "learning_rate": 4.339641193088439e-06, |
| "loss": 1.1932, |
| "step": 700 |
| }, |
| { |
| "epoch": 1.594081942336874, |
| "grad_norm": 0.5857350826263428, |
| "learning_rate": 4.337541032470976e-06, |
| "loss": 1.3019, |
| "step": 701 |
| }, |
| { |
| "epoch": 1.5963581183611533, |
| "grad_norm": 0.604029655456543, |
| "learning_rate": 4.335438047470996e-06, |
| "loss": 1.2227, |
| "step": 702 |
| }, |
| { |
| "epoch": 1.5986342943854326, |
| "grad_norm": 0.5927514433860779, |
| "learning_rate": 4.333332241320882e-06, |
| "loss": 1.2742, |
| "step": 703 |
| }, |
| { |
| "epoch": 1.6009104704097117, |
| "grad_norm": 0.5811514854431152, |
| "learning_rate": 4.331223617257351e-06, |
| "loss": 1.23, |
| "step": 704 |
| }, |
| { |
| "epoch": 1.6031866464339908, |
| "grad_norm": 0.5948609709739685, |
| "learning_rate": 4.329112178521454e-06, |
| "loss": 1.2114, |
| "step": 705 |
| }, |
| { |
| "epoch": 1.60546282245827, |
| "grad_norm": 0.6194981932640076, |
| "learning_rate": 4.326997928358565e-06, |
| "loss": 1.2439, |
| "step": 706 |
| }, |
| { |
| "epoch": 1.6077389984825494, |
| "grad_norm": 0.5834797024726868, |
| "learning_rate": 4.324880870018382e-06, |
| "loss": 1.2269, |
| "step": 707 |
| }, |
| { |
| "epoch": 1.6100151745068285, |
| "grad_norm": 0.5746902823448181, |
| "learning_rate": 4.322761006754916e-06, |
| "loss": 1.2175, |
| "step": 708 |
| }, |
| { |
| "epoch": 1.6122913505311076, |
| "grad_norm": 0.6000075936317444, |
| "learning_rate": 4.320638341826494e-06, |
| "loss": 1.2316, |
| "step": 709 |
| }, |
| { |
| "epoch": 1.614567526555387, |
| "grad_norm": 0.588010311126709, |
| "learning_rate": 4.318512878495745e-06, |
| "loss": 1.245, |
| "step": 710 |
| }, |
| { |
| "epoch": 1.6168437025796663, |
| "grad_norm": 0.6053698658943176, |
| "learning_rate": 4.316384620029601e-06, |
| "loss": 1.228, |
| "step": 711 |
| }, |
| { |
| "epoch": 1.6191198786039454, |
| "grad_norm": 0.5857113599777222, |
| "learning_rate": 4.314253569699292e-06, |
| "loss": 1.2511, |
| "step": 712 |
| }, |
| { |
| "epoch": 1.6213960546282244, |
| "grad_norm": 0.5974637866020203, |
| "learning_rate": 4.312119730780334e-06, |
| "loss": 1.2377, |
| "step": 713 |
| }, |
| { |
| "epoch": 1.6236722306525038, |
| "grad_norm": 0.5964690446853638, |
| "learning_rate": 4.309983106552535e-06, |
| "loss": 1.2307, |
| "step": 714 |
| }, |
| { |
| "epoch": 1.625948406676783, |
| "grad_norm": 0.5781478881835938, |
| "learning_rate": 4.307843700299982e-06, |
| "loss": 1.2295, |
| "step": 715 |
| }, |
| { |
| "epoch": 1.6282245827010622, |
| "grad_norm": 0.597053587436676, |
| "learning_rate": 4.305701515311037e-06, |
| "loss": 1.2085, |
| "step": 716 |
| }, |
| { |
| "epoch": 1.6305007587253413, |
| "grad_norm": 0.6326000690460205, |
| "learning_rate": 4.303556554878333e-06, |
| "loss": 1.238, |
| "step": 717 |
| }, |
| { |
| "epoch": 1.6327769347496206, |
| "grad_norm": 0.6087371706962585, |
| "learning_rate": 4.3014088222987714e-06, |
| "loss": 1.2275, |
| "step": 718 |
| }, |
| { |
| "epoch": 1.6350531107739, |
| "grad_norm": 0.5937424898147583, |
| "learning_rate": 4.299258320873513e-06, |
| "loss": 1.2144, |
| "step": 719 |
| }, |
| { |
| "epoch": 1.637329286798179, |
| "grad_norm": 0.5922595262527466, |
| "learning_rate": 4.297105053907973e-06, |
| "loss": 1.2078, |
| "step": 720 |
| }, |
| { |
| "epoch": 1.6396054628224581, |
| "grad_norm": 0.603537380695343, |
| "learning_rate": 4.294949024711819e-06, |
| "loss": 1.2054, |
| "step": 721 |
| }, |
| { |
| "epoch": 1.6418816388467374, |
| "grad_norm": 0.5896364450454712, |
| "learning_rate": 4.2927902365989645e-06, |
| "loss": 1.2038, |
| "step": 722 |
| }, |
| { |
| "epoch": 1.6441578148710168, |
| "grad_norm": 0.614658534526825, |
| "learning_rate": 4.290628692887564e-06, |
| "loss": 1.2428, |
| "step": 723 |
| }, |
| { |
| "epoch": 1.6464339908952959, |
| "grad_norm": 0.5901724100112915, |
| "learning_rate": 4.288464396900005e-06, |
| "loss": 1.2464, |
| "step": 724 |
| }, |
| { |
| "epoch": 1.648710166919575, |
| "grad_norm": 0.6086544394493103, |
| "learning_rate": 4.286297351962908e-06, |
| "loss": 1.1895, |
| "step": 725 |
| }, |
| { |
| "epoch": 1.6509863429438543, |
| "grad_norm": 0.5841042399406433, |
| "learning_rate": 4.284127561407118e-06, |
| "loss": 1.2222, |
| "step": 726 |
| }, |
| { |
| "epoch": 1.6532625189681336, |
| "grad_norm": 0.5791555643081665, |
| "learning_rate": 4.281955028567698e-06, |
| "loss": 1.2489, |
| "step": 727 |
| }, |
| { |
| "epoch": 1.655538694992413, |
| "grad_norm": 0.6219162344932556, |
| "learning_rate": 4.27977975678393e-06, |
| "loss": 1.2208, |
| "step": 728 |
| }, |
| { |
| "epoch": 1.657814871016692, |
| "grad_norm": 0.597656786441803, |
| "learning_rate": 4.277601749399301e-06, |
| "loss": 1.2049, |
| "step": 729 |
| }, |
| { |
| "epoch": 1.660091047040971, |
| "grad_norm": 0.5991064310073853, |
| "learning_rate": 4.27542100976151e-06, |
| "loss": 1.2602, |
| "step": 730 |
| }, |
| { |
| "epoch": 1.6623672230652504, |
| "grad_norm": 0.5922961831092834, |
| "learning_rate": 4.273237541222447e-06, |
| "loss": 1.2077, |
| "step": 731 |
| }, |
| { |
| "epoch": 1.6646433990895297, |
| "grad_norm": 0.6028023362159729, |
| "learning_rate": 4.2710513471382005e-06, |
| "loss": 1.2092, |
| "step": 732 |
| }, |
| { |
| "epoch": 1.6669195751138088, |
| "grad_norm": 0.581685483455658, |
| "learning_rate": 4.268862430869052e-06, |
| "loss": 1.2192, |
| "step": 733 |
| }, |
| { |
| "epoch": 1.669195751138088, |
| "grad_norm": 0.6332095265388489, |
| "learning_rate": 4.26667079577946e-06, |
| "loss": 1.2573, |
| "step": 734 |
| }, |
| { |
| "epoch": 1.6714719271623673, |
| "grad_norm": 0.6062667369842529, |
| "learning_rate": 4.2644764452380675e-06, |
| "loss": 1.2994, |
| "step": 735 |
| }, |
| { |
| "epoch": 1.6737481031866466, |
| "grad_norm": 0.5829861164093018, |
| "learning_rate": 4.262279382617687e-06, |
| "loss": 1.2286, |
| "step": 736 |
| }, |
| { |
| "epoch": 1.6760242792109257, |
| "grad_norm": 0.587378203868866, |
| "learning_rate": 4.260079611295303e-06, |
| "loss": 1.182, |
| "step": 737 |
| }, |
| { |
| "epoch": 1.6783004552352048, |
| "grad_norm": 0.6240544319152832, |
| "learning_rate": 4.257877134652062e-06, |
| "loss": 1.2543, |
| "step": 738 |
| }, |
| { |
| "epoch": 1.680576631259484, |
| "grad_norm": 0.5865784287452698, |
| "learning_rate": 4.255671956073269e-06, |
| "loss": 1.2355, |
| "step": 739 |
| }, |
| { |
| "epoch": 1.6828528072837634, |
| "grad_norm": 0.5847815871238708, |
| "learning_rate": 4.253464078948382e-06, |
| "loss": 1.2069, |
| "step": 740 |
| }, |
| { |
| "epoch": 1.6851289833080425, |
| "grad_norm": 0.5941992402076721, |
| "learning_rate": 4.251253506671006e-06, |
| "loss": 1.2423, |
| "step": 741 |
| }, |
| { |
| "epoch": 1.6874051593323216, |
| "grad_norm": 0.6245031952857971, |
| "learning_rate": 4.249040242638889e-06, |
| "loss": 1.2555, |
| "step": 742 |
| }, |
| { |
| "epoch": 1.689681335356601, |
| "grad_norm": 0.6055291295051575, |
| "learning_rate": 4.246824290253917e-06, |
| "loss": 1.2261, |
| "step": 743 |
| }, |
| { |
| "epoch": 1.6919575113808802, |
| "grad_norm": 0.5905616283416748, |
| "learning_rate": 4.244605652922108e-06, |
| "loss": 1.2385, |
| "step": 744 |
| }, |
| { |
| "epoch": 1.6942336874051593, |
| "grad_norm": 0.5896965265274048, |
| "learning_rate": 4.2423843340536066e-06, |
| "loss": 1.1945, |
| "step": 745 |
| }, |
| { |
| "epoch": 1.6965098634294384, |
| "grad_norm": 0.6129325032234192, |
| "learning_rate": 4.240160337062678e-06, |
| "loss": 1.223, |
| "step": 746 |
| }, |
| { |
| "epoch": 1.6987860394537178, |
| "grad_norm": 0.5988030433654785, |
| "learning_rate": 4.237933665367705e-06, |
| "loss": 1.2197, |
| "step": 747 |
| }, |
| { |
| "epoch": 1.701062215477997, |
| "grad_norm": 0.599388837814331, |
| "learning_rate": 4.235704322391181e-06, |
| "loss": 1.2214, |
| "step": 748 |
| }, |
| { |
| "epoch": 1.7033383915022762, |
| "grad_norm": 0.6087759137153625, |
| "learning_rate": 4.233472311559708e-06, |
| "loss": 1.2302, |
| "step": 749 |
| }, |
| { |
| "epoch": 1.7056145675265553, |
| "grad_norm": 0.5895616412162781, |
| "learning_rate": 4.231237636303982e-06, |
| "loss": 1.1976, |
| "step": 750 |
| }, |
| { |
| "epoch": 1.7078907435508346, |
| "grad_norm": 0.6117663383483887, |
| "learning_rate": 4.229000300058802e-06, |
| "loss": 1.1928, |
| "step": 751 |
| }, |
| { |
| "epoch": 1.710166919575114, |
| "grad_norm": 0.5945206880569458, |
| "learning_rate": 4.2267603062630526e-06, |
| "loss": 1.201, |
| "step": 752 |
| }, |
| { |
| "epoch": 1.712443095599393, |
| "grad_norm": 0.6434623599052429, |
| "learning_rate": 4.224517658359704e-06, |
| "loss": 1.239, |
| "step": 753 |
| }, |
| { |
| "epoch": 1.714719271623672, |
| "grad_norm": 0.5895166397094727, |
| "learning_rate": 4.222272359795806e-06, |
| "loss": 1.2305, |
| "step": 754 |
| }, |
| { |
| "epoch": 1.7169954476479514, |
| "grad_norm": 0.6248841285705566, |
| "learning_rate": 4.220024414022482e-06, |
| "loss": 1.2332, |
| "step": 755 |
| }, |
| { |
| "epoch": 1.7192716236722307, |
| "grad_norm": 0.6209638118743896, |
| "learning_rate": 4.217773824494926e-06, |
| "loss": 1.2773, |
| "step": 756 |
| }, |
| { |
| "epoch": 1.7215477996965098, |
| "grad_norm": 0.5973532199859619, |
| "learning_rate": 4.215520594672394e-06, |
| "loss": 1.1992, |
| "step": 757 |
| }, |
| { |
| "epoch": 1.723823975720789, |
| "grad_norm": 0.5936313271522522, |
| "learning_rate": 4.2132647280182e-06, |
| "loss": 1.2412, |
| "step": 758 |
| }, |
| { |
| "epoch": 1.7261001517450683, |
| "grad_norm": 0.6053516268730164, |
| "learning_rate": 4.211006227999713e-06, |
| "loss": 1.2129, |
| "step": 759 |
| }, |
| { |
| "epoch": 1.7283763277693476, |
| "grad_norm": 0.6065954566001892, |
| "learning_rate": 4.208745098088348e-06, |
| "loss": 1.2395, |
| "step": 760 |
| }, |
| { |
| "epoch": 1.7306525037936267, |
| "grad_norm": 0.6134182214736938, |
| "learning_rate": 4.206481341759562e-06, |
| "loss": 1.1969, |
| "step": 761 |
| }, |
| { |
| "epoch": 1.7329286798179058, |
| "grad_norm": 0.6103958487510681, |
| "learning_rate": 4.204214962492849e-06, |
| "loss": 1.2583, |
| "step": 762 |
| }, |
| { |
| "epoch": 1.735204855842185, |
| "grad_norm": 0.6010955572128296, |
| "learning_rate": 4.201945963771736e-06, |
| "loss": 1.2638, |
| "step": 763 |
| }, |
| { |
| "epoch": 1.7374810318664644, |
| "grad_norm": 0.6201740503311157, |
| "learning_rate": 4.199674349083776e-06, |
| "loss": 1.2491, |
| "step": 764 |
| }, |
| { |
| "epoch": 1.7397572078907435, |
| "grad_norm": 0.6140694618225098, |
| "learning_rate": 4.197400121920539e-06, |
| "loss": 1.243, |
| "step": 765 |
| }, |
| { |
| "epoch": 1.7420333839150226, |
| "grad_norm": 0.6441624164581299, |
| "learning_rate": 4.1951232857776164e-06, |
| "loss": 1.2614, |
| "step": 766 |
| }, |
| { |
| "epoch": 1.744309559939302, |
| "grad_norm": 0.6050844192504883, |
| "learning_rate": 4.192843844154606e-06, |
| "loss": 1.1756, |
| "step": 767 |
| }, |
| { |
| "epoch": 1.7465857359635812, |
| "grad_norm": 0.6491802930831909, |
| "learning_rate": 4.190561800555111e-06, |
| "loss": 1.2029, |
| "step": 768 |
| }, |
| { |
| "epoch": 1.7488619119878603, |
| "grad_norm": 0.6259174942970276, |
| "learning_rate": 4.1882771584867345e-06, |
| "loss": 1.1912, |
| "step": 769 |
| }, |
| { |
| "epoch": 1.7511380880121397, |
| "grad_norm": 0.5955666303634644, |
| "learning_rate": 4.1859899214610735e-06, |
| "loss": 1.2701, |
| "step": 770 |
| }, |
| { |
| "epoch": 1.7534142640364188, |
| "grad_norm": 0.6060442924499512, |
| "learning_rate": 4.183700092993712e-06, |
| "loss": 1.2269, |
| "step": 771 |
| }, |
| { |
| "epoch": 1.755690440060698, |
| "grad_norm": 0.6210846900939941, |
| "learning_rate": 4.1814076766042206e-06, |
| "loss": 1.2679, |
| "step": 772 |
| }, |
| { |
| "epoch": 1.7579666160849774, |
| "grad_norm": 0.5922744870185852, |
| "learning_rate": 4.179112675816144e-06, |
| "loss": 1.2171, |
| "step": 773 |
| }, |
| { |
| "epoch": 1.7602427921092565, |
| "grad_norm": 0.6048167943954468, |
| "learning_rate": 4.176815094157e-06, |
| "loss": 1.1887, |
| "step": 774 |
| }, |
| { |
| "epoch": 1.7625189681335356, |
| "grad_norm": 0.6661959290504456, |
| "learning_rate": 4.174514935158277e-06, |
| "loss": 1.2439, |
| "step": 775 |
| }, |
| { |
| "epoch": 1.764795144157815, |
| "grad_norm": 0.5862908959388733, |
| "learning_rate": 4.172212202355419e-06, |
| "loss": 1.2594, |
| "step": 776 |
| }, |
| { |
| "epoch": 1.7670713201820942, |
| "grad_norm": 0.615178644657135, |
| "learning_rate": 4.16990689928783e-06, |
| "loss": 1.2137, |
| "step": 777 |
| }, |
| { |
| "epoch": 1.7693474962063733, |
| "grad_norm": 0.6170365810394287, |
| "learning_rate": 4.167599029498865e-06, |
| "loss": 1.2278, |
| "step": 778 |
| }, |
| { |
| "epoch": 1.7716236722306524, |
| "grad_norm": 0.6055428385734558, |
| "learning_rate": 4.165288596535821e-06, |
| "loss": 1.232, |
| "step": 779 |
| }, |
| { |
| "epoch": 1.7738998482549317, |
| "grad_norm": 0.6081527471542358, |
| "learning_rate": 4.162975603949937e-06, |
| "loss": 1.2392, |
| "step": 780 |
| }, |
| { |
| "epoch": 1.776176024279211, |
| "grad_norm": 0.6220976710319519, |
| "learning_rate": 4.160660055296385e-06, |
| "loss": 1.2467, |
| "step": 781 |
| }, |
| { |
| "epoch": 1.7784522003034902, |
| "grad_norm": 0.5995768904685974, |
| "learning_rate": 4.158341954134268e-06, |
| "loss": 1.2141, |
| "step": 782 |
| }, |
| { |
| "epoch": 1.7807283763277693, |
| "grad_norm": 0.5946653485298157, |
| "learning_rate": 4.15602130402661e-06, |
| "loss": 1.255, |
| "step": 783 |
| }, |
| { |
| "epoch": 1.7830045523520486, |
| "grad_norm": 0.6094076633453369, |
| "learning_rate": 4.1536981085403546e-06, |
| "loss": 1.243, |
| "step": 784 |
| }, |
| { |
| "epoch": 1.785280728376328, |
| "grad_norm": 0.6584082841873169, |
| "learning_rate": 4.151372371246356e-06, |
| "loss": 1.2382, |
| "step": 785 |
| }, |
| { |
| "epoch": 1.787556904400607, |
| "grad_norm": 0.6139714121818542, |
| "learning_rate": 4.149044095719377e-06, |
| "loss": 1.2528, |
| "step": 786 |
| }, |
| { |
| "epoch": 1.789833080424886, |
| "grad_norm": 0.6047011017799377, |
| "learning_rate": 4.14671328553808e-06, |
| "loss": 1.2034, |
| "step": 787 |
| }, |
| { |
| "epoch": 1.7921092564491654, |
| "grad_norm": 0.6093196868896484, |
| "learning_rate": 4.144379944285024e-06, |
| "loss": 1.2669, |
| "step": 788 |
| }, |
| { |
| "epoch": 1.7943854324734447, |
| "grad_norm": 0.6222574710845947, |
| "learning_rate": 4.142044075546658e-06, |
| "loss": 1.1817, |
| "step": 789 |
| }, |
| { |
| "epoch": 1.7966616084977238, |
| "grad_norm": 0.6427398920059204, |
| "learning_rate": 4.13970568291332e-06, |
| "loss": 1.2165, |
| "step": 790 |
| }, |
| { |
| "epoch": 1.798937784522003, |
| "grad_norm": 0.6227960586547852, |
| "learning_rate": 4.13736476997922e-06, |
| "loss": 1.1816, |
| "step": 791 |
| }, |
| { |
| "epoch": 1.8012139605462822, |
| "grad_norm": 0.6001450419425964, |
| "learning_rate": 4.135021340342446e-06, |
| "loss": 1.2373, |
| "step": 792 |
| }, |
| { |
| "epoch": 1.8034901365705616, |
| "grad_norm": 0.6028245091438293, |
| "learning_rate": 4.132675397604956e-06, |
| "loss": 1.2524, |
| "step": 793 |
| }, |
| { |
| "epoch": 1.8057663125948407, |
| "grad_norm": 0.5959303379058838, |
| "learning_rate": 4.130326945372567e-06, |
| "loss": 1.198, |
| "step": 794 |
| }, |
| { |
| "epoch": 1.8080424886191198, |
| "grad_norm": 0.6001620888710022, |
| "learning_rate": 4.127975987254955e-06, |
| "loss": 1.2137, |
| "step": 795 |
| }, |
| { |
| "epoch": 1.810318664643399, |
| "grad_norm": 0.5951507091522217, |
| "learning_rate": 4.125622526865647e-06, |
| "loss": 1.2285, |
| "step": 796 |
| }, |
| { |
| "epoch": 1.8125948406676784, |
| "grad_norm": 0.614658534526825, |
| "learning_rate": 4.123266567822017e-06, |
| "loss": 1.2119, |
| "step": 797 |
| }, |
| { |
| "epoch": 1.8148710166919575, |
| "grad_norm": 0.6394176483154297, |
| "learning_rate": 4.120908113745281e-06, |
| "loss": 1.2444, |
| "step": 798 |
| }, |
| { |
| "epoch": 1.8171471927162366, |
| "grad_norm": 0.5989351868629456, |
| "learning_rate": 4.118547168260485e-06, |
| "loss": 1.1838, |
| "step": 799 |
| }, |
| { |
| "epoch": 1.819423368740516, |
| "grad_norm": 0.6235303282737732, |
| "learning_rate": 4.11618373499651e-06, |
| "loss": 1.2163, |
| "step": 800 |
| }, |
| { |
| "epoch": 1.8216995447647952, |
| "grad_norm": 0.6402750015258789, |
| "learning_rate": 4.113817817586055e-06, |
| "loss": 1.2445, |
| "step": 801 |
| }, |
| { |
| "epoch": 1.8239757207890743, |
| "grad_norm": 0.5973191857337952, |
| "learning_rate": 4.111449419665645e-06, |
| "loss": 1.2308, |
| "step": 802 |
| }, |
| { |
| "epoch": 1.8262518968133534, |
| "grad_norm": 0.6300286650657654, |
| "learning_rate": 4.1090785448756096e-06, |
| "loss": 1.2319, |
| "step": 803 |
| }, |
| { |
| "epoch": 1.8285280728376327, |
| "grad_norm": 0.5970984697341919, |
| "learning_rate": 4.1067051968600914e-06, |
| "loss": 1.1944, |
| "step": 804 |
| }, |
| { |
| "epoch": 1.830804248861912, |
| "grad_norm": 0.607427179813385, |
| "learning_rate": 4.104329379267031e-06, |
| "loss": 1.2331, |
| "step": 805 |
| }, |
| { |
| "epoch": 1.8330804248861912, |
| "grad_norm": 0.6165644526481628, |
| "learning_rate": 4.101951095748166e-06, |
| "loss": 1.2337, |
| "step": 806 |
| }, |
| { |
| "epoch": 1.8353566009104703, |
| "grad_norm": 0.639166533946991, |
| "learning_rate": 4.099570349959025e-06, |
| "loss": 1.2263, |
| "step": 807 |
| }, |
| { |
| "epoch": 1.8376327769347496, |
| "grad_norm": 0.6345863342285156, |
| "learning_rate": 4.097187145558919e-06, |
| "loss": 1.2397, |
| "step": 808 |
| }, |
| { |
| "epoch": 1.839908952959029, |
| "grad_norm": 0.607635498046875, |
| "learning_rate": 4.094801486210941e-06, |
| "loss": 1.1972, |
| "step": 809 |
| }, |
| { |
| "epoch": 1.842185128983308, |
| "grad_norm": 0.6224584579467773, |
| "learning_rate": 4.092413375581955e-06, |
| "loss": 1.231, |
| "step": 810 |
| }, |
| { |
| "epoch": 1.844461305007587, |
| "grad_norm": 0.5929398536682129, |
| "learning_rate": 4.090022817342593e-06, |
| "loss": 1.2234, |
| "step": 811 |
| }, |
| { |
| "epoch": 1.8467374810318664, |
| "grad_norm": 0.6391967535018921, |
| "learning_rate": 4.0876298151672525e-06, |
| "loss": 1.1931, |
| "step": 812 |
| }, |
| { |
| "epoch": 1.8490136570561457, |
| "grad_norm": 0.599383533000946, |
| "learning_rate": 4.08523437273408e-06, |
| "loss": 1.2425, |
| "step": 813 |
| }, |
| { |
| "epoch": 1.851289833080425, |
| "grad_norm": 0.5998767614364624, |
| "learning_rate": 4.082836493724981e-06, |
| "loss": 1.2188, |
| "step": 814 |
| }, |
| { |
| "epoch": 1.8535660091047041, |
| "grad_norm": 0.5895645618438721, |
| "learning_rate": 4.080436181825601e-06, |
| "loss": 1.2286, |
| "step": 815 |
| }, |
| { |
| "epoch": 1.8558421851289832, |
| "grad_norm": 0.6172052621841431, |
| "learning_rate": 4.078033440725327e-06, |
| "loss": 1.2007, |
| "step": 816 |
| }, |
| { |
| "epoch": 1.8581183611532626, |
| "grad_norm": 0.613259494304657, |
| "learning_rate": 4.075628274117279e-06, |
| "loss": 1.2256, |
| "step": 817 |
| }, |
| { |
| "epoch": 1.8603945371775419, |
| "grad_norm": 0.6026812791824341, |
| "learning_rate": 4.073220685698304e-06, |
| "loss": 1.2317, |
| "step": 818 |
| }, |
| { |
| "epoch": 1.862670713201821, |
| "grad_norm": 0.6112560629844666, |
| "learning_rate": 4.070810679168975e-06, |
| "loss": 1.2275, |
| "step": 819 |
| }, |
| { |
| "epoch": 1.8649468892261, |
| "grad_norm": 0.6044736504554749, |
| "learning_rate": 4.068398258233579e-06, |
| "loss": 1.2515, |
| "step": 820 |
| }, |
| { |
| "epoch": 1.8672230652503794, |
| "grad_norm": 0.6291022896766663, |
| "learning_rate": 4.065983426600113e-06, |
| "loss": 1.2137, |
| "step": 821 |
| }, |
| { |
| "epoch": 1.8694992412746587, |
| "grad_norm": 0.6136301755905151, |
| "learning_rate": 4.063566187980282e-06, |
| "loss": 1.2144, |
| "step": 822 |
| }, |
| { |
| "epoch": 1.8717754172989378, |
| "grad_norm": 0.6166698932647705, |
| "learning_rate": 4.06114654608949e-06, |
| "loss": 1.2434, |
| "step": 823 |
| }, |
| { |
| "epoch": 1.874051593323217, |
| "grad_norm": 0.6023617386817932, |
| "learning_rate": 4.058724504646834e-06, |
| "loss": 1.2186, |
| "step": 824 |
| }, |
| { |
| "epoch": 1.8763277693474962, |
| "grad_norm": 0.6259661912918091, |
| "learning_rate": 4.0563000673751e-06, |
| "loss": 1.1989, |
| "step": 825 |
| }, |
| { |
| "epoch": 1.8786039453717756, |
| "grad_norm": 0.6420421004295349, |
| "learning_rate": 4.053873238000756e-06, |
| "loss": 1.1981, |
| "step": 826 |
| }, |
| { |
| "epoch": 1.8808801213960546, |
| "grad_norm": 0.6250731348991394, |
| "learning_rate": 4.051444020253947e-06, |
| "loss": 1.246, |
| "step": 827 |
| }, |
| { |
| "epoch": 1.8831562974203337, |
| "grad_norm": 0.6473506689071655, |
| "learning_rate": 4.0490124178684884e-06, |
| "loss": 1.213, |
| "step": 828 |
| }, |
| { |
| "epoch": 1.885432473444613, |
| "grad_norm": 0.6448357701301575, |
| "learning_rate": 4.046578434581862e-06, |
| "loss": 1.1696, |
| "step": 829 |
| }, |
| { |
| "epoch": 1.8877086494688924, |
| "grad_norm": 0.6176803112030029, |
| "learning_rate": 4.044142074135209e-06, |
| "loss": 1.2453, |
| "step": 830 |
| }, |
| { |
| "epoch": 1.8899848254931715, |
| "grad_norm": 0.6398005485534668, |
| "learning_rate": 4.0417033402733244e-06, |
| "loss": 1.2198, |
| "step": 831 |
| }, |
| { |
| "epoch": 1.8922610015174506, |
| "grad_norm": 0.6350208520889282, |
| "learning_rate": 4.03926223674465e-06, |
| "loss": 1.2528, |
| "step": 832 |
| }, |
| { |
| "epoch": 1.89453717754173, |
| "grad_norm": 0.5937830209732056, |
| "learning_rate": 4.03681876730127e-06, |
| "loss": 1.1594, |
| "step": 833 |
| }, |
| { |
| "epoch": 1.8968133535660092, |
| "grad_norm": 0.6130216121673584, |
| "learning_rate": 4.034372935698908e-06, |
| "loss": 1.222, |
| "step": 834 |
| }, |
| { |
| "epoch": 1.8990895295902883, |
| "grad_norm": 0.6638323664665222, |
| "learning_rate": 4.031924745696916e-06, |
| "loss": 1.2338, |
| "step": 835 |
| }, |
| { |
| "epoch": 1.9013657056145674, |
| "grad_norm": 0.6491904258728027, |
| "learning_rate": 4.029474201058269e-06, |
| "loss": 1.2219, |
| "step": 836 |
| }, |
| { |
| "epoch": 1.9036418816388467, |
| "grad_norm": 0.612301766872406, |
| "learning_rate": 4.027021305549565e-06, |
| "loss": 1.2663, |
| "step": 837 |
| }, |
| { |
| "epoch": 1.905918057663126, |
| "grad_norm": 0.6025054454803467, |
| "learning_rate": 4.024566062941014e-06, |
| "loss": 1.2264, |
| "step": 838 |
| }, |
| { |
| "epoch": 1.9081942336874052, |
| "grad_norm": 0.6344963312149048, |
| "learning_rate": 4.022108477006434e-06, |
| "loss": 1.1948, |
| "step": 839 |
| }, |
| { |
| "epoch": 1.9104704097116842, |
| "grad_norm": 0.6077335476875305, |
| "learning_rate": 4.019648551523243e-06, |
| "loss": 1.2394, |
| "step": 840 |
| }, |
| { |
| "epoch": 1.9127465857359636, |
| "grad_norm": 0.6338925361633301, |
| "learning_rate": 4.017186290272456e-06, |
| "loss": 1.2136, |
| "step": 841 |
| }, |
| { |
| "epoch": 1.9150227617602429, |
| "grad_norm": 0.6291373372077942, |
| "learning_rate": 4.014721697038678e-06, |
| "loss": 1.2374, |
| "step": 842 |
| }, |
| { |
| "epoch": 1.917298937784522, |
| "grad_norm": 0.6118108630180359, |
| "learning_rate": 4.0122547756101005e-06, |
| "loss": 1.2045, |
| "step": 843 |
| }, |
| { |
| "epoch": 1.919575113808801, |
| "grad_norm": 0.6250407695770264, |
| "learning_rate": 4.009785529778489e-06, |
| "loss": 1.2349, |
| "step": 844 |
| }, |
| { |
| "epoch": 1.9218512898330804, |
| "grad_norm": 0.6737698912620544, |
| "learning_rate": 4.007313963339188e-06, |
| "loss": 1.2334, |
| "step": 845 |
| }, |
| { |
| "epoch": 1.9241274658573597, |
| "grad_norm": 0.649118959903717, |
| "learning_rate": 4.004840080091103e-06, |
| "loss": 1.1981, |
| "step": 846 |
| }, |
| { |
| "epoch": 1.9264036418816388, |
| "grad_norm": 0.6312914490699768, |
| "learning_rate": 4.002363883836704e-06, |
| "loss": 1.2341, |
| "step": 847 |
| }, |
| { |
| "epoch": 1.928679817905918, |
| "grad_norm": 0.6146298050880432, |
| "learning_rate": 3.999885378382013e-06, |
| "loss": 1.1925, |
| "step": 848 |
| }, |
| { |
| "epoch": 1.9309559939301972, |
| "grad_norm": 0.6233289241790771, |
| "learning_rate": 3.997404567536606e-06, |
| "loss": 1.2407, |
| "step": 849 |
| }, |
| { |
| "epoch": 1.9332321699544766, |
| "grad_norm": 0.6072235107421875, |
| "learning_rate": 3.994921455113598e-06, |
| "loss": 1.2033, |
| "step": 850 |
| }, |
| { |
| "epoch": 1.9355083459787557, |
| "grad_norm": 0.6547655463218689, |
| "learning_rate": 3.992436044929645e-06, |
| "loss": 1.2368, |
| "step": 851 |
| }, |
| { |
| "epoch": 1.9377845220030347, |
| "grad_norm": 0.6056034564971924, |
| "learning_rate": 3.989948340804932e-06, |
| "loss": 1.2212, |
| "step": 852 |
| }, |
| { |
| "epoch": 1.940060698027314, |
| "grad_norm": 0.6160012483596802, |
| "learning_rate": 3.9874583465631725e-06, |
| "loss": 1.1944, |
| "step": 853 |
| }, |
| { |
| "epoch": 1.9423368740515934, |
| "grad_norm": 0.641826868057251, |
| "learning_rate": 3.984966066031598e-06, |
| "loss": 1.2499, |
| "step": 854 |
| }, |
| { |
| "epoch": 1.9446130500758725, |
| "grad_norm": 0.6412007808685303, |
| "learning_rate": 3.982471503040954e-06, |
| "loss": 1.2024, |
| "step": 855 |
| }, |
| { |
| "epoch": 1.9468892261001516, |
| "grad_norm": 0.6296584606170654, |
| "learning_rate": 3.979974661425497e-06, |
| "loss": 1.1813, |
| "step": 856 |
| }, |
| { |
| "epoch": 1.949165402124431, |
| "grad_norm": 0.6448803544044495, |
| "learning_rate": 3.977475545022983e-06, |
| "loss": 1.2672, |
| "step": 857 |
| }, |
| { |
| "epoch": 1.9514415781487102, |
| "grad_norm": 0.6320902705192566, |
| "learning_rate": 3.9749741576746645e-06, |
| "loss": 1.196, |
| "step": 858 |
| }, |
| { |
| "epoch": 1.9537177541729895, |
| "grad_norm": 0.6109302639961243, |
| "learning_rate": 3.972470503225285e-06, |
| "loss": 1.2277, |
| "step": 859 |
| }, |
| { |
| "epoch": 1.9559939301972686, |
| "grad_norm": 0.6240274310112, |
| "learning_rate": 3.969964585523076e-06, |
| "loss": 1.2625, |
| "step": 860 |
| }, |
| { |
| "epoch": 1.9582701062215477, |
| "grad_norm": 0.5958450436592102, |
| "learning_rate": 3.967456408419742e-06, |
| "loss": 1.2133, |
| "step": 861 |
| }, |
| { |
| "epoch": 1.960546282245827, |
| "grad_norm": 0.6262888312339783, |
| "learning_rate": 3.964945975770464e-06, |
| "loss": 1.2238, |
| "step": 862 |
| }, |
| { |
| "epoch": 1.9628224582701064, |
| "grad_norm": 0.6366564631462097, |
| "learning_rate": 3.962433291433889e-06, |
| "loss": 1.2372, |
| "step": 863 |
| }, |
| { |
| "epoch": 1.9650986342943855, |
| "grad_norm": 0.6750831007957458, |
| "learning_rate": 3.959918359272125e-06, |
| "loss": 1.2409, |
| "step": 864 |
| }, |
| { |
| "epoch": 1.9673748103186646, |
| "grad_norm": 0.5879358649253845, |
| "learning_rate": 3.957401183150734e-06, |
| "loss": 1.2122, |
| "step": 865 |
| }, |
| { |
| "epoch": 1.9696509863429439, |
| "grad_norm": 0.6384773254394531, |
| "learning_rate": 3.9548817669387295e-06, |
| "loss": 1.2046, |
| "step": 866 |
| }, |
| { |
| "epoch": 1.9719271623672232, |
| "grad_norm": 0.6435151100158691, |
| "learning_rate": 3.952360114508565e-06, |
| "loss": 1.2545, |
| "step": 867 |
| }, |
| { |
| "epoch": 1.9742033383915023, |
| "grad_norm": 0.6609162092208862, |
| "learning_rate": 3.949836229736133e-06, |
| "loss": 1.2548, |
| "step": 868 |
| }, |
| { |
| "epoch": 1.9764795144157814, |
| "grad_norm": 0.6402998566627502, |
| "learning_rate": 3.947310116500758e-06, |
| "loss": 1.2369, |
| "step": 869 |
| }, |
| { |
| "epoch": 1.9787556904400607, |
| "grad_norm": 0.6171389222145081, |
| "learning_rate": 3.944781778685189e-06, |
| "loss": 1.1803, |
| "step": 870 |
| }, |
| { |
| "epoch": 1.98103186646434, |
| "grad_norm": 0.6790279150009155, |
| "learning_rate": 3.9422512201755925e-06, |
| "loss": 1.2349, |
| "step": 871 |
| }, |
| { |
| "epoch": 1.9833080424886191, |
| "grad_norm": 0.636738121509552, |
| "learning_rate": 3.93971844486155e-06, |
| "loss": 1.233, |
| "step": 872 |
| }, |
| { |
| "epoch": 1.9855842185128982, |
| "grad_norm": 0.6281400918960571, |
| "learning_rate": 3.937183456636051e-06, |
| "loss": 1.1973, |
| "step": 873 |
| }, |
| { |
| "epoch": 1.9878603945371776, |
| "grad_norm": 0.6086034774780273, |
| "learning_rate": 3.9346462593954845e-06, |
| "loss": 1.2017, |
| "step": 874 |
| }, |
| { |
| "epoch": 1.9901365705614569, |
| "grad_norm": 0.6195533871650696, |
| "learning_rate": 3.932106857039637e-06, |
| "loss": 1.22, |
| "step": 875 |
| }, |
| { |
| "epoch": 1.992412746585736, |
| "grad_norm": 0.6325448155403137, |
| "learning_rate": 3.929565253471681e-06, |
| "loss": 1.2081, |
| "step": 876 |
| }, |
| { |
| "epoch": 1.994688922610015, |
| "grad_norm": 0.6466575860977173, |
| "learning_rate": 3.927021452598177e-06, |
| "loss": 1.2734, |
| "step": 877 |
| }, |
| { |
| "epoch": 1.9969650986342944, |
| "grad_norm": 0.648371160030365, |
| "learning_rate": 3.924475458329059e-06, |
| "loss": 1.2018, |
| "step": 878 |
| } |
| ], |
| "logging_steps": 1, |
| "max_steps": 2634, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 6, |
| "save_steps": 439, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 1.965165450266411e+18, |
| "train_batch_size": 4, |
| "trial_name": null, |
| "trial_params": null |
| } |
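
The state above is the standard Hugging Face Trainer checkpoint log: per-step entries under "log_history" (loss, learning rate, gradient norm) plus the run configuration at the end (878 of 2634 max steps completed, 6 planned epochs, checkpoints every 439 steps). Below is a minimal sketch of how one might load such a file and plot the logged loss and learning-rate schedule; the path "trainer_state.json" is an assumption about where this log lives on disk, and only the standard-library json module plus matplotlib are used.

```python
# Minimal sketch: load a Trainer state file and plot training loss and the
# learning-rate schedule. The file path is an assumption for illustration.
import json

import matplotlib.pyplot as plt

with open("trainer_state.json") as f:
    state = json.load(f)

# Keep only entries that carry a training loss (skips any eval-only entries).
train_logs = [e for e in state["log_history"] if "loss" in e]
steps = [e["step"] for e in train_logs]
losses = [e["loss"] for e in train_logs]
lrs = [e["learning_rate"] for e in train_logs]

fig, (ax_loss, ax_lr) = plt.subplots(2, 1, sharex=True, figsize=(8, 6))
ax_loss.plot(steps, losses)
ax_loss.set_ylabel("training loss")
ax_lr.plot(steps, lrs)
ax_lr.set_ylabel("learning rate")
ax_lr.set_xlabel("global step")
fig.tight_layout()
plt.show()
```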