| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 4.990136570561456, |
| "eval_steps": 500, |
| "global_step": 2195, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.002276176024279211, |
| "grad_norm": 5.864941596984863, |
| "learning_rate": 5.0000000000000004e-08, |
| "loss": 1.982, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.004552352048558422, |
| "grad_norm": 6.175215244293213, |
| "learning_rate": 1.0000000000000001e-07, |
| "loss": 2.0217, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.006828528072837633, |
| "grad_norm": 6.1325860023498535, |
| "learning_rate": 1.5000000000000002e-07, |
| "loss": 2.0283, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.009104704097116844, |
| "grad_norm": 6.438838481903076, |
| "learning_rate": 2.0000000000000002e-07, |
| "loss": 2.0133, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.011380880121396054, |
| "grad_norm": 6.120014190673828, |
| "learning_rate": 2.5000000000000004e-07, |
| "loss": 1.9788, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.013657056145675266, |
| "grad_norm": 6.399510860443115, |
| "learning_rate": 3.0000000000000004e-07, |
| "loss": 2.0115, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.015933232169954476, |
| "grad_norm": 6.267389297485352, |
| "learning_rate": 3.5000000000000004e-07, |
| "loss": 2.034, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.018209408194233688, |
| "grad_norm": 6.195969581604004, |
| "learning_rate": 4.0000000000000003e-07, |
| "loss": 2.0221, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.0204855842185129, |
| "grad_norm": 6.281792163848877, |
| "learning_rate": 4.5000000000000003e-07, |
| "loss": 2.034, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.02276176024279211, |
| "grad_norm": 6.259925365447998, |
| "learning_rate": 5.000000000000001e-07, |
| "loss": 1.9919, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.02503793626707132, |
| "grad_norm": 6.189306259155273, |
| "learning_rate": 5.5e-07, |
| "loss": 1.9989, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.027314112291350532, |
| "grad_norm": 6.382223606109619, |
| "learning_rate": 6.000000000000001e-07, |
| "loss": 2.0004, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.02959028831562974, |
| "grad_norm": 6.581198215484619, |
| "learning_rate": 6.5e-07, |
| "loss": 1.9606, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.03186646433990895, |
| "grad_norm": 6.698477268218994, |
| "learning_rate": 7.000000000000001e-07, |
| "loss": 1.9986, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.03414264036418816, |
| "grad_norm": 6.462113857269287, |
| "learning_rate": 7.5e-07, |
| "loss": 1.9435, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.036418816388467376, |
| "grad_norm": 6.667123794555664, |
| "learning_rate": 8.000000000000001e-07, |
| "loss": 1.9262, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.038694992412746584, |
| "grad_norm": 6.812009334564209, |
| "learning_rate": 8.500000000000001e-07, |
| "loss": 1.9341, |
| "step": 17 |
| }, |
| { |
| "epoch": 0.0409711684370258, |
| "grad_norm": 6.460822582244873, |
| "learning_rate": 9.000000000000001e-07, |
| "loss": 1.8857, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.04324734446130501, |
| "grad_norm": 5.623890399932861, |
| "learning_rate": 9.500000000000001e-07, |
| "loss": 1.8256, |
| "step": 19 |
| }, |
| { |
| "epoch": 0.04552352048558422, |
| "grad_norm": 4.976780414581299, |
| "learning_rate": 1.0000000000000002e-06, |
| "loss": 1.8312, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.04779969650986343, |
| "grad_norm": 4.3025383949279785, |
| "learning_rate": 1.0500000000000001e-06, |
| "loss": 1.8263, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.05007587253414264, |
| "grad_norm": 3.7881436347961426, |
| "learning_rate": 1.1e-06, |
| "loss": 1.7652, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.05235204855842185, |
| "grad_norm": 3.4925425052642822, |
| "learning_rate": 1.1500000000000002e-06, |
| "loss": 1.7603, |
| "step": 23 |
| }, |
| { |
| "epoch": 0.054628224582701064, |
| "grad_norm": 3.0760865211486816, |
| "learning_rate": 1.2000000000000002e-06, |
| "loss": 1.7599, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.05690440060698027, |
| "grad_norm": 2.7170724868774414, |
| "learning_rate": 1.25e-06, |
| "loss": 1.7725, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.05918057663125948, |
| "grad_norm": 2.0981554985046387, |
| "learning_rate": 1.3e-06, |
| "loss": 1.6781, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.061456752655538696, |
| "grad_norm": 1.9057221412658691, |
| "learning_rate": 1.3500000000000002e-06, |
| "loss": 1.6897, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.0637329286798179, |
| "grad_norm": 1.678957223892212, |
| "learning_rate": 1.4000000000000001e-06, |
| "loss": 1.7124, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.06600910470409711, |
| "grad_norm": 1.594223141670227, |
| "learning_rate": 1.45e-06, |
| "loss": 1.6953, |
| "step": 29 |
| }, |
| { |
| "epoch": 0.06828528072837632, |
| "grad_norm": 1.5038321018218994, |
| "learning_rate": 1.5e-06, |
| "loss": 1.6392, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.07056145675265554, |
| "grad_norm": 1.5202770233154297, |
| "learning_rate": 1.5500000000000002e-06, |
| "loss": 1.6756, |
| "step": 31 |
| }, |
| { |
| "epoch": 0.07283763277693475, |
| "grad_norm": 1.4849720001220703, |
| "learning_rate": 1.6000000000000001e-06, |
| "loss": 1.6587, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.07511380880121396, |
| "grad_norm": 1.4973641633987427, |
| "learning_rate": 1.6500000000000003e-06, |
| "loss": 1.6222, |
| "step": 33 |
| }, |
| { |
| "epoch": 0.07738998482549317, |
| "grad_norm": 1.4055628776550293, |
| "learning_rate": 1.7000000000000002e-06, |
| "loss": 1.6318, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.07966616084977238, |
| "grad_norm": 1.365734338760376, |
| "learning_rate": 1.75e-06, |
| "loss": 1.5656, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.0819423368740516, |
| "grad_norm": 1.2574050426483154, |
| "learning_rate": 1.8000000000000001e-06, |
| "loss": 1.602, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.08421851289833081, |
| "grad_norm": 1.2459263801574707, |
| "learning_rate": 1.85e-06, |
| "loss": 1.571, |
| "step": 37 |
| }, |
| { |
| "epoch": 0.08649468892261002, |
| "grad_norm": 1.1563637256622314, |
| "learning_rate": 1.9000000000000002e-06, |
| "loss": 1.5968, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.08877086494688922, |
| "grad_norm": 1.0916545391082764, |
| "learning_rate": 1.9500000000000004e-06, |
| "loss": 1.5493, |
| "step": 39 |
| }, |
| { |
| "epoch": 0.09104704097116843, |
| "grad_norm": 1.0802186727523804, |
| "learning_rate": 2.0000000000000003e-06, |
| "loss": 1.529, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.09332321699544764, |
| "grad_norm": 1.0635664463043213, |
| "learning_rate": 2.05e-06, |
| "loss": 1.4784, |
| "step": 41 |
| }, |
| { |
| "epoch": 0.09559939301972686, |
| "grad_norm": 0.985824465751648, |
| "learning_rate": 2.1000000000000002e-06, |
| "loss": 1.5508, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.09787556904400607, |
| "grad_norm": 1.036191701889038, |
| "learning_rate": 2.15e-06, |
| "loss": 1.5465, |
| "step": 43 |
| }, |
| { |
| "epoch": 0.10015174506828528, |
| "grad_norm": 1.0564978122711182, |
| "learning_rate": 2.2e-06, |
| "loss": 1.503, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.10242792109256449, |
| "grad_norm": 1.1553199291229248, |
| "learning_rate": 2.25e-06, |
| "loss": 1.4578, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.1047040971168437, |
| "grad_norm": 1.1265777349472046, |
| "learning_rate": 2.3000000000000004e-06, |
| "loss": 1.4497, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.10698027314112292, |
| "grad_norm": 0.9469030499458313, |
| "learning_rate": 2.35e-06, |
| "loss": 1.4676, |
| "step": 47 |
| }, |
| { |
| "epoch": 0.10925644916540213, |
| "grad_norm": 0.649141252040863, |
| "learning_rate": 2.4000000000000003e-06, |
| "loss": 1.455, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.11153262518968134, |
| "grad_norm": 0.6022727489471436, |
| "learning_rate": 2.4500000000000003e-06, |
| "loss": 1.4814, |
| "step": 49 |
| }, |
| { |
| "epoch": 0.11380880121396054, |
| "grad_norm": 0.7700338363647461, |
| "learning_rate": 2.5e-06, |
| "loss": 1.4786, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.11608497723823975, |
| "grad_norm": 0.924614429473877, |
| "learning_rate": 2.55e-06, |
| "loss": 1.4338, |
| "step": 51 |
| }, |
| { |
| "epoch": 0.11836115326251896, |
| "grad_norm": 0.8892627954483032, |
| "learning_rate": 2.6e-06, |
| "loss": 1.441, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.12063732928679818, |
| "grad_norm": 0.7454217076301575, |
| "learning_rate": 2.6500000000000005e-06, |
| "loss": 1.4016, |
| "step": 53 |
| }, |
| { |
| "epoch": 0.12291350531107739, |
| "grad_norm": 0.5784000754356384, |
| "learning_rate": 2.7000000000000004e-06, |
| "loss": 1.4222, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.1251896813353566, |
| "grad_norm": 0.5783917903900146, |
| "learning_rate": 2.7500000000000004e-06, |
| "loss": 1.4087, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.1274658573596358, |
| "grad_norm": 0.5947427153587341, |
| "learning_rate": 2.8000000000000003e-06, |
| "loss": 1.4008, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.12974203338391502, |
| "grad_norm": 0.6172689199447632, |
| "learning_rate": 2.85e-06, |
| "loss": 1.4292, |
| "step": 57 |
| }, |
| { |
| "epoch": 0.13201820940819423, |
| "grad_norm": 0.6890118718147278, |
| "learning_rate": 2.9e-06, |
| "loss": 1.4215, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.13429438543247343, |
| "grad_norm": 0.5748654007911682, |
| "learning_rate": 2.95e-06, |
| "loss": 1.4402, |
| "step": 59 |
| }, |
| { |
| "epoch": 0.13657056145675264, |
| "grad_norm": 0.5015429258346558, |
| "learning_rate": 3e-06, |
| "loss": 1.4338, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.13884673748103188, |
| "grad_norm": 0.4844941794872284, |
| "learning_rate": 3.05e-06, |
| "loss": 1.3846, |
| "step": 61 |
| }, |
| { |
| "epoch": 0.1411229135053111, |
| "grad_norm": 0.48353612422943115, |
| "learning_rate": 3.1000000000000004e-06, |
| "loss": 1.3864, |
| "step": 62 |
| }, |
| { |
| "epoch": 0.1433990895295903, |
| "grad_norm": 0.47880005836486816, |
| "learning_rate": 3.1500000000000003e-06, |
| "loss": 1.3764, |
| "step": 63 |
| }, |
| { |
| "epoch": 0.1456752655538695, |
| "grad_norm": 0.5600204467773438, |
| "learning_rate": 3.2000000000000003e-06, |
| "loss": 1.398, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.1479514415781487, |
| "grad_norm": 0.4868157207965851, |
| "learning_rate": 3.2500000000000002e-06, |
| "loss": 1.3959, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.15022761760242792, |
| "grad_norm": 0.4253179430961609, |
| "learning_rate": 3.3000000000000006e-06, |
| "loss": 1.3695, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.15250379362670713, |
| "grad_norm": 0.4152253270149231, |
| "learning_rate": 3.3500000000000005e-06, |
| "loss": 1.428, |
| "step": 67 |
| }, |
| { |
| "epoch": 0.15477996965098634, |
| "grad_norm": 0.43653807044029236, |
| "learning_rate": 3.4000000000000005e-06, |
| "loss": 1.4244, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.15705614567526555, |
| "grad_norm": 0.4184909164905548, |
| "learning_rate": 3.45e-06, |
| "loss": 1.413, |
| "step": 69 |
| }, |
| { |
| "epoch": 0.15933232169954475, |
| "grad_norm": 0.4401929974555969, |
| "learning_rate": 3.5e-06, |
| "loss": 1.3769, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.16160849772382396, |
| "grad_norm": 0.42470934987068176, |
| "learning_rate": 3.5500000000000003e-06, |
| "loss": 1.328, |
| "step": 71 |
| }, |
| { |
| "epoch": 0.1638846737481032, |
| "grad_norm": 0.43167445063591003, |
| "learning_rate": 3.6000000000000003e-06, |
| "loss": 1.3585, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.1661608497723824, |
| "grad_norm": 0.39305731654167175, |
| "learning_rate": 3.65e-06, |
| "loss": 1.3635, |
| "step": 73 |
| }, |
| { |
| "epoch": 0.16843702579666162, |
| "grad_norm": 0.3937039077281952, |
| "learning_rate": 3.7e-06, |
| "loss": 1.3583, |
| "step": 74 |
| }, |
| { |
| "epoch": 0.17071320182094082, |
| "grad_norm": 0.4098603129386902, |
| "learning_rate": 3.7500000000000005e-06, |
| "loss": 1.3651, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.17298937784522003, |
| "grad_norm": 0.41061389446258545, |
| "learning_rate": 3.8000000000000005e-06, |
| "loss": 1.4184, |
| "step": 76 |
| }, |
| { |
| "epoch": 0.17526555386949924, |
| "grad_norm": 0.3926120698451996, |
| "learning_rate": 3.85e-06, |
| "loss": 1.3693, |
| "step": 77 |
| }, |
| { |
| "epoch": 0.17754172989377845, |
| "grad_norm": 0.41317838430404663, |
| "learning_rate": 3.900000000000001e-06, |
| "loss": 1.3354, |
| "step": 78 |
| }, |
| { |
| "epoch": 0.17981790591805766, |
| "grad_norm": 0.37922877073287964, |
| "learning_rate": 3.95e-06, |
| "loss": 1.364, |
| "step": 79 |
| }, |
| { |
| "epoch": 0.18209408194233687, |
| "grad_norm": 0.3894996643066406, |
| "learning_rate": 4.000000000000001e-06, |
| "loss": 1.3495, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.18437025796661607, |
| "grad_norm": 0.4024641513824463, |
| "learning_rate": 4.05e-06, |
| "loss": 1.3604, |
| "step": 81 |
| }, |
| { |
| "epoch": 0.18664643399089528, |
| "grad_norm": 0.38427308201789856, |
| "learning_rate": 4.1e-06, |
| "loss": 1.3734, |
| "step": 82 |
| }, |
| { |
| "epoch": 0.18892261001517452, |
| "grad_norm": 0.38881292939186096, |
| "learning_rate": 4.15e-06, |
| "loss": 1.3235, |
| "step": 83 |
| }, |
| { |
| "epoch": 0.19119878603945373, |
| "grad_norm": 0.4112228453159332, |
| "learning_rate": 4.2000000000000004e-06, |
| "loss": 1.3714, |
| "step": 84 |
| }, |
| { |
| "epoch": 0.19347496206373294, |
| "grad_norm": 0.3790343999862671, |
| "learning_rate": 4.25e-06, |
| "loss": 1.3508, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.19575113808801214, |
| "grad_norm": 0.38511818647384644, |
| "learning_rate": 4.3e-06, |
| "loss": 1.3726, |
| "step": 86 |
| }, |
| { |
| "epoch": 0.19802731411229135, |
| "grad_norm": 0.3809172213077545, |
| "learning_rate": 4.350000000000001e-06, |
| "loss": 1.3978, |
| "step": 87 |
| }, |
| { |
| "epoch": 0.20030349013657056, |
| "grad_norm": 0.39862319827079773, |
| "learning_rate": 4.4e-06, |
| "loss": 1.3402, |
| "step": 88 |
| }, |
| { |
| "epoch": 0.20257966616084977, |
| "grad_norm": 0.3779354989528656, |
| "learning_rate": 4.450000000000001e-06, |
| "loss": 1.3585, |
| "step": 89 |
| }, |
| { |
| "epoch": 0.20485584218512898, |
| "grad_norm": 0.3755280375480652, |
| "learning_rate": 4.5e-06, |
| "loss": 1.3809, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.2071320182094082, |
| "grad_norm": 0.4072270691394806, |
| "learning_rate": 4.5500000000000005e-06, |
| "loss": 1.337, |
| "step": 91 |
| }, |
| { |
| "epoch": 0.2094081942336874, |
| "grad_norm": 0.3852587938308716, |
| "learning_rate": 4.600000000000001e-06, |
| "loss": 1.3239, |
| "step": 92 |
| }, |
| { |
| "epoch": 0.2116843702579666, |
| "grad_norm": 0.3857567012310028, |
| "learning_rate": 4.65e-06, |
| "loss": 1.3676, |
| "step": 93 |
| }, |
| { |
| "epoch": 0.21396054628224584, |
| "grad_norm": 0.39954471588134766, |
| "learning_rate": 4.7e-06, |
| "loss": 1.372, |
| "step": 94 |
| }, |
| { |
| "epoch": 0.21623672230652505, |
| "grad_norm": 0.3801283836364746, |
| "learning_rate": 4.75e-06, |
| "loss": 1.3636, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.21851289833080426, |
| "grad_norm": 0.37748953700065613, |
| "learning_rate": 4.800000000000001e-06, |
| "loss": 1.3298, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.22078907435508346, |
| "grad_norm": 0.3678078055381775, |
| "learning_rate": 4.85e-06, |
| "loss": 1.3267, |
| "step": 97 |
| }, |
| { |
| "epoch": 0.22306525037936267, |
| "grad_norm": 0.3928042948246002, |
| "learning_rate": 4.9000000000000005e-06, |
| "loss": 1.3705, |
| "step": 98 |
| }, |
| { |
| "epoch": 0.22534142640364188, |
| "grad_norm": 0.3824443817138672, |
| "learning_rate": 4.95e-06, |
| "loss": 1.3536, |
| "step": 99 |
| }, |
| { |
| "epoch": 0.2276176024279211, |
| "grad_norm": 0.38775718212127686, |
| "learning_rate": 5e-06, |
| "loss": 1.3366, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.2298937784522003, |
| "grad_norm": 0.39415422081947327, |
| "learning_rate": 4.999998078694254e-06, |
| "loss": 1.3369, |
| "step": 101 |
| }, |
| { |
| "epoch": 0.2321699544764795, |
| "grad_norm": 0.3640560507774353, |
| "learning_rate": 4.999992314779968e-06, |
| "loss": 1.3548, |
| "step": 102 |
| }, |
| { |
| "epoch": 0.23444613050075871, |
| "grad_norm": 0.38077881932258606, |
| "learning_rate": 4.999982708266002e-06, |
| "loss": 1.322, |
| "step": 103 |
| }, |
| { |
| "epoch": 0.23672230652503792, |
| "grad_norm": 0.3910675346851349, |
| "learning_rate": 4.999969259167121e-06, |
| "loss": 1.3568, |
| "step": 104 |
| }, |
| { |
| "epoch": 0.23899848254931716, |
| "grad_norm": 0.3724777102470398, |
| "learning_rate": 4.999951967503998e-06, |
| "loss": 1.3657, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.24127465857359637, |
| "grad_norm": 0.39835065603256226, |
| "learning_rate": 4.9999308333032095e-06, |
| "loss": 1.3728, |
| "step": 106 |
| }, |
| { |
| "epoch": 0.24355083459787558, |
| "grad_norm": 0.3887874186038971, |
| "learning_rate": 4.999905856597241e-06, |
| "loss": 1.3269, |
| "step": 107 |
| }, |
| { |
| "epoch": 0.24582701062215478, |
| "grad_norm": 0.37291401624679565, |
| "learning_rate": 4.999877037424482e-06, |
| "loss": 1.3522, |
| "step": 108 |
| }, |
| { |
| "epoch": 0.248103186646434, |
| "grad_norm": 0.3793584406375885, |
| "learning_rate": 4.999844375829229e-06, |
| "loss": 1.3459, |
| "step": 109 |
| }, |
| { |
| "epoch": 0.2503793626707132, |
| "grad_norm": 0.38437148928642273, |
| "learning_rate": 4.999807871861686e-06, |
| "loss": 1.3419, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.2526555386949924, |
| "grad_norm": 0.37772583961486816, |
| "learning_rate": 4.999767525577958e-06, |
| "loss": 1.3349, |
| "step": 111 |
| }, |
| { |
| "epoch": 0.2549317147192716, |
| "grad_norm": 0.3829944133758545, |
| "learning_rate": 4.999723337040062e-06, |
| "loss": 1.3193, |
| "step": 112 |
| }, |
| { |
| "epoch": 0.2572078907435508, |
| "grad_norm": 0.38355737924575806, |
| "learning_rate": 4.999675306315917e-06, |
| "loss": 1.3457, |
| "step": 113 |
| }, |
| { |
| "epoch": 0.25948406676783003, |
| "grad_norm": 0.39071688055992126, |
| "learning_rate": 4.999623433479346e-06, |
| "loss": 1.3401, |
| "step": 114 |
| }, |
| { |
| "epoch": 0.26176024279210924, |
| "grad_norm": 0.3796067535877228, |
| "learning_rate": 4.9995677186100835e-06, |
| "loss": 1.3593, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.26403641881638845, |
| "grad_norm": 0.3870932459831238, |
| "learning_rate": 4.9995081617937635e-06, |
| "loss": 1.3678, |
| "step": 116 |
| }, |
| { |
| "epoch": 0.26631259484066766, |
| "grad_norm": 0.3870759606361389, |
| "learning_rate": 4.999444763121928e-06, |
| "loss": 1.331, |
| "step": 117 |
| }, |
| { |
| "epoch": 0.26858877086494687, |
| "grad_norm": 0.37003180384635925, |
| "learning_rate": 4.999377522692023e-06, |
| "loss": 1.3242, |
| "step": 118 |
| }, |
| { |
| "epoch": 0.2708649468892261, |
| "grad_norm": 0.3826284408569336, |
| "learning_rate": 4.999306440607401e-06, |
| "loss": 1.2921, |
| "step": 119 |
| }, |
| { |
| "epoch": 0.2731411229135053, |
| "grad_norm": 0.3886045515537262, |
| "learning_rate": 4.999231516977318e-06, |
| "loss": 1.2971, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.27541729893778455, |
| "grad_norm": 0.3992857336997986, |
| "learning_rate": 4.999152751916936e-06, |
| "loss": 1.2872, |
| "step": 121 |
| }, |
| { |
| "epoch": 0.27769347496206376, |
| "grad_norm": 0.4303230941295624, |
| "learning_rate": 4.999070145547318e-06, |
| "loss": 1.3562, |
| "step": 122 |
| }, |
| { |
| "epoch": 0.27996965098634297, |
| "grad_norm": 0.40188783407211304, |
| "learning_rate": 4.998983697995435e-06, |
| "loss": 1.3251, |
| "step": 123 |
| }, |
| { |
| "epoch": 0.2822458270106222, |
| "grad_norm": 0.41683951020240784, |
| "learning_rate": 4.998893409394162e-06, |
| "loss": 1.3279, |
| "step": 124 |
| }, |
| { |
| "epoch": 0.2845220030349014, |
| "grad_norm": 0.4539605379104614, |
| "learning_rate": 4.9987992798822745e-06, |
| "loss": 1.3133, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.2867981790591806, |
| "grad_norm": 0.40195104479789734, |
| "learning_rate": 4.998701309604454e-06, |
| "loss": 1.3372, |
| "step": 126 |
| }, |
| { |
| "epoch": 0.2890743550834598, |
| "grad_norm": 0.40602678060531616, |
| "learning_rate": 4.998599498711287e-06, |
| "loss": 1.3008, |
| "step": 127 |
| }, |
| { |
| "epoch": 0.291350531107739, |
| "grad_norm": 0.37955862283706665, |
| "learning_rate": 4.99849384735926e-06, |
| "loss": 1.2919, |
| "step": 128 |
| }, |
| { |
| "epoch": 0.2936267071320182, |
| "grad_norm": 0.38034912943840027, |
| "learning_rate": 4.9983843557107635e-06, |
| "loss": 1.3307, |
| "step": 129 |
| }, |
| { |
| "epoch": 0.2959028831562974, |
| "grad_norm": 0.3922058641910553, |
| "learning_rate": 4.9982710239340915e-06, |
| "loss": 1.3211, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.29817905918057663, |
| "grad_norm": 0.4012414515018463, |
| "learning_rate": 4.998153852203441e-06, |
| "loss": 1.3762, |
| "step": 131 |
| }, |
| { |
| "epoch": 0.30045523520485584, |
| "grad_norm": 0.41045159101486206, |
| "learning_rate": 4.998032840698909e-06, |
| "loss": 1.3384, |
| "step": 132 |
| }, |
| { |
| "epoch": 0.30273141122913505, |
| "grad_norm": 0.3880952298641205, |
| "learning_rate": 4.997907989606495e-06, |
| "loss": 1.2976, |
| "step": 133 |
| }, |
| { |
| "epoch": 0.30500758725341426, |
| "grad_norm": 0.39358070492744446, |
| "learning_rate": 4.997779299118102e-06, |
| "loss": 1.3036, |
| "step": 134 |
| }, |
| { |
| "epoch": 0.30728376327769347, |
| "grad_norm": 0.400647908449173, |
| "learning_rate": 4.997646769431532e-06, |
| "loss": 1.3573, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.3095599393019727, |
| "grad_norm": 0.40589869022369385, |
| "learning_rate": 4.99751040075049e-06, |
| "loss": 1.3462, |
| "step": 136 |
| }, |
| { |
| "epoch": 0.3118361153262519, |
| "grad_norm": 0.420673131942749, |
| "learning_rate": 4.997370193284581e-06, |
| "loss": 1.317, |
| "step": 137 |
| }, |
| { |
| "epoch": 0.3141122913505311, |
| "grad_norm": 0.3844830393791199, |
| "learning_rate": 4.997226147249309e-06, |
| "loss": 1.3437, |
| "step": 138 |
| }, |
| { |
| "epoch": 0.3163884673748103, |
| "grad_norm": 0.37681150436401367, |
| "learning_rate": 4.9970782628660794e-06, |
| "loss": 1.3216, |
| "step": 139 |
| }, |
| { |
| "epoch": 0.3186646433990895, |
| "grad_norm": 0.40281322598457336, |
| "learning_rate": 4.996926540362198e-06, |
| "loss": 1.3578, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.3209408194233687, |
| "grad_norm": 0.3950099050998688, |
| "learning_rate": 4.9967709799708675e-06, |
| "loss": 1.3472, |
| "step": 141 |
| }, |
| { |
| "epoch": 0.3232169954476479, |
| "grad_norm": 0.3890508711338043, |
| "learning_rate": 4.9966115819311926e-06, |
| "loss": 1.3112, |
| "step": 142 |
| }, |
| { |
| "epoch": 0.3254931714719272, |
| "grad_norm": 0.3960939347743988, |
| "learning_rate": 4.996448346488175e-06, |
| "loss": 1.331, |
| "step": 143 |
| }, |
| { |
| "epoch": 0.3277693474962064, |
| "grad_norm": 0.394761323928833, |
| "learning_rate": 4.9962812738927135e-06, |
| "loss": 1.3265, |
| "step": 144 |
| }, |
| { |
| "epoch": 0.3300455235204856, |
| "grad_norm": 0.4139835238456726, |
| "learning_rate": 4.996110364401607e-06, |
| "loss": 1.3423, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.3323216995447648, |
| "grad_norm": 0.40223428606987, |
| "learning_rate": 4.9959356182775525e-06, |
| "loss": 1.3213, |
| "step": 146 |
| }, |
| { |
| "epoch": 0.334597875569044, |
| "grad_norm": 0.41239285469055176, |
| "learning_rate": 4.9957570357891406e-06, |
| "loss": 1.3488, |
| "step": 147 |
| }, |
| { |
| "epoch": 0.33687405159332323, |
| "grad_norm": 0.41569817066192627, |
| "learning_rate": 4.995574617210861e-06, |
| "loss": 1.3373, |
| "step": 148 |
| }, |
| { |
| "epoch": 0.33915022761760244, |
| "grad_norm": 0.40224048495292664, |
| "learning_rate": 4.9953883628231e-06, |
| "loss": 1.3086, |
| "step": 149 |
| }, |
| { |
| "epoch": 0.34142640364188165, |
| "grad_norm": 0.4080573618412018, |
| "learning_rate": 4.995198272912137e-06, |
| "loss": 1.3221, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.34370257966616086, |
| "grad_norm": 0.41279059648513794, |
| "learning_rate": 4.9950043477701505e-06, |
| "loss": 1.3336, |
| "step": 151 |
| }, |
| { |
| "epoch": 0.34597875569044007, |
| "grad_norm": 0.4138430655002594, |
| "learning_rate": 4.994806587695212e-06, |
| "loss": 1.3245, |
| "step": 152 |
| }, |
| { |
| "epoch": 0.3482549317147193, |
| "grad_norm": 0.4141685664653778, |
| "learning_rate": 4.994604992991287e-06, |
| "loss": 1.3459, |
| "step": 153 |
| }, |
| { |
| "epoch": 0.3505311077389985, |
| "grad_norm": 0.4655224680900574, |
| "learning_rate": 4.994399563968235e-06, |
| "loss": 1.307, |
| "step": 154 |
| }, |
| { |
| "epoch": 0.3528072837632777, |
| "grad_norm": 0.40181776881217957, |
| "learning_rate": 4.99419030094181e-06, |
| "loss": 1.2951, |
| "step": 155 |
| }, |
| { |
| "epoch": 0.3550834597875569, |
| "grad_norm": 0.4349536597728729, |
| "learning_rate": 4.99397720423366e-06, |
| "loss": 1.3346, |
| "step": 156 |
| }, |
| { |
| "epoch": 0.3573596358118361, |
| "grad_norm": 0.47389090061187744, |
| "learning_rate": 4.993760274171322e-06, |
| "loss": 1.2918, |
| "step": 157 |
| }, |
| { |
| "epoch": 0.3596358118361153, |
| "grad_norm": 0.43464231491088867, |
| "learning_rate": 4.993539511088228e-06, |
| "loss": 1.3469, |
| "step": 158 |
| }, |
| { |
| "epoch": 0.3619119878603945, |
| "grad_norm": 0.43050721287727356, |
| "learning_rate": 4.993314915323701e-06, |
| "loss": 1.2993, |
| "step": 159 |
| }, |
| { |
| "epoch": 0.36418816388467373, |
| "grad_norm": 0.4154967665672302, |
| "learning_rate": 4.9930864872229555e-06, |
| "loss": 1.301, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.36646433990895294, |
| "grad_norm": 0.4043583869934082, |
| "learning_rate": 4.992854227137094e-06, |
| "loss": 1.3357, |
| "step": 161 |
| }, |
| { |
| "epoch": 0.36874051593323215, |
| "grad_norm": 0.4242326617240906, |
| "learning_rate": 4.992618135423111e-06, |
| "loss": 1.3139, |
| "step": 162 |
| }, |
| { |
| "epoch": 0.37101669195751136, |
| "grad_norm": 0.4029645621776581, |
| "learning_rate": 4.992378212443891e-06, |
| "loss": 1.2773, |
| "step": 163 |
| }, |
| { |
| "epoch": 0.37329286798179057, |
| "grad_norm": 0.3948841989040375, |
| "learning_rate": 4.992134458568205e-06, |
| "loss": 1.3267, |
| "step": 164 |
| }, |
| { |
| "epoch": 0.37556904400606983, |
| "grad_norm": 0.4325512647628784, |
| "learning_rate": 4.991886874170715e-06, |
| "loss": 1.2986, |
| "step": 165 |
| }, |
| { |
| "epoch": 0.37784522003034904, |
| "grad_norm": 0.4292261600494385, |
| "learning_rate": 4.991635459631968e-06, |
| "loss": 1.3383, |
| "step": 166 |
| }, |
| { |
| "epoch": 0.38012139605462825, |
| "grad_norm": 0.407819539308548, |
| "learning_rate": 4.991380215338399e-06, |
| "loss": 1.2798, |
| "step": 167 |
| }, |
| { |
| "epoch": 0.38239757207890746, |
| "grad_norm": 0.41592007875442505, |
| "learning_rate": 4.991121141682332e-06, |
| "loss": 1.3161, |
| "step": 168 |
| }, |
| { |
| "epoch": 0.38467374810318666, |
| "grad_norm": 0.4135512411594391, |
| "learning_rate": 4.990858239061973e-06, |
| "loss": 1.3221, |
| "step": 169 |
| }, |
| { |
| "epoch": 0.38694992412746587, |
| "grad_norm": 0.4168025851249695, |
| "learning_rate": 4.990591507881416e-06, |
| "loss": 1.3094, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.3892261001517451, |
| "grad_norm": 0.42845603823661804, |
| "learning_rate": 4.990320948550638e-06, |
| "loss": 1.3086, |
| "step": 171 |
| }, |
| { |
| "epoch": 0.3915022761760243, |
| "grad_norm": 0.4117361009120941, |
| "learning_rate": 4.9900465614855e-06, |
| "loss": 1.3074, |
| "step": 172 |
| }, |
| { |
| "epoch": 0.3937784522003035, |
| "grad_norm": 0.40385058522224426, |
| "learning_rate": 4.989768347107749e-06, |
| "loss": 1.3015, |
| "step": 173 |
| }, |
| { |
| "epoch": 0.3960546282245827, |
| "grad_norm": 0.42507070302963257, |
| "learning_rate": 4.989486305845012e-06, |
| "loss": 1.303, |
| "step": 174 |
| }, |
| { |
| "epoch": 0.3983308042488619, |
| "grad_norm": 0.4167408347129822, |
| "learning_rate": 4.989200438130799e-06, |
| "loss": 1.3246, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.4006069802731411, |
| "grad_norm": 0.4459727108478546, |
| "learning_rate": 4.988910744404501e-06, |
| "loss": 1.3082, |
| "step": 176 |
| }, |
| { |
| "epoch": 0.40288315629742033, |
| "grad_norm": 0.41572514176368713, |
| "learning_rate": 4.988617225111392e-06, |
| "loss": 1.329, |
| "step": 177 |
| }, |
| { |
| "epoch": 0.40515933232169954, |
| "grad_norm": 0.40346917510032654, |
| "learning_rate": 4.988319880702621e-06, |
| "loss": 1.3204, |
| "step": 178 |
| }, |
| { |
| "epoch": 0.40743550834597875, |
| "grad_norm": 0.49305301904678345, |
| "learning_rate": 4.988018711635223e-06, |
| "loss": 1.3174, |
| "step": 179 |
| }, |
| { |
| "epoch": 0.40971168437025796, |
| "grad_norm": 0.4136899411678314, |
| "learning_rate": 4.987713718372106e-06, |
| "loss": 1.3153, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.41198786039453716, |
| "grad_norm": 0.4320002794265747, |
| "learning_rate": 4.98740490138206e-06, |
| "loss": 1.3233, |
| "step": 181 |
| }, |
| { |
| "epoch": 0.4142640364188164, |
| "grad_norm": 0.40051817893981934, |
| "learning_rate": 4.9870922611397484e-06, |
| "loss": 1.3298, |
| "step": 182 |
| }, |
| { |
| "epoch": 0.4165402124430956, |
| "grad_norm": 0.43490317463874817, |
| "learning_rate": 4.986775798125715e-06, |
| "loss": 1.2924, |
| "step": 183 |
| }, |
| { |
| "epoch": 0.4188163884673748, |
| "grad_norm": 0.41733044385910034, |
| "learning_rate": 4.986455512826377e-06, |
| "loss": 1.3407, |
| "step": 184 |
| }, |
| { |
| "epoch": 0.421092564491654, |
| "grad_norm": 0.45686185359954834, |
| "learning_rate": 4.986131405734027e-06, |
| "loss": 1.3002, |
| "step": 185 |
| }, |
| { |
| "epoch": 0.4233687405159332, |
| "grad_norm": 0.4178033173084259, |
| "learning_rate": 4.985803477346832e-06, |
| "loss": 1.2707, |
| "step": 186 |
| }, |
| { |
| "epoch": 0.42564491654021247, |
| "grad_norm": 0.44030341506004333, |
| "learning_rate": 4.985471728168832e-06, |
| "loss": 1.3522, |
| "step": 187 |
| }, |
| { |
| "epoch": 0.4279210925644917, |
| "grad_norm": 0.4167434573173523, |
| "learning_rate": 4.985136158709942e-06, |
| "loss": 1.2952, |
| "step": 188 |
| }, |
| { |
| "epoch": 0.4301972685887709, |
| "grad_norm": 0.43799030780792236, |
| "learning_rate": 4.984796769485946e-06, |
| "loss": 1.3204, |
| "step": 189 |
| }, |
| { |
| "epoch": 0.4324734446130501, |
| "grad_norm": 0.3963024914264679, |
| "learning_rate": 4.984453561018501e-06, |
| "loss": 1.2852, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.4347496206373293, |
| "grad_norm": 0.4606306850910187, |
| "learning_rate": 4.984106533835132e-06, |
| "loss": 1.3, |
| "step": 191 |
| }, |
| { |
| "epoch": 0.4370257966616085, |
| "grad_norm": 0.43703702092170715, |
| "learning_rate": 4.9837556884692374e-06, |
| "loss": 1.2865, |
| "step": 192 |
| }, |
| { |
| "epoch": 0.4393019726858877, |
| "grad_norm": 0.419226735830307, |
| "learning_rate": 4.9834010254600814e-06, |
| "loss": 1.3212, |
| "step": 193 |
| }, |
| { |
| "epoch": 0.44157814871016693, |
| "grad_norm": 0.4051378071308136, |
| "learning_rate": 4.983042545352796e-06, |
| "loss": 1.3102, |
| "step": 194 |
| }, |
| { |
| "epoch": 0.44385432473444614, |
| "grad_norm": 0.44308584928512573, |
| "learning_rate": 4.982680248698383e-06, |
| "loss": 1.2753, |
| "step": 195 |
| }, |
| { |
| "epoch": 0.44613050075872535, |
| "grad_norm": 0.48592913150787354, |
| "learning_rate": 4.982314136053707e-06, |
| "loss": 1.3468, |
| "step": 196 |
| }, |
| { |
| "epoch": 0.44840667678300455, |
| "grad_norm": 0.4361239969730377, |
| "learning_rate": 4.981944207981499e-06, |
| "loss": 1.2345, |
| "step": 197 |
| }, |
| { |
| "epoch": 0.45068285280728376, |
| "grad_norm": 0.4420235753059387, |
| "learning_rate": 4.981570465050357e-06, |
| "loss": 1.308, |
| "step": 198 |
| }, |
| { |
| "epoch": 0.45295902883156297, |
| "grad_norm": 0.4724012315273285, |
| "learning_rate": 4.98119290783474e-06, |
| "loss": 1.3451, |
| "step": 199 |
| }, |
| { |
| "epoch": 0.4552352048558422, |
| "grad_norm": 0.4347815215587616, |
| "learning_rate": 4.980811536914968e-06, |
| "loss": 1.2926, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.4575113808801214, |
| "grad_norm": 0.4243141710758209, |
| "learning_rate": 4.980426352877228e-06, |
| "loss": 1.2863, |
| "step": 201 |
| }, |
| { |
| "epoch": 0.4597875569044006, |
| "grad_norm": 0.41129249334335327, |
| "learning_rate": 4.980037356313563e-06, |
| "loss": 1.3017, |
| "step": 202 |
| }, |
| { |
| "epoch": 0.4620637329286798, |
| "grad_norm": 0.4349686801433563, |
| "learning_rate": 4.979644547821879e-06, |
| "loss": 1.3655, |
| "step": 203 |
| }, |
| { |
| "epoch": 0.464339908952959, |
| "grad_norm": 0.438151478767395, |
| "learning_rate": 4.97924792800594e-06, |
| "loss": 1.304, |
| "step": 204 |
| }, |
| { |
| "epoch": 0.4666160849772382, |
| "grad_norm": 0.46755126118659973, |
| "learning_rate": 4.978847497475369e-06, |
| "loss": 1.3282, |
| "step": 205 |
| }, |
| { |
| "epoch": 0.46889226100151743, |
| "grad_norm": 0.42544615268707275, |
| "learning_rate": 4.9784432568456445e-06, |
| "loss": 1.3524, |
| "step": 206 |
| }, |
| { |
| "epoch": 0.47116843702579664, |
| "grad_norm": 0.4163425862789154, |
| "learning_rate": 4.9780352067381024e-06, |
| "loss": 1.3303, |
| "step": 207 |
| }, |
| { |
| "epoch": 0.47344461305007585, |
| "grad_norm": 0.4662051498889923, |
| "learning_rate": 4.977623347779935e-06, |
| "loss": 1.2723, |
| "step": 208 |
| }, |
| { |
| "epoch": 0.4757207890743551, |
| "grad_norm": 0.4841192662715912, |
| "learning_rate": 4.977207680604187e-06, |
| "loss": 1.3281, |
| "step": 209 |
| }, |
| { |
| "epoch": 0.4779969650986343, |
| "grad_norm": 0.47023245692253113, |
| "learning_rate": 4.976788205849758e-06, |
| "loss": 1.2983, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.4802731411229135, |
| "grad_norm": 0.4251156449317932, |
| "learning_rate": 4.9763649241613985e-06, |
| "loss": 1.3215, |
| "step": 211 |
| }, |
| { |
| "epoch": 0.48254931714719274, |
| "grad_norm": 0.436788409948349, |
| "learning_rate": 4.975937836189712e-06, |
| "loss": 1.3006, |
| "step": 212 |
| }, |
| { |
| "epoch": 0.48482549317147194, |
| "grad_norm": 0.46025222539901733, |
| "learning_rate": 4.975506942591152e-06, |
| "loss": 1.3121, |
| "step": 213 |
| }, |
| { |
| "epoch": 0.48710166919575115, |
| "grad_norm": 0.43663930892944336, |
| "learning_rate": 4.97507224402802e-06, |
| "loss": 1.3133, |
| "step": 214 |
| }, |
| { |
| "epoch": 0.48937784522003036, |
| "grad_norm": 0.48787179589271545, |
| "learning_rate": 4.974633741168469e-06, |
| "loss": 1.266, |
| "step": 215 |
| }, |
| { |
| "epoch": 0.49165402124430957, |
| "grad_norm": 0.4265913665294647, |
| "learning_rate": 4.974191434686496e-06, |
| "loss": 1.3035, |
| "step": 216 |
| }, |
| { |
| "epoch": 0.4939301972685888, |
| "grad_norm": 0.4345017373561859, |
| "learning_rate": 4.973745325261946e-06, |
| "loss": 1.2987, |
| "step": 217 |
| }, |
| { |
| "epoch": 0.496206373292868, |
| "grad_norm": 0.47078996896743774, |
| "learning_rate": 4.973295413580509e-06, |
| "loss": 1.3176, |
| "step": 218 |
| }, |
| { |
| "epoch": 0.4984825493171472, |
| "grad_norm": 0.4349548816680908, |
| "learning_rate": 4.97284170033372e-06, |
| "loss": 1.2829, |
| "step": 219 |
| }, |
| { |
| "epoch": 0.5007587253414264, |
| "grad_norm": 0.4705260694026947, |
| "learning_rate": 4.9723841862189555e-06, |
| "loss": 1.2847, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.5030349013657056, |
| "grad_norm": 0.4285137951374054, |
| "learning_rate": 4.971922871939436e-06, |
| "loss": 1.2774, |
| "step": 221 |
| }, |
| { |
| "epoch": 0.5053110773899848, |
| "grad_norm": 0.46022048592567444, |
| "learning_rate": 4.971457758204221e-06, |
| "loss": 1.3006, |
| "step": 222 |
| }, |
| { |
| "epoch": 0.507587253414264, |
| "grad_norm": 0.4904478192329407, |
| "learning_rate": 4.970988845728213e-06, |
| "loss": 1.3032, |
| "step": 223 |
| }, |
| { |
| "epoch": 0.5098634294385432, |
| "grad_norm": 0.4171503484249115, |
| "learning_rate": 4.9705161352321496e-06, |
| "loss": 1.3118, |
| "step": 224 |
| }, |
| { |
| "epoch": 0.5121396054628224, |
| "grad_norm": 0.4424084722995758, |
| "learning_rate": 4.970039627442608e-06, |
| "loss": 1.2342, |
| "step": 225 |
| }, |
| { |
| "epoch": 0.5144157814871017, |
| "grad_norm": 0.45744988322257996, |
| "learning_rate": 4.969559323092004e-06, |
| "loss": 1.2975, |
| "step": 226 |
| }, |
| { |
| "epoch": 0.5166919575113809, |
| "grad_norm": 0.4306228756904602, |
| "learning_rate": 4.969075222918583e-06, |
| "loss": 1.2791, |
| "step": 227 |
| }, |
| { |
| "epoch": 0.5189681335356601, |
| "grad_norm": 0.43930479884147644, |
| "learning_rate": 4.9685873276664324e-06, |
| "loss": 1.2952, |
| "step": 228 |
| }, |
| { |
| "epoch": 0.5212443095599393, |
| "grad_norm": 0.4268686771392822, |
| "learning_rate": 4.968095638085467e-06, |
| "loss": 1.2902, |
| "step": 229 |
| }, |
| { |
| "epoch": 0.5235204855842185, |
| "grad_norm": 0.4320680499076843, |
| "learning_rate": 4.9676001549314356e-06, |
| "loss": 1.2941, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.5257966616084977, |
| "grad_norm": 0.4509009122848511, |
| "learning_rate": 4.967100878965918e-06, |
| "loss": 1.3353, |
| "step": 231 |
| }, |
| { |
| "epoch": 0.5280728376327769, |
| "grad_norm": 0.4458315670490265, |
| "learning_rate": 4.966597810956325e-06, |
| "loss": 1.2918, |
| "step": 232 |
| }, |
| { |
| "epoch": 0.5303490136570561, |
| "grad_norm": 0.4613376259803772, |
| "learning_rate": 4.966090951675893e-06, |
| "loss": 1.3085, |
| "step": 233 |
| }, |
| { |
| "epoch": 0.5326251896813353, |
| "grad_norm": 0.4486188590526581, |
| "learning_rate": 4.9655803019036875e-06, |
| "loss": 1.2783, |
| "step": 234 |
| }, |
| { |
| "epoch": 0.5349013657056145, |
| "grad_norm": 0.44070056080818176, |
| "learning_rate": 4.9650658624246e-06, |
| "loss": 1.2969, |
| "step": 235 |
| }, |
| { |
| "epoch": 0.5371775417298937, |
| "grad_norm": 0.45442667603492737, |
| "learning_rate": 4.9645476340293474e-06, |
| "loss": 1.273, |
| "step": 236 |
| }, |
| { |
| "epoch": 0.539453717754173, |
| "grad_norm": 0.4485810697078705, |
| "learning_rate": 4.96402561751447e-06, |
| "loss": 1.2524, |
| "step": 237 |
| }, |
| { |
| "epoch": 0.5417298937784522, |
| "grad_norm": 0.43408727645874023, |
| "learning_rate": 4.96349981368233e-06, |
| "loss": 1.3, |
| "step": 238 |
| }, |
| { |
| "epoch": 0.5440060698027314, |
| "grad_norm": 0.45317673683166504, |
| "learning_rate": 4.962970223341112e-06, |
| "loss": 1.2959, |
| "step": 239 |
| }, |
| { |
| "epoch": 0.5462822458270106, |
| "grad_norm": 0.45147350430488586, |
| "learning_rate": 4.962436847304818e-06, |
| "loss": 1.2588, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.5485584218512898, |
| "grad_norm": 0.4372202157974243, |
| "learning_rate": 4.961899686393273e-06, |
| "loss": 1.2472, |
| "step": 241 |
| }, |
| { |
| "epoch": 0.5508345978755691, |
| "grad_norm": 0.4300381541252136, |
| "learning_rate": 4.961358741432116e-06, |
| "loss": 1.2892, |
| "step": 242 |
| }, |
| { |
| "epoch": 0.5531107738998483, |
| "grad_norm": 0.4326576888561249, |
| "learning_rate": 4.9608140132528045e-06, |
| "loss": 1.2873, |
| "step": 243 |
| }, |
| { |
| "epoch": 0.5553869499241275, |
| "grad_norm": 0.42891374230384827, |
| "learning_rate": 4.960265502692609e-06, |
| "loss": 1.3159, |
| "step": 244 |
| }, |
| { |
| "epoch": 0.5576631259484067, |
| "grad_norm": 0.44637322425842285, |
| "learning_rate": 4.959713210594616e-06, |
| "loss": 1.2964, |
| "step": 245 |
| }, |
| { |
| "epoch": 0.5599393019726859, |
| "grad_norm": 0.4534567892551422, |
| "learning_rate": 4.959157137807721e-06, |
| "loss": 1.2811, |
| "step": 246 |
| }, |
| { |
| "epoch": 0.5622154779969651, |
| "grad_norm": 0.4480896294116974, |
| "learning_rate": 4.958597285186635e-06, |
| "loss": 1.2887, |
| "step": 247 |
| }, |
| { |
| "epoch": 0.5644916540212443, |
| "grad_norm": 0.42966964840888977, |
| "learning_rate": 4.958033653591874e-06, |
| "loss": 1.2927, |
| "step": 248 |
| }, |
| { |
| "epoch": 0.5667678300455236, |
| "grad_norm": 0.4520474076271057, |
| "learning_rate": 4.9574662438897675e-06, |
| "loss": 1.334, |
| "step": 249 |
| }, |
| { |
| "epoch": 0.5690440060698028, |
| "grad_norm": 0.4476149082183838, |
| "learning_rate": 4.956895056952448e-06, |
| "loss": 1.2813, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.571320182094082, |
| "grad_norm": 0.4495325982570648, |
| "learning_rate": 4.956320093657855e-06, |
| "loss": 1.3455, |
| "step": 251 |
| }, |
| { |
| "epoch": 0.5735963581183612, |
| "grad_norm": 0.4634062945842743, |
| "learning_rate": 4.955741354889734e-06, |
| "loss": 1.3009, |
| "step": 252 |
| }, |
| { |
| "epoch": 0.5758725341426404, |
| "grad_norm": 0.43844589591026306, |
| "learning_rate": 4.955158841537632e-06, |
| "loss": 1.2775, |
| "step": 253 |
| }, |
| { |
| "epoch": 0.5781487101669196, |
| "grad_norm": 0.4297947585582733, |
| "learning_rate": 4.954572554496897e-06, |
| "loss": 1.3005, |
| "step": 254 |
| }, |
| { |
| "epoch": 0.5804248861911988, |
| "grad_norm": 0.45026981830596924, |
| "learning_rate": 4.953982494668679e-06, |
| "loss": 1.2829, |
| "step": 255 |
| }, |
| { |
| "epoch": 0.582701062215478, |
| "grad_norm": 0.4508177936077118, |
| "learning_rate": 4.953388662959926e-06, |
| "loss": 1.3249, |
| "step": 256 |
| }, |
| { |
| "epoch": 0.5849772382397572, |
| "grad_norm": 0.4628501236438751, |
| "learning_rate": 4.952791060283384e-06, |
| "loss": 1.2772, |
| "step": 257 |
| }, |
| { |
| "epoch": 0.5872534142640364, |
| "grad_norm": 0.47145721316337585, |
| "learning_rate": 4.952189687557595e-06, |
| "loss": 1.2843, |
| "step": 258 |
| }, |
| { |
| "epoch": 0.5895295902883156, |
| "grad_norm": 0.44380298256874084, |
| "learning_rate": 4.951584545706896e-06, |
| "loss": 1.3169, |
| "step": 259 |
| }, |
| { |
| "epoch": 0.5918057663125948, |
| "grad_norm": 0.45627689361572266, |
| "learning_rate": 4.950975635661416e-06, |
| "loss": 1.2855, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.5940819423368741, |
| "grad_norm": 0.43097957968711853, |
| "learning_rate": 4.950362958357078e-06, |
| "loss": 1.2802, |
| "step": 261 |
| }, |
| { |
| "epoch": 0.5963581183611533, |
| "grad_norm": 0.4480797052383423, |
| "learning_rate": 4.949746514735594e-06, |
| "loss": 1.2845, |
| "step": 262 |
| }, |
| { |
| "epoch": 0.5986342943854325, |
| "grad_norm": 0.4356028139591217, |
| "learning_rate": 4.949126305744466e-06, |
| "loss": 1.2559, |
| "step": 263 |
| }, |
| { |
| "epoch": 0.6009104704097117, |
| "grad_norm": 0.45533114671707153, |
| "learning_rate": 4.948502332336982e-06, |
| "loss": 1.333, |
| "step": 264 |
| }, |
| { |
| "epoch": 0.6031866464339909, |
| "grad_norm": 0.43486839532852173, |
| "learning_rate": 4.947874595472216e-06, |
| "loss": 1.299, |
| "step": 265 |
| }, |
| { |
| "epoch": 0.6054628224582701, |
| "grad_norm": 0.45472636818885803, |
| "learning_rate": 4.947243096115028e-06, |
| "loss": 1.2853, |
| "step": 266 |
| }, |
| { |
| "epoch": 0.6077389984825493, |
| "grad_norm": 0.448030024766922, |
| "learning_rate": 4.946607835236064e-06, |
| "loss": 1.2549, |
| "step": 267 |
| }, |
| { |
| "epoch": 0.6100151745068285, |
| "grad_norm": 0.46248579025268555, |
| "learning_rate": 4.945968813811743e-06, |
| "loss": 1.2845, |
| "step": 268 |
| }, |
| { |
| "epoch": 0.6122913505311077, |
| "grad_norm": 0.47284016013145447, |
| "learning_rate": 4.9453260328242735e-06, |
| "loss": 1.274, |
| "step": 269 |
| }, |
| { |
| "epoch": 0.6145675265553869, |
| "grad_norm": 0.46916916966438293, |
| "learning_rate": 4.944679493261637e-06, |
| "loss": 1.272, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.6168437025796661, |
| "grad_norm": 0.4469199776649475, |
| "learning_rate": 4.944029196117594e-06, |
| "loss": 1.273, |
| "step": 271 |
| }, |
| { |
| "epoch": 0.6191198786039454, |
| "grad_norm": 0.4460132420063019, |
| "learning_rate": 4.943375142391679e-06, |
| "loss": 1.2749, |
| "step": 272 |
| }, |
| { |
| "epoch": 0.6213960546282246, |
| "grad_norm": 0.45281344652175903, |
| "learning_rate": 4.942717333089204e-06, |
| "loss": 1.2858, |
| "step": 273 |
| }, |
| { |
| "epoch": 0.6236722306525038, |
| "grad_norm": 0.4766104221343994, |
| "learning_rate": 4.942055769221249e-06, |
| "loss": 1.3047, |
| "step": 274 |
| }, |
| { |
| "epoch": 0.625948406676783, |
| "grad_norm": 0.4342869818210602, |
| "learning_rate": 4.941390451804668e-06, |
| "loss": 1.258, |
| "step": 275 |
| }, |
| { |
| "epoch": 0.6282245827010622, |
| "grad_norm": 0.44943931698799133, |
| "learning_rate": 4.940721381862083e-06, |
| "loss": 1.2714, |
| "step": 276 |
| }, |
| { |
| "epoch": 0.6305007587253414, |
| "grad_norm": 0.4642450213432312, |
| "learning_rate": 4.940048560421887e-06, |
| "loss": 1.2883, |
| "step": 277 |
| }, |
| { |
| "epoch": 0.6327769347496206, |
| "grad_norm": 0.530925989151001, |
| "learning_rate": 4.9393719885182335e-06, |
| "loss": 1.2869, |
| "step": 278 |
| }, |
| { |
| "epoch": 0.6350531107738998, |
| "grad_norm": 0.44706323742866516, |
| "learning_rate": 4.938691667191044e-06, |
| "loss": 1.2912, |
| "step": 279 |
| }, |
| { |
| "epoch": 0.637329286798179, |
| "grad_norm": 0.46952497959136963, |
| "learning_rate": 4.938007597486005e-06, |
| "loss": 1.3293, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.6396054628224582, |
| "grad_norm": 0.45387259125709534, |
| "learning_rate": 4.937319780454559e-06, |
| "loss": 1.2328, |
| "step": 281 |
| }, |
| { |
| "epoch": 0.6418816388467374, |
| "grad_norm": 0.4683968126773834, |
| "learning_rate": 4.936628217153914e-06, |
| "loss": 1.3101, |
| "step": 282 |
| }, |
| { |
| "epoch": 0.6441578148710166, |
| "grad_norm": 0.4984208941459656, |
| "learning_rate": 4.935932908647033e-06, |
| "loss": 1.3078, |
| "step": 283 |
| }, |
| { |
| "epoch": 0.6464339908952959, |
| "grad_norm": 0.47393515706062317, |
| "learning_rate": 4.935233856002635e-06, |
| "loss": 1.2667, |
| "step": 284 |
| }, |
| { |
| "epoch": 0.6487101669195751, |
| "grad_norm": 0.4559146761894226, |
| "learning_rate": 4.9345310602951964e-06, |
| "loss": 1.2816, |
| "step": 285 |
| }, |
| { |
| "epoch": 0.6509863429438544, |
| "grad_norm": 0.4612574279308319, |
| "learning_rate": 4.933824522604945e-06, |
| "loss": 1.3009, |
| "step": 286 |
| }, |
| { |
| "epoch": 0.6532625189681336, |
| "grad_norm": 0.4839983880519867, |
| "learning_rate": 4.933114244017861e-06, |
| "loss": 1.2762, |
| "step": 287 |
| }, |
| { |
| "epoch": 0.6555386949924128, |
| "grad_norm": 0.47950032353401184, |
| "learning_rate": 4.932400225625674e-06, |
| "loss": 1.2639, |
| "step": 288 |
| }, |
| { |
| "epoch": 0.657814871016692, |
| "grad_norm": 0.46797841787338257, |
| "learning_rate": 4.931682468525863e-06, |
| "loss": 1.3116, |
| "step": 289 |
| }, |
| { |
| "epoch": 0.6600910470409712, |
| "grad_norm": 0.46507689356803894, |
| "learning_rate": 4.93096097382165e-06, |
| "loss": 1.2795, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.6623672230652504, |
| "grad_norm": 0.4672064781188965, |
| "learning_rate": 4.9302357426220086e-06, |
| "loss": 1.2769, |
| "step": 291 |
| }, |
| { |
| "epoch": 0.6646433990895296, |
| "grad_norm": 0.469881147146225, |
| "learning_rate": 4.929506776041648e-06, |
| "loss": 1.246, |
| "step": 292 |
| }, |
| { |
| "epoch": 0.6669195751138088, |
| "grad_norm": 0.49012723565101624, |
| "learning_rate": 4.928774075201024e-06, |
| "loss": 1.3308, |
| "step": 293 |
| }, |
| { |
| "epoch": 0.669195751138088, |
| "grad_norm": 0.47186344861984253, |
| "learning_rate": 4.9280376412263295e-06, |
| "loss": 1.2685, |
| "step": 294 |
| }, |
| { |
| "epoch": 0.6714719271623673, |
| "grad_norm": 0.4914249777793884, |
| "learning_rate": 4.9272974752494974e-06, |
| "loss": 1.3029, |
| "step": 295 |
| }, |
| { |
| "epoch": 0.6737481031866465, |
| "grad_norm": 0.4709179699420929, |
| "learning_rate": 4.9265535784081965e-06, |
| "loss": 1.2459, |
| "step": 296 |
| }, |
| { |
| "epoch": 0.6760242792109257, |
| "grad_norm": 0.46568986773490906, |
| "learning_rate": 4.925805951845826e-06, |
| "loss": 1.2713, |
| "step": 297 |
| }, |
| { |
| "epoch": 0.6783004552352049, |
| "grad_norm": 0.46113038063049316, |
| "learning_rate": 4.925054596711526e-06, |
| "loss": 1.2787, |
| "step": 298 |
| }, |
| { |
| "epoch": 0.6805766312594841, |
| "grad_norm": 0.49636346101760864, |
| "learning_rate": 4.92429951416016e-06, |
| "loss": 1.2787, |
| "step": 299 |
| }, |
| { |
| "epoch": 0.6828528072837633, |
| "grad_norm": 0.4823263883590698, |
| "learning_rate": 4.9235407053523235e-06, |
| "loss": 1.3029, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.6851289833080425, |
| "grad_norm": 0.45272234082221985, |
| "learning_rate": 4.92277817145434e-06, |
| "loss": 1.3053, |
| "step": 301 |
| }, |
| { |
| "epoch": 0.6874051593323217, |
| "grad_norm": 0.4724232256412506, |
| "learning_rate": 4.922011913638258e-06, |
| "loss": 1.2594, |
| "step": 302 |
| }, |
| { |
| "epoch": 0.6896813353566009, |
| "grad_norm": 0.5244677066802979, |
| "learning_rate": 4.92124193308185e-06, |
| "loss": 1.305, |
| "step": 303 |
| }, |
| { |
| "epoch": 0.6919575113808801, |
| "grad_norm": 0.4562852382659912, |
| "learning_rate": 4.92046823096861e-06, |
| "loss": 1.283, |
| "step": 304 |
| }, |
| { |
| "epoch": 0.6942336874051593, |
| "grad_norm": 0.460565447807312, |
| "learning_rate": 4.919690808487754e-06, |
| "loss": 1.3004, |
| "step": 305 |
| }, |
| { |
| "epoch": 0.6965098634294385, |
| "grad_norm": 0.4588528871536255, |
| "learning_rate": 4.918909666834214e-06, |
| "loss": 1.2745, |
| "step": 306 |
| }, |
| { |
| "epoch": 0.6987860394537178, |
| "grad_norm": 0.4980691075325012, |
| "learning_rate": 4.91812480720864e-06, |
| "loss": 1.2802, |
| "step": 307 |
| }, |
| { |
| "epoch": 0.701062215477997, |
| "grad_norm": 0.5080570578575134, |
| "learning_rate": 4.917336230817396e-06, |
| "loss": 1.286, |
| "step": 308 |
| }, |
| { |
| "epoch": 0.7033383915022762, |
| "grad_norm": 0.46659743785858154, |
| "learning_rate": 4.9165439388725585e-06, |
| "loss": 1.3093, |
| "step": 309 |
| }, |
| { |
| "epoch": 0.7056145675265554, |
| "grad_norm": 0.4846821129322052, |
| "learning_rate": 4.915747932591916e-06, |
| "loss": 1.2904, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.7078907435508346, |
| "grad_norm": 0.4945422112941742, |
| "learning_rate": 4.914948213198966e-06, |
| "loss": 1.2592, |
| "step": 311 |
| }, |
| { |
| "epoch": 0.7101669195751138, |
| "grad_norm": 0.49606069922447205, |
| "learning_rate": 4.9141447819229125e-06, |
| "loss": 1.2699, |
| "step": 312 |
| }, |
| { |
| "epoch": 0.712443095599393, |
| "grad_norm": 0.48810863494873047, |
| "learning_rate": 4.913337639998666e-06, |
| "loss": 1.2993, |
| "step": 313 |
| }, |
| { |
| "epoch": 0.7147192716236722, |
| "grad_norm": 0.4933323562145233, |
| "learning_rate": 4.912526788666838e-06, |
| "loss": 1.2514, |
| "step": 314 |
| }, |
| { |
| "epoch": 0.7169954476479514, |
| "grad_norm": 0.4674908220767975, |
| "learning_rate": 4.911712229173745e-06, |
| "loss": 1.2602, |
| "step": 315 |
| }, |
| { |
| "epoch": 0.7192716236722306, |
| "grad_norm": 0.5178641676902771, |
| "learning_rate": 4.9108939627714e-06, |
| "loss": 1.312, |
| "step": 316 |
| }, |
| { |
| "epoch": 0.7215477996965098, |
| "grad_norm": 0.4949224293231964, |
| "learning_rate": 4.910071990717516e-06, |
| "loss": 1.2787, |
| "step": 317 |
| }, |
| { |
| "epoch": 0.723823975720789, |
| "grad_norm": 0.4700353443622589, |
| "learning_rate": 4.909246314275499e-06, |
| "loss": 1.251, |
| "step": 318 |
| }, |
| { |
| "epoch": 0.7261001517450683, |
| "grad_norm": 0.4828815758228302, |
| "learning_rate": 4.908416934714452e-06, |
| "loss": 1.2967, |
| "step": 319 |
| }, |
| { |
| "epoch": 0.7283763277693475, |
| "grad_norm": 0.47781631350517273, |
| "learning_rate": 4.907583853309168e-06, |
| "loss": 1.3108, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.7306525037936267, |
| "grad_norm": 0.4467979073524475, |
| "learning_rate": 4.90674707134013e-06, |
| "loss": 1.2332, |
| "step": 321 |
| }, |
| { |
| "epoch": 0.7329286798179059, |
| "grad_norm": 0.4529818892478943, |
| "learning_rate": 4.90590659009351e-06, |
| "loss": 1.2958, |
| "step": 322 |
| }, |
| { |
| "epoch": 0.7352048558421851, |
| "grad_norm": 0.4782491624355316, |
| "learning_rate": 4.905062410861164e-06, |
| "loss": 1.2754, |
| "step": 323 |
| }, |
| { |
| "epoch": 0.7374810318664643, |
| "grad_norm": 0.4517338275909424, |
| "learning_rate": 4.9042145349406335e-06, |
| "loss": 1.3098, |
| "step": 324 |
| }, |
| { |
| "epoch": 0.7397572078907435, |
| "grad_norm": 0.4599636197090149, |
| "learning_rate": 4.903362963635142e-06, |
| "loss": 1.2843, |
| "step": 325 |
| }, |
| { |
| "epoch": 0.7420333839150227, |
| "grad_norm": 0.4922712743282318, |
| "learning_rate": 4.902507698253593e-06, |
| "loss": 1.2987, |
| "step": 326 |
| }, |
| { |
| "epoch": 0.7443095599393019, |
| "grad_norm": 0.47610870003700256, |
| "learning_rate": 4.901648740110566e-06, |
| "loss": 1.2739, |
| "step": 327 |
| }, |
| { |
| "epoch": 0.7465857359635811, |
| "grad_norm": 0.46494367718696594, |
| "learning_rate": 4.900786090526319e-06, |
| "loss": 1.2579, |
| "step": 328 |
| }, |
| { |
| "epoch": 0.7488619119878603, |
| "grad_norm": 0.46867313981056213, |
| "learning_rate": 4.899919750826784e-06, |
| "loss": 1.2838, |
| "step": 329 |
| }, |
| { |
| "epoch": 0.7511380880121397, |
| "grad_norm": 0.49616602063179016, |
| "learning_rate": 4.899049722343561e-06, |
| "loss": 1.3108, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.7534142640364189, |
| "grad_norm": 0.46307483315467834, |
| "learning_rate": 4.898176006413925e-06, |
| "loss": 1.3047, |
| "step": 331 |
| }, |
| { |
| "epoch": 0.7556904400606981, |
| "grad_norm": 0.47475141286849976, |
| "learning_rate": 4.897298604380816e-06, |
| "loss": 1.2416, |
| "step": 332 |
| }, |
| { |
| "epoch": 0.7579666160849773, |
| "grad_norm": 0.468184232711792, |
| "learning_rate": 4.896417517592838e-06, |
| "loss": 1.2904, |
| "step": 333 |
| }, |
| { |
| "epoch": 0.7602427921092565, |
| "grad_norm": 0.47171875834465027, |
| "learning_rate": 4.895532747404263e-06, |
| "loss": 1.2641, |
| "step": 334 |
| }, |
| { |
| "epoch": 0.7625189681335357, |
| "grad_norm": 0.45646342635154724, |
| "learning_rate": 4.8946442951750215e-06, |
| "loss": 1.285, |
| "step": 335 |
| }, |
| { |
| "epoch": 0.7647951441578149, |
| "grad_norm": 0.48363035917282104, |
| "learning_rate": 4.893752162270704e-06, |
| "loss": 1.2507, |
| "step": 336 |
| }, |
| { |
| "epoch": 0.7670713201820941, |
| "grad_norm": 0.4761241674423218, |
| "learning_rate": 4.892856350062558e-06, |
| "loss": 1.2628, |
| "step": 337 |
| }, |
| { |
| "epoch": 0.7693474962063733, |
| "grad_norm": 0.47408172488212585, |
| "learning_rate": 4.891956859927489e-06, |
| "loss": 1.2919, |
| "step": 338 |
| }, |
| { |
| "epoch": 0.7716236722306525, |
| "grad_norm": 0.48075783252716064, |
| "learning_rate": 4.89105369324805e-06, |
| "loss": 1.282, |
| "step": 339 |
| }, |
| { |
| "epoch": 0.7738998482549317, |
| "grad_norm": 0.45937585830688477, |
| "learning_rate": 4.890146851412452e-06, |
| "loss": 1.2823, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.776176024279211, |
| "grad_norm": 0.5253570675849915, |
| "learning_rate": 4.889236335814549e-06, |
| "loss": 1.2657, |
| "step": 341 |
| }, |
| { |
| "epoch": 0.7784522003034902, |
| "grad_norm": 0.47888922691345215, |
| "learning_rate": 4.888322147853846e-06, |
| "loss": 1.3003, |
| "step": 342 |
| }, |
| { |
| "epoch": 0.7807283763277694, |
| "grad_norm": 0.4705219566822052, |
| "learning_rate": 4.887404288935488e-06, |
| "loss": 1.2822, |
| "step": 343 |
| }, |
| { |
| "epoch": 0.7830045523520486, |
| "grad_norm": 0.5236004590988159, |
| "learning_rate": 4.8864827604702675e-06, |
| "loss": 1.2338, |
| "step": 344 |
| }, |
| { |
| "epoch": 0.7852807283763278, |
| "grad_norm": 0.4856922924518585, |
| "learning_rate": 4.885557563874614e-06, |
| "loss": 1.2394, |
| "step": 345 |
| }, |
| { |
| "epoch": 0.787556904400607, |
| "grad_norm": 0.48127493262290955, |
| "learning_rate": 4.884628700570595e-06, |
| "loss": 1.2827, |
| "step": 346 |
| }, |
| { |
| "epoch": 0.7898330804248862, |
| "grad_norm": 0.46932077407836914, |
| "learning_rate": 4.883696171985917e-06, |
| "loss": 1.2608, |
| "step": 347 |
| }, |
| { |
| "epoch": 0.7921092564491654, |
| "grad_norm": 0.5052128434181213, |
| "learning_rate": 4.882759979553916e-06, |
| "loss": 1.2727, |
| "step": 348 |
| }, |
| { |
| "epoch": 0.7943854324734446, |
| "grad_norm": 0.5077352523803711, |
| "learning_rate": 4.881820124713562e-06, |
| "loss": 1.2364, |
| "step": 349 |
| }, |
| { |
| "epoch": 0.7966616084977238, |
| "grad_norm": 0.5095151662826538, |
| "learning_rate": 4.880876608909454e-06, |
| "loss": 1.2788, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.798937784522003, |
| "grad_norm": 0.4920441806316376, |
| "learning_rate": 4.8799294335918185e-06, |
| "loss": 1.2944, |
| "step": 351 |
| }, |
| { |
| "epoch": 0.8012139605462822, |
| "grad_norm": 0.4824545085430145, |
| "learning_rate": 4.8789786002165055e-06, |
| "loss": 1.2669, |
| "step": 352 |
| }, |
| { |
| "epoch": 0.8034901365705615, |
| "grad_norm": 0.49492961168289185, |
| "learning_rate": 4.878024110244988e-06, |
| "loss": 1.3021, |
| "step": 353 |
| }, |
| { |
| "epoch": 0.8057663125948407, |
| "grad_norm": 0.5213160514831543, |
| "learning_rate": 4.877065965144361e-06, |
| "loss": 1.2832, |
| "step": 354 |
| }, |
| { |
| "epoch": 0.8080424886191199, |
| "grad_norm": 0.4782240390777588, |
| "learning_rate": 4.8761041663873345e-06, |
| "loss": 1.2812, |
| "step": 355 |
| }, |
| { |
| "epoch": 0.8103186646433991, |
| "grad_norm": 0.4901832938194275, |
| "learning_rate": 4.875138715452237e-06, |
| "loss": 1.289, |
| "step": 356 |
| }, |
| { |
| "epoch": 0.8125948406676783, |
| "grad_norm": 0.48875507712364197, |
| "learning_rate": 4.87416961382301e-06, |
| "loss": 1.2876, |
| "step": 357 |
| }, |
| { |
| "epoch": 0.8148710166919575, |
| "grad_norm": 0.49773871898651123, |
| "learning_rate": 4.873196862989205e-06, |
| "loss": 1.2766, |
| "step": 358 |
| }, |
| { |
| "epoch": 0.8171471927162367, |
| "grad_norm": 0.5069698691368103, |
| "learning_rate": 4.872220464445983e-06, |
| "loss": 1.284, |
| "step": 359 |
| }, |
| { |
| "epoch": 0.8194233687405159, |
| "grad_norm": 0.4725041389465332, |
| "learning_rate": 4.871240419694115e-06, |
| "loss": 1.2183, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.8216995447647951, |
| "grad_norm": 0.4846250116825104, |
| "learning_rate": 4.8702567302399705e-06, |
| "loss": 1.2851, |
| "step": 361 |
| }, |
| { |
| "epoch": 0.8239757207890743, |
| "grad_norm": 0.4825296998023987, |
| "learning_rate": 4.869269397595525e-06, |
| "loss": 1.2621, |
| "step": 362 |
| }, |
| { |
| "epoch": 0.8262518968133535, |
| "grad_norm": 0.4880293905735016, |
| "learning_rate": 4.8682784232783535e-06, |
| "loss": 1.2684, |
| "step": 363 |
| }, |
| { |
| "epoch": 0.8285280728376327, |
| "grad_norm": 0.4805878698825836, |
| "learning_rate": 4.867283808811626e-06, |
| "loss": 1.2604, |
| "step": 364 |
| }, |
| { |
| "epoch": 0.830804248861912, |
| "grad_norm": 0.5031499266624451, |
| "learning_rate": 4.86628555572411e-06, |
| "loss": 1.2701, |
| "step": 365 |
| }, |
| { |
| "epoch": 0.8330804248861912, |
| "grad_norm": 0.49856945872306824, |
| "learning_rate": 4.865283665550167e-06, |
| "loss": 1.266, |
| "step": 366 |
| }, |
| { |
| "epoch": 0.8353566009104704, |
| "grad_norm": 0.49834373593330383, |
| "learning_rate": 4.864278139829745e-06, |
| "loss": 1.254, |
| "step": 367 |
| }, |
| { |
| "epoch": 0.8376327769347496, |
| "grad_norm": 0.47436273097991943, |
| "learning_rate": 4.863268980108381e-06, |
| "loss": 1.308, |
| "step": 368 |
| }, |
| { |
| "epoch": 0.8399089529590288, |
| "grad_norm": 0.4866158962249756, |
| "learning_rate": 4.8622561879372e-06, |
| "loss": 1.2565, |
| "step": 369 |
| }, |
| { |
| "epoch": 0.842185128983308, |
| "grad_norm": 0.46591049432754517, |
| "learning_rate": 4.861239764872909e-06, |
| "loss": 1.2528, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.8444613050075872, |
| "grad_norm": 0.5084807872772217, |
| "learning_rate": 4.860219712477795e-06, |
| "loss": 1.2727, |
| "step": 371 |
| }, |
| { |
| "epoch": 0.8467374810318664, |
| "grad_norm": 0.49390751123428345, |
| "learning_rate": 4.859196032319724e-06, |
| "loss": 1.2544, |
| "step": 372 |
| }, |
| { |
| "epoch": 0.8490136570561456, |
| "grad_norm": 0.4931376576423645, |
| "learning_rate": 4.8581687259721375e-06, |
| "loss": 1.2728, |
| "step": 373 |
| }, |
| { |
| "epoch": 0.8512898330804249, |
| "grad_norm": 0.4991268813610077, |
| "learning_rate": 4.857137795014051e-06, |
| "loss": 1.2382, |
| "step": 374 |
| }, |
| { |
| "epoch": 0.8535660091047041, |
| "grad_norm": 0.48629266023635864, |
| "learning_rate": 4.856103241030054e-06, |
| "loss": 1.2464, |
| "step": 375 |
| }, |
| { |
| "epoch": 0.8558421851289834, |
| "grad_norm": 0.4945109188556671, |
| "learning_rate": 4.855065065610298e-06, |
| "loss": 1.2592, |
| "step": 376 |
| }, |
| { |
| "epoch": 0.8581183611532626, |
| "grad_norm": 0.4683839678764343, |
| "learning_rate": 4.8540232703505085e-06, |
| "loss": 1.2795, |
| "step": 377 |
| }, |
| { |
| "epoch": 0.8603945371775418, |
| "grad_norm": 0.4917154610157013, |
| "learning_rate": 4.8529778568519695e-06, |
| "loss": 1.297, |
| "step": 378 |
| }, |
| { |
| "epoch": 0.862670713201821, |
| "grad_norm": 0.4950079917907715, |
| "learning_rate": 4.851928826721528e-06, |
| "loss": 1.2424, |
| "step": 379 |
| }, |
| { |
| "epoch": 0.8649468892261002, |
| "grad_norm": 0.49165982007980347, |
| "learning_rate": 4.850876181571592e-06, |
| "loss": 1.2442, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.8672230652503794, |
| "grad_norm": 0.47863882780075073, |
| "learning_rate": 4.849819923020121e-06, |
| "loss": 1.2946, |
| "step": 381 |
| }, |
| { |
| "epoch": 0.8694992412746586, |
| "grad_norm": 0.5066231489181519, |
| "learning_rate": 4.848760052690635e-06, |
| "loss": 1.2658, |
| "step": 382 |
| }, |
| { |
| "epoch": 0.8717754172989378, |
| "grad_norm": 0.46788156032562256, |
| "learning_rate": 4.847696572212199e-06, |
| "loss": 1.2787, |
| "step": 383 |
| }, |
| { |
| "epoch": 0.874051593323217, |
| "grad_norm": 0.5010194182395935, |
| "learning_rate": 4.846629483219431e-06, |
| "loss": 1.2645, |
| "step": 384 |
| }, |
| { |
| "epoch": 0.8763277693474962, |
| "grad_norm": 0.480258584022522, |
| "learning_rate": 4.845558787352495e-06, |
| "loss": 1.2535, |
| "step": 385 |
| }, |
| { |
| "epoch": 0.8786039453717754, |
| "grad_norm": 0.5160472393035889, |
| "learning_rate": 4.844484486257097e-06, |
| "loss": 1.2838, |
| "step": 386 |
| }, |
| { |
| "epoch": 0.8808801213960546, |
| "grad_norm": 0.5098587870597839, |
| "learning_rate": 4.843406581584487e-06, |
| "loss": 1.2834, |
| "step": 387 |
| }, |
| { |
| "epoch": 0.8831562974203339, |
| "grad_norm": 0.5033400058746338, |
| "learning_rate": 4.8423250749914515e-06, |
| "loss": 1.2959, |
| "step": 388 |
| }, |
| { |
| "epoch": 0.8854324734446131, |
| "grad_norm": 0.506367564201355, |
| "learning_rate": 4.841239968140316e-06, |
| "loss": 1.2757, |
| "step": 389 |
| }, |
| { |
| "epoch": 0.8877086494688923, |
| "grad_norm": 0.47980019450187683, |
| "learning_rate": 4.8401512626989354e-06, |
| "loss": 1.2683, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.8899848254931715, |
| "grad_norm": 0.48923107981681824, |
| "learning_rate": 4.8390589603407005e-06, |
| "loss": 1.2325, |
| "step": 391 |
| }, |
| { |
| "epoch": 0.8922610015174507, |
| "grad_norm": 0.4891837537288666, |
| "learning_rate": 4.8379630627445286e-06, |
| "loss": 1.2508, |
| "step": 392 |
| }, |
| { |
| "epoch": 0.8945371775417299, |
| "grad_norm": 0.4819527566432953, |
| "learning_rate": 4.836863571594863e-06, |
| "loss": 1.2655, |
| "step": 393 |
| }, |
| { |
| "epoch": 0.8968133535660091, |
| "grad_norm": 0.5067424178123474, |
| "learning_rate": 4.83576048858167e-06, |
| "loss": 1.2477, |
| "step": 394 |
| }, |
| { |
| "epoch": 0.8990895295902883, |
| "grad_norm": 0.5201086401939392, |
| "learning_rate": 4.8346538154004386e-06, |
| "loss": 1.249, |
| "step": 395 |
| }, |
| { |
| "epoch": 0.9013657056145675, |
| "grad_norm": 0.5033949017524719, |
| "learning_rate": 4.833543553752173e-06, |
| "loss": 1.2882, |
| "step": 396 |
| }, |
| { |
| "epoch": 0.9036418816388467, |
| "grad_norm": 0.4921282231807709, |
| "learning_rate": 4.8324297053433975e-06, |
| "loss": 1.2355, |
| "step": 397 |
| }, |
| { |
| "epoch": 0.9059180576631259, |
| "grad_norm": 0.49898359179496765, |
| "learning_rate": 4.831312271886145e-06, |
| "loss": 1.24, |
| "step": 398 |
| }, |
| { |
| "epoch": 0.9081942336874052, |
| "grad_norm": 0.4932885468006134, |
| "learning_rate": 4.83019125509796e-06, |
| "loss": 1.2651, |
| "step": 399 |
| }, |
| { |
| "epoch": 0.9104704097116844, |
| "grad_norm": 0.5081654191017151, |
| "learning_rate": 4.829066656701897e-06, |
| "loss": 1.2846, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.9127465857359636, |
| "grad_norm": 0.4848720133304596, |
| "learning_rate": 4.8279384784265124e-06, |
| "loss": 1.2834, |
| "step": 401 |
| }, |
| { |
| "epoch": 0.9150227617602428, |
| "grad_norm": 0.47641217708587646, |
| "learning_rate": 4.826806722005868e-06, |
| "loss": 1.2556, |
| "step": 402 |
| }, |
| { |
| "epoch": 0.917298937784522, |
| "grad_norm": 0.5004164576530457, |
| "learning_rate": 4.825671389179522e-06, |
| "loss": 1.2852, |
| "step": 403 |
| }, |
| { |
| "epoch": 0.9195751138088012, |
| "grad_norm": 0.5069151520729065, |
| "learning_rate": 4.824532481692533e-06, |
| "loss": 1.2468, |
| "step": 404 |
| }, |
| { |
| "epoch": 0.9218512898330804, |
| "grad_norm": 0.5043609738349915, |
| "learning_rate": 4.823390001295453e-06, |
| "loss": 1.2602, |
| "step": 405 |
| }, |
| { |
| "epoch": 0.9241274658573596, |
| "grad_norm": 0.47922301292419434, |
| "learning_rate": 4.822243949744324e-06, |
| "loss": 1.2909, |
| "step": 406 |
| }, |
| { |
| "epoch": 0.9264036418816388, |
| "grad_norm": 0.5012561082839966, |
| "learning_rate": 4.821094328800678e-06, |
| "loss": 1.3058, |
| "step": 407 |
| }, |
| { |
| "epoch": 0.928679817905918, |
| "grad_norm": 0.5232773423194885, |
| "learning_rate": 4.8199411402315356e-06, |
| "loss": 1.2689, |
| "step": 408 |
| }, |
| { |
| "epoch": 0.9309559939301972, |
| "grad_norm": 0.5023229718208313, |
| "learning_rate": 4.8187843858093975e-06, |
| "loss": 1.2623, |
| "step": 409 |
| }, |
| { |
| "epoch": 0.9332321699544764, |
| "grad_norm": 0.5061272382736206, |
| "learning_rate": 4.817624067312247e-06, |
| "loss": 1.2771, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.9355083459787557, |
| "grad_norm": 0.47715064883232117, |
| "learning_rate": 4.816460186523547e-06, |
| "loss": 1.266, |
| "step": 411 |
| }, |
| { |
| "epoch": 0.9377845220030349, |
| "grad_norm": 0.5037026405334473, |
| "learning_rate": 4.815292745232233e-06, |
| "loss": 1.2812, |
| "step": 412 |
| }, |
| { |
| "epoch": 0.9400606980273141, |
| "grad_norm": 0.47421544790267944, |
| "learning_rate": 4.814121745232714e-06, |
| "loss": 1.2349, |
| "step": 413 |
| }, |
| { |
| "epoch": 0.9423368740515933, |
| "grad_norm": 0.5214923620223999, |
| "learning_rate": 4.812947188324868e-06, |
| "loss": 1.2986, |
| "step": 414 |
| }, |
| { |
| "epoch": 0.9446130500758725, |
| "grad_norm": 0.5169025659561157, |
| "learning_rate": 4.811769076314044e-06, |
| "loss": 1.2687, |
| "step": 415 |
| }, |
| { |
| "epoch": 0.9468892261001517, |
| "grad_norm": 0.5028119087219238, |
| "learning_rate": 4.8105874110110516e-06, |
| "loss": 1.2666, |
| "step": 416 |
| }, |
| { |
| "epoch": 0.9491654021244309, |
| "grad_norm": 0.5233621597290039, |
| "learning_rate": 4.809402194232163e-06, |
| "loss": 1.2817, |
| "step": 417 |
| }, |
| { |
| "epoch": 0.9514415781487102, |
| "grad_norm": 0.5662165880203247, |
| "learning_rate": 4.808213427799108e-06, |
| "loss": 1.212, |
| "step": 418 |
| }, |
| { |
| "epoch": 0.9537177541729894, |
| "grad_norm": 0.5214280486106873, |
| "learning_rate": 4.807021113539077e-06, |
| "loss": 1.2659, |
| "step": 419 |
| }, |
| { |
| "epoch": 0.9559939301972686, |
| "grad_norm": 0.5059605240821838, |
| "learning_rate": 4.805825253284706e-06, |
| "loss": 1.2417, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.9582701062215478, |
| "grad_norm": 0.48347723484039307, |
| "learning_rate": 4.804625848874088e-06, |
| "loss": 1.279, |
| "step": 421 |
| }, |
| { |
| "epoch": 0.960546282245827, |
| "grad_norm": 0.5225522518157959, |
| "learning_rate": 4.803422902150762e-06, |
| "loss": 1.2555, |
| "step": 422 |
| }, |
| { |
| "epoch": 0.9628224582701063, |
| "grad_norm": 0.49709466099739075, |
| "learning_rate": 4.802216414963708e-06, |
| "loss": 1.2956, |
| "step": 423 |
| }, |
| { |
| "epoch": 0.9650986342943855, |
| "grad_norm": 0.500357985496521, |
| "learning_rate": 4.801006389167352e-06, |
| "loss": 1.2748, |
| "step": 424 |
| }, |
| { |
| "epoch": 0.9673748103186647, |
| "grad_norm": 0.504552960395813, |
| "learning_rate": 4.799792826621559e-06, |
| "loss": 1.2939, |
| "step": 425 |
| }, |
| { |
| "epoch": 0.9696509863429439, |
| "grad_norm": 0.4881986379623413, |
| "learning_rate": 4.7985757291916264e-06, |
| "loss": 1.2827, |
| "step": 426 |
| }, |
| { |
| "epoch": 0.9719271623672231, |
| "grad_norm": 0.517511785030365, |
| "learning_rate": 4.797355098748289e-06, |
| "loss": 1.2668, |
| "step": 427 |
| }, |
| { |
| "epoch": 0.9742033383915023, |
| "grad_norm": 0.49534812569618225, |
| "learning_rate": 4.796130937167709e-06, |
| "loss": 1.2878, |
| "step": 428 |
| }, |
| { |
| "epoch": 0.9764795144157815, |
| "grad_norm": 0.4725462794303894, |
| "learning_rate": 4.794903246331477e-06, |
| "loss": 1.2612, |
| "step": 429 |
| }, |
| { |
| "epoch": 0.9787556904400607, |
| "grad_norm": 0.49760913848876953, |
| "learning_rate": 4.79367202812661e-06, |
| "loss": 1.284, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.9810318664643399, |
| "grad_norm": 0.5361410975456238, |
| "learning_rate": 4.792437284445545e-06, |
| "loss": 1.2517, |
| "step": 431 |
| }, |
| { |
| "epoch": 0.9833080424886191, |
| "grad_norm": 0.5160269141197205, |
| "learning_rate": 4.791199017186137e-06, |
| "loss": 1.2422, |
| "step": 432 |
| }, |
| { |
| "epoch": 0.9855842185128983, |
| "grad_norm": 0.5418286919593811, |
| "learning_rate": 4.7899572282516596e-06, |
| "loss": 1.2697, |
| "step": 433 |
| }, |
| { |
| "epoch": 0.9878603945371776, |
| "grad_norm": 0.5236756801605225, |
| "learning_rate": 4.788711919550796e-06, |
| "loss": 1.2546, |
| "step": 434 |
| }, |
| { |
| "epoch": 0.9901365705614568, |
| "grad_norm": 0.4919045567512512, |
| "learning_rate": 4.787463092997643e-06, |
| "loss": 1.2478, |
| "step": 435 |
| }, |
| { |
| "epoch": 0.992412746585736, |
| "grad_norm": 0.4918051064014435, |
| "learning_rate": 4.786210750511701e-06, |
| "loss": 1.2522, |
| "step": 436 |
| }, |
| { |
| "epoch": 0.9946889226100152, |
| "grad_norm": 0.5032536387443542, |
| "learning_rate": 4.784954894017878e-06, |
| "loss": 1.2924, |
| "step": 437 |
| }, |
| { |
| "epoch": 0.9969650986342944, |
| "grad_norm": 0.5253746509552002, |
| "learning_rate": 4.78369552544648e-06, |
| "loss": 1.258, |
| "step": 438 |
| }, |
| { |
| "epoch": 0.9992412746585736, |
| "grad_norm": 0.5097838044166565, |
| "learning_rate": 4.782432646733214e-06, |
| "loss": 1.2479, |
| "step": 439 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 0.5097838044166565, |
| "learning_rate": 4.781166259819179e-06, |
| "loss": 1.2895, |
| "step": 440 |
| }, |
| { |
| "epoch": 1.0022761760242793, |
| "grad_norm": 1.0558606386184692, |
| "learning_rate": 4.77989636665087e-06, |
| "loss": 1.2707, |
| "step": 441 |
| }, |
| { |
| "epoch": 1.0045523520485584, |
| "grad_norm": 0.47916215658187866, |
| "learning_rate": 4.778622969180167e-06, |
| "loss": 1.2364, |
| "step": 442 |
| }, |
| { |
| "epoch": 1.0068285280728377, |
| "grad_norm": 0.5158357620239258, |
| "learning_rate": 4.777346069364343e-06, |
| "loss": 1.2421, |
| "step": 443 |
| }, |
| { |
| "epoch": 1.0091047040971168, |
| "grad_norm": 0.4970231354236603, |
| "learning_rate": 4.776065669166045e-06, |
| "loss": 1.2534, |
| "step": 444 |
| }, |
| { |
| "epoch": 1.0113808801213962, |
| "grad_norm": 0.529381513595581, |
| "learning_rate": 4.774781770553309e-06, |
| "loss": 1.2429, |
| "step": 445 |
| }, |
| { |
| "epoch": 1.0136570561456753, |
| "grad_norm": 0.5027406811714172, |
| "learning_rate": 4.773494375499543e-06, |
| "loss": 1.2427, |
| "step": 446 |
| }, |
| { |
| "epoch": 1.0159332321699546, |
| "grad_norm": 0.5164632797241211, |
| "learning_rate": 4.772203485983531e-06, |
| "loss": 1.273, |
| "step": 447 |
| }, |
| { |
| "epoch": 1.0182094081942337, |
| "grad_norm": 0.5203757882118225, |
| "learning_rate": 4.770909103989426e-06, |
| "loss": 1.2261, |
| "step": 448 |
| }, |
| { |
| "epoch": 1.020485584218513, |
| "grad_norm": 0.518552839756012, |
| "learning_rate": 4.769611231506753e-06, |
| "loss": 1.2404, |
| "step": 449 |
| }, |
| { |
| "epoch": 1.022761760242792, |
| "grad_norm": 0.5020595788955688, |
| "learning_rate": 4.7683098705303995e-06, |
| "loss": 1.2722, |
| "step": 450 |
| }, |
| { |
| "epoch": 1.0250379362670714, |
| "grad_norm": 0.508852481842041, |
| "learning_rate": 4.767005023060615e-06, |
| "loss": 1.2344, |
| "step": 451 |
| }, |
| { |
| "epoch": 1.0273141122913505, |
| "grad_norm": 0.5240857005119324, |
| "learning_rate": 4.765696691103008e-06, |
| "loss": 1.2553, |
| "step": 452 |
| }, |
| { |
| "epoch": 1.0295902883156298, |
| "grad_norm": 0.5548052787780762, |
| "learning_rate": 4.764384876668542e-06, |
| "loss": 1.3039, |
| "step": 453 |
| }, |
| { |
| "epoch": 1.031866464339909, |
| "grad_norm": 0.5021058917045593, |
| "learning_rate": 4.763069581773537e-06, |
| "loss": 1.2636, |
| "step": 454 |
| }, |
| { |
| "epoch": 1.0341426403641882, |
| "grad_norm": 0.5170218348503113, |
| "learning_rate": 4.761750808439658e-06, |
| "loss": 1.2584, |
| "step": 455 |
| }, |
| { |
| "epoch": 1.0364188163884673, |
| "grad_norm": 0.5254265069961548, |
| "learning_rate": 4.760428558693919e-06, |
| "loss": 1.2578, |
| "step": 456 |
| }, |
| { |
| "epoch": 1.0386949924127467, |
| "grad_norm": 0.5046964883804321, |
| "learning_rate": 4.7591028345686765e-06, |
| "loss": 1.253, |
| "step": 457 |
| }, |
| { |
| "epoch": 1.0409711684370258, |
| "grad_norm": 0.5212562084197998, |
| "learning_rate": 4.757773638101629e-06, |
| "loss": 1.2453, |
| "step": 458 |
| }, |
| { |
| "epoch": 1.043247344461305, |
| "grad_norm": 0.5397632718086243, |
| "learning_rate": 4.7564409713358075e-06, |
| "loss": 1.2612, |
| "step": 459 |
| }, |
| { |
| "epoch": 1.0455235204855842, |
| "grad_norm": 0.5086544752120972, |
| "learning_rate": 4.755104836319583e-06, |
| "loss": 1.27, |
| "step": 460 |
| }, |
| { |
| "epoch": 1.0477996965098635, |
| "grad_norm": 0.4974862337112427, |
| "learning_rate": 4.7537652351066545e-06, |
| "loss": 1.1955, |
| "step": 461 |
| }, |
| { |
| "epoch": 1.0500758725341426, |
| "grad_norm": 0.5382196307182312, |
| "learning_rate": 4.752422169756048e-06, |
| "loss": 1.2996, |
| "step": 462 |
| }, |
| { |
| "epoch": 1.052352048558422, |
| "grad_norm": 0.5093661546707153, |
| "learning_rate": 4.751075642332116e-06, |
| "loss": 1.2671, |
| "step": 463 |
| }, |
| { |
| "epoch": 1.054628224582701, |
| "grad_norm": 0.53044593334198, |
| "learning_rate": 4.749725654904529e-06, |
| "loss": 1.2572, |
| "step": 464 |
| }, |
| { |
| "epoch": 1.0569044006069803, |
| "grad_norm": 0.5372816920280457, |
| "learning_rate": 4.74837220954828e-06, |
| "loss": 1.2215, |
| "step": 465 |
| }, |
| { |
| "epoch": 1.0591805766312594, |
| "grad_norm": 0.5148317217826843, |
| "learning_rate": 4.747015308343673e-06, |
| "loss": 1.2636, |
| "step": 466 |
| }, |
| { |
| "epoch": 1.0614567526555387, |
| "grad_norm": 0.5267722010612488, |
| "learning_rate": 4.745654953376327e-06, |
| "loss": 1.2786, |
| "step": 467 |
| }, |
| { |
| "epoch": 1.0637329286798178, |
| "grad_norm": 0.5123690366744995, |
| "learning_rate": 4.744291146737169e-06, |
| "loss": 1.2217, |
| "step": 468 |
| }, |
| { |
| "epoch": 1.0660091047040972, |
| "grad_norm": 0.5397908687591553, |
| "learning_rate": 4.74292389052243e-06, |
| "loss": 1.2353, |
| "step": 469 |
| }, |
| { |
| "epoch": 1.0682852807283763, |
| "grad_norm": 0.5311163067817688, |
| "learning_rate": 4.741553186833642e-06, |
| "loss": 1.2307, |
| "step": 470 |
| }, |
| { |
| "epoch": 1.0705614567526556, |
| "grad_norm": 0.5108172297477722, |
| "learning_rate": 4.740179037777639e-06, |
| "loss": 1.2526, |
| "step": 471 |
| }, |
| { |
| "epoch": 1.0728376327769347, |
| "grad_norm": 0.5670639276504517, |
| "learning_rate": 4.7388014454665495e-06, |
| "loss": 1.214, |
| "step": 472 |
| }, |
| { |
| "epoch": 1.075113808801214, |
| "grad_norm": 0.5621855854988098, |
| "learning_rate": 4.737420412017795e-06, |
| "loss": 1.2202, |
| "step": 473 |
| }, |
| { |
| "epoch": 1.077389984825493, |
| "grad_norm": 0.5175919532775879, |
| "learning_rate": 4.736035939554084e-06, |
| "loss": 1.2295, |
| "step": 474 |
| }, |
| { |
| "epoch": 1.0796661608497724, |
| "grad_norm": 0.510009765625, |
| "learning_rate": 4.7346480302034144e-06, |
| "loss": 1.2489, |
| "step": 475 |
| }, |
| { |
| "epoch": 1.0819423368740515, |
| "grad_norm": 0.5198955535888672, |
| "learning_rate": 4.733256686099063e-06, |
| "loss": 1.2148, |
| "step": 476 |
| }, |
| { |
| "epoch": 1.0842185128983308, |
| "grad_norm": 0.5157918334007263, |
| "learning_rate": 4.731861909379588e-06, |
| "loss": 1.2858, |
| "step": 477 |
| }, |
| { |
| "epoch": 1.08649468892261, |
| "grad_norm": 0.5016840100288391, |
| "learning_rate": 4.730463702188824e-06, |
| "loss": 1.2137, |
| "step": 478 |
| }, |
| { |
| "epoch": 1.0887708649468892, |
| "grad_norm": 0.5427749156951904, |
| "learning_rate": 4.729062066675877e-06, |
| "loss": 1.2616, |
| "step": 479 |
| }, |
| { |
| "epoch": 1.0910470409711683, |
| "grad_norm": 0.5368303656578064, |
| "learning_rate": 4.727657004995124e-06, |
| "loss": 1.22, |
| "step": 480 |
| }, |
| { |
| "epoch": 1.0933232169954477, |
| "grad_norm": 0.5127097964286804, |
| "learning_rate": 4.726248519306208e-06, |
| "loss": 1.1953, |
| "step": 481 |
| }, |
| { |
| "epoch": 1.095599393019727, |
| "grad_norm": 0.5109656453132629, |
| "learning_rate": 4.724836611774032e-06, |
| "loss": 1.2483, |
| "step": 482 |
| }, |
| { |
| "epoch": 1.097875569044006, |
| "grad_norm": 0.5445286631584167, |
| "learning_rate": 4.723421284568764e-06, |
| "loss": 1.242, |
| "step": 483 |
| }, |
| { |
| "epoch": 1.1001517450682852, |
| "grad_norm": 0.5462026000022888, |
| "learning_rate": 4.722002539865823e-06, |
| "loss": 1.2475, |
| "step": 484 |
| }, |
| { |
| "epoch": 1.1024279210925645, |
| "grad_norm": 0.5589436292648315, |
| "learning_rate": 4.720580379845884e-06, |
| "loss": 1.2511, |
| "step": 485 |
| }, |
| { |
| "epoch": 1.1047040971168438, |
| "grad_norm": 0.5450273752212524, |
| "learning_rate": 4.719154806694869e-06, |
| "loss": 1.2843, |
| "step": 486 |
| }, |
| { |
| "epoch": 1.106980273141123, |
| "grad_norm": 0.5322884321212769, |
| "learning_rate": 4.717725822603948e-06, |
| "loss": 1.2159, |
| "step": 487 |
| }, |
| { |
| "epoch": 1.1092564491654022, |
| "grad_norm": 0.5098543763160706, |
| "learning_rate": 4.716293429769534e-06, |
| "loss": 1.2818, |
| "step": 488 |
| }, |
| { |
| "epoch": 1.1115326251896813, |
| "grad_norm": 0.5248117446899414, |
| "learning_rate": 4.7148576303932784e-06, |
| "loss": 1.2497, |
| "step": 489 |
| }, |
| { |
| "epoch": 1.1138088012139606, |
| "grad_norm": 0.5317633748054504, |
| "learning_rate": 4.7134184266820675e-06, |
| "loss": 1.2174, |
| "step": 490 |
| }, |
| { |
| "epoch": 1.1160849772382397, |
| "grad_norm": 0.5104670524597168, |
| "learning_rate": 4.711975820848024e-06, |
| "loss": 1.2492, |
| "step": 491 |
| }, |
| { |
| "epoch": 1.118361153262519, |
| "grad_norm": 0.5210446715354919, |
| "learning_rate": 4.710529815108496e-06, |
| "loss": 1.2478, |
| "step": 492 |
| }, |
| { |
| "epoch": 1.1206373292867982, |
| "grad_norm": 0.5357753038406372, |
| "learning_rate": 4.7090804116860574e-06, |
| "loss": 1.2533, |
| "step": 493 |
| }, |
| { |
| "epoch": 1.1229135053110775, |
| "grad_norm": 0.5544043779373169, |
| "learning_rate": 4.707627612808509e-06, |
| "loss": 1.2315, |
| "step": 494 |
| }, |
| { |
| "epoch": 1.1251896813353566, |
| "grad_norm": 0.5387628674507141, |
| "learning_rate": 4.706171420708866e-06, |
| "loss": 1.2492, |
| "step": 495 |
| }, |
| { |
| "epoch": 1.127465857359636, |
| "grad_norm": 0.5289620757102966, |
| "learning_rate": 4.704711837625361e-06, |
| "loss": 1.1865, |
| "step": 496 |
| }, |
| { |
| "epoch": 1.129742033383915, |
| "grad_norm": 0.5673317909240723, |
| "learning_rate": 4.703248865801436e-06, |
| "loss": 1.1963, |
| "step": 497 |
| }, |
| { |
| "epoch": 1.1320182094081943, |
| "grad_norm": 0.5180116295814514, |
| "learning_rate": 4.701782507485747e-06, |
| "loss": 1.2431, |
| "step": 498 |
| }, |
| { |
| "epoch": 1.1342943854324734, |
| "grad_norm": 0.5326710343360901, |
| "learning_rate": 4.700312764932151e-06, |
| "loss": 1.2543, |
| "step": 499 |
| }, |
| { |
| "epoch": 1.1365705614567527, |
| "grad_norm": 0.536686360836029, |
| "learning_rate": 4.698839640399707e-06, |
| "loss": 1.2664, |
| "step": 500 |
| }, |
| { |
| "epoch": 1.1388467374810318, |
| "grad_norm": 0.5708869695663452, |
| "learning_rate": 4.6973631361526745e-06, |
| "loss": 1.2445, |
| "step": 501 |
| }, |
| { |
| "epoch": 1.1411229135053111, |
| "grad_norm": 0.5445765852928162, |
| "learning_rate": 4.695883254460505e-06, |
| "loss": 1.2111, |
| "step": 502 |
| }, |
| { |
| "epoch": 1.1433990895295902, |
| "grad_norm": 0.5529754161834717, |
| "learning_rate": 4.6943999975978445e-06, |
| "loss": 1.2346, |
| "step": 503 |
| }, |
| { |
| "epoch": 1.1456752655538696, |
| "grad_norm": 0.5409250855445862, |
| "learning_rate": 4.692913367844523e-06, |
| "loss": 1.2338, |
| "step": 504 |
| }, |
| { |
| "epoch": 1.1479514415781487, |
| "grad_norm": 0.5459516644477844, |
| "learning_rate": 4.691423367485558e-06, |
| "loss": 1.2487, |
| "step": 505 |
| }, |
| { |
| "epoch": 1.150227617602428, |
| "grad_norm": 0.5377400517463684, |
| "learning_rate": 4.689929998811145e-06, |
| "loss": 1.2719, |
| "step": 506 |
| }, |
| { |
| "epoch": 1.152503793626707, |
| "grad_norm": 0.5768429636955261, |
| "learning_rate": 4.68843326411666e-06, |
| "loss": 1.2106, |
| "step": 507 |
| }, |
| { |
| "epoch": 1.1547799696509864, |
| "grad_norm": 0.5586393475532532, |
| "learning_rate": 4.686933165702651e-06, |
| "loss": 1.2469, |
| "step": 508 |
| }, |
| { |
| "epoch": 1.1570561456752655, |
| "grad_norm": 0.5209569334983826, |
| "learning_rate": 4.685429705874834e-06, |
| "loss": 1.2453, |
| "step": 509 |
| }, |
| { |
| "epoch": 1.1593323216995448, |
| "grad_norm": 0.5145371556282043, |
| "learning_rate": 4.6839228869440965e-06, |
| "loss": 1.2484, |
| "step": 510 |
| }, |
| { |
| "epoch": 1.161608497723824, |
| "grad_norm": 0.5463981032371521, |
| "learning_rate": 4.682412711226485e-06, |
| "loss": 1.2691, |
| "step": 511 |
| }, |
| { |
| "epoch": 1.1638846737481032, |
| "grad_norm": 0.5128470659255981, |
| "learning_rate": 4.680899181043206e-06, |
| "loss": 1.2579, |
| "step": 512 |
| }, |
| { |
| "epoch": 1.1661608497723823, |
| "grad_norm": 0.5277767777442932, |
| "learning_rate": 4.679382298720625e-06, |
| "loss": 1.2247, |
| "step": 513 |
| }, |
| { |
| "epoch": 1.1684370257966616, |
| "grad_norm": 0.5547785758972168, |
| "learning_rate": 4.6778620665902566e-06, |
| "loss": 1.2492, |
| "step": 514 |
| }, |
| { |
| "epoch": 1.1707132018209407, |
| "grad_norm": 0.5689957737922668, |
| "learning_rate": 4.676338486988765e-06, |
| "loss": 1.2384, |
| "step": 515 |
| }, |
| { |
| "epoch": 1.17298937784522, |
| "grad_norm": 0.5139868259429932, |
| "learning_rate": 4.674811562257961e-06, |
| "loss": 1.2562, |
| "step": 516 |
| }, |
| { |
| "epoch": 1.1752655538694992, |
| "grad_norm": 0.5729711055755615, |
| "learning_rate": 4.673281294744796e-06, |
| "loss": 1.2833, |
| "step": 517 |
| }, |
| { |
| "epoch": 1.1775417298937785, |
| "grad_norm": 0.5735371708869934, |
| "learning_rate": 4.671747686801358e-06, |
| "loss": 1.2481, |
| "step": 518 |
| }, |
| { |
| "epoch": 1.1798179059180576, |
| "grad_norm": 0.5259848833084106, |
| "learning_rate": 4.670210740784872e-06, |
| "loss": 1.2496, |
| "step": 519 |
| }, |
| { |
| "epoch": 1.182094081942337, |
| "grad_norm": 0.5374155640602112, |
| "learning_rate": 4.668670459057693e-06, |
| "loss": 1.2484, |
| "step": 520 |
| }, |
| { |
| "epoch": 1.184370257966616, |
| "grad_norm": 0.5365428328514099, |
| "learning_rate": 4.667126843987301e-06, |
| "loss": 1.2651, |
| "step": 521 |
| }, |
| { |
| "epoch": 1.1866464339908953, |
| "grad_norm": 0.5263276100158691, |
| "learning_rate": 4.665579897946303e-06, |
| "loss": 1.19, |
| "step": 522 |
| }, |
| { |
| "epoch": 1.1889226100151746, |
| "grad_norm": 0.5412886142730713, |
| "learning_rate": 4.664029623312422e-06, |
| "loss": 1.2551, |
| "step": 523 |
| }, |
| { |
| "epoch": 1.1911987860394537, |
| "grad_norm": 0.5376629829406738, |
| "learning_rate": 4.662476022468503e-06, |
| "loss": 1.2541, |
| "step": 524 |
| }, |
| { |
| "epoch": 1.1934749620637328, |
| "grad_norm": 0.5543259382247925, |
| "learning_rate": 4.660919097802495e-06, |
| "loss": 1.2745, |
| "step": 525 |
| }, |
| { |
| "epoch": 1.1957511380880121, |
| "grad_norm": 0.5453343987464905, |
| "learning_rate": 4.659358851707464e-06, |
| "loss": 1.238, |
| "step": 526 |
| }, |
| { |
| "epoch": 1.1980273141122915, |
| "grad_norm": 0.5588712692260742, |
| "learning_rate": 4.657795286581576e-06, |
| "loss": 1.1767, |
| "step": 527 |
| }, |
| { |
| "epoch": 1.2003034901365706, |
| "grad_norm": 0.5432548522949219, |
| "learning_rate": 4.656228404828102e-06, |
| "loss": 1.2243, |
| "step": 528 |
| }, |
| { |
| "epoch": 1.2025796661608497, |
| "grad_norm": 0.5616108179092407, |
| "learning_rate": 4.654658208855408e-06, |
| "loss": 1.1937, |
| "step": 529 |
| }, |
| { |
| "epoch": 1.204855842185129, |
| "grad_norm": 0.5578548908233643, |
| "learning_rate": 4.653084701076955e-06, |
| "loss": 1.2454, |
| "step": 530 |
| }, |
| { |
| "epoch": 1.2071320182094083, |
| "grad_norm": 0.5913681983947754, |
| "learning_rate": 4.651507883911296e-06, |
| "loss": 1.2717, |
| "step": 531 |
| }, |
| { |
| "epoch": 1.2094081942336874, |
| "grad_norm": 0.5625573992729187, |
| "learning_rate": 4.649927759782068e-06, |
| "loss": 1.2619, |
| "step": 532 |
| }, |
| { |
| "epoch": 1.2116843702579665, |
| "grad_norm": 0.5766717195510864, |
| "learning_rate": 4.648344331117992e-06, |
| "loss": 1.2748, |
| "step": 533 |
| }, |
| { |
| "epoch": 1.2139605462822458, |
| "grad_norm": 0.529719889163971, |
| "learning_rate": 4.64675760035287e-06, |
| "loss": 1.2443, |
| "step": 534 |
| }, |
| { |
| "epoch": 1.2162367223065251, |
| "grad_norm": 0.5937225222587585, |
| "learning_rate": 4.645167569925577e-06, |
| "loss": 1.253, |
| "step": 535 |
| }, |
| { |
| "epoch": 1.2185128983308042, |
| "grad_norm": 0.6403617262840271, |
| "learning_rate": 4.64357424228006e-06, |
| "loss": 1.1932, |
| "step": 536 |
| }, |
| { |
| "epoch": 1.2207890743550835, |
| "grad_norm": 0.5702269077301025, |
| "learning_rate": 4.6419776198653365e-06, |
| "loss": 1.2498, |
| "step": 537 |
| }, |
| { |
| "epoch": 1.2230652503793626, |
| "grad_norm": 0.5545888543128967, |
| "learning_rate": 4.640377705135485e-06, |
| "loss": 1.2517, |
| "step": 538 |
| }, |
| { |
| "epoch": 1.225341426403642, |
| "grad_norm": 0.5598457455635071, |
| "learning_rate": 4.638774500549645e-06, |
| "loss": 1.2503, |
| "step": 539 |
| }, |
| { |
| "epoch": 1.227617602427921, |
| "grad_norm": 0.5853296518325806, |
| "learning_rate": 4.637168008572016e-06, |
| "loss": 1.2418, |
| "step": 540 |
| }, |
| { |
| "epoch": 1.2298937784522004, |
| "grad_norm": 0.5423877239227295, |
| "learning_rate": 4.635558231671846e-06, |
| "loss": 1.2295, |
| "step": 541 |
| }, |
| { |
| "epoch": 1.2321699544764795, |
| "grad_norm": 0.5638657808303833, |
| "learning_rate": 4.633945172323434e-06, |
| "loss": 1.2934, |
| "step": 542 |
| }, |
| { |
| "epoch": 1.2344461305007588, |
| "grad_norm": 0.5612449645996094, |
| "learning_rate": 4.6323288330061244e-06, |
| "loss": 1.2624, |
| "step": 543 |
| }, |
| { |
| "epoch": 1.236722306525038, |
| "grad_norm": 0.5534572601318359, |
| "learning_rate": 4.630709216204303e-06, |
| "loss": 1.2488, |
| "step": 544 |
| }, |
| { |
| "epoch": 1.2389984825493172, |
| "grad_norm": 0.5525970458984375, |
| "learning_rate": 4.629086324407393e-06, |
| "loss": 1.231, |
| "step": 545 |
| }, |
| { |
| "epoch": 1.2412746585735963, |
| "grad_norm": 0.5725768804550171, |
| "learning_rate": 4.6274601601098505e-06, |
| "loss": 1.2959, |
| "step": 546 |
| }, |
| { |
| "epoch": 1.2435508345978756, |
| "grad_norm": 0.582775354385376, |
| "learning_rate": 4.625830725811164e-06, |
| "loss": 1.2554, |
| "step": 547 |
| }, |
| { |
| "epoch": 1.2458270106221547, |
| "grad_norm": 0.5522809028625488, |
| "learning_rate": 4.624198024015845e-06, |
| "loss": 1.2487, |
| "step": 548 |
| }, |
| { |
| "epoch": 1.248103186646434, |
| "grad_norm": 0.5601561069488525, |
| "learning_rate": 4.622562057233431e-06, |
| "loss": 1.2489, |
| "step": 549 |
| }, |
| { |
| "epoch": 1.2503793626707131, |
| "grad_norm": 0.5581909418106079, |
| "learning_rate": 4.620922827978475e-06, |
| "loss": 1.205, |
| "step": 550 |
| }, |
| { |
| "epoch": 1.2526555386949925, |
| "grad_norm": 0.5560769438743591, |
| "learning_rate": 4.619280338770545e-06, |
| "loss": 1.2253, |
| "step": 551 |
| }, |
| { |
| "epoch": 1.2549317147192716, |
| "grad_norm": 0.5541017651557922, |
| "learning_rate": 4.617634592134221e-06, |
| "loss": 1.2476, |
| "step": 552 |
| }, |
| { |
| "epoch": 1.2572078907435509, |
| "grad_norm": 0.5714686512947083, |
| "learning_rate": 4.615985590599088e-06, |
| "loss": 1.2274, |
| "step": 553 |
| }, |
| { |
| "epoch": 1.25948406676783, |
| "grad_norm": 0.5909372568130493, |
| "learning_rate": 4.6143333366997354e-06, |
| "loss": 1.2481, |
| "step": 554 |
| }, |
| { |
| "epoch": 1.2617602427921093, |
| "grad_norm": 0.5704237818717957, |
| "learning_rate": 4.612677832975751e-06, |
| "loss": 1.2607, |
| "step": 555 |
| }, |
| { |
| "epoch": 1.2640364188163884, |
| "grad_norm": 0.5494899749755859, |
| "learning_rate": 4.611019081971719e-06, |
| "loss": 1.2171, |
| "step": 556 |
| }, |
| { |
| "epoch": 1.2663125948406677, |
| "grad_norm": 0.5628857612609863, |
| "learning_rate": 4.609357086237213e-06, |
| "loss": 1.2185, |
| "step": 557 |
| }, |
| { |
| "epoch": 1.2685887708649468, |
| "grad_norm": 0.5746468305587769, |
| "learning_rate": 4.607691848326793e-06, |
| "loss": 1.2485, |
| "step": 558 |
| }, |
| { |
| "epoch": 1.2708649468892261, |
| "grad_norm": 0.5731273889541626, |
| "learning_rate": 4.606023370800006e-06, |
| "loss": 1.2302, |
| "step": 559 |
| }, |
| { |
| "epoch": 1.2731411229135052, |
| "grad_norm": 0.5782604217529297, |
| "learning_rate": 4.604351656221374e-06, |
| "loss": 1.2281, |
| "step": 560 |
| }, |
| { |
| "epoch": 1.2754172989377845, |
| "grad_norm": 0.5706422328948975, |
| "learning_rate": 4.6026767071604e-06, |
| "loss": 1.2145, |
| "step": 561 |
| }, |
| { |
| "epoch": 1.2776934749620636, |
| "grad_norm": 0.5888031125068665, |
| "learning_rate": 4.6009985261915536e-06, |
| "loss": 1.1982, |
| "step": 562 |
| }, |
| { |
| "epoch": 1.279969650986343, |
| "grad_norm": 0.543771505355835, |
| "learning_rate": 4.599317115894273e-06, |
| "loss": 1.2439, |
| "step": 563 |
| }, |
| { |
| "epoch": 1.2822458270106223, |
| "grad_norm": 0.5837553143501282, |
| "learning_rate": 4.597632478852963e-06, |
| "loss": 1.22, |
| "step": 564 |
| }, |
| { |
| "epoch": 1.2845220030349014, |
| "grad_norm": 0.5469195246696472, |
| "learning_rate": 4.595944617656984e-06, |
| "loss": 1.2161, |
| "step": 565 |
| }, |
| { |
| "epoch": 1.2867981790591805, |
| "grad_norm": 0.5544828772544861, |
| "learning_rate": 4.594253534900656e-06, |
| "loss": 1.22, |
| "step": 566 |
| }, |
| { |
| "epoch": 1.2890743550834598, |
| "grad_norm": 0.5594440698623657, |
| "learning_rate": 4.592559233183246e-06, |
| "loss": 1.2088, |
| "step": 567 |
| }, |
| { |
| "epoch": 1.2913505311077391, |
| "grad_norm": 0.541545569896698, |
| "learning_rate": 4.590861715108972e-06, |
| "loss": 1.2185, |
| "step": 568 |
| }, |
| { |
| "epoch": 1.2936267071320182, |
| "grad_norm": 0.5520378947257996, |
| "learning_rate": 4.5891609832869964e-06, |
| "loss": 1.2268, |
| "step": 569 |
| }, |
| { |
| "epoch": 1.2959028831562973, |
| "grad_norm": 0.5583465695381165, |
| "learning_rate": 4.587457040331419e-06, |
| "loss": 1.2225, |
| "step": 570 |
| }, |
| { |
| "epoch": 1.2981790591805766, |
| "grad_norm": 0.5398393869400024, |
| "learning_rate": 4.5857498888612755e-06, |
| "loss": 1.2479, |
| "step": 571 |
| }, |
| { |
| "epoch": 1.300455235204856, |
| "grad_norm": 0.5736100673675537, |
| "learning_rate": 4.584039531500535e-06, |
| "loss": 1.2572, |
| "step": 572 |
| }, |
| { |
| "epoch": 1.302731411229135, |
| "grad_norm": 0.5614636540412903, |
| "learning_rate": 4.582325970878092e-06, |
| "loss": 1.2221, |
| "step": 573 |
| }, |
| { |
| "epoch": 1.3050075872534141, |
| "grad_norm": 0.5580296516418457, |
| "learning_rate": 4.580609209627766e-06, |
| "loss": 1.232, |
| "step": 574 |
| }, |
| { |
| "epoch": 1.3072837632776935, |
| "grad_norm": 0.5606446266174316, |
| "learning_rate": 4.578889250388296e-06, |
| "loss": 1.2214, |
| "step": 575 |
| }, |
| { |
| "epoch": 1.3095599393019728, |
| "grad_norm": 0.5508303642272949, |
| "learning_rate": 4.577166095803336e-06, |
| "loss": 1.244, |
| "step": 576 |
| }, |
| { |
| "epoch": 1.3118361153262519, |
| "grad_norm": 0.557896614074707, |
| "learning_rate": 4.5754397485214505e-06, |
| "loss": 1.2668, |
| "step": 577 |
| }, |
| { |
| "epoch": 1.314112291350531, |
| "grad_norm": 0.5473496317863464, |
| "learning_rate": 4.573710211196113e-06, |
| "loss": 1.2265, |
| "step": 578 |
| }, |
| { |
| "epoch": 1.3163884673748103, |
| "grad_norm": 0.5576569437980652, |
| "learning_rate": 4.5719774864857e-06, |
| "loss": 1.2626, |
| "step": 579 |
| }, |
| { |
| "epoch": 1.3186646433990896, |
| "grad_norm": 0.5799663662910461, |
| "learning_rate": 4.570241577053486e-06, |
| "loss": 1.2573, |
| "step": 580 |
| }, |
| { |
| "epoch": 1.3209408194233687, |
| "grad_norm": 0.555438756942749, |
| "learning_rate": 4.568502485567641e-06, |
| "loss": 1.2775, |
| "step": 581 |
| }, |
| { |
| "epoch": 1.3232169954476478, |
| "grad_norm": 0.5486553907394409, |
| "learning_rate": 4.566760214701227e-06, |
| "loss": 1.2588, |
| "step": 582 |
| }, |
| { |
| "epoch": 1.3254931714719271, |
| "grad_norm": 0.5853822231292725, |
| "learning_rate": 4.565014767132191e-06, |
| "loss": 1.2185, |
| "step": 583 |
| }, |
| { |
| "epoch": 1.3277693474962065, |
| "grad_norm": 0.569977879524231, |
| "learning_rate": 4.563266145543364e-06, |
| "loss": 1.2387, |
| "step": 584 |
| }, |
| { |
| "epoch": 1.3300455235204856, |
| "grad_norm": 0.5845345258712769, |
| "learning_rate": 4.5615143526224555e-06, |
| "loss": 1.2935, |
| "step": 585 |
| }, |
| { |
| "epoch": 1.3323216995447649, |
| "grad_norm": 0.5513466000556946, |
| "learning_rate": 4.559759391062051e-06, |
| "loss": 1.2347, |
| "step": 586 |
| }, |
| { |
| "epoch": 1.334597875569044, |
| "grad_norm": 0.5497938990592957, |
| "learning_rate": 4.558001263559602e-06, |
| "loss": 1.2266, |
| "step": 587 |
| }, |
| { |
| "epoch": 1.3368740515933233, |
| "grad_norm": 0.5504549145698547, |
| "learning_rate": 4.556239972817429e-06, |
| "loss": 1.2535, |
| "step": 588 |
| }, |
| { |
| "epoch": 1.3391502276176024, |
| "grad_norm": 0.5670903325080872, |
| "learning_rate": 4.5544755215427175e-06, |
| "loss": 1.261, |
| "step": 589 |
| }, |
| { |
| "epoch": 1.3414264036418817, |
| "grad_norm": 0.5838532447814941, |
| "learning_rate": 4.552707912447504e-06, |
| "loss": 1.2487, |
| "step": 590 |
| }, |
| { |
| "epoch": 1.3437025796661608, |
| "grad_norm": 0.5291898250579834, |
| "learning_rate": 4.550937148248685e-06, |
| "loss": 1.2528, |
| "step": 591 |
| }, |
| { |
| "epoch": 1.3459787556904401, |
| "grad_norm": 0.5700204968452454, |
| "learning_rate": 4.549163231668004e-06, |
| "loss": 1.2657, |
| "step": 592 |
| }, |
| { |
| "epoch": 1.3482549317147192, |
| "grad_norm": 0.5522517561912537, |
| "learning_rate": 4.547386165432048e-06, |
| "loss": 1.2542, |
| "step": 593 |
| }, |
| { |
| "epoch": 1.3505311077389985, |
| "grad_norm": 0.5714395046234131, |
| "learning_rate": 4.545605952272249e-06, |
| "loss": 1.2343, |
| "step": 594 |
| }, |
| { |
| "epoch": 1.3528072837632776, |
| "grad_norm": 0.5690736174583435, |
| "learning_rate": 4.543822594924874e-06, |
| "loss": 1.2462, |
| "step": 595 |
| }, |
| { |
| "epoch": 1.355083459787557, |
| "grad_norm": 0.5521000027656555, |
| "learning_rate": 4.54203609613102e-06, |
| "loss": 1.2512, |
| "step": 596 |
| }, |
| { |
| "epoch": 1.357359635811836, |
| "grad_norm": 0.5685454607009888, |
| "learning_rate": 4.540246458636619e-06, |
| "loss": 1.2296, |
| "step": 597 |
| }, |
| { |
| "epoch": 1.3596358118361154, |
| "grad_norm": 0.5521453022956848, |
| "learning_rate": 4.538453685192421e-06, |
| "loss": 1.2533, |
| "step": 598 |
| }, |
| { |
| "epoch": 1.3619119878603945, |
| "grad_norm": 0.545840322971344, |
| "learning_rate": 4.536657778554e-06, |
| "loss": 1.2456, |
| "step": 599 |
| }, |
| { |
| "epoch": 1.3641881638846738, |
| "grad_norm": 0.5703026056289673, |
| "learning_rate": 4.534858741481745e-06, |
| "loss": 1.2293, |
| "step": 600 |
| }, |
| { |
| "epoch": 1.3664643399089529, |
| "grad_norm": 0.5508074760437012, |
| "learning_rate": 4.5330565767408555e-06, |
| "loss": 1.2657, |
| "step": 601 |
| }, |
| { |
| "epoch": 1.3687405159332322, |
| "grad_norm": 0.5637306571006775, |
| "learning_rate": 4.531251287101338e-06, |
| "loss": 1.2199, |
| "step": 602 |
| }, |
| { |
| "epoch": 1.3710166919575113, |
| "grad_norm": 0.5585516095161438, |
| "learning_rate": 4.529442875338005e-06, |
| "loss": 1.2331, |
| "step": 603 |
| }, |
| { |
| "epoch": 1.3732928679817906, |
| "grad_norm": 0.5738129019737244, |
| "learning_rate": 4.527631344230466e-06, |
| "loss": 1.215, |
| "step": 604 |
| }, |
| { |
| "epoch": 1.37556904400607, |
| "grad_norm": 0.5905203223228455, |
| "learning_rate": 4.525816696563123e-06, |
| "loss": 1.2322, |
| "step": 605 |
| }, |
| { |
| "epoch": 1.377845220030349, |
| "grad_norm": 0.5772601366043091, |
| "learning_rate": 4.523998935125173e-06, |
| "loss": 1.2344, |
| "step": 606 |
| }, |
| { |
| "epoch": 1.3801213960546281, |
| "grad_norm": 0.6194104552268982, |
| "learning_rate": 4.5221780627105945e-06, |
| "loss": 1.2647, |
| "step": 607 |
| }, |
| { |
| "epoch": 1.3823975720789075, |
| "grad_norm": 0.5779480934143066, |
| "learning_rate": 4.520354082118151e-06, |
| "loss": 1.2148, |
| "step": 608 |
| }, |
| { |
| "epoch": 1.3846737481031868, |
| "grad_norm": 0.5630953907966614, |
| "learning_rate": 4.518526996151381e-06, |
| "loss": 1.2647, |
| "step": 609 |
| }, |
| { |
| "epoch": 1.3869499241274659, |
| "grad_norm": 0.5726267099380493, |
| "learning_rate": 4.516696807618598e-06, |
| "loss": 1.2741, |
| "step": 610 |
| }, |
| { |
| "epoch": 1.389226100151745, |
| "grad_norm": 0.5838750600814819, |
| "learning_rate": 4.514863519332882e-06, |
| "loss": 1.1919, |
| "step": 611 |
| }, |
| { |
| "epoch": 1.3915022761760243, |
| "grad_norm": 0.5766186714172363, |
| "learning_rate": 4.5130271341120805e-06, |
| "loss": 1.2359, |
| "step": 612 |
| }, |
| { |
| "epoch": 1.3937784522003036, |
| "grad_norm": 0.5568646192550659, |
| "learning_rate": 4.511187654778798e-06, |
| "loss": 1.2107, |
| "step": 613 |
| }, |
| { |
| "epoch": 1.3960546282245827, |
| "grad_norm": 0.5602480173110962, |
| "learning_rate": 4.509345084160397e-06, |
| "loss": 1.2276, |
| "step": 614 |
| }, |
| { |
| "epoch": 1.3983308042488618, |
| "grad_norm": 0.5605113506317139, |
| "learning_rate": 4.507499425088991e-06, |
| "loss": 1.2259, |
| "step": 615 |
| }, |
| { |
| "epoch": 1.4006069802731411, |
| "grad_norm": 0.5589579939842224, |
| "learning_rate": 4.505650680401441e-06, |
| "loss": 1.2212, |
| "step": 616 |
| }, |
| { |
| "epoch": 1.4028831562974204, |
| "grad_norm": 0.5683750510215759, |
| "learning_rate": 4.503798852939347e-06, |
| "loss": 1.2313, |
| "step": 617 |
| }, |
| { |
| "epoch": 1.4051593323216995, |
| "grad_norm": 0.5655199885368347, |
| "learning_rate": 4.501943945549054e-06, |
| "loss": 1.2199, |
| "step": 618 |
| }, |
| { |
| "epoch": 1.4074355083459786, |
| "grad_norm": 0.5633233785629272, |
| "learning_rate": 4.500085961081635e-06, |
| "loss": 1.2305, |
| "step": 619 |
| }, |
| { |
| "epoch": 1.409711684370258, |
| "grad_norm": 0.5716864466667175, |
| "learning_rate": 4.498224902392896e-06, |
| "loss": 1.2135, |
| "step": 620 |
| }, |
| { |
| "epoch": 1.4119878603945373, |
| "grad_norm": 0.5524502992630005, |
| "learning_rate": 4.496360772343367e-06, |
| "loss": 1.221, |
| "step": 621 |
| }, |
| { |
| "epoch": 1.4142640364188164, |
| "grad_norm": 0.5607890486717224, |
| "learning_rate": 4.494493573798299e-06, |
| "loss": 1.2243, |
| "step": 622 |
| }, |
| { |
| "epoch": 1.4165402124430955, |
| "grad_norm": 0.5746079683303833, |
| "learning_rate": 4.49262330962766e-06, |
| "loss": 1.2064, |
| "step": 623 |
| }, |
| { |
| "epoch": 1.4188163884673748, |
| "grad_norm": 0.5607832670211792, |
| "learning_rate": 4.490749982706128e-06, |
| "loss": 1.2248, |
| "step": 624 |
| }, |
| { |
| "epoch": 1.421092564491654, |
| "grad_norm": 0.5688823461532593, |
| "learning_rate": 4.488873595913092e-06, |
| "loss": 1.232, |
| "step": 625 |
| }, |
| { |
| "epoch": 1.4233687405159332, |
| "grad_norm": 0.5820784568786621, |
| "learning_rate": 4.48699415213264e-06, |
| "loss": 1.2485, |
| "step": 626 |
| }, |
| { |
| "epoch": 1.4256449165402125, |
| "grad_norm": 0.56890869140625, |
| "learning_rate": 4.4851116542535625e-06, |
| "loss": 1.2286, |
| "step": 627 |
| }, |
| { |
| "epoch": 1.4279210925644916, |
| "grad_norm": 0.6012819409370422, |
| "learning_rate": 4.483226105169341e-06, |
| "loss": 1.2343, |
| "step": 628 |
| }, |
| { |
| "epoch": 1.430197268588771, |
| "grad_norm": 0.570756733417511, |
| "learning_rate": 4.481337507778151e-06, |
| "loss": 1.2447, |
| "step": 629 |
| }, |
| { |
| "epoch": 1.43247344461305, |
| "grad_norm": 0.5640760660171509, |
| "learning_rate": 4.47944586498285e-06, |
| "loss": 1.2298, |
| "step": 630 |
| }, |
| { |
| "epoch": 1.4347496206373294, |
| "grad_norm": 0.5836703777313232, |
| "learning_rate": 4.477551179690977e-06, |
| "loss": 1.2099, |
| "step": 631 |
| }, |
| { |
| "epoch": 1.4370257966616085, |
| "grad_norm": 0.5838893055915833, |
| "learning_rate": 4.475653454814746e-06, |
| "loss": 1.2437, |
| "step": 632 |
| }, |
| { |
| "epoch": 1.4393019726858878, |
| "grad_norm": 0.5973705053329468, |
| "learning_rate": 4.473752693271048e-06, |
| "loss": 1.2872, |
| "step": 633 |
| }, |
| { |
| "epoch": 1.4415781487101669, |
| "grad_norm": 0.5992927551269531, |
| "learning_rate": 4.471848897981437e-06, |
| "loss": 1.2072, |
| "step": 634 |
| }, |
| { |
| "epoch": 1.4438543247344462, |
| "grad_norm": 0.566234827041626, |
| "learning_rate": 4.46994207187213e-06, |
| "loss": 1.2181, |
| "step": 635 |
| }, |
| { |
| "epoch": 1.4461305007587253, |
| "grad_norm": 0.5693137645721436, |
| "learning_rate": 4.4680322178740056e-06, |
| "loss": 1.1862, |
| "step": 636 |
| }, |
| { |
| "epoch": 1.4484066767830046, |
| "grad_norm": 0.5798976421356201, |
| "learning_rate": 4.466119338922593e-06, |
| "loss": 1.2225, |
| "step": 637 |
| }, |
| { |
| "epoch": 1.4506828528072837, |
| "grad_norm": 0.575389564037323, |
| "learning_rate": 4.464203437958075e-06, |
| "loss": 1.2257, |
| "step": 638 |
| }, |
| { |
| "epoch": 1.452959028831563, |
| "grad_norm": 0.6053541302680969, |
| "learning_rate": 4.4622845179252735e-06, |
| "loss": 1.241, |
| "step": 639 |
| }, |
| { |
| "epoch": 1.4552352048558421, |
| "grad_norm": 0.5716749429702759, |
| "learning_rate": 4.460362581773656e-06, |
| "loss": 1.2278, |
| "step": 640 |
| }, |
| { |
| "epoch": 1.4575113808801214, |
| "grad_norm": 0.5863229036331177, |
| "learning_rate": 4.458437632457325e-06, |
| "loss": 1.2238, |
| "step": 641 |
| }, |
| { |
| "epoch": 1.4597875569044005, |
| "grad_norm": 0.6117021441459656, |
| "learning_rate": 4.456509672935011e-06, |
| "loss": 1.2318, |
| "step": 642 |
| }, |
| { |
| "epoch": 1.4620637329286799, |
| "grad_norm": 0.6031973361968994, |
| "learning_rate": 4.454578706170075e-06, |
| "loss": 1.2309, |
| "step": 643 |
| }, |
| { |
| "epoch": 1.464339908952959, |
| "grad_norm": 0.6449349522590637, |
| "learning_rate": 4.4526447351304995e-06, |
| "loss": 1.2357, |
| "step": 644 |
| }, |
| { |
| "epoch": 1.4666160849772383, |
| "grad_norm": 0.5698959231376648, |
| "learning_rate": 4.450707762788884e-06, |
| "loss": 1.2064, |
| "step": 645 |
| }, |
| { |
| "epoch": 1.4688922610015174, |
| "grad_norm": 0.6145030856132507, |
| "learning_rate": 4.44876779212244e-06, |
| "loss": 1.1837, |
| "step": 646 |
| }, |
| { |
| "epoch": 1.4711684370257967, |
| "grad_norm": 0.6202698349952698, |
| "learning_rate": 4.446824826112992e-06, |
| "loss": 1.2459, |
| "step": 647 |
| }, |
| { |
| "epoch": 1.4734446130500758, |
| "grad_norm": 0.5868430137634277, |
| "learning_rate": 4.444878867746962e-06, |
| "loss": 1.1797, |
| "step": 648 |
| }, |
| { |
| "epoch": 1.475720789074355, |
| "grad_norm": 0.6009106636047363, |
| "learning_rate": 4.442929920015377e-06, |
| "loss": 1.2008, |
| "step": 649 |
| }, |
| { |
| "epoch": 1.4779969650986344, |
| "grad_norm": 0.6000754237174988, |
| "learning_rate": 4.440977985913856e-06, |
| "loss": 1.199, |
| "step": 650 |
| }, |
| { |
| "epoch": 1.4802731411229135, |
| "grad_norm": 0.5801194310188293, |
| "learning_rate": 4.439023068442608e-06, |
| "loss": 1.2806, |
| "step": 651 |
| }, |
| { |
| "epoch": 1.4825493171471926, |
| "grad_norm": 0.6096365451812744, |
| "learning_rate": 4.43706517060643e-06, |
| "loss": 1.2434, |
| "step": 652 |
| }, |
| { |
| "epoch": 1.484825493171472, |
| "grad_norm": 0.6116917133331299, |
| "learning_rate": 4.435104295414697e-06, |
| "loss": 1.2262, |
| "step": 653 |
| }, |
| { |
| "epoch": 1.4871016691957513, |
| "grad_norm": 0.5588528513908386, |
| "learning_rate": 4.4331404458813615e-06, |
| "loss": 1.2373, |
| "step": 654 |
| }, |
| { |
| "epoch": 1.4893778452200304, |
| "grad_norm": 0.5834910869598389, |
| "learning_rate": 4.431173625024948e-06, |
| "loss": 1.2766, |
| "step": 655 |
| }, |
| { |
| "epoch": 1.4916540212443095, |
| "grad_norm": 0.623333215713501, |
| "learning_rate": 4.429203835868549e-06, |
| "loss": 1.2375, |
| "step": 656 |
| }, |
| { |
| "epoch": 1.4939301972685888, |
| "grad_norm": 0.6033525466918945, |
| "learning_rate": 4.427231081439817e-06, |
| "loss": 1.2, |
| "step": 657 |
| }, |
| { |
| "epoch": 1.496206373292868, |
| "grad_norm": 0.5829868912696838, |
| "learning_rate": 4.4252553647709635e-06, |
| "loss": 1.2349, |
| "step": 658 |
| }, |
| { |
| "epoch": 1.4984825493171472, |
| "grad_norm": 0.5703787803649902, |
| "learning_rate": 4.423276688898754e-06, |
| "loss": 1.2213, |
| "step": 659 |
| }, |
| { |
| "epoch": 1.5007587253414263, |
| "grad_norm": 0.5715304017066956, |
| "learning_rate": 4.421295056864501e-06, |
| "loss": 1.2394, |
| "step": 660 |
| }, |
| { |
| "epoch": 1.5030349013657056, |
| "grad_norm": 0.6249496340751648, |
| "learning_rate": 4.419310471714061e-06, |
| "loss": 1.2027, |
| "step": 661 |
| }, |
| { |
| "epoch": 1.505311077389985, |
| "grad_norm": 0.5828440189361572, |
| "learning_rate": 4.417322936497831e-06, |
| "loss": 1.2442, |
| "step": 662 |
| }, |
| { |
| "epoch": 1.507587253414264, |
| "grad_norm": 0.5692103505134583, |
| "learning_rate": 4.415332454270741e-06, |
| "loss": 1.1791, |
| "step": 663 |
| }, |
| { |
| "epoch": 1.5098634294385431, |
| "grad_norm": 0.595786988735199, |
| "learning_rate": 4.41333902809225e-06, |
| "loss": 1.231, |
| "step": 664 |
| }, |
| { |
| "epoch": 1.5121396054628224, |
| "grad_norm": 0.5955888032913208, |
| "learning_rate": 4.411342661026342e-06, |
| "loss": 1.2206, |
| "step": 665 |
| }, |
| { |
| "epoch": 1.5144157814871018, |
| "grad_norm": 0.582911491394043, |
| "learning_rate": 4.409343356141525e-06, |
| "loss": 1.2169, |
| "step": 666 |
| }, |
| { |
| "epoch": 1.5166919575113809, |
| "grad_norm": 0.585781455039978, |
| "learning_rate": 4.407341116510818e-06, |
| "loss": 1.2345, |
| "step": 667 |
| }, |
| { |
| "epoch": 1.51896813353566, |
| "grad_norm": 0.5766403675079346, |
| "learning_rate": 4.405335945211754e-06, |
| "loss": 1.2307, |
| "step": 668 |
| }, |
| { |
| "epoch": 1.5212443095599393, |
| "grad_norm": 0.5894457101821899, |
| "learning_rate": 4.4033278453263685e-06, |
| "loss": 1.2445, |
| "step": 669 |
| }, |
| { |
| "epoch": 1.5235204855842186, |
| "grad_norm": 0.5737869143486023, |
| "learning_rate": 4.401316819941203e-06, |
| "loss": 1.2311, |
| "step": 670 |
| }, |
| { |
| "epoch": 1.5257966616084977, |
| "grad_norm": 0.5908883213996887, |
| "learning_rate": 4.399302872147292e-06, |
| "loss": 1.2381, |
| "step": 671 |
| }, |
| { |
| "epoch": 1.5280728376327768, |
| "grad_norm": 0.6145277619361877, |
| "learning_rate": 4.397286005040162e-06, |
| "loss": 1.2394, |
| "step": 672 |
| }, |
| { |
| "epoch": 1.5303490136570561, |
| "grad_norm": 0.5731965899467468, |
| "learning_rate": 4.395266221719829e-06, |
| "loss": 1.2369, |
| "step": 673 |
| }, |
| { |
| "epoch": 1.5326251896813354, |
| "grad_norm": 0.5849004983901978, |
| "learning_rate": 4.3932435252907914e-06, |
| "loss": 1.2308, |
| "step": 674 |
| }, |
| { |
| "epoch": 1.5349013657056145, |
| "grad_norm": 0.5686678290367126, |
| "learning_rate": 4.391217918862021e-06, |
| "loss": 1.259, |
| "step": 675 |
| }, |
| { |
| "epoch": 1.5371775417298936, |
| "grad_norm": 0.580635666847229, |
| "learning_rate": 4.389189405546966e-06, |
| "loss": 1.2359, |
| "step": 676 |
| }, |
| { |
| "epoch": 1.539453717754173, |
| "grad_norm": 0.5722584128379822, |
| "learning_rate": 4.387157988463544e-06, |
| "loss": 1.231, |
| "step": 677 |
| }, |
| { |
| "epoch": 1.5417298937784523, |
| "grad_norm": 0.5868629813194275, |
| "learning_rate": 4.38512367073413e-06, |
| "loss": 1.2363, |
| "step": 678 |
| }, |
| { |
| "epoch": 1.5440060698027314, |
| "grad_norm": 0.5766255259513855, |
| "learning_rate": 4.383086455485564e-06, |
| "loss": 1.2556, |
| "step": 679 |
| }, |
| { |
| "epoch": 1.5462822458270105, |
| "grad_norm": 0.5849782824516296, |
| "learning_rate": 4.381046345849136e-06, |
| "loss": 1.2189, |
| "step": 680 |
| }, |
| { |
| "epoch": 1.5485584218512898, |
| "grad_norm": 0.6070932149887085, |
| "learning_rate": 4.379003344960585e-06, |
| "loss": 1.2351, |
| "step": 681 |
| }, |
| { |
| "epoch": 1.550834597875569, |
| "grad_norm": 0.6085125803947449, |
| "learning_rate": 4.376957455960094e-06, |
| "loss": 1.2218, |
| "step": 682 |
| }, |
| { |
| "epoch": 1.5531107738998484, |
| "grad_norm": 0.5707188844680786, |
| "learning_rate": 4.374908681992287e-06, |
| "loss": 1.2501, |
| "step": 683 |
| }, |
| { |
| "epoch": 1.5553869499241275, |
| "grad_norm": 0.6099936366081238, |
| "learning_rate": 4.37285702620622e-06, |
| "loss": 1.2436, |
| "step": 684 |
| }, |
| { |
| "epoch": 1.5576631259484066, |
| "grad_norm": 0.603273332118988, |
| "learning_rate": 4.37080249175538e-06, |
| "loss": 1.239, |
| "step": 685 |
| }, |
| { |
| "epoch": 1.559939301972686, |
| "grad_norm": 0.5822923183441162, |
| "learning_rate": 4.368745081797678e-06, |
| "loss": 1.22, |
| "step": 686 |
| }, |
| { |
| "epoch": 1.5622154779969653, |
| "grad_norm": 0.5922508835792542, |
| "learning_rate": 4.3666847994954445e-06, |
| "loss": 1.2138, |
| "step": 687 |
| }, |
| { |
| "epoch": 1.5644916540212443, |
| "grad_norm": 0.585437536239624, |
| "learning_rate": 4.364621648015426e-06, |
| "loss": 1.207, |
| "step": 688 |
| }, |
| { |
| "epoch": 1.5667678300455234, |
| "grad_norm": 0.5693568587303162, |
| "learning_rate": 4.362555630528776e-06, |
| "loss": 1.2036, |
| "step": 689 |
| }, |
| { |
| "epoch": 1.5690440060698028, |
| "grad_norm": 0.5950521230697632, |
| "learning_rate": 4.360486750211059e-06, |
| "loss": 1.2682, |
| "step": 690 |
| }, |
| { |
| "epoch": 1.571320182094082, |
| "grad_norm": 0.5919183492660522, |
| "learning_rate": 4.358415010242234e-06, |
| "loss": 1.2082, |
| "step": 691 |
| }, |
| { |
| "epoch": 1.5735963581183612, |
| "grad_norm": 0.6143742203712463, |
| "learning_rate": 4.356340413806658e-06, |
| "loss": 1.1925, |
| "step": 692 |
| }, |
| { |
| "epoch": 1.5758725341426403, |
| "grad_norm": 0.6028359532356262, |
| "learning_rate": 4.354262964093079e-06, |
| "loss": 1.2196, |
| "step": 693 |
| }, |
| { |
| "epoch": 1.5781487101669196, |
| "grad_norm": 0.6061824560165405, |
| "learning_rate": 4.35218266429463e-06, |
| "loss": 1.2266, |
| "step": 694 |
| }, |
| { |
| "epoch": 1.580424886191199, |
| "grad_norm": 0.6007355451583862, |
| "learning_rate": 4.3500995176088235e-06, |
| "loss": 1.2104, |
| "step": 695 |
| }, |
| { |
| "epoch": 1.582701062215478, |
| "grad_norm": 0.6342191100120544, |
| "learning_rate": 4.348013527237549e-06, |
| "loss": 1.2197, |
| "step": 696 |
| }, |
| { |
| "epoch": 1.5849772382397571, |
| "grad_norm": 0.5949456095695496, |
| "learning_rate": 4.345924696387067e-06, |
| "loss": 1.2258, |
| "step": 697 |
| }, |
| { |
| "epoch": 1.5872534142640364, |
| "grad_norm": 0.6161270141601562, |
| "learning_rate": 4.343833028268004e-06, |
| "loss": 1.2299, |
| "step": 698 |
| }, |
| { |
| "epoch": 1.5895295902883158, |
| "grad_norm": 0.5942959785461426, |
| "learning_rate": 4.341738526095348e-06, |
| "loss": 1.2594, |
| "step": 699 |
| }, |
| { |
| "epoch": 1.5918057663125948, |
| "grad_norm": 0.5933099389076233, |
| "learning_rate": 4.339641193088439e-06, |
| "loss": 1.1932, |
| "step": 700 |
| }, |
| { |
| "epoch": 1.594081942336874, |
| "grad_norm": 0.5857350826263428, |
| "learning_rate": 4.337541032470976e-06, |
| "loss": 1.3019, |
| "step": 701 |
| }, |
| { |
| "epoch": 1.5963581183611533, |
| "grad_norm": 0.604029655456543, |
| "learning_rate": 4.335438047470996e-06, |
| "loss": 1.2227, |
| "step": 702 |
| }, |
| { |
| "epoch": 1.5986342943854326, |
| "grad_norm": 0.5927514433860779, |
| "learning_rate": 4.333332241320882e-06, |
| "loss": 1.2742, |
| "step": 703 |
| }, |
| { |
| "epoch": 1.6009104704097117, |
| "grad_norm": 0.5811514854431152, |
| "learning_rate": 4.331223617257351e-06, |
| "loss": 1.23, |
| "step": 704 |
| }, |
| { |
| "epoch": 1.6031866464339908, |
| "grad_norm": 0.5948609709739685, |
| "learning_rate": 4.329112178521454e-06, |
| "loss": 1.2114, |
| "step": 705 |
| }, |
| { |
| "epoch": 1.60546282245827, |
| "grad_norm": 0.6194981932640076, |
| "learning_rate": 4.326997928358565e-06, |
| "loss": 1.2439, |
| "step": 706 |
| }, |
| { |
| "epoch": 1.6077389984825494, |
| "grad_norm": 0.5834797024726868, |
| "learning_rate": 4.324880870018382e-06, |
| "loss": 1.2269, |
| "step": 707 |
| }, |
| { |
| "epoch": 1.6100151745068285, |
| "grad_norm": 0.5746902823448181, |
| "learning_rate": 4.322761006754916e-06, |
| "loss": 1.2175, |
| "step": 708 |
| }, |
| { |
| "epoch": 1.6122913505311076, |
| "grad_norm": 0.6000075936317444, |
| "learning_rate": 4.320638341826494e-06, |
| "loss": 1.2316, |
| "step": 709 |
| }, |
| { |
| "epoch": 1.614567526555387, |
| "grad_norm": 0.588010311126709, |
| "learning_rate": 4.318512878495745e-06, |
| "loss": 1.245, |
| "step": 710 |
| }, |
| { |
| "epoch": 1.6168437025796663, |
| "grad_norm": 0.6053698658943176, |
| "learning_rate": 4.316384620029601e-06, |
| "loss": 1.228, |
| "step": 711 |
| }, |
| { |
| "epoch": 1.6191198786039454, |
| "grad_norm": 0.5857113599777222, |
| "learning_rate": 4.314253569699292e-06, |
| "loss": 1.2511, |
| "step": 712 |
| }, |
| { |
| "epoch": 1.6213960546282244, |
| "grad_norm": 0.5974637866020203, |
| "learning_rate": 4.312119730780334e-06, |
| "loss": 1.2377, |
| "step": 713 |
| }, |
| { |
| "epoch": 1.6236722306525038, |
| "grad_norm": 0.5964690446853638, |
| "learning_rate": 4.309983106552535e-06, |
| "loss": 1.2307, |
| "step": 714 |
| }, |
| { |
| "epoch": 1.625948406676783, |
| "grad_norm": 0.5781478881835938, |
| "learning_rate": 4.307843700299982e-06, |
| "loss": 1.2295, |
| "step": 715 |
| }, |
| { |
| "epoch": 1.6282245827010622, |
| "grad_norm": 0.597053587436676, |
| "learning_rate": 4.305701515311037e-06, |
| "loss": 1.2085, |
| "step": 716 |
| }, |
| { |
| "epoch": 1.6305007587253413, |
| "grad_norm": 0.6326000690460205, |
| "learning_rate": 4.303556554878333e-06, |
| "loss": 1.238, |
| "step": 717 |
| }, |
| { |
| "epoch": 1.6327769347496206, |
| "grad_norm": 0.6087371706962585, |
| "learning_rate": 4.3014088222987714e-06, |
| "loss": 1.2275, |
| "step": 718 |
| }, |
| { |
| "epoch": 1.6350531107739, |
| "grad_norm": 0.5937424898147583, |
| "learning_rate": 4.299258320873513e-06, |
| "loss": 1.2144, |
| "step": 719 |
| }, |
| { |
| "epoch": 1.637329286798179, |
| "grad_norm": 0.5922595262527466, |
| "learning_rate": 4.297105053907973e-06, |
| "loss": 1.2078, |
| "step": 720 |
| }, |
| { |
| "epoch": 1.6396054628224581, |
| "grad_norm": 0.603537380695343, |
| "learning_rate": 4.294949024711819e-06, |
| "loss": 1.2054, |
| "step": 721 |
| }, |
| { |
| "epoch": 1.6418816388467374, |
| "grad_norm": 0.5896364450454712, |
| "learning_rate": 4.2927902365989645e-06, |
| "loss": 1.2038, |
| "step": 722 |
| }, |
| { |
| "epoch": 1.6441578148710168, |
| "grad_norm": 0.614658534526825, |
| "learning_rate": 4.290628692887564e-06, |
| "loss": 1.2428, |
| "step": 723 |
| }, |
| { |
| "epoch": 1.6464339908952959, |
| "grad_norm": 0.5901724100112915, |
| "learning_rate": 4.288464396900005e-06, |
| "loss": 1.2464, |
| "step": 724 |
| }, |
| { |
| "epoch": 1.648710166919575, |
| "grad_norm": 0.6086544394493103, |
| "learning_rate": 4.286297351962908e-06, |
| "loss": 1.1895, |
| "step": 725 |
| }, |
| { |
| "epoch": 1.6509863429438543, |
| "grad_norm": 0.5841042399406433, |
| "learning_rate": 4.284127561407118e-06, |
| "loss": 1.2222, |
| "step": 726 |
| }, |
| { |
| "epoch": 1.6532625189681336, |
| "grad_norm": 0.5791555643081665, |
| "learning_rate": 4.281955028567698e-06, |
| "loss": 1.2489, |
| "step": 727 |
| }, |
| { |
| "epoch": 1.655538694992413, |
| "grad_norm": 0.6219162344932556, |
| "learning_rate": 4.27977975678393e-06, |
| "loss": 1.2208, |
| "step": 728 |
| }, |
| { |
| "epoch": 1.657814871016692, |
| "grad_norm": 0.597656786441803, |
| "learning_rate": 4.277601749399301e-06, |
| "loss": 1.2049, |
| "step": 729 |
| }, |
| { |
| "epoch": 1.660091047040971, |
| "grad_norm": 0.5991064310073853, |
| "learning_rate": 4.27542100976151e-06, |
| "loss": 1.2602, |
| "step": 730 |
| }, |
| { |
| "epoch": 1.6623672230652504, |
| "grad_norm": 0.5922961831092834, |
| "learning_rate": 4.273237541222447e-06, |
| "loss": 1.2077, |
| "step": 731 |
| }, |
| { |
| "epoch": 1.6646433990895297, |
| "grad_norm": 0.6028023362159729, |
| "learning_rate": 4.2710513471382005e-06, |
| "loss": 1.2092, |
| "step": 732 |
| }, |
| { |
| "epoch": 1.6669195751138088, |
| "grad_norm": 0.581685483455658, |
| "learning_rate": 4.268862430869052e-06, |
| "loss": 1.2192, |
| "step": 733 |
| }, |
| { |
| "epoch": 1.669195751138088, |
| "grad_norm": 0.6332095265388489, |
| "learning_rate": 4.26667079577946e-06, |
| "loss": 1.2573, |
| "step": 734 |
| }, |
| { |
| "epoch": 1.6714719271623673, |
| "grad_norm": 0.6062667369842529, |
| "learning_rate": 4.2644764452380675e-06, |
| "loss": 1.2994, |
| "step": 735 |
| }, |
| { |
| "epoch": 1.6737481031866466, |
| "grad_norm": 0.5829861164093018, |
| "learning_rate": 4.262279382617687e-06, |
| "loss": 1.2286, |
| "step": 736 |
| }, |
| { |
| "epoch": 1.6760242792109257, |
| "grad_norm": 0.587378203868866, |
| "learning_rate": 4.260079611295303e-06, |
| "loss": 1.182, |
| "step": 737 |
| }, |
| { |
| "epoch": 1.6783004552352048, |
| "grad_norm": 0.6240544319152832, |
| "learning_rate": 4.257877134652062e-06, |
| "loss": 1.2543, |
| "step": 738 |
| }, |
| { |
| "epoch": 1.680576631259484, |
| "grad_norm": 0.5865784287452698, |
| "learning_rate": 4.255671956073269e-06, |
| "loss": 1.2355, |
| "step": 739 |
| }, |
| { |
| "epoch": 1.6828528072837634, |
| "grad_norm": 0.5847815871238708, |
| "learning_rate": 4.253464078948382e-06, |
| "loss": 1.2069, |
| "step": 740 |
| }, |
| { |
| "epoch": 1.6851289833080425, |
| "grad_norm": 0.5941992402076721, |
| "learning_rate": 4.251253506671006e-06, |
| "loss": 1.2423, |
| "step": 741 |
| }, |
| { |
| "epoch": 1.6874051593323216, |
| "grad_norm": 0.6245031952857971, |
| "learning_rate": 4.249040242638889e-06, |
| "loss": 1.2555, |
| "step": 742 |
| }, |
| { |
| "epoch": 1.689681335356601, |
| "grad_norm": 0.6055291295051575, |
| "learning_rate": 4.246824290253917e-06, |
| "loss": 1.2261, |
| "step": 743 |
| }, |
| { |
| "epoch": 1.6919575113808802, |
| "grad_norm": 0.5905616283416748, |
| "learning_rate": 4.244605652922108e-06, |
| "loss": 1.2385, |
| "step": 744 |
| }, |
| { |
| "epoch": 1.6942336874051593, |
| "grad_norm": 0.5896965265274048, |
| "learning_rate": 4.2423843340536066e-06, |
| "loss": 1.1945, |
| "step": 745 |
| }, |
| { |
| "epoch": 1.6965098634294384, |
| "grad_norm": 0.6129325032234192, |
| "learning_rate": 4.240160337062678e-06, |
| "loss": 1.223, |
| "step": 746 |
| }, |
| { |
| "epoch": 1.6987860394537178, |
| "grad_norm": 0.5988030433654785, |
| "learning_rate": 4.237933665367705e-06, |
| "loss": 1.2197, |
| "step": 747 |
| }, |
| { |
| "epoch": 1.701062215477997, |
| "grad_norm": 0.599388837814331, |
| "learning_rate": 4.235704322391181e-06, |
| "loss": 1.2214, |
| "step": 748 |
| }, |
| { |
| "epoch": 1.7033383915022762, |
| "grad_norm": 0.6087759137153625, |
| "learning_rate": 4.233472311559708e-06, |
| "loss": 1.2302, |
| "step": 749 |
| }, |
| { |
| "epoch": 1.7056145675265553, |
| "grad_norm": 0.5895616412162781, |
| "learning_rate": 4.231237636303982e-06, |
| "loss": 1.1976, |
| "step": 750 |
| }, |
| { |
| "epoch": 1.7078907435508346, |
| "grad_norm": 0.6117663383483887, |
| "learning_rate": 4.229000300058802e-06, |
| "loss": 1.1928, |
| "step": 751 |
| }, |
| { |
| "epoch": 1.710166919575114, |
| "grad_norm": 0.5945206880569458, |
| "learning_rate": 4.2267603062630526e-06, |
| "loss": 1.201, |
| "step": 752 |
| }, |
| { |
| "epoch": 1.712443095599393, |
| "grad_norm": 0.6434623599052429, |
| "learning_rate": 4.224517658359704e-06, |
| "loss": 1.239, |
| "step": 753 |
| }, |
| { |
| "epoch": 1.714719271623672, |
| "grad_norm": 0.5895166397094727, |
| "learning_rate": 4.222272359795806e-06, |
| "loss": 1.2305, |
| "step": 754 |
| }, |
| { |
| "epoch": 1.7169954476479514, |
| "grad_norm": 0.6248841285705566, |
| "learning_rate": 4.220024414022482e-06, |
| "loss": 1.2332, |
| "step": 755 |
| }, |
| { |
| "epoch": 1.7192716236722307, |
| "grad_norm": 0.6209638118743896, |
| "learning_rate": 4.217773824494926e-06, |
| "loss": 1.2773, |
| "step": 756 |
| }, |
| { |
| "epoch": 1.7215477996965098, |
| "grad_norm": 0.5973532199859619, |
| "learning_rate": 4.215520594672394e-06, |
| "loss": 1.1992, |
| "step": 757 |
| }, |
| { |
| "epoch": 1.723823975720789, |
| "grad_norm": 0.5936313271522522, |
| "learning_rate": 4.2132647280182e-06, |
| "loss": 1.2412, |
| "step": 758 |
| }, |
| { |
| "epoch": 1.7261001517450683, |
| "grad_norm": 0.6053516268730164, |
| "learning_rate": 4.211006227999713e-06, |
| "loss": 1.2129, |
| "step": 759 |
| }, |
| { |
| "epoch": 1.7283763277693476, |
| "grad_norm": 0.6065954566001892, |
| "learning_rate": 4.208745098088348e-06, |
| "loss": 1.2395, |
| "step": 760 |
| }, |
| { |
| "epoch": 1.7306525037936267, |
| "grad_norm": 0.6134182214736938, |
| "learning_rate": 4.206481341759562e-06, |
| "loss": 1.1969, |
| "step": 761 |
| }, |
| { |
| "epoch": 1.7329286798179058, |
| "grad_norm": 0.6103958487510681, |
| "learning_rate": 4.204214962492849e-06, |
| "loss": 1.2583, |
| "step": 762 |
| }, |
| { |
| "epoch": 1.735204855842185, |
| "grad_norm": 0.6010955572128296, |
| "learning_rate": 4.201945963771736e-06, |
| "loss": 1.2638, |
| "step": 763 |
| }, |
| { |
| "epoch": 1.7374810318664644, |
| "grad_norm": 0.6201740503311157, |
| "learning_rate": 4.199674349083776e-06, |
| "loss": 1.2491, |
| "step": 764 |
| }, |
| { |
| "epoch": 1.7397572078907435, |
| "grad_norm": 0.6140694618225098, |
| "learning_rate": 4.197400121920539e-06, |
| "loss": 1.243, |
| "step": 765 |
| }, |
| { |
| "epoch": 1.7420333839150226, |
| "grad_norm": 0.6441624164581299, |
| "learning_rate": 4.1951232857776164e-06, |
| "loss": 1.2614, |
| "step": 766 |
| }, |
| { |
| "epoch": 1.744309559939302, |
| "grad_norm": 0.6050844192504883, |
| "learning_rate": 4.192843844154606e-06, |
| "loss": 1.1756, |
| "step": 767 |
| }, |
| { |
| "epoch": 1.7465857359635812, |
| "grad_norm": 0.6491802930831909, |
| "learning_rate": 4.190561800555111e-06, |
| "loss": 1.2029, |
| "step": 768 |
| }, |
| { |
| "epoch": 1.7488619119878603, |
| "grad_norm": 0.6259174942970276, |
| "learning_rate": 4.1882771584867345e-06, |
| "loss": 1.1912, |
| "step": 769 |
| }, |
| { |
| "epoch": 1.7511380880121397, |
| "grad_norm": 0.5955666303634644, |
| "learning_rate": 4.1859899214610735e-06, |
| "loss": 1.2701, |
| "step": 770 |
| }, |
| { |
| "epoch": 1.7534142640364188, |
| "grad_norm": 0.6060442924499512, |
| "learning_rate": 4.183700092993712e-06, |
| "loss": 1.2269, |
| "step": 771 |
| }, |
| { |
| "epoch": 1.755690440060698, |
| "grad_norm": 0.6210846900939941, |
| "learning_rate": 4.1814076766042206e-06, |
| "loss": 1.2679, |
| "step": 772 |
| }, |
| { |
| "epoch": 1.7579666160849774, |
| "grad_norm": 0.5922744870185852, |
| "learning_rate": 4.179112675816144e-06, |
| "loss": 1.2171, |
| "step": 773 |
| }, |
| { |
| "epoch": 1.7602427921092565, |
| "grad_norm": 0.6048167943954468, |
| "learning_rate": 4.176815094157e-06, |
| "loss": 1.1887, |
| "step": 774 |
| }, |
| { |
| "epoch": 1.7625189681335356, |
| "grad_norm": 0.6661959290504456, |
| "learning_rate": 4.174514935158277e-06, |
| "loss": 1.2439, |
| "step": 775 |
| }, |
| { |
| "epoch": 1.764795144157815, |
| "grad_norm": 0.5862908959388733, |
| "learning_rate": 4.172212202355419e-06, |
| "loss": 1.2594, |
| "step": 776 |
| }, |
| { |
| "epoch": 1.7670713201820942, |
| "grad_norm": 0.615178644657135, |
| "learning_rate": 4.16990689928783e-06, |
| "loss": 1.2137, |
| "step": 777 |
| }, |
| { |
| "epoch": 1.7693474962063733, |
| "grad_norm": 0.6170365810394287, |
| "learning_rate": 4.167599029498865e-06, |
| "loss": 1.2278, |
| "step": 778 |
| }, |
| { |
| "epoch": 1.7716236722306524, |
| "grad_norm": 0.6055428385734558, |
| "learning_rate": 4.165288596535821e-06, |
| "loss": 1.232, |
| "step": 779 |
| }, |
| { |
| "epoch": 1.7738998482549317, |
| "grad_norm": 0.6081527471542358, |
| "learning_rate": 4.162975603949937e-06, |
| "loss": 1.2392, |
| "step": 780 |
| }, |
| { |
| "epoch": 1.776176024279211, |
| "grad_norm": 0.6220976710319519, |
| "learning_rate": 4.160660055296385e-06, |
| "loss": 1.2467, |
| "step": 781 |
| }, |
| { |
| "epoch": 1.7784522003034902, |
| "grad_norm": 0.5995768904685974, |
| "learning_rate": 4.158341954134268e-06, |
| "loss": 1.2141, |
| "step": 782 |
| }, |
| { |
| "epoch": 1.7807283763277693, |
| "grad_norm": 0.5946653485298157, |
| "learning_rate": 4.15602130402661e-06, |
| "loss": 1.255, |
| "step": 783 |
| }, |
| { |
| "epoch": 1.7830045523520486, |
| "grad_norm": 0.6094076633453369, |
| "learning_rate": 4.1536981085403546e-06, |
| "loss": 1.243, |
| "step": 784 |
| }, |
| { |
| "epoch": 1.785280728376328, |
| "grad_norm": 0.6584082841873169, |
| "learning_rate": 4.151372371246356e-06, |
| "loss": 1.2382, |
| "step": 785 |
| }, |
| { |
| "epoch": 1.787556904400607, |
| "grad_norm": 0.6139714121818542, |
| "learning_rate": 4.149044095719377e-06, |
| "loss": 1.2528, |
| "step": 786 |
| }, |
| { |
| "epoch": 1.789833080424886, |
| "grad_norm": 0.6047011017799377, |
| "learning_rate": 4.14671328553808e-06, |
| "loss": 1.2034, |
| "step": 787 |
| }, |
| { |
| "epoch": 1.7921092564491654, |
| "grad_norm": 0.6093196868896484, |
| "learning_rate": 4.144379944285024e-06, |
| "loss": 1.2669, |
| "step": 788 |
| }, |
| { |
| "epoch": 1.7943854324734447, |
| "grad_norm": 0.6222574710845947, |
| "learning_rate": 4.142044075546658e-06, |
| "loss": 1.1817, |
| "step": 789 |
| }, |
| { |
| "epoch": 1.7966616084977238, |
| "grad_norm": 0.6427398920059204, |
| "learning_rate": 4.13970568291332e-06, |
| "loss": 1.2165, |
| "step": 790 |
| }, |
| { |
| "epoch": 1.798937784522003, |
| "grad_norm": 0.6227960586547852, |
| "learning_rate": 4.13736476997922e-06, |
| "loss": 1.1816, |
| "step": 791 |
| }, |
| { |
| "epoch": 1.8012139605462822, |
| "grad_norm": 0.6001450419425964, |
| "learning_rate": 4.135021340342446e-06, |
| "loss": 1.2373, |
| "step": 792 |
| }, |
| { |
| "epoch": 1.8034901365705616, |
| "grad_norm": 0.6028245091438293, |
| "learning_rate": 4.132675397604956e-06, |
| "loss": 1.2524, |
| "step": 793 |
| }, |
| { |
| "epoch": 1.8057663125948407, |
| "grad_norm": 0.5959303379058838, |
| "learning_rate": 4.130326945372567e-06, |
| "loss": 1.198, |
| "step": 794 |
| }, |
| { |
| "epoch": 1.8080424886191198, |
| "grad_norm": 0.6001620888710022, |
| "learning_rate": 4.127975987254955e-06, |
| "loss": 1.2137, |
| "step": 795 |
| }, |
| { |
| "epoch": 1.810318664643399, |
| "grad_norm": 0.5951507091522217, |
| "learning_rate": 4.125622526865647e-06, |
| "loss": 1.2285, |
| "step": 796 |
| }, |
| { |
| "epoch": 1.8125948406676784, |
| "grad_norm": 0.614658534526825, |
| "learning_rate": 4.123266567822017e-06, |
| "loss": 1.2119, |
| "step": 797 |
| }, |
| { |
| "epoch": 1.8148710166919575, |
| "grad_norm": 0.6394176483154297, |
| "learning_rate": 4.120908113745281e-06, |
| "loss": 1.2444, |
| "step": 798 |
| }, |
| { |
| "epoch": 1.8171471927162366, |
| "grad_norm": 0.5989351868629456, |
| "learning_rate": 4.118547168260485e-06, |
| "loss": 1.1838, |
| "step": 799 |
| }, |
| { |
| "epoch": 1.819423368740516, |
| "grad_norm": 0.6235303282737732, |
| "learning_rate": 4.11618373499651e-06, |
| "loss": 1.2163, |
| "step": 800 |
| }, |
| { |
| "epoch": 1.8216995447647952, |
| "grad_norm": 0.6402750015258789, |
| "learning_rate": 4.113817817586055e-06, |
| "loss": 1.2445, |
| "step": 801 |
| }, |
| { |
| "epoch": 1.8239757207890743, |
| "grad_norm": 0.5973191857337952, |
| "learning_rate": 4.111449419665645e-06, |
| "loss": 1.2308, |
| "step": 802 |
| }, |
| { |
| "epoch": 1.8262518968133534, |
| "grad_norm": 0.6300286650657654, |
| "learning_rate": 4.1090785448756096e-06, |
| "loss": 1.2319, |
| "step": 803 |
| }, |
| { |
| "epoch": 1.8285280728376327, |
| "grad_norm": 0.5970984697341919, |
| "learning_rate": 4.1067051968600914e-06, |
| "loss": 1.1944, |
| "step": 804 |
| }, |
| { |
| "epoch": 1.830804248861912, |
| "grad_norm": 0.607427179813385, |
| "learning_rate": 4.104329379267031e-06, |
| "loss": 1.2331, |
| "step": 805 |
| }, |
| { |
| "epoch": 1.8330804248861912, |
| "grad_norm": 0.6165644526481628, |
| "learning_rate": 4.101951095748166e-06, |
| "loss": 1.2337, |
| "step": 806 |
| }, |
| { |
| "epoch": 1.8353566009104703, |
| "grad_norm": 0.639166533946991, |
| "learning_rate": 4.099570349959025e-06, |
| "loss": 1.2263, |
| "step": 807 |
| }, |
| { |
| "epoch": 1.8376327769347496, |
| "grad_norm": 0.6345863342285156, |
| "learning_rate": 4.097187145558919e-06, |
| "loss": 1.2397, |
| "step": 808 |
| }, |
| { |
| "epoch": 1.839908952959029, |
| "grad_norm": 0.607635498046875, |
| "learning_rate": 4.094801486210941e-06, |
| "loss": 1.1972, |
| "step": 809 |
| }, |
| { |
| "epoch": 1.842185128983308, |
| "grad_norm": 0.6224584579467773, |
| "learning_rate": 4.092413375581955e-06, |
| "loss": 1.231, |
| "step": 810 |
| }, |
| { |
| "epoch": 1.844461305007587, |
| "grad_norm": 0.5929398536682129, |
| "learning_rate": 4.090022817342593e-06, |
| "loss": 1.2234, |
| "step": 811 |
| }, |
| { |
| "epoch": 1.8467374810318664, |
| "grad_norm": 0.6391967535018921, |
| "learning_rate": 4.0876298151672525e-06, |
| "loss": 1.1931, |
| "step": 812 |
| }, |
| { |
| "epoch": 1.8490136570561457, |
| "grad_norm": 0.599383533000946, |
| "learning_rate": 4.08523437273408e-06, |
| "loss": 1.2425, |
| "step": 813 |
| }, |
| { |
| "epoch": 1.851289833080425, |
| "grad_norm": 0.5998767614364624, |
| "learning_rate": 4.082836493724981e-06, |
| "loss": 1.2188, |
| "step": 814 |
| }, |
| { |
| "epoch": 1.8535660091047041, |
| "grad_norm": 0.5895645618438721, |
| "learning_rate": 4.080436181825601e-06, |
| "loss": 1.2286, |
| "step": 815 |
| }, |
| { |
| "epoch": 1.8558421851289832, |
| "grad_norm": 0.6172052621841431, |
| "learning_rate": 4.078033440725327e-06, |
| "loss": 1.2007, |
| "step": 816 |
| }, |
| { |
| "epoch": 1.8581183611532626, |
| "grad_norm": 0.613259494304657, |
| "learning_rate": 4.075628274117279e-06, |
| "loss": 1.2256, |
| "step": 817 |
| }, |
| { |
| "epoch": 1.8603945371775419, |
| "grad_norm": 0.6026812791824341, |
| "learning_rate": 4.073220685698304e-06, |
| "loss": 1.2317, |
| "step": 818 |
| }, |
| { |
| "epoch": 1.862670713201821, |
| "grad_norm": 0.6112560629844666, |
| "learning_rate": 4.070810679168975e-06, |
| "loss": 1.2275, |
| "step": 819 |
| }, |
| { |
| "epoch": 1.8649468892261, |
| "grad_norm": 0.6044736504554749, |
| "learning_rate": 4.068398258233579e-06, |
| "loss": 1.2515, |
| "step": 820 |
| }, |
| { |
| "epoch": 1.8672230652503794, |
| "grad_norm": 0.6291022896766663, |
| "learning_rate": 4.065983426600113e-06, |
| "loss": 1.2137, |
| "step": 821 |
| }, |
| { |
| "epoch": 1.8694992412746587, |
| "grad_norm": 0.6136301755905151, |
| "learning_rate": 4.063566187980282e-06, |
| "loss": 1.2144, |
| "step": 822 |
| }, |
| { |
| "epoch": 1.8717754172989378, |
| "grad_norm": 0.6166698932647705, |
| "learning_rate": 4.06114654608949e-06, |
| "loss": 1.2434, |
| "step": 823 |
| }, |
| { |
| "epoch": 1.874051593323217, |
| "grad_norm": 0.6023617386817932, |
| "learning_rate": 4.058724504646834e-06, |
| "loss": 1.2186, |
| "step": 824 |
| }, |
| { |
| "epoch": 1.8763277693474962, |
| "grad_norm": 0.6259661912918091, |
| "learning_rate": 4.0563000673751e-06, |
| "loss": 1.1989, |
| "step": 825 |
| }, |
| { |
| "epoch": 1.8786039453717756, |
| "grad_norm": 0.6420421004295349, |
| "learning_rate": 4.053873238000756e-06, |
| "loss": 1.1981, |
| "step": 826 |
| }, |
| { |
| "epoch": 1.8808801213960546, |
| "grad_norm": 0.6250731348991394, |
| "learning_rate": 4.051444020253947e-06, |
| "loss": 1.246, |
| "step": 827 |
| }, |
| { |
| "epoch": 1.8831562974203337, |
| "grad_norm": 0.6473506689071655, |
| "learning_rate": 4.0490124178684884e-06, |
| "loss": 1.213, |
| "step": 828 |
| }, |
| { |
| "epoch": 1.885432473444613, |
| "grad_norm": 0.6448357701301575, |
| "learning_rate": 4.046578434581862e-06, |
| "loss": 1.1696, |
| "step": 829 |
| }, |
| { |
| "epoch": 1.8877086494688924, |
| "grad_norm": 0.6176803112030029, |
| "learning_rate": 4.044142074135209e-06, |
| "loss": 1.2453, |
| "step": 830 |
| }, |
| { |
| "epoch": 1.8899848254931715, |
| "grad_norm": 0.6398005485534668, |
| "learning_rate": 4.0417033402733244e-06, |
| "loss": 1.2198, |
| "step": 831 |
| }, |
| { |
| "epoch": 1.8922610015174506, |
| "grad_norm": 0.6350208520889282, |
| "learning_rate": 4.03926223674465e-06, |
| "loss": 1.2528, |
| "step": 832 |
| }, |
| { |
| "epoch": 1.89453717754173, |
| "grad_norm": 0.5937830209732056, |
| "learning_rate": 4.03681876730127e-06, |
| "loss": 1.1594, |
| "step": 833 |
| }, |
| { |
| "epoch": 1.8968133535660092, |
| "grad_norm": 0.6130216121673584, |
| "learning_rate": 4.034372935698908e-06, |
| "loss": 1.222, |
| "step": 834 |
| }, |
| { |
| "epoch": 1.8990895295902883, |
| "grad_norm": 0.6638323664665222, |
| "learning_rate": 4.031924745696916e-06, |
| "loss": 1.2338, |
| "step": 835 |
| }, |
| { |
| "epoch": 1.9013657056145674, |
| "grad_norm": 0.6491904258728027, |
| "learning_rate": 4.029474201058269e-06, |
| "loss": 1.2219, |
| "step": 836 |
| }, |
| { |
| "epoch": 1.9036418816388467, |
| "grad_norm": 0.612301766872406, |
| "learning_rate": 4.027021305549565e-06, |
| "loss": 1.2663, |
| "step": 837 |
| }, |
| { |
| "epoch": 1.905918057663126, |
| "grad_norm": 0.6025054454803467, |
| "learning_rate": 4.024566062941014e-06, |
| "loss": 1.2264, |
| "step": 838 |
| }, |
| { |
| "epoch": 1.9081942336874052, |
| "grad_norm": 0.6344963312149048, |
| "learning_rate": 4.022108477006434e-06, |
| "loss": 1.1948, |
| "step": 839 |
| }, |
| { |
| "epoch": 1.9104704097116842, |
| "grad_norm": 0.6077335476875305, |
| "learning_rate": 4.019648551523243e-06, |
| "loss": 1.2394, |
| "step": 840 |
| }, |
| { |
| "epoch": 1.9127465857359636, |
| "grad_norm": 0.6338925361633301, |
| "learning_rate": 4.017186290272456e-06, |
| "loss": 1.2136, |
| "step": 841 |
| }, |
| { |
| "epoch": 1.9150227617602429, |
| "grad_norm": 0.6291373372077942, |
| "learning_rate": 4.014721697038678e-06, |
| "loss": 1.2374, |
| "step": 842 |
| }, |
| { |
| "epoch": 1.917298937784522, |
| "grad_norm": 0.6118108630180359, |
| "learning_rate": 4.0122547756101005e-06, |
| "loss": 1.2045, |
| "step": 843 |
| }, |
| { |
| "epoch": 1.919575113808801, |
| "grad_norm": 0.6250407695770264, |
| "learning_rate": 4.009785529778489e-06, |
| "loss": 1.2349, |
| "step": 844 |
| }, |
| { |
| "epoch": 1.9218512898330804, |
| "grad_norm": 0.6737698912620544, |
| "learning_rate": 4.007313963339188e-06, |
| "loss": 1.2334, |
| "step": 845 |
| }, |
| { |
| "epoch": 1.9241274658573597, |
| "grad_norm": 0.649118959903717, |
| "learning_rate": 4.004840080091103e-06, |
| "loss": 1.1981, |
| "step": 846 |
| }, |
| { |
| "epoch": 1.9264036418816388, |
| "grad_norm": 0.6312914490699768, |
| "learning_rate": 4.002363883836704e-06, |
| "loss": 1.2341, |
| "step": 847 |
| }, |
| { |
| "epoch": 1.928679817905918, |
| "grad_norm": 0.6146298050880432, |
| "learning_rate": 3.999885378382013e-06, |
| "loss": 1.1925, |
| "step": 848 |
| }, |
| { |
| "epoch": 1.9309559939301972, |
| "grad_norm": 0.6233289241790771, |
| "learning_rate": 3.997404567536606e-06, |
| "loss": 1.2407, |
| "step": 849 |
| }, |
| { |
| "epoch": 1.9332321699544766, |
| "grad_norm": 0.6072235107421875, |
| "learning_rate": 3.994921455113598e-06, |
| "loss": 1.2033, |
| "step": 850 |
| }, |
| { |
| "epoch": 1.9355083459787557, |
| "grad_norm": 0.6547655463218689, |
| "learning_rate": 3.992436044929645e-06, |
| "loss": 1.2368, |
| "step": 851 |
| }, |
| { |
| "epoch": 1.9377845220030347, |
| "grad_norm": 0.6056034564971924, |
| "learning_rate": 3.989948340804932e-06, |
| "loss": 1.2212, |
| "step": 852 |
| }, |
| { |
| "epoch": 1.940060698027314, |
| "grad_norm": 0.6160012483596802, |
| "learning_rate": 3.9874583465631725e-06, |
| "loss": 1.1944, |
| "step": 853 |
| }, |
| { |
| "epoch": 1.9423368740515934, |
| "grad_norm": 0.641826868057251, |
| "learning_rate": 3.984966066031598e-06, |
| "loss": 1.2499, |
| "step": 854 |
| }, |
| { |
| "epoch": 1.9446130500758725, |
| "grad_norm": 0.6412007808685303, |
| "learning_rate": 3.982471503040954e-06, |
| "loss": 1.2024, |
| "step": 855 |
| }, |
| { |
| "epoch": 1.9468892261001516, |
| "grad_norm": 0.6296584606170654, |
| "learning_rate": 3.979974661425497e-06, |
| "loss": 1.1813, |
| "step": 856 |
| }, |
| { |
| "epoch": 1.949165402124431, |
| "grad_norm": 0.6448803544044495, |
| "learning_rate": 3.977475545022983e-06, |
| "loss": 1.2672, |
| "step": 857 |
| }, |
| { |
| "epoch": 1.9514415781487102, |
| "grad_norm": 0.6320902705192566, |
| "learning_rate": 3.9749741576746645e-06, |
| "loss": 1.196, |
| "step": 858 |
| }, |
| { |
| "epoch": 1.9537177541729895, |
| "grad_norm": 0.6109302639961243, |
| "learning_rate": 3.972470503225285e-06, |
| "loss": 1.2277, |
| "step": 859 |
| }, |
| { |
| "epoch": 1.9559939301972686, |
| "grad_norm": 0.6240274310112, |
| "learning_rate": 3.969964585523076e-06, |
| "loss": 1.2625, |
| "step": 860 |
| }, |
| { |
| "epoch": 1.9582701062215477, |
| "grad_norm": 0.5958450436592102, |
| "learning_rate": 3.967456408419742e-06, |
| "loss": 1.2133, |
| "step": 861 |
| }, |
| { |
| "epoch": 1.960546282245827, |
| "grad_norm": 0.6262888312339783, |
| "learning_rate": 3.964945975770464e-06, |
| "loss": 1.2238, |
| "step": 862 |
| }, |
| { |
| "epoch": 1.9628224582701064, |
| "grad_norm": 0.6366564631462097, |
| "learning_rate": 3.962433291433889e-06, |
| "loss": 1.2372, |
| "step": 863 |
| }, |
| { |
| "epoch": 1.9650986342943855, |
| "grad_norm": 0.6750831007957458, |
| "learning_rate": 3.959918359272125e-06, |
| "loss": 1.2409, |
| "step": 864 |
| }, |
| { |
| "epoch": 1.9673748103186646, |
| "grad_norm": 0.5879358649253845, |
| "learning_rate": 3.957401183150734e-06, |
| "loss": 1.2122, |
| "step": 865 |
| }, |
| { |
| "epoch": 1.9696509863429439, |
| "grad_norm": 0.6384773254394531, |
| "learning_rate": 3.9548817669387295e-06, |
| "loss": 1.2046, |
| "step": 866 |
| }, |
| { |
| "epoch": 1.9719271623672232, |
| "grad_norm": 0.6435151100158691, |
| "learning_rate": 3.952360114508565e-06, |
| "loss": 1.2545, |
| "step": 867 |
| }, |
| { |
| "epoch": 1.9742033383915023, |
| "grad_norm": 0.6609162092208862, |
| "learning_rate": 3.949836229736133e-06, |
| "loss": 1.2548, |
| "step": 868 |
| }, |
| { |
| "epoch": 1.9764795144157814, |
| "grad_norm": 0.6402998566627502, |
| "learning_rate": 3.947310116500758e-06, |
| "loss": 1.2369, |
| "step": 869 |
| }, |
| { |
| "epoch": 1.9787556904400607, |
| "grad_norm": 0.6171389222145081, |
| "learning_rate": 3.944781778685189e-06, |
| "loss": 1.1803, |
| "step": 870 |
| }, |
| { |
| "epoch": 1.98103186646434, |
| "grad_norm": 0.6790279150009155, |
| "learning_rate": 3.9422512201755925e-06, |
| "loss": 1.2349, |
| "step": 871 |
| }, |
| { |
| "epoch": 1.9833080424886191, |
| "grad_norm": 0.636738121509552, |
| "learning_rate": 3.93971844486155e-06, |
| "loss": 1.233, |
| "step": 872 |
| }, |
| { |
| "epoch": 1.9855842185128982, |
| "grad_norm": 0.6281400918960571, |
| "learning_rate": 3.937183456636051e-06, |
| "loss": 1.1973, |
| "step": 873 |
| }, |
| { |
| "epoch": 1.9878603945371776, |
| "grad_norm": 0.6086034774780273, |
| "learning_rate": 3.9346462593954845e-06, |
| "loss": 1.2017, |
| "step": 874 |
| }, |
| { |
| "epoch": 1.9901365705614569, |
| "grad_norm": 0.6195533871650696, |
| "learning_rate": 3.932106857039637e-06, |
| "loss": 1.22, |
| "step": 875 |
| }, |
| { |
| "epoch": 1.992412746585736, |
| "grad_norm": 0.6325448155403137, |
| "learning_rate": 3.929565253471681e-06, |
| "loss": 1.2081, |
| "step": 876 |
| }, |
| { |
| "epoch": 1.994688922610015, |
| "grad_norm": 0.6466575860977173, |
| "learning_rate": 3.927021452598177e-06, |
| "loss": 1.2734, |
| "step": 877 |
| }, |
| { |
| "epoch": 1.9969650986342944, |
| "grad_norm": 0.648371160030365, |
| "learning_rate": 3.924475458329059e-06, |
| "loss": 1.2018, |
| "step": 878 |
| }, |
| { |
| "epoch": 1.9992412746585737, |
| "grad_norm": 0.6124558448791504, |
| "learning_rate": 3.921927274577633e-06, |
| "loss": 1.2244, |
| "step": 879 |
| }, |
| { |
| "epoch": 2.0, |
| "grad_norm": 0.6124558448791504, |
| "learning_rate": 3.919376905260575e-06, |
| "loss": 1.1772, |
| "step": 880 |
| }, |
| { |
| "epoch": 2.0022761760242793, |
| "grad_norm": 1.1429736614227295, |
| "learning_rate": 3.916824354297911e-06, |
| "loss": 1.208, |
| "step": 881 |
| }, |
| { |
| "epoch": 2.0045523520485586, |
| "grad_norm": 0.6282771229743958, |
| "learning_rate": 3.91426962561303e-06, |
| "loss": 1.196, |
| "step": 882 |
| }, |
| { |
| "epoch": 2.0068285280728375, |
| "grad_norm": 0.6108458042144775, |
| "learning_rate": 3.911712723132661e-06, |
| "loss": 1.2091, |
| "step": 883 |
| }, |
| { |
| "epoch": 2.009104704097117, |
| "grad_norm": 0.6176791787147522, |
| "learning_rate": 3.909153650786878e-06, |
| "loss": 1.1683, |
| "step": 884 |
| }, |
| { |
| "epoch": 2.011380880121396, |
| "grad_norm": 0.6084854006767273, |
| "learning_rate": 3.9065924125090905e-06, |
| "loss": 1.1683, |
| "step": 885 |
| }, |
| { |
| "epoch": 2.0136570561456755, |
| "grad_norm": 0.6014538407325745, |
| "learning_rate": 3.904029012236033e-06, |
| "loss": 1.2182, |
| "step": 886 |
| }, |
| { |
| "epoch": 2.0159332321699543, |
| "grad_norm": 0.6618431210517883, |
| "learning_rate": 3.901463453907771e-06, |
| "loss": 1.2022, |
| "step": 887 |
| }, |
| { |
| "epoch": 2.0182094081942337, |
| "grad_norm": 0.6439629197120667, |
| "learning_rate": 3.898895741467678e-06, |
| "loss": 1.1718, |
| "step": 888 |
| }, |
| { |
| "epoch": 2.020485584218513, |
| "grad_norm": 0.6629829406738281, |
| "learning_rate": 3.8963258788624425e-06, |
| "loss": 1.2078, |
| "step": 889 |
| }, |
| { |
| "epoch": 2.0227617602427923, |
| "grad_norm": 0.6440435647964478, |
| "learning_rate": 3.89375387004206e-06, |
| "loss": 1.226, |
| "step": 890 |
| }, |
| { |
| "epoch": 2.025037936267071, |
| "grad_norm": 0.6640979647636414, |
| "learning_rate": 3.891179718959822e-06, |
| "loss": 1.2087, |
| "step": 891 |
| }, |
| { |
| "epoch": 2.0273141122913505, |
| "grad_norm": 0.6583533883094788, |
| "learning_rate": 3.888603429572314e-06, |
| "loss": 1.2205, |
| "step": 892 |
| }, |
| { |
| "epoch": 2.02959028831563, |
| "grad_norm": 0.6385941505432129, |
| "learning_rate": 3.886025005839406e-06, |
| "loss": 1.2329, |
| "step": 893 |
| }, |
| { |
| "epoch": 2.031866464339909, |
| "grad_norm": 0.625807523727417, |
| "learning_rate": 3.883444451724251e-06, |
| "loss": 1.2047, |
| "step": 894 |
| }, |
| { |
| "epoch": 2.034142640364188, |
| "grad_norm": 0.6311827301979065, |
| "learning_rate": 3.8808617711932776e-06, |
| "loss": 1.1932, |
| "step": 895 |
| }, |
| { |
| "epoch": 2.0364188163884673, |
| "grad_norm": 0.6245951652526855, |
| "learning_rate": 3.878276968216178e-06, |
| "loss": 1.1699, |
| "step": 896 |
| }, |
| { |
| "epoch": 2.0386949924127467, |
| "grad_norm": 0.6247482895851135, |
| "learning_rate": 3.875690046765912e-06, |
| "loss": 1.1894, |
| "step": 897 |
| }, |
| { |
| "epoch": 2.040971168437026, |
| "grad_norm": 0.6321713924407959, |
| "learning_rate": 3.873101010818692e-06, |
| "loss": 1.1908, |
| "step": 898 |
| }, |
| { |
| "epoch": 2.043247344461305, |
| "grad_norm": 0.6260454058647156, |
| "learning_rate": 3.8705098643539825e-06, |
| "loss": 1.1934, |
| "step": 899 |
| }, |
| { |
| "epoch": 2.045523520485584, |
| "grad_norm": 0.6460039019584656, |
| "learning_rate": 3.867916611354489e-06, |
| "loss": 1.1728, |
| "step": 900 |
| }, |
| { |
| "epoch": 2.0477996965098635, |
| "grad_norm": 0.6572577357292175, |
| "learning_rate": 3.865321255806161e-06, |
| "loss": 1.196, |
| "step": 901 |
| }, |
| { |
| "epoch": 2.050075872534143, |
| "grad_norm": 0.6264122724533081, |
| "learning_rate": 3.8627238016981726e-06, |
| "loss": 1.1786, |
| "step": 902 |
| }, |
| { |
| "epoch": 2.0523520485584217, |
| "grad_norm": 0.6537541747093201, |
| "learning_rate": 3.860124253022928e-06, |
| "loss": 1.2392, |
| "step": 903 |
| }, |
| { |
| "epoch": 2.054628224582701, |
| "grad_norm": 0.6670436263084412, |
| "learning_rate": 3.857522613776048e-06, |
| "loss": 1.2374, |
| "step": 904 |
| }, |
| { |
| "epoch": 2.0569044006069803, |
| "grad_norm": 0.6385306715965271, |
| "learning_rate": 3.8549188879563685e-06, |
| "loss": 1.2107, |
| "step": 905 |
| }, |
| { |
| "epoch": 2.0591805766312596, |
| "grad_norm": 0.6829336881637573, |
| "learning_rate": 3.852313079565933e-06, |
| "loss": 1.1949, |
| "step": 906 |
| }, |
| { |
| "epoch": 2.0614567526555385, |
| "grad_norm": 0.6565775871276855, |
| "learning_rate": 3.849705192609987e-06, |
| "loss": 1.1741, |
| "step": 907 |
| }, |
| { |
| "epoch": 2.063732928679818, |
| "grad_norm": 0.6490259766578674, |
| "learning_rate": 3.847095231096965e-06, |
| "loss": 1.1502, |
| "step": 908 |
| }, |
| { |
| "epoch": 2.066009104704097, |
| "grad_norm": 0.666535496711731, |
| "learning_rate": 3.844483199038497e-06, |
| "loss": 1.2038, |
| "step": 909 |
| }, |
| { |
| "epoch": 2.0682852807283765, |
| "grad_norm": 0.6315106153488159, |
| "learning_rate": 3.841869100449392e-06, |
| "loss": 1.1853, |
| "step": 910 |
| }, |
| { |
| "epoch": 2.0705614567526553, |
| "grad_norm": 0.6364261507987976, |
| "learning_rate": 3.839252939347636e-06, |
| "loss": 1.2354, |
| "step": 911 |
| }, |
| { |
| "epoch": 2.0728376327769347, |
| "grad_norm": 0.6435543298721313, |
| "learning_rate": 3.836634719754385e-06, |
| "loss": 1.1969, |
| "step": 912 |
| }, |
| { |
| "epoch": 2.075113808801214, |
| "grad_norm": 0.6444733142852783, |
| "learning_rate": 3.834014445693961e-06, |
| "loss": 1.1584, |
| "step": 913 |
| }, |
| { |
| "epoch": 2.0773899848254933, |
| "grad_norm": 0.6528134942054749, |
| "learning_rate": 3.83139212119384e-06, |
| "loss": 1.2509, |
| "step": 914 |
| }, |
| { |
| "epoch": 2.079666160849772, |
| "grad_norm": 0.6634727120399475, |
| "learning_rate": 3.828767750284652e-06, |
| "loss": 1.1778, |
| "step": 915 |
| }, |
| { |
| "epoch": 2.0819423368740515, |
| "grad_norm": 0.6481142044067383, |
| "learning_rate": 3.826141337000173e-06, |
| "loss": 1.2162, |
| "step": 916 |
| }, |
| { |
| "epoch": 2.084218512898331, |
| "grad_norm": 0.6647942066192627, |
| "learning_rate": 3.8235128853773175e-06, |
| "loss": 1.2049, |
| "step": 917 |
| }, |
| { |
| "epoch": 2.08649468892261, |
| "grad_norm": 0.6879945397377014, |
| "learning_rate": 3.820882399456132e-06, |
| "loss": 1.1974, |
| "step": 918 |
| }, |
| { |
| "epoch": 2.088770864946889, |
| "grad_norm": 0.6415085196495056, |
| "learning_rate": 3.818249883279791e-06, |
| "loss": 1.1889, |
| "step": 919 |
| }, |
| { |
| "epoch": 2.0910470409711683, |
| "grad_norm": 0.6825420260429382, |
| "learning_rate": 3.8156153408945884e-06, |
| "loss": 1.1798, |
| "step": 920 |
| }, |
| { |
| "epoch": 2.0933232169954477, |
| "grad_norm": 0.672672688961029, |
| "learning_rate": 3.8129787763499354e-06, |
| "loss": 1.1754, |
| "step": 921 |
| }, |
| { |
| "epoch": 2.095599393019727, |
| "grad_norm": 0.6526525616645813, |
| "learning_rate": 3.810340193698348e-06, |
| "loss": 1.2003, |
| "step": 922 |
| }, |
| { |
| "epoch": 2.097875569044006, |
| "grad_norm": 0.6436169147491455, |
| "learning_rate": 3.807699596995445e-06, |
| "loss": 1.1869, |
| "step": 923 |
| }, |
| { |
| "epoch": 2.100151745068285, |
| "grad_norm": 0.6731212139129639, |
| "learning_rate": 3.805056990299942e-06, |
| "loss": 1.1897, |
| "step": 924 |
| }, |
| { |
| "epoch": 2.1024279210925645, |
| "grad_norm": 0.6585912108421326, |
| "learning_rate": 3.8024123776736433e-06, |
| "loss": 1.2104, |
| "step": 925 |
| }, |
| { |
| "epoch": 2.104704097116844, |
| "grad_norm": 0.6583617329597473, |
| "learning_rate": 3.7997657631814366e-06, |
| "loss": 1.2077, |
| "step": 926 |
| }, |
| { |
| "epoch": 2.106980273141123, |
| "grad_norm": 0.6234392523765564, |
| "learning_rate": 3.797117150891285e-06, |
| "loss": 1.2304, |
| "step": 927 |
| }, |
| { |
| "epoch": 2.109256449165402, |
| "grad_norm": 0.6528206467628479, |
| "learning_rate": 3.7944665448742257e-06, |
| "loss": 1.2274, |
| "step": 928 |
| }, |
| { |
| "epoch": 2.1115326251896813, |
| "grad_norm": 0.6302603483200073, |
| "learning_rate": 3.7918139492043572e-06, |
| "loss": 1.2271, |
| "step": 929 |
| }, |
| { |
| "epoch": 2.1138088012139606, |
| "grad_norm": 0.6452434659004211, |
| "learning_rate": 3.789159367958838e-06, |
| "loss": 1.1668, |
| "step": 930 |
| }, |
| { |
| "epoch": 2.11608497723824, |
| "grad_norm": 0.6380173563957214, |
| "learning_rate": 3.786502805217877e-06, |
| "loss": 1.2107, |
| "step": 931 |
| }, |
| { |
| "epoch": 2.118361153262519, |
| "grad_norm": 0.6548686027526855, |
| "learning_rate": 3.7838442650647307e-06, |
| "loss": 1.1961, |
| "step": 932 |
| }, |
| { |
| "epoch": 2.120637329286798, |
| "grad_norm": 0.6462785005569458, |
| "learning_rate": 3.781183751585693e-06, |
| "loss": 1.2093, |
| "step": 933 |
| }, |
| { |
| "epoch": 2.1229135053110775, |
| "grad_norm": 0.6643891930580139, |
| "learning_rate": 3.7785212688700917e-06, |
| "loss": 1.2116, |
| "step": 934 |
| }, |
| { |
| "epoch": 2.125189681335357, |
| "grad_norm": 0.6929313540458679, |
| "learning_rate": 3.775856821010282e-06, |
| "loss": 1.1751, |
| "step": 935 |
| }, |
| { |
| "epoch": 2.1274658573596357, |
| "grad_norm": 0.6516011357307434, |
| "learning_rate": 3.7731904121016394e-06, |
| "loss": 1.1917, |
| "step": 936 |
| }, |
| { |
| "epoch": 2.129742033383915, |
| "grad_norm": 0.6524226665496826, |
| "learning_rate": 3.770522046242552e-06, |
| "loss": 1.1569, |
| "step": 937 |
| }, |
| { |
| "epoch": 2.1320182094081943, |
| "grad_norm": 0.6462170481681824, |
| "learning_rate": 3.7678517275344184e-06, |
| "loss": 1.2077, |
| "step": 938 |
| }, |
| { |
| "epoch": 2.1342943854324736, |
| "grad_norm": 0.6451734900474548, |
| "learning_rate": 3.765179460081636e-06, |
| "loss": 1.1946, |
| "step": 939 |
| }, |
| { |
| "epoch": 2.1365705614567525, |
| "grad_norm": 0.6531258225440979, |
| "learning_rate": 3.762505247991601e-06, |
| "loss": 1.1757, |
| "step": 940 |
| }, |
| { |
| "epoch": 2.138846737481032, |
| "grad_norm": 0.6745213270187378, |
| "learning_rate": 3.759829095374697e-06, |
| "loss": 1.2196, |
| "step": 941 |
| }, |
| { |
| "epoch": 2.141122913505311, |
| "grad_norm": 0.6472035050392151, |
| "learning_rate": 3.7571510063442873e-06, |
| "loss": 1.1318, |
| "step": 942 |
| }, |
| { |
| "epoch": 2.1433990895295905, |
| "grad_norm": 0.6743549704551697, |
| "learning_rate": 3.754470985016716e-06, |
| "loss": 1.2066, |
| "step": 943 |
| }, |
| { |
| "epoch": 2.1456752655538693, |
| "grad_norm": 0.6646394729614258, |
| "learning_rate": 3.751789035511294e-06, |
| "loss": 1.2378, |
| "step": 944 |
| }, |
| { |
| "epoch": 2.1479514415781487, |
| "grad_norm": 0.6676492691040039, |
| "learning_rate": 3.749105161950299e-06, |
| "loss": 1.1922, |
| "step": 945 |
| }, |
| { |
| "epoch": 2.150227617602428, |
| "grad_norm": 0.6555543541908264, |
| "learning_rate": 3.7464193684589637e-06, |
| "loss": 1.1849, |
| "step": 946 |
| }, |
| { |
| "epoch": 2.1525037936267073, |
| "grad_norm": 0.6590687036514282, |
| "learning_rate": 3.7437316591654726e-06, |
| "loss": 1.2332, |
| "step": 947 |
| }, |
| { |
| "epoch": 2.154779969650986, |
| "grad_norm": 0.645908534526825, |
| "learning_rate": 3.7410420382009544e-06, |
| "loss": 1.2452, |
| "step": 948 |
| }, |
| { |
| "epoch": 2.1570561456752655, |
| "grad_norm": 0.6459996104240417, |
| "learning_rate": 3.7383505096994764e-06, |
| "loss": 1.2464, |
| "step": 949 |
| }, |
| { |
| "epoch": 2.159332321699545, |
| "grad_norm": 0.6533696055412292, |
| "learning_rate": 3.7356570777980377e-06, |
| "loss": 1.1695, |
| "step": 950 |
| }, |
| { |
| "epoch": 2.161608497723824, |
| "grad_norm": 0.6279348731040955, |
| "learning_rate": 3.7329617466365648e-06, |
| "loss": 1.2133, |
| "step": 951 |
| }, |
| { |
| "epoch": 2.163884673748103, |
| "grad_norm": 0.6500206589698792, |
| "learning_rate": 3.7302645203579004e-06, |
| "loss": 1.2656, |
| "step": 952 |
| }, |
| { |
| "epoch": 2.1661608497723823, |
| "grad_norm": 0.6392419338226318, |
| "learning_rate": 3.727565403107801e-06, |
| "loss": 1.1728, |
| "step": 953 |
| }, |
| { |
| "epoch": 2.1684370257966616, |
| "grad_norm": 0.6631274819374084, |
| "learning_rate": 3.724864399034932e-06, |
| "loss": 1.213, |
| "step": 954 |
| }, |
| { |
| "epoch": 2.170713201820941, |
| "grad_norm": 0.6563039422035217, |
| "learning_rate": 3.7221615122908566e-06, |
| "loss": 1.203, |
| "step": 955 |
| }, |
| { |
| "epoch": 2.17298937784522, |
| "grad_norm": 0.6851363778114319, |
| "learning_rate": 3.719456747030032e-06, |
| "loss": 1.2404, |
| "step": 956 |
| }, |
| { |
| "epoch": 2.175265553869499, |
| "grad_norm": 0.6711891293525696, |
| "learning_rate": 3.7167501074098023e-06, |
| "loss": 1.2512, |
| "step": 957 |
| }, |
| { |
| "epoch": 2.1775417298937785, |
| "grad_norm": 0.6652558445930481, |
| "learning_rate": 3.714041597590394e-06, |
| "loss": 1.1676, |
| "step": 958 |
| }, |
| { |
| "epoch": 2.179817905918058, |
| "grad_norm": 0.6488142609596252, |
| "learning_rate": 3.711331221734908e-06, |
| "loss": 1.1614, |
| "step": 959 |
| }, |
| { |
| "epoch": 2.1820940819423367, |
| "grad_norm": 0.6525776386260986, |
| "learning_rate": 3.7086189840093125e-06, |
| "loss": 1.2314, |
| "step": 960 |
| }, |
| { |
| "epoch": 2.184370257966616, |
| "grad_norm": 0.6627135276794434, |
| "learning_rate": 3.7059048885824367e-06, |
| "loss": 1.2194, |
| "step": 961 |
| }, |
| { |
| "epoch": 2.1866464339908953, |
| "grad_norm": 0.6578177213668823, |
| "learning_rate": 3.703188939625968e-06, |
| "loss": 1.2638, |
| "step": 962 |
| }, |
| { |
| "epoch": 2.1889226100151746, |
| "grad_norm": 0.6674039363861084, |
| "learning_rate": 3.7004711413144404e-06, |
| "loss": 1.2127, |
| "step": 963 |
| }, |
| { |
| "epoch": 2.191198786039454, |
| "grad_norm": 0.6704514026641846, |
| "learning_rate": 3.697751497825231e-06, |
| "loss": 1.1941, |
| "step": 964 |
| }, |
| { |
| "epoch": 2.193474962063733, |
| "grad_norm": 0.6485816836357117, |
| "learning_rate": 3.6950300133385524e-06, |
| "loss": 1.2305, |
| "step": 965 |
| }, |
| { |
| "epoch": 2.195751138088012, |
| "grad_norm": 0.6731365323066711, |
| "learning_rate": 3.6923066920374494e-06, |
| "loss": 1.2207, |
| "step": 966 |
| }, |
| { |
| "epoch": 2.1980273141122915, |
| "grad_norm": 0.6393440961837769, |
| "learning_rate": 3.6895815381077874e-06, |
| "loss": 1.2081, |
| "step": 967 |
| }, |
| { |
| "epoch": 2.2003034901365703, |
| "grad_norm": 0.6894279718399048, |
| "learning_rate": 3.686854555738249e-06, |
| "loss": 1.1939, |
| "step": 968 |
| }, |
| { |
| "epoch": 2.2025796661608497, |
| "grad_norm": 0.6538242101669312, |
| "learning_rate": 3.684125749120329e-06, |
| "loss": 1.2074, |
| "step": 969 |
| }, |
| { |
| "epoch": 2.204855842185129, |
| "grad_norm": 0.6664542555809021, |
| "learning_rate": 3.6813951224483226e-06, |
| "loss": 1.1853, |
| "step": 970 |
| }, |
| { |
| "epoch": 2.2071320182094083, |
| "grad_norm": 0.68585205078125, |
| "learning_rate": 3.678662679919327e-06, |
| "loss": 1.2169, |
| "step": 971 |
| }, |
| { |
| "epoch": 2.2094081942336876, |
| "grad_norm": 0.6920559406280518, |
| "learning_rate": 3.675928425733227e-06, |
| "loss": 1.2321, |
| "step": 972 |
| }, |
| { |
| "epoch": 2.2116843702579665, |
| "grad_norm": 0.7139000296592712, |
| "learning_rate": 3.6731923640926943e-06, |
| "loss": 1.2052, |
| "step": 973 |
| }, |
| { |
| "epoch": 2.213960546282246, |
| "grad_norm": 0.6535520553588867, |
| "learning_rate": 3.6704544992031766e-06, |
| "loss": 1.2275, |
| "step": 974 |
| }, |
| { |
| "epoch": 2.216236722306525, |
| "grad_norm": 0.6742737889289856, |
| "learning_rate": 3.6677148352728947e-06, |
| "loss": 1.2093, |
| "step": 975 |
| }, |
| { |
| "epoch": 2.2185128983308045, |
| "grad_norm": 0.6573116779327393, |
| "learning_rate": 3.6649733765128344e-06, |
| "loss": 1.1972, |
| "step": 976 |
| }, |
| { |
| "epoch": 2.2207890743550833, |
| "grad_norm": 0.6521129012107849, |
| "learning_rate": 3.66223012713674e-06, |
| "loss": 1.1748, |
| "step": 977 |
| }, |
| { |
| "epoch": 2.2230652503793626, |
| "grad_norm": 0.6640632152557373, |
| "learning_rate": 3.6594850913611085e-06, |
| "loss": 1.205, |
| "step": 978 |
| }, |
| { |
| "epoch": 2.225341426403642, |
| "grad_norm": 0.660221517086029, |
| "learning_rate": 3.6567382734051815e-06, |
| "loss": 1.2089, |
| "step": 979 |
| }, |
| { |
| "epoch": 2.2276176024279213, |
| "grad_norm": 0.6484681963920593, |
| "learning_rate": 3.6539896774909405e-06, |
| "loss": 1.2025, |
| "step": 980 |
| }, |
| { |
| "epoch": 2.2298937784522, |
| "grad_norm": 0.6685246825218201, |
| "learning_rate": 3.6512393078431013e-06, |
| "loss": 1.1783, |
| "step": 981 |
| }, |
| { |
| "epoch": 2.2321699544764795, |
| "grad_norm": 0.6591317057609558, |
| "learning_rate": 3.6484871686891044e-06, |
| "loss": 1.2257, |
| "step": 982 |
| }, |
| { |
| "epoch": 2.234446130500759, |
| "grad_norm": 0.6399511694908142, |
| "learning_rate": 3.645733264259109e-06, |
| "loss": 1.1855, |
| "step": 983 |
| }, |
| { |
| "epoch": 2.236722306525038, |
| "grad_norm": 0.6378699541091919, |
| "learning_rate": 3.642977598785991e-06, |
| "loss": 1.1868, |
| "step": 984 |
| }, |
| { |
| "epoch": 2.238998482549317, |
| "grad_norm": 0.6714856624603271, |
| "learning_rate": 3.6402201765053295e-06, |
| "loss": 1.2395, |
| "step": 985 |
| }, |
| { |
| "epoch": 2.2412746585735963, |
| "grad_norm": 0.6694827675819397, |
| "learning_rate": 3.6374610016554068e-06, |
| "loss": 1.1766, |
| "step": 986 |
| }, |
| { |
| "epoch": 2.2435508345978756, |
| "grad_norm": 0.6676952838897705, |
| "learning_rate": 3.634700078477197e-06, |
| "loss": 1.2046, |
| "step": 987 |
| }, |
| { |
| "epoch": 2.245827010622155, |
| "grad_norm": 0.6898479461669922, |
| "learning_rate": 3.6319374112143618e-06, |
| "loss": 1.1922, |
| "step": 988 |
| }, |
| { |
| "epoch": 2.248103186646434, |
| "grad_norm": 0.6739006042480469, |
| "learning_rate": 3.629173004113245e-06, |
| "loss": 1.2118, |
| "step": 989 |
| }, |
| { |
| "epoch": 2.250379362670713, |
| "grad_norm": 0.6676629185676575, |
| "learning_rate": 3.6264068614228625e-06, |
| "loss": 1.2002, |
| "step": 990 |
| }, |
| { |
| "epoch": 2.2526555386949925, |
| "grad_norm": 0.6608707308769226, |
| "learning_rate": 3.6236389873948995e-06, |
| "loss": 1.2316, |
| "step": 991 |
| }, |
| { |
| "epoch": 2.254931714719272, |
| "grad_norm": 0.6895280480384827, |
| "learning_rate": 3.6208693862837023e-06, |
| "loss": 1.2418, |
| "step": 992 |
| }, |
| { |
| "epoch": 2.2572078907435507, |
| "grad_norm": 0.6558012962341309, |
| "learning_rate": 3.618098062346271e-06, |
| "loss": 1.2236, |
| "step": 993 |
| }, |
| { |
| "epoch": 2.25948406676783, |
| "grad_norm": 0.6760514974594116, |
| "learning_rate": 3.615325019842253e-06, |
| "loss": 1.1848, |
| "step": 994 |
| }, |
| { |
| "epoch": 2.2617602427921093, |
| "grad_norm": 0.6816234588623047, |
| "learning_rate": 3.61255026303394e-06, |
| "loss": 1.1821, |
| "step": 995 |
| }, |
| { |
| "epoch": 2.2640364188163886, |
| "grad_norm": 0.6997912526130676, |
| "learning_rate": 3.609773796186256e-06, |
| "loss": 1.1973, |
| "step": 996 |
| }, |
| { |
| "epoch": 2.2663125948406675, |
| "grad_norm": 0.6629183888435364, |
| "learning_rate": 3.6069956235667547e-06, |
| "loss": 1.2428, |
| "step": 997 |
| }, |
| { |
| "epoch": 2.268588770864947, |
| "grad_norm": 0.66066575050354, |
| "learning_rate": 3.604215749445611e-06, |
| "loss": 1.2153, |
| "step": 998 |
| }, |
| { |
| "epoch": 2.270864946889226, |
| "grad_norm": 0.6838124394416809, |
| "learning_rate": 3.6014341780956157e-06, |
| "loss": 1.1784, |
| "step": 999 |
| }, |
| { |
| "epoch": 2.2731411229135055, |
| "grad_norm": 0.6711928248405457, |
| "learning_rate": 3.5986509137921677e-06, |
| "loss": 1.183, |
| "step": 1000 |
| }, |
| { |
| "epoch": 2.2754172989377848, |
| "grad_norm": 0.6433883905410767, |
| "learning_rate": 3.595865960813269e-06, |
| "loss": 1.2432, |
| "step": 1001 |
| }, |
| { |
| "epoch": 2.2776934749620636, |
| "grad_norm": 0.6601680517196655, |
| "learning_rate": 3.5930793234395157e-06, |
| "loss": 1.1752, |
| "step": 1002 |
| }, |
| { |
| "epoch": 2.279969650986343, |
| "grad_norm": 0.6449770927429199, |
| "learning_rate": 3.590291005954094e-06, |
| "loss": 1.1987, |
| "step": 1003 |
| }, |
| { |
| "epoch": 2.2822458270106223, |
| "grad_norm": 0.7027525901794434, |
| "learning_rate": 3.5875010126427733e-06, |
| "loss": 1.1809, |
| "step": 1004 |
| }, |
| { |
| "epoch": 2.284522003034901, |
| "grad_norm": 0.675951361656189, |
| "learning_rate": 3.5847093477938955e-06, |
| "loss": 1.1955, |
| "step": 1005 |
| }, |
| { |
| "epoch": 2.2867981790591805, |
| "grad_norm": 0.656053900718689, |
| "learning_rate": 3.581916015698376e-06, |
| "loss": 1.2256, |
| "step": 1006 |
| }, |
| { |
| "epoch": 2.28907435508346, |
| "grad_norm": 0.6874861121177673, |
| "learning_rate": 3.5791210206496897e-06, |
| "loss": 1.1917, |
| "step": 1007 |
| }, |
| { |
| "epoch": 2.291350531107739, |
| "grad_norm": 0.6393834948539734, |
| "learning_rate": 3.5763243669438696e-06, |
| "loss": 1.1689, |
| "step": 1008 |
| }, |
| { |
| "epoch": 2.2936267071320184, |
| "grad_norm": 0.6576155424118042, |
| "learning_rate": 3.5735260588794955e-06, |
| "loss": 1.1979, |
| "step": 1009 |
| }, |
| { |
| "epoch": 2.2959028831562973, |
| "grad_norm": 0.6721700429916382, |
| "learning_rate": 3.570726100757693e-06, |
| "loss": 1.1886, |
| "step": 1010 |
| }, |
| { |
| "epoch": 2.2981790591805766, |
| "grad_norm": 0.7089741826057434, |
| "learning_rate": 3.5679244968821235e-06, |
| "loss": 1.1678, |
| "step": 1011 |
| }, |
| { |
| "epoch": 2.300455235204856, |
| "grad_norm": 0.6565976142883301, |
| "learning_rate": 3.565121251558975e-06, |
| "loss": 1.1886, |
| "step": 1012 |
| }, |
| { |
| "epoch": 2.302731411229135, |
| "grad_norm": 0.6577697992324829, |
| "learning_rate": 3.562316369096962e-06, |
| "loss": 1.2083, |
| "step": 1013 |
| }, |
| { |
| "epoch": 2.305007587253414, |
| "grad_norm": 0.6564366817474365, |
| "learning_rate": 3.559509853807313e-06, |
| "loss": 1.2012, |
| "step": 1014 |
| }, |
| { |
| "epoch": 2.3072837632776935, |
| "grad_norm": 0.6902772784233093, |
| "learning_rate": 3.5567017100037683e-06, |
| "loss": 1.1863, |
| "step": 1015 |
| }, |
| { |
| "epoch": 2.309559939301973, |
| "grad_norm": 0.6742022633552551, |
| "learning_rate": 3.553891942002569e-06, |
| "loss": 1.1892, |
| "step": 1016 |
| }, |
| { |
| "epoch": 2.311836115326252, |
| "grad_norm": 0.6750596761703491, |
| "learning_rate": 3.5510805541224536e-06, |
| "loss": 1.1811, |
| "step": 1017 |
| }, |
| { |
| "epoch": 2.314112291350531, |
| "grad_norm": 0.695951521396637, |
| "learning_rate": 3.5482675506846527e-06, |
| "loss": 1.2489, |
| "step": 1018 |
| }, |
| { |
| "epoch": 2.3163884673748103, |
| "grad_norm": 0.6974602937698364, |
| "learning_rate": 3.5454529360128763e-06, |
| "loss": 1.2016, |
| "step": 1019 |
| }, |
| { |
| "epoch": 2.3186646433990896, |
| "grad_norm": 0.7286347150802612, |
| "learning_rate": 3.542636714433312e-06, |
| "loss": 1.2065, |
| "step": 1020 |
| }, |
| { |
| "epoch": 2.3209408194233685, |
| "grad_norm": 0.6884462833404541, |
| "learning_rate": 3.53981889027462e-06, |
| "loss": 1.2105, |
| "step": 1021 |
| }, |
| { |
| "epoch": 2.323216995447648, |
| "grad_norm": 0.6842535138130188, |
| "learning_rate": 3.536999467867921e-06, |
| "loss": 1.1876, |
| "step": 1022 |
| }, |
| { |
| "epoch": 2.325493171471927, |
| "grad_norm": 0.7068074941635132, |
| "learning_rate": 3.5341784515467926e-06, |
| "loss": 1.2378, |
| "step": 1023 |
| }, |
| { |
| "epoch": 2.3277693474962065, |
| "grad_norm": 0.6900534629821777, |
| "learning_rate": 3.5313558456472623e-06, |
| "loss": 1.2192, |
| "step": 1024 |
| }, |
| { |
| "epoch": 2.3300455235204858, |
| "grad_norm": 0.6720012426376343, |
| "learning_rate": 3.5285316545078018e-06, |
| "loss": 1.1843, |
| "step": 1025 |
| }, |
| { |
| "epoch": 2.3323216995447646, |
| "grad_norm": 0.6527561545372009, |
| "learning_rate": 3.5257058824693197e-06, |
| "loss": 1.2391, |
| "step": 1026 |
| }, |
| { |
| "epoch": 2.334597875569044, |
| "grad_norm": 0.6607952117919922, |
| "learning_rate": 3.5228785338751525e-06, |
| "loss": 1.1932, |
| "step": 1027 |
| }, |
| { |
| "epoch": 2.3368740515933233, |
| "grad_norm": 0.7056044340133667, |
| "learning_rate": 3.5200496130710606e-06, |
| "loss": 1.1677, |
| "step": 1028 |
| }, |
| { |
| "epoch": 2.3391502276176026, |
| "grad_norm": 0.6829009056091309, |
| "learning_rate": 3.517219124405222e-06, |
| "loss": 1.2209, |
| "step": 1029 |
| }, |
| { |
| "epoch": 2.3414264036418815, |
| "grad_norm": 0.6884670853614807, |
| "learning_rate": 3.5143870722282257e-06, |
| "loss": 1.1822, |
| "step": 1030 |
| }, |
| { |
| "epoch": 2.343702579666161, |
| "grad_norm": 0.6838653683662415, |
| "learning_rate": 3.511553460893059e-06, |
| "loss": 1.2193, |
| "step": 1031 |
| }, |
| { |
| "epoch": 2.34597875569044, |
| "grad_norm": 0.6836718320846558, |
| "learning_rate": 3.5087182947551113e-06, |
| "loss": 1.2061, |
| "step": 1032 |
| }, |
| { |
| "epoch": 2.3482549317147194, |
| "grad_norm": 0.6770094037055969, |
| "learning_rate": 3.505881578172159e-06, |
| "loss": 1.2141, |
| "step": 1033 |
| }, |
| { |
| "epoch": 2.3505311077389983, |
| "grad_norm": 0.7328886389732361, |
| "learning_rate": 3.503043315504361e-06, |
| "loss": 1.2023, |
| "step": 1034 |
| }, |
| { |
| "epoch": 2.3528072837632776, |
| "grad_norm": 0.7042023539543152, |
| "learning_rate": 3.5002035111142543e-06, |
| "loss": 1.2014, |
| "step": 1035 |
| }, |
| { |
| "epoch": 2.355083459787557, |
| "grad_norm": 0.7104170918464661, |
| "learning_rate": 3.4973621693667446e-06, |
| "loss": 1.2263, |
| "step": 1036 |
| }, |
| { |
| "epoch": 2.3573596358118363, |
| "grad_norm": 0.6627414226531982, |
| "learning_rate": 3.4945192946291016e-06, |
| "loss": 1.2012, |
| "step": 1037 |
| }, |
| { |
| "epoch": 2.359635811836115, |
| "grad_norm": 0.6971149444580078, |
| "learning_rate": 3.4916748912709506e-06, |
| "loss": 1.2168, |
| "step": 1038 |
| }, |
| { |
| "epoch": 2.3619119878603945, |
| "grad_norm": 0.6787693500518799, |
| "learning_rate": 3.4888289636642645e-06, |
| "loss": 1.2049, |
| "step": 1039 |
| }, |
| { |
| "epoch": 2.364188163884674, |
| "grad_norm": 0.6599656939506531, |
| "learning_rate": 3.4859815161833636e-06, |
| "loss": 1.2252, |
| "step": 1040 |
| }, |
| { |
| "epoch": 2.366464339908953, |
| "grad_norm": 0.6828228235244751, |
| "learning_rate": 3.4831325532049e-06, |
| "loss": 1.228, |
| "step": 1041 |
| }, |
| { |
| "epoch": 2.368740515933232, |
| "grad_norm": 0.6853594779968262, |
| "learning_rate": 3.480282079107857e-06, |
| "loss": 1.1806, |
| "step": 1042 |
| }, |
| { |
| "epoch": 2.3710166919575113, |
| "grad_norm": 0.6828693747520447, |
| "learning_rate": 3.477430098273541e-06, |
| "loss": 1.1818, |
| "step": 1043 |
| }, |
| { |
| "epoch": 2.3732928679817906, |
| "grad_norm": 0.6563565731048584, |
| "learning_rate": 3.4745766150855738e-06, |
| "loss": 1.2072, |
| "step": 1044 |
| }, |
| { |
| "epoch": 2.37556904400607, |
| "grad_norm": 0.7111157774925232, |
| "learning_rate": 3.4717216339298852e-06, |
| "loss": 1.2176, |
| "step": 1045 |
| }, |
| { |
| "epoch": 2.3778452200303493, |
| "grad_norm": 0.6725479364395142, |
| "learning_rate": 3.46886515919471e-06, |
| "loss": 1.1899, |
| "step": 1046 |
| }, |
| { |
| "epoch": 2.380121396054628, |
| "grad_norm": 0.7368158102035522, |
| "learning_rate": 3.4660071952705752e-06, |
| "loss": 1.1762, |
| "step": 1047 |
| }, |
| { |
| "epoch": 2.3823975720789075, |
| "grad_norm": 0.6941075325012207, |
| "learning_rate": 3.4631477465503018e-06, |
| "loss": 1.202, |
| "step": 1048 |
| }, |
| { |
| "epoch": 2.3846737481031868, |
| "grad_norm": 0.6628252863883972, |
| "learning_rate": 3.460286817428987e-06, |
| "loss": 1.1803, |
| "step": 1049 |
| }, |
| { |
| "epoch": 2.3869499241274656, |
| "grad_norm": 0.689207911491394, |
| "learning_rate": 3.4574244123040083e-06, |
| "loss": 1.2108, |
| "step": 1050 |
| }, |
| { |
| "epoch": 2.389226100151745, |
| "grad_norm": 0.7419899702072144, |
| "learning_rate": 3.4545605355750096e-06, |
| "loss": 1.2356, |
| "step": 1051 |
| }, |
| { |
| "epoch": 2.3915022761760243, |
| "grad_norm": 0.7288059592247009, |
| "learning_rate": 3.4516951916438974e-06, |
| "loss": 1.2054, |
| "step": 1052 |
| }, |
| { |
| "epoch": 2.3937784522003036, |
| "grad_norm": 0.7065550088882446, |
| "learning_rate": 3.4488283849148324e-06, |
| "loss": 1.1689, |
| "step": 1053 |
| }, |
| { |
| "epoch": 2.396054628224583, |
| "grad_norm": 0.7047455906867981, |
| "learning_rate": 3.445960119794225e-06, |
| "loss": 1.2038, |
| "step": 1054 |
| }, |
| { |
| "epoch": 2.398330804248862, |
| "grad_norm": 0.6833023428916931, |
| "learning_rate": 3.443090400690726e-06, |
| "loss": 1.1766, |
| "step": 1055 |
| }, |
| { |
| "epoch": 2.400606980273141, |
| "grad_norm": 0.6780257821083069, |
| "learning_rate": 3.440219232015222e-06, |
| "loss": 1.151, |
| "step": 1056 |
| }, |
| { |
| "epoch": 2.4028831562974204, |
| "grad_norm": 0.6979094743728638, |
| "learning_rate": 3.4373466181808284e-06, |
| "loss": 1.2191, |
| "step": 1057 |
| }, |
| { |
| "epoch": 2.4051593323216993, |
| "grad_norm": 0.6790235042572021, |
| "learning_rate": 3.4344725636028787e-06, |
| "loss": 1.1576, |
| "step": 1058 |
| }, |
| { |
| "epoch": 2.4074355083459786, |
| "grad_norm": 0.6650685667991638, |
| "learning_rate": 3.4315970726989244e-06, |
| "loss": 1.1853, |
| "step": 1059 |
| }, |
| { |
| "epoch": 2.409711684370258, |
| "grad_norm": 0.7322258949279785, |
| "learning_rate": 3.428720149888723e-06, |
| "loss": 1.1959, |
| "step": 1060 |
| }, |
| { |
| "epoch": 2.4119878603945373, |
| "grad_norm": 0.7185081243515015, |
| "learning_rate": 3.425841799594233e-06, |
| "loss": 1.1897, |
| "step": 1061 |
| }, |
| { |
| "epoch": 2.4142640364188166, |
| "grad_norm": 0.7351638674736023, |
| "learning_rate": 3.4229620262396063e-06, |
| "loss": 1.2453, |
| "step": 1062 |
| }, |
| { |
| "epoch": 2.4165402124430955, |
| "grad_norm": 0.6832559108734131, |
| "learning_rate": 3.4200808342511845e-06, |
| "loss": 1.1628, |
| "step": 1063 |
| }, |
| { |
| "epoch": 2.418816388467375, |
| "grad_norm": 0.7146325707435608, |
| "learning_rate": 3.4171982280574877e-06, |
| "loss": 1.1572, |
| "step": 1064 |
| }, |
| { |
| "epoch": 2.421092564491654, |
| "grad_norm": 0.6700774431228638, |
| "learning_rate": 3.414314212089209e-06, |
| "loss": 1.2444, |
| "step": 1065 |
| }, |
| { |
| "epoch": 2.423368740515933, |
| "grad_norm": 0.7039579749107361, |
| "learning_rate": 3.4114287907792115e-06, |
| "loss": 1.1903, |
| "step": 1066 |
| }, |
| { |
| "epoch": 2.4256449165402123, |
| "grad_norm": 0.6913965940475464, |
| "learning_rate": 3.4085419685625153e-06, |
| "loss": 1.1349, |
| "step": 1067 |
| }, |
| { |
| "epoch": 2.4279210925644916, |
| "grad_norm": 0.6725445985794067, |
| "learning_rate": 3.4056537498762955e-06, |
| "loss": 1.1814, |
| "step": 1068 |
| }, |
| { |
| "epoch": 2.430197268588771, |
| "grad_norm": 0.6761681437492371, |
| "learning_rate": 3.402764139159872e-06, |
| "loss": 1.1526, |
| "step": 1069 |
| }, |
| { |
| "epoch": 2.4324734446130503, |
| "grad_norm": 0.6802586913108826, |
| "learning_rate": 3.3998731408547065e-06, |
| "loss": 1.2111, |
| "step": 1070 |
| }, |
| { |
| "epoch": 2.434749620637329, |
| "grad_norm": 0.7151522040367126, |
| "learning_rate": 3.3969807594043913e-06, |
| "loss": 1.1904, |
| "step": 1071 |
| }, |
| { |
| "epoch": 2.4370257966616085, |
| "grad_norm": 0.6683356761932373, |
| "learning_rate": 3.3940869992546467e-06, |
| "loss": 1.2203, |
| "step": 1072 |
| }, |
| { |
| "epoch": 2.4393019726858878, |
| "grad_norm": 0.7200412154197693, |
| "learning_rate": 3.3911918648533094e-06, |
| "loss": 1.2061, |
| "step": 1073 |
| }, |
| { |
| "epoch": 2.441578148710167, |
| "grad_norm": 0.694735050201416, |
| "learning_rate": 3.3882953606503323e-06, |
| "loss": 1.2051, |
| "step": 1074 |
| }, |
| { |
| "epoch": 2.443854324734446, |
| "grad_norm": 0.7197138071060181, |
| "learning_rate": 3.3853974910977706e-06, |
| "loss": 1.2354, |
| "step": 1075 |
| }, |
| { |
| "epoch": 2.4461305007587253, |
| "grad_norm": 0.6728242039680481, |
| "learning_rate": 3.382498260649778e-06, |
| "loss": 1.1792, |
| "step": 1076 |
| }, |
| { |
| "epoch": 2.4484066767830046, |
| "grad_norm": 0.6801380515098572, |
| "learning_rate": 3.3795976737626025e-06, |
| "loss": 1.167, |
| "step": 1077 |
| }, |
| { |
| "epoch": 2.450682852807284, |
| "grad_norm": 0.7056938409805298, |
| "learning_rate": 3.376695734894575e-06, |
| "loss": 1.1945, |
| "step": 1078 |
| }, |
| { |
| "epoch": 2.452959028831563, |
| "grad_norm": 0.6955033540725708, |
| "learning_rate": 3.3737924485061046e-06, |
| "loss": 1.1787, |
| "step": 1079 |
| }, |
| { |
| "epoch": 2.455235204855842, |
| "grad_norm": 0.6794352531433105, |
| "learning_rate": 3.3708878190596724e-06, |
| "loss": 1.1848, |
| "step": 1080 |
| }, |
| { |
| "epoch": 2.4575113808801214, |
| "grad_norm": 0.7221025824546814, |
| "learning_rate": 3.3679818510198224e-06, |
| "loss": 1.2013, |
| "step": 1081 |
| }, |
| { |
| "epoch": 2.4597875569044008, |
| "grad_norm": 0.670121431350708, |
| "learning_rate": 3.3650745488531593e-06, |
| "loss": 1.1737, |
| "step": 1082 |
| }, |
| { |
| "epoch": 2.4620637329286796, |
| "grad_norm": 0.7007501125335693, |
| "learning_rate": 3.362165917028334e-06, |
| "loss": 1.2041, |
| "step": 1083 |
| }, |
| { |
| "epoch": 2.464339908952959, |
| "grad_norm": 0.7078006863594055, |
| "learning_rate": 3.3592559600160446e-06, |
| "loss": 1.1799, |
| "step": 1084 |
| }, |
| { |
| "epoch": 2.4666160849772383, |
| "grad_norm": 0.6946449875831604, |
| "learning_rate": 3.3563446822890246e-06, |
| "loss": 1.213, |
| "step": 1085 |
| }, |
| { |
| "epoch": 2.4688922610015176, |
| "grad_norm": 0.7082563042640686, |
| "learning_rate": 3.3534320883220367e-06, |
| "loss": 1.1749, |
| "step": 1086 |
| }, |
| { |
| "epoch": 2.4711684370257965, |
| "grad_norm": 0.6920600533485413, |
| "learning_rate": 3.3505181825918685e-06, |
| "loss": 1.2008, |
| "step": 1087 |
| }, |
| { |
| "epoch": 2.473444613050076, |
| "grad_norm": 0.6847231984138489, |
| "learning_rate": 3.347602969577323e-06, |
| "loss": 1.1531, |
| "step": 1088 |
| }, |
| { |
| "epoch": 2.475720789074355, |
| "grad_norm": 0.6741018295288086, |
| "learning_rate": 3.344686453759213e-06, |
| "loss": 1.2028, |
| "step": 1089 |
| }, |
| { |
| "epoch": 2.4779969650986344, |
| "grad_norm": 0.6896122694015503, |
| "learning_rate": 3.341768639620353e-06, |
| "loss": 1.1821, |
| "step": 1090 |
| }, |
| { |
| "epoch": 2.4802731411229137, |
| "grad_norm": 0.6965914368629456, |
| "learning_rate": 3.3388495316455525e-06, |
| "loss": 1.1665, |
| "step": 1091 |
| }, |
| { |
| "epoch": 2.4825493171471926, |
| "grad_norm": 0.6966100335121155, |
| "learning_rate": 3.3359291343216126e-06, |
| "loss": 1.2321, |
| "step": 1092 |
| }, |
| { |
| "epoch": 2.484825493171472, |
| "grad_norm": 0.6773708462715149, |
| "learning_rate": 3.3330074521373134e-06, |
| "loss": 1.1853, |
| "step": 1093 |
| }, |
| { |
| "epoch": 2.4871016691957513, |
| "grad_norm": 0.7116764187812805, |
| "learning_rate": 3.33008448958341e-06, |
| "loss": 1.2027, |
| "step": 1094 |
| }, |
| { |
| "epoch": 2.48937784522003, |
| "grad_norm": 0.6633743643760681, |
| "learning_rate": 3.327160251152627e-06, |
| "loss": 1.2257, |
| "step": 1095 |
| }, |
| { |
| "epoch": 2.4916540212443095, |
| "grad_norm": 0.6858770251274109, |
| "learning_rate": 3.3242347413396488e-06, |
| "loss": 1.1807, |
| "step": 1096 |
| }, |
| { |
| "epoch": 2.4939301972685888, |
| "grad_norm": 0.680118978023529, |
| "learning_rate": 3.321307964641115e-06, |
| "loss": 1.2371, |
| "step": 1097 |
| }, |
| { |
| "epoch": 2.496206373292868, |
| "grad_norm": 0.6915070414543152, |
| "learning_rate": 3.3183799255556115e-06, |
| "loss": 1.1813, |
| "step": 1098 |
| }, |
| { |
| "epoch": 2.4984825493171474, |
| "grad_norm": 0.6780185103416443, |
| "learning_rate": 3.3154506285836645e-06, |
| "loss": 1.1874, |
| "step": 1099 |
| }, |
| { |
| "epoch": 2.5007587253414263, |
| "grad_norm": 0.6911877393722534, |
| "learning_rate": 3.3125200782277356e-06, |
| "loss": 1.2274, |
| "step": 1100 |
| }, |
| { |
| "epoch": 2.5030349013657056, |
| "grad_norm": 0.6855583786964417, |
| "learning_rate": 3.3095882789922102e-06, |
| "loss": 1.1905, |
| "step": 1101 |
| }, |
| { |
| "epoch": 2.505311077389985, |
| "grad_norm": 0.7394909858703613, |
| "learning_rate": 3.306655235383394e-06, |
| "loss": 1.1557, |
| "step": 1102 |
| }, |
| { |
| "epoch": 2.507587253414264, |
| "grad_norm": 0.6894367933273315, |
| "learning_rate": 3.3037209519095072e-06, |
| "loss": 1.2127, |
| "step": 1103 |
| }, |
| { |
| "epoch": 2.509863429438543, |
| "grad_norm": 0.7138856649398804, |
| "learning_rate": 3.3007854330806733e-06, |
| "loss": 1.1866, |
| "step": 1104 |
| }, |
| { |
| "epoch": 2.5121396054628224, |
| "grad_norm": 0.7123029232025146, |
| "learning_rate": 3.2978486834089163e-06, |
| "loss": 1.1866, |
| "step": 1105 |
| }, |
| { |
| "epoch": 2.5144157814871018, |
| "grad_norm": 0.7103905081748962, |
| "learning_rate": 3.294910707408151e-06, |
| "loss": 1.2229, |
| "step": 1106 |
| }, |
| { |
| "epoch": 2.516691957511381, |
| "grad_norm": 0.7092157602310181, |
| "learning_rate": 3.2919715095941774e-06, |
| "loss": 1.1981, |
| "step": 1107 |
| }, |
| { |
| "epoch": 2.51896813353566, |
| "grad_norm": 0.6840993762016296, |
| "learning_rate": 3.289031094484675e-06, |
| "loss": 1.1945, |
| "step": 1108 |
| }, |
| { |
| "epoch": 2.5212443095599393, |
| "grad_norm": 0.6992108225822449, |
| "learning_rate": 3.286089466599191e-06, |
| "loss": 1.1687, |
| "step": 1109 |
| }, |
| { |
| "epoch": 2.5235204855842186, |
| "grad_norm": 0.7055697441101074, |
| "learning_rate": 3.2831466304591396e-06, |
| "loss": 1.2066, |
| "step": 1110 |
| }, |
| { |
| "epoch": 2.5257966616084975, |
| "grad_norm": 0.6737038493156433, |
| "learning_rate": 3.2802025905877916e-06, |
| "loss": 1.1869, |
| "step": 1111 |
| }, |
| { |
| "epoch": 2.528072837632777, |
| "grad_norm": 0.74284428358078, |
| "learning_rate": 3.277257351510267e-06, |
| "loss": 1.2072, |
| "step": 1112 |
| }, |
| { |
| "epoch": 2.530349013657056, |
| "grad_norm": 0.6852964758872986, |
| "learning_rate": 3.2743109177535292e-06, |
| "loss": 1.2403, |
| "step": 1113 |
| }, |
| { |
| "epoch": 2.5326251896813354, |
| "grad_norm": 0.712960958480835, |
| "learning_rate": 3.2713632938463785e-06, |
| "loss": 1.1688, |
| "step": 1114 |
| }, |
| { |
| "epoch": 2.5349013657056148, |
| "grad_norm": 0.6892920136451721, |
| "learning_rate": 3.2684144843194453e-06, |
| "loss": 1.202, |
| "step": 1115 |
| }, |
| { |
| "epoch": 2.5371775417298936, |
| "grad_norm": 0.6760216355323792, |
| "learning_rate": 3.265464493705181e-06, |
| "loss": 1.2104, |
| "step": 1116 |
| }, |
| { |
| "epoch": 2.539453717754173, |
| "grad_norm": 0.687100887298584, |
| "learning_rate": 3.262513326537852e-06, |
| "loss": 1.1846, |
| "step": 1117 |
| }, |
| { |
| "epoch": 2.5417298937784523, |
| "grad_norm": 0.6958955526351929, |
| "learning_rate": 3.2595609873535335e-06, |
| "loss": 1.2094, |
| "step": 1118 |
| }, |
| { |
| "epoch": 2.544006069802731, |
| "grad_norm": 0.6893970966339111, |
| "learning_rate": 3.256607480690104e-06, |
| "loss": 1.186, |
| "step": 1119 |
| }, |
| { |
| "epoch": 2.5462822458270105, |
| "grad_norm": 0.6832887530326843, |
| "learning_rate": 3.253652811087234e-06, |
| "loss": 1.2228, |
| "step": 1120 |
| }, |
| { |
| "epoch": 2.54855842185129, |
| "grad_norm": 0.6801987886428833, |
| "learning_rate": 3.2506969830863824e-06, |
| "loss": 1.1701, |
| "step": 1121 |
| }, |
| { |
| "epoch": 2.550834597875569, |
| "grad_norm": 0.6851920485496521, |
| "learning_rate": 3.2477400012307885e-06, |
| "loss": 1.1747, |
| "step": 1122 |
| }, |
| { |
| "epoch": 2.5531107738998484, |
| "grad_norm": 0.701210618019104, |
| "learning_rate": 3.2447818700654667e-06, |
| "loss": 1.2172, |
| "step": 1123 |
| }, |
| { |
| "epoch": 2.5553869499241273, |
| "grad_norm": 0.7184703350067139, |
| "learning_rate": 3.2418225941371957e-06, |
| "loss": 1.1676, |
| "step": 1124 |
| }, |
| { |
| "epoch": 2.5576631259484066, |
| "grad_norm": 0.6979882717132568, |
| "learning_rate": 3.2388621779945136e-06, |
| "loss": 1.204, |
| "step": 1125 |
| }, |
| { |
| "epoch": 2.559939301972686, |
| "grad_norm": 0.7246853113174438, |
| "learning_rate": 3.235900626187713e-06, |
| "loss": 1.2352, |
| "step": 1126 |
| }, |
| { |
| "epoch": 2.5622154779969653, |
| "grad_norm": 0.7014902234077454, |
| "learning_rate": 3.2329379432688314e-06, |
| "loss": 1.2407, |
| "step": 1127 |
| }, |
| { |
| "epoch": 2.5644916540212446, |
| "grad_norm": 0.6897662281990051, |
| "learning_rate": 3.229974133791643e-06, |
| "loss": 1.2321, |
| "step": 1128 |
| }, |
| { |
| "epoch": 2.5667678300455234, |
| "grad_norm": 0.6940773725509644, |
| "learning_rate": 3.2270092023116564e-06, |
| "loss": 1.217, |
| "step": 1129 |
| }, |
| { |
| "epoch": 2.5690440060698028, |
| "grad_norm": 0.7043965458869934, |
| "learning_rate": 3.224043153386104e-06, |
| "loss": 1.1847, |
| "step": 1130 |
| }, |
| { |
| "epoch": 2.571320182094082, |
| "grad_norm": 0.7059136033058167, |
| "learning_rate": 3.221075991573935e-06, |
| "loss": 1.1589, |
| "step": 1131 |
| }, |
| { |
| "epoch": 2.573596358118361, |
| "grad_norm": 0.6794278621673584, |
| "learning_rate": 3.218107721435808e-06, |
| "loss": 1.179, |
| "step": 1132 |
| }, |
| { |
| "epoch": 2.5758725341426403, |
| "grad_norm": 0.6917217373847961, |
| "learning_rate": 3.2151383475340875e-06, |
| "loss": 1.2174, |
| "step": 1133 |
| }, |
| { |
| "epoch": 2.5781487101669196, |
| "grad_norm": 0.7160817384719849, |
| "learning_rate": 3.2121678744328343e-06, |
| "loss": 1.1958, |
| "step": 1134 |
| }, |
| { |
| "epoch": 2.580424886191199, |
| "grad_norm": 0.6953707933425903, |
| "learning_rate": 3.209196306697798e-06, |
| "loss": 1.2311, |
| "step": 1135 |
| }, |
| { |
| "epoch": 2.5827010622154782, |
| "grad_norm": 0.6894403100013733, |
| "learning_rate": 3.206223648896409e-06, |
| "loss": 1.1963, |
| "step": 1136 |
| }, |
| { |
| "epoch": 2.584977238239757, |
| "grad_norm": 0.7261834144592285, |
| "learning_rate": 3.203249905597777e-06, |
| "loss": 1.2048, |
| "step": 1137 |
| }, |
| { |
| "epoch": 2.5872534142640364, |
| "grad_norm": 0.7032581567764282, |
| "learning_rate": 3.2002750813726774e-06, |
| "loss": 1.2023, |
| "step": 1138 |
| }, |
| { |
| "epoch": 2.5895295902883158, |
| "grad_norm": 0.6896634697914124, |
| "learning_rate": 3.1972991807935473e-06, |
| "loss": 1.2132, |
| "step": 1139 |
| }, |
| { |
| "epoch": 2.5918057663125946, |
| "grad_norm": 0.7110108733177185, |
| "learning_rate": 3.194322208434478e-06, |
| "loss": 1.1932, |
| "step": 1140 |
| }, |
| { |
| "epoch": 2.594081942336874, |
| "grad_norm": 0.7088103294372559, |
| "learning_rate": 3.191344168871211e-06, |
| "loss": 1.1936, |
| "step": 1141 |
| }, |
| { |
| "epoch": 2.5963581183611533, |
| "grad_norm": 0.6900802254676819, |
| "learning_rate": 3.1883650666811237e-06, |
| "loss": 1.1975, |
| "step": 1142 |
| }, |
| { |
| "epoch": 2.5986342943854326, |
| "grad_norm": 0.7168719172477722, |
| "learning_rate": 3.1853849064432296e-06, |
| "loss": 1.181, |
| "step": 1143 |
| }, |
| { |
| "epoch": 2.600910470409712, |
| "grad_norm": 0.6837049722671509, |
| "learning_rate": 3.182403692738168e-06, |
| "loss": 1.175, |
| "step": 1144 |
| }, |
| { |
| "epoch": 2.603186646433991, |
| "grad_norm": 0.6712286472320557, |
| "learning_rate": 3.1794214301481978e-06, |
| "loss": 1.1729, |
| "step": 1145 |
| }, |
| { |
| "epoch": 2.60546282245827, |
| "grad_norm": 0.6919139623641968, |
| "learning_rate": 3.1764381232571894e-06, |
| "loss": 1.1975, |
| "step": 1146 |
| }, |
| { |
| "epoch": 2.6077389984825494, |
| "grad_norm": 0.6942136883735657, |
| "learning_rate": 3.173453776650618e-06, |
| "loss": 1.2079, |
| "step": 1147 |
| }, |
| { |
| "epoch": 2.6100151745068283, |
| "grad_norm": 0.7067781090736389, |
| "learning_rate": 3.170468394915558e-06, |
| "loss": 1.1622, |
| "step": 1148 |
| }, |
| { |
| "epoch": 2.6122913505311076, |
| "grad_norm": 0.6851193308830261, |
| "learning_rate": 3.1674819826406744e-06, |
| "loss": 1.1951, |
| "step": 1149 |
| }, |
| { |
| "epoch": 2.614567526555387, |
| "grad_norm": 0.6892246603965759, |
| "learning_rate": 3.1644945444162155e-06, |
| "loss": 1.1746, |
| "step": 1150 |
| }, |
| { |
| "epoch": 2.6168437025796663, |
| "grad_norm": 0.7149601578712463, |
| "learning_rate": 3.1615060848340066e-06, |
| "loss": 1.1893, |
| "step": 1151 |
| }, |
| { |
| "epoch": 2.6191198786039456, |
| "grad_norm": 0.6909357905387878, |
| "learning_rate": 3.1585166084874446e-06, |
| "loss": 1.1756, |
| "step": 1152 |
| }, |
| { |
| "epoch": 2.6213960546282244, |
| "grad_norm": 0.7025142312049866, |
| "learning_rate": 3.155526119971488e-06, |
| "loss": 1.226, |
| "step": 1153 |
| }, |
| { |
| "epoch": 2.6236722306525038, |
| "grad_norm": 0.7248009443283081, |
| "learning_rate": 3.15253462388265e-06, |
| "loss": 1.1613, |
| "step": 1154 |
| }, |
| { |
| "epoch": 2.625948406676783, |
| "grad_norm": 0.7198726534843445, |
| "learning_rate": 3.149542124818993e-06, |
| "loss": 1.2107, |
| "step": 1155 |
| }, |
| { |
| "epoch": 2.628224582701062, |
| "grad_norm": 0.70361328125, |
| "learning_rate": 3.146548627380124e-06, |
| "loss": 1.148, |
| "step": 1156 |
| }, |
| { |
| "epoch": 2.6305007587253413, |
| "grad_norm": 0.6757749319076538, |
| "learning_rate": 3.14355413616718e-06, |
| "loss": 1.1859, |
| "step": 1157 |
| }, |
| { |
| "epoch": 2.6327769347496206, |
| "grad_norm": 0.7123044729232788, |
| "learning_rate": 3.1405586557828275e-06, |
| "loss": 1.2212, |
| "step": 1158 |
| }, |
| { |
| "epoch": 2.6350531107739, |
| "grad_norm": 0.7022401690483093, |
| "learning_rate": 3.137562190831255e-06, |
| "loss": 1.1943, |
| "step": 1159 |
| }, |
| { |
| "epoch": 2.6373292867981792, |
| "grad_norm": 0.7225694060325623, |
| "learning_rate": 3.134564745918161e-06, |
| "loss": 1.2306, |
| "step": 1160 |
| }, |
| { |
| "epoch": 2.639605462822458, |
| "grad_norm": 0.7538037896156311, |
| "learning_rate": 3.1315663256507533e-06, |
| "loss": 1.1613, |
| "step": 1161 |
| }, |
| { |
| "epoch": 2.6418816388467374, |
| "grad_norm": 0.7213900685310364, |
| "learning_rate": 3.1285669346377363e-06, |
| "loss": 1.1686, |
| "step": 1162 |
| }, |
| { |
| "epoch": 2.6441578148710168, |
| "grad_norm": 0.6754977107048035, |
| "learning_rate": 3.1255665774893085e-06, |
| "loss": 1.2004, |
| "step": 1163 |
| }, |
| { |
| "epoch": 2.6464339908952956, |
| "grad_norm": 0.6743507981300354, |
| "learning_rate": 3.1225652588171534e-06, |
| "loss": 1.2097, |
| "step": 1164 |
| }, |
| { |
| "epoch": 2.648710166919575, |
| "grad_norm": 0.7086999416351318, |
| "learning_rate": 3.119562983234431e-06, |
| "loss": 1.1811, |
| "step": 1165 |
| }, |
| { |
| "epoch": 2.6509863429438543, |
| "grad_norm": 0.7122178077697754, |
| "learning_rate": 3.116559755355772e-06, |
| "loss": 1.1792, |
| "step": 1166 |
| }, |
| { |
| "epoch": 2.6532625189681336, |
| "grad_norm": 0.7318682670593262, |
| "learning_rate": 3.1135555797972715e-06, |
| "loss": 1.1885, |
| "step": 1167 |
| }, |
| { |
| "epoch": 2.655538694992413, |
| "grad_norm": 0.7025822997093201, |
| "learning_rate": 3.110550461176484e-06, |
| "loss": 1.1924, |
| "step": 1168 |
| }, |
| { |
| "epoch": 2.657814871016692, |
| "grad_norm": 0.683168351650238, |
| "learning_rate": 3.1075444041124077e-06, |
| "loss": 1.1656, |
| "step": 1169 |
| }, |
| { |
| "epoch": 2.660091047040971, |
| "grad_norm": 0.7369166016578674, |
| "learning_rate": 3.1045374132254875e-06, |
| "loss": 1.2286, |
| "step": 1170 |
| }, |
| { |
| "epoch": 2.6623672230652504, |
| "grad_norm": 0.7125474214553833, |
| "learning_rate": 3.1015294931376035e-06, |
| "loss": 1.2138, |
| "step": 1171 |
| }, |
| { |
| "epoch": 2.6646433990895297, |
| "grad_norm": 0.7190746068954468, |
| "learning_rate": 3.0985206484720616e-06, |
| "loss": 1.1815, |
| "step": 1172 |
| }, |
| { |
| "epoch": 2.666919575113809, |
| "grad_norm": 0.6940246224403381, |
| "learning_rate": 3.09551088385359e-06, |
| "loss": 1.2445, |
| "step": 1173 |
| }, |
| { |
| "epoch": 2.669195751138088, |
| "grad_norm": 0.7135853171348572, |
| "learning_rate": 3.092500203908332e-06, |
| "loss": 1.1845, |
| "step": 1174 |
| }, |
| { |
| "epoch": 2.6714719271623673, |
| "grad_norm": 0.6984366774559021, |
| "learning_rate": 3.0894886132638375e-06, |
| "loss": 1.186, |
| "step": 1175 |
| }, |
| { |
| "epoch": 2.6737481031866466, |
| "grad_norm": 0.7473638653755188, |
| "learning_rate": 3.0864761165490546e-06, |
| "loss": 1.1662, |
| "step": 1176 |
| }, |
| { |
| "epoch": 2.6760242792109254, |
| "grad_norm": 0.6937222480773926, |
| "learning_rate": 3.0834627183943255e-06, |
| "loss": 1.1268, |
| "step": 1177 |
| }, |
| { |
| "epoch": 2.6783004552352048, |
| "grad_norm": 0.6648399829864502, |
| "learning_rate": 3.080448423431377e-06, |
| "loss": 1.1714, |
| "step": 1178 |
| }, |
| { |
| "epoch": 2.680576631259484, |
| "grad_norm": 0.7059394121170044, |
| "learning_rate": 3.0774332362933163e-06, |
| "loss": 1.1788, |
| "step": 1179 |
| }, |
| { |
| "epoch": 2.6828528072837634, |
| "grad_norm": 0.7164271473884583, |
| "learning_rate": 3.074417161614619e-06, |
| "loss": 1.1699, |
| "step": 1180 |
| }, |
| { |
| "epoch": 2.6851289833080427, |
| "grad_norm": 0.6942355632781982, |
| "learning_rate": 3.071400204031127e-06, |
| "loss": 1.2212, |
| "step": 1181 |
| }, |
| { |
| "epoch": 2.6874051593323216, |
| "grad_norm": 0.7147754430770874, |
| "learning_rate": 3.0683823681800382e-06, |
| "loss": 1.1895, |
| "step": 1182 |
| }, |
| { |
| "epoch": 2.689681335356601, |
| "grad_norm": 0.7072470188140869, |
| "learning_rate": 3.0653636586999025e-06, |
| "loss": 1.1963, |
| "step": 1183 |
| }, |
| { |
| "epoch": 2.6919575113808802, |
| "grad_norm": 0.7091546654701233, |
| "learning_rate": 3.0623440802306087e-06, |
| "loss": 1.167, |
| "step": 1184 |
| }, |
| { |
| "epoch": 2.694233687405159, |
| "grad_norm": 0.698569655418396, |
| "learning_rate": 3.059323637413385e-06, |
| "loss": 1.166, |
| "step": 1185 |
| }, |
| { |
| "epoch": 2.6965098634294384, |
| "grad_norm": 0.7070002555847168, |
| "learning_rate": 3.056302334890786e-06, |
| "loss": 1.1733, |
| "step": 1186 |
| }, |
| { |
| "epoch": 2.6987860394537178, |
| "grad_norm": 0.7015774250030518, |
| "learning_rate": 3.05328017730669e-06, |
| "loss": 1.2012, |
| "step": 1187 |
| }, |
| { |
| "epoch": 2.701062215477997, |
| "grad_norm": 0.6821005344390869, |
| "learning_rate": 3.0502571693062856e-06, |
| "loss": 1.1639, |
| "step": 1188 |
| }, |
| { |
| "epoch": 2.7033383915022764, |
| "grad_norm": 0.7214947938919067, |
| "learning_rate": 3.0472333155360724e-06, |
| "loss": 1.185, |
| "step": 1189 |
| }, |
| { |
| "epoch": 2.7056145675265553, |
| "grad_norm": 0.703359842300415, |
| "learning_rate": 3.0442086206438483e-06, |
| "loss": 1.1825, |
| "step": 1190 |
| }, |
| { |
| "epoch": 2.7078907435508346, |
| "grad_norm": 0.6933659911155701, |
| "learning_rate": 3.041183089278704e-06, |
| "loss": 1.1595, |
| "step": 1191 |
| }, |
| { |
| "epoch": 2.710166919575114, |
| "grad_norm": 0.7080999612808228, |
| "learning_rate": 3.0381567260910166e-06, |
| "loss": 1.2293, |
| "step": 1192 |
| }, |
| { |
| "epoch": 2.712443095599393, |
| "grad_norm": 0.6948418021202087, |
| "learning_rate": 3.0351295357324405e-06, |
| "loss": 1.2046, |
| "step": 1193 |
| }, |
| { |
| "epoch": 2.714719271623672, |
| "grad_norm": 0.7300384044647217, |
| "learning_rate": 3.0321015228559035e-06, |
| "loss": 1.2105, |
| "step": 1194 |
| }, |
| { |
| "epoch": 2.7169954476479514, |
| "grad_norm": 0.6972754597663879, |
| "learning_rate": 3.0290726921155954e-06, |
| "loss": 1.1983, |
| "step": 1195 |
| }, |
| { |
| "epoch": 2.7192716236722307, |
| "grad_norm": 0.6930494904518127, |
| "learning_rate": 3.026043048166964e-06, |
| "loss": 1.1745, |
| "step": 1196 |
| }, |
| { |
| "epoch": 2.72154779969651, |
| "grad_norm": 0.7130087018013, |
| "learning_rate": 3.023012595666708e-06, |
| "loss": 1.2083, |
| "step": 1197 |
| }, |
| { |
| "epoch": 2.723823975720789, |
| "grad_norm": 0.6889429688453674, |
| "learning_rate": 3.019981339272768e-06, |
| "loss": 1.2154, |
| "step": 1198 |
| }, |
| { |
| "epoch": 2.7261001517450683, |
| "grad_norm": 0.7289022207260132, |
| "learning_rate": 3.016949283644319e-06, |
| "loss": 1.2305, |
| "step": 1199 |
| }, |
| { |
| "epoch": 2.7283763277693476, |
| "grad_norm": 0.7369369864463806, |
| "learning_rate": 3.0139164334417665e-06, |
| "loss": 1.1922, |
| "step": 1200 |
| }, |
| { |
| "epoch": 2.7306525037936265, |
| "grad_norm": 0.6780570149421692, |
| "learning_rate": 3.010882793326737e-06, |
| "loss": 1.2169, |
| "step": 1201 |
| }, |
| { |
| "epoch": 2.7329286798179058, |
| "grad_norm": 0.70427405834198, |
| "learning_rate": 3.0078483679620706e-06, |
| "loss": 1.1819, |
| "step": 1202 |
| }, |
| { |
| "epoch": 2.735204855842185, |
| "grad_norm": 0.7048825621604919, |
| "learning_rate": 3.0048131620118137e-06, |
| "loss": 1.1639, |
| "step": 1203 |
| }, |
| { |
| "epoch": 2.7374810318664644, |
| "grad_norm": 0.7250698804855347, |
| "learning_rate": 3.001777180141213e-06, |
| "loss": 1.2162, |
| "step": 1204 |
| }, |
| { |
| "epoch": 2.7397572078907437, |
| "grad_norm": 0.715922474861145, |
| "learning_rate": 2.99874042701671e-06, |
| "loss": 1.209, |
| "step": 1205 |
| }, |
| { |
| "epoch": 2.7420333839150226, |
| "grad_norm": 0.6971619129180908, |
| "learning_rate": 2.9957029073059276e-06, |
| "loss": 1.1778, |
| "step": 1206 |
| }, |
| { |
| "epoch": 2.744309559939302, |
| "grad_norm": 0.6936290264129639, |
| "learning_rate": 2.992664625677669e-06, |
| "loss": 1.2263, |
| "step": 1207 |
| }, |
| { |
| "epoch": 2.7465857359635812, |
| "grad_norm": 0.7032736539840698, |
| "learning_rate": 2.9896255868019102e-06, |
| "loss": 1.2502, |
| "step": 1208 |
| }, |
| { |
| "epoch": 2.74886191198786, |
| "grad_norm": 0.6784445643424988, |
| "learning_rate": 2.9865857953497896e-06, |
| "loss": 1.1699, |
| "step": 1209 |
| }, |
| { |
| "epoch": 2.75113808801214, |
| "grad_norm": 0.7215850949287415, |
| "learning_rate": 2.9835452559935996e-06, |
| "loss": 1.189, |
| "step": 1210 |
| }, |
| { |
| "epoch": 2.7534142640364188, |
| "grad_norm": 0.7106872797012329, |
| "learning_rate": 2.9805039734067863e-06, |
| "loss": 1.2023, |
| "step": 1211 |
| }, |
| { |
| "epoch": 2.755690440060698, |
| "grad_norm": 0.6902872920036316, |
| "learning_rate": 2.977461952263938e-06, |
| "loss": 1.1879, |
| "step": 1212 |
| }, |
| { |
| "epoch": 2.7579666160849774, |
| "grad_norm": 0.722634494304657, |
| "learning_rate": 2.9744191972407754e-06, |
| "loss": 1.1981, |
| "step": 1213 |
| }, |
| { |
| "epoch": 2.7602427921092563, |
| "grad_norm": 0.7154293656349182, |
| "learning_rate": 2.9713757130141483e-06, |
| "loss": 1.2136, |
| "step": 1214 |
| }, |
| { |
| "epoch": 2.7625189681335356, |
| "grad_norm": 0.7049538493156433, |
| "learning_rate": 2.968331504262028e-06, |
| "loss": 1.2065, |
| "step": 1215 |
| }, |
| { |
| "epoch": 2.764795144157815, |
| "grad_norm": 0.7349944114685059, |
| "learning_rate": 2.9652865756635007e-06, |
| "loss": 1.2482, |
| "step": 1216 |
| }, |
| { |
| "epoch": 2.7670713201820942, |
| "grad_norm": 0.728399932384491, |
| "learning_rate": 2.9622409318987554e-06, |
| "loss": 1.1997, |
| "step": 1217 |
| }, |
| { |
| "epoch": 2.7693474962063735, |
| "grad_norm": 0.6748265624046326, |
| "learning_rate": 2.959194577649083e-06, |
| "loss": 1.1792, |
| "step": 1218 |
| }, |
| { |
| "epoch": 2.7716236722306524, |
| "grad_norm": 0.7254298329353333, |
| "learning_rate": 2.9561475175968663e-06, |
| "loss": 1.1635, |
| "step": 1219 |
| }, |
| { |
| "epoch": 2.7738998482549317, |
| "grad_norm": 0.7145636081695557, |
| "learning_rate": 2.9530997564255728e-06, |
| "loss": 1.1879, |
| "step": 1220 |
| }, |
| { |
| "epoch": 2.776176024279211, |
| "grad_norm": 0.7130534648895264, |
| "learning_rate": 2.950051298819746e-06, |
| "loss": 1.2206, |
| "step": 1221 |
| }, |
| { |
| "epoch": 2.77845220030349, |
| "grad_norm": 0.724371612071991, |
| "learning_rate": 2.9470021494650016e-06, |
| "loss": 1.1766, |
| "step": 1222 |
| }, |
| { |
| "epoch": 2.7807283763277693, |
| "grad_norm": 0.7017263770103455, |
| "learning_rate": 2.9439523130480185e-06, |
| "loss": 1.1888, |
| "step": 1223 |
| }, |
| { |
| "epoch": 2.7830045523520486, |
| "grad_norm": 0.715480625629425, |
| "learning_rate": 2.940901794256533e-06, |
| "loss": 1.2084, |
| "step": 1224 |
| }, |
| { |
| "epoch": 2.785280728376328, |
| "grad_norm": 0.7027157545089722, |
| "learning_rate": 2.9378505977793246e-06, |
| "loss": 1.18, |
| "step": 1225 |
| }, |
| { |
| "epoch": 2.787556904400607, |
| "grad_norm": 0.7351143956184387, |
| "learning_rate": 2.9347987283062213e-06, |
| "loss": 1.1632, |
| "step": 1226 |
| }, |
| { |
| "epoch": 2.789833080424886, |
| "grad_norm": 0.710259735584259, |
| "learning_rate": 2.931746190528082e-06, |
| "loss": 1.1886, |
| "step": 1227 |
| }, |
| { |
| "epoch": 2.7921092564491654, |
| "grad_norm": 0.7170918583869934, |
| "learning_rate": 2.9286929891367936e-06, |
| "loss": 1.1326, |
| "step": 1228 |
| }, |
| { |
| "epoch": 2.7943854324734447, |
| "grad_norm": 0.7224164605140686, |
| "learning_rate": 2.9256391288252617e-06, |
| "loss": 1.1565, |
| "step": 1229 |
| }, |
| { |
| "epoch": 2.7966616084977236, |
| "grad_norm": 0.7351495027542114, |
| "learning_rate": 2.9225846142874064e-06, |
| "loss": 1.1525, |
| "step": 1230 |
| }, |
| { |
| "epoch": 2.798937784522003, |
| "grad_norm": 0.7327077984809875, |
| "learning_rate": 2.919529450218154e-06, |
| "loss": 1.2468, |
| "step": 1231 |
| }, |
| { |
| "epoch": 2.8012139605462822, |
| "grad_norm": 0.756344199180603, |
| "learning_rate": 2.9164736413134263e-06, |
| "loss": 1.2027, |
| "step": 1232 |
| }, |
| { |
| "epoch": 2.8034901365705616, |
| "grad_norm": 0.7047960162162781, |
| "learning_rate": 2.9134171922701383e-06, |
| "loss": 1.1763, |
| "step": 1233 |
| }, |
| { |
| "epoch": 2.805766312594841, |
| "grad_norm": 0.6881158947944641, |
| "learning_rate": 2.9103601077861875e-06, |
| "loss": 1.1651, |
| "step": 1234 |
| }, |
| { |
| "epoch": 2.8080424886191198, |
| "grad_norm": 0.707214891910553, |
| "learning_rate": 2.907302392560452e-06, |
| "loss": 1.207, |
| "step": 1235 |
| }, |
| { |
| "epoch": 2.810318664643399, |
| "grad_norm": 0.7242740988731384, |
| "learning_rate": 2.904244051292774e-06, |
| "loss": 1.2092, |
| "step": 1236 |
| }, |
| { |
| "epoch": 2.8125948406676784, |
| "grad_norm": 0.7232706546783447, |
| "learning_rate": 2.9011850886839604e-06, |
| "loss": 1.154, |
| "step": 1237 |
| }, |
| { |
| "epoch": 2.8148710166919573, |
| "grad_norm": 0.7449567317962646, |
| "learning_rate": 2.8981255094357742e-06, |
| "loss": 1.1846, |
| "step": 1238 |
| }, |
| { |
| "epoch": 2.8171471927162366, |
| "grad_norm": 0.6920654773712158, |
| "learning_rate": 2.8950653182509253e-06, |
| "loss": 1.1999, |
| "step": 1239 |
| }, |
| { |
| "epoch": 2.819423368740516, |
| "grad_norm": 0.676856517791748, |
| "learning_rate": 2.892004519833063e-06, |
| "loss": 1.2061, |
| "step": 1240 |
| }, |
| { |
| "epoch": 2.8216995447647952, |
| "grad_norm": 0.7137316465377808, |
| "learning_rate": 2.888943118886771e-06, |
| "loss": 1.2316, |
| "step": 1241 |
| }, |
| { |
| "epoch": 2.8239757207890746, |
| "grad_norm": 0.7022238373756409, |
| "learning_rate": 2.88588112011756e-06, |
| "loss": 1.2096, |
| "step": 1242 |
| }, |
| { |
| "epoch": 2.8262518968133534, |
| "grad_norm": 0.7406110167503357, |
| "learning_rate": 2.8828185282318588e-06, |
| "loss": 1.2334, |
| "step": 1243 |
| }, |
| { |
| "epoch": 2.8285280728376327, |
| "grad_norm": 0.7122722268104553, |
| "learning_rate": 2.879755347937006e-06, |
| "loss": 1.1957, |
| "step": 1244 |
| }, |
| { |
| "epoch": 2.830804248861912, |
| "grad_norm": 0.7112712860107422, |
| "learning_rate": 2.876691583941248e-06, |
| "loss": 1.1985, |
| "step": 1245 |
| }, |
| { |
| "epoch": 2.833080424886191, |
| "grad_norm": 0.7014644145965576, |
| "learning_rate": 2.8736272409537257e-06, |
| "loss": 1.2052, |
| "step": 1246 |
| }, |
| { |
| "epoch": 2.8353566009104703, |
| "grad_norm": 0.7228798866271973, |
| "learning_rate": 2.870562323684473e-06, |
| "loss": 1.1795, |
| "step": 1247 |
| }, |
| { |
| "epoch": 2.8376327769347496, |
| "grad_norm": 0.7157189846038818, |
| "learning_rate": 2.8674968368444004e-06, |
| "loss": 1.22, |
| "step": 1248 |
| }, |
| { |
| "epoch": 2.839908952959029, |
| "grad_norm": 0.7256055474281311, |
| "learning_rate": 2.864430785145301e-06, |
| "loss": 1.1724, |
| "step": 1249 |
| }, |
| { |
| "epoch": 2.842185128983308, |
| "grad_norm": 0.6792826056480408, |
| "learning_rate": 2.8613641732998338e-06, |
| "loss": 1.1944, |
| "step": 1250 |
| }, |
| { |
| "epoch": 2.844461305007587, |
| "grad_norm": 0.743437647819519, |
| "learning_rate": 2.858297006021515e-06, |
| "loss": 1.2023, |
| "step": 1251 |
| }, |
| { |
| "epoch": 2.8467374810318664, |
| "grad_norm": 0.6970618367195129, |
| "learning_rate": 2.855229288024719e-06, |
| "loss": 1.1884, |
| "step": 1252 |
| }, |
| { |
| "epoch": 2.8490136570561457, |
| "grad_norm": 0.734380304813385, |
| "learning_rate": 2.8521610240246657e-06, |
| "loss": 1.1759, |
| "step": 1253 |
| }, |
| { |
| "epoch": 2.851289833080425, |
| "grad_norm": 0.7149617671966553, |
| "learning_rate": 2.8490922187374132e-06, |
| "loss": 1.1868, |
| "step": 1254 |
| }, |
| { |
| "epoch": 2.8535660091047044, |
| "grad_norm": 0.6973447799682617, |
| "learning_rate": 2.8460228768798507e-06, |
| "loss": 1.2332, |
| "step": 1255 |
| }, |
| { |
| "epoch": 2.8558421851289832, |
| "grad_norm": 0.6929753422737122, |
| "learning_rate": 2.8429530031696954e-06, |
| "loss": 1.1955, |
| "step": 1256 |
| }, |
| { |
| "epoch": 2.8581183611532626, |
| "grad_norm": 0.7042129635810852, |
| "learning_rate": 2.8398826023254804e-06, |
| "loss": 1.1998, |
| "step": 1257 |
| }, |
| { |
| "epoch": 2.860394537177542, |
| "grad_norm": 0.6975540518760681, |
| "learning_rate": 2.8368116790665478e-06, |
| "loss": 1.2024, |
| "step": 1258 |
| }, |
| { |
| "epoch": 2.8626707132018208, |
| "grad_norm": 0.7412719130516052, |
| "learning_rate": 2.8337402381130426e-06, |
| "loss": 1.1479, |
| "step": 1259 |
| }, |
| { |
| "epoch": 2.8649468892261, |
| "grad_norm": 0.6957024335861206, |
| "learning_rate": 2.830668284185908e-06, |
| "loss": 1.2087, |
| "step": 1260 |
| }, |
| { |
| "epoch": 2.8672230652503794, |
| "grad_norm": 0.6976204514503479, |
| "learning_rate": 2.827595822006874e-06, |
| "loss": 1.2259, |
| "step": 1261 |
| }, |
| { |
| "epoch": 2.8694992412746587, |
| "grad_norm": 0.7119215726852417, |
| "learning_rate": 2.8245228562984518e-06, |
| "loss": 1.1775, |
| "step": 1262 |
| }, |
| { |
| "epoch": 2.871775417298938, |
| "grad_norm": 0.7110069394111633, |
| "learning_rate": 2.8214493917839264e-06, |
| "loss": 1.2107, |
| "step": 1263 |
| }, |
| { |
| "epoch": 2.874051593323217, |
| "grad_norm": 0.6970719695091248, |
| "learning_rate": 2.81837543318735e-06, |
| "loss": 1.2019, |
| "step": 1264 |
| }, |
| { |
| "epoch": 2.8763277693474962, |
| "grad_norm": 0.683922529220581, |
| "learning_rate": 2.815300985233535e-06, |
| "loss": 1.1741, |
| "step": 1265 |
| }, |
| { |
| "epoch": 2.8786039453717756, |
| "grad_norm": 0.7189832925796509, |
| "learning_rate": 2.8122260526480433e-06, |
| "loss": 1.1423, |
| "step": 1266 |
| }, |
| { |
| "epoch": 2.8808801213960544, |
| "grad_norm": 0.7439128160476685, |
| "learning_rate": 2.8091506401571846e-06, |
| "loss": 1.183, |
| "step": 1267 |
| }, |
| { |
| "epoch": 2.8831562974203337, |
| "grad_norm": 0.7034988403320312, |
| "learning_rate": 2.8060747524880045e-06, |
| "loss": 1.1821, |
| "step": 1268 |
| }, |
| { |
| "epoch": 2.885432473444613, |
| "grad_norm": 0.7155296206474304, |
| "learning_rate": 2.80299839436828e-06, |
| "loss": 1.1895, |
| "step": 1269 |
| }, |
| { |
| "epoch": 2.8877086494688924, |
| "grad_norm": 0.7293388247489929, |
| "learning_rate": 2.7999215705265104e-06, |
| "loss": 1.167, |
| "step": 1270 |
| }, |
| { |
| "epoch": 2.8899848254931717, |
| "grad_norm": 0.6977200508117676, |
| "learning_rate": 2.7968442856919116e-06, |
| "loss": 1.1888, |
| "step": 1271 |
| }, |
| { |
| "epoch": 2.8922610015174506, |
| "grad_norm": 0.6939589381217957, |
| "learning_rate": 2.7937665445944075e-06, |
| "loss": 1.2061, |
| "step": 1272 |
| }, |
| { |
| "epoch": 2.89453717754173, |
| "grad_norm": 0.7107787728309631, |
| "learning_rate": 2.7906883519646227e-06, |
| "loss": 1.1893, |
| "step": 1273 |
| }, |
| { |
| "epoch": 2.896813353566009, |
| "grad_norm": 0.7114563584327698, |
| "learning_rate": 2.787609712533877e-06, |
| "loss": 1.1933, |
| "step": 1274 |
| }, |
| { |
| "epoch": 2.899089529590288, |
| "grad_norm": 0.6855188012123108, |
| "learning_rate": 2.784530631034176e-06, |
| "loss": 1.1904, |
| "step": 1275 |
| }, |
| { |
| "epoch": 2.9013657056145674, |
| "grad_norm": 0.7423205971717834, |
| "learning_rate": 2.781451112198208e-06, |
| "loss": 1.1694, |
| "step": 1276 |
| }, |
| { |
| "epoch": 2.9036418816388467, |
| "grad_norm": 0.7629468441009521, |
| "learning_rate": 2.778371160759327e-06, |
| "loss": 1.1773, |
| "step": 1277 |
| }, |
| { |
| "epoch": 2.905918057663126, |
| "grad_norm": 0.7157300710678101, |
| "learning_rate": 2.7752907814515573e-06, |
| "loss": 1.1702, |
| "step": 1278 |
| }, |
| { |
| "epoch": 2.9081942336874054, |
| "grad_norm": 0.688721776008606, |
| "learning_rate": 2.7722099790095793e-06, |
| "loss": 1.1629, |
| "step": 1279 |
| }, |
| { |
| "epoch": 2.9104704097116842, |
| "grad_norm": 0.7173575162887573, |
| "learning_rate": 2.769128758168725e-06, |
| "loss": 1.1998, |
| "step": 1280 |
| }, |
| { |
| "epoch": 2.9127465857359636, |
| "grad_norm": 0.714175283908844, |
| "learning_rate": 2.766047123664966e-06, |
| "loss": 1.2034, |
| "step": 1281 |
| }, |
| { |
| "epoch": 2.915022761760243, |
| "grad_norm": 0.723911702632904, |
| "learning_rate": 2.7629650802349127e-06, |
| "loss": 1.1596, |
| "step": 1282 |
| }, |
| { |
| "epoch": 2.9172989377845218, |
| "grad_norm": 0.68398118019104, |
| "learning_rate": 2.7598826326158045e-06, |
| "loss": 1.1954, |
| "step": 1283 |
| }, |
| { |
| "epoch": 2.919575113808801, |
| "grad_norm": 0.7057693600654602, |
| "learning_rate": 2.7567997855454998e-06, |
| "loss": 1.1715, |
| "step": 1284 |
| }, |
| { |
| "epoch": 2.9218512898330804, |
| "grad_norm": 0.7358039021492004, |
| "learning_rate": 2.7537165437624715e-06, |
| "loss": 1.2015, |
| "step": 1285 |
| }, |
| { |
| "epoch": 2.9241274658573597, |
| "grad_norm": 0.7091867923736572, |
| "learning_rate": 2.750632912005801e-06, |
| "loss": 1.1549, |
| "step": 1286 |
| }, |
| { |
| "epoch": 2.926403641881639, |
| "grad_norm": 0.743848979473114, |
| "learning_rate": 2.747548895015167e-06, |
| "loss": 1.1734, |
| "step": 1287 |
| }, |
| { |
| "epoch": 2.928679817905918, |
| "grad_norm": 0.7099263072013855, |
| "learning_rate": 2.744464497530842e-06, |
| "loss": 1.185, |
| "step": 1288 |
| }, |
| { |
| "epoch": 2.9309559939301972, |
| "grad_norm": 0.7112051248550415, |
| "learning_rate": 2.7413797242936806e-06, |
| "loss": 1.1913, |
| "step": 1289 |
| }, |
| { |
| "epoch": 2.9332321699544766, |
| "grad_norm": 0.780794084072113, |
| "learning_rate": 2.738294580045119e-06, |
| "loss": 1.2336, |
| "step": 1290 |
| }, |
| { |
| "epoch": 2.9355083459787554, |
| "grad_norm": 0.7613317966461182, |
| "learning_rate": 2.7352090695271614e-06, |
| "loss": 1.2095, |
| "step": 1291 |
| }, |
| { |
| "epoch": 2.9377845220030347, |
| "grad_norm": 0.7245267033576965, |
| "learning_rate": 2.7321231974823732e-06, |
| "loss": 1.188, |
| "step": 1292 |
| }, |
| { |
| "epoch": 2.940060698027314, |
| "grad_norm": 0.7280985116958618, |
| "learning_rate": 2.729036968653878e-06, |
| "loss": 1.2282, |
| "step": 1293 |
| }, |
| { |
| "epoch": 2.9423368740515934, |
| "grad_norm": 0.6889516115188599, |
| "learning_rate": 2.725950387785349e-06, |
| "loss": 1.1587, |
| "step": 1294 |
| }, |
| { |
| "epoch": 2.9446130500758727, |
| "grad_norm": 0.7361829876899719, |
| "learning_rate": 2.722863459620997e-06, |
| "loss": 1.2065, |
| "step": 1295 |
| }, |
| { |
| "epoch": 2.9468892261001516, |
| "grad_norm": 0.7415118217468262, |
| "learning_rate": 2.7197761889055674e-06, |
| "loss": 1.1946, |
| "step": 1296 |
| }, |
| { |
| "epoch": 2.949165402124431, |
| "grad_norm": 0.7229265570640564, |
| "learning_rate": 2.7166885803843347e-06, |
| "loss": 1.1816, |
| "step": 1297 |
| }, |
| { |
| "epoch": 2.95144157814871, |
| "grad_norm": 0.6884288787841797, |
| "learning_rate": 2.71360063880309e-06, |
| "loss": 1.2027, |
| "step": 1298 |
| }, |
| { |
| "epoch": 2.9537177541729895, |
| "grad_norm": 0.7227476239204407, |
| "learning_rate": 2.710512368908138e-06, |
| "loss": 1.248, |
| "step": 1299 |
| }, |
| { |
| "epoch": 2.955993930197269, |
| "grad_norm": 0.7278752326965332, |
| "learning_rate": 2.707423775446286e-06, |
| "loss": 1.2036, |
| "step": 1300 |
| }, |
| { |
| "epoch": 2.9582701062215477, |
| "grad_norm": 0.7094444632530212, |
| "learning_rate": 2.7043348631648415e-06, |
| "loss": 1.2214, |
| "step": 1301 |
| }, |
| { |
| "epoch": 2.960546282245827, |
| "grad_norm": 0.6929619908332825, |
| "learning_rate": 2.701245636811599e-06, |
| "loss": 1.1794, |
| "step": 1302 |
| }, |
| { |
| "epoch": 2.9628224582701064, |
| "grad_norm": 0.7046913504600525, |
| "learning_rate": 2.6981561011348385e-06, |
| "loss": 1.2069, |
| "step": 1303 |
| }, |
| { |
| "epoch": 2.9650986342943852, |
| "grad_norm": 0.7059223055839539, |
| "learning_rate": 2.695066260883313e-06, |
| "loss": 1.2024, |
| "step": 1304 |
| }, |
| { |
| "epoch": 2.9673748103186646, |
| "grad_norm": 0.7080976366996765, |
| "learning_rate": 2.6919761208062445e-06, |
| "loss": 1.1625, |
| "step": 1305 |
| }, |
| { |
| "epoch": 2.969650986342944, |
| "grad_norm": 0.7362763285636902, |
| "learning_rate": 2.688885685653318e-06, |
| "loss": 1.2217, |
| "step": 1306 |
| }, |
| { |
| "epoch": 2.971927162367223, |
| "grad_norm": 0.7461861371994019, |
| "learning_rate": 2.6857949601746676e-06, |
| "loss": 1.2043, |
| "step": 1307 |
| }, |
| { |
| "epoch": 2.9742033383915025, |
| "grad_norm": 0.7257667183876038, |
| "learning_rate": 2.682703949120878e-06, |
| "loss": 1.1749, |
| "step": 1308 |
| }, |
| { |
| "epoch": 2.9764795144157814, |
| "grad_norm": 0.7328771352767944, |
| "learning_rate": 2.6796126572429703e-06, |
| "loss": 1.2021, |
| "step": 1309 |
| }, |
| { |
| "epoch": 2.9787556904400607, |
| "grad_norm": 0.705711305141449, |
| "learning_rate": 2.6765210892923986e-06, |
| "loss": 1.189, |
| "step": 1310 |
| }, |
| { |
| "epoch": 2.98103186646434, |
| "grad_norm": 0.7146801352500916, |
| "learning_rate": 2.67342925002104e-06, |
| "loss": 1.1967, |
| "step": 1311 |
| }, |
| { |
| "epoch": 2.983308042488619, |
| "grad_norm": 0.7356604933738708, |
| "learning_rate": 2.67033714418119e-06, |
| "loss": 1.1506, |
| "step": 1312 |
| }, |
| { |
| "epoch": 2.9855842185128982, |
| "grad_norm": 0.6894738674163818, |
| "learning_rate": 2.667244776525553e-06, |
| "loss": 1.1712, |
| "step": 1313 |
| }, |
| { |
| "epoch": 2.9878603945371776, |
| "grad_norm": 0.7163074612617493, |
| "learning_rate": 2.6641521518072355e-06, |
| "loss": 1.2052, |
| "step": 1314 |
| }, |
| { |
| "epoch": 2.990136570561457, |
| "grad_norm": 0.7195286154747009, |
| "learning_rate": 2.6610592747797397e-06, |
| "loss": 1.1834, |
| "step": 1315 |
| }, |
| { |
| "epoch": 2.992412746585736, |
| "grad_norm": 0.7429676055908203, |
| "learning_rate": 2.657966150196956e-06, |
| "loss": 1.1779, |
| "step": 1316 |
| }, |
| { |
| "epoch": 2.994688922610015, |
| "grad_norm": 0.7009066343307495, |
| "learning_rate": 2.6548727828131554e-06, |
| "loss": 1.1745, |
| "step": 1317 |
| }, |
| { |
| "epoch": 2.9969650986342944, |
| "grad_norm": 0.6867721676826477, |
| "learning_rate": 2.65177917738298e-06, |
| "loss": 1.1665, |
| "step": 1318 |
| }, |
| { |
| "epoch": 2.9992412746585737, |
| "grad_norm": 0.7170870900154114, |
| "learning_rate": 2.6486853386614397e-06, |
| "loss": 1.158, |
| "step": 1319 |
| }, |
| { |
| "epoch": 3.0, |
| "grad_norm": 1.3577916622161865, |
| "learning_rate": 2.6455912714039033e-06, |
| "loss": 1.1783, |
| "step": 1320 |
| }, |
| { |
| "epoch": 3.0022761760242793, |
| "grad_norm": 0.7354686856269836, |
| "learning_rate": 2.6424969803660903e-06, |
| "loss": 1.169, |
| "step": 1321 |
| }, |
| { |
| "epoch": 3.0045523520485586, |
| "grad_norm": 0.7197363972663879, |
| "learning_rate": 2.639402470304063e-06, |
| "loss": 1.1474, |
| "step": 1322 |
| }, |
| { |
| "epoch": 3.0068285280728375, |
| "grad_norm": 0.7077065110206604, |
| "learning_rate": 2.6363077459742214e-06, |
| "loss": 1.1614, |
| "step": 1323 |
| }, |
| { |
| "epoch": 3.009104704097117, |
| "grad_norm": 0.7212318778038025, |
| "learning_rate": 2.6332128121332967e-06, |
| "loss": 1.1648, |
| "step": 1324 |
| }, |
| { |
| "epoch": 3.011380880121396, |
| "grad_norm": 0.7356379628181458, |
| "learning_rate": 2.6301176735383382e-06, |
| "loss": 1.1547, |
| "step": 1325 |
| }, |
| { |
| "epoch": 3.0136570561456755, |
| "grad_norm": 0.7490338087081909, |
| "learning_rate": 2.627022334946712e-06, |
| "loss": 1.1572, |
| "step": 1326 |
| }, |
| { |
| "epoch": 3.0159332321699543, |
| "grad_norm": 0.7195541262626648, |
| "learning_rate": 2.6239268011160923e-06, |
| "loss": 1.1909, |
| "step": 1327 |
| }, |
| { |
| "epoch": 3.0182094081942337, |
| "grad_norm": 0.7337531447410583, |
| "learning_rate": 2.620831076804453e-06, |
| "loss": 1.1573, |
| "step": 1328 |
| }, |
| { |
| "epoch": 3.020485584218513, |
| "grad_norm": 0.7338323593139648, |
| "learning_rate": 2.61773516677006e-06, |
| "loss": 1.141, |
| "step": 1329 |
| }, |
| { |
| "epoch": 3.0227617602427923, |
| "grad_norm": 0.7600909471511841, |
| "learning_rate": 2.614639075771465e-06, |
| "loss": 1.1509, |
| "step": 1330 |
| }, |
| { |
| "epoch": 3.025037936267071, |
| "grad_norm": 0.7509938478469849, |
| "learning_rate": 2.611542808567497e-06, |
| "loss": 1.1715, |
| "step": 1331 |
| }, |
| { |
| "epoch": 3.0273141122913505, |
| "grad_norm": 0.7426080703735352, |
| "learning_rate": 2.6084463699172594e-06, |
| "loss": 1.1395, |
| "step": 1332 |
| }, |
| { |
| "epoch": 3.02959028831563, |
| "grad_norm": 0.712645947933197, |
| "learning_rate": 2.6053497645801133e-06, |
| "loss": 1.1786, |
| "step": 1333 |
| }, |
| { |
| "epoch": 3.031866464339909, |
| "grad_norm": 0.7545322775840759, |
| "learning_rate": 2.6022529973156813e-06, |
| "loss": 1.1562, |
| "step": 1334 |
| }, |
| { |
| "epoch": 3.034142640364188, |
| "grad_norm": 0.7290534973144531, |
| "learning_rate": 2.5991560728838326e-06, |
| "loss": 1.1683, |
| "step": 1335 |
| }, |
| { |
| "epoch": 3.0364188163884673, |
| "grad_norm": 0.7173789143562317, |
| "learning_rate": 2.596058996044678e-06, |
| "loss": 1.1608, |
| "step": 1336 |
| }, |
| { |
| "epoch": 3.0386949924127467, |
| "grad_norm": 0.7625917792320251, |
| "learning_rate": 2.5929617715585614e-06, |
| "loss": 1.1349, |
| "step": 1337 |
| }, |
| { |
| "epoch": 3.040971168437026, |
| "grad_norm": 0.7284402251243591, |
| "learning_rate": 2.5898644041860567e-06, |
| "loss": 1.1264, |
| "step": 1338 |
| }, |
| { |
| "epoch": 3.043247344461305, |
| "grad_norm": 0.7259999513626099, |
| "learning_rate": 2.586766898687955e-06, |
| "loss": 1.1817, |
| "step": 1339 |
| }, |
| { |
| "epoch": 3.045523520485584, |
| "grad_norm": 0.8065728545188904, |
| "learning_rate": 2.583669259825261e-06, |
| "loss": 1.2324, |
| "step": 1340 |
| }, |
| { |
| "epoch": 3.0477996965098635, |
| "grad_norm": 0.7757611870765686, |
| "learning_rate": 2.580571492359183e-06, |
| "loss": 1.181, |
| "step": 1341 |
| }, |
| { |
| "epoch": 3.050075872534143, |
| "grad_norm": 0.6941875219345093, |
| "learning_rate": 2.5774736010511275e-06, |
| "loss": 1.1928, |
| "step": 1342 |
| }, |
| { |
| "epoch": 3.0523520485584217, |
| "grad_norm": 0.7500492334365845, |
| "learning_rate": 2.5743755906626928e-06, |
| "loss": 1.2046, |
| "step": 1343 |
| }, |
| { |
| "epoch": 3.054628224582701, |
| "grad_norm": 0.7713794112205505, |
| "learning_rate": 2.571277465955658e-06, |
| "loss": 1.1514, |
| "step": 1344 |
| }, |
| { |
| "epoch": 3.0569044006069803, |
| "grad_norm": 0.7205464243888855, |
| "learning_rate": 2.5681792316919785e-06, |
| "loss": 1.1583, |
| "step": 1345 |
| }, |
| { |
| "epoch": 3.0591805766312596, |
| "grad_norm": 0.7222645878791809, |
| "learning_rate": 2.56508089263378e-06, |
| "loss": 1.143, |
| "step": 1346 |
| }, |
| { |
| "epoch": 3.0614567526555385, |
| "grad_norm": 0.7436219453811646, |
| "learning_rate": 2.561982453543348e-06, |
| "loss": 1.1691, |
| "step": 1347 |
| }, |
| { |
| "epoch": 3.063732928679818, |
| "grad_norm": 0.7247343063354492, |
| "learning_rate": 2.5588839191831196e-06, |
| "loss": 1.1691, |
| "step": 1348 |
| }, |
| { |
| "epoch": 3.066009104704097, |
| "grad_norm": 0.7624401450157166, |
| "learning_rate": 2.5557852943156807e-06, |
| "loss": 1.1982, |
| "step": 1349 |
| }, |
| { |
| "epoch": 3.0682852807283765, |
| "grad_norm": 0.7353934645652771, |
| "learning_rate": 2.552686583703758e-06, |
| "loss": 1.1609, |
| "step": 1350 |
| }, |
| { |
| "epoch": 3.0705614567526553, |
| "grad_norm": 0.7236819863319397, |
| "learning_rate": 2.5495877921102056e-06, |
| "loss": 1.1766, |
| "step": 1351 |
| }, |
| { |
| "epoch": 3.0728376327769347, |
| "grad_norm": 0.7330083250999451, |
| "learning_rate": 2.546488924298006e-06, |
| "loss": 1.1572, |
| "step": 1352 |
| }, |
| { |
| "epoch": 3.075113808801214, |
| "grad_norm": 0.72292160987854, |
| "learning_rate": 2.5433899850302552e-06, |
| "loss": 1.194, |
| "step": 1353 |
| }, |
| { |
| "epoch": 3.0773899848254933, |
| "grad_norm": 0.7504494190216064, |
| "learning_rate": 2.5402909790701636e-06, |
| "loss": 1.1868, |
| "step": 1354 |
| }, |
| { |
| "epoch": 3.079666160849772, |
| "grad_norm": 0.7421271800994873, |
| "learning_rate": 2.53719191118104e-06, |
| "loss": 1.1708, |
| "step": 1355 |
| }, |
| { |
| "epoch": 3.0819423368740515, |
| "grad_norm": 0.7539661526679993, |
| "learning_rate": 2.53409278612629e-06, |
| "loss": 1.1786, |
| "step": 1356 |
| }, |
| { |
| "epoch": 3.084218512898331, |
| "grad_norm": 0.7343708872795105, |
| "learning_rate": 2.530993608669407e-06, |
| "loss": 1.191, |
| "step": 1357 |
| }, |
| { |
| "epoch": 3.08649468892261, |
| "grad_norm": 0.7485260367393494, |
| "learning_rate": 2.5278943835739656e-06, |
| "loss": 1.1513, |
| "step": 1358 |
| }, |
| { |
| "epoch": 3.088770864946889, |
| "grad_norm": 0.7451958656311035, |
| "learning_rate": 2.524795115603613e-06, |
| "loss": 1.1514, |
| "step": 1359 |
| }, |
| { |
| "epoch": 3.0910470409711683, |
| "grad_norm": 0.726077139377594, |
| "learning_rate": 2.521695809522061e-06, |
| "loss": 1.1405, |
| "step": 1360 |
| }, |
| { |
| "epoch": 3.0933232169954477, |
| "grad_norm": 0.7406827807426453, |
| "learning_rate": 2.518596470093083e-06, |
| "loss": 1.197, |
| "step": 1361 |
| }, |
| { |
| "epoch": 3.095599393019727, |
| "grad_norm": 0.7313615679740906, |
| "learning_rate": 2.5154971020805018e-06, |
| "loss": 1.191, |
| "step": 1362 |
| }, |
| { |
| "epoch": 3.097875569044006, |
| "grad_norm": 0.7504832744598389, |
| "learning_rate": 2.512397710248182e-06, |
| "loss": 1.2038, |
| "step": 1363 |
| }, |
| { |
| "epoch": 3.100151745068285, |
| "grad_norm": 0.7463207244873047, |
| "learning_rate": 2.5092982993600294e-06, |
| "loss": 1.1504, |
| "step": 1364 |
| }, |
| { |
| "epoch": 3.1024279210925645, |
| "grad_norm": 0.723032534122467, |
| "learning_rate": 2.506198874179976e-06, |
| "loss": 1.1612, |
| "step": 1365 |
| }, |
| { |
| "epoch": 3.104704097116844, |
| "grad_norm": 0.7396848797798157, |
| "learning_rate": 2.503099439471977e-06, |
| "loss": 1.1658, |
| "step": 1366 |
| }, |
| { |
| "epoch": 3.106980273141123, |
| "grad_norm": 0.7143369913101196, |
| "learning_rate": 2.5e-06, |
| "loss": 1.1587, |
| "step": 1367 |
| }, |
| { |
| "epoch": 3.109256449165402, |
| "grad_norm": 0.7214608192443848, |
| "learning_rate": 2.4969005605280243e-06, |
| "loss": 1.1627, |
| "step": 1368 |
| }, |
| { |
| "epoch": 3.1115326251896813, |
| "grad_norm": 0.7474159598350525, |
| "learning_rate": 2.4938011258200244e-06, |
| "loss": 1.1744, |
| "step": 1369 |
| }, |
| { |
| "epoch": 3.1138088012139606, |
| "grad_norm": 0.74878990650177, |
| "learning_rate": 2.4907017006399715e-06, |
| "loss": 1.1715, |
| "step": 1370 |
| }, |
| { |
| "epoch": 3.11608497723824, |
| "grad_norm": 0.7635120749473572, |
| "learning_rate": 2.487602289751819e-06, |
| "loss": 1.1706, |
| "step": 1371 |
| }, |
| { |
| "epoch": 3.118361153262519, |
| "grad_norm": 0.7503812313079834, |
| "learning_rate": 2.484502897919499e-06, |
| "loss": 1.1673, |
| "step": 1372 |
| }, |
| { |
| "epoch": 3.120637329286798, |
| "grad_norm": 0.7519100904464722, |
| "learning_rate": 2.481403529906918e-06, |
| "loss": 1.1862, |
| "step": 1373 |
| }, |
| { |
| "epoch": 3.1229135053110775, |
| "grad_norm": 0.7507646679878235, |
| "learning_rate": 2.4783041904779386e-06, |
| "loss": 1.1761, |
| "step": 1374 |
| }, |
| { |
| "epoch": 3.125189681335357, |
| "grad_norm": 0.7176265120506287, |
| "learning_rate": 2.4752048843963877e-06, |
| "loss": 1.1601, |
| "step": 1375 |
| }, |
| { |
| "epoch": 3.1274658573596357, |
| "grad_norm": 0.7137569785118103, |
| "learning_rate": 2.4721056164260348e-06, |
| "loss": 1.1542, |
| "step": 1376 |
| }, |
| { |
| "epoch": 3.129742033383915, |
| "grad_norm": 0.745905876159668, |
| "learning_rate": 2.4690063913305936e-06, |
| "loss": 1.1524, |
| "step": 1377 |
| }, |
| { |
| "epoch": 3.1320182094081943, |
| "grad_norm": 0.7238267660140991, |
| "learning_rate": 2.465907213873711e-06, |
| "loss": 1.1493, |
| "step": 1378 |
| }, |
| { |
| "epoch": 3.1342943854324736, |
| "grad_norm": 0.7267714142799377, |
| "learning_rate": 2.462808088818961e-06, |
| "loss": 1.1969, |
| "step": 1379 |
| }, |
| { |
| "epoch": 3.1365705614567525, |
| "grad_norm": 0.7494551539421082, |
| "learning_rate": 2.4597090209298372e-06, |
| "loss": 1.1708, |
| "step": 1380 |
| }, |
| { |
| "epoch": 3.138846737481032, |
| "grad_norm": 0.7473982572555542, |
| "learning_rate": 2.4566100149697456e-06, |
| "loss": 1.1915, |
| "step": 1381 |
| }, |
| { |
| "epoch": 3.141122913505311, |
| "grad_norm": 0.7496033906936646, |
| "learning_rate": 2.453511075701996e-06, |
| "loss": 1.1942, |
| "step": 1382 |
| }, |
| { |
| "epoch": 3.1433990895295905, |
| "grad_norm": 0.7699615359306335, |
| "learning_rate": 2.4504122078897948e-06, |
| "loss": 1.214, |
| "step": 1383 |
| }, |
| { |
| "epoch": 3.1456752655538693, |
| "grad_norm": 0.7535260915756226, |
| "learning_rate": 2.447313416296243e-06, |
| "loss": 1.1413, |
| "step": 1384 |
| }, |
| { |
| "epoch": 3.1479514415781487, |
| "grad_norm": 0.7432587146759033, |
| "learning_rate": 2.4442147056843193e-06, |
| "loss": 1.2077, |
| "step": 1385 |
| }, |
| { |
| "epoch": 3.150227617602428, |
| "grad_norm": 0.7599760293960571, |
| "learning_rate": 2.4411160808168817e-06, |
| "loss": 1.2043, |
| "step": 1386 |
| }, |
| { |
| "epoch": 3.1525037936267073, |
| "grad_norm": 0.7298524379730225, |
| "learning_rate": 2.4380175464566534e-06, |
| "loss": 1.2061, |
| "step": 1387 |
| }, |
| { |
| "epoch": 3.154779969650986, |
| "grad_norm": 0.743593156337738, |
| "learning_rate": 2.4349191073662203e-06, |
| "loss": 1.1708, |
| "step": 1388 |
| }, |
| { |
| "epoch": 3.1570561456752655, |
| "grad_norm": 0.7407417893409729, |
| "learning_rate": 2.431820768308022e-06, |
| "loss": 1.1469, |
| "step": 1389 |
| }, |
| { |
| "epoch": 3.159332321699545, |
| "grad_norm": 0.7478795051574707, |
| "learning_rate": 2.4287225340443434e-06, |
| "loss": 1.172, |
| "step": 1390 |
| }, |
| { |
| "epoch": 3.161608497723824, |
| "grad_norm": 0.7392578721046448, |
| "learning_rate": 2.425624409337308e-06, |
| "loss": 1.1722, |
| "step": 1391 |
| }, |
| { |
| "epoch": 3.163884673748103, |
| "grad_norm": 0.7476488947868347, |
| "learning_rate": 2.4225263989488733e-06, |
| "loss": 1.1907, |
| "step": 1392 |
| }, |
| { |
| "epoch": 3.1661608497723823, |
| "grad_norm": 0.7702009677886963, |
| "learning_rate": 2.4194285076408175e-06, |
| "loss": 1.1757, |
| "step": 1393 |
| }, |
| { |
| "epoch": 3.1684370257966616, |
| "grad_norm": 0.7480501532554626, |
| "learning_rate": 2.4163307401747393e-06, |
| "loss": 1.1914, |
| "step": 1394 |
| }, |
| { |
| "epoch": 3.170713201820941, |
| "grad_norm": 0.7919908165931702, |
| "learning_rate": 2.4132331013120454e-06, |
| "loss": 1.1775, |
| "step": 1395 |
| }, |
| { |
| "epoch": 3.17298937784522, |
| "grad_norm": 0.7680845260620117, |
| "learning_rate": 2.4101355958139437e-06, |
| "loss": 1.1885, |
| "step": 1396 |
| }, |
| { |
| "epoch": 3.175265553869499, |
| "grad_norm": 0.7591391205787659, |
| "learning_rate": 2.407038228441439e-06, |
| "loss": 1.1877, |
| "step": 1397 |
| }, |
| { |
| "epoch": 3.1775417298937785, |
| "grad_norm": 0.762646496295929, |
| "learning_rate": 2.4039410039553233e-06, |
| "loss": 1.2105, |
| "step": 1398 |
| }, |
| { |
| "epoch": 3.179817905918058, |
| "grad_norm": 0.7501146197319031, |
| "learning_rate": 2.4008439271161678e-06, |
| "loss": 1.1484, |
| "step": 1399 |
| }, |
| { |
| "epoch": 3.1820940819423367, |
| "grad_norm": 0.7478851675987244, |
| "learning_rate": 2.3977470026843196e-06, |
| "loss": 1.187, |
| "step": 1400 |
| }, |
| { |
| "epoch": 3.184370257966616, |
| "grad_norm": 0.7486398220062256, |
| "learning_rate": 2.3946502354198875e-06, |
| "loss": 1.2158, |
| "step": 1401 |
| }, |
| { |
| "epoch": 3.1866464339908953, |
| "grad_norm": 0.7453305721282959, |
| "learning_rate": 2.3915536300827414e-06, |
| "loss": 1.2463, |
| "step": 1402 |
| }, |
| { |
| "epoch": 3.1889226100151746, |
| "grad_norm": 0.8247939944267273, |
| "learning_rate": 2.3884571914325034e-06, |
| "loss": 1.1362, |
| "step": 1403 |
| }, |
| { |
| "epoch": 3.191198786039454, |
| "grad_norm": 0.7734161019325256, |
| "learning_rate": 2.3853609242285356e-06, |
| "loss": 1.1811, |
| "step": 1404 |
| }, |
| { |
| "epoch": 3.193474962063733, |
| "grad_norm": 0.7732148170471191, |
| "learning_rate": 2.3822648332299405e-06, |
| "loss": 1.1969, |
| "step": 1405 |
| }, |
| { |
| "epoch": 3.195751138088012, |
| "grad_norm": 0.7798835039138794, |
| "learning_rate": 2.3791689231955473e-06, |
| "loss": 1.167, |
| "step": 1406 |
| }, |
| { |
| "epoch": 3.1980273141122915, |
| "grad_norm": 0.7879831790924072, |
| "learning_rate": 2.3760731988839077e-06, |
| "loss": 1.1513, |
| "step": 1407 |
| }, |
| { |
| "epoch": 3.2003034901365703, |
| "grad_norm": 0.7619938254356384, |
| "learning_rate": 2.3729776650532887e-06, |
| "loss": 1.1692, |
| "step": 1408 |
| }, |
| { |
| "epoch": 3.2025796661608497, |
| "grad_norm": 0.7818293571472168, |
| "learning_rate": 2.3698823264616635e-06, |
| "loss": 1.1752, |
| "step": 1409 |
| }, |
| { |
| "epoch": 3.204855842185129, |
| "grad_norm": 0.7367649078369141, |
| "learning_rate": 2.366787187866704e-06, |
| "loss": 1.1639, |
| "step": 1410 |
| }, |
| { |
| "epoch": 3.2071320182094083, |
| "grad_norm": 0.7616274356842041, |
| "learning_rate": 2.363692254025779e-06, |
| "loss": 1.1844, |
| "step": 1411 |
| }, |
| { |
| "epoch": 3.2094081942336876, |
| "grad_norm": 0.7888926863670349, |
| "learning_rate": 2.360597529695938e-06, |
| "loss": 1.1797, |
| "step": 1412 |
| }, |
| { |
| "epoch": 3.2116843702579665, |
| "grad_norm": 0.7439415454864502, |
| "learning_rate": 2.35750301963391e-06, |
| "loss": 1.1933, |
| "step": 1413 |
| }, |
| { |
| "epoch": 3.213960546282246, |
| "grad_norm": 0.7649375200271606, |
| "learning_rate": 2.3544087285960975e-06, |
| "loss": 1.1848, |
| "step": 1414 |
| }, |
| { |
| "epoch": 3.216236722306525, |
| "grad_norm": 0.7349463701248169, |
| "learning_rate": 2.3513146613385603e-06, |
| "loss": 1.1557, |
| "step": 1415 |
| }, |
| { |
| "epoch": 3.2185128983308045, |
| "grad_norm": 0.756278395652771, |
| "learning_rate": 2.348220822617021e-06, |
| "loss": 1.2136, |
| "step": 1416 |
| }, |
| { |
| "epoch": 3.2207890743550833, |
| "grad_norm": 0.7582489252090454, |
| "learning_rate": 2.345127217186846e-06, |
| "loss": 1.1606, |
| "step": 1417 |
| }, |
| { |
| "epoch": 3.2230652503793626, |
| "grad_norm": 0.7773423194885254, |
| "learning_rate": 2.3420338498030445e-06, |
| "loss": 1.1465, |
| "step": 1418 |
| }, |
| { |
| "epoch": 3.225341426403642, |
| "grad_norm": 0.7887737154960632, |
| "learning_rate": 2.3389407252202607e-06, |
| "loss": 1.1791, |
| "step": 1419 |
| }, |
| { |
| "epoch": 3.2276176024279213, |
| "grad_norm": 0.7461211681365967, |
| "learning_rate": 2.3358478481927657e-06, |
| "loss": 1.1888, |
| "step": 1420 |
| }, |
| { |
| "epoch": 3.2298937784522, |
| "grad_norm": 0.7704999446868896, |
| "learning_rate": 2.332755223474448e-06, |
| "loss": 1.1494, |
| "step": 1421 |
| }, |
| { |
| "epoch": 3.2321699544764795, |
| "grad_norm": 0.7470236420631409, |
| "learning_rate": 2.329662855818811e-06, |
| "loss": 1.1556, |
| "step": 1422 |
| }, |
| { |
| "epoch": 3.234446130500759, |
| "grad_norm": 0.7861848473548889, |
| "learning_rate": 2.32657074997896e-06, |
| "loss": 1.1904, |
| "step": 1423 |
| }, |
| { |
| "epoch": 3.236722306525038, |
| "grad_norm": 0.7543413043022156, |
| "learning_rate": 2.323478910707602e-06, |
| "loss": 1.1813, |
| "step": 1424 |
| }, |
| { |
| "epoch": 3.238998482549317, |
| "grad_norm": 0.7611058950424194, |
| "learning_rate": 2.3203873427570305e-06, |
| "loss": 1.1658, |
| "step": 1425 |
| }, |
| { |
| "epoch": 3.2412746585735963, |
| "grad_norm": 0.7355452179908752, |
| "learning_rate": 2.3172960508791225e-06, |
| "loss": 1.1899, |
| "step": 1426 |
| }, |
| { |
| "epoch": 3.2435508345978756, |
| "grad_norm": 0.7388221025466919, |
| "learning_rate": 2.314205039825333e-06, |
| "loss": 1.1881, |
| "step": 1427 |
| }, |
| { |
| "epoch": 3.245827010622155, |
| "grad_norm": 0.7748705744743347, |
| "learning_rate": 2.3111143143466836e-06, |
| "loss": 1.2042, |
| "step": 1428 |
| }, |
| { |
| "epoch": 3.248103186646434, |
| "grad_norm": 0.7724549770355225, |
| "learning_rate": 2.308023879193756e-06, |
| "loss": 1.183, |
| "step": 1429 |
| }, |
| { |
| "epoch": 3.250379362670713, |
| "grad_norm": 0.7296189665794373, |
| "learning_rate": 2.3049337391166884e-06, |
| "loss": 1.1326, |
| "step": 1430 |
| }, |
| { |
| "epoch": 3.2526555386949925, |
| "grad_norm": 0.7788570523262024, |
| "learning_rate": 2.3018438988651628e-06, |
| "loss": 1.1718, |
| "step": 1431 |
| }, |
| { |
| "epoch": 3.254931714719272, |
| "grad_norm": 0.7668931484222412, |
| "learning_rate": 2.2987543631884014e-06, |
| "loss": 1.1742, |
| "step": 1432 |
| }, |
| { |
| "epoch": 3.2572078907435507, |
| "grad_norm": 0.7767664790153503, |
| "learning_rate": 2.2956651368351597e-06, |
| "loss": 1.2012, |
| "step": 1433 |
| }, |
| { |
| "epoch": 3.25948406676783, |
| "grad_norm": 0.7626579999923706, |
| "learning_rate": 2.2925762245537135e-06, |
| "loss": 1.1495, |
| "step": 1434 |
| }, |
| { |
| "epoch": 3.2617602427921093, |
| "grad_norm": 0.7605862021446228, |
| "learning_rate": 2.289487631091863e-06, |
| "loss": 1.1791, |
| "step": 1435 |
| }, |
| { |
| "epoch": 3.2640364188163886, |
| "grad_norm": 0.7632426023483276, |
| "learning_rate": 2.2863993611969105e-06, |
| "loss": 1.1704, |
| "step": 1436 |
| }, |
| { |
| "epoch": 3.2663125948406675, |
| "grad_norm": 0.741607129573822, |
| "learning_rate": 2.2833114196156657e-06, |
| "loss": 1.162, |
| "step": 1437 |
| }, |
| { |
| "epoch": 3.268588770864947, |
| "grad_norm": 0.7359287142753601, |
| "learning_rate": 2.2802238110944335e-06, |
| "loss": 1.1825, |
| "step": 1438 |
| }, |
| { |
| "epoch": 3.270864946889226, |
| "grad_norm": 0.798477053642273, |
| "learning_rate": 2.2771365403790046e-06, |
| "loss": 1.1829, |
| "step": 1439 |
| }, |
| { |
| "epoch": 3.2731411229135055, |
| "grad_norm": 0.777813196182251, |
| "learning_rate": 2.274049612214652e-06, |
| "loss": 1.1611, |
| "step": 1440 |
| }, |
| { |
| "epoch": 3.2754172989377848, |
| "grad_norm": 0.7524014115333557, |
| "learning_rate": 2.2709630313461224e-06, |
| "loss": 1.1774, |
| "step": 1441 |
| }, |
| { |
| "epoch": 3.2776934749620636, |
| "grad_norm": 0.7729030847549438, |
| "learning_rate": 2.267876802517628e-06, |
| "loss": 1.1656, |
| "step": 1442 |
| }, |
| { |
| "epoch": 3.279969650986343, |
| "grad_norm": 0.7829767465591431, |
| "learning_rate": 2.2647909304728394e-06, |
| "loss": 1.1561, |
| "step": 1443 |
| }, |
| { |
| "epoch": 3.2822458270106223, |
| "grad_norm": 0.7700682282447815, |
| "learning_rate": 2.261705419954882e-06, |
| "loss": 1.17, |
| "step": 1444 |
| }, |
| { |
| "epoch": 3.284522003034901, |
| "grad_norm": 0.7718088626861572, |
| "learning_rate": 2.258620275706319e-06, |
| "loss": 1.1421, |
| "step": 1445 |
| }, |
| { |
| "epoch": 3.2867981790591805, |
| "grad_norm": 0.775188148021698, |
| "learning_rate": 2.255535502469159e-06, |
| "loss": 1.2197, |
| "step": 1446 |
| }, |
| { |
| "epoch": 3.28907435508346, |
| "grad_norm": 0.7680513858795166, |
| "learning_rate": 2.2524511049848335e-06, |
| "loss": 1.2133, |
| "step": 1447 |
| }, |
| { |
| "epoch": 3.291350531107739, |
| "grad_norm": 0.7592117786407471, |
| "learning_rate": 2.2493670879941996e-06, |
| "loss": 1.1552, |
| "step": 1448 |
| }, |
| { |
| "epoch": 3.2936267071320184, |
| "grad_norm": 0.748960554599762, |
| "learning_rate": 2.246283456237529e-06, |
| "loss": 1.1759, |
| "step": 1449 |
| }, |
| { |
| "epoch": 3.2959028831562973, |
| "grad_norm": 0.7754799723625183, |
| "learning_rate": 2.2432002144545015e-06, |
| "loss": 1.1703, |
| "step": 1450 |
| }, |
| { |
| "epoch": 3.2981790591805766, |
| "grad_norm": 0.757481038570404, |
| "learning_rate": 2.2401173673841963e-06, |
| "loss": 1.1892, |
| "step": 1451 |
| }, |
| { |
| "epoch": 3.300455235204856, |
| "grad_norm": 0.7509388327598572, |
| "learning_rate": 2.2370349197650877e-06, |
| "loss": 1.2018, |
| "step": 1452 |
| }, |
| { |
| "epoch": 3.302731411229135, |
| "grad_norm": 0.7432618141174316, |
| "learning_rate": 2.2339528763350353e-06, |
| "loss": 1.1659, |
| "step": 1453 |
| }, |
| { |
| "epoch": 3.305007587253414, |
| "grad_norm": 0.7313361763954163, |
| "learning_rate": 2.230871241831276e-06, |
| "loss": 1.1701, |
| "step": 1454 |
| }, |
| { |
| "epoch": 3.3072837632776935, |
| "grad_norm": 0.7643156051635742, |
| "learning_rate": 2.2277900209904215e-06, |
| "loss": 1.145, |
| "step": 1455 |
| }, |
| { |
| "epoch": 3.309559939301973, |
| "grad_norm": 0.7757098078727722, |
| "learning_rate": 2.224709218548443e-06, |
| "loss": 1.1609, |
| "step": 1456 |
| }, |
| { |
| "epoch": 3.311836115326252, |
| "grad_norm": 0.7871479392051697, |
| "learning_rate": 2.221628839240674e-06, |
| "loss": 1.1922, |
| "step": 1457 |
| }, |
| { |
| "epoch": 3.314112291350531, |
| "grad_norm": 0.774569034576416, |
| "learning_rate": 2.2185488878017934e-06, |
| "loss": 1.1608, |
| "step": 1458 |
| }, |
| { |
| "epoch": 3.3163884673748103, |
| "grad_norm": 0.7552246451377869, |
| "learning_rate": 2.215469368965824e-06, |
| "loss": 1.1586, |
| "step": 1459 |
| }, |
| { |
| "epoch": 3.3186646433990896, |
| "grad_norm": 0.7549350261688232, |
| "learning_rate": 2.2123902874661237e-06, |
| "loss": 1.2161, |
| "step": 1460 |
| }, |
| { |
| "epoch": 3.3209408194233685, |
| "grad_norm": 0.7435033917427063, |
| "learning_rate": 2.2093116480353785e-06, |
| "loss": 1.1543, |
| "step": 1461 |
| }, |
| { |
| "epoch": 3.323216995447648, |
| "grad_norm": 0.7678859233856201, |
| "learning_rate": 2.2062334554055937e-06, |
| "loss": 1.1509, |
| "step": 1462 |
| }, |
| { |
| "epoch": 3.325493171471927, |
| "grad_norm": 0.7754087448120117, |
| "learning_rate": 2.2031557143080896e-06, |
| "loss": 1.1821, |
| "step": 1463 |
| }, |
| { |
| "epoch": 3.3277693474962065, |
| "grad_norm": 0.7418244481086731, |
| "learning_rate": 2.2000784294734896e-06, |
| "loss": 1.1803, |
| "step": 1464 |
| }, |
| { |
| "epoch": 3.3300455235204858, |
| "grad_norm": 0.7413983345031738, |
| "learning_rate": 2.1970016056317202e-06, |
| "loss": 1.1761, |
| "step": 1465 |
| }, |
| { |
| "epoch": 3.3323216995447646, |
| "grad_norm": 0.7306979894638062, |
| "learning_rate": 2.193925247511996e-06, |
| "loss": 1.1418, |
| "step": 1466 |
| }, |
| { |
| "epoch": 3.334597875569044, |
| "grad_norm": 0.7656619548797607, |
| "learning_rate": 2.190849359842816e-06, |
| "loss": 1.1635, |
| "step": 1467 |
| }, |
| { |
| "epoch": 3.3368740515933233, |
| "grad_norm": 0.7424888610839844, |
| "learning_rate": 2.1877739473519575e-06, |
| "loss": 1.1551, |
| "step": 1468 |
| }, |
| { |
| "epoch": 3.3391502276176026, |
| "grad_norm": 0.7459004521369934, |
| "learning_rate": 2.184699014766466e-06, |
| "loss": 1.1799, |
| "step": 1469 |
| }, |
| { |
| "epoch": 3.3414264036418815, |
| "grad_norm": 0.7800491452217102, |
| "learning_rate": 2.1816245668126506e-06, |
| "loss": 1.1304, |
| "step": 1470 |
| }, |
| { |
| "epoch": 3.343702579666161, |
| "grad_norm": 0.7625274658203125, |
| "learning_rate": 2.1785506082160745e-06, |
| "loss": 1.1869, |
| "step": 1471 |
| }, |
| { |
| "epoch": 3.34597875569044, |
| "grad_norm": 0.7717932462692261, |
| "learning_rate": 2.1754771437015495e-06, |
| "loss": 1.1893, |
| "step": 1472 |
| }, |
| { |
| "epoch": 3.3482549317147194, |
| "grad_norm": 0.7545213103294373, |
| "learning_rate": 2.1724041779931266e-06, |
| "loss": 1.1929, |
| "step": 1473 |
| }, |
| { |
| "epoch": 3.3505311077389983, |
| "grad_norm": 0.7686711549758911, |
| "learning_rate": 2.169331715814093e-06, |
| "loss": 1.1781, |
| "step": 1474 |
| }, |
| { |
| "epoch": 3.3528072837632776, |
| "grad_norm": 0.7366244792938232, |
| "learning_rate": 2.1662597618869574e-06, |
| "loss": 1.1401, |
| "step": 1475 |
| }, |
| { |
| "epoch": 3.355083459787557, |
| "grad_norm": 0.7632220387458801, |
| "learning_rate": 2.163188320933453e-06, |
| "loss": 1.1431, |
| "step": 1476 |
| }, |
| { |
| "epoch": 3.3573596358118363, |
| "grad_norm": 0.7904044389724731, |
| "learning_rate": 2.1601173976745205e-06, |
| "loss": 1.2071, |
| "step": 1477 |
| }, |
| { |
| "epoch": 3.359635811836115, |
| "grad_norm": 0.7487012147903442, |
| "learning_rate": 2.157046996830304e-06, |
| "loss": 1.1827, |
| "step": 1478 |
| }, |
| { |
| "epoch": 3.3619119878603945, |
| "grad_norm": 0.7721722722053528, |
| "learning_rate": 2.1539771231201497e-06, |
| "loss": 1.1984, |
| "step": 1479 |
| }, |
| { |
| "epoch": 3.364188163884674, |
| "grad_norm": 0.7438533902168274, |
| "learning_rate": 2.1509077812625885e-06, |
| "loss": 1.161, |
| "step": 1480 |
| }, |
| { |
| "epoch": 3.366464339908953, |
| "grad_norm": 0.7563179731369019, |
| "learning_rate": 2.147838975975335e-06, |
| "loss": 1.182, |
| "step": 1481 |
| }, |
| { |
| "epoch": 3.368740515933232, |
| "grad_norm": 0.7858923673629761, |
| "learning_rate": 2.1447707119752817e-06, |
| "loss": 1.2036, |
| "step": 1482 |
| }, |
| { |
| "epoch": 3.3710166919575113, |
| "grad_norm": 0.7806487083435059, |
| "learning_rate": 2.141702993978486e-06, |
| "loss": 1.1444, |
| "step": 1483 |
| }, |
| { |
| "epoch": 3.3732928679817906, |
| "grad_norm": 0.7645002603530884, |
| "learning_rate": 2.138635826700167e-06, |
| "loss": 1.1818, |
| "step": 1484 |
| }, |
| { |
| "epoch": 3.37556904400607, |
| "grad_norm": 0.7865437865257263, |
| "learning_rate": 2.1355692148546993e-06, |
| "loss": 1.1859, |
| "step": 1485 |
| }, |
| { |
| "epoch": 3.3778452200303493, |
| "grad_norm": 0.7360503673553467, |
| "learning_rate": 2.1325031631555996e-06, |
| "loss": 1.175, |
| "step": 1486 |
| }, |
| { |
| "epoch": 3.380121396054628, |
| "grad_norm": 0.7604530453681946, |
| "learning_rate": 2.1294376763155284e-06, |
| "loss": 1.1844, |
| "step": 1487 |
| }, |
| { |
| "epoch": 3.3823975720789075, |
| "grad_norm": 0.7645239233970642, |
| "learning_rate": 2.1263727590462747e-06, |
| "loss": 1.1488, |
| "step": 1488 |
| }, |
| { |
| "epoch": 3.3846737481031868, |
| "grad_norm": 0.7591574788093567, |
| "learning_rate": 2.1233084160587524e-06, |
| "loss": 1.1975, |
| "step": 1489 |
| }, |
| { |
| "epoch": 3.3869499241274656, |
| "grad_norm": 0.7940670847892761, |
| "learning_rate": 2.1202446520629945e-06, |
| "loss": 1.1756, |
| "step": 1490 |
| }, |
| { |
| "epoch": 3.389226100151745, |
| "grad_norm": 0.76637864112854, |
| "learning_rate": 2.117181471768143e-06, |
| "loss": 1.1533, |
| "step": 1491 |
| }, |
| { |
| "epoch": 3.3915022761760243, |
| "grad_norm": 0.8150556683540344, |
| "learning_rate": 2.1141188798824404e-06, |
| "loss": 1.2104, |
| "step": 1492 |
| }, |
| { |
| "epoch": 3.3937784522003036, |
| "grad_norm": 0.7532956600189209, |
| "learning_rate": 2.11105688111323e-06, |
| "loss": 1.1345, |
| "step": 1493 |
| }, |
| { |
| "epoch": 3.396054628224583, |
| "grad_norm": 0.7805771827697754, |
| "learning_rate": 2.107995480166937e-06, |
| "loss": 1.1963, |
| "step": 1494 |
| }, |
| { |
| "epoch": 3.398330804248862, |
| "grad_norm": 0.8066619634628296, |
| "learning_rate": 2.1049346817490756e-06, |
| "loss": 1.171, |
| "step": 1495 |
| }, |
| { |
| "epoch": 3.400606980273141, |
| "grad_norm": 0.7702916264533997, |
| "learning_rate": 2.101874490564227e-06, |
| "loss": 1.191, |
| "step": 1496 |
| }, |
| { |
| "epoch": 3.4028831562974204, |
| "grad_norm": 0.7325629591941833, |
| "learning_rate": 2.0988149113160395e-06, |
| "loss": 1.1501, |
| "step": 1497 |
| }, |
| { |
| "epoch": 3.4051593323216993, |
| "grad_norm": 0.7644321918487549, |
| "learning_rate": 2.095755948707227e-06, |
| "loss": 1.1714, |
| "step": 1498 |
| }, |
| { |
| "epoch": 3.4074355083459786, |
| "grad_norm": 0.7521936893463135, |
| "learning_rate": 2.092697607439549e-06, |
| "loss": 1.1995, |
| "step": 1499 |
| }, |
| { |
| "epoch": 3.409711684370258, |
| "grad_norm": 0.7865051627159119, |
| "learning_rate": 2.0896398922138124e-06, |
| "loss": 1.1151, |
| "step": 1500 |
| }, |
| { |
| "epoch": 3.4119878603945373, |
| "grad_norm": 0.74745774269104, |
| "learning_rate": 2.086582807729863e-06, |
| "loss": 1.1869, |
| "step": 1501 |
| }, |
| { |
| "epoch": 3.4142640364188166, |
| "grad_norm": 0.7475553750991821, |
| "learning_rate": 2.083526358686575e-06, |
| "loss": 1.147, |
| "step": 1502 |
| }, |
| { |
| "epoch": 3.4165402124430955, |
| "grad_norm": 0.7867413759231567, |
| "learning_rate": 2.0804705497818466e-06, |
| "loss": 1.1804, |
| "step": 1503 |
| }, |
| { |
| "epoch": 3.418816388467375, |
| "grad_norm": 0.7480242252349854, |
| "learning_rate": 2.077415385712594e-06, |
| "loss": 1.1545, |
| "step": 1504 |
| }, |
| { |
| "epoch": 3.421092564491654, |
| "grad_norm": 0.7600266933441162, |
| "learning_rate": 2.0743608711747383e-06, |
| "loss": 1.1668, |
| "step": 1505 |
| }, |
| { |
| "epoch": 3.423368740515933, |
| "grad_norm": 0.7508987188339233, |
| "learning_rate": 2.0713070108632072e-06, |
| "loss": 1.1793, |
| "step": 1506 |
| }, |
| { |
| "epoch": 3.4256449165402123, |
| "grad_norm": 0.7640750408172607, |
| "learning_rate": 2.0682538094719183e-06, |
| "loss": 1.1797, |
| "step": 1507 |
| }, |
| { |
| "epoch": 3.4279210925644916, |
| "grad_norm": 0.7698054313659668, |
| "learning_rate": 2.065201271693779e-06, |
| "loss": 1.1753, |
| "step": 1508 |
| }, |
| { |
| "epoch": 3.430197268588771, |
| "grad_norm": 0.7779369354248047, |
| "learning_rate": 2.0621494022206758e-06, |
| "loss": 1.1866, |
| "step": 1509 |
| }, |
| { |
| "epoch": 3.4324734446130503, |
| "grad_norm": 0.7479546666145325, |
| "learning_rate": 2.0590982057434684e-06, |
| "loss": 1.1501, |
| "step": 1510 |
| }, |
| { |
| "epoch": 3.434749620637329, |
| "grad_norm": 0.7484161853790283, |
| "learning_rate": 2.0560476869519815e-06, |
| "loss": 1.172, |
| "step": 1511 |
| }, |
| { |
| "epoch": 3.4370257966616085, |
| "grad_norm": 0.7611399292945862, |
| "learning_rate": 2.052997850534999e-06, |
| "loss": 1.1624, |
| "step": 1512 |
| }, |
| { |
| "epoch": 3.4393019726858878, |
| "grad_norm": 0.7756854295730591, |
| "learning_rate": 2.0499487011802554e-06, |
| "loss": 1.1852, |
| "step": 1513 |
| }, |
| { |
| "epoch": 3.441578148710167, |
| "grad_norm": 0.783087968826294, |
| "learning_rate": 2.0469002435744285e-06, |
| "loss": 1.1806, |
| "step": 1514 |
| }, |
| { |
| "epoch": 3.443854324734446, |
| "grad_norm": 0.8397075533866882, |
| "learning_rate": 2.0438524824031346e-06, |
| "loss": 1.1927, |
| "step": 1515 |
| }, |
| { |
| "epoch": 3.4461305007587253, |
| "grad_norm": 0.7895216941833496, |
| "learning_rate": 2.0408054223509173e-06, |
| "loss": 1.158, |
| "step": 1516 |
| }, |
| { |
| "epoch": 3.4484066767830046, |
| "grad_norm": 0.7796897292137146, |
| "learning_rate": 2.0377590681012454e-06, |
| "loss": 1.1838, |
| "step": 1517 |
| }, |
| { |
| "epoch": 3.450682852807284, |
| "grad_norm": 0.7745197415351868, |
| "learning_rate": 2.0347134243365e-06, |
| "loss": 1.1581, |
| "step": 1518 |
| }, |
| { |
| "epoch": 3.452959028831563, |
| "grad_norm": 0.7631146907806396, |
| "learning_rate": 2.031668495737972e-06, |
| "loss": 1.2247, |
| "step": 1519 |
| }, |
| { |
| "epoch": 3.455235204855842, |
| "grad_norm": 0.7887859344482422, |
| "learning_rate": 2.0286242869858525e-06, |
| "loss": 1.1955, |
| "step": 1520 |
| }, |
| { |
| "epoch": 3.4575113808801214, |
| "grad_norm": 0.7753341794013977, |
| "learning_rate": 2.0255808027592263e-06, |
| "loss": 1.2184, |
| "step": 1521 |
| }, |
| { |
| "epoch": 3.4597875569044008, |
| "grad_norm": 0.8212582468986511, |
| "learning_rate": 2.022538047736063e-06, |
| "loss": 1.1923, |
| "step": 1522 |
| }, |
| { |
| "epoch": 3.4620637329286796, |
| "grad_norm": 0.7870779037475586, |
| "learning_rate": 2.019496026593214e-06, |
| "loss": 1.2015, |
| "step": 1523 |
| }, |
| { |
| "epoch": 3.464339908952959, |
| "grad_norm": 0.7596839070320129, |
| "learning_rate": 2.0164547440064017e-06, |
| "loss": 1.1863, |
| "step": 1524 |
| }, |
| { |
| "epoch": 3.4666160849772383, |
| "grad_norm": 0.7594588994979858, |
| "learning_rate": 2.0134142046502112e-06, |
| "loss": 1.1413, |
| "step": 1525 |
| }, |
| { |
| "epoch": 3.4688922610015176, |
| "grad_norm": 0.7960179448127747, |
| "learning_rate": 2.0103744131980906e-06, |
| "loss": 1.1916, |
| "step": 1526 |
| }, |
| { |
| "epoch": 3.4711684370257965, |
| "grad_norm": 0.7636160850524902, |
| "learning_rate": 2.007335374322331e-06, |
| "loss": 1.1859, |
| "step": 1527 |
| }, |
| { |
| "epoch": 3.473444613050076, |
| "grad_norm": 0.8009867072105408, |
| "learning_rate": 2.004297092694073e-06, |
| "loss": 1.1821, |
| "step": 1528 |
| }, |
| { |
| "epoch": 3.475720789074355, |
| "grad_norm": 0.7843029499053955, |
| "learning_rate": 2.001259572983291e-06, |
| "loss": 1.1519, |
| "step": 1529 |
| }, |
| { |
| "epoch": 3.4779969650986344, |
| "grad_norm": 0.7983155250549316, |
| "learning_rate": 1.998222819858787e-06, |
| "loss": 1.1777, |
| "step": 1530 |
| }, |
| { |
| "epoch": 3.4802731411229137, |
| "grad_norm": 0.7752939462661743, |
| "learning_rate": 1.995186837988187e-06, |
| "loss": 1.176, |
| "step": 1531 |
| }, |
| { |
| "epoch": 3.4825493171471926, |
| "grad_norm": 0.7518580555915833, |
| "learning_rate": 1.9921516320379306e-06, |
| "loss": 1.1867, |
| "step": 1532 |
| }, |
| { |
| "epoch": 3.484825493171472, |
| "grad_norm": 0.7821219563484192, |
| "learning_rate": 1.989117206673264e-06, |
| "loss": 1.1544, |
| "step": 1533 |
| }, |
| { |
| "epoch": 3.4871016691957513, |
| "grad_norm": 0.7886951565742493, |
| "learning_rate": 1.9860835665582343e-06, |
| "loss": 1.1812, |
| "step": 1534 |
| }, |
| { |
| "epoch": 3.48937784522003, |
| "grad_norm": 0.7971673607826233, |
| "learning_rate": 1.9830507163556815e-06, |
| "loss": 1.1644, |
| "step": 1535 |
| }, |
| { |
| "epoch": 3.4916540212443095, |
| "grad_norm": 0.7841876149177551, |
| "learning_rate": 1.9800186607272333e-06, |
| "loss": 1.1841, |
| "step": 1536 |
| }, |
| { |
| "epoch": 3.4939301972685888, |
| "grad_norm": 0.760624349117279, |
| "learning_rate": 1.9769874043332934e-06, |
| "loss": 1.1542, |
| "step": 1537 |
| }, |
| { |
| "epoch": 3.496206373292868, |
| "grad_norm": 0.8207852244377136, |
| "learning_rate": 1.9739569518330364e-06, |
| "loss": 1.2127, |
| "step": 1538 |
| }, |
| { |
| "epoch": 3.4984825493171474, |
| "grad_norm": 0.7697487473487854, |
| "learning_rate": 1.9709273078844054e-06, |
| "loss": 1.1335, |
| "step": 1539 |
| }, |
| { |
| "epoch": 3.5007587253414263, |
| "grad_norm": 0.7819302082061768, |
| "learning_rate": 1.9678984771440974e-06, |
| "loss": 1.1826, |
| "step": 1540 |
| }, |
| { |
| "epoch": 3.5030349013657056, |
| "grad_norm": 0.818231999874115, |
| "learning_rate": 1.96487046426756e-06, |
| "loss": 1.1659, |
| "step": 1541 |
| }, |
| { |
| "epoch": 3.505311077389985, |
| "grad_norm": 0.7885006070137024, |
| "learning_rate": 1.9618432739089843e-06, |
| "loss": 1.1239, |
| "step": 1542 |
| }, |
| { |
| "epoch": 3.507587253414264, |
| "grad_norm": 0.7705325484275818, |
| "learning_rate": 1.9588169107212968e-06, |
| "loss": 1.1393, |
| "step": 1543 |
| }, |
| { |
| "epoch": 3.509863429438543, |
| "grad_norm": 0.7711304426193237, |
| "learning_rate": 1.955791379356152e-06, |
| "loss": 1.1612, |
| "step": 1544 |
| }, |
| { |
| "epoch": 3.5121396054628224, |
| "grad_norm": 0.7557392716407776, |
| "learning_rate": 1.952766684463929e-06, |
| "loss": 1.184, |
| "step": 1545 |
| }, |
| { |
| "epoch": 3.5144157814871018, |
| "grad_norm": 0.7863343358039856, |
| "learning_rate": 1.9497428306937148e-06, |
| "loss": 1.1757, |
| "step": 1546 |
| }, |
| { |
| "epoch": 3.516691957511381, |
| "grad_norm": 0.7670086622238159, |
| "learning_rate": 1.946719822693311e-06, |
| "loss": 1.192, |
| "step": 1547 |
| }, |
| { |
| "epoch": 3.51896813353566, |
| "grad_norm": 0.7611085176467896, |
| "learning_rate": 1.9436976651092143e-06, |
| "loss": 1.1697, |
| "step": 1548 |
| }, |
| { |
| "epoch": 3.5212443095599393, |
| "grad_norm": 0.7767881155014038, |
| "learning_rate": 1.9406763625866155e-06, |
| "loss": 1.1844, |
| "step": 1549 |
| }, |
| { |
| "epoch": 3.5235204855842186, |
| "grad_norm": 0.792441189289093, |
| "learning_rate": 1.937655919769392e-06, |
| "loss": 1.1898, |
| "step": 1550 |
| }, |
| { |
| "epoch": 3.5257966616084975, |
| "grad_norm": 0.7552328705787659, |
| "learning_rate": 1.9346363413000988e-06, |
| "loss": 1.1162, |
| "step": 1551 |
| }, |
| { |
| "epoch": 3.528072837632777, |
| "grad_norm": 0.7915894389152527, |
| "learning_rate": 1.931617631819962e-06, |
| "loss": 1.1508, |
| "step": 1552 |
| }, |
| { |
| "epoch": 3.530349013657056, |
| "grad_norm": 0.7816686034202576, |
| "learning_rate": 1.9285997959688742e-06, |
| "loss": 1.1886, |
| "step": 1553 |
| }, |
| { |
| "epoch": 3.5326251896813354, |
| "grad_norm": 0.7791935205459595, |
| "learning_rate": 1.9255828383853822e-06, |
| "loss": 1.1651, |
| "step": 1554 |
| }, |
| { |
| "epoch": 3.5349013657056148, |
| "grad_norm": 0.7524689435958862, |
| "learning_rate": 1.9225667637066845e-06, |
| "loss": 1.1455, |
| "step": 1555 |
| }, |
| { |
| "epoch": 3.5371775417298936, |
| "grad_norm": 0.7957069873809814, |
| "learning_rate": 1.9195515765686237e-06, |
| "loss": 1.1811, |
| "step": 1556 |
| }, |
| { |
| "epoch": 3.539453717754173, |
| "grad_norm": 0.7892540693283081, |
| "learning_rate": 1.916537281605675e-06, |
| "loss": 1.1673, |
| "step": 1557 |
| }, |
| { |
| "epoch": 3.5417298937784523, |
| "grad_norm": 0.7897363305091858, |
| "learning_rate": 1.913523883450946e-06, |
| "loss": 1.2002, |
| "step": 1558 |
| }, |
| { |
| "epoch": 3.544006069802731, |
| "grad_norm": 0.7709572315216064, |
| "learning_rate": 1.9105113867361633e-06, |
| "loss": 1.1636, |
| "step": 1559 |
| }, |
| { |
| "epoch": 3.5462822458270105, |
| "grad_norm": 0.8005422353744507, |
| "learning_rate": 1.907499796091668e-06, |
| "loss": 1.1738, |
| "step": 1560 |
| }, |
| { |
| "epoch": 3.54855842185129, |
| "grad_norm": 0.803570032119751, |
| "learning_rate": 1.9044891161464108e-06, |
| "loss": 1.1728, |
| "step": 1561 |
| }, |
| { |
| "epoch": 3.550834597875569, |
| "grad_norm": 0.8024869561195374, |
| "learning_rate": 1.90147935152794e-06, |
| "loss": 1.1666, |
| "step": 1562 |
| }, |
| { |
| "epoch": 3.5531107738998484, |
| "grad_norm": 0.7676922082901001, |
| "learning_rate": 1.8984705068623976e-06, |
| "loss": 1.1665, |
| "step": 1563 |
| }, |
| { |
| "epoch": 3.5553869499241273, |
| "grad_norm": 0.7918079495429993, |
| "learning_rate": 1.895462586774513e-06, |
| "loss": 1.162, |
| "step": 1564 |
| }, |
| { |
| "epoch": 3.5576631259484066, |
| "grad_norm": 0.7807730436325073, |
| "learning_rate": 1.8924555958875923e-06, |
| "loss": 1.1971, |
| "step": 1565 |
| }, |
| { |
| "epoch": 3.559939301972686, |
| "grad_norm": 0.7498663067817688, |
| "learning_rate": 1.8894495388235165e-06, |
| "loss": 1.1726, |
| "step": 1566 |
| }, |
| { |
| "epoch": 3.5622154779969653, |
| "grad_norm": 0.7693403959274292, |
| "learning_rate": 1.8864444202027287e-06, |
| "loss": 1.1912, |
| "step": 1567 |
| }, |
| { |
| "epoch": 3.5644916540212446, |
| "grad_norm": 0.7785525918006897, |
| "learning_rate": 1.8834402446442284e-06, |
| "loss": 1.1865, |
| "step": 1568 |
| }, |
| { |
| "epoch": 3.5667678300455234, |
| "grad_norm": 0.7578516006469727, |
| "learning_rate": 1.88043701676557e-06, |
| "loss": 1.1451, |
| "step": 1569 |
| }, |
| { |
| "epoch": 3.5690440060698028, |
| "grad_norm": 0.7615971565246582, |
| "learning_rate": 1.8774347411828472e-06, |
| "loss": 1.2075, |
| "step": 1570 |
| }, |
| { |
| "epoch": 3.571320182094082, |
| "grad_norm": 0.7744317054748535, |
| "learning_rate": 1.8744334225106917e-06, |
| "loss": 1.2109, |
| "step": 1571 |
| }, |
| { |
| "epoch": 3.573596358118361, |
| "grad_norm": 0.749718427658081, |
| "learning_rate": 1.8714330653622645e-06, |
| "loss": 1.163, |
| "step": 1572 |
| }, |
| { |
| "epoch": 3.5758725341426403, |
| "grad_norm": 0.7745640873908997, |
| "learning_rate": 1.8684336743492481e-06, |
| "loss": 1.1909, |
| "step": 1573 |
| }, |
| { |
| "epoch": 3.5781487101669196, |
| "grad_norm": 0.7815781235694885, |
| "learning_rate": 1.8654352540818398e-06, |
| "loss": 1.225, |
| "step": 1574 |
| }, |
| { |
| "epoch": 3.580424886191199, |
| "grad_norm": 0.7583136558532715, |
| "learning_rate": 1.862437809168746e-06, |
| "loss": 1.1589, |
| "step": 1575 |
| }, |
| { |
| "epoch": 3.5827010622154782, |
| "grad_norm": 0.7747395038604736, |
| "learning_rate": 1.8594413442171722e-06, |
| "loss": 1.1941, |
| "step": 1576 |
| }, |
| { |
| "epoch": 3.584977238239757, |
| "grad_norm": 0.7689574956893921, |
| "learning_rate": 1.8564458638328203e-06, |
| "loss": 1.1382, |
| "step": 1577 |
| }, |
| { |
| "epoch": 3.5872534142640364, |
| "grad_norm": 0.7516262531280518, |
| "learning_rate": 1.8534513726198773e-06, |
| "loss": 1.1587, |
| "step": 1578 |
| }, |
| { |
| "epoch": 3.5895295902883158, |
| "grad_norm": 0.7683000564575195, |
| "learning_rate": 1.8504578751810066e-06, |
| "loss": 1.211, |
| "step": 1579 |
| }, |
| { |
| "epoch": 3.5918057663125946, |
| "grad_norm": 0.7480950951576233, |
| "learning_rate": 1.8474653761173506e-06, |
| "loss": 1.1418, |
| "step": 1580 |
| }, |
| { |
| "epoch": 3.594081942336874, |
| "grad_norm": 0.7797414064407349, |
| "learning_rate": 1.8444738800285128e-06, |
| "loss": 1.1673, |
| "step": 1581 |
| }, |
| { |
| "epoch": 3.5963581183611533, |
| "grad_norm": 0.7567870020866394, |
| "learning_rate": 1.8414833915125554e-06, |
| "loss": 1.1997, |
| "step": 1582 |
| }, |
| { |
| "epoch": 3.5986342943854326, |
| "grad_norm": 0.7724438905715942, |
| "learning_rate": 1.8384939151659936e-06, |
| "loss": 1.2075, |
| "step": 1583 |
| }, |
| { |
| "epoch": 3.600910470409712, |
| "grad_norm": 0.7550652623176575, |
| "learning_rate": 1.835505455583786e-06, |
| "loss": 1.1633, |
| "step": 1584 |
| }, |
| { |
| "epoch": 3.603186646433991, |
| "grad_norm": 0.7587825059890747, |
| "learning_rate": 1.8325180173593265e-06, |
| "loss": 1.1306, |
| "step": 1585 |
| }, |
| { |
| "epoch": 3.60546282245827, |
| "grad_norm": 0.76627117395401, |
| "learning_rate": 1.8295316050844428e-06, |
| "loss": 1.164, |
| "step": 1586 |
| }, |
| { |
| "epoch": 3.6077389984825494, |
| "grad_norm": 0.7493066191673279, |
| "learning_rate": 1.8265462233493819e-06, |
| "loss": 1.1746, |
| "step": 1587 |
| }, |
| { |
| "epoch": 3.6100151745068283, |
| "grad_norm": 0.7724924683570862, |
| "learning_rate": 1.823561876742811e-06, |
| "loss": 1.1918, |
| "step": 1588 |
| }, |
| { |
| "epoch": 3.6122913505311076, |
| "grad_norm": 0.7840549945831299, |
| "learning_rate": 1.8205785698518024e-06, |
| "loss": 1.1779, |
| "step": 1589 |
| }, |
| { |
| "epoch": 3.614567526555387, |
| "grad_norm": 0.7741526961326599, |
| "learning_rate": 1.817596307261832e-06, |
| "loss": 1.1731, |
| "step": 1590 |
| }, |
| { |
| "epoch": 3.6168437025796663, |
| "grad_norm": 0.7666813135147095, |
| "learning_rate": 1.8146150935567712e-06, |
| "loss": 1.2023, |
| "step": 1591 |
| }, |
| { |
| "epoch": 3.6191198786039456, |
| "grad_norm": 0.7450817227363586, |
| "learning_rate": 1.8116349333188775e-06, |
| "loss": 1.1614, |
| "step": 1592 |
| }, |
| { |
| "epoch": 3.6213960546282244, |
| "grad_norm": 0.7609484195709229, |
| "learning_rate": 1.80865583112879e-06, |
| "loss": 1.1923, |
| "step": 1593 |
| }, |
| { |
| "epoch": 3.6236722306525038, |
| "grad_norm": 0.7363880276679993, |
| "learning_rate": 1.8056777915655223e-06, |
| "loss": 1.1198, |
| "step": 1594 |
| }, |
| { |
| "epoch": 3.625948406676783, |
| "grad_norm": 0.7470148801803589, |
| "learning_rate": 1.8027008192064537e-06, |
| "loss": 1.1398, |
| "step": 1595 |
| }, |
| { |
| "epoch": 3.628224582701062, |
| "grad_norm": 0.7653167247772217, |
| "learning_rate": 1.7997249186273233e-06, |
| "loss": 1.1567, |
| "step": 1596 |
| }, |
| { |
| "epoch": 3.6305007587253413, |
| "grad_norm": 0.7713541984558105, |
| "learning_rate": 1.7967500944022237e-06, |
| "loss": 1.1461, |
| "step": 1597 |
| }, |
| { |
| "epoch": 3.6327769347496206, |
| "grad_norm": 0.7798824310302734, |
| "learning_rate": 1.7937763511035904e-06, |
| "loss": 1.1613, |
| "step": 1598 |
| }, |
| { |
| "epoch": 3.6350531107739, |
| "grad_norm": 0.7728201746940613, |
| "learning_rate": 1.7908036933022027e-06, |
| "loss": 1.1569, |
| "step": 1599 |
| }, |
| { |
| "epoch": 3.6373292867981792, |
| "grad_norm": 0.7966005802154541, |
| "learning_rate": 1.787832125567166e-06, |
| "loss": 1.1902, |
| "step": 1600 |
| }, |
| { |
| "epoch": 3.639605462822458, |
| "grad_norm": 0.7702760696411133, |
| "learning_rate": 1.7848616524659125e-06, |
| "loss": 1.1803, |
| "step": 1601 |
| }, |
| { |
| "epoch": 3.6418816388467374, |
| "grad_norm": 0.750785768032074, |
| "learning_rate": 1.781892278564193e-06, |
| "loss": 1.1602, |
| "step": 1602 |
| }, |
| { |
| "epoch": 3.6441578148710168, |
| "grad_norm": 0.777713418006897, |
| "learning_rate": 1.7789240084260668e-06, |
| "loss": 1.164, |
| "step": 1603 |
| }, |
| { |
| "epoch": 3.6464339908952956, |
| "grad_norm": 0.7839711904525757, |
| "learning_rate": 1.7759568466138966e-06, |
| "loss": 1.1698, |
| "step": 1604 |
| }, |
| { |
| "epoch": 3.648710166919575, |
| "grad_norm": 0.7893310189247131, |
| "learning_rate": 1.7729907976883443e-06, |
| "loss": 1.1634, |
| "step": 1605 |
| }, |
| { |
| "epoch": 3.6509863429438543, |
| "grad_norm": 0.7450976967811584, |
| "learning_rate": 1.7700258662083574e-06, |
| "loss": 1.1598, |
| "step": 1606 |
| }, |
| { |
| "epoch": 3.6532625189681336, |
| "grad_norm": 0.7937871217727661, |
| "learning_rate": 1.7670620567311696e-06, |
| "loss": 1.1632, |
| "step": 1607 |
| }, |
| { |
| "epoch": 3.655538694992413, |
| "grad_norm": 0.7774436473846436, |
| "learning_rate": 1.7640993738122886e-06, |
| "loss": 1.1634, |
| "step": 1608 |
| }, |
| { |
| "epoch": 3.657814871016692, |
| "grad_norm": 0.7838451266288757, |
| "learning_rate": 1.761137822005487e-06, |
| "loss": 1.1642, |
| "step": 1609 |
| }, |
| { |
| "epoch": 3.660091047040971, |
| "grad_norm": 0.7670826315879822, |
| "learning_rate": 1.7581774058628054e-06, |
| "loss": 1.1602, |
| "step": 1610 |
| }, |
| { |
| "epoch": 3.6623672230652504, |
| "grad_norm": 0.7767178416252136, |
| "learning_rate": 1.755218129934534e-06, |
| "loss": 1.154, |
| "step": 1611 |
| }, |
| { |
| "epoch": 3.6646433990895297, |
| "grad_norm": 0.8149465322494507, |
| "learning_rate": 1.7522599987692113e-06, |
| "loss": 1.2108, |
| "step": 1612 |
| }, |
| { |
| "epoch": 3.666919575113809, |
| "grad_norm": 0.780608594417572, |
| "learning_rate": 1.7493030169136183e-06, |
| "loss": 1.1816, |
| "step": 1613 |
| }, |
| { |
| "epoch": 3.669195751138088, |
| "grad_norm": 0.7886972427368164, |
| "learning_rate": 1.7463471889127673e-06, |
| "loss": 1.177, |
| "step": 1614 |
| }, |
| { |
| "epoch": 3.6714719271623673, |
| "grad_norm": 0.8002546429634094, |
| "learning_rate": 1.743392519309897e-06, |
| "loss": 1.2167, |
| "step": 1615 |
| }, |
| { |
| "epoch": 3.6737481031866466, |
| "grad_norm": 0.8180500864982605, |
| "learning_rate": 1.7404390126464676e-06, |
| "loss": 1.1524, |
| "step": 1616 |
| }, |
| { |
| "epoch": 3.6760242792109254, |
| "grad_norm": 0.7811232209205627, |
| "learning_rate": 1.7374866734621487e-06, |
| "loss": 1.1734, |
| "step": 1617 |
| }, |
| { |
| "epoch": 3.6783004552352048, |
| "grad_norm": 0.7839295268058777, |
| "learning_rate": 1.73453550629482e-06, |
| "loss": 1.2271, |
| "step": 1618 |
| }, |
| { |
| "epoch": 3.680576631259484, |
| "grad_norm": 0.7847591042518616, |
| "learning_rate": 1.7315855156805558e-06, |
| "loss": 1.1878, |
| "step": 1619 |
| }, |
| { |
| "epoch": 3.6828528072837634, |
| "grad_norm": 0.77289217710495, |
| "learning_rate": 1.7286367061536215e-06, |
| "loss": 1.1785, |
| "step": 1620 |
| }, |
| { |
| "epoch": 3.6851289833080427, |
| "grad_norm": 0.7843948602676392, |
| "learning_rate": 1.7256890822464716e-06, |
| "loss": 1.1834, |
| "step": 1621 |
| }, |
| { |
| "epoch": 3.6874051593323216, |
| "grad_norm": 0.8314946889877319, |
| "learning_rate": 1.7227426484897345e-06, |
| "loss": 1.2082, |
| "step": 1622 |
| }, |
| { |
| "epoch": 3.689681335356601, |
| "grad_norm": 0.7974013686180115, |
| "learning_rate": 1.7197974094122095e-06, |
| "loss": 1.139, |
| "step": 1623 |
| }, |
| { |
| "epoch": 3.6919575113808802, |
| "grad_norm": 0.7534367442131042, |
| "learning_rate": 1.7168533695408612e-06, |
| "loss": 1.1726, |
| "step": 1624 |
| }, |
| { |
| "epoch": 3.694233687405159, |
| "grad_norm": 0.7677541971206665, |
| "learning_rate": 1.7139105334008099e-06, |
| "loss": 1.1262, |
| "step": 1625 |
| }, |
| { |
| "epoch": 3.6965098634294384, |
| "grad_norm": 0.7810897827148438, |
| "learning_rate": 1.7109689055153261e-06, |
| "loss": 1.1605, |
| "step": 1626 |
| }, |
| { |
| "epoch": 3.6987860394537178, |
| "grad_norm": 0.8533305525779724, |
| "learning_rate": 1.708028490405823e-06, |
| "loss": 1.1913, |
| "step": 1627 |
| }, |
| { |
| "epoch": 3.701062215477997, |
| "grad_norm": 0.80948805809021, |
| "learning_rate": 1.7050892925918491e-06, |
| "loss": 1.1917, |
| "step": 1628 |
| }, |
| { |
| "epoch": 3.7033383915022764, |
| "grad_norm": 0.7783576846122742, |
| "learning_rate": 1.7021513165910841e-06, |
| "loss": 1.215, |
| "step": 1629 |
| }, |
| { |
| "epoch": 3.7056145675265553, |
| "grad_norm": 0.7968083024024963, |
| "learning_rate": 1.699214566919327e-06, |
| "loss": 1.2006, |
| "step": 1630 |
| }, |
| { |
| "epoch": 3.7078907435508346, |
| "grad_norm": 0.7826852798461914, |
| "learning_rate": 1.6962790480904934e-06, |
| "loss": 1.1686, |
| "step": 1631 |
| }, |
| { |
| "epoch": 3.710166919575114, |
| "grad_norm": 0.7661287784576416, |
| "learning_rate": 1.6933447646166069e-06, |
| "loss": 1.149, |
| "step": 1632 |
| }, |
| { |
| "epoch": 3.712443095599393, |
| "grad_norm": 0.8017462491989136, |
| "learning_rate": 1.690411721007791e-06, |
| "loss": 1.1997, |
| "step": 1633 |
| }, |
| { |
| "epoch": 3.714719271623672, |
| "grad_norm": 0.78822261095047, |
| "learning_rate": 1.6874799217722652e-06, |
| "loss": 1.1678, |
| "step": 1634 |
| }, |
| { |
| "epoch": 3.7169954476479514, |
| "grad_norm": 0.8100883364677429, |
| "learning_rate": 1.6845493714163361e-06, |
| "loss": 1.1477, |
| "step": 1635 |
| }, |
| { |
| "epoch": 3.7192716236722307, |
| "grad_norm": 0.7745562195777893, |
| "learning_rate": 1.681620074444389e-06, |
| "loss": 1.1496, |
| "step": 1636 |
| }, |
| { |
| "epoch": 3.72154779969651, |
| "grad_norm": 0.7901548743247986, |
| "learning_rate": 1.6786920353588859e-06, |
| "loss": 1.1845, |
| "step": 1637 |
| }, |
| { |
| "epoch": 3.723823975720789, |
| "grad_norm": 0.8251738548278809, |
| "learning_rate": 1.6757652586603523e-06, |
| "loss": 1.1844, |
| "step": 1638 |
| }, |
| { |
| "epoch": 3.7261001517450683, |
| "grad_norm": 0.7896043062210083, |
| "learning_rate": 1.6728397488473733e-06, |
| "loss": 1.2202, |
| "step": 1639 |
| }, |
| { |
| "epoch": 3.7283763277693476, |
| "grad_norm": 0.7613170742988586, |
| "learning_rate": 1.6699155104165903e-06, |
| "loss": 1.2186, |
| "step": 1640 |
| }, |
| { |
| "epoch": 3.7306525037936265, |
| "grad_norm": 0.7568488717079163, |
| "learning_rate": 1.6669925478626874e-06, |
| "loss": 1.18, |
| "step": 1641 |
| }, |
| { |
| "epoch": 3.7329286798179058, |
| "grad_norm": 0.7813270688056946, |
| "learning_rate": 1.6640708656783878e-06, |
| "loss": 1.1845, |
| "step": 1642 |
| }, |
| { |
| "epoch": 3.735204855842185, |
| "grad_norm": 0.800815761089325, |
| "learning_rate": 1.6611504683544477e-06, |
| "loss": 1.131, |
| "step": 1643 |
| }, |
| { |
| "epoch": 3.7374810318664644, |
| "grad_norm": 0.748050332069397, |
| "learning_rate": 1.6582313603796485e-06, |
| "loss": 1.1603, |
| "step": 1644 |
| }, |
| { |
| "epoch": 3.7397572078907437, |
| "grad_norm": 0.7783641219139099, |
| "learning_rate": 1.6553135462407876e-06, |
| "loss": 1.1516, |
| "step": 1645 |
| }, |
| { |
| "epoch": 3.7420333839150226, |
| "grad_norm": 0.7908198237419128, |
| "learning_rate": 1.6523970304226778e-06, |
| "loss": 1.2066, |
| "step": 1646 |
| }, |
| { |
| "epoch": 3.744309559939302, |
| "grad_norm": 0.7437866926193237, |
| "learning_rate": 1.6494818174081317e-06, |
| "loss": 1.1779, |
| "step": 1647 |
| }, |
| { |
| "epoch": 3.7465857359635812, |
| "grad_norm": 0.7539526224136353, |
| "learning_rate": 1.646567911677964e-06, |
| "loss": 1.2012, |
| "step": 1648 |
| }, |
| { |
| "epoch": 3.74886191198786, |
| "grad_norm": 0.7456071972846985, |
| "learning_rate": 1.643655317710977e-06, |
| "loss": 1.1731, |
| "step": 1649 |
| }, |
| { |
| "epoch": 3.75113808801214, |
| "grad_norm": 0.771332323551178, |
| "learning_rate": 1.6407440399839558e-06, |
| "loss": 1.1825, |
| "step": 1650 |
| }, |
| { |
| "epoch": 3.7534142640364188, |
| "grad_norm": 0.8010808825492859, |
| "learning_rate": 1.6378340829716662e-06, |
| "loss": 1.2054, |
| "step": 1651 |
| }, |
| { |
| "epoch": 3.755690440060698, |
| "grad_norm": 0.7861669063568115, |
| "learning_rate": 1.6349254511468415e-06, |
| "loss": 1.159, |
| "step": 1652 |
| }, |
| { |
| "epoch": 3.7579666160849774, |
| "grad_norm": 0.777998149394989, |
| "learning_rate": 1.6320181489801774e-06, |
| "loss": 1.2102, |
| "step": 1653 |
| }, |
| { |
| "epoch": 3.7602427921092563, |
| "grad_norm": 0.7587355375289917, |
| "learning_rate": 1.6291121809403287e-06, |
| "loss": 1.1602, |
| "step": 1654 |
| }, |
| { |
| "epoch": 3.7625189681335356, |
| "grad_norm": 0.8161150217056274, |
| "learning_rate": 1.6262075514938966e-06, |
| "loss": 1.1934, |
| "step": 1655 |
| }, |
| { |
| "epoch": 3.764795144157815, |
| "grad_norm": 0.7970815896987915, |
| "learning_rate": 1.6233042651054258e-06, |
| "loss": 1.1995, |
| "step": 1656 |
| }, |
| { |
| "epoch": 3.7670713201820942, |
| "grad_norm": 0.7849256992340088, |
| "learning_rate": 1.6204023262373985e-06, |
| "loss": 1.1368, |
| "step": 1657 |
| }, |
| { |
| "epoch": 3.7693474962063735, |
| "grad_norm": 0.7707874774932861, |
| "learning_rate": 1.6175017393502223e-06, |
| "loss": 1.1544, |
| "step": 1658 |
| }, |
| { |
| "epoch": 3.7716236722306524, |
| "grad_norm": 0.7858372330665588, |
| "learning_rate": 1.6146025089022304e-06, |
| "loss": 1.2052, |
| "step": 1659 |
| }, |
| { |
| "epoch": 3.7738998482549317, |
| "grad_norm": 0.7694425582885742, |
| "learning_rate": 1.6117046393496685e-06, |
| "loss": 1.189, |
| "step": 1660 |
| }, |
| { |
| "epoch": 3.776176024279211, |
| "grad_norm": 0.7819948196411133, |
| "learning_rate": 1.6088081351466908e-06, |
| "loss": 1.1476, |
| "step": 1661 |
| }, |
| { |
| "epoch": 3.77845220030349, |
| "grad_norm": 0.7882603406906128, |
| "learning_rate": 1.6059130007453544e-06, |
| "loss": 1.165, |
| "step": 1662 |
| }, |
| { |
| "epoch": 3.7807283763277693, |
| "grad_norm": 0.7949514389038086, |
| "learning_rate": 1.6030192405956097e-06, |
| "loss": 1.2037, |
| "step": 1663 |
| }, |
| { |
| "epoch": 3.7830045523520486, |
| "grad_norm": 0.7628163695335388, |
| "learning_rate": 1.6001268591452946e-06, |
| "loss": 1.1531, |
| "step": 1664 |
| }, |
| { |
| "epoch": 3.785280728376328, |
| "grad_norm": 0.7776125073432922, |
| "learning_rate": 1.5972358608401286e-06, |
| "loss": 1.145, |
| "step": 1665 |
| }, |
| { |
| "epoch": 3.787556904400607, |
| "grad_norm": 0.7946699857711792, |
| "learning_rate": 1.5943462501237055e-06, |
| "loss": 1.1877, |
| "step": 1666 |
| }, |
| { |
| "epoch": 3.789833080424886, |
| "grad_norm": 0.7983624339103699, |
| "learning_rate": 1.591458031437485e-06, |
| "loss": 1.1365, |
| "step": 1667 |
| }, |
| { |
| "epoch": 3.7921092564491654, |
| "grad_norm": 0.7676346302032471, |
| "learning_rate": 1.588571209220789e-06, |
| "loss": 1.1635, |
| "step": 1668 |
| }, |
| { |
| "epoch": 3.7943854324734447, |
| "grad_norm": 0.7794129848480225, |
| "learning_rate": 1.5856857879107907e-06, |
| "loss": 1.172, |
| "step": 1669 |
| }, |
| { |
| "epoch": 3.7966616084977236, |
| "grad_norm": 0.7796185612678528, |
| "learning_rate": 1.5828017719425131e-06, |
| "loss": 1.1559, |
| "step": 1670 |
| }, |
| { |
| "epoch": 3.798937784522003, |
| "grad_norm": 0.7712554931640625, |
| "learning_rate": 1.5799191657488162e-06, |
| "loss": 1.1982, |
| "step": 1671 |
| }, |
| { |
| "epoch": 3.8012139605462822, |
| "grad_norm": 0.7830713987350464, |
| "learning_rate": 1.577037973760394e-06, |
| "loss": 1.1714, |
| "step": 1672 |
| }, |
| { |
| "epoch": 3.8034901365705616, |
| "grad_norm": 0.7980160713195801, |
| "learning_rate": 1.5741582004057683e-06, |
| "loss": 1.154, |
| "step": 1673 |
| }, |
| { |
| "epoch": 3.805766312594841, |
| "grad_norm": 0.7877330183982849, |
| "learning_rate": 1.571279850111278e-06, |
| "loss": 1.1599, |
| "step": 1674 |
| }, |
| { |
| "epoch": 3.8080424886191198, |
| "grad_norm": 0.7914711833000183, |
| "learning_rate": 1.5684029273010762e-06, |
| "loss": 1.1495, |
| "step": 1675 |
| }, |
| { |
| "epoch": 3.810318664643399, |
| "grad_norm": 0.7610024213790894, |
| "learning_rate": 1.5655274363971222e-06, |
| "loss": 1.1686, |
| "step": 1676 |
| }, |
| { |
| "epoch": 3.8125948406676784, |
| "grad_norm": 0.7735294103622437, |
| "learning_rate": 1.562653381819172e-06, |
| "loss": 1.1742, |
| "step": 1677 |
| }, |
| { |
| "epoch": 3.8148710166919573, |
| "grad_norm": 0.7893885374069214, |
| "learning_rate": 1.5597807679847782e-06, |
| "loss": 1.1584, |
| "step": 1678 |
| }, |
| { |
| "epoch": 3.8171471927162366, |
| "grad_norm": 0.7622188925743103, |
| "learning_rate": 1.5569095993092747e-06, |
| "loss": 1.1389, |
| "step": 1679 |
| }, |
| { |
| "epoch": 3.819423368740516, |
| "grad_norm": 0.80954509973526, |
| "learning_rate": 1.5540398802057755e-06, |
| "loss": 1.1898, |
| "step": 1680 |
| }, |
| { |
| "epoch": 3.8216995447647952, |
| "grad_norm": 0.7971329689025879, |
| "learning_rate": 1.551171615085168e-06, |
| "loss": 1.1567, |
| "step": 1681 |
| }, |
| { |
| "epoch": 3.8239757207890746, |
| "grad_norm": 0.8103090524673462, |
| "learning_rate": 1.5483048083561036e-06, |
| "loss": 1.1744, |
| "step": 1682 |
| }, |
| { |
| "epoch": 3.8262518968133534, |
| "grad_norm": 0.7729601860046387, |
| "learning_rate": 1.545439464424991e-06, |
| "loss": 1.1338, |
| "step": 1683 |
| }, |
| { |
| "epoch": 3.8285280728376327, |
| "grad_norm": 0.8032084107398987, |
| "learning_rate": 1.5425755876959925e-06, |
| "loss": 1.1474, |
| "step": 1684 |
| }, |
| { |
| "epoch": 3.830804248861912, |
| "grad_norm": 0.776904284954071, |
| "learning_rate": 1.5397131825710137e-06, |
| "loss": 1.1574, |
| "step": 1685 |
| }, |
| { |
| "epoch": 3.833080424886191, |
| "grad_norm": 0.8134787678718567, |
| "learning_rate": 1.5368522534496993e-06, |
| "loss": 1.2379, |
| "step": 1686 |
| }, |
| { |
| "epoch": 3.8353566009104703, |
| "grad_norm": 0.7977766394615173, |
| "learning_rate": 1.5339928047294256e-06, |
| "loss": 1.1946, |
| "step": 1687 |
| }, |
| { |
| "epoch": 3.8376327769347496, |
| "grad_norm": 0.7849652767181396, |
| "learning_rate": 1.5311348408052905e-06, |
| "loss": 1.154, |
| "step": 1688 |
| }, |
| { |
| "epoch": 3.839908952959029, |
| "grad_norm": 0.8153653740882874, |
| "learning_rate": 1.5282783660701154e-06, |
| "loss": 1.1685, |
| "step": 1689 |
| }, |
| { |
| "epoch": 3.842185128983308, |
| "grad_norm": 0.794487714767456, |
| "learning_rate": 1.525423384914428e-06, |
| "loss": 1.1452, |
| "step": 1690 |
| }, |
| { |
| "epoch": 3.844461305007587, |
| "grad_norm": 0.7951854467391968, |
| "learning_rate": 1.522569901726459e-06, |
| "loss": 1.1637, |
| "step": 1691 |
| }, |
| { |
| "epoch": 3.8467374810318664, |
| "grad_norm": 0.8048427104949951, |
| "learning_rate": 1.5197179208921434e-06, |
| "loss": 1.1898, |
| "step": 1692 |
| }, |
| { |
| "epoch": 3.8490136570561457, |
| "grad_norm": 0.8001375794410706, |
| "learning_rate": 1.5168674467951008e-06, |
| "loss": 1.1973, |
| "step": 1693 |
| }, |
| { |
| "epoch": 3.851289833080425, |
| "grad_norm": 0.7969352006912231, |
| "learning_rate": 1.5140184838166368e-06, |
| "loss": 1.1641, |
| "step": 1694 |
| }, |
| { |
| "epoch": 3.8535660091047044, |
| "grad_norm": 0.7886945605278015, |
| "learning_rate": 1.5111710363357357e-06, |
| "loss": 1.1504, |
| "step": 1695 |
| }, |
| { |
| "epoch": 3.8558421851289832, |
| "grad_norm": 0.8164499402046204, |
| "learning_rate": 1.5083251087290506e-06, |
| "loss": 1.1686, |
| "step": 1696 |
| }, |
| { |
| "epoch": 3.8581183611532626, |
| "grad_norm": 0.7746224999427795, |
| "learning_rate": 1.5054807053708988e-06, |
| "loss": 1.1458, |
| "step": 1697 |
| }, |
| { |
| "epoch": 3.860394537177542, |
| "grad_norm": 0.8148301243782043, |
| "learning_rate": 1.5026378306332563e-06, |
| "loss": 1.2068, |
| "step": 1698 |
| }, |
| { |
| "epoch": 3.8626707132018208, |
| "grad_norm": 0.8069217801094055, |
| "learning_rate": 1.4997964888857457e-06, |
| "loss": 1.1743, |
| "step": 1699 |
| }, |
| { |
| "epoch": 3.8649468892261, |
| "grad_norm": 0.7751657962799072, |
| "learning_rate": 1.4969566844956397e-06, |
| "loss": 1.1802, |
| "step": 1700 |
| }, |
| { |
| "epoch": 3.8672230652503794, |
| "grad_norm": 0.797619104385376, |
| "learning_rate": 1.494118421827842e-06, |
| "loss": 1.1807, |
| "step": 1701 |
| }, |
| { |
| "epoch": 3.8694992412746587, |
| "grad_norm": 0.784611165523529, |
| "learning_rate": 1.4912817052448891e-06, |
| "loss": 1.1702, |
| "step": 1702 |
| }, |
| { |
| "epoch": 3.871775417298938, |
| "grad_norm": 0.784424901008606, |
| "learning_rate": 1.4884465391069415e-06, |
| "loss": 1.1321, |
| "step": 1703 |
| }, |
| { |
| "epoch": 3.874051593323217, |
| "grad_norm": 0.7754137516021729, |
| "learning_rate": 1.4856129277717758e-06, |
| "loss": 1.1553, |
| "step": 1704 |
| }, |
| { |
| "epoch": 3.8763277693474962, |
| "grad_norm": 0.7742170691490173, |
| "learning_rate": 1.482780875594778e-06, |
| "loss": 1.1627, |
| "step": 1705 |
| }, |
| { |
| "epoch": 3.8786039453717756, |
| "grad_norm": 0.7944441437721252, |
| "learning_rate": 1.4799503869289402e-06, |
| "loss": 1.1726, |
| "step": 1706 |
| }, |
| { |
| "epoch": 3.8808801213960544, |
| "grad_norm": 0.7838897109031677, |
| "learning_rate": 1.477121466124848e-06, |
| "loss": 1.1731, |
| "step": 1707 |
| }, |
| { |
| "epoch": 3.8831562974203337, |
| "grad_norm": 0.7788136005401611, |
| "learning_rate": 1.474294117530681e-06, |
| "loss": 1.1877, |
| "step": 1708 |
| }, |
| { |
| "epoch": 3.885432473444613, |
| "grad_norm": 0.7596397399902344, |
| "learning_rate": 1.4714683454921986e-06, |
| "loss": 1.1627, |
| "step": 1709 |
| }, |
| { |
| "epoch": 3.8877086494688924, |
| "grad_norm": 0.7933751940727234, |
| "learning_rate": 1.4686441543527374e-06, |
| "loss": 1.1785, |
| "step": 1710 |
| }, |
| { |
| "epoch": 3.8899848254931717, |
| "grad_norm": 0.7714102268218994, |
| "learning_rate": 1.465821548453208e-06, |
| "loss": 1.1882, |
| "step": 1711 |
| }, |
| { |
| "epoch": 3.8922610015174506, |
| "grad_norm": 0.7759813666343689, |
| "learning_rate": 1.4630005321320796e-06, |
| "loss": 1.1538, |
| "step": 1712 |
| }, |
| { |
| "epoch": 3.89453717754173, |
| "grad_norm": 0.757717490196228, |
| "learning_rate": 1.46018110972538e-06, |
| "loss": 1.1462, |
| "step": 1713 |
| }, |
| { |
| "epoch": 3.896813353566009, |
| "grad_norm": 0.7566017508506775, |
| "learning_rate": 1.4573632855666887e-06, |
| "loss": 1.1943, |
| "step": 1714 |
| }, |
| { |
| "epoch": 3.899089529590288, |
| "grad_norm": 0.784542441368103, |
| "learning_rate": 1.4545470639871256e-06, |
| "loss": 1.1897, |
| "step": 1715 |
| }, |
| { |
| "epoch": 3.9013657056145674, |
| "grad_norm": 0.7824509143829346, |
| "learning_rate": 1.4517324493153481e-06, |
| "loss": 1.1691, |
| "step": 1716 |
| }, |
| { |
| "epoch": 3.9036418816388467, |
| "grad_norm": 0.7722765207290649, |
| "learning_rate": 1.4489194458775468e-06, |
| "loss": 1.1754, |
| "step": 1717 |
| }, |
| { |
| "epoch": 3.905918057663126, |
| "grad_norm": 0.7892184853553772, |
| "learning_rate": 1.4461080579974316e-06, |
| "loss": 1.1507, |
| "step": 1718 |
| }, |
| { |
| "epoch": 3.9081942336874054, |
| "grad_norm": 0.7828143239021301, |
| "learning_rate": 1.4432982899962326e-06, |
| "loss": 1.1833, |
| "step": 1719 |
| }, |
| { |
| "epoch": 3.9104704097116842, |
| "grad_norm": 0.7912802696228027, |
| "learning_rate": 1.4404901461926873e-06, |
| "loss": 1.1668, |
| "step": 1720 |
| }, |
| { |
| "epoch": 3.9127465857359636, |
| "grad_norm": 0.8008295297622681, |
| "learning_rate": 1.437683630903039e-06, |
| "loss": 1.1395, |
| "step": 1721 |
| }, |
| { |
| "epoch": 3.915022761760243, |
| "grad_norm": 0.8199208974838257, |
| "learning_rate": 1.434878748441026e-06, |
| "loss": 1.1546, |
| "step": 1722 |
| }, |
| { |
| "epoch": 3.9172989377845218, |
| "grad_norm": 0.7762032151222229, |
| "learning_rate": 1.432075503117878e-06, |
| "loss": 1.158, |
| "step": 1723 |
| }, |
| { |
| "epoch": 3.919575113808801, |
| "grad_norm": 0.8160228133201599, |
| "learning_rate": 1.4292738992423066e-06, |
| "loss": 1.2023, |
| "step": 1724 |
| }, |
| { |
| "epoch": 3.9218512898330804, |
| "grad_norm": 0.7772383689880371, |
| "learning_rate": 1.4264739411205047e-06, |
| "loss": 1.1636, |
| "step": 1725 |
| }, |
| { |
| "epoch": 3.9241274658573597, |
| "grad_norm": 0.8001096248626709, |
| "learning_rate": 1.4236756330561319e-06, |
| "loss": 1.1817, |
| "step": 1726 |
| }, |
| { |
| "epoch": 3.926403641881639, |
| "grad_norm": 0.7925685048103333, |
| "learning_rate": 1.4208789793503103e-06, |
| "loss": 1.1283, |
| "step": 1727 |
| }, |
| { |
| "epoch": 3.928679817905918, |
| "grad_norm": 0.783243715763092, |
| "learning_rate": 1.4180839843016246e-06, |
| "loss": 1.1621, |
| "step": 1728 |
| }, |
| { |
| "epoch": 3.9309559939301972, |
| "grad_norm": 0.8283969759941101, |
| "learning_rate": 1.415290652206105e-06, |
| "loss": 1.2235, |
| "step": 1729 |
| }, |
| { |
| "epoch": 3.9332321699544766, |
| "grad_norm": 0.7659640312194824, |
| "learning_rate": 1.4124989873572282e-06, |
| "loss": 1.146, |
| "step": 1730 |
| }, |
| { |
| "epoch": 3.9355083459787554, |
| "grad_norm": 0.7988297343254089, |
| "learning_rate": 1.409708994045907e-06, |
| "loss": 1.1957, |
| "step": 1731 |
| }, |
| { |
| "epoch": 3.9377845220030347, |
| "grad_norm": 0.7699180245399475, |
| "learning_rate": 1.4069206765604845e-06, |
| "loss": 1.1627, |
| "step": 1732 |
| }, |
| { |
| "epoch": 3.940060698027314, |
| "grad_norm": 0.7823548913002014, |
| "learning_rate": 1.4041340391867313e-06, |
| "loss": 1.2001, |
| "step": 1733 |
| }, |
| { |
| "epoch": 3.9423368740515934, |
| "grad_norm": 0.7833247780799866, |
| "learning_rate": 1.4013490862078327e-06, |
| "loss": 1.1744, |
| "step": 1734 |
| }, |
| { |
| "epoch": 3.9446130500758727, |
| "grad_norm": 0.7722859978675842, |
| "learning_rate": 1.3985658219043843e-06, |
| "loss": 1.1858, |
| "step": 1735 |
| }, |
| { |
| "epoch": 3.9468892261001516, |
| "grad_norm": 0.8108896613121033, |
| "learning_rate": 1.3957842505543893e-06, |
| "loss": 1.1314, |
| "step": 1736 |
| }, |
| { |
| "epoch": 3.949165402124431, |
| "grad_norm": 0.7728601694107056, |
| "learning_rate": 1.3930043764332457e-06, |
| "loss": 1.1643, |
| "step": 1737 |
| }, |
| { |
| "epoch": 3.95144157814871, |
| "grad_norm": 0.7797267436981201, |
| "learning_rate": 1.3902262038137449e-06, |
| "loss": 1.1802, |
| "step": 1738 |
| }, |
| { |
| "epoch": 3.9537177541729895, |
| "grad_norm": 0.7659248113632202, |
| "learning_rate": 1.387449736966061e-06, |
| "loss": 1.1851, |
| "step": 1739 |
| }, |
| { |
| "epoch": 3.955993930197269, |
| "grad_norm": 0.7742710709571838, |
| "learning_rate": 1.384674980157747e-06, |
| "loss": 1.1694, |
| "step": 1740 |
| }, |
| { |
| "epoch": 3.9582701062215477, |
| "grad_norm": 0.7799730896949768, |
| "learning_rate": 1.3819019376537299e-06, |
| "loss": 1.1809, |
| "step": 1741 |
| }, |
| { |
| "epoch": 3.960546282245827, |
| "grad_norm": 0.792921781539917, |
| "learning_rate": 1.3791306137162985e-06, |
| "loss": 1.1651, |
| "step": 1742 |
| }, |
| { |
| "epoch": 3.9628224582701064, |
| "grad_norm": 0.7535181641578674, |
| "learning_rate": 1.3763610126051014e-06, |
| "loss": 1.1209, |
| "step": 1743 |
| }, |
| { |
| "epoch": 3.9650986342943852, |
| "grad_norm": 0.7926993370056152, |
| "learning_rate": 1.3735931385771386e-06, |
| "loss": 1.1498, |
| "step": 1744 |
| }, |
| { |
| "epoch": 3.9673748103186646, |
| "grad_norm": 0.7826522588729858, |
| "learning_rate": 1.3708269958867565e-06, |
| "loss": 1.1504, |
| "step": 1745 |
| }, |
| { |
| "epoch": 3.969650986342944, |
| "grad_norm": 0.7804858088493347, |
| "learning_rate": 1.3680625887856386e-06, |
| "loss": 1.1618, |
| "step": 1746 |
| }, |
| { |
| "epoch": 3.971927162367223, |
| "grad_norm": 0.7874334454536438, |
| "learning_rate": 1.365299921522804e-06, |
| "loss": 1.1396, |
| "step": 1747 |
| }, |
| { |
| "epoch": 3.9742033383915025, |
| "grad_norm": 0.7938604950904846, |
| "learning_rate": 1.3625389983445932e-06, |
| "loss": 1.1497, |
| "step": 1748 |
| }, |
| { |
| "epoch": 3.9764795144157814, |
| "grad_norm": 0.8329913020133972, |
| "learning_rate": 1.3597798234946705e-06, |
| "loss": 1.1946, |
| "step": 1749 |
| }, |
| { |
| "epoch": 3.9787556904400607, |
| "grad_norm": 0.7830440998077393, |
| "learning_rate": 1.3570224012140096e-06, |
| "loss": 1.1734, |
| "step": 1750 |
| }, |
| { |
| "epoch": 3.98103186646434, |
| "grad_norm": 0.8128320574760437, |
| "learning_rate": 1.3542667357408915e-06, |
| "loss": 1.1875, |
| "step": 1751 |
| }, |
| { |
| "epoch": 3.983308042488619, |
| "grad_norm": 0.8153894543647766, |
| "learning_rate": 1.3515128313108966e-06, |
| "loss": 1.1496, |
| "step": 1752 |
| }, |
| { |
| "epoch": 3.9855842185128982, |
| "grad_norm": 0.7969558238983154, |
| "learning_rate": 1.3487606921568995e-06, |
| "loss": 1.1872, |
| "step": 1753 |
| }, |
| { |
| "epoch": 3.9878603945371776, |
| "grad_norm": 0.7873245477676392, |
| "learning_rate": 1.3460103225090599e-06, |
| "loss": 1.1803, |
| "step": 1754 |
| }, |
| { |
| "epoch": 3.990136570561457, |
| "grad_norm": 0.7957016825675964, |
| "learning_rate": 1.3432617265948196e-06, |
| "loss": 1.1586, |
| "step": 1755 |
| }, |
| { |
| "epoch": 3.992412746585736, |
| "grad_norm": 0.7677023410797119, |
| "learning_rate": 1.3405149086388928e-06, |
| "loss": 1.1406, |
| "step": 1756 |
| }, |
| { |
| "epoch": 3.994688922610015, |
| "grad_norm": 0.8339362144470215, |
| "learning_rate": 1.3377698728632599e-06, |
| "loss": 1.1411, |
| "step": 1757 |
| }, |
| { |
| "epoch": 3.9969650986342944, |
| "grad_norm": 0.7654924392700195, |
| "learning_rate": 1.335026623487166e-06, |
| "loss": 1.1947, |
| "step": 1758 |
| }, |
| { |
| "epoch": 3.9992412746585737, |
| "grad_norm": 0.8090951442718506, |
| "learning_rate": 1.3322851647271057e-06, |
| "loss": 1.1684, |
| "step": 1759 |
| }, |
| { |
| "epoch": 4.0, |
| "grad_norm": 0.8090951442718506, |
| "learning_rate": 1.3295455007968245e-06, |
| "loss": 1.1183, |
| "step": 1760 |
| }, |
| { |
| "epoch": 4.002276176024279, |
| "grad_norm": 1.8324748277664185, |
| "learning_rate": 1.3268076359073068e-06, |
| "loss": 1.1734, |
| "step": 1761 |
| }, |
| { |
| "epoch": 4.004552352048559, |
| "grad_norm": 0.7935696840286255, |
| "learning_rate": 1.3240715742667732e-06, |
| "loss": 1.1447, |
| "step": 1762 |
| }, |
| { |
| "epoch": 4.0068285280728375, |
| "grad_norm": 0.8030862212181091, |
| "learning_rate": 1.3213373200806738e-06, |
| "loss": 1.1581, |
| "step": 1763 |
| }, |
| { |
| "epoch": 4.009104704097117, |
| "grad_norm": 0.7820909023284912, |
| "learning_rate": 1.3186048775516782e-06, |
| "loss": 1.1474, |
| "step": 1764 |
| }, |
| { |
| "epoch": 4.011380880121396, |
| "grad_norm": 0.7807154059410095, |
| "learning_rate": 1.3158742508796718e-06, |
| "loss": 1.1618, |
| "step": 1765 |
| }, |
| { |
| "epoch": 4.013657056145675, |
| "grad_norm": 0.7716015577316284, |
| "learning_rate": 1.3131454442617523e-06, |
| "loss": 1.1246, |
| "step": 1766 |
| }, |
| { |
| "epoch": 4.015933232169955, |
| "grad_norm": 0.7621894478797913, |
| "learning_rate": 1.3104184618922142e-06, |
| "loss": 1.1691, |
| "step": 1767 |
| }, |
| { |
| "epoch": 4.018209408194234, |
| "grad_norm": 0.8327509164810181, |
| "learning_rate": 1.3076933079625508e-06, |
| "loss": 1.1942, |
| "step": 1768 |
| }, |
| { |
| "epoch": 4.0204855842185125, |
| "grad_norm": 0.7734883427619934, |
| "learning_rate": 1.304969986661448e-06, |
| "loss": 1.1896, |
| "step": 1769 |
| }, |
| { |
| "epoch": 4.022761760242792, |
| "grad_norm": 0.83384108543396, |
| "learning_rate": 1.3022485021747693e-06, |
| "loss": 1.1164, |
| "step": 1770 |
| }, |
| { |
| "epoch": 4.025037936267071, |
| "grad_norm": 0.805117130279541, |
| "learning_rate": 1.29952885868556e-06, |
| "loss": 1.1337, |
| "step": 1771 |
| }, |
| { |
| "epoch": 4.027314112291351, |
| "grad_norm": 0.7874476909637451, |
| "learning_rate": 1.2968110603740325e-06, |
| "loss": 1.1443, |
| "step": 1772 |
| }, |
| { |
| "epoch": 4.02959028831563, |
| "grad_norm": 0.8174652457237244, |
| "learning_rate": 1.2940951114175637e-06, |
| "loss": 1.1477, |
| "step": 1773 |
| }, |
| { |
| "epoch": 4.031866464339909, |
| "grad_norm": 0.7917712330818176, |
| "learning_rate": 1.291381015990689e-06, |
| "loss": 1.1864, |
| "step": 1774 |
| }, |
| { |
| "epoch": 4.0341426403641885, |
| "grad_norm": 0.7918281555175781, |
| "learning_rate": 1.288668778265093e-06, |
| "loss": 1.1196, |
| "step": 1775 |
| }, |
| { |
| "epoch": 4.036418816388467, |
| "grad_norm": 0.8245083093643188, |
| "learning_rate": 1.2859584024096062e-06, |
| "loss": 1.143, |
| "step": 1776 |
| }, |
| { |
| "epoch": 4.038694992412746, |
| "grad_norm": 0.8343380689620972, |
| "learning_rate": 1.2832498925901984e-06, |
| "loss": 1.2433, |
| "step": 1777 |
| }, |
| { |
| "epoch": 4.040971168437026, |
| "grad_norm": 0.7949922680854797, |
| "learning_rate": 1.2805432529699686e-06, |
| "loss": 1.1572, |
| "step": 1778 |
| }, |
| { |
| "epoch": 4.043247344461305, |
| "grad_norm": 0.8003636598587036, |
| "learning_rate": 1.2778384877091438e-06, |
| "loss": 1.1255, |
| "step": 1779 |
| }, |
| { |
| "epoch": 4.045523520485585, |
| "grad_norm": 0.8091865181922913, |
| "learning_rate": 1.275135600965068e-06, |
| "loss": 1.154, |
| "step": 1780 |
| }, |
| { |
| "epoch": 4.0477996965098635, |
| "grad_norm": 0.8288428783416748, |
| "learning_rate": 1.272434596892199e-06, |
| "loss": 1.1757, |
| "step": 1781 |
| }, |
| { |
| "epoch": 4.050075872534142, |
| "grad_norm": 0.8075243830680847, |
| "learning_rate": 1.2697354796421007e-06, |
| "loss": 1.1537, |
| "step": 1782 |
| }, |
| { |
| "epoch": 4.052352048558422, |
| "grad_norm": 0.8341973423957825, |
| "learning_rate": 1.2670382533634365e-06, |
| "loss": 1.1628, |
| "step": 1783 |
| }, |
| { |
| "epoch": 4.054628224582701, |
| "grad_norm": 0.8466330766677856, |
| "learning_rate": 1.2643429222019623e-06, |
| "loss": 1.1386, |
| "step": 1784 |
| }, |
| { |
| "epoch": 4.05690440060698, |
| "grad_norm": 0.782442033290863, |
| "learning_rate": 1.2616494903005244e-06, |
| "loss": 1.1272, |
| "step": 1785 |
| }, |
| { |
| "epoch": 4.05918057663126, |
| "grad_norm": 0.7994256615638733, |
| "learning_rate": 1.2589579617990466e-06, |
| "loss": 1.17, |
| "step": 1786 |
| }, |
| { |
| "epoch": 4.0614567526555385, |
| "grad_norm": 0.7817173004150391, |
| "learning_rate": 1.2562683408345279e-06, |
| "loss": 1.142, |
| "step": 1787 |
| }, |
| { |
| "epoch": 4.063732928679818, |
| "grad_norm": 0.8269613981246948, |
| "learning_rate": 1.2535806315410365e-06, |
| "loss": 1.1204, |
| "step": 1788 |
| }, |
| { |
| "epoch": 4.066009104704097, |
| "grad_norm": 0.8326630592346191, |
| "learning_rate": 1.2508948380497012e-06, |
| "loss": 1.1796, |
| "step": 1789 |
| }, |
| { |
| "epoch": 4.068285280728376, |
| "grad_norm": 0.8466435074806213, |
| "learning_rate": 1.2482109644887064e-06, |
| "loss": 1.0959, |
| "step": 1790 |
| }, |
| { |
| "epoch": 4.070561456752656, |
| "grad_norm": 0.794165313243866, |
| "learning_rate": 1.2455290149832856e-06, |
| "loss": 1.1375, |
| "step": 1791 |
| }, |
| { |
| "epoch": 4.072837632776935, |
| "grad_norm": 0.7998282313346863, |
| "learning_rate": 1.2428489936557131e-06, |
| "loss": 1.2033, |
| "step": 1792 |
| }, |
| { |
| "epoch": 4.0751138088012135, |
| "grad_norm": 0.7995575666427612, |
| "learning_rate": 1.2401709046253038e-06, |
| "loss": 1.1629, |
| "step": 1793 |
| }, |
| { |
| "epoch": 4.077389984825493, |
| "grad_norm": 0.8074122071266174, |
| "learning_rate": 1.237494752008399e-06, |
| "loss": 1.1649, |
| "step": 1794 |
| }, |
| { |
| "epoch": 4.079666160849772, |
| "grad_norm": 0.807697057723999, |
| "learning_rate": 1.2348205399183632e-06, |
| "loss": 1.1257, |
| "step": 1795 |
| }, |
| { |
| "epoch": 4.081942336874052, |
| "grad_norm": 0.8166428208351135, |
| "learning_rate": 1.232148272465583e-06, |
| "loss": 1.153, |
| "step": 1796 |
| }, |
| { |
| "epoch": 4.084218512898331, |
| "grad_norm": 0.8070620894432068, |
| "learning_rate": 1.2294779537574495e-06, |
| "loss": 1.1732, |
| "step": 1797 |
| }, |
| { |
| "epoch": 4.08649468892261, |
| "grad_norm": 0.8349140286445618, |
| "learning_rate": 1.2268095878983617e-06, |
| "loss": 1.1604, |
| "step": 1798 |
| }, |
| { |
| "epoch": 4.0887708649468895, |
| "grad_norm": 0.8050034642219543, |
| "learning_rate": 1.2241431789897188e-06, |
| "loss": 1.1658, |
| "step": 1799 |
| }, |
| { |
| "epoch": 4.091047040971168, |
| "grad_norm": 0.8552670478820801, |
| "learning_rate": 1.2214787311299085e-06, |
| "loss": 1.1373, |
| "step": 1800 |
| }, |
| { |
| "epoch": 4.093323216995447, |
| "grad_norm": 0.7918221950531006, |
| "learning_rate": 1.2188162484143077e-06, |
| "loss": 1.1648, |
| "step": 1801 |
| }, |
| { |
| "epoch": 4.095599393019727, |
| "grad_norm": 0.818851113319397, |
| "learning_rate": 1.21615573493527e-06, |
| "loss": 1.1468, |
| "step": 1802 |
| }, |
| { |
| "epoch": 4.097875569044006, |
| "grad_norm": 0.8211784362792969, |
| "learning_rate": 1.2134971947821236e-06, |
| "loss": 1.1566, |
| "step": 1803 |
| }, |
| { |
| "epoch": 4.100151745068286, |
| "grad_norm": 0.8089801073074341, |
| "learning_rate": 1.2108406320411632e-06, |
| "loss": 1.1235, |
| "step": 1804 |
| }, |
| { |
| "epoch": 4.1024279210925645, |
| "grad_norm": 0.7928494811058044, |
| "learning_rate": 1.2081860507956438e-06, |
| "loss": 1.1476, |
| "step": 1805 |
| }, |
| { |
| "epoch": 4.104704097116843, |
| "grad_norm": 0.8036773204803467, |
| "learning_rate": 1.2055334551257747e-06, |
| "loss": 1.1873, |
| "step": 1806 |
| }, |
| { |
| "epoch": 4.106980273141123, |
| "grad_norm": 0.7972615957260132, |
| "learning_rate": 1.2028828491087155e-06, |
| "loss": 1.1559, |
| "step": 1807 |
| }, |
| { |
| "epoch": 4.109256449165402, |
| "grad_norm": 0.8029186725616455, |
| "learning_rate": 1.2002342368185638e-06, |
| "loss": 1.1704, |
| "step": 1808 |
| }, |
| { |
| "epoch": 4.111532625189682, |
| "grad_norm": 0.8103779554367065, |
| "learning_rate": 1.1975876223263569e-06, |
| "loss": 1.185, |
| "step": 1809 |
| }, |
| { |
| "epoch": 4.113808801213961, |
| "grad_norm": 0.8010536432266235, |
| "learning_rate": 1.1949430097000584e-06, |
| "loss": 1.1596, |
| "step": 1810 |
| }, |
| { |
| "epoch": 4.1160849772382395, |
| "grad_norm": 0.7956396341323853, |
| "learning_rate": 1.1923004030045556e-06, |
| "loss": 1.1719, |
| "step": 1811 |
| }, |
| { |
| "epoch": 4.118361153262519, |
| "grad_norm": 0.8688467144966125, |
| "learning_rate": 1.1896598063016531e-06, |
| "loss": 1.1714, |
| "step": 1812 |
| }, |
| { |
| "epoch": 4.120637329286798, |
| "grad_norm": 0.7763445973396301, |
| "learning_rate": 1.1870212236500659e-06, |
| "loss": 1.1822, |
| "step": 1813 |
| }, |
| { |
| "epoch": 4.122913505311077, |
| "grad_norm": 0.8073698282241821, |
| "learning_rate": 1.1843846591054117e-06, |
| "loss": 1.2203, |
| "step": 1814 |
| }, |
| { |
| "epoch": 4.125189681335357, |
| "grad_norm": 0.7985076308250427, |
| "learning_rate": 1.18175011672021e-06, |
| "loss": 1.1586, |
| "step": 1815 |
| }, |
| { |
| "epoch": 4.127465857359636, |
| "grad_norm": 0.7932565212249756, |
| "learning_rate": 1.1791176005438692e-06, |
| "loss": 1.1562, |
| "step": 1816 |
| }, |
| { |
| "epoch": 4.129742033383915, |
| "grad_norm": 0.8024340271949768, |
| "learning_rate": 1.176487114622683e-06, |
| "loss": 1.1752, |
| "step": 1817 |
| }, |
| { |
| "epoch": 4.132018209408194, |
| "grad_norm": 0.8179346323013306, |
| "learning_rate": 1.1738586629998272e-06, |
| "loss": 1.1726, |
| "step": 1818 |
| }, |
| { |
| "epoch": 4.134294385432473, |
| "grad_norm": 0.8161062598228455, |
| "learning_rate": 1.1712322497153486e-06, |
| "loss": 1.1127, |
| "step": 1819 |
| }, |
| { |
| "epoch": 4.136570561456753, |
| "grad_norm": 0.7926722168922424, |
| "learning_rate": 1.1686078788061612e-06, |
| "loss": 1.1157, |
| "step": 1820 |
| }, |
| { |
| "epoch": 4.138846737481032, |
| "grad_norm": 0.8068000674247742, |
| "learning_rate": 1.1659855543060405e-06, |
| "loss": 1.1799, |
| "step": 1821 |
| }, |
| { |
| "epoch": 4.141122913505311, |
| "grad_norm": 0.7692276835441589, |
| "learning_rate": 1.163365280245615e-06, |
| "loss": 1.1622, |
| "step": 1822 |
| }, |
| { |
| "epoch": 4.1433990895295905, |
| "grad_norm": 0.8077060580253601, |
| "learning_rate": 1.1607470606523646e-06, |
| "loss": 1.1528, |
| "step": 1823 |
| }, |
| { |
| "epoch": 4.145675265553869, |
| "grad_norm": 0.8008897304534912, |
| "learning_rate": 1.1581308995506088e-06, |
| "loss": 1.149, |
| "step": 1824 |
| }, |
| { |
| "epoch": 4.147951441578149, |
| "grad_norm": 0.8196450471878052, |
| "learning_rate": 1.1555168009615039e-06, |
| "loss": 1.1427, |
| "step": 1825 |
| }, |
| { |
| "epoch": 4.150227617602428, |
| "grad_norm": 0.8355448246002197, |
| "learning_rate": 1.152904768903036e-06, |
| "loss": 1.1397, |
| "step": 1826 |
| }, |
| { |
| "epoch": 4.152503793626707, |
| "grad_norm": 0.8095211386680603, |
| "learning_rate": 1.1502948073900148e-06, |
| "loss": 1.1932, |
| "step": 1827 |
| }, |
| { |
| "epoch": 4.154779969650987, |
| "grad_norm": 0.8357805609703064, |
| "learning_rate": 1.1476869204340665e-06, |
| "loss": 1.1586, |
| "step": 1828 |
| }, |
| { |
| "epoch": 4.1570561456752655, |
| "grad_norm": 0.827434778213501, |
| "learning_rate": 1.1450811120436319e-06, |
| "loss": 1.1342, |
| "step": 1829 |
| }, |
| { |
| "epoch": 4.159332321699544, |
| "grad_norm": 0.8159314393997192, |
| "learning_rate": 1.1424773862239527e-06, |
| "loss": 1.1705, |
| "step": 1830 |
| }, |
| { |
| "epoch": 4.161608497723824, |
| "grad_norm": 0.8122511506080627, |
| "learning_rate": 1.1398757469770732e-06, |
| "loss": 1.1408, |
| "step": 1831 |
| }, |
| { |
| "epoch": 4.163884673748103, |
| "grad_norm": 0.8244626522064209, |
| "learning_rate": 1.1372761983018283e-06, |
| "loss": 1.1666, |
| "step": 1832 |
| }, |
| { |
| "epoch": 4.166160849772383, |
| "grad_norm": 0.8183591365814209, |
| "learning_rate": 1.1346787441938398e-06, |
| "loss": 1.1652, |
| "step": 1833 |
| }, |
| { |
| "epoch": 4.168437025796662, |
| "grad_norm": 0.8229405283927917, |
| "learning_rate": 1.132083388645511e-06, |
| "loss": 1.1964, |
| "step": 1834 |
| }, |
| { |
| "epoch": 4.1707132018209405, |
| "grad_norm": 0.8457160592079163, |
| "learning_rate": 1.1294901356460192e-06, |
| "loss": 1.1677, |
| "step": 1835 |
| }, |
| { |
| "epoch": 4.17298937784522, |
| "grad_norm": 0.8162142634391785, |
| "learning_rate": 1.1268989891813085e-06, |
| "loss": 1.142, |
| "step": 1836 |
| }, |
| { |
| "epoch": 4.175265553869499, |
| "grad_norm": 0.7983715534210205, |
| "learning_rate": 1.1243099532340888e-06, |
| "loss": 1.151, |
| "step": 1837 |
| }, |
| { |
| "epoch": 4.177541729893778, |
| "grad_norm": 0.8044275641441345, |
| "learning_rate": 1.1217230317838227e-06, |
| "loss": 1.1799, |
| "step": 1838 |
| }, |
| { |
| "epoch": 4.179817905918058, |
| "grad_norm": 0.8157429695129395, |
| "learning_rate": 1.1191382288067228e-06, |
| "loss": 1.1363, |
| "step": 1839 |
| }, |
| { |
| "epoch": 4.182094081942337, |
| "grad_norm": 0.8047868013381958, |
| "learning_rate": 1.116555548275749e-06, |
| "loss": 1.1847, |
| "step": 1840 |
| }, |
| { |
| "epoch": 4.184370257966616, |
| "grad_norm": 0.8166713118553162, |
| "learning_rate": 1.1139749941605949e-06, |
| "loss": 1.1487, |
| "step": 1841 |
| }, |
| { |
| "epoch": 4.186646433990895, |
| "grad_norm": 0.8015767931938171, |
| "learning_rate": 1.1113965704276874e-06, |
| "loss": 1.1356, |
| "step": 1842 |
| }, |
| { |
| "epoch": 4.188922610015174, |
| "grad_norm": 0.7973054051399231, |
| "learning_rate": 1.1088202810401789e-06, |
| "loss": 1.1751, |
| "step": 1843 |
| }, |
| { |
| "epoch": 4.191198786039454, |
| "grad_norm": 0.794691264629364, |
| "learning_rate": 1.1062461299579399e-06, |
| "loss": 1.1655, |
| "step": 1844 |
| }, |
| { |
| "epoch": 4.193474962063733, |
| "grad_norm": 0.8075692653656006, |
| "learning_rate": 1.1036741211375577e-06, |
| "loss": 1.1609, |
| "step": 1845 |
| }, |
| { |
| "epoch": 4.195751138088012, |
| "grad_norm": 0.8212078213691711, |
| "learning_rate": 1.1011042585323235e-06, |
| "loss": 1.1359, |
| "step": 1846 |
| }, |
| { |
| "epoch": 4.1980273141122915, |
| "grad_norm": 0.811058759689331, |
| "learning_rate": 1.0985365460922293e-06, |
| "loss": 1.1255, |
| "step": 1847 |
| }, |
| { |
| "epoch": 4.20030349013657, |
| "grad_norm": 0.7934993505477905, |
| "learning_rate": 1.095970987763967e-06, |
| "loss": 1.1596, |
| "step": 1848 |
| }, |
| { |
| "epoch": 4.20257966616085, |
| "grad_norm": 0.8093920350074768, |
| "learning_rate": 1.0934075874909103e-06, |
| "loss": 1.1553, |
| "step": 1849 |
| }, |
| { |
| "epoch": 4.204855842185129, |
| "grad_norm": 0.8409412503242493, |
| "learning_rate": 1.0908463492131227e-06, |
| "loss": 1.1393, |
| "step": 1850 |
| }, |
| { |
| "epoch": 4.207132018209408, |
| "grad_norm": 0.8130190968513489, |
| "learning_rate": 1.0882872768673402e-06, |
| "loss": 1.1094, |
| "step": 1851 |
| }, |
| { |
| "epoch": 4.209408194233688, |
| "grad_norm": 0.8121690154075623, |
| "learning_rate": 1.0857303743869707e-06, |
| "loss": 1.1595, |
| "step": 1852 |
| }, |
| { |
| "epoch": 4.2116843702579665, |
| "grad_norm": 0.806825578212738, |
| "learning_rate": 1.083175645702089e-06, |
| "loss": 1.2038, |
| "step": 1853 |
| }, |
| { |
| "epoch": 4.213960546282246, |
| "grad_norm": 0.798464834690094, |
| "learning_rate": 1.080623094739426e-06, |
| "loss": 1.1796, |
| "step": 1854 |
| }, |
| { |
| "epoch": 4.216236722306525, |
| "grad_norm": 0.8041526079177856, |
| "learning_rate": 1.0780727254223666e-06, |
| "loss": 1.1309, |
| "step": 1855 |
| }, |
| { |
| "epoch": 4.218512898330804, |
| "grad_norm": 0.8249539136886597, |
| "learning_rate": 1.075524541670942e-06, |
| "loss": 1.1777, |
| "step": 1856 |
| }, |
| { |
| "epoch": 4.220789074355084, |
| "grad_norm": 0.8255074620246887, |
| "learning_rate": 1.0729785474018243e-06, |
| "loss": 1.1509, |
| "step": 1857 |
| }, |
| { |
| "epoch": 4.223065250379363, |
| "grad_norm": 0.828778862953186, |
| "learning_rate": 1.0704347465283194e-06, |
| "loss": 1.1356, |
| "step": 1858 |
| }, |
| { |
| "epoch": 4.2253414264036415, |
| "grad_norm": 0.8041991591453552, |
| "learning_rate": 1.0678931429603641e-06, |
| "loss": 1.1588, |
| "step": 1859 |
| }, |
| { |
| "epoch": 4.227617602427921, |
| "grad_norm": 0.8260260820388794, |
| "learning_rate": 1.0653537406045157e-06, |
| "loss": 1.1428, |
| "step": 1860 |
| }, |
| { |
| "epoch": 4.2298937784522, |
| "grad_norm": 0.8194622993469238, |
| "learning_rate": 1.0628165433639493e-06, |
| "loss": 1.1496, |
| "step": 1861 |
| }, |
| { |
| "epoch": 4.23216995447648, |
| "grad_norm": 0.8133224844932556, |
| "learning_rate": 1.0602815551384502e-06, |
| "loss": 1.1684, |
| "step": 1862 |
| }, |
| { |
| "epoch": 4.234446130500759, |
| "grad_norm": 0.8095599412918091, |
| "learning_rate": 1.0577487798244081e-06, |
| "loss": 1.186, |
| "step": 1863 |
| }, |
| { |
| "epoch": 4.236722306525038, |
| "grad_norm": 0.8215416669845581, |
| "learning_rate": 1.0552182213148119e-06, |
| "loss": 1.1679, |
| "step": 1864 |
| }, |
| { |
| "epoch": 4.238998482549317, |
| "grad_norm": 0.8311387896537781, |
| "learning_rate": 1.0526898834992422e-06, |
| "loss": 1.1679, |
| "step": 1865 |
| }, |
| { |
| "epoch": 4.241274658573596, |
| "grad_norm": 0.8266900181770325, |
| "learning_rate": 1.0501637702638666e-06, |
| "loss": 1.1585, |
| "step": 1866 |
| }, |
| { |
| "epoch": 4.243550834597875, |
| "grad_norm": 0.8136650323867798, |
| "learning_rate": 1.0476398854914355e-06, |
| "loss": 1.1466, |
| "step": 1867 |
| }, |
| { |
| "epoch": 4.245827010622155, |
| "grad_norm": 0.8111891150474548, |
| "learning_rate": 1.0451182330612715e-06, |
| "loss": 1.1611, |
| "step": 1868 |
| }, |
| { |
| "epoch": 4.248103186646434, |
| "grad_norm": 0.7687380313873291, |
| "learning_rate": 1.0425988168492659e-06, |
| "loss": 1.1466, |
| "step": 1869 |
| }, |
| { |
| "epoch": 4.250379362670714, |
| "grad_norm": 0.7977138161659241, |
| "learning_rate": 1.0400816407278754e-06, |
| "loss": 1.1897, |
| "step": 1870 |
| }, |
| { |
| "epoch": 4.2526555386949925, |
| "grad_norm": 0.8051496744155884, |
| "learning_rate": 1.0375667085661115e-06, |
| "loss": 1.1285, |
| "step": 1871 |
| }, |
| { |
| "epoch": 4.254931714719271, |
| "grad_norm": 0.7781476974487305, |
| "learning_rate": 1.0350540242295367e-06, |
| "loss": 1.1211, |
| "step": 1872 |
| }, |
| { |
| "epoch": 4.257207890743551, |
| "grad_norm": 0.838594913482666, |
| "learning_rate": 1.032543591580259e-06, |
| "loss": 1.2148, |
| "step": 1873 |
| }, |
| { |
| "epoch": 4.25948406676783, |
| "grad_norm": 0.8080545663833618, |
| "learning_rate": 1.0300354144769245e-06, |
| "loss": 1.0911, |
| "step": 1874 |
| }, |
| { |
| "epoch": 4.261760242792109, |
| "grad_norm": 0.7992098331451416, |
| "learning_rate": 1.027529496774715e-06, |
| "loss": 1.1572, |
| "step": 1875 |
| }, |
| { |
| "epoch": 4.264036418816389, |
| "grad_norm": 0.7982013821601868, |
| "learning_rate": 1.0250258423253367e-06, |
| "loss": 1.1533, |
| "step": 1876 |
| }, |
| { |
| "epoch": 4.2663125948406675, |
| "grad_norm": 0.7892776131629944, |
| "learning_rate": 1.0225244549770175e-06, |
| "loss": 1.1394, |
| "step": 1877 |
| }, |
| { |
| "epoch": 4.268588770864947, |
| "grad_norm": 0.8064398169517517, |
| "learning_rate": 1.020025338574504e-06, |
| "loss": 1.1703, |
| "step": 1878 |
| }, |
| { |
| "epoch": 4.270864946889226, |
| "grad_norm": 0.8267671465873718, |
| "learning_rate": 1.0175284969590457e-06, |
| "loss": 1.1699, |
| "step": 1879 |
| }, |
| { |
| "epoch": 4.273141122913505, |
| "grad_norm": 0.8314613103866577, |
| "learning_rate": 1.0150339339684026e-06, |
| "loss": 1.1349, |
| "step": 1880 |
| }, |
| { |
| "epoch": 4.275417298937785, |
| "grad_norm": 0.7835724353790283, |
| "learning_rate": 1.0125416534368279e-06, |
| "loss": 1.1296, |
| "step": 1881 |
| }, |
| { |
| "epoch": 4.277693474962064, |
| "grad_norm": 0.826766312122345, |
| "learning_rate": 1.0100516591950676e-06, |
| "loss": 1.1924, |
| "step": 1882 |
| }, |
| { |
| "epoch": 4.279969650986343, |
| "grad_norm": 0.8165386915206909, |
| "learning_rate": 1.0075639550703553e-06, |
| "loss": 1.124, |
| "step": 1883 |
| }, |
| { |
| "epoch": 4.282245827010622, |
| "grad_norm": 0.807698130607605, |
| "learning_rate": 1.0050785448864022e-06, |
| "loss": 1.1557, |
| "step": 1884 |
| }, |
| { |
| "epoch": 4.284522003034901, |
| "grad_norm": 0.8152635097503662, |
| "learning_rate": 1.0025954324633949e-06, |
| "loss": 1.1825, |
| "step": 1885 |
| }, |
| { |
| "epoch": 4.286798179059181, |
| "grad_norm": 0.8133754134178162, |
| "learning_rate": 1.000114621617988e-06, |
| "loss": 1.147, |
| "step": 1886 |
| }, |
| { |
| "epoch": 4.28907435508346, |
| "grad_norm": 0.803455650806427, |
| "learning_rate": 9.976361161632977e-07, |
| "loss": 1.2258, |
| "step": 1887 |
| }, |
| { |
| "epoch": 4.291350531107739, |
| "grad_norm": 0.8496401309967041, |
| "learning_rate": 9.951599199088977e-07, |
| "loss": 1.1188, |
| "step": 1888 |
| }, |
| { |
| "epoch": 4.293626707132018, |
| "grad_norm": 0.8170112371444702, |
| "learning_rate": 9.926860366608128e-07, |
| "loss": 1.162, |
| "step": 1889 |
| }, |
| { |
| "epoch": 4.295902883156297, |
| "grad_norm": 0.8310915231704712, |
| "learning_rate": 9.902144702215102e-07, |
| "loss": 1.1522, |
| "step": 1890 |
| }, |
| { |
| "epoch": 4.298179059180576, |
| "grad_norm": 0.7971112728118896, |
| "learning_rate": 9.877452243899003e-07, |
| "loss": 1.1522, |
| "step": 1891 |
| }, |
| { |
| "epoch": 4.300455235204856, |
| "grad_norm": 0.8279590010643005, |
| "learning_rate": 9.852783029613224e-07, |
| "loss": 1.1391, |
| "step": 1892 |
| }, |
| { |
| "epoch": 4.302731411229135, |
| "grad_norm": 0.842805802822113, |
| "learning_rate": 9.828137097275454e-07, |
| "loss": 1.1617, |
| "step": 1893 |
| }, |
| { |
| "epoch": 4.305007587253415, |
| "grad_norm": 0.8263971209526062, |
| "learning_rate": 9.803514484767582e-07, |
| "loss": 1.1846, |
| "step": 1894 |
| }, |
| { |
| "epoch": 4.3072837632776935, |
| "grad_norm": 0.8175419569015503, |
| "learning_rate": 9.77891522993567e-07, |
| "loss": 1.1615, |
| "step": 1895 |
| }, |
| { |
| "epoch": 4.309559939301972, |
| "grad_norm": 0.8182225823402405, |
| "learning_rate": 9.754339370589854e-07, |
| "loss": 1.1318, |
| "step": 1896 |
| }, |
| { |
| "epoch": 4.311836115326252, |
| "grad_norm": 0.7963380217552185, |
| "learning_rate": 9.72978694450435e-07, |
| "loss": 1.1567, |
| "step": 1897 |
| }, |
| { |
| "epoch": 4.314112291350531, |
| "grad_norm": 0.8066350817680359, |
| "learning_rate": 9.705257989417315e-07, |
| "loss": 1.1575, |
| "step": 1898 |
| }, |
| { |
| "epoch": 4.316388467374811, |
| "grad_norm": 0.8392632603645325, |
| "learning_rate": 9.680752543030844e-07, |
| "loss": 1.1697, |
| "step": 1899 |
| }, |
| { |
| "epoch": 4.31866464339909, |
| "grad_norm": 0.8114210367202759, |
| "learning_rate": 9.656270643010917e-07, |
| "loss": 1.1911, |
| "step": 1900 |
| }, |
| { |
| "epoch": 4.3209408194233685, |
| "grad_norm": 0.805618166923523, |
| "learning_rate": 9.6318123269873e-07, |
| "loss": 1.1543, |
| "step": 1901 |
| }, |
| { |
| "epoch": 4.323216995447648, |
| "grad_norm": 0.7983014583587646, |
| "learning_rate": 9.60737763255351e-07, |
| "loss": 1.1322, |
| "step": 1902 |
| }, |
| { |
| "epoch": 4.325493171471927, |
| "grad_norm": 0.8233836889266968, |
| "learning_rate": 9.582966597266768e-07, |
| "loss": 1.1761, |
| "step": 1903 |
| }, |
| { |
| "epoch": 4.327769347496206, |
| "grad_norm": 0.8023352026939392, |
| "learning_rate": 9.55857925864791e-07, |
| "loss": 1.1663, |
| "step": 1904 |
| }, |
| { |
| "epoch": 4.330045523520486, |
| "grad_norm": 0.8328466415405273, |
| "learning_rate": 9.534215654181384e-07, |
| "loss": 1.1736, |
| "step": 1905 |
| }, |
| { |
| "epoch": 4.332321699544765, |
| "grad_norm": 0.809187650680542, |
| "learning_rate": 9.509875821315126e-07, |
| "loss": 1.1839, |
| "step": 1906 |
| }, |
| { |
| "epoch": 4.334597875569044, |
| "grad_norm": 0.7974687218666077, |
| "learning_rate": 9.485559797460544e-07, |
| "loss": 1.1693, |
| "step": 1907 |
| }, |
| { |
| "epoch": 4.336874051593323, |
| "grad_norm": 0.8204778432846069, |
| "learning_rate": 9.461267619992453e-07, |
| "loss": 1.1418, |
| "step": 1908 |
| }, |
| { |
| "epoch": 4.339150227617602, |
| "grad_norm": 0.8286815285682678, |
| "learning_rate": 9.436999326249013e-07, |
| "loss": 1.1902, |
| "step": 1909 |
| }, |
| { |
| "epoch": 4.341426403641882, |
| "grad_norm": 0.824350893497467, |
| "learning_rate": 9.412754953531664e-07, |
| "loss": 1.1553, |
| "step": 1910 |
| }, |
| { |
| "epoch": 4.343702579666161, |
| "grad_norm": 0.7882816791534424, |
| "learning_rate": 9.388534539105107e-07, |
| "loss": 1.1776, |
| "step": 1911 |
| }, |
| { |
| "epoch": 4.34597875569044, |
| "grad_norm": 0.7909602522850037, |
| "learning_rate": 9.364338120197181e-07, |
| "loss": 1.1548, |
| "step": 1912 |
| }, |
| { |
| "epoch": 4.348254931714719, |
| "grad_norm": 0.8286248445510864, |
| "learning_rate": 9.340165733998877e-07, |
| "loss": 1.1333, |
| "step": 1913 |
| }, |
| { |
| "epoch": 4.350531107738998, |
| "grad_norm": 0.8236697912216187, |
| "learning_rate": 9.316017417664222e-07, |
| "loss": 1.1992, |
| "step": 1914 |
| }, |
| { |
| "epoch": 4.352807283763278, |
| "grad_norm": 0.8361827731132507, |
| "learning_rate": 9.291893208310257e-07, |
| "loss": 1.119, |
| "step": 1915 |
| }, |
| { |
| "epoch": 4.355083459787557, |
| "grad_norm": 0.8130219578742981, |
| "learning_rate": 9.267793143016967e-07, |
| "loss": 1.1503, |
| "step": 1916 |
| }, |
| { |
| "epoch": 4.357359635811836, |
| "grad_norm": 0.8324246406555176, |
| "learning_rate": 9.243717258827228e-07, |
| "loss": 1.1888, |
| "step": 1917 |
| }, |
| { |
| "epoch": 4.359635811836116, |
| "grad_norm": 0.8405057787895203, |
| "learning_rate": 9.219665592746738e-07, |
| "loss": 1.1459, |
| "step": 1918 |
| }, |
| { |
| "epoch": 4.3619119878603945, |
| "grad_norm": 0.8185500502586365, |
| "learning_rate": 9.195638181743996e-07, |
| "loss": 1.1404, |
| "step": 1919 |
| }, |
| { |
| "epoch": 4.364188163884673, |
| "grad_norm": 0.8271884918212891, |
| "learning_rate": 9.171635062750189e-07, |
| "loss": 1.1827, |
| "step": 1920 |
| }, |
| { |
| "epoch": 4.366464339908953, |
| "grad_norm": 0.8363154530525208, |
| "learning_rate": 9.147656272659197e-07, |
| "loss": 1.1247, |
| "step": 1921 |
| }, |
| { |
| "epoch": 4.368740515933232, |
| "grad_norm": 0.8215076923370361, |
| "learning_rate": 9.123701848327485e-07, |
| "loss": 1.1362, |
| "step": 1922 |
| }, |
| { |
| "epoch": 4.371016691957512, |
| "grad_norm": 0.7907038331031799, |
| "learning_rate": 9.099771826574069e-07, |
| "loss": 1.1445, |
| "step": 1923 |
| }, |
| { |
| "epoch": 4.373292867981791, |
| "grad_norm": 0.8025128841400146, |
| "learning_rate": 9.075866244180459e-07, |
| "loss": 1.108, |
| "step": 1924 |
| }, |
| { |
| "epoch": 4.3755690440060695, |
| "grad_norm": 0.8598397374153137, |
| "learning_rate": 9.051985137890601e-07, |
| "loss": 1.1311, |
| "step": 1925 |
| }, |
| { |
| "epoch": 4.377845220030349, |
| "grad_norm": 0.8464901447296143, |
| "learning_rate": 9.028128544410814e-07, |
| "loss": 1.134, |
| "step": 1926 |
| }, |
| { |
| "epoch": 4.380121396054628, |
| "grad_norm": 0.8181291222572327, |
| "learning_rate": 9.004296500409759e-07, |
| "loss": 1.1774, |
| "step": 1927 |
| }, |
| { |
| "epoch": 4.382397572078908, |
| "grad_norm": 0.7884581685066223, |
| "learning_rate": 8.980489042518348e-07, |
| "loss": 1.1325, |
| "step": 1928 |
| }, |
| { |
| "epoch": 4.384673748103187, |
| "grad_norm": 0.79710453748703, |
| "learning_rate": 8.956706207329694e-07, |
| "loss": 1.1751, |
| "step": 1929 |
| }, |
| { |
| "epoch": 4.386949924127466, |
| "grad_norm": 0.8176055550575256, |
| "learning_rate": 8.932948031399099e-07, |
| "loss": 1.1749, |
| "step": 1930 |
| }, |
| { |
| "epoch": 4.389226100151745, |
| "grad_norm": 0.8096023797988892, |
| "learning_rate": 8.909214551243908e-07, |
| "loss": 1.1616, |
| "step": 1931 |
| }, |
| { |
| "epoch": 4.391502276176024, |
| "grad_norm": 0.8391863107681274, |
| "learning_rate": 8.885505803343561e-07, |
| "loss": 1.1719, |
| "step": 1932 |
| }, |
| { |
| "epoch": 4.393778452200303, |
| "grad_norm": 0.8197864294052124, |
| "learning_rate": 8.861821824139455e-07, |
| "loss": 1.1678, |
| "step": 1933 |
| }, |
| { |
| "epoch": 4.396054628224583, |
| "grad_norm": 0.8241091370582581, |
| "learning_rate": 8.838162650034912e-07, |
| "loss": 1.1282, |
| "step": 1934 |
| }, |
| { |
| "epoch": 4.398330804248862, |
| "grad_norm": 0.7828463315963745, |
| "learning_rate": 8.814528317395155e-07, |
| "loss": 1.1597, |
| "step": 1935 |
| }, |
| { |
| "epoch": 4.400606980273141, |
| "grad_norm": 0.8351078629493713, |
| "learning_rate": 8.790918862547201e-07, |
| "loss": 1.1592, |
| "step": 1936 |
| }, |
| { |
| "epoch": 4.40288315629742, |
| "grad_norm": 0.8341789841651917, |
| "learning_rate": 8.767334321779831e-07, |
| "loss": 1.1755, |
| "step": 1937 |
| }, |
| { |
| "epoch": 4.405159332321699, |
| "grad_norm": 0.8040433526039124, |
| "learning_rate": 8.743774731343541e-07, |
| "loss": 1.1688, |
| "step": 1938 |
| }, |
| { |
| "epoch": 4.407435508345979, |
| "grad_norm": 0.8132420778274536, |
| "learning_rate": 8.720240127450466e-07, |
| "loss": 1.1287, |
| "step": 1939 |
| }, |
| { |
| "epoch": 4.409711684370258, |
| "grad_norm": 0.8121916055679321, |
| "learning_rate": 8.69673054627434e-07, |
| "loss": 1.1637, |
| "step": 1940 |
| }, |
| { |
| "epoch": 4.411987860394537, |
| "grad_norm": 0.8064197301864624, |
| "learning_rate": 8.673246023950449e-07, |
| "loss": 1.1867, |
| "step": 1941 |
| }, |
| { |
| "epoch": 4.414264036418817, |
| "grad_norm": 0.8088646531105042, |
| "learning_rate": 8.649786596575538e-07, |
| "loss": 1.1627, |
| "step": 1942 |
| }, |
| { |
| "epoch": 4.4165402124430955, |
| "grad_norm": 0.8088123202323914, |
| "learning_rate": 8.626352300207808e-07, |
| "loss": 1.155, |
| "step": 1943 |
| }, |
| { |
| "epoch": 4.418816388467375, |
| "grad_norm": 0.8343124389648438, |
| "learning_rate": 8.602943170866809e-07, |
| "loss": 1.1284, |
| "step": 1944 |
| }, |
| { |
| "epoch": 4.421092564491654, |
| "grad_norm": 0.8085336089134216, |
| "learning_rate": 8.579559244533416e-07, |
| "loss": 1.1536, |
| "step": 1945 |
| }, |
| { |
| "epoch": 4.423368740515933, |
| "grad_norm": 0.8021194338798523, |
| "learning_rate": 8.556200557149771e-07, |
| "loss": 1.1656, |
| "step": 1946 |
| }, |
| { |
| "epoch": 4.425644916540213, |
| "grad_norm": 0.8365726470947266, |
| "learning_rate": 8.532867144619217e-07, |
| "loss": 1.1704, |
| "step": 1947 |
| }, |
| { |
| "epoch": 4.427921092564492, |
| "grad_norm": 0.8232294321060181, |
| "learning_rate": 8.509559042806237e-07, |
| "loss": 1.1315, |
| "step": 1948 |
| }, |
| { |
| "epoch": 4.4301972685887705, |
| "grad_norm": 0.83378005027771, |
| "learning_rate": 8.486276287536444e-07, |
| "loss": 1.1661, |
| "step": 1949 |
| }, |
| { |
| "epoch": 4.43247344461305, |
| "grad_norm": 0.7996213436126709, |
| "learning_rate": 8.463018914596449e-07, |
| "loss": 1.16, |
| "step": 1950 |
| }, |
| { |
| "epoch": 4.434749620637329, |
| "grad_norm": 0.8141563534736633, |
| "learning_rate": 8.439786959733895e-07, |
| "loss": 1.1678, |
| "step": 1951 |
| }, |
| { |
| "epoch": 4.437025796661609, |
| "grad_norm": 0.8117381930351257, |
| "learning_rate": 8.416580458657322e-07, |
| "loss": 1.1405, |
| "step": 1952 |
| }, |
| { |
| "epoch": 4.439301972685888, |
| "grad_norm": 0.8416996598243713, |
| "learning_rate": 8.393399447036155e-07, |
| "loss": 1.1442, |
| "step": 1953 |
| }, |
| { |
| "epoch": 4.441578148710167, |
| "grad_norm": 0.8117838501930237, |
| "learning_rate": 8.370243960500646e-07, |
| "loss": 1.1743, |
| "step": 1954 |
| }, |
| { |
| "epoch": 4.443854324734446, |
| "grad_norm": 0.8272352814674377, |
| "learning_rate": 8.347114034641807e-07, |
| "loss": 1.1774, |
| "step": 1955 |
| }, |
| { |
| "epoch": 4.446130500758725, |
| "grad_norm": 0.8017353415489197, |
| "learning_rate": 8.324009705011357e-07, |
| "loss": 1.1253, |
| "step": 1956 |
| }, |
| { |
| "epoch": 4.448406676783004, |
| "grad_norm": 0.8281505107879639, |
| "learning_rate": 8.300931007121701e-07, |
| "loss": 1.1341, |
| "step": 1957 |
| }, |
| { |
| "epoch": 4.450682852807284, |
| "grad_norm": 0.7994737029075623, |
| "learning_rate": 8.277877976445819e-07, |
| "loss": 1.1655, |
| "step": 1958 |
| }, |
| { |
| "epoch": 4.452959028831563, |
| "grad_norm": 0.8165961503982544, |
| "learning_rate": 8.254850648417234e-07, |
| "loss": 1.147, |
| "step": 1959 |
| }, |
| { |
| "epoch": 4.455235204855843, |
| "grad_norm": 0.8197671175003052, |
| "learning_rate": 8.231849058430005e-07, |
| "loss": 1.1588, |
| "step": 1960 |
| }, |
| { |
| "epoch": 4.457511380880121, |
| "grad_norm": 0.8069389462471008, |
| "learning_rate": 8.208873241838569e-07, |
| "loss": 1.1662, |
| "step": 1961 |
| }, |
| { |
| "epoch": 4.4597875569044, |
| "grad_norm": 0.801243782043457, |
| "learning_rate": 8.185923233957802e-07, |
| "loss": 1.1433, |
| "step": 1962 |
| }, |
| { |
| "epoch": 4.46206373292868, |
| "grad_norm": 0.8067401051521301, |
| "learning_rate": 8.162999070062885e-07, |
| "loss": 1.1697, |
| "step": 1963 |
| }, |
| { |
| "epoch": 4.464339908952959, |
| "grad_norm": 0.8027750849723816, |
| "learning_rate": 8.140100785389271e-07, |
| "loss": 1.1336, |
| "step": 1964 |
| }, |
| { |
| "epoch": 4.466616084977238, |
| "grad_norm": 0.8215749859809875, |
| "learning_rate": 8.117228415132658e-07, |
| "loss": 1.1875, |
| "step": 1965 |
| }, |
| { |
| "epoch": 4.468892261001518, |
| "grad_norm": 0.8175052404403687, |
| "learning_rate": 8.094381994448897e-07, |
| "loss": 1.1357, |
| "step": 1966 |
| }, |
| { |
| "epoch": 4.4711684370257965, |
| "grad_norm": 0.8070038557052612, |
| "learning_rate": 8.07156155845395e-07, |
| "loss": 1.1345, |
| "step": 1967 |
| }, |
| { |
| "epoch": 4.473444613050076, |
| "grad_norm": 0.8089066743850708, |
| "learning_rate": 8.048767142223845e-07, |
| "loss": 1.1524, |
| "step": 1968 |
| }, |
| { |
| "epoch": 4.475720789074355, |
| "grad_norm": 0.817398190498352, |
| "learning_rate": 8.025998780794622e-07, |
| "loss": 1.1694, |
| "step": 1969 |
| }, |
| { |
| "epoch": 4.477996965098634, |
| "grad_norm": 0.8068968653678894, |
| "learning_rate": 8.003256509162252e-07, |
| "loss": 1.1189, |
| "step": 1970 |
| }, |
| { |
| "epoch": 4.480273141122914, |
| "grad_norm": 0.8235507607460022, |
| "learning_rate": 7.980540362282643e-07, |
| "loss": 1.193, |
| "step": 1971 |
| }, |
| { |
| "epoch": 4.482549317147193, |
| "grad_norm": 0.823232114315033, |
| "learning_rate": 7.95785037507151e-07, |
| "loss": 1.1441, |
| "step": 1972 |
| }, |
| { |
| "epoch": 4.484825493171472, |
| "grad_norm": 0.7972595691680908, |
| "learning_rate": 7.935186582404386e-07, |
| "loss": 1.1691, |
| "step": 1973 |
| }, |
| { |
| "epoch": 4.487101669195751, |
| "grad_norm": 0.7991148233413696, |
| "learning_rate": 7.912549019116528e-07, |
| "loss": 1.1403, |
| "step": 1974 |
| }, |
| { |
| "epoch": 4.48937784522003, |
| "grad_norm": 0.8262163400650024, |
| "learning_rate": 7.889937720002874e-07, |
| "loss": 1.1792, |
| "step": 1975 |
| }, |
| { |
| "epoch": 4.49165402124431, |
| "grad_norm": 0.8145712614059448, |
| "learning_rate": 7.867352719818008e-07, |
| "loss": 1.1707, |
| "step": 1976 |
| }, |
| { |
| "epoch": 4.493930197268589, |
| "grad_norm": 0.8145231008529663, |
| "learning_rate": 7.844794053276076e-07, |
| "loss": 1.1955, |
| "step": 1977 |
| }, |
| { |
| "epoch": 4.496206373292868, |
| "grad_norm": 0.8229146599769592, |
| "learning_rate": 7.82226175505075e-07, |
| "loss": 1.128, |
| "step": 1978 |
| }, |
| { |
| "epoch": 4.498482549317147, |
| "grad_norm": 0.8210102319717407, |
| "learning_rate": 7.79975585977519e-07, |
| "loss": 1.1757, |
| "step": 1979 |
| }, |
| { |
| "epoch": 4.500758725341426, |
| "grad_norm": 0.7983988523483276, |
| "learning_rate": 7.777276402041956e-07, |
| "loss": 1.1589, |
| "step": 1980 |
| }, |
| { |
| "epoch": 4.503034901365705, |
| "grad_norm": 0.8079198002815247, |
| "learning_rate": 7.754823416402965e-07, |
| "loss": 1.1685, |
| "step": 1981 |
| }, |
| { |
| "epoch": 4.505311077389985, |
| "grad_norm": 0.831045925617218, |
| "learning_rate": 7.732396937369479e-07, |
| "loss": 1.1415, |
| "step": 1982 |
| }, |
| { |
| "epoch": 4.507587253414264, |
| "grad_norm": 0.7844316363334656, |
| "learning_rate": 7.709996999411984e-07, |
| "loss": 1.1581, |
| "step": 1983 |
| }, |
| { |
| "epoch": 4.509863429438544, |
| "grad_norm": 0.8027727007865906, |
| "learning_rate": 7.687623636960184e-07, |
| "loss": 1.1683, |
| "step": 1984 |
| }, |
| { |
| "epoch": 4.5121396054628224, |
| "grad_norm": 0.8118910789489746, |
| "learning_rate": 7.665276884402936e-07, |
| "loss": 1.1924, |
| "step": 1985 |
| }, |
| { |
| "epoch": 4.514415781487101, |
| "grad_norm": 0.8252329230308533, |
| "learning_rate": 7.642956776088187e-07, |
| "loss": 1.1428, |
| "step": 1986 |
| }, |
| { |
| "epoch": 4.516691957511381, |
| "grad_norm": 0.8044348359107971, |
| "learning_rate": 7.620663346322956e-07, |
| "loss": 1.1572, |
| "step": 1987 |
| }, |
| { |
| "epoch": 4.51896813353566, |
| "grad_norm": 0.8026670813560486, |
| "learning_rate": 7.598396629373228e-07, |
| "loss": 1.1693, |
| "step": 1988 |
| }, |
| { |
| "epoch": 4.52124430955994, |
| "grad_norm": 0.7939416170120239, |
| "learning_rate": 7.576156659463943e-07, |
| "loss": 1.1549, |
| "step": 1989 |
| }, |
| { |
| "epoch": 4.523520485584219, |
| "grad_norm": 0.779471218585968, |
| "learning_rate": 7.553943470778927e-07, |
| "loss": 1.117, |
| "step": 1990 |
| }, |
| { |
| "epoch": 4.5257966616084975, |
| "grad_norm": 0.8062757849693298, |
| "learning_rate": 7.531757097460828e-07, |
| "loss": 1.2131, |
| "step": 1991 |
| }, |
| { |
| "epoch": 4.528072837632777, |
| "grad_norm": 0.8512879610061646, |
| "learning_rate": 7.509597573611113e-07, |
| "loss": 1.1859, |
| "step": 1992 |
| }, |
| { |
| "epoch": 4.530349013657056, |
| "grad_norm": 0.8231443762779236, |
| "learning_rate": 7.487464933289948e-07, |
| "loss": 1.1504, |
| "step": 1993 |
| }, |
| { |
| "epoch": 4.532625189681335, |
| "grad_norm": 0.8112626075744629, |
| "learning_rate": 7.465359210516182e-07, |
| "loss": 1.1982, |
| "step": 1994 |
| }, |
| { |
| "epoch": 4.534901365705615, |
| "grad_norm": 0.8114172220230103, |
| "learning_rate": 7.443280439267311e-07, |
| "loss": 1.1768, |
| "step": 1995 |
| }, |
| { |
| "epoch": 4.537177541729894, |
| "grad_norm": 0.809261679649353, |
| "learning_rate": 7.421228653479385e-07, |
| "loss": 1.1516, |
| "step": 1996 |
| }, |
| { |
| "epoch": 4.5394537177541725, |
| "grad_norm": 0.8252702355384827, |
| "learning_rate": 7.399203887046977e-07, |
| "loss": 1.1944, |
| "step": 1997 |
| }, |
| { |
| "epoch": 4.541729893778452, |
| "grad_norm": 0.8462184071540833, |
| "learning_rate": 7.377206173823142e-07, |
| "loss": 1.179, |
| "step": 1998 |
| }, |
| { |
| "epoch": 4.544006069802731, |
| "grad_norm": 0.8015170097351074, |
| "learning_rate": 7.355235547619341e-07, |
| "loss": 1.1374, |
| "step": 1999 |
| }, |
| { |
| "epoch": 4.546282245827011, |
| "grad_norm": 0.8240376710891724, |
| "learning_rate": 7.333292042205404e-07, |
| "loss": 1.1751, |
| "step": 2000 |
| }, |
| { |
| "epoch": 4.54855842185129, |
| "grad_norm": 0.8022201061248779, |
| "learning_rate": 7.311375691309488e-07, |
| "loss": 1.1666, |
| "step": 2001 |
| }, |
| { |
| "epoch": 4.5508345978755695, |
| "grad_norm": 0.8332545161247253, |
| "learning_rate": 7.289486528617986e-07, |
| "loss": 1.1528, |
| "step": 2002 |
| }, |
| { |
| "epoch": 4.553110773899848, |
| "grad_norm": 0.8448001742362976, |
| "learning_rate": 7.267624587775537e-07, |
| "loss": 1.1488, |
| "step": 2003 |
| }, |
| { |
| "epoch": 4.555386949924127, |
| "grad_norm": 0.8457192182540894, |
| "learning_rate": 7.245789902384908e-07, |
| "loss": 1.1254, |
| "step": 2004 |
| }, |
| { |
| "epoch": 4.557663125948407, |
| "grad_norm": 0.8168480396270752, |
| "learning_rate": 7.223982506006988e-07, |
| "loss": 1.1782, |
| "step": 2005 |
| }, |
| { |
| "epoch": 4.559939301972686, |
| "grad_norm": 0.8271265029907227, |
| "learning_rate": 7.202202432160713e-07, |
| "loss": 1.168, |
| "step": 2006 |
| }, |
| { |
| "epoch": 4.562215477996965, |
| "grad_norm": 0.8210901618003845, |
| "learning_rate": 7.180449714323032e-07, |
| "loss": 1.168, |
| "step": 2007 |
| }, |
| { |
| "epoch": 4.564491654021245, |
| "grad_norm": 0.83175128698349, |
| "learning_rate": 7.158724385928828e-07, |
| "loss": 1.1645, |
| "step": 2008 |
| }, |
| { |
| "epoch": 4.5667678300455234, |
| "grad_norm": 0.7995832562446594, |
| "learning_rate": 7.137026480370923e-07, |
| "loss": 1.1604, |
| "step": 2009 |
| }, |
| { |
| "epoch": 4.569044006069802, |
| "grad_norm": 0.7985032796859741, |
| "learning_rate": 7.115356030999954e-07, |
| "loss": 1.1848, |
| "step": 2010 |
| }, |
| { |
| "epoch": 4.571320182094082, |
| "grad_norm": 0.8107689619064331, |
| "learning_rate": 7.093713071124361e-07, |
| "loss": 1.1495, |
| "step": 2011 |
| }, |
| { |
| "epoch": 4.573596358118361, |
| "grad_norm": 0.8028638958930969, |
| "learning_rate": 7.072097634010353e-07, |
| "loss": 1.1259, |
| "step": 2012 |
| }, |
| { |
| "epoch": 4.575872534142641, |
| "grad_norm": 0.8166947960853577, |
| "learning_rate": 7.050509752881815e-07, |
| "loss": 1.1866, |
| "step": 2013 |
| }, |
| { |
| "epoch": 4.57814871016692, |
| "grad_norm": 0.8298448920249939, |
| "learning_rate": 7.028949460920282e-07, |
| "loss": 1.1801, |
| "step": 2014 |
| }, |
| { |
| "epoch": 4.5804248861911985, |
| "grad_norm": 0.7964473366737366, |
| "learning_rate": 7.007416791264882e-07, |
| "loss": 1.1624, |
| "step": 2015 |
| }, |
| { |
| "epoch": 4.582701062215478, |
| "grad_norm": 0.805364727973938, |
| "learning_rate": 6.985911777012286e-07, |
| "loss": 1.1575, |
| "step": 2016 |
| }, |
| { |
| "epoch": 4.584977238239757, |
| "grad_norm": 0.8281643986701965, |
| "learning_rate": 6.96443445121667e-07, |
| "loss": 1.1739, |
| "step": 2017 |
| }, |
| { |
| "epoch": 4.587253414264037, |
| "grad_norm": 0.78781658411026, |
| "learning_rate": 6.942984846889639e-07, |
| "loss": 1.1249, |
| "step": 2018 |
| }, |
| { |
| "epoch": 4.589529590288316, |
| "grad_norm": 0.841881275177002, |
| "learning_rate": 6.921562997000186e-07, |
| "loss": 1.1361, |
| "step": 2019 |
| }, |
| { |
| "epoch": 4.591805766312595, |
| "grad_norm": 0.8270856738090515, |
| "learning_rate": 6.900168934474655e-07, |
| "loss": 1.1668, |
| "step": 2020 |
| }, |
| { |
| "epoch": 4.594081942336874, |
| "grad_norm": 0.8418042659759521, |
| "learning_rate": 6.878802692196663e-07, |
| "loss": 1.1731, |
| "step": 2021 |
| }, |
| { |
| "epoch": 4.596358118361153, |
| "grad_norm": 0.8001435995101929, |
| "learning_rate": 6.857464303007091e-07, |
| "loss": 1.1323, |
| "step": 2022 |
| }, |
| { |
| "epoch": 4.598634294385432, |
| "grad_norm": 0.8071247339248657, |
| "learning_rate": 6.836153799703993e-07, |
| "loss": 1.1322, |
| "step": 2023 |
| }, |
| { |
| "epoch": 4.600910470409712, |
| "grad_norm": 0.8100282549858093, |
| "learning_rate": 6.814871215042552e-07, |
| "loss": 1.1978, |
| "step": 2024 |
| }, |
| { |
| "epoch": 4.603186646433991, |
| "grad_norm": 0.8448064923286438, |
| "learning_rate": 6.793616581735063e-07, |
| "loss": 1.1592, |
| "step": 2025 |
| }, |
| { |
| "epoch": 4.60546282245827, |
| "grad_norm": 0.8261284232139587, |
| "learning_rate": 6.772389932450841e-07, |
| "loss": 1.1742, |
| "step": 2026 |
| }, |
| { |
| "epoch": 4.607738998482549, |
| "grad_norm": 0.8271704316139221, |
| "learning_rate": 6.751191299816192e-07, |
| "loss": 1.1841, |
| "step": 2027 |
| }, |
| { |
| "epoch": 4.610015174506828, |
| "grad_norm": 0.8234429955482483, |
| "learning_rate": 6.730020716414357e-07, |
| "loss": 1.1664, |
| "step": 2028 |
| }, |
| { |
| "epoch": 4.612291350531108, |
| "grad_norm": 0.8064398169517517, |
| "learning_rate": 6.708878214785472e-07, |
| "loss": 1.1777, |
| "step": 2029 |
| }, |
| { |
| "epoch": 4.614567526555387, |
| "grad_norm": 0.8364176154136658, |
| "learning_rate": 6.687763827426491e-07, |
| "loss": 1.1359, |
| "step": 2030 |
| }, |
| { |
| "epoch": 4.616843702579666, |
| "grad_norm": 0.8281029462814331, |
| "learning_rate": 6.66667758679119e-07, |
| "loss": 1.126, |
| "step": 2031 |
| }, |
| { |
| "epoch": 4.619119878603946, |
| "grad_norm": 0.8018792867660522, |
| "learning_rate": 6.645619525290043e-07, |
| "loss": 1.1828, |
| "step": 2032 |
| }, |
| { |
| "epoch": 4.6213960546282244, |
| "grad_norm": 0.8104063272476196, |
| "learning_rate": 6.624589675290244e-07, |
| "loss": 1.1446, |
| "step": 2033 |
| }, |
| { |
| "epoch": 4.623672230652504, |
| "grad_norm": 0.8424516916275024, |
| "learning_rate": 6.603588069115605e-07, |
| "loss": 1.1931, |
| "step": 2034 |
| }, |
| { |
| "epoch": 4.625948406676783, |
| "grad_norm": 0.8288046717643738, |
| "learning_rate": 6.582614739046528e-07, |
| "loss": 1.1313, |
| "step": 2035 |
| }, |
| { |
| "epoch": 4.628224582701062, |
| "grad_norm": 0.8373975157737732, |
| "learning_rate": 6.561669717319962e-07, |
| "loss": 1.1495, |
| "step": 2036 |
| }, |
| { |
| "epoch": 4.630500758725342, |
| "grad_norm": 0.8243685364723206, |
| "learning_rate": 6.540753036129336e-07, |
| "loss": 1.1612, |
| "step": 2037 |
| }, |
| { |
| "epoch": 4.632776934749621, |
| "grad_norm": 0.8179749846458435, |
| "learning_rate": 6.519864727624514e-07, |
| "loss": 1.1444, |
| "step": 2038 |
| }, |
| { |
| "epoch": 4.6350531107738995, |
| "grad_norm": 0.8195221424102783, |
| "learning_rate": 6.499004823911772e-07, |
| "loss": 1.169, |
| "step": 2039 |
| }, |
| { |
| "epoch": 4.637329286798179, |
| "grad_norm": 0.8341556787490845, |
| "learning_rate": 6.47817335705371e-07, |
| "loss": 1.144, |
| "step": 2040 |
| }, |
| { |
| "epoch": 4.639605462822458, |
| "grad_norm": 0.8077003359794617, |
| "learning_rate": 6.457370359069209e-07, |
| "loss": 1.2334, |
| "step": 2041 |
| }, |
| { |
| "epoch": 4.641881638846737, |
| "grad_norm": 0.8267129063606262, |
| "learning_rate": 6.436595861933428e-07, |
| "loss": 1.19, |
| "step": 2042 |
| }, |
| { |
| "epoch": 4.644157814871017, |
| "grad_norm": 0.8191685676574707, |
| "learning_rate": 6.415849897577667e-07, |
| "loss": 1.1607, |
| "step": 2043 |
| }, |
| { |
| "epoch": 4.646433990895296, |
| "grad_norm": 0.8216390013694763, |
| "learning_rate": 6.39513249788942e-07, |
| "loss": 1.1454, |
| "step": 2044 |
| }, |
| { |
| "epoch": 4.648710166919575, |
| "grad_norm": 0.8405131101608276, |
| "learning_rate": 6.374443694712246e-07, |
| "loss": 1.1722, |
| "step": 2045 |
| }, |
| { |
| "epoch": 4.650986342943854, |
| "grad_norm": 0.8507837057113647, |
| "learning_rate": 6.353783519845752e-07, |
| "loss": 1.1185, |
| "step": 2046 |
| }, |
| { |
| "epoch": 4.653262518968134, |
| "grad_norm": 0.8254572153091431, |
| "learning_rate": 6.333152005045562e-07, |
| "loss": 1.1494, |
| "step": 2047 |
| }, |
| { |
| "epoch": 4.655538694992413, |
| "grad_norm": 0.8171166181564331, |
| "learning_rate": 6.312549182023229e-07, |
| "loss": 1.1168, |
| "step": 2048 |
| }, |
| { |
| "epoch": 4.657814871016692, |
| "grad_norm": 0.8100786805152893, |
| "learning_rate": 6.291975082446206e-07, |
| "loss": 1.1426, |
| "step": 2049 |
| }, |
| { |
| "epoch": 4.6600910470409715, |
| "grad_norm": 0.8109079003334045, |
| "learning_rate": 6.271429737937806e-07, |
| "loss": 1.1661, |
| "step": 2050 |
| }, |
| { |
| "epoch": 4.66236722306525, |
| "grad_norm": 0.7933968305587769, |
| "learning_rate": 6.250913180077139e-07, |
| "loss": 1.1421, |
| "step": 2051 |
| }, |
| { |
| "epoch": 4.664643399089529, |
| "grad_norm": 0.8203511834144592, |
| "learning_rate": 6.230425440399065e-07, |
| "loss": 1.1702, |
| "step": 2052 |
| }, |
| { |
| "epoch": 4.666919575113809, |
| "grad_norm": 0.8336530923843384, |
| "learning_rate": 6.209966550394162e-07, |
| "loss": 1.1474, |
| "step": 2053 |
| }, |
| { |
| "epoch": 4.669195751138088, |
| "grad_norm": 0.8284228444099426, |
| "learning_rate": 6.189536541508645e-07, |
| "loss": 1.1556, |
| "step": 2054 |
| }, |
| { |
| "epoch": 4.671471927162367, |
| "grad_norm": 0.8003067374229431, |
| "learning_rate": 6.169135445144364e-07, |
| "loss": 1.1865, |
| "step": 2055 |
| }, |
| { |
| "epoch": 4.673748103186647, |
| "grad_norm": 0.8248746991157532, |
| "learning_rate": 6.148763292658704e-07, |
| "loss": 1.2081, |
| "step": 2056 |
| }, |
| { |
| "epoch": 4.6760242792109254, |
| "grad_norm": 0.8294516205787659, |
| "learning_rate": 6.128420115364575e-07, |
| "loss": 1.1637, |
| "step": 2057 |
| }, |
| { |
| "epoch": 4.678300455235205, |
| "grad_norm": 0.8218663334846497, |
| "learning_rate": 6.108105944530346e-07, |
| "loss": 1.1297, |
| "step": 2058 |
| }, |
| { |
| "epoch": 4.680576631259484, |
| "grad_norm": 0.8338181376457214, |
| "learning_rate": 6.087820811379802e-07, |
| "loss": 1.1064, |
| "step": 2059 |
| }, |
| { |
| "epoch": 4.682852807283763, |
| "grad_norm": 0.821843147277832, |
| "learning_rate": 6.067564747092095e-07, |
| "loss": 1.1483, |
| "step": 2060 |
| }, |
| { |
| "epoch": 4.685128983308043, |
| "grad_norm": 0.8181595802307129, |
| "learning_rate": 6.04733778280171e-07, |
| "loss": 1.1717, |
| "step": 2061 |
| }, |
| { |
| "epoch": 4.687405159332322, |
| "grad_norm": 0.7944086194038391, |
| "learning_rate": 6.02713994959838e-07, |
| "loss": 1.1617, |
| "step": 2062 |
| }, |
| { |
| "epoch": 4.689681335356601, |
| "grad_norm": 0.8191626667976379, |
| "learning_rate": 6.006971278527085e-07, |
| "loss": 1.1005, |
| "step": 2063 |
| }, |
| { |
| "epoch": 4.69195751138088, |
| "grad_norm": 0.8066505789756775, |
| "learning_rate": 5.986831800587972e-07, |
| "loss": 1.1382, |
| "step": 2064 |
| }, |
| { |
| "epoch": 4.694233687405159, |
| "grad_norm": 0.831630527973175, |
| "learning_rate": 5.966721546736315e-07, |
| "loss": 1.1558, |
| "step": 2065 |
| }, |
| { |
| "epoch": 4.696509863429439, |
| "grad_norm": 0.8153077960014343, |
| "learning_rate": 5.946640547882468e-07, |
| "loss": 1.1598, |
| "step": 2066 |
| }, |
| { |
| "epoch": 4.698786039453718, |
| "grad_norm": 0.8225074410438538, |
| "learning_rate": 5.926588834891823e-07, |
| "loss": 1.1484, |
| "step": 2067 |
| }, |
| { |
| "epoch": 4.701062215477997, |
| "grad_norm": 0.818878173828125, |
| "learning_rate": 5.906566438584752e-07, |
| "loss": 1.1618, |
| "step": 2068 |
| }, |
| { |
| "epoch": 4.703338391502276, |
| "grad_norm": 0.8222529888153076, |
| "learning_rate": 5.88657338973658e-07, |
| "loss": 1.1248, |
| "step": 2069 |
| }, |
| { |
| "epoch": 4.705614567526555, |
| "grad_norm": 0.8568587303161621, |
| "learning_rate": 5.866609719077515e-07, |
| "loss": 1.1797, |
| "step": 2070 |
| }, |
| { |
| "epoch": 4.707890743550834, |
| "grad_norm": 0.8610350489616394, |
| "learning_rate": 5.846675457292597e-07, |
| "loss": 1.1441, |
| "step": 2071 |
| }, |
| { |
| "epoch": 4.710166919575114, |
| "grad_norm": 0.8169013857841492, |
| "learning_rate": 5.826770635021697e-07, |
| "loss": 1.1417, |
| "step": 2072 |
| }, |
| { |
| "epoch": 4.712443095599393, |
| "grad_norm": 0.8121698498725891, |
| "learning_rate": 5.80689528285939e-07, |
| "loss": 1.2031, |
| "step": 2073 |
| }, |
| { |
| "epoch": 4.7147192716236725, |
| "grad_norm": 0.8544483780860901, |
| "learning_rate": 5.787049431354996e-07, |
| "loss": 1.15, |
| "step": 2074 |
| }, |
| { |
| "epoch": 4.716995447647951, |
| "grad_norm": 0.8276944756507874, |
| "learning_rate": 5.767233111012466e-07, |
| "loss": 1.1589, |
| "step": 2075 |
| }, |
| { |
| "epoch": 4.71927162367223, |
| "grad_norm": 0.8428331017494202, |
| "learning_rate": 5.747446352290364e-07, |
| "loss": 1.1786, |
| "step": 2076 |
| }, |
| { |
| "epoch": 4.72154779969651, |
| "grad_norm": 0.8325369954109192, |
| "learning_rate": 5.727689185601834e-07, |
| "loss": 1.1864, |
| "step": 2077 |
| }, |
| { |
| "epoch": 4.723823975720789, |
| "grad_norm": 0.8243165016174316, |
| "learning_rate": 5.707961641314516e-07, |
| "loss": 1.1329, |
| "step": 2078 |
| }, |
| { |
| "epoch": 4.726100151745069, |
| "grad_norm": 0.8249309659004211, |
| "learning_rate": 5.688263749750523e-07, |
| "loss": 1.1502, |
| "step": 2079 |
| }, |
| { |
| "epoch": 4.728376327769348, |
| "grad_norm": 0.8111382126808167, |
| "learning_rate": 5.668595541186395e-07, |
| "loss": 1.1593, |
| "step": 2080 |
| }, |
| { |
| "epoch": 4.7306525037936265, |
| "grad_norm": 0.8352332711219788, |
| "learning_rate": 5.648957045853043e-07, |
| "loss": 1.1917, |
| "step": 2081 |
| }, |
| { |
| "epoch": 4.732928679817906, |
| "grad_norm": 0.8187192678451538, |
| "learning_rate": 5.629348293935704e-07, |
| "loss": 1.1352, |
| "step": 2082 |
| }, |
| { |
| "epoch": 4.735204855842185, |
| "grad_norm": 0.8193315267562866, |
| "learning_rate": 5.609769315573921e-07, |
| "loss": 1.1494, |
| "step": 2083 |
| }, |
| { |
| "epoch": 4.737481031866464, |
| "grad_norm": 0.8024821281433105, |
| "learning_rate": 5.590220140861441e-07, |
| "loss": 1.1278, |
| "step": 2084 |
| }, |
| { |
| "epoch": 4.739757207890744, |
| "grad_norm": 0.8180125951766968, |
| "learning_rate": 5.570700799846232e-07, |
| "loss": 1.1684, |
| "step": 2085 |
| }, |
| { |
| "epoch": 4.742033383915023, |
| "grad_norm": 0.8202872276306152, |
| "learning_rate": 5.551211322530381e-07, |
| "loss": 1.1732, |
| "step": 2086 |
| }, |
| { |
| "epoch": 4.7443095599393015, |
| "grad_norm": 0.8189080357551575, |
| "learning_rate": 5.531751738870089e-07, |
| "loss": 1.1718, |
| "step": 2087 |
| }, |
| { |
| "epoch": 4.746585735963581, |
| "grad_norm": 0.8162835836410522, |
| "learning_rate": 5.512322078775603e-07, |
| "loss": 1.1112, |
| "step": 2088 |
| }, |
| { |
| "epoch": 4.74886191198786, |
| "grad_norm": 0.824478268623352, |
| "learning_rate": 5.492922372111173e-07, |
| "loss": 1.1374, |
| "step": 2089 |
| }, |
| { |
| "epoch": 4.75113808801214, |
| "grad_norm": 0.8006694316864014, |
| "learning_rate": 5.47355264869501e-07, |
| "loss": 1.1511, |
| "step": 2090 |
| }, |
| { |
| "epoch": 4.753414264036419, |
| "grad_norm": 0.8223046064376831, |
| "learning_rate": 5.454212938299256e-07, |
| "loss": 1.1636, |
| "step": 2091 |
| }, |
| { |
| "epoch": 4.7556904400606985, |
| "grad_norm": 0.7983687520027161, |
| "learning_rate": 5.434903270649894e-07, |
| "loss": 1.1965, |
| "step": 2092 |
| }, |
| { |
| "epoch": 4.757966616084977, |
| "grad_norm": 0.8100834488868713, |
| "learning_rate": 5.415623675426759e-07, |
| "loss": 1.1471, |
| "step": 2093 |
| }, |
| { |
| "epoch": 4.760242792109256, |
| "grad_norm": 0.8282079696655273, |
| "learning_rate": 5.396374182263442e-07, |
| "loss": 1.1684, |
| "step": 2094 |
| }, |
| { |
| "epoch": 4.762518968133536, |
| "grad_norm": 0.8106188178062439, |
| "learning_rate": 5.377154820747271e-07, |
| "loss": 1.1781, |
| "step": 2095 |
| }, |
| { |
| "epoch": 4.764795144157815, |
| "grad_norm": 0.8157429099082947, |
| "learning_rate": 5.357965620419262e-07, |
| "loss": 1.1867, |
| "step": 2096 |
| }, |
| { |
| "epoch": 4.767071320182094, |
| "grad_norm": 0.811551570892334, |
| "learning_rate": 5.338806610774072e-07, |
| "loss": 1.1776, |
| "step": 2097 |
| }, |
| { |
| "epoch": 4.7693474962063735, |
| "grad_norm": 0.8151900172233582, |
| "learning_rate": 5.319677821259947e-07, |
| "loss": 1.1505, |
| "step": 2098 |
| }, |
| { |
| "epoch": 4.771623672230652, |
| "grad_norm": 0.816935658454895, |
| "learning_rate": 5.300579281278703e-07, |
| "loss": 1.1596, |
| "step": 2099 |
| }, |
| { |
| "epoch": 4.773899848254931, |
| "grad_norm": 0.8230782747268677, |
| "learning_rate": 5.281511020185639e-07, |
| "loss": 1.1585, |
| "step": 2100 |
| }, |
| { |
| "epoch": 4.776176024279211, |
| "grad_norm": 0.8262774348258972, |
| "learning_rate": 5.262473067289528e-07, |
| "loss": 1.1281, |
| "step": 2101 |
| }, |
| { |
| "epoch": 4.77845220030349, |
| "grad_norm": 0.8364963531494141, |
| "learning_rate": 5.243465451852548e-07, |
| "loss": 1.1742, |
| "step": 2102 |
| }, |
| { |
| "epoch": 4.78072837632777, |
| "grad_norm": 0.8024651408195496, |
| "learning_rate": 5.224488203090241e-07, |
| "loss": 1.1709, |
| "step": 2103 |
| }, |
| { |
| "epoch": 4.783004552352049, |
| "grad_norm": 0.8232298493385315, |
| "learning_rate": 5.205541350171508e-07, |
| "loss": 1.1824, |
| "step": 2104 |
| }, |
| { |
| "epoch": 4.7852807283763275, |
| "grad_norm": 0.8233107328414917, |
| "learning_rate": 5.186624922218495e-07, |
| "loss": 1.1335, |
| "step": 2105 |
| }, |
| { |
| "epoch": 4.787556904400607, |
| "grad_norm": 0.8403355479240417, |
| "learning_rate": 5.167738948306586e-07, |
| "loss": 1.1386, |
| "step": 2106 |
| }, |
| { |
| "epoch": 4.789833080424886, |
| "grad_norm": 0.8523696660995483, |
| "learning_rate": 5.148883457464385e-07, |
| "loss": 1.2001, |
| "step": 2107 |
| }, |
| { |
| "epoch": 4.792109256449166, |
| "grad_norm": 0.825985848903656, |
| "learning_rate": 5.130058478673608e-07, |
| "loss": 1.1253, |
| "step": 2108 |
| }, |
| { |
| "epoch": 4.794385432473445, |
| "grad_norm": 0.8368105292320251, |
| "learning_rate": 5.111264040869093e-07, |
| "loss": 1.1424, |
| "step": 2109 |
| }, |
| { |
| "epoch": 4.796661608497724, |
| "grad_norm": 0.8407347798347473, |
| "learning_rate": 5.092500172938728e-07, |
| "loss": 1.2224, |
| "step": 2110 |
| }, |
| { |
| "epoch": 4.798937784522003, |
| "grad_norm": 0.8046110272407532, |
| "learning_rate": 5.073766903723415e-07, |
| "loss": 1.1643, |
| "step": 2111 |
| }, |
| { |
| "epoch": 4.801213960546282, |
| "grad_norm": 0.8466883301734924, |
| "learning_rate": 5.055064262017012e-07, |
| "loss": 1.1745, |
| "step": 2112 |
| }, |
| { |
| "epoch": 4.803490136570561, |
| "grad_norm": 0.8486934304237366, |
| "learning_rate": 5.036392276566335e-07, |
| "loss": 1.1093, |
| "step": 2113 |
| }, |
| { |
| "epoch": 4.805766312594841, |
| "grad_norm": 0.8254931569099426, |
| "learning_rate": 5.01775097607104e-07, |
| "loss": 1.1839, |
| "step": 2114 |
| }, |
| { |
| "epoch": 4.80804248861912, |
| "grad_norm": 0.8326417803764343, |
| "learning_rate": 4.999140389183652e-07, |
| "loss": 1.1827, |
| "step": 2115 |
| }, |
| { |
| "epoch": 4.810318664643399, |
| "grad_norm": 0.8163275122642517, |
| "learning_rate": 4.980560544509467e-07, |
| "loss": 1.1775, |
| "step": 2116 |
| }, |
| { |
| "epoch": 4.812594840667678, |
| "grad_norm": 0.8416717648506165, |
| "learning_rate": 4.962011470606531e-07, |
| "loss": 1.1539, |
| "step": 2117 |
| }, |
| { |
| "epoch": 4.814871016691957, |
| "grad_norm": 0.785467803478241, |
| "learning_rate": 4.943493195985604e-07, |
| "loss": 1.1366, |
| "step": 2118 |
| }, |
| { |
| "epoch": 4.817147192716237, |
| "grad_norm": 0.8316680192947388, |
| "learning_rate": 4.925005749110096e-07, |
| "loss": 1.1258, |
| "step": 2119 |
| }, |
| { |
| "epoch": 4.819423368740516, |
| "grad_norm": 0.8190419673919678, |
| "learning_rate": 4.906549158396029e-07, |
| "loss": 1.1238, |
| "step": 2120 |
| }, |
| { |
| "epoch": 4.821699544764795, |
| "grad_norm": 0.8137006759643555, |
| "learning_rate": 4.888123452212023e-07, |
| "loss": 1.1634, |
| "step": 2121 |
| }, |
| { |
| "epoch": 4.8239757207890746, |
| "grad_norm": 0.8443368077278137, |
| "learning_rate": 4.869728658879205e-07, |
| "loss": 1.1868, |
| "step": 2122 |
| }, |
| { |
| "epoch": 4.826251896813353, |
| "grad_norm": 0.8072926998138428, |
| "learning_rate": 4.85136480667118e-07, |
| "loss": 1.1586, |
| "step": 2123 |
| }, |
| { |
| "epoch": 4.828528072837633, |
| "grad_norm": 0.8307923674583435, |
| "learning_rate": 4.833031923814033e-07, |
| "loss": 1.1681, |
| "step": 2124 |
| }, |
| { |
| "epoch": 4.830804248861912, |
| "grad_norm": 0.7928266525268555, |
| "learning_rate": 4.814730038486193e-07, |
| "loss": 1.1716, |
| "step": 2125 |
| }, |
| { |
| "epoch": 4.833080424886191, |
| "grad_norm": 0.8232721090316772, |
| "learning_rate": 4.796459178818496e-07, |
| "loss": 1.1974, |
| "step": 2126 |
| }, |
| { |
| "epoch": 4.835356600910471, |
| "grad_norm": 0.8364123106002808, |
| "learning_rate": 4.77821937289406e-07, |
| "loss": 1.1508, |
| "step": 2127 |
| }, |
| { |
| "epoch": 4.83763277693475, |
| "grad_norm": 0.8027380108833313, |
| "learning_rate": 4.760010648748273e-07, |
| "loss": 1.1578, |
| "step": 2128 |
| }, |
| { |
| "epoch": 4.8399089529590285, |
| "grad_norm": 0.8701675534248352, |
| "learning_rate": 4.7418330343687703e-07, |
| "loss": 1.1634, |
| "step": 2129 |
| }, |
| { |
| "epoch": 4.842185128983308, |
| "grad_norm": 0.8248427510261536, |
| "learning_rate": 4.723686557695351e-07, |
| "loss": 1.1305, |
| "step": 2130 |
| }, |
| { |
| "epoch": 4.844461305007587, |
| "grad_norm": 0.8026407957077026, |
| "learning_rate": 4.705571246619955e-07, |
| "loss": 1.182, |
| "step": 2131 |
| }, |
| { |
| "epoch": 4.846737481031866, |
| "grad_norm": 0.8148173689842224, |
| "learning_rate": 4.687487128986629e-07, |
| "loss": 1.1769, |
| "step": 2132 |
| }, |
| { |
| "epoch": 4.849013657056146, |
| "grad_norm": 0.8292121291160583, |
| "learning_rate": 4.669434232591455e-07, |
| "loss": 1.1573, |
| "step": 2133 |
| }, |
| { |
| "epoch": 4.851289833080425, |
| "grad_norm": 0.8071368932723999, |
| "learning_rate": 4.6514125851825574e-07, |
| "loss": 1.1858, |
| "step": 2134 |
| }, |
| { |
| "epoch": 4.853566009104704, |
| "grad_norm": 0.8253504633903503, |
| "learning_rate": 4.633422214460004e-07, |
| "loss": 1.1667, |
| "step": 2135 |
| }, |
| { |
| "epoch": 4.855842185128983, |
| "grad_norm": 0.8513967990875244, |
| "learning_rate": 4.6154631480757913e-07, |
| "loss": 1.1429, |
| "step": 2136 |
| }, |
| { |
| "epoch": 4.858118361153263, |
| "grad_norm": 0.8523240089416504, |
| "learning_rate": 4.5975354136338164e-07, |
| "loss": 1.168, |
| "step": 2137 |
| }, |
| { |
| "epoch": 4.860394537177542, |
| "grad_norm": 0.8168431520462036, |
| "learning_rate": 4.579639038689804e-07, |
| "loss": 1.0975, |
| "step": 2138 |
| }, |
| { |
| "epoch": 4.862670713201821, |
| "grad_norm": 0.8387389183044434, |
| "learning_rate": 4.561774050751275e-07, |
| "loss": 1.1652, |
| "step": 2139 |
| }, |
| { |
| "epoch": 4.8649468892261005, |
| "grad_norm": 0.8411140441894531, |
| "learning_rate": 4.543940477277517e-07, |
| "loss": 1.1272, |
| "step": 2140 |
| }, |
| { |
| "epoch": 4.867223065250379, |
| "grad_norm": 0.8062079548835754, |
| "learning_rate": 4.526138345679526e-07, |
| "loss": 1.128, |
| "step": 2141 |
| }, |
| { |
| "epoch": 4.869499241274658, |
| "grad_norm": 0.8193447589874268, |
| "learning_rate": 4.508367683319967e-07, |
| "loss": 1.1541, |
| "step": 2142 |
| }, |
| { |
| "epoch": 4.871775417298938, |
| "grad_norm": 0.8450594544410706, |
| "learning_rate": 4.4906285175131515e-07, |
| "loss": 1.162, |
| "step": 2143 |
| }, |
| { |
| "epoch": 4.874051593323217, |
| "grad_norm": 0.8277652263641357, |
| "learning_rate": 4.472920875524958e-07, |
| "loss": 1.1554, |
| "step": 2144 |
| }, |
| { |
| "epoch": 4.876327769347496, |
| "grad_norm": 0.833281397819519, |
| "learning_rate": 4.455244784572832e-07, |
| "loss": 1.1684, |
| "step": 2145 |
| }, |
| { |
| "epoch": 4.8786039453717756, |
| "grad_norm": 0.8380672931671143, |
| "learning_rate": 4.4376002718257095e-07, |
| "loss": 1.2199, |
| "step": 2146 |
| }, |
| { |
| "epoch": 4.880880121396054, |
| "grad_norm": 0.8138743042945862, |
| "learning_rate": 4.419987364403991e-07, |
| "loss": 1.1682, |
| "step": 2147 |
| }, |
| { |
| "epoch": 4.883156297420334, |
| "grad_norm": 0.8843202590942383, |
| "learning_rate": 4.402406089379502e-07, |
| "loss": 1.1937, |
| "step": 2148 |
| }, |
| { |
| "epoch": 4.885432473444613, |
| "grad_norm": 0.8394472002983093, |
| "learning_rate": 4.384856473775448e-07, |
| "loss": 1.1256, |
| "step": 2149 |
| }, |
| { |
| "epoch": 4.887708649468892, |
| "grad_norm": 0.8245413899421692, |
| "learning_rate": 4.367338544566363e-07, |
| "loss": 1.1426, |
| "step": 2150 |
| }, |
| { |
| "epoch": 4.889984825493172, |
| "grad_norm": 0.8412423729896545, |
| "learning_rate": 4.3498523286780973e-07, |
| "loss": 1.1481, |
| "step": 2151 |
| }, |
| { |
| "epoch": 4.892261001517451, |
| "grad_norm": 0.8549144268035889, |
| "learning_rate": 4.332397852987741e-07, |
| "loss": 1.1438, |
| "step": 2152 |
| }, |
| { |
| "epoch": 4.89453717754173, |
| "grad_norm": 0.8178640604019165, |
| "learning_rate": 4.314975144323591e-07, |
| "loss": 1.1949, |
| "step": 2153 |
| }, |
| { |
| "epoch": 4.896813353566009, |
| "grad_norm": 0.8267374038696289, |
| "learning_rate": 4.297584229465149e-07, |
| "loss": 1.1239, |
| "step": 2154 |
| }, |
| { |
| "epoch": 4.899089529590288, |
| "grad_norm": 0.8270822763442993, |
| "learning_rate": 4.280225135143004e-07, |
| "loss": 1.1741, |
| "step": 2155 |
| }, |
| { |
| "epoch": 4.901365705614568, |
| "grad_norm": 0.8518311381340027, |
| "learning_rate": 4.262897888038872e-07, |
| "loss": 1.1664, |
| "step": 2156 |
| }, |
| { |
| "epoch": 4.903641881638847, |
| "grad_norm": 0.8309064507484436, |
| "learning_rate": 4.2456025147855016e-07, |
| "loss": 1.1916, |
| "step": 2157 |
| }, |
| { |
| "epoch": 4.905918057663126, |
| "grad_norm": 0.8361365795135498, |
| "learning_rate": 4.228339041966645e-07, |
| "loss": 1.1917, |
| "step": 2158 |
| }, |
| { |
| "epoch": 4.908194233687405, |
| "grad_norm": 0.8316016793251038, |
| "learning_rate": 4.211107496117042e-07, |
| "loss": 1.1765, |
| "step": 2159 |
| }, |
| { |
| "epoch": 4.910470409711684, |
| "grad_norm": 0.8184367418289185, |
| "learning_rate": 4.193907903722344e-07, |
| "loss": 1.1862, |
| "step": 2160 |
| }, |
| { |
| "epoch": 4.912746585735963, |
| "grad_norm": 0.8196964859962463, |
| "learning_rate": 4.176740291219089e-07, |
| "loss": 1.156, |
| "step": 2161 |
| }, |
| { |
| "epoch": 4.915022761760243, |
| "grad_norm": 0.8255534172058105, |
| "learning_rate": 4.1596046849946614e-07, |
| "loss": 1.1498, |
| "step": 2162 |
| }, |
| { |
| "epoch": 4.917298937784522, |
| "grad_norm": 0.8433260321617126, |
| "learning_rate": 4.142501111387251e-07, |
| "loss": 1.1963, |
| "step": 2163 |
| }, |
| { |
| "epoch": 4.9195751138088015, |
| "grad_norm": 0.8118224740028381, |
| "learning_rate": 4.1254295966858206e-07, |
| "loss": 1.1702, |
| "step": 2164 |
| }, |
| { |
| "epoch": 4.92185128983308, |
| "grad_norm": 0.8457343578338623, |
| "learning_rate": 4.108390167130044e-07, |
| "loss": 1.1766, |
| "step": 2165 |
| }, |
| { |
| "epoch": 4.924127465857359, |
| "grad_norm": 0.8077833652496338, |
| "learning_rate": 4.0913828489102804e-07, |
| "loss": 1.1485, |
| "step": 2166 |
| }, |
| { |
| "epoch": 4.926403641881639, |
| "grad_norm": 0.8197788000106812, |
| "learning_rate": 4.074407668167549e-07, |
| "loss": 1.1737, |
| "step": 2167 |
| }, |
| { |
| "epoch": 4.928679817905918, |
| "grad_norm": 0.833842933177948, |
| "learning_rate": 4.057464650993451e-07, |
| "loss": 1.117, |
| "step": 2168 |
| }, |
| { |
| "epoch": 4.930955993930198, |
| "grad_norm": 0.8162358403205872, |
| "learning_rate": 4.0405538234301627e-07, |
| "loss": 1.1513, |
| "step": 2169 |
| }, |
| { |
| "epoch": 4.9332321699544766, |
| "grad_norm": 0.8365995287895203, |
| "learning_rate": 4.0236752114703764e-07, |
| "loss": 1.1059, |
| "step": 2170 |
| }, |
| { |
| "epoch": 4.935508345978755, |
| "grad_norm": 0.8199490308761597, |
| "learning_rate": 4.006828841057273e-07, |
| "loss": 1.1429, |
| "step": 2171 |
| }, |
| { |
| "epoch": 4.937784522003035, |
| "grad_norm": 0.8299211263656616, |
| "learning_rate": 3.9900147380844716e-07, |
| "loss": 1.1555, |
| "step": 2172 |
| }, |
| { |
| "epoch": 4.940060698027314, |
| "grad_norm": 0.8137705326080322, |
| "learning_rate": 3.9732329283960065e-07, |
| "loss": 1.1494, |
| "step": 2173 |
| }, |
| { |
| "epoch": 4.942336874051593, |
| "grad_norm": 0.8200099468231201, |
| "learning_rate": 3.956483437786257e-07, |
| "loss": 1.1554, |
| "step": 2174 |
| }, |
| { |
| "epoch": 4.944613050075873, |
| "grad_norm": 0.8227297067642212, |
| "learning_rate": 3.9397662919999495e-07, |
| "loss": 1.171, |
| "step": 2175 |
| }, |
| { |
| "epoch": 4.946889226100152, |
| "grad_norm": 0.8356049656867981, |
| "learning_rate": 3.923081516732077e-07, |
| "loss": 1.1456, |
| "step": 2176 |
| }, |
| { |
| "epoch": 4.9491654021244305, |
| "grad_norm": 0.8176268339157104, |
| "learning_rate": 3.906429137627882e-07, |
| "loss": 1.123, |
| "step": 2177 |
| }, |
| { |
| "epoch": 4.95144157814871, |
| "grad_norm": 0.8163107633590698, |
| "learning_rate": 3.8898091802828135e-07, |
| "loss": 1.1503, |
| "step": 2178 |
| }, |
| { |
| "epoch": 4.953717754172989, |
| "grad_norm": 0.7986423969268799, |
| "learning_rate": 3.8732216702424915e-07, |
| "loss": 1.145, |
| "step": 2179 |
| }, |
| { |
| "epoch": 4.955993930197269, |
| "grad_norm": 0.8431774973869324, |
| "learning_rate": 3.856666633002648e-07, |
| "loss": 1.1953, |
| "step": 2180 |
| }, |
| { |
| "epoch": 4.958270106221548, |
| "grad_norm": 0.840099036693573, |
| "learning_rate": 3.840144094009124e-07, |
| "loss": 1.1333, |
| "step": 2181 |
| }, |
| { |
| "epoch": 4.9605462822458275, |
| "grad_norm": 0.8205875754356384, |
| "learning_rate": 3.8236540786577987e-07, |
| "loss": 1.1537, |
| "step": 2182 |
| }, |
| { |
| "epoch": 4.962822458270106, |
| "grad_norm": 0.8261964917182922, |
| "learning_rate": 3.8071966122945585e-07, |
| "loss": 1.1524, |
| "step": 2183 |
| }, |
| { |
| "epoch": 4.965098634294385, |
| "grad_norm": 0.8418870568275452, |
| "learning_rate": 3.790771720215261e-07, |
| "loss": 1.1054, |
| "step": 2184 |
| }, |
| { |
| "epoch": 4.967374810318665, |
| "grad_norm": 0.8424736857414246, |
| "learning_rate": 3.774379427665695e-07, |
| "loss": 1.1525, |
| "step": 2185 |
| }, |
| { |
| "epoch": 4.969650986342944, |
| "grad_norm": 0.8084613680839539, |
| "learning_rate": 3.7580197598415523e-07, |
| "loss": 1.1821, |
| "step": 2186 |
| }, |
| { |
| "epoch": 4.971927162367223, |
| "grad_norm": 0.8364375233650208, |
| "learning_rate": 3.7416927418883724e-07, |
| "loss": 1.1547, |
| "step": 2187 |
| }, |
| { |
| "epoch": 4.9742033383915025, |
| "grad_norm": 0.8641977906227112, |
| "learning_rate": 3.7253983989015e-07, |
| "loss": 1.1181, |
| "step": 2188 |
| }, |
| { |
| "epoch": 4.976479514415781, |
| "grad_norm": 0.7975889444351196, |
| "learning_rate": 3.709136755926082e-07, |
| "loss": 1.1443, |
| "step": 2189 |
| }, |
| { |
| "epoch": 4.97875569044006, |
| "grad_norm": 0.8529811501502991, |
| "learning_rate": 3.69290783795698e-07, |
| "loss": 1.1151, |
| "step": 2190 |
| }, |
| { |
| "epoch": 4.98103186646434, |
| "grad_norm": 0.8240101933479309, |
| "learning_rate": 3.676711669938765e-07, |
| "loss": 1.1451, |
| "step": 2191 |
| }, |
| { |
| "epoch": 4.983308042488619, |
| "grad_norm": 0.8223132491111755, |
| "learning_rate": 3.6605482767656684e-07, |
| "loss": 1.1656, |
| "step": 2192 |
| }, |
| { |
| "epoch": 4.985584218512899, |
| "grad_norm": 0.8219751715660095, |
| "learning_rate": 3.644417683281551e-07, |
| "loss": 1.1648, |
| "step": 2193 |
| }, |
| { |
| "epoch": 4.9878603945371776, |
| "grad_norm": 0.8212680816650391, |
| "learning_rate": 3.628319914279843e-07, |
| "loss": 1.1567, |
| "step": 2194 |
| }, |
| { |
| "epoch": 4.990136570561456, |
| "grad_norm": 0.8115838170051575, |
| "learning_rate": 3.61225499450355e-07, |
| "loss": 1.1528, |
| "step": 2195 |
| } |
| ], |
| "logging_steps": 1, |
| "max_steps": 2634, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 6, |
| "save_steps": 439, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 4.910766821998592e+18, |
| "train_batch_size": 4, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|