| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 4.998573466476462, |
| "eval_steps": 500, |
| "global_step": 1095, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.00456490727532097, |
| "grad_norm": 6.303826851202761, |
| "learning_rate": 7.272727272727273e-07, |
| "loss": 0.8692, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.00912981455064194, |
| "grad_norm": 6.417436981270692, |
| "learning_rate": 1.4545454545454546e-06, |
| "loss": 0.8764, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.013694721825962911, |
| "grad_norm": 6.266046371260386, |
| "learning_rate": 2.181818181818182e-06, |
| "loss": 0.8604, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.01825962910128388, |
| "grad_norm": 5.871418428231522, |
| "learning_rate": 2.9090909090909093e-06, |
| "loss": 0.8563, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.02282453637660485, |
| "grad_norm": 4.515904039751017, |
| "learning_rate": 3.6363636363636366e-06, |
| "loss": 0.8116, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.027389443651925822, |
| "grad_norm": 4.154460736926, |
| "learning_rate": 4.363636363636364e-06, |
| "loss": 0.8014, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.03195435092724679, |
| "grad_norm": 2.436881516437234, |
| "learning_rate": 5.090909090909091e-06, |
| "loss": 0.7707, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.03651925820256776, |
| "grad_norm": 2.091681221098197, |
| "learning_rate": 5.8181818181818185e-06, |
| "loss": 0.7609, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.04108416547788873, |
| "grad_norm": 4.371182375936429, |
| "learning_rate": 6.545454545454546e-06, |
| "loss": 0.7714, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.0456490727532097, |
| "grad_norm": 4.419533826001375, |
| "learning_rate": 7.272727272727273e-06, |
| "loss": 0.7639, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.05021398002853067, |
| "grad_norm": 4.0789627340324515, |
| "learning_rate": 8.000000000000001e-06, |
| "loss": 0.738, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.054778887303851644, |
| "grad_norm": 4.249564440992856, |
| "learning_rate": 8.727272727272728e-06, |
| "loss": 0.7147, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.05934379457917261, |
| "grad_norm": 3.376070643486793, |
| "learning_rate": 9.454545454545456e-06, |
| "loss": 0.6984, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.06390870185449359, |
| "grad_norm": 2.084910644051683, |
| "learning_rate": 1.0181818181818182e-05, |
| "loss": 0.6752, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.06847360912981455, |
| "grad_norm": 1.7829575333835848, |
| "learning_rate": 1.0909090909090909e-05, |
| "loss": 0.6564, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.07303851640513552, |
| "grad_norm": 2.3763989053887546, |
| "learning_rate": 1.1636363636363637e-05, |
| "loss": 0.6595, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.07760342368045649, |
| "grad_norm": 1.8994179663807598, |
| "learning_rate": 1.2363636363636364e-05, |
| "loss": 0.6332, |
| "step": 17 |
| }, |
| { |
| "epoch": 0.08216833095577745, |
| "grad_norm": 1.0918180057614881, |
| "learning_rate": 1.3090909090909092e-05, |
| "loss": 0.6275, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.08673323823109844, |
| "grad_norm": 1.162384665458545, |
| "learning_rate": 1.381818181818182e-05, |
| "loss": 0.617, |
| "step": 19 |
| }, |
| { |
| "epoch": 0.0912981455064194, |
| "grad_norm": 1.1197113251451245, |
| "learning_rate": 1.4545454545454546e-05, |
| "loss": 0.6102, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.09586305278174037, |
| "grad_norm": 0.7298206551544844, |
| "learning_rate": 1.5272727272727276e-05, |
| "loss": 0.6001, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.10042796005706134, |
| "grad_norm": 0.8218899465689595, |
| "learning_rate": 1.6000000000000003e-05, |
| "loss": 0.5908, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.1049928673323823, |
| "grad_norm": 0.7081964845957092, |
| "learning_rate": 1.672727272727273e-05, |
| "loss": 0.5857, |
| "step": 23 |
| }, |
| { |
| "epoch": 0.10955777460770329, |
| "grad_norm": 0.7152113888700518, |
| "learning_rate": 1.7454545454545456e-05, |
| "loss": 0.5813, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.11412268188302425, |
| "grad_norm": 0.5908192981103763, |
| "learning_rate": 1.8181818181818182e-05, |
| "loss": 0.5654, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.11868758915834522, |
| "grad_norm": 0.5657878890257612, |
| "learning_rate": 1.8909090909090912e-05, |
| "loss": 0.5681, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.12325249643366619, |
| "grad_norm": 0.5407715119275086, |
| "learning_rate": 1.963636363636364e-05, |
| "loss": 0.5726, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.12781740370898717, |
| "grad_norm": 0.6238305162701853, |
| "learning_rate": 2.0363636363636365e-05, |
| "loss": 0.5707, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.13238231098430814, |
| "grad_norm": 0.7048129462669643, |
| "learning_rate": 2.109090909090909e-05, |
| "loss": 0.562, |
| "step": 29 |
| }, |
| { |
| "epoch": 0.1369472182596291, |
| "grad_norm": 0.4792558557034262, |
| "learning_rate": 2.1818181818181818e-05, |
| "loss": 0.5622, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.14151212553495007, |
| "grad_norm": 0.648556738189594, |
| "learning_rate": 2.2545454545454544e-05, |
| "loss": 0.5556, |
| "step": 31 |
| }, |
| { |
| "epoch": 0.14607703281027104, |
| "grad_norm": 0.5375842492193321, |
| "learning_rate": 2.3272727272727274e-05, |
| "loss": 0.5521, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.150641940085592, |
| "grad_norm": 0.6204936152045187, |
| "learning_rate": 2.4e-05, |
| "loss": 0.5521, |
| "step": 33 |
| }, |
| { |
| "epoch": 0.15520684736091298, |
| "grad_norm": 0.7793633373804746, |
| "learning_rate": 2.4727272727272727e-05, |
| "loss": 0.5556, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.15977175463623394, |
| "grad_norm": 1.077779426955439, |
| "learning_rate": 2.5454545454545457e-05, |
| "loss": 0.5439, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.1643366619115549, |
| "grad_norm": 0.8548536747868706, |
| "learning_rate": 2.6181818181818183e-05, |
| "loss": 0.5399, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.1689015691868759, |
| "grad_norm": 0.6156323737152537, |
| "learning_rate": 2.690909090909091e-05, |
| "loss": 0.534, |
| "step": 37 |
| }, |
| { |
| "epoch": 0.17346647646219687, |
| "grad_norm": 0.7996160287459234, |
| "learning_rate": 2.763636363636364e-05, |
| "loss": 0.5432, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.17803138373751784, |
| "grad_norm": 0.8822172922338606, |
| "learning_rate": 2.8363636363636366e-05, |
| "loss": 0.538, |
| "step": 39 |
| }, |
| { |
| "epoch": 0.1825962910128388, |
| "grad_norm": 0.7341791467955449, |
| "learning_rate": 2.9090909090909093e-05, |
| "loss": 0.5311, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.18716119828815977, |
| "grad_norm": 0.7448434985433166, |
| "learning_rate": 2.9818181818181823e-05, |
| "loss": 0.5302, |
| "step": 41 |
| }, |
| { |
| "epoch": 0.19172610556348074, |
| "grad_norm": 1.3134209857981531, |
| "learning_rate": 3.054545454545455e-05, |
| "loss": 0.5287, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.1962910128388017, |
| "grad_norm": 1.4079955673671256, |
| "learning_rate": 3.127272727272728e-05, |
| "loss": 0.529, |
| "step": 43 |
| }, |
| { |
| "epoch": 0.20085592011412268, |
| "grad_norm": 0.7806435793966361, |
| "learning_rate": 3.2000000000000005e-05, |
| "loss": 0.5211, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.20542082738944364, |
| "grad_norm": 1.696793688392228, |
| "learning_rate": 3.272727272727273e-05, |
| "loss": 0.5286, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.2099857346647646, |
| "grad_norm": 0.9163370159272217, |
| "learning_rate": 3.345454545454546e-05, |
| "loss": 0.5251, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.21455064194008558, |
| "grad_norm": 1.6162977222772477, |
| "learning_rate": 3.4181818181818185e-05, |
| "loss": 0.5307, |
| "step": 47 |
| }, |
| { |
| "epoch": 0.21911554921540657, |
| "grad_norm": 0.8813157838119612, |
| "learning_rate": 3.490909090909091e-05, |
| "loss": 0.5119, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.22368045649072754, |
| "grad_norm": 1.6994696349279637, |
| "learning_rate": 3.563636363636364e-05, |
| "loss": 0.522, |
| "step": 49 |
| }, |
| { |
| "epoch": 0.2282453637660485, |
| "grad_norm": 1.1550073074270106, |
| "learning_rate": 3.6363636363636364e-05, |
| "loss": 0.5185, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.23281027104136948, |
| "grad_norm": 2.273035117030142, |
| "learning_rate": 3.709090909090909e-05, |
| "loss": 0.5178, |
| "step": 51 |
| }, |
| { |
| "epoch": 0.23737517831669044, |
| "grad_norm": 2.142670073640285, |
| "learning_rate": 3.7818181818181824e-05, |
| "loss": 0.5164, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.2419400855920114, |
| "grad_norm": 1.178743753159302, |
| "learning_rate": 3.854545454545455e-05, |
| "loss": 0.5161, |
| "step": 53 |
| }, |
| { |
| "epoch": 0.24650499286733238, |
| "grad_norm": 2.0029521626283864, |
| "learning_rate": 3.927272727272728e-05, |
| "loss": 0.5142, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.25106990014265335, |
| "grad_norm": 1.2261785948783424, |
| "learning_rate": 4e-05, |
| "loss": 0.5097, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.25563480741797434, |
| "grad_norm": 1.9929717547816588, |
| "learning_rate": 4.072727272727273e-05, |
| "loss": 0.52, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.2601997146932953, |
| "grad_norm": 1.6292265517873907, |
| "learning_rate": 4.1454545454545456e-05, |
| "loss": 0.5214, |
| "step": 57 |
| }, |
| { |
| "epoch": 0.2647646219686163, |
| "grad_norm": 1.3334399259774528, |
| "learning_rate": 4.218181818181818e-05, |
| "loss": 0.5131, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.2693295292439372, |
| "grad_norm": 1.7895060448687017, |
| "learning_rate": 4.2909090909090916e-05, |
| "loss": 0.5156, |
| "step": 59 |
| }, |
| { |
| "epoch": 0.2738944365192582, |
| "grad_norm": 1.2434711772815448, |
| "learning_rate": 4.3636363636363636e-05, |
| "loss": 0.5063, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.27845934379457915, |
| "grad_norm": 1.674712591320644, |
| "learning_rate": 4.436363636363637e-05, |
| "loss": 0.5058, |
| "step": 61 |
| }, |
| { |
| "epoch": 0.28302425106990015, |
| "grad_norm": 1.7391744678652716, |
| "learning_rate": 4.509090909090909e-05, |
| "loss": 0.5169, |
| "step": 62 |
| }, |
| { |
| "epoch": 0.2875891583452211, |
| "grad_norm": 1.2704765036397798, |
| "learning_rate": 4.581818181818182e-05, |
| "loss": 0.5077, |
| "step": 63 |
| }, |
| { |
| "epoch": 0.2921540656205421, |
| "grad_norm": 1.7222012804003408, |
| "learning_rate": 4.654545454545455e-05, |
| "loss": 0.5137, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.2967189728958631, |
| "grad_norm": 1.293937134094515, |
| "learning_rate": 4.727272727272728e-05, |
| "loss": 0.5066, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.301283880171184, |
| "grad_norm": 1.1101483554895153, |
| "learning_rate": 4.8e-05, |
| "loss": 0.5012, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.305848787446505, |
| "grad_norm": 1.2651179859008774, |
| "learning_rate": 4.8727272727272734e-05, |
| "loss": 0.4996, |
| "step": 67 |
| }, |
| { |
| "epoch": 0.31041369472182595, |
| "grad_norm": 1.4372242319377802, |
| "learning_rate": 4.9454545454545454e-05, |
| "loss": 0.5048, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.31497860199714695, |
| "grad_norm": 1.1173991577334563, |
| "learning_rate": 5.018181818181819e-05, |
| "loss": 0.5005, |
| "step": 69 |
| }, |
| { |
| "epoch": 0.3195435092724679, |
| "grad_norm": 1.0219934142555631, |
| "learning_rate": 5.0909090909090914e-05, |
| "loss": 0.5011, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.3241084165477889, |
| "grad_norm": 1.7032035010169793, |
| "learning_rate": 5.163636363636365e-05, |
| "loss": 0.5094, |
| "step": 71 |
| }, |
| { |
| "epoch": 0.3286733238231098, |
| "grad_norm": 0.8364496496396441, |
| "learning_rate": 5.236363636363637e-05, |
| "loss": 0.4963, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.3332382310984308, |
| "grad_norm": 1.3858540890091062, |
| "learning_rate": 5.30909090909091e-05, |
| "loss": 0.5079, |
| "step": 73 |
| }, |
| { |
| "epoch": 0.3378031383737518, |
| "grad_norm": 1.3469249650056214, |
| "learning_rate": 5.381818181818182e-05, |
| "loss": 0.5094, |
| "step": 74 |
| }, |
| { |
| "epoch": 0.34236804564907275, |
| "grad_norm": 2.3532033300244266, |
| "learning_rate": 5.4545454545454546e-05, |
| "loss": 0.5101, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.34693295292439374, |
| "grad_norm": 1.2379724037013697, |
| "learning_rate": 5.527272727272728e-05, |
| "loss": 0.5009, |
| "step": 76 |
| }, |
| { |
| "epoch": 0.3514978601997147, |
| "grad_norm": 2.1483188283998365, |
| "learning_rate": 5.6e-05, |
| "loss": 0.5185, |
| "step": 77 |
| }, |
| { |
| "epoch": 0.3560627674750357, |
| "grad_norm": 1.6081458329954548, |
| "learning_rate": 5.672727272727273e-05, |
| "loss": 0.5033, |
| "step": 78 |
| }, |
| { |
| "epoch": 0.3606276747503566, |
| "grad_norm": 1.379511883135697, |
| "learning_rate": 5.745454545454546e-05, |
| "loss": 0.5038, |
| "step": 79 |
| }, |
| { |
| "epoch": 0.3651925820256776, |
| "grad_norm": 2.314662935076633, |
| "learning_rate": 5.8181818181818185e-05, |
| "loss": 0.4969, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.36975748930099855, |
| "grad_norm": 1.3583834941356125, |
| "learning_rate": 5.890909090909091e-05, |
| "loss": 0.5045, |
| "step": 81 |
| }, |
| { |
| "epoch": 0.37432239657631955, |
| "grad_norm": 2.6348157549287157, |
| "learning_rate": 5.9636363636363645e-05, |
| "loss": 0.5058, |
| "step": 82 |
| }, |
| { |
| "epoch": 0.3788873038516405, |
| "grad_norm": 1.944213494123695, |
| "learning_rate": 6.0363636363636365e-05, |
| "loss": 0.5064, |
| "step": 83 |
| }, |
| { |
| "epoch": 0.3834522111269615, |
| "grad_norm": 2.0575793444325194, |
| "learning_rate": 6.10909090909091e-05, |
| "loss": 0.5018, |
| "step": 84 |
| }, |
| { |
| "epoch": 0.3880171184022825, |
| "grad_norm": 1.978062761509244, |
| "learning_rate": 6.181818181818182e-05, |
| "loss": 0.5017, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.3925820256776034, |
| "grad_norm": 1.4801623619535438, |
| "learning_rate": 6.254545454545456e-05, |
| "loss": 0.498, |
| "step": 86 |
| }, |
| { |
| "epoch": 0.3971469329529244, |
| "grad_norm": 1.8537084794208918, |
| "learning_rate": 6.327272727272727e-05, |
| "loss": 0.5019, |
| "step": 87 |
| }, |
| { |
| "epoch": 0.40171184022824535, |
| "grad_norm": 1.3702877983773376, |
| "learning_rate": 6.400000000000001e-05, |
| "loss": 0.4968, |
| "step": 88 |
| }, |
| { |
| "epoch": 0.40627674750356635, |
| "grad_norm": 1.885572104808451, |
| "learning_rate": 6.472727272727274e-05, |
| "loss": 0.4957, |
| "step": 89 |
| }, |
| { |
| "epoch": 0.4108416547788873, |
| "grad_norm": 1.3542328558823338, |
| "learning_rate": 6.545454545454546e-05, |
| "loss": 0.4966, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.4154065620542083, |
| "grad_norm": 1.7718230138924214, |
| "learning_rate": 6.618181818181819e-05, |
| "loss": 0.4966, |
| "step": 91 |
| }, |
| { |
| "epoch": 0.4199714693295292, |
| "grad_norm": 1.3980578794491678, |
| "learning_rate": 6.690909090909092e-05, |
| "loss": 0.4925, |
| "step": 92 |
| }, |
| { |
| "epoch": 0.4245363766048502, |
| "grad_norm": 1.5334202672866126, |
| "learning_rate": 6.763636363636364e-05, |
| "loss": 0.4876, |
| "step": 93 |
| }, |
| { |
| "epoch": 0.42910128388017116, |
| "grad_norm": 1.4720834156887759, |
| "learning_rate": 6.836363636363637e-05, |
| "loss": 0.4913, |
| "step": 94 |
| }, |
| { |
| "epoch": 0.43366619115549215, |
| "grad_norm": 1.338014419694779, |
| "learning_rate": 6.90909090909091e-05, |
| "loss": 0.4903, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.43823109843081315, |
| "grad_norm": 1.0761852253986315, |
| "learning_rate": 6.981818181818182e-05, |
| "loss": 0.4912, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.4427960057061341, |
| "grad_norm": 1.6024764467654846, |
| "learning_rate": 7.054545454545455e-05, |
| "loss": 0.4901, |
| "step": 97 |
| }, |
| { |
| "epoch": 0.4473609129814551, |
| "grad_norm": 1.492316053791499, |
| "learning_rate": 7.127272727272728e-05, |
| "loss": 0.4873, |
| "step": 98 |
| }, |
| { |
| "epoch": 0.451925820256776, |
| "grad_norm": 1.525604026590939, |
| "learning_rate": 7.2e-05, |
| "loss": 0.4891, |
| "step": 99 |
| }, |
| { |
| "epoch": 0.456490727532097, |
| "grad_norm": 1.3034542842679084, |
| "learning_rate": 7.272727272727273e-05, |
| "loss": 0.49, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.46105563480741796, |
| "grad_norm": 2.1642815195092666, |
| "learning_rate": 7.345454545454547e-05, |
| "loss": 0.5055, |
| "step": 101 |
| }, |
| { |
| "epoch": 0.46562054208273895, |
| "grad_norm": 1.1678257677428678, |
| "learning_rate": 7.418181818181818e-05, |
| "loss": 0.4852, |
| "step": 102 |
| }, |
| { |
| "epoch": 0.4701854493580599, |
| "grad_norm": 1.4078725478189906, |
| "learning_rate": 7.490909090909092e-05, |
| "loss": 0.4919, |
| "step": 103 |
| }, |
| { |
| "epoch": 0.4747503566333809, |
| "grad_norm": 2.357567405283945, |
| "learning_rate": 7.563636363636365e-05, |
| "loss": 0.4968, |
| "step": 104 |
| }, |
| { |
| "epoch": 0.4793152639087018, |
| "grad_norm": 1.4758593059891392, |
| "learning_rate": 7.636363636363637e-05, |
| "loss": 0.4874, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.4838801711840228, |
| "grad_norm": 1.980018157376651, |
| "learning_rate": 7.70909090909091e-05, |
| "loss": 0.4953, |
| "step": 106 |
| }, |
| { |
| "epoch": 0.4884450784593438, |
| "grad_norm": 2.0401864619014467, |
| "learning_rate": 7.781818181818183e-05, |
| "loss": 0.5006, |
| "step": 107 |
| }, |
| { |
| "epoch": 0.49300998573466476, |
| "grad_norm": 1.0897659104208783, |
| "learning_rate": 7.854545454545455e-05, |
| "loss": 0.4871, |
| "step": 108 |
| }, |
| { |
| "epoch": 0.49757489300998575, |
| "grad_norm": 1.8417886476775482, |
| "learning_rate": 7.927272727272728e-05, |
| "loss": 0.4975, |
| "step": 109 |
| }, |
| { |
| "epoch": 0.5021398002853067, |
| "grad_norm": 1.2728786506557457, |
| "learning_rate": 8e-05, |
| "loss": 0.492, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.5067047075606277, |
| "grad_norm": 2.4453848065966817, |
| "learning_rate": 7.999979655036647e-05, |
| "loss": 0.5094, |
| "step": 111 |
| }, |
| { |
| "epoch": 0.5112696148359487, |
| "grad_norm": 1.6374824473909455, |
| "learning_rate": 7.999918620353548e-05, |
| "loss": 0.4931, |
| "step": 112 |
| }, |
| { |
| "epoch": 0.5158345221112696, |
| "grad_norm": 1.8705264691341723, |
| "learning_rate": 7.999816896571574e-05, |
| "loss": 0.5051, |
| "step": 113 |
| }, |
| { |
| "epoch": 0.5203994293865906, |
| "grad_norm": 1.6343167214375214, |
| "learning_rate": 7.999674484725512e-05, |
| "loss": 0.4984, |
| "step": 114 |
| }, |
| { |
| "epoch": 0.5249643366619116, |
| "grad_norm": 1.9112883595327201, |
| "learning_rate": 7.999491386264042e-05, |
| "loss": 0.492, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.5295292439372326, |
| "grad_norm": 1.35541163706321, |
| "learning_rate": 7.999267603049729e-05, |
| "loss": 0.4902, |
| "step": 116 |
| }, |
| { |
| "epoch": 0.5340941512125535, |
| "grad_norm": 1.6103620556897125, |
| "learning_rate": 7.999003137359006e-05, |
| "loss": 0.4927, |
| "step": 117 |
| }, |
| { |
| "epoch": 0.5386590584878744, |
| "grad_norm": 1.4810569251620247, |
| "learning_rate": 7.998697991882144e-05, |
| "loss": 0.4876, |
| "step": 118 |
| }, |
| { |
| "epoch": 0.5432239657631954, |
| "grad_norm": 1.3665267966836736, |
| "learning_rate": 7.998352169723229e-05, |
| "loss": 0.4865, |
| "step": 119 |
| }, |
| { |
| "epoch": 0.5477888730385164, |
| "grad_norm": 1.1684492343192325, |
| "learning_rate": 7.997965674400132e-05, |
| "loss": 0.4898, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.5523537803138374, |
| "grad_norm": 1.6135417427578114, |
| "learning_rate": 7.997538509844469e-05, |
| "loss": 0.4884, |
| "step": 121 |
| }, |
| { |
| "epoch": 0.5569186875891583, |
| "grad_norm": 1.081634967694094, |
| "learning_rate": 7.997070680401562e-05, |
| "loss": 0.4814, |
| "step": 122 |
| }, |
| { |
| "epoch": 0.5614835948644793, |
| "grad_norm": 1.323277108291229, |
| "learning_rate": 7.9965621908304e-05, |
| "loss": 0.4862, |
| "step": 123 |
| }, |
| { |
| "epoch": 0.5660485021398003, |
| "grad_norm": 1.5122505161166728, |
| "learning_rate": 7.996013046303583e-05, |
| "loss": 0.4907, |
| "step": 124 |
| }, |
| { |
| "epoch": 0.5706134094151213, |
| "grad_norm": 1.3517400921660416, |
| "learning_rate": 7.995423252407275e-05, |
| "loss": 0.4849, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.5751783166904422, |
| "grad_norm": 1.2964472361111674, |
| "learning_rate": 7.99479281514114e-05, |
| "loss": 0.4829, |
| "step": 126 |
| }, |
| { |
| "epoch": 0.5797432239657632, |
| "grad_norm": 1.441528044357245, |
| "learning_rate": 7.994121740918293e-05, |
| "loss": 0.4888, |
| "step": 127 |
| }, |
| { |
| "epoch": 0.5843081312410842, |
| "grad_norm": 1.2409245733342162, |
| "learning_rate": 7.993410036565223e-05, |
| "loss": 0.4776, |
| "step": 128 |
| }, |
| { |
| "epoch": 0.5888730385164052, |
| "grad_norm": 1.255055709641733, |
| "learning_rate": 7.992657709321728e-05, |
| "loss": 0.4856, |
| "step": 129 |
| }, |
| { |
| "epoch": 0.5934379457917262, |
| "grad_norm": 1.2617494433472416, |
| "learning_rate": 7.991864766840846e-05, |
| "loss": 0.4832, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.598002853067047, |
| "grad_norm": 1.3678248560826736, |
| "learning_rate": 7.991031217188769e-05, |
| "loss": 0.483, |
| "step": 131 |
| }, |
| { |
| "epoch": 0.602567760342368, |
| "grad_norm": 1.5165238137397685, |
| "learning_rate": 7.990157068844764e-05, |
| "loss": 0.4762, |
| "step": 132 |
| }, |
| { |
| "epoch": 0.607132667617689, |
| "grad_norm": 1.0122655955764246, |
| "learning_rate": 7.989242330701089e-05, |
| "loss": 0.4794, |
| "step": 133 |
| }, |
| { |
| "epoch": 0.61169757489301, |
| "grad_norm": 1.4882369944794684, |
| "learning_rate": 7.988287012062902e-05, |
| "loss": 0.4772, |
| "step": 134 |
| }, |
| { |
| "epoch": 0.6162624821683309, |
| "grad_norm": 0.9815809304445058, |
| "learning_rate": 7.987291122648165e-05, |
| "loss": 0.4844, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.6208273894436519, |
| "grad_norm": 1.8345734791985764, |
| "learning_rate": 7.986254672587544e-05, |
| "loss": 0.4872, |
| "step": 136 |
| }, |
| { |
| "epoch": 0.6253922967189729, |
| "grad_norm": 1.2322860510435374, |
| "learning_rate": 7.985177672424309e-05, |
| "loss": 0.4742, |
| "step": 137 |
| }, |
| { |
| "epoch": 0.6299572039942939, |
| "grad_norm": 1.0934283610887234, |
| "learning_rate": 7.984060133114222e-05, |
| "loss": 0.4828, |
| "step": 138 |
| }, |
| { |
| "epoch": 0.6345221112696149, |
| "grad_norm": 1.0617428680491003, |
| "learning_rate": 7.982902066025433e-05, |
| "loss": 0.4841, |
| "step": 139 |
| }, |
| { |
| "epoch": 0.6390870185449358, |
| "grad_norm": 1.4504986628664012, |
| "learning_rate": 7.981703482938361e-05, |
| "loss": 0.4765, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.6436519258202568, |
| "grad_norm": 1.2196737951896366, |
| "learning_rate": 7.980464396045565e-05, |
| "loss": 0.48, |
| "step": 141 |
| }, |
| { |
| "epoch": 0.6482168330955778, |
| "grad_norm": 1.1746117544945598, |
| "learning_rate": 7.979184817951638e-05, |
| "loss": 0.472, |
| "step": 142 |
| }, |
| { |
| "epoch": 0.6527817403708988, |
| "grad_norm": 1.462647851373356, |
| "learning_rate": 7.977864761673062e-05, |
| "loss": 0.4819, |
| "step": 143 |
| }, |
| { |
| "epoch": 0.6573466476462196, |
| "grad_norm": 1.0920910565051827, |
| "learning_rate": 7.976504240638088e-05, |
| "loss": 0.4759, |
| "step": 144 |
| }, |
| { |
| "epoch": 0.6619115549215406, |
| "grad_norm": 0.8915983200577166, |
| "learning_rate": 7.975103268686587e-05, |
| "loss": 0.4708, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.6664764621968616, |
| "grad_norm": 1.1195632191410694, |
| "learning_rate": 7.973661860069925e-05, |
| "loss": 0.481, |
| "step": 146 |
| }, |
| { |
| "epoch": 0.6710413694721826, |
| "grad_norm": 0.9280898217043119, |
| "learning_rate": 7.972180029450804e-05, |
| "loss": 0.4771, |
| "step": 147 |
| }, |
| { |
| "epoch": 0.6756062767475036, |
| "grad_norm": 1.2010937440977014, |
| "learning_rate": 7.970657791903115e-05, |
| "loss": 0.4766, |
| "step": 148 |
| }, |
| { |
| "epoch": 0.6801711840228245, |
| "grad_norm": 1.1648502616511074, |
| "learning_rate": 7.969095162911796e-05, |
| "loss": 0.4765, |
| "step": 149 |
| }, |
| { |
| "epoch": 0.6847360912981455, |
| "grad_norm": 1.629472140008341, |
| "learning_rate": 7.967492158372659e-05, |
| "loss": 0.4708, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.6893009985734665, |
| "grad_norm": 0.7685771258524148, |
| "learning_rate": 7.965848794592241e-05, |
| "loss": 0.4702, |
| "step": 151 |
| }, |
| { |
| "epoch": 0.6938659058487875, |
| "grad_norm": 1.2979818888222598, |
| "learning_rate": 7.964165088287627e-05, |
| "loss": 0.4756, |
| "step": 152 |
| }, |
| { |
| "epoch": 0.6984308131241084, |
| "grad_norm": 1.479596160746902, |
| "learning_rate": 7.96244105658629e-05, |
| "loss": 0.4684, |
| "step": 153 |
| }, |
| { |
| "epoch": 0.7029957203994294, |
| "grad_norm": 0.9533216688157553, |
| "learning_rate": 7.960676717025912e-05, |
| "loss": 0.4674, |
| "step": 154 |
| }, |
| { |
| "epoch": 0.7075606276747504, |
| "grad_norm": 1.1144585740648676, |
| "learning_rate": 7.958872087554204e-05, |
| "loss": 0.4759, |
| "step": 155 |
| }, |
| { |
| "epoch": 0.7121255349500714, |
| "grad_norm": 1.1392667564805274, |
| "learning_rate": 7.957027186528724e-05, |
| "loss": 0.4792, |
| "step": 156 |
| }, |
| { |
| "epoch": 0.7166904422253922, |
| "grad_norm": 1.576213566462258, |
| "learning_rate": 7.955142032716696e-05, |
| "loss": 0.4711, |
| "step": 157 |
| }, |
| { |
| "epoch": 0.7212553495007132, |
| "grad_norm": 1.0153629340656216, |
| "learning_rate": 7.953216645294813e-05, |
| "loss": 0.4785, |
| "step": 158 |
| }, |
| { |
| "epoch": 0.7258202567760342, |
| "grad_norm": 1.178946056258982, |
| "learning_rate": 7.951251043849043e-05, |
| "loss": 0.4703, |
| "step": 159 |
| }, |
| { |
| "epoch": 0.7303851640513552, |
| "grad_norm": 1.2039627482563626, |
| "learning_rate": 7.94924524837443e-05, |
| "loss": 0.4691, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.7349500713266762, |
| "grad_norm": 1.2758536553065434, |
| "learning_rate": 7.947199279274892e-05, |
| "loss": 0.4719, |
| "step": 161 |
| }, |
| { |
| "epoch": 0.7395149786019971, |
| "grad_norm": 0.7199695023812126, |
| "learning_rate": 7.945113157363012e-05, |
| "loss": 0.4705, |
| "step": 162 |
| }, |
| { |
| "epoch": 0.7440798858773181, |
| "grad_norm": 1.187228986720961, |
| "learning_rate": 7.942986903859826e-05, |
| "loss": 0.476, |
| "step": 163 |
| }, |
| { |
| "epoch": 0.7486447931526391, |
| "grad_norm": 1.2191468243222228, |
| "learning_rate": 7.940820540394611e-05, |
| "loss": 0.4685, |
| "step": 164 |
| }, |
| { |
| "epoch": 0.7532097004279601, |
| "grad_norm": 1.763163381472643, |
| "learning_rate": 7.938614089004659e-05, |
| "loss": 0.4634, |
| "step": 165 |
| }, |
| { |
| "epoch": 0.757774607703281, |
| "grad_norm": 0.7821277377021038, |
| "learning_rate": 7.936367572135056e-05, |
| "loss": 0.4741, |
| "step": 166 |
| }, |
| { |
| "epoch": 0.762339514978602, |
| "grad_norm": 2.297761933831456, |
| "learning_rate": 7.934081012638452e-05, |
| "loss": 0.4801, |
| "step": 167 |
| }, |
| { |
| "epoch": 0.766904422253923, |
| "grad_norm": 1.3332466148839683, |
| "learning_rate": 7.931754433774835e-05, |
| "loss": 0.4753, |
| "step": 168 |
| }, |
| { |
| "epoch": 0.771469329529244, |
| "grad_norm": 2.149549037927675, |
| "learning_rate": 7.929387859211283e-05, |
| "loss": 0.4864, |
| "step": 169 |
| }, |
| { |
| "epoch": 0.776034236804565, |
| "grad_norm": 1.5521858392131513, |
| "learning_rate": 7.926981313021734e-05, |
| "loss": 0.4845, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.7805991440798858, |
| "grad_norm": 1.5726199718313127, |
| "learning_rate": 7.924534819686735e-05, |
| "loss": 0.4807, |
| "step": 171 |
| }, |
| { |
| "epoch": 0.7851640513552068, |
| "grad_norm": 1.25504183274313, |
| "learning_rate": 7.922048404093193e-05, |
| "loss": 0.4875, |
| "step": 172 |
| }, |
| { |
| "epoch": 0.7897289586305278, |
| "grad_norm": 0.9918446302267643, |
| "learning_rate": 7.919522091534125e-05, |
| "loss": 0.4751, |
| "step": 173 |
| }, |
| { |
| "epoch": 0.7942938659058488, |
| "grad_norm": 1.2133961064200827, |
| "learning_rate": 7.916955907708403e-05, |
| "loss": 0.4751, |
| "step": 174 |
| }, |
| { |
| "epoch": 0.7988587731811697, |
| "grad_norm": 1.0816089763802976, |
| "learning_rate": 7.91434987872048e-05, |
| "loss": 0.467, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.8034236804564907, |
| "grad_norm": 0.8971343840889208, |
| "learning_rate": 7.911704031080142e-05, |
| "loss": 0.4734, |
| "step": 176 |
| }, |
| { |
| "epoch": 0.8079885877318117, |
| "grad_norm": 0.9356730941151444, |
| "learning_rate": 7.909018391702224e-05, |
| "loss": 0.47, |
| "step": 177 |
| }, |
| { |
| "epoch": 0.8125534950071327, |
| "grad_norm": 0.7910454206167575, |
| "learning_rate": 7.906292987906343e-05, |
| "loss": 0.4683, |
| "step": 178 |
| }, |
| { |
| "epoch": 0.8171184022824537, |
| "grad_norm": 1.192520435930123, |
| "learning_rate": 7.90352784741662e-05, |
| "loss": 0.4683, |
| "step": 179 |
| }, |
| { |
| "epoch": 0.8216833095577746, |
| "grad_norm": 1.0963304160109035, |
| "learning_rate": 7.900722998361394e-05, |
| "loss": 0.4667, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.8262482168330956, |
| "grad_norm": 1.1053490528947663, |
| "learning_rate": 7.897878469272943e-05, |
| "loss": 0.472, |
| "step": 181 |
| }, |
| { |
| "epoch": 0.8308131241084166, |
| "grad_norm": 0.8848774693139051, |
| "learning_rate": 7.894994289087187e-05, |
| "loss": 0.4628, |
| "step": 182 |
| }, |
| { |
| "epoch": 0.8353780313837376, |
| "grad_norm": 1.115946428081902, |
| "learning_rate": 7.892070487143395e-05, |
| "loss": 0.4621, |
| "step": 183 |
| }, |
| { |
| "epoch": 0.8399429386590584, |
| "grad_norm": 1.0565554494854816, |
| "learning_rate": 7.88910709318389e-05, |
| "loss": 0.4665, |
| "step": 184 |
| }, |
| { |
| "epoch": 0.8445078459343794, |
| "grad_norm": 1.2557316528836924, |
| "learning_rate": 7.88610413735374e-05, |
| "loss": 0.4629, |
| "step": 185 |
| }, |
| { |
| "epoch": 0.8490727532097004, |
| "grad_norm": 0.9500438718947202, |
| "learning_rate": 7.883061650200459e-05, |
| "loss": 0.4671, |
| "step": 186 |
| }, |
| { |
| "epoch": 0.8536376604850214, |
| "grad_norm": 1.0129984995159482, |
| "learning_rate": 7.879979662673695e-05, |
| "loss": 0.4613, |
| "step": 187 |
| }, |
| { |
| "epoch": 0.8582025677603423, |
| "grad_norm": 1.2473784057361619, |
| "learning_rate": 7.876858206124907e-05, |
| "loss": 0.4697, |
| "step": 188 |
| }, |
| { |
| "epoch": 0.8627674750356633, |
| "grad_norm": 0.8979338734910455, |
| "learning_rate": 7.873697312307054e-05, |
| "loss": 0.4696, |
| "step": 189 |
| }, |
| { |
| "epoch": 0.8673323823109843, |
| "grad_norm": 0.7812764034273535, |
| "learning_rate": 7.870497013374272e-05, |
| "loss": 0.4639, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.8718972895863053, |
| "grad_norm": 1.012860566654511, |
| "learning_rate": 7.867257341881542e-05, |
| "loss": 0.4653, |
| "step": 191 |
| }, |
| { |
| "epoch": 0.8764621968616263, |
| "grad_norm": 1.4227576277179481, |
| "learning_rate": 7.863978330784364e-05, |
| "loss": 0.4675, |
| "step": 192 |
| }, |
| { |
| "epoch": 0.8810271041369472, |
| "grad_norm": 0.7511911038781995, |
| "learning_rate": 7.860660013438418e-05, |
| "loss": 0.4602, |
| "step": 193 |
| }, |
| { |
| "epoch": 0.8855920114122682, |
| "grad_norm": 1.0008483459158608, |
| "learning_rate": 7.857302423599225e-05, |
| "loss": 0.4642, |
| "step": 194 |
| }, |
| { |
| "epoch": 0.8901569186875892, |
| "grad_norm": 1.3635357598280822, |
| "learning_rate": 7.853905595421808e-05, |
| "loss": 0.4718, |
| "step": 195 |
| }, |
| { |
| "epoch": 0.8947218259629102, |
| "grad_norm": 1.1150832530124606, |
| "learning_rate": 7.850469563460339e-05, |
| "loss": 0.4697, |
| "step": 196 |
| }, |
| { |
| "epoch": 0.899286733238231, |
| "grad_norm": 1.306691006655152, |
| "learning_rate": 7.84699436266779e-05, |
| "loss": 0.4645, |
| "step": 197 |
| }, |
| { |
| "epoch": 0.903851640513552, |
| "grad_norm": 0.5646785863195107, |
| "learning_rate": 7.843480028395578e-05, |
| "loss": 0.4598, |
| "step": 198 |
| }, |
| { |
| "epoch": 0.908416547788873, |
| "grad_norm": 1.416929427327476, |
| "learning_rate": 7.839926596393202e-05, |
| "loss": 0.4613, |
| "step": 199 |
| }, |
| { |
| "epoch": 0.912981455064194, |
| "grad_norm": 0.6815794459118021, |
| "learning_rate": 7.836334102807886e-05, |
| "loss": 0.4552, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.917546362339515, |
| "grad_norm": 1.3377535332836705, |
| "learning_rate": 7.832702584184204e-05, |
| "loss": 0.4609, |
| "step": 201 |
| }, |
| { |
| "epoch": 0.9221112696148359, |
| "grad_norm": 0.7289345910500463, |
| "learning_rate": 7.829032077463713e-05, |
| "loss": 0.4614, |
| "step": 202 |
| }, |
| { |
| "epoch": 0.9266761768901569, |
| "grad_norm": 0.7256574283679788, |
| "learning_rate": 7.825322619984576e-05, |
| "loss": 0.4583, |
| "step": 203 |
| }, |
| { |
| "epoch": 0.9312410841654779, |
| "grad_norm": 0.7702106659855981, |
| "learning_rate": 7.821574249481179e-05, |
| "loss": 0.4568, |
| "step": 204 |
| }, |
| { |
| "epoch": 0.9358059914407989, |
| "grad_norm": 0.9102238446313804, |
| "learning_rate": 7.817787004083756e-05, |
| "loss": 0.4586, |
| "step": 205 |
| }, |
| { |
| "epoch": 0.9403708987161198, |
| "grad_norm": 1.7972329933237008, |
| "learning_rate": 7.813960922317988e-05, |
| "loss": 0.4604, |
| "step": 206 |
| }, |
| { |
| "epoch": 0.9449358059914408, |
| "grad_norm": 0.6227351510323046, |
| "learning_rate": 7.810096043104623e-05, |
| "loss": 0.4622, |
| "step": 207 |
| }, |
| { |
| "epoch": 0.9495007132667618, |
| "grad_norm": 1.7735653279890655, |
| "learning_rate": 7.806192405759074e-05, |
| "loss": 0.4649, |
| "step": 208 |
| }, |
| { |
| "epoch": 0.9540656205420828, |
| "grad_norm": 1.0308472520464786, |
| "learning_rate": 7.80225004999102e-05, |
| "loss": 0.4664, |
| "step": 209 |
| }, |
| { |
| "epoch": 0.9586305278174037, |
| "grad_norm": 0.8116494461929854, |
| "learning_rate": 7.798269015904004e-05, |
| "loss": 0.4617, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.9631954350927246, |
| "grad_norm": 1.047955603167114, |
| "learning_rate": 7.79424934399502e-05, |
| "loss": 0.4614, |
| "step": 211 |
| }, |
| { |
| "epoch": 0.9677603423680456, |
| "grad_norm": 1.5898132874597775, |
| "learning_rate": 7.790191075154109e-05, |
| "loss": 0.4607, |
| "step": 212 |
| }, |
| { |
| "epoch": 0.9723252496433666, |
| "grad_norm": 0.9545090947993602, |
| "learning_rate": 7.786094250663936e-05, |
| "loss": 0.4652, |
| "step": 213 |
| }, |
| { |
| "epoch": 0.9768901569186876, |
| "grad_norm": 1.6571396240037084, |
| "learning_rate": 7.781958912199372e-05, |
| "loss": 0.4618, |
| "step": 214 |
| }, |
| { |
| "epoch": 0.9814550641940085, |
| "grad_norm": 1.2024242198019108, |
| "learning_rate": 7.777785101827073e-05, |
| "loss": 0.4662, |
| "step": 215 |
| }, |
| { |
| "epoch": 0.9860199714693295, |
| "grad_norm": 1.1021742864631379, |
| "learning_rate": 7.773572862005048e-05, |
| "loss": 0.4573, |
| "step": 216 |
| }, |
| { |
| "epoch": 0.9905848787446505, |
| "grad_norm": 1.199436776468982, |
| "learning_rate": 7.76932223558223e-05, |
| "loss": 0.461, |
| "step": 217 |
| }, |
| { |
| "epoch": 0.9951497860199715, |
| "grad_norm": 1.063128318792733, |
| "learning_rate": 7.765033265798038e-05, |
| "loss": 0.455, |
| "step": 218 |
| }, |
| { |
| "epoch": 0.9997146932952924, |
| "grad_norm": 0.6606867422821489, |
| "learning_rate": 7.760705996281937e-05, |
| "loss": 0.4591, |
| "step": 219 |
| }, |
| { |
| "epoch": 1.0042796005706134, |
| "grad_norm": 1.8960244617895399, |
| "learning_rate": 7.756340471052998e-05, |
| "loss": 0.8785, |
| "step": 220 |
| }, |
| { |
| "epoch": 1.0088445078459345, |
| "grad_norm": 1.4080967185223767, |
| "learning_rate": 7.751936734519448e-05, |
| "loss": 0.4574, |
| "step": 221 |
| }, |
| { |
| "epoch": 1.0134094151212554, |
| "grad_norm": 0.6771897541703611, |
| "learning_rate": 7.747494831478214e-05, |
| "loss": 0.4431, |
| "step": 222 |
| }, |
| { |
| "epoch": 1.0179743223965763, |
| "grad_norm": 1.2677400355158506, |
| "learning_rate": 7.743014807114475e-05, |
| "loss": 0.4477, |
| "step": 223 |
| }, |
| { |
| "epoch": 1.0225392296718974, |
| "grad_norm": 0.8527685554112466, |
| "learning_rate": 7.738496707001195e-05, |
| "loss": 0.4383, |
| "step": 224 |
| }, |
| { |
| "epoch": 1.0271041369472182, |
| "grad_norm": 0.8122477750639683, |
| "learning_rate": 7.733940577098666e-05, |
| "loss": 0.4418, |
| "step": 225 |
| }, |
| { |
| "epoch": 1.0316690442225391, |
| "grad_norm": 0.8426217905101726, |
| "learning_rate": 7.729346463754035e-05, |
| "loss": 0.4421, |
| "step": 226 |
| }, |
| { |
| "epoch": 1.0362339514978602, |
| "grad_norm": 0.9171730058782802, |
| "learning_rate": 7.724714413700836e-05, |
| "loss": 0.4418, |
| "step": 227 |
| }, |
| { |
| "epoch": 1.0407988587731811, |
| "grad_norm": 1.0216657454100806, |
| "learning_rate": 7.720044474058515e-05, |
| "loss": 0.4463, |
| "step": 228 |
| }, |
| { |
| "epoch": 1.0453637660485022, |
| "grad_norm": 1.1305237470904832, |
| "learning_rate": 7.715336692331944e-05, |
| "loss": 0.4382, |
| "step": 229 |
| }, |
| { |
| "epoch": 1.0499286733238231, |
| "grad_norm": 1.0426775975151317, |
| "learning_rate": 7.71059111641095e-05, |
| "loss": 0.4466, |
| "step": 230 |
| }, |
| { |
| "epoch": 1.054493580599144, |
| "grad_norm": 1.0597968056899956, |
| "learning_rate": 7.705807794569815e-05, |
| "loss": 0.4486, |
| "step": 231 |
| }, |
| { |
| "epoch": 1.059058487874465, |
| "grad_norm": 1.1716663560960234, |
| "learning_rate": 7.700986775466792e-05, |
| "loss": 0.4439, |
| "step": 232 |
| }, |
| { |
| "epoch": 1.063623395149786, |
| "grad_norm": 0.9803996514179776, |
| "learning_rate": 7.696128108143612e-05, |
| "loss": 0.4461, |
| "step": 233 |
| }, |
| { |
| "epoch": 1.0681883024251069, |
| "grad_norm": 1.2259077645154464, |
| "learning_rate": 7.691231842024977e-05, |
| "loss": 0.4489, |
| "step": 234 |
| }, |
| { |
| "epoch": 1.072753209700428, |
| "grad_norm": 0.853759148071036, |
| "learning_rate": 7.686298026918067e-05, |
| "loss": 0.4421, |
| "step": 235 |
| }, |
| { |
| "epoch": 1.0773181169757489, |
| "grad_norm": 0.7461561534105968, |
| "learning_rate": 7.681326713012024e-05, |
| "loss": 0.4389, |
| "step": 236 |
| }, |
| { |
| "epoch": 1.08188302425107, |
| "grad_norm": 1.1421762335895749, |
| "learning_rate": 7.676317950877446e-05, |
| "loss": 0.4396, |
| "step": 237 |
| }, |
| { |
| "epoch": 1.0864479315263909, |
| "grad_norm": 0.8992066222764188, |
| "learning_rate": 7.671271791465877e-05, |
| "loss": 0.4404, |
| "step": 238 |
| }, |
| { |
| "epoch": 1.0910128388017117, |
| "grad_norm": 0.8169936643046695, |
| "learning_rate": 7.666188286109279e-05, |
| "loss": 0.4375, |
| "step": 239 |
| }, |
| { |
| "epoch": 1.0955777460770328, |
| "grad_norm": 1.0409973585291696, |
| "learning_rate": 7.66106748651952e-05, |
| "loss": 0.4395, |
| "step": 240 |
| }, |
| { |
| "epoch": 1.1001426533523537, |
| "grad_norm": 1.0016647356785184, |
| "learning_rate": 7.655909444787837e-05, |
| "loss": 0.4436, |
| "step": 241 |
| }, |
| { |
| "epoch": 1.1047075606276748, |
| "grad_norm": 0.9158140898097673, |
| "learning_rate": 7.650714213384317e-05, |
| "loss": 0.4362, |
| "step": 242 |
| }, |
| { |
| "epoch": 1.1092724679029957, |
| "grad_norm": 0.8583658856818915, |
| "learning_rate": 7.645481845157353e-05, |
| "loss": 0.4359, |
| "step": 243 |
| }, |
| { |
| "epoch": 1.1138373751783166, |
| "grad_norm": 0.6939369526031025, |
| "learning_rate": 7.640212393333117e-05, |
| "loss": 0.4306, |
| "step": 244 |
| }, |
| { |
| "epoch": 1.1184022824536377, |
| "grad_norm": 0.592410776540685, |
| "learning_rate": 7.634905911515014e-05, |
| "loss": 0.4354, |
| "step": 245 |
| }, |
| { |
| "epoch": 1.1229671897289586, |
| "grad_norm": 0.8601769845121724, |
| "learning_rate": 7.62956245368313e-05, |
| "loss": 0.4355, |
| "step": 246 |
| }, |
| { |
| "epoch": 1.1275320970042797, |
| "grad_norm": 0.7237332967962335, |
| "learning_rate": 7.624182074193691e-05, |
| "loss": 0.4399, |
| "step": 247 |
| }, |
| { |
| "epoch": 1.1320970042796006, |
| "grad_norm": 0.6811877132981848, |
| "learning_rate": 7.61876482777851e-05, |
| "loss": 0.4411, |
| "step": 248 |
| }, |
| { |
| "epoch": 1.1366619115549215, |
| "grad_norm": 0.8543886977059173, |
| "learning_rate": 7.613310769544428e-05, |
| "loss": 0.4355, |
| "step": 249 |
| }, |
| { |
| "epoch": 1.1412268188302426, |
| "grad_norm": 0.8286191112581163, |
| "learning_rate": 7.607819954972752e-05, |
| "loss": 0.4383, |
| "step": 250 |
| }, |
| { |
| "epoch": 1.1457917261055635, |
| "grad_norm": 1.022170225407258, |
| "learning_rate": 7.60229243991869e-05, |
| "loss": 0.4416, |
| "step": 251 |
| }, |
| { |
| "epoch": 1.1503566333808846, |
| "grad_norm": 1.1340537050864723, |
| "learning_rate": 7.59672828061079e-05, |
| "loss": 0.4382, |
| "step": 252 |
| }, |
| { |
| "epoch": 1.1549215406562054, |
| "grad_norm": 0.5980326106048516, |
| "learning_rate": 7.591127533650362e-05, |
| "loss": 0.4369, |
| "step": 253 |
| }, |
| { |
| "epoch": 1.1594864479315263, |
| "grad_norm": 0.5810971453495677, |
| "learning_rate": 7.585490256010899e-05, |
| "loss": 0.4319, |
| "step": 254 |
| }, |
| { |
| "epoch": 1.1640513552068474, |
| "grad_norm": 0.8641814822644016, |
| "learning_rate": 7.579816505037505e-05, |
| "loss": 0.4386, |
| "step": 255 |
| }, |
| { |
| "epoch": 1.1686162624821683, |
| "grad_norm": 0.9787426567126707, |
| "learning_rate": 7.574106338446309e-05, |
| "loss": 0.4327, |
| "step": 256 |
| }, |
| { |
| "epoch": 1.1731811697574892, |
| "grad_norm": 1.1084677725616903, |
| "learning_rate": 7.568359814323876e-05, |
| "loss": 0.4364, |
| "step": 257 |
| }, |
| { |
| "epoch": 1.1777460770328103, |
| "grad_norm": 0.8794156667429015, |
| "learning_rate": 7.562576991126616e-05, |
| "loss": 0.4387, |
| "step": 258 |
| }, |
| { |
| "epoch": 1.1823109843081312, |
| "grad_norm": 0.7016232671145584, |
| "learning_rate": 7.556757927680192e-05, |
| "loss": 0.4334, |
| "step": 259 |
| }, |
| { |
| "epoch": 1.1868758915834523, |
| "grad_norm": 0.7028177191087993, |
| "learning_rate": 7.550902683178923e-05, |
| "loss": 0.4346, |
| "step": 260 |
| }, |
| { |
| "epoch": 1.1914407988587732, |
| "grad_norm": 0.7601681237280019, |
| "learning_rate": 7.545011317185172e-05, |
| "loss": 0.4374, |
| "step": 261 |
| }, |
| { |
| "epoch": 1.196005706134094, |
| "grad_norm": 0.9527083913769249, |
| "learning_rate": 7.539083889628755e-05, |
| "loss": 0.4394, |
| "step": 262 |
| }, |
| { |
| "epoch": 1.2005706134094152, |
| "grad_norm": 1.144372718254701, |
| "learning_rate": 7.53312046080632e-05, |
| "loss": 0.4445, |
| "step": 263 |
| }, |
| { |
| "epoch": 1.205135520684736, |
| "grad_norm": 0.9321132440752812, |
| "learning_rate": 7.527121091380737e-05, |
| "loss": 0.436, |
| "step": 264 |
| }, |
| { |
| "epoch": 1.209700427960057, |
| "grad_norm": 1.0349154486726628, |
| "learning_rate": 7.52108584238048e-05, |
| "loss": 0.436, |
| "step": 265 |
| }, |
| { |
| "epoch": 1.214265335235378, |
| "grad_norm": 1.0640705130270223, |
| "learning_rate": 7.515014775199011e-05, |
| "loss": 0.4394, |
| "step": 266 |
| }, |
| { |
| "epoch": 1.218830242510699, |
| "grad_norm": 1.0477802116229236, |
| "learning_rate": 7.508907951594149e-05, |
| "loss": 0.4326, |
| "step": 267 |
| }, |
| { |
| "epoch": 1.22339514978602, |
| "grad_norm": 1.0464613549121728, |
| "learning_rate": 7.502765433687444e-05, |
| "loss": 0.4377, |
| "step": 268 |
| }, |
| { |
| "epoch": 1.227960057061341, |
| "grad_norm": 0.8450812112061008, |
| "learning_rate": 7.496587283963549e-05, |
| "loss": 0.4369, |
| "step": 269 |
| }, |
| { |
| "epoch": 1.2325249643366618, |
| "grad_norm": 0.6557155968758558, |
| "learning_rate": 7.490373565269575e-05, |
| "loss": 0.4339, |
| "step": 270 |
| }, |
| { |
| "epoch": 1.237089871611983, |
| "grad_norm": 0.5572121626093545, |
| "learning_rate": 7.484124340814467e-05, |
| "loss": 0.4344, |
| "step": 271 |
| }, |
| { |
| "epoch": 1.2416547788873038, |
| "grad_norm": 0.5678369025757513, |
| "learning_rate": 7.477839674168342e-05, |
| "loss": 0.4256, |
| "step": 272 |
| }, |
| { |
| "epoch": 1.246219686162625, |
| "grad_norm": 0.5209219264497257, |
| "learning_rate": 7.471519629261859e-05, |
| "loss": 0.4327, |
| "step": 273 |
| }, |
| { |
| "epoch": 1.2507845934379458, |
| "grad_norm": 0.4721960308852361, |
| "learning_rate": 7.465164270385558e-05, |
| "loss": 0.4304, |
| "step": 274 |
| }, |
| { |
| "epoch": 1.2553495007132667, |
| "grad_norm": 0.5657942084693068, |
| "learning_rate": 7.45877366218921e-05, |
| "loss": 0.4382, |
| "step": 275 |
| }, |
| { |
| "epoch": 1.2599144079885878, |
| "grad_norm": 0.7196787321817991, |
| "learning_rate": 7.452347869681159e-05, |
| "loss": 0.4356, |
| "step": 276 |
| }, |
| { |
| "epoch": 1.2644793152639087, |
| "grad_norm": 0.6908825865399278, |
| "learning_rate": 7.445886958227665e-05, |
| "loss": 0.4291, |
| "step": 277 |
| }, |
| { |
| "epoch": 1.2690442225392298, |
| "grad_norm": 0.5440820157677637, |
| "learning_rate": 7.439390993552227e-05, |
| "loss": 0.4362, |
| "step": 278 |
| }, |
| { |
| "epoch": 1.2736091298145507, |
| "grad_norm": 0.6147849930389729, |
| "learning_rate": 7.43286004173493e-05, |
| "loss": 0.4321, |
| "step": 279 |
| }, |
| { |
| "epoch": 1.2781740370898715, |
| "grad_norm": 0.5681098435881913, |
| "learning_rate": 7.426294169211762e-05, |
| "loss": 0.4348, |
| "step": 280 |
| }, |
| { |
| "epoch": 1.2827389443651926, |
| "grad_norm": 0.44708040023745554, |
| "learning_rate": 7.419693442773937e-05, |
| "loss": 0.4375, |
| "step": 281 |
| }, |
| { |
| "epoch": 1.2873038516405135, |
| "grad_norm": 0.5713599240412557, |
| "learning_rate": 7.413057929567227e-05, |
| "loss": 0.4298, |
| "step": 282 |
| }, |
| { |
| "epoch": 1.2918687589158346, |
| "grad_norm": 0.6323338024890408, |
| "learning_rate": 7.406387697091269e-05, |
| "loss": 0.4374, |
| "step": 283 |
| }, |
| { |
| "epoch": 1.2964336661911555, |
| "grad_norm": 0.9056516523433347, |
| "learning_rate": 7.399682813198879e-05, |
| "loss": 0.4362, |
| "step": 284 |
| }, |
| { |
| "epoch": 1.3009985734664764, |
| "grad_norm": 1.182899493470329, |
| "learning_rate": 7.392943346095366e-05, |
| "loss": 0.4361, |
| "step": 285 |
| }, |
| { |
| "epoch": 1.3055634807417975, |
| "grad_norm": 0.7775593404006284, |
| "learning_rate": 7.386169364337833e-05, |
| "loss": 0.438, |
| "step": 286 |
| }, |
| { |
| "epoch": 1.3101283880171184, |
| "grad_norm": 0.6198494591654299, |
| "learning_rate": 7.379360936834492e-05, |
| "loss": 0.429, |
| "step": 287 |
| }, |
| { |
| "epoch": 1.3146932952924395, |
| "grad_norm": 0.7793764973737607, |
| "learning_rate": 7.372518132843941e-05, |
| "loss": 0.4385, |
| "step": 288 |
| }, |
| { |
| "epoch": 1.3192582025677604, |
| "grad_norm": 0.9303862541132153, |
| "learning_rate": 7.365641021974478e-05, |
| "loss": 0.4335, |
| "step": 289 |
| }, |
| { |
| "epoch": 1.3238231098430813, |
| "grad_norm": 0.8432690646689237, |
| "learning_rate": 7.358729674183392e-05, |
| "loss": 0.4317, |
| "step": 290 |
| }, |
| { |
| "epoch": 1.3283880171184022, |
| "grad_norm": 0.8874816517796578, |
| "learning_rate": 7.351784159776238e-05, |
| "loss": 0.4304, |
| "step": 291 |
| }, |
| { |
| "epoch": 1.3329529243937233, |
| "grad_norm": 0.868796496930731, |
| "learning_rate": 7.344804549406135e-05, |
| "loss": 0.4371, |
| "step": 292 |
| }, |
| { |
| "epoch": 1.3375178316690441, |
| "grad_norm": 0.7852711094517002, |
| "learning_rate": 7.337790914073043e-05, |
| "loss": 0.4294, |
| "step": 293 |
| }, |
| { |
| "epoch": 1.3420827389443652, |
| "grad_norm": 0.936953145650728, |
| "learning_rate": 7.330743325123036e-05, |
| "loss": 0.4391, |
| "step": 294 |
| }, |
| { |
| "epoch": 1.3466476462196861, |
| "grad_norm": 0.9704040812645053, |
| "learning_rate": 7.323661854247587e-05, |
| "loss": 0.4349, |
| "step": 295 |
| }, |
| { |
| "epoch": 1.351212553495007, |
| "grad_norm": 0.8823873892473993, |
| "learning_rate": 7.316546573482828e-05, |
| "loss": 0.4315, |
| "step": 296 |
| }, |
| { |
| "epoch": 1.3557774607703281, |
| "grad_norm": 0.9155700667464731, |
| "learning_rate": 7.309397555208817e-05, |
| "loss": 0.4352, |
| "step": 297 |
| }, |
| { |
| "epoch": 1.360342368045649, |
| "grad_norm": 0.9500779897069331, |
| "learning_rate": 7.302214872148817e-05, |
| "loss": 0.4341, |
| "step": 298 |
| }, |
| { |
| "epoch": 1.3649072753209701, |
| "grad_norm": 0.715286965255515, |
| "learning_rate": 7.29499859736854e-05, |
| "loss": 0.43, |
| "step": 299 |
| }, |
| { |
| "epoch": 1.369472182596291, |
| "grad_norm": 0.42900565746793906, |
| "learning_rate": 7.287748804275406e-05, |
| "loss": 0.4257, |
| "step": 300 |
| }, |
| { |
| "epoch": 1.3740370898716119, |
| "grad_norm": 0.44848107810486365, |
| "learning_rate": 7.280465566617804e-05, |
| "loss": 0.4282, |
| "step": 301 |
| }, |
| { |
| "epoch": 1.378601997146933, |
| "grad_norm": 0.5499399617735641, |
| "learning_rate": 7.273148958484335e-05, |
| "loss": 0.4342, |
| "step": 302 |
| }, |
| { |
| "epoch": 1.3831669044222539, |
| "grad_norm": 0.7779728624601656, |
| "learning_rate": 7.265799054303062e-05, |
| "loss": 0.4338, |
| "step": 303 |
| }, |
| { |
| "epoch": 1.387731811697575, |
| "grad_norm": 0.7555091815426274, |
| "learning_rate": 7.258415928840749e-05, |
| "loss": 0.4311, |
| "step": 304 |
| }, |
| { |
| "epoch": 1.3922967189728959, |
| "grad_norm": 0.6206089776911131, |
| "learning_rate": 7.250999657202107e-05, |
| "loss": 0.4296, |
| "step": 305 |
| }, |
| { |
| "epoch": 1.3968616262482167, |
| "grad_norm": 0.7912293160493962, |
| "learning_rate": 7.24355031482902e-05, |
| "loss": 0.436, |
| "step": 306 |
| }, |
| { |
| "epoch": 1.4014265335235379, |
| "grad_norm": 1.125338548493621, |
| "learning_rate": 7.236067977499791e-05, |
| "loss": 0.4342, |
| "step": 307 |
| }, |
| { |
| "epoch": 1.4059914407988587, |
| "grad_norm": 1.04913732233305, |
| "learning_rate": 7.228552721328354e-05, |
| "loss": 0.4388, |
| "step": 308 |
| }, |
| { |
| "epoch": 1.4105563480741798, |
| "grad_norm": 0.8118850698239931, |
| "learning_rate": 7.22100462276352e-05, |
| "loss": 0.4257, |
| "step": 309 |
| }, |
| { |
| "epoch": 1.4151212553495007, |
| "grad_norm": 0.6971634722121615, |
| "learning_rate": 7.213423758588182e-05, |
| "loss": 0.4314, |
| "step": 310 |
| }, |
| { |
| "epoch": 1.4196861626248216, |
| "grad_norm": 0.9112349240874238, |
| "learning_rate": 7.20581020591854e-05, |
| "loss": 0.4311, |
| "step": 311 |
| }, |
| { |
| "epoch": 1.4242510699001427, |
| "grad_norm": 0.8933771253980181, |
| "learning_rate": 7.19816404220332e-05, |
| "loss": 0.4337, |
| "step": 312 |
| }, |
| { |
| "epoch": 1.4288159771754636, |
| "grad_norm": 0.7078422348562297, |
| "learning_rate": 7.190485345222981e-05, |
| "loss": 0.4297, |
| "step": 313 |
| }, |
| { |
| "epoch": 1.4333808844507847, |
| "grad_norm": 0.659819970126346, |
| "learning_rate": 7.18277419308893e-05, |
| "loss": 0.4258, |
| "step": 314 |
| }, |
| { |
| "epoch": 1.4379457917261056, |
| "grad_norm": 0.5948618977549561, |
| "learning_rate": 7.17503066424272e-05, |
| "loss": 0.4307, |
| "step": 315 |
| }, |
| { |
| "epoch": 1.4425106990014265, |
| "grad_norm": 0.5522078226460925, |
| "learning_rate": 7.167254837455254e-05, |
| "loss": 0.423, |
| "step": 316 |
| }, |
| { |
| "epoch": 1.4470756062767476, |
| "grad_norm": 0.46370094074478, |
| "learning_rate": 7.15944679182599e-05, |
| "loss": 0.4226, |
| "step": 317 |
| }, |
| { |
| "epoch": 1.4516405135520685, |
| "grad_norm": 0.4538500924366929, |
| "learning_rate": 7.15160660678213e-05, |
| "loss": 0.4334, |
| "step": 318 |
| }, |
| { |
| "epoch": 1.4562054208273896, |
| "grad_norm": 0.5046426426810093, |
| "learning_rate": 7.143734362077809e-05, |
| "loss": 0.4333, |
| "step": 319 |
| }, |
| { |
| "epoch": 1.4607703281027105, |
| "grad_norm": 0.4578123337897367, |
| "learning_rate": 7.135830137793295e-05, |
| "loss": 0.4236, |
| "step": 320 |
| }, |
| { |
| "epoch": 1.4653352353780313, |
| "grad_norm": 0.4655315773535026, |
| "learning_rate": 7.127894014334163e-05, |
| "loss": 0.4279, |
| "step": 321 |
| }, |
| { |
| "epoch": 1.4699001426533522, |
| "grad_norm": 0.4651249472363875, |
| "learning_rate": 7.119926072430485e-05, |
| "loss": 0.4253, |
| "step": 322 |
| }, |
| { |
| "epoch": 1.4744650499286733, |
| "grad_norm": 0.4978505684470615, |
| "learning_rate": 7.111926393136002e-05, |
| "loss": 0.428, |
| "step": 323 |
| }, |
| { |
| "epoch": 1.4790299572039942, |
| "grad_norm": 0.536231847135624, |
| "learning_rate": 7.103895057827308e-05, |
| "loss": 0.43, |
| "step": 324 |
| }, |
| { |
| "epoch": 1.4835948644793153, |
| "grad_norm": 0.3912812640636196, |
| "learning_rate": 7.095832148203013e-05, |
| "loss": 0.4295, |
| "step": 325 |
| }, |
| { |
| "epoch": 1.4881597717546362, |
| "grad_norm": 0.40464250838428917, |
| "learning_rate": 7.087737746282916e-05, |
| "loss": 0.4277, |
| "step": 326 |
| }, |
| { |
| "epoch": 1.492724679029957, |
| "grad_norm": 0.5277590990428639, |
| "learning_rate": 7.079611934407173e-05, |
| "loss": 0.4335, |
| "step": 327 |
| }, |
| { |
| "epoch": 1.4972895863052782, |
| "grad_norm": 0.7191902847601642, |
| "learning_rate": 7.071454795235457e-05, |
| "loss": 0.428, |
| "step": 328 |
| }, |
| { |
| "epoch": 1.5018544935805993, |
| "grad_norm": 0.8201553068276188, |
| "learning_rate": 7.063266411746116e-05, |
| "loss": 0.4243, |
| "step": 329 |
| }, |
| { |
| "epoch": 1.5064194008559202, |
| "grad_norm": 0.8029687675080593, |
| "learning_rate": 7.055046867235331e-05, |
| "loss": 0.4297, |
| "step": 330 |
| }, |
| { |
| "epoch": 1.510984308131241, |
| "grad_norm": 0.7677427204023601, |
| "learning_rate": 7.046796245316267e-05, |
| "loss": 0.428, |
| "step": 331 |
| }, |
| { |
| "epoch": 1.515549215406562, |
| "grad_norm": 0.7499549959421509, |
| "learning_rate": 7.038514629918228e-05, |
| "loss": 0.4307, |
| "step": 332 |
| }, |
| { |
| "epoch": 1.520114122681883, |
| "grad_norm": 0.8211692796656908, |
| "learning_rate": 7.030202105285792e-05, |
| "loss": 0.433, |
| "step": 333 |
| }, |
| { |
| "epoch": 1.524679029957204, |
| "grad_norm": 1.0730200098155518, |
| "learning_rate": 7.021858755977964e-05, |
| "loss": 0.4241, |
| "step": 334 |
| }, |
| { |
| "epoch": 1.529243937232525, |
| "grad_norm": 1.1727832985354492, |
| "learning_rate": 7.013484666867312e-05, |
| "loss": 0.4333, |
| "step": 335 |
| }, |
| { |
| "epoch": 1.533808844507846, |
| "grad_norm": 0.7964684055522075, |
| "learning_rate": 7.005079923139104e-05, |
| "loss": 0.4317, |
| "step": 336 |
| }, |
| { |
| "epoch": 1.5383737517831668, |
| "grad_norm": 0.7927868140861069, |
| "learning_rate": 6.996644610290441e-05, |
| "loss": 0.4293, |
| "step": 337 |
| }, |
| { |
| "epoch": 1.542938659058488, |
| "grad_norm": 0.722547974680119, |
| "learning_rate": 6.988178814129388e-05, |
| "loss": 0.4297, |
| "step": 338 |
| }, |
| { |
| "epoch": 1.5475035663338088, |
| "grad_norm": 0.5891193027337546, |
| "learning_rate": 6.979682620774104e-05, |
| "loss": 0.4301, |
| "step": 339 |
| }, |
| { |
| "epoch": 1.55206847360913, |
| "grad_norm": 0.5088111344725053, |
| "learning_rate": 6.971156116651958e-05, |
| "loss": 0.4244, |
| "step": 340 |
| }, |
| { |
| "epoch": 1.5566333808844508, |
| "grad_norm": 0.5076201573377456, |
| "learning_rate": 6.962599388498657e-05, |
| "loss": 0.4257, |
| "step": 341 |
| }, |
| { |
| "epoch": 1.5611982881597717, |
| "grad_norm": 0.5666496245303375, |
| "learning_rate": 6.954012523357362e-05, |
| "loss": 0.4308, |
| "step": 342 |
| }, |
| { |
| "epoch": 1.5657631954350926, |
| "grad_norm": 0.5307337032247661, |
| "learning_rate": 6.945395608577801e-05, |
| "loss": 0.4356, |
| "step": 343 |
| }, |
| { |
| "epoch": 1.5703281027104137, |
| "grad_norm": 0.44314588391528825, |
| "learning_rate": 6.936748731815382e-05, |
| "loss": 0.4249, |
| "step": 344 |
| }, |
| { |
| "epoch": 1.5748930099857348, |
| "grad_norm": 0.4804047628466697, |
| "learning_rate": 6.9280719810303e-05, |
| "loss": 0.4332, |
| "step": 345 |
| }, |
| { |
| "epoch": 1.5794579172610557, |
| "grad_norm": 0.4642513033873333, |
| "learning_rate": 6.919365444486644e-05, |
| "loss": 0.4226, |
| "step": 346 |
| }, |
| { |
| "epoch": 1.5840228245363766, |
| "grad_norm": 0.5568335056382052, |
| "learning_rate": 6.910629210751497e-05, |
| "loss": 0.4238, |
| "step": 347 |
| }, |
| { |
| "epoch": 1.5885877318116974, |
| "grad_norm": 0.5686750155147203, |
| "learning_rate": 6.901863368694036e-05, |
| "loss": 0.4229, |
| "step": 348 |
| }, |
| { |
| "epoch": 1.5931526390870185, |
| "grad_norm": 0.5594409908646193, |
| "learning_rate": 6.893068007484628e-05, |
| "loss": 0.431, |
| "step": 349 |
| }, |
| { |
| "epoch": 1.5977175463623396, |
| "grad_norm": 0.7542738892463182, |
| "learning_rate": 6.884243216593928e-05, |
| "loss": 0.4278, |
| "step": 350 |
| }, |
| { |
| "epoch": 1.6022824536376605, |
| "grad_norm": 0.909785720326975, |
| "learning_rate": 6.875389085791956e-05, |
| "loss": 0.4237, |
| "step": 351 |
| }, |
| { |
| "epoch": 1.6068473609129814, |
| "grad_norm": 1.0518321051704833, |
| "learning_rate": 6.866505705147195e-05, |
| "loss": 0.4297, |
| "step": 352 |
| }, |
| { |
| "epoch": 1.6114122681883023, |
| "grad_norm": 1.1017254090246513, |
| "learning_rate": 6.857593165025674e-05, |
| "loss": 0.4245, |
| "step": 353 |
| }, |
| { |
| "epoch": 1.6159771754636234, |
| "grad_norm": 0.8187539090198782, |
| "learning_rate": 6.848651556090042e-05, |
| "loss": 0.4258, |
| "step": 354 |
| }, |
| { |
| "epoch": 1.6205420827389445, |
| "grad_norm": 0.5573740074216886, |
| "learning_rate": 6.839680969298653e-05, |
| "loss": 0.4285, |
| "step": 355 |
| }, |
| { |
| "epoch": 1.6251069900142654, |
| "grad_norm": 0.6164738882872269, |
| "learning_rate": 6.830681495904637e-05, |
| "loss": 0.4286, |
| "step": 356 |
| }, |
| { |
| "epoch": 1.6296718972895863, |
| "grad_norm": 0.8801449131438988, |
| "learning_rate": 6.821653227454973e-05, |
| "loss": 0.4221, |
| "step": 357 |
| }, |
| { |
| "epoch": 1.6342368045649072, |
| "grad_norm": 1.0792158764449444, |
| "learning_rate": 6.812596255789553e-05, |
| "loss": 0.4267, |
| "step": 358 |
| }, |
| { |
| "epoch": 1.6388017118402283, |
| "grad_norm": 0.8680834731545233, |
| "learning_rate": 6.80351067304026e-05, |
| "loss": 0.424, |
| "step": 359 |
| }, |
| { |
| "epoch": 1.6433666191155494, |
| "grad_norm": 0.6086251558676785, |
| "learning_rate": 6.794396571630015e-05, |
| "loss": 0.4293, |
| "step": 360 |
| }, |
| { |
| "epoch": 1.6479315263908703, |
| "grad_norm": 0.501395339998093, |
| "learning_rate": 6.785254044271848e-05, |
| "loss": 0.4285, |
| "step": 361 |
| }, |
| { |
| "epoch": 1.6524964336661911, |
| "grad_norm": 0.5979602956912436, |
| "learning_rate": 6.776083183967951e-05, |
| "loss": 0.4275, |
| "step": 362 |
| }, |
| { |
| "epoch": 1.657061340941512, |
| "grad_norm": 0.6937203619342673, |
| "learning_rate": 6.766884084008734e-05, |
| "loss": 0.425, |
| "step": 363 |
| }, |
| { |
| "epoch": 1.6616262482168331, |
| "grad_norm": 0.7101090354502062, |
| "learning_rate": 6.757656837971872e-05, |
| "loss": 0.4309, |
| "step": 364 |
| }, |
| { |
| "epoch": 1.666191155492154, |
| "grad_norm": 0.643930488180388, |
| "learning_rate": 6.748401539721353e-05, |
| "loss": 0.4287, |
| "step": 365 |
| }, |
| { |
| "epoch": 1.6707560627674751, |
| "grad_norm": 0.7810718246548566, |
| "learning_rate": 6.739118283406533e-05, |
| "loss": 0.4264, |
| "step": 366 |
| }, |
| { |
| "epoch": 1.675320970042796, |
| "grad_norm": 0.9283460979463781, |
| "learning_rate": 6.729807163461165e-05, |
| "loss": 0.4319, |
| "step": 367 |
| }, |
| { |
| "epoch": 1.679885877318117, |
| "grad_norm": 0.9210967683307637, |
| "learning_rate": 6.720468274602446e-05, |
| "loss": 0.4282, |
| "step": 368 |
| }, |
| { |
| "epoch": 1.684450784593438, |
| "grad_norm": 0.6497876560161834, |
| "learning_rate": 6.711101711830054e-05, |
| "loss": 0.4284, |
| "step": 369 |
| }, |
| { |
| "epoch": 1.6890156918687589, |
| "grad_norm": 0.4725196750675396, |
| "learning_rate": 6.701707570425174e-05, |
| "loss": 0.4262, |
| "step": 370 |
| }, |
| { |
| "epoch": 1.69358059914408, |
| "grad_norm": 0.5985153130104276, |
| "learning_rate": 6.69228594594954e-05, |
| "loss": 0.4309, |
| "step": 371 |
| }, |
| { |
| "epoch": 1.6981455064194009, |
| "grad_norm": 0.7045200474002776, |
| "learning_rate": 6.682836934244452e-05, |
| "loss": 0.4268, |
| "step": 372 |
| }, |
| { |
| "epoch": 1.7027104136947218, |
| "grad_norm": 0.7522673563908682, |
| "learning_rate": 6.67336063142981e-05, |
| "loss": 0.429, |
| "step": 373 |
| }, |
| { |
| "epoch": 1.7072753209700426, |
| "grad_norm": 0.5776326484944384, |
| "learning_rate": 6.663857133903128e-05, |
| "loss": 0.4243, |
| "step": 374 |
| }, |
| { |
| "epoch": 1.7118402282453637, |
| "grad_norm": 0.34017458565072894, |
| "learning_rate": 6.654326538338565e-05, |
| "loss": 0.4235, |
| "step": 375 |
| }, |
| { |
| "epoch": 1.7164051355206849, |
| "grad_norm": 0.5718314127403422, |
| "learning_rate": 6.644768941685928e-05, |
| "loss": 0.4223, |
| "step": 376 |
| }, |
| { |
| "epoch": 1.7209700427960057, |
| "grad_norm": 0.7259779441818939, |
| "learning_rate": 6.63518444116969e-05, |
| "loss": 0.4257, |
| "step": 377 |
| }, |
| { |
| "epoch": 1.7255349500713266, |
| "grad_norm": 0.5302976449413679, |
| "learning_rate": 6.625573134288012e-05, |
| "loss": 0.4156, |
| "step": 378 |
| }, |
| { |
| "epoch": 1.7300998573466475, |
| "grad_norm": 0.4044589367438007, |
| "learning_rate": 6.615935118811737e-05, |
| "loss": 0.4217, |
| "step": 379 |
| }, |
| { |
| "epoch": 1.7346647646219686, |
| "grad_norm": 0.6930212301593457, |
| "learning_rate": 6.606270492783395e-05, |
| "loss": 0.4228, |
| "step": 380 |
| }, |
| { |
| "epoch": 1.7392296718972897, |
| "grad_norm": 0.7344014385124219, |
| "learning_rate": 6.596579354516225e-05, |
| "loss": 0.4232, |
| "step": 381 |
| }, |
| { |
| "epoch": 1.7437945791726106, |
| "grad_norm": 0.5534474445480675, |
| "learning_rate": 6.586861802593147e-05, |
| "loss": 0.4233, |
| "step": 382 |
| }, |
| { |
| "epoch": 1.7483594864479315, |
| "grad_norm": 0.6126829361696522, |
| "learning_rate": 6.577117935865785e-05, |
| "loss": 0.4268, |
| "step": 383 |
| }, |
| { |
| "epoch": 1.7529243937232524, |
| "grad_norm": 0.6533034681110785, |
| "learning_rate": 6.567347853453439e-05, |
| "loss": 0.422, |
| "step": 384 |
| }, |
| { |
| "epoch": 1.7574893009985735, |
| "grad_norm": 0.5424516205530866, |
| "learning_rate": 6.557551654742099e-05, |
| "loss": 0.4198, |
| "step": 385 |
| }, |
| { |
| "epoch": 1.7620542082738946, |
| "grad_norm": 0.6137606774154565, |
| "learning_rate": 6.547729439383414e-05, |
| "loss": 0.4266, |
| "step": 386 |
| }, |
| { |
| "epoch": 1.7666191155492155, |
| "grad_norm": 0.6945685962943688, |
| "learning_rate": 6.537881307293691e-05, |
| "loss": 0.4231, |
| "step": 387 |
| }, |
| { |
| "epoch": 1.7711840228245364, |
| "grad_norm": 0.6051697889503573, |
| "learning_rate": 6.528007358652871e-05, |
| "loss": 0.424, |
| "step": 388 |
| }, |
| { |
| "epoch": 1.7757489300998572, |
| "grad_norm": 0.497689232889849, |
| "learning_rate": 6.518107693903519e-05, |
| "loss": 0.4221, |
| "step": 389 |
| }, |
| { |
| "epoch": 1.7803138373751783, |
| "grad_norm": 0.3881400873717001, |
| "learning_rate": 6.50818241374979e-05, |
| "loss": 0.4266, |
| "step": 390 |
| }, |
| { |
| "epoch": 1.7848787446504994, |
| "grad_norm": 0.4053350043040276, |
| "learning_rate": 6.498231619156416e-05, |
| "loss": 0.4212, |
| "step": 391 |
| }, |
| { |
| "epoch": 1.7894436519258203, |
| "grad_norm": 0.4228076243058947, |
| "learning_rate": 6.488255411347673e-05, |
| "loss": 0.421, |
| "step": 392 |
| }, |
| { |
| "epoch": 1.7940085592011412, |
| "grad_norm": 0.3651462975399496, |
| "learning_rate": 6.478253891806353e-05, |
| "loss": 0.4203, |
| "step": 393 |
| }, |
| { |
| "epoch": 1.798573466476462, |
| "grad_norm": 0.3813241127258593, |
| "learning_rate": 6.468227162272726e-05, |
| "loss": 0.4256, |
| "step": 394 |
| }, |
| { |
| "epoch": 1.8031383737517832, |
| "grad_norm": 0.4876471377127721, |
| "learning_rate": 6.45817532474352e-05, |
| "loss": 0.4249, |
| "step": 395 |
| }, |
| { |
| "epoch": 1.807703281027104, |
| "grad_norm": 0.5450947841751673, |
| "learning_rate": 6.448098481470863e-05, |
| "loss": 0.4203, |
| "step": 396 |
| }, |
| { |
| "epoch": 1.8122681883024252, |
| "grad_norm": 0.6516167352248161, |
| "learning_rate": 6.437996734961262e-05, |
| "loss": 0.4306, |
| "step": 397 |
| }, |
| { |
| "epoch": 1.816833095577746, |
| "grad_norm": 0.7968992579354538, |
| "learning_rate": 6.427870187974548e-05, |
| "loss": 0.4213, |
| "step": 398 |
| }, |
| { |
| "epoch": 1.821398002853067, |
| "grad_norm": 0.8730538354851684, |
| "learning_rate": 6.417718943522835e-05, |
| "loss": 0.4315, |
| "step": 399 |
| }, |
| { |
| "epoch": 1.825962910128388, |
| "grad_norm": 0.801131615179908, |
| "learning_rate": 6.407543104869469e-05, |
| "loss": 0.4209, |
| "step": 400 |
| }, |
| { |
| "epoch": 1.830527817403709, |
| "grad_norm": 0.645824395128567, |
| "learning_rate": 6.397342775527982e-05, |
| "loss": 0.4277, |
| "step": 401 |
| }, |
| { |
| "epoch": 1.83509272467903, |
| "grad_norm": 0.7410730591928698, |
| "learning_rate": 6.38711805926104e-05, |
| "loss": 0.4196, |
| "step": 402 |
| }, |
| { |
| "epoch": 1.839657631954351, |
| "grad_norm": 0.9347780145308188, |
| "learning_rate": 6.376869060079381e-05, |
| "loss": 0.4226, |
| "step": 403 |
| }, |
| { |
| "epoch": 1.8442225392296718, |
| "grad_norm": 0.8659116193168973, |
| "learning_rate": 6.366595882240761e-05, |
| "loss": 0.4255, |
| "step": 404 |
| }, |
| { |
| "epoch": 1.8487874465049927, |
| "grad_norm": 0.7573788312065334, |
| "learning_rate": 6.356298630248893e-05, |
| "loss": 0.4319, |
| "step": 405 |
| }, |
| { |
| "epoch": 1.8533523537803138, |
| "grad_norm": 0.698786388331663, |
| "learning_rate": 6.345977408852383e-05, |
| "loss": 0.423, |
| "step": 406 |
| }, |
| { |
| "epoch": 1.857917261055635, |
| "grad_norm": 0.6540039704024841, |
| "learning_rate": 6.335632323043671e-05, |
| "loss": 0.4239, |
| "step": 407 |
| }, |
| { |
| "epoch": 1.8624821683309558, |
| "grad_norm": 0.5449620669554056, |
| "learning_rate": 6.325263478057947e-05, |
| "loss": 0.4279, |
| "step": 408 |
| }, |
| { |
| "epoch": 1.8670470756062767, |
| "grad_norm": 0.4777538440000814, |
| "learning_rate": 6.314870979372102e-05, |
| "loss": 0.4247, |
| "step": 409 |
| }, |
| { |
| "epoch": 1.8716119828815976, |
| "grad_norm": 0.38933468433699886, |
| "learning_rate": 6.304454932703633e-05, |
| "loss": 0.4231, |
| "step": 410 |
| }, |
| { |
| "epoch": 1.8761768901569187, |
| "grad_norm": 0.4120383799689869, |
| "learning_rate": 6.29401544400959e-05, |
| "loss": 0.4293, |
| "step": 411 |
| }, |
| { |
| "epoch": 1.8807417974322398, |
| "grad_norm": 0.47381071482947645, |
| "learning_rate": 6.283552619485476e-05, |
| "loss": 0.4177, |
| "step": 412 |
| }, |
| { |
| "epoch": 1.8853067047075607, |
| "grad_norm": 0.3580390483686331, |
| "learning_rate": 6.27306656556418e-05, |
| "loss": 0.4196, |
| "step": 413 |
| }, |
| { |
| "epoch": 1.8898716119828816, |
| "grad_norm": 0.38078475072495094, |
| "learning_rate": 6.2625573889149e-05, |
| "loss": 0.4193, |
| "step": 414 |
| }, |
| { |
| "epoch": 1.8944365192582024, |
| "grad_norm": 0.4278816772061641, |
| "learning_rate": 6.25202519644204e-05, |
| "loss": 0.418, |
| "step": 415 |
| }, |
| { |
| "epoch": 1.8990014265335236, |
| "grad_norm": 0.4475822733130566, |
| "learning_rate": 6.241470095284133e-05, |
| "loss": 0.4262, |
| "step": 416 |
| }, |
| { |
| "epoch": 1.9035663338088447, |
| "grad_norm": 0.5284917554953473, |
| "learning_rate": 6.230892192812752e-05, |
| "loss": 0.4241, |
| "step": 417 |
| }, |
| { |
| "epoch": 1.9081312410841655, |
| "grad_norm": 0.5411660617767071, |
| "learning_rate": 6.220291596631417e-05, |
| "loss": 0.425, |
| "step": 418 |
| }, |
| { |
| "epoch": 1.9126961483594864, |
| "grad_norm": 0.4446094839845814, |
| "learning_rate": 6.209668414574502e-05, |
| "loss": 0.4217, |
| "step": 419 |
| }, |
| { |
| "epoch": 1.9172610556348073, |
| "grad_norm": 0.38671232428761154, |
| "learning_rate": 6.199022754706127e-05, |
| "loss": 0.4288, |
| "step": 420 |
| }, |
| { |
| "epoch": 1.9218259629101284, |
| "grad_norm": 0.3611456455331726, |
| "learning_rate": 6.188354725319074e-05, |
| "loss": 0.4217, |
| "step": 421 |
| }, |
| { |
| "epoch": 1.9263908701854495, |
| "grad_norm": 0.327267880459524, |
| "learning_rate": 6.177664434933676e-05, |
| "loss": 0.4239, |
| "step": 422 |
| }, |
| { |
| "epoch": 1.9309557774607704, |
| "grad_norm": 0.2734657156850823, |
| "learning_rate": 6.166951992296716e-05, |
| "loss": 0.424, |
| "step": 423 |
| }, |
| { |
| "epoch": 1.9355206847360913, |
| "grad_norm": 0.30352197416491244, |
| "learning_rate": 6.15621750638032e-05, |
| "loss": 0.422, |
| "step": 424 |
| }, |
| { |
| "epoch": 1.9400855920114122, |
| "grad_norm": 0.2974349992819165, |
| "learning_rate": 6.145461086380848e-05, |
| "loss": 0.4251, |
| "step": 425 |
| }, |
| { |
| "epoch": 1.9446504992867333, |
| "grad_norm": 0.3823668291519408, |
| "learning_rate": 6.134682841717792e-05, |
| "loss": 0.4165, |
| "step": 426 |
| }, |
| { |
| "epoch": 1.9492154065620542, |
| "grad_norm": 0.6312244786498952, |
| "learning_rate": 6.123882882032639e-05, |
| "loss": 0.4194, |
| "step": 427 |
| }, |
| { |
| "epoch": 1.9537803138373753, |
| "grad_norm": 0.8700056231822155, |
| "learning_rate": 6.113061317187789e-05, |
| "loss": 0.4231, |
| "step": 428 |
| }, |
| { |
| "epoch": 1.9583452211126962, |
| "grad_norm": 0.9887590587117958, |
| "learning_rate": 6.1022182572654063e-05, |
| "loss": 0.4228, |
| "step": 429 |
| }, |
| { |
| "epoch": 1.962910128388017, |
| "grad_norm": 0.9884807393198494, |
| "learning_rate": 6.0913538125663236e-05, |
| "loss": 0.4306, |
| "step": 430 |
| }, |
| { |
| "epoch": 1.967475035663338, |
| "grad_norm": 0.911102518039347, |
| "learning_rate": 6.0804680936089025e-05, |
| "loss": 0.4225, |
| "step": 431 |
| }, |
| { |
| "epoch": 1.972039942938659, |
| "grad_norm": 0.7402245060126661, |
| "learning_rate": 6.069561211127919e-05, |
| "loss": 0.4222, |
| "step": 432 |
| }, |
| { |
| "epoch": 1.9766048502139801, |
| "grad_norm": 0.5083536699969557, |
| "learning_rate": 6.05863327607344e-05, |
| "loss": 0.4219, |
| "step": 433 |
| }, |
| { |
| "epoch": 1.981169757489301, |
| "grad_norm": 0.3496862590580212, |
| "learning_rate": 6.0476843996096795e-05, |
| "loss": 0.4203, |
| "step": 434 |
| }, |
| { |
| "epoch": 1.985734664764622, |
| "grad_norm": 0.4037616186832233, |
| "learning_rate": 6.0367146931138866e-05, |
| "loss": 0.4184, |
| "step": 435 |
| }, |
| { |
| "epoch": 1.9902995720399428, |
| "grad_norm": 0.4436594227881451, |
| "learning_rate": 6.025724268175197e-05, |
| "loss": 0.4208, |
| "step": 436 |
| }, |
| { |
| "epoch": 1.994864479315264, |
| "grad_norm": 0.4512851557242651, |
| "learning_rate": 6.0147132365935065e-05, |
| "loss": 0.4165, |
| "step": 437 |
| }, |
| { |
| "epoch": 1.999429386590585, |
| "grad_norm": 0.4765645074474141, |
| "learning_rate": 6.003681710378335e-05, |
| "loss": 0.4225, |
| "step": 438 |
| }, |
| { |
| "epoch": 2.003994293865906, |
| "grad_norm": 0.9681866786412074, |
| "learning_rate": 5.9926298017476774e-05, |
| "loss": 0.7716, |
| "step": 439 |
| }, |
| { |
| "epoch": 2.0085592011412268, |
| "grad_norm": 1.5630521704235794, |
| "learning_rate": 5.981557623126876e-05, |
| "loss": 0.4014, |
| "step": 440 |
| }, |
| { |
| "epoch": 2.0131241084165477, |
| "grad_norm": 0.571055488243649, |
| "learning_rate": 5.970465287147461e-05, |
| "loss": 0.4024, |
| "step": 441 |
| }, |
| { |
| "epoch": 2.017689015691869, |
| "grad_norm": 1.7926013030849834, |
| "learning_rate": 5.959352906646018e-05, |
| "loss": 0.4049, |
| "step": 442 |
| }, |
| { |
| "epoch": 2.02225392296719, |
| "grad_norm": 0.8802074425647698, |
| "learning_rate": 5.948220594663035e-05, |
| "loss": 0.4054, |
| "step": 443 |
| }, |
| { |
| "epoch": 2.0268188302425107, |
| "grad_norm": 1.8078241844652807, |
| "learning_rate": 5.93706846444175e-05, |
| "loss": 0.4158, |
| "step": 444 |
| }, |
| { |
| "epoch": 2.0313837375178316, |
| "grad_norm": 1.3366588545623441, |
| "learning_rate": 5.925896629427006e-05, |
| "loss": 0.4088, |
| "step": 445 |
| }, |
| { |
| "epoch": 2.0359486447931525, |
| "grad_norm": 1.2979771341051034, |
| "learning_rate": 5.9147052032640886e-05, |
| "loss": 0.4112, |
| "step": 446 |
| }, |
| { |
| "epoch": 2.0405135520684734, |
| "grad_norm": 1.0266378699260492, |
| "learning_rate": 5.9034942997975744e-05, |
| "loss": 0.4105, |
| "step": 447 |
| }, |
| { |
| "epoch": 2.0450784593437947, |
| "grad_norm": 1.0531734053882162, |
| "learning_rate": 5.8922640330701734e-05, |
| "loss": 0.4069, |
| "step": 448 |
| }, |
| { |
| "epoch": 2.0496433666191156, |
| "grad_norm": 0.7366171898891467, |
| "learning_rate": 5.8810145173215694e-05, |
| "loss": 0.3995, |
| "step": 449 |
| }, |
| { |
| "epoch": 2.0542082738944365, |
| "grad_norm": 0.8679822170363295, |
| "learning_rate": 5.869745866987256e-05, |
| "loss": 0.4064, |
| "step": 450 |
| }, |
| { |
| "epoch": 2.0587731811697574, |
| "grad_norm": 0.6944746602819868, |
| "learning_rate": 5.8584581966973696e-05, |
| "loss": 0.403, |
| "step": 451 |
| }, |
| { |
| "epoch": 2.0633380884450783, |
| "grad_norm": 0.5466028481958466, |
| "learning_rate": 5.847151621275531e-05, |
| "loss": 0.3997, |
| "step": 452 |
| }, |
| { |
| "epoch": 2.0679029957203996, |
| "grad_norm": 0.6055399950709338, |
| "learning_rate": 5.8358262557376725e-05, |
| "loss": 0.3994, |
| "step": 453 |
| }, |
| { |
| "epoch": 2.0724679029957205, |
| "grad_norm": 0.4848917512592404, |
| "learning_rate": 5.824482215290865e-05, |
| "loss": 0.404, |
| "step": 454 |
| }, |
| { |
| "epoch": 2.0770328102710414, |
| "grad_norm": 0.498636327804515, |
| "learning_rate": 5.813119615332154e-05, |
| "loss": 0.3993, |
| "step": 455 |
| }, |
| { |
| "epoch": 2.0815977175463622, |
| "grad_norm": 0.46905229993366143, |
| "learning_rate": 5.801738571447378e-05, |
| "loss": 0.4053, |
| "step": 456 |
| }, |
| { |
| "epoch": 2.086162624821683, |
| "grad_norm": 0.4431759724923076, |
| "learning_rate": 5.79033919941e-05, |
| "loss": 0.3966, |
| "step": 457 |
| }, |
| { |
| "epoch": 2.0907275320970045, |
| "grad_norm": 0.5210110584044579, |
| "learning_rate": 5.7789216151799196e-05, |
| "loss": 0.3918, |
| "step": 458 |
| }, |
| { |
| "epoch": 2.0952924393723253, |
| "grad_norm": 0.43971729383879754, |
| "learning_rate": 5.7674859349023064e-05, |
| "loss": 0.4008, |
| "step": 459 |
| }, |
| { |
| "epoch": 2.0998573466476462, |
| "grad_norm": 0.3602233291051038, |
| "learning_rate": 5.756032274906405e-05, |
| "loss": 0.3985, |
| "step": 460 |
| }, |
| { |
| "epoch": 2.104422253922967, |
| "grad_norm": 0.3514674260539934, |
| "learning_rate": 5.7445607517043646e-05, |
| "loss": 0.3948, |
| "step": 461 |
| }, |
| { |
| "epoch": 2.108987161198288, |
| "grad_norm": 0.3740970628030136, |
| "learning_rate": 5.733071481990046e-05, |
| "loss": 0.3969, |
| "step": 462 |
| }, |
| { |
| "epoch": 2.1135520684736093, |
| "grad_norm": 0.3848411215975852, |
| "learning_rate": 5.721564582637829e-05, |
| "loss": 0.3997, |
| "step": 463 |
| }, |
| { |
| "epoch": 2.11811697574893, |
| "grad_norm": 0.36362834255516524, |
| "learning_rate": 5.710040170701443e-05, |
| "loss": 0.3941, |
| "step": 464 |
| }, |
| { |
| "epoch": 2.122681883024251, |
| "grad_norm": 0.387749019583618, |
| "learning_rate": 5.6984983634127534e-05, |
| "loss": 0.3964, |
| "step": 465 |
| }, |
| { |
| "epoch": 2.127246790299572, |
| "grad_norm": 0.28828513667702704, |
| "learning_rate": 5.686939278180585e-05, |
| "loss": 0.3947, |
| "step": 466 |
| }, |
| { |
| "epoch": 2.131811697574893, |
| "grad_norm": 0.30878428184767404, |
| "learning_rate": 5.675363032589521e-05, |
| "loss": 0.4029, |
| "step": 467 |
| }, |
| { |
| "epoch": 2.1363766048502137, |
| "grad_norm": 0.3090749309313414, |
| "learning_rate": 5.6637697443987044e-05, |
| "loss": 0.3957, |
| "step": 468 |
| }, |
| { |
| "epoch": 2.140941512125535, |
| "grad_norm": 0.3150439666682486, |
| "learning_rate": 5.6521595315406505e-05, |
| "loss": 0.3982, |
| "step": 469 |
| }, |
| { |
| "epoch": 2.145506419400856, |
| "grad_norm": 0.30763731181006676, |
| "learning_rate": 5.640532512120036e-05, |
| "loss": 0.3978, |
| "step": 470 |
| }, |
| { |
| "epoch": 2.150071326676177, |
| "grad_norm": 0.3156068121770724, |
| "learning_rate": 5.6288888044125005e-05, |
| "loss": 0.3989, |
| "step": 471 |
| }, |
| { |
| "epoch": 2.1546362339514977, |
| "grad_norm": 0.3220879180231534, |
| "learning_rate": 5.6172285268634503e-05, |
| "loss": 0.3966, |
| "step": 472 |
| }, |
| { |
| "epoch": 2.159201141226819, |
| "grad_norm": 0.2977772434873077, |
| "learning_rate": 5.6055517980868434e-05, |
| "loss": 0.3953, |
| "step": 473 |
| }, |
| { |
| "epoch": 2.16376604850214, |
| "grad_norm": 0.29810536611214883, |
| "learning_rate": 5.59385873686399e-05, |
| "loss": 0.3918, |
| "step": 474 |
| }, |
| { |
| "epoch": 2.168330955777461, |
| "grad_norm": 0.2852081639489458, |
| "learning_rate": 5.582149462142341e-05, |
| "loss": 0.3917, |
| "step": 475 |
| }, |
| { |
| "epoch": 2.1728958630527817, |
| "grad_norm": 0.3081209690427737, |
| "learning_rate": 5.570424093034279e-05, |
| "loss": 0.3968, |
| "step": 476 |
| }, |
| { |
| "epoch": 2.1774607703281026, |
| "grad_norm": 0.29194114681550415, |
| "learning_rate": 5.558682748815907e-05, |
| "loss": 0.3976, |
| "step": 477 |
| }, |
| { |
| "epoch": 2.1820256776034235, |
| "grad_norm": 0.2933275523587884, |
| "learning_rate": 5.546925548925831e-05, |
| "loss": 0.396, |
| "step": 478 |
| }, |
| { |
| "epoch": 2.186590584878745, |
| "grad_norm": 0.3010159866072722, |
| "learning_rate": 5.5351526129639556e-05, |
| "loss": 0.3947, |
| "step": 479 |
| }, |
| { |
| "epoch": 2.1911554921540657, |
| "grad_norm": 0.22593376332738507, |
| "learning_rate": 5.523364060690253e-05, |
| "loss": 0.3947, |
| "step": 480 |
| }, |
| { |
| "epoch": 2.1957203994293866, |
| "grad_norm": 0.263407864251361, |
| "learning_rate": 5.511560012023558e-05, |
| "loss": 0.3968, |
| "step": 481 |
| }, |
| { |
| "epoch": 2.2002853067047075, |
| "grad_norm": 0.2621235557428232, |
| "learning_rate": 5.499740587040337e-05, |
| "loss": 0.3957, |
| "step": 482 |
| }, |
| { |
| "epoch": 2.2048502139800283, |
| "grad_norm": 0.28672274408401716, |
| "learning_rate": 5.487905905973474e-05, |
| "loss": 0.3982, |
| "step": 483 |
| }, |
| { |
| "epoch": 2.2094151212553497, |
| "grad_norm": 0.2961044553045987, |
| "learning_rate": 5.476056089211047e-05, |
| "loss": 0.3953, |
| "step": 484 |
| }, |
| { |
| "epoch": 2.2139800285306706, |
| "grad_norm": 0.24362816536099371, |
| "learning_rate": 5.464191257295099e-05, |
| "loss": 0.3947, |
| "step": 485 |
| }, |
| { |
| "epoch": 2.2185449358059914, |
| "grad_norm": 0.20568524425771714, |
| "learning_rate": 5.4523115309204154e-05, |
| "loss": 0.3904, |
| "step": 486 |
| }, |
| { |
| "epoch": 2.2231098430813123, |
| "grad_norm": 0.23433795861015624, |
| "learning_rate": 5.440417030933296e-05, |
| "loss": 0.3887, |
| "step": 487 |
| }, |
| { |
| "epoch": 2.227674750356633, |
| "grad_norm": 0.2637906019753822, |
| "learning_rate": 5.4285078783303204e-05, |
| "loss": 0.398, |
| "step": 488 |
| }, |
| { |
| "epoch": 2.2322396576319545, |
| "grad_norm": 0.27336753680428566, |
| "learning_rate": 5.41658419425713e-05, |
| "loss": 0.4012, |
| "step": 489 |
| }, |
| { |
| "epoch": 2.2368045649072754, |
| "grad_norm": 0.36009442351022874, |
| "learning_rate": 5.404646100007179e-05, |
| "loss": 0.3946, |
| "step": 490 |
| }, |
| { |
| "epoch": 2.2413694721825963, |
| "grad_norm": 0.34745386802364503, |
| "learning_rate": 5.3926937170205147e-05, |
| "loss": 0.3988, |
| "step": 491 |
| }, |
| { |
| "epoch": 2.245934379457917, |
| "grad_norm": 0.2560762047673561, |
| "learning_rate": 5.3807271668825336e-05, |
| "loss": 0.3981, |
| "step": 492 |
| }, |
| { |
| "epoch": 2.250499286733238, |
| "grad_norm": 0.3039971848868009, |
| "learning_rate": 5.368746571322746e-05, |
| "loss": 0.3983, |
| "step": 493 |
| }, |
| { |
| "epoch": 2.2550641940085594, |
| "grad_norm": 0.3132362113371044, |
| "learning_rate": 5.356752052213543e-05, |
| "loss": 0.3949, |
| "step": 494 |
| }, |
| { |
| "epoch": 2.2596291012838803, |
| "grad_norm": 0.24114011641199656, |
| "learning_rate": 5.344743731568947e-05, |
| "loss": 0.398, |
| "step": 495 |
| }, |
| { |
| "epoch": 2.264194008559201, |
| "grad_norm": 0.3047112832257029, |
| "learning_rate": 5.3327217315433836e-05, |
| "loss": 0.3942, |
| "step": 496 |
| }, |
| { |
| "epoch": 2.268758915834522, |
| "grad_norm": 0.2902868163178195, |
| "learning_rate": 5.320686174430426e-05, |
| "loss": 0.3968, |
| "step": 497 |
| }, |
| { |
| "epoch": 2.273323823109843, |
| "grad_norm": 0.21554384652324848, |
| "learning_rate": 5.30863718266156e-05, |
| "loss": 0.3945, |
| "step": 498 |
| }, |
| { |
| "epoch": 2.277888730385164, |
| "grad_norm": 0.27255050697912414, |
| "learning_rate": 5.296574878804931e-05, |
| "loss": 0.3968, |
| "step": 499 |
| }, |
| { |
| "epoch": 2.282453637660485, |
| "grad_norm": 0.24417856593881826, |
| "learning_rate": 5.284499385564105e-05, |
| "loss": 0.3943, |
| "step": 500 |
| }, |
| { |
| "epoch": 2.287018544935806, |
| "grad_norm": 0.2765091011577604, |
| "learning_rate": 5.272410825776817e-05, |
| "loss": 0.3977, |
| "step": 501 |
| }, |
| { |
| "epoch": 2.291583452211127, |
| "grad_norm": 0.25251367644483425, |
| "learning_rate": 5.260309322413717e-05, |
| "loss": 0.3965, |
| "step": 502 |
| }, |
| { |
| "epoch": 2.296148359486448, |
| "grad_norm": 0.225515136380665, |
| "learning_rate": 5.2481949985771296e-05, |
| "loss": 0.397, |
| "step": 503 |
| }, |
| { |
| "epoch": 2.300713266761769, |
| "grad_norm": 0.2463270131913551, |
| "learning_rate": 5.23606797749979e-05, |
| "loss": 0.3911, |
| "step": 504 |
| }, |
| { |
| "epoch": 2.30527817403709, |
| "grad_norm": 0.2004757886081446, |
| "learning_rate": 5.223928382543599e-05, |
| "loss": 0.4003, |
| "step": 505 |
| }, |
| { |
| "epoch": 2.309843081312411, |
| "grad_norm": 0.2455120366926149, |
| "learning_rate": 5.211776337198362e-05, |
| "loss": 0.3966, |
| "step": 506 |
| }, |
| { |
| "epoch": 2.314407988587732, |
| "grad_norm": 0.22468493984434562, |
| "learning_rate": 5.199611965080539e-05, |
| "loss": 0.3936, |
| "step": 507 |
| }, |
| { |
| "epoch": 2.3189728958630527, |
| "grad_norm": 0.2272194472097314, |
| "learning_rate": 5.187435389931984e-05, |
| "loss": 0.3995, |
| "step": 508 |
| }, |
| { |
| "epoch": 2.3235378031383735, |
| "grad_norm": 0.24682570308387836, |
| "learning_rate": 5.175246735618681e-05, |
| "loss": 0.3936, |
| "step": 509 |
| }, |
| { |
| "epoch": 2.328102710413695, |
| "grad_norm": 0.27271797206857185, |
| "learning_rate": 5.163046126129496e-05, |
| "loss": 0.3979, |
| "step": 510 |
| }, |
| { |
| "epoch": 2.3326676176890158, |
| "grad_norm": 0.32336247263945955, |
| "learning_rate": 5.1508336855749046e-05, |
| "loss": 0.3984, |
| "step": 511 |
| }, |
| { |
| "epoch": 2.3372325249643366, |
| "grad_norm": 0.3117192297000078, |
| "learning_rate": 5.138609538185732e-05, |
| "loss": 0.3957, |
| "step": 512 |
| }, |
| { |
| "epoch": 2.3417974322396575, |
| "grad_norm": 0.24977202926389208, |
| "learning_rate": 5.126373808311897e-05, |
| "loss": 0.401, |
| "step": 513 |
| }, |
| { |
| "epoch": 2.3463623395149784, |
| "grad_norm": 0.2724907116929327, |
| "learning_rate": 5.114126620421135e-05, |
| "loss": 0.3992, |
| "step": 514 |
| }, |
| { |
| "epoch": 2.3509272467902997, |
| "grad_norm": 0.30820474754190696, |
| "learning_rate": 5.101868099097741e-05, |
| "loss": 0.3891, |
| "step": 515 |
| }, |
| { |
| "epoch": 2.3554921540656206, |
| "grad_norm": 0.3185274771440913, |
| "learning_rate": 5.0895983690413013e-05, |
| "loss": 0.3901, |
| "step": 516 |
| }, |
| { |
| "epoch": 2.3600570613409415, |
| "grad_norm": 0.2581547898350787, |
| "learning_rate": 5.077317555065417e-05, |
| "loss": 0.3996, |
| "step": 517 |
| }, |
| { |
| "epoch": 2.3646219686162624, |
| "grad_norm": 0.3277207154296582, |
| "learning_rate": 5.065025782096443e-05, |
| "loss": 0.4004, |
| "step": 518 |
| }, |
| { |
| "epoch": 2.3691868758915833, |
| "grad_norm": 0.3507469445057821, |
| "learning_rate": 5.052723175172216e-05, |
| "loss": 0.3961, |
| "step": 519 |
| }, |
| { |
| "epoch": 2.3737517831669046, |
| "grad_norm": 0.31296981719792, |
| "learning_rate": 5.04040985944078e-05, |
| "loss": 0.3916, |
| "step": 520 |
| }, |
| { |
| "epoch": 2.3783166904422255, |
| "grad_norm": 0.25011058332771036, |
| "learning_rate": 5.0280859601591134e-05, |
| "loss": 0.3946, |
| "step": 521 |
| }, |
| { |
| "epoch": 2.3828815977175464, |
| "grad_norm": 0.2843447982406519, |
| "learning_rate": 5.015751602691853e-05, |
| "loss": 0.3951, |
| "step": 522 |
| }, |
| { |
| "epoch": 2.3874465049928673, |
| "grad_norm": 0.2611424665922288, |
| "learning_rate": 5.003406912510028e-05, |
| "loss": 0.3905, |
| "step": 523 |
| }, |
| { |
| "epoch": 2.392011412268188, |
| "grad_norm": 0.24619751471031087, |
| "learning_rate": 4.99105201518977e-05, |
| "loss": 0.395, |
| "step": 524 |
| }, |
| { |
| "epoch": 2.3965763195435095, |
| "grad_norm": 0.2808716720105363, |
| "learning_rate": 4.9786870364110496e-05, |
| "loss": 0.3999, |
| "step": 525 |
| }, |
| { |
| "epoch": 2.4011412268188304, |
| "grad_norm": 0.2353990460137413, |
| "learning_rate": 4.9663121019563825e-05, |
| "loss": 0.3998, |
| "step": 526 |
| }, |
| { |
| "epoch": 2.4057061340941512, |
| "grad_norm": 0.2143855784356749, |
| "learning_rate": 4.953927337709564e-05, |
| "loss": 0.39, |
| "step": 527 |
| }, |
| { |
| "epoch": 2.410271041369472, |
| "grad_norm": 0.30814345667504994, |
| "learning_rate": 4.941532869654383e-05, |
| "loss": 0.3946, |
| "step": 528 |
| }, |
| { |
| "epoch": 2.414835948644793, |
| "grad_norm": 0.2524848431138124, |
| "learning_rate": 4.929128823873338e-05, |
| "loss": 0.3916, |
| "step": 529 |
| }, |
| { |
| "epoch": 2.419400855920114, |
| "grad_norm": 0.2949956098139477, |
| "learning_rate": 4.916715326546356e-05, |
| "loss": 0.3973, |
| "step": 530 |
| }, |
| { |
| "epoch": 2.423965763195435, |
| "grad_norm": 0.2866631443858236, |
| "learning_rate": 4.9042925039495126e-05, |
| "loss": 0.4008, |
| "step": 531 |
| }, |
| { |
| "epoch": 2.428530670470756, |
| "grad_norm": 0.24434538853731672, |
| "learning_rate": 4.8918604824537426e-05, |
| "loss": 0.3925, |
| "step": 532 |
| }, |
| { |
| "epoch": 2.433095577746077, |
| "grad_norm": 0.27376916436365306, |
| "learning_rate": 4.879419388523557e-05, |
| "loss": 0.396, |
| "step": 533 |
| }, |
| { |
| "epoch": 2.437660485021398, |
| "grad_norm": 0.29192166428895333, |
| "learning_rate": 4.866969348715755e-05, |
| "loss": 0.3939, |
| "step": 534 |
| }, |
| { |
| "epoch": 2.442225392296719, |
| "grad_norm": 0.22678026571928478, |
| "learning_rate": 4.8545104896781396e-05, |
| "loss": 0.3931, |
| "step": 535 |
| }, |
| { |
| "epoch": 2.44679029957204, |
| "grad_norm": 0.2714093005509255, |
| "learning_rate": 4.8420429381482254e-05, |
| "loss": 0.3918, |
| "step": 536 |
| }, |
| { |
| "epoch": 2.451355206847361, |
| "grad_norm": 0.3672921491066907, |
| "learning_rate": 4.829566820951953e-05, |
| "loss": 0.3989, |
| "step": 537 |
| }, |
| { |
| "epoch": 2.455920114122682, |
| "grad_norm": 0.37866544196679924, |
| "learning_rate": 4.817082265002395e-05, |
| "loss": 0.3945, |
| "step": 538 |
| }, |
| { |
| "epoch": 2.4604850213980027, |
| "grad_norm": 0.342928509500153, |
| "learning_rate": 4.804589397298467e-05, |
| "loss": 0.3926, |
| "step": 539 |
| }, |
| { |
| "epoch": 2.4650499286733236, |
| "grad_norm": 0.30273238621233484, |
| "learning_rate": 4.792088344923639e-05, |
| "loss": 0.3964, |
| "step": 540 |
| }, |
| { |
| "epoch": 2.469614835948645, |
| "grad_norm": 0.2137070264105446, |
| "learning_rate": 4.779579235044635e-05, |
| "loss": 0.4013, |
| "step": 541 |
| }, |
| { |
| "epoch": 2.474179743223966, |
| "grad_norm": 0.3118277046924234, |
| "learning_rate": 4.767062194910147e-05, |
| "loss": 0.3927, |
| "step": 542 |
| }, |
| { |
| "epoch": 2.4787446504992867, |
| "grad_norm": 0.34415527393891965, |
| "learning_rate": 4.7545373518495376e-05, |
| "loss": 0.3981, |
| "step": 543 |
| }, |
| { |
| "epoch": 2.4833095577746076, |
| "grad_norm": 0.25521983324842423, |
| "learning_rate": 4.7420048332715424e-05, |
| "loss": 0.3993, |
| "step": 544 |
| }, |
| { |
| "epoch": 2.4878744650499285, |
| "grad_norm": 0.2861375081450235, |
| "learning_rate": 4.7294647666629764e-05, |
| "loss": 0.3962, |
| "step": 545 |
| }, |
| { |
| "epoch": 2.49243937232525, |
| "grad_norm": 0.35327944054617416, |
| "learning_rate": 4.716917279587438e-05, |
| "loss": 0.3929, |
| "step": 546 |
| }, |
| { |
| "epoch": 2.4970042796005707, |
| "grad_norm": 0.3065127258311644, |
| "learning_rate": 4.704362499684009e-05, |
| "loss": 0.3917, |
| "step": 547 |
| }, |
| { |
| "epoch": 2.5015691868758916, |
| "grad_norm": 0.2971240514783471, |
| "learning_rate": 4.691800554665959e-05, |
| "loss": 0.3951, |
| "step": 548 |
| }, |
| { |
| "epoch": 2.5061340941512125, |
| "grad_norm": 0.2526840084770543, |
| "learning_rate": 4.679231572319442e-05, |
| "loss": 0.3965, |
| "step": 549 |
| }, |
| { |
| "epoch": 2.5106990014265333, |
| "grad_norm": 0.27506914680614103, |
| "learning_rate": 4.666655680502203e-05, |
| "loss": 0.3909, |
| "step": 550 |
| }, |
| { |
| "epoch": 2.5152639087018542, |
| "grad_norm": 0.30990634996692296, |
| "learning_rate": 4.654073007142268e-05, |
| "loss": 0.3991, |
| "step": 551 |
| }, |
| { |
| "epoch": 2.5198288159771756, |
| "grad_norm": 0.2418594039084614, |
| "learning_rate": 4.641483680236654e-05, |
| "loss": 0.3942, |
| "step": 552 |
| }, |
| { |
| "epoch": 2.5243937232524964, |
| "grad_norm": 0.2176242576204114, |
| "learning_rate": 4.628887827850056e-05, |
| "loss": 0.3925, |
| "step": 553 |
| }, |
| { |
| "epoch": 2.5289586305278173, |
| "grad_norm": 0.21882885808826003, |
| "learning_rate": 4.6162855781135534e-05, |
| "loss": 0.3967, |
| "step": 554 |
| }, |
| { |
| "epoch": 2.533523537803138, |
| "grad_norm": 0.2753010159681235, |
| "learning_rate": 4.6036770592233e-05, |
| "loss": 0.3988, |
| "step": 555 |
| }, |
| { |
| "epoch": 2.5380884450784595, |
| "grad_norm": 0.27865187586221307, |
| "learning_rate": 4.591062399439223e-05, |
| "loss": 0.3892, |
| "step": 556 |
| }, |
| { |
| "epoch": 2.5426533523537804, |
| "grad_norm": 0.25469855214873266, |
| "learning_rate": 4.578441727083718e-05, |
| "loss": 0.3975, |
| "step": 557 |
| }, |
| { |
| "epoch": 2.5472182596291013, |
| "grad_norm": 0.20957376603739114, |
| "learning_rate": 4.5658151705403416e-05, |
| "loss": 0.4014, |
| "step": 558 |
| }, |
| { |
| "epoch": 2.551783166904422, |
| "grad_norm": 0.2600853257379872, |
| "learning_rate": 4.553182858252514e-05, |
| "loss": 0.3971, |
| "step": 559 |
| }, |
| { |
| "epoch": 2.556348074179743, |
| "grad_norm": 0.2776565181123319, |
| "learning_rate": 4.540544918722195e-05, |
| "loss": 0.4039, |
| "step": 560 |
| }, |
| { |
| "epoch": 2.560912981455064, |
| "grad_norm": 0.23379879408515905, |
| "learning_rate": 4.527901480508595e-05, |
| "loss": 0.3945, |
| "step": 561 |
| }, |
| { |
| "epoch": 2.5654778887303853, |
| "grad_norm": 0.24124737895746404, |
| "learning_rate": 4.515252672226858e-05, |
| "loss": 0.3945, |
| "step": 562 |
| }, |
| { |
| "epoch": 2.570042796005706, |
| "grad_norm": 0.2917521663677071, |
| "learning_rate": 4.5025986225467533e-05, |
| "loss": 0.3934, |
| "step": 563 |
| }, |
| { |
| "epoch": 2.574607703281027, |
| "grad_norm": 0.2530412971141297, |
| "learning_rate": 4.4899394601913724e-05, |
| "loss": 0.3937, |
| "step": 564 |
| }, |
| { |
| "epoch": 2.579172610556348, |
| "grad_norm": 0.22637706164941146, |
| "learning_rate": 4.477275313935807e-05, |
| "loss": 0.3926, |
| "step": 565 |
| }, |
| { |
| "epoch": 2.5837375178316693, |
| "grad_norm": 0.23838840534635813, |
| "learning_rate": 4.464606312605858e-05, |
| "loss": 0.3977, |
| "step": 566 |
| }, |
| { |
| "epoch": 2.58830242510699, |
| "grad_norm": 0.24947138899802643, |
| "learning_rate": 4.451932585076707e-05, |
| "loss": 0.3959, |
| "step": 567 |
| }, |
| { |
| "epoch": 2.592867332382311, |
| "grad_norm": 0.27260964086966344, |
| "learning_rate": 4.439254260271615e-05, |
| "loss": 0.3914, |
| "step": 568 |
| }, |
| { |
| "epoch": 2.597432239657632, |
| "grad_norm": 0.23394296464858344, |
| "learning_rate": 4.426571467160609e-05, |
| "loss": 0.3987, |
| "step": 569 |
| }, |
| { |
| "epoch": 2.601997146932953, |
| "grad_norm": 0.24289503390540143, |
| "learning_rate": 4.413884334759169e-05, |
| "loss": 0.3939, |
| "step": 570 |
| }, |
| { |
| "epoch": 2.6065620542082737, |
| "grad_norm": 0.29117956540296497, |
| "learning_rate": 4.401192992126918e-05, |
| "loss": 0.3956, |
| "step": 571 |
| }, |
| { |
| "epoch": 2.611126961483595, |
| "grad_norm": 0.2237089907238584, |
| "learning_rate": 4.3884975683663076e-05, |
| "loss": 0.3955, |
| "step": 572 |
| }, |
| { |
| "epoch": 2.615691868758916, |
| "grad_norm": 0.24264576691075865, |
| "learning_rate": 4.375798192621298e-05, |
| "loss": 0.3928, |
| "step": 573 |
| }, |
| { |
| "epoch": 2.620256776034237, |
| "grad_norm": 0.2751685982168088, |
| "learning_rate": 4.363094994076063e-05, |
| "loss": 0.3966, |
| "step": 574 |
| }, |
| { |
| "epoch": 2.6248216833095577, |
| "grad_norm": 0.28747813965799607, |
| "learning_rate": 4.350388101953652e-05, |
| "loss": 0.3943, |
| "step": 575 |
| }, |
| { |
| "epoch": 2.629386590584879, |
| "grad_norm": 0.2805974268000736, |
| "learning_rate": 4.337677645514696e-05, |
| "loss": 0.3937, |
| "step": 576 |
| }, |
| { |
| "epoch": 2.6339514978602, |
| "grad_norm": 0.25668107207724133, |
| "learning_rate": 4.3249637540560775e-05, |
| "loss": 0.3992, |
| "step": 577 |
| }, |
| { |
| "epoch": 2.6385164051355208, |
| "grad_norm": 0.22123626260157714, |
| "learning_rate": 4.312246556909625e-05, |
| "loss": 0.3905, |
| "step": 578 |
| }, |
| { |
| "epoch": 2.6430813124108417, |
| "grad_norm": 0.2889764064202931, |
| "learning_rate": 4.299526183440795e-05, |
| "loss": 0.3939, |
| "step": 579 |
| }, |
| { |
| "epoch": 2.6476462196861625, |
| "grad_norm": 0.2813860421214819, |
| "learning_rate": 4.286802763047351e-05, |
| "loss": 0.3915, |
| "step": 580 |
| }, |
| { |
| "epoch": 2.6522111269614834, |
| "grad_norm": 0.19910247285499882, |
| "learning_rate": 4.274076425158056e-05, |
| "loss": 0.3948, |
| "step": 581 |
| }, |
| { |
| "epoch": 2.6567760342368043, |
| "grad_norm": 0.29602208357801046, |
| "learning_rate": 4.2613472992313475e-05, |
| "loss": 0.394, |
| "step": 582 |
| }, |
| { |
| "epoch": 2.6613409415121256, |
| "grad_norm": 0.3211859467751027, |
| "learning_rate": 4.2486155147540275e-05, |
| "loss": 0.3952, |
| "step": 583 |
| }, |
| { |
| "epoch": 2.6659058487874465, |
| "grad_norm": 0.2957713814210968, |
| "learning_rate": 4.235881201239938e-05, |
| "loss": 0.3944, |
| "step": 584 |
| }, |
| { |
| "epoch": 2.6704707560627674, |
| "grad_norm": 0.19210627965781088, |
| "learning_rate": 4.22314448822865e-05, |
| "loss": 0.3973, |
| "step": 585 |
| }, |
| { |
| "epoch": 2.6750356633380883, |
| "grad_norm": 0.22185230850956422, |
| "learning_rate": 4.210405505284146e-05, |
| "loss": 0.3913, |
| "step": 586 |
| }, |
| { |
| "epoch": 2.6796005706134096, |
| "grad_norm": 0.26397988476550216, |
| "learning_rate": 4.197664381993495e-05, |
| "loss": 0.3933, |
| "step": 587 |
| }, |
| { |
| "epoch": 2.6841654778887305, |
| "grad_norm": 0.2616617955056205, |
| "learning_rate": 4.1849212479655404e-05, |
| "loss": 0.3978, |
| "step": 588 |
| }, |
| { |
| "epoch": 2.6887303851640514, |
| "grad_norm": 0.2067735440913514, |
| "learning_rate": 4.172176232829579e-05, |
| "loss": 0.3918, |
| "step": 589 |
| }, |
| { |
| "epoch": 2.6932952924393723, |
| "grad_norm": 0.2129097941200269, |
| "learning_rate": 4.159429466234042e-05, |
| "loss": 0.3934, |
| "step": 590 |
| }, |
| { |
| "epoch": 2.697860199714693, |
| "grad_norm": 0.19320324303685169, |
| "learning_rate": 4.146681077845184e-05, |
| "loss": 0.4005, |
| "step": 591 |
| }, |
| { |
| "epoch": 2.702425106990014, |
| "grad_norm": 0.21243002834135477, |
| "learning_rate": 4.133931197345747e-05, |
| "loss": 0.3914, |
| "step": 592 |
| }, |
| { |
| "epoch": 2.7069900142653354, |
| "grad_norm": 0.21832438259968404, |
| "learning_rate": 4.1211799544336604e-05, |
| "loss": 0.3969, |
| "step": 593 |
| }, |
| { |
| "epoch": 2.7115549215406562, |
| "grad_norm": 0.22033236523137045, |
| "learning_rate": 4.108427478820707e-05, |
| "loss": 0.3914, |
| "step": 594 |
| }, |
| { |
| "epoch": 2.716119828815977, |
| "grad_norm": 0.22106513057663857, |
| "learning_rate": 4.095673900231212e-05, |
| "loss": 0.3951, |
| "step": 595 |
| }, |
| { |
| "epoch": 2.720684736091298, |
| "grad_norm": 0.22871838319618967, |
| "learning_rate": 4.0829193484007216e-05, |
| "loss": 0.3965, |
| "step": 596 |
| }, |
| { |
| "epoch": 2.7252496433666193, |
| "grad_norm": 0.22285026812146833, |
| "learning_rate": 4.070163953074676e-05, |
| "loss": 0.3896, |
| "step": 597 |
| }, |
| { |
| "epoch": 2.7298145506419402, |
| "grad_norm": 0.21511862495234116, |
| "learning_rate": 4.0574078440071056e-05, |
| "loss": 0.3908, |
| "step": 598 |
| }, |
| { |
| "epoch": 2.734379457917261, |
| "grad_norm": 0.21107167137669283, |
| "learning_rate": 4.044651150959294e-05, |
| "loss": 0.3917, |
| "step": 599 |
| }, |
| { |
| "epoch": 2.738944365192582, |
| "grad_norm": 0.23870814693322587, |
| "learning_rate": 4.031894003698467e-05, |
| "loss": 0.3955, |
| "step": 600 |
| }, |
| { |
| "epoch": 2.743509272467903, |
| "grad_norm": 0.22220033003757905, |
| "learning_rate": 4.0191365319964724e-05, |
| "loss": 0.3917, |
| "step": 601 |
| }, |
| { |
| "epoch": 2.7480741797432238, |
| "grad_norm": 0.19453278202618698, |
| "learning_rate": 4.006378865628455e-05, |
| "loss": 0.3893, |
| "step": 602 |
| }, |
| { |
| "epoch": 2.752639087018545, |
| "grad_norm": 0.23239542400687424, |
| "learning_rate": 3.993621134371545e-05, |
| "loss": 0.3933, |
| "step": 603 |
| }, |
| { |
| "epoch": 2.757203994293866, |
| "grad_norm": 0.24428871107116487, |
| "learning_rate": 3.980863468003529e-05, |
| "loss": 0.3935, |
| "step": 604 |
| }, |
| { |
| "epoch": 2.761768901569187, |
| "grad_norm": 0.19760601046120077, |
| "learning_rate": 3.968105996301535e-05, |
| "loss": 0.3918, |
| "step": 605 |
| }, |
| { |
| "epoch": 2.7663338088445077, |
| "grad_norm": 0.2501742444848214, |
| "learning_rate": 3.955348849040707e-05, |
| "loss": 0.3897, |
| "step": 606 |
| }, |
| { |
| "epoch": 2.770898716119829, |
| "grad_norm": 0.2319084390296337, |
| "learning_rate": 3.942592155992895e-05, |
| "loss": 0.3961, |
| "step": 607 |
| }, |
| { |
| "epoch": 2.77546362339515, |
| "grad_norm": 0.2244356749086854, |
| "learning_rate": 3.929836046925323e-05, |
| "loss": 0.3943, |
| "step": 608 |
| }, |
| { |
| "epoch": 2.780028530670471, |
| "grad_norm": 0.21979544227639491, |
| "learning_rate": 3.91708065159928e-05, |
| "loss": 0.3955, |
| "step": 609 |
| }, |
| { |
| "epoch": 2.7845934379457917, |
| "grad_norm": 0.19742195901212328, |
| "learning_rate": 3.904326099768789e-05, |
| "loss": 0.3949, |
| "step": 610 |
| }, |
| { |
| "epoch": 2.7891583452211126, |
| "grad_norm": 0.22234755877545048, |
| "learning_rate": 3.8915725211792944e-05, |
| "loss": 0.3953, |
| "step": 611 |
| }, |
| { |
| "epoch": 2.7937232524964335, |
| "grad_norm": 0.2231966829166695, |
| "learning_rate": 3.8788200455663416e-05, |
| "loss": 0.3881, |
| "step": 612 |
| }, |
| { |
| "epoch": 2.7982881597717544, |
| "grad_norm": 0.23610363359578615, |
| "learning_rate": 3.8660688026542544e-05, |
| "loss": 0.3953, |
| "step": 613 |
| }, |
| { |
| "epoch": 2.8028530670470757, |
| "grad_norm": 0.24368628185935592, |
| "learning_rate": 3.853318922154818e-05, |
| "loss": 0.3913, |
| "step": 614 |
| }, |
| { |
| "epoch": 2.8074179743223966, |
| "grad_norm": 0.22986532483466307, |
| "learning_rate": 3.840570533765959e-05, |
| "loss": 0.393, |
| "step": 615 |
| }, |
| { |
| "epoch": 2.8119828815977175, |
| "grad_norm": 0.2384209501930521, |
| "learning_rate": 3.827823767170423e-05, |
| "loss": 0.391, |
| "step": 616 |
| }, |
| { |
| "epoch": 2.8165477888730384, |
| "grad_norm": 0.22125793484405842, |
| "learning_rate": 3.815078752034461e-05, |
| "loss": 0.3946, |
| "step": 617 |
| }, |
| { |
| "epoch": 2.8211126961483597, |
| "grad_norm": 0.2535177819158247, |
| "learning_rate": 3.802335618006506e-05, |
| "loss": 0.3915, |
| "step": 618 |
| }, |
| { |
| "epoch": 2.8256776034236806, |
| "grad_norm": 0.2396475170274598, |
| "learning_rate": 3.7895944947158535e-05, |
| "loss": 0.3958, |
| "step": 619 |
| }, |
| { |
| "epoch": 2.8302425106990015, |
| "grad_norm": 0.20749976321329167, |
| "learning_rate": 3.77685551177135e-05, |
| "loss": 0.3952, |
| "step": 620 |
| }, |
| { |
| "epoch": 2.8348074179743223, |
| "grad_norm": 0.16508755912089354, |
| "learning_rate": 3.764118798760064e-05, |
| "loss": 0.3844, |
| "step": 621 |
| }, |
| { |
| "epoch": 2.8393723252496432, |
| "grad_norm": 0.20726854721909735, |
| "learning_rate": 3.7513844852459745e-05, |
| "loss": 0.3945, |
| "step": 622 |
| }, |
| { |
| "epoch": 2.843937232524964, |
| "grad_norm": 0.18560003219863108, |
| "learning_rate": 3.738652700768653e-05, |
| "loss": 0.3974, |
| "step": 623 |
| }, |
| { |
| "epoch": 2.8485021398002854, |
| "grad_norm": 0.17587194547537272, |
| "learning_rate": 3.725923574841945e-05, |
| "loss": 0.3903, |
| "step": 624 |
| }, |
| { |
| "epoch": 2.8530670470756063, |
| "grad_norm": 0.2133689614478487, |
| "learning_rate": 3.7131972369526505e-05, |
| "loss": 0.3939, |
| "step": 625 |
| }, |
| { |
| "epoch": 2.857631954350927, |
| "grad_norm": 0.17416385677862986, |
| "learning_rate": 3.700473816559207e-05, |
| "loss": 0.3913, |
| "step": 626 |
| }, |
| { |
| "epoch": 2.862196861626248, |
| "grad_norm": 0.17910642220727754, |
| "learning_rate": 3.687753443090375e-05, |
| "loss": 0.3991, |
| "step": 627 |
| }, |
| { |
| "epoch": 2.8667617689015694, |
| "grad_norm": 0.18645917268183274, |
| "learning_rate": 3.675036245943923e-05, |
| "loss": 0.3933, |
| "step": 628 |
| }, |
| { |
| "epoch": 2.8713266761768903, |
| "grad_norm": 0.22702514829934878, |
| "learning_rate": 3.662322354485306e-05, |
| "loss": 0.3927, |
| "step": 629 |
| }, |
| { |
| "epoch": 2.875891583452211, |
| "grad_norm": 0.19751990109383138, |
| "learning_rate": 3.6496118980463486e-05, |
| "loss": 0.3913, |
| "step": 630 |
| }, |
| { |
| "epoch": 2.880456490727532, |
| "grad_norm": 0.17351220103657103, |
| "learning_rate": 3.6369050059239384e-05, |
| "loss": 0.3918, |
| "step": 631 |
| }, |
| { |
| "epoch": 2.885021398002853, |
| "grad_norm": 0.20221410813950186, |
| "learning_rate": 3.624201807378703e-05, |
| "loss": 0.3934, |
| "step": 632 |
| }, |
| { |
| "epoch": 2.889586305278174, |
| "grad_norm": 0.15933130426619596, |
| "learning_rate": 3.6115024316336944e-05, |
| "loss": 0.3891, |
| "step": 633 |
| }, |
| { |
| "epoch": 2.894151212553495, |
| "grad_norm": 0.18767258317171362, |
| "learning_rate": 3.598807007873083e-05, |
| "loss": 0.3935, |
| "step": 634 |
| }, |
| { |
| "epoch": 2.898716119828816, |
| "grad_norm": 0.17072434668538097, |
| "learning_rate": 3.586115665240832e-05, |
| "loss": 0.3923, |
| "step": 635 |
| }, |
| { |
| "epoch": 2.903281027104137, |
| "grad_norm": 0.19788995991405794, |
| "learning_rate": 3.573428532839392e-05, |
| "loss": 0.3924, |
| "step": 636 |
| }, |
| { |
| "epoch": 2.907845934379458, |
| "grad_norm": 0.18823937945201608, |
| "learning_rate": 3.560745739728387e-05, |
| "loss": 0.389, |
| "step": 637 |
| }, |
| { |
| "epoch": 2.912410841654779, |
| "grad_norm": 0.16176660376720053, |
| "learning_rate": 3.548067414923294e-05, |
| "loss": 0.3974, |
| "step": 638 |
| }, |
| { |
| "epoch": 2.9169757489301, |
| "grad_norm": 0.15959896392390563, |
| "learning_rate": 3.5353936873941435e-05, |
| "loss": 0.3888, |
| "step": 639 |
| }, |
| { |
| "epoch": 2.921540656205421, |
| "grad_norm": 0.161034937587383, |
| "learning_rate": 3.522724686064194e-05, |
| "loss": 0.3953, |
| "step": 640 |
| }, |
| { |
| "epoch": 2.926105563480742, |
| "grad_norm": 0.18385006824388012, |
| "learning_rate": 3.5100605398086296e-05, |
| "loss": 0.3923, |
| "step": 641 |
| }, |
| { |
| "epoch": 2.9306704707560627, |
| "grad_norm": 0.16430295552203472, |
| "learning_rate": 3.497401377453247e-05, |
| "loss": 0.3916, |
| "step": 642 |
| }, |
| { |
| "epoch": 2.9352353780313836, |
| "grad_norm": 0.15722062849324564, |
| "learning_rate": 3.484747327773142e-05, |
| "loss": 0.3887, |
| "step": 643 |
| }, |
| { |
| "epoch": 2.9398002853067045, |
| "grad_norm": 0.14393903212852108, |
| "learning_rate": 3.472098519491406e-05, |
| "loss": 0.3914, |
| "step": 644 |
| }, |
| { |
| "epoch": 2.944365192582026, |
| "grad_norm": 0.17232972585909226, |
| "learning_rate": 3.459455081277806e-05, |
| "loss": 0.393, |
| "step": 645 |
| }, |
| { |
| "epoch": 2.9489300998573467, |
| "grad_norm": 0.16872545932202826, |
| "learning_rate": 3.446817141747487e-05, |
| "loss": 0.394, |
| "step": 646 |
| }, |
| { |
| "epoch": 2.9534950071326675, |
| "grad_norm": 0.20373345038244411, |
| "learning_rate": 3.434184829459659e-05, |
| "loss": 0.3955, |
| "step": 647 |
| }, |
| { |
| "epoch": 2.9580599144079884, |
| "grad_norm": 0.16582792220621265, |
| "learning_rate": 3.421558272916284e-05, |
| "loss": 0.3897, |
| "step": 648 |
| }, |
| { |
| "epoch": 2.9626248216833098, |
| "grad_norm": 0.17195177157795527, |
| "learning_rate": 3.408937600560778e-05, |
| "loss": 0.3931, |
| "step": 649 |
| }, |
| { |
| "epoch": 2.9671897289586306, |
| "grad_norm": 0.17099363542536267, |
| "learning_rate": 3.3963229407767014e-05, |
| "loss": 0.3932, |
| "step": 650 |
| }, |
| { |
| "epoch": 2.9717546362339515, |
| "grad_norm": 0.18406454501706795, |
| "learning_rate": 3.3837144218864466e-05, |
| "loss": 0.3926, |
| "step": 651 |
| }, |
| { |
| "epoch": 2.9763195435092724, |
| "grad_norm": 0.17696057601736395, |
| "learning_rate": 3.371112172149945e-05, |
| "loss": 0.3951, |
| "step": 652 |
| }, |
| { |
| "epoch": 2.9808844507845933, |
| "grad_norm": 0.20588927188905604, |
| "learning_rate": 3.358516319763348e-05, |
| "loss": 0.3908, |
| "step": 653 |
| }, |
| { |
| "epoch": 2.985449358059914, |
| "grad_norm": 0.15024406311611282, |
| "learning_rate": 3.3459269928577326e-05, |
| "loss": 0.3965, |
| "step": 654 |
| }, |
| { |
| "epoch": 2.9900142653352355, |
| "grad_norm": 0.18315742998535253, |
| "learning_rate": 3.3333443194977985e-05, |
| "loss": 0.3878, |
| "step": 655 |
| }, |
| { |
| "epoch": 2.9945791726105564, |
| "grad_norm": 0.15190454402880918, |
| "learning_rate": 3.32076842768056e-05, |
| "loss": 0.4001, |
| "step": 656 |
| }, |
| { |
| "epoch": 2.9991440798858773, |
| "grad_norm": 0.1802522348541421, |
| "learning_rate": 3.3081994453340425e-05, |
| "loss": 0.3912, |
| "step": 657 |
| }, |
| { |
| "epoch": 3.003708987161198, |
| "grad_norm": 0.4371593688422328, |
| "learning_rate": 3.295637500315992e-05, |
| "loss": 0.7621, |
| "step": 658 |
| }, |
| { |
| "epoch": 3.008273894436519, |
| "grad_norm": 0.44775433837572887, |
| "learning_rate": 3.283082720412563e-05, |
| "loss": 0.3662, |
| "step": 659 |
| }, |
| { |
| "epoch": 3.0128388017118404, |
| "grad_norm": 0.3890902219171831, |
| "learning_rate": 3.270535233337024e-05, |
| "loss": 0.3743, |
| "step": 660 |
| }, |
| { |
| "epoch": 3.0174037089871613, |
| "grad_norm": 0.3003030553223454, |
| "learning_rate": 3.2579951667284596e-05, |
| "loss": 0.3721, |
| "step": 661 |
| }, |
| { |
| "epoch": 3.021968616262482, |
| "grad_norm": 0.3515656966921728, |
| "learning_rate": 3.245462648150463e-05, |
| "loss": 0.3684, |
| "step": 662 |
| }, |
| { |
| "epoch": 3.026533523537803, |
| "grad_norm": 0.2934678186313399, |
| "learning_rate": 3.232937805089854e-05, |
| "loss": 0.3688, |
| "step": 663 |
| }, |
| { |
| "epoch": 3.031098430813124, |
| "grad_norm": 0.2665389758970002, |
| "learning_rate": 3.2204207649553665e-05, |
| "loss": 0.3687, |
| "step": 664 |
| }, |
| { |
| "epoch": 3.0356633380884452, |
| "grad_norm": 0.2886505257763211, |
| "learning_rate": 3.2079116550763624e-05, |
| "loss": 0.3715, |
| "step": 665 |
| }, |
| { |
| "epoch": 3.040228245363766, |
| "grad_norm": 0.26410869047435404, |
| "learning_rate": 3.195410602701535e-05, |
| "loss": 0.3755, |
| "step": 666 |
| }, |
| { |
| "epoch": 3.044793152639087, |
| "grad_norm": 0.33726378634419146, |
| "learning_rate": 3.182917734997607e-05, |
| "loss": 0.3715, |
| "step": 667 |
| }, |
| { |
| "epoch": 3.049358059914408, |
| "grad_norm": 0.23119146133187443, |
| "learning_rate": 3.170433179048048e-05, |
| "loss": 0.3752, |
| "step": 668 |
| }, |
| { |
| "epoch": 3.0539229671897288, |
| "grad_norm": 0.317803893708356, |
| "learning_rate": 3.157957061851775e-05, |
| "loss": 0.3668, |
| "step": 669 |
| }, |
| { |
| "epoch": 3.05848787446505, |
| "grad_norm": 0.32056814826697005, |
| "learning_rate": 3.1454895103218604e-05, |
| "loss": 0.3684, |
| "step": 670 |
| }, |
| { |
| "epoch": 3.063052781740371, |
| "grad_norm": 0.2227493640724446, |
| "learning_rate": 3.133030651284246e-05, |
| "loss": 0.3708, |
| "step": 671 |
| }, |
| { |
| "epoch": 3.067617689015692, |
| "grad_norm": 0.3474416944410334, |
| "learning_rate": 3.1205806114764455e-05, |
| "loss": 0.37, |
| "step": 672 |
| }, |
| { |
| "epoch": 3.0721825962910128, |
| "grad_norm": 0.23598864945070333, |
| "learning_rate": 3.108139517546259e-05, |
| "loss": 0.3679, |
| "step": 673 |
| }, |
| { |
| "epoch": 3.0767475035663336, |
| "grad_norm": 0.26225028839730374, |
| "learning_rate": 3.095707496050489e-05, |
| "loss": 0.3711, |
| "step": 674 |
| }, |
| { |
| "epoch": 3.081312410841655, |
| "grad_norm": 0.30541556508523, |
| "learning_rate": 3.083284673453645e-05, |
| "loss": 0.3705, |
| "step": 675 |
| }, |
| { |
| "epoch": 3.085877318116976, |
| "grad_norm": 0.2375554525248402, |
| "learning_rate": 3.070871176126664e-05, |
| "loss": 0.3674, |
| "step": 676 |
| }, |
| { |
| "epoch": 3.0904422253922967, |
| "grad_norm": 0.24938738146004832, |
| "learning_rate": 3.058467130345619e-05, |
| "loss": 0.3693, |
| "step": 677 |
| }, |
| { |
| "epoch": 3.0950071326676176, |
| "grad_norm": 0.194861014641011, |
| "learning_rate": 3.0460726622904362e-05, |
| "loss": 0.3709, |
| "step": 678 |
| }, |
| { |
| "epoch": 3.0995720399429385, |
| "grad_norm": 0.22427886537482727, |
| "learning_rate": 3.033687898043619e-05, |
| "loss": 0.3674, |
| "step": 679 |
| }, |
| { |
| "epoch": 3.10413694721826, |
| "grad_norm": 0.1931234684152515, |
| "learning_rate": 3.0213129635889527e-05, |
| "loss": 0.3707, |
| "step": 680 |
| }, |
| { |
| "epoch": 3.1087018544935807, |
| "grad_norm": 0.19750887437108353, |
| "learning_rate": 3.0089479848102302e-05, |
| "loss": 0.3726, |
| "step": 681 |
| }, |
| { |
| "epoch": 3.1132667617689016, |
| "grad_norm": 0.22984346500906291, |
| "learning_rate": 2.9965930874899734e-05, |
| "loss": 0.3691, |
| "step": 682 |
| }, |
| { |
| "epoch": 3.1178316690442225, |
| "grad_norm": 0.16695271136501227, |
| "learning_rate": 2.984248397308149e-05, |
| "loss": 0.3669, |
| "step": 683 |
| }, |
| { |
| "epoch": 3.1223965763195434, |
| "grad_norm": 0.17893989774867847, |
| "learning_rate": 2.971914039840888e-05, |
| "loss": 0.369, |
| "step": 684 |
| }, |
| { |
| "epoch": 3.1269614835948643, |
| "grad_norm": 0.17571871130738057, |
| "learning_rate": 2.9595901405592215e-05, |
| "loss": 0.3716, |
| "step": 685 |
| }, |
| { |
| "epoch": 3.1315263908701856, |
| "grad_norm": 0.1594921785853266, |
| "learning_rate": 2.947276824827784e-05, |
| "loss": 0.3712, |
| "step": 686 |
| }, |
| { |
| "epoch": 3.1360912981455065, |
| "grad_norm": 0.17980710605526548, |
| "learning_rate": 2.9349742179035575e-05, |
| "loss": 0.3656, |
| "step": 687 |
| }, |
| { |
| "epoch": 3.1406562054208274, |
| "grad_norm": 0.18019296867903034, |
| "learning_rate": 2.9226824449345854e-05, |
| "loss": 0.3711, |
| "step": 688 |
| }, |
| { |
| "epoch": 3.1452211126961482, |
| "grad_norm": 0.20009151820526971, |
| "learning_rate": 2.910401630958699e-05, |
| "loss": 0.3696, |
| "step": 689 |
| }, |
| { |
| "epoch": 3.1497860199714696, |
| "grad_norm": 0.19469556991313045, |
| "learning_rate": 2.898131900902259e-05, |
| "loss": 0.3664, |
| "step": 690 |
| }, |
| { |
| "epoch": 3.1543509272467904, |
| "grad_norm": 0.1658297168263364, |
| "learning_rate": 2.8858733795788666e-05, |
| "loss": 0.3698, |
| "step": 691 |
| }, |
| { |
| "epoch": 3.1589158345221113, |
| "grad_norm": 0.1770728229288905, |
| "learning_rate": 2.873626191688104e-05, |
| "loss": 0.3707, |
| "step": 692 |
| }, |
| { |
| "epoch": 3.163480741797432, |
| "grad_norm": 0.158629076656555, |
| "learning_rate": 2.8613904618142698e-05, |
| "loss": 0.369, |
| "step": 693 |
| }, |
| { |
| "epoch": 3.168045649072753, |
| "grad_norm": 0.15694838864187477, |
| "learning_rate": 2.8491663144250964e-05, |
| "loss": 0.3714, |
| "step": 694 |
| }, |
| { |
| "epoch": 3.172610556348074, |
| "grad_norm": 0.17453143005622082, |
| "learning_rate": 2.836953873870505e-05, |
| "loss": 0.3724, |
| "step": 695 |
| }, |
| { |
| "epoch": 3.1771754636233953, |
| "grad_norm": 0.1700537102783032, |
| "learning_rate": 2.824753264381319e-05, |
| "loss": 0.3692, |
| "step": 696 |
| }, |
| { |
| "epoch": 3.181740370898716, |
| "grad_norm": 0.16759539668413778, |
| "learning_rate": 2.812564610068017e-05, |
| "loss": 0.371, |
| "step": 697 |
| }, |
| { |
| "epoch": 3.186305278174037, |
| "grad_norm": 0.16867962773238068, |
| "learning_rate": 2.800388034919461e-05, |
| "loss": 0.3662, |
| "step": 698 |
| }, |
| { |
| "epoch": 3.190870185449358, |
| "grad_norm": 0.19731656299531658, |
| "learning_rate": 2.788223662801639e-05, |
| "loss": 0.3731, |
| "step": 699 |
| }, |
| { |
| "epoch": 3.195435092724679, |
| "grad_norm": 0.14390885288139355, |
| "learning_rate": 2.776071617456402e-05, |
| "loss": 0.3685, |
| "step": 700 |
| }, |
| { |
| "epoch": 3.2, |
| "grad_norm": 0.17429954735264463, |
| "learning_rate": 2.7639320225002108e-05, |
| "loss": 0.3692, |
| "step": 701 |
| }, |
| { |
| "epoch": 3.204564907275321, |
| "grad_norm": 0.16193213783821153, |
| "learning_rate": 2.7518050014228707e-05, |
| "loss": 0.3694, |
| "step": 702 |
| }, |
| { |
| "epoch": 3.209129814550642, |
| "grad_norm": 0.17943687683108553, |
| "learning_rate": 2.739690677586284e-05, |
| "loss": 0.3735, |
| "step": 703 |
| }, |
| { |
| "epoch": 3.213694721825963, |
| "grad_norm": 0.16115261882412688, |
| "learning_rate": 2.7275891742231847e-05, |
| "loss": 0.3688, |
| "step": 704 |
| }, |
| { |
| "epoch": 3.2182596291012837, |
| "grad_norm": 0.15989318235835606, |
| "learning_rate": 2.7155006144358958e-05, |
| "loss": 0.3669, |
| "step": 705 |
| }, |
| { |
| "epoch": 3.222824536376605, |
| "grad_norm": 0.1833740301603784, |
| "learning_rate": 2.70342512119507e-05, |
| "loss": 0.3681, |
| "step": 706 |
| }, |
| { |
| "epoch": 3.227389443651926, |
| "grad_norm": 0.1701603517000583, |
| "learning_rate": 2.691362817338442e-05, |
| "loss": 0.3732, |
| "step": 707 |
| }, |
| { |
| "epoch": 3.231954350927247, |
| "grad_norm": 0.17981663756006744, |
| "learning_rate": 2.6793138255695743e-05, |
| "loss": 0.3669, |
| "step": 708 |
| }, |
| { |
| "epoch": 3.2365192582025677, |
| "grad_norm": 0.1611749828625592, |
| "learning_rate": 2.6672782684566167e-05, |
| "loss": 0.368, |
| "step": 709 |
| }, |
| { |
| "epoch": 3.2410841654778886, |
| "grad_norm": 0.1704405344311701, |
| "learning_rate": 2.6552562684310532e-05, |
| "loss": 0.3746, |
| "step": 710 |
| }, |
| { |
| "epoch": 3.24564907275321, |
| "grad_norm": 0.15452048356052153, |
| "learning_rate": 2.6432479477864588e-05, |
| "loss": 0.3668, |
| "step": 711 |
| }, |
| { |
| "epoch": 3.250213980028531, |
| "grad_norm": 0.1636048167954058, |
| "learning_rate": 2.6312534286772558e-05, |
| "loss": 0.3665, |
| "step": 712 |
| }, |
| { |
| "epoch": 3.2547788873038517, |
| "grad_norm": 0.15805813062246202, |
| "learning_rate": 2.619272833117468e-05, |
| "loss": 0.3671, |
| "step": 713 |
| }, |
| { |
| "epoch": 3.2593437945791726, |
| "grad_norm": 0.15722251462834871, |
| "learning_rate": 2.6073062829794863e-05, |
| "loss": 0.3702, |
| "step": 714 |
| }, |
| { |
| "epoch": 3.2639087018544934, |
| "grad_norm": 0.1515655825894355, |
| "learning_rate": 2.5953538999928217e-05, |
| "loss": 0.3698, |
| "step": 715 |
| }, |
| { |
| "epoch": 3.2684736091298143, |
| "grad_norm": 0.15294949505967717, |
| "learning_rate": 2.5834158057428704e-05, |
| "loss": 0.3659, |
| "step": 716 |
| }, |
| { |
| "epoch": 3.2730385164051357, |
| "grad_norm": 0.17430429089098687, |
| "learning_rate": 2.5714921216696806e-05, |
| "loss": 0.3658, |
| "step": 717 |
| }, |
| { |
| "epoch": 3.2776034236804565, |
| "grad_norm": 0.15283446685063015, |
| "learning_rate": 2.559582969066706e-05, |
| "loss": 0.3655, |
| "step": 718 |
| }, |
| { |
| "epoch": 3.2821683309557774, |
| "grad_norm": 0.15900008730134152, |
| "learning_rate": 2.5476884690795853e-05, |
| "loss": 0.3686, |
| "step": 719 |
| }, |
| { |
| "epoch": 3.2867332382310983, |
| "grad_norm": 0.17376312137687447, |
| "learning_rate": 2.5358087427049016e-05, |
| "loss": 0.3679, |
| "step": 720 |
| }, |
| { |
| "epoch": 3.2912981455064196, |
| "grad_norm": 0.16367071666419647, |
| "learning_rate": 2.523943910788953e-05, |
| "loss": 0.3652, |
| "step": 721 |
| }, |
| { |
| "epoch": 3.2958630527817405, |
| "grad_norm": 0.1493453159768574, |
| "learning_rate": 2.5120940940265276e-05, |
| "loss": 0.3712, |
| "step": 722 |
| }, |
| { |
| "epoch": 3.3004279600570614, |
| "grad_norm": 0.1600651037670884, |
| "learning_rate": 2.500259412959665e-05, |
| "loss": 0.368, |
| "step": 723 |
| }, |
| { |
| "epoch": 3.3049928673323823, |
| "grad_norm": 0.15510229863064848, |
| "learning_rate": 2.4884399879764437e-05, |
| "loss": 0.3714, |
| "step": 724 |
| }, |
| { |
| "epoch": 3.309557774607703, |
| "grad_norm": 0.1371776463153879, |
| "learning_rate": 2.4766359393097476e-05, |
| "loss": 0.3723, |
| "step": 725 |
| }, |
| { |
| "epoch": 3.314122681883024, |
| "grad_norm": 0.17941656580285384, |
| "learning_rate": 2.464847387036045e-05, |
| "loss": 0.3702, |
| "step": 726 |
| }, |
| { |
| "epoch": 3.3186875891583454, |
| "grad_norm": 0.13405327935015152, |
| "learning_rate": 2.4530744510741703e-05, |
| "loss": 0.3715, |
| "step": 727 |
| }, |
| { |
| "epoch": 3.3232524964336663, |
| "grad_norm": 0.16167817588999067, |
| "learning_rate": 2.4413172511840958e-05, |
| "loss": 0.3693, |
| "step": 728 |
| }, |
| { |
| "epoch": 3.327817403708987, |
| "grad_norm": 0.1509260817331625, |
| "learning_rate": 2.429575906965722e-05, |
| "loss": 0.3599, |
| "step": 729 |
| }, |
| { |
| "epoch": 3.332382310984308, |
| "grad_norm": 0.144720817062596, |
| "learning_rate": 2.4178505378576605e-05, |
| "loss": 0.3708, |
| "step": 730 |
| }, |
| { |
| "epoch": 3.336947218259629, |
| "grad_norm": 0.1424755317935734, |
| "learning_rate": 2.4061412631360116e-05, |
| "loss": 0.3647, |
| "step": 731 |
| }, |
| { |
| "epoch": 3.3415121255349503, |
| "grad_norm": 0.1582677626986222, |
| "learning_rate": 2.394448201913158e-05, |
| "loss": 0.3656, |
| "step": 732 |
| }, |
| { |
| "epoch": 3.346077032810271, |
| "grad_norm": 0.13727256015572376, |
| "learning_rate": 2.3827714731365513e-05, |
| "loss": 0.3669, |
| "step": 733 |
| }, |
| { |
| "epoch": 3.350641940085592, |
| "grad_norm": 0.1482716204121623, |
| "learning_rate": 2.3711111955875018e-05, |
| "loss": 0.3661, |
| "step": 734 |
| }, |
| { |
| "epoch": 3.355206847360913, |
| "grad_norm": 0.1429391445826853, |
| "learning_rate": 2.3594674878799656e-05, |
| "loss": 0.3702, |
| "step": 735 |
| }, |
| { |
| "epoch": 3.359771754636234, |
| "grad_norm": 0.15144931311040605, |
| "learning_rate": 2.347840468459351e-05, |
| "loss": 0.3722, |
| "step": 736 |
| }, |
| { |
| "epoch": 3.364336661911555, |
| "grad_norm": 0.1584841215631846, |
| "learning_rate": 2.336230255601296e-05, |
| "loss": 0.3651, |
| "step": 737 |
| }, |
| { |
| "epoch": 3.368901569186876, |
| "grad_norm": 0.15860593207749524, |
| "learning_rate": 2.324636967410481e-05, |
| "loss": 0.3706, |
| "step": 738 |
| }, |
| { |
| "epoch": 3.373466476462197, |
| "grad_norm": 0.16524287919377015, |
| "learning_rate": 2.3130607218194153e-05, |
| "loss": 0.3667, |
| "step": 739 |
| }, |
| { |
| "epoch": 3.3780313837375178, |
| "grad_norm": 0.1553010070756002, |
| "learning_rate": 2.3015016365872462e-05, |
| "loss": 0.3694, |
| "step": 740 |
| }, |
| { |
| "epoch": 3.3825962910128387, |
| "grad_norm": 0.19177517583446688, |
| "learning_rate": 2.289959829298558e-05, |
| "loss": 0.3684, |
| "step": 741 |
| }, |
| { |
| "epoch": 3.38716119828816, |
| "grad_norm": 0.13228111850501834, |
| "learning_rate": 2.2784354173621726e-05, |
| "loss": 0.3695, |
| "step": 742 |
| }, |
| { |
| "epoch": 3.391726105563481, |
| "grad_norm": 0.18742867451573136, |
| "learning_rate": 2.266928518009957e-05, |
| "loss": 0.3663, |
| "step": 743 |
| }, |
| { |
| "epoch": 3.3962910128388017, |
| "grad_norm": 0.14121057196985098, |
| "learning_rate": 2.2554392482956357e-05, |
| "loss": 0.362, |
| "step": 744 |
| }, |
| { |
| "epoch": 3.4008559201141226, |
| "grad_norm": 0.16109339289265964, |
| "learning_rate": 2.243967725093595e-05, |
| "loss": 0.3644, |
| "step": 745 |
| }, |
| { |
| "epoch": 3.4054208273894435, |
| "grad_norm": 0.13332750913355695, |
| "learning_rate": 2.2325140650976957e-05, |
| "loss": 0.3696, |
| "step": 746 |
| }, |
| { |
| "epoch": 3.4099857346647644, |
| "grad_norm": 0.14371453147838442, |
| "learning_rate": 2.221078384820082e-05, |
| "loss": 0.3672, |
| "step": 747 |
| }, |
| { |
| "epoch": 3.4145506419400857, |
| "grad_norm": 0.12959264833893533, |
| "learning_rate": 2.209660800590002e-05, |
| "loss": 0.369, |
| "step": 748 |
| }, |
| { |
| "epoch": 3.4191155492154066, |
| "grad_norm": 0.19007915393126823, |
| "learning_rate": 2.1982614285526214e-05, |
| "loss": 0.3695, |
| "step": 749 |
| }, |
| { |
| "epoch": 3.4236804564907275, |
| "grad_norm": 0.1374069769687416, |
| "learning_rate": 2.1868803846678475e-05, |
| "loss": 0.364, |
| "step": 750 |
| }, |
| { |
| "epoch": 3.4282453637660484, |
| "grad_norm": 0.16184614734626082, |
| "learning_rate": 2.1755177847091357e-05, |
| "loss": 0.3701, |
| "step": 751 |
| }, |
| { |
| "epoch": 3.4328102710413697, |
| "grad_norm": 0.12874829449890968, |
| "learning_rate": 2.1641737442623295e-05, |
| "loss": 0.3702, |
| "step": 752 |
| }, |
| { |
| "epoch": 3.4373751783166906, |
| "grad_norm": 0.15265797312556922, |
| "learning_rate": 2.1528483787244695e-05, |
| "loss": 0.3631, |
| "step": 753 |
| }, |
| { |
| "epoch": 3.4419400855920115, |
| "grad_norm": 0.13901496827521181, |
| "learning_rate": 2.1415418033026303e-05, |
| "loss": 0.3698, |
| "step": 754 |
| }, |
| { |
| "epoch": 3.4465049928673324, |
| "grad_norm": 0.14408559598989043, |
| "learning_rate": 2.1302541330127456e-05, |
| "loss": 0.3722, |
| "step": 755 |
| }, |
| { |
| "epoch": 3.4510699001426532, |
| "grad_norm": 0.1553274863431962, |
| "learning_rate": 2.1189854826784306e-05, |
| "loss": 0.373, |
| "step": 756 |
| }, |
| { |
| "epoch": 3.455634807417974, |
| "grad_norm": 0.13896160154548598, |
| "learning_rate": 2.107735966929828e-05, |
| "loss": 0.3674, |
| "step": 757 |
| }, |
| { |
| "epoch": 3.4601997146932955, |
| "grad_norm": 0.14190658414204307, |
| "learning_rate": 2.096505700202427e-05, |
| "loss": 0.3713, |
| "step": 758 |
| }, |
| { |
| "epoch": 3.4647646219686163, |
| "grad_norm": 0.13297816575448404, |
| "learning_rate": 2.0852947967359124e-05, |
| "loss": 0.3659, |
| "step": 759 |
| }, |
| { |
| "epoch": 3.4693295292439372, |
| "grad_norm": 0.13293797385160067, |
| "learning_rate": 2.0741033705729946e-05, |
| "loss": 0.3688, |
| "step": 760 |
| }, |
| { |
| "epoch": 3.473894436519258, |
| "grad_norm": 0.13476039599236855, |
| "learning_rate": 2.0629315355582493e-05, |
| "loss": 0.3734, |
| "step": 761 |
| }, |
| { |
| "epoch": 3.478459343794579, |
| "grad_norm": 0.12322657009585795, |
| "learning_rate": 2.0517794053369668e-05, |
| "loss": 0.3674, |
| "step": 762 |
| }, |
| { |
| "epoch": 3.4830242510699003, |
| "grad_norm": 0.11867881868833804, |
| "learning_rate": 2.040647093353983e-05, |
| "loss": 0.3656, |
| "step": 763 |
| }, |
| { |
| "epoch": 3.487589158345221, |
| "grad_norm": 0.11899054622753805, |
| "learning_rate": 2.02953471285254e-05, |
| "loss": 0.3636, |
| "step": 764 |
| }, |
| { |
| "epoch": 3.492154065620542, |
| "grad_norm": 0.12418349656227172, |
| "learning_rate": 2.018442376873126e-05, |
| "loss": 0.3717, |
| "step": 765 |
| }, |
| { |
| "epoch": 3.496718972895863, |
| "grad_norm": 0.11310966967990665, |
| "learning_rate": 2.007370198252324e-05, |
| "loss": 0.3611, |
| "step": 766 |
| }, |
| { |
| "epoch": 3.501283880171184, |
| "grad_norm": 0.12055741426438335, |
| "learning_rate": 1.9963182896216667e-05, |
| "loss": 0.3674, |
| "step": 767 |
| }, |
| { |
| "epoch": 3.5058487874465047, |
| "grad_norm": 0.12618096595715805, |
| "learning_rate": 1.985286763406494e-05, |
| "loss": 0.3725, |
| "step": 768 |
| }, |
| { |
| "epoch": 3.510413694721826, |
| "grad_norm": 0.11538183128314908, |
| "learning_rate": 1.974275731824804e-05, |
| "loss": 0.3703, |
| "step": 769 |
| }, |
| { |
| "epoch": 3.514978601997147, |
| "grad_norm": 0.12599278064501612, |
| "learning_rate": 1.9632853068861147e-05, |
| "loss": 0.3628, |
| "step": 770 |
| }, |
| { |
| "epoch": 3.519543509272468, |
| "grad_norm": 0.1139458312287587, |
| "learning_rate": 1.9523156003903215e-05, |
| "loss": 0.3664, |
| "step": 771 |
| }, |
| { |
| "epoch": 3.5241084165477887, |
| "grad_norm": 0.14077407102475697, |
| "learning_rate": 1.9413667239265615e-05, |
| "loss": 0.3652, |
| "step": 772 |
| }, |
| { |
| "epoch": 3.52867332382311, |
| "grad_norm": 0.13032016684712433, |
| "learning_rate": 1.9304387888720804e-05, |
| "loss": 0.3685, |
| "step": 773 |
| }, |
| { |
| "epoch": 3.533238231098431, |
| "grad_norm": 0.14031858267414743, |
| "learning_rate": 1.919531906391099e-05, |
| "loss": 0.3733, |
| "step": 774 |
| }, |
| { |
| "epoch": 3.537803138373752, |
| "grad_norm": 0.1486352640869334, |
| "learning_rate": 1.9086461874336777e-05, |
| "loss": 0.3713, |
| "step": 775 |
| }, |
| { |
| "epoch": 3.5423680456490727, |
| "grad_norm": 0.1357215608169487, |
| "learning_rate": 1.8977817427345946e-05, |
| "loss": 0.3617, |
| "step": 776 |
| }, |
| { |
| "epoch": 3.5469329529243936, |
| "grad_norm": 0.13543444529267776, |
| "learning_rate": 1.8869386828122125e-05, |
| "loss": 0.3723, |
| "step": 777 |
| }, |
| { |
| "epoch": 3.5514978601997145, |
| "grad_norm": 0.14518880548921387, |
| "learning_rate": 1.8761171179673604e-05, |
| "loss": 0.3646, |
| "step": 778 |
| }, |
| { |
| "epoch": 3.556062767475036, |
| "grad_norm": 0.1530615652429703, |
| "learning_rate": 1.8653171582822104e-05, |
| "loss": 0.3676, |
| "step": 779 |
| }, |
| { |
| "epoch": 3.5606276747503567, |
| "grad_norm": 0.14320914259229844, |
| "learning_rate": 1.854538913619151e-05, |
| "loss": 0.3708, |
| "step": 780 |
| }, |
| { |
| "epoch": 3.5651925820256776, |
| "grad_norm": 0.1371728530944999, |
| "learning_rate": 1.8437824936196823e-05, |
| "loss": 0.3749, |
| "step": 781 |
| }, |
| { |
| "epoch": 3.5697574893009985, |
| "grad_norm": 0.15231861361219254, |
| "learning_rate": 1.8330480077032858e-05, |
| "loss": 0.3672, |
| "step": 782 |
| }, |
| { |
| "epoch": 3.57432239657632, |
| "grad_norm": 0.11059078549485153, |
| "learning_rate": 1.822335565066325e-05, |
| "loss": 0.3677, |
| "step": 783 |
| }, |
| { |
| "epoch": 3.5788873038516407, |
| "grad_norm": 0.1576919764066523, |
| "learning_rate": 1.8116452746809275e-05, |
| "loss": 0.367, |
| "step": 784 |
| }, |
| { |
| "epoch": 3.5834522111269616, |
| "grad_norm": 0.12879507206191718, |
| "learning_rate": 1.800977245293875e-05, |
| "loss": 0.368, |
| "step": 785 |
| }, |
| { |
| "epoch": 3.5880171184022824, |
| "grad_norm": 0.12629882573611204, |
| "learning_rate": 1.7903315854254994e-05, |
| "loss": 0.3654, |
| "step": 786 |
| }, |
| { |
| "epoch": 3.5925820256776033, |
| "grad_norm": 0.13427520014143732, |
| "learning_rate": 1.779708403368582e-05, |
| "loss": 0.3713, |
| "step": 787 |
| }, |
| { |
| "epoch": 3.597146932952924, |
| "grad_norm": 0.1176497941246944, |
| "learning_rate": 1.7691078071872477e-05, |
| "loss": 0.3711, |
| "step": 788 |
| }, |
| { |
| "epoch": 3.601711840228245, |
| "grad_norm": 0.13036521061876197, |
| "learning_rate": 1.7585299047158688e-05, |
| "loss": 0.3703, |
| "step": 789 |
| }, |
| { |
| "epoch": 3.6062767475035664, |
| "grad_norm": 0.12201908677886805, |
| "learning_rate": 1.7479748035579625e-05, |
| "loss": 0.3664, |
| "step": 790 |
| }, |
| { |
| "epoch": 3.6108416547788873, |
| "grad_norm": 0.11159322212904035, |
| "learning_rate": 1.7374426110851e-05, |
| "loss": 0.3726, |
| "step": 791 |
| }, |
| { |
| "epoch": 3.615406562054208, |
| "grad_norm": 0.12179562963863867, |
| "learning_rate": 1.726933434435819e-05, |
| "loss": 0.3598, |
| "step": 792 |
| }, |
| { |
| "epoch": 3.6199714693295295, |
| "grad_norm": 0.12279846137825234, |
| "learning_rate": 1.716447380514526e-05, |
| "loss": 0.3643, |
| "step": 793 |
| }, |
| { |
| "epoch": 3.6245363766048504, |
| "grad_norm": 0.12287836059833575, |
| "learning_rate": 1.7059845559904115e-05, |
| "loss": 0.3702, |
| "step": 794 |
| }, |
| { |
| "epoch": 3.6291012838801713, |
| "grad_norm": 0.11039673465914712, |
| "learning_rate": 1.695545067296368e-05, |
| "loss": 0.3675, |
| "step": 795 |
| }, |
| { |
| "epoch": 3.633666191155492, |
| "grad_norm": 0.12022240785762481, |
| "learning_rate": 1.6851290206279e-05, |
| "loss": 0.3695, |
| "step": 796 |
| }, |
| { |
| "epoch": 3.638231098430813, |
| "grad_norm": 0.11435555984531806, |
| "learning_rate": 1.674736521942053e-05, |
| "loss": 0.365, |
| "step": 797 |
| }, |
| { |
| "epoch": 3.642796005706134, |
| "grad_norm": 0.131712471568275, |
| "learning_rate": 1.664367676956331e-05, |
| "loss": 0.3713, |
| "step": 798 |
| }, |
| { |
| "epoch": 3.647360912981455, |
| "grad_norm": 0.11477057659282204, |
| "learning_rate": 1.6540225911476172e-05, |
| "loss": 0.3648, |
| "step": 799 |
| }, |
| { |
| "epoch": 3.651925820256776, |
| "grad_norm": 0.11030623092197417, |
| "learning_rate": 1.643701369751109e-05, |
| "loss": 0.3646, |
| "step": 800 |
| }, |
| { |
| "epoch": 3.656490727532097, |
| "grad_norm": 0.13051313133439071, |
| "learning_rate": 1.6334041177592403e-05, |
| "loss": 0.3711, |
| "step": 801 |
| }, |
| { |
| "epoch": 3.661055634807418, |
| "grad_norm": 0.12099586891245266, |
| "learning_rate": 1.623130939920619e-05, |
| "loss": 0.3665, |
| "step": 802 |
| }, |
| { |
| "epoch": 3.665620542082739, |
| "grad_norm": 0.12353194281368282, |
| "learning_rate": 1.6128819407389606e-05, |
| "loss": 0.3698, |
| "step": 803 |
| }, |
| { |
| "epoch": 3.67018544935806, |
| "grad_norm": 0.12746501918260686, |
| "learning_rate": 1.602657224472018e-05, |
| "loss": 0.3651, |
| "step": 804 |
| }, |
| { |
| "epoch": 3.674750356633381, |
| "grad_norm": 0.10793911581481992, |
| "learning_rate": 1.5924568951305328e-05, |
| "loss": 0.3651, |
| "step": 805 |
| }, |
| { |
| "epoch": 3.679315263908702, |
| "grad_norm": 0.13767950934095166, |
| "learning_rate": 1.5822810564771663e-05, |
| "loss": 0.3681, |
| "step": 806 |
| }, |
| { |
| "epoch": 3.683880171184023, |
| "grad_norm": 0.11149734780728035, |
| "learning_rate": 1.5721298120254514e-05, |
| "loss": 0.372, |
| "step": 807 |
| }, |
| { |
| "epoch": 3.6884450784593437, |
| "grad_norm": 0.1187146085257134, |
| "learning_rate": 1.562003265038738e-05, |
| "loss": 0.3698, |
| "step": 808 |
| }, |
| { |
| "epoch": 3.6930099857346645, |
| "grad_norm": 0.12127988390555491, |
| "learning_rate": 1.551901518529138e-05, |
| "loss": 0.3692, |
| "step": 809 |
| }, |
| { |
| "epoch": 3.697574893009986, |
| "grad_norm": 0.11702704275441794, |
| "learning_rate": 1.541824675256482e-05, |
| "loss": 0.3661, |
| "step": 810 |
| }, |
| { |
| "epoch": 3.7021398002853068, |
| "grad_norm": 0.11306685620844899, |
| "learning_rate": 1.531772837727274e-05, |
| "loss": 0.3695, |
| "step": 811 |
| }, |
| { |
| "epoch": 3.7067047075606276, |
| "grad_norm": 0.11312484841891161, |
| "learning_rate": 1.5217461081936478e-05, |
| "loss": 0.3671, |
| "step": 812 |
| }, |
| { |
| "epoch": 3.7112696148359485, |
| "grad_norm": 0.11428174495717341, |
| "learning_rate": 1.5117445886523272e-05, |
| "loss": 0.3674, |
| "step": 813 |
| }, |
| { |
| "epoch": 3.71583452211127, |
| "grad_norm": 0.10987603615873456, |
| "learning_rate": 1.501768380843585e-05, |
| "loss": 0.3673, |
| "step": 814 |
| }, |
| { |
| "epoch": 3.7203994293865907, |
| "grad_norm": 0.11178040039287562, |
| "learning_rate": 1.4918175862502104e-05, |
| "loss": 0.3719, |
| "step": 815 |
| }, |
| { |
| "epoch": 3.7249643366619116, |
| "grad_norm": 0.10859792201293215, |
| "learning_rate": 1.4818923060964814e-05, |
| "loss": 0.3615, |
| "step": 816 |
| }, |
| { |
| "epoch": 3.7295292439372325, |
| "grad_norm": 0.13172517052119273, |
| "learning_rate": 1.471992641347129e-05, |
| "loss": 0.3694, |
| "step": 817 |
| }, |
| { |
| "epoch": 3.7340941512125534, |
| "grad_norm": 0.12757152194459273, |
| "learning_rate": 1.4621186927063095e-05, |
| "loss": 0.3649, |
| "step": 818 |
| }, |
| { |
| "epoch": 3.7386590584878743, |
| "grad_norm": 0.1154895484553408, |
| "learning_rate": 1.4522705606165865e-05, |
| "loss": 0.3694, |
| "step": 819 |
| }, |
| { |
| "epoch": 3.743223965763195, |
| "grad_norm": 0.11273080460771058, |
| "learning_rate": 1.4424483452579012e-05, |
| "loss": 0.3625, |
| "step": 820 |
| }, |
| { |
| "epoch": 3.7477888730385165, |
| "grad_norm": 0.13138142426919347, |
| "learning_rate": 1.4326521465465604e-05, |
| "loss": 0.366, |
| "step": 821 |
| }, |
| { |
| "epoch": 3.7523537803138374, |
| "grad_norm": 0.11883743670221694, |
| "learning_rate": 1.4228820641342172e-05, |
| "loss": 0.3682, |
| "step": 822 |
| }, |
| { |
| "epoch": 3.7569186875891583, |
| "grad_norm": 0.10968178906111543, |
| "learning_rate": 1.4131381974068533e-05, |
| "loss": 0.3655, |
| "step": 823 |
| }, |
| { |
| "epoch": 3.7614835948644796, |
| "grad_norm": 0.1326313322060334, |
| "learning_rate": 1.4034206454837768e-05, |
| "loss": 0.3715, |
| "step": 824 |
| }, |
| { |
| "epoch": 3.7660485021398005, |
| "grad_norm": 0.12319894462182598, |
| "learning_rate": 1.3937295072166061e-05, |
| "loss": 0.3739, |
| "step": 825 |
| }, |
| { |
| "epoch": 3.7706134094151214, |
| "grad_norm": 0.11752644369102541, |
| "learning_rate": 1.3840648811882646e-05, |
| "loss": 0.3641, |
| "step": 826 |
| }, |
| { |
| "epoch": 3.7751783166904422, |
| "grad_norm": 0.10779720073463613, |
| "learning_rate": 1.3744268657119886e-05, |
| "loss": 0.3672, |
| "step": 827 |
| }, |
| { |
| "epoch": 3.779743223965763, |
| "grad_norm": 0.12396985602516226, |
| "learning_rate": 1.3648155588303097e-05, |
| "loss": 0.3682, |
| "step": 828 |
| }, |
| { |
| "epoch": 3.784308131241084, |
| "grad_norm": 0.13035541174831453, |
| "learning_rate": 1.3552310583140744e-05, |
| "loss": 0.3695, |
| "step": 829 |
| }, |
| { |
| "epoch": 3.788873038516405, |
| "grad_norm": 0.11977978636195129, |
| "learning_rate": 1.3456734616614369e-05, |
| "loss": 0.3693, |
| "step": 830 |
| }, |
| { |
| "epoch": 3.793437945791726, |
| "grad_norm": 0.10612949145458676, |
| "learning_rate": 1.3361428660968713e-05, |
| "loss": 0.3654, |
| "step": 831 |
| }, |
| { |
| "epoch": 3.798002853067047, |
| "grad_norm": 0.12185371327777074, |
| "learning_rate": 1.3266393685701919e-05, |
| "loss": 0.3644, |
| "step": 832 |
| }, |
| { |
| "epoch": 3.802567760342368, |
| "grad_norm": 0.10533358955019909, |
| "learning_rate": 1.3171630657555499e-05, |
| "loss": 0.3688, |
| "step": 833 |
| }, |
| { |
| "epoch": 3.807132667617689, |
| "grad_norm": 0.1124577538423527, |
| "learning_rate": 1.3077140540504614e-05, |
| "loss": 0.3638, |
| "step": 834 |
| }, |
| { |
| "epoch": 3.81169757489301, |
| "grad_norm": 0.11642170801308592, |
| "learning_rate": 1.2982924295748274e-05, |
| "loss": 0.3684, |
| "step": 835 |
| }, |
| { |
| "epoch": 3.816262482168331, |
| "grad_norm": 0.10695378952599915, |
| "learning_rate": 1.2888982881699472e-05, |
| "loss": 0.3691, |
| "step": 836 |
| }, |
| { |
| "epoch": 3.820827389443652, |
| "grad_norm": 0.13400187716659637, |
| "learning_rate": 1.2795317253975537e-05, |
| "loss": 0.3691, |
| "step": 837 |
| }, |
| { |
| "epoch": 3.825392296718973, |
| "grad_norm": 0.09951593298438159, |
| "learning_rate": 1.270192836538836e-05, |
| "loss": 0.3697, |
| "step": 838 |
| }, |
| { |
| "epoch": 3.8299572039942937, |
| "grad_norm": 0.10935426565797807, |
| "learning_rate": 1.2608817165934681e-05, |
| "loss": 0.3674, |
| "step": 839 |
| }, |
| { |
| "epoch": 3.8345221112696146, |
| "grad_norm": 0.11667620915560488, |
| "learning_rate": 1.2515984602786487e-05, |
| "loss": 0.3662, |
| "step": 840 |
| }, |
| { |
| "epoch": 3.839087018544936, |
| "grad_norm": 0.10968676357221958, |
| "learning_rate": 1.2423431620281306e-05, |
| "loss": 0.3618, |
| "step": 841 |
| }, |
| { |
| "epoch": 3.843651925820257, |
| "grad_norm": 0.12006579571295849, |
| "learning_rate": 1.2331159159912667e-05, |
| "loss": 0.3703, |
| "step": 842 |
| }, |
| { |
| "epoch": 3.8482168330955777, |
| "grad_norm": 0.11231326039006076, |
| "learning_rate": 1.22391681603205e-05, |
| "loss": 0.3674, |
| "step": 843 |
| }, |
| { |
| "epoch": 3.8527817403708986, |
| "grad_norm": 0.11239420460642152, |
| "learning_rate": 1.2147459557281543e-05, |
| "loss": 0.3731, |
| "step": 844 |
| }, |
| { |
| "epoch": 3.85734664764622, |
| "grad_norm": 0.10105192704809327, |
| "learning_rate": 1.2056034283699866e-05, |
| "loss": 0.3634, |
| "step": 845 |
| }, |
| { |
| "epoch": 3.861911554921541, |
| "grad_norm": 0.1324600771108394, |
| "learning_rate": 1.1964893269597408e-05, |
| "loss": 0.3698, |
| "step": 846 |
| }, |
| { |
| "epoch": 3.8664764621968617, |
| "grad_norm": 0.10492225515470834, |
| "learning_rate": 1.1874037442104464e-05, |
| "loss": 0.3656, |
| "step": 847 |
| }, |
| { |
| "epoch": 3.8710413694721826, |
| "grad_norm": 0.1049947478937808, |
| "learning_rate": 1.1783467725450288e-05, |
| "loss": 0.3673, |
| "step": 848 |
| }, |
| { |
| "epoch": 3.8756062767475035, |
| "grad_norm": 0.12343105623506188, |
| "learning_rate": 1.1693185040953647e-05, |
| "loss": 0.3692, |
| "step": 849 |
| }, |
| { |
| "epoch": 3.8801711840228243, |
| "grad_norm": 0.10907582190242261, |
| "learning_rate": 1.1603190307013485e-05, |
| "loss": 0.3689, |
| "step": 850 |
| }, |
| { |
| "epoch": 3.8847360912981452, |
| "grad_norm": 0.1048746000015905, |
| "learning_rate": 1.1513484439099592e-05, |
| "loss": 0.3688, |
| "step": 851 |
| }, |
| { |
| "epoch": 3.8893009985734666, |
| "grad_norm": 0.110532809391589, |
| "learning_rate": 1.1424068349743282e-05, |
| "loss": 0.3707, |
| "step": 852 |
| }, |
| { |
| "epoch": 3.8938659058487874, |
| "grad_norm": 0.1082453458710121, |
| "learning_rate": 1.133494294852806e-05, |
| "loss": 0.3684, |
| "step": 853 |
| }, |
| { |
| "epoch": 3.8984308131241083, |
| "grad_norm": 0.10345660407308066, |
| "learning_rate": 1.1246109142080463e-05, |
| "loss": 0.3678, |
| "step": 854 |
| }, |
| { |
| "epoch": 3.9029957203994297, |
| "grad_norm": 0.10898385130369857, |
| "learning_rate": 1.1157567834060732e-05, |
| "loss": 0.3703, |
| "step": 855 |
| }, |
| { |
| "epoch": 3.9075606276747505, |
| "grad_norm": 0.10868802232496799, |
| "learning_rate": 1.1069319925153716e-05, |
| "loss": 0.3672, |
| "step": 856 |
| }, |
| { |
| "epoch": 3.9121255349500714, |
| "grad_norm": 0.1001172075462793, |
| "learning_rate": 1.098136631305966e-05, |
| "loss": 0.3647, |
| "step": 857 |
| }, |
| { |
| "epoch": 3.9166904422253923, |
| "grad_norm": 0.10002086132257204, |
| "learning_rate": 1.0893707892485046e-05, |
| "loss": 0.3687, |
| "step": 858 |
| }, |
| { |
| "epoch": 3.921255349500713, |
| "grad_norm": 0.11817525489059527, |
| "learning_rate": 1.080634555513358e-05, |
| "loss": 0.3681, |
| "step": 859 |
| }, |
| { |
| "epoch": 3.925820256776034, |
| "grad_norm": 0.10005386509374838, |
| "learning_rate": 1.0719280189697012e-05, |
| "loss": 0.371, |
| "step": 860 |
| }, |
| { |
| "epoch": 3.930385164051355, |
| "grad_norm": 0.09735019950539485, |
| "learning_rate": 1.0632512681846188e-05, |
| "loss": 0.3647, |
| "step": 861 |
| }, |
| { |
| "epoch": 3.9349500713266763, |
| "grad_norm": 0.1096708423704249, |
| "learning_rate": 1.0546043914222004e-05, |
| "loss": 0.3689, |
| "step": 862 |
| }, |
| { |
| "epoch": 3.939514978601997, |
| "grad_norm": 0.10947940483538458, |
| "learning_rate": 1.045987476642639e-05, |
| "loss": 0.3653, |
| "step": 863 |
| }, |
| { |
| "epoch": 3.944079885877318, |
| "grad_norm": 0.10334503635714308, |
| "learning_rate": 1.0374006115013446e-05, |
| "loss": 0.3705, |
| "step": 864 |
| }, |
| { |
| "epoch": 3.948644793152639, |
| "grad_norm": 0.09613211541479365, |
| "learning_rate": 1.0288438833480434e-05, |
| "loss": 0.3653, |
| "step": 865 |
| }, |
| { |
| "epoch": 3.9532097004279603, |
| "grad_norm": 0.11391043223040556, |
| "learning_rate": 1.0203173792258964e-05, |
| "loss": 0.3709, |
| "step": 866 |
| }, |
| { |
| "epoch": 3.957774607703281, |
| "grad_norm": 0.1112000931274841, |
| "learning_rate": 1.0118211858706126e-05, |
| "loss": 0.3695, |
| "step": 867 |
| }, |
| { |
| "epoch": 3.962339514978602, |
| "grad_norm": 0.09623531613971865, |
| "learning_rate": 1.0033553897095611e-05, |
| "loss": 0.3673, |
| "step": 868 |
| }, |
| { |
| "epoch": 3.966904422253923, |
| "grad_norm": 0.12054451116095541, |
| "learning_rate": 9.949200768608978e-06, |
| "loss": 0.3662, |
| "step": 869 |
| }, |
| { |
| "epoch": 3.971469329529244, |
| "grad_norm": 0.10469643561676546, |
| "learning_rate": 9.865153331326888e-06, |
| "loss": 0.3722, |
| "step": 870 |
| }, |
| { |
| "epoch": 3.9760342368045647, |
| "grad_norm": 0.10369572994121254, |
| "learning_rate": 9.781412440220364e-06, |
| "loss": 0.3658, |
| "step": 871 |
| }, |
| { |
| "epoch": 3.980599144079886, |
| "grad_norm": 0.11265124183687726, |
| "learning_rate": 9.697978947142083e-06, |
| "loss": 0.366, |
| "step": 872 |
| }, |
| { |
| "epoch": 3.985164051355207, |
| "grad_norm": 0.09909112429172705, |
| "learning_rate": 9.61485370081773e-06, |
| "loss": 0.3655, |
| "step": 873 |
| }, |
| { |
| "epoch": 3.989728958630528, |
| "grad_norm": 0.10458100196949884, |
| "learning_rate": 9.532037546837328e-06, |
| "loss": 0.3669, |
| "step": 874 |
| }, |
| { |
| "epoch": 3.9942938659058487, |
| "grad_norm": 0.10060786891352515, |
| "learning_rate": 9.4495313276467e-06, |
| "loss": 0.3684, |
| "step": 875 |
| }, |
| { |
| "epoch": 3.99885877318117, |
| "grad_norm": 0.11243962749211547, |
| "learning_rate": 9.367335882538859e-06, |
| "loss": 0.3651, |
| "step": 876 |
| }, |
| { |
| "epoch": 4.003423680456491, |
| "grad_norm": 0.37014150627969, |
| "learning_rate": 9.285452047645447e-06, |
| "loss": 0.6984, |
| "step": 877 |
| }, |
| { |
| "epoch": 4.007988587731812, |
| "grad_norm": 0.15671543024887574, |
| "learning_rate": 9.20388065592829e-06, |
| "loss": 0.3516, |
| "step": 878 |
| }, |
| { |
| "epoch": 4.012553495007133, |
| "grad_norm": 0.16714819900057928, |
| "learning_rate": 9.122622537170858e-06, |
| "loss": 0.3535, |
| "step": 879 |
| }, |
| { |
| "epoch": 4.0171184022824535, |
| "grad_norm": 0.15835660325889345, |
| "learning_rate": 9.041678517969878e-06, |
| "loss": 0.3516, |
| "step": 880 |
| }, |
| { |
| "epoch": 4.021683309557774, |
| "grad_norm": 0.1420672767585784, |
| "learning_rate": 8.961049421726927e-06, |
| "loss": 0.3484, |
| "step": 881 |
| }, |
| { |
| "epoch": 4.026248216833095, |
| "grad_norm": 0.13783878715714176, |
| "learning_rate": 8.880736068639972e-06, |
| "loss": 0.3476, |
| "step": 882 |
| }, |
| { |
| "epoch": 4.030813124108416, |
| "grad_norm": 0.13731172171411218, |
| "learning_rate": 8.800739275695162e-06, |
| "loss": 0.3559, |
| "step": 883 |
| }, |
| { |
| "epoch": 4.035378031383738, |
| "grad_norm": 0.14178993491605563, |
| "learning_rate": 8.721059856658374e-06, |
| "loss": 0.3505, |
| "step": 884 |
| }, |
| { |
| "epoch": 4.039942938659059, |
| "grad_norm": 0.12951830366700642, |
| "learning_rate": 8.641698622067056e-06, |
| "loss": 0.3483, |
| "step": 885 |
| }, |
| { |
| "epoch": 4.04450784593438, |
| "grad_norm": 0.13767174076249047, |
| "learning_rate": 8.56265637922192e-06, |
| "loss": 0.3516, |
| "step": 886 |
| }, |
| { |
| "epoch": 4.049072753209701, |
| "grad_norm": 0.13591547948547417, |
| "learning_rate": 8.483933932178714e-06, |
| "loss": 0.3569, |
| "step": 887 |
| }, |
| { |
| "epoch": 4.0536376604850215, |
| "grad_norm": 0.12191863460297671, |
| "learning_rate": 8.405532081740104e-06, |
| "loss": 0.3479, |
| "step": 888 |
| }, |
| { |
| "epoch": 4.058202567760342, |
| "grad_norm": 0.1248887971732098, |
| "learning_rate": 8.327451625447462e-06, |
| "loss": 0.3494, |
| "step": 889 |
| }, |
| { |
| "epoch": 4.062767475035663, |
| "grad_norm": 0.13804862882003327, |
| "learning_rate": 8.24969335757281e-06, |
| "loss": 0.3508, |
| "step": 890 |
| }, |
| { |
| "epoch": 4.067332382310984, |
| "grad_norm": 0.11333955902964997, |
| "learning_rate": 8.17225806911071e-06, |
| "loss": 0.3515, |
| "step": 891 |
| }, |
| { |
| "epoch": 4.071897289586305, |
| "grad_norm": 0.11550369078052372, |
| "learning_rate": 8.095146547770202e-06, |
| "loss": 0.3523, |
| "step": 892 |
| }, |
| { |
| "epoch": 4.076462196861626, |
| "grad_norm": 0.12066342905233718, |
| "learning_rate": 8.018359577966822e-06, |
| "loss": 0.351, |
| "step": 893 |
| }, |
| { |
| "epoch": 4.081027104136947, |
| "grad_norm": 0.12020962194954402, |
| "learning_rate": 7.941897940814613e-06, |
| "loss": 0.3511, |
| "step": 894 |
| }, |
| { |
| "epoch": 4.085592011412269, |
| "grad_norm": 0.10735766251661075, |
| "learning_rate": 7.865762414118197e-06, |
| "loss": 0.3515, |
| "step": 895 |
| }, |
| { |
| "epoch": 4.0901569186875895, |
| "grad_norm": 0.10152101132548039, |
| "learning_rate": 7.7899537723648e-06, |
| "loss": 0.3494, |
| "step": 896 |
| }, |
| { |
| "epoch": 4.09472182596291, |
| "grad_norm": 0.11175247171368977, |
| "learning_rate": 7.71447278671646e-06, |
| "loss": 0.3434, |
| "step": 897 |
| }, |
| { |
| "epoch": 4.099286733238231, |
| "grad_norm": 0.11008323120104475, |
| "learning_rate": 7.639320225002106e-06, |
| "loss": 0.3562, |
| "step": 898 |
| }, |
| { |
| "epoch": 4.103851640513552, |
| "grad_norm": 0.10079393628102, |
| "learning_rate": 7.564496851709799e-06, |
| "loss": 0.3462, |
| "step": 899 |
| }, |
| { |
| "epoch": 4.108416547788873, |
| "grad_norm": 0.10224032651804278, |
| "learning_rate": 7.490003427978947e-06, |
| "loss": 0.352, |
| "step": 900 |
| }, |
| { |
| "epoch": 4.112981455064194, |
| "grad_norm": 0.10528872332896734, |
| "learning_rate": 7.415840711592515e-06, |
| "loss": 0.3583, |
| "step": 901 |
| }, |
| { |
| "epoch": 4.117546362339515, |
| "grad_norm": 0.09810153639928686, |
| "learning_rate": 7.342009456969394e-06, |
| "loss": 0.3507, |
| "step": 902 |
| }, |
| { |
| "epoch": 4.122111269614836, |
| "grad_norm": 0.09377946822299223, |
| "learning_rate": 7.26851041515666e-06, |
| "loss": 0.3478, |
| "step": 903 |
| }, |
| { |
| "epoch": 4.1266761768901565, |
| "grad_norm": 0.09941074294315165, |
| "learning_rate": 7.1953443338219635e-06, |
| "loss": 0.3474, |
| "step": 904 |
| }, |
| { |
| "epoch": 4.131241084165478, |
| "grad_norm": 0.09923689941405467, |
| "learning_rate": 7.12251195724595e-06, |
| "loss": 0.3497, |
| "step": 905 |
| }, |
| { |
| "epoch": 4.135805991440799, |
| "grad_norm": 0.09632085335191928, |
| "learning_rate": 7.0500140263146085e-06, |
| "loss": 0.3502, |
| "step": 906 |
| }, |
| { |
| "epoch": 4.14037089871612, |
| "grad_norm": 0.09318553780432773, |
| "learning_rate": 6.977851278511831e-06, |
| "loss": 0.3519, |
| "step": 907 |
| }, |
| { |
| "epoch": 4.144935805991441, |
| "grad_norm": 0.1055662018772662, |
| "learning_rate": 6.9060244479118325e-06, |
| "loss": 0.3447, |
| "step": 908 |
| }, |
| { |
| "epoch": 4.149500713266762, |
| "grad_norm": 0.09794765647352846, |
| "learning_rate": 6.8345342651717415e-06, |
| "loss": 0.3521, |
| "step": 909 |
| }, |
| { |
| "epoch": 4.154065620542083, |
| "grad_norm": 0.10262064175384165, |
| "learning_rate": 6.763381457524137e-06, |
| "loss": 0.3467, |
| "step": 910 |
| }, |
| { |
| "epoch": 4.158630527817404, |
| "grad_norm": 0.0924225272063823, |
| "learning_rate": 6.692566748769645e-06, |
| "loss": 0.348, |
| "step": 911 |
| }, |
| { |
| "epoch": 4.1631954350927245, |
| "grad_norm": 0.09396615551983964, |
| "learning_rate": 6.622090859269579e-06, |
| "loss": 0.3485, |
| "step": 912 |
| }, |
| { |
| "epoch": 4.167760342368045, |
| "grad_norm": 0.1085909553244327, |
| "learning_rate": 6.5519545059386495e-06, |
| "loss": 0.3503, |
| "step": 913 |
| }, |
| { |
| "epoch": 4.172325249643366, |
| "grad_norm": 0.10045646940751854, |
| "learning_rate": 6.482158402237622e-06, |
| "loss": 0.3515, |
| "step": 914 |
| }, |
| { |
| "epoch": 4.176890156918688, |
| "grad_norm": 0.10030605637198417, |
| "learning_rate": 6.412703258166089e-06, |
| "loss": 0.3513, |
| "step": 915 |
| }, |
| { |
| "epoch": 4.181455064194009, |
| "grad_norm": 0.08816024777952751, |
| "learning_rate": 6.343589780255226e-06, |
| "loss": 0.3463, |
| "step": 916 |
| }, |
| { |
| "epoch": 4.18601997146933, |
| "grad_norm": 0.10373572668229089, |
| "learning_rate": 6.274818671560612e-06, |
| "loss": 0.3521, |
| "step": 917 |
| }, |
| { |
| "epoch": 4.190584878744651, |
| "grad_norm": 0.09661236890820955, |
| "learning_rate": 6.2063906316550944e-06, |
| "loss": 0.3535, |
| "step": 918 |
| }, |
| { |
| "epoch": 4.195149786019972, |
| "grad_norm": 0.09307921133377361, |
| "learning_rate": 6.138306356621666e-06, |
| "loss": 0.352, |
| "step": 919 |
| }, |
| { |
| "epoch": 4.1997146932952925, |
| "grad_norm": 0.09643011644388448, |
| "learning_rate": 6.0705665390463545e-06, |
| "loss": 0.3495, |
| "step": 920 |
| }, |
| { |
| "epoch": 4.204279600570613, |
| "grad_norm": 0.09380570498729787, |
| "learning_rate": 6.003171868011226e-06, |
| "loss": 0.351, |
| "step": 921 |
| }, |
| { |
| "epoch": 4.208844507845934, |
| "grad_norm": 0.09110098326246356, |
| "learning_rate": 5.9361230290873175e-06, |
| "loss": 0.3501, |
| "step": 922 |
| }, |
| { |
| "epoch": 4.213409415121255, |
| "grad_norm": 0.10109563700718154, |
| "learning_rate": 5.869420704327722e-06, |
| "loss": 0.3523, |
| "step": 923 |
| }, |
| { |
| "epoch": 4.217974322396576, |
| "grad_norm": 0.08878313714056194, |
| "learning_rate": 5.803065572260633e-06, |
| "loss": 0.3482, |
| "step": 924 |
| }, |
| { |
| "epoch": 4.222539229671897, |
| "grad_norm": 0.08818694187121523, |
| "learning_rate": 5.737058307882391e-06, |
| "loss": 0.3528, |
| "step": 925 |
| }, |
| { |
| "epoch": 4.227104136947219, |
| "grad_norm": 0.09295440449783292, |
| "learning_rate": 5.671399582650705e-06, |
| "loss": 0.3461, |
| "step": 926 |
| }, |
| { |
| "epoch": 4.2316690442225395, |
| "grad_norm": 0.09685863269312232, |
| "learning_rate": 5.606090064477738e-06, |
| "loss": 0.3503, |
| "step": 927 |
| }, |
| { |
| "epoch": 4.23623395149786, |
| "grad_norm": 0.09009726502122058, |
| "learning_rate": 5.541130417723359e-06, |
| "loss": 0.3439, |
| "step": 928 |
| }, |
| { |
| "epoch": 4.240798858773181, |
| "grad_norm": 0.08660914537757003, |
| "learning_rate": 5.476521303188414e-06, |
| "loss": 0.353, |
| "step": 929 |
| }, |
| { |
| "epoch": 4.245363766048502, |
| "grad_norm": 0.09086935202462741, |
| "learning_rate": 5.4122633781079135e-06, |
| "loss": 0.3523, |
| "step": 930 |
| }, |
| { |
| "epoch": 4.249928673323823, |
| "grad_norm": 0.09963483067768689, |
| "learning_rate": 5.348357296144437e-06, |
| "loss": 0.3528, |
| "step": 931 |
| }, |
| { |
| "epoch": 4.254493580599144, |
| "grad_norm": 0.08898518315403384, |
| "learning_rate": 5.2848037073814255e-06, |
| "loss": 0.3492, |
| "step": 932 |
| }, |
| { |
| "epoch": 4.259058487874465, |
| "grad_norm": 0.09983473387703683, |
| "learning_rate": 5.221603258316577e-06, |
| "loss": 0.3537, |
| "step": 933 |
| }, |
| { |
| "epoch": 4.263623395149786, |
| "grad_norm": 0.09231469544481466, |
| "learning_rate": 5.158756591855336e-06, |
| "loss": 0.3505, |
| "step": 934 |
| }, |
| { |
| "epoch": 4.268188302425107, |
| "grad_norm": 0.08932201492732685, |
| "learning_rate": 5.0962643473042536e-06, |
| "loss": 0.3506, |
| "step": 935 |
| }, |
| { |
| "epoch": 4.2727532097004275, |
| "grad_norm": 0.09165674817192716, |
| "learning_rate": 5.034127160364528e-06, |
| "loss": 0.3542, |
| "step": 936 |
| }, |
| { |
| "epoch": 4.277318116975749, |
| "grad_norm": 0.09295144892311318, |
| "learning_rate": 4.972345663125575e-06, |
| "loss": 0.3458, |
| "step": 937 |
| }, |
| { |
| "epoch": 4.28188302425107, |
| "grad_norm": 0.09773667025776654, |
| "learning_rate": 4.910920484058519e-06, |
| "loss": 0.3489, |
| "step": 938 |
| }, |
| { |
| "epoch": 4.286447931526391, |
| "grad_norm": 0.09478260876178425, |
| "learning_rate": 4.849852248009899e-06, |
| "loss": 0.3518, |
| "step": 939 |
| }, |
| { |
| "epoch": 4.291012838801712, |
| "grad_norm": 0.08633818761277473, |
| "learning_rate": 4.789141576195207e-06, |
| "loss": 0.3481, |
| "step": 940 |
| }, |
| { |
| "epoch": 4.295577746077033, |
| "grad_norm": 0.09290651843039628, |
| "learning_rate": 4.72878908619264e-06, |
| "loss": 0.3459, |
| "step": 941 |
| }, |
| { |
| "epoch": 4.300142653352354, |
| "grad_norm": 0.0901405739554656, |
| "learning_rate": 4.668795391936805e-06, |
| "loss": 0.3438, |
| "step": 942 |
| }, |
| { |
| "epoch": 4.304707560627675, |
| "grad_norm": 0.09095933811292778, |
| "learning_rate": 4.609161103712447e-06, |
| "loss": 0.35, |
| "step": 943 |
| }, |
| { |
| "epoch": 4.3092724679029955, |
| "grad_norm": 0.08514226082856571, |
| "learning_rate": 4.54988682814828e-06, |
| "loss": 0.3514, |
| "step": 944 |
| }, |
| { |
| "epoch": 4.313837375178316, |
| "grad_norm": 0.09103184637713459, |
| "learning_rate": 4.490973168210788e-06, |
| "loss": 0.351, |
| "step": 945 |
| }, |
| { |
| "epoch": 4.318402282453638, |
| "grad_norm": 0.08771070966673426, |
| "learning_rate": 4.43242072319809e-06, |
| "loss": 0.3514, |
| "step": 946 |
| }, |
| { |
| "epoch": 4.322967189728959, |
| "grad_norm": 0.08785642021314082, |
| "learning_rate": 4.374230088733855e-06, |
| "loss": 0.349, |
| "step": 947 |
| }, |
| { |
| "epoch": 4.32753209700428, |
| "grad_norm": 0.09441411098621398, |
| "learning_rate": 4.3164018567612495e-06, |
| "loss": 0.3505, |
| "step": 948 |
| }, |
| { |
| "epoch": 4.332097004279601, |
| "grad_norm": 0.08194712702951042, |
| "learning_rate": 4.2589366155369125e-06, |
| "loss": 0.3487, |
| "step": 949 |
| }, |
| { |
| "epoch": 4.336661911554922, |
| "grad_norm": 0.08937141345166866, |
| "learning_rate": 4.201834949624957e-06, |
| "loss": 0.3523, |
| "step": 950 |
| }, |
| { |
| "epoch": 4.3412268188302425, |
| "grad_norm": 0.08875313880651403, |
| "learning_rate": 4.145097439891026e-06, |
| "loss": 0.3529, |
| "step": 951 |
| }, |
| { |
| "epoch": 4.345791726105563, |
| "grad_norm": 0.08216164995036938, |
| "learning_rate": 4.088724663496391e-06, |
| "loss": 0.3487, |
| "step": 952 |
| }, |
| { |
| "epoch": 4.350356633380884, |
| "grad_norm": 0.0844163738277272, |
| "learning_rate": 4.032717193892097e-06, |
| "loss": 0.3536, |
| "step": 953 |
| }, |
| { |
| "epoch": 4.354921540656205, |
| "grad_norm": 0.07958475981719833, |
| "learning_rate": 3.977075600813112e-06, |
| "loss": 0.3524, |
| "step": 954 |
| }, |
| { |
| "epoch": 4.359486447931526, |
| "grad_norm": 0.08312472608098892, |
| "learning_rate": 3.921800450272497e-06, |
| "loss": 0.3564, |
| "step": 955 |
| }, |
| { |
| "epoch": 4.364051355206847, |
| "grad_norm": 0.09090031843488945, |
| "learning_rate": 3.866892304555729e-06, |
| "loss": 0.3503, |
| "step": 956 |
| }, |
| { |
| "epoch": 4.368616262482169, |
| "grad_norm": 0.08646905004538202, |
| "learning_rate": 3.8123517222149064e-06, |
| "loss": 0.3539, |
| "step": 957 |
| }, |
| { |
| "epoch": 4.37318116975749, |
| "grad_norm": 0.08630929584464324, |
| "learning_rate": 3.7581792580630995e-06, |
| "loss": 0.3508, |
| "step": 958 |
| }, |
| { |
| "epoch": 4.3777460770328105, |
| "grad_norm": 0.08420773366472539, |
| "learning_rate": 3.7043754631687168e-06, |
| "loss": 0.3506, |
| "step": 959 |
| }, |
| { |
| "epoch": 4.382310984308131, |
| "grad_norm": 0.08626254558605037, |
| "learning_rate": 3.650940884849865e-06, |
| "loss": 0.3496, |
| "step": 960 |
| }, |
| { |
| "epoch": 4.386875891583452, |
| "grad_norm": 0.08202547191435747, |
| "learning_rate": 3.5978760666688283e-06, |
| "loss": 0.3463, |
| "step": 961 |
| }, |
| { |
| "epoch": 4.391440798858773, |
| "grad_norm": 0.08400830894322005, |
| "learning_rate": 3.545181548426482e-06, |
| "loss": 0.3534, |
| "step": 962 |
| }, |
| { |
| "epoch": 4.396005706134094, |
| "grad_norm": 0.08174812448321671, |
| "learning_rate": 3.4928578661568513e-06, |
| "loss": 0.3505, |
| "step": 963 |
| }, |
| { |
| "epoch": 4.400570613409415, |
| "grad_norm": 0.08855138380230874, |
| "learning_rate": 3.4409055521216472e-06, |
| "loss": 0.3484, |
| "step": 964 |
| }, |
| { |
| "epoch": 4.405135520684736, |
| "grad_norm": 0.08806266323171609, |
| "learning_rate": 3.3893251348048107e-06, |
| "loss": 0.3517, |
| "step": 965 |
| }, |
| { |
| "epoch": 4.409700427960057, |
| "grad_norm": 0.08598387207772194, |
| "learning_rate": 3.3381171389072155e-06, |
| "loss": 0.3464, |
| "step": 966 |
| }, |
| { |
| "epoch": 4.414265335235378, |
| "grad_norm": 0.08257564200380783, |
| "learning_rate": 3.287282085341237e-06, |
| "loss": 0.3434, |
| "step": 967 |
| }, |
| { |
| "epoch": 4.418830242510699, |
| "grad_norm": 0.08050286636135041, |
| "learning_rate": 3.236820491225543e-06, |
| "loss": 0.3507, |
| "step": 968 |
| }, |
| { |
| "epoch": 4.42339514978602, |
| "grad_norm": 0.0804766651869741, |
| "learning_rate": 3.1867328698797784e-06, |
| "loss": 0.3559, |
| "step": 969 |
| }, |
| { |
| "epoch": 4.427960057061341, |
| "grad_norm": 0.08321920927722377, |
| "learning_rate": 3.1370197308193464e-06, |
| "loss": 0.3495, |
| "step": 970 |
| }, |
| { |
| "epoch": 4.432524964336662, |
| "grad_norm": 0.08340309487334537, |
| "learning_rate": 3.08768157975023e-06, |
| "loss": 0.3485, |
| "step": 971 |
| }, |
| { |
| "epoch": 4.437089871611983, |
| "grad_norm": 0.08628533559205914, |
| "learning_rate": 3.0387189185638877e-06, |
| "loss": 0.3465, |
| "step": 972 |
| }, |
| { |
| "epoch": 4.441654778887304, |
| "grad_norm": 0.0839367124944577, |
| "learning_rate": 2.99013224533208e-06, |
| "loss": 0.3514, |
| "step": 973 |
| }, |
| { |
| "epoch": 4.446219686162625, |
| "grad_norm": 0.08431513019450357, |
| "learning_rate": 2.9419220543018647e-06, |
| "loss": 0.35, |
| "step": 974 |
| }, |
| { |
| "epoch": 4.4507845934379455, |
| "grad_norm": 0.08138210314265082, |
| "learning_rate": 2.894088835890512e-06, |
| "loss": 0.3503, |
| "step": 975 |
| }, |
| { |
| "epoch": 4.455349500713266, |
| "grad_norm": 0.08209216743333449, |
| "learning_rate": 2.846633076680565e-06, |
| "loss": 0.3501, |
| "step": 976 |
| }, |
| { |
| "epoch": 4.459914407988588, |
| "grad_norm": 0.07980157930120314, |
| "learning_rate": 2.7995552594148613e-06, |
| "loss": 0.3477, |
| "step": 977 |
| }, |
| { |
| "epoch": 4.464479315263909, |
| "grad_norm": 0.08454274847735264, |
| "learning_rate": 2.7528558629916457e-06, |
| "loss": 0.3508, |
| "step": 978 |
| }, |
| { |
| "epoch": 4.46904422253923, |
| "grad_norm": 0.07827269612143122, |
| "learning_rate": 2.706535362459657e-06, |
| "loss": 0.3541, |
| "step": 979 |
| }, |
| { |
| "epoch": 4.473609129814551, |
| "grad_norm": 0.08066212537936188, |
| "learning_rate": 2.6605942290133515e-06, |
| "loss": 0.3468, |
| "step": 980 |
| }, |
| { |
| "epoch": 4.478174037089872, |
| "grad_norm": 0.08038509522306664, |
| "learning_rate": 2.615032929988055e-06, |
| "loss": 0.3493, |
| "step": 981 |
| }, |
| { |
| "epoch": 4.482738944365193, |
| "grad_norm": 0.08121370144409773, |
| "learning_rate": 2.569851928855256e-06, |
| "loss": 0.3486, |
| "step": 982 |
| }, |
| { |
| "epoch": 4.4873038516405135, |
| "grad_norm": 0.07856168793082442, |
| "learning_rate": 2.525051685217865e-06, |
| "loss": 0.3507, |
| "step": 983 |
| }, |
| { |
| "epoch": 4.491868758915834, |
| "grad_norm": 0.07958873197319534, |
| "learning_rate": 2.4806326548055238e-06, |
| "loss": 0.3493, |
| "step": 984 |
| }, |
| { |
| "epoch": 4.496433666191155, |
| "grad_norm": 0.08171369971311804, |
| "learning_rate": 2.436595289470023e-06, |
| "loss": 0.3508, |
| "step": 985 |
| }, |
| { |
| "epoch": 4.500998573466476, |
| "grad_norm": 0.08120306757647351, |
| "learning_rate": 2.3929400371806377e-06, |
| "loss": 0.3521, |
| "step": 986 |
| }, |
| { |
| "epoch": 4.505563480741797, |
| "grad_norm": 0.07637699134693174, |
| "learning_rate": 2.3496673420196326e-06, |
| "loss": 0.3466, |
| "step": 987 |
| }, |
| { |
| "epoch": 4.510128388017119, |
| "grad_norm": 0.08223264882483422, |
| "learning_rate": 2.306777644177709e-06, |
| "loss": 0.3507, |
| "step": 988 |
| }, |
| { |
| "epoch": 4.51469329529244, |
| "grad_norm": 0.07915711031881265, |
| "learning_rate": 2.2642713799495207e-06, |
| "loss": 0.3554, |
| "step": 989 |
| }, |
| { |
| "epoch": 4.519258202567761, |
| "grad_norm": 0.08144647282074215, |
| "learning_rate": 2.222148981729273e-06, |
| "loss": 0.3529, |
| "step": 990 |
| }, |
| { |
| "epoch": 4.5238231098430814, |
| "grad_norm": 0.08053252018883734, |
| "learning_rate": 2.1804108780062805e-06, |
| "loss": 0.3464, |
| "step": 991 |
| }, |
| { |
| "epoch": 4.528388017118402, |
| "grad_norm": 0.08178510466321848, |
| "learning_rate": 2.139057493360643e-06, |
| "loss": 0.3477, |
| "step": 992 |
| }, |
| { |
| "epoch": 4.532952924393723, |
| "grad_norm": 0.08178556560050178, |
| "learning_rate": 2.098089248458912e-06, |
| "loss": 0.3485, |
| "step": 993 |
| }, |
| { |
| "epoch": 4.537517831669044, |
| "grad_norm": 0.08220979435959991, |
| "learning_rate": 2.0575065600498067e-06, |
| "loss": 0.3517, |
| "step": 994 |
| }, |
| { |
| "epoch": 4.542082738944365, |
| "grad_norm": 0.07762389992501054, |
| "learning_rate": 2.0173098409599757e-06, |
| "loss": 0.3504, |
| "step": 995 |
| }, |
| { |
| "epoch": 4.546647646219686, |
| "grad_norm": 0.07845697347936922, |
| "learning_rate": 1.977499500089808e-06, |
| "loss": 0.3473, |
| "step": 996 |
| }, |
| { |
| "epoch": 4.551212553495007, |
| "grad_norm": 0.08243028759784551, |
| "learning_rate": 1.9380759424092722e-06, |
| "loss": 0.3488, |
| "step": 997 |
| }, |
| { |
| "epoch": 4.555777460770328, |
| "grad_norm": 0.08482588178810972, |
| "learning_rate": 1.899039568953782e-06, |
| "loss": 0.3485, |
| "step": 998 |
| }, |
| { |
| "epoch": 4.560342368045649, |
| "grad_norm": 0.08930393433101652, |
| "learning_rate": 1.8603907768201335e-06, |
| "loss": 0.3477, |
| "step": 999 |
| }, |
| { |
| "epoch": 4.56490727532097, |
| "grad_norm": 0.07646082270023902, |
| "learning_rate": 1.8221299591624531e-06, |
| "loss": 0.3541, |
| "step": 1000 |
| }, |
| { |
| "epoch": 4.569472182596291, |
| "grad_norm": 0.08228826529784623, |
| "learning_rate": 1.7842575051882117e-06, |
| "loss": 0.3455, |
| "step": 1001 |
| }, |
| { |
| "epoch": 4.574037089871612, |
| "grad_norm": 0.0826320638970819, |
| "learning_rate": 1.7467738001542534e-06, |
| "loss": 0.3541, |
| "step": 1002 |
| }, |
| { |
| "epoch": 4.578601997146933, |
| "grad_norm": 0.07749734600577586, |
| "learning_rate": 1.7096792253628747e-06, |
| "loss": 0.3508, |
| "step": 1003 |
| }, |
| { |
| "epoch": 4.583166904422254, |
| "grad_norm": 0.07849844115759268, |
| "learning_rate": 1.6729741581579695e-06, |
| "loss": 0.3483, |
| "step": 1004 |
| }, |
| { |
| "epoch": 4.587731811697575, |
| "grad_norm": 0.08061411035349854, |
| "learning_rate": 1.6366589719211478e-06, |
| "loss": 0.3459, |
| "step": 1005 |
| }, |
| { |
| "epoch": 4.592296718972896, |
| "grad_norm": 0.07707354315110565, |
| "learning_rate": 1.6007340360679835e-06, |
| "loss": 0.3472, |
| "step": 1006 |
| }, |
| { |
| "epoch": 4.5968616262482165, |
| "grad_norm": 0.07564998473974219, |
| "learning_rate": 1.56519971604423e-06, |
| "loss": 0.3536, |
| "step": 1007 |
| }, |
| { |
| "epoch": 4.601426533523538, |
| "grad_norm": 0.07973487309962904, |
| "learning_rate": 1.5300563733220997e-06, |
| "loss": 0.3524, |
| "step": 1008 |
| }, |
| { |
| "epoch": 4.605991440798859, |
| "grad_norm": 0.0798405736175932, |
| "learning_rate": 1.4953043653966125e-06, |
| "loss": 0.3437, |
| "step": 1009 |
| }, |
| { |
| "epoch": 4.61055634807418, |
| "grad_norm": 0.07661822938491461, |
| "learning_rate": 1.4609440457819201e-06, |
| "loss": 0.3505, |
| "step": 1010 |
| }, |
| { |
| "epoch": 4.615121255349501, |
| "grad_norm": 0.0803917140814825, |
| "learning_rate": 1.4269757640077474e-06, |
| "loss": 0.3473, |
| "step": 1011 |
| }, |
| { |
| "epoch": 4.619686162624822, |
| "grad_norm": 0.08142790095124294, |
| "learning_rate": 1.393399865615832e-06, |
| "loss": 0.356, |
| "step": 1012 |
| }, |
| { |
| "epoch": 4.624251069900143, |
| "grad_norm": 0.07941212240240511, |
| "learning_rate": 1.3602166921563709e-06, |
| "loss": 0.3469, |
| "step": 1013 |
| }, |
| { |
| "epoch": 4.628815977175464, |
| "grad_norm": 0.07567140453271615, |
| "learning_rate": 1.3274265811845877e-06, |
| "loss": 0.3565, |
| "step": 1014 |
| }, |
| { |
| "epoch": 4.633380884450784, |
| "grad_norm": 0.07693089631655367, |
| "learning_rate": 1.2950298662572914e-06, |
| "loss": 0.3511, |
| "step": 1015 |
| }, |
| { |
| "epoch": 4.637945791726105, |
| "grad_norm": 0.07649326760766532, |
| "learning_rate": 1.2630268769294695e-06, |
| "loss": 0.3488, |
| "step": 1016 |
| }, |
| { |
| "epoch": 4.642510699001426, |
| "grad_norm": 0.07907653808032476, |
| "learning_rate": 1.2314179387509451e-06, |
| "loss": 0.3498, |
| "step": 1017 |
| }, |
| { |
| "epoch": 4.647075606276747, |
| "grad_norm": 0.08218341042555163, |
| "learning_rate": 1.2002033732630624e-06, |
| "loss": 0.3456, |
| "step": 1018 |
| }, |
| { |
| "epoch": 4.651640513552069, |
| "grad_norm": 0.078114066869552, |
| "learning_rate": 1.169383497995411e-06, |
| "loss": 0.3519, |
| "step": 1019 |
| }, |
| { |
| "epoch": 4.65620542082739, |
| "grad_norm": 0.08045459715620887, |
| "learning_rate": 1.1389586264626141e-06, |
| "loss": 0.3509, |
| "step": 1020 |
| }, |
| { |
| "epoch": 4.660770328102711, |
| "grad_norm": 0.07667337101962392, |
| "learning_rate": 1.108929068161122e-06, |
| "loss": 0.3497, |
| "step": 1021 |
| }, |
| { |
| "epoch": 4.6653352353780315, |
| "grad_norm": 0.08000069107101468, |
| "learning_rate": 1.0792951285660601e-06, |
| "loss": 0.3496, |
| "step": 1022 |
| }, |
| { |
| "epoch": 4.669900142653352, |
| "grad_norm": 0.07789612908736167, |
| "learning_rate": 1.0500571091281375e-06, |
| "loss": 0.3493, |
| "step": 1023 |
| }, |
| { |
| "epoch": 4.674465049928673, |
| "grad_norm": 0.08267558856337517, |
| "learning_rate": 1.0212153072705732e-06, |
| "loss": 0.3479, |
| "step": 1024 |
| }, |
| { |
| "epoch": 4.679029957203994, |
| "grad_norm": 0.07420363546139669, |
| "learning_rate": 9.927700163860642e-07, |
| "loss": 0.3533, |
| "step": 1025 |
| }, |
| { |
| "epoch": 4.683594864479315, |
| "grad_norm": 0.07447011872925986, |
| "learning_rate": 9.647215258338138e-07, |
| "loss": 0.3546, |
| "step": 1026 |
| }, |
| { |
| "epoch": 4.688159771754636, |
| "grad_norm": 0.07716250968421168, |
| "learning_rate": 9.370701209365784e-07, |
| "loss": 0.3482, |
| "step": 1027 |
| }, |
| { |
| "epoch": 4.692724679029957, |
| "grad_norm": 0.07514426209617693, |
| "learning_rate": 9.098160829777724e-07, |
| "loss": 0.3455, |
| "step": 1028 |
| }, |
| { |
| "epoch": 4.697289586305278, |
| "grad_norm": 0.079548149699182, |
| "learning_rate": 8.829596891985859e-07, |
| "loss": 0.3511, |
| "step": 1029 |
| }, |
| { |
| "epoch": 4.7018544935805995, |
| "grad_norm": 0.07934376344480484, |
| "learning_rate": 8.565012127951955e-07, |
| "loss": 0.3519, |
| "step": 1030 |
| }, |
| { |
| "epoch": 4.70641940085592, |
| "grad_norm": 0.07581124441915535, |
| "learning_rate": 8.304409229159804e-07, |
| "loss": 0.346, |
| "step": 1031 |
| }, |
| { |
| "epoch": 4.710984308131241, |
| "grad_norm": 0.07394023460690179, |
| "learning_rate": 8.047790846587467e-07, |
| "loss": 0.3533, |
| "step": 1032 |
| }, |
| { |
| "epoch": 4.715549215406562, |
| "grad_norm": 0.07412942941301379, |
| "learning_rate": 7.7951595906808e-07, |
| "loss": 0.3525, |
| "step": 1033 |
| }, |
| { |
| "epoch": 4.720114122681883, |
| "grad_norm": 0.07682082219491011, |
| "learning_rate": 7.546518031326644e-07, |
| "loss": 0.3515, |
| "step": 1034 |
| }, |
| { |
| "epoch": 4.724679029957204, |
| "grad_norm": 0.07823025700829145, |
| "learning_rate": 7.301868697826608e-07, |
| "loss": 0.3492, |
| "step": 1035 |
| }, |
| { |
| "epoch": 4.729243937232525, |
| "grad_norm": 0.0766178375837658, |
| "learning_rate": 7.061214078871725e-07, |
| "loss": 0.3519, |
| "step": 1036 |
| }, |
| { |
| "epoch": 4.733808844507846, |
| "grad_norm": 0.0760779964551882, |
| "learning_rate": 6.824556622516599e-07, |
| "loss": 0.3512, |
| "step": 1037 |
| }, |
| { |
| "epoch": 4.7383737517831666, |
| "grad_norm": 0.07486994881455176, |
| "learning_rate": 6.591898736154801e-07, |
| "loss": 0.3508, |
| "step": 1038 |
| }, |
| { |
| "epoch": 4.742938659058488, |
| "grad_norm": 0.0759371546012345, |
| "learning_rate": 6.363242786494539e-07, |
| "loss": 0.3489, |
| "step": 1039 |
| }, |
| { |
| "epoch": 4.747503566333809, |
| "grad_norm": 0.0749018727178018, |
| "learning_rate": 6.138591099534141e-07, |
| "loss": 0.3493, |
| "step": 1040 |
| }, |
| { |
| "epoch": 4.75206847360913, |
| "grad_norm": 0.0753265709026964, |
| "learning_rate": 5.917945960538918e-07, |
| "loss": 0.3466, |
| "step": 1041 |
| }, |
| { |
| "epoch": 4.756633380884451, |
| "grad_norm": 0.07840244976592427, |
| "learning_rate": 5.701309614017447e-07, |
| "loss": 0.3505, |
| "step": 1042 |
| }, |
| { |
| "epoch": 4.761198288159772, |
| "grad_norm": 0.07383075199179212, |
| "learning_rate": 5.488684263698929e-07, |
| "loss": 0.3536, |
| "step": 1043 |
| }, |
| { |
| "epoch": 4.765763195435093, |
| "grad_norm": 0.11122241090091968, |
| "learning_rate": 5.280072072510933e-07, |
| "loss": 0.3461, |
| "step": 1044 |
| }, |
| { |
| "epoch": 4.770328102710414, |
| "grad_norm": 0.0762041664722857, |
| "learning_rate": 5.075475162557109e-07, |
| "loss": 0.3506, |
| "step": 1045 |
| }, |
| { |
| "epoch": 4.7748930099857345, |
| "grad_norm": 0.07751822838032038, |
| "learning_rate": 4.874895615095776e-07, |
| "loss": 0.3492, |
| "step": 1046 |
| }, |
| { |
| "epoch": 4.779457917261055, |
| "grad_norm": 0.0719584207148954, |
| "learning_rate": 4.6783354705187466e-07, |
| "loss": 0.3466, |
| "step": 1047 |
| }, |
| { |
| "epoch": 4.784022824536376, |
| "grad_norm": 0.07737673543131036, |
| "learning_rate": 4.485796728330449e-07, |
| "loss": 0.3541, |
| "step": 1048 |
| }, |
| { |
| "epoch": 4.788587731811697, |
| "grad_norm": 0.07610517221913907, |
| "learning_rate": 4.29728134712768e-07, |
| "loss": 0.3515, |
| "step": 1049 |
| }, |
| { |
| "epoch": 4.793152639087019, |
| "grad_norm": 0.07503429819011145, |
| "learning_rate": 4.11279124457975e-07, |
| "loss": 0.3499, |
| "step": 1050 |
| }, |
| { |
| "epoch": 4.79771754636234, |
| "grad_norm": 0.07627319972442853, |
| "learning_rate": 3.9323282974088164e-07, |
| "loss": 0.3487, |
| "step": 1051 |
| }, |
| { |
| "epoch": 4.802282453637661, |
| "grad_norm": 0.07282031998275167, |
| "learning_rate": 3.7558943413709583e-07, |
| "loss": 0.3465, |
| "step": 1052 |
| }, |
| { |
| "epoch": 4.806847360912982, |
| "grad_norm": 0.07525573928294778, |
| "learning_rate": 3.5834911712373076e-07, |
| "loss": 0.3488, |
| "step": 1053 |
| }, |
| { |
| "epoch": 4.8114122681883025, |
| "grad_norm": 0.07245362406747542, |
| "learning_rate": 3.4151205407759736e-07, |
| "loss": 0.3515, |
| "step": 1054 |
| }, |
| { |
| "epoch": 4.815977175463623, |
| "grad_norm": 0.0732430683885161, |
| "learning_rate": 3.2507841627341e-07, |
| "loss": 0.3531, |
| "step": 1055 |
| }, |
| { |
| "epoch": 4.820542082738944, |
| "grad_norm": 0.0745846212106986, |
| "learning_rate": 3.090483708820502e-07, |
| "loss": 0.3497, |
| "step": 1056 |
| }, |
| { |
| "epoch": 4.825106990014265, |
| "grad_norm": 0.07347343844152147, |
| "learning_rate": 2.934220809688526e-07, |
| "loss": 0.3518, |
| "step": 1057 |
| }, |
| { |
| "epoch": 4.829671897289586, |
| "grad_norm": 0.07558186917350596, |
| "learning_rate": 2.7819970549197937e-07, |
| "loss": 0.3511, |
| "step": 1058 |
| }, |
| { |
| "epoch": 4.834236804564907, |
| "grad_norm": 0.07275618304565004, |
| "learning_rate": 2.63381399300755e-07, |
| "loss": 0.3472, |
| "step": 1059 |
| }, |
| { |
| "epoch": 4.838801711840228, |
| "grad_norm": 0.07287512649205556, |
| "learning_rate": 2.489673131341297e-07, |
| "loss": 0.3462, |
| "step": 1060 |
| }, |
| { |
| "epoch": 4.8433666191155496, |
| "grad_norm": 0.07194032638043274, |
| "learning_rate": 2.349575936191384e-07, |
| "loss": 0.3476, |
| "step": 1061 |
| }, |
| { |
| "epoch": 4.84793152639087, |
| "grad_norm": 0.07547182977690173, |
| "learning_rate": 2.2135238326938646e-07, |
| "loss": 0.3471, |
| "step": 1062 |
| }, |
| { |
| "epoch": 4.852496433666191, |
| "grad_norm": 0.07389752198521447, |
| "learning_rate": 2.0815182048362858e-07, |
| "loss": 0.3511, |
| "step": 1063 |
| }, |
| { |
| "epoch": 4.857061340941512, |
| "grad_norm": 0.07948970245950392, |
| "learning_rate": 1.953560395443521e-07, |
| "loss": 0.3515, |
| "step": 1064 |
| }, |
| { |
| "epoch": 4.861626248216833, |
| "grad_norm": 0.07428844813444034, |
| "learning_rate": 1.829651706164004e-07, |
| "loss": 0.3481, |
| "step": 1065 |
| }, |
| { |
| "epoch": 4.866191155492154, |
| "grad_norm": 0.07304235969500948, |
| "learning_rate": 1.7097933974566272e-07, |
| "loss": 0.3553, |
| "step": 1066 |
| }, |
| { |
| "epoch": 4.870756062767475, |
| "grad_norm": 0.07616979320061058, |
| "learning_rate": 1.5939866885778198e-07, |
| "loss": 0.3532, |
| "step": 1067 |
| }, |
| { |
| "epoch": 4.875320970042796, |
| "grad_norm": 0.07825400387440254, |
| "learning_rate": 1.4822327575692464e-07, |
| "loss": 0.3479, |
| "step": 1068 |
| }, |
| { |
| "epoch": 4.879885877318117, |
| "grad_norm": 0.08117745285619632, |
| "learning_rate": 1.374532741245682e-07, |
| "loss": 0.3512, |
| "step": 1069 |
| }, |
| { |
| "epoch": 4.884450784593438, |
| "grad_norm": 0.07977853089608672, |
| "learning_rate": 1.2708877351835569e-07, |
| "loss": 0.3485, |
| "step": 1070 |
| }, |
| { |
| "epoch": 4.889015691868759, |
| "grad_norm": 0.07367012787142511, |
| "learning_rate": 1.1712987937098519e-07, |
| "loss": 0.3518, |
| "step": 1071 |
| }, |
| { |
| "epoch": 4.89358059914408, |
| "grad_norm": 0.07383920187031665, |
| "learning_rate": 1.0757669298912199e-07, |
| "loss": 0.3478, |
| "step": 1072 |
| }, |
| { |
| "epoch": 4.898145506419401, |
| "grad_norm": 0.07294907888019937, |
| "learning_rate": 9.842931155238156e-08, |
| "loss": 0.3528, |
| "step": 1073 |
| }, |
| { |
| "epoch": 4.902710413694722, |
| "grad_norm": 0.07451910212338729, |
| "learning_rate": 8.96878281123259e-08, |
| "loss": 0.349, |
| "step": 1074 |
| }, |
| { |
| "epoch": 4.907275320970043, |
| "grad_norm": 0.07200140543046635, |
| "learning_rate": 8.135233159154431e-08, |
| "loss": 0.3485, |
| "step": 1075 |
| }, |
| { |
| "epoch": 4.911840228245364, |
| "grad_norm": 0.07562528202585733, |
| "learning_rate": 7.342290678272079e-08, |
| "loss": 0.3498, |
| "step": 1076 |
| }, |
| { |
| "epoch": 4.916405135520685, |
| "grad_norm": 0.07457317771382703, |
| "learning_rate": 6.58996343477769e-08, |
| "loss": 0.3537, |
| "step": 1077 |
| }, |
| { |
| "epoch": 4.9209700427960055, |
| "grad_norm": 0.07363984134436599, |
| "learning_rate": 5.878259081707249e-08, |
| "loss": 0.3543, |
| "step": 1078 |
| }, |
| { |
| "epoch": 4.925534950071326, |
| "grad_norm": 0.07252929837329791, |
| "learning_rate": 5.2071848588601815e-08, |
| "loss": 0.3473, |
| "step": 1079 |
| }, |
| { |
| "epoch": 4.930099857346647, |
| "grad_norm": 0.07518239735309995, |
| "learning_rate": 4.576747592726083e-08, |
| "loss": 0.3503, |
| "step": 1080 |
| }, |
| { |
| "epoch": 4.934664764621969, |
| "grad_norm": 0.07799050676264847, |
| "learning_rate": 3.9869536964167734e-08, |
| "loss": 0.3548, |
| "step": 1081 |
| }, |
| { |
| "epoch": 4.93922967189729, |
| "grad_norm": 0.075339876329051, |
| "learning_rate": 3.437809169600126e-08, |
| "loss": 0.3502, |
| "step": 1082 |
| }, |
| { |
| "epoch": 4.943794579172611, |
| "grad_norm": 0.07479700944498346, |
| "learning_rate": 2.9293195984383405e-08, |
| "loss": 0.3468, |
| "step": 1083 |
| }, |
| { |
| "epoch": 4.948359486447932, |
| "grad_norm": 0.07210974118530611, |
| "learning_rate": 2.461490155532875e-08, |
| "loss": 0.3507, |
| "step": 1084 |
| }, |
| { |
| "epoch": 4.9529243937232525, |
| "grad_norm": 0.07652036354657446, |
| "learning_rate": 2.03432559986938e-08, |
| "loss": 0.3476, |
| "step": 1085 |
| }, |
| { |
| "epoch": 4.957489300998573, |
| "grad_norm": 0.07127307671646077, |
| "learning_rate": 1.6478302767719555e-08, |
| "loss": 0.3504, |
| "step": 1086 |
| }, |
| { |
| "epoch": 4.962054208273894, |
| "grad_norm": 0.07366450733389644, |
| "learning_rate": 1.3020081178574117e-08, |
| "loss": 0.3488, |
| "step": 1087 |
| }, |
| { |
| "epoch": 4.966619115549215, |
| "grad_norm": 0.07323536126136222, |
| "learning_rate": 9.968626409948556e-09, |
| "loss": 0.353, |
| "step": 1088 |
| }, |
| { |
| "epoch": 4.971184022824536, |
| "grad_norm": 0.07340553491000143, |
| "learning_rate": 7.323969502710526e-09, |
| "loss": 0.3501, |
| "step": 1089 |
| }, |
| { |
| "epoch": 4.975748930099857, |
| "grad_norm": 0.07588823565965415, |
| "learning_rate": 5.0861373595889605e-09, |
| "loss": 0.349, |
| "step": 1090 |
| }, |
| { |
| "epoch": 4.980313837375178, |
| "grad_norm": 0.07676438153761625, |
| "learning_rate": 3.255152744885415e-09, |
| "loss": 0.3526, |
| "step": 1091 |
| }, |
| { |
| "epoch": 4.9848787446505, |
| "grad_norm": 0.07181335005694361, |
| "learning_rate": 1.831034284260902e-09, |
| "loss": 0.3477, |
| "step": 1092 |
| }, |
| { |
| "epoch": 4.9894436519258205, |
| "grad_norm": 0.07171739407306418, |
| "learning_rate": 8.137964645316132e-10, |
| "loss": 0.3434, |
| "step": 1093 |
| }, |
| { |
| "epoch": 4.994008559201141, |
| "grad_norm": 0.07293042506861117, |
| "learning_rate": 2.0344963353124969e-10, |
| "loss": 0.3491, |
| "step": 1094 |
| }, |
| { |
| "epoch": 4.998573466476462, |
| "grad_norm": 0.07498693212494786, |
| "learning_rate": 0.0, |
| "loss": 0.3471, |
| "step": 1095 |
| }, |
| { |
| "epoch": 4.998573466476462, |
| "step": 1095, |
| "total_flos": 2.8177610658514207e+19, |
| "train_loss": 0.41344334662777105, |
| "train_runtime": 239120.7765, |
| "train_samples_per_second": 2.345, |
| "train_steps_per_second": 0.005 |
| } |
| ], |
| "logging_steps": 1.0, |
| "max_steps": 1095, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 5, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 2.8177610658514207e+19, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |