diff --git "a/trainer_state.json" "b/trainer_state.json" --- "a/trainer_state.json" +++ "b/trainer_state.json" @@ -1,9 +1,9 @@ { "best_metric": null, "best_model_checkpoint": null, - "epoch": 0.5252605122701985, + "epoch": 0.7704762173712499, "eval_steps": 500, - "global_step": 223200, + "global_step": 327400, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, @@ -312487,6 +312487,145886 @@ "learning_rate": 0.00016785002554023598, "loss": 2.0624, "step": 223200 + }, + { + "epoch": 0.53, + "grad_norm": 2.09375, + "learning_rate": 0.00016784866777342625, + "loss": 2.0614, + "step": 223205 + }, + { + "epoch": 0.53, + "grad_norm": 2.46875, + "learning_rate": 0.00016784730998343816, + "loss": 2.115, + "step": 223210 + }, + { + "epoch": 0.53, + "grad_norm": 2.96875, + "learning_rate": 0.00016784595217027222, + "loss": 2.0661, + "step": 223215 + }, + { + "epoch": 0.53, + "grad_norm": 2.21875, + "learning_rate": 0.00016784459433392886, + "loss": 1.9062, + "step": 223220 + }, + { + "epoch": 0.53, + "grad_norm": 2.203125, + "learning_rate": 0.00016784323647440856, + "loss": 2.2478, + "step": 223225 + }, + { + "epoch": 0.53, + "grad_norm": 2.203125, + "learning_rate": 0.00016784187859171176, + "loss": 2.152, + "step": 223230 + }, + { + "epoch": 0.53, + "grad_norm": 1.8984375, + "learning_rate": 0.00016784052068583897, + "loss": 2.1974, + "step": 223235 + }, + { + "epoch": 0.53, + "grad_norm": 1.9765625, + "learning_rate": 0.0001678391627567906, + "loss": 2.0411, + "step": 223240 + }, + { + "epoch": 0.53, + "grad_norm": 2.546875, + "learning_rate": 0.0001678378048045672, + "loss": 2.0948, + "step": 223245 + }, + { + "epoch": 0.53, + "grad_norm": 2.484375, + "learning_rate": 0.0001678364468291691, + "loss": 2.1167, + "step": 223250 + }, + { + "epoch": 0.53, + "grad_norm": 2.40625, + "learning_rate": 0.0001678350888305969, + "loss": 2.1689, + "step": 223255 + }, + { + "epoch": 0.53, + "grad_norm": 2.3125, + "learning_rate": 0.00016783373080885098, + "loss": 2.2333, + "step": 223260 + }, + { + "epoch": 0.53, + "grad_norm": 2.25, + "learning_rate": 0.00016783237276393184, + "loss": 2.3798, + "step": 223265 + }, + { + "epoch": 0.53, + "grad_norm": 2.359375, + "learning_rate": 0.0001678310146958399, + "loss": 1.9692, + "step": 223270 + }, + { + "epoch": 0.53, + "grad_norm": 2.03125, + "learning_rate": 0.00016782965660457565, + "loss": 2.1187, + "step": 223275 + }, + { + "epoch": 0.53, + "grad_norm": 2.265625, + "learning_rate": 0.00016782829849013958, + "loss": 1.9444, + "step": 223280 + }, + { + "epoch": 0.53, + "grad_norm": 2.140625, + "learning_rate": 0.00016782694035253216, + "loss": 2.1884, + "step": 223285 + }, + { + "epoch": 0.53, + "grad_norm": 2.34375, + "learning_rate": 0.00016782558219175382, + "loss": 2.014, + "step": 223290 + }, + { + "epoch": 0.53, + "grad_norm": 2.015625, + "learning_rate": 0.000167824224007805, + "loss": 2.092, + "step": 223295 + }, + { + "epoch": 0.53, + "grad_norm": 2.1875, + "learning_rate": 0.00016782286580068622, + "loss": 2.2491, + "step": 223300 + }, + { + "epoch": 0.53, + "grad_norm": 2.15625, + "learning_rate": 0.0001678215075703979, + "loss": 2.1327, + "step": 223305 + }, + { + "epoch": 0.53, + "grad_norm": 1.9765625, + "learning_rate": 0.00016782014931694054, + "loss": 1.889, + "step": 223310 + }, + { + "epoch": 0.53, + "grad_norm": 1.8671875, + "learning_rate": 0.00016781879104031458, + "loss": 2.0293, + "step": 223315 + }, + { + "epoch": 0.53, + "grad_norm": 2.59375, + "learning_rate": 0.00016781743274052048, + "loss": 2.1208, + "step": 223320 + }, + { + "epoch": 0.53, + "grad_norm": 2.46875, + "learning_rate": 0.00016781607441755872, + "loss": 2.1021, + "step": 223325 + }, + { + "epoch": 0.53, + "grad_norm": 1.8515625, + "learning_rate": 0.00016781471607142976, + "loss": 1.8657, + "step": 223330 + }, + { + "epoch": 0.53, + "grad_norm": 2.359375, + "learning_rate": 0.00016781335770213408, + "loss": 2.1163, + "step": 223335 + }, + { + "epoch": 0.53, + "grad_norm": 2.1875, + "learning_rate": 0.00016781199930967212, + "loss": 2.1405, + "step": 223340 + }, + { + "epoch": 0.53, + "grad_norm": 2.328125, + "learning_rate": 0.00016781064089404436, + "loss": 2.2499, + "step": 223345 + }, + { + "epoch": 0.53, + "grad_norm": 2.421875, + "learning_rate": 0.0001678092824552512, + "loss": 2.0042, + "step": 223350 + }, + { + "epoch": 0.53, + "grad_norm": 2.21875, + "learning_rate": 0.00016780792399329322, + "loss": 2.0092, + "step": 223355 + }, + { + "epoch": 0.53, + "grad_norm": 1.9453125, + "learning_rate": 0.00016780656550817078, + "loss": 2.2444, + "step": 223360 + }, + { + "epoch": 0.53, + "grad_norm": 1.9140625, + "learning_rate": 0.00016780520699988442, + "loss": 2.0866, + "step": 223365 + }, + { + "epoch": 0.53, + "grad_norm": 2.046875, + "learning_rate": 0.00016780384846843454, + "loss": 2.298, + "step": 223370 + }, + { + "epoch": 0.53, + "grad_norm": 2.03125, + "learning_rate": 0.0001678024899138217, + "loss": 2.0394, + "step": 223375 + }, + { + "epoch": 0.53, + "grad_norm": 1.953125, + "learning_rate": 0.00016780113133604626, + "loss": 2.1933, + "step": 223380 + }, + { + "epoch": 0.53, + "grad_norm": 2.046875, + "learning_rate": 0.0001677997727351087, + "loss": 2.0903, + "step": 223385 + }, + { + "epoch": 0.53, + "grad_norm": 2.15625, + "learning_rate": 0.00016779841411100954, + "loss": 1.9954, + "step": 223390 + }, + { + "epoch": 0.53, + "grad_norm": 1.71875, + "learning_rate": 0.0001677970554637492, + "loss": 2.2318, + "step": 223395 + }, + { + "epoch": 0.53, + "grad_norm": 2.609375, + "learning_rate": 0.00016779569679332816, + "loss": 2.034, + "step": 223400 + }, + { + "epoch": 0.53, + "grad_norm": 1.765625, + "learning_rate": 0.00016779433809974688, + "loss": 1.9693, + "step": 223405 + }, + { + "epoch": 0.53, + "grad_norm": 2.25, + "learning_rate": 0.0001677929793830058, + "loss": 1.8557, + "step": 223410 + }, + { + "epoch": 0.53, + "grad_norm": 2.1875, + "learning_rate": 0.00016779162064310544, + "loss": 2.1375, + "step": 223415 + }, + { + "epoch": 0.53, + "grad_norm": 2.203125, + "learning_rate": 0.00016779026188004626, + "loss": 2.1387, + "step": 223420 + }, + { + "epoch": 0.53, + "grad_norm": 1.953125, + "learning_rate": 0.00016778890309382863, + "loss": 2.176, + "step": 223425 + }, + { + "epoch": 0.53, + "grad_norm": 2.203125, + "learning_rate": 0.00016778754428445312, + "loss": 2.0683, + "step": 223430 + }, + { + "epoch": 0.53, + "grad_norm": 2.140625, + "learning_rate": 0.00016778618545192016, + "loss": 2.1118, + "step": 223435 + }, + { + "epoch": 0.53, + "grad_norm": 2.234375, + "learning_rate": 0.00016778482659623016, + "loss": 2.1417, + "step": 223440 + }, + { + "epoch": 0.53, + "grad_norm": 2.328125, + "learning_rate": 0.0001677834677173837, + "loss": 2.0838, + "step": 223445 + }, + { + "epoch": 0.53, + "grad_norm": 2.328125, + "learning_rate": 0.00016778210881538116, + "loss": 2.0649, + "step": 223450 + }, + { + "epoch": 0.53, + "grad_norm": 2.15625, + "learning_rate": 0.00016778074989022302, + "loss": 1.9166, + "step": 223455 + }, + { + "epoch": 0.53, + "grad_norm": 3.625, + "learning_rate": 0.00016777939094190974, + "loss": 2.1928, + "step": 223460 + }, + { + "epoch": 0.53, + "grad_norm": 2.25, + "learning_rate": 0.00016777803197044175, + "loss": 2.0387, + "step": 223465 + }, + { + "epoch": 0.53, + "grad_norm": 2.140625, + "learning_rate": 0.00016777667297581962, + "loss": 2.1306, + "step": 223470 + }, + { + "epoch": 0.53, + "grad_norm": 1.90625, + "learning_rate": 0.00016777531395804373, + "loss": 2.2465, + "step": 223475 + }, + { + "epoch": 0.53, + "grad_norm": 2.34375, + "learning_rate": 0.00016777395491711453, + "loss": 2.2141, + "step": 223480 + }, + { + "epoch": 0.53, + "grad_norm": 2.265625, + "learning_rate": 0.00016777259585303256, + "loss": 1.9562, + "step": 223485 + }, + { + "epoch": 0.53, + "grad_norm": 2.734375, + "learning_rate": 0.00016777123676579823, + "loss": 2.2729, + "step": 223490 + }, + { + "epoch": 0.53, + "grad_norm": 2.53125, + "learning_rate": 0.000167769877655412, + "loss": 2.1015, + "step": 223495 + }, + { + "epoch": 0.53, + "grad_norm": 2.375, + "learning_rate": 0.0001677685185218744, + "loss": 2.1217, + "step": 223500 + }, + { + "epoch": 0.53, + "grad_norm": 2.4375, + "learning_rate": 0.0001677671593651858, + "loss": 2.2734, + "step": 223505 + }, + { + "epoch": 0.53, + "grad_norm": 2.171875, + "learning_rate": 0.0001677658001853467, + "loss": 2.2404, + "step": 223510 + }, + { + "epoch": 0.53, + "grad_norm": 1.7578125, + "learning_rate": 0.0001677644409823576, + "loss": 2.1713, + "step": 223515 + }, + { + "epoch": 0.53, + "grad_norm": 2.109375, + "learning_rate": 0.00016776308175621893, + "loss": 2.2013, + "step": 223520 + }, + { + "epoch": 0.53, + "grad_norm": 2.21875, + "learning_rate": 0.00016776172250693115, + "loss": 1.9616, + "step": 223525 + }, + { + "epoch": 0.53, + "grad_norm": 2.15625, + "learning_rate": 0.00016776036323449476, + "loss": 2.1091, + "step": 223530 + }, + { + "epoch": 0.53, + "grad_norm": 2.390625, + "learning_rate": 0.0001677590039389102, + "loss": 2.0422, + "step": 223535 + }, + { + "epoch": 0.53, + "grad_norm": 2.09375, + "learning_rate": 0.00016775764462017794, + "loss": 2.2032, + "step": 223540 + }, + { + "epoch": 0.53, + "grad_norm": 1.984375, + "learning_rate": 0.00016775628527829838, + "loss": 1.9506, + "step": 223545 + }, + { + "epoch": 0.53, + "grad_norm": 2.125, + "learning_rate": 0.0001677549259132721, + "loss": 2.1739, + "step": 223550 + }, + { + "epoch": 0.53, + "grad_norm": 2.078125, + "learning_rate": 0.00016775356652509952, + "loss": 2.1874, + "step": 223555 + }, + { + "epoch": 0.53, + "grad_norm": 2.296875, + "learning_rate": 0.00016775220711378106, + "loss": 2.0794, + "step": 223560 + }, + { + "epoch": 0.53, + "grad_norm": 2.21875, + "learning_rate": 0.00016775084767931724, + "loss": 2.2806, + "step": 223565 + }, + { + "epoch": 0.53, + "grad_norm": 2.25, + "learning_rate": 0.00016774948822170847, + "loss": 2.1333, + "step": 223570 + }, + { + "epoch": 0.53, + "grad_norm": 2.328125, + "learning_rate": 0.0001677481287409553, + "loss": 2.1682, + "step": 223575 + }, + { + "epoch": 0.53, + "grad_norm": 2.046875, + "learning_rate": 0.0001677467692370581, + "loss": 2.029, + "step": 223580 + }, + { + "epoch": 0.53, + "grad_norm": 3.5625, + "learning_rate": 0.00016774540971001736, + "loss": 2.0161, + "step": 223585 + }, + { + "epoch": 0.53, + "grad_norm": 2.15625, + "learning_rate": 0.0001677440501598336, + "loss": 2.0973, + "step": 223590 + }, + { + "epoch": 0.53, + "grad_norm": 2.203125, + "learning_rate": 0.00016774269058650722, + "loss": 2.03, + "step": 223595 + }, + { + "epoch": 0.53, + "grad_norm": 1.8828125, + "learning_rate": 0.00016774133099003871, + "loss": 2.074, + "step": 223600 + }, + { + "epoch": 0.53, + "grad_norm": 2.40625, + "learning_rate": 0.00016773997137042854, + "loss": 2.1499, + "step": 223605 + }, + { + "epoch": 0.53, + "grad_norm": 2.390625, + "learning_rate": 0.00016773861172767718, + "loss": 2.0921, + "step": 223610 + }, + { + "epoch": 0.53, + "grad_norm": 2.03125, + "learning_rate": 0.00016773725206178504, + "loss": 2.1176, + "step": 223615 + }, + { + "epoch": 0.53, + "grad_norm": 2.421875, + "learning_rate": 0.00016773589237275267, + "loss": 2.0839, + "step": 223620 + }, + { + "epoch": 0.53, + "grad_norm": 2.0625, + "learning_rate": 0.00016773453266058047, + "loss": 2.106, + "step": 223625 + }, + { + "epoch": 0.53, + "grad_norm": 2.28125, + "learning_rate": 0.0001677331729252689, + "loss": 2.0781, + "step": 223630 + }, + { + "epoch": 0.53, + "grad_norm": 2.390625, + "learning_rate": 0.0001677318131668185, + "loss": 2.1352, + "step": 223635 + }, + { + "epoch": 0.53, + "grad_norm": 1.953125, + "learning_rate": 0.00016773045338522966, + "loss": 1.8356, + "step": 223640 + }, + { + "epoch": 0.53, + "grad_norm": 1.71875, + "learning_rate": 0.0001677290935805029, + "loss": 1.7767, + "step": 223645 + }, + { + "epoch": 0.53, + "grad_norm": 2.359375, + "learning_rate": 0.00016772773375263857, + "loss": 2.1617, + "step": 223650 + }, + { + "epoch": 0.53, + "grad_norm": 2.03125, + "learning_rate": 0.0001677263739016373, + "loss": 2.0495, + "step": 223655 + }, + { + "epoch": 0.53, + "grad_norm": 1.8515625, + "learning_rate": 0.00016772501402749943, + "loss": 2.1114, + "step": 223660 + }, + { + "epoch": 0.53, + "grad_norm": 2.171875, + "learning_rate": 0.00016772365413022548, + "loss": 2.0496, + "step": 223665 + }, + { + "epoch": 0.53, + "grad_norm": 2.078125, + "learning_rate": 0.0001677222942098159, + "loss": 1.9153, + "step": 223670 + }, + { + "epoch": 0.53, + "grad_norm": 2.15625, + "learning_rate": 0.00016772093426627117, + "loss": 2.2178, + "step": 223675 + }, + { + "epoch": 0.53, + "grad_norm": 2.28125, + "learning_rate": 0.00016771957429959173, + "loss": 2.1511, + "step": 223680 + }, + { + "epoch": 0.53, + "grad_norm": 2.28125, + "learning_rate": 0.00016771821430977806, + "loss": 2.0338, + "step": 223685 + }, + { + "epoch": 0.53, + "grad_norm": 2.625, + "learning_rate": 0.0001677168542968306, + "loss": 2.1014, + "step": 223690 + }, + { + "epoch": 0.53, + "grad_norm": 1.984375, + "learning_rate": 0.00016771549426074987, + "loss": 2.0127, + "step": 223695 + }, + { + "epoch": 0.53, + "grad_norm": 2.125, + "learning_rate": 0.00016771413420153626, + "loss": 2.1937, + "step": 223700 + }, + { + "epoch": 0.53, + "grad_norm": 2.0, + "learning_rate": 0.00016771277411919028, + "loss": 2.1109, + "step": 223705 + }, + { + "epoch": 0.53, + "grad_norm": 2.234375, + "learning_rate": 0.0001677114140137124, + "loss": 2.1753, + "step": 223710 + }, + { + "epoch": 0.53, + "grad_norm": 1.78125, + "learning_rate": 0.0001677100538851031, + "loss": 2.1652, + "step": 223715 + }, + { + "epoch": 0.53, + "grad_norm": 2.078125, + "learning_rate": 0.00016770869373336278, + "loss": 2.1016, + "step": 223720 + }, + { + "epoch": 0.53, + "grad_norm": 2.03125, + "learning_rate": 0.00016770733355849196, + "loss": 2.0954, + "step": 223725 + }, + { + "epoch": 0.53, + "grad_norm": 1.84375, + "learning_rate": 0.00016770597336049107, + "loss": 2.2484, + "step": 223730 + }, + { + "epoch": 0.53, + "grad_norm": 2.375, + "learning_rate": 0.0001677046131393606, + "loss": 2.0109, + "step": 223735 + }, + { + "epoch": 0.53, + "grad_norm": 2.015625, + "learning_rate": 0.000167703252895101, + "loss": 2.0007, + "step": 223740 + }, + { + "epoch": 0.53, + "grad_norm": 2.25, + "learning_rate": 0.00016770189262771274, + "loss": 2.0675, + "step": 223745 + }, + { + "epoch": 0.53, + "grad_norm": 2.15625, + "learning_rate": 0.0001677005323371963, + "loss": 2.3203, + "step": 223750 + }, + { + "epoch": 0.53, + "grad_norm": 2.4375, + "learning_rate": 0.00016769917202355214, + "loss": 2.0814, + "step": 223755 + }, + { + "epoch": 0.53, + "grad_norm": 2.515625, + "learning_rate": 0.00016769781168678069, + "loss": 2.0983, + "step": 223760 + }, + { + "epoch": 0.53, + "grad_norm": 2.03125, + "learning_rate": 0.00016769645132688244, + "loss": 2.1787, + "step": 223765 + }, + { + "epoch": 0.53, + "grad_norm": 2.359375, + "learning_rate": 0.0001676950909438579, + "loss": 2.1107, + "step": 223770 + }, + { + "epoch": 0.53, + "grad_norm": 1.890625, + "learning_rate": 0.00016769373053770746, + "loss": 2.0695, + "step": 223775 + }, + { + "epoch": 0.53, + "grad_norm": 2.1875, + "learning_rate": 0.0001676923701084316, + "loss": 2.189, + "step": 223780 + }, + { + "epoch": 0.53, + "grad_norm": 2.15625, + "learning_rate": 0.00016769100965603078, + "loss": 2.1511, + "step": 223785 + }, + { + "epoch": 0.53, + "grad_norm": 1.9765625, + "learning_rate": 0.00016768964918050552, + "loss": 2.0867, + "step": 223790 + }, + { + "epoch": 0.53, + "grad_norm": 2.296875, + "learning_rate": 0.0001676882886818562, + "loss": 2.0117, + "step": 223795 + }, + { + "epoch": 0.53, + "grad_norm": 2.515625, + "learning_rate": 0.0001676869281600834, + "loss": 2.1335, + "step": 223800 + }, + { + "epoch": 0.53, + "grad_norm": 2.109375, + "learning_rate": 0.00016768556761518748, + "loss": 2.0662, + "step": 223805 + }, + { + "epoch": 0.53, + "grad_norm": 2.21875, + "learning_rate": 0.00016768420704716896, + "loss": 2.0357, + "step": 223810 + }, + { + "epoch": 0.53, + "grad_norm": 1.828125, + "learning_rate": 0.0001676828464560283, + "loss": 2.0148, + "step": 223815 + }, + { + "epoch": 0.53, + "grad_norm": 3.109375, + "learning_rate": 0.00016768148584176592, + "loss": 2.0808, + "step": 223820 + }, + { + "epoch": 0.53, + "grad_norm": 2.171875, + "learning_rate": 0.0001676801252043823, + "loss": 2.2122, + "step": 223825 + }, + { + "epoch": 0.53, + "grad_norm": 2.34375, + "learning_rate": 0.00016767876454387796, + "loss": 2.006, + "step": 223830 + }, + { + "epoch": 0.53, + "grad_norm": 2.09375, + "learning_rate": 0.00016767740386025332, + "loss": 2.1231, + "step": 223835 + }, + { + "epoch": 0.53, + "grad_norm": 2.0625, + "learning_rate": 0.00016767604315350885, + "loss": 2.1647, + "step": 223840 + }, + { + "epoch": 0.53, + "grad_norm": 2.3125, + "learning_rate": 0.000167674682423645, + "loss": 2.0659, + "step": 223845 + }, + { + "epoch": 0.53, + "grad_norm": 2.3125, + "learning_rate": 0.00016767332167066227, + "loss": 2.2394, + "step": 223850 + }, + { + "epoch": 0.53, + "grad_norm": 1.8671875, + "learning_rate": 0.00016767196089456112, + "loss": 2.1918, + "step": 223855 + }, + { + "epoch": 0.53, + "grad_norm": 2.546875, + "learning_rate": 0.00016767060009534196, + "loss": 2.1973, + "step": 223860 + }, + { + "epoch": 0.53, + "grad_norm": 2.640625, + "learning_rate": 0.0001676692392730053, + "loss": 2.255, + "step": 223865 + }, + { + "epoch": 0.53, + "grad_norm": 1.8046875, + "learning_rate": 0.00016766787842755162, + "loss": 2.1685, + "step": 223870 + }, + { + "epoch": 0.53, + "grad_norm": 2.375, + "learning_rate": 0.00016766651755898137, + "loss": 2.1433, + "step": 223875 + }, + { + "epoch": 0.53, + "grad_norm": 2.0625, + "learning_rate": 0.000167665156667295, + "loss": 2.1434, + "step": 223880 + }, + { + "epoch": 0.53, + "grad_norm": 2.1875, + "learning_rate": 0.000167663795752493, + "loss": 2.227, + "step": 223885 + }, + { + "epoch": 0.53, + "grad_norm": 2.21875, + "learning_rate": 0.00016766243481457582, + "loss": 2.0986, + "step": 223890 + }, + { + "epoch": 0.53, + "grad_norm": 2.515625, + "learning_rate": 0.0001676610738535439, + "loss": 2.1875, + "step": 223895 + }, + { + "epoch": 0.53, + "grad_norm": 2.15625, + "learning_rate": 0.00016765971286939775, + "loss": 2.3529, + "step": 223900 + }, + { + "epoch": 0.53, + "grad_norm": 1.96875, + "learning_rate": 0.00016765835186213781, + "loss": 2.2211, + "step": 223905 + }, + { + "epoch": 0.53, + "grad_norm": 2.109375, + "learning_rate": 0.00016765699083176453, + "loss": 2.105, + "step": 223910 + }, + { + "epoch": 0.53, + "grad_norm": 2.375, + "learning_rate": 0.00016765562977827844, + "loss": 1.9976, + "step": 223915 + }, + { + "epoch": 0.53, + "grad_norm": 2.421875, + "learning_rate": 0.0001676542687016799, + "loss": 2.1406, + "step": 223920 + }, + { + "epoch": 0.53, + "grad_norm": 2.203125, + "learning_rate": 0.0001676529076019695, + "loss": 2.0645, + "step": 223925 + }, + { + "epoch": 0.53, + "grad_norm": 2.15625, + "learning_rate": 0.0001676515464791476, + "loss": 1.8824, + "step": 223930 + }, + { + "epoch": 0.53, + "grad_norm": 1.8046875, + "learning_rate": 0.0001676501853332147, + "loss": 1.7713, + "step": 223935 + }, + { + "epoch": 0.53, + "grad_norm": 2.15625, + "learning_rate": 0.0001676488241641713, + "loss": 1.9488, + "step": 223940 + }, + { + "epoch": 0.53, + "grad_norm": 2.34375, + "learning_rate": 0.0001676474629720178, + "loss": 2.1877, + "step": 223945 + }, + { + "epoch": 0.53, + "grad_norm": 2.328125, + "learning_rate": 0.00016764610175675472, + "loss": 2.1536, + "step": 223950 + }, + { + "epoch": 0.53, + "grad_norm": 2.859375, + "learning_rate": 0.00016764474051838253, + "loss": 2.213, + "step": 223955 + }, + { + "epoch": 0.53, + "grad_norm": 2.359375, + "learning_rate": 0.00016764337925690163, + "loss": 2.0799, + "step": 223960 + }, + { + "epoch": 0.53, + "grad_norm": 1.90625, + "learning_rate": 0.00016764201797231254, + "loss": 1.962, + "step": 223965 + }, + { + "epoch": 0.53, + "grad_norm": 2.140625, + "learning_rate": 0.00016764065666461574, + "loss": 2.1876, + "step": 223970 + }, + { + "epoch": 0.53, + "grad_norm": 1.7890625, + "learning_rate": 0.0001676392953338116, + "loss": 2.2039, + "step": 223975 + }, + { + "epoch": 0.53, + "grad_norm": 1.875, + "learning_rate": 0.00016763793397990066, + "loss": 2.0506, + "step": 223980 + }, + { + "epoch": 0.53, + "grad_norm": 2.765625, + "learning_rate": 0.0001676365726028834, + "loss": 1.9909, + "step": 223985 + }, + { + "epoch": 0.53, + "grad_norm": 2.265625, + "learning_rate": 0.0001676352112027603, + "loss": 2.0048, + "step": 223990 + }, + { + "epoch": 0.53, + "grad_norm": 2.5, + "learning_rate": 0.00016763384977953172, + "loss": 1.9602, + "step": 223995 + }, + { + "epoch": 0.53, + "grad_norm": 1.8359375, + "learning_rate": 0.0001676324883331982, + "loss": 2.2895, + "step": 224000 + }, + { + "epoch": 0.53, + "grad_norm": 1.9921875, + "learning_rate": 0.0001676311268637602, + "loss": 2.0123, + "step": 224005 + }, + { + "epoch": 0.53, + "grad_norm": 2.03125, + "learning_rate": 0.00016762976537121823, + "loss": 2.1189, + "step": 224010 + }, + { + "epoch": 0.53, + "grad_norm": 2.171875, + "learning_rate": 0.00016762840385557267, + "loss": 2.0907, + "step": 224015 + }, + { + "epoch": 0.53, + "grad_norm": 2.15625, + "learning_rate": 0.00016762704231682398, + "loss": 1.9527, + "step": 224020 + }, + { + "epoch": 0.53, + "grad_norm": 2.078125, + "learning_rate": 0.0001676256807549727, + "loss": 2.0739, + "step": 224025 + }, + { + "epoch": 0.53, + "grad_norm": 1.78125, + "learning_rate": 0.00016762431917001925, + "loss": 2.3182, + "step": 224030 + }, + { + "epoch": 0.53, + "grad_norm": 1.5, + "learning_rate": 0.00016762295756196413, + "loss": 2.1654, + "step": 224035 + }, + { + "epoch": 0.53, + "grad_norm": 1.8359375, + "learning_rate": 0.00016762159593080778, + "loss": 2.0564, + "step": 224040 + }, + { + "epoch": 0.53, + "grad_norm": 1.8671875, + "learning_rate": 0.00016762023427655066, + "loss": 2.0787, + "step": 224045 + }, + { + "epoch": 0.53, + "grad_norm": 2.3125, + "learning_rate": 0.00016761887259919323, + "loss": 2.0527, + "step": 224050 + }, + { + "epoch": 0.53, + "grad_norm": 1.796875, + "learning_rate": 0.00016761751089873597, + "loss": 2.2896, + "step": 224055 + }, + { + "epoch": 0.53, + "grad_norm": 2.03125, + "learning_rate": 0.00016761614917517936, + "loss": 2.1998, + "step": 224060 + }, + { + "epoch": 0.53, + "grad_norm": 2.609375, + "learning_rate": 0.00016761478742852385, + "loss": 1.9585, + "step": 224065 + }, + { + "epoch": 0.53, + "grad_norm": 2.125, + "learning_rate": 0.00016761342565876986, + "loss": 1.9621, + "step": 224070 + }, + { + "epoch": 0.53, + "grad_norm": 2.234375, + "learning_rate": 0.00016761206386591793, + "loss": 2.1449, + "step": 224075 + }, + { + "epoch": 0.53, + "grad_norm": 1.953125, + "learning_rate": 0.00016761070204996848, + "loss": 2.1979, + "step": 224080 + }, + { + "epoch": 0.53, + "grad_norm": 1.9609375, + "learning_rate": 0.00016760934021092198, + "loss": 2.0694, + "step": 224085 + }, + { + "epoch": 0.53, + "grad_norm": 2.59375, + "learning_rate": 0.00016760797834877893, + "loss": 2.1454, + "step": 224090 + }, + { + "epoch": 0.53, + "grad_norm": 1.9375, + "learning_rate": 0.00016760661646353974, + "loss": 1.9941, + "step": 224095 + }, + { + "epoch": 0.53, + "grad_norm": 2.234375, + "learning_rate": 0.00016760525455520494, + "loss": 2.1685, + "step": 224100 + }, + { + "epoch": 0.53, + "grad_norm": 2.3125, + "learning_rate": 0.00016760389262377495, + "loss": 2.1148, + "step": 224105 + }, + { + "epoch": 0.53, + "grad_norm": 2.421875, + "learning_rate": 0.0001676025306692502, + "loss": 2.0929, + "step": 224110 + }, + { + "epoch": 0.53, + "grad_norm": 2.0625, + "learning_rate": 0.00016760116869163124, + "loss": 2.0931, + "step": 224115 + }, + { + "epoch": 0.53, + "grad_norm": 2.3125, + "learning_rate": 0.00016759980669091848, + "loss": 2.0977, + "step": 224120 + }, + { + "epoch": 0.53, + "grad_norm": 2.125, + "learning_rate": 0.00016759844466711243, + "loss": 2.0319, + "step": 224125 + }, + { + "epoch": 0.53, + "grad_norm": 2.28125, + "learning_rate": 0.0001675970826202135, + "loss": 2.1289, + "step": 224130 + }, + { + "epoch": 0.53, + "grad_norm": 2.40625, + "learning_rate": 0.00016759572055022217, + "loss": 2.1874, + "step": 224135 + }, + { + "epoch": 0.53, + "grad_norm": 1.8046875, + "learning_rate": 0.00016759435845713894, + "loss": 2.2475, + "step": 224140 + }, + { + "epoch": 0.53, + "grad_norm": 2.09375, + "learning_rate": 0.00016759299634096423, + "loss": 2.1755, + "step": 224145 + }, + { + "epoch": 0.53, + "grad_norm": 2.65625, + "learning_rate": 0.00016759163420169854, + "loss": 2.0827, + "step": 224150 + }, + { + "epoch": 0.53, + "grad_norm": 2.53125, + "learning_rate": 0.00016759027203934232, + "loss": 1.9803, + "step": 224155 + }, + { + "epoch": 0.53, + "grad_norm": 2.15625, + "learning_rate": 0.00016758890985389604, + "loss": 2.1666, + "step": 224160 + }, + { + "epoch": 0.53, + "grad_norm": 2.4375, + "learning_rate": 0.00016758754764536015, + "loss": 2.0215, + "step": 224165 + }, + { + "epoch": 0.53, + "grad_norm": 2.015625, + "learning_rate": 0.00016758618541373515, + "loss": 1.9848, + "step": 224170 + }, + { + "epoch": 0.53, + "grad_norm": 2.234375, + "learning_rate": 0.00016758482315902147, + "loss": 2.0406, + "step": 224175 + }, + { + "epoch": 0.53, + "grad_norm": 2.09375, + "learning_rate": 0.0001675834608812196, + "loss": 2.2479, + "step": 224180 + }, + { + "epoch": 0.53, + "grad_norm": 1.9765625, + "learning_rate": 0.00016758209858032998, + "loss": 2.0684, + "step": 224185 + }, + { + "epoch": 0.53, + "grad_norm": 2.03125, + "learning_rate": 0.00016758073625635308, + "loss": 2.281, + "step": 224190 + }, + { + "epoch": 0.53, + "grad_norm": 2.140625, + "learning_rate": 0.00016757937390928938, + "loss": 2.0221, + "step": 224195 + }, + { + "epoch": 0.53, + "grad_norm": 2.078125, + "learning_rate": 0.00016757801153913934, + "loss": 2.0275, + "step": 224200 + }, + { + "epoch": 0.53, + "grad_norm": 1.875, + "learning_rate": 0.00016757664914590344, + "loss": 2.143, + "step": 224205 + }, + { + "epoch": 0.53, + "grad_norm": 2.390625, + "learning_rate": 0.0001675752867295821, + "loss": 1.945, + "step": 224210 + }, + { + "epoch": 0.53, + "grad_norm": 2.21875, + "learning_rate": 0.00016757392429017585, + "loss": 2.0877, + "step": 224215 + }, + { + "epoch": 0.53, + "grad_norm": 1.96875, + "learning_rate": 0.00016757256182768512, + "loss": 1.9527, + "step": 224220 + }, + { + "epoch": 0.53, + "grad_norm": 2.140625, + "learning_rate": 0.00016757119934211038, + "loss": 1.9235, + "step": 224225 + }, + { + "epoch": 0.53, + "grad_norm": 1.9140625, + "learning_rate": 0.0001675698368334521, + "loss": 2.2098, + "step": 224230 + }, + { + "epoch": 0.53, + "grad_norm": 1.6328125, + "learning_rate": 0.0001675684743017107, + "loss": 1.9457, + "step": 224235 + }, + { + "epoch": 0.53, + "grad_norm": 2.34375, + "learning_rate": 0.0001675671117468867, + "loss": 2.0998, + "step": 224240 + }, + { + "epoch": 0.53, + "grad_norm": 2.15625, + "learning_rate": 0.00016756574916898057, + "loss": 2.1693, + "step": 224245 + }, + { + "epoch": 0.53, + "grad_norm": 2.375, + "learning_rate": 0.00016756438656799274, + "loss": 2.0632, + "step": 224250 + }, + { + "epoch": 0.53, + "grad_norm": 2.5, + "learning_rate": 0.0001675630239439237, + "loss": 2.2877, + "step": 224255 + }, + { + "epoch": 0.53, + "grad_norm": 1.9765625, + "learning_rate": 0.00016756166129677387, + "loss": 2.2667, + "step": 224260 + }, + { + "epoch": 0.53, + "grad_norm": 2.078125, + "learning_rate": 0.0001675602986265438, + "loss": 2.2282, + "step": 224265 + }, + { + "epoch": 0.53, + "grad_norm": 2.4375, + "learning_rate": 0.00016755893593323385, + "loss": 2.189, + "step": 224270 + }, + { + "epoch": 0.53, + "grad_norm": 1.984375, + "learning_rate": 0.00016755757321684458, + "loss": 1.7126, + "step": 224275 + }, + { + "epoch": 0.53, + "grad_norm": 2.25, + "learning_rate": 0.0001675562104773764, + "loss": 2.1515, + "step": 224280 + }, + { + "epoch": 0.53, + "grad_norm": 2.15625, + "learning_rate": 0.00016755484771482983, + "loss": 2.3555, + "step": 224285 + }, + { + "epoch": 0.53, + "grad_norm": 2.34375, + "learning_rate": 0.00016755348492920527, + "loss": 2.0678, + "step": 224290 + }, + { + "epoch": 0.53, + "grad_norm": 2.28125, + "learning_rate": 0.0001675521221205032, + "loss": 2.1572, + "step": 224295 + }, + { + "epoch": 0.53, + "grad_norm": 2.515625, + "learning_rate": 0.00016755075928872413, + "loss": 2.1364, + "step": 224300 + }, + { + "epoch": 0.53, + "grad_norm": 1.7578125, + "learning_rate": 0.0001675493964338685, + "loss": 2.0422, + "step": 224305 + }, + { + "epoch": 0.53, + "grad_norm": 1.8359375, + "learning_rate": 0.00016754803355593673, + "loss": 2.1243, + "step": 224310 + }, + { + "epoch": 0.53, + "grad_norm": 2.015625, + "learning_rate": 0.00016754667065492935, + "loss": 2.1393, + "step": 224315 + }, + { + "epoch": 0.53, + "grad_norm": 2.609375, + "learning_rate": 0.00016754530773084684, + "loss": 2.1499, + "step": 224320 + }, + { + "epoch": 0.53, + "grad_norm": 2.140625, + "learning_rate": 0.00016754394478368955, + "loss": 1.9112, + "step": 224325 + }, + { + "epoch": 0.53, + "grad_norm": 2.09375, + "learning_rate": 0.00016754258181345808, + "loss": 1.8714, + "step": 224330 + }, + { + "epoch": 0.53, + "grad_norm": 1.9765625, + "learning_rate": 0.00016754121882015283, + "loss": 2.0473, + "step": 224335 + }, + { + "epoch": 0.53, + "grad_norm": 2.21875, + "learning_rate": 0.00016753985580377428, + "loss": 2.0317, + "step": 224340 + }, + { + "epoch": 0.53, + "grad_norm": 1.8828125, + "learning_rate": 0.00016753849276432287, + "loss": 2.0135, + "step": 224345 + }, + { + "epoch": 0.53, + "grad_norm": 2.453125, + "learning_rate": 0.0001675371297017991, + "loss": 2.2548, + "step": 224350 + }, + { + "epoch": 0.53, + "grad_norm": 2.453125, + "learning_rate": 0.0001675357666162034, + "loss": 2.0043, + "step": 224355 + }, + { + "epoch": 0.53, + "grad_norm": 2.40625, + "learning_rate": 0.00016753440350753628, + "loss": 2.1243, + "step": 224360 + }, + { + "epoch": 0.53, + "grad_norm": 2.109375, + "learning_rate": 0.0001675330403757982, + "loss": 1.865, + "step": 224365 + }, + { + "epoch": 0.53, + "grad_norm": 1.796875, + "learning_rate": 0.00016753167722098958, + "loss": 2.1193, + "step": 224370 + }, + { + "epoch": 0.53, + "grad_norm": 2.15625, + "learning_rate": 0.0001675303140431109, + "loss": 2.0943, + "step": 224375 + }, + { + "epoch": 0.53, + "grad_norm": 2.5, + "learning_rate": 0.00016752895084216267, + "loss": 2.1673, + "step": 224380 + }, + { + "epoch": 0.53, + "grad_norm": 1.921875, + "learning_rate": 0.00016752758761814533, + "loss": 2.1234, + "step": 224385 + }, + { + "epoch": 0.53, + "grad_norm": 3.09375, + "learning_rate": 0.0001675262243710593, + "loss": 2.345, + "step": 224390 + }, + { + "epoch": 0.53, + "grad_norm": 1.859375, + "learning_rate": 0.00016752486110090513, + "loss": 2.0154, + "step": 224395 + }, + { + "epoch": 0.53, + "grad_norm": 2.234375, + "learning_rate": 0.00016752349780768325, + "loss": 2.0537, + "step": 224400 + }, + { + "epoch": 0.53, + "grad_norm": 2.078125, + "learning_rate": 0.00016752213449139407, + "loss": 1.9641, + "step": 224405 + }, + { + "epoch": 0.53, + "grad_norm": 2.1875, + "learning_rate": 0.00016752077115203814, + "loss": 2.0838, + "step": 224410 + }, + { + "epoch": 0.53, + "grad_norm": 1.90625, + "learning_rate": 0.00016751940778961588, + "loss": 2.1554, + "step": 224415 + }, + { + "epoch": 0.53, + "grad_norm": 1.7265625, + "learning_rate": 0.00016751804440412777, + "loss": 2.2202, + "step": 224420 + }, + { + "epoch": 0.53, + "grad_norm": 2.125, + "learning_rate": 0.00016751668099557426, + "loss": 2.0321, + "step": 224425 + }, + { + "epoch": 0.53, + "grad_norm": 2.1875, + "learning_rate": 0.00016751531756395584, + "loss": 2.1781, + "step": 224430 + }, + { + "epoch": 0.53, + "grad_norm": 2.0, + "learning_rate": 0.00016751395410927295, + "loss": 2.047, + "step": 224435 + }, + { + "epoch": 0.53, + "grad_norm": 2.046875, + "learning_rate": 0.00016751259063152606, + "loss": 2.1339, + "step": 224440 + }, + { + "epoch": 0.53, + "grad_norm": 1.8984375, + "learning_rate": 0.00016751122713071567, + "loss": 1.9913, + "step": 224445 + }, + { + "epoch": 0.53, + "grad_norm": 1.90625, + "learning_rate": 0.00016750986360684222, + "loss": 1.8366, + "step": 224450 + }, + { + "epoch": 0.53, + "grad_norm": 2.171875, + "learning_rate": 0.00016750850005990616, + "loss": 2.1112, + "step": 224455 + }, + { + "epoch": 0.53, + "grad_norm": 3.421875, + "learning_rate": 0.00016750713648990798, + "loss": 2.1041, + "step": 224460 + }, + { + "epoch": 0.53, + "grad_norm": 1.9296875, + "learning_rate": 0.00016750577289684814, + "loss": 1.8296, + "step": 224465 + }, + { + "epoch": 0.53, + "grad_norm": 2.296875, + "learning_rate": 0.00016750440928072714, + "loss": 2.1356, + "step": 224470 + }, + { + "epoch": 0.53, + "grad_norm": 2.28125, + "learning_rate": 0.00016750304564154536, + "loss": 2.0426, + "step": 224475 + }, + { + "epoch": 0.53, + "grad_norm": 1.9140625, + "learning_rate": 0.00016750168197930335, + "loss": 2.1694, + "step": 224480 + }, + { + "epoch": 0.53, + "grad_norm": 2.03125, + "learning_rate": 0.0001675003182940015, + "loss": 1.8403, + "step": 224485 + }, + { + "epoch": 0.53, + "grad_norm": 2.1875, + "learning_rate": 0.00016749895458564033, + "loss": 2.0597, + "step": 224490 + }, + { + "epoch": 0.53, + "grad_norm": 1.8984375, + "learning_rate": 0.00016749759085422027, + "loss": 2.0299, + "step": 224495 + }, + { + "epoch": 0.53, + "grad_norm": 1.9453125, + "learning_rate": 0.00016749622709974187, + "loss": 2.1757, + "step": 224500 + }, + { + "epoch": 0.53, + "grad_norm": 2.578125, + "learning_rate": 0.0001674948633222055, + "loss": 2.3122, + "step": 224505 + }, + { + "epoch": 0.53, + "grad_norm": 1.890625, + "learning_rate": 0.00016749349952161166, + "loss": 2.1141, + "step": 224510 + }, + { + "epoch": 0.53, + "grad_norm": 1.9609375, + "learning_rate": 0.0001674921356979608, + "loss": 2.157, + "step": 224515 + }, + { + "epoch": 0.53, + "grad_norm": 2.296875, + "learning_rate": 0.00016749077185125341, + "loss": 2.2004, + "step": 224520 + }, + { + "epoch": 0.53, + "grad_norm": 1.859375, + "learning_rate": 0.00016748940798148998, + "loss": 2.1459, + "step": 224525 + }, + { + "epoch": 0.53, + "grad_norm": 2.28125, + "learning_rate": 0.00016748804408867093, + "loss": 2.0933, + "step": 224530 + }, + { + "epoch": 0.53, + "grad_norm": 2.09375, + "learning_rate": 0.00016748668017279675, + "loss": 2.0403, + "step": 224535 + }, + { + "epoch": 0.53, + "grad_norm": 2.375, + "learning_rate": 0.00016748531623386785, + "loss": 2.2275, + "step": 224540 + }, + { + "epoch": 0.53, + "grad_norm": 2.0, + "learning_rate": 0.00016748395227188478, + "loss": 2.0041, + "step": 224545 + }, + { + "epoch": 0.53, + "grad_norm": 2.03125, + "learning_rate": 0.00016748258828684794, + "loss": 2.1924, + "step": 224550 + }, + { + "epoch": 0.53, + "grad_norm": 2.640625, + "learning_rate": 0.00016748122427875787, + "loss": 2.1074, + "step": 224555 + }, + { + "epoch": 0.53, + "grad_norm": 2.03125, + "learning_rate": 0.00016747986024761498, + "loss": 2.1561, + "step": 224560 + }, + { + "epoch": 0.53, + "grad_norm": 2.171875, + "learning_rate": 0.0001674784961934197, + "loss": 2.13, + "step": 224565 + }, + { + "epoch": 0.53, + "grad_norm": 2.28125, + "learning_rate": 0.0001674771321161726, + "loss": 2.0236, + "step": 224570 + }, + { + "epoch": 0.53, + "grad_norm": 2.21875, + "learning_rate": 0.00016747576801587403, + "loss": 2.0618, + "step": 224575 + }, + { + "epoch": 0.53, + "grad_norm": 2.640625, + "learning_rate": 0.00016747440389252457, + "loss": 2.0475, + "step": 224580 + }, + { + "epoch": 0.53, + "grad_norm": 2.03125, + "learning_rate": 0.0001674730397461246, + "loss": 2.236, + "step": 224585 + }, + { + "epoch": 0.53, + "grad_norm": 2.1875, + "learning_rate": 0.0001674716755766746, + "loss": 1.9974, + "step": 224590 + }, + { + "epoch": 0.53, + "grad_norm": 2.1875, + "learning_rate": 0.00016747031138417508, + "loss": 2.1044, + "step": 224595 + }, + { + "epoch": 0.53, + "grad_norm": 2.28125, + "learning_rate": 0.00016746894716862648, + "loss": 2.1788, + "step": 224600 + }, + { + "epoch": 0.53, + "grad_norm": 2.234375, + "learning_rate": 0.00016746758293002925, + "loss": 2.2005, + "step": 224605 + }, + { + "epoch": 0.53, + "grad_norm": 2.046875, + "learning_rate": 0.00016746621866838387, + "loss": 1.8215, + "step": 224610 + }, + { + "epoch": 0.53, + "grad_norm": 2.046875, + "learning_rate": 0.00016746485438369078, + "loss": 2.0811, + "step": 224615 + }, + { + "epoch": 0.53, + "grad_norm": 2.3125, + "learning_rate": 0.00016746349007595052, + "loss": 2.0062, + "step": 224620 + }, + { + "epoch": 0.53, + "grad_norm": 2.28125, + "learning_rate": 0.0001674621257451635, + "loss": 2.0652, + "step": 224625 + }, + { + "epoch": 0.53, + "grad_norm": 2.359375, + "learning_rate": 0.0001674607613913302, + "loss": 2.1361, + "step": 224630 + }, + { + "epoch": 0.53, + "grad_norm": 1.8671875, + "learning_rate": 0.00016745939701445106, + "loss": 2.1326, + "step": 224635 + }, + { + "epoch": 0.53, + "grad_norm": 1.828125, + "learning_rate": 0.00016745803261452658, + "loss": 2.1093, + "step": 224640 + }, + { + "epoch": 0.53, + "grad_norm": 2.0625, + "learning_rate": 0.0001674566681915572, + "loss": 2.1228, + "step": 224645 + }, + { + "epoch": 0.53, + "grad_norm": 2.15625, + "learning_rate": 0.0001674553037455434, + "loss": 2.1815, + "step": 224650 + }, + { + "epoch": 0.53, + "grad_norm": 2.15625, + "learning_rate": 0.00016745393927648566, + "loss": 1.9667, + "step": 224655 + }, + { + "epoch": 0.53, + "grad_norm": 2.265625, + "learning_rate": 0.00016745257478438438, + "loss": 2.2277, + "step": 224660 + }, + { + "epoch": 0.53, + "grad_norm": 2.4375, + "learning_rate": 0.00016745121026924014, + "loss": 2.239, + "step": 224665 + }, + { + "epoch": 0.53, + "grad_norm": 2.421875, + "learning_rate": 0.00016744984573105332, + "loss": 1.9545, + "step": 224670 + }, + { + "epoch": 0.53, + "grad_norm": 2.203125, + "learning_rate": 0.0001674484811698244, + "loss": 2.1444, + "step": 224675 + }, + { + "epoch": 0.53, + "grad_norm": 2.359375, + "learning_rate": 0.00016744711658555389, + "loss": 2.204, + "step": 224680 + }, + { + "epoch": 0.53, + "grad_norm": 2.65625, + "learning_rate": 0.00016744575197824216, + "loss": 1.8763, + "step": 224685 + }, + { + "epoch": 0.53, + "grad_norm": 1.8828125, + "learning_rate": 0.00016744438734788978, + "loss": 2.0609, + "step": 224690 + }, + { + "epoch": 0.53, + "grad_norm": 1.8515625, + "learning_rate": 0.00016744302269449716, + "loss": 1.9844, + "step": 224695 + }, + { + "epoch": 0.53, + "grad_norm": 2.296875, + "learning_rate": 0.00016744165801806479, + "loss": 1.9922, + "step": 224700 + }, + { + "epoch": 0.53, + "grad_norm": 2.0625, + "learning_rate": 0.00016744029331859314, + "loss": 2.0278, + "step": 224705 + }, + { + "epoch": 0.53, + "grad_norm": 2.078125, + "learning_rate": 0.00016743892859608263, + "loss": 2.2616, + "step": 224710 + }, + { + "epoch": 0.53, + "grad_norm": 2.375, + "learning_rate": 0.0001674375638505338, + "loss": 1.9632, + "step": 224715 + }, + { + "epoch": 0.53, + "grad_norm": 1.9375, + "learning_rate": 0.00016743619908194705, + "loss": 2.0238, + "step": 224720 + }, + { + "epoch": 0.53, + "grad_norm": 2.09375, + "learning_rate": 0.00016743483429032289, + "loss": 2.0376, + "step": 224725 + }, + { + "epoch": 0.53, + "grad_norm": 2.296875, + "learning_rate": 0.00016743346947566172, + "loss": 1.9867, + "step": 224730 + }, + { + "epoch": 0.53, + "grad_norm": 2.0, + "learning_rate": 0.0001674321046379641, + "loss": 2.1762, + "step": 224735 + }, + { + "epoch": 0.53, + "grad_norm": 2.109375, + "learning_rate": 0.00016743073977723043, + "loss": 1.9296, + "step": 224740 + }, + { + "epoch": 0.53, + "grad_norm": 2.046875, + "learning_rate": 0.0001674293748934612, + "loss": 1.8124, + "step": 224745 + }, + { + "epoch": 0.53, + "grad_norm": 2.296875, + "learning_rate": 0.00016742800998665686, + "loss": 1.9338, + "step": 224750 + }, + { + "epoch": 0.53, + "grad_norm": 2.078125, + "learning_rate": 0.00016742664505681792, + "loss": 1.9998, + "step": 224755 + }, + { + "epoch": 0.53, + "grad_norm": 2.3125, + "learning_rate": 0.00016742528010394478, + "loss": 2.1223, + "step": 224760 + }, + { + "epoch": 0.53, + "grad_norm": 2.0625, + "learning_rate": 0.00016742391512803798, + "loss": 2.1353, + "step": 224765 + }, + { + "epoch": 0.53, + "grad_norm": 2.421875, + "learning_rate": 0.00016742255012909793, + "loss": 1.9697, + "step": 224770 + }, + { + "epoch": 0.53, + "grad_norm": 2.09375, + "learning_rate": 0.0001674211851071251, + "loss": 1.9201, + "step": 224775 + }, + { + "epoch": 0.53, + "grad_norm": 1.9140625, + "learning_rate": 0.00016741982006212, + "loss": 2.1079, + "step": 224780 + }, + { + "epoch": 0.53, + "grad_norm": 2.203125, + "learning_rate": 0.000167418454994083, + "loss": 2.0327, + "step": 224785 + }, + { + "epoch": 0.53, + "grad_norm": 2.28125, + "learning_rate": 0.0001674170899030147, + "loss": 2.0631, + "step": 224790 + }, + { + "epoch": 0.53, + "grad_norm": 1.8203125, + "learning_rate": 0.00016741572478891549, + "loss": 2.0843, + "step": 224795 + }, + { + "epoch": 0.53, + "grad_norm": 2.40625, + "learning_rate": 0.00016741435965178584, + "loss": 1.9199, + "step": 224800 + }, + { + "epoch": 0.53, + "grad_norm": 1.625, + "learning_rate": 0.00016741299449162623, + "loss": 2.0417, + "step": 224805 + }, + { + "epoch": 0.53, + "grad_norm": 2.09375, + "learning_rate": 0.00016741162930843708, + "loss": 2.1065, + "step": 224810 + }, + { + "epoch": 0.53, + "grad_norm": 2.15625, + "learning_rate": 0.00016741026410221893, + "loss": 1.9607, + "step": 224815 + }, + { + "epoch": 0.53, + "grad_norm": 2.46875, + "learning_rate": 0.00016740889887297222, + "loss": 2.0227, + "step": 224820 + }, + { + "epoch": 0.53, + "grad_norm": 2.28125, + "learning_rate": 0.00016740753362069738, + "loss": 2.061, + "step": 224825 + }, + { + "epoch": 0.53, + "grad_norm": 2.796875, + "learning_rate": 0.0001674061683453949, + "loss": 2.2245, + "step": 224830 + }, + { + "epoch": 0.53, + "grad_norm": 2.28125, + "learning_rate": 0.00016740480304706527, + "loss": 2.0349, + "step": 224835 + }, + { + "epoch": 0.53, + "grad_norm": 2.140625, + "learning_rate": 0.00016740343772570895, + "loss": 2.096, + "step": 224840 + }, + { + "epoch": 0.53, + "grad_norm": 2.375, + "learning_rate": 0.00016740207238132636, + "loss": 1.968, + "step": 224845 + }, + { + "epoch": 0.53, + "grad_norm": 2.609375, + "learning_rate": 0.00016740070701391804, + "loss": 1.8633, + "step": 224850 + }, + { + "epoch": 0.53, + "grad_norm": 2.078125, + "learning_rate": 0.00016739934162348437, + "loss": 2.1696, + "step": 224855 + }, + { + "epoch": 0.53, + "grad_norm": 1.96875, + "learning_rate": 0.00016739797621002588, + "loss": 2.1427, + "step": 224860 + }, + { + "epoch": 0.53, + "grad_norm": 2.28125, + "learning_rate": 0.00016739661077354304, + "loss": 2.023, + "step": 224865 + }, + { + "epoch": 0.53, + "grad_norm": 2.234375, + "learning_rate": 0.0001673952453140363, + "loss": 2.1294, + "step": 224870 + }, + { + "epoch": 0.53, + "grad_norm": 2.15625, + "learning_rate": 0.0001673938798315061, + "loss": 2.1735, + "step": 224875 + }, + { + "epoch": 0.53, + "grad_norm": 2.25, + "learning_rate": 0.0001673925143259529, + "loss": 1.8106, + "step": 224880 + }, + { + "epoch": 0.53, + "grad_norm": 2.203125, + "learning_rate": 0.0001673911487973772, + "loss": 2.0765, + "step": 224885 + }, + { + "epoch": 0.53, + "grad_norm": 1.8125, + "learning_rate": 0.0001673897832457795, + "loss": 2.1711, + "step": 224890 + }, + { + "epoch": 0.53, + "grad_norm": 2.015625, + "learning_rate": 0.00016738841767116022, + "loss": 2.1361, + "step": 224895 + }, + { + "epoch": 0.53, + "grad_norm": 1.8046875, + "learning_rate": 0.00016738705207351983, + "loss": 2.0692, + "step": 224900 + }, + { + "epoch": 0.53, + "grad_norm": 2.71875, + "learning_rate": 0.00016738568645285881, + "loss": 2.2706, + "step": 224905 + }, + { + "epoch": 0.53, + "grad_norm": 2.140625, + "learning_rate": 0.00016738432080917759, + "loss": 2.2092, + "step": 224910 + }, + { + "epoch": 0.53, + "grad_norm": 2.078125, + "learning_rate": 0.00016738295514247668, + "loss": 2.1034, + "step": 224915 + }, + { + "epoch": 0.53, + "grad_norm": 2.609375, + "learning_rate": 0.00016738158945275652, + "loss": 2.0061, + "step": 224920 + }, + { + "epoch": 0.53, + "grad_norm": 2.265625, + "learning_rate": 0.0001673802237400176, + "loss": 2.1954, + "step": 224925 + }, + { + "epoch": 0.53, + "grad_norm": 2.203125, + "learning_rate": 0.0001673788580042604, + "loss": 2.1443, + "step": 224930 + }, + { + "epoch": 0.53, + "grad_norm": 1.953125, + "learning_rate": 0.0001673774922454853, + "loss": 2.163, + "step": 224935 + }, + { + "epoch": 0.53, + "grad_norm": 2.375, + "learning_rate": 0.00016737612646369288, + "loss": 2.1422, + "step": 224940 + }, + { + "epoch": 0.53, + "grad_norm": 2.375, + "learning_rate": 0.0001673747606588835, + "loss": 2.2119, + "step": 224945 + }, + { + "epoch": 0.53, + "grad_norm": 2.015625, + "learning_rate": 0.00016737339483105773, + "loss": 2.153, + "step": 224950 + }, + { + "epoch": 0.53, + "grad_norm": 2.5, + "learning_rate": 0.00016737202898021596, + "loss": 2.0396, + "step": 224955 + }, + { + "epoch": 0.53, + "grad_norm": 2.25, + "learning_rate": 0.0001673706631063587, + "loss": 2.0548, + "step": 224960 + }, + { + "epoch": 0.53, + "grad_norm": 1.9609375, + "learning_rate": 0.00016736929720948638, + "loss": 2.2874, + "step": 224965 + }, + { + "epoch": 0.53, + "grad_norm": 2.34375, + "learning_rate": 0.0001673679312895995, + "loss": 1.9928, + "step": 224970 + }, + { + "epoch": 0.53, + "grad_norm": 2.109375, + "learning_rate": 0.00016736656534669853, + "loss": 2.0494, + "step": 224975 + }, + { + "epoch": 0.53, + "grad_norm": 2.1875, + "learning_rate": 0.00016736519938078388, + "loss": 2.0744, + "step": 224980 + }, + { + "epoch": 0.53, + "grad_norm": 2.109375, + "learning_rate": 0.00016736383339185608, + "loss": 2.2727, + "step": 224985 + }, + { + "epoch": 0.53, + "grad_norm": 2.046875, + "learning_rate": 0.00016736246737991553, + "loss": 1.9735, + "step": 224990 + }, + { + "epoch": 0.53, + "grad_norm": 2.296875, + "learning_rate": 0.0001673611013449628, + "loss": 2.1968, + "step": 224995 + }, + { + "epoch": 0.53, + "grad_norm": 2.0, + "learning_rate": 0.00016735973528699826, + "loss": 2.1038, + "step": 225000 + }, + { + "epoch": 0.53, + "grad_norm": 2.03125, + "learning_rate": 0.00016735836920602246, + "loss": 2.2032, + "step": 225005 + }, + { + "epoch": 0.53, + "grad_norm": 1.8125, + "learning_rate": 0.00016735700310203578, + "loss": 2.0357, + "step": 225010 + }, + { + "epoch": 0.53, + "grad_norm": 2.234375, + "learning_rate": 0.00016735563697503874, + "loss": 2.0758, + "step": 225015 + }, + { + "epoch": 0.53, + "grad_norm": 2.1875, + "learning_rate": 0.00016735427082503174, + "loss": 2.1736, + "step": 225020 + }, + { + "epoch": 0.53, + "grad_norm": 2.15625, + "learning_rate": 0.00016735290465201536, + "loss": 2.1155, + "step": 225025 + }, + { + "epoch": 0.53, + "grad_norm": 2.140625, + "learning_rate": 0.00016735153845599002, + "loss": 2.278, + "step": 225030 + }, + { + "epoch": 0.53, + "grad_norm": 2.046875, + "learning_rate": 0.00016735017223695612, + "loss": 1.95, + "step": 225035 + }, + { + "epoch": 0.53, + "grad_norm": 2.21875, + "learning_rate": 0.0001673488059949142, + "loss": 2.0959, + "step": 225040 + }, + { + "epoch": 0.53, + "grad_norm": 2.28125, + "learning_rate": 0.0001673474397298647, + "loss": 2.1132, + "step": 225045 + }, + { + "epoch": 0.53, + "grad_norm": 2.796875, + "learning_rate": 0.0001673460734418081, + "loss": 2.0646, + "step": 225050 + }, + { + "epoch": 0.53, + "grad_norm": 2.078125, + "learning_rate": 0.00016734470713074487, + "loss": 2.1058, + "step": 225055 + }, + { + "epoch": 0.53, + "grad_norm": 2.390625, + "learning_rate": 0.00016734334079667547, + "loss": 2.1618, + "step": 225060 + }, + { + "epoch": 0.53, + "grad_norm": 1.921875, + "learning_rate": 0.00016734197443960035, + "loss": 1.9385, + "step": 225065 + }, + { + "epoch": 0.53, + "grad_norm": 2.171875, + "learning_rate": 0.00016734060805951998, + "loss": 2.0609, + "step": 225070 + }, + { + "epoch": 0.53, + "grad_norm": 1.953125, + "learning_rate": 0.00016733924165643487, + "loss": 2.1957, + "step": 225075 + }, + { + "epoch": 0.53, + "grad_norm": 2.046875, + "learning_rate": 0.00016733787523034543, + "loss": 2.0521, + "step": 225080 + }, + { + "epoch": 0.53, + "grad_norm": 2.078125, + "learning_rate": 0.00016733650878125213, + "loss": 2.0092, + "step": 225085 + }, + { + "epoch": 0.53, + "grad_norm": 2.171875, + "learning_rate": 0.0001673351423091555, + "loss": 2.2573, + "step": 225090 + }, + { + "epoch": 0.53, + "grad_norm": 1.8046875, + "learning_rate": 0.00016733377581405593, + "loss": 2.1364, + "step": 225095 + }, + { + "epoch": 0.53, + "grad_norm": 2.046875, + "learning_rate": 0.00016733240929595392, + "loss": 2.1083, + "step": 225100 + }, + { + "epoch": 0.53, + "grad_norm": 2.125, + "learning_rate": 0.00016733104275484995, + "loss": 1.735, + "step": 225105 + }, + { + "epoch": 0.53, + "grad_norm": 1.8359375, + "learning_rate": 0.00016732967619074448, + "loss": 2.0144, + "step": 225110 + }, + { + "epoch": 0.53, + "grad_norm": 2.140625, + "learning_rate": 0.00016732830960363798, + "loss": 1.9336, + "step": 225115 + }, + { + "epoch": 0.53, + "grad_norm": 2.1875, + "learning_rate": 0.00016732694299353092, + "loss": 2.1483, + "step": 225120 + }, + { + "epoch": 0.53, + "grad_norm": 2.125, + "learning_rate": 0.00016732557636042373, + "loss": 2.1075, + "step": 225125 + }, + { + "epoch": 0.53, + "grad_norm": 2.75, + "learning_rate": 0.0001673242097043169, + "loss": 2.0138, + "step": 225130 + }, + { + "epoch": 0.53, + "grad_norm": 2.078125, + "learning_rate": 0.0001673228430252109, + "loss": 1.9901, + "step": 225135 + }, + { + "epoch": 0.53, + "grad_norm": 2.0625, + "learning_rate": 0.0001673214763231062, + "loss": 2.1665, + "step": 225140 + }, + { + "epoch": 0.53, + "grad_norm": 2.171875, + "learning_rate": 0.00016732010959800327, + "loss": 2.155, + "step": 225145 + }, + { + "epoch": 0.53, + "grad_norm": 1.8359375, + "learning_rate": 0.00016731874284990256, + "loss": 2.1294, + "step": 225150 + }, + { + "epoch": 0.53, + "grad_norm": 1.90625, + "learning_rate": 0.00016731737607880454, + "loss": 2.2497, + "step": 225155 + }, + { + "epoch": 0.53, + "grad_norm": 2.125, + "learning_rate": 0.0001673160092847097, + "loss": 2.1812, + "step": 225160 + }, + { + "epoch": 0.53, + "grad_norm": 2.15625, + "learning_rate": 0.0001673146424676185, + "loss": 2.03, + "step": 225165 + }, + { + "epoch": 0.53, + "grad_norm": 1.90625, + "learning_rate": 0.0001673132756275314, + "loss": 2.0007, + "step": 225170 + }, + { + "epoch": 0.53, + "grad_norm": 1.5546875, + "learning_rate": 0.00016731190876444885, + "loss": 2.0307, + "step": 225175 + }, + { + "epoch": 0.53, + "grad_norm": 1.859375, + "learning_rate": 0.0001673105418783713, + "loss": 2.0284, + "step": 225180 + }, + { + "epoch": 0.53, + "grad_norm": 2.359375, + "learning_rate": 0.00016730917496929928, + "loss": 2.1311, + "step": 225185 + }, + { + "epoch": 0.53, + "grad_norm": 2.140625, + "learning_rate": 0.00016730780803723324, + "loss": 2.1737, + "step": 225190 + }, + { + "epoch": 0.53, + "grad_norm": 2.453125, + "learning_rate": 0.0001673064410821736, + "loss": 2.1671, + "step": 225195 + }, + { + "epoch": 0.53, + "grad_norm": 2.015625, + "learning_rate": 0.00016730507410412087, + "loss": 2.2306, + "step": 225200 + }, + { + "epoch": 0.53, + "grad_norm": 2.078125, + "learning_rate": 0.0001673037071030755, + "loss": 2.1227, + "step": 225205 + }, + { + "epoch": 0.53, + "grad_norm": 2.234375, + "learning_rate": 0.00016730234007903796, + "loss": 2.1294, + "step": 225210 + }, + { + "epoch": 0.53, + "grad_norm": 2.71875, + "learning_rate": 0.00016730097303200873, + "loss": 2.1021, + "step": 225215 + }, + { + "epoch": 0.53, + "grad_norm": 2.296875, + "learning_rate": 0.00016729960596198828, + "loss": 2.0048, + "step": 225220 + }, + { + "epoch": 0.53, + "grad_norm": 2.078125, + "learning_rate": 0.00016729823886897703, + "loss": 1.9097, + "step": 225225 + }, + { + "epoch": 0.53, + "grad_norm": 2.71875, + "learning_rate": 0.00016729687175297552, + "loss": 1.8844, + "step": 225230 + }, + { + "epoch": 0.53, + "grad_norm": 2.234375, + "learning_rate": 0.00016729550461398415, + "loss": 2.1507, + "step": 225235 + }, + { + "epoch": 0.53, + "grad_norm": 2.015625, + "learning_rate": 0.00016729413745200343, + "loss": 2.2852, + "step": 225240 + }, + { + "epoch": 0.53, + "grad_norm": 2.140625, + "learning_rate": 0.00016729277026703381, + "loss": 2.0234, + "step": 225245 + }, + { + "epoch": 0.53, + "grad_norm": 2.203125, + "learning_rate": 0.00016729140305907575, + "loss": 2.1127, + "step": 225250 + }, + { + "epoch": 0.53, + "grad_norm": 1.7109375, + "learning_rate": 0.00016729003582812975, + "loss": 2.0935, + "step": 225255 + }, + { + "epoch": 0.53, + "grad_norm": 2.46875, + "learning_rate": 0.0001672886685741962, + "loss": 2.1234, + "step": 225260 + }, + { + "epoch": 0.53, + "grad_norm": 2.375, + "learning_rate": 0.0001672873012972757, + "loss": 2.2147, + "step": 225265 + }, + { + "epoch": 0.53, + "grad_norm": 2.015625, + "learning_rate": 0.00016728593399736858, + "loss": 2.1102, + "step": 225270 + }, + { + "epoch": 0.53, + "grad_norm": 2.25, + "learning_rate": 0.00016728456667447538, + "loss": 2.1515, + "step": 225275 + }, + { + "epoch": 0.53, + "grad_norm": 2.234375, + "learning_rate": 0.00016728319932859655, + "loss": 2.0761, + "step": 225280 + }, + { + "epoch": 0.53, + "grad_norm": 2.4375, + "learning_rate": 0.00016728183195973255, + "loss": 1.9824, + "step": 225285 + }, + { + "epoch": 0.53, + "grad_norm": 2.28125, + "learning_rate": 0.00016728046456788387, + "loss": 1.993, + "step": 225290 + }, + { + "epoch": 0.53, + "grad_norm": 2.234375, + "learning_rate": 0.00016727909715305095, + "loss": 2.0976, + "step": 225295 + }, + { + "epoch": 0.53, + "grad_norm": 2.03125, + "learning_rate": 0.00016727772971523427, + "loss": 2.2588, + "step": 225300 + }, + { + "epoch": 0.53, + "grad_norm": 2.109375, + "learning_rate": 0.00016727636225443435, + "loss": 2.1587, + "step": 225305 + }, + { + "epoch": 0.53, + "grad_norm": 2.484375, + "learning_rate": 0.00016727499477065157, + "loss": 2.0479, + "step": 225310 + }, + { + "epoch": 0.53, + "grad_norm": 2.109375, + "learning_rate": 0.0001672736272638864, + "loss": 2.136, + "step": 225315 + }, + { + "epoch": 0.53, + "grad_norm": 2.25, + "learning_rate": 0.00016727225973413935, + "loss": 2.0887, + "step": 225320 + }, + { + "epoch": 0.53, + "grad_norm": 2.859375, + "learning_rate": 0.00016727089218141088, + "loss": 2.241, + "step": 225325 + }, + { + "epoch": 0.53, + "grad_norm": 1.9140625, + "learning_rate": 0.0001672695246057015, + "loss": 2.3281, + "step": 225330 + }, + { + "epoch": 0.53, + "grad_norm": 2.1875, + "learning_rate": 0.00016726815700701154, + "loss": 2.1933, + "step": 225335 + }, + { + "epoch": 0.53, + "grad_norm": 2.25, + "learning_rate": 0.00016726678938534163, + "loss": 2.1865, + "step": 225340 + }, + { + "epoch": 0.53, + "grad_norm": 2.421875, + "learning_rate": 0.00016726542174069214, + "loss": 2.0344, + "step": 225345 + }, + { + "epoch": 0.53, + "grad_norm": 1.9453125, + "learning_rate": 0.00016726405407306358, + "loss": 2.2205, + "step": 225350 + }, + { + "epoch": 0.53, + "grad_norm": 2.265625, + "learning_rate": 0.00016726268638245638, + "loss": 2.063, + "step": 225355 + }, + { + "epoch": 0.53, + "grad_norm": 2.25, + "learning_rate": 0.00016726131866887103, + "loss": 2.2039, + "step": 225360 + }, + { + "epoch": 0.53, + "grad_norm": 1.859375, + "learning_rate": 0.00016725995093230797, + "loss": 2.1378, + "step": 225365 + }, + { + "epoch": 0.53, + "grad_norm": 1.8203125, + "learning_rate": 0.00016725858317276772, + "loss": 1.9045, + "step": 225370 + }, + { + "epoch": 0.53, + "grad_norm": 2.0625, + "learning_rate": 0.00016725721539025075, + "loss": 1.8199, + "step": 225375 + }, + { + "epoch": 0.53, + "grad_norm": 1.984375, + "learning_rate": 0.00016725584758475742, + "loss": 2.1212, + "step": 225380 + }, + { + "epoch": 0.53, + "grad_norm": 2.25, + "learning_rate": 0.00016725447975628833, + "loss": 2.2127, + "step": 225385 + }, + { + "epoch": 0.53, + "grad_norm": 1.9609375, + "learning_rate": 0.0001672531119048439, + "loss": 2.258, + "step": 225390 + }, + { + "epoch": 0.53, + "grad_norm": 2.34375, + "learning_rate": 0.00016725174403042455, + "loss": 2.1973, + "step": 225395 + }, + { + "epoch": 0.53, + "grad_norm": 2.8125, + "learning_rate": 0.00016725037613303075, + "loss": 2.1435, + "step": 225400 + }, + { + "epoch": 0.53, + "grad_norm": 1.9296875, + "learning_rate": 0.00016724900821266306, + "loss": 2.1792, + "step": 225405 + }, + { + "epoch": 0.53, + "grad_norm": 1.7890625, + "learning_rate": 0.0001672476402693219, + "loss": 2.0665, + "step": 225410 + }, + { + "epoch": 0.53, + "grad_norm": 2.484375, + "learning_rate": 0.00016724627230300772, + "loss": 2.2792, + "step": 225415 + }, + { + "epoch": 0.53, + "grad_norm": 2.328125, + "learning_rate": 0.00016724490431372093, + "loss": 2.218, + "step": 225420 + }, + { + "epoch": 0.53, + "grad_norm": 2.21875, + "learning_rate": 0.0001672435363014621, + "loss": 2.0782, + "step": 225425 + }, + { + "epoch": 0.53, + "grad_norm": 2.015625, + "learning_rate": 0.0001672421682662317, + "loss": 2.1866, + "step": 225430 + }, + { + "epoch": 0.53, + "grad_norm": 1.7890625, + "learning_rate": 0.00016724080020803013, + "loss": 1.9457, + "step": 225435 + }, + { + "epoch": 0.53, + "grad_norm": 2.125, + "learning_rate": 0.00016723943212685786, + "loss": 1.8663, + "step": 225440 + }, + { + "epoch": 0.53, + "grad_norm": 2.359375, + "learning_rate": 0.00016723806402271538, + "loss": 1.9637, + "step": 225445 + }, + { + "epoch": 0.53, + "grad_norm": 2.0625, + "learning_rate": 0.00016723669589560318, + "loss": 2.0389, + "step": 225450 + }, + { + "epoch": 0.53, + "grad_norm": 2.0, + "learning_rate": 0.00016723532774552167, + "loss": 2.1586, + "step": 225455 + }, + { + "epoch": 0.53, + "grad_norm": 2.28125, + "learning_rate": 0.0001672339595724714, + "loss": 2.0467, + "step": 225460 + }, + { + "epoch": 0.53, + "grad_norm": 2.203125, + "learning_rate": 0.00016723259137645279, + "loss": 2.1627, + "step": 225465 + }, + { + "epoch": 0.53, + "grad_norm": 2.109375, + "learning_rate": 0.0001672312231574663, + "loss": 2.1164, + "step": 225470 + }, + { + "epoch": 0.53, + "grad_norm": 2.0625, + "learning_rate": 0.00016722985491551237, + "loss": 2.206, + "step": 225475 + }, + { + "epoch": 0.53, + "grad_norm": 2.3125, + "learning_rate": 0.00016722848665059152, + "loss": 2.0797, + "step": 225480 + }, + { + "epoch": 0.53, + "grad_norm": 2.171875, + "learning_rate": 0.0001672271183627042, + "loss": 2.1059, + "step": 225485 + }, + { + "epoch": 0.53, + "grad_norm": 2.3125, + "learning_rate": 0.0001672257500518509, + "loss": 2.1175, + "step": 225490 + }, + { + "epoch": 0.53, + "grad_norm": 2.1875, + "learning_rate": 0.00016722438171803202, + "loss": 2.0158, + "step": 225495 + }, + { + "epoch": 0.53, + "grad_norm": 1.875, + "learning_rate": 0.0001672230133612481, + "loss": 2.0372, + "step": 225500 + }, + { + "epoch": 0.53, + "grad_norm": 2.40625, + "learning_rate": 0.00016722164498149958, + "loss": 1.8849, + "step": 225505 + }, + { + "epoch": 0.53, + "grad_norm": 2.6875, + "learning_rate": 0.00016722027657878696, + "loss": 2.1389, + "step": 225510 + }, + { + "epoch": 0.53, + "grad_norm": 2.609375, + "learning_rate": 0.00016721890815311064, + "loss": 2.2261, + "step": 225515 + }, + { + "epoch": 0.53, + "grad_norm": 2.4375, + "learning_rate": 0.0001672175397044711, + "loss": 1.9868, + "step": 225520 + }, + { + "epoch": 0.53, + "grad_norm": 2.234375, + "learning_rate": 0.00016721617123286885, + "loss": 2.2461, + "step": 225525 + }, + { + "epoch": 0.53, + "grad_norm": 2.28125, + "learning_rate": 0.00016721480273830435, + "loss": 1.889, + "step": 225530 + }, + { + "epoch": 0.53, + "grad_norm": 2.015625, + "learning_rate": 0.00016721343422077803, + "loss": 2.1215, + "step": 225535 + }, + { + "epoch": 0.53, + "grad_norm": 1.9921875, + "learning_rate": 0.0001672120656802904, + "loss": 2.1114, + "step": 225540 + }, + { + "epoch": 0.53, + "grad_norm": 2.375, + "learning_rate": 0.0001672106971168419, + "loss": 2.2292, + "step": 225545 + }, + { + "epoch": 0.53, + "grad_norm": 2.3125, + "learning_rate": 0.00016720932853043303, + "loss": 2.2395, + "step": 225550 + }, + { + "epoch": 0.53, + "grad_norm": 2.046875, + "learning_rate": 0.00016720795992106419, + "loss": 1.8407, + "step": 225555 + }, + { + "epoch": 0.53, + "grad_norm": 2.21875, + "learning_rate": 0.00016720659128873592, + "loss": 2.0796, + "step": 225560 + }, + { + "epoch": 0.53, + "grad_norm": 2.390625, + "learning_rate": 0.00016720522263344867, + "loss": 2.0821, + "step": 225565 + }, + { + "epoch": 0.53, + "grad_norm": 2.421875, + "learning_rate": 0.00016720385395520285, + "loss": 2.0669, + "step": 225570 + }, + { + "epoch": 0.53, + "grad_norm": 2.0625, + "learning_rate": 0.00016720248525399901, + "loss": 2.1795, + "step": 225575 + }, + { + "epoch": 0.53, + "grad_norm": 2.21875, + "learning_rate": 0.00016720111652983758, + "loss": 2.1113, + "step": 225580 + }, + { + "epoch": 0.53, + "grad_norm": 2.140625, + "learning_rate": 0.00016719974778271906, + "loss": 2.1214, + "step": 225585 + }, + { + "epoch": 0.53, + "grad_norm": 2.203125, + "learning_rate": 0.00016719837901264385, + "loss": 2.1333, + "step": 225590 + }, + { + "epoch": 0.53, + "grad_norm": 1.8359375, + "learning_rate": 0.00016719701021961246, + "loss": 2.0297, + "step": 225595 + }, + { + "epoch": 0.53, + "grad_norm": 2.3125, + "learning_rate": 0.00016719564140362536, + "loss": 2.0344, + "step": 225600 + }, + { + "epoch": 0.53, + "grad_norm": 2.0625, + "learning_rate": 0.000167194272564683, + "loss": 1.9645, + "step": 225605 + }, + { + "epoch": 0.53, + "grad_norm": 2.0625, + "learning_rate": 0.00016719290370278587, + "loss": 2.1981, + "step": 225610 + }, + { + "epoch": 0.53, + "grad_norm": 1.8046875, + "learning_rate": 0.00016719153481793442, + "loss": 2.1313, + "step": 225615 + }, + { + "epoch": 0.53, + "grad_norm": 2.0625, + "learning_rate": 0.00016719016591012912, + "loss": 1.9975, + "step": 225620 + }, + { + "epoch": 0.53, + "grad_norm": 2.03125, + "learning_rate": 0.00016718879697937044, + "loss": 2.1146, + "step": 225625 + }, + { + "epoch": 0.53, + "grad_norm": 2.40625, + "learning_rate": 0.00016718742802565884, + "loss": 2.2846, + "step": 225630 + }, + { + "epoch": 0.53, + "grad_norm": 2.234375, + "learning_rate": 0.00016718605904899483, + "loss": 2.0413, + "step": 225635 + }, + { + "epoch": 0.53, + "grad_norm": 2.078125, + "learning_rate": 0.0001671846900493788, + "loss": 1.9648, + "step": 225640 + }, + { + "epoch": 0.53, + "grad_norm": 2.078125, + "learning_rate": 0.00016718332102681128, + "loss": 2.0874, + "step": 225645 + }, + { + "epoch": 0.53, + "grad_norm": 2.3125, + "learning_rate": 0.00016718195198129273, + "loss": 1.9966, + "step": 225650 + }, + { + "epoch": 0.53, + "grad_norm": 2.53125, + "learning_rate": 0.0001671805829128236, + "loss": 2.0166, + "step": 225655 + }, + { + "epoch": 0.53, + "grad_norm": 2.1875, + "learning_rate": 0.00016717921382140437, + "loss": 2.1337, + "step": 225660 + }, + { + "epoch": 0.53, + "grad_norm": 2.21875, + "learning_rate": 0.00016717784470703548, + "loss": 2.1524, + "step": 225665 + }, + { + "epoch": 0.53, + "grad_norm": 2.109375, + "learning_rate": 0.00016717647556971745, + "loss": 2.1875, + "step": 225670 + }, + { + "epoch": 0.53, + "grad_norm": 2.25, + "learning_rate": 0.0001671751064094507, + "loss": 2.0088, + "step": 225675 + }, + { + "epoch": 0.53, + "grad_norm": 2.4375, + "learning_rate": 0.00016717373722623574, + "loss": 2.0043, + "step": 225680 + }, + { + "epoch": 0.53, + "grad_norm": 2.09375, + "learning_rate": 0.00016717236802007296, + "loss": 2.0339, + "step": 225685 + }, + { + "epoch": 0.53, + "grad_norm": 1.890625, + "learning_rate": 0.00016717099879096293, + "loss": 2.2919, + "step": 225690 + }, + { + "epoch": 0.53, + "grad_norm": 1.9765625, + "learning_rate": 0.00016716962953890603, + "loss": 2.1854, + "step": 225695 + }, + { + "epoch": 0.53, + "grad_norm": 2.03125, + "learning_rate": 0.00016716826026390278, + "loss": 1.9591, + "step": 225700 + }, + { + "epoch": 0.53, + "grad_norm": 2.28125, + "learning_rate": 0.00016716689096595366, + "loss": 2.0476, + "step": 225705 + }, + { + "epoch": 0.53, + "grad_norm": 2.140625, + "learning_rate": 0.0001671655216450591, + "loss": 2.2371, + "step": 225710 + }, + { + "epoch": 0.53, + "grad_norm": 1.7109375, + "learning_rate": 0.00016716415230121957, + "loss": 2.1439, + "step": 225715 + }, + { + "epoch": 0.53, + "grad_norm": 2.015625, + "learning_rate": 0.00016716278293443554, + "loss": 2.1391, + "step": 225720 + }, + { + "epoch": 0.53, + "grad_norm": 2.140625, + "learning_rate": 0.0001671614135447075, + "loss": 2.0213, + "step": 225725 + }, + { + "epoch": 0.53, + "grad_norm": 2.03125, + "learning_rate": 0.0001671600441320359, + "loss": 1.9183, + "step": 225730 + }, + { + "epoch": 0.53, + "grad_norm": 2.15625, + "learning_rate": 0.0001671586746964212, + "loss": 2.2286, + "step": 225735 + }, + { + "epoch": 0.53, + "grad_norm": 2.109375, + "learning_rate": 0.00016715730523786387, + "loss": 2.0003, + "step": 225740 + }, + { + "epoch": 0.53, + "grad_norm": 2.21875, + "learning_rate": 0.00016715593575636443, + "loss": 1.9477, + "step": 225745 + }, + { + "epoch": 0.53, + "grad_norm": 2.34375, + "learning_rate": 0.00016715456625192326, + "loss": 2.0199, + "step": 225750 + }, + { + "epoch": 0.53, + "grad_norm": 2.5, + "learning_rate": 0.00016715319672454092, + "loss": 2.0403, + "step": 225755 + }, + { + "epoch": 0.53, + "grad_norm": 1.921875, + "learning_rate": 0.0001671518271742178, + "loss": 1.9793, + "step": 225760 + }, + { + "epoch": 0.53, + "grad_norm": 2.0, + "learning_rate": 0.00016715045760095439, + "loss": 2.0004, + "step": 225765 + }, + { + "epoch": 0.53, + "grad_norm": 2.125, + "learning_rate": 0.00016714908800475117, + "loss": 2.0948, + "step": 225770 + }, + { + "epoch": 0.53, + "grad_norm": 2.078125, + "learning_rate": 0.0001671477183856086, + "loss": 2.224, + "step": 225775 + }, + { + "epoch": 0.53, + "grad_norm": 2.140625, + "learning_rate": 0.00016714634874352718, + "loss": 1.9854, + "step": 225780 + }, + { + "epoch": 0.53, + "grad_norm": 2.421875, + "learning_rate": 0.00016714497907850733, + "loss": 1.993, + "step": 225785 + }, + { + "epoch": 0.53, + "grad_norm": 2.34375, + "learning_rate": 0.00016714360939054955, + "loss": 1.9187, + "step": 225790 + }, + { + "epoch": 0.53, + "grad_norm": 2.375, + "learning_rate": 0.00016714223967965425, + "loss": 2.224, + "step": 225795 + }, + { + "epoch": 0.53, + "grad_norm": 2.390625, + "learning_rate": 0.00016714086994582198, + "loss": 2.1901, + "step": 225800 + }, + { + "epoch": 0.53, + "grad_norm": 2.5625, + "learning_rate": 0.00016713950018905317, + "loss": 2.0844, + "step": 225805 + }, + { + "epoch": 0.53, + "grad_norm": 2.265625, + "learning_rate": 0.0001671381304093483, + "loss": 2.1445, + "step": 225810 + }, + { + "epoch": 0.53, + "grad_norm": 1.8359375, + "learning_rate": 0.0001671367606067078, + "loss": 1.863, + "step": 225815 + }, + { + "epoch": 0.53, + "grad_norm": 1.8671875, + "learning_rate": 0.00016713539078113215, + "loss": 1.9108, + "step": 225820 + }, + { + "epoch": 0.53, + "grad_norm": 2.21875, + "learning_rate": 0.0001671340209326219, + "loss": 2.1791, + "step": 225825 + }, + { + "epoch": 0.53, + "grad_norm": 2.65625, + "learning_rate": 0.0001671326510611774, + "loss": 2.0383, + "step": 225830 + }, + { + "epoch": 0.53, + "grad_norm": 1.90625, + "learning_rate": 0.0001671312811667992, + "loss": 2.0632, + "step": 225835 + }, + { + "epoch": 0.53, + "grad_norm": 2.4375, + "learning_rate": 0.00016712991124948772, + "loss": 2.1125, + "step": 225840 + }, + { + "epoch": 0.53, + "grad_norm": 1.9921875, + "learning_rate": 0.00016712854130924342, + "loss": 2.1094, + "step": 225845 + }, + { + "epoch": 0.53, + "grad_norm": 2.140625, + "learning_rate": 0.00016712717134606683, + "loss": 2.0276, + "step": 225850 + }, + { + "epoch": 0.53, + "grad_norm": 1.9453125, + "learning_rate": 0.00016712580135995835, + "loss": 2.2109, + "step": 225855 + }, + { + "epoch": 0.53, + "grad_norm": 1.921875, + "learning_rate": 0.00016712443135091852, + "loss": 1.9244, + "step": 225860 + }, + { + "epoch": 0.53, + "grad_norm": 2.171875, + "learning_rate": 0.00016712306131894775, + "loss": 2.1575, + "step": 225865 + }, + { + "epoch": 0.53, + "grad_norm": 1.8359375, + "learning_rate": 0.0001671216912640465, + "loss": 1.9265, + "step": 225870 + }, + { + "epoch": 0.53, + "grad_norm": 2.015625, + "learning_rate": 0.00016712032118621525, + "loss": 1.9335, + "step": 225875 + }, + { + "epoch": 0.53, + "grad_norm": 1.984375, + "learning_rate": 0.00016711895108545454, + "loss": 2.0357, + "step": 225880 + }, + { + "epoch": 0.53, + "grad_norm": 1.875, + "learning_rate": 0.00016711758096176475, + "loss": 2.1281, + "step": 225885 + }, + { + "epoch": 0.53, + "grad_norm": 2.09375, + "learning_rate": 0.00016711621081514638, + "loss": 2.2174, + "step": 225890 + }, + { + "epoch": 0.53, + "grad_norm": 2.03125, + "learning_rate": 0.00016711484064559987, + "loss": 2.126, + "step": 225895 + }, + { + "epoch": 0.53, + "grad_norm": 1.828125, + "learning_rate": 0.00016711347045312577, + "loss": 1.9401, + "step": 225900 + }, + { + "epoch": 0.53, + "grad_norm": 2.171875, + "learning_rate": 0.00016711210023772445, + "loss": 2.3341, + "step": 225905 + }, + { + "epoch": 0.53, + "grad_norm": 2.046875, + "learning_rate": 0.0001671107299993964, + "loss": 2.1614, + "step": 225910 + }, + { + "epoch": 0.53, + "grad_norm": 2.59375, + "learning_rate": 0.00016710935973814214, + "loss": 2.2193, + "step": 225915 + }, + { + "epoch": 0.53, + "grad_norm": 2.140625, + "learning_rate": 0.00016710798945396212, + "loss": 2.0211, + "step": 225920 + }, + { + "epoch": 0.53, + "grad_norm": 2.40625, + "learning_rate": 0.00016710661914685676, + "loss": 2.2786, + "step": 225925 + }, + { + "epoch": 0.53, + "grad_norm": 2.328125, + "learning_rate": 0.0001671052488168266, + "loss": 2.005, + "step": 225930 + }, + { + "epoch": 0.53, + "grad_norm": 2.78125, + "learning_rate": 0.000167103878463872, + "loss": 2.0759, + "step": 225935 + }, + { + "epoch": 0.53, + "grad_norm": 2.28125, + "learning_rate": 0.00016710250808799358, + "loss": 2.0978, + "step": 225940 + }, + { + "epoch": 0.53, + "grad_norm": 2.21875, + "learning_rate": 0.00016710113768919168, + "loss": 2.1152, + "step": 225945 + }, + { + "epoch": 0.53, + "grad_norm": 2.015625, + "learning_rate": 0.00016709976726746683, + "loss": 2.0392, + "step": 225950 + }, + { + "epoch": 0.53, + "grad_norm": 1.796875, + "learning_rate": 0.00016709839682281947, + "loss": 2.1068, + "step": 225955 + }, + { + "epoch": 0.53, + "grad_norm": 2.046875, + "learning_rate": 0.0001670970263552501, + "loss": 1.9014, + "step": 225960 + }, + { + "epoch": 0.53, + "grad_norm": 1.8671875, + "learning_rate": 0.00016709565586475912, + "loss": 1.9234, + "step": 225965 + }, + { + "epoch": 0.53, + "grad_norm": 1.96875, + "learning_rate": 0.0001670942853513471, + "loss": 1.982, + "step": 225970 + }, + { + "epoch": 0.53, + "grad_norm": 2.28125, + "learning_rate": 0.00016709291481501444, + "loss": 2.1244, + "step": 225975 + }, + { + "epoch": 0.53, + "grad_norm": 2.65625, + "learning_rate": 0.00016709154425576162, + "loss": 2.1797, + "step": 225980 + }, + { + "epoch": 0.53, + "grad_norm": 2.15625, + "learning_rate": 0.00016709017367358913, + "loss": 1.85, + "step": 225985 + }, + { + "epoch": 0.53, + "grad_norm": 2.046875, + "learning_rate": 0.00016708880306849739, + "loss": 2.1188, + "step": 225990 + }, + { + "epoch": 0.53, + "grad_norm": 2.3125, + "learning_rate": 0.0001670874324404869, + "loss": 2.218, + "step": 225995 + }, + { + "epoch": 0.53, + "grad_norm": 2.0, + "learning_rate": 0.00016708606178955816, + "loss": 2.1935, + "step": 226000 + }, + { + "epoch": 0.53, + "grad_norm": 2.171875, + "learning_rate": 0.00016708469111571158, + "loss": 2.2047, + "step": 226005 + }, + { + "epoch": 0.53, + "grad_norm": 2.203125, + "learning_rate": 0.00016708332041894764, + "loss": 1.995, + "step": 226010 + }, + { + "epoch": 0.53, + "grad_norm": 2.3125, + "learning_rate": 0.00016708194969926681, + "loss": 2.1052, + "step": 226015 + }, + { + "epoch": 0.53, + "grad_norm": 2.109375, + "learning_rate": 0.0001670805789566696, + "loss": 1.9338, + "step": 226020 + }, + { + "epoch": 0.53, + "grad_norm": 2.546875, + "learning_rate": 0.00016707920819115647, + "loss": 2.2151, + "step": 226025 + }, + { + "epoch": 0.53, + "grad_norm": 2.171875, + "learning_rate": 0.00016707783740272785, + "loss": 2.1414, + "step": 226030 + }, + { + "epoch": 0.53, + "grad_norm": 2.171875, + "learning_rate": 0.0001670764665913842, + "loss": 2.2273, + "step": 226035 + }, + { + "epoch": 0.53, + "grad_norm": 1.9296875, + "learning_rate": 0.00016707509575712603, + "loss": 2.156, + "step": 226040 + }, + { + "epoch": 0.53, + "grad_norm": 1.84375, + "learning_rate": 0.0001670737248999538, + "loss": 2.0739, + "step": 226045 + }, + { + "epoch": 0.53, + "grad_norm": 2.171875, + "learning_rate": 0.00016707235401986793, + "loss": 2.1358, + "step": 226050 + }, + { + "epoch": 0.53, + "grad_norm": 2.171875, + "learning_rate": 0.00016707098311686898, + "loss": 2.2234, + "step": 226055 + }, + { + "epoch": 0.53, + "grad_norm": 2.234375, + "learning_rate": 0.00016706961219095735, + "loss": 1.9028, + "step": 226060 + }, + { + "epoch": 0.53, + "grad_norm": 2.109375, + "learning_rate": 0.00016706824124213348, + "loss": 2.2123, + "step": 226065 + }, + { + "epoch": 0.53, + "grad_norm": 2.4375, + "learning_rate": 0.00016706687027039793, + "loss": 1.9919, + "step": 226070 + }, + { + "epoch": 0.53, + "grad_norm": 2.140625, + "learning_rate": 0.00016706549927575113, + "loss": 2.088, + "step": 226075 + }, + { + "epoch": 0.53, + "grad_norm": 2.328125, + "learning_rate": 0.0001670641282581935, + "loss": 2.1412, + "step": 226080 + }, + { + "epoch": 0.53, + "grad_norm": 2.21875, + "learning_rate": 0.00016706275721772555, + "loss": 1.9737, + "step": 226085 + }, + { + "epoch": 0.53, + "grad_norm": 2.3125, + "learning_rate": 0.00016706138615434776, + "loss": 2.1445, + "step": 226090 + }, + { + "epoch": 0.53, + "grad_norm": 2.0, + "learning_rate": 0.00016706001506806059, + "loss": 1.9769, + "step": 226095 + }, + { + "epoch": 0.53, + "grad_norm": 2.265625, + "learning_rate": 0.0001670586439588645, + "loss": 2.0458, + "step": 226100 + }, + { + "epoch": 0.53, + "grad_norm": 2.421875, + "learning_rate": 0.00016705727282675996, + "loss": 2.133, + "step": 226105 + }, + { + "epoch": 0.53, + "grad_norm": 2.140625, + "learning_rate": 0.00016705590167174746, + "loss": 2.086, + "step": 226110 + }, + { + "epoch": 0.53, + "grad_norm": 2.421875, + "learning_rate": 0.0001670545304938274, + "loss": 2.0251, + "step": 226115 + }, + { + "epoch": 0.53, + "grad_norm": 2.234375, + "learning_rate": 0.00016705315929300033, + "loss": 1.9418, + "step": 226120 + }, + { + "epoch": 0.53, + "grad_norm": 2.46875, + "learning_rate": 0.00016705178806926667, + "loss": 2.0869, + "step": 226125 + }, + { + "epoch": 0.53, + "grad_norm": 1.796875, + "learning_rate": 0.00016705041682262692, + "loss": 2.0858, + "step": 226130 + }, + { + "epoch": 0.53, + "grad_norm": 2.515625, + "learning_rate": 0.00016704904555308152, + "loss": 2.1162, + "step": 226135 + }, + { + "epoch": 0.53, + "grad_norm": 2.859375, + "learning_rate": 0.00016704767426063095, + "loss": 2.0443, + "step": 226140 + }, + { + "epoch": 0.53, + "grad_norm": 1.96875, + "learning_rate": 0.00016704630294527565, + "loss": 2.1018, + "step": 226145 + }, + { + "epoch": 0.53, + "grad_norm": 1.9921875, + "learning_rate": 0.00016704493160701618, + "loss": 2.0169, + "step": 226150 + }, + { + "epoch": 0.53, + "grad_norm": 2.1875, + "learning_rate": 0.0001670435602458529, + "loss": 2.1431, + "step": 226155 + }, + { + "epoch": 0.53, + "grad_norm": 1.8984375, + "learning_rate": 0.00016704218886178632, + "loss": 2.1997, + "step": 226160 + }, + { + "epoch": 0.53, + "grad_norm": 2.578125, + "learning_rate": 0.00016704081745481694, + "loss": 2.1376, + "step": 226165 + }, + { + "epoch": 0.53, + "grad_norm": 2.09375, + "learning_rate": 0.00016703944602494518, + "loss": 2.1503, + "step": 226170 + }, + { + "epoch": 0.53, + "grad_norm": 2.296875, + "learning_rate": 0.00016703807457217154, + "loss": 2.0526, + "step": 226175 + }, + { + "epoch": 0.53, + "grad_norm": 1.96875, + "learning_rate": 0.0001670367030964965, + "loss": 2.0479, + "step": 226180 + }, + { + "epoch": 0.53, + "grad_norm": 2.375, + "learning_rate": 0.00016703533159792046, + "loss": 2.2134, + "step": 226185 + }, + { + "epoch": 0.53, + "grad_norm": 2.265625, + "learning_rate": 0.000167033960076444, + "loss": 1.9799, + "step": 226190 + }, + { + "epoch": 0.53, + "grad_norm": 2.21875, + "learning_rate": 0.00016703258853206744, + "loss": 2.1898, + "step": 226195 + }, + { + "epoch": 0.53, + "grad_norm": 2.09375, + "learning_rate": 0.00016703121696479137, + "loss": 2.1148, + "step": 226200 + }, + { + "epoch": 0.53, + "grad_norm": 2.25, + "learning_rate": 0.00016702984537461625, + "loss": 2.2088, + "step": 226205 + }, + { + "epoch": 0.53, + "grad_norm": 1.96875, + "learning_rate": 0.00016702847376154248, + "loss": 2.2661, + "step": 226210 + }, + { + "epoch": 0.53, + "grad_norm": 2.28125, + "learning_rate": 0.00016702710212557057, + "loss": 2.083, + "step": 226215 + }, + { + "epoch": 0.53, + "grad_norm": 1.9921875, + "learning_rate": 0.000167025730466701, + "loss": 2.1214, + "step": 226220 + }, + { + "epoch": 0.53, + "grad_norm": 2.078125, + "learning_rate": 0.00016702435878493424, + "loss": 2.1803, + "step": 226225 + }, + { + "epoch": 0.53, + "grad_norm": 2.890625, + "learning_rate": 0.0001670229870802707, + "loss": 2.0806, + "step": 226230 + }, + { + "epoch": 0.53, + "grad_norm": 1.921875, + "learning_rate": 0.00016702161535271092, + "loss": 2.1935, + "step": 226235 + }, + { + "epoch": 0.53, + "grad_norm": 3.578125, + "learning_rate": 0.00016702024360225535, + "loss": 2.2326, + "step": 226240 + }, + { + "epoch": 0.53, + "grad_norm": 2.0625, + "learning_rate": 0.00016701887182890443, + "loss": 2.088, + "step": 226245 + }, + { + "epoch": 0.53, + "grad_norm": 2.53125, + "learning_rate": 0.00016701750003265863, + "loss": 2.1104, + "step": 226250 + }, + { + "epoch": 0.53, + "grad_norm": 2.1875, + "learning_rate": 0.0001670161282135185, + "loss": 2.1038, + "step": 226255 + }, + { + "epoch": 0.53, + "grad_norm": 2.296875, + "learning_rate": 0.00016701475637148438, + "loss": 2.245, + "step": 226260 + }, + { + "epoch": 0.53, + "grad_norm": 2.25, + "learning_rate": 0.00016701338450655687, + "loss": 2.0126, + "step": 226265 + }, + { + "epoch": 0.53, + "grad_norm": 1.796875, + "learning_rate": 0.00016701201261873632, + "loss": 2.033, + "step": 226270 + }, + { + "epoch": 0.53, + "grad_norm": 2.171875, + "learning_rate": 0.00016701064070802325, + "loss": 2.0731, + "step": 226275 + }, + { + "epoch": 0.53, + "grad_norm": 2.0, + "learning_rate": 0.00016700926877441817, + "loss": 1.9742, + "step": 226280 + }, + { + "epoch": 0.53, + "grad_norm": 1.859375, + "learning_rate": 0.00016700789681792146, + "loss": 2.2874, + "step": 226285 + }, + { + "epoch": 0.53, + "grad_norm": 2.140625, + "learning_rate": 0.00016700652483853364, + "loss": 1.9707, + "step": 226290 + }, + { + "epoch": 0.53, + "grad_norm": 1.96875, + "learning_rate": 0.00016700515283625523, + "loss": 2.0702, + "step": 226295 + }, + { + "epoch": 0.53, + "grad_norm": 2.328125, + "learning_rate": 0.00016700378081108662, + "loss": 2.1186, + "step": 226300 + }, + { + "epoch": 0.53, + "grad_norm": 2.28125, + "learning_rate": 0.00016700240876302828, + "loss": 2.1816, + "step": 226305 + }, + { + "epoch": 0.53, + "grad_norm": 2.125, + "learning_rate": 0.00016700103669208073, + "loss": 2.0872, + "step": 226310 + }, + { + "epoch": 0.53, + "grad_norm": 2.6875, + "learning_rate": 0.0001669996645982444, + "loss": 2.0689, + "step": 226315 + }, + { + "epoch": 0.53, + "grad_norm": 2.171875, + "learning_rate": 0.00016699829248151976, + "loss": 2.1493, + "step": 226320 + }, + { + "epoch": 0.53, + "grad_norm": 1.921875, + "learning_rate": 0.00016699692034190727, + "loss": 2.0269, + "step": 226325 + }, + { + "epoch": 0.53, + "grad_norm": 1.953125, + "learning_rate": 0.00016699554817940743, + "loss": 1.9816, + "step": 226330 + }, + { + "epoch": 0.53, + "grad_norm": 2.046875, + "learning_rate": 0.00016699417599402074, + "loss": 2.1549, + "step": 226335 + }, + { + "epoch": 0.53, + "grad_norm": 2.0625, + "learning_rate": 0.00016699280378574757, + "loss": 1.9599, + "step": 226340 + }, + { + "epoch": 0.53, + "grad_norm": 2.125, + "learning_rate": 0.00016699143155458847, + "loss": 2.0924, + "step": 226345 + }, + { + "epoch": 0.53, + "grad_norm": 1.8828125, + "learning_rate": 0.00016699005930054388, + "loss": 2.1541, + "step": 226350 + }, + { + "epoch": 0.53, + "grad_norm": 1.984375, + "learning_rate": 0.0001669886870236143, + "loss": 2.0595, + "step": 226355 + }, + { + "epoch": 0.53, + "grad_norm": 2.375, + "learning_rate": 0.0001669873147238001, + "loss": 2.0781, + "step": 226360 + }, + { + "epoch": 0.53, + "grad_norm": 2.40625, + "learning_rate": 0.00016698594240110185, + "loss": 2.161, + "step": 226365 + }, + { + "epoch": 0.53, + "grad_norm": 1.9609375, + "learning_rate": 0.00016698457005552003, + "loss": 2.2664, + "step": 226370 + }, + { + "epoch": 0.53, + "grad_norm": 2.015625, + "learning_rate": 0.000166983197687055, + "loss": 2.1661, + "step": 226375 + }, + { + "epoch": 0.53, + "grad_norm": 2.0625, + "learning_rate": 0.00016698182529570734, + "loss": 2.0139, + "step": 226380 + }, + { + "epoch": 0.53, + "grad_norm": 1.9140625, + "learning_rate": 0.00016698045288147744, + "loss": 2.0861, + "step": 226385 + }, + { + "epoch": 0.53, + "grad_norm": 1.90625, + "learning_rate": 0.00016697908044436584, + "loss": 2.1505, + "step": 226390 + }, + { + "epoch": 0.53, + "grad_norm": 2.484375, + "learning_rate": 0.00016697770798437293, + "loss": 2.3362, + "step": 226395 + }, + { + "epoch": 0.53, + "grad_norm": 2.015625, + "learning_rate": 0.00016697633550149924, + "loss": 2.1799, + "step": 226400 + }, + { + "epoch": 0.53, + "grad_norm": 2.015625, + "learning_rate": 0.00016697496299574523, + "loss": 2.0832, + "step": 226405 + }, + { + "epoch": 0.53, + "grad_norm": 2.6875, + "learning_rate": 0.00016697359046711135, + "loss": 2.0252, + "step": 226410 + }, + { + "epoch": 0.53, + "grad_norm": 1.8203125, + "learning_rate": 0.00016697221791559807, + "loss": 2.0737, + "step": 226415 + }, + { + "epoch": 0.53, + "grad_norm": 2.1875, + "learning_rate": 0.00016697084534120587, + "loss": 2.1916, + "step": 226420 + }, + { + "epoch": 0.53, + "grad_norm": 1.78125, + "learning_rate": 0.00016696947274393522, + "loss": 1.9643, + "step": 226425 + }, + { + "epoch": 0.53, + "grad_norm": 2.1875, + "learning_rate": 0.0001669681001237866, + "loss": 2.0293, + "step": 226430 + }, + { + "epoch": 0.53, + "grad_norm": 2.046875, + "learning_rate": 0.00016696672748076042, + "loss": 2.2264, + "step": 226435 + }, + { + "epoch": 0.53, + "grad_norm": 2.109375, + "learning_rate": 0.00016696535481485722, + "loss": 2.2, + "step": 226440 + }, + { + "epoch": 0.53, + "grad_norm": 2.1875, + "learning_rate": 0.0001669639821260774, + "loss": 1.9523, + "step": 226445 + }, + { + "epoch": 0.53, + "grad_norm": 2.0625, + "learning_rate": 0.00016696260941442153, + "loss": 2.2412, + "step": 226450 + }, + { + "epoch": 0.53, + "grad_norm": 2.328125, + "learning_rate": 0.00016696123667989002, + "loss": 2.1204, + "step": 226455 + }, + { + "epoch": 0.53, + "grad_norm": 2.25, + "learning_rate": 0.00016695986392248327, + "loss": 2.1559, + "step": 226460 + }, + { + "epoch": 0.53, + "grad_norm": 1.8359375, + "learning_rate": 0.00016695849114220187, + "loss": 1.9523, + "step": 226465 + }, + { + "epoch": 0.53, + "grad_norm": 2.140625, + "learning_rate": 0.00016695711833904618, + "loss": 2.1192, + "step": 226470 + }, + { + "epoch": 0.53, + "grad_norm": 1.8203125, + "learning_rate": 0.00016695574551301682, + "loss": 2.1241, + "step": 226475 + }, + { + "epoch": 0.53, + "grad_norm": 2.03125, + "learning_rate": 0.00016695437266411406, + "loss": 2.2478, + "step": 226480 + }, + { + "epoch": 0.53, + "grad_norm": 2.203125, + "learning_rate": 0.00016695299979233857, + "loss": 2.2378, + "step": 226485 + }, + { + "epoch": 0.53, + "grad_norm": 2.40625, + "learning_rate": 0.00016695162689769064, + "loss": 2.0636, + "step": 226490 + }, + { + "epoch": 0.53, + "grad_norm": 1.8515625, + "learning_rate": 0.00016695025398017084, + "loss": 2.0933, + "step": 226495 + }, + { + "epoch": 0.53, + "grad_norm": 1.859375, + "learning_rate": 0.00016694888103977964, + "loss": 2.0811, + "step": 226500 + }, + { + "epoch": 0.53, + "grad_norm": 2.25, + "learning_rate": 0.0001669475080765175, + "loss": 2.1952, + "step": 226505 + }, + { + "epoch": 0.53, + "grad_norm": 2.171875, + "learning_rate": 0.00016694613509038483, + "loss": 2.1136, + "step": 226510 + }, + { + "epoch": 0.53, + "grad_norm": 2.125, + "learning_rate": 0.00016694476208138216, + "loss": 1.9957, + "step": 226515 + }, + { + "epoch": 0.53, + "grad_norm": 2.0, + "learning_rate": 0.00016694338904950996, + "loss": 1.9777, + "step": 226520 + }, + { + "epoch": 0.53, + "grad_norm": 2.296875, + "learning_rate": 0.0001669420159947687, + "loss": 2.0851, + "step": 226525 + }, + { + "epoch": 0.53, + "grad_norm": 2.3125, + "learning_rate": 0.0001669406429171588, + "loss": 2.3595, + "step": 226530 + }, + { + "epoch": 0.53, + "grad_norm": 2.15625, + "learning_rate": 0.00016693926981668078, + "loss": 2.0292, + "step": 226535 + }, + { + "epoch": 0.53, + "grad_norm": 1.8203125, + "learning_rate": 0.0001669378966933351, + "loss": 1.938, + "step": 226540 + }, + { + "epoch": 0.53, + "grad_norm": 1.9921875, + "learning_rate": 0.00016693652354712218, + "loss": 2.0014, + "step": 226545 + }, + { + "epoch": 0.53, + "grad_norm": 2.40625, + "learning_rate": 0.00016693515037804256, + "loss": 2.1282, + "step": 226550 + }, + { + "epoch": 0.53, + "grad_norm": 2.4375, + "learning_rate": 0.0001669337771860967, + "loss": 2.1429, + "step": 226555 + }, + { + "epoch": 0.53, + "grad_norm": 2.421875, + "learning_rate": 0.000166932403971285, + "loss": 2.1206, + "step": 226560 + }, + { + "epoch": 0.53, + "grad_norm": 2.046875, + "learning_rate": 0.000166931030733608, + "loss": 2.1515, + "step": 226565 + }, + { + "epoch": 0.53, + "grad_norm": 2.4375, + "learning_rate": 0.00016692965747306616, + "loss": 2.0156, + "step": 226570 + }, + { + "epoch": 0.53, + "grad_norm": 2.296875, + "learning_rate": 0.00016692828418965992, + "loss": 2.1701, + "step": 226575 + }, + { + "epoch": 0.53, + "grad_norm": 2.125, + "learning_rate": 0.00016692691088338973, + "loss": 2.1524, + "step": 226580 + }, + { + "epoch": 0.53, + "grad_norm": 2.390625, + "learning_rate": 0.00016692553755425616, + "loss": 1.9636, + "step": 226585 + }, + { + "epoch": 0.53, + "grad_norm": 2.140625, + "learning_rate": 0.00016692416420225958, + "loss": 2.0361, + "step": 226590 + }, + { + "epoch": 0.53, + "grad_norm": 2.375, + "learning_rate": 0.00016692279082740049, + "loss": 2.1252, + "step": 226595 + }, + { + "epoch": 0.53, + "grad_norm": 1.8125, + "learning_rate": 0.00016692141742967936, + "loss": 2.1368, + "step": 226600 + }, + { + "epoch": 0.53, + "grad_norm": 2.03125, + "learning_rate": 0.00016692004400909664, + "loss": 2.0423, + "step": 226605 + }, + { + "epoch": 0.53, + "grad_norm": 2.046875, + "learning_rate": 0.00016691867056565287, + "loss": 2.0441, + "step": 226610 + }, + { + "epoch": 0.53, + "grad_norm": 2.09375, + "learning_rate": 0.00016691729709934845, + "loss": 2.0176, + "step": 226615 + }, + { + "epoch": 0.53, + "grad_norm": 2.390625, + "learning_rate": 0.00016691592361018387, + "loss": 2.2291, + "step": 226620 + }, + { + "epoch": 0.53, + "grad_norm": 2.015625, + "learning_rate": 0.00016691455009815956, + "loss": 2.2132, + "step": 226625 + }, + { + "epoch": 0.53, + "grad_norm": 1.9453125, + "learning_rate": 0.00016691317656327605, + "loss": 2.0603, + "step": 226630 + }, + { + "epoch": 0.53, + "grad_norm": 2.125, + "learning_rate": 0.00016691180300553379, + "loss": 2.0693, + "step": 226635 + }, + { + "epoch": 0.53, + "grad_norm": 2.046875, + "learning_rate": 0.00016691042942493323, + "loss": 1.9251, + "step": 226640 + }, + { + "epoch": 0.53, + "grad_norm": 2.25, + "learning_rate": 0.00016690905582147488, + "loss": 2.0382, + "step": 226645 + }, + { + "epoch": 0.53, + "grad_norm": 2.3125, + "learning_rate": 0.0001669076821951592, + "loss": 2.1923, + "step": 226650 + }, + { + "epoch": 0.53, + "grad_norm": 2.296875, + "learning_rate": 0.0001669063085459866, + "loss": 2.133, + "step": 226655 + }, + { + "epoch": 0.53, + "grad_norm": 2.140625, + "learning_rate": 0.00016690493487395757, + "loss": 2.1227, + "step": 226660 + }, + { + "epoch": 0.53, + "grad_norm": 1.9296875, + "learning_rate": 0.00016690356117907268, + "loss": 2.1879, + "step": 226665 + }, + { + "epoch": 0.53, + "grad_norm": 2.140625, + "learning_rate": 0.00016690218746133226, + "loss": 2.0603, + "step": 226670 + }, + { + "epoch": 0.53, + "grad_norm": 2.296875, + "learning_rate": 0.00016690081372073686, + "loss": 2.127, + "step": 226675 + }, + { + "epoch": 0.53, + "grad_norm": 2.15625, + "learning_rate": 0.0001668994399572869, + "loss": 2.0479, + "step": 226680 + }, + { + "epoch": 0.53, + "grad_norm": 2.203125, + "learning_rate": 0.0001668980661709829, + "loss": 1.9758, + "step": 226685 + }, + { + "epoch": 0.53, + "grad_norm": 2.34375, + "learning_rate": 0.00016689669236182532, + "loss": 2.1045, + "step": 226690 + }, + { + "epoch": 0.53, + "grad_norm": 1.90625, + "learning_rate": 0.0001668953185298146, + "loss": 1.9469, + "step": 226695 + }, + { + "epoch": 0.53, + "grad_norm": 2.4375, + "learning_rate": 0.00016689394467495124, + "loss": 2.1006, + "step": 226700 + }, + { + "epoch": 0.53, + "grad_norm": 2.109375, + "learning_rate": 0.00016689257079723565, + "loss": 2.1683, + "step": 226705 + }, + { + "epoch": 0.53, + "grad_norm": 1.953125, + "learning_rate": 0.00016689119689666842, + "loss": 2.1942, + "step": 226710 + }, + { + "epoch": 0.53, + "grad_norm": 2.125, + "learning_rate": 0.00016688982297324989, + "loss": 2.0801, + "step": 226715 + }, + { + "epoch": 0.53, + "grad_norm": 1.8671875, + "learning_rate": 0.0001668884490269806, + "loss": 1.8688, + "step": 226720 + }, + { + "epoch": 0.53, + "grad_norm": 2.296875, + "learning_rate": 0.000166887075057861, + "loss": 2.1138, + "step": 226725 + }, + { + "epoch": 0.53, + "grad_norm": 1.9765625, + "learning_rate": 0.00016688570106589156, + "loss": 1.9386, + "step": 226730 + }, + { + "epoch": 0.53, + "grad_norm": 2.234375, + "learning_rate": 0.00016688432705107276, + "loss": 2.2483, + "step": 226735 + }, + { + "epoch": 0.53, + "grad_norm": 1.8359375, + "learning_rate": 0.00016688295301340507, + "loss": 2.0032, + "step": 226740 + }, + { + "epoch": 0.53, + "grad_norm": 2.453125, + "learning_rate": 0.00016688157895288893, + "loss": 2.0813, + "step": 226745 + }, + { + "epoch": 0.53, + "grad_norm": 2.484375, + "learning_rate": 0.00016688020486952483, + "loss": 2.1364, + "step": 226750 + }, + { + "epoch": 0.53, + "grad_norm": 2.515625, + "learning_rate": 0.00016687883076331326, + "loss": 2.0994, + "step": 226755 + }, + { + "epoch": 0.53, + "grad_norm": 2.328125, + "learning_rate": 0.00016687745663425461, + "loss": 2.0584, + "step": 226760 + }, + { + "epoch": 0.53, + "grad_norm": 2.46875, + "learning_rate": 0.00016687608248234948, + "loss": 1.9834, + "step": 226765 + }, + { + "epoch": 0.53, + "grad_norm": 1.921875, + "learning_rate": 0.00016687470830759824, + "loss": 2.0564, + "step": 226770 + }, + { + "epoch": 0.53, + "grad_norm": 2.265625, + "learning_rate": 0.00016687333411000136, + "loss": 2.1603, + "step": 226775 + }, + { + "epoch": 0.53, + "grad_norm": 1.8671875, + "learning_rate": 0.00016687195988955938, + "loss": 2.0419, + "step": 226780 + }, + { + "epoch": 0.53, + "grad_norm": 2.125, + "learning_rate": 0.0001668705856462727, + "loss": 2.0848, + "step": 226785 + }, + { + "epoch": 0.53, + "grad_norm": 2.546875, + "learning_rate": 0.00016686921138014182, + "loss": 2.0189, + "step": 226790 + }, + { + "epoch": 0.53, + "grad_norm": 2.34375, + "learning_rate": 0.0001668678370911672, + "loss": 2.142, + "step": 226795 + }, + { + "epoch": 0.53, + "grad_norm": 2.125, + "learning_rate": 0.00016686646277934934, + "loss": 2.2631, + "step": 226800 + }, + { + "epoch": 0.53, + "grad_norm": 2.03125, + "learning_rate": 0.00016686508844468868, + "loss": 2.1383, + "step": 226805 + }, + { + "epoch": 0.53, + "grad_norm": 2.421875, + "learning_rate": 0.00016686371408718565, + "loss": 2.0895, + "step": 226810 + }, + { + "epoch": 0.53, + "grad_norm": 1.8984375, + "learning_rate": 0.00016686233970684077, + "loss": 2.2143, + "step": 226815 + }, + { + "epoch": 0.53, + "grad_norm": 2.359375, + "learning_rate": 0.00016686096530365454, + "loss": 1.9039, + "step": 226820 + }, + { + "epoch": 0.53, + "grad_norm": 2.453125, + "learning_rate": 0.00016685959087762737, + "loss": 2.0736, + "step": 226825 + }, + { + "epoch": 0.53, + "grad_norm": 2.234375, + "learning_rate": 0.00016685821642875977, + "loss": 2.0676, + "step": 226830 + }, + { + "epoch": 0.53, + "grad_norm": 1.96875, + "learning_rate": 0.00016685684195705218, + "loss": 2.0491, + "step": 226835 + }, + { + "epoch": 0.53, + "grad_norm": 1.9453125, + "learning_rate": 0.00016685546746250505, + "loss": 2.0902, + "step": 226840 + }, + { + "epoch": 0.53, + "grad_norm": 2.109375, + "learning_rate": 0.0001668540929451189, + "loss": 2.0073, + "step": 226845 + }, + { + "epoch": 0.53, + "grad_norm": 2.765625, + "learning_rate": 0.00016685271840489422, + "loss": 2.1626, + "step": 226850 + }, + { + "epoch": 0.53, + "grad_norm": 2.203125, + "learning_rate": 0.0001668513438418314, + "loss": 2.1901, + "step": 226855 + }, + { + "epoch": 0.53, + "grad_norm": 2.171875, + "learning_rate": 0.00016684996925593097, + "loss": 1.93, + "step": 226860 + }, + { + "epoch": 0.53, + "grad_norm": 2.09375, + "learning_rate": 0.00016684859464719334, + "loss": 2.205, + "step": 226865 + }, + { + "epoch": 0.53, + "grad_norm": 1.953125, + "learning_rate": 0.00016684722001561903, + "loss": 2.202, + "step": 226870 + }, + { + "epoch": 0.53, + "grad_norm": 2.421875, + "learning_rate": 0.00016684584536120852, + "loss": 2.046, + "step": 226875 + }, + { + "epoch": 0.53, + "grad_norm": 2.359375, + "learning_rate": 0.00016684447068396224, + "loss": 1.9277, + "step": 226880 + }, + { + "epoch": 0.53, + "grad_norm": 2.15625, + "learning_rate": 0.0001668430959838807, + "loss": 2.119, + "step": 226885 + }, + { + "epoch": 0.53, + "grad_norm": 2.296875, + "learning_rate": 0.00016684172126096432, + "loss": 2.079, + "step": 226890 + }, + { + "epoch": 0.53, + "grad_norm": 2.265625, + "learning_rate": 0.00016684034651521357, + "loss": 2.1282, + "step": 226895 + }, + { + "epoch": 0.53, + "grad_norm": 2.34375, + "learning_rate": 0.00016683897174662898, + "loss": 2.1136, + "step": 226900 + }, + { + "epoch": 0.53, + "grad_norm": 2.359375, + "learning_rate": 0.000166837596955211, + "loss": 2.1413, + "step": 226905 + }, + { + "epoch": 0.53, + "grad_norm": 2.609375, + "learning_rate": 0.00016683622214096007, + "loss": 2.1099, + "step": 226910 + }, + { + "epoch": 0.53, + "grad_norm": 2.09375, + "learning_rate": 0.00016683484730387667, + "loss": 2.1065, + "step": 226915 + }, + { + "epoch": 0.53, + "grad_norm": 1.828125, + "learning_rate": 0.00016683347244396125, + "loss": 1.987, + "step": 226920 + }, + { + "epoch": 0.53, + "grad_norm": 1.9140625, + "learning_rate": 0.00016683209756121435, + "loss": 1.9823, + "step": 226925 + }, + { + "epoch": 0.53, + "grad_norm": 2.375, + "learning_rate": 0.00016683072265563634, + "loss": 2.0962, + "step": 226930 + }, + { + "epoch": 0.53, + "grad_norm": 2.0625, + "learning_rate": 0.0001668293477272278, + "loss": 2.1155, + "step": 226935 + }, + { + "epoch": 0.53, + "grad_norm": 2.109375, + "learning_rate": 0.0001668279727759891, + "loss": 2.2511, + "step": 226940 + }, + { + "epoch": 0.53, + "grad_norm": 2.046875, + "learning_rate": 0.00016682659780192078, + "loss": 2.0853, + "step": 226945 + }, + { + "epoch": 0.53, + "grad_norm": 1.7421875, + "learning_rate": 0.0001668252228050233, + "loss": 2.0691, + "step": 226950 + }, + { + "epoch": 0.53, + "grad_norm": 2.1875, + "learning_rate": 0.00016682384778529707, + "loss": 1.9802, + "step": 226955 + }, + { + "epoch": 0.53, + "grad_norm": 2.03125, + "learning_rate": 0.00016682247274274264, + "loss": 1.9225, + "step": 226960 + }, + { + "epoch": 0.53, + "grad_norm": 2.359375, + "learning_rate": 0.00016682109767736042, + "loss": 1.9457, + "step": 226965 + }, + { + "epoch": 0.53, + "grad_norm": 1.796875, + "learning_rate": 0.00016681972258915088, + "loss": 2.122, + "step": 226970 + }, + { + "epoch": 0.53, + "grad_norm": 2.15625, + "learning_rate": 0.0001668183474781145, + "loss": 2.2601, + "step": 226975 + }, + { + "epoch": 0.53, + "grad_norm": 1.9453125, + "learning_rate": 0.0001668169723442518, + "loss": 2.2112, + "step": 226980 + }, + { + "epoch": 0.53, + "grad_norm": 1.9765625, + "learning_rate": 0.00016681559718756322, + "loss": 2.2145, + "step": 226985 + }, + { + "epoch": 0.53, + "grad_norm": 1.9453125, + "learning_rate": 0.0001668142220080492, + "loss": 2.094, + "step": 226990 + }, + { + "epoch": 0.53, + "grad_norm": 2.1875, + "learning_rate": 0.00016681284680571023, + "loss": 2.0966, + "step": 226995 + }, + { + "epoch": 0.53, + "grad_norm": 2.46875, + "learning_rate": 0.00016681147158054675, + "loss": 2.1887, + "step": 227000 + }, + { + "epoch": 0.53, + "grad_norm": 2.265625, + "learning_rate": 0.00016681009633255932, + "loss": 2.0379, + "step": 227005 + }, + { + "epoch": 0.53, + "grad_norm": 2.171875, + "learning_rate": 0.00016680872106174832, + "loss": 2.1464, + "step": 227010 + }, + { + "epoch": 0.53, + "grad_norm": 2.265625, + "learning_rate": 0.00016680734576811424, + "loss": 2.0195, + "step": 227015 + }, + { + "epoch": 0.53, + "grad_norm": 1.953125, + "learning_rate": 0.00016680597045165758, + "loss": 2.2449, + "step": 227020 + }, + { + "epoch": 0.53, + "grad_norm": 2.40625, + "learning_rate": 0.00016680459511237878, + "loss": 2.1593, + "step": 227025 + }, + { + "epoch": 0.53, + "grad_norm": 2.21875, + "learning_rate": 0.0001668032197502783, + "loss": 1.9733, + "step": 227030 + }, + { + "epoch": 0.53, + "grad_norm": 2.046875, + "learning_rate": 0.00016680184436535664, + "loss": 1.936, + "step": 227035 + }, + { + "epoch": 0.53, + "grad_norm": 2.28125, + "learning_rate": 0.00016680046895761427, + "loss": 1.9377, + "step": 227040 + }, + { + "epoch": 0.53, + "grad_norm": 2.25, + "learning_rate": 0.00016679909352705166, + "loss": 2.1622, + "step": 227045 + }, + { + "epoch": 0.53, + "grad_norm": 2.140625, + "learning_rate": 0.00016679771807366923, + "loss": 2.2319, + "step": 227050 + }, + { + "epoch": 0.53, + "grad_norm": 2.390625, + "learning_rate": 0.0001667963425974675, + "loss": 1.9923, + "step": 227055 + }, + { + "epoch": 0.53, + "grad_norm": 2.09375, + "learning_rate": 0.00016679496709844695, + "loss": 2.1608, + "step": 227060 + }, + { + "epoch": 0.53, + "grad_norm": 2.59375, + "learning_rate": 0.000166793591576608, + "loss": 2.0086, + "step": 227065 + }, + { + "epoch": 0.53, + "grad_norm": 1.984375, + "learning_rate": 0.0001667922160319512, + "loss": 1.7817, + "step": 227070 + }, + { + "epoch": 0.53, + "grad_norm": 2.328125, + "learning_rate": 0.0001667908404644769, + "loss": 1.7801, + "step": 227075 + }, + { + "epoch": 0.53, + "grad_norm": 2.21875, + "learning_rate": 0.00016678946487418567, + "loss": 1.953, + "step": 227080 + }, + { + "epoch": 0.53, + "grad_norm": 2.296875, + "learning_rate": 0.00016678808926107793, + "loss": 2.1241, + "step": 227085 + }, + { + "epoch": 0.53, + "grad_norm": 2.578125, + "learning_rate": 0.00016678671362515418, + "loss": 2.1178, + "step": 227090 + }, + { + "epoch": 0.53, + "grad_norm": 1.96875, + "learning_rate": 0.0001667853379664149, + "loss": 2.0946, + "step": 227095 + }, + { + "epoch": 0.53, + "grad_norm": 2.078125, + "learning_rate": 0.0001667839622848605, + "loss": 2.177, + "step": 227100 + }, + { + "epoch": 0.53, + "grad_norm": 2.8125, + "learning_rate": 0.0001667825865804915, + "loss": 2.1575, + "step": 227105 + }, + { + "epoch": 0.53, + "grad_norm": 2.28125, + "learning_rate": 0.00016678121085330835, + "loss": 2.1636, + "step": 227110 + }, + { + "epoch": 0.53, + "grad_norm": 2.625, + "learning_rate": 0.00016677983510331155, + "loss": 2.0585, + "step": 227115 + }, + { + "epoch": 0.53, + "grad_norm": 2.203125, + "learning_rate": 0.00016677845933050155, + "loss": 2.1594, + "step": 227120 + }, + { + "epoch": 0.53, + "grad_norm": 2.375, + "learning_rate": 0.0001667770835348788, + "loss": 2.0439, + "step": 227125 + }, + { + "epoch": 0.53, + "grad_norm": 2.1875, + "learning_rate": 0.0001667757077164438, + "loss": 2.2852, + "step": 227130 + }, + { + "epoch": 0.53, + "grad_norm": 2.015625, + "learning_rate": 0.000166774331875197, + "loss": 2.1423, + "step": 227135 + }, + { + "epoch": 0.53, + "grad_norm": 2.109375, + "learning_rate": 0.00016677295601113886, + "loss": 1.9414, + "step": 227140 + }, + { + "epoch": 0.53, + "grad_norm": 2.09375, + "learning_rate": 0.00016677158012426988, + "loss": 1.9752, + "step": 227145 + }, + { + "epoch": 0.53, + "grad_norm": 2.59375, + "learning_rate": 0.00016677020421459053, + "loss": 2.1755, + "step": 227150 + }, + { + "epoch": 0.53, + "grad_norm": 2.078125, + "learning_rate": 0.00016676882828210124, + "loss": 2.1111, + "step": 227155 + }, + { + "epoch": 0.53, + "grad_norm": 2.140625, + "learning_rate": 0.00016676745232680254, + "loss": 2.2083, + "step": 227160 + }, + { + "epoch": 0.53, + "grad_norm": 1.9609375, + "learning_rate": 0.00016676607634869483, + "loss": 2.1951, + "step": 227165 + }, + { + "epoch": 0.53, + "grad_norm": 1.9375, + "learning_rate": 0.00016676470034777865, + "loss": 2.1697, + "step": 227170 + }, + { + "epoch": 0.53, + "grad_norm": 2.21875, + "learning_rate": 0.0001667633243240544, + "loss": 2.1274, + "step": 227175 + }, + { + "epoch": 0.53, + "grad_norm": 2.15625, + "learning_rate": 0.00016676194827752264, + "loss": 2.1706, + "step": 227180 + }, + { + "epoch": 0.53, + "grad_norm": 2.0, + "learning_rate": 0.00016676057220818376, + "loss": 2.0595, + "step": 227185 + }, + { + "epoch": 0.53, + "grad_norm": 2.109375, + "learning_rate": 0.00016675919611603825, + "loss": 2.1628, + "step": 227190 + }, + { + "epoch": 0.53, + "grad_norm": 1.90625, + "learning_rate": 0.0001667578200010866, + "loss": 2.1899, + "step": 227195 + }, + { + "epoch": 0.53, + "grad_norm": 2.34375, + "learning_rate": 0.00016675644386332926, + "loss": 2.074, + "step": 227200 + }, + { + "epoch": 0.53, + "grad_norm": 2.71875, + "learning_rate": 0.00016675506770276672, + "loss": 2.1432, + "step": 227205 + }, + { + "epoch": 0.53, + "grad_norm": 2.25, + "learning_rate": 0.00016675369151939942, + "loss": 2.1337, + "step": 227210 + }, + { + "epoch": 0.53, + "grad_norm": 2.03125, + "learning_rate": 0.00016675231531322786, + "loss": 2.1212, + "step": 227215 + }, + { + "epoch": 0.53, + "grad_norm": 1.7265625, + "learning_rate": 0.00016675093908425249, + "loss": 2.2207, + "step": 227220 + }, + { + "epoch": 0.53, + "grad_norm": 2.140625, + "learning_rate": 0.00016674956283247382, + "loss": 2.1427, + "step": 227225 + }, + { + "epoch": 0.53, + "grad_norm": 2.390625, + "learning_rate": 0.00016674818655789225, + "loss": 2.0407, + "step": 227230 + }, + { + "epoch": 0.53, + "grad_norm": 2.46875, + "learning_rate": 0.00016674681026050833, + "loss": 2.0663, + "step": 227235 + }, + { + "epoch": 0.53, + "grad_norm": 2.046875, + "learning_rate": 0.00016674543394032244, + "loss": 2.1355, + "step": 227240 + }, + { + "epoch": 0.53, + "grad_norm": 1.84375, + "learning_rate": 0.00016674405759733513, + "loss": 1.9899, + "step": 227245 + }, + { + "epoch": 0.53, + "grad_norm": 2.078125, + "learning_rate": 0.0001667426812315468, + "loss": 2.0177, + "step": 227250 + }, + { + "epoch": 0.53, + "grad_norm": 2.15625, + "learning_rate": 0.00016674130484295801, + "loss": 2.0063, + "step": 227255 + }, + { + "epoch": 0.53, + "grad_norm": 1.828125, + "learning_rate": 0.00016673992843156915, + "loss": 1.936, + "step": 227260 + }, + { + "epoch": 0.53, + "grad_norm": 1.9296875, + "learning_rate": 0.00016673855199738076, + "loss": 2.0558, + "step": 227265 + }, + { + "epoch": 0.53, + "grad_norm": 1.9765625, + "learning_rate": 0.0001667371755403932, + "loss": 2.0761, + "step": 227270 + }, + { + "epoch": 0.53, + "grad_norm": 2.171875, + "learning_rate": 0.00016673579906060707, + "loss": 2.0173, + "step": 227275 + }, + { + "epoch": 0.53, + "grad_norm": 1.890625, + "learning_rate": 0.00016673442255802279, + "loss": 2.1777, + "step": 227280 + }, + { + "epoch": 0.53, + "grad_norm": 1.7421875, + "learning_rate": 0.0001667330460326408, + "loss": 2.0502, + "step": 227285 + }, + { + "epoch": 0.53, + "grad_norm": 2.203125, + "learning_rate": 0.00016673166948446156, + "loss": 2.2279, + "step": 227290 + }, + { + "epoch": 0.53, + "grad_norm": 1.984375, + "learning_rate": 0.00016673029291348562, + "loss": 2.0584, + "step": 227295 + }, + { + "epoch": 0.53, + "grad_norm": 1.9375, + "learning_rate": 0.00016672891631971335, + "loss": 2.0024, + "step": 227300 + }, + { + "epoch": 0.53, + "grad_norm": 1.9609375, + "learning_rate": 0.00016672753970314533, + "loss": 1.9443, + "step": 227305 + }, + { + "epoch": 0.53, + "grad_norm": 2.21875, + "learning_rate": 0.00016672616306378192, + "loss": 2.1164, + "step": 227310 + }, + { + "epoch": 0.53, + "grad_norm": 2.0, + "learning_rate": 0.00016672478640162366, + "loss": 2.1258, + "step": 227315 + }, + { + "epoch": 0.53, + "grad_norm": 2.140625, + "learning_rate": 0.000166723409716671, + "loss": 1.9154, + "step": 227320 + }, + { + "epoch": 0.53, + "grad_norm": 2.171875, + "learning_rate": 0.00016672203300892443, + "loss": 1.9741, + "step": 227325 + }, + { + "epoch": 0.53, + "grad_norm": 2.125, + "learning_rate": 0.00016672065627838437, + "loss": 2.2499, + "step": 227330 + }, + { + "epoch": 0.53, + "grad_norm": 1.8125, + "learning_rate": 0.00016671927952505137, + "loss": 2.0222, + "step": 227335 + }, + { + "epoch": 0.54, + "grad_norm": 2.34375, + "learning_rate": 0.00016671790274892583, + "loss": 2.0583, + "step": 227340 + }, + { + "epoch": 0.54, + "grad_norm": 2.234375, + "learning_rate": 0.00016671652595000826, + "loss": 2.0529, + "step": 227345 + }, + { + "epoch": 0.54, + "grad_norm": 2.296875, + "learning_rate": 0.0001667151491282991, + "loss": 1.964, + "step": 227350 + }, + { + "epoch": 0.54, + "grad_norm": 2.34375, + "learning_rate": 0.00016671377228379884, + "loss": 2.1319, + "step": 227355 + }, + { + "epoch": 0.54, + "grad_norm": 2.375, + "learning_rate": 0.00016671239541650793, + "loss": 1.9464, + "step": 227360 + }, + { + "epoch": 0.54, + "grad_norm": 2.125, + "learning_rate": 0.00016671101852642686, + "loss": 2.1487, + "step": 227365 + }, + { + "epoch": 0.54, + "grad_norm": 2.28125, + "learning_rate": 0.00016670964161355611, + "loss": 2.038, + "step": 227370 + }, + { + "epoch": 0.54, + "grad_norm": 1.8359375, + "learning_rate": 0.00016670826467789614, + "loss": 2.0674, + "step": 227375 + }, + { + "epoch": 0.54, + "grad_norm": 1.8828125, + "learning_rate": 0.0001667068877194474, + "loss": 2.1769, + "step": 227380 + }, + { + "epoch": 0.54, + "grad_norm": 1.8984375, + "learning_rate": 0.0001667055107382104, + "loss": 2.1633, + "step": 227385 + }, + { + "epoch": 0.54, + "grad_norm": 2.15625, + "learning_rate": 0.00016670413373418556, + "loss": 2.0524, + "step": 227390 + }, + { + "epoch": 0.54, + "grad_norm": 1.8984375, + "learning_rate": 0.00016670275670737337, + "loss": 2.213, + "step": 227395 + }, + { + "epoch": 0.54, + "grad_norm": 1.875, + "learning_rate": 0.00016670137965777438, + "loss": 2.0188, + "step": 227400 + }, + { + "epoch": 0.54, + "grad_norm": 3.234375, + "learning_rate": 0.00016670000258538892, + "loss": 2.0074, + "step": 227405 + }, + { + "epoch": 0.54, + "grad_norm": 1.9609375, + "learning_rate": 0.00016669862549021752, + "loss": 2.1091, + "step": 227410 + }, + { + "epoch": 0.54, + "grad_norm": 2.265625, + "learning_rate": 0.0001666972483722607, + "loss": 2.1829, + "step": 227415 + }, + { + "epoch": 0.54, + "grad_norm": 2.015625, + "learning_rate": 0.00016669587123151888, + "loss": 2.0781, + "step": 227420 + }, + { + "epoch": 0.54, + "grad_norm": 1.9375, + "learning_rate": 0.00016669449406799255, + "loss": 2.2238, + "step": 227425 + }, + { + "epoch": 0.54, + "grad_norm": 2.109375, + "learning_rate": 0.00016669311688168213, + "loss": 2.0045, + "step": 227430 + }, + { + "epoch": 0.54, + "grad_norm": 2.5625, + "learning_rate": 0.0001666917396725882, + "loss": 2.1803, + "step": 227435 + }, + { + "epoch": 0.54, + "grad_norm": 2.265625, + "learning_rate": 0.0001666903624407111, + "loss": 2.0761, + "step": 227440 + }, + { + "epoch": 0.54, + "grad_norm": 1.9765625, + "learning_rate": 0.0001666889851860514, + "loss": 2.1402, + "step": 227445 + }, + { + "epoch": 0.54, + "grad_norm": 1.703125, + "learning_rate": 0.00016668760790860954, + "loss": 2.0707, + "step": 227450 + }, + { + "epoch": 0.54, + "grad_norm": 1.890625, + "learning_rate": 0.00016668623060838594, + "loss": 1.9634, + "step": 227455 + }, + { + "epoch": 0.54, + "grad_norm": 1.8515625, + "learning_rate": 0.00016668485328538113, + "loss": 2.0619, + "step": 227460 + }, + { + "epoch": 0.54, + "grad_norm": 2.234375, + "learning_rate": 0.00016668347593959556, + "loss": 2.0075, + "step": 227465 + }, + { + "epoch": 0.54, + "grad_norm": 1.9453125, + "learning_rate": 0.00016668209857102973, + "loss": 2.1519, + "step": 227470 + }, + { + "epoch": 0.54, + "grad_norm": 2.03125, + "learning_rate": 0.00016668072117968408, + "loss": 2.0424, + "step": 227475 + }, + { + "epoch": 0.54, + "grad_norm": 2.28125, + "learning_rate": 0.00016667934376555908, + "loss": 2.0645, + "step": 227480 + }, + { + "epoch": 0.54, + "grad_norm": 2.15625, + "learning_rate": 0.0001666779663286552, + "loss": 2.0424, + "step": 227485 + }, + { + "epoch": 0.54, + "grad_norm": 1.9296875, + "learning_rate": 0.00016667658886897294, + "loss": 1.9815, + "step": 227490 + }, + { + "epoch": 0.54, + "grad_norm": 2.203125, + "learning_rate": 0.00016667521138651273, + "loss": 2.1869, + "step": 227495 + }, + { + "epoch": 0.54, + "grad_norm": 2.390625, + "learning_rate": 0.00016667383388127503, + "loss": 2.2342, + "step": 227500 + }, + { + "epoch": 0.54, + "grad_norm": 2.078125, + "learning_rate": 0.00016667245635326042, + "loss": 2.0567, + "step": 227505 + }, + { + "epoch": 0.54, + "grad_norm": 1.703125, + "learning_rate": 0.0001666710788024692, + "loss": 2.0429, + "step": 227510 + }, + { + "epoch": 0.54, + "grad_norm": 2.53125, + "learning_rate": 0.00016666970122890197, + "loss": 2.2557, + "step": 227515 + }, + { + "epoch": 0.54, + "grad_norm": 2.03125, + "learning_rate": 0.00016666832363255917, + "loss": 1.9959, + "step": 227520 + }, + { + "epoch": 0.54, + "grad_norm": 1.953125, + "learning_rate": 0.00016666694601344127, + "loss": 2.0976, + "step": 227525 + }, + { + "epoch": 0.54, + "grad_norm": 1.953125, + "learning_rate": 0.0001666655683715487, + "loss": 2.1221, + "step": 227530 + }, + { + "epoch": 0.54, + "grad_norm": 2.1875, + "learning_rate": 0.00016666419070688197, + "loss": 2.1762, + "step": 227535 + }, + { + "epoch": 0.54, + "grad_norm": 2.28125, + "learning_rate": 0.00016666281301944155, + "loss": 2.1346, + "step": 227540 + }, + { + "epoch": 0.54, + "grad_norm": 2.125, + "learning_rate": 0.0001666614353092279, + "loss": 1.9221, + "step": 227545 + }, + { + "epoch": 0.54, + "grad_norm": 3.125, + "learning_rate": 0.00016666005757624152, + "loss": 2.2234, + "step": 227550 + }, + { + "epoch": 0.54, + "grad_norm": 1.859375, + "learning_rate": 0.00016665867982048281, + "loss": 2.154, + "step": 227555 + }, + { + "epoch": 0.54, + "grad_norm": 2.59375, + "learning_rate": 0.00016665730204195234, + "loss": 2.2444, + "step": 227560 + }, + { + "epoch": 0.54, + "grad_norm": 2.296875, + "learning_rate": 0.0001666559242406505, + "loss": 2.0649, + "step": 227565 + }, + { + "epoch": 0.54, + "grad_norm": 2.21875, + "learning_rate": 0.00016665454641657775, + "loss": 2.0758, + "step": 227570 + }, + { + "epoch": 0.54, + "grad_norm": 2.484375, + "learning_rate": 0.00016665316856973465, + "loss": 2.21, + "step": 227575 + }, + { + "epoch": 0.54, + "grad_norm": 2.34375, + "learning_rate": 0.00016665179070012162, + "loss": 2.1177, + "step": 227580 + }, + { + "epoch": 0.54, + "grad_norm": 1.7734375, + "learning_rate": 0.0001666504128077391, + "loss": 2.0402, + "step": 227585 + }, + { + "epoch": 0.54, + "grad_norm": 2.078125, + "learning_rate": 0.0001666490348925876, + "loss": 2.0689, + "step": 227590 + }, + { + "epoch": 0.54, + "grad_norm": 2.484375, + "learning_rate": 0.0001666476569546676, + "loss": 2.1132, + "step": 227595 + }, + { + "epoch": 0.54, + "grad_norm": 2.171875, + "learning_rate": 0.0001666462789939795, + "loss": 2.1207, + "step": 227600 + }, + { + "epoch": 0.54, + "grad_norm": 2.109375, + "learning_rate": 0.00016664490101052387, + "loss": 2.0266, + "step": 227605 + }, + { + "epoch": 0.54, + "grad_norm": 1.96875, + "learning_rate": 0.00016664352300430112, + "loss": 2.144, + "step": 227610 + }, + { + "epoch": 0.54, + "grad_norm": 2.1875, + "learning_rate": 0.00016664214497531173, + "loss": 2.1462, + "step": 227615 + }, + { + "epoch": 0.54, + "grad_norm": 2.28125, + "learning_rate": 0.0001666407669235562, + "loss": 2.0952, + "step": 227620 + }, + { + "epoch": 0.54, + "grad_norm": 2.125, + "learning_rate": 0.00016663938884903495, + "loss": 2.1096, + "step": 227625 + }, + { + "epoch": 0.54, + "grad_norm": 1.9765625, + "learning_rate": 0.00016663801075174846, + "loss": 2.0823, + "step": 227630 + }, + { + "epoch": 0.54, + "grad_norm": 2.21875, + "learning_rate": 0.00016663663263169725, + "loss": 1.9739, + "step": 227635 + }, + { + "epoch": 0.54, + "grad_norm": 2.390625, + "learning_rate": 0.00016663525448888176, + "loss": 2.0601, + "step": 227640 + }, + { + "epoch": 0.54, + "grad_norm": 2.578125, + "learning_rate": 0.00016663387632330243, + "loss": 2.0495, + "step": 227645 + }, + { + "epoch": 0.54, + "grad_norm": 1.953125, + "learning_rate": 0.00016663249813495977, + "loss": 2.0749, + "step": 227650 + }, + { + "epoch": 0.54, + "grad_norm": 2.25, + "learning_rate": 0.00016663111992385425, + "loss": 2.2324, + "step": 227655 + }, + { + "epoch": 0.54, + "grad_norm": 2.0625, + "learning_rate": 0.00016662974168998632, + "loss": 2.0547, + "step": 227660 + }, + { + "epoch": 0.54, + "grad_norm": 2.53125, + "learning_rate": 0.00016662836343335648, + "loss": 2.0075, + "step": 227665 + }, + { + "epoch": 0.54, + "grad_norm": 2.15625, + "learning_rate": 0.00016662698515396515, + "loss": 2.2086, + "step": 227670 + }, + { + "epoch": 0.54, + "grad_norm": 2.34375, + "learning_rate": 0.00016662560685181286, + "loss": 2.2882, + "step": 227675 + }, + { + "epoch": 0.54, + "grad_norm": 1.9140625, + "learning_rate": 0.00016662422852690005, + "loss": 1.964, + "step": 227680 + }, + { + "epoch": 0.54, + "grad_norm": 1.8515625, + "learning_rate": 0.00016662285017922718, + "loss": 2.0905, + "step": 227685 + }, + { + "epoch": 0.54, + "grad_norm": 1.921875, + "learning_rate": 0.00016662147180879474, + "loss": 1.98, + "step": 227690 + }, + { + "epoch": 0.54, + "grad_norm": 2.046875, + "learning_rate": 0.00016662009341560322, + "loss": 2.057, + "step": 227695 + }, + { + "epoch": 0.54, + "grad_norm": 2.1875, + "learning_rate": 0.00016661871499965304, + "loss": 1.992, + "step": 227700 + }, + { + "epoch": 0.54, + "grad_norm": 2.328125, + "learning_rate": 0.0001666173365609447, + "loss": 1.9822, + "step": 227705 + }, + { + "epoch": 0.54, + "grad_norm": 2.390625, + "learning_rate": 0.0001666159580994787, + "loss": 2.1015, + "step": 227710 + }, + { + "epoch": 0.54, + "grad_norm": 1.7734375, + "learning_rate": 0.0001666145796152555, + "loss": 2.0941, + "step": 227715 + }, + { + "epoch": 0.54, + "grad_norm": 2.0625, + "learning_rate": 0.00016661320110827548, + "loss": 1.9997, + "step": 227720 + }, + { + "epoch": 0.54, + "grad_norm": 2.328125, + "learning_rate": 0.0001666118225785392, + "loss": 1.9329, + "step": 227725 + }, + { + "epoch": 0.54, + "grad_norm": 2.515625, + "learning_rate": 0.00016661044402604716, + "loss": 2.09, + "step": 227730 + }, + { + "epoch": 0.54, + "grad_norm": 2.328125, + "learning_rate": 0.00016660906545079972, + "loss": 2.1058, + "step": 227735 + }, + { + "epoch": 0.54, + "grad_norm": 1.984375, + "learning_rate": 0.00016660768685279747, + "loss": 2.0707, + "step": 227740 + }, + { + "epoch": 0.54, + "grad_norm": 2.484375, + "learning_rate": 0.00016660630823204083, + "loss": 1.9808, + "step": 227745 + }, + { + "epoch": 0.54, + "grad_norm": 2.296875, + "learning_rate": 0.0001666049295885302, + "loss": 2.0797, + "step": 227750 + }, + { + "epoch": 0.54, + "grad_norm": 2.09375, + "learning_rate": 0.00016660355092226617, + "loss": 2.0289, + "step": 227755 + }, + { + "epoch": 0.54, + "grad_norm": 2.3125, + "learning_rate": 0.00016660217223324914, + "loss": 2.1408, + "step": 227760 + }, + { + "epoch": 0.54, + "grad_norm": 3.09375, + "learning_rate": 0.0001666007935214796, + "loss": 2.1035, + "step": 227765 + }, + { + "epoch": 0.54, + "grad_norm": 2.28125, + "learning_rate": 0.00016659941478695806, + "loss": 2.102, + "step": 227770 + }, + { + "epoch": 0.54, + "grad_norm": 2.046875, + "learning_rate": 0.0001665980360296849, + "loss": 2.2273, + "step": 227775 + }, + { + "epoch": 0.54, + "grad_norm": 1.9140625, + "learning_rate": 0.00016659665724966068, + "loss": 2.1798, + "step": 227780 + }, + { + "epoch": 0.54, + "grad_norm": 2.203125, + "learning_rate": 0.00016659527844688581, + "loss": 2.288, + "step": 227785 + }, + { + "epoch": 0.54, + "grad_norm": 1.8671875, + "learning_rate": 0.0001665938996213608, + "loss": 1.9418, + "step": 227790 + }, + { + "epoch": 0.54, + "grad_norm": 1.90625, + "learning_rate": 0.0001665925207730861, + "loss": 2.0359, + "step": 227795 + }, + { + "epoch": 0.54, + "grad_norm": 2.0625, + "learning_rate": 0.0001665911419020622, + "loss": 2.1148, + "step": 227800 + }, + { + "epoch": 0.54, + "grad_norm": 2.65625, + "learning_rate": 0.00016658976300828955, + "loss": 2.0962, + "step": 227805 + }, + { + "epoch": 0.54, + "grad_norm": 2.234375, + "learning_rate": 0.00016658838409176863, + "loss": 2.0381, + "step": 227810 + }, + { + "epoch": 0.54, + "grad_norm": 1.6953125, + "learning_rate": 0.0001665870051524999, + "loss": 2.0692, + "step": 227815 + }, + { + "epoch": 0.54, + "grad_norm": 2.109375, + "learning_rate": 0.00016658562619048384, + "loss": 2.0239, + "step": 227820 + }, + { + "epoch": 0.54, + "grad_norm": 2.03125, + "learning_rate": 0.00016658424720572096, + "loss": 2.2928, + "step": 227825 + }, + { + "epoch": 0.54, + "grad_norm": 2.140625, + "learning_rate": 0.00016658286819821162, + "loss": 2.284, + "step": 227830 + }, + { + "epoch": 0.54, + "grad_norm": 1.8828125, + "learning_rate": 0.00016658148916795642, + "loss": 2.0192, + "step": 227835 + }, + { + "epoch": 0.54, + "grad_norm": 2.1875, + "learning_rate": 0.00016658011011495575, + "loss": 2.1866, + "step": 227840 + }, + { + "epoch": 0.54, + "grad_norm": 1.921875, + "learning_rate": 0.00016657873103921014, + "loss": 2.126, + "step": 227845 + }, + { + "epoch": 0.54, + "grad_norm": 2.078125, + "learning_rate": 0.00016657735194072, + "loss": 2.085, + "step": 227850 + }, + { + "epoch": 0.54, + "grad_norm": 2.203125, + "learning_rate": 0.00016657597281948583, + "loss": 2.1276, + "step": 227855 + }, + { + "epoch": 0.54, + "grad_norm": 1.9453125, + "learning_rate": 0.0001665745936755081, + "loss": 2.1104, + "step": 227860 + }, + { + "epoch": 0.54, + "grad_norm": 2.828125, + "learning_rate": 0.0001665732145087873, + "loss": 2.089, + "step": 227865 + }, + { + "epoch": 0.54, + "grad_norm": 2.21875, + "learning_rate": 0.00016657183531932387, + "loss": 1.9451, + "step": 227870 + }, + { + "epoch": 0.54, + "grad_norm": 2.109375, + "learning_rate": 0.0001665704561071183, + "loss": 2.1231, + "step": 227875 + }, + { + "epoch": 0.54, + "grad_norm": 2.328125, + "learning_rate": 0.00016656907687217105, + "loss": 2.0101, + "step": 227880 + }, + { + "epoch": 0.54, + "grad_norm": 2.046875, + "learning_rate": 0.0001665676976144826, + "loss": 1.9738, + "step": 227885 + }, + { + "epoch": 0.54, + "grad_norm": 2.4375, + "learning_rate": 0.00016656631833405342, + "loss": 2.0802, + "step": 227890 + }, + { + "epoch": 0.54, + "grad_norm": 2.1875, + "learning_rate": 0.00016656493903088396, + "loss": 2.0845, + "step": 227895 + }, + { + "epoch": 0.54, + "grad_norm": 2.046875, + "learning_rate": 0.00016656355970497472, + "loss": 2.1788, + "step": 227900 + }, + { + "epoch": 0.54, + "grad_norm": 2.1875, + "learning_rate": 0.00016656218035632616, + "loss": 1.8256, + "step": 227905 + }, + { + "epoch": 0.54, + "grad_norm": 2.109375, + "learning_rate": 0.00016656080098493873, + "loss": 1.9537, + "step": 227910 + }, + { + "epoch": 0.54, + "grad_norm": 1.8359375, + "learning_rate": 0.00016655942159081298, + "loss": 1.9783, + "step": 227915 + }, + { + "epoch": 0.54, + "grad_norm": 2.203125, + "learning_rate": 0.0001665580421739493, + "loss": 1.9067, + "step": 227920 + }, + { + "epoch": 0.54, + "grad_norm": 1.78125, + "learning_rate": 0.0001665566627343482, + "loss": 2.1108, + "step": 227925 + }, + { + "epoch": 0.54, + "grad_norm": 2.296875, + "learning_rate": 0.00016655528327201008, + "loss": 1.8994, + "step": 227930 + }, + { + "epoch": 0.54, + "grad_norm": 1.734375, + "learning_rate": 0.0001665539037869355, + "loss": 2.1909, + "step": 227935 + }, + { + "epoch": 0.54, + "grad_norm": 2.171875, + "learning_rate": 0.0001665525242791249, + "loss": 2.1336, + "step": 227940 + }, + { + "epoch": 0.54, + "grad_norm": 1.96875, + "learning_rate": 0.00016655114474857876, + "loss": 2.2849, + "step": 227945 + }, + { + "epoch": 0.54, + "grad_norm": 1.84375, + "learning_rate": 0.00016654976519529755, + "loss": 2.0084, + "step": 227950 + }, + { + "epoch": 0.54, + "grad_norm": 2.140625, + "learning_rate": 0.0001665483856192817, + "loss": 2.1381, + "step": 227955 + }, + { + "epoch": 0.54, + "grad_norm": 2.046875, + "learning_rate": 0.00016654700602053175, + "loss": 2.1662, + "step": 227960 + }, + { + "epoch": 0.54, + "grad_norm": 2.21875, + "learning_rate": 0.0001665456263990481, + "loss": 2.2582, + "step": 227965 + }, + { + "epoch": 0.54, + "grad_norm": 1.8828125, + "learning_rate": 0.0001665442467548313, + "loss": 2.2118, + "step": 227970 + }, + { + "epoch": 0.54, + "grad_norm": 2.828125, + "learning_rate": 0.00016654286708788174, + "loss": 1.9994, + "step": 227975 + }, + { + "epoch": 0.54, + "grad_norm": 2.015625, + "learning_rate": 0.00016654148739819996, + "loss": 2.2805, + "step": 227980 + }, + { + "epoch": 0.54, + "grad_norm": 2.171875, + "learning_rate": 0.00016654010768578638, + "loss": 2.1217, + "step": 227985 + }, + { + "epoch": 0.54, + "grad_norm": 2.578125, + "learning_rate": 0.00016653872795064148, + "loss": 2.2538, + "step": 227990 + }, + { + "epoch": 0.54, + "grad_norm": 2.09375, + "learning_rate": 0.00016653734819276577, + "loss": 2.0518, + "step": 227995 + }, + { + "epoch": 0.54, + "grad_norm": 2.03125, + "learning_rate": 0.00016653596841215967, + "loss": 1.9322, + "step": 228000 + }, + { + "epoch": 0.54, + "grad_norm": 2.359375, + "learning_rate": 0.0001665345886088237, + "loss": 2.1247, + "step": 228005 + }, + { + "epoch": 0.54, + "grad_norm": 2.203125, + "learning_rate": 0.00016653320878275832, + "loss": 2.1693, + "step": 228010 + }, + { + "epoch": 0.54, + "grad_norm": 2.765625, + "learning_rate": 0.00016653182893396396, + "loss": 2.0958, + "step": 228015 + }, + { + "epoch": 0.54, + "grad_norm": 1.9453125, + "learning_rate": 0.00016653044906244114, + "loss": 2.1548, + "step": 228020 + }, + { + "epoch": 0.54, + "grad_norm": 2.0, + "learning_rate": 0.00016652906916819028, + "loss": 2.1922, + "step": 228025 + }, + { + "epoch": 0.54, + "grad_norm": 2.375, + "learning_rate": 0.00016652768925121193, + "loss": 2.2441, + "step": 228030 + }, + { + "epoch": 0.54, + "grad_norm": 2.0, + "learning_rate": 0.0001665263093115065, + "loss": 2.2523, + "step": 228035 + }, + { + "epoch": 0.54, + "grad_norm": 2.0, + "learning_rate": 0.00016652492934907446, + "loss": 2.0326, + "step": 228040 + }, + { + "epoch": 0.54, + "grad_norm": 2.0, + "learning_rate": 0.0001665235493639163, + "loss": 2.2575, + "step": 228045 + }, + { + "epoch": 0.54, + "grad_norm": 2.125, + "learning_rate": 0.00016652216935603248, + "loss": 1.9908, + "step": 228050 + }, + { + "epoch": 0.54, + "grad_norm": 1.90625, + "learning_rate": 0.0001665207893254235, + "loss": 2.1259, + "step": 228055 + }, + { + "epoch": 0.54, + "grad_norm": 3.703125, + "learning_rate": 0.00016651940927208983, + "loss": 2.1646, + "step": 228060 + }, + { + "epoch": 0.54, + "grad_norm": 1.890625, + "learning_rate": 0.0001665180291960319, + "loss": 2.2283, + "step": 228065 + }, + { + "epoch": 0.54, + "grad_norm": 2.28125, + "learning_rate": 0.0001665166490972502, + "loss": 2.0997, + "step": 228070 + }, + { + "epoch": 0.54, + "grad_norm": 2.03125, + "learning_rate": 0.00016651526897574521, + "loss": 2.0167, + "step": 228075 + }, + { + "epoch": 0.54, + "grad_norm": 1.984375, + "learning_rate": 0.00016651388883151736, + "loss": 2.1016, + "step": 228080 + }, + { + "epoch": 0.54, + "grad_norm": 2.25, + "learning_rate": 0.00016651250866456723, + "loss": 2.0014, + "step": 228085 + }, + { + "epoch": 0.54, + "grad_norm": 2.609375, + "learning_rate": 0.00016651112847489518, + "loss": 1.9704, + "step": 228090 + }, + { + "epoch": 0.54, + "grad_norm": 2.21875, + "learning_rate": 0.00016650974826250175, + "loss": 2.2007, + "step": 228095 + }, + { + "epoch": 0.54, + "grad_norm": 2.234375, + "learning_rate": 0.00016650836802738735, + "loss": 2.224, + "step": 228100 + }, + { + "epoch": 0.54, + "grad_norm": 2.140625, + "learning_rate": 0.0001665069877695525, + "loss": 1.9737, + "step": 228105 + }, + { + "epoch": 0.54, + "grad_norm": 2.234375, + "learning_rate": 0.00016650560748899767, + "loss": 2.2061, + "step": 228110 + }, + { + "epoch": 0.54, + "grad_norm": 2.453125, + "learning_rate": 0.0001665042271857233, + "loss": 2.1192, + "step": 228115 + }, + { + "epoch": 0.54, + "grad_norm": 2.21875, + "learning_rate": 0.00016650284685972987, + "loss": 2.2966, + "step": 228120 + }, + { + "epoch": 0.54, + "grad_norm": 2.046875, + "learning_rate": 0.00016650146651101788, + "loss": 2.2105, + "step": 228125 + }, + { + "epoch": 0.54, + "grad_norm": 1.9921875, + "learning_rate": 0.0001665000861395878, + "loss": 1.9974, + "step": 228130 + }, + { + "epoch": 0.54, + "grad_norm": 1.9296875, + "learning_rate": 0.00016649870574544004, + "loss": 2.2312, + "step": 228135 + }, + { + "epoch": 0.54, + "grad_norm": 1.875, + "learning_rate": 0.00016649732532857513, + "loss": 1.9468, + "step": 228140 + }, + { + "epoch": 0.54, + "grad_norm": 2.3125, + "learning_rate": 0.00016649594488899356, + "loss": 2.1854, + "step": 228145 + }, + { + "epoch": 0.54, + "grad_norm": 2.125, + "learning_rate": 0.00016649456442669575, + "loss": 2.2386, + "step": 228150 + }, + { + "epoch": 0.54, + "grad_norm": 2.28125, + "learning_rate": 0.00016649318394168218, + "loss": 2.1919, + "step": 228155 + }, + { + "epoch": 0.54, + "grad_norm": 1.9140625, + "learning_rate": 0.00016649180343395335, + "loss": 2.0821, + "step": 228160 + }, + { + "epoch": 0.54, + "grad_norm": 2.359375, + "learning_rate": 0.00016649042290350972, + "loss": 2.0096, + "step": 228165 + }, + { + "epoch": 0.54, + "grad_norm": 2.15625, + "learning_rate": 0.0001664890423503517, + "loss": 2.1628, + "step": 228170 + }, + { + "epoch": 0.54, + "grad_norm": 2.0625, + "learning_rate": 0.0001664876617744799, + "loss": 1.9984, + "step": 228175 + }, + { + "epoch": 0.54, + "grad_norm": 2.0625, + "learning_rate": 0.00016648628117589464, + "loss": 2.0917, + "step": 228180 + }, + { + "epoch": 0.54, + "grad_norm": 2.375, + "learning_rate": 0.0001664849005545965, + "loss": 2.1929, + "step": 228185 + }, + { + "epoch": 0.54, + "grad_norm": 2.390625, + "learning_rate": 0.0001664835199105859, + "loss": 1.9217, + "step": 228190 + }, + { + "epoch": 0.54, + "grad_norm": 1.8203125, + "learning_rate": 0.00016648213924386332, + "loss": 2.034, + "step": 228195 + }, + { + "epoch": 0.54, + "grad_norm": 1.828125, + "learning_rate": 0.00016648075855442925, + "loss": 2.1661, + "step": 228200 + }, + { + "epoch": 0.54, + "grad_norm": 2.171875, + "learning_rate": 0.0001664793778422841, + "loss": 2.0274, + "step": 228205 + }, + { + "epoch": 0.54, + "grad_norm": 2.15625, + "learning_rate": 0.00016647799710742844, + "loss": 2.1258, + "step": 228210 + }, + { + "epoch": 0.54, + "grad_norm": 1.9765625, + "learning_rate": 0.00016647661634986268, + "loss": 1.8329, + "step": 228215 + }, + { + "epoch": 0.54, + "grad_norm": 2.125, + "learning_rate": 0.0001664752355695873, + "loss": 2.0554, + "step": 228220 + }, + { + "epoch": 0.54, + "grad_norm": 2.453125, + "learning_rate": 0.00016647385476660278, + "loss": 2.1641, + "step": 228225 + }, + { + "epoch": 0.54, + "grad_norm": 2.0, + "learning_rate": 0.00016647247394090958, + "loss": 1.7879, + "step": 228230 + }, + { + "epoch": 0.54, + "grad_norm": 2.1875, + "learning_rate": 0.00016647109309250813, + "loss": 2.1709, + "step": 228235 + }, + { + "epoch": 0.54, + "grad_norm": 2.296875, + "learning_rate": 0.00016646971222139903, + "loss": 2.1197, + "step": 228240 + }, + { + "epoch": 0.54, + "grad_norm": 1.7890625, + "learning_rate": 0.00016646833132758263, + "loss": 2.1874, + "step": 228245 + }, + { + "epoch": 0.54, + "grad_norm": 2.21875, + "learning_rate": 0.00016646695041105947, + "loss": 2.0846, + "step": 228250 + }, + { + "epoch": 0.54, + "grad_norm": 1.765625, + "learning_rate": 0.00016646556947182996, + "loss": 2.2033, + "step": 228255 + }, + { + "epoch": 0.54, + "grad_norm": 1.8359375, + "learning_rate": 0.00016646418850989458, + "loss": 1.889, + "step": 228260 + }, + { + "epoch": 0.54, + "grad_norm": 2.1875, + "learning_rate": 0.0001664628075252539, + "loss": 2.0373, + "step": 228265 + }, + { + "epoch": 0.54, + "grad_norm": 2.0, + "learning_rate": 0.00016646142651790825, + "loss": 2.2566, + "step": 228270 + }, + { + "epoch": 0.54, + "grad_norm": 2.234375, + "learning_rate": 0.00016646004548785822, + "loss": 2.2838, + "step": 228275 + }, + { + "epoch": 0.54, + "grad_norm": 2.21875, + "learning_rate": 0.00016645866443510424, + "loss": 2.127, + "step": 228280 + }, + { + "epoch": 0.54, + "grad_norm": 2.171875, + "learning_rate": 0.00016645728335964674, + "loss": 2.0681, + "step": 228285 + }, + { + "epoch": 0.54, + "grad_norm": 1.9453125, + "learning_rate": 0.00016645590226148623, + "loss": 2.1174, + "step": 228290 + }, + { + "epoch": 0.54, + "grad_norm": 2.28125, + "learning_rate": 0.0001664545211406232, + "loss": 2.0658, + "step": 228295 + }, + { + "epoch": 0.54, + "grad_norm": 2.265625, + "learning_rate": 0.00016645313999705808, + "loss": 2.1833, + "step": 228300 + }, + { + "epoch": 0.54, + "grad_norm": 2.0625, + "learning_rate": 0.0001664517588307914, + "loss": 2.1213, + "step": 228305 + }, + { + "epoch": 0.54, + "grad_norm": 2.40625, + "learning_rate": 0.00016645037764182353, + "loss": 2.0624, + "step": 228310 + }, + { + "epoch": 0.54, + "grad_norm": 2.453125, + "learning_rate": 0.00016644899643015505, + "loss": 2.1605, + "step": 228315 + }, + { + "epoch": 0.54, + "grad_norm": 2.171875, + "learning_rate": 0.0001664476151957864, + "loss": 2.0021, + "step": 228320 + }, + { + "epoch": 0.54, + "grad_norm": 2.125, + "learning_rate": 0.000166446233938718, + "loss": 2.0339, + "step": 228325 + }, + { + "epoch": 0.54, + "grad_norm": 2.484375, + "learning_rate": 0.00016644485265895038, + "loss": 2.1268, + "step": 228330 + }, + { + "epoch": 0.54, + "grad_norm": 2.3125, + "learning_rate": 0.00016644347135648401, + "loss": 2.0063, + "step": 228335 + }, + { + "epoch": 0.54, + "grad_norm": 1.8203125, + "learning_rate": 0.00016644209003131933, + "loss": 1.9588, + "step": 228340 + }, + { + "epoch": 0.54, + "grad_norm": 2.796875, + "learning_rate": 0.00016644070868345682, + "loss": 2.0213, + "step": 228345 + }, + { + "epoch": 0.54, + "grad_norm": 1.9921875, + "learning_rate": 0.00016643932731289697, + "loss": 1.9878, + "step": 228350 + }, + { + "epoch": 0.54, + "grad_norm": 2.046875, + "learning_rate": 0.00016643794591964022, + "loss": 2.2331, + "step": 228355 + }, + { + "epoch": 0.54, + "grad_norm": 1.9609375, + "learning_rate": 0.0001664365645036871, + "loss": 2.0962, + "step": 228360 + }, + { + "epoch": 0.54, + "grad_norm": 1.9765625, + "learning_rate": 0.000166435183065038, + "loss": 2.2843, + "step": 228365 + }, + { + "epoch": 0.54, + "grad_norm": 2.28125, + "learning_rate": 0.00016643380160369348, + "loss": 2.1842, + "step": 228370 + }, + { + "epoch": 0.54, + "grad_norm": 2.03125, + "learning_rate": 0.00016643242011965393, + "loss": 2.022, + "step": 228375 + }, + { + "epoch": 0.54, + "grad_norm": 2.203125, + "learning_rate": 0.0001664310386129199, + "loss": 1.9995, + "step": 228380 + }, + { + "epoch": 0.54, + "grad_norm": 2.078125, + "learning_rate": 0.00016642965708349179, + "loss": 2.1455, + "step": 228385 + }, + { + "epoch": 0.54, + "grad_norm": 2.421875, + "learning_rate": 0.0001664282755313701, + "loss": 2.2716, + "step": 228390 + }, + { + "epoch": 0.54, + "grad_norm": 1.8671875, + "learning_rate": 0.00016642689395655533, + "loss": 2.0366, + "step": 228395 + }, + { + "epoch": 0.54, + "grad_norm": 2.203125, + "learning_rate": 0.00016642551235904792, + "loss": 2.0516, + "step": 228400 + }, + { + "epoch": 0.54, + "grad_norm": 2.046875, + "learning_rate": 0.00016642413073884838, + "loss": 2.0226, + "step": 228405 + }, + { + "epoch": 0.54, + "grad_norm": 2.546875, + "learning_rate": 0.0001664227490959571, + "loss": 2.2638, + "step": 228410 + }, + { + "epoch": 0.54, + "grad_norm": 2.359375, + "learning_rate": 0.00016642136743037463, + "loss": 2.1715, + "step": 228415 + }, + { + "epoch": 0.54, + "grad_norm": 1.7421875, + "learning_rate": 0.0001664199857421014, + "loss": 2.0386, + "step": 228420 + }, + { + "epoch": 0.54, + "grad_norm": 1.921875, + "learning_rate": 0.00016641860403113792, + "loss": 2.0321, + "step": 228425 + }, + { + "epoch": 0.54, + "grad_norm": 2.078125, + "learning_rate": 0.00016641722229748465, + "loss": 2.1283, + "step": 228430 + }, + { + "epoch": 0.54, + "grad_norm": 2.171875, + "learning_rate": 0.000166415840541142, + "loss": 2.2379, + "step": 228435 + }, + { + "epoch": 0.54, + "grad_norm": 2.234375, + "learning_rate": 0.00016641445876211056, + "loss": 2.1502, + "step": 228440 + }, + { + "epoch": 0.54, + "grad_norm": 2.265625, + "learning_rate": 0.0001664130769603907, + "loss": 1.9696, + "step": 228445 + }, + { + "epoch": 0.54, + "grad_norm": 2.21875, + "learning_rate": 0.00016641169513598292, + "loss": 2.0434, + "step": 228450 + }, + { + "epoch": 0.54, + "grad_norm": 2.203125, + "learning_rate": 0.00016641031328888773, + "loss": 1.9799, + "step": 228455 + }, + { + "epoch": 0.54, + "grad_norm": 2.421875, + "learning_rate": 0.00016640893141910555, + "loss": 2.0192, + "step": 228460 + }, + { + "epoch": 0.54, + "grad_norm": 2.0, + "learning_rate": 0.00016640754952663688, + "loss": 2.2068, + "step": 228465 + }, + { + "epoch": 0.54, + "grad_norm": 2.203125, + "learning_rate": 0.00016640616761148217, + "loss": 2.0303, + "step": 228470 + }, + { + "epoch": 0.54, + "grad_norm": 2.1875, + "learning_rate": 0.00016640478567364194, + "loss": 2.1259, + "step": 228475 + }, + { + "epoch": 0.54, + "grad_norm": 1.9921875, + "learning_rate": 0.00016640340371311662, + "loss": 2.0382, + "step": 228480 + }, + { + "epoch": 0.54, + "grad_norm": 2.140625, + "learning_rate": 0.0001664020217299067, + "loss": 2.0533, + "step": 228485 + }, + { + "epoch": 0.54, + "grad_norm": 1.859375, + "learning_rate": 0.00016640063972401262, + "loss": 2.247, + "step": 228490 + }, + { + "epoch": 0.54, + "grad_norm": 2.0, + "learning_rate": 0.00016639925769543491, + "loss": 2.1207, + "step": 228495 + }, + { + "epoch": 0.54, + "grad_norm": 2.484375, + "learning_rate": 0.00016639787564417396, + "loss": 2.1029, + "step": 228500 + }, + { + "epoch": 0.54, + "grad_norm": 2.359375, + "learning_rate": 0.00016639649357023034, + "loss": 2.0737, + "step": 228505 + }, + { + "epoch": 0.54, + "grad_norm": 1.875, + "learning_rate": 0.00016639511147360447, + "loss": 2.1207, + "step": 228510 + }, + { + "epoch": 0.54, + "grad_norm": 2.265625, + "learning_rate": 0.0001663937293542968, + "loss": 2.1242, + "step": 228515 + }, + { + "epoch": 0.54, + "grad_norm": 2.0, + "learning_rate": 0.00016639234721230786, + "loss": 2.0541, + "step": 228520 + }, + { + "epoch": 0.54, + "grad_norm": 1.78125, + "learning_rate": 0.00016639096504763806, + "loss": 2.1884, + "step": 228525 + }, + { + "epoch": 0.54, + "grad_norm": 2.078125, + "learning_rate": 0.0001663895828602879, + "loss": 2.07, + "step": 228530 + }, + { + "epoch": 0.54, + "grad_norm": 2.234375, + "learning_rate": 0.00016638820065025786, + "loss": 2.3075, + "step": 228535 + }, + { + "epoch": 0.54, + "grad_norm": 3.0, + "learning_rate": 0.00016638681841754844, + "loss": 2.2288, + "step": 228540 + }, + { + "epoch": 0.54, + "grad_norm": 2.328125, + "learning_rate": 0.00016638543616216006, + "loss": 2.309, + "step": 228545 + }, + { + "epoch": 0.54, + "grad_norm": 2.203125, + "learning_rate": 0.0001663840538840932, + "loss": 2.1331, + "step": 228550 + }, + { + "epoch": 0.54, + "grad_norm": 1.75, + "learning_rate": 0.00016638267158334836, + "loss": 2.1137, + "step": 228555 + }, + { + "epoch": 0.54, + "grad_norm": 2.296875, + "learning_rate": 0.00016638128925992597, + "loss": 2.0404, + "step": 228560 + }, + { + "epoch": 0.54, + "grad_norm": 1.984375, + "learning_rate": 0.00016637990691382654, + "loss": 2.1399, + "step": 228565 + }, + { + "epoch": 0.54, + "grad_norm": 2.171875, + "learning_rate": 0.00016637852454505053, + "loss": 2.1085, + "step": 228570 + }, + { + "epoch": 0.54, + "grad_norm": 1.9765625, + "learning_rate": 0.0001663771421535984, + "loss": 2.1108, + "step": 228575 + }, + { + "epoch": 0.54, + "grad_norm": 2.375, + "learning_rate": 0.00016637575973947065, + "loss": 2.1734, + "step": 228580 + }, + { + "epoch": 0.54, + "grad_norm": 2.0625, + "learning_rate": 0.00016637437730266774, + "loss": 2.1075, + "step": 228585 + }, + { + "epoch": 0.54, + "grad_norm": 2.21875, + "learning_rate": 0.00016637299484319014, + "loss": 2.1133, + "step": 228590 + }, + { + "epoch": 0.54, + "grad_norm": 1.8671875, + "learning_rate": 0.00016637161236103832, + "loss": 2.0075, + "step": 228595 + }, + { + "epoch": 0.54, + "grad_norm": 1.953125, + "learning_rate": 0.00016637022985621273, + "loss": 2.207, + "step": 228600 + }, + { + "epoch": 0.54, + "grad_norm": 2.453125, + "learning_rate": 0.00016636884732871388, + "loss": 2.1151, + "step": 228605 + }, + { + "epoch": 0.54, + "grad_norm": 3.0, + "learning_rate": 0.00016636746477854223, + "loss": 2.0367, + "step": 228610 + }, + { + "epoch": 0.54, + "grad_norm": 2.015625, + "learning_rate": 0.00016636608220569826, + "loss": 2.1034, + "step": 228615 + }, + { + "epoch": 0.54, + "grad_norm": 2.359375, + "learning_rate": 0.00016636469961018243, + "loss": 2.047, + "step": 228620 + }, + { + "epoch": 0.54, + "grad_norm": 2.59375, + "learning_rate": 0.0001663633169919952, + "loss": 2.2037, + "step": 228625 + }, + { + "epoch": 0.54, + "grad_norm": 2.015625, + "learning_rate": 0.00016636193435113708, + "loss": 2.2726, + "step": 228630 + }, + { + "epoch": 0.54, + "grad_norm": 2.015625, + "learning_rate": 0.00016636055168760852, + "loss": 2.0631, + "step": 228635 + }, + { + "epoch": 0.54, + "grad_norm": 2.109375, + "learning_rate": 0.00016635916900140996, + "loss": 2.1586, + "step": 228640 + }, + { + "epoch": 0.54, + "grad_norm": 1.984375, + "learning_rate": 0.00016635778629254193, + "loss": 2.2307, + "step": 228645 + }, + { + "epoch": 0.54, + "grad_norm": 2.34375, + "learning_rate": 0.0001663564035610049, + "loss": 2.2214, + "step": 228650 + }, + { + "epoch": 0.54, + "grad_norm": 2.21875, + "learning_rate": 0.00016635502080679927, + "loss": 2.2318, + "step": 228655 + }, + { + "epoch": 0.54, + "grad_norm": 2.25, + "learning_rate": 0.00016635363802992558, + "loss": 2.1233, + "step": 228660 + }, + { + "epoch": 0.54, + "grad_norm": 2.34375, + "learning_rate": 0.00016635225523038428, + "loss": 1.9587, + "step": 228665 + }, + { + "epoch": 0.54, + "grad_norm": 2.484375, + "learning_rate": 0.00016635087240817584, + "loss": 2.0058, + "step": 228670 + }, + { + "epoch": 0.54, + "grad_norm": 1.96875, + "learning_rate": 0.00016634948956330075, + "loss": 1.9764, + "step": 228675 + }, + { + "epoch": 0.54, + "grad_norm": 2.03125, + "learning_rate": 0.00016634810669575946, + "loss": 2.09, + "step": 228680 + }, + { + "epoch": 0.54, + "grad_norm": 1.875, + "learning_rate": 0.0001663467238055525, + "loss": 2.0888, + "step": 228685 + }, + { + "epoch": 0.54, + "grad_norm": 1.875, + "learning_rate": 0.00016634534089268025, + "loss": 2.0556, + "step": 228690 + }, + { + "epoch": 0.54, + "grad_norm": 2.484375, + "learning_rate": 0.00016634395795714322, + "loss": 2.1276, + "step": 228695 + }, + { + "epoch": 0.54, + "grad_norm": 2.0625, + "learning_rate": 0.00016634257499894192, + "loss": 2.0734, + "step": 228700 + }, + { + "epoch": 0.54, + "grad_norm": 2.21875, + "learning_rate": 0.00016634119201807676, + "loss": 2.0925, + "step": 228705 + }, + { + "epoch": 0.54, + "grad_norm": 1.9609375, + "learning_rate": 0.0001663398090145483, + "loss": 2.1025, + "step": 228710 + }, + { + "epoch": 0.54, + "grad_norm": 2.171875, + "learning_rate": 0.00016633842598835693, + "loss": 2.0773, + "step": 228715 + }, + { + "epoch": 0.54, + "grad_norm": 2.140625, + "learning_rate": 0.00016633704293950313, + "loss": 1.9973, + "step": 228720 + }, + { + "epoch": 0.54, + "grad_norm": 2.09375, + "learning_rate": 0.0001663356598679874, + "loss": 2.1334, + "step": 228725 + }, + { + "epoch": 0.54, + "grad_norm": 2.015625, + "learning_rate": 0.00016633427677381022, + "loss": 2.2406, + "step": 228730 + }, + { + "epoch": 0.54, + "grad_norm": 2.21875, + "learning_rate": 0.00016633289365697204, + "loss": 2.118, + "step": 228735 + }, + { + "epoch": 0.54, + "grad_norm": 1.8046875, + "learning_rate": 0.00016633151051747332, + "loss": 2.0451, + "step": 228740 + }, + { + "epoch": 0.54, + "grad_norm": 1.859375, + "learning_rate": 0.00016633012735531462, + "loss": 2.0376, + "step": 228745 + }, + { + "epoch": 0.54, + "grad_norm": 2.0, + "learning_rate": 0.00016632874417049629, + "loss": 2.1821, + "step": 228750 + }, + { + "epoch": 0.54, + "grad_norm": 1.9453125, + "learning_rate": 0.00016632736096301888, + "loss": 2.0872, + "step": 228755 + }, + { + "epoch": 0.54, + "grad_norm": 2.0625, + "learning_rate": 0.00016632597773288282, + "loss": 2.0738, + "step": 228760 + }, + { + "epoch": 0.54, + "grad_norm": 2.25, + "learning_rate": 0.0001663245944800886, + "loss": 2.2399, + "step": 228765 + }, + { + "epoch": 0.54, + "grad_norm": 2.1875, + "learning_rate": 0.00016632321120463672, + "loss": 2.1218, + "step": 228770 + }, + { + "epoch": 0.54, + "grad_norm": 1.84375, + "learning_rate": 0.0001663218279065276, + "loss": 2.1609, + "step": 228775 + }, + { + "epoch": 0.54, + "grad_norm": 2.0625, + "learning_rate": 0.00016632044458576178, + "loss": 2.0005, + "step": 228780 + }, + { + "epoch": 0.54, + "grad_norm": 2.3125, + "learning_rate": 0.00016631906124233968, + "loss": 2.1893, + "step": 228785 + }, + { + "epoch": 0.54, + "grad_norm": 2.171875, + "learning_rate": 0.00016631767787626177, + "loss": 2.0053, + "step": 228790 + }, + { + "epoch": 0.54, + "grad_norm": 2.234375, + "learning_rate": 0.00016631629448752856, + "loss": 2.0342, + "step": 228795 + }, + { + "epoch": 0.54, + "grad_norm": 3.03125, + "learning_rate": 0.00016631491107614048, + "loss": 2.1282, + "step": 228800 + }, + { + "epoch": 0.54, + "grad_norm": 2.1875, + "learning_rate": 0.00016631352764209803, + "loss": 2.1159, + "step": 228805 + }, + { + "epoch": 0.54, + "grad_norm": 2.609375, + "learning_rate": 0.00016631214418540169, + "loss": 2.2761, + "step": 228810 + }, + { + "epoch": 0.54, + "grad_norm": 1.9296875, + "learning_rate": 0.00016631076070605189, + "loss": 2.0934, + "step": 228815 + }, + { + "epoch": 0.54, + "grad_norm": 2.328125, + "learning_rate": 0.00016630937720404915, + "loss": 2.1516, + "step": 228820 + }, + { + "epoch": 0.54, + "grad_norm": 1.9609375, + "learning_rate": 0.00016630799367939393, + "loss": 2.1696, + "step": 228825 + }, + { + "epoch": 0.54, + "grad_norm": 2.09375, + "learning_rate": 0.00016630661013208666, + "loss": 2.1244, + "step": 228830 + }, + { + "epoch": 0.54, + "grad_norm": 2.15625, + "learning_rate": 0.0001663052265621279, + "loss": 2.1565, + "step": 228835 + }, + { + "epoch": 0.54, + "grad_norm": 2.171875, + "learning_rate": 0.00016630384296951805, + "loss": 2.1469, + "step": 228840 + }, + { + "epoch": 0.54, + "grad_norm": 1.6796875, + "learning_rate": 0.0001663024593542576, + "loss": 1.9315, + "step": 228845 + }, + { + "epoch": 0.54, + "grad_norm": 2.15625, + "learning_rate": 0.000166301075716347, + "loss": 2.012, + "step": 228850 + }, + { + "epoch": 0.54, + "grad_norm": 2.015625, + "learning_rate": 0.00016629969205578682, + "loss": 2.02, + "step": 228855 + }, + { + "epoch": 0.54, + "grad_norm": 2.421875, + "learning_rate": 0.00016629830837257742, + "loss": 2.1341, + "step": 228860 + }, + { + "epoch": 0.54, + "grad_norm": 2.46875, + "learning_rate": 0.00016629692466671933, + "loss": 2.0585, + "step": 228865 + }, + { + "epoch": 0.54, + "grad_norm": 2.28125, + "learning_rate": 0.00016629554093821298, + "loss": 2.1756, + "step": 228870 + }, + { + "epoch": 0.54, + "grad_norm": 2.15625, + "learning_rate": 0.0001662941571870589, + "loss": 1.9368, + "step": 228875 + }, + { + "epoch": 0.54, + "grad_norm": 2.359375, + "learning_rate": 0.0001662927734132575, + "loss": 2.0659, + "step": 228880 + }, + { + "epoch": 0.54, + "grad_norm": 2.625, + "learning_rate": 0.00016629138961680933, + "loss": 2.0198, + "step": 228885 + }, + { + "epoch": 0.54, + "grad_norm": 2.046875, + "learning_rate": 0.0001662900057977148, + "loss": 2.0158, + "step": 228890 + }, + { + "epoch": 0.54, + "grad_norm": 2.25, + "learning_rate": 0.0001662886219559744, + "loss": 2.3462, + "step": 228895 + }, + { + "epoch": 0.54, + "grad_norm": 1.984375, + "learning_rate": 0.00016628723809158862, + "loss": 2.0204, + "step": 228900 + }, + { + "epoch": 0.54, + "grad_norm": 1.8203125, + "learning_rate": 0.00016628585420455787, + "loss": 1.9568, + "step": 228905 + }, + { + "epoch": 0.54, + "grad_norm": 2.234375, + "learning_rate": 0.0001662844702948827, + "loss": 2.0133, + "step": 228910 + }, + { + "epoch": 0.54, + "grad_norm": 2.328125, + "learning_rate": 0.00016628308636256356, + "loss": 2.2844, + "step": 228915 + }, + { + "epoch": 0.54, + "grad_norm": 2.109375, + "learning_rate": 0.00016628170240760092, + "loss": 2.0885, + "step": 228920 + }, + { + "epoch": 0.54, + "grad_norm": 2.015625, + "learning_rate": 0.00016628031842999525, + "loss": 2.1312, + "step": 228925 + }, + { + "epoch": 0.54, + "grad_norm": 1.9296875, + "learning_rate": 0.000166278934429747, + "loss": 2.1316, + "step": 228930 + }, + { + "epoch": 0.54, + "grad_norm": 1.65625, + "learning_rate": 0.0001662775504068567, + "loss": 2.2054, + "step": 228935 + }, + { + "epoch": 0.54, + "grad_norm": 2.015625, + "learning_rate": 0.00016627616636132473, + "loss": 2.43, + "step": 228940 + }, + { + "epoch": 0.54, + "grad_norm": 2.203125, + "learning_rate": 0.00016627478229315167, + "loss": 2.0909, + "step": 228945 + }, + { + "epoch": 0.54, + "grad_norm": 2.140625, + "learning_rate": 0.00016627339820233795, + "loss": 2.1736, + "step": 228950 + }, + { + "epoch": 0.54, + "grad_norm": 2.453125, + "learning_rate": 0.00016627201408888397, + "loss": 2.0042, + "step": 228955 + }, + { + "epoch": 0.54, + "grad_norm": 2.0625, + "learning_rate": 0.0001662706299527903, + "loss": 2.2507, + "step": 228960 + }, + { + "epoch": 0.54, + "grad_norm": 2.125, + "learning_rate": 0.0001662692457940574, + "loss": 2.2052, + "step": 228965 + }, + { + "epoch": 0.54, + "grad_norm": 2.09375, + "learning_rate": 0.00016626786161268574, + "loss": 2.1869, + "step": 228970 + }, + { + "epoch": 0.54, + "grad_norm": 2.078125, + "learning_rate": 0.00016626647740867573, + "loss": 2.086, + "step": 228975 + }, + { + "epoch": 0.54, + "grad_norm": 2.046875, + "learning_rate": 0.0001662650931820279, + "loss": 2.1538, + "step": 228980 + }, + { + "epoch": 0.54, + "grad_norm": 2.140625, + "learning_rate": 0.00016626370893274271, + "loss": 2.0471, + "step": 228985 + }, + { + "epoch": 0.54, + "grad_norm": 2.703125, + "learning_rate": 0.00016626232466082066, + "loss": 2.2609, + "step": 228990 + }, + { + "epoch": 0.54, + "grad_norm": 2.1875, + "learning_rate": 0.00016626094036626218, + "loss": 2.0602, + "step": 228995 + }, + { + "epoch": 0.54, + "grad_norm": 1.765625, + "learning_rate": 0.00016625955604906775, + "loss": 1.9928, + "step": 229000 + }, + { + "epoch": 0.54, + "grad_norm": 2.265625, + "learning_rate": 0.00016625817170923789, + "loss": 1.9682, + "step": 229005 + }, + { + "epoch": 0.54, + "grad_norm": 2.0625, + "learning_rate": 0.000166256787346773, + "loss": 2.0656, + "step": 229010 + }, + { + "epoch": 0.54, + "grad_norm": 2.359375, + "learning_rate": 0.00016625540296167358, + "loss": 1.9512, + "step": 229015 + }, + { + "epoch": 0.54, + "grad_norm": 2.25, + "learning_rate": 0.00016625401855394016, + "loss": 1.9228, + "step": 229020 + }, + { + "epoch": 0.54, + "grad_norm": 2.0625, + "learning_rate": 0.00016625263412357316, + "loss": 2.0691, + "step": 229025 + }, + { + "epoch": 0.54, + "grad_norm": 2.15625, + "learning_rate": 0.00016625124967057305, + "loss": 2.0181, + "step": 229030 + }, + { + "epoch": 0.54, + "grad_norm": 2.109375, + "learning_rate": 0.00016624986519494028, + "loss": 2.1336, + "step": 229035 + }, + { + "epoch": 0.54, + "grad_norm": 2.40625, + "learning_rate": 0.0001662484806966754, + "loss": 2.2738, + "step": 229040 + }, + { + "epoch": 0.54, + "grad_norm": 2.421875, + "learning_rate": 0.0001662470961757788, + "loss": 2.1458, + "step": 229045 + }, + { + "epoch": 0.54, + "grad_norm": 1.953125, + "learning_rate": 0.00016624571163225102, + "loss": 2.2232, + "step": 229050 + }, + { + "epoch": 0.54, + "grad_norm": 1.9453125, + "learning_rate": 0.0001662443270660925, + "loss": 2.0892, + "step": 229055 + }, + { + "epoch": 0.54, + "grad_norm": 1.96875, + "learning_rate": 0.00016624294247730368, + "loss": 2.1004, + "step": 229060 + }, + { + "epoch": 0.54, + "grad_norm": 2.578125, + "learning_rate": 0.0001662415578658851, + "loss": 2.0832, + "step": 229065 + }, + { + "epoch": 0.54, + "grad_norm": 2.109375, + "learning_rate": 0.0001662401732318372, + "loss": 2.1235, + "step": 229070 + }, + { + "epoch": 0.54, + "grad_norm": 2.359375, + "learning_rate": 0.00016623878857516044, + "loss": 2.0747, + "step": 229075 + }, + { + "epoch": 0.54, + "grad_norm": 2.578125, + "learning_rate": 0.0001662374038958553, + "loss": 2.3826, + "step": 229080 + }, + { + "epoch": 0.54, + "grad_norm": 1.8984375, + "learning_rate": 0.00016623601919392233, + "loss": 2.1438, + "step": 229085 + }, + { + "epoch": 0.54, + "grad_norm": 2.046875, + "learning_rate": 0.00016623463446936186, + "loss": 2.073, + "step": 229090 + }, + { + "epoch": 0.54, + "grad_norm": 2.578125, + "learning_rate": 0.00016623324972217448, + "loss": 2.1488, + "step": 229095 + }, + { + "epoch": 0.54, + "grad_norm": 2.421875, + "learning_rate": 0.0001662318649523606, + "loss": 2.0668, + "step": 229100 + }, + { + "epoch": 0.54, + "grad_norm": 2.484375, + "learning_rate": 0.00016623048015992073, + "loss": 2.1234, + "step": 229105 + }, + { + "epoch": 0.54, + "grad_norm": 2.375, + "learning_rate": 0.0001662290953448553, + "loss": 2.2155, + "step": 229110 + }, + { + "epoch": 0.54, + "grad_norm": 2.015625, + "learning_rate": 0.00016622771050716484, + "loss": 2.0841, + "step": 229115 + }, + { + "epoch": 0.54, + "grad_norm": 1.890625, + "learning_rate": 0.00016622632564684977, + "loss": 2.0214, + "step": 229120 + }, + { + "epoch": 0.54, + "grad_norm": 2.21875, + "learning_rate": 0.0001662249407639106, + "loss": 2.0038, + "step": 229125 + }, + { + "epoch": 0.54, + "grad_norm": 2.421875, + "learning_rate": 0.00016622355585834778, + "loss": 2.2442, + "step": 229130 + }, + { + "epoch": 0.54, + "grad_norm": 2.109375, + "learning_rate": 0.0001662221709301618, + "loss": 2.1914, + "step": 229135 + }, + { + "epoch": 0.54, + "grad_norm": 1.953125, + "learning_rate": 0.00016622078597935313, + "loss": 2.1831, + "step": 229140 + }, + { + "epoch": 0.54, + "grad_norm": 2.375, + "learning_rate": 0.00016621940100592224, + "loss": 2.2108, + "step": 229145 + }, + { + "epoch": 0.54, + "grad_norm": 1.5234375, + "learning_rate": 0.0001662180160098696, + "loss": 2.1574, + "step": 229150 + }, + { + "epoch": 0.54, + "grad_norm": 2.28125, + "learning_rate": 0.00016621663099119566, + "loss": 2.2834, + "step": 229155 + }, + { + "epoch": 0.54, + "grad_norm": 1.7734375, + "learning_rate": 0.00016621524594990096, + "loss": 2.0692, + "step": 229160 + }, + { + "epoch": 0.54, + "grad_norm": 2.390625, + "learning_rate": 0.0001662138608859859, + "loss": 2.1087, + "step": 229165 + }, + { + "epoch": 0.54, + "grad_norm": 1.90625, + "learning_rate": 0.00016621247579945097, + "loss": 2.1942, + "step": 229170 + }, + { + "epoch": 0.54, + "grad_norm": 2.28125, + "learning_rate": 0.00016621109069029667, + "loss": 1.9995, + "step": 229175 + }, + { + "epoch": 0.54, + "grad_norm": 2.421875, + "learning_rate": 0.00016620970555852348, + "loss": 2.2304, + "step": 229180 + }, + { + "epoch": 0.54, + "grad_norm": 1.7421875, + "learning_rate": 0.00016620832040413185, + "loss": 2.0937, + "step": 229185 + }, + { + "epoch": 0.54, + "grad_norm": 2.21875, + "learning_rate": 0.00016620693522712224, + "loss": 2.1459, + "step": 229190 + }, + { + "epoch": 0.54, + "grad_norm": 2.125, + "learning_rate": 0.00016620555002749517, + "loss": 2.0982, + "step": 229195 + }, + { + "epoch": 0.54, + "grad_norm": 2.15625, + "learning_rate": 0.00016620416480525103, + "loss": 2.0578, + "step": 229200 + }, + { + "epoch": 0.54, + "grad_norm": 2.109375, + "learning_rate": 0.0001662027795603904, + "loss": 2.0333, + "step": 229205 + }, + { + "epoch": 0.54, + "grad_norm": 1.9921875, + "learning_rate": 0.00016620139429291368, + "loss": 2.1665, + "step": 229210 + }, + { + "epoch": 0.54, + "grad_norm": 2.125, + "learning_rate": 0.00016620000900282138, + "loss": 2.108, + "step": 229215 + }, + { + "epoch": 0.54, + "grad_norm": 2.171875, + "learning_rate": 0.00016619862369011393, + "loss": 2.1176, + "step": 229220 + }, + { + "epoch": 0.54, + "grad_norm": 2.265625, + "learning_rate": 0.00016619723835479183, + "loss": 2.0469, + "step": 229225 + }, + { + "epoch": 0.54, + "grad_norm": 2.46875, + "learning_rate": 0.00016619585299685555, + "loss": 2.1017, + "step": 229230 + }, + { + "epoch": 0.54, + "grad_norm": 2.21875, + "learning_rate": 0.00016619446761630558, + "loss": 2.2598, + "step": 229235 + }, + { + "epoch": 0.54, + "grad_norm": 2.28125, + "learning_rate": 0.00016619308221314238, + "loss": 2.1513, + "step": 229240 + }, + { + "epoch": 0.54, + "grad_norm": 2.15625, + "learning_rate": 0.00016619169678736643, + "loss": 2.1661, + "step": 229245 + }, + { + "epoch": 0.54, + "grad_norm": 2.25, + "learning_rate": 0.00016619031133897818, + "loss": 2.0472, + "step": 229250 + }, + { + "epoch": 0.54, + "grad_norm": 2.109375, + "learning_rate": 0.00016618892586797813, + "loss": 2.2956, + "step": 229255 + }, + { + "epoch": 0.54, + "grad_norm": 1.890625, + "learning_rate": 0.0001661875403743667, + "loss": 2.0257, + "step": 229260 + }, + { + "epoch": 0.54, + "grad_norm": 2.375, + "learning_rate": 0.00016618615485814448, + "loss": 2.0886, + "step": 229265 + }, + { + "epoch": 0.54, + "grad_norm": 2.671875, + "learning_rate": 0.00016618476931931183, + "loss": 2.1481, + "step": 229270 + }, + { + "epoch": 0.54, + "grad_norm": 2.03125, + "learning_rate": 0.0001661833837578693, + "loss": 2.114, + "step": 229275 + }, + { + "epoch": 0.54, + "grad_norm": 2.453125, + "learning_rate": 0.00016618199817381726, + "loss": 2.1768, + "step": 229280 + }, + { + "epoch": 0.54, + "grad_norm": 2.25, + "learning_rate": 0.0001661806125671563, + "loss": 1.9972, + "step": 229285 + }, + { + "epoch": 0.54, + "grad_norm": 2.140625, + "learning_rate": 0.0001661792269378868, + "loss": 2.1019, + "step": 229290 + }, + { + "epoch": 0.54, + "grad_norm": 2.421875, + "learning_rate": 0.0001661778412860093, + "loss": 2.1102, + "step": 229295 + }, + { + "epoch": 0.54, + "grad_norm": 2.625, + "learning_rate": 0.00016617645561152426, + "loss": 2.377, + "step": 229300 + }, + { + "epoch": 0.54, + "grad_norm": 2.015625, + "learning_rate": 0.00016617506991443211, + "loss": 2.1544, + "step": 229305 + }, + { + "epoch": 0.54, + "grad_norm": 1.9609375, + "learning_rate": 0.00016617368419473338, + "loss": 2.033, + "step": 229310 + }, + { + "epoch": 0.54, + "grad_norm": 2.0625, + "learning_rate": 0.0001661722984524285, + "loss": 2.2133, + "step": 229315 + }, + { + "epoch": 0.54, + "grad_norm": 2.296875, + "learning_rate": 0.000166170912687518, + "loss": 2.1178, + "step": 229320 + }, + { + "epoch": 0.54, + "grad_norm": 2.265625, + "learning_rate": 0.0001661695269000023, + "loss": 2.1604, + "step": 229325 + }, + { + "epoch": 0.54, + "grad_norm": 2.25, + "learning_rate": 0.00016616814108988187, + "loss": 2.1545, + "step": 229330 + }, + { + "epoch": 0.54, + "grad_norm": 2.65625, + "learning_rate": 0.00016616675525715722, + "loss": 2.2623, + "step": 229335 + }, + { + "epoch": 0.54, + "grad_norm": 2.140625, + "learning_rate": 0.0001661653694018288, + "loss": 1.9676, + "step": 229340 + }, + { + "epoch": 0.54, + "grad_norm": 2.421875, + "learning_rate": 0.0001661639835238971, + "loss": 2.1546, + "step": 229345 + }, + { + "epoch": 0.54, + "grad_norm": 2.21875, + "learning_rate": 0.00016616259762336258, + "loss": 2.2564, + "step": 229350 + }, + { + "epoch": 0.54, + "grad_norm": 2.125, + "learning_rate": 0.00016616121170022572, + "loss": 2.2763, + "step": 229355 + }, + { + "epoch": 0.54, + "grad_norm": 2.015625, + "learning_rate": 0.00016615982575448696, + "loss": 2.1642, + "step": 229360 + }, + { + "epoch": 0.54, + "grad_norm": 1.9765625, + "learning_rate": 0.00016615843978614683, + "loss": 2.0927, + "step": 229365 + }, + { + "epoch": 0.54, + "grad_norm": 1.828125, + "learning_rate": 0.00016615705379520579, + "loss": 2.0498, + "step": 229370 + }, + { + "epoch": 0.54, + "grad_norm": 2.125, + "learning_rate": 0.0001661556677816643, + "loss": 2.1697, + "step": 229375 + }, + { + "epoch": 0.54, + "grad_norm": 2.171875, + "learning_rate": 0.0001661542817455228, + "loss": 2.1024, + "step": 229380 + }, + { + "epoch": 0.54, + "grad_norm": 2.234375, + "learning_rate": 0.0001661528956867818, + "loss": 2.0309, + "step": 229385 + }, + { + "epoch": 0.54, + "grad_norm": 2.28125, + "learning_rate": 0.0001661515096054418, + "loss": 2.2364, + "step": 229390 + }, + { + "epoch": 0.54, + "grad_norm": 2.4375, + "learning_rate": 0.00016615012350150324, + "loss": 2.0681, + "step": 229395 + }, + { + "epoch": 0.54, + "grad_norm": 2.40625, + "learning_rate": 0.00016614873737496658, + "loss": 2.2783, + "step": 229400 + }, + { + "epoch": 0.54, + "grad_norm": 1.7578125, + "learning_rate": 0.00016614735122583233, + "loss": 1.9235, + "step": 229405 + }, + { + "epoch": 0.54, + "grad_norm": 2.203125, + "learning_rate": 0.00016614596505410092, + "loss": 2.0502, + "step": 229410 + }, + { + "epoch": 0.54, + "grad_norm": 2.21875, + "learning_rate": 0.00016614457885977287, + "loss": 2.0633, + "step": 229415 + }, + { + "epoch": 0.54, + "grad_norm": 2.328125, + "learning_rate": 0.00016614319264284864, + "loss": 2.1153, + "step": 229420 + }, + { + "epoch": 0.54, + "grad_norm": 2.484375, + "learning_rate": 0.0001661418064033287, + "loss": 2.0505, + "step": 229425 + }, + { + "epoch": 0.54, + "grad_norm": 2.21875, + "learning_rate": 0.0001661404201412135, + "loss": 2.1517, + "step": 229430 + }, + { + "epoch": 0.54, + "grad_norm": 2.46875, + "learning_rate": 0.0001661390338565035, + "loss": 2.2472, + "step": 229435 + }, + { + "epoch": 0.54, + "grad_norm": 2.484375, + "learning_rate": 0.00016613764754919928, + "loss": 2.0214, + "step": 229440 + }, + { + "epoch": 0.54, + "grad_norm": 2.5, + "learning_rate": 0.0001661362612193012, + "loss": 2.054, + "step": 229445 + }, + { + "epoch": 0.54, + "grad_norm": 2.46875, + "learning_rate": 0.00016613487486680977, + "loss": 1.9278, + "step": 229450 + }, + { + "epoch": 0.54, + "grad_norm": 1.953125, + "learning_rate": 0.00016613348849172548, + "loss": 2.0713, + "step": 229455 + }, + { + "epoch": 0.54, + "grad_norm": 2.421875, + "learning_rate": 0.00016613210209404878, + "loss": 2.1613, + "step": 229460 + }, + { + "epoch": 0.54, + "grad_norm": 2.046875, + "learning_rate": 0.00016613071567378012, + "loss": 2.0405, + "step": 229465 + }, + { + "epoch": 0.54, + "grad_norm": 2.078125, + "learning_rate": 0.00016612932923092008, + "loss": 2.1934, + "step": 229470 + }, + { + "epoch": 0.54, + "grad_norm": 2.25, + "learning_rate": 0.00016612794276546903, + "loss": 2.0422, + "step": 229475 + }, + { + "epoch": 0.54, + "grad_norm": 2.015625, + "learning_rate": 0.00016612655627742747, + "loss": 1.9081, + "step": 229480 + }, + { + "epoch": 0.54, + "grad_norm": 2.390625, + "learning_rate": 0.0001661251697667959, + "loss": 2.2105, + "step": 229485 + }, + { + "epoch": 0.54, + "grad_norm": 2.0625, + "learning_rate": 0.00016612378323357474, + "loss": 2.0251, + "step": 229490 + }, + { + "epoch": 0.54, + "grad_norm": 2.328125, + "learning_rate": 0.00016612239667776452, + "loss": 2.2126, + "step": 229495 + }, + { + "epoch": 0.54, + "grad_norm": 2.203125, + "learning_rate": 0.00016612101009936568, + "loss": 2.0819, + "step": 229500 + }, + { + "epoch": 0.54, + "grad_norm": 1.921875, + "learning_rate": 0.0001661196234983787, + "loss": 2.0941, + "step": 229505 + }, + { + "epoch": 0.54, + "grad_norm": 2.109375, + "learning_rate": 0.00016611823687480407, + "loss": 2.1217, + "step": 229510 + }, + { + "epoch": 0.54, + "grad_norm": 2.28125, + "learning_rate": 0.00016611685022864227, + "loss": 2.1744, + "step": 229515 + }, + { + "epoch": 0.54, + "grad_norm": 1.90625, + "learning_rate": 0.00016611546355989372, + "loss": 2.1207, + "step": 229520 + }, + { + "epoch": 0.54, + "grad_norm": 2.8125, + "learning_rate": 0.00016611407686855894, + "loss": 2.1432, + "step": 229525 + }, + { + "epoch": 0.54, + "grad_norm": 1.9609375, + "learning_rate": 0.00016611269015463838, + "loss": 2.1947, + "step": 229530 + }, + { + "epoch": 0.54, + "grad_norm": 1.8046875, + "learning_rate": 0.00016611130341813253, + "loss": 1.9591, + "step": 229535 + }, + { + "epoch": 0.54, + "grad_norm": 2.484375, + "learning_rate": 0.0001661099166590419, + "loss": 1.9048, + "step": 229540 + }, + { + "epoch": 0.54, + "grad_norm": 2.21875, + "learning_rate": 0.00016610852987736688, + "loss": 2.0873, + "step": 229545 + }, + { + "epoch": 0.54, + "grad_norm": 1.921875, + "learning_rate": 0.000166107143073108, + "loss": 2.1391, + "step": 229550 + }, + { + "epoch": 0.54, + "grad_norm": 2.171875, + "learning_rate": 0.0001661057562462657, + "loss": 2.0755, + "step": 229555 + }, + { + "epoch": 0.54, + "grad_norm": 1.9375, + "learning_rate": 0.00016610436939684052, + "loss": 2.0058, + "step": 229560 + }, + { + "epoch": 0.54, + "grad_norm": 2.296875, + "learning_rate": 0.00016610298252483285, + "loss": 2.1128, + "step": 229565 + }, + { + "epoch": 0.54, + "grad_norm": 2.734375, + "learning_rate": 0.00016610159563024322, + "loss": 2.01, + "step": 229570 + }, + { + "epoch": 0.54, + "grad_norm": 2.203125, + "learning_rate": 0.00016610020871307208, + "loss": 2.0347, + "step": 229575 + }, + { + "epoch": 0.54, + "grad_norm": 2.40625, + "learning_rate": 0.0001660988217733199, + "loss": 1.9985, + "step": 229580 + }, + { + "epoch": 0.54, + "grad_norm": 1.984375, + "learning_rate": 0.0001660974348109872, + "loss": 1.913, + "step": 229585 + }, + { + "epoch": 0.54, + "grad_norm": 2.15625, + "learning_rate": 0.00016609604782607436, + "loss": 2.1345, + "step": 229590 + }, + { + "epoch": 0.54, + "grad_norm": 2.3125, + "learning_rate": 0.00016609466081858197, + "loss": 2.0307, + "step": 229595 + }, + { + "epoch": 0.54, + "grad_norm": 1.7421875, + "learning_rate": 0.00016609327378851041, + "loss": 1.9968, + "step": 229600 + }, + { + "epoch": 0.54, + "grad_norm": 2.5625, + "learning_rate": 0.00016609188673586021, + "loss": 2.1021, + "step": 229605 + }, + { + "epoch": 0.54, + "grad_norm": 2.28125, + "learning_rate": 0.0001660904996606318, + "loss": 2.1712, + "step": 229610 + }, + { + "epoch": 0.54, + "grad_norm": 2.28125, + "learning_rate": 0.00016608911256282567, + "loss": 1.8236, + "step": 229615 + }, + { + "epoch": 0.54, + "grad_norm": 1.7578125, + "learning_rate": 0.00016608772544244232, + "loss": 2.1729, + "step": 229620 + }, + { + "epoch": 0.54, + "grad_norm": 2.140625, + "learning_rate": 0.00016608633829948223, + "loss": 2.1796, + "step": 229625 + }, + { + "epoch": 0.54, + "grad_norm": 1.7734375, + "learning_rate": 0.0001660849511339458, + "loss": 2.0584, + "step": 229630 + }, + { + "epoch": 0.54, + "grad_norm": 2.078125, + "learning_rate": 0.00016608356394583358, + "loss": 2.2177, + "step": 229635 + }, + { + "epoch": 0.54, + "grad_norm": 2.5, + "learning_rate": 0.000166082176735146, + "loss": 2.0576, + "step": 229640 + }, + { + "epoch": 0.54, + "grad_norm": 2.140625, + "learning_rate": 0.00016608078950188358, + "loss": 2.1569, + "step": 229645 + }, + { + "epoch": 0.54, + "grad_norm": 2.421875, + "learning_rate": 0.0001660794022460467, + "loss": 2.2879, + "step": 229650 + }, + { + "epoch": 0.54, + "grad_norm": 2.484375, + "learning_rate": 0.00016607801496763594, + "loss": 2.2147, + "step": 229655 + }, + { + "epoch": 0.54, + "grad_norm": 1.828125, + "learning_rate": 0.00016607662766665175, + "loss": 2.0574, + "step": 229660 + }, + { + "epoch": 0.54, + "grad_norm": 2.375, + "learning_rate": 0.00016607524034309456, + "loss": 1.9301, + "step": 229665 + }, + { + "epoch": 0.54, + "grad_norm": 2.140625, + "learning_rate": 0.0001660738529969649, + "loss": 2.1244, + "step": 229670 + }, + { + "epoch": 0.54, + "grad_norm": 2.375, + "learning_rate": 0.0001660724656282632, + "loss": 2.1414, + "step": 229675 + }, + { + "epoch": 0.54, + "grad_norm": 1.8671875, + "learning_rate": 0.00016607107823698991, + "loss": 2.0004, + "step": 229680 + }, + { + "epoch": 0.54, + "grad_norm": 2.15625, + "learning_rate": 0.00016606969082314557, + "loss": 2.3029, + "step": 229685 + }, + { + "epoch": 0.54, + "grad_norm": 1.9921875, + "learning_rate": 0.00016606830338673062, + "loss": 2.0659, + "step": 229690 + }, + { + "epoch": 0.54, + "grad_norm": 2.421875, + "learning_rate": 0.00016606691592774557, + "loss": 2.0594, + "step": 229695 + }, + { + "epoch": 0.54, + "grad_norm": 2.046875, + "learning_rate": 0.00016606552844619082, + "loss": 2.051, + "step": 229700 + }, + { + "epoch": 0.54, + "grad_norm": 2.28125, + "learning_rate": 0.0001660641409420669, + "loss": 2.0306, + "step": 229705 + }, + { + "epoch": 0.54, + "grad_norm": 2.1875, + "learning_rate": 0.0001660627534153743, + "loss": 2.2099, + "step": 229710 + }, + { + "epoch": 0.54, + "grad_norm": 1.953125, + "learning_rate": 0.00016606136586611343, + "loss": 1.9669, + "step": 229715 + }, + { + "epoch": 0.54, + "grad_norm": 2.078125, + "learning_rate": 0.0001660599782942848, + "loss": 1.897, + "step": 229720 + }, + { + "epoch": 0.54, + "grad_norm": 2.359375, + "learning_rate": 0.00016605859069988892, + "loss": 2.2686, + "step": 229725 + }, + { + "epoch": 0.54, + "grad_norm": 1.890625, + "learning_rate": 0.00016605720308292618, + "loss": 2.0601, + "step": 229730 + }, + { + "epoch": 0.54, + "grad_norm": 2.390625, + "learning_rate": 0.00016605581544339713, + "loss": 2.1381, + "step": 229735 + }, + { + "epoch": 0.54, + "grad_norm": 2.546875, + "learning_rate": 0.0001660544277813022, + "loss": 2.1013, + "step": 229740 + }, + { + "epoch": 0.54, + "grad_norm": 2.078125, + "learning_rate": 0.0001660530400966419, + "loss": 2.1395, + "step": 229745 + }, + { + "epoch": 0.54, + "grad_norm": 2.5, + "learning_rate": 0.00016605165238941667, + "loss": 2.0051, + "step": 229750 + }, + { + "epoch": 0.54, + "grad_norm": 1.7578125, + "learning_rate": 0.000166050264659627, + "loss": 1.8915, + "step": 229755 + }, + { + "epoch": 0.54, + "grad_norm": 2.890625, + "learning_rate": 0.00016604887690727335, + "loss": 2.0036, + "step": 229760 + }, + { + "epoch": 0.54, + "grad_norm": 2.328125, + "learning_rate": 0.0001660474891323562, + "loss": 2.0932, + "step": 229765 + }, + { + "epoch": 0.54, + "grad_norm": 2.125, + "learning_rate": 0.00016604610133487605, + "loss": 2.0614, + "step": 229770 + }, + { + "epoch": 0.54, + "grad_norm": 1.9609375, + "learning_rate": 0.00016604471351483338, + "loss": 2.0763, + "step": 229775 + }, + { + "epoch": 0.54, + "grad_norm": 2.0625, + "learning_rate": 0.0001660433256722286, + "loss": 2.0302, + "step": 229780 + }, + { + "epoch": 0.54, + "grad_norm": 1.9765625, + "learning_rate": 0.0001660419378070622, + "loss": 2.0566, + "step": 229785 + }, + { + "epoch": 0.54, + "grad_norm": 2.484375, + "learning_rate": 0.00016604054991933474, + "loss": 2.133, + "step": 229790 + }, + { + "epoch": 0.54, + "grad_norm": 2.21875, + "learning_rate": 0.0001660391620090466, + "loss": 1.8798, + "step": 229795 + }, + { + "epoch": 0.54, + "grad_norm": 2.125, + "learning_rate": 0.00016603777407619828, + "loss": 2.249, + "step": 229800 + }, + { + "epoch": 0.54, + "grad_norm": 2.28125, + "learning_rate": 0.00016603638612079026, + "loss": 2.0539, + "step": 229805 + }, + { + "epoch": 0.54, + "grad_norm": 2.046875, + "learning_rate": 0.00016603499814282302, + "loss": 2.0199, + "step": 229810 + }, + { + "epoch": 0.54, + "grad_norm": 2.140625, + "learning_rate": 0.000166033610142297, + "loss": 2.1305, + "step": 229815 + }, + { + "epoch": 0.54, + "grad_norm": 2.3125, + "learning_rate": 0.0001660322221192127, + "loss": 2.2188, + "step": 229820 + }, + { + "epoch": 0.54, + "grad_norm": 2.0625, + "learning_rate": 0.00016603083407357066, + "loss": 2.0451, + "step": 229825 + }, + { + "epoch": 0.54, + "grad_norm": 2.234375, + "learning_rate": 0.00016602944600537124, + "loss": 2.055, + "step": 229830 + }, + { + "epoch": 0.54, + "grad_norm": 2.21875, + "learning_rate": 0.00016602805791461497, + "loss": 2.1545, + "step": 229835 + }, + { + "epoch": 0.54, + "grad_norm": 2.390625, + "learning_rate": 0.0001660266698013023, + "loss": 2.2067, + "step": 229840 + }, + { + "epoch": 0.54, + "grad_norm": 2.0625, + "learning_rate": 0.00016602528166543374, + "loss": 2.2159, + "step": 229845 + }, + { + "epoch": 0.54, + "grad_norm": 2.21875, + "learning_rate": 0.00016602389350700976, + "loss": 2.066, + "step": 229850 + }, + { + "epoch": 0.54, + "grad_norm": 2.5, + "learning_rate": 0.00016602250532603082, + "loss": 2.2165, + "step": 229855 + }, + { + "epoch": 0.54, + "grad_norm": 2.265625, + "learning_rate": 0.00016602111712249738, + "loss": 2.0252, + "step": 229860 + }, + { + "epoch": 0.54, + "grad_norm": 2.203125, + "learning_rate": 0.00016601972889640992, + "loss": 2.1399, + "step": 229865 + }, + { + "epoch": 0.54, + "grad_norm": 2.28125, + "learning_rate": 0.00016601834064776893, + "loss": 2.2492, + "step": 229870 + }, + { + "epoch": 0.54, + "grad_norm": 2.109375, + "learning_rate": 0.00016601695237657488, + "loss": 2.0073, + "step": 229875 + }, + { + "epoch": 0.54, + "grad_norm": 2.109375, + "learning_rate": 0.00016601556408282826, + "loss": 1.9373, + "step": 229880 + }, + { + "epoch": 0.54, + "grad_norm": 2.671875, + "learning_rate": 0.00016601417576652948, + "loss": 2.0205, + "step": 229885 + }, + { + "epoch": 0.54, + "grad_norm": 2.296875, + "learning_rate": 0.00016601278742767913, + "loss": 2.0456, + "step": 229890 + }, + { + "epoch": 0.54, + "grad_norm": 2.359375, + "learning_rate": 0.00016601139906627753, + "loss": 2.0304, + "step": 229895 + }, + { + "epoch": 0.54, + "grad_norm": 2.0, + "learning_rate": 0.00016601001068232532, + "loss": 2.0577, + "step": 229900 + }, + { + "epoch": 0.54, + "grad_norm": 2.078125, + "learning_rate": 0.00016600862227582284, + "loss": 1.8778, + "step": 229905 + }, + { + "epoch": 0.54, + "grad_norm": 2.453125, + "learning_rate": 0.00016600723384677064, + "loss": 2.2288, + "step": 229910 + }, + { + "epoch": 0.54, + "grad_norm": 2.0, + "learning_rate": 0.00016600584539516917, + "loss": 2.1279, + "step": 229915 + }, + { + "epoch": 0.54, + "grad_norm": 4.15625, + "learning_rate": 0.0001660044569210189, + "loss": 2.0154, + "step": 229920 + }, + { + "epoch": 0.54, + "grad_norm": 3.09375, + "learning_rate": 0.00016600306842432032, + "loss": 2.1789, + "step": 229925 + }, + { + "epoch": 0.54, + "grad_norm": 2.21875, + "learning_rate": 0.00016600167990507388, + "loss": 2.2788, + "step": 229930 + }, + { + "epoch": 0.54, + "grad_norm": 2.171875, + "learning_rate": 0.00016600029136328007, + "loss": 1.9928, + "step": 229935 + }, + { + "epoch": 0.54, + "grad_norm": 2.125, + "learning_rate": 0.00016599890279893938, + "loss": 2.3711, + "step": 229940 + }, + { + "epoch": 0.54, + "grad_norm": 2.078125, + "learning_rate": 0.00016599751421205228, + "loss": 2.1047, + "step": 229945 + }, + { + "epoch": 0.54, + "grad_norm": 2.234375, + "learning_rate": 0.0001659961256026192, + "loss": 2.114, + "step": 229950 + }, + { + "epoch": 0.54, + "grad_norm": 2.34375, + "learning_rate": 0.00016599473697064067, + "loss": 2.1598, + "step": 229955 + }, + { + "epoch": 0.54, + "grad_norm": 2.171875, + "learning_rate": 0.0001659933483161171, + "loss": 2.1801, + "step": 229960 + }, + { + "epoch": 0.54, + "grad_norm": 1.984375, + "learning_rate": 0.00016599195963904904, + "loss": 2.2613, + "step": 229965 + }, + { + "epoch": 0.54, + "grad_norm": 1.9375, + "learning_rate": 0.00016599057093943692, + "loss": 2.0658, + "step": 229970 + }, + { + "epoch": 0.54, + "grad_norm": 2.578125, + "learning_rate": 0.0001659891822172812, + "loss": 1.9758, + "step": 229975 + }, + { + "epoch": 0.54, + "grad_norm": 2.296875, + "learning_rate": 0.00016598779347258243, + "loss": 1.9962, + "step": 229980 + }, + { + "epoch": 0.54, + "grad_norm": 2.03125, + "learning_rate": 0.000165986404705341, + "loss": 1.9437, + "step": 229985 + }, + { + "epoch": 0.54, + "grad_norm": 2.0625, + "learning_rate": 0.00016598501591555745, + "loss": 2.3095, + "step": 229990 + }, + { + "epoch": 0.54, + "grad_norm": 1.7109375, + "learning_rate": 0.0001659836271032322, + "loss": 1.9837, + "step": 229995 + }, + { + "epoch": 0.54, + "grad_norm": 1.921875, + "learning_rate": 0.00016598223826836574, + "loss": 2.2264, + "step": 230000 + }, + { + "epoch": 0.54, + "grad_norm": 1.96875, + "learning_rate": 0.00016598084941095854, + "loss": 2.1608, + "step": 230005 + }, + { + "epoch": 0.54, + "grad_norm": 2.21875, + "learning_rate": 0.0001659794605310111, + "loss": 2.1123, + "step": 230010 + }, + { + "epoch": 0.54, + "grad_norm": 1.9609375, + "learning_rate": 0.0001659780716285239, + "loss": 2.3223, + "step": 230015 + }, + { + "epoch": 0.54, + "grad_norm": 2.375, + "learning_rate": 0.00016597668270349738, + "loss": 2.2672, + "step": 230020 + }, + { + "epoch": 0.54, + "grad_norm": 1.7421875, + "learning_rate": 0.00016597529375593202, + "loss": 2.0875, + "step": 230025 + }, + { + "epoch": 0.54, + "grad_norm": 2.015625, + "learning_rate": 0.0001659739047858283, + "loss": 2.0925, + "step": 230030 + }, + { + "epoch": 0.54, + "grad_norm": 1.9609375, + "learning_rate": 0.00016597251579318672, + "loss": 2.1179, + "step": 230035 + }, + { + "epoch": 0.54, + "grad_norm": 2.28125, + "learning_rate": 0.0001659711267780077, + "loss": 2.0268, + "step": 230040 + }, + { + "epoch": 0.54, + "grad_norm": 2.09375, + "learning_rate": 0.00016596973774029173, + "loss": 1.9052, + "step": 230045 + }, + { + "epoch": 0.54, + "grad_norm": 2.046875, + "learning_rate": 0.00016596834868003936, + "loss": 2.2169, + "step": 230050 + }, + { + "epoch": 0.54, + "grad_norm": 2.34375, + "learning_rate": 0.00016596695959725097, + "loss": 2.2387, + "step": 230055 + }, + { + "epoch": 0.54, + "grad_norm": 2.296875, + "learning_rate": 0.0001659655704919271, + "loss": 2.0676, + "step": 230060 + }, + { + "epoch": 0.54, + "grad_norm": 1.9765625, + "learning_rate": 0.00016596418136406819, + "loss": 2.1814, + "step": 230065 + }, + { + "epoch": 0.54, + "grad_norm": 2.28125, + "learning_rate": 0.00016596279221367467, + "loss": 2.1939, + "step": 230070 + }, + { + "epoch": 0.54, + "grad_norm": 2.15625, + "learning_rate": 0.00016596140304074713, + "loss": 2.0707, + "step": 230075 + }, + { + "epoch": 0.54, + "grad_norm": 2.296875, + "learning_rate": 0.0001659600138452859, + "loss": 2.0096, + "step": 230080 + }, + { + "epoch": 0.54, + "grad_norm": 2.25, + "learning_rate": 0.00016595862462729158, + "loss": 2.0423, + "step": 230085 + }, + { + "epoch": 0.54, + "grad_norm": 2.078125, + "learning_rate": 0.00016595723538676463, + "loss": 2.1695, + "step": 230090 + }, + { + "epoch": 0.54, + "grad_norm": 2.21875, + "learning_rate": 0.00016595584612370546, + "loss": 2.1451, + "step": 230095 + }, + { + "epoch": 0.54, + "grad_norm": 2.34375, + "learning_rate": 0.00016595445683811457, + "loss": 1.9112, + "step": 230100 + }, + { + "epoch": 0.54, + "grad_norm": 2.0, + "learning_rate": 0.00016595306752999244, + "loss": 2.0639, + "step": 230105 + }, + { + "epoch": 0.54, + "grad_norm": 2.171875, + "learning_rate": 0.00016595167819933955, + "loss": 2.2092, + "step": 230110 + }, + { + "epoch": 0.54, + "grad_norm": 1.8828125, + "learning_rate": 0.00016595028884615638, + "loss": 2.1001, + "step": 230115 + }, + { + "epoch": 0.54, + "grad_norm": 2.171875, + "learning_rate": 0.00016594889947044338, + "loss": 2.2222, + "step": 230120 + }, + { + "epoch": 0.54, + "grad_norm": 2.34375, + "learning_rate": 0.00016594751007220103, + "loss": 2.1081, + "step": 230125 + }, + { + "epoch": 0.54, + "grad_norm": 2.03125, + "learning_rate": 0.00016594612065142986, + "loss": 2.0366, + "step": 230130 + }, + { + "epoch": 0.54, + "grad_norm": 2.28125, + "learning_rate": 0.00016594473120813027, + "loss": 2.1872, + "step": 230135 + }, + { + "epoch": 0.54, + "grad_norm": 2.140625, + "learning_rate": 0.00016594334174230277, + "loss": 2.0259, + "step": 230140 + }, + { + "epoch": 0.54, + "grad_norm": 1.953125, + "learning_rate": 0.00016594195225394783, + "loss": 1.852, + "step": 230145 + }, + { + "epoch": 0.54, + "grad_norm": 2.34375, + "learning_rate": 0.0001659405627430659, + "loss": 2.0953, + "step": 230150 + }, + { + "epoch": 0.54, + "grad_norm": 1.8828125, + "learning_rate": 0.0001659391732096575, + "loss": 2.0893, + "step": 230155 + }, + { + "epoch": 0.54, + "grad_norm": 2.265625, + "learning_rate": 0.00016593778365372308, + "loss": 2.2511, + "step": 230160 + }, + { + "epoch": 0.54, + "grad_norm": 2.640625, + "learning_rate": 0.0001659363940752631, + "loss": 2.0769, + "step": 230165 + }, + { + "epoch": 0.54, + "grad_norm": 2.1875, + "learning_rate": 0.00016593500447427807, + "loss": 2.2704, + "step": 230170 + }, + { + "epoch": 0.54, + "grad_norm": 2.578125, + "learning_rate": 0.00016593361485076847, + "loss": 2.2805, + "step": 230175 + }, + { + "epoch": 0.54, + "grad_norm": 1.5703125, + "learning_rate": 0.0001659322252047347, + "loss": 2.1656, + "step": 230180 + }, + { + "epoch": 0.54, + "grad_norm": 1.7734375, + "learning_rate": 0.00016593083553617733, + "loss": 2.1656, + "step": 230185 + }, + { + "epoch": 0.54, + "grad_norm": 2.0625, + "learning_rate": 0.00016592944584509675, + "loss": 2.044, + "step": 230190 + }, + { + "epoch": 0.54, + "grad_norm": 2.25, + "learning_rate": 0.00016592805613149352, + "loss": 2.1355, + "step": 230195 + }, + { + "epoch": 0.54, + "grad_norm": 2.453125, + "learning_rate": 0.00016592666639536806, + "loss": 2.0305, + "step": 230200 + }, + { + "epoch": 0.54, + "grad_norm": 2.34375, + "learning_rate": 0.00016592527663672084, + "loss": 2.0905, + "step": 230205 + }, + { + "epoch": 0.54, + "grad_norm": 2.171875, + "learning_rate": 0.00016592388685555232, + "loss": 2.2179, + "step": 230210 + }, + { + "epoch": 0.54, + "grad_norm": 2.046875, + "learning_rate": 0.00016592249705186304, + "loss": 2.1932, + "step": 230215 + }, + { + "epoch": 0.54, + "grad_norm": 2.0625, + "learning_rate": 0.00016592110722565346, + "loss": 1.9358, + "step": 230220 + }, + { + "epoch": 0.54, + "grad_norm": 2.25, + "learning_rate": 0.00016591971737692403, + "loss": 1.8901, + "step": 230225 + }, + { + "epoch": 0.54, + "grad_norm": 1.953125, + "learning_rate": 0.0001659183275056752, + "loss": 2.0705, + "step": 230230 + }, + { + "epoch": 0.54, + "grad_norm": 2.265625, + "learning_rate": 0.0001659169376119075, + "loss": 2.0239, + "step": 230235 + }, + { + "epoch": 0.54, + "grad_norm": 2.265625, + "learning_rate": 0.00016591554769562135, + "loss": 2.2552, + "step": 230240 + }, + { + "epoch": 0.54, + "grad_norm": 1.9765625, + "learning_rate": 0.00016591415775681724, + "loss": 2.1492, + "step": 230245 + }, + { + "epoch": 0.54, + "grad_norm": 2.0625, + "learning_rate": 0.00016591276779549569, + "loss": 2.1452, + "step": 230250 + }, + { + "epoch": 0.54, + "grad_norm": 2.046875, + "learning_rate": 0.00016591137781165715, + "loss": 2.0282, + "step": 230255 + }, + { + "epoch": 0.54, + "grad_norm": 2.125, + "learning_rate": 0.0001659099878053021, + "loss": 2.0929, + "step": 230260 + }, + { + "epoch": 0.54, + "grad_norm": 2.140625, + "learning_rate": 0.000165908597776431, + "loss": 1.9355, + "step": 230265 + }, + { + "epoch": 0.54, + "grad_norm": 2.28125, + "learning_rate": 0.00016590720772504427, + "loss": 2.1482, + "step": 230270 + }, + { + "epoch": 0.54, + "grad_norm": 2.453125, + "learning_rate": 0.00016590581765114245, + "loss": 2.0788, + "step": 230275 + }, + { + "epoch": 0.54, + "grad_norm": 2.0625, + "learning_rate": 0.00016590442755472606, + "loss": 2.2292, + "step": 230280 + }, + { + "epoch": 0.54, + "grad_norm": 2.296875, + "learning_rate": 0.0001659030374357955, + "loss": 2.1249, + "step": 230285 + }, + { + "epoch": 0.54, + "grad_norm": 1.96875, + "learning_rate": 0.00016590164729435127, + "loss": 2.0617, + "step": 230290 + }, + { + "epoch": 0.54, + "grad_norm": 2.078125, + "learning_rate": 0.00016590025713039382, + "loss": 2.2094, + "step": 230295 + }, + { + "epoch": 0.54, + "grad_norm": 1.8671875, + "learning_rate": 0.0001658988669439237, + "loss": 2.0756, + "step": 230300 + }, + { + "epoch": 0.54, + "grad_norm": 2.0625, + "learning_rate": 0.00016589747673494127, + "loss": 1.9482, + "step": 230305 + }, + { + "epoch": 0.54, + "grad_norm": 3.640625, + "learning_rate": 0.0001658960865034471, + "loss": 1.9923, + "step": 230310 + }, + { + "epoch": 0.54, + "grad_norm": 2.21875, + "learning_rate": 0.00016589469624944163, + "loss": 2.1314, + "step": 230315 + }, + { + "epoch": 0.54, + "grad_norm": 2.1875, + "learning_rate": 0.00016589330597292535, + "loss": 2.0328, + "step": 230320 + }, + { + "epoch": 0.54, + "grad_norm": 2.171875, + "learning_rate": 0.00016589191567389867, + "loss": 2.0447, + "step": 230325 + }, + { + "epoch": 0.54, + "grad_norm": 1.9296875, + "learning_rate": 0.00016589052535236217, + "loss": 2.051, + "step": 230330 + }, + { + "epoch": 0.54, + "grad_norm": 2.421875, + "learning_rate": 0.00016588913500831628, + "loss": 2.2616, + "step": 230335 + }, + { + "epoch": 0.54, + "grad_norm": 2.328125, + "learning_rate": 0.0001658877446417614, + "loss": 2.1574, + "step": 230340 + }, + { + "epoch": 0.54, + "grad_norm": 2.421875, + "learning_rate": 0.00016588635425269813, + "loss": 2.0481, + "step": 230345 + }, + { + "epoch": 0.54, + "grad_norm": 1.734375, + "learning_rate": 0.00016588496384112685, + "loss": 2.2079, + "step": 230350 + }, + { + "epoch": 0.54, + "grad_norm": 2.296875, + "learning_rate": 0.00016588357340704808, + "loss": 2.0992, + "step": 230355 + }, + { + "epoch": 0.54, + "grad_norm": 1.8203125, + "learning_rate": 0.0001658821829504623, + "loss": 2.0341, + "step": 230360 + }, + { + "epoch": 0.54, + "grad_norm": 1.96875, + "learning_rate": 0.00016588079247136997, + "loss": 1.9772, + "step": 230365 + }, + { + "epoch": 0.54, + "grad_norm": 1.9765625, + "learning_rate": 0.00016587940196977158, + "loss": 2.1177, + "step": 230370 + }, + { + "epoch": 0.54, + "grad_norm": 2.40625, + "learning_rate": 0.00016587801144566755, + "loss": 2.1356, + "step": 230375 + }, + { + "epoch": 0.54, + "grad_norm": 2.09375, + "learning_rate": 0.00016587662089905845, + "loss": 2.224, + "step": 230380 + }, + { + "epoch": 0.54, + "grad_norm": 1.8984375, + "learning_rate": 0.00016587523032994466, + "loss": 2.1265, + "step": 230385 + }, + { + "epoch": 0.54, + "grad_norm": 1.9375, + "learning_rate": 0.0001658738397383267, + "loss": 2.0495, + "step": 230390 + }, + { + "epoch": 0.54, + "grad_norm": 2.328125, + "learning_rate": 0.00016587244912420504, + "loss": 2.2481, + "step": 230395 + }, + { + "epoch": 0.54, + "grad_norm": 2.046875, + "learning_rate": 0.00016587105848758017, + "loss": 1.9715, + "step": 230400 + }, + { + "epoch": 0.54, + "grad_norm": 2.84375, + "learning_rate": 0.00016586966782845255, + "loss": 2.1342, + "step": 230405 + }, + { + "epoch": 0.54, + "grad_norm": 1.9375, + "learning_rate": 0.00016586827714682267, + "loss": 2.026, + "step": 230410 + }, + { + "epoch": 0.54, + "grad_norm": 1.90625, + "learning_rate": 0.00016586688644269098, + "loss": 2.0808, + "step": 230415 + }, + { + "epoch": 0.54, + "grad_norm": 2.359375, + "learning_rate": 0.00016586549571605795, + "loss": 2.058, + "step": 230420 + }, + { + "epoch": 0.54, + "grad_norm": 2.015625, + "learning_rate": 0.0001658641049669241, + "loss": 2.1068, + "step": 230425 + }, + { + "epoch": 0.54, + "grad_norm": 2.515625, + "learning_rate": 0.00016586271419528984, + "loss": 2.2217, + "step": 230430 + }, + { + "epoch": 0.54, + "grad_norm": 2.46875, + "learning_rate": 0.0001658613234011557, + "loss": 1.9623, + "step": 230435 + }, + { + "epoch": 0.54, + "grad_norm": 1.90625, + "learning_rate": 0.00016585993258452216, + "loss": 2.1107, + "step": 230440 + }, + { + "epoch": 0.54, + "grad_norm": 2.390625, + "learning_rate": 0.0001658585417453897, + "loss": 2.0561, + "step": 230445 + }, + { + "epoch": 0.54, + "grad_norm": 2.328125, + "learning_rate": 0.00016585715088375872, + "loss": 2.2171, + "step": 230450 + }, + { + "epoch": 0.54, + "grad_norm": 2.609375, + "learning_rate": 0.00016585575999962973, + "loss": 2.0931, + "step": 230455 + }, + { + "epoch": 0.54, + "grad_norm": 1.9375, + "learning_rate": 0.00016585436909300323, + "loss": 2.0571, + "step": 230460 + }, + { + "epoch": 0.54, + "grad_norm": 2.015625, + "learning_rate": 0.00016585297816387967, + "loss": 2.0363, + "step": 230465 + }, + { + "epoch": 0.54, + "grad_norm": 2.0625, + "learning_rate": 0.00016585158721225957, + "loss": 2.1662, + "step": 230470 + }, + { + "epoch": 0.54, + "grad_norm": 2.34375, + "learning_rate": 0.00016585019623814335, + "loss": 2.0479, + "step": 230475 + }, + { + "epoch": 0.54, + "grad_norm": 1.765625, + "learning_rate": 0.00016584880524153154, + "loss": 2.0536, + "step": 230480 + }, + { + "epoch": 0.54, + "grad_norm": 2.109375, + "learning_rate": 0.00016584741422242454, + "loss": 2.0283, + "step": 230485 + }, + { + "epoch": 0.54, + "grad_norm": 2.046875, + "learning_rate": 0.00016584602318082289, + "loss": 2.2546, + "step": 230490 + }, + { + "epoch": 0.54, + "grad_norm": 1.875, + "learning_rate": 0.00016584463211672708, + "loss": 1.7948, + "step": 230495 + }, + { + "epoch": 0.54, + "grad_norm": 2.125, + "learning_rate": 0.00016584324103013752, + "loss": 2.0559, + "step": 230500 + }, + { + "epoch": 0.54, + "grad_norm": 2.0625, + "learning_rate": 0.0001658418499210547, + "loss": 2.033, + "step": 230505 + }, + { + "epoch": 0.54, + "grad_norm": 2.109375, + "learning_rate": 0.00016584045878947912, + "loss": 2.1807, + "step": 230510 + }, + { + "epoch": 0.54, + "grad_norm": 2.203125, + "learning_rate": 0.00016583906763541125, + "loss": 2.0889, + "step": 230515 + }, + { + "epoch": 0.54, + "grad_norm": 2.421875, + "learning_rate": 0.00016583767645885153, + "loss": 2.1184, + "step": 230520 + }, + { + "epoch": 0.54, + "grad_norm": 2.140625, + "learning_rate": 0.0001658362852598005, + "loss": 2.1264, + "step": 230525 + }, + { + "epoch": 0.54, + "grad_norm": 2.359375, + "learning_rate": 0.0001658348940382586, + "loss": 2.1837, + "step": 230530 + }, + { + "epoch": 0.54, + "grad_norm": 2.125, + "learning_rate": 0.00016583350279422628, + "loss": 2.2068, + "step": 230535 + }, + { + "epoch": 0.54, + "grad_norm": 2.4375, + "learning_rate": 0.00016583211152770407, + "loss": 2.1397, + "step": 230540 + }, + { + "epoch": 0.54, + "grad_norm": 2.265625, + "learning_rate": 0.00016583072023869242, + "loss": 2.0977, + "step": 230545 + }, + { + "epoch": 0.54, + "grad_norm": 1.953125, + "learning_rate": 0.00016582932892719179, + "loss": 2.0801, + "step": 230550 + }, + { + "epoch": 0.54, + "grad_norm": 2.46875, + "learning_rate": 0.00016582793759320269, + "loss": 2.2342, + "step": 230555 + }, + { + "epoch": 0.54, + "grad_norm": 2.3125, + "learning_rate": 0.00016582654623672552, + "loss": 2.3001, + "step": 230560 + }, + { + "epoch": 0.54, + "grad_norm": 1.9609375, + "learning_rate": 0.00016582515485776082, + "loss": 2.0512, + "step": 230565 + }, + { + "epoch": 0.54, + "grad_norm": 2.140625, + "learning_rate": 0.00016582376345630908, + "loss": 2.068, + "step": 230570 + }, + { + "epoch": 0.54, + "grad_norm": 2.203125, + "learning_rate": 0.00016582237203237074, + "loss": 2.0333, + "step": 230575 + }, + { + "epoch": 0.54, + "grad_norm": 2.453125, + "learning_rate": 0.0001658209805859463, + "loss": 1.9796, + "step": 230580 + }, + { + "epoch": 0.54, + "grad_norm": 1.8203125, + "learning_rate": 0.0001658195891170362, + "loss": 2.1391, + "step": 230585 + }, + { + "epoch": 0.54, + "grad_norm": 2.1875, + "learning_rate": 0.00016581819762564095, + "loss": 2.0415, + "step": 230590 + }, + { + "epoch": 0.54, + "grad_norm": 1.84375, + "learning_rate": 0.00016581680611176097, + "loss": 2.1863, + "step": 230595 + }, + { + "epoch": 0.54, + "grad_norm": 1.9375, + "learning_rate": 0.0001658154145753968, + "loss": 2.0945, + "step": 230600 + }, + { + "epoch": 0.54, + "grad_norm": 2.0, + "learning_rate": 0.00016581402301654892, + "loss": 2.0808, + "step": 230605 + }, + { + "epoch": 0.54, + "grad_norm": 2.25, + "learning_rate": 0.00016581263143521774, + "loss": 2.0685, + "step": 230610 + }, + { + "epoch": 0.54, + "grad_norm": 1.828125, + "learning_rate": 0.0001658112398314038, + "loss": 2.2292, + "step": 230615 + }, + { + "epoch": 0.54, + "grad_norm": 2.234375, + "learning_rate": 0.0001658098482051075, + "loss": 2.0437, + "step": 230620 + }, + { + "epoch": 0.54, + "grad_norm": 2.296875, + "learning_rate": 0.00016580845655632943, + "loss": 2.2257, + "step": 230625 + }, + { + "epoch": 0.54, + "grad_norm": 2.125, + "learning_rate": 0.00016580706488506994, + "loss": 2.0289, + "step": 230630 + }, + { + "epoch": 0.54, + "grad_norm": 2.265625, + "learning_rate": 0.00016580567319132955, + "loss": 2.0648, + "step": 230635 + }, + { + "epoch": 0.54, + "grad_norm": 1.96875, + "learning_rate": 0.0001658042814751088, + "loss": 2.1781, + "step": 230640 + }, + { + "epoch": 0.54, + "grad_norm": 2.25, + "learning_rate": 0.00016580288973640808, + "loss": 2.103, + "step": 230645 + }, + { + "epoch": 0.54, + "grad_norm": 2.078125, + "learning_rate": 0.00016580149797522793, + "loss": 2.1077, + "step": 230650 + }, + { + "epoch": 0.54, + "grad_norm": 2.265625, + "learning_rate": 0.0001658001061915688, + "loss": 2.0936, + "step": 230655 + }, + { + "epoch": 0.54, + "grad_norm": 2.265625, + "learning_rate": 0.00016579871438543111, + "loss": 2.1761, + "step": 230660 + }, + { + "epoch": 0.54, + "grad_norm": 1.7890625, + "learning_rate": 0.0001657973225568154, + "loss": 2.3544, + "step": 230665 + }, + { + "epoch": 0.54, + "grad_norm": 1.8828125, + "learning_rate": 0.00016579593070572215, + "loss": 1.9453, + "step": 230670 + }, + { + "epoch": 0.54, + "grad_norm": 2.328125, + "learning_rate": 0.00016579453883215184, + "loss": 2.1549, + "step": 230675 + }, + { + "epoch": 0.54, + "grad_norm": 2.453125, + "learning_rate": 0.00016579314693610487, + "loss": 2.1118, + "step": 230680 + }, + { + "epoch": 0.54, + "grad_norm": 2.078125, + "learning_rate": 0.0001657917550175818, + "loss": 2.0874, + "step": 230685 + }, + { + "epoch": 0.54, + "grad_norm": 2.46875, + "learning_rate": 0.0001657903630765831, + "loss": 2.1437, + "step": 230690 + }, + { + "epoch": 0.54, + "grad_norm": 2.234375, + "learning_rate": 0.0001657889711131092, + "loss": 2.045, + "step": 230695 + }, + { + "epoch": 0.54, + "grad_norm": 2.25, + "learning_rate": 0.00016578757912716054, + "loss": 2.2446, + "step": 230700 + }, + { + "epoch": 0.54, + "grad_norm": 2.390625, + "learning_rate": 0.00016578618711873767, + "loss": 2.1385, + "step": 230705 + }, + { + "epoch": 0.54, + "grad_norm": 2.359375, + "learning_rate": 0.0001657847950878411, + "loss": 2.1467, + "step": 230710 + }, + { + "epoch": 0.54, + "grad_norm": 2.390625, + "learning_rate": 0.0001657834030344712, + "loss": 1.8784, + "step": 230715 + }, + { + "epoch": 0.54, + "grad_norm": 2.15625, + "learning_rate": 0.00016578201095862851, + "loss": 2.2402, + "step": 230720 + }, + { + "epoch": 0.54, + "grad_norm": 1.828125, + "learning_rate": 0.00016578061886031347, + "loss": 2.0561, + "step": 230725 + }, + { + "epoch": 0.54, + "grad_norm": 2.453125, + "learning_rate": 0.0001657792267395266, + "loss": 2.0811, + "step": 230730 + }, + { + "epoch": 0.54, + "grad_norm": 2.265625, + "learning_rate": 0.00016577783459626838, + "loss": 2.142, + "step": 230735 + }, + { + "epoch": 0.54, + "grad_norm": 2.265625, + "learning_rate": 0.00016577644243053925, + "loss": 2.2327, + "step": 230740 + }, + { + "epoch": 0.54, + "grad_norm": 2.296875, + "learning_rate": 0.00016577505024233967, + "loss": 2.2351, + "step": 230745 + }, + { + "epoch": 0.54, + "grad_norm": 2.65625, + "learning_rate": 0.00016577365803167014, + "loss": 2.0976, + "step": 230750 + }, + { + "epoch": 0.54, + "grad_norm": 2.265625, + "learning_rate": 0.00016577226579853115, + "loss": 2.2778, + "step": 230755 + }, + { + "epoch": 0.54, + "grad_norm": 2.53125, + "learning_rate": 0.00016577087354292314, + "loss": 1.9572, + "step": 230760 + }, + { + "epoch": 0.54, + "grad_norm": 2.15625, + "learning_rate": 0.00016576948126484662, + "loss": 2.2512, + "step": 230765 + }, + { + "epoch": 0.54, + "grad_norm": 2.1875, + "learning_rate": 0.00016576808896430207, + "loss": 2.2079, + "step": 230770 + }, + { + "epoch": 0.54, + "grad_norm": 2.0625, + "learning_rate": 0.00016576669664128995, + "loss": 2.0384, + "step": 230775 + }, + { + "epoch": 0.54, + "grad_norm": 2.671875, + "learning_rate": 0.00016576530429581067, + "loss": 1.9425, + "step": 230780 + }, + { + "epoch": 0.54, + "grad_norm": 2.203125, + "learning_rate": 0.00016576391192786482, + "loss": 2.1375, + "step": 230785 + }, + { + "epoch": 0.54, + "grad_norm": 2.125, + "learning_rate": 0.00016576251953745284, + "loss": 1.9237, + "step": 230790 + }, + { + "epoch": 0.54, + "grad_norm": 1.9765625, + "learning_rate": 0.00016576112712457514, + "loss": 2.1332, + "step": 230795 + }, + { + "epoch": 0.54, + "grad_norm": 2.203125, + "learning_rate": 0.0001657597346892323, + "loss": 2.1126, + "step": 230800 + }, + { + "epoch": 0.54, + "grad_norm": 2.046875, + "learning_rate": 0.0001657583422314247, + "loss": 2.1069, + "step": 230805 + }, + { + "epoch": 0.54, + "grad_norm": 2.328125, + "learning_rate": 0.0001657569497511529, + "loss": 2.0326, + "step": 230810 + }, + { + "epoch": 0.54, + "grad_norm": 2.125, + "learning_rate": 0.0001657555572484173, + "loss": 2.1458, + "step": 230815 + }, + { + "epoch": 0.54, + "grad_norm": 2.25, + "learning_rate": 0.0001657541647232184, + "loss": 2.1446, + "step": 230820 + }, + { + "epoch": 0.54, + "grad_norm": 2.15625, + "learning_rate": 0.00016575277217555673, + "loss": 2.0306, + "step": 230825 + }, + { + "epoch": 0.54, + "grad_norm": 2.140625, + "learning_rate": 0.00016575137960543268, + "loss": 2.1383, + "step": 230830 + }, + { + "epoch": 0.54, + "grad_norm": 2.140625, + "learning_rate": 0.00016574998701284678, + "loss": 2.0099, + "step": 230835 + }, + { + "epoch": 0.54, + "grad_norm": 1.9140625, + "learning_rate": 0.0001657485943977995, + "loss": 2.1802, + "step": 230840 + }, + { + "epoch": 0.54, + "grad_norm": 2.078125, + "learning_rate": 0.0001657472017602913, + "loss": 2.2346, + "step": 230845 + }, + { + "epoch": 0.54, + "grad_norm": 2.234375, + "learning_rate": 0.00016574580910032268, + "loss": 2.1417, + "step": 230850 + }, + { + "epoch": 0.54, + "grad_norm": 2.140625, + "learning_rate": 0.00016574441641789407, + "loss": 2.1827, + "step": 230855 + }, + { + "epoch": 0.54, + "grad_norm": 1.75, + "learning_rate": 0.000165743023713006, + "loss": 2.1257, + "step": 230860 + }, + { + "epoch": 0.54, + "grad_norm": 2.28125, + "learning_rate": 0.0001657416309856589, + "loss": 2.0827, + "step": 230865 + }, + { + "epoch": 0.54, + "grad_norm": 2.171875, + "learning_rate": 0.00016574023823585327, + "loss": 2.2552, + "step": 230870 + }, + { + "epoch": 0.54, + "grad_norm": 2.265625, + "learning_rate": 0.00016573884546358956, + "loss": 2.1205, + "step": 230875 + }, + { + "epoch": 0.54, + "grad_norm": 2.0, + "learning_rate": 0.00016573745266886832, + "loss": 2.1254, + "step": 230880 + }, + { + "epoch": 0.54, + "grad_norm": 2.5, + "learning_rate": 0.00016573605985168994, + "loss": 2.0176, + "step": 230885 + }, + { + "epoch": 0.54, + "grad_norm": 2.890625, + "learning_rate": 0.00016573466701205495, + "loss": 2.1461, + "step": 230890 + }, + { + "epoch": 0.54, + "grad_norm": 2.015625, + "learning_rate": 0.0001657332741499638, + "loss": 2.1344, + "step": 230895 + }, + { + "epoch": 0.54, + "grad_norm": 1.984375, + "learning_rate": 0.00016573188126541697, + "loss": 2.1812, + "step": 230900 + }, + { + "epoch": 0.54, + "grad_norm": 1.9296875, + "learning_rate": 0.00016573048835841493, + "loss": 2.0479, + "step": 230905 + }, + { + "epoch": 0.54, + "grad_norm": 2.296875, + "learning_rate": 0.00016572909542895815, + "loss": 1.9725, + "step": 230910 + }, + { + "epoch": 0.54, + "grad_norm": 2.375, + "learning_rate": 0.00016572770247704713, + "loss": 2.0626, + "step": 230915 + }, + { + "epoch": 0.54, + "grad_norm": 2.125, + "learning_rate": 0.00016572630950268237, + "loss": 2.2152, + "step": 230920 + }, + { + "epoch": 0.54, + "grad_norm": 2.390625, + "learning_rate": 0.00016572491650586427, + "loss": 2.1217, + "step": 230925 + }, + { + "epoch": 0.54, + "grad_norm": 2.59375, + "learning_rate": 0.00016572352348659335, + "loss": 2.1176, + "step": 230930 + }, + { + "epoch": 0.54, + "grad_norm": 1.9765625, + "learning_rate": 0.0001657221304448701, + "loss": 2.1997, + "step": 230935 + }, + { + "epoch": 0.54, + "grad_norm": 2.375, + "learning_rate": 0.00016572073738069495, + "loss": 2.0223, + "step": 230940 + }, + { + "epoch": 0.54, + "grad_norm": 2.09375, + "learning_rate": 0.00016571934429406842, + "loss": 2.2474, + "step": 230945 + }, + { + "epoch": 0.54, + "grad_norm": 2.21875, + "learning_rate": 0.00016571795118499097, + "loss": 2.2025, + "step": 230950 + }, + { + "epoch": 0.54, + "grad_norm": 2.46875, + "learning_rate": 0.0001657165580534631, + "loss": 2.1187, + "step": 230955 + }, + { + "epoch": 0.54, + "grad_norm": 2.234375, + "learning_rate": 0.00016571516489948524, + "loss": 2.2168, + "step": 230960 + }, + { + "epoch": 0.54, + "grad_norm": 2.34375, + "learning_rate": 0.00016571377172305788, + "loss": 2.0361, + "step": 230965 + }, + { + "epoch": 0.54, + "grad_norm": 2.0, + "learning_rate": 0.00016571237852418152, + "loss": 2.0586, + "step": 230970 + }, + { + "epoch": 0.54, + "grad_norm": 2.09375, + "learning_rate": 0.00016571098530285662, + "loss": 2.1501, + "step": 230975 + }, + { + "epoch": 0.54, + "grad_norm": 2.28125, + "learning_rate": 0.00016570959205908364, + "loss": 2.1014, + "step": 230980 + }, + { + "epoch": 0.54, + "grad_norm": 2.296875, + "learning_rate": 0.0001657081987928631, + "loss": 1.9841, + "step": 230985 + }, + { + "epoch": 0.54, + "grad_norm": 2.21875, + "learning_rate": 0.00016570680550419538, + "loss": 1.8608, + "step": 230990 + }, + { + "epoch": 0.54, + "grad_norm": 2.265625, + "learning_rate": 0.00016570541219308109, + "loss": 2.0511, + "step": 230995 + }, + { + "epoch": 0.54, + "grad_norm": 1.6953125, + "learning_rate": 0.00016570401885952066, + "loss": 1.9398, + "step": 231000 + }, + { + "epoch": 0.54, + "grad_norm": 1.703125, + "learning_rate": 0.00016570262550351448, + "loss": 2.0843, + "step": 231005 + }, + { + "epoch": 0.54, + "grad_norm": 1.84375, + "learning_rate": 0.0001657012321250631, + "loss": 2.2819, + "step": 231010 + }, + { + "epoch": 0.54, + "grad_norm": 2.25, + "learning_rate": 0.00016569983872416702, + "loss": 2.0784, + "step": 231015 + }, + { + "epoch": 0.54, + "grad_norm": 1.9609375, + "learning_rate": 0.00016569844530082666, + "loss": 2.1486, + "step": 231020 + }, + { + "epoch": 0.54, + "grad_norm": 2.15625, + "learning_rate": 0.00016569705185504253, + "loss": 2.0991, + "step": 231025 + }, + { + "epoch": 0.54, + "grad_norm": 2.109375, + "learning_rate": 0.00016569565838681508, + "loss": 1.9866, + "step": 231030 + }, + { + "epoch": 0.54, + "grad_norm": 2.296875, + "learning_rate": 0.00016569426489614484, + "loss": 2.0092, + "step": 231035 + }, + { + "epoch": 0.54, + "grad_norm": 2.484375, + "learning_rate": 0.00016569287138303222, + "loss": 1.9818, + "step": 231040 + }, + { + "epoch": 0.54, + "grad_norm": 2.0625, + "learning_rate": 0.00016569147784747772, + "loss": 1.9559, + "step": 231045 + }, + { + "epoch": 0.54, + "grad_norm": 1.859375, + "learning_rate": 0.00016569008428948183, + "loss": 2.0861, + "step": 231050 + }, + { + "epoch": 0.54, + "grad_norm": 1.8671875, + "learning_rate": 0.000165688690709045, + "loss": 2.0312, + "step": 231055 + }, + { + "epoch": 0.54, + "grad_norm": 2.0625, + "learning_rate": 0.00016568729710616774, + "loss": 2.0323, + "step": 231060 + }, + { + "epoch": 0.54, + "grad_norm": 2.1875, + "learning_rate": 0.00016568590348085051, + "loss": 2.1555, + "step": 231065 + }, + { + "epoch": 0.54, + "grad_norm": 2.328125, + "learning_rate": 0.00016568450983309377, + "loss": 1.9592, + "step": 231070 + }, + { + "epoch": 0.54, + "grad_norm": 2.28125, + "learning_rate": 0.00016568311616289803, + "loss": 2.0643, + "step": 231075 + }, + { + "epoch": 0.54, + "grad_norm": 2.640625, + "learning_rate": 0.00016568172247026372, + "loss": 1.9047, + "step": 231080 + }, + { + "epoch": 0.54, + "grad_norm": 1.796875, + "learning_rate": 0.00016568032875519134, + "loss": 2.0786, + "step": 231085 + }, + { + "epoch": 0.54, + "grad_norm": 2.203125, + "learning_rate": 0.0001656789350176814, + "loss": 2.0274, + "step": 231090 + }, + { + "epoch": 0.54, + "grad_norm": 1.96875, + "learning_rate": 0.00016567754125773433, + "loss": 2.0239, + "step": 231095 + }, + { + "epoch": 0.54, + "grad_norm": 2.4375, + "learning_rate": 0.00016567614747535063, + "loss": 2.0214, + "step": 231100 + }, + { + "epoch": 0.54, + "grad_norm": 2.09375, + "learning_rate": 0.00016567475367053076, + "loss": 2.1831, + "step": 231105 + }, + { + "epoch": 0.54, + "grad_norm": 1.7109375, + "learning_rate": 0.00016567335984327518, + "loss": 2.1461, + "step": 231110 + }, + { + "epoch": 0.54, + "grad_norm": 1.9296875, + "learning_rate": 0.0001656719659935844, + "loss": 2.0025, + "step": 231115 + }, + { + "epoch": 0.54, + "grad_norm": 2.109375, + "learning_rate": 0.0001656705721214589, + "loss": 2.0171, + "step": 231120 + }, + { + "epoch": 0.54, + "grad_norm": 2.140625, + "learning_rate": 0.00016566917822689913, + "loss": 2.0439, + "step": 231125 + }, + { + "epoch": 0.54, + "grad_norm": 2.765625, + "learning_rate": 0.0001656677843099056, + "loss": 1.9454, + "step": 231130 + }, + { + "epoch": 0.54, + "grad_norm": 2.1875, + "learning_rate": 0.00016566639037047875, + "loss": 2.1231, + "step": 231135 + }, + { + "epoch": 0.54, + "grad_norm": 2.0625, + "learning_rate": 0.00016566499640861907, + "loss": 2.0438, + "step": 231140 + }, + { + "epoch": 0.54, + "grad_norm": 2.40625, + "learning_rate": 0.00016566360242432703, + "loss": 2.0703, + "step": 231145 + }, + { + "epoch": 0.54, + "grad_norm": 2.3125, + "learning_rate": 0.00016566220841760312, + "loss": 2.0836, + "step": 231150 + }, + { + "epoch": 0.54, + "grad_norm": 2.03125, + "learning_rate": 0.0001656608143884478, + "loss": 2.1607, + "step": 231155 + }, + { + "epoch": 0.54, + "grad_norm": 2.421875, + "learning_rate": 0.00016565942033686156, + "loss": 2.0987, + "step": 231160 + }, + { + "epoch": 0.54, + "grad_norm": 1.8984375, + "learning_rate": 0.00016565802626284488, + "loss": 2.091, + "step": 231165 + }, + { + "epoch": 0.54, + "grad_norm": 2.5, + "learning_rate": 0.0001656566321663982, + "loss": 2.0031, + "step": 231170 + }, + { + "epoch": 0.54, + "grad_norm": 2.421875, + "learning_rate": 0.00016565523804752205, + "loss": 2.0973, + "step": 231175 + }, + { + "epoch": 0.54, + "grad_norm": 1.859375, + "learning_rate": 0.0001656538439062169, + "loss": 2.1926, + "step": 231180 + }, + { + "epoch": 0.54, + "grad_norm": 1.8671875, + "learning_rate": 0.00016565244974248316, + "loss": 2.1798, + "step": 231185 + }, + { + "epoch": 0.54, + "grad_norm": 1.875, + "learning_rate": 0.00016565105555632137, + "loss": 2.2001, + "step": 231190 + }, + { + "epoch": 0.54, + "grad_norm": 1.9296875, + "learning_rate": 0.00016564966134773198, + "loss": 2.1532, + "step": 231195 + }, + { + "epoch": 0.54, + "grad_norm": 2.3125, + "learning_rate": 0.00016564826711671548, + "loss": 2.0333, + "step": 231200 + }, + { + "epoch": 0.54, + "grad_norm": 2.28125, + "learning_rate": 0.00016564687286327234, + "loss": 2.0459, + "step": 231205 + }, + { + "epoch": 0.54, + "grad_norm": 2.0, + "learning_rate": 0.00016564547858740304, + "loss": 1.7872, + "step": 231210 + }, + { + "epoch": 0.54, + "grad_norm": 2.34375, + "learning_rate": 0.00016564408428910808, + "loss": 2.1454, + "step": 231215 + }, + { + "epoch": 0.54, + "grad_norm": 2.265625, + "learning_rate": 0.00016564268996838787, + "loss": 1.9639, + "step": 231220 + }, + { + "epoch": 0.54, + "grad_norm": 2.078125, + "learning_rate": 0.00016564129562524296, + "loss": 1.9454, + "step": 231225 + }, + { + "epoch": 0.54, + "grad_norm": 2.4375, + "learning_rate": 0.0001656399012596738, + "loss": 2.0726, + "step": 231230 + }, + { + "epoch": 0.54, + "grad_norm": 2.234375, + "learning_rate": 0.00016563850687168082, + "loss": 2.1394, + "step": 231235 + }, + { + "epoch": 0.54, + "grad_norm": 2.234375, + "learning_rate": 0.00016563711246126453, + "loss": 2.0982, + "step": 231240 + }, + { + "epoch": 0.54, + "grad_norm": 2.296875, + "learning_rate": 0.00016563571802842545, + "loss": 2.0501, + "step": 231245 + }, + { + "epoch": 0.54, + "grad_norm": 1.8671875, + "learning_rate": 0.000165634323573164, + "loss": 2.1405, + "step": 231250 + }, + { + "epoch": 0.54, + "grad_norm": 2.015625, + "learning_rate": 0.0001656329290954807, + "loss": 2.1968, + "step": 231255 + }, + { + "epoch": 0.54, + "grad_norm": 2.15625, + "learning_rate": 0.00016563153459537598, + "loss": 2.1593, + "step": 231260 + }, + { + "epoch": 0.54, + "grad_norm": 2.40625, + "learning_rate": 0.00016563014007285032, + "loss": 1.9989, + "step": 231265 + }, + { + "epoch": 0.54, + "grad_norm": 2.015625, + "learning_rate": 0.00016562874552790422, + "loss": 2.2339, + "step": 231270 + }, + { + "epoch": 0.54, + "grad_norm": 1.9765625, + "learning_rate": 0.00016562735096053817, + "loss": 2.0511, + "step": 231275 + }, + { + "epoch": 0.54, + "grad_norm": 2.203125, + "learning_rate": 0.0001656259563707526, + "loss": 2.152, + "step": 231280 + }, + { + "epoch": 0.54, + "grad_norm": 2.4375, + "learning_rate": 0.00016562456175854803, + "loss": 2.0326, + "step": 231285 + }, + { + "epoch": 0.54, + "grad_norm": 2.4375, + "learning_rate": 0.0001656231671239249, + "loss": 2.1139, + "step": 231290 + }, + { + "epoch": 0.54, + "grad_norm": 2.53125, + "learning_rate": 0.00016562177246688377, + "loss": 2.0318, + "step": 231295 + }, + { + "epoch": 0.54, + "grad_norm": 2.25, + "learning_rate": 0.00016562037778742502, + "loss": 2.0711, + "step": 231300 + }, + { + "epoch": 0.54, + "grad_norm": 1.8046875, + "learning_rate": 0.0001656189830855491, + "loss": 2.2211, + "step": 231305 + }, + { + "epoch": 0.54, + "grad_norm": 2.234375, + "learning_rate": 0.00016561758836125662, + "loss": 2.0179, + "step": 231310 + }, + { + "epoch": 0.54, + "grad_norm": 2.171875, + "learning_rate": 0.00016561619361454795, + "loss": 2.0247, + "step": 231315 + }, + { + "epoch": 0.54, + "grad_norm": 2.125, + "learning_rate": 0.0001656147988454236, + "loss": 2.0623, + "step": 231320 + }, + { + "epoch": 0.54, + "grad_norm": 2.640625, + "learning_rate": 0.00016561340405388405, + "loss": 2.1639, + "step": 231325 + }, + { + "epoch": 0.54, + "grad_norm": 1.8125, + "learning_rate": 0.00016561200923992977, + "loss": 1.7734, + "step": 231330 + }, + { + "epoch": 0.54, + "grad_norm": 2.765625, + "learning_rate": 0.00016561061440356124, + "loss": 2.2224, + "step": 231335 + }, + { + "epoch": 0.54, + "grad_norm": 2.640625, + "learning_rate": 0.00016560921954477892, + "loss": 1.9217, + "step": 231340 + }, + { + "epoch": 0.54, + "grad_norm": 2.328125, + "learning_rate": 0.0001656078246635833, + "loss": 2.2967, + "step": 231345 + }, + { + "epoch": 0.54, + "grad_norm": 1.984375, + "learning_rate": 0.00016560642975997487, + "loss": 2.1397, + "step": 231350 + }, + { + "epoch": 0.54, + "grad_norm": 2.40625, + "learning_rate": 0.00016560503483395412, + "loss": 2.1807, + "step": 231355 + }, + { + "epoch": 0.54, + "grad_norm": 2.375, + "learning_rate": 0.00016560363988552145, + "loss": 2.2132, + "step": 231360 + }, + { + "epoch": 0.54, + "grad_norm": 2.3125, + "learning_rate": 0.00016560224491467742, + "loss": 1.9456, + "step": 231365 + }, + { + "epoch": 0.54, + "grad_norm": 2.359375, + "learning_rate": 0.00016560084992142245, + "loss": 2.2423, + "step": 231370 + }, + { + "epoch": 0.54, + "grad_norm": 1.9765625, + "learning_rate": 0.00016559945490575705, + "loss": 1.9918, + "step": 231375 + }, + { + "epoch": 0.54, + "grad_norm": 2.0, + "learning_rate": 0.0001655980598676817, + "loss": 2.0556, + "step": 231380 + }, + { + "epoch": 0.54, + "grad_norm": 2.0625, + "learning_rate": 0.00016559666480719685, + "loss": 2.1585, + "step": 231385 + }, + { + "epoch": 0.54, + "grad_norm": 2.234375, + "learning_rate": 0.000165595269724303, + "loss": 2.1861, + "step": 231390 + }, + { + "epoch": 0.54, + "grad_norm": 2.1875, + "learning_rate": 0.0001655938746190006, + "loss": 2.1242, + "step": 231395 + }, + { + "epoch": 0.54, + "grad_norm": 2.421875, + "learning_rate": 0.00016559247949129015, + "loss": 2.2612, + "step": 231400 + }, + { + "epoch": 0.54, + "grad_norm": 2.078125, + "learning_rate": 0.00016559108434117213, + "loss": 2.0656, + "step": 231405 + }, + { + "epoch": 0.54, + "grad_norm": 2.109375, + "learning_rate": 0.00016558968916864699, + "loss": 2.2023, + "step": 231410 + }, + { + "epoch": 0.54, + "grad_norm": 1.8984375, + "learning_rate": 0.00016558829397371523, + "loss": 2.1264, + "step": 231415 + }, + { + "epoch": 0.54, + "grad_norm": 2.578125, + "learning_rate": 0.0001655868987563773, + "loss": 2.0369, + "step": 231420 + }, + { + "epoch": 0.54, + "grad_norm": 2.171875, + "learning_rate": 0.00016558550351663374, + "loss": 2.0756, + "step": 231425 + }, + { + "epoch": 0.54, + "grad_norm": 2.375, + "learning_rate": 0.00016558410825448494, + "loss": 2.067, + "step": 231430 + }, + { + "epoch": 0.54, + "grad_norm": 2.203125, + "learning_rate": 0.00016558271296993144, + "loss": 1.7041, + "step": 231435 + }, + { + "epoch": 0.54, + "grad_norm": 2.171875, + "learning_rate": 0.00016558131766297368, + "loss": 2.1518, + "step": 231440 + }, + { + "epoch": 0.54, + "grad_norm": 2.4375, + "learning_rate": 0.00016557992233361216, + "loss": 2.0177, + "step": 231445 + }, + { + "epoch": 0.54, + "grad_norm": 1.953125, + "learning_rate": 0.00016557852698184737, + "loss": 1.8963, + "step": 231450 + }, + { + "epoch": 0.54, + "grad_norm": 1.890625, + "learning_rate": 0.00016557713160767973, + "loss": 2.2765, + "step": 231455 + }, + { + "epoch": 0.54, + "grad_norm": 2.078125, + "learning_rate": 0.0001655757362111098, + "loss": 2.1112, + "step": 231460 + }, + { + "epoch": 0.54, + "grad_norm": 2.71875, + "learning_rate": 0.00016557434079213796, + "loss": 2.1047, + "step": 231465 + }, + { + "epoch": 0.54, + "grad_norm": 2.265625, + "learning_rate": 0.00016557294535076477, + "loss": 1.9901, + "step": 231470 + }, + { + "epoch": 0.54, + "grad_norm": 2.109375, + "learning_rate": 0.00016557154988699067, + "loss": 2.07, + "step": 231475 + }, + { + "epoch": 0.54, + "grad_norm": 2.3125, + "learning_rate": 0.0001655701544008161, + "loss": 2.1513, + "step": 231480 + }, + { + "epoch": 0.54, + "grad_norm": 2.203125, + "learning_rate": 0.0001655687588922416, + "loss": 1.925, + "step": 231485 + }, + { + "epoch": 0.54, + "grad_norm": 2.296875, + "learning_rate": 0.00016556736336126762, + "loss": 2.0251, + "step": 231490 + }, + { + "epoch": 0.54, + "grad_norm": 2.328125, + "learning_rate": 0.00016556596780789468, + "loss": 1.9056, + "step": 231495 + }, + { + "epoch": 0.54, + "grad_norm": 2.40625, + "learning_rate": 0.00016556457223212317, + "loss": 2.124, + "step": 231500 + }, + { + "epoch": 0.54, + "grad_norm": 2.28125, + "learning_rate": 0.0001655631766339536, + "loss": 2.2156, + "step": 231505 + }, + { + "epoch": 0.54, + "grad_norm": 2.046875, + "learning_rate": 0.00016556178101338653, + "loss": 1.8693, + "step": 231510 + }, + { + "epoch": 0.54, + "grad_norm": 2.203125, + "learning_rate": 0.00016556038537042232, + "loss": 2.2195, + "step": 231515 + }, + { + "epoch": 0.54, + "grad_norm": 1.90625, + "learning_rate": 0.00016555898970506148, + "loss": 2.1219, + "step": 231520 + }, + { + "epoch": 0.54, + "grad_norm": 2.09375, + "learning_rate": 0.00016555759401730452, + "loss": 1.9436, + "step": 231525 + }, + { + "epoch": 0.54, + "grad_norm": 2.359375, + "learning_rate": 0.0001655561983071519, + "loss": 2.0657, + "step": 231530 + }, + { + "epoch": 0.54, + "grad_norm": 2.328125, + "learning_rate": 0.0001655548025746041, + "loss": 1.9584, + "step": 231535 + }, + { + "epoch": 0.54, + "grad_norm": 2.46875, + "learning_rate": 0.00016555340681966157, + "loss": 2.1742, + "step": 231540 + }, + { + "epoch": 0.54, + "grad_norm": 2.265625, + "learning_rate": 0.0001655520110423248, + "loss": 2.1539, + "step": 231545 + }, + { + "epoch": 0.54, + "grad_norm": 2.21875, + "learning_rate": 0.0001655506152425943, + "loss": 2.0053, + "step": 231550 + }, + { + "epoch": 0.54, + "grad_norm": 2.296875, + "learning_rate": 0.0001655492194204705, + "loss": 2.0094, + "step": 231555 + }, + { + "epoch": 0.54, + "grad_norm": 2.15625, + "learning_rate": 0.0001655478235759539, + "loss": 2.0493, + "step": 231560 + }, + { + "epoch": 0.54, + "grad_norm": 2.5, + "learning_rate": 0.000165546427709045, + "loss": 2.0742, + "step": 231565 + }, + { + "epoch": 0.54, + "grad_norm": 2.09375, + "learning_rate": 0.00016554503181974423, + "loss": 2.2378, + "step": 231570 + }, + { + "epoch": 0.54, + "grad_norm": 1.953125, + "learning_rate": 0.00016554363590805208, + "loss": 2.0973, + "step": 231575 + }, + { + "epoch": 0.54, + "grad_norm": 2.34375, + "learning_rate": 0.00016554223997396905, + "loss": 1.9912, + "step": 231580 + }, + { + "epoch": 0.54, + "grad_norm": 2.40625, + "learning_rate": 0.00016554084401749561, + "loss": 2.1372, + "step": 231585 + }, + { + "epoch": 0.55, + "grad_norm": 2.046875, + "learning_rate": 0.0001655394480386322, + "loss": 2.0426, + "step": 231590 + }, + { + "epoch": 0.55, + "grad_norm": 2.578125, + "learning_rate": 0.00016553805203737936, + "loss": 2.0472, + "step": 231595 + }, + { + "epoch": 0.55, + "grad_norm": 2.046875, + "learning_rate": 0.0001655366560137375, + "loss": 2.2184, + "step": 231600 + }, + { + "epoch": 0.55, + "grad_norm": 2.53125, + "learning_rate": 0.00016553525996770719, + "loss": 2.1399, + "step": 231605 + }, + { + "epoch": 0.55, + "grad_norm": 2.4375, + "learning_rate": 0.00016553386389928876, + "loss": 2.0991, + "step": 231610 + }, + { + "epoch": 0.55, + "grad_norm": 2.03125, + "learning_rate": 0.00016553246780848284, + "loss": 2.0929, + "step": 231615 + }, + { + "epoch": 0.55, + "grad_norm": 2.28125, + "learning_rate": 0.0001655310716952898, + "loss": 2.076, + "step": 231620 + }, + { + "epoch": 0.55, + "grad_norm": 1.9765625, + "learning_rate": 0.0001655296755597102, + "loss": 2.0791, + "step": 231625 + }, + { + "epoch": 0.55, + "grad_norm": 2.25, + "learning_rate": 0.00016552827940174443, + "loss": 2.2058, + "step": 231630 + }, + { + "epoch": 0.55, + "grad_norm": 2.203125, + "learning_rate": 0.00016552688322139304, + "loss": 2.1219, + "step": 231635 + }, + { + "epoch": 0.55, + "grad_norm": 1.9453125, + "learning_rate": 0.00016552548701865647, + "loss": 2.1986, + "step": 231640 + }, + { + "epoch": 0.55, + "grad_norm": 2.15625, + "learning_rate": 0.0001655240907935352, + "loss": 2.1006, + "step": 231645 + }, + { + "epoch": 0.55, + "grad_norm": 2.0625, + "learning_rate": 0.00016552269454602974, + "loss": 2.1103, + "step": 231650 + }, + { + "epoch": 0.55, + "grad_norm": 2.015625, + "learning_rate": 0.00016552129827614048, + "loss": 2.0396, + "step": 231655 + }, + { + "epoch": 0.55, + "grad_norm": 1.8046875, + "learning_rate": 0.00016551990198386803, + "loss": 2.0743, + "step": 231660 + }, + { + "epoch": 0.55, + "grad_norm": 2.203125, + "learning_rate": 0.00016551850566921274, + "loss": 2.0783, + "step": 231665 + }, + { + "epoch": 0.55, + "grad_norm": 2.484375, + "learning_rate": 0.00016551710933217516, + "loss": 2.0427, + "step": 231670 + }, + { + "epoch": 0.55, + "grad_norm": 1.9296875, + "learning_rate": 0.00016551571297275573, + "loss": 2.2275, + "step": 231675 + }, + { + "epoch": 0.55, + "grad_norm": 2.203125, + "learning_rate": 0.00016551431659095496, + "loss": 2.06, + "step": 231680 + }, + { + "epoch": 0.55, + "grad_norm": 2.265625, + "learning_rate": 0.00016551292018677333, + "loss": 2.202, + "step": 231685 + }, + { + "epoch": 0.55, + "grad_norm": 2.0625, + "learning_rate": 0.00016551152376021125, + "loss": 1.9766, + "step": 231690 + }, + { + "epoch": 0.55, + "grad_norm": 2.015625, + "learning_rate": 0.00016551012731126928, + "loss": 2.0314, + "step": 231695 + }, + { + "epoch": 0.55, + "grad_norm": 2.515625, + "learning_rate": 0.00016550873083994783, + "loss": 2.1667, + "step": 231700 + }, + { + "epoch": 0.55, + "grad_norm": 2.765625, + "learning_rate": 0.00016550733434624748, + "loss": 2.1149, + "step": 231705 + }, + { + "epoch": 0.55, + "grad_norm": 2.078125, + "learning_rate": 0.00016550593783016857, + "loss": 2.0668, + "step": 231710 + }, + { + "epoch": 0.55, + "grad_norm": 2.234375, + "learning_rate": 0.00016550454129171167, + "loss": 1.9384, + "step": 231715 + }, + { + "epoch": 0.55, + "grad_norm": 2.125, + "learning_rate": 0.00016550314473087722, + "loss": 2.0404, + "step": 231720 + }, + { + "epoch": 0.55, + "grad_norm": 2.65625, + "learning_rate": 0.00016550174814766573, + "loss": 1.983, + "step": 231725 + }, + { + "epoch": 0.55, + "grad_norm": 2.078125, + "learning_rate": 0.0001655003515420776, + "loss": 2.1151, + "step": 231730 + }, + { + "epoch": 0.55, + "grad_norm": 2.578125, + "learning_rate": 0.00016549895491411342, + "loss": 2.2115, + "step": 231735 + }, + { + "epoch": 0.55, + "grad_norm": 2.296875, + "learning_rate": 0.00016549755826377358, + "loss": 2.2424, + "step": 231740 + }, + { + "epoch": 0.55, + "grad_norm": 2.0625, + "learning_rate": 0.0001654961615910586, + "loss": 2.1554, + "step": 231745 + }, + { + "epoch": 0.55, + "grad_norm": 2.03125, + "learning_rate": 0.00016549476489596896, + "loss": 1.9161, + "step": 231750 + }, + { + "epoch": 0.55, + "grad_norm": 2.609375, + "learning_rate": 0.00016549336817850508, + "loss": 2.2215, + "step": 231755 + }, + { + "epoch": 0.55, + "grad_norm": 2.203125, + "learning_rate": 0.0001654919714386675, + "loss": 2.0766, + "step": 231760 + }, + { + "epoch": 0.55, + "grad_norm": 2.109375, + "learning_rate": 0.0001654905746764567, + "loss": 1.9679, + "step": 231765 + }, + { + "epoch": 0.55, + "grad_norm": 2.21875, + "learning_rate": 0.00016548917789187308, + "loss": 2.1633, + "step": 231770 + }, + { + "epoch": 0.55, + "grad_norm": 1.8515625, + "learning_rate": 0.00016548778108491717, + "loss": 2.2363, + "step": 231775 + }, + { + "epoch": 0.55, + "grad_norm": 1.8984375, + "learning_rate": 0.00016548638425558949, + "loss": 2.0852, + "step": 231780 + }, + { + "epoch": 0.55, + "grad_norm": 2.34375, + "learning_rate": 0.00016548498740389045, + "loss": 2.1458, + "step": 231785 + }, + { + "epoch": 0.55, + "grad_norm": 1.9921875, + "learning_rate": 0.00016548359052982056, + "loss": 2.0355, + "step": 231790 + }, + { + "epoch": 0.55, + "grad_norm": 2.453125, + "learning_rate": 0.00016548219363338027, + "loss": 2.0516, + "step": 231795 + }, + { + "epoch": 0.55, + "grad_norm": 2.34375, + "learning_rate": 0.0001654807967145701, + "loss": 2.1348, + "step": 231800 + }, + { + "epoch": 0.55, + "grad_norm": 2.1875, + "learning_rate": 0.00016547939977339048, + "loss": 2.0502, + "step": 231805 + }, + { + "epoch": 0.55, + "grad_norm": 2.203125, + "learning_rate": 0.00016547800280984195, + "loss": 2.2258, + "step": 231810 + }, + { + "epoch": 0.55, + "grad_norm": 2.4375, + "learning_rate": 0.0001654766058239249, + "loss": 2.0153, + "step": 231815 + }, + { + "epoch": 0.55, + "grad_norm": 2.53125, + "learning_rate": 0.00016547520881563988, + "loss": 2.2169, + "step": 231820 + }, + { + "epoch": 0.55, + "grad_norm": 2.546875, + "learning_rate": 0.00016547381178498731, + "loss": 2.136, + "step": 231825 + }, + { + "epoch": 0.55, + "grad_norm": 2.140625, + "learning_rate": 0.00016547241473196774, + "loss": 1.9558, + "step": 231830 + }, + { + "epoch": 0.55, + "grad_norm": 2.328125, + "learning_rate": 0.00016547101765658156, + "loss": 2.0232, + "step": 231835 + }, + { + "epoch": 0.55, + "grad_norm": 2.0625, + "learning_rate": 0.0001654696205588293, + "loss": 2.1319, + "step": 231840 + }, + { + "epoch": 0.55, + "grad_norm": 2.234375, + "learning_rate": 0.00016546822343871146, + "loss": 2.0436, + "step": 231845 + }, + { + "epoch": 0.55, + "grad_norm": 1.8671875, + "learning_rate": 0.00016546682629622848, + "loss": 2.1472, + "step": 231850 + }, + { + "epoch": 0.55, + "grad_norm": 2.234375, + "learning_rate": 0.00016546542913138082, + "loss": 2.1092, + "step": 231855 + }, + { + "epoch": 0.55, + "grad_norm": 2.1875, + "learning_rate": 0.00016546403194416903, + "loss": 2.001, + "step": 231860 + }, + { + "epoch": 0.55, + "grad_norm": 2.0625, + "learning_rate": 0.0001654626347345935, + "loss": 2.1604, + "step": 231865 + }, + { + "epoch": 0.55, + "grad_norm": 1.9765625, + "learning_rate": 0.00016546123750265477, + "loss": 1.9533, + "step": 231870 + }, + { + "epoch": 0.55, + "grad_norm": 2.3125, + "learning_rate": 0.00016545984024835325, + "loss": 2.0826, + "step": 231875 + }, + { + "epoch": 0.55, + "grad_norm": 2.3125, + "learning_rate": 0.00016545844297168952, + "loss": 2.1829, + "step": 231880 + }, + { + "epoch": 0.55, + "grad_norm": 2.109375, + "learning_rate": 0.00016545704567266399, + "loss": 1.9949, + "step": 231885 + }, + { + "epoch": 0.55, + "grad_norm": 2.0, + "learning_rate": 0.0001654556483512771, + "loss": 2.0716, + "step": 231890 + }, + { + "epoch": 0.55, + "grad_norm": 2.171875, + "learning_rate": 0.0001654542510075294, + "loss": 2.1396, + "step": 231895 + }, + { + "epoch": 0.55, + "grad_norm": 2.203125, + "learning_rate": 0.00016545285364142134, + "loss": 1.869, + "step": 231900 + }, + { + "epoch": 0.55, + "grad_norm": 2.40625, + "learning_rate": 0.00016545145625295339, + "loss": 2.2132, + "step": 231905 + }, + { + "epoch": 0.55, + "grad_norm": 2.0, + "learning_rate": 0.0001654500588421261, + "loss": 2.1023, + "step": 231910 + }, + { + "epoch": 0.55, + "grad_norm": 4.46875, + "learning_rate": 0.0001654486614089398, + "loss": 2.2409, + "step": 231915 + }, + { + "epoch": 0.55, + "grad_norm": 2.390625, + "learning_rate": 0.0001654472639533951, + "loss": 2.2479, + "step": 231920 + }, + { + "epoch": 0.55, + "grad_norm": 2.375, + "learning_rate": 0.0001654458664754924, + "loss": 2.2633, + "step": 231925 + }, + { + "epoch": 0.55, + "grad_norm": 2.15625, + "learning_rate": 0.00016544446897523218, + "loss": 1.9934, + "step": 231930 + }, + { + "epoch": 0.55, + "grad_norm": 1.78125, + "learning_rate": 0.000165443071452615, + "loss": 2.1328, + "step": 231935 + }, + { + "epoch": 0.55, + "grad_norm": 2.046875, + "learning_rate": 0.00016544167390764126, + "loss": 2.1446, + "step": 231940 + }, + { + "epoch": 0.55, + "grad_norm": 2.171875, + "learning_rate": 0.00016544027634031143, + "loss": 2.2955, + "step": 231945 + }, + { + "epoch": 0.55, + "grad_norm": 2.265625, + "learning_rate": 0.00016543887875062601, + "loss": 2.0341, + "step": 231950 + }, + { + "epoch": 0.55, + "grad_norm": 2.1875, + "learning_rate": 0.00016543748113858554, + "loss": 1.985, + "step": 231955 + }, + { + "epoch": 0.55, + "grad_norm": 2.046875, + "learning_rate": 0.00016543608350419039, + "loss": 2.2793, + "step": 231960 + }, + { + "epoch": 0.55, + "grad_norm": 2.203125, + "learning_rate": 0.0001654346858474411, + "loss": 2.0976, + "step": 231965 + }, + { + "epoch": 0.55, + "grad_norm": 2.3125, + "learning_rate": 0.00016543328816833816, + "loss": 2.2435, + "step": 231970 + }, + { + "epoch": 0.55, + "grad_norm": 2.59375, + "learning_rate": 0.000165431890466882, + "loss": 2.0337, + "step": 231975 + }, + { + "epoch": 0.55, + "grad_norm": 2.609375, + "learning_rate": 0.00016543049274307312, + "loss": 2.1871, + "step": 231980 + }, + { + "epoch": 0.55, + "grad_norm": 2.203125, + "learning_rate": 0.00016542909499691198, + "loss": 2.2252, + "step": 231985 + }, + { + "epoch": 0.55, + "grad_norm": 2.078125, + "learning_rate": 0.0001654276972283991, + "loss": 2.2484, + "step": 231990 + }, + { + "epoch": 0.55, + "grad_norm": 2.359375, + "learning_rate": 0.00016542629943753495, + "loss": 2.048, + "step": 231995 + }, + { + "epoch": 0.55, + "grad_norm": 2.15625, + "learning_rate": 0.00016542490162431997, + "loss": 2.1458, + "step": 232000 + }, + { + "epoch": 0.55, + "grad_norm": 2.234375, + "learning_rate": 0.00016542350378875463, + "loss": 1.9235, + "step": 232005 + }, + { + "epoch": 0.55, + "grad_norm": 2.28125, + "learning_rate": 0.00016542210593083946, + "loss": 2.0683, + "step": 232010 + }, + { + "epoch": 0.55, + "grad_norm": 2.15625, + "learning_rate": 0.00016542070805057493, + "loss": 1.9768, + "step": 232015 + }, + { + "epoch": 0.55, + "grad_norm": 2.453125, + "learning_rate": 0.0001654193101479615, + "loss": 2.2824, + "step": 232020 + }, + { + "epoch": 0.55, + "grad_norm": 2.046875, + "learning_rate": 0.00016541791222299962, + "loss": 2.0524, + "step": 232025 + }, + { + "epoch": 0.55, + "grad_norm": 1.890625, + "learning_rate": 0.00016541651427568979, + "loss": 2.1142, + "step": 232030 + }, + { + "epoch": 0.55, + "grad_norm": 2.40625, + "learning_rate": 0.0001654151163060325, + "loss": 2.1193, + "step": 232035 + }, + { + "epoch": 0.55, + "grad_norm": 2.40625, + "learning_rate": 0.0001654137183140282, + "loss": 2.0449, + "step": 232040 + }, + { + "epoch": 0.55, + "grad_norm": 2.234375, + "learning_rate": 0.00016541232029967741, + "loss": 1.9248, + "step": 232045 + }, + { + "epoch": 0.55, + "grad_norm": 2.28125, + "learning_rate": 0.0001654109222629806, + "loss": 2.0319, + "step": 232050 + }, + { + "epoch": 0.55, + "grad_norm": 2.546875, + "learning_rate": 0.0001654095242039382, + "loss": 2.3285, + "step": 232055 + }, + { + "epoch": 0.55, + "grad_norm": 1.90625, + "learning_rate": 0.00016540812612255077, + "loss": 2.0882, + "step": 232060 + }, + { + "epoch": 0.55, + "grad_norm": 2.375, + "learning_rate": 0.0001654067280188187, + "loss": 2.0596, + "step": 232065 + }, + { + "epoch": 0.55, + "grad_norm": 2.234375, + "learning_rate": 0.0001654053298927425, + "loss": 2.232, + "step": 232070 + }, + { + "epoch": 0.55, + "grad_norm": 1.703125, + "learning_rate": 0.00016540393174432265, + "loss": 1.9571, + "step": 232075 + }, + { + "epoch": 0.55, + "grad_norm": 2.484375, + "learning_rate": 0.00016540253357355967, + "loss": 2.0577, + "step": 232080 + }, + { + "epoch": 0.55, + "grad_norm": 2.0625, + "learning_rate": 0.00016540113538045397, + "loss": 2.0401, + "step": 232085 + }, + { + "epoch": 0.55, + "grad_norm": 2.28125, + "learning_rate": 0.00016539973716500606, + "loss": 2.0786, + "step": 232090 + }, + { + "epoch": 0.55, + "grad_norm": 1.90625, + "learning_rate": 0.0001653983389272164, + "loss": 2.0491, + "step": 232095 + }, + { + "epoch": 0.55, + "grad_norm": 2.359375, + "learning_rate": 0.00016539694066708548, + "loss": 2.1468, + "step": 232100 + }, + { + "epoch": 0.55, + "grad_norm": 2.0, + "learning_rate": 0.00016539554238461382, + "loss": 2.0132, + "step": 232105 + }, + { + "epoch": 0.55, + "grad_norm": 2.28125, + "learning_rate": 0.00016539414407980182, + "loss": 2.232, + "step": 232110 + }, + { + "epoch": 0.55, + "grad_norm": 2.015625, + "learning_rate": 0.00016539274575265, + "loss": 2.0263, + "step": 232115 + }, + { + "epoch": 0.55, + "grad_norm": 2.390625, + "learning_rate": 0.00016539134740315884, + "loss": 2.0882, + "step": 232120 + }, + { + "epoch": 0.55, + "grad_norm": 2.109375, + "learning_rate": 0.0001653899490313288, + "loss": 2.239, + "step": 232125 + }, + { + "epoch": 0.55, + "grad_norm": 1.78125, + "learning_rate": 0.00016538855063716036, + "loss": 1.9238, + "step": 232130 + }, + { + "epoch": 0.55, + "grad_norm": 2.1875, + "learning_rate": 0.00016538715222065404, + "loss": 2.1495, + "step": 232135 + }, + { + "epoch": 0.55, + "grad_norm": 2.015625, + "learning_rate": 0.00016538575378181026, + "loss": 2.1886, + "step": 232140 + }, + { + "epoch": 0.55, + "grad_norm": 2.328125, + "learning_rate": 0.00016538435532062953, + "loss": 2.0751, + "step": 232145 + }, + { + "epoch": 0.55, + "grad_norm": 3.21875, + "learning_rate": 0.0001653829568371123, + "loss": 2.0239, + "step": 232150 + }, + { + "epoch": 0.55, + "grad_norm": 1.953125, + "learning_rate": 0.00016538155833125908, + "loss": 2.2341, + "step": 232155 + }, + { + "epoch": 0.55, + "grad_norm": 2.109375, + "learning_rate": 0.00016538015980307035, + "loss": 2.0801, + "step": 232160 + }, + { + "epoch": 0.55, + "grad_norm": 2.328125, + "learning_rate": 0.00016537876125254656, + "loss": 2.0953, + "step": 232165 + }, + { + "epoch": 0.55, + "grad_norm": 2.140625, + "learning_rate": 0.00016537736267968817, + "loss": 2.1348, + "step": 232170 + }, + { + "epoch": 0.55, + "grad_norm": 2.21875, + "learning_rate": 0.00016537596408449572, + "loss": 1.9867, + "step": 232175 + }, + { + "epoch": 0.55, + "grad_norm": 1.9140625, + "learning_rate": 0.00016537456546696967, + "loss": 2.1703, + "step": 232180 + }, + { + "epoch": 0.55, + "grad_norm": 2.15625, + "learning_rate": 0.00016537316682711044, + "loss": 2.198, + "step": 232185 + }, + { + "epoch": 0.55, + "grad_norm": 2.09375, + "learning_rate": 0.00016537176816491854, + "loss": 2.1201, + "step": 232190 + }, + { + "epoch": 0.55, + "grad_norm": 2.125, + "learning_rate": 0.00016537036948039452, + "loss": 1.9501, + "step": 232195 + }, + { + "epoch": 0.55, + "grad_norm": 2.0, + "learning_rate": 0.00016536897077353875, + "loss": 2.1865, + "step": 232200 + }, + { + "epoch": 0.55, + "grad_norm": 2.34375, + "learning_rate": 0.00016536757204435175, + "loss": 2.0922, + "step": 232205 + }, + { + "epoch": 0.55, + "grad_norm": 1.859375, + "learning_rate": 0.00016536617329283404, + "loss": 2.0269, + "step": 232210 + }, + { + "epoch": 0.55, + "grad_norm": 2.3125, + "learning_rate": 0.00016536477451898604, + "loss": 2.1552, + "step": 232215 + }, + { + "epoch": 0.55, + "grad_norm": 2.21875, + "learning_rate": 0.00016536337572280825, + "loss": 2.1698, + "step": 232220 + }, + { + "epoch": 0.55, + "grad_norm": 1.8125, + "learning_rate": 0.00016536197690430112, + "loss": 2.0373, + "step": 232225 + }, + { + "epoch": 0.55, + "grad_norm": 2.46875, + "learning_rate": 0.00016536057806346518, + "loss": 1.9526, + "step": 232230 + }, + { + "epoch": 0.55, + "grad_norm": 2.1875, + "learning_rate": 0.00016535917920030085, + "loss": 2.233, + "step": 232235 + }, + { + "epoch": 0.55, + "grad_norm": 2.140625, + "learning_rate": 0.0001653577803148087, + "loss": 2.0929, + "step": 232240 + }, + { + "epoch": 0.55, + "grad_norm": 1.9453125, + "learning_rate": 0.00016535638140698913, + "loss": 2.0084, + "step": 232245 + }, + { + "epoch": 0.55, + "grad_norm": 1.84375, + "learning_rate": 0.0001653549824768426, + "loss": 2.1359, + "step": 232250 + }, + { + "epoch": 0.55, + "grad_norm": 2.21875, + "learning_rate": 0.00016535358352436964, + "loss": 2.0151, + "step": 232255 + }, + { + "epoch": 0.55, + "grad_norm": 1.8515625, + "learning_rate": 0.0001653521845495707, + "loss": 2.061, + "step": 232260 + }, + { + "epoch": 0.55, + "grad_norm": 2.453125, + "learning_rate": 0.0001653507855524463, + "loss": 2.0571, + "step": 232265 + }, + { + "epoch": 0.55, + "grad_norm": 2.015625, + "learning_rate": 0.00016534938653299685, + "loss": 2.1856, + "step": 232270 + }, + { + "epoch": 0.55, + "grad_norm": 2.515625, + "learning_rate": 0.0001653479874912229, + "loss": 1.8412, + "step": 232275 + }, + { + "epoch": 0.55, + "grad_norm": 2.0, + "learning_rate": 0.00016534658842712484, + "loss": 2.2326, + "step": 232280 + }, + { + "epoch": 0.55, + "grad_norm": 2.328125, + "learning_rate": 0.00016534518934070326, + "loss": 2.1477, + "step": 232285 + }, + { + "epoch": 0.55, + "grad_norm": 2.359375, + "learning_rate": 0.00016534379023195852, + "loss": 2.1571, + "step": 232290 + }, + { + "epoch": 0.55, + "grad_norm": 1.7109375, + "learning_rate": 0.0001653423911008912, + "loss": 2.187, + "step": 232295 + }, + { + "epoch": 0.55, + "grad_norm": 2.46875, + "learning_rate": 0.00016534099194750172, + "loss": 2.3593, + "step": 232300 + }, + { + "epoch": 0.55, + "grad_norm": 2.796875, + "learning_rate": 0.00016533959277179058, + "loss": 2.2122, + "step": 232305 + }, + { + "epoch": 0.55, + "grad_norm": 1.78125, + "learning_rate": 0.00016533819357375824, + "loss": 2.1948, + "step": 232310 + }, + { + "epoch": 0.55, + "grad_norm": 2.078125, + "learning_rate": 0.0001653367943534052, + "loss": 2.0074, + "step": 232315 + }, + { + "epoch": 0.55, + "grad_norm": 2.25, + "learning_rate": 0.0001653353951107319, + "loss": 2.0132, + "step": 232320 + }, + { + "epoch": 0.55, + "grad_norm": 2.0, + "learning_rate": 0.00016533399584573886, + "loss": 2.0223, + "step": 232325 + }, + { + "epoch": 0.55, + "grad_norm": 1.671875, + "learning_rate": 0.00016533259655842654, + "loss": 1.9376, + "step": 232330 + }, + { + "epoch": 0.55, + "grad_norm": 1.90625, + "learning_rate": 0.00016533119724879543, + "loss": 2.0486, + "step": 232335 + }, + { + "epoch": 0.55, + "grad_norm": 1.9921875, + "learning_rate": 0.000165329797916846, + "loss": 2.1291, + "step": 232340 + }, + { + "epoch": 0.55, + "grad_norm": 1.875, + "learning_rate": 0.0001653283985625787, + "loss": 2.0671, + "step": 232345 + }, + { + "epoch": 0.55, + "grad_norm": 1.9609375, + "learning_rate": 0.00016532699918599407, + "loss": 2.1151, + "step": 232350 + }, + { + "epoch": 0.55, + "grad_norm": 2.3125, + "learning_rate": 0.00016532559978709252, + "loss": 2.2415, + "step": 232355 + }, + { + "epoch": 0.55, + "grad_norm": 2.453125, + "learning_rate": 0.00016532420036587459, + "loss": 2.0811, + "step": 232360 + }, + { + "epoch": 0.55, + "grad_norm": 1.953125, + "learning_rate": 0.00016532280092234068, + "loss": 2.0335, + "step": 232365 + }, + { + "epoch": 0.55, + "grad_norm": 2.09375, + "learning_rate": 0.00016532140145649138, + "loss": 1.8834, + "step": 232370 + }, + { + "epoch": 0.55, + "grad_norm": 2.3125, + "learning_rate": 0.00016532000196832705, + "loss": 2.2635, + "step": 232375 + }, + { + "epoch": 0.55, + "grad_norm": 1.6328125, + "learning_rate": 0.00016531860245784828, + "loss": 1.9189, + "step": 232380 + }, + { + "epoch": 0.55, + "grad_norm": 2.03125, + "learning_rate": 0.00016531720292505545, + "loss": 2.1357, + "step": 232385 + }, + { + "epoch": 0.55, + "grad_norm": 2.28125, + "learning_rate": 0.00016531580336994907, + "loss": 2.2638, + "step": 232390 + }, + { + "epoch": 0.55, + "grad_norm": 3.203125, + "learning_rate": 0.00016531440379252964, + "loss": 2.0607, + "step": 232395 + }, + { + "epoch": 0.55, + "grad_norm": 2.140625, + "learning_rate": 0.00016531300419279766, + "loss": 2.1292, + "step": 232400 + }, + { + "epoch": 0.55, + "grad_norm": 1.6640625, + "learning_rate": 0.00016531160457075353, + "loss": 2.0327, + "step": 232405 + }, + { + "epoch": 0.55, + "grad_norm": 2.265625, + "learning_rate": 0.0001653102049263978, + "loss": 2.0262, + "step": 232410 + }, + { + "epoch": 0.55, + "grad_norm": 2.328125, + "learning_rate": 0.00016530880525973088, + "loss": 2.0712, + "step": 232415 + }, + { + "epoch": 0.55, + "grad_norm": 2.671875, + "learning_rate": 0.0001653074055707533, + "loss": 2.0773, + "step": 232420 + }, + { + "epoch": 0.55, + "grad_norm": 2.21875, + "learning_rate": 0.00016530600585946555, + "loss": 2.1794, + "step": 232425 + }, + { + "epoch": 0.55, + "grad_norm": 2.421875, + "learning_rate": 0.0001653046061258681, + "loss": 2.2909, + "step": 232430 + }, + { + "epoch": 0.55, + "grad_norm": 2.390625, + "learning_rate": 0.00016530320636996138, + "loss": 2.0971, + "step": 232435 + }, + { + "epoch": 0.55, + "grad_norm": 2.0625, + "learning_rate": 0.0001653018065917459, + "loss": 2.0102, + "step": 232440 + }, + { + "epoch": 0.55, + "grad_norm": 2.046875, + "learning_rate": 0.0001653004067912221, + "loss": 2.2825, + "step": 232445 + }, + { + "epoch": 0.55, + "grad_norm": 2.234375, + "learning_rate": 0.00016529900696839056, + "loss": 1.9564, + "step": 232450 + }, + { + "epoch": 0.55, + "grad_norm": 2.34375, + "learning_rate": 0.0001652976071232517, + "loss": 2.1608, + "step": 232455 + }, + { + "epoch": 0.55, + "grad_norm": 2.03125, + "learning_rate": 0.00016529620725580594, + "loss": 1.9923, + "step": 232460 + }, + { + "epoch": 0.55, + "grad_norm": 2.28125, + "learning_rate": 0.00016529480736605386, + "loss": 2.1307, + "step": 232465 + }, + { + "epoch": 0.55, + "grad_norm": 2.21875, + "learning_rate": 0.00016529340745399584, + "loss": 2.1731, + "step": 232470 + }, + { + "epoch": 0.55, + "grad_norm": 2.09375, + "learning_rate": 0.00016529200751963242, + "loss": 2.0467, + "step": 232475 + }, + { + "epoch": 0.55, + "grad_norm": 2.203125, + "learning_rate": 0.00016529060756296412, + "loss": 2.2221, + "step": 232480 + }, + { + "epoch": 0.55, + "grad_norm": 2.21875, + "learning_rate": 0.0001652892075839913, + "loss": 2.1802, + "step": 232485 + }, + { + "epoch": 0.55, + "grad_norm": 2.140625, + "learning_rate": 0.00016528780758271453, + "loss": 2.1723, + "step": 232490 + }, + { + "epoch": 0.55, + "grad_norm": 2.203125, + "learning_rate": 0.00016528640755913426, + "loss": 2.039, + "step": 232495 + }, + { + "epoch": 0.55, + "grad_norm": 1.7890625, + "learning_rate": 0.00016528500751325097, + "loss": 2.239, + "step": 232500 + }, + { + "epoch": 0.55, + "grad_norm": 1.875, + "learning_rate": 0.00016528360744506515, + "loss": 2.1492, + "step": 232505 + }, + { + "epoch": 0.55, + "grad_norm": 2.09375, + "learning_rate": 0.00016528220735457727, + "loss": 2.033, + "step": 232510 + }, + { + "epoch": 0.55, + "grad_norm": 1.8515625, + "learning_rate": 0.00016528080724178778, + "loss": 1.9669, + "step": 232515 + }, + { + "epoch": 0.55, + "grad_norm": 2.078125, + "learning_rate": 0.00016527940710669715, + "loss": 2.0751, + "step": 232520 + }, + { + "epoch": 0.55, + "grad_norm": 2.390625, + "learning_rate": 0.00016527800694930594, + "loss": 2.2327, + "step": 232525 + }, + { + "epoch": 0.55, + "grad_norm": 1.875, + "learning_rate": 0.0001652766067696146, + "loss": 2.0472, + "step": 232530 + }, + { + "epoch": 0.55, + "grad_norm": 2.34375, + "learning_rate": 0.00016527520656762354, + "loss": 2.0317, + "step": 232535 + }, + { + "epoch": 0.55, + "grad_norm": 2.59375, + "learning_rate": 0.0001652738063433333, + "loss": 2.0916, + "step": 232540 + }, + { + "epoch": 0.55, + "grad_norm": 2.078125, + "learning_rate": 0.00016527240609674434, + "loss": 2.157, + "step": 232545 + }, + { + "epoch": 0.55, + "grad_norm": 2.140625, + "learning_rate": 0.00016527100582785711, + "loss": 2.1879, + "step": 232550 + }, + { + "epoch": 0.55, + "grad_norm": 1.8671875, + "learning_rate": 0.0001652696055366722, + "loss": 2.2779, + "step": 232555 + }, + { + "epoch": 0.55, + "grad_norm": 2.25, + "learning_rate": 0.00016526820522318997, + "loss": 2.0138, + "step": 232560 + }, + { + "epoch": 0.55, + "grad_norm": 2.453125, + "learning_rate": 0.0001652668048874109, + "loss": 2.0207, + "step": 232565 + }, + { + "epoch": 0.55, + "grad_norm": 2.21875, + "learning_rate": 0.00016526540452933556, + "loss": 2.0764, + "step": 232570 + }, + { + "epoch": 0.55, + "grad_norm": 2.1875, + "learning_rate": 0.00016526400414896436, + "loss": 2.0453, + "step": 232575 + }, + { + "epoch": 0.55, + "grad_norm": 2.34375, + "learning_rate": 0.00016526260374629778, + "loss": 2.0638, + "step": 232580 + }, + { + "epoch": 0.55, + "grad_norm": 1.75, + "learning_rate": 0.00016526120332133632, + "loss": 2.0392, + "step": 232585 + }, + { + "epoch": 0.55, + "grad_norm": 2.53125, + "learning_rate": 0.00016525980287408046, + "loss": 2.1259, + "step": 232590 + }, + { + "epoch": 0.55, + "grad_norm": 2.03125, + "learning_rate": 0.00016525840240453065, + "loss": 2.1302, + "step": 232595 + }, + { + "epoch": 0.55, + "grad_norm": 2.296875, + "learning_rate": 0.00016525700191268738, + "loss": 2.0239, + "step": 232600 + }, + { + "epoch": 0.55, + "grad_norm": 2.125, + "learning_rate": 0.00016525560139855113, + "loss": 1.9505, + "step": 232605 + }, + { + "epoch": 0.55, + "grad_norm": 2.3125, + "learning_rate": 0.0001652542008621224, + "loss": 2.1406, + "step": 232610 + }, + { + "epoch": 0.55, + "grad_norm": 2.5625, + "learning_rate": 0.00016525280030340166, + "loss": 2.0767, + "step": 232615 + }, + { + "epoch": 0.55, + "grad_norm": 2.234375, + "learning_rate": 0.00016525139972238938, + "loss": 2.0487, + "step": 232620 + }, + { + "epoch": 0.55, + "grad_norm": 2.53125, + "learning_rate": 0.00016524999911908604, + "loss": 2.0604, + "step": 232625 + }, + { + "epoch": 0.55, + "grad_norm": 2.375, + "learning_rate": 0.00016524859849349207, + "loss": 2.047, + "step": 232630 + }, + { + "epoch": 0.55, + "grad_norm": 2.34375, + "learning_rate": 0.00016524719784560805, + "loss": 1.9716, + "step": 232635 + }, + { + "epoch": 0.55, + "grad_norm": 2.234375, + "learning_rate": 0.00016524579717543437, + "loss": 2.055, + "step": 232640 + }, + { + "epoch": 0.55, + "grad_norm": 2.078125, + "learning_rate": 0.00016524439648297155, + "loss": 2.2031, + "step": 232645 + }, + { + "epoch": 0.55, + "grad_norm": 1.78125, + "learning_rate": 0.0001652429957682201, + "loss": 2.1028, + "step": 232650 + }, + { + "epoch": 0.55, + "grad_norm": 2.171875, + "learning_rate": 0.00016524159503118041, + "loss": 2.2209, + "step": 232655 + }, + { + "epoch": 0.55, + "grad_norm": 2.3125, + "learning_rate": 0.000165240194271853, + "loss": 2.08, + "step": 232660 + }, + { + "epoch": 0.55, + "grad_norm": 2.421875, + "learning_rate": 0.00016523879349023842, + "loss": 2.0371, + "step": 232665 + }, + { + "epoch": 0.55, + "grad_norm": 2.0, + "learning_rate": 0.00016523739268633705, + "loss": 2.1089, + "step": 232670 + }, + { + "epoch": 0.55, + "grad_norm": 2.34375, + "learning_rate": 0.0001652359918601494, + "loss": 2.109, + "step": 232675 + }, + { + "epoch": 0.55, + "grad_norm": 2.171875, + "learning_rate": 0.00016523459101167593, + "loss": 2.0806, + "step": 232680 + }, + { + "epoch": 0.55, + "grad_norm": 2.140625, + "learning_rate": 0.00016523319014091715, + "loss": 2.0011, + "step": 232685 + }, + { + "epoch": 0.55, + "grad_norm": 2.421875, + "learning_rate": 0.00016523178924787358, + "loss": 1.9548, + "step": 232690 + }, + { + "epoch": 0.55, + "grad_norm": 2.234375, + "learning_rate": 0.0001652303883325456, + "loss": 2.1016, + "step": 232695 + }, + { + "epoch": 0.55, + "grad_norm": 1.984375, + "learning_rate": 0.00016522898739493376, + "loss": 2.0826, + "step": 232700 + }, + { + "epoch": 0.55, + "grad_norm": 2.15625, + "learning_rate": 0.00016522758643503846, + "loss": 2.146, + "step": 232705 + }, + { + "epoch": 0.55, + "grad_norm": 2.046875, + "learning_rate": 0.00016522618545286027, + "loss": 2.0576, + "step": 232710 + }, + { + "epoch": 0.55, + "grad_norm": 2.6875, + "learning_rate": 0.00016522478444839966, + "loss": 2.0112, + "step": 232715 + }, + { + "epoch": 0.55, + "grad_norm": 2.125, + "learning_rate": 0.00016522338342165707, + "loss": 2.2248, + "step": 232720 + }, + { + "epoch": 0.55, + "grad_norm": 1.7421875, + "learning_rate": 0.00016522198237263298, + "loss": 2.1405, + "step": 232725 + }, + { + "epoch": 0.55, + "grad_norm": 2.109375, + "learning_rate": 0.00016522058130132787, + "loss": 1.993, + "step": 232730 + }, + { + "epoch": 0.55, + "grad_norm": 2.296875, + "learning_rate": 0.00016521918020774223, + "loss": 2.1973, + "step": 232735 + }, + { + "epoch": 0.55, + "grad_norm": 1.9140625, + "learning_rate": 0.00016521777909187652, + "loss": 2.0923, + "step": 232740 + }, + { + "epoch": 0.55, + "grad_norm": 1.765625, + "learning_rate": 0.00016521637795373126, + "loss": 2.1143, + "step": 232745 + }, + { + "epoch": 0.55, + "grad_norm": 2.4375, + "learning_rate": 0.0001652149767933069, + "loss": 2.0514, + "step": 232750 + }, + { + "epoch": 0.55, + "grad_norm": 2.03125, + "learning_rate": 0.0001652135756106039, + "loss": 2.1225, + "step": 232755 + }, + { + "epoch": 0.55, + "grad_norm": 2.421875, + "learning_rate": 0.0001652121744056228, + "loss": 1.9602, + "step": 232760 + }, + { + "epoch": 0.55, + "grad_norm": 2.3125, + "learning_rate": 0.000165210773178364, + "loss": 2.262, + "step": 232765 + }, + { + "epoch": 0.55, + "grad_norm": 1.9375, + "learning_rate": 0.00016520937192882805, + "loss": 2.0431, + "step": 232770 + }, + { + "epoch": 0.55, + "grad_norm": 2.0625, + "learning_rate": 0.00016520797065701536, + "loss": 2.0259, + "step": 232775 + }, + { + "epoch": 0.55, + "grad_norm": 2.078125, + "learning_rate": 0.00016520656936292648, + "loss": 2.1066, + "step": 232780 + }, + { + "epoch": 0.55, + "grad_norm": 2.28125, + "learning_rate": 0.00016520516804656177, + "loss": 1.9432, + "step": 232785 + }, + { + "epoch": 0.55, + "grad_norm": 2.078125, + "learning_rate": 0.0001652037667079219, + "loss": 1.9836, + "step": 232790 + }, + { + "epoch": 0.55, + "grad_norm": 2.109375, + "learning_rate": 0.00016520236534700719, + "loss": 1.9907, + "step": 232795 + }, + { + "epoch": 0.55, + "grad_norm": 2.453125, + "learning_rate": 0.00016520096396381817, + "loss": 2.0784, + "step": 232800 + }, + { + "epoch": 0.55, + "grad_norm": 1.8984375, + "learning_rate": 0.00016519956255835533, + "loss": 2.0577, + "step": 232805 + }, + { + "epoch": 0.55, + "grad_norm": 2.140625, + "learning_rate": 0.00016519816113061913, + "loss": 2.1327, + "step": 232810 + }, + { + "epoch": 0.55, + "grad_norm": 2.21875, + "learning_rate": 0.00016519675968061005, + "loss": 2.3037, + "step": 232815 + }, + { + "epoch": 0.55, + "grad_norm": 2.265625, + "learning_rate": 0.0001651953582083286, + "loss": 2.1993, + "step": 232820 + }, + { + "epoch": 0.55, + "grad_norm": 1.890625, + "learning_rate": 0.0001651939567137752, + "loss": 2.0868, + "step": 232825 + }, + { + "epoch": 0.55, + "grad_norm": 1.734375, + "learning_rate": 0.00016519255519695038, + "loss": 2.1073, + "step": 232830 + }, + { + "epoch": 0.55, + "grad_norm": 2.203125, + "learning_rate": 0.0001651911536578546, + "loss": 2.1266, + "step": 232835 + }, + { + "epoch": 0.55, + "grad_norm": 1.9609375, + "learning_rate": 0.00016518975209648833, + "loss": 2.1755, + "step": 232840 + }, + { + "epoch": 0.55, + "grad_norm": 1.8984375, + "learning_rate": 0.00016518835051285206, + "loss": 2.1221, + "step": 232845 + }, + { + "epoch": 0.55, + "grad_norm": 1.7578125, + "learning_rate": 0.0001651869489069463, + "loss": 2.2773, + "step": 232850 + }, + { + "epoch": 0.55, + "grad_norm": 2.484375, + "learning_rate": 0.00016518554727877144, + "loss": 2.1878, + "step": 232855 + }, + { + "epoch": 0.55, + "grad_norm": 2.28125, + "learning_rate": 0.00016518414562832808, + "loss": 2.111, + "step": 232860 + }, + { + "epoch": 0.55, + "grad_norm": 2.0, + "learning_rate": 0.00016518274395561656, + "loss": 2.0702, + "step": 232865 + }, + { + "epoch": 0.55, + "grad_norm": 2.421875, + "learning_rate": 0.00016518134226063748, + "loss": 1.8593, + "step": 232870 + }, + { + "epoch": 0.55, + "grad_norm": 2.265625, + "learning_rate": 0.00016517994054339127, + "loss": 2.0467, + "step": 232875 + }, + { + "epoch": 0.55, + "grad_norm": 2.140625, + "learning_rate": 0.0001651785388038784, + "loss": 1.9863, + "step": 232880 + }, + { + "epoch": 0.55, + "grad_norm": 2.25, + "learning_rate": 0.00016517713704209937, + "loss": 2.1592, + "step": 232885 + }, + { + "epoch": 0.55, + "grad_norm": 2.03125, + "learning_rate": 0.00016517573525805463, + "loss": 1.897, + "step": 232890 + }, + { + "epoch": 0.55, + "grad_norm": 2.421875, + "learning_rate": 0.00016517433345174468, + "loss": 2.0617, + "step": 232895 + }, + { + "epoch": 0.55, + "grad_norm": 1.9453125, + "learning_rate": 0.00016517293162317, + "loss": 2.0007, + "step": 232900 + }, + { + "epoch": 0.55, + "grad_norm": 1.9140625, + "learning_rate": 0.00016517152977233107, + "loss": 2.202, + "step": 232905 + }, + { + "epoch": 0.55, + "grad_norm": 2.328125, + "learning_rate": 0.00016517012789922836, + "loss": 2.1832, + "step": 232910 + }, + { + "epoch": 0.55, + "grad_norm": 1.7421875, + "learning_rate": 0.00016516872600386235, + "loss": 1.9518, + "step": 232915 + }, + { + "epoch": 0.55, + "grad_norm": 2.25, + "learning_rate": 0.00016516732408623352, + "loss": 2.2086, + "step": 232920 + }, + { + "epoch": 0.55, + "grad_norm": 1.9140625, + "learning_rate": 0.00016516592214634235, + "loss": 2.019, + "step": 232925 + }, + { + "epoch": 0.55, + "grad_norm": 2.078125, + "learning_rate": 0.00016516452018418932, + "loss": 2.1563, + "step": 232930 + }, + { + "epoch": 0.55, + "grad_norm": 2.328125, + "learning_rate": 0.0001651631181997749, + "loss": 1.9789, + "step": 232935 + }, + { + "epoch": 0.55, + "grad_norm": 2.15625, + "learning_rate": 0.0001651617161930996, + "loss": 2.0028, + "step": 232940 + }, + { + "epoch": 0.55, + "grad_norm": 2.390625, + "learning_rate": 0.00016516031416416387, + "loss": 2.0151, + "step": 232945 + }, + { + "epoch": 0.55, + "grad_norm": 1.8671875, + "learning_rate": 0.00016515891211296818, + "loss": 2.024, + "step": 232950 + }, + { + "epoch": 0.55, + "grad_norm": 2.234375, + "learning_rate": 0.00016515751003951304, + "loss": 2.0662, + "step": 232955 + }, + { + "epoch": 0.55, + "grad_norm": 2.34375, + "learning_rate": 0.00016515610794379888, + "loss": 1.9431, + "step": 232960 + }, + { + "epoch": 0.55, + "grad_norm": 2.0, + "learning_rate": 0.00016515470582582626, + "loss": 2.1703, + "step": 232965 + }, + { + "epoch": 0.55, + "grad_norm": 2.078125, + "learning_rate": 0.00016515330368559557, + "loss": 2.0313, + "step": 232970 + }, + { + "epoch": 0.55, + "grad_norm": 1.6640625, + "learning_rate": 0.00016515190152310735, + "loss": 2.11, + "step": 232975 + }, + { + "epoch": 0.55, + "grad_norm": 1.65625, + "learning_rate": 0.00016515049933836203, + "loss": 1.9752, + "step": 232980 + }, + { + "epoch": 0.55, + "grad_norm": 2.125, + "learning_rate": 0.00016514909713136018, + "loss": 2.1361, + "step": 232985 + }, + { + "epoch": 0.55, + "grad_norm": 2.109375, + "learning_rate": 0.00016514769490210218, + "loss": 1.8895, + "step": 232990 + }, + { + "epoch": 0.55, + "grad_norm": 2.109375, + "learning_rate": 0.00016514629265058854, + "loss": 1.8554, + "step": 232995 + }, + { + "epoch": 0.55, + "grad_norm": 2.0625, + "learning_rate": 0.00016514489037681976, + "loss": 2.1442, + "step": 233000 + }, + { + "epoch": 0.55, + "grad_norm": 2.671875, + "learning_rate": 0.00016514348808079627, + "loss": 2.0958, + "step": 233005 + }, + { + "epoch": 0.55, + "grad_norm": 2.109375, + "learning_rate": 0.00016514208576251863, + "loss": 2.1756, + "step": 233010 + }, + { + "epoch": 0.55, + "grad_norm": 2.515625, + "learning_rate": 0.00016514068342198725, + "loss": 2.0949, + "step": 233015 + }, + { + "epoch": 0.55, + "grad_norm": 2.078125, + "learning_rate": 0.00016513928105920265, + "loss": 2.0382, + "step": 233020 + }, + { + "epoch": 0.55, + "grad_norm": 1.8671875, + "learning_rate": 0.00016513787867416526, + "loss": 2.1112, + "step": 233025 + }, + { + "epoch": 0.55, + "grad_norm": 2.5625, + "learning_rate": 0.00016513647626687557, + "loss": 2.1345, + "step": 233030 + }, + { + "epoch": 0.55, + "grad_norm": 2.34375, + "learning_rate": 0.00016513507383733413, + "loss": 2.174, + "step": 233035 + }, + { + "epoch": 0.55, + "grad_norm": 2.671875, + "learning_rate": 0.00016513367138554135, + "loss": 2.041, + "step": 233040 + }, + { + "epoch": 0.55, + "grad_norm": 2.15625, + "learning_rate": 0.00016513226891149774, + "loss": 2.1603, + "step": 233045 + }, + { + "epoch": 0.55, + "grad_norm": 2.0, + "learning_rate": 0.00016513086641520376, + "loss": 2.237, + "step": 233050 + }, + { + "epoch": 0.55, + "grad_norm": 2.125, + "learning_rate": 0.00016512946389665985, + "loss": 2.0248, + "step": 233055 + }, + { + "epoch": 0.55, + "grad_norm": 2.421875, + "learning_rate": 0.00016512806135586655, + "loss": 2.1823, + "step": 233060 + }, + { + "epoch": 0.55, + "grad_norm": 2.890625, + "learning_rate": 0.00016512665879282436, + "loss": 2.196, + "step": 233065 + }, + { + "epoch": 0.55, + "grad_norm": 2.890625, + "learning_rate": 0.00016512525620753372, + "loss": 2.0137, + "step": 233070 + }, + { + "epoch": 0.55, + "grad_norm": 2.140625, + "learning_rate": 0.0001651238535999951, + "loss": 2.1008, + "step": 233075 + }, + { + "epoch": 0.55, + "grad_norm": 1.984375, + "learning_rate": 0.00016512245097020895, + "loss": 1.9883, + "step": 233080 + }, + { + "epoch": 0.55, + "grad_norm": 1.6875, + "learning_rate": 0.00016512104831817586, + "loss": 1.8592, + "step": 233085 + }, + { + "epoch": 0.55, + "grad_norm": 1.75, + "learning_rate": 0.0001651196456438962, + "loss": 1.9847, + "step": 233090 + }, + { + "epoch": 0.55, + "grad_norm": 2.015625, + "learning_rate": 0.0001651182429473705, + "loss": 2.4037, + "step": 233095 + }, + { + "epoch": 0.55, + "grad_norm": 2.46875, + "learning_rate": 0.00016511684022859922, + "loss": 2.0581, + "step": 233100 + }, + { + "epoch": 0.55, + "grad_norm": 2.625, + "learning_rate": 0.00016511543748758283, + "loss": 1.9971, + "step": 233105 + }, + { + "epoch": 0.55, + "grad_norm": 2.171875, + "learning_rate": 0.00016511403472432187, + "loss": 2.0328, + "step": 233110 + }, + { + "epoch": 0.55, + "grad_norm": 2.671875, + "learning_rate": 0.00016511263193881673, + "loss": 2.0173, + "step": 233115 + }, + { + "epoch": 0.55, + "grad_norm": 2.234375, + "learning_rate": 0.00016511122913106795, + "loss": 2.1629, + "step": 233120 + }, + { + "epoch": 0.55, + "grad_norm": 2.3125, + "learning_rate": 0.000165109826301076, + "loss": 2.28, + "step": 233125 + }, + { + "epoch": 0.55, + "grad_norm": 1.9296875, + "learning_rate": 0.00016510842344884133, + "loss": 2.0908, + "step": 233130 + }, + { + "epoch": 0.55, + "grad_norm": 2.375, + "learning_rate": 0.00016510702057436445, + "loss": 2.0508, + "step": 233135 + }, + { + "epoch": 0.55, + "grad_norm": 2.484375, + "learning_rate": 0.00016510561767764585, + "loss": 2.1005, + "step": 233140 + }, + { + "epoch": 0.55, + "grad_norm": 2.171875, + "learning_rate": 0.00016510421475868597, + "loss": 2.0586, + "step": 233145 + }, + { + "epoch": 0.55, + "grad_norm": 2.078125, + "learning_rate": 0.00016510281181748534, + "loss": 2.1613, + "step": 233150 + }, + { + "epoch": 0.55, + "grad_norm": 2.234375, + "learning_rate": 0.00016510140885404436, + "loss": 2.1285, + "step": 233155 + }, + { + "epoch": 0.55, + "grad_norm": 2.46875, + "learning_rate": 0.00016510000586836358, + "loss": 1.907, + "step": 233160 + }, + { + "epoch": 0.55, + "grad_norm": 2.125, + "learning_rate": 0.00016509860286044348, + "loss": 2.1145, + "step": 233165 + }, + { + "epoch": 0.55, + "grad_norm": 2.15625, + "learning_rate": 0.00016509719983028452, + "loss": 2.0377, + "step": 233170 + }, + { + "epoch": 0.55, + "grad_norm": 2.265625, + "learning_rate": 0.00016509579677788714, + "loss": 2.1653, + "step": 233175 + }, + { + "epoch": 0.55, + "grad_norm": 2.03125, + "learning_rate": 0.00016509439370325187, + "loss": 2.1218, + "step": 233180 + }, + { + "epoch": 0.55, + "grad_norm": 2.5, + "learning_rate": 0.00016509299060637916, + "loss": 2.1073, + "step": 233185 + }, + { + "epoch": 0.55, + "grad_norm": 2.453125, + "learning_rate": 0.00016509158748726952, + "loss": 2.0932, + "step": 233190 + }, + { + "epoch": 0.55, + "grad_norm": 2.28125, + "learning_rate": 0.00016509018434592343, + "loss": 2.171, + "step": 233195 + }, + { + "epoch": 0.55, + "grad_norm": 2.3125, + "learning_rate": 0.0001650887811823413, + "loss": 2.1132, + "step": 233200 + }, + { + "epoch": 0.55, + "grad_norm": 2.171875, + "learning_rate": 0.00016508737799652372, + "loss": 2.209, + "step": 233205 + }, + { + "epoch": 0.55, + "grad_norm": 2.09375, + "learning_rate": 0.0001650859747884711, + "loss": 2.1452, + "step": 233210 + }, + { + "epoch": 0.55, + "grad_norm": 2.078125, + "learning_rate": 0.0001650845715581839, + "loss": 2.1188, + "step": 233215 + }, + { + "epoch": 0.55, + "grad_norm": 1.8984375, + "learning_rate": 0.00016508316830566265, + "loss": 2.0617, + "step": 233220 + }, + { + "epoch": 0.55, + "grad_norm": 2.578125, + "learning_rate": 0.00016508176503090782, + "loss": 2.129, + "step": 233225 + }, + { + "epoch": 0.55, + "grad_norm": 2.546875, + "learning_rate": 0.00016508036173391988, + "loss": 2.0359, + "step": 233230 + }, + { + "epoch": 0.55, + "grad_norm": 2.6875, + "learning_rate": 0.00016507895841469928, + "loss": 2.2095, + "step": 233235 + }, + { + "epoch": 0.55, + "grad_norm": 2.171875, + "learning_rate": 0.00016507755507324653, + "loss": 2.2402, + "step": 233240 + }, + { + "epoch": 0.55, + "grad_norm": 2.296875, + "learning_rate": 0.00016507615170956213, + "loss": 2.2108, + "step": 233245 + }, + { + "epoch": 0.55, + "grad_norm": 2.3125, + "learning_rate": 0.0001650747483236465, + "loss": 2.1602, + "step": 233250 + }, + { + "epoch": 0.55, + "grad_norm": 2.0625, + "learning_rate": 0.0001650733449155002, + "loss": 1.9558, + "step": 233255 + }, + { + "epoch": 0.55, + "grad_norm": 2.25, + "learning_rate": 0.00016507194148512367, + "loss": 2.03, + "step": 233260 + }, + { + "epoch": 0.55, + "grad_norm": 1.9140625, + "learning_rate": 0.00016507053803251735, + "loss": 2.1465, + "step": 233265 + }, + { + "epoch": 0.55, + "grad_norm": 1.6640625, + "learning_rate": 0.00016506913455768176, + "loss": 2.1071, + "step": 233270 + }, + { + "epoch": 0.55, + "grad_norm": 2.140625, + "learning_rate": 0.00016506773106061737, + "loss": 2.0231, + "step": 233275 + }, + { + "epoch": 0.55, + "grad_norm": 2.984375, + "learning_rate": 0.00016506632754132466, + "loss": 2.0077, + "step": 233280 + }, + { + "epoch": 0.55, + "grad_norm": 2.046875, + "learning_rate": 0.00016506492399980416, + "loss": 2.1344, + "step": 233285 + }, + { + "epoch": 0.55, + "grad_norm": 2.1875, + "learning_rate": 0.00016506352043605627, + "loss": 2.1512, + "step": 233290 + }, + { + "epoch": 0.55, + "grad_norm": 2.046875, + "learning_rate": 0.00016506211685008148, + "loss": 2.2609, + "step": 233295 + }, + { + "epoch": 0.55, + "grad_norm": 2.125, + "learning_rate": 0.0001650607132418803, + "loss": 2.013, + "step": 233300 + }, + { + "epoch": 0.55, + "grad_norm": 2.140625, + "learning_rate": 0.0001650593096114532, + "loss": 2.0685, + "step": 233305 + }, + { + "epoch": 0.55, + "grad_norm": 2.0, + "learning_rate": 0.00016505790595880068, + "loss": 2.1622, + "step": 233310 + }, + { + "epoch": 0.55, + "grad_norm": 2.125, + "learning_rate": 0.0001650565022839232, + "loss": 2.0418, + "step": 233315 + }, + { + "epoch": 0.55, + "grad_norm": 2.40625, + "learning_rate": 0.00016505509858682124, + "loss": 2.0989, + "step": 233320 + }, + { + "epoch": 0.55, + "grad_norm": 2.6875, + "learning_rate": 0.00016505369486749524, + "loss": 2.114, + "step": 233325 + }, + { + "epoch": 0.55, + "grad_norm": 1.984375, + "learning_rate": 0.00016505229112594573, + "loss": 1.9204, + "step": 233330 + }, + { + "epoch": 0.55, + "grad_norm": 2.03125, + "learning_rate": 0.00016505088736217322, + "loss": 2.281, + "step": 233335 + }, + { + "epoch": 0.55, + "grad_norm": 2.40625, + "learning_rate": 0.0001650494835761781, + "loss": 1.9901, + "step": 233340 + }, + { + "epoch": 0.55, + "grad_norm": 2.15625, + "learning_rate": 0.00016504807976796092, + "loss": 2.0953, + "step": 233345 + }, + { + "epoch": 0.55, + "grad_norm": 2.4375, + "learning_rate": 0.00016504667593752212, + "loss": 2.241, + "step": 233350 + }, + { + "epoch": 0.55, + "grad_norm": 2.25, + "learning_rate": 0.0001650452720848622, + "loss": 2.1136, + "step": 233355 + }, + { + "epoch": 0.55, + "grad_norm": 2.125, + "learning_rate": 0.00016504386820998166, + "loss": 1.8998, + "step": 233360 + }, + { + "epoch": 0.55, + "grad_norm": 2.375, + "learning_rate": 0.00016504246431288096, + "loss": 2.4034, + "step": 233365 + }, + { + "epoch": 0.55, + "grad_norm": 2.265625, + "learning_rate": 0.00016504106039356056, + "loss": 2.1821, + "step": 233370 + }, + { + "epoch": 0.55, + "grad_norm": 1.984375, + "learning_rate": 0.0001650396564520209, + "loss": 2.2526, + "step": 233375 + }, + { + "epoch": 0.55, + "grad_norm": 2.28125, + "learning_rate": 0.00016503825248826258, + "loss": 2.0471, + "step": 233380 + }, + { + "epoch": 0.55, + "grad_norm": 1.65625, + "learning_rate": 0.000165036848502286, + "loss": 2.1426, + "step": 233385 + }, + { + "epoch": 0.55, + "grad_norm": 2.15625, + "learning_rate": 0.00016503544449409163, + "loss": 2.2541, + "step": 233390 + }, + { + "epoch": 0.55, + "grad_norm": 2.109375, + "learning_rate": 0.00016503404046368, + "loss": 2.1365, + "step": 233395 + }, + { + "epoch": 0.55, + "grad_norm": 2.3125, + "learning_rate": 0.00016503263641105159, + "loss": 2.1806, + "step": 233400 + }, + { + "epoch": 0.55, + "grad_norm": 2.265625, + "learning_rate": 0.00016503123233620678, + "loss": 1.9799, + "step": 233405 + }, + { + "epoch": 0.55, + "grad_norm": 2.296875, + "learning_rate": 0.00016502982823914618, + "loss": 2.0885, + "step": 233410 + }, + { + "epoch": 0.55, + "grad_norm": 2.15625, + "learning_rate": 0.00016502842411987017, + "loss": 2.1115, + "step": 233415 + }, + { + "epoch": 0.55, + "grad_norm": 2.34375, + "learning_rate": 0.0001650270199783793, + "loss": 2.1237, + "step": 233420 + }, + { + "epoch": 0.55, + "grad_norm": 1.890625, + "learning_rate": 0.000165025615814674, + "loss": 2.1533, + "step": 233425 + }, + { + "epoch": 0.55, + "grad_norm": 2.515625, + "learning_rate": 0.00016502421162875477, + "loss": 2.0937, + "step": 233430 + }, + { + "epoch": 0.55, + "grad_norm": 2.203125, + "learning_rate": 0.0001650228074206221, + "loss": 2.0888, + "step": 233435 + }, + { + "epoch": 0.55, + "grad_norm": 2.234375, + "learning_rate": 0.00016502140319027644, + "loss": 2.1814, + "step": 233440 + }, + { + "epoch": 0.55, + "grad_norm": 2.296875, + "learning_rate": 0.00016501999893771832, + "loss": 2.1741, + "step": 233445 + }, + { + "epoch": 0.55, + "grad_norm": 2.5, + "learning_rate": 0.00016501859466294818, + "loss": 2.1742, + "step": 233450 + }, + { + "epoch": 0.55, + "grad_norm": 2.171875, + "learning_rate": 0.0001650171903659665, + "loss": 1.8904, + "step": 233455 + }, + { + "epoch": 0.55, + "grad_norm": 1.96875, + "learning_rate": 0.00016501578604677378, + "loss": 1.9874, + "step": 233460 + }, + { + "epoch": 0.55, + "grad_norm": 2.53125, + "learning_rate": 0.00016501438170537047, + "loss": 2.1102, + "step": 233465 + }, + { + "epoch": 0.55, + "grad_norm": 2.234375, + "learning_rate": 0.00016501297734175708, + "loss": 2.2044, + "step": 233470 + }, + { + "epoch": 0.55, + "grad_norm": 2.296875, + "learning_rate": 0.0001650115729559341, + "loss": 1.9763, + "step": 233475 + }, + { + "epoch": 0.55, + "grad_norm": 2.921875, + "learning_rate": 0.00016501016854790197, + "loss": 2.1018, + "step": 233480 + }, + { + "epoch": 0.55, + "grad_norm": 1.9453125, + "learning_rate": 0.00016500876411766114, + "loss": 2.0304, + "step": 233485 + }, + { + "epoch": 0.55, + "grad_norm": 2.421875, + "learning_rate": 0.00016500735966521222, + "loss": 2.1682, + "step": 233490 + }, + { + "epoch": 0.55, + "grad_norm": 2.09375, + "learning_rate": 0.00016500595519055558, + "loss": 2.0684, + "step": 233495 + }, + { + "epoch": 0.55, + "grad_norm": 1.6640625, + "learning_rate": 0.0001650045506936917, + "loss": 2.0575, + "step": 233500 + }, + { + "epoch": 0.55, + "grad_norm": 2.265625, + "learning_rate": 0.0001650031461746211, + "loss": 2.0366, + "step": 233505 + }, + { + "epoch": 0.55, + "grad_norm": 2.171875, + "learning_rate": 0.00016500174163334424, + "loss": 1.8696, + "step": 233510 + }, + { + "epoch": 0.55, + "grad_norm": 2.234375, + "learning_rate": 0.00016500033706986163, + "loss": 2.2472, + "step": 233515 + }, + { + "epoch": 0.55, + "grad_norm": 2.171875, + "learning_rate": 0.0001649989324841737, + "loss": 2.2168, + "step": 233520 + }, + { + "epoch": 0.55, + "grad_norm": 2.265625, + "learning_rate": 0.00016499752787628097, + "loss": 1.992, + "step": 233525 + }, + { + "epoch": 0.55, + "grad_norm": 2.234375, + "learning_rate": 0.0001649961232461839, + "loss": 2.1523, + "step": 233530 + }, + { + "epoch": 0.55, + "grad_norm": 1.796875, + "learning_rate": 0.00016499471859388298, + "loss": 2.1304, + "step": 233535 + }, + { + "epoch": 0.55, + "grad_norm": 1.828125, + "learning_rate": 0.00016499331391937866, + "loss": 2.1488, + "step": 233540 + }, + { + "epoch": 0.55, + "grad_norm": 2.78125, + "learning_rate": 0.00016499190922267148, + "loss": 2.1881, + "step": 233545 + }, + { + "epoch": 0.55, + "grad_norm": 2.4375, + "learning_rate": 0.00016499050450376188, + "loss": 2.0661, + "step": 233550 + }, + { + "epoch": 0.55, + "grad_norm": 2.046875, + "learning_rate": 0.00016498909976265033, + "loss": 2.1829, + "step": 233555 + }, + { + "epoch": 0.55, + "grad_norm": 1.984375, + "learning_rate": 0.00016498769499933733, + "loss": 1.9386, + "step": 233560 + }, + { + "epoch": 0.55, + "grad_norm": 1.96875, + "learning_rate": 0.00016498629021382337, + "loss": 2.0296, + "step": 233565 + }, + { + "epoch": 0.55, + "grad_norm": 1.96875, + "learning_rate": 0.0001649848854061089, + "loss": 2.1206, + "step": 233570 + }, + { + "epoch": 0.55, + "grad_norm": 2.21875, + "learning_rate": 0.00016498348057619443, + "loss": 2.0688, + "step": 233575 + }, + { + "epoch": 0.55, + "grad_norm": 2.390625, + "learning_rate": 0.0001649820757240804, + "loss": 1.8445, + "step": 233580 + }, + { + "epoch": 0.55, + "grad_norm": 2.203125, + "learning_rate": 0.00016498067084976732, + "loss": 2.2454, + "step": 233585 + }, + { + "epoch": 0.55, + "grad_norm": 2.671875, + "learning_rate": 0.00016497926595325567, + "loss": 2.0528, + "step": 233590 + }, + { + "epoch": 0.55, + "grad_norm": 2.3125, + "learning_rate": 0.0001649778610345459, + "loss": 2.1995, + "step": 233595 + }, + { + "epoch": 0.55, + "grad_norm": 1.9296875, + "learning_rate": 0.00016497645609363855, + "loss": 1.8481, + "step": 233600 + }, + { + "epoch": 0.55, + "grad_norm": 2.109375, + "learning_rate": 0.00016497505113053406, + "loss": 2.0463, + "step": 233605 + }, + { + "epoch": 0.55, + "grad_norm": 1.8359375, + "learning_rate": 0.00016497364614523292, + "loss": 2.0035, + "step": 233610 + }, + { + "epoch": 0.55, + "grad_norm": 2.109375, + "learning_rate": 0.00016497224113773559, + "loss": 2.1527, + "step": 233615 + }, + { + "epoch": 0.55, + "grad_norm": 2.296875, + "learning_rate": 0.00016497083610804257, + "loss": 2.0843, + "step": 233620 + }, + { + "epoch": 0.55, + "grad_norm": 2.390625, + "learning_rate": 0.0001649694310561543, + "loss": 1.9801, + "step": 233625 + }, + { + "epoch": 0.55, + "grad_norm": 2.203125, + "learning_rate": 0.00016496802598207135, + "loss": 2.3273, + "step": 233630 + }, + { + "epoch": 0.55, + "grad_norm": 2.265625, + "learning_rate": 0.00016496662088579412, + "loss": 2.1256, + "step": 233635 + }, + { + "epoch": 0.55, + "grad_norm": 1.984375, + "learning_rate": 0.0001649652157673231, + "loss": 2.0212, + "step": 233640 + }, + { + "epoch": 0.55, + "grad_norm": 2.078125, + "learning_rate": 0.0001649638106266588, + "loss": 2.1791, + "step": 233645 + }, + { + "epoch": 0.55, + "grad_norm": 2.59375, + "learning_rate": 0.00016496240546380166, + "loss": 2.1996, + "step": 233650 + }, + { + "epoch": 0.55, + "grad_norm": 2.078125, + "learning_rate": 0.00016496100027875224, + "loss": 2.1564, + "step": 233655 + }, + { + "epoch": 0.55, + "grad_norm": 2.09375, + "learning_rate": 0.0001649595950715109, + "loss": 2.0632, + "step": 233660 + }, + { + "epoch": 0.55, + "grad_norm": 2.203125, + "learning_rate": 0.00016495818984207822, + "loss": 2.1771, + "step": 233665 + }, + { + "epoch": 0.55, + "grad_norm": 2.546875, + "learning_rate": 0.00016495678459045463, + "loss": 2.1149, + "step": 233670 + }, + { + "epoch": 0.55, + "grad_norm": 1.6640625, + "learning_rate": 0.00016495537931664062, + "loss": 2.0362, + "step": 233675 + }, + { + "epoch": 0.55, + "grad_norm": 1.7421875, + "learning_rate": 0.00016495397402063668, + "loss": 2.0523, + "step": 233680 + }, + { + "epoch": 0.55, + "grad_norm": 2.0, + "learning_rate": 0.00016495256870244332, + "loss": 2.0504, + "step": 233685 + }, + { + "epoch": 0.55, + "grad_norm": 2.3125, + "learning_rate": 0.00016495116336206093, + "loss": 2.0266, + "step": 233690 + }, + { + "epoch": 0.55, + "grad_norm": 2.0, + "learning_rate": 0.0001649497579994901, + "loss": 2.0957, + "step": 233695 + }, + { + "epoch": 0.55, + "grad_norm": 2.265625, + "learning_rate": 0.00016494835261473122, + "loss": 1.9599, + "step": 233700 + }, + { + "epoch": 0.55, + "grad_norm": 1.703125, + "learning_rate": 0.0001649469472077848, + "loss": 2.236, + "step": 233705 + }, + { + "epoch": 0.55, + "grad_norm": 2.21875, + "learning_rate": 0.00016494554177865132, + "loss": 2.1724, + "step": 233710 + }, + { + "epoch": 0.55, + "grad_norm": 2.265625, + "learning_rate": 0.00016494413632733127, + "loss": 2.0724, + "step": 233715 + }, + { + "epoch": 0.55, + "grad_norm": 2.4375, + "learning_rate": 0.00016494273085382514, + "loss": 2.1014, + "step": 233720 + }, + { + "epoch": 0.55, + "grad_norm": 2.5, + "learning_rate": 0.00016494132535813338, + "loss": 1.8836, + "step": 233725 + }, + { + "epoch": 0.55, + "grad_norm": 2.0, + "learning_rate": 0.0001649399198402565, + "loss": 1.9889, + "step": 233730 + }, + { + "epoch": 0.55, + "grad_norm": 1.9765625, + "learning_rate": 0.00016493851430019496, + "loss": 2.2088, + "step": 233735 + }, + { + "epoch": 0.55, + "grad_norm": 2.015625, + "learning_rate": 0.00016493710873794926, + "loss": 2.1077, + "step": 233740 + }, + { + "epoch": 0.55, + "grad_norm": 1.8125, + "learning_rate": 0.00016493570315351984, + "loss": 2.0231, + "step": 233745 + }, + { + "epoch": 0.55, + "grad_norm": 2.15625, + "learning_rate": 0.00016493429754690721, + "loss": 2.1419, + "step": 233750 + }, + { + "epoch": 0.55, + "grad_norm": 2.125, + "learning_rate": 0.00016493289191811186, + "loss": 2.0265, + "step": 233755 + }, + { + "epoch": 0.55, + "grad_norm": 2.265625, + "learning_rate": 0.00016493148626713425, + "loss": 2.1689, + "step": 233760 + }, + { + "epoch": 0.55, + "grad_norm": 1.9609375, + "learning_rate": 0.0001649300805939749, + "loss": 2.0388, + "step": 233765 + }, + { + "epoch": 0.55, + "grad_norm": 2.1875, + "learning_rate": 0.0001649286748986342, + "loss": 1.96, + "step": 233770 + }, + { + "epoch": 0.55, + "grad_norm": 2.03125, + "learning_rate": 0.00016492726918111272, + "loss": 2.208, + "step": 233775 + }, + { + "epoch": 0.55, + "grad_norm": 1.9765625, + "learning_rate": 0.0001649258634414109, + "loss": 1.9881, + "step": 233780 + }, + { + "epoch": 0.55, + "grad_norm": 2.640625, + "learning_rate": 0.0001649244576795292, + "loss": 1.883, + "step": 233785 + }, + { + "epoch": 0.55, + "grad_norm": 2.171875, + "learning_rate": 0.00016492305189546818, + "loss": 2.1963, + "step": 233790 + }, + { + "epoch": 0.55, + "grad_norm": 2.125, + "learning_rate": 0.0001649216460892282, + "loss": 2.0688, + "step": 233795 + }, + { + "epoch": 0.55, + "grad_norm": 2.0625, + "learning_rate": 0.00016492024026080988, + "loss": 2.1087, + "step": 233800 + }, + { + "epoch": 0.55, + "grad_norm": 2.109375, + "learning_rate": 0.0001649188344102136, + "loss": 1.9916, + "step": 233805 + }, + { + "epoch": 0.55, + "grad_norm": 1.90625, + "learning_rate": 0.00016491742853743987, + "loss": 2.1374, + "step": 233810 + }, + { + "epoch": 0.55, + "grad_norm": 2.515625, + "learning_rate": 0.00016491602264248915, + "loss": 2.1051, + "step": 233815 + }, + { + "epoch": 0.55, + "grad_norm": 2.34375, + "learning_rate": 0.00016491461672536197, + "loss": 2.0564, + "step": 233820 + }, + { + "epoch": 0.55, + "grad_norm": 2.390625, + "learning_rate": 0.00016491321078605876, + "loss": 2.3015, + "step": 233825 + }, + { + "epoch": 0.55, + "grad_norm": 2.265625, + "learning_rate": 0.00016491180482458003, + "loss": 2.1141, + "step": 233830 + }, + { + "epoch": 0.55, + "grad_norm": 2.0, + "learning_rate": 0.00016491039884092626, + "loss": 1.9501, + "step": 233835 + }, + { + "epoch": 0.55, + "grad_norm": 2.0625, + "learning_rate": 0.0001649089928350979, + "loss": 2.1356, + "step": 233840 + }, + { + "epoch": 0.55, + "grad_norm": 2.53125, + "learning_rate": 0.00016490758680709545, + "loss": 1.9673, + "step": 233845 + }, + { + "epoch": 0.55, + "grad_norm": 1.671875, + "learning_rate": 0.0001649061807569194, + "loss": 1.9838, + "step": 233850 + }, + { + "epoch": 0.55, + "grad_norm": 2.109375, + "learning_rate": 0.00016490477468457021, + "loss": 2.2289, + "step": 233855 + }, + { + "epoch": 0.55, + "grad_norm": 2.09375, + "learning_rate": 0.0001649033685900484, + "loss": 2.0173, + "step": 233860 + }, + { + "epoch": 0.55, + "grad_norm": 1.953125, + "learning_rate": 0.00016490196247335442, + "loss": 2.0326, + "step": 233865 + }, + { + "epoch": 0.55, + "grad_norm": 2.296875, + "learning_rate": 0.0001649005563344887, + "loss": 2.1424, + "step": 233870 + }, + { + "epoch": 0.55, + "grad_norm": 2.546875, + "learning_rate": 0.0001648991501734518, + "loss": 2.133, + "step": 233875 + }, + { + "epoch": 0.55, + "grad_norm": 1.71875, + "learning_rate": 0.0001648977439902442, + "loss": 2.0656, + "step": 233880 + }, + { + "epoch": 0.55, + "grad_norm": 2.25, + "learning_rate": 0.00016489633778486634, + "loss": 2.082, + "step": 233885 + }, + { + "epoch": 0.55, + "grad_norm": 2.09375, + "learning_rate": 0.0001648949315573187, + "loss": 1.9641, + "step": 233890 + }, + { + "epoch": 0.55, + "grad_norm": 1.96875, + "learning_rate": 0.0001648935253076018, + "loss": 2.0453, + "step": 233895 + }, + { + "epoch": 0.55, + "grad_norm": 2.328125, + "learning_rate": 0.00016489211903571608, + "loss": 2.1168, + "step": 233900 + }, + { + "epoch": 0.55, + "grad_norm": 2.328125, + "learning_rate": 0.00016489071274166202, + "loss": 2.0667, + "step": 233905 + }, + { + "epoch": 0.55, + "grad_norm": 2.015625, + "learning_rate": 0.00016488930642544013, + "loss": 2.1095, + "step": 233910 + }, + { + "epoch": 0.55, + "grad_norm": 1.890625, + "learning_rate": 0.0001648879000870509, + "loss": 2.1114, + "step": 233915 + }, + { + "epoch": 0.55, + "grad_norm": 1.984375, + "learning_rate": 0.00016488649372649477, + "loss": 2.1265, + "step": 233920 + }, + { + "epoch": 0.55, + "grad_norm": 4.6875, + "learning_rate": 0.00016488508734377222, + "loss": 2.2611, + "step": 233925 + }, + { + "epoch": 0.55, + "grad_norm": 2.28125, + "learning_rate": 0.00016488368093888376, + "loss": 2.0471, + "step": 233930 + }, + { + "epoch": 0.55, + "grad_norm": 2.15625, + "learning_rate": 0.00016488227451182984, + "loss": 1.9309, + "step": 233935 + }, + { + "epoch": 0.55, + "grad_norm": 2.203125, + "learning_rate": 0.00016488086806261098, + "loss": 1.9514, + "step": 233940 + }, + { + "epoch": 0.55, + "grad_norm": 2.078125, + "learning_rate": 0.00016487946159122763, + "loss": 2.0431, + "step": 233945 + }, + { + "epoch": 0.55, + "grad_norm": 1.9921875, + "learning_rate": 0.00016487805509768027, + "loss": 2.1396, + "step": 233950 + }, + { + "epoch": 0.55, + "grad_norm": 2.0, + "learning_rate": 0.0001648766485819694, + "loss": 2.0278, + "step": 233955 + }, + { + "epoch": 0.55, + "grad_norm": 2.15625, + "learning_rate": 0.0001648752420440955, + "loss": 2.0829, + "step": 233960 + }, + { + "epoch": 0.55, + "grad_norm": 1.9375, + "learning_rate": 0.00016487383548405903, + "loss": 2.1368, + "step": 233965 + }, + { + "epoch": 0.55, + "grad_norm": 2.171875, + "learning_rate": 0.00016487242890186047, + "loss": 2.1113, + "step": 233970 + }, + { + "epoch": 0.55, + "grad_norm": 2.25, + "learning_rate": 0.00016487102229750035, + "loss": 2.2007, + "step": 233975 + }, + { + "epoch": 0.55, + "grad_norm": 2.359375, + "learning_rate": 0.0001648696156709791, + "loss": 2.1226, + "step": 233980 + }, + { + "epoch": 0.55, + "grad_norm": 2.125, + "learning_rate": 0.0001648682090222972, + "loss": 2.2397, + "step": 233985 + }, + { + "epoch": 0.55, + "grad_norm": 2.328125, + "learning_rate": 0.00016486680235145516, + "loss": 2.0074, + "step": 233990 + }, + { + "epoch": 0.55, + "grad_norm": 2.03125, + "learning_rate": 0.00016486539565845342, + "loss": 2.1443, + "step": 233995 + }, + { + "epoch": 0.55, + "grad_norm": 2.15625, + "learning_rate": 0.0001648639889432925, + "loss": 1.9022, + "step": 234000 + }, + { + "epoch": 0.55, + "grad_norm": 2.59375, + "learning_rate": 0.00016486258220597283, + "loss": 2.1789, + "step": 234005 + }, + { + "epoch": 0.55, + "grad_norm": 2.078125, + "learning_rate": 0.00016486117544649496, + "loss": 2.2281, + "step": 234010 + }, + { + "epoch": 0.55, + "grad_norm": 1.9375, + "learning_rate": 0.00016485976866485933, + "loss": 2.1792, + "step": 234015 + }, + { + "epoch": 0.55, + "grad_norm": 2.09375, + "learning_rate": 0.00016485836186106643, + "loss": 2.2278, + "step": 234020 + }, + { + "epoch": 0.55, + "grad_norm": 2.125, + "learning_rate": 0.0001648569550351167, + "loss": 2.103, + "step": 234025 + }, + { + "epoch": 0.55, + "grad_norm": 2.0, + "learning_rate": 0.00016485554818701072, + "loss": 2.1959, + "step": 234030 + }, + { + "epoch": 0.55, + "grad_norm": 2.078125, + "learning_rate": 0.0001648541413167489, + "loss": 2.0364, + "step": 234035 + }, + { + "epoch": 0.55, + "grad_norm": 2.09375, + "learning_rate": 0.00016485273442433168, + "loss": 2.205, + "step": 234040 + }, + { + "epoch": 0.55, + "grad_norm": 2.375, + "learning_rate": 0.0001648513275097596, + "loss": 2.1106, + "step": 234045 + }, + { + "epoch": 0.55, + "grad_norm": 2.453125, + "learning_rate": 0.00016484992057303317, + "loss": 2.0132, + "step": 234050 + }, + { + "epoch": 0.55, + "grad_norm": 2.15625, + "learning_rate": 0.0001648485136141528, + "loss": 1.9529, + "step": 234055 + }, + { + "epoch": 0.55, + "grad_norm": 1.8359375, + "learning_rate": 0.00016484710663311903, + "loss": 2.1581, + "step": 234060 + }, + { + "epoch": 0.55, + "grad_norm": 2.1875, + "learning_rate": 0.0001648456996299323, + "loss": 2.1363, + "step": 234065 + }, + { + "epoch": 0.55, + "grad_norm": 2.234375, + "learning_rate": 0.0001648442926045931, + "loss": 2.0863, + "step": 234070 + }, + { + "epoch": 0.55, + "grad_norm": 2.15625, + "learning_rate": 0.0001648428855571019, + "loss": 2.0861, + "step": 234075 + }, + { + "epoch": 0.55, + "grad_norm": 1.9375, + "learning_rate": 0.0001648414784874592, + "loss": 2.0427, + "step": 234080 + }, + { + "epoch": 0.55, + "grad_norm": 2.140625, + "learning_rate": 0.00016484007139566548, + "loss": 2.0954, + "step": 234085 + }, + { + "epoch": 0.55, + "grad_norm": 2.265625, + "learning_rate": 0.00016483866428172123, + "loss": 2.2506, + "step": 234090 + }, + { + "epoch": 0.55, + "grad_norm": 2.046875, + "learning_rate": 0.00016483725714562688, + "loss": 2.1842, + "step": 234095 + }, + { + "epoch": 0.55, + "grad_norm": 2.1875, + "learning_rate": 0.00016483584998738297, + "loss": 2.1774, + "step": 234100 + }, + { + "epoch": 0.55, + "grad_norm": 1.8828125, + "learning_rate": 0.00016483444280698996, + "loss": 2.1022, + "step": 234105 + }, + { + "epoch": 0.55, + "grad_norm": 2.109375, + "learning_rate": 0.00016483303560444834, + "loss": 1.9205, + "step": 234110 + }, + { + "epoch": 0.55, + "grad_norm": 2.296875, + "learning_rate": 0.00016483162837975857, + "loss": 2.2253, + "step": 234115 + }, + { + "epoch": 0.55, + "grad_norm": 2.4375, + "learning_rate": 0.00016483022113292112, + "loss": 2.0548, + "step": 234120 + }, + { + "epoch": 0.55, + "grad_norm": 1.9375, + "learning_rate": 0.0001648288138639365, + "loss": 2.1004, + "step": 234125 + }, + { + "epoch": 0.55, + "grad_norm": 2.46875, + "learning_rate": 0.00016482740657280517, + "loss": 2.1876, + "step": 234130 + }, + { + "epoch": 0.55, + "grad_norm": 1.9765625, + "learning_rate": 0.00016482599925952767, + "loss": 2.2499, + "step": 234135 + }, + { + "epoch": 0.55, + "grad_norm": 1.734375, + "learning_rate": 0.0001648245919241044, + "loss": 1.9432, + "step": 234140 + }, + { + "epoch": 0.55, + "grad_norm": 2.453125, + "learning_rate": 0.00016482318456653586, + "loss": 2.1414, + "step": 234145 + }, + { + "epoch": 0.55, + "grad_norm": 1.9375, + "learning_rate": 0.00016482177718682257, + "loss": 1.9788, + "step": 234150 + }, + { + "epoch": 0.55, + "grad_norm": 2.171875, + "learning_rate": 0.00016482036978496498, + "loss": 2.0217, + "step": 234155 + }, + { + "epoch": 0.55, + "grad_norm": 2.109375, + "learning_rate": 0.00016481896236096355, + "loss": 2.173, + "step": 234160 + }, + { + "epoch": 0.55, + "grad_norm": 1.9609375, + "learning_rate": 0.00016481755491481884, + "loss": 2.0516, + "step": 234165 + }, + { + "epoch": 0.55, + "grad_norm": 2.234375, + "learning_rate": 0.00016481614744653125, + "loss": 2.1219, + "step": 234170 + }, + { + "epoch": 0.55, + "grad_norm": 1.875, + "learning_rate": 0.00016481473995610127, + "loss": 1.9318, + "step": 234175 + }, + { + "epoch": 0.55, + "grad_norm": 2.0, + "learning_rate": 0.0001648133324435294, + "loss": 1.9758, + "step": 234180 + }, + { + "epoch": 0.55, + "grad_norm": 2.125, + "learning_rate": 0.00016481192490881615, + "loss": 2.1473, + "step": 234185 + }, + { + "epoch": 0.55, + "grad_norm": 2.6875, + "learning_rate": 0.00016481051735196193, + "loss": 2.0009, + "step": 234190 + }, + { + "epoch": 0.55, + "grad_norm": 2.578125, + "learning_rate": 0.0001648091097729673, + "loss": 2.1123, + "step": 234195 + }, + { + "epoch": 0.55, + "grad_norm": 2.421875, + "learning_rate": 0.0001648077021718327, + "loss": 2.2495, + "step": 234200 + }, + { + "epoch": 0.55, + "grad_norm": 2.125, + "learning_rate": 0.0001648062945485586, + "loss": 1.953, + "step": 234205 + }, + { + "epoch": 0.55, + "grad_norm": 1.9296875, + "learning_rate": 0.00016480488690314552, + "loss": 1.9266, + "step": 234210 + }, + { + "epoch": 0.55, + "grad_norm": 2.1875, + "learning_rate": 0.00016480347923559388, + "loss": 2.053, + "step": 234215 + }, + { + "epoch": 0.55, + "grad_norm": 2.03125, + "learning_rate": 0.0001648020715459042, + "loss": 2.0727, + "step": 234220 + }, + { + "epoch": 0.55, + "grad_norm": 2.46875, + "learning_rate": 0.00016480066383407698, + "loss": 2.2944, + "step": 234225 + }, + { + "epoch": 0.55, + "grad_norm": 2.09375, + "learning_rate": 0.00016479925610011264, + "loss": 1.9688, + "step": 234230 + }, + { + "epoch": 0.55, + "grad_norm": 2.234375, + "learning_rate": 0.00016479784834401174, + "loss": 2.0289, + "step": 234235 + }, + { + "epoch": 0.55, + "grad_norm": 2.328125, + "learning_rate": 0.0001647964405657747, + "loss": 2.0692, + "step": 234240 + }, + { + "epoch": 0.55, + "grad_norm": 2.3125, + "learning_rate": 0.00016479503276540201, + "loss": 2.4282, + "step": 234245 + }, + { + "epoch": 0.55, + "grad_norm": 2.15625, + "learning_rate": 0.0001647936249428942, + "loss": 1.9792, + "step": 234250 + }, + { + "epoch": 0.55, + "grad_norm": 2.421875, + "learning_rate": 0.00016479221709825168, + "loss": 2.1483, + "step": 234255 + }, + { + "epoch": 0.55, + "grad_norm": 2.140625, + "learning_rate": 0.00016479080923147496, + "loss": 2.1967, + "step": 234260 + }, + { + "epoch": 0.55, + "grad_norm": 1.8671875, + "learning_rate": 0.00016478940134256453, + "loss": 2.2012, + "step": 234265 + }, + { + "epoch": 0.55, + "grad_norm": 2.09375, + "learning_rate": 0.0001647879934315209, + "loss": 2.1816, + "step": 234270 + }, + { + "epoch": 0.55, + "grad_norm": 2.28125, + "learning_rate": 0.00016478658549834446, + "loss": 2.202, + "step": 234275 + }, + { + "epoch": 0.55, + "grad_norm": 2.40625, + "learning_rate": 0.0001647851775430358, + "loss": 2.0543, + "step": 234280 + }, + { + "epoch": 0.55, + "grad_norm": 2.046875, + "learning_rate": 0.0001647837695655953, + "loss": 2.2018, + "step": 234285 + }, + { + "epoch": 0.55, + "grad_norm": 2.046875, + "learning_rate": 0.00016478236156602352, + "loss": 2.1855, + "step": 234290 + }, + { + "epoch": 0.55, + "grad_norm": 2.171875, + "learning_rate": 0.0001647809535443209, + "loss": 2.0628, + "step": 234295 + }, + { + "epoch": 0.55, + "grad_norm": 2.234375, + "learning_rate": 0.00016477954550048795, + "loss": 2.103, + "step": 234300 + }, + { + "epoch": 0.55, + "grad_norm": 2.09375, + "learning_rate": 0.0001647781374345251, + "loss": 2.3183, + "step": 234305 + }, + { + "epoch": 0.55, + "grad_norm": 2.28125, + "learning_rate": 0.00016477672934643288, + "loss": 2.0242, + "step": 234310 + }, + { + "epoch": 0.55, + "grad_norm": 2.265625, + "learning_rate": 0.00016477532123621176, + "loss": 2.1962, + "step": 234315 + }, + { + "epoch": 0.55, + "grad_norm": 2.4375, + "learning_rate": 0.00016477391310386225, + "loss": 2.1699, + "step": 234320 + }, + { + "epoch": 0.55, + "grad_norm": 2.390625, + "learning_rate": 0.00016477250494938473, + "loss": 2.1048, + "step": 234325 + }, + { + "epoch": 0.55, + "grad_norm": 2.171875, + "learning_rate": 0.00016477109677277978, + "loss": 2.1186, + "step": 234330 + }, + { + "epoch": 0.55, + "grad_norm": 1.9921875, + "learning_rate": 0.00016476968857404784, + "loss": 2.1534, + "step": 234335 + }, + { + "epoch": 0.55, + "grad_norm": 3.046875, + "learning_rate": 0.0001647682803531894, + "loss": 2.0685, + "step": 234340 + }, + { + "epoch": 0.55, + "grad_norm": 2.171875, + "learning_rate": 0.00016476687211020496, + "loss": 1.9332, + "step": 234345 + }, + { + "epoch": 0.55, + "grad_norm": 2.28125, + "learning_rate": 0.00016476546384509497, + "loss": 2.137, + "step": 234350 + }, + { + "epoch": 0.55, + "grad_norm": 2.265625, + "learning_rate": 0.00016476405555785993, + "loss": 2.1296, + "step": 234355 + }, + { + "epoch": 0.55, + "grad_norm": 1.8203125, + "learning_rate": 0.0001647626472485003, + "loss": 2.0169, + "step": 234360 + }, + { + "epoch": 0.55, + "grad_norm": 2.28125, + "learning_rate": 0.00016476123891701655, + "loss": 2.156, + "step": 234365 + }, + { + "epoch": 0.55, + "grad_norm": 2.140625, + "learning_rate": 0.00016475983056340923, + "loss": 2.0376, + "step": 234370 + }, + { + "epoch": 0.55, + "grad_norm": 2.453125, + "learning_rate": 0.00016475842218767873, + "loss": 2.2379, + "step": 234375 + }, + { + "epoch": 0.55, + "grad_norm": 2.1875, + "learning_rate": 0.00016475701378982564, + "loss": 1.9314, + "step": 234380 + }, + { + "epoch": 0.55, + "grad_norm": 1.8046875, + "learning_rate": 0.00016475560536985035, + "loss": 2.0573, + "step": 234385 + }, + { + "epoch": 0.55, + "grad_norm": 2.03125, + "learning_rate": 0.00016475419692775338, + "loss": 2.2047, + "step": 234390 + }, + { + "epoch": 0.55, + "grad_norm": 2.359375, + "learning_rate": 0.00016475278846353518, + "loss": 2.0021, + "step": 234395 + }, + { + "epoch": 0.55, + "grad_norm": 1.625, + "learning_rate": 0.00016475137997719626, + "loss": 1.973, + "step": 234400 + }, + { + "epoch": 0.55, + "grad_norm": 1.921875, + "learning_rate": 0.00016474997146873708, + "loss": 2.0131, + "step": 234405 + }, + { + "epoch": 0.55, + "grad_norm": 2.140625, + "learning_rate": 0.00016474856293815818, + "loss": 2.1007, + "step": 234410 + }, + { + "epoch": 0.55, + "grad_norm": 1.9140625, + "learning_rate": 0.00016474715438545998, + "loss": 2.1248, + "step": 234415 + }, + { + "epoch": 0.55, + "grad_norm": 1.828125, + "learning_rate": 0.00016474574581064295, + "loss": 1.9907, + "step": 234420 + }, + { + "epoch": 0.55, + "grad_norm": 1.96875, + "learning_rate": 0.00016474433721370762, + "loss": 2.216, + "step": 234425 + }, + { + "epoch": 0.55, + "grad_norm": 2.109375, + "learning_rate": 0.00016474292859465446, + "loss": 1.9077, + "step": 234430 + }, + { + "epoch": 0.55, + "grad_norm": 2.109375, + "learning_rate": 0.00016474151995348396, + "loss": 2.0811, + "step": 234435 + }, + { + "epoch": 0.55, + "grad_norm": 2.171875, + "learning_rate": 0.00016474011129019657, + "loss": 2.1329, + "step": 234440 + }, + { + "epoch": 0.55, + "grad_norm": 1.8984375, + "learning_rate": 0.00016473870260479273, + "loss": 1.9828, + "step": 234445 + }, + { + "epoch": 0.55, + "grad_norm": 2.15625, + "learning_rate": 0.000164737293897273, + "loss": 2.1455, + "step": 234450 + }, + { + "epoch": 0.55, + "grad_norm": 2.28125, + "learning_rate": 0.00016473588516763784, + "loss": 2.0135, + "step": 234455 + }, + { + "epoch": 0.55, + "grad_norm": 1.875, + "learning_rate": 0.00016473447641588776, + "loss": 2.2734, + "step": 234460 + }, + { + "epoch": 0.55, + "grad_norm": 2.65625, + "learning_rate": 0.0001647330676420232, + "loss": 2.1474, + "step": 234465 + }, + { + "epoch": 0.55, + "grad_norm": 2.3125, + "learning_rate": 0.00016473165884604463, + "loss": 2.0642, + "step": 234470 + }, + { + "epoch": 0.55, + "grad_norm": 2.40625, + "learning_rate": 0.00016473025002795253, + "loss": 2.2637, + "step": 234475 + }, + { + "epoch": 0.55, + "grad_norm": 1.7734375, + "learning_rate": 0.00016472884118774744, + "loss": 2.1919, + "step": 234480 + }, + { + "epoch": 0.55, + "grad_norm": 2.140625, + "learning_rate": 0.0001647274323254298, + "loss": 2.1927, + "step": 234485 + }, + { + "epoch": 0.55, + "grad_norm": 2.515625, + "learning_rate": 0.0001647260234410001, + "loss": 2.172, + "step": 234490 + }, + { + "epoch": 0.55, + "grad_norm": 1.875, + "learning_rate": 0.0001647246145344588, + "loss": 2.1494, + "step": 234495 + }, + { + "epoch": 0.55, + "grad_norm": 2.0, + "learning_rate": 0.0001647232056058064, + "loss": 1.9919, + "step": 234500 + }, + { + "epoch": 0.55, + "grad_norm": 2.40625, + "learning_rate": 0.0001647217966550434, + "loss": 2.1271, + "step": 234505 + }, + { + "epoch": 0.55, + "grad_norm": 2.28125, + "learning_rate": 0.00016472038768217023, + "loss": 2.131, + "step": 234510 + }, + { + "epoch": 0.55, + "grad_norm": 1.828125, + "learning_rate": 0.0001647189786871874, + "loss": 1.9593, + "step": 234515 + }, + { + "epoch": 0.55, + "grad_norm": 2.3125, + "learning_rate": 0.00016471756967009543, + "loss": 1.9279, + "step": 234520 + }, + { + "epoch": 0.55, + "grad_norm": 1.9765625, + "learning_rate": 0.00016471616063089473, + "loss": 2.1443, + "step": 234525 + }, + { + "epoch": 0.55, + "grad_norm": 1.9453125, + "learning_rate": 0.0001647147515695858, + "loss": 2.0708, + "step": 234530 + }, + { + "epoch": 0.55, + "grad_norm": 2.125, + "learning_rate": 0.00016471334248616918, + "loss": 2.2026, + "step": 234535 + }, + { + "epoch": 0.55, + "grad_norm": 2.046875, + "learning_rate": 0.0001647119333806453, + "loss": 2.1625, + "step": 234540 + }, + { + "epoch": 0.55, + "grad_norm": 2.625, + "learning_rate": 0.00016471052425301466, + "loss": 1.9578, + "step": 234545 + }, + { + "epoch": 0.55, + "grad_norm": 2.28125, + "learning_rate": 0.0001647091151032777, + "loss": 2.1918, + "step": 234550 + }, + { + "epoch": 0.55, + "grad_norm": 2.0625, + "learning_rate": 0.00016470770593143494, + "loss": 2.1607, + "step": 234555 + }, + { + "epoch": 0.55, + "grad_norm": 2.25, + "learning_rate": 0.00016470629673748688, + "loss": 2.1954, + "step": 234560 + }, + { + "epoch": 0.55, + "grad_norm": 2.40625, + "learning_rate": 0.00016470488752143395, + "loss": 2.1338, + "step": 234565 + }, + { + "epoch": 0.55, + "grad_norm": 2.03125, + "learning_rate": 0.00016470347828327666, + "loss": 2.1181, + "step": 234570 + }, + { + "epoch": 0.55, + "grad_norm": 1.9140625, + "learning_rate": 0.00016470206902301552, + "loss": 2.2989, + "step": 234575 + }, + { + "epoch": 0.55, + "grad_norm": 2.171875, + "learning_rate": 0.0001647006597406509, + "loss": 2.2361, + "step": 234580 + }, + { + "epoch": 0.55, + "grad_norm": 2.03125, + "learning_rate": 0.00016469925043618343, + "loss": 2.1353, + "step": 234585 + }, + { + "epoch": 0.55, + "grad_norm": 1.7734375, + "learning_rate": 0.00016469784110961352, + "loss": 2.0241, + "step": 234590 + }, + { + "epoch": 0.55, + "grad_norm": 2.546875, + "learning_rate": 0.00016469643176094163, + "loss": 2.0579, + "step": 234595 + }, + { + "epoch": 0.55, + "grad_norm": 2.390625, + "learning_rate": 0.0001646950223901683, + "loss": 2.1769, + "step": 234600 + }, + { + "epoch": 0.55, + "grad_norm": 2.421875, + "learning_rate": 0.00016469361299729395, + "loss": 2.1385, + "step": 234605 + }, + { + "epoch": 0.55, + "grad_norm": 2.234375, + "learning_rate": 0.00016469220358231908, + "loss": 2.1783, + "step": 234610 + }, + { + "epoch": 0.55, + "grad_norm": 2.234375, + "learning_rate": 0.0001646907941452442, + "loss": 2.1324, + "step": 234615 + }, + { + "epoch": 0.55, + "grad_norm": 2.3125, + "learning_rate": 0.00016468938468606976, + "loss": 2.257, + "step": 234620 + }, + { + "epoch": 0.55, + "grad_norm": 1.7421875, + "learning_rate": 0.00016468797520479623, + "loss": 1.9573, + "step": 234625 + }, + { + "epoch": 0.55, + "grad_norm": 2.015625, + "learning_rate": 0.00016468656570142417, + "loss": 2.1908, + "step": 234630 + }, + { + "epoch": 0.55, + "grad_norm": 1.8984375, + "learning_rate": 0.00016468515617595398, + "loss": 2.2722, + "step": 234635 + }, + { + "epoch": 0.55, + "grad_norm": 2.53125, + "learning_rate": 0.00016468374662838615, + "loss": 2.0145, + "step": 234640 + }, + { + "epoch": 0.55, + "grad_norm": 2.375, + "learning_rate": 0.0001646823370587212, + "loss": 2.0866, + "step": 234645 + }, + { + "epoch": 0.55, + "grad_norm": 2.1875, + "learning_rate": 0.00016468092746695957, + "loss": 1.9876, + "step": 234650 + }, + { + "epoch": 0.55, + "grad_norm": 1.984375, + "learning_rate": 0.00016467951785310178, + "loss": 1.9964, + "step": 234655 + }, + { + "epoch": 0.55, + "grad_norm": 2.0625, + "learning_rate": 0.0001646781082171483, + "loss": 1.9901, + "step": 234660 + }, + { + "epoch": 0.55, + "grad_norm": 2.34375, + "learning_rate": 0.00016467669855909958, + "loss": 2.0842, + "step": 234665 + }, + { + "epoch": 0.55, + "grad_norm": 2.34375, + "learning_rate": 0.00016467528887895613, + "loss": 2.2358, + "step": 234670 + }, + { + "epoch": 0.55, + "grad_norm": 2.078125, + "learning_rate": 0.00016467387917671843, + "loss": 2.0546, + "step": 234675 + }, + { + "epoch": 0.55, + "grad_norm": 1.890625, + "learning_rate": 0.000164672469452387, + "loss": 2.0997, + "step": 234680 + }, + { + "epoch": 0.55, + "grad_norm": 2.015625, + "learning_rate": 0.00016467105970596223, + "loss": 2.0768, + "step": 234685 + }, + { + "epoch": 0.55, + "grad_norm": 2.234375, + "learning_rate": 0.0001646696499374447, + "loss": 2.0133, + "step": 234690 + }, + { + "epoch": 0.55, + "grad_norm": 2.15625, + "learning_rate": 0.00016466824014683482, + "loss": 2.0554, + "step": 234695 + }, + { + "epoch": 0.55, + "grad_norm": 2.359375, + "learning_rate": 0.00016466683033413307, + "loss": 2.0176, + "step": 234700 + }, + { + "epoch": 0.55, + "grad_norm": 2.0625, + "learning_rate": 0.00016466542049934, + "loss": 2.1334, + "step": 234705 + }, + { + "epoch": 0.55, + "grad_norm": 2.46875, + "learning_rate": 0.00016466401064245604, + "loss": 2.0273, + "step": 234710 + }, + { + "epoch": 0.55, + "grad_norm": 2.375, + "learning_rate": 0.00016466260076348165, + "loss": 2.2297, + "step": 234715 + }, + { + "epoch": 0.55, + "grad_norm": 3.359375, + "learning_rate": 0.00016466119086241737, + "loss": 2.0155, + "step": 234720 + }, + { + "epoch": 0.55, + "grad_norm": 2.125, + "learning_rate": 0.00016465978093926367, + "loss": 2.0541, + "step": 234725 + }, + { + "epoch": 0.55, + "grad_norm": 2.296875, + "learning_rate": 0.000164658370994021, + "loss": 2.1749, + "step": 234730 + }, + { + "epoch": 0.55, + "grad_norm": 1.9296875, + "learning_rate": 0.00016465696102668987, + "loss": 2.0645, + "step": 234735 + }, + { + "epoch": 0.55, + "grad_norm": 2.453125, + "learning_rate": 0.00016465555103727073, + "loss": 2.0793, + "step": 234740 + }, + { + "epoch": 0.55, + "grad_norm": 2.21875, + "learning_rate": 0.0001646541410257641, + "loss": 2.2086, + "step": 234745 + }, + { + "epoch": 0.55, + "grad_norm": 2.125, + "learning_rate": 0.0001646527309921704, + "loss": 2.1345, + "step": 234750 + }, + { + "epoch": 0.55, + "grad_norm": 2.15625, + "learning_rate": 0.0001646513209364902, + "loss": 1.9394, + "step": 234755 + }, + { + "epoch": 0.55, + "grad_norm": 2.28125, + "learning_rate": 0.00016464991085872392, + "loss": 2.2335, + "step": 234760 + }, + { + "epoch": 0.55, + "grad_norm": 2.328125, + "learning_rate": 0.00016464850075887207, + "loss": 2.0767, + "step": 234765 + }, + { + "epoch": 0.55, + "grad_norm": 2.015625, + "learning_rate": 0.00016464709063693508, + "loss": 2.2057, + "step": 234770 + }, + { + "epoch": 0.55, + "grad_norm": 2.3125, + "learning_rate": 0.00016464568049291348, + "loss": 2.2131, + "step": 234775 + }, + { + "epoch": 0.55, + "grad_norm": 2.265625, + "learning_rate": 0.0001646442703268078, + "loss": 2.1718, + "step": 234780 + }, + { + "epoch": 0.55, + "grad_norm": 2.390625, + "learning_rate": 0.00016464286013861843, + "loss": 2.0591, + "step": 234785 + }, + { + "epoch": 0.55, + "grad_norm": 2.265625, + "learning_rate": 0.00016464144992834587, + "loss": 2.282, + "step": 234790 + }, + { + "epoch": 0.55, + "grad_norm": 2.078125, + "learning_rate": 0.00016464003969599063, + "loss": 2.0387, + "step": 234795 + }, + { + "epoch": 0.55, + "grad_norm": 2.296875, + "learning_rate": 0.00016463862944155318, + "loss": 2.0059, + "step": 234800 + }, + { + "epoch": 0.55, + "grad_norm": 2.4375, + "learning_rate": 0.000164637219165034, + "loss": 2.1157, + "step": 234805 + }, + { + "epoch": 0.55, + "grad_norm": 2.234375, + "learning_rate": 0.00016463580886643357, + "loss": 2.1803, + "step": 234810 + }, + { + "epoch": 0.55, + "grad_norm": 2.671875, + "learning_rate": 0.0001646343985457524, + "loss": 1.9675, + "step": 234815 + }, + { + "epoch": 0.55, + "grad_norm": 1.796875, + "learning_rate": 0.00016463298820299093, + "loss": 2.1267, + "step": 234820 + }, + { + "epoch": 0.55, + "grad_norm": 2.0, + "learning_rate": 0.00016463157783814966, + "loss": 2.2049, + "step": 234825 + }, + { + "epoch": 0.55, + "grad_norm": 1.8828125, + "learning_rate": 0.00016463016745122904, + "loss": 2.1585, + "step": 234830 + }, + { + "epoch": 0.55, + "grad_norm": 2.125, + "learning_rate": 0.00016462875704222966, + "loss": 2.22, + "step": 234835 + }, + { + "epoch": 0.55, + "grad_norm": 1.5703125, + "learning_rate": 0.00016462734661115187, + "loss": 2.0757, + "step": 234840 + }, + { + "epoch": 0.55, + "grad_norm": 2.390625, + "learning_rate": 0.0001646259361579962, + "loss": 2.1368, + "step": 234845 + }, + { + "epoch": 0.55, + "grad_norm": 2.3125, + "learning_rate": 0.00016462452568276315, + "loss": 2.1878, + "step": 234850 + }, + { + "epoch": 0.55, + "grad_norm": 2.25, + "learning_rate": 0.0001646231151854532, + "loss": 2.138, + "step": 234855 + }, + { + "epoch": 0.55, + "grad_norm": 2.125, + "learning_rate": 0.0001646217046660668, + "loss": 2.1836, + "step": 234860 + }, + { + "epoch": 0.55, + "grad_norm": 2.015625, + "learning_rate": 0.00016462029412460448, + "loss": 2.1311, + "step": 234865 + }, + { + "epoch": 0.55, + "grad_norm": 2.296875, + "learning_rate": 0.0001646188835610667, + "loss": 2.1061, + "step": 234870 + }, + { + "epoch": 0.55, + "grad_norm": 1.9765625, + "learning_rate": 0.0001646174729754539, + "loss": 2.0149, + "step": 234875 + }, + { + "epoch": 0.55, + "grad_norm": 2.21875, + "learning_rate": 0.0001646160623677666, + "loss": 2.1259, + "step": 234880 + }, + { + "epoch": 0.55, + "grad_norm": 2.296875, + "learning_rate": 0.00016461465173800533, + "loss": 2.124, + "step": 234885 + }, + { + "epoch": 0.55, + "grad_norm": 2.03125, + "learning_rate": 0.00016461324108617048, + "loss": 2.0821, + "step": 234890 + }, + { + "epoch": 0.55, + "grad_norm": 2.1875, + "learning_rate": 0.0001646118304122626, + "loss": 2.1419, + "step": 234895 + }, + { + "epoch": 0.55, + "grad_norm": 2.0, + "learning_rate": 0.00016461041971628214, + "loss": 2.196, + "step": 234900 + }, + { + "epoch": 0.55, + "grad_norm": 2.0, + "learning_rate": 0.00016460900899822957, + "loss": 2.0488, + "step": 234905 + }, + { + "epoch": 0.55, + "grad_norm": 2.296875, + "learning_rate": 0.0001646075982581054, + "loss": 2.1104, + "step": 234910 + }, + { + "epoch": 0.55, + "grad_norm": 2.4375, + "learning_rate": 0.00016460618749591013, + "loss": 2.1559, + "step": 234915 + }, + { + "epoch": 0.55, + "grad_norm": 2.53125, + "learning_rate": 0.0001646047767116442, + "loss": 1.872, + "step": 234920 + }, + { + "epoch": 0.55, + "grad_norm": 1.9375, + "learning_rate": 0.00016460336590530807, + "loss": 2.0531, + "step": 234925 + }, + { + "epoch": 0.55, + "grad_norm": 2.234375, + "learning_rate": 0.00016460195507690228, + "loss": 2.1812, + "step": 234930 + }, + { + "epoch": 0.55, + "grad_norm": 2.3125, + "learning_rate": 0.00016460054422642731, + "loss": 2.0548, + "step": 234935 + }, + { + "epoch": 0.55, + "grad_norm": 2.078125, + "learning_rate": 0.00016459913335388363, + "loss": 2.1702, + "step": 234940 + }, + { + "epoch": 0.55, + "grad_norm": 2.1875, + "learning_rate": 0.00016459772245927168, + "loss": 2.1701, + "step": 234945 + }, + { + "epoch": 0.55, + "grad_norm": 2.328125, + "learning_rate": 0.000164596311542592, + "loss": 2.0262, + "step": 234950 + }, + { + "epoch": 0.55, + "grad_norm": 2.75, + "learning_rate": 0.000164594900603845, + "loss": 2.1125, + "step": 234955 + }, + { + "epoch": 0.55, + "grad_norm": 1.953125, + "learning_rate": 0.0001645934896430313, + "loss": 2.1882, + "step": 234960 + }, + { + "epoch": 0.55, + "grad_norm": 2.3125, + "learning_rate": 0.00016459207866015123, + "loss": 1.9909, + "step": 234965 + }, + { + "epoch": 0.55, + "grad_norm": 2.484375, + "learning_rate": 0.00016459066765520537, + "loss": 2.2299, + "step": 234970 + }, + { + "epoch": 0.55, + "grad_norm": 2.84375, + "learning_rate": 0.00016458925662819412, + "loss": 2.0568, + "step": 234975 + }, + { + "epoch": 0.55, + "grad_norm": 2.484375, + "learning_rate": 0.00016458784557911807, + "loss": 1.9455, + "step": 234980 + }, + { + "epoch": 0.55, + "grad_norm": 3.40625, + "learning_rate": 0.00016458643450797757, + "loss": 2.1633, + "step": 234985 + }, + { + "epoch": 0.55, + "grad_norm": 2.171875, + "learning_rate": 0.0001645850234147732, + "loss": 2.1515, + "step": 234990 + }, + { + "epoch": 0.55, + "grad_norm": 2.71875, + "learning_rate": 0.00016458361229950544, + "loss": 2.1133, + "step": 234995 + }, + { + "epoch": 0.55, + "grad_norm": 2.21875, + "learning_rate": 0.0001645822011621747, + "loss": 2.1792, + "step": 235000 + }, + { + "epoch": 0.55, + "grad_norm": 2.5, + "learning_rate": 0.00016458079000278156, + "loss": 2.0106, + "step": 235005 + }, + { + "epoch": 0.55, + "grad_norm": 2.21875, + "learning_rate": 0.00016457937882132642, + "loss": 2.0777, + "step": 235010 + }, + { + "epoch": 0.55, + "grad_norm": 1.9921875, + "learning_rate": 0.0001645779676178098, + "loss": 2.2669, + "step": 235015 + }, + { + "epoch": 0.55, + "grad_norm": 1.90625, + "learning_rate": 0.00016457655639223216, + "loss": 2.0997, + "step": 235020 + }, + { + "epoch": 0.55, + "grad_norm": 2.453125, + "learning_rate": 0.00016457514514459405, + "loss": 2.0542, + "step": 235025 + }, + { + "epoch": 0.55, + "grad_norm": 1.859375, + "learning_rate": 0.00016457373387489586, + "loss": 2.0984, + "step": 235030 + }, + { + "epoch": 0.55, + "grad_norm": 2.59375, + "learning_rate": 0.0001645723225831381, + "loss": 2.07, + "step": 235035 + }, + { + "epoch": 0.55, + "grad_norm": 1.90625, + "learning_rate": 0.00016457091126932126, + "loss": 2.1218, + "step": 235040 + }, + { + "epoch": 0.55, + "grad_norm": 2.0, + "learning_rate": 0.00016456949993344587, + "loss": 2.0241, + "step": 235045 + }, + { + "epoch": 0.55, + "grad_norm": 2.4375, + "learning_rate": 0.00016456808857551232, + "loss": 2.0647, + "step": 235050 + }, + { + "epoch": 0.55, + "grad_norm": 1.9453125, + "learning_rate": 0.00016456667719552117, + "loss": 1.941, + "step": 235055 + }, + { + "epoch": 0.55, + "grad_norm": 2.25, + "learning_rate": 0.00016456526579347288, + "loss": 1.9583, + "step": 235060 + }, + { + "epoch": 0.55, + "grad_norm": 2.484375, + "learning_rate": 0.0001645638543693679, + "loss": 2.3221, + "step": 235065 + }, + { + "epoch": 0.55, + "grad_norm": 2.390625, + "learning_rate": 0.00016456244292320673, + "loss": 2.1297, + "step": 235070 + }, + { + "epoch": 0.55, + "grad_norm": 1.9375, + "learning_rate": 0.00016456103145498987, + "loss": 2.2583, + "step": 235075 + }, + { + "epoch": 0.55, + "grad_norm": 2.4375, + "learning_rate": 0.0001645596199647178, + "loss": 2.015, + "step": 235080 + }, + { + "epoch": 0.55, + "grad_norm": 2.296875, + "learning_rate": 0.000164558208452391, + "loss": 2.1515, + "step": 235085 + }, + { + "epoch": 0.55, + "grad_norm": 2.046875, + "learning_rate": 0.0001645567969180099, + "loss": 1.9993, + "step": 235090 + }, + { + "epoch": 0.55, + "grad_norm": 1.7890625, + "learning_rate": 0.00016455538536157507, + "loss": 2.1613, + "step": 235095 + }, + { + "epoch": 0.55, + "grad_norm": 2.125, + "learning_rate": 0.00016455397378308694, + "loss": 2.1013, + "step": 235100 + }, + { + "epoch": 0.55, + "grad_norm": 2.34375, + "learning_rate": 0.000164552562182546, + "loss": 2.3083, + "step": 235105 + }, + { + "epoch": 0.55, + "grad_norm": 1.71875, + "learning_rate": 0.00016455115055995275, + "loss": 1.9571, + "step": 235110 + }, + { + "epoch": 0.55, + "grad_norm": 2.28125, + "learning_rate": 0.00016454973891530763, + "loss": 2.0825, + "step": 235115 + }, + { + "epoch": 0.55, + "grad_norm": 1.8671875, + "learning_rate": 0.00016454832724861117, + "loss": 2.0529, + "step": 235120 + }, + { + "epoch": 0.55, + "grad_norm": 2.359375, + "learning_rate": 0.0001645469155598638, + "loss": 2.0925, + "step": 235125 + }, + { + "epoch": 0.55, + "grad_norm": 2.1875, + "learning_rate": 0.00016454550384906606, + "loss": 2.1387, + "step": 235130 + }, + { + "epoch": 0.55, + "grad_norm": 3.40625, + "learning_rate": 0.00016454409211621838, + "loss": 2.0417, + "step": 235135 + }, + { + "epoch": 0.55, + "grad_norm": 2.28125, + "learning_rate": 0.0001645426803613213, + "loss": 2.1477, + "step": 235140 + }, + { + "epoch": 0.55, + "grad_norm": 2.109375, + "learning_rate": 0.00016454126858437524, + "loss": 2.019, + "step": 235145 + }, + { + "epoch": 0.55, + "grad_norm": 2.140625, + "learning_rate": 0.00016453985678538074, + "loss": 2.2878, + "step": 235150 + }, + { + "epoch": 0.55, + "grad_norm": 2.296875, + "learning_rate": 0.00016453844496433826, + "loss": 2.0763, + "step": 235155 + }, + { + "epoch": 0.55, + "grad_norm": 2.453125, + "learning_rate": 0.00016453703312124823, + "loss": 1.9506, + "step": 235160 + }, + { + "epoch": 0.55, + "grad_norm": 2.359375, + "learning_rate": 0.00016453562125611125, + "loss": 2.061, + "step": 235165 + }, + { + "epoch": 0.55, + "grad_norm": 2.390625, + "learning_rate": 0.00016453420936892764, + "loss": 1.8463, + "step": 235170 + }, + { + "epoch": 0.55, + "grad_norm": 2.125, + "learning_rate": 0.00016453279745969805, + "loss": 2.1002, + "step": 235175 + }, + { + "epoch": 0.55, + "grad_norm": 1.8984375, + "learning_rate": 0.00016453138552842284, + "loss": 2.1383, + "step": 235180 + }, + { + "epoch": 0.55, + "grad_norm": 2.28125, + "learning_rate": 0.00016452997357510254, + "loss": 2.1641, + "step": 235185 + }, + { + "epoch": 0.55, + "grad_norm": 2.265625, + "learning_rate": 0.00016452856159973766, + "loss": 2.107, + "step": 235190 + }, + { + "epoch": 0.55, + "grad_norm": 2.0, + "learning_rate": 0.00016452714960232866, + "loss": 2.1008, + "step": 235195 + }, + { + "epoch": 0.55, + "grad_norm": 2.0, + "learning_rate": 0.00016452573758287596, + "loss": 1.9425, + "step": 235200 + }, + { + "epoch": 0.55, + "grad_norm": 1.9375, + "learning_rate": 0.00016452432554138016, + "loss": 1.9738, + "step": 235205 + }, + { + "epoch": 0.55, + "grad_norm": 1.96875, + "learning_rate": 0.00016452291347784166, + "loss": 2.0493, + "step": 235210 + }, + { + "epoch": 0.55, + "grad_norm": 2.171875, + "learning_rate": 0.00016452150139226098, + "loss": 2.1177, + "step": 235215 + }, + { + "epoch": 0.55, + "grad_norm": 2.09375, + "learning_rate": 0.00016452008928463852, + "loss": 2.1038, + "step": 235220 + }, + { + "epoch": 0.55, + "grad_norm": 2.421875, + "learning_rate": 0.0001645186771549749, + "loss": 2.307, + "step": 235225 + }, + { + "epoch": 0.55, + "grad_norm": 2.234375, + "learning_rate": 0.00016451726500327048, + "loss": 2.3316, + "step": 235230 + }, + { + "epoch": 0.55, + "grad_norm": 1.96875, + "learning_rate": 0.0001645158528295258, + "loss": 2.1601, + "step": 235235 + }, + { + "epoch": 0.55, + "grad_norm": 2.140625, + "learning_rate": 0.00016451444063374137, + "loss": 1.9189, + "step": 235240 + }, + { + "epoch": 0.55, + "grad_norm": 2.078125, + "learning_rate": 0.00016451302841591762, + "loss": 2.1478, + "step": 235245 + }, + { + "epoch": 0.55, + "grad_norm": 2.90625, + "learning_rate": 0.00016451161617605503, + "loss": 2.0206, + "step": 235250 + }, + { + "epoch": 0.55, + "grad_norm": 2.4375, + "learning_rate": 0.0001645102039141541, + "loss": 2.201, + "step": 235255 + }, + { + "epoch": 0.55, + "grad_norm": 2.328125, + "learning_rate": 0.00016450879163021532, + "loss": 2.1432, + "step": 235260 + }, + { + "epoch": 0.55, + "grad_norm": 2.15625, + "learning_rate": 0.0001645073793242392, + "loss": 2.0089, + "step": 235265 + }, + { + "epoch": 0.55, + "grad_norm": 1.9609375, + "learning_rate": 0.00016450596699622616, + "loss": 2.0359, + "step": 235270 + }, + { + "epoch": 0.55, + "grad_norm": 2.359375, + "learning_rate": 0.00016450455464617672, + "loss": 2.0843, + "step": 235275 + }, + { + "epoch": 0.55, + "grad_norm": 2.546875, + "learning_rate": 0.00016450314227409133, + "loss": 1.9677, + "step": 235280 + }, + { + "epoch": 0.55, + "grad_norm": 2.1875, + "learning_rate": 0.0001645017298799705, + "loss": 2.2053, + "step": 235285 + }, + { + "epoch": 0.55, + "grad_norm": 2.125, + "learning_rate": 0.00016450031746381474, + "loss": 2.093, + "step": 235290 + }, + { + "epoch": 0.55, + "grad_norm": 2.046875, + "learning_rate": 0.0001644989050256245, + "loss": 2.1818, + "step": 235295 + }, + { + "epoch": 0.55, + "grad_norm": 2.828125, + "learning_rate": 0.00016449749256540025, + "loss": 2.0822, + "step": 235300 + }, + { + "epoch": 0.55, + "grad_norm": 2.28125, + "learning_rate": 0.0001644960800831425, + "loss": 1.9017, + "step": 235305 + }, + { + "epoch": 0.55, + "grad_norm": 2.65625, + "learning_rate": 0.0001644946675788517, + "loss": 2.156, + "step": 235310 + }, + { + "epoch": 0.55, + "grad_norm": 2.109375, + "learning_rate": 0.00016449325505252834, + "loss": 2.1037, + "step": 235315 + }, + { + "epoch": 0.55, + "grad_norm": 2.34375, + "learning_rate": 0.00016449184250417296, + "loss": 2.1176, + "step": 235320 + }, + { + "epoch": 0.55, + "grad_norm": 1.875, + "learning_rate": 0.000164490429933786, + "loss": 2.0273, + "step": 235325 + }, + { + "epoch": 0.55, + "grad_norm": 2.546875, + "learning_rate": 0.00016448901734136787, + "loss": 2.0831, + "step": 235330 + }, + { + "epoch": 0.55, + "grad_norm": 2.328125, + "learning_rate": 0.00016448760472691917, + "loss": 2.1124, + "step": 235335 + }, + { + "epoch": 0.55, + "grad_norm": 2.1875, + "learning_rate": 0.00016448619209044033, + "loss": 2.129, + "step": 235340 + }, + { + "epoch": 0.55, + "grad_norm": 1.921875, + "learning_rate": 0.00016448477943193184, + "loss": 2.005, + "step": 235345 + }, + { + "epoch": 0.55, + "grad_norm": 2.203125, + "learning_rate": 0.00016448336675139416, + "loss": 2.1641, + "step": 235350 + }, + { + "epoch": 0.55, + "grad_norm": 1.890625, + "learning_rate": 0.00016448195404882783, + "loss": 2.0128, + "step": 235355 + }, + { + "epoch": 0.55, + "grad_norm": 2.15625, + "learning_rate": 0.00016448054132423327, + "loss": 2.0822, + "step": 235360 + }, + { + "epoch": 0.55, + "grad_norm": 2.4375, + "learning_rate": 0.000164479128577611, + "loss": 1.9507, + "step": 235365 + }, + { + "epoch": 0.55, + "grad_norm": 1.96875, + "learning_rate": 0.00016447771580896148, + "loss": 1.9741, + "step": 235370 + }, + { + "epoch": 0.55, + "grad_norm": 2.59375, + "learning_rate": 0.0001644763030182852, + "loss": 2.1036, + "step": 235375 + }, + { + "epoch": 0.55, + "grad_norm": 2.203125, + "learning_rate": 0.00016447489020558265, + "loss": 2.1186, + "step": 235380 + }, + { + "epoch": 0.55, + "grad_norm": 3.421875, + "learning_rate": 0.00016447347737085427, + "loss": 2.2285, + "step": 235385 + }, + { + "epoch": 0.55, + "grad_norm": 2.046875, + "learning_rate": 0.00016447206451410064, + "loss": 2.1046, + "step": 235390 + }, + { + "epoch": 0.55, + "grad_norm": 2.0, + "learning_rate": 0.00016447065163532217, + "loss": 2.1418, + "step": 235395 + }, + { + "epoch": 0.55, + "grad_norm": 2.15625, + "learning_rate": 0.00016446923873451934, + "loss": 2.0698, + "step": 235400 + }, + { + "epoch": 0.55, + "grad_norm": 2.125, + "learning_rate": 0.0001644678258116927, + "loss": 2.0936, + "step": 235405 + }, + { + "epoch": 0.55, + "grad_norm": 1.765625, + "learning_rate": 0.0001644664128668426, + "loss": 2.0313, + "step": 235410 + }, + { + "epoch": 0.55, + "grad_norm": 1.9375, + "learning_rate": 0.00016446499989996964, + "loss": 2.2028, + "step": 235415 + }, + { + "epoch": 0.55, + "grad_norm": 2.03125, + "learning_rate": 0.00016446358691107427, + "loss": 2.0517, + "step": 235420 + }, + { + "epoch": 0.55, + "grad_norm": 2.234375, + "learning_rate": 0.00016446217390015698, + "loss": 2.1539, + "step": 235425 + }, + { + "epoch": 0.55, + "grad_norm": 1.96875, + "learning_rate": 0.00016446076086721822, + "loss": 2.1374, + "step": 235430 + }, + { + "epoch": 0.55, + "grad_norm": 1.9453125, + "learning_rate": 0.0001644593478122585, + "loss": 2.1012, + "step": 235435 + }, + { + "epoch": 0.55, + "grad_norm": 2.046875, + "learning_rate": 0.00016445793473527832, + "loss": 2.0511, + "step": 235440 + }, + { + "epoch": 0.55, + "grad_norm": 1.75, + "learning_rate": 0.00016445652163627812, + "loss": 2.1175, + "step": 235445 + }, + { + "epoch": 0.55, + "grad_norm": 1.9375, + "learning_rate": 0.0001644551085152584, + "loss": 1.9809, + "step": 235450 + }, + { + "epoch": 0.55, + "grad_norm": 2.1875, + "learning_rate": 0.00016445369537221966, + "loss": 2.085, + "step": 235455 + }, + { + "epoch": 0.55, + "grad_norm": 2.1875, + "learning_rate": 0.0001644522822071624, + "loss": 2.0481, + "step": 235460 + }, + { + "epoch": 0.55, + "grad_norm": 2.0, + "learning_rate": 0.000164450869020087, + "loss": 2.1123, + "step": 235465 + }, + { + "epoch": 0.55, + "grad_norm": 1.8125, + "learning_rate": 0.00016444945581099403, + "loss": 2.0918, + "step": 235470 + }, + { + "epoch": 0.55, + "grad_norm": 3.3125, + "learning_rate": 0.000164448042579884, + "loss": 2.1283, + "step": 235475 + }, + { + "epoch": 0.55, + "grad_norm": 1.8359375, + "learning_rate": 0.00016444662932675732, + "loss": 2.1268, + "step": 235480 + }, + { + "epoch": 0.55, + "grad_norm": 2.0, + "learning_rate": 0.00016444521605161452, + "loss": 2.0662, + "step": 235485 + }, + { + "epoch": 0.55, + "grad_norm": 2.203125, + "learning_rate": 0.00016444380275445605, + "loss": 2.0917, + "step": 235490 + }, + { + "epoch": 0.55, + "grad_norm": 2.265625, + "learning_rate": 0.0001644423894352824, + "loss": 2.101, + "step": 235495 + }, + { + "epoch": 0.55, + "grad_norm": 2.265625, + "learning_rate": 0.00016444097609409405, + "loss": 2.1484, + "step": 235500 + }, + { + "epoch": 0.55, + "grad_norm": 2.265625, + "learning_rate": 0.00016443956273089154, + "loss": 2.0278, + "step": 235505 + }, + { + "epoch": 0.55, + "grad_norm": 2.234375, + "learning_rate": 0.0001644381493456753, + "loss": 1.9831, + "step": 235510 + }, + { + "epoch": 0.55, + "grad_norm": 1.9375, + "learning_rate": 0.0001644367359384458, + "loss": 2.042, + "step": 235515 + }, + { + "epoch": 0.55, + "grad_norm": 2.5625, + "learning_rate": 0.00016443532250920353, + "loss": 1.9036, + "step": 235520 + }, + { + "epoch": 0.55, + "grad_norm": 2.328125, + "learning_rate": 0.00016443390905794902, + "loss": 1.8906, + "step": 235525 + }, + { + "epoch": 0.55, + "grad_norm": 1.65625, + "learning_rate": 0.00016443249558468267, + "loss": 2.1706, + "step": 235530 + }, + { + "epoch": 0.55, + "grad_norm": 2.25, + "learning_rate": 0.00016443108208940506, + "loss": 2.1526, + "step": 235535 + }, + { + "epoch": 0.55, + "grad_norm": 2.15625, + "learning_rate": 0.0001644296685721166, + "loss": 2.1004, + "step": 235540 + }, + { + "epoch": 0.55, + "grad_norm": 1.84375, + "learning_rate": 0.00016442825503281777, + "loss": 2.0461, + "step": 235545 + }, + { + "epoch": 0.55, + "grad_norm": 2.25, + "learning_rate": 0.00016442684147150913, + "loss": 2.0562, + "step": 235550 + }, + { + "epoch": 0.55, + "grad_norm": 2.1875, + "learning_rate": 0.0001644254278881911, + "loss": 2.1805, + "step": 235555 + }, + { + "epoch": 0.55, + "grad_norm": 2.453125, + "learning_rate": 0.00016442401428286415, + "loss": 2.0259, + "step": 235560 + }, + { + "epoch": 0.55, + "grad_norm": 2.40625, + "learning_rate": 0.00016442260065552882, + "loss": 2.1092, + "step": 235565 + }, + { + "epoch": 0.55, + "grad_norm": 2.625, + "learning_rate": 0.00016442118700618554, + "loss": 2.1262, + "step": 235570 + }, + { + "epoch": 0.55, + "grad_norm": 2.171875, + "learning_rate": 0.00016441977333483482, + "loss": 2.0731, + "step": 235575 + }, + { + "epoch": 0.55, + "grad_norm": 2.078125, + "learning_rate": 0.00016441835964147713, + "loss": 1.848, + "step": 235580 + }, + { + "epoch": 0.55, + "grad_norm": 2.6875, + "learning_rate": 0.00016441694592611298, + "loss": 2.0529, + "step": 235585 + }, + { + "epoch": 0.55, + "grad_norm": 1.7421875, + "learning_rate": 0.00016441553218874284, + "loss": 1.9884, + "step": 235590 + }, + { + "epoch": 0.55, + "grad_norm": 2.84375, + "learning_rate": 0.00016441411842936716, + "loss": 1.9963, + "step": 235595 + }, + { + "epoch": 0.55, + "grad_norm": 2.109375, + "learning_rate": 0.00016441270464798646, + "loss": 2.1713, + "step": 235600 + }, + { + "epoch": 0.55, + "grad_norm": 2.203125, + "learning_rate": 0.00016441129084460122, + "loss": 2.1415, + "step": 235605 + }, + { + "epoch": 0.55, + "grad_norm": 2.1875, + "learning_rate": 0.00016440987701921188, + "loss": 1.957, + "step": 235610 + }, + { + "epoch": 0.55, + "grad_norm": 2.03125, + "learning_rate": 0.00016440846317181896, + "loss": 2.1154, + "step": 235615 + }, + { + "epoch": 0.55, + "grad_norm": 2.109375, + "learning_rate": 0.000164407049302423, + "loss": 2.1054, + "step": 235620 + }, + { + "epoch": 0.55, + "grad_norm": 2.65625, + "learning_rate": 0.00016440563541102437, + "loss": 2.0844, + "step": 235625 + }, + { + "epoch": 0.55, + "grad_norm": 2.28125, + "learning_rate": 0.00016440422149762362, + "loss": 2.0928, + "step": 235630 + }, + { + "epoch": 0.55, + "grad_norm": 1.953125, + "learning_rate": 0.0001644028075622212, + "loss": 2.2372, + "step": 235635 + }, + { + "epoch": 0.55, + "grad_norm": 2.140625, + "learning_rate": 0.00016440139360481766, + "loss": 2.3017, + "step": 235640 + }, + { + "epoch": 0.55, + "grad_norm": 2.21875, + "learning_rate": 0.00016439997962541342, + "loss": 2.2462, + "step": 235645 + }, + { + "epoch": 0.55, + "grad_norm": 2.40625, + "learning_rate": 0.00016439856562400898, + "loss": 2.0462, + "step": 235650 + }, + { + "epoch": 0.55, + "grad_norm": 2.15625, + "learning_rate": 0.00016439715160060478, + "loss": 2.0272, + "step": 235655 + }, + { + "epoch": 0.55, + "grad_norm": 2.125, + "learning_rate": 0.0001643957375552014, + "loss": 2.2825, + "step": 235660 + }, + { + "epoch": 0.55, + "grad_norm": 1.9609375, + "learning_rate": 0.00016439432348779924, + "loss": 2.0079, + "step": 235665 + }, + { + "epoch": 0.55, + "grad_norm": 2.109375, + "learning_rate": 0.0001643929093983988, + "loss": 2.0544, + "step": 235670 + }, + { + "epoch": 0.55, + "grad_norm": 2.828125, + "learning_rate": 0.00016439149528700063, + "loss": 2.2545, + "step": 235675 + }, + { + "epoch": 0.55, + "grad_norm": 1.96875, + "learning_rate": 0.0001643900811536051, + "loss": 2.0489, + "step": 235680 + }, + { + "epoch": 0.55, + "grad_norm": 2.171875, + "learning_rate": 0.00016438866699821278, + "loss": 2.2081, + "step": 235685 + }, + { + "epoch": 0.55, + "grad_norm": 2.03125, + "learning_rate": 0.0001643872528208241, + "loss": 2.1695, + "step": 235690 + }, + { + "epoch": 0.55, + "grad_norm": 2.390625, + "learning_rate": 0.0001643858386214396, + "loss": 2.0594, + "step": 235695 + }, + { + "epoch": 0.55, + "grad_norm": 2.09375, + "learning_rate": 0.0001643844244000597, + "loss": 2.0175, + "step": 235700 + }, + { + "epoch": 0.55, + "grad_norm": 2.15625, + "learning_rate": 0.00016438301015668492, + "loss": 2.3268, + "step": 235705 + }, + { + "epoch": 0.55, + "grad_norm": 2.234375, + "learning_rate": 0.00016438159589131574, + "loss": 2.017, + "step": 235710 + }, + { + "epoch": 0.55, + "grad_norm": 1.765625, + "learning_rate": 0.0001643801816039526, + "loss": 2.0397, + "step": 235715 + }, + { + "epoch": 0.55, + "grad_norm": 2.1875, + "learning_rate": 0.0001643787672945961, + "loss": 2.1308, + "step": 235720 + }, + { + "epoch": 0.55, + "grad_norm": 2.0625, + "learning_rate": 0.00016437735296324658, + "loss": 2.1264, + "step": 235725 + }, + { + "epoch": 0.55, + "grad_norm": 2.40625, + "learning_rate": 0.00016437593860990464, + "loss": 2.1446, + "step": 235730 + }, + { + "epoch": 0.55, + "grad_norm": 2.125, + "learning_rate": 0.00016437452423457067, + "loss": 2.2048, + "step": 235735 + }, + { + "epoch": 0.55, + "grad_norm": 2.015625, + "learning_rate": 0.00016437310983724522, + "loss": 2.0671, + "step": 235740 + }, + { + "epoch": 0.55, + "grad_norm": 1.8203125, + "learning_rate": 0.00016437169541792874, + "loss": 2.156, + "step": 235745 + }, + { + "epoch": 0.55, + "grad_norm": 2.390625, + "learning_rate": 0.00016437028097662172, + "loss": 1.9422, + "step": 235750 + }, + { + "epoch": 0.55, + "grad_norm": 2.1875, + "learning_rate": 0.00016436886651332463, + "loss": 2.1054, + "step": 235755 + }, + { + "epoch": 0.55, + "grad_norm": 2.0, + "learning_rate": 0.00016436745202803797, + "loss": 2.0844, + "step": 235760 + }, + { + "epoch": 0.55, + "grad_norm": 2.234375, + "learning_rate": 0.00016436603752076226, + "loss": 2.0947, + "step": 235765 + }, + { + "epoch": 0.55, + "grad_norm": 2.46875, + "learning_rate": 0.0001643646229914979, + "loss": 2.1185, + "step": 235770 + }, + { + "epoch": 0.55, + "grad_norm": 2.0, + "learning_rate": 0.00016436320844024548, + "loss": 2.0169, + "step": 235775 + }, + { + "epoch": 0.55, + "grad_norm": 2.4375, + "learning_rate": 0.00016436179386700536, + "loss": 1.9408, + "step": 235780 + }, + { + "epoch": 0.55, + "grad_norm": 2.03125, + "learning_rate": 0.0001643603792717781, + "loss": 1.8744, + "step": 235785 + }, + { + "epoch": 0.55, + "grad_norm": 2.40625, + "learning_rate": 0.00016435896465456416, + "loss": 2.0907, + "step": 235790 + }, + { + "epoch": 0.55, + "grad_norm": 2.21875, + "learning_rate": 0.00016435755001536402, + "loss": 2.0927, + "step": 235795 + }, + { + "epoch": 0.55, + "grad_norm": 2.09375, + "learning_rate": 0.00016435613535417822, + "loss": 1.9512, + "step": 235800 + }, + { + "epoch": 0.55, + "grad_norm": 2.15625, + "learning_rate": 0.00016435472067100716, + "loss": 2.0404, + "step": 235805 + }, + { + "epoch": 0.55, + "grad_norm": 2.25, + "learning_rate": 0.00016435330596585137, + "loss": 2.0755, + "step": 235810 + }, + { + "epoch": 0.55, + "grad_norm": 2.203125, + "learning_rate": 0.0001643518912387113, + "loss": 2.1292, + "step": 235815 + }, + { + "epoch": 0.55, + "grad_norm": 2.140625, + "learning_rate": 0.0001643504764895875, + "loss": 2.214, + "step": 235820 + }, + { + "epoch": 0.55, + "grad_norm": 1.796875, + "learning_rate": 0.0001643490617184804, + "loss": 2.1534, + "step": 235825 + }, + { + "epoch": 0.55, + "grad_norm": 1.9765625, + "learning_rate": 0.0001643476469253905, + "loss": 1.9856, + "step": 235830 + }, + { + "epoch": 0.55, + "grad_norm": 2.03125, + "learning_rate": 0.00016434623211031824, + "loss": 2.0545, + "step": 235835 + }, + { + "epoch": 0.56, + "grad_norm": 2.375, + "learning_rate": 0.00016434481727326414, + "loss": 2.0054, + "step": 235840 + }, + { + "epoch": 0.56, + "grad_norm": 2.328125, + "learning_rate": 0.00016434340241422868, + "loss": 2.136, + "step": 235845 + }, + { + "epoch": 0.56, + "grad_norm": 1.9453125, + "learning_rate": 0.00016434198753321238, + "loss": 2.2279, + "step": 235850 + }, + { + "epoch": 0.56, + "grad_norm": 1.8984375, + "learning_rate": 0.00016434057263021572, + "loss": 2.0424, + "step": 235855 + }, + { + "epoch": 0.56, + "grad_norm": 2.140625, + "learning_rate": 0.0001643391577052391, + "loss": 2.0737, + "step": 235860 + }, + { + "epoch": 0.56, + "grad_norm": 2.15625, + "learning_rate": 0.00016433774275828307, + "loss": 2.0225, + "step": 235865 + }, + { + "epoch": 0.56, + "grad_norm": 2.203125, + "learning_rate": 0.0001643363277893481, + "loss": 2.2369, + "step": 235870 + }, + { + "epoch": 0.56, + "grad_norm": 2.140625, + "learning_rate": 0.00016433491279843466, + "loss": 1.9267, + "step": 235875 + }, + { + "epoch": 0.56, + "grad_norm": 1.921875, + "learning_rate": 0.00016433349778554327, + "loss": 2.1226, + "step": 235880 + }, + { + "epoch": 0.56, + "grad_norm": 2.078125, + "learning_rate": 0.0001643320827506744, + "loss": 2.0757, + "step": 235885 + }, + { + "epoch": 0.56, + "grad_norm": 2.296875, + "learning_rate": 0.0001643306676938285, + "loss": 2.0502, + "step": 235890 + }, + { + "epoch": 0.56, + "grad_norm": 2.390625, + "learning_rate": 0.00016432925261500607, + "loss": 2.0446, + "step": 235895 + }, + { + "epoch": 0.56, + "grad_norm": 1.8671875, + "learning_rate": 0.00016432783751420762, + "loss": 2.1477, + "step": 235900 + }, + { + "epoch": 0.56, + "grad_norm": 2.3125, + "learning_rate": 0.0001643264223914336, + "loss": 2.1569, + "step": 235905 + }, + { + "epoch": 0.56, + "grad_norm": 2.109375, + "learning_rate": 0.0001643250072466845, + "loss": 1.9987, + "step": 235910 + }, + { + "epoch": 0.56, + "grad_norm": 2.4375, + "learning_rate": 0.00016432359207996087, + "loss": 2.066, + "step": 235915 + }, + { + "epoch": 0.56, + "grad_norm": 2.015625, + "learning_rate": 0.00016432217689126306, + "loss": 2.1044, + "step": 235920 + }, + { + "epoch": 0.56, + "grad_norm": 2.3125, + "learning_rate": 0.00016432076168059167, + "loss": 2.1306, + "step": 235925 + }, + { + "epoch": 0.56, + "grad_norm": 2.15625, + "learning_rate": 0.00016431934644794714, + "loss": 2.1365, + "step": 235930 + }, + { + "epoch": 0.56, + "grad_norm": 2.40625, + "learning_rate": 0.00016431793119332994, + "loss": 2.102, + "step": 235935 + }, + { + "epoch": 0.56, + "grad_norm": 2.203125, + "learning_rate": 0.00016431651591674058, + "loss": 2.0966, + "step": 235940 + }, + { + "epoch": 0.56, + "grad_norm": 2.03125, + "learning_rate": 0.0001643151006181795, + "loss": 2.1245, + "step": 235945 + }, + { + "epoch": 0.56, + "grad_norm": 1.7109375, + "learning_rate": 0.00016431368529764727, + "loss": 1.9661, + "step": 235950 + }, + { + "epoch": 0.56, + "grad_norm": 2.375, + "learning_rate": 0.00016431226995514427, + "loss": 2.2344, + "step": 235955 + }, + { + "epoch": 0.56, + "grad_norm": 2.25, + "learning_rate": 0.00016431085459067105, + "loss": 2.0533, + "step": 235960 + }, + { + "epoch": 0.56, + "grad_norm": 1.984375, + "learning_rate": 0.0001643094392042281, + "loss": 2.0856, + "step": 235965 + }, + { + "epoch": 0.56, + "grad_norm": 1.8984375, + "learning_rate": 0.00016430802379581585, + "loss": 2.1337, + "step": 235970 + }, + { + "epoch": 0.56, + "grad_norm": 2.046875, + "learning_rate": 0.00016430660836543482, + "loss": 2.2114, + "step": 235975 + }, + { + "epoch": 0.56, + "grad_norm": 2.34375, + "learning_rate": 0.00016430519291308547, + "loss": 2.0203, + "step": 235980 + }, + { + "epoch": 0.56, + "grad_norm": 2.25, + "learning_rate": 0.0001643037774387683, + "loss": 2.1763, + "step": 235985 + }, + { + "epoch": 0.56, + "grad_norm": 2.140625, + "learning_rate": 0.00016430236194248384, + "loss": 2.0854, + "step": 235990 + }, + { + "epoch": 0.56, + "grad_norm": 2.03125, + "learning_rate": 0.00016430094642423248, + "loss": 2.0257, + "step": 235995 + }, + { + "epoch": 0.56, + "grad_norm": 2.25, + "learning_rate": 0.00016429953088401475, + "loss": 2.0294, + "step": 236000 + }, + { + "epoch": 0.56, + "grad_norm": 2.0, + "learning_rate": 0.00016429811532183116, + "loss": 2.0038, + "step": 236005 + }, + { + "epoch": 0.56, + "grad_norm": 2.71875, + "learning_rate": 0.00016429669973768216, + "loss": 2.1309, + "step": 236010 + }, + { + "epoch": 0.56, + "grad_norm": 2.1875, + "learning_rate": 0.00016429528413156825, + "loss": 2.1608, + "step": 236015 + }, + { + "epoch": 0.56, + "grad_norm": 2.171875, + "learning_rate": 0.0001642938685034899, + "loss": 2.0041, + "step": 236020 + }, + { + "epoch": 0.56, + "grad_norm": 2.171875, + "learning_rate": 0.00016429245285344758, + "loss": 2.191, + "step": 236025 + }, + { + "epoch": 0.56, + "grad_norm": 1.953125, + "learning_rate": 0.00016429103718144182, + "loss": 1.9689, + "step": 236030 + }, + { + "epoch": 0.56, + "grad_norm": 2.046875, + "learning_rate": 0.00016428962148747306, + "loss": 1.9574, + "step": 236035 + }, + { + "epoch": 0.56, + "grad_norm": 2.046875, + "learning_rate": 0.00016428820577154177, + "loss": 1.9476, + "step": 236040 + }, + { + "epoch": 0.56, + "grad_norm": 1.8671875, + "learning_rate": 0.0001642867900336485, + "loss": 1.9372, + "step": 236045 + }, + { + "epoch": 0.56, + "grad_norm": 1.90625, + "learning_rate": 0.00016428537427379372, + "loss": 2.0324, + "step": 236050 + }, + { + "epoch": 0.56, + "grad_norm": 2.0625, + "learning_rate": 0.00016428395849197783, + "loss": 2.1761, + "step": 236055 + }, + { + "epoch": 0.56, + "grad_norm": 2.109375, + "learning_rate": 0.0001642825426882014, + "loss": 1.9597, + "step": 236060 + }, + { + "epoch": 0.56, + "grad_norm": 1.9765625, + "learning_rate": 0.00016428112686246488, + "loss": 2.1232, + "step": 236065 + }, + { + "epoch": 0.56, + "grad_norm": 2.296875, + "learning_rate": 0.00016427971101476877, + "loss": 2.1255, + "step": 236070 + }, + { + "epoch": 0.56, + "grad_norm": 1.9921875, + "learning_rate": 0.00016427829514511358, + "loss": 2.1454, + "step": 236075 + }, + { + "epoch": 0.56, + "grad_norm": 2.203125, + "learning_rate": 0.00016427687925349972, + "loss": 1.9564, + "step": 236080 + }, + { + "epoch": 0.56, + "grad_norm": 2.15625, + "learning_rate": 0.00016427546333992773, + "loss": 2.1395, + "step": 236085 + }, + { + "epoch": 0.56, + "grad_norm": 2.359375, + "learning_rate": 0.00016427404740439804, + "loss": 2.1355, + "step": 236090 + }, + { + "epoch": 0.56, + "grad_norm": 2.109375, + "learning_rate": 0.00016427263144691117, + "loss": 1.9909, + "step": 236095 + }, + { + "epoch": 0.56, + "grad_norm": 2.296875, + "learning_rate": 0.00016427121546746767, + "loss": 1.9783, + "step": 236100 + }, + { + "epoch": 0.56, + "grad_norm": 2.015625, + "learning_rate": 0.00016426979946606787, + "loss": 2.206, + "step": 236105 + }, + { + "epoch": 0.56, + "grad_norm": 2.359375, + "learning_rate": 0.00016426838344271242, + "loss": 1.864, + "step": 236110 + }, + { + "epoch": 0.56, + "grad_norm": 1.7421875, + "learning_rate": 0.00016426696739740168, + "loss": 2.0172, + "step": 236115 + }, + { + "epoch": 0.56, + "grad_norm": 1.890625, + "learning_rate": 0.0001642655513301362, + "loss": 2.0217, + "step": 236120 + }, + { + "epoch": 0.56, + "grad_norm": 2.359375, + "learning_rate": 0.00016426413524091643, + "loss": 2.2331, + "step": 236125 + }, + { + "epoch": 0.56, + "grad_norm": 1.828125, + "learning_rate": 0.00016426271912974288, + "loss": 1.9618, + "step": 236130 + }, + { + "epoch": 0.56, + "grad_norm": 1.9453125, + "learning_rate": 0.00016426130299661596, + "loss": 2.0777, + "step": 236135 + }, + { + "epoch": 0.56, + "grad_norm": 2.25, + "learning_rate": 0.00016425988684153628, + "loss": 2.1418, + "step": 236140 + }, + { + "epoch": 0.56, + "grad_norm": 1.90625, + "learning_rate": 0.00016425847066450425, + "loss": 2.1508, + "step": 236145 + }, + { + "epoch": 0.56, + "grad_norm": 2.140625, + "learning_rate": 0.00016425705446552032, + "loss": 2.1599, + "step": 236150 + }, + { + "epoch": 0.56, + "grad_norm": 2.25, + "learning_rate": 0.00016425563824458506, + "loss": 2.1584, + "step": 236155 + }, + { + "epoch": 0.56, + "grad_norm": 2.109375, + "learning_rate": 0.0001642542220016989, + "loss": 2.4395, + "step": 236160 + }, + { + "epoch": 0.56, + "grad_norm": 2.1875, + "learning_rate": 0.00016425280573686233, + "loss": 2.169, + "step": 236165 + }, + { + "epoch": 0.56, + "grad_norm": 2.203125, + "learning_rate": 0.0001642513894500758, + "loss": 2.0983, + "step": 236170 + }, + { + "epoch": 0.56, + "grad_norm": 2.15625, + "learning_rate": 0.00016424997314133988, + "loss": 1.8838, + "step": 236175 + }, + { + "epoch": 0.56, + "grad_norm": 2.0625, + "learning_rate": 0.000164248556810655, + "loss": 1.9038, + "step": 236180 + }, + { + "epoch": 0.56, + "grad_norm": 2.28125, + "learning_rate": 0.0001642471404580216, + "loss": 2.1168, + "step": 236185 + }, + { + "epoch": 0.56, + "grad_norm": 2.25, + "learning_rate": 0.00016424572408344024, + "loss": 2.1408, + "step": 236190 + }, + { + "epoch": 0.56, + "grad_norm": 2.34375, + "learning_rate": 0.0001642443076869114, + "loss": 2.1103, + "step": 236195 + }, + { + "epoch": 0.56, + "grad_norm": 1.4921875, + "learning_rate": 0.0001642428912684355, + "loss": 2.0649, + "step": 236200 + }, + { + "epoch": 0.56, + "grad_norm": 2.171875, + "learning_rate": 0.0001642414748280131, + "loss": 1.9478, + "step": 236205 + }, + { + "epoch": 0.56, + "grad_norm": 2.09375, + "learning_rate": 0.00016424005836564463, + "loss": 2.0669, + "step": 236210 + }, + { + "epoch": 0.56, + "grad_norm": 2.375, + "learning_rate": 0.00016423864188133057, + "loss": 1.9787, + "step": 236215 + }, + { + "epoch": 0.56, + "grad_norm": 1.6484375, + "learning_rate": 0.00016423722537507146, + "loss": 2.0498, + "step": 236220 + }, + { + "epoch": 0.56, + "grad_norm": 1.859375, + "learning_rate": 0.0001642358088468677, + "loss": 2.2474, + "step": 236225 + }, + { + "epoch": 0.56, + "grad_norm": 2.109375, + "learning_rate": 0.00016423439229671988, + "loss": 2.3647, + "step": 236230 + }, + { + "epoch": 0.56, + "grad_norm": 2.390625, + "learning_rate": 0.00016423297572462842, + "loss": 2.1109, + "step": 236235 + }, + { + "epoch": 0.56, + "grad_norm": 2.171875, + "learning_rate": 0.00016423155913059377, + "loss": 2.0545, + "step": 236240 + }, + { + "epoch": 0.56, + "grad_norm": 2.234375, + "learning_rate": 0.00016423014251461647, + "loss": 2.1622, + "step": 236245 + }, + { + "epoch": 0.56, + "grad_norm": 1.9296875, + "learning_rate": 0.000164228725876697, + "loss": 2.1172, + "step": 236250 + }, + { + "epoch": 0.56, + "grad_norm": 2.0, + "learning_rate": 0.00016422730921683584, + "loss": 2.0913, + "step": 236255 + }, + { + "epoch": 0.56, + "grad_norm": 2.265625, + "learning_rate": 0.00016422589253503348, + "loss": 2.0228, + "step": 236260 + }, + { + "epoch": 0.56, + "grad_norm": 2.203125, + "learning_rate": 0.00016422447583129034, + "loss": 2.0353, + "step": 236265 + }, + { + "epoch": 0.56, + "grad_norm": 2.015625, + "learning_rate": 0.00016422305910560698, + "loss": 2.0059, + "step": 236270 + }, + { + "epoch": 0.56, + "grad_norm": 2.1875, + "learning_rate": 0.00016422164235798387, + "loss": 2.0913, + "step": 236275 + }, + { + "epoch": 0.56, + "grad_norm": 2.203125, + "learning_rate": 0.00016422022558842145, + "loss": 1.9484, + "step": 236280 + }, + { + "epoch": 0.56, + "grad_norm": 2.125, + "learning_rate": 0.00016421880879692027, + "loss": 2.1114, + "step": 236285 + }, + { + "epoch": 0.56, + "grad_norm": 2.34375, + "learning_rate": 0.00016421739198348075, + "loss": 2.1865, + "step": 236290 + }, + { + "epoch": 0.56, + "grad_norm": 2.65625, + "learning_rate": 0.00016421597514810343, + "loss": 2.1389, + "step": 236295 + }, + { + "epoch": 0.56, + "grad_norm": 2.359375, + "learning_rate": 0.00016421455829078875, + "loss": 2.0691, + "step": 236300 + }, + { + "epoch": 0.56, + "grad_norm": 2.640625, + "learning_rate": 0.00016421314141153723, + "loss": 2.2729, + "step": 236305 + }, + { + "epoch": 0.56, + "grad_norm": 2.109375, + "learning_rate": 0.00016421172451034933, + "loss": 2.0366, + "step": 236310 + }, + { + "epoch": 0.56, + "grad_norm": 1.828125, + "learning_rate": 0.00016421030758722552, + "loss": 2.0985, + "step": 236315 + }, + { + "epoch": 0.56, + "grad_norm": 2.078125, + "learning_rate": 0.00016420889064216632, + "loss": 2.0442, + "step": 236320 + }, + { + "epoch": 0.56, + "grad_norm": 2.203125, + "learning_rate": 0.00016420747367517223, + "loss": 1.9534, + "step": 236325 + }, + { + "epoch": 0.56, + "grad_norm": 2.203125, + "learning_rate": 0.00016420605668624364, + "loss": 1.8465, + "step": 236330 + }, + { + "epoch": 0.56, + "grad_norm": 2.421875, + "learning_rate": 0.00016420463967538115, + "loss": 2.0331, + "step": 236335 + }, + { + "epoch": 0.56, + "grad_norm": 1.9609375, + "learning_rate": 0.0001642032226425852, + "loss": 1.8148, + "step": 236340 + }, + { + "epoch": 0.56, + "grad_norm": 2.109375, + "learning_rate": 0.00016420180558785624, + "loss": 2.0074, + "step": 236345 + }, + { + "epoch": 0.56, + "grad_norm": 2.34375, + "learning_rate": 0.0001642003885111947, + "loss": 2.0157, + "step": 236350 + }, + { + "epoch": 0.56, + "grad_norm": 1.859375, + "learning_rate": 0.00016419897141260127, + "loss": 2.0882, + "step": 236355 + }, + { + "epoch": 0.56, + "grad_norm": 1.609375, + "learning_rate": 0.00016419755429207625, + "loss": 2.0628, + "step": 236360 + }, + { + "epoch": 0.56, + "grad_norm": 1.78125, + "learning_rate": 0.0001641961371496202, + "loss": 2.1415, + "step": 236365 + }, + { + "epoch": 0.56, + "grad_norm": 1.9296875, + "learning_rate": 0.00016419471998523356, + "loss": 2.0216, + "step": 236370 + }, + { + "epoch": 0.56, + "grad_norm": 2.28125, + "learning_rate": 0.00016419330279891686, + "loss": 1.9983, + "step": 236375 + }, + { + "epoch": 0.56, + "grad_norm": 2.328125, + "learning_rate": 0.00016419188559067054, + "loss": 2.0643, + "step": 236380 + }, + { + "epoch": 0.56, + "grad_norm": 1.921875, + "learning_rate": 0.00016419046836049512, + "loss": 2.2954, + "step": 236385 + }, + { + "epoch": 0.56, + "grad_norm": 2.09375, + "learning_rate": 0.0001641890511083911, + "loss": 2.1008, + "step": 236390 + }, + { + "epoch": 0.56, + "grad_norm": 1.90625, + "learning_rate": 0.00016418763383435888, + "loss": 2.0702, + "step": 236395 + }, + { + "epoch": 0.56, + "grad_norm": 2.390625, + "learning_rate": 0.00016418621653839903, + "loss": 1.9124, + "step": 236400 + }, + { + "epoch": 0.56, + "grad_norm": 1.78125, + "learning_rate": 0.000164184799220512, + "loss": 1.9727, + "step": 236405 + }, + { + "epoch": 0.56, + "grad_norm": 2.15625, + "learning_rate": 0.00016418338188069827, + "loss": 2.1771, + "step": 236410 + }, + { + "epoch": 0.56, + "grad_norm": 2.15625, + "learning_rate": 0.00016418196451895836, + "loss": 2.1083, + "step": 236415 + }, + { + "epoch": 0.56, + "grad_norm": 2.1875, + "learning_rate": 0.0001641805471352927, + "loss": 2.0802, + "step": 236420 + }, + { + "epoch": 0.56, + "grad_norm": 2.359375, + "learning_rate": 0.00016417912972970184, + "loss": 2.1867, + "step": 236425 + }, + { + "epoch": 0.56, + "grad_norm": 2.046875, + "learning_rate": 0.0001641777123021862, + "loss": 2.2223, + "step": 236430 + }, + { + "epoch": 0.56, + "grad_norm": 2.21875, + "learning_rate": 0.00016417629485274629, + "loss": 2.0767, + "step": 236435 + }, + { + "epoch": 0.56, + "grad_norm": 2.375, + "learning_rate": 0.00016417487738138258, + "loss": 2.1428, + "step": 236440 + }, + { + "epoch": 0.56, + "grad_norm": 2.203125, + "learning_rate": 0.00016417345988809556, + "loss": 2.1934, + "step": 236445 + }, + { + "epoch": 0.56, + "grad_norm": 2.171875, + "learning_rate": 0.00016417204237288577, + "loss": 2.0076, + "step": 236450 + }, + { + "epoch": 0.56, + "grad_norm": 2.171875, + "learning_rate": 0.0001641706248357536, + "loss": 2.1026, + "step": 236455 + }, + { + "epoch": 0.56, + "grad_norm": 2.34375, + "learning_rate": 0.0001641692072766996, + "loss": 2.078, + "step": 236460 + }, + { + "epoch": 0.56, + "grad_norm": 2.125, + "learning_rate": 0.00016416778969572423, + "loss": 2.1749, + "step": 236465 + }, + { + "epoch": 0.56, + "grad_norm": 2.171875, + "learning_rate": 0.00016416637209282803, + "loss": 2.1476, + "step": 236470 + }, + { + "epoch": 0.56, + "grad_norm": 2.234375, + "learning_rate": 0.00016416495446801136, + "loss": 1.9448, + "step": 236475 + }, + { + "epoch": 0.56, + "grad_norm": 2.234375, + "learning_rate": 0.0001641635368212748, + "loss": 2.2771, + "step": 236480 + }, + { + "epoch": 0.56, + "grad_norm": 1.8046875, + "learning_rate": 0.00016416211915261882, + "loss": 2.0394, + "step": 236485 + }, + { + "epoch": 0.56, + "grad_norm": 2.390625, + "learning_rate": 0.0001641607014620439, + "loss": 2.072, + "step": 236490 + }, + { + "epoch": 0.56, + "grad_norm": 2.25, + "learning_rate": 0.0001641592837495505, + "loss": 2.0363, + "step": 236495 + }, + { + "epoch": 0.56, + "grad_norm": 2.28125, + "learning_rate": 0.00016415786601513914, + "loss": 2.1494, + "step": 236500 + }, + { + "epoch": 0.56, + "grad_norm": 2.34375, + "learning_rate": 0.00016415644825881032, + "loss": 2.13, + "step": 236505 + }, + { + "epoch": 0.56, + "grad_norm": 2.109375, + "learning_rate": 0.00016415503048056442, + "loss": 1.9922, + "step": 236510 + }, + { + "epoch": 0.56, + "grad_norm": 2.21875, + "learning_rate": 0.00016415361268040207, + "loss": 2.0497, + "step": 236515 + }, + { + "epoch": 0.56, + "grad_norm": 2.125, + "learning_rate": 0.00016415219485832366, + "loss": 2.2476, + "step": 236520 + }, + { + "epoch": 0.56, + "grad_norm": 2.453125, + "learning_rate": 0.00016415077701432965, + "loss": 2.0742, + "step": 236525 + }, + { + "epoch": 0.56, + "grad_norm": 1.8671875, + "learning_rate": 0.00016414935914842063, + "loss": 1.9839, + "step": 236530 + }, + { + "epoch": 0.56, + "grad_norm": 2.0625, + "learning_rate": 0.000164147941260597, + "loss": 2.125, + "step": 236535 + }, + { + "epoch": 0.56, + "grad_norm": 2.015625, + "learning_rate": 0.00016414652335085928, + "loss": 1.8685, + "step": 236540 + }, + { + "epoch": 0.56, + "grad_norm": 2.671875, + "learning_rate": 0.00016414510541920793, + "loss": 1.8866, + "step": 236545 + }, + { + "epoch": 0.56, + "grad_norm": 1.8515625, + "learning_rate": 0.00016414368746564347, + "loss": 2.0044, + "step": 236550 + }, + { + "epoch": 0.56, + "grad_norm": 1.796875, + "learning_rate": 0.00016414226949016636, + "loss": 2.0519, + "step": 236555 + }, + { + "epoch": 0.56, + "grad_norm": 2.59375, + "learning_rate": 0.0001641408514927771, + "loss": 2.2354, + "step": 236560 + }, + { + "epoch": 0.56, + "grad_norm": 2.234375, + "learning_rate": 0.0001641394334734761, + "loss": 2.0883, + "step": 236565 + }, + { + "epoch": 0.56, + "grad_norm": 2.25, + "learning_rate": 0.00016413801543226398, + "loss": 2.245, + "step": 236570 + }, + { + "epoch": 0.56, + "grad_norm": 2.078125, + "learning_rate": 0.0001641365973691411, + "loss": 2.0522, + "step": 236575 + }, + { + "epoch": 0.56, + "grad_norm": 1.9140625, + "learning_rate": 0.00016413517928410803, + "loss": 1.9713, + "step": 236580 + }, + { + "epoch": 0.56, + "grad_norm": 2.359375, + "learning_rate": 0.00016413376117716523, + "loss": 2.0621, + "step": 236585 + }, + { + "epoch": 0.56, + "grad_norm": 2.671875, + "learning_rate": 0.00016413234304831308, + "loss": 2.0477, + "step": 236590 + }, + { + "epoch": 0.56, + "grad_norm": 2.171875, + "learning_rate": 0.00016413092489755225, + "loss": 2.1019, + "step": 236595 + }, + { + "epoch": 0.56, + "grad_norm": 2.296875, + "learning_rate": 0.0001641295067248831, + "loss": 2.0686, + "step": 236600 + }, + { + "epoch": 0.56, + "grad_norm": 2.1875, + "learning_rate": 0.00016412808853030617, + "loss": 2.0989, + "step": 236605 + }, + { + "epoch": 0.56, + "grad_norm": 1.875, + "learning_rate": 0.00016412667031382193, + "loss": 2.1239, + "step": 236610 + }, + { + "epoch": 0.56, + "grad_norm": 1.84375, + "learning_rate": 0.0001641252520754308, + "loss": 2.1964, + "step": 236615 + }, + { + "epoch": 0.56, + "grad_norm": 3.96875, + "learning_rate": 0.00016412383381513335, + "loss": 2.089, + "step": 236620 + }, + { + "epoch": 0.56, + "grad_norm": 1.9765625, + "learning_rate": 0.00016412241553293003, + "loss": 2.0827, + "step": 236625 + }, + { + "epoch": 0.56, + "grad_norm": 2.25, + "learning_rate": 0.00016412099722882138, + "loss": 2.1512, + "step": 236630 + }, + { + "epoch": 0.56, + "grad_norm": 2.328125, + "learning_rate": 0.00016411957890280778, + "loss": 2.2248, + "step": 236635 + }, + { + "epoch": 0.56, + "grad_norm": 2.0, + "learning_rate": 0.0001641181605548898, + "loss": 2.0504, + "step": 236640 + }, + { + "epoch": 0.56, + "grad_norm": 1.9140625, + "learning_rate": 0.00016411674218506786, + "loss": 2.1466, + "step": 236645 + }, + { + "epoch": 0.56, + "grad_norm": 2.109375, + "learning_rate": 0.0001641153237933425, + "loss": 1.8449, + "step": 236650 + }, + { + "epoch": 0.56, + "grad_norm": 2.078125, + "learning_rate": 0.00016411390537971417, + "loss": 2.0876, + "step": 236655 + }, + { + "epoch": 0.56, + "grad_norm": 2.703125, + "learning_rate": 0.00016411248694418338, + "loss": 2.1328, + "step": 236660 + }, + { + "epoch": 0.56, + "grad_norm": 2.9375, + "learning_rate": 0.00016411106848675063, + "loss": 2.037, + "step": 236665 + }, + { + "epoch": 0.56, + "grad_norm": 2.125, + "learning_rate": 0.0001641096500074163, + "loss": 2.0823, + "step": 236670 + }, + { + "epoch": 0.56, + "grad_norm": 2.0625, + "learning_rate": 0.000164108231506181, + "loss": 2.1204, + "step": 236675 + }, + { + "epoch": 0.56, + "grad_norm": 2.203125, + "learning_rate": 0.0001641068129830452, + "loss": 2.1314, + "step": 236680 + }, + { + "epoch": 0.56, + "grad_norm": 2.234375, + "learning_rate": 0.0001641053944380093, + "loss": 2.1475, + "step": 236685 + }, + { + "epoch": 0.56, + "grad_norm": 1.8359375, + "learning_rate": 0.00016410397587107383, + "loss": 2.1685, + "step": 236690 + }, + { + "epoch": 0.56, + "grad_norm": 2.15625, + "learning_rate": 0.0001641025572822393, + "loss": 2.189, + "step": 236695 + }, + { + "epoch": 0.56, + "grad_norm": 1.9765625, + "learning_rate": 0.00016410113867150616, + "loss": 2.1452, + "step": 236700 + }, + { + "epoch": 0.56, + "grad_norm": 2.953125, + "learning_rate": 0.00016409972003887492, + "loss": 2.2411, + "step": 236705 + }, + { + "epoch": 0.56, + "grad_norm": 2.015625, + "learning_rate": 0.00016409830138434606, + "loss": 2.0914, + "step": 236710 + }, + { + "epoch": 0.56, + "grad_norm": 2.15625, + "learning_rate": 0.00016409688270792002, + "loss": 1.9498, + "step": 236715 + }, + { + "epoch": 0.56, + "grad_norm": 2.3125, + "learning_rate": 0.00016409546400959738, + "loss": 2.1478, + "step": 236720 + }, + { + "epoch": 0.56, + "grad_norm": 2.625, + "learning_rate": 0.00016409404528937852, + "loss": 2.2497, + "step": 236725 + }, + { + "epoch": 0.56, + "grad_norm": 2.203125, + "learning_rate": 0.00016409262654726398, + "loss": 2.0989, + "step": 236730 + }, + { + "epoch": 0.56, + "grad_norm": 2.03125, + "learning_rate": 0.00016409120778325425, + "loss": 2.1558, + "step": 236735 + }, + { + "epoch": 0.56, + "grad_norm": 1.875, + "learning_rate": 0.0001640897889973498, + "loss": 2.1159, + "step": 236740 + }, + { + "epoch": 0.56, + "grad_norm": 2.21875, + "learning_rate": 0.0001640883701895511, + "loss": 2.0932, + "step": 236745 + }, + { + "epoch": 0.56, + "grad_norm": 3.03125, + "learning_rate": 0.00016408695135985868, + "loss": 2.1853, + "step": 236750 + }, + { + "epoch": 0.56, + "grad_norm": 2.234375, + "learning_rate": 0.00016408553250827298, + "loss": 2.0237, + "step": 236755 + }, + { + "epoch": 0.56, + "grad_norm": 3.125, + "learning_rate": 0.0001640841136347945, + "loss": 2.1243, + "step": 236760 + }, + { + "epoch": 0.56, + "grad_norm": 2.125, + "learning_rate": 0.0001640826947394237, + "loss": 2.1331, + "step": 236765 + }, + { + "epoch": 0.56, + "grad_norm": 1.7890625, + "learning_rate": 0.00016408127582216112, + "loss": 2.1211, + "step": 236770 + }, + { + "epoch": 0.56, + "grad_norm": 2.265625, + "learning_rate": 0.00016407985688300723, + "loss": 2.2354, + "step": 236775 + }, + { + "epoch": 0.56, + "grad_norm": 1.9375, + "learning_rate": 0.00016407843792196246, + "loss": 1.9324, + "step": 236780 + }, + { + "epoch": 0.56, + "grad_norm": 2.015625, + "learning_rate": 0.00016407701893902737, + "loss": 2.1907, + "step": 236785 + }, + { + "epoch": 0.56, + "grad_norm": 2.375, + "learning_rate": 0.00016407559993420235, + "loss": 2.1799, + "step": 236790 + }, + { + "epoch": 0.56, + "grad_norm": 1.5703125, + "learning_rate": 0.000164074180907488, + "loss": 2.0473, + "step": 236795 + }, + { + "epoch": 0.56, + "grad_norm": 2.203125, + "learning_rate": 0.00016407276185888473, + "loss": 2.173, + "step": 236800 + }, + { + "epoch": 0.56, + "grad_norm": 1.921875, + "learning_rate": 0.000164071342788393, + "loss": 2.0864, + "step": 236805 + }, + { + "epoch": 0.56, + "grad_norm": 1.890625, + "learning_rate": 0.0001640699236960134, + "loss": 2.2741, + "step": 236810 + }, + { + "epoch": 0.56, + "grad_norm": 2.53125, + "learning_rate": 0.00016406850458174633, + "loss": 1.8943, + "step": 236815 + }, + { + "epoch": 0.56, + "grad_norm": 2.375, + "learning_rate": 0.00016406708544559228, + "loss": 2.128, + "step": 236820 + }, + { + "epoch": 0.56, + "grad_norm": 1.9140625, + "learning_rate": 0.0001640656662875518, + "loss": 1.9898, + "step": 236825 + }, + { + "epoch": 0.56, + "grad_norm": 1.875, + "learning_rate": 0.00016406424710762528, + "loss": 2.1129, + "step": 236830 + }, + { + "epoch": 0.56, + "grad_norm": 2.140625, + "learning_rate": 0.00016406282790581327, + "loss": 2.2576, + "step": 236835 + }, + { + "epoch": 0.56, + "grad_norm": 2.109375, + "learning_rate": 0.0001640614086821162, + "loss": 1.893, + "step": 236840 + }, + { + "epoch": 0.56, + "grad_norm": 1.8359375, + "learning_rate": 0.00016405998943653464, + "loss": 2.2225, + "step": 236845 + }, + { + "epoch": 0.56, + "grad_norm": 2.5, + "learning_rate": 0.00016405857016906902, + "loss": 2.0907, + "step": 236850 + }, + { + "epoch": 0.56, + "grad_norm": 2.625, + "learning_rate": 0.00016405715087971983, + "loss": 2.2779, + "step": 236855 + }, + { + "epoch": 0.56, + "grad_norm": 2.296875, + "learning_rate": 0.0001640557315684875, + "loss": 2.1128, + "step": 236860 + }, + { + "epoch": 0.56, + "grad_norm": 2.546875, + "learning_rate": 0.00016405431223537263, + "loss": 2.0809, + "step": 236865 + }, + { + "epoch": 0.56, + "grad_norm": 1.984375, + "learning_rate": 0.00016405289288037565, + "loss": 2.3163, + "step": 236870 + }, + { + "epoch": 0.56, + "grad_norm": 2.171875, + "learning_rate": 0.00016405147350349698, + "loss": 2.0996, + "step": 236875 + }, + { + "epoch": 0.56, + "grad_norm": 2.109375, + "learning_rate": 0.00016405005410473724, + "loss": 1.9537, + "step": 236880 + }, + { + "epoch": 0.56, + "grad_norm": 1.9609375, + "learning_rate": 0.00016404863468409678, + "loss": 1.9204, + "step": 236885 + }, + { + "epoch": 0.56, + "grad_norm": 2.40625, + "learning_rate": 0.00016404721524157617, + "loss": 2.0424, + "step": 236890 + }, + { + "epoch": 0.56, + "grad_norm": 2.3125, + "learning_rate": 0.0001640457957771759, + "loss": 1.9137, + "step": 236895 + }, + { + "epoch": 0.56, + "grad_norm": 2.140625, + "learning_rate": 0.00016404437629089638, + "loss": 2.1189, + "step": 236900 + }, + { + "epoch": 0.56, + "grad_norm": 2.1875, + "learning_rate": 0.00016404295678273815, + "loss": 1.9212, + "step": 236905 + }, + { + "epoch": 0.56, + "grad_norm": 2.15625, + "learning_rate": 0.0001640415372527017, + "loss": 2.0178, + "step": 236910 + }, + { + "epoch": 0.56, + "grad_norm": 2.140625, + "learning_rate": 0.00016404011770078747, + "loss": 1.9685, + "step": 236915 + }, + { + "epoch": 0.56, + "grad_norm": 2.09375, + "learning_rate": 0.000164038698126996, + "loss": 2.1201, + "step": 236920 + }, + { + "epoch": 0.56, + "grad_norm": 2.265625, + "learning_rate": 0.00016403727853132774, + "loss": 2.0337, + "step": 236925 + }, + { + "epoch": 0.56, + "grad_norm": 2.390625, + "learning_rate": 0.00016403585891378318, + "loss": 1.973, + "step": 236930 + }, + { + "epoch": 0.56, + "grad_norm": 1.9609375, + "learning_rate": 0.0001640344392743628, + "loss": 1.9857, + "step": 236935 + }, + { + "epoch": 0.56, + "grad_norm": 2.0, + "learning_rate": 0.0001640330196130671, + "loss": 2.1035, + "step": 236940 + }, + { + "epoch": 0.56, + "grad_norm": 1.8671875, + "learning_rate": 0.0001640315999298966, + "loss": 2.1408, + "step": 236945 + }, + { + "epoch": 0.56, + "grad_norm": 2.125, + "learning_rate": 0.0001640301802248517, + "loss": 2.1289, + "step": 236950 + }, + { + "epoch": 0.56, + "grad_norm": 2.375, + "learning_rate": 0.00016402876049793295, + "loss": 2.1766, + "step": 236955 + }, + { + "epoch": 0.56, + "grad_norm": 2.21875, + "learning_rate": 0.00016402734074914078, + "loss": 2.0699, + "step": 236960 + }, + { + "epoch": 0.56, + "grad_norm": 2.296875, + "learning_rate": 0.00016402592097847576, + "loss": 2.0787, + "step": 236965 + }, + { + "epoch": 0.56, + "grad_norm": 2.0, + "learning_rate": 0.0001640245011859383, + "loss": 2.1146, + "step": 236970 + }, + { + "epoch": 0.56, + "grad_norm": 1.90625, + "learning_rate": 0.00016402308137152888, + "loss": 2.2816, + "step": 236975 + }, + { + "epoch": 0.56, + "grad_norm": 2.203125, + "learning_rate": 0.00016402166153524808, + "loss": 2.0271, + "step": 236980 + }, + { + "epoch": 0.56, + "grad_norm": 1.9296875, + "learning_rate": 0.00016402024167709627, + "loss": 2.1751, + "step": 236985 + }, + { + "epoch": 0.56, + "grad_norm": 2.171875, + "learning_rate": 0.000164018821797074, + "loss": 2.0793, + "step": 236990 + }, + { + "epoch": 0.56, + "grad_norm": 2.125, + "learning_rate": 0.0001640174018951817, + "loss": 2.0822, + "step": 236995 + }, + { + "epoch": 0.56, + "grad_norm": 1.9765625, + "learning_rate": 0.00016401598197141994, + "loss": 2.1021, + "step": 237000 + }, + { + "epoch": 0.56, + "grad_norm": 2.109375, + "learning_rate": 0.00016401456202578917, + "loss": 2.3105, + "step": 237005 + }, + { + "epoch": 0.56, + "grad_norm": 1.921875, + "learning_rate": 0.00016401314205828984, + "loss": 2.1879, + "step": 237010 + }, + { + "epoch": 0.56, + "grad_norm": 2.15625, + "learning_rate": 0.0001640117220689225, + "loss": 1.9693, + "step": 237015 + }, + { + "epoch": 0.56, + "grad_norm": 1.9140625, + "learning_rate": 0.0001640103020576875, + "loss": 2.0179, + "step": 237020 + }, + { + "epoch": 0.56, + "grad_norm": 2.015625, + "learning_rate": 0.0001640088820245855, + "loss": 2.1548, + "step": 237025 + }, + { + "epoch": 0.56, + "grad_norm": 2.125, + "learning_rate": 0.0001640074619696169, + "loss": 1.9547, + "step": 237030 + }, + { + "epoch": 0.56, + "grad_norm": 2.0, + "learning_rate": 0.00016400604189278215, + "loss": 2.0953, + "step": 237035 + }, + { + "epoch": 0.56, + "grad_norm": 1.984375, + "learning_rate": 0.0001640046217940818, + "loss": 2.0644, + "step": 237040 + }, + { + "epoch": 0.56, + "grad_norm": 2.4375, + "learning_rate": 0.00016400320167351631, + "loss": 2.2322, + "step": 237045 + }, + { + "epoch": 0.56, + "grad_norm": 2.0625, + "learning_rate": 0.00016400178153108618, + "loss": 2.1325, + "step": 237050 + }, + { + "epoch": 0.56, + "grad_norm": 1.84375, + "learning_rate": 0.00016400036136679185, + "loss": 2.172, + "step": 237055 + }, + { + "epoch": 0.56, + "grad_norm": 1.9375, + "learning_rate": 0.00016399894118063386, + "loss": 2.1476, + "step": 237060 + }, + { + "epoch": 0.56, + "grad_norm": 2.421875, + "learning_rate": 0.00016399752097261266, + "loss": 1.9811, + "step": 237065 + }, + { + "epoch": 0.56, + "grad_norm": 3.546875, + "learning_rate": 0.00016399610074272874, + "loss": 2.0617, + "step": 237070 + }, + { + "epoch": 0.56, + "grad_norm": 2.46875, + "learning_rate": 0.00016399468049098262, + "loss": 2.099, + "step": 237075 + }, + { + "epoch": 0.56, + "grad_norm": 2.25, + "learning_rate": 0.00016399326021737472, + "loss": 1.9553, + "step": 237080 + }, + { + "epoch": 0.56, + "grad_norm": 2.25, + "learning_rate": 0.0001639918399219056, + "loss": 1.9608, + "step": 237085 + }, + { + "epoch": 0.56, + "grad_norm": 2.4375, + "learning_rate": 0.00016399041960457567, + "loss": 2.1665, + "step": 237090 + }, + { + "epoch": 0.56, + "grad_norm": 1.8359375, + "learning_rate": 0.00016398899926538547, + "loss": 2.1356, + "step": 237095 + }, + { + "epoch": 0.56, + "grad_norm": 2.40625, + "learning_rate": 0.00016398757890433545, + "loss": 2.21, + "step": 237100 + }, + { + "epoch": 0.56, + "grad_norm": 1.953125, + "learning_rate": 0.00016398615852142613, + "loss": 2.0518, + "step": 237105 + }, + { + "epoch": 0.56, + "grad_norm": 2.46875, + "learning_rate": 0.00016398473811665798, + "loss": 2.2251, + "step": 237110 + }, + { + "epoch": 0.56, + "grad_norm": 2.078125, + "learning_rate": 0.00016398331769003149, + "loss": 2.2654, + "step": 237115 + }, + { + "epoch": 0.56, + "grad_norm": 2.296875, + "learning_rate": 0.00016398189724154715, + "loss": 2.0406, + "step": 237120 + }, + { + "epoch": 0.56, + "grad_norm": 2.484375, + "learning_rate": 0.00016398047677120537, + "loss": 2.0543, + "step": 237125 + }, + { + "epoch": 0.56, + "grad_norm": 2.1875, + "learning_rate": 0.00016397905627900673, + "loss": 2.0972, + "step": 237130 + }, + { + "epoch": 0.56, + "grad_norm": 2.09375, + "learning_rate": 0.00016397763576495172, + "loss": 2.1031, + "step": 237135 + }, + { + "epoch": 0.56, + "grad_norm": 1.84375, + "learning_rate": 0.00016397621522904076, + "loss": 1.9062, + "step": 237140 + }, + { + "epoch": 0.56, + "grad_norm": 1.765625, + "learning_rate": 0.00016397479467127437, + "loss": 1.9339, + "step": 237145 + }, + { + "epoch": 0.56, + "grad_norm": 1.859375, + "learning_rate": 0.000163973374091653, + "loss": 2.1806, + "step": 237150 + }, + { + "epoch": 0.56, + "grad_norm": 1.796875, + "learning_rate": 0.0001639719534901772, + "loss": 2.1202, + "step": 237155 + }, + { + "epoch": 0.56, + "grad_norm": 1.890625, + "learning_rate": 0.0001639705328668474, + "loss": 2.2527, + "step": 237160 + }, + { + "epoch": 0.56, + "grad_norm": 2.03125, + "learning_rate": 0.00016396911222166415, + "loss": 2.0669, + "step": 237165 + }, + { + "epoch": 0.56, + "grad_norm": 2.015625, + "learning_rate": 0.00016396769155462785, + "loss": 2.0766, + "step": 237170 + }, + { + "epoch": 0.56, + "grad_norm": 1.9453125, + "learning_rate": 0.00016396627086573903, + "loss": 2.2111, + "step": 237175 + }, + { + "epoch": 0.56, + "grad_norm": 2.1875, + "learning_rate": 0.00016396485015499816, + "loss": 2.1999, + "step": 237180 + }, + { + "epoch": 0.56, + "grad_norm": 2.296875, + "learning_rate": 0.00016396342942240577, + "loss": 2.1167, + "step": 237185 + }, + { + "epoch": 0.56, + "grad_norm": 2.28125, + "learning_rate": 0.0001639620086679623, + "loss": 2.0536, + "step": 237190 + }, + { + "epoch": 0.56, + "grad_norm": 2.203125, + "learning_rate": 0.00016396058789166826, + "loss": 1.9028, + "step": 237195 + }, + { + "epoch": 0.56, + "grad_norm": 1.7421875, + "learning_rate": 0.0001639591670935241, + "loss": 2.1541, + "step": 237200 + }, + { + "epoch": 0.56, + "grad_norm": 2.84375, + "learning_rate": 0.0001639577462735303, + "loss": 2.0945, + "step": 237205 + }, + { + "epoch": 0.56, + "grad_norm": 2.09375, + "learning_rate": 0.00016395632543168742, + "loss": 2.1714, + "step": 237210 + }, + { + "epoch": 0.56, + "grad_norm": 2.015625, + "learning_rate": 0.0001639549045679959, + "loss": 2.021, + "step": 237215 + }, + { + "epoch": 0.56, + "grad_norm": 2.0625, + "learning_rate": 0.00016395348368245622, + "loss": 2.3085, + "step": 237220 + }, + { + "epoch": 0.56, + "grad_norm": 2.15625, + "learning_rate": 0.00016395206277506887, + "loss": 2.0996, + "step": 237225 + }, + { + "epoch": 0.56, + "grad_norm": 2.4375, + "learning_rate": 0.00016395064184583432, + "loss": 1.9663, + "step": 237230 + }, + { + "epoch": 0.56, + "grad_norm": 2.359375, + "learning_rate": 0.00016394922089475308, + "loss": 2.0892, + "step": 237235 + }, + { + "epoch": 0.56, + "grad_norm": 1.9921875, + "learning_rate": 0.00016394779992182564, + "loss": 2.0043, + "step": 237240 + }, + { + "epoch": 0.56, + "grad_norm": 2.03125, + "learning_rate": 0.00016394637892705246, + "loss": 1.9404, + "step": 237245 + }, + { + "epoch": 0.56, + "grad_norm": 1.9375, + "learning_rate": 0.00016394495791043404, + "loss": 2.1917, + "step": 237250 + }, + { + "epoch": 0.56, + "grad_norm": 2.234375, + "learning_rate": 0.00016394353687197087, + "loss": 2.1435, + "step": 237255 + }, + { + "epoch": 0.56, + "grad_norm": 2.0, + "learning_rate": 0.0001639421158116634, + "loss": 2.2607, + "step": 237260 + }, + { + "epoch": 0.56, + "grad_norm": 2.359375, + "learning_rate": 0.00016394069472951218, + "loss": 2.1128, + "step": 237265 + }, + { + "epoch": 0.56, + "grad_norm": 1.84375, + "learning_rate": 0.00016393927362551763, + "loss": 2.0495, + "step": 237270 + }, + { + "epoch": 0.56, + "grad_norm": 2.078125, + "learning_rate": 0.0001639378524996803, + "loss": 2.1459, + "step": 237275 + }, + { + "epoch": 0.56, + "grad_norm": 1.9453125, + "learning_rate": 0.00016393643135200058, + "loss": 2.0434, + "step": 237280 + }, + { + "epoch": 0.56, + "grad_norm": 1.8125, + "learning_rate": 0.00016393501018247905, + "loss": 2.093, + "step": 237285 + }, + { + "epoch": 0.56, + "grad_norm": 1.9296875, + "learning_rate": 0.00016393358899111614, + "loss": 2.1494, + "step": 237290 + }, + { + "epoch": 0.56, + "grad_norm": 2.125, + "learning_rate": 0.00016393216777791237, + "loss": 2.1119, + "step": 237295 + }, + { + "epoch": 0.56, + "grad_norm": 2.109375, + "learning_rate": 0.00016393074654286825, + "loss": 2.2171, + "step": 237300 + }, + { + "epoch": 0.56, + "grad_norm": 1.7734375, + "learning_rate": 0.00016392932528598418, + "loss": 2.2289, + "step": 237305 + }, + { + "epoch": 0.56, + "grad_norm": 1.671875, + "learning_rate": 0.00016392790400726072, + "loss": 2.2364, + "step": 237310 + }, + { + "epoch": 0.56, + "grad_norm": 2.40625, + "learning_rate": 0.00016392648270669832, + "loss": 1.8852, + "step": 237315 + }, + { + "epoch": 0.56, + "grad_norm": 2.203125, + "learning_rate": 0.00016392506138429748, + "loss": 2.2004, + "step": 237320 + }, + { + "epoch": 0.56, + "grad_norm": 2.140625, + "learning_rate": 0.00016392364004005867, + "loss": 1.9832, + "step": 237325 + }, + { + "epoch": 0.56, + "grad_norm": 1.8046875, + "learning_rate": 0.0001639222186739824, + "loss": 2.0343, + "step": 237330 + }, + { + "epoch": 0.56, + "grad_norm": 2.015625, + "learning_rate": 0.0001639207972860691, + "loss": 2.0524, + "step": 237335 + }, + { + "epoch": 0.56, + "grad_norm": 2.046875, + "learning_rate": 0.00016391937587631933, + "loss": 2.1002, + "step": 237340 + }, + { + "epoch": 0.56, + "grad_norm": 2.4375, + "learning_rate": 0.00016391795444473356, + "loss": 2.1603, + "step": 237345 + }, + { + "epoch": 0.56, + "grad_norm": 2.296875, + "learning_rate": 0.00016391653299131223, + "loss": 2.0393, + "step": 237350 + }, + { + "epoch": 0.56, + "grad_norm": 2.375, + "learning_rate": 0.00016391511151605585, + "loss": 2.0653, + "step": 237355 + }, + { + "epoch": 0.56, + "grad_norm": 2.359375, + "learning_rate": 0.00016391369001896494, + "loss": 2.1788, + "step": 237360 + }, + { + "epoch": 0.56, + "grad_norm": 2.390625, + "learning_rate": 0.0001639122685000399, + "loss": 2.1863, + "step": 237365 + }, + { + "epoch": 0.56, + "grad_norm": 2.796875, + "learning_rate": 0.0001639108469592813, + "loss": 2.021, + "step": 237370 + }, + { + "epoch": 0.56, + "grad_norm": 2.296875, + "learning_rate": 0.0001639094253966896, + "loss": 2.1294, + "step": 237375 + }, + { + "epoch": 0.56, + "grad_norm": 2.109375, + "learning_rate": 0.00016390800381226527, + "loss": 1.8821, + "step": 237380 + }, + { + "epoch": 0.56, + "grad_norm": 1.859375, + "learning_rate": 0.00016390658220600884, + "loss": 2.1881, + "step": 237385 + }, + { + "epoch": 0.56, + "grad_norm": 1.84375, + "learning_rate": 0.00016390516057792072, + "loss": 2.0315, + "step": 237390 + }, + { + "epoch": 0.56, + "grad_norm": 2.203125, + "learning_rate": 0.00016390373892800146, + "loss": 2.2772, + "step": 237395 + }, + { + "epoch": 0.56, + "grad_norm": 2.34375, + "learning_rate": 0.00016390231725625152, + "loss": 2.2707, + "step": 237400 + }, + { + "epoch": 0.56, + "grad_norm": 1.796875, + "learning_rate": 0.00016390089556267138, + "loss": 2.1211, + "step": 237405 + }, + { + "epoch": 0.56, + "grad_norm": 2.359375, + "learning_rate": 0.00016389947384726154, + "loss": 1.9369, + "step": 237410 + }, + { + "epoch": 0.56, + "grad_norm": 2.265625, + "learning_rate": 0.00016389805211002248, + "loss": 2.109, + "step": 237415 + }, + { + "epoch": 0.56, + "grad_norm": 1.9765625, + "learning_rate": 0.0001638966303509547, + "loss": 2.1268, + "step": 237420 + }, + { + "epoch": 0.56, + "grad_norm": 1.8984375, + "learning_rate": 0.00016389520857005866, + "loss": 1.9727, + "step": 237425 + }, + { + "epoch": 0.56, + "grad_norm": 1.8125, + "learning_rate": 0.00016389378676733487, + "loss": 2.2, + "step": 237430 + }, + { + "epoch": 0.56, + "grad_norm": 2.359375, + "learning_rate": 0.0001638923649427838, + "loss": 2.2315, + "step": 237435 + }, + { + "epoch": 0.56, + "grad_norm": 1.8046875, + "learning_rate": 0.0001638909430964059, + "loss": 2.0162, + "step": 237440 + }, + { + "epoch": 0.56, + "grad_norm": 2.15625, + "learning_rate": 0.00016388952122820176, + "loss": 2.1868, + "step": 237445 + }, + { + "epoch": 0.56, + "grad_norm": 1.84375, + "learning_rate": 0.00016388809933817178, + "loss": 2.0798, + "step": 237450 + }, + { + "epoch": 0.56, + "grad_norm": 2.109375, + "learning_rate": 0.00016388667742631645, + "loss": 1.8249, + "step": 237455 + }, + { + "epoch": 0.56, + "grad_norm": 2.390625, + "learning_rate": 0.00016388525549263628, + "loss": 2.1156, + "step": 237460 + }, + { + "epoch": 0.56, + "grad_norm": 2.203125, + "learning_rate": 0.00016388383353713174, + "loss": 2.2162, + "step": 237465 + }, + { + "epoch": 0.56, + "grad_norm": 2.359375, + "learning_rate": 0.00016388241155980334, + "loss": 2.1751, + "step": 237470 + }, + { + "epoch": 0.56, + "grad_norm": 2.4375, + "learning_rate": 0.00016388098956065153, + "loss": 2.2631, + "step": 237475 + }, + { + "epoch": 0.56, + "grad_norm": 2.46875, + "learning_rate": 0.00016387956753967684, + "loss": 1.9981, + "step": 237480 + }, + { + "epoch": 0.56, + "grad_norm": 2.109375, + "learning_rate": 0.0001638781454968797, + "loss": 2.1622, + "step": 237485 + }, + { + "epoch": 0.56, + "grad_norm": 2.203125, + "learning_rate": 0.00016387672343226066, + "loss": 2.1624, + "step": 237490 + }, + { + "epoch": 0.56, + "grad_norm": 2.15625, + "learning_rate": 0.00016387530134582016, + "loss": 1.9918, + "step": 237495 + }, + { + "epoch": 0.56, + "grad_norm": 1.8515625, + "learning_rate": 0.0001638738792375587, + "loss": 1.8492, + "step": 237500 + }, + { + "epoch": 0.56, + "grad_norm": 2.203125, + "learning_rate": 0.00016387245710747673, + "loss": 2.1108, + "step": 237505 + }, + { + "epoch": 0.56, + "grad_norm": 2.34375, + "learning_rate": 0.00016387103495557482, + "loss": 2.1573, + "step": 237510 + }, + { + "epoch": 0.56, + "grad_norm": 1.734375, + "learning_rate": 0.0001638696127818534, + "loss": 2.1954, + "step": 237515 + }, + { + "epoch": 0.56, + "grad_norm": 1.984375, + "learning_rate": 0.00016386819058631294, + "loss": 2.0844, + "step": 237520 + }, + { + "epoch": 0.56, + "grad_norm": 2.296875, + "learning_rate": 0.00016386676836895397, + "loss": 2.0919, + "step": 237525 + }, + { + "epoch": 0.56, + "grad_norm": 1.7578125, + "learning_rate": 0.0001638653461297769, + "loss": 2.1161, + "step": 237530 + }, + { + "epoch": 0.56, + "grad_norm": 2.046875, + "learning_rate": 0.00016386392386878233, + "loss": 2.0566, + "step": 237535 + }, + { + "epoch": 0.56, + "grad_norm": 1.953125, + "learning_rate": 0.00016386250158597067, + "loss": 1.8916, + "step": 237540 + }, + { + "epoch": 0.56, + "grad_norm": 2.078125, + "learning_rate": 0.0001638610792813424, + "loss": 1.9306, + "step": 237545 + }, + { + "epoch": 0.56, + "grad_norm": 2.34375, + "learning_rate": 0.00016385965695489807, + "loss": 2.2412, + "step": 237550 + }, + { + "epoch": 0.56, + "grad_norm": 2.703125, + "learning_rate": 0.00016385823460663806, + "loss": 1.9636, + "step": 237555 + }, + { + "epoch": 0.56, + "grad_norm": 2.09375, + "learning_rate": 0.00016385681223656296, + "loss": 2.1039, + "step": 237560 + }, + { + "epoch": 0.56, + "grad_norm": 2.21875, + "learning_rate": 0.00016385538984467322, + "loss": 2.2294, + "step": 237565 + }, + { + "epoch": 0.56, + "grad_norm": 2.015625, + "learning_rate": 0.00016385396743096934, + "loss": 2.0897, + "step": 237570 + }, + { + "epoch": 0.56, + "grad_norm": 1.9765625, + "learning_rate": 0.00016385254499545174, + "loss": 2.1124, + "step": 237575 + }, + { + "epoch": 0.56, + "grad_norm": 2.25, + "learning_rate": 0.00016385112253812097, + "loss": 2.1214, + "step": 237580 + }, + { + "epoch": 0.56, + "grad_norm": 2.671875, + "learning_rate": 0.00016384970005897749, + "loss": 1.9401, + "step": 237585 + }, + { + "epoch": 0.56, + "grad_norm": 2.328125, + "learning_rate": 0.00016384827755802178, + "loss": 2.1185, + "step": 237590 + }, + { + "epoch": 0.56, + "grad_norm": 2.078125, + "learning_rate": 0.0001638468550352544, + "loss": 2.2767, + "step": 237595 + }, + { + "epoch": 0.56, + "grad_norm": 2.328125, + "learning_rate": 0.00016384543249067573, + "loss": 2.1875, + "step": 237600 + }, + { + "epoch": 0.56, + "grad_norm": 2.15625, + "learning_rate": 0.0001638440099242863, + "loss": 2.0676, + "step": 237605 + }, + { + "epoch": 0.56, + "grad_norm": 2.34375, + "learning_rate": 0.00016384258733608662, + "loss": 2.0355, + "step": 237610 + }, + { + "epoch": 0.56, + "grad_norm": 2.375, + "learning_rate": 0.00016384116472607714, + "loss": 1.9912, + "step": 237615 + }, + { + "epoch": 0.56, + "grad_norm": 2.0, + "learning_rate": 0.00016383974209425837, + "loss": 2.1206, + "step": 237620 + }, + { + "epoch": 0.56, + "grad_norm": 2.0, + "learning_rate": 0.00016383831944063077, + "loss": 2.0197, + "step": 237625 + }, + { + "epoch": 0.56, + "grad_norm": 1.96875, + "learning_rate": 0.00016383689676519487, + "loss": 1.9341, + "step": 237630 + }, + { + "epoch": 0.56, + "grad_norm": 2.015625, + "learning_rate": 0.0001638354740679511, + "loss": 2.1138, + "step": 237635 + }, + { + "epoch": 0.56, + "grad_norm": 2.21875, + "learning_rate": 0.00016383405134889998, + "loss": 2.1398, + "step": 237640 + }, + { + "epoch": 0.56, + "grad_norm": 2.4375, + "learning_rate": 0.00016383262860804202, + "loss": 2.1885, + "step": 237645 + }, + { + "epoch": 0.56, + "grad_norm": 2.4375, + "learning_rate": 0.00016383120584537766, + "loss": 1.9628, + "step": 237650 + }, + { + "epoch": 0.56, + "grad_norm": 2.203125, + "learning_rate": 0.00016382978306090737, + "loss": 1.9635, + "step": 237655 + }, + { + "epoch": 0.56, + "grad_norm": 2.109375, + "learning_rate": 0.0001638283602546317, + "loss": 2.0729, + "step": 237660 + }, + { + "epoch": 0.56, + "grad_norm": 1.8828125, + "learning_rate": 0.0001638269374265511, + "loss": 2.2237, + "step": 237665 + }, + { + "epoch": 0.56, + "grad_norm": 2.453125, + "learning_rate": 0.00016382551457666607, + "loss": 2.0079, + "step": 237670 + }, + { + "epoch": 0.56, + "grad_norm": 2.03125, + "learning_rate": 0.00016382409170497705, + "loss": 2.0208, + "step": 237675 + }, + { + "epoch": 0.56, + "grad_norm": 1.96875, + "learning_rate": 0.0001638226688114846, + "loss": 2.061, + "step": 237680 + }, + { + "epoch": 0.56, + "grad_norm": 2.015625, + "learning_rate": 0.00016382124589618916, + "loss": 2.0284, + "step": 237685 + }, + { + "epoch": 0.56, + "grad_norm": 2.078125, + "learning_rate": 0.00016381982295909123, + "loss": 2.2767, + "step": 237690 + }, + { + "epoch": 0.56, + "grad_norm": 2.34375, + "learning_rate": 0.00016381840000019126, + "loss": 2.1374, + "step": 237695 + }, + { + "epoch": 0.56, + "grad_norm": 2.4375, + "learning_rate": 0.0001638169770194898, + "loss": 2.1089, + "step": 237700 + }, + { + "epoch": 0.56, + "grad_norm": 2.21875, + "learning_rate": 0.0001638155540169873, + "loss": 2.1721, + "step": 237705 + }, + { + "epoch": 0.56, + "grad_norm": 2.1875, + "learning_rate": 0.00016381413099268426, + "loss": 2.1642, + "step": 237710 + }, + { + "epoch": 0.56, + "grad_norm": 1.9296875, + "learning_rate": 0.00016381270794658113, + "loss": 1.8799, + "step": 237715 + }, + { + "epoch": 0.56, + "grad_norm": 2.140625, + "learning_rate": 0.0001638112848786784, + "loss": 2.0987, + "step": 237720 + }, + { + "epoch": 0.56, + "grad_norm": 2.59375, + "learning_rate": 0.00016380986178897664, + "loss": 2.0767, + "step": 237725 + }, + { + "epoch": 0.56, + "grad_norm": 2.0625, + "learning_rate": 0.00016380843867747625, + "loss": 2.2675, + "step": 237730 + }, + { + "epoch": 0.56, + "grad_norm": 2.46875, + "learning_rate": 0.00016380701554417773, + "loss": 2.1231, + "step": 237735 + }, + { + "epoch": 0.56, + "grad_norm": 1.984375, + "learning_rate": 0.00016380559238908155, + "loss": 2.0115, + "step": 237740 + }, + { + "epoch": 0.56, + "grad_norm": 2.03125, + "learning_rate": 0.00016380416921218827, + "loss": 2.2072, + "step": 237745 + }, + { + "epoch": 0.56, + "grad_norm": 1.9609375, + "learning_rate": 0.00016380274601349832, + "loss": 2.0216, + "step": 237750 + }, + { + "epoch": 0.56, + "grad_norm": 2.09375, + "learning_rate": 0.00016380132279301217, + "loss": 2.121, + "step": 237755 + }, + { + "epoch": 0.56, + "grad_norm": 1.8671875, + "learning_rate": 0.00016379989955073033, + "loss": 2.1664, + "step": 237760 + }, + { + "epoch": 0.56, + "grad_norm": 1.7578125, + "learning_rate": 0.0001637984762866533, + "loss": 2.1848, + "step": 237765 + }, + { + "epoch": 0.56, + "grad_norm": 2.1875, + "learning_rate": 0.00016379705300078156, + "loss": 1.9854, + "step": 237770 + }, + { + "epoch": 0.56, + "grad_norm": 2.390625, + "learning_rate": 0.0001637956296931156, + "loss": 2.0641, + "step": 237775 + }, + { + "epoch": 0.56, + "grad_norm": 1.9453125, + "learning_rate": 0.0001637942063636559, + "loss": 2.0315, + "step": 237780 + }, + { + "epoch": 0.56, + "grad_norm": 2.484375, + "learning_rate": 0.00016379278301240292, + "loss": 2.073, + "step": 237785 + }, + { + "epoch": 0.56, + "grad_norm": 2.171875, + "learning_rate": 0.00016379135963935716, + "loss": 1.9338, + "step": 237790 + }, + { + "epoch": 0.56, + "grad_norm": 2.171875, + "learning_rate": 0.00016378993624451913, + "loss": 1.88, + "step": 237795 + }, + { + "epoch": 0.56, + "grad_norm": 1.9921875, + "learning_rate": 0.00016378851282788928, + "loss": 1.9963, + "step": 237800 + }, + { + "epoch": 0.56, + "grad_norm": 1.9375, + "learning_rate": 0.00016378708938946814, + "loss": 1.9173, + "step": 237805 + }, + { + "epoch": 0.56, + "grad_norm": 2.375, + "learning_rate": 0.00016378566592925616, + "loss": 2.2182, + "step": 237810 + }, + { + "epoch": 0.56, + "grad_norm": 2.109375, + "learning_rate": 0.00016378424244725389, + "loss": 2.1218, + "step": 237815 + }, + { + "epoch": 0.56, + "grad_norm": 2.171875, + "learning_rate": 0.0001637828189434617, + "loss": 2.0945, + "step": 237820 + }, + { + "epoch": 0.56, + "grad_norm": 2.046875, + "learning_rate": 0.00016378139541788018, + "loss": 2.1395, + "step": 237825 + }, + { + "epoch": 0.56, + "grad_norm": 2.328125, + "learning_rate": 0.00016377997187050976, + "loss": 2.21, + "step": 237830 + }, + { + "epoch": 0.56, + "grad_norm": 2.015625, + "learning_rate": 0.00016377854830135094, + "loss": 2.1046, + "step": 237835 + }, + { + "epoch": 0.56, + "grad_norm": 2.171875, + "learning_rate": 0.00016377712471040424, + "loss": 2.0241, + "step": 237840 + }, + { + "epoch": 0.56, + "grad_norm": 1.90625, + "learning_rate": 0.0001637757010976701, + "loss": 2.0729, + "step": 237845 + }, + { + "epoch": 0.56, + "grad_norm": 2.546875, + "learning_rate": 0.000163774277463149, + "loss": 2.3081, + "step": 237850 + }, + { + "epoch": 0.56, + "grad_norm": 2.015625, + "learning_rate": 0.0001637728538068415, + "loss": 2.215, + "step": 237855 + }, + { + "epoch": 0.56, + "grad_norm": 1.921875, + "learning_rate": 0.000163771430128748, + "loss": 2.0241, + "step": 237860 + }, + { + "epoch": 0.56, + "grad_norm": 2.1875, + "learning_rate": 0.000163770006428869, + "loss": 2.0266, + "step": 237865 + }, + { + "epoch": 0.56, + "grad_norm": 2.15625, + "learning_rate": 0.0001637685827072051, + "loss": 2.0535, + "step": 237870 + }, + { + "epoch": 0.56, + "grad_norm": 1.9375, + "learning_rate": 0.0001637671589637566, + "loss": 2.0685, + "step": 237875 + }, + { + "epoch": 0.56, + "grad_norm": 2.171875, + "learning_rate": 0.00016376573519852412, + "loss": 2.1847, + "step": 237880 + }, + { + "epoch": 0.56, + "grad_norm": 2.328125, + "learning_rate": 0.00016376431141150812, + "loss": 2.1222, + "step": 237885 + }, + { + "epoch": 0.56, + "grad_norm": 2.046875, + "learning_rate": 0.00016376288760270905, + "loss": 2.0768, + "step": 237890 + }, + { + "epoch": 0.56, + "grad_norm": 2.3125, + "learning_rate": 0.00016376146377212745, + "loss": 2.0672, + "step": 237895 + }, + { + "epoch": 0.56, + "grad_norm": 2.0625, + "learning_rate": 0.00016376003991976377, + "loss": 2.0435, + "step": 237900 + }, + { + "epoch": 0.56, + "grad_norm": 1.9921875, + "learning_rate": 0.00016375861604561848, + "loss": 2.0533, + "step": 237905 + }, + { + "epoch": 0.56, + "grad_norm": 2.234375, + "learning_rate": 0.0001637571921496921, + "loss": 2.2757, + "step": 237910 + }, + { + "epoch": 0.56, + "grad_norm": 2.34375, + "learning_rate": 0.0001637557682319851, + "loss": 1.9576, + "step": 237915 + }, + { + "epoch": 0.56, + "grad_norm": 2.640625, + "learning_rate": 0.00016375434429249797, + "loss": 2.0024, + "step": 237920 + }, + { + "epoch": 0.56, + "grad_norm": 2.5625, + "learning_rate": 0.00016375292033123126, + "loss": 2.24, + "step": 237925 + }, + { + "epoch": 0.56, + "grad_norm": 1.9453125, + "learning_rate": 0.00016375149634818533, + "loss": 1.9039, + "step": 237930 + }, + { + "epoch": 0.56, + "grad_norm": 2.328125, + "learning_rate": 0.00016375007234336076, + "loss": 2.2137, + "step": 237935 + }, + { + "epoch": 0.56, + "grad_norm": 1.7421875, + "learning_rate": 0.000163748648316758, + "loss": 2.0245, + "step": 237940 + }, + { + "epoch": 0.56, + "grad_norm": 2.21875, + "learning_rate": 0.00016374722426837755, + "loss": 1.9817, + "step": 237945 + }, + { + "epoch": 0.56, + "grad_norm": 2.078125, + "learning_rate": 0.0001637458001982199, + "loss": 2.0872, + "step": 237950 + }, + { + "epoch": 0.56, + "grad_norm": 2.171875, + "learning_rate": 0.00016374437610628553, + "loss": 1.8798, + "step": 237955 + }, + { + "epoch": 0.56, + "grad_norm": 1.96875, + "learning_rate": 0.0001637429519925749, + "loss": 2.0662, + "step": 237960 + }, + { + "epoch": 0.56, + "grad_norm": 2.25, + "learning_rate": 0.00016374152785708854, + "loss": 2.1307, + "step": 237965 + }, + { + "epoch": 0.56, + "grad_norm": 2.125, + "learning_rate": 0.0001637401036998269, + "loss": 1.838, + "step": 237970 + }, + { + "epoch": 0.56, + "grad_norm": 1.921875, + "learning_rate": 0.00016373867952079052, + "loss": 2.2036, + "step": 237975 + }, + { + "epoch": 0.56, + "grad_norm": 1.9453125, + "learning_rate": 0.00016373725531997982, + "loss": 2.2268, + "step": 237980 + }, + { + "epoch": 0.56, + "grad_norm": 1.7421875, + "learning_rate": 0.0001637358310973953, + "loss": 1.8581, + "step": 237985 + }, + { + "epoch": 0.56, + "grad_norm": 2.0625, + "learning_rate": 0.00016373440685303755, + "loss": 2.1113, + "step": 237990 + }, + { + "epoch": 0.56, + "grad_norm": 1.796875, + "learning_rate": 0.00016373298258690687, + "loss": 2.0987, + "step": 237995 + }, + { + "epoch": 0.56, + "grad_norm": 1.984375, + "learning_rate": 0.00016373155829900392, + "loss": 2.188, + "step": 238000 + }, + { + "epoch": 0.56, + "grad_norm": 2.640625, + "learning_rate": 0.00016373013398932908, + "loss": 2.2521, + "step": 238005 + }, + { + "epoch": 0.56, + "grad_norm": 2.234375, + "learning_rate": 0.00016372870965788286, + "loss": 2.0784, + "step": 238010 + }, + { + "epoch": 0.56, + "grad_norm": 2.125, + "learning_rate": 0.00016372728530466576, + "loss": 1.9715, + "step": 238015 + }, + { + "epoch": 0.56, + "grad_norm": 2.8125, + "learning_rate": 0.0001637258609296783, + "loss": 2.1878, + "step": 238020 + }, + { + "epoch": 0.56, + "grad_norm": 1.7890625, + "learning_rate": 0.00016372443653292092, + "loss": 2.1172, + "step": 238025 + }, + { + "epoch": 0.56, + "grad_norm": 1.796875, + "learning_rate": 0.0001637230121143941, + "loss": 1.9717, + "step": 238030 + }, + { + "epoch": 0.56, + "grad_norm": 2.21875, + "learning_rate": 0.00016372158767409832, + "loss": 2.121, + "step": 238035 + }, + { + "epoch": 0.56, + "grad_norm": 4.875, + "learning_rate": 0.00016372016321203412, + "loss": 1.9882, + "step": 238040 + }, + { + "epoch": 0.56, + "grad_norm": 2.1875, + "learning_rate": 0.00016371873872820194, + "loss": 2.0635, + "step": 238045 + }, + { + "epoch": 0.56, + "grad_norm": 2.0625, + "learning_rate": 0.0001637173142226023, + "loss": 2.2071, + "step": 238050 + }, + { + "epoch": 0.56, + "grad_norm": 2.265625, + "learning_rate": 0.00016371588969523568, + "loss": 1.9663, + "step": 238055 + }, + { + "epoch": 0.56, + "grad_norm": 1.90625, + "learning_rate": 0.00016371446514610254, + "loss": 1.9703, + "step": 238060 + }, + { + "epoch": 0.56, + "grad_norm": 2.34375, + "learning_rate": 0.0001637130405752034, + "loss": 2.2993, + "step": 238065 + }, + { + "epoch": 0.56, + "grad_norm": 2.34375, + "learning_rate": 0.0001637116159825387, + "loss": 2.0683, + "step": 238070 + }, + { + "epoch": 0.56, + "grad_norm": 1.703125, + "learning_rate": 0.00016371019136810896, + "loss": 1.9569, + "step": 238075 + }, + { + "epoch": 0.56, + "grad_norm": 2.078125, + "learning_rate": 0.00016370876673191468, + "loss": 1.9813, + "step": 238080 + }, + { + "epoch": 0.56, + "grad_norm": 2.15625, + "learning_rate": 0.00016370734207395632, + "loss": 2.1379, + "step": 238085 + }, + { + "epoch": 0.56, + "grad_norm": 2.8125, + "learning_rate": 0.00016370591739423438, + "loss": 2.0088, + "step": 238090 + }, + { + "epoch": 0.56, + "grad_norm": 2.15625, + "learning_rate": 0.00016370449269274934, + "loss": 2.1458, + "step": 238095 + }, + { + "epoch": 0.56, + "grad_norm": 2.515625, + "learning_rate": 0.0001637030679695017, + "loss": 2.2256, + "step": 238100 + }, + { + "epoch": 0.56, + "grad_norm": 2.09375, + "learning_rate": 0.00016370164322449194, + "loss": 1.9775, + "step": 238105 + }, + { + "epoch": 0.56, + "grad_norm": 2.296875, + "learning_rate": 0.00016370021845772052, + "loss": 1.9988, + "step": 238110 + }, + { + "epoch": 0.56, + "grad_norm": 2.125, + "learning_rate": 0.00016369879366918798, + "loss": 2.0121, + "step": 238115 + }, + { + "epoch": 0.56, + "grad_norm": 2.171875, + "learning_rate": 0.00016369736885889474, + "loss": 1.9983, + "step": 238120 + }, + { + "epoch": 0.56, + "grad_norm": 2.25, + "learning_rate": 0.00016369594402684134, + "loss": 2.0842, + "step": 238125 + }, + { + "epoch": 0.56, + "grad_norm": 1.7265625, + "learning_rate": 0.00016369451917302828, + "loss": 2.133, + "step": 238130 + }, + { + "epoch": 0.56, + "grad_norm": 2.296875, + "learning_rate": 0.00016369309429745599, + "loss": 2.0255, + "step": 238135 + }, + { + "epoch": 0.56, + "grad_norm": 2.078125, + "learning_rate": 0.00016369166940012497, + "loss": 2.076, + "step": 238140 + }, + { + "epoch": 0.56, + "grad_norm": 1.7578125, + "learning_rate": 0.00016369024448103573, + "loss": 1.9766, + "step": 238145 + }, + { + "epoch": 0.56, + "grad_norm": 2.171875, + "learning_rate": 0.00016368881954018875, + "loss": 2.1351, + "step": 238150 + }, + { + "epoch": 0.56, + "grad_norm": 2.265625, + "learning_rate": 0.00016368739457758453, + "loss": 2.0694, + "step": 238155 + }, + { + "epoch": 0.56, + "grad_norm": 2.25, + "learning_rate": 0.00016368596959322353, + "loss": 2.0706, + "step": 238160 + }, + { + "epoch": 0.56, + "grad_norm": 2.4375, + "learning_rate": 0.00016368454458710625, + "loss": 2.3435, + "step": 238165 + }, + { + "epoch": 0.56, + "grad_norm": 2.578125, + "learning_rate": 0.00016368311955923317, + "loss": 2.0786, + "step": 238170 + }, + { + "epoch": 0.56, + "grad_norm": 2.25, + "learning_rate": 0.00016368169450960477, + "loss": 1.7615, + "step": 238175 + }, + { + "epoch": 0.56, + "grad_norm": 2.15625, + "learning_rate": 0.00016368026943822158, + "loss": 2.3003, + "step": 238180 + }, + { + "epoch": 0.56, + "grad_norm": 2.5, + "learning_rate": 0.00016367884434508403, + "loss": 2.0912, + "step": 238185 + }, + { + "epoch": 0.56, + "grad_norm": 2.109375, + "learning_rate": 0.00016367741923019265, + "loss": 2.0723, + "step": 238190 + }, + { + "epoch": 0.56, + "grad_norm": 2.109375, + "learning_rate": 0.0001636759940935479, + "loss": 2.2271, + "step": 238195 + }, + { + "epoch": 0.56, + "grad_norm": 2.203125, + "learning_rate": 0.00016367456893515026, + "loss": 2.0273, + "step": 238200 + }, + { + "epoch": 0.56, + "grad_norm": 2.078125, + "learning_rate": 0.00016367314375500026, + "loss": 2.0898, + "step": 238205 + }, + { + "epoch": 0.56, + "grad_norm": 2.5625, + "learning_rate": 0.00016367171855309834, + "loss": 2.125, + "step": 238210 + }, + { + "epoch": 0.56, + "grad_norm": 2.078125, + "learning_rate": 0.000163670293329445, + "loss": 1.9758, + "step": 238215 + }, + { + "epoch": 0.56, + "grad_norm": 2.03125, + "learning_rate": 0.00016366886808404077, + "loss": 2.0345, + "step": 238220 + }, + { + "epoch": 0.56, + "grad_norm": 2.390625, + "learning_rate": 0.00016366744281688604, + "loss": 2.1804, + "step": 238225 + }, + { + "epoch": 0.56, + "grad_norm": 2.171875, + "learning_rate": 0.00016366601752798137, + "loss": 2.1439, + "step": 238230 + }, + { + "epoch": 0.56, + "grad_norm": 1.9921875, + "learning_rate": 0.00016366459221732726, + "loss": 2.0655, + "step": 238235 + }, + { + "epoch": 0.56, + "grad_norm": 2.21875, + "learning_rate": 0.00016366316688492418, + "loss": 2.2466, + "step": 238240 + }, + { + "epoch": 0.56, + "grad_norm": 2.546875, + "learning_rate": 0.00016366174153077258, + "loss": 1.9916, + "step": 238245 + }, + { + "epoch": 0.56, + "grad_norm": 2.421875, + "learning_rate": 0.000163660316154873, + "loss": 2.0265, + "step": 238250 + }, + { + "epoch": 0.56, + "grad_norm": 1.9609375, + "learning_rate": 0.00016365889075722588, + "loss": 2.0654, + "step": 238255 + }, + { + "epoch": 0.56, + "grad_norm": 2.359375, + "learning_rate": 0.00016365746533783174, + "loss": 2.0231, + "step": 238260 + }, + { + "epoch": 0.56, + "grad_norm": 2.0625, + "learning_rate": 0.00016365603989669105, + "loss": 1.9046, + "step": 238265 + }, + { + "epoch": 0.56, + "grad_norm": 1.953125, + "learning_rate": 0.0001636546144338043, + "loss": 2.2835, + "step": 238270 + }, + { + "epoch": 0.56, + "grad_norm": 2.703125, + "learning_rate": 0.000163653188949172, + "loss": 2.1424, + "step": 238275 + }, + { + "epoch": 0.56, + "grad_norm": 1.984375, + "learning_rate": 0.0001636517634427946, + "loss": 2.185, + "step": 238280 + }, + { + "epoch": 0.56, + "grad_norm": 2.40625, + "learning_rate": 0.00016365033791467258, + "loss": 2.0452, + "step": 238285 + }, + { + "epoch": 0.56, + "grad_norm": 2.328125, + "learning_rate": 0.0001636489123648065, + "loss": 2.2859, + "step": 238290 + }, + { + "epoch": 0.56, + "grad_norm": 2.046875, + "learning_rate": 0.00016364748679319674, + "loss": 2.0399, + "step": 238295 + }, + { + "epoch": 0.56, + "grad_norm": 2.34375, + "learning_rate": 0.00016364606119984392, + "loss": 2.2518, + "step": 238300 + }, + { + "epoch": 0.56, + "grad_norm": 1.8046875, + "learning_rate": 0.00016364463558474839, + "loss": 2.258, + "step": 238305 + }, + { + "epoch": 0.56, + "grad_norm": 1.96875, + "learning_rate": 0.0001636432099479107, + "loss": 2.0028, + "step": 238310 + }, + { + "epoch": 0.56, + "grad_norm": 2.328125, + "learning_rate": 0.00016364178428933136, + "loss": 2.1904, + "step": 238315 + }, + { + "epoch": 0.56, + "grad_norm": 2.109375, + "learning_rate": 0.0001636403586090108, + "loss": 1.9416, + "step": 238320 + }, + { + "epoch": 0.56, + "grad_norm": 2.34375, + "learning_rate": 0.00016363893290694957, + "loss": 2.0533, + "step": 238325 + }, + { + "epoch": 0.56, + "grad_norm": 2.1875, + "learning_rate": 0.00016363750718314813, + "loss": 2.1244, + "step": 238330 + }, + { + "epoch": 0.56, + "grad_norm": 2.203125, + "learning_rate": 0.00016363608143760695, + "loss": 2.28, + "step": 238335 + }, + { + "epoch": 0.56, + "grad_norm": 2.4375, + "learning_rate": 0.00016363465567032653, + "loss": 2.0627, + "step": 238340 + }, + { + "epoch": 0.56, + "grad_norm": 2.28125, + "learning_rate": 0.00016363322988130735, + "loss": 2.0756, + "step": 238345 + }, + { + "epoch": 0.56, + "grad_norm": 2.265625, + "learning_rate": 0.0001636318040705499, + "loss": 2.1029, + "step": 238350 + }, + { + "epoch": 0.56, + "grad_norm": 2.125, + "learning_rate": 0.0001636303782380547, + "loss": 2.2154, + "step": 238355 + }, + { + "epoch": 0.56, + "grad_norm": 2.078125, + "learning_rate": 0.0001636289523838222, + "loss": 2.0692, + "step": 238360 + }, + { + "epoch": 0.56, + "grad_norm": 2.203125, + "learning_rate": 0.0001636275265078529, + "loss": 2.3228, + "step": 238365 + }, + { + "epoch": 0.56, + "grad_norm": 2.171875, + "learning_rate": 0.00016362610061014724, + "loss": 2.0796, + "step": 238370 + }, + { + "epoch": 0.56, + "grad_norm": 2.09375, + "learning_rate": 0.0001636246746907058, + "loss": 2.0486, + "step": 238375 + }, + { + "epoch": 0.56, + "grad_norm": 1.84375, + "learning_rate": 0.000163623248749529, + "loss": 2.1616, + "step": 238380 + }, + { + "epoch": 0.56, + "grad_norm": 1.9921875, + "learning_rate": 0.00016362182278661735, + "loss": 2.2491, + "step": 238385 + }, + { + "epoch": 0.56, + "grad_norm": 2.0, + "learning_rate": 0.0001636203968019713, + "loss": 2.0726, + "step": 238390 + }, + { + "epoch": 0.56, + "grad_norm": 2.25, + "learning_rate": 0.00016361897079559143, + "loss": 2.0875, + "step": 238395 + }, + { + "epoch": 0.56, + "grad_norm": 1.921875, + "learning_rate": 0.00016361754476747813, + "loss": 2.0064, + "step": 238400 + }, + { + "epoch": 0.56, + "grad_norm": 2.578125, + "learning_rate": 0.00016361611871763192, + "loss": 2.1781, + "step": 238405 + }, + { + "epoch": 0.56, + "grad_norm": 2.171875, + "learning_rate": 0.0001636146926460533, + "loss": 2.1864, + "step": 238410 + }, + { + "epoch": 0.56, + "grad_norm": 2.359375, + "learning_rate": 0.00016361326655274272, + "loss": 2.1904, + "step": 238415 + }, + { + "epoch": 0.56, + "grad_norm": 2.25, + "learning_rate": 0.00016361184043770072, + "loss": 2.2261, + "step": 238420 + }, + { + "epoch": 0.56, + "grad_norm": 2.234375, + "learning_rate": 0.00016361041430092775, + "loss": 1.94, + "step": 238425 + }, + { + "epoch": 0.56, + "grad_norm": 2.265625, + "learning_rate": 0.0001636089881424243, + "loss": 2.2233, + "step": 238430 + }, + { + "epoch": 0.56, + "grad_norm": 2.078125, + "learning_rate": 0.00016360756196219091, + "loss": 2.0303, + "step": 238435 + }, + { + "epoch": 0.56, + "grad_norm": 2.09375, + "learning_rate": 0.000163606135760228, + "loss": 2.152, + "step": 238440 + }, + { + "epoch": 0.56, + "grad_norm": 2.25, + "learning_rate": 0.00016360470953653605, + "loss": 2.167, + "step": 238445 + }, + { + "epoch": 0.56, + "grad_norm": 1.8046875, + "learning_rate": 0.00016360328329111562, + "loss": 1.9637, + "step": 238450 + }, + { + "epoch": 0.56, + "grad_norm": 2.21875, + "learning_rate": 0.00016360185702396716, + "loss": 2.0937, + "step": 238455 + }, + { + "epoch": 0.56, + "grad_norm": 2.40625, + "learning_rate": 0.00016360043073509111, + "loss": 2.132, + "step": 238460 + }, + { + "epoch": 0.56, + "grad_norm": 2.640625, + "learning_rate": 0.00016359900442448801, + "loss": 2.0825, + "step": 238465 + }, + { + "epoch": 0.56, + "grad_norm": 2.265625, + "learning_rate": 0.00016359757809215837, + "loss": 2.1178, + "step": 238470 + }, + { + "epoch": 0.56, + "grad_norm": 2.1875, + "learning_rate": 0.00016359615173810262, + "loss": 2.1837, + "step": 238475 + }, + { + "epoch": 0.56, + "grad_norm": 2.40625, + "learning_rate": 0.00016359472536232127, + "loss": 1.8291, + "step": 238480 + }, + { + "epoch": 0.56, + "grad_norm": 2.171875, + "learning_rate": 0.00016359329896481478, + "loss": 2.0198, + "step": 238485 + }, + { + "epoch": 0.56, + "grad_norm": 2.234375, + "learning_rate": 0.00016359187254558373, + "loss": 2.0515, + "step": 238490 + }, + { + "epoch": 0.56, + "grad_norm": 2.03125, + "learning_rate": 0.00016359044610462847, + "loss": 2.1529, + "step": 238495 + }, + { + "epoch": 0.56, + "grad_norm": 1.7421875, + "learning_rate": 0.00016358901964194962, + "loss": 2.1043, + "step": 238500 + }, + { + "epoch": 0.56, + "grad_norm": 2.25, + "learning_rate": 0.00016358759315754758, + "loss": 2.1454, + "step": 238505 + }, + { + "epoch": 0.56, + "grad_norm": 1.984375, + "learning_rate": 0.00016358616665142287, + "loss": 2.0564, + "step": 238510 + }, + { + "epoch": 0.56, + "grad_norm": 2.078125, + "learning_rate": 0.000163584740123576, + "loss": 2.0383, + "step": 238515 + }, + { + "epoch": 0.56, + "grad_norm": 1.796875, + "learning_rate": 0.00016358331357400739, + "loss": 2.1467, + "step": 238520 + }, + { + "epoch": 0.56, + "grad_norm": 2.5625, + "learning_rate": 0.00016358188700271758, + "loss": 2.0941, + "step": 238525 + }, + { + "epoch": 0.56, + "grad_norm": 2.078125, + "learning_rate": 0.00016358046040970705, + "loss": 2.1416, + "step": 238530 + }, + { + "epoch": 0.56, + "grad_norm": 1.8984375, + "learning_rate": 0.00016357903379497624, + "loss": 2.2068, + "step": 238535 + }, + { + "epoch": 0.56, + "grad_norm": 2.65625, + "learning_rate": 0.00016357760715852575, + "loss": 2.1271, + "step": 238540 + }, + { + "epoch": 0.56, + "grad_norm": 2.109375, + "learning_rate": 0.00016357618050035595, + "loss": 2.2206, + "step": 238545 + }, + { + "epoch": 0.56, + "grad_norm": 2.21875, + "learning_rate": 0.00016357475382046738, + "loss": 2.139, + "step": 238550 + }, + { + "epoch": 0.56, + "grad_norm": 2.15625, + "learning_rate": 0.00016357332711886056, + "loss": 2.2122, + "step": 238555 + }, + { + "epoch": 0.56, + "grad_norm": 2.328125, + "learning_rate": 0.00016357190039553588, + "loss": 2.0627, + "step": 238560 + }, + { + "epoch": 0.56, + "grad_norm": 2.203125, + "learning_rate": 0.00016357047365049392, + "loss": 2.0626, + "step": 238565 + }, + { + "epoch": 0.56, + "grad_norm": 2.265625, + "learning_rate": 0.0001635690468837351, + "loss": 2.1454, + "step": 238570 + }, + { + "epoch": 0.56, + "grad_norm": 2.9375, + "learning_rate": 0.00016356762009525996, + "loss": 2.1829, + "step": 238575 + }, + { + "epoch": 0.56, + "grad_norm": 1.9921875, + "learning_rate": 0.000163566193285069, + "loss": 2.067, + "step": 238580 + }, + { + "epoch": 0.56, + "grad_norm": 2.296875, + "learning_rate": 0.00016356476645316262, + "loss": 2.0457, + "step": 238585 + }, + { + "epoch": 0.56, + "grad_norm": 2.625, + "learning_rate": 0.00016356333959954142, + "loss": 2.0139, + "step": 238590 + }, + { + "epoch": 0.56, + "grad_norm": 1.9296875, + "learning_rate": 0.00016356191272420581, + "loss": 2.0257, + "step": 238595 + }, + { + "epoch": 0.56, + "grad_norm": 1.8125, + "learning_rate": 0.00016356048582715627, + "loss": 1.9356, + "step": 238600 + }, + { + "epoch": 0.56, + "grad_norm": 1.984375, + "learning_rate": 0.00016355905890839332, + "loss": 2.1168, + "step": 238605 + }, + { + "epoch": 0.56, + "grad_norm": 2.34375, + "learning_rate": 0.00016355763196791747, + "loss": 1.9705, + "step": 238610 + }, + { + "epoch": 0.56, + "grad_norm": 2.15625, + "learning_rate": 0.0001635562050057292, + "loss": 2.049, + "step": 238615 + }, + { + "epoch": 0.56, + "grad_norm": 2.421875, + "learning_rate": 0.00016355477802182893, + "loss": 2.1427, + "step": 238620 + }, + { + "epoch": 0.56, + "grad_norm": 1.734375, + "learning_rate": 0.0001635533510162172, + "loss": 2.0321, + "step": 238625 + }, + { + "epoch": 0.56, + "grad_norm": 2.296875, + "learning_rate": 0.0001635519239888945, + "loss": 1.9431, + "step": 238630 + }, + { + "epoch": 0.56, + "grad_norm": 2.046875, + "learning_rate": 0.0001635504969398613, + "loss": 2.156, + "step": 238635 + }, + { + "epoch": 0.56, + "grad_norm": 2.34375, + "learning_rate": 0.00016354906986911814, + "loss": 1.8881, + "step": 238640 + }, + { + "epoch": 0.56, + "grad_norm": 2.109375, + "learning_rate": 0.00016354764277666544, + "loss": 2.034, + "step": 238645 + }, + { + "epoch": 0.56, + "grad_norm": 2.375, + "learning_rate": 0.0001635462156625037, + "loss": 1.9955, + "step": 238650 + }, + { + "epoch": 0.56, + "grad_norm": 1.53125, + "learning_rate": 0.00016354478852663342, + "loss": 1.9286, + "step": 238655 + }, + { + "epoch": 0.56, + "grad_norm": 2.21875, + "learning_rate": 0.0001635433613690551, + "loss": 1.9269, + "step": 238660 + }, + { + "epoch": 0.56, + "grad_norm": 2.390625, + "learning_rate": 0.00016354193418976923, + "loss": 2.0769, + "step": 238665 + }, + { + "epoch": 0.56, + "grad_norm": 2.171875, + "learning_rate": 0.00016354050698877626, + "loss": 2.0954, + "step": 238670 + }, + { + "epoch": 0.56, + "grad_norm": 2.234375, + "learning_rate": 0.0001635390797660767, + "loss": 2.1032, + "step": 238675 + }, + { + "epoch": 0.56, + "grad_norm": 1.8828125, + "learning_rate": 0.00016353765252167103, + "loss": 1.9193, + "step": 238680 + }, + { + "epoch": 0.56, + "grad_norm": 2.15625, + "learning_rate": 0.00016353622525555977, + "loss": 2.1083, + "step": 238685 + }, + { + "epoch": 0.56, + "grad_norm": 2.15625, + "learning_rate": 0.00016353479796774338, + "loss": 1.9477, + "step": 238690 + }, + { + "epoch": 0.56, + "grad_norm": 2.1875, + "learning_rate": 0.00016353337065822234, + "loss": 2.0656, + "step": 238695 + }, + { + "epoch": 0.56, + "grad_norm": 2.140625, + "learning_rate": 0.00016353194332699715, + "loss": 1.9707, + "step": 238700 + }, + { + "epoch": 0.56, + "grad_norm": 2.21875, + "learning_rate": 0.00016353051597406827, + "loss": 1.9857, + "step": 238705 + }, + { + "epoch": 0.56, + "grad_norm": 2.421875, + "learning_rate": 0.00016352908859943629, + "loss": 2.2545, + "step": 238710 + }, + { + "epoch": 0.56, + "grad_norm": 1.890625, + "learning_rate": 0.00016352766120310156, + "loss": 1.9626, + "step": 238715 + }, + { + "epoch": 0.56, + "grad_norm": 2.0625, + "learning_rate": 0.00016352623378506462, + "loss": 2.1021, + "step": 238720 + }, + { + "epoch": 0.56, + "grad_norm": 2.046875, + "learning_rate": 0.00016352480634532597, + "loss": 2.0535, + "step": 238725 + }, + { + "epoch": 0.56, + "grad_norm": 2.171875, + "learning_rate": 0.00016352337888388612, + "loss": 2.2038, + "step": 238730 + }, + { + "epoch": 0.56, + "grad_norm": 1.96875, + "learning_rate": 0.00016352195140074553, + "loss": 2.2423, + "step": 238735 + }, + { + "epoch": 0.56, + "grad_norm": 2.515625, + "learning_rate": 0.0001635205238959047, + "loss": 2.017, + "step": 238740 + }, + { + "epoch": 0.56, + "grad_norm": 1.828125, + "learning_rate": 0.00016351909636936409, + "loss": 1.8668, + "step": 238745 + }, + { + "epoch": 0.56, + "grad_norm": 2.234375, + "learning_rate": 0.00016351766882112423, + "loss": 2.1624, + "step": 238750 + }, + { + "epoch": 0.56, + "grad_norm": 2.09375, + "learning_rate": 0.00016351624125118553, + "loss": 2.1337, + "step": 238755 + }, + { + "epoch": 0.56, + "grad_norm": 2.21875, + "learning_rate": 0.00016351481365954856, + "loss": 2.2479, + "step": 238760 + }, + { + "epoch": 0.56, + "grad_norm": 2.265625, + "learning_rate": 0.00016351338604621377, + "loss": 2.1048, + "step": 238765 + }, + { + "epoch": 0.56, + "grad_norm": 2.15625, + "learning_rate": 0.00016351195841118166, + "loss": 2.245, + "step": 238770 + }, + { + "epoch": 0.56, + "grad_norm": 2.6875, + "learning_rate": 0.00016351053075445271, + "loss": 2.059, + "step": 238775 + }, + { + "epoch": 0.56, + "grad_norm": 2.1875, + "learning_rate": 0.00016350910307602742, + "loss": 2.1981, + "step": 238780 + }, + { + "epoch": 0.56, + "grad_norm": 2.390625, + "learning_rate": 0.0001635076753759063, + "loss": 1.9796, + "step": 238785 + }, + { + "epoch": 0.56, + "grad_norm": 1.9765625, + "learning_rate": 0.00016350624765408973, + "loss": 1.9187, + "step": 238790 + }, + { + "epoch": 0.56, + "grad_norm": 2.0625, + "learning_rate": 0.00016350481991057834, + "loss": 2.1376, + "step": 238795 + }, + { + "epoch": 0.56, + "grad_norm": 2.40625, + "learning_rate": 0.0001635033921453725, + "loss": 2.2581, + "step": 238800 + }, + { + "epoch": 0.56, + "grad_norm": 2.390625, + "learning_rate": 0.00016350196435847278, + "loss": 2.2848, + "step": 238805 + }, + { + "epoch": 0.56, + "grad_norm": 1.75, + "learning_rate": 0.00016350053654987963, + "loss": 2.0037, + "step": 238810 + }, + { + "epoch": 0.56, + "grad_norm": 2.171875, + "learning_rate": 0.00016349910871959352, + "loss": 2.1735, + "step": 238815 + }, + { + "epoch": 0.56, + "grad_norm": 2.0625, + "learning_rate": 0.000163497680867615, + "loss": 2.0147, + "step": 238820 + }, + { + "epoch": 0.56, + "grad_norm": 1.9921875, + "learning_rate": 0.00016349625299394451, + "loss": 2.0463, + "step": 238825 + }, + { + "epoch": 0.56, + "grad_norm": 2.078125, + "learning_rate": 0.00016349482509858256, + "loss": 2.0478, + "step": 238830 + }, + { + "epoch": 0.56, + "grad_norm": 1.8515625, + "learning_rate": 0.00016349339718152962, + "loss": 2.1505, + "step": 238835 + }, + { + "epoch": 0.56, + "grad_norm": 1.8984375, + "learning_rate": 0.0001634919692427862, + "loss": 1.9574, + "step": 238840 + }, + { + "epoch": 0.56, + "grad_norm": 2.3125, + "learning_rate": 0.00016349054128235273, + "loss": 2.0515, + "step": 238845 + }, + { + "epoch": 0.56, + "grad_norm": 2.390625, + "learning_rate": 0.00016348911330022975, + "loss": 2.1004, + "step": 238850 + }, + { + "epoch": 0.56, + "grad_norm": 2.046875, + "learning_rate": 0.00016348768529641776, + "loss": 2.0757, + "step": 238855 + }, + { + "epoch": 0.56, + "grad_norm": 1.8359375, + "learning_rate": 0.00016348625727091723, + "loss": 2.138, + "step": 238860 + }, + { + "epoch": 0.56, + "grad_norm": 2.03125, + "learning_rate": 0.00016348482922372862, + "loss": 1.9693, + "step": 238865 + }, + { + "epoch": 0.56, + "grad_norm": 2.0625, + "learning_rate": 0.00016348340115485244, + "loss": 2.0505, + "step": 238870 + }, + { + "epoch": 0.56, + "grad_norm": 2.59375, + "learning_rate": 0.00016348197306428918, + "loss": 2.0635, + "step": 238875 + }, + { + "epoch": 0.56, + "grad_norm": 2.203125, + "learning_rate": 0.00016348054495203936, + "loss": 2.0754, + "step": 238880 + }, + { + "epoch": 0.56, + "grad_norm": 1.671875, + "learning_rate": 0.0001634791168181034, + "loss": 2.1979, + "step": 238885 + }, + { + "epoch": 0.56, + "grad_norm": 1.9453125, + "learning_rate": 0.0001634776886624818, + "loss": 2.026, + "step": 238890 + }, + { + "epoch": 0.56, + "grad_norm": 2.171875, + "learning_rate": 0.00016347626048517511, + "loss": 2.1005, + "step": 238895 + }, + { + "epoch": 0.56, + "grad_norm": 1.9921875, + "learning_rate": 0.00016347483228618378, + "loss": 2.1473, + "step": 238900 + }, + { + "epoch": 0.56, + "grad_norm": 2.234375, + "learning_rate": 0.0001634734040655083, + "loss": 1.9545, + "step": 238905 + }, + { + "epoch": 0.56, + "grad_norm": 1.734375, + "learning_rate": 0.0001634719758231491, + "loss": 2.0797, + "step": 238910 + }, + { + "epoch": 0.56, + "grad_norm": 2.234375, + "learning_rate": 0.00016347054755910679, + "loss": 2.1178, + "step": 238915 + }, + { + "epoch": 0.56, + "grad_norm": 2.40625, + "learning_rate": 0.00016346911927338174, + "loss": 2.1578, + "step": 238920 + }, + { + "epoch": 0.56, + "grad_norm": 2.125, + "learning_rate": 0.00016346769096597453, + "loss": 1.9078, + "step": 238925 + }, + { + "epoch": 0.56, + "grad_norm": 2.546875, + "learning_rate": 0.00016346626263688557, + "loss": 2.1165, + "step": 238930 + }, + { + "epoch": 0.56, + "grad_norm": 2.328125, + "learning_rate": 0.00016346483428611542, + "loss": 1.9991, + "step": 238935 + }, + { + "epoch": 0.56, + "grad_norm": 2.265625, + "learning_rate": 0.0001634634059136645, + "loss": 2.1092, + "step": 238940 + }, + { + "epoch": 0.56, + "grad_norm": 2.09375, + "learning_rate": 0.00016346197751953334, + "loss": 2.2007, + "step": 238945 + }, + { + "epoch": 0.56, + "grad_norm": 2.328125, + "learning_rate": 0.00016346054910372238, + "loss": 2.0365, + "step": 238950 + }, + { + "epoch": 0.56, + "grad_norm": 2.296875, + "learning_rate": 0.00016345912066623218, + "loss": 2.3508, + "step": 238955 + }, + { + "epoch": 0.56, + "grad_norm": 1.9921875, + "learning_rate": 0.00016345769220706322, + "loss": 2.037, + "step": 238960 + }, + { + "epoch": 0.56, + "grad_norm": 2.140625, + "learning_rate": 0.00016345626372621594, + "loss": 2.1589, + "step": 238965 + }, + { + "epoch": 0.56, + "grad_norm": 2.5, + "learning_rate": 0.00016345483522369085, + "loss": 2.087, + "step": 238970 + }, + { + "epoch": 0.56, + "grad_norm": 2.59375, + "learning_rate": 0.00016345340669948844, + "loss": 2.1327, + "step": 238975 + }, + { + "epoch": 0.56, + "grad_norm": 2.140625, + "learning_rate": 0.00016345197815360917, + "loss": 2.18, + "step": 238980 + }, + { + "epoch": 0.56, + "grad_norm": 1.890625, + "learning_rate": 0.00016345054958605358, + "loss": 2.4007, + "step": 238985 + }, + { + "epoch": 0.56, + "grad_norm": 2.03125, + "learning_rate": 0.00016344912099682213, + "loss": 1.9364, + "step": 238990 + }, + { + "epoch": 0.56, + "grad_norm": 1.859375, + "learning_rate": 0.0001634476923859153, + "loss": 2.0408, + "step": 238995 + }, + { + "epoch": 0.56, + "grad_norm": 2.390625, + "learning_rate": 0.00016344626375333358, + "loss": 2.1486, + "step": 239000 + }, + { + "epoch": 0.56, + "grad_norm": 2.484375, + "learning_rate": 0.00016344483509907746, + "loss": 2.0172, + "step": 239005 + }, + { + "epoch": 0.56, + "grad_norm": 2.1875, + "learning_rate": 0.00016344340642314748, + "loss": 2.0362, + "step": 239010 + }, + { + "epoch": 0.56, + "grad_norm": 2.0625, + "learning_rate": 0.00016344197772554404, + "loss": 2.1235, + "step": 239015 + }, + { + "epoch": 0.56, + "grad_norm": 2.015625, + "learning_rate": 0.0001634405490062677, + "loss": 2.1086, + "step": 239020 + }, + { + "epoch": 0.56, + "grad_norm": 2.3125, + "learning_rate": 0.0001634391202653189, + "loss": 2.2, + "step": 239025 + }, + { + "epoch": 0.56, + "grad_norm": 2.375, + "learning_rate": 0.00016343769150269815, + "loss": 1.9334, + "step": 239030 + }, + { + "epoch": 0.56, + "grad_norm": 2.4375, + "learning_rate": 0.00016343626271840593, + "loss": 1.879, + "step": 239035 + }, + { + "epoch": 0.56, + "grad_norm": 2.53125, + "learning_rate": 0.00016343483391244271, + "loss": 2.1661, + "step": 239040 + }, + { + "epoch": 0.56, + "grad_norm": 2.0625, + "learning_rate": 0.00016343340508480903, + "loss": 2.0944, + "step": 239045 + }, + { + "epoch": 0.56, + "grad_norm": 2.59375, + "learning_rate": 0.00016343197623550535, + "loss": 2.2248, + "step": 239050 + }, + { + "epoch": 0.56, + "grad_norm": 2.40625, + "learning_rate": 0.00016343054736453214, + "loss": 2.2037, + "step": 239055 + }, + { + "epoch": 0.56, + "grad_norm": 2.125, + "learning_rate": 0.00016342911847188991, + "loss": 2.166, + "step": 239060 + }, + { + "epoch": 0.56, + "grad_norm": 2.375, + "learning_rate": 0.00016342768955757915, + "loss": 2.164, + "step": 239065 + }, + { + "epoch": 0.56, + "grad_norm": 1.9765625, + "learning_rate": 0.00016342626062160033, + "loss": 2.0886, + "step": 239070 + }, + { + "epoch": 0.56, + "grad_norm": 2.03125, + "learning_rate": 0.00016342483166395397, + "loss": 2.0798, + "step": 239075 + }, + { + "epoch": 0.56, + "grad_norm": 2.09375, + "learning_rate": 0.0001634234026846405, + "loss": 2.0294, + "step": 239080 + }, + { + "epoch": 0.56, + "grad_norm": 2.203125, + "learning_rate": 0.00016342197368366047, + "loss": 2.0206, + "step": 239085 + }, + { + "epoch": 0.56, + "grad_norm": 1.8515625, + "learning_rate": 0.00016342054466101435, + "loss": 1.7766, + "step": 239090 + }, + { + "epoch": 0.56, + "grad_norm": 2.34375, + "learning_rate": 0.00016341911561670261, + "loss": 2.0776, + "step": 239095 + }, + { + "epoch": 0.56, + "grad_norm": 2.234375, + "learning_rate": 0.00016341768655072576, + "loss": 2.1722, + "step": 239100 + }, + { + "epoch": 0.56, + "grad_norm": 2.4375, + "learning_rate": 0.0001634162574630843, + "loss": 2.1524, + "step": 239105 + }, + { + "epoch": 0.56, + "grad_norm": 1.9609375, + "learning_rate": 0.00016341482835377863, + "loss": 2.091, + "step": 239110 + }, + { + "epoch": 0.56, + "grad_norm": 1.8671875, + "learning_rate": 0.00016341339922280933, + "loss": 2.0762, + "step": 239115 + }, + { + "epoch": 0.56, + "grad_norm": 2.3125, + "learning_rate": 0.00016341197007017688, + "loss": 2.3187, + "step": 239120 + }, + { + "epoch": 0.56, + "grad_norm": 2.328125, + "learning_rate": 0.00016341054089588178, + "loss": 2.0121, + "step": 239125 + }, + { + "epoch": 0.56, + "grad_norm": 2.171875, + "learning_rate": 0.00016340911169992442, + "loss": 2.0865, + "step": 239130 + }, + { + "epoch": 0.56, + "grad_norm": 2.109375, + "learning_rate": 0.00016340768248230541, + "loss": 1.9518, + "step": 239135 + }, + { + "epoch": 0.56, + "grad_norm": 2.296875, + "learning_rate": 0.00016340625324302516, + "loss": 2.1509, + "step": 239140 + }, + { + "epoch": 0.56, + "grad_norm": 2.296875, + "learning_rate": 0.0001634048239820842, + "loss": 2.0493, + "step": 239145 + }, + { + "epoch": 0.56, + "grad_norm": 2.03125, + "learning_rate": 0.000163403394699483, + "loss": 2.0189, + "step": 239150 + }, + { + "epoch": 0.56, + "grad_norm": 2.078125, + "learning_rate": 0.00016340196539522205, + "loss": 1.9825, + "step": 239155 + }, + { + "epoch": 0.56, + "grad_norm": 2.59375, + "learning_rate": 0.00016340053606930184, + "loss": 2.1107, + "step": 239160 + }, + { + "epoch": 0.56, + "grad_norm": 2.4375, + "learning_rate": 0.00016339910672172282, + "loss": 2.0644, + "step": 239165 + }, + { + "epoch": 0.56, + "grad_norm": 2.234375, + "learning_rate": 0.00016339767735248554, + "loss": 2.1008, + "step": 239170 + }, + { + "epoch": 0.56, + "grad_norm": 2.125, + "learning_rate": 0.00016339624796159048, + "loss": 2.1276, + "step": 239175 + }, + { + "epoch": 0.56, + "grad_norm": 2.125, + "learning_rate": 0.00016339481854903812, + "loss": 2.143, + "step": 239180 + }, + { + "epoch": 0.56, + "grad_norm": 2.640625, + "learning_rate": 0.00016339338911482894, + "loss": 1.9707, + "step": 239185 + }, + { + "epoch": 0.56, + "grad_norm": 2.09375, + "learning_rate": 0.0001633919596589634, + "loss": 2.1675, + "step": 239190 + }, + { + "epoch": 0.56, + "grad_norm": 2.421875, + "learning_rate": 0.000163390530181442, + "loss": 2.1525, + "step": 239195 + }, + { + "epoch": 0.56, + "grad_norm": 2.125, + "learning_rate": 0.0001633891006822653, + "loss": 2.263, + "step": 239200 + }, + { + "epoch": 0.56, + "grad_norm": 1.9375, + "learning_rate": 0.0001633876711614337, + "loss": 2.2406, + "step": 239205 + }, + { + "epoch": 0.56, + "grad_norm": 2.203125, + "learning_rate": 0.00016338624161894773, + "loss": 2.0247, + "step": 239210 + }, + { + "epoch": 0.56, + "grad_norm": 2.53125, + "learning_rate": 0.00016338481205480787, + "loss": 2.1664, + "step": 239215 + }, + { + "epoch": 0.56, + "grad_norm": 2.1875, + "learning_rate": 0.0001633833824690146, + "loss": 2.0554, + "step": 239220 + }, + { + "epoch": 0.56, + "grad_norm": 1.9453125, + "learning_rate": 0.00016338195286156842, + "loss": 2.0665, + "step": 239225 + }, + { + "epoch": 0.56, + "grad_norm": 2.046875, + "learning_rate": 0.00016338052323246983, + "loss": 2.2443, + "step": 239230 + }, + { + "epoch": 0.56, + "grad_norm": 1.8359375, + "learning_rate": 0.0001633790935817193, + "loss": 2.1111, + "step": 239235 + }, + { + "epoch": 0.56, + "grad_norm": 2.421875, + "learning_rate": 0.0001633776639093173, + "loss": 2.1051, + "step": 239240 + }, + { + "epoch": 0.56, + "grad_norm": 2.421875, + "learning_rate": 0.00016337623421526436, + "loss": 2.1641, + "step": 239245 + }, + { + "epoch": 0.56, + "grad_norm": 2.203125, + "learning_rate": 0.00016337480449956092, + "loss": 2.0968, + "step": 239250 + }, + { + "epoch": 0.56, + "grad_norm": 1.96875, + "learning_rate": 0.00016337337476220754, + "loss": 2.0805, + "step": 239255 + }, + { + "epoch": 0.56, + "grad_norm": 2.484375, + "learning_rate": 0.00016337194500320464, + "loss": 2.0613, + "step": 239260 + }, + { + "epoch": 0.56, + "grad_norm": 2.046875, + "learning_rate": 0.00016337051522255275, + "loss": 2.017, + "step": 239265 + }, + { + "epoch": 0.56, + "grad_norm": 1.8984375, + "learning_rate": 0.00016336908542025232, + "loss": 2.0423, + "step": 239270 + }, + { + "epoch": 0.56, + "grad_norm": 2.078125, + "learning_rate": 0.0001633676555963039, + "loss": 2.0425, + "step": 239275 + }, + { + "epoch": 0.56, + "grad_norm": 1.890625, + "learning_rate": 0.0001633662257507079, + "loss": 2.0032, + "step": 239280 + }, + { + "epoch": 0.56, + "grad_norm": 2.109375, + "learning_rate": 0.0001633647958834649, + "loss": 2.2365, + "step": 239285 + }, + { + "epoch": 0.56, + "grad_norm": 2.078125, + "learning_rate": 0.00016336336599457528, + "loss": 2.0099, + "step": 239290 + }, + { + "epoch": 0.56, + "grad_norm": 1.9921875, + "learning_rate": 0.00016336193608403957, + "loss": 2.2175, + "step": 239295 + }, + { + "epoch": 0.56, + "grad_norm": 2.796875, + "learning_rate": 0.00016336050615185834, + "loss": 2.0747, + "step": 239300 + }, + { + "epoch": 0.56, + "grad_norm": 1.7890625, + "learning_rate": 0.00016335907619803198, + "loss": 2.1027, + "step": 239305 + }, + { + "epoch": 0.56, + "grad_norm": 2.140625, + "learning_rate": 0.000163357646222561, + "loss": 2.1521, + "step": 239310 + }, + { + "epoch": 0.56, + "grad_norm": 1.9765625, + "learning_rate": 0.0001633562162254459, + "loss": 2.1634, + "step": 239315 + }, + { + "epoch": 0.56, + "grad_norm": 2.21875, + "learning_rate": 0.00016335478620668723, + "loss": 2.0578, + "step": 239320 + }, + { + "epoch": 0.56, + "grad_norm": 2.109375, + "learning_rate": 0.00016335335616628533, + "loss": 1.9706, + "step": 239325 + }, + { + "epoch": 0.56, + "grad_norm": 2.109375, + "learning_rate": 0.00016335192610424082, + "loss": 2.2783, + "step": 239330 + }, + { + "epoch": 0.56, + "grad_norm": 2.140625, + "learning_rate": 0.00016335049602055413, + "loss": 2.1295, + "step": 239335 + }, + { + "epoch": 0.56, + "grad_norm": 1.890625, + "learning_rate": 0.00016334906591522578, + "loss": 2.1578, + "step": 239340 + }, + { + "epoch": 0.56, + "grad_norm": 2.21875, + "learning_rate": 0.00016334763578825624, + "loss": 2.2323, + "step": 239345 + }, + { + "epoch": 0.56, + "grad_norm": 2.140625, + "learning_rate": 0.00016334620563964594, + "loss": 2.1836, + "step": 239350 + }, + { + "epoch": 0.56, + "grad_norm": 2.109375, + "learning_rate": 0.00016334477546939548, + "loss": 2.0804, + "step": 239355 + }, + { + "epoch": 0.56, + "grad_norm": 2.1875, + "learning_rate": 0.00016334334527750527, + "loss": 2.2006, + "step": 239360 + }, + { + "epoch": 0.56, + "grad_norm": 2.375, + "learning_rate": 0.00016334191506397585, + "loss": 2.2051, + "step": 239365 + }, + { + "epoch": 0.56, + "grad_norm": 2.046875, + "learning_rate": 0.00016334048482880768, + "loss": 2.0319, + "step": 239370 + }, + { + "epoch": 0.56, + "grad_norm": 2.640625, + "learning_rate": 0.00016333905457200122, + "loss": 2.1246, + "step": 239375 + }, + { + "epoch": 0.56, + "grad_norm": 2.03125, + "learning_rate": 0.00016333762429355703, + "loss": 2.108, + "step": 239380 + }, + { + "epoch": 0.56, + "grad_norm": 2.15625, + "learning_rate": 0.00016333619399347556, + "loss": 2.1462, + "step": 239385 + }, + { + "epoch": 0.56, + "grad_norm": 2.453125, + "learning_rate": 0.00016333476367175727, + "loss": 2.1408, + "step": 239390 + }, + { + "epoch": 0.56, + "grad_norm": 2.1875, + "learning_rate": 0.0001633333333284027, + "loss": 1.9505, + "step": 239395 + }, + { + "epoch": 0.56, + "grad_norm": 2.015625, + "learning_rate": 0.0001633319029634123, + "loss": 1.968, + "step": 239400 + }, + { + "epoch": 0.56, + "grad_norm": 2.25, + "learning_rate": 0.00016333047257678656, + "loss": 2.1882, + "step": 239405 + }, + { + "epoch": 0.56, + "grad_norm": 2.09375, + "learning_rate": 0.000163329042168526, + "loss": 2.1446, + "step": 239410 + }, + { + "epoch": 0.56, + "grad_norm": 2.171875, + "learning_rate": 0.00016332761173863108, + "loss": 2.0234, + "step": 239415 + }, + { + "epoch": 0.56, + "grad_norm": 2.59375, + "learning_rate": 0.00016332618128710234, + "loss": 1.9732, + "step": 239420 + }, + { + "epoch": 0.56, + "grad_norm": 2.859375, + "learning_rate": 0.0001633247508139402, + "loss": 2.0175, + "step": 239425 + }, + { + "epoch": 0.56, + "grad_norm": 1.8203125, + "learning_rate": 0.00016332332031914515, + "loss": 2.0207, + "step": 239430 + }, + { + "epoch": 0.56, + "grad_norm": 2.671875, + "learning_rate": 0.00016332188980271772, + "loss": 2.1324, + "step": 239435 + }, + { + "epoch": 0.56, + "grad_norm": 2.53125, + "learning_rate": 0.0001633204592646584, + "loss": 2.118, + "step": 239440 + }, + { + "epoch": 0.56, + "grad_norm": 2.015625, + "learning_rate": 0.00016331902870496764, + "loss": 2.0787, + "step": 239445 + }, + { + "epoch": 0.56, + "grad_norm": 2.53125, + "learning_rate": 0.00016331759812364597, + "loss": 2.1308, + "step": 239450 + }, + { + "epoch": 0.56, + "grad_norm": 2.0625, + "learning_rate": 0.00016331616752069384, + "loss": 2.1132, + "step": 239455 + }, + { + "epoch": 0.56, + "grad_norm": 2.15625, + "learning_rate": 0.00016331473689611177, + "loss": 2.1263, + "step": 239460 + }, + { + "epoch": 0.56, + "grad_norm": 2.140625, + "learning_rate": 0.00016331330624990028, + "loss": 2.0933, + "step": 239465 + }, + { + "epoch": 0.56, + "grad_norm": 1.9921875, + "learning_rate": 0.0001633118755820598, + "loss": 2.2494, + "step": 239470 + }, + { + "epoch": 0.56, + "grad_norm": 2.421875, + "learning_rate": 0.0001633104448925908, + "loss": 2.0077, + "step": 239475 + }, + { + "epoch": 0.56, + "grad_norm": 2.296875, + "learning_rate": 0.00016330901418149383, + "loss": 1.9722, + "step": 239480 + }, + { + "epoch": 0.56, + "grad_norm": 2.171875, + "learning_rate": 0.00016330758344876932, + "loss": 2.1803, + "step": 239485 + }, + { + "epoch": 0.56, + "grad_norm": 2.265625, + "learning_rate": 0.0001633061526944178, + "loss": 2.1633, + "step": 239490 + }, + { + "epoch": 0.56, + "grad_norm": 2.15625, + "learning_rate": 0.00016330472191843976, + "loss": 2.0772, + "step": 239495 + }, + { + "epoch": 0.56, + "grad_norm": 1.8515625, + "learning_rate": 0.0001633032911208357, + "loss": 2.133, + "step": 239500 + }, + { + "epoch": 0.56, + "grad_norm": 2.234375, + "learning_rate": 0.00016330186030160605, + "loss": 2.201, + "step": 239505 + }, + { + "epoch": 0.56, + "grad_norm": 1.5625, + "learning_rate": 0.00016330042946075137, + "loss": 1.9784, + "step": 239510 + }, + { + "epoch": 0.56, + "grad_norm": 3.078125, + "learning_rate": 0.00016329899859827206, + "loss": 2.1376, + "step": 239515 + }, + { + "epoch": 0.56, + "grad_norm": 2.34375, + "learning_rate": 0.00016329756771416872, + "loss": 2.1766, + "step": 239520 + }, + { + "epoch": 0.56, + "grad_norm": 2.078125, + "learning_rate": 0.00016329613680844178, + "loss": 2.0841, + "step": 239525 + }, + { + "epoch": 0.56, + "grad_norm": 2.484375, + "learning_rate": 0.00016329470588109172, + "loss": 2.1042, + "step": 239530 + }, + { + "epoch": 0.56, + "grad_norm": 1.9609375, + "learning_rate": 0.00016329327493211905, + "loss": 2.1163, + "step": 239535 + }, + { + "epoch": 0.56, + "grad_norm": 2.03125, + "learning_rate": 0.0001632918439615242, + "loss": 1.9572, + "step": 239540 + }, + { + "epoch": 0.56, + "grad_norm": 2.21875, + "learning_rate": 0.00016329041296930775, + "loss": 2.0014, + "step": 239545 + }, + { + "epoch": 0.56, + "grad_norm": 1.9140625, + "learning_rate": 0.00016328898195547016, + "loss": 2.0719, + "step": 239550 + }, + { + "epoch": 0.56, + "grad_norm": 2.015625, + "learning_rate": 0.0001632875509200119, + "loss": 1.9353, + "step": 239555 + }, + { + "epoch": 0.56, + "grad_norm": 1.9765625, + "learning_rate": 0.00016328611986293344, + "loss": 2.0705, + "step": 239560 + }, + { + "epoch": 0.56, + "grad_norm": 2.453125, + "learning_rate": 0.00016328468878423533, + "loss": 2.2064, + "step": 239565 + }, + { + "epoch": 0.56, + "grad_norm": 2.203125, + "learning_rate": 0.000163283257683918, + "loss": 1.994, + "step": 239570 + }, + { + "epoch": 0.56, + "grad_norm": 2.140625, + "learning_rate": 0.00016328182656198196, + "loss": 2.0983, + "step": 239575 + }, + { + "epoch": 0.56, + "grad_norm": 1.9375, + "learning_rate": 0.0001632803954184277, + "loss": 2.235, + "step": 239580 + }, + { + "epoch": 0.56, + "grad_norm": 2.109375, + "learning_rate": 0.00016327896425325574, + "loss": 2.1964, + "step": 239585 + }, + { + "epoch": 0.56, + "grad_norm": 2.25, + "learning_rate": 0.00016327753306646646, + "loss": 2.0437, + "step": 239590 + }, + { + "epoch": 0.56, + "grad_norm": 1.8984375, + "learning_rate": 0.0001632761018580605, + "loss": 1.9947, + "step": 239595 + }, + { + "epoch": 0.56, + "grad_norm": 2.0625, + "learning_rate": 0.00016327467062803825, + "loss": 2.0593, + "step": 239600 + }, + { + "epoch": 0.56, + "grad_norm": 2.0625, + "learning_rate": 0.00016327323937640023, + "loss": 2.0482, + "step": 239605 + }, + { + "epoch": 0.56, + "grad_norm": 2.171875, + "learning_rate": 0.00016327180810314691, + "loss": 1.9935, + "step": 239610 + }, + { + "epoch": 0.56, + "grad_norm": 2.171875, + "learning_rate": 0.00016327037680827883, + "loss": 2.0424, + "step": 239615 + }, + { + "epoch": 0.56, + "grad_norm": 1.7890625, + "learning_rate": 0.0001632689454917964, + "loss": 2.1087, + "step": 239620 + }, + { + "epoch": 0.56, + "grad_norm": 2.25, + "learning_rate": 0.00016326751415370017, + "loss": 2.19, + "step": 239625 + }, + { + "epoch": 0.56, + "grad_norm": 2.3125, + "learning_rate": 0.0001632660827939906, + "loss": 2.1051, + "step": 239630 + }, + { + "epoch": 0.56, + "grad_norm": 2.296875, + "learning_rate": 0.0001632646514126682, + "loss": 2.1092, + "step": 239635 + }, + { + "epoch": 0.56, + "grad_norm": 2.46875, + "learning_rate": 0.00016326322000973347, + "loss": 2.0704, + "step": 239640 + }, + { + "epoch": 0.56, + "grad_norm": 2.421875, + "learning_rate": 0.0001632617885851868, + "loss": 2.0719, + "step": 239645 + }, + { + "epoch": 0.56, + "grad_norm": 2.140625, + "learning_rate": 0.00016326035713902886, + "loss": 2.2074, + "step": 239650 + }, + { + "epoch": 0.56, + "grad_norm": 2.34375, + "learning_rate": 0.00016325892567125994, + "loss": 2.1641, + "step": 239655 + }, + { + "epoch": 0.56, + "grad_norm": 1.96875, + "learning_rate": 0.0001632574941818807, + "loss": 2.0342, + "step": 239660 + }, + { + "epoch": 0.56, + "grad_norm": 2.125, + "learning_rate": 0.0001632560626708915, + "loss": 2.0717, + "step": 239665 + }, + { + "epoch": 0.56, + "grad_norm": 1.9140625, + "learning_rate": 0.00016325463113829288, + "loss": 2.2294, + "step": 239670 + }, + { + "epoch": 0.56, + "grad_norm": 2.359375, + "learning_rate": 0.00016325319958408535, + "loss": 2.005, + "step": 239675 + }, + { + "epoch": 0.56, + "grad_norm": 2.328125, + "learning_rate": 0.0001632517680082694, + "loss": 2.0495, + "step": 239680 + }, + { + "epoch": 0.56, + "grad_norm": 2.6875, + "learning_rate": 0.00016325033641084548, + "loss": 2.0714, + "step": 239685 + }, + { + "epoch": 0.56, + "grad_norm": 2.125, + "learning_rate": 0.00016324890479181412, + "loss": 2.0304, + "step": 239690 + }, + { + "epoch": 0.56, + "grad_norm": 2.390625, + "learning_rate": 0.00016324747315117576, + "loss": 1.9859, + "step": 239695 + }, + { + "epoch": 0.56, + "grad_norm": 2.015625, + "learning_rate": 0.0001632460414889309, + "loss": 1.9387, + "step": 239700 + }, + { + "epoch": 0.56, + "grad_norm": 2.140625, + "learning_rate": 0.00016324460980508007, + "loss": 2.1401, + "step": 239705 + }, + { + "epoch": 0.56, + "grad_norm": 2.4375, + "learning_rate": 0.00016324317809962374, + "loss": 2.0068, + "step": 239710 + }, + { + "epoch": 0.56, + "grad_norm": 1.9765625, + "learning_rate": 0.00016324174637256236, + "loss": 2.2251, + "step": 239715 + }, + { + "epoch": 0.56, + "grad_norm": 2.328125, + "learning_rate": 0.0001632403146238965, + "loss": 2.0364, + "step": 239720 + }, + { + "epoch": 0.56, + "grad_norm": 1.8359375, + "learning_rate": 0.00016323888285362656, + "loss": 2.234, + "step": 239725 + }, + { + "epoch": 0.56, + "grad_norm": 2.34375, + "learning_rate": 0.0001632374510617531, + "loss": 1.9669, + "step": 239730 + }, + { + "epoch": 0.56, + "grad_norm": 2.34375, + "learning_rate": 0.00016323601924827656, + "loss": 2.1464, + "step": 239735 + }, + { + "epoch": 0.56, + "grad_norm": 2.34375, + "learning_rate": 0.00016323458741319746, + "loss": 1.9105, + "step": 239740 + }, + { + "epoch": 0.56, + "grad_norm": 1.859375, + "learning_rate": 0.00016323315555651628, + "loss": 2.0199, + "step": 239745 + }, + { + "epoch": 0.56, + "grad_norm": 2.4375, + "learning_rate": 0.0001632317236782335, + "loss": 1.9066, + "step": 239750 + }, + { + "epoch": 0.56, + "grad_norm": 2.375, + "learning_rate": 0.0001632302917783496, + "loss": 2.2293, + "step": 239755 + }, + { + "epoch": 0.56, + "grad_norm": 2.4375, + "learning_rate": 0.00016322885985686512, + "loss": 1.8936, + "step": 239760 + }, + { + "epoch": 0.56, + "grad_norm": 1.890625, + "learning_rate": 0.0001632274279137805, + "loss": 1.9097, + "step": 239765 + }, + { + "epoch": 0.56, + "grad_norm": 2.71875, + "learning_rate": 0.00016322599594909625, + "loss": 2.1727, + "step": 239770 + }, + { + "epoch": 0.56, + "grad_norm": 1.9453125, + "learning_rate": 0.00016322456396281285, + "loss": 2.2617, + "step": 239775 + }, + { + "epoch": 0.56, + "grad_norm": 1.90625, + "learning_rate": 0.00016322313195493078, + "loss": 1.9894, + "step": 239780 + }, + { + "epoch": 0.56, + "grad_norm": 1.7734375, + "learning_rate": 0.00016322169992545057, + "loss": 2.1105, + "step": 239785 + }, + { + "epoch": 0.56, + "grad_norm": 1.984375, + "learning_rate": 0.00016322026787437264, + "loss": 2.116, + "step": 239790 + }, + { + "epoch": 0.56, + "grad_norm": 2.125, + "learning_rate": 0.00016321883580169754, + "loss": 2.1512, + "step": 239795 + }, + { + "epoch": 0.56, + "grad_norm": 1.7578125, + "learning_rate": 0.00016321740370742576, + "loss": 2.0025, + "step": 239800 + }, + { + "epoch": 0.56, + "grad_norm": 2.046875, + "learning_rate": 0.00016321597159155773, + "loss": 2.275, + "step": 239805 + }, + { + "epoch": 0.56, + "grad_norm": 2.3125, + "learning_rate": 0.00016321453945409398, + "loss": 1.9502, + "step": 239810 + }, + { + "epoch": 0.56, + "grad_norm": 2.03125, + "learning_rate": 0.00016321310729503502, + "loss": 2.4019, + "step": 239815 + }, + { + "epoch": 0.56, + "grad_norm": 2.109375, + "learning_rate": 0.0001632116751143813, + "loss": 2.0561, + "step": 239820 + }, + { + "epoch": 0.56, + "grad_norm": 1.96875, + "learning_rate": 0.00016321024291213334, + "loss": 2.1417, + "step": 239825 + }, + { + "epoch": 0.56, + "grad_norm": 1.96875, + "learning_rate": 0.0001632088106882916, + "loss": 2.0695, + "step": 239830 + }, + { + "epoch": 0.56, + "grad_norm": 2.71875, + "learning_rate": 0.00016320737844285657, + "loss": 2.2442, + "step": 239835 + }, + { + "epoch": 0.56, + "grad_norm": 2.0625, + "learning_rate": 0.0001632059461758288, + "loss": 2.1403, + "step": 239840 + }, + { + "epoch": 0.56, + "grad_norm": 1.7890625, + "learning_rate": 0.0001632045138872087, + "loss": 2.1162, + "step": 239845 + }, + { + "epoch": 0.56, + "grad_norm": 2.125, + "learning_rate": 0.00016320308157699681, + "loss": 2.0564, + "step": 239850 + }, + { + "epoch": 0.56, + "grad_norm": 1.96875, + "learning_rate": 0.0001632016492451936, + "loss": 2.125, + "step": 239855 + }, + { + "epoch": 0.56, + "grad_norm": 2.265625, + "learning_rate": 0.00016320021689179956, + "loss": 2.1616, + "step": 239860 + }, + { + "epoch": 0.56, + "grad_norm": 2.140625, + "learning_rate": 0.00016319878451681515, + "loss": 2.2194, + "step": 239865 + }, + { + "epoch": 0.56, + "grad_norm": 2.8125, + "learning_rate": 0.0001631973521202409, + "loss": 2.2199, + "step": 239870 + }, + { + "epoch": 0.56, + "grad_norm": 1.6953125, + "learning_rate": 0.00016319591970207734, + "loss": 2.0338, + "step": 239875 + }, + { + "epoch": 0.56, + "grad_norm": 2.140625, + "learning_rate": 0.00016319448726232486, + "loss": 2.2206, + "step": 239880 + }, + { + "epoch": 0.56, + "grad_norm": 2.203125, + "learning_rate": 0.000163193054800984, + "loss": 2.0828, + "step": 239885 + }, + { + "epoch": 0.56, + "grad_norm": 2.28125, + "learning_rate": 0.00016319162231805522, + "loss": 2.2058, + "step": 239890 + }, + { + "epoch": 0.56, + "grad_norm": 1.84375, + "learning_rate": 0.0001631901898135391, + "loss": 2.2438, + "step": 239895 + }, + { + "epoch": 0.56, + "grad_norm": 2.234375, + "learning_rate": 0.00016318875728743603, + "loss": 2.0632, + "step": 239900 + }, + { + "epoch": 0.56, + "grad_norm": 2.453125, + "learning_rate": 0.00016318732473974654, + "loss": 2.3137, + "step": 239905 + }, + { + "epoch": 0.56, + "grad_norm": 2.21875, + "learning_rate": 0.00016318589217047112, + "loss": 1.9481, + "step": 239910 + }, + { + "epoch": 0.56, + "grad_norm": 2.1875, + "learning_rate": 0.00016318445957961018, + "loss": 2.1655, + "step": 239915 + }, + { + "epoch": 0.56, + "grad_norm": 2.21875, + "learning_rate": 0.00016318302696716435, + "loss": 2.0155, + "step": 239920 + }, + { + "epoch": 0.56, + "grad_norm": 2.015625, + "learning_rate": 0.00016318159433313407, + "loss": 1.8922, + "step": 239925 + }, + { + "epoch": 0.56, + "grad_norm": 2.0, + "learning_rate": 0.00016318016167751978, + "loss": 2.0108, + "step": 239930 + }, + { + "epoch": 0.56, + "grad_norm": 2.375, + "learning_rate": 0.00016317872900032204, + "loss": 2.1395, + "step": 239935 + }, + { + "epoch": 0.56, + "grad_norm": 2.3125, + "learning_rate": 0.00016317729630154127, + "loss": 2.2637, + "step": 239940 + }, + { + "epoch": 0.56, + "grad_norm": 2.03125, + "learning_rate": 0.00016317586358117797, + "loss": 2.0879, + "step": 239945 + }, + { + "epoch": 0.56, + "grad_norm": 2.09375, + "learning_rate": 0.00016317443083923268, + "loss": 2.1634, + "step": 239950 + }, + { + "epoch": 0.56, + "grad_norm": 1.96875, + "learning_rate": 0.00016317299807570585, + "loss": 2.1402, + "step": 239955 + }, + { + "epoch": 0.56, + "grad_norm": 2.078125, + "learning_rate": 0.00016317156529059798, + "loss": 2.1211, + "step": 239960 + }, + { + "epoch": 0.56, + "grad_norm": 2.0625, + "learning_rate": 0.00016317013248390953, + "loss": 2.1819, + "step": 239965 + }, + { + "epoch": 0.56, + "grad_norm": 2.328125, + "learning_rate": 0.00016316869965564106, + "loss": 2.0208, + "step": 239970 + }, + { + "epoch": 0.56, + "grad_norm": 1.8984375, + "learning_rate": 0.00016316726680579299, + "loss": 2.223, + "step": 239975 + }, + { + "epoch": 0.56, + "grad_norm": 2.03125, + "learning_rate": 0.00016316583393436584, + "loss": 1.9659, + "step": 239980 + }, + { + "epoch": 0.56, + "grad_norm": 2.359375, + "learning_rate": 0.0001631644010413601, + "loss": 2.0907, + "step": 239985 + }, + { + "epoch": 0.56, + "grad_norm": 2.171875, + "learning_rate": 0.00016316296812677624, + "loss": 2.0439, + "step": 239990 + }, + { + "epoch": 0.56, + "grad_norm": 2.296875, + "learning_rate": 0.0001631615351906148, + "loss": 1.9836, + "step": 239995 + }, + { + "epoch": 0.56, + "grad_norm": 2.125, + "learning_rate": 0.00016316010223287622, + "loss": 2.0259, + "step": 240000 + }, + { + "epoch": 0.56, + "grad_norm": 2.515625, + "learning_rate": 0.000163158669253561, + "loss": 2.0979, + "step": 240005 + }, + { + "epoch": 0.56, + "grad_norm": 2.09375, + "learning_rate": 0.00016315723625266962, + "loss": 2.2108, + "step": 240010 + }, + { + "epoch": 0.56, + "grad_norm": 1.7890625, + "learning_rate": 0.0001631558032302026, + "loss": 2.0766, + "step": 240015 + }, + { + "epoch": 0.56, + "grad_norm": 2.234375, + "learning_rate": 0.0001631543701861604, + "loss": 2.3185, + "step": 240020 + }, + { + "epoch": 0.56, + "grad_norm": 2.1875, + "learning_rate": 0.0001631529371205435, + "loss": 1.902, + "step": 240025 + }, + { + "epoch": 0.56, + "grad_norm": 1.953125, + "learning_rate": 0.00016315150403335243, + "loss": 2.1473, + "step": 240030 + }, + { + "epoch": 0.56, + "grad_norm": 2.09375, + "learning_rate": 0.0001631500709245877, + "loss": 1.9616, + "step": 240035 + }, + { + "epoch": 0.56, + "grad_norm": 2.5, + "learning_rate": 0.0001631486377942497, + "loss": 2.1448, + "step": 240040 + }, + { + "epoch": 0.56, + "grad_norm": 1.8984375, + "learning_rate": 0.000163147204642339, + "loss": 1.8804, + "step": 240045 + }, + { + "epoch": 0.56, + "grad_norm": 1.84375, + "learning_rate": 0.0001631457714688561, + "loss": 2.1275, + "step": 240050 + }, + { + "epoch": 0.56, + "grad_norm": 2.265625, + "learning_rate": 0.00016314433827380143, + "loss": 2.0522, + "step": 240055 + }, + { + "epoch": 0.56, + "grad_norm": 1.8359375, + "learning_rate": 0.00016314290505717554, + "loss": 2.124, + "step": 240060 + }, + { + "epoch": 0.56, + "grad_norm": 2.71875, + "learning_rate": 0.00016314147181897885, + "loss": 1.9543, + "step": 240065 + }, + { + "epoch": 0.56, + "grad_norm": 2.40625, + "learning_rate": 0.0001631400385592119, + "loss": 2.1887, + "step": 240070 + }, + { + "epoch": 0.56, + "grad_norm": 2.28125, + "learning_rate": 0.00016313860527787517, + "loss": 2.2244, + "step": 240075 + }, + { + "epoch": 0.56, + "grad_norm": 2.140625, + "learning_rate": 0.00016313717197496917, + "loss": 2.0636, + "step": 240080 + }, + { + "epoch": 0.56, + "grad_norm": 2.390625, + "learning_rate": 0.00016313573865049433, + "loss": 2.1254, + "step": 240085 + }, + { + "epoch": 0.57, + "grad_norm": 1.8046875, + "learning_rate": 0.0001631343053044512, + "loss": 2.0815, + "step": 240090 + }, + { + "epoch": 0.57, + "grad_norm": 2.046875, + "learning_rate": 0.00016313287193684024, + "loss": 2.0405, + "step": 240095 + }, + { + "epoch": 0.57, + "grad_norm": 2.609375, + "learning_rate": 0.00016313143854766192, + "loss": 2.1361, + "step": 240100 + }, + { + "epoch": 0.57, + "grad_norm": 2.203125, + "learning_rate": 0.00016313000513691682, + "loss": 2.1539, + "step": 240105 + }, + { + "epoch": 0.57, + "grad_norm": 2.015625, + "learning_rate": 0.0001631285717046053, + "loss": 2.2306, + "step": 240110 + }, + { + "epoch": 0.57, + "grad_norm": 2.671875, + "learning_rate": 0.00016312713825072795, + "loss": 2.0958, + "step": 240115 + }, + { + "epoch": 0.57, + "grad_norm": 2.34375, + "learning_rate": 0.00016312570477528522, + "loss": 1.9962, + "step": 240120 + }, + { + "epoch": 0.57, + "grad_norm": 1.9765625, + "learning_rate": 0.0001631242712782776, + "loss": 2.0915, + "step": 240125 + }, + { + "epoch": 0.57, + "grad_norm": 2.140625, + "learning_rate": 0.00016312283775970555, + "loss": 2.1307, + "step": 240130 + }, + { + "epoch": 0.57, + "grad_norm": 2.125, + "learning_rate": 0.00016312140421956964, + "loss": 2.2107, + "step": 240135 + }, + { + "epoch": 0.57, + "grad_norm": 2.171875, + "learning_rate": 0.0001631199706578703, + "loss": 2.1942, + "step": 240140 + }, + { + "epoch": 0.57, + "grad_norm": 2.4375, + "learning_rate": 0.00016311853707460806, + "loss": 2.0838, + "step": 240145 + }, + { + "epoch": 0.57, + "grad_norm": 2.28125, + "learning_rate": 0.00016311710346978336, + "loss": 2.047, + "step": 240150 + }, + { + "epoch": 0.57, + "grad_norm": 2.421875, + "learning_rate": 0.0001631156698433967, + "loss": 1.9797, + "step": 240155 + }, + { + "epoch": 0.57, + "grad_norm": 2.171875, + "learning_rate": 0.00016311423619544857, + "loss": 2.1503, + "step": 240160 + }, + { + "epoch": 0.57, + "grad_norm": 2.375, + "learning_rate": 0.0001631128025259395, + "loss": 2.1654, + "step": 240165 + }, + { + "epoch": 0.57, + "grad_norm": 2.15625, + "learning_rate": 0.00016311136883486994, + "loss": 2.1302, + "step": 240170 + }, + { + "epoch": 0.57, + "grad_norm": 2.4375, + "learning_rate": 0.00016310993512224042, + "loss": 2.1618, + "step": 240175 + }, + { + "epoch": 0.57, + "grad_norm": 2.484375, + "learning_rate": 0.00016310850138805138, + "loss": 2.0177, + "step": 240180 + }, + { + "epoch": 0.57, + "grad_norm": 3.0625, + "learning_rate": 0.0001631070676323033, + "loss": 2.1108, + "step": 240185 + }, + { + "epoch": 0.57, + "grad_norm": 2.171875, + "learning_rate": 0.00016310563385499676, + "loss": 2.0429, + "step": 240190 + }, + { + "epoch": 0.57, + "grad_norm": 2.3125, + "learning_rate": 0.00016310420005613216, + "loss": 2.0223, + "step": 240195 + }, + { + "epoch": 0.57, + "grad_norm": 2.28125, + "learning_rate": 0.00016310276623571004, + "loss": 1.9308, + "step": 240200 + }, + { + "epoch": 0.57, + "grad_norm": 2.234375, + "learning_rate": 0.00016310133239373084, + "loss": 2.0869, + "step": 240205 + }, + { + "epoch": 0.57, + "grad_norm": 2.140625, + "learning_rate": 0.00016309989853019506, + "loss": 1.9232, + "step": 240210 + }, + { + "epoch": 0.57, + "grad_norm": 1.84375, + "learning_rate": 0.00016309846464510327, + "loss": 2.1588, + "step": 240215 + }, + { + "epoch": 0.57, + "grad_norm": 2.125, + "learning_rate": 0.00016309703073845588, + "loss": 2.254, + "step": 240220 + }, + { + "epoch": 0.57, + "grad_norm": 2.296875, + "learning_rate": 0.00016309559681025338, + "loss": 2.1745, + "step": 240225 + }, + { + "epoch": 0.57, + "grad_norm": 2.015625, + "learning_rate": 0.00016309416286049632, + "loss": 2.0807, + "step": 240230 + }, + { + "epoch": 0.57, + "grad_norm": 2.5625, + "learning_rate": 0.0001630927288891851, + "loss": 1.9542, + "step": 240235 + }, + { + "epoch": 0.57, + "grad_norm": 1.984375, + "learning_rate": 0.0001630912948963203, + "loss": 1.9314, + "step": 240240 + }, + { + "epoch": 0.57, + "grad_norm": 2.21875, + "learning_rate": 0.00016308986088190236, + "loss": 2.2037, + "step": 240245 + }, + { + "epoch": 0.57, + "grad_norm": 2.40625, + "learning_rate": 0.00016308842684593176, + "loss": 2.1533, + "step": 240250 + }, + { + "epoch": 0.57, + "grad_norm": 2.21875, + "learning_rate": 0.00016308699278840905, + "loss": 2.0753, + "step": 240255 + }, + { + "epoch": 0.57, + "grad_norm": 2.15625, + "learning_rate": 0.00016308555870933464, + "loss": 2.005, + "step": 240260 + }, + { + "epoch": 0.57, + "grad_norm": 2.046875, + "learning_rate": 0.00016308412460870907, + "loss": 2.0246, + "step": 240265 + }, + { + "epoch": 0.57, + "grad_norm": 2.171875, + "learning_rate": 0.00016308269048653283, + "loss": 2.0839, + "step": 240270 + }, + { + "epoch": 0.57, + "grad_norm": 2.3125, + "learning_rate": 0.0001630812563428064, + "loss": 2.1116, + "step": 240275 + }, + { + "epoch": 0.57, + "grad_norm": 1.828125, + "learning_rate": 0.00016307982217753024, + "loss": 2.1802, + "step": 240280 + }, + { + "epoch": 0.57, + "grad_norm": 2.234375, + "learning_rate": 0.0001630783879907049, + "loss": 2.1905, + "step": 240285 + }, + { + "epoch": 0.57, + "grad_norm": 1.984375, + "learning_rate": 0.00016307695378233082, + "loss": 2.2413, + "step": 240290 + }, + { + "epoch": 0.57, + "grad_norm": 1.90625, + "learning_rate": 0.00016307551955240852, + "loss": 2.1144, + "step": 240295 + }, + { + "epoch": 0.57, + "grad_norm": 2.171875, + "learning_rate": 0.00016307408530093844, + "loss": 2.0515, + "step": 240300 + }, + { + "epoch": 0.57, + "grad_norm": 2.125, + "learning_rate": 0.00016307265102792116, + "loss": 2.0556, + "step": 240305 + }, + { + "epoch": 0.57, + "grad_norm": 3.09375, + "learning_rate": 0.0001630712167333571, + "loss": 2.162, + "step": 240310 + }, + { + "epoch": 0.57, + "grad_norm": 2.171875, + "learning_rate": 0.00016306978241724678, + "loss": 2.11, + "step": 240315 + }, + { + "epoch": 0.57, + "grad_norm": 2.1875, + "learning_rate": 0.00016306834807959067, + "loss": 2.1737, + "step": 240320 + }, + { + "epoch": 0.57, + "grad_norm": 2.140625, + "learning_rate": 0.00016306691372038924, + "loss": 1.9687, + "step": 240325 + }, + { + "epoch": 0.57, + "grad_norm": 2.21875, + "learning_rate": 0.00016306547933964304, + "loss": 2.1532, + "step": 240330 + }, + { + "epoch": 0.57, + "grad_norm": 2.078125, + "learning_rate": 0.00016306404493735251, + "loss": 2.146, + "step": 240335 + }, + { + "epoch": 0.57, + "grad_norm": 1.9453125, + "learning_rate": 0.0001630626105135182, + "loss": 2.1039, + "step": 240340 + }, + { + "epoch": 0.57, + "grad_norm": 2.328125, + "learning_rate": 0.0001630611760681405, + "loss": 2.2102, + "step": 240345 + }, + { + "epoch": 0.57, + "grad_norm": 2.421875, + "learning_rate": 0.00016305974160122, + "loss": 2.1747, + "step": 240350 + }, + { + "epoch": 0.57, + "grad_norm": 2.453125, + "learning_rate": 0.00016305830711275712, + "loss": 1.9708, + "step": 240355 + }, + { + "epoch": 0.57, + "grad_norm": 2.078125, + "learning_rate": 0.00016305687260275242, + "loss": 1.9312, + "step": 240360 + }, + { + "epoch": 0.57, + "grad_norm": 2.21875, + "learning_rate": 0.00016305543807120635, + "loss": 2.286, + "step": 240365 + }, + { + "epoch": 0.57, + "grad_norm": 2.28125, + "learning_rate": 0.00016305400351811935, + "loss": 2.1764, + "step": 240370 + }, + { + "epoch": 0.57, + "grad_norm": 2.171875, + "learning_rate": 0.00016305256894349197, + "loss": 2.0657, + "step": 240375 + }, + { + "epoch": 0.57, + "grad_norm": 1.8359375, + "learning_rate": 0.0001630511343473247, + "loss": 2.1541, + "step": 240380 + }, + { + "epoch": 0.57, + "grad_norm": 2.109375, + "learning_rate": 0.00016304969972961804, + "loss": 2.0898, + "step": 240385 + }, + { + "epoch": 0.57, + "grad_norm": 2.515625, + "learning_rate": 0.0001630482650903724, + "loss": 2.2495, + "step": 240390 + }, + { + "epoch": 0.57, + "grad_norm": 2.71875, + "learning_rate": 0.00016304683042958837, + "loss": 2.1631, + "step": 240395 + }, + { + "epoch": 0.57, + "grad_norm": 1.921875, + "learning_rate": 0.0001630453957472664, + "loss": 2.0967, + "step": 240400 + }, + { + "epoch": 0.57, + "grad_norm": 1.6875, + "learning_rate": 0.000163043961043407, + "loss": 1.9774, + "step": 240405 + }, + { + "epoch": 0.57, + "grad_norm": 1.84375, + "learning_rate": 0.0001630425263180106, + "loss": 2.2891, + "step": 240410 + }, + { + "epoch": 0.57, + "grad_norm": 2.3125, + "learning_rate": 0.00016304109157107773, + "loss": 2.1295, + "step": 240415 + }, + { + "epoch": 0.57, + "grad_norm": 2.1875, + "learning_rate": 0.0001630396568026089, + "loss": 1.9128, + "step": 240420 + }, + { + "epoch": 0.57, + "grad_norm": 2.15625, + "learning_rate": 0.0001630382220126046, + "loss": 2.1375, + "step": 240425 + }, + { + "epoch": 0.57, + "grad_norm": 2.453125, + "learning_rate": 0.00016303678720106527, + "loss": 2.1413, + "step": 240430 + }, + { + "epoch": 0.57, + "grad_norm": 2.1875, + "learning_rate": 0.0001630353523679914, + "loss": 2.2072, + "step": 240435 + }, + { + "epoch": 0.57, + "grad_norm": 2.203125, + "learning_rate": 0.00016303391751338358, + "loss": 2.0985, + "step": 240440 + }, + { + "epoch": 0.57, + "grad_norm": 3.234375, + "learning_rate": 0.0001630324826372422, + "loss": 2.1182, + "step": 240445 + }, + { + "epoch": 0.57, + "grad_norm": 2.078125, + "learning_rate": 0.00016303104773956776, + "loss": 2.0093, + "step": 240450 + }, + { + "epoch": 0.57, + "grad_norm": 2.265625, + "learning_rate": 0.00016302961282036078, + "loss": 2.1874, + "step": 240455 + }, + { + "epoch": 0.57, + "grad_norm": 1.828125, + "learning_rate": 0.00016302817787962174, + "loss": 2.0918, + "step": 240460 + }, + { + "epoch": 0.57, + "grad_norm": 1.90625, + "learning_rate": 0.00016302674291735118, + "loss": 2.1312, + "step": 240465 + }, + { + "epoch": 0.57, + "grad_norm": 2.390625, + "learning_rate": 0.00016302530793354947, + "loss": 2.0977, + "step": 240470 + }, + { + "epoch": 0.57, + "grad_norm": 1.9765625, + "learning_rate": 0.00016302387292821722, + "loss": 2.2606, + "step": 240475 + }, + { + "epoch": 0.57, + "grad_norm": 2.25, + "learning_rate": 0.00016302243790135485, + "loss": 2.0164, + "step": 240480 + }, + { + "epoch": 0.57, + "grad_norm": 2.5625, + "learning_rate": 0.00016302100285296288, + "loss": 2.0206, + "step": 240485 + }, + { + "epoch": 0.57, + "grad_norm": 2.09375, + "learning_rate": 0.00016301956778304178, + "loss": 2.1313, + "step": 240490 + }, + { + "epoch": 0.57, + "grad_norm": 1.890625, + "learning_rate": 0.0001630181326915921, + "loss": 2.2047, + "step": 240495 + }, + { + "epoch": 0.57, + "grad_norm": 2.671875, + "learning_rate": 0.00016301669757861422, + "loss": 2.0239, + "step": 240500 + }, + { + "epoch": 0.57, + "grad_norm": 3.1875, + "learning_rate": 0.0001630152624441087, + "loss": 2.1588, + "step": 240505 + }, + { + "epoch": 0.57, + "grad_norm": 2.328125, + "learning_rate": 0.0001630138272880761, + "loss": 2.1423, + "step": 240510 + }, + { + "epoch": 0.57, + "grad_norm": 1.8671875, + "learning_rate": 0.00016301239211051675, + "loss": 1.906, + "step": 240515 + }, + { + "epoch": 0.57, + "grad_norm": 2.046875, + "learning_rate": 0.00016301095691143128, + "loss": 2.2104, + "step": 240520 + }, + { + "epoch": 0.57, + "grad_norm": 2.015625, + "learning_rate": 0.00016300952169082006, + "loss": 1.9507, + "step": 240525 + }, + { + "epoch": 0.57, + "grad_norm": 2.234375, + "learning_rate": 0.00016300808644868368, + "loss": 2.163, + "step": 240530 + }, + { + "epoch": 0.57, + "grad_norm": 2.0625, + "learning_rate": 0.00016300665118502262, + "loss": 1.9272, + "step": 240535 + }, + { + "epoch": 0.57, + "grad_norm": 2.1875, + "learning_rate": 0.0001630052158998373, + "loss": 2.1216, + "step": 240540 + }, + { + "epoch": 0.57, + "grad_norm": 2.375, + "learning_rate": 0.00016300378059312827, + "loss": 2.0297, + "step": 240545 + }, + { + "epoch": 0.57, + "grad_norm": 2.203125, + "learning_rate": 0.00016300234526489603, + "loss": 2.1696, + "step": 240550 + }, + { + "epoch": 0.57, + "grad_norm": 2.109375, + "learning_rate": 0.00016300090991514101, + "loss": 2.3286, + "step": 240555 + }, + { + "epoch": 0.57, + "grad_norm": 2.171875, + "learning_rate": 0.00016299947454386375, + "loss": 1.9007, + "step": 240560 + }, + { + "epoch": 0.57, + "grad_norm": 1.9765625, + "learning_rate": 0.00016299803915106474, + "loss": 2.1138, + "step": 240565 + }, + { + "epoch": 0.57, + "grad_norm": 2.09375, + "learning_rate": 0.00016299660373674443, + "loss": 2.1915, + "step": 240570 + }, + { + "epoch": 0.57, + "grad_norm": 2.0625, + "learning_rate": 0.00016299516830090339, + "loss": 2.1563, + "step": 240575 + }, + { + "epoch": 0.57, + "grad_norm": 2.265625, + "learning_rate": 0.000162993732843542, + "loss": 2.2295, + "step": 240580 + }, + { + "epoch": 0.57, + "grad_norm": 2.140625, + "learning_rate": 0.00016299229736466085, + "loss": 2.1479, + "step": 240585 + }, + { + "epoch": 0.57, + "grad_norm": 2.265625, + "learning_rate": 0.0001629908618642604, + "loss": 1.8041, + "step": 240590 + }, + { + "epoch": 0.57, + "grad_norm": 2.296875, + "learning_rate": 0.00016298942634234106, + "loss": 2.2716, + "step": 240595 + }, + { + "epoch": 0.57, + "grad_norm": 1.984375, + "learning_rate": 0.00016298799079890347, + "loss": 2.159, + "step": 240600 + }, + { + "epoch": 0.57, + "grad_norm": 2.25, + "learning_rate": 0.00016298655523394796, + "loss": 1.9839, + "step": 240605 + }, + { + "epoch": 0.57, + "grad_norm": 2.1875, + "learning_rate": 0.00016298511964747515, + "loss": 1.9177, + "step": 240610 + }, + { + "epoch": 0.57, + "grad_norm": 2.046875, + "learning_rate": 0.00016298368403948546, + "loss": 1.9632, + "step": 240615 + }, + { + "epoch": 0.57, + "grad_norm": 2.0625, + "learning_rate": 0.00016298224840997943, + "loss": 2.0903, + "step": 240620 + }, + { + "epoch": 0.57, + "grad_norm": 1.8515625, + "learning_rate": 0.0001629808127589575, + "loss": 2.1219, + "step": 240625 + }, + { + "epoch": 0.57, + "grad_norm": 2.046875, + "learning_rate": 0.0001629793770864202, + "loss": 2.1374, + "step": 240630 + }, + { + "epoch": 0.57, + "grad_norm": 2.390625, + "learning_rate": 0.00016297794139236797, + "loss": 2.1772, + "step": 240635 + }, + { + "epoch": 0.57, + "grad_norm": 2.171875, + "learning_rate": 0.00016297650567680133, + "loss": 2.1317, + "step": 240640 + }, + { + "epoch": 0.57, + "grad_norm": 2.453125, + "learning_rate": 0.00016297506993972083, + "loss": 2.1561, + "step": 240645 + }, + { + "epoch": 0.57, + "grad_norm": 1.796875, + "learning_rate": 0.00016297363418112684, + "loss": 2.1339, + "step": 240650 + }, + { + "epoch": 0.57, + "grad_norm": 2.03125, + "learning_rate": 0.00016297219840101995, + "loss": 2.0196, + "step": 240655 + }, + { + "epoch": 0.57, + "grad_norm": 2.234375, + "learning_rate": 0.00016297076259940064, + "loss": 2.1477, + "step": 240660 + }, + { + "epoch": 0.57, + "grad_norm": 2.25, + "learning_rate": 0.00016296932677626935, + "loss": 1.9701, + "step": 240665 + }, + { + "epoch": 0.57, + "grad_norm": 2.453125, + "learning_rate": 0.00016296789093162657, + "loss": 2.1825, + "step": 240670 + }, + { + "epoch": 0.57, + "grad_norm": 2.03125, + "learning_rate": 0.00016296645506547286, + "loss": 2.1227, + "step": 240675 + }, + { + "epoch": 0.57, + "grad_norm": 2.140625, + "learning_rate": 0.00016296501917780862, + "loss": 2.1859, + "step": 240680 + }, + { + "epoch": 0.57, + "grad_norm": 2.4375, + "learning_rate": 0.00016296358326863445, + "loss": 2.0263, + "step": 240685 + }, + { + "epoch": 0.57, + "grad_norm": 2.09375, + "learning_rate": 0.0001629621473379507, + "loss": 1.9677, + "step": 240690 + }, + { + "epoch": 0.57, + "grad_norm": 2.1875, + "learning_rate": 0.000162960711385758, + "loss": 2.1708, + "step": 240695 + }, + { + "epoch": 0.57, + "grad_norm": 2.125, + "learning_rate": 0.00016295927541205675, + "loss": 2.0436, + "step": 240700 + }, + { + "epoch": 0.57, + "grad_norm": 1.7734375, + "learning_rate": 0.00016295783941684748, + "loss": 1.9738, + "step": 240705 + }, + { + "epoch": 0.57, + "grad_norm": 2.046875, + "learning_rate": 0.00016295640340013064, + "loss": 2.0958, + "step": 240710 + }, + { + "epoch": 0.57, + "grad_norm": 2.203125, + "learning_rate": 0.00016295496736190682, + "loss": 2.1187, + "step": 240715 + }, + { + "epoch": 0.57, + "grad_norm": 2.3125, + "learning_rate": 0.00016295353130217638, + "loss": 2.2101, + "step": 240720 + }, + { + "epoch": 0.57, + "grad_norm": 2.125, + "learning_rate": 0.0001629520952209399, + "loss": 2.0852, + "step": 240725 + }, + { + "epoch": 0.57, + "grad_norm": 2.0, + "learning_rate": 0.00016295065911819782, + "loss": 1.9301, + "step": 240730 + }, + { + "epoch": 0.57, + "grad_norm": 2.1875, + "learning_rate": 0.00016294922299395069, + "loss": 1.8519, + "step": 240735 + }, + { + "epoch": 0.57, + "grad_norm": 1.78125, + "learning_rate": 0.00016294778684819894, + "loss": 2.0118, + "step": 240740 + }, + { + "epoch": 0.57, + "grad_norm": 1.984375, + "learning_rate": 0.00016294635068094307, + "loss": 2.2016, + "step": 240745 + }, + { + "epoch": 0.57, + "grad_norm": 2.09375, + "learning_rate": 0.0001629449144921836, + "loss": 2.0126, + "step": 240750 + }, + { + "epoch": 0.57, + "grad_norm": 2.34375, + "learning_rate": 0.00016294347828192101, + "loss": 2.0438, + "step": 240755 + }, + { + "epoch": 0.57, + "grad_norm": 1.9296875, + "learning_rate": 0.00016294204205015577, + "loss": 2.2381, + "step": 240760 + }, + { + "epoch": 0.57, + "grad_norm": 2.0625, + "learning_rate": 0.0001629406057968884, + "loss": 2.2937, + "step": 240765 + }, + { + "epoch": 0.57, + "grad_norm": 2.25, + "learning_rate": 0.00016293916952211937, + "loss": 2.1748, + "step": 240770 + }, + { + "epoch": 0.57, + "grad_norm": 2.859375, + "learning_rate": 0.0001629377332258492, + "loss": 2.1741, + "step": 240775 + }, + { + "epoch": 0.57, + "grad_norm": 2.09375, + "learning_rate": 0.00016293629690807836, + "loss": 2.1286, + "step": 240780 + }, + { + "epoch": 0.57, + "grad_norm": 2.25, + "learning_rate": 0.00016293486056880732, + "loss": 2.1866, + "step": 240785 + }, + { + "epoch": 0.57, + "grad_norm": 2.359375, + "learning_rate": 0.0001629334242080366, + "loss": 2.1209, + "step": 240790 + }, + { + "epoch": 0.57, + "grad_norm": 2.25, + "learning_rate": 0.00016293198782576664, + "loss": 2.0907, + "step": 240795 + }, + { + "epoch": 0.57, + "grad_norm": 1.84375, + "learning_rate": 0.00016293055142199803, + "loss": 2.1485, + "step": 240800 + }, + { + "epoch": 0.57, + "grad_norm": 2.09375, + "learning_rate": 0.00016292911499673118, + "loss": 2.0602, + "step": 240805 + }, + { + "epoch": 0.57, + "grad_norm": 2.140625, + "learning_rate": 0.0001629276785499666, + "loss": 2.149, + "step": 240810 + }, + { + "epoch": 0.57, + "grad_norm": 2.15625, + "learning_rate": 0.00016292624208170478, + "loss": 2.2313, + "step": 240815 + }, + { + "epoch": 0.57, + "grad_norm": 2.03125, + "learning_rate": 0.00016292480559194623, + "loss": 2.0247, + "step": 240820 + }, + { + "epoch": 0.57, + "grad_norm": 1.953125, + "learning_rate": 0.00016292336908069142, + "loss": 2.069, + "step": 240825 + }, + { + "epoch": 0.57, + "grad_norm": 2.234375, + "learning_rate": 0.00016292193254794084, + "loss": 2.1907, + "step": 240830 + }, + { + "epoch": 0.57, + "grad_norm": 2.515625, + "learning_rate": 0.00016292049599369497, + "loss": 2.1331, + "step": 240835 + }, + { + "epoch": 0.57, + "grad_norm": 2.234375, + "learning_rate": 0.00016291905941795437, + "loss": 2.0912, + "step": 240840 + }, + { + "epoch": 0.57, + "grad_norm": 2.6875, + "learning_rate": 0.00016291762282071942, + "loss": 2.1197, + "step": 240845 + }, + { + "epoch": 0.57, + "grad_norm": 2.515625, + "learning_rate": 0.00016291618620199069, + "loss": 2.053, + "step": 240850 + }, + { + "epoch": 0.57, + "grad_norm": 2.015625, + "learning_rate": 0.00016291474956176867, + "loss": 1.9875, + "step": 240855 + }, + { + "epoch": 0.57, + "grad_norm": 2.21875, + "learning_rate": 0.0001629133129000538, + "loss": 2.1291, + "step": 240860 + }, + { + "epoch": 0.57, + "grad_norm": 2.515625, + "learning_rate": 0.00016291187621684662, + "loss": 2.0708, + "step": 240865 + }, + { + "epoch": 0.57, + "grad_norm": 2.078125, + "learning_rate": 0.0001629104395121476, + "loss": 1.9184, + "step": 240870 + }, + { + "epoch": 0.57, + "grad_norm": 2.421875, + "learning_rate": 0.0001629090027859572, + "loss": 1.9138, + "step": 240875 + }, + { + "epoch": 0.57, + "grad_norm": 1.9375, + "learning_rate": 0.000162907566038276, + "loss": 2.1355, + "step": 240880 + }, + { + "epoch": 0.57, + "grad_norm": 2.15625, + "learning_rate": 0.00016290612926910442, + "loss": 2.2322, + "step": 240885 + }, + { + "epoch": 0.57, + "grad_norm": 2.59375, + "learning_rate": 0.00016290469247844293, + "loss": 2.1623, + "step": 240890 + }, + { + "epoch": 0.57, + "grad_norm": 1.8671875, + "learning_rate": 0.0001629032556662921, + "loss": 2.1757, + "step": 240895 + }, + { + "epoch": 0.57, + "grad_norm": 2.25, + "learning_rate": 0.00016290181883265234, + "loss": 1.9605, + "step": 240900 + }, + { + "epoch": 0.57, + "grad_norm": 1.921875, + "learning_rate": 0.00016290038197752418, + "loss": 2.1228, + "step": 240905 + }, + { + "epoch": 0.57, + "grad_norm": 2.453125, + "learning_rate": 0.00016289894510090812, + "loss": 2.2147, + "step": 240910 + }, + { + "epoch": 0.57, + "grad_norm": 2.703125, + "learning_rate": 0.00016289750820280466, + "loss": 2.2452, + "step": 240915 + }, + { + "epoch": 0.57, + "grad_norm": 2.203125, + "learning_rate": 0.00016289607128321424, + "loss": 1.9044, + "step": 240920 + }, + { + "epoch": 0.57, + "grad_norm": 2.28125, + "learning_rate": 0.0001628946343421374, + "loss": 2.0122, + "step": 240925 + }, + { + "epoch": 0.57, + "grad_norm": 2.0625, + "learning_rate": 0.0001628931973795746, + "loss": 2.2382, + "step": 240930 + }, + { + "epoch": 0.57, + "grad_norm": 2.375, + "learning_rate": 0.00016289176039552635, + "loss": 2.2092, + "step": 240935 + }, + { + "epoch": 0.57, + "grad_norm": 2.1875, + "learning_rate": 0.00016289032338999315, + "loss": 2.2012, + "step": 240940 + }, + { + "epoch": 0.57, + "grad_norm": 2.28125, + "learning_rate": 0.00016288888636297545, + "loss": 2.0318, + "step": 240945 + }, + { + "epoch": 0.57, + "grad_norm": 2.125, + "learning_rate": 0.00016288744931447377, + "loss": 1.9581, + "step": 240950 + }, + { + "epoch": 0.57, + "grad_norm": 2.234375, + "learning_rate": 0.0001628860122444886, + "loss": 2.105, + "step": 240955 + }, + { + "epoch": 0.57, + "grad_norm": 2.15625, + "learning_rate": 0.00016288457515302045, + "loss": 2.0398, + "step": 240960 + }, + { + "epoch": 0.57, + "grad_norm": 2.265625, + "learning_rate": 0.00016288313804006975, + "loss": 2.1553, + "step": 240965 + }, + { + "epoch": 0.57, + "grad_norm": 4.25, + "learning_rate": 0.00016288170090563705, + "loss": 1.9389, + "step": 240970 + }, + { + "epoch": 0.57, + "grad_norm": 1.9921875, + "learning_rate": 0.00016288026374972284, + "loss": 1.9875, + "step": 240975 + }, + { + "epoch": 0.57, + "grad_norm": 2.25, + "learning_rate": 0.00016287882657232755, + "loss": 2.1486, + "step": 240980 + }, + { + "epoch": 0.57, + "grad_norm": 2.15625, + "learning_rate": 0.00016287738937345174, + "loss": 2.283, + "step": 240985 + }, + { + "epoch": 0.57, + "grad_norm": 2.1875, + "learning_rate": 0.00016287595215309586, + "loss": 1.9538, + "step": 240990 + }, + { + "epoch": 0.57, + "grad_norm": 2.0, + "learning_rate": 0.00016287451491126044, + "loss": 2.0556, + "step": 240995 + }, + { + "epoch": 0.57, + "grad_norm": 2.03125, + "learning_rate": 0.0001628730776479459, + "loss": 2.1689, + "step": 241000 + }, + { + "epoch": 0.57, + "grad_norm": 2.109375, + "learning_rate": 0.0001628716403631528, + "loss": 2.223, + "step": 241005 + }, + { + "epoch": 0.57, + "grad_norm": 2.140625, + "learning_rate": 0.0001628702030568816, + "loss": 1.8622, + "step": 241010 + }, + { + "epoch": 0.57, + "grad_norm": 2.078125, + "learning_rate": 0.00016286876572913284, + "loss": 2.187, + "step": 241015 + }, + { + "epoch": 0.57, + "grad_norm": 2.03125, + "learning_rate": 0.00016286732837990692, + "loss": 2.0677, + "step": 241020 + }, + { + "epoch": 0.57, + "grad_norm": 1.75, + "learning_rate": 0.00016286589100920443, + "loss": 2.0249, + "step": 241025 + }, + { + "epoch": 0.57, + "grad_norm": 2.28125, + "learning_rate": 0.00016286445361702576, + "loss": 2.2167, + "step": 241030 + }, + { + "epoch": 0.57, + "grad_norm": 2.03125, + "learning_rate": 0.00016286301620337148, + "loss": 2.0865, + "step": 241035 + }, + { + "epoch": 0.57, + "grad_norm": 2.171875, + "learning_rate": 0.00016286157876824203, + "loss": 2.0493, + "step": 241040 + }, + { + "epoch": 0.57, + "grad_norm": 1.9140625, + "learning_rate": 0.00016286014131163792, + "loss": 2.0523, + "step": 241045 + }, + { + "epoch": 0.57, + "grad_norm": 2.34375, + "learning_rate": 0.00016285870383355968, + "loss": 2.1104, + "step": 241050 + }, + { + "epoch": 0.57, + "grad_norm": 2.375, + "learning_rate": 0.00016285726633400778, + "loss": 2.2748, + "step": 241055 + }, + { + "epoch": 0.57, + "grad_norm": 2.1875, + "learning_rate": 0.00016285582881298265, + "loss": 2.081, + "step": 241060 + }, + { + "epoch": 0.57, + "grad_norm": 2.203125, + "learning_rate": 0.00016285439127048483, + "loss": 1.8653, + "step": 241065 + }, + { + "epoch": 0.57, + "grad_norm": 2.03125, + "learning_rate": 0.00016285295370651484, + "loss": 2.0063, + "step": 241070 + }, + { + "epoch": 0.57, + "grad_norm": 1.9765625, + "learning_rate": 0.00016285151612107312, + "loss": 1.9783, + "step": 241075 + }, + { + "epoch": 0.57, + "grad_norm": 2.09375, + "learning_rate": 0.0001628500785141602, + "loss": 2.0367, + "step": 241080 + }, + { + "epoch": 0.57, + "grad_norm": 2.90625, + "learning_rate": 0.00016284864088577655, + "loss": 2.2885, + "step": 241085 + }, + { + "epoch": 0.57, + "grad_norm": 1.796875, + "learning_rate": 0.00016284720323592265, + "loss": 2.0339, + "step": 241090 + }, + { + "epoch": 0.57, + "grad_norm": 1.953125, + "learning_rate": 0.000162845765564599, + "loss": 2.308, + "step": 241095 + }, + { + "epoch": 0.57, + "grad_norm": 1.984375, + "learning_rate": 0.00016284432787180614, + "loss": 2.1815, + "step": 241100 + }, + { + "epoch": 0.57, + "grad_norm": 2.3125, + "learning_rate": 0.0001628428901575445, + "loss": 2.0413, + "step": 241105 + }, + { + "epoch": 0.57, + "grad_norm": 1.84375, + "learning_rate": 0.00016284145242181458, + "loss": 2.1536, + "step": 241110 + }, + { + "epoch": 0.57, + "grad_norm": 1.8203125, + "learning_rate": 0.00016284001466461686, + "loss": 2.0941, + "step": 241115 + }, + { + "epoch": 0.57, + "grad_norm": 1.90625, + "learning_rate": 0.00016283857688595188, + "loss": 2.1361, + "step": 241120 + }, + { + "epoch": 0.57, + "grad_norm": 2.359375, + "learning_rate": 0.0001628371390858201, + "loss": 2.0521, + "step": 241125 + }, + { + "epoch": 0.57, + "grad_norm": 2.03125, + "learning_rate": 0.000162835701264222, + "loss": 2.0266, + "step": 241130 + }, + { + "epoch": 0.57, + "grad_norm": 2.046875, + "learning_rate": 0.00016283426342115812, + "loss": 2.1408, + "step": 241135 + }, + { + "epoch": 0.57, + "grad_norm": 2.515625, + "learning_rate": 0.00016283282555662885, + "loss": 2.0972, + "step": 241140 + }, + { + "epoch": 0.57, + "grad_norm": 2.296875, + "learning_rate": 0.0001628313876706348, + "loss": 2.202, + "step": 241145 + }, + { + "epoch": 0.57, + "grad_norm": 2.6875, + "learning_rate": 0.00016282994976317639, + "loss": 2.1924, + "step": 241150 + }, + { + "epoch": 0.57, + "grad_norm": 2.515625, + "learning_rate": 0.00016282851183425413, + "loss": 1.9726, + "step": 241155 + }, + { + "epoch": 0.57, + "grad_norm": 2.3125, + "learning_rate": 0.0001628270738838685, + "loss": 2.1651, + "step": 241160 + }, + { + "epoch": 0.57, + "grad_norm": 1.9921875, + "learning_rate": 0.00016282563591202002, + "loss": 1.9986, + "step": 241165 + }, + { + "epoch": 0.57, + "grad_norm": 2.828125, + "learning_rate": 0.00016282419791870914, + "loss": 2.1805, + "step": 241170 + }, + { + "epoch": 0.57, + "grad_norm": 2.171875, + "learning_rate": 0.00016282275990393643, + "loss": 2.1794, + "step": 241175 + }, + { + "epoch": 0.57, + "grad_norm": 2.203125, + "learning_rate": 0.00016282132186770228, + "loss": 2.0696, + "step": 241180 + }, + { + "epoch": 0.57, + "grad_norm": 2.03125, + "learning_rate": 0.00016281988381000722, + "loss": 2.0179, + "step": 241185 + }, + { + "epoch": 0.57, + "grad_norm": 2.015625, + "learning_rate": 0.0001628184457308518, + "loss": 2.2027, + "step": 241190 + }, + { + "epoch": 0.57, + "grad_norm": 2.09375, + "learning_rate": 0.00016281700763023645, + "loss": 2.1113, + "step": 241195 + }, + { + "epoch": 0.57, + "grad_norm": 2.1875, + "learning_rate": 0.00016281556950816162, + "loss": 2.1552, + "step": 241200 + }, + { + "epoch": 0.57, + "grad_norm": 2.03125, + "learning_rate": 0.00016281413136462786, + "loss": 2.1134, + "step": 241205 + }, + { + "epoch": 0.57, + "grad_norm": 2.328125, + "learning_rate": 0.00016281269319963569, + "loss": 1.9964, + "step": 241210 + }, + { + "epoch": 0.57, + "grad_norm": 1.84375, + "learning_rate": 0.00016281125501318555, + "loss": 2.1368, + "step": 241215 + }, + { + "epoch": 0.57, + "grad_norm": 2.125, + "learning_rate": 0.00016280981680527793, + "loss": 2.2225, + "step": 241220 + }, + { + "epoch": 0.57, + "grad_norm": 2.75, + "learning_rate": 0.00016280837857591337, + "loss": 1.947, + "step": 241225 + }, + { + "epoch": 0.57, + "grad_norm": 1.9765625, + "learning_rate": 0.0001628069403250923, + "loss": 2.1382, + "step": 241230 + }, + { + "epoch": 0.57, + "grad_norm": 2.0625, + "learning_rate": 0.00016280550205281525, + "loss": 2.0884, + "step": 241235 + }, + { + "epoch": 0.57, + "grad_norm": 2.203125, + "learning_rate": 0.00016280406375908268, + "loss": 2.3719, + "step": 241240 + }, + { + "epoch": 0.57, + "grad_norm": 1.9296875, + "learning_rate": 0.00016280262544389514, + "loss": 2.123, + "step": 241245 + }, + { + "epoch": 0.57, + "grad_norm": 2.21875, + "learning_rate": 0.00016280118710725305, + "loss": 2.0799, + "step": 241250 + }, + { + "epoch": 0.57, + "grad_norm": 2.125, + "learning_rate": 0.00016279974874915695, + "loss": 2.3546, + "step": 241255 + }, + { + "epoch": 0.57, + "grad_norm": 2.21875, + "learning_rate": 0.00016279831036960734, + "loss": 2.1315, + "step": 241260 + }, + { + "epoch": 0.57, + "grad_norm": 2.109375, + "learning_rate": 0.00016279687196860468, + "loss": 2.0638, + "step": 241265 + }, + { + "epoch": 0.57, + "grad_norm": 1.9609375, + "learning_rate": 0.00016279543354614945, + "loss": 2.1174, + "step": 241270 + }, + { + "epoch": 0.57, + "grad_norm": 2.046875, + "learning_rate": 0.00016279399510224217, + "loss": 2.1517, + "step": 241275 + }, + { + "epoch": 0.57, + "grad_norm": 2.65625, + "learning_rate": 0.0001627925566368833, + "loss": 2.1414, + "step": 241280 + }, + { + "epoch": 0.57, + "grad_norm": 2.140625, + "learning_rate": 0.00016279111815007338, + "loss": 2.1266, + "step": 241285 + }, + { + "epoch": 0.57, + "grad_norm": 2.203125, + "learning_rate": 0.0001627896796418129, + "loss": 2.2835, + "step": 241290 + }, + { + "epoch": 0.57, + "grad_norm": 1.671875, + "learning_rate": 0.00016278824111210226, + "loss": 2.1052, + "step": 241295 + }, + { + "epoch": 0.57, + "grad_norm": 2.40625, + "learning_rate": 0.00016278680256094207, + "loss": 2.3478, + "step": 241300 + }, + { + "epoch": 0.57, + "grad_norm": 2.078125, + "learning_rate": 0.00016278536398833275, + "loss": 2.1774, + "step": 241305 + }, + { + "epoch": 0.57, + "grad_norm": 2.28125, + "learning_rate": 0.0001627839253942748, + "loss": 2.2355, + "step": 241310 + }, + { + "epoch": 0.57, + "grad_norm": 2.546875, + "learning_rate": 0.00016278248677876878, + "loss": 2.0726, + "step": 241315 + }, + { + "epoch": 0.57, + "grad_norm": 2.5625, + "learning_rate": 0.00016278104814181508, + "loss": 2.0544, + "step": 241320 + }, + { + "epoch": 0.57, + "grad_norm": 2.21875, + "learning_rate": 0.00016277960948341425, + "loss": 2.0653, + "step": 241325 + }, + { + "epoch": 0.57, + "grad_norm": 2.328125, + "learning_rate": 0.00016277817080356672, + "loss": 2.3017, + "step": 241330 + }, + { + "epoch": 0.57, + "grad_norm": 2.21875, + "learning_rate": 0.00016277673210227307, + "loss": 1.9667, + "step": 241335 + }, + { + "epoch": 0.57, + "grad_norm": 2.234375, + "learning_rate": 0.00016277529337953373, + "loss": 2.3555, + "step": 241340 + }, + { + "epoch": 0.57, + "grad_norm": 2.203125, + "learning_rate": 0.00016277385463534923, + "loss": 2.1666, + "step": 241345 + }, + { + "epoch": 0.57, + "grad_norm": 2.203125, + "learning_rate": 0.00016277241586972006, + "loss": 2.0503, + "step": 241350 + }, + { + "epoch": 0.57, + "grad_norm": 1.96875, + "learning_rate": 0.00016277097708264666, + "loss": 2.0628, + "step": 241355 + }, + { + "epoch": 0.57, + "grad_norm": 2.28125, + "learning_rate": 0.0001627695382741296, + "loss": 2.0161, + "step": 241360 + }, + { + "epoch": 0.57, + "grad_norm": 2.296875, + "learning_rate": 0.00016276809944416925, + "loss": 2.0833, + "step": 241365 + }, + { + "epoch": 0.57, + "grad_norm": 2.515625, + "learning_rate": 0.00016276666059276623, + "loss": 2.0144, + "step": 241370 + }, + { + "epoch": 0.57, + "grad_norm": 2.1875, + "learning_rate": 0.00016276522171992098, + "loss": 2.1488, + "step": 241375 + }, + { + "epoch": 0.57, + "grad_norm": 2.125, + "learning_rate": 0.00016276378282563397, + "loss": 2.1852, + "step": 241380 + }, + { + "epoch": 0.57, + "grad_norm": 2.515625, + "learning_rate": 0.00016276234390990573, + "loss": 2.147, + "step": 241385 + }, + { + "epoch": 0.57, + "grad_norm": 2.46875, + "learning_rate": 0.00016276090497273672, + "loss": 2.0003, + "step": 241390 + }, + { + "epoch": 0.57, + "grad_norm": 1.859375, + "learning_rate": 0.0001627594660141275, + "loss": 2.0704, + "step": 241395 + }, + { + "epoch": 0.57, + "grad_norm": 2.453125, + "learning_rate": 0.00016275802703407844, + "loss": 2.2133, + "step": 241400 + }, + { + "epoch": 0.57, + "grad_norm": 2.28125, + "learning_rate": 0.00016275658803259016, + "loss": 1.9811, + "step": 241405 + }, + { + "epoch": 0.57, + "grad_norm": 2.203125, + "learning_rate": 0.00016275514900966306, + "loss": 2.1415, + "step": 241410 + }, + { + "epoch": 0.57, + "grad_norm": 2.59375, + "learning_rate": 0.00016275370996529764, + "loss": 2.0747, + "step": 241415 + }, + { + "epoch": 0.57, + "grad_norm": 1.8125, + "learning_rate": 0.00016275227089949446, + "loss": 2.0319, + "step": 241420 + }, + { + "epoch": 0.57, + "grad_norm": 1.96875, + "learning_rate": 0.00016275083181225392, + "loss": 2.0862, + "step": 241425 + }, + { + "epoch": 0.57, + "grad_norm": 2.140625, + "learning_rate": 0.0001627493927035766, + "loss": 1.8984, + "step": 241430 + }, + { + "epoch": 0.57, + "grad_norm": 2.125, + "learning_rate": 0.00016274795357346293, + "loss": 2.2727, + "step": 241435 + }, + { + "epoch": 0.57, + "grad_norm": 1.96875, + "learning_rate": 0.0001627465144219134, + "loss": 2.1844, + "step": 241440 + }, + { + "epoch": 0.57, + "grad_norm": 2.1875, + "learning_rate": 0.00016274507524892856, + "loss": 2.032, + "step": 241445 + }, + { + "epoch": 0.57, + "grad_norm": 2.25, + "learning_rate": 0.00016274363605450884, + "loss": 2.2855, + "step": 241450 + }, + { + "epoch": 0.57, + "grad_norm": 2.125, + "learning_rate": 0.00016274219683865479, + "loss": 2.053, + "step": 241455 + }, + { + "epoch": 0.57, + "grad_norm": 1.8359375, + "learning_rate": 0.00016274075760136682, + "loss": 2.0278, + "step": 241460 + }, + { + "epoch": 0.57, + "grad_norm": 2.09375, + "learning_rate": 0.0001627393183426455, + "loss": 2.174, + "step": 241465 + }, + { + "epoch": 0.57, + "grad_norm": 2.21875, + "learning_rate": 0.00016273787906249127, + "loss": 2.0122, + "step": 241470 + }, + { + "epoch": 0.57, + "grad_norm": 1.78125, + "learning_rate": 0.00016273643976090466, + "loss": 2.2298, + "step": 241475 + }, + { + "epoch": 0.57, + "grad_norm": 1.9375, + "learning_rate": 0.00016273500043788617, + "loss": 2.083, + "step": 241480 + }, + { + "epoch": 0.57, + "grad_norm": 2.21875, + "learning_rate": 0.00016273356109343623, + "loss": 2.1377, + "step": 241485 + }, + { + "epoch": 0.57, + "grad_norm": 2.609375, + "learning_rate": 0.00016273212172755538, + "loss": 2.2188, + "step": 241490 + }, + { + "epoch": 0.57, + "grad_norm": 2.3125, + "learning_rate": 0.00016273068234024408, + "loss": 2.126, + "step": 241495 + }, + { + "epoch": 0.57, + "grad_norm": 2.21875, + "learning_rate": 0.00016272924293150284, + "loss": 2.0478, + "step": 241500 + }, + { + "epoch": 0.57, + "grad_norm": 2.03125, + "learning_rate": 0.00016272780350133216, + "loss": 2.2236, + "step": 241505 + }, + { + "epoch": 0.57, + "grad_norm": 1.796875, + "learning_rate": 0.00016272636404973255, + "loss": 2.1515, + "step": 241510 + }, + { + "epoch": 0.57, + "grad_norm": 2.1875, + "learning_rate": 0.00016272492457670445, + "loss": 2.0877, + "step": 241515 + }, + { + "epoch": 0.57, + "grad_norm": 2.265625, + "learning_rate": 0.00016272348508224838, + "loss": 2.0889, + "step": 241520 + }, + { + "epoch": 0.57, + "grad_norm": 2.03125, + "learning_rate": 0.00016272204556636485, + "loss": 2.2513, + "step": 241525 + }, + { + "epoch": 0.57, + "grad_norm": 2.21875, + "learning_rate": 0.00016272060602905432, + "loss": 2.1555, + "step": 241530 + }, + { + "epoch": 0.57, + "grad_norm": 2.1875, + "learning_rate": 0.0001627191664703173, + "loss": 2.2146, + "step": 241535 + }, + { + "epoch": 0.57, + "grad_norm": 2.140625, + "learning_rate": 0.00016271772689015426, + "loss": 1.9563, + "step": 241540 + }, + { + "epoch": 0.57, + "grad_norm": 2.1875, + "learning_rate": 0.0001627162872885657, + "loss": 2.1215, + "step": 241545 + }, + { + "epoch": 0.57, + "grad_norm": 1.8046875, + "learning_rate": 0.00016271484766555214, + "loss": 2.0255, + "step": 241550 + }, + { + "epoch": 0.57, + "grad_norm": 2.0, + "learning_rate": 0.00016271340802111406, + "loss": 2.0136, + "step": 241555 + }, + { + "epoch": 0.57, + "grad_norm": 1.859375, + "learning_rate": 0.00016271196835525192, + "loss": 2.2132, + "step": 241560 + }, + { + "epoch": 0.57, + "grad_norm": 2.171875, + "learning_rate": 0.00016271052866796625, + "loss": 2.0631, + "step": 241565 + }, + { + "epoch": 0.57, + "grad_norm": 2.203125, + "learning_rate": 0.00016270908895925753, + "loss": 2.2026, + "step": 241570 + }, + { + "epoch": 0.57, + "grad_norm": 1.8828125, + "learning_rate": 0.00016270764922912621, + "loss": 2.0804, + "step": 241575 + }, + { + "epoch": 0.57, + "grad_norm": 2.546875, + "learning_rate": 0.00016270620947757286, + "loss": 2.052, + "step": 241580 + }, + { + "epoch": 0.57, + "grad_norm": 1.96875, + "learning_rate": 0.0001627047697045979, + "loss": 2.1253, + "step": 241585 + }, + { + "epoch": 0.57, + "grad_norm": 1.9375, + "learning_rate": 0.00016270332991020188, + "loss": 2.0399, + "step": 241590 + }, + { + "epoch": 0.57, + "grad_norm": 2.484375, + "learning_rate": 0.00016270189009438525, + "loss": 2.2214, + "step": 241595 + }, + { + "epoch": 0.57, + "grad_norm": 2.21875, + "learning_rate": 0.00016270045025714854, + "loss": 2.3083, + "step": 241600 + }, + { + "epoch": 0.57, + "grad_norm": 2.484375, + "learning_rate": 0.0001626990103984922, + "loss": 2.1579, + "step": 241605 + }, + { + "epoch": 0.57, + "grad_norm": 2.296875, + "learning_rate": 0.00016269757051841675, + "loss": 2.1187, + "step": 241610 + }, + { + "epoch": 0.57, + "grad_norm": 2.5, + "learning_rate": 0.00016269613061692265, + "loss": 2.003, + "step": 241615 + }, + { + "epoch": 0.57, + "grad_norm": 2.359375, + "learning_rate": 0.00016269469069401043, + "loss": 2.239, + "step": 241620 + }, + { + "epoch": 0.57, + "grad_norm": 2.515625, + "learning_rate": 0.00016269325074968056, + "loss": 1.9506, + "step": 241625 + }, + { + "epoch": 0.57, + "grad_norm": 2.09375, + "learning_rate": 0.00016269181078393355, + "loss": 2.1357, + "step": 241630 + }, + { + "epoch": 0.57, + "grad_norm": 2.015625, + "learning_rate": 0.00016269037079676986, + "loss": 2.2055, + "step": 241635 + }, + { + "epoch": 0.57, + "grad_norm": 2.0625, + "learning_rate": 0.00016268893078819004, + "loss": 1.9302, + "step": 241640 + }, + { + "epoch": 0.57, + "grad_norm": 2.125, + "learning_rate": 0.0001626874907581945, + "loss": 2.0306, + "step": 241645 + }, + { + "epoch": 0.57, + "grad_norm": 2.0, + "learning_rate": 0.00016268605070678382, + "loss": 2.1421, + "step": 241650 + }, + { + "epoch": 0.57, + "grad_norm": 2.078125, + "learning_rate": 0.0001626846106339584, + "loss": 1.9397, + "step": 241655 + }, + { + "epoch": 0.57, + "grad_norm": 2.015625, + "learning_rate": 0.0001626831705397188, + "loss": 1.9545, + "step": 241660 + }, + { + "epoch": 0.57, + "grad_norm": 1.9609375, + "learning_rate": 0.00016268173042406555, + "loss": 1.9875, + "step": 241665 + }, + { + "epoch": 0.57, + "grad_norm": 2.1875, + "learning_rate": 0.000162680290286999, + "loss": 2.1954, + "step": 241670 + }, + { + "epoch": 0.57, + "grad_norm": 2.109375, + "learning_rate": 0.00016267885012851978, + "loss": 2.0472, + "step": 241675 + }, + { + "epoch": 0.57, + "grad_norm": 2.3125, + "learning_rate": 0.00016267740994862828, + "loss": 1.9668, + "step": 241680 + }, + { + "epoch": 0.57, + "grad_norm": 2.390625, + "learning_rate": 0.00016267596974732507, + "loss": 1.968, + "step": 241685 + }, + { + "epoch": 0.57, + "grad_norm": 1.984375, + "learning_rate": 0.00016267452952461062, + "loss": 2.1129, + "step": 241690 + }, + { + "epoch": 0.57, + "grad_norm": 1.8046875, + "learning_rate": 0.00016267308928048538, + "loss": 2.0685, + "step": 241695 + }, + { + "epoch": 0.57, + "grad_norm": 1.7578125, + "learning_rate": 0.00016267164901494994, + "loss": 2.0694, + "step": 241700 + }, + { + "epoch": 0.57, + "grad_norm": 1.8046875, + "learning_rate": 0.00016267020872800467, + "loss": 2.0771, + "step": 241705 + }, + { + "epoch": 0.57, + "grad_norm": 2.125, + "learning_rate": 0.00016266876841965013, + "loss": 2.0944, + "step": 241710 + }, + { + "epoch": 0.57, + "grad_norm": 2.203125, + "learning_rate": 0.0001626673280898868, + "loss": 1.9881, + "step": 241715 + }, + { + "epoch": 0.57, + "grad_norm": 2.34375, + "learning_rate": 0.00016266588773871518, + "loss": 2.1764, + "step": 241720 + }, + { + "epoch": 0.57, + "grad_norm": 2.203125, + "learning_rate": 0.00016266444736613578, + "loss": 1.9756, + "step": 241725 + }, + { + "epoch": 0.57, + "grad_norm": 2.296875, + "learning_rate": 0.00016266300697214905, + "loss": 2.1182, + "step": 241730 + }, + { + "epoch": 0.57, + "grad_norm": 2.953125, + "learning_rate": 0.00016266156655675548, + "loss": 1.8241, + "step": 241735 + }, + { + "epoch": 0.57, + "grad_norm": 2.28125, + "learning_rate": 0.00016266012611995562, + "loss": 2.0864, + "step": 241740 + }, + { + "epoch": 0.57, + "grad_norm": 1.984375, + "learning_rate": 0.00016265868566174988, + "loss": 2.1661, + "step": 241745 + }, + { + "epoch": 0.57, + "grad_norm": 2.53125, + "learning_rate": 0.00016265724518213883, + "loss": 2.064, + "step": 241750 + }, + { + "epoch": 0.57, + "grad_norm": 1.84375, + "learning_rate": 0.00016265580468112294, + "loss": 1.9569, + "step": 241755 + }, + { + "epoch": 0.57, + "grad_norm": 2.40625, + "learning_rate": 0.00016265436415870266, + "loss": 2.0789, + "step": 241760 + }, + { + "epoch": 0.57, + "grad_norm": 2.234375, + "learning_rate": 0.0001626529236148785, + "loss": 2.1066, + "step": 241765 + }, + { + "epoch": 0.57, + "grad_norm": 2.359375, + "learning_rate": 0.00016265148304965102, + "loss": 2.0817, + "step": 241770 + }, + { + "epoch": 0.57, + "grad_norm": 2.25, + "learning_rate": 0.00016265004246302062, + "loss": 2.1285, + "step": 241775 + }, + { + "epoch": 0.57, + "grad_norm": 2.140625, + "learning_rate": 0.00016264860185498785, + "loss": 2.0725, + "step": 241780 + }, + { + "epoch": 0.57, + "grad_norm": 2.0625, + "learning_rate": 0.00016264716122555317, + "loss": 2.1089, + "step": 241785 + }, + { + "epoch": 0.57, + "grad_norm": 1.9765625, + "learning_rate": 0.00016264572057471708, + "loss": 2.0697, + "step": 241790 + }, + { + "epoch": 0.57, + "grad_norm": 2.15625, + "learning_rate": 0.00016264427990248006, + "loss": 2.0488, + "step": 241795 + }, + { + "epoch": 0.57, + "grad_norm": 2.3125, + "learning_rate": 0.00016264283920884264, + "loss": 2.1826, + "step": 241800 + }, + { + "epoch": 0.57, + "grad_norm": 2.640625, + "learning_rate": 0.0001626413984938053, + "loss": 1.9772, + "step": 241805 + }, + { + "epoch": 0.57, + "grad_norm": 2.328125, + "learning_rate": 0.00016263995775736852, + "loss": 2.1082, + "step": 241810 + }, + { + "epoch": 0.57, + "grad_norm": 2.6875, + "learning_rate": 0.00016263851699953275, + "loss": 1.894, + "step": 241815 + }, + { + "epoch": 0.57, + "grad_norm": 2.03125, + "learning_rate": 0.00016263707622029855, + "loss": 2.1519, + "step": 241820 + }, + { + "epoch": 0.57, + "grad_norm": 2.109375, + "learning_rate": 0.0001626356354196664, + "loss": 2.1473, + "step": 241825 + }, + { + "epoch": 0.57, + "grad_norm": 2.21875, + "learning_rate": 0.0001626341945976368, + "loss": 2.0061, + "step": 241830 + }, + { + "epoch": 0.57, + "grad_norm": 2.03125, + "learning_rate": 0.00016263275375421018, + "loss": 2.184, + "step": 241835 + }, + { + "epoch": 0.57, + "grad_norm": 2.203125, + "learning_rate": 0.00016263131288938708, + "loss": 2.1166, + "step": 241840 + }, + { + "epoch": 0.57, + "grad_norm": 2.15625, + "learning_rate": 0.00016262987200316804, + "loss": 1.9282, + "step": 241845 + }, + { + "epoch": 0.57, + "grad_norm": 2.46875, + "learning_rate": 0.00016262843109555344, + "loss": 2.0547, + "step": 241850 + }, + { + "epoch": 0.57, + "grad_norm": 2.34375, + "learning_rate": 0.00016262699016654386, + "loss": 2.1323, + "step": 241855 + }, + { + "epoch": 0.57, + "grad_norm": 2.234375, + "learning_rate": 0.00016262554921613978, + "loss": 2.3125, + "step": 241860 + }, + { + "epoch": 0.57, + "grad_norm": 1.9453125, + "learning_rate": 0.00016262410824434165, + "loss": 2.1359, + "step": 241865 + }, + { + "epoch": 0.57, + "grad_norm": 2.0625, + "learning_rate": 0.00016262266725115, + "loss": 2.1331, + "step": 241870 + }, + { + "epoch": 0.57, + "grad_norm": 2.546875, + "learning_rate": 0.00016262122623656532, + "loss": 2.0478, + "step": 241875 + }, + { + "epoch": 0.57, + "grad_norm": 1.890625, + "learning_rate": 0.00016261978520058806, + "loss": 2.1862, + "step": 241880 + }, + { + "epoch": 0.57, + "grad_norm": 1.8828125, + "learning_rate": 0.00016261834414321876, + "loss": 2.0143, + "step": 241885 + }, + { + "epoch": 0.57, + "grad_norm": 2.15625, + "learning_rate": 0.0001626169030644579, + "loss": 2.2747, + "step": 241890 + }, + { + "epoch": 0.57, + "grad_norm": 1.984375, + "learning_rate": 0.00016261546196430596, + "loss": 1.9426, + "step": 241895 + }, + { + "epoch": 0.57, + "grad_norm": 2.1875, + "learning_rate": 0.00016261402084276347, + "loss": 2.158, + "step": 241900 + }, + { + "epoch": 0.57, + "grad_norm": 2.21875, + "learning_rate": 0.00016261257969983088, + "loss": 2.1513, + "step": 241905 + }, + { + "epoch": 0.57, + "grad_norm": 2.125, + "learning_rate": 0.0001626111385355087, + "loss": 1.984, + "step": 241910 + }, + { + "epoch": 0.57, + "grad_norm": 1.984375, + "learning_rate": 0.00016260969734979743, + "loss": 2.0157, + "step": 241915 + }, + { + "epoch": 0.57, + "grad_norm": 1.8828125, + "learning_rate": 0.00016260825614269753, + "loss": 2.1611, + "step": 241920 + }, + { + "epoch": 0.57, + "grad_norm": 2.1875, + "learning_rate": 0.00016260681491420953, + "loss": 2.1545, + "step": 241925 + }, + { + "epoch": 0.57, + "grad_norm": 2.203125, + "learning_rate": 0.0001626053736643339, + "loss": 2.0908, + "step": 241930 + }, + { + "epoch": 0.57, + "grad_norm": 2.203125, + "learning_rate": 0.00016260393239307113, + "loss": 2.0077, + "step": 241935 + }, + { + "epoch": 0.57, + "grad_norm": 1.9375, + "learning_rate": 0.00016260249110042175, + "loss": 2.0985, + "step": 241940 + }, + { + "epoch": 0.57, + "grad_norm": 2.421875, + "learning_rate": 0.0001626010497863862, + "loss": 2.2619, + "step": 241945 + }, + { + "epoch": 0.57, + "grad_norm": 2.265625, + "learning_rate": 0.000162599608450965, + "loss": 2.1337, + "step": 241950 + }, + { + "epoch": 0.57, + "grad_norm": 2.21875, + "learning_rate": 0.00016259816709415862, + "loss": 2.1908, + "step": 241955 + }, + { + "epoch": 0.57, + "grad_norm": 2.0625, + "learning_rate": 0.0001625967257159676, + "loss": 2.1741, + "step": 241960 + }, + { + "epoch": 0.57, + "grad_norm": 2.0625, + "learning_rate": 0.00016259528431639242, + "loss": 2.0366, + "step": 241965 + }, + { + "epoch": 0.57, + "grad_norm": 2.40625, + "learning_rate": 0.00016259384289543354, + "loss": 1.9519, + "step": 241970 + }, + { + "epoch": 0.57, + "grad_norm": 2.28125, + "learning_rate": 0.00016259240145309142, + "loss": 2.208, + "step": 241975 + }, + { + "epoch": 0.57, + "grad_norm": 1.984375, + "learning_rate": 0.00016259095998936665, + "loss": 2.1157, + "step": 241980 + }, + { + "epoch": 0.57, + "grad_norm": 2.140625, + "learning_rate": 0.00016258951850425967, + "loss": 2.0659, + "step": 241985 + }, + { + "epoch": 0.57, + "grad_norm": 2.25, + "learning_rate": 0.00016258807699777097, + "loss": 2.0572, + "step": 241990 + }, + { + "epoch": 0.57, + "grad_norm": 2.203125, + "learning_rate": 0.00016258663546990106, + "loss": 1.8807, + "step": 241995 + }, + { + "epoch": 0.57, + "grad_norm": 2.3125, + "learning_rate": 0.00016258519392065037, + "loss": 2.1837, + "step": 242000 + }, + { + "epoch": 0.57, + "grad_norm": 1.8671875, + "learning_rate": 0.00016258375235001947, + "loss": 1.9683, + "step": 242005 + }, + { + "epoch": 0.57, + "grad_norm": 2.296875, + "learning_rate": 0.00016258231075800885, + "loss": 2.1674, + "step": 242010 + }, + { + "epoch": 0.57, + "grad_norm": 1.796875, + "learning_rate": 0.00016258086914461894, + "loss": 2.188, + "step": 242015 + }, + { + "epoch": 0.57, + "grad_norm": 2.1875, + "learning_rate": 0.00016257942750985032, + "loss": 2.1158, + "step": 242020 + }, + { + "epoch": 0.57, + "grad_norm": 2.21875, + "learning_rate": 0.0001625779858537034, + "loss": 2.1515, + "step": 242025 + }, + { + "epoch": 0.57, + "grad_norm": 1.96875, + "learning_rate": 0.0001625765441761787, + "loss": 2.2331, + "step": 242030 + }, + { + "epoch": 0.57, + "grad_norm": 1.9609375, + "learning_rate": 0.00016257510247727673, + "loss": 2.1572, + "step": 242035 + }, + { + "epoch": 0.57, + "grad_norm": 2.453125, + "learning_rate": 0.00016257366075699795, + "loss": 2.1965, + "step": 242040 + }, + { + "epoch": 0.57, + "grad_norm": 2.28125, + "learning_rate": 0.0001625722190153429, + "loss": 2.0738, + "step": 242045 + }, + { + "epoch": 0.57, + "grad_norm": 2.234375, + "learning_rate": 0.00016257077725231206, + "loss": 2.0575, + "step": 242050 + }, + { + "epoch": 0.57, + "grad_norm": 2.3125, + "learning_rate": 0.00016256933546790584, + "loss": 2.0542, + "step": 242055 + }, + { + "epoch": 0.57, + "grad_norm": 2.46875, + "learning_rate": 0.00016256789366212484, + "loss": 2.0747, + "step": 242060 + }, + { + "epoch": 0.57, + "grad_norm": 1.8125, + "learning_rate": 0.00016256645183496953, + "loss": 2.0324, + "step": 242065 + }, + { + "epoch": 0.57, + "grad_norm": 1.9296875, + "learning_rate": 0.00016256500998644037, + "loss": 2.0323, + "step": 242070 + }, + { + "epoch": 0.57, + "grad_norm": 2.453125, + "learning_rate": 0.00016256356811653787, + "loss": 2.122, + "step": 242075 + }, + { + "epoch": 0.57, + "grad_norm": 1.8984375, + "learning_rate": 0.00016256212622526252, + "loss": 2.0975, + "step": 242080 + }, + { + "epoch": 0.57, + "grad_norm": 2.34375, + "learning_rate": 0.0001625606843126148, + "loss": 2.2552, + "step": 242085 + }, + { + "epoch": 0.57, + "grad_norm": 2.328125, + "learning_rate": 0.00016255924237859522, + "loss": 2.061, + "step": 242090 + }, + { + "epoch": 0.57, + "grad_norm": 2.21875, + "learning_rate": 0.00016255780042320427, + "loss": 2.0407, + "step": 242095 + }, + { + "epoch": 0.57, + "grad_norm": 2.375, + "learning_rate": 0.00016255635844644246, + "loss": 2.1122, + "step": 242100 + }, + { + "epoch": 0.57, + "grad_norm": 1.96875, + "learning_rate": 0.00016255491644831028, + "loss": 1.8113, + "step": 242105 + }, + { + "epoch": 0.57, + "grad_norm": 2.375, + "learning_rate": 0.00016255347442880816, + "loss": 2.1069, + "step": 242110 + }, + { + "epoch": 0.57, + "grad_norm": 2.171875, + "learning_rate": 0.00016255203238793665, + "loss": 2.1302, + "step": 242115 + }, + { + "epoch": 0.57, + "grad_norm": 2.359375, + "learning_rate": 0.00016255059032569622, + "loss": 2.1615, + "step": 242120 + }, + { + "epoch": 0.57, + "grad_norm": 2.484375, + "learning_rate": 0.0001625491482420874, + "loss": 2.0137, + "step": 242125 + }, + { + "epoch": 0.57, + "grad_norm": 2.390625, + "learning_rate": 0.00016254770613711064, + "loss": 2.0983, + "step": 242130 + }, + { + "epoch": 0.57, + "grad_norm": 2.5625, + "learning_rate": 0.00016254626401076644, + "loss": 2.1165, + "step": 242135 + }, + { + "epoch": 0.57, + "grad_norm": 1.953125, + "learning_rate": 0.00016254482186305532, + "loss": 2.128, + "step": 242140 + }, + { + "epoch": 0.57, + "grad_norm": 2.53125, + "learning_rate": 0.00016254337969397775, + "loss": 2.133, + "step": 242145 + }, + { + "epoch": 0.57, + "grad_norm": 2.546875, + "learning_rate": 0.00016254193750353424, + "loss": 2.0353, + "step": 242150 + }, + { + "epoch": 0.57, + "grad_norm": 1.9375, + "learning_rate": 0.00016254049529172528, + "loss": 1.9892, + "step": 242155 + }, + { + "epoch": 0.57, + "grad_norm": 1.890625, + "learning_rate": 0.00016253905305855132, + "loss": 2.0341, + "step": 242160 + }, + { + "epoch": 0.57, + "grad_norm": 2.3125, + "learning_rate": 0.0001625376108040129, + "loss": 2.2581, + "step": 242165 + }, + { + "epoch": 0.57, + "grad_norm": 2.28125, + "learning_rate": 0.00016253616852811046, + "loss": 1.8061, + "step": 242170 + }, + { + "epoch": 0.57, + "grad_norm": 2.515625, + "learning_rate": 0.00016253472623084457, + "loss": 2.0126, + "step": 242175 + }, + { + "epoch": 0.57, + "grad_norm": 1.9453125, + "learning_rate": 0.0001625332839122157, + "loss": 2.0093, + "step": 242180 + }, + { + "epoch": 0.57, + "grad_norm": 2.265625, + "learning_rate": 0.0001625318415722243, + "loss": 2.1743, + "step": 242185 + }, + { + "epoch": 0.57, + "grad_norm": 2.078125, + "learning_rate": 0.00016253039921087088, + "loss": 2.1917, + "step": 242190 + }, + { + "epoch": 0.57, + "grad_norm": 2.125, + "learning_rate": 0.00016252895682815596, + "loss": 2.2216, + "step": 242195 + }, + { + "epoch": 0.57, + "grad_norm": 2.15625, + "learning_rate": 0.00016252751442408, + "loss": 1.9263, + "step": 242200 + }, + { + "epoch": 0.57, + "grad_norm": 2.3125, + "learning_rate": 0.00016252607199864353, + "loss": 1.8795, + "step": 242205 + }, + { + "epoch": 0.57, + "grad_norm": 2.125, + "learning_rate": 0.000162524629551847, + "loss": 2.081, + "step": 242210 + }, + { + "epoch": 0.57, + "grad_norm": 2.09375, + "learning_rate": 0.0001625231870836909, + "loss": 2.1312, + "step": 242215 + }, + { + "epoch": 0.57, + "grad_norm": 2.359375, + "learning_rate": 0.0001625217445941758, + "loss": 2.2016, + "step": 242220 + }, + { + "epoch": 0.57, + "grad_norm": 2.265625, + "learning_rate": 0.00016252030208330212, + "loss": 2.0121, + "step": 242225 + }, + { + "epoch": 0.57, + "grad_norm": 2.109375, + "learning_rate": 0.00016251885955107035, + "loss": 2.196, + "step": 242230 + }, + { + "epoch": 0.57, + "grad_norm": 2.015625, + "learning_rate": 0.00016251741699748105, + "loss": 2.0703, + "step": 242235 + }, + { + "epoch": 0.57, + "grad_norm": 2.03125, + "learning_rate": 0.00016251597442253464, + "loss": 1.9404, + "step": 242240 + }, + { + "epoch": 0.57, + "grad_norm": 2.234375, + "learning_rate": 0.0001625145318262316, + "loss": 2.218, + "step": 242245 + }, + { + "epoch": 0.57, + "grad_norm": 1.9140625, + "learning_rate": 0.00016251308920857248, + "loss": 2.1882, + "step": 242250 + }, + { + "epoch": 0.57, + "grad_norm": 2.03125, + "learning_rate": 0.0001625116465695578, + "loss": 2.1125, + "step": 242255 + }, + { + "epoch": 0.57, + "grad_norm": 2.09375, + "learning_rate": 0.00016251020390918797, + "loss": 2.1707, + "step": 242260 + }, + { + "epoch": 0.57, + "grad_norm": 2.453125, + "learning_rate": 0.00016250876122746353, + "loss": 2.1776, + "step": 242265 + }, + { + "epoch": 0.57, + "grad_norm": 2.0, + "learning_rate": 0.00016250731852438498, + "loss": 2.102, + "step": 242270 + }, + { + "epoch": 0.57, + "grad_norm": 2.90625, + "learning_rate": 0.00016250587579995276, + "loss": 2.1425, + "step": 242275 + }, + { + "epoch": 0.57, + "grad_norm": 1.9140625, + "learning_rate": 0.00016250443305416742, + "loss": 1.8357, + "step": 242280 + }, + { + "epoch": 0.57, + "grad_norm": 2.140625, + "learning_rate": 0.00016250299028702943, + "loss": 1.9432, + "step": 242285 + }, + { + "epoch": 0.57, + "grad_norm": 2.0625, + "learning_rate": 0.0001625015474985393, + "loss": 1.9897, + "step": 242290 + }, + { + "epoch": 0.57, + "grad_norm": 2.4375, + "learning_rate": 0.00016250010468869746, + "loss": 1.9298, + "step": 242295 + }, + { + "epoch": 0.57, + "grad_norm": 2.265625, + "learning_rate": 0.00016249866185750447, + "loss": 2.1031, + "step": 242300 + }, + { + "epoch": 0.57, + "grad_norm": 2.015625, + "learning_rate": 0.00016249721900496084, + "loss": 2.2169, + "step": 242305 + }, + { + "epoch": 0.57, + "grad_norm": 2.234375, + "learning_rate": 0.00016249577613106702, + "loss": 2.2817, + "step": 242310 + }, + { + "epoch": 0.57, + "grad_norm": 2.1875, + "learning_rate": 0.0001624943332358235, + "loss": 2.2458, + "step": 242315 + }, + { + "epoch": 0.57, + "grad_norm": 2.375, + "learning_rate": 0.00016249289031923075, + "loss": 2.1845, + "step": 242320 + }, + { + "epoch": 0.57, + "grad_norm": 1.9296875, + "learning_rate": 0.0001624914473812893, + "loss": 2.052, + "step": 242325 + }, + { + "epoch": 0.57, + "grad_norm": 2.125, + "learning_rate": 0.00016249000442199968, + "loss": 2.0592, + "step": 242330 + }, + { + "epoch": 0.57, + "grad_norm": 2.734375, + "learning_rate": 0.00016248856144136233, + "loss": 2.1323, + "step": 242335 + }, + { + "epoch": 0.57, + "grad_norm": 1.9375, + "learning_rate": 0.00016248711843937775, + "loss": 2.1365, + "step": 242340 + }, + { + "epoch": 0.57, + "grad_norm": 1.859375, + "learning_rate": 0.00016248567541604642, + "loss": 1.981, + "step": 242345 + }, + { + "epoch": 0.57, + "grad_norm": 2.25, + "learning_rate": 0.00016248423237136885, + "loss": 2.2399, + "step": 242350 + }, + { + "epoch": 0.57, + "grad_norm": 2.375, + "learning_rate": 0.00016248278930534556, + "loss": 2.0516, + "step": 242355 + }, + { + "epoch": 0.57, + "grad_norm": 2.109375, + "learning_rate": 0.000162481346217977, + "loss": 2.0117, + "step": 242360 + }, + { + "epoch": 0.57, + "grad_norm": 2.453125, + "learning_rate": 0.00016247990310926368, + "loss": 2.2148, + "step": 242365 + }, + { + "epoch": 0.57, + "grad_norm": 2.515625, + "learning_rate": 0.0001624784599792061, + "loss": 2.0355, + "step": 242370 + }, + { + "epoch": 0.57, + "grad_norm": 2.28125, + "learning_rate": 0.00016247701682780473, + "loss": 2.0561, + "step": 242375 + }, + { + "epoch": 0.57, + "grad_norm": 2.046875, + "learning_rate": 0.00016247557365506006, + "loss": 2.1555, + "step": 242380 + }, + { + "epoch": 0.57, + "grad_norm": 2.015625, + "learning_rate": 0.00016247413046097264, + "loss": 2.1889, + "step": 242385 + }, + { + "epoch": 0.57, + "grad_norm": 1.9453125, + "learning_rate": 0.00016247268724554293, + "loss": 1.8728, + "step": 242390 + }, + { + "epoch": 0.57, + "grad_norm": 2.3125, + "learning_rate": 0.00016247124400877138, + "loss": 2.1358, + "step": 242395 + }, + { + "epoch": 0.57, + "grad_norm": 2.328125, + "learning_rate": 0.00016246980075065854, + "loss": 2.0306, + "step": 242400 + }, + { + "epoch": 0.57, + "grad_norm": 2.046875, + "learning_rate": 0.00016246835747120488, + "loss": 1.928, + "step": 242405 + }, + { + "epoch": 0.57, + "grad_norm": 2.1875, + "learning_rate": 0.0001624669141704109, + "loss": 2.081, + "step": 242410 + }, + { + "epoch": 0.57, + "grad_norm": 2.53125, + "learning_rate": 0.00016246547084827707, + "loss": 2.1761, + "step": 242415 + }, + { + "epoch": 0.57, + "grad_norm": 2.1875, + "learning_rate": 0.00016246402750480393, + "loss": 2.1083, + "step": 242420 + }, + { + "epoch": 0.57, + "grad_norm": 2.203125, + "learning_rate": 0.0001624625841399919, + "loss": 1.8799, + "step": 242425 + }, + { + "epoch": 0.57, + "grad_norm": 2.234375, + "learning_rate": 0.00016246114075384158, + "loss": 2.0353, + "step": 242430 + }, + { + "epoch": 0.57, + "grad_norm": 2.359375, + "learning_rate": 0.00016245969734635338, + "loss": 1.941, + "step": 242435 + }, + { + "epoch": 0.57, + "grad_norm": 2.0, + "learning_rate": 0.0001624582539175278, + "loss": 1.9624, + "step": 242440 + }, + { + "epoch": 0.57, + "grad_norm": 2.046875, + "learning_rate": 0.00016245681046736534, + "loss": 2.0283, + "step": 242445 + }, + { + "epoch": 0.57, + "grad_norm": 1.8515625, + "learning_rate": 0.00016245536699586652, + "loss": 2.1343, + "step": 242450 + }, + { + "epoch": 0.57, + "grad_norm": 2.0, + "learning_rate": 0.00016245392350303182, + "loss": 2.0604, + "step": 242455 + }, + { + "epoch": 0.57, + "grad_norm": 2.359375, + "learning_rate": 0.0001624524799888617, + "loss": 2.1539, + "step": 242460 + }, + { + "epoch": 0.57, + "grad_norm": 1.984375, + "learning_rate": 0.0001624510364533567, + "loss": 2.0564, + "step": 242465 + }, + { + "epoch": 0.57, + "grad_norm": 2.21875, + "learning_rate": 0.00016244959289651728, + "loss": 2.187, + "step": 242470 + }, + { + "epoch": 0.57, + "grad_norm": 2.3125, + "learning_rate": 0.00016244814931834397, + "loss": 2.2426, + "step": 242475 + }, + { + "epoch": 0.57, + "grad_norm": 2.09375, + "learning_rate": 0.00016244670571883725, + "loss": 2.2746, + "step": 242480 + }, + { + "epoch": 0.57, + "grad_norm": 2.03125, + "learning_rate": 0.00016244526209799754, + "loss": 2.0842, + "step": 242485 + }, + { + "epoch": 0.57, + "grad_norm": 2.09375, + "learning_rate": 0.00016244381845582546, + "loss": 1.9698, + "step": 242490 + }, + { + "epoch": 0.57, + "grad_norm": 3.921875, + "learning_rate": 0.0001624423747923214, + "loss": 2.0601, + "step": 242495 + }, + { + "epoch": 0.57, + "grad_norm": 2.15625, + "learning_rate": 0.0001624409311074859, + "loss": 2.139, + "step": 242500 + }, + { + "epoch": 0.57, + "grad_norm": 2.046875, + "learning_rate": 0.00016243948740131945, + "loss": 2.1183, + "step": 242505 + }, + { + "epoch": 0.57, + "grad_norm": 2.140625, + "learning_rate": 0.00016243804367382253, + "loss": 1.9259, + "step": 242510 + }, + { + "epoch": 0.57, + "grad_norm": 1.8828125, + "learning_rate": 0.00016243659992499568, + "loss": 1.9696, + "step": 242515 + }, + { + "epoch": 0.57, + "grad_norm": 1.859375, + "learning_rate": 0.0001624351561548393, + "loss": 2.0051, + "step": 242520 + }, + { + "epoch": 0.57, + "grad_norm": 1.921875, + "learning_rate": 0.00016243371236335398, + "loss": 2.0817, + "step": 242525 + }, + { + "epoch": 0.57, + "grad_norm": 1.9375, + "learning_rate": 0.00016243226855054016, + "loss": 2.033, + "step": 242530 + }, + { + "epoch": 0.57, + "grad_norm": 2.296875, + "learning_rate": 0.00016243082471639837, + "loss": 2.2324, + "step": 242535 + }, + { + "epoch": 0.57, + "grad_norm": 2.203125, + "learning_rate": 0.00016242938086092902, + "loss": 2.115, + "step": 242540 + }, + { + "epoch": 0.57, + "grad_norm": 2.03125, + "learning_rate": 0.0001624279369841327, + "loss": 1.81, + "step": 242545 + }, + { + "epoch": 0.57, + "grad_norm": 1.921875, + "learning_rate": 0.00016242649308600987, + "loss": 2.0514, + "step": 242550 + }, + { + "epoch": 0.57, + "grad_norm": 2.09375, + "learning_rate": 0.00016242504916656102, + "loss": 2.2077, + "step": 242555 + }, + { + "epoch": 0.57, + "grad_norm": 2.359375, + "learning_rate": 0.0001624236052257866, + "loss": 2.319, + "step": 242560 + }, + { + "epoch": 0.57, + "grad_norm": 2.078125, + "learning_rate": 0.00016242216126368715, + "loss": 2.1112, + "step": 242565 + }, + { + "epoch": 0.57, + "grad_norm": 2.234375, + "learning_rate": 0.0001624207172802632, + "loss": 1.9317, + "step": 242570 + }, + { + "epoch": 0.57, + "grad_norm": 2.0625, + "learning_rate": 0.00016241927327551516, + "loss": 1.9253, + "step": 242575 + }, + { + "epoch": 0.57, + "grad_norm": 2.125, + "learning_rate": 0.0001624178292494436, + "loss": 1.9451, + "step": 242580 + }, + { + "epoch": 0.57, + "grad_norm": 2.296875, + "learning_rate": 0.00016241638520204897, + "loss": 2.1469, + "step": 242585 + }, + { + "epoch": 0.57, + "grad_norm": 2.21875, + "learning_rate": 0.00016241494113333174, + "loss": 2.1879, + "step": 242590 + }, + { + "epoch": 0.57, + "grad_norm": 2.0625, + "learning_rate": 0.00016241349704329246, + "loss": 2.1158, + "step": 242595 + }, + { + "epoch": 0.57, + "grad_norm": 2.375, + "learning_rate": 0.0001624120529319316, + "loss": 2.1432, + "step": 242600 + }, + { + "epoch": 0.57, + "grad_norm": 2.125, + "learning_rate": 0.00016241060879924963, + "loss": 2.2005, + "step": 242605 + }, + { + "epoch": 0.57, + "grad_norm": 2.234375, + "learning_rate": 0.00016240916464524706, + "loss": 2.2013, + "step": 242610 + }, + { + "epoch": 0.57, + "grad_norm": 2.0, + "learning_rate": 0.00016240772046992443, + "loss": 2.0787, + "step": 242615 + }, + { + "epoch": 0.57, + "grad_norm": 2.296875, + "learning_rate": 0.00016240627627328214, + "loss": 2.0622, + "step": 242620 + }, + { + "epoch": 0.57, + "grad_norm": 2.46875, + "learning_rate": 0.00016240483205532077, + "loss": 2.001, + "step": 242625 + }, + { + "epoch": 0.57, + "grad_norm": 2.390625, + "learning_rate": 0.0001624033878160408, + "loss": 2.1232, + "step": 242630 + }, + { + "epoch": 0.57, + "grad_norm": 2.484375, + "learning_rate": 0.00016240194355544266, + "loss": 2.0576, + "step": 242635 + }, + { + "epoch": 0.57, + "grad_norm": 1.9609375, + "learning_rate": 0.00016240049927352686, + "loss": 2.2203, + "step": 242640 + }, + { + "epoch": 0.57, + "grad_norm": 2.296875, + "learning_rate": 0.00016239905497029394, + "loss": 2.157, + "step": 242645 + }, + { + "epoch": 0.57, + "grad_norm": 2.5, + "learning_rate": 0.0001623976106457444, + "loss": 2.1065, + "step": 242650 + }, + { + "epoch": 0.57, + "grad_norm": 1.9765625, + "learning_rate": 0.00016239616629987872, + "loss": 2.1397, + "step": 242655 + }, + { + "epoch": 0.57, + "grad_norm": 2.53125, + "learning_rate": 0.0001623947219326973, + "loss": 2.2181, + "step": 242660 + }, + { + "epoch": 0.57, + "grad_norm": 2.34375, + "learning_rate": 0.00016239327754420077, + "loss": 2.0974, + "step": 242665 + }, + { + "epoch": 0.57, + "grad_norm": 2.25, + "learning_rate": 0.00016239183313438953, + "loss": 2.09, + "step": 242670 + }, + { + "epoch": 0.57, + "grad_norm": 2.4375, + "learning_rate": 0.00016239038870326415, + "loss": 2.0382, + "step": 242675 + }, + { + "epoch": 0.57, + "grad_norm": 2.296875, + "learning_rate": 0.00016238894425082506, + "loss": 2.0483, + "step": 242680 + }, + { + "epoch": 0.57, + "grad_norm": 1.984375, + "learning_rate": 0.00016238749977707278, + "loss": 2.0826, + "step": 242685 + }, + { + "epoch": 0.57, + "grad_norm": 2.0, + "learning_rate": 0.0001623860552820078, + "loss": 2.0282, + "step": 242690 + }, + { + "epoch": 0.57, + "grad_norm": 2.3125, + "learning_rate": 0.0001623846107656306, + "loss": 2.1544, + "step": 242695 + }, + { + "epoch": 0.57, + "grad_norm": 2.25, + "learning_rate": 0.00016238316622794167, + "loss": 2.2494, + "step": 242700 + }, + { + "epoch": 0.57, + "grad_norm": 2.296875, + "learning_rate": 0.00016238172166894155, + "loss": 2.1182, + "step": 242705 + }, + { + "epoch": 0.57, + "grad_norm": 2.53125, + "learning_rate": 0.0001623802770886307, + "loss": 2.1892, + "step": 242710 + }, + { + "epoch": 0.57, + "grad_norm": 1.9765625, + "learning_rate": 0.0001623788324870096, + "loss": 2.1234, + "step": 242715 + }, + { + "epoch": 0.57, + "grad_norm": 2.1875, + "learning_rate": 0.00016237738786407878, + "loss": 2.1379, + "step": 242720 + }, + { + "epoch": 0.57, + "grad_norm": 2.171875, + "learning_rate": 0.0001623759432198387, + "loss": 2.0231, + "step": 242725 + }, + { + "epoch": 0.57, + "grad_norm": 2.1875, + "learning_rate": 0.00016237449855428988, + "loss": 2.1204, + "step": 242730 + }, + { + "epoch": 0.57, + "grad_norm": 1.8671875, + "learning_rate": 0.00016237305386743276, + "loss": 2.0601, + "step": 242735 + }, + { + "epoch": 0.57, + "grad_norm": 2.0625, + "learning_rate": 0.00016237160915926792, + "loss": 2.0856, + "step": 242740 + }, + { + "epoch": 0.57, + "grad_norm": 2.4375, + "learning_rate": 0.00016237016442979577, + "loss": 2.0219, + "step": 242745 + }, + { + "epoch": 0.57, + "grad_norm": 1.890625, + "learning_rate": 0.00016236871967901685, + "loss": 1.9823, + "step": 242750 + }, + { + "epoch": 0.57, + "grad_norm": 1.421875, + "learning_rate": 0.00016236727490693168, + "loss": 1.9979, + "step": 242755 + }, + { + "epoch": 0.57, + "grad_norm": 2.375, + "learning_rate": 0.00016236583011354069, + "loss": 1.9839, + "step": 242760 + }, + { + "epoch": 0.57, + "grad_norm": 2.609375, + "learning_rate": 0.0001623643852988444, + "loss": 2.1642, + "step": 242765 + }, + { + "epoch": 0.57, + "grad_norm": 2.0625, + "learning_rate": 0.00016236294046284333, + "loss": 2.01, + "step": 242770 + }, + { + "epoch": 0.57, + "grad_norm": 2.0, + "learning_rate": 0.00016236149560553792, + "loss": 2.179, + "step": 242775 + }, + { + "epoch": 0.57, + "grad_norm": 2.203125, + "learning_rate": 0.0001623600507269287, + "loss": 2.1464, + "step": 242780 + }, + { + "epoch": 0.57, + "grad_norm": 1.9375, + "learning_rate": 0.0001623586058270161, + "loss": 2.154, + "step": 242785 + }, + { + "epoch": 0.57, + "grad_norm": 2.734375, + "learning_rate": 0.00016235716090580077, + "loss": 2.1262, + "step": 242790 + }, + { + "epoch": 0.57, + "grad_norm": 2.21875, + "learning_rate": 0.00016235571596328306, + "loss": 2.1153, + "step": 242795 + }, + { + "epoch": 0.57, + "grad_norm": 2.1875, + "learning_rate": 0.0001623542709994635, + "loss": 2.06, + "step": 242800 + }, + { + "epoch": 0.57, + "grad_norm": 2.359375, + "learning_rate": 0.00016235282601434258, + "loss": 2.0054, + "step": 242805 + }, + { + "epoch": 0.57, + "grad_norm": 2.0625, + "learning_rate": 0.00016235138100792083, + "loss": 2.0623, + "step": 242810 + }, + { + "epoch": 0.57, + "grad_norm": 1.9609375, + "learning_rate": 0.0001623499359801987, + "loss": 2.1492, + "step": 242815 + }, + { + "epoch": 0.57, + "grad_norm": 2.0625, + "learning_rate": 0.0001623484909311767, + "loss": 2.1875, + "step": 242820 + }, + { + "epoch": 0.57, + "grad_norm": 2.0, + "learning_rate": 0.00016234704586085534, + "loss": 2.0022, + "step": 242825 + }, + { + "epoch": 0.57, + "grad_norm": 1.8671875, + "learning_rate": 0.00016234560076923508, + "loss": 2.1736, + "step": 242830 + }, + { + "epoch": 0.57, + "grad_norm": 2.234375, + "learning_rate": 0.00016234415565631645, + "loss": 2.0798, + "step": 242835 + }, + { + "epoch": 0.57, + "grad_norm": 2.109375, + "learning_rate": 0.0001623427105220999, + "loss": 2.0267, + "step": 242840 + }, + { + "epoch": 0.57, + "grad_norm": 3.421875, + "learning_rate": 0.00016234126536658597, + "loss": 2.299, + "step": 242845 + }, + { + "epoch": 0.57, + "grad_norm": 2.453125, + "learning_rate": 0.00016233982018977512, + "loss": 2.0949, + "step": 242850 + }, + { + "epoch": 0.57, + "grad_norm": 1.90625, + "learning_rate": 0.0001623383749916679, + "loss": 1.9831, + "step": 242855 + }, + { + "epoch": 0.57, + "grad_norm": 2.15625, + "learning_rate": 0.0001623369297722647, + "loss": 2.0629, + "step": 242860 + }, + { + "epoch": 0.57, + "grad_norm": 3.296875, + "learning_rate": 0.0001623354845315661, + "loss": 2.0812, + "step": 242865 + }, + { + "epoch": 0.57, + "grad_norm": 1.8125, + "learning_rate": 0.00016233403926957257, + "loss": 2.0467, + "step": 242870 + }, + { + "epoch": 0.57, + "grad_norm": 1.9375, + "learning_rate": 0.00016233259398628461, + "loss": 1.9761, + "step": 242875 + }, + { + "epoch": 0.57, + "grad_norm": 2.109375, + "learning_rate": 0.0001623311486817027, + "loss": 2.3658, + "step": 242880 + }, + { + "epoch": 0.57, + "grad_norm": 2.375, + "learning_rate": 0.0001623297033558273, + "loss": 1.9623, + "step": 242885 + }, + { + "epoch": 0.57, + "grad_norm": 2.125, + "learning_rate": 0.000162328258008659, + "loss": 2.1964, + "step": 242890 + }, + { + "epoch": 0.57, + "grad_norm": 2.34375, + "learning_rate": 0.00016232681264019823, + "loss": 2.2405, + "step": 242895 + }, + { + "epoch": 0.57, + "grad_norm": 2.671875, + "learning_rate": 0.00016232536725044547, + "loss": 1.9885, + "step": 242900 + }, + { + "epoch": 0.57, + "grad_norm": 2.15625, + "learning_rate": 0.00016232392183940125, + "loss": 2.0126, + "step": 242905 + }, + { + "epoch": 0.57, + "grad_norm": 2.203125, + "learning_rate": 0.00016232247640706601, + "loss": 1.9778, + "step": 242910 + }, + { + "epoch": 0.57, + "grad_norm": 2.921875, + "learning_rate": 0.00016232103095344032, + "loss": 2.0736, + "step": 242915 + }, + { + "epoch": 0.57, + "grad_norm": 1.875, + "learning_rate": 0.00016231958547852462, + "loss": 1.9959, + "step": 242920 + }, + { + "epoch": 0.57, + "grad_norm": 2.21875, + "learning_rate": 0.00016231813998231944, + "loss": 2.0487, + "step": 242925 + }, + { + "epoch": 0.57, + "grad_norm": 2.578125, + "learning_rate": 0.00016231669446482524, + "loss": 2.0028, + "step": 242930 + }, + { + "epoch": 0.57, + "grad_norm": 2.25, + "learning_rate": 0.00016231524892604252, + "loss": 2.0135, + "step": 242935 + }, + { + "epoch": 0.57, + "grad_norm": 1.9140625, + "learning_rate": 0.00016231380336597179, + "loss": 2.0432, + "step": 242940 + }, + { + "epoch": 0.57, + "grad_norm": 2.15625, + "learning_rate": 0.0001623123577846135, + "loss": 2.0405, + "step": 242945 + }, + { + "epoch": 0.57, + "grad_norm": 2.21875, + "learning_rate": 0.00016231091218196823, + "loss": 2.3028, + "step": 242950 + }, + { + "epoch": 0.57, + "grad_norm": 2.21875, + "learning_rate": 0.0001623094665580364, + "loss": 1.8949, + "step": 242955 + }, + { + "epoch": 0.57, + "grad_norm": 2.15625, + "learning_rate": 0.00016230802091281855, + "loss": 2.1375, + "step": 242960 + }, + { + "epoch": 0.57, + "grad_norm": 2.71875, + "learning_rate": 0.0001623065752463151, + "loss": 2.052, + "step": 242965 + }, + { + "epoch": 0.57, + "grad_norm": 2.078125, + "learning_rate": 0.00016230512955852663, + "loss": 2.1933, + "step": 242970 + }, + { + "epoch": 0.57, + "grad_norm": 2.109375, + "learning_rate": 0.00016230368384945358, + "loss": 2.0923, + "step": 242975 + }, + { + "epoch": 0.57, + "grad_norm": 2.375, + "learning_rate": 0.00016230223811909648, + "loss": 2.0305, + "step": 242980 + }, + { + "epoch": 0.57, + "grad_norm": 2.171875, + "learning_rate": 0.0001623007923674558, + "loss": 1.9883, + "step": 242985 + }, + { + "epoch": 0.57, + "grad_norm": 2.515625, + "learning_rate": 0.00016229934659453202, + "loss": 2.0341, + "step": 242990 + }, + { + "epoch": 0.57, + "grad_norm": 2.140625, + "learning_rate": 0.00016229790080032568, + "loss": 1.9074, + "step": 242995 + }, + { + "epoch": 0.57, + "grad_norm": 2.4375, + "learning_rate": 0.00016229645498483722, + "loss": 2.1592, + "step": 243000 + }, + { + "epoch": 0.57, + "grad_norm": 2.125, + "learning_rate": 0.0001622950091480672, + "loss": 2.22, + "step": 243005 + }, + { + "epoch": 0.57, + "grad_norm": 2.234375, + "learning_rate": 0.00016229356329001604, + "loss": 2.0933, + "step": 243010 + }, + { + "epoch": 0.57, + "grad_norm": 2.171875, + "learning_rate": 0.00016229211741068428, + "loss": 1.9407, + "step": 243015 + }, + { + "epoch": 0.57, + "grad_norm": 2.609375, + "learning_rate": 0.0001622906715100724, + "loss": 2.0646, + "step": 243020 + }, + { + "epoch": 0.57, + "grad_norm": 1.9296875, + "learning_rate": 0.0001622892255881809, + "loss": 1.9113, + "step": 243025 + }, + { + "epoch": 0.57, + "grad_norm": 2.328125, + "learning_rate": 0.00016228777964501028, + "loss": 2.2016, + "step": 243030 + }, + { + "epoch": 0.57, + "grad_norm": 2.109375, + "learning_rate": 0.000162286333680561, + "loss": 2.2557, + "step": 243035 + }, + { + "epoch": 0.57, + "grad_norm": 2.515625, + "learning_rate": 0.0001622848876948336, + "loss": 2.1419, + "step": 243040 + }, + { + "epoch": 0.57, + "grad_norm": 2.125, + "learning_rate": 0.00016228344168782853, + "loss": 2.1504, + "step": 243045 + }, + { + "epoch": 0.57, + "grad_norm": 2.078125, + "learning_rate": 0.00016228199565954636, + "loss": 2.2832, + "step": 243050 + }, + { + "epoch": 0.57, + "grad_norm": 1.9375, + "learning_rate": 0.00016228054960998748, + "loss": 1.9306, + "step": 243055 + }, + { + "epoch": 0.57, + "grad_norm": 2.109375, + "learning_rate": 0.00016227910353915247, + "loss": 2.0384, + "step": 243060 + }, + { + "epoch": 0.57, + "grad_norm": 2.203125, + "learning_rate": 0.00016227765744704175, + "loss": 2.121, + "step": 243065 + }, + { + "epoch": 0.57, + "grad_norm": 2.515625, + "learning_rate": 0.00016227621133365586, + "loss": 2.0263, + "step": 243070 + }, + { + "epoch": 0.57, + "grad_norm": 2.09375, + "learning_rate": 0.00016227476519899533, + "loss": 1.9886, + "step": 243075 + }, + { + "epoch": 0.57, + "grad_norm": 1.8828125, + "learning_rate": 0.00016227331904306055, + "loss": 2.2237, + "step": 243080 + }, + { + "epoch": 0.57, + "grad_norm": 2.046875, + "learning_rate": 0.00016227187286585213, + "loss": 1.9999, + "step": 243085 + }, + { + "epoch": 0.57, + "grad_norm": 2.21875, + "learning_rate": 0.00016227042666737046, + "loss": 2.1867, + "step": 243090 + }, + { + "epoch": 0.57, + "grad_norm": 2.28125, + "learning_rate": 0.0001622689804476161, + "loss": 2.1688, + "step": 243095 + }, + { + "epoch": 0.57, + "grad_norm": 2.703125, + "learning_rate": 0.00016226753420658954, + "loss": 1.8996, + "step": 243100 + }, + { + "epoch": 0.57, + "grad_norm": 1.9921875, + "learning_rate": 0.00016226608794429125, + "loss": 2.1317, + "step": 243105 + }, + { + "epoch": 0.57, + "grad_norm": 2.34375, + "learning_rate": 0.00016226464166072178, + "loss": 2.2509, + "step": 243110 + }, + { + "epoch": 0.57, + "grad_norm": 1.953125, + "learning_rate": 0.00016226319535588154, + "loss": 2.1757, + "step": 243115 + }, + { + "epoch": 0.57, + "grad_norm": 1.984375, + "learning_rate": 0.00016226174902977104, + "loss": 2.1409, + "step": 243120 + }, + { + "epoch": 0.57, + "grad_norm": 2.0, + "learning_rate": 0.0001622603026823908, + "loss": 2.2809, + "step": 243125 + }, + { + "epoch": 0.57, + "grad_norm": 2.21875, + "learning_rate": 0.00016225885631374135, + "loss": 2.0366, + "step": 243130 + }, + { + "epoch": 0.57, + "grad_norm": 2.546875, + "learning_rate": 0.00016225740992382312, + "loss": 2.1708, + "step": 243135 + }, + { + "epoch": 0.57, + "grad_norm": 2.453125, + "learning_rate": 0.00016225596351263666, + "loss": 1.923, + "step": 243140 + }, + { + "epoch": 0.57, + "grad_norm": 2.5625, + "learning_rate": 0.0001622545170801824, + "loss": 1.9399, + "step": 243145 + }, + { + "epoch": 0.57, + "grad_norm": 2.21875, + "learning_rate": 0.00016225307062646088, + "loss": 1.9361, + "step": 243150 + }, + { + "epoch": 0.57, + "grad_norm": 1.9921875, + "learning_rate": 0.00016225162415147256, + "loss": 1.9725, + "step": 243155 + }, + { + "epoch": 0.57, + "grad_norm": 2.484375, + "learning_rate": 0.00016225017765521797, + "loss": 1.9891, + "step": 243160 + }, + { + "epoch": 0.57, + "grad_norm": 2.296875, + "learning_rate": 0.0001622487311376976, + "loss": 2.1264, + "step": 243165 + }, + { + "epoch": 0.57, + "grad_norm": 2.265625, + "learning_rate": 0.00016224728459891195, + "loss": 1.9731, + "step": 243170 + }, + { + "epoch": 0.57, + "grad_norm": 1.9609375, + "learning_rate": 0.00016224583803886147, + "loss": 1.9731, + "step": 243175 + }, + { + "epoch": 0.57, + "grad_norm": 2.09375, + "learning_rate": 0.00016224439145754666, + "loss": 2.0929, + "step": 243180 + }, + { + "epoch": 0.57, + "grad_norm": 2.171875, + "learning_rate": 0.0001622429448549681, + "loss": 2.145, + "step": 243185 + }, + { + "epoch": 0.57, + "grad_norm": 2.03125, + "learning_rate": 0.00016224149823112618, + "loss": 2.0461, + "step": 243190 + }, + { + "epoch": 0.57, + "grad_norm": 2.34375, + "learning_rate": 0.00016224005158602144, + "loss": 2.1061, + "step": 243195 + }, + { + "epoch": 0.57, + "grad_norm": 2.59375, + "learning_rate": 0.0001622386049196544, + "loss": 1.9086, + "step": 243200 + }, + { + "epoch": 0.57, + "grad_norm": 2.40625, + "learning_rate": 0.0001622371582320255, + "loss": 2.2414, + "step": 243205 + }, + { + "epoch": 0.57, + "grad_norm": 2.46875, + "learning_rate": 0.00016223571152313525, + "loss": 2.1832, + "step": 243210 + }, + { + "epoch": 0.57, + "grad_norm": 2.4375, + "learning_rate": 0.00016223426479298415, + "loss": 1.9817, + "step": 243215 + }, + { + "epoch": 0.57, + "grad_norm": 2.078125, + "learning_rate": 0.00016223281804157271, + "loss": 2.0647, + "step": 243220 + }, + { + "epoch": 0.57, + "grad_norm": 2.171875, + "learning_rate": 0.00016223137126890143, + "loss": 2.0268, + "step": 243225 + }, + { + "epoch": 0.57, + "grad_norm": 2.140625, + "learning_rate": 0.00016222992447497073, + "loss": 2.1603, + "step": 243230 + }, + { + "epoch": 0.57, + "grad_norm": 1.5234375, + "learning_rate": 0.0001622284776597812, + "loss": 2.0324, + "step": 243235 + }, + { + "epoch": 0.57, + "grad_norm": 2.53125, + "learning_rate": 0.00016222703082333326, + "loss": 1.877, + "step": 243240 + }, + { + "epoch": 0.57, + "grad_norm": 2.59375, + "learning_rate": 0.00016222558396562748, + "loss": 2.3329, + "step": 243245 + }, + { + "epoch": 0.57, + "grad_norm": 1.9375, + "learning_rate": 0.0001622241370866643, + "loss": 2.0018, + "step": 243250 + }, + { + "epoch": 0.57, + "grad_norm": 2.828125, + "learning_rate": 0.00016222269018644422, + "loss": 1.9975, + "step": 243255 + }, + { + "epoch": 0.57, + "grad_norm": 2.296875, + "learning_rate": 0.00016222124326496773, + "loss": 2.1787, + "step": 243260 + }, + { + "epoch": 0.57, + "grad_norm": 2.28125, + "learning_rate": 0.00016221979632223535, + "loss": 2.0101, + "step": 243265 + }, + { + "epoch": 0.57, + "grad_norm": 2.015625, + "learning_rate": 0.00016221834935824757, + "loss": 1.9894, + "step": 243270 + }, + { + "epoch": 0.57, + "grad_norm": 2.0, + "learning_rate": 0.00016221690237300484, + "loss": 2.0704, + "step": 243275 + }, + { + "epoch": 0.57, + "grad_norm": 2.109375, + "learning_rate": 0.00016221545536650772, + "loss": 1.9397, + "step": 243280 + }, + { + "epoch": 0.57, + "grad_norm": 3.671875, + "learning_rate": 0.00016221400833875665, + "loss": 2.183, + "step": 243285 + }, + { + "epoch": 0.57, + "grad_norm": 1.8359375, + "learning_rate": 0.00016221256128975216, + "loss": 1.9678, + "step": 243290 + }, + { + "epoch": 0.57, + "grad_norm": 1.9765625, + "learning_rate": 0.00016221111421949473, + "loss": 2.0414, + "step": 243295 + }, + { + "epoch": 0.57, + "grad_norm": 2.21875, + "learning_rate": 0.00016220966712798484, + "loss": 2.0509, + "step": 243300 + }, + { + "epoch": 0.57, + "grad_norm": 2.15625, + "learning_rate": 0.00016220822001522303, + "loss": 1.9574, + "step": 243305 + }, + { + "epoch": 0.57, + "grad_norm": 1.8515625, + "learning_rate": 0.00016220677288120974, + "loss": 2.1114, + "step": 243310 + }, + { + "epoch": 0.57, + "grad_norm": 6.25, + "learning_rate": 0.00016220532572594547, + "loss": 2.0657, + "step": 243315 + }, + { + "epoch": 0.57, + "grad_norm": 2.296875, + "learning_rate": 0.00016220387854943077, + "loss": 1.9917, + "step": 243320 + }, + { + "epoch": 0.57, + "grad_norm": 2.140625, + "learning_rate": 0.00016220243135166607, + "loss": 1.9491, + "step": 243325 + }, + { + "epoch": 0.57, + "grad_norm": 2.328125, + "learning_rate": 0.0001622009841326519, + "loss": 2.2193, + "step": 243330 + }, + { + "epoch": 0.57, + "grad_norm": 1.984375, + "learning_rate": 0.00016219953689238875, + "loss": 1.9876, + "step": 243335 + }, + { + "epoch": 0.57, + "grad_norm": 2.359375, + "learning_rate": 0.0001621980896308771, + "loss": 1.9722, + "step": 243340 + }, + { + "epoch": 0.57, + "grad_norm": 2.0625, + "learning_rate": 0.00016219664234811746, + "loss": 2.0596, + "step": 243345 + }, + { + "epoch": 0.57, + "grad_norm": 1.8984375, + "learning_rate": 0.00016219519504411033, + "loss": 1.8539, + "step": 243350 + }, + { + "epoch": 0.57, + "grad_norm": 2.59375, + "learning_rate": 0.00016219374771885622, + "loss": 2.0212, + "step": 243355 + }, + { + "epoch": 0.57, + "grad_norm": 2.703125, + "learning_rate": 0.00016219230037235556, + "loss": 2.1319, + "step": 243360 + }, + { + "epoch": 0.57, + "grad_norm": 2.265625, + "learning_rate": 0.0001621908530046089, + "loss": 2.0707, + "step": 243365 + }, + { + "epoch": 0.57, + "grad_norm": 2.328125, + "learning_rate": 0.00016218940561561667, + "loss": 2.0203, + "step": 243370 + }, + { + "epoch": 0.57, + "grad_norm": 2.34375, + "learning_rate": 0.00016218795820537943, + "loss": 1.9577, + "step": 243375 + }, + { + "epoch": 0.57, + "grad_norm": 2.0, + "learning_rate": 0.0001621865107738977, + "loss": 2.1335, + "step": 243380 + }, + { + "epoch": 0.57, + "grad_norm": 2.0, + "learning_rate": 0.0001621850633211719, + "loss": 2.0427, + "step": 243385 + }, + { + "epoch": 0.57, + "grad_norm": 1.8359375, + "learning_rate": 0.00016218361584720254, + "loss": 1.9967, + "step": 243390 + }, + { + "epoch": 0.57, + "grad_norm": 2.265625, + "learning_rate": 0.00016218216835199014, + "loss": 2.0986, + "step": 243395 + }, + { + "epoch": 0.57, + "grad_norm": 1.921875, + "learning_rate": 0.00016218072083553516, + "loss": 2.1421, + "step": 243400 + }, + { + "epoch": 0.57, + "grad_norm": 2.25, + "learning_rate": 0.00016217927329783817, + "loss": 2.1897, + "step": 243405 + }, + { + "epoch": 0.57, + "grad_norm": 2.5625, + "learning_rate": 0.0001621778257388996, + "loss": 2.0803, + "step": 243410 + }, + { + "epoch": 0.57, + "grad_norm": 1.6953125, + "learning_rate": 0.00016217637815871993, + "loss": 2.0056, + "step": 243415 + }, + { + "epoch": 0.57, + "grad_norm": 1.9609375, + "learning_rate": 0.00016217493055729966, + "loss": 2.0841, + "step": 243420 + }, + { + "epoch": 0.57, + "grad_norm": 2.578125, + "learning_rate": 0.00016217348293463938, + "loss": 1.8939, + "step": 243425 + }, + { + "epoch": 0.57, + "grad_norm": 2.09375, + "learning_rate": 0.00016217203529073947, + "loss": 2.2057, + "step": 243430 + }, + { + "epoch": 0.57, + "grad_norm": 2.1875, + "learning_rate": 0.00016217058762560047, + "loss": 2.0085, + "step": 243435 + }, + { + "epoch": 0.57, + "grad_norm": 2.140625, + "learning_rate": 0.00016216913993922287, + "loss": 2.1189, + "step": 243440 + }, + { + "epoch": 0.57, + "grad_norm": 2.28125, + "learning_rate": 0.00016216769223160714, + "loss": 2.2421, + "step": 243445 + }, + { + "epoch": 0.57, + "grad_norm": 2.109375, + "learning_rate": 0.0001621662445027538, + "loss": 1.9619, + "step": 243450 + }, + { + "epoch": 0.57, + "grad_norm": 2.625, + "learning_rate": 0.00016216479675266337, + "loss": 2.1725, + "step": 243455 + }, + { + "epoch": 0.57, + "grad_norm": 2.234375, + "learning_rate": 0.00016216334898133634, + "loss": 2.2154, + "step": 243460 + }, + { + "epoch": 0.57, + "grad_norm": 2.15625, + "learning_rate": 0.00016216190118877318, + "loss": 2.111, + "step": 243465 + }, + { + "epoch": 0.57, + "grad_norm": 2.25, + "learning_rate": 0.00016216045337497434, + "loss": 2.0166, + "step": 243470 + }, + { + "epoch": 0.57, + "grad_norm": 1.9765625, + "learning_rate": 0.00016215900553994038, + "loss": 2.1515, + "step": 243475 + }, + { + "epoch": 0.57, + "grad_norm": 1.9765625, + "learning_rate": 0.00016215755768367177, + "loss": 2.0156, + "step": 243480 + }, + { + "epoch": 0.57, + "grad_norm": 1.96875, + "learning_rate": 0.000162156109806169, + "loss": 2.1528, + "step": 243485 + }, + { + "epoch": 0.57, + "grad_norm": 2.140625, + "learning_rate": 0.00016215466190743264, + "loss": 2.2043, + "step": 243490 + }, + { + "epoch": 0.57, + "grad_norm": 2.578125, + "learning_rate": 0.0001621532139874631, + "loss": 1.8991, + "step": 243495 + }, + { + "epoch": 0.57, + "grad_norm": 2.765625, + "learning_rate": 0.00016215176604626082, + "loss": 2.1131, + "step": 243500 + }, + { + "epoch": 0.57, + "grad_norm": 2.1875, + "learning_rate": 0.00016215031808382643, + "loss": 2.3821, + "step": 243505 + }, + { + "epoch": 0.57, + "grad_norm": 2.296875, + "learning_rate": 0.00016214887010016036, + "loss": 2.3339, + "step": 243510 + }, + { + "epoch": 0.57, + "grad_norm": 2.125, + "learning_rate": 0.0001621474220952631, + "loss": 1.9778, + "step": 243515 + }, + { + "epoch": 0.57, + "grad_norm": 2.15625, + "learning_rate": 0.00016214597406913515, + "loss": 1.9066, + "step": 243520 + }, + { + "epoch": 0.57, + "grad_norm": 2.140625, + "learning_rate": 0.00016214452602177702, + "loss": 2.1346, + "step": 243525 + }, + { + "epoch": 0.57, + "grad_norm": 2.390625, + "learning_rate": 0.0001621430779531892, + "loss": 2.0968, + "step": 243530 + }, + { + "epoch": 0.57, + "grad_norm": 1.9609375, + "learning_rate": 0.00016214162986337216, + "loss": 2.1052, + "step": 243535 + }, + { + "epoch": 0.57, + "grad_norm": 2.03125, + "learning_rate": 0.00016214018175232645, + "loss": 1.916, + "step": 243540 + }, + { + "epoch": 0.57, + "grad_norm": 2.359375, + "learning_rate": 0.00016213873362005247, + "loss": 1.8045, + "step": 243545 + }, + { + "epoch": 0.57, + "grad_norm": 2.328125, + "learning_rate": 0.0001621372854665508, + "loss": 1.9866, + "step": 243550 + }, + { + "epoch": 0.57, + "grad_norm": 2.328125, + "learning_rate": 0.00016213583729182189, + "loss": 1.9016, + "step": 243555 + }, + { + "epoch": 0.57, + "grad_norm": 2.109375, + "learning_rate": 0.0001621343890958663, + "loss": 2.1208, + "step": 243560 + }, + { + "epoch": 0.57, + "grad_norm": 2.4375, + "learning_rate": 0.00016213294087868442, + "loss": 2.0597, + "step": 243565 + }, + { + "epoch": 0.57, + "grad_norm": 2.328125, + "learning_rate": 0.00016213149264027684, + "loss": 2.1534, + "step": 243570 + }, + { + "epoch": 0.57, + "grad_norm": 1.8203125, + "learning_rate": 0.00016213004438064398, + "loss": 2.0574, + "step": 243575 + }, + { + "epoch": 0.57, + "grad_norm": 2.109375, + "learning_rate": 0.00016212859609978638, + "loss": 2.0418, + "step": 243580 + }, + { + "epoch": 0.57, + "grad_norm": 2.234375, + "learning_rate": 0.00016212714779770452, + "loss": 2.1283, + "step": 243585 + }, + { + "epoch": 0.57, + "grad_norm": 1.78125, + "learning_rate": 0.00016212569947439892, + "loss": 2.2303, + "step": 243590 + }, + { + "epoch": 0.57, + "grad_norm": 2.453125, + "learning_rate": 0.00016212425112987005, + "loss": 1.993, + "step": 243595 + }, + { + "epoch": 0.57, + "grad_norm": 2.6875, + "learning_rate": 0.0001621228027641184, + "loss": 2.0657, + "step": 243600 + }, + { + "epoch": 0.57, + "grad_norm": 2.21875, + "learning_rate": 0.00016212135437714448, + "loss": 1.9216, + "step": 243605 + }, + { + "epoch": 0.57, + "grad_norm": 2.328125, + "learning_rate": 0.00016211990596894877, + "loss": 2.0786, + "step": 243610 + }, + { + "epoch": 0.57, + "grad_norm": 2.265625, + "learning_rate": 0.00016211845753953178, + "loss": 1.9651, + "step": 243615 + }, + { + "epoch": 0.57, + "grad_norm": 2.03125, + "learning_rate": 0.00016211700908889403, + "loss": 2.0617, + "step": 243620 + }, + { + "epoch": 0.57, + "grad_norm": 2.09375, + "learning_rate": 0.00016211556061703592, + "loss": 1.9852, + "step": 243625 + }, + { + "epoch": 0.57, + "grad_norm": 2.21875, + "learning_rate": 0.00016211411212395805, + "loss": 2.0824, + "step": 243630 + }, + { + "epoch": 0.57, + "grad_norm": 2.296875, + "learning_rate": 0.00016211266360966086, + "loss": 2.2345, + "step": 243635 + }, + { + "epoch": 0.57, + "grad_norm": 2.234375, + "learning_rate": 0.00016211121507414487, + "loss": 2.1369, + "step": 243640 + }, + { + "epoch": 0.57, + "grad_norm": 2.234375, + "learning_rate": 0.00016210976651741054, + "loss": 2.0844, + "step": 243645 + }, + { + "epoch": 0.57, + "grad_norm": 2.078125, + "learning_rate": 0.00016210831793945843, + "loss": 2.184, + "step": 243650 + }, + { + "epoch": 0.57, + "grad_norm": 2.015625, + "learning_rate": 0.00016210686934028896, + "loss": 2.1063, + "step": 243655 + }, + { + "epoch": 0.57, + "grad_norm": 1.9609375, + "learning_rate": 0.00016210542071990264, + "loss": 2.0166, + "step": 243660 + }, + { + "epoch": 0.57, + "grad_norm": 2.078125, + "learning_rate": 0.0001621039720783, + "loss": 1.9508, + "step": 243665 + }, + { + "epoch": 0.57, + "grad_norm": 2.296875, + "learning_rate": 0.00016210252341548152, + "loss": 2.0953, + "step": 243670 + }, + { + "epoch": 0.57, + "grad_norm": 2.234375, + "learning_rate": 0.0001621010747314477, + "loss": 2.0107, + "step": 243675 + }, + { + "epoch": 0.57, + "grad_norm": 2.375, + "learning_rate": 0.00016209962602619903, + "loss": 2.1169, + "step": 243680 + }, + { + "epoch": 0.57, + "grad_norm": 2.109375, + "learning_rate": 0.00016209817729973596, + "loss": 2.2113, + "step": 243685 + }, + { + "epoch": 0.57, + "grad_norm": 1.90625, + "learning_rate": 0.00016209672855205907, + "loss": 1.9652, + "step": 243690 + }, + { + "epoch": 0.57, + "grad_norm": 2.203125, + "learning_rate": 0.00016209527978316879, + "loss": 2.0958, + "step": 243695 + }, + { + "epoch": 0.57, + "grad_norm": 2.203125, + "learning_rate": 0.00016209383099306565, + "loss": 2.0709, + "step": 243700 + }, + { + "epoch": 0.57, + "grad_norm": 3.0625, + "learning_rate": 0.0001620923821817501, + "loss": 1.9998, + "step": 243705 + }, + { + "epoch": 0.57, + "grad_norm": 2.15625, + "learning_rate": 0.00016209093334922269, + "loss": 1.936, + "step": 243710 + }, + { + "epoch": 0.57, + "grad_norm": 2.359375, + "learning_rate": 0.00016208948449548388, + "loss": 2.0817, + "step": 243715 + }, + { + "epoch": 0.57, + "grad_norm": 2.5, + "learning_rate": 0.0001620880356205342, + "loss": 2.1067, + "step": 243720 + }, + { + "epoch": 0.57, + "grad_norm": 2.0, + "learning_rate": 0.00016208658672437412, + "loss": 2.1491, + "step": 243725 + }, + { + "epoch": 0.57, + "grad_norm": 2.234375, + "learning_rate": 0.0001620851378070041, + "loss": 2.1145, + "step": 243730 + }, + { + "epoch": 0.57, + "grad_norm": 2.359375, + "learning_rate": 0.00016208368886842475, + "loss": 2.1538, + "step": 243735 + }, + { + "epoch": 0.57, + "grad_norm": 1.8125, + "learning_rate": 0.0001620822399086364, + "loss": 1.9922, + "step": 243740 + }, + { + "epoch": 0.57, + "grad_norm": 2.328125, + "learning_rate": 0.00016208079092763965, + "loss": 2.1032, + "step": 243745 + }, + { + "epoch": 0.57, + "grad_norm": 2.171875, + "learning_rate": 0.000162079341925435, + "loss": 2.0743, + "step": 243750 + }, + { + "epoch": 0.57, + "grad_norm": 2.421875, + "learning_rate": 0.00016207789290202288, + "loss": 2.0773, + "step": 243755 + }, + { + "epoch": 0.57, + "grad_norm": 2.828125, + "learning_rate": 0.00016207644385740389, + "loss": 2.0181, + "step": 243760 + }, + { + "epoch": 0.57, + "grad_norm": 2.0, + "learning_rate": 0.0001620749947915784, + "loss": 2.0924, + "step": 243765 + }, + { + "epoch": 0.57, + "grad_norm": 2.1875, + "learning_rate": 0.00016207354570454697, + "loss": 2.1239, + "step": 243770 + }, + { + "epoch": 0.57, + "grad_norm": 2.375, + "learning_rate": 0.00016207209659631016, + "loss": 2.2263, + "step": 243775 + }, + { + "epoch": 0.57, + "grad_norm": 2.484375, + "learning_rate": 0.00016207064746686834, + "loss": 2.0985, + "step": 243780 + }, + { + "epoch": 0.57, + "grad_norm": 2.453125, + "learning_rate": 0.00016206919831622204, + "loss": 2.1724, + "step": 243785 + }, + { + "epoch": 0.57, + "grad_norm": 2.34375, + "learning_rate": 0.00016206774914437184, + "loss": 2.2547, + "step": 243790 + }, + { + "epoch": 0.57, + "grad_norm": 1.859375, + "learning_rate": 0.0001620662999513181, + "loss": 2.0797, + "step": 243795 + }, + { + "epoch": 0.57, + "grad_norm": 2.4375, + "learning_rate": 0.00016206485073706146, + "loss": 2.1272, + "step": 243800 + }, + { + "epoch": 0.57, + "grad_norm": 2.671875, + "learning_rate": 0.00016206340150160228, + "loss": 2.0214, + "step": 243805 + }, + { + "epoch": 0.57, + "grad_norm": 2.140625, + "learning_rate": 0.00016206195224494115, + "loss": 1.9523, + "step": 243810 + }, + { + "epoch": 0.57, + "grad_norm": 2.234375, + "learning_rate": 0.00016206050296707853, + "loss": 2.0019, + "step": 243815 + }, + { + "epoch": 0.57, + "grad_norm": 2.171875, + "learning_rate": 0.0001620590536680149, + "loss": 2.121, + "step": 243820 + }, + { + "epoch": 0.57, + "grad_norm": 2.53125, + "learning_rate": 0.00016205760434775078, + "loss": 2.0177, + "step": 243825 + }, + { + "epoch": 0.57, + "grad_norm": 2.1875, + "learning_rate": 0.00016205615500628666, + "loss": 2.0049, + "step": 243830 + }, + { + "epoch": 0.57, + "grad_norm": 2.0625, + "learning_rate": 0.00016205470564362303, + "loss": 1.9818, + "step": 243835 + }, + { + "epoch": 0.57, + "grad_norm": 2.015625, + "learning_rate": 0.0001620532562597604, + "loss": 2.0863, + "step": 243840 + }, + { + "epoch": 0.57, + "grad_norm": 2.359375, + "learning_rate": 0.00016205180685469923, + "loss": 2.2781, + "step": 243845 + }, + { + "epoch": 0.57, + "grad_norm": 1.9140625, + "learning_rate": 0.00016205035742844004, + "loss": 2.1818, + "step": 243850 + }, + { + "epoch": 0.57, + "grad_norm": 2.234375, + "learning_rate": 0.00016204890798098335, + "loss": 2.205, + "step": 243855 + }, + { + "epoch": 0.57, + "grad_norm": 2.296875, + "learning_rate": 0.00016204745851232957, + "loss": 2.0403, + "step": 243860 + }, + { + "epoch": 0.57, + "grad_norm": 2.109375, + "learning_rate": 0.0001620460090224793, + "loss": 2.1052, + "step": 243865 + }, + { + "epoch": 0.57, + "grad_norm": 2.296875, + "learning_rate": 0.000162044559511433, + "loss": 2.0559, + "step": 243870 + }, + { + "epoch": 0.57, + "grad_norm": 3.421875, + "learning_rate": 0.00016204310997919113, + "loss": 2.0854, + "step": 243875 + }, + { + "epoch": 0.57, + "grad_norm": 2.265625, + "learning_rate": 0.0001620416604257542, + "loss": 2.175, + "step": 243880 + }, + { + "epoch": 0.57, + "grad_norm": 2.1875, + "learning_rate": 0.0001620402108511227, + "loss": 2.1419, + "step": 243885 + }, + { + "epoch": 0.57, + "grad_norm": 1.9765625, + "learning_rate": 0.0001620387612552972, + "loss": 1.9974, + "step": 243890 + }, + { + "epoch": 0.57, + "grad_norm": 2.0, + "learning_rate": 0.00016203731163827808, + "loss": 1.8637, + "step": 243895 + }, + { + "epoch": 0.57, + "grad_norm": 1.953125, + "learning_rate": 0.0001620358620000659, + "loss": 2.0769, + "step": 243900 + }, + { + "epoch": 0.57, + "grad_norm": 1.828125, + "learning_rate": 0.00016203441234066116, + "loss": 2.0284, + "step": 243905 + }, + { + "epoch": 0.57, + "grad_norm": 2.296875, + "learning_rate": 0.00016203296266006434, + "loss": 2.0555, + "step": 243910 + }, + { + "epoch": 0.57, + "grad_norm": 2.734375, + "learning_rate": 0.0001620315129582759, + "loss": 2.2678, + "step": 243915 + }, + { + "epoch": 0.57, + "grad_norm": 2.203125, + "learning_rate": 0.0001620300632352964, + "loss": 2.1226, + "step": 243920 + }, + { + "epoch": 0.57, + "grad_norm": 2.203125, + "learning_rate": 0.00016202861349112632, + "loss": 2.0532, + "step": 243925 + }, + { + "epoch": 0.57, + "grad_norm": 2.234375, + "learning_rate": 0.0001620271637257661, + "loss": 2.0111, + "step": 243930 + }, + { + "epoch": 0.57, + "grad_norm": 2.203125, + "learning_rate": 0.00016202571393921628, + "loss": 2.0061, + "step": 243935 + }, + { + "epoch": 0.57, + "grad_norm": 2.328125, + "learning_rate": 0.0001620242641314774, + "loss": 2.1677, + "step": 243940 + }, + { + "epoch": 0.57, + "grad_norm": 2.234375, + "learning_rate": 0.00016202281430254987, + "loss": 2.1236, + "step": 243945 + }, + { + "epoch": 0.57, + "grad_norm": 2.1875, + "learning_rate": 0.0001620213644524342, + "loss": 2.1445, + "step": 243950 + }, + { + "epoch": 0.57, + "grad_norm": 1.84375, + "learning_rate": 0.00016201991458113097, + "loss": 2.2612, + "step": 243955 + }, + { + "epoch": 0.57, + "grad_norm": 2.234375, + "learning_rate": 0.00016201846468864056, + "loss": 2.0092, + "step": 243960 + }, + { + "epoch": 0.57, + "grad_norm": 2.109375, + "learning_rate": 0.0001620170147749635, + "loss": 2.0305, + "step": 243965 + }, + { + "epoch": 0.57, + "grad_norm": 2.234375, + "learning_rate": 0.00016201556484010036, + "loss": 2.1279, + "step": 243970 + }, + { + "epoch": 0.57, + "grad_norm": 2.53125, + "learning_rate": 0.00016201411488405156, + "loss": 1.9355, + "step": 243975 + }, + { + "epoch": 0.57, + "grad_norm": 1.75, + "learning_rate": 0.0001620126649068176, + "loss": 2.2214, + "step": 243980 + }, + { + "epoch": 0.57, + "grad_norm": 2.140625, + "learning_rate": 0.00016201121490839898, + "loss": 2.1001, + "step": 243985 + }, + { + "epoch": 0.57, + "grad_norm": 2.125, + "learning_rate": 0.00016200976488879625, + "loss": 2.0858, + "step": 243990 + }, + { + "epoch": 0.57, + "grad_norm": 1.84375, + "learning_rate": 0.00016200831484800983, + "loss": 2.1211, + "step": 243995 + }, + { + "epoch": 0.57, + "grad_norm": 1.9296875, + "learning_rate": 0.00016200686478604027, + "loss": 2.1583, + "step": 244000 + }, + { + "epoch": 0.57, + "grad_norm": 2.15625, + "learning_rate": 0.000162005414702888, + "loss": 2.2043, + "step": 244005 + }, + { + "epoch": 0.57, + "grad_norm": 2.25, + "learning_rate": 0.00016200396459855357, + "loss": 2.1466, + "step": 244010 + }, + { + "epoch": 0.57, + "grad_norm": 3.40625, + "learning_rate": 0.00016200251447303745, + "loss": 2.1168, + "step": 244015 + }, + { + "epoch": 0.57, + "grad_norm": 2.203125, + "learning_rate": 0.00016200106432634018, + "loss": 1.9808, + "step": 244020 + }, + { + "epoch": 0.57, + "grad_norm": 1.9375, + "learning_rate": 0.0001619996141584622, + "loss": 2.0198, + "step": 244025 + }, + { + "epoch": 0.57, + "grad_norm": 2.171875, + "learning_rate": 0.000161998163969404, + "loss": 1.9645, + "step": 244030 + }, + { + "epoch": 0.57, + "grad_norm": 1.9921875, + "learning_rate": 0.00016199671375916616, + "loss": 2.0652, + "step": 244035 + }, + { + "epoch": 0.57, + "grad_norm": 1.9140625, + "learning_rate": 0.00016199526352774908, + "loss": 2.0821, + "step": 244040 + }, + { + "epoch": 0.57, + "grad_norm": 1.921875, + "learning_rate": 0.0001619938132751533, + "loss": 2.0753, + "step": 244045 + }, + { + "epoch": 0.57, + "grad_norm": 2.171875, + "learning_rate": 0.00016199236300137933, + "loss": 2.0366, + "step": 244050 + }, + { + "epoch": 0.57, + "grad_norm": 2.28125, + "learning_rate": 0.00016199091270642762, + "loss": 2.1245, + "step": 244055 + }, + { + "epoch": 0.57, + "grad_norm": 2.28125, + "learning_rate": 0.0001619894623902987, + "loss": 2.1949, + "step": 244060 + }, + { + "epoch": 0.57, + "grad_norm": 2.40625, + "learning_rate": 0.00016198801205299302, + "loss": 1.9073, + "step": 244065 + }, + { + "epoch": 0.57, + "grad_norm": 2.140625, + "learning_rate": 0.0001619865616945112, + "loss": 2.2828, + "step": 244070 + }, + { + "epoch": 0.57, + "grad_norm": 1.9453125, + "learning_rate": 0.00016198511131485356, + "loss": 1.9337, + "step": 244075 + }, + { + "epoch": 0.57, + "grad_norm": 1.9765625, + "learning_rate": 0.0001619836609140207, + "loss": 2.0147, + "step": 244080 + }, + { + "epoch": 0.57, + "grad_norm": 2.0625, + "learning_rate": 0.00016198221049201314, + "loss": 2.2223, + "step": 244085 + }, + { + "epoch": 0.57, + "grad_norm": 2.234375, + "learning_rate": 0.00016198076004883126, + "loss": 2.0928, + "step": 244090 + }, + { + "epoch": 0.57, + "grad_norm": 2.265625, + "learning_rate": 0.0001619793095844757, + "loss": 2.0901, + "step": 244095 + }, + { + "epoch": 0.57, + "grad_norm": 2.3125, + "learning_rate": 0.00016197785909894685, + "loss": 2.2035, + "step": 244100 + }, + { + "epoch": 0.57, + "grad_norm": 2.40625, + "learning_rate": 0.00016197640859224525, + "loss": 2.1545, + "step": 244105 + }, + { + "epoch": 0.57, + "grad_norm": 2.03125, + "learning_rate": 0.00016197495806437139, + "loss": 2.0926, + "step": 244110 + }, + { + "epoch": 0.57, + "grad_norm": 2.046875, + "learning_rate": 0.00016197350751532573, + "loss": 2.0775, + "step": 244115 + }, + { + "epoch": 0.57, + "grad_norm": 2.109375, + "learning_rate": 0.0001619720569451088, + "loss": 2.1513, + "step": 244120 + }, + { + "epoch": 0.57, + "grad_norm": 2.0625, + "learning_rate": 0.0001619706063537211, + "loss": 2.0407, + "step": 244125 + }, + { + "epoch": 0.57, + "grad_norm": 2.15625, + "learning_rate": 0.00016196915574116316, + "loss": 2.2966, + "step": 244130 + }, + { + "epoch": 0.57, + "grad_norm": 1.78125, + "learning_rate": 0.0001619677051074354, + "loss": 2.3476, + "step": 244135 + }, + { + "epoch": 0.57, + "grad_norm": 2.34375, + "learning_rate": 0.00016196625445253835, + "loss": 2.1523, + "step": 244140 + }, + { + "epoch": 0.57, + "grad_norm": 2.421875, + "learning_rate": 0.00016196480377647247, + "loss": 2.1069, + "step": 244145 + }, + { + "epoch": 0.57, + "grad_norm": 2.703125, + "learning_rate": 0.00016196335307923834, + "loss": 2.1929, + "step": 244150 + }, + { + "epoch": 0.57, + "grad_norm": 2.109375, + "learning_rate": 0.00016196190236083638, + "loss": 2.1024, + "step": 244155 + }, + { + "epoch": 0.57, + "grad_norm": 2.078125, + "learning_rate": 0.0001619604516212671, + "loss": 2.2931, + "step": 244160 + }, + { + "epoch": 0.57, + "grad_norm": 2.5625, + "learning_rate": 0.000161959000860531, + "loss": 2.1808, + "step": 244165 + }, + { + "epoch": 0.57, + "grad_norm": 1.7109375, + "learning_rate": 0.0001619575500786286, + "loss": 1.8822, + "step": 244170 + }, + { + "epoch": 0.57, + "grad_norm": 2.296875, + "learning_rate": 0.0001619560992755604, + "loss": 2.225, + "step": 244175 + }, + { + "epoch": 0.57, + "grad_norm": 2.125, + "learning_rate": 0.00016195464845132683, + "loss": 2.186, + "step": 244180 + }, + { + "epoch": 0.57, + "grad_norm": 2.203125, + "learning_rate": 0.00016195319760592847, + "loss": 2.1865, + "step": 244185 + }, + { + "epoch": 0.57, + "grad_norm": 2.453125, + "learning_rate": 0.00016195174673936576, + "loss": 2.2539, + "step": 244190 + }, + { + "epoch": 0.57, + "grad_norm": 2.265625, + "learning_rate": 0.0001619502958516392, + "loss": 2.0785, + "step": 244195 + }, + { + "epoch": 0.57, + "grad_norm": 1.78125, + "learning_rate": 0.00016194884494274926, + "loss": 2.0313, + "step": 244200 + }, + { + "epoch": 0.57, + "grad_norm": 2.21875, + "learning_rate": 0.0001619473940126965, + "loss": 2.1133, + "step": 244205 + }, + { + "epoch": 0.57, + "grad_norm": 2.1875, + "learning_rate": 0.00016194594306148143, + "loss": 2.1939, + "step": 244210 + }, + { + "epoch": 0.57, + "grad_norm": 2.203125, + "learning_rate": 0.00016194449208910446, + "loss": 2.1998, + "step": 244215 + }, + { + "epoch": 0.57, + "grad_norm": 2.359375, + "learning_rate": 0.00016194304109556616, + "loss": 2.0206, + "step": 244220 + }, + { + "epoch": 0.57, + "grad_norm": 2.0, + "learning_rate": 0.00016194159008086695, + "loss": 2.1475, + "step": 244225 + }, + { + "epoch": 0.57, + "grad_norm": 2.296875, + "learning_rate": 0.0001619401390450074, + "loss": 2.0961, + "step": 244230 + }, + { + "epoch": 0.57, + "grad_norm": 2.265625, + "learning_rate": 0.00016193868798798796, + "loss": 2.0551, + "step": 244235 + }, + { + "epoch": 0.57, + "grad_norm": 1.9375, + "learning_rate": 0.00016193723690980914, + "loss": 1.9822, + "step": 244240 + }, + { + "epoch": 0.57, + "grad_norm": 2.53125, + "learning_rate": 0.00016193578581047145, + "loss": 2.2031, + "step": 244245 + }, + { + "epoch": 0.57, + "grad_norm": 1.9375, + "learning_rate": 0.00016193433468997536, + "loss": 2.0945, + "step": 244250 + }, + { + "epoch": 0.57, + "grad_norm": 2.09375, + "learning_rate": 0.00016193288354832136, + "loss": 2.1204, + "step": 244255 + }, + { + "epoch": 0.57, + "grad_norm": 2.421875, + "learning_rate": 0.00016193143238551, + "loss": 2.0915, + "step": 244260 + }, + { + "epoch": 0.57, + "grad_norm": 2.109375, + "learning_rate": 0.00016192998120154173, + "loss": 2.0794, + "step": 244265 + }, + { + "epoch": 0.57, + "grad_norm": 2.15625, + "learning_rate": 0.00016192852999641705, + "loss": 2.273, + "step": 244270 + }, + { + "epoch": 0.57, + "grad_norm": 2.09375, + "learning_rate": 0.00016192707877013646, + "loss": 2.122, + "step": 244275 + }, + { + "epoch": 0.57, + "grad_norm": 2.171875, + "learning_rate": 0.00016192562752270043, + "loss": 1.9358, + "step": 244280 + }, + { + "epoch": 0.57, + "grad_norm": 2.3125, + "learning_rate": 0.00016192417625410954, + "loss": 1.8256, + "step": 244285 + }, + { + "epoch": 0.57, + "grad_norm": 1.859375, + "learning_rate": 0.00016192272496436417, + "loss": 2.1049, + "step": 244290 + }, + { + "epoch": 0.57, + "grad_norm": 2.15625, + "learning_rate": 0.00016192127365346494, + "loss": 2.2172, + "step": 244295 + }, + { + "epoch": 0.57, + "grad_norm": 2.40625, + "learning_rate": 0.00016191982232141222, + "loss": 2.0817, + "step": 244300 + }, + { + "epoch": 0.57, + "grad_norm": 2.171875, + "learning_rate": 0.00016191837096820658, + "loss": 2.0416, + "step": 244305 + }, + { + "epoch": 0.57, + "grad_norm": 2.453125, + "learning_rate": 0.00016191691959384854, + "loss": 2.142, + "step": 244310 + }, + { + "epoch": 0.57, + "grad_norm": 2.234375, + "learning_rate": 0.00016191546819833852, + "loss": 2.1264, + "step": 244315 + }, + { + "epoch": 0.57, + "grad_norm": 2.609375, + "learning_rate": 0.00016191401678167705, + "loss": 1.9854, + "step": 244320 + }, + { + "epoch": 0.57, + "grad_norm": 2.1875, + "learning_rate": 0.00016191256534386466, + "loss": 1.9003, + "step": 244325 + }, + { + "epoch": 0.57, + "grad_norm": 2.140625, + "learning_rate": 0.0001619111138849018, + "loss": 2.2064, + "step": 244330 + }, + { + "epoch": 0.57, + "grad_norm": 1.984375, + "learning_rate": 0.00016190966240478896, + "loss": 2.1195, + "step": 244335 + }, + { + "epoch": 0.58, + "grad_norm": 2.359375, + "learning_rate": 0.00016190821090352668, + "loss": 2.1748, + "step": 244340 + }, + { + "epoch": 0.58, + "grad_norm": 2.078125, + "learning_rate": 0.0001619067593811154, + "loss": 2.1547, + "step": 244345 + }, + { + "epoch": 0.58, + "grad_norm": 1.890625, + "learning_rate": 0.0001619053078375557, + "loss": 1.9497, + "step": 244350 + }, + { + "epoch": 0.58, + "grad_norm": 2.484375, + "learning_rate": 0.00016190385627284798, + "loss": 2.2321, + "step": 244355 + }, + { + "epoch": 0.58, + "grad_norm": 2.140625, + "learning_rate": 0.0001619024046869928, + "loss": 2.0406, + "step": 244360 + }, + { + "epoch": 0.58, + "grad_norm": 2.0, + "learning_rate": 0.00016190095307999064, + "loss": 2.1328, + "step": 244365 + }, + { + "epoch": 0.58, + "grad_norm": 2.25, + "learning_rate": 0.00016189950145184198, + "loss": 2.1921, + "step": 244370 + }, + { + "epoch": 0.58, + "grad_norm": 2.015625, + "learning_rate": 0.00016189804980254734, + "loss": 2.0696, + "step": 244375 + }, + { + "epoch": 0.58, + "grad_norm": 2.046875, + "learning_rate": 0.00016189659813210718, + "loss": 2.1017, + "step": 244380 + }, + { + "epoch": 0.58, + "grad_norm": 2.203125, + "learning_rate": 0.00016189514644052202, + "loss": 2.2345, + "step": 244385 + }, + { + "epoch": 0.58, + "grad_norm": 2.3125, + "learning_rate": 0.0001618936947277924, + "loss": 1.9192, + "step": 244390 + }, + { + "epoch": 0.58, + "grad_norm": 2.3125, + "learning_rate": 0.00016189224299391873, + "loss": 2.0996, + "step": 244395 + }, + { + "epoch": 0.58, + "grad_norm": 2.453125, + "learning_rate": 0.00016189079123890156, + "loss": 2.2466, + "step": 244400 + }, + { + "epoch": 0.58, + "grad_norm": 2.359375, + "learning_rate": 0.00016188933946274139, + "loss": 1.9729, + "step": 244405 + }, + { + "epoch": 0.58, + "grad_norm": 2.09375, + "learning_rate": 0.00016188788766543868, + "loss": 2.1324, + "step": 244410 + }, + { + "epoch": 0.58, + "grad_norm": 2.015625, + "learning_rate": 0.00016188643584699394, + "loss": 2.1552, + "step": 244415 + }, + { + "epoch": 0.58, + "grad_norm": 1.8125, + "learning_rate": 0.00016188498400740767, + "loss": 2.1038, + "step": 244420 + }, + { + "epoch": 0.58, + "grad_norm": 1.8671875, + "learning_rate": 0.00016188353214668038, + "loss": 2.1589, + "step": 244425 + }, + { + "epoch": 0.58, + "grad_norm": 2.09375, + "learning_rate": 0.00016188208026481256, + "loss": 1.9728, + "step": 244430 + }, + { + "epoch": 0.58, + "grad_norm": 2.1875, + "learning_rate": 0.0001618806283618047, + "loss": 2.0987, + "step": 244435 + }, + { + "epoch": 0.58, + "grad_norm": 1.8125, + "learning_rate": 0.00016187917643765726, + "loss": 1.9886, + "step": 244440 + }, + { + "epoch": 0.58, + "grad_norm": 2.515625, + "learning_rate": 0.0001618777244923708, + "loss": 2.1912, + "step": 244445 + }, + { + "epoch": 0.58, + "grad_norm": 2.328125, + "learning_rate": 0.00016187627252594577, + "loss": 2.0024, + "step": 244450 + }, + { + "epoch": 0.58, + "grad_norm": 2.140625, + "learning_rate": 0.0001618748205383827, + "loss": 2.0312, + "step": 244455 + }, + { + "epoch": 0.58, + "grad_norm": 2.0, + "learning_rate": 0.00016187336852968207, + "loss": 2.0619, + "step": 244460 + }, + { + "epoch": 0.58, + "grad_norm": 2.484375, + "learning_rate": 0.00016187191649984434, + "loss": 2.2478, + "step": 244465 + }, + { + "epoch": 0.58, + "grad_norm": 2.515625, + "learning_rate": 0.0001618704644488701, + "loss": 2.0945, + "step": 244470 + }, + { + "epoch": 0.58, + "grad_norm": 2.1875, + "learning_rate": 0.00016186901237675975, + "loss": 2.0433, + "step": 244475 + }, + { + "epoch": 0.58, + "grad_norm": 1.9296875, + "learning_rate": 0.00016186756028351384, + "loss": 2.2363, + "step": 244480 + }, + { + "epoch": 0.58, + "grad_norm": 2.25, + "learning_rate": 0.00016186610816913284, + "loss": 1.9911, + "step": 244485 + }, + { + "epoch": 0.58, + "grad_norm": 2.171875, + "learning_rate": 0.00016186465603361725, + "loss": 2.2077, + "step": 244490 + }, + { + "epoch": 0.58, + "grad_norm": 2.15625, + "learning_rate": 0.00016186320387696757, + "loss": 2.1612, + "step": 244495 + }, + { + "epoch": 0.58, + "grad_norm": 2.703125, + "learning_rate": 0.0001618617516991843, + "loss": 2.1976, + "step": 244500 + }, + { + "epoch": 0.58, + "grad_norm": 2.09375, + "learning_rate": 0.00016186029950026796, + "loss": 2.2004, + "step": 244505 + }, + { + "epoch": 0.58, + "grad_norm": 2.109375, + "learning_rate": 0.000161858847280219, + "loss": 2.1969, + "step": 244510 + }, + { + "epoch": 0.58, + "grad_norm": 2.15625, + "learning_rate": 0.00016185739503903792, + "loss": 2.2674, + "step": 244515 + }, + { + "epoch": 0.58, + "grad_norm": 2.0625, + "learning_rate": 0.00016185594277672523, + "loss": 2.1393, + "step": 244520 + }, + { + "epoch": 0.58, + "grad_norm": 2.15625, + "learning_rate": 0.00016185449049328145, + "loss": 1.9235, + "step": 244525 + }, + { + "epoch": 0.58, + "grad_norm": 2.125, + "learning_rate": 0.00016185303818870702, + "loss": 2.2119, + "step": 244530 + }, + { + "epoch": 0.58, + "grad_norm": 2.4375, + "learning_rate": 0.00016185158586300253, + "loss": 1.9484, + "step": 244535 + }, + { + "epoch": 0.58, + "grad_norm": 2.140625, + "learning_rate": 0.0001618501335161684, + "loss": 2.1013, + "step": 244540 + }, + { + "epoch": 0.58, + "grad_norm": 2.390625, + "learning_rate": 0.0001618486811482051, + "loss": 2.0704, + "step": 244545 + }, + { + "epoch": 0.58, + "grad_norm": 2.015625, + "learning_rate": 0.0001618472287591132, + "loss": 1.8286, + "step": 244550 + }, + { + "epoch": 0.58, + "grad_norm": 2.6875, + "learning_rate": 0.00016184577634889316, + "loss": 2.1974, + "step": 244555 + }, + { + "epoch": 0.58, + "grad_norm": 2.609375, + "learning_rate": 0.0001618443239175455, + "loss": 2.2088, + "step": 244560 + }, + { + "epoch": 0.58, + "grad_norm": 2.0, + "learning_rate": 0.00016184287146507067, + "loss": 2.1875, + "step": 244565 + }, + { + "epoch": 0.58, + "grad_norm": 2.25, + "learning_rate": 0.0001618414189914692, + "loss": 2.0851, + "step": 244570 + }, + { + "epoch": 0.58, + "grad_norm": 2.1875, + "learning_rate": 0.00016183996649674157, + "loss": 2.1193, + "step": 244575 + }, + { + "epoch": 0.58, + "grad_norm": 2.734375, + "learning_rate": 0.00016183851398088832, + "loss": 2.013, + "step": 244580 + }, + { + "epoch": 0.58, + "grad_norm": 2.109375, + "learning_rate": 0.0001618370614439099, + "loss": 2.0402, + "step": 244585 + }, + { + "epoch": 0.58, + "grad_norm": 2.1875, + "learning_rate": 0.0001618356088858068, + "loss": 2.0117, + "step": 244590 + }, + { + "epoch": 0.58, + "grad_norm": 2.21875, + "learning_rate": 0.00016183415630657955, + "loss": 1.9771, + "step": 244595 + }, + { + "epoch": 0.58, + "grad_norm": 2.34375, + "learning_rate": 0.00016183270370622862, + "loss": 2.0339, + "step": 244600 + }, + { + "epoch": 0.58, + "grad_norm": 1.7578125, + "learning_rate": 0.00016183125108475453, + "loss": 1.9946, + "step": 244605 + }, + { + "epoch": 0.58, + "grad_norm": 2.390625, + "learning_rate": 0.00016182979844215774, + "loss": 1.9832, + "step": 244610 + }, + { + "epoch": 0.58, + "grad_norm": 2.28125, + "learning_rate": 0.00016182834577843877, + "loss": 2.2106, + "step": 244615 + }, + { + "epoch": 0.58, + "grad_norm": 2.203125, + "learning_rate": 0.00016182689309359813, + "loss": 2.184, + "step": 244620 + }, + { + "epoch": 0.58, + "grad_norm": 2.140625, + "learning_rate": 0.0001618254403876363, + "loss": 2.1178, + "step": 244625 + }, + { + "epoch": 0.58, + "grad_norm": 2.609375, + "learning_rate": 0.0001618239876605538, + "loss": 2.1153, + "step": 244630 + }, + { + "epoch": 0.58, + "grad_norm": 2.21875, + "learning_rate": 0.0001618225349123511, + "loss": 2.1642, + "step": 244635 + }, + { + "epoch": 0.58, + "grad_norm": 2.015625, + "learning_rate": 0.00016182108214302866, + "loss": 2.0679, + "step": 244640 + }, + { + "epoch": 0.58, + "grad_norm": 1.890625, + "learning_rate": 0.00016181962935258706, + "loss": 1.853, + "step": 244645 + }, + { + "epoch": 0.58, + "grad_norm": 2.203125, + "learning_rate": 0.0001618181765410267, + "loss": 2.0919, + "step": 244650 + }, + { + "epoch": 0.58, + "grad_norm": 2.1875, + "learning_rate": 0.0001618167237083482, + "loss": 2.0377, + "step": 244655 + }, + { + "epoch": 0.58, + "grad_norm": 1.9609375, + "learning_rate": 0.00016181527085455194, + "loss": 1.9312, + "step": 244660 + }, + { + "epoch": 0.58, + "grad_norm": 1.9296875, + "learning_rate": 0.0001618138179796385, + "loss": 1.9311, + "step": 244665 + }, + { + "epoch": 0.58, + "grad_norm": 2.234375, + "learning_rate": 0.0001618123650836083, + "loss": 1.9943, + "step": 244670 + }, + { + "epoch": 0.58, + "grad_norm": 1.7109375, + "learning_rate": 0.00016181091216646188, + "loss": 1.9418, + "step": 244675 + }, + { + "epoch": 0.58, + "grad_norm": 2.25, + "learning_rate": 0.00016180945922819975, + "loss": 2.0679, + "step": 244680 + }, + { + "epoch": 0.58, + "grad_norm": 2.0625, + "learning_rate": 0.00016180800626882234, + "loss": 2.0522, + "step": 244685 + }, + { + "epoch": 0.58, + "grad_norm": 2.140625, + "learning_rate": 0.00016180655328833026, + "loss": 2.1064, + "step": 244690 + }, + { + "epoch": 0.58, + "grad_norm": 2.15625, + "learning_rate": 0.00016180510028672394, + "loss": 1.9315, + "step": 244695 + }, + { + "epoch": 0.58, + "grad_norm": 1.96875, + "learning_rate": 0.00016180364726400384, + "loss": 2.1551, + "step": 244700 + }, + { + "epoch": 0.58, + "grad_norm": 1.8671875, + "learning_rate": 0.0001618021942201705, + "loss": 2.0132, + "step": 244705 + }, + { + "epoch": 0.58, + "grad_norm": 1.609375, + "learning_rate": 0.00016180074115522438, + "loss": 2.0845, + "step": 244710 + }, + { + "epoch": 0.58, + "grad_norm": 2.234375, + "learning_rate": 0.00016179928806916606, + "loss": 2.0617, + "step": 244715 + }, + { + "epoch": 0.58, + "grad_norm": 1.765625, + "learning_rate": 0.00016179783496199596, + "loss": 2.0131, + "step": 244720 + }, + { + "epoch": 0.58, + "grad_norm": 2.5625, + "learning_rate": 0.0001617963818337146, + "loss": 1.8438, + "step": 244725 + }, + { + "epoch": 0.58, + "grad_norm": 2.296875, + "learning_rate": 0.00016179492868432247, + "loss": 2.1164, + "step": 244730 + }, + { + "epoch": 0.58, + "grad_norm": 1.5390625, + "learning_rate": 0.00016179347551382008, + "loss": 2.0268, + "step": 244735 + }, + { + "epoch": 0.58, + "grad_norm": 2.328125, + "learning_rate": 0.00016179202232220792, + "loss": 2.221, + "step": 244740 + }, + { + "epoch": 0.58, + "grad_norm": 2.359375, + "learning_rate": 0.00016179056910948646, + "loss": 1.9749, + "step": 244745 + }, + { + "epoch": 0.58, + "grad_norm": 2.59375, + "learning_rate": 0.00016178911587565623, + "loss": 2.1116, + "step": 244750 + }, + { + "epoch": 0.58, + "grad_norm": 2.3125, + "learning_rate": 0.00016178766262071775, + "loss": 2.0811, + "step": 244755 + }, + { + "epoch": 0.58, + "grad_norm": 2.515625, + "learning_rate": 0.00016178620934467145, + "loss": 2.0612, + "step": 244760 + }, + { + "epoch": 0.58, + "grad_norm": 2.1875, + "learning_rate": 0.00016178475604751785, + "loss": 2.2229, + "step": 244765 + }, + { + "epoch": 0.58, + "grad_norm": 1.9921875, + "learning_rate": 0.0001617833027292575, + "loss": 2.1681, + "step": 244770 + }, + { + "epoch": 0.58, + "grad_norm": 2.71875, + "learning_rate": 0.00016178184938989084, + "loss": 2.2352, + "step": 244775 + }, + { + "epoch": 0.58, + "grad_norm": 2.375, + "learning_rate": 0.00016178039602941837, + "loss": 2.2372, + "step": 244780 + }, + { + "epoch": 0.58, + "grad_norm": 2.234375, + "learning_rate": 0.00016177894264784063, + "loss": 2.1863, + "step": 244785 + }, + { + "epoch": 0.58, + "grad_norm": 2.015625, + "learning_rate": 0.000161777489245158, + "loss": 2.0823, + "step": 244790 + }, + { + "epoch": 0.58, + "grad_norm": 2.140625, + "learning_rate": 0.00016177603582137113, + "loss": 2.0898, + "step": 244795 + }, + { + "epoch": 0.58, + "grad_norm": 1.8046875, + "learning_rate": 0.00016177458237648043, + "loss": 2.1202, + "step": 244800 + }, + { + "epoch": 0.58, + "grad_norm": 2.203125, + "learning_rate": 0.0001617731289104864, + "loss": 2.1084, + "step": 244805 + }, + { + "epoch": 0.58, + "grad_norm": 2.171875, + "learning_rate": 0.00016177167542338956, + "loss": 2.1394, + "step": 244810 + }, + { + "epoch": 0.58, + "grad_norm": 1.6875, + "learning_rate": 0.00016177022191519037, + "loss": 2.0114, + "step": 244815 + }, + { + "epoch": 0.58, + "grad_norm": 1.9375, + "learning_rate": 0.00016176876838588938, + "loss": 2.1974, + "step": 244820 + }, + { + "epoch": 0.58, + "grad_norm": 2.6875, + "learning_rate": 0.00016176731483548705, + "loss": 2.2128, + "step": 244825 + }, + { + "epoch": 0.58, + "grad_norm": 2.421875, + "learning_rate": 0.0001617658612639839, + "loss": 2.1429, + "step": 244830 + }, + { + "epoch": 0.58, + "grad_norm": 2.03125, + "learning_rate": 0.0001617644076713804, + "loss": 2.1899, + "step": 244835 + }, + { + "epoch": 0.58, + "grad_norm": 2.484375, + "learning_rate": 0.00016176295405767705, + "loss": 2.0824, + "step": 244840 + }, + { + "epoch": 0.58, + "grad_norm": 2.4375, + "learning_rate": 0.00016176150042287437, + "loss": 2.1317, + "step": 244845 + }, + { + "epoch": 0.58, + "grad_norm": 1.7109375, + "learning_rate": 0.00016176004676697283, + "loss": 2.2175, + "step": 244850 + }, + { + "epoch": 0.58, + "grad_norm": 2.265625, + "learning_rate": 0.00016175859308997293, + "loss": 2.1298, + "step": 244855 + }, + { + "epoch": 0.58, + "grad_norm": 2.28125, + "learning_rate": 0.0001617571393918752, + "loss": 2.198, + "step": 244860 + }, + { + "epoch": 0.58, + "grad_norm": 2.140625, + "learning_rate": 0.00016175568567268012, + "loss": 1.9294, + "step": 244865 + }, + { + "epoch": 0.58, + "grad_norm": 7.90625, + "learning_rate": 0.00016175423193238813, + "loss": 2.1683, + "step": 244870 + }, + { + "epoch": 0.58, + "grad_norm": 2.09375, + "learning_rate": 0.0001617527781709998, + "loss": 2.1178, + "step": 244875 + }, + { + "epoch": 0.58, + "grad_norm": 2.265625, + "learning_rate": 0.00016175132438851558, + "loss": 2.1225, + "step": 244880 + }, + { + "epoch": 0.58, + "grad_norm": 1.8984375, + "learning_rate": 0.00016174987058493602, + "loss": 2.0036, + "step": 244885 + }, + { + "epoch": 0.58, + "grad_norm": 1.8203125, + "learning_rate": 0.0001617484167602616, + "loss": 2.0881, + "step": 244890 + }, + { + "epoch": 0.58, + "grad_norm": 2.140625, + "learning_rate": 0.00016174696291449274, + "loss": 2.1082, + "step": 244895 + }, + { + "epoch": 0.58, + "grad_norm": 1.7890625, + "learning_rate": 0.00016174550904763003, + "loss": 2.0042, + "step": 244900 + }, + { + "epoch": 0.58, + "grad_norm": 2.078125, + "learning_rate": 0.00016174405515967394, + "loss": 2.2027, + "step": 244905 + }, + { + "epoch": 0.58, + "grad_norm": 2.328125, + "learning_rate": 0.00016174260125062495, + "loss": 2.0497, + "step": 244910 + }, + { + "epoch": 0.58, + "grad_norm": 2.3125, + "learning_rate": 0.0001617411473204836, + "loss": 2.1086, + "step": 244915 + }, + { + "epoch": 0.58, + "grad_norm": 2.078125, + "learning_rate": 0.00016173969336925033, + "loss": 2.0886, + "step": 244920 + }, + { + "epoch": 0.58, + "grad_norm": 2.015625, + "learning_rate": 0.00016173823939692565, + "loss": 2.0612, + "step": 244925 + }, + { + "epoch": 0.58, + "grad_norm": 1.671875, + "learning_rate": 0.00016173678540351008, + "loss": 2.0386, + "step": 244930 + }, + { + "epoch": 0.58, + "grad_norm": 2.140625, + "learning_rate": 0.00016173533138900408, + "loss": 2.0625, + "step": 244935 + }, + { + "epoch": 0.58, + "grad_norm": 2.0625, + "learning_rate": 0.00016173387735340822, + "loss": 2.0097, + "step": 244940 + }, + { + "epoch": 0.58, + "grad_norm": 1.8125, + "learning_rate": 0.0001617324232967229, + "loss": 2.0537, + "step": 244945 + }, + { + "epoch": 0.58, + "grad_norm": 2.15625, + "learning_rate": 0.00016173096921894868, + "loss": 2.2367, + "step": 244950 + }, + { + "epoch": 0.58, + "grad_norm": 1.9140625, + "learning_rate": 0.00016172951512008607, + "loss": 2.3426, + "step": 244955 + }, + { + "epoch": 0.58, + "grad_norm": 2.453125, + "learning_rate": 0.00016172806100013548, + "loss": 2.1918, + "step": 244960 + }, + { + "epoch": 0.58, + "grad_norm": 2.171875, + "learning_rate": 0.0001617266068590975, + "loss": 2.1682, + "step": 244965 + }, + { + "epoch": 0.58, + "grad_norm": 2.625, + "learning_rate": 0.00016172515269697262, + "loss": 2.0194, + "step": 244970 + }, + { + "epoch": 0.58, + "grad_norm": 2.09375, + "learning_rate": 0.00016172369851376125, + "loss": 2.1844, + "step": 244975 + }, + { + "epoch": 0.58, + "grad_norm": 1.8359375, + "learning_rate": 0.00016172224430946398, + "loss": 1.9342, + "step": 244980 + }, + { + "epoch": 0.58, + "grad_norm": 2.015625, + "learning_rate": 0.00016172079008408127, + "loss": 2.0615, + "step": 244985 + }, + { + "epoch": 0.58, + "grad_norm": 2.015625, + "learning_rate": 0.00016171933583761363, + "loss": 2.2708, + "step": 244990 + }, + { + "epoch": 0.58, + "grad_norm": 2.421875, + "learning_rate": 0.00016171788157006155, + "loss": 2.1431, + "step": 244995 + }, + { + "epoch": 0.58, + "grad_norm": 1.90625, + "learning_rate": 0.00016171642728142547, + "loss": 2.1969, + "step": 245000 + }, + { + "epoch": 0.58, + "grad_norm": 2.21875, + "learning_rate": 0.00016171497297170597, + "loss": 2.0116, + "step": 245005 + }, + { + "epoch": 0.58, + "grad_norm": 1.9296875, + "learning_rate": 0.00016171351864090351, + "loss": 2.2086, + "step": 245010 + }, + { + "epoch": 0.58, + "grad_norm": 2.15625, + "learning_rate": 0.00016171206428901863, + "loss": 2.0548, + "step": 245015 + }, + { + "epoch": 0.58, + "grad_norm": 2.265625, + "learning_rate": 0.00016171060991605177, + "loss": 2.1626, + "step": 245020 + }, + { + "epoch": 0.58, + "grad_norm": 2.140625, + "learning_rate": 0.00016170915552200343, + "loss": 1.9222, + "step": 245025 + }, + { + "epoch": 0.58, + "grad_norm": 2.546875, + "learning_rate": 0.0001617077011068741, + "loss": 1.9779, + "step": 245030 + }, + { + "epoch": 0.58, + "grad_norm": 2.3125, + "learning_rate": 0.00016170624667066435, + "loss": 2.1609, + "step": 245035 + }, + { + "epoch": 0.58, + "grad_norm": 2.390625, + "learning_rate": 0.00016170479221337464, + "loss": 2.0174, + "step": 245040 + }, + { + "epoch": 0.58, + "grad_norm": 2.375, + "learning_rate": 0.0001617033377350054, + "loss": 2.0852, + "step": 245045 + }, + { + "epoch": 0.58, + "grad_norm": 2.140625, + "learning_rate": 0.0001617018832355572, + "loss": 2.0845, + "step": 245050 + }, + { + "epoch": 0.58, + "grad_norm": 2.515625, + "learning_rate": 0.00016170042871503052, + "loss": 2.2371, + "step": 245055 + }, + { + "epoch": 0.58, + "grad_norm": 2.4375, + "learning_rate": 0.00016169897417342584, + "loss": 1.9791, + "step": 245060 + }, + { + "epoch": 0.58, + "grad_norm": 2.359375, + "learning_rate": 0.00016169751961074368, + "loss": 2.1609, + "step": 245065 + }, + { + "epoch": 0.58, + "grad_norm": 1.9921875, + "learning_rate": 0.00016169606502698455, + "loss": 2.2317, + "step": 245070 + }, + { + "epoch": 0.58, + "grad_norm": 2.46875, + "learning_rate": 0.0001616946104221489, + "loss": 1.937, + "step": 245075 + }, + { + "epoch": 0.58, + "grad_norm": 2.078125, + "learning_rate": 0.00016169315579623726, + "loss": 2.1228, + "step": 245080 + }, + { + "epoch": 0.58, + "grad_norm": 2.109375, + "learning_rate": 0.00016169170114925012, + "loss": 1.959, + "step": 245085 + }, + { + "epoch": 0.58, + "grad_norm": 2.328125, + "learning_rate": 0.00016169024648118798, + "loss": 2.0282, + "step": 245090 + }, + { + "epoch": 0.58, + "grad_norm": 2.0625, + "learning_rate": 0.00016168879179205133, + "loss": 2.1022, + "step": 245095 + }, + { + "epoch": 0.58, + "grad_norm": 2.40625, + "learning_rate": 0.00016168733708184067, + "loss": 2.1504, + "step": 245100 + }, + { + "epoch": 0.58, + "grad_norm": 1.96875, + "learning_rate": 0.0001616858823505565, + "loss": 2.2325, + "step": 245105 + }, + { + "epoch": 0.58, + "grad_norm": 4.03125, + "learning_rate": 0.0001616844275981993, + "loss": 2.0102, + "step": 245110 + }, + { + "epoch": 0.58, + "grad_norm": 2.078125, + "learning_rate": 0.0001616829728247696, + "loss": 2.0851, + "step": 245115 + }, + { + "epoch": 0.58, + "grad_norm": 1.9765625, + "learning_rate": 0.00016168151803026788, + "loss": 1.9974, + "step": 245120 + }, + { + "epoch": 0.58, + "grad_norm": 2.125, + "learning_rate": 0.0001616800632146946, + "loss": 2.0659, + "step": 245125 + }, + { + "epoch": 0.58, + "grad_norm": 2.171875, + "learning_rate": 0.00016167860837805034, + "loss": 2.2563, + "step": 245130 + }, + { + "epoch": 0.58, + "grad_norm": 2.265625, + "learning_rate": 0.0001616771535203355, + "loss": 2.158, + "step": 245135 + }, + { + "epoch": 0.58, + "grad_norm": 2.421875, + "learning_rate": 0.00016167569864155068, + "loss": 2.2661, + "step": 245140 + }, + { + "epoch": 0.58, + "grad_norm": 2.5625, + "learning_rate": 0.00016167424374169626, + "loss": 2.1785, + "step": 245145 + }, + { + "epoch": 0.58, + "grad_norm": 2.4375, + "learning_rate": 0.00016167278882077285, + "loss": 2.1581, + "step": 245150 + }, + { + "epoch": 0.58, + "grad_norm": 1.953125, + "learning_rate": 0.00016167133387878088, + "loss": 1.9351, + "step": 245155 + }, + { + "epoch": 0.58, + "grad_norm": 2.40625, + "learning_rate": 0.00016166987891572088, + "loss": 2.1496, + "step": 245160 + }, + { + "epoch": 0.58, + "grad_norm": 2.4375, + "learning_rate": 0.0001616684239315933, + "loss": 1.9553, + "step": 245165 + }, + { + "epoch": 0.58, + "grad_norm": 2.203125, + "learning_rate": 0.00016166696892639868, + "loss": 2.1932, + "step": 245170 + }, + { + "epoch": 0.58, + "grad_norm": 2.265625, + "learning_rate": 0.00016166551390013752, + "loss": 1.7865, + "step": 245175 + }, + { + "epoch": 0.58, + "grad_norm": 2.09375, + "learning_rate": 0.0001616640588528103, + "loss": 2.0491, + "step": 245180 + }, + { + "epoch": 0.58, + "grad_norm": 2.03125, + "learning_rate": 0.0001616626037844175, + "loss": 2.1157, + "step": 245185 + }, + { + "epoch": 0.58, + "grad_norm": 2.03125, + "learning_rate": 0.00016166114869495962, + "loss": 2.2046, + "step": 245190 + }, + { + "epoch": 0.58, + "grad_norm": 1.9296875, + "learning_rate": 0.00016165969358443723, + "loss": 1.9869, + "step": 245195 + }, + { + "epoch": 0.58, + "grad_norm": 2.0, + "learning_rate": 0.00016165823845285072, + "loss": 2.1504, + "step": 245200 + }, + { + "epoch": 0.58, + "grad_norm": 2.4375, + "learning_rate": 0.00016165678330020065, + "loss": 2.1088, + "step": 245205 + }, + { + "epoch": 0.58, + "grad_norm": 1.9765625, + "learning_rate": 0.0001616553281264875, + "loss": 2.1813, + "step": 245210 + }, + { + "epoch": 0.58, + "grad_norm": 2.078125, + "learning_rate": 0.0001616538729317118, + "loss": 2.0558, + "step": 245215 + }, + { + "epoch": 0.58, + "grad_norm": 2.359375, + "learning_rate": 0.000161652417715874, + "loss": 2.2341, + "step": 245220 + }, + { + "epoch": 0.58, + "grad_norm": 2.46875, + "learning_rate": 0.0001616509624789746, + "loss": 1.9652, + "step": 245225 + }, + { + "epoch": 0.58, + "grad_norm": 2.171875, + "learning_rate": 0.00016164950722101414, + "loss": 2.055, + "step": 245230 + }, + { + "epoch": 0.58, + "grad_norm": 2.625, + "learning_rate": 0.00016164805194199307, + "loss": 2.0241, + "step": 245235 + }, + { + "epoch": 0.58, + "grad_norm": 2.203125, + "learning_rate": 0.00016164659664191192, + "loss": 2.0222, + "step": 245240 + }, + { + "epoch": 0.58, + "grad_norm": 2.03125, + "learning_rate": 0.00016164514132077117, + "loss": 2.0121, + "step": 245245 + }, + { + "epoch": 0.58, + "grad_norm": 2.421875, + "learning_rate": 0.00016164368597857136, + "loss": 2.104, + "step": 245250 + }, + { + "epoch": 0.58, + "grad_norm": 2.0625, + "learning_rate": 0.0001616422306153129, + "loss": 2.0366, + "step": 245255 + }, + { + "epoch": 0.58, + "grad_norm": 1.7578125, + "learning_rate": 0.00016164077523099634, + "loss": 2.1436, + "step": 245260 + }, + { + "epoch": 0.58, + "grad_norm": 2.171875, + "learning_rate": 0.00016163931982562222, + "loss": 2.0681, + "step": 245265 + }, + { + "epoch": 0.58, + "grad_norm": 2.1875, + "learning_rate": 0.00016163786439919094, + "loss": 2.0829, + "step": 245270 + }, + { + "epoch": 0.58, + "grad_norm": 2.796875, + "learning_rate": 0.00016163640895170304, + "loss": 2.0823, + "step": 245275 + }, + { + "epoch": 0.58, + "grad_norm": 2.703125, + "learning_rate": 0.00016163495348315904, + "loss": 1.9873, + "step": 245280 + }, + { + "epoch": 0.58, + "grad_norm": 1.9453125, + "learning_rate": 0.00016163349799355945, + "loss": 2.0033, + "step": 245285 + }, + { + "epoch": 0.58, + "grad_norm": 2.09375, + "learning_rate": 0.0001616320424829047, + "loss": 1.9932, + "step": 245290 + }, + { + "epoch": 0.58, + "grad_norm": 2.125, + "learning_rate": 0.00016163058695119534, + "loss": 2.324, + "step": 245295 + }, + { + "epoch": 0.58, + "grad_norm": 2.453125, + "learning_rate": 0.00016162913139843184, + "loss": 2.0862, + "step": 245300 + }, + { + "epoch": 0.58, + "grad_norm": 1.9296875, + "learning_rate": 0.00016162767582461477, + "loss": 2.1456, + "step": 245305 + }, + { + "epoch": 0.58, + "grad_norm": 2.046875, + "learning_rate": 0.00016162622022974452, + "loss": 2.1234, + "step": 245310 + }, + { + "epoch": 0.58, + "grad_norm": 1.9921875, + "learning_rate": 0.00016162476461382164, + "loss": 2.094, + "step": 245315 + }, + { + "epoch": 0.58, + "grad_norm": 1.9453125, + "learning_rate": 0.00016162330897684664, + "loss": 2.1931, + "step": 245320 + }, + { + "epoch": 0.58, + "grad_norm": 2.296875, + "learning_rate": 0.00016162185331881996, + "loss": 2.0115, + "step": 245325 + }, + { + "epoch": 0.58, + "grad_norm": 2.15625, + "learning_rate": 0.00016162039763974217, + "loss": 2.3129, + "step": 245330 + }, + { + "epoch": 0.58, + "grad_norm": 2.171875, + "learning_rate": 0.00016161894193961372, + "loss": 2.2363, + "step": 245335 + }, + { + "epoch": 0.58, + "grad_norm": 2.0, + "learning_rate": 0.00016161748621843514, + "loss": 2.0193, + "step": 245340 + }, + { + "epoch": 0.58, + "grad_norm": 2.015625, + "learning_rate": 0.0001616160304762069, + "loss": 2.1529, + "step": 245345 + }, + { + "epoch": 0.58, + "grad_norm": 1.671875, + "learning_rate": 0.00016161457471292948, + "loss": 1.9922, + "step": 245350 + }, + { + "epoch": 0.58, + "grad_norm": 2.234375, + "learning_rate": 0.00016161311892860344, + "loss": 2.0387, + "step": 245355 + }, + { + "epoch": 0.58, + "grad_norm": 2.03125, + "learning_rate": 0.00016161166312322921, + "loss": 2.0986, + "step": 245360 + }, + { + "epoch": 0.58, + "grad_norm": 2.21875, + "learning_rate": 0.00016161020729680737, + "loss": 2.2833, + "step": 245365 + }, + { + "epoch": 0.58, + "grad_norm": 2.0, + "learning_rate": 0.00016160875144933834, + "loss": 2.1336, + "step": 245370 + }, + { + "epoch": 0.58, + "grad_norm": 1.9609375, + "learning_rate": 0.00016160729558082262, + "loss": 2.3076, + "step": 245375 + }, + { + "epoch": 0.58, + "grad_norm": 2.109375, + "learning_rate": 0.00016160583969126075, + "loss": 2.1306, + "step": 245380 + }, + { + "epoch": 0.58, + "grad_norm": 2.3125, + "learning_rate": 0.0001616043837806532, + "loss": 1.8614, + "step": 245385 + }, + { + "epoch": 0.58, + "grad_norm": 2.359375, + "learning_rate": 0.00016160292784900051, + "loss": 2.0802, + "step": 245390 + }, + { + "epoch": 0.58, + "grad_norm": 1.8359375, + "learning_rate": 0.00016160147189630308, + "loss": 2.1224, + "step": 245395 + }, + { + "epoch": 0.58, + "grad_norm": 2.375, + "learning_rate": 0.0001616000159225615, + "loss": 2.1695, + "step": 245400 + }, + { + "epoch": 0.58, + "grad_norm": 2.296875, + "learning_rate": 0.00016159855992777623, + "loss": 1.8952, + "step": 245405 + }, + { + "epoch": 0.58, + "grad_norm": 2.125, + "learning_rate": 0.0001615971039119478, + "loss": 2.1299, + "step": 245410 + }, + { + "epoch": 0.58, + "grad_norm": 2.5, + "learning_rate": 0.0001615956478750767, + "loss": 1.8977, + "step": 245415 + }, + { + "epoch": 0.58, + "grad_norm": 2.0, + "learning_rate": 0.00016159419181716335, + "loss": 2.1234, + "step": 245420 + }, + { + "epoch": 0.58, + "grad_norm": 2.296875, + "learning_rate": 0.00016159273573820835, + "loss": 2.0621, + "step": 245425 + }, + { + "epoch": 0.58, + "grad_norm": 1.9453125, + "learning_rate": 0.00016159127963821213, + "loss": 2.0523, + "step": 245430 + }, + { + "epoch": 0.58, + "grad_norm": 1.84375, + "learning_rate": 0.00016158982351717523, + "loss": 1.9635, + "step": 245435 + }, + { + "epoch": 0.58, + "grad_norm": 2.1875, + "learning_rate": 0.00016158836737509811, + "loss": 2.1854, + "step": 245440 + }, + { + "epoch": 0.58, + "grad_norm": 2.109375, + "learning_rate": 0.0001615869112119813, + "loss": 2.2075, + "step": 245445 + }, + { + "epoch": 0.58, + "grad_norm": 2.09375, + "learning_rate": 0.0001615854550278253, + "loss": 2.2519, + "step": 245450 + }, + { + "epoch": 0.58, + "grad_norm": 2.5625, + "learning_rate": 0.00016158399882263056, + "loss": 2.079, + "step": 245455 + }, + { + "epoch": 0.58, + "grad_norm": 1.96875, + "learning_rate": 0.00016158254259639762, + "loss": 2.0821, + "step": 245460 + }, + { + "epoch": 0.58, + "grad_norm": 2.125, + "learning_rate": 0.00016158108634912696, + "loss": 2.2457, + "step": 245465 + }, + { + "epoch": 0.58, + "grad_norm": 1.875, + "learning_rate": 0.00016157963008081908, + "loss": 1.9545, + "step": 245470 + }, + { + "epoch": 0.58, + "grad_norm": 2.4375, + "learning_rate": 0.00016157817379147451, + "loss": 2.1714, + "step": 245475 + }, + { + "epoch": 0.58, + "grad_norm": 1.8515625, + "learning_rate": 0.00016157671748109372, + "loss": 2.2045, + "step": 245480 + }, + { + "epoch": 0.58, + "grad_norm": 1.9140625, + "learning_rate": 0.00016157526114967717, + "loss": 2.1908, + "step": 245485 + }, + { + "epoch": 0.58, + "grad_norm": 2.359375, + "learning_rate": 0.00016157380479722542, + "loss": 1.9578, + "step": 245490 + }, + { + "epoch": 0.58, + "grad_norm": 2.171875, + "learning_rate": 0.00016157234842373893, + "loss": 2.1834, + "step": 245495 + }, + { + "epoch": 0.58, + "grad_norm": 2.21875, + "learning_rate": 0.00016157089202921822, + "loss": 2.0547, + "step": 245500 + }, + { + "epoch": 0.58, + "grad_norm": 2.140625, + "learning_rate": 0.00016156943561366376, + "loss": 1.963, + "step": 245505 + }, + { + "epoch": 0.58, + "grad_norm": 2.140625, + "learning_rate": 0.00016156797917707606, + "loss": 1.948, + "step": 245510 + }, + { + "epoch": 0.58, + "grad_norm": 2.015625, + "learning_rate": 0.00016156652271945564, + "loss": 2.0811, + "step": 245515 + }, + { + "epoch": 0.58, + "grad_norm": 1.734375, + "learning_rate": 0.00016156506624080298, + "loss": 2.0465, + "step": 245520 + }, + { + "epoch": 0.58, + "grad_norm": 2.390625, + "learning_rate": 0.00016156360974111858, + "loss": 2.1993, + "step": 245525 + }, + { + "epoch": 0.58, + "grad_norm": 2.171875, + "learning_rate": 0.00016156215322040294, + "loss": 1.9701, + "step": 245530 + }, + { + "epoch": 0.58, + "grad_norm": 2.109375, + "learning_rate": 0.00016156069667865651, + "loss": 1.9402, + "step": 245535 + }, + { + "epoch": 0.58, + "grad_norm": 1.71875, + "learning_rate": 0.00016155924011587987, + "loss": 2.1364, + "step": 245540 + }, + { + "epoch": 0.58, + "grad_norm": 2.078125, + "learning_rate": 0.0001615577835320735, + "loss": 2.1232, + "step": 245545 + }, + { + "epoch": 0.58, + "grad_norm": 2.515625, + "learning_rate": 0.00016155632692723781, + "loss": 2.1437, + "step": 245550 + }, + { + "epoch": 0.58, + "grad_norm": 1.859375, + "learning_rate": 0.0001615548703013734, + "loss": 2.0292, + "step": 245555 + }, + { + "epoch": 0.58, + "grad_norm": 2.390625, + "learning_rate": 0.00016155341365448072, + "loss": 1.9886, + "step": 245560 + }, + { + "epoch": 0.58, + "grad_norm": 1.6640625, + "learning_rate": 0.00016155195698656027, + "loss": 2.0481, + "step": 245565 + }, + { + "epoch": 0.58, + "grad_norm": 2.40625, + "learning_rate": 0.00016155050029761258, + "loss": 2.1079, + "step": 245570 + }, + { + "epoch": 0.58, + "grad_norm": 3.3125, + "learning_rate": 0.00016154904358763812, + "loss": 2.2481, + "step": 245575 + }, + { + "epoch": 0.58, + "grad_norm": 1.828125, + "learning_rate": 0.00016154758685663735, + "loss": 2.1309, + "step": 245580 + }, + { + "epoch": 0.58, + "grad_norm": 1.890625, + "learning_rate": 0.00016154613010461086, + "loss": 2.1996, + "step": 245585 + }, + { + "epoch": 0.58, + "grad_norm": 2.3125, + "learning_rate": 0.00016154467333155906, + "loss": 2.1353, + "step": 245590 + }, + { + "epoch": 0.58, + "grad_norm": 2.078125, + "learning_rate": 0.0001615432165374825, + "loss": 2.1509, + "step": 245595 + }, + { + "epoch": 0.58, + "grad_norm": 2.6875, + "learning_rate": 0.00016154175972238165, + "loss": 2.1992, + "step": 245600 + }, + { + "epoch": 0.58, + "grad_norm": 2.46875, + "learning_rate": 0.00016154030288625704, + "loss": 2.0396, + "step": 245605 + }, + { + "epoch": 0.58, + "grad_norm": 2.625, + "learning_rate": 0.00016153884602910913, + "loss": 2.1624, + "step": 245610 + }, + { + "epoch": 0.58, + "grad_norm": 2.34375, + "learning_rate": 0.00016153738915093842, + "loss": 1.9738, + "step": 245615 + }, + { + "epoch": 0.58, + "grad_norm": 2.515625, + "learning_rate": 0.00016153593225174545, + "loss": 2.167, + "step": 245620 + }, + { + "epoch": 0.58, + "grad_norm": 2.25, + "learning_rate": 0.0001615344753315307, + "loss": 2.1681, + "step": 245625 + }, + { + "epoch": 0.58, + "grad_norm": 2.265625, + "learning_rate": 0.00016153301839029463, + "loss": 2.1329, + "step": 245630 + }, + { + "epoch": 0.58, + "grad_norm": 2.296875, + "learning_rate": 0.00016153156142803776, + "loss": 2.1263, + "step": 245635 + }, + { + "epoch": 0.58, + "grad_norm": 2.171875, + "learning_rate": 0.00016153010444476063, + "loss": 2.1456, + "step": 245640 + }, + { + "epoch": 0.58, + "grad_norm": 2.46875, + "learning_rate": 0.00016152864744046363, + "loss": 2.1248, + "step": 245645 + }, + { + "epoch": 0.58, + "grad_norm": 2.421875, + "learning_rate": 0.00016152719041514737, + "loss": 2.0554, + "step": 245650 + }, + { + "epoch": 0.58, + "grad_norm": 2.265625, + "learning_rate": 0.0001615257333688123, + "loss": 2.0264, + "step": 245655 + }, + { + "epoch": 0.58, + "grad_norm": 2.890625, + "learning_rate": 0.00016152427630145897, + "loss": 2.085, + "step": 245660 + }, + { + "epoch": 0.58, + "grad_norm": 2.09375, + "learning_rate": 0.0001615228192130878, + "loss": 1.9379, + "step": 245665 + }, + { + "epoch": 0.58, + "grad_norm": 1.765625, + "learning_rate": 0.0001615213621036993, + "loss": 2.1146, + "step": 245670 + }, + { + "epoch": 0.58, + "grad_norm": 2.484375, + "learning_rate": 0.00016151990497329397, + "loss": 2.0741, + "step": 245675 + }, + { + "epoch": 0.58, + "grad_norm": 2.109375, + "learning_rate": 0.00016151844782187237, + "loss": 2.1385, + "step": 245680 + }, + { + "epoch": 0.58, + "grad_norm": 2.4375, + "learning_rate": 0.00016151699064943494, + "loss": 1.9357, + "step": 245685 + }, + { + "epoch": 0.58, + "grad_norm": 2.328125, + "learning_rate": 0.00016151553345598217, + "loss": 2.044, + "step": 245690 + }, + { + "epoch": 0.58, + "grad_norm": 2.0625, + "learning_rate": 0.0001615140762415146, + "loss": 1.9619, + "step": 245695 + }, + { + "epoch": 0.58, + "grad_norm": 2.359375, + "learning_rate": 0.0001615126190060327, + "loss": 2.0911, + "step": 245700 + }, + { + "epoch": 0.58, + "grad_norm": 2.21875, + "learning_rate": 0.00016151116174953697, + "loss": 2.1128, + "step": 245705 + }, + { + "epoch": 0.58, + "grad_norm": 2.375, + "learning_rate": 0.00016150970447202791, + "loss": 1.9678, + "step": 245710 + }, + { + "epoch": 0.58, + "grad_norm": 1.9609375, + "learning_rate": 0.00016150824717350607, + "loss": 2.1697, + "step": 245715 + }, + { + "epoch": 0.58, + "grad_norm": 2.453125, + "learning_rate": 0.00016150678985397184, + "loss": 2.0203, + "step": 245720 + }, + { + "epoch": 0.58, + "grad_norm": 2.1875, + "learning_rate": 0.00016150533251342578, + "loss": 2.1254, + "step": 245725 + }, + { + "epoch": 0.58, + "grad_norm": 1.7890625, + "learning_rate": 0.0001615038751518684, + "loss": 2.1905, + "step": 245730 + }, + { + "epoch": 0.58, + "grad_norm": 1.75, + "learning_rate": 0.00016150241776930016, + "loss": 2.0769, + "step": 245735 + }, + { + "epoch": 0.58, + "grad_norm": 2.40625, + "learning_rate": 0.0001615009603657216, + "loss": 2.1355, + "step": 245740 + }, + { + "epoch": 0.58, + "grad_norm": 2.34375, + "learning_rate": 0.00016149950294113316, + "loss": 1.9965, + "step": 245745 + }, + { + "epoch": 0.58, + "grad_norm": 2.296875, + "learning_rate": 0.0001614980454955354, + "loss": 2.2588, + "step": 245750 + }, + { + "epoch": 0.58, + "grad_norm": 1.9375, + "learning_rate": 0.00016149658802892879, + "loss": 2.1492, + "step": 245755 + }, + { + "epoch": 0.58, + "grad_norm": 2.21875, + "learning_rate": 0.00016149513054131384, + "loss": 2.1526, + "step": 245760 + }, + { + "epoch": 0.58, + "grad_norm": 1.8984375, + "learning_rate": 0.00016149367303269104, + "loss": 2.0189, + "step": 245765 + }, + { + "epoch": 0.58, + "grad_norm": 1.96875, + "learning_rate": 0.00016149221550306088, + "loss": 2.037, + "step": 245770 + }, + { + "epoch": 0.58, + "grad_norm": 2.265625, + "learning_rate": 0.00016149075795242386, + "loss": 2.14, + "step": 245775 + }, + { + "epoch": 0.58, + "grad_norm": 1.9921875, + "learning_rate": 0.00016148930038078048, + "loss": 1.9081, + "step": 245780 + }, + { + "epoch": 0.58, + "grad_norm": 2.125, + "learning_rate": 0.00016148784278813123, + "loss": 2.0503, + "step": 245785 + }, + { + "epoch": 0.58, + "grad_norm": 1.9140625, + "learning_rate": 0.00016148638517447664, + "loss": 2.0687, + "step": 245790 + }, + { + "epoch": 0.58, + "grad_norm": 2.453125, + "learning_rate": 0.00016148492753981718, + "loss": 2.1589, + "step": 245795 + }, + { + "epoch": 0.58, + "grad_norm": 1.9296875, + "learning_rate": 0.00016148346988415337, + "loss": 2.1751, + "step": 245800 + }, + { + "epoch": 0.58, + "grad_norm": 2.296875, + "learning_rate": 0.00016148201220748566, + "loss": 2.038, + "step": 245805 + }, + { + "epoch": 0.58, + "grad_norm": 1.9453125, + "learning_rate": 0.0001614805545098146, + "loss": 2.1336, + "step": 245810 + }, + { + "epoch": 0.58, + "grad_norm": 2.328125, + "learning_rate": 0.00016147909679114063, + "loss": 2.1234, + "step": 245815 + }, + { + "epoch": 0.58, + "grad_norm": 2.203125, + "learning_rate": 0.00016147763905146435, + "loss": 1.9703, + "step": 245820 + }, + { + "epoch": 0.58, + "grad_norm": 2.28125, + "learning_rate": 0.00016147618129078615, + "loss": 2.1326, + "step": 245825 + }, + { + "epoch": 0.58, + "grad_norm": 2.421875, + "learning_rate": 0.00016147472350910657, + "loss": 2.2606, + "step": 245830 + }, + { + "epoch": 0.58, + "grad_norm": 2.53125, + "learning_rate": 0.0001614732657064261, + "loss": 2.0488, + "step": 245835 + }, + { + "epoch": 0.58, + "grad_norm": 2.015625, + "learning_rate": 0.0001614718078827453, + "loss": 2.0926, + "step": 245840 + }, + { + "epoch": 0.58, + "grad_norm": 2.03125, + "learning_rate": 0.00016147035003806457, + "loss": 2.1121, + "step": 245845 + }, + { + "epoch": 0.58, + "grad_norm": 2.4375, + "learning_rate": 0.00016146889217238445, + "loss": 2.219, + "step": 245850 + }, + { + "epoch": 0.58, + "grad_norm": 2.53125, + "learning_rate": 0.00016146743428570548, + "loss": 1.9972, + "step": 245855 + }, + { + "epoch": 0.58, + "grad_norm": 2.125, + "learning_rate": 0.00016146597637802808, + "loss": 2.1783, + "step": 245860 + }, + { + "epoch": 0.58, + "grad_norm": 2.109375, + "learning_rate": 0.0001614645184493528, + "loss": 2.1919, + "step": 245865 + }, + { + "epoch": 0.58, + "grad_norm": 2.109375, + "learning_rate": 0.00016146306049968014, + "loss": 2.0654, + "step": 245870 + }, + { + "epoch": 0.58, + "grad_norm": 2.53125, + "learning_rate": 0.0001614616025290106, + "loss": 1.9791, + "step": 245875 + }, + { + "epoch": 0.58, + "grad_norm": 2.09375, + "learning_rate": 0.00016146014453734462, + "loss": 2.147, + "step": 245880 + }, + { + "epoch": 0.58, + "grad_norm": 2.1875, + "learning_rate": 0.00016145868652468274, + "loss": 2.0098, + "step": 245885 + }, + { + "epoch": 0.58, + "grad_norm": 2.140625, + "learning_rate": 0.0001614572284910255, + "loss": 2.0366, + "step": 245890 + }, + { + "epoch": 0.58, + "grad_norm": 1.984375, + "learning_rate": 0.00016145577043637333, + "loss": 2.0296, + "step": 245895 + }, + { + "epoch": 0.58, + "grad_norm": 1.8984375, + "learning_rate": 0.00016145431236072675, + "loss": 2.2234, + "step": 245900 + }, + { + "epoch": 0.58, + "grad_norm": 2.015625, + "learning_rate": 0.00016145285426408625, + "loss": 2.1973, + "step": 245905 + }, + { + "epoch": 0.58, + "grad_norm": 2.15625, + "learning_rate": 0.00016145139614645238, + "loss": 2.2995, + "step": 245910 + }, + { + "epoch": 0.58, + "grad_norm": 2.28125, + "learning_rate": 0.00016144993800782555, + "loss": 2.1124, + "step": 245915 + }, + { + "epoch": 0.58, + "grad_norm": 2.171875, + "learning_rate": 0.00016144847984820635, + "loss": 2.1455, + "step": 245920 + }, + { + "epoch": 0.58, + "grad_norm": 2.34375, + "learning_rate": 0.0001614470216675952, + "loss": 2.187, + "step": 245925 + }, + { + "epoch": 0.58, + "grad_norm": 1.9453125, + "learning_rate": 0.00016144556346599267, + "loss": 2.1805, + "step": 245930 + }, + { + "epoch": 0.58, + "grad_norm": 2.59375, + "learning_rate": 0.0001614441052433992, + "loss": 1.9228, + "step": 245935 + }, + { + "epoch": 0.58, + "grad_norm": 1.921875, + "learning_rate": 0.0001614426469998153, + "loss": 2.0185, + "step": 245940 + }, + { + "epoch": 0.58, + "grad_norm": 2.453125, + "learning_rate": 0.0001614411887352415, + "loss": 2.2561, + "step": 245945 + }, + { + "epoch": 0.58, + "grad_norm": 2.171875, + "learning_rate": 0.00016143973044967827, + "loss": 2.1862, + "step": 245950 + }, + { + "epoch": 0.58, + "grad_norm": 2.34375, + "learning_rate": 0.0001614382721431261, + "loss": 2.0879, + "step": 245955 + }, + { + "epoch": 0.58, + "grad_norm": 2.015625, + "learning_rate": 0.0001614368138155855, + "loss": 2.1666, + "step": 245960 + }, + { + "epoch": 0.58, + "grad_norm": 2.1875, + "learning_rate": 0.000161435355467057, + "loss": 2.034, + "step": 245965 + }, + { + "epoch": 0.58, + "grad_norm": 2.109375, + "learning_rate": 0.00016143389709754105, + "loss": 2.1595, + "step": 245970 + }, + { + "epoch": 0.58, + "grad_norm": 2.140625, + "learning_rate": 0.00016143243870703815, + "loss": 2.115, + "step": 245975 + }, + { + "epoch": 0.58, + "grad_norm": 2.1875, + "learning_rate": 0.00016143098029554884, + "loss": 2.1472, + "step": 245980 + }, + { + "epoch": 0.58, + "grad_norm": 2.078125, + "learning_rate": 0.00016142952186307358, + "loss": 2.0569, + "step": 245985 + }, + { + "epoch": 0.58, + "grad_norm": 2.234375, + "learning_rate": 0.0001614280634096129, + "loss": 2.0236, + "step": 245990 + }, + { + "epoch": 0.58, + "grad_norm": 2.046875, + "learning_rate": 0.00016142660493516725, + "loss": 2.0646, + "step": 245995 + }, + { + "epoch": 0.58, + "grad_norm": 1.6640625, + "learning_rate": 0.00016142514643973716, + "loss": 2.1793, + "step": 246000 + }, + { + "epoch": 0.58, + "grad_norm": 2.0625, + "learning_rate": 0.00016142368792332315, + "loss": 2.122, + "step": 246005 + }, + { + "epoch": 0.58, + "grad_norm": 2.15625, + "learning_rate": 0.0001614222293859257, + "loss": 2.0557, + "step": 246010 + }, + { + "epoch": 0.58, + "grad_norm": 2.0625, + "learning_rate": 0.00016142077082754528, + "loss": 2.1116, + "step": 246015 + }, + { + "epoch": 0.58, + "grad_norm": 2.3125, + "learning_rate": 0.0001614193122481824, + "loss": 2.2457, + "step": 246020 + }, + { + "epoch": 0.58, + "grad_norm": 2.328125, + "learning_rate": 0.0001614178536478376, + "loss": 2.0383, + "step": 246025 + }, + { + "epoch": 0.58, + "grad_norm": 2.140625, + "learning_rate": 0.0001614163950265113, + "loss": 2.3221, + "step": 246030 + }, + { + "epoch": 0.58, + "grad_norm": 2.40625, + "learning_rate": 0.00016141493638420409, + "loss": 1.9299, + "step": 246035 + }, + { + "epoch": 0.58, + "grad_norm": 2.203125, + "learning_rate": 0.0001614134777209164, + "loss": 2.1349, + "step": 246040 + }, + { + "epoch": 0.58, + "grad_norm": 2.109375, + "learning_rate": 0.00016141201903664877, + "loss": 2.2228, + "step": 246045 + }, + { + "epoch": 0.58, + "grad_norm": 1.9375, + "learning_rate": 0.00016141056033140168, + "loss": 2.1134, + "step": 246050 + }, + { + "epoch": 0.58, + "grad_norm": 2.0625, + "learning_rate": 0.0001614091016051756, + "loss": 2.1493, + "step": 246055 + }, + { + "epoch": 0.58, + "grad_norm": 2.265625, + "learning_rate": 0.00016140764285797111, + "loss": 2.0756, + "step": 246060 + }, + { + "epoch": 0.58, + "grad_norm": 2.0625, + "learning_rate": 0.0001614061840897886, + "loss": 2.1205, + "step": 246065 + }, + { + "epoch": 0.58, + "grad_norm": 2.15625, + "learning_rate": 0.0001614047253006287, + "loss": 2.1553, + "step": 246070 + }, + { + "epoch": 0.58, + "grad_norm": 2.34375, + "learning_rate": 0.00016140326649049174, + "loss": 2.3397, + "step": 246075 + }, + { + "epoch": 0.58, + "grad_norm": 1.84375, + "learning_rate": 0.00016140180765937835, + "loss": 2.0659, + "step": 246080 + }, + { + "epoch": 0.58, + "grad_norm": 2.390625, + "learning_rate": 0.000161400348807289, + "loss": 2.2222, + "step": 246085 + }, + { + "epoch": 0.58, + "grad_norm": 2.359375, + "learning_rate": 0.00016139888993422417, + "loss": 2.1499, + "step": 246090 + }, + { + "epoch": 0.58, + "grad_norm": 2.390625, + "learning_rate": 0.00016139743104018437, + "loss": 2.062, + "step": 246095 + }, + { + "epoch": 0.58, + "grad_norm": 2.140625, + "learning_rate": 0.00016139597212517007, + "loss": 2.0593, + "step": 246100 + }, + { + "epoch": 0.58, + "grad_norm": 2.046875, + "learning_rate": 0.00016139451318918183, + "loss": 2.1024, + "step": 246105 + }, + { + "epoch": 0.58, + "grad_norm": 2.40625, + "learning_rate": 0.00016139305423222008, + "loss": 2.0931, + "step": 246110 + }, + { + "epoch": 0.58, + "grad_norm": 2.15625, + "learning_rate": 0.00016139159525428538, + "loss": 2.123, + "step": 246115 + }, + { + "epoch": 0.58, + "grad_norm": 2.125, + "learning_rate": 0.00016139013625537818, + "loss": 2.1535, + "step": 246120 + }, + { + "epoch": 0.58, + "grad_norm": 2.140625, + "learning_rate": 0.00016138867723549898, + "loss": 2.211, + "step": 246125 + }, + { + "epoch": 0.58, + "grad_norm": 2.125, + "learning_rate": 0.0001613872181946483, + "loss": 2.1362, + "step": 246130 + }, + { + "epoch": 0.58, + "grad_norm": 2.0625, + "learning_rate": 0.00016138575913282664, + "loss": 2.0803, + "step": 246135 + }, + { + "epoch": 0.58, + "grad_norm": 2.328125, + "learning_rate": 0.0001613843000500345, + "loss": 2.1387, + "step": 246140 + }, + { + "epoch": 0.58, + "grad_norm": 2.03125, + "learning_rate": 0.00016138284094627237, + "loss": 2.126, + "step": 246145 + }, + { + "epoch": 0.58, + "grad_norm": 2.25, + "learning_rate": 0.00016138138182154074, + "loss": 2.0505, + "step": 246150 + }, + { + "epoch": 0.58, + "grad_norm": 2.265625, + "learning_rate": 0.00016137992267584012, + "loss": 2.0681, + "step": 246155 + }, + { + "epoch": 0.58, + "grad_norm": 2.375, + "learning_rate": 0.00016137846350917099, + "loss": 2.199, + "step": 246160 + }, + { + "epoch": 0.58, + "grad_norm": 1.9765625, + "learning_rate": 0.0001613770043215339, + "loss": 2.1982, + "step": 246165 + }, + { + "epoch": 0.58, + "grad_norm": 1.875, + "learning_rate": 0.0001613755451129293, + "loss": 1.9706, + "step": 246170 + }, + { + "epoch": 0.58, + "grad_norm": 2.59375, + "learning_rate": 0.00016137408588335768, + "loss": 2.1779, + "step": 246175 + }, + { + "epoch": 0.58, + "grad_norm": 1.9140625, + "learning_rate": 0.00016137262663281957, + "loss": 2.1145, + "step": 246180 + }, + { + "epoch": 0.58, + "grad_norm": 2.421875, + "learning_rate": 0.00016137116736131545, + "loss": 2.0188, + "step": 246185 + }, + { + "epoch": 0.58, + "grad_norm": 2.03125, + "learning_rate": 0.00016136970806884584, + "loss": 2.0496, + "step": 246190 + }, + { + "epoch": 0.58, + "grad_norm": 1.8125, + "learning_rate": 0.0001613682487554112, + "loss": 2.0513, + "step": 246195 + }, + { + "epoch": 0.58, + "grad_norm": 2.09375, + "learning_rate": 0.00016136678942101207, + "loss": 1.9593, + "step": 246200 + }, + { + "epoch": 0.58, + "grad_norm": 2.515625, + "learning_rate": 0.00016136533006564892, + "loss": 1.9083, + "step": 246205 + }, + { + "epoch": 0.58, + "grad_norm": 2.140625, + "learning_rate": 0.00016136387068932227, + "loss": 2.0939, + "step": 246210 + }, + { + "epoch": 0.58, + "grad_norm": 1.953125, + "learning_rate": 0.00016136241129203262, + "loss": 2.3025, + "step": 246215 + }, + { + "epoch": 0.58, + "grad_norm": 2.078125, + "learning_rate": 0.00016136095187378047, + "loss": 2.0323, + "step": 246220 + }, + { + "epoch": 0.58, + "grad_norm": 2.3125, + "learning_rate": 0.00016135949243456627, + "loss": 2.166, + "step": 246225 + }, + { + "epoch": 0.58, + "grad_norm": 2.359375, + "learning_rate": 0.0001613580329743906, + "loss": 2.0957, + "step": 246230 + }, + { + "epoch": 0.58, + "grad_norm": 2.140625, + "learning_rate": 0.00016135657349325387, + "loss": 2.1152, + "step": 246235 + }, + { + "epoch": 0.58, + "grad_norm": 2.265625, + "learning_rate": 0.00016135511399115662, + "loss": 2.2261, + "step": 246240 + }, + { + "epoch": 0.58, + "grad_norm": 2.078125, + "learning_rate": 0.00016135365446809936, + "loss": 2.1811, + "step": 246245 + }, + { + "epoch": 0.58, + "grad_norm": 2.25, + "learning_rate": 0.0001613521949240826, + "loss": 2.2211, + "step": 246250 + }, + { + "epoch": 0.58, + "grad_norm": 2.25, + "learning_rate": 0.0001613507353591068, + "loss": 2.1396, + "step": 246255 + }, + { + "epoch": 0.58, + "grad_norm": 2.203125, + "learning_rate": 0.00016134927577317248, + "loss": 2.1177, + "step": 246260 + }, + { + "epoch": 0.58, + "grad_norm": 2.234375, + "learning_rate": 0.00016134781616628012, + "loss": 2.1363, + "step": 246265 + }, + { + "epoch": 0.58, + "grad_norm": 2.296875, + "learning_rate": 0.00016134635653843025, + "loss": 2.2313, + "step": 246270 + }, + { + "epoch": 0.58, + "grad_norm": 1.9921875, + "learning_rate": 0.00016134489688962337, + "loss": 2.1739, + "step": 246275 + }, + { + "epoch": 0.58, + "grad_norm": 2.59375, + "learning_rate": 0.00016134343721985994, + "loss": 1.9735, + "step": 246280 + }, + { + "epoch": 0.58, + "grad_norm": 2.171875, + "learning_rate": 0.00016134197752914046, + "loss": 2.0005, + "step": 246285 + }, + { + "epoch": 0.58, + "grad_norm": 2.59375, + "learning_rate": 0.00016134051781746544, + "loss": 2.329, + "step": 246290 + }, + { + "epoch": 0.58, + "grad_norm": 1.7578125, + "learning_rate": 0.00016133905808483543, + "loss": 2.2063, + "step": 246295 + }, + { + "epoch": 0.58, + "grad_norm": 2.40625, + "learning_rate": 0.0001613375983312509, + "loss": 2.0296, + "step": 246300 + }, + { + "epoch": 0.58, + "grad_norm": 2.09375, + "learning_rate": 0.0001613361385567123, + "loss": 2.1675, + "step": 246305 + }, + { + "epoch": 0.58, + "grad_norm": 1.9609375, + "learning_rate": 0.00016133467876122015, + "loss": 2.0395, + "step": 246310 + }, + { + "epoch": 0.58, + "grad_norm": 2.234375, + "learning_rate": 0.00016133321894477498, + "loss": 2.1656, + "step": 246315 + }, + { + "epoch": 0.58, + "grad_norm": 2.296875, + "learning_rate": 0.00016133175910737726, + "loss": 1.9797, + "step": 246320 + }, + { + "epoch": 0.58, + "grad_norm": 2.28125, + "learning_rate": 0.0001613302992490275, + "loss": 2.1696, + "step": 246325 + }, + { + "epoch": 0.58, + "grad_norm": 2.1875, + "learning_rate": 0.0001613288393697262, + "loss": 2.0304, + "step": 246330 + }, + { + "epoch": 0.58, + "grad_norm": 2.421875, + "learning_rate": 0.00016132737946947384, + "loss": 2.095, + "step": 246335 + }, + { + "epoch": 0.58, + "grad_norm": 1.9453125, + "learning_rate": 0.00016132591954827097, + "loss": 2.0681, + "step": 246340 + }, + { + "epoch": 0.58, + "grad_norm": 2.234375, + "learning_rate": 0.00016132445960611805, + "loss": 2.0584, + "step": 246345 + }, + { + "epoch": 0.58, + "grad_norm": 2.3125, + "learning_rate": 0.00016132299964301557, + "loss": 2.0948, + "step": 246350 + }, + { + "epoch": 0.58, + "grad_norm": 2.328125, + "learning_rate": 0.00016132153965896405, + "loss": 1.9852, + "step": 246355 + }, + { + "epoch": 0.58, + "grad_norm": 2.34375, + "learning_rate": 0.00016132007965396396, + "loss": 2.0895, + "step": 246360 + }, + { + "epoch": 0.58, + "grad_norm": 2.046875, + "learning_rate": 0.0001613186196280158, + "loss": 1.9703, + "step": 246365 + }, + { + "epoch": 0.58, + "grad_norm": 2.546875, + "learning_rate": 0.00016131715958112012, + "loss": 1.9652, + "step": 246370 + }, + { + "epoch": 0.58, + "grad_norm": 2.125, + "learning_rate": 0.00016131569951327742, + "loss": 2.1219, + "step": 246375 + }, + { + "epoch": 0.58, + "grad_norm": 2.0625, + "learning_rate": 0.00016131423942448812, + "loss": 2.0528, + "step": 246380 + }, + { + "epoch": 0.58, + "grad_norm": 1.953125, + "learning_rate": 0.00016131277931475278, + "loss": 2.0828, + "step": 246385 + }, + { + "epoch": 0.58, + "grad_norm": 1.8046875, + "learning_rate": 0.00016131131918407186, + "loss": 2.1399, + "step": 246390 + }, + { + "epoch": 0.58, + "grad_norm": 1.828125, + "learning_rate": 0.0001613098590324459, + "loss": 2.0998, + "step": 246395 + }, + { + "epoch": 0.58, + "grad_norm": 2.296875, + "learning_rate": 0.00016130839885987537, + "loss": 2.1228, + "step": 246400 + }, + { + "epoch": 0.58, + "grad_norm": 2.125, + "learning_rate": 0.00016130693866636076, + "loss": 1.9298, + "step": 246405 + }, + { + "epoch": 0.58, + "grad_norm": 2.234375, + "learning_rate": 0.00016130547845190262, + "loss": 2.0454, + "step": 246410 + }, + { + "epoch": 0.58, + "grad_norm": 2.140625, + "learning_rate": 0.00016130401821650144, + "loss": 2.0709, + "step": 246415 + }, + { + "epoch": 0.58, + "grad_norm": 2.359375, + "learning_rate": 0.00016130255796015765, + "loss": 2.0569, + "step": 246420 + }, + { + "epoch": 0.58, + "grad_norm": 2.921875, + "learning_rate": 0.0001613010976828718, + "loss": 2.2877, + "step": 246425 + }, + { + "epoch": 0.58, + "grad_norm": 2.421875, + "learning_rate": 0.0001612996373846444, + "loss": 2.0293, + "step": 246430 + }, + { + "epoch": 0.58, + "grad_norm": 1.859375, + "learning_rate": 0.00016129817706547593, + "loss": 2.0176, + "step": 246435 + }, + { + "epoch": 0.58, + "grad_norm": 1.890625, + "learning_rate": 0.0001612967167253669, + "loss": 2.0852, + "step": 246440 + }, + { + "epoch": 0.58, + "grad_norm": 2.484375, + "learning_rate": 0.0001612952563643178, + "loss": 2.1889, + "step": 246445 + }, + { + "epoch": 0.58, + "grad_norm": 2.171875, + "learning_rate": 0.00016129379598232908, + "loss": 1.9743, + "step": 246450 + }, + { + "epoch": 0.58, + "grad_norm": 2.296875, + "learning_rate": 0.00016129233557940134, + "loss": 2.1706, + "step": 246455 + }, + { + "epoch": 0.58, + "grad_norm": 1.9140625, + "learning_rate": 0.000161290875155535, + "loss": 2.1751, + "step": 246460 + }, + { + "epoch": 0.58, + "grad_norm": 2.328125, + "learning_rate": 0.0001612894147107306, + "loss": 1.9603, + "step": 246465 + }, + { + "epoch": 0.58, + "grad_norm": 2.296875, + "learning_rate": 0.0001612879542449886, + "loss": 2.0959, + "step": 246470 + }, + { + "epoch": 0.58, + "grad_norm": 2.125, + "learning_rate": 0.00016128649375830956, + "loss": 2.0088, + "step": 246475 + }, + { + "epoch": 0.58, + "grad_norm": 1.96875, + "learning_rate": 0.00016128503325069392, + "loss": 2.0685, + "step": 246480 + }, + { + "epoch": 0.58, + "grad_norm": 2.015625, + "learning_rate": 0.0001612835727221422, + "loss": 2.0779, + "step": 246485 + }, + { + "epoch": 0.58, + "grad_norm": 2.296875, + "learning_rate": 0.00016128211217265493, + "loss": 2.1524, + "step": 246490 + }, + { + "epoch": 0.58, + "grad_norm": 1.890625, + "learning_rate": 0.00016128065160223254, + "loss": 2.0307, + "step": 246495 + }, + { + "epoch": 0.58, + "grad_norm": 2.0625, + "learning_rate": 0.00016127919101087559, + "loss": 2.0501, + "step": 246500 + }, + { + "epoch": 0.58, + "grad_norm": 2.171875, + "learning_rate": 0.00016127773039858453, + "loss": 2.0376, + "step": 246505 + }, + { + "epoch": 0.58, + "grad_norm": 2.34375, + "learning_rate": 0.00016127626976535992, + "loss": 2.2103, + "step": 246510 + }, + { + "epoch": 0.58, + "grad_norm": 3.65625, + "learning_rate": 0.0001612748091112022, + "loss": 2.264, + "step": 246515 + }, + { + "epoch": 0.58, + "grad_norm": 1.96875, + "learning_rate": 0.00016127334843611194, + "loss": 2.0401, + "step": 246520 + }, + { + "epoch": 0.58, + "grad_norm": 2.421875, + "learning_rate": 0.00016127188774008955, + "loss": 2.0886, + "step": 246525 + }, + { + "epoch": 0.58, + "grad_norm": 2.28125, + "learning_rate": 0.00016127042702313555, + "loss": 2.1917, + "step": 246530 + }, + { + "epoch": 0.58, + "grad_norm": 1.921875, + "learning_rate": 0.0001612689662852505, + "loss": 2.1701, + "step": 246535 + }, + { + "epoch": 0.58, + "grad_norm": 2.5625, + "learning_rate": 0.00016126750552643485, + "loss": 2.1387, + "step": 246540 + }, + { + "epoch": 0.58, + "grad_norm": 2.25, + "learning_rate": 0.00016126604474668913, + "loss": 2.1697, + "step": 246545 + }, + { + "epoch": 0.58, + "grad_norm": 2.375, + "learning_rate": 0.00016126458394601377, + "loss": 2.182, + "step": 246550 + }, + { + "epoch": 0.58, + "grad_norm": 2.671875, + "learning_rate": 0.00016126312312440935, + "loss": 2.3505, + "step": 246555 + }, + { + "epoch": 0.58, + "grad_norm": 2.28125, + "learning_rate": 0.00016126166228187634, + "loss": 2.2176, + "step": 246560 + }, + { + "epoch": 0.58, + "grad_norm": 2.375, + "learning_rate": 0.00016126020141841522, + "loss": 2.0826, + "step": 246565 + }, + { + "epoch": 0.58, + "grad_norm": 1.9765625, + "learning_rate": 0.0001612587405340265, + "loss": 2.1347, + "step": 246570 + }, + { + "epoch": 0.58, + "grad_norm": 2.0625, + "learning_rate": 0.00016125727962871072, + "loss": 2.2283, + "step": 246575 + }, + { + "epoch": 0.58, + "grad_norm": 2.3125, + "learning_rate": 0.0001612558187024683, + "loss": 2.108, + "step": 246580 + }, + { + "epoch": 0.58, + "grad_norm": 2.28125, + "learning_rate": 0.0001612543577552998, + "loss": 2.1674, + "step": 246585 + }, + { + "epoch": 0.58, + "grad_norm": 2.109375, + "learning_rate": 0.0001612528967872057, + "loss": 2.1882, + "step": 246590 + }, + { + "epoch": 0.58, + "grad_norm": 1.953125, + "learning_rate": 0.0001612514357981865, + "loss": 2.1483, + "step": 246595 + }, + { + "epoch": 0.58, + "grad_norm": 2.078125, + "learning_rate": 0.0001612499747882427, + "loss": 2.0994, + "step": 246600 + }, + { + "epoch": 0.58, + "grad_norm": 2.359375, + "learning_rate": 0.0001612485137573748, + "loss": 2.1155, + "step": 246605 + }, + { + "epoch": 0.58, + "grad_norm": 2.421875, + "learning_rate": 0.00016124705270558328, + "loss": 2.0118, + "step": 246610 + }, + { + "epoch": 0.58, + "grad_norm": 2.015625, + "learning_rate": 0.00016124559163286866, + "loss": 2.0018, + "step": 246615 + }, + { + "epoch": 0.58, + "grad_norm": 2.171875, + "learning_rate": 0.00016124413053923142, + "loss": 2.1728, + "step": 246620 + }, + { + "epoch": 0.58, + "grad_norm": 2.171875, + "learning_rate": 0.00016124266942467213, + "loss": 2.0487, + "step": 246625 + }, + { + "epoch": 0.58, + "grad_norm": 1.9765625, + "learning_rate": 0.00016124120828919122, + "loss": 2.0021, + "step": 246630 + }, + { + "epoch": 0.58, + "grad_norm": 2.34375, + "learning_rate": 0.00016123974713278915, + "loss": 2.0051, + "step": 246635 + }, + { + "epoch": 0.58, + "grad_norm": 2.171875, + "learning_rate": 0.00016123828595546652, + "loss": 1.9951, + "step": 246640 + }, + { + "epoch": 0.58, + "grad_norm": 2.046875, + "learning_rate": 0.00016123682475722374, + "loss": 2.3749, + "step": 246645 + }, + { + "epoch": 0.58, + "grad_norm": 2.265625, + "learning_rate": 0.00016123536353806136, + "loss": 2.3174, + "step": 246650 + }, + { + "epoch": 0.58, + "grad_norm": 1.796875, + "learning_rate": 0.0001612339022979799, + "loss": 2.1442, + "step": 246655 + }, + { + "epoch": 0.58, + "grad_norm": 2.015625, + "learning_rate": 0.00016123244103697984, + "loss": 2.1237, + "step": 246660 + }, + { + "epoch": 0.58, + "grad_norm": 2.125, + "learning_rate": 0.0001612309797550616, + "loss": 2.0505, + "step": 246665 + }, + { + "epoch": 0.58, + "grad_norm": 2.359375, + "learning_rate": 0.0001612295184522258, + "loss": 2.1114, + "step": 246670 + }, + { + "epoch": 0.58, + "grad_norm": 1.9453125, + "learning_rate": 0.00016122805712847287, + "loss": 2.2151, + "step": 246675 + }, + { + "epoch": 0.58, + "grad_norm": 2.65625, + "learning_rate": 0.00016122659578380333, + "loss": 1.9324, + "step": 246680 + }, + { + "epoch": 0.58, + "grad_norm": 1.828125, + "learning_rate": 0.00016122513441821766, + "loss": 2.081, + "step": 246685 + }, + { + "epoch": 0.58, + "grad_norm": 1.90625, + "learning_rate": 0.0001612236730317164, + "loss": 1.9975, + "step": 246690 + }, + { + "epoch": 0.58, + "grad_norm": 2.03125, + "learning_rate": 0.0001612222116243, + "loss": 2.2418, + "step": 246695 + }, + { + "epoch": 0.58, + "grad_norm": 2.46875, + "learning_rate": 0.00016122075019596902, + "loss": 2.1506, + "step": 246700 + }, + { + "epoch": 0.58, + "grad_norm": 1.90625, + "learning_rate": 0.00016121928874672387, + "loss": 2.1867, + "step": 246705 + }, + { + "epoch": 0.58, + "grad_norm": 2.09375, + "learning_rate": 0.0001612178272765651, + "loss": 2.1026, + "step": 246710 + }, + { + "epoch": 0.58, + "grad_norm": 1.703125, + "learning_rate": 0.00016121636578549325, + "loss": 2.2934, + "step": 246715 + }, + { + "epoch": 0.58, + "grad_norm": 2.15625, + "learning_rate": 0.00016121490427350877, + "loss": 2.139, + "step": 246720 + }, + { + "epoch": 0.58, + "grad_norm": 2.140625, + "learning_rate": 0.00016121344274061216, + "loss": 2.0483, + "step": 246725 + }, + { + "epoch": 0.58, + "grad_norm": 2.046875, + "learning_rate": 0.00016121198118680395, + "loss": 2.1374, + "step": 246730 + }, + { + "epoch": 0.58, + "grad_norm": 2.484375, + "learning_rate": 0.00016121051961208457, + "loss": 2.0794, + "step": 246735 + }, + { + "epoch": 0.58, + "grad_norm": 2.484375, + "learning_rate": 0.00016120905801645462, + "loss": 2.2831, + "step": 246740 + }, + { + "epoch": 0.58, + "grad_norm": 2.3125, + "learning_rate": 0.0001612075963999145, + "loss": 2.0268, + "step": 246745 + }, + { + "epoch": 0.58, + "grad_norm": 2.28125, + "learning_rate": 0.00016120613476246475, + "loss": 1.8638, + "step": 246750 + }, + { + "epoch": 0.58, + "grad_norm": 2.0625, + "learning_rate": 0.00016120467310410592, + "loss": 2.1987, + "step": 246755 + }, + { + "epoch": 0.58, + "grad_norm": 1.78125, + "learning_rate": 0.00016120321142483845, + "loss": 1.9756, + "step": 246760 + }, + { + "epoch": 0.58, + "grad_norm": 2.25, + "learning_rate": 0.00016120174972466284, + "loss": 2.2819, + "step": 246765 + }, + { + "epoch": 0.58, + "grad_norm": 1.9765625, + "learning_rate": 0.0001612002880035796, + "loss": 1.9758, + "step": 246770 + }, + { + "epoch": 0.58, + "grad_norm": 2.15625, + "learning_rate": 0.00016119882626158924, + "loss": 2.0384, + "step": 246775 + }, + { + "epoch": 0.58, + "grad_norm": 3.109375, + "learning_rate": 0.00016119736449869224, + "loss": 2.214, + "step": 246780 + }, + { + "epoch": 0.58, + "grad_norm": 2.296875, + "learning_rate": 0.00016119590271488913, + "loss": 2.1647, + "step": 246785 + }, + { + "epoch": 0.58, + "grad_norm": 2.1875, + "learning_rate": 0.00016119444091018038, + "loss": 2.0669, + "step": 246790 + }, + { + "epoch": 0.58, + "grad_norm": 1.6953125, + "learning_rate": 0.00016119297908456648, + "loss": 1.9754, + "step": 246795 + }, + { + "epoch": 0.58, + "grad_norm": 2.109375, + "learning_rate": 0.00016119151723804797, + "loss": 2.1502, + "step": 246800 + }, + { + "epoch": 0.58, + "grad_norm": 2.34375, + "learning_rate": 0.00016119005537062531, + "loss": 2.2444, + "step": 246805 + }, + { + "epoch": 0.58, + "grad_norm": 2.0625, + "learning_rate": 0.00016118859348229907, + "loss": 2.1128, + "step": 246810 + }, + { + "epoch": 0.58, + "grad_norm": 2.046875, + "learning_rate": 0.00016118713157306966, + "loss": 2.0786, + "step": 246815 + }, + { + "epoch": 0.58, + "grad_norm": 2.265625, + "learning_rate": 0.00016118566964293759, + "loss": 2.0698, + "step": 246820 + }, + { + "epoch": 0.58, + "grad_norm": 2.140625, + "learning_rate": 0.00016118420769190342, + "loss": 2.0863, + "step": 246825 + }, + { + "epoch": 0.58, + "grad_norm": 2.09375, + "learning_rate": 0.0001611827457199676, + "loss": 2.1351, + "step": 246830 + }, + { + "epoch": 0.58, + "grad_norm": 1.984375, + "learning_rate": 0.00016118128372713067, + "loss": 2.2094, + "step": 246835 + }, + { + "epoch": 0.58, + "grad_norm": 2.375, + "learning_rate": 0.00016117982171339309, + "loss": 1.9099, + "step": 246840 + }, + { + "epoch": 0.58, + "grad_norm": 2.203125, + "learning_rate": 0.00016117835967875536, + "loss": 1.9199, + "step": 246845 + }, + { + "epoch": 0.58, + "grad_norm": 2.234375, + "learning_rate": 0.00016117689762321799, + "loss": 2.2062, + "step": 246850 + }, + { + "epoch": 0.58, + "grad_norm": 2.09375, + "learning_rate": 0.0001611754355467815, + "loss": 2.1393, + "step": 246855 + }, + { + "epoch": 0.58, + "grad_norm": 1.96875, + "learning_rate": 0.00016117397344944635, + "loss": 2.2367, + "step": 246860 + }, + { + "epoch": 0.58, + "grad_norm": 2.234375, + "learning_rate": 0.00016117251133121308, + "loss": 2.0782, + "step": 246865 + }, + { + "epoch": 0.58, + "grad_norm": 2.53125, + "learning_rate": 0.0001611710491920822, + "loss": 2.1023, + "step": 246870 + }, + { + "epoch": 0.58, + "grad_norm": 2.28125, + "learning_rate": 0.00016116958703205414, + "loss": 2.1894, + "step": 246875 + }, + { + "epoch": 0.58, + "grad_norm": 2.015625, + "learning_rate": 0.00016116812485112943, + "loss": 2.1254, + "step": 246880 + }, + { + "epoch": 0.58, + "grad_norm": 1.7734375, + "learning_rate": 0.0001611666626493086, + "loss": 2.0242, + "step": 246885 + }, + { + "epoch": 0.58, + "grad_norm": 2.078125, + "learning_rate": 0.00016116520042659213, + "loss": 2.2314, + "step": 246890 + }, + { + "epoch": 0.58, + "grad_norm": 2.21875, + "learning_rate": 0.00016116373818298048, + "loss": 2.1352, + "step": 246895 + }, + { + "epoch": 0.58, + "grad_norm": 2.53125, + "learning_rate": 0.00016116227591847423, + "loss": 2.2931, + "step": 246900 + }, + { + "epoch": 0.58, + "grad_norm": 2.328125, + "learning_rate": 0.00016116081363307382, + "loss": 2.1227, + "step": 246905 + }, + { + "epoch": 0.58, + "grad_norm": 2.046875, + "learning_rate": 0.00016115935132677978, + "loss": 2.0837, + "step": 246910 + }, + { + "epoch": 0.58, + "grad_norm": 2.28125, + "learning_rate": 0.0001611578889995926, + "loss": 2.0367, + "step": 246915 + }, + { + "epoch": 0.58, + "grad_norm": 2.359375, + "learning_rate": 0.00016115642665151278, + "loss": 2.0638, + "step": 246920 + }, + { + "epoch": 0.58, + "grad_norm": 2.15625, + "learning_rate": 0.0001611549642825408, + "loss": 2.2167, + "step": 246925 + }, + { + "epoch": 0.58, + "grad_norm": 2.046875, + "learning_rate": 0.00016115350189267716, + "loss": 2.2555, + "step": 246930 + }, + { + "epoch": 0.58, + "grad_norm": 1.578125, + "learning_rate": 0.00016115203948192242, + "loss": 2.1266, + "step": 246935 + }, + { + "epoch": 0.58, + "grad_norm": 2.375, + "learning_rate": 0.000161150577050277, + "loss": 2.147, + "step": 246940 + }, + { + "epoch": 0.58, + "grad_norm": 1.7890625, + "learning_rate": 0.00016114911459774142, + "loss": 2.1691, + "step": 246945 + }, + { + "epoch": 0.58, + "grad_norm": 2.140625, + "learning_rate": 0.0001611476521243162, + "loss": 2.3334, + "step": 246950 + }, + { + "epoch": 0.58, + "grad_norm": 2.34375, + "learning_rate": 0.00016114618963000186, + "loss": 2.2862, + "step": 246955 + }, + { + "epoch": 0.58, + "grad_norm": 1.9921875, + "learning_rate": 0.00016114472711479887, + "loss": 2.0922, + "step": 246960 + }, + { + "epoch": 0.58, + "grad_norm": 2.796875, + "learning_rate": 0.00016114326457870772, + "loss": 1.987, + "step": 246965 + }, + { + "epoch": 0.58, + "grad_norm": 2.515625, + "learning_rate": 0.00016114180202172893, + "loss": 2.0348, + "step": 246970 + }, + { + "epoch": 0.58, + "grad_norm": 2.25, + "learning_rate": 0.00016114033944386298, + "loss": 2.0774, + "step": 246975 + }, + { + "epoch": 0.58, + "grad_norm": 1.8671875, + "learning_rate": 0.0001611388768451104, + "loss": 2.2096, + "step": 246980 + }, + { + "epoch": 0.58, + "grad_norm": 2.21875, + "learning_rate": 0.00016113741422547163, + "loss": 2.0124, + "step": 246985 + }, + { + "epoch": 0.58, + "grad_norm": 2.125, + "learning_rate": 0.00016113595158494723, + "loss": 2.1586, + "step": 246990 + }, + { + "epoch": 0.58, + "grad_norm": 2.140625, + "learning_rate": 0.0001611344889235377, + "loss": 2.0891, + "step": 246995 + }, + { + "epoch": 0.58, + "grad_norm": 2.265625, + "learning_rate": 0.00016113302624124352, + "loss": 2.1283, + "step": 247000 + }, + { + "epoch": 0.58, + "grad_norm": 1.90625, + "learning_rate": 0.00016113156353806518, + "loss": 2.165, + "step": 247005 + }, + { + "epoch": 0.58, + "grad_norm": 2.203125, + "learning_rate": 0.00016113010081400317, + "loss": 2.1124, + "step": 247010 + }, + { + "epoch": 0.58, + "grad_norm": 3.109375, + "learning_rate": 0.00016112863806905803, + "loss": 2.0618, + "step": 247015 + }, + { + "epoch": 0.58, + "grad_norm": 2.59375, + "learning_rate": 0.00016112717530323022, + "loss": 2.0798, + "step": 247020 + }, + { + "epoch": 0.58, + "grad_norm": 2.3125, + "learning_rate": 0.00016112571251652029, + "loss": 2.045, + "step": 247025 + }, + { + "epoch": 0.58, + "grad_norm": 1.875, + "learning_rate": 0.0001611242497089287, + "loss": 2.0983, + "step": 247030 + }, + { + "epoch": 0.58, + "grad_norm": 2.265625, + "learning_rate": 0.00016112278688045596, + "loss": 1.8506, + "step": 247035 + }, + { + "epoch": 0.58, + "grad_norm": 2.328125, + "learning_rate": 0.0001611213240311025, + "loss": 2.1618, + "step": 247040 + }, + { + "epoch": 0.58, + "grad_norm": 2.1875, + "learning_rate": 0.000161119861160869, + "loss": 2.2042, + "step": 247045 + }, + { + "epoch": 0.58, + "grad_norm": 2.25, + "learning_rate": 0.00016111839826975577, + "loss": 2.0715, + "step": 247050 + }, + { + "epoch": 0.58, + "grad_norm": 2.25, + "learning_rate": 0.00016111693535776343, + "loss": 2.0455, + "step": 247055 + }, + { + "epoch": 0.58, + "grad_norm": 2.359375, + "learning_rate": 0.0001611154724248924, + "loss": 1.9512, + "step": 247060 + }, + { + "epoch": 0.58, + "grad_norm": 2.15625, + "learning_rate": 0.0001611140094711432, + "loss": 2.0474, + "step": 247065 + }, + { + "epoch": 0.58, + "grad_norm": 2.5, + "learning_rate": 0.0001611125464965164, + "loss": 2.0633, + "step": 247070 + }, + { + "epoch": 0.58, + "grad_norm": 2.375, + "learning_rate": 0.00016111108350101242, + "loss": 2.0703, + "step": 247075 + }, + { + "epoch": 0.58, + "grad_norm": 1.9296875, + "learning_rate": 0.0001611096204846318, + "loss": 1.9434, + "step": 247080 + }, + { + "epoch": 0.58, + "grad_norm": 2.0625, + "learning_rate": 0.000161108157447375, + "loss": 2.0264, + "step": 247085 + }, + { + "epoch": 0.58, + "grad_norm": 2.09375, + "learning_rate": 0.00016110669438924253, + "loss": 2.068, + "step": 247090 + }, + { + "epoch": 0.58, + "grad_norm": 2.3125, + "learning_rate": 0.00016110523131023495, + "loss": 2.0439, + "step": 247095 + }, + { + "epoch": 0.58, + "grad_norm": 1.5, + "learning_rate": 0.0001611037682103527, + "loss": 2.0038, + "step": 247100 + }, + { + "epoch": 0.58, + "grad_norm": 2.484375, + "learning_rate": 0.0001611023050895963, + "loss": 1.8301, + "step": 247105 + }, + { + "epoch": 0.58, + "grad_norm": 2.5, + "learning_rate": 0.00016110084194796622, + "loss": 2.1414, + "step": 247110 + }, + { + "epoch": 0.58, + "grad_norm": 2.046875, + "learning_rate": 0.000161099378785463, + "loss": 2.0813, + "step": 247115 + }, + { + "epoch": 0.58, + "grad_norm": 2.359375, + "learning_rate": 0.00016109791560208711, + "loss": 2.0878, + "step": 247120 + }, + { + "epoch": 0.58, + "grad_norm": 2.15625, + "learning_rate": 0.0001610964523978391, + "loss": 2.4067, + "step": 247125 + }, + { + "epoch": 0.58, + "grad_norm": 1.890625, + "learning_rate": 0.0001610949891727194, + "loss": 1.9606, + "step": 247130 + }, + { + "epoch": 0.58, + "grad_norm": 2.125, + "learning_rate": 0.00016109352592672858, + "loss": 2.2623, + "step": 247135 + }, + { + "epoch": 0.58, + "grad_norm": 2.5625, + "learning_rate": 0.00016109206265986706, + "loss": 2.0056, + "step": 247140 + }, + { + "epoch": 0.58, + "grad_norm": 2.171875, + "learning_rate": 0.0001610905993721354, + "loss": 2.1425, + "step": 247145 + }, + { + "epoch": 0.58, + "grad_norm": 2.078125, + "learning_rate": 0.00016108913606353409, + "loss": 2.0653, + "step": 247150 + }, + { + "epoch": 0.58, + "grad_norm": 3.25, + "learning_rate": 0.0001610876727340636, + "loss": 2.0839, + "step": 247155 + }, + { + "epoch": 0.58, + "grad_norm": 2.359375, + "learning_rate": 0.0001610862093837245, + "loss": 1.9244, + "step": 247160 + }, + { + "epoch": 0.58, + "grad_norm": 2.421875, + "learning_rate": 0.0001610847460125172, + "loss": 2.0724, + "step": 247165 + }, + { + "epoch": 0.58, + "grad_norm": 1.953125, + "learning_rate": 0.00016108328262044226, + "loss": 2.1587, + "step": 247170 + }, + { + "epoch": 0.58, + "grad_norm": 1.7734375, + "learning_rate": 0.00016108181920750016, + "loss": 2.1854, + "step": 247175 + }, + { + "epoch": 0.58, + "grad_norm": 2.28125, + "learning_rate": 0.00016108035577369143, + "loss": 2.0331, + "step": 247180 + }, + { + "epoch": 0.58, + "grad_norm": 1.828125, + "learning_rate": 0.0001610788923190165, + "loss": 1.9785, + "step": 247185 + }, + { + "epoch": 0.58, + "grad_norm": 2.25, + "learning_rate": 0.00016107742884347595, + "loss": 2.184, + "step": 247190 + }, + { + "epoch": 0.58, + "grad_norm": 2.125, + "learning_rate": 0.0001610759653470702, + "loss": 2.1114, + "step": 247195 + }, + { + "epoch": 0.58, + "grad_norm": 2.109375, + "learning_rate": 0.0001610745018297998, + "loss": 2.1486, + "step": 247200 + }, + { + "epoch": 0.58, + "grad_norm": 2.03125, + "learning_rate": 0.00016107303829166527, + "loss": 2.0008, + "step": 247205 + }, + { + "epoch": 0.58, + "grad_norm": 1.78125, + "learning_rate": 0.00016107157473266707, + "loss": 1.8922, + "step": 247210 + }, + { + "epoch": 0.58, + "grad_norm": 2.125, + "learning_rate": 0.00016107011115280574, + "loss": 2.2083, + "step": 247215 + }, + { + "epoch": 0.58, + "grad_norm": 1.90625, + "learning_rate": 0.0001610686475520817, + "loss": 1.9015, + "step": 247220 + }, + { + "epoch": 0.58, + "grad_norm": 1.8984375, + "learning_rate": 0.00016106718393049554, + "loss": 2.0233, + "step": 247225 + }, + { + "epoch": 0.58, + "grad_norm": 2.3125, + "learning_rate": 0.00016106572028804766, + "loss": 2.0449, + "step": 247230 + }, + { + "epoch": 0.58, + "grad_norm": 2.296875, + "learning_rate": 0.00016106425662473873, + "loss": 2.173, + "step": 247235 + }, + { + "epoch": 0.58, + "grad_norm": 2.234375, + "learning_rate": 0.00016106279294056906, + "loss": 2.0917, + "step": 247240 + }, + { + "epoch": 0.58, + "grad_norm": 2.375, + "learning_rate": 0.00016106132923553927, + "loss": 1.993, + "step": 247245 + }, + { + "epoch": 0.58, + "grad_norm": 2.25, + "learning_rate": 0.0001610598655096498, + "loss": 2.1475, + "step": 247250 + }, + { + "epoch": 0.58, + "grad_norm": 2.328125, + "learning_rate": 0.00016105840176290116, + "loss": 1.7746, + "step": 247255 + }, + { + "epoch": 0.58, + "grad_norm": 2.40625, + "learning_rate": 0.00016105693799529387, + "loss": 2.1879, + "step": 247260 + }, + { + "epoch": 0.58, + "grad_norm": 2.28125, + "learning_rate": 0.00016105547420682846, + "loss": 2.2091, + "step": 247265 + }, + { + "epoch": 0.58, + "grad_norm": 2.046875, + "learning_rate": 0.0001610540103975054, + "loss": 2.1778, + "step": 247270 + }, + { + "epoch": 0.58, + "grad_norm": 1.8359375, + "learning_rate": 0.00016105254656732513, + "loss": 2.035, + "step": 247275 + }, + { + "epoch": 0.58, + "grad_norm": 2.296875, + "learning_rate": 0.00016105108271628825, + "loss": 2.0866, + "step": 247280 + }, + { + "epoch": 0.58, + "grad_norm": 2.609375, + "learning_rate": 0.00016104961884439516, + "loss": 2.256, + "step": 247285 + }, + { + "epoch": 0.58, + "grad_norm": 2.3125, + "learning_rate": 0.00016104815495164644, + "loss": 2.0897, + "step": 247290 + }, + { + "epoch": 0.58, + "grad_norm": 2.15625, + "learning_rate": 0.00016104669103804257, + "loss": 2.173, + "step": 247295 + }, + { + "epoch": 0.58, + "grad_norm": 2.171875, + "learning_rate": 0.00016104522710358405, + "loss": 2.1617, + "step": 247300 + }, + { + "epoch": 0.58, + "grad_norm": 1.7265625, + "learning_rate": 0.00016104376314827134, + "loss": 1.9677, + "step": 247305 + }, + { + "epoch": 0.58, + "grad_norm": 2.0625, + "learning_rate": 0.000161042299172105, + "loss": 2.3106, + "step": 247310 + }, + { + "epoch": 0.58, + "grad_norm": 2.4375, + "learning_rate": 0.0001610408351750855, + "loss": 2.1628, + "step": 247315 + }, + { + "epoch": 0.58, + "grad_norm": 1.71875, + "learning_rate": 0.00016103937115721334, + "loss": 1.9631, + "step": 247320 + }, + { + "epoch": 0.58, + "grad_norm": 2.40625, + "learning_rate": 0.00016103790711848902, + "loss": 2.0536, + "step": 247325 + }, + { + "epoch": 0.58, + "grad_norm": 2.21875, + "learning_rate": 0.00016103644305891308, + "loss": 2.2338, + "step": 247330 + }, + { + "epoch": 0.58, + "grad_norm": 1.8125, + "learning_rate": 0.00016103497897848593, + "loss": 2.0575, + "step": 247335 + }, + { + "epoch": 0.58, + "grad_norm": 2.203125, + "learning_rate": 0.00016103351487720816, + "loss": 1.9867, + "step": 247340 + }, + { + "epoch": 0.58, + "grad_norm": 1.6875, + "learning_rate": 0.0001610320507550802, + "loss": 1.9875, + "step": 247345 + }, + { + "epoch": 0.58, + "grad_norm": 2.15625, + "learning_rate": 0.00016103058661210264, + "loss": 2.0641, + "step": 247350 + }, + { + "epoch": 0.58, + "grad_norm": 1.984375, + "learning_rate": 0.0001610291224482759, + "loss": 2.0937, + "step": 247355 + }, + { + "epoch": 0.58, + "grad_norm": 1.921875, + "learning_rate": 0.00016102765826360048, + "loss": 1.9886, + "step": 247360 + }, + { + "epoch": 0.58, + "grad_norm": 2.546875, + "learning_rate": 0.0001610261940580769, + "loss": 2.0352, + "step": 247365 + }, + { + "epoch": 0.58, + "grad_norm": 2.390625, + "learning_rate": 0.00016102472983170569, + "loss": 2.2285, + "step": 247370 + }, + { + "epoch": 0.58, + "grad_norm": 1.484375, + "learning_rate": 0.00016102326558448734, + "loss": 1.9859, + "step": 247375 + }, + { + "epoch": 0.58, + "grad_norm": 1.7421875, + "learning_rate": 0.0001610218013164223, + "loss": 2.1853, + "step": 247380 + }, + { + "epoch": 0.58, + "grad_norm": 1.65625, + "learning_rate": 0.00016102033702751112, + "loss": 1.9668, + "step": 247385 + }, + { + "epoch": 0.58, + "grad_norm": 2.328125, + "learning_rate": 0.00016101887271775431, + "loss": 2.0374, + "step": 247390 + }, + { + "epoch": 0.58, + "grad_norm": 2.125, + "learning_rate": 0.00016101740838715234, + "loss": 2.2322, + "step": 247395 + }, + { + "epoch": 0.58, + "grad_norm": 2.21875, + "learning_rate": 0.00016101594403570568, + "loss": 2.2458, + "step": 247400 + }, + { + "epoch": 0.58, + "grad_norm": 2.4375, + "learning_rate": 0.0001610144796634149, + "loss": 2.0434, + "step": 247405 + }, + { + "epoch": 0.58, + "grad_norm": 2.1875, + "learning_rate": 0.00016101301527028045, + "loss": 2.112, + "step": 247410 + }, + { + "epoch": 0.58, + "grad_norm": 2.25, + "learning_rate": 0.00016101155085630284, + "loss": 2.1233, + "step": 247415 + }, + { + "epoch": 0.58, + "grad_norm": 2.203125, + "learning_rate": 0.0001610100864214826, + "loss": 2.1014, + "step": 247420 + }, + { + "epoch": 0.58, + "grad_norm": 2.90625, + "learning_rate": 0.0001610086219658202, + "loss": 2.118, + "step": 247425 + }, + { + "epoch": 0.58, + "grad_norm": 1.953125, + "learning_rate": 0.00016100715748931617, + "loss": 2.2006, + "step": 247430 + }, + { + "epoch": 0.58, + "grad_norm": 2.046875, + "learning_rate": 0.00016100569299197094, + "loss": 2.1531, + "step": 247435 + }, + { + "epoch": 0.58, + "grad_norm": 1.90625, + "learning_rate": 0.0001610042284737851, + "loss": 2.1201, + "step": 247440 + }, + { + "epoch": 0.58, + "grad_norm": 2.421875, + "learning_rate": 0.0001610027639347591, + "loss": 2.1494, + "step": 247445 + }, + { + "epoch": 0.58, + "grad_norm": 1.90625, + "learning_rate": 0.00016100129937489345, + "loss": 2.0908, + "step": 247450 + }, + { + "epoch": 0.58, + "grad_norm": 2.484375, + "learning_rate": 0.00016099983479418866, + "loss": 2.2122, + "step": 247455 + }, + { + "epoch": 0.58, + "grad_norm": 2.234375, + "learning_rate": 0.00016099837019264518, + "loss": 2.1997, + "step": 247460 + }, + { + "epoch": 0.58, + "grad_norm": 1.96875, + "learning_rate": 0.00016099690557026358, + "loss": 2.1294, + "step": 247465 + }, + { + "epoch": 0.58, + "grad_norm": 1.96875, + "learning_rate": 0.0001609954409270443, + "loss": 2.2915, + "step": 247470 + }, + { + "epoch": 0.58, + "grad_norm": 1.9296875, + "learning_rate": 0.0001609939762629879, + "loss": 2.1293, + "step": 247475 + }, + { + "epoch": 0.58, + "grad_norm": 2.171875, + "learning_rate": 0.00016099251157809487, + "loss": 2.0805, + "step": 247480 + }, + { + "epoch": 0.58, + "grad_norm": 2.3125, + "learning_rate": 0.00016099104687236567, + "loss": 2.043, + "step": 247485 + }, + { + "epoch": 0.58, + "grad_norm": 2.109375, + "learning_rate": 0.00016098958214580083, + "loss": 1.8556, + "step": 247490 + }, + { + "epoch": 0.58, + "grad_norm": 2.34375, + "learning_rate": 0.00016098811739840083, + "loss": 1.9687, + "step": 247495 + }, + { + "epoch": 0.58, + "grad_norm": 2.34375, + "learning_rate": 0.0001609866526301662, + "loss": 2.0977, + "step": 247500 + }, + { + "epoch": 0.58, + "grad_norm": 2.09375, + "learning_rate": 0.00016098518784109743, + "loss": 2.0405, + "step": 247505 + }, + { + "epoch": 0.58, + "grad_norm": 1.8671875, + "learning_rate": 0.000160983723031195, + "loss": 2.1969, + "step": 247510 + }, + { + "epoch": 0.58, + "grad_norm": 2.046875, + "learning_rate": 0.00016098225820045938, + "loss": 2.1512, + "step": 247515 + }, + { + "epoch": 0.58, + "grad_norm": 2.234375, + "learning_rate": 0.00016098079334889116, + "loss": 2.01, + "step": 247520 + }, + { + "epoch": 0.58, + "grad_norm": 2.484375, + "learning_rate": 0.0001609793284764908, + "loss": 1.9659, + "step": 247525 + }, + { + "epoch": 0.58, + "grad_norm": 2.234375, + "learning_rate": 0.0001609778635832588, + "loss": 2.132, + "step": 247530 + }, + { + "epoch": 0.58, + "grad_norm": 1.90625, + "learning_rate": 0.00016097639866919562, + "loss": 1.9157, + "step": 247535 + }, + { + "epoch": 0.58, + "grad_norm": 2.1875, + "learning_rate": 0.00016097493373430183, + "loss": 2.0721, + "step": 247540 + }, + { + "epoch": 0.58, + "grad_norm": 1.953125, + "learning_rate": 0.00016097346877857785, + "loss": 2.0258, + "step": 247545 + }, + { + "epoch": 0.58, + "grad_norm": 2.03125, + "learning_rate": 0.0001609720038020243, + "loss": 2.1161, + "step": 247550 + }, + { + "epoch": 0.58, + "grad_norm": 2.109375, + "learning_rate": 0.00016097053880464155, + "loss": 1.985, + "step": 247555 + }, + { + "epoch": 0.58, + "grad_norm": 2.171875, + "learning_rate": 0.00016096907378643018, + "loss": 1.9653, + "step": 247560 + }, + { + "epoch": 0.58, + "grad_norm": 2.015625, + "learning_rate": 0.00016096760874739066, + "loss": 2.102, + "step": 247565 + }, + { + "epoch": 0.58, + "grad_norm": 2.140625, + "learning_rate": 0.0001609661436875235, + "loss": 2.277, + "step": 247570 + }, + { + "epoch": 0.58, + "grad_norm": 2.078125, + "learning_rate": 0.00016096467860682922, + "loss": 2.1286, + "step": 247575 + }, + { + "epoch": 0.58, + "grad_norm": 2.609375, + "learning_rate": 0.0001609632135053083, + "loss": 2.0481, + "step": 247580 + }, + { + "epoch": 0.58, + "grad_norm": 1.953125, + "learning_rate": 0.0001609617483829612, + "loss": 2.106, + "step": 247585 + }, + { + "epoch": 0.58, + "grad_norm": 1.96875, + "learning_rate": 0.0001609602832397885, + "loss": 1.9815, + "step": 247590 + }, + { + "epoch": 0.58, + "grad_norm": 2.140625, + "learning_rate": 0.00016095881807579065, + "loss": 2.13, + "step": 247595 + }, + { + "epoch": 0.58, + "grad_norm": 2.4375, + "learning_rate": 0.00016095735289096817, + "loss": 2.2701, + "step": 247600 + }, + { + "epoch": 0.58, + "grad_norm": 1.8125, + "learning_rate": 0.00016095588768532152, + "loss": 1.9719, + "step": 247605 + }, + { + "epoch": 0.58, + "grad_norm": 2.34375, + "learning_rate": 0.00016095442245885129, + "loss": 2.1195, + "step": 247610 + }, + { + "epoch": 0.58, + "grad_norm": 2.0, + "learning_rate": 0.0001609529572115579, + "loss": 2.1945, + "step": 247615 + }, + { + "epoch": 0.58, + "grad_norm": 2.5625, + "learning_rate": 0.00016095149194344185, + "loss": 1.8866, + "step": 247620 + }, + { + "epoch": 0.58, + "grad_norm": 2.234375, + "learning_rate": 0.0001609500266545037, + "loss": 2.0452, + "step": 247625 + }, + { + "epoch": 0.58, + "grad_norm": 2.3125, + "learning_rate": 0.00016094856134474386, + "loss": 2.1501, + "step": 247630 + }, + { + "epoch": 0.58, + "grad_norm": 2.0625, + "learning_rate": 0.00016094709601416294, + "loss": 2.1752, + "step": 247635 + }, + { + "epoch": 0.58, + "grad_norm": 1.7265625, + "learning_rate": 0.00016094563066276134, + "loss": 1.9725, + "step": 247640 + }, + { + "epoch": 0.58, + "grad_norm": 2.40625, + "learning_rate": 0.00016094416529053966, + "loss": 1.7923, + "step": 247645 + }, + { + "epoch": 0.58, + "grad_norm": 2.3125, + "learning_rate": 0.00016094269989749832, + "loss": 2.1688, + "step": 247650 + }, + { + "epoch": 0.58, + "grad_norm": 2.375, + "learning_rate": 0.00016094123448363786, + "loss": 2.1611, + "step": 247655 + }, + { + "epoch": 0.58, + "grad_norm": 2.125, + "learning_rate": 0.00016093976904895874, + "loss": 2.2568, + "step": 247660 + }, + { + "epoch": 0.58, + "grad_norm": 2.296875, + "learning_rate": 0.00016093830359346152, + "loss": 2.1338, + "step": 247665 + }, + { + "epoch": 0.58, + "grad_norm": 2.546875, + "learning_rate": 0.00016093683811714665, + "loss": 1.9022, + "step": 247670 + }, + { + "epoch": 0.58, + "grad_norm": 2.4375, + "learning_rate": 0.00016093537262001467, + "loss": 2.0523, + "step": 247675 + }, + { + "epoch": 0.58, + "grad_norm": 2.0625, + "learning_rate": 0.00016093390710206603, + "loss": 2.003, + "step": 247680 + }, + { + "epoch": 0.58, + "grad_norm": 2.59375, + "learning_rate": 0.0001609324415633013, + "loss": 2.0054, + "step": 247685 + }, + { + "epoch": 0.58, + "grad_norm": 2.109375, + "learning_rate": 0.00016093097600372092, + "loss": 2.2685, + "step": 247690 + }, + { + "epoch": 0.58, + "grad_norm": 1.9375, + "learning_rate": 0.00016092951042332544, + "loss": 2.0196, + "step": 247695 + }, + { + "epoch": 0.58, + "grad_norm": 1.7890625, + "learning_rate": 0.00016092804482211533, + "loss": 2.1148, + "step": 247700 + }, + { + "epoch": 0.58, + "grad_norm": 2.1875, + "learning_rate": 0.00016092657920009108, + "loss": 2.075, + "step": 247705 + }, + { + "epoch": 0.58, + "grad_norm": 2.21875, + "learning_rate": 0.0001609251135572532, + "loss": 2.1215, + "step": 247710 + }, + { + "epoch": 0.58, + "grad_norm": 2.15625, + "learning_rate": 0.0001609236478936022, + "loss": 2.0938, + "step": 247715 + }, + { + "epoch": 0.58, + "grad_norm": 2.609375, + "learning_rate": 0.0001609221822091386, + "loss": 2.0825, + "step": 247720 + }, + { + "epoch": 0.58, + "grad_norm": 7.1875, + "learning_rate": 0.00016092071650386286, + "loss": 2.3089, + "step": 247725 + }, + { + "epoch": 0.58, + "grad_norm": 1.8515625, + "learning_rate": 0.00016091925077777548, + "loss": 1.9791, + "step": 247730 + }, + { + "epoch": 0.58, + "grad_norm": 2.28125, + "learning_rate": 0.000160917785030877, + "loss": 2.0771, + "step": 247735 + }, + { + "epoch": 0.58, + "grad_norm": 2.03125, + "learning_rate": 0.0001609163192631679, + "loss": 2.0777, + "step": 247740 + }, + { + "epoch": 0.58, + "grad_norm": 1.890625, + "learning_rate": 0.00016091485347464866, + "loss": 2.0776, + "step": 247745 + }, + { + "epoch": 0.58, + "grad_norm": 1.8359375, + "learning_rate": 0.00016091338766531982, + "loss": 2.0682, + "step": 247750 + }, + { + "epoch": 0.58, + "grad_norm": 1.8125, + "learning_rate": 0.00016091192183518186, + "loss": 2.0908, + "step": 247755 + }, + { + "epoch": 0.58, + "grad_norm": 1.875, + "learning_rate": 0.0001609104559842353, + "loss": 1.9719, + "step": 247760 + }, + { + "epoch": 0.58, + "grad_norm": 2.296875, + "learning_rate": 0.00016090899011248058, + "loss": 2.1213, + "step": 247765 + }, + { + "epoch": 0.58, + "grad_norm": 1.8125, + "learning_rate": 0.00016090752421991827, + "loss": 1.9131, + "step": 247770 + }, + { + "epoch": 0.58, + "grad_norm": 2.640625, + "learning_rate": 0.00016090605830654885, + "loss": 2.0582, + "step": 247775 + }, + { + "epoch": 0.58, + "grad_norm": 2.03125, + "learning_rate": 0.0001609045923723728, + "loss": 2.1974, + "step": 247780 + }, + { + "epoch": 0.58, + "grad_norm": 2.609375, + "learning_rate": 0.00016090312641739066, + "loss": 2.1217, + "step": 247785 + }, + { + "epoch": 0.58, + "grad_norm": 2.90625, + "learning_rate": 0.00016090166044160288, + "loss": 2.0225, + "step": 247790 + }, + { + "epoch": 0.58, + "grad_norm": 2.046875, + "learning_rate": 0.00016090019444501001, + "loss": 2.0883, + "step": 247795 + }, + { + "epoch": 0.58, + "grad_norm": 1.984375, + "learning_rate": 0.00016089872842761253, + "loss": 2.174, + "step": 247800 + }, + { + "epoch": 0.58, + "grad_norm": 2.0625, + "learning_rate": 0.00016089726238941094, + "loss": 2.1074, + "step": 247805 + }, + { + "epoch": 0.58, + "grad_norm": 1.953125, + "learning_rate": 0.00016089579633040573, + "loss": 2.0583, + "step": 247810 + }, + { + "epoch": 0.58, + "grad_norm": 2.0625, + "learning_rate": 0.00016089433025059743, + "loss": 1.9963, + "step": 247815 + }, + { + "epoch": 0.58, + "grad_norm": 2.265625, + "learning_rate": 0.00016089286414998648, + "loss": 2.1518, + "step": 247820 + }, + { + "epoch": 0.58, + "grad_norm": 2.1875, + "learning_rate": 0.00016089139802857345, + "loss": 2.1605, + "step": 247825 + }, + { + "epoch": 0.58, + "grad_norm": 1.7734375, + "learning_rate": 0.00016088993188635882, + "loss": 2.0261, + "step": 247830 + }, + { + "epoch": 0.58, + "grad_norm": 2.140625, + "learning_rate": 0.0001608884657233431, + "loss": 2.0979, + "step": 247835 + }, + { + "epoch": 0.58, + "grad_norm": 1.9765625, + "learning_rate": 0.00016088699953952673, + "loss": 2.0194, + "step": 247840 + }, + { + "epoch": 0.58, + "grad_norm": 2.078125, + "learning_rate": 0.00016088553333491027, + "loss": 2.1453, + "step": 247845 + }, + { + "epoch": 0.58, + "grad_norm": 1.984375, + "learning_rate": 0.00016088406710949423, + "loss": 2.011, + "step": 247850 + }, + { + "epoch": 0.58, + "grad_norm": 2.1875, + "learning_rate": 0.00016088260086327908, + "loss": 2.1975, + "step": 247855 + }, + { + "epoch": 0.58, + "grad_norm": 1.8515625, + "learning_rate": 0.00016088113459626532, + "loss": 1.9943, + "step": 247860 + }, + { + "epoch": 0.58, + "grad_norm": 2.171875, + "learning_rate": 0.0001608796683084535, + "loss": 2.1085, + "step": 247865 + }, + { + "epoch": 0.58, + "grad_norm": 2.203125, + "learning_rate": 0.00016087820199984403, + "loss": 2.2104, + "step": 247870 + }, + { + "epoch": 0.58, + "grad_norm": 1.9296875, + "learning_rate": 0.00016087673567043747, + "loss": 1.9381, + "step": 247875 + }, + { + "epoch": 0.58, + "grad_norm": 2.21875, + "learning_rate": 0.00016087526932023435, + "loss": 1.9957, + "step": 247880 + }, + { + "epoch": 0.58, + "grad_norm": 2.59375, + "learning_rate": 0.0001608738029492351, + "loss": 2.2038, + "step": 247885 + }, + { + "epoch": 0.58, + "grad_norm": 2.0625, + "learning_rate": 0.00016087233655744028, + "loss": 1.8612, + "step": 247890 + }, + { + "epoch": 0.58, + "grad_norm": 2.40625, + "learning_rate": 0.00016087087014485032, + "loss": 1.9024, + "step": 247895 + }, + { + "epoch": 0.58, + "grad_norm": 2.28125, + "learning_rate": 0.0001608694037114658, + "loss": 1.9625, + "step": 247900 + }, + { + "epoch": 0.58, + "grad_norm": 1.8671875, + "learning_rate": 0.00016086793725728717, + "loss": 1.9925, + "step": 247905 + }, + { + "epoch": 0.58, + "grad_norm": 1.890625, + "learning_rate": 0.00016086647078231497, + "loss": 2.3482, + "step": 247910 + }, + { + "epoch": 0.58, + "grad_norm": 2.03125, + "learning_rate": 0.00016086500428654968, + "loss": 1.9693, + "step": 247915 + }, + { + "epoch": 0.58, + "grad_norm": 1.84375, + "learning_rate": 0.00016086353776999178, + "loss": 2.0291, + "step": 247920 + }, + { + "epoch": 0.58, + "grad_norm": 2.15625, + "learning_rate": 0.0001608620712326418, + "loss": 2.2282, + "step": 247925 + }, + { + "epoch": 0.58, + "grad_norm": 2.0625, + "learning_rate": 0.00016086060467450025, + "loss": 2.0764, + "step": 247930 + }, + { + "epoch": 0.58, + "grad_norm": 1.9296875, + "learning_rate": 0.0001608591380955676, + "loss": 2.0693, + "step": 247935 + }, + { + "epoch": 0.58, + "grad_norm": 2.078125, + "learning_rate": 0.00016085767149584437, + "loss": 1.9975, + "step": 247940 + }, + { + "epoch": 0.58, + "grad_norm": 2.109375, + "learning_rate": 0.00016085620487533105, + "loss": 1.9895, + "step": 247945 + }, + { + "epoch": 0.58, + "grad_norm": 2.03125, + "learning_rate": 0.00016085473823402815, + "loss": 1.9593, + "step": 247950 + }, + { + "epoch": 0.58, + "grad_norm": 2.46875, + "learning_rate": 0.00016085327157193614, + "loss": 2.2375, + "step": 247955 + }, + { + "epoch": 0.58, + "grad_norm": 2.53125, + "learning_rate": 0.00016085180488905557, + "loss": 2.1231, + "step": 247960 + }, + { + "epoch": 0.58, + "grad_norm": 1.6875, + "learning_rate": 0.00016085033818538695, + "loss": 1.9745, + "step": 247965 + }, + { + "epoch": 0.58, + "grad_norm": 2.03125, + "learning_rate": 0.00016084887146093074, + "loss": 2.1764, + "step": 247970 + }, + { + "epoch": 0.58, + "grad_norm": 2.03125, + "learning_rate": 0.0001608474047156874, + "loss": 2.2645, + "step": 247975 + }, + { + "epoch": 0.58, + "grad_norm": 1.59375, + "learning_rate": 0.00016084593794965754, + "loss": 2.236, + "step": 247980 + }, + { + "epoch": 0.58, + "grad_norm": 1.75, + "learning_rate": 0.0001608444711628416, + "loss": 1.93, + "step": 247985 + }, + { + "epoch": 0.58, + "grad_norm": 2.1875, + "learning_rate": 0.00016084300435524008, + "loss": 1.9467, + "step": 247990 + }, + { + "epoch": 0.58, + "grad_norm": 2.03125, + "learning_rate": 0.00016084153752685346, + "loss": 2.0753, + "step": 247995 + }, + { + "epoch": 0.58, + "grad_norm": 2.390625, + "learning_rate": 0.00016084007067768228, + "loss": 2.1193, + "step": 248000 + }, + { + "epoch": 0.58, + "grad_norm": 2.1875, + "learning_rate": 0.00016083860380772702, + "loss": 2.1874, + "step": 248005 + }, + { + "epoch": 0.58, + "grad_norm": 2.1875, + "learning_rate": 0.0001608371369169882, + "loss": 2.1992, + "step": 248010 + }, + { + "epoch": 0.58, + "grad_norm": 2.140625, + "learning_rate": 0.0001608356700054663, + "loss": 2.1647, + "step": 248015 + }, + { + "epoch": 0.58, + "grad_norm": 1.9375, + "learning_rate": 0.00016083420307316185, + "loss": 2.0095, + "step": 248020 + }, + { + "epoch": 0.58, + "grad_norm": 2.15625, + "learning_rate": 0.00016083273612007534, + "loss": 2.0374, + "step": 248025 + }, + { + "epoch": 0.58, + "grad_norm": 2.09375, + "learning_rate": 0.00016083126914620723, + "loss": 1.9626, + "step": 248030 + }, + { + "epoch": 0.58, + "grad_norm": 2.203125, + "learning_rate": 0.0001608298021515581, + "loss": 2.0285, + "step": 248035 + }, + { + "epoch": 0.58, + "grad_norm": 2.171875, + "learning_rate": 0.0001608283351361284, + "loss": 2.1554, + "step": 248040 + }, + { + "epoch": 0.58, + "grad_norm": 2.171875, + "learning_rate": 0.0001608268680999186, + "loss": 2.0565, + "step": 248045 + }, + { + "epoch": 0.58, + "grad_norm": 2.359375, + "learning_rate": 0.00016082540104292927, + "loss": 2.0254, + "step": 248050 + }, + { + "epoch": 0.58, + "grad_norm": 1.9140625, + "learning_rate": 0.00016082393396516084, + "loss": 2.0874, + "step": 248055 + }, + { + "epoch": 0.58, + "grad_norm": 2.015625, + "learning_rate": 0.0001608224668666139, + "loss": 2.1613, + "step": 248060 + }, + { + "epoch": 0.58, + "grad_norm": 2.140625, + "learning_rate": 0.00016082099974728888, + "loss": 2.1105, + "step": 248065 + }, + { + "epoch": 0.58, + "grad_norm": 2.265625, + "learning_rate": 0.0001608195326071863, + "loss": 2.0724, + "step": 248070 + }, + { + "epoch": 0.58, + "grad_norm": 2.015625, + "learning_rate": 0.00016081806544630668, + "loss": 1.9995, + "step": 248075 + }, + { + "epoch": 0.58, + "grad_norm": 2.203125, + "learning_rate": 0.00016081659826465048, + "loss": 2.0055, + "step": 248080 + }, + { + "epoch": 0.58, + "grad_norm": 2.078125, + "learning_rate": 0.00016081513106221823, + "loss": 2.1348, + "step": 248085 + }, + { + "epoch": 0.58, + "grad_norm": 2.1875, + "learning_rate": 0.00016081366383901045, + "loss": 2.2742, + "step": 248090 + }, + { + "epoch": 0.58, + "grad_norm": 2.40625, + "learning_rate": 0.0001608121965950276, + "loss": 2.0474, + "step": 248095 + }, + { + "epoch": 0.58, + "grad_norm": 2.21875, + "learning_rate": 0.00016081072933027022, + "loss": 2.1263, + "step": 248100 + }, + { + "epoch": 0.58, + "grad_norm": 2.375, + "learning_rate": 0.00016080926204473878, + "loss": 2.1559, + "step": 248105 + }, + { + "epoch": 0.58, + "grad_norm": 2.53125, + "learning_rate": 0.0001608077947384338, + "loss": 2.1588, + "step": 248110 + }, + { + "epoch": 0.58, + "grad_norm": 2.03125, + "learning_rate": 0.00016080632741135578, + "loss": 2.0285, + "step": 248115 + }, + { + "epoch": 0.58, + "grad_norm": 2.265625, + "learning_rate": 0.00016080486006350518, + "loss": 2.1306, + "step": 248120 + }, + { + "epoch": 0.58, + "grad_norm": 1.90625, + "learning_rate": 0.00016080339269488258, + "loss": 2.1592, + "step": 248125 + }, + { + "epoch": 0.58, + "grad_norm": 2.125, + "learning_rate": 0.00016080192530548842, + "loss": 2.0407, + "step": 248130 + }, + { + "epoch": 0.58, + "grad_norm": 2.421875, + "learning_rate": 0.00016080045789532323, + "loss": 2.0934, + "step": 248135 + }, + { + "epoch": 0.58, + "grad_norm": 2.578125, + "learning_rate": 0.00016079899046438748, + "loss": 1.9939, + "step": 248140 + }, + { + "epoch": 0.58, + "grad_norm": 1.984375, + "learning_rate": 0.0001607975230126817, + "loss": 1.8946, + "step": 248145 + }, + { + "epoch": 0.58, + "grad_norm": 2.5, + "learning_rate": 0.0001607960555402064, + "loss": 2.1501, + "step": 248150 + }, + { + "epoch": 0.58, + "grad_norm": 2.359375, + "learning_rate": 0.00016079458804696206, + "loss": 1.8761, + "step": 248155 + }, + { + "epoch": 0.58, + "grad_norm": 2.015625, + "learning_rate": 0.00016079312053294919, + "loss": 2.1116, + "step": 248160 + }, + { + "epoch": 0.58, + "grad_norm": 1.9921875, + "learning_rate": 0.00016079165299816826, + "loss": 2.207, + "step": 248165 + }, + { + "epoch": 0.58, + "grad_norm": 1.9140625, + "learning_rate": 0.00016079018544261982, + "loss": 2.213, + "step": 248170 + }, + { + "epoch": 0.58, + "grad_norm": 2.359375, + "learning_rate": 0.00016078871786630433, + "loss": 2.1401, + "step": 248175 + }, + { + "epoch": 0.58, + "grad_norm": 2.140625, + "learning_rate": 0.00016078725026922236, + "loss": 2.1103, + "step": 248180 + }, + { + "epoch": 0.58, + "grad_norm": 1.8984375, + "learning_rate": 0.0001607857826513743, + "loss": 2.1652, + "step": 248185 + }, + { + "epoch": 0.58, + "grad_norm": 1.9453125, + "learning_rate": 0.00016078431501276075, + "loss": 2.1869, + "step": 248190 + }, + { + "epoch": 0.58, + "grad_norm": 1.7265625, + "learning_rate": 0.00016078284735338217, + "loss": 1.9704, + "step": 248195 + }, + { + "epoch": 0.58, + "grad_norm": 2.140625, + "learning_rate": 0.0001607813796732391, + "loss": 2.1672, + "step": 248200 + }, + { + "epoch": 0.58, + "grad_norm": 2.0625, + "learning_rate": 0.00016077991197233193, + "loss": 2.1337, + "step": 248205 + }, + { + "epoch": 0.58, + "grad_norm": 2.59375, + "learning_rate": 0.00016077844425066128, + "loss": 1.7952, + "step": 248210 + }, + { + "epoch": 0.58, + "grad_norm": 1.8984375, + "learning_rate": 0.00016077697650822764, + "loss": 2.2578, + "step": 248215 + }, + { + "epoch": 0.58, + "grad_norm": 2.140625, + "learning_rate": 0.00016077550874503145, + "loss": 1.9921, + "step": 248220 + }, + { + "epoch": 0.58, + "grad_norm": 2.328125, + "learning_rate": 0.00016077404096107327, + "loss": 1.9765, + "step": 248225 + }, + { + "epoch": 0.58, + "grad_norm": 2.0625, + "learning_rate": 0.00016077257315635354, + "loss": 2.0089, + "step": 248230 + }, + { + "epoch": 0.58, + "grad_norm": 1.984375, + "learning_rate": 0.00016077110533087283, + "loss": 1.9699, + "step": 248235 + }, + { + "epoch": 0.58, + "grad_norm": 2.015625, + "learning_rate": 0.00016076963748463158, + "loss": 2.0287, + "step": 248240 + }, + { + "epoch": 0.58, + "grad_norm": 2.515625, + "learning_rate": 0.00016076816961763031, + "loss": 1.9863, + "step": 248245 + }, + { + "epoch": 0.58, + "grad_norm": 2.234375, + "learning_rate": 0.00016076670172986956, + "loss": 2.0949, + "step": 248250 + }, + { + "epoch": 0.58, + "grad_norm": 2.390625, + "learning_rate": 0.0001607652338213498, + "loss": 2.2847, + "step": 248255 + }, + { + "epoch": 0.58, + "grad_norm": 2.09375, + "learning_rate": 0.00016076376589207154, + "loss": 2.229, + "step": 248260 + }, + { + "epoch": 0.58, + "grad_norm": 1.96875, + "learning_rate": 0.00016076229794203528, + "loss": 2.177, + "step": 248265 + }, + { + "epoch": 0.58, + "grad_norm": 2.296875, + "learning_rate": 0.0001607608299712415, + "loss": 2.1497, + "step": 248270 + }, + { + "epoch": 0.58, + "grad_norm": 1.8984375, + "learning_rate": 0.00016075936197969073, + "loss": 1.9829, + "step": 248275 + }, + { + "epoch": 0.58, + "grad_norm": 2.234375, + "learning_rate": 0.00016075789396738344, + "loss": 2.0721, + "step": 248280 + }, + { + "epoch": 0.58, + "grad_norm": 2.296875, + "learning_rate": 0.00016075642593432017, + "loss": 2.0636, + "step": 248285 + }, + { + "epoch": 0.58, + "grad_norm": 2.3125, + "learning_rate": 0.00016075495788050138, + "loss": 2.2095, + "step": 248290 + }, + { + "epoch": 0.58, + "grad_norm": 1.921875, + "learning_rate": 0.0001607534898059276, + "loss": 2.0251, + "step": 248295 + }, + { + "epoch": 0.58, + "grad_norm": 2.640625, + "learning_rate": 0.00016075202171059936, + "loss": 1.9425, + "step": 248300 + }, + { + "epoch": 0.58, + "grad_norm": 1.8828125, + "learning_rate": 0.00016075055359451713, + "loss": 2.0148, + "step": 248305 + }, + { + "epoch": 0.58, + "grad_norm": 2.25, + "learning_rate": 0.00016074908545768137, + "loss": 2.0183, + "step": 248310 + }, + { + "epoch": 0.58, + "grad_norm": 2.421875, + "learning_rate": 0.00016074761730009262, + "loss": 2.1092, + "step": 248315 + }, + { + "epoch": 0.58, + "grad_norm": 1.8828125, + "learning_rate": 0.0001607461491217514, + "loss": 1.9774, + "step": 248320 + }, + { + "epoch": 0.58, + "grad_norm": 2.375, + "learning_rate": 0.00016074468092265817, + "loss": 2.1695, + "step": 248325 + }, + { + "epoch": 0.58, + "grad_norm": 1.9609375, + "learning_rate": 0.0001607432127028135, + "loss": 2.2284, + "step": 248330 + }, + { + "epoch": 0.58, + "grad_norm": 2.03125, + "learning_rate": 0.0001607417444622178, + "loss": 2.2511, + "step": 248335 + }, + { + "epoch": 0.58, + "grad_norm": 2.15625, + "learning_rate": 0.00016074027620087167, + "loss": 2.03, + "step": 248340 + }, + { + "epoch": 0.58, + "grad_norm": 2.25, + "learning_rate": 0.00016073880791877553, + "loss": 2.0723, + "step": 248345 + }, + { + "epoch": 0.58, + "grad_norm": 2.375, + "learning_rate": 0.0001607373396159299, + "loss": 2.1333, + "step": 248350 + }, + { + "epoch": 0.58, + "grad_norm": 2.09375, + "learning_rate": 0.0001607358712923353, + "loss": 2.1428, + "step": 248355 + }, + { + "epoch": 0.58, + "grad_norm": 2.328125, + "learning_rate": 0.00016073440294799224, + "loss": 2.0563, + "step": 248360 + }, + { + "epoch": 0.58, + "grad_norm": 2.09375, + "learning_rate": 0.00016073293458290119, + "loss": 2.1174, + "step": 248365 + }, + { + "epoch": 0.58, + "grad_norm": 2.421875, + "learning_rate": 0.0001607314661970627, + "loss": 2.0692, + "step": 248370 + }, + { + "epoch": 0.58, + "grad_norm": 1.78125, + "learning_rate": 0.00016072999779047722, + "loss": 2.0053, + "step": 248375 + }, + { + "epoch": 0.58, + "grad_norm": 2.453125, + "learning_rate": 0.00016072852936314523, + "loss": 2.0848, + "step": 248380 + }, + { + "epoch": 0.58, + "grad_norm": 3.203125, + "learning_rate": 0.00016072706091506734, + "loss": 2.1394, + "step": 248385 + }, + { + "epoch": 0.58, + "grad_norm": 1.8671875, + "learning_rate": 0.00016072559244624394, + "loss": 1.9962, + "step": 248390 + }, + { + "epoch": 0.58, + "grad_norm": 2.109375, + "learning_rate": 0.0001607241239566756, + "loss": 1.9117, + "step": 248395 + }, + { + "epoch": 0.58, + "grad_norm": 2.265625, + "learning_rate": 0.0001607226554463628, + "loss": 2.2559, + "step": 248400 + }, + { + "epoch": 0.58, + "grad_norm": 1.984375, + "learning_rate": 0.000160721186915306, + "loss": 2.1177, + "step": 248405 + }, + { + "epoch": 0.58, + "grad_norm": 2.375, + "learning_rate": 0.00016071971836350576, + "loss": 2.0991, + "step": 248410 + }, + { + "epoch": 0.58, + "grad_norm": 2.34375, + "learning_rate": 0.0001607182497909626, + "loss": 2.1076, + "step": 248415 + }, + { + "epoch": 0.58, + "grad_norm": 2.296875, + "learning_rate": 0.00016071678119767695, + "loss": 2.3141, + "step": 248420 + }, + { + "epoch": 0.58, + "grad_norm": 2.234375, + "learning_rate": 0.00016071531258364936, + "loss": 2.1604, + "step": 248425 + }, + { + "epoch": 0.58, + "grad_norm": 1.9765625, + "learning_rate": 0.00016071384394888028, + "loss": 2.2029, + "step": 248430 + }, + { + "epoch": 0.58, + "grad_norm": 1.890625, + "learning_rate": 0.0001607123752933703, + "loss": 2.2167, + "step": 248435 + }, + { + "epoch": 0.58, + "grad_norm": 2.203125, + "learning_rate": 0.00016071090661711986, + "loss": 2.2181, + "step": 248440 + }, + { + "epoch": 0.58, + "grad_norm": 2.125, + "learning_rate": 0.00016070943792012945, + "loss": 2.086, + "step": 248445 + }, + { + "epoch": 0.58, + "grad_norm": 2.03125, + "learning_rate": 0.0001607079692023996, + "loss": 2.1083, + "step": 248450 + }, + { + "epoch": 0.58, + "grad_norm": 2.125, + "learning_rate": 0.00016070650046393087, + "loss": 2.0791, + "step": 248455 + }, + { + "epoch": 0.58, + "grad_norm": 2.421875, + "learning_rate": 0.00016070503170472362, + "loss": 2.1631, + "step": 248460 + }, + { + "epoch": 0.58, + "grad_norm": 2.34375, + "learning_rate": 0.00016070356292477843, + "loss": 2.1122, + "step": 248465 + }, + { + "epoch": 0.58, + "grad_norm": 2.09375, + "learning_rate": 0.00016070209412409584, + "loss": 2.0152, + "step": 248470 + }, + { + "epoch": 0.58, + "grad_norm": 2.1875, + "learning_rate": 0.00016070062530267634, + "loss": 2.086, + "step": 248475 + }, + { + "epoch": 0.58, + "grad_norm": 2.28125, + "learning_rate": 0.00016069915646052034, + "loss": 1.952, + "step": 248480 + }, + { + "epoch": 0.58, + "grad_norm": 2.109375, + "learning_rate": 0.00016069768759762845, + "loss": 2.0803, + "step": 248485 + }, + { + "epoch": 0.58, + "grad_norm": 2.1875, + "learning_rate": 0.0001606962187140011, + "loss": 2.1509, + "step": 248490 + }, + { + "epoch": 0.58, + "grad_norm": 2.390625, + "learning_rate": 0.00016069474980963885, + "loss": 1.9769, + "step": 248495 + }, + { + "epoch": 0.58, + "grad_norm": 1.90625, + "learning_rate": 0.00016069328088454213, + "loss": 1.9989, + "step": 248500 + }, + { + "epoch": 0.58, + "grad_norm": 2.375, + "learning_rate": 0.00016069181193871152, + "loss": 2.0494, + "step": 248505 + }, + { + "epoch": 0.58, + "grad_norm": 2.09375, + "learning_rate": 0.0001606903429721475, + "loss": 2.1036, + "step": 248510 + }, + { + "epoch": 0.58, + "grad_norm": 2.09375, + "learning_rate": 0.00016068887398485053, + "loss": 2.0359, + "step": 248515 + }, + { + "epoch": 0.58, + "grad_norm": 1.9765625, + "learning_rate": 0.00016068740497682114, + "loss": 2.0033, + "step": 248520 + }, + { + "epoch": 0.58, + "grad_norm": 2.0, + "learning_rate": 0.00016068593594805984, + "loss": 2.1572, + "step": 248525 + }, + { + "epoch": 0.58, + "grad_norm": 1.921875, + "learning_rate": 0.0001606844668985671, + "loss": 2.2693, + "step": 248530 + }, + { + "epoch": 0.58, + "grad_norm": 1.5234375, + "learning_rate": 0.00016068299782834352, + "loss": 2.0517, + "step": 248535 + }, + { + "epoch": 0.58, + "grad_norm": 2.125, + "learning_rate": 0.00016068152873738943, + "loss": 1.9254, + "step": 248540 + }, + { + "epoch": 0.58, + "grad_norm": 2.34375, + "learning_rate": 0.00016068005962570548, + "loss": 2.2647, + "step": 248545 + }, + { + "epoch": 0.58, + "grad_norm": 2.046875, + "learning_rate": 0.00016067859049329213, + "loss": 2.0522, + "step": 248550 + }, + { + "epoch": 0.58, + "grad_norm": 2.0625, + "learning_rate": 0.00016067712134014986, + "loss": 2.1424, + "step": 248555 + }, + { + "epoch": 0.58, + "grad_norm": 2.140625, + "learning_rate": 0.00016067565216627917, + "loss": 2.0142, + "step": 248560 + }, + { + "epoch": 0.58, + "grad_norm": 2.4375, + "learning_rate": 0.00016067418297168059, + "loss": 2.1119, + "step": 248565 + }, + { + "epoch": 0.58, + "grad_norm": 2.1875, + "learning_rate": 0.0001606727137563546, + "loss": 1.9854, + "step": 248570 + }, + { + "epoch": 0.58, + "grad_norm": 2.34375, + "learning_rate": 0.00016067124452030175, + "loss": 1.9834, + "step": 248575 + }, + { + "epoch": 0.58, + "grad_norm": 2.0625, + "learning_rate": 0.00016066977526352244, + "loss": 1.8993, + "step": 248580 + }, + { + "epoch": 0.58, + "grad_norm": 2.140625, + "learning_rate": 0.00016066830598601727, + "loss": 2.1552, + "step": 248585 + }, + { + "epoch": 0.59, + "grad_norm": 1.8046875, + "learning_rate": 0.00016066683668778672, + "loss": 2.0058, + "step": 248590 + }, + { + "epoch": 0.59, + "grad_norm": 2.046875, + "learning_rate": 0.00016066536736883123, + "loss": 2.0888, + "step": 248595 + }, + { + "epoch": 0.59, + "grad_norm": 2.34375, + "learning_rate": 0.0001606638980291514, + "loss": 2.2339, + "step": 248600 + }, + { + "epoch": 0.59, + "grad_norm": 2.109375, + "learning_rate": 0.0001606624286687477, + "loss": 1.9342, + "step": 248605 + }, + { + "epoch": 0.59, + "grad_norm": 2.59375, + "learning_rate": 0.00016066095928762058, + "loss": 2.2163, + "step": 248610 + }, + { + "epoch": 0.59, + "grad_norm": 2.09375, + "learning_rate": 0.00016065948988577055, + "loss": 2.1218, + "step": 248615 + }, + { + "epoch": 0.59, + "grad_norm": 1.9921875, + "learning_rate": 0.00016065802046319816, + "loss": 2.0826, + "step": 248620 + }, + { + "epoch": 0.59, + "grad_norm": 2.046875, + "learning_rate": 0.0001606565510199039, + "loss": 2.1696, + "step": 248625 + }, + { + "epoch": 0.59, + "grad_norm": 2.28125, + "learning_rate": 0.00016065508155588824, + "loss": 2.0538, + "step": 248630 + }, + { + "epoch": 0.59, + "grad_norm": 2.109375, + "learning_rate": 0.00016065361207115176, + "loss": 2.0802, + "step": 248635 + }, + { + "epoch": 0.59, + "grad_norm": 2.234375, + "learning_rate": 0.00016065214256569482, + "loss": 2.0705, + "step": 248640 + }, + { + "epoch": 0.59, + "grad_norm": 2.15625, + "learning_rate": 0.00016065067303951805, + "loss": 2.2548, + "step": 248645 + }, + { + "epoch": 0.59, + "grad_norm": 2.46875, + "learning_rate": 0.0001606492034926219, + "loss": 2.2293, + "step": 248650 + }, + { + "epoch": 0.59, + "grad_norm": 2.421875, + "learning_rate": 0.00016064773392500693, + "loss": 2.2218, + "step": 248655 + }, + { + "epoch": 0.59, + "grad_norm": 1.9375, + "learning_rate": 0.00016064626433667356, + "loss": 2.0443, + "step": 248660 + }, + { + "epoch": 0.59, + "grad_norm": 1.9140625, + "learning_rate": 0.00016064479472762232, + "loss": 2.0672, + "step": 248665 + }, + { + "epoch": 0.59, + "grad_norm": 2.328125, + "learning_rate": 0.00016064332509785372, + "loss": 2.092, + "step": 248670 + }, + { + "epoch": 0.59, + "grad_norm": 2.03125, + "learning_rate": 0.00016064185544736827, + "loss": 2.1311, + "step": 248675 + }, + { + "epoch": 0.59, + "grad_norm": 2.3125, + "learning_rate": 0.00016064038577616647, + "loss": 1.9221, + "step": 248680 + }, + { + "epoch": 0.59, + "grad_norm": 2.359375, + "learning_rate": 0.0001606389160842488, + "loss": 2.0726, + "step": 248685 + }, + { + "epoch": 0.59, + "grad_norm": 2.25, + "learning_rate": 0.00016063744637161576, + "loss": 2.1993, + "step": 248690 + }, + { + "epoch": 0.59, + "grad_norm": 2.0, + "learning_rate": 0.0001606359766382679, + "loss": 1.9611, + "step": 248695 + }, + { + "epoch": 0.59, + "grad_norm": 2.09375, + "learning_rate": 0.00016063450688420566, + "loss": 2.1066, + "step": 248700 + }, + { + "epoch": 0.59, + "grad_norm": 2.0, + "learning_rate": 0.0001606330371094296, + "loss": 2.0923, + "step": 248705 + }, + { + "epoch": 0.59, + "grad_norm": 1.8125, + "learning_rate": 0.00016063156731394018, + "loss": 2.1441, + "step": 248710 + }, + { + "epoch": 0.59, + "grad_norm": 2.34375, + "learning_rate": 0.00016063009749773795, + "loss": 2.0643, + "step": 248715 + }, + { + "epoch": 0.59, + "grad_norm": 2.40625, + "learning_rate": 0.0001606286276608233, + "loss": 2.1149, + "step": 248720 + }, + { + "epoch": 0.59, + "grad_norm": 2.140625, + "learning_rate": 0.00016062715780319688, + "loss": 2.2516, + "step": 248725 + }, + { + "epoch": 0.59, + "grad_norm": 2.34375, + "learning_rate": 0.0001606256879248591, + "loss": 2.021, + "step": 248730 + }, + { + "epoch": 0.59, + "grad_norm": 2.34375, + "learning_rate": 0.00016062421802581049, + "loss": 2.2328, + "step": 248735 + }, + { + "epoch": 0.59, + "grad_norm": 2.078125, + "learning_rate": 0.00016062274810605155, + "loss": 2.0717, + "step": 248740 + }, + { + "epoch": 0.59, + "grad_norm": 1.9609375, + "learning_rate": 0.00016062127816558278, + "loss": 2.146, + "step": 248745 + }, + { + "epoch": 0.59, + "grad_norm": 1.8125, + "learning_rate": 0.0001606198082044047, + "loss": 1.9262, + "step": 248750 + }, + { + "epoch": 0.59, + "grad_norm": 2.1875, + "learning_rate": 0.00016061833822251777, + "loss": 2.1994, + "step": 248755 + }, + { + "epoch": 0.59, + "grad_norm": 1.609375, + "learning_rate": 0.00016061686821992253, + "loss": 2.0111, + "step": 248760 + }, + { + "epoch": 0.59, + "grad_norm": 2.34375, + "learning_rate": 0.00016061539819661947, + "loss": 1.9845, + "step": 248765 + }, + { + "epoch": 0.59, + "grad_norm": 2.15625, + "learning_rate": 0.00016061392815260907, + "loss": 2.0238, + "step": 248770 + }, + { + "epoch": 0.59, + "grad_norm": 2.0625, + "learning_rate": 0.00016061245808789186, + "loss": 1.9875, + "step": 248775 + }, + { + "epoch": 0.59, + "grad_norm": 1.9375, + "learning_rate": 0.00016061098800246833, + "loss": 2.1798, + "step": 248780 + }, + { + "epoch": 0.59, + "grad_norm": 2.25, + "learning_rate": 0.00016060951789633897, + "loss": 2.0621, + "step": 248785 + }, + { + "epoch": 0.59, + "grad_norm": 1.953125, + "learning_rate": 0.00016060804776950435, + "loss": 2.2202, + "step": 248790 + }, + { + "epoch": 0.59, + "grad_norm": 2.28125, + "learning_rate": 0.0001606065776219649, + "loss": 2.0967, + "step": 248795 + }, + { + "epoch": 0.59, + "grad_norm": 2.078125, + "learning_rate": 0.00016060510745372116, + "loss": 2.0976, + "step": 248800 + }, + { + "epoch": 0.59, + "grad_norm": 2.234375, + "learning_rate": 0.00016060363726477358, + "loss": 2.0548, + "step": 248805 + }, + { + "epoch": 0.59, + "grad_norm": 1.9375, + "learning_rate": 0.0001606021670551227, + "loss": 2.1374, + "step": 248810 + }, + { + "epoch": 0.59, + "grad_norm": 2.109375, + "learning_rate": 0.00016060069682476903, + "loss": 2.1594, + "step": 248815 + }, + { + "epoch": 0.59, + "grad_norm": 2.328125, + "learning_rate": 0.00016059922657371306, + "loss": 2.0964, + "step": 248820 + }, + { + "epoch": 0.59, + "grad_norm": 2.5, + "learning_rate": 0.00016059775630195531, + "loss": 2.0157, + "step": 248825 + }, + { + "epoch": 0.59, + "grad_norm": 2.34375, + "learning_rate": 0.00016059628600949627, + "loss": 2.1175, + "step": 248830 + }, + { + "epoch": 0.59, + "grad_norm": 2.5, + "learning_rate": 0.00016059481569633642, + "loss": 2.0638, + "step": 248835 + }, + { + "epoch": 0.59, + "grad_norm": 2.234375, + "learning_rate": 0.0001605933453624763, + "loss": 2.0382, + "step": 248840 + }, + { + "epoch": 0.59, + "grad_norm": 2.375, + "learning_rate": 0.0001605918750079164, + "loss": 2.1214, + "step": 248845 + }, + { + "epoch": 0.59, + "grad_norm": 2.453125, + "learning_rate": 0.0001605904046326572, + "loss": 1.9501, + "step": 248850 + }, + { + "epoch": 0.59, + "grad_norm": 2.46875, + "learning_rate": 0.00016058893423669923, + "loss": 2.152, + "step": 248855 + }, + { + "epoch": 0.59, + "grad_norm": 2.609375, + "learning_rate": 0.00016058746382004296, + "loss": 2.2877, + "step": 248860 + }, + { + "epoch": 0.59, + "grad_norm": 2.140625, + "learning_rate": 0.00016058599338268894, + "loss": 2.1184, + "step": 248865 + }, + { + "epoch": 0.59, + "grad_norm": 2.09375, + "learning_rate": 0.0001605845229246376, + "loss": 2.0839, + "step": 248870 + }, + { + "epoch": 0.59, + "grad_norm": 2.3125, + "learning_rate": 0.00016058305244588957, + "loss": 2.0389, + "step": 248875 + }, + { + "epoch": 0.59, + "grad_norm": 2.078125, + "learning_rate": 0.00016058158194644522, + "loss": 2.0057, + "step": 248880 + }, + { + "epoch": 0.59, + "grad_norm": 2.109375, + "learning_rate": 0.00016058011142630507, + "loss": 2.0567, + "step": 248885 + }, + { + "epoch": 0.59, + "grad_norm": 2.484375, + "learning_rate": 0.0001605786408854697, + "loss": 2.1571, + "step": 248890 + }, + { + "epoch": 0.59, + "grad_norm": 2.125, + "learning_rate": 0.00016057717032393954, + "loss": 2.1189, + "step": 248895 + }, + { + "epoch": 0.59, + "grad_norm": 2.1875, + "learning_rate": 0.00016057569974171516, + "loss": 1.901, + "step": 248900 + }, + { + "epoch": 0.59, + "grad_norm": 2.015625, + "learning_rate": 0.00016057422913879702, + "loss": 2.1568, + "step": 248905 + }, + { + "epoch": 0.59, + "grad_norm": 1.984375, + "learning_rate": 0.00016057275851518558, + "loss": 2.2074, + "step": 248910 + }, + { + "epoch": 0.59, + "grad_norm": 2.28125, + "learning_rate": 0.00016057128787088143, + "loss": 2.005, + "step": 248915 + }, + { + "epoch": 0.59, + "grad_norm": 2.0625, + "learning_rate": 0.000160569817205885, + "loss": 2.0098, + "step": 248920 + }, + { + "epoch": 0.59, + "grad_norm": 2.09375, + "learning_rate": 0.0001605683465201968, + "loss": 2.1705, + "step": 248925 + }, + { + "epoch": 0.59, + "grad_norm": 2.3125, + "learning_rate": 0.00016056687581381742, + "loss": 2.054, + "step": 248930 + }, + { + "epoch": 0.59, + "grad_norm": 2.265625, + "learning_rate": 0.00016056540508674726, + "loss": 2.1552, + "step": 248935 + }, + { + "epoch": 0.59, + "grad_norm": 2.1875, + "learning_rate": 0.00016056393433898686, + "loss": 2.0593, + "step": 248940 + }, + { + "epoch": 0.59, + "grad_norm": 2.578125, + "learning_rate": 0.00016056246357053672, + "loss": 2.1739, + "step": 248945 + }, + { + "epoch": 0.59, + "grad_norm": 2.21875, + "learning_rate": 0.00016056099278139737, + "loss": 2.0976, + "step": 248950 + }, + { + "epoch": 0.59, + "grad_norm": 2.65625, + "learning_rate": 0.00016055952197156924, + "loss": 2.1171, + "step": 248955 + }, + { + "epoch": 0.59, + "grad_norm": 2.625, + "learning_rate": 0.00016055805114105294, + "loss": 2.0115, + "step": 248960 + }, + { + "epoch": 0.59, + "grad_norm": 2.171875, + "learning_rate": 0.00016055658028984885, + "loss": 2.0234, + "step": 248965 + }, + { + "epoch": 0.59, + "grad_norm": 2.375, + "learning_rate": 0.00016055510941795757, + "loss": 2.0711, + "step": 248970 + }, + { + "epoch": 0.59, + "grad_norm": 2.625, + "learning_rate": 0.00016055363852537954, + "loss": 2.0739, + "step": 248975 + }, + { + "epoch": 0.59, + "grad_norm": 2.1875, + "learning_rate": 0.00016055216761211533, + "loss": 2.1173, + "step": 248980 + }, + { + "epoch": 0.59, + "grad_norm": 2.203125, + "learning_rate": 0.00016055069667816538, + "loss": 2.1431, + "step": 248985 + }, + { + "epoch": 0.59, + "grad_norm": 2.421875, + "learning_rate": 0.00016054922572353018, + "loss": 2.2279, + "step": 248990 + }, + { + "epoch": 0.59, + "grad_norm": 1.9140625, + "learning_rate": 0.0001605477547482103, + "loss": 2.1159, + "step": 248995 + }, + { + "epoch": 0.59, + "grad_norm": 2.140625, + "learning_rate": 0.0001605462837522062, + "loss": 2.0286, + "step": 249000 + }, + { + "epoch": 0.59, + "grad_norm": 2.0, + "learning_rate": 0.0001605448127355184, + "loss": 2.2871, + "step": 249005 + }, + { + "epoch": 0.59, + "grad_norm": 2.109375, + "learning_rate": 0.00016054334169814742, + "loss": 1.9917, + "step": 249010 + }, + { + "epoch": 0.59, + "grad_norm": 2.078125, + "learning_rate": 0.0001605418706400937, + "loss": 2.0696, + "step": 249015 + }, + { + "epoch": 0.59, + "grad_norm": 2.328125, + "learning_rate": 0.00016054039956135777, + "loss": 2.2806, + "step": 249020 + }, + { + "epoch": 0.59, + "grad_norm": 2.5, + "learning_rate": 0.00016053892846194017, + "loss": 2.2152, + "step": 249025 + }, + { + "epoch": 0.59, + "grad_norm": 1.921875, + "learning_rate": 0.00016053745734184136, + "loss": 2.0777, + "step": 249030 + }, + { + "epoch": 0.59, + "grad_norm": 1.984375, + "learning_rate": 0.00016053598620106188, + "loss": 2.1223, + "step": 249035 + }, + { + "epoch": 0.59, + "grad_norm": 2.421875, + "learning_rate": 0.00016053451503960219, + "loss": 1.9606, + "step": 249040 + }, + { + "epoch": 0.59, + "grad_norm": 2.046875, + "learning_rate": 0.00016053304385746278, + "loss": 2.0868, + "step": 249045 + }, + { + "epoch": 0.59, + "grad_norm": 2.9375, + "learning_rate": 0.00016053157265464422, + "loss": 1.9548, + "step": 249050 + }, + { + "epoch": 0.59, + "grad_norm": 2.265625, + "learning_rate": 0.00016053010143114697, + "loss": 2.2131, + "step": 249055 + }, + { + "epoch": 0.59, + "grad_norm": 2.03125, + "learning_rate": 0.00016052863018697154, + "loss": 2.042, + "step": 249060 + }, + { + "epoch": 0.59, + "grad_norm": 2.015625, + "learning_rate": 0.00016052715892211846, + "loss": 2.1218, + "step": 249065 + }, + { + "epoch": 0.59, + "grad_norm": 1.7421875, + "learning_rate": 0.00016052568763658817, + "loss": 2.032, + "step": 249070 + }, + { + "epoch": 0.59, + "grad_norm": 2.046875, + "learning_rate": 0.0001605242163303812, + "loss": 2.1738, + "step": 249075 + }, + { + "epoch": 0.59, + "grad_norm": 2.140625, + "learning_rate": 0.0001605227450034981, + "loss": 1.9398, + "step": 249080 + }, + { + "epoch": 0.59, + "grad_norm": 2.046875, + "learning_rate": 0.0001605212736559393, + "loss": 2.077, + "step": 249085 + }, + { + "epoch": 0.59, + "grad_norm": 2.390625, + "learning_rate": 0.00016051980228770535, + "loss": 2.2556, + "step": 249090 + }, + { + "epoch": 0.59, + "grad_norm": 2.21875, + "learning_rate": 0.0001605183308987967, + "loss": 1.9039, + "step": 249095 + }, + { + "epoch": 0.59, + "grad_norm": 3.0, + "learning_rate": 0.00016051685948921394, + "loss": 2.0746, + "step": 249100 + }, + { + "epoch": 0.59, + "grad_norm": 2.09375, + "learning_rate": 0.00016051538805895752, + "loss": 2.0183, + "step": 249105 + }, + { + "epoch": 0.59, + "grad_norm": 1.78125, + "learning_rate": 0.00016051391660802794, + "loss": 2.069, + "step": 249110 + }, + { + "epoch": 0.59, + "grad_norm": 2.25, + "learning_rate": 0.00016051244513642568, + "loss": 2.044, + "step": 249115 + }, + { + "epoch": 0.59, + "grad_norm": 2.265625, + "learning_rate": 0.0001605109736441513, + "loss": 1.9984, + "step": 249120 + }, + { + "epoch": 0.59, + "grad_norm": 1.9375, + "learning_rate": 0.00016050950213120525, + "loss": 2.0901, + "step": 249125 + }, + { + "epoch": 0.59, + "grad_norm": 2.140625, + "learning_rate": 0.00016050803059758806, + "loss": 2.1297, + "step": 249130 + }, + { + "epoch": 0.59, + "grad_norm": 1.9609375, + "learning_rate": 0.00016050655904330027, + "loss": 2.1109, + "step": 249135 + }, + { + "epoch": 0.59, + "grad_norm": 2.578125, + "learning_rate": 0.0001605050874683423, + "loss": 2.0318, + "step": 249140 + }, + { + "epoch": 0.59, + "grad_norm": 1.9765625, + "learning_rate": 0.00016050361587271472, + "loss": 2.0749, + "step": 249145 + }, + { + "epoch": 0.59, + "grad_norm": 2.21875, + "learning_rate": 0.00016050214425641798, + "loss": 2.2674, + "step": 249150 + }, + { + "epoch": 0.59, + "grad_norm": 2.125, + "learning_rate": 0.00016050067261945262, + "loss": 1.9969, + "step": 249155 + }, + { + "epoch": 0.59, + "grad_norm": 2.03125, + "learning_rate": 0.00016049920096181914, + "loss": 2.1555, + "step": 249160 + }, + { + "epoch": 0.59, + "grad_norm": 2.484375, + "learning_rate": 0.00016049772928351805, + "loss": 2.0551, + "step": 249165 + }, + { + "epoch": 0.59, + "grad_norm": 1.953125, + "learning_rate": 0.00016049625758454982, + "loss": 2.0911, + "step": 249170 + }, + { + "epoch": 0.59, + "grad_norm": 2.078125, + "learning_rate": 0.00016049478586491498, + "loss": 2.1649, + "step": 249175 + }, + { + "epoch": 0.59, + "grad_norm": 2.046875, + "learning_rate": 0.000160493314124614, + "loss": 2.0401, + "step": 249180 + }, + { + "epoch": 0.59, + "grad_norm": 2.546875, + "learning_rate": 0.00016049184236364744, + "loss": 2.1519, + "step": 249185 + }, + { + "epoch": 0.59, + "grad_norm": 1.9453125, + "learning_rate": 0.00016049037058201576, + "loss": 2.0636, + "step": 249190 + }, + { + "epoch": 0.59, + "grad_norm": 2.5, + "learning_rate": 0.00016048889877971946, + "loss": 2.2381, + "step": 249195 + }, + { + "epoch": 0.59, + "grad_norm": 2.203125, + "learning_rate": 0.00016048742695675907, + "loss": 2.1087, + "step": 249200 + }, + { + "epoch": 0.59, + "grad_norm": 2.125, + "learning_rate": 0.00016048595511313507, + "loss": 1.9338, + "step": 249205 + }, + { + "epoch": 0.59, + "grad_norm": 1.96875, + "learning_rate": 0.00016048448324884794, + "loss": 2.0491, + "step": 249210 + }, + { + "epoch": 0.59, + "grad_norm": 2.078125, + "learning_rate": 0.00016048301136389827, + "loss": 1.9614, + "step": 249215 + }, + { + "epoch": 0.59, + "grad_norm": 1.9296875, + "learning_rate": 0.00016048153945828647, + "loss": 2.1174, + "step": 249220 + }, + { + "epoch": 0.59, + "grad_norm": 2.4375, + "learning_rate": 0.0001604800675320131, + "loss": 1.9734, + "step": 249225 + }, + { + "epoch": 0.59, + "grad_norm": 2.328125, + "learning_rate": 0.00016047859558507863, + "loss": 2.0769, + "step": 249230 + }, + { + "epoch": 0.59, + "grad_norm": 2.0625, + "learning_rate": 0.00016047712361748356, + "loss": 2.149, + "step": 249235 + }, + { + "epoch": 0.59, + "grad_norm": 2.359375, + "learning_rate": 0.00016047565162922845, + "loss": 2.1214, + "step": 249240 + }, + { + "epoch": 0.59, + "grad_norm": 2.1875, + "learning_rate": 0.00016047417962031374, + "loss": 2.2332, + "step": 249245 + }, + { + "epoch": 0.59, + "grad_norm": 2.171875, + "learning_rate": 0.00016047270759073995, + "loss": 2.1648, + "step": 249250 + }, + { + "epoch": 0.59, + "grad_norm": 1.8984375, + "learning_rate": 0.0001604712355405076, + "loss": 1.9586, + "step": 249255 + }, + { + "epoch": 0.59, + "grad_norm": 2.484375, + "learning_rate": 0.00016046976346961714, + "loss": 2.0441, + "step": 249260 + }, + { + "epoch": 0.59, + "grad_norm": 2.140625, + "learning_rate": 0.00016046829137806917, + "loss": 2.2019, + "step": 249265 + }, + { + "epoch": 0.59, + "grad_norm": 2.109375, + "learning_rate": 0.00016046681926586408, + "loss": 2.1113, + "step": 249270 + }, + { + "epoch": 0.59, + "grad_norm": 2.15625, + "learning_rate": 0.00016046534713300243, + "loss": 2.1292, + "step": 249275 + }, + { + "epoch": 0.59, + "grad_norm": 2.140625, + "learning_rate": 0.00016046387497948477, + "loss": 2.1654, + "step": 249280 + }, + { + "epoch": 0.59, + "grad_norm": 2.1875, + "learning_rate": 0.00016046240280531154, + "loss": 2.0695, + "step": 249285 + }, + { + "epoch": 0.59, + "grad_norm": 2.0, + "learning_rate": 0.00016046093061048322, + "loss": 2.0406, + "step": 249290 + }, + { + "epoch": 0.59, + "grad_norm": 2.328125, + "learning_rate": 0.00016045945839500037, + "loss": 2.197, + "step": 249295 + }, + { + "epoch": 0.59, + "grad_norm": 2.328125, + "learning_rate": 0.00016045798615886344, + "loss": 2.2299, + "step": 249300 + }, + { + "epoch": 0.59, + "grad_norm": 2.34375, + "learning_rate": 0.000160456513902073, + "loss": 2.1044, + "step": 249305 + }, + { + "epoch": 0.59, + "grad_norm": 1.8125, + "learning_rate": 0.00016045504162462955, + "loss": 2.1436, + "step": 249310 + }, + { + "epoch": 0.59, + "grad_norm": 1.8203125, + "learning_rate": 0.00016045356932653353, + "loss": 2.0971, + "step": 249315 + }, + { + "epoch": 0.59, + "grad_norm": 2.203125, + "learning_rate": 0.00016045209700778546, + "loss": 2.1791, + "step": 249320 + }, + { + "epoch": 0.59, + "grad_norm": 2.1875, + "learning_rate": 0.00016045062466838586, + "loss": 2.0852, + "step": 249325 + }, + { + "epoch": 0.59, + "grad_norm": 2.359375, + "learning_rate": 0.00016044915230833525, + "loss": 2.1677, + "step": 249330 + }, + { + "epoch": 0.59, + "grad_norm": 2.421875, + "learning_rate": 0.0001604476799276341, + "loss": 2.2167, + "step": 249335 + }, + { + "epoch": 0.59, + "grad_norm": 2.5, + "learning_rate": 0.00016044620752628293, + "loss": 2.1572, + "step": 249340 + }, + { + "epoch": 0.59, + "grad_norm": 1.84375, + "learning_rate": 0.00016044473510428225, + "loss": 2.0579, + "step": 249345 + }, + { + "epoch": 0.59, + "grad_norm": 1.6015625, + "learning_rate": 0.0001604432626616325, + "loss": 2.0304, + "step": 249350 + }, + { + "epoch": 0.59, + "grad_norm": 2.0, + "learning_rate": 0.0001604417901983343, + "loss": 2.0036, + "step": 249355 + }, + { + "epoch": 0.59, + "grad_norm": 2.21875, + "learning_rate": 0.0001604403177143881, + "loss": 2.1266, + "step": 249360 + }, + { + "epoch": 0.59, + "grad_norm": 2.21875, + "learning_rate": 0.00016043884520979432, + "loss": 2.1585, + "step": 249365 + }, + { + "epoch": 0.59, + "grad_norm": 2.0, + "learning_rate": 0.00016043737268455356, + "loss": 1.9957, + "step": 249370 + }, + { + "epoch": 0.59, + "grad_norm": 2.0, + "learning_rate": 0.00016043590013866633, + "loss": 2.1728, + "step": 249375 + }, + { + "epoch": 0.59, + "grad_norm": 2.15625, + "learning_rate": 0.0001604344275721331, + "loss": 2.2423, + "step": 249380 + }, + { + "epoch": 0.59, + "grad_norm": 2.34375, + "learning_rate": 0.00016043295498495433, + "loss": 2.1849, + "step": 249385 + }, + { + "epoch": 0.59, + "grad_norm": 2.140625, + "learning_rate": 0.00016043148237713059, + "loss": 2.158, + "step": 249390 + }, + { + "epoch": 0.59, + "grad_norm": 2.1875, + "learning_rate": 0.00016043000974866236, + "loss": 2.2215, + "step": 249395 + }, + { + "epoch": 0.59, + "grad_norm": 2.515625, + "learning_rate": 0.00016042853709955014, + "loss": 2.15, + "step": 249400 + }, + { + "epoch": 0.59, + "grad_norm": 2.28125, + "learning_rate": 0.00016042706442979444, + "loss": 2.0304, + "step": 249405 + }, + { + "epoch": 0.59, + "grad_norm": 2.375, + "learning_rate": 0.00016042559173939575, + "loss": 1.9935, + "step": 249410 + }, + { + "epoch": 0.59, + "grad_norm": 2.296875, + "learning_rate": 0.0001604241190283546, + "loss": 1.9113, + "step": 249415 + }, + { + "epoch": 0.59, + "grad_norm": 1.8203125, + "learning_rate": 0.00016042264629667144, + "loss": 2.0372, + "step": 249420 + }, + { + "epoch": 0.59, + "grad_norm": 1.7265625, + "learning_rate": 0.00016042117354434684, + "loss": 2.1402, + "step": 249425 + }, + { + "epoch": 0.59, + "grad_norm": 1.7734375, + "learning_rate": 0.00016041970077138127, + "loss": 2.0624, + "step": 249430 + }, + { + "epoch": 0.59, + "grad_norm": 2.15625, + "learning_rate": 0.00016041822797777523, + "loss": 2.0918, + "step": 249435 + }, + { + "epoch": 0.59, + "grad_norm": 1.9140625, + "learning_rate": 0.0001604167551635292, + "loss": 2.0405, + "step": 249440 + }, + { + "epoch": 0.59, + "grad_norm": 1.9140625, + "learning_rate": 0.00016041528232864377, + "loss": 2.1522, + "step": 249445 + }, + { + "epoch": 0.59, + "grad_norm": 1.953125, + "learning_rate": 0.00016041380947311932, + "loss": 2.2112, + "step": 249450 + }, + { + "epoch": 0.59, + "grad_norm": 2.015625, + "learning_rate": 0.00016041233659695644, + "loss": 2.1128, + "step": 249455 + }, + { + "epoch": 0.59, + "grad_norm": 2.09375, + "learning_rate": 0.00016041086370015563, + "loss": 2.0143, + "step": 249460 + }, + { + "epoch": 0.59, + "grad_norm": 2.265625, + "learning_rate": 0.00016040939078271739, + "loss": 1.9098, + "step": 249465 + }, + { + "epoch": 0.59, + "grad_norm": 1.8515625, + "learning_rate": 0.00016040791784464217, + "loss": 2.0713, + "step": 249470 + }, + { + "epoch": 0.59, + "grad_norm": 2.0625, + "learning_rate": 0.00016040644488593049, + "loss": 2.0719, + "step": 249475 + }, + { + "epoch": 0.59, + "grad_norm": 1.9453125, + "learning_rate": 0.00016040497190658291, + "loss": 2.0313, + "step": 249480 + }, + { + "epoch": 0.59, + "grad_norm": 2.609375, + "learning_rate": 0.0001604034989065999, + "loss": 2.2916, + "step": 249485 + }, + { + "epoch": 0.59, + "grad_norm": 1.84375, + "learning_rate": 0.00016040202588598192, + "loss": 2.2233, + "step": 249490 + }, + { + "epoch": 0.59, + "grad_norm": 2.296875, + "learning_rate": 0.00016040055284472955, + "loss": 2.068, + "step": 249495 + }, + { + "epoch": 0.59, + "grad_norm": 2.0, + "learning_rate": 0.00016039907978284324, + "loss": 2.1139, + "step": 249500 + }, + { + "epoch": 0.59, + "grad_norm": 2.09375, + "learning_rate": 0.0001603976067003235, + "loss": 1.97, + "step": 249505 + }, + { + "epoch": 0.59, + "grad_norm": 2.0, + "learning_rate": 0.00016039613359717086, + "loss": 2.1373, + "step": 249510 + }, + { + "epoch": 0.59, + "grad_norm": 1.9453125, + "learning_rate": 0.0001603946604733858, + "loss": 2.2411, + "step": 249515 + }, + { + "epoch": 0.59, + "grad_norm": 1.9296875, + "learning_rate": 0.00016039318732896885, + "loss": 2.1849, + "step": 249520 + }, + { + "epoch": 0.59, + "grad_norm": 2.140625, + "learning_rate": 0.00016039171416392045, + "loss": 2.134, + "step": 249525 + }, + { + "epoch": 0.59, + "grad_norm": 2.25, + "learning_rate": 0.00016039024097824113, + "loss": 2.1018, + "step": 249530 + }, + { + "epoch": 0.59, + "grad_norm": 2.171875, + "learning_rate": 0.00016038876777193145, + "loss": 2.1119, + "step": 249535 + }, + { + "epoch": 0.59, + "grad_norm": 1.9140625, + "learning_rate": 0.0001603872945449919, + "loss": 1.9932, + "step": 249540 + }, + { + "epoch": 0.59, + "grad_norm": 2.140625, + "learning_rate": 0.0001603858212974229, + "loss": 1.851, + "step": 249545 + }, + { + "epoch": 0.59, + "grad_norm": 2.34375, + "learning_rate": 0.00016038434802922503, + "loss": 2.1092, + "step": 249550 + }, + { + "epoch": 0.59, + "grad_norm": 2.25, + "learning_rate": 0.00016038287474039878, + "loss": 2.1743, + "step": 249555 + }, + { + "epoch": 0.59, + "grad_norm": 2.1875, + "learning_rate": 0.00016038140143094463, + "loss": 2.1459, + "step": 249560 + }, + { + "epoch": 0.59, + "grad_norm": 2.828125, + "learning_rate": 0.0001603799281008631, + "loss": 2.0802, + "step": 249565 + }, + { + "epoch": 0.59, + "grad_norm": 1.8359375, + "learning_rate": 0.0001603784547501547, + "loss": 1.9944, + "step": 249570 + }, + { + "epoch": 0.59, + "grad_norm": 1.8984375, + "learning_rate": 0.00016037698137881992, + "loss": 2.1417, + "step": 249575 + }, + { + "epoch": 0.59, + "grad_norm": 2.4375, + "learning_rate": 0.00016037550798685926, + "loss": 2.0784, + "step": 249580 + }, + { + "epoch": 0.59, + "grad_norm": 2.5, + "learning_rate": 0.00016037403457427323, + "loss": 1.9103, + "step": 249585 + }, + { + "epoch": 0.59, + "grad_norm": 2.234375, + "learning_rate": 0.00016037256114106234, + "loss": 2.1568, + "step": 249590 + }, + { + "epoch": 0.59, + "grad_norm": 2.203125, + "learning_rate": 0.00016037108768722708, + "loss": 2.1673, + "step": 249595 + }, + { + "epoch": 0.59, + "grad_norm": 2.046875, + "learning_rate": 0.000160369614212768, + "loss": 1.9508, + "step": 249600 + }, + { + "epoch": 0.59, + "grad_norm": 1.84375, + "learning_rate": 0.00016036814071768553, + "loss": 2.1264, + "step": 249605 + }, + { + "epoch": 0.59, + "grad_norm": 1.9921875, + "learning_rate": 0.00016036666720198022, + "loss": 1.8894, + "step": 249610 + }, + { + "epoch": 0.59, + "grad_norm": 1.890625, + "learning_rate": 0.00016036519366565254, + "loss": 2.0404, + "step": 249615 + }, + { + "epoch": 0.59, + "grad_norm": 2.109375, + "learning_rate": 0.00016036372010870306, + "loss": 2.212, + "step": 249620 + }, + { + "epoch": 0.59, + "grad_norm": 2.421875, + "learning_rate": 0.0001603622465311322, + "loss": 2.0184, + "step": 249625 + }, + { + "epoch": 0.59, + "grad_norm": 2.296875, + "learning_rate": 0.00016036077293294051, + "loss": 2.0803, + "step": 249630 + }, + { + "epoch": 0.59, + "grad_norm": 2.4375, + "learning_rate": 0.00016035929931412847, + "loss": 2.1017, + "step": 249635 + }, + { + "epoch": 0.59, + "grad_norm": 2.359375, + "learning_rate": 0.00016035782567469663, + "loss": 2.0681, + "step": 249640 + }, + { + "epoch": 0.59, + "grad_norm": 1.640625, + "learning_rate": 0.00016035635201464547, + "loss": 2.1519, + "step": 249645 + }, + { + "epoch": 0.59, + "grad_norm": 2.1875, + "learning_rate": 0.00016035487833397543, + "loss": 1.9718, + "step": 249650 + }, + { + "epoch": 0.59, + "grad_norm": 3.03125, + "learning_rate": 0.00016035340463268713, + "loss": 2.1328, + "step": 249655 + }, + { + "epoch": 0.59, + "grad_norm": 1.90625, + "learning_rate": 0.000160351930910781, + "loss": 2.1878, + "step": 249660 + }, + { + "epoch": 0.59, + "grad_norm": 1.9453125, + "learning_rate": 0.0001603504571682575, + "loss": 2.0418, + "step": 249665 + }, + { + "epoch": 0.59, + "grad_norm": 2.328125, + "learning_rate": 0.00016034898340511726, + "loss": 2.1732, + "step": 249670 + }, + { + "epoch": 0.59, + "grad_norm": 1.875, + "learning_rate": 0.00016034750962136068, + "loss": 2.1108, + "step": 249675 + }, + { + "epoch": 0.59, + "grad_norm": 2.21875, + "learning_rate": 0.0001603460358169883, + "loss": 1.9359, + "step": 249680 + }, + { + "epoch": 0.59, + "grad_norm": 3.0, + "learning_rate": 0.00016034456199200063, + "loss": 2.2812, + "step": 249685 + }, + { + "epoch": 0.59, + "grad_norm": 2.453125, + "learning_rate": 0.00016034308814639813, + "loss": 2.176, + "step": 249690 + }, + { + "epoch": 0.59, + "grad_norm": 2.140625, + "learning_rate": 0.00016034161428018137, + "loss": 2.0936, + "step": 249695 + }, + { + "epoch": 0.59, + "grad_norm": 2.296875, + "learning_rate": 0.00016034014039335083, + "loss": 2.1356, + "step": 249700 + }, + { + "epoch": 0.59, + "grad_norm": 2.03125, + "learning_rate": 0.00016033866648590696, + "loss": 2.0725, + "step": 249705 + }, + { + "epoch": 0.59, + "grad_norm": 2.265625, + "learning_rate": 0.00016033719255785034, + "loss": 2.2689, + "step": 249710 + }, + { + "epoch": 0.59, + "grad_norm": 2.140625, + "learning_rate": 0.00016033571860918144, + "loss": 2.0581, + "step": 249715 + }, + { + "epoch": 0.59, + "grad_norm": 2.109375, + "learning_rate": 0.00016033424463990077, + "loss": 2.159, + "step": 249720 + }, + { + "epoch": 0.59, + "grad_norm": 1.890625, + "learning_rate": 0.0001603327706500088, + "loss": 2.0794, + "step": 249725 + }, + { + "epoch": 0.59, + "grad_norm": 2.609375, + "learning_rate": 0.00016033129663950608, + "loss": 2.0933, + "step": 249730 + }, + { + "epoch": 0.59, + "grad_norm": 3.109375, + "learning_rate": 0.0001603298226083931, + "loss": 2.0949, + "step": 249735 + }, + { + "epoch": 0.59, + "grad_norm": 2.25, + "learning_rate": 0.00016032834855667034, + "loss": 2.2813, + "step": 249740 + }, + { + "epoch": 0.59, + "grad_norm": 2.6875, + "learning_rate": 0.00016032687448433832, + "loss": 2.0293, + "step": 249745 + }, + { + "epoch": 0.59, + "grad_norm": 2.453125, + "learning_rate": 0.00016032540039139756, + "loss": 1.9951, + "step": 249750 + }, + { + "epoch": 0.59, + "grad_norm": 2.0625, + "learning_rate": 0.00016032392627784856, + "loss": 1.978, + "step": 249755 + }, + { + "epoch": 0.59, + "grad_norm": 2.046875, + "learning_rate": 0.0001603224521436918, + "loss": 2.0945, + "step": 249760 + }, + { + "epoch": 0.59, + "grad_norm": 2.03125, + "learning_rate": 0.00016032097798892778, + "loss": 2.0237, + "step": 249765 + }, + { + "epoch": 0.59, + "grad_norm": 2.203125, + "learning_rate": 0.00016031950381355703, + "loss": 1.9892, + "step": 249770 + }, + { + "epoch": 0.59, + "grad_norm": 2.03125, + "learning_rate": 0.00016031802961758004, + "loss": 2.1538, + "step": 249775 + }, + { + "epoch": 0.59, + "grad_norm": 1.9609375, + "learning_rate": 0.00016031655540099733, + "loss": 2.2499, + "step": 249780 + }, + { + "epoch": 0.59, + "grad_norm": 2.25, + "learning_rate": 0.0001603150811638094, + "loss": 2.1395, + "step": 249785 + }, + { + "epoch": 0.59, + "grad_norm": 1.8125, + "learning_rate": 0.00016031360690601674, + "loss": 1.9864, + "step": 249790 + }, + { + "epoch": 0.59, + "grad_norm": 2.0, + "learning_rate": 0.00016031213262761984, + "loss": 2.2, + "step": 249795 + }, + { + "epoch": 0.59, + "grad_norm": 2.3125, + "learning_rate": 0.00016031065832861923, + "loss": 1.963, + "step": 249800 + }, + { + "epoch": 0.59, + "grad_norm": 2.390625, + "learning_rate": 0.0001603091840090154, + "loss": 2.0679, + "step": 249805 + }, + { + "epoch": 0.59, + "grad_norm": 1.84375, + "learning_rate": 0.00016030770966880885, + "loss": 2.0643, + "step": 249810 + }, + { + "epoch": 0.59, + "grad_norm": 1.921875, + "learning_rate": 0.00016030623530800014, + "loss": 1.9657, + "step": 249815 + }, + { + "epoch": 0.59, + "grad_norm": 2.03125, + "learning_rate": 0.00016030476092658967, + "loss": 1.9648, + "step": 249820 + }, + { + "epoch": 0.59, + "grad_norm": 2.171875, + "learning_rate": 0.00016030328652457803, + "loss": 1.9104, + "step": 249825 + }, + { + "epoch": 0.59, + "grad_norm": 2.34375, + "learning_rate": 0.00016030181210196568, + "loss": 2.1826, + "step": 249830 + }, + { + "epoch": 0.59, + "grad_norm": 1.8515625, + "learning_rate": 0.00016030033765875316, + "loss": 2.0452, + "step": 249835 + }, + { + "epoch": 0.59, + "grad_norm": 1.7578125, + "learning_rate": 0.00016029886319494092, + "loss": 2.0998, + "step": 249840 + }, + { + "epoch": 0.59, + "grad_norm": 1.734375, + "learning_rate": 0.0001602973887105295, + "loss": 1.9772, + "step": 249845 + }, + { + "epoch": 0.59, + "grad_norm": 2.546875, + "learning_rate": 0.00016029591420551942, + "loss": 2.1459, + "step": 249850 + }, + { + "epoch": 0.59, + "grad_norm": 2.09375, + "learning_rate": 0.00016029443967991114, + "loss": 2.0353, + "step": 249855 + }, + { + "epoch": 0.59, + "grad_norm": 1.9609375, + "learning_rate": 0.00016029296513370522, + "loss": 2.2153, + "step": 249860 + }, + { + "epoch": 0.59, + "grad_norm": 2.421875, + "learning_rate": 0.00016029149056690208, + "loss": 2.1019, + "step": 249865 + }, + { + "epoch": 0.59, + "grad_norm": 2.28125, + "learning_rate": 0.00016029001597950233, + "loss": 2.1027, + "step": 249870 + }, + { + "epoch": 0.59, + "grad_norm": 2.734375, + "learning_rate": 0.00016028854137150637, + "loss": 2.1016, + "step": 249875 + }, + { + "epoch": 0.59, + "grad_norm": 2.15625, + "learning_rate": 0.00016028706674291478, + "loss": 2.153, + "step": 249880 + }, + { + "epoch": 0.59, + "grad_norm": 2.203125, + "learning_rate": 0.00016028559209372802, + "loss": 2.2048, + "step": 249885 + }, + { + "epoch": 0.59, + "grad_norm": 2.28125, + "learning_rate": 0.0001602841174239466, + "loss": 2.0387, + "step": 249890 + }, + { + "epoch": 0.59, + "grad_norm": 2.203125, + "learning_rate": 0.00016028264273357107, + "loss": 1.9376, + "step": 249895 + }, + { + "epoch": 0.59, + "grad_norm": 2.46875, + "learning_rate": 0.00016028116802260187, + "loss": 2.1332, + "step": 249900 + }, + { + "epoch": 0.59, + "grad_norm": 2.421875, + "learning_rate": 0.0001602796932910395, + "loss": 2.076, + "step": 249905 + }, + { + "epoch": 0.59, + "grad_norm": 2.40625, + "learning_rate": 0.00016027821853888452, + "loss": 1.8904, + "step": 249910 + }, + { + "epoch": 0.59, + "grad_norm": 2.484375, + "learning_rate": 0.00016027674376613743, + "loss": 2.0167, + "step": 249915 + }, + { + "epoch": 0.59, + "grad_norm": 2.21875, + "learning_rate": 0.0001602752689727987, + "loss": 2.0937, + "step": 249920 + }, + { + "epoch": 0.59, + "grad_norm": 2.171875, + "learning_rate": 0.00016027379415886882, + "loss": 2.0648, + "step": 249925 + }, + { + "epoch": 0.59, + "grad_norm": 2.21875, + "learning_rate": 0.0001602723193243483, + "loss": 2.0933, + "step": 249930 + }, + { + "epoch": 0.59, + "grad_norm": 2.28125, + "learning_rate": 0.00016027084446923774, + "loss": 2.313, + "step": 249935 + }, + { + "epoch": 0.59, + "grad_norm": 2.046875, + "learning_rate": 0.00016026936959353752, + "loss": 2.0699, + "step": 249940 + }, + { + "epoch": 0.59, + "grad_norm": 1.84375, + "learning_rate": 0.0001602678946972482, + "loss": 2.0615, + "step": 249945 + }, + { + "epoch": 0.59, + "grad_norm": 2.125, + "learning_rate": 0.00016026641978037027, + "loss": 2.0708, + "step": 249950 + }, + { + "epoch": 0.59, + "grad_norm": 2.6875, + "learning_rate": 0.00016026494484290424, + "loss": 2.0403, + "step": 249955 + }, + { + "epoch": 0.59, + "grad_norm": 1.8671875, + "learning_rate": 0.0001602634698848506, + "loss": 1.981, + "step": 249960 + }, + { + "epoch": 0.59, + "grad_norm": 2.25, + "learning_rate": 0.00016026199490620986, + "loss": 2.1843, + "step": 249965 + }, + { + "epoch": 0.59, + "grad_norm": 2.328125, + "learning_rate": 0.00016026051990698255, + "loss": 2.1733, + "step": 249970 + }, + { + "epoch": 0.59, + "grad_norm": 2.265625, + "learning_rate": 0.00016025904488716918, + "loss": 1.9295, + "step": 249975 + }, + { + "epoch": 0.59, + "grad_norm": 2.171875, + "learning_rate": 0.0001602575698467702, + "loss": 2.1136, + "step": 249980 + }, + { + "epoch": 0.59, + "grad_norm": 2.171875, + "learning_rate": 0.0001602560947857861, + "loss": 1.9829, + "step": 249985 + }, + { + "epoch": 0.59, + "grad_norm": 1.9609375, + "learning_rate": 0.00016025461970421744, + "loss": 1.9359, + "step": 249990 + }, + { + "epoch": 0.59, + "grad_norm": 1.9921875, + "learning_rate": 0.00016025314460206476, + "loss": 2.1187, + "step": 249995 + }, + { + "epoch": 0.59, + "grad_norm": 2.109375, + "learning_rate": 0.00016025166947932848, + "loss": 2.1338, + "step": 250000 + }, + { + "epoch": 0.59, + "grad_norm": 2.0, + "learning_rate": 0.00016025019433600914, + "loss": 2.2105, + "step": 250005 + }, + { + "epoch": 0.59, + "grad_norm": 2.234375, + "learning_rate": 0.00016024871917210724, + "loss": 1.9185, + "step": 250010 + }, + { + "epoch": 0.59, + "grad_norm": 3.015625, + "learning_rate": 0.0001602472439876233, + "loss": 2.2134, + "step": 250015 + }, + { + "epoch": 0.59, + "grad_norm": 2.1875, + "learning_rate": 0.00016024576878255776, + "loss": 2.0198, + "step": 250020 + }, + { + "epoch": 0.59, + "grad_norm": 2.15625, + "learning_rate": 0.00016024429355691123, + "loss": 2.0509, + "step": 250025 + }, + { + "epoch": 0.59, + "grad_norm": 2.234375, + "learning_rate": 0.00016024281831068416, + "loss": 1.8983, + "step": 250030 + }, + { + "epoch": 0.59, + "grad_norm": 1.9375, + "learning_rate": 0.00016024134304387698, + "loss": 1.9043, + "step": 250035 + }, + { + "epoch": 0.59, + "grad_norm": 2.0, + "learning_rate": 0.00016023986775649034, + "loss": 2.1442, + "step": 250040 + }, + { + "epoch": 0.59, + "grad_norm": 2.03125, + "learning_rate": 0.00016023839244852463, + "loss": 2.2606, + "step": 250045 + }, + { + "epoch": 0.59, + "grad_norm": 2.171875, + "learning_rate": 0.00016023691711998043, + "loss": 2.1158, + "step": 250050 + }, + { + "epoch": 0.59, + "grad_norm": 2.34375, + "learning_rate": 0.00016023544177085815, + "loss": 2.1233, + "step": 250055 + }, + { + "epoch": 0.59, + "grad_norm": 1.8671875, + "learning_rate": 0.00016023396640115837, + "loss": 2.0581, + "step": 250060 + }, + { + "epoch": 0.59, + "grad_norm": 2.0, + "learning_rate": 0.0001602324910108816, + "loss": 1.9962, + "step": 250065 + }, + { + "epoch": 0.59, + "grad_norm": 2.4375, + "learning_rate": 0.00016023101560002826, + "loss": 2.2773, + "step": 250070 + }, + { + "epoch": 0.59, + "grad_norm": 2.375, + "learning_rate": 0.00016022954016859896, + "loss": 2.1946, + "step": 250075 + }, + { + "epoch": 0.59, + "grad_norm": 2.15625, + "learning_rate": 0.00016022806471659417, + "loss": 1.9394, + "step": 250080 + }, + { + "epoch": 0.59, + "grad_norm": 2.0625, + "learning_rate": 0.00016022658924401436, + "loss": 2.0726, + "step": 250085 + }, + { + "epoch": 0.59, + "grad_norm": 1.9296875, + "learning_rate": 0.00016022511375086005, + "loss": 1.8969, + "step": 250090 + }, + { + "epoch": 0.59, + "grad_norm": 4.15625, + "learning_rate": 0.0001602236382371318, + "loss": 2.2985, + "step": 250095 + }, + { + "epoch": 0.59, + "grad_norm": 2.015625, + "learning_rate": 0.00016022216270282998, + "loss": 2.0598, + "step": 250100 + }, + { + "epoch": 0.59, + "grad_norm": 2.09375, + "learning_rate": 0.00016022068714795522, + "loss": 2.0493, + "step": 250105 + }, + { + "epoch": 0.59, + "grad_norm": 2.046875, + "learning_rate": 0.00016021921157250799, + "loss": 2.1306, + "step": 250110 + }, + { + "epoch": 0.59, + "grad_norm": 2.3125, + "learning_rate": 0.00016021773597648875, + "loss": 2.2886, + "step": 250115 + }, + { + "epoch": 0.59, + "grad_norm": 1.9296875, + "learning_rate": 0.00016021626035989806, + "loss": 2.1275, + "step": 250120 + }, + { + "epoch": 0.59, + "grad_norm": 2.234375, + "learning_rate": 0.0001602147847227364, + "loss": 2.0832, + "step": 250125 + }, + { + "epoch": 0.59, + "grad_norm": 2.375, + "learning_rate": 0.00016021330906500428, + "loss": 2.1481, + "step": 250130 + }, + { + "epoch": 0.59, + "grad_norm": 2.359375, + "learning_rate": 0.00016021183338670223, + "loss": 1.996, + "step": 250135 + }, + { + "epoch": 0.59, + "grad_norm": 2.0625, + "learning_rate": 0.0001602103576878307, + "loss": 1.945, + "step": 250140 + }, + { + "epoch": 0.59, + "grad_norm": 1.953125, + "learning_rate": 0.0001602088819683902, + "loss": 1.9862, + "step": 250145 + }, + { + "epoch": 0.59, + "grad_norm": 2.390625, + "learning_rate": 0.00016020740622838128, + "loss": 2.021, + "step": 250150 + }, + { + "epoch": 0.59, + "grad_norm": 1.9453125, + "learning_rate": 0.0001602059304678044, + "loss": 2.0781, + "step": 250155 + }, + { + "epoch": 0.59, + "grad_norm": 2.125, + "learning_rate": 0.00016020445468666012, + "loss": 2.1532, + "step": 250160 + }, + { + "epoch": 0.59, + "grad_norm": 1.90625, + "learning_rate": 0.0001602029788849489, + "loss": 2.3043, + "step": 250165 + }, + { + "epoch": 0.59, + "grad_norm": 2.046875, + "learning_rate": 0.0001602015030626712, + "loss": 2.0232, + "step": 250170 + }, + { + "epoch": 0.59, + "grad_norm": 2.75, + "learning_rate": 0.0001602000272198276, + "loss": 2.1717, + "step": 250175 + }, + { + "epoch": 0.59, + "grad_norm": 2.265625, + "learning_rate": 0.0001601985513564186, + "loss": 1.962, + "step": 250180 + }, + { + "epoch": 0.59, + "grad_norm": 2.34375, + "learning_rate": 0.00016019707547244467, + "loss": 2.0157, + "step": 250185 + }, + { + "epoch": 0.59, + "grad_norm": 1.96875, + "learning_rate": 0.0001601955995679063, + "loss": 2.0215, + "step": 250190 + }, + { + "epoch": 0.59, + "grad_norm": 2.09375, + "learning_rate": 0.00016019412364280406, + "loss": 2.0977, + "step": 250195 + }, + { + "epoch": 0.59, + "grad_norm": 1.8203125, + "learning_rate": 0.0001601926476971384, + "loss": 2.0626, + "step": 250200 + }, + { + "epoch": 0.59, + "grad_norm": 2.265625, + "learning_rate": 0.00016019117173090983, + "loss": 2.1539, + "step": 250205 + }, + { + "epoch": 0.59, + "grad_norm": 2.484375, + "learning_rate": 0.00016018969574411887, + "loss": 2.1123, + "step": 250210 + }, + { + "epoch": 0.59, + "grad_norm": 2.21875, + "learning_rate": 0.00016018821973676602, + "loss": 2.2606, + "step": 250215 + }, + { + "epoch": 0.59, + "grad_norm": 2.046875, + "learning_rate": 0.00016018674370885181, + "loss": 1.9981, + "step": 250220 + }, + { + "epoch": 0.59, + "grad_norm": 2.484375, + "learning_rate": 0.0001601852676603767, + "loss": 2.3355, + "step": 250225 + }, + { + "epoch": 0.59, + "grad_norm": 2.234375, + "learning_rate": 0.00016018379159134122, + "loss": 2.1771, + "step": 250230 + }, + { + "epoch": 0.59, + "grad_norm": 2.15625, + "learning_rate": 0.00016018231550174583, + "loss": 2.1496, + "step": 250235 + }, + { + "epoch": 0.59, + "grad_norm": 2.0625, + "learning_rate": 0.00016018083939159107, + "loss": 2.1358, + "step": 250240 + }, + { + "epoch": 0.59, + "grad_norm": 2.125, + "learning_rate": 0.0001601793632608775, + "loss": 2.2026, + "step": 250245 + }, + { + "epoch": 0.59, + "grad_norm": 2.359375, + "learning_rate": 0.00016017788710960554, + "loss": 2.0996, + "step": 250250 + }, + { + "epoch": 0.59, + "grad_norm": 2.25, + "learning_rate": 0.0001601764109377757, + "loss": 1.9684, + "step": 250255 + }, + { + "epoch": 0.59, + "grad_norm": 2.328125, + "learning_rate": 0.00016017493474538853, + "loss": 2.092, + "step": 250260 + }, + { + "epoch": 0.59, + "grad_norm": 2.328125, + "learning_rate": 0.00016017345853244446, + "loss": 2.1885, + "step": 250265 + }, + { + "epoch": 0.59, + "grad_norm": 2.078125, + "learning_rate": 0.0001601719822989441, + "loss": 2.0989, + "step": 250270 + }, + { + "epoch": 0.59, + "grad_norm": 2.203125, + "learning_rate": 0.00016017050604488788, + "loss": 2.0168, + "step": 250275 + }, + { + "epoch": 0.59, + "grad_norm": 2.515625, + "learning_rate": 0.00016016902977027635, + "loss": 2.0401, + "step": 250280 + }, + { + "epoch": 0.59, + "grad_norm": 1.875, + "learning_rate": 0.00016016755347510997, + "loss": 2.0849, + "step": 250285 + }, + { + "epoch": 0.59, + "grad_norm": 2.484375, + "learning_rate": 0.00016016607715938925, + "loss": 2.1426, + "step": 250290 + }, + { + "epoch": 0.59, + "grad_norm": 2.109375, + "learning_rate": 0.0001601646008231147, + "loss": 2.0303, + "step": 250295 + }, + { + "epoch": 0.59, + "grad_norm": 2.046875, + "learning_rate": 0.00016016312446628686, + "loss": 2.2843, + "step": 250300 + }, + { + "epoch": 0.59, + "grad_norm": 2.171875, + "learning_rate": 0.0001601616480889062, + "loss": 2.0593, + "step": 250305 + }, + { + "epoch": 0.59, + "grad_norm": 1.9453125, + "learning_rate": 0.00016016017169097322, + "loss": 1.8645, + "step": 250310 + }, + { + "epoch": 0.59, + "grad_norm": 1.8046875, + "learning_rate": 0.00016015869527248846, + "loss": 2.0658, + "step": 250315 + }, + { + "epoch": 0.59, + "grad_norm": 2.484375, + "learning_rate": 0.00016015721883345235, + "loss": 1.8915, + "step": 250320 + }, + { + "epoch": 0.59, + "grad_norm": 2.0, + "learning_rate": 0.0001601557423738655, + "loss": 2.1027, + "step": 250325 + }, + { + "epoch": 0.59, + "grad_norm": 2.15625, + "learning_rate": 0.00016015426589372834, + "loss": 2.1438, + "step": 250330 + }, + { + "epoch": 0.59, + "grad_norm": 1.9453125, + "learning_rate": 0.00016015278939304135, + "loss": 2.1778, + "step": 250335 + }, + { + "epoch": 0.59, + "grad_norm": 2.15625, + "learning_rate": 0.0001601513128718051, + "loss": 2.1842, + "step": 250340 + }, + { + "epoch": 0.59, + "grad_norm": 2.03125, + "learning_rate": 0.0001601498363300201, + "loss": 2.0766, + "step": 250345 + }, + { + "epoch": 0.59, + "grad_norm": 2.0625, + "learning_rate": 0.0001601483597676868, + "loss": 1.9505, + "step": 250350 + }, + { + "epoch": 0.59, + "grad_norm": 1.96875, + "learning_rate": 0.00016014688318480573, + "loss": 1.8011, + "step": 250355 + }, + { + "epoch": 0.59, + "grad_norm": 2.390625, + "learning_rate": 0.0001601454065813774, + "loss": 2.0081, + "step": 250360 + }, + { + "epoch": 0.59, + "grad_norm": 2.171875, + "learning_rate": 0.00016014392995740225, + "loss": 2.0576, + "step": 250365 + }, + { + "epoch": 0.59, + "grad_norm": 1.921875, + "learning_rate": 0.00016014245331288094, + "loss": 2.2054, + "step": 250370 + }, + { + "epoch": 0.59, + "grad_norm": 1.9765625, + "learning_rate": 0.00016014097664781383, + "loss": 2.2231, + "step": 250375 + }, + { + "epoch": 0.59, + "grad_norm": 2.140625, + "learning_rate": 0.00016013949996220147, + "loss": 2.2874, + "step": 250380 + }, + { + "epoch": 0.59, + "grad_norm": 2.015625, + "learning_rate": 0.00016013802325604438, + "loss": 2.13, + "step": 250385 + }, + { + "epoch": 0.59, + "grad_norm": 2.3125, + "learning_rate": 0.00016013654652934304, + "loss": 2.0168, + "step": 250390 + }, + { + "epoch": 0.59, + "grad_norm": 2.296875, + "learning_rate": 0.00016013506978209796, + "loss": 2.0548, + "step": 250395 + }, + { + "epoch": 0.59, + "grad_norm": 1.9609375, + "learning_rate": 0.00016013359301430966, + "loss": 2.1646, + "step": 250400 + }, + { + "epoch": 0.59, + "grad_norm": 2.484375, + "learning_rate": 0.0001601321162259786, + "loss": 2.1881, + "step": 250405 + }, + { + "epoch": 0.59, + "grad_norm": 2.53125, + "learning_rate": 0.00016013063941710536, + "loss": 2.2139, + "step": 250410 + }, + { + "epoch": 0.59, + "grad_norm": 2.4375, + "learning_rate": 0.0001601291625876904, + "loss": 2.0586, + "step": 250415 + }, + { + "epoch": 0.59, + "grad_norm": 1.765625, + "learning_rate": 0.0001601276857377342, + "loss": 1.9314, + "step": 250420 + }, + { + "epoch": 0.59, + "grad_norm": 2.90625, + "learning_rate": 0.00016012620886723733, + "loss": 2.1044, + "step": 250425 + }, + { + "epoch": 0.59, + "grad_norm": 1.984375, + "learning_rate": 0.00016012473197620026, + "loss": 2.0021, + "step": 250430 + }, + { + "epoch": 0.59, + "grad_norm": 1.953125, + "learning_rate": 0.00016012325506462345, + "loss": 2.2389, + "step": 250435 + }, + { + "epoch": 0.59, + "grad_norm": 1.7578125, + "learning_rate": 0.00016012177813250745, + "loss": 2.03, + "step": 250440 + }, + { + "epoch": 0.59, + "grad_norm": 1.9375, + "learning_rate": 0.0001601203011798528, + "loss": 2.064, + "step": 250445 + }, + { + "epoch": 0.59, + "grad_norm": 1.921875, + "learning_rate": 0.00016011882420665993, + "loss": 2.1517, + "step": 250450 + }, + { + "epoch": 0.59, + "grad_norm": 2.3125, + "learning_rate": 0.0001601173472129294, + "loss": 2.0504, + "step": 250455 + }, + { + "epoch": 0.59, + "grad_norm": 2.078125, + "learning_rate": 0.00016011587019866167, + "loss": 2.1684, + "step": 250460 + }, + { + "epoch": 0.59, + "grad_norm": 2.234375, + "learning_rate": 0.00016011439316385728, + "loss": 2.1439, + "step": 250465 + }, + { + "epoch": 0.59, + "grad_norm": 1.8359375, + "learning_rate": 0.00016011291610851672, + "loss": 2.072, + "step": 250470 + }, + { + "epoch": 0.59, + "grad_norm": 2.171875, + "learning_rate": 0.0001601114390326405, + "loss": 2.3016, + "step": 250475 + }, + { + "epoch": 0.59, + "grad_norm": 2.578125, + "learning_rate": 0.00016010996193622913, + "loss": 2.1856, + "step": 250480 + }, + { + "epoch": 0.59, + "grad_norm": 2.0, + "learning_rate": 0.00016010848481928311, + "loss": 2.2848, + "step": 250485 + }, + { + "epoch": 0.59, + "grad_norm": 2.4375, + "learning_rate": 0.00016010700768180294, + "loss": 2.089, + "step": 250490 + }, + { + "epoch": 0.59, + "grad_norm": 1.8515625, + "learning_rate": 0.0001601055305237891, + "loss": 2.0027, + "step": 250495 + }, + { + "epoch": 0.59, + "grad_norm": 2.234375, + "learning_rate": 0.00016010405334524215, + "loss": 2.1037, + "step": 250500 + }, + { + "epoch": 0.59, + "grad_norm": 2.09375, + "learning_rate": 0.00016010257614616256, + "loss": 1.9858, + "step": 250505 + }, + { + "epoch": 0.59, + "grad_norm": 1.9296875, + "learning_rate": 0.00016010109892655082, + "loss": 2.0746, + "step": 250510 + }, + { + "epoch": 0.59, + "grad_norm": 2.28125, + "learning_rate": 0.00016009962168640749, + "loss": 1.9858, + "step": 250515 + }, + { + "epoch": 0.59, + "grad_norm": 2.40625, + "learning_rate": 0.00016009814442573302, + "loss": 2.1086, + "step": 250520 + }, + { + "epoch": 0.59, + "grad_norm": 2.078125, + "learning_rate": 0.00016009666714452792, + "loss": 2.116, + "step": 250525 + }, + { + "epoch": 0.59, + "grad_norm": 2.1875, + "learning_rate": 0.00016009518984279274, + "loss": 2.1338, + "step": 250530 + }, + { + "epoch": 0.59, + "grad_norm": 2.15625, + "learning_rate": 0.00016009371252052792, + "loss": 2.0435, + "step": 250535 + }, + { + "epoch": 0.59, + "grad_norm": 2.34375, + "learning_rate": 0.00016009223517773403, + "loss": 1.9589, + "step": 250540 + }, + { + "epoch": 0.59, + "grad_norm": 2.4375, + "learning_rate": 0.00016009075781441153, + "loss": 2.1084, + "step": 250545 + }, + { + "epoch": 0.59, + "grad_norm": 2.0625, + "learning_rate": 0.0001600892804305609, + "loss": 2.0904, + "step": 250550 + }, + { + "epoch": 0.59, + "grad_norm": 2.046875, + "learning_rate": 0.00016008780302618271, + "loss": 1.969, + "step": 250555 + }, + { + "epoch": 0.59, + "grad_norm": 2.078125, + "learning_rate": 0.00016008632560127745, + "loss": 2.0995, + "step": 250560 + }, + { + "epoch": 0.59, + "grad_norm": 2.09375, + "learning_rate": 0.0001600848481558456, + "loss": 2.1454, + "step": 250565 + }, + { + "epoch": 0.59, + "grad_norm": 2.59375, + "learning_rate": 0.0001600833706898877, + "loss": 2.1804, + "step": 250570 + }, + { + "epoch": 0.59, + "grad_norm": 2.3125, + "learning_rate": 0.00016008189320340419, + "loss": 2.118, + "step": 250575 + }, + { + "epoch": 0.59, + "grad_norm": 2.234375, + "learning_rate": 0.00016008041569639563, + "loss": 1.9901, + "step": 250580 + }, + { + "epoch": 0.59, + "grad_norm": 2.078125, + "learning_rate": 0.00016007893816886252, + "loss": 1.9929, + "step": 250585 + }, + { + "epoch": 0.59, + "grad_norm": 2.265625, + "learning_rate": 0.00016007746062080537, + "loss": 2.0063, + "step": 250590 + }, + { + "epoch": 0.59, + "grad_norm": 2.140625, + "learning_rate": 0.00016007598305222464, + "loss": 2.1477, + "step": 250595 + }, + { + "epoch": 0.59, + "grad_norm": 2.171875, + "learning_rate": 0.00016007450546312086, + "loss": 2.0159, + "step": 250600 + }, + { + "epoch": 0.59, + "grad_norm": 2.09375, + "learning_rate": 0.00016007302785349457, + "loss": 2.0822, + "step": 250605 + }, + { + "epoch": 0.59, + "grad_norm": 2.015625, + "learning_rate": 0.0001600715502233462, + "loss": 2.017, + "step": 250610 + }, + { + "epoch": 0.59, + "grad_norm": 1.9921875, + "learning_rate": 0.00016007007257267633, + "loss": 2.0676, + "step": 250615 + }, + { + "epoch": 0.59, + "grad_norm": 1.9296875, + "learning_rate": 0.00016006859490148547, + "loss": 1.9321, + "step": 250620 + }, + { + "epoch": 0.59, + "grad_norm": 2.15625, + "learning_rate": 0.00016006711720977404, + "loss": 2.0931, + "step": 250625 + }, + { + "epoch": 0.59, + "grad_norm": 2.171875, + "learning_rate": 0.0001600656394975426, + "loss": 2.264, + "step": 250630 + }, + { + "epoch": 0.59, + "grad_norm": 2.46875, + "learning_rate": 0.00016006416176479163, + "loss": 2.328, + "step": 250635 + }, + { + "epoch": 0.59, + "grad_norm": 2.0, + "learning_rate": 0.00016006268401152173, + "loss": 2.0508, + "step": 250640 + }, + { + "epoch": 0.59, + "grad_norm": 2.34375, + "learning_rate": 0.00016006120623773324, + "loss": 1.9165, + "step": 250645 + }, + { + "epoch": 0.59, + "grad_norm": 2.203125, + "learning_rate": 0.00016005972844342677, + "loss": 2.0327, + "step": 250650 + }, + { + "epoch": 0.59, + "grad_norm": 2.25, + "learning_rate": 0.00016005825062860284, + "loss": 2.3587, + "step": 250655 + }, + { + "epoch": 0.59, + "grad_norm": 2.109375, + "learning_rate": 0.0001600567727932619, + "loss": 2.1044, + "step": 250660 + }, + { + "epoch": 0.59, + "grad_norm": 1.953125, + "learning_rate": 0.00016005529493740448, + "loss": 2.1216, + "step": 250665 + }, + { + "epoch": 0.59, + "grad_norm": 2.015625, + "learning_rate": 0.00016005381706103112, + "loss": 1.8123, + "step": 250670 + }, + { + "epoch": 0.59, + "grad_norm": 2.15625, + "learning_rate": 0.00016005233916414224, + "loss": 2.0185, + "step": 250675 + }, + { + "epoch": 0.59, + "grad_norm": 2.515625, + "learning_rate": 0.00016005086124673838, + "loss": 2.1875, + "step": 250680 + }, + { + "epoch": 0.59, + "grad_norm": 1.8671875, + "learning_rate": 0.0001600493833088201, + "loss": 1.9561, + "step": 250685 + }, + { + "epoch": 0.59, + "grad_norm": 2.5, + "learning_rate": 0.00016004790535038783, + "loss": 2.0948, + "step": 250690 + }, + { + "epoch": 0.59, + "grad_norm": 2.296875, + "learning_rate": 0.00016004642737144211, + "loss": 1.8907, + "step": 250695 + }, + { + "epoch": 0.59, + "grad_norm": 2.0625, + "learning_rate": 0.00016004494937198345, + "loss": 1.9109, + "step": 250700 + }, + { + "epoch": 0.59, + "grad_norm": 2.21875, + "learning_rate": 0.00016004347135201237, + "loss": 2.1522, + "step": 250705 + }, + { + "epoch": 0.59, + "grad_norm": 2.390625, + "learning_rate": 0.0001600419933115293, + "loss": 2.0585, + "step": 250710 + }, + { + "epoch": 0.59, + "grad_norm": 2.21875, + "learning_rate": 0.00016004051525053483, + "loss": 2.0651, + "step": 250715 + }, + { + "epoch": 0.59, + "grad_norm": 1.7890625, + "learning_rate": 0.00016003903716902944, + "loss": 2.218, + "step": 250720 + }, + { + "epoch": 0.59, + "grad_norm": 1.7265625, + "learning_rate": 0.0001600375590670136, + "loss": 1.7863, + "step": 250725 + }, + { + "epoch": 0.59, + "grad_norm": 2.6875, + "learning_rate": 0.00016003608094448786, + "loss": 2.1892, + "step": 250730 + }, + { + "epoch": 0.59, + "grad_norm": 2.5, + "learning_rate": 0.0001600346028014527, + "loss": 2.0713, + "step": 250735 + }, + { + "epoch": 0.59, + "grad_norm": 2.28125, + "learning_rate": 0.00016003312463790863, + "loss": 2.076, + "step": 250740 + }, + { + "epoch": 0.59, + "grad_norm": 2.171875, + "learning_rate": 0.00016003164645385614, + "loss": 2.1764, + "step": 250745 + }, + { + "epoch": 0.59, + "grad_norm": 2.203125, + "learning_rate": 0.0001600301682492958, + "loss": 2.0751, + "step": 250750 + }, + { + "epoch": 0.59, + "grad_norm": 2.359375, + "learning_rate": 0.000160028690024228, + "loss": 2.2925, + "step": 250755 + }, + { + "epoch": 0.59, + "grad_norm": 1.921875, + "learning_rate": 0.00016002721177865335, + "loss": 2.0972, + "step": 250760 + }, + { + "epoch": 0.59, + "grad_norm": 2.109375, + "learning_rate": 0.0001600257335125723, + "loss": 2.2228, + "step": 250765 + }, + { + "epoch": 0.59, + "grad_norm": 2.03125, + "learning_rate": 0.00016002425522598538, + "loss": 2.1749, + "step": 250770 + }, + { + "epoch": 0.59, + "grad_norm": 2.40625, + "learning_rate": 0.00016002277691889308, + "loss": 2.0432, + "step": 250775 + }, + { + "epoch": 0.59, + "grad_norm": 2.0625, + "learning_rate": 0.00016002129859129594, + "loss": 1.9521, + "step": 250780 + }, + { + "epoch": 0.59, + "grad_norm": 2.421875, + "learning_rate": 0.0001600198202431944, + "loss": 1.9846, + "step": 250785 + }, + { + "epoch": 0.59, + "grad_norm": 1.890625, + "learning_rate": 0.000160018341874589, + "loss": 2.3014, + "step": 250790 + }, + { + "epoch": 0.59, + "grad_norm": 2.59375, + "learning_rate": 0.00016001686348548027, + "loss": 1.9832, + "step": 250795 + }, + { + "epoch": 0.59, + "grad_norm": 1.9765625, + "learning_rate": 0.00016001538507586869, + "loss": 2.204, + "step": 250800 + }, + { + "epoch": 0.59, + "grad_norm": 1.8984375, + "learning_rate": 0.00016001390664575476, + "loss": 1.9668, + "step": 250805 + }, + { + "epoch": 0.59, + "grad_norm": 2.203125, + "learning_rate": 0.00016001242819513895, + "loss": 2.039, + "step": 250810 + }, + { + "epoch": 0.59, + "grad_norm": 2.5, + "learning_rate": 0.00016001094972402185, + "loss": 2.0433, + "step": 250815 + }, + { + "epoch": 0.59, + "grad_norm": 2.140625, + "learning_rate": 0.00016000947123240392, + "loss": 2.149, + "step": 250820 + }, + { + "epoch": 0.59, + "grad_norm": 3.40625, + "learning_rate": 0.00016000799272028566, + "loss": 2.0236, + "step": 250825 + }, + { + "epoch": 0.59, + "grad_norm": 2.46875, + "learning_rate": 0.00016000651418766757, + "loss": 2.0125, + "step": 250830 + }, + { + "epoch": 0.59, + "grad_norm": 2.296875, + "learning_rate": 0.0001600050356345502, + "loss": 2.0323, + "step": 250835 + }, + { + "epoch": 0.59, + "grad_norm": 1.9765625, + "learning_rate": 0.00016000355706093398, + "loss": 2.0678, + "step": 250840 + }, + { + "epoch": 0.59, + "grad_norm": 1.84375, + "learning_rate": 0.00016000207846681946, + "loss": 2.1788, + "step": 250845 + }, + { + "epoch": 0.59, + "grad_norm": 2.171875, + "learning_rate": 0.00016000059985220717, + "loss": 2.0195, + "step": 250850 + }, + { + "epoch": 0.59, + "grad_norm": 2.484375, + "learning_rate": 0.00015999912121709758, + "loss": 2.0635, + "step": 250855 + }, + { + "epoch": 0.59, + "grad_norm": 2.1875, + "learning_rate": 0.0001599976425614912, + "loss": 2.0795, + "step": 250860 + }, + { + "epoch": 0.59, + "grad_norm": 2.15625, + "learning_rate": 0.00015999616388538853, + "loss": 2.1605, + "step": 250865 + }, + { + "epoch": 0.59, + "grad_norm": 2.28125, + "learning_rate": 0.0001599946851887901, + "loss": 1.8528, + "step": 250870 + }, + { + "epoch": 0.59, + "grad_norm": 2.15625, + "learning_rate": 0.0001599932064716964, + "loss": 1.9879, + "step": 250875 + }, + { + "epoch": 0.59, + "grad_norm": 2.0, + "learning_rate": 0.0001599917277341079, + "loss": 2.1989, + "step": 250880 + }, + { + "epoch": 0.59, + "grad_norm": 2.46875, + "learning_rate": 0.00015999024897602512, + "loss": 2.1593, + "step": 250885 + }, + { + "epoch": 0.59, + "grad_norm": 2.578125, + "learning_rate": 0.00015998877019744864, + "loss": 2.1961, + "step": 250890 + }, + { + "epoch": 0.59, + "grad_norm": 2.21875, + "learning_rate": 0.00015998729139837887, + "loss": 2.2013, + "step": 250895 + }, + { + "epoch": 0.59, + "grad_norm": 2.203125, + "learning_rate": 0.00015998581257881637, + "loss": 2.0338, + "step": 250900 + }, + { + "epoch": 0.59, + "grad_norm": 2.078125, + "learning_rate": 0.00015998433373876165, + "loss": 2.0584, + "step": 250905 + }, + { + "epoch": 0.59, + "grad_norm": 2.078125, + "learning_rate": 0.00015998285487821515, + "loss": 2.227, + "step": 250910 + }, + { + "epoch": 0.59, + "grad_norm": 1.9375, + "learning_rate": 0.00015998137599717747, + "loss": 2.1877, + "step": 250915 + }, + { + "epoch": 0.59, + "grad_norm": 1.9140625, + "learning_rate": 0.00015997989709564902, + "loss": 2.0329, + "step": 250920 + }, + { + "epoch": 0.59, + "grad_norm": 2.265625, + "learning_rate": 0.00015997841817363035, + "loss": 2.1278, + "step": 250925 + }, + { + "epoch": 0.59, + "grad_norm": 2.328125, + "learning_rate": 0.00015997693923112198, + "loss": 2.1937, + "step": 250930 + }, + { + "epoch": 0.59, + "grad_norm": 2.015625, + "learning_rate": 0.0001599754602681244, + "loss": 1.9799, + "step": 250935 + }, + { + "epoch": 0.59, + "grad_norm": 2.359375, + "learning_rate": 0.00015997398128463812, + "loss": 2.058, + "step": 250940 + }, + { + "epoch": 0.59, + "grad_norm": 2.28125, + "learning_rate": 0.00015997250228066363, + "loss": 2.1719, + "step": 250945 + }, + { + "epoch": 0.59, + "grad_norm": 2.296875, + "learning_rate": 0.00015997102325620143, + "loss": 2.0926, + "step": 250950 + }, + { + "epoch": 0.59, + "grad_norm": 2.09375, + "learning_rate": 0.00015996954421125206, + "loss": 2.1463, + "step": 250955 + }, + { + "epoch": 0.59, + "grad_norm": 1.703125, + "learning_rate": 0.000159968065145816, + "loss": 1.9402, + "step": 250960 + }, + { + "epoch": 0.59, + "grad_norm": 2.25, + "learning_rate": 0.00015996658605989378, + "loss": 2.0698, + "step": 250965 + }, + { + "epoch": 0.59, + "grad_norm": 1.9375, + "learning_rate": 0.0001599651069534859, + "loss": 2.2511, + "step": 250970 + }, + { + "epoch": 0.59, + "grad_norm": 2.109375, + "learning_rate": 0.0001599636278265928, + "loss": 2.2597, + "step": 250975 + }, + { + "epoch": 0.59, + "grad_norm": 2.28125, + "learning_rate": 0.00015996214867921506, + "loss": 2.0918, + "step": 250980 + }, + { + "epoch": 0.59, + "grad_norm": 2.140625, + "learning_rate": 0.00015996066951135318, + "loss": 2.2762, + "step": 250985 + }, + { + "epoch": 0.59, + "grad_norm": 2.1875, + "learning_rate": 0.00015995919032300764, + "loss": 1.9907, + "step": 250990 + }, + { + "epoch": 0.59, + "grad_norm": 2.5, + "learning_rate": 0.00015995771111417892, + "loss": 2.0677, + "step": 250995 + }, + { + "epoch": 0.59, + "grad_norm": 2.453125, + "learning_rate": 0.00015995623188486762, + "loss": 2.2815, + "step": 251000 + }, + { + "epoch": 0.59, + "grad_norm": 2.25, + "learning_rate": 0.0001599547526350741, + "loss": 2.1566, + "step": 251005 + }, + { + "epoch": 0.59, + "grad_norm": 2.078125, + "learning_rate": 0.000159953273364799, + "loss": 2.1967, + "step": 251010 + }, + { + "epoch": 0.59, + "grad_norm": 2.109375, + "learning_rate": 0.0001599517940740428, + "loss": 2.1588, + "step": 251015 + }, + { + "epoch": 0.59, + "grad_norm": 2.21875, + "learning_rate": 0.00015995031476280595, + "loss": 2.0009, + "step": 251020 + }, + { + "epoch": 0.59, + "grad_norm": 1.9296875, + "learning_rate": 0.00015994883543108898, + "loss": 1.9773, + "step": 251025 + }, + { + "epoch": 0.59, + "grad_norm": 2.078125, + "learning_rate": 0.00015994735607889238, + "loss": 2.1914, + "step": 251030 + }, + { + "epoch": 0.59, + "grad_norm": 2.296875, + "learning_rate": 0.00015994587670621673, + "loss": 2.1263, + "step": 251035 + }, + { + "epoch": 0.59, + "grad_norm": 2.546875, + "learning_rate": 0.00015994439731306247, + "loss": 2.092, + "step": 251040 + }, + { + "epoch": 0.59, + "grad_norm": 2.515625, + "learning_rate": 0.0001599429178994301, + "loss": 2.205, + "step": 251045 + }, + { + "epoch": 0.59, + "grad_norm": 2.234375, + "learning_rate": 0.00015994143846532013, + "loss": 2.172, + "step": 251050 + }, + { + "epoch": 0.59, + "grad_norm": 2.390625, + "learning_rate": 0.00015993995901073308, + "loss": 2.1918, + "step": 251055 + }, + { + "epoch": 0.59, + "grad_norm": 2.265625, + "learning_rate": 0.00015993847953566946, + "loss": 2.0036, + "step": 251060 + }, + { + "epoch": 0.59, + "grad_norm": 2.390625, + "learning_rate": 0.0001599370000401298, + "loss": 1.9257, + "step": 251065 + }, + { + "epoch": 0.59, + "grad_norm": 2.484375, + "learning_rate": 0.00015993552052411453, + "loss": 2.2296, + "step": 251070 + }, + { + "epoch": 0.59, + "grad_norm": 2.75, + "learning_rate": 0.00015993404098762421, + "loss": 2.0322, + "step": 251075 + }, + { + "epoch": 0.59, + "grad_norm": 1.8359375, + "learning_rate": 0.00015993256143065937, + "loss": 1.9281, + "step": 251080 + }, + { + "epoch": 0.59, + "grad_norm": 2.390625, + "learning_rate": 0.00015993108185322042, + "loss": 2.1584, + "step": 251085 + }, + { + "epoch": 0.59, + "grad_norm": 1.9140625, + "learning_rate": 0.00015992960225530797, + "loss": 2.2082, + "step": 251090 + }, + { + "epoch": 0.59, + "grad_norm": 2.21875, + "learning_rate": 0.00015992812263692247, + "loss": 2.1195, + "step": 251095 + }, + { + "epoch": 0.59, + "grad_norm": 2.03125, + "learning_rate": 0.00015992664299806445, + "loss": 2.2512, + "step": 251100 + }, + { + "epoch": 0.59, + "grad_norm": 2.28125, + "learning_rate": 0.00015992516333873438, + "loss": 2.0015, + "step": 251105 + }, + { + "epoch": 0.59, + "grad_norm": 1.953125, + "learning_rate": 0.00015992368365893276, + "loss": 2.1152, + "step": 251110 + }, + { + "epoch": 0.59, + "grad_norm": 2.109375, + "learning_rate": 0.0001599222039586602, + "loss": 2.1487, + "step": 251115 + }, + { + "epoch": 0.59, + "grad_norm": 2.1875, + "learning_rate": 0.00015992072423791703, + "loss": 2.317, + "step": 251120 + }, + { + "epoch": 0.59, + "grad_norm": 1.84375, + "learning_rate": 0.00015991924449670393, + "loss": 1.9772, + "step": 251125 + }, + { + "epoch": 0.59, + "grad_norm": 2.203125, + "learning_rate": 0.0001599177647350213, + "loss": 2.2225, + "step": 251130 + }, + { + "epoch": 0.59, + "grad_norm": 1.59375, + "learning_rate": 0.00015991628495286967, + "loss": 1.9591, + "step": 251135 + }, + { + "epoch": 0.59, + "grad_norm": 2.234375, + "learning_rate": 0.00015991480515024957, + "loss": 1.9652, + "step": 251140 + }, + { + "epoch": 0.59, + "grad_norm": 2.5, + "learning_rate": 0.0001599133253271615, + "loss": 2.0559, + "step": 251145 + }, + { + "epoch": 0.59, + "grad_norm": 2.09375, + "learning_rate": 0.00015991184548360592, + "loss": 2.0872, + "step": 251150 + }, + { + "epoch": 0.59, + "grad_norm": 2.71875, + "learning_rate": 0.00015991036561958336, + "loss": 2.1731, + "step": 251155 + }, + { + "epoch": 0.59, + "grad_norm": 1.9609375, + "learning_rate": 0.00015990888573509437, + "loss": 2.1721, + "step": 251160 + }, + { + "epoch": 0.59, + "grad_norm": 1.9140625, + "learning_rate": 0.0001599074058301394, + "loss": 1.9429, + "step": 251165 + }, + { + "epoch": 0.59, + "grad_norm": 1.9140625, + "learning_rate": 0.00015990592590471895, + "loss": 2.1037, + "step": 251170 + }, + { + "epoch": 0.59, + "grad_norm": 2.46875, + "learning_rate": 0.0001599044459588336, + "loss": 2.0053, + "step": 251175 + }, + { + "epoch": 0.59, + "grad_norm": 2.21875, + "learning_rate": 0.00015990296599248377, + "loss": 2.1393, + "step": 251180 + }, + { + "epoch": 0.59, + "grad_norm": 1.84375, + "learning_rate": 0.00015990148600567, + "loss": 2.0736, + "step": 251185 + }, + { + "epoch": 0.59, + "grad_norm": 2.5625, + "learning_rate": 0.0001599000059983928, + "loss": 2.1589, + "step": 251190 + }, + { + "epoch": 0.59, + "grad_norm": 2.28125, + "learning_rate": 0.0001598985259706527, + "loss": 2.0988, + "step": 251195 + }, + { + "epoch": 0.59, + "grad_norm": 2.296875, + "learning_rate": 0.0001598970459224501, + "loss": 1.9904, + "step": 251200 + }, + { + "epoch": 0.59, + "grad_norm": 2.234375, + "learning_rate": 0.00015989556585378563, + "loss": 2.0738, + "step": 251205 + }, + { + "epoch": 0.59, + "grad_norm": 2.015625, + "learning_rate": 0.00015989408576465978, + "loss": 2.1321, + "step": 251210 + }, + { + "epoch": 0.59, + "grad_norm": 1.921875, + "learning_rate": 0.000159892605655073, + "loss": 2.126, + "step": 251215 + }, + { + "epoch": 0.59, + "grad_norm": 2.234375, + "learning_rate": 0.00015989112552502582, + "loss": 2.1969, + "step": 251220 + }, + { + "epoch": 0.59, + "grad_norm": 2.640625, + "learning_rate": 0.00015988964537451872, + "loss": 1.917, + "step": 251225 + }, + { + "epoch": 0.59, + "grad_norm": 2.359375, + "learning_rate": 0.00015988816520355227, + "loss": 2.0158, + "step": 251230 + }, + { + "epoch": 0.59, + "grad_norm": 2.203125, + "learning_rate": 0.00015988668501212692, + "loss": 1.9792, + "step": 251235 + }, + { + "epoch": 0.59, + "grad_norm": 2.515625, + "learning_rate": 0.0001598852048002432, + "loss": 2.0751, + "step": 251240 + }, + { + "epoch": 0.59, + "grad_norm": 1.6640625, + "learning_rate": 0.0001598837245679016, + "loss": 1.9554, + "step": 251245 + }, + { + "epoch": 0.59, + "grad_norm": 2.265625, + "learning_rate": 0.00015988224431510263, + "loss": 2.0419, + "step": 251250 + }, + { + "epoch": 0.59, + "grad_norm": 2.171875, + "learning_rate": 0.0001598807640418468, + "loss": 2.034, + "step": 251255 + }, + { + "epoch": 0.59, + "grad_norm": 2.328125, + "learning_rate": 0.0001598792837481346, + "loss": 2.0715, + "step": 251260 + }, + { + "epoch": 0.59, + "grad_norm": 2.234375, + "learning_rate": 0.00015987780343396657, + "loss": 1.9942, + "step": 251265 + }, + { + "epoch": 0.59, + "grad_norm": 2.234375, + "learning_rate": 0.0001598763230993432, + "loss": 2.1517, + "step": 251270 + }, + { + "epoch": 0.59, + "grad_norm": 2.140625, + "learning_rate": 0.00015987484274426498, + "loss": 2.0768, + "step": 251275 + }, + { + "epoch": 0.59, + "grad_norm": 2.65625, + "learning_rate": 0.00015987336236873242, + "loss": 1.8083, + "step": 251280 + }, + { + "epoch": 0.59, + "grad_norm": 1.78125, + "learning_rate": 0.00015987188197274604, + "loss": 2.1357, + "step": 251285 + }, + { + "epoch": 0.59, + "grad_norm": 2.328125, + "learning_rate": 0.00015987040155630638, + "loss": 2.0794, + "step": 251290 + }, + { + "epoch": 0.59, + "grad_norm": 1.9296875, + "learning_rate": 0.00015986892111941387, + "loss": 1.9357, + "step": 251295 + }, + { + "epoch": 0.59, + "grad_norm": 2.5, + "learning_rate": 0.00015986744066206904, + "loss": 2.0238, + "step": 251300 + }, + { + "epoch": 0.59, + "grad_norm": 2.53125, + "learning_rate": 0.00015986596018427243, + "loss": 2.0771, + "step": 251305 + }, + { + "epoch": 0.59, + "grad_norm": 2.078125, + "learning_rate": 0.0001598644796860245, + "loss": 1.9867, + "step": 251310 + }, + { + "epoch": 0.59, + "grad_norm": 2.203125, + "learning_rate": 0.00015986299916732582, + "loss": 2.2, + "step": 251315 + }, + { + "epoch": 0.59, + "grad_norm": 2.1875, + "learning_rate": 0.0001598615186281768, + "loss": 2.0483, + "step": 251320 + }, + { + "epoch": 0.59, + "grad_norm": 1.921875, + "learning_rate": 0.000159860038068578, + "loss": 2.2034, + "step": 251325 + }, + { + "epoch": 0.59, + "grad_norm": 1.78125, + "learning_rate": 0.00015985855748852995, + "loss": 2.071, + "step": 251330 + }, + { + "epoch": 0.59, + "grad_norm": 1.78125, + "learning_rate": 0.00015985707688803314, + "loss": 2.0951, + "step": 251335 + }, + { + "epoch": 0.59, + "grad_norm": 1.90625, + "learning_rate": 0.00015985559626708807, + "loss": 2.1206, + "step": 251340 + }, + { + "epoch": 0.59, + "grad_norm": 1.9609375, + "learning_rate": 0.00015985411562569523, + "loss": 2.1137, + "step": 251345 + }, + { + "epoch": 0.59, + "grad_norm": 2.203125, + "learning_rate": 0.0001598526349638551, + "loss": 2.1325, + "step": 251350 + }, + { + "epoch": 0.59, + "grad_norm": 1.8125, + "learning_rate": 0.00015985115428156829, + "loss": 2.0657, + "step": 251355 + }, + { + "epoch": 0.59, + "grad_norm": 2.359375, + "learning_rate": 0.0001598496735788352, + "loss": 1.9893, + "step": 251360 + }, + { + "epoch": 0.59, + "grad_norm": 2.0625, + "learning_rate": 0.0001598481928556564, + "loss": 2.1188, + "step": 251365 + }, + { + "epoch": 0.59, + "grad_norm": 1.9296875, + "learning_rate": 0.00015984671211203237, + "loss": 2.0944, + "step": 251370 + }, + { + "epoch": 0.59, + "grad_norm": 2.09375, + "learning_rate": 0.0001598452313479636, + "loss": 1.9673, + "step": 251375 + }, + { + "epoch": 0.59, + "grad_norm": 2.203125, + "learning_rate": 0.00015984375056345062, + "loss": 2.2881, + "step": 251380 + }, + { + "epoch": 0.59, + "grad_norm": 2.0625, + "learning_rate": 0.00015984226975849394, + "loss": 1.9345, + "step": 251385 + }, + { + "epoch": 0.59, + "grad_norm": 2.015625, + "learning_rate": 0.00015984078893309405, + "loss": 2.1798, + "step": 251390 + }, + { + "epoch": 0.59, + "grad_norm": 2.28125, + "learning_rate": 0.00015983930808725146, + "loss": 2.0492, + "step": 251395 + }, + { + "epoch": 0.59, + "grad_norm": 1.890625, + "learning_rate": 0.00015983782722096666, + "loss": 2.0017, + "step": 251400 + }, + { + "epoch": 0.59, + "grad_norm": 1.8515625, + "learning_rate": 0.00015983634633424018, + "loss": 1.792, + "step": 251405 + }, + { + "epoch": 0.59, + "grad_norm": 2.265625, + "learning_rate": 0.00015983486542707254, + "loss": 2.2011, + "step": 251410 + }, + { + "epoch": 0.59, + "grad_norm": 2.203125, + "learning_rate": 0.0001598333844994642, + "loss": 2.1411, + "step": 251415 + }, + { + "epoch": 0.59, + "grad_norm": 2.234375, + "learning_rate": 0.00015983190355141572, + "loss": 2.1668, + "step": 251420 + }, + { + "epoch": 0.59, + "grad_norm": 2.078125, + "learning_rate": 0.00015983042258292755, + "loss": 2.0741, + "step": 251425 + }, + { + "epoch": 0.59, + "grad_norm": 2.0, + "learning_rate": 0.00015982894159400024, + "loss": 2.1152, + "step": 251430 + }, + { + "epoch": 0.59, + "grad_norm": 2.625, + "learning_rate": 0.00015982746058463426, + "loss": 2.1734, + "step": 251435 + }, + { + "epoch": 0.59, + "grad_norm": 1.875, + "learning_rate": 0.00015982597955483018, + "loss": 2.1049, + "step": 251440 + }, + { + "epoch": 0.59, + "grad_norm": 1.953125, + "learning_rate": 0.0001598244985045884, + "loss": 1.93, + "step": 251445 + }, + { + "epoch": 0.59, + "grad_norm": 2.234375, + "learning_rate": 0.0001598230174339095, + "loss": 2.2901, + "step": 251450 + }, + { + "epoch": 0.59, + "grad_norm": 2.0625, + "learning_rate": 0.000159821536342794, + "loss": 1.9958, + "step": 251455 + }, + { + "epoch": 0.59, + "grad_norm": 2.125, + "learning_rate": 0.00015982005523124235, + "loss": 2.0649, + "step": 251460 + }, + { + "epoch": 0.59, + "grad_norm": 1.53125, + "learning_rate": 0.00015981857409925508, + "loss": 2.2142, + "step": 251465 + }, + { + "epoch": 0.59, + "grad_norm": 2.234375, + "learning_rate": 0.00015981709294683274, + "loss": 1.9441, + "step": 251470 + }, + { + "epoch": 0.59, + "grad_norm": 2.5625, + "learning_rate": 0.00015981561177397577, + "loss": 2.0636, + "step": 251475 + }, + { + "epoch": 0.59, + "grad_norm": 2.71875, + "learning_rate": 0.0001598141305806847, + "loss": 1.8519, + "step": 251480 + }, + { + "epoch": 0.59, + "grad_norm": 2.125, + "learning_rate": 0.00015981264936696003, + "loss": 1.9629, + "step": 251485 + }, + { + "epoch": 0.59, + "grad_norm": 2.109375, + "learning_rate": 0.0001598111681328023, + "loss": 2.1785, + "step": 251490 + }, + { + "epoch": 0.59, + "grad_norm": 2.265625, + "learning_rate": 0.00015980968687821196, + "loss": 1.8992, + "step": 251495 + }, + { + "epoch": 0.59, + "grad_norm": 2.03125, + "learning_rate": 0.00015980820560318958, + "loss": 2.118, + "step": 251500 + }, + { + "epoch": 0.59, + "grad_norm": 1.96875, + "learning_rate": 0.0001598067243077356, + "loss": 2.0815, + "step": 251505 + }, + { + "epoch": 0.59, + "grad_norm": 2.015625, + "learning_rate": 0.00015980524299185058, + "loss": 2.0673, + "step": 251510 + }, + { + "epoch": 0.59, + "grad_norm": 2.3125, + "learning_rate": 0.000159803761655535, + "loss": 1.9509, + "step": 251515 + }, + { + "epoch": 0.59, + "grad_norm": 2.25, + "learning_rate": 0.00015980228029878937, + "loss": 2.0835, + "step": 251520 + }, + { + "epoch": 0.59, + "grad_norm": 3.796875, + "learning_rate": 0.0001598007989216142, + "loss": 2.2168, + "step": 251525 + }, + { + "epoch": 0.59, + "grad_norm": 2.265625, + "learning_rate": 0.00015979931752401, + "loss": 1.9919, + "step": 251530 + }, + { + "epoch": 0.59, + "grad_norm": 2.140625, + "learning_rate": 0.00015979783610597725, + "loss": 2.0032, + "step": 251535 + }, + { + "epoch": 0.59, + "grad_norm": 2.203125, + "learning_rate": 0.0001597963546675165, + "loss": 2.0387, + "step": 251540 + }, + { + "epoch": 0.59, + "grad_norm": 2.625, + "learning_rate": 0.00015979487320862818, + "loss": 2.251, + "step": 251545 + }, + { + "epoch": 0.59, + "grad_norm": 1.9375, + "learning_rate": 0.0001597933917293129, + "loss": 2.2321, + "step": 251550 + }, + { + "epoch": 0.59, + "grad_norm": 2.21875, + "learning_rate": 0.00015979191022957108, + "loss": 2.1001, + "step": 251555 + }, + { + "epoch": 0.59, + "grad_norm": 1.9921875, + "learning_rate": 0.00015979042870940328, + "loss": 2.2615, + "step": 251560 + }, + { + "epoch": 0.59, + "grad_norm": 2.65625, + "learning_rate": 0.00015978894716880997, + "loss": 2.2584, + "step": 251565 + }, + { + "epoch": 0.59, + "grad_norm": 2.015625, + "learning_rate": 0.00015978746560779168, + "loss": 2.1185, + "step": 251570 + }, + { + "epoch": 0.59, + "grad_norm": 2.234375, + "learning_rate": 0.0001597859840263489, + "loss": 1.9933, + "step": 251575 + }, + { + "epoch": 0.59, + "grad_norm": 2.46875, + "learning_rate": 0.00015978450242448215, + "loss": 2.0986, + "step": 251580 + }, + { + "epoch": 0.59, + "grad_norm": 2.140625, + "learning_rate": 0.00015978302080219193, + "loss": 2.0861, + "step": 251585 + }, + { + "epoch": 0.59, + "grad_norm": 1.859375, + "learning_rate": 0.00015978153915947873, + "loss": 2.0833, + "step": 251590 + }, + { + "epoch": 0.59, + "grad_norm": 2.28125, + "learning_rate": 0.0001597800574963431, + "loss": 2.071, + "step": 251595 + }, + { + "epoch": 0.59, + "grad_norm": 2.125, + "learning_rate": 0.00015977857581278551, + "loss": 2.0, + "step": 251600 + }, + { + "epoch": 0.59, + "grad_norm": 2.109375, + "learning_rate": 0.00015977709410880647, + "loss": 2.0191, + "step": 251605 + }, + { + "epoch": 0.59, + "grad_norm": 1.8203125, + "learning_rate": 0.00015977561238440647, + "loss": 2.2386, + "step": 251610 + }, + { + "epoch": 0.59, + "grad_norm": 2.5625, + "learning_rate": 0.0001597741306395861, + "loss": 2.3438, + "step": 251615 + }, + { + "epoch": 0.59, + "grad_norm": 2.21875, + "learning_rate": 0.00015977264887434572, + "loss": 2.0363, + "step": 251620 + }, + { + "epoch": 0.59, + "grad_norm": 1.5234375, + "learning_rate": 0.00015977116708868597, + "loss": 1.9905, + "step": 251625 + }, + { + "epoch": 0.59, + "grad_norm": 2.359375, + "learning_rate": 0.0001597696852826073, + "loss": 2.1649, + "step": 251630 + }, + { + "epoch": 0.59, + "grad_norm": 2.359375, + "learning_rate": 0.0001597682034561102, + "loss": 2.1689, + "step": 251635 + }, + { + "epoch": 0.59, + "grad_norm": 2.203125, + "learning_rate": 0.00015976672160919524, + "loss": 2.0708, + "step": 251640 + }, + { + "epoch": 0.59, + "grad_norm": 2.5625, + "learning_rate": 0.0001597652397418628, + "loss": 2.1665, + "step": 251645 + }, + { + "epoch": 0.59, + "grad_norm": 1.96875, + "learning_rate": 0.00015976375785411355, + "loss": 2.096, + "step": 251650 + }, + { + "epoch": 0.59, + "grad_norm": 2.171875, + "learning_rate": 0.00015976227594594787, + "loss": 2.1197, + "step": 251655 + }, + { + "epoch": 0.59, + "grad_norm": 2.03125, + "learning_rate": 0.00015976079401736637, + "loss": 2.1138, + "step": 251660 + }, + { + "epoch": 0.59, + "grad_norm": 2.203125, + "learning_rate": 0.00015975931206836945, + "loss": 2.1659, + "step": 251665 + }, + { + "epoch": 0.59, + "grad_norm": 1.578125, + "learning_rate": 0.00015975783009895768, + "loss": 1.9413, + "step": 251670 + }, + { + "epoch": 0.59, + "grad_norm": 2.109375, + "learning_rate": 0.00015975634810913155, + "loss": 2.0923, + "step": 251675 + }, + { + "epoch": 0.59, + "grad_norm": 2.40625, + "learning_rate": 0.0001597548660988916, + "loss": 2.0469, + "step": 251680 + }, + { + "epoch": 0.59, + "grad_norm": 2.109375, + "learning_rate": 0.00015975338406823826, + "loss": 2.1641, + "step": 251685 + }, + { + "epoch": 0.59, + "grad_norm": 2.1875, + "learning_rate": 0.0001597519020171721, + "loss": 1.9993, + "step": 251690 + }, + { + "epoch": 0.59, + "grad_norm": 2.1875, + "learning_rate": 0.00015975041994569357, + "loss": 2.1409, + "step": 251695 + }, + { + "epoch": 0.59, + "grad_norm": 2.375, + "learning_rate": 0.00015974893785380325, + "loss": 2.277, + "step": 251700 + }, + { + "epoch": 0.59, + "grad_norm": 2.609375, + "learning_rate": 0.0001597474557415016, + "loss": 2.3297, + "step": 251705 + }, + { + "epoch": 0.59, + "grad_norm": 2.046875, + "learning_rate": 0.00015974597360878914, + "loss": 2.0414, + "step": 251710 + }, + { + "epoch": 0.59, + "grad_norm": 2.078125, + "learning_rate": 0.00015974449145566636, + "loss": 1.9488, + "step": 251715 + }, + { + "epoch": 0.59, + "grad_norm": 1.9921875, + "learning_rate": 0.00015974300928213382, + "loss": 1.9796, + "step": 251720 + }, + { + "epoch": 0.59, + "grad_norm": 2.484375, + "learning_rate": 0.00015974152708819194, + "loss": 2.0274, + "step": 251725 + }, + { + "epoch": 0.59, + "grad_norm": 2.125, + "learning_rate": 0.00015974004487384125, + "loss": 2.0158, + "step": 251730 + }, + { + "epoch": 0.59, + "grad_norm": 2.078125, + "learning_rate": 0.00015973856263908233, + "loss": 2.1004, + "step": 251735 + }, + { + "epoch": 0.59, + "grad_norm": 2.15625, + "learning_rate": 0.00015973708038391563, + "loss": 1.9583, + "step": 251740 + }, + { + "epoch": 0.59, + "grad_norm": 2.859375, + "learning_rate": 0.00015973559810834165, + "loss": 1.9657, + "step": 251745 + }, + { + "epoch": 0.59, + "grad_norm": 1.8828125, + "learning_rate": 0.0001597341158123609, + "loss": 1.9195, + "step": 251750 + }, + { + "epoch": 0.59, + "grad_norm": 2.21875, + "learning_rate": 0.00015973263349597388, + "loss": 2.1277, + "step": 251755 + }, + { + "epoch": 0.59, + "grad_norm": 2.078125, + "learning_rate": 0.00015973115115918113, + "loss": 2.1766, + "step": 251760 + }, + { + "epoch": 0.59, + "grad_norm": 1.96875, + "learning_rate": 0.00015972966880198312, + "loss": 1.9001, + "step": 251765 + }, + { + "epoch": 0.59, + "grad_norm": 2.53125, + "learning_rate": 0.0001597281864243804, + "loss": 2.0187, + "step": 251770 + }, + { + "epoch": 0.59, + "grad_norm": 1.9609375, + "learning_rate": 0.00015972670402637342, + "loss": 2.103, + "step": 251775 + }, + { + "epoch": 0.59, + "grad_norm": 2.265625, + "learning_rate": 0.0001597252216079627, + "loss": 2.101, + "step": 251780 + }, + { + "epoch": 0.59, + "grad_norm": 2.3125, + "learning_rate": 0.0001597237391691488, + "loss": 2.1551, + "step": 251785 + }, + { + "epoch": 0.59, + "grad_norm": 2.34375, + "learning_rate": 0.00015972225670993217, + "loss": 2.1075, + "step": 251790 + }, + { + "epoch": 0.59, + "grad_norm": 2.375, + "learning_rate": 0.00015972077423031332, + "loss": 2.1685, + "step": 251795 + }, + { + "epoch": 0.59, + "grad_norm": 2.140625, + "learning_rate": 0.00015971929173029282, + "loss": 1.9976, + "step": 251800 + }, + { + "epoch": 0.59, + "grad_norm": 2.28125, + "learning_rate": 0.00015971780920987104, + "loss": 2.3116, + "step": 251805 + }, + { + "epoch": 0.59, + "grad_norm": 2.25, + "learning_rate": 0.00015971632666904865, + "loss": 2.1115, + "step": 251810 + }, + { + "epoch": 0.59, + "grad_norm": 1.8359375, + "learning_rate": 0.00015971484410782605, + "loss": 2.1678, + "step": 251815 + }, + { + "epoch": 0.59, + "grad_norm": 1.7734375, + "learning_rate": 0.0001597133615262038, + "loss": 2.006, + "step": 251820 + }, + { + "epoch": 0.59, + "grad_norm": 2.265625, + "learning_rate": 0.00015971187892418237, + "loss": 2.1649, + "step": 251825 + }, + { + "epoch": 0.59, + "grad_norm": 2.453125, + "learning_rate": 0.00015971039630176227, + "loss": 2.0805, + "step": 251830 + }, + { + "epoch": 0.59, + "grad_norm": 2.109375, + "learning_rate": 0.00015970891365894402, + "loss": 2.194, + "step": 251835 + }, + { + "epoch": 0.59, + "grad_norm": 2.296875, + "learning_rate": 0.0001597074309957281, + "loss": 2.0472, + "step": 251840 + }, + { + "epoch": 0.59, + "grad_norm": 2.203125, + "learning_rate": 0.0001597059483121151, + "loss": 1.993, + "step": 251845 + }, + { + "epoch": 0.59, + "grad_norm": 1.84375, + "learning_rate": 0.0001597044656081054, + "loss": 2.155, + "step": 251850 + }, + { + "epoch": 0.59, + "grad_norm": 2.46875, + "learning_rate": 0.0001597029828836996, + "loss": 2.2526, + "step": 251855 + }, + { + "epoch": 0.59, + "grad_norm": 2.15625, + "learning_rate": 0.00015970150013889817, + "loss": 2.0008, + "step": 251860 + }, + { + "epoch": 0.59, + "grad_norm": 2.0625, + "learning_rate": 0.00015970001737370163, + "loss": 2.0977, + "step": 251865 + }, + { + "epoch": 0.59, + "grad_norm": 2.265625, + "learning_rate": 0.0001596985345881105, + "loss": 1.9942, + "step": 251870 + }, + { + "epoch": 0.59, + "grad_norm": 1.7890625, + "learning_rate": 0.00015969705178212523, + "loss": 2.0752, + "step": 251875 + }, + { + "epoch": 0.59, + "grad_norm": 2.078125, + "learning_rate": 0.0001596955689557464, + "loss": 2.2065, + "step": 251880 + }, + { + "epoch": 0.59, + "grad_norm": 2.25, + "learning_rate": 0.00015969408610897446, + "loss": 2.1156, + "step": 251885 + }, + { + "epoch": 0.59, + "grad_norm": 2.078125, + "learning_rate": 0.00015969260324180993, + "loss": 2.1147, + "step": 251890 + }, + { + "epoch": 0.59, + "grad_norm": 2.59375, + "learning_rate": 0.00015969112035425336, + "loss": 2.1721, + "step": 251895 + }, + { + "epoch": 0.59, + "grad_norm": 1.9140625, + "learning_rate": 0.00015968963744630518, + "loss": 2.0941, + "step": 251900 + }, + { + "epoch": 0.59, + "grad_norm": 2.1875, + "learning_rate": 0.00015968815451796595, + "loss": 1.9414, + "step": 251905 + }, + { + "epoch": 0.59, + "grad_norm": 1.890625, + "learning_rate": 0.00015968667156923618, + "loss": 1.913, + "step": 251910 + }, + { + "epoch": 0.59, + "grad_norm": 2.3125, + "learning_rate": 0.00015968518860011633, + "loss": 2.1832, + "step": 251915 + }, + { + "epoch": 0.59, + "grad_norm": 2.40625, + "learning_rate": 0.00015968370561060698, + "loss": 2.238, + "step": 251920 + }, + { + "epoch": 0.59, + "grad_norm": 2.234375, + "learning_rate": 0.00015968222260070856, + "loss": 2.1188, + "step": 251925 + }, + { + "epoch": 0.59, + "grad_norm": 2.234375, + "learning_rate": 0.00015968073957042163, + "loss": 1.9834, + "step": 251930 + }, + { + "epoch": 0.59, + "grad_norm": 2.203125, + "learning_rate": 0.0001596792565197467, + "loss": 1.9456, + "step": 251935 + }, + { + "epoch": 0.59, + "grad_norm": 2.75, + "learning_rate": 0.00015967777344868416, + "loss": 2.118, + "step": 251940 + }, + { + "epoch": 0.59, + "grad_norm": 1.6953125, + "learning_rate": 0.00015967629035723468, + "loss": 1.9601, + "step": 251945 + }, + { + "epoch": 0.59, + "grad_norm": 2.5, + "learning_rate": 0.0001596748072453987, + "loss": 2.0772, + "step": 251950 + }, + { + "epoch": 0.59, + "grad_norm": 2.234375, + "learning_rate": 0.00015967332411317671, + "loss": 2.1303, + "step": 251955 + }, + { + "epoch": 0.59, + "grad_norm": 2.046875, + "learning_rate": 0.00015967184096056923, + "loss": 2.0719, + "step": 251960 + }, + { + "epoch": 0.59, + "grad_norm": 2.09375, + "learning_rate": 0.00015967035778757677, + "loss": 2.0221, + "step": 251965 + }, + { + "epoch": 0.59, + "grad_norm": 2.171875, + "learning_rate": 0.00015966887459419984, + "loss": 1.9819, + "step": 251970 + }, + { + "epoch": 0.59, + "grad_norm": 2.125, + "learning_rate": 0.00015966739138043894, + "loss": 1.9628, + "step": 251975 + }, + { + "epoch": 0.59, + "grad_norm": 2.5, + "learning_rate": 0.00015966590814629457, + "loss": 1.9712, + "step": 251980 + }, + { + "epoch": 0.59, + "grad_norm": 2.640625, + "learning_rate": 0.00015966442489176723, + "loss": 2.1492, + "step": 251985 + }, + { + "epoch": 0.59, + "grad_norm": 2.3125, + "learning_rate": 0.00015966294161685746, + "loss": 1.8165, + "step": 251990 + }, + { + "epoch": 0.59, + "grad_norm": 2.09375, + "learning_rate": 0.00015966145832156575, + "loss": 2.0329, + "step": 251995 + }, + { + "epoch": 0.59, + "grad_norm": 2.296875, + "learning_rate": 0.0001596599750058926, + "loss": 2.0888, + "step": 252000 + }, + { + "epoch": 0.59, + "grad_norm": 2.046875, + "learning_rate": 0.00015965849166983852, + "loss": 2.099, + "step": 252005 + }, + { + "epoch": 0.59, + "grad_norm": 2.28125, + "learning_rate": 0.000159657008313404, + "loss": 2.139, + "step": 252010 + }, + { + "epoch": 0.59, + "grad_norm": 2.078125, + "learning_rate": 0.0001596555249365896, + "loss": 2.1556, + "step": 252015 + }, + { + "epoch": 0.59, + "grad_norm": 1.890625, + "learning_rate": 0.00015965404153939574, + "loss": 2.0652, + "step": 252020 + }, + { + "epoch": 0.59, + "grad_norm": 2.21875, + "learning_rate": 0.000159652558121823, + "loss": 1.9825, + "step": 252025 + }, + { + "epoch": 0.59, + "grad_norm": 1.9609375, + "learning_rate": 0.00015965107468387187, + "loss": 2.1332, + "step": 252030 + }, + { + "epoch": 0.59, + "grad_norm": 2.5, + "learning_rate": 0.00015964959122554285, + "loss": 2.1126, + "step": 252035 + }, + { + "epoch": 0.59, + "grad_norm": 2.1875, + "learning_rate": 0.00015964810774683646, + "loss": 2.1855, + "step": 252040 + }, + { + "epoch": 0.59, + "grad_norm": 2.9375, + "learning_rate": 0.00015964662424775318, + "loss": 2.2262, + "step": 252045 + }, + { + "epoch": 0.59, + "grad_norm": 2.28125, + "learning_rate": 0.00015964514072829353, + "loss": 2.1727, + "step": 252050 + }, + { + "epoch": 0.59, + "grad_norm": 2.28125, + "learning_rate": 0.000159643657188458, + "loss": 2.2923, + "step": 252055 + }, + { + "epoch": 0.59, + "grad_norm": 2.15625, + "learning_rate": 0.00015964217362824718, + "loss": 2.0987, + "step": 252060 + }, + { + "epoch": 0.59, + "grad_norm": 2.515625, + "learning_rate": 0.00015964069004766146, + "loss": 2.2015, + "step": 252065 + }, + { + "epoch": 0.59, + "grad_norm": 1.96875, + "learning_rate": 0.00015963920644670142, + "loss": 2.2311, + "step": 252070 + }, + { + "epoch": 0.59, + "grad_norm": 2.265625, + "learning_rate": 0.0001596377228253675, + "loss": 1.9827, + "step": 252075 + }, + { + "epoch": 0.59, + "grad_norm": 1.90625, + "learning_rate": 0.00015963623918366032, + "loss": 2.0656, + "step": 252080 + }, + { + "epoch": 0.59, + "grad_norm": 2.546875, + "learning_rate": 0.00015963475552158027, + "loss": 2.2417, + "step": 252085 + }, + { + "epoch": 0.59, + "grad_norm": 2.5, + "learning_rate": 0.00015963327183912793, + "loss": 1.9916, + "step": 252090 + }, + { + "epoch": 0.59, + "grad_norm": 2.203125, + "learning_rate": 0.00015963178813630378, + "loss": 2.0208, + "step": 252095 + }, + { + "epoch": 0.59, + "grad_norm": 2.265625, + "learning_rate": 0.00015963030441310832, + "loss": 2.2356, + "step": 252100 + }, + { + "epoch": 0.59, + "grad_norm": 2.484375, + "learning_rate": 0.00015962882066954205, + "loss": 2.3867, + "step": 252105 + }, + { + "epoch": 0.59, + "grad_norm": 2.28125, + "learning_rate": 0.0001596273369056055, + "loss": 1.9624, + "step": 252110 + }, + { + "epoch": 0.59, + "grad_norm": 2.0625, + "learning_rate": 0.0001596258531212992, + "loss": 2.1091, + "step": 252115 + }, + { + "epoch": 0.59, + "grad_norm": 2.328125, + "learning_rate": 0.00015962436931662364, + "loss": 2.0265, + "step": 252120 + }, + { + "epoch": 0.59, + "grad_norm": 1.8046875, + "learning_rate": 0.00015962288549157926, + "loss": 2.0649, + "step": 252125 + }, + { + "epoch": 0.59, + "grad_norm": 2.390625, + "learning_rate": 0.00015962140164616667, + "loss": 2.0196, + "step": 252130 + }, + { + "epoch": 0.59, + "grad_norm": 1.8203125, + "learning_rate": 0.00015961991778038628, + "loss": 2.1189, + "step": 252135 + }, + { + "epoch": 0.59, + "grad_norm": 1.8203125, + "learning_rate": 0.00015961843389423867, + "loss": 1.9697, + "step": 252140 + }, + { + "epoch": 0.59, + "grad_norm": 2.328125, + "learning_rate": 0.00015961694998772432, + "loss": 2.0704, + "step": 252145 + }, + { + "epoch": 0.59, + "grad_norm": 2.125, + "learning_rate": 0.00015961546606084375, + "loss": 2.1739, + "step": 252150 + }, + { + "epoch": 0.59, + "grad_norm": 2.453125, + "learning_rate": 0.00015961398211359744, + "loss": 2.1838, + "step": 252155 + }, + { + "epoch": 0.59, + "grad_norm": 2.15625, + "learning_rate": 0.00015961249814598592, + "loss": 2.0774, + "step": 252160 + }, + { + "epoch": 0.59, + "grad_norm": 2.171875, + "learning_rate": 0.00015961101415800969, + "loss": 2.0923, + "step": 252165 + }, + { + "epoch": 0.59, + "grad_norm": 2.15625, + "learning_rate": 0.00015960953014966924, + "loss": 2.013, + "step": 252170 + }, + { + "epoch": 0.59, + "grad_norm": 2.03125, + "learning_rate": 0.00015960804612096511, + "loss": 2.0533, + "step": 252175 + }, + { + "epoch": 0.59, + "grad_norm": 2.15625, + "learning_rate": 0.0001596065620718978, + "loss": 2.1012, + "step": 252180 + }, + { + "epoch": 0.59, + "grad_norm": 1.859375, + "learning_rate": 0.00015960507800246777, + "loss": 2.1684, + "step": 252185 + }, + { + "epoch": 0.59, + "grad_norm": 1.953125, + "learning_rate": 0.0001596035939126756, + "loss": 2.1392, + "step": 252190 + }, + { + "epoch": 0.59, + "grad_norm": 1.90625, + "learning_rate": 0.00015960210980252176, + "loss": 2.1521, + "step": 252195 + }, + { + "epoch": 0.59, + "grad_norm": 1.84375, + "learning_rate": 0.00015960062567200675, + "loss": 2.0447, + "step": 252200 + }, + { + "epoch": 0.59, + "grad_norm": 2.328125, + "learning_rate": 0.0001595991415211311, + "loss": 2.0847, + "step": 252205 + }, + { + "epoch": 0.59, + "grad_norm": 1.9765625, + "learning_rate": 0.00015959765734989526, + "loss": 2.1361, + "step": 252210 + }, + { + "epoch": 0.59, + "grad_norm": 2.09375, + "learning_rate": 0.0001595961731582998, + "loss": 2.1502, + "step": 252215 + }, + { + "epoch": 0.59, + "grad_norm": 2.21875, + "learning_rate": 0.00015959468894634522, + "loss": 2.12, + "step": 252220 + }, + { + "epoch": 0.59, + "grad_norm": 2.109375, + "learning_rate": 0.000159593204714032, + "loss": 2.2033, + "step": 252225 + }, + { + "epoch": 0.59, + "grad_norm": 1.9609375, + "learning_rate": 0.00015959172046136068, + "loss": 2.1311, + "step": 252230 + }, + { + "epoch": 0.59, + "grad_norm": 2.265625, + "learning_rate": 0.0001595902361883317, + "loss": 2.1387, + "step": 252235 + }, + { + "epoch": 0.59, + "grad_norm": 2.03125, + "learning_rate": 0.00015958875189494567, + "loss": 2.0924, + "step": 252240 + }, + { + "epoch": 0.59, + "grad_norm": 2.0625, + "learning_rate": 0.000159587267581203, + "loss": 2.0361, + "step": 252245 + }, + { + "epoch": 0.59, + "grad_norm": 2.296875, + "learning_rate": 0.00015958578324710428, + "loss": 2.0267, + "step": 252250 + }, + { + "epoch": 0.59, + "grad_norm": 2.46875, + "learning_rate": 0.00015958429889264996, + "loss": 2.0533, + "step": 252255 + }, + { + "epoch": 0.59, + "grad_norm": 1.984375, + "learning_rate": 0.00015958281451784054, + "loss": 2.0961, + "step": 252260 + }, + { + "epoch": 0.59, + "grad_norm": 2.15625, + "learning_rate": 0.00015958133012267657, + "loss": 2.0326, + "step": 252265 + }, + { + "epoch": 0.59, + "grad_norm": 2.0, + "learning_rate": 0.00015957984570715854, + "loss": 2.0999, + "step": 252270 + }, + { + "epoch": 0.59, + "grad_norm": 2.578125, + "learning_rate": 0.00015957836127128696, + "loss": 2.2405, + "step": 252275 + }, + { + "epoch": 0.59, + "grad_norm": 2.03125, + "learning_rate": 0.00015957687681506232, + "loss": 2.0859, + "step": 252280 + }, + { + "epoch": 0.59, + "grad_norm": 2.1875, + "learning_rate": 0.00015957539233848512, + "loss": 2.2768, + "step": 252285 + }, + { + "epoch": 0.59, + "grad_norm": 2.40625, + "learning_rate": 0.0001595739078415559, + "loss": 2.1692, + "step": 252290 + }, + { + "epoch": 0.59, + "grad_norm": 2.171875, + "learning_rate": 0.00015957242332427515, + "loss": 1.9057, + "step": 252295 + }, + { + "epoch": 0.59, + "grad_norm": 3.125, + "learning_rate": 0.00015957093878664339, + "loss": 2.0784, + "step": 252300 + }, + { + "epoch": 0.59, + "grad_norm": 2.453125, + "learning_rate": 0.00015956945422866112, + "loss": 2.0512, + "step": 252305 + }, + { + "epoch": 0.59, + "grad_norm": 2.15625, + "learning_rate": 0.00015956796965032886, + "loss": 1.9347, + "step": 252310 + }, + { + "epoch": 0.59, + "grad_norm": 2.03125, + "learning_rate": 0.00015956648505164705, + "loss": 2.1072, + "step": 252315 + }, + { + "epoch": 0.59, + "grad_norm": 2.203125, + "learning_rate": 0.0001595650004326163, + "loss": 2.19, + "step": 252320 + }, + { + "epoch": 0.59, + "grad_norm": 2.1875, + "learning_rate": 0.00015956351579323702, + "loss": 2.1033, + "step": 252325 + }, + { + "epoch": 0.59, + "grad_norm": 2.28125, + "learning_rate": 0.0001595620311335098, + "loss": 2.0666, + "step": 252330 + }, + { + "epoch": 0.59, + "grad_norm": 2.09375, + "learning_rate": 0.0001595605464534351, + "loss": 2.0586, + "step": 252335 + }, + { + "epoch": 0.59, + "grad_norm": 1.9921875, + "learning_rate": 0.00015955906175301344, + "loss": 2.1552, + "step": 252340 + }, + { + "epoch": 0.59, + "grad_norm": 2.140625, + "learning_rate": 0.00015955757703224529, + "loss": 2.1183, + "step": 252345 + }, + { + "epoch": 0.59, + "grad_norm": 2.21875, + "learning_rate": 0.00015955609229113121, + "loss": 2.1198, + "step": 252350 + }, + { + "epoch": 0.59, + "grad_norm": 2.0, + "learning_rate": 0.0001595546075296717, + "loss": 2.1536, + "step": 252355 + }, + { + "epoch": 0.59, + "grad_norm": 1.890625, + "learning_rate": 0.00015955312274786726, + "loss": 2.08, + "step": 252360 + }, + { + "epoch": 0.59, + "grad_norm": 2.203125, + "learning_rate": 0.00015955163794571838, + "loss": 1.9176, + "step": 252365 + }, + { + "epoch": 0.59, + "grad_norm": 2.09375, + "learning_rate": 0.0001595501531232256, + "loss": 2.1442, + "step": 252370 + }, + { + "epoch": 0.59, + "grad_norm": 2.21875, + "learning_rate": 0.0001595486682803894, + "loss": 2.2035, + "step": 252375 + }, + { + "epoch": 0.59, + "grad_norm": 2.671875, + "learning_rate": 0.00015954718341721028, + "loss": 2.1096, + "step": 252380 + }, + { + "epoch": 0.59, + "grad_norm": 2.0625, + "learning_rate": 0.00015954569853368877, + "loss": 2.0678, + "step": 252385 + }, + { + "epoch": 0.59, + "grad_norm": 2.171875, + "learning_rate": 0.00015954421362982538, + "loss": 2.1821, + "step": 252390 + }, + { + "epoch": 0.59, + "grad_norm": 2.578125, + "learning_rate": 0.0001595427287056206, + "loss": 2.0127, + "step": 252395 + }, + { + "epoch": 0.59, + "grad_norm": 2.203125, + "learning_rate": 0.00015954124376107496, + "loss": 2.1446, + "step": 252400 + }, + { + "epoch": 0.59, + "grad_norm": 2.5, + "learning_rate": 0.00015953975879618893, + "loss": 2.1725, + "step": 252405 + }, + { + "epoch": 0.59, + "grad_norm": 1.9765625, + "learning_rate": 0.00015953827381096303, + "loss": 2.0704, + "step": 252410 + }, + { + "epoch": 0.59, + "grad_norm": 2.5, + "learning_rate": 0.0001595367888053978, + "loss": 2.37, + "step": 252415 + }, + { + "epoch": 0.59, + "grad_norm": 2.078125, + "learning_rate": 0.00015953530377949368, + "loss": 1.9053, + "step": 252420 + }, + { + "epoch": 0.59, + "grad_norm": 2.015625, + "learning_rate": 0.00015953381873325126, + "loss": 2.0475, + "step": 252425 + }, + { + "epoch": 0.59, + "grad_norm": 1.9609375, + "learning_rate": 0.000159532333666671, + "loss": 2.0109, + "step": 252430 + }, + { + "epoch": 0.59, + "grad_norm": 2.234375, + "learning_rate": 0.00015953084857975343, + "loss": 2.1404, + "step": 252435 + }, + { + "epoch": 0.59, + "grad_norm": 2.015625, + "learning_rate": 0.00015952936347249903, + "loss": 1.8649, + "step": 252440 + }, + { + "epoch": 0.59, + "grad_norm": 2.28125, + "learning_rate": 0.0001595278783449083, + "loss": 2.1713, + "step": 252445 + }, + { + "epoch": 0.59, + "grad_norm": 2.359375, + "learning_rate": 0.00015952639319698176, + "loss": 2.164, + "step": 252450 + }, + { + "epoch": 0.59, + "grad_norm": 2.34375, + "learning_rate": 0.00015952490802871995, + "loss": 2.0598, + "step": 252455 + }, + { + "epoch": 0.59, + "grad_norm": 2.109375, + "learning_rate": 0.00015952342284012336, + "loss": 2.0701, + "step": 252460 + }, + { + "epoch": 0.59, + "grad_norm": 2.28125, + "learning_rate": 0.0001595219376311925, + "loss": 2.1155, + "step": 252465 + }, + { + "epoch": 0.59, + "grad_norm": 2.21875, + "learning_rate": 0.00015952045240192787, + "loss": 2.004, + "step": 252470 + }, + { + "epoch": 0.59, + "grad_norm": 1.9375, + "learning_rate": 0.0001595189671523299, + "loss": 2.094, + "step": 252475 + }, + { + "epoch": 0.59, + "grad_norm": 2.25, + "learning_rate": 0.00015951748188239924, + "loss": 2.1287, + "step": 252480 + }, + { + "epoch": 0.59, + "grad_norm": 2.0625, + "learning_rate": 0.0001595159965921363, + "loss": 2.0485, + "step": 252485 + }, + { + "epoch": 0.59, + "grad_norm": 2.125, + "learning_rate": 0.00015951451128154163, + "loss": 2.2208, + "step": 252490 + }, + { + "epoch": 0.59, + "grad_norm": 2.34375, + "learning_rate": 0.00015951302595061572, + "loss": 2.0216, + "step": 252495 + }, + { + "epoch": 0.59, + "grad_norm": 2.3125, + "learning_rate": 0.0001595115405993591, + "loss": 2.1909, + "step": 252500 + }, + { + "epoch": 0.59, + "grad_norm": 2.234375, + "learning_rate": 0.00015951005522777222, + "loss": 2.0451, + "step": 252505 + }, + { + "epoch": 0.59, + "grad_norm": 1.9609375, + "learning_rate": 0.00015950856983585566, + "loss": 2.1315, + "step": 252510 + }, + { + "epoch": 0.59, + "grad_norm": 1.8984375, + "learning_rate": 0.00015950708442360987, + "loss": 1.9218, + "step": 252515 + }, + { + "epoch": 0.59, + "grad_norm": 2.140625, + "learning_rate": 0.0001595055989910354, + "loss": 1.9114, + "step": 252520 + }, + { + "epoch": 0.59, + "grad_norm": 2.09375, + "learning_rate": 0.00015950411353813274, + "loss": 2.2, + "step": 252525 + }, + { + "epoch": 0.59, + "grad_norm": 2.21875, + "learning_rate": 0.00015950262806490234, + "loss": 1.9687, + "step": 252530 + }, + { + "epoch": 0.59, + "grad_norm": 2.03125, + "learning_rate": 0.0001595011425713448, + "loss": 2.0158, + "step": 252535 + }, + { + "epoch": 0.59, + "grad_norm": 1.9765625, + "learning_rate": 0.00015949965705746062, + "loss": 2.1369, + "step": 252540 + }, + { + "epoch": 0.59, + "grad_norm": 2.109375, + "learning_rate": 0.00015949817152325024, + "loss": 2.0184, + "step": 252545 + }, + { + "epoch": 0.59, + "grad_norm": 2.140625, + "learning_rate": 0.00015949668596871422, + "loss": 1.9434, + "step": 252550 + }, + { + "epoch": 0.59, + "grad_norm": 2.234375, + "learning_rate": 0.00015949520039385307, + "loss": 2.0965, + "step": 252555 + }, + { + "epoch": 0.59, + "grad_norm": 2.15625, + "learning_rate": 0.00015949371479866725, + "loss": 1.9684, + "step": 252560 + }, + { + "epoch": 0.59, + "grad_norm": 2.078125, + "learning_rate": 0.00015949222918315733, + "loss": 2.1162, + "step": 252565 + }, + { + "epoch": 0.59, + "grad_norm": 2.296875, + "learning_rate": 0.00015949074354732376, + "loss": 2.0943, + "step": 252570 + }, + { + "epoch": 0.59, + "grad_norm": 1.8671875, + "learning_rate": 0.0001594892578911671, + "loss": 2.2006, + "step": 252575 + }, + { + "epoch": 0.59, + "grad_norm": 2.46875, + "learning_rate": 0.00015948777221468777, + "loss": 2.1777, + "step": 252580 + }, + { + "epoch": 0.59, + "grad_norm": 1.953125, + "learning_rate": 0.0001594862865178864, + "loss": 2.1881, + "step": 252585 + }, + { + "epoch": 0.59, + "grad_norm": 1.671875, + "learning_rate": 0.0001594848008007634, + "loss": 2.0449, + "step": 252590 + }, + { + "epoch": 0.59, + "grad_norm": 1.6875, + "learning_rate": 0.00015948331506331935, + "loss": 2.2308, + "step": 252595 + }, + { + "epoch": 0.59, + "grad_norm": 2.03125, + "learning_rate": 0.0001594818293055547, + "loss": 2.1885, + "step": 252600 + }, + { + "epoch": 0.59, + "grad_norm": 1.84375, + "learning_rate": 0.00015948034352746996, + "loss": 2.0264, + "step": 252605 + }, + { + "epoch": 0.59, + "grad_norm": 2.125, + "learning_rate": 0.00015947885772906568, + "loss": 2.0211, + "step": 252610 + }, + { + "epoch": 0.59, + "grad_norm": 2.265625, + "learning_rate": 0.00015947737191034233, + "loss": 2.1013, + "step": 252615 + }, + { + "epoch": 0.59, + "grad_norm": 2.15625, + "learning_rate": 0.00015947588607130044, + "loss": 2.1049, + "step": 252620 + }, + { + "epoch": 0.59, + "grad_norm": 2.0625, + "learning_rate": 0.00015947440021194052, + "loss": 1.9822, + "step": 252625 + }, + { + "epoch": 0.59, + "grad_norm": 2.09375, + "learning_rate": 0.00015947291433226305, + "loss": 2.2905, + "step": 252630 + }, + { + "epoch": 0.59, + "grad_norm": 2.03125, + "learning_rate": 0.00015947142843226854, + "loss": 2.0553, + "step": 252635 + }, + { + "epoch": 0.59, + "grad_norm": 1.8828125, + "learning_rate": 0.00015946994251195756, + "loss": 1.936, + "step": 252640 + }, + { + "epoch": 0.59, + "grad_norm": 2.203125, + "learning_rate": 0.0001594684565713305, + "loss": 2.0142, + "step": 252645 + }, + { + "epoch": 0.59, + "grad_norm": 2.28125, + "learning_rate": 0.000159466970610388, + "loss": 2.1442, + "step": 252650 + }, + { + "epoch": 0.59, + "grad_norm": 1.984375, + "learning_rate": 0.00015946548462913046, + "loss": 2.1392, + "step": 252655 + }, + { + "epoch": 0.59, + "grad_norm": 2.34375, + "learning_rate": 0.00015946399862755848, + "loss": 1.9438, + "step": 252660 + }, + { + "epoch": 0.59, + "grad_norm": 2.046875, + "learning_rate": 0.00015946251260567245, + "loss": 2.1439, + "step": 252665 + }, + { + "epoch": 0.59, + "grad_norm": 2.09375, + "learning_rate": 0.000159461026563473, + "loss": 2.03, + "step": 252670 + }, + { + "epoch": 0.59, + "grad_norm": 2.125, + "learning_rate": 0.0001594595405009606, + "loss": 1.9713, + "step": 252675 + }, + { + "epoch": 0.59, + "grad_norm": 2.4375, + "learning_rate": 0.0001594580544181357, + "loss": 1.9498, + "step": 252680 + }, + { + "epoch": 0.59, + "grad_norm": 2.15625, + "learning_rate": 0.00015945656831499886, + "loss": 2.0775, + "step": 252685 + }, + { + "epoch": 0.59, + "grad_norm": 2.0, + "learning_rate": 0.00015945508219155057, + "loss": 1.9094, + "step": 252690 + }, + { + "epoch": 0.59, + "grad_norm": 2.125, + "learning_rate": 0.00015945359604779138, + "loss": 2.0307, + "step": 252695 + }, + { + "epoch": 0.59, + "grad_norm": 1.9375, + "learning_rate": 0.00015945210988372175, + "loss": 2.2087, + "step": 252700 + }, + { + "epoch": 0.59, + "grad_norm": 2.203125, + "learning_rate": 0.00015945062369934217, + "loss": 1.8994, + "step": 252705 + }, + { + "epoch": 0.59, + "grad_norm": 2.171875, + "learning_rate": 0.00015944913749465323, + "loss": 2.1025, + "step": 252710 + }, + { + "epoch": 0.59, + "grad_norm": 2.15625, + "learning_rate": 0.00015944765126965532, + "loss": 2.1389, + "step": 252715 + }, + { + "epoch": 0.59, + "grad_norm": 2.171875, + "learning_rate": 0.00015944616502434905, + "loss": 2.2125, + "step": 252720 + }, + { + "epoch": 0.59, + "grad_norm": 1.921875, + "learning_rate": 0.00015944467875873491, + "loss": 2.0692, + "step": 252725 + }, + { + "epoch": 0.59, + "grad_norm": 2.421875, + "learning_rate": 0.00015944319247281337, + "loss": 2.1247, + "step": 252730 + }, + { + "epoch": 0.59, + "grad_norm": 2.546875, + "learning_rate": 0.00015944170616658497, + "loss": 2.0503, + "step": 252735 + }, + { + "epoch": 0.59, + "grad_norm": 2.25, + "learning_rate": 0.0001594402198400502, + "loss": 2.0037, + "step": 252740 + }, + { + "epoch": 0.59, + "grad_norm": 2.25, + "learning_rate": 0.00015943873349320956, + "loss": 2.1196, + "step": 252745 + }, + { + "epoch": 0.59, + "grad_norm": 1.8203125, + "learning_rate": 0.00015943724712606358, + "loss": 2.3367, + "step": 252750 + }, + { + "epoch": 0.59, + "grad_norm": 2.21875, + "learning_rate": 0.00015943576073861276, + "loss": 2.1636, + "step": 252755 + }, + { + "epoch": 0.59, + "grad_norm": 1.984375, + "learning_rate": 0.0001594342743308576, + "loss": 1.9997, + "step": 252760 + }, + { + "epoch": 0.59, + "grad_norm": 2.34375, + "learning_rate": 0.00015943278790279863, + "loss": 2.2046, + "step": 252765 + }, + { + "epoch": 0.59, + "grad_norm": 1.84375, + "learning_rate": 0.0001594313014544363, + "loss": 1.9881, + "step": 252770 + }, + { + "epoch": 0.59, + "grad_norm": 1.890625, + "learning_rate": 0.00015942981498577122, + "loss": 1.9309, + "step": 252775 + }, + { + "epoch": 0.59, + "grad_norm": 2.3125, + "learning_rate": 0.0001594283284968038, + "loss": 2.1146, + "step": 252780 + }, + { + "epoch": 0.59, + "grad_norm": 2.390625, + "learning_rate": 0.00015942684198753463, + "loss": 2.0507, + "step": 252785 + }, + { + "epoch": 0.59, + "grad_norm": 2.0625, + "learning_rate": 0.0001594253554579641, + "loss": 1.9488, + "step": 252790 + }, + { + "epoch": 0.59, + "grad_norm": 2.28125, + "learning_rate": 0.0001594238689080928, + "loss": 1.8954, + "step": 252795 + }, + { + "epoch": 0.59, + "grad_norm": 1.9296875, + "learning_rate": 0.00015942238233792127, + "loss": 2.2077, + "step": 252800 + }, + { + "epoch": 0.59, + "grad_norm": 1.9765625, + "learning_rate": 0.00015942089574744997, + "loss": 1.9944, + "step": 252805 + }, + { + "epoch": 0.59, + "grad_norm": 2.328125, + "learning_rate": 0.0001594194091366794, + "loss": 2.315, + "step": 252810 + }, + { + "epoch": 0.59, + "grad_norm": 2.203125, + "learning_rate": 0.0001594179225056101, + "loss": 2.0331, + "step": 252815 + }, + { + "epoch": 0.59, + "grad_norm": 1.875, + "learning_rate": 0.00015941643585424254, + "loss": 2.1678, + "step": 252820 + }, + { + "epoch": 0.59, + "grad_norm": 2.21875, + "learning_rate": 0.00015941494918257724, + "loss": 2.1828, + "step": 252825 + }, + { + "epoch": 0.59, + "grad_norm": 1.9765625, + "learning_rate": 0.00015941346249061475, + "loss": 1.855, + "step": 252830 + }, + { + "epoch": 0.6, + "grad_norm": 2.140625, + "learning_rate": 0.0001594119757783555, + "loss": 2.174, + "step": 252835 + }, + { + "epoch": 0.6, + "grad_norm": 2.25, + "learning_rate": 0.0001594104890458001, + "loss": 2.0387, + "step": 252840 + }, + { + "epoch": 0.6, + "grad_norm": 2.40625, + "learning_rate": 0.00015940900229294898, + "loss": 2.1073, + "step": 252845 + }, + { + "epoch": 0.6, + "grad_norm": 2.25, + "learning_rate": 0.00015940751551980263, + "loss": 2.1289, + "step": 252850 + }, + { + "epoch": 0.6, + "grad_norm": 2.109375, + "learning_rate": 0.00015940602872636163, + "loss": 1.9286, + "step": 252855 + }, + { + "epoch": 0.6, + "grad_norm": 2.03125, + "learning_rate": 0.00015940454191262643, + "loss": 2.1298, + "step": 252860 + }, + { + "epoch": 0.6, + "grad_norm": 2.15625, + "learning_rate": 0.00015940305507859757, + "loss": 2.1769, + "step": 252865 + }, + { + "epoch": 0.6, + "grad_norm": 2.046875, + "learning_rate": 0.00015940156822427556, + "loss": 1.9893, + "step": 252870 + }, + { + "epoch": 0.6, + "grad_norm": 1.6328125, + "learning_rate": 0.00015940008134966092, + "loss": 1.99, + "step": 252875 + }, + { + "epoch": 0.6, + "grad_norm": 2.140625, + "learning_rate": 0.00015939859445475408, + "loss": 2.1068, + "step": 252880 + }, + { + "epoch": 0.6, + "grad_norm": 2.078125, + "learning_rate": 0.00015939710753955564, + "loss": 2.2404, + "step": 252885 + }, + { + "epoch": 0.6, + "grad_norm": 1.7109375, + "learning_rate": 0.00015939562060406605, + "loss": 1.977, + "step": 252890 + }, + { + "epoch": 0.6, + "grad_norm": 2.453125, + "learning_rate": 0.00015939413364828586, + "loss": 2.194, + "step": 252895 + }, + { + "epoch": 0.6, + "grad_norm": 2.5625, + "learning_rate": 0.00015939264667221554, + "loss": 2.0628, + "step": 252900 + }, + { + "epoch": 0.6, + "grad_norm": 2.125, + "learning_rate": 0.0001593911596758556, + "loss": 1.928, + "step": 252905 + }, + { + "epoch": 0.6, + "grad_norm": 2.015625, + "learning_rate": 0.0001593896726592066, + "loss": 2.2035, + "step": 252910 + }, + { + "epoch": 0.6, + "grad_norm": 2.109375, + "learning_rate": 0.000159388185622269, + "loss": 2.0418, + "step": 252915 + }, + { + "epoch": 0.6, + "grad_norm": 2.0, + "learning_rate": 0.0001593866985650433, + "loss": 2.0935, + "step": 252920 + }, + { + "epoch": 0.6, + "grad_norm": 2.046875, + "learning_rate": 0.00015938521148753005, + "loss": 2.0702, + "step": 252925 + }, + { + "epoch": 0.6, + "grad_norm": 2.234375, + "learning_rate": 0.00015938372438972968, + "loss": 2.0256, + "step": 252930 + }, + { + "epoch": 0.6, + "grad_norm": 1.953125, + "learning_rate": 0.0001593822372716428, + "loss": 2.0374, + "step": 252935 + }, + { + "epoch": 0.6, + "grad_norm": 2.015625, + "learning_rate": 0.00015938075013326988, + "loss": 2.2381, + "step": 252940 + }, + { + "epoch": 0.6, + "grad_norm": 2.28125, + "learning_rate": 0.0001593792629746114, + "loss": 1.9277, + "step": 252945 + }, + { + "epoch": 0.6, + "grad_norm": 2.265625, + "learning_rate": 0.0001593777757956679, + "loss": 2.1623, + "step": 252950 + }, + { + "epoch": 0.6, + "grad_norm": 2.328125, + "learning_rate": 0.00015937628859643988, + "loss": 2.0046, + "step": 252955 + }, + { + "epoch": 0.6, + "grad_norm": 2.484375, + "learning_rate": 0.00015937480137692778, + "loss": 2.0531, + "step": 252960 + }, + { + "epoch": 0.6, + "grad_norm": 2.125, + "learning_rate": 0.00015937331413713225, + "loss": 2.081, + "step": 252965 + }, + { + "epoch": 0.6, + "grad_norm": 2.34375, + "learning_rate": 0.0001593718268770537, + "loss": 2.1196, + "step": 252970 + }, + { + "epoch": 0.6, + "grad_norm": 1.9609375, + "learning_rate": 0.00015937033959669266, + "loss": 1.9931, + "step": 252975 + }, + { + "epoch": 0.6, + "grad_norm": 2.28125, + "learning_rate": 0.0001593688522960496, + "loss": 2.0939, + "step": 252980 + }, + { + "epoch": 0.6, + "grad_norm": 1.96875, + "learning_rate": 0.00015936736497512507, + "loss": 2.1748, + "step": 252985 + }, + { + "epoch": 0.6, + "grad_norm": 1.9296875, + "learning_rate": 0.0001593658776339196, + "loss": 2.0575, + "step": 252990 + }, + { + "epoch": 0.6, + "grad_norm": 2.578125, + "learning_rate": 0.00015936439027243367, + "loss": 1.9551, + "step": 252995 + }, + { + "epoch": 0.6, + "grad_norm": 1.984375, + "learning_rate": 0.00015936290289066777, + "loss": 2.0004, + "step": 253000 + }, + { + "epoch": 0.6, + "grad_norm": 2.109375, + "learning_rate": 0.00015936141548862244, + "loss": 2.2452, + "step": 253005 + }, + { + "epoch": 0.6, + "grad_norm": 2.21875, + "learning_rate": 0.00015935992806629816, + "loss": 2.1059, + "step": 253010 + }, + { + "epoch": 0.6, + "grad_norm": 1.703125, + "learning_rate": 0.00015935844062369542, + "loss": 2.1556, + "step": 253015 + }, + { + "epoch": 0.6, + "grad_norm": 2.359375, + "learning_rate": 0.00015935695316081483, + "loss": 2.3003, + "step": 253020 + }, + { + "epoch": 0.6, + "grad_norm": 2.125, + "learning_rate": 0.0001593554656776568, + "loss": 1.9721, + "step": 253025 + }, + { + "epoch": 0.6, + "grad_norm": 2.421875, + "learning_rate": 0.00015935397817422184, + "loss": 2.1363, + "step": 253030 + }, + { + "epoch": 0.6, + "grad_norm": 2.125, + "learning_rate": 0.0001593524906505105, + "loss": 2.1288, + "step": 253035 + }, + { + "epoch": 0.6, + "grad_norm": 2.46875, + "learning_rate": 0.0001593510031065233, + "loss": 2.0586, + "step": 253040 + }, + { + "epoch": 0.6, + "grad_norm": 2.359375, + "learning_rate": 0.0001593495155422607, + "loss": 2.1084, + "step": 253045 + }, + { + "epoch": 0.6, + "grad_norm": 1.96875, + "learning_rate": 0.0001593480279577232, + "loss": 2.1553, + "step": 253050 + }, + { + "epoch": 0.6, + "grad_norm": 1.8515625, + "learning_rate": 0.00015934654035291138, + "loss": 2.1358, + "step": 253055 + }, + { + "epoch": 0.6, + "grad_norm": 1.7421875, + "learning_rate": 0.00015934505272782569, + "loss": 2.0576, + "step": 253060 + }, + { + "epoch": 0.6, + "grad_norm": 2.046875, + "learning_rate": 0.00015934356508246665, + "loss": 2.0052, + "step": 253065 + }, + { + "epoch": 0.6, + "grad_norm": 2.328125, + "learning_rate": 0.00015934207741683478, + "loss": 2.0748, + "step": 253070 + }, + { + "epoch": 0.6, + "grad_norm": 2.28125, + "learning_rate": 0.00015934058973093057, + "loss": 2.1363, + "step": 253075 + }, + { + "epoch": 0.6, + "grad_norm": 2.90625, + "learning_rate": 0.00015933910202475454, + "loss": 2.1329, + "step": 253080 + }, + { + "epoch": 0.6, + "grad_norm": 2.125, + "learning_rate": 0.00015933761429830722, + "loss": 2.1316, + "step": 253085 + }, + { + "epoch": 0.6, + "grad_norm": 2.0625, + "learning_rate": 0.00015933612655158908, + "loss": 1.9727, + "step": 253090 + }, + { + "epoch": 0.6, + "grad_norm": 2.4375, + "learning_rate": 0.00015933463878460064, + "loss": 2.071, + "step": 253095 + }, + { + "epoch": 0.6, + "grad_norm": 2.03125, + "learning_rate": 0.0001593331509973424, + "loss": 2.232, + "step": 253100 + }, + { + "epoch": 0.6, + "grad_norm": 2.296875, + "learning_rate": 0.0001593316631898149, + "loss": 2.1291, + "step": 253105 + }, + { + "epoch": 0.6, + "grad_norm": 2.65625, + "learning_rate": 0.00015933017536201862, + "loss": 2.0094, + "step": 253110 + }, + { + "epoch": 0.6, + "grad_norm": 2.65625, + "learning_rate": 0.00015932868751395408, + "loss": 2.1302, + "step": 253115 + }, + { + "epoch": 0.6, + "grad_norm": 2.28125, + "learning_rate": 0.00015932719964562177, + "loss": 2.0145, + "step": 253120 + }, + { + "epoch": 0.6, + "grad_norm": 2.09375, + "learning_rate": 0.00015932571175702223, + "loss": 2.0578, + "step": 253125 + }, + { + "epoch": 0.6, + "grad_norm": 2.03125, + "learning_rate": 0.00015932422384815593, + "loss": 2.2259, + "step": 253130 + }, + { + "epoch": 0.6, + "grad_norm": 1.7890625, + "learning_rate": 0.00015932273591902344, + "loss": 1.9961, + "step": 253135 + }, + { + "epoch": 0.6, + "grad_norm": 2.40625, + "learning_rate": 0.00015932124796962517, + "loss": 2.1295, + "step": 253140 + }, + { + "epoch": 0.6, + "grad_norm": 2.34375, + "learning_rate": 0.00015931975999996172, + "loss": 2.1677, + "step": 253145 + }, + { + "epoch": 0.6, + "grad_norm": 2.09375, + "learning_rate": 0.00015931827201003357, + "loss": 2.1821, + "step": 253150 + }, + { + "epoch": 0.6, + "grad_norm": 1.8515625, + "learning_rate": 0.0001593167839998412, + "loss": 1.9363, + "step": 253155 + }, + { + "epoch": 0.6, + "grad_norm": 1.7890625, + "learning_rate": 0.00015931529596938514, + "loss": 2.0836, + "step": 253160 + }, + { + "epoch": 0.6, + "grad_norm": 2.328125, + "learning_rate": 0.00015931380791866593, + "loss": 2.1414, + "step": 253165 + }, + { + "epoch": 0.6, + "grad_norm": 2.484375, + "learning_rate": 0.00015931231984768403, + "loss": 2.0924, + "step": 253170 + }, + { + "epoch": 0.6, + "grad_norm": 2.734375, + "learning_rate": 0.00015931083175643995, + "loss": 1.9795, + "step": 253175 + }, + { + "epoch": 0.6, + "grad_norm": 2.5, + "learning_rate": 0.0001593093436449342, + "loss": 2.0491, + "step": 253180 + }, + { + "epoch": 0.6, + "grad_norm": 1.8671875, + "learning_rate": 0.00015930785551316739, + "loss": 2.0745, + "step": 253185 + }, + { + "epoch": 0.6, + "grad_norm": 2.125, + "learning_rate": 0.00015930636736113986, + "loss": 2.2482, + "step": 253190 + }, + { + "epoch": 0.6, + "grad_norm": 1.875, + "learning_rate": 0.00015930487918885223, + "loss": 2.1397, + "step": 253195 + }, + { + "epoch": 0.6, + "grad_norm": 2.296875, + "learning_rate": 0.00015930339099630496, + "loss": 2.0561, + "step": 253200 + }, + { + "epoch": 0.6, + "grad_norm": 1.8671875, + "learning_rate": 0.0001593019027834986, + "loss": 2.1013, + "step": 253205 + }, + { + "epoch": 0.6, + "grad_norm": 1.921875, + "learning_rate": 0.0001593004145504336, + "loss": 2.2289, + "step": 253210 + }, + { + "epoch": 0.6, + "grad_norm": 2.3125, + "learning_rate": 0.0001592989262971105, + "loss": 2.0299, + "step": 253215 + }, + { + "epoch": 0.6, + "grad_norm": 2.765625, + "learning_rate": 0.00015929743802352989, + "loss": 2.0393, + "step": 253220 + }, + { + "epoch": 0.6, + "grad_norm": 2.34375, + "learning_rate": 0.0001592959497296921, + "loss": 2.0277, + "step": 253225 + }, + { + "epoch": 0.6, + "grad_norm": 2.828125, + "learning_rate": 0.0001592944614155978, + "loss": 2.1147, + "step": 253230 + }, + { + "epoch": 0.6, + "grad_norm": 2.0625, + "learning_rate": 0.00015929297308124742, + "loss": 2.1113, + "step": 253235 + }, + { + "epoch": 0.6, + "grad_norm": 1.828125, + "learning_rate": 0.0001592914847266415, + "loss": 2.1338, + "step": 253240 + }, + { + "epoch": 0.6, + "grad_norm": 1.9375, + "learning_rate": 0.0001592899963517805, + "loss": 1.9428, + "step": 253245 + }, + { + "epoch": 0.6, + "grad_norm": 2.265625, + "learning_rate": 0.00015928850795666498, + "loss": 2.0581, + "step": 253250 + }, + { + "epoch": 0.6, + "grad_norm": 2.0625, + "learning_rate": 0.00015928701954129543, + "loss": 1.992, + "step": 253255 + }, + { + "epoch": 0.6, + "grad_norm": 2.53125, + "learning_rate": 0.00015928553110567236, + "loss": 2.036, + "step": 253260 + }, + { + "epoch": 0.6, + "grad_norm": 1.9609375, + "learning_rate": 0.00015928404264979628, + "loss": 2.0118, + "step": 253265 + }, + { + "epoch": 0.6, + "grad_norm": 2.203125, + "learning_rate": 0.00015928255417366769, + "loss": 2.0734, + "step": 253270 + }, + { + "epoch": 0.6, + "grad_norm": 2.203125, + "learning_rate": 0.00015928106567728712, + "loss": 2.0615, + "step": 253275 + }, + { + "epoch": 0.6, + "grad_norm": 1.984375, + "learning_rate": 0.00015927957716065503, + "loss": 2.1898, + "step": 253280 + }, + { + "epoch": 0.6, + "grad_norm": 1.75, + "learning_rate": 0.00015927808862377198, + "loss": 2.0725, + "step": 253285 + }, + { + "epoch": 0.6, + "grad_norm": 2.265625, + "learning_rate": 0.00015927660006663847, + "loss": 2.083, + "step": 253290 + }, + { + "epoch": 0.6, + "grad_norm": 2.109375, + "learning_rate": 0.000159275111489255, + "loss": 1.8864, + "step": 253295 + }, + { + "epoch": 0.6, + "grad_norm": 2.125, + "learning_rate": 0.00015927362289162208, + "loss": 2.0295, + "step": 253300 + }, + { + "epoch": 0.6, + "grad_norm": 2.390625, + "learning_rate": 0.00015927213427374017, + "loss": 1.9445, + "step": 253305 + }, + { + "epoch": 0.6, + "grad_norm": 3.015625, + "learning_rate": 0.00015927064563560985, + "loss": 1.8842, + "step": 253310 + }, + { + "epoch": 0.6, + "grad_norm": 2.359375, + "learning_rate": 0.0001592691569772316, + "loss": 2.0881, + "step": 253315 + }, + { + "epoch": 0.6, + "grad_norm": 2.796875, + "learning_rate": 0.00015926766829860595, + "loss": 2.1906, + "step": 253320 + }, + { + "epoch": 0.6, + "grad_norm": 2.625, + "learning_rate": 0.0001592661795997334, + "loss": 2.1834, + "step": 253325 + }, + { + "epoch": 0.6, + "grad_norm": 2.03125, + "learning_rate": 0.0001592646908806144, + "loss": 1.982, + "step": 253330 + }, + { + "epoch": 0.6, + "grad_norm": 1.7890625, + "learning_rate": 0.00015926320214124955, + "loss": 1.863, + "step": 253335 + }, + { + "epoch": 0.6, + "grad_norm": 2.265625, + "learning_rate": 0.00015926171338163928, + "loss": 2.2061, + "step": 253340 + }, + { + "epoch": 0.6, + "grad_norm": 2.03125, + "learning_rate": 0.00015926022460178416, + "loss": 2.0964, + "step": 253345 + }, + { + "epoch": 0.6, + "grad_norm": 1.9453125, + "learning_rate": 0.00015925873580168468, + "loss": 2.2603, + "step": 253350 + }, + { + "epoch": 0.6, + "grad_norm": 2.3125, + "learning_rate": 0.00015925724698134133, + "loss": 2.0179, + "step": 253355 + }, + { + "epoch": 0.6, + "grad_norm": 7.90625, + "learning_rate": 0.0001592557581407546, + "loss": 2.2249, + "step": 253360 + }, + { + "epoch": 0.6, + "grad_norm": 2.34375, + "learning_rate": 0.00015925426927992505, + "loss": 2.1211, + "step": 253365 + }, + { + "epoch": 0.6, + "grad_norm": 2.0625, + "learning_rate": 0.00015925278039885318, + "loss": 2.1135, + "step": 253370 + }, + { + "epoch": 0.6, + "grad_norm": 2.25, + "learning_rate": 0.00015925129149753947, + "loss": 2.0939, + "step": 253375 + }, + { + "epoch": 0.6, + "grad_norm": 1.9375, + "learning_rate": 0.00015924980257598446, + "loss": 1.9488, + "step": 253380 + }, + { + "epoch": 0.6, + "grad_norm": 1.9921875, + "learning_rate": 0.00015924831363418865, + "loss": 2.0562, + "step": 253385 + }, + { + "epoch": 0.6, + "grad_norm": 2.140625, + "learning_rate": 0.00015924682467215252, + "loss": 2.0832, + "step": 253390 + }, + { + "epoch": 0.6, + "grad_norm": 2.25, + "learning_rate": 0.0001592453356898766, + "loss": 2.2159, + "step": 253395 + }, + { + "epoch": 0.6, + "grad_norm": 1.9453125, + "learning_rate": 0.0001592438466873614, + "loss": 2.1327, + "step": 253400 + }, + { + "epoch": 0.6, + "grad_norm": 2.21875, + "learning_rate": 0.00015924235766460744, + "loss": 2.0861, + "step": 253405 + }, + { + "epoch": 0.6, + "grad_norm": 1.8125, + "learning_rate": 0.00015924086862161523, + "loss": 2.2233, + "step": 253410 + }, + { + "epoch": 0.6, + "grad_norm": 2.15625, + "learning_rate": 0.00015923937955838522, + "loss": 2.1779, + "step": 253415 + }, + { + "epoch": 0.6, + "grad_norm": 2.4375, + "learning_rate": 0.00015923789047491799, + "loss": 2.1172, + "step": 253420 + }, + { + "epoch": 0.6, + "grad_norm": 2.921875, + "learning_rate": 0.00015923640137121403, + "loss": 2.0287, + "step": 253425 + }, + { + "epoch": 0.6, + "grad_norm": 2.0625, + "learning_rate": 0.00015923491224727383, + "loss": 1.9244, + "step": 253430 + }, + { + "epoch": 0.6, + "grad_norm": 2.515625, + "learning_rate": 0.0001592334231030979, + "loss": 2.1265, + "step": 253435 + }, + { + "epoch": 0.6, + "grad_norm": 2.296875, + "learning_rate": 0.00015923193393868677, + "loss": 2.0503, + "step": 253440 + }, + { + "epoch": 0.6, + "grad_norm": 2.234375, + "learning_rate": 0.00015923044475404092, + "loss": 2.2646, + "step": 253445 + }, + { + "epoch": 0.6, + "grad_norm": 1.7109375, + "learning_rate": 0.0001592289555491609, + "loss": 1.9391, + "step": 253450 + }, + { + "epoch": 0.6, + "grad_norm": 3.046875, + "learning_rate": 0.00015922746632404718, + "loss": 2.2208, + "step": 253455 + }, + { + "epoch": 0.6, + "grad_norm": 1.953125, + "learning_rate": 0.00015922597707870032, + "loss": 2.1469, + "step": 253460 + }, + { + "epoch": 0.6, + "grad_norm": 2.53125, + "learning_rate": 0.00015922448781312073, + "loss": 1.9687, + "step": 253465 + }, + { + "epoch": 0.6, + "grad_norm": 2.265625, + "learning_rate": 0.00015922299852730903, + "loss": 1.9482, + "step": 253470 + }, + { + "epoch": 0.6, + "grad_norm": 2.359375, + "learning_rate": 0.00015922150922126566, + "loss": 2.0407, + "step": 253475 + }, + { + "epoch": 0.6, + "grad_norm": 1.890625, + "learning_rate": 0.00015922001989499113, + "loss": 2.0471, + "step": 253480 + }, + { + "epoch": 0.6, + "grad_norm": 1.984375, + "learning_rate": 0.000159218530548486, + "loss": 2.1765, + "step": 253485 + }, + { + "epoch": 0.6, + "grad_norm": 1.578125, + "learning_rate": 0.00015921704118175076, + "loss": 2.014, + "step": 253490 + }, + { + "epoch": 0.6, + "grad_norm": 1.9765625, + "learning_rate": 0.00015921555179478585, + "loss": 2.0323, + "step": 253495 + }, + { + "epoch": 0.6, + "grad_norm": 2.421875, + "learning_rate": 0.00015921406238759186, + "loss": 2.0929, + "step": 253500 + }, + { + "epoch": 0.6, + "grad_norm": 2.171875, + "learning_rate": 0.00015921257296016927, + "loss": 2.0595, + "step": 253505 + }, + { + "epoch": 0.6, + "grad_norm": 2.015625, + "learning_rate": 0.0001592110835125186, + "loss": 2.0858, + "step": 253510 + }, + { + "epoch": 0.6, + "grad_norm": 1.84375, + "learning_rate": 0.00015920959404464036, + "loss": 2.1385, + "step": 253515 + }, + { + "epoch": 0.6, + "grad_norm": 1.875, + "learning_rate": 0.00015920810455653502, + "loss": 2.2183, + "step": 253520 + }, + { + "epoch": 0.6, + "grad_norm": 1.953125, + "learning_rate": 0.00015920661504820313, + "loss": 2.1529, + "step": 253525 + }, + { + "epoch": 0.6, + "grad_norm": 2.171875, + "learning_rate": 0.0001592051255196452, + "loss": 2.1769, + "step": 253530 + }, + { + "epoch": 0.6, + "grad_norm": 2.359375, + "learning_rate": 0.0001592036359708617, + "loss": 2.1464, + "step": 253535 + }, + { + "epoch": 0.6, + "grad_norm": 2.515625, + "learning_rate": 0.0001592021464018532, + "loss": 1.9733, + "step": 253540 + }, + { + "epoch": 0.6, + "grad_norm": 2.453125, + "learning_rate": 0.00015920065681262013, + "loss": 2.2605, + "step": 253545 + }, + { + "epoch": 0.6, + "grad_norm": 2.484375, + "learning_rate": 0.00015919916720316305, + "loss": 1.9672, + "step": 253550 + }, + { + "epoch": 0.6, + "grad_norm": 1.6484375, + "learning_rate": 0.0001591976775734825, + "loss": 1.9887, + "step": 253555 + }, + { + "epoch": 0.6, + "grad_norm": 2.53125, + "learning_rate": 0.00015919618792357892, + "loss": 2.1515, + "step": 253560 + }, + { + "epoch": 0.6, + "grad_norm": 1.984375, + "learning_rate": 0.00015919469825345287, + "loss": 2.0415, + "step": 253565 + }, + { + "epoch": 0.6, + "grad_norm": 2.140625, + "learning_rate": 0.0001591932085631048, + "loss": 2.1757, + "step": 253570 + }, + { + "epoch": 0.6, + "grad_norm": 2.390625, + "learning_rate": 0.00015919171885253526, + "loss": 2.1522, + "step": 253575 + }, + { + "epoch": 0.6, + "grad_norm": 1.9765625, + "learning_rate": 0.0001591902291217448, + "loss": 2.0821, + "step": 253580 + }, + { + "epoch": 0.6, + "grad_norm": 2.234375, + "learning_rate": 0.00015918873937073388, + "loss": 2.0076, + "step": 253585 + }, + { + "epoch": 0.6, + "grad_norm": 2.546875, + "learning_rate": 0.00015918724959950296, + "loss": 2.0031, + "step": 253590 + }, + { + "epoch": 0.6, + "grad_norm": 2.453125, + "learning_rate": 0.00015918575980805263, + "loss": 2.0401, + "step": 253595 + }, + { + "epoch": 0.6, + "grad_norm": 1.9765625, + "learning_rate": 0.0001591842699963834, + "loss": 2.1917, + "step": 253600 + }, + { + "epoch": 0.6, + "grad_norm": 2.578125, + "learning_rate": 0.0001591827801644957, + "loss": 1.9466, + "step": 253605 + }, + { + "epoch": 0.6, + "grad_norm": 2.0625, + "learning_rate": 0.00015918129031239011, + "loss": 2.013, + "step": 253610 + }, + { + "epoch": 0.6, + "grad_norm": 2.0625, + "learning_rate": 0.00015917980044006714, + "loss": 1.9291, + "step": 253615 + }, + { + "epoch": 0.6, + "grad_norm": 2.40625, + "learning_rate": 0.00015917831054752723, + "loss": 1.989, + "step": 253620 + }, + { + "epoch": 0.6, + "grad_norm": 2.40625, + "learning_rate": 0.00015917682063477096, + "loss": 1.9949, + "step": 253625 + }, + { + "epoch": 0.6, + "grad_norm": 2.515625, + "learning_rate": 0.00015917533070179882, + "loss": 2.0702, + "step": 253630 + }, + { + "epoch": 0.6, + "grad_norm": 2.40625, + "learning_rate": 0.00015917384074861133, + "loss": 2.037, + "step": 253635 + }, + { + "epoch": 0.6, + "grad_norm": 2.03125, + "learning_rate": 0.00015917235077520894, + "loss": 2.1237, + "step": 253640 + }, + { + "epoch": 0.6, + "grad_norm": 2.015625, + "learning_rate": 0.00015917086078159223, + "loss": 2.1471, + "step": 253645 + }, + { + "epoch": 0.6, + "grad_norm": 2.09375, + "learning_rate": 0.00015916937076776167, + "loss": 2.0678, + "step": 253650 + }, + { + "epoch": 0.6, + "grad_norm": 1.984375, + "learning_rate": 0.0001591678807337178, + "loss": 2.0062, + "step": 253655 + }, + { + "epoch": 0.6, + "grad_norm": 1.796875, + "learning_rate": 0.00015916639067946109, + "loss": 2.1068, + "step": 253660 + }, + { + "epoch": 0.6, + "grad_norm": 2.03125, + "learning_rate": 0.00015916490060499208, + "loss": 2.0126, + "step": 253665 + }, + { + "epoch": 0.6, + "grad_norm": 1.9296875, + "learning_rate": 0.00015916341051031124, + "loss": 2.0463, + "step": 253670 + }, + { + "epoch": 0.6, + "grad_norm": 2.28125, + "learning_rate": 0.00015916192039541914, + "loss": 1.9809, + "step": 253675 + }, + { + "epoch": 0.6, + "grad_norm": 2.0, + "learning_rate": 0.00015916043026031624, + "loss": 1.9738, + "step": 253680 + }, + { + "epoch": 0.6, + "grad_norm": 2.40625, + "learning_rate": 0.00015915894010500305, + "loss": 2.0297, + "step": 253685 + }, + { + "epoch": 0.6, + "grad_norm": 2.078125, + "learning_rate": 0.00015915744992948011, + "loss": 2.186, + "step": 253690 + }, + { + "epoch": 0.6, + "grad_norm": 2.390625, + "learning_rate": 0.00015915595973374792, + "loss": 2.1608, + "step": 253695 + }, + { + "epoch": 0.6, + "grad_norm": 2.1875, + "learning_rate": 0.00015915446951780696, + "loss": 2.1519, + "step": 253700 + }, + { + "epoch": 0.6, + "grad_norm": 2.25, + "learning_rate": 0.0001591529792816578, + "loss": 2.1065, + "step": 253705 + }, + { + "epoch": 0.6, + "grad_norm": 2.328125, + "learning_rate": 0.00015915148902530087, + "loss": 2.0811, + "step": 253710 + }, + { + "epoch": 0.6, + "grad_norm": 1.9296875, + "learning_rate": 0.00015914999874873674, + "loss": 2.1342, + "step": 253715 + }, + { + "epoch": 0.6, + "grad_norm": 2.0, + "learning_rate": 0.00015914850845196588, + "loss": 2.1032, + "step": 253720 + }, + { + "epoch": 0.6, + "grad_norm": 2.203125, + "learning_rate": 0.00015914701813498882, + "loss": 2.1589, + "step": 253725 + }, + { + "epoch": 0.6, + "grad_norm": 2.015625, + "learning_rate": 0.0001591455277978061, + "loss": 2.0263, + "step": 253730 + }, + { + "epoch": 0.6, + "grad_norm": 1.96875, + "learning_rate": 0.00015914403744041815, + "loss": 2.0173, + "step": 253735 + }, + { + "epoch": 0.6, + "grad_norm": 1.7734375, + "learning_rate": 0.00015914254706282552, + "loss": 2.0822, + "step": 253740 + }, + { + "epoch": 0.6, + "grad_norm": 2.109375, + "learning_rate": 0.00015914105666502875, + "loss": 2.0067, + "step": 253745 + }, + { + "epoch": 0.6, + "grad_norm": 2.328125, + "learning_rate": 0.0001591395662470283, + "loss": 2.0305, + "step": 253750 + }, + { + "epoch": 0.6, + "grad_norm": 2.9375, + "learning_rate": 0.00015913807580882473, + "loss": 1.957, + "step": 253755 + }, + { + "epoch": 0.6, + "grad_norm": 2.234375, + "learning_rate": 0.0001591365853504185, + "loss": 1.7239, + "step": 253760 + }, + { + "epoch": 0.6, + "grad_norm": 2.46875, + "learning_rate": 0.00015913509487181014, + "loss": 2.2693, + "step": 253765 + }, + { + "epoch": 0.6, + "grad_norm": 2.03125, + "learning_rate": 0.00015913360437300018, + "loss": 2.142, + "step": 253770 + }, + { + "epoch": 0.6, + "grad_norm": 1.875, + "learning_rate": 0.0001591321138539891, + "loss": 2.0337, + "step": 253775 + }, + { + "epoch": 0.6, + "grad_norm": 2.171875, + "learning_rate": 0.00015913062331477743, + "loss": 1.9832, + "step": 253780 + }, + { + "epoch": 0.6, + "grad_norm": 1.8828125, + "learning_rate": 0.00015912913275536563, + "loss": 2.1081, + "step": 253785 + }, + { + "epoch": 0.6, + "grad_norm": 1.859375, + "learning_rate": 0.00015912764217575423, + "loss": 2.1047, + "step": 253790 + }, + { + "epoch": 0.6, + "grad_norm": 2.546875, + "learning_rate": 0.00015912615157594382, + "loss": 2.0926, + "step": 253795 + }, + { + "epoch": 0.6, + "grad_norm": 1.671875, + "learning_rate": 0.00015912466095593483, + "loss": 2.1745, + "step": 253800 + }, + { + "epoch": 0.6, + "grad_norm": 2.40625, + "learning_rate": 0.00015912317031572775, + "loss": 2.1871, + "step": 253805 + }, + { + "epoch": 0.6, + "grad_norm": 2.171875, + "learning_rate": 0.00015912167965532317, + "loss": 2.1794, + "step": 253810 + }, + { + "epoch": 0.6, + "grad_norm": 2.546875, + "learning_rate": 0.00015912018897472149, + "loss": 2.1562, + "step": 253815 + }, + { + "epoch": 0.6, + "grad_norm": 2.09375, + "learning_rate": 0.0001591186982739233, + "loss": 2.2159, + "step": 253820 + }, + { + "epoch": 0.6, + "grad_norm": 2.09375, + "learning_rate": 0.00015911720755292912, + "loss": 2.1252, + "step": 253825 + }, + { + "epoch": 0.6, + "grad_norm": 1.953125, + "learning_rate": 0.00015911571681173942, + "loss": 2.2314, + "step": 253830 + }, + { + "epoch": 0.6, + "grad_norm": 2.625, + "learning_rate": 0.00015911422605035473, + "loss": 1.9596, + "step": 253835 + }, + { + "epoch": 0.6, + "grad_norm": 2.25, + "learning_rate": 0.00015911273526877552, + "loss": 1.9001, + "step": 253840 + }, + { + "epoch": 0.6, + "grad_norm": 2.15625, + "learning_rate": 0.00015911124446700233, + "loss": 2.1808, + "step": 253845 + }, + { + "epoch": 0.6, + "grad_norm": 2.0625, + "learning_rate": 0.0001591097536450357, + "loss": 2.1504, + "step": 253850 + }, + { + "epoch": 0.6, + "grad_norm": 2.375, + "learning_rate": 0.00015910826280287607, + "loss": 1.9192, + "step": 253855 + }, + { + "epoch": 0.6, + "grad_norm": 2.765625, + "learning_rate": 0.000159106771940524, + "loss": 2.0305, + "step": 253860 + }, + { + "epoch": 0.6, + "grad_norm": 2.390625, + "learning_rate": 0.00015910528105798, + "loss": 2.2244, + "step": 253865 + }, + { + "epoch": 0.6, + "grad_norm": 2.078125, + "learning_rate": 0.00015910379015524452, + "loss": 2.1539, + "step": 253870 + }, + { + "epoch": 0.6, + "grad_norm": 2.046875, + "learning_rate": 0.00015910229923231815, + "loss": 2.1814, + "step": 253875 + }, + { + "epoch": 0.6, + "grad_norm": 2.0625, + "learning_rate": 0.00015910080828920135, + "loss": 2.1222, + "step": 253880 + }, + { + "epoch": 0.6, + "grad_norm": 2.34375, + "learning_rate": 0.00015909931732589465, + "loss": 2.1012, + "step": 253885 + }, + { + "epoch": 0.6, + "grad_norm": 2.421875, + "learning_rate": 0.00015909782634239856, + "loss": 2.0277, + "step": 253890 + }, + { + "epoch": 0.6, + "grad_norm": 2.0625, + "learning_rate": 0.00015909633533871357, + "loss": 2.1784, + "step": 253895 + }, + { + "epoch": 0.6, + "grad_norm": 2.21875, + "learning_rate": 0.00015909484431484016, + "loss": 2.2216, + "step": 253900 + }, + { + "epoch": 0.6, + "grad_norm": 2.203125, + "learning_rate": 0.0001590933532707789, + "loss": 1.9155, + "step": 253905 + }, + { + "epoch": 0.6, + "grad_norm": 2.171875, + "learning_rate": 0.0001590918622065303, + "loss": 1.9758, + "step": 253910 + }, + { + "epoch": 0.6, + "grad_norm": 2.078125, + "learning_rate": 0.00015909037112209488, + "loss": 2.1816, + "step": 253915 + }, + { + "epoch": 0.6, + "grad_norm": 2.140625, + "learning_rate": 0.00015908888001747306, + "loss": 2.0078, + "step": 253920 + }, + { + "epoch": 0.6, + "grad_norm": 2.375, + "learning_rate": 0.00015908738889266544, + "loss": 2.0522, + "step": 253925 + }, + { + "epoch": 0.6, + "grad_norm": 2.375, + "learning_rate": 0.00015908589774767248, + "loss": 1.9652, + "step": 253930 + }, + { + "epoch": 0.6, + "grad_norm": 2.109375, + "learning_rate": 0.0001590844065824947, + "loss": 1.9235, + "step": 253935 + }, + { + "epoch": 0.6, + "grad_norm": 2.046875, + "learning_rate": 0.0001590829153971326, + "loss": 2.2779, + "step": 253940 + }, + { + "epoch": 0.6, + "grad_norm": 2.078125, + "learning_rate": 0.00015908142419158677, + "loss": 1.9812, + "step": 253945 + }, + { + "epoch": 0.6, + "grad_norm": 2.140625, + "learning_rate": 0.0001590799329658576, + "loss": 2.1611, + "step": 253950 + }, + { + "epoch": 0.6, + "grad_norm": 2.21875, + "learning_rate": 0.00015907844171994567, + "loss": 2.0816, + "step": 253955 + }, + { + "epoch": 0.6, + "grad_norm": 2.765625, + "learning_rate": 0.00015907695045385147, + "loss": 2.143, + "step": 253960 + }, + { + "epoch": 0.6, + "grad_norm": 2.0625, + "learning_rate": 0.00015907545916757552, + "loss": 2.2584, + "step": 253965 + }, + { + "epoch": 0.6, + "grad_norm": 2.21875, + "learning_rate": 0.00015907396786111832, + "loss": 2.0841, + "step": 253970 + }, + { + "epoch": 0.6, + "grad_norm": 2.0625, + "learning_rate": 0.00015907247653448037, + "loss": 2.1516, + "step": 253975 + }, + { + "epoch": 0.6, + "grad_norm": 2.109375, + "learning_rate": 0.0001590709851876622, + "loss": 2.0684, + "step": 253980 + }, + { + "epoch": 0.6, + "grad_norm": 1.984375, + "learning_rate": 0.00015906949382066428, + "loss": 2.1331, + "step": 253985 + }, + { + "epoch": 0.6, + "grad_norm": 2.0, + "learning_rate": 0.0001590680024334872, + "loss": 1.8799, + "step": 253990 + }, + { + "epoch": 0.6, + "grad_norm": 2.328125, + "learning_rate": 0.0001590665110261314, + "loss": 2.1603, + "step": 253995 + }, + { + "epoch": 0.6, + "grad_norm": 2.015625, + "learning_rate": 0.0001590650195985974, + "loss": 1.9982, + "step": 254000 + }, + { + "epoch": 0.6, + "grad_norm": 1.9375, + "learning_rate": 0.0001590635281508857, + "loss": 1.8934, + "step": 254005 + }, + { + "epoch": 0.6, + "grad_norm": 2.09375, + "learning_rate": 0.00015906203668299685, + "loss": 2.1134, + "step": 254010 + }, + { + "epoch": 0.6, + "grad_norm": 1.59375, + "learning_rate": 0.00015906054519493137, + "loss": 1.8827, + "step": 254015 + }, + { + "epoch": 0.6, + "grad_norm": 2.15625, + "learning_rate": 0.0001590590536866897, + "loss": 2.0409, + "step": 254020 + }, + { + "epoch": 0.6, + "grad_norm": 2.1875, + "learning_rate": 0.0001590575621582724, + "loss": 2.1226, + "step": 254025 + }, + { + "epoch": 0.6, + "grad_norm": 2.28125, + "learning_rate": 0.00015905607060967992, + "loss": 2.1232, + "step": 254030 + }, + { + "epoch": 0.6, + "grad_norm": 2.078125, + "learning_rate": 0.00015905457904091285, + "loss": 2.1547, + "step": 254035 + }, + { + "epoch": 0.6, + "grad_norm": 2.3125, + "learning_rate": 0.00015905308745197167, + "loss": 1.9748, + "step": 254040 + }, + { + "epoch": 0.6, + "grad_norm": 2.0625, + "learning_rate": 0.00015905159584285689, + "loss": 1.8612, + "step": 254045 + }, + { + "epoch": 0.6, + "grad_norm": 2.421875, + "learning_rate": 0.00015905010421356897, + "loss": 2.1143, + "step": 254050 + }, + { + "epoch": 0.6, + "grad_norm": 2.5625, + "learning_rate": 0.0001590486125641085, + "loss": 2.1884, + "step": 254055 + }, + { + "epoch": 0.6, + "grad_norm": 2.125, + "learning_rate": 0.00015904712089447599, + "loss": 2.1162, + "step": 254060 + }, + { + "epoch": 0.6, + "grad_norm": 1.8984375, + "learning_rate": 0.00015904562920467186, + "loss": 2.0745, + "step": 254065 + }, + { + "epoch": 0.6, + "grad_norm": 2.078125, + "learning_rate": 0.00015904413749469668, + "loss": 2.1372, + "step": 254070 + }, + { + "epoch": 0.6, + "grad_norm": 2.15625, + "learning_rate": 0.00015904264576455094, + "loss": 2.0424, + "step": 254075 + }, + { + "epoch": 0.6, + "grad_norm": 2.15625, + "learning_rate": 0.00015904115401423521, + "loss": 2.1914, + "step": 254080 + }, + { + "epoch": 0.6, + "grad_norm": 1.9765625, + "learning_rate": 0.0001590396622437499, + "loss": 2.1725, + "step": 254085 + }, + { + "epoch": 0.6, + "grad_norm": 2.078125, + "learning_rate": 0.00015903817045309559, + "loss": 1.957, + "step": 254090 + }, + { + "epoch": 0.6, + "grad_norm": 1.9765625, + "learning_rate": 0.00015903667864227276, + "loss": 1.9665, + "step": 254095 + }, + { + "epoch": 0.6, + "grad_norm": 2.125, + "learning_rate": 0.00015903518681128196, + "loss": 1.902, + "step": 254100 + }, + { + "epoch": 0.6, + "grad_norm": 2.046875, + "learning_rate": 0.00015903369496012365, + "loss": 1.9686, + "step": 254105 + }, + { + "epoch": 0.6, + "grad_norm": 2.109375, + "learning_rate": 0.00015903220308879833, + "loss": 2.147, + "step": 254110 + }, + { + "epoch": 0.6, + "grad_norm": 2.03125, + "learning_rate": 0.00015903071119730656, + "loss": 2.1302, + "step": 254115 + }, + { + "epoch": 0.6, + "grad_norm": 1.859375, + "learning_rate": 0.00015902921928564884, + "loss": 2.0379, + "step": 254120 + }, + { + "epoch": 0.6, + "grad_norm": 2.328125, + "learning_rate": 0.0001590277273538257, + "loss": 2.1177, + "step": 254125 + }, + { + "epoch": 0.6, + "grad_norm": 2.21875, + "learning_rate": 0.00015902623540183755, + "loss": 1.9667, + "step": 254130 + }, + { + "epoch": 0.6, + "grad_norm": 2.671875, + "learning_rate": 0.00015902474342968501, + "loss": 2.2029, + "step": 254135 + }, + { + "epoch": 0.6, + "grad_norm": 1.765625, + "learning_rate": 0.00015902325143736854, + "loss": 2.011, + "step": 254140 + }, + { + "epoch": 0.6, + "grad_norm": 2.171875, + "learning_rate": 0.00015902175942488866, + "loss": 2.3041, + "step": 254145 + }, + { + "epoch": 0.6, + "grad_norm": 1.9921875, + "learning_rate": 0.00015902026739224586, + "loss": 2.169, + "step": 254150 + }, + { + "epoch": 0.6, + "grad_norm": 2.328125, + "learning_rate": 0.00015901877533944068, + "loss": 2.0858, + "step": 254155 + }, + { + "epoch": 0.6, + "grad_norm": 2.0625, + "learning_rate": 0.00015901728326647363, + "loss": 2.1485, + "step": 254160 + }, + { + "epoch": 0.6, + "grad_norm": 2.125, + "learning_rate": 0.00015901579117334516, + "loss": 2.0465, + "step": 254165 + }, + { + "epoch": 0.6, + "grad_norm": 2.21875, + "learning_rate": 0.00015901429906005587, + "loss": 2.2947, + "step": 254170 + }, + { + "epoch": 0.6, + "grad_norm": 2.203125, + "learning_rate": 0.0001590128069266062, + "loss": 1.8982, + "step": 254175 + }, + { + "epoch": 0.6, + "grad_norm": 2.28125, + "learning_rate": 0.00015901131477299668, + "loss": 2.043, + "step": 254180 + }, + { + "epoch": 0.6, + "grad_norm": 2.25, + "learning_rate": 0.00015900982259922785, + "loss": 2.0654, + "step": 254185 + }, + { + "epoch": 0.6, + "grad_norm": 2.484375, + "learning_rate": 0.00015900833040530019, + "loss": 1.8409, + "step": 254190 + }, + { + "epoch": 0.6, + "grad_norm": 2.09375, + "learning_rate": 0.0001590068381912142, + "loss": 1.9545, + "step": 254195 + }, + { + "epoch": 0.6, + "grad_norm": 2.21875, + "learning_rate": 0.0001590053459569704, + "loss": 1.9744, + "step": 254200 + }, + { + "epoch": 0.6, + "grad_norm": 2.734375, + "learning_rate": 0.0001590038537025693, + "loss": 2.1115, + "step": 254205 + }, + { + "epoch": 0.6, + "grad_norm": 2.203125, + "learning_rate": 0.00015900236142801143, + "loss": 2.0583, + "step": 254210 + }, + { + "epoch": 0.6, + "grad_norm": 2.0625, + "learning_rate": 0.00015900086913329728, + "loss": 1.9782, + "step": 254215 + }, + { + "epoch": 0.6, + "grad_norm": 2.09375, + "learning_rate": 0.00015899937681842737, + "loss": 2.0191, + "step": 254220 + }, + { + "epoch": 0.6, + "grad_norm": 3.765625, + "learning_rate": 0.0001589978844834022, + "loss": 2.1652, + "step": 254225 + }, + { + "epoch": 0.6, + "grad_norm": 2.1875, + "learning_rate": 0.0001589963921282223, + "loss": 2.1229, + "step": 254230 + }, + { + "epoch": 0.6, + "grad_norm": 2.03125, + "learning_rate": 0.00015899489975288814, + "loss": 1.9703, + "step": 254235 + }, + { + "epoch": 0.6, + "grad_norm": 1.984375, + "learning_rate": 0.00015899340735740024, + "loss": 2.1778, + "step": 254240 + }, + { + "epoch": 0.6, + "grad_norm": 1.8671875, + "learning_rate": 0.00015899191494175914, + "loss": 1.9862, + "step": 254245 + }, + { + "epoch": 0.6, + "grad_norm": 2.359375, + "learning_rate": 0.00015899042250596532, + "loss": 2.0869, + "step": 254250 + }, + { + "epoch": 0.6, + "grad_norm": 2.09375, + "learning_rate": 0.0001589889300500193, + "loss": 2.0757, + "step": 254255 + }, + { + "epoch": 0.6, + "grad_norm": 2.25, + "learning_rate": 0.00015898743757392162, + "loss": 2.0843, + "step": 254260 + }, + { + "epoch": 0.6, + "grad_norm": 2.390625, + "learning_rate": 0.00015898594507767272, + "loss": 2.0423, + "step": 254265 + }, + { + "epoch": 0.6, + "grad_norm": 2.453125, + "learning_rate": 0.0001589844525612732, + "loss": 1.9589, + "step": 254270 + }, + { + "epoch": 0.6, + "grad_norm": 2.09375, + "learning_rate": 0.00015898296002472349, + "loss": 2.0346, + "step": 254275 + }, + { + "epoch": 0.6, + "grad_norm": 2.109375, + "learning_rate": 0.0001589814674680241, + "loss": 2.0729, + "step": 254280 + }, + { + "epoch": 0.6, + "grad_norm": 1.953125, + "learning_rate": 0.00015897997489117564, + "loss": 1.9831, + "step": 254285 + }, + { + "epoch": 0.6, + "grad_norm": 2.40625, + "learning_rate": 0.00015897848229417852, + "loss": 2.025, + "step": 254290 + }, + { + "epoch": 0.6, + "grad_norm": 2.015625, + "learning_rate": 0.00015897698967703327, + "loss": 2.0209, + "step": 254295 + }, + { + "epoch": 0.6, + "grad_norm": 2.46875, + "learning_rate": 0.00015897549703974045, + "loss": 2.1052, + "step": 254300 + }, + { + "epoch": 0.6, + "grad_norm": 2.265625, + "learning_rate": 0.00015897400438230047, + "loss": 2.1232, + "step": 254305 + }, + { + "epoch": 0.6, + "grad_norm": 2.390625, + "learning_rate": 0.00015897251170471396, + "loss": 2.0479, + "step": 254310 + }, + { + "epoch": 0.6, + "grad_norm": 1.96875, + "learning_rate": 0.00015897101900698134, + "loss": 1.9098, + "step": 254315 + }, + { + "epoch": 0.6, + "grad_norm": 1.9609375, + "learning_rate": 0.00015896952628910316, + "loss": 2.1803, + "step": 254320 + }, + { + "epoch": 0.6, + "grad_norm": 2.15625, + "learning_rate": 0.00015896803355107988, + "loss": 2.1068, + "step": 254325 + }, + { + "epoch": 0.6, + "grad_norm": 2.265625, + "learning_rate": 0.0001589665407929121, + "loss": 2.0398, + "step": 254330 + }, + { + "epoch": 0.6, + "grad_norm": 1.875, + "learning_rate": 0.00015896504801460025, + "loss": 1.9933, + "step": 254335 + }, + { + "epoch": 0.6, + "grad_norm": 2.3125, + "learning_rate": 0.0001589635552161449, + "loss": 1.9856, + "step": 254340 + }, + { + "epoch": 0.6, + "grad_norm": 1.984375, + "learning_rate": 0.0001589620623975465, + "loss": 2.1521, + "step": 254345 + }, + { + "epoch": 0.6, + "grad_norm": 1.96875, + "learning_rate": 0.00015896056955880562, + "loss": 2.228, + "step": 254350 + }, + { + "epoch": 0.6, + "grad_norm": 2.328125, + "learning_rate": 0.00015895907669992272, + "loss": 2.1257, + "step": 254355 + }, + { + "epoch": 0.6, + "grad_norm": 1.96875, + "learning_rate": 0.00015895758382089833, + "loss": 1.996, + "step": 254360 + }, + { + "epoch": 0.6, + "grad_norm": 2.328125, + "learning_rate": 0.00015895609092173295, + "loss": 2.2922, + "step": 254365 + }, + { + "epoch": 0.6, + "grad_norm": 2.03125, + "learning_rate": 0.00015895459800242714, + "loss": 2.1508, + "step": 254370 + }, + { + "epoch": 0.6, + "grad_norm": 2.265625, + "learning_rate": 0.00015895310506298133, + "loss": 2.0249, + "step": 254375 + }, + { + "epoch": 0.6, + "grad_norm": 2.359375, + "learning_rate": 0.00015895161210339608, + "loss": 2.1153, + "step": 254380 + }, + { + "epoch": 0.6, + "grad_norm": 2.109375, + "learning_rate": 0.0001589501191236719, + "loss": 2.017, + "step": 254385 + }, + { + "epoch": 0.6, + "grad_norm": 1.875, + "learning_rate": 0.0001589486261238093, + "loss": 2.0203, + "step": 254390 + }, + { + "epoch": 0.6, + "grad_norm": 2.796875, + "learning_rate": 0.00015894713310380873, + "loss": 2.1348, + "step": 254395 + }, + { + "epoch": 0.6, + "grad_norm": 2.109375, + "learning_rate": 0.0001589456400636708, + "loss": 1.9783, + "step": 254400 + }, + { + "epoch": 0.6, + "grad_norm": 2.09375, + "learning_rate": 0.00015894414700339598, + "loss": 2.1854, + "step": 254405 + }, + { + "epoch": 0.6, + "grad_norm": 2.5625, + "learning_rate": 0.00015894265392298471, + "loss": 2.1988, + "step": 254410 + }, + { + "epoch": 0.6, + "grad_norm": 1.953125, + "learning_rate": 0.0001589411608224376, + "loss": 2.3014, + "step": 254415 + }, + { + "epoch": 0.6, + "grad_norm": 2.0, + "learning_rate": 0.00015893966770175514, + "loss": 2.033, + "step": 254420 + }, + { + "epoch": 0.6, + "grad_norm": 2.0, + "learning_rate": 0.00015893817456093777, + "loss": 2.1027, + "step": 254425 + }, + { + "epoch": 0.6, + "grad_norm": 1.8046875, + "learning_rate": 0.0001589366813999861, + "loss": 2.1138, + "step": 254430 + }, + { + "epoch": 0.6, + "grad_norm": 1.9609375, + "learning_rate": 0.00015893518821890056, + "loss": 2.2355, + "step": 254435 + }, + { + "epoch": 0.6, + "grad_norm": 2.671875, + "learning_rate": 0.00015893369501768172, + "loss": 1.9911, + "step": 254440 + }, + { + "epoch": 0.6, + "grad_norm": 2.453125, + "learning_rate": 0.00015893220179633003, + "loss": 1.9759, + "step": 254445 + }, + { + "epoch": 0.6, + "grad_norm": 2.359375, + "learning_rate": 0.00015893070855484603, + "loss": 2.2439, + "step": 254450 + }, + { + "epoch": 0.6, + "grad_norm": 2.1875, + "learning_rate": 0.00015892921529323025, + "loss": 1.9289, + "step": 254455 + }, + { + "epoch": 0.6, + "grad_norm": 2.296875, + "learning_rate": 0.00015892772201148318, + "loss": 2.1549, + "step": 254460 + }, + { + "epoch": 0.6, + "grad_norm": 2.203125, + "learning_rate": 0.0001589262287096053, + "loss": 2.2775, + "step": 254465 + }, + { + "epoch": 0.6, + "grad_norm": 2.15625, + "learning_rate": 0.0001589247353875972, + "loss": 2.1748, + "step": 254470 + }, + { + "epoch": 0.6, + "grad_norm": 1.828125, + "learning_rate": 0.00015892324204545933, + "loss": 2.1834, + "step": 254475 + }, + { + "epoch": 0.6, + "grad_norm": 2.0, + "learning_rate": 0.0001589217486831922, + "loss": 2.094, + "step": 254480 + }, + { + "epoch": 0.6, + "grad_norm": 2.03125, + "learning_rate": 0.00015892025530079632, + "loss": 2.0036, + "step": 254485 + }, + { + "epoch": 0.6, + "grad_norm": 2.59375, + "learning_rate": 0.00015891876189827223, + "loss": 2.1334, + "step": 254490 + }, + { + "epoch": 0.6, + "grad_norm": 2.28125, + "learning_rate": 0.0001589172684756204, + "loss": 2.0622, + "step": 254495 + }, + { + "epoch": 0.6, + "grad_norm": 2.109375, + "learning_rate": 0.00015891577503284138, + "loss": 2.2691, + "step": 254500 + }, + { + "epoch": 0.6, + "grad_norm": 2.0625, + "learning_rate": 0.00015891428156993568, + "loss": 1.9581, + "step": 254505 + }, + { + "epoch": 0.6, + "grad_norm": 2.125, + "learning_rate": 0.00015891278808690377, + "loss": 2.1923, + "step": 254510 + }, + { + "epoch": 0.6, + "grad_norm": 2.140625, + "learning_rate": 0.00015891129458374617, + "loss": 2.0947, + "step": 254515 + }, + { + "epoch": 0.6, + "grad_norm": 2.234375, + "learning_rate": 0.00015890980106046345, + "loss": 2.2488, + "step": 254520 + }, + { + "epoch": 0.6, + "grad_norm": 2.09375, + "learning_rate": 0.00015890830751705607, + "loss": 2.1117, + "step": 254525 + }, + { + "epoch": 0.6, + "grad_norm": 2.421875, + "learning_rate": 0.00015890681395352448, + "loss": 2.1605, + "step": 254530 + }, + { + "epoch": 0.6, + "grad_norm": 1.8125, + "learning_rate": 0.00015890532036986932, + "loss": 2.1719, + "step": 254535 + }, + { + "epoch": 0.6, + "grad_norm": 2.015625, + "learning_rate": 0.00015890382676609098, + "loss": 2.0895, + "step": 254540 + }, + { + "epoch": 0.6, + "grad_norm": 2.375, + "learning_rate": 0.00015890233314219007, + "loss": 2.1037, + "step": 254545 + }, + { + "epoch": 0.6, + "grad_norm": 1.9921875, + "learning_rate": 0.00015890083949816704, + "loss": 2.1992, + "step": 254550 + }, + { + "epoch": 0.6, + "grad_norm": 2.265625, + "learning_rate": 0.00015889934583402241, + "loss": 1.9198, + "step": 254555 + }, + { + "epoch": 0.6, + "grad_norm": 2.15625, + "learning_rate": 0.0001588978521497567, + "loss": 2.2719, + "step": 254560 + }, + { + "epoch": 0.6, + "grad_norm": 2.09375, + "learning_rate": 0.0001588963584453704, + "loss": 2.2121, + "step": 254565 + }, + { + "epoch": 0.6, + "grad_norm": 1.9453125, + "learning_rate": 0.00015889486472086406, + "loss": 2.1303, + "step": 254570 + }, + { + "epoch": 0.6, + "grad_norm": 1.984375, + "learning_rate": 0.00015889337097623813, + "loss": 2.1546, + "step": 254575 + }, + { + "epoch": 0.6, + "grad_norm": 2.171875, + "learning_rate": 0.0001588918772114932, + "loss": 2.2089, + "step": 254580 + }, + { + "epoch": 0.6, + "grad_norm": 2.015625, + "learning_rate": 0.0001588903834266297, + "loss": 2.1937, + "step": 254585 + }, + { + "epoch": 0.6, + "grad_norm": 3.1875, + "learning_rate": 0.00015888888962164823, + "loss": 2.0862, + "step": 254590 + }, + { + "epoch": 0.6, + "grad_norm": 1.8046875, + "learning_rate": 0.00015888739579654918, + "loss": 1.9055, + "step": 254595 + }, + { + "epoch": 0.6, + "grad_norm": 2.453125, + "learning_rate": 0.00015888590195133317, + "loss": 2.0589, + "step": 254600 + }, + { + "epoch": 0.6, + "grad_norm": 2.015625, + "learning_rate": 0.00015888440808600068, + "loss": 2.0647, + "step": 254605 + }, + { + "epoch": 0.6, + "grad_norm": 1.765625, + "learning_rate": 0.00015888291420055218, + "loss": 1.9721, + "step": 254610 + }, + { + "epoch": 0.6, + "grad_norm": 2.375, + "learning_rate": 0.0001588814202949882, + "loss": 1.9945, + "step": 254615 + }, + { + "epoch": 0.6, + "grad_norm": 2.03125, + "learning_rate": 0.0001588799263693093, + "loss": 2.0488, + "step": 254620 + }, + { + "epoch": 0.6, + "grad_norm": 2.3125, + "learning_rate": 0.00015887843242351593, + "loss": 2.1296, + "step": 254625 + }, + { + "epoch": 0.6, + "grad_norm": 2.234375, + "learning_rate": 0.0001588769384576086, + "loss": 2.1659, + "step": 254630 + }, + { + "epoch": 0.6, + "grad_norm": 2.03125, + "learning_rate": 0.00015887544447158787, + "loss": 2.1839, + "step": 254635 + }, + { + "epoch": 0.6, + "grad_norm": 1.875, + "learning_rate": 0.00015887395046545417, + "loss": 2.1228, + "step": 254640 + }, + { + "epoch": 0.6, + "grad_norm": 2.671875, + "learning_rate": 0.0001588724564392081, + "loss": 2.3018, + "step": 254645 + }, + { + "epoch": 0.6, + "grad_norm": 2.890625, + "learning_rate": 0.00015887096239285012, + "loss": 2.0152, + "step": 254650 + }, + { + "epoch": 0.6, + "grad_norm": 2.234375, + "learning_rate": 0.00015886946832638075, + "loss": 2.146, + "step": 254655 + }, + { + "epoch": 0.6, + "grad_norm": 2.03125, + "learning_rate": 0.00015886797423980052, + "loss": 1.9704, + "step": 254660 + }, + { + "epoch": 0.6, + "grad_norm": 2.34375, + "learning_rate": 0.0001588664801331099, + "loss": 1.9451, + "step": 254665 + }, + { + "epoch": 0.6, + "grad_norm": 1.7734375, + "learning_rate": 0.00015886498600630947, + "loss": 2.0937, + "step": 254670 + }, + { + "epoch": 0.6, + "grad_norm": 2.296875, + "learning_rate": 0.00015886349185939965, + "loss": 2.1563, + "step": 254675 + }, + { + "epoch": 0.6, + "grad_norm": 2.296875, + "learning_rate": 0.000158861997692381, + "loss": 2.1042, + "step": 254680 + }, + { + "epoch": 0.6, + "grad_norm": 1.921875, + "learning_rate": 0.00015886050350525402, + "loss": 1.9214, + "step": 254685 + }, + { + "epoch": 0.6, + "grad_norm": 2.0625, + "learning_rate": 0.00015885900929801921, + "loss": 2.1416, + "step": 254690 + }, + { + "epoch": 0.6, + "grad_norm": 1.84375, + "learning_rate": 0.00015885751507067712, + "loss": 1.9957, + "step": 254695 + }, + { + "epoch": 0.6, + "grad_norm": 2.3125, + "learning_rate": 0.00015885602082322823, + "loss": 2.2365, + "step": 254700 + }, + { + "epoch": 0.6, + "grad_norm": 2.046875, + "learning_rate": 0.00015885452655567305, + "loss": 1.9033, + "step": 254705 + }, + { + "epoch": 0.6, + "grad_norm": 2.328125, + "learning_rate": 0.00015885303226801208, + "loss": 1.9949, + "step": 254710 + }, + { + "epoch": 0.6, + "grad_norm": 2.453125, + "learning_rate": 0.0001588515379602459, + "loss": 2.132, + "step": 254715 + }, + { + "epoch": 0.6, + "grad_norm": 2.078125, + "learning_rate": 0.00015885004363237493, + "loss": 2.1109, + "step": 254720 + }, + { + "epoch": 0.6, + "grad_norm": 1.9921875, + "learning_rate": 0.0001588485492843997, + "loss": 1.9387, + "step": 254725 + }, + { + "epoch": 0.6, + "grad_norm": 2.140625, + "learning_rate": 0.0001588470549163208, + "loss": 2.0435, + "step": 254730 + }, + { + "epoch": 0.6, + "grad_norm": 2.109375, + "learning_rate": 0.00015884556052813865, + "loss": 2.1598, + "step": 254735 + }, + { + "epoch": 0.6, + "grad_norm": 2.03125, + "learning_rate": 0.00015884406611985376, + "loss": 2.111, + "step": 254740 + }, + { + "epoch": 0.6, + "grad_norm": 2.109375, + "learning_rate": 0.00015884257169146668, + "loss": 2.048, + "step": 254745 + }, + { + "epoch": 0.6, + "grad_norm": 2.09375, + "learning_rate": 0.00015884107724297794, + "loss": 2.0423, + "step": 254750 + }, + { + "epoch": 0.6, + "grad_norm": 2.046875, + "learning_rate": 0.000158839582774388, + "loss": 2.0257, + "step": 254755 + }, + { + "epoch": 0.6, + "grad_norm": 2.109375, + "learning_rate": 0.00015883808828569737, + "loss": 2.2127, + "step": 254760 + }, + { + "epoch": 0.6, + "grad_norm": 2.234375, + "learning_rate": 0.0001588365937769066, + "loss": 1.9915, + "step": 254765 + }, + { + "epoch": 0.6, + "grad_norm": 2.15625, + "learning_rate": 0.0001588350992480162, + "loss": 2.0294, + "step": 254770 + }, + { + "epoch": 0.6, + "grad_norm": 2.0, + "learning_rate": 0.00015883360469902668, + "loss": 2.0446, + "step": 254775 + }, + { + "epoch": 0.6, + "grad_norm": 2.078125, + "learning_rate": 0.00015883211012993848, + "loss": 2.1225, + "step": 254780 + }, + { + "epoch": 0.6, + "grad_norm": 2.03125, + "learning_rate": 0.00015883061554075218, + "loss": 1.938, + "step": 254785 + }, + { + "epoch": 0.6, + "grad_norm": 2.125, + "learning_rate": 0.0001588291209314683, + "loss": 2.1307, + "step": 254790 + }, + { + "epoch": 0.6, + "grad_norm": 2.625, + "learning_rate": 0.00015882762630208732, + "loss": 2.0799, + "step": 254795 + }, + { + "epoch": 0.6, + "grad_norm": 2.078125, + "learning_rate": 0.00015882613165260973, + "loss": 1.971, + "step": 254800 + }, + { + "epoch": 0.6, + "grad_norm": 2.609375, + "learning_rate": 0.00015882463698303607, + "loss": 2.1593, + "step": 254805 + }, + { + "epoch": 0.6, + "grad_norm": 2.1875, + "learning_rate": 0.00015882314229336686, + "loss": 2.1831, + "step": 254810 + }, + { + "epoch": 0.6, + "grad_norm": 2.21875, + "learning_rate": 0.00015882164758360262, + "loss": 1.982, + "step": 254815 + }, + { + "epoch": 0.6, + "grad_norm": 2.234375, + "learning_rate": 0.0001588201528537438, + "loss": 1.9748, + "step": 254820 + }, + { + "epoch": 0.6, + "grad_norm": 2.0625, + "learning_rate": 0.00015881865810379097, + "loss": 2.0508, + "step": 254825 + }, + { + "epoch": 0.6, + "grad_norm": 2.203125, + "learning_rate": 0.00015881716333374462, + "loss": 2.1912, + "step": 254830 + }, + { + "epoch": 0.6, + "grad_norm": 2.421875, + "learning_rate": 0.00015881566854360522, + "loss": 2.0965, + "step": 254835 + }, + { + "epoch": 0.6, + "grad_norm": 2.53125, + "learning_rate": 0.00015881417373337338, + "loss": 1.8598, + "step": 254840 + }, + { + "epoch": 0.6, + "grad_norm": 2.515625, + "learning_rate": 0.0001588126789030495, + "loss": 2.0222, + "step": 254845 + }, + { + "epoch": 0.6, + "grad_norm": 2.015625, + "learning_rate": 0.0001588111840526342, + "loss": 2.0336, + "step": 254850 + }, + { + "epoch": 0.6, + "grad_norm": 2.15625, + "learning_rate": 0.0001588096891821279, + "loss": 1.9496, + "step": 254855 + }, + { + "epoch": 0.6, + "grad_norm": 2.671875, + "learning_rate": 0.0001588081942915311, + "loss": 2.1252, + "step": 254860 + }, + { + "epoch": 0.6, + "grad_norm": 2.171875, + "learning_rate": 0.0001588066993808444, + "loss": 1.9914, + "step": 254865 + }, + { + "epoch": 0.6, + "grad_norm": 2.3125, + "learning_rate": 0.0001588052044500683, + "loss": 1.9076, + "step": 254870 + }, + { + "epoch": 0.6, + "grad_norm": 2.140625, + "learning_rate": 0.00015880370949920323, + "loss": 2.0497, + "step": 254875 + }, + { + "epoch": 0.6, + "grad_norm": 2.15625, + "learning_rate": 0.00015880221452824978, + "loss": 2.1142, + "step": 254880 + }, + { + "epoch": 0.6, + "grad_norm": 2.015625, + "learning_rate": 0.00015880071953720836, + "loss": 2.3404, + "step": 254885 + }, + { + "epoch": 0.6, + "grad_norm": 2.5625, + "learning_rate": 0.0001587992245260796, + "loss": 2.0731, + "step": 254890 + }, + { + "epoch": 0.6, + "grad_norm": 2.109375, + "learning_rate": 0.00015879772949486395, + "loss": 2.2535, + "step": 254895 + }, + { + "epoch": 0.6, + "grad_norm": 2.34375, + "learning_rate": 0.00015879623444356192, + "loss": 2.0703, + "step": 254900 + }, + { + "epoch": 0.6, + "grad_norm": 2.3125, + "learning_rate": 0.00015879473937217405, + "loss": 2.0891, + "step": 254905 + }, + { + "epoch": 0.6, + "grad_norm": 2.03125, + "learning_rate": 0.00015879324428070085, + "loss": 2.0263, + "step": 254910 + }, + { + "epoch": 0.6, + "grad_norm": 2.328125, + "learning_rate": 0.00015879174916914277, + "loss": 2.1182, + "step": 254915 + }, + { + "epoch": 0.6, + "grad_norm": 2.09375, + "learning_rate": 0.00015879025403750036, + "loss": 1.8902, + "step": 254920 + }, + { + "epoch": 0.6, + "grad_norm": 2.046875, + "learning_rate": 0.00015878875888577414, + "loss": 2.061, + "step": 254925 + }, + { + "epoch": 0.6, + "grad_norm": 1.9609375, + "learning_rate": 0.00015878726371396464, + "loss": 2.1624, + "step": 254930 + }, + { + "epoch": 0.6, + "grad_norm": 2.078125, + "learning_rate": 0.0001587857685220723, + "loss": 1.9508, + "step": 254935 + }, + { + "epoch": 0.6, + "grad_norm": 1.828125, + "learning_rate": 0.0001587842733100977, + "loss": 1.979, + "step": 254940 + }, + { + "epoch": 0.6, + "grad_norm": 2.078125, + "learning_rate": 0.0001587827780780413, + "loss": 2.0839, + "step": 254945 + }, + { + "epoch": 0.6, + "grad_norm": 2.390625, + "learning_rate": 0.0001587812828259037, + "loss": 2.2664, + "step": 254950 + }, + { + "epoch": 0.6, + "grad_norm": 2.015625, + "learning_rate": 0.0001587797875536853, + "loss": 2.0203, + "step": 254955 + }, + { + "epoch": 0.6, + "grad_norm": 2.078125, + "learning_rate": 0.00015877829226138669, + "loss": 2.0474, + "step": 254960 + }, + { + "epoch": 0.6, + "grad_norm": 2.609375, + "learning_rate": 0.00015877679694900834, + "loss": 2.0258, + "step": 254965 + }, + { + "epoch": 0.6, + "grad_norm": 2.21875, + "learning_rate": 0.00015877530161655072, + "loss": 2.156, + "step": 254970 + }, + { + "epoch": 0.6, + "grad_norm": 2.40625, + "learning_rate": 0.00015877380626401445, + "loss": 1.9905, + "step": 254975 + }, + { + "epoch": 0.6, + "grad_norm": 2.46875, + "learning_rate": 0.00015877231089139998, + "loss": 2.124, + "step": 254980 + }, + { + "epoch": 0.6, + "grad_norm": 2.1875, + "learning_rate": 0.00015877081549870776, + "loss": 2.1647, + "step": 254985 + }, + { + "epoch": 0.6, + "grad_norm": 1.78125, + "learning_rate": 0.00015876932008593842, + "loss": 2.0341, + "step": 254990 + }, + { + "epoch": 0.6, + "grad_norm": 2.03125, + "learning_rate": 0.0001587678246530924, + "loss": 2.0192, + "step": 254995 + }, + { + "epoch": 0.6, + "grad_norm": 1.8515625, + "learning_rate": 0.00015876632920017023, + "loss": 2.0594, + "step": 255000 + }, + { + "epoch": 0.6, + "grad_norm": 2.09375, + "learning_rate": 0.0001587648337271724, + "loss": 2.0192, + "step": 255005 + }, + { + "epoch": 0.6, + "grad_norm": 2.0625, + "learning_rate": 0.00015876333823409947, + "loss": 2.032, + "step": 255010 + }, + { + "epoch": 0.6, + "grad_norm": 2.109375, + "learning_rate": 0.00015876184272095188, + "loss": 1.9432, + "step": 255015 + }, + { + "epoch": 0.6, + "grad_norm": 2.546875, + "learning_rate": 0.0001587603471877302, + "loss": 1.9081, + "step": 255020 + }, + { + "epoch": 0.6, + "grad_norm": 2.21875, + "learning_rate": 0.00015875885163443493, + "loss": 1.9347, + "step": 255025 + }, + { + "epoch": 0.6, + "grad_norm": 3.8125, + "learning_rate": 0.00015875735606106657, + "loss": 1.9837, + "step": 255030 + }, + { + "epoch": 0.6, + "grad_norm": 2.21875, + "learning_rate": 0.0001587558604676256, + "loss": 2.1726, + "step": 255035 + }, + { + "epoch": 0.6, + "grad_norm": 2.140625, + "learning_rate": 0.0001587543648541126, + "loss": 2.1533, + "step": 255040 + }, + { + "epoch": 0.6, + "grad_norm": 2.0, + "learning_rate": 0.000158752869220528, + "loss": 1.9274, + "step": 255045 + }, + { + "epoch": 0.6, + "grad_norm": 2.484375, + "learning_rate": 0.00015875137356687237, + "loss": 2.0766, + "step": 255050 + }, + { + "epoch": 0.6, + "grad_norm": 1.953125, + "learning_rate": 0.00015874987789314625, + "loss": 2.0787, + "step": 255055 + }, + { + "epoch": 0.6, + "grad_norm": 1.9296875, + "learning_rate": 0.00015874838219935005, + "loss": 2.0006, + "step": 255060 + }, + { + "epoch": 0.6, + "grad_norm": 2.25, + "learning_rate": 0.00015874688648548436, + "loss": 2.1357, + "step": 255065 + }, + { + "epoch": 0.6, + "grad_norm": 2.25, + "learning_rate": 0.00015874539075154967, + "loss": 2.1198, + "step": 255070 + }, + { + "epoch": 0.6, + "grad_norm": 1.9453125, + "learning_rate": 0.00015874389499754646, + "loss": 1.966, + "step": 255075 + }, + { + "epoch": 0.6, + "grad_norm": 2.28125, + "learning_rate": 0.00015874239922347532, + "loss": 2.1759, + "step": 255080 + }, + { + "epoch": 0.6, + "grad_norm": 2.25, + "learning_rate": 0.0001587409034293367, + "loss": 2.0736, + "step": 255085 + }, + { + "epoch": 0.6, + "grad_norm": 2.328125, + "learning_rate": 0.00015873940761513107, + "loss": 2.3104, + "step": 255090 + }, + { + "epoch": 0.6, + "grad_norm": 2.0625, + "learning_rate": 0.00015873791178085906, + "loss": 2.0345, + "step": 255095 + }, + { + "epoch": 0.6, + "grad_norm": 2.203125, + "learning_rate": 0.0001587364159265211, + "loss": 2.1513, + "step": 255100 + }, + { + "epoch": 0.6, + "grad_norm": 2.1875, + "learning_rate": 0.00015873492005211766, + "loss": 2.2374, + "step": 255105 + }, + { + "epoch": 0.6, + "grad_norm": 2.4375, + "learning_rate": 0.0001587334241576494, + "loss": 1.999, + "step": 255110 + }, + { + "epoch": 0.6, + "grad_norm": 2.6875, + "learning_rate": 0.00015873192824311666, + "loss": 2.1764, + "step": 255115 + }, + { + "epoch": 0.6, + "grad_norm": 2.609375, + "learning_rate": 0.00015873043230852006, + "loss": 2.2139, + "step": 255120 + }, + { + "epoch": 0.6, + "grad_norm": 2.375, + "learning_rate": 0.00015872893635386008, + "loss": 2.0536, + "step": 255125 + }, + { + "epoch": 0.6, + "grad_norm": 2.0625, + "learning_rate": 0.00015872744037913718, + "loss": 2.0078, + "step": 255130 + }, + { + "epoch": 0.6, + "grad_norm": 2.03125, + "learning_rate": 0.00015872594438435197, + "loss": 2.1641, + "step": 255135 + }, + { + "epoch": 0.6, + "grad_norm": 2.171875, + "learning_rate": 0.00015872444836950492, + "loss": 2.0671, + "step": 255140 + }, + { + "epoch": 0.6, + "grad_norm": 2.15625, + "learning_rate": 0.00015872295233459652, + "loss": 1.891, + "step": 255145 + }, + { + "epoch": 0.6, + "grad_norm": 2.15625, + "learning_rate": 0.00015872145627962732, + "loss": 2.1572, + "step": 255150 + }, + { + "epoch": 0.6, + "grad_norm": 2.203125, + "learning_rate": 0.00015871996020459773, + "loss": 2.004, + "step": 255155 + }, + { + "epoch": 0.6, + "grad_norm": 2.21875, + "learning_rate": 0.0001587184641095084, + "loss": 1.9633, + "step": 255160 + }, + { + "epoch": 0.6, + "grad_norm": 2.25, + "learning_rate": 0.0001587169679943598, + "loss": 2.1813, + "step": 255165 + }, + { + "epoch": 0.6, + "grad_norm": 1.9453125, + "learning_rate": 0.00015871547185915237, + "loss": 2.0368, + "step": 255170 + }, + { + "epoch": 0.6, + "grad_norm": 2.046875, + "learning_rate": 0.00015871397570388666, + "loss": 1.9956, + "step": 255175 + }, + { + "epoch": 0.6, + "grad_norm": 1.8515625, + "learning_rate": 0.00015871247952856321, + "loss": 1.8572, + "step": 255180 + }, + { + "epoch": 0.6, + "grad_norm": 2.515625, + "learning_rate": 0.00015871098333318252, + "loss": 2.2709, + "step": 255185 + }, + { + "epoch": 0.6, + "grad_norm": 1.734375, + "learning_rate": 0.00015870948711774508, + "loss": 2.0935, + "step": 255190 + }, + { + "epoch": 0.6, + "grad_norm": 1.875, + "learning_rate": 0.00015870799088225142, + "loss": 1.954, + "step": 255195 + }, + { + "epoch": 0.6, + "grad_norm": 1.859375, + "learning_rate": 0.00015870649462670206, + "loss": 2.008, + "step": 255200 + }, + { + "epoch": 0.6, + "grad_norm": 2.140625, + "learning_rate": 0.0001587049983510975, + "loss": 2.0905, + "step": 255205 + }, + { + "epoch": 0.6, + "grad_norm": 2.203125, + "learning_rate": 0.0001587035020554382, + "loss": 1.9125, + "step": 255210 + }, + { + "epoch": 0.6, + "grad_norm": 1.71875, + "learning_rate": 0.0001587020057397248, + "loss": 2.2672, + "step": 255215 + }, + { + "epoch": 0.6, + "grad_norm": 2.0625, + "learning_rate": 0.00015870050940395764, + "loss": 2.053, + "step": 255220 + }, + { + "epoch": 0.6, + "grad_norm": 2.3125, + "learning_rate": 0.00015869901304813738, + "loss": 2.0982, + "step": 255225 + }, + { + "epoch": 0.6, + "grad_norm": 2.390625, + "learning_rate": 0.00015869751667226445, + "loss": 1.946, + "step": 255230 + }, + { + "epoch": 0.6, + "grad_norm": 2.828125, + "learning_rate": 0.0001586960202763394, + "loss": 2.0171, + "step": 255235 + }, + { + "epoch": 0.6, + "grad_norm": 2.203125, + "learning_rate": 0.0001586945238603627, + "loss": 2.0251, + "step": 255240 + }, + { + "epoch": 0.6, + "grad_norm": 2.84375, + "learning_rate": 0.00015869302742433492, + "loss": 1.9498, + "step": 255245 + }, + { + "epoch": 0.6, + "grad_norm": 2.3125, + "learning_rate": 0.0001586915309682565, + "loss": 2.1705, + "step": 255250 + }, + { + "epoch": 0.6, + "grad_norm": 2.25, + "learning_rate": 0.000158690034492128, + "loss": 2.1988, + "step": 255255 + }, + { + "epoch": 0.6, + "grad_norm": 2.015625, + "learning_rate": 0.0001586885379959499, + "loss": 2.0463, + "step": 255260 + }, + { + "epoch": 0.6, + "grad_norm": 1.8671875, + "learning_rate": 0.0001586870414797228, + "loss": 2.0786, + "step": 255265 + }, + { + "epoch": 0.6, + "grad_norm": 1.8359375, + "learning_rate": 0.0001586855449434471, + "loss": 2.0109, + "step": 255270 + }, + { + "epoch": 0.6, + "grad_norm": 1.828125, + "learning_rate": 0.00015868404838712333, + "loss": 1.9729, + "step": 255275 + }, + { + "epoch": 0.6, + "grad_norm": 2.21875, + "learning_rate": 0.00015868255181075206, + "loss": 2.0442, + "step": 255280 + }, + { + "epoch": 0.6, + "grad_norm": 2.1875, + "learning_rate": 0.00015868105521433376, + "loss": 2.2218, + "step": 255285 + }, + { + "epoch": 0.6, + "grad_norm": 2.3125, + "learning_rate": 0.00015867955859786897, + "loss": 2.0335, + "step": 255290 + }, + { + "epoch": 0.6, + "grad_norm": 2.21875, + "learning_rate": 0.00015867806196135812, + "loss": 2.1174, + "step": 255295 + }, + { + "epoch": 0.6, + "grad_norm": 2.015625, + "learning_rate": 0.0001586765653048018, + "loss": 2.1782, + "step": 255300 + }, + { + "epoch": 0.6, + "grad_norm": 2.3125, + "learning_rate": 0.00015867506862820053, + "loss": 2.0657, + "step": 255305 + }, + { + "epoch": 0.6, + "grad_norm": 1.9921875, + "learning_rate": 0.00015867357193155476, + "loss": 2.1518, + "step": 255310 + }, + { + "epoch": 0.6, + "grad_norm": 2.078125, + "learning_rate": 0.00015867207521486507, + "loss": 2.1528, + "step": 255315 + }, + { + "epoch": 0.6, + "grad_norm": 1.796875, + "learning_rate": 0.0001586705784781319, + "loss": 2.0446, + "step": 255320 + }, + { + "epoch": 0.6, + "grad_norm": 1.9609375, + "learning_rate": 0.00015866908172135582, + "loss": 2.0899, + "step": 255325 + }, + { + "epoch": 0.6, + "grad_norm": 2.484375, + "learning_rate": 0.00015866758494453728, + "loss": 2.0936, + "step": 255330 + }, + { + "epoch": 0.6, + "grad_norm": 1.984375, + "learning_rate": 0.00015866608814767686, + "loss": 2.0876, + "step": 255335 + }, + { + "epoch": 0.6, + "grad_norm": 2.265625, + "learning_rate": 0.00015866459133077507, + "loss": 1.9865, + "step": 255340 + }, + { + "epoch": 0.6, + "grad_norm": 2.09375, + "learning_rate": 0.00015866309449383232, + "loss": 2.1239, + "step": 255345 + }, + { + "epoch": 0.6, + "grad_norm": 2.4375, + "learning_rate": 0.00015866159763684923, + "loss": 1.975, + "step": 255350 + }, + { + "epoch": 0.6, + "grad_norm": 2.359375, + "learning_rate": 0.0001586601007598263, + "loss": 1.9507, + "step": 255355 + }, + { + "epoch": 0.6, + "grad_norm": 1.9296875, + "learning_rate": 0.000158658603862764, + "loss": 2.1759, + "step": 255360 + }, + { + "epoch": 0.6, + "grad_norm": 1.96875, + "learning_rate": 0.00015865710694566284, + "loss": 2.1681, + "step": 255365 + }, + { + "epoch": 0.6, + "grad_norm": 2.1875, + "learning_rate": 0.00015865561000852333, + "loss": 2.008, + "step": 255370 + }, + { + "epoch": 0.6, + "grad_norm": 1.7578125, + "learning_rate": 0.00015865411305134605, + "loss": 1.6932, + "step": 255375 + }, + { + "epoch": 0.6, + "grad_norm": 2.5625, + "learning_rate": 0.00015865261607413142, + "loss": 2.1466, + "step": 255380 + }, + { + "epoch": 0.6, + "grad_norm": 2.15625, + "learning_rate": 0.00015865111907688003, + "loss": 2.0812, + "step": 255385 + }, + { + "epoch": 0.6, + "grad_norm": 1.8203125, + "learning_rate": 0.00015864962205959233, + "loss": 2.1129, + "step": 255390 + }, + { + "epoch": 0.6, + "grad_norm": 2.015625, + "learning_rate": 0.00015864812502226885, + "loss": 1.8881, + "step": 255395 + }, + { + "epoch": 0.6, + "grad_norm": 2.0, + "learning_rate": 0.0001586466279649101, + "loss": 2.1183, + "step": 255400 + }, + { + "epoch": 0.6, + "grad_norm": 2.328125, + "learning_rate": 0.00015864513088751663, + "loss": 2.0661, + "step": 255405 + }, + { + "epoch": 0.6, + "grad_norm": 2.3125, + "learning_rate": 0.0001586436337900889, + "loss": 2.1293, + "step": 255410 + }, + { + "epoch": 0.6, + "grad_norm": 2.09375, + "learning_rate": 0.00015864213667262747, + "loss": 2.0863, + "step": 255415 + }, + { + "epoch": 0.6, + "grad_norm": 1.9453125, + "learning_rate": 0.0001586406395351328, + "loss": 2.273, + "step": 255420 + }, + { + "epoch": 0.6, + "grad_norm": 1.6640625, + "learning_rate": 0.0001586391423776054, + "loss": 1.9126, + "step": 255425 + }, + { + "epoch": 0.6, + "grad_norm": 2.1875, + "learning_rate": 0.00015863764520004583, + "loss": 1.9706, + "step": 255430 + }, + { + "epoch": 0.6, + "grad_norm": 2.6875, + "learning_rate": 0.00015863614800245456, + "loss": 2.0902, + "step": 255435 + }, + { + "epoch": 0.6, + "grad_norm": 2.1875, + "learning_rate": 0.00015863465078483214, + "loss": 2.0641, + "step": 255440 + }, + { + "epoch": 0.6, + "grad_norm": 2.390625, + "learning_rate": 0.00015863315354717908, + "loss": 2.0486, + "step": 255445 + }, + { + "epoch": 0.6, + "grad_norm": 1.96875, + "learning_rate": 0.00015863165628949585, + "loss": 2.1399, + "step": 255450 + }, + { + "epoch": 0.6, + "grad_norm": 2.0, + "learning_rate": 0.00015863015901178295, + "loss": 1.9993, + "step": 255455 + }, + { + "epoch": 0.6, + "grad_norm": 2.484375, + "learning_rate": 0.00015862866171404094, + "loss": 2.1671, + "step": 255460 + }, + { + "epoch": 0.6, + "grad_norm": 1.671875, + "learning_rate": 0.00015862716439627036, + "loss": 2.1858, + "step": 255465 + }, + { + "epoch": 0.6, + "grad_norm": 2.265625, + "learning_rate": 0.00015862566705847163, + "loss": 2.0274, + "step": 255470 + }, + { + "epoch": 0.6, + "grad_norm": 2.359375, + "learning_rate": 0.00015862416970064533, + "loss": 1.9735, + "step": 255475 + }, + { + "epoch": 0.6, + "grad_norm": 2.1875, + "learning_rate": 0.00015862267232279192, + "loss": 2.0803, + "step": 255480 + }, + { + "epoch": 0.6, + "grad_norm": 2.125, + "learning_rate": 0.00015862117492491197, + "loss": 2.131, + "step": 255485 + }, + { + "epoch": 0.6, + "grad_norm": 1.65625, + "learning_rate": 0.00015861967750700596, + "loss": 2.1243, + "step": 255490 + }, + { + "epoch": 0.6, + "grad_norm": 2.25, + "learning_rate": 0.00015861818006907442, + "loss": 2.089, + "step": 255495 + }, + { + "epoch": 0.6, + "grad_norm": 2.703125, + "learning_rate": 0.00015861668261111785, + "loss": 2.0015, + "step": 255500 + }, + { + "epoch": 0.6, + "grad_norm": 2.375, + "learning_rate": 0.00015861518513313668, + "loss": 1.9826, + "step": 255505 + }, + { + "epoch": 0.6, + "grad_norm": 2.1875, + "learning_rate": 0.00015861368763513158, + "loss": 2.0588, + "step": 255510 + }, + { + "epoch": 0.6, + "grad_norm": 2.46875, + "learning_rate": 0.00015861219011710297, + "loss": 2.1443, + "step": 255515 + }, + { + "epoch": 0.6, + "grad_norm": 2.71875, + "learning_rate": 0.00015861069257905137, + "loss": 2.2595, + "step": 255520 + }, + { + "epoch": 0.6, + "grad_norm": 2.375, + "learning_rate": 0.0001586091950209773, + "loss": 2.2112, + "step": 255525 + }, + { + "epoch": 0.6, + "grad_norm": 1.9609375, + "learning_rate": 0.00015860769744288123, + "loss": 2.1076, + "step": 255530 + }, + { + "epoch": 0.6, + "grad_norm": 2.421875, + "learning_rate": 0.00015860619984476375, + "loss": 1.9314, + "step": 255535 + }, + { + "epoch": 0.6, + "grad_norm": 2.359375, + "learning_rate": 0.0001586047022266253, + "loss": 1.9143, + "step": 255540 + }, + { + "epoch": 0.6, + "grad_norm": 2.53125, + "learning_rate": 0.00015860320458846645, + "loss": 1.9712, + "step": 255545 + }, + { + "epoch": 0.6, + "grad_norm": 2.421875, + "learning_rate": 0.00015860170693028763, + "loss": 1.9029, + "step": 255550 + }, + { + "epoch": 0.6, + "grad_norm": 2.328125, + "learning_rate": 0.00015860020925208946, + "loss": 2.2476, + "step": 255555 + }, + { + "epoch": 0.6, + "grad_norm": 2.34375, + "learning_rate": 0.00015859871155387236, + "loss": 1.9362, + "step": 255560 + }, + { + "epoch": 0.6, + "grad_norm": 2.0625, + "learning_rate": 0.0001585972138356369, + "loss": 2.0565, + "step": 255565 + }, + { + "epoch": 0.6, + "grad_norm": 1.8671875, + "learning_rate": 0.00015859571609738358, + "loss": 1.9948, + "step": 255570 + }, + { + "epoch": 0.6, + "grad_norm": 2.125, + "learning_rate": 0.00015859421833911284, + "loss": 2.1619, + "step": 255575 + }, + { + "epoch": 0.6, + "grad_norm": 2.125, + "learning_rate": 0.00015859272056082532, + "loss": 1.9717, + "step": 255580 + }, + { + "epoch": 0.6, + "grad_norm": 2.046875, + "learning_rate": 0.00015859122276252143, + "loss": 2.0148, + "step": 255585 + }, + { + "epoch": 0.6, + "grad_norm": 2.234375, + "learning_rate": 0.00015858972494420172, + "loss": 2.0931, + "step": 255590 + }, + { + "epoch": 0.6, + "grad_norm": 2.0625, + "learning_rate": 0.0001585882271058667, + "loss": 2.0109, + "step": 255595 + }, + { + "epoch": 0.6, + "grad_norm": 2.359375, + "learning_rate": 0.0001585867292475169, + "loss": 1.9726, + "step": 255600 + }, + { + "epoch": 0.6, + "grad_norm": 2.109375, + "learning_rate": 0.00015858523136915277, + "loss": 2.1728, + "step": 255605 + }, + { + "epoch": 0.6, + "grad_norm": 2.25, + "learning_rate": 0.00015858373347077488, + "loss": 1.8003, + "step": 255610 + }, + { + "epoch": 0.6, + "grad_norm": 2.15625, + "learning_rate": 0.00015858223555238374, + "loss": 2.1411, + "step": 255615 + }, + { + "epoch": 0.6, + "grad_norm": 2.0, + "learning_rate": 0.0001585807376139798, + "loss": 1.9698, + "step": 255620 + }, + { + "epoch": 0.6, + "grad_norm": 2.09375, + "learning_rate": 0.0001585792396555637, + "loss": 2.161, + "step": 255625 + }, + { + "epoch": 0.6, + "grad_norm": 2.265625, + "learning_rate": 0.0001585777416771358, + "loss": 2.0452, + "step": 255630 + }, + { + "epoch": 0.6, + "grad_norm": 2.4375, + "learning_rate": 0.0001585762436786967, + "loss": 2.0826, + "step": 255635 + }, + { + "epoch": 0.6, + "grad_norm": 2.21875, + "learning_rate": 0.00015857474566024688, + "loss": 1.9902, + "step": 255640 + }, + { + "epoch": 0.6, + "grad_norm": 1.9375, + "learning_rate": 0.0001585732476217869, + "loss": 2.0319, + "step": 255645 + }, + { + "epoch": 0.6, + "grad_norm": 2.1875, + "learning_rate": 0.00015857174956331721, + "loss": 2.0555, + "step": 255650 + }, + { + "epoch": 0.6, + "grad_norm": 2.234375, + "learning_rate": 0.00015857025148483835, + "loss": 2.151, + "step": 255655 + }, + { + "epoch": 0.6, + "grad_norm": 2.046875, + "learning_rate": 0.00015856875338635083, + "loss": 2.1477, + "step": 255660 + }, + { + "epoch": 0.6, + "grad_norm": 2.21875, + "learning_rate": 0.00015856725526785517, + "loss": 2.1518, + "step": 255665 + }, + { + "epoch": 0.6, + "grad_norm": 2.34375, + "learning_rate": 0.00015856575712935186, + "loss": 2.1455, + "step": 255670 + }, + { + "epoch": 0.6, + "grad_norm": 1.75, + "learning_rate": 0.00015856425897084142, + "loss": 1.9489, + "step": 255675 + }, + { + "epoch": 0.6, + "grad_norm": 2.21875, + "learning_rate": 0.0001585627607923244, + "loss": 2.1832, + "step": 255680 + }, + { + "epoch": 0.6, + "grad_norm": 2.078125, + "learning_rate": 0.00015856126259380124, + "loss": 2.1389, + "step": 255685 + }, + { + "epoch": 0.6, + "grad_norm": 2.390625, + "learning_rate": 0.0001585597643752725, + "loss": 2.067, + "step": 255690 + }, + { + "epoch": 0.6, + "grad_norm": 2.328125, + "learning_rate": 0.00015855826613673873, + "loss": 2.0972, + "step": 255695 + }, + { + "epoch": 0.6, + "grad_norm": 2.203125, + "learning_rate": 0.00015855676787820033, + "loss": 1.9175, + "step": 255700 + }, + { + "epoch": 0.6, + "grad_norm": 2.0, + "learning_rate": 0.0001585552695996579, + "loss": 2.0243, + "step": 255705 + }, + { + "epoch": 0.6, + "grad_norm": 2.3125, + "learning_rate": 0.00015855377130111195, + "loss": 2.1555, + "step": 255710 + }, + { + "epoch": 0.6, + "grad_norm": 2.0625, + "learning_rate": 0.00015855227298256294, + "loss": 2.2768, + "step": 255715 + }, + { + "epoch": 0.6, + "grad_norm": 1.9765625, + "learning_rate": 0.0001585507746440114, + "loss": 2.0534, + "step": 255720 + }, + { + "epoch": 0.6, + "grad_norm": 2.34375, + "learning_rate": 0.0001585492762854579, + "loss": 1.9731, + "step": 255725 + }, + { + "epoch": 0.6, + "grad_norm": 2.234375, + "learning_rate": 0.00015854777790690286, + "loss": 2.2586, + "step": 255730 + }, + { + "epoch": 0.6, + "grad_norm": 1.84375, + "learning_rate": 0.00015854627950834686, + "loss": 2.0046, + "step": 255735 + }, + { + "epoch": 0.6, + "grad_norm": 2.1875, + "learning_rate": 0.00015854478108979041, + "loss": 2.1181, + "step": 255740 + }, + { + "epoch": 0.6, + "grad_norm": 2.21875, + "learning_rate": 0.00015854328265123394, + "loss": 2.0687, + "step": 255745 + }, + { + "epoch": 0.6, + "grad_norm": 2.671875, + "learning_rate": 0.00015854178419267808, + "loss": 2.0585, + "step": 255750 + }, + { + "epoch": 0.6, + "grad_norm": 2.125, + "learning_rate": 0.00015854028571412327, + "loss": 2.2108, + "step": 255755 + }, + { + "epoch": 0.6, + "grad_norm": 2.234375, + "learning_rate": 0.00015853878721557005, + "loss": 2.1213, + "step": 255760 + }, + { + "epoch": 0.6, + "grad_norm": 2.453125, + "learning_rate": 0.0001585372886970189, + "loss": 2.2286, + "step": 255765 + }, + { + "epoch": 0.6, + "grad_norm": 2.75, + "learning_rate": 0.00015853579015847032, + "loss": 2.0618, + "step": 255770 + }, + { + "epoch": 0.6, + "grad_norm": 2.265625, + "learning_rate": 0.00015853429159992487, + "loss": 1.9109, + "step": 255775 + }, + { + "epoch": 0.6, + "grad_norm": 2.1875, + "learning_rate": 0.0001585327930213831, + "loss": 2.1142, + "step": 255780 + }, + { + "epoch": 0.6, + "grad_norm": 2.234375, + "learning_rate": 0.00015853129442284543, + "loss": 2.1082, + "step": 255785 + }, + { + "epoch": 0.6, + "grad_norm": 2.234375, + "learning_rate": 0.00015852979580431237, + "loss": 2.0769, + "step": 255790 + }, + { + "epoch": 0.6, + "grad_norm": 1.453125, + "learning_rate": 0.00015852829716578453, + "loss": 2.0132, + "step": 255795 + }, + { + "epoch": 0.6, + "grad_norm": 1.8203125, + "learning_rate": 0.0001585267985072623, + "loss": 2.0493, + "step": 255800 + }, + { + "epoch": 0.6, + "grad_norm": 2.546875, + "learning_rate": 0.00015852529982874632, + "loss": 2.2128, + "step": 255805 + }, + { + "epoch": 0.6, + "grad_norm": 2.109375, + "learning_rate": 0.000158523801130237, + "loss": 2.056, + "step": 255810 + }, + { + "epoch": 0.6, + "grad_norm": 2.265625, + "learning_rate": 0.00015852230241173492, + "loss": 1.9536, + "step": 255815 + }, + { + "epoch": 0.6, + "grad_norm": 2.140625, + "learning_rate": 0.00015852080367324053, + "loss": 2.0501, + "step": 255820 + }, + { + "epoch": 0.6, + "grad_norm": 2.109375, + "learning_rate": 0.00015851930491475434, + "loss": 1.9019, + "step": 255825 + }, + { + "epoch": 0.6, + "grad_norm": 2.21875, + "learning_rate": 0.00015851780613627693, + "loss": 2.1688, + "step": 255830 + }, + { + "epoch": 0.6, + "grad_norm": 2.109375, + "learning_rate": 0.00015851630733780878, + "loss": 2.1115, + "step": 255835 + }, + { + "epoch": 0.6, + "grad_norm": 1.875, + "learning_rate": 0.0001585148085193504, + "loss": 2.1844, + "step": 255840 + }, + { + "epoch": 0.6, + "grad_norm": 1.9921875, + "learning_rate": 0.0001585133096809023, + "loss": 2.2261, + "step": 255845 + }, + { + "epoch": 0.6, + "grad_norm": 2.265625, + "learning_rate": 0.00015851181082246498, + "loss": 2.0797, + "step": 255850 + }, + { + "epoch": 0.6, + "grad_norm": 2.03125, + "learning_rate": 0.00015851031194403898, + "loss": 2.2052, + "step": 255855 + }, + { + "epoch": 0.6, + "grad_norm": 1.890625, + "learning_rate": 0.0001585088130456248, + "loss": 2.2179, + "step": 255860 + }, + { + "epoch": 0.6, + "grad_norm": 2.140625, + "learning_rate": 0.0001585073141272229, + "loss": 2.1754, + "step": 255865 + }, + { + "epoch": 0.6, + "grad_norm": 2.15625, + "learning_rate": 0.0001585058151888339, + "loss": 2.1274, + "step": 255870 + }, + { + "epoch": 0.6, + "grad_norm": 2.53125, + "learning_rate": 0.00015850431623045823, + "loss": 1.9827, + "step": 255875 + }, + { + "epoch": 0.6, + "grad_norm": 2.328125, + "learning_rate": 0.0001585028172520964, + "loss": 2.1743, + "step": 255880 + }, + { + "epoch": 0.6, + "grad_norm": 2.203125, + "learning_rate": 0.00015850131825374898, + "loss": 2.1712, + "step": 255885 + }, + { + "epoch": 0.6, + "grad_norm": 1.875, + "learning_rate": 0.00015849981923541644, + "loss": 1.946, + "step": 255890 + }, + { + "epoch": 0.6, + "grad_norm": 2.171875, + "learning_rate": 0.0001584983201970993, + "loss": 2.1084, + "step": 255895 + }, + { + "epoch": 0.6, + "grad_norm": 2.546875, + "learning_rate": 0.0001584968211387981, + "loss": 1.8885, + "step": 255900 + }, + { + "epoch": 0.6, + "grad_norm": 2.03125, + "learning_rate": 0.00015849532206051327, + "loss": 2.1904, + "step": 255905 + }, + { + "epoch": 0.6, + "grad_norm": 2.65625, + "learning_rate": 0.0001584938229622454, + "loss": 1.8624, + "step": 255910 + }, + { + "epoch": 0.6, + "grad_norm": 2.1875, + "learning_rate": 0.00015849232384399498, + "loss": 2.1419, + "step": 255915 + }, + { + "epoch": 0.6, + "grad_norm": 2.203125, + "learning_rate": 0.00015849082470576257, + "loss": 2.1196, + "step": 255920 + }, + { + "epoch": 0.6, + "grad_norm": 2.484375, + "learning_rate": 0.00015848932554754857, + "loss": 2.164, + "step": 255925 + }, + { + "epoch": 0.6, + "grad_norm": 2.171875, + "learning_rate": 0.00015848782636935355, + "loss": 2.147, + "step": 255930 + }, + { + "epoch": 0.6, + "grad_norm": 2.546875, + "learning_rate": 0.00015848632717117806, + "loss": 2.0994, + "step": 255935 + }, + { + "epoch": 0.6, + "grad_norm": 2.484375, + "learning_rate": 0.00015848482795302257, + "loss": 2.1292, + "step": 255940 + }, + { + "epoch": 0.6, + "grad_norm": 2.21875, + "learning_rate": 0.0001584833287148876, + "loss": 2.1095, + "step": 255945 + }, + { + "epoch": 0.6, + "grad_norm": 1.75, + "learning_rate": 0.00015848182945677367, + "loss": 2.0079, + "step": 255950 + }, + { + "epoch": 0.6, + "grad_norm": 2.1875, + "learning_rate": 0.00015848033017868129, + "loss": 2.033, + "step": 255955 + }, + { + "epoch": 0.6, + "grad_norm": 2.140625, + "learning_rate": 0.000158478830880611, + "loss": 2.2177, + "step": 255960 + }, + { + "epoch": 0.6, + "grad_norm": 1.890625, + "learning_rate": 0.0001584773315625632, + "loss": 2.1911, + "step": 255965 + }, + { + "epoch": 0.6, + "grad_norm": 2.109375, + "learning_rate": 0.00015847583222453854, + "loss": 2.1774, + "step": 255970 + }, + { + "epoch": 0.6, + "grad_norm": 1.9453125, + "learning_rate": 0.00015847433286653745, + "loss": 2.035, + "step": 255975 + }, + { + "epoch": 0.6, + "grad_norm": 2.265625, + "learning_rate": 0.00015847283348856047, + "loss": 2.0753, + "step": 255980 + }, + { + "epoch": 0.6, + "grad_norm": 2.21875, + "learning_rate": 0.00015847133409060813, + "loss": 2.0372, + "step": 255985 + }, + { + "epoch": 0.6, + "grad_norm": 2.25, + "learning_rate": 0.0001584698346726809, + "loss": 2.0978, + "step": 255990 + }, + { + "epoch": 0.6, + "grad_norm": 2.65625, + "learning_rate": 0.00015846833523477932, + "loss": 2.1094, + "step": 255995 + }, + { + "epoch": 0.6, + "grad_norm": 2.140625, + "learning_rate": 0.0001584668357769039, + "loss": 2.2921, + "step": 256000 + }, + { + "epoch": 0.6, + "grad_norm": 2.21875, + "learning_rate": 0.00015846533629905517, + "loss": 2.1474, + "step": 256005 + }, + { + "epoch": 0.6, + "grad_norm": 2.125, + "learning_rate": 0.00015846383680123357, + "loss": 2.1624, + "step": 256010 + }, + { + "epoch": 0.6, + "grad_norm": 2.546875, + "learning_rate": 0.00015846233728343973, + "loss": 2.1935, + "step": 256015 + }, + { + "epoch": 0.6, + "grad_norm": 2.171875, + "learning_rate": 0.00015846083774567404, + "loss": 2.3534, + "step": 256020 + }, + { + "epoch": 0.6, + "grad_norm": 2.140625, + "learning_rate": 0.0001584593381879371, + "loss": 2.1537, + "step": 256025 + }, + { + "epoch": 0.6, + "grad_norm": 1.9140625, + "learning_rate": 0.00015845783861022939, + "loss": 2.0127, + "step": 256030 + }, + { + "epoch": 0.6, + "grad_norm": 2.171875, + "learning_rate": 0.0001584563390125514, + "loss": 2.0432, + "step": 256035 + }, + { + "epoch": 0.6, + "grad_norm": 2.5625, + "learning_rate": 0.00015845483939490367, + "loss": 1.9862, + "step": 256040 + }, + { + "epoch": 0.6, + "grad_norm": 2.03125, + "learning_rate": 0.0001584533397572867, + "loss": 2.0178, + "step": 256045 + }, + { + "epoch": 0.6, + "grad_norm": 2.140625, + "learning_rate": 0.00015845184009970103, + "loss": 2.1259, + "step": 256050 + }, + { + "epoch": 0.6, + "grad_norm": 2.15625, + "learning_rate": 0.00015845034042214715, + "loss": 2.2191, + "step": 256055 + }, + { + "epoch": 0.6, + "grad_norm": 2.46875, + "learning_rate": 0.00015844884072462555, + "loss": 2.088, + "step": 256060 + }, + { + "epoch": 0.6, + "grad_norm": 1.859375, + "learning_rate": 0.00015844734100713676, + "loss": 1.9555, + "step": 256065 + }, + { + "epoch": 0.6, + "grad_norm": 2.15625, + "learning_rate": 0.00015844584126968134, + "loss": 2.137, + "step": 256070 + }, + { + "epoch": 0.6, + "grad_norm": 2.53125, + "learning_rate": 0.00015844434151225976, + "loss": 2.1613, + "step": 256075 + }, + { + "epoch": 0.6, + "grad_norm": 2.296875, + "learning_rate": 0.0001584428417348725, + "loss": 2.0678, + "step": 256080 + }, + { + "epoch": 0.6, + "grad_norm": 2.40625, + "learning_rate": 0.0001584413419375201, + "loss": 2.3281, + "step": 256085 + }, + { + "epoch": 0.6, + "grad_norm": 2.328125, + "learning_rate": 0.0001584398421202031, + "loss": 2.2709, + "step": 256090 + }, + { + "epoch": 0.6, + "grad_norm": 2.171875, + "learning_rate": 0.00015843834228292198, + "loss": 2.1292, + "step": 256095 + }, + { + "epoch": 0.6, + "grad_norm": 2.0, + "learning_rate": 0.00015843684242567728, + "loss": 2.1677, + "step": 256100 + }, + { + "epoch": 0.6, + "grad_norm": 2.453125, + "learning_rate": 0.00015843534254846947, + "loss": 2.2742, + "step": 256105 + }, + { + "epoch": 0.6, + "grad_norm": 2.015625, + "learning_rate": 0.0001584338426512991, + "loss": 2.0415, + "step": 256110 + }, + { + "epoch": 0.6, + "grad_norm": 2.1875, + "learning_rate": 0.0001584323427341667, + "loss": 2.0084, + "step": 256115 + }, + { + "epoch": 0.6, + "grad_norm": 2.421875, + "learning_rate": 0.0001584308427970727, + "loss": 2.0029, + "step": 256120 + }, + { + "epoch": 0.6, + "grad_norm": 2.359375, + "learning_rate": 0.00015842934284001768, + "loss": 2.0384, + "step": 256125 + }, + { + "epoch": 0.6, + "grad_norm": 2.125, + "learning_rate": 0.00015842784286300215, + "loss": 2.08, + "step": 256130 + }, + { + "epoch": 0.6, + "grad_norm": 2.453125, + "learning_rate": 0.0001584263428660266, + "loss": 2.1925, + "step": 256135 + }, + { + "epoch": 0.6, + "grad_norm": 2.203125, + "learning_rate": 0.00015842484284909155, + "loss": 2.2111, + "step": 256140 + }, + { + "epoch": 0.6, + "grad_norm": 2.296875, + "learning_rate": 0.0001584233428121975, + "loss": 1.9986, + "step": 256145 + }, + { + "epoch": 0.6, + "grad_norm": 1.9765625, + "learning_rate": 0.000158421842755345, + "loss": 2.1368, + "step": 256150 + }, + { + "epoch": 0.6, + "grad_norm": 2.3125, + "learning_rate": 0.00015842034267853455, + "loss": 2.1652, + "step": 256155 + }, + { + "epoch": 0.6, + "grad_norm": 1.96875, + "learning_rate": 0.00015841884258176663, + "loss": 2.252, + "step": 256160 + }, + { + "epoch": 0.6, + "grad_norm": 1.96875, + "learning_rate": 0.00015841734246504178, + "loss": 2.1473, + "step": 256165 + }, + { + "epoch": 0.6, + "grad_norm": 2.203125, + "learning_rate": 0.0001584158423283605, + "loss": 2.1794, + "step": 256170 + }, + { + "epoch": 0.6, + "grad_norm": 1.96875, + "learning_rate": 0.0001584143421717233, + "loss": 1.8776, + "step": 256175 + }, + { + "epoch": 0.6, + "grad_norm": 2.453125, + "learning_rate": 0.00015841284199513074, + "loss": 2.0502, + "step": 256180 + }, + { + "epoch": 0.6, + "grad_norm": 1.9765625, + "learning_rate": 0.00015841134179858326, + "loss": 2.2136, + "step": 256185 + }, + { + "epoch": 0.6, + "grad_norm": 1.9140625, + "learning_rate": 0.00015840984158208143, + "loss": 2.0451, + "step": 256190 + }, + { + "epoch": 0.6, + "grad_norm": 2.484375, + "learning_rate": 0.0001584083413456257, + "loss": 1.9379, + "step": 256195 + }, + { + "epoch": 0.6, + "grad_norm": 2.359375, + "learning_rate": 0.00015840684108921663, + "loss": 2.1534, + "step": 256200 + }, + { + "epoch": 0.6, + "grad_norm": 2.203125, + "learning_rate": 0.00015840534081285474, + "loss": 2.0607, + "step": 256205 + }, + { + "epoch": 0.6, + "grad_norm": 2.1875, + "learning_rate": 0.00015840384051654053, + "loss": 2.0017, + "step": 256210 + }, + { + "epoch": 0.6, + "grad_norm": 2.109375, + "learning_rate": 0.0001584023402002745, + "loss": 2.2571, + "step": 256215 + }, + { + "epoch": 0.6, + "grad_norm": 2.015625, + "learning_rate": 0.0001584008398640572, + "loss": 2.0414, + "step": 256220 + }, + { + "epoch": 0.6, + "grad_norm": 2.171875, + "learning_rate": 0.00015839933950788907, + "loss": 2.0799, + "step": 256225 + }, + { + "epoch": 0.6, + "grad_norm": 1.9375, + "learning_rate": 0.0001583978391317707, + "loss": 1.9743, + "step": 256230 + }, + { + "epoch": 0.6, + "grad_norm": 2.296875, + "learning_rate": 0.00015839633873570254, + "loss": 2.0604, + "step": 256235 + }, + { + "epoch": 0.6, + "grad_norm": 2.4375, + "learning_rate": 0.00015839483831968517, + "loss": 1.9878, + "step": 256240 + }, + { + "epoch": 0.6, + "grad_norm": 2.390625, + "learning_rate": 0.00015839333788371902, + "loss": 2.2106, + "step": 256245 + }, + { + "epoch": 0.6, + "grad_norm": 2.171875, + "learning_rate": 0.00015839183742780467, + "loss": 1.8846, + "step": 256250 + }, + { + "epoch": 0.6, + "grad_norm": 2.296875, + "learning_rate": 0.00015839033695194262, + "loss": 2.0291, + "step": 256255 + }, + { + "epoch": 0.6, + "grad_norm": 2.15625, + "learning_rate": 0.00015838883645613336, + "loss": 2.1031, + "step": 256260 + }, + { + "epoch": 0.6, + "grad_norm": 2.0, + "learning_rate": 0.0001583873359403774, + "loss": 1.9465, + "step": 256265 + }, + { + "epoch": 0.6, + "grad_norm": 2.125, + "learning_rate": 0.0001583858354046753, + "loss": 2.158, + "step": 256270 + }, + { + "epoch": 0.6, + "grad_norm": 1.75, + "learning_rate": 0.00015838433484902751, + "loss": 2.174, + "step": 256275 + }, + { + "epoch": 0.6, + "grad_norm": 2.0625, + "learning_rate": 0.00015838283427343456, + "loss": 2.0442, + "step": 256280 + }, + { + "epoch": 0.6, + "grad_norm": 2.609375, + "learning_rate": 0.000158381333677897, + "loss": 2.1007, + "step": 256285 + }, + { + "epoch": 0.6, + "grad_norm": 2.09375, + "learning_rate": 0.00015837983306241532, + "loss": 1.8808, + "step": 256290 + }, + { + "epoch": 0.6, + "grad_norm": 1.796875, + "learning_rate": 0.00015837833242699002, + "loss": 2.058, + "step": 256295 + }, + { + "epoch": 0.6, + "grad_norm": 1.9375, + "learning_rate": 0.00015837683177162163, + "loss": 2.0821, + "step": 256300 + }, + { + "epoch": 0.6, + "grad_norm": 2.234375, + "learning_rate": 0.00015837533109631067, + "loss": 2.3237, + "step": 256305 + }, + { + "epoch": 0.6, + "grad_norm": 2.0, + "learning_rate": 0.00015837383040105763, + "loss": 1.9916, + "step": 256310 + }, + { + "epoch": 0.6, + "grad_norm": 2.09375, + "learning_rate": 0.00015837232968586302, + "loss": 2.1862, + "step": 256315 + }, + { + "epoch": 0.6, + "grad_norm": 2.15625, + "learning_rate": 0.00015837082895072736, + "loss": 2.1043, + "step": 256320 + }, + { + "epoch": 0.6, + "grad_norm": 1.96875, + "learning_rate": 0.00015836932819565119, + "loss": 2.0691, + "step": 256325 + }, + { + "epoch": 0.6, + "grad_norm": 1.9609375, + "learning_rate": 0.00015836782742063496, + "loss": 2.1477, + "step": 256330 + }, + { + "epoch": 0.6, + "grad_norm": 2.203125, + "learning_rate": 0.00015836632662567923, + "loss": 2.1759, + "step": 256335 + }, + { + "epoch": 0.6, + "grad_norm": 2.234375, + "learning_rate": 0.0001583648258107845, + "loss": 2.1182, + "step": 256340 + }, + { + "epoch": 0.6, + "grad_norm": 1.8828125, + "learning_rate": 0.00015836332497595136, + "loss": 2.1111, + "step": 256345 + }, + { + "epoch": 0.6, + "grad_norm": 1.9140625, + "learning_rate": 0.00015836182412118016, + "loss": 1.8705, + "step": 256350 + }, + { + "epoch": 0.6, + "grad_norm": 2.125, + "learning_rate": 0.00015836032324647155, + "loss": 2.1159, + "step": 256355 + }, + { + "epoch": 0.6, + "grad_norm": 1.921875, + "learning_rate": 0.00015835882235182598, + "loss": 2.1783, + "step": 256360 + }, + { + "epoch": 0.6, + "grad_norm": 2.296875, + "learning_rate": 0.00015835732143724398, + "loss": 2.1127, + "step": 256365 + }, + { + "epoch": 0.6, + "grad_norm": 2.640625, + "learning_rate": 0.00015835582050272606, + "loss": 1.9129, + "step": 256370 + }, + { + "epoch": 0.6, + "grad_norm": 2.609375, + "learning_rate": 0.00015835431954827273, + "loss": 2.2006, + "step": 256375 + }, + { + "epoch": 0.6, + "grad_norm": 2.171875, + "learning_rate": 0.00015835281857388452, + "loss": 2.1503, + "step": 256380 + }, + { + "epoch": 0.6, + "grad_norm": 1.890625, + "learning_rate": 0.0001583513175795619, + "loss": 2.0974, + "step": 256385 + }, + { + "epoch": 0.6, + "grad_norm": 2.171875, + "learning_rate": 0.0001583498165653054, + "loss": 2.0929, + "step": 256390 + }, + { + "epoch": 0.6, + "grad_norm": 2.171875, + "learning_rate": 0.0001583483155311156, + "loss": 2.0819, + "step": 256395 + }, + { + "epoch": 0.6, + "grad_norm": 2.171875, + "learning_rate": 0.00015834681447699292, + "loss": 1.954, + "step": 256400 + }, + { + "epoch": 0.6, + "grad_norm": 2.03125, + "learning_rate": 0.00015834531340293794, + "loss": 1.939, + "step": 256405 + }, + { + "epoch": 0.6, + "grad_norm": 2.0625, + "learning_rate": 0.0001583438123089511, + "loss": 2.2945, + "step": 256410 + }, + { + "epoch": 0.6, + "grad_norm": 2.859375, + "learning_rate": 0.000158342311195033, + "loss": 2.0157, + "step": 256415 + }, + { + "epoch": 0.6, + "grad_norm": 2.09375, + "learning_rate": 0.0001583408100611841, + "loss": 2.1832, + "step": 256420 + }, + { + "epoch": 0.6, + "grad_norm": 1.8984375, + "learning_rate": 0.0001583393089074049, + "loss": 2.1333, + "step": 256425 + }, + { + "epoch": 0.6, + "grad_norm": 2.140625, + "learning_rate": 0.00015833780773369594, + "loss": 2.0353, + "step": 256430 + }, + { + "epoch": 0.6, + "grad_norm": 2.109375, + "learning_rate": 0.00015833630654005774, + "loss": 2.0555, + "step": 256435 + }, + { + "epoch": 0.6, + "grad_norm": 2.40625, + "learning_rate": 0.0001583348053264908, + "loss": 2.0885, + "step": 256440 + }, + { + "epoch": 0.6, + "grad_norm": 2.03125, + "learning_rate": 0.0001583333040929956, + "loss": 2.0324, + "step": 256445 + }, + { + "epoch": 0.6, + "grad_norm": 2.328125, + "learning_rate": 0.0001583318028395727, + "loss": 2.0691, + "step": 256450 + }, + { + "epoch": 0.6, + "grad_norm": 2.03125, + "learning_rate": 0.0001583303015662226, + "loss": 2.0615, + "step": 256455 + }, + { + "epoch": 0.6, + "grad_norm": 2.15625, + "learning_rate": 0.00015832880027294584, + "loss": 2.1695, + "step": 256460 + }, + { + "epoch": 0.6, + "grad_norm": 2.0, + "learning_rate": 0.0001583272989597429, + "loss": 2.1655, + "step": 256465 + }, + { + "epoch": 0.6, + "grad_norm": 2.265625, + "learning_rate": 0.0001583257976266143, + "loss": 2.0988, + "step": 256470 + }, + { + "epoch": 0.6, + "grad_norm": 2.46875, + "learning_rate": 0.00015832429627356048, + "loss": 2.1314, + "step": 256475 + }, + { + "epoch": 0.6, + "grad_norm": 2.265625, + "learning_rate": 0.0001583227949005821, + "loss": 2.2716, + "step": 256480 + }, + { + "epoch": 0.6, + "grad_norm": 2.0625, + "learning_rate": 0.00015832129350767956, + "loss": 2.0632, + "step": 256485 + }, + { + "epoch": 0.6, + "grad_norm": 2.625, + "learning_rate": 0.0001583197920948534, + "loss": 2.1263, + "step": 256490 + }, + { + "epoch": 0.6, + "grad_norm": 2.140625, + "learning_rate": 0.00015831829066210418, + "loss": 1.9778, + "step": 256495 + }, + { + "epoch": 0.6, + "grad_norm": 2.390625, + "learning_rate": 0.00015831678920943232, + "loss": 2.1158, + "step": 256500 + }, + { + "epoch": 0.6, + "grad_norm": 2.3125, + "learning_rate": 0.00015831528773683844, + "loss": 2.012, + "step": 256505 + }, + { + "epoch": 0.6, + "grad_norm": 2.21875, + "learning_rate": 0.00015831378624432297, + "loss": 2.0277, + "step": 256510 + }, + { + "epoch": 0.6, + "grad_norm": 1.890625, + "learning_rate": 0.00015831228473188645, + "loss": 2.0779, + "step": 256515 + }, + { + "epoch": 0.6, + "grad_norm": 2.109375, + "learning_rate": 0.00015831078319952942, + "loss": 2.0223, + "step": 256520 + }, + { + "epoch": 0.6, + "grad_norm": 1.9375, + "learning_rate": 0.00015830928164725235, + "loss": 2.1779, + "step": 256525 + }, + { + "epoch": 0.6, + "grad_norm": 2.09375, + "learning_rate": 0.00015830778007505577, + "loss": 2.1374, + "step": 256530 + }, + { + "epoch": 0.6, + "grad_norm": 1.78125, + "learning_rate": 0.0001583062784829402, + "loss": 1.8884, + "step": 256535 + }, + { + "epoch": 0.6, + "grad_norm": 2.1875, + "learning_rate": 0.00015830477687090615, + "loss": 2.0456, + "step": 256540 + }, + { + "epoch": 0.6, + "grad_norm": 2.0, + "learning_rate": 0.00015830327523895415, + "loss": 2.2666, + "step": 256545 + }, + { + "epoch": 0.6, + "grad_norm": 2.15625, + "learning_rate": 0.00015830177358708468, + "loss": 2.0693, + "step": 256550 + }, + { + "epoch": 0.6, + "grad_norm": 1.8203125, + "learning_rate": 0.00015830027191529825, + "loss": 2.1751, + "step": 256555 + }, + { + "epoch": 0.6, + "grad_norm": 2.171875, + "learning_rate": 0.0001582987702235954, + "loss": 2.1937, + "step": 256560 + }, + { + "epoch": 0.6, + "grad_norm": 2.734375, + "learning_rate": 0.00015829726851197663, + "loss": 1.9588, + "step": 256565 + }, + { + "epoch": 0.6, + "grad_norm": 2.0625, + "learning_rate": 0.00015829576678044247, + "loss": 2.2475, + "step": 256570 + }, + { + "epoch": 0.6, + "grad_norm": 1.9140625, + "learning_rate": 0.0001582942650289934, + "loss": 2.2357, + "step": 256575 + }, + { + "epoch": 0.6, + "grad_norm": 1.8984375, + "learning_rate": 0.00015829276325762996, + "loss": 2.048, + "step": 256580 + }, + { + "epoch": 0.6, + "grad_norm": 2.015625, + "learning_rate": 0.00015829126146635266, + "loss": 2.0459, + "step": 256585 + }, + { + "epoch": 0.6, + "grad_norm": 2.3125, + "learning_rate": 0.000158289759655162, + "loss": 1.9784, + "step": 256590 + }, + { + "epoch": 0.6, + "grad_norm": 2.296875, + "learning_rate": 0.0001582882578240585, + "loss": 2.1848, + "step": 256595 + }, + { + "epoch": 0.6, + "grad_norm": 2.1875, + "learning_rate": 0.00015828675597304268, + "loss": 1.9392, + "step": 256600 + }, + { + "epoch": 0.6, + "grad_norm": 2.40625, + "learning_rate": 0.00015828525410211506, + "loss": 1.8932, + "step": 256605 + }, + { + "epoch": 0.6, + "grad_norm": 2.1875, + "learning_rate": 0.00015828375221127613, + "loss": 2.0721, + "step": 256610 + }, + { + "epoch": 0.6, + "grad_norm": 2.140625, + "learning_rate": 0.00015828225030052639, + "loss": 2.07, + "step": 256615 + }, + { + "epoch": 0.6, + "grad_norm": 2.6875, + "learning_rate": 0.00015828074836986638, + "loss": 2.1496, + "step": 256620 + }, + { + "epoch": 0.6, + "grad_norm": 2.015625, + "learning_rate": 0.00015827924641929664, + "loss": 2.2337, + "step": 256625 + }, + { + "epoch": 0.6, + "grad_norm": 2.125, + "learning_rate": 0.00015827774444881763, + "loss": 2.0577, + "step": 256630 + }, + { + "epoch": 0.6, + "grad_norm": 1.7578125, + "learning_rate": 0.0001582762424584299, + "loss": 2.0564, + "step": 256635 + }, + { + "epoch": 0.6, + "grad_norm": 2.0625, + "learning_rate": 0.00015827474044813396, + "loss": 2.1277, + "step": 256640 + }, + { + "epoch": 0.6, + "grad_norm": 2.375, + "learning_rate": 0.00015827323841793026, + "loss": 2.1017, + "step": 256645 + }, + { + "epoch": 0.6, + "grad_norm": 2.0625, + "learning_rate": 0.00015827173636781943, + "loss": 1.9987, + "step": 256650 + }, + { + "epoch": 0.6, + "grad_norm": 2.328125, + "learning_rate": 0.00015827023429780188, + "loss": 2.002, + "step": 256655 + }, + { + "epoch": 0.6, + "grad_norm": 2.125, + "learning_rate": 0.00015826873220787813, + "loss": 2.2385, + "step": 256660 + }, + { + "epoch": 0.6, + "grad_norm": 2.203125, + "learning_rate": 0.0001582672300980488, + "loss": 2.0005, + "step": 256665 + }, + { + "epoch": 0.6, + "grad_norm": 2.203125, + "learning_rate": 0.00015826572796831428, + "loss": 2.1346, + "step": 256670 + }, + { + "epoch": 0.6, + "grad_norm": 2.515625, + "learning_rate": 0.00015826422581867515, + "loss": 2.1457, + "step": 256675 + }, + { + "epoch": 0.6, + "grad_norm": 1.84375, + "learning_rate": 0.00015826272364913185, + "loss": 2.033, + "step": 256680 + }, + { + "epoch": 0.6, + "grad_norm": 2.140625, + "learning_rate": 0.00015826122145968502, + "loss": 2.3289, + "step": 256685 + }, + { + "epoch": 0.6, + "grad_norm": 3.171875, + "learning_rate": 0.00015825971925033508, + "loss": 1.9328, + "step": 256690 + }, + { + "epoch": 0.6, + "grad_norm": 2.390625, + "learning_rate": 0.00015825821702108254, + "loss": 1.8879, + "step": 256695 + }, + { + "epoch": 0.6, + "grad_norm": 2.109375, + "learning_rate": 0.00015825671477192796, + "loss": 2.2427, + "step": 256700 + }, + { + "epoch": 0.6, + "grad_norm": 2.453125, + "learning_rate": 0.00015825521250287182, + "loss": 2.156, + "step": 256705 + }, + { + "epoch": 0.6, + "grad_norm": 2.28125, + "learning_rate": 0.00015825371021391463, + "loss": 2.1083, + "step": 256710 + }, + { + "epoch": 0.6, + "grad_norm": 2.390625, + "learning_rate": 0.00015825220790505691, + "loss": 2.2251, + "step": 256715 + }, + { + "epoch": 0.6, + "grad_norm": 1.921875, + "learning_rate": 0.0001582507055762992, + "loss": 2.1039, + "step": 256720 + }, + { + "epoch": 0.6, + "grad_norm": 1.9921875, + "learning_rate": 0.000158249203227642, + "loss": 1.8801, + "step": 256725 + }, + { + "epoch": 0.6, + "grad_norm": 1.78125, + "learning_rate": 0.00015824770085908583, + "loss": 2.3187, + "step": 256730 + }, + { + "epoch": 0.6, + "grad_norm": 2.140625, + "learning_rate": 0.00015824619847063117, + "loss": 2.0328, + "step": 256735 + }, + { + "epoch": 0.6, + "grad_norm": 2.5, + "learning_rate": 0.00015824469606227855, + "loss": 2.1334, + "step": 256740 + }, + { + "epoch": 0.6, + "grad_norm": 2.109375, + "learning_rate": 0.00015824319363402846, + "loss": 2.1018, + "step": 256745 + }, + { + "epoch": 0.6, + "grad_norm": 2.234375, + "learning_rate": 0.00015824169118588147, + "loss": 2.1653, + "step": 256750 + }, + { + "epoch": 0.6, + "grad_norm": 2.0, + "learning_rate": 0.00015824018871783804, + "loss": 2.0577, + "step": 256755 + }, + { + "epoch": 0.6, + "grad_norm": 1.8359375, + "learning_rate": 0.00015823868622989872, + "loss": 2.0249, + "step": 256760 + }, + { + "epoch": 0.6, + "grad_norm": 2.171875, + "learning_rate": 0.00015823718372206402, + "loss": 2.0198, + "step": 256765 + }, + { + "epoch": 0.6, + "grad_norm": 2.53125, + "learning_rate": 0.00015823568119433443, + "loss": 1.9854, + "step": 256770 + }, + { + "epoch": 0.6, + "grad_norm": 1.859375, + "learning_rate": 0.00015823417864671048, + "loss": 2.0159, + "step": 256775 + }, + { + "epoch": 0.6, + "grad_norm": 2.03125, + "learning_rate": 0.00015823267607919267, + "loss": 1.9968, + "step": 256780 + }, + { + "epoch": 0.6, + "grad_norm": 2.265625, + "learning_rate": 0.00015823117349178154, + "loss": 2.215, + "step": 256785 + }, + { + "epoch": 0.6, + "grad_norm": 2.015625, + "learning_rate": 0.00015822967088447755, + "loss": 1.9204, + "step": 256790 + }, + { + "epoch": 0.6, + "grad_norm": 2.5, + "learning_rate": 0.00015822816825728127, + "loss": 1.856, + "step": 256795 + }, + { + "epoch": 0.6, + "grad_norm": 2.203125, + "learning_rate": 0.00015822666561019318, + "loss": 2.1096, + "step": 256800 + }, + { + "epoch": 0.6, + "grad_norm": 2.0625, + "learning_rate": 0.00015822516294321384, + "loss": 2.1158, + "step": 256805 + }, + { + "epoch": 0.6, + "grad_norm": 2.125, + "learning_rate": 0.0001582236602563437, + "loss": 2.0091, + "step": 256810 + }, + { + "epoch": 0.6, + "grad_norm": 2.15625, + "learning_rate": 0.0001582221575495833, + "loss": 2.1051, + "step": 256815 + }, + { + "epoch": 0.6, + "grad_norm": 1.921875, + "learning_rate": 0.00015822065482293317, + "loss": 2.0815, + "step": 256820 + }, + { + "epoch": 0.6, + "grad_norm": 1.90625, + "learning_rate": 0.00015821915207639377, + "loss": 2.0524, + "step": 256825 + }, + { + "epoch": 0.6, + "grad_norm": 1.765625, + "learning_rate": 0.0001582176493099657, + "loss": 2.1566, + "step": 256830 + }, + { + "epoch": 0.6, + "grad_norm": 2.46875, + "learning_rate": 0.0001582161465236494, + "loss": 2.0663, + "step": 256835 + }, + { + "epoch": 0.6, + "grad_norm": 1.9375, + "learning_rate": 0.00015821464371744542, + "loss": 2.1038, + "step": 256840 + }, + { + "epoch": 0.6, + "grad_norm": 2.5625, + "learning_rate": 0.00015821314089135426, + "loss": 2.0975, + "step": 256845 + }, + { + "epoch": 0.6, + "grad_norm": 2.375, + "learning_rate": 0.00015821163804537641, + "loss": 2.0649, + "step": 256850 + }, + { + "epoch": 0.6, + "grad_norm": 1.8671875, + "learning_rate": 0.00015821013517951243, + "loss": 2.0042, + "step": 256855 + }, + { + "epoch": 0.6, + "grad_norm": 2.109375, + "learning_rate": 0.00015820863229376283, + "loss": 2.0324, + "step": 256860 + }, + { + "epoch": 0.6, + "grad_norm": 2.0625, + "learning_rate": 0.00015820712938812809, + "loss": 2.1143, + "step": 256865 + }, + { + "epoch": 0.6, + "grad_norm": 2.109375, + "learning_rate": 0.00015820562646260872, + "loss": 2.0539, + "step": 256870 + }, + { + "epoch": 0.6, + "grad_norm": 2.765625, + "learning_rate": 0.00015820412351720529, + "loss": 2.0722, + "step": 256875 + }, + { + "epoch": 0.6, + "grad_norm": 2.234375, + "learning_rate": 0.00015820262055191826, + "loss": 2.1271, + "step": 256880 + }, + { + "epoch": 0.6, + "grad_norm": 2.078125, + "learning_rate": 0.00015820111756674815, + "loss": 1.99, + "step": 256885 + }, + { + "epoch": 0.6, + "grad_norm": 2.09375, + "learning_rate": 0.0001581996145616955, + "loss": 2.1525, + "step": 256890 + }, + { + "epoch": 0.6, + "grad_norm": 2.171875, + "learning_rate": 0.00015819811153676075, + "loss": 2.008, + "step": 256895 + }, + { + "epoch": 0.6, + "grad_norm": 2.0625, + "learning_rate": 0.00015819660849194454, + "loss": 2.0106, + "step": 256900 + }, + { + "epoch": 0.6, + "grad_norm": 2.0, + "learning_rate": 0.00015819510542724728, + "loss": 1.8544, + "step": 256905 + }, + { + "epoch": 0.6, + "grad_norm": 2.078125, + "learning_rate": 0.0001581936023426695, + "loss": 2.1565, + "step": 256910 + }, + { + "epoch": 0.6, + "grad_norm": 1.90625, + "learning_rate": 0.00015819209923821176, + "loss": 2.0482, + "step": 256915 + }, + { + "epoch": 0.6, + "grad_norm": 2.25, + "learning_rate": 0.00015819059611387455, + "loss": 2.2377, + "step": 256920 + }, + { + "epoch": 0.6, + "grad_norm": 2.40625, + "learning_rate": 0.00015818909296965835, + "loss": 2.1978, + "step": 256925 + }, + { + "epoch": 0.6, + "grad_norm": 1.8046875, + "learning_rate": 0.00015818758980556372, + "loss": 2.0835, + "step": 256930 + }, + { + "epoch": 0.6, + "grad_norm": 1.6875, + "learning_rate": 0.00015818608662159116, + "loss": 2.003, + "step": 256935 + }, + { + "epoch": 0.6, + "grad_norm": 2.21875, + "learning_rate": 0.00015818458341774116, + "loss": 2.1139, + "step": 256940 + }, + { + "epoch": 0.6, + "grad_norm": 2.078125, + "learning_rate": 0.00015818308019401424, + "loss": 2.0343, + "step": 256945 + }, + { + "epoch": 0.6, + "grad_norm": 1.875, + "learning_rate": 0.00015818157695041097, + "loss": 2.0337, + "step": 256950 + }, + { + "epoch": 0.6, + "grad_norm": 1.9140625, + "learning_rate": 0.00015818007368693178, + "loss": 2.0671, + "step": 256955 + }, + { + "epoch": 0.6, + "grad_norm": 2.171875, + "learning_rate": 0.00015817857040357724, + "loss": 2.2022, + "step": 256960 + }, + { + "epoch": 0.6, + "grad_norm": 2.15625, + "learning_rate": 0.00015817706710034783, + "loss": 2.1158, + "step": 256965 + }, + { + "epoch": 0.6, + "grad_norm": 2.078125, + "learning_rate": 0.00015817556377724408, + "loss": 2.0099, + "step": 256970 + }, + { + "epoch": 0.6, + "grad_norm": 2.09375, + "learning_rate": 0.00015817406043426647, + "loss": 2.1291, + "step": 256975 + }, + { + "epoch": 0.6, + "grad_norm": 2.0625, + "learning_rate": 0.0001581725570714156, + "loss": 2.041, + "step": 256980 + }, + { + "epoch": 0.6, + "grad_norm": 2.28125, + "learning_rate": 0.00015817105368869192, + "loss": 2.1317, + "step": 256985 + }, + { + "epoch": 0.6, + "grad_norm": 2.4375, + "learning_rate": 0.00015816955028609596, + "loss": 2.065, + "step": 256990 + }, + { + "epoch": 0.6, + "grad_norm": 2.234375, + "learning_rate": 0.00015816804686362822, + "loss": 2.1188, + "step": 256995 + }, + { + "epoch": 0.6, + "grad_norm": 1.8125, + "learning_rate": 0.0001581665434212892, + "loss": 2.1298, + "step": 257000 + }, + { + "epoch": 0.6, + "grad_norm": 2.359375, + "learning_rate": 0.00015816503995907946, + "loss": 2.0266, + "step": 257005 + }, + { + "epoch": 0.6, + "grad_norm": 2.25, + "learning_rate": 0.00015816353647699947, + "loss": 1.9546, + "step": 257010 + }, + { + "epoch": 0.6, + "grad_norm": 2.171875, + "learning_rate": 0.00015816203297504974, + "loss": 2.148, + "step": 257015 + }, + { + "epoch": 0.6, + "grad_norm": 2.40625, + "learning_rate": 0.00015816052945323087, + "loss": 2.2743, + "step": 257020 + }, + { + "epoch": 0.6, + "grad_norm": 2.25, + "learning_rate": 0.00015815902591154326, + "loss": 1.9878, + "step": 257025 + }, + { + "epoch": 0.6, + "grad_norm": 2.0, + "learning_rate": 0.00015815752234998745, + "loss": 2.1296, + "step": 257030 + }, + { + "epoch": 0.6, + "grad_norm": 2.359375, + "learning_rate": 0.000158156018768564, + "loss": 2.0945, + "step": 257035 + }, + { + "epoch": 0.6, + "grad_norm": 1.96875, + "learning_rate": 0.00015815451516727343, + "loss": 2.1623, + "step": 257040 + }, + { + "epoch": 0.6, + "grad_norm": 2.078125, + "learning_rate": 0.00015815301154611618, + "loss": 1.8366, + "step": 257045 + }, + { + "epoch": 0.6, + "grad_norm": 2.0625, + "learning_rate": 0.00015815150790509283, + "loss": 2.0995, + "step": 257050 + }, + { + "epoch": 0.6, + "grad_norm": 2.21875, + "learning_rate": 0.00015815000424420386, + "loss": 2.0358, + "step": 257055 + }, + { + "epoch": 0.6, + "grad_norm": 2.21875, + "learning_rate": 0.0001581485005634498, + "loss": 2.1544, + "step": 257060 + }, + { + "epoch": 0.6, + "grad_norm": 2.328125, + "learning_rate": 0.00015814699686283117, + "loss": 2.0888, + "step": 257065 + }, + { + "epoch": 0.6, + "grad_norm": 2.0, + "learning_rate": 0.00015814549314234846, + "loss": 2.033, + "step": 257070 + }, + { + "epoch": 0.6, + "grad_norm": 1.984375, + "learning_rate": 0.0001581439894020022, + "loss": 2.2934, + "step": 257075 + }, + { + "epoch": 0.6, + "grad_norm": 2.1875, + "learning_rate": 0.00015814248564179287, + "loss": 1.8342, + "step": 257080 + }, + { + "epoch": 0.61, + "grad_norm": 2.0, + "learning_rate": 0.00015814098186172105, + "loss": 2.1329, + "step": 257085 + }, + { + "epoch": 0.61, + "grad_norm": 2.359375, + "learning_rate": 0.00015813947806178716, + "loss": 2.3082, + "step": 257090 + }, + { + "epoch": 0.61, + "grad_norm": 1.8046875, + "learning_rate": 0.00015813797424199183, + "loss": 2.2243, + "step": 257095 + }, + { + "epoch": 0.61, + "grad_norm": 2.078125, + "learning_rate": 0.0001581364704023355, + "loss": 1.9885, + "step": 257100 + }, + { + "epoch": 0.61, + "grad_norm": 2.609375, + "learning_rate": 0.0001581349665428187, + "loss": 2.13, + "step": 257105 + }, + { + "epoch": 0.61, + "grad_norm": 2.234375, + "learning_rate": 0.0001581334626634419, + "loss": 2.0839, + "step": 257110 + }, + { + "epoch": 0.61, + "grad_norm": 1.8359375, + "learning_rate": 0.0001581319587642057, + "loss": 2.2453, + "step": 257115 + }, + { + "epoch": 0.61, + "grad_norm": 2.140625, + "learning_rate": 0.00015813045484511058, + "loss": 2.2295, + "step": 257120 + }, + { + "epoch": 0.61, + "grad_norm": 2.015625, + "learning_rate": 0.000158128950906157, + "loss": 2.0697, + "step": 257125 + }, + { + "epoch": 0.61, + "grad_norm": 2.09375, + "learning_rate": 0.00015812744694734555, + "loss": 2.0084, + "step": 257130 + }, + { + "epoch": 0.61, + "grad_norm": 2.15625, + "learning_rate": 0.00015812594296867668, + "loss": 2.2438, + "step": 257135 + }, + { + "epoch": 0.61, + "grad_norm": 2.453125, + "learning_rate": 0.00015812443897015094, + "loss": 1.9745, + "step": 257140 + }, + { + "epoch": 0.61, + "grad_norm": 2.390625, + "learning_rate": 0.00015812293495176885, + "loss": 1.9276, + "step": 257145 + }, + { + "epoch": 0.61, + "grad_norm": 2.109375, + "learning_rate": 0.0001581214309135309, + "loss": 2.1594, + "step": 257150 + }, + { + "epoch": 0.61, + "grad_norm": 2.0625, + "learning_rate": 0.00015811992685543764, + "loss": 2.1101, + "step": 257155 + }, + { + "epoch": 0.61, + "grad_norm": 1.8515625, + "learning_rate": 0.00015811842277748954, + "loss": 2.0898, + "step": 257160 + }, + { + "epoch": 0.61, + "grad_norm": 2.0625, + "learning_rate": 0.00015811691867968712, + "loss": 2.1141, + "step": 257165 + }, + { + "epoch": 0.61, + "grad_norm": 1.84375, + "learning_rate": 0.00015811541456203093, + "loss": 1.9908, + "step": 257170 + }, + { + "epoch": 0.61, + "grad_norm": 1.9921875, + "learning_rate": 0.00015811391042452145, + "loss": 2.2908, + "step": 257175 + }, + { + "epoch": 0.61, + "grad_norm": 1.65625, + "learning_rate": 0.0001581124062671592, + "loss": 2.0022, + "step": 257180 + }, + { + "epoch": 0.61, + "grad_norm": 2.15625, + "learning_rate": 0.0001581109020899447, + "loss": 2.2076, + "step": 257185 + }, + { + "epoch": 0.61, + "grad_norm": 1.9765625, + "learning_rate": 0.00015810939789287845, + "loss": 2.0854, + "step": 257190 + }, + { + "epoch": 0.61, + "grad_norm": 2.65625, + "learning_rate": 0.000158107893675961, + "loss": 2.1253, + "step": 257195 + }, + { + "epoch": 0.61, + "grad_norm": 2.296875, + "learning_rate": 0.00015810638943919284, + "loss": 2.1631, + "step": 257200 + }, + { + "epoch": 0.61, + "grad_norm": 2.03125, + "learning_rate": 0.00015810488518257446, + "loss": 2.1047, + "step": 257205 + }, + { + "epoch": 0.61, + "grad_norm": 1.71875, + "learning_rate": 0.00015810338090610642, + "loss": 2.031, + "step": 257210 + }, + { + "epoch": 0.61, + "grad_norm": 1.9609375, + "learning_rate": 0.00015810187660978922, + "loss": 2.0501, + "step": 257215 + }, + { + "epoch": 0.61, + "grad_norm": 2.171875, + "learning_rate": 0.0001581003722936233, + "loss": 2.1208, + "step": 257220 + }, + { + "epoch": 0.61, + "grad_norm": 2.109375, + "learning_rate": 0.0001580988679576093, + "loss": 2.1616, + "step": 257225 + }, + { + "epoch": 0.61, + "grad_norm": 2.453125, + "learning_rate": 0.00015809736360174767, + "loss": 1.9059, + "step": 257230 + }, + { + "epoch": 0.61, + "grad_norm": 1.9765625, + "learning_rate": 0.00015809585922603892, + "loss": 2.0028, + "step": 257235 + }, + { + "epoch": 0.61, + "grad_norm": 2.0625, + "learning_rate": 0.00015809435483048358, + "loss": 2.1541, + "step": 257240 + }, + { + "epoch": 0.61, + "grad_norm": 2.078125, + "learning_rate": 0.00015809285041508213, + "loss": 1.9809, + "step": 257245 + }, + { + "epoch": 0.61, + "grad_norm": 1.8046875, + "learning_rate": 0.00015809134597983513, + "loss": 2.2273, + "step": 257250 + }, + { + "epoch": 0.61, + "grad_norm": 2.03125, + "learning_rate": 0.00015808984152474307, + "loss": 2.2535, + "step": 257255 + }, + { + "epoch": 0.61, + "grad_norm": 2.109375, + "learning_rate": 0.00015808833704980644, + "loss": 2.1615, + "step": 257260 + }, + { + "epoch": 0.61, + "grad_norm": 2.34375, + "learning_rate": 0.0001580868325550258, + "loss": 2.1534, + "step": 257265 + }, + { + "epoch": 0.61, + "grad_norm": 2.25, + "learning_rate": 0.00015808532804040165, + "loss": 1.8284, + "step": 257270 + }, + { + "epoch": 0.61, + "grad_norm": 2.40625, + "learning_rate": 0.0001580838235059345, + "loss": 2.1518, + "step": 257275 + }, + { + "epoch": 0.61, + "grad_norm": 2.25, + "learning_rate": 0.00015808231895162483, + "loss": 2.1378, + "step": 257280 + }, + { + "epoch": 0.61, + "grad_norm": 2.375, + "learning_rate": 0.00015808081437747323, + "loss": 2.1341, + "step": 257285 + }, + { + "epoch": 0.61, + "grad_norm": 1.9140625, + "learning_rate": 0.00015807930978348016, + "loss": 2.0997, + "step": 257290 + }, + { + "epoch": 0.61, + "grad_norm": 2.125, + "learning_rate": 0.00015807780516964614, + "loss": 1.9478, + "step": 257295 + }, + { + "epoch": 0.61, + "grad_norm": 2.21875, + "learning_rate": 0.00015807630053597167, + "loss": 2.1685, + "step": 257300 + }, + { + "epoch": 0.61, + "grad_norm": 2.0625, + "learning_rate": 0.0001580747958824573, + "loss": 2.0089, + "step": 257305 + }, + { + "epoch": 0.61, + "grad_norm": 2.078125, + "learning_rate": 0.00015807329120910352, + "loss": 2.1607, + "step": 257310 + }, + { + "epoch": 0.61, + "grad_norm": 2.171875, + "learning_rate": 0.00015807178651591088, + "loss": 1.9142, + "step": 257315 + }, + { + "epoch": 0.61, + "grad_norm": 2.65625, + "learning_rate": 0.00015807028180287981, + "loss": 2.0893, + "step": 257320 + }, + { + "epoch": 0.61, + "grad_norm": 2.34375, + "learning_rate": 0.0001580687770700109, + "loss": 1.8619, + "step": 257325 + }, + { + "epoch": 0.61, + "grad_norm": 1.875, + "learning_rate": 0.00015806727231730468, + "loss": 2.0749, + "step": 257330 + }, + { + "epoch": 0.61, + "grad_norm": 2.390625, + "learning_rate": 0.0001580657675447616, + "loss": 2.0618, + "step": 257335 + }, + { + "epoch": 0.61, + "grad_norm": 2.203125, + "learning_rate": 0.00015806426275238219, + "loss": 2.0522, + "step": 257340 + }, + { + "epoch": 0.61, + "grad_norm": 2.046875, + "learning_rate": 0.00015806275794016697, + "loss": 2.0951, + "step": 257345 + }, + { + "epoch": 0.61, + "grad_norm": 1.9765625, + "learning_rate": 0.00015806125310811647, + "loss": 2.1291, + "step": 257350 + }, + { + "epoch": 0.61, + "grad_norm": 2.15625, + "learning_rate": 0.0001580597482562312, + "loss": 1.976, + "step": 257355 + }, + { + "epoch": 0.61, + "grad_norm": 2.140625, + "learning_rate": 0.00015805824338451167, + "loss": 2.1875, + "step": 257360 + }, + { + "epoch": 0.61, + "grad_norm": 2.140625, + "learning_rate": 0.00015805673849295836, + "loss": 2.0852, + "step": 257365 + }, + { + "epoch": 0.61, + "grad_norm": 1.84375, + "learning_rate": 0.00015805523358157188, + "loss": 2.0043, + "step": 257370 + }, + { + "epoch": 0.61, + "grad_norm": 2.296875, + "learning_rate": 0.00015805372865035263, + "loss": 2.3188, + "step": 257375 + }, + { + "epoch": 0.61, + "grad_norm": 2.015625, + "learning_rate": 0.00015805222369930116, + "loss": 2.0225, + "step": 257380 + }, + { + "epoch": 0.61, + "grad_norm": 2.046875, + "learning_rate": 0.000158050718728418, + "loss": 2.0213, + "step": 257385 + }, + { + "epoch": 0.61, + "grad_norm": 2.59375, + "learning_rate": 0.0001580492137377037, + "loss": 1.9776, + "step": 257390 + }, + { + "epoch": 0.61, + "grad_norm": 1.7265625, + "learning_rate": 0.00015804770872715872, + "loss": 2.1666, + "step": 257395 + }, + { + "epoch": 0.61, + "grad_norm": 2.234375, + "learning_rate": 0.0001580462036967836, + "loss": 2.0996, + "step": 257400 + }, + { + "epoch": 0.61, + "grad_norm": 2.125, + "learning_rate": 0.0001580446986465788, + "loss": 2.0187, + "step": 257405 + }, + { + "epoch": 0.61, + "grad_norm": 2.328125, + "learning_rate": 0.00015804319357654492, + "loss": 2.2312, + "step": 257410 + }, + { + "epoch": 0.61, + "grad_norm": 2.359375, + "learning_rate": 0.00015804168848668245, + "loss": 2.2553, + "step": 257415 + }, + { + "epoch": 0.61, + "grad_norm": 2.359375, + "learning_rate": 0.00015804018337699184, + "loss": 2.0428, + "step": 257420 + }, + { + "epoch": 0.61, + "grad_norm": 2.640625, + "learning_rate": 0.00015803867824747367, + "loss": 2.3554, + "step": 257425 + }, + { + "epoch": 0.61, + "grad_norm": 2.046875, + "learning_rate": 0.00015803717309812842, + "loss": 1.9247, + "step": 257430 + }, + { + "epoch": 0.61, + "grad_norm": 1.9296875, + "learning_rate": 0.00015803566792895666, + "loss": 2.1258, + "step": 257435 + }, + { + "epoch": 0.61, + "grad_norm": 2.15625, + "learning_rate": 0.0001580341627399588, + "loss": 2.0129, + "step": 257440 + }, + { + "epoch": 0.61, + "grad_norm": 2.59375, + "learning_rate": 0.00015803265753113548, + "loss": 2.2053, + "step": 257445 + }, + { + "epoch": 0.61, + "grad_norm": 3.0, + "learning_rate": 0.0001580311523024871, + "loss": 2.0658, + "step": 257450 + }, + { + "epoch": 0.61, + "grad_norm": 2.046875, + "learning_rate": 0.00015802964705401425, + "loss": 1.9619, + "step": 257455 + }, + { + "epoch": 0.61, + "grad_norm": 2.21875, + "learning_rate": 0.00015802814178571744, + "loss": 1.9815, + "step": 257460 + }, + { + "epoch": 0.61, + "grad_norm": 2.09375, + "learning_rate": 0.00015802663649759714, + "loss": 2.1131, + "step": 257465 + }, + { + "epoch": 0.61, + "grad_norm": 2.421875, + "learning_rate": 0.0001580251311896539, + "loss": 2.031, + "step": 257470 + }, + { + "epoch": 0.61, + "grad_norm": 2.0, + "learning_rate": 0.00015802362586188822, + "loss": 2.167, + "step": 257475 + }, + { + "epoch": 0.61, + "grad_norm": 1.9921875, + "learning_rate": 0.00015802212051430062, + "loss": 1.9607, + "step": 257480 + }, + { + "epoch": 0.61, + "grad_norm": 2.359375, + "learning_rate": 0.00015802061514689158, + "loss": 2.2268, + "step": 257485 + }, + { + "epoch": 0.61, + "grad_norm": 2.109375, + "learning_rate": 0.00015801910975966167, + "loss": 2.0438, + "step": 257490 + }, + { + "epoch": 0.61, + "grad_norm": 1.8828125, + "learning_rate": 0.0001580176043526114, + "loss": 2.1178, + "step": 257495 + }, + { + "epoch": 0.61, + "grad_norm": 1.7421875, + "learning_rate": 0.00015801609892574124, + "loss": 2.1259, + "step": 257500 + }, + { + "epoch": 0.61, + "grad_norm": 2.265625, + "learning_rate": 0.00015801459347905173, + "loss": 2.0354, + "step": 257505 + }, + { + "epoch": 0.61, + "grad_norm": 2.265625, + "learning_rate": 0.00015801308801254339, + "loss": 2.0213, + "step": 257510 + }, + { + "epoch": 0.61, + "grad_norm": 2.515625, + "learning_rate": 0.00015801158252621672, + "loss": 2.1773, + "step": 257515 + }, + { + "epoch": 0.61, + "grad_norm": 2.640625, + "learning_rate": 0.00015801007702007225, + "loss": 2.1763, + "step": 257520 + }, + { + "epoch": 0.61, + "grad_norm": 2.90625, + "learning_rate": 0.00015800857149411045, + "loss": 2.01, + "step": 257525 + }, + { + "epoch": 0.61, + "grad_norm": 2.25, + "learning_rate": 0.0001580070659483319, + "loss": 2.0351, + "step": 257530 + }, + { + "epoch": 0.61, + "grad_norm": 2.046875, + "learning_rate": 0.0001580055603827371, + "loss": 2.107, + "step": 257535 + }, + { + "epoch": 0.61, + "grad_norm": 1.5625, + "learning_rate": 0.00015800405479732653, + "loss": 2.0008, + "step": 257540 + }, + { + "epoch": 0.61, + "grad_norm": 2.3125, + "learning_rate": 0.00015800254919210073, + "loss": 2.0772, + "step": 257545 + }, + { + "epoch": 0.61, + "grad_norm": 1.953125, + "learning_rate": 0.0001580010435670602, + "loss": 2.0611, + "step": 257550 + }, + { + "epoch": 0.61, + "grad_norm": 2.234375, + "learning_rate": 0.00015799953792220548, + "loss": 2.0847, + "step": 257555 + }, + { + "epoch": 0.61, + "grad_norm": 2.25, + "learning_rate": 0.00015799803225753704, + "loss": 2.1061, + "step": 257560 + }, + { + "epoch": 0.61, + "grad_norm": 2.609375, + "learning_rate": 0.0001579965265730554, + "loss": 2.1582, + "step": 257565 + }, + { + "epoch": 0.61, + "grad_norm": 1.7578125, + "learning_rate": 0.00015799502086876114, + "loss": 2.0505, + "step": 257570 + }, + { + "epoch": 0.61, + "grad_norm": 1.890625, + "learning_rate": 0.00015799351514465473, + "loss": 2.0563, + "step": 257575 + }, + { + "epoch": 0.61, + "grad_norm": 2.234375, + "learning_rate": 0.00015799200940073667, + "loss": 2.1176, + "step": 257580 + }, + { + "epoch": 0.61, + "grad_norm": 2.640625, + "learning_rate": 0.0001579905036370075, + "loss": 2.104, + "step": 257585 + }, + { + "epoch": 0.61, + "grad_norm": 2.1875, + "learning_rate": 0.0001579889978534677, + "loss": 1.9757, + "step": 257590 + }, + { + "epoch": 0.61, + "grad_norm": 2.515625, + "learning_rate": 0.00015798749205011783, + "loss": 2.1226, + "step": 257595 + }, + { + "epoch": 0.61, + "grad_norm": 2.09375, + "learning_rate": 0.00015798598622695836, + "loss": 2.0606, + "step": 257600 + }, + { + "epoch": 0.61, + "grad_norm": 1.7890625, + "learning_rate": 0.00015798448038398984, + "loss": 1.9299, + "step": 257605 + }, + { + "epoch": 0.61, + "grad_norm": 2.0625, + "learning_rate": 0.00015798297452121273, + "loss": 2.0033, + "step": 257610 + }, + { + "epoch": 0.61, + "grad_norm": 2.109375, + "learning_rate": 0.00015798146863862762, + "loss": 2.0, + "step": 257615 + }, + { + "epoch": 0.61, + "grad_norm": 2.203125, + "learning_rate": 0.00015797996273623498, + "loss": 2.02, + "step": 257620 + }, + { + "epoch": 0.61, + "grad_norm": 2.125, + "learning_rate": 0.00015797845681403533, + "loss": 1.9788, + "step": 257625 + }, + { + "epoch": 0.61, + "grad_norm": 2.203125, + "learning_rate": 0.00015797695087202922, + "loss": 2.0655, + "step": 257630 + }, + { + "epoch": 0.61, + "grad_norm": 1.8203125, + "learning_rate": 0.0001579754449102171, + "loss": 1.9769, + "step": 257635 + }, + { + "epoch": 0.61, + "grad_norm": 1.734375, + "learning_rate": 0.0001579739389285995, + "loss": 1.918, + "step": 257640 + }, + { + "epoch": 0.61, + "grad_norm": 2.15625, + "learning_rate": 0.00015797243292717698, + "loss": 2.1126, + "step": 257645 + }, + { + "epoch": 0.61, + "grad_norm": 2.328125, + "learning_rate": 0.00015797092690595003, + "loss": 2.1126, + "step": 257650 + }, + { + "epoch": 0.61, + "grad_norm": 2.125, + "learning_rate": 0.00015796942086491916, + "loss": 1.9934, + "step": 257655 + }, + { + "epoch": 0.61, + "grad_norm": 2.03125, + "learning_rate": 0.00015796791480408485, + "loss": 2.0759, + "step": 257660 + }, + { + "epoch": 0.61, + "grad_norm": 2.140625, + "learning_rate": 0.0001579664087234477, + "loss": 2.2315, + "step": 257665 + }, + { + "epoch": 0.61, + "grad_norm": 1.96875, + "learning_rate": 0.00015796490262300815, + "loss": 2.1766, + "step": 257670 + }, + { + "epoch": 0.61, + "grad_norm": 2.078125, + "learning_rate": 0.00015796339650276671, + "loss": 2.1588, + "step": 257675 + }, + { + "epoch": 0.61, + "grad_norm": 2.0625, + "learning_rate": 0.00015796189036272398, + "loss": 2.1396, + "step": 257680 + }, + { + "epoch": 0.61, + "grad_norm": 1.9609375, + "learning_rate": 0.00015796038420288038, + "loss": 2.1282, + "step": 257685 + }, + { + "epoch": 0.61, + "grad_norm": 2.453125, + "learning_rate": 0.00015795887802323648, + "loss": 2.0753, + "step": 257690 + }, + { + "epoch": 0.61, + "grad_norm": 2.171875, + "learning_rate": 0.00015795737182379272, + "loss": 2.187, + "step": 257695 + }, + { + "epoch": 0.61, + "grad_norm": 1.96875, + "learning_rate": 0.00015795586560454972, + "loss": 1.9925, + "step": 257700 + }, + { + "epoch": 0.61, + "grad_norm": 2.28125, + "learning_rate": 0.00015795435936550795, + "loss": 1.967, + "step": 257705 + }, + { + "epoch": 0.61, + "grad_norm": 2.296875, + "learning_rate": 0.0001579528531066679, + "loss": 2.0444, + "step": 257710 + }, + { + "epoch": 0.61, + "grad_norm": 2.359375, + "learning_rate": 0.00015795134682803011, + "loss": 2.1113, + "step": 257715 + }, + { + "epoch": 0.61, + "grad_norm": 2.328125, + "learning_rate": 0.00015794984052959512, + "loss": 1.8714, + "step": 257720 + }, + { + "epoch": 0.61, + "grad_norm": 1.9296875, + "learning_rate": 0.00015794833421136338, + "loss": 2.11, + "step": 257725 + }, + { + "epoch": 0.61, + "grad_norm": 2.75, + "learning_rate": 0.00015794682787333543, + "loss": 2.025, + "step": 257730 + }, + { + "epoch": 0.61, + "grad_norm": 1.84375, + "learning_rate": 0.00015794532151551183, + "loss": 1.9164, + "step": 257735 + }, + { + "epoch": 0.61, + "grad_norm": 2.078125, + "learning_rate": 0.000157943815137893, + "loss": 2.1165, + "step": 257740 + }, + { + "epoch": 0.61, + "grad_norm": 2.765625, + "learning_rate": 0.00015794230874047958, + "loss": 1.9858, + "step": 257745 + }, + { + "epoch": 0.61, + "grad_norm": 2.78125, + "learning_rate": 0.00015794080232327197, + "loss": 2.1046, + "step": 257750 + }, + { + "epoch": 0.61, + "grad_norm": 2.4375, + "learning_rate": 0.00015793929588627073, + "loss": 2.1622, + "step": 257755 + }, + { + "epoch": 0.61, + "grad_norm": 2.34375, + "learning_rate": 0.00015793778942947638, + "loss": 2.1068, + "step": 257760 + }, + { + "epoch": 0.61, + "grad_norm": 1.9140625, + "learning_rate": 0.00015793628295288946, + "loss": 2.1077, + "step": 257765 + }, + { + "epoch": 0.61, + "grad_norm": 2.078125, + "learning_rate": 0.00015793477645651042, + "loss": 2.2246, + "step": 257770 + }, + { + "epoch": 0.61, + "grad_norm": 2.3125, + "learning_rate": 0.00015793326994033982, + "loss": 2.0997, + "step": 257775 + }, + { + "epoch": 0.61, + "grad_norm": 2.171875, + "learning_rate": 0.00015793176340437817, + "loss": 2.1077, + "step": 257780 + }, + { + "epoch": 0.61, + "grad_norm": 2.359375, + "learning_rate": 0.00015793025684862597, + "loss": 2.0471, + "step": 257785 + }, + { + "epoch": 0.61, + "grad_norm": 2.046875, + "learning_rate": 0.00015792875027308376, + "loss": 2.0418, + "step": 257790 + }, + { + "epoch": 0.61, + "grad_norm": 2.171875, + "learning_rate": 0.00015792724367775206, + "loss": 2.1743, + "step": 257795 + }, + { + "epoch": 0.61, + "grad_norm": 2.328125, + "learning_rate": 0.0001579257370626313, + "loss": 2.1045, + "step": 257800 + }, + { + "epoch": 0.61, + "grad_norm": 2.015625, + "learning_rate": 0.00015792423042772208, + "loss": 2.0647, + "step": 257805 + }, + { + "epoch": 0.61, + "grad_norm": 1.9453125, + "learning_rate": 0.00015792272377302492, + "loss": 2.1634, + "step": 257810 + }, + { + "epoch": 0.61, + "grad_norm": 2.3125, + "learning_rate": 0.00015792121709854028, + "loss": 2.1343, + "step": 257815 + }, + { + "epoch": 0.61, + "grad_norm": 2.328125, + "learning_rate": 0.0001579197104042687, + "loss": 1.8192, + "step": 257820 + }, + { + "epoch": 0.61, + "grad_norm": 1.4140625, + "learning_rate": 0.0001579182036902107, + "loss": 1.9161, + "step": 257825 + }, + { + "epoch": 0.61, + "grad_norm": 1.84375, + "learning_rate": 0.00015791669695636683, + "loss": 2.1447, + "step": 257830 + }, + { + "epoch": 0.61, + "grad_norm": 2.3125, + "learning_rate": 0.0001579151902027375, + "loss": 1.9782, + "step": 257835 + }, + { + "epoch": 0.61, + "grad_norm": 2.5625, + "learning_rate": 0.00015791368342932334, + "loss": 2.0159, + "step": 257840 + }, + { + "epoch": 0.61, + "grad_norm": 2.234375, + "learning_rate": 0.0001579121766361248, + "loss": 1.9223, + "step": 257845 + }, + { + "epoch": 0.61, + "grad_norm": 2.296875, + "learning_rate": 0.0001579106698231424, + "loss": 2.2456, + "step": 257850 + }, + { + "epoch": 0.61, + "grad_norm": 1.9375, + "learning_rate": 0.0001579091629903767, + "loss": 2.2108, + "step": 257855 + }, + { + "epoch": 0.61, + "grad_norm": 1.7421875, + "learning_rate": 0.00015790765613782814, + "loss": 2.0329, + "step": 257860 + }, + { + "epoch": 0.61, + "grad_norm": 2.234375, + "learning_rate": 0.0001579061492654973, + "loss": 2.1342, + "step": 257865 + }, + { + "epoch": 0.61, + "grad_norm": 2.1875, + "learning_rate": 0.00015790464237338468, + "loss": 2.0906, + "step": 257870 + }, + { + "epoch": 0.61, + "grad_norm": 1.84375, + "learning_rate": 0.00015790313546149075, + "loss": 2.1254, + "step": 257875 + }, + { + "epoch": 0.61, + "grad_norm": 2.390625, + "learning_rate": 0.00015790162852981606, + "loss": 2.0052, + "step": 257880 + }, + { + "epoch": 0.61, + "grad_norm": 1.875, + "learning_rate": 0.00015790012157836114, + "loss": 1.9921, + "step": 257885 + }, + { + "epoch": 0.61, + "grad_norm": 2.109375, + "learning_rate": 0.00015789861460712646, + "loss": 2.1991, + "step": 257890 + }, + { + "epoch": 0.61, + "grad_norm": 2.015625, + "learning_rate": 0.0001578971076161126, + "loss": 2.1532, + "step": 257895 + }, + { + "epoch": 0.61, + "grad_norm": 2.09375, + "learning_rate": 0.00015789560060532003, + "loss": 2.1209, + "step": 257900 + }, + { + "epoch": 0.61, + "grad_norm": 1.703125, + "learning_rate": 0.00015789409357474927, + "loss": 2.0714, + "step": 257905 + }, + { + "epoch": 0.61, + "grad_norm": 2.171875, + "learning_rate": 0.00015789258652440085, + "loss": 1.9869, + "step": 257910 + }, + { + "epoch": 0.61, + "grad_norm": 1.625, + "learning_rate": 0.00015789107945427523, + "loss": 2.0465, + "step": 257915 + }, + { + "epoch": 0.61, + "grad_norm": 1.75, + "learning_rate": 0.000157889572364373, + "loss": 2.0806, + "step": 257920 + }, + { + "epoch": 0.61, + "grad_norm": 2.0, + "learning_rate": 0.00015788806525469465, + "loss": 2.0726, + "step": 257925 + }, + { + "epoch": 0.61, + "grad_norm": 1.890625, + "learning_rate": 0.00015788655812524067, + "loss": 2.0744, + "step": 257930 + }, + { + "epoch": 0.61, + "grad_norm": 2.125, + "learning_rate": 0.0001578850509760116, + "loss": 1.9704, + "step": 257935 + }, + { + "epoch": 0.61, + "grad_norm": 2.671875, + "learning_rate": 0.0001578835438070079, + "loss": 2.0193, + "step": 257940 + }, + { + "epoch": 0.61, + "grad_norm": 2.0625, + "learning_rate": 0.00015788203661823023, + "loss": 2.0561, + "step": 257945 + }, + { + "epoch": 0.61, + "grad_norm": 1.9921875, + "learning_rate": 0.00015788052940967894, + "loss": 2.0155, + "step": 257950 + }, + { + "epoch": 0.61, + "grad_norm": 2.34375, + "learning_rate": 0.0001578790221813546, + "loss": 2.1598, + "step": 257955 + }, + { + "epoch": 0.61, + "grad_norm": 2.625, + "learning_rate": 0.00015787751493325777, + "loss": 2.0924, + "step": 257960 + }, + { + "epoch": 0.61, + "grad_norm": 2.421875, + "learning_rate": 0.00015787600766538896, + "loss": 2.1404, + "step": 257965 + }, + { + "epoch": 0.61, + "grad_norm": 2.625, + "learning_rate": 0.00015787450037774858, + "loss": 2.1717, + "step": 257970 + }, + { + "epoch": 0.61, + "grad_norm": 2.265625, + "learning_rate": 0.00015787299307033726, + "loss": 1.9973, + "step": 257975 + }, + { + "epoch": 0.61, + "grad_norm": 2.1875, + "learning_rate": 0.00015787148574315546, + "loss": 1.9063, + "step": 257980 + }, + { + "epoch": 0.61, + "grad_norm": 2.5625, + "learning_rate": 0.00015786997839620375, + "loss": 2.154, + "step": 257985 + }, + { + "epoch": 0.61, + "grad_norm": 1.8515625, + "learning_rate": 0.00015786847102948256, + "loss": 1.9311, + "step": 257990 + }, + { + "epoch": 0.61, + "grad_norm": 2.25, + "learning_rate": 0.00015786696364299246, + "loss": 2.0598, + "step": 257995 + }, + { + "epoch": 0.61, + "grad_norm": 2.53125, + "learning_rate": 0.00015786545623673397, + "loss": 2.2695, + "step": 258000 + }, + { + "epoch": 0.61, + "grad_norm": 2.46875, + "learning_rate": 0.0001578639488107076, + "loss": 2.1492, + "step": 258005 + }, + { + "epoch": 0.61, + "grad_norm": 2.421875, + "learning_rate": 0.00015786244136491387, + "loss": 2.0999, + "step": 258010 + }, + { + "epoch": 0.61, + "grad_norm": 2.328125, + "learning_rate": 0.00015786093389935323, + "loss": 2.0971, + "step": 258015 + }, + { + "epoch": 0.61, + "grad_norm": 2.15625, + "learning_rate": 0.00015785942641402623, + "loss": 2.0055, + "step": 258020 + }, + { + "epoch": 0.61, + "grad_norm": 2.21875, + "learning_rate": 0.0001578579189089335, + "loss": 2.3035, + "step": 258025 + }, + { + "epoch": 0.61, + "grad_norm": 1.6484375, + "learning_rate": 0.00015785641138407538, + "loss": 2.0354, + "step": 258030 + }, + { + "epoch": 0.61, + "grad_norm": 2.171875, + "learning_rate": 0.00015785490383945247, + "loss": 2.2869, + "step": 258035 + }, + { + "epoch": 0.61, + "grad_norm": 2.3125, + "learning_rate": 0.0001578533962750653, + "loss": 2.2942, + "step": 258040 + }, + { + "epoch": 0.61, + "grad_norm": 2.203125, + "learning_rate": 0.00015785188869091435, + "loss": 2.0022, + "step": 258045 + }, + { + "epoch": 0.61, + "grad_norm": 2.375, + "learning_rate": 0.0001578503810870001, + "loss": 2.1172, + "step": 258050 + }, + { + "epoch": 0.61, + "grad_norm": 2.109375, + "learning_rate": 0.00015784887346332316, + "loss": 2.0316, + "step": 258055 + }, + { + "epoch": 0.61, + "grad_norm": 2.109375, + "learning_rate": 0.00015784736581988398, + "loss": 2.3004, + "step": 258060 + }, + { + "epoch": 0.61, + "grad_norm": 2.140625, + "learning_rate": 0.0001578458581566831, + "loss": 2.1542, + "step": 258065 + }, + { + "epoch": 0.61, + "grad_norm": 2.296875, + "learning_rate": 0.00015784435047372104, + "loss": 1.9967, + "step": 258070 + }, + { + "epoch": 0.61, + "grad_norm": 2.203125, + "learning_rate": 0.00015784284277099825, + "loss": 2.0013, + "step": 258075 + }, + { + "epoch": 0.61, + "grad_norm": 2.296875, + "learning_rate": 0.00015784133504851534, + "loss": 2.0502, + "step": 258080 + }, + { + "epoch": 0.61, + "grad_norm": 1.890625, + "learning_rate": 0.00015783982730627275, + "loss": 2.008, + "step": 258085 + }, + { + "epoch": 0.61, + "grad_norm": 1.9296875, + "learning_rate": 0.00015783831954427105, + "loss": 2.0562, + "step": 258090 + }, + { + "epoch": 0.61, + "grad_norm": 1.96875, + "learning_rate": 0.00015783681176251074, + "loss": 1.8641, + "step": 258095 + }, + { + "epoch": 0.61, + "grad_norm": 2.046875, + "learning_rate": 0.00015783530396099228, + "loss": 2.0587, + "step": 258100 + }, + { + "epoch": 0.61, + "grad_norm": 2.015625, + "learning_rate": 0.0001578337961397163, + "loss": 2.0849, + "step": 258105 + }, + { + "epoch": 0.61, + "grad_norm": 1.984375, + "learning_rate": 0.00015783228829868321, + "loss": 2.1134, + "step": 258110 + }, + { + "epoch": 0.61, + "grad_norm": 2.296875, + "learning_rate": 0.00015783078043789356, + "loss": 2.1332, + "step": 258115 + }, + { + "epoch": 0.61, + "grad_norm": 2.328125, + "learning_rate": 0.00015782927255734788, + "loss": 2.0904, + "step": 258120 + }, + { + "epoch": 0.61, + "grad_norm": 1.7890625, + "learning_rate": 0.00015782776465704664, + "loss": 2.1847, + "step": 258125 + }, + { + "epoch": 0.61, + "grad_norm": 1.890625, + "learning_rate": 0.0001578262567369904, + "loss": 2.1111, + "step": 258130 + }, + { + "epoch": 0.61, + "grad_norm": 2.03125, + "learning_rate": 0.00015782474879717966, + "loss": 2.2565, + "step": 258135 + }, + { + "epoch": 0.61, + "grad_norm": 1.9609375, + "learning_rate": 0.00015782324083761498, + "loss": 2.0564, + "step": 258140 + }, + { + "epoch": 0.61, + "grad_norm": 1.75, + "learning_rate": 0.00015782173285829677, + "loss": 1.9674, + "step": 258145 + }, + { + "epoch": 0.61, + "grad_norm": 2.265625, + "learning_rate": 0.00015782022485922562, + "loss": 2.1116, + "step": 258150 + }, + { + "epoch": 0.61, + "grad_norm": 2.609375, + "learning_rate": 0.00015781871684040206, + "loss": 2.0373, + "step": 258155 + }, + { + "epoch": 0.61, + "grad_norm": 2.296875, + "learning_rate": 0.00015781720880182656, + "loss": 2.1718, + "step": 258160 + }, + { + "epoch": 0.61, + "grad_norm": 2.1875, + "learning_rate": 0.00015781570074349966, + "loss": 2.0354, + "step": 258165 + }, + { + "epoch": 0.61, + "grad_norm": 2.390625, + "learning_rate": 0.00015781419266542188, + "loss": 2.1068, + "step": 258170 + }, + { + "epoch": 0.61, + "grad_norm": 2.625, + "learning_rate": 0.0001578126845675937, + "loss": 2.1033, + "step": 258175 + }, + { + "epoch": 0.61, + "grad_norm": 2.125, + "learning_rate": 0.00015781117645001567, + "loss": 2.0303, + "step": 258180 + }, + { + "epoch": 0.61, + "grad_norm": 2.125, + "learning_rate": 0.0001578096683126883, + "loss": 2.1012, + "step": 258185 + }, + { + "epoch": 0.61, + "grad_norm": 2.234375, + "learning_rate": 0.00015780816015561208, + "loss": 2.0199, + "step": 258190 + }, + { + "epoch": 0.61, + "grad_norm": 2.4375, + "learning_rate": 0.00015780665197878755, + "loss": 2.1202, + "step": 258195 + }, + { + "epoch": 0.61, + "grad_norm": 2.015625, + "learning_rate": 0.00015780514378221523, + "loss": 1.8594, + "step": 258200 + }, + { + "epoch": 0.61, + "grad_norm": 2.59375, + "learning_rate": 0.0001578036355658956, + "loss": 1.9663, + "step": 258205 + }, + { + "epoch": 0.61, + "grad_norm": 2.390625, + "learning_rate": 0.00015780212732982923, + "loss": 2.2284, + "step": 258210 + }, + { + "epoch": 0.61, + "grad_norm": 1.875, + "learning_rate": 0.0001578006190740166, + "loss": 1.9998, + "step": 258215 + }, + { + "epoch": 0.61, + "grad_norm": 1.8984375, + "learning_rate": 0.00015779911079845822, + "loss": 2.2498, + "step": 258220 + }, + { + "epoch": 0.61, + "grad_norm": 2.65625, + "learning_rate": 0.00015779760250315462, + "loss": 1.9099, + "step": 258225 + }, + { + "epoch": 0.61, + "grad_norm": 1.90625, + "learning_rate": 0.0001577960941881063, + "loss": 2.0471, + "step": 258230 + }, + { + "epoch": 0.61, + "grad_norm": 2.25, + "learning_rate": 0.0001577945858533138, + "loss": 1.9787, + "step": 258235 + }, + { + "epoch": 0.61, + "grad_norm": 1.7890625, + "learning_rate": 0.00015779307749877764, + "loss": 2.0555, + "step": 258240 + }, + { + "epoch": 0.61, + "grad_norm": 2.46875, + "learning_rate": 0.0001577915691244983, + "loss": 2.1375, + "step": 258245 + }, + { + "epoch": 0.61, + "grad_norm": 2.1875, + "learning_rate": 0.00015779006073047628, + "loss": 2.0823, + "step": 258250 + }, + { + "epoch": 0.61, + "grad_norm": 2.171875, + "learning_rate": 0.00015778855231671216, + "loss": 2.0556, + "step": 258255 + }, + { + "epoch": 0.61, + "grad_norm": 2.046875, + "learning_rate": 0.0001577870438832064, + "loss": 1.8974, + "step": 258260 + }, + { + "epoch": 0.61, + "grad_norm": 2.21875, + "learning_rate": 0.00015778553542995956, + "loss": 2.0616, + "step": 258265 + }, + { + "epoch": 0.61, + "grad_norm": 4.8125, + "learning_rate": 0.00015778402695697214, + "loss": 2.2491, + "step": 258270 + }, + { + "epoch": 0.61, + "grad_norm": 2.515625, + "learning_rate": 0.0001577825184642446, + "loss": 1.871, + "step": 258275 + }, + { + "epoch": 0.61, + "grad_norm": 2.453125, + "learning_rate": 0.0001577810099517776, + "loss": 2.0702, + "step": 258280 + }, + { + "epoch": 0.61, + "grad_norm": 1.96875, + "learning_rate": 0.00015777950141957145, + "loss": 2.1612, + "step": 258285 + }, + { + "epoch": 0.61, + "grad_norm": 1.9453125, + "learning_rate": 0.00015777799286762682, + "loss": 2.1065, + "step": 258290 + }, + { + "epoch": 0.61, + "grad_norm": 1.9140625, + "learning_rate": 0.0001577764842959442, + "loss": 1.9332, + "step": 258295 + }, + { + "epoch": 0.61, + "grad_norm": 2.15625, + "learning_rate": 0.00015777497570452408, + "loss": 2.1577, + "step": 258300 + }, + { + "epoch": 0.61, + "grad_norm": 1.96875, + "learning_rate": 0.00015777346709336696, + "loss": 2.1848, + "step": 258305 + }, + { + "epoch": 0.61, + "grad_norm": 2.34375, + "learning_rate": 0.00015777195846247336, + "loss": 1.9375, + "step": 258310 + }, + { + "epoch": 0.61, + "grad_norm": 2.109375, + "learning_rate": 0.00015777044981184383, + "loss": 2.0413, + "step": 258315 + }, + { + "epoch": 0.61, + "grad_norm": 2.28125, + "learning_rate": 0.00015776894114147888, + "loss": 2.2582, + "step": 258320 + }, + { + "epoch": 0.61, + "grad_norm": 2.65625, + "learning_rate": 0.00015776743245137902, + "loss": 2.0694, + "step": 258325 + }, + { + "epoch": 0.61, + "grad_norm": 2.1875, + "learning_rate": 0.00015776592374154474, + "loss": 2.0134, + "step": 258330 + }, + { + "epoch": 0.61, + "grad_norm": 2.28125, + "learning_rate": 0.00015776441501197657, + "loss": 2.0086, + "step": 258335 + }, + { + "epoch": 0.61, + "grad_norm": 2.0625, + "learning_rate": 0.00015776290626267502, + "loss": 2.2448, + "step": 258340 + }, + { + "epoch": 0.61, + "grad_norm": 1.9296875, + "learning_rate": 0.00015776139749364062, + "loss": 2.086, + "step": 258345 + }, + { + "epoch": 0.61, + "grad_norm": 2.265625, + "learning_rate": 0.00015775988870487387, + "loss": 2.1022, + "step": 258350 + }, + { + "epoch": 0.61, + "grad_norm": 2.421875, + "learning_rate": 0.00015775837989637532, + "loss": 2.0582, + "step": 258355 + }, + { + "epoch": 0.61, + "grad_norm": 2.390625, + "learning_rate": 0.00015775687106814546, + "loss": 2.1687, + "step": 258360 + }, + { + "epoch": 0.61, + "grad_norm": 2.234375, + "learning_rate": 0.0001577553622201848, + "loss": 2.0661, + "step": 258365 + }, + { + "epoch": 0.61, + "grad_norm": 1.8046875, + "learning_rate": 0.00015775385335249384, + "loss": 2.111, + "step": 258370 + }, + { + "epoch": 0.61, + "grad_norm": 2.09375, + "learning_rate": 0.00015775234446507313, + "loss": 2.0506, + "step": 258375 + }, + { + "epoch": 0.61, + "grad_norm": 2.71875, + "learning_rate": 0.00015775083555792318, + "loss": 2.0347, + "step": 258380 + }, + { + "epoch": 0.61, + "grad_norm": 2.046875, + "learning_rate": 0.0001577493266310445, + "loss": 1.9719, + "step": 258385 + }, + { + "epoch": 0.61, + "grad_norm": 1.9296875, + "learning_rate": 0.0001577478176844376, + "loss": 2.1863, + "step": 258390 + }, + { + "epoch": 0.61, + "grad_norm": 1.8359375, + "learning_rate": 0.00015774630871810295, + "loss": 1.9812, + "step": 258395 + }, + { + "epoch": 0.61, + "grad_norm": 2.3125, + "learning_rate": 0.00015774479973204117, + "loss": 2.161, + "step": 258400 + }, + { + "epoch": 0.61, + "grad_norm": 1.8984375, + "learning_rate": 0.00015774329072625268, + "loss": 2.033, + "step": 258405 + }, + { + "epoch": 0.61, + "grad_norm": 2.46875, + "learning_rate": 0.00015774178170073808, + "loss": 2.2954, + "step": 258410 + }, + { + "epoch": 0.61, + "grad_norm": 2.15625, + "learning_rate": 0.00015774027265549783, + "loss": 2.2651, + "step": 258415 + }, + { + "epoch": 0.61, + "grad_norm": 1.9296875, + "learning_rate": 0.00015773876359053242, + "loss": 1.8821, + "step": 258420 + }, + { + "epoch": 0.61, + "grad_norm": 2.078125, + "learning_rate": 0.00015773725450584242, + "loss": 1.8881, + "step": 258425 + }, + { + "epoch": 0.61, + "grad_norm": 1.75, + "learning_rate": 0.00015773574540142835, + "loss": 2.0103, + "step": 258430 + }, + { + "epoch": 0.61, + "grad_norm": 2.484375, + "learning_rate": 0.00015773423627729068, + "loss": 2.0571, + "step": 258435 + }, + { + "epoch": 0.61, + "grad_norm": 2.421875, + "learning_rate": 0.00015773272713342994, + "loss": 2.3214, + "step": 258440 + }, + { + "epoch": 0.61, + "grad_norm": 1.9921875, + "learning_rate": 0.0001577312179698467, + "loss": 2.1433, + "step": 258445 + }, + { + "epoch": 0.61, + "grad_norm": 2.09375, + "learning_rate": 0.00015772970878654137, + "loss": 2.0115, + "step": 258450 + }, + { + "epoch": 0.61, + "grad_norm": 1.9765625, + "learning_rate": 0.00015772819958351456, + "loss": 2.0925, + "step": 258455 + }, + { + "epoch": 0.61, + "grad_norm": 1.8671875, + "learning_rate": 0.00015772669036076672, + "loss": 1.9481, + "step": 258460 + }, + { + "epoch": 0.61, + "grad_norm": 2.1875, + "learning_rate": 0.00015772518111829844, + "loss": 2.1374, + "step": 258465 + }, + { + "epoch": 0.61, + "grad_norm": 2.25, + "learning_rate": 0.00015772367185611016, + "loss": 2.1498, + "step": 258470 + }, + { + "epoch": 0.61, + "grad_norm": 2.390625, + "learning_rate": 0.00015772216257420242, + "loss": 2.0472, + "step": 258475 + }, + { + "epoch": 0.61, + "grad_norm": 2.6875, + "learning_rate": 0.00015772065327257577, + "loss": 2.1233, + "step": 258480 + }, + { + "epoch": 0.61, + "grad_norm": 2.0625, + "learning_rate": 0.00015771914395123067, + "loss": 2.1368, + "step": 258485 + }, + { + "epoch": 0.61, + "grad_norm": 1.984375, + "learning_rate": 0.00015771763461016767, + "loss": 2.1244, + "step": 258490 + }, + { + "epoch": 0.61, + "grad_norm": 2.125, + "learning_rate": 0.0001577161252493873, + "loss": 2.0275, + "step": 258495 + }, + { + "epoch": 0.61, + "grad_norm": 2.0, + "learning_rate": 0.00015771461586889005, + "loss": 2.1122, + "step": 258500 + }, + { + "epoch": 0.61, + "grad_norm": 2.59375, + "learning_rate": 0.0001577131064686764, + "loss": 2.3152, + "step": 258505 + }, + { + "epoch": 0.61, + "grad_norm": 1.953125, + "learning_rate": 0.00015771159704874697, + "loss": 2.0933, + "step": 258510 + }, + { + "epoch": 0.61, + "grad_norm": 2.71875, + "learning_rate": 0.00015771008760910216, + "loss": 2.137, + "step": 258515 + }, + { + "epoch": 0.61, + "grad_norm": 2.109375, + "learning_rate": 0.00015770857814974257, + "loss": 2.0155, + "step": 258520 + }, + { + "epoch": 0.61, + "grad_norm": 3.296875, + "learning_rate": 0.00015770706867066867, + "loss": 2.0627, + "step": 258525 + }, + { + "epoch": 0.61, + "grad_norm": 2.421875, + "learning_rate": 0.00015770555917188098, + "loss": 1.9383, + "step": 258530 + }, + { + "epoch": 0.61, + "grad_norm": 1.9140625, + "learning_rate": 0.00015770404965338, + "loss": 2.0955, + "step": 258535 + }, + { + "epoch": 0.61, + "grad_norm": 2.1875, + "learning_rate": 0.00015770254011516634, + "loss": 1.8807, + "step": 258540 + }, + { + "epoch": 0.61, + "grad_norm": 2.171875, + "learning_rate": 0.00015770103055724039, + "loss": 2.1476, + "step": 258545 + }, + { + "epoch": 0.61, + "grad_norm": 1.8828125, + "learning_rate": 0.00015769952097960277, + "loss": 2.119, + "step": 258550 + }, + { + "epoch": 0.61, + "grad_norm": 1.9765625, + "learning_rate": 0.0001576980113822539, + "loss": 2.2225, + "step": 258555 + }, + { + "epoch": 0.61, + "grad_norm": 1.8828125, + "learning_rate": 0.00015769650176519434, + "loss": 2.0408, + "step": 258560 + }, + { + "epoch": 0.61, + "grad_norm": 2.9375, + "learning_rate": 0.00015769499212842463, + "loss": 1.7825, + "step": 258565 + }, + { + "epoch": 0.61, + "grad_norm": 2.171875, + "learning_rate": 0.0001576934824719453, + "loss": 2.0267, + "step": 258570 + }, + { + "epoch": 0.61, + "grad_norm": 1.9921875, + "learning_rate": 0.00015769197279575677, + "loss": 2.1989, + "step": 258575 + }, + { + "epoch": 0.61, + "grad_norm": 2.296875, + "learning_rate": 0.00015769046309985964, + "loss": 2.1873, + "step": 258580 + }, + { + "epoch": 0.61, + "grad_norm": 2.015625, + "learning_rate": 0.0001576889533842544, + "loss": 1.8907, + "step": 258585 + }, + { + "epoch": 0.61, + "grad_norm": 2.140625, + "learning_rate": 0.00015768744364894158, + "loss": 2.0833, + "step": 258590 + }, + { + "epoch": 0.61, + "grad_norm": 1.9765625, + "learning_rate": 0.00015768593389392166, + "loss": 2.0854, + "step": 258595 + }, + { + "epoch": 0.61, + "grad_norm": 2.03125, + "learning_rate": 0.0001576844241191952, + "loss": 2.1624, + "step": 258600 + }, + { + "epoch": 0.61, + "grad_norm": 2.078125, + "learning_rate": 0.00015768291432476268, + "loss": 2.1073, + "step": 258605 + }, + { + "epoch": 0.61, + "grad_norm": 2.234375, + "learning_rate": 0.00015768140451062463, + "loss": 2.1695, + "step": 258610 + }, + { + "epoch": 0.61, + "grad_norm": 2.25, + "learning_rate": 0.00015767989467678159, + "loss": 2.014, + "step": 258615 + }, + { + "epoch": 0.61, + "grad_norm": 1.953125, + "learning_rate": 0.00015767838482323403, + "loss": 2.0014, + "step": 258620 + }, + { + "epoch": 0.61, + "grad_norm": 2.046875, + "learning_rate": 0.0001576768749499825, + "loss": 2.0247, + "step": 258625 + }, + { + "epoch": 0.61, + "grad_norm": 2.4375, + "learning_rate": 0.00015767536505702746, + "loss": 2.1652, + "step": 258630 + }, + { + "epoch": 0.61, + "grad_norm": 2.421875, + "learning_rate": 0.0001576738551443695, + "loss": 1.9825, + "step": 258635 + }, + { + "epoch": 0.61, + "grad_norm": 2.328125, + "learning_rate": 0.0001576723452120091, + "loss": 2.0831, + "step": 258640 + }, + { + "epoch": 0.61, + "grad_norm": 1.9609375, + "learning_rate": 0.00015767083525994682, + "loss": 1.9748, + "step": 258645 + }, + { + "epoch": 0.61, + "grad_norm": 2.234375, + "learning_rate": 0.0001576693252881831, + "loss": 2.0027, + "step": 258650 + }, + { + "epoch": 0.61, + "grad_norm": 2.4375, + "learning_rate": 0.0001576678152967185, + "loss": 2.0462, + "step": 258655 + }, + { + "epoch": 0.61, + "grad_norm": 2.421875, + "learning_rate": 0.00015766630528555353, + "loss": 2.0887, + "step": 258660 + }, + { + "epoch": 0.61, + "grad_norm": 1.7578125, + "learning_rate": 0.0001576647952546887, + "loss": 1.9841, + "step": 258665 + }, + { + "epoch": 0.61, + "grad_norm": 2.171875, + "learning_rate": 0.00015766328520412453, + "loss": 2.1131, + "step": 258670 + }, + { + "epoch": 0.61, + "grad_norm": 2.578125, + "learning_rate": 0.00015766177513386152, + "loss": 1.9782, + "step": 258675 + }, + { + "epoch": 0.61, + "grad_norm": 1.859375, + "learning_rate": 0.00015766026504390024, + "loss": 2.0679, + "step": 258680 + }, + { + "epoch": 0.61, + "grad_norm": 1.8671875, + "learning_rate": 0.00015765875493424112, + "loss": 1.9906, + "step": 258685 + }, + { + "epoch": 0.61, + "grad_norm": 2.453125, + "learning_rate": 0.00015765724480488476, + "loss": 2.0422, + "step": 258690 + }, + { + "epoch": 0.61, + "grad_norm": 2.28125, + "learning_rate": 0.00015765573465583165, + "loss": 2.1836, + "step": 258695 + }, + { + "epoch": 0.61, + "grad_norm": 2.1875, + "learning_rate": 0.00015765422448708226, + "loss": 2.0054, + "step": 258700 + }, + { + "epoch": 0.61, + "grad_norm": 2.09375, + "learning_rate": 0.00015765271429863718, + "loss": 2.0918, + "step": 258705 + }, + { + "epoch": 0.61, + "grad_norm": 2.0625, + "learning_rate": 0.00015765120409049686, + "loss": 2.0843, + "step": 258710 + }, + { + "epoch": 0.61, + "grad_norm": 2.109375, + "learning_rate": 0.00015764969386266185, + "loss": 1.9461, + "step": 258715 + }, + { + "epoch": 0.61, + "grad_norm": 1.9140625, + "learning_rate": 0.00015764818361513263, + "loss": 2.1174, + "step": 258720 + }, + { + "epoch": 0.61, + "grad_norm": 2.453125, + "learning_rate": 0.0001576466733479098, + "loss": 2.2601, + "step": 258725 + }, + { + "epoch": 0.61, + "grad_norm": 1.8359375, + "learning_rate": 0.00015764516306099378, + "loss": 2.0981, + "step": 258730 + }, + { + "epoch": 0.61, + "grad_norm": 2.265625, + "learning_rate": 0.00015764365275438514, + "loss": 2.1217, + "step": 258735 + }, + { + "epoch": 0.61, + "grad_norm": 2.390625, + "learning_rate": 0.00015764214242808437, + "loss": 2.1462, + "step": 258740 + }, + { + "epoch": 0.61, + "grad_norm": 1.953125, + "learning_rate": 0.000157640632082092, + "loss": 1.9766, + "step": 258745 + }, + { + "epoch": 0.61, + "grad_norm": 1.96875, + "learning_rate": 0.00015763912171640854, + "loss": 2.0771, + "step": 258750 + }, + { + "epoch": 0.61, + "grad_norm": 2.109375, + "learning_rate": 0.00015763761133103454, + "loss": 1.9638, + "step": 258755 + }, + { + "epoch": 0.61, + "grad_norm": 2.078125, + "learning_rate": 0.0001576361009259705, + "loss": 2.0369, + "step": 258760 + }, + { + "epoch": 0.61, + "grad_norm": 2.421875, + "learning_rate": 0.00015763459050121688, + "loss": 2.1531, + "step": 258765 + }, + { + "epoch": 0.61, + "grad_norm": 2.375, + "learning_rate": 0.00015763308005677424, + "loss": 2.1424, + "step": 258770 + }, + { + "epoch": 0.61, + "grad_norm": 1.578125, + "learning_rate": 0.00015763156959264313, + "loss": 2.0091, + "step": 258775 + }, + { + "epoch": 0.61, + "grad_norm": 2.203125, + "learning_rate": 0.000157630059108824, + "loss": 2.0517, + "step": 258780 + }, + { + "epoch": 0.61, + "grad_norm": 2.0625, + "learning_rate": 0.0001576285486053174, + "loss": 2.2477, + "step": 258785 + }, + { + "epoch": 0.61, + "grad_norm": 2.09375, + "learning_rate": 0.00015762703808212388, + "loss": 2.207, + "step": 258790 + }, + { + "epoch": 0.61, + "grad_norm": 2.921875, + "learning_rate": 0.00015762552753924387, + "loss": 2.2305, + "step": 258795 + }, + { + "epoch": 0.61, + "grad_norm": 1.8984375, + "learning_rate": 0.00015762401697667794, + "loss": 1.8575, + "step": 258800 + }, + { + "epoch": 0.61, + "grad_norm": 2.25, + "learning_rate": 0.0001576225063944266, + "loss": 2.038, + "step": 258805 + }, + { + "epoch": 0.61, + "grad_norm": 1.9375, + "learning_rate": 0.0001576209957924904, + "loss": 2.1847, + "step": 258810 + }, + { + "epoch": 0.61, + "grad_norm": 2.34375, + "learning_rate": 0.0001576194851708698, + "loss": 2.1158, + "step": 258815 + }, + { + "epoch": 0.61, + "grad_norm": 2.4375, + "learning_rate": 0.00015761797452956534, + "loss": 2.0528, + "step": 258820 + }, + { + "epoch": 0.61, + "grad_norm": 1.96875, + "learning_rate": 0.00015761646386857755, + "loss": 2.002, + "step": 258825 + }, + { + "epoch": 0.61, + "grad_norm": 2.140625, + "learning_rate": 0.0001576149531879069, + "loss": 2.0897, + "step": 258830 + }, + { + "epoch": 0.61, + "grad_norm": 2.484375, + "learning_rate": 0.00015761344248755395, + "loss": 2.07, + "step": 258835 + }, + { + "epoch": 0.61, + "grad_norm": 1.984375, + "learning_rate": 0.00015761193176751923, + "loss": 2.1159, + "step": 258840 + }, + { + "epoch": 0.61, + "grad_norm": 2.015625, + "learning_rate": 0.0001576104210278032, + "loss": 2.3427, + "step": 258845 + }, + { + "epoch": 0.61, + "grad_norm": 2.515625, + "learning_rate": 0.0001576089102684064, + "loss": 2.1149, + "step": 258850 + }, + { + "epoch": 0.61, + "grad_norm": 1.953125, + "learning_rate": 0.00015760739948932938, + "loss": 2.0239, + "step": 258855 + }, + { + "epoch": 0.61, + "grad_norm": 2.234375, + "learning_rate": 0.0001576058886905726, + "loss": 2.0677, + "step": 258860 + }, + { + "epoch": 0.61, + "grad_norm": 2.359375, + "learning_rate": 0.0001576043778721366, + "loss": 2.0403, + "step": 258865 + }, + { + "epoch": 0.61, + "grad_norm": 1.984375, + "learning_rate": 0.00015760286703402193, + "loss": 2.2333, + "step": 258870 + }, + { + "epoch": 0.61, + "grad_norm": 1.984375, + "learning_rate": 0.00015760135617622905, + "loss": 2.1571, + "step": 258875 + }, + { + "epoch": 0.61, + "grad_norm": 1.9609375, + "learning_rate": 0.0001575998452987585, + "loss": 2.1442, + "step": 258880 + }, + { + "epoch": 0.61, + "grad_norm": 2.015625, + "learning_rate": 0.00015759833440161082, + "loss": 2.1519, + "step": 258885 + }, + { + "epoch": 0.61, + "grad_norm": 1.984375, + "learning_rate": 0.0001575968234847865, + "loss": 2.2556, + "step": 258890 + }, + { + "epoch": 0.61, + "grad_norm": 2.375, + "learning_rate": 0.00015759531254828606, + "loss": 1.9351, + "step": 258895 + }, + { + "epoch": 0.61, + "grad_norm": 2.4375, + "learning_rate": 0.00015759380159211002, + "loss": 1.9992, + "step": 258900 + }, + { + "epoch": 0.61, + "grad_norm": 2.03125, + "learning_rate": 0.00015759229061625886, + "loss": 1.9665, + "step": 258905 + }, + { + "epoch": 0.61, + "grad_norm": 2.046875, + "learning_rate": 0.00015759077962073317, + "loss": 2.1466, + "step": 258910 + }, + { + "epoch": 0.61, + "grad_norm": 2.09375, + "learning_rate": 0.0001575892686055334, + "loss": 2.0342, + "step": 258915 + }, + { + "epoch": 0.61, + "grad_norm": 2.078125, + "learning_rate": 0.00015758775757066011, + "loss": 2.0721, + "step": 258920 + }, + { + "epoch": 0.61, + "grad_norm": 2.09375, + "learning_rate": 0.0001575862465161138, + "loss": 1.9886, + "step": 258925 + }, + { + "epoch": 0.61, + "grad_norm": 2.265625, + "learning_rate": 0.00015758473544189494, + "loss": 2.0143, + "step": 258930 + }, + { + "epoch": 0.61, + "grad_norm": 2.140625, + "learning_rate": 0.0001575832243480041, + "loss": 2.0529, + "step": 258935 + }, + { + "epoch": 0.61, + "grad_norm": 1.984375, + "learning_rate": 0.00015758171323444184, + "loss": 2.2241, + "step": 258940 + }, + { + "epoch": 0.61, + "grad_norm": 2.015625, + "learning_rate": 0.00015758020210120862, + "loss": 2.0889, + "step": 258945 + }, + { + "epoch": 0.61, + "grad_norm": 1.8203125, + "learning_rate": 0.0001575786909483049, + "loss": 2.0023, + "step": 258950 + }, + { + "epoch": 0.61, + "grad_norm": 2.15625, + "learning_rate": 0.0001575771797757313, + "loss": 2.2689, + "step": 258955 + }, + { + "epoch": 0.61, + "grad_norm": 2.515625, + "learning_rate": 0.00015757566858348824, + "loss": 2.1945, + "step": 258960 + }, + { + "epoch": 0.61, + "grad_norm": 2.53125, + "learning_rate": 0.00015757415737157633, + "loss": 2.0765, + "step": 258965 + }, + { + "epoch": 0.61, + "grad_norm": 1.921875, + "learning_rate": 0.00015757264613999606, + "loss": 2.0934, + "step": 258970 + }, + { + "epoch": 0.61, + "grad_norm": 2.390625, + "learning_rate": 0.00015757113488874792, + "loss": 1.9487, + "step": 258975 + }, + { + "epoch": 0.61, + "grad_norm": 1.9765625, + "learning_rate": 0.0001575696236178324, + "loss": 2.1589, + "step": 258980 + }, + { + "epoch": 0.61, + "grad_norm": 1.828125, + "learning_rate": 0.00015756811232725006, + "loss": 2.0323, + "step": 258985 + }, + { + "epoch": 0.61, + "grad_norm": 2.1875, + "learning_rate": 0.00015756660101700146, + "loss": 2.1935, + "step": 258990 + }, + { + "epoch": 0.61, + "grad_norm": 2.046875, + "learning_rate": 0.00015756508968708702, + "loss": 2.1538, + "step": 258995 + }, + { + "epoch": 0.61, + "grad_norm": 2.109375, + "learning_rate": 0.0001575635783375073, + "loss": 2.0926, + "step": 259000 + }, + { + "epoch": 0.61, + "grad_norm": 2.453125, + "learning_rate": 0.00015756206696826284, + "loss": 2.116, + "step": 259005 + }, + { + "epoch": 0.61, + "grad_norm": 2.3125, + "learning_rate": 0.00015756055557935412, + "loss": 2.1991, + "step": 259010 + }, + { + "epoch": 0.61, + "grad_norm": 1.8671875, + "learning_rate": 0.00015755904417078166, + "loss": 2.027, + "step": 259015 + }, + { + "epoch": 0.61, + "grad_norm": 2.328125, + "learning_rate": 0.000157557532742546, + "loss": 2.0083, + "step": 259020 + }, + { + "epoch": 0.61, + "grad_norm": 1.9296875, + "learning_rate": 0.00015755602129464763, + "loss": 2.1387, + "step": 259025 + }, + { + "epoch": 0.61, + "grad_norm": 2.421875, + "learning_rate": 0.0001575545098270871, + "loss": 2.0708, + "step": 259030 + }, + { + "epoch": 0.61, + "grad_norm": 2.0, + "learning_rate": 0.00015755299833986492, + "loss": 1.8278, + "step": 259035 + }, + { + "epoch": 0.61, + "grad_norm": 2.5, + "learning_rate": 0.00015755148683298153, + "loss": 2.13, + "step": 259040 + }, + { + "epoch": 0.61, + "grad_norm": 2.8125, + "learning_rate": 0.00015754997530643755, + "loss": 2.024, + "step": 259045 + }, + { + "epoch": 0.61, + "grad_norm": 2.65625, + "learning_rate": 0.00015754846376023348, + "loss": 2.1883, + "step": 259050 + }, + { + "epoch": 0.61, + "grad_norm": 2.078125, + "learning_rate": 0.00015754695219436976, + "loss": 2.142, + "step": 259055 + }, + { + "epoch": 0.61, + "grad_norm": 2.125, + "learning_rate": 0.00015754544060884703, + "loss": 2.131, + "step": 259060 + }, + { + "epoch": 0.61, + "grad_norm": 3.09375, + "learning_rate": 0.00015754392900366564, + "loss": 2.2377, + "step": 259065 + }, + { + "epoch": 0.61, + "grad_norm": 2.640625, + "learning_rate": 0.00015754241737882623, + "loss": 2.1313, + "step": 259070 + }, + { + "epoch": 0.61, + "grad_norm": 2.0625, + "learning_rate": 0.00015754090573432935, + "loss": 2.0958, + "step": 259075 + }, + { + "epoch": 0.61, + "grad_norm": 2.03125, + "learning_rate": 0.0001575393940701754, + "loss": 2.0629, + "step": 259080 + }, + { + "epoch": 0.61, + "grad_norm": 2.4375, + "learning_rate": 0.00015753788238636495, + "loss": 1.9316, + "step": 259085 + }, + { + "epoch": 0.61, + "grad_norm": 1.9296875, + "learning_rate": 0.0001575363706828985, + "loss": 1.9058, + "step": 259090 + }, + { + "epoch": 0.61, + "grad_norm": 2.46875, + "learning_rate": 0.0001575348589597766, + "loss": 2.2585, + "step": 259095 + }, + { + "epoch": 0.61, + "grad_norm": 1.8203125, + "learning_rate": 0.00015753334721699974, + "loss": 2.0274, + "step": 259100 + }, + { + "epoch": 0.61, + "grad_norm": 1.796875, + "learning_rate": 0.00015753183545456846, + "loss": 1.9055, + "step": 259105 + }, + { + "epoch": 0.61, + "grad_norm": 2.296875, + "learning_rate": 0.00015753032367248326, + "loss": 1.9591, + "step": 259110 + }, + { + "epoch": 0.61, + "grad_norm": 2.25, + "learning_rate": 0.00015752881187074466, + "loss": 2.2939, + "step": 259115 + }, + { + "epoch": 0.61, + "grad_norm": 2.375, + "learning_rate": 0.00015752730004935317, + "loss": 2.2178, + "step": 259120 + }, + { + "epoch": 0.61, + "grad_norm": 1.9140625, + "learning_rate": 0.0001575257882083093, + "loss": 1.9989, + "step": 259125 + }, + { + "epoch": 0.61, + "grad_norm": 2.171875, + "learning_rate": 0.0001575242763476136, + "loss": 2.0581, + "step": 259130 + }, + { + "epoch": 0.61, + "grad_norm": 2.4375, + "learning_rate": 0.00015752276446726654, + "loss": 1.9999, + "step": 259135 + }, + { + "epoch": 0.61, + "grad_norm": 2.25, + "learning_rate": 0.00015752125256726868, + "loss": 1.9311, + "step": 259140 + }, + { + "epoch": 0.61, + "grad_norm": 2.28125, + "learning_rate": 0.0001575197406476205, + "loss": 2.0651, + "step": 259145 + }, + { + "epoch": 0.61, + "grad_norm": 2.34375, + "learning_rate": 0.00015751822870832256, + "loss": 2.1342, + "step": 259150 + }, + { + "epoch": 0.61, + "grad_norm": 2.03125, + "learning_rate": 0.00015751671674937533, + "loss": 2.1042, + "step": 259155 + }, + { + "epoch": 0.61, + "grad_norm": 2.078125, + "learning_rate": 0.00015751520477077935, + "loss": 2.0333, + "step": 259160 + }, + { + "epoch": 0.61, + "grad_norm": 2.328125, + "learning_rate": 0.00015751369277253513, + "loss": 1.8954, + "step": 259165 + }, + { + "epoch": 0.61, + "grad_norm": 1.8359375, + "learning_rate": 0.0001575121807546432, + "loss": 2.2095, + "step": 259170 + }, + { + "epoch": 0.61, + "grad_norm": 2.03125, + "learning_rate": 0.00015751066871710404, + "loss": 1.9478, + "step": 259175 + }, + { + "epoch": 0.61, + "grad_norm": 2.578125, + "learning_rate": 0.00015750915665991823, + "loss": 2.0355, + "step": 259180 + }, + { + "epoch": 0.61, + "grad_norm": 2.140625, + "learning_rate": 0.00015750764458308622, + "loss": 1.8848, + "step": 259185 + }, + { + "epoch": 0.61, + "grad_norm": 2.015625, + "learning_rate": 0.00015750613248660856, + "loss": 2.0527, + "step": 259190 + }, + { + "epoch": 0.61, + "grad_norm": 2.03125, + "learning_rate": 0.0001575046203704858, + "loss": 2.0583, + "step": 259195 + }, + { + "epoch": 0.61, + "grad_norm": 2.046875, + "learning_rate": 0.00015750310823471833, + "loss": 2.0477, + "step": 259200 + }, + { + "epoch": 0.61, + "grad_norm": 2.171875, + "learning_rate": 0.00015750159607930684, + "loss": 2.0339, + "step": 259205 + }, + { + "epoch": 0.61, + "grad_norm": 2.015625, + "learning_rate": 0.00015750008390425174, + "loss": 2.1815, + "step": 259210 + }, + { + "epoch": 0.61, + "grad_norm": 1.953125, + "learning_rate": 0.0001574985717095536, + "loss": 2.0424, + "step": 259215 + }, + { + "epoch": 0.61, + "grad_norm": 2.328125, + "learning_rate": 0.00015749705949521286, + "loss": 1.865, + "step": 259220 + }, + { + "epoch": 0.61, + "grad_norm": 2.421875, + "learning_rate": 0.00015749554726123005, + "loss": 2.1749, + "step": 259225 + }, + { + "epoch": 0.61, + "grad_norm": 2.140625, + "learning_rate": 0.00015749403500760578, + "loss": 2.0945, + "step": 259230 + }, + { + "epoch": 0.61, + "grad_norm": 2.015625, + "learning_rate": 0.00015749252273434045, + "loss": 2.2829, + "step": 259235 + }, + { + "epoch": 0.61, + "grad_norm": 2.078125, + "learning_rate": 0.00015749101044143466, + "loss": 2.0296, + "step": 259240 + }, + { + "epoch": 0.61, + "grad_norm": 2.328125, + "learning_rate": 0.00015748949812888894, + "loss": 2.0674, + "step": 259245 + }, + { + "epoch": 0.61, + "grad_norm": 2.078125, + "learning_rate": 0.00015748798579670373, + "loss": 2.1796, + "step": 259250 + }, + { + "epoch": 0.61, + "grad_norm": 1.828125, + "learning_rate": 0.00015748647344487955, + "loss": 1.9804, + "step": 259255 + }, + { + "epoch": 0.61, + "grad_norm": 2.078125, + "learning_rate": 0.000157484961073417, + "loss": 1.9742, + "step": 259260 + }, + { + "epoch": 0.61, + "grad_norm": 2.34375, + "learning_rate": 0.00015748344868231652, + "loss": 2.2029, + "step": 259265 + }, + { + "epoch": 0.61, + "grad_norm": 2.046875, + "learning_rate": 0.00015748193627157864, + "loss": 2.1683, + "step": 259270 + }, + { + "epoch": 0.61, + "grad_norm": 2.0625, + "learning_rate": 0.0001574804238412039, + "loss": 2.0776, + "step": 259275 + }, + { + "epoch": 0.61, + "grad_norm": 2.5625, + "learning_rate": 0.0001574789113911928, + "loss": 1.8306, + "step": 259280 + }, + { + "epoch": 0.61, + "grad_norm": 2.3125, + "learning_rate": 0.00015747739892154586, + "loss": 1.9821, + "step": 259285 + }, + { + "epoch": 0.61, + "grad_norm": 1.984375, + "learning_rate": 0.0001574758864322636, + "loss": 2.1434, + "step": 259290 + }, + { + "epoch": 0.61, + "grad_norm": 2.546875, + "learning_rate": 0.00015747437392334655, + "loss": 2.1969, + "step": 259295 + }, + { + "epoch": 0.61, + "grad_norm": 1.96875, + "learning_rate": 0.0001574728613947952, + "loss": 2.1517, + "step": 259300 + }, + { + "epoch": 0.61, + "grad_norm": 1.921875, + "learning_rate": 0.00015747134884661006, + "loss": 2.1906, + "step": 259305 + }, + { + "epoch": 0.61, + "grad_norm": 1.96875, + "learning_rate": 0.0001574698362787917, + "loss": 2.0653, + "step": 259310 + }, + { + "epoch": 0.61, + "grad_norm": 1.75, + "learning_rate": 0.0001574683236913406, + "loss": 2.1237, + "step": 259315 + }, + { + "epoch": 0.61, + "grad_norm": 2.234375, + "learning_rate": 0.00015746681108425725, + "loss": 2.1737, + "step": 259320 + }, + { + "epoch": 0.61, + "grad_norm": 2.109375, + "learning_rate": 0.0001574652984575422, + "loss": 2.1609, + "step": 259325 + }, + { + "epoch": 0.61, + "grad_norm": 2.015625, + "learning_rate": 0.00015746378581119597, + "loss": 2.1339, + "step": 259330 + }, + { + "epoch": 0.61, + "grad_norm": 2.21875, + "learning_rate": 0.0001574622731452191, + "loss": 2.1355, + "step": 259335 + }, + { + "epoch": 0.61, + "grad_norm": 2.03125, + "learning_rate": 0.00015746076045961203, + "loss": 2.2688, + "step": 259340 + }, + { + "epoch": 0.61, + "grad_norm": 2.046875, + "learning_rate": 0.00015745924775437535, + "loss": 2.0772, + "step": 259345 + }, + { + "epoch": 0.61, + "grad_norm": 2.578125, + "learning_rate": 0.00015745773502950954, + "loss": 2.0109, + "step": 259350 + }, + { + "epoch": 0.61, + "grad_norm": 2.40625, + "learning_rate": 0.00015745622228501512, + "loss": 1.9627, + "step": 259355 + }, + { + "epoch": 0.61, + "grad_norm": 2.28125, + "learning_rate": 0.0001574547095208926, + "loss": 1.9484, + "step": 259360 + }, + { + "epoch": 0.61, + "grad_norm": 2.015625, + "learning_rate": 0.00015745319673714253, + "loss": 2.0518, + "step": 259365 + }, + { + "epoch": 0.61, + "grad_norm": 1.9609375, + "learning_rate": 0.00015745168393376538, + "loss": 2.0794, + "step": 259370 + }, + { + "epoch": 0.61, + "grad_norm": 1.9765625, + "learning_rate": 0.00015745017111076171, + "loss": 1.9942, + "step": 259375 + }, + { + "epoch": 0.61, + "grad_norm": 2.28125, + "learning_rate": 0.00015744865826813203, + "loss": 1.9388, + "step": 259380 + }, + { + "epoch": 0.61, + "grad_norm": 2.3125, + "learning_rate": 0.00015744714540587683, + "loss": 2.0687, + "step": 259385 + }, + { + "epoch": 0.61, + "grad_norm": 2.03125, + "learning_rate": 0.00015744563252399668, + "loss": 1.9182, + "step": 259390 + }, + { + "epoch": 0.61, + "grad_norm": 2.21875, + "learning_rate": 0.00015744411962249203, + "loss": 2.1207, + "step": 259395 + }, + { + "epoch": 0.61, + "grad_norm": 2.296875, + "learning_rate": 0.00015744260670136343, + "loss": 2.1584, + "step": 259400 + }, + { + "epoch": 0.61, + "grad_norm": 2.109375, + "learning_rate": 0.0001574410937606114, + "loss": 2.0584, + "step": 259405 + }, + { + "epoch": 0.61, + "grad_norm": 2.28125, + "learning_rate": 0.00015743958080023643, + "loss": 2.0339, + "step": 259410 + }, + { + "epoch": 0.61, + "grad_norm": 1.9296875, + "learning_rate": 0.00015743806782023904, + "loss": 2.1332, + "step": 259415 + }, + { + "epoch": 0.61, + "grad_norm": 2.203125, + "learning_rate": 0.00015743655482061981, + "loss": 2.215, + "step": 259420 + }, + { + "epoch": 0.61, + "grad_norm": 2.078125, + "learning_rate": 0.0001574350418013792, + "loss": 2.1854, + "step": 259425 + }, + { + "epoch": 0.61, + "grad_norm": 2.640625, + "learning_rate": 0.00015743352876251775, + "loss": 2.1677, + "step": 259430 + }, + { + "epoch": 0.61, + "grad_norm": 2.421875, + "learning_rate": 0.00015743201570403596, + "loss": 1.9795, + "step": 259435 + }, + { + "epoch": 0.61, + "grad_norm": 2.09375, + "learning_rate": 0.0001574305026259343, + "loss": 1.9967, + "step": 259440 + }, + { + "epoch": 0.61, + "grad_norm": 2.15625, + "learning_rate": 0.00015742898952821342, + "loss": 2.2259, + "step": 259445 + }, + { + "epoch": 0.61, + "grad_norm": 2.125, + "learning_rate": 0.0001574274764108737, + "loss": 2.0594, + "step": 259450 + }, + { + "epoch": 0.61, + "grad_norm": 2.109375, + "learning_rate": 0.00015742596327391573, + "loss": 1.9585, + "step": 259455 + }, + { + "epoch": 0.61, + "grad_norm": 2.203125, + "learning_rate": 0.00015742445011734, + "loss": 2.2038, + "step": 259460 + }, + { + "epoch": 0.61, + "grad_norm": 2.15625, + "learning_rate": 0.00015742293694114706, + "loss": 2.1545, + "step": 259465 + }, + { + "epoch": 0.61, + "grad_norm": 2.53125, + "learning_rate": 0.00015742142374533736, + "loss": 2.0009, + "step": 259470 + }, + { + "epoch": 0.61, + "grad_norm": 2.59375, + "learning_rate": 0.00015741991052991148, + "loss": 2.2519, + "step": 259475 + }, + { + "epoch": 0.61, + "grad_norm": 2.234375, + "learning_rate": 0.00015741839729486995, + "loss": 1.8598, + "step": 259480 + }, + { + "epoch": 0.61, + "grad_norm": 3.03125, + "learning_rate": 0.00015741688404021322, + "loss": 2.1488, + "step": 259485 + }, + { + "epoch": 0.61, + "grad_norm": 2.234375, + "learning_rate": 0.00015741537076594187, + "loss": 2.0282, + "step": 259490 + }, + { + "epoch": 0.61, + "grad_norm": 2.640625, + "learning_rate": 0.00015741385747205635, + "loss": 1.8156, + "step": 259495 + }, + { + "epoch": 0.61, + "grad_norm": 2.390625, + "learning_rate": 0.00015741234415855723, + "loss": 1.9921, + "step": 259500 + }, + { + "epoch": 0.61, + "grad_norm": 2.90625, + "learning_rate": 0.00015741083082544503, + "loss": 2.2845, + "step": 259505 + }, + { + "epoch": 0.61, + "grad_norm": 2.21875, + "learning_rate": 0.00015740931747272021, + "loss": 2.0583, + "step": 259510 + }, + { + "epoch": 0.61, + "grad_norm": 2.265625, + "learning_rate": 0.00015740780410038337, + "loss": 2.0314, + "step": 259515 + }, + { + "epoch": 0.61, + "grad_norm": 2.546875, + "learning_rate": 0.00015740629070843496, + "loss": 2.221, + "step": 259520 + }, + { + "epoch": 0.61, + "grad_norm": 2.21875, + "learning_rate": 0.0001574047772968755, + "loss": 2.0086, + "step": 259525 + }, + { + "epoch": 0.61, + "grad_norm": 2.4375, + "learning_rate": 0.00015740326386570557, + "loss": 2.1162, + "step": 259530 + }, + { + "epoch": 0.61, + "grad_norm": 2.15625, + "learning_rate": 0.0001574017504149256, + "loss": 2.1266, + "step": 259535 + }, + { + "epoch": 0.61, + "grad_norm": 2.078125, + "learning_rate": 0.0001574002369445362, + "loss": 2.1653, + "step": 259540 + }, + { + "epoch": 0.61, + "grad_norm": 1.7578125, + "learning_rate": 0.0001573987234545378, + "loss": 2.1566, + "step": 259545 + }, + { + "epoch": 0.61, + "grad_norm": 2.171875, + "learning_rate": 0.00015739720994493097, + "loss": 1.9833, + "step": 259550 + }, + { + "epoch": 0.61, + "grad_norm": 2.09375, + "learning_rate": 0.0001573956964157162, + "loss": 2.0042, + "step": 259555 + }, + { + "epoch": 0.61, + "grad_norm": 2.40625, + "learning_rate": 0.00015739418286689406, + "loss": 2.1745, + "step": 259560 + }, + { + "epoch": 0.61, + "grad_norm": 2.359375, + "learning_rate": 0.000157392669298465, + "loss": 2.0108, + "step": 259565 + }, + { + "epoch": 0.61, + "grad_norm": 1.96875, + "learning_rate": 0.00015739115571042955, + "loss": 1.978, + "step": 259570 + }, + { + "epoch": 0.61, + "grad_norm": 2.78125, + "learning_rate": 0.00015738964210278822, + "loss": 2.1679, + "step": 259575 + }, + { + "epoch": 0.61, + "grad_norm": 1.8671875, + "learning_rate": 0.0001573881284755416, + "loss": 2.1749, + "step": 259580 + }, + { + "epoch": 0.61, + "grad_norm": 1.8828125, + "learning_rate": 0.00015738661482869013, + "loss": 2.0844, + "step": 259585 + }, + { + "epoch": 0.61, + "grad_norm": 2.484375, + "learning_rate": 0.00015738510116223436, + "loss": 1.9769, + "step": 259590 + }, + { + "epoch": 0.61, + "grad_norm": 2.34375, + "learning_rate": 0.00015738358747617478, + "loss": 2.079, + "step": 259595 + }, + { + "epoch": 0.61, + "grad_norm": 1.9453125, + "learning_rate": 0.00015738207377051193, + "loss": 2.1282, + "step": 259600 + }, + { + "epoch": 0.61, + "grad_norm": 2.0625, + "learning_rate": 0.00015738056004524636, + "loss": 2.0481, + "step": 259605 + }, + { + "epoch": 0.61, + "grad_norm": 2.1875, + "learning_rate": 0.0001573790463003785, + "loss": 2.1321, + "step": 259610 + }, + { + "epoch": 0.61, + "grad_norm": 2.15625, + "learning_rate": 0.00015737753253590895, + "loss": 1.8048, + "step": 259615 + }, + { + "epoch": 0.61, + "grad_norm": 2.375, + "learning_rate": 0.0001573760187518382, + "loss": 2.1381, + "step": 259620 + }, + { + "epoch": 0.61, + "grad_norm": 2.34375, + "learning_rate": 0.00015737450494816672, + "loss": 2.0068, + "step": 259625 + }, + { + "epoch": 0.61, + "grad_norm": 2.109375, + "learning_rate": 0.00015737299112489507, + "loss": 2.0622, + "step": 259630 + }, + { + "epoch": 0.61, + "grad_norm": 2.359375, + "learning_rate": 0.0001573714772820238, + "loss": 2.2241, + "step": 259635 + }, + { + "epoch": 0.61, + "grad_norm": 1.9140625, + "learning_rate": 0.0001573699634195534, + "loss": 2.0034, + "step": 259640 + }, + { + "epoch": 0.61, + "grad_norm": 2.421875, + "learning_rate": 0.00015736844953748435, + "loss": 2.0617, + "step": 259645 + }, + { + "epoch": 0.61, + "grad_norm": 1.8203125, + "learning_rate": 0.0001573669356358172, + "loss": 2.1334, + "step": 259650 + }, + { + "epoch": 0.61, + "grad_norm": 2.109375, + "learning_rate": 0.00015736542171455246, + "loss": 2.1003, + "step": 259655 + }, + { + "epoch": 0.61, + "grad_norm": 2.03125, + "learning_rate": 0.00015736390777369068, + "loss": 2.215, + "step": 259660 + }, + { + "epoch": 0.61, + "grad_norm": 1.7578125, + "learning_rate": 0.00015736239381323235, + "loss": 2.0295, + "step": 259665 + }, + { + "epoch": 0.61, + "grad_norm": 2.3125, + "learning_rate": 0.00015736087983317796, + "loss": 1.97, + "step": 259670 + }, + { + "epoch": 0.61, + "grad_norm": 2.265625, + "learning_rate": 0.00015735936583352806, + "loss": 2.0447, + "step": 259675 + }, + { + "epoch": 0.61, + "grad_norm": 2.015625, + "learning_rate": 0.00015735785181428315, + "loss": 1.9143, + "step": 259680 + }, + { + "epoch": 0.61, + "grad_norm": 2.515625, + "learning_rate": 0.00015735633777544375, + "loss": 2.1591, + "step": 259685 + }, + { + "epoch": 0.61, + "grad_norm": 2.109375, + "learning_rate": 0.0001573548237170104, + "loss": 2.1718, + "step": 259690 + }, + { + "epoch": 0.61, + "grad_norm": 2.3125, + "learning_rate": 0.00015735330963898364, + "loss": 2.1351, + "step": 259695 + }, + { + "epoch": 0.61, + "grad_norm": 2.015625, + "learning_rate": 0.00015735179554136392, + "loss": 2.1777, + "step": 259700 + }, + { + "epoch": 0.61, + "grad_norm": 2.28125, + "learning_rate": 0.00015735028142415178, + "loss": 2.051, + "step": 259705 + }, + { + "epoch": 0.61, + "grad_norm": 1.9609375, + "learning_rate": 0.00015734876728734772, + "loss": 2.0611, + "step": 259710 + }, + { + "epoch": 0.61, + "grad_norm": 2.09375, + "learning_rate": 0.0001573472531309523, + "loss": 2.0517, + "step": 259715 + }, + { + "epoch": 0.61, + "grad_norm": 2.46875, + "learning_rate": 0.00015734573895496604, + "loss": 2.0121, + "step": 259720 + }, + { + "epoch": 0.61, + "grad_norm": 2.25, + "learning_rate": 0.00015734422475938945, + "loss": 2.138, + "step": 259725 + }, + { + "epoch": 0.61, + "grad_norm": 2.125, + "learning_rate": 0.00015734271054422297, + "loss": 2.1141, + "step": 259730 + }, + { + "epoch": 0.61, + "grad_norm": 2.546875, + "learning_rate": 0.00015734119630946721, + "loss": 2.1079, + "step": 259735 + }, + { + "epoch": 0.61, + "grad_norm": 1.5703125, + "learning_rate": 0.00015733968205512269, + "loss": 2.0785, + "step": 259740 + }, + { + "epoch": 0.61, + "grad_norm": 2.4375, + "learning_rate": 0.00015733816778118985, + "loss": 2.0038, + "step": 259745 + }, + { + "epoch": 0.61, + "grad_norm": 1.859375, + "learning_rate": 0.00015733665348766925, + "loss": 2.0107, + "step": 259750 + }, + { + "epoch": 0.61, + "grad_norm": 2.09375, + "learning_rate": 0.00015733513917456145, + "loss": 2.2688, + "step": 259755 + }, + { + "epoch": 0.61, + "grad_norm": 2.125, + "learning_rate": 0.00015733362484186692, + "loss": 2.0696, + "step": 259760 + }, + { + "epoch": 0.61, + "grad_norm": 2.03125, + "learning_rate": 0.00015733211048958614, + "loss": 2.0844, + "step": 259765 + }, + { + "epoch": 0.61, + "grad_norm": 2.140625, + "learning_rate": 0.0001573305961177197, + "loss": 2.0469, + "step": 259770 + }, + { + "epoch": 0.61, + "grad_norm": 2.203125, + "learning_rate": 0.0001573290817262681, + "loss": 1.9226, + "step": 259775 + }, + { + "epoch": 0.61, + "grad_norm": 2.75, + "learning_rate": 0.0001573275673152318, + "loss": 1.9658, + "step": 259780 + }, + { + "epoch": 0.61, + "grad_norm": 1.8359375, + "learning_rate": 0.00015732605288461144, + "loss": 2.0737, + "step": 259785 + }, + { + "epoch": 0.61, + "grad_norm": 2.234375, + "learning_rate": 0.00015732453843440738, + "loss": 2.285, + "step": 259790 + }, + { + "epoch": 0.61, + "grad_norm": 2.09375, + "learning_rate": 0.00015732302396462026, + "loss": 1.9575, + "step": 259795 + }, + { + "epoch": 0.61, + "grad_norm": 2.21875, + "learning_rate": 0.00015732150947525057, + "loss": 2.0512, + "step": 259800 + }, + { + "epoch": 0.61, + "grad_norm": 2.109375, + "learning_rate": 0.00015731999496629878, + "loss": 2.3359, + "step": 259805 + }, + { + "epoch": 0.61, + "grad_norm": 2.4375, + "learning_rate": 0.00015731848043776546, + "loss": 2.0119, + "step": 259810 + }, + { + "epoch": 0.61, + "grad_norm": 2.5, + "learning_rate": 0.00015731696588965107, + "loss": 2.2238, + "step": 259815 + }, + { + "epoch": 0.61, + "grad_norm": 2.171875, + "learning_rate": 0.00015731545132195623, + "loss": 2.1928, + "step": 259820 + }, + { + "epoch": 0.61, + "grad_norm": 2.28125, + "learning_rate": 0.00015731393673468135, + "loss": 2.0474, + "step": 259825 + }, + { + "epoch": 0.61, + "grad_norm": 2.328125, + "learning_rate": 0.000157312422127827, + "loss": 1.9137, + "step": 259830 + }, + { + "epoch": 0.61, + "grad_norm": 2.4375, + "learning_rate": 0.0001573109075013937, + "loss": 2.1458, + "step": 259835 + }, + { + "epoch": 0.61, + "grad_norm": 2.140625, + "learning_rate": 0.00015730939285538194, + "loss": 1.9317, + "step": 259840 + }, + { + "epoch": 0.61, + "grad_norm": 1.96875, + "learning_rate": 0.00015730787818979224, + "loss": 1.9374, + "step": 259845 + }, + { + "epoch": 0.61, + "grad_norm": 1.78125, + "learning_rate": 0.00015730636350462515, + "loss": 2.1162, + "step": 259850 + }, + { + "epoch": 0.61, + "grad_norm": 1.9765625, + "learning_rate": 0.00015730484879988117, + "loss": 2.1743, + "step": 259855 + }, + { + "epoch": 0.61, + "grad_norm": 2.25, + "learning_rate": 0.0001573033340755608, + "loss": 2.2437, + "step": 259860 + }, + { + "epoch": 0.61, + "grad_norm": 1.84375, + "learning_rate": 0.00015730181933166457, + "loss": 2.1213, + "step": 259865 + }, + { + "epoch": 0.61, + "grad_norm": 2.40625, + "learning_rate": 0.000157300304568193, + "loss": 2.0976, + "step": 259870 + }, + { + "epoch": 0.61, + "grad_norm": 2.03125, + "learning_rate": 0.0001572987897851466, + "loss": 2.314, + "step": 259875 + }, + { + "epoch": 0.61, + "grad_norm": 1.84375, + "learning_rate": 0.0001572972749825259, + "loss": 2.3204, + "step": 259880 + }, + { + "epoch": 0.61, + "grad_norm": 1.8359375, + "learning_rate": 0.00015729576016033144, + "loss": 2.1244, + "step": 259885 + }, + { + "epoch": 0.61, + "grad_norm": 2.5625, + "learning_rate": 0.00015729424531856368, + "loss": 2.1679, + "step": 259890 + }, + { + "epoch": 0.61, + "grad_norm": 1.9296875, + "learning_rate": 0.00015729273045722315, + "loss": 1.9699, + "step": 259895 + }, + { + "epoch": 0.61, + "grad_norm": 2.46875, + "learning_rate": 0.00015729121557631043, + "loss": 2.1508, + "step": 259900 + }, + { + "epoch": 0.61, + "grad_norm": 2.171875, + "learning_rate": 0.00015728970067582594, + "loss": 2.2471, + "step": 259905 + }, + { + "epoch": 0.61, + "grad_norm": 2.5, + "learning_rate": 0.0001572881857557703, + "loss": 1.8595, + "step": 259910 + }, + { + "epoch": 0.61, + "grad_norm": 2.203125, + "learning_rate": 0.00015728667081614398, + "loss": 2.0562, + "step": 259915 + }, + { + "epoch": 0.61, + "grad_norm": 2.453125, + "learning_rate": 0.00015728515585694745, + "loss": 2.1046, + "step": 259920 + }, + { + "epoch": 0.61, + "grad_norm": 2.1875, + "learning_rate": 0.00015728364087818129, + "loss": 2.0301, + "step": 259925 + }, + { + "epoch": 0.61, + "grad_norm": 1.7734375, + "learning_rate": 0.00015728212587984598, + "loss": 2.0363, + "step": 259930 + }, + { + "epoch": 0.61, + "grad_norm": 2.53125, + "learning_rate": 0.0001572806108619421, + "loss": 1.8667, + "step": 259935 + }, + { + "epoch": 0.61, + "grad_norm": 2.046875, + "learning_rate": 0.0001572790958244701, + "loss": 2.0962, + "step": 259940 + }, + { + "epoch": 0.61, + "grad_norm": 2.4375, + "learning_rate": 0.00015727758076743053, + "loss": 1.8739, + "step": 259945 + }, + { + "epoch": 0.61, + "grad_norm": 2.234375, + "learning_rate": 0.00015727606569082388, + "loss": 2.1614, + "step": 259950 + }, + { + "epoch": 0.61, + "grad_norm": 2.5, + "learning_rate": 0.0001572745505946507, + "loss": 2.0199, + "step": 259955 + }, + { + "epoch": 0.61, + "grad_norm": 2.421875, + "learning_rate": 0.0001572730354789115, + "loss": 2.1077, + "step": 259960 + }, + { + "epoch": 0.61, + "grad_norm": 2.296875, + "learning_rate": 0.00015727152034360678, + "loss": 2.1047, + "step": 259965 + }, + { + "epoch": 0.61, + "grad_norm": 2.125, + "learning_rate": 0.0001572700051887371, + "loss": 2.1383, + "step": 259970 + }, + { + "epoch": 0.61, + "grad_norm": 2.34375, + "learning_rate": 0.0001572684900143029, + "loss": 1.9994, + "step": 259975 + }, + { + "epoch": 0.61, + "grad_norm": 2.515625, + "learning_rate": 0.00015726697482030478, + "loss": 2.1005, + "step": 259980 + }, + { + "epoch": 0.61, + "grad_norm": 2.03125, + "learning_rate": 0.00015726545960674323, + "loss": 2.1081, + "step": 259985 + }, + { + "epoch": 0.61, + "grad_norm": 2.203125, + "learning_rate": 0.00015726394437361874, + "loss": 1.9627, + "step": 259990 + }, + { + "epoch": 0.61, + "grad_norm": 2.203125, + "learning_rate": 0.00015726242912093182, + "loss": 2.0975, + "step": 259995 + }, + { + "epoch": 0.61, + "grad_norm": 2.21875, + "learning_rate": 0.00015726091384868306, + "loss": 2.238, + "step": 260000 + }, + { + "epoch": 0.61, + "grad_norm": 2.6875, + "learning_rate": 0.00015725939855687292, + "loss": 2.0287, + "step": 260005 + }, + { + "epoch": 0.61, + "grad_norm": 2.125, + "learning_rate": 0.0001572578832455019, + "loss": 1.9935, + "step": 260010 + }, + { + "epoch": 0.61, + "grad_norm": 2.390625, + "learning_rate": 0.00015725636791457062, + "loss": 2.1068, + "step": 260015 + }, + { + "epoch": 0.61, + "grad_norm": 2.21875, + "learning_rate": 0.00015725485256407948, + "loss": 1.9877, + "step": 260020 + }, + { + "epoch": 0.61, + "grad_norm": 2.25, + "learning_rate": 0.00015725333719402907, + "loss": 2.1134, + "step": 260025 + }, + { + "epoch": 0.61, + "grad_norm": 1.8671875, + "learning_rate": 0.00015725182180441983, + "loss": 2.0758, + "step": 260030 + }, + { + "epoch": 0.61, + "grad_norm": 2.1875, + "learning_rate": 0.00015725030639525238, + "loss": 2.2073, + "step": 260035 + }, + { + "epoch": 0.61, + "grad_norm": 2.484375, + "learning_rate": 0.0001572487909665272, + "loss": 2.0296, + "step": 260040 + }, + { + "epoch": 0.61, + "grad_norm": 2.53125, + "learning_rate": 0.00015724727551824473, + "loss": 2.1231, + "step": 260045 + }, + { + "epoch": 0.61, + "grad_norm": 1.9765625, + "learning_rate": 0.0001572457600504056, + "loss": 2.0256, + "step": 260050 + }, + { + "epoch": 0.61, + "grad_norm": 2.25, + "learning_rate": 0.00015724424456301027, + "loss": 2.093, + "step": 260055 + }, + { + "epoch": 0.61, + "grad_norm": 2.40625, + "learning_rate": 0.00015724272905605924, + "loss": 2.1813, + "step": 260060 + }, + { + "epoch": 0.61, + "grad_norm": 2.234375, + "learning_rate": 0.00015724121352955308, + "loss": 2.0381, + "step": 260065 + }, + { + "epoch": 0.61, + "grad_norm": 2.4375, + "learning_rate": 0.00015723969798349228, + "loss": 2.0009, + "step": 260070 + }, + { + "epoch": 0.61, + "grad_norm": 2.046875, + "learning_rate": 0.00015723818241787738, + "loss": 2.0945, + "step": 260075 + }, + { + "epoch": 0.61, + "grad_norm": 1.984375, + "learning_rate": 0.00015723666683270887, + "loss": 1.9847, + "step": 260080 + }, + { + "epoch": 0.61, + "grad_norm": 2.234375, + "learning_rate": 0.00015723515122798725, + "loss": 2.2085, + "step": 260085 + }, + { + "epoch": 0.61, + "grad_norm": 2.25, + "learning_rate": 0.00015723363560371308, + "loss": 2.1531, + "step": 260090 + }, + { + "epoch": 0.61, + "grad_norm": 2.234375, + "learning_rate": 0.00015723211995988687, + "loss": 1.9834, + "step": 260095 + }, + { + "epoch": 0.61, + "grad_norm": 2.4375, + "learning_rate": 0.00015723060429650914, + "loss": 2.1039, + "step": 260100 + }, + { + "epoch": 0.61, + "grad_norm": 2.171875, + "learning_rate": 0.0001572290886135804, + "loss": 2.1752, + "step": 260105 + }, + { + "epoch": 0.61, + "grad_norm": 1.8515625, + "learning_rate": 0.00015722757291110112, + "loss": 2.0422, + "step": 260110 + }, + { + "epoch": 0.61, + "grad_norm": 1.875, + "learning_rate": 0.0001572260571890719, + "loss": 1.913, + "step": 260115 + }, + { + "epoch": 0.61, + "grad_norm": 2.328125, + "learning_rate": 0.0001572245414474932, + "loss": 2.058, + "step": 260120 + }, + { + "epoch": 0.61, + "grad_norm": 1.859375, + "learning_rate": 0.0001572230256863656, + "loss": 2.0071, + "step": 260125 + }, + { + "epoch": 0.61, + "grad_norm": 2.109375, + "learning_rate": 0.00015722150990568954, + "loss": 2.1108, + "step": 260130 + }, + { + "epoch": 0.61, + "grad_norm": 2.015625, + "learning_rate": 0.0001572199941054656, + "loss": 2.0084, + "step": 260135 + }, + { + "epoch": 0.61, + "grad_norm": 1.7421875, + "learning_rate": 0.00015721847828569424, + "loss": 1.9903, + "step": 260140 + }, + { + "epoch": 0.61, + "grad_norm": 2.390625, + "learning_rate": 0.000157216962446376, + "loss": 2.0111, + "step": 260145 + }, + { + "epoch": 0.61, + "grad_norm": 2.3125, + "learning_rate": 0.00015721544658751143, + "loss": 2.0779, + "step": 260150 + }, + { + "epoch": 0.61, + "grad_norm": 1.921875, + "learning_rate": 0.00015721393070910103, + "loss": 2.1675, + "step": 260155 + }, + { + "epoch": 0.61, + "grad_norm": 2.40625, + "learning_rate": 0.00015721241481114534, + "loss": 2.1956, + "step": 260160 + }, + { + "epoch": 0.61, + "grad_norm": 2.28125, + "learning_rate": 0.0001572108988936448, + "loss": 2.0829, + "step": 260165 + }, + { + "epoch": 0.61, + "grad_norm": 2.421875, + "learning_rate": 0.0001572093829566, + "loss": 1.971, + "step": 260170 + }, + { + "epoch": 0.61, + "grad_norm": 2.421875, + "learning_rate": 0.00015720786700001147, + "loss": 2.0432, + "step": 260175 + }, + { + "epoch": 0.61, + "grad_norm": 1.8828125, + "learning_rate": 0.00015720635102387965, + "loss": 1.9724, + "step": 260180 + }, + { + "epoch": 0.61, + "grad_norm": 1.796875, + "learning_rate": 0.00015720483502820515, + "loss": 2.0262, + "step": 260185 + }, + { + "epoch": 0.61, + "grad_norm": 1.7421875, + "learning_rate": 0.00015720331901298838, + "loss": 2.0258, + "step": 260190 + }, + { + "epoch": 0.61, + "grad_norm": 2.265625, + "learning_rate": 0.00015720180297822996, + "loss": 2.0746, + "step": 260195 + }, + { + "epoch": 0.61, + "grad_norm": 2.1875, + "learning_rate": 0.00015720028692393036, + "loss": 1.9427, + "step": 260200 + }, + { + "epoch": 0.61, + "grad_norm": 2.296875, + "learning_rate": 0.00015719877085009008, + "loss": 2.2385, + "step": 260205 + }, + { + "epoch": 0.61, + "grad_norm": 1.9140625, + "learning_rate": 0.0001571972547567097, + "loss": 2.2167, + "step": 260210 + }, + { + "epoch": 0.61, + "grad_norm": 1.9296875, + "learning_rate": 0.00015719573864378966, + "loss": 2.1203, + "step": 260215 + }, + { + "epoch": 0.61, + "grad_norm": 1.8203125, + "learning_rate": 0.00015719422251133053, + "loss": 2.0663, + "step": 260220 + }, + { + "epoch": 0.61, + "grad_norm": 2.234375, + "learning_rate": 0.00015719270635933285, + "loss": 2.11, + "step": 260225 + }, + { + "epoch": 0.61, + "grad_norm": 1.9921875, + "learning_rate": 0.0001571911901877971, + "loss": 2.0029, + "step": 260230 + }, + { + "epoch": 0.61, + "grad_norm": 1.859375, + "learning_rate": 0.00015718967399672376, + "loss": 2.2264, + "step": 260235 + }, + { + "epoch": 0.61, + "grad_norm": 2.125, + "learning_rate": 0.0001571881577861134, + "loss": 2.0027, + "step": 260240 + }, + { + "epoch": 0.61, + "grad_norm": 2.5, + "learning_rate": 0.00015718664155596655, + "loss": 2.2405, + "step": 260245 + }, + { + "epoch": 0.61, + "grad_norm": 1.953125, + "learning_rate": 0.00015718512530628372, + "loss": 1.9805, + "step": 260250 + }, + { + "epoch": 0.61, + "grad_norm": 2.15625, + "learning_rate": 0.00015718360903706538, + "loss": 2.1693, + "step": 260255 + }, + { + "epoch": 0.61, + "grad_norm": 3.03125, + "learning_rate": 0.0001571820927483121, + "loss": 2.2364, + "step": 260260 + }, + { + "epoch": 0.61, + "grad_norm": 1.7734375, + "learning_rate": 0.00015718057644002439, + "loss": 1.923, + "step": 260265 + }, + { + "epoch": 0.61, + "grad_norm": 2.6875, + "learning_rate": 0.00015717906011220273, + "loss": 2.0757, + "step": 260270 + }, + { + "epoch": 0.61, + "grad_norm": 2.40625, + "learning_rate": 0.00015717754376484767, + "loss": 2.2703, + "step": 260275 + }, + { + "epoch": 0.61, + "grad_norm": 2.0, + "learning_rate": 0.00015717602739795974, + "loss": 2.144, + "step": 260280 + }, + { + "epoch": 0.61, + "grad_norm": 2.078125, + "learning_rate": 0.00015717451101153945, + "loss": 2.0448, + "step": 260285 + }, + { + "epoch": 0.61, + "grad_norm": 2.046875, + "learning_rate": 0.00015717299460558728, + "loss": 2.1002, + "step": 260290 + }, + { + "epoch": 0.61, + "grad_norm": 2.0, + "learning_rate": 0.0001571714781801038, + "loss": 2.2522, + "step": 260295 + }, + { + "epoch": 0.61, + "grad_norm": 1.953125, + "learning_rate": 0.0001571699617350895, + "loss": 2.0979, + "step": 260300 + }, + { + "epoch": 0.61, + "grad_norm": 2.15625, + "learning_rate": 0.00015716844527054492, + "loss": 2.0576, + "step": 260305 + }, + { + "epoch": 0.61, + "grad_norm": 2.390625, + "learning_rate": 0.00015716692878647054, + "loss": 1.9743, + "step": 260310 + }, + { + "epoch": 0.61, + "grad_norm": 2.15625, + "learning_rate": 0.00015716541228286688, + "loss": 2.0935, + "step": 260315 + }, + { + "epoch": 0.61, + "grad_norm": 2.265625, + "learning_rate": 0.00015716389575973452, + "loss": 2.012, + "step": 260320 + }, + { + "epoch": 0.61, + "grad_norm": 2.359375, + "learning_rate": 0.00015716237921707392, + "loss": 2.0728, + "step": 260325 + }, + { + "epoch": 0.61, + "grad_norm": 2.046875, + "learning_rate": 0.00015716086265488561, + "loss": 2.0155, + "step": 260330 + }, + { + "epoch": 0.61, + "grad_norm": 1.9140625, + "learning_rate": 0.00015715934607317013, + "loss": 2.0737, + "step": 260335 + }, + { + "epoch": 0.61, + "grad_norm": 2.375, + "learning_rate": 0.00015715782947192794, + "loss": 2.0859, + "step": 260340 + }, + { + "epoch": 0.61, + "grad_norm": 2.828125, + "learning_rate": 0.00015715631285115965, + "loss": 2.1825, + "step": 260345 + }, + { + "epoch": 0.61, + "grad_norm": 2.5, + "learning_rate": 0.00015715479621086572, + "loss": 1.9347, + "step": 260350 + }, + { + "epoch": 0.61, + "grad_norm": 2.125, + "learning_rate": 0.00015715327955104664, + "loss": 1.853, + "step": 260355 + }, + { + "epoch": 0.61, + "grad_norm": 1.9375, + "learning_rate": 0.00015715176287170297, + "loss": 2.0694, + "step": 260360 + }, + { + "epoch": 0.61, + "grad_norm": 1.6328125, + "learning_rate": 0.00015715024617283526, + "loss": 2.1339, + "step": 260365 + }, + { + "epoch": 0.61, + "grad_norm": 1.65625, + "learning_rate": 0.00015714872945444394, + "loss": 2.0618, + "step": 260370 + }, + { + "epoch": 0.61, + "grad_norm": 2.515625, + "learning_rate": 0.00015714721271652962, + "loss": 1.9685, + "step": 260375 + }, + { + "epoch": 0.61, + "grad_norm": 1.96875, + "learning_rate": 0.00015714569595909274, + "loss": 2.1848, + "step": 260380 + }, + { + "epoch": 0.61, + "grad_norm": 1.7421875, + "learning_rate": 0.00015714417918213388, + "loss": 1.9958, + "step": 260385 + }, + { + "epoch": 0.61, + "grad_norm": 2.53125, + "learning_rate": 0.00015714266238565352, + "loss": 2.1531, + "step": 260390 + }, + { + "epoch": 0.61, + "grad_norm": 1.8125, + "learning_rate": 0.00015714114556965222, + "loss": 2.089, + "step": 260395 + }, + { + "epoch": 0.61, + "grad_norm": 2.203125, + "learning_rate": 0.00015713962873413042, + "loss": 2.0147, + "step": 260400 + }, + { + "epoch": 0.61, + "grad_norm": 2.078125, + "learning_rate": 0.0001571381118790887, + "loss": 2.0335, + "step": 260405 + }, + { + "epoch": 0.61, + "grad_norm": 2.0625, + "learning_rate": 0.0001571365950045276, + "loss": 2.2016, + "step": 260410 + }, + { + "epoch": 0.61, + "grad_norm": 2.453125, + "learning_rate": 0.00015713507811044756, + "loss": 2.1096, + "step": 260415 + }, + { + "epoch": 0.61, + "grad_norm": 2.03125, + "learning_rate": 0.00015713356119684914, + "loss": 2.079, + "step": 260420 + }, + { + "epoch": 0.61, + "grad_norm": 2.96875, + "learning_rate": 0.0001571320442637329, + "loss": 1.9923, + "step": 260425 + }, + { + "epoch": 0.61, + "grad_norm": 1.7421875, + "learning_rate": 0.00015713052731109928, + "loss": 2.237, + "step": 260430 + }, + { + "epoch": 0.61, + "grad_norm": 1.765625, + "learning_rate": 0.00015712901033894885, + "loss": 1.9676, + "step": 260435 + }, + { + "epoch": 0.61, + "grad_norm": 1.953125, + "learning_rate": 0.00015712749334728212, + "loss": 1.8743, + "step": 260440 + }, + { + "epoch": 0.61, + "grad_norm": 2.03125, + "learning_rate": 0.0001571259763360996, + "loss": 2.0557, + "step": 260445 + }, + { + "epoch": 0.61, + "grad_norm": 2.203125, + "learning_rate": 0.0001571244593054018, + "loss": 2.2501, + "step": 260450 + }, + { + "epoch": 0.61, + "grad_norm": 2.484375, + "learning_rate": 0.00015712294225518926, + "loss": 2.1071, + "step": 260455 + }, + { + "epoch": 0.61, + "grad_norm": 2.359375, + "learning_rate": 0.00015712142518546247, + "loss": 2.0563, + "step": 260460 + }, + { + "epoch": 0.61, + "grad_norm": 2.234375, + "learning_rate": 0.000157119908096222, + "loss": 2.2183, + "step": 260465 + }, + { + "epoch": 0.61, + "grad_norm": 2.015625, + "learning_rate": 0.0001571183909874683, + "loss": 1.9573, + "step": 260470 + }, + { + "epoch": 0.61, + "grad_norm": 2.140625, + "learning_rate": 0.00015711687385920194, + "loss": 2.2055, + "step": 260475 + }, + { + "epoch": 0.61, + "grad_norm": 2.296875, + "learning_rate": 0.0001571153567114234, + "loss": 1.9601, + "step": 260480 + }, + { + "epoch": 0.61, + "grad_norm": 2.078125, + "learning_rate": 0.0001571138395441332, + "loss": 1.9011, + "step": 260485 + }, + { + "epoch": 0.61, + "grad_norm": 1.78125, + "learning_rate": 0.00015711232235733193, + "loss": 1.9674, + "step": 260490 + }, + { + "epoch": 0.61, + "grad_norm": 2.125, + "learning_rate": 0.00015711080515102004, + "loss": 2.0872, + "step": 260495 + }, + { + "epoch": 0.61, + "grad_norm": 2.1875, + "learning_rate": 0.0001571092879251981, + "loss": 2.0944, + "step": 260500 + }, + { + "epoch": 0.61, + "grad_norm": 2.65625, + "learning_rate": 0.0001571077706798665, + "loss": 2.2469, + "step": 260505 + }, + { + "epoch": 0.61, + "grad_norm": 2.1875, + "learning_rate": 0.00015710625341502593, + "loss": 2.1582, + "step": 260510 + }, + { + "epoch": 0.61, + "grad_norm": 2.171875, + "learning_rate": 0.00015710473613067677, + "loss": 1.8998, + "step": 260515 + }, + { + "epoch": 0.61, + "grad_norm": 2.234375, + "learning_rate": 0.0001571032188268196, + "loss": 2.0832, + "step": 260520 + }, + { + "epoch": 0.61, + "grad_norm": 2.125, + "learning_rate": 0.00015710170150345497, + "loss": 2.0107, + "step": 260525 + }, + { + "epoch": 0.61, + "grad_norm": 2.34375, + "learning_rate": 0.00015710018416058335, + "loss": 2.1958, + "step": 260530 + }, + { + "epoch": 0.61, + "grad_norm": 1.984375, + "learning_rate": 0.0001570986667982053, + "loss": 2.1823, + "step": 260535 + }, + { + "epoch": 0.61, + "grad_norm": 2.328125, + "learning_rate": 0.00015709714941632122, + "loss": 2.175, + "step": 260540 + }, + { + "epoch": 0.61, + "grad_norm": 2.53125, + "learning_rate": 0.00015709563201493182, + "loss": 2.1282, + "step": 260545 + }, + { + "epoch": 0.61, + "grad_norm": 1.8671875, + "learning_rate": 0.00015709411459403748, + "loss": 2.0146, + "step": 260550 + }, + { + "epoch": 0.61, + "grad_norm": 2.46875, + "learning_rate": 0.00015709259715363875, + "loss": 1.9134, + "step": 260555 + }, + { + "epoch": 0.61, + "grad_norm": 2.953125, + "learning_rate": 0.00015709107969373617, + "loss": 2.1692, + "step": 260560 + }, + { + "epoch": 0.61, + "grad_norm": 2.171875, + "learning_rate": 0.0001570895622143302, + "loss": 1.9857, + "step": 260565 + }, + { + "epoch": 0.61, + "grad_norm": 2.453125, + "learning_rate": 0.0001570880447154214, + "loss": 2.0148, + "step": 260570 + }, + { + "epoch": 0.61, + "grad_norm": 2.234375, + "learning_rate": 0.00015708652719701032, + "loss": 1.9626, + "step": 260575 + }, + { + "epoch": 0.61, + "grad_norm": 2.40625, + "learning_rate": 0.00015708500965909745, + "loss": 2.1339, + "step": 260580 + }, + { + "epoch": 0.61, + "grad_norm": 2.203125, + "learning_rate": 0.00015708349210168332, + "loss": 2.025, + "step": 260585 + }, + { + "epoch": 0.61, + "grad_norm": 2.09375, + "learning_rate": 0.00015708197452476841, + "loss": 1.9732, + "step": 260590 + }, + { + "epoch": 0.61, + "grad_norm": 2.046875, + "learning_rate": 0.00015708045692835326, + "loss": 2.1032, + "step": 260595 + }, + { + "epoch": 0.61, + "grad_norm": 2.375, + "learning_rate": 0.00015707893931243838, + "loss": 2.1118, + "step": 260600 + }, + { + "epoch": 0.61, + "grad_norm": 1.8671875, + "learning_rate": 0.00015707742167702434, + "loss": 2.2663, + "step": 260605 + }, + { + "epoch": 0.61, + "grad_norm": 2.1875, + "learning_rate": 0.00015707590402211157, + "loss": 2.0754, + "step": 260610 + }, + { + "epoch": 0.61, + "grad_norm": 2.125, + "learning_rate": 0.00015707438634770065, + "loss": 2.1687, + "step": 260615 + }, + { + "epoch": 0.61, + "grad_norm": 2.0, + "learning_rate": 0.00015707286865379206, + "loss": 2.0245, + "step": 260620 + }, + { + "epoch": 0.61, + "grad_norm": 2.75, + "learning_rate": 0.0001570713509403864, + "loss": 2.0373, + "step": 260625 + }, + { + "epoch": 0.61, + "grad_norm": 2.421875, + "learning_rate": 0.00015706983320748412, + "loss": 1.8071, + "step": 260630 + }, + { + "epoch": 0.61, + "grad_norm": 1.8984375, + "learning_rate": 0.00015706831545508573, + "loss": 1.988, + "step": 260635 + }, + { + "epoch": 0.61, + "grad_norm": 1.7734375, + "learning_rate": 0.0001570667976831918, + "loss": 2.0387, + "step": 260640 + }, + { + "epoch": 0.61, + "grad_norm": 1.9140625, + "learning_rate": 0.00015706527989180278, + "loss": 2.0949, + "step": 260645 + }, + { + "epoch": 0.61, + "grad_norm": 2.5625, + "learning_rate": 0.00015706376208091921, + "loss": 2.0239, + "step": 260650 + }, + { + "epoch": 0.61, + "grad_norm": 2.375, + "learning_rate": 0.00015706224425054166, + "loss": 2.077, + "step": 260655 + }, + { + "epoch": 0.61, + "grad_norm": 1.6796875, + "learning_rate": 0.00015706072640067062, + "loss": 2.0587, + "step": 260660 + }, + { + "epoch": 0.61, + "grad_norm": 1.7421875, + "learning_rate": 0.00015705920853130656, + "loss": 1.9752, + "step": 260665 + }, + { + "epoch": 0.61, + "grad_norm": 2.140625, + "learning_rate": 0.00015705769064245007, + "loss": 2.006, + "step": 260670 + }, + { + "epoch": 0.61, + "grad_norm": 1.734375, + "learning_rate": 0.00015705617273410163, + "loss": 2.2758, + "step": 260675 + }, + { + "epoch": 0.61, + "grad_norm": 2.203125, + "learning_rate": 0.0001570546548062618, + "loss": 1.8232, + "step": 260680 + }, + { + "epoch": 0.61, + "grad_norm": 2.4375, + "learning_rate": 0.00015705313685893102, + "loss": 1.9555, + "step": 260685 + }, + { + "epoch": 0.61, + "grad_norm": 2.203125, + "learning_rate": 0.00015705161889210985, + "loss": 2.1569, + "step": 260690 + }, + { + "epoch": 0.61, + "grad_norm": 2.015625, + "learning_rate": 0.00015705010090579885, + "loss": 2.0756, + "step": 260695 + }, + { + "epoch": 0.61, + "grad_norm": 2.34375, + "learning_rate": 0.00015704858289999844, + "loss": 2.008, + "step": 260700 + }, + { + "epoch": 0.61, + "grad_norm": 2.234375, + "learning_rate": 0.00015704706487470926, + "loss": 2.1922, + "step": 260705 + }, + { + "epoch": 0.61, + "grad_norm": 2.265625, + "learning_rate": 0.00015704554682993176, + "loss": 2.045, + "step": 260710 + }, + { + "epoch": 0.61, + "grad_norm": 2.515625, + "learning_rate": 0.00015704402876566645, + "loss": 2.0604, + "step": 260715 + }, + { + "epoch": 0.61, + "grad_norm": 2.046875, + "learning_rate": 0.00015704251068191386, + "loss": 2.0764, + "step": 260720 + }, + { + "epoch": 0.61, + "grad_norm": 2.671875, + "learning_rate": 0.00015704099257867453, + "loss": 2.0595, + "step": 260725 + }, + { + "epoch": 0.61, + "grad_norm": 2.25, + "learning_rate": 0.00015703947445594896, + "loss": 1.995, + "step": 260730 + }, + { + "epoch": 0.61, + "grad_norm": 2.265625, + "learning_rate": 0.00015703795631373765, + "loss": 2.0802, + "step": 260735 + }, + { + "epoch": 0.61, + "grad_norm": 2.140625, + "learning_rate": 0.00015703643815204117, + "loss": 2.0124, + "step": 260740 + }, + { + "epoch": 0.61, + "grad_norm": 2.6875, + "learning_rate": 0.00015703491997086, + "loss": 1.9294, + "step": 260745 + }, + { + "epoch": 0.61, + "grad_norm": 2.125, + "learning_rate": 0.00015703340177019467, + "loss": 2.096, + "step": 260750 + }, + { + "epoch": 0.61, + "grad_norm": 2.125, + "learning_rate": 0.00015703188355004565, + "loss": 2.1893, + "step": 260755 + }, + { + "epoch": 0.61, + "grad_norm": 2.4375, + "learning_rate": 0.00015703036531041357, + "loss": 2.1209, + "step": 260760 + }, + { + "epoch": 0.61, + "grad_norm": 2.015625, + "learning_rate": 0.00015702884705129884, + "loss": 1.8559, + "step": 260765 + }, + { + "epoch": 0.61, + "grad_norm": 2.671875, + "learning_rate": 0.00015702732877270203, + "loss": 1.7734, + "step": 260770 + }, + { + "epoch": 0.61, + "grad_norm": 2.0, + "learning_rate": 0.00015702581047462365, + "loss": 1.9918, + "step": 260775 + }, + { + "epoch": 0.61, + "grad_norm": 2.140625, + "learning_rate": 0.00015702429215706424, + "loss": 2.2052, + "step": 260780 + }, + { + "epoch": 0.61, + "grad_norm": 3.359375, + "learning_rate": 0.0001570227738200243, + "loss": 2.0951, + "step": 260785 + }, + { + "epoch": 0.61, + "grad_norm": 1.921875, + "learning_rate": 0.0001570212554635043, + "loss": 2.2135, + "step": 260790 + }, + { + "epoch": 0.61, + "grad_norm": 1.875, + "learning_rate": 0.00015701973708750482, + "loss": 2.0751, + "step": 260795 + }, + { + "epoch": 0.61, + "grad_norm": 2.40625, + "learning_rate": 0.0001570182186920264, + "loss": 2.0525, + "step": 260800 + }, + { + "epoch": 0.61, + "grad_norm": 2.546875, + "learning_rate": 0.0001570167002770695, + "loss": 2.2845, + "step": 260805 + }, + { + "epoch": 0.61, + "grad_norm": 1.9921875, + "learning_rate": 0.00015701518184263466, + "loss": 2.0449, + "step": 260810 + }, + { + "epoch": 0.61, + "grad_norm": 1.8125, + "learning_rate": 0.00015701366338872238, + "loss": 1.9974, + "step": 260815 + }, + { + "epoch": 0.61, + "grad_norm": 1.6875, + "learning_rate": 0.00015701214491533325, + "loss": 2.0989, + "step": 260820 + }, + { + "epoch": 0.61, + "grad_norm": 2.546875, + "learning_rate": 0.0001570106264224677, + "loss": 1.9022, + "step": 260825 + }, + { + "epoch": 0.61, + "grad_norm": 2.28125, + "learning_rate": 0.0001570091079101263, + "loss": 2.1828, + "step": 260830 + }, + { + "epoch": 0.61, + "grad_norm": 2.25, + "learning_rate": 0.00015700758937830953, + "loss": 2.0107, + "step": 260835 + }, + { + "epoch": 0.61, + "grad_norm": 2.078125, + "learning_rate": 0.00015700607082701798, + "loss": 2.0165, + "step": 260840 + }, + { + "epoch": 0.61, + "grad_norm": 2.1875, + "learning_rate": 0.00015700455225625207, + "loss": 1.9834, + "step": 260845 + }, + { + "epoch": 0.61, + "grad_norm": 2.515625, + "learning_rate": 0.0001570030336660124, + "loss": 2.0578, + "step": 260850 + }, + { + "epoch": 0.61, + "grad_norm": 2.125, + "learning_rate": 0.0001570015150562995, + "loss": 2.1629, + "step": 260855 + }, + { + "epoch": 0.61, + "grad_norm": 2.234375, + "learning_rate": 0.00015699999642711373, + "loss": 2.1889, + "step": 260860 + }, + { + "epoch": 0.61, + "grad_norm": 2.078125, + "learning_rate": 0.00015699847777845584, + "loss": 1.9207, + "step": 260865 + }, + { + "epoch": 0.61, + "grad_norm": 2.125, + "learning_rate": 0.0001569969591103262, + "loss": 2.0961, + "step": 260870 + }, + { + "epoch": 0.61, + "grad_norm": 1.96875, + "learning_rate": 0.00015699544042272536, + "loss": 1.8406, + "step": 260875 + }, + { + "epoch": 0.61, + "grad_norm": 2.234375, + "learning_rate": 0.00015699392171565383, + "loss": 2.0867, + "step": 260880 + }, + { + "epoch": 0.61, + "grad_norm": 2.09375, + "learning_rate": 0.00015699240298911216, + "loss": 2.1545, + "step": 260885 + }, + { + "epoch": 0.61, + "grad_norm": 2.09375, + "learning_rate": 0.00015699088424310083, + "loss": 2.1786, + "step": 260890 + }, + { + "epoch": 0.61, + "grad_norm": 1.9765625, + "learning_rate": 0.00015698936547762045, + "loss": 2.0816, + "step": 260895 + }, + { + "epoch": 0.61, + "grad_norm": 2.09375, + "learning_rate": 0.00015698784669267138, + "loss": 2.0495, + "step": 260900 + }, + { + "epoch": 0.61, + "grad_norm": 2.421875, + "learning_rate": 0.0001569863278882543, + "loss": 2.1754, + "step": 260905 + }, + { + "epoch": 0.61, + "grad_norm": 2.015625, + "learning_rate": 0.00015698480906436963, + "loss": 1.9026, + "step": 260910 + }, + { + "epoch": 0.61, + "grad_norm": 2.34375, + "learning_rate": 0.0001569832902210179, + "loss": 2.1509, + "step": 260915 + }, + { + "epoch": 0.61, + "grad_norm": 2.0625, + "learning_rate": 0.00015698177135819964, + "loss": 2.0733, + "step": 260920 + }, + { + "epoch": 0.61, + "grad_norm": 2.265625, + "learning_rate": 0.00015698025247591542, + "loss": 2.109, + "step": 260925 + }, + { + "epoch": 0.61, + "grad_norm": 3.0, + "learning_rate": 0.00015697873357416566, + "loss": 1.9573, + "step": 260930 + }, + { + "epoch": 0.61, + "grad_norm": 2.15625, + "learning_rate": 0.00015697721465295097, + "loss": 1.993, + "step": 260935 + }, + { + "epoch": 0.61, + "grad_norm": 2.609375, + "learning_rate": 0.0001569756957122718, + "loss": 2.1943, + "step": 260940 + }, + { + "epoch": 0.61, + "grad_norm": 2.40625, + "learning_rate": 0.0001569741767521287, + "loss": 1.9053, + "step": 260945 + }, + { + "epoch": 0.61, + "grad_norm": 2.234375, + "learning_rate": 0.0001569726577725222, + "loss": 2.2602, + "step": 260950 + }, + { + "epoch": 0.61, + "grad_norm": 1.875, + "learning_rate": 0.00015697113877345282, + "loss": 2.0673, + "step": 260955 + }, + { + "epoch": 0.61, + "grad_norm": 2.3125, + "learning_rate": 0.00015696961975492104, + "loss": 2.0312, + "step": 260960 + }, + { + "epoch": 0.61, + "grad_norm": 2.328125, + "learning_rate": 0.00015696810071692743, + "loss": 2.1169, + "step": 260965 + }, + { + "epoch": 0.61, + "grad_norm": 2.609375, + "learning_rate": 0.00015696658165947243, + "loss": 2.1634, + "step": 260970 + }, + { + "epoch": 0.61, + "grad_norm": 2.203125, + "learning_rate": 0.0001569650625825567, + "loss": 2.0364, + "step": 260975 + }, + { + "epoch": 0.61, + "grad_norm": 1.9140625, + "learning_rate": 0.0001569635434861806, + "loss": 1.9395, + "step": 260980 + }, + { + "epoch": 0.61, + "grad_norm": 2.375, + "learning_rate": 0.00015696202437034476, + "loss": 1.9801, + "step": 260985 + }, + { + "epoch": 0.61, + "grad_norm": 2.03125, + "learning_rate": 0.00015696050523504967, + "loss": 2.0036, + "step": 260990 + }, + { + "epoch": 0.61, + "grad_norm": 2.140625, + "learning_rate": 0.00015695898608029577, + "loss": 2.1055, + "step": 260995 + }, + { + "epoch": 0.61, + "grad_norm": 2.328125, + "learning_rate": 0.0001569574669060837, + "loss": 2.0412, + "step": 261000 + }, + { + "epoch": 0.61, + "grad_norm": 2.34375, + "learning_rate": 0.00015695594771241393, + "loss": 2.0912, + "step": 261005 + }, + { + "epoch": 0.61, + "grad_norm": 2.421875, + "learning_rate": 0.00015695442849928694, + "loss": 2.0946, + "step": 261010 + }, + { + "epoch": 0.61, + "grad_norm": 2.375, + "learning_rate": 0.00015695290926670335, + "loss": 1.9201, + "step": 261015 + }, + { + "epoch": 0.61, + "grad_norm": 2.25, + "learning_rate": 0.00015695139001466353, + "loss": 2.197, + "step": 261020 + }, + { + "epoch": 0.61, + "grad_norm": 2.21875, + "learning_rate": 0.0001569498707431681, + "loss": 2.0621, + "step": 261025 + }, + { + "epoch": 0.61, + "grad_norm": 2.5, + "learning_rate": 0.0001569483514522176, + "loss": 2.047, + "step": 261030 + }, + { + "epoch": 0.61, + "grad_norm": 2.4375, + "learning_rate": 0.00015694683214181253, + "loss": 1.9658, + "step": 261035 + }, + { + "epoch": 0.61, + "grad_norm": 2.265625, + "learning_rate": 0.00015694531281195334, + "loss": 2.1841, + "step": 261040 + }, + { + "epoch": 0.61, + "grad_norm": 1.890625, + "learning_rate": 0.0001569437934626406, + "loss": 2.0129, + "step": 261045 + }, + { + "epoch": 0.61, + "grad_norm": 2.296875, + "learning_rate": 0.00015694227409387482, + "loss": 2.1401, + "step": 261050 + }, + { + "epoch": 0.61, + "grad_norm": 2.59375, + "learning_rate": 0.00015694075470565656, + "loss": 2.0521, + "step": 261055 + }, + { + "epoch": 0.61, + "grad_norm": 1.859375, + "learning_rate": 0.0001569392352979863, + "loss": 1.9213, + "step": 261060 + }, + { + "epoch": 0.61, + "grad_norm": 2.078125, + "learning_rate": 0.00015693771587086457, + "loss": 2.0445, + "step": 261065 + }, + { + "epoch": 0.61, + "grad_norm": 1.9609375, + "learning_rate": 0.00015693619642429186, + "loss": 2.1727, + "step": 261070 + }, + { + "epoch": 0.61, + "grad_norm": 2.09375, + "learning_rate": 0.0001569346769582687, + "loss": 1.9824, + "step": 261075 + }, + { + "epoch": 0.61, + "grad_norm": 2.25, + "learning_rate": 0.00015693315747279565, + "loss": 1.9692, + "step": 261080 + }, + { + "epoch": 0.61, + "grad_norm": 2.890625, + "learning_rate": 0.0001569316379678732, + "loss": 2.0745, + "step": 261085 + }, + { + "epoch": 0.61, + "grad_norm": 2.265625, + "learning_rate": 0.00015693011844350186, + "loss": 2.1197, + "step": 261090 + }, + { + "epoch": 0.61, + "grad_norm": 2.09375, + "learning_rate": 0.00015692859889968218, + "loss": 2.1342, + "step": 261095 + }, + { + "epoch": 0.61, + "grad_norm": 2.046875, + "learning_rate": 0.00015692707933641465, + "loss": 2.0432, + "step": 261100 + }, + { + "epoch": 0.61, + "grad_norm": 2.28125, + "learning_rate": 0.00015692555975369975, + "loss": 2.026, + "step": 261105 + }, + { + "epoch": 0.61, + "grad_norm": 2.109375, + "learning_rate": 0.0001569240401515381, + "loss": 2.3132, + "step": 261110 + }, + { + "epoch": 0.61, + "grad_norm": 2.046875, + "learning_rate": 0.00015692252052993015, + "loss": 1.9524, + "step": 261115 + }, + { + "epoch": 0.61, + "grad_norm": 2.015625, + "learning_rate": 0.00015692100088887645, + "loss": 2.0472, + "step": 261120 + }, + { + "epoch": 0.61, + "grad_norm": 1.796875, + "learning_rate": 0.00015691948122837748, + "loss": 2.0273, + "step": 261125 + }, + { + "epoch": 0.61, + "grad_norm": 1.859375, + "learning_rate": 0.0001569179615484338, + "loss": 2.0196, + "step": 261130 + }, + { + "epoch": 0.61, + "grad_norm": 2.09375, + "learning_rate": 0.0001569164418490459, + "loss": 1.9551, + "step": 261135 + }, + { + "epoch": 0.61, + "grad_norm": 1.640625, + "learning_rate": 0.0001569149221302143, + "loss": 1.8719, + "step": 261140 + }, + { + "epoch": 0.61, + "grad_norm": 2.0625, + "learning_rate": 0.00015691340239193955, + "loss": 2.1025, + "step": 261145 + }, + { + "epoch": 0.61, + "grad_norm": 1.96875, + "learning_rate": 0.00015691188263422214, + "loss": 1.9106, + "step": 261150 + }, + { + "epoch": 0.61, + "grad_norm": 1.96875, + "learning_rate": 0.0001569103628570626, + "loss": 2.0865, + "step": 261155 + }, + { + "epoch": 0.61, + "grad_norm": 2.625, + "learning_rate": 0.00015690884306046146, + "loss": 1.9914, + "step": 261160 + }, + { + "epoch": 0.61, + "grad_norm": 2.0625, + "learning_rate": 0.00015690732324441924, + "loss": 2.0919, + "step": 261165 + }, + { + "epoch": 0.61, + "grad_norm": 2.109375, + "learning_rate": 0.00015690580340893643, + "loss": 2.0318, + "step": 261170 + }, + { + "epoch": 0.61, + "grad_norm": 1.6796875, + "learning_rate": 0.00015690428355401355, + "loss": 1.8012, + "step": 261175 + }, + { + "epoch": 0.61, + "grad_norm": 2.609375, + "learning_rate": 0.00015690276367965114, + "loss": 2.056, + "step": 261180 + }, + { + "epoch": 0.61, + "grad_norm": 1.7890625, + "learning_rate": 0.0001569012437858497, + "loss": 1.8003, + "step": 261185 + }, + { + "epoch": 0.61, + "grad_norm": 1.796875, + "learning_rate": 0.0001568997238726098, + "loss": 1.9647, + "step": 261190 + }, + { + "epoch": 0.61, + "grad_norm": 2.3125, + "learning_rate": 0.0001568982039399319, + "loss": 2.0809, + "step": 261195 + }, + { + "epoch": 0.61, + "grad_norm": 2.109375, + "learning_rate": 0.00015689668398781654, + "loss": 2.088, + "step": 261200 + }, + { + "epoch": 0.61, + "grad_norm": 2.078125, + "learning_rate": 0.00015689516401626427, + "loss": 2.0217, + "step": 261205 + }, + { + "epoch": 0.61, + "grad_norm": 2.078125, + "learning_rate": 0.00015689364402527554, + "loss": 1.8842, + "step": 261210 + }, + { + "epoch": 0.61, + "grad_norm": 2.09375, + "learning_rate": 0.0001568921240148509, + "loss": 2.0353, + "step": 261215 + }, + { + "epoch": 0.61, + "grad_norm": 2.234375, + "learning_rate": 0.00015689060398499092, + "loss": 2.0593, + "step": 261220 + }, + { + "epoch": 0.61, + "grad_norm": 2.40625, + "learning_rate": 0.00015688908393569608, + "loss": 2.089, + "step": 261225 + }, + { + "epoch": 0.61, + "grad_norm": 2.15625, + "learning_rate": 0.0001568875638669669, + "loss": 1.9886, + "step": 261230 + }, + { + "epoch": 0.61, + "grad_norm": 2.15625, + "learning_rate": 0.00015688604377880387, + "loss": 1.9353, + "step": 261235 + }, + { + "epoch": 0.61, + "grad_norm": 2.390625, + "learning_rate": 0.00015688452367120754, + "loss": 2.0387, + "step": 261240 + }, + { + "epoch": 0.61, + "grad_norm": 2.171875, + "learning_rate": 0.00015688300354417843, + "loss": 2.1723, + "step": 261245 + }, + { + "epoch": 0.61, + "grad_norm": 2.5625, + "learning_rate": 0.00015688148339771706, + "loss": 2.0268, + "step": 261250 + }, + { + "epoch": 0.61, + "grad_norm": 2.28125, + "learning_rate": 0.00015687996323182393, + "loss": 2.004, + "step": 261255 + }, + { + "epoch": 0.61, + "grad_norm": 2.109375, + "learning_rate": 0.00015687844304649958, + "loss": 1.9027, + "step": 261260 + }, + { + "epoch": 0.61, + "grad_norm": 2.3125, + "learning_rate": 0.0001568769228417445, + "loss": 1.8646, + "step": 261265 + }, + { + "epoch": 0.61, + "grad_norm": 1.90625, + "learning_rate": 0.00015687540261755925, + "loss": 1.9517, + "step": 261270 + }, + { + "epoch": 0.61, + "grad_norm": 2.609375, + "learning_rate": 0.00015687388237394436, + "loss": 2.2397, + "step": 261275 + }, + { + "epoch": 0.61, + "grad_norm": 2.125, + "learning_rate": 0.00015687236211090028, + "loss": 1.8839, + "step": 261280 + }, + { + "epoch": 0.61, + "grad_norm": 2.421875, + "learning_rate": 0.00015687084182842759, + "loss": 2.0445, + "step": 261285 + }, + { + "epoch": 0.61, + "grad_norm": 2.34375, + "learning_rate": 0.00015686932152652675, + "loss": 2.0548, + "step": 261290 + }, + { + "epoch": 0.61, + "grad_norm": 3.0, + "learning_rate": 0.00015686780120519837, + "loss": 1.9655, + "step": 261295 + }, + { + "epoch": 0.61, + "grad_norm": 2.390625, + "learning_rate": 0.0001568662808644429, + "loss": 2.2051, + "step": 261300 + }, + { + "epoch": 0.61, + "grad_norm": 2.21875, + "learning_rate": 0.00015686476050426088, + "loss": 1.9344, + "step": 261305 + }, + { + "epoch": 0.61, + "grad_norm": 2.21875, + "learning_rate": 0.00015686324012465283, + "loss": 1.9717, + "step": 261310 + }, + { + "epoch": 0.61, + "grad_norm": 1.921875, + "learning_rate": 0.00015686171972561925, + "loss": 2.0243, + "step": 261315 + }, + { + "epoch": 0.61, + "grad_norm": 2.03125, + "learning_rate": 0.00015686019930716069, + "loss": 2.0565, + "step": 261320 + }, + { + "epoch": 0.61, + "grad_norm": 2.46875, + "learning_rate": 0.00015685867886927764, + "loss": 1.9238, + "step": 261325 + }, + { + "epoch": 0.61, + "grad_norm": 2.03125, + "learning_rate": 0.00015685715841197063, + "loss": 1.9821, + "step": 261330 + }, + { + "epoch": 0.62, + "grad_norm": 2.21875, + "learning_rate": 0.0001568556379352402, + "loss": 2.1154, + "step": 261335 + }, + { + "epoch": 0.62, + "grad_norm": 2.109375, + "learning_rate": 0.00015685411743908686, + "loss": 2.0464, + "step": 261340 + }, + { + "epoch": 0.62, + "grad_norm": 2.015625, + "learning_rate": 0.0001568525969235111, + "loss": 2.1393, + "step": 261345 + }, + { + "epoch": 0.62, + "grad_norm": 1.9609375, + "learning_rate": 0.0001568510763885135, + "loss": 2.0376, + "step": 261350 + }, + { + "epoch": 0.62, + "grad_norm": 2.21875, + "learning_rate": 0.00015684955583409452, + "loss": 2.177, + "step": 261355 + }, + { + "epoch": 0.62, + "grad_norm": 2.390625, + "learning_rate": 0.0001568480352602547, + "loss": 2.1179, + "step": 261360 + }, + { + "epoch": 0.62, + "grad_norm": 2.140625, + "learning_rate": 0.00015684651466699452, + "loss": 2.2344, + "step": 261365 + }, + { + "epoch": 0.62, + "grad_norm": 2.375, + "learning_rate": 0.0001568449940543146, + "loss": 2.0786, + "step": 261370 + }, + { + "epoch": 0.62, + "grad_norm": 1.9375, + "learning_rate": 0.00015684347342221537, + "loss": 2.0835, + "step": 261375 + }, + { + "epoch": 0.62, + "grad_norm": 1.984375, + "learning_rate": 0.00015684195277069736, + "loss": 2.2159, + "step": 261380 + }, + { + "epoch": 0.62, + "grad_norm": 2.625, + "learning_rate": 0.00015684043209976116, + "loss": 2.0441, + "step": 261385 + }, + { + "epoch": 0.62, + "grad_norm": 2.09375, + "learning_rate": 0.0001568389114094072, + "loss": 2.1513, + "step": 261390 + }, + { + "epoch": 0.62, + "grad_norm": 2.09375, + "learning_rate": 0.00015683739069963603, + "loss": 2.2152, + "step": 261395 + }, + { + "epoch": 0.62, + "grad_norm": 1.8984375, + "learning_rate": 0.0001568358699704482, + "loss": 2.0972, + "step": 261400 + }, + { + "epoch": 0.62, + "grad_norm": 1.9453125, + "learning_rate": 0.00015683434922184421, + "loss": 1.9079, + "step": 261405 + }, + { + "epoch": 0.62, + "grad_norm": 2.1875, + "learning_rate": 0.00015683282845382456, + "loss": 2.086, + "step": 261410 + }, + { + "epoch": 0.62, + "grad_norm": 2.75, + "learning_rate": 0.00015683130766638978, + "loss": 2.2592, + "step": 261415 + }, + { + "epoch": 0.62, + "grad_norm": 2.34375, + "learning_rate": 0.0001568297868595404, + "loss": 2.0255, + "step": 261420 + }, + { + "epoch": 0.62, + "grad_norm": 2.21875, + "learning_rate": 0.00015682826603327695, + "loss": 2.1184, + "step": 261425 + }, + { + "epoch": 0.62, + "grad_norm": 1.84375, + "learning_rate": 0.0001568267451875999, + "loss": 2.0827, + "step": 261430 + }, + { + "epoch": 0.62, + "grad_norm": 2.5, + "learning_rate": 0.00015682522432250984, + "loss": 2.3114, + "step": 261435 + }, + { + "epoch": 0.62, + "grad_norm": 2.109375, + "learning_rate": 0.00015682370343800725, + "loss": 2.2212, + "step": 261440 + }, + { + "epoch": 0.62, + "grad_norm": 2.28125, + "learning_rate": 0.00015682218253409262, + "loss": 2.0748, + "step": 261445 + }, + { + "epoch": 0.62, + "grad_norm": 2.140625, + "learning_rate": 0.0001568206616107665, + "loss": 2.0165, + "step": 261450 + }, + { + "epoch": 0.62, + "grad_norm": 2.25, + "learning_rate": 0.00015681914066802946, + "loss": 2.0244, + "step": 261455 + }, + { + "epoch": 0.62, + "grad_norm": 2.140625, + "learning_rate": 0.00015681761970588195, + "loss": 2.0682, + "step": 261460 + }, + { + "epoch": 0.62, + "grad_norm": 2.265625, + "learning_rate": 0.0001568160987243245, + "loss": 2.0374, + "step": 261465 + }, + { + "epoch": 0.62, + "grad_norm": 2.359375, + "learning_rate": 0.00015681457772335767, + "loss": 2.0682, + "step": 261470 + }, + { + "epoch": 0.62, + "grad_norm": 2.640625, + "learning_rate": 0.0001568130567029819, + "loss": 2.2666, + "step": 261475 + }, + { + "epoch": 0.62, + "grad_norm": 2.1875, + "learning_rate": 0.00015681153566319777, + "loss": 2.0143, + "step": 261480 + }, + { + "epoch": 0.62, + "grad_norm": 2.265625, + "learning_rate": 0.00015681001460400581, + "loss": 2.1061, + "step": 261485 + }, + { + "epoch": 0.62, + "grad_norm": 2.0625, + "learning_rate": 0.00015680849352540653, + "loss": 2.1335, + "step": 261490 + }, + { + "epoch": 0.62, + "grad_norm": 1.921875, + "learning_rate": 0.0001568069724274004, + "loss": 2.0194, + "step": 261495 + }, + { + "epoch": 0.62, + "grad_norm": 2.25, + "learning_rate": 0.000156805451309988, + "loss": 2.2749, + "step": 261500 + }, + { + "epoch": 0.62, + "grad_norm": 2.328125, + "learning_rate": 0.0001568039301731698, + "loss": 2.1034, + "step": 261505 + }, + { + "epoch": 0.62, + "grad_norm": 2.15625, + "learning_rate": 0.00015680240901694642, + "loss": 2.1085, + "step": 261510 + }, + { + "epoch": 0.62, + "grad_norm": 1.8203125, + "learning_rate": 0.00015680088784131825, + "loss": 2.1734, + "step": 261515 + }, + { + "epoch": 0.62, + "grad_norm": 2.46875, + "learning_rate": 0.00015679936664628587, + "loss": 2.1874, + "step": 261520 + }, + { + "epoch": 0.62, + "grad_norm": 2.0, + "learning_rate": 0.00015679784543184982, + "loss": 2.0225, + "step": 261525 + }, + { + "epoch": 0.62, + "grad_norm": 2.71875, + "learning_rate": 0.00015679632419801057, + "loss": 2.2112, + "step": 261530 + }, + { + "epoch": 0.62, + "grad_norm": 2.078125, + "learning_rate": 0.00015679480294476867, + "loss": 2.1078, + "step": 261535 + }, + { + "epoch": 0.62, + "grad_norm": 2.484375, + "learning_rate": 0.00015679328167212463, + "loss": 2.0656, + "step": 261540 + }, + { + "epoch": 0.62, + "grad_norm": 2.46875, + "learning_rate": 0.00015679176038007898, + "loss": 2.0379, + "step": 261545 + }, + { + "epoch": 0.62, + "grad_norm": 2.328125, + "learning_rate": 0.00015679023906863226, + "loss": 2.2091, + "step": 261550 + }, + { + "epoch": 0.62, + "grad_norm": 2.703125, + "learning_rate": 0.0001567887177377849, + "loss": 2.0398, + "step": 261555 + }, + { + "epoch": 0.62, + "grad_norm": 2.390625, + "learning_rate": 0.00015678719638753755, + "loss": 1.8276, + "step": 261560 + }, + { + "epoch": 0.62, + "grad_norm": 2.453125, + "learning_rate": 0.00015678567501789065, + "loss": 1.9303, + "step": 261565 + }, + { + "epoch": 0.62, + "grad_norm": 1.8671875, + "learning_rate": 0.00015678415362884473, + "loss": 2.1204, + "step": 261570 + }, + { + "epoch": 0.62, + "grad_norm": 2.15625, + "learning_rate": 0.00015678263222040033, + "loss": 1.936, + "step": 261575 + }, + { + "epoch": 0.62, + "grad_norm": 2.25, + "learning_rate": 0.0001567811107925579, + "loss": 2.1672, + "step": 261580 + }, + { + "epoch": 0.62, + "grad_norm": 2.25, + "learning_rate": 0.00015677958934531806, + "loss": 2.1268, + "step": 261585 + }, + { + "epoch": 0.62, + "grad_norm": 1.8359375, + "learning_rate": 0.00015677806787868127, + "loss": 2.0855, + "step": 261590 + }, + { + "epoch": 0.62, + "grad_norm": 2.0, + "learning_rate": 0.00015677654639264805, + "loss": 2.0748, + "step": 261595 + }, + { + "epoch": 0.62, + "grad_norm": 2.203125, + "learning_rate": 0.00015677502488721894, + "loss": 1.9503, + "step": 261600 + }, + { + "epoch": 0.62, + "grad_norm": 2.1875, + "learning_rate": 0.00015677350336239443, + "loss": 1.9675, + "step": 261605 + }, + { + "epoch": 0.62, + "grad_norm": 2.109375, + "learning_rate": 0.00015677198181817512, + "loss": 2.0109, + "step": 261610 + }, + { + "epoch": 0.62, + "grad_norm": 2.484375, + "learning_rate": 0.00015677046025456144, + "loss": 2.1333, + "step": 261615 + }, + { + "epoch": 0.62, + "grad_norm": 2.25, + "learning_rate": 0.00015676893867155392, + "loss": 1.9923, + "step": 261620 + }, + { + "epoch": 0.62, + "grad_norm": 2.6875, + "learning_rate": 0.00015676741706915312, + "loss": 2.0955, + "step": 261625 + }, + { + "epoch": 0.62, + "grad_norm": 2.390625, + "learning_rate": 0.00015676589544735955, + "loss": 2.004, + "step": 261630 + }, + { + "epoch": 0.62, + "grad_norm": 2.59375, + "learning_rate": 0.0001567643738061737, + "loss": 2.1532, + "step": 261635 + }, + { + "epoch": 0.62, + "grad_norm": 2.046875, + "learning_rate": 0.00015676285214559613, + "loss": 2.0752, + "step": 261640 + }, + { + "epoch": 0.62, + "grad_norm": 2.421875, + "learning_rate": 0.00015676133046562733, + "loss": 2.1776, + "step": 261645 + }, + { + "epoch": 0.62, + "grad_norm": 1.859375, + "learning_rate": 0.00015675980876626785, + "loss": 2.0506, + "step": 261650 + }, + { + "epoch": 0.62, + "grad_norm": 1.8046875, + "learning_rate": 0.00015675828704751817, + "loss": 2.1382, + "step": 261655 + }, + { + "epoch": 0.62, + "grad_norm": 2.15625, + "learning_rate": 0.0001567567653093788, + "loss": 2.1097, + "step": 261660 + }, + { + "epoch": 0.62, + "grad_norm": 2.328125, + "learning_rate": 0.0001567552435518503, + "loss": 2.232, + "step": 261665 + }, + { + "epoch": 0.62, + "grad_norm": 2.171875, + "learning_rate": 0.0001567537217749332, + "loss": 2.1511, + "step": 261670 + }, + { + "epoch": 0.62, + "grad_norm": 2.234375, + "learning_rate": 0.00015675219997862802, + "loss": 2.0282, + "step": 261675 + }, + { + "epoch": 0.62, + "grad_norm": 2.484375, + "learning_rate": 0.00015675067816293525, + "loss": 1.908, + "step": 261680 + }, + { + "epoch": 0.62, + "grad_norm": 2.09375, + "learning_rate": 0.00015674915632785542, + "loss": 1.9922, + "step": 261685 + }, + { + "epoch": 0.62, + "grad_norm": 2.3125, + "learning_rate": 0.000156747634473389, + "loss": 1.9259, + "step": 261690 + }, + { + "epoch": 0.62, + "grad_norm": 1.9375, + "learning_rate": 0.0001567461125995366, + "loss": 2.2491, + "step": 261695 + }, + { + "epoch": 0.62, + "grad_norm": 2.203125, + "learning_rate": 0.0001567445907062987, + "loss": 2.0582, + "step": 261700 + }, + { + "epoch": 0.62, + "grad_norm": 2.046875, + "learning_rate": 0.00015674306879367582, + "loss": 2.2127, + "step": 261705 + }, + { + "epoch": 0.62, + "grad_norm": 1.9453125, + "learning_rate": 0.00015674154686166845, + "loss": 2.0246, + "step": 261710 + }, + { + "epoch": 0.62, + "grad_norm": 1.7421875, + "learning_rate": 0.00015674002491027718, + "loss": 2.0739, + "step": 261715 + }, + { + "epoch": 0.62, + "grad_norm": 2.234375, + "learning_rate": 0.00015673850293950245, + "loss": 2.1409, + "step": 261720 + }, + { + "epoch": 0.62, + "grad_norm": 2.140625, + "learning_rate": 0.00015673698094934485, + "loss": 1.9664, + "step": 261725 + }, + { + "epoch": 0.62, + "grad_norm": 2.515625, + "learning_rate": 0.00015673545893980486, + "loss": 2.1006, + "step": 261730 + }, + { + "epoch": 0.62, + "grad_norm": 2.125, + "learning_rate": 0.000156733936910883, + "loss": 2.0409, + "step": 261735 + }, + { + "epoch": 0.62, + "grad_norm": 2.078125, + "learning_rate": 0.0001567324148625798, + "loss": 2.1069, + "step": 261740 + }, + { + "epoch": 0.62, + "grad_norm": 2.09375, + "learning_rate": 0.00015673089279489576, + "loss": 2.0769, + "step": 261745 + }, + { + "epoch": 0.62, + "grad_norm": 2.15625, + "learning_rate": 0.00015672937070783144, + "loss": 2.1925, + "step": 261750 + }, + { + "epoch": 0.62, + "grad_norm": 2.5625, + "learning_rate": 0.00015672784860138735, + "loss": 2.0417, + "step": 261755 + }, + { + "epoch": 0.62, + "grad_norm": 1.921875, + "learning_rate": 0.00015672632647556397, + "loss": 2.1555, + "step": 261760 + }, + { + "epoch": 0.62, + "grad_norm": 1.71875, + "learning_rate": 0.00015672480433036188, + "loss": 1.938, + "step": 261765 + }, + { + "epoch": 0.62, + "grad_norm": 1.9609375, + "learning_rate": 0.00015672328216578154, + "loss": 1.9077, + "step": 261770 + }, + { + "epoch": 0.62, + "grad_norm": 2.390625, + "learning_rate": 0.0001567217599818235, + "loss": 2.011, + "step": 261775 + }, + { + "epoch": 0.62, + "grad_norm": 2.125, + "learning_rate": 0.0001567202377784883, + "loss": 2.1721, + "step": 261780 + }, + { + "epoch": 0.62, + "grad_norm": 2.171875, + "learning_rate": 0.00015671871555577641, + "loss": 2.1755, + "step": 261785 + }, + { + "epoch": 0.62, + "grad_norm": 2.09375, + "learning_rate": 0.0001567171933136884, + "loss": 2.0018, + "step": 261790 + }, + { + "epoch": 0.62, + "grad_norm": 2.203125, + "learning_rate": 0.00015671567105222478, + "loss": 2.0882, + "step": 261795 + }, + { + "epoch": 0.62, + "grad_norm": 2.078125, + "learning_rate": 0.00015671414877138602, + "loss": 2.0487, + "step": 261800 + }, + { + "epoch": 0.62, + "grad_norm": 2.15625, + "learning_rate": 0.0001567126264711727, + "loss": 2.2296, + "step": 261805 + }, + { + "epoch": 0.62, + "grad_norm": 2.125, + "learning_rate": 0.00015671110415158533, + "loss": 2.0452, + "step": 261810 + }, + { + "epoch": 0.62, + "grad_norm": 1.984375, + "learning_rate": 0.00015670958181262438, + "loss": 2.1406, + "step": 261815 + }, + { + "epoch": 0.62, + "grad_norm": 2.21875, + "learning_rate": 0.00015670805945429045, + "loss": 2.1292, + "step": 261820 + }, + { + "epoch": 0.62, + "grad_norm": 2.1875, + "learning_rate": 0.00015670653707658397, + "loss": 2.1341, + "step": 261825 + }, + { + "epoch": 0.62, + "grad_norm": 2.40625, + "learning_rate": 0.00015670501467950556, + "loss": 1.9824, + "step": 261830 + }, + { + "epoch": 0.62, + "grad_norm": 2.546875, + "learning_rate": 0.00015670349226305566, + "loss": 2.2863, + "step": 261835 + }, + { + "epoch": 0.62, + "grad_norm": 2.453125, + "learning_rate": 0.00015670196982723485, + "loss": 2.0002, + "step": 261840 + }, + { + "epoch": 0.62, + "grad_norm": 2.53125, + "learning_rate": 0.00015670044737204356, + "loss": 2.2802, + "step": 261845 + }, + { + "epoch": 0.62, + "grad_norm": 2.3125, + "learning_rate": 0.00015669892489748242, + "loss": 2.2398, + "step": 261850 + }, + { + "epoch": 0.62, + "grad_norm": 2.203125, + "learning_rate": 0.00015669740240355185, + "loss": 2.1463, + "step": 261855 + }, + { + "epoch": 0.62, + "grad_norm": 2.109375, + "learning_rate": 0.00015669587989025247, + "loss": 1.9052, + "step": 261860 + }, + { + "epoch": 0.62, + "grad_norm": 1.921875, + "learning_rate": 0.00015669435735758474, + "loss": 2.0459, + "step": 261865 + }, + { + "epoch": 0.62, + "grad_norm": 2.34375, + "learning_rate": 0.0001566928348055492, + "loss": 2.156, + "step": 261870 + }, + { + "epoch": 0.62, + "grad_norm": 2.171875, + "learning_rate": 0.00015669131223414632, + "loss": 2.1327, + "step": 261875 + }, + { + "epoch": 0.62, + "grad_norm": 2.53125, + "learning_rate": 0.00015668978964337668, + "loss": 1.9966, + "step": 261880 + }, + { + "epoch": 0.62, + "grad_norm": 2.171875, + "learning_rate": 0.0001566882670332408, + "loss": 2.075, + "step": 261885 + }, + { + "epoch": 0.62, + "grad_norm": 2.359375, + "learning_rate": 0.00015668674440373913, + "loss": 2.0735, + "step": 261890 + }, + { + "epoch": 0.62, + "grad_norm": 1.828125, + "learning_rate": 0.00015668522175487227, + "loss": 1.9194, + "step": 261895 + }, + { + "epoch": 0.62, + "grad_norm": 1.921875, + "learning_rate": 0.0001566836990866407, + "loss": 1.9809, + "step": 261900 + }, + { + "epoch": 0.62, + "grad_norm": 2.25, + "learning_rate": 0.00015668217639904498, + "loss": 2.2345, + "step": 261905 + }, + { + "epoch": 0.62, + "grad_norm": 2.484375, + "learning_rate": 0.00015668065369208557, + "loss": 2.0647, + "step": 261910 + }, + { + "epoch": 0.62, + "grad_norm": 2.140625, + "learning_rate": 0.00015667913096576304, + "loss": 2.1915, + "step": 261915 + }, + { + "epoch": 0.62, + "grad_norm": 2.328125, + "learning_rate": 0.00015667760822007788, + "loss": 1.8515, + "step": 261920 + }, + { + "epoch": 0.62, + "grad_norm": 2.125, + "learning_rate": 0.00015667608545503063, + "loss": 1.9588, + "step": 261925 + }, + { + "epoch": 0.62, + "grad_norm": 1.8125, + "learning_rate": 0.00015667456267062178, + "loss": 2.0598, + "step": 261930 + }, + { + "epoch": 0.62, + "grad_norm": 2.078125, + "learning_rate": 0.0001566730398668519, + "loss": 2.3005, + "step": 261935 + }, + { + "epoch": 0.62, + "grad_norm": 2.09375, + "learning_rate": 0.00015667151704372144, + "loss": 2.0383, + "step": 261940 + }, + { + "epoch": 0.62, + "grad_norm": 1.953125, + "learning_rate": 0.000156669994201231, + "loss": 2.0073, + "step": 261945 + }, + { + "epoch": 0.62, + "grad_norm": 2.171875, + "learning_rate": 0.00015666847133938107, + "loss": 2.1042, + "step": 261950 + }, + { + "epoch": 0.62, + "grad_norm": 2.140625, + "learning_rate": 0.00015666694845817216, + "loss": 2.2161, + "step": 261955 + }, + { + "epoch": 0.62, + "grad_norm": 2.046875, + "learning_rate": 0.00015666542555760473, + "loss": 1.9975, + "step": 261960 + }, + { + "epoch": 0.62, + "grad_norm": 2.078125, + "learning_rate": 0.00015666390263767942, + "loss": 2.1988, + "step": 261965 + }, + { + "epoch": 0.62, + "grad_norm": 2.3125, + "learning_rate": 0.00015666237969839668, + "loss": 2.0652, + "step": 261970 + }, + { + "epoch": 0.62, + "grad_norm": 2.390625, + "learning_rate": 0.00015666085673975708, + "loss": 2.0624, + "step": 261975 + }, + { + "epoch": 0.62, + "grad_norm": 2.734375, + "learning_rate": 0.00015665933376176106, + "loss": 2.1151, + "step": 261980 + }, + { + "epoch": 0.62, + "grad_norm": 2.03125, + "learning_rate": 0.00015665781076440918, + "loss": 2.1829, + "step": 261985 + }, + { + "epoch": 0.62, + "grad_norm": 2.140625, + "learning_rate": 0.00015665628774770197, + "loss": 2.2207, + "step": 261990 + }, + { + "epoch": 0.62, + "grad_norm": 2.4375, + "learning_rate": 0.00015665476471163995, + "loss": 1.9882, + "step": 261995 + }, + { + "epoch": 0.62, + "grad_norm": 2.484375, + "learning_rate": 0.00015665324165622366, + "loss": 2.2202, + "step": 262000 + }, + { + "epoch": 0.62, + "grad_norm": 2.25, + "learning_rate": 0.00015665171858145357, + "loss": 2.1265, + "step": 262005 + }, + { + "epoch": 0.62, + "grad_norm": 2.125, + "learning_rate": 0.00015665019548733025, + "loss": 2.0484, + "step": 262010 + }, + { + "epoch": 0.62, + "grad_norm": 1.9609375, + "learning_rate": 0.00015664867237385414, + "loss": 2.0088, + "step": 262015 + }, + { + "epoch": 0.62, + "grad_norm": 1.7109375, + "learning_rate": 0.00015664714924102586, + "loss": 2.1027, + "step": 262020 + }, + { + "epoch": 0.62, + "grad_norm": 2.34375, + "learning_rate": 0.00015664562608884588, + "loss": 2.149, + "step": 262025 + }, + { + "epoch": 0.62, + "grad_norm": 2.109375, + "learning_rate": 0.00015664410291731473, + "loss": 2.1007, + "step": 262030 + }, + { + "epoch": 0.62, + "grad_norm": 1.953125, + "learning_rate": 0.00015664257972643292, + "loss": 1.9672, + "step": 262035 + }, + { + "epoch": 0.62, + "grad_norm": 1.921875, + "learning_rate": 0.00015664105651620096, + "loss": 2.1489, + "step": 262040 + }, + { + "epoch": 0.62, + "grad_norm": 2.203125, + "learning_rate": 0.00015663953328661943, + "loss": 2.0877, + "step": 262045 + }, + { + "epoch": 0.62, + "grad_norm": 1.8359375, + "learning_rate": 0.00015663801003768877, + "loss": 2.0176, + "step": 262050 + }, + { + "epoch": 0.62, + "grad_norm": 2.265625, + "learning_rate": 0.00015663648676940957, + "loss": 2.1486, + "step": 262055 + }, + { + "epoch": 0.62, + "grad_norm": 1.9609375, + "learning_rate": 0.00015663496348178232, + "loss": 2.0543, + "step": 262060 + }, + { + "epoch": 0.62, + "grad_norm": 2.125, + "learning_rate": 0.00015663344017480748, + "loss": 2.0489, + "step": 262065 + }, + { + "epoch": 0.62, + "grad_norm": 2.234375, + "learning_rate": 0.00015663191684848568, + "loss": 1.7001, + "step": 262070 + }, + { + "epoch": 0.62, + "grad_norm": 2.140625, + "learning_rate": 0.00015663039350281735, + "loss": 2.2213, + "step": 262075 + }, + { + "epoch": 0.62, + "grad_norm": 2.625, + "learning_rate": 0.0001566288701378031, + "loss": 2.2749, + "step": 262080 + }, + { + "epoch": 0.62, + "grad_norm": 2.140625, + "learning_rate": 0.0001566273467534434, + "loss": 2.1288, + "step": 262085 + }, + { + "epoch": 0.62, + "grad_norm": 2.40625, + "learning_rate": 0.00015662582334973876, + "loss": 2.2776, + "step": 262090 + }, + { + "epoch": 0.62, + "grad_norm": 2.296875, + "learning_rate": 0.00015662429992668967, + "loss": 2.2276, + "step": 262095 + }, + { + "epoch": 0.62, + "grad_norm": 2.453125, + "learning_rate": 0.00015662277648429676, + "loss": 2.014, + "step": 262100 + }, + { + "epoch": 0.62, + "grad_norm": 2.0, + "learning_rate": 0.00015662125302256047, + "loss": 1.8729, + "step": 262105 + }, + { + "epoch": 0.62, + "grad_norm": 2.359375, + "learning_rate": 0.0001566197295414813, + "loss": 1.9627, + "step": 262110 + }, + { + "epoch": 0.62, + "grad_norm": 2.15625, + "learning_rate": 0.0001566182060410598, + "loss": 2.255, + "step": 262115 + }, + { + "epoch": 0.62, + "grad_norm": 2.375, + "learning_rate": 0.0001566166825212965, + "loss": 2.2049, + "step": 262120 + }, + { + "epoch": 0.62, + "grad_norm": 1.9375, + "learning_rate": 0.00015661515898219194, + "loss": 2.098, + "step": 262125 + }, + { + "epoch": 0.62, + "grad_norm": 2.25, + "learning_rate": 0.00015661363542374658, + "loss": 1.8555, + "step": 262130 + }, + { + "epoch": 0.62, + "grad_norm": 2.4375, + "learning_rate": 0.00015661211184596102, + "loss": 1.902, + "step": 262135 + }, + { + "epoch": 0.62, + "grad_norm": 2.21875, + "learning_rate": 0.00015661058824883572, + "loss": 2.0935, + "step": 262140 + }, + { + "epoch": 0.62, + "grad_norm": 4.5625, + "learning_rate": 0.0001566090646323712, + "loss": 2.0608, + "step": 262145 + }, + { + "epoch": 0.62, + "grad_norm": 2.046875, + "learning_rate": 0.00015660754099656803, + "loss": 2.075, + "step": 262150 + }, + { + "epoch": 0.62, + "grad_norm": 2.15625, + "learning_rate": 0.00015660601734142666, + "loss": 2.0528, + "step": 262155 + }, + { + "epoch": 0.62, + "grad_norm": 2.4375, + "learning_rate": 0.00015660449366694765, + "loss": 2.1061, + "step": 262160 + }, + { + "epoch": 0.62, + "grad_norm": 2.328125, + "learning_rate": 0.00015660296997313153, + "loss": 2.1627, + "step": 262165 + }, + { + "epoch": 0.62, + "grad_norm": 2.0625, + "learning_rate": 0.0001566014462599788, + "loss": 2.024, + "step": 262170 + }, + { + "epoch": 0.62, + "grad_norm": 2.46875, + "learning_rate": 0.00015659992252749002, + "loss": 1.8643, + "step": 262175 + }, + { + "epoch": 0.62, + "grad_norm": 2.140625, + "learning_rate": 0.00015659839877566565, + "loss": 1.9367, + "step": 262180 + }, + { + "epoch": 0.62, + "grad_norm": 2.21875, + "learning_rate": 0.00015659687500450627, + "loss": 2.0495, + "step": 262185 + }, + { + "epoch": 0.62, + "grad_norm": 2.484375, + "learning_rate": 0.00015659535121401231, + "loss": 2.121, + "step": 262190 + }, + { + "epoch": 0.62, + "grad_norm": 2.09375, + "learning_rate": 0.0001565938274041844, + "loss": 2.1461, + "step": 262195 + }, + { + "epoch": 0.62, + "grad_norm": 1.859375, + "learning_rate": 0.00015659230357502302, + "loss": 2.098, + "step": 262200 + }, + { + "epoch": 0.62, + "grad_norm": 1.9296875, + "learning_rate": 0.00015659077972652864, + "loss": 1.9526, + "step": 262205 + }, + { + "epoch": 0.62, + "grad_norm": 1.984375, + "learning_rate": 0.00015658925585870188, + "loss": 1.9843, + "step": 262210 + }, + { + "epoch": 0.62, + "grad_norm": 2.0, + "learning_rate": 0.00015658773197154316, + "loss": 2.0365, + "step": 262215 + }, + { + "epoch": 0.62, + "grad_norm": 1.8125, + "learning_rate": 0.00015658620806505304, + "loss": 1.9699, + "step": 262220 + }, + { + "epoch": 0.62, + "grad_norm": 2.3125, + "learning_rate": 0.00015658468413923206, + "loss": 1.9679, + "step": 262225 + }, + { + "epoch": 0.62, + "grad_norm": 1.953125, + "learning_rate": 0.00015658316019408075, + "loss": 2.1333, + "step": 262230 + }, + { + "epoch": 0.62, + "grad_norm": 2.375, + "learning_rate": 0.0001565816362295996, + "loss": 1.979, + "step": 262235 + }, + { + "epoch": 0.62, + "grad_norm": 2.140625, + "learning_rate": 0.0001565801122457891, + "loss": 2.0601, + "step": 262240 + }, + { + "epoch": 0.62, + "grad_norm": 2.359375, + "learning_rate": 0.0001565785882426498, + "loss": 2.0807, + "step": 262245 + }, + { + "epoch": 0.62, + "grad_norm": 2.0, + "learning_rate": 0.0001565770642201823, + "loss": 2.0328, + "step": 262250 + }, + { + "epoch": 0.62, + "grad_norm": 1.8671875, + "learning_rate": 0.000156575540178387, + "loss": 2.0739, + "step": 262255 + }, + { + "epoch": 0.62, + "grad_norm": 1.8828125, + "learning_rate": 0.00015657401611726446, + "loss": 2.0393, + "step": 262260 + }, + { + "epoch": 0.62, + "grad_norm": 2.078125, + "learning_rate": 0.00015657249203681524, + "loss": 2.0085, + "step": 262265 + }, + { + "epoch": 0.62, + "grad_norm": 2.3125, + "learning_rate": 0.0001565709679370398, + "loss": 2.0409, + "step": 262270 + }, + { + "epoch": 0.62, + "grad_norm": 2.359375, + "learning_rate": 0.0001565694438179387, + "loss": 2.0733, + "step": 262275 + }, + { + "epoch": 0.62, + "grad_norm": 2.265625, + "learning_rate": 0.00015656791967951246, + "loss": 2.0947, + "step": 262280 + }, + { + "epoch": 0.62, + "grad_norm": 2.203125, + "learning_rate": 0.0001565663955217616, + "loss": 2.2144, + "step": 262285 + }, + { + "epoch": 0.62, + "grad_norm": 1.859375, + "learning_rate": 0.00015656487134468663, + "loss": 2.0453, + "step": 262290 + }, + { + "epoch": 0.62, + "grad_norm": 2.125, + "learning_rate": 0.00015656334714828806, + "loss": 1.99, + "step": 262295 + }, + { + "epoch": 0.62, + "grad_norm": 2.171875, + "learning_rate": 0.00015656182293256646, + "loss": 2.2404, + "step": 262300 + }, + { + "epoch": 0.62, + "grad_norm": 3.0, + "learning_rate": 0.00015656029869752226, + "loss": 2.2104, + "step": 262305 + }, + { + "epoch": 0.62, + "grad_norm": 2.359375, + "learning_rate": 0.00015655877444315606, + "loss": 2.0336, + "step": 262310 + }, + { + "epoch": 0.62, + "grad_norm": 2.265625, + "learning_rate": 0.00015655725016946835, + "loss": 1.9955, + "step": 262315 + }, + { + "epoch": 0.62, + "grad_norm": 2.25, + "learning_rate": 0.0001565557258764597, + "loss": 2.2461, + "step": 262320 + }, + { + "epoch": 0.62, + "grad_norm": 1.828125, + "learning_rate": 0.00015655420156413053, + "loss": 1.9963, + "step": 262325 + }, + { + "epoch": 0.62, + "grad_norm": 2.0625, + "learning_rate": 0.00015655267723248145, + "loss": 1.9368, + "step": 262330 + }, + { + "epoch": 0.62, + "grad_norm": 1.78125, + "learning_rate": 0.00015655115288151298, + "loss": 2.066, + "step": 262335 + }, + { + "epoch": 0.62, + "grad_norm": 2.203125, + "learning_rate": 0.00015654962851122556, + "loss": 2.0169, + "step": 262340 + }, + { + "epoch": 0.62, + "grad_norm": 1.875, + "learning_rate": 0.0001565481041216198, + "loss": 2.1368, + "step": 262345 + }, + { + "epoch": 0.62, + "grad_norm": 2.0, + "learning_rate": 0.00015654657971269614, + "loss": 2.1964, + "step": 262350 + }, + { + "epoch": 0.62, + "grad_norm": 2.25, + "learning_rate": 0.00015654505528445517, + "loss": 2.1829, + "step": 262355 + }, + { + "epoch": 0.62, + "grad_norm": 1.7578125, + "learning_rate": 0.0001565435308368974, + "loss": 2.1272, + "step": 262360 + }, + { + "epoch": 0.62, + "grad_norm": 2.1875, + "learning_rate": 0.0001565420063700233, + "loss": 2.1199, + "step": 262365 + }, + { + "epoch": 0.62, + "grad_norm": 2.09375, + "learning_rate": 0.00015654048188383344, + "loss": 2.0607, + "step": 262370 + }, + { + "epoch": 0.62, + "grad_norm": 1.8984375, + "learning_rate": 0.00015653895737832835, + "loss": 2.0242, + "step": 262375 + }, + { + "epoch": 0.62, + "grad_norm": 2.0625, + "learning_rate": 0.00015653743285350846, + "loss": 2.0487, + "step": 262380 + }, + { + "epoch": 0.62, + "grad_norm": 2.0, + "learning_rate": 0.0001565359083093744, + "loss": 2.2478, + "step": 262385 + }, + { + "epoch": 0.62, + "grad_norm": 2.3125, + "learning_rate": 0.00015653438374592664, + "loss": 1.9035, + "step": 262390 + }, + { + "epoch": 0.62, + "grad_norm": 1.90625, + "learning_rate": 0.0001565328591631657, + "loss": 2.1207, + "step": 262395 + }, + { + "epoch": 0.62, + "grad_norm": 2.21875, + "learning_rate": 0.00015653133456109214, + "loss": 1.8758, + "step": 262400 + }, + { + "epoch": 0.62, + "grad_norm": 2.125, + "learning_rate": 0.00015652980993970643, + "loss": 2.0504, + "step": 262405 + }, + { + "epoch": 0.62, + "grad_norm": 2.25, + "learning_rate": 0.00015652828529900912, + "loss": 2.2763, + "step": 262410 + }, + { + "epoch": 0.62, + "grad_norm": 1.5859375, + "learning_rate": 0.00015652676063900074, + "loss": 2.1485, + "step": 262415 + }, + { + "epoch": 0.62, + "grad_norm": 2.03125, + "learning_rate": 0.00015652523595968178, + "loss": 2.1797, + "step": 262420 + }, + { + "epoch": 0.62, + "grad_norm": 2.25, + "learning_rate": 0.00015652371126105276, + "loss": 2.227, + "step": 262425 + }, + { + "epoch": 0.62, + "grad_norm": 2.25, + "learning_rate": 0.0001565221865431142, + "loss": 1.9153, + "step": 262430 + }, + { + "epoch": 0.62, + "grad_norm": 2.4375, + "learning_rate": 0.0001565206618058667, + "loss": 2.1192, + "step": 262435 + }, + { + "epoch": 0.62, + "grad_norm": 2.21875, + "learning_rate": 0.00015651913704931066, + "loss": 2.1216, + "step": 262440 + }, + { + "epoch": 0.62, + "grad_norm": 2.21875, + "learning_rate": 0.00015651761227344668, + "loss": 2.2116, + "step": 262445 + }, + { + "epoch": 0.62, + "grad_norm": 2.109375, + "learning_rate": 0.00015651608747827524, + "loss": 2.0848, + "step": 262450 + }, + { + "epoch": 0.62, + "grad_norm": 2.140625, + "learning_rate": 0.0001565145626637969, + "loss": 2.0151, + "step": 262455 + }, + { + "epoch": 0.62, + "grad_norm": 1.9765625, + "learning_rate": 0.00015651303783001215, + "loss": 2.0483, + "step": 262460 + }, + { + "epoch": 0.62, + "grad_norm": 2.21875, + "learning_rate": 0.0001565115129769215, + "loss": 2.0827, + "step": 262465 + }, + { + "epoch": 0.62, + "grad_norm": 1.8828125, + "learning_rate": 0.00015650998810452554, + "loss": 2.1165, + "step": 262470 + }, + { + "epoch": 0.62, + "grad_norm": 2.28125, + "learning_rate": 0.00015650846321282473, + "loss": 2.1453, + "step": 262475 + }, + { + "epoch": 0.62, + "grad_norm": 2.03125, + "learning_rate": 0.00015650693830181957, + "loss": 1.9924, + "step": 262480 + }, + { + "epoch": 0.62, + "grad_norm": 2.203125, + "learning_rate": 0.00015650541337151064, + "loss": 2.0318, + "step": 262485 + }, + { + "epoch": 0.62, + "grad_norm": 2.375, + "learning_rate": 0.00015650388842189842, + "loss": 2.1071, + "step": 262490 + }, + { + "epoch": 0.62, + "grad_norm": 2.71875, + "learning_rate": 0.0001565023634529835, + "loss": 2.0484, + "step": 262495 + }, + { + "epoch": 0.62, + "grad_norm": 2.15625, + "learning_rate": 0.0001565008384647663, + "loss": 2.0491, + "step": 262500 + }, + { + "epoch": 0.62, + "grad_norm": 2.390625, + "learning_rate": 0.0001564993134572474, + "loss": 2.0172, + "step": 262505 + }, + { + "epoch": 0.62, + "grad_norm": 2.0, + "learning_rate": 0.00015649778843042727, + "loss": 2.2025, + "step": 262510 + }, + { + "epoch": 0.62, + "grad_norm": 1.9375, + "learning_rate": 0.00015649626338430653, + "loss": 2.0886, + "step": 262515 + }, + { + "epoch": 0.62, + "grad_norm": 2.265625, + "learning_rate": 0.00015649473831888563, + "loss": 2.1686, + "step": 262520 + }, + { + "epoch": 0.62, + "grad_norm": 1.9921875, + "learning_rate": 0.0001564932132341651, + "loss": 1.9276, + "step": 262525 + }, + { + "epoch": 0.62, + "grad_norm": 2.15625, + "learning_rate": 0.00015649168813014545, + "loss": 2.1036, + "step": 262530 + }, + { + "epoch": 0.62, + "grad_norm": 1.9765625, + "learning_rate": 0.0001564901630068272, + "loss": 2.1481, + "step": 262535 + }, + { + "epoch": 0.62, + "grad_norm": 2.40625, + "learning_rate": 0.00015648863786421092, + "loss": 2.0852, + "step": 262540 + }, + { + "epoch": 0.62, + "grad_norm": 2.078125, + "learning_rate": 0.0001564871127022971, + "loss": 2.1005, + "step": 262545 + }, + { + "epoch": 0.62, + "grad_norm": 2.125, + "learning_rate": 0.00015648558752108624, + "loss": 2.0783, + "step": 262550 + }, + { + "epoch": 0.62, + "grad_norm": 2.0, + "learning_rate": 0.00015648406232057887, + "loss": 2.1676, + "step": 262555 + }, + { + "epoch": 0.62, + "grad_norm": 2.125, + "learning_rate": 0.00015648253710077555, + "loss": 2.0944, + "step": 262560 + }, + { + "epoch": 0.62, + "grad_norm": 2.296875, + "learning_rate": 0.00015648101186167672, + "loss": 2.0089, + "step": 262565 + }, + { + "epoch": 0.62, + "grad_norm": 2.1875, + "learning_rate": 0.000156479486603283, + "loss": 2.0866, + "step": 262570 + }, + { + "epoch": 0.62, + "grad_norm": 2.078125, + "learning_rate": 0.00015647796132559484, + "loss": 2.043, + "step": 262575 + }, + { + "epoch": 0.62, + "grad_norm": 2.203125, + "learning_rate": 0.0001564764360286128, + "loss": 2.2419, + "step": 262580 + }, + { + "epoch": 0.62, + "grad_norm": 2.40625, + "learning_rate": 0.00015647491071233736, + "loss": 2.2013, + "step": 262585 + }, + { + "epoch": 0.62, + "grad_norm": 2.078125, + "learning_rate": 0.0001564733853767691, + "loss": 2.0211, + "step": 262590 + }, + { + "epoch": 0.62, + "grad_norm": 2.1875, + "learning_rate": 0.00015647186002190847, + "loss": 2.1013, + "step": 262595 + }, + { + "epoch": 0.62, + "grad_norm": 1.9296875, + "learning_rate": 0.00015647033464775606, + "loss": 2.2545, + "step": 262600 + }, + { + "epoch": 0.62, + "grad_norm": 2.109375, + "learning_rate": 0.00015646880925431237, + "loss": 1.983, + "step": 262605 + }, + { + "epoch": 0.62, + "grad_norm": 2.65625, + "learning_rate": 0.0001564672838415779, + "loss": 2.1284, + "step": 262610 + }, + { + "epoch": 0.62, + "grad_norm": 2.328125, + "learning_rate": 0.00015646575840955315, + "loss": 2.1482, + "step": 262615 + }, + { + "epoch": 0.62, + "grad_norm": 2.09375, + "learning_rate": 0.00015646423295823868, + "loss": 1.9932, + "step": 262620 + }, + { + "epoch": 0.62, + "grad_norm": 2.390625, + "learning_rate": 0.00015646270748763503, + "loss": 1.9484, + "step": 262625 + }, + { + "epoch": 0.62, + "grad_norm": 2.421875, + "learning_rate": 0.0001564611819977427, + "loss": 1.8914, + "step": 262630 + }, + { + "epoch": 0.62, + "grad_norm": 2.15625, + "learning_rate": 0.00015645965648856218, + "loss": 2.0211, + "step": 262635 + }, + { + "epoch": 0.62, + "grad_norm": 2.125, + "learning_rate": 0.00015645813096009406, + "loss": 2.0685, + "step": 262640 + }, + { + "epoch": 0.62, + "grad_norm": 2.125, + "learning_rate": 0.00015645660541233874, + "loss": 1.953, + "step": 262645 + }, + { + "epoch": 0.62, + "grad_norm": 2.359375, + "learning_rate": 0.00015645507984529688, + "loss": 2.1128, + "step": 262650 + }, + { + "epoch": 0.62, + "grad_norm": 1.9296875, + "learning_rate": 0.0001564535542589689, + "loss": 2.2062, + "step": 262655 + }, + { + "epoch": 0.62, + "grad_norm": 1.984375, + "learning_rate": 0.00015645202865335542, + "loss": 2.1011, + "step": 262660 + }, + { + "epoch": 0.62, + "grad_norm": 2.328125, + "learning_rate": 0.00015645050302845687, + "loss": 2.1675, + "step": 262665 + }, + { + "epoch": 0.62, + "grad_norm": 2.09375, + "learning_rate": 0.0001564489773842738, + "loss": 2.1769, + "step": 262670 + }, + { + "epoch": 0.62, + "grad_norm": 2.171875, + "learning_rate": 0.00015644745172080672, + "loss": 2.042, + "step": 262675 + }, + { + "epoch": 0.62, + "grad_norm": 2.28125, + "learning_rate": 0.00015644592603805622, + "loss": 2.0749, + "step": 262680 + }, + { + "epoch": 0.62, + "grad_norm": 2.140625, + "learning_rate": 0.0001564444003360227, + "loss": 1.9643, + "step": 262685 + }, + { + "epoch": 0.62, + "grad_norm": 2.3125, + "learning_rate": 0.00015644287461470682, + "loss": 2.0939, + "step": 262690 + }, + { + "epoch": 0.62, + "grad_norm": 2.53125, + "learning_rate": 0.000156441348874109, + "loss": 2.1276, + "step": 262695 + }, + { + "epoch": 0.62, + "grad_norm": 2.21875, + "learning_rate": 0.00015643982311422977, + "loss": 2.0794, + "step": 262700 + }, + { + "epoch": 0.62, + "grad_norm": 2.75, + "learning_rate": 0.00015643829733506966, + "loss": 2.224, + "step": 262705 + }, + { + "epoch": 0.62, + "grad_norm": 2.28125, + "learning_rate": 0.00015643677153662927, + "loss": 1.8973, + "step": 262710 + }, + { + "epoch": 0.62, + "grad_norm": 2.390625, + "learning_rate": 0.00015643524571890902, + "loss": 2.0545, + "step": 262715 + }, + { + "epoch": 0.62, + "grad_norm": 2.109375, + "learning_rate": 0.00015643371988190947, + "loss": 1.9702, + "step": 262720 + }, + { + "epoch": 0.62, + "grad_norm": 2.328125, + "learning_rate": 0.00015643219402563112, + "loss": 1.9606, + "step": 262725 + }, + { + "epoch": 0.62, + "grad_norm": 2.234375, + "learning_rate": 0.0001564306681500745, + "loss": 2.2177, + "step": 262730 + }, + { + "epoch": 0.62, + "grad_norm": 2.53125, + "learning_rate": 0.0001564291422552402, + "loss": 2.1373, + "step": 262735 + }, + { + "epoch": 0.62, + "grad_norm": 2.140625, + "learning_rate": 0.00015642761634112863, + "loss": 2.1327, + "step": 262740 + }, + { + "epoch": 0.62, + "grad_norm": 2.578125, + "learning_rate": 0.00015642609040774036, + "loss": 2.1257, + "step": 262745 + }, + { + "epoch": 0.62, + "grad_norm": 2.28125, + "learning_rate": 0.00015642456445507597, + "loss": 1.8373, + "step": 262750 + }, + { + "epoch": 0.62, + "grad_norm": 1.9140625, + "learning_rate": 0.00015642303848313586, + "loss": 2.0896, + "step": 262755 + }, + { + "epoch": 0.62, + "grad_norm": 1.9921875, + "learning_rate": 0.00015642151249192066, + "loss": 1.9643, + "step": 262760 + }, + { + "epoch": 0.62, + "grad_norm": 1.875, + "learning_rate": 0.0001564199864814308, + "loss": 2.1154, + "step": 262765 + }, + { + "epoch": 0.62, + "grad_norm": 2.09375, + "learning_rate": 0.0001564184604516669, + "loss": 2.049, + "step": 262770 + }, + { + "epoch": 0.62, + "grad_norm": 1.9453125, + "learning_rate": 0.00015641693440262942, + "loss": 2.0689, + "step": 262775 + }, + { + "epoch": 0.62, + "grad_norm": 2.15625, + "learning_rate": 0.00015641540833431886, + "loss": 1.9986, + "step": 262780 + }, + { + "epoch": 0.62, + "grad_norm": 2.125, + "learning_rate": 0.0001564138822467358, + "loss": 2.1484, + "step": 262785 + }, + { + "epoch": 0.62, + "grad_norm": 2.109375, + "learning_rate": 0.00015641235613988075, + "loss": 2.2054, + "step": 262790 + }, + { + "epoch": 0.62, + "grad_norm": 1.859375, + "learning_rate": 0.0001564108300137542, + "loss": 1.8852, + "step": 262795 + }, + { + "epoch": 0.62, + "grad_norm": 2.609375, + "learning_rate": 0.00015640930386835668, + "loss": 2.2288, + "step": 262800 + }, + { + "epoch": 0.62, + "grad_norm": 1.984375, + "learning_rate": 0.0001564077777036887, + "loss": 2.0739, + "step": 262805 + }, + { + "epoch": 0.62, + "grad_norm": 2.078125, + "learning_rate": 0.00015640625151975085, + "loss": 2.0607, + "step": 262810 + }, + { + "epoch": 0.62, + "grad_norm": 2.203125, + "learning_rate": 0.00015640472531654358, + "loss": 2.1535, + "step": 262815 + }, + { + "epoch": 0.62, + "grad_norm": 2.109375, + "learning_rate": 0.00015640319909406743, + "loss": 2.1379, + "step": 262820 + }, + { + "epoch": 0.62, + "grad_norm": 2.140625, + "learning_rate": 0.0001564016728523229, + "loss": 2.0589, + "step": 262825 + }, + { + "epoch": 0.62, + "grad_norm": 2.1875, + "learning_rate": 0.00015640014659131058, + "loss": 1.8876, + "step": 262830 + }, + { + "epoch": 0.62, + "grad_norm": 2.234375, + "learning_rate": 0.0001563986203110309, + "loss": 2.1349, + "step": 262835 + }, + { + "epoch": 0.62, + "grad_norm": 2.3125, + "learning_rate": 0.00015639709401148446, + "loss": 2.1312, + "step": 262840 + }, + { + "epoch": 0.62, + "grad_norm": 2.078125, + "learning_rate": 0.00015639556769267174, + "loss": 2.1507, + "step": 262845 + }, + { + "epoch": 0.62, + "grad_norm": 2.453125, + "learning_rate": 0.00015639404135459327, + "loss": 2.1611, + "step": 262850 + }, + { + "epoch": 0.62, + "grad_norm": 2.046875, + "learning_rate": 0.00015639251499724961, + "loss": 1.8749, + "step": 262855 + }, + { + "epoch": 0.62, + "grad_norm": 2.328125, + "learning_rate": 0.0001563909886206412, + "loss": 2.0533, + "step": 262860 + }, + { + "epoch": 0.62, + "grad_norm": 2.796875, + "learning_rate": 0.00015638946222476863, + "loss": 1.9435, + "step": 262865 + }, + { + "epoch": 0.62, + "grad_norm": 2.046875, + "learning_rate": 0.0001563879358096324, + "loss": 2.182, + "step": 262870 + }, + { + "epoch": 0.62, + "grad_norm": 2.53125, + "learning_rate": 0.000156386409375233, + "loss": 2.0724, + "step": 262875 + }, + { + "epoch": 0.62, + "grad_norm": 2.59375, + "learning_rate": 0.00015638488292157098, + "loss": 2.2223, + "step": 262880 + }, + { + "epoch": 0.62, + "grad_norm": 2.0, + "learning_rate": 0.00015638335644864687, + "loss": 1.9372, + "step": 262885 + }, + { + "epoch": 0.62, + "grad_norm": 2.5, + "learning_rate": 0.00015638182995646117, + "loss": 2.2496, + "step": 262890 + }, + { + "epoch": 0.62, + "grad_norm": 2.09375, + "learning_rate": 0.00015638030344501444, + "loss": 1.9714, + "step": 262895 + }, + { + "epoch": 0.62, + "grad_norm": 2.140625, + "learning_rate": 0.00015637877691430718, + "loss": 1.9935, + "step": 262900 + }, + { + "epoch": 0.62, + "grad_norm": 2.40625, + "learning_rate": 0.0001563772503643399, + "loss": 2.0974, + "step": 262905 + }, + { + "epoch": 0.62, + "grad_norm": 2.40625, + "learning_rate": 0.0001563757237951131, + "loss": 2.1388, + "step": 262910 + }, + { + "epoch": 0.62, + "grad_norm": 2.0625, + "learning_rate": 0.00015637419720662736, + "loss": 2.0228, + "step": 262915 + }, + { + "epoch": 0.62, + "grad_norm": 2.25, + "learning_rate": 0.0001563726705988832, + "loss": 1.9686, + "step": 262920 + }, + { + "epoch": 0.62, + "grad_norm": 2.28125, + "learning_rate": 0.00015637114397188106, + "loss": 2.0529, + "step": 262925 + }, + { + "epoch": 0.62, + "grad_norm": 1.96875, + "learning_rate": 0.00015636961732562154, + "loss": 2.17, + "step": 262930 + }, + { + "epoch": 0.62, + "grad_norm": 2.46875, + "learning_rate": 0.00015636809066010515, + "loss": 2.0519, + "step": 262935 + }, + { + "epoch": 0.62, + "grad_norm": 2.328125, + "learning_rate": 0.00015636656397533235, + "loss": 2.1789, + "step": 262940 + }, + { + "epoch": 0.62, + "grad_norm": 2.203125, + "learning_rate": 0.00015636503727130374, + "loss": 2.0731, + "step": 262945 + }, + { + "epoch": 0.62, + "grad_norm": 2.75, + "learning_rate": 0.00015636351054801982, + "loss": 2.0191, + "step": 262950 + }, + { + "epoch": 0.62, + "grad_norm": 2.015625, + "learning_rate": 0.00015636198380548108, + "loss": 2.2861, + "step": 262955 + }, + { + "epoch": 0.62, + "grad_norm": 2.109375, + "learning_rate": 0.00015636045704368808, + "loss": 2.0795, + "step": 262960 + }, + { + "epoch": 0.62, + "grad_norm": 2.140625, + "learning_rate": 0.00015635893026264133, + "loss": 2.0828, + "step": 262965 + }, + { + "epoch": 0.62, + "grad_norm": 2.015625, + "learning_rate": 0.00015635740346234132, + "loss": 2.1504, + "step": 262970 + }, + { + "epoch": 0.62, + "grad_norm": 1.9765625, + "learning_rate": 0.0001563558766427886, + "loss": 2.0506, + "step": 262975 + }, + { + "epoch": 0.62, + "grad_norm": 2.078125, + "learning_rate": 0.00015635434980398373, + "loss": 2.0952, + "step": 262980 + }, + { + "epoch": 0.62, + "grad_norm": 2.109375, + "learning_rate": 0.00015635282294592718, + "loss": 2.2958, + "step": 262985 + }, + { + "epoch": 0.62, + "grad_norm": 2.21875, + "learning_rate": 0.00015635129606861946, + "loss": 2.1823, + "step": 262990 + }, + { + "epoch": 0.62, + "grad_norm": 2.15625, + "learning_rate": 0.00015634976917206113, + "loss": 2.2475, + "step": 262995 + }, + { + "epoch": 0.62, + "grad_norm": 2.296875, + "learning_rate": 0.00015634824225625268, + "loss": 2.0236, + "step": 263000 + }, + { + "epoch": 0.62, + "grad_norm": 2.1875, + "learning_rate": 0.0001563467153211947, + "loss": 2.0107, + "step": 263005 + }, + { + "epoch": 0.62, + "grad_norm": 2.109375, + "learning_rate": 0.0001563451883668876, + "loss": 2.0649, + "step": 263010 + }, + { + "epoch": 0.62, + "grad_norm": 1.828125, + "learning_rate": 0.000156343661393332, + "loss": 1.8578, + "step": 263015 + }, + { + "epoch": 0.62, + "grad_norm": 2.421875, + "learning_rate": 0.00015634213440052837, + "loss": 2.1964, + "step": 263020 + }, + { + "epoch": 0.62, + "grad_norm": 1.75, + "learning_rate": 0.00015634060738847723, + "loss": 2.1773, + "step": 263025 + }, + { + "epoch": 0.62, + "grad_norm": 2.375, + "learning_rate": 0.00015633908035717916, + "loss": 2.0537, + "step": 263030 + }, + { + "epoch": 0.62, + "grad_norm": 2.015625, + "learning_rate": 0.0001563375533066346, + "loss": 2.0025, + "step": 263035 + }, + { + "epoch": 0.62, + "grad_norm": 2.421875, + "learning_rate": 0.0001563360262368441, + "loss": 2.2742, + "step": 263040 + }, + { + "epoch": 0.62, + "grad_norm": 2.390625, + "learning_rate": 0.00015633449914780823, + "loss": 1.9731, + "step": 263045 + }, + { + "epoch": 0.62, + "grad_norm": 1.9453125, + "learning_rate": 0.00015633297203952745, + "loss": 1.9431, + "step": 263050 + }, + { + "epoch": 0.62, + "grad_norm": 2.0625, + "learning_rate": 0.00015633144491200232, + "loss": 2.2144, + "step": 263055 + }, + { + "epoch": 0.62, + "grad_norm": 2.515625, + "learning_rate": 0.0001563299177652333, + "loss": 1.9888, + "step": 263060 + }, + { + "epoch": 0.62, + "grad_norm": 2.171875, + "learning_rate": 0.00015632839059922103, + "loss": 2.015, + "step": 263065 + }, + { + "epoch": 0.62, + "grad_norm": 2.125, + "learning_rate": 0.00015632686341396593, + "loss": 2.063, + "step": 263070 + }, + { + "epoch": 0.62, + "grad_norm": 2.453125, + "learning_rate": 0.00015632533620946851, + "loss": 1.9244, + "step": 263075 + }, + { + "epoch": 0.62, + "grad_norm": 1.921875, + "learning_rate": 0.00015632380898572939, + "loss": 2.2226, + "step": 263080 + }, + { + "epoch": 0.62, + "grad_norm": 2.328125, + "learning_rate": 0.00015632228174274903, + "loss": 2.2802, + "step": 263085 + }, + { + "epoch": 0.62, + "grad_norm": 2.015625, + "learning_rate": 0.00015632075448052793, + "loss": 2.2118, + "step": 263090 + }, + { + "epoch": 0.62, + "grad_norm": 1.953125, + "learning_rate": 0.00015631922719906667, + "loss": 2.2267, + "step": 263095 + }, + { + "epoch": 0.62, + "grad_norm": 2.484375, + "learning_rate": 0.00015631769989836572, + "loss": 2.0387, + "step": 263100 + }, + { + "epoch": 0.62, + "grad_norm": 2.25, + "learning_rate": 0.0001563161725784256, + "loss": 2.1892, + "step": 263105 + }, + { + "epoch": 0.62, + "grad_norm": 2.65625, + "learning_rate": 0.00015631464523924691, + "loss": 2.2459, + "step": 263110 + }, + { + "epoch": 0.62, + "grad_norm": 2.328125, + "learning_rate": 0.00015631311788083006, + "loss": 1.9627, + "step": 263115 + }, + { + "epoch": 0.62, + "grad_norm": 2.234375, + "learning_rate": 0.00015631159050317564, + "loss": 2.0625, + "step": 263120 + }, + { + "epoch": 0.62, + "grad_norm": 1.875, + "learning_rate": 0.0001563100631062842, + "loss": 2.0434, + "step": 263125 + }, + { + "epoch": 0.62, + "grad_norm": 2.3125, + "learning_rate": 0.00015630853569015618, + "loss": 2.2879, + "step": 263130 + }, + { + "epoch": 0.62, + "grad_norm": 2.484375, + "learning_rate": 0.00015630700825479217, + "loss": 2.0547, + "step": 263135 + }, + { + "epoch": 0.62, + "grad_norm": 2.09375, + "learning_rate": 0.00015630548080019266, + "loss": 2.138, + "step": 263140 + }, + { + "epoch": 0.62, + "grad_norm": 2.015625, + "learning_rate": 0.00015630395332635817, + "loss": 2.0112, + "step": 263145 + }, + { + "epoch": 0.62, + "grad_norm": 2.125, + "learning_rate": 0.0001563024258332892, + "loss": 2.063, + "step": 263150 + }, + { + "epoch": 0.62, + "grad_norm": 2.078125, + "learning_rate": 0.00015630089832098634, + "loss": 2.0332, + "step": 263155 + }, + { + "epoch": 0.62, + "grad_norm": 2.203125, + "learning_rate": 0.00015629937078945005, + "loss": 1.9857, + "step": 263160 + }, + { + "epoch": 0.62, + "grad_norm": 1.9453125, + "learning_rate": 0.00015629784323868088, + "loss": 2.0436, + "step": 263165 + }, + { + "epoch": 0.62, + "grad_norm": 2.171875, + "learning_rate": 0.00015629631566867937, + "loss": 2.263, + "step": 263170 + }, + { + "epoch": 0.62, + "grad_norm": 1.7421875, + "learning_rate": 0.00015629478807944599, + "loss": 2.1491, + "step": 263175 + }, + { + "epoch": 0.62, + "grad_norm": 1.7890625, + "learning_rate": 0.0001562932604709813, + "loss": 2.0648, + "step": 263180 + }, + { + "epoch": 0.62, + "grad_norm": 1.984375, + "learning_rate": 0.0001562917328432858, + "loss": 2.0112, + "step": 263185 + }, + { + "epoch": 0.62, + "grad_norm": 2.15625, + "learning_rate": 0.00015629020519636005, + "loss": 2.0658, + "step": 263190 + }, + { + "epoch": 0.62, + "grad_norm": 2.65625, + "learning_rate": 0.0001562886775302045, + "loss": 1.8803, + "step": 263195 + }, + { + "epoch": 0.62, + "grad_norm": 2.328125, + "learning_rate": 0.00015628714984481973, + "loss": 2.1375, + "step": 263200 + }, + { + "epoch": 0.62, + "grad_norm": 2.234375, + "learning_rate": 0.0001562856221402063, + "loss": 2.2529, + "step": 263205 + }, + { + "epoch": 0.62, + "grad_norm": 2.046875, + "learning_rate": 0.0001562840944163646, + "loss": 1.9751, + "step": 263210 + }, + { + "epoch": 0.62, + "grad_norm": 2.5, + "learning_rate": 0.0001562825666732953, + "loss": 2.1772, + "step": 263215 + }, + { + "epoch": 0.62, + "grad_norm": 1.7265625, + "learning_rate": 0.0001562810389109988, + "loss": 2.1031, + "step": 263220 + }, + { + "epoch": 0.62, + "grad_norm": 2.265625, + "learning_rate": 0.0001562795111294757, + "loss": 1.9741, + "step": 263225 + }, + { + "epoch": 0.62, + "grad_norm": 1.8828125, + "learning_rate": 0.00015627798332872654, + "loss": 1.9449, + "step": 263230 + }, + { + "epoch": 0.62, + "grad_norm": 2.4375, + "learning_rate": 0.00015627645550875174, + "loss": 2.1345, + "step": 263235 + }, + { + "epoch": 0.62, + "grad_norm": 2.359375, + "learning_rate": 0.0001562749276695519, + "loss": 1.9947, + "step": 263240 + }, + { + "epoch": 0.62, + "grad_norm": 2.03125, + "learning_rate": 0.00015627339981112755, + "loss": 2.2084, + "step": 263245 + }, + { + "epoch": 0.62, + "grad_norm": 2.03125, + "learning_rate": 0.00015627187193347918, + "loss": 2.067, + "step": 263250 + }, + { + "epoch": 0.62, + "grad_norm": 2.375, + "learning_rate": 0.0001562703440366073, + "loss": 2.0293, + "step": 263255 + }, + { + "epoch": 0.62, + "grad_norm": 1.8125, + "learning_rate": 0.00015626881612051244, + "loss": 2.002, + "step": 263260 + }, + { + "epoch": 0.62, + "grad_norm": 2.109375, + "learning_rate": 0.00015626728818519514, + "loss": 2.2324, + "step": 263265 + }, + { + "epoch": 0.62, + "grad_norm": 2.265625, + "learning_rate": 0.0001562657602306559, + "loss": 2.121, + "step": 263270 + }, + { + "epoch": 0.62, + "grad_norm": 2.453125, + "learning_rate": 0.0001562642322568953, + "loss": 2.1819, + "step": 263275 + }, + { + "epoch": 0.62, + "grad_norm": 2.15625, + "learning_rate": 0.00015626270426391382, + "loss": 2.2224, + "step": 263280 + }, + { + "epoch": 0.62, + "grad_norm": 1.9296875, + "learning_rate": 0.00015626117625171192, + "loss": 2.0836, + "step": 263285 + }, + { + "epoch": 0.62, + "grad_norm": 1.65625, + "learning_rate": 0.00015625964822029023, + "loss": 2.2042, + "step": 263290 + }, + { + "epoch": 0.62, + "grad_norm": 1.8671875, + "learning_rate": 0.0001562581201696492, + "loss": 2.0675, + "step": 263295 + }, + { + "epoch": 0.62, + "grad_norm": 2.1875, + "learning_rate": 0.00015625659209978938, + "loss": 2.0716, + "step": 263300 + }, + { + "epoch": 0.62, + "grad_norm": 2.546875, + "learning_rate": 0.0001562550640107113, + "loss": 2.0739, + "step": 263305 + }, + { + "epoch": 0.62, + "grad_norm": 2.203125, + "learning_rate": 0.00015625353590241546, + "loss": 2.0167, + "step": 263310 + }, + { + "epoch": 0.62, + "grad_norm": 1.6171875, + "learning_rate": 0.0001562520077749024, + "loss": 1.9642, + "step": 263315 + }, + { + "epoch": 0.62, + "grad_norm": 2.203125, + "learning_rate": 0.0001562504796281726, + "loss": 2.0496, + "step": 263320 + }, + { + "epoch": 0.62, + "grad_norm": 1.8515625, + "learning_rate": 0.00015624895146222666, + "loss": 1.9792, + "step": 263325 + }, + { + "epoch": 0.62, + "grad_norm": 2.015625, + "learning_rate": 0.00015624742327706505, + "loss": 2.2084, + "step": 263330 + }, + { + "epoch": 0.62, + "grad_norm": 2.5625, + "learning_rate": 0.0001562458950726883, + "loss": 2.1089, + "step": 263335 + }, + { + "epoch": 0.62, + "grad_norm": 2.515625, + "learning_rate": 0.0001562443668490969, + "loss": 2.122, + "step": 263340 + }, + { + "epoch": 0.62, + "grad_norm": 2.0625, + "learning_rate": 0.00015624283860629144, + "loss": 2.1587, + "step": 263345 + }, + { + "epoch": 0.62, + "grad_norm": 2.046875, + "learning_rate": 0.0001562413103442724, + "loss": 2.0549, + "step": 263350 + }, + { + "epoch": 0.62, + "grad_norm": 2.359375, + "learning_rate": 0.0001562397820630403, + "loss": 1.9069, + "step": 263355 + }, + { + "epoch": 0.62, + "grad_norm": 2.796875, + "learning_rate": 0.00015623825376259566, + "loss": 1.9024, + "step": 263360 + }, + { + "epoch": 0.62, + "grad_norm": 2.34375, + "learning_rate": 0.00015623672544293906, + "loss": 1.9718, + "step": 263365 + }, + { + "epoch": 0.62, + "grad_norm": 2.453125, + "learning_rate": 0.00015623519710407092, + "loss": 2.0116, + "step": 263370 + }, + { + "epoch": 0.62, + "grad_norm": 2.15625, + "learning_rate": 0.00015623366874599186, + "loss": 1.8823, + "step": 263375 + }, + { + "epoch": 0.62, + "grad_norm": 1.953125, + "learning_rate": 0.00015623214036870233, + "loss": 2.0208, + "step": 263380 + }, + { + "epoch": 0.62, + "grad_norm": 1.703125, + "learning_rate": 0.00015623061197220288, + "loss": 2.1661, + "step": 263385 + }, + { + "epoch": 0.62, + "grad_norm": 2.421875, + "learning_rate": 0.00015622908355649405, + "loss": 2.0429, + "step": 263390 + }, + { + "epoch": 0.62, + "grad_norm": 1.9609375, + "learning_rate": 0.00015622755512157633, + "loss": 2.1821, + "step": 263395 + }, + { + "epoch": 0.62, + "grad_norm": 2.046875, + "learning_rate": 0.00015622602666745026, + "loss": 2.0241, + "step": 263400 + }, + { + "epoch": 0.62, + "grad_norm": 2.375, + "learning_rate": 0.00015622449819411639, + "loss": 2.0489, + "step": 263405 + }, + { + "epoch": 0.62, + "grad_norm": 1.78125, + "learning_rate": 0.00015622296970157516, + "loss": 2.098, + "step": 263410 + }, + { + "epoch": 0.62, + "grad_norm": 2.1875, + "learning_rate": 0.00015622144118982717, + "loss": 2.1157, + "step": 263415 + }, + { + "epoch": 0.62, + "grad_norm": 2.75, + "learning_rate": 0.0001562199126588729, + "loss": 2.0182, + "step": 263420 + }, + { + "epoch": 0.62, + "grad_norm": 2.234375, + "learning_rate": 0.0001562183841087129, + "loss": 2.0498, + "step": 263425 + }, + { + "epoch": 0.62, + "grad_norm": 2.28125, + "learning_rate": 0.0001562168555393477, + "loss": 2.0626, + "step": 263430 + }, + { + "epoch": 0.62, + "grad_norm": 2.140625, + "learning_rate": 0.00015621532695077778, + "loss": 1.9509, + "step": 263435 + }, + { + "epoch": 0.62, + "grad_norm": 2.359375, + "learning_rate": 0.0001562137983430037, + "loss": 1.8464, + "step": 263440 + }, + { + "epoch": 0.62, + "grad_norm": 2.0625, + "learning_rate": 0.00015621226971602596, + "loss": 2.0825, + "step": 263445 + }, + { + "epoch": 0.62, + "grad_norm": 2.203125, + "learning_rate": 0.00015621074106984505, + "loss": 2.1936, + "step": 263450 + }, + { + "epoch": 0.62, + "grad_norm": 2.109375, + "learning_rate": 0.0001562092124044616, + "loss": 2.1138, + "step": 263455 + }, + { + "epoch": 0.62, + "grad_norm": 1.96875, + "learning_rate": 0.00015620768371987605, + "loss": 2.0245, + "step": 263460 + }, + { + "epoch": 0.62, + "grad_norm": 2.25, + "learning_rate": 0.0001562061550160889, + "loss": 2.0372, + "step": 263465 + }, + { + "epoch": 0.62, + "grad_norm": 1.7578125, + "learning_rate": 0.00015620462629310072, + "loss": 2.0465, + "step": 263470 + }, + { + "epoch": 0.62, + "grad_norm": 2.1875, + "learning_rate": 0.00015620309755091203, + "loss": 2.1529, + "step": 263475 + }, + { + "epoch": 0.62, + "grad_norm": 2.046875, + "learning_rate": 0.00015620156878952334, + "loss": 2.3526, + "step": 263480 + }, + { + "epoch": 0.62, + "grad_norm": 1.9140625, + "learning_rate": 0.00015620004000893519, + "loss": 2.0047, + "step": 263485 + }, + { + "epoch": 0.62, + "grad_norm": 1.8359375, + "learning_rate": 0.00015619851120914806, + "loss": 2.0413, + "step": 263490 + }, + { + "epoch": 0.62, + "grad_norm": 2.140625, + "learning_rate": 0.0001561969823901625, + "loss": 2.122, + "step": 263495 + }, + { + "epoch": 0.62, + "grad_norm": 2.15625, + "learning_rate": 0.00015619545355197907, + "loss": 2.1244, + "step": 263500 + }, + { + "epoch": 0.62, + "grad_norm": 1.890625, + "learning_rate": 0.0001561939246945982, + "loss": 2.1151, + "step": 263505 + }, + { + "epoch": 0.62, + "grad_norm": 2.28125, + "learning_rate": 0.0001561923958180205, + "loss": 1.7343, + "step": 263510 + }, + { + "epoch": 0.62, + "grad_norm": 2.1875, + "learning_rate": 0.00015619086692224645, + "loss": 2.117, + "step": 263515 + }, + { + "epoch": 0.62, + "grad_norm": 2.171875, + "learning_rate": 0.00015618933800727656, + "loss": 2.0847, + "step": 263520 + }, + { + "epoch": 0.62, + "grad_norm": 2.1875, + "learning_rate": 0.00015618780907311145, + "loss": 2.1577, + "step": 263525 + }, + { + "epoch": 0.62, + "grad_norm": 2.265625, + "learning_rate": 0.00015618628011975148, + "loss": 2.0625, + "step": 263530 + }, + { + "epoch": 0.62, + "grad_norm": 2.203125, + "learning_rate": 0.00015618475114719728, + "loss": 2.0924, + "step": 263535 + }, + { + "epoch": 0.62, + "grad_norm": 2.265625, + "learning_rate": 0.00015618322215544937, + "loss": 2.0081, + "step": 263540 + }, + { + "epoch": 0.62, + "grad_norm": 2.28125, + "learning_rate": 0.00015618169314450824, + "loss": 2.2305, + "step": 263545 + }, + { + "epoch": 0.62, + "grad_norm": 2.296875, + "learning_rate": 0.00015618016411437445, + "loss": 2.2128, + "step": 263550 + }, + { + "epoch": 0.62, + "grad_norm": 2.25, + "learning_rate": 0.00015617863506504844, + "loss": 2.0711, + "step": 263555 + }, + { + "epoch": 0.62, + "grad_norm": 2.125, + "learning_rate": 0.00015617710599653083, + "loss": 2.117, + "step": 263560 + }, + { + "epoch": 0.62, + "grad_norm": 2.34375, + "learning_rate": 0.00015617557690882208, + "loss": 2.1647, + "step": 263565 + }, + { + "epoch": 0.62, + "grad_norm": 2.140625, + "learning_rate": 0.00015617404780192274, + "loss": 2.0611, + "step": 263570 + }, + { + "epoch": 0.62, + "grad_norm": 1.9921875, + "learning_rate": 0.00015617251867583334, + "loss": 2.0154, + "step": 263575 + }, + { + "epoch": 0.62, + "grad_norm": 2.328125, + "learning_rate": 0.0001561709895305544, + "loss": 2.0515, + "step": 263580 + }, + { + "epoch": 0.62, + "grad_norm": 2.203125, + "learning_rate": 0.0001561694603660864, + "loss": 2.0225, + "step": 263585 + }, + { + "epoch": 0.62, + "grad_norm": 2.1875, + "learning_rate": 0.0001561679311824299, + "loss": 1.9671, + "step": 263590 + }, + { + "epoch": 0.62, + "grad_norm": 2.078125, + "learning_rate": 0.00015616640197958542, + "loss": 2.2592, + "step": 263595 + }, + { + "epoch": 0.62, + "grad_norm": 1.890625, + "learning_rate": 0.0001561648727575535, + "loss": 2.0697, + "step": 263600 + }, + { + "epoch": 0.62, + "grad_norm": 2.234375, + "learning_rate": 0.00015616334351633464, + "loss": 1.8839, + "step": 263605 + }, + { + "epoch": 0.62, + "grad_norm": 2.046875, + "learning_rate": 0.00015616181425592932, + "loss": 2.166, + "step": 263610 + }, + { + "epoch": 0.62, + "grad_norm": 2.296875, + "learning_rate": 0.00015616028497633812, + "loss": 2.2266, + "step": 263615 + }, + { + "epoch": 0.62, + "grad_norm": 1.6953125, + "learning_rate": 0.0001561587556775616, + "loss": 1.941, + "step": 263620 + }, + { + "epoch": 0.62, + "grad_norm": 2.359375, + "learning_rate": 0.00015615722635960017, + "loss": 2.1769, + "step": 263625 + }, + { + "epoch": 0.62, + "grad_norm": 2.21875, + "learning_rate": 0.00015615569702245443, + "loss": 2.1212, + "step": 263630 + }, + { + "epoch": 0.62, + "grad_norm": 1.984375, + "learning_rate": 0.0001561541676661249, + "loss": 2.0813, + "step": 263635 + }, + { + "epoch": 0.62, + "grad_norm": 1.875, + "learning_rate": 0.00015615263829061207, + "loss": 2.1364, + "step": 263640 + }, + { + "epoch": 0.62, + "grad_norm": 2.328125, + "learning_rate": 0.0001561511088959165, + "loss": 1.9874, + "step": 263645 + }, + { + "epoch": 0.62, + "grad_norm": 1.8359375, + "learning_rate": 0.00015614957948203867, + "loss": 2.1089, + "step": 263650 + }, + { + "epoch": 0.62, + "grad_norm": 2.171875, + "learning_rate": 0.00015614805004897914, + "loss": 2.2117, + "step": 263655 + }, + { + "epoch": 0.62, + "grad_norm": 2.046875, + "learning_rate": 0.00015614652059673843, + "loss": 2.1694, + "step": 263660 + }, + { + "epoch": 0.62, + "grad_norm": 2.6875, + "learning_rate": 0.000156144991125317, + "loss": 1.8949, + "step": 263665 + }, + { + "epoch": 0.62, + "grad_norm": 3.28125, + "learning_rate": 0.00015614346163471545, + "loss": 2.125, + "step": 263670 + }, + { + "epoch": 0.62, + "grad_norm": 2.296875, + "learning_rate": 0.0001561419321249343, + "loss": 2.0694, + "step": 263675 + }, + { + "epoch": 0.62, + "grad_norm": 2.46875, + "learning_rate": 0.00015614040259597403, + "loss": 2.0338, + "step": 263680 + }, + { + "epoch": 0.62, + "grad_norm": 2.125, + "learning_rate": 0.00015613887304783517, + "loss": 2.1063, + "step": 263685 + }, + { + "epoch": 0.62, + "grad_norm": 2.4375, + "learning_rate": 0.00015613734348051826, + "loss": 2.1188, + "step": 263690 + }, + { + "epoch": 0.62, + "grad_norm": 2.359375, + "learning_rate": 0.00015613581389402383, + "loss": 2.0919, + "step": 263695 + }, + { + "epoch": 0.62, + "grad_norm": 2.6875, + "learning_rate": 0.00015613428428835237, + "loss": 1.9866, + "step": 263700 + }, + { + "epoch": 0.62, + "grad_norm": 1.96875, + "learning_rate": 0.00015613275466350442, + "loss": 2.0035, + "step": 263705 + }, + { + "epoch": 0.62, + "grad_norm": 2.15625, + "learning_rate": 0.00015613122501948053, + "loss": 2.1053, + "step": 263710 + }, + { + "epoch": 0.62, + "grad_norm": 2.265625, + "learning_rate": 0.00015612969535628115, + "loss": 2.0717, + "step": 263715 + }, + { + "epoch": 0.62, + "grad_norm": 2.015625, + "learning_rate": 0.00015612816567390686, + "loss": 2.1514, + "step": 263720 + }, + { + "epoch": 0.62, + "grad_norm": 1.875, + "learning_rate": 0.00015612663597235822, + "loss": 2.0697, + "step": 263725 + }, + { + "epoch": 0.62, + "grad_norm": 2.359375, + "learning_rate": 0.00015612510625163565, + "loss": 1.9833, + "step": 263730 + }, + { + "epoch": 0.62, + "grad_norm": 2.1875, + "learning_rate": 0.00015612357651173975, + "loss": 2.112, + "step": 263735 + }, + { + "epoch": 0.62, + "grad_norm": 2.03125, + "learning_rate": 0.000156122046752671, + "loss": 2.1166, + "step": 263740 + }, + { + "epoch": 0.62, + "grad_norm": 1.8046875, + "learning_rate": 0.00015612051697442994, + "loss": 2.2014, + "step": 263745 + }, + { + "epoch": 0.62, + "grad_norm": 2.328125, + "learning_rate": 0.00015611898717701714, + "loss": 2.0887, + "step": 263750 + }, + { + "epoch": 0.62, + "grad_norm": 2.828125, + "learning_rate": 0.00015611745736043302, + "loss": 2.0627, + "step": 263755 + }, + { + "epoch": 0.62, + "grad_norm": 2.21875, + "learning_rate": 0.00015611592752467818, + "loss": 2.041, + "step": 263760 + }, + { + "epoch": 0.62, + "grad_norm": 2.28125, + "learning_rate": 0.00015611439766975314, + "loss": 2.0651, + "step": 263765 + }, + { + "epoch": 0.62, + "grad_norm": 1.703125, + "learning_rate": 0.00015611286779565838, + "loss": 2.0742, + "step": 263770 + }, + { + "epoch": 0.62, + "grad_norm": 2.125, + "learning_rate": 0.00015611133790239444, + "loss": 2.1035, + "step": 263775 + }, + { + "epoch": 0.62, + "grad_norm": 2.125, + "learning_rate": 0.00015610980798996186, + "loss": 1.9506, + "step": 263780 + }, + { + "epoch": 0.62, + "grad_norm": 2.0625, + "learning_rate": 0.00015610827805836116, + "loss": 2.2247, + "step": 263785 + }, + { + "epoch": 0.62, + "grad_norm": 2.0, + "learning_rate": 0.00015610674810759284, + "loss": 1.9769, + "step": 263790 + }, + { + "epoch": 0.62, + "grad_norm": 2.296875, + "learning_rate": 0.00015610521813765746, + "loss": 2.0442, + "step": 263795 + }, + { + "epoch": 0.62, + "grad_norm": 2.265625, + "learning_rate": 0.0001561036881485555, + "loss": 2.0772, + "step": 263800 + }, + { + "epoch": 0.62, + "grad_norm": 2.1875, + "learning_rate": 0.00015610215814028752, + "loss": 2.1813, + "step": 263805 + }, + { + "epoch": 0.62, + "grad_norm": 2.25, + "learning_rate": 0.000156100628112854, + "loss": 2.0279, + "step": 263810 + }, + { + "epoch": 0.62, + "grad_norm": 2.1875, + "learning_rate": 0.0001560990980662555, + "loss": 1.763, + "step": 263815 + }, + { + "epoch": 0.62, + "grad_norm": 2.0625, + "learning_rate": 0.00015609756800049257, + "loss": 2.0438, + "step": 263820 + }, + { + "epoch": 0.62, + "grad_norm": 2.046875, + "learning_rate": 0.00015609603791556564, + "loss": 2.1451, + "step": 263825 + }, + { + "epoch": 0.62, + "grad_norm": 1.8828125, + "learning_rate": 0.0001560945078114753, + "loss": 2.1995, + "step": 263830 + }, + { + "epoch": 0.62, + "grad_norm": 2.40625, + "learning_rate": 0.00015609297768822206, + "loss": 2.0328, + "step": 263835 + }, + { + "epoch": 0.62, + "grad_norm": 2.453125, + "learning_rate": 0.00015609144754580645, + "loss": 2.0797, + "step": 263840 + }, + { + "epoch": 0.62, + "grad_norm": 2.28125, + "learning_rate": 0.00015608991738422896, + "loss": 1.9799, + "step": 263845 + }, + { + "epoch": 0.62, + "grad_norm": 1.8828125, + "learning_rate": 0.00015608838720349016, + "loss": 2.2218, + "step": 263850 + }, + { + "epoch": 0.62, + "grad_norm": 2.140625, + "learning_rate": 0.00015608685700359054, + "loss": 2.0819, + "step": 263855 + }, + { + "epoch": 0.62, + "grad_norm": 1.609375, + "learning_rate": 0.00015608532678453062, + "loss": 2.0718, + "step": 263860 + }, + { + "epoch": 0.62, + "grad_norm": 1.953125, + "learning_rate": 0.00015608379654631098, + "loss": 1.964, + "step": 263865 + }, + { + "epoch": 0.62, + "grad_norm": 2.03125, + "learning_rate": 0.00015608226628893208, + "loss": 2.2388, + "step": 263870 + }, + { + "epoch": 0.62, + "grad_norm": 1.6796875, + "learning_rate": 0.00015608073601239446, + "loss": 2.0505, + "step": 263875 + }, + { + "epoch": 0.62, + "grad_norm": 2.484375, + "learning_rate": 0.0001560792057166986, + "loss": 1.9972, + "step": 263880 + }, + { + "epoch": 0.62, + "grad_norm": 1.90625, + "learning_rate": 0.0001560776754018451, + "loss": 1.8091, + "step": 263885 + }, + { + "epoch": 0.62, + "grad_norm": 2.078125, + "learning_rate": 0.00015607614506783446, + "loss": 2.0433, + "step": 263890 + }, + { + "epoch": 0.62, + "grad_norm": 2.40625, + "learning_rate": 0.00015607461471466719, + "loss": 2.1671, + "step": 263895 + }, + { + "epoch": 0.62, + "grad_norm": 2.125, + "learning_rate": 0.0001560730843423438, + "loss": 2.006, + "step": 263900 + }, + { + "epoch": 0.62, + "grad_norm": 2.640625, + "learning_rate": 0.0001560715539508648, + "loss": 2.0723, + "step": 263905 + }, + { + "epoch": 0.62, + "grad_norm": 1.9921875, + "learning_rate": 0.0001560700235402308, + "loss": 1.9627, + "step": 263910 + }, + { + "epoch": 0.62, + "grad_norm": 2.125, + "learning_rate": 0.00015606849311044222, + "loss": 2.0252, + "step": 263915 + }, + { + "epoch": 0.62, + "grad_norm": 2.1875, + "learning_rate": 0.00015606696266149967, + "loss": 1.9231, + "step": 263920 + }, + { + "epoch": 0.62, + "grad_norm": 2.03125, + "learning_rate": 0.0001560654321934036, + "loss": 2.1488, + "step": 263925 + }, + { + "epoch": 0.62, + "grad_norm": 1.984375, + "learning_rate": 0.00015606390170615453, + "loss": 2.0855, + "step": 263930 + }, + { + "epoch": 0.62, + "grad_norm": 2.140625, + "learning_rate": 0.00015606237119975303, + "loss": 2.0571, + "step": 263935 + }, + { + "epoch": 0.62, + "grad_norm": 2.015625, + "learning_rate": 0.00015606084067419965, + "loss": 2.1831, + "step": 263940 + }, + { + "epoch": 0.62, + "grad_norm": 1.6953125, + "learning_rate": 0.00015605931012949484, + "loss": 1.8992, + "step": 263945 + }, + { + "epoch": 0.62, + "grad_norm": 1.8828125, + "learning_rate": 0.00015605777956563917, + "loss": 2.1627, + "step": 263950 + }, + { + "epoch": 0.62, + "grad_norm": 2.0625, + "learning_rate": 0.00015605624898263313, + "loss": 2.134, + "step": 263955 + }, + { + "epoch": 0.62, + "grad_norm": 2.296875, + "learning_rate": 0.00015605471838047724, + "loss": 2.0035, + "step": 263960 + }, + { + "epoch": 0.62, + "grad_norm": 2.1875, + "learning_rate": 0.00015605318775917208, + "loss": 2.0738, + "step": 263965 + }, + { + "epoch": 0.62, + "grad_norm": 2.34375, + "learning_rate": 0.00015605165711871812, + "loss": 1.9898, + "step": 263970 + }, + { + "epoch": 0.62, + "grad_norm": 2.28125, + "learning_rate": 0.0001560501264591159, + "loss": 2.05, + "step": 263975 + }, + { + "epoch": 0.62, + "grad_norm": 2.25, + "learning_rate": 0.00015604859578036595, + "loss": 1.9577, + "step": 263980 + }, + { + "epoch": 0.62, + "grad_norm": 2.28125, + "learning_rate": 0.00015604706508246877, + "loss": 2.0585, + "step": 263985 + }, + { + "epoch": 0.62, + "grad_norm": 2.09375, + "learning_rate": 0.00015604553436542488, + "loss": 2.1609, + "step": 263990 + }, + { + "epoch": 0.62, + "grad_norm": 2.09375, + "learning_rate": 0.0001560440036292348, + "loss": 2.1794, + "step": 263995 + }, + { + "epoch": 0.62, + "grad_norm": 1.9765625, + "learning_rate": 0.00015604247287389912, + "loss": 1.8571, + "step": 264000 + }, + { + "epoch": 0.62, + "grad_norm": 2.265625, + "learning_rate": 0.0001560409420994183, + "loss": 2.0956, + "step": 264005 + }, + { + "epoch": 0.62, + "grad_norm": 2.15625, + "learning_rate": 0.0001560394113057929, + "loss": 2.0326, + "step": 264010 + }, + { + "epoch": 0.62, + "grad_norm": 2.328125, + "learning_rate": 0.00015603788049302336, + "loss": 2.1147, + "step": 264015 + }, + { + "epoch": 0.62, + "grad_norm": 2.046875, + "learning_rate": 0.00015603634966111034, + "loss": 1.9399, + "step": 264020 + }, + { + "epoch": 0.62, + "grad_norm": 2.34375, + "learning_rate": 0.00015603481881005423, + "loss": 2.1011, + "step": 264025 + }, + { + "epoch": 0.62, + "grad_norm": 1.7578125, + "learning_rate": 0.00015603328793985566, + "loss": 2.1054, + "step": 264030 + }, + { + "epoch": 0.62, + "grad_norm": 2.515625, + "learning_rate": 0.00015603175705051507, + "loss": 2.2591, + "step": 264035 + }, + { + "epoch": 0.62, + "grad_norm": 2.265625, + "learning_rate": 0.000156030226142033, + "loss": 1.8599, + "step": 264040 + }, + { + "epoch": 0.62, + "grad_norm": 2.359375, + "learning_rate": 0.00015602869521441002, + "loss": 1.8729, + "step": 264045 + }, + { + "epoch": 0.62, + "grad_norm": 2.078125, + "learning_rate": 0.0001560271642676466, + "loss": 2.1033, + "step": 264050 + }, + { + "epoch": 0.62, + "grad_norm": 2.328125, + "learning_rate": 0.00015602563330174328, + "loss": 2.1751, + "step": 264055 + }, + { + "epoch": 0.62, + "grad_norm": 2.109375, + "learning_rate": 0.00015602410231670063, + "loss": 2.1342, + "step": 264060 + }, + { + "epoch": 0.62, + "grad_norm": 2.078125, + "learning_rate": 0.00015602257131251907, + "loss": 2.1388, + "step": 264065 + }, + { + "epoch": 0.62, + "grad_norm": 2.40625, + "learning_rate": 0.00015602104028919923, + "loss": 2.2686, + "step": 264070 + }, + { + "epoch": 0.62, + "grad_norm": 2.625, + "learning_rate": 0.00015601950924674156, + "loss": 1.9607, + "step": 264075 + }, + { + "epoch": 0.62, + "grad_norm": 2.140625, + "learning_rate": 0.00015601797818514662, + "loss": 1.9078, + "step": 264080 + }, + { + "epoch": 0.62, + "grad_norm": 2.28125, + "learning_rate": 0.0001560164471044149, + "loss": 2.0217, + "step": 264085 + }, + { + "epoch": 0.62, + "grad_norm": 2.3125, + "learning_rate": 0.00015601491600454698, + "loss": 2.1459, + "step": 264090 + }, + { + "epoch": 0.62, + "grad_norm": 2.015625, + "learning_rate": 0.00015601338488554334, + "loss": 1.9933, + "step": 264095 + }, + { + "epoch": 0.62, + "grad_norm": 1.8359375, + "learning_rate": 0.0001560118537474045, + "loss": 2.1078, + "step": 264100 + }, + { + "epoch": 0.62, + "grad_norm": 2.3125, + "learning_rate": 0.00015601032259013103, + "loss": 2.1787, + "step": 264105 + }, + { + "epoch": 0.62, + "grad_norm": 2.484375, + "learning_rate": 0.0001560087914137234, + "loss": 2.1399, + "step": 264110 + }, + { + "epoch": 0.62, + "grad_norm": 2.34375, + "learning_rate": 0.00015600726021818212, + "loss": 2.3149, + "step": 264115 + }, + { + "epoch": 0.62, + "grad_norm": 2.109375, + "learning_rate": 0.00015600572900350778, + "loss": 2.1728, + "step": 264120 + }, + { + "epoch": 0.62, + "grad_norm": 2.25, + "learning_rate": 0.00015600419776970082, + "loss": 2.0969, + "step": 264125 + }, + { + "epoch": 0.62, + "grad_norm": 2.46875, + "learning_rate": 0.00015600266651676187, + "loss": 2.0855, + "step": 264130 + }, + { + "epoch": 0.62, + "grad_norm": 2.09375, + "learning_rate": 0.00015600113524469138, + "loss": 2.0837, + "step": 264135 + }, + { + "epoch": 0.62, + "grad_norm": 1.953125, + "learning_rate": 0.00015599960395348988, + "loss": 1.9615, + "step": 264140 + }, + { + "epoch": 0.62, + "grad_norm": 2.109375, + "learning_rate": 0.0001559980726431579, + "loss": 2.1898, + "step": 264145 + }, + { + "epoch": 0.62, + "grad_norm": 2.0625, + "learning_rate": 0.00015599654131369595, + "loss": 2.1227, + "step": 264150 + }, + { + "epoch": 0.62, + "grad_norm": 2.1875, + "learning_rate": 0.00015599500996510453, + "loss": 2.1725, + "step": 264155 + }, + { + "epoch": 0.62, + "grad_norm": 2.09375, + "learning_rate": 0.00015599347859738427, + "loss": 2.0606, + "step": 264160 + }, + { + "epoch": 0.62, + "grad_norm": 2.140625, + "learning_rate": 0.0001559919472105356, + "loss": 2.1491, + "step": 264165 + }, + { + "epoch": 0.62, + "grad_norm": 2.21875, + "learning_rate": 0.00015599041580455905, + "loss": 1.9339, + "step": 264170 + }, + { + "epoch": 0.62, + "grad_norm": 2.25, + "learning_rate": 0.00015598888437945516, + "loss": 2.1492, + "step": 264175 + }, + { + "epoch": 0.62, + "grad_norm": 1.9765625, + "learning_rate": 0.00015598735293522449, + "loss": 2.1656, + "step": 264180 + }, + { + "epoch": 0.62, + "grad_norm": 2.203125, + "learning_rate": 0.00015598582147186748, + "loss": 2.1862, + "step": 264185 + }, + { + "epoch": 0.62, + "grad_norm": 2.171875, + "learning_rate": 0.00015598428998938474, + "loss": 2.0868, + "step": 264190 + }, + { + "epoch": 0.62, + "grad_norm": 2.609375, + "learning_rate": 0.0001559827584877767, + "loss": 2.0725, + "step": 264195 + }, + { + "epoch": 0.62, + "grad_norm": 2.015625, + "learning_rate": 0.00015598122696704395, + "loss": 2.136, + "step": 264200 + }, + { + "epoch": 0.62, + "grad_norm": 2.1875, + "learning_rate": 0.000155979695427187, + "loss": 2.1973, + "step": 264205 + }, + { + "epoch": 0.62, + "grad_norm": 2.234375, + "learning_rate": 0.0001559781638682064, + "loss": 1.9327, + "step": 264210 + }, + { + "epoch": 0.62, + "grad_norm": 2.0625, + "learning_rate": 0.00015597663229010263, + "loss": 2.1047, + "step": 264215 + }, + { + "epoch": 0.62, + "grad_norm": 2.3125, + "learning_rate": 0.0001559751006928762, + "loss": 1.9255, + "step": 264220 + }, + { + "epoch": 0.62, + "grad_norm": 2.0625, + "learning_rate": 0.0001559735690765277, + "loss": 2.1157, + "step": 264225 + }, + { + "epoch": 0.62, + "grad_norm": 1.90625, + "learning_rate": 0.00015597203744105757, + "loss": 2.0021, + "step": 264230 + }, + { + "epoch": 0.62, + "grad_norm": 2.015625, + "learning_rate": 0.00015597050578646638, + "loss": 2.2917, + "step": 264235 + }, + { + "epoch": 0.62, + "grad_norm": 2.109375, + "learning_rate": 0.00015596897411275471, + "loss": 1.9087, + "step": 264240 + }, + { + "epoch": 0.62, + "grad_norm": 2.46875, + "learning_rate": 0.00015596744241992298, + "loss": 2.0493, + "step": 264245 + }, + { + "epoch": 0.62, + "grad_norm": 2.21875, + "learning_rate": 0.00015596591070797175, + "loss": 2.0641, + "step": 264250 + }, + { + "epoch": 0.62, + "grad_norm": 2.234375, + "learning_rate": 0.00015596437897690154, + "loss": 1.8188, + "step": 264255 + }, + { + "epoch": 0.62, + "grad_norm": 2.171875, + "learning_rate": 0.0001559628472267129, + "loss": 2.1606, + "step": 264260 + }, + { + "epoch": 0.62, + "grad_norm": 2.71875, + "learning_rate": 0.00015596131545740635, + "loss": 2.0713, + "step": 264265 + }, + { + "epoch": 0.62, + "grad_norm": 2.1875, + "learning_rate": 0.00015595978366898238, + "loss": 2.1308, + "step": 264270 + }, + { + "epoch": 0.62, + "grad_norm": 2.28125, + "learning_rate": 0.00015595825186144157, + "loss": 2.0576, + "step": 264275 + }, + { + "epoch": 0.62, + "grad_norm": 2.328125, + "learning_rate": 0.00015595672003478436, + "loss": 1.8956, + "step": 264280 + }, + { + "epoch": 0.62, + "grad_norm": 2.34375, + "learning_rate": 0.00015595518818901132, + "loss": 2.1327, + "step": 264285 + }, + { + "epoch": 0.62, + "grad_norm": 1.90625, + "learning_rate": 0.00015595365632412302, + "loss": 2.0788, + "step": 264290 + }, + { + "epoch": 0.62, + "grad_norm": 1.7109375, + "learning_rate": 0.0001559521244401199, + "loss": 1.8934, + "step": 264295 + }, + { + "epoch": 0.62, + "grad_norm": 1.984375, + "learning_rate": 0.00015595059253700253, + "loss": 2.1101, + "step": 264300 + }, + { + "epoch": 0.62, + "grad_norm": 1.984375, + "learning_rate": 0.0001559490606147714, + "loss": 2.1357, + "step": 264305 + }, + { + "epoch": 0.62, + "grad_norm": 2.1875, + "learning_rate": 0.00015594752867342711, + "loss": 2.0702, + "step": 264310 + }, + { + "epoch": 0.62, + "grad_norm": 2.46875, + "learning_rate": 0.00015594599671297008, + "loss": 2.0524, + "step": 264315 + }, + { + "epoch": 0.62, + "grad_norm": 2.125, + "learning_rate": 0.0001559444647334009, + "loss": 2.0868, + "step": 264320 + }, + { + "epoch": 0.62, + "grad_norm": 2.359375, + "learning_rate": 0.00015594293273472006, + "loss": 2.062, + "step": 264325 + }, + { + "epoch": 0.62, + "grad_norm": 2.171875, + "learning_rate": 0.00015594140071692814, + "loss": 2.1693, + "step": 264330 + }, + { + "epoch": 0.62, + "grad_norm": 1.8046875, + "learning_rate": 0.00015593986868002558, + "loss": 2.0269, + "step": 264335 + }, + { + "epoch": 0.62, + "grad_norm": 2.09375, + "learning_rate": 0.00015593833662401298, + "loss": 1.9559, + "step": 264340 + }, + { + "epoch": 0.62, + "grad_norm": 2.109375, + "learning_rate": 0.0001559368045488908, + "loss": 2.1963, + "step": 264345 + }, + { + "epoch": 0.62, + "grad_norm": 2.109375, + "learning_rate": 0.0001559352724546596, + "loss": 1.9614, + "step": 264350 + }, + { + "epoch": 0.62, + "grad_norm": 1.890625, + "learning_rate": 0.0001559337403413199, + "loss": 1.9087, + "step": 264355 + }, + { + "epoch": 0.62, + "grad_norm": 1.9375, + "learning_rate": 0.00015593220820887222, + "loss": 2.1743, + "step": 264360 + }, + { + "epoch": 0.62, + "grad_norm": 2.078125, + "learning_rate": 0.0001559306760573171, + "loss": 2.1381, + "step": 264365 + }, + { + "epoch": 0.62, + "grad_norm": 2.140625, + "learning_rate": 0.00015592914388665502, + "loss": 1.9608, + "step": 264370 + }, + { + "epoch": 0.62, + "grad_norm": 1.890625, + "learning_rate": 0.00015592761169688654, + "loss": 1.9764, + "step": 264375 + }, + { + "epoch": 0.62, + "grad_norm": 2.25, + "learning_rate": 0.0001559260794880122, + "loss": 2.0257, + "step": 264380 + }, + { + "epoch": 0.62, + "grad_norm": 2.40625, + "learning_rate": 0.00015592454726003245, + "loss": 2.1569, + "step": 264385 + }, + { + "epoch": 0.62, + "grad_norm": 2.25, + "learning_rate": 0.0001559230150129479, + "loss": 2.085, + "step": 264390 + }, + { + "epoch": 0.62, + "grad_norm": 2.609375, + "learning_rate": 0.000155921482746759, + "loss": 2.2014, + "step": 264395 + }, + { + "epoch": 0.62, + "grad_norm": 1.8359375, + "learning_rate": 0.00015591995046146634, + "loss": 1.9083, + "step": 264400 + }, + { + "epoch": 0.62, + "grad_norm": 2.328125, + "learning_rate": 0.0001559184181570704, + "loss": 1.9662, + "step": 264405 + }, + { + "epoch": 0.62, + "grad_norm": 2.34375, + "learning_rate": 0.0001559168858335717, + "loss": 1.935, + "step": 264410 + }, + { + "epoch": 0.62, + "grad_norm": 2.078125, + "learning_rate": 0.0001559153534909708, + "loss": 1.9545, + "step": 264415 + }, + { + "epoch": 0.62, + "grad_norm": 2.078125, + "learning_rate": 0.0001559138211292682, + "loss": 2.0378, + "step": 264420 + }, + { + "epoch": 0.62, + "grad_norm": 2.3125, + "learning_rate": 0.0001559122887484644, + "loss": 1.9834, + "step": 264425 + }, + { + "epoch": 0.62, + "grad_norm": 2.46875, + "learning_rate": 0.00015591075634855997, + "loss": 2.0433, + "step": 264430 + }, + { + "epoch": 0.62, + "grad_norm": 2.078125, + "learning_rate": 0.00015590922392955537, + "loss": 2.0586, + "step": 264435 + }, + { + "epoch": 0.62, + "grad_norm": 1.9375, + "learning_rate": 0.0001559076914914512, + "loss": 2.0681, + "step": 264440 + }, + { + "epoch": 0.62, + "grad_norm": 2.4375, + "learning_rate": 0.00015590615903424796, + "loss": 2.056, + "step": 264445 + }, + { + "epoch": 0.62, + "grad_norm": 2.0, + "learning_rate": 0.00015590462655794615, + "loss": 2.0775, + "step": 264450 + }, + { + "epoch": 0.62, + "grad_norm": 1.84375, + "learning_rate": 0.00015590309406254631, + "loss": 2.1542, + "step": 264455 + }, + { + "epoch": 0.62, + "grad_norm": 2.234375, + "learning_rate": 0.00015590156154804893, + "loss": 2.0519, + "step": 264460 + }, + { + "epoch": 0.62, + "grad_norm": 2.125, + "learning_rate": 0.0001559000290144546, + "loss": 2.1407, + "step": 264465 + }, + { + "epoch": 0.62, + "grad_norm": 2.078125, + "learning_rate": 0.0001558984964617638, + "loss": 1.9441, + "step": 264470 + }, + { + "epoch": 0.62, + "grad_norm": 2.203125, + "learning_rate": 0.00015589696388997704, + "loss": 2.0414, + "step": 264475 + }, + { + "epoch": 0.62, + "grad_norm": 2.140625, + "learning_rate": 0.00015589543129909488, + "loss": 2.0299, + "step": 264480 + }, + { + "epoch": 0.62, + "grad_norm": 2.15625, + "learning_rate": 0.0001558938986891178, + "loss": 2.0483, + "step": 264485 + }, + { + "epoch": 0.62, + "grad_norm": 2.140625, + "learning_rate": 0.0001558923660600464, + "loss": 1.9684, + "step": 264490 + }, + { + "epoch": 0.62, + "grad_norm": 2.375, + "learning_rate": 0.00015589083341188112, + "loss": 2.1409, + "step": 264495 + }, + { + "epoch": 0.62, + "grad_norm": 1.953125, + "learning_rate": 0.00015588930074462252, + "loss": 2.2024, + "step": 264500 + }, + { + "epoch": 0.62, + "grad_norm": 2.171875, + "learning_rate": 0.00015588776805827115, + "loss": 2.14, + "step": 264505 + }, + { + "epoch": 0.62, + "grad_norm": 2.3125, + "learning_rate": 0.00015588623535282747, + "loss": 2.13, + "step": 264510 + }, + { + "epoch": 0.62, + "grad_norm": 2.40625, + "learning_rate": 0.00015588470262829204, + "loss": 1.9393, + "step": 264515 + }, + { + "epoch": 0.62, + "grad_norm": 2.375, + "learning_rate": 0.00015588316988466542, + "loss": 2.1271, + "step": 264520 + }, + { + "epoch": 0.62, + "grad_norm": 2.125, + "learning_rate": 0.00015588163712194802, + "loss": 2.0592, + "step": 264525 + }, + { + "epoch": 0.62, + "grad_norm": 2.734375, + "learning_rate": 0.0001558801043401405, + "loss": 2.0178, + "step": 264530 + }, + { + "epoch": 0.62, + "grad_norm": 2.078125, + "learning_rate": 0.00015587857153924331, + "loss": 1.9647, + "step": 264535 + }, + { + "epoch": 0.62, + "grad_norm": 2.265625, + "learning_rate": 0.000155877038719257, + "loss": 1.9506, + "step": 264540 + }, + { + "epoch": 0.62, + "grad_norm": 2.109375, + "learning_rate": 0.00015587550588018204, + "loss": 2.2425, + "step": 264545 + }, + { + "epoch": 0.62, + "grad_norm": 2.421875, + "learning_rate": 0.00015587397302201903, + "loss": 2.1592, + "step": 264550 + }, + { + "epoch": 0.62, + "grad_norm": 1.734375, + "learning_rate": 0.00015587244014476847, + "loss": 1.9201, + "step": 264555 + }, + { + "epoch": 0.62, + "grad_norm": 2.171875, + "learning_rate": 0.00015587090724843082, + "loss": 2.0469, + "step": 264560 + }, + { + "epoch": 0.62, + "grad_norm": 2.34375, + "learning_rate": 0.0001558693743330067, + "loss": 2.1225, + "step": 264565 + }, + { + "epoch": 0.62, + "grad_norm": 2.0, + "learning_rate": 0.0001558678413984966, + "loss": 2.1687, + "step": 264570 + }, + { + "epoch": 0.62, + "grad_norm": 2.0625, + "learning_rate": 0.00015586630844490097, + "loss": 2.1574, + "step": 264575 + }, + { + "epoch": 0.62, + "grad_norm": 2.484375, + "learning_rate": 0.00015586477547222045, + "loss": 2.0949, + "step": 264580 + }, + { + "epoch": 0.62, + "grad_norm": 2.078125, + "learning_rate": 0.00015586324248045548, + "loss": 2.029, + "step": 264585 + }, + { + "epoch": 0.62, + "grad_norm": 1.796875, + "learning_rate": 0.00015586170946960663, + "loss": 2.1313, + "step": 264590 + }, + { + "epoch": 0.62, + "grad_norm": 2.15625, + "learning_rate": 0.0001558601764396744, + "loss": 2.0946, + "step": 264595 + }, + { + "epoch": 0.62, + "grad_norm": 2.09375, + "learning_rate": 0.0001558586433906593, + "loss": 2.2181, + "step": 264600 + }, + { + "epoch": 0.62, + "grad_norm": 2.171875, + "learning_rate": 0.0001558571103225619, + "loss": 2.0914, + "step": 264605 + }, + { + "epoch": 0.62, + "grad_norm": 2.4375, + "learning_rate": 0.00015585557723538272, + "loss": 2.1195, + "step": 264610 + }, + { + "epoch": 0.62, + "grad_norm": 2.015625, + "learning_rate": 0.0001558540441291222, + "loss": 1.9334, + "step": 264615 + }, + { + "epoch": 0.62, + "grad_norm": 2.0625, + "learning_rate": 0.00015585251100378098, + "loss": 2.0992, + "step": 264620 + }, + { + "epoch": 0.62, + "grad_norm": 1.78125, + "learning_rate": 0.0001558509778593595, + "loss": 2.1893, + "step": 264625 + }, + { + "epoch": 0.62, + "grad_norm": 2.234375, + "learning_rate": 0.00015584944469585832, + "loss": 1.9804, + "step": 264630 + }, + { + "epoch": 0.62, + "grad_norm": 2.0625, + "learning_rate": 0.00015584791151327794, + "loss": 1.8628, + "step": 264635 + }, + { + "epoch": 0.62, + "grad_norm": 2.21875, + "learning_rate": 0.00015584637831161894, + "loss": 2.217, + "step": 264640 + }, + { + "epoch": 0.62, + "grad_norm": 1.953125, + "learning_rate": 0.0001558448450908818, + "loss": 1.9793, + "step": 264645 + }, + { + "epoch": 0.62, + "grad_norm": 1.890625, + "learning_rate": 0.00015584331185106702, + "loss": 1.9933, + "step": 264650 + }, + { + "epoch": 0.62, + "grad_norm": 2.34375, + "learning_rate": 0.00015584177859217518, + "loss": 2.149, + "step": 264655 + }, + { + "epoch": 0.62, + "grad_norm": 2.328125, + "learning_rate": 0.00015584024531420675, + "loss": 2.0461, + "step": 264660 + }, + { + "epoch": 0.62, + "grad_norm": 1.953125, + "learning_rate": 0.0001558387120171623, + "loss": 2.0869, + "step": 264665 + }, + { + "epoch": 0.62, + "grad_norm": 2.015625, + "learning_rate": 0.00015583717870104233, + "loss": 2.0953, + "step": 264670 + }, + { + "epoch": 0.62, + "grad_norm": 2.5, + "learning_rate": 0.00015583564536584733, + "loss": 2.0105, + "step": 264675 + }, + { + "epoch": 0.62, + "grad_norm": 2.328125, + "learning_rate": 0.0001558341120115779, + "loss": 2.0785, + "step": 264680 + }, + { + "epoch": 0.62, + "grad_norm": 2.234375, + "learning_rate": 0.0001558325786382345, + "loss": 2.0731, + "step": 264685 + }, + { + "epoch": 0.62, + "grad_norm": 2.359375, + "learning_rate": 0.00015583104524581772, + "loss": 2.1009, + "step": 264690 + }, + { + "epoch": 0.62, + "grad_norm": 2.015625, + "learning_rate": 0.000155829511834328, + "loss": 2.2113, + "step": 264695 + }, + { + "epoch": 0.62, + "grad_norm": 2.5625, + "learning_rate": 0.00015582797840376595, + "loss": 2.1659, + "step": 264700 + }, + { + "epoch": 0.62, + "grad_norm": 2.34375, + "learning_rate": 0.00015582644495413202, + "loss": 1.9477, + "step": 264705 + }, + { + "epoch": 0.62, + "grad_norm": 2.328125, + "learning_rate": 0.00015582491148542676, + "loss": 2.0943, + "step": 264710 + }, + { + "epoch": 0.62, + "grad_norm": 2.15625, + "learning_rate": 0.0001558233779976507, + "loss": 2.1479, + "step": 264715 + }, + { + "epoch": 0.62, + "grad_norm": 2.46875, + "learning_rate": 0.00015582184449080435, + "loss": 2.0295, + "step": 264720 + }, + { + "epoch": 0.62, + "grad_norm": 2.359375, + "learning_rate": 0.00015582031096488826, + "loss": 2.0165, + "step": 264725 + }, + { + "epoch": 0.62, + "grad_norm": 2.484375, + "learning_rate": 0.00015581877741990295, + "loss": 2.1149, + "step": 264730 + }, + { + "epoch": 0.62, + "grad_norm": 2.78125, + "learning_rate": 0.00015581724385584891, + "loss": 2.1845, + "step": 264735 + }, + { + "epoch": 0.62, + "grad_norm": 2.625, + "learning_rate": 0.0001558157102727267, + "loss": 2.1835, + "step": 264740 + }, + { + "epoch": 0.62, + "grad_norm": 2.5, + "learning_rate": 0.00015581417667053687, + "loss": 2.1971, + "step": 264745 + }, + { + "epoch": 0.62, + "grad_norm": 2.625, + "learning_rate": 0.00015581264304927986, + "loss": 2.0638, + "step": 264750 + }, + { + "epoch": 0.62, + "grad_norm": 2.8125, + "learning_rate": 0.00015581110940895624, + "loss": 2.1819, + "step": 264755 + }, + { + "epoch": 0.62, + "grad_norm": 2.203125, + "learning_rate": 0.00015580957574956654, + "loss": 2.0455, + "step": 264760 + }, + { + "epoch": 0.62, + "grad_norm": 2.703125, + "learning_rate": 0.00015580804207111125, + "loss": 1.9996, + "step": 264765 + }, + { + "epoch": 0.62, + "grad_norm": 2.109375, + "learning_rate": 0.00015580650837359098, + "loss": 1.8842, + "step": 264770 + }, + { + "epoch": 0.62, + "grad_norm": 2.421875, + "learning_rate": 0.00015580497465700615, + "loss": 1.8497, + "step": 264775 + }, + { + "epoch": 0.62, + "grad_norm": 2.703125, + "learning_rate": 0.00015580344092135735, + "loss": 2.0635, + "step": 264780 + }, + { + "epoch": 0.62, + "grad_norm": 1.9375, + "learning_rate": 0.0001558019071666451, + "loss": 2.1792, + "step": 264785 + }, + { + "epoch": 0.62, + "grad_norm": 2.203125, + "learning_rate": 0.00015580037339286987, + "loss": 2.0414, + "step": 264790 + }, + { + "epoch": 0.62, + "grad_norm": 1.9453125, + "learning_rate": 0.0001557988396000322, + "loss": 1.9357, + "step": 264795 + }, + { + "epoch": 0.62, + "grad_norm": 2.53125, + "learning_rate": 0.00015579730578813266, + "loss": 2.0411, + "step": 264800 + }, + { + "epoch": 0.62, + "grad_norm": 2.21875, + "learning_rate": 0.0001557957719571718, + "loss": 1.9731, + "step": 264805 + }, + { + "epoch": 0.62, + "grad_norm": 1.7890625, + "learning_rate": 0.00015579423810715002, + "loss": 1.9543, + "step": 264810 + }, + { + "epoch": 0.62, + "grad_norm": 2.359375, + "learning_rate": 0.00015579270423806793, + "loss": 2.0303, + "step": 264815 + }, + { + "epoch": 0.62, + "grad_norm": 2.265625, + "learning_rate": 0.00015579117034992605, + "loss": 1.98, + "step": 264820 + }, + { + "epoch": 0.62, + "grad_norm": 2.046875, + "learning_rate": 0.00015578963644272492, + "loss": 2.0511, + "step": 264825 + }, + { + "epoch": 0.62, + "grad_norm": 2.28125, + "learning_rate": 0.000155788102516465, + "loss": 2.1549, + "step": 264830 + }, + { + "epoch": 0.62, + "grad_norm": 2.890625, + "learning_rate": 0.00015578656857114686, + "loss": 2.0622, + "step": 264835 + }, + { + "epoch": 0.62, + "grad_norm": 2.15625, + "learning_rate": 0.00015578503460677104, + "loss": 2.2361, + "step": 264840 + }, + { + "epoch": 0.62, + "grad_norm": 1.953125, + "learning_rate": 0.000155783500623338, + "loss": 2.1671, + "step": 264845 + }, + { + "epoch": 0.62, + "grad_norm": 2.53125, + "learning_rate": 0.00015578196662084835, + "loss": 2.2, + "step": 264850 + }, + { + "epoch": 0.62, + "grad_norm": 2.0625, + "learning_rate": 0.00015578043259930252, + "loss": 2.1241, + "step": 264855 + }, + { + "epoch": 0.62, + "grad_norm": 2.46875, + "learning_rate": 0.0001557788985587011, + "loss": 2.0722, + "step": 264860 + }, + { + "epoch": 0.62, + "grad_norm": 1.9765625, + "learning_rate": 0.00015577736449904462, + "loss": 2.2418, + "step": 264865 + }, + { + "epoch": 0.62, + "grad_norm": 1.8671875, + "learning_rate": 0.00015577583042033358, + "loss": 2.0492, + "step": 264870 + }, + { + "epoch": 0.62, + "grad_norm": 2.140625, + "learning_rate": 0.00015577429632256847, + "loss": 2.0034, + "step": 264875 + }, + { + "epoch": 0.62, + "grad_norm": 2.546875, + "learning_rate": 0.0001557727622057499, + "loss": 2.1407, + "step": 264880 + }, + { + "epoch": 0.62, + "grad_norm": 2.3125, + "learning_rate": 0.0001557712280698783, + "loss": 1.9958, + "step": 264885 + }, + { + "epoch": 0.62, + "grad_norm": 1.9609375, + "learning_rate": 0.00015576969391495427, + "loss": 2.0968, + "step": 264890 + }, + { + "epoch": 0.62, + "grad_norm": 2.1875, + "learning_rate": 0.00015576815974097826, + "loss": 2.096, + "step": 264895 + }, + { + "epoch": 0.62, + "grad_norm": 2.21875, + "learning_rate": 0.00015576662554795084, + "loss": 2.1662, + "step": 264900 + }, + { + "epoch": 0.62, + "grad_norm": 1.8515625, + "learning_rate": 0.00015576509133587258, + "loss": 1.9178, + "step": 264905 + }, + { + "epoch": 0.62, + "grad_norm": 2.25, + "learning_rate": 0.0001557635571047439, + "loss": 2.0429, + "step": 264910 + }, + { + "epoch": 0.62, + "grad_norm": 2.375, + "learning_rate": 0.0001557620228545654, + "loss": 2.0648, + "step": 264915 + }, + { + "epoch": 0.62, + "grad_norm": 3.0625, + "learning_rate": 0.00015576048858533757, + "loss": 1.999, + "step": 264920 + }, + { + "epoch": 0.62, + "grad_norm": 2.015625, + "learning_rate": 0.00015575895429706095, + "loss": 2.1049, + "step": 264925 + }, + { + "epoch": 0.62, + "grad_norm": 2.109375, + "learning_rate": 0.00015575741998973607, + "loss": 1.8919, + "step": 264930 + }, + { + "epoch": 0.62, + "grad_norm": 2.0625, + "learning_rate": 0.00015575588566336346, + "loss": 2.0501, + "step": 264935 + }, + { + "epoch": 0.62, + "grad_norm": 1.8203125, + "learning_rate": 0.0001557543513179436, + "loss": 2.05, + "step": 264940 + }, + { + "epoch": 0.62, + "grad_norm": 2.390625, + "learning_rate": 0.00015575281695347706, + "loss": 1.9816, + "step": 264945 + }, + { + "epoch": 0.62, + "grad_norm": 1.84375, + "learning_rate": 0.00015575128256996431, + "loss": 1.9569, + "step": 264950 + }, + { + "epoch": 0.62, + "grad_norm": 2.40625, + "learning_rate": 0.00015574974816740596, + "loss": 2.163, + "step": 264955 + }, + { + "epoch": 0.62, + "grad_norm": 2.234375, + "learning_rate": 0.00015574821374580243, + "loss": 2.0403, + "step": 264960 + }, + { + "epoch": 0.62, + "grad_norm": 2.03125, + "learning_rate": 0.00015574667930515435, + "loss": 2.1348, + "step": 264965 + }, + { + "epoch": 0.62, + "grad_norm": 1.9765625, + "learning_rate": 0.0001557451448454622, + "loss": 2.1756, + "step": 264970 + }, + { + "epoch": 0.62, + "grad_norm": 2.125, + "learning_rate": 0.00015574361036672644, + "loss": 2.1802, + "step": 264975 + }, + { + "epoch": 0.62, + "grad_norm": 2.34375, + "learning_rate": 0.00015574207586894768, + "loss": 2.1133, + "step": 264980 + }, + { + "epoch": 0.62, + "grad_norm": 2.265625, + "learning_rate": 0.00015574054135212643, + "loss": 2.0204, + "step": 264985 + }, + { + "epoch": 0.62, + "grad_norm": 2.625, + "learning_rate": 0.0001557390068162632, + "loss": 2.0524, + "step": 264990 + }, + { + "epoch": 0.62, + "grad_norm": 2.03125, + "learning_rate": 0.0001557374722613585, + "loss": 2.182, + "step": 264995 + }, + { + "epoch": 0.62, + "grad_norm": 2.59375, + "learning_rate": 0.00015573593768741288, + "loss": 2.1274, + "step": 265000 + }, + { + "epoch": 0.62, + "grad_norm": 2.546875, + "learning_rate": 0.00015573440309442683, + "loss": 2.0228, + "step": 265005 + }, + { + "epoch": 0.62, + "grad_norm": 2.078125, + "learning_rate": 0.0001557328684824009, + "loss": 1.9619, + "step": 265010 + }, + { + "epoch": 0.62, + "grad_norm": 2.015625, + "learning_rate": 0.00015573133385133562, + "loss": 2.0758, + "step": 265015 + }, + { + "epoch": 0.62, + "grad_norm": 2.234375, + "learning_rate": 0.00015572979920123152, + "loss": 2.1539, + "step": 265020 + }, + { + "epoch": 0.62, + "grad_norm": 2.5625, + "learning_rate": 0.00015572826453208913, + "loss": 1.935, + "step": 265025 + }, + { + "epoch": 0.62, + "grad_norm": 1.8984375, + "learning_rate": 0.0001557267298439089, + "loss": 2.1184, + "step": 265030 + }, + { + "epoch": 0.62, + "grad_norm": 2.09375, + "learning_rate": 0.00015572519513669142, + "loss": 2.0571, + "step": 265035 + }, + { + "epoch": 0.62, + "grad_norm": 2.09375, + "learning_rate": 0.00015572366041043722, + "loss": 2.201, + "step": 265040 + }, + { + "epoch": 0.62, + "grad_norm": 2.609375, + "learning_rate": 0.0001557221256651468, + "loss": 2.078, + "step": 265045 + }, + { + "epoch": 0.62, + "grad_norm": 2.28125, + "learning_rate": 0.0001557205909008207, + "loss": 2.0988, + "step": 265050 + }, + { + "epoch": 0.62, + "grad_norm": 2.28125, + "learning_rate": 0.00015571905611745944, + "loss": 2.0826, + "step": 265055 + }, + { + "epoch": 0.62, + "grad_norm": 2.0625, + "learning_rate": 0.0001557175213150635, + "loss": 2.09, + "step": 265060 + }, + { + "epoch": 0.62, + "grad_norm": 1.984375, + "learning_rate": 0.0001557159864936335, + "loss": 2.186, + "step": 265065 + }, + { + "epoch": 0.62, + "grad_norm": 2.234375, + "learning_rate": 0.00015571445165316988, + "loss": 2.1448, + "step": 265070 + }, + { + "epoch": 0.62, + "grad_norm": 2.109375, + "learning_rate": 0.0001557129167936732, + "loss": 2.1028, + "step": 265075 + }, + { + "epoch": 0.62, + "grad_norm": 2.234375, + "learning_rate": 0.000155711381915144, + "loss": 2.0363, + "step": 265080 + }, + { + "epoch": 0.62, + "grad_norm": 1.765625, + "learning_rate": 0.00015570984701758272, + "loss": 2.0098, + "step": 265085 + }, + { + "epoch": 0.62, + "grad_norm": 1.9375, + "learning_rate": 0.00015570831210098997, + "loss": 2.0014, + "step": 265090 + }, + { + "epoch": 0.62, + "grad_norm": 1.9375, + "learning_rate": 0.00015570677716536627, + "loss": 2.0967, + "step": 265095 + }, + { + "epoch": 0.62, + "grad_norm": 1.9609375, + "learning_rate": 0.00015570524221071209, + "loss": 2.0632, + "step": 265100 + }, + { + "epoch": 0.62, + "grad_norm": 2.328125, + "learning_rate": 0.00015570370723702804, + "loss": 2.0994, + "step": 265105 + }, + { + "epoch": 0.62, + "grad_norm": 1.90625, + "learning_rate": 0.00015570217224431456, + "loss": 1.9753, + "step": 265110 + }, + { + "epoch": 0.62, + "grad_norm": 2.0, + "learning_rate": 0.0001557006372325722, + "loss": 1.9549, + "step": 265115 + }, + { + "epoch": 0.62, + "grad_norm": 1.9453125, + "learning_rate": 0.0001556991022018015, + "loss": 2.1442, + "step": 265120 + }, + { + "epoch": 0.62, + "grad_norm": 1.9375, + "learning_rate": 0.00015569756715200298, + "loss": 1.8038, + "step": 265125 + }, + { + "epoch": 0.62, + "grad_norm": 3.21875, + "learning_rate": 0.0001556960320831772, + "loss": 2.2634, + "step": 265130 + }, + { + "epoch": 0.62, + "grad_norm": 2.09375, + "learning_rate": 0.0001556944969953246, + "loss": 2.0907, + "step": 265135 + }, + { + "epoch": 0.62, + "grad_norm": 2.4375, + "learning_rate": 0.00015569296188844574, + "loss": 2.1612, + "step": 265140 + }, + { + "epoch": 0.62, + "grad_norm": 2.203125, + "learning_rate": 0.00015569142676254117, + "loss": 1.9637, + "step": 265145 + }, + { + "epoch": 0.62, + "grad_norm": 2.359375, + "learning_rate": 0.0001556898916176114, + "loss": 1.9704, + "step": 265150 + }, + { + "epoch": 0.62, + "grad_norm": 2.296875, + "learning_rate": 0.00015568835645365695, + "loss": 2.1281, + "step": 265155 + }, + { + "epoch": 0.62, + "grad_norm": 2.125, + "learning_rate": 0.00015568682127067838, + "loss": 1.9517, + "step": 265160 + }, + { + "epoch": 0.62, + "grad_norm": 2.109375, + "learning_rate": 0.00015568528606867615, + "loss": 2.1085, + "step": 265165 + }, + { + "epoch": 0.62, + "grad_norm": 1.8203125, + "learning_rate": 0.0001556837508476508, + "loss": 2.0307, + "step": 265170 + }, + { + "epoch": 0.62, + "grad_norm": 2.265625, + "learning_rate": 0.0001556822156076029, + "loss": 1.7668, + "step": 265175 + }, + { + "epoch": 0.62, + "grad_norm": 2.03125, + "learning_rate": 0.00015568068034853294, + "loss": 2.3329, + "step": 265180 + }, + { + "epoch": 0.62, + "grad_norm": 2.03125, + "learning_rate": 0.00015567914507044143, + "loss": 1.9439, + "step": 265185 + }, + { + "epoch": 0.62, + "grad_norm": 2.171875, + "learning_rate": 0.00015567760977332892, + "loss": 1.9021, + "step": 265190 + }, + { + "epoch": 0.62, + "grad_norm": 2.5, + "learning_rate": 0.00015567607445719593, + "loss": 2.3384, + "step": 265195 + }, + { + "epoch": 0.62, + "grad_norm": 1.75, + "learning_rate": 0.00015567453912204303, + "loss": 2.039, + "step": 265200 + }, + { + "epoch": 0.62, + "grad_norm": 2.296875, + "learning_rate": 0.00015567300376787067, + "loss": 2.0828, + "step": 265205 + }, + { + "epoch": 0.62, + "grad_norm": 2.046875, + "learning_rate": 0.0001556714683946794, + "loss": 2.2207, + "step": 265210 + }, + { + "epoch": 0.62, + "grad_norm": 1.8515625, + "learning_rate": 0.0001556699330024697, + "loss": 1.9099, + "step": 265215 + }, + { + "epoch": 0.62, + "grad_norm": 2.09375, + "learning_rate": 0.00015566839759124218, + "loss": 1.9927, + "step": 265220 + }, + { + "epoch": 0.62, + "grad_norm": 1.796875, + "learning_rate": 0.00015566686216099735, + "loss": 2.162, + "step": 265225 + }, + { + "epoch": 0.62, + "grad_norm": 2.0625, + "learning_rate": 0.0001556653267117357, + "loss": 2.2625, + "step": 265230 + }, + { + "epoch": 0.62, + "grad_norm": 2.109375, + "learning_rate": 0.00015566379124345775, + "loss": 2.0514, + "step": 265235 + }, + { + "epoch": 0.62, + "grad_norm": 2.25, + "learning_rate": 0.00015566225575616404, + "loss": 2.0695, + "step": 265240 + }, + { + "epoch": 0.62, + "grad_norm": 2.265625, + "learning_rate": 0.00015566072024985508, + "loss": 2.1716, + "step": 265245 + }, + { + "epoch": 0.62, + "grad_norm": 2.109375, + "learning_rate": 0.0001556591847245314, + "loss": 1.9639, + "step": 265250 + }, + { + "epoch": 0.62, + "grad_norm": 2.0625, + "learning_rate": 0.00015565764918019356, + "loss": 2.1668, + "step": 265255 + }, + { + "epoch": 0.62, + "grad_norm": 1.953125, + "learning_rate": 0.00015565611361684206, + "loss": 2.136, + "step": 265260 + }, + { + "epoch": 0.62, + "grad_norm": 2.125, + "learning_rate": 0.00015565457803447743, + "loss": 1.9208, + "step": 265265 + }, + { + "epoch": 0.62, + "grad_norm": 2.046875, + "learning_rate": 0.00015565304243310015, + "loss": 1.9789, + "step": 265270 + }, + { + "epoch": 0.62, + "grad_norm": 2.3125, + "learning_rate": 0.0001556515068127108, + "loss": 2.0775, + "step": 265275 + }, + { + "epoch": 0.62, + "grad_norm": 2.171875, + "learning_rate": 0.00015564997117330993, + "loss": 2.097, + "step": 265280 + }, + { + "epoch": 0.62, + "grad_norm": 2.265625, + "learning_rate": 0.00015564843551489797, + "loss": 2.2264, + "step": 265285 + }, + { + "epoch": 0.62, + "grad_norm": 1.890625, + "learning_rate": 0.0001556468998374755, + "loss": 1.9557, + "step": 265290 + }, + { + "epoch": 0.62, + "grad_norm": 1.8671875, + "learning_rate": 0.00015564536414104305, + "loss": 1.9064, + "step": 265295 + }, + { + "epoch": 0.62, + "grad_norm": 2.0625, + "learning_rate": 0.00015564382842560114, + "loss": 2.0747, + "step": 265300 + }, + { + "epoch": 0.62, + "grad_norm": 2.390625, + "learning_rate": 0.00015564229269115026, + "loss": 2.0722, + "step": 265305 + }, + { + "epoch": 0.62, + "grad_norm": 2.21875, + "learning_rate": 0.000155640756937691, + "loss": 2.2764, + "step": 265310 + }, + { + "epoch": 0.62, + "grad_norm": 2.25, + "learning_rate": 0.00015563922116522384, + "loss": 2.3305, + "step": 265315 + }, + { + "epoch": 0.62, + "grad_norm": 2.078125, + "learning_rate": 0.0001556376853737493, + "loss": 2.1887, + "step": 265320 + }, + { + "epoch": 0.62, + "grad_norm": 2.28125, + "learning_rate": 0.0001556361495632679, + "loss": 2.0658, + "step": 265325 + }, + { + "epoch": 0.62, + "grad_norm": 2.953125, + "learning_rate": 0.00015563461373378022, + "loss": 1.8626, + "step": 265330 + }, + { + "epoch": 0.62, + "grad_norm": 2.171875, + "learning_rate": 0.00015563307788528673, + "loss": 2.0792, + "step": 265335 + }, + { + "epoch": 0.62, + "grad_norm": 1.984375, + "learning_rate": 0.00015563154201778798, + "loss": 2.1327, + "step": 265340 + }, + { + "epoch": 0.62, + "grad_norm": 1.578125, + "learning_rate": 0.00015563000613128444, + "loss": 2.0665, + "step": 265345 + }, + { + "epoch": 0.62, + "grad_norm": 2.25, + "learning_rate": 0.00015562847022577674, + "loss": 2.1002, + "step": 265350 + }, + { + "epoch": 0.62, + "grad_norm": 2.4375, + "learning_rate": 0.0001556269343012653, + "loss": 2.1264, + "step": 265355 + }, + { + "epoch": 0.62, + "grad_norm": 2.34375, + "learning_rate": 0.00015562539835775073, + "loss": 2.0571, + "step": 265360 + }, + { + "epoch": 0.62, + "grad_norm": 2.28125, + "learning_rate": 0.0001556238623952335, + "loss": 2.2074, + "step": 265365 + }, + { + "epoch": 0.62, + "grad_norm": 2.234375, + "learning_rate": 0.00015562232641371418, + "loss": 1.8848, + "step": 265370 + }, + { + "epoch": 0.62, + "grad_norm": 2.21875, + "learning_rate": 0.0001556207904131932, + "loss": 2.0728, + "step": 265375 + }, + { + "epoch": 0.62, + "grad_norm": 2.3125, + "learning_rate": 0.0001556192543936712, + "loss": 2.0664, + "step": 265380 + }, + { + "epoch": 0.62, + "grad_norm": 2.453125, + "learning_rate": 0.00015561771835514865, + "loss": 2.038, + "step": 265385 + }, + { + "epoch": 0.62, + "grad_norm": 2.015625, + "learning_rate": 0.00015561618229762605, + "loss": 2.0699, + "step": 265390 + }, + { + "epoch": 0.62, + "grad_norm": 2.375, + "learning_rate": 0.000155614646221104, + "loss": 2.1134, + "step": 265395 + }, + { + "epoch": 0.62, + "grad_norm": 2.1875, + "learning_rate": 0.00015561311012558292, + "loss": 2.4508, + "step": 265400 + }, + { + "epoch": 0.62, + "grad_norm": 2.03125, + "learning_rate": 0.00015561157401106342, + "loss": 1.9886, + "step": 265405 + }, + { + "epoch": 0.62, + "grad_norm": 1.8671875, + "learning_rate": 0.000155610037877546, + "loss": 1.8899, + "step": 265410 + }, + { + "epoch": 0.62, + "grad_norm": 1.859375, + "learning_rate": 0.00015560850172503118, + "loss": 2.0901, + "step": 265415 + }, + { + "epoch": 0.62, + "grad_norm": 2.25, + "learning_rate": 0.0001556069655535195, + "loss": 1.9111, + "step": 265420 + }, + { + "epoch": 0.62, + "grad_norm": 1.5390625, + "learning_rate": 0.00015560542936301147, + "loss": 1.9541, + "step": 265425 + }, + { + "epoch": 0.62, + "grad_norm": 2.0625, + "learning_rate": 0.0001556038931535076, + "loss": 2.0025, + "step": 265430 + }, + { + "epoch": 0.62, + "grad_norm": 2.078125, + "learning_rate": 0.00015560235692500842, + "loss": 2.0432, + "step": 265435 + }, + { + "epoch": 0.62, + "grad_norm": 2.0625, + "learning_rate": 0.0001556008206775145, + "loss": 2.1729, + "step": 265440 + }, + { + "epoch": 0.62, + "grad_norm": 2.3125, + "learning_rate": 0.00015559928441102633, + "loss": 2.0394, + "step": 265445 + }, + { + "epoch": 0.62, + "grad_norm": 2.0625, + "learning_rate": 0.00015559774812554442, + "loss": 2.1465, + "step": 265450 + }, + { + "epoch": 0.62, + "grad_norm": 2.328125, + "learning_rate": 0.00015559621182106934, + "loss": 2.1437, + "step": 265455 + }, + { + "epoch": 0.62, + "grad_norm": 2.171875, + "learning_rate": 0.00015559467549760154, + "loss": 2.0798, + "step": 265460 + }, + { + "epoch": 0.62, + "grad_norm": 2.25, + "learning_rate": 0.0001555931391551416, + "loss": 2.1589, + "step": 265465 + }, + { + "epoch": 0.62, + "grad_norm": 1.9765625, + "learning_rate": 0.00015559160279369005, + "loss": 1.9671, + "step": 265470 + }, + { + "epoch": 0.62, + "grad_norm": 2.3125, + "learning_rate": 0.0001555900664132474, + "loss": 2.0544, + "step": 265475 + }, + { + "epoch": 0.62, + "grad_norm": 2.296875, + "learning_rate": 0.00015558853001381418, + "loss": 2.164, + "step": 265480 + }, + { + "epoch": 0.62, + "grad_norm": 2.0625, + "learning_rate": 0.0001555869935953909, + "loss": 2.2867, + "step": 265485 + }, + { + "epoch": 0.62, + "grad_norm": 2.359375, + "learning_rate": 0.0001555854571579781, + "loss": 2.1359, + "step": 265490 + }, + { + "epoch": 0.62, + "grad_norm": 2.375, + "learning_rate": 0.0001555839207015763, + "loss": 2.1249, + "step": 265495 + }, + { + "epoch": 0.62, + "grad_norm": 2.015625, + "learning_rate": 0.000155582384226186, + "loss": 2.1105, + "step": 265500 + }, + { + "epoch": 0.62, + "grad_norm": 2.328125, + "learning_rate": 0.0001555808477318078, + "loss": 2.2342, + "step": 265505 + }, + { + "epoch": 0.62, + "grad_norm": 2.125, + "learning_rate": 0.00015557931121844214, + "loss": 1.9963, + "step": 265510 + }, + { + "epoch": 0.62, + "grad_norm": 2.375, + "learning_rate": 0.00015557777468608956, + "loss": 2.2407, + "step": 265515 + }, + { + "epoch": 0.62, + "grad_norm": 2.0625, + "learning_rate": 0.00015557623813475065, + "loss": 2.1123, + "step": 265520 + }, + { + "epoch": 0.62, + "grad_norm": 2.34375, + "learning_rate": 0.00015557470156442587, + "loss": 2.0987, + "step": 265525 + }, + { + "epoch": 0.62, + "grad_norm": 2.53125, + "learning_rate": 0.00015557316497511576, + "loss": 2.0387, + "step": 265530 + }, + { + "epoch": 0.62, + "grad_norm": 2.328125, + "learning_rate": 0.00015557162836682083, + "loss": 2.0567, + "step": 265535 + }, + { + "epoch": 0.62, + "grad_norm": 2.125, + "learning_rate": 0.00015557009173954164, + "loss": 2.1424, + "step": 265540 + }, + { + "epoch": 0.62, + "grad_norm": 2.515625, + "learning_rate": 0.00015556855509327871, + "loss": 2.0384, + "step": 265545 + }, + { + "epoch": 0.62, + "grad_norm": 2.234375, + "learning_rate": 0.00015556701842803255, + "loss": 2.0286, + "step": 265550 + }, + { + "epoch": 0.62, + "grad_norm": 2.734375, + "learning_rate": 0.0001555654817438037, + "loss": 2.1088, + "step": 265555 + }, + { + "epoch": 0.62, + "grad_norm": 2.140625, + "learning_rate": 0.00015556394504059266, + "loss": 2.0878, + "step": 265560 + }, + { + "epoch": 0.62, + "grad_norm": 2.078125, + "learning_rate": 0.00015556240831839996, + "loss": 1.9062, + "step": 265565 + }, + { + "epoch": 0.62, + "grad_norm": 2.90625, + "learning_rate": 0.00015556087157722614, + "loss": 2.0504, + "step": 265570 + }, + { + "epoch": 0.62, + "grad_norm": 2.125, + "learning_rate": 0.0001555593348170717, + "loss": 2.0732, + "step": 265575 + }, + { + "epoch": 0.62, + "grad_norm": 2.390625, + "learning_rate": 0.00015555779803793723, + "loss": 2.0122, + "step": 265580 + }, + { + "epoch": 0.63, + "grad_norm": 2.40625, + "learning_rate": 0.00015555626123982318, + "loss": 2.1567, + "step": 265585 + }, + { + "epoch": 0.63, + "grad_norm": 2.03125, + "learning_rate": 0.0001555547244227301, + "loss": 1.9127, + "step": 265590 + }, + { + "epoch": 0.63, + "grad_norm": 2.09375, + "learning_rate": 0.00015555318758665853, + "loss": 1.973, + "step": 265595 + }, + { + "epoch": 0.63, + "grad_norm": 2.15625, + "learning_rate": 0.000155551650731609, + "loss": 2.1141, + "step": 265600 + }, + { + "epoch": 0.63, + "grad_norm": 2.4375, + "learning_rate": 0.00015555011385758199, + "loss": 2.0236, + "step": 265605 + }, + { + "epoch": 0.63, + "grad_norm": 2.4375, + "learning_rate": 0.00015554857696457807, + "loss": 1.9788, + "step": 265610 + }, + { + "epoch": 0.63, + "grad_norm": 2.46875, + "learning_rate": 0.00015554704005259776, + "loss": 2.0945, + "step": 265615 + }, + { + "epoch": 0.63, + "grad_norm": 1.8125, + "learning_rate": 0.0001555455031216415, + "loss": 1.9495, + "step": 265620 + }, + { + "epoch": 0.63, + "grad_norm": 2.765625, + "learning_rate": 0.00015554396617170998, + "loss": 2.1902, + "step": 265625 + }, + { + "epoch": 0.63, + "grad_norm": 2.234375, + "learning_rate": 0.00015554242920280358, + "loss": 2.0252, + "step": 265630 + }, + { + "epoch": 0.63, + "grad_norm": 2.015625, + "learning_rate": 0.0001555408922149229, + "loss": 2.0786, + "step": 265635 + }, + { + "epoch": 0.63, + "grad_norm": 2.0625, + "learning_rate": 0.00015553935520806845, + "loss": 2.1914, + "step": 265640 + }, + { + "epoch": 0.63, + "grad_norm": 1.8828125, + "learning_rate": 0.0001555378181822407, + "loss": 2.0962, + "step": 265645 + }, + { + "epoch": 0.63, + "grad_norm": 2.046875, + "learning_rate": 0.00015553628113744027, + "loss": 1.9242, + "step": 265650 + }, + { + "epoch": 0.63, + "grad_norm": 2.515625, + "learning_rate": 0.00015553474407366765, + "loss": 2.0303, + "step": 265655 + }, + { + "epoch": 0.63, + "grad_norm": 2.34375, + "learning_rate": 0.00015553320699092333, + "loss": 2.1178, + "step": 265660 + }, + { + "epoch": 0.63, + "grad_norm": 2.171875, + "learning_rate": 0.00015553166988920787, + "loss": 2.0247, + "step": 265665 + }, + { + "epoch": 0.63, + "grad_norm": 2.09375, + "learning_rate": 0.00015553013276852179, + "loss": 2.0584, + "step": 265670 + }, + { + "epoch": 0.63, + "grad_norm": 2.234375, + "learning_rate": 0.00015552859562886558, + "loss": 2.0294, + "step": 265675 + }, + { + "epoch": 0.63, + "grad_norm": 2.34375, + "learning_rate": 0.00015552705847023983, + "loss": 2.1609, + "step": 265680 + }, + { + "epoch": 0.63, + "grad_norm": 1.859375, + "learning_rate": 0.00015552552129264498, + "loss": 2.1416, + "step": 265685 + }, + { + "epoch": 0.63, + "grad_norm": 2.09375, + "learning_rate": 0.00015552398409608166, + "loss": 2.021, + "step": 265690 + }, + { + "epoch": 0.63, + "grad_norm": 2.03125, + "learning_rate": 0.00015552244688055034, + "loss": 1.9335, + "step": 265695 + }, + { + "epoch": 0.63, + "grad_norm": 2.15625, + "learning_rate": 0.0001555209096460515, + "loss": 1.8568, + "step": 265700 + }, + { + "epoch": 0.63, + "grad_norm": 2.390625, + "learning_rate": 0.00015551937239258572, + "loss": 2.2151, + "step": 265705 + }, + { + "epoch": 0.63, + "grad_norm": 2.203125, + "learning_rate": 0.00015551783512015354, + "loss": 2.1268, + "step": 265710 + }, + { + "epoch": 0.63, + "grad_norm": 2.125, + "learning_rate": 0.00015551629782875547, + "loss": 2.0022, + "step": 265715 + }, + { + "epoch": 0.63, + "grad_norm": 2.375, + "learning_rate": 0.00015551476051839198, + "loss": 2.0576, + "step": 265720 + }, + { + "epoch": 0.63, + "grad_norm": 2.0, + "learning_rate": 0.00015551322318906367, + "loss": 2.1611, + "step": 265725 + }, + { + "epoch": 0.63, + "grad_norm": 2.015625, + "learning_rate": 0.00015551168584077103, + "loss": 1.9703, + "step": 265730 + }, + { + "epoch": 0.63, + "grad_norm": 1.8046875, + "learning_rate": 0.0001555101484735146, + "loss": 1.9715, + "step": 265735 + }, + { + "epoch": 0.63, + "grad_norm": 2.671875, + "learning_rate": 0.0001555086110872949, + "loss": 2.1111, + "step": 265740 + }, + { + "epoch": 0.63, + "grad_norm": 2.359375, + "learning_rate": 0.00015550707368211243, + "loss": 2.0612, + "step": 265745 + }, + { + "epoch": 0.63, + "grad_norm": 1.8828125, + "learning_rate": 0.00015550553625796778, + "loss": 1.9891, + "step": 265750 + }, + { + "epoch": 0.63, + "grad_norm": 2.140625, + "learning_rate": 0.00015550399881486136, + "loss": 2.1332, + "step": 265755 + }, + { + "epoch": 0.63, + "grad_norm": 2.296875, + "learning_rate": 0.00015550246135279382, + "loss": 2.1905, + "step": 265760 + }, + { + "epoch": 0.63, + "grad_norm": 1.9765625, + "learning_rate": 0.00015550092387176561, + "loss": 2.0813, + "step": 265765 + }, + { + "epoch": 0.63, + "grad_norm": 2.546875, + "learning_rate": 0.0001554993863717773, + "loss": 1.9323, + "step": 265770 + }, + { + "epoch": 0.63, + "grad_norm": 2.40625, + "learning_rate": 0.0001554978488528294, + "loss": 2.0392, + "step": 265775 + }, + { + "epoch": 0.63, + "grad_norm": 2.296875, + "learning_rate": 0.00015549631131492238, + "loss": 2.0919, + "step": 265780 + }, + { + "epoch": 0.63, + "grad_norm": 2.515625, + "learning_rate": 0.00015549477375805687, + "loss": 2.0694, + "step": 265785 + }, + { + "epoch": 0.63, + "grad_norm": 2.265625, + "learning_rate": 0.0001554932361822333, + "loss": 1.9532, + "step": 265790 + }, + { + "epoch": 0.63, + "grad_norm": 2.203125, + "learning_rate": 0.00015549169858745224, + "loss": 2.1246, + "step": 265795 + }, + { + "epoch": 0.63, + "grad_norm": 2.359375, + "learning_rate": 0.00015549016097371422, + "loss": 2.0056, + "step": 265800 + }, + { + "epoch": 0.63, + "grad_norm": 2.109375, + "learning_rate": 0.00015548862334101976, + "loss": 2.254, + "step": 265805 + }, + { + "epoch": 0.63, + "grad_norm": 1.984375, + "learning_rate": 0.00015548708568936936, + "loss": 2.036, + "step": 265810 + }, + { + "epoch": 0.63, + "grad_norm": 2.453125, + "learning_rate": 0.00015548554801876358, + "loss": 2.0784, + "step": 265815 + }, + { + "epoch": 0.63, + "grad_norm": 2.078125, + "learning_rate": 0.00015548401032920294, + "loss": 2.2649, + "step": 265820 + }, + { + "epoch": 0.63, + "grad_norm": 2.484375, + "learning_rate": 0.00015548247262068795, + "loss": 2.0898, + "step": 265825 + }, + { + "epoch": 0.63, + "grad_norm": 2.28125, + "learning_rate": 0.00015548093489321912, + "loss": 2.1983, + "step": 265830 + }, + { + "epoch": 0.63, + "grad_norm": 2.578125, + "learning_rate": 0.00015547939714679704, + "loss": 2.2221, + "step": 265835 + }, + { + "epoch": 0.63, + "grad_norm": 2.078125, + "learning_rate": 0.00015547785938142215, + "loss": 2.0495, + "step": 265840 + }, + { + "epoch": 0.63, + "grad_norm": 3.0, + "learning_rate": 0.00015547632159709502, + "loss": 1.9844, + "step": 265845 + }, + { + "epoch": 0.63, + "grad_norm": 2.390625, + "learning_rate": 0.00015547478379381618, + "loss": 2.1751, + "step": 265850 + }, + { + "epoch": 0.63, + "grad_norm": 2.234375, + "learning_rate": 0.00015547324597158614, + "loss": 2.0219, + "step": 265855 + }, + { + "epoch": 0.63, + "grad_norm": 2.03125, + "learning_rate": 0.00015547170813040543, + "loss": 2.1154, + "step": 265860 + }, + { + "epoch": 0.63, + "grad_norm": 2.203125, + "learning_rate": 0.0001554701702702746, + "loss": 1.8924, + "step": 265865 + }, + { + "epoch": 0.63, + "grad_norm": 2.1875, + "learning_rate": 0.00015546863239119415, + "loss": 2.1128, + "step": 265870 + }, + { + "epoch": 0.63, + "grad_norm": 2.1875, + "learning_rate": 0.0001554670944931646, + "loss": 2.0886, + "step": 265875 + }, + { + "epoch": 0.63, + "grad_norm": 2.109375, + "learning_rate": 0.00015546555657618648, + "loss": 2.008, + "step": 265880 + }, + { + "epoch": 0.63, + "grad_norm": 1.78125, + "learning_rate": 0.00015546401864026036, + "loss": 1.8569, + "step": 265885 + }, + { + "epoch": 0.63, + "grad_norm": 2.375, + "learning_rate": 0.0001554624806853867, + "loss": 2.0624, + "step": 265890 + }, + { + "epoch": 0.63, + "grad_norm": 1.75, + "learning_rate": 0.00015546094271156604, + "loss": 2.0383, + "step": 265895 + }, + { + "epoch": 0.63, + "grad_norm": 2.359375, + "learning_rate": 0.00015545940471879888, + "loss": 2.2237, + "step": 265900 + }, + { + "epoch": 0.63, + "grad_norm": 1.9375, + "learning_rate": 0.00015545786670708586, + "loss": 1.9059, + "step": 265905 + }, + { + "epoch": 0.63, + "grad_norm": 1.734375, + "learning_rate": 0.00015545632867642738, + "loss": 2.0101, + "step": 265910 + }, + { + "epoch": 0.63, + "grad_norm": 2.28125, + "learning_rate": 0.000155454790626824, + "loss": 2.2454, + "step": 265915 + }, + { + "epoch": 0.63, + "grad_norm": 2.296875, + "learning_rate": 0.0001554532525582763, + "loss": 2.0305, + "step": 265920 + }, + { + "epoch": 0.63, + "grad_norm": 1.984375, + "learning_rate": 0.00015545171447078475, + "loss": 2.0596, + "step": 265925 + }, + { + "epoch": 0.63, + "grad_norm": 2.171875, + "learning_rate": 0.0001554501763643499, + "loss": 1.9099, + "step": 265930 + }, + { + "epoch": 0.63, + "grad_norm": 2.046875, + "learning_rate": 0.00015544863823897223, + "loss": 2.0855, + "step": 265935 + }, + { + "epoch": 0.63, + "grad_norm": 2.390625, + "learning_rate": 0.0001554471000946523, + "loss": 1.9791, + "step": 265940 + }, + { + "epoch": 0.63, + "grad_norm": 1.7421875, + "learning_rate": 0.00015544556193139065, + "loss": 2.0341, + "step": 265945 + }, + { + "epoch": 0.63, + "grad_norm": 2.15625, + "learning_rate": 0.0001554440237491878, + "loss": 2.2762, + "step": 265950 + }, + { + "epoch": 0.63, + "grad_norm": 2.203125, + "learning_rate": 0.00015544248554804426, + "loss": 2.024, + "step": 265955 + }, + { + "epoch": 0.63, + "grad_norm": 2.34375, + "learning_rate": 0.00015544094732796056, + "loss": 2.2012, + "step": 265960 + }, + { + "epoch": 0.63, + "grad_norm": 2.640625, + "learning_rate": 0.0001554394090889372, + "loss": 2.1325, + "step": 265965 + }, + { + "epoch": 0.63, + "grad_norm": 1.6484375, + "learning_rate": 0.00015543787083097476, + "loss": 2.018, + "step": 265970 + }, + { + "epoch": 0.63, + "grad_norm": 2.015625, + "learning_rate": 0.00015543633255407375, + "loss": 2.134, + "step": 265975 + }, + { + "epoch": 0.63, + "grad_norm": 1.828125, + "learning_rate": 0.00015543479425823468, + "loss": 1.93, + "step": 265980 + }, + { + "epoch": 0.63, + "grad_norm": 2.015625, + "learning_rate": 0.00015543325594345806, + "loss": 1.9809, + "step": 265985 + }, + { + "epoch": 0.63, + "grad_norm": 1.8515625, + "learning_rate": 0.00015543171760974445, + "loss": 2.1443, + "step": 265990 + }, + { + "epoch": 0.63, + "grad_norm": 2.125, + "learning_rate": 0.00015543017925709432, + "loss": 2.017, + "step": 265995 + }, + { + "epoch": 0.63, + "grad_norm": 1.6328125, + "learning_rate": 0.0001554286408855083, + "loss": 1.9746, + "step": 266000 + }, + { + "epoch": 0.63, + "grad_norm": 2.140625, + "learning_rate": 0.0001554271024949868, + "loss": 2.1543, + "step": 266005 + }, + { + "epoch": 0.63, + "grad_norm": 2.046875, + "learning_rate": 0.00015542556408553045, + "loss": 2.2171, + "step": 266010 + }, + { + "epoch": 0.63, + "grad_norm": 2.015625, + "learning_rate": 0.00015542402565713967, + "loss": 2.2225, + "step": 266015 + }, + { + "epoch": 0.63, + "grad_norm": 2.109375, + "learning_rate": 0.00015542248720981503, + "loss": 2.0703, + "step": 266020 + }, + { + "epoch": 0.63, + "grad_norm": 2.078125, + "learning_rate": 0.00015542094874355708, + "loss": 1.9816, + "step": 266025 + }, + { + "epoch": 0.63, + "grad_norm": 2.15625, + "learning_rate": 0.00015541941025836638, + "loss": 2.2279, + "step": 266030 + }, + { + "epoch": 0.63, + "grad_norm": 1.9765625, + "learning_rate": 0.00015541787175424334, + "loss": 1.9519, + "step": 266035 + }, + { + "epoch": 0.63, + "grad_norm": 2.078125, + "learning_rate": 0.00015541633323118856, + "loss": 2.0842, + "step": 266040 + }, + { + "epoch": 0.63, + "grad_norm": 2.125, + "learning_rate": 0.00015541479468920256, + "loss": 2.2003, + "step": 266045 + }, + { + "epoch": 0.63, + "grad_norm": 2.078125, + "learning_rate": 0.0001554132561282859, + "loss": 2.1484, + "step": 266050 + }, + { + "epoch": 0.63, + "grad_norm": 2.171875, + "learning_rate": 0.000155411717548439, + "loss": 2.284, + "step": 266055 + }, + { + "epoch": 0.63, + "grad_norm": 2.25, + "learning_rate": 0.00015541017894966252, + "loss": 2.0894, + "step": 266060 + }, + { + "epoch": 0.63, + "grad_norm": 2.25, + "learning_rate": 0.0001554086403319569, + "loss": 2.073, + "step": 266065 + }, + { + "epoch": 0.63, + "grad_norm": 1.875, + "learning_rate": 0.00015540710169532264, + "loss": 1.9074, + "step": 266070 + }, + { + "epoch": 0.63, + "grad_norm": 2.953125, + "learning_rate": 0.00015540556303976036, + "loss": 2.1652, + "step": 266075 + }, + { + "epoch": 0.63, + "grad_norm": 2.359375, + "learning_rate": 0.00015540402436527048, + "loss": 2.1546, + "step": 266080 + }, + { + "epoch": 0.63, + "grad_norm": 2.53125, + "learning_rate": 0.00015540248567185363, + "loss": 2.0468, + "step": 266085 + }, + { + "epoch": 0.63, + "grad_norm": 2.078125, + "learning_rate": 0.00015540094695951027, + "loss": 2.1181, + "step": 266090 + }, + { + "epoch": 0.63, + "grad_norm": 2.0, + "learning_rate": 0.00015539940822824092, + "loss": 2.0476, + "step": 266095 + }, + { + "epoch": 0.63, + "grad_norm": 2.5625, + "learning_rate": 0.00015539786947804616, + "loss": 2.1932, + "step": 266100 + }, + { + "epoch": 0.63, + "grad_norm": 2.046875, + "learning_rate": 0.00015539633070892647, + "loss": 2.0952, + "step": 266105 + }, + { + "epoch": 0.63, + "grad_norm": 2.09375, + "learning_rate": 0.0001553947919208824, + "loss": 1.8547, + "step": 266110 + }, + { + "epoch": 0.63, + "grad_norm": 2.375, + "learning_rate": 0.00015539325311391444, + "loss": 2.0417, + "step": 266115 + }, + { + "epoch": 0.63, + "grad_norm": 1.7890625, + "learning_rate": 0.00015539171428802318, + "loss": 2.0452, + "step": 266120 + }, + { + "epoch": 0.63, + "grad_norm": 2.328125, + "learning_rate": 0.00015539017544320907, + "loss": 2.0986, + "step": 266125 + }, + { + "epoch": 0.63, + "grad_norm": 2.21875, + "learning_rate": 0.00015538863657947268, + "loss": 2.2103, + "step": 266130 + }, + { + "epoch": 0.63, + "grad_norm": 2.3125, + "learning_rate": 0.00015538709769681453, + "loss": 1.9946, + "step": 266135 + }, + { + "epoch": 0.63, + "grad_norm": 2.25, + "learning_rate": 0.00015538555879523512, + "loss": 2.0151, + "step": 266140 + }, + { + "epoch": 0.63, + "grad_norm": 2.375, + "learning_rate": 0.00015538401987473502, + "loss": 2.0674, + "step": 266145 + }, + { + "epoch": 0.63, + "grad_norm": 2.328125, + "learning_rate": 0.00015538248093531476, + "loss": 2.101, + "step": 266150 + }, + { + "epoch": 0.63, + "grad_norm": 2.46875, + "learning_rate": 0.00015538094197697482, + "loss": 2.0668, + "step": 266155 + }, + { + "epoch": 0.63, + "grad_norm": 2.71875, + "learning_rate": 0.00015537940299971572, + "loss": 2.0881, + "step": 266160 + }, + { + "epoch": 0.63, + "grad_norm": 2.0, + "learning_rate": 0.00015537786400353804, + "loss": 2.1787, + "step": 266165 + }, + { + "epoch": 0.63, + "grad_norm": 1.7578125, + "learning_rate": 0.00015537632498844228, + "loss": 2.1062, + "step": 266170 + }, + { + "epoch": 0.63, + "grad_norm": 1.9921875, + "learning_rate": 0.00015537478595442896, + "loss": 2.0485, + "step": 266175 + }, + { + "epoch": 0.63, + "grad_norm": 2.125, + "learning_rate": 0.00015537324690149856, + "loss": 2.0668, + "step": 266180 + }, + { + "epoch": 0.63, + "grad_norm": 2.15625, + "learning_rate": 0.0001553717078296517, + "loss": 2.1262, + "step": 266185 + }, + { + "epoch": 0.63, + "grad_norm": 2.203125, + "learning_rate": 0.00015537016873888888, + "loss": 2.0195, + "step": 266190 + }, + { + "epoch": 0.63, + "grad_norm": 2.078125, + "learning_rate": 0.0001553686296292106, + "loss": 2.0503, + "step": 266195 + }, + { + "epoch": 0.63, + "grad_norm": 2.046875, + "learning_rate": 0.00015536709050061737, + "loss": 2.0202, + "step": 266200 + }, + { + "epoch": 0.63, + "grad_norm": 2.15625, + "learning_rate": 0.00015536555135310973, + "loss": 2.1196, + "step": 266205 + }, + { + "epoch": 0.63, + "grad_norm": 1.765625, + "learning_rate": 0.00015536401218668822, + "loss": 2.0965, + "step": 266210 + }, + { + "epoch": 0.63, + "grad_norm": 2.390625, + "learning_rate": 0.00015536247300135337, + "loss": 2.1177, + "step": 266215 + }, + { + "epoch": 0.63, + "grad_norm": 1.75, + "learning_rate": 0.00015536093379710567, + "loss": 2.106, + "step": 266220 + }, + { + "epoch": 0.63, + "grad_norm": 2.0625, + "learning_rate": 0.00015535939457394572, + "loss": 2.2068, + "step": 266225 + }, + { + "epoch": 0.63, + "grad_norm": 2.109375, + "learning_rate": 0.00015535785533187397, + "loss": 2.1099, + "step": 266230 + }, + { + "epoch": 0.63, + "grad_norm": 2.328125, + "learning_rate": 0.00015535631607089097, + "loss": 2.0373, + "step": 266235 + }, + { + "epoch": 0.63, + "grad_norm": 2.1875, + "learning_rate": 0.00015535477679099727, + "loss": 2.1818, + "step": 266240 + }, + { + "epoch": 0.63, + "grad_norm": 2.296875, + "learning_rate": 0.00015535323749219334, + "loss": 2.0751, + "step": 266245 + }, + { + "epoch": 0.63, + "grad_norm": 1.9609375, + "learning_rate": 0.00015535169817447976, + "loss": 2.0757, + "step": 266250 + }, + { + "epoch": 0.63, + "grad_norm": 1.953125, + "learning_rate": 0.00015535015883785702, + "loss": 2.1544, + "step": 266255 + }, + { + "epoch": 0.63, + "grad_norm": 2.28125, + "learning_rate": 0.00015534861948232566, + "loss": 2.0359, + "step": 266260 + }, + { + "epoch": 0.63, + "grad_norm": 2.53125, + "learning_rate": 0.00015534708010788624, + "loss": 1.9575, + "step": 266265 + }, + { + "epoch": 0.63, + "grad_norm": 1.8515625, + "learning_rate": 0.0001553455407145392, + "loss": 2.0773, + "step": 266270 + }, + { + "epoch": 0.63, + "grad_norm": 1.984375, + "learning_rate": 0.00015534400130228518, + "loss": 2.024, + "step": 266275 + }, + { + "epoch": 0.63, + "grad_norm": 2.15625, + "learning_rate": 0.00015534246187112462, + "loss": 2.1759, + "step": 266280 + }, + { + "epoch": 0.63, + "grad_norm": 2.265625, + "learning_rate": 0.00015534092242105806, + "loss": 2.0517, + "step": 266285 + }, + { + "epoch": 0.63, + "grad_norm": 2.296875, + "learning_rate": 0.00015533938295208606, + "loss": 2.1092, + "step": 266290 + }, + { + "epoch": 0.63, + "grad_norm": 2.421875, + "learning_rate": 0.0001553378434642091, + "loss": 2.1611, + "step": 266295 + }, + { + "epoch": 0.63, + "grad_norm": 2.109375, + "learning_rate": 0.00015533630395742773, + "loss": 2.2351, + "step": 266300 + }, + { + "epoch": 0.63, + "grad_norm": 2.234375, + "learning_rate": 0.00015533476443174252, + "loss": 1.9185, + "step": 266305 + }, + { + "epoch": 0.63, + "grad_norm": 1.8984375, + "learning_rate": 0.0001553332248871539, + "loss": 2.094, + "step": 266310 + }, + { + "epoch": 0.63, + "grad_norm": 2.390625, + "learning_rate": 0.00015533168532366244, + "loss": 2.0045, + "step": 266315 + }, + { + "epoch": 0.63, + "grad_norm": 2.15625, + "learning_rate": 0.0001553301457412687, + "loss": 2.0756, + "step": 266320 + }, + { + "epoch": 0.63, + "grad_norm": 2.1875, + "learning_rate": 0.0001553286061399732, + "loss": 1.9931, + "step": 266325 + }, + { + "epoch": 0.63, + "grad_norm": 1.9296875, + "learning_rate": 0.0001553270665197764, + "loss": 2.0062, + "step": 266330 + }, + { + "epoch": 0.63, + "grad_norm": 3.265625, + "learning_rate": 0.0001553255268806789, + "loss": 2.0837, + "step": 266335 + }, + { + "epoch": 0.63, + "grad_norm": 2.234375, + "learning_rate": 0.00015532398722268117, + "loss": 1.9706, + "step": 266340 + }, + { + "epoch": 0.63, + "grad_norm": 2.0625, + "learning_rate": 0.00015532244754578377, + "loss": 2.1231, + "step": 266345 + }, + { + "epoch": 0.63, + "grad_norm": 2.546875, + "learning_rate": 0.00015532090784998724, + "loss": 2.1974, + "step": 266350 + }, + { + "epoch": 0.63, + "grad_norm": 2.390625, + "learning_rate": 0.00015531936813529206, + "loss": 2.0444, + "step": 266355 + }, + { + "epoch": 0.63, + "grad_norm": 2.234375, + "learning_rate": 0.0001553178284016988, + "loss": 2.1774, + "step": 266360 + }, + { + "epoch": 0.63, + "grad_norm": 2.734375, + "learning_rate": 0.00015531628864920799, + "loss": 2.0587, + "step": 266365 + }, + { + "epoch": 0.63, + "grad_norm": 1.984375, + "learning_rate": 0.00015531474887782006, + "loss": 2.1042, + "step": 266370 + }, + { + "epoch": 0.63, + "grad_norm": 2.140625, + "learning_rate": 0.00015531320908753566, + "loss": 1.9306, + "step": 266375 + }, + { + "epoch": 0.63, + "grad_norm": 2.265625, + "learning_rate": 0.00015531166927835525, + "loss": 2.0854, + "step": 266380 + }, + { + "epoch": 0.63, + "grad_norm": 2.09375, + "learning_rate": 0.0001553101294502794, + "loss": 2.1405, + "step": 266385 + }, + { + "epoch": 0.63, + "grad_norm": 2.40625, + "learning_rate": 0.00015530858960330858, + "loss": 1.9991, + "step": 266390 + }, + { + "epoch": 0.63, + "grad_norm": 2.375, + "learning_rate": 0.0001553070497374433, + "loss": 1.9564, + "step": 266395 + }, + { + "epoch": 0.63, + "grad_norm": 2.28125, + "learning_rate": 0.0001553055098526842, + "loss": 2.0775, + "step": 266400 + }, + { + "epoch": 0.63, + "grad_norm": 2.09375, + "learning_rate": 0.0001553039699490317, + "loss": 2.124, + "step": 266405 + }, + { + "epoch": 0.63, + "grad_norm": 2.140625, + "learning_rate": 0.00015530243002648638, + "loss": 2.0513, + "step": 266410 + }, + { + "epoch": 0.63, + "grad_norm": 2.296875, + "learning_rate": 0.00015530089008504872, + "loss": 2.0971, + "step": 266415 + }, + { + "epoch": 0.63, + "grad_norm": 2.28125, + "learning_rate": 0.00015529935012471933, + "loss": 2.0337, + "step": 266420 + }, + { + "epoch": 0.63, + "grad_norm": 1.984375, + "learning_rate": 0.00015529781014549863, + "loss": 2.0882, + "step": 266425 + }, + { + "epoch": 0.63, + "grad_norm": 2.734375, + "learning_rate": 0.00015529627014738718, + "loss": 2.153, + "step": 266430 + }, + { + "epoch": 0.63, + "grad_norm": 2.421875, + "learning_rate": 0.00015529473013038556, + "loss": 1.955, + "step": 266435 + }, + { + "epoch": 0.63, + "grad_norm": 2.296875, + "learning_rate": 0.00015529319009449424, + "loss": 2.1454, + "step": 266440 + }, + { + "epoch": 0.63, + "grad_norm": 2.984375, + "learning_rate": 0.0001552916500397138, + "loss": 2.2357, + "step": 266445 + }, + { + "epoch": 0.63, + "grad_norm": 1.8671875, + "learning_rate": 0.00015529010996604464, + "loss": 2.0129, + "step": 266450 + }, + { + "epoch": 0.63, + "grad_norm": 2.125, + "learning_rate": 0.00015528856987348744, + "loss": 2.2164, + "step": 266455 + }, + { + "epoch": 0.63, + "grad_norm": 2.8125, + "learning_rate": 0.00015528702976204265, + "loss": 2.1114, + "step": 266460 + }, + { + "epoch": 0.63, + "grad_norm": 2.1875, + "learning_rate": 0.00015528548963171082, + "loss": 2.0279, + "step": 266465 + }, + { + "epoch": 0.63, + "grad_norm": 2.1875, + "learning_rate": 0.00015528394948249248, + "loss": 2.0146, + "step": 266470 + }, + { + "epoch": 0.63, + "grad_norm": 2.0, + "learning_rate": 0.0001552824093143881, + "loss": 2.1289, + "step": 266475 + }, + { + "epoch": 0.63, + "grad_norm": 2.046875, + "learning_rate": 0.00015528086912739824, + "loss": 2.1142, + "step": 266480 + }, + { + "epoch": 0.63, + "grad_norm": 2.25, + "learning_rate": 0.00015527932892152348, + "loss": 2.0794, + "step": 266485 + }, + { + "epoch": 0.63, + "grad_norm": 2.203125, + "learning_rate": 0.00015527778869676425, + "loss": 1.9861, + "step": 266490 + }, + { + "epoch": 0.63, + "grad_norm": 2.015625, + "learning_rate": 0.00015527624845312118, + "loss": 2.1371, + "step": 266495 + }, + { + "epoch": 0.63, + "grad_norm": 2.1875, + "learning_rate": 0.0001552747081905947, + "loss": 2.1231, + "step": 266500 + }, + { + "epoch": 0.63, + "grad_norm": 2.09375, + "learning_rate": 0.00015527316790918535, + "loss": 2.1431, + "step": 266505 + }, + { + "epoch": 0.63, + "grad_norm": 2.015625, + "learning_rate": 0.00015527162760889374, + "loss": 2.045, + "step": 266510 + }, + { + "epoch": 0.63, + "grad_norm": 2.0625, + "learning_rate": 0.00015527008728972032, + "loss": 2.1693, + "step": 266515 + }, + { + "epoch": 0.63, + "grad_norm": 1.9609375, + "learning_rate": 0.00015526854695166564, + "loss": 2.1734, + "step": 266520 + }, + { + "epoch": 0.63, + "grad_norm": 2.265625, + "learning_rate": 0.00015526700659473022, + "loss": 2.2152, + "step": 266525 + }, + { + "epoch": 0.63, + "grad_norm": 2.125, + "learning_rate": 0.00015526546621891457, + "loss": 2.2389, + "step": 266530 + }, + { + "epoch": 0.63, + "grad_norm": 2.03125, + "learning_rate": 0.00015526392582421926, + "loss": 2.0778, + "step": 266535 + }, + { + "epoch": 0.63, + "grad_norm": 2.203125, + "learning_rate": 0.00015526238541064478, + "loss": 2.0222, + "step": 266540 + }, + { + "epoch": 0.63, + "grad_norm": 2.265625, + "learning_rate": 0.00015526084497819167, + "loss": 2.0121, + "step": 266545 + }, + { + "epoch": 0.63, + "grad_norm": 1.9609375, + "learning_rate": 0.00015525930452686045, + "loss": 1.9206, + "step": 266550 + }, + { + "epoch": 0.63, + "grad_norm": 2.453125, + "learning_rate": 0.00015525776405665161, + "loss": 2.0857, + "step": 266555 + }, + { + "epoch": 0.63, + "grad_norm": 1.875, + "learning_rate": 0.00015525622356756577, + "loss": 1.7056, + "step": 266560 + }, + { + "epoch": 0.63, + "grad_norm": 2.421875, + "learning_rate": 0.00015525468305960341, + "loss": 2.1396, + "step": 266565 + }, + { + "epoch": 0.63, + "grad_norm": 2.09375, + "learning_rate": 0.00015525314253276502, + "loss": 2.1686, + "step": 266570 + }, + { + "epoch": 0.63, + "grad_norm": 2.59375, + "learning_rate": 0.00015525160198705119, + "loss": 2.0799, + "step": 266575 + }, + { + "epoch": 0.63, + "grad_norm": 2.265625, + "learning_rate": 0.00015525006142246234, + "loss": 2.0295, + "step": 266580 + }, + { + "epoch": 0.63, + "grad_norm": 2.0, + "learning_rate": 0.0001552485208389991, + "loss": 2.3768, + "step": 266585 + }, + { + "epoch": 0.63, + "grad_norm": 2.03125, + "learning_rate": 0.00015524698023666197, + "loss": 2.0911, + "step": 266590 + }, + { + "epoch": 0.63, + "grad_norm": 2.109375, + "learning_rate": 0.00015524543961545147, + "loss": 2.1104, + "step": 266595 + }, + { + "epoch": 0.63, + "grad_norm": 2.234375, + "learning_rate": 0.00015524389897536814, + "loss": 2.1019, + "step": 266600 + }, + { + "epoch": 0.63, + "grad_norm": 2.5, + "learning_rate": 0.00015524235831641247, + "loss": 2.0259, + "step": 266605 + }, + { + "epoch": 0.63, + "grad_norm": 2.421875, + "learning_rate": 0.00015524081763858502, + "loss": 2.2566, + "step": 266610 + }, + { + "epoch": 0.63, + "grad_norm": 2.09375, + "learning_rate": 0.0001552392769418863, + "loss": 2.2661, + "step": 266615 + }, + { + "epoch": 0.63, + "grad_norm": 2.171875, + "learning_rate": 0.00015523773622631683, + "loss": 2.1267, + "step": 266620 + }, + { + "epoch": 0.63, + "grad_norm": 1.9765625, + "learning_rate": 0.0001552361954918772, + "loss": 2.1173, + "step": 266625 + }, + { + "epoch": 0.63, + "grad_norm": 2.265625, + "learning_rate": 0.00015523465473856783, + "loss": 1.9003, + "step": 266630 + }, + { + "epoch": 0.63, + "grad_norm": 2.4375, + "learning_rate": 0.00015523311396638931, + "loss": 1.9834, + "step": 266635 + }, + { + "epoch": 0.63, + "grad_norm": 2.171875, + "learning_rate": 0.00015523157317534215, + "loss": 2.0149, + "step": 266640 + }, + { + "epoch": 0.63, + "grad_norm": 2.4375, + "learning_rate": 0.0001552300323654269, + "loss": 2.0191, + "step": 266645 + }, + { + "epoch": 0.63, + "grad_norm": 1.9453125, + "learning_rate": 0.00015522849153664408, + "loss": 2.0778, + "step": 266650 + }, + { + "epoch": 0.63, + "grad_norm": 2.328125, + "learning_rate": 0.00015522695068899417, + "loss": 2.1992, + "step": 266655 + }, + { + "epoch": 0.63, + "grad_norm": 2.328125, + "learning_rate": 0.00015522540982247776, + "loss": 2.038, + "step": 266660 + }, + { + "epoch": 0.63, + "grad_norm": 2.5625, + "learning_rate": 0.00015522386893709533, + "loss": 2.0672, + "step": 266665 + }, + { + "epoch": 0.63, + "grad_norm": 2.21875, + "learning_rate": 0.00015522232803284744, + "loss": 2.1651, + "step": 266670 + }, + { + "epoch": 0.63, + "grad_norm": 2.484375, + "learning_rate": 0.0001552207871097346, + "loss": 2.0446, + "step": 266675 + }, + { + "epoch": 0.63, + "grad_norm": 1.953125, + "learning_rate": 0.00015521924616775733, + "loss": 1.987, + "step": 266680 + }, + { + "epoch": 0.63, + "grad_norm": 2.375, + "learning_rate": 0.00015521770520691617, + "loss": 1.9869, + "step": 266685 + }, + { + "epoch": 0.63, + "grad_norm": 2.234375, + "learning_rate": 0.00015521616422721162, + "loss": 2.0398, + "step": 266690 + }, + { + "epoch": 0.63, + "grad_norm": 2.28125, + "learning_rate": 0.00015521462322864424, + "loss": 2.1514, + "step": 266695 + }, + { + "epoch": 0.63, + "grad_norm": 2.265625, + "learning_rate": 0.00015521308221121455, + "loss": 2.0224, + "step": 266700 + }, + { + "epoch": 0.63, + "grad_norm": 2.765625, + "learning_rate": 0.00015521154117492309, + "loss": 2.1976, + "step": 266705 + }, + { + "epoch": 0.63, + "grad_norm": 2.375, + "learning_rate": 0.00015521000011977032, + "loss": 2.0453, + "step": 266710 + }, + { + "epoch": 0.63, + "grad_norm": 2.140625, + "learning_rate": 0.00015520845904575686, + "loss": 1.9601, + "step": 266715 + }, + { + "epoch": 0.63, + "grad_norm": 2.0625, + "learning_rate": 0.00015520691795288313, + "loss": 2.0665, + "step": 266720 + }, + { + "epoch": 0.63, + "grad_norm": 2.15625, + "learning_rate": 0.00015520537684114979, + "loss": 2.278, + "step": 266725 + }, + { + "epoch": 0.63, + "grad_norm": 2.015625, + "learning_rate": 0.00015520383571055723, + "loss": 1.9985, + "step": 266730 + }, + { + "epoch": 0.63, + "grad_norm": 2.09375, + "learning_rate": 0.00015520229456110607, + "loss": 2.0489, + "step": 266735 + }, + { + "epoch": 0.63, + "grad_norm": 2.03125, + "learning_rate": 0.00015520075339279678, + "loss": 2.0779, + "step": 266740 + }, + { + "epoch": 0.63, + "grad_norm": 2.5625, + "learning_rate": 0.00015519921220562993, + "loss": 2.1244, + "step": 266745 + }, + { + "epoch": 0.63, + "grad_norm": 2.09375, + "learning_rate": 0.00015519767099960604, + "loss": 2.1285, + "step": 266750 + }, + { + "epoch": 0.63, + "grad_norm": 2.34375, + "learning_rate": 0.00015519612977472563, + "loss": 2.0347, + "step": 266755 + }, + { + "epoch": 0.63, + "grad_norm": 2.140625, + "learning_rate": 0.00015519458853098918, + "loss": 2.2007, + "step": 266760 + }, + { + "epoch": 0.63, + "grad_norm": 2.109375, + "learning_rate": 0.0001551930472683973, + "loss": 2.0002, + "step": 266765 + }, + { + "epoch": 0.63, + "grad_norm": 2.140625, + "learning_rate": 0.00015519150598695042, + "loss": 1.9314, + "step": 266770 + }, + { + "epoch": 0.63, + "grad_norm": 2.734375, + "learning_rate": 0.00015518996468664916, + "loss": 2.1325, + "step": 266775 + }, + { + "epoch": 0.63, + "grad_norm": 2.46875, + "learning_rate": 0.000155188423367494, + "loss": 2.1032, + "step": 266780 + }, + { + "epoch": 0.63, + "grad_norm": 1.75, + "learning_rate": 0.00015518688202948547, + "loss": 2.1253, + "step": 266785 + }, + { + "epoch": 0.63, + "grad_norm": 2.53125, + "learning_rate": 0.0001551853406726241, + "loss": 2.2122, + "step": 266790 + }, + { + "epoch": 0.63, + "grad_norm": 2.046875, + "learning_rate": 0.00015518379929691046, + "loss": 2.0879, + "step": 266795 + }, + { + "epoch": 0.63, + "grad_norm": 2.40625, + "learning_rate": 0.00015518225790234498, + "loss": 2.261, + "step": 266800 + }, + { + "epoch": 0.63, + "grad_norm": 2.078125, + "learning_rate": 0.00015518071648892823, + "loss": 1.8752, + "step": 266805 + }, + { + "epoch": 0.63, + "grad_norm": 1.96875, + "learning_rate": 0.00015517917505666076, + "loss": 2.0862, + "step": 266810 + }, + { + "epoch": 0.63, + "grad_norm": 2.046875, + "learning_rate": 0.00015517763360554314, + "loss": 2.1564, + "step": 266815 + }, + { + "epoch": 0.63, + "grad_norm": 1.8671875, + "learning_rate": 0.0001551760921355758, + "loss": 1.8731, + "step": 266820 + }, + { + "epoch": 0.63, + "grad_norm": 2.4375, + "learning_rate": 0.00015517455064675928, + "loss": 2.0482, + "step": 266825 + }, + { + "epoch": 0.63, + "grad_norm": 2.5625, + "learning_rate": 0.00015517300913909417, + "loss": 1.8223, + "step": 266830 + }, + { + "epoch": 0.63, + "grad_norm": 2.109375, + "learning_rate": 0.00015517146761258094, + "loss": 2.1667, + "step": 266835 + }, + { + "epoch": 0.63, + "grad_norm": 2.0625, + "learning_rate": 0.00015516992606722015, + "loss": 1.9599, + "step": 266840 + }, + { + "epoch": 0.63, + "grad_norm": 2.078125, + "learning_rate": 0.00015516838450301226, + "loss": 2.0264, + "step": 266845 + }, + { + "epoch": 0.63, + "grad_norm": 2.484375, + "learning_rate": 0.00015516684291995788, + "loss": 1.9462, + "step": 266850 + }, + { + "epoch": 0.63, + "grad_norm": 2.234375, + "learning_rate": 0.00015516530131805752, + "loss": 2.1646, + "step": 266855 + }, + { + "epoch": 0.63, + "grad_norm": 2.265625, + "learning_rate": 0.00015516375969731172, + "loss": 2.232, + "step": 266860 + }, + { + "epoch": 0.63, + "grad_norm": 2.234375, + "learning_rate": 0.00015516221805772094, + "loss": 2.0252, + "step": 266865 + }, + { + "epoch": 0.63, + "grad_norm": 2.265625, + "learning_rate": 0.00015516067639928576, + "loss": 1.9518, + "step": 266870 + }, + { + "epoch": 0.63, + "grad_norm": 2.21875, + "learning_rate": 0.0001551591347220067, + "loss": 2.0338, + "step": 266875 + }, + { + "epoch": 0.63, + "grad_norm": 2.0625, + "learning_rate": 0.00015515759302588426, + "loss": 2.2113, + "step": 266880 + }, + { + "epoch": 0.63, + "grad_norm": 2.140625, + "learning_rate": 0.00015515605131091896, + "loss": 2.0713, + "step": 266885 + }, + { + "epoch": 0.63, + "grad_norm": 2.03125, + "learning_rate": 0.00015515450957711142, + "loss": 2.2526, + "step": 266890 + }, + { + "epoch": 0.63, + "grad_norm": 2.265625, + "learning_rate": 0.00015515296782446205, + "loss": 2.0671, + "step": 266895 + }, + { + "epoch": 0.63, + "grad_norm": 2.78125, + "learning_rate": 0.00015515142605297143, + "loss": 2.0439, + "step": 266900 + }, + { + "epoch": 0.63, + "grad_norm": 2.0625, + "learning_rate": 0.0001551498842626401, + "loss": 2.0658, + "step": 266905 + }, + { + "epoch": 0.63, + "grad_norm": 1.9375, + "learning_rate": 0.00015514834245346858, + "loss": 1.849, + "step": 266910 + }, + { + "epoch": 0.63, + "grad_norm": 2.59375, + "learning_rate": 0.00015514680062545736, + "loss": 2.1006, + "step": 266915 + }, + { + "epoch": 0.63, + "grad_norm": 2.28125, + "learning_rate": 0.000155145258778607, + "loss": 2.1893, + "step": 266920 + }, + { + "epoch": 0.63, + "grad_norm": 2.40625, + "learning_rate": 0.000155143716912918, + "loss": 1.9499, + "step": 266925 + }, + { + "epoch": 0.63, + "grad_norm": 1.9765625, + "learning_rate": 0.00015514217502839094, + "loss": 1.9826, + "step": 266930 + }, + { + "epoch": 0.63, + "grad_norm": 2.15625, + "learning_rate": 0.0001551406331250263, + "loss": 2.1246, + "step": 266935 + }, + { + "epoch": 0.63, + "grad_norm": 2.140625, + "learning_rate": 0.0001551390912028246, + "loss": 2.1224, + "step": 266940 + }, + { + "epoch": 0.63, + "grad_norm": 2.234375, + "learning_rate": 0.00015513754926178643, + "loss": 2.0854, + "step": 266945 + }, + { + "epoch": 0.63, + "grad_norm": 2.453125, + "learning_rate": 0.00015513600730191228, + "loss": 2.0964, + "step": 266950 + }, + { + "epoch": 0.63, + "grad_norm": 2.96875, + "learning_rate": 0.00015513446532320263, + "loss": 2.1148, + "step": 266955 + }, + { + "epoch": 0.63, + "grad_norm": 2.1875, + "learning_rate": 0.00015513292332565803, + "loss": 2.0073, + "step": 266960 + }, + { + "epoch": 0.63, + "grad_norm": 2.140625, + "learning_rate": 0.00015513138130927905, + "loss": 2.136, + "step": 266965 + }, + { + "epoch": 0.63, + "grad_norm": 2.109375, + "learning_rate": 0.0001551298392740662, + "loss": 1.9084, + "step": 266970 + }, + { + "epoch": 0.63, + "grad_norm": 2.265625, + "learning_rate": 0.00015512829722001998, + "loss": 2.2485, + "step": 266975 + }, + { + "epoch": 0.63, + "grad_norm": 1.8671875, + "learning_rate": 0.00015512675514714092, + "loss": 2.0286, + "step": 266980 + }, + { + "epoch": 0.63, + "grad_norm": 2.234375, + "learning_rate": 0.00015512521305542957, + "loss": 2.1278, + "step": 266985 + }, + { + "epoch": 0.63, + "grad_norm": 2.265625, + "learning_rate": 0.0001551236709448865, + "loss": 2.0943, + "step": 266990 + }, + { + "epoch": 0.63, + "grad_norm": 2.265625, + "learning_rate": 0.0001551221288155121, + "loss": 2.1808, + "step": 266995 + }, + { + "epoch": 0.63, + "grad_norm": 1.953125, + "learning_rate": 0.00015512058666730707, + "loss": 2.1518, + "step": 267000 + }, + { + "epoch": 0.63, + "grad_norm": 1.9453125, + "learning_rate": 0.0001551190445002718, + "loss": 2.2071, + "step": 267005 + }, + { + "epoch": 0.63, + "grad_norm": 2.125, + "learning_rate": 0.00015511750231440684, + "loss": 2.1662, + "step": 267010 + }, + { + "epoch": 0.63, + "grad_norm": 2.25, + "learning_rate": 0.00015511596010971278, + "loss": 2.2514, + "step": 267015 + }, + { + "epoch": 0.63, + "grad_norm": 2.0625, + "learning_rate": 0.00015511441788619007, + "loss": 2.0044, + "step": 267020 + }, + { + "epoch": 0.63, + "grad_norm": 1.8046875, + "learning_rate": 0.0001551128756438393, + "loss": 2.1576, + "step": 267025 + }, + { + "epoch": 0.63, + "grad_norm": 2.46875, + "learning_rate": 0.00015511133338266098, + "loss": 2.1793, + "step": 267030 + }, + { + "epoch": 0.63, + "grad_norm": 2.140625, + "learning_rate": 0.00015510979110265564, + "loss": 2.2136, + "step": 267035 + }, + { + "epoch": 0.63, + "grad_norm": 1.7890625, + "learning_rate": 0.00015510824880382378, + "loss": 2.0353, + "step": 267040 + }, + { + "epoch": 0.63, + "grad_norm": 2.46875, + "learning_rate": 0.00015510670648616594, + "loss": 2.2049, + "step": 267045 + }, + { + "epoch": 0.63, + "grad_norm": 1.9375, + "learning_rate": 0.00015510516414968267, + "loss": 2.015, + "step": 267050 + }, + { + "epoch": 0.63, + "grad_norm": 2.4375, + "learning_rate": 0.00015510362179437442, + "loss": 1.9761, + "step": 267055 + }, + { + "epoch": 0.63, + "grad_norm": 2.1875, + "learning_rate": 0.00015510207942024185, + "loss": 1.9911, + "step": 267060 + }, + { + "epoch": 0.63, + "grad_norm": 1.9140625, + "learning_rate": 0.00015510053702728533, + "loss": 1.9312, + "step": 267065 + }, + { + "epoch": 0.63, + "grad_norm": 2.234375, + "learning_rate": 0.00015509899461550551, + "loss": 2.0282, + "step": 267070 + }, + { + "epoch": 0.63, + "grad_norm": 2.34375, + "learning_rate": 0.0001550974521849029, + "loss": 2.0787, + "step": 267075 + }, + { + "epoch": 0.63, + "grad_norm": 1.8203125, + "learning_rate": 0.00015509590973547795, + "loss": 2.1191, + "step": 267080 + }, + { + "epoch": 0.63, + "grad_norm": 1.8046875, + "learning_rate": 0.00015509436726723126, + "loss": 1.9622, + "step": 267085 + }, + { + "epoch": 0.63, + "grad_norm": 2.40625, + "learning_rate": 0.00015509282478016334, + "loss": 2.0395, + "step": 267090 + }, + { + "epoch": 0.63, + "grad_norm": 2.234375, + "learning_rate": 0.00015509128227427472, + "loss": 2.0949, + "step": 267095 + }, + { + "epoch": 0.63, + "grad_norm": 2.125, + "learning_rate": 0.0001550897397495659, + "loss": 2.11, + "step": 267100 + }, + { + "epoch": 0.63, + "grad_norm": 2.375, + "learning_rate": 0.00015508819720603745, + "loss": 2.202, + "step": 267105 + }, + { + "epoch": 0.63, + "grad_norm": 2.609375, + "learning_rate": 0.00015508665464368984, + "loss": 2.0583, + "step": 267110 + }, + { + "epoch": 0.63, + "grad_norm": 2.171875, + "learning_rate": 0.00015508511206252369, + "loss": 2.0392, + "step": 267115 + }, + { + "epoch": 0.63, + "grad_norm": 2.25, + "learning_rate": 0.00015508356946253939, + "loss": 2.0485, + "step": 267120 + }, + { + "epoch": 0.63, + "grad_norm": 2.140625, + "learning_rate": 0.0001550820268437376, + "loss": 1.9904, + "step": 267125 + }, + { + "epoch": 0.63, + "grad_norm": 2.703125, + "learning_rate": 0.00015508048420611874, + "loss": 1.9379, + "step": 267130 + }, + { + "epoch": 0.63, + "grad_norm": 2.75, + "learning_rate": 0.00015507894154968344, + "loss": 2.1181, + "step": 267135 + }, + { + "epoch": 0.63, + "grad_norm": 2.328125, + "learning_rate": 0.00015507739887443215, + "loss": 2.0879, + "step": 267140 + }, + { + "epoch": 0.63, + "grad_norm": 2.0, + "learning_rate": 0.0001550758561803654, + "loss": 2.1851, + "step": 267145 + }, + { + "epoch": 0.63, + "grad_norm": 1.8984375, + "learning_rate": 0.00015507431346748377, + "loss": 1.8843, + "step": 267150 + }, + { + "epoch": 0.63, + "grad_norm": 2.234375, + "learning_rate": 0.00015507277073578774, + "loss": 2.0375, + "step": 267155 + }, + { + "epoch": 0.63, + "grad_norm": 1.90625, + "learning_rate": 0.00015507122798527788, + "loss": 1.9061, + "step": 267160 + }, + { + "epoch": 0.63, + "grad_norm": 2.578125, + "learning_rate": 0.00015506968521595464, + "loss": 1.9907, + "step": 267165 + }, + { + "epoch": 0.63, + "grad_norm": 2.09375, + "learning_rate": 0.00015506814242781863, + "loss": 2.1378, + "step": 267170 + }, + { + "epoch": 0.63, + "grad_norm": 2.078125, + "learning_rate": 0.00015506659962087033, + "loss": 2.0401, + "step": 267175 + }, + { + "epoch": 0.63, + "grad_norm": 2.640625, + "learning_rate": 0.0001550650567951103, + "loss": 2.0931, + "step": 267180 + }, + { + "epoch": 0.63, + "grad_norm": 1.78125, + "learning_rate": 0.00015506351395053902, + "loss": 2.144, + "step": 267185 + }, + { + "epoch": 0.63, + "grad_norm": 2.109375, + "learning_rate": 0.00015506197108715707, + "loss": 2.2323, + "step": 267190 + }, + { + "epoch": 0.63, + "grad_norm": 2.015625, + "learning_rate": 0.00015506042820496494, + "loss": 2.1772, + "step": 267195 + }, + { + "epoch": 0.63, + "grad_norm": 2.0, + "learning_rate": 0.0001550588853039632, + "loss": 2.2371, + "step": 267200 + }, + { + "epoch": 0.63, + "grad_norm": 2.109375, + "learning_rate": 0.0001550573423841523, + "loss": 2.2032, + "step": 267205 + }, + { + "epoch": 0.63, + "grad_norm": 1.9609375, + "learning_rate": 0.00015505579944553283, + "loss": 2.1207, + "step": 267210 + }, + { + "epoch": 0.63, + "grad_norm": 2.140625, + "learning_rate": 0.00015505425648810531, + "loss": 2.163, + "step": 267215 + }, + { + "epoch": 0.63, + "grad_norm": 2.25, + "learning_rate": 0.00015505271351187025, + "loss": 2.0042, + "step": 267220 + }, + { + "epoch": 0.63, + "grad_norm": 2.140625, + "learning_rate": 0.00015505117051682819, + "loss": 2.0464, + "step": 267225 + }, + { + "epoch": 0.63, + "grad_norm": 1.8671875, + "learning_rate": 0.00015504962750297966, + "loss": 2.0423, + "step": 267230 + }, + { + "epoch": 0.63, + "grad_norm": 1.9453125, + "learning_rate": 0.00015504808447032515, + "loss": 2.0551, + "step": 267235 + }, + { + "epoch": 0.63, + "grad_norm": 2.28125, + "learning_rate": 0.00015504654141886525, + "loss": 2.0287, + "step": 267240 + }, + { + "epoch": 0.63, + "grad_norm": 3.359375, + "learning_rate": 0.00015504499834860042, + "loss": 1.9449, + "step": 267245 + }, + { + "epoch": 0.63, + "grad_norm": 1.734375, + "learning_rate": 0.00015504345525953124, + "loss": 2.1918, + "step": 267250 + }, + { + "epoch": 0.63, + "grad_norm": 1.8984375, + "learning_rate": 0.0001550419121516582, + "loss": 2.015, + "step": 267255 + }, + { + "epoch": 0.63, + "grad_norm": 1.8671875, + "learning_rate": 0.00015504036902498186, + "loss": 2.1407, + "step": 267260 + }, + { + "epoch": 0.63, + "grad_norm": 2.09375, + "learning_rate": 0.00015503882587950275, + "loss": 1.8961, + "step": 267265 + }, + { + "epoch": 0.63, + "grad_norm": 2.609375, + "learning_rate": 0.00015503728271522136, + "loss": 2.0707, + "step": 267270 + }, + { + "epoch": 0.63, + "grad_norm": 1.8046875, + "learning_rate": 0.00015503573953213823, + "loss": 2.0171, + "step": 267275 + }, + { + "epoch": 0.63, + "grad_norm": 2.21875, + "learning_rate": 0.00015503419633025386, + "loss": 2.1898, + "step": 267280 + }, + { + "epoch": 0.63, + "grad_norm": 2.703125, + "learning_rate": 0.00015503265310956888, + "loss": 2.2417, + "step": 267285 + }, + { + "epoch": 0.63, + "grad_norm": 2.109375, + "learning_rate": 0.0001550311098700837, + "loss": 2.0116, + "step": 267290 + }, + { + "epoch": 0.63, + "grad_norm": 2.1875, + "learning_rate": 0.00015502956661179893, + "loss": 2.1848, + "step": 267295 + }, + { + "epoch": 0.63, + "grad_norm": 2.625, + "learning_rate": 0.00015502802333471505, + "loss": 2.1752, + "step": 267300 + }, + { + "epoch": 0.63, + "grad_norm": 2.03125, + "learning_rate": 0.00015502648003883258, + "loss": 2.1347, + "step": 267305 + }, + { + "epoch": 0.63, + "grad_norm": 2.453125, + "learning_rate": 0.00015502493672415207, + "loss": 2.0626, + "step": 267310 + }, + { + "epoch": 0.63, + "grad_norm": 2.40625, + "learning_rate": 0.00015502339339067405, + "loss": 1.886, + "step": 267315 + }, + { + "epoch": 0.63, + "grad_norm": 2.171875, + "learning_rate": 0.00015502185003839905, + "loss": 2.0342, + "step": 267320 + }, + { + "epoch": 0.63, + "grad_norm": 2.1875, + "learning_rate": 0.0001550203066673276, + "loss": 1.9898, + "step": 267325 + }, + { + "epoch": 0.63, + "grad_norm": 2.734375, + "learning_rate": 0.00015501876327746018, + "loss": 2.1192, + "step": 267330 + }, + { + "epoch": 0.63, + "grad_norm": 2.28125, + "learning_rate": 0.00015501721986879735, + "loss": 1.9925, + "step": 267335 + }, + { + "epoch": 0.63, + "grad_norm": 2.390625, + "learning_rate": 0.00015501567644133967, + "loss": 2.0255, + "step": 267340 + }, + { + "epoch": 0.63, + "grad_norm": 2.03125, + "learning_rate": 0.00015501413299508766, + "loss": 1.9549, + "step": 267345 + }, + { + "epoch": 0.63, + "grad_norm": 2.3125, + "learning_rate": 0.00015501258953004175, + "loss": 2.0823, + "step": 267350 + }, + { + "epoch": 0.63, + "grad_norm": 1.9765625, + "learning_rate": 0.00015501104604620261, + "loss": 2.0474, + "step": 267355 + }, + { + "epoch": 0.63, + "grad_norm": 1.796875, + "learning_rate": 0.00015500950254357065, + "loss": 2.2106, + "step": 267360 + }, + { + "epoch": 0.63, + "grad_norm": 1.8828125, + "learning_rate": 0.00015500795902214647, + "loss": 2.0464, + "step": 267365 + }, + { + "epoch": 0.63, + "grad_norm": 1.8203125, + "learning_rate": 0.00015500641548193058, + "loss": 2.0644, + "step": 267370 + }, + { + "epoch": 0.63, + "grad_norm": 2.484375, + "learning_rate": 0.00015500487192292347, + "loss": 2.1312, + "step": 267375 + }, + { + "epoch": 0.63, + "grad_norm": 2.265625, + "learning_rate": 0.00015500332834512574, + "loss": 1.8464, + "step": 267380 + }, + { + "epoch": 0.63, + "grad_norm": 2.4375, + "learning_rate": 0.00015500178474853784, + "loss": 2.0935, + "step": 267385 + }, + { + "epoch": 0.63, + "grad_norm": 2.046875, + "learning_rate": 0.00015500024113316037, + "loss": 2.0782, + "step": 267390 + }, + { + "epoch": 0.63, + "grad_norm": 2.59375, + "learning_rate": 0.00015499869749899378, + "loss": 1.9273, + "step": 267395 + }, + { + "epoch": 0.63, + "grad_norm": 1.6796875, + "learning_rate": 0.00015499715384603867, + "loss": 1.8939, + "step": 267400 + }, + { + "epoch": 0.63, + "grad_norm": 1.9921875, + "learning_rate": 0.00015499561017429553, + "loss": 2.132, + "step": 267405 + }, + { + "epoch": 0.63, + "grad_norm": 2.03125, + "learning_rate": 0.00015499406648376488, + "loss": 1.8827, + "step": 267410 + }, + { + "epoch": 0.63, + "grad_norm": 1.8984375, + "learning_rate": 0.00015499252277444725, + "loss": 2.0876, + "step": 267415 + }, + { + "epoch": 0.63, + "grad_norm": 2.0, + "learning_rate": 0.0001549909790463432, + "loss": 1.8617, + "step": 267420 + }, + { + "epoch": 0.63, + "grad_norm": 2.1875, + "learning_rate": 0.0001549894352994532, + "loss": 1.964, + "step": 267425 + }, + { + "epoch": 0.63, + "grad_norm": 2.03125, + "learning_rate": 0.00015498789153377786, + "loss": 1.9793, + "step": 267430 + }, + { + "epoch": 0.63, + "grad_norm": 2.140625, + "learning_rate": 0.00015498634774931765, + "loss": 2.0044, + "step": 267435 + }, + { + "epoch": 0.63, + "grad_norm": 2.375, + "learning_rate": 0.00015498480394607306, + "loss": 2.074, + "step": 267440 + }, + { + "epoch": 0.63, + "grad_norm": 2.796875, + "learning_rate": 0.0001549832601240447, + "loss": 2.0429, + "step": 267445 + }, + { + "epoch": 0.63, + "grad_norm": 2.421875, + "learning_rate": 0.00015498171628323306, + "loss": 2.0344, + "step": 267450 + }, + { + "epoch": 0.63, + "grad_norm": 2.203125, + "learning_rate": 0.00015498017242363864, + "loss": 2.144, + "step": 267455 + }, + { + "epoch": 0.63, + "grad_norm": 2.015625, + "learning_rate": 0.00015497862854526204, + "loss": 2.0258, + "step": 267460 + }, + { + "epoch": 0.63, + "grad_norm": 1.78125, + "learning_rate": 0.0001549770846481037, + "loss": 2.1857, + "step": 267465 + }, + { + "epoch": 0.63, + "grad_norm": 2.15625, + "learning_rate": 0.00015497554073216424, + "loss": 2.0, + "step": 267470 + }, + { + "epoch": 0.63, + "grad_norm": 1.8984375, + "learning_rate": 0.0001549739967974441, + "loss": 1.8831, + "step": 267475 + }, + { + "epoch": 0.63, + "grad_norm": 2.046875, + "learning_rate": 0.0001549724528439439, + "loss": 2.0867, + "step": 267480 + }, + { + "epoch": 0.63, + "grad_norm": 2.1875, + "learning_rate": 0.00015497090887166403, + "loss": 1.987, + "step": 267485 + }, + { + "epoch": 0.63, + "grad_norm": 2.34375, + "learning_rate": 0.00015496936488060517, + "loss": 2.1312, + "step": 267490 + }, + { + "epoch": 0.63, + "grad_norm": 2.078125, + "learning_rate": 0.00015496782087076773, + "loss": 2.0237, + "step": 267495 + }, + { + "epoch": 0.63, + "grad_norm": 2.15625, + "learning_rate": 0.0001549662768421523, + "loss": 2.0567, + "step": 267500 + }, + { + "epoch": 0.63, + "grad_norm": 1.859375, + "learning_rate": 0.00015496473279475942, + "loss": 1.9859, + "step": 267505 + }, + { + "epoch": 0.63, + "grad_norm": 2.4375, + "learning_rate": 0.00015496318872858957, + "loss": 2.125, + "step": 267510 + }, + { + "epoch": 0.63, + "grad_norm": 2.3125, + "learning_rate": 0.0001549616446436433, + "loss": 2.3095, + "step": 267515 + }, + { + "epoch": 0.63, + "grad_norm": 2.109375, + "learning_rate": 0.00015496010053992108, + "loss": 1.9154, + "step": 267520 + }, + { + "epoch": 0.63, + "grad_norm": 2.15625, + "learning_rate": 0.00015495855641742357, + "loss": 1.9998, + "step": 267525 + }, + { + "epoch": 0.63, + "grad_norm": 2.6875, + "learning_rate": 0.0001549570122761512, + "loss": 2.0633, + "step": 267530 + }, + { + "epoch": 0.63, + "grad_norm": 1.8671875, + "learning_rate": 0.0001549554681161045, + "loss": 2.1786, + "step": 267535 + }, + { + "epoch": 0.63, + "grad_norm": 1.8515625, + "learning_rate": 0.00015495392393728404, + "loss": 1.9707, + "step": 267540 + }, + { + "epoch": 0.63, + "grad_norm": 2.375, + "learning_rate": 0.00015495237973969035, + "loss": 1.8994, + "step": 267545 + }, + { + "epoch": 0.63, + "grad_norm": 2.03125, + "learning_rate": 0.00015495083552332387, + "loss": 1.834, + "step": 267550 + }, + { + "epoch": 0.63, + "grad_norm": 1.859375, + "learning_rate": 0.0001549492912881852, + "loss": 2.1657, + "step": 267555 + }, + { + "epoch": 0.63, + "grad_norm": 1.90625, + "learning_rate": 0.00015494774703427489, + "loss": 1.9304, + "step": 267560 + }, + { + "epoch": 0.63, + "grad_norm": 2.34375, + "learning_rate": 0.0001549462027615934, + "loss": 2.0692, + "step": 267565 + }, + { + "epoch": 0.63, + "grad_norm": 2.1875, + "learning_rate": 0.00015494465847014127, + "loss": 2.0167, + "step": 267570 + }, + { + "epoch": 0.63, + "grad_norm": 2.375, + "learning_rate": 0.0001549431141599191, + "loss": 2.0509, + "step": 267575 + }, + { + "epoch": 0.63, + "grad_norm": 1.9140625, + "learning_rate": 0.00015494156983092734, + "loss": 2.4049, + "step": 267580 + }, + { + "epoch": 0.63, + "grad_norm": 2.21875, + "learning_rate": 0.00015494002548316655, + "loss": 1.9811, + "step": 267585 + }, + { + "epoch": 0.63, + "grad_norm": 2.015625, + "learning_rate": 0.00015493848111663725, + "loss": 2.1182, + "step": 267590 + }, + { + "epoch": 0.63, + "grad_norm": 1.953125, + "learning_rate": 0.00015493693673133998, + "loss": 2.0855, + "step": 267595 + }, + { + "epoch": 0.63, + "grad_norm": 2.0625, + "learning_rate": 0.00015493539232727522, + "loss": 1.9933, + "step": 267600 + }, + { + "epoch": 0.63, + "grad_norm": 2.203125, + "learning_rate": 0.00015493384790444357, + "loss": 2.1248, + "step": 267605 + }, + { + "epoch": 0.63, + "grad_norm": 2.296875, + "learning_rate": 0.0001549323034628455, + "loss": 1.9958, + "step": 267610 + }, + { + "epoch": 0.63, + "grad_norm": 1.984375, + "learning_rate": 0.00015493075900248156, + "loss": 2.15, + "step": 267615 + }, + { + "epoch": 0.63, + "grad_norm": 2.1875, + "learning_rate": 0.00015492921452335228, + "loss": 2.1674, + "step": 267620 + }, + { + "epoch": 0.63, + "grad_norm": 2.171875, + "learning_rate": 0.0001549276700254582, + "loss": 2.1261, + "step": 267625 + }, + { + "epoch": 0.63, + "grad_norm": 2.375, + "learning_rate": 0.0001549261255087998, + "loss": 2.16, + "step": 267630 + }, + { + "epoch": 0.63, + "grad_norm": 2.609375, + "learning_rate": 0.00015492458097337767, + "loss": 1.7709, + "step": 267635 + }, + { + "epoch": 0.63, + "grad_norm": 2.078125, + "learning_rate": 0.0001549230364191923, + "loss": 2.0285, + "step": 267640 + }, + { + "epoch": 0.63, + "grad_norm": 2.078125, + "learning_rate": 0.00015492149184624424, + "loss": 2.1381, + "step": 267645 + }, + { + "epoch": 0.63, + "grad_norm": 2.3125, + "learning_rate": 0.00015491994725453395, + "loss": 1.9521, + "step": 267650 + }, + { + "epoch": 0.63, + "grad_norm": 4.03125, + "learning_rate": 0.00015491840264406204, + "loss": 2.0121, + "step": 267655 + }, + { + "epoch": 0.63, + "grad_norm": 2.515625, + "learning_rate": 0.00015491685801482903, + "loss": 2.0918, + "step": 267660 + }, + { + "epoch": 0.63, + "grad_norm": 2.0625, + "learning_rate": 0.0001549153133668354, + "loss": 2.0589, + "step": 267665 + }, + { + "epoch": 0.63, + "grad_norm": 2.3125, + "learning_rate": 0.0001549137687000817, + "loss": 2.0839, + "step": 267670 + }, + { + "epoch": 0.63, + "grad_norm": 1.90625, + "learning_rate": 0.00015491222401456847, + "loss": 2.1159, + "step": 267675 + }, + { + "epoch": 0.63, + "grad_norm": 2.0625, + "learning_rate": 0.0001549106793102962, + "loss": 1.9815, + "step": 267680 + }, + { + "epoch": 0.63, + "grad_norm": 2.171875, + "learning_rate": 0.0001549091345872655, + "loss": 1.9188, + "step": 267685 + }, + { + "epoch": 0.63, + "grad_norm": 2.234375, + "learning_rate": 0.00015490758984547682, + "loss": 1.9267, + "step": 267690 + }, + { + "epoch": 0.63, + "grad_norm": 1.8359375, + "learning_rate": 0.00015490604508493073, + "loss": 2.0899, + "step": 267695 + }, + { + "epoch": 0.63, + "grad_norm": 2.09375, + "learning_rate": 0.00015490450030562772, + "loss": 2.1883, + "step": 267700 + }, + { + "epoch": 0.63, + "grad_norm": 2.078125, + "learning_rate": 0.00015490295550756833, + "loss": 1.9772, + "step": 267705 + }, + { + "epoch": 0.63, + "grad_norm": 2.234375, + "learning_rate": 0.00015490141069075307, + "loss": 2.0111, + "step": 267710 + }, + { + "epoch": 0.63, + "grad_norm": 2.390625, + "learning_rate": 0.00015489986585518254, + "loss": 2.0893, + "step": 267715 + }, + { + "epoch": 0.63, + "grad_norm": 2.140625, + "learning_rate": 0.00015489832100085718, + "loss": 1.9326, + "step": 267720 + }, + { + "epoch": 0.63, + "grad_norm": 2.53125, + "learning_rate": 0.0001548967761277776, + "loss": 2.0894, + "step": 267725 + }, + { + "epoch": 0.63, + "grad_norm": 1.953125, + "learning_rate": 0.00015489523123594427, + "loss": 1.9942, + "step": 267730 + }, + { + "epoch": 0.63, + "grad_norm": 2.046875, + "learning_rate": 0.0001548936863253577, + "loss": 2.0115, + "step": 267735 + }, + { + "epoch": 0.63, + "grad_norm": 2.0625, + "learning_rate": 0.0001548921413960185, + "loss": 2.1919, + "step": 267740 + }, + { + "epoch": 0.63, + "grad_norm": 2.46875, + "learning_rate": 0.00015489059644792713, + "loss": 2.1046, + "step": 267745 + }, + { + "epoch": 0.63, + "grad_norm": 2.0, + "learning_rate": 0.0001548890514810841, + "loss": 2.1082, + "step": 267750 + }, + { + "epoch": 0.63, + "grad_norm": 2.0625, + "learning_rate": 0.00015488750649549004, + "loss": 2.0771, + "step": 267755 + }, + { + "epoch": 0.63, + "grad_norm": 2.234375, + "learning_rate": 0.0001548859614911454, + "loss": 2.0189, + "step": 267760 + }, + { + "epoch": 0.63, + "grad_norm": 2.125, + "learning_rate": 0.0001548844164680507, + "loss": 2.1933, + "step": 267765 + }, + { + "epoch": 0.63, + "grad_norm": 2.078125, + "learning_rate": 0.0001548828714262065, + "loss": 2.102, + "step": 267770 + }, + { + "epoch": 0.63, + "grad_norm": 1.8203125, + "learning_rate": 0.0001548813263656133, + "loss": 1.8438, + "step": 267775 + }, + { + "epoch": 0.63, + "grad_norm": 1.734375, + "learning_rate": 0.00015487978128627162, + "loss": 2.0867, + "step": 267780 + }, + { + "epoch": 0.63, + "grad_norm": 2.390625, + "learning_rate": 0.00015487823618818208, + "loss": 2.0427, + "step": 267785 + }, + { + "epoch": 0.63, + "grad_norm": 1.9609375, + "learning_rate": 0.0001548766910713451, + "loss": 2.0251, + "step": 267790 + }, + { + "epoch": 0.63, + "grad_norm": 2.171875, + "learning_rate": 0.00015487514593576123, + "loss": 2.2725, + "step": 267795 + }, + { + "epoch": 0.63, + "grad_norm": 2.625, + "learning_rate": 0.00015487360078143104, + "loss": 2.214, + "step": 267800 + }, + { + "epoch": 0.63, + "grad_norm": 2.140625, + "learning_rate": 0.00015487205560835502, + "loss": 2.1203, + "step": 267805 + }, + { + "epoch": 0.63, + "grad_norm": 1.9375, + "learning_rate": 0.00015487051041653373, + "loss": 2.0737, + "step": 267810 + }, + { + "epoch": 0.63, + "grad_norm": 2.46875, + "learning_rate": 0.00015486896520596765, + "loss": 2.1658, + "step": 267815 + }, + { + "epoch": 0.63, + "grad_norm": 1.8984375, + "learning_rate": 0.00015486741997665734, + "loss": 1.9633, + "step": 267820 + }, + { + "epoch": 0.63, + "grad_norm": 2.296875, + "learning_rate": 0.00015486587472860334, + "loss": 2.1853, + "step": 267825 + }, + { + "epoch": 0.63, + "grad_norm": 1.9296875, + "learning_rate": 0.00015486432946180617, + "loss": 2.2042, + "step": 267830 + }, + { + "epoch": 0.63, + "grad_norm": 1.984375, + "learning_rate": 0.00015486278417626634, + "loss": 2.0096, + "step": 267835 + }, + { + "epoch": 0.63, + "grad_norm": 2.234375, + "learning_rate": 0.00015486123887198437, + "loss": 2.0081, + "step": 267840 + }, + { + "epoch": 0.63, + "grad_norm": 1.859375, + "learning_rate": 0.00015485969354896083, + "loss": 2.0988, + "step": 267845 + }, + { + "epoch": 0.63, + "grad_norm": 2.09375, + "learning_rate": 0.0001548581482071962, + "loss": 2.1328, + "step": 267850 + }, + { + "epoch": 0.63, + "grad_norm": 2.8125, + "learning_rate": 0.0001548566028466911, + "loss": 2.1982, + "step": 267855 + }, + { + "epoch": 0.63, + "grad_norm": 2.21875, + "learning_rate": 0.00015485505746744592, + "loss": 2.1436, + "step": 267860 + }, + { + "epoch": 0.63, + "grad_norm": 1.78125, + "learning_rate": 0.00015485351206946126, + "loss": 2.0586, + "step": 267865 + }, + { + "epoch": 0.63, + "grad_norm": 2.328125, + "learning_rate": 0.00015485196665273766, + "loss": 1.8982, + "step": 267870 + }, + { + "epoch": 0.63, + "grad_norm": 5.40625, + "learning_rate": 0.00015485042121727563, + "loss": 2.1878, + "step": 267875 + }, + { + "epoch": 0.63, + "grad_norm": 2.296875, + "learning_rate": 0.00015484887576307568, + "loss": 2.1574, + "step": 267880 + }, + { + "epoch": 0.63, + "grad_norm": 2.234375, + "learning_rate": 0.0001548473302901384, + "loss": 2.1986, + "step": 267885 + }, + { + "epoch": 0.63, + "grad_norm": 2.03125, + "learning_rate": 0.00015484578479846425, + "loss": 2.1135, + "step": 267890 + }, + { + "epoch": 0.63, + "grad_norm": 2.296875, + "learning_rate": 0.0001548442392880538, + "loss": 2.0288, + "step": 267895 + }, + { + "epoch": 0.63, + "grad_norm": 2.171875, + "learning_rate": 0.00015484269375890756, + "loss": 2.1405, + "step": 267900 + }, + { + "epoch": 0.63, + "grad_norm": 2.0625, + "learning_rate": 0.00015484114821102607, + "loss": 2.0909, + "step": 267905 + }, + { + "epoch": 0.63, + "grad_norm": 2.203125, + "learning_rate": 0.00015483960264440984, + "loss": 2.2247, + "step": 267910 + }, + { + "epoch": 0.63, + "grad_norm": 2.15625, + "learning_rate": 0.00015483805705905943, + "loss": 2.1476, + "step": 267915 + }, + { + "epoch": 0.63, + "grad_norm": 2.203125, + "learning_rate": 0.00015483651145497532, + "loss": 1.9922, + "step": 267920 + }, + { + "epoch": 0.63, + "grad_norm": 2.25, + "learning_rate": 0.00015483496583215805, + "loss": 2.0296, + "step": 267925 + }, + { + "epoch": 0.63, + "grad_norm": 2.078125, + "learning_rate": 0.0001548334201906082, + "loss": 1.9648, + "step": 267930 + }, + { + "epoch": 0.63, + "grad_norm": 2.671875, + "learning_rate": 0.0001548318745303262, + "loss": 2.0526, + "step": 267935 + }, + { + "epoch": 0.63, + "grad_norm": 1.953125, + "learning_rate": 0.00015483032885131267, + "loss": 1.9643, + "step": 267940 + }, + { + "epoch": 0.63, + "grad_norm": 2.0625, + "learning_rate": 0.0001548287831535681, + "loss": 2.1524, + "step": 267945 + }, + { + "epoch": 0.63, + "grad_norm": 2.125, + "learning_rate": 0.00015482723743709303, + "loss": 2.0596, + "step": 267950 + }, + { + "epoch": 0.63, + "grad_norm": 2.234375, + "learning_rate": 0.000154825691701888, + "loss": 2.195, + "step": 267955 + }, + { + "epoch": 0.63, + "grad_norm": 1.9921875, + "learning_rate": 0.0001548241459479535, + "loss": 2.0509, + "step": 267960 + }, + { + "epoch": 0.63, + "grad_norm": 2.34375, + "learning_rate": 0.00015482260017529007, + "loss": 1.9685, + "step": 267965 + }, + { + "epoch": 0.63, + "grad_norm": 2.59375, + "learning_rate": 0.0001548210543838983, + "loss": 1.9934, + "step": 267970 + }, + { + "epoch": 0.63, + "grad_norm": 1.859375, + "learning_rate": 0.00015481950857377857, + "loss": 2.1525, + "step": 267975 + }, + { + "epoch": 0.63, + "grad_norm": 2.203125, + "learning_rate": 0.00015481796274493157, + "loss": 2.043, + "step": 267980 + }, + { + "epoch": 0.63, + "grad_norm": 2.125, + "learning_rate": 0.00015481641689735773, + "loss": 2.2512, + "step": 267985 + }, + { + "epoch": 0.63, + "grad_norm": 2.015625, + "learning_rate": 0.0001548148710310576, + "loss": 2.0744, + "step": 267990 + }, + { + "epoch": 0.63, + "grad_norm": 1.8359375, + "learning_rate": 0.00015481332514603175, + "loss": 1.9875, + "step": 267995 + }, + { + "epoch": 0.63, + "grad_norm": 1.78125, + "learning_rate": 0.00015481177924228068, + "loss": 1.9271, + "step": 268000 + }, + { + "epoch": 0.63, + "grad_norm": 2.1875, + "learning_rate": 0.00015481023331980487, + "loss": 2.0263, + "step": 268005 + }, + { + "epoch": 0.63, + "grad_norm": 1.828125, + "learning_rate": 0.0001548086873786049, + "loss": 2.1775, + "step": 268010 + }, + { + "epoch": 0.63, + "grad_norm": 2.1875, + "learning_rate": 0.00015480714141868131, + "loss": 2.1176, + "step": 268015 + }, + { + "epoch": 0.63, + "grad_norm": 2.03125, + "learning_rate": 0.00015480559544003462, + "loss": 1.9744, + "step": 268020 + }, + { + "epoch": 0.63, + "grad_norm": 1.96875, + "learning_rate": 0.0001548040494426653, + "loss": 1.9841, + "step": 268025 + }, + { + "epoch": 0.63, + "grad_norm": 2.234375, + "learning_rate": 0.00015480250342657392, + "loss": 1.8739, + "step": 268030 + }, + { + "epoch": 0.63, + "grad_norm": 2.015625, + "learning_rate": 0.00015480095739176105, + "loss": 2.1004, + "step": 268035 + }, + { + "epoch": 0.63, + "grad_norm": 1.96875, + "learning_rate": 0.00015479941133822719, + "loss": 1.9637, + "step": 268040 + }, + { + "epoch": 0.63, + "grad_norm": 2.40625, + "learning_rate": 0.0001547978652659728, + "loss": 2.0805, + "step": 268045 + }, + { + "epoch": 0.63, + "grad_norm": 1.9375, + "learning_rate": 0.0001547963191749985, + "loss": 2.1354, + "step": 268050 + }, + { + "epoch": 0.63, + "grad_norm": 1.9140625, + "learning_rate": 0.00015479477306530476, + "loss": 2.1427, + "step": 268055 + }, + { + "epoch": 0.63, + "grad_norm": 2.875, + "learning_rate": 0.00015479322693689218, + "loss": 2.0233, + "step": 268060 + }, + { + "epoch": 0.63, + "grad_norm": 1.96875, + "learning_rate": 0.0001547916807897612, + "loss": 2.1215, + "step": 268065 + }, + { + "epoch": 0.63, + "grad_norm": 2.3125, + "learning_rate": 0.00015479013462391241, + "loss": 1.9216, + "step": 268070 + }, + { + "epoch": 0.63, + "grad_norm": 2.40625, + "learning_rate": 0.0001547885884393463, + "loss": 2.2097, + "step": 268075 + }, + { + "epoch": 0.63, + "grad_norm": 1.9609375, + "learning_rate": 0.00015478704223606342, + "loss": 1.9563, + "step": 268080 + }, + { + "epoch": 0.63, + "grad_norm": 2.140625, + "learning_rate": 0.00015478549601406427, + "loss": 1.893, + "step": 268085 + }, + { + "epoch": 0.63, + "grad_norm": 2.15625, + "learning_rate": 0.00015478394977334943, + "loss": 2.1414, + "step": 268090 + }, + { + "epoch": 0.63, + "grad_norm": 2.078125, + "learning_rate": 0.00015478240351391942, + "loss": 2.0338, + "step": 268095 + }, + { + "epoch": 0.63, + "grad_norm": 1.8203125, + "learning_rate": 0.0001547808572357747, + "loss": 1.9547, + "step": 268100 + }, + { + "epoch": 0.63, + "grad_norm": 1.9296875, + "learning_rate": 0.00015477931093891585, + "loss": 2.1762, + "step": 268105 + }, + { + "epoch": 0.63, + "grad_norm": 1.890625, + "learning_rate": 0.00015477776462334344, + "loss": 1.9312, + "step": 268110 + }, + { + "epoch": 0.63, + "grad_norm": 2.015625, + "learning_rate": 0.00015477621828905791, + "loss": 2.1029, + "step": 268115 + }, + { + "epoch": 0.63, + "grad_norm": 2.515625, + "learning_rate": 0.00015477467193605985, + "loss": 1.9686, + "step": 268120 + }, + { + "epoch": 0.63, + "grad_norm": 2.421875, + "learning_rate": 0.00015477312556434975, + "loss": 2.1452, + "step": 268125 + }, + { + "epoch": 0.63, + "grad_norm": 2.140625, + "learning_rate": 0.0001547715791739282, + "loss": 1.8262, + "step": 268130 + }, + { + "epoch": 0.63, + "grad_norm": 2.171875, + "learning_rate": 0.00015477003276479563, + "loss": 2.0838, + "step": 268135 + }, + { + "epoch": 0.63, + "grad_norm": 2.015625, + "learning_rate": 0.00015476848633695265, + "loss": 2.0577, + "step": 268140 + }, + { + "epoch": 0.63, + "grad_norm": 2.3125, + "learning_rate": 0.00015476693989039975, + "loss": 2.0826, + "step": 268145 + }, + { + "epoch": 0.63, + "grad_norm": 2.140625, + "learning_rate": 0.0001547653934251375, + "loss": 2.0804, + "step": 268150 + }, + { + "epoch": 0.63, + "grad_norm": 2.28125, + "learning_rate": 0.00015476384694116635, + "loss": 2.065, + "step": 268155 + }, + { + "epoch": 0.63, + "grad_norm": 2.125, + "learning_rate": 0.0001547623004384869, + "loss": 2.0008, + "step": 268160 + }, + { + "epoch": 0.63, + "grad_norm": 2.265625, + "learning_rate": 0.00015476075391709966, + "loss": 2.1557, + "step": 268165 + }, + { + "epoch": 0.63, + "grad_norm": 2.0625, + "learning_rate": 0.00015475920737700516, + "loss": 1.9599, + "step": 268170 + }, + { + "epoch": 0.63, + "grad_norm": 1.8984375, + "learning_rate": 0.00015475766081820392, + "loss": 2.1135, + "step": 268175 + }, + { + "epoch": 0.63, + "grad_norm": 2.078125, + "learning_rate": 0.00015475611424069645, + "loss": 2.0455, + "step": 268180 + }, + { + "epoch": 0.63, + "grad_norm": 1.8125, + "learning_rate": 0.00015475456764448331, + "loss": 2.0123, + "step": 268185 + }, + { + "epoch": 0.63, + "grad_norm": 2.265625, + "learning_rate": 0.000154753021029565, + "loss": 2.0527, + "step": 268190 + }, + { + "epoch": 0.63, + "grad_norm": 2.328125, + "learning_rate": 0.0001547514743959421, + "loss": 2.1852, + "step": 268195 + }, + { + "epoch": 0.63, + "grad_norm": 1.96875, + "learning_rate": 0.0001547499277436151, + "loss": 2.1882, + "step": 268200 + }, + { + "epoch": 0.63, + "grad_norm": 2.390625, + "learning_rate": 0.0001547483810725845, + "loss": 1.9393, + "step": 268205 + }, + { + "epoch": 0.63, + "grad_norm": 2.296875, + "learning_rate": 0.0001547468343828509, + "loss": 2.1001, + "step": 268210 + }, + { + "epoch": 0.63, + "grad_norm": 1.984375, + "learning_rate": 0.00015474528767441475, + "loss": 2.2847, + "step": 268215 + }, + { + "epoch": 0.63, + "grad_norm": 2.90625, + "learning_rate": 0.00015474374094727664, + "loss": 2.1143, + "step": 268220 + }, + { + "epoch": 0.63, + "grad_norm": 2.1875, + "learning_rate": 0.00015474219420143705, + "loss": 2.2171, + "step": 268225 + }, + { + "epoch": 0.63, + "grad_norm": 2.359375, + "learning_rate": 0.00015474064743689657, + "loss": 2.1027, + "step": 268230 + }, + { + "epoch": 0.63, + "grad_norm": 2.140625, + "learning_rate": 0.0001547391006536557, + "loss": 2.0845, + "step": 268235 + }, + { + "epoch": 0.63, + "grad_norm": 1.796875, + "learning_rate": 0.00015473755385171493, + "loss": 2.0145, + "step": 268240 + }, + { + "epoch": 0.63, + "grad_norm": 2.546875, + "learning_rate": 0.0001547360070310748, + "loss": 2.0787, + "step": 268245 + }, + { + "epoch": 0.63, + "grad_norm": 2.109375, + "learning_rate": 0.0001547344601917359, + "loss": 1.985, + "step": 268250 + }, + { + "epoch": 0.63, + "grad_norm": 2.296875, + "learning_rate": 0.0001547329133336987, + "loss": 2.1404, + "step": 268255 + }, + { + "epoch": 0.63, + "grad_norm": 1.9609375, + "learning_rate": 0.00015473136645696373, + "loss": 2.0876, + "step": 268260 + }, + { + "epoch": 0.63, + "grad_norm": 2.3125, + "learning_rate": 0.00015472981956153156, + "loss": 2.2062, + "step": 268265 + }, + { + "epoch": 0.63, + "grad_norm": 2.546875, + "learning_rate": 0.00015472827264740265, + "loss": 1.9277, + "step": 268270 + }, + { + "epoch": 0.63, + "grad_norm": 2.296875, + "learning_rate": 0.0001547267257145776, + "loss": 2.1596, + "step": 268275 + }, + { + "epoch": 0.63, + "grad_norm": 1.859375, + "learning_rate": 0.0001547251787630569, + "loss": 1.9918, + "step": 268280 + }, + { + "epoch": 0.63, + "grad_norm": 2.3125, + "learning_rate": 0.0001547236317928411, + "loss": 2.1054, + "step": 268285 + }, + { + "epoch": 0.63, + "grad_norm": 1.8515625, + "learning_rate": 0.0001547220848039307, + "loss": 1.996, + "step": 268290 + }, + { + "epoch": 0.63, + "grad_norm": 2.078125, + "learning_rate": 0.00015472053779632625, + "loss": 2.0489, + "step": 268295 + }, + { + "epoch": 0.63, + "grad_norm": 2.515625, + "learning_rate": 0.00015471899077002825, + "loss": 2.1346, + "step": 268300 + }, + { + "epoch": 0.63, + "grad_norm": 2.171875, + "learning_rate": 0.00015471744372503728, + "loss": 2.1009, + "step": 268305 + }, + { + "epoch": 0.63, + "grad_norm": 2.46875, + "learning_rate": 0.00015471589666135382, + "loss": 1.9678, + "step": 268310 + }, + { + "epoch": 0.63, + "grad_norm": 3.21875, + "learning_rate": 0.00015471434957897845, + "loss": 2.1898, + "step": 268315 + }, + { + "epoch": 0.63, + "grad_norm": 2.3125, + "learning_rate": 0.00015471280247791164, + "loss": 2.0511, + "step": 268320 + }, + { + "epoch": 0.63, + "grad_norm": 1.859375, + "learning_rate": 0.00015471125535815392, + "loss": 2.0811, + "step": 268325 + }, + { + "epoch": 0.63, + "grad_norm": 2.09375, + "learning_rate": 0.00015470970821970586, + "loss": 2.0648, + "step": 268330 + }, + { + "epoch": 0.63, + "grad_norm": 2.265625, + "learning_rate": 0.000154708161062568, + "loss": 2.2051, + "step": 268335 + }, + { + "epoch": 0.63, + "grad_norm": 1.9140625, + "learning_rate": 0.00015470661388674084, + "loss": 1.9945, + "step": 268340 + }, + { + "epoch": 0.63, + "grad_norm": 1.640625, + "learning_rate": 0.0001547050666922249, + "loss": 2.198, + "step": 268345 + }, + { + "epoch": 0.63, + "grad_norm": 1.9296875, + "learning_rate": 0.00015470351947902068, + "loss": 2.1815, + "step": 268350 + }, + { + "epoch": 0.63, + "grad_norm": 1.890625, + "learning_rate": 0.00015470197224712876, + "loss": 2.0325, + "step": 268355 + }, + { + "epoch": 0.63, + "grad_norm": 1.9375, + "learning_rate": 0.00015470042499654966, + "loss": 2.1627, + "step": 268360 + }, + { + "epoch": 0.63, + "grad_norm": 2.46875, + "learning_rate": 0.00015469887772728394, + "loss": 2.055, + "step": 268365 + }, + { + "epoch": 0.63, + "grad_norm": 2.234375, + "learning_rate": 0.00015469733043933207, + "loss": 1.945, + "step": 268370 + }, + { + "epoch": 0.63, + "grad_norm": 2.109375, + "learning_rate": 0.00015469578313269457, + "loss": 2.0753, + "step": 268375 + }, + { + "epoch": 0.63, + "grad_norm": 2.21875, + "learning_rate": 0.000154694235807372, + "loss": 2.1184, + "step": 268380 + }, + { + "epoch": 0.63, + "grad_norm": 2.375, + "learning_rate": 0.0001546926884633649, + "loss": 1.9838, + "step": 268385 + }, + { + "epoch": 0.63, + "grad_norm": 1.9609375, + "learning_rate": 0.00015469114110067383, + "loss": 2.0232, + "step": 268390 + }, + { + "epoch": 0.63, + "grad_norm": 2.015625, + "learning_rate": 0.00015468959371929924, + "loss": 1.9829, + "step": 268395 + }, + { + "epoch": 0.63, + "grad_norm": 2.40625, + "learning_rate": 0.00015468804631924168, + "loss": 2.0836, + "step": 268400 + }, + { + "epoch": 0.63, + "grad_norm": 2.359375, + "learning_rate": 0.00015468649890050168, + "loss": 2.0424, + "step": 268405 + }, + { + "epoch": 0.63, + "grad_norm": 2.34375, + "learning_rate": 0.0001546849514630798, + "loss": 2.106, + "step": 268410 + }, + { + "epoch": 0.63, + "grad_norm": 2.71875, + "learning_rate": 0.00015468340400697655, + "loss": 2.0453, + "step": 268415 + }, + { + "epoch": 0.63, + "grad_norm": 2.46875, + "learning_rate": 0.00015468185653219247, + "loss": 1.9004, + "step": 268420 + }, + { + "epoch": 0.63, + "grad_norm": 2.734375, + "learning_rate": 0.00015468030903872804, + "loss": 2.0949, + "step": 268425 + }, + { + "epoch": 0.63, + "grad_norm": 2.359375, + "learning_rate": 0.00015467876152658384, + "loss": 1.9415, + "step": 268430 + }, + { + "epoch": 0.63, + "grad_norm": 2.1875, + "learning_rate": 0.0001546772139957604, + "loss": 1.9472, + "step": 268435 + }, + { + "epoch": 0.63, + "grad_norm": 2.46875, + "learning_rate": 0.00015467566644625824, + "loss": 2.1292, + "step": 268440 + }, + { + "epoch": 0.63, + "grad_norm": 2.203125, + "learning_rate": 0.00015467411887807786, + "loss": 2.1995, + "step": 268445 + }, + { + "epoch": 0.63, + "grad_norm": 2.25, + "learning_rate": 0.0001546725712912198, + "loss": 2.0283, + "step": 268450 + }, + { + "epoch": 0.63, + "grad_norm": 2.078125, + "learning_rate": 0.00015467102368568462, + "loss": 2.2097, + "step": 268455 + }, + { + "epoch": 0.63, + "grad_norm": 2.0625, + "learning_rate": 0.00015466947606147281, + "loss": 2.0976, + "step": 268460 + }, + { + "epoch": 0.63, + "grad_norm": 2.25, + "learning_rate": 0.00015466792841858492, + "loss": 2.0227, + "step": 268465 + }, + { + "epoch": 0.63, + "grad_norm": 2.3125, + "learning_rate": 0.00015466638075702148, + "loss": 2.1556, + "step": 268470 + }, + { + "epoch": 0.63, + "grad_norm": 2.09375, + "learning_rate": 0.00015466483307678304, + "loss": 2.0307, + "step": 268475 + }, + { + "epoch": 0.63, + "grad_norm": 1.8203125, + "learning_rate": 0.00015466328537787005, + "loss": 2.0937, + "step": 268480 + }, + { + "epoch": 0.63, + "grad_norm": 1.8984375, + "learning_rate": 0.00015466173766028312, + "loss": 1.8989, + "step": 268485 + }, + { + "epoch": 0.63, + "grad_norm": 1.921875, + "learning_rate": 0.00015466018992402276, + "loss": 1.8489, + "step": 268490 + }, + { + "epoch": 0.63, + "grad_norm": 2.203125, + "learning_rate": 0.00015465864216908945, + "loss": 2.257, + "step": 268495 + }, + { + "epoch": 0.63, + "grad_norm": 2.28125, + "learning_rate": 0.0001546570943954838, + "loss": 2.1289, + "step": 268500 + }, + { + "epoch": 0.63, + "grad_norm": 2.09375, + "learning_rate": 0.00015465554660320624, + "loss": 2.0987, + "step": 268505 + }, + { + "epoch": 0.63, + "grad_norm": 1.8515625, + "learning_rate": 0.00015465399879225738, + "loss": 2.185, + "step": 268510 + }, + { + "epoch": 0.63, + "grad_norm": 1.9140625, + "learning_rate": 0.00015465245096263775, + "loss": 2.1324, + "step": 268515 + }, + { + "epoch": 0.63, + "grad_norm": 2.140625, + "learning_rate": 0.0001546509031143478, + "loss": 2.0435, + "step": 268520 + }, + { + "epoch": 0.63, + "grad_norm": 2.046875, + "learning_rate": 0.00015464935524738818, + "loss": 2.1242, + "step": 268525 + }, + { + "epoch": 0.63, + "grad_norm": 2.40625, + "learning_rate": 0.0001546478073617593, + "loss": 2.1176, + "step": 268530 + }, + { + "epoch": 0.63, + "grad_norm": 2.578125, + "learning_rate": 0.00015464625945746176, + "loss": 1.9683, + "step": 268535 + }, + { + "epoch": 0.63, + "grad_norm": 2.453125, + "learning_rate": 0.00015464471153449604, + "loss": 2.0089, + "step": 268540 + }, + { + "epoch": 0.63, + "grad_norm": 2.078125, + "learning_rate": 0.00015464316359286272, + "loss": 2.0773, + "step": 268545 + }, + { + "epoch": 0.63, + "grad_norm": 2.640625, + "learning_rate": 0.0001546416156325623, + "loss": 1.9125, + "step": 268550 + }, + { + "epoch": 0.63, + "grad_norm": 2.140625, + "learning_rate": 0.0001546400676535953, + "loss": 2.1455, + "step": 268555 + }, + { + "epoch": 0.63, + "grad_norm": 2.203125, + "learning_rate": 0.00015463851965596227, + "loss": 2.1979, + "step": 268560 + }, + { + "epoch": 0.63, + "grad_norm": 2.078125, + "learning_rate": 0.0001546369716396637, + "loss": 2.1126, + "step": 268565 + }, + { + "epoch": 0.63, + "grad_norm": 2.03125, + "learning_rate": 0.0001546354236047002, + "loss": 2.1949, + "step": 268570 + }, + { + "epoch": 0.63, + "grad_norm": 2.40625, + "learning_rate": 0.0001546338755510722, + "loss": 2.1126, + "step": 268575 + }, + { + "epoch": 0.63, + "grad_norm": 2.109375, + "learning_rate": 0.00015463232747878035, + "loss": 2.0409, + "step": 268580 + }, + { + "epoch": 0.63, + "grad_norm": 2.015625, + "learning_rate": 0.00015463077938782503, + "loss": 1.9258, + "step": 268585 + }, + { + "epoch": 0.63, + "grad_norm": 2.171875, + "learning_rate": 0.00015462923127820689, + "loss": 2.0376, + "step": 268590 + }, + { + "epoch": 0.63, + "grad_norm": 2.078125, + "learning_rate": 0.0001546276831499264, + "loss": 1.858, + "step": 268595 + }, + { + "epoch": 0.63, + "grad_norm": 1.921875, + "learning_rate": 0.0001546261350029841, + "loss": 1.9771, + "step": 268600 + }, + { + "epoch": 0.63, + "grad_norm": 1.9375, + "learning_rate": 0.00015462458683738052, + "loss": 2.1359, + "step": 268605 + }, + { + "epoch": 0.63, + "grad_norm": 2.328125, + "learning_rate": 0.00015462303865311622, + "loss": 2.0342, + "step": 268610 + }, + { + "epoch": 0.63, + "grad_norm": 1.859375, + "learning_rate": 0.00015462149045019166, + "loss": 2.199, + "step": 268615 + }, + { + "epoch": 0.63, + "grad_norm": 2.265625, + "learning_rate": 0.0001546199422286074, + "loss": 2.2406, + "step": 268620 + }, + { + "epoch": 0.63, + "grad_norm": 2.515625, + "learning_rate": 0.000154618393988364, + "loss": 2.0701, + "step": 268625 + }, + { + "epoch": 0.63, + "grad_norm": 2.265625, + "learning_rate": 0.000154616845729462, + "loss": 2.076, + "step": 268630 + }, + { + "epoch": 0.63, + "grad_norm": 2.203125, + "learning_rate": 0.00015461529745190185, + "loss": 1.9063, + "step": 268635 + }, + { + "epoch": 0.63, + "grad_norm": 2.34375, + "learning_rate": 0.00015461374915568413, + "loss": 2.094, + "step": 268640 + }, + { + "epoch": 0.63, + "grad_norm": 2.390625, + "learning_rate": 0.00015461220084080933, + "loss": 2.1322, + "step": 268645 + }, + { + "epoch": 0.63, + "grad_norm": 2.03125, + "learning_rate": 0.0001546106525072781, + "loss": 2.2089, + "step": 268650 + }, + { + "epoch": 0.63, + "grad_norm": 3.0, + "learning_rate": 0.0001546091041550908, + "loss": 2.0319, + "step": 268655 + }, + { + "epoch": 0.63, + "grad_norm": 2.15625, + "learning_rate": 0.00015460755578424807, + "loss": 1.8846, + "step": 268660 + }, + { + "epoch": 0.63, + "grad_norm": 2.078125, + "learning_rate": 0.00015460600739475042, + "loss": 2.0954, + "step": 268665 + }, + { + "epoch": 0.63, + "grad_norm": 2.359375, + "learning_rate": 0.00015460445898659835, + "loss": 2.1294, + "step": 268670 + }, + { + "epoch": 0.63, + "grad_norm": 2.03125, + "learning_rate": 0.0001546029105597924, + "loss": 2.0467, + "step": 268675 + }, + { + "epoch": 0.63, + "grad_norm": 2.203125, + "learning_rate": 0.00015460136211433313, + "loss": 2.102, + "step": 268680 + }, + { + "epoch": 0.63, + "grad_norm": 1.9375, + "learning_rate": 0.00015459981365022104, + "loss": 2.1041, + "step": 268685 + }, + { + "epoch": 0.63, + "grad_norm": 2.03125, + "learning_rate": 0.00015459826516745664, + "loss": 2.0556, + "step": 268690 + }, + { + "epoch": 0.63, + "grad_norm": 2.671875, + "learning_rate": 0.0001545967166660405, + "loss": 2.0602, + "step": 268695 + }, + { + "epoch": 0.63, + "grad_norm": 2.3125, + "learning_rate": 0.00015459516814597313, + "loss": 1.9904, + "step": 268700 + }, + { + "epoch": 0.63, + "grad_norm": 2.390625, + "learning_rate": 0.00015459361960725506, + "loss": 2.0981, + "step": 268705 + }, + { + "epoch": 0.63, + "grad_norm": 2.171875, + "learning_rate": 0.0001545920710498868, + "loss": 2.2472, + "step": 268710 + }, + { + "epoch": 0.63, + "grad_norm": 2.140625, + "learning_rate": 0.00015459052247386893, + "loss": 1.9164, + "step": 268715 + }, + { + "epoch": 0.63, + "grad_norm": 2.4375, + "learning_rate": 0.00015458897387920194, + "loss": 2.0809, + "step": 268720 + }, + { + "epoch": 0.63, + "grad_norm": 3.125, + "learning_rate": 0.00015458742526588636, + "loss": 2.2621, + "step": 268725 + }, + { + "epoch": 0.63, + "grad_norm": 2.359375, + "learning_rate": 0.00015458587663392273, + "loss": 2.2023, + "step": 268730 + }, + { + "epoch": 0.63, + "grad_norm": 2.25, + "learning_rate": 0.00015458432798331158, + "loss": 2.257, + "step": 268735 + }, + { + "epoch": 0.63, + "grad_norm": 2.46875, + "learning_rate": 0.00015458277931405343, + "loss": 1.9722, + "step": 268740 + }, + { + "epoch": 0.63, + "grad_norm": 3.140625, + "learning_rate": 0.0001545812306261488, + "loss": 2.4157, + "step": 268745 + }, + { + "epoch": 0.63, + "grad_norm": 2.125, + "learning_rate": 0.00015457968191959825, + "loss": 2.0477, + "step": 268750 + }, + { + "epoch": 0.63, + "grad_norm": 2.546875, + "learning_rate": 0.00015457813319440225, + "loss": 1.9317, + "step": 268755 + }, + { + "epoch": 0.63, + "grad_norm": 2.1875, + "learning_rate": 0.00015457658445056143, + "loss": 2.0865, + "step": 268760 + }, + { + "epoch": 0.63, + "grad_norm": 2.390625, + "learning_rate": 0.00015457503568807622, + "loss": 2.1523, + "step": 268765 + }, + { + "epoch": 0.63, + "grad_norm": 2.125, + "learning_rate": 0.0001545734869069472, + "loss": 2.019, + "step": 268770 + }, + { + "epoch": 0.63, + "grad_norm": 2.265625, + "learning_rate": 0.00015457193810717486, + "loss": 2.2015, + "step": 268775 + }, + { + "epoch": 0.63, + "grad_norm": 2.109375, + "learning_rate": 0.0001545703892887598, + "loss": 1.9654, + "step": 268780 + }, + { + "epoch": 0.63, + "grad_norm": 2.265625, + "learning_rate": 0.00015456884045170248, + "loss": 2.0342, + "step": 268785 + }, + { + "epoch": 0.63, + "grad_norm": 2.40625, + "learning_rate": 0.00015456729159600346, + "loss": 2.1046, + "step": 268790 + }, + { + "epoch": 0.63, + "grad_norm": 2.125, + "learning_rate": 0.00015456574272166326, + "loss": 2.1132, + "step": 268795 + }, + { + "epoch": 0.63, + "grad_norm": 2.3125, + "learning_rate": 0.00015456419382868243, + "loss": 2.1333, + "step": 268800 + }, + { + "epoch": 0.63, + "grad_norm": 2.40625, + "learning_rate": 0.00015456264491706145, + "loss": 2.2331, + "step": 268805 + }, + { + "epoch": 0.63, + "grad_norm": 2.46875, + "learning_rate": 0.00015456109598680092, + "loss": 2.0243, + "step": 268810 + }, + { + "epoch": 0.63, + "grad_norm": 2.203125, + "learning_rate": 0.0001545595470379013, + "loss": 2.3623, + "step": 268815 + }, + { + "epoch": 0.63, + "grad_norm": 2.28125, + "learning_rate": 0.00015455799807036316, + "loss": 2.2, + "step": 268820 + }, + { + "epoch": 0.63, + "grad_norm": 2.546875, + "learning_rate": 0.000154556449084187, + "loss": 2.0664, + "step": 268825 + }, + { + "epoch": 0.63, + "grad_norm": 2.171875, + "learning_rate": 0.0001545549000793734, + "loss": 2.1096, + "step": 268830 + }, + { + "epoch": 0.63, + "grad_norm": 2.75, + "learning_rate": 0.00015455335105592284, + "loss": 1.9129, + "step": 268835 + }, + { + "epoch": 0.63, + "grad_norm": 2.390625, + "learning_rate": 0.00015455180201383585, + "loss": 2.3464, + "step": 268840 + }, + { + "epoch": 0.63, + "grad_norm": 2.046875, + "learning_rate": 0.000154550252953113, + "loss": 2.0862, + "step": 268845 + }, + { + "epoch": 0.63, + "grad_norm": 2.125, + "learning_rate": 0.00015454870387375478, + "loss": 2.0365, + "step": 268850 + }, + { + "epoch": 0.63, + "grad_norm": 2.71875, + "learning_rate": 0.00015454715477576176, + "loss": 1.9319, + "step": 268855 + }, + { + "epoch": 0.63, + "grad_norm": 2.359375, + "learning_rate": 0.0001545456056591344, + "loss": 2.0242, + "step": 268860 + }, + { + "epoch": 0.63, + "grad_norm": 2.09375, + "learning_rate": 0.0001545440565238733, + "loss": 1.8453, + "step": 268865 + }, + { + "epoch": 0.63, + "grad_norm": 2.3125, + "learning_rate": 0.00015454250736997898, + "loss": 2.1376, + "step": 268870 + }, + { + "epoch": 0.63, + "grad_norm": 2.109375, + "learning_rate": 0.0001545409581974519, + "loss": 2.0435, + "step": 268875 + }, + { + "epoch": 0.63, + "grad_norm": 2.203125, + "learning_rate": 0.00015453940900629268, + "loss": 2.0564, + "step": 268880 + }, + { + "epoch": 0.63, + "grad_norm": 2.046875, + "learning_rate": 0.0001545378597965018, + "loss": 2.0429, + "step": 268885 + }, + { + "epoch": 0.63, + "grad_norm": 2.1875, + "learning_rate": 0.00015453631056807978, + "loss": 2.0455, + "step": 268890 + }, + { + "epoch": 0.63, + "grad_norm": 1.6484375, + "learning_rate": 0.00015453476132102717, + "loss": 1.9044, + "step": 268895 + }, + { + "epoch": 0.63, + "grad_norm": 2.625, + "learning_rate": 0.00015453321205534452, + "loss": 2.115, + "step": 268900 + }, + { + "epoch": 0.63, + "grad_norm": 2.21875, + "learning_rate": 0.00015453166277103233, + "loss": 2.1011, + "step": 268905 + }, + { + "epoch": 0.63, + "grad_norm": 2.1875, + "learning_rate": 0.00015453011346809112, + "loss": 2.0671, + "step": 268910 + }, + { + "epoch": 0.63, + "grad_norm": 1.984375, + "learning_rate": 0.00015452856414652143, + "loss": 1.9574, + "step": 268915 + }, + { + "epoch": 0.63, + "grad_norm": 2.0625, + "learning_rate": 0.00015452701480632382, + "loss": 1.7429, + "step": 268920 + }, + { + "epoch": 0.63, + "grad_norm": 1.8359375, + "learning_rate": 0.00015452546544749877, + "loss": 2.0927, + "step": 268925 + }, + { + "epoch": 0.63, + "grad_norm": 2.140625, + "learning_rate": 0.00015452391607004683, + "loss": 1.9469, + "step": 268930 + }, + { + "epoch": 0.63, + "grad_norm": 2.15625, + "learning_rate": 0.00015452236667396857, + "loss": 2.24, + "step": 268935 + }, + { + "epoch": 0.63, + "grad_norm": 2.09375, + "learning_rate": 0.00015452081725926444, + "loss": 1.9674, + "step": 268940 + }, + { + "epoch": 0.63, + "grad_norm": 2.09375, + "learning_rate": 0.00015451926782593503, + "loss": 2.1962, + "step": 268945 + }, + { + "epoch": 0.63, + "grad_norm": 2.078125, + "learning_rate": 0.0001545177183739808, + "loss": 2.1796, + "step": 268950 + }, + { + "epoch": 0.63, + "grad_norm": 2.09375, + "learning_rate": 0.00015451616890340238, + "loss": 2.1962, + "step": 268955 + }, + { + "epoch": 0.63, + "grad_norm": 2.125, + "learning_rate": 0.00015451461941420023, + "loss": 2.2699, + "step": 268960 + }, + { + "epoch": 0.63, + "grad_norm": 2.5625, + "learning_rate": 0.0001545130699063749, + "loss": 2.2293, + "step": 268965 + }, + { + "epoch": 0.63, + "grad_norm": 2.0, + "learning_rate": 0.00015451152037992695, + "loss": 2.0642, + "step": 268970 + }, + { + "epoch": 0.63, + "grad_norm": 1.921875, + "learning_rate": 0.00015450997083485684, + "loss": 2.1775, + "step": 268975 + }, + { + "epoch": 0.63, + "grad_norm": 2.328125, + "learning_rate": 0.00015450842127116515, + "loss": 1.9802, + "step": 268980 + }, + { + "epoch": 0.63, + "grad_norm": 2.109375, + "learning_rate": 0.0001545068716888524, + "loss": 1.8502, + "step": 268985 + }, + { + "epoch": 0.63, + "grad_norm": 2.15625, + "learning_rate": 0.0001545053220879191, + "loss": 2.0234, + "step": 268990 + }, + { + "epoch": 0.63, + "grad_norm": 2.15625, + "learning_rate": 0.00015450377246836578, + "loss": 2.0263, + "step": 268995 + }, + { + "epoch": 0.63, + "grad_norm": 2.09375, + "learning_rate": 0.000154502222830193, + "loss": 2.0106, + "step": 269000 + }, + { + "epoch": 0.63, + "grad_norm": 1.8984375, + "learning_rate": 0.0001545006731734013, + "loss": 2.1556, + "step": 269005 + }, + { + "epoch": 0.63, + "grad_norm": 1.9609375, + "learning_rate": 0.00015449912349799113, + "loss": 2.1319, + "step": 269010 + }, + { + "epoch": 0.63, + "grad_norm": 2.40625, + "learning_rate": 0.0001544975738039631, + "loss": 2.0694, + "step": 269015 + }, + { + "epoch": 0.63, + "grad_norm": 2.265625, + "learning_rate": 0.00015449602409131768, + "loss": 1.9643, + "step": 269020 + }, + { + "epoch": 0.63, + "grad_norm": 2.578125, + "learning_rate": 0.00015449447436005547, + "loss": 2.1074, + "step": 269025 + }, + { + "epoch": 0.63, + "grad_norm": 2.3125, + "learning_rate": 0.00015449292461017694, + "loss": 2.0848, + "step": 269030 + }, + { + "epoch": 0.63, + "grad_norm": 2.453125, + "learning_rate": 0.00015449137484168263, + "loss": 2.2357, + "step": 269035 + }, + { + "epoch": 0.63, + "grad_norm": 2.640625, + "learning_rate": 0.00015448982505457311, + "loss": 2.0791, + "step": 269040 + }, + { + "epoch": 0.63, + "grad_norm": 2.6875, + "learning_rate": 0.00015448827524884885, + "loss": 2.279, + "step": 269045 + }, + { + "epoch": 0.63, + "grad_norm": 2.140625, + "learning_rate": 0.0001544867254245104, + "loss": 1.9558, + "step": 269050 + }, + { + "epoch": 0.63, + "grad_norm": 2.15625, + "learning_rate": 0.0001544851755815583, + "loss": 1.9822, + "step": 269055 + }, + { + "epoch": 0.63, + "grad_norm": 2.296875, + "learning_rate": 0.00015448362571999312, + "loss": 2.2896, + "step": 269060 + }, + { + "epoch": 0.63, + "grad_norm": 1.96875, + "learning_rate": 0.00015448207583981528, + "loss": 2.186, + "step": 269065 + }, + { + "epoch": 0.63, + "grad_norm": 1.6796875, + "learning_rate": 0.00015448052594102543, + "loss": 2.0483, + "step": 269070 + }, + { + "epoch": 0.63, + "grad_norm": 2.421875, + "learning_rate": 0.000154478976023624, + "loss": 2.045, + "step": 269075 + }, + { + "epoch": 0.63, + "grad_norm": 2.234375, + "learning_rate": 0.0001544774260876116, + "loss": 2.1504, + "step": 269080 + }, + { + "epoch": 0.63, + "grad_norm": 2.21875, + "learning_rate": 0.0001544758761329887, + "loss": 2.1009, + "step": 269085 + }, + { + "epoch": 0.63, + "grad_norm": 2.203125, + "learning_rate": 0.00015447432615975587, + "loss": 2.1112, + "step": 269090 + }, + { + "epoch": 0.63, + "grad_norm": 2.5, + "learning_rate": 0.0001544727761679136, + "loss": 2.4221, + "step": 269095 + }, + { + "epoch": 0.63, + "grad_norm": 2.0625, + "learning_rate": 0.00015447122615746246, + "loss": 2.0489, + "step": 269100 + }, + { + "epoch": 0.63, + "grad_norm": 2.28125, + "learning_rate": 0.00015446967612840295, + "loss": 1.9847, + "step": 269105 + }, + { + "epoch": 0.63, + "grad_norm": 1.96875, + "learning_rate": 0.0001544681260807356, + "loss": 2.1068, + "step": 269110 + }, + { + "epoch": 0.63, + "grad_norm": 2.34375, + "learning_rate": 0.000154466576014461, + "loss": 2.0141, + "step": 269115 + }, + { + "epoch": 0.63, + "grad_norm": 2.296875, + "learning_rate": 0.0001544650259295796, + "loss": 1.9917, + "step": 269120 + }, + { + "epoch": 0.63, + "grad_norm": 2.421875, + "learning_rate": 0.00015446347582609196, + "loss": 1.978, + "step": 269125 + }, + { + "epoch": 0.63, + "grad_norm": 2.046875, + "learning_rate": 0.0001544619257039986, + "loss": 2.2076, + "step": 269130 + }, + { + "epoch": 0.63, + "grad_norm": 2.03125, + "learning_rate": 0.00015446037556330003, + "loss": 2.0704, + "step": 269135 + }, + { + "epoch": 0.63, + "grad_norm": 2.21875, + "learning_rate": 0.00015445882540399686, + "loss": 2.1612, + "step": 269140 + }, + { + "epoch": 0.63, + "grad_norm": 1.8203125, + "learning_rate": 0.00015445727522608955, + "loss": 2.0277, + "step": 269145 + }, + { + "epoch": 0.63, + "grad_norm": 2.25, + "learning_rate": 0.00015445572502957865, + "loss": 1.9074, + "step": 269150 + }, + { + "epoch": 0.63, + "grad_norm": 3.03125, + "learning_rate": 0.00015445417481446466, + "loss": 2.079, + "step": 269155 + }, + { + "epoch": 0.63, + "grad_norm": 2.09375, + "learning_rate": 0.0001544526245807482, + "loss": 1.9592, + "step": 269160 + }, + { + "epoch": 0.63, + "grad_norm": 2.34375, + "learning_rate": 0.00015445107432842967, + "loss": 2.111, + "step": 269165 + }, + { + "epoch": 0.63, + "grad_norm": 2.21875, + "learning_rate": 0.0001544495240575097, + "loss": 1.9142, + "step": 269170 + }, + { + "epoch": 0.63, + "grad_norm": 2.265625, + "learning_rate": 0.0001544479737679888, + "loss": 2.0346, + "step": 269175 + }, + { + "epoch": 0.63, + "grad_norm": 2.546875, + "learning_rate": 0.00015444642345986744, + "loss": 1.9352, + "step": 269180 + }, + { + "epoch": 0.63, + "grad_norm": 2.015625, + "learning_rate": 0.00015444487313314622, + "loss": 2.0266, + "step": 269185 + }, + { + "epoch": 0.63, + "grad_norm": 1.890625, + "learning_rate": 0.00015444332278782561, + "loss": 2.0591, + "step": 269190 + }, + { + "epoch": 0.63, + "grad_norm": 2.4375, + "learning_rate": 0.00015444177242390623, + "loss": 1.9461, + "step": 269195 + }, + { + "epoch": 0.63, + "grad_norm": 2.171875, + "learning_rate": 0.00015444022204138852, + "loss": 2.0756, + "step": 269200 + }, + { + "epoch": 0.63, + "grad_norm": 2.015625, + "learning_rate": 0.00015443867164027303, + "loss": 1.9936, + "step": 269205 + }, + { + "epoch": 0.63, + "grad_norm": 2.03125, + "learning_rate": 0.00015443712122056034, + "loss": 2.2106, + "step": 269210 + }, + { + "epoch": 0.63, + "grad_norm": 2.5, + "learning_rate": 0.0001544355707822509, + "loss": 2.0653, + "step": 269215 + }, + { + "epoch": 0.63, + "grad_norm": 2.140625, + "learning_rate": 0.00015443402032534534, + "loss": 2.0711, + "step": 269220 + }, + { + "epoch": 0.63, + "grad_norm": 2.40625, + "learning_rate": 0.00015443246984984406, + "loss": 2.085, + "step": 269225 + }, + { + "epoch": 0.63, + "grad_norm": 1.75, + "learning_rate": 0.00015443091935574772, + "loss": 2.0482, + "step": 269230 + }, + { + "epoch": 0.63, + "grad_norm": 2.328125, + "learning_rate": 0.00015442936884305675, + "loss": 1.9934, + "step": 269235 + }, + { + "epoch": 0.63, + "grad_norm": 2.3125, + "learning_rate": 0.00015442781831177173, + "loss": 2.0991, + "step": 269240 + }, + { + "epoch": 0.63, + "grad_norm": 1.765625, + "learning_rate": 0.00015442626776189319, + "loss": 2.178, + "step": 269245 + }, + { + "epoch": 0.63, + "grad_norm": 2.0, + "learning_rate": 0.00015442471719342166, + "loss": 2.1212, + "step": 269250 + }, + { + "epoch": 0.63, + "grad_norm": 2.203125, + "learning_rate": 0.00015442316660635767, + "loss": 2.1161, + "step": 269255 + }, + { + "epoch": 0.63, + "grad_norm": 1.9921875, + "learning_rate": 0.00015442161600070172, + "loss": 2.1677, + "step": 269260 + }, + { + "epoch": 0.63, + "grad_norm": 2.03125, + "learning_rate": 0.00015442006537645433, + "loss": 2.2306, + "step": 269265 + }, + { + "epoch": 0.63, + "grad_norm": 1.8515625, + "learning_rate": 0.00015441851473361608, + "loss": 1.9981, + "step": 269270 + }, + { + "epoch": 0.63, + "grad_norm": 2.0625, + "learning_rate": 0.0001544169640721875, + "loss": 2.1386, + "step": 269275 + }, + { + "epoch": 0.63, + "grad_norm": 2.15625, + "learning_rate": 0.00015441541339216908, + "loss": 1.9905, + "step": 269280 + }, + { + "epoch": 0.63, + "grad_norm": 1.9140625, + "learning_rate": 0.00015441386269356136, + "loss": 2.1362, + "step": 269285 + }, + { + "epoch": 0.63, + "grad_norm": 2.078125, + "learning_rate": 0.00015441231197636487, + "loss": 1.911, + "step": 269290 + }, + { + "epoch": 0.63, + "grad_norm": 2.1875, + "learning_rate": 0.00015441076124058018, + "loss": 2.0213, + "step": 269295 + }, + { + "epoch": 0.63, + "grad_norm": 2.53125, + "learning_rate": 0.00015440921048620777, + "loss": 2.1109, + "step": 269300 + }, + { + "epoch": 0.63, + "grad_norm": 2.015625, + "learning_rate": 0.00015440765971324818, + "loss": 1.8991, + "step": 269305 + }, + { + "epoch": 0.63, + "grad_norm": 2.078125, + "learning_rate": 0.00015440610892170198, + "loss": 2.3343, + "step": 269310 + }, + { + "epoch": 0.63, + "grad_norm": 2.359375, + "learning_rate": 0.00015440455811156962, + "loss": 2.1147, + "step": 269315 + }, + { + "epoch": 0.63, + "grad_norm": 1.9921875, + "learning_rate": 0.0001544030072828517, + "loss": 2.1705, + "step": 269320 + }, + { + "epoch": 0.63, + "grad_norm": 2.015625, + "learning_rate": 0.00015440145643554873, + "loss": 2.1183, + "step": 269325 + }, + { + "epoch": 0.63, + "grad_norm": 2.203125, + "learning_rate": 0.00015439990556966122, + "loss": 2.0755, + "step": 269330 + }, + { + "epoch": 0.63, + "grad_norm": 2.203125, + "learning_rate": 0.00015439835468518976, + "loss": 2.263, + "step": 269335 + }, + { + "epoch": 0.63, + "grad_norm": 2.4375, + "learning_rate": 0.00015439680378213477, + "loss": 2.1524, + "step": 269340 + }, + { + "epoch": 0.63, + "grad_norm": 2.40625, + "learning_rate": 0.00015439525286049688, + "loss": 2.0364, + "step": 269345 + }, + { + "epoch": 0.63, + "grad_norm": 2.1875, + "learning_rate": 0.00015439370192027658, + "loss": 2.1384, + "step": 269350 + }, + { + "epoch": 0.63, + "grad_norm": 2.359375, + "learning_rate": 0.0001543921509614744, + "loss": 1.9784, + "step": 269355 + }, + { + "epoch": 0.63, + "grad_norm": 2.296875, + "learning_rate": 0.00015439059998409088, + "loss": 2.0433, + "step": 269360 + }, + { + "epoch": 0.63, + "grad_norm": 2.0, + "learning_rate": 0.00015438904898812657, + "loss": 1.9851, + "step": 269365 + }, + { + "epoch": 0.63, + "grad_norm": 2.296875, + "learning_rate": 0.00015438749797358194, + "loss": 2.1065, + "step": 269370 + }, + { + "epoch": 0.63, + "grad_norm": 2.03125, + "learning_rate": 0.00015438594694045757, + "loss": 2.1638, + "step": 269375 + }, + { + "epoch": 0.63, + "grad_norm": 2.296875, + "learning_rate": 0.00015438439588875394, + "loss": 2.1114, + "step": 269380 + }, + { + "epoch": 0.63, + "grad_norm": 1.8828125, + "learning_rate": 0.00015438284481847166, + "loss": 2.2034, + "step": 269385 + }, + { + "epoch": 0.63, + "grad_norm": 2.515625, + "learning_rate": 0.0001543812937296112, + "loss": 2.1377, + "step": 269390 + }, + { + "epoch": 0.63, + "grad_norm": 2.15625, + "learning_rate": 0.0001543797426221731, + "loss": 2.0554, + "step": 269395 + }, + { + "epoch": 0.63, + "grad_norm": 2.125, + "learning_rate": 0.0001543781914961579, + "loss": 2.0169, + "step": 269400 + }, + { + "epoch": 0.63, + "grad_norm": 2.09375, + "learning_rate": 0.0001543766403515661, + "loss": 2.1384, + "step": 269405 + }, + { + "epoch": 0.63, + "grad_norm": 1.921875, + "learning_rate": 0.00015437508918839828, + "loss": 1.9575, + "step": 269410 + }, + { + "epoch": 0.63, + "grad_norm": 1.875, + "learning_rate": 0.00015437353800665493, + "loss": 2.1056, + "step": 269415 + }, + { + "epoch": 0.63, + "grad_norm": 2.0, + "learning_rate": 0.0001543719868063366, + "loss": 2.0351, + "step": 269420 + }, + { + "epoch": 0.63, + "grad_norm": 2.125, + "learning_rate": 0.0001543704355874438, + "loss": 2.0883, + "step": 269425 + }, + { + "epoch": 0.63, + "grad_norm": 2.078125, + "learning_rate": 0.0001543688843499771, + "loss": 2.1695, + "step": 269430 + }, + { + "epoch": 0.63, + "grad_norm": 2.578125, + "learning_rate": 0.000154367333093937, + "loss": 2.0334, + "step": 269435 + }, + { + "epoch": 0.63, + "grad_norm": 2.109375, + "learning_rate": 0.000154365781819324, + "loss": 2.2006, + "step": 269440 + }, + { + "epoch": 0.63, + "grad_norm": 1.4453125, + "learning_rate": 0.00015436423052613872, + "loss": 2.0337, + "step": 269445 + }, + { + "epoch": 0.63, + "grad_norm": 2.265625, + "learning_rate": 0.00015436267921438158, + "loss": 1.9703, + "step": 269450 + }, + { + "epoch": 0.63, + "grad_norm": 2.203125, + "learning_rate": 0.00015436112788405317, + "loss": 2.2964, + "step": 269455 + }, + { + "epoch": 0.63, + "grad_norm": 2.203125, + "learning_rate": 0.00015435957653515403, + "loss": 2.1959, + "step": 269460 + }, + { + "epoch": 0.63, + "grad_norm": 2.203125, + "learning_rate": 0.00015435802516768464, + "loss": 2.0483, + "step": 269465 + }, + { + "epoch": 0.63, + "grad_norm": 2.3125, + "learning_rate": 0.00015435647378164559, + "loss": 2.1348, + "step": 269470 + }, + { + "epoch": 0.63, + "grad_norm": 2.296875, + "learning_rate": 0.0001543549223770374, + "loss": 2.307, + "step": 269475 + }, + { + "epoch": 0.63, + "grad_norm": 2.015625, + "learning_rate": 0.00015435337095386054, + "loss": 2.1515, + "step": 269480 + }, + { + "epoch": 0.63, + "grad_norm": 2.453125, + "learning_rate": 0.0001543518195121156, + "loss": 2.1702, + "step": 269485 + }, + { + "epoch": 0.63, + "grad_norm": 1.8203125, + "learning_rate": 0.00015435026805180312, + "loss": 2.1793, + "step": 269490 + }, + { + "epoch": 0.63, + "grad_norm": 2.234375, + "learning_rate": 0.0001543487165729236, + "loss": 2.0871, + "step": 269495 + }, + { + "epoch": 0.63, + "grad_norm": 1.8671875, + "learning_rate": 0.00015434716507547753, + "loss": 1.9805, + "step": 269500 + }, + { + "epoch": 0.63, + "grad_norm": 1.90625, + "learning_rate": 0.0001543456135594655, + "loss": 2.1817, + "step": 269505 + }, + { + "epoch": 0.63, + "grad_norm": 2.15625, + "learning_rate": 0.00015434406202488802, + "loss": 2.1581, + "step": 269510 + }, + { + "epoch": 0.63, + "grad_norm": 1.984375, + "learning_rate": 0.00015434251047174565, + "loss": 2.1152, + "step": 269515 + }, + { + "epoch": 0.63, + "grad_norm": 2.71875, + "learning_rate": 0.00015434095890003887, + "loss": 2.25, + "step": 269520 + }, + { + "epoch": 0.63, + "grad_norm": 1.96875, + "learning_rate": 0.00015433940730976827, + "loss": 1.9053, + "step": 269525 + }, + { + "epoch": 0.63, + "grad_norm": 2.65625, + "learning_rate": 0.0001543378557009343, + "loss": 2.2987, + "step": 269530 + }, + { + "epoch": 0.63, + "grad_norm": 2.4375, + "learning_rate": 0.00015433630407353754, + "loss": 1.8653, + "step": 269535 + }, + { + "epoch": 0.63, + "grad_norm": 1.78125, + "learning_rate": 0.0001543347524275785, + "loss": 1.9812, + "step": 269540 + }, + { + "epoch": 0.63, + "grad_norm": 2.390625, + "learning_rate": 0.00015433320076305775, + "loss": 1.9938, + "step": 269545 + }, + { + "epoch": 0.63, + "grad_norm": 2.28125, + "learning_rate": 0.00015433164907997576, + "loss": 1.9429, + "step": 269550 + }, + { + "epoch": 0.63, + "grad_norm": 2.4375, + "learning_rate": 0.00015433009737833312, + "loss": 1.9678, + "step": 269555 + }, + { + "epoch": 0.63, + "grad_norm": 2.65625, + "learning_rate": 0.00015432854565813035, + "loss": 2.1173, + "step": 269560 + }, + { + "epoch": 0.63, + "grad_norm": 2.234375, + "learning_rate": 0.00015432699391936794, + "loss": 2.1445, + "step": 269565 + }, + { + "epoch": 0.63, + "grad_norm": 2.03125, + "learning_rate": 0.00015432544216204645, + "loss": 2.1535, + "step": 269570 + }, + { + "epoch": 0.63, + "grad_norm": 2.59375, + "learning_rate": 0.0001543238903861664, + "loss": 2.0486, + "step": 269575 + }, + { + "epoch": 0.63, + "grad_norm": 2.234375, + "learning_rate": 0.00015432233859172834, + "loss": 2.1621, + "step": 269580 + }, + { + "epoch": 0.63, + "grad_norm": 2.15625, + "learning_rate": 0.00015432078677873274, + "loss": 1.8954, + "step": 269585 + }, + { + "epoch": 0.63, + "grad_norm": 2.578125, + "learning_rate": 0.0001543192349471802, + "loss": 2.2142, + "step": 269590 + }, + { + "epoch": 0.63, + "grad_norm": 2.1875, + "learning_rate": 0.00015431768309707124, + "loss": 1.9542, + "step": 269595 + }, + { + "epoch": 0.63, + "grad_norm": 2.0625, + "learning_rate": 0.00015431613122840636, + "loss": 2.1955, + "step": 269600 + }, + { + "epoch": 0.63, + "grad_norm": 1.953125, + "learning_rate": 0.00015431457934118612, + "loss": 2.1458, + "step": 269605 + }, + { + "epoch": 0.63, + "grad_norm": 2.65625, + "learning_rate": 0.00015431302743541098, + "loss": 2.1414, + "step": 269610 + }, + { + "epoch": 0.63, + "grad_norm": 2.484375, + "learning_rate": 0.0001543114755110816, + "loss": 1.9991, + "step": 269615 + }, + { + "epoch": 0.63, + "grad_norm": 2.21875, + "learning_rate": 0.0001543099235681984, + "loss": 2.1977, + "step": 269620 + }, + { + "epoch": 0.63, + "grad_norm": 1.96875, + "learning_rate": 0.00015430837160676194, + "loss": 1.938, + "step": 269625 + }, + { + "epoch": 0.63, + "grad_norm": 1.96875, + "learning_rate": 0.00015430681962677278, + "loss": 2.1897, + "step": 269630 + }, + { + "epoch": 0.63, + "grad_norm": 2.328125, + "learning_rate": 0.00015430526762823138, + "loss": 2.2454, + "step": 269635 + }, + { + "epoch": 0.63, + "grad_norm": 2.328125, + "learning_rate": 0.00015430371561113836, + "loss": 2.2574, + "step": 269640 + }, + { + "epoch": 0.63, + "grad_norm": 2.203125, + "learning_rate": 0.00015430216357549417, + "loss": 2.0214, + "step": 269645 + }, + { + "epoch": 0.63, + "grad_norm": 2.1875, + "learning_rate": 0.0001543006115212994, + "loss": 2.078, + "step": 269650 + }, + { + "epoch": 0.63, + "grad_norm": 2.25, + "learning_rate": 0.00015429905944855453, + "loss": 2.0411, + "step": 269655 + }, + { + "epoch": 0.63, + "grad_norm": 2.421875, + "learning_rate": 0.00015429750735726017, + "loss": 2.3071, + "step": 269660 + }, + { + "epoch": 0.63, + "grad_norm": 2.0, + "learning_rate": 0.00015429595524741677, + "loss": 2.2077, + "step": 269665 + }, + { + "epoch": 0.63, + "grad_norm": 1.7421875, + "learning_rate": 0.00015429440311902487, + "loss": 2.1128, + "step": 269670 + }, + { + "epoch": 0.63, + "grad_norm": 1.890625, + "learning_rate": 0.00015429285097208503, + "loss": 2.0139, + "step": 269675 + }, + { + "epoch": 0.63, + "grad_norm": 2.359375, + "learning_rate": 0.00015429129880659777, + "loss": 2.1969, + "step": 269680 + }, + { + "epoch": 0.63, + "grad_norm": 2.078125, + "learning_rate": 0.0001542897466225636, + "loss": 1.9878, + "step": 269685 + }, + { + "epoch": 0.63, + "grad_norm": 2.3125, + "learning_rate": 0.00015428819441998308, + "loss": 1.8734, + "step": 269690 + }, + { + "epoch": 0.63, + "grad_norm": 2.25, + "learning_rate": 0.00015428664219885673, + "loss": 2.2284, + "step": 269695 + }, + { + "epoch": 0.63, + "grad_norm": 2.375, + "learning_rate": 0.00015428508995918506, + "loss": 2.2654, + "step": 269700 + }, + { + "epoch": 0.63, + "grad_norm": 2.296875, + "learning_rate": 0.00015428353770096867, + "loss": 1.9324, + "step": 269705 + }, + { + "epoch": 0.63, + "grad_norm": 2.234375, + "learning_rate": 0.000154281985424208, + "loss": 2.1299, + "step": 269710 + }, + { + "epoch": 0.63, + "grad_norm": 2.125, + "learning_rate": 0.0001542804331289036, + "loss": 2.2031, + "step": 269715 + }, + { + "epoch": 0.63, + "grad_norm": 2.171875, + "learning_rate": 0.00015427888081505605, + "loss": 2.0001, + "step": 269720 + }, + { + "epoch": 0.63, + "grad_norm": 2.71875, + "learning_rate": 0.00015427732848266585, + "loss": 1.9827, + "step": 269725 + }, + { + "epoch": 0.63, + "grad_norm": 2.03125, + "learning_rate": 0.00015427577613173353, + "loss": 2.1462, + "step": 269730 + }, + { + "epoch": 0.63, + "grad_norm": 2.359375, + "learning_rate": 0.00015427422376225962, + "loss": 2.0487, + "step": 269735 + }, + { + "epoch": 0.63, + "grad_norm": 2.28125, + "learning_rate": 0.00015427267137424464, + "loss": 2.033, + "step": 269740 + }, + { + "epoch": 0.63, + "grad_norm": 2.984375, + "learning_rate": 0.00015427111896768912, + "loss": 2.251, + "step": 269745 + }, + { + "epoch": 0.63, + "grad_norm": 2.5625, + "learning_rate": 0.00015426956654259364, + "loss": 2.0446, + "step": 269750 + }, + { + "epoch": 0.63, + "grad_norm": 2.78125, + "learning_rate": 0.00015426801409895865, + "loss": 2.0739, + "step": 269755 + }, + { + "epoch": 0.63, + "grad_norm": 2.265625, + "learning_rate": 0.00015426646163678475, + "loss": 2.1973, + "step": 269760 + }, + { + "epoch": 0.63, + "grad_norm": 2.671875, + "learning_rate": 0.00015426490915607243, + "loss": 2.3043, + "step": 269765 + }, + { + "epoch": 0.63, + "grad_norm": 2.234375, + "learning_rate": 0.00015426335665682226, + "loss": 2.1242, + "step": 269770 + }, + { + "epoch": 0.63, + "grad_norm": 2.21875, + "learning_rate": 0.0001542618041390347, + "loss": 2.1524, + "step": 269775 + }, + { + "epoch": 0.63, + "grad_norm": 2.28125, + "learning_rate": 0.00015426025160271035, + "loss": 2.0408, + "step": 269780 + }, + { + "epoch": 0.63, + "grad_norm": 2.015625, + "learning_rate": 0.00015425869904784972, + "loss": 2.0996, + "step": 269785 + }, + { + "epoch": 0.63, + "grad_norm": 2.109375, + "learning_rate": 0.00015425714647445332, + "loss": 2.0352, + "step": 269790 + }, + { + "epoch": 0.63, + "grad_norm": 2.34375, + "learning_rate": 0.0001542555938825217, + "loss": 1.9682, + "step": 269795 + }, + { + "epoch": 0.63, + "grad_norm": 2.640625, + "learning_rate": 0.00015425404127205537, + "loss": 2.2162, + "step": 269800 + }, + { + "epoch": 0.63, + "grad_norm": 2.265625, + "learning_rate": 0.00015425248864305488, + "loss": 2.0912, + "step": 269805 + }, + { + "epoch": 0.63, + "grad_norm": 2.484375, + "learning_rate": 0.0001542509359955208, + "loss": 2.0814, + "step": 269810 + }, + { + "epoch": 0.63, + "grad_norm": 2.3125, + "learning_rate": 0.00015424938332945357, + "loss": 2.1863, + "step": 269815 + }, + { + "epoch": 0.63, + "grad_norm": 2.09375, + "learning_rate": 0.00015424783064485378, + "loss": 2.3277, + "step": 269820 + }, + { + "epoch": 0.63, + "grad_norm": 2.015625, + "learning_rate": 0.00015424627794172194, + "loss": 2.0933, + "step": 269825 + }, + { + "epoch": 0.63, + "grad_norm": 2.46875, + "learning_rate": 0.0001542447252200586, + "loss": 1.974, + "step": 269830 + }, + { + "epoch": 0.64, + "grad_norm": 2.34375, + "learning_rate": 0.00015424317247986427, + "loss": 2.1418, + "step": 269835 + }, + { + "epoch": 0.64, + "grad_norm": 1.859375, + "learning_rate": 0.00015424161972113945, + "loss": 2.0202, + "step": 269840 + }, + { + "epoch": 0.64, + "grad_norm": 2.625, + "learning_rate": 0.00015424006694388478, + "loss": 2.23, + "step": 269845 + }, + { + "epoch": 0.64, + "grad_norm": 2.65625, + "learning_rate": 0.00015423851414810068, + "loss": 2.1732, + "step": 269850 + }, + { + "epoch": 0.64, + "grad_norm": 2.21875, + "learning_rate": 0.00015423696133378773, + "loss": 1.8395, + "step": 269855 + }, + { + "epoch": 0.64, + "grad_norm": 2.34375, + "learning_rate": 0.00015423540850094644, + "loss": 2.0405, + "step": 269860 + }, + { + "epoch": 0.64, + "grad_norm": 2.484375, + "learning_rate": 0.00015423385564957738, + "loss": 2.1237, + "step": 269865 + }, + { + "epoch": 0.64, + "grad_norm": 2.484375, + "learning_rate": 0.00015423230277968104, + "loss": 2.0334, + "step": 269870 + }, + { + "epoch": 0.64, + "grad_norm": 2.578125, + "learning_rate": 0.00015423074989125794, + "loss": 2.0174, + "step": 269875 + }, + { + "epoch": 0.64, + "grad_norm": 3.578125, + "learning_rate": 0.00015422919698430864, + "loss": 2.0512, + "step": 269880 + }, + { + "epoch": 0.64, + "grad_norm": 1.96875, + "learning_rate": 0.00015422764405883366, + "loss": 2.1753, + "step": 269885 + }, + { + "epoch": 0.64, + "grad_norm": 2.171875, + "learning_rate": 0.00015422609111483354, + "loss": 2.1168, + "step": 269890 + }, + { + "epoch": 0.64, + "grad_norm": 1.8828125, + "learning_rate": 0.00015422453815230883, + "loss": 1.976, + "step": 269895 + }, + { + "epoch": 0.64, + "grad_norm": 2.328125, + "learning_rate": 0.00015422298517126, + "loss": 2.0676, + "step": 269900 + }, + { + "epoch": 0.64, + "grad_norm": 2.21875, + "learning_rate": 0.00015422143217168758, + "loss": 2.0388, + "step": 269905 + }, + { + "epoch": 0.64, + "grad_norm": 2.25, + "learning_rate": 0.00015421987915359222, + "loss": 1.9634, + "step": 269910 + }, + { + "epoch": 0.64, + "grad_norm": 1.828125, + "learning_rate": 0.00015421832611697432, + "loss": 2.1608, + "step": 269915 + }, + { + "epoch": 0.64, + "grad_norm": 2.265625, + "learning_rate": 0.00015421677306183445, + "loss": 2.1431, + "step": 269920 + }, + { + "epoch": 0.64, + "grad_norm": 2.609375, + "learning_rate": 0.00015421521998817316, + "loss": 2.0493, + "step": 269925 + }, + { + "epoch": 0.64, + "grad_norm": 2.390625, + "learning_rate": 0.00015421366689599095, + "loss": 2.0187, + "step": 269930 + }, + { + "epoch": 0.64, + "grad_norm": 2.1875, + "learning_rate": 0.0001542121137852884, + "loss": 2.1889, + "step": 269935 + }, + { + "epoch": 0.64, + "grad_norm": 2.328125, + "learning_rate": 0.00015421056065606598, + "loss": 2.1401, + "step": 269940 + }, + { + "epoch": 0.64, + "grad_norm": 2.09375, + "learning_rate": 0.00015420900750832427, + "loss": 1.9941, + "step": 269945 + }, + { + "epoch": 0.64, + "grad_norm": 2.953125, + "learning_rate": 0.0001542074543420638, + "loss": 2.1145, + "step": 269950 + }, + { + "epoch": 0.64, + "grad_norm": 2.25, + "learning_rate": 0.00015420590115728503, + "loss": 2.1051, + "step": 269955 + }, + { + "epoch": 0.64, + "grad_norm": 1.8828125, + "learning_rate": 0.00015420434795398858, + "loss": 2.1271, + "step": 269960 + }, + { + "epoch": 0.64, + "grad_norm": 2.09375, + "learning_rate": 0.0001542027947321749, + "loss": 2.207, + "step": 269965 + }, + { + "epoch": 0.64, + "grad_norm": 2.125, + "learning_rate": 0.0001542012414918446, + "loss": 1.9711, + "step": 269970 + }, + { + "epoch": 0.64, + "grad_norm": 2.078125, + "learning_rate": 0.00015419968823299817, + "loss": 2.1262, + "step": 269975 + }, + { + "epoch": 0.64, + "grad_norm": 2.109375, + "learning_rate": 0.00015419813495563613, + "loss": 1.9943, + "step": 269980 + }, + { + "epoch": 0.64, + "grad_norm": 2.171875, + "learning_rate": 0.000154196581659759, + "loss": 2.0682, + "step": 269985 + }, + { + "epoch": 0.64, + "grad_norm": 2.453125, + "learning_rate": 0.00015419502834536738, + "loss": 1.9983, + "step": 269990 + }, + { + "epoch": 0.64, + "grad_norm": 2.25, + "learning_rate": 0.00015419347501246174, + "loss": 2.1363, + "step": 269995 + }, + { + "epoch": 0.64, + "grad_norm": 2.21875, + "learning_rate": 0.00015419192166104263, + "loss": 2.0446, + "step": 270000 + }, + { + "epoch": 0.64, + "grad_norm": 1.453125, + "learning_rate": 0.00015419036829111057, + "loss": 1.8469, + "step": 270005 + }, + { + "epoch": 0.64, + "grad_norm": 2.25, + "learning_rate": 0.0001541888149026661, + "loss": 2.0582, + "step": 270010 + }, + { + "epoch": 0.64, + "grad_norm": 2.21875, + "learning_rate": 0.00015418726149570974, + "loss": 2.219, + "step": 270015 + }, + { + "epoch": 0.64, + "grad_norm": 2.515625, + "learning_rate": 0.00015418570807024205, + "loss": 2.1197, + "step": 270020 + }, + { + "epoch": 0.64, + "grad_norm": 2.234375, + "learning_rate": 0.00015418415462626353, + "loss": 2.1973, + "step": 270025 + }, + { + "epoch": 0.64, + "grad_norm": 2.453125, + "learning_rate": 0.0001541826011637747, + "loss": 2.0024, + "step": 270030 + }, + { + "epoch": 0.64, + "grad_norm": 1.953125, + "learning_rate": 0.00015418104768277614, + "loss": 2.1264, + "step": 270035 + }, + { + "epoch": 0.64, + "grad_norm": 2.515625, + "learning_rate": 0.0001541794941832683, + "loss": 2.1557, + "step": 270040 + }, + { + "epoch": 0.64, + "grad_norm": 2.4375, + "learning_rate": 0.00015417794066525185, + "loss": 2.1192, + "step": 270045 + }, + { + "epoch": 0.64, + "grad_norm": 2.15625, + "learning_rate": 0.00015417638712872714, + "loss": 2.0253, + "step": 270050 + }, + { + "epoch": 0.64, + "grad_norm": 1.75, + "learning_rate": 0.00015417483357369486, + "loss": 1.9789, + "step": 270055 + }, + { + "epoch": 0.64, + "grad_norm": 2.1875, + "learning_rate": 0.00015417328000015546, + "loss": 2.0297, + "step": 270060 + }, + { + "epoch": 0.64, + "grad_norm": 2.71875, + "learning_rate": 0.00015417172640810946, + "loss": 2.1199, + "step": 270065 + }, + { + "epoch": 0.64, + "grad_norm": 2.03125, + "learning_rate": 0.00015417017279755743, + "loss": 2.192, + "step": 270070 + }, + { + "epoch": 0.64, + "grad_norm": 2.1875, + "learning_rate": 0.00015416861916849988, + "loss": 2.1928, + "step": 270075 + }, + { + "epoch": 0.64, + "grad_norm": 2.34375, + "learning_rate": 0.0001541670655209374, + "loss": 2.0419, + "step": 270080 + }, + { + "epoch": 0.64, + "grad_norm": 1.7421875, + "learning_rate": 0.0001541655118548704, + "loss": 2.0727, + "step": 270085 + }, + { + "epoch": 0.64, + "grad_norm": 1.9453125, + "learning_rate": 0.00015416395817029947, + "loss": 2.0828, + "step": 270090 + }, + { + "epoch": 0.64, + "grad_norm": 2.390625, + "learning_rate": 0.00015416240446722518, + "loss": 2.1265, + "step": 270095 + }, + { + "epoch": 0.64, + "grad_norm": 2.171875, + "learning_rate": 0.00015416085074564806, + "loss": 1.9823, + "step": 270100 + }, + { + "epoch": 0.64, + "grad_norm": 2.265625, + "learning_rate": 0.00015415929700556856, + "loss": 2.1123, + "step": 270105 + }, + { + "epoch": 0.64, + "grad_norm": 2.5, + "learning_rate": 0.00015415774324698732, + "loss": 2.1392, + "step": 270110 + }, + { + "epoch": 0.64, + "grad_norm": 2.28125, + "learning_rate": 0.00015415618946990476, + "loss": 1.9947, + "step": 270115 + }, + { + "epoch": 0.64, + "grad_norm": 2.1875, + "learning_rate": 0.00015415463567432146, + "loss": 2.0633, + "step": 270120 + }, + { + "epoch": 0.64, + "grad_norm": 2.53125, + "learning_rate": 0.00015415308186023802, + "loss": 2.1411, + "step": 270125 + }, + { + "epoch": 0.64, + "grad_norm": 2.234375, + "learning_rate": 0.00015415152802765483, + "loss": 2.1712, + "step": 270130 + }, + { + "epoch": 0.64, + "grad_norm": 2.8125, + "learning_rate": 0.00015414997417657254, + "loss": 1.9907, + "step": 270135 + }, + { + "epoch": 0.64, + "grad_norm": 2.375, + "learning_rate": 0.0001541484203069916, + "loss": 2.2117, + "step": 270140 + }, + { + "epoch": 0.64, + "grad_norm": 2.03125, + "learning_rate": 0.0001541468664189126, + "loss": 2.1531, + "step": 270145 + }, + { + "epoch": 0.64, + "grad_norm": 2.359375, + "learning_rate": 0.00015414531251233606, + "loss": 1.9954, + "step": 270150 + }, + { + "epoch": 0.64, + "grad_norm": 2.28125, + "learning_rate": 0.00015414375858726248, + "loss": 1.9882, + "step": 270155 + }, + { + "epoch": 0.64, + "grad_norm": 1.9375, + "learning_rate": 0.00015414220464369244, + "loss": 2.0949, + "step": 270160 + }, + { + "epoch": 0.64, + "grad_norm": 2.640625, + "learning_rate": 0.0001541406506816264, + "loss": 2.1557, + "step": 270165 + }, + { + "epoch": 0.64, + "grad_norm": 2.59375, + "learning_rate": 0.00015413909670106496, + "loss": 2.1436, + "step": 270170 + }, + { + "epoch": 0.64, + "grad_norm": 2.09375, + "learning_rate": 0.0001541375427020086, + "loss": 2.173, + "step": 270175 + }, + { + "epoch": 0.64, + "grad_norm": 2.25, + "learning_rate": 0.0001541359886844579, + "loss": 2.1032, + "step": 270180 + }, + { + "epoch": 0.64, + "grad_norm": 2.234375, + "learning_rate": 0.00015413443464841334, + "loss": 2.026, + "step": 270185 + }, + { + "epoch": 0.64, + "grad_norm": 2.171875, + "learning_rate": 0.00015413288059387548, + "loss": 2.0394, + "step": 270190 + }, + { + "epoch": 0.64, + "grad_norm": 2.140625, + "learning_rate": 0.00015413132652084487, + "loss": 2.1445, + "step": 270195 + }, + { + "epoch": 0.64, + "grad_norm": 2.15625, + "learning_rate": 0.00015412977242932198, + "loss": 2.2326, + "step": 270200 + }, + { + "epoch": 0.64, + "grad_norm": 2.15625, + "learning_rate": 0.00015412821831930744, + "loss": 1.9813, + "step": 270205 + }, + { + "epoch": 0.64, + "grad_norm": 1.9765625, + "learning_rate": 0.00015412666419080165, + "loss": 2.0066, + "step": 270210 + }, + { + "epoch": 0.64, + "grad_norm": 2.359375, + "learning_rate": 0.00015412511004380525, + "loss": 2.1352, + "step": 270215 + }, + { + "epoch": 0.64, + "grad_norm": 1.921875, + "learning_rate": 0.00015412355587831873, + "loss": 1.974, + "step": 270220 + }, + { + "epoch": 0.64, + "grad_norm": 3.0625, + "learning_rate": 0.00015412200169434258, + "loss": 2.1714, + "step": 270225 + }, + { + "epoch": 0.64, + "grad_norm": 1.6953125, + "learning_rate": 0.00015412044749187742, + "loss": 2.0544, + "step": 270230 + }, + { + "epoch": 0.64, + "grad_norm": 2.109375, + "learning_rate": 0.0001541188932709237, + "loss": 1.9841, + "step": 270235 + }, + { + "epoch": 0.64, + "grad_norm": 2.078125, + "learning_rate": 0.00015411733903148204, + "loss": 2.1563, + "step": 270240 + }, + { + "epoch": 0.64, + "grad_norm": 2.5, + "learning_rate": 0.00015411578477355286, + "loss": 1.925, + "step": 270245 + }, + { + "epoch": 0.64, + "grad_norm": 2.296875, + "learning_rate": 0.00015411423049713677, + "loss": 2.04, + "step": 270250 + }, + { + "epoch": 0.64, + "grad_norm": 2.078125, + "learning_rate": 0.00015411267620223424, + "loss": 2.1004, + "step": 270255 + }, + { + "epoch": 0.64, + "grad_norm": 2.390625, + "learning_rate": 0.0001541111218888459, + "loss": 2.0809, + "step": 270260 + }, + { + "epoch": 0.64, + "grad_norm": 2.1875, + "learning_rate": 0.0001541095675569722, + "loss": 2.2631, + "step": 270265 + }, + { + "epoch": 0.64, + "grad_norm": 2.046875, + "learning_rate": 0.00015410801320661367, + "loss": 2.0463, + "step": 270270 + }, + { + "epoch": 0.64, + "grad_norm": 2.390625, + "learning_rate": 0.00015410645883777086, + "loss": 2.0818, + "step": 270275 + }, + { + "epoch": 0.64, + "grad_norm": 2.109375, + "learning_rate": 0.0001541049044504443, + "loss": 2.144, + "step": 270280 + }, + { + "epoch": 0.64, + "grad_norm": 2.34375, + "learning_rate": 0.00015410335004463455, + "loss": 2.0728, + "step": 270285 + }, + { + "epoch": 0.64, + "grad_norm": 2.203125, + "learning_rate": 0.0001541017956203421, + "loss": 2.1181, + "step": 270290 + }, + { + "epoch": 0.64, + "grad_norm": 2.203125, + "learning_rate": 0.0001541002411775675, + "loss": 2.0411, + "step": 270295 + }, + { + "epoch": 0.64, + "grad_norm": 1.8828125, + "learning_rate": 0.00015409868671631126, + "loss": 2.1237, + "step": 270300 + }, + { + "epoch": 0.64, + "grad_norm": 1.9765625, + "learning_rate": 0.00015409713223657394, + "loss": 2.0472, + "step": 270305 + }, + { + "epoch": 0.64, + "grad_norm": 2.28125, + "learning_rate": 0.00015409557773835605, + "loss": 2.0727, + "step": 270310 + }, + { + "epoch": 0.64, + "grad_norm": 2.296875, + "learning_rate": 0.00015409402322165815, + "loss": 2.1369, + "step": 270315 + }, + { + "epoch": 0.64, + "grad_norm": 2.4375, + "learning_rate": 0.00015409246868648073, + "loss": 2.0816, + "step": 270320 + }, + { + "epoch": 0.64, + "grad_norm": 2.140625, + "learning_rate": 0.00015409091413282435, + "loss": 2.1609, + "step": 270325 + }, + { + "epoch": 0.64, + "grad_norm": 2.1875, + "learning_rate": 0.00015408935956068954, + "loss": 2.3129, + "step": 270330 + }, + { + "epoch": 0.64, + "grad_norm": 2.484375, + "learning_rate": 0.00015408780497007683, + "loss": 2.0775, + "step": 270335 + }, + { + "epoch": 0.64, + "grad_norm": 2.25, + "learning_rate": 0.00015408625036098672, + "loss": 2.1313, + "step": 270340 + }, + { + "epoch": 0.64, + "grad_norm": 2.4375, + "learning_rate": 0.0001540846957334198, + "loss": 1.9786, + "step": 270345 + }, + { + "epoch": 0.64, + "grad_norm": 1.65625, + "learning_rate": 0.00015408314108737652, + "loss": 2.0414, + "step": 270350 + }, + { + "epoch": 0.64, + "grad_norm": 1.9765625, + "learning_rate": 0.0001540815864228575, + "loss": 2.246, + "step": 270355 + }, + { + "epoch": 0.64, + "grad_norm": 2.421875, + "learning_rate": 0.0001540800317398632, + "loss": 2.1597, + "step": 270360 + }, + { + "epoch": 0.64, + "grad_norm": 2.609375, + "learning_rate": 0.0001540784770383942, + "loss": 1.9781, + "step": 270365 + }, + { + "epoch": 0.64, + "grad_norm": 2.234375, + "learning_rate": 0.00015407692231845098, + "loss": 2.0681, + "step": 270370 + }, + { + "epoch": 0.64, + "grad_norm": 2.265625, + "learning_rate": 0.00015407536758003416, + "loss": 2.0266, + "step": 270375 + }, + { + "epoch": 0.64, + "grad_norm": 2.84375, + "learning_rate": 0.00015407381282314417, + "loss": 2.2024, + "step": 270380 + }, + { + "epoch": 0.64, + "grad_norm": 2.3125, + "learning_rate": 0.00015407225804778157, + "loss": 2.0068, + "step": 270385 + }, + { + "epoch": 0.64, + "grad_norm": 2.046875, + "learning_rate": 0.00015407070325394693, + "loss": 2.1461, + "step": 270390 + }, + { + "epoch": 0.64, + "grad_norm": 1.84375, + "learning_rate": 0.00015406914844164077, + "loss": 2.1415, + "step": 270395 + }, + { + "epoch": 0.64, + "grad_norm": 2.9375, + "learning_rate": 0.0001540675936108636, + "loss": 2.1831, + "step": 270400 + }, + { + "epoch": 0.64, + "grad_norm": 2.234375, + "learning_rate": 0.00015406603876161597, + "loss": 2.0865, + "step": 270405 + }, + { + "epoch": 0.64, + "grad_norm": 3.078125, + "learning_rate": 0.00015406448389389837, + "loss": 2.1723, + "step": 270410 + }, + { + "epoch": 0.64, + "grad_norm": 2.03125, + "learning_rate": 0.00015406292900771137, + "loss": 2.0652, + "step": 270415 + }, + { + "epoch": 0.64, + "grad_norm": 1.9765625, + "learning_rate": 0.00015406137410305551, + "loss": 2.2236, + "step": 270420 + }, + { + "epoch": 0.64, + "grad_norm": 2.125, + "learning_rate": 0.0001540598191799313, + "loss": 2.1827, + "step": 270425 + }, + { + "epoch": 0.64, + "grad_norm": 2.390625, + "learning_rate": 0.00015405826423833927, + "loss": 2.0833, + "step": 270430 + }, + { + "epoch": 0.64, + "grad_norm": 2.953125, + "learning_rate": 0.00015405670927827995, + "loss": 2.1695, + "step": 270435 + }, + { + "epoch": 0.64, + "grad_norm": 2.046875, + "learning_rate": 0.00015405515429975388, + "loss": 2.2528, + "step": 270440 + }, + { + "epoch": 0.64, + "grad_norm": 2.234375, + "learning_rate": 0.0001540535993027616, + "loss": 2.0014, + "step": 270445 + }, + { + "epoch": 0.64, + "grad_norm": 2.265625, + "learning_rate": 0.00015405204428730362, + "loss": 2.1519, + "step": 270450 + }, + { + "epoch": 0.64, + "grad_norm": 2.09375, + "learning_rate": 0.00015405048925338052, + "loss": 1.9433, + "step": 270455 + }, + { + "epoch": 0.64, + "grad_norm": 2.0, + "learning_rate": 0.00015404893420099274, + "loss": 1.8497, + "step": 270460 + }, + { + "epoch": 0.64, + "grad_norm": 1.75, + "learning_rate": 0.00015404737913014087, + "loss": 1.9716, + "step": 270465 + }, + { + "epoch": 0.64, + "grad_norm": 1.9921875, + "learning_rate": 0.00015404582404082544, + "loss": 1.8258, + "step": 270470 + }, + { + "epoch": 0.64, + "grad_norm": 2.203125, + "learning_rate": 0.000154044268933047, + "loss": 2.0576, + "step": 270475 + }, + { + "epoch": 0.64, + "grad_norm": 2.484375, + "learning_rate": 0.00015404271380680605, + "loss": 2.02, + "step": 270480 + }, + { + "epoch": 0.64, + "grad_norm": 2.296875, + "learning_rate": 0.00015404115866210312, + "loss": 1.9844, + "step": 270485 + }, + { + "epoch": 0.64, + "grad_norm": 2.375, + "learning_rate": 0.00015403960349893872, + "loss": 1.9966, + "step": 270490 + }, + { + "epoch": 0.64, + "grad_norm": 3.03125, + "learning_rate": 0.00015403804831731343, + "loss": 2.0724, + "step": 270495 + }, + { + "epoch": 0.64, + "grad_norm": 1.921875, + "learning_rate": 0.0001540364931172278, + "loss": 2.1185, + "step": 270500 + }, + { + "epoch": 0.64, + "grad_norm": 1.8046875, + "learning_rate": 0.0001540349378986823, + "loss": 2.046, + "step": 270505 + }, + { + "epoch": 0.64, + "grad_norm": 1.9140625, + "learning_rate": 0.00015403338266167747, + "loss": 2.1215, + "step": 270510 + }, + { + "epoch": 0.64, + "grad_norm": 1.671875, + "learning_rate": 0.00015403182740621389, + "loss": 2.1336, + "step": 270515 + }, + { + "epoch": 0.64, + "grad_norm": 1.953125, + "learning_rate": 0.00015403027213229203, + "loss": 2.0662, + "step": 270520 + }, + { + "epoch": 0.64, + "grad_norm": 2.21875, + "learning_rate": 0.00015402871683991244, + "loss": 2.0596, + "step": 270525 + }, + { + "epoch": 0.64, + "grad_norm": 2.25, + "learning_rate": 0.0001540271615290757, + "loss": 2.1674, + "step": 270530 + }, + { + "epoch": 0.64, + "grad_norm": 2.34375, + "learning_rate": 0.00015402560619978228, + "loss": 2.0862, + "step": 270535 + }, + { + "epoch": 0.64, + "grad_norm": 2.640625, + "learning_rate": 0.00015402405085203274, + "loss": 2.1365, + "step": 270540 + }, + { + "epoch": 0.64, + "grad_norm": 2.453125, + "learning_rate": 0.00015402249548582758, + "loss": 1.9614, + "step": 270545 + }, + { + "epoch": 0.64, + "grad_norm": 2.296875, + "learning_rate": 0.00015402094010116737, + "loss": 2.0439, + "step": 270550 + }, + { + "epoch": 0.64, + "grad_norm": 2.265625, + "learning_rate": 0.00015401938469805263, + "loss": 2.0884, + "step": 270555 + }, + { + "epoch": 0.64, + "grad_norm": 2.21875, + "learning_rate": 0.00015401782927648388, + "loss": 2.0914, + "step": 270560 + }, + { + "epoch": 0.64, + "grad_norm": 1.796875, + "learning_rate": 0.00015401627383646165, + "loss": 2.0997, + "step": 270565 + }, + { + "epoch": 0.64, + "grad_norm": 2.140625, + "learning_rate": 0.00015401471837798653, + "loss": 2.0365, + "step": 270570 + }, + { + "epoch": 0.64, + "grad_norm": 2.234375, + "learning_rate": 0.00015401316290105895, + "loss": 1.9911, + "step": 270575 + }, + { + "epoch": 0.64, + "grad_norm": 2.265625, + "learning_rate": 0.00015401160740567953, + "loss": 2.189, + "step": 270580 + }, + { + "epoch": 0.64, + "grad_norm": 2.0625, + "learning_rate": 0.00015401005189184875, + "loss": 2.0993, + "step": 270585 + }, + { + "epoch": 0.64, + "grad_norm": 2.1875, + "learning_rate": 0.00015400849635956716, + "loss": 2.3244, + "step": 270590 + }, + { + "epoch": 0.64, + "grad_norm": 1.9375, + "learning_rate": 0.00015400694080883529, + "loss": 1.8857, + "step": 270595 + }, + { + "epoch": 0.64, + "grad_norm": 2.078125, + "learning_rate": 0.00015400538523965365, + "loss": 1.9333, + "step": 270600 + }, + { + "epoch": 0.64, + "grad_norm": 2.140625, + "learning_rate": 0.0001540038296520228, + "loss": 1.9729, + "step": 270605 + }, + { + "epoch": 0.64, + "grad_norm": 1.859375, + "learning_rate": 0.0001540022740459433, + "loss": 2.1072, + "step": 270610 + }, + { + "epoch": 0.64, + "grad_norm": 1.8359375, + "learning_rate": 0.00015400071842141562, + "loss": 2.0472, + "step": 270615 + }, + { + "epoch": 0.64, + "grad_norm": 2.0, + "learning_rate": 0.00015399916277844032, + "loss": 2.096, + "step": 270620 + }, + { + "epoch": 0.64, + "grad_norm": 2.515625, + "learning_rate": 0.00015399760711701788, + "loss": 2.2921, + "step": 270625 + }, + { + "epoch": 0.64, + "grad_norm": 2.1875, + "learning_rate": 0.0001539960514371489, + "loss": 2.0265, + "step": 270630 + }, + { + "epoch": 0.64, + "grad_norm": 2.8125, + "learning_rate": 0.0001539944957388339, + "loss": 2.0611, + "step": 270635 + }, + { + "epoch": 0.64, + "grad_norm": 2.625, + "learning_rate": 0.00015399294002207343, + "loss": 1.977, + "step": 270640 + }, + { + "epoch": 0.64, + "grad_norm": 2.40625, + "learning_rate": 0.00015399138428686798, + "loss": 2.1124, + "step": 270645 + }, + { + "epoch": 0.64, + "grad_norm": 2.640625, + "learning_rate": 0.00015398982853321807, + "loss": 2.0107, + "step": 270650 + }, + { + "epoch": 0.64, + "grad_norm": 2.0, + "learning_rate": 0.00015398827276112424, + "loss": 2.1588, + "step": 270655 + }, + { + "epoch": 0.64, + "grad_norm": 2.125, + "learning_rate": 0.00015398671697058707, + "loss": 2.0447, + "step": 270660 + }, + { + "epoch": 0.64, + "grad_norm": 2.09375, + "learning_rate": 0.00015398516116160707, + "loss": 2.1223, + "step": 270665 + }, + { + "epoch": 0.64, + "grad_norm": 2.375, + "learning_rate": 0.00015398360533418472, + "loss": 1.8775, + "step": 270670 + }, + { + "epoch": 0.64, + "grad_norm": 2.296875, + "learning_rate": 0.00015398204948832063, + "loss": 1.991, + "step": 270675 + }, + { + "epoch": 0.64, + "grad_norm": 2.390625, + "learning_rate": 0.00015398049362401527, + "loss": 1.9498, + "step": 270680 + }, + { + "epoch": 0.64, + "grad_norm": 1.8515625, + "learning_rate": 0.0001539789377412692, + "loss": 2.0597, + "step": 270685 + }, + { + "epoch": 0.64, + "grad_norm": 2.53125, + "learning_rate": 0.00015397738184008293, + "loss": 2.0514, + "step": 270690 + }, + { + "epoch": 0.64, + "grad_norm": 2.015625, + "learning_rate": 0.00015397582592045704, + "loss": 2.038, + "step": 270695 + }, + { + "epoch": 0.64, + "grad_norm": 2.015625, + "learning_rate": 0.00015397426998239202, + "loss": 2.0581, + "step": 270700 + }, + { + "epoch": 0.64, + "grad_norm": 2.453125, + "learning_rate": 0.00015397271402588838, + "loss": 1.8371, + "step": 270705 + }, + { + "epoch": 0.64, + "grad_norm": 2.421875, + "learning_rate": 0.0001539711580509467, + "loss": 2.0674, + "step": 270710 + }, + { + "epoch": 0.64, + "grad_norm": 2.375, + "learning_rate": 0.00015396960205756749, + "loss": 2.1368, + "step": 270715 + }, + { + "epoch": 0.64, + "grad_norm": 2.21875, + "learning_rate": 0.0001539680460457513, + "loss": 2.1633, + "step": 270720 + }, + { + "epoch": 0.64, + "grad_norm": 2.328125, + "learning_rate": 0.00015396649001549862, + "loss": 2.013, + "step": 270725 + }, + { + "epoch": 0.64, + "grad_norm": 2.234375, + "learning_rate": 0.00015396493396681006, + "loss": 2.0289, + "step": 270730 + }, + { + "epoch": 0.64, + "grad_norm": 1.9609375, + "learning_rate": 0.00015396337789968606, + "loss": 2.147, + "step": 270735 + }, + { + "epoch": 0.64, + "grad_norm": 2.484375, + "learning_rate": 0.0001539618218141272, + "loss": 1.9455, + "step": 270740 + }, + { + "epoch": 0.64, + "grad_norm": 2.15625, + "learning_rate": 0.00015396026571013399, + "loss": 2.1291, + "step": 270745 + }, + { + "epoch": 0.64, + "grad_norm": 2.015625, + "learning_rate": 0.000153958709587707, + "loss": 1.8746, + "step": 270750 + }, + { + "epoch": 0.64, + "grad_norm": 2.71875, + "learning_rate": 0.0001539571534468467, + "loss": 2.0326, + "step": 270755 + }, + { + "epoch": 0.64, + "grad_norm": 2.203125, + "learning_rate": 0.0001539555972875537, + "loss": 2.1958, + "step": 270760 + }, + { + "epoch": 0.64, + "grad_norm": 1.9765625, + "learning_rate": 0.00015395404110982844, + "loss": 2.0234, + "step": 270765 + }, + { + "epoch": 0.64, + "grad_norm": 2.421875, + "learning_rate": 0.00015395248491367153, + "loss": 2.1035, + "step": 270770 + }, + { + "epoch": 0.64, + "grad_norm": 1.8203125, + "learning_rate": 0.00015395092869908348, + "loss": 2.0023, + "step": 270775 + }, + { + "epoch": 0.64, + "grad_norm": 1.9765625, + "learning_rate": 0.0001539493724660648, + "loss": 1.995, + "step": 270780 + }, + { + "epoch": 0.64, + "grad_norm": 2.234375, + "learning_rate": 0.00015394781621461609, + "loss": 2.0994, + "step": 270785 + }, + { + "epoch": 0.64, + "grad_norm": 2.171875, + "learning_rate": 0.00015394625994473774, + "loss": 1.9968, + "step": 270790 + }, + { + "epoch": 0.64, + "grad_norm": 1.953125, + "learning_rate": 0.00015394470365643042, + "loss": 2.0338, + "step": 270795 + }, + { + "epoch": 0.64, + "grad_norm": 2.140625, + "learning_rate": 0.00015394314734969462, + "loss": 2.1405, + "step": 270800 + }, + { + "epoch": 0.64, + "grad_norm": 1.6640625, + "learning_rate": 0.00015394159102453085, + "loss": 1.7539, + "step": 270805 + }, + { + "epoch": 0.64, + "grad_norm": 2.125, + "learning_rate": 0.00015394003468093966, + "loss": 2.0798, + "step": 270810 + }, + { + "epoch": 0.64, + "grad_norm": 1.8828125, + "learning_rate": 0.00015393847831892156, + "loss": 1.9211, + "step": 270815 + }, + { + "epoch": 0.64, + "grad_norm": 2.078125, + "learning_rate": 0.00015393692193847706, + "loss": 2.1276, + "step": 270820 + }, + { + "epoch": 0.64, + "grad_norm": 2.140625, + "learning_rate": 0.00015393536553960679, + "loss": 1.9227, + "step": 270825 + }, + { + "epoch": 0.64, + "grad_norm": 2.234375, + "learning_rate": 0.0001539338091223112, + "loss": 2.0088, + "step": 270830 + }, + { + "epoch": 0.64, + "grad_norm": 2.671875, + "learning_rate": 0.00015393225268659085, + "loss": 2.1126, + "step": 270835 + }, + { + "epoch": 0.64, + "grad_norm": 2.96875, + "learning_rate": 0.00015393069623244627, + "loss": 2.0085, + "step": 270840 + }, + { + "epoch": 0.64, + "grad_norm": 2.28125, + "learning_rate": 0.00015392913975987798, + "loss": 2.1484, + "step": 270845 + }, + { + "epoch": 0.64, + "grad_norm": 2.40625, + "learning_rate": 0.0001539275832688865, + "loss": 2.1627, + "step": 270850 + }, + { + "epoch": 0.64, + "grad_norm": 1.8203125, + "learning_rate": 0.00015392602675947238, + "loss": 2.2501, + "step": 270855 + }, + { + "epoch": 0.64, + "grad_norm": 2.21875, + "learning_rate": 0.00015392447023163615, + "loss": 2.0268, + "step": 270860 + }, + { + "epoch": 0.64, + "grad_norm": 1.9296875, + "learning_rate": 0.00015392291368537835, + "loss": 2.0776, + "step": 270865 + }, + { + "epoch": 0.64, + "grad_norm": 2.515625, + "learning_rate": 0.00015392135712069953, + "loss": 2.0379, + "step": 270870 + }, + { + "epoch": 0.64, + "grad_norm": 2.40625, + "learning_rate": 0.0001539198005376002, + "loss": 2.2015, + "step": 270875 + }, + { + "epoch": 0.64, + "grad_norm": 2.21875, + "learning_rate": 0.00015391824393608085, + "loss": 2.0978, + "step": 270880 + }, + { + "epoch": 0.64, + "grad_norm": 2.546875, + "learning_rate": 0.00015391668731614208, + "loss": 2.032, + "step": 270885 + }, + { + "epoch": 0.64, + "grad_norm": 1.875, + "learning_rate": 0.00015391513067778437, + "loss": 2.1432, + "step": 270890 + }, + { + "epoch": 0.64, + "grad_norm": 2.09375, + "learning_rate": 0.00015391357402100829, + "loss": 1.9237, + "step": 270895 + }, + { + "epoch": 0.64, + "grad_norm": 2.53125, + "learning_rate": 0.00015391201734581432, + "loss": 2.012, + "step": 270900 + }, + { + "epoch": 0.64, + "grad_norm": 2.359375, + "learning_rate": 0.00015391046065220306, + "loss": 2.0108, + "step": 270905 + }, + { + "epoch": 0.64, + "grad_norm": 2.109375, + "learning_rate": 0.000153908903940175, + "loss": 2.121, + "step": 270910 + }, + { + "epoch": 0.64, + "grad_norm": 2.375, + "learning_rate": 0.00015390734720973072, + "loss": 2.0148, + "step": 270915 + }, + { + "epoch": 0.64, + "grad_norm": 2.1875, + "learning_rate": 0.0001539057904608707, + "loss": 1.9316, + "step": 270920 + }, + { + "epoch": 0.64, + "grad_norm": 2.1875, + "learning_rate": 0.0001539042336935954, + "loss": 2.1334, + "step": 270925 + }, + { + "epoch": 0.64, + "grad_norm": 2.359375, + "learning_rate": 0.0001539026769079055, + "loss": 2.0578, + "step": 270930 + }, + { + "epoch": 0.64, + "grad_norm": 2.5625, + "learning_rate": 0.00015390112010380146, + "loss": 2.1202, + "step": 270935 + }, + { + "epoch": 0.64, + "grad_norm": 2.4375, + "learning_rate": 0.00015389956328128385, + "loss": 2.0881, + "step": 270940 + }, + { + "epoch": 0.64, + "grad_norm": 1.96875, + "learning_rate": 0.00015389800644035316, + "loss": 2.0602, + "step": 270945 + }, + { + "epoch": 0.64, + "grad_norm": 2.40625, + "learning_rate": 0.0001538964495810099, + "loss": 2.0156, + "step": 270950 + }, + { + "epoch": 0.64, + "grad_norm": 1.96875, + "learning_rate": 0.00015389489270325462, + "loss": 2.0724, + "step": 270955 + }, + { + "epoch": 0.64, + "grad_norm": 1.9921875, + "learning_rate": 0.00015389333580708795, + "loss": 2.1047, + "step": 270960 + }, + { + "epoch": 0.64, + "grad_norm": 2.46875, + "learning_rate": 0.00015389177889251027, + "loss": 2.0892, + "step": 270965 + }, + { + "epoch": 0.64, + "grad_norm": 2.21875, + "learning_rate": 0.00015389022195952222, + "loss": 2.1687, + "step": 270970 + }, + { + "epoch": 0.64, + "grad_norm": 1.859375, + "learning_rate": 0.00015388866500812425, + "loss": 2.2907, + "step": 270975 + }, + { + "epoch": 0.64, + "grad_norm": 2.171875, + "learning_rate": 0.000153887108038317, + "loss": 2.0036, + "step": 270980 + }, + { + "epoch": 0.64, + "grad_norm": 2.125, + "learning_rate": 0.00015388555105010088, + "loss": 2.1166, + "step": 270985 + }, + { + "epoch": 0.64, + "grad_norm": 2.359375, + "learning_rate": 0.0001538839940434765, + "loss": 2.0436, + "step": 270990 + }, + { + "epoch": 0.64, + "grad_norm": 2.03125, + "learning_rate": 0.00015388243701844437, + "loss": 2.1372, + "step": 270995 + }, + { + "epoch": 0.64, + "grad_norm": 2.015625, + "learning_rate": 0.000153880879975005, + "loss": 2.0319, + "step": 271000 + }, + { + "epoch": 0.64, + "grad_norm": 2.109375, + "learning_rate": 0.00015387932291315893, + "loss": 2.1576, + "step": 271005 + }, + { + "epoch": 0.64, + "grad_norm": 2.546875, + "learning_rate": 0.00015387776583290677, + "loss": 2.0651, + "step": 271010 + }, + { + "epoch": 0.64, + "grad_norm": 2.375, + "learning_rate": 0.00015387620873424896, + "loss": 2.0478, + "step": 271015 + }, + { + "epoch": 0.64, + "grad_norm": 2.015625, + "learning_rate": 0.00015387465161718607, + "loss": 2.1167, + "step": 271020 + }, + { + "epoch": 0.64, + "grad_norm": 1.9609375, + "learning_rate": 0.00015387309448171857, + "loss": 2.1268, + "step": 271025 + }, + { + "epoch": 0.64, + "grad_norm": 2.625, + "learning_rate": 0.0001538715373278471, + "loss": 2.0071, + "step": 271030 + }, + { + "epoch": 0.64, + "grad_norm": 2.34375, + "learning_rate": 0.00015386998015557213, + "loss": 2.0778, + "step": 271035 + }, + { + "epoch": 0.64, + "grad_norm": 2.078125, + "learning_rate": 0.00015386842296489416, + "loss": 2.2082, + "step": 271040 + }, + { + "epoch": 0.64, + "grad_norm": 2.171875, + "learning_rate": 0.00015386686575581378, + "loss": 2.2039, + "step": 271045 + }, + { + "epoch": 0.64, + "grad_norm": 2.453125, + "learning_rate": 0.0001538653085283315, + "loss": 2.0868, + "step": 271050 + }, + { + "epoch": 0.64, + "grad_norm": 2.359375, + "learning_rate": 0.0001538637512824479, + "loss": 2.2781, + "step": 271055 + }, + { + "epoch": 0.64, + "grad_norm": 1.890625, + "learning_rate": 0.0001538621940181634, + "loss": 2.0046, + "step": 271060 + }, + { + "epoch": 0.64, + "grad_norm": 1.9453125, + "learning_rate": 0.00015386063673547863, + "loss": 2.0727, + "step": 271065 + }, + { + "epoch": 0.64, + "grad_norm": 2.015625, + "learning_rate": 0.00015385907943439407, + "loss": 2.1836, + "step": 271070 + }, + { + "epoch": 0.64, + "grad_norm": 2.390625, + "learning_rate": 0.0001538575221149103, + "loss": 2.0851, + "step": 271075 + }, + { + "epoch": 0.64, + "grad_norm": 2.328125, + "learning_rate": 0.00015385596477702782, + "loss": 2.2361, + "step": 271080 + }, + { + "epoch": 0.64, + "grad_norm": 2.234375, + "learning_rate": 0.00015385440742074713, + "loss": 2.0759, + "step": 271085 + }, + { + "epoch": 0.64, + "grad_norm": 2.203125, + "learning_rate": 0.00015385285004606883, + "loss": 2.0823, + "step": 271090 + }, + { + "epoch": 0.64, + "grad_norm": 2.234375, + "learning_rate": 0.00015385129265299343, + "loss": 2.1352, + "step": 271095 + }, + { + "epoch": 0.64, + "grad_norm": 2.0625, + "learning_rate": 0.00015384973524152143, + "loss": 2.1434, + "step": 271100 + }, + { + "epoch": 0.64, + "grad_norm": 2.046875, + "learning_rate": 0.0001538481778116534, + "loss": 1.9453, + "step": 271105 + }, + { + "epoch": 0.64, + "grad_norm": 2.328125, + "learning_rate": 0.00015384662036338983, + "loss": 1.9951, + "step": 271110 + }, + { + "epoch": 0.64, + "grad_norm": 2.515625, + "learning_rate": 0.00015384506289673125, + "loss": 2.1826, + "step": 271115 + }, + { + "epoch": 0.64, + "grad_norm": 2.40625, + "learning_rate": 0.0001538435054116783, + "loss": 1.9744, + "step": 271120 + }, + { + "epoch": 0.64, + "grad_norm": 2.125, + "learning_rate": 0.00015384194790823142, + "loss": 2.0694, + "step": 271125 + }, + { + "epoch": 0.64, + "grad_norm": 2.3125, + "learning_rate": 0.0001538403903863911, + "loss": 2.2213, + "step": 271130 + }, + { + "epoch": 0.64, + "grad_norm": 1.9765625, + "learning_rate": 0.00015383883284615792, + "loss": 1.9828, + "step": 271135 + }, + { + "epoch": 0.64, + "grad_norm": 2.15625, + "learning_rate": 0.0001538372752875325, + "loss": 2.0069, + "step": 271140 + }, + { + "epoch": 0.64, + "grad_norm": 2.609375, + "learning_rate": 0.00015383571771051524, + "loss": 2.2152, + "step": 271145 + }, + { + "epoch": 0.64, + "grad_norm": 2.46875, + "learning_rate": 0.0001538341601151067, + "loss": 2.0389, + "step": 271150 + }, + { + "epoch": 0.64, + "grad_norm": 2.234375, + "learning_rate": 0.00015383260250130749, + "loss": 2.1477, + "step": 271155 + }, + { + "epoch": 0.64, + "grad_norm": 2.03125, + "learning_rate": 0.00015383104486911802, + "loss": 2.1456, + "step": 271160 + }, + { + "epoch": 0.64, + "grad_norm": 2.171875, + "learning_rate": 0.00015382948721853894, + "loss": 1.9876, + "step": 271165 + }, + { + "epoch": 0.64, + "grad_norm": 2.15625, + "learning_rate": 0.00015382792954957071, + "loss": 2.0184, + "step": 271170 + }, + { + "epoch": 0.64, + "grad_norm": 2.15625, + "learning_rate": 0.0001538263718622139, + "loss": 2.0788, + "step": 271175 + }, + { + "epoch": 0.64, + "grad_norm": 2.109375, + "learning_rate": 0.00015382481415646902, + "loss": 2.0965, + "step": 271180 + }, + { + "epoch": 0.64, + "grad_norm": 2.59375, + "learning_rate": 0.00015382325643233658, + "loss": 2.1086, + "step": 271185 + }, + { + "epoch": 0.64, + "grad_norm": 1.875, + "learning_rate": 0.00015382169868981716, + "loss": 2.0036, + "step": 271190 + }, + { + "epoch": 0.64, + "grad_norm": 2.0, + "learning_rate": 0.00015382014092891127, + "loss": 1.9092, + "step": 271195 + }, + { + "epoch": 0.64, + "grad_norm": 2.109375, + "learning_rate": 0.00015381858314961945, + "loss": 2.097, + "step": 271200 + }, + { + "epoch": 0.64, + "grad_norm": 2.1875, + "learning_rate": 0.00015381702535194225, + "loss": 2.1431, + "step": 271205 + }, + { + "epoch": 0.64, + "grad_norm": 1.8125, + "learning_rate": 0.00015381546753588015, + "loss": 2.0329, + "step": 271210 + }, + { + "epoch": 0.64, + "grad_norm": 2.359375, + "learning_rate": 0.00015381390970143372, + "loss": 2.2347, + "step": 271215 + }, + { + "epoch": 0.64, + "grad_norm": 2.703125, + "learning_rate": 0.00015381235184860342, + "loss": 2.191, + "step": 271220 + }, + { + "epoch": 0.64, + "grad_norm": 2.15625, + "learning_rate": 0.00015381079397738993, + "loss": 2.145, + "step": 271225 + }, + { + "epoch": 0.64, + "grad_norm": 2.28125, + "learning_rate": 0.00015380923608779366, + "loss": 1.9249, + "step": 271230 + }, + { + "epoch": 0.64, + "grad_norm": 2.078125, + "learning_rate": 0.00015380767817981518, + "loss": 2.1741, + "step": 271235 + }, + { + "epoch": 0.64, + "grad_norm": 2.296875, + "learning_rate": 0.00015380612025345505, + "loss": 2.1847, + "step": 271240 + }, + { + "epoch": 0.64, + "grad_norm": 1.7890625, + "learning_rate": 0.0001538045623087137, + "loss": 2.0667, + "step": 271245 + }, + { + "epoch": 0.64, + "grad_norm": 2.484375, + "learning_rate": 0.0001538030043455918, + "loss": 1.9737, + "step": 271250 + }, + { + "epoch": 0.64, + "grad_norm": 2.109375, + "learning_rate": 0.00015380144636408983, + "loss": 2.0031, + "step": 271255 + }, + { + "epoch": 0.64, + "grad_norm": 2.28125, + "learning_rate": 0.00015379988836420827, + "loss": 2.0637, + "step": 271260 + }, + { + "epoch": 0.64, + "grad_norm": 1.8125, + "learning_rate": 0.0001537983303459477, + "loss": 2.1624, + "step": 271265 + }, + { + "epoch": 0.64, + "grad_norm": 2.28125, + "learning_rate": 0.00015379677230930864, + "loss": 1.9853, + "step": 271270 + }, + { + "epoch": 0.64, + "grad_norm": 2.140625, + "learning_rate": 0.00015379521425429164, + "loss": 2.0194, + "step": 271275 + }, + { + "epoch": 0.64, + "grad_norm": 1.921875, + "learning_rate": 0.00015379365618089724, + "loss": 2.0873, + "step": 271280 + }, + { + "epoch": 0.64, + "grad_norm": 1.8984375, + "learning_rate": 0.0001537920980891259, + "loss": 1.9406, + "step": 271285 + }, + { + "epoch": 0.64, + "grad_norm": 2.6875, + "learning_rate": 0.00015379053997897826, + "loss": 2.1838, + "step": 271290 + }, + { + "epoch": 0.64, + "grad_norm": 1.96875, + "learning_rate": 0.00015378898185045475, + "loss": 2.23, + "step": 271295 + }, + { + "epoch": 0.64, + "grad_norm": 1.6796875, + "learning_rate": 0.00015378742370355597, + "loss": 2.0272, + "step": 271300 + }, + { + "epoch": 0.64, + "grad_norm": 2.609375, + "learning_rate": 0.00015378586553828244, + "loss": 2.0214, + "step": 271305 + }, + { + "epoch": 0.64, + "grad_norm": 2.203125, + "learning_rate": 0.00015378430735463465, + "loss": 1.9843, + "step": 271310 + }, + { + "epoch": 0.64, + "grad_norm": 2.125, + "learning_rate": 0.0001537827491526132, + "loss": 2.0759, + "step": 271315 + }, + { + "epoch": 0.64, + "grad_norm": 1.9375, + "learning_rate": 0.00015378119093221857, + "loss": 2.0327, + "step": 271320 + }, + { + "epoch": 0.64, + "grad_norm": 2.578125, + "learning_rate": 0.0001537796326934513, + "loss": 2.0819, + "step": 271325 + }, + { + "epoch": 0.64, + "grad_norm": 2.0625, + "learning_rate": 0.00015377807443631194, + "loss": 2.122, + "step": 271330 + }, + { + "epoch": 0.64, + "grad_norm": 2.34375, + "learning_rate": 0.000153776516160801, + "loss": 2.1867, + "step": 271335 + }, + { + "epoch": 0.64, + "grad_norm": 2.328125, + "learning_rate": 0.00015377495786691908, + "loss": 2.167, + "step": 271340 + }, + { + "epoch": 0.64, + "grad_norm": 1.875, + "learning_rate": 0.0001537733995546666, + "loss": 1.9258, + "step": 271345 + }, + { + "epoch": 0.64, + "grad_norm": 1.8828125, + "learning_rate": 0.00015377184122404417, + "loss": 2.1293, + "step": 271350 + }, + { + "epoch": 0.64, + "grad_norm": 2.328125, + "learning_rate": 0.0001537702828750523, + "loss": 1.9768, + "step": 271355 + }, + { + "epoch": 0.64, + "grad_norm": 2.703125, + "learning_rate": 0.00015376872450769153, + "loss": 2.0183, + "step": 271360 + }, + { + "epoch": 0.64, + "grad_norm": 2.5, + "learning_rate": 0.0001537671661219624, + "loss": 1.9972, + "step": 271365 + }, + { + "epoch": 0.64, + "grad_norm": 2.40625, + "learning_rate": 0.00015376560771786547, + "loss": 2.2574, + "step": 271370 + }, + { + "epoch": 0.64, + "grad_norm": 2.296875, + "learning_rate": 0.00015376404929540115, + "loss": 2.0386, + "step": 271375 + }, + { + "epoch": 0.64, + "grad_norm": 2.34375, + "learning_rate": 0.00015376249085457012, + "loss": 2.1339, + "step": 271380 + }, + { + "epoch": 0.64, + "grad_norm": 2.875, + "learning_rate": 0.0001537609323953728, + "loss": 2.0234, + "step": 271385 + }, + { + "epoch": 0.64, + "grad_norm": 2.34375, + "learning_rate": 0.0001537593739178098, + "loss": 2.1668, + "step": 271390 + }, + { + "epoch": 0.64, + "grad_norm": 2.234375, + "learning_rate": 0.00015375781542188163, + "loss": 2.166, + "step": 271395 + }, + { + "epoch": 0.64, + "grad_norm": 2.125, + "learning_rate": 0.0001537562569075888, + "loss": 2.0048, + "step": 271400 + }, + { + "epoch": 0.64, + "grad_norm": 2.203125, + "learning_rate": 0.00015375469837493184, + "loss": 1.9352, + "step": 271405 + }, + { + "epoch": 0.64, + "grad_norm": 1.625, + "learning_rate": 0.0001537531398239113, + "loss": 2.0344, + "step": 271410 + }, + { + "epoch": 0.64, + "grad_norm": 2.171875, + "learning_rate": 0.00015375158125452776, + "loss": 2.0019, + "step": 271415 + }, + { + "epoch": 0.64, + "grad_norm": 2.234375, + "learning_rate": 0.00015375002266678167, + "loss": 2.06, + "step": 271420 + }, + { + "epoch": 0.64, + "grad_norm": 2.15625, + "learning_rate": 0.00015374846406067362, + "loss": 1.8287, + "step": 271425 + }, + { + "epoch": 0.64, + "grad_norm": 2.171875, + "learning_rate": 0.00015374690543620408, + "loss": 2.0749, + "step": 271430 + }, + { + "epoch": 0.64, + "grad_norm": 1.953125, + "learning_rate": 0.00015374534679337364, + "loss": 2.1871, + "step": 271435 + }, + { + "epoch": 0.64, + "grad_norm": 1.78125, + "learning_rate": 0.00015374378813218283, + "loss": 1.935, + "step": 271440 + }, + { + "epoch": 0.64, + "grad_norm": 2.25, + "learning_rate": 0.00015374222945263214, + "loss": 2.1034, + "step": 271445 + }, + { + "epoch": 0.64, + "grad_norm": 2.375, + "learning_rate": 0.00015374067075472217, + "loss": 2.0998, + "step": 271450 + }, + { + "epoch": 0.64, + "grad_norm": 2.5, + "learning_rate": 0.0001537391120384534, + "loss": 2.1003, + "step": 271455 + }, + { + "epoch": 0.64, + "grad_norm": 2.140625, + "learning_rate": 0.00015373755330382632, + "loss": 1.8446, + "step": 271460 + }, + { + "epoch": 0.64, + "grad_norm": 2.1875, + "learning_rate": 0.0001537359945508416, + "loss": 2.0424, + "step": 271465 + }, + { + "epoch": 0.64, + "grad_norm": 2.078125, + "learning_rate": 0.00015373443577949963, + "loss": 2.0378, + "step": 271470 + }, + { + "epoch": 0.64, + "grad_norm": 3.0625, + "learning_rate": 0.00015373287698980102, + "loss": 1.9598, + "step": 271475 + }, + { + "epoch": 0.64, + "grad_norm": 2.671875, + "learning_rate": 0.0001537313181817463, + "loss": 2.028, + "step": 271480 + }, + { + "epoch": 0.64, + "grad_norm": 1.9453125, + "learning_rate": 0.00015372975935533595, + "loss": 2.0477, + "step": 271485 + }, + { + "epoch": 0.64, + "grad_norm": 2.15625, + "learning_rate": 0.0001537282005105706, + "loss": 1.9392, + "step": 271490 + }, + { + "epoch": 0.64, + "grad_norm": 2.3125, + "learning_rate": 0.00015372664164745067, + "loss": 1.9699, + "step": 271495 + }, + { + "epoch": 0.64, + "grad_norm": 2.5625, + "learning_rate": 0.00015372508276597678, + "loss": 2.1736, + "step": 271500 + }, + { + "epoch": 0.64, + "grad_norm": 2.109375, + "learning_rate": 0.0001537235238661494, + "loss": 1.9612, + "step": 271505 + }, + { + "epoch": 0.64, + "grad_norm": 2.25, + "learning_rate": 0.00015372196494796908, + "loss": 2.1755, + "step": 271510 + }, + { + "epoch": 0.64, + "grad_norm": 2.078125, + "learning_rate": 0.00015372040601143637, + "loss": 2.1288, + "step": 271515 + }, + { + "epoch": 0.64, + "grad_norm": 2.09375, + "learning_rate": 0.0001537188470565518, + "loss": 1.9847, + "step": 271520 + }, + { + "epoch": 0.64, + "grad_norm": 2.1875, + "learning_rate": 0.00015371728808331593, + "loss": 2.1008, + "step": 271525 + }, + { + "epoch": 0.64, + "grad_norm": 2.25, + "learning_rate": 0.00015371572909172928, + "loss": 2.1472, + "step": 271530 + }, + { + "epoch": 0.64, + "grad_norm": 2.0625, + "learning_rate": 0.0001537141700817923, + "loss": 2.0608, + "step": 271535 + }, + { + "epoch": 0.64, + "grad_norm": 2.265625, + "learning_rate": 0.00015371261105350556, + "loss": 2.0856, + "step": 271540 + }, + { + "epoch": 0.64, + "grad_norm": 2.171875, + "learning_rate": 0.00015371105200686968, + "loss": 2.0507, + "step": 271545 + }, + { + "epoch": 0.64, + "grad_norm": 2.03125, + "learning_rate": 0.00015370949294188512, + "loss": 1.9956, + "step": 271550 + }, + { + "epoch": 0.64, + "grad_norm": 2.234375, + "learning_rate": 0.00015370793385855242, + "loss": 2.2151, + "step": 271555 + }, + { + "epoch": 0.64, + "grad_norm": 2.34375, + "learning_rate": 0.0001537063747568721, + "loss": 2.1425, + "step": 271560 + }, + { + "epoch": 0.64, + "grad_norm": 2.203125, + "learning_rate": 0.00015370481563684472, + "loss": 1.8928, + "step": 271565 + }, + { + "epoch": 0.64, + "grad_norm": 2.40625, + "learning_rate": 0.00015370325649847078, + "loss": 1.9993, + "step": 271570 + }, + { + "epoch": 0.64, + "grad_norm": 2.171875, + "learning_rate": 0.00015370169734175087, + "loss": 2.2505, + "step": 271575 + }, + { + "epoch": 0.64, + "grad_norm": 2.03125, + "learning_rate": 0.00015370013816668547, + "loss": 2.0897, + "step": 271580 + }, + { + "epoch": 0.64, + "grad_norm": 2.59375, + "learning_rate": 0.00015369857897327513, + "loss": 2.1036, + "step": 271585 + }, + { + "epoch": 0.64, + "grad_norm": 2.5, + "learning_rate": 0.0001536970197615204, + "loss": 2.2242, + "step": 271590 + }, + { + "epoch": 0.64, + "grad_norm": 1.875, + "learning_rate": 0.00015369546053142175, + "loss": 1.9556, + "step": 271595 + }, + { + "epoch": 0.64, + "grad_norm": 2.453125, + "learning_rate": 0.0001536939012829798, + "loss": 2.0134, + "step": 271600 + }, + { + "epoch": 0.64, + "grad_norm": 2.34375, + "learning_rate": 0.000153692342016195, + "loss": 1.8041, + "step": 271605 + }, + { + "epoch": 0.64, + "grad_norm": 2.28125, + "learning_rate": 0.00015369078273106795, + "loss": 2.0642, + "step": 271610 + }, + { + "epoch": 0.64, + "grad_norm": 2.140625, + "learning_rate": 0.00015368922342759916, + "loss": 1.989, + "step": 271615 + }, + { + "epoch": 0.64, + "grad_norm": 2.21875, + "learning_rate": 0.0001536876641057891, + "loss": 2.1974, + "step": 271620 + }, + { + "epoch": 0.64, + "grad_norm": 2.109375, + "learning_rate": 0.00015368610476563843, + "loss": 1.9757, + "step": 271625 + }, + { + "epoch": 0.64, + "grad_norm": 2.140625, + "learning_rate": 0.0001536845454071476, + "loss": 2.2546, + "step": 271630 + }, + { + "epoch": 0.64, + "grad_norm": 2.203125, + "learning_rate": 0.00015368298603031714, + "loss": 2.1979, + "step": 271635 + }, + { + "epoch": 0.64, + "grad_norm": 2.421875, + "learning_rate": 0.0001536814266351476, + "loss": 2.0615, + "step": 271640 + }, + { + "epoch": 0.64, + "grad_norm": 2.1875, + "learning_rate": 0.00015367986722163952, + "loss": 1.968, + "step": 271645 + }, + { + "epoch": 0.64, + "grad_norm": 2.15625, + "learning_rate": 0.0001536783077897934, + "loss": 2.0773, + "step": 271650 + }, + { + "epoch": 0.64, + "grad_norm": 3.90625, + "learning_rate": 0.0001536767483396098, + "loss": 1.9834, + "step": 271655 + }, + { + "epoch": 0.64, + "grad_norm": 2.265625, + "learning_rate": 0.00015367518887108927, + "loss": 2.0313, + "step": 271660 + }, + { + "epoch": 0.64, + "grad_norm": 1.7890625, + "learning_rate": 0.00015367362938423234, + "loss": 2.0925, + "step": 271665 + }, + { + "epoch": 0.64, + "grad_norm": 2.109375, + "learning_rate": 0.0001536720698790395, + "loss": 2.0522, + "step": 271670 + }, + { + "epoch": 0.64, + "grad_norm": 2.328125, + "learning_rate": 0.0001536705103555113, + "loss": 1.998, + "step": 271675 + }, + { + "epoch": 0.64, + "grad_norm": 2.078125, + "learning_rate": 0.00015366895081364828, + "loss": 1.9459, + "step": 271680 + }, + { + "epoch": 0.64, + "grad_norm": 1.921875, + "learning_rate": 0.000153667391253451, + "loss": 2.0547, + "step": 271685 + }, + { + "epoch": 0.64, + "grad_norm": 2.28125, + "learning_rate": 0.00015366583167491995, + "loss": 2.1673, + "step": 271690 + }, + { + "epoch": 0.64, + "grad_norm": 2.28125, + "learning_rate": 0.00015366427207805567, + "loss": 2.1043, + "step": 271695 + }, + { + "epoch": 0.64, + "grad_norm": 2.40625, + "learning_rate": 0.0001536627124628587, + "loss": 1.9913, + "step": 271700 + }, + { + "epoch": 0.64, + "grad_norm": 2.484375, + "learning_rate": 0.00015366115282932955, + "loss": 2.0317, + "step": 271705 + }, + { + "epoch": 0.64, + "grad_norm": 2.234375, + "learning_rate": 0.00015365959317746884, + "loss": 2.1126, + "step": 271710 + }, + { + "epoch": 0.64, + "grad_norm": 2.140625, + "learning_rate": 0.00015365803350727702, + "loss": 1.9175, + "step": 271715 + }, + { + "epoch": 0.64, + "grad_norm": 2.078125, + "learning_rate": 0.0001536564738187546, + "loss": 1.9411, + "step": 271720 + }, + { + "epoch": 0.64, + "grad_norm": 2.203125, + "learning_rate": 0.00015365491411190217, + "loss": 2.1411, + "step": 271725 + }, + { + "epoch": 0.64, + "grad_norm": 2.40625, + "learning_rate": 0.00015365335438672031, + "loss": 1.9896, + "step": 271730 + }, + { + "epoch": 0.64, + "grad_norm": 1.9296875, + "learning_rate": 0.00015365179464320945, + "loss": 1.8543, + "step": 271735 + }, + { + "epoch": 0.64, + "grad_norm": 2.171875, + "learning_rate": 0.00015365023488137015, + "loss": 1.9157, + "step": 271740 + }, + { + "epoch": 0.64, + "grad_norm": 2.265625, + "learning_rate": 0.00015364867510120296, + "loss": 2.1583, + "step": 271745 + }, + { + "epoch": 0.64, + "grad_norm": 2.109375, + "learning_rate": 0.0001536471153027084, + "loss": 2.1327, + "step": 271750 + }, + { + "epoch": 0.64, + "grad_norm": 2.046875, + "learning_rate": 0.00015364555548588705, + "loss": 2.1239, + "step": 271755 + }, + { + "epoch": 0.64, + "grad_norm": 2.453125, + "learning_rate": 0.0001536439956507394, + "loss": 2.1411, + "step": 271760 + }, + { + "epoch": 0.64, + "grad_norm": 2.1875, + "learning_rate": 0.00015364243579726593, + "loss": 2.0262, + "step": 271765 + }, + { + "epoch": 0.64, + "grad_norm": 2.234375, + "learning_rate": 0.00015364087592546728, + "loss": 2.1293, + "step": 271770 + }, + { + "epoch": 0.64, + "grad_norm": 2.0625, + "learning_rate": 0.00015363931603534393, + "loss": 2.267, + "step": 271775 + }, + { + "epoch": 0.64, + "grad_norm": 1.90625, + "learning_rate": 0.00015363775612689643, + "loss": 1.9616, + "step": 271780 + }, + { + "epoch": 0.64, + "grad_norm": 2.25, + "learning_rate": 0.00015363619620012528, + "loss": 2.1137, + "step": 271785 + }, + { + "epoch": 0.64, + "grad_norm": 2.0625, + "learning_rate": 0.00015363463625503102, + "loss": 2.2921, + "step": 271790 + }, + { + "epoch": 0.64, + "grad_norm": 2.109375, + "learning_rate": 0.00015363307629161422, + "loss": 1.9236, + "step": 271795 + }, + { + "epoch": 0.64, + "grad_norm": 2.1875, + "learning_rate": 0.00015363151630987538, + "loss": 2.0413, + "step": 271800 + }, + { + "epoch": 0.64, + "grad_norm": 2.453125, + "learning_rate": 0.00015362995630981504, + "loss": 1.8565, + "step": 271805 + }, + { + "epoch": 0.64, + "grad_norm": 2.46875, + "learning_rate": 0.00015362839629143372, + "loss": 1.9988, + "step": 271810 + }, + { + "epoch": 0.64, + "grad_norm": 2.171875, + "learning_rate": 0.000153626836254732, + "loss": 1.9986, + "step": 271815 + }, + { + "epoch": 0.64, + "grad_norm": 2.328125, + "learning_rate": 0.00015362527619971035, + "loss": 1.8511, + "step": 271820 + }, + { + "epoch": 0.64, + "grad_norm": 2.328125, + "learning_rate": 0.00015362371612636936, + "loss": 2.2359, + "step": 271825 + }, + { + "epoch": 0.64, + "grad_norm": 2.40625, + "learning_rate": 0.00015362215603470953, + "loss": 2.2273, + "step": 271830 + }, + { + "epoch": 0.64, + "grad_norm": 2.125, + "learning_rate": 0.00015362059592473136, + "loss": 1.9267, + "step": 271835 + }, + { + "epoch": 0.64, + "grad_norm": 2.015625, + "learning_rate": 0.00015361903579643547, + "loss": 1.8895, + "step": 271840 + }, + { + "epoch": 0.64, + "grad_norm": 2.515625, + "learning_rate": 0.00015361747564982233, + "loss": 1.9864, + "step": 271845 + }, + { + "epoch": 0.64, + "grad_norm": 1.8671875, + "learning_rate": 0.0001536159154848925, + "loss": 2.0914, + "step": 271850 + }, + { + "epoch": 0.64, + "grad_norm": 2.0625, + "learning_rate": 0.00015361435530164647, + "loss": 1.862, + "step": 271855 + }, + { + "epoch": 0.64, + "grad_norm": 2.484375, + "learning_rate": 0.0001536127951000848, + "loss": 2.2046, + "step": 271860 + }, + { + "epoch": 0.64, + "grad_norm": 1.8046875, + "learning_rate": 0.0001536112348802081, + "loss": 2.027, + "step": 271865 + }, + { + "epoch": 0.64, + "grad_norm": 2.359375, + "learning_rate": 0.00015360967464201676, + "loss": 1.8713, + "step": 271870 + }, + { + "epoch": 0.64, + "grad_norm": 2.0, + "learning_rate": 0.00015360811438551138, + "loss": 2.0504, + "step": 271875 + }, + { + "epoch": 0.64, + "grad_norm": 2.0625, + "learning_rate": 0.00015360655411069253, + "loss": 2.0621, + "step": 271880 + }, + { + "epoch": 0.64, + "grad_norm": 2.0625, + "learning_rate": 0.0001536049938175607, + "loss": 2.0486, + "step": 271885 + }, + { + "epoch": 0.64, + "grad_norm": 2.34375, + "learning_rate": 0.0001536034335061164, + "loss": 2.2444, + "step": 271890 + }, + { + "epoch": 0.64, + "grad_norm": 2.140625, + "learning_rate": 0.00015360187317636025, + "loss": 2.1318, + "step": 271895 + }, + { + "epoch": 0.64, + "grad_norm": 2.25, + "learning_rate": 0.0001536003128282927, + "loss": 1.9641, + "step": 271900 + }, + { + "epoch": 0.64, + "grad_norm": 2.46875, + "learning_rate": 0.0001535987524619143, + "loss": 2.1494, + "step": 271905 + }, + { + "epoch": 0.64, + "grad_norm": 2.15625, + "learning_rate": 0.0001535971920772256, + "loss": 2.1345, + "step": 271910 + }, + { + "epoch": 0.64, + "grad_norm": 1.8984375, + "learning_rate": 0.00015359563167422714, + "loss": 2.1501, + "step": 271915 + }, + { + "epoch": 0.64, + "grad_norm": 2.203125, + "learning_rate": 0.00015359407125291942, + "loss": 1.8498, + "step": 271920 + }, + { + "epoch": 0.64, + "grad_norm": 2.078125, + "learning_rate": 0.000153592510813303, + "loss": 2.0959, + "step": 271925 + }, + { + "epoch": 0.64, + "grad_norm": 1.9453125, + "learning_rate": 0.00015359095035537843, + "loss": 2.0282, + "step": 271930 + }, + { + "epoch": 0.64, + "grad_norm": 2.40625, + "learning_rate": 0.0001535893898791462, + "loss": 2.0747, + "step": 271935 + }, + { + "epoch": 0.64, + "grad_norm": 2.21875, + "learning_rate": 0.00015358782938460684, + "loss": 2.1862, + "step": 271940 + }, + { + "epoch": 0.64, + "grad_norm": 2.40625, + "learning_rate": 0.00015358626887176095, + "loss": 2.1818, + "step": 271945 + }, + { + "epoch": 0.64, + "grad_norm": 1.6015625, + "learning_rate": 0.000153584708340609, + "loss": 2.0081, + "step": 271950 + }, + { + "epoch": 0.64, + "grad_norm": 2.671875, + "learning_rate": 0.00015358314779115152, + "loss": 2.0881, + "step": 271955 + }, + { + "epoch": 0.64, + "grad_norm": 2.34375, + "learning_rate": 0.0001535815872233891, + "loss": 2.032, + "step": 271960 + }, + { + "epoch": 0.64, + "grad_norm": 2.109375, + "learning_rate": 0.00015358002663732223, + "loss": 2.2539, + "step": 271965 + }, + { + "epoch": 0.64, + "grad_norm": 2.375, + "learning_rate": 0.00015357846603295143, + "loss": 2.0549, + "step": 271970 + }, + { + "epoch": 0.64, + "grad_norm": 2.28125, + "learning_rate": 0.00015357690541027726, + "loss": 2.0365, + "step": 271975 + }, + { + "epoch": 0.64, + "grad_norm": 1.78125, + "learning_rate": 0.0001535753447693003, + "loss": 2.0147, + "step": 271980 + }, + { + "epoch": 0.64, + "grad_norm": 2.0625, + "learning_rate": 0.00015357378411002097, + "loss": 1.9719, + "step": 271985 + }, + { + "epoch": 0.64, + "grad_norm": 2.1875, + "learning_rate": 0.00015357222343243988, + "loss": 2.0942, + "step": 271990 + }, + { + "epoch": 0.64, + "grad_norm": 2.265625, + "learning_rate": 0.00015357066273655754, + "loss": 2.1721, + "step": 271995 + }, + { + "epoch": 0.64, + "grad_norm": 3.234375, + "learning_rate": 0.00015356910202237448, + "loss": 1.9698, + "step": 272000 + }, + { + "epoch": 0.64, + "grad_norm": 2.40625, + "learning_rate": 0.0001535675412898913, + "loss": 2.1252, + "step": 272005 + }, + { + "epoch": 0.64, + "grad_norm": 2.4375, + "learning_rate": 0.00015356598053910842, + "loss": 2.0726, + "step": 272010 + }, + { + "epoch": 0.64, + "grad_norm": 2.796875, + "learning_rate": 0.00015356441977002646, + "loss": 2.053, + "step": 272015 + }, + { + "epoch": 0.64, + "grad_norm": 1.953125, + "learning_rate": 0.00015356285898264588, + "loss": 2.0515, + "step": 272020 + }, + { + "epoch": 0.64, + "grad_norm": 1.9453125, + "learning_rate": 0.0001535612981769673, + "loss": 1.9292, + "step": 272025 + }, + { + "epoch": 0.64, + "grad_norm": 2.328125, + "learning_rate": 0.00015355973735299122, + "loss": 2.1353, + "step": 272030 + }, + { + "epoch": 0.64, + "grad_norm": 2.078125, + "learning_rate": 0.0001535581765107181, + "loss": 2.0879, + "step": 272035 + }, + { + "epoch": 0.64, + "grad_norm": 2.140625, + "learning_rate": 0.0001535566156501486, + "loss": 2.1982, + "step": 272040 + }, + { + "epoch": 0.64, + "grad_norm": 2.28125, + "learning_rate": 0.00015355505477128315, + "loss": 2.1265, + "step": 272045 + }, + { + "epoch": 0.64, + "grad_norm": 2.078125, + "learning_rate": 0.00015355349387412233, + "loss": 1.9751, + "step": 272050 + }, + { + "epoch": 0.64, + "grad_norm": 2.046875, + "learning_rate": 0.00015355193295866666, + "loss": 2.0317, + "step": 272055 + }, + { + "epoch": 0.64, + "grad_norm": 2.015625, + "learning_rate": 0.0001535503720249167, + "loss": 2.3638, + "step": 272060 + }, + { + "epoch": 0.64, + "grad_norm": 1.953125, + "learning_rate": 0.00015354881107287294, + "loss": 2.0311, + "step": 272065 + }, + { + "epoch": 0.64, + "grad_norm": 1.9921875, + "learning_rate": 0.00015354725010253596, + "loss": 1.9983, + "step": 272070 + }, + { + "epoch": 0.64, + "grad_norm": 2.296875, + "learning_rate": 0.00015354568911390623, + "loss": 2.0935, + "step": 272075 + }, + { + "epoch": 0.64, + "grad_norm": 2.21875, + "learning_rate": 0.00015354412810698436, + "loss": 2.1418, + "step": 272080 + }, + { + "epoch": 0.64, + "grad_norm": 1.8828125, + "learning_rate": 0.0001535425670817708, + "loss": 1.8783, + "step": 272085 + }, + { + "epoch": 0.64, + "grad_norm": 1.921875, + "learning_rate": 0.00015354100603826617, + "loss": 1.9174, + "step": 272090 + }, + { + "epoch": 0.64, + "grad_norm": 2.375, + "learning_rate": 0.00015353944497647096, + "loss": 2.0569, + "step": 272095 + }, + { + "epoch": 0.64, + "grad_norm": 1.7890625, + "learning_rate": 0.0001535378838963857, + "loss": 2.2237, + "step": 272100 + }, + { + "epoch": 0.64, + "grad_norm": 2.078125, + "learning_rate": 0.00015353632279801088, + "loss": 2.0224, + "step": 272105 + }, + { + "epoch": 0.64, + "grad_norm": 2.171875, + "learning_rate": 0.00015353476168134713, + "loss": 2.1088, + "step": 272110 + }, + { + "epoch": 0.64, + "grad_norm": 2.671875, + "learning_rate": 0.00015353320054639492, + "loss": 1.9745, + "step": 272115 + }, + { + "epoch": 0.64, + "grad_norm": 2.109375, + "learning_rate": 0.00015353163939315481, + "loss": 2.0795, + "step": 272120 + }, + { + "epoch": 0.64, + "grad_norm": 1.8125, + "learning_rate": 0.00015353007822162734, + "loss": 2.157, + "step": 272125 + }, + { + "epoch": 0.64, + "grad_norm": 2.171875, + "learning_rate": 0.00015352851703181298, + "loss": 1.9446, + "step": 272130 + }, + { + "epoch": 0.64, + "grad_norm": 2.3125, + "learning_rate": 0.00015352695582371233, + "loss": 2.0713, + "step": 272135 + }, + { + "epoch": 0.64, + "grad_norm": 2.078125, + "learning_rate": 0.0001535253945973259, + "loss": 2.0237, + "step": 272140 + }, + { + "epoch": 0.64, + "grad_norm": 2.53125, + "learning_rate": 0.00015352383335265422, + "loss": 2.1942, + "step": 272145 + }, + { + "epoch": 0.64, + "grad_norm": 2.546875, + "learning_rate": 0.00015352227208969782, + "loss": 2.1421, + "step": 272150 + }, + { + "epoch": 0.64, + "grad_norm": 2.265625, + "learning_rate": 0.00015352071080845724, + "loss": 2.0606, + "step": 272155 + }, + { + "epoch": 0.64, + "grad_norm": 2.140625, + "learning_rate": 0.00015351914950893306, + "loss": 2.2371, + "step": 272160 + }, + { + "epoch": 0.64, + "grad_norm": 2.078125, + "learning_rate": 0.00015351758819112573, + "loss": 1.8852, + "step": 272165 + }, + { + "epoch": 0.64, + "grad_norm": 1.9453125, + "learning_rate": 0.00015351602685503583, + "loss": 1.9728, + "step": 272170 + }, + { + "epoch": 0.64, + "grad_norm": 3.546875, + "learning_rate": 0.00015351446550066386, + "loss": 1.9677, + "step": 272175 + }, + { + "epoch": 0.64, + "grad_norm": 1.8984375, + "learning_rate": 0.00015351290412801038, + "loss": 2.1432, + "step": 272180 + }, + { + "epoch": 0.64, + "grad_norm": 1.96875, + "learning_rate": 0.00015351134273707597, + "loss": 1.9937, + "step": 272185 + }, + { + "epoch": 0.64, + "grad_norm": 2.296875, + "learning_rate": 0.00015350978132786108, + "loss": 2.0818, + "step": 272190 + }, + { + "epoch": 0.64, + "grad_norm": 1.9140625, + "learning_rate": 0.0001535082199003663, + "loss": 2.016, + "step": 272195 + }, + { + "epoch": 0.64, + "grad_norm": 2.296875, + "learning_rate": 0.00015350665845459212, + "loss": 2.1909, + "step": 272200 + }, + { + "epoch": 0.64, + "grad_norm": 2.34375, + "learning_rate": 0.00015350509699053907, + "loss": 2.0388, + "step": 272205 + }, + { + "epoch": 0.64, + "grad_norm": 2.125, + "learning_rate": 0.00015350353550820775, + "loss": 2.3006, + "step": 272210 + }, + { + "epoch": 0.64, + "grad_norm": 2.5, + "learning_rate": 0.00015350197400759865, + "loss": 1.8512, + "step": 272215 + }, + { + "epoch": 0.64, + "grad_norm": 1.8984375, + "learning_rate": 0.00015350041248871229, + "loss": 2.1707, + "step": 272220 + }, + { + "epoch": 0.64, + "grad_norm": 1.796875, + "learning_rate": 0.0001534988509515492, + "loss": 2.0044, + "step": 272225 + }, + { + "epoch": 0.64, + "grad_norm": 2.359375, + "learning_rate": 0.00015349728939610997, + "loss": 1.8538, + "step": 272230 + }, + { + "epoch": 0.64, + "grad_norm": 2.234375, + "learning_rate": 0.0001534957278223951, + "loss": 1.9993, + "step": 272235 + }, + { + "epoch": 0.64, + "grad_norm": 2.15625, + "learning_rate": 0.00015349416623040507, + "loss": 2.1678, + "step": 272240 + }, + { + "epoch": 0.64, + "grad_norm": 2.375, + "learning_rate": 0.0001534926046201405, + "loss": 2.0453, + "step": 272245 + }, + { + "epoch": 0.64, + "grad_norm": 1.9375, + "learning_rate": 0.00015349104299160188, + "loss": 2.1334, + "step": 272250 + }, + { + "epoch": 0.64, + "grad_norm": 1.828125, + "learning_rate": 0.00015348948134478974, + "loss": 1.7482, + "step": 272255 + }, + { + "epoch": 0.64, + "grad_norm": 2.03125, + "learning_rate": 0.00015348791967970463, + "loss": 2.1939, + "step": 272260 + }, + { + "epoch": 0.64, + "grad_norm": 1.9296875, + "learning_rate": 0.00015348635799634707, + "loss": 2.1145, + "step": 272265 + }, + { + "epoch": 0.64, + "grad_norm": 1.8203125, + "learning_rate": 0.00015348479629471765, + "loss": 2.0622, + "step": 272270 + }, + { + "epoch": 0.64, + "grad_norm": 2.140625, + "learning_rate": 0.0001534832345748168, + "loss": 2.0904, + "step": 272275 + }, + { + "epoch": 0.64, + "grad_norm": 2.25, + "learning_rate": 0.00015348167283664514, + "loss": 2.0705, + "step": 272280 + }, + { + "epoch": 0.64, + "grad_norm": 2.046875, + "learning_rate": 0.00015348011108020314, + "loss": 2.2281, + "step": 272285 + }, + { + "epoch": 0.64, + "grad_norm": 1.875, + "learning_rate": 0.00015347854930549133, + "loss": 2.1454, + "step": 272290 + }, + { + "epoch": 0.64, + "grad_norm": 2.390625, + "learning_rate": 0.00015347698751251034, + "loss": 2.1752, + "step": 272295 + }, + { + "epoch": 0.64, + "grad_norm": 2.203125, + "learning_rate": 0.00015347542570126062, + "loss": 1.9426, + "step": 272300 + }, + { + "epoch": 0.64, + "grad_norm": 2.109375, + "learning_rate": 0.00015347386387174275, + "loss": 2.2019, + "step": 272305 + }, + { + "epoch": 0.64, + "grad_norm": 1.828125, + "learning_rate": 0.0001534723020239572, + "loss": 1.9757, + "step": 272310 + }, + { + "epoch": 0.64, + "grad_norm": 1.890625, + "learning_rate": 0.00015347074015790456, + "loss": 2.2331, + "step": 272315 + }, + { + "epoch": 0.64, + "grad_norm": 2.1875, + "learning_rate": 0.00015346917827358536, + "loss": 2.0528, + "step": 272320 + }, + { + "epoch": 0.64, + "grad_norm": 2.453125, + "learning_rate": 0.0001534676163710001, + "loss": 1.916, + "step": 272325 + }, + { + "epoch": 0.64, + "grad_norm": 1.9765625, + "learning_rate": 0.00015346605445014936, + "loss": 2.1542, + "step": 272330 + }, + { + "epoch": 0.64, + "grad_norm": 2.5, + "learning_rate": 0.0001534644925110336, + "loss": 2.1193, + "step": 272335 + }, + { + "epoch": 0.64, + "grad_norm": 1.9921875, + "learning_rate": 0.0001534629305536534, + "loss": 1.904, + "step": 272340 + }, + { + "epoch": 0.64, + "grad_norm": 2.546875, + "learning_rate": 0.00015346136857800934, + "loss": 1.959, + "step": 272345 + }, + { + "epoch": 0.64, + "grad_norm": 2.328125, + "learning_rate": 0.00015345980658410189, + "loss": 2.0468, + "step": 272350 + }, + { + "epoch": 0.64, + "grad_norm": 2.296875, + "learning_rate": 0.00015345824457193156, + "loss": 2.075, + "step": 272355 + }, + { + "epoch": 0.64, + "grad_norm": 2.34375, + "learning_rate": 0.00015345668254149896, + "loss": 1.9146, + "step": 272360 + }, + { + "epoch": 0.64, + "grad_norm": 2.015625, + "learning_rate": 0.0001534551204928046, + "loss": 1.9544, + "step": 272365 + }, + { + "epoch": 0.64, + "grad_norm": 2.109375, + "learning_rate": 0.00015345355842584897, + "loss": 2.017, + "step": 272370 + }, + { + "epoch": 0.64, + "grad_norm": 1.671875, + "learning_rate": 0.00015345199634063264, + "loss": 2.137, + "step": 272375 + }, + { + "epoch": 0.64, + "grad_norm": 2.0625, + "learning_rate": 0.00015345043423715615, + "loss": 2.1435, + "step": 272380 + }, + { + "epoch": 0.64, + "grad_norm": 2.421875, + "learning_rate": 0.00015344887211542004, + "loss": 2.149, + "step": 272385 + }, + { + "epoch": 0.64, + "grad_norm": 1.921875, + "learning_rate": 0.00015344730997542483, + "loss": 2.0939, + "step": 272390 + }, + { + "epoch": 0.64, + "grad_norm": 2.015625, + "learning_rate": 0.000153445747817171, + "loss": 2.1542, + "step": 272395 + }, + { + "epoch": 0.64, + "grad_norm": 2.34375, + "learning_rate": 0.00015344418564065915, + "loss": 2.2022, + "step": 272400 + }, + { + "epoch": 0.64, + "grad_norm": 2.140625, + "learning_rate": 0.0001534426234458898, + "loss": 2.1173, + "step": 272405 + }, + { + "epoch": 0.64, + "grad_norm": 2.5625, + "learning_rate": 0.00015344106123286352, + "loss": 2.0662, + "step": 272410 + }, + { + "epoch": 0.64, + "grad_norm": 2.109375, + "learning_rate": 0.00015343949900158076, + "loss": 2.1339, + "step": 272415 + }, + { + "epoch": 0.64, + "grad_norm": 2.15625, + "learning_rate": 0.00015343793675204212, + "loss": 2.135, + "step": 272420 + }, + { + "epoch": 0.64, + "grad_norm": 2.09375, + "learning_rate": 0.00015343637448424807, + "loss": 2.0223, + "step": 272425 + }, + { + "epoch": 0.64, + "grad_norm": 2.25, + "learning_rate": 0.00015343481219819922, + "loss": 1.9025, + "step": 272430 + }, + { + "epoch": 0.64, + "grad_norm": 2.53125, + "learning_rate": 0.0001534332498938961, + "loss": 2.0986, + "step": 272435 + }, + { + "epoch": 0.64, + "grad_norm": 2.40625, + "learning_rate": 0.00015343168757133915, + "loss": 1.9905, + "step": 272440 + }, + { + "epoch": 0.64, + "grad_norm": 2.53125, + "learning_rate": 0.00015343012523052902, + "loss": 2.156, + "step": 272445 + }, + { + "epoch": 0.64, + "grad_norm": 1.9296875, + "learning_rate": 0.00015342856287146615, + "loss": 1.9648, + "step": 272450 + }, + { + "epoch": 0.64, + "grad_norm": 1.828125, + "learning_rate": 0.00015342700049415114, + "loss": 2.1476, + "step": 272455 + }, + { + "epoch": 0.64, + "grad_norm": 3.15625, + "learning_rate": 0.00015342543809858448, + "loss": 2.0217, + "step": 272460 + }, + { + "epoch": 0.64, + "grad_norm": 1.921875, + "learning_rate": 0.00015342387568476672, + "loss": 1.9998, + "step": 272465 + }, + { + "epoch": 0.64, + "grad_norm": 2.03125, + "learning_rate": 0.0001534223132526984, + "loss": 2.3569, + "step": 272470 + }, + { + "epoch": 0.64, + "grad_norm": 1.578125, + "learning_rate": 0.00015342075080238004, + "loss": 1.9658, + "step": 272475 + }, + { + "epoch": 0.64, + "grad_norm": 2.453125, + "learning_rate": 0.0001534191883338122, + "loss": 2.1808, + "step": 272480 + }, + { + "epoch": 0.64, + "grad_norm": 2.203125, + "learning_rate": 0.0001534176258469954, + "loss": 2.1042, + "step": 272485 + }, + { + "epoch": 0.64, + "grad_norm": 2.015625, + "learning_rate": 0.00015341606334193017, + "loss": 2.2853, + "step": 272490 + }, + { + "epoch": 0.64, + "grad_norm": 1.7421875, + "learning_rate": 0.00015341450081861704, + "loss": 1.9559, + "step": 272495 + }, + { + "epoch": 0.64, + "grad_norm": 2.28125, + "learning_rate": 0.00015341293827705654, + "loss": 2.0791, + "step": 272500 + }, + { + "epoch": 0.64, + "grad_norm": 2.03125, + "learning_rate": 0.0001534113757172492, + "loss": 1.9284, + "step": 272505 + }, + { + "epoch": 0.64, + "grad_norm": 2.234375, + "learning_rate": 0.0001534098131391956, + "loss": 2.2151, + "step": 272510 + }, + { + "epoch": 0.64, + "grad_norm": 1.8671875, + "learning_rate": 0.0001534082505428962, + "loss": 2.1994, + "step": 272515 + }, + { + "epoch": 0.64, + "grad_norm": 2.0, + "learning_rate": 0.0001534066879283516, + "loss": 2.0784, + "step": 272520 + }, + { + "epoch": 0.64, + "grad_norm": 2.5, + "learning_rate": 0.0001534051252955623, + "loss": 2.1275, + "step": 272525 + }, + { + "epoch": 0.64, + "grad_norm": 2.0625, + "learning_rate": 0.00015340356264452885, + "loss": 1.9924, + "step": 272530 + }, + { + "epoch": 0.64, + "grad_norm": 2.046875, + "learning_rate": 0.00015340199997525175, + "loss": 2.0603, + "step": 272535 + }, + { + "epoch": 0.64, + "grad_norm": 2.09375, + "learning_rate": 0.00015340043728773157, + "loss": 2.1161, + "step": 272540 + }, + { + "epoch": 0.64, + "grad_norm": 2.34375, + "learning_rate": 0.00015339887458196884, + "loss": 2.2499, + "step": 272545 + }, + { + "epoch": 0.64, + "grad_norm": 2.21875, + "learning_rate": 0.0001533973118579641, + "loss": 2.1139, + "step": 272550 + }, + { + "epoch": 0.64, + "grad_norm": 1.9375, + "learning_rate": 0.00015339574911571783, + "loss": 2.027, + "step": 272555 + }, + { + "epoch": 0.64, + "grad_norm": 1.9296875, + "learning_rate": 0.00015339418635523062, + "loss": 2.1014, + "step": 272560 + }, + { + "epoch": 0.64, + "grad_norm": 2.359375, + "learning_rate": 0.00015339262357650299, + "loss": 2.1446, + "step": 272565 + }, + { + "epoch": 0.64, + "grad_norm": 2.0625, + "learning_rate": 0.00015339106077953552, + "loss": 2.0395, + "step": 272570 + }, + { + "epoch": 0.64, + "grad_norm": 2.484375, + "learning_rate": 0.00015338949796432863, + "loss": 1.9609, + "step": 272575 + }, + { + "epoch": 0.64, + "grad_norm": 2.609375, + "learning_rate": 0.00015338793513088294, + "loss": 2.1069, + "step": 272580 + }, + { + "epoch": 0.64, + "grad_norm": 2.109375, + "learning_rate": 0.0001533863722791989, + "loss": 1.8513, + "step": 272585 + }, + { + "epoch": 0.64, + "grad_norm": 2.328125, + "learning_rate": 0.0001533848094092772, + "loss": 2.167, + "step": 272590 + }, + { + "epoch": 0.64, + "grad_norm": 2.15625, + "learning_rate": 0.0001533832465211183, + "loss": 2.0363, + "step": 272595 + }, + { + "epoch": 0.64, + "grad_norm": 1.9375, + "learning_rate": 0.00015338168361472265, + "loss": 2.0234, + "step": 272600 + }, + { + "epoch": 0.64, + "grad_norm": 2.3125, + "learning_rate": 0.00015338012069009084, + "loss": 2.2624, + "step": 272605 + }, + { + "epoch": 0.64, + "grad_norm": 2.0625, + "learning_rate": 0.00015337855774722343, + "loss": 2.133, + "step": 272610 + }, + { + "epoch": 0.64, + "grad_norm": 2.21875, + "learning_rate": 0.00015337699478612095, + "loss": 2.0091, + "step": 272615 + }, + { + "epoch": 0.64, + "grad_norm": 2.53125, + "learning_rate": 0.00015337543180678393, + "loss": 2.0664, + "step": 272620 + }, + { + "epoch": 0.64, + "grad_norm": 2.171875, + "learning_rate": 0.00015337386880921288, + "loss": 1.9083, + "step": 272625 + }, + { + "epoch": 0.64, + "grad_norm": 2.15625, + "learning_rate": 0.00015337230579340832, + "loss": 2.143, + "step": 272630 + }, + { + "epoch": 0.64, + "grad_norm": 1.515625, + "learning_rate": 0.00015337074275937086, + "loss": 2.0859, + "step": 272635 + }, + { + "epoch": 0.64, + "grad_norm": 2.28125, + "learning_rate": 0.00015336917970710094, + "loss": 2.0453, + "step": 272640 + }, + { + "epoch": 0.64, + "grad_norm": 1.890625, + "learning_rate": 0.0001533676166365992, + "loss": 2.1147, + "step": 272645 + }, + { + "epoch": 0.64, + "grad_norm": 1.96875, + "learning_rate": 0.00015336605354786603, + "loss": 2.1414, + "step": 272650 + }, + { + "epoch": 0.64, + "grad_norm": 2.109375, + "learning_rate": 0.00015336449044090213, + "loss": 2.1339, + "step": 272655 + }, + { + "epoch": 0.64, + "grad_norm": 2.515625, + "learning_rate": 0.0001533629273157079, + "loss": 2.0929, + "step": 272660 + }, + { + "epoch": 0.64, + "grad_norm": 2.109375, + "learning_rate": 0.00015336136417228396, + "loss": 2.1079, + "step": 272665 + }, + { + "epoch": 0.64, + "grad_norm": 2.0, + "learning_rate": 0.00015335980101063078, + "loss": 2.121, + "step": 272670 + }, + { + "epoch": 0.64, + "grad_norm": 1.984375, + "learning_rate": 0.00015335823783074894, + "loss": 2.2212, + "step": 272675 + }, + { + "epoch": 0.64, + "grad_norm": 1.8125, + "learning_rate": 0.00015335667463263895, + "loss": 2.0827, + "step": 272680 + }, + { + "epoch": 0.64, + "grad_norm": 1.9140625, + "learning_rate": 0.00015335511141630138, + "loss": 1.9945, + "step": 272685 + }, + { + "epoch": 0.64, + "grad_norm": 2.359375, + "learning_rate": 0.00015335354818173672, + "loss": 2.0439, + "step": 272690 + }, + { + "epoch": 0.64, + "grad_norm": 2.265625, + "learning_rate": 0.00015335198492894548, + "loss": 1.9545, + "step": 272695 + }, + { + "epoch": 0.64, + "grad_norm": 2.5625, + "learning_rate": 0.0001533504216579283, + "loss": 2.2506, + "step": 272700 + }, + { + "epoch": 0.64, + "grad_norm": 1.9140625, + "learning_rate": 0.00015334885836868563, + "loss": 2.1215, + "step": 272705 + }, + { + "epoch": 0.64, + "grad_norm": 2.4375, + "learning_rate": 0.00015334729506121802, + "loss": 2.1981, + "step": 272710 + }, + { + "epoch": 0.64, + "grad_norm": 2.0625, + "learning_rate": 0.000153345731735526, + "loss": 2.0584, + "step": 272715 + }, + { + "epoch": 0.64, + "grad_norm": 2.328125, + "learning_rate": 0.0001533441683916101, + "loss": 1.9352, + "step": 272720 + }, + { + "epoch": 0.64, + "grad_norm": 2.453125, + "learning_rate": 0.00015334260502947086, + "loss": 1.9093, + "step": 272725 + }, + { + "epoch": 0.64, + "grad_norm": 2.1875, + "learning_rate": 0.00015334104164910884, + "loss": 2.1715, + "step": 272730 + }, + { + "epoch": 0.64, + "grad_norm": 2.546875, + "learning_rate": 0.00015333947825052455, + "loss": 2.1606, + "step": 272735 + }, + { + "epoch": 0.64, + "grad_norm": 2.4375, + "learning_rate": 0.00015333791483371854, + "loss": 2.1092, + "step": 272740 + }, + { + "epoch": 0.64, + "grad_norm": 2.53125, + "learning_rate": 0.00015333635139869127, + "loss": 2.0575, + "step": 272745 + }, + { + "epoch": 0.64, + "grad_norm": 2.46875, + "learning_rate": 0.0001533347879454434, + "loss": 2.152, + "step": 272750 + }, + { + "epoch": 0.64, + "grad_norm": 2.53125, + "learning_rate": 0.0001533332244739754, + "loss": 1.9305, + "step": 272755 + }, + { + "epoch": 0.64, + "grad_norm": 2.171875, + "learning_rate": 0.00015333166098428777, + "loss": 2.0719, + "step": 272760 + }, + { + "epoch": 0.64, + "grad_norm": 2.3125, + "learning_rate": 0.00015333009747638108, + "loss": 2.3572, + "step": 272765 + }, + { + "epoch": 0.64, + "grad_norm": 2.34375, + "learning_rate": 0.00015332853395025583, + "loss": 2.0854, + "step": 272770 + }, + { + "epoch": 0.64, + "grad_norm": 2.125, + "learning_rate": 0.00015332697040591265, + "loss": 2.01, + "step": 272775 + }, + { + "epoch": 0.64, + "grad_norm": 2.515625, + "learning_rate": 0.000153325406843352, + "loss": 2.0267, + "step": 272780 + }, + { + "epoch": 0.64, + "grad_norm": 2.21875, + "learning_rate": 0.0001533238432625744, + "loss": 1.9693, + "step": 272785 + }, + { + "epoch": 0.64, + "grad_norm": 2.5, + "learning_rate": 0.00015332227966358044, + "loss": 2.0079, + "step": 272790 + }, + { + "epoch": 0.64, + "grad_norm": 1.75, + "learning_rate": 0.00015332071604637058, + "loss": 2.0706, + "step": 272795 + }, + { + "epoch": 0.64, + "grad_norm": 2.125, + "learning_rate": 0.00015331915241094542, + "loss": 2.0675, + "step": 272800 + }, + { + "epoch": 0.64, + "grad_norm": 2.59375, + "learning_rate": 0.00015331758875730545, + "loss": 2.2974, + "step": 272805 + }, + { + "epoch": 0.64, + "grad_norm": 1.734375, + "learning_rate": 0.00015331602508545123, + "loss": 2.1753, + "step": 272810 + }, + { + "epoch": 0.64, + "grad_norm": 2.125, + "learning_rate": 0.00015331446139538332, + "loss": 2.0592, + "step": 272815 + }, + { + "epoch": 0.64, + "grad_norm": 2.09375, + "learning_rate": 0.0001533128976871022, + "loss": 2.0955, + "step": 272820 + }, + { + "epoch": 0.64, + "grad_norm": 2.25, + "learning_rate": 0.00015331133396060843, + "loss": 1.9793, + "step": 272825 + }, + { + "epoch": 0.64, + "grad_norm": 2.171875, + "learning_rate": 0.00015330977021590255, + "loss": 2.0668, + "step": 272830 + }, + { + "epoch": 0.64, + "grad_norm": 1.8828125, + "learning_rate": 0.00015330820645298508, + "loss": 2.0357, + "step": 272835 + }, + { + "epoch": 0.64, + "grad_norm": 2.21875, + "learning_rate": 0.00015330664267185654, + "loss": 2.2015, + "step": 272840 + }, + { + "epoch": 0.64, + "grad_norm": 1.828125, + "learning_rate": 0.00015330507887251752, + "loss": 2.0074, + "step": 272845 + }, + { + "epoch": 0.64, + "grad_norm": 2.078125, + "learning_rate": 0.0001533035150549685, + "loss": 2.0564, + "step": 272850 + }, + { + "epoch": 0.64, + "grad_norm": 2.140625, + "learning_rate": 0.00015330195121921004, + "loss": 2.1819, + "step": 272855 + }, + { + "epoch": 0.64, + "grad_norm": 2.125, + "learning_rate": 0.00015330038736524262, + "loss": 2.0254, + "step": 272860 + }, + { + "epoch": 0.64, + "grad_norm": 2.296875, + "learning_rate": 0.0001532988234930669, + "loss": 2.0462, + "step": 272865 + }, + { + "epoch": 0.64, + "grad_norm": 2.0, + "learning_rate": 0.00015329725960268328, + "loss": 1.9215, + "step": 272870 + }, + { + "epoch": 0.64, + "grad_norm": 2.015625, + "learning_rate": 0.00015329569569409238, + "loss": 1.9649, + "step": 272875 + }, + { + "epoch": 0.64, + "grad_norm": 2.46875, + "learning_rate": 0.00015329413176729467, + "loss": 1.9705, + "step": 272880 + }, + { + "epoch": 0.64, + "grad_norm": 2.0, + "learning_rate": 0.00015329256782229073, + "loss": 2.1825, + "step": 272885 + }, + { + "epoch": 0.64, + "grad_norm": 2.0, + "learning_rate": 0.00015329100385908114, + "loss": 2.0782, + "step": 272890 + }, + { + "epoch": 0.64, + "grad_norm": 2.171875, + "learning_rate": 0.0001532894398776663, + "loss": 2.1568, + "step": 272895 + }, + { + "epoch": 0.64, + "grad_norm": 2.046875, + "learning_rate": 0.00015328787587804685, + "loss": 1.9258, + "step": 272900 + }, + { + "epoch": 0.64, + "grad_norm": 2.203125, + "learning_rate": 0.00015328631186022326, + "loss": 1.9747, + "step": 272905 + }, + { + "epoch": 0.64, + "grad_norm": 1.9921875, + "learning_rate": 0.00015328474782419615, + "loss": 1.9071, + "step": 272910 + }, + { + "epoch": 0.64, + "grad_norm": 2.15625, + "learning_rate": 0.00015328318376996598, + "loss": 2.2019, + "step": 272915 + }, + { + "epoch": 0.64, + "grad_norm": 2.328125, + "learning_rate": 0.00015328161969753332, + "loss": 1.9405, + "step": 272920 + }, + { + "epoch": 0.64, + "grad_norm": 1.9765625, + "learning_rate": 0.0001532800556068987, + "loss": 2.0032, + "step": 272925 + }, + { + "epoch": 0.64, + "grad_norm": 2.234375, + "learning_rate": 0.00015327849149806262, + "loss": 1.9593, + "step": 272930 + }, + { + "epoch": 0.64, + "grad_norm": 2.34375, + "learning_rate": 0.00015327692737102564, + "loss": 2.0927, + "step": 272935 + }, + { + "epoch": 0.64, + "grad_norm": 2.09375, + "learning_rate": 0.0001532753632257883, + "loss": 1.9162, + "step": 272940 + }, + { + "epoch": 0.64, + "grad_norm": 1.9765625, + "learning_rate": 0.00015327379906235117, + "loss": 1.9818, + "step": 272945 + }, + { + "epoch": 0.64, + "grad_norm": 2.625, + "learning_rate": 0.00015327223488071468, + "loss": 2.2628, + "step": 272950 + }, + { + "epoch": 0.64, + "grad_norm": 1.984375, + "learning_rate": 0.0001532706706808795, + "loss": 2.1458, + "step": 272955 + }, + { + "epoch": 0.64, + "grad_norm": 2.359375, + "learning_rate": 0.000153269106462846, + "loss": 2.037, + "step": 272960 + }, + { + "epoch": 0.64, + "grad_norm": 2.203125, + "learning_rate": 0.0001532675422266149, + "loss": 2.2756, + "step": 272965 + }, + { + "epoch": 0.64, + "grad_norm": 2.234375, + "learning_rate": 0.0001532659779721866, + "loss": 1.9916, + "step": 272970 + }, + { + "epoch": 0.64, + "grad_norm": 2.75, + "learning_rate": 0.00015326441369956166, + "loss": 2.1008, + "step": 272975 + }, + { + "epoch": 0.64, + "grad_norm": 2.203125, + "learning_rate": 0.0001532628494087407, + "loss": 2.0901, + "step": 272980 + }, + { + "epoch": 0.64, + "grad_norm": 2.953125, + "learning_rate": 0.0001532612850997241, + "loss": 2.2028, + "step": 272985 + }, + { + "epoch": 0.64, + "grad_norm": 2.015625, + "learning_rate": 0.00015325972077251252, + "loss": 1.9828, + "step": 272990 + }, + { + "epoch": 0.64, + "grad_norm": 1.984375, + "learning_rate": 0.0001532581564271064, + "loss": 1.9996, + "step": 272995 + }, + { + "epoch": 0.64, + "grad_norm": 2.203125, + "learning_rate": 0.0001532565920635064, + "loss": 2.1985, + "step": 273000 + }, + { + "epoch": 0.64, + "grad_norm": 1.9375, + "learning_rate": 0.00015325502768171297, + "loss": 1.9649, + "step": 273005 + }, + { + "epoch": 0.64, + "grad_norm": 2.09375, + "learning_rate": 0.00015325346328172663, + "loss": 2.0741, + "step": 273010 + }, + { + "epoch": 0.64, + "grad_norm": 2.03125, + "learning_rate": 0.00015325189886354794, + "loss": 1.9899, + "step": 273015 + }, + { + "epoch": 0.64, + "grad_norm": 2.265625, + "learning_rate": 0.00015325033442717745, + "loss": 2.0806, + "step": 273020 + }, + { + "epoch": 0.64, + "grad_norm": 2.046875, + "learning_rate": 0.0001532487699726157, + "loss": 2.0893, + "step": 273025 + }, + { + "epoch": 0.64, + "grad_norm": 2.421875, + "learning_rate": 0.00015324720549986318, + "loss": 2.083, + "step": 273030 + }, + { + "epoch": 0.64, + "grad_norm": 1.8359375, + "learning_rate": 0.00015324564100892046, + "loss": 1.9232, + "step": 273035 + }, + { + "epoch": 0.64, + "grad_norm": 1.875, + "learning_rate": 0.00015324407649978806, + "loss": 2.2101, + "step": 273040 + }, + { + "epoch": 0.64, + "grad_norm": 2.609375, + "learning_rate": 0.0001532425119724665, + "loss": 2.0212, + "step": 273045 + }, + { + "epoch": 0.64, + "grad_norm": 2.234375, + "learning_rate": 0.00015324094742695635, + "loss": 1.8805, + "step": 273050 + }, + { + "epoch": 0.64, + "grad_norm": 2.59375, + "learning_rate": 0.0001532393828632581, + "loss": 1.984, + "step": 273055 + }, + { + "epoch": 0.64, + "grad_norm": 2.1875, + "learning_rate": 0.00015323781828137235, + "loss": 1.8424, + "step": 273060 + }, + { + "epoch": 0.64, + "grad_norm": 1.671875, + "learning_rate": 0.00015323625368129958, + "loss": 2.0518, + "step": 273065 + }, + { + "epoch": 0.64, + "grad_norm": 2.140625, + "learning_rate": 0.00015323468906304035, + "loss": 2.2186, + "step": 273070 + }, + { + "epoch": 0.64, + "grad_norm": 2.21875, + "learning_rate": 0.00015323312442659515, + "loss": 2.0378, + "step": 273075 + }, + { + "epoch": 0.64, + "grad_norm": 2.109375, + "learning_rate": 0.0001532315597719646, + "loss": 2.1519, + "step": 273080 + }, + { + "epoch": 0.64, + "grad_norm": 2.515625, + "learning_rate": 0.00015322999509914914, + "loss": 2.2036, + "step": 273085 + }, + { + "epoch": 0.64, + "grad_norm": 2.078125, + "learning_rate": 0.00015322843040814938, + "loss": 1.9639, + "step": 273090 + }, + { + "epoch": 0.64, + "grad_norm": 2.625, + "learning_rate": 0.00015322686569896581, + "loss": 2.1421, + "step": 273095 + }, + { + "epoch": 0.64, + "grad_norm": 2.390625, + "learning_rate": 0.00015322530097159899, + "loss": 2.0011, + "step": 273100 + }, + { + "epoch": 0.64, + "grad_norm": 1.9375, + "learning_rate": 0.0001532237362260494, + "loss": 2.0583, + "step": 273105 + }, + { + "epoch": 0.64, + "grad_norm": 1.734375, + "learning_rate": 0.00015322217146231766, + "loss": 2.1288, + "step": 273110 + }, + { + "epoch": 0.64, + "grad_norm": 2.359375, + "learning_rate": 0.00015322060668040424, + "loss": 1.9587, + "step": 273115 + }, + { + "epoch": 0.64, + "grad_norm": 2.03125, + "learning_rate": 0.0001532190418803097, + "loss": 2.0937, + "step": 273120 + }, + { + "epoch": 0.64, + "grad_norm": 1.8359375, + "learning_rate": 0.00015321747706203458, + "loss": 2.1543, + "step": 273125 + }, + { + "epoch": 0.64, + "grad_norm": 2.296875, + "learning_rate": 0.0001532159122255794, + "loss": 2.2168, + "step": 273130 + }, + { + "epoch": 0.64, + "grad_norm": 2.140625, + "learning_rate": 0.00015321434737094469, + "loss": 2.1393, + "step": 273135 + }, + { + "epoch": 0.64, + "grad_norm": 2.28125, + "learning_rate": 0.000153212782498131, + "loss": 2.2106, + "step": 273140 + }, + { + "epoch": 0.64, + "grad_norm": 1.9609375, + "learning_rate": 0.00015321121760713885, + "loss": 1.9591, + "step": 273145 + }, + { + "epoch": 0.64, + "grad_norm": 2.15625, + "learning_rate": 0.0001532096526979688, + "loss": 2.1285, + "step": 273150 + }, + { + "epoch": 0.64, + "grad_norm": 1.859375, + "learning_rate": 0.00015320808777062134, + "loss": 2.0013, + "step": 273155 + }, + { + "epoch": 0.64, + "grad_norm": 2.296875, + "learning_rate": 0.0001532065228250971, + "loss": 1.9855, + "step": 273160 + }, + { + "epoch": 0.64, + "grad_norm": 1.9921875, + "learning_rate": 0.00015320495786139645, + "loss": 2.0873, + "step": 273165 + }, + { + "epoch": 0.64, + "grad_norm": 2.515625, + "learning_rate": 0.0001532033928795201, + "loss": 2.2037, + "step": 273170 + }, + { + "epoch": 0.64, + "grad_norm": 1.96875, + "learning_rate": 0.00015320182787946846, + "loss": 2.1385, + "step": 273175 + }, + { + "epoch": 0.64, + "grad_norm": 2.234375, + "learning_rate": 0.00015320026286124211, + "loss": 1.9492, + "step": 273180 + }, + { + "epoch": 0.64, + "grad_norm": 2.28125, + "learning_rate": 0.00015319869782484162, + "loss": 1.9581, + "step": 273185 + }, + { + "epoch": 0.64, + "grad_norm": 1.7265625, + "learning_rate": 0.00015319713277026748, + "loss": 2.0032, + "step": 273190 + }, + { + "epoch": 0.64, + "grad_norm": 1.90625, + "learning_rate": 0.00015319556769752022, + "loss": 2.0147, + "step": 273195 + }, + { + "epoch": 0.64, + "grad_norm": 1.765625, + "learning_rate": 0.00015319400260660037, + "loss": 2.0351, + "step": 273200 + }, + { + "epoch": 0.64, + "grad_norm": 2.46875, + "learning_rate": 0.00015319243749750854, + "loss": 1.9984, + "step": 273205 + }, + { + "epoch": 0.64, + "grad_norm": 1.953125, + "learning_rate": 0.00015319087237024518, + "loss": 2.0502, + "step": 273210 + }, + { + "epoch": 0.64, + "grad_norm": 2.1875, + "learning_rate": 0.00015318930722481083, + "loss": 1.8222, + "step": 273215 + }, + { + "epoch": 0.64, + "grad_norm": 2.328125, + "learning_rate": 0.00015318774206120607, + "loss": 2.1495, + "step": 273220 + }, + { + "epoch": 0.64, + "grad_norm": 2.28125, + "learning_rate": 0.0001531861768794314, + "loss": 1.9167, + "step": 273225 + }, + { + "epoch": 0.64, + "grad_norm": 2.171875, + "learning_rate": 0.00015318461167948742, + "loss": 2.1641, + "step": 273230 + }, + { + "epoch": 0.64, + "grad_norm": 2.46875, + "learning_rate": 0.00015318304646137456, + "loss": 2.1552, + "step": 273235 + }, + { + "epoch": 0.64, + "grad_norm": 1.8671875, + "learning_rate": 0.0001531814812250934, + "loss": 2.2225, + "step": 273240 + }, + { + "epoch": 0.64, + "grad_norm": 1.921875, + "learning_rate": 0.0001531799159706445, + "loss": 2.1461, + "step": 273245 + }, + { + "epoch": 0.64, + "grad_norm": 2.296875, + "learning_rate": 0.0001531783506980284, + "loss": 2.0667, + "step": 273250 + }, + { + "epoch": 0.64, + "grad_norm": 2.078125, + "learning_rate": 0.00015317678540724557, + "loss": 2.1927, + "step": 273255 + }, + { + "epoch": 0.64, + "grad_norm": 2.0, + "learning_rate": 0.0001531752200982966, + "loss": 2.0091, + "step": 273260 + }, + { + "epoch": 0.64, + "grad_norm": 2.515625, + "learning_rate": 0.000153173654771182, + "loss": 2.0501, + "step": 273265 + }, + { + "epoch": 0.64, + "grad_norm": 2.109375, + "learning_rate": 0.00015317208942590234, + "loss": 2.0555, + "step": 273270 + }, + { + "epoch": 0.64, + "grad_norm": 2.40625, + "learning_rate": 0.00015317052406245814, + "loss": 1.9991, + "step": 273275 + }, + { + "epoch": 0.64, + "grad_norm": 1.984375, + "learning_rate": 0.0001531689586808499, + "loss": 2.1565, + "step": 273280 + }, + { + "epoch": 0.64, + "grad_norm": 2.1875, + "learning_rate": 0.00015316739328107817, + "loss": 2.0525, + "step": 273285 + }, + { + "epoch": 0.64, + "grad_norm": 2.578125, + "learning_rate": 0.00015316582786314352, + "loss": 2.1674, + "step": 273290 + }, + { + "epoch": 0.64, + "grad_norm": 2.015625, + "learning_rate": 0.0001531642624270464, + "loss": 2.0914, + "step": 273295 + }, + { + "epoch": 0.64, + "grad_norm": 2.234375, + "learning_rate": 0.0001531626969727875, + "loss": 2.1347, + "step": 273300 + }, + { + "epoch": 0.64, + "grad_norm": 2.09375, + "learning_rate": 0.00015316113150036717, + "loss": 2.0168, + "step": 273305 + }, + { + "epoch": 0.64, + "grad_norm": 2.296875, + "learning_rate": 0.00015315956600978607, + "loss": 2.0375, + "step": 273310 + }, + { + "epoch": 0.64, + "grad_norm": 2.25, + "learning_rate": 0.0001531580005010447, + "loss": 2.0672, + "step": 273315 + }, + { + "epoch": 0.64, + "grad_norm": 2.375, + "learning_rate": 0.0001531564349741436, + "loss": 2.1063, + "step": 273320 + }, + { + "epoch": 0.64, + "grad_norm": 2.140625, + "learning_rate": 0.0001531548694290833, + "loss": 2.1759, + "step": 273325 + }, + { + "epoch": 0.64, + "grad_norm": 2.03125, + "learning_rate": 0.00015315330386586433, + "loss": 1.9206, + "step": 273330 + }, + { + "epoch": 0.64, + "grad_norm": 2.078125, + "learning_rate": 0.0001531517382844872, + "loss": 2.2278, + "step": 273335 + }, + { + "epoch": 0.64, + "grad_norm": 2.359375, + "learning_rate": 0.0001531501726849525, + "loss": 2.2017, + "step": 273340 + }, + { + "epoch": 0.64, + "grad_norm": 1.7109375, + "learning_rate": 0.00015314860706726072, + "loss": 1.9975, + "step": 273345 + }, + { + "epoch": 0.64, + "grad_norm": 2.09375, + "learning_rate": 0.00015314704143141243, + "loss": 1.9735, + "step": 273350 + }, + { + "epoch": 0.64, + "grad_norm": 2.140625, + "learning_rate": 0.00015314547577740815, + "loss": 2.1451, + "step": 273355 + }, + { + "epoch": 0.64, + "grad_norm": 2.015625, + "learning_rate": 0.00015314391010524836, + "loss": 2.1461, + "step": 273360 + }, + { + "epoch": 0.64, + "grad_norm": 2.421875, + "learning_rate": 0.0001531423444149337, + "loss": 2.0379, + "step": 273365 + }, + { + "epoch": 0.64, + "grad_norm": 2.453125, + "learning_rate": 0.00015314077870646467, + "loss": 2.0412, + "step": 273370 + }, + { + "epoch": 0.64, + "grad_norm": 2.25, + "learning_rate": 0.00015313921297984174, + "loss": 2.0121, + "step": 273375 + }, + { + "epoch": 0.64, + "grad_norm": 2.03125, + "learning_rate": 0.0001531376472350655, + "loss": 2.0818, + "step": 273380 + }, + { + "epoch": 0.64, + "grad_norm": 2.0, + "learning_rate": 0.0001531360814721365, + "loss": 2.2183, + "step": 273385 + }, + { + "epoch": 0.64, + "grad_norm": 2.3125, + "learning_rate": 0.0001531345156910552, + "loss": 1.868, + "step": 273390 + }, + { + "epoch": 0.64, + "grad_norm": 2.6875, + "learning_rate": 0.00015313294989182223, + "loss": 1.8938, + "step": 273395 + }, + { + "epoch": 0.64, + "grad_norm": 2.265625, + "learning_rate": 0.0001531313840744381, + "loss": 2.029, + "step": 273400 + }, + { + "epoch": 0.64, + "grad_norm": 2.203125, + "learning_rate": 0.00015312981823890323, + "loss": 1.886, + "step": 273405 + }, + { + "epoch": 0.64, + "grad_norm": 2.078125, + "learning_rate": 0.00015312825238521834, + "loss": 2.1746, + "step": 273410 + }, + { + "epoch": 0.64, + "grad_norm": 2.296875, + "learning_rate": 0.00015312668651338383, + "loss": 2.026, + "step": 273415 + }, + { + "epoch": 0.64, + "grad_norm": 2.625, + "learning_rate": 0.00015312512062340031, + "loss": 2.0524, + "step": 273420 + }, + { + "epoch": 0.64, + "grad_norm": 2.578125, + "learning_rate": 0.00015312355471526828, + "loss": 2.2201, + "step": 273425 + }, + { + "epoch": 0.64, + "grad_norm": 2.21875, + "learning_rate": 0.00015312198878898826, + "loss": 2.0806, + "step": 273430 + }, + { + "epoch": 0.64, + "grad_norm": 1.796875, + "learning_rate": 0.00015312042284456085, + "loss": 2.1977, + "step": 273435 + }, + { + "epoch": 0.64, + "grad_norm": 2.421875, + "learning_rate": 0.00015311885688198647, + "loss": 1.956, + "step": 273440 + }, + { + "epoch": 0.64, + "grad_norm": 2.234375, + "learning_rate": 0.00015311729090126578, + "loss": 2.1284, + "step": 273445 + }, + { + "epoch": 0.64, + "grad_norm": 2.53125, + "learning_rate": 0.00015311572490239926, + "loss": 2.1882, + "step": 273450 + }, + { + "epoch": 0.64, + "grad_norm": 3.28125, + "learning_rate": 0.00015311415888538744, + "loss": 2.2329, + "step": 273455 + }, + { + "epoch": 0.64, + "grad_norm": 2.515625, + "learning_rate": 0.00015311259285023088, + "loss": 2.1145, + "step": 273460 + }, + { + "epoch": 0.64, + "grad_norm": 1.90625, + "learning_rate": 0.00015311102679693006, + "loss": 2.1623, + "step": 273465 + }, + { + "epoch": 0.64, + "grad_norm": 2.28125, + "learning_rate": 0.00015310946072548553, + "loss": 2.1435, + "step": 273470 + }, + { + "epoch": 0.64, + "grad_norm": 2.375, + "learning_rate": 0.00015310789463589787, + "loss": 2.0886, + "step": 273475 + }, + { + "epoch": 0.64, + "grad_norm": 2.328125, + "learning_rate": 0.0001531063285281676, + "loss": 2.1458, + "step": 273480 + }, + { + "epoch": 0.64, + "grad_norm": 2.375, + "learning_rate": 0.00015310476240229526, + "loss": 2.1289, + "step": 273485 + }, + { + "epoch": 0.64, + "grad_norm": 1.9453125, + "learning_rate": 0.0001531031962582813, + "loss": 2.1119, + "step": 273490 + }, + { + "epoch": 0.64, + "grad_norm": 1.9375, + "learning_rate": 0.00015310163009612637, + "loss": 2.0798, + "step": 273495 + }, + { + "epoch": 0.64, + "grad_norm": 2.15625, + "learning_rate": 0.000153100063915831, + "loss": 2.0486, + "step": 273500 + }, + { + "epoch": 0.64, + "grad_norm": 2.328125, + "learning_rate": 0.00015309849771739565, + "loss": 1.8828, + "step": 273505 + }, + { + "epoch": 0.64, + "grad_norm": 2.484375, + "learning_rate": 0.00015309693150082087, + "loss": 1.9562, + "step": 273510 + }, + { + "epoch": 0.64, + "grad_norm": 2.890625, + "learning_rate": 0.00015309536526610725, + "loss": 2.1796, + "step": 273515 + }, + { + "epoch": 0.64, + "grad_norm": 2.78125, + "learning_rate": 0.00015309379901325528, + "loss": 2.2117, + "step": 273520 + }, + { + "epoch": 0.64, + "grad_norm": 2.140625, + "learning_rate": 0.00015309223274226547, + "loss": 1.9687, + "step": 273525 + }, + { + "epoch": 0.64, + "grad_norm": 2.109375, + "learning_rate": 0.0001530906664531384, + "loss": 2.2133, + "step": 273530 + }, + { + "epoch": 0.64, + "grad_norm": 2.109375, + "learning_rate": 0.00015308910014587464, + "loss": 2.1353, + "step": 273535 + }, + { + "epoch": 0.64, + "grad_norm": 2.0625, + "learning_rate": 0.00015308753382047466, + "loss": 1.8261, + "step": 273540 + }, + { + "epoch": 0.64, + "grad_norm": 1.96875, + "learning_rate": 0.000153085967476939, + "loss": 1.9258, + "step": 273545 + }, + { + "epoch": 0.64, + "grad_norm": 2.1875, + "learning_rate": 0.0001530844011152682, + "loss": 2.1056, + "step": 273550 + }, + { + "epoch": 0.64, + "grad_norm": 2.15625, + "learning_rate": 0.00015308283473546282, + "loss": 2.1858, + "step": 273555 + }, + { + "epoch": 0.64, + "grad_norm": 2.40625, + "learning_rate": 0.0001530812683375234, + "loss": 1.9731, + "step": 273560 + }, + { + "epoch": 0.64, + "grad_norm": 1.8671875, + "learning_rate": 0.00015307970192145044, + "loss": 2.0179, + "step": 273565 + }, + { + "epoch": 0.64, + "grad_norm": 1.90625, + "learning_rate": 0.0001530781354872445, + "loss": 2.0893, + "step": 273570 + }, + { + "epoch": 0.64, + "grad_norm": 2.28125, + "learning_rate": 0.0001530765690349061, + "loss": 2.124, + "step": 273575 + }, + { + "epoch": 0.64, + "grad_norm": 1.78125, + "learning_rate": 0.0001530750025644358, + "loss": 1.8349, + "step": 273580 + }, + { + "epoch": 0.64, + "grad_norm": 2.609375, + "learning_rate": 0.0001530734360758341, + "loss": 2.125, + "step": 273585 + }, + { + "epoch": 0.64, + "grad_norm": 2.078125, + "learning_rate": 0.00015307186956910152, + "loss": 2.0232, + "step": 273590 + }, + { + "epoch": 0.64, + "grad_norm": 1.859375, + "learning_rate": 0.00015307030304423868, + "loss": 2.0454, + "step": 273595 + }, + { + "epoch": 0.64, + "grad_norm": 2.0625, + "learning_rate": 0.00015306873650124602, + "loss": 2.1181, + "step": 273600 + }, + { + "epoch": 0.64, + "grad_norm": 2.09375, + "learning_rate": 0.00015306716994012415, + "loss": 2.2362, + "step": 273605 + }, + { + "epoch": 0.64, + "grad_norm": 1.921875, + "learning_rate": 0.00015306560336087355, + "loss": 2.0934, + "step": 273610 + }, + { + "epoch": 0.64, + "grad_norm": 2.171875, + "learning_rate": 0.00015306403676349482, + "loss": 2.131, + "step": 273615 + }, + { + "epoch": 0.64, + "grad_norm": 2.0625, + "learning_rate": 0.0001530624701479884, + "loss": 2.0576, + "step": 273620 + }, + { + "epoch": 0.64, + "grad_norm": 1.828125, + "learning_rate": 0.00015306090351435491, + "loss": 1.8169, + "step": 273625 + }, + { + "epoch": 0.64, + "grad_norm": 1.9296875, + "learning_rate": 0.00015305933686259482, + "loss": 2.1407, + "step": 273630 + }, + { + "epoch": 0.64, + "grad_norm": 1.9375, + "learning_rate": 0.00015305777019270872, + "loss": 2.0021, + "step": 273635 + }, + { + "epoch": 0.64, + "grad_norm": 1.9765625, + "learning_rate": 0.00015305620350469714, + "loss": 2.1715, + "step": 273640 + }, + { + "epoch": 0.64, + "grad_norm": 1.9765625, + "learning_rate": 0.00015305463679856057, + "loss": 2.0408, + "step": 273645 + }, + { + "epoch": 0.64, + "grad_norm": 2.28125, + "learning_rate": 0.00015305307007429958, + "loss": 2.1576, + "step": 273650 + }, + { + "epoch": 0.64, + "grad_norm": 2.3125, + "learning_rate": 0.0001530515033319147, + "loss": 1.9108, + "step": 273655 + }, + { + "epoch": 0.64, + "grad_norm": 1.9765625, + "learning_rate": 0.00015304993657140647, + "loss": 2.0454, + "step": 273660 + }, + { + "epoch": 0.64, + "grad_norm": 1.9453125, + "learning_rate": 0.00015304836979277542, + "loss": 2.0914, + "step": 273665 + }, + { + "epoch": 0.64, + "grad_norm": 2.4375, + "learning_rate": 0.0001530468029960221, + "loss": 2.0329, + "step": 273670 + }, + { + "epoch": 0.64, + "grad_norm": 2.09375, + "learning_rate": 0.00015304523618114702, + "loss": 2.0208, + "step": 273675 + }, + { + "epoch": 0.64, + "grad_norm": 2.34375, + "learning_rate": 0.0001530436693481507, + "loss": 1.9309, + "step": 273680 + }, + { + "epoch": 0.64, + "grad_norm": 2.359375, + "learning_rate": 0.0001530421024970337, + "loss": 2.201, + "step": 273685 + }, + { + "epoch": 0.64, + "grad_norm": 1.8984375, + "learning_rate": 0.00015304053562779658, + "loss": 2.0145, + "step": 273690 + }, + { + "epoch": 0.64, + "grad_norm": 2.203125, + "learning_rate": 0.00015303896874043987, + "loss": 2.1438, + "step": 273695 + }, + { + "epoch": 0.64, + "grad_norm": 2.21875, + "learning_rate": 0.00015303740183496406, + "loss": 2.0823, + "step": 273700 + }, + { + "epoch": 0.64, + "grad_norm": 2.484375, + "learning_rate": 0.0001530358349113697, + "loss": 1.9022, + "step": 273705 + }, + { + "epoch": 0.64, + "grad_norm": 2.21875, + "learning_rate": 0.00015303426796965734, + "loss": 1.8862, + "step": 273710 + }, + { + "epoch": 0.64, + "grad_norm": 2.328125, + "learning_rate": 0.00015303270100982755, + "loss": 2.0421, + "step": 273715 + }, + { + "epoch": 0.64, + "grad_norm": 2.828125, + "learning_rate": 0.00015303113403188076, + "loss": 1.9849, + "step": 273720 + }, + { + "epoch": 0.64, + "grad_norm": 2.03125, + "learning_rate": 0.00015302956703581763, + "loss": 2.1173, + "step": 273725 + }, + { + "epoch": 0.64, + "grad_norm": 2.25, + "learning_rate": 0.00015302800002163863, + "loss": 1.998, + "step": 273730 + }, + { + "epoch": 0.64, + "grad_norm": 2.015625, + "learning_rate": 0.00015302643298934426, + "loss": 2.0163, + "step": 273735 + }, + { + "epoch": 0.64, + "grad_norm": 1.7734375, + "learning_rate": 0.00015302486593893514, + "loss": 1.8658, + "step": 273740 + }, + { + "epoch": 0.64, + "grad_norm": 2.390625, + "learning_rate": 0.00015302329887041175, + "loss": 2.0578, + "step": 273745 + }, + { + "epoch": 0.64, + "grad_norm": 2.40625, + "learning_rate": 0.00015302173178377467, + "loss": 2.106, + "step": 273750 + }, + { + "epoch": 0.64, + "grad_norm": 2.390625, + "learning_rate": 0.0001530201646790244, + "loss": 2.0808, + "step": 273755 + }, + { + "epoch": 0.64, + "grad_norm": 2.125, + "learning_rate": 0.00015301859755616147, + "loss": 2.1098, + "step": 273760 + }, + { + "epoch": 0.64, + "grad_norm": 2.265625, + "learning_rate": 0.00015301703041518638, + "loss": 2.2353, + "step": 273765 + }, + { + "epoch": 0.64, + "grad_norm": 2.0, + "learning_rate": 0.00015301546325609974, + "loss": 2.1728, + "step": 273770 + }, + { + "epoch": 0.64, + "grad_norm": 2.265625, + "learning_rate": 0.0001530138960789021, + "loss": 2.1318, + "step": 273775 + }, + { + "epoch": 0.64, + "grad_norm": 2.46875, + "learning_rate": 0.00015301232888359392, + "loss": 2.099, + "step": 273780 + }, + { + "epoch": 0.64, + "grad_norm": 2.1875, + "learning_rate": 0.00015301076167017577, + "loss": 1.9336, + "step": 273785 + }, + { + "epoch": 0.64, + "grad_norm": 2.375, + "learning_rate": 0.00015300919443864814, + "loss": 2.1137, + "step": 273790 + }, + { + "epoch": 0.64, + "grad_norm": 1.8828125, + "learning_rate": 0.00015300762718901164, + "loss": 2.1761, + "step": 273795 + }, + { + "epoch": 0.64, + "grad_norm": 2.15625, + "learning_rate": 0.0001530060599212668, + "loss": 2.2985, + "step": 273800 + }, + { + "epoch": 0.64, + "grad_norm": 1.984375, + "learning_rate": 0.0001530044926354141, + "loss": 2.0507, + "step": 273805 + }, + { + "epoch": 0.64, + "grad_norm": 2.078125, + "learning_rate": 0.00015300292533145413, + "loss": 2.1403, + "step": 273810 + }, + { + "epoch": 0.64, + "grad_norm": 2.375, + "learning_rate": 0.00015300135800938738, + "loss": 2.0147, + "step": 273815 + }, + { + "epoch": 0.64, + "grad_norm": 1.9765625, + "learning_rate": 0.0001529997906692144, + "loss": 1.9107, + "step": 273820 + }, + { + "epoch": 0.64, + "grad_norm": 2.40625, + "learning_rate": 0.00015299822331093572, + "loss": 2.0497, + "step": 273825 + }, + { + "epoch": 0.64, + "grad_norm": 2.203125, + "learning_rate": 0.00015299665593455187, + "loss": 2.0949, + "step": 273830 + }, + { + "epoch": 0.64, + "grad_norm": 1.9609375, + "learning_rate": 0.00015299508854006345, + "loss": 2.0507, + "step": 273835 + }, + { + "epoch": 0.64, + "grad_norm": 1.9609375, + "learning_rate": 0.00015299352112747093, + "loss": 1.9414, + "step": 273840 + }, + { + "epoch": 0.64, + "grad_norm": 2.640625, + "learning_rate": 0.00015299195369677487, + "loss": 1.9387, + "step": 273845 + }, + { + "epoch": 0.64, + "grad_norm": 2.1875, + "learning_rate": 0.00015299038624797578, + "loss": 2.1068, + "step": 273850 + }, + { + "epoch": 0.64, + "grad_norm": 2.421875, + "learning_rate": 0.00015298881878107422, + "loss": 2.016, + "step": 273855 + }, + { + "epoch": 0.64, + "grad_norm": 2.40625, + "learning_rate": 0.0001529872512960707, + "loss": 2.0802, + "step": 273860 + }, + { + "epoch": 0.64, + "grad_norm": 1.96875, + "learning_rate": 0.00015298568379296584, + "loss": 2.0825, + "step": 273865 + }, + { + "epoch": 0.64, + "grad_norm": 2.25, + "learning_rate": 0.00015298411627176, + "loss": 1.9898, + "step": 273870 + }, + { + "epoch": 0.64, + "grad_norm": 2.359375, + "learning_rate": 0.00015298254873245392, + "loss": 2.0317, + "step": 273875 + }, + { + "epoch": 0.64, + "grad_norm": 1.984375, + "learning_rate": 0.000152980981175048, + "loss": 2.0266, + "step": 273880 + }, + { + "epoch": 0.64, + "grad_norm": 2.3125, + "learning_rate": 0.00015297941359954283, + "loss": 2.1928, + "step": 273885 + }, + { + "epoch": 0.64, + "grad_norm": 1.9375, + "learning_rate": 0.00015297784600593894, + "loss": 1.9808, + "step": 273890 + }, + { + "epoch": 0.64, + "grad_norm": 1.9765625, + "learning_rate": 0.00015297627839423682, + "loss": 2.1247, + "step": 273895 + }, + { + "epoch": 0.64, + "grad_norm": 1.96875, + "learning_rate": 0.00015297471076443706, + "loss": 2.081, + "step": 273900 + }, + { + "epoch": 0.64, + "grad_norm": 2.09375, + "learning_rate": 0.00015297314311654017, + "loss": 1.9932, + "step": 273905 + }, + { + "epoch": 0.64, + "grad_norm": 2.34375, + "learning_rate": 0.00015297157545054673, + "loss": 2.0787, + "step": 273910 + }, + { + "epoch": 0.64, + "grad_norm": 2.078125, + "learning_rate": 0.00015297000776645723, + "loss": 1.9449, + "step": 273915 + }, + { + "epoch": 0.64, + "grad_norm": 2.046875, + "learning_rate": 0.00015296844006427218, + "loss": 1.9997, + "step": 273920 + }, + { + "epoch": 0.64, + "grad_norm": 2.359375, + "learning_rate": 0.00015296687234399217, + "loss": 1.9184, + "step": 273925 + }, + { + "epoch": 0.64, + "grad_norm": 2.109375, + "learning_rate": 0.00015296530460561774, + "loss": 2.0084, + "step": 273930 + }, + { + "epoch": 0.64, + "grad_norm": 2.078125, + "learning_rate": 0.00015296373684914934, + "loss": 1.7387, + "step": 273935 + }, + { + "epoch": 0.64, + "grad_norm": 2.484375, + "learning_rate": 0.0001529621690745876, + "loss": 2.1553, + "step": 273940 + }, + { + "epoch": 0.64, + "grad_norm": 2.1875, + "learning_rate": 0.00015296060128193304, + "loss": 2.0837, + "step": 273945 + }, + { + "epoch": 0.64, + "grad_norm": 2.09375, + "learning_rate": 0.00015295903347118617, + "loss": 1.9806, + "step": 273950 + }, + { + "epoch": 0.64, + "grad_norm": 2.15625, + "learning_rate": 0.00015295746564234755, + "loss": 2.2032, + "step": 273955 + }, + { + "epoch": 0.64, + "grad_norm": 2.046875, + "learning_rate": 0.00015295589779541766, + "loss": 1.9079, + "step": 273960 + }, + { + "epoch": 0.64, + "grad_norm": 2.125, + "learning_rate": 0.0001529543299303971, + "loss": 2.2473, + "step": 273965 + }, + { + "epoch": 0.64, + "grad_norm": 2.109375, + "learning_rate": 0.00015295276204728637, + "loss": 1.9657, + "step": 273970 + }, + { + "epoch": 0.64, + "grad_norm": 2.296875, + "learning_rate": 0.00015295119414608603, + "loss": 2.1828, + "step": 273975 + }, + { + "epoch": 0.64, + "grad_norm": 2.03125, + "learning_rate": 0.00015294962622679656, + "loss": 2.1698, + "step": 273980 + }, + { + "epoch": 0.64, + "grad_norm": 2.265625, + "learning_rate": 0.0001529480582894186, + "loss": 2.0367, + "step": 273985 + }, + { + "epoch": 0.64, + "grad_norm": 2.1875, + "learning_rate": 0.00015294649033395258, + "loss": 2.0893, + "step": 273990 + }, + { + "epoch": 0.64, + "grad_norm": 2.078125, + "learning_rate": 0.00015294492236039907, + "loss": 1.9964, + "step": 273995 + }, + { + "epoch": 0.64, + "grad_norm": 2.40625, + "learning_rate": 0.00015294335436875865, + "loss": 2.1663, + "step": 274000 + }, + { + "epoch": 0.64, + "grad_norm": 2.375, + "learning_rate": 0.0001529417863590318, + "loss": 2.151, + "step": 274005 + }, + { + "epoch": 0.64, + "grad_norm": 2.109375, + "learning_rate": 0.00015294021833121908, + "loss": 2.1425, + "step": 274010 + }, + { + "epoch": 0.64, + "grad_norm": 1.921875, + "learning_rate": 0.000152938650285321, + "loss": 2.122, + "step": 274015 + }, + { + "epoch": 0.64, + "grad_norm": 1.984375, + "learning_rate": 0.00015293708222133815, + "loss": 2.1916, + "step": 274020 + }, + { + "epoch": 0.64, + "grad_norm": 2.109375, + "learning_rate": 0.00015293551413927105, + "loss": 2.1567, + "step": 274025 + }, + { + "epoch": 0.64, + "grad_norm": 2.359375, + "learning_rate": 0.00015293394603912018, + "loss": 1.9458, + "step": 274030 + }, + { + "epoch": 0.64, + "grad_norm": 1.8203125, + "learning_rate": 0.00015293237792088612, + "loss": 2.2294, + "step": 274035 + }, + { + "epoch": 0.64, + "grad_norm": 2.625, + "learning_rate": 0.00015293080978456938, + "loss": 1.9904, + "step": 274040 + }, + { + "epoch": 0.64, + "grad_norm": 2.1875, + "learning_rate": 0.00015292924163017057, + "loss": 1.9406, + "step": 274045 + }, + { + "epoch": 0.64, + "grad_norm": 2.046875, + "learning_rate": 0.00015292767345769014, + "loss": 2.1477, + "step": 274050 + }, + { + "epoch": 0.64, + "grad_norm": 2.4375, + "learning_rate": 0.00015292610526712865, + "loss": 2.1343, + "step": 274055 + }, + { + "epoch": 0.64, + "grad_norm": 1.65625, + "learning_rate": 0.00015292453705848663, + "loss": 2.0153, + "step": 274060 + }, + { + "epoch": 0.64, + "grad_norm": 2.078125, + "learning_rate": 0.00015292296883176465, + "loss": 2.2419, + "step": 274065 + }, + { + "epoch": 0.64, + "grad_norm": 2.609375, + "learning_rate": 0.00015292140058696324, + "loss": 2.0613, + "step": 274070 + }, + { + "epoch": 0.64, + "grad_norm": 2.15625, + "learning_rate": 0.0001529198323240829, + "loss": 2.0917, + "step": 274075 + }, + { + "epoch": 0.64, + "grad_norm": 2.75, + "learning_rate": 0.00015291826404312419, + "loss": 2.1337, + "step": 274080 + }, + { + "epoch": 0.65, + "grad_norm": 1.9921875, + "learning_rate": 0.0001529166957440876, + "loss": 2.0127, + "step": 274085 + }, + { + "epoch": 0.65, + "grad_norm": 2.359375, + "learning_rate": 0.00015291512742697378, + "loss": 2.2806, + "step": 274090 + }, + { + "epoch": 0.65, + "grad_norm": 2.3125, + "learning_rate": 0.00015291355909178314, + "loss": 2.1957, + "step": 274095 + }, + { + "epoch": 0.65, + "grad_norm": 2.09375, + "learning_rate": 0.0001529119907385163, + "loss": 2.0927, + "step": 274100 + }, + { + "epoch": 0.65, + "grad_norm": 2.296875, + "learning_rate": 0.00015291042236717373, + "loss": 2.2612, + "step": 274105 + }, + { + "epoch": 0.65, + "grad_norm": 2.515625, + "learning_rate": 0.00015290885397775602, + "loss": 2.292, + "step": 274110 + }, + { + "epoch": 0.65, + "grad_norm": 2.203125, + "learning_rate": 0.0001529072855702637, + "loss": 2.12, + "step": 274115 + }, + { + "epoch": 0.65, + "grad_norm": 2.390625, + "learning_rate": 0.0001529057171446973, + "loss": 2.041, + "step": 274120 + }, + { + "epoch": 0.65, + "grad_norm": 2.25, + "learning_rate": 0.0001529041487010573, + "loss": 2.1627, + "step": 274125 + }, + { + "epoch": 0.65, + "grad_norm": 2.1875, + "learning_rate": 0.00015290258023934432, + "loss": 2.1651, + "step": 274130 + }, + { + "epoch": 0.65, + "grad_norm": 1.9296875, + "learning_rate": 0.00015290101175955884, + "loss": 2.1893, + "step": 274135 + }, + { + "epoch": 0.65, + "grad_norm": 2.046875, + "learning_rate": 0.00015289944326170142, + "loss": 2.0825, + "step": 274140 + }, + { + "epoch": 0.65, + "grad_norm": 2.203125, + "learning_rate": 0.0001528978747457726, + "loss": 2.0747, + "step": 274145 + }, + { + "epoch": 0.65, + "grad_norm": 1.7421875, + "learning_rate": 0.00015289630621177292, + "loss": 1.8688, + "step": 274150 + }, + { + "epoch": 0.65, + "grad_norm": 2.625, + "learning_rate": 0.00015289473765970287, + "loss": 2.1347, + "step": 274155 + }, + { + "epoch": 0.65, + "grad_norm": 2.421875, + "learning_rate": 0.00015289316908956307, + "loss": 2.1394, + "step": 274160 + }, + { + "epoch": 0.65, + "grad_norm": 2.046875, + "learning_rate": 0.0001528916005013539, + "loss": 1.9857, + "step": 274165 + }, + { + "epoch": 0.65, + "grad_norm": 2.21875, + "learning_rate": 0.00015289003189507608, + "loss": 2.1032, + "step": 274170 + }, + { + "epoch": 0.65, + "grad_norm": 1.796875, + "learning_rate": 0.00015288846327073005, + "loss": 2.1221, + "step": 274175 + }, + { + "epoch": 0.65, + "grad_norm": 1.84375, + "learning_rate": 0.0001528868946283164, + "loss": 1.9563, + "step": 274180 + }, + { + "epoch": 0.65, + "grad_norm": 2.421875, + "learning_rate": 0.0001528853259678356, + "loss": 2.0618, + "step": 274185 + }, + { + "epoch": 0.65, + "grad_norm": 2.0625, + "learning_rate": 0.00015288375728928817, + "loss": 2.051, + "step": 274190 + }, + { + "epoch": 0.65, + "grad_norm": 2.34375, + "learning_rate": 0.00015288218859267473, + "loss": 2.1102, + "step": 274195 + }, + { + "epoch": 0.65, + "grad_norm": 1.890625, + "learning_rate": 0.0001528806198779958, + "loss": 2.2329, + "step": 274200 + }, + { + "epoch": 0.65, + "grad_norm": 1.7265625, + "learning_rate": 0.00015287905114525185, + "loss": 2.1473, + "step": 274205 + }, + { + "epoch": 0.65, + "grad_norm": 2.265625, + "learning_rate": 0.00015287748239444345, + "loss": 2.0863, + "step": 274210 + }, + { + "epoch": 0.65, + "grad_norm": 1.8828125, + "learning_rate": 0.00015287591362557116, + "loss": 2.114, + "step": 274215 + }, + { + "epoch": 0.65, + "grad_norm": 1.96875, + "learning_rate": 0.00015287434483863548, + "loss": 2.0699, + "step": 274220 + }, + { + "epoch": 0.65, + "grad_norm": 1.65625, + "learning_rate": 0.000152872776033637, + "loss": 2.1126, + "step": 274225 + }, + { + "epoch": 0.65, + "grad_norm": 2.0625, + "learning_rate": 0.0001528712072105762, + "loss": 2.1015, + "step": 274230 + }, + { + "epoch": 0.65, + "grad_norm": 2.0625, + "learning_rate": 0.00015286963836945368, + "loss": 2.2643, + "step": 274235 + }, + { + "epoch": 0.65, + "grad_norm": 1.984375, + "learning_rate": 0.00015286806951026987, + "loss": 1.873, + "step": 274240 + }, + { + "epoch": 0.65, + "grad_norm": 2.25, + "learning_rate": 0.0001528665006330254, + "loss": 1.9612, + "step": 274245 + }, + { + "epoch": 0.65, + "grad_norm": 1.8515625, + "learning_rate": 0.00015286493173772076, + "loss": 2.2318, + "step": 274250 + }, + { + "epoch": 0.65, + "grad_norm": 2.125, + "learning_rate": 0.0001528633628243565, + "loss": 2.098, + "step": 274255 + }, + { + "epoch": 0.65, + "grad_norm": 2.140625, + "learning_rate": 0.00015286179389293318, + "loss": 2.0774, + "step": 274260 + }, + { + "epoch": 0.65, + "grad_norm": 2.296875, + "learning_rate": 0.0001528602249434513, + "loss": 1.9582, + "step": 274265 + }, + { + "epoch": 0.65, + "grad_norm": 2.3125, + "learning_rate": 0.0001528586559759114, + "loss": 2.0096, + "step": 274270 + }, + { + "epoch": 0.65, + "grad_norm": 2.25, + "learning_rate": 0.00015285708699031402, + "loss": 2.1645, + "step": 274275 + }, + { + "epoch": 0.65, + "grad_norm": 1.984375, + "learning_rate": 0.00015285551798665972, + "loss": 2.2376, + "step": 274280 + }, + { + "epoch": 0.65, + "grad_norm": 2.328125, + "learning_rate": 0.00015285394896494902, + "loss": 2.0143, + "step": 274285 + }, + { + "epoch": 0.65, + "grad_norm": 2.390625, + "learning_rate": 0.0001528523799251824, + "loss": 1.9623, + "step": 274290 + }, + { + "epoch": 0.65, + "grad_norm": 2.328125, + "learning_rate": 0.00015285081086736054, + "loss": 2.0683, + "step": 274295 + }, + { + "epoch": 0.65, + "grad_norm": 2.890625, + "learning_rate": 0.00015284924179148382, + "loss": 2.2344, + "step": 274300 + }, + { + "epoch": 0.65, + "grad_norm": 2.03125, + "learning_rate": 0.00015284767269755284, + "loss": 1.9528, + "step": 274305 + }, + { + "epoch": 0.65, + "grad_norm": 2.1875, + "learning_rate": 0.00015284610358556815, + "loss": 2.1377, + "step": 274310 + }, + { + "epoch": 0.65, + "grad_norm": 2.296875, + "learning_rate": 0.00015284453445553028, + "loss": 1.8866, + "step": 274315 + }, + { + "epoch": 0.65, + "grad_norm": 2.53125, + "learning_rate": 0.0001528429653074398, + "loss": 1.9994, + "step": 274320 + }, + { + "epoch": 0.65, + "grad_norm": 2.53125, + "learning_rate": 0.00015284139614129712, + "loss": 2.101, + "step": 274325 + }, + { + "epoch": 0.65, + "grad_norm": 2.359375, + "learning_rate": 0.00015283982695710288, + "loss": 2.0996, + "step": 274330 + }, + { + "epoch": 0.65, + "grad_norm": 2.109375, + "learning_rate": 0.0001528382577548576, + "loss": 2.13, + "step": 274335 + }, + { + "epoch": 0.65, + "grad_norm": 1.9375, + "learning_rate": 0.00015283668853456186, + "loss": 1.9684, + "step": 274340 + }, + { + "epoch": 0.65, + "grad_norm": 2.109375, + "learning_rate": 0.00015283511929621612, + "loss": 2.0106, + "step": 274345 + }, + { + "epoch": 0.65, + "grad_norm": 2.265625, + "learning_rate": 0.0001528335500398209, + "loss": 2.1627, + "step": 274350 + }, + { + "epoch": 0.65, + "grad_norm": 2.125, + "learning_rate": 0.00015283198076537682, + "loss": 2.1145, + "step": 274355 + }, + { + "epoch": 0.65, + "grad_norm": 2.40625, + "learning_rate": 0.0001528304114728844, + "loss": 2.236, + "step": 274360 + }, + { + "epoch": 0.65, + "grad_norm": 2.046875, + "learning_rate": 0.00015282884216234416, + "loss": 2.0972, + "step": 274365 + }, + { + "epoch": 0.65, + "grad_norm": 2.109375, + "learning_rate": 0.00015282727283375658, + "loss": 2.0696, + "step": 274370 + }, + { + "epoch": 0.65, + "grad_norm": 1.953125, + "learning_rate": 0.00015282570348712228, + "loss": 2.0688, + "step": 274375 + }, + { + "epoch": 0.65, + "grad_norm": 1.7890625, + "learning_rate": 0.00015282413412244172, + "loss": 2.0451, + "step": 274380 + }, + { + "epoch": 0.65, + "grad_norm": 2.078125, + "learning_rate": 0.00015282256473971552, + "loss": 2.1581, + "step": 274385 + }, + { + "epoch": 0.65, + "grad_norm": 2.109375, + "learning_rate": 0.00015282099533894415, + "loss": 2.2365, + "step": 274390 + }, + { + "epoch": 0.65, + "grad_norm": 1.9375, + "learning_rate": 0.0001528194259201282, + "loss": 2.1732, + "step": 274395 + }, + { + "epoch": 0.65, + "grad_norm": 2.109375, + "learning_rate": 0.00015281785648326818, + "loss": 2.12, + "step": 274400 + }, + { + "epoch": 0.65, + "grad_norm": 2.125, + "learning_rate": 0.00015281628702836457, + "loss": 1.9528, + "step": 274405 + }, + { + "epoch": 0.65, + "grad_norm": 2.5, + "learning_rate": 0.00015281471755541797, + "loss": 1.9743, + "step": 274410 + }, + { + "epoch": 0.65, + "grad_norm": 2.0625, + "learning_rate": 0.00015281314806442895, + "loss": 1.9971, + "step": 274415 + }, + { + "epoch": 0.65, + "grad_norm": 2.375, + "learning_rate": 0.00015281157855539796, + "loss": 2.1367, + "step": 274420 + }, + { + "epoch": 0.65, + "grad_norm": 2.1875, + "learning_rate": 0.00015281000902832558, + "loss": 2.2029, + "step": 274425 + }, + { + "epoch": 0.65, + "grad_norm": 2.09375, + "learning_rate": 0.00015280843948321237, + "loss": 2.0107, + "step": 274430 + }, + { + "epoch": 0.65, + "grad_norm": 2.15625, + "learning_rate": 0.00015280686992005883, + "loss": 2.0429, + "step": 274435 + }, + { + "epoch": 0.65, + "grad_norm": 2.625, + "learning_rate": 0.00015280530033886546, + "loss": 2.0387, + "step": 274440 + }, + { + "epoch": 0.65, + "grad_norm": 2.65625, + "learning_rate": 0.0001528037307396329, + "loss": 1.8888, + "step": 274445 + }, + { + "epoch": 0.65, + "grad_norm": 2.0, + "learning_rate": 0.0001528021611223616, + "loss": 2.0421, + "step": 274450 + }, + { + "epoch": 0.65, + "grad_norm": 2.484375, + "learning_rate": 0.00015280059148705215, + "loss": 2.1903, + "step": 274455 + }, + { + "epoch": 0.65, + "grad_norm": 2.296875, + "learning_rate": 0.00015279902183370502, + "loss": 2.1538, + "step": 274460 + }, + { + "epoch": 0.65, + "grad_norm": 2.078125, + "learning_rate": 0.00015279745216232083, + "loss": 1.9381, + "step": 274465 + }, + { + "epoch": 0.65, + "grad_norm": 2.578125, + "learning_rate": 0.00015279588247290001, + "loss": 1.9874, + "step": 274470 + }, + { + "epoch": 0.65, + "grad_norm": 2.0625, + "learning_rate": 0.00015279431276544322, + "loss": 2.1526, + "step": 274475 + }, + { + "epoch": 0.65, + "grad_norm": 2.453125, + "learning_rate": 0.00015279274303995095, + "loss": 2.0715, + "step": 274480 + }, + { + "epoch": 0.65, + "grad_norm": 2.203125, + "learning_rate": 0.00015279117329642365, + "loss": 1.9691, + "step": 274485 + }, + { + "epoch": 0.65, + "grad_norm": 2.25, + "learning_rate": 0.00015278960353486197, + "loss": 2.0017, + "step": 274490 + }, + { + "epoch": 0.65, + "grad_norm": 2.53125, + "learning_rate": 0.0001527880337552664, + "loss": 2.0951, + "step": 274495 + }, + { + "epoch": 0.65, + "grad_norm": 2.515625, + "learning_rate": 0.0001527864639576375, + "loss": 2.0838, + "step": 274500 + }, + { + "epoch": 0.65, + "grad_norm": 2.296875, + "learning_rate": 0.00015278489414197576, + "loss": 2.1219, + "step": 274505 + }, + { + "epoch": 0.65, + "grad_norm": 2.28125, + "learning_rate": 0.00015278332430828173, + "loss": 1.9804, + "step": 274510 + }, + { + "epoch": 0.65, + "grad_norm": 2.40625, + "learning_rate": 0.00015278175445655599, + "loss": 2.0984, + "step": 274515 + }, + { + "epoch": 0.65, + "grad_norm": 1.984375, + "learning_rate": 0.00015278018458679906, + "loss": 2.0635, + "step": 274520 + }, + { + "epoch": 0.65, + "grad_norm": 2.15625, + "learning_rate": 0.00015277861469901143, + "loss": 2.1013, + "step": 274525 + }, + { + "epoch": 0.65, + "grad_norm": 2.28125, + "learning_rate": 0.00015277704479319367, + "loss": 2.07, + "step": 274530 + }, + { + "epoch": 0.65, + "grad_norm": 2.59375, + "learning_rate": 0.0001527754748693463, + "loss": 2.1554, + "step": 274535 + }, + { + "epoch": 0.65, + "grad_norm": 2.546875, + "learning_rate": 0.00015277390492746988, + "loss": 1.9851, + "step": 274540 + }, + { + "epoch": 0.65, + "grad_norm": 2.015625, + "learning_rate": 0.00015277233496756496, + "loss": 2.059, + "step": 274545 + }, + { + "epoch": 0.65, + "grad_norm": 2.875, + "learning_rate": 0.00015277076498963204, + "loss": 2.2684, + "step": 274550 + }, + { + "epoch": 0.65, + "grad_norm": 2.484375, + "learning_rate": 0.00015276919499367165, + "loss": 2.0545, + "step": 274555 + }, + { + "epoch": 0.65, + "grad_norm": 2.0, + "learning_rate": 0.00015276762497968437, + "loss": 1.943, + "step": 274560 + }, + { + "epoch": 0.65, + "grad_norm": 2.4375, + "learning_rate": 0.00015276605494767072, + "loss": 2.0884, + "step": 274565 + }, + { + "epoch": 0.65, + "grad_norm": 2.046875, + "learning_rate": 0.0001527644848976312, + "loss": 2.1537, + "step": 274570 + }, + { + "epoch": 0.65, + "grad_norm": 2.15625, + "learning_rate": 0.0001527629148295664, + "loss": 2.062, + "step": 274575 + }, + { + "epoch": 0.65, + "grad_norm": 2.390625, + "learning_rate": 0.0001527613447434768, + "loss": 1.8999, + "step": 274580 + }, + { + "epoch": 0.65, + "grad_norm": 2.265625, + "learning_rate": 0.00015275977463936302, + "loss": 2.0711, + "step": 274585 + }, + { + "epoch": 0.65, + "grad_norm": 2.46875, + "learning_rate": 0.00015275820451722552, + "loss": 2.0405, + "step": 274590 + }, + { + "epoch": 0.65, + "grad_norm": 1.875, + "learning_rate": 0.00015275663437706483, + "loss": 1.9153, + "step": 274595 + }, + { + "epoch": 0.65, + "grad_norm": 2.4375, + "learning_rate": 0.00015275506421888154, + "loss": 2.0199, + "step": 274600 + }, + { + "epoch": 0.65, + "grad_norm": 2.046875, + "learning_rate": 0.00015275349404267615, + "loss": 1.8864, + "step": 274605 + }, + { + "epoch": 0.65, + "grad_norm": 2.359375, + "learning_rate": 0.00015275192384844925, + "loss": 2.0563, + "step": 274610 + }, + { + "epoch": 0.65, + "grad_norm": 2.328125, + "learning_rate": 0.00015275035363620132, + "loss": 2.0938, + "step": 274615 + }, + { + "epoch": 0.65, + "grad_norm": 1.75, + "learning_rate": 0.0001527487834059329, + "loss": 1.9541, + "step": 274620 + }, + { + "epoch": 0.65, + "grad_norm": 2.125, + "learning_rate": 0.00015274721315764454, + "loss": 2.161, + "step": 274625 + }, + { + "epoch": 0.65, + "grad_norm": 2.015625, + "learning_rate": 0.00015274564289133674, + "loss": 2.0657, + "step": 274630 + }, + { + "epoch": 0.65, + "grad_norm": 2.234375, + "learning_rate": 0.00015274407260701013, + "loss": 2.0521, + "step": 274635 + }, + { + "epoch": 0.65, + "grad_norm": 2.34375, + "learning_rate": 0.00015274250230466518, + "loss": 2.1839, + "step": 274640 + }, + { + "epoch": 0.65, + "grad_norm": 2.015625, + "learning_rate": 0.00015274093198430243, + "loss": 2.0797, + "step": 274645 + }, + { + "epoch": 0.65, + "grad_norm": 2.453125, + "learning_rate": 0.00015273936164592237, + "loss": 1.99, + "step": 274650 + }, + { + "epoch": 0.65, + "grad_norm": 2.578125, + "learning_rate": 0.00015273779128952564, + "loss": 1.9271, + "step": 274655 + }, + { + "epoch": 0.65, + "grad_norm": 2.328125, + "learning_rate": 0.00015273622091511272, + "loss": 2.0353, + "step": 274660 + }, + { + "epoch": 0.65, + "grad_norm": 2.25, + "learning_rate": 0.00015273465052268416, + "loss": 1.9755, + "step": 274665 + }, + { + "epoch": 0.65, + "grad_norm": 1.90625, + "learning_rate": 0.00015273308011224047, + "loss": 2.1376, + "step": 274670 + }, + { + "epoch": 0.65, + "grad_norm": 2.296875, + "learning_rate": 0.00015273150968378218, + "loss": 2.0769, + "step": 274675 + }, + { + "epoch": 0.65, + "grad_norm": 2.296875, + "learning_rate": 0.0001527299392373099, + "loss": 1.9524, + "step": 274680 + }, + { + "epoch": 0.65, + "grad_norm": 2.375, + "learning_rate": 0.00015272836877282408, + "loss": 1.9315, + "step": 274685 + }, + { + "epoch": 0.65, + "grad_norm": 2.125, + "learning_rate": 0.0001527267982903253, + "loss": 2.0795, + "step": 274690 + }, + { + "epoch": 0.65, + "grad_norm": 2.421875, + "learning_rate": 0.0001527252277898141, + "loss": 2.1542, + "step": 274695 + }, + { + "epoch": 0.65, + "grad_norm": 2.125, + "learning_rate": 0.000152723657271291, + "loss": 2.0722, + "step": 274700 + }, + { + "epoch": 0.65, + "grad_norm": 1.484375, + "learning_rate": 0.00015272208673475658, + "loss": 2.1595, + "step": 274705 + }, + { + "epoch": 0.65, + "grad_norm": 1.96875, + "learning_rate": 0.00015272051618021125, + "loss": 2.1792, + "step": 274710 + }, + { + "epoch": 0.65, + "grad_norm": 2.546875, + "learning_rate": 0.0001527189456076557, + "loss": 1.9804, + "step": 274715 + }, + { + "epoch": 0.65, + "grad_norm": 1.8515625, + "learning_rate": 0.0001527173750170904, + "loss": 2.0574, + "step": 274720 + }, + { + "epoch": 0.65, + "grad_norm": 2.46875, + "learning_rate": 0.00015271580440851586, + "loss": 2.1066, + "step": 274725 + }, + { + "epoch": 0.65, + "grad_norm": 2.375, + "learning_rate": 0.00015271423378193267, + "loss": 2.0664, + "step": 274730 + }, + { + "epoch": 0.65, + "grad_norm": 2.03125, + "learning_rate": 0.00015271266313734134, + "loss": 2.0814, + "step": 274735 + }, + { + "epoch": 0.65, + "grad_norm": 2.453125, + "learning_rate": 0.0001527110924747424, + "loss": 2.0292, + "step": 274740 + }, + { + "epoch": 0.65, + "grad_norm": 1.8984375, + "learning_rate": 0.0001527095217941364, + "loss": 2.1647, + "step": 274745 + }, + { + "epoch": 0.65, + "grad_norm": 2.15625, + "learning_rate": 0.00015270795109552387, + "loss": 1.9651, + "step": 274750 + }, + { + "epoch": 0.65, + "grad_norm": 2.109375, + "learning_rate": 0.00015270638037890533, + "loss": 2.2002, + "step": 274755 + }, + { + "epoch": 0.65, + "grad_norm": 1.9296875, + "learning_rate": 0.00015270480964428137, + "loss": 2.2234, + "step": 274760 + }, + { + "epoch": 0.65, + "grad_norm": 2.34375, + "learning_rate": 0.00015270323889165246, + "loss": 2.0046, + "step": 274765 + }, + { + "epoch": 0.65, + "grad_norm": 1.8671875, + "learning_rate": 0.0001527016681210192, + "loss": 1.9242, + "step": 274770 + }, + { + "epoch": 0.65, + "grad_norm": 1.8203125, + "learning_rate": 0.0001527000973323821, + "loss": 2.0836, + "step": 274775 + }, + { + "epoch": 0.65, + "grad_norm": 2.109375, + "learning_rate": 0.00015269852652574165, + "loss": 2.1326, + "step": 274780 + }, + { + "epoch": 0.65, + "grad_norm": 2.171875, + "learning_rate": 0.00015269695570109844, + "loss": 2.1303, + "step": 274785 + }, + { + "epoch": 0.65, + "grad_norm": 2.0, + "learning_rate": 0.00015269538485845297, + "loss": 2.015, + "step": 274790 + }, + { + "epoch": 0.65, + "grad_norm": 1.78125, + "learning_rate": 0.00015269381399780584, + "loss": 2.0948, + "step": 274795 + }, + { + "epoch": 0.65, + "grad_norm": 1.984375, + "learning_rate": 0.00015269224311915754, + "loss": 1.8871, + "step": 274800 + }, + { + "epoch": 0.65, + "grad_norm": 2.453125, + "learning_rate": 0.0001526906722225086, + "loss": 2.2513, + "step": 274805 + }, + { + "epoch": 0.65, + "grad_norm": 1.796875, + "learning_rate": 0.00015268910130785958, + "loss": 2.1892, + "step": 274810 + }, + { + "epoch": 0.65, + "grad_norm": 2.421875, + "learning_rate": 0.00015268753037521102, + "loss": 2.0117, + "step": 274815 + }, + { + "epoch": 0.65, + "grad_norm": 2.5, + "learning_rate": 0.00015268595942456346, + "loss": 2.2075, + "step": 274820 + }, + { + "epoch": 0.65, + "grad_norm": 2.140625, + "learning_rate": 0.00015268438845591738, + "loss": 2.0402, + "step": 274825 + }, + { + "epoch": 0.65, + "grad_norm": 1.8828125, + "learning_rate": 0.00015268281746927338, + "loss": 2.0463, + "step": 274830 + }, + { + "epoch": 0.65, + "grad_norm": 1.8828125, + "learning_rate": 0.00015268124646463193, + "loss": 1.7823, + "step": 274835 + }, + { + "epoch": 0.65, + "grad_norm": 2.359375, + "learning_rate": 0.00015267967544199367, + "loss": 2.1625, + "step": 274840 + }, + { + "epoch": 0.65, + "grad_norm": 2.40625, + "learning_rate": 0.00015267810440135904, + "loss": 2.1396, + "step": 274845 + }, + { + "epoch": 0.65, + "grad_norm": 2.125, + "learning_rate": 0.00015267653334272862, + "loss": 2.171, + "step": 274850 + }, + { + "epoch": 0.65, + "grad_norm": 2.1875, + "learning_rate": 0.00015267496226610295, + "loss": 2.0757, + "step": 274855 + }, + { + "epoch": 0.65, + "grad_norm": 2.734375, + "learning_rate": 0.00015267339117148256, + "loss": 2.2283, + "step": 274860 + }, + { + "epoch": 0.65, + "grad_norm": 2.09375, + "learning_rate": 0.00015267182005886798, + "loss": 2.2226, + "step": 274865 + }, + { + "epoch": 0.65, + "grad_norm": 2.265625, + "learning_rate": 0.00015267024892825975, + "loss": 1.8105, + "step": 274870 + }, + { + "epoch": 0.65, + "grad_norm": 2.125, + "learning_rate": 0.00015266867777965842, + "loss": 2.1871, + "step": 274875 + }, + { + "epoch": 0.65, + "grad_norm": 2.171875, + "learning_rate": 0.00015266710661306448, + "loss": 2.1349, + "step": 274880 + }, + { + "epoch": 0.65, + "grad_norm": 2.0, + "learning_rate": 0.00015266553542847853, + "loss": 2.0729, + "step": 274885 + }, + { + "epoch": 0.65, + "grad_norm": 2.625, + "learning_rate": 0.00015266396422590107, + "loss": 2.0151, + "step": 274890 + }, + { + "epoch": 0.65, + "grad_norm": 2.203125, + "learning_rate": 0.00015266239300533265, + "loss": 1.9333, + "step": 274895 + }, + { + "epoch": 0.65, + "grad_norm": 2.390625, + "learning_rate": 0.00015266082176677382, + "loss": 2.0331, + "step": 274900 + }, + { + "epoch": 0.65, + "grad_norm": 2.203125, + "learning_rate": 0.00015265925051022506, + "loss": 2.2205, + "step": 274905 + }, + { + "epoch": 0.65, + "grad_norm": 2.359375, + "learning_rate": 0.00015265767923568698, + "loss": 1.9182, + "step": 274910 + }, + { + "epoch": 0.65, + "grad_norm": 2.296875, + "learning_rate": 0.00015265610794316006, + "loss": 1.9351, + "step": 274915 + }, + { + "epoch": 0.65, + "grad_norm": 1.84375, + "learning_rate": 0.00015265453663264485, + "loss": 2.0866, + "step": 274920 + }, + { + "epoch": 0.65, + "grad_norm": 1.984375, + "learning_rate": 0.0001526529653041419, + "loss": 2.2224, + "step": 274925 + }, + { + "epoch": 0.65, + "grad_norm": 2.25, + "learning_rate": 0.00015265139395765175, + "loss": 2.1758, + "step": 274930 + }, + { + "epoch": 0.65, + "grad_norm": 2.15625, + "learning_rate": 0.00015264982259317495, + "loss": 2.21, + "step": 274935 + }, + { + "epoch": 0.65, + "grad_norm": 2.078125, + "learning_rate": 0.00015264825121071195, + "loss": 2.0027, + "step": 274940 + }, + { + "epoch": 0.65, + "grad_norm": 1.8671875, + "learning_rate": 0.0001526466798102634, + "loss": 2.1383, + "step": 274945 + }, + { + "epoch": 0.65, + "grad_norm": 2.375, + "learning_rate": 0.0001526451083918298, + "loss": 1.9898, + "step": 274950 + }, + { + "epoch": 0.65, + "grad_norm": 1.9609375, + "learning_rate": 0.0001526435369554117, + "loss": 2.3037, + "step": 274955 + }, + { + "epoch": 0.65, + "grad_norm": 2.546875, + "learning_rate": 0.00015264196550100956, + "loss": 2.2006, + "step": 274960 + }, + { + "epoch": 0.65, + "grad_norm": 1.8515625, + "learning_rate": 0.00015264039402862397, + "loss": 2.0108, + "step": 274965 + }, + { + "epoch": 0.65, + "grad_norm": 1.953125, + "learning_rate": 0.00015263882253825547, + "loss": 2.0131, + "step": 274970 + }, + { + "epoch": 0.65, + "grad_norm": 2.5, + "learning_rate": 0.00015263725102990464, + "loss": 2.1245, + "step": 274975 + }, + { + "epoch": 0.65, + "grad_norm": 2.328125, + "learning_rate": 0.00015263567950357193, + "loss": 1.961, + "step": 274980 + }, + { + "epoch": 0.65, + "grad_norm": 2.015625, + "learning_rate": 0.00015263410795925792, + "loss": 1.9601, + "step": 274985 + }, + { + "epoch": 0.65, + "grad_norm": 2.0625, + "learning_rate": 0.00015263253639696313, + "loss": 1.9002, + "step": 274990 + }, + { + "epoch": 0.65, + "grad_norm": 2.0625, + "learning_rate": 0.0001526309648166881, + "loss": 2.2387, + "step": 274995 + }, + { + "epoch": 0.65, + "grad_norm": 2.046875, + "learning_rate": 0.00015262939321843345, + "loss": 2.1436, + "step": 275000 + }, + { + "epoch": 0.65, + "grad_norm": 2.046875, + "learning_rate": 0.0001526278216021996, + "loss": 2.2425, + "step": 275005 + }, + { + "epoch": 0.65, + "grad_norm": 1.8125, + "learning_rate": 0.00015262624996798714, + "loss": 2.1674, + "step": 275010 + }, + { + "epoch": 0.65, + "grad_norm": 2.03125, + "learning_rate": 0.00015262467831579657, + "loss": 2.0087, + "step": 275015 + }, + { + "epoch": 0.65, + "grad_norm": 1.890625, + "learning_rate": 0.00015262310664562848, + "loss": 2.1152, + "step": 275020 + }, + { + "epoch": 0.65, + "grad_norm": 2.171875, + "learning_rate": 0.00015262153495748337, + "loss": 2.0849, + "step": 275025 + }, + { + "epoch": 0.65, + "grad_norm": 2.375, + "learning_rate": 0.0001526199632513618, + "loss": 2.105, + "step": 275030 + }, + { + "epoch": 0.65, + "grad_norm": 1.90625, + "learning_rate": 0.0001526183915272643, + "loss": 2.0451, + "step": 275035 + }, + { + "epoch": 0.65, + "grad_norm": 2.25, + "learning_rate": 0.0001526168197851914, + "loss": 2.1771, + "step": 275040 + }, + { + "epoch": 0.65, + "grad_norm": 2.1875, + "learning_rate": 0.00015261524802514365, + "loss": 2.2507, + "step": 275045 + }, + { + "epoch": 0.65, + "grad_norm": 2.171875, + "learning_rate": 0.00015261367624712154, + "loss": 2.0525, + "step": 275050 + }, + { + "epoch": 0.65, + "grad_norm": 2.40625, + "learning_rate": 0.00015261210445112567, + "loss": 2.0406, + "step": 275055 + }, + { + "epoch": 0.65, + "grad_norm": 2.1875, + "learning_rate": 0.00015261053263715657, + "loss": 2.1031, + "step": 275060 + }, + { + "epoch": 0.65, + "grad_norm": 1.9609375, + "learning_rate": 0.00015260896080521473, + "loss": 2.018, + "step": 275065 + }, + { + "epoch": 0.65, + "grad_norm": 1.859375, + "learning_rate": 0.00015260738895530075, + "loss": 2.3191, + "step": 275070 + }, + { + "epoch": 0.65, + "grad_norm": 1.8671875, + "learning_rate": 0.00015260581708741508, + "loss": 2.0669, + "step": 275075 + }, + { + "epoch": 0.65, + "grad_norm": 2.171875, + "learning_rate": 0.0001526042452015583, + "loss": 2.1154, + "step": 275080 + }, + { + "epoch": 0.65, + "grad_norm": 2.109375, + "learning_rate": 0.00015260267329773105, + "loss": 2.0507, + "step": 275085 + }, + { + "epoch": 0.65, + "grad_norm": 2.328125, + "learning_rate": 0.00015260110137593367, + "loss": 2.16, + "step": 275090 + }, + { + "epoch": 0.65, + "grad_norm": 2.203125, + "learning_rate": 0.00015259952943616687, + "loss": 1.9459, + "step": 275095 + }, + { + "epoch": 0.65, + "grad_norm": 2.375, + "learning_rate": 0.0001525979574784311, + "loss": 2.0114, + "step": 275100 + }, + { + "epoch": 0.65, + "grad_norm": 2.046875, + "learning_rate": 0.00015259638550272687, + "loss": 2.0926, + "step": 275105 + }, + { + "epoch": 0.65, + "grad_norm": 2.703125, + "learning_rate": 0.00015259481350905483, + "loss": 2.0677, + "step": 275110 + }, + { + "epoch": 0.65, + "grad_norm": 1.9140625, + "learning_rate": 0.00015259324149741538, + "loss": 2.1801, + "step": 275115 + }, + { + "epoch": 0.65, + "grad_norm": 2.15625, + "learning_rate": 0.00015259166946780917, + "loss": 2.1294, + "step": 275120 + }, + { + "epoch": 0.65, + "grad_norm": 2.53125, + "learning_rate": 0.0001525900974202367, + "loss": 2.0925, + "step": 275125 + }, + { + "epoch": 0.65, + "grad_norm": 2.3125, + "learning_rate": 0.00015258852535469843, + "loss": 1.9802, + "step": 275130 + }, + { + "epoch": 0.65, + "grad_norm": 1.921875, + "learning_rate": 0.00015258695327119502, + "loss": 1.972, + "step": 275135 + }, + { + "epoch": 0.65, + "grad_norm": 2.375, + "learning_rate": 0.00015258538116972697, + "loss": 2.1114, + "step": 275140 + }, + { + "epoch": 0.65, + "grad_norm": 2.140625, + "learning_rate": 0.00015258380905029478, + "loss": 1.9866, + "step": 275145 + }, + { + "epoch": 0.65, + "grad_norm": 2.375, + "learning_rate": 0.00015258223691289898, + "loss": 2.0126, + "step": 275150 + }, + { + "epoch": 0.65, + "grad_norm": 2.09375, + "learning_rate": 0.00015258066475754017, + "loss": 2.116, + "step": 275155 + }, + { + "epoch": 0.65, + "grad_norm": 2.3125, + "learning_rate": 0.00015257909258421882, + "loss": 1.9068, + "step": 275160 + }, + { + "epoch": 0.65, + "grad_norm": 3.09375, + "learning_rate": 0.00015257752039293554, + "loss": 2.095, + "step": 275165 + }, + { + "epoch": 0.65, + "grad_norm": 1.703125, + "learning_rate": 0.0001525759481836908, + "loss": 1.9561, + "step": 275170 + }, + { + "epoch": 0.65, + "grad_norm": 2.125, + "learning_rate": 0.00015257437595648515, + "loss": 1.9519, + "step": 275175 + }, + { + "epoch": 0.65, + "grad_norm": 2.3125, + "learning_rate": 0.00015257280371131917, + "loss": 1.9605, + "step": 275180 + }, + { + "epoch": 0.65, + "grad_norm": 2.25, + "learning_rate": 0.00015257123144819335, + "loss": 2.0132, + "step": 275185 + }, + { + "epoch": 0.65, + "grad_norm": 2.375, + "learning_rate": 0.00015256965916710824, + "loss": 2.1103, + "step": 275190 + }, + { + "epoch": 0.65, + "grad_norm": 1.90625, + "learning_rate": 0.0001525680868680644, + "loss": 2.0115, + "step": 275195 + }, + { + "epoch": 0.65, + "grad_norm": 2.25, + "learning_rate": 0.00015256651455106232, + "loss": 2.1666, + "step": 275200 + }, + { + "epoch": 0.65, + "grad_norm": 2.3125, + "learning_rate": 0.0001525649422161026, + "loss": 2.1477, + "step": 275205 + }, + { + "epoch": 0.65, + "grad_norm": 2.171875, + "learning_rate": 0.0001525633698631857, + "loss": 1.9488, + "step": 275210 + }, + { + "epoch": 0.65, + "grad_norm": 1.9453125, + "learning_rate": 0.00015256179749231224, + "loss": 1.988, + "step": 275215 + }, + { + "epoch": 0.65, + "grad_norm": 1.984375, + "learning_rate": 0.0001525602251034827, + "loss": 2.1755, + "step": 275220 + }, + { + "epoch": 0.65, + "grad_norm": 2.09375, + "learning_rate": 0.00015255865269669765, + "loss": 2.143, + "step": 275225 + }, + { + "epoch": 0.65, + "grad_norm": 2.015625, + "learning_rate": 0.0001525570802719576, + "loss": 2.0835, + "step": 275230 + }, + { + "epoch": 0.65, + "grad_norm": 1.890625, + "learning_rate": 0.00015255550782926304, + "loss": 1.9868, + "step": 275235 + }, + { + "epoch": 0.65, + "grad_norm": 1.9296875, + "learning_rate": 0.00015255393536861464, + "loss": 2.142, + "step": 275240 + }, + { + "epoch": 0.65, + "grad_norm": 2.03125, + "learning_rate": 0.00015255236289001282, + "loss": 2.0829, + "step": 275245 + }, + { + "epoch": 0.65, + "grad_norm": 1.8828125, + "learning_rate": 0.0001525507903934582, + "loss": 1.8827, + "step": 275250 + }, + { + "epoch": 0.65, + "grad_norm": 2.1875, + "learning_rate": 0.00015254921787895124, + "loss": 2.0269, + "step": 275255 + }, + { + "epoch": 0.65, + "grad_norm": 2.390625, + "learning_rate": 0.00015254764534649256, + "loss": 2.1809, + "step": 275260 + }, + { + "epoch": 0.65, + "grad_norm": 1.953125, + "learning_rate": 0.0001525460727960826, + "loss": 2.2752, + "step": 275265 + }, + { + "epoch": 0.65, + "grad_norm": 2.328125, + "learning_rate": 0.00015254450022772196, + "loss": 2.0023, + "step": 275270 + }, + { + "epoch": 0.65, + "grad_norm": 2.15625, + "learning_rate": 0.0001525429276414112, + "loss": 2.1756, + "step": 275275 + }, + { + "epoch": 0.65, + "grad_norm": 2.171875, + "learning_rate": 0.0001525413550371508, + "loss": 2.0003, + "step": 275280 + }, + { + "epoch": 0.65, + "grad_norm": 1.65625, + "learning_rate": 0.0001525397824149413, + "loss": 1.9319, + "step": 275285 + }, + { + "epoch": 0.65, + "grad_norm": 2.21875, + "learning_rate": 0.00015253820977478326, + "loss": 1.9854, + "step": 275290 + }, + { + "epoch": 0.65, + "grad_norm": 2.28125, + "learning_rate": 0.00015253663711667725, + "loss": 2.1015, + "step": 275295 + }, + { + "epoch": 0.65, + "grad_norm": 1.9609375, + "learning_rate": 0.00015253506444062377, + "loss": 2.1869, + "step": 275300 + }, + { + "epoch": 0.65, + "grad_norm": 2.296875, + "learning_rate": 0.00015253349174662332, + "loss": 2.1382, + "step": 275305 + }, + { + "epoch": 0.65, + "grad_norm": 2.140625, + "learning_rate": 0.0001525319190346765, + "loss": 2.1029, + "step": 275310 + }, + { + "epoch": 0.65, + "grad_norm": 2.921875, + "learning_rate": 0.00015253034630478385, + "loss": 2.0516, + "step": 275315 + }, + { + "epoch": 0.65, + "grad_norm": 2.140625, + "learning_rate": 0.00015252877355694585, + "loss": 2.2496, + "step": 275320 + }, + { + "epoch": 0.65, + "grad_norm": 2.28125, + "learning_rate": 0.00015252720079116306, + "loss": 2.0506, + "step": 275325 + }, + { + "epoch": 0.65, + "grad_norm": 2.328125, + "learning_rate": 0.00015252562800743605, + "loss": 2.1809, + "step": 275330 + }, + { + "epoch": 0.65, + "grad_norm": 2.5625, + "learning_rate": 0.00015252405520576533, + "loss": 2.227, + "step": 275335 + }, + { + "epoch": 0.65, + "grad_norm": 2.140625, + "learning_rate": 0.00015252248238615144, + "loss": 2.0786, + "step": 275340 + }, + { + "epoch": 0.65, + "grad_norm": 2.0, + "learning_rate": 0.00015252090954859487, + "loss": 1.9816, + "step": 275345 + }, + { + "epoch": 0.65, + "grad_norm": 2.21875, + "learning_rate": 0.00015251933669309625, + "loss": 2.0605, + "step": 275350 + }, + { + "epoch": 0.65, + "grad_norm": 2.140625, + "learning_rate": 0.0001525177638196561, + "loss": 2.1306, + "step": 275355 + }, + { + "epoch": 0.65, + "grad_norm": 2.3125, + "learning_rate": 0.00015251619092827485, + "loss": 2.0403, + "step": 275360 + }, + { + "epoch": 0.65, + "grad_norm": 2.015625, + "learning_rate": 0.0001525146180189532, + "loss": 2.0938, + "step": 275365 + }, + { + "epoch": 0.65, + "grad_norm": 3.203125, + "learning_rate": 0.00015251304509169154, + "loss": 1.9358, + "step": 275370 + }, + { + "epoch": 0.65, + "grad_norm": 1.890625, + "learning_rate": 0.0001525114721464905, + "loss": 1.9412, + "step": 275375 + }, + { + "epoch": 0.65, + "grad_norm": 2.15625, + "learning_rate": 0.00015250989918335058, + "loss": 2.0466, + "step": 275380 + }, + { + "epoch": 0.65, + "grad_norm": 2.40625, + "learning_rate": 0.00015250832620227234, + "loss": 1.91, + "step": 275385 + }, + { + "epoch": 0.65, + "grad_norm": 2.0, + "learning_rate": 0.00015250675320325628, + "loss": 2.0686, + "step": 275390 + }, + { + "epoch": 0.65, + "grad_norm": 2.09375, + "learning_rate": 0.000152505180186303, + "loss": 2.0723, + "step": 275395 + }, + { + "epoch": 0.65, + "grad_norm": 1.953125, + "learning_rate": 0.00015250360715141295, + "loss": 1.9609, + "step": 275400 + }, + { + "epoch": 0.65, + "grad_norm": 2.234375, + "learning_rate": 0.00015250203409858677, + "loss": 2.1728, + "step": 275405 + }, + { + "epoch": 0.65, + "grad_norm": 2.203125, + "learning_rate": 0.0001525004610278249, + "loss": 2.0825, + "step": 275410 + }, + { + "epoch": 0.65, + "grad_norm": 2.15625, + "learning_rate": 0.00015249888793912795, + "loss": 1.949, + "step": 275415 + }, + { + "epoch": 0.65, + "grad_norm": 2.140625, + "learning_rate": 0.00015249731483249642, + "loss": 1.9329, + "step": 275420 + }, + { + "epoch": 0.65, + "grad_norm": 1.9453125, + "learning_rate": 0.00015249574170793083, + "loss": 2.1603, + "step": 275425 + }, + { + "epoch": 0.65, + "grad_norm": 2.234375, + "learning_rate": 0.00015249416856543175, + "loss": 1.9189, + "step": 275430 + }, + { + "epoch": 0.65, + "grad_norm": 2.234375, + "learning_rate": 0.00015249259540499975, + "loss": 2.0581, + "step": 275435 + }, + { + "epoch": 0.65, + "grad_norm": 1.8359375, + "learning_rate": 0.00015249102222663526, + "loss": 2.1699, + "step": 275440 + }, + { + "epoch": 0.65, + "grad_norm": 2.53125, + "learning_rate": 0.00015248944903033894, + "loss": 2.0946, + "step": 275445 + }, + { + "epoch": 0.65, + "grad_norm": 2.125, + "learning_rate": 0.00015248787581611124, + "loss": 2.1411, + "step": 275450 + }, + { + "epoch": 0.65, + "grad_norm": 2.203125, + "learning_rate": 0.00015248630258395273, + "loss": 2.2716, + "step": 275455 + }, + { + "epoch": 0.65, + "grad_norm": 2.234375, + "learning_rate": 0.00015248472933386399, + "loss": 2.3103, + "step": 275460 + }, + { + "epoch": 0.65, + "grad_norm": 1.890625, + "learning_rate": 0.00015248315606584547, + "loss": 1.9397, + "step": 275465 + }, + { + "epoch": 0.65, + "grad_norm": 2.265625, + "learning_rate": 0.00015248158277989776, + "loss": 1.9327, + "step": 275470 + }, + { + "epoch": 0.65, + "grad_norm": 2.109375, + "learning_rate": 0.0001524800094760214, + "loss": 1.8614, + "step": 275475 + }, + { + "epoch": 0.65, + "grad_norm": 2.3125, + "learning_rate": 0.0001524784361542169, + "loss": 2.239, + "step": 275480 + }, + { + "epoch": 0.65, + "grad_norm": 2.515625, + "learning_rate": 0.00015247686281448487, + "loss": 2.0934, + "step": 275485 + }, + { + "epoch": 0.65, + "grad_norm": 2.03125, + "learning_rate": 0.00015247528945682573, + "loss": 2.1734, + "step": 275490 + }, + { + "epoch": 0.65, + "grad_norm": 2.03125, + "learning_rate": 0.00015247371608124007, + "loss": 2.0766, + "step": 275495 + }, + { + "epoch": 0.65, + "grad_norm": 2.234375, + "learning_rate": 0.0001524721426877285, + "loss": 2.0775, + "step": 275500 + }, + { + "epoch": 0.65, + "grad_norm": 2.421875, + "learning_rate": 0.00015247056927629145, + "loss": 1.9191, + "step": 275505 + }, + { + "epoch": 0.65, + "grad_norm": 2.046875, + "learning_rate": 0.0001524689958469295, + "loss": 2.02, + "step": 275510 + }, + { + "epoch": 0.65, + "grad_norm": 1.8203125, + "learning_rate": 0.00015246742239964322, + "loss": 2.0638, + "step": 275515 + }, + { + "epoch": 0.65, + "grad_norm": 2.59375, + "learning_rate": 0.0001524658489344331, + "loss": 1.9102, + "step": 275520 + }, + { + "epoch": 0.65, + "grad_norm": 2.4375, + "learning_rate": 0.0001524642754512997, + "loss": 1.8946, + "step": 275525 + }, + { + "epoch": 0.65, + "grad_norm": 2.375, + "learning_rate": 0.00015246270195024353, + "loss": 1.9195, + "step": 275530 + }, + { + "epoch": 0.65, + "grad_norm": 2.78125, + "learning_rate": 0.00015246112843126516, + "loss": 1.9084, + "step": 275535 + }, + { + "epoch": 0.65, + "grad_norm": 2.15625, + "learning_rate": 0.00015245955489436513, + "loss": 1.9694, + "step": 275540 + }, + { + "epoch": 0.65, + "grad_norm": 2.171875, + "learning_rate": 0.00015245798133954396, + "loss": 2.0469, + "step": 275545 + }, + { + "epoch": 0.65, + "grad_norm": 1.96875, + "learning_rate": 0.0001524564077668022, + "loss": 2.2055, + "step": 275550 + }, + { + "epoch": 0.65, + "grad_norm": 1.984375, + "learning_rate": 0.00015245483417614035, + "loss": 1.9086, + "step": 275555 + }, + { + "epoch": 0.65, + "grad_norm": 2.421875, + "learning_rate": 0.000152453260567559, + "loss": 2.226, + "step": 275560 + }, + { + "epoch": 0.65, + "grad_norm": 2.03125, + "learning_rate": 0.00015245168694105868, + "loss": 2.0533, + "step": 275565 + }, + { + "epoch": 0.65, + "grad_norm": 2.21875, + "learning_rate": 0.00015245011329663988, + "loss": 2.0852, + "step": 275570 + }, + { + "epoch": 0.65, + "grad_norm": 2.375, + "learning_rate": 0.0001524485396343032, + "loss": 1.9979, + "step": 275575 + }, + { + "epoch": 0.65, + "grad_norm": 2.125, + "learning_rate": 0.00015244696595404913, + "loss": 1.9275, + "step": 275580 + }, + { + "epoch": 0.65, + "grad_norm": 2.40625, + "learning_rate": 0.0001524453922558782, + "loss": 2.0924, + "step": 275585 + }, + { + "epoch": 0.65, + "grad_norm": 2.453125, + "learning_rate": 0.000152443818539791, + "loss": 2.0145, + "step": 275590 + }, + { + "epoch": 0.65, + "grad_norm": 1.515625, + "learning_rate": 0.00015244224480578806, + "loss": 2.0223, + "step": 275595 + }, + { + "epoch": 0.65, + "grad_norm": 1.953125, + "learning_rate": 0.00015244067105386989, + "loss": 1.8695, + "step": 275600 + }, + { + "epoch": 0.65, + "grad_norm": 2.15625, + "learning_rate": 0.000152439097284037, + "loss": 2.1406, + "step": 275605 + }, + { + "epoch": 0.65, + "grad_norm": 2.328125, + "learning_rate": 0.00015243752349629, + "loss": 2.2742, + "step": 275610 + }, + { + "epoch": 0.65, + "grad_norm": 1.96875, + "learning_rate": 0.00015243594969062936, + "loss": 2.091, + "step": 275615 + }, + { + "epoch": 0.65, + "grad_norm": 2.109375, + "learning_rate": 0.00015243437586705567, + "loss": 1.9441, + "step": 275620 + }, + { + "epoch": 0.65, + "grad_norm": 2.234375, + "learning_rate": 0.00015243280202556945, + "loss": 1.9609, + "step": 275625 + }, + { + "epoch": 0.65, + "grad_norm": 2.109375, + "learning_rate": 0.00015243122816617122, + "loss": 2.0332, + "step": 275630 + }, + { + "epoch": 0.65, + "grad_norm": 2.375, + "learning_rate": 0.00015242965428886155, + "loss": 1.9565, + "step": 275635 + }, + { + "epoch": 0.65, + "grad_norm": 2.625, + "learning_rate": 0.00015242808039364093, + "loss": 2.068, + "step": 275640 + }, + { + "epoch": 0.65, + "grad_norm": 2.4375, + "learning_rate": 0.00015242650648050995, + "loss": 1.8462, + "step": 275645 + }, + { + "epoch": 0.65, + "grad_norm": 2.203125, + "learning_rate": 0.0001524249325494691, + "loss": 2.2745, + "step": 275650 + }, + { + "epoch": 0.65, + "grad_norm": 2.140625, + "learning_rate": 0.000152423358600519, + "loss": 1.9405, + "step": 275655 + }, + { + "epoch": 0.65, + "grad_norm": 2.546875, + "learning_rate": 0.0001524217846336601, + "loss": 2.1737, + "step": 275660 + }, + { + "epoch": 0.65, + "grad_norm": 2.109375, + "learning_rate": 0.00015242021064889292, + "loss": 2.0667, + "step": 275665 + }, + { + "epoch": 0.65, + "grad_norm": 1.9453125, + "learning_rate": 0.0001524186366462181, + "loss": 2.2359, + "step": 275670 + }, + { + "epoch": 0.65, + "grad_norm": 2.359375, + "learning_rate": 0.00015241706262563608, + "loss": 1.9873, + "step": 275675 + }, + { + "epoch": 0.65, + "grad_norm": 1.9375, + "learning_rate": 0.0001524154885871475, + "loss": 2.0376, + "step": 275680 + }, + { + "epoch": 0.65, + "grad_norm": 2.296875, + "learning_rate": 0.0001524139145307528, + "loss": 2.1565, + "step": 275685 + }, + { + "epoch": 0.65, + "grad_norm": 2.1875, + "learning_rate": 0.00015241234045645254, + "loss": 2.112, + "step": 275690 + }, + { + "epoch": 0.65, + "grad_norm": 2.203125, + "learning_rate": 0.0001524107663642473, + "loss": 2.0902, + "step": 275695 + }, + { + "epoch": 0.65, + "grad_norm": 2.03125, + "learning_rate": 0.0001524091922541376, + "loss": 2.1289, + "step": 275700 + }, + { + "epoch": 0.65, + "grad_norm": 1.9609375, + "learning_rate": 0.00015240761812612398, + "loss": 2.0904, + "step": 275705 + }, + { + "epoch": 0.65, + "grad_norm": 2.125, + "learning_rate": 0.00015240604398020696, + "loss": 1.828, + "step": 275710 + }, + { + "epoch": 0.65, + "grad_norm": 2.25, + "learning_rate": 0.00015240446981638705, + "loss": 2.0861, + "step": 275715 + }, + { + "epoch": 0.65, + "grad_norm": 1.53125, + "learning_rate": 0.00015240289563466482, + "loss": 1.8641, + "step": 275720 + }, + { + "epoch": 0.65, + "grad_norm": 1.9921875, + "learning_rate": 0.00015240132143504086, + "loss": 2.0687, + "step": 275725 + }, + { + "epoch": 0.65, + "grad_norm": 2.171875, + "learning_rate": 0.00015239974721751562, + "loss": 2.0717, + "step": 275730 + }, + { + "epoch": 0.65, + "grad_norm": 1.9921875, + "learning_rate": 0.00015239817298208968, + "loss": 2.0556, + "step": 275735 + }, + { + "epoch": 0.65, + "grad_norm": 2.578125, + "learning_rate": 0.0001523965987287636, + "loss": 2.0989, + "step": 275740 + }, + { + "epoch": 0.65, + "grad_norm": 1.96875, + "learning_rate": 0.00015239502445753785, + "loss": 2.142, + "step": 275745 + }, + { + "epoch": 0.65, + "grad_norm": 2.21875, + "learning_rate": 0.00015239345016841303, + "loss": 1.8225, + "step": 275750 + }, + { + "epoch": 0.65, + "grad_norm": 1.75, + "learning_rate": 0.00015239187586138967, + "loss": 2.1206, + "step": 275755 + }, + { + "epoch": 0.65, + "grad_norm": 2.390625, + "learning_rate": 0.0001523903015364683, + "loss": 1.8945, + "step": 275760 + }, + { + "epoch": 0.65, + "grad_norm": 3.3125, + "learning_rate": 0.00015238872719364942, + "loss": 2.0598, + "step": 275765 + }, + { + "epoch": 0.65, + "grad_norm": 2.359375, + "learning_rate": 0.00015238715283293362, + "loss": 1.8512, + "step": 275770 + }, + { + "epoch": 0.65, + "grad_norm": 2.1875, + "learning_rate": 0.00015238557845432142, + "loss": 2.3142, + "step": 275775 + }, + { + "epoch": 0.65, + "grad_norm": 1.9609375, + "learning_rate": 0.00015238400405781337, + "loss": 2.0327, + "step": 275780 + }, + { + "epoch": 0.65, + "grad_norm": 2.125, + "learning_rate": 0.00015238242964340997, + "loss": 1.9087, + "step": 275785 + }, + { + "epoch": 0.65, + "grad_norm": 2.25, + "learning_rate": 0.00015238085521111175, + "loss": 2.1427, + "step": 275790 + }, + { + "epoch": 0.65, + "grad_norm": 2.203125, + "learning_rate": 0.00015237928076091935, + "loss": 2.109, + "step": 275795 + }, + { + "epoch": 0.65, + "grad_norm": 1.9140625, + "learning_rate": 0.0001523777062928332, + "loss": 1.862, + "step": 275800 + }, + { + "epoch": 0.65, + "grad_norm": 2.453125, + "learning_rate": 0.0001523761318068539, + "loss": 2.0248, + "step": 275805 + }, + { + "epoch": 0.65, + "grad_norm": 2.03125, + "learning_rate": 0.0001523745573029819, + "loss": 2.0106, + "step": 275810 + }, + { + "epoch": 0.65, + "grad_norm": 2.328125, + "learning_rate": 0.00015237298278121788, + "loss": 1.8509, + "step": 275815 + }, + { + "epoch": 0.65, + "grad_norm": 2.078125, + "learning_rate": 0.0001523714082415623, + "loss": 2.2638, + "step": 275820 + }, + { + "epoch": 0.65, + "grad_norm": 2.359375, + "learning_rate": 0.00015236983368401564, + "loss": 2.048, + "step": 275825 + }, + { + "epoch": 0.65, + "grad_norm": 1.6953125, + "learning_rate": 0.0001523682591085785, + "loss": 1.9757, + "step": 275830 + }, + { + "epoch": 0.65, + "grad_norm": 2.015625, + "learning_rate": 0.00015236668451525142, + "loss": 2.0811, + "step": 275835 + }, + { + "epoch": 0.65, + "grad_norm": 2.375, + "learning_rate": 0.000152365109904035, + "loss": 1.97, + "step": 275840 + }, + { + "epoch": 0.65, + "grad_norm": 2.40625, + "learning_rate": 0.0001523635352749296, + "loss": 2.01, + "step": 275845 + }, + { + "epoch": 0.65, + "grad_norm": 1.828125, + "learning_rate": 0.00015236196062793592, + "loss": 2.1862, + "step": 275850 + }, + { + "epoch": 0.65, + "grad_norm": 2.140625, + "learning_rate": 0.00015236038596305444, + "loss": 2.0159, + "step": 275855 + }, + { + "epoch": 0.65, + "grad_norm": 2.046875, + "learning_rate": 0.00015235881128028574, + "loss": 2.025, + "step": 275860 + }, + { + "epoch": 0.65, + "grad_norm": 2.0, + "learning_rate": 0.00015235723657963028, + "loss": 2.1668, + "step": 275865 + }, + { + "epoch": 0.65, + "grad_norm": 2.234375, + "learning_rate": 0.00015235566186108863, + "loss": 2.1726, + "step": 275870 + }, + { + "epoch": 0.65, + "grad_norm": 2.421875, + "learning_rate": 0.00015235408712466138, + "loss": 1.9961, + "step": 275875 + }, + { + "epoch": 0.65, + "grad_norm": 2.359375, + "learning_rate": 0.00015235251237034894, + "loss": 2.1903, + "step": 275880 + }, + { + "epoch": 0.65, + "grad_norm": 2.109375, + "learning_rate": 0.000152350937598152, + "loss": 2.0606, + "step": 275885 + }, + { + "epoch": 0.65, + "grad_norm": 2.0, + "learning_rate": 0.00015234936280807105, + "loss": 2.1176, + "step": 275890 + }, + { + "epoch": 0.65, + "grad_norm": 1.65625, + "learning_rate": 0.00015234778800010657, + "loss": 2.0915, + "step": 275895 + }, + { + "epoch": 0.65, + "grad_norm": 2.234375, + "learning_rate": 0.00015234621317425912, + "loss": 1.8293, + "step": 275900 + }, + { + "epoch": 0.65, + "grad_norm": 2.15625, + "learning_rate": 0.00015234463833052928, + "loss": 2.2003, + "step": 275905 + }, + { + "epoch": 0.65, + "grad_norm": 2.0625, + "learning_rate": 0.00015234306346891756, + "loss": 2.0659, + "step": 275910 + }, + { + "epoch": 0.65, + "grad_norm": 2.0, + "learning_rate": 0.0001523414885894245, + "loss": 2.0712, + "step": 275915 + }, + { + "epoch": 0.65, + "grad_norm": 2.4375, + "learning_rate": 0.00015233991369205066, + "loss": 2.0935, + "step": 275920 + }, + { + "epoch": 0.65, + "grad_norm": 1.9453125, + "learning_rate": 0.0001523383387767965, + "loss": 1.9878, + "step": 275925 + }, + { + "epoch": 0.65, + "grad_norm": 1.734375, + "learning_rate": 0.00015233676384366268, + "loss": 2.0205, + "step": 275930 + }, + { + "epoch": 0.65, + "grad_norm": 2.375, + "learning_rate": 0.00015233518889264962, + "loss": 2.1542, + "step": 275935 + }, + { + "epoch": 0.65, + "grad_norm": 2.28125, + "learning_rate": 0.00015233361392375793, + "loss": 2.1579, + "step": 275940 + }, + { + "epoch": 0.65, + "grad_norm": 2.046875, + "learning_rate": 0.00015233203893698813, + "loss": 1.8501, + "step": 275945 + }, + { + "epoch": 0.65, + "grad_norm": 2.09375, + "learning_rate": 0.00015233046393234073, + "loss": 2.1447, + "step": 275950 + }, + { + "epoch": 0.65, + "grad_norm": 2.046875, + "learning_rate": 0.00015232888890981636, + "loss": 2.1059, + "step": 275955 + }, + { + "epoch": 0.65, + "grad_norm": 2.578125, + "learning_rate": 0.0001523273138694154, + "loss": 2.0937, + "step": 275960 + }, + { + "epoch": 0.65, + "grad_norm": 2.0, + "learning_rate": 0.00015232573881113855, + "loss": 2.0567, + "step": 275965 + }, + { + "epoch": 0.65, + "grad_norm": 2.4375, + "learning_rate": 0.00015232416373498628, + "loss": 2.1833, + "step": 275970 + }, + { + "epoch": 0.65, + "grad_norm": 2.21875, + "learning_rate": 0.00015232258864095912, + "loss": 2.0676, + "step": 275975 + }, + { + "epoch": 0.65, + "grad_norm": 2.125, + "learning_rate": 0.0001523210135290576, + "loss": 2.0726, + "step": 275980 + }, + { + "epoch": 0.65, + "grad_norm": 2.40625, + "learning_rate": 0.00015231943839928225, + "loss": 2.2121, + "step": 275985 + }, + { + "epoch": 0.65, + "grad_norm": 2.359375, + "learning_rate": 0.00015231786325163365, + "loss": 2.055, + "step": 275990 + }, + { + "epoch": 0.65, + "grad_norm": 1.828125, + "learning_rate": 0.00015231628808611234, + "loss": 2.1216, + "step": 275995 + }, + { + "epoch": 0.65, + "grad_norm": 2.34375, + "learning_rate": 0.00015231471290271884, + "loss": 2.1138, + "step": 276000 + }, + { + "epoch": 0.65, + "grad_norm": 1.9921875, + "learning_rate": 0.00015231313770145365, + "loss": 2.2417, + "step": 276005 + }, + { + "epoch": 0.65, + "grad_norm": 2.59375, + "learning_rate": 0.00015231156248231734, + "loss": 2.109, + "step": 276010 + }, + { + "epoch": 0.65, + "grad_norm": 2.515625, + "learning_rate": 0.00015230998724531048, + "loss": 2.2974, + "step": 276015 + }, + { + "epoch": 0.65, + "grad_norm": 2.203125, + "learning_rate": 0.00015230841199043358, + "loss": 2.1724, + "step": 276020 + }, + { + "epoch": 0.65, + "grad_norm": 2.40625, + "learning_rate": 0.00015230683671768716, + "loss": 2.2058, + "step": 276025 + }, + { + "epoch": 0.65, + "grad_norm": 2.28125, + "learning_rate": 0.0001523052614270718, + "loss": 2.2404, + "step": 276030 + }, + { + "epoch": 0.65, + "grad_norm": 1.9453125, + "learning_rate": 0.000152303686118588, + "loss": 2.0561, + "step": 276035 + }, + { + "epoch": 0.65, + "grad_norm": 2.234375, + "learning_rate": 0.0001523021107922363, + "loss": 1.8928, + "step": 276040 + }, + { + "epoch": 0.65, + "grad_norm": 2.1875, + "learning_rate": 0.00015230053544801727, + "loss": 2.1178, + "step": 276045 + }, + { + "epoch": 0.65, + "grad_norm": 1.7265625, + "learning_rate": 0.00015229896008593142, + "loss": 1.9151, + "step": 276050 + }, + { + "epoch": 0.65, + "grad_norm": 2.046875, + "learning_rate": 0.0001522973847059793, + "loss": 2.1537, + "step": 276055 + }, + { + "epoch": 0.65, + "grad_norm": 2.5625, + "learning_rate": 0.00015229580930816144, + "loss": 1.9578, + "step": 276060 + }, + { + "epoch": 0.65, + "grad_norm": 2.125, + "learning_rate": 0.00015229423389247839, + "loss": 2.0696, + "step": 276065 + }, + { + "epoch": 0.65, + "grad_norm": 2.0625, + "learning_rate": 0.0001522926584589307, + "loss": 2.0813, + "step": 276070 + }, + { + "epoch": 0.65, + "grad_norm": 2.46875, + "learning_rate": 0.00015229108300751884, + "loss": 1.9117, + "step": 276075 + }, + { + "epoch": 0.65, + "grad_norm": 2.078125, + "learning_rate": 0.00015228950753824343, + "loss": 2.1013, + "step": 276080 + }, + { + "epoch": 0.65, + "grad_norm": 2.078125, + "learning_rate": 0.000152287932051105, + "loss": 2.0365, + "step": 276085 + }, + { + "epoch": 0.65, + "grad_norm": 2.171875, + "learning_rate": 0.00015228635654610404, + "loss": 1.9479, + "step": 276090 + }, + { + "epoch": 0.65, + "grad_norm": 2.359375, + "learning_rate": 0.00015228478102324107, + "loss": 2.1037, + "step": 276095 + }, + { + "epoch": 0.65, + "grad_norm": 2.078125, + "learning_rate": 0.0001522832054825167, + "loss": 2.2358, + "step": 276100 + }, + { + "epoch": 0.65, + "grad_norm": 1.9375, + "learning_rate": 0.00015228162992393144, + "loss": 2.1713, + "step": 276105 + }, + { + "epoch": 0.65, + "grad_norm": 2.65625, + "learning_rate": 0.00015228005434748587, + "loss": 2.2231, + "step": 276110 + }, + { + "epoch": 0.65, + "grad_norm": 1.8984375, + "learning_rate": 0.00015227847875318043, + "loss": 2.0957, + "step": 276115 + }, + { + "epoch": 0.65, + "grad_norm": 2.109375, + "learning_rate": 0.0001522769031410157, + "loss": 2.0557, + "step": 276120 + }, + { + "epoch": 0.65, + "grad_norm": 2.046875, + "learning_rate": 0.00015227532751099229, + "loss": 2.0913, + "step": 276125 + }, + { + "epoch": 0.65, + "grad_norm": 2.140625, + "learning_rate": 0.00015227375186311067, + "loss": 2.0318, + "step": 276130 + }, + { + "epoch": 0.65, + "grad_norm": 2.1875, + "learning_rate": 0.00015227217619737137, + "loss": 2.0115, + "step": 276135 + }, + { + "epoch": 0.65, + "grad_norm": 2.0, + "learning_rate": 0.00015227060051377494, + "loss": 1.9684, + "step": 276140 + }, + { + "epoch": 0.65, + "grad_norm": 1.9296875, + "learning_rate": 0.00015226902481232192, + "loss": 2.0418, + "step": 276145 + }, + { + "epoch": 0.65, + "grad_norm": 2.0625, + "learning_rate": 0.00015226744909301287, + "loss": 2.0989, + "step": 276150 + }, + { + "epoch": 0.65, + "grad_norm": 2.140625, + "learning_rate": 0.00015226587335584831, + "loss": 2.1621, + "step": 276155 + }, + { + "epoch": 0.65, + "grad_norm": 2.71875, + "learning_rate": 0.0001522642976008288, + "loss": 2.082, + "step": 276160 + }, + { + "epoch": 0.65, + "grad_norm": 2.046875, + "learning_rate": 0.00015226272182795482, + "loss": 2.2616, + "step": 276165 + }, + { + "epoch": 0.65, + "grad_norm": 2.390625, + "learning_rate": 0.00015226114603722696, + "loss": 2.1832, + "step": 276170 + }, + { + "epoch": 0.65, + "grad_norm": 2.265625, + "learning_rate": 0.00015225957022864574, + "loss": 2.1212, + "step": 276175 + }, + { + "epoch": 0.65, + "grad_norm": 2.734375, + "learning_rate": 0.00015225799440221174, + "loss": 2.0102, + "step": 276180 + }, + { + "epoch": 0.65, + "grad_norm": 2.015625, + "learning_rate": 0.00015225641855792543, + "loss": 1.9295, + "step": 276185 + }, + { + "epoch": 0.65, + "grad_norm": 2.203125, + "learning_rate": 0.00015225484269578736, + "loss": 2.1213, + "step": 276190 + }, + { + "epoch": 0.65, + "grad_norm": 1.8125, + "learning_rate": 0.00015225326681579811, + "loss": 1.9651, + "step": 276195 + }, + { + "epoch": 0.65, + "grad_norm": 2.09375, + "learning_rate": 0.0001522516909179582, + "loss": 2.0371, + "step": 276200 + }, + { + "epoch": 0.65, + "grad_norm": 2.34375, + "learning_rate": 0.00015225011500226817, + "loss": 2.0295, + "step": 276205 + }, + { + "epoch": 0.65, + "grad_norm": 2.171875, + "learning_rate": 0.00015224853906872852, + "loss": 2.149, + "step": 276210 + }, + { + "epoch": 0.65, + "grad_norm": 2.578125, + "learning_rate": 0.00015224696311733985, + "loss": 2.0967, + "step": 276215 + }, + { + "epoch": 0.65, + "grad_norm": 2.046875, + "learning_rate": 0.00015224538714810268, + "loss": 1.9318, + "step": 276220 + }, + { + "epoch": 0.65, + "grad_norm": 2.203125, + "learning_rate": 0.00015224381116101752, + "loss": 1.9634, + "step": 276225 + }, + { + "epoch": 0.65, + "grad_norm": 2.046875, + "learning_rate": 0.00015224223515608492, + "loss": 1.8199, + "step": 276230 + }, + { + "epoch": 0.65, + "grad_norm": 2.234375, + "learning_rate": 0.00015224065913330546, + "loss": 2.0327, + "step": 276235 + }, + { + "epoch": 0.65, + "grad_norm": 1.921875, + "learning_rate": 0.0001522390830926796, + "loss": 1.995, + "step": 276240 + }, + { + "epoch": 0.65, + "grad_norm": 2.28125, + "learning_rate": 0.00015223750703420797, + "loss": 2.1992, + "step": 276245 + }, + { + "epoch": 0.65, + "grad_norm": 1.828125, + "learning_rate": 0.00015223593095789104, + "loss": 2.1302, + "step": 276250 + }, + { + "epoch": 0.65, + "grad_norm": 2.015625, + "learning_rate": 0.0001522343548637293, + "loss": 1.8678, + "step": 276255 + }, + { + "epoch": 0.65, + "grad_norm": 2.609375, + "learning_rate": 0.00015223277875172343, + "loss": 2.0494, + "step": 276260 + }, + { + "epoch": 0.65, + "grad_norm": 1.9140625, + "learning_rate": 0.00015223120262187388, + "loss": 2.0512, + "step": 276265 + }, + { + "epoch": 0.65, + "grad_norm": 2.359375, + "learning_rate": 0.00015222962647418124, + "loss": 2.159, + "step": 276270 + }, + { + "epoch": 0.65, + "grad_norm": 1.90625, + "learning_rate": 0.00015222805030864597, + "loss": 1.9483, + "step": 276275 + }, + { + "epoch": 0.65, + "grad_norm": 2.578125, + "learning_rate": 0.00015222647412526863, + "loss": 1.8763, + "step": 276280 + }, + { + "epoch": 0.65, + "grad_norm": 1.9765625, + "learning_rate": 0.00015222489792404982, + "loss": 1.9282, + "step": 276285 + }, + { + "epoch": 0.65, + "grad_norm": 1.8671875, + "learning_rate": 0.00015222332170499004, + "loss": 2.0814, + "step": 276290 + }, + { + "epoch": 0.65, + "grad_norm": 2.28125, + "learning_rate": 0.00015222174546808984, + "loss": 2.0988, + "step": 276295 + }, + { + "epoch": 0.65, + "grad_norm": 2.375, + "learning_rate": 0.0001522201692133497, + "loss": 2.0825, + "step": 276300 + }, + { + "epoch": 0.65, + "grad_norm": 2.25, + "learning_rate": 0.00015221859294077019, + "loss": 2.0128, + "step": 276305 + }, + { + "epoch": 0.65, + "grad_norm": 2.140625, + "learning_rate": 0.0001522170166503519, + "loss": 1.9043, + "step": 276310 + }, + { + "epoch": 0.65, + "grad_norm": 2.390625, + "learning_rate": 0.0001522154403420953, + "loss": 2.1482, + "step": 276315 + }, + { + "epoch": 0.65, + "grad_norm": 2.421875, + "learning_rate": 0.000152213864016001, + "loss": 2.1194, + "step": 276320 + }, + { + "epoch": 0.65, + "grad_norm": 2.234375, + "learning_rate": 0.00015221228767206947, + "loss": 1.9112, + "step": 276325 + }, + { + "epoch": 0.65, + "grad_norm": 1.84375, + "learning_rate": 0.0001522107113103013, + "loss": 2.1647, + "step": 276330 + }, + { + "epoch": 0.65, + "grad_norm": 2.140625, + "learning_rate": 0.00015220913493069698, + "loss": 2.1015, + "step": 276335 + }, + { + "epoch": 0.65, + "grad_norm": 2.546875, + "learning_rate": 0.0001522075585332571, + "loss": 2.1302, + "step": 276340 + }, + { + "epoch": 0.65, + "grad_norm": 2.125, + "learning_rate": 0.00015220598211798214, + "loss": 1.8756, + "step": 276345 + }, + { + "epoch": 0.65, + "grad_norm": 1.921875, + "learning_rate": 0.00015220440568487268, + "loss": 2.1639, + "step": 276350 + }, + { + "epoch": 0.65, + "grad_norm": 1.9921875, + "learning_rate": 0.00015220282923392926, + "loss": 2.2334, + "step": 276355 + }, + { + "epoch": 0.65, + "grad_norm": 2.296875, + "learning_rate": 0.00015220125276515237, + "loss": 1.9982, + "step": 276360 + }, + { + "epoch": 0.65, + "grad_norm": 2.328125, + "learning_rate": 0.0001521996762785426, + "loss": 2.1791, + "step": 276365 + }, + { + "epoch": 0.65, + "grad_norm": 1.8125, + "learning_rate": 0.0001521980997741005, + "loss": 2.0946, + "step": 276370 + }, + { + "epoch": 0.65, + "grad_norm": 2.296875, + "learning_rate": 0.00015219652325182656, + "loss": 2.0815, + "step": 276375 + }, + { + "epoch": 0.65, + "grad_norm": 2.375, + "learning_rate": 0.00015219494671172136, + "loss": 2.2524, + "step": 276380 + }, + { + "epoch": 0.65, + "grad_norm": 2.328125, + "learning_rate": 0.00015219337015378543, + "loss": 2.1769, + "step": 276385 + }, + { + "epoch": 0.65, + "grad_norm": 2.09375, + "learning_rate": 0.00015219179357801925, + "loss": 2.0174, + "step": 276390 + }, + { + "epoch": 0.65, + "grad_norm": 2.4375, + "learning_rate": 0.00015219021698442342, + "loss": 2.0666, + "step": 276395 + }, + { + "epoch": 0.65, + "grad_norm": 2.203125, + "learning_rate": 0.00015218864037299848, + "loss": 2.1083, + "step": 276400 + }, + { + "epoch": 0.65, + "grad_norm": 2.3125, + "learning_rate": 0.00015218706374374495, + "loss": 1.9499, + "step": 276405 + }, + { + "epoch": 0.65, + "grad_norm": 2.28125, + "learning_rate": 0.0001521854870966634, + "loss": 2.1413, + "step": 276410 + }, + { + "epoch": 0.65, + "grad_norm": 2.28125, + "learning_rate": 0.0001521839104317543, + "loss": 2.1293, + "step": 276415 + }, + { + "epoch": 0.65, + "grad_norm": 1.9296875, + "learning_rate": 0.00015218233374901825, + "loss": 2.0787, + "step": 276420 + }, + { + "epoch": 0.65, + "grad_norm": 1.8984375, + "learning_rate": 0.00015218075704845577, + "loss": 2.0632, + "step": 276425 + }, + { + "epoch": 0.65, + "grad_norm": 1.828125, + "learning_rate": 0.0001521791803300674, + "loss": 2.0322, + "step": 276430 + }, + { + "epoch": 0.65, + "grad_norm": 2.3125, + "learning_rate": 0.00015217760359385367, + "loss": 2.1809, + "step": 276435 + }, + { + "epoch": 0.65, + "grad_norm": 2.5625, + "learning_rate": 0.00015217602683981508, + "loss": 2.1097, + "step": 276440 + }, + { + "epoch": 0.65, + "grad_norm": 2.390625, + "learning_rate": 0.0001521744500679523, + "loss": 2.1877, + "step": 276445 + }, + { + "epoch": 0.65, + "grad_norm": 2.3125, + "learning_rate": 0.00015217287327826573, + "loss": 2.0899, + "step": 276450 + }, + { + "epoch": 0.65, + "grad_norm": 2.515625, + "learning_rate": 0.00015217129647075598, + "loss": 2.0667, + "step": 276455 + }, + { + "epoch": 0.65, + "grad_norm": 2.296875, + "learning_rate": 0.00015216971964542353, + "loss": 1.9623, + "step": 276460 + }, + { + "epoch": 0.65, + "grad_norm": 1.921875, + "learning_rate": 0.00015216814280226898, + "loss": 2.0487, + "step": 276465 + }, + { + "epoch": 0.65, + "grad_norm": 2.03125, + "learning_rate": 0.00015216656594129285, + "loss": 2.0211, + "step": 276470 + }, + { + "epoch": 0.65, + "grad_norm": 1.9453125, + "learning_rate": 0.00015216498906249566, + "loss": 2.0157, + "step": 276475 + }, + { + "epoch": 0.65, + "grad_norm": 2.203125, + "learning_rate": 0.00015216341216587798, + "loss": 1.9078, + "step": 276480 + }, + { + "epoch": 0.65, + "grad_norm": 2.203125, + "learning_rate": 0.00015216183525144033, + "loss": 2.0526, + "step": 276485 + }, + { + "epoch": 0.65, + "grad_norm": 2.359375, + "learning_rate": 0.00015216025831918327, + "loss": 1.9542, + "step": 276490 + }, + { + "epoch": 0.65, + "grad_norm": 2.453125, + "learning_rate": 0.0001521586813691073, + "loss": 2.0468, + "step": 276495 + }, + { + "epoch": 0.65, + "grad_norm": 2.953125, + "learning_rate": 0.00015215710440121298, + "loss": 2.2013, + "step": 276500 + }, + { + "epoch": 0.65, + "grad_norm": 2.671875, + "learning_rate": 0.00015215552741550082, + "loss": 2.0978, + "step": 276505 + }, + { + "epoch": 0.65, + "grad_norm": 2.046875, + "learning_rate": 0.00015215395041197143, + "loss": 1.9726, + "step": 276510 + }, + { + "epoch": 0.65, + "grad_norm": 2.40625, + "learning_rate": 0.0001521523733906253, + "loss": 2.0745, + "step": 276515 + }, + { + "epoch": 0.65, + "grad_norm": 2.09375, + "learning_rate": 0.00015215079635146295, + "loss": 2.0499, + "step": 276520 + }, + { + "epoch": 0.65, + "grad_norm": 2.140625, + "learning_rate": 0.00015214921929448494, + "loss": 2.2263, + "step": 276525 + }, + { + "epoch": 0.65, + "grad_norm": 1.8359375, + "learning_rate": 0.00015214764221969183, + "loss": 1.9577, + "step": 276530 + }, + { + "epoch": 0.65, + "grad_norm": 2.09375, + "learning_rate": 0.00015214606512708412, + "loss": 2.0217, + "step": 276535 + }, + { + "epoch": 0.65, + "grad_norm": 1.8984375, + "learning_rate": 0.0001521444880166624, + "loss": 1.9794, + "step": 276540 + }, + { + "epoch": 0.65, + "grad_norm": 2.0, + "learning_rate": 0.00015214291088842715, + "loss": 2.0316, + "step": 276545 + }, + { + "epoch": 0.65, + "grad_norm": 1.9609375, + "learning_rate": 0.00015214133374237893, + "loss": 2.17, + "step": 276550 + }, + { + "epoch": 0.65, + "grad_norm": 2.34375, + "learning_rate": 0.00015213975657851832, + "loss": 2.0186, + "step": 276555 + }, + { + "epoch": 0.65, + "grad_norm": 2.78125, + "learning_rate": 0.0001521381793968458, + "loss": 2.0532, + "step": 276560 + }, + { + "epoch": 0.65, + "grad_norm": 2.578125, + "learning_rate": 0.00015213660219736194, + "loss": 2.0795, + "step": 276565 + }, + { + "epoch": 0.65, + "grad_norm": 2.3125, + "learning_rate": 0.00015213502498006727, + "loss": 1.8957, + "step": 276570 + }, + { + "epoch": 0.65, + "grad_norm": 2.546875, + "learning_rate": 0.0001521334477449623, + "loss": 2.1298, + "step": 276575 + }, + { + "epoch": 0.65, + "grad_norm": 2.4375, + "learning_rate": 0.00015213187049204762, + "loss": 1.9398, + "step": 276580 + }, + { + "epoch": 0.65, + "grad_norm": 2.328125, + "learning_rate": 0.00015213029322132376, + "loss": 2.0171, + "step": 276585 + }, + { + "epoch": 0.65, + "grad_norm": 2.28125, + "learning_rate": 0.00015212871593279129, + "loss": 2.2962, + "step": 276590 + }, + { + "epoch": 0.65, + "grad_norm": 2.0625, + "learning_rate": 0.00015212713862645063, + "loss": 1.9914, + "step": 276595 + }, + { + "epoch": 0.65, + "grad_norm": 2.265625, + "learning_rate": 0.0001521255613023024, + "loss": 2.0792, + "step": 276600 + }, + { + "epoch": 0.65, + "grad_norm": 1.984375, + "learning_rate": 0.00015212398396034714, + "loss": 2.1571, + "step": 276605 + }, + { + "epoch": 0.65, + "grad_norm": 1.9609375, + "learning_rate": 0.00015212240660058543, + "loss": 2.1361, + "step": 276610 + }, + { + "epoch": 0.65, + "grad_norm": 2.125, + "learning_rate": 0.00015212082922301773, + "loss": 2.0914, + "step": 276615 + }, + { + "epoch": 0.65, + "grad_norm": 1.890625, + "learning_rate": 0.0001521192518276446, + "loss": 2.0388, + "step": 276620 + }, + { + "epoch": 0.65, + "grad_norm": 2.1875, + "learning_rate": 0.00015211767441446655, + "loss": 2.1813, + "step": 276625 + }, + { + "epoch": 0.65, + "grad_norm": 2.578125, + "learning_rate": 0.00015211609698348422, + "loss": 2.056, + "step": 276630 + }, + { + "epoch": 0.65, + "grad_norm": 2.40625, + "learning_rate": 0.00015211451953469807, + "loss": 2.131, + "step": 276635 + }, + { + "epoch": 0.65, + "grad_norm": 2.078125, + "learning_rate": 0.00015211294206810865, + "loss": 2.0674, + "step": 276640 + }, + { + "epoch": 0.65, + "grad_norm": 2.171875, + "learning_rate": 0.00015211136458371648, + "loss": 2.0651, + "step": 276645 + }, + { + "epoch": 0.65, + "grad_norm": 2.203125, + "learning_rate": 0.00015210978708152215, + "loss": 1.92, + "step": 276650 + }, + { + "epoch": 0.65, + "grad_norm": 2.046875, + "learning_rate": 0.00015210820956152617, + "loss": 2.0798, + "step": 276655 + }, + { + "epoch": 0.65, + "grad_norm": 2.296875, + "learning_rate": 0.00015210663202372908, + "loss": 2.1046, + "step": 276660 + }, + { + "epoch": 0.65, + "grad_norm": 2.53125, + "learning_rate": 0.0001521050544681314, + "loss": 2.0872, + "step": 276665 + }, + { + "epoch": 0.65, + "grad_norm": 2.25, + "learning_rate": 0.00015210347689473372, + "loss": 2.0225, + "step": 276670 + }, + { + "epoch": 0.65, + "grad_norm": 2.21875, + "learning_rate": 0.00015210189930353653, + "loss": 2.0486, + "step": 276675 + }, + { + "epoch": 0.65, + "grad_norm": 2.28125, + "learning_rate": 0.0001521003216945404, + "loss": 2.1709, + "step": 276680 + }, + { + "epoch": 0.65, + "grad_norm": 2.125, + "learning_rate": 0.00015209874406774585, + "loss": 2.1641, + "step": 276685 + }, + { + "epoch": 0.65, + "grad_norm": 2.3125, + "learning_rate": 0.0001520971664231534, + "loss": 2.1148, + "step": 276690 + }, + { + "epoch": 0.65, + "grad_norm": 2.09375, + "learning_rate": 0.00015209558876076365, + "loss": 2.0169, + "step": 276695 + }, + { + "epoch": 0.65, + "grad_norm": 2.484375, + "learning_rate": 0.00015209401108057707, + "loss": 2.2597, + "step": 276700 + }, + { + "epoch": 0.65, + "grad_norm": 2.125, + "learning_rate": 0.0001520924333825943, + "loss": 2.1414, + "step": 276705 + }, + { + "epoch": 0.65, + "grad_norm": 1.9140625, + "learning_rate": 0.0001520908556668157, + "loss": 1.9987, + "step": 276710 + }, + { + "epoch": 0.65, + "grad_norm": 2.125, + "learning_rate": 0.000152089277933242, + "loss": 2.0397, + "step": 276715 + }, + { + "epoch": 0.65, + "grad_norm": 2.046875, + "learning_rate": 0.00015208770018187363, + "loss": 1.9229, + "step": 276720 + }, + { + "epoch": 0.65, + "grad_norm": 2.046875, + "learning_rate": 0.00015208612241271116, + "loss": 2.1137, + "step": 276725 + }, + { + "epoch": 0.65, + "grad_norm": 1.828125, + "learning_rate": 0.00015208454462575514, + "loss": 2.0341, + "step": 276730 + }, + { + "epoch": 0.65, + "grad_norm": 2.203125, + "learning_rate": 0.00015208296682100607, + "loss": 2.0354, + "step": 276735 + }, + { + "epoch": 0.65, + "grad_norm": 2.421875, + "learning_rate": 0.00015208138899846453, + "loss": 1.9819, + "step": 276740 + }, + { + "epoch": 0.65, + "grad_norm": 2.03125, + "learning_rate": 0.00015207981115813104, + "loss": 2.1236, + "step": 276745 + }, + { + "epoch": 0.65, + "grad_norm": 2.1875, + "learning_rate": 0.00015207823330000615, + "loss": 2.1055, + "step": 276750 + }, + { + "epoch": 0.65, + "grad_norm": 2.203125, + "learning_rate": 0.00015207665542409037, + "loss": 2.1915, + "step": 276755 + }, + { + "epoch": 0.65, + "grad_norm": 2.484375, + "learning_rate": 0.00015207507753038426, + "loss": 2.0989, + "step": 276760 + }, + { + "epoch": 0.65, + "grad_norm": 2.40625, + "learning_rate": 0.00015207349961888835, + "loss": 2.1631, + "step": 276765 + }, + { + "epoch": 0.65, + "grad_norm": 2.0625, + "learning_rate": 0.00015207192168960324, + "loss": 2.015, + "step": 276770 + }, + { + "epoch": 0.65, + "grad_norm": 2.09375, + "learning_rate": 0.00015207034374252937, + "loss": 2.1221, + "step": 276775 + }, + { + "epoch": 0.65, + "grad_norm": 2.03125, + "learning_rate": 0.00015206876577766735, + "loss": 2.115, + "step": 276780 + }, + { + "epoch": 0.65, + "grad_norm": 2.328125, + "learning_rate": 0.00015206718779501766, + "loss": 2.243, + "step": 276785 + }, + { + "epoch": 0.65, + "grad_norm": 2.6875, + "learning_rate": 0.0001520656097945809, + "loss": 1.9264, + "step": 276790 + }, + { + "epoch": 0.65, + "grad_norm": 1.8984375, + "learning_rate": 0.00015206403177635763, + "loss": 1.9804, + "step": 276795 + }, + { + "epoch": 0.65, + "grad_norm": 1.859375, + "learning_rate": 0.00015206245374034828, + "loss": 2.2562, + "step": 276800 + }, + { + "epoch": 0.65, + "grad_norm": 2.234375, + "learning_rate": 0.00015206087568655344, + "loss": 2.1903, + "step": 276805 + }, + { + "epoch": 0.65, + "grad_norm": 2.375, + "learning_rate": 0.00015205929761497372, + "loss": 2.0726, + "step": 276810 + }, + { + "epoch": 0.65, + "grad_norm": 2.15625, + "learning_rate": 0.00015205771952560957, + "loss": 2.2546, + "step": 276815 + }, + { + "epoch": 0.65, + "grad_norm": 1.96875, + "learning_rate": 0.00015205614141846155, + "loss": 2.1512, + "step": 276820 + }, + { + "epoch": 0.65, + "grad_norm": 2.359375, + "learning_rate": 0.00015205456329353022, + "loss": 2.2456, + "step": 276825 + }, + { + "epoch": 0.65, + "grad_norm": 2.265625, + "learning_rate": 0.0001520529851508161, + "loss": 2.0053, + "step": 276830 + }, + { + "epoch": 0.65, + "grad_norm": 2.015625, + "learning_rate": 0.00015205140699031975, + "loss": 1.9632, + "step": 276835 + }, + { + "epoch": 0.65, + "grad_norm": 2.15625, + "learning_rate": 0.0001520498288120417, + "loss": 2.0632, + "step": 276840 + }, + { + "epoch": 0.65, + "grad_norm": 2.296875, + "learning_rate": 0.00015204825061598243, + "loss": 1.986, + "step": 276845 + }, + { + "epoch": 0.65, + "grad_norm": 2.234375, + "learning_rate": 0.0001520466724021426, + "loss": 2.0762, + "step": 276850 + }, + { + "epoch": 0.65, + "grad_norm": 2.0625, + "learning_rate": 0.00015204509417052263, + "loss": 1.9217, + "step": 276855 + }, + { + "epoch": 0.65, + "grad_norm": 2.109375, + "learning_rate": 0.00015204351592112316, + "loss": 2.1611, + "step": 276860 + }, + { + "epoch": 0.65, + "grad_norm": 2.421875, + "learning_rate": 0.00015204193765394464, + "loss": 2.4422, + "step": 276865 + }, + { + "epoch": 0.65, + "grad_norm": 2.171875, + "learning_rate": 0.00015204035936898768, + "loss": 1.9299, + "step": 276870 + }, + { + "epoch": 0.65, + "grad_norm": 2.234375, + "learning_rate": 0.00015203878106625278, + "loss": 2.0499, + "step": 276875 + }, + { + "epoch": 0.65, + "grad_norm": 3.296875, + "learning_rate": 0.00015203720274574047, + "loss": 2.0528, + "step": 276880 + }, + { + "epoch": 0.65, + "grad_norm": 2.1875, + "learning_rate": 0.00015203562440745135, + "loss": 2.0867, + "step": 276885 + }, + { + "epoch": 0.65, + "grad_norm": 2.0625, + "learning_rate": 0.00015203404605138588, + "loss": 2.1223, + "step": 276890 + }, + { + "epoch": 0.65, + "grad_norm": 2.390625, + "learning_rate": 0.00015203246767754464, + "loss": 2.1591, + "step": 276895 + }, + { + "epoch": 0.65, + "grad_norm": 2.0, + "learning_rate": 0.00015203088928592817, + "loss": 1.9631, + "step": 276900 + }, + { + "epoch": 0.65, + "grad_norm": 1.984375, + "learning_rate": 0.00015202931087653703, + "loss": 2.0283, + "step": 276905 + }, + { + "epoch": 0.65, + "grad_norm": 1.953125, + "learning_rate": 0.0001520277324493717, + "loss": 1.889, + "step": 276910 + }, + { + "epoch": 0.65, + "grad_norm": 2.015625, + "learning_rate": 0.00015202615400443275, + "loss": 2.1596, + "step": 276915 + }, + { + "epoch": 0.65, + "grad_norm": 1.9375, + "learning_rate": 0.00015202457554172076, + "loss": 2.125, + "step": 276920 + }, + { + "epoch": 0.65, + "grad_norm": 1.8828125, + "learning_rate": 0.00015202299706123623, + "loss": 1.8072, + "step": 276925 + }, + { + "epoch": 0.65, + "grad_norm": 2.109375, + "learning_rate": 0.00015202141856297967, + "loss": 2.0803, + "step": 276930 + }, + { + "epoch": 0.65, + "grad_norm": 1.921875, + "learning_rate": 0.00015201984004695163, + "loss": 2.0046, + "step": 276935 + }, + { + "epoch": 0.65, + "grad_norm": 2.015625, + "learning_rate": 0.0001520182615131527, + "loss": 2.1132, + "step": 276940 + }, + { + "epoch": 0.65, + "grad_norm": 2.15625, + "learning_rate": 0.0001520166829615834, + "loss": 2.0502, + "step": 276945 + }, + { + "epoch": 0.65, + "grad_norm": 2.375, + "learning_rate": 0.00015201510439224425, + "loss": 2.0297, + "step": 276950 + }, + { + "epoch": 0.65, + "grad_norm": 2.3125, + "learning_rate": 0.00015201352580513578, + "loss": 2.1004, + "step": 276955 + }, + { + "epoch": 0.65, + "grad_norm": 1.9296875, + "learning_rate": 0.00015201194720025854, + "loss": 2.1336, + "step": 276960 + }, + { + "epoch": 0.65, + "grad_norm": 2.390625, + "learning_rate": 0.0001520103685776131, + "loss": 2.0761, + "step": 276965 + }, + { + "epoch": 0.65, + "grad_norm": 2.703125, + "learning_rate": 0.0001520087899372, + "loss": 2.1022, + "step": 276970 + }, + { + "epoch": 0.65, + "grad_norm": 2.203125, + "learning_rate": 0.0001520072112790197, + "loss": 2.0524, + "step": 276975 + }, + { + "epoch": 0.65, + "grad_norm": 2.09375, + "learning_rate": 0.00015200563260307282, + "loss": 2.1686, + "step": 276980 + }, + { + "epoch": 0.65, + "grad_norm": 2.3125, + "learning_rate": 0.00015200405390935985, + "loss": 2.0527, + "step": 276985 + }, + { + "epoch": 0.65, + "grad_norm": 2.09375, + "learning_rate": 0.00015200247519788138, + "loss": 2.1413, + "step": 276990 + }, + { + "epoch": 0.65, + "grad_norm": 2.375, + "learning_rate": 0.0001520008964686379, + "loss": 1.8015, + "step": 276995 + }, + { + "epoch": 0.65, + "grad_norm": 2.0625, + "learning_rate": 0.00015199931772163, + "loss": 2.0816, + "step": 277000 + }, + { + "epoch": 0.65, + "grad_norm": 2.078125, + "learning_rate": 0.00015199773895685818, + "loss": 2.0319, + "step": 277005 + }, + { + "epoch": 0.65, + "grad_norm": 2.0, + "learning_rate": 0.00015199616017432297, + "loss": 2.0086, + "step": 277010 + }, + { + "epoch": 0.65, + "grad_norm": 2.09375, + "learning_rate": 0.00015199458137402493, + "loss": 2.1276, + "step": 277015 + }, + { + "epoch": 0.65, + "grad_norm": 1.84375, + "learning_rate": 0.00015199300255596464, + "loss": 2.0412, + "step": 277020 + }, + { + "epoch": 0.65, + "grad_norm": 2.34375, + "learning_rate": 0.00015199142372014258, + "loss": 2.0809, + "step": 277025 + }, + { + "epoch": 0.65, + "grad_norm": 2.84375, + "learning_rate": 0.00015198984486655925, + "loss": 2.0464, + "step": 277030 + }, + { + "epoch": 0.65, + "grad_norm": 2.328125, + "learning_rate": 0.00015198826599521528, + "loss": 2.2925, + "step": 277035 + }, + { + "epoch": 0.65, + "grad_norm": 2.1875, + "learning_rate": 0.00015198668710611122, + "loss": 1.7814, + "step": 277040 + }, + { + "epoch": 0.65, + "grad_norm": 2.484375, + "learning_rate": 0.0001519851081992475, + "loss": 2.1928, + "step": 277045 + }, + { + "epoch": 0.65, + "grad_norm": 2.375, + "learning_rate": 0.0001519835292746248, + "loss": 2.0972, + "step": 277050 + }, + { + "epoch": 0.65, + "grad_norm": 2.765625, + "learning_rate": 0.0001519819503322435, + "loss": 2.0047, + "step": 277055 + }, + { + "epoch": 0.65, + "grad_norm": 2.171875, + "learning_rate": 0.00015198037137210423, + "loss": 2.1054, + "step": 277060 + }, + { + "epoch": 0.65, + "grad_norm": 2.1875, + "learning_rate": 0.00015197879239420758, + "loss": 1.9934, + "step": 277065 + }, + { + "epoch": 0.65, + "grad_norm": 2.140625, + "learning_rate": 0.000151977213398554, + "loss": 2.2228, + "step": 277070 + }, + { + "epoch": 0.65, + "grad_norm": 2.234375, + "learning_rate": 0.00015197563438514407, + "loss": 2.0399, + "step": 277075 + }, + { + "epoch": 0.65, + "grad_norm": 1.921875, + "learning_rate": 0.00015197405535397832, + "loss": 2.1001, + "step": 277080 + }, + { + "epoch": 0.65, + "grad_norm": 2.015625, + "learning_rate": 0.0001519724763050573, + "loss": 2.0485, + "step": 277085 + }, + { + "epoch": 0.65, + "grad_norm": 1.96875, + "learning_rate": 0.00015197089723838152, + "loss": 2.1086, + "step": 277090 + }, + { + "epoch": 0.65, + "grad_norm": 2.234375, + "learning_rate": 0.00015196931815395154, + "loss": 2.0457, + "step": 277095 + }, + { + "epoch": 0.65, + "grad_norm": 1.90625, + "learning_rate": 0.00015196773905176792, + "loss": 1.9847, + "step": 277100 + }, + { + "epoch": 0.65, + "grad_norm": 2.0625, + "learning_rate": 0.00015196615993183114, + "loss": 2.0691, + "step": 277105 + }, + { + "epoch": 0.65, + "grad_norm": 2.328125, + "learning_rate": 0.0001519645807941418, + "loss": 1.9294, + "step": 277110 + }, + { + "epoch": 0.65, + "grad_norm": 1.953125, + "learning_rate": 0.0001519630016387004, + "loss": 2.1909, + "step": 277115 + }, + { + "epoch": 0.65, + "grad_norm": 2.25, + "learning_rate": 0.00015196142246550753, + "loss": 2.1462, + "step": 277120 + }, + { + "epoch": 0.65, + "grad_norm": 2.421875, + "learning_rate": 0.00015195984327456366, + "loss": 2.1099, + "step": 277125 + }, + { + "epoch": 0.65, + "grad_norm": 2.21875, + "learning_rate": 0.0001519582640658694, + "loss": 2.0569, + "step": 277130 + }, + { + "epoch": 0.65, + "grad_norm": 1.953125, + "learning_rate": 0.00015195668483942523, + "loss": 2.0965, + "step": 277135 + }, + { + "epoch": 0.65, + "grad_norm": 2.0625, + "learning_rate": 0.00015195510559523173, + "loss": 1.9472, + "step": 277140 + }, + { + "epoch": 0.65, + "grad_norm": 2.375, + "learning_rate": 0.0001519535263332894, + "loss": 2.1036, + "step": 277145 + }, + { + "epoch": 0.65, + "grad_norm": 1.9609375, + "learning_rate": 0.00015195194705359884, + "loss": 2.0399, + "step": 277150 + }, + { + "epoch": 0.65, + "grad_norm": 2.6875, + "learning_rate": 0.00015195036775616055, + "loss": 2.0035, + "step": 277155 + }, + { + "epoch": 0.65, + "grad_norm": 1.984375, + "learning_rate": 0.00015194878844097503, + "loss": 2.0751, + "step": 277160 + }, + { + "epoch": 0.65, + "grad_norm": 2.15625, + "learning_rate": 0.00015194720910804288, + "loss": 2.0097, + "step": 277165 + }, + { + "epoch": 0.65, + "grad_norm": 2.609375, + "learning_rate": 0.00015194562975736462, + "loss": 2.1117, + "step": 277170 + }, + { + "epoch": 0.65, + "grad_norm": 2.515625, + "learning_rate": 0.0001519440503889408, + "loss": 2.1548, + "step": 277175 + }, + { + "epoch": 0.65, + "grad_norm": 2.328125, + "learning_rate": 0.00015194247100277196, + "loss": 2.0573, + "step": 277180 + }, + { + "epoch": 0.65, + "grad_norm": 2.265625, + "learning_rate": 0.00015194089159885864, + "loss": 2.0946, + "step": 277185 + }, + { + "epoch": 0.65, + "grad_norm": 2.546875, + "learning_rate": 0.0001519393121772013, + "loss": 2.1274, + "step": 277190 + }, + { + "epoch": 0.65, + "grad_norm": 1.7734375, + "learning_rate": 0.0001519377327378006, + "loss": 2.1917, + "step": 277195 + }, + { + "epoch": 0.65, + "grad_norm": 2.453125, + "learning_rate": 0.00015193615328065705, + "loss": 2.2505, + "step": 277200 + }, + { + "epoch": 0.65, + "grad_norm": 2.4375, + "learning_rate": 0.00015193457380577115, + "loss": 2.1817, + "step": 277205 + }, + { + "epoch": 0.65, + "grad_norm": 2.21875, + "learning_rate": 0.00015193299431314343, + "loss": 2.0288, + "step": 277210 + }, + { + "epoch": 0.65, + "grad_norm": 2.703125, + "learning_rate": 0.0001519314148027745, + "loss": 2.1741, + "step": 277215 + }, + { + "epoch": 0.65, + "grad_norm": 2.359375, + "learning_rate": 0.0001519298352746648, + "loss": 2.0045, + "step": 277220 + }, + { + "epoch": 0.65, + "grad_norm": 2.546875, + "learning_rate": 0.00015192825572881498, + "loss": 2.123, + "step": 277225 + }, + { + "epoch": 0.65, + "grad_norm": 2.265625, + "learning_rate": 0.0001519266761652255, + "loss": 2.0776, + "step": 277230 + }, + { + "epoch": 0.65, + "grad_norm": 2.578125, + "learning_rate": 0.00015192509658389693, + "loss": 2.0569, + "step": 277235 + }, + { + "epoch": 0.65, + "grad_norm": 2.453125, + "learning_rate": 0.0001519235169848298, + "loss": 1.9758, + "step": 277240 + }, + { + "epoch": 0.65, + "grad_norm": 2.390625, + "learning_rate": 0.00015192193736802466, + "loss": 2.2056, + "step": 277245 + }, + { + "epoch": 0.65, + "grad_norm": 2.234375, + "learning_rate": 0.00015192035773348205, + "loss": 1.9975, + "step": 277250 + }, + { + "epoch": 0.65, + "grad_norm": 2.015625, + "learning_rate": 0.0001519187780812025, + "loss": 2.0541, + "step": 277255 + }, + { + "epoch": 0.65, + "grad_norm": 1.8671875, + "learning_rate": 0.00015191719841118654, + "loss": 1.9048, + "step": 277260 + }, + { + "epoch": 0.65, + "grad_norm": 2.1875, + "learning_rate": 0.00015191561872343474, + "loss": 2.0993, + "step": 277265 + }, + { + "epoch": 0.65, + "grad_norm": 1.9609375, + "learning_rate": 0.00015191403901794763, + "loss": 2.078, + "step": 277270 + }, + { + "epoch": 0.65, + "grad_norm": 2.1875, + "learning_rate": 0.00015191245929472574, + "loss": 2.0453, + "step": 277275 + }, + { + "epoch": 0.65, + "grad_norm": 2.171875, + "learning_rate": 0.00015191087955376958, + "loss": 1.7257, + "step": 277280 + }, + { + "epoch": 0.65, + "grad_norm": 2.3125, + "learning_rate": 0.00015190929979507978, + "loss": 2.0989, + "step": 277285 + }, + { + "epoch": 0.65, + "grad_norm": 2.359375, + "learning_rate": 0.0001519077200186568, + "loss": 2.0484, + "step": 277290 + }, + { + "epoch": 0.65, + "grad_norm": 1.8515625, + "learning_rate": 0.0001519061402245012, + "loss": 1.8643, + "step": 277295 + }, + { + "epoch": 0.65, + "grad_norm": 2.34375, + "learning_rate": 0.00015190456041261347, + "loss": 2.129, + "step": 277300 + }, + { + "epoch": 0.65, + "grad_norm": 2.421875, + "learning_rate": 0.00015190298058299426, + "loss": 1.8421, + "step": 277305 + }, + { + "epoch": 0.65, + "grad_norm": 1.953125, + "learning_rate": 0.00015190140073564402, + "loss": 2.0617, + "step": 277310 + }, + { + "epoch": 0.65, + "grad_norm": 2.296875, + "learning_rate": 0.00015189982087056335, + "loss": 2.1012, + "step": 277315 + }, + { + "epoch": 0.65, + "grad_norm": 2.171875, + "learning_rate": 0.00015189824098775276, + "loss": 2.1054, + "step": 277320 + }, + { + "epoch": 0.65, + "grad_norm": 2.75, + "learning_rate": 0.00015189666108721275, + "loss": 2.1916, + "step": 277325 + }, + { + "epoch": 0.65, + "grad_norm": 2.3125, + "learning_rate": 0.00015189508116894392, + "loss": 1.9377, + "step": 277330 + }, + { + "epoch": 0.65, + "grad_norm": 1.6953125, + "learning_rate": 0.0001518935012329468, + "loss": 1.9112, + "step": 277335 + }, + { + "epoch": 0.65, + "grad_norm": 2.484375, + "learning_rate": 0.00015189192127922194, + "loss": 2.1168, + "step": 277340 + }, + { + "epoch": 0.65, + "grad_norm": 1.953125, + "learning_rate": 0.00015189034130776982, + "loss": 2.0846, + "step": 277345 + }, + { + "epoch": 0.65, + "grad_norm": 2.125, + "learning_rate": 0.000151888761318591, + "loss": 2.1314, + "step": 277350 + }, + { + "epoch": 0.65, + "grad_norm": 1.953125, + "learning_rate": 0.00015188718131168606, + "loss": 2.1181, + "step": 277355 + }, + { + "epoch": 0.65, + "grad_norm": 2.1875, + "learning_rate": 0.00015188560128705556, + "loss": 2.2409, + "step": 277360 + }, + { + "epoch": 0.65, + "grad_norm": 2.296875, + "learning_rate": 0.00015188402124469995, + "loss": 2.1027, + "step": 277365 + }, + { + "epoch": 0.65, + "grad_norm": 2.140625, + "learning_rate": 0.00015188244118461986, + "loss": 2.1227, + "step": 277370 + }, + { + "epoch": 0.65, + "grad_norm": 1.8984375, + "learning_rate": 0.00015188086110681575, + "loss": 2.1066, + "step": 277375 + }, + { + "epoch": 0.65, + "grad_norm": 2.078125, + "learning_rate": 0.00015187928101128816, + "loss": 2.1725, + "step": 277380 + }, + { + "epoch": 0.65, + "grad_norm": 3.625, + "learning_rate": 0.00015187770089803774, + "loss": 2.0517, + "step": 277385 + }, + { + "epoch": 0.65, + "grad_norm": 1.9453125, + "learning_rate": 0.00015187612076706492, + "loss": 2.0795, + "step": 277390 + }, + { + "epoch": 0.65, + "grad_norm": 1.9921875, + "learning_rate": 0.00015187454061837028, + "loss": 2.0828, + "step": 277395 + }, + { + "epoch": 0.65, + "grad_norm": 2.140625, + "learning_rate": 0.00015187296045195435, + "loss": 1.9607, + "step": 277400 + }, + { + "epoch": 0.65, + "grad_norm": 2.09375, + "learning_rate": 0.00015187138026781768, + "loss": 2.1704, + "step": 277405 + }, + { + "epoch": 0.65, + "grad_norm": 1.84375, + "learning_rate": 0.0001518698000659608, + "loss": 1.986, + "step": 277410 + }, + { + "epoch": 0.65, + "grad_norm": 2.3125, + "learning_rate": 0.00015186821984638428, + "loss": 2.1595, + "step": 277415 + }, + { + "epoch": 0.65, + "grad_norm": 2.046875, + "learning_rate": 0.00015186663960908863, + "loss": 2.1162, + "step": 277420 + }, + { + "epoch": 0.65, + "grad_norm": 1.5859375, + "learning_rate": 0.0001518650593540744, + "loss": 2.0645, + "step": 277425 + }, + { + "epoch": 0.65, + "grad_norm": 1.71875, + "learning_rate": 0.0001518634790813421, + "loss": 2.0744, + "step": 277430 + }, + { + "epoch": 0.65, + "grad_norm": 1.8671875, + "learning_rate": 0.0001518618987908923, + "loss": 2.0007, + "step": 277435 + }, + { + "epoch": 0.65, + "grad_norm": 2.265625, + "learning_rate": 0.00015186031848272554, + "loss": 2.1101, + "step": 277440 + }, + { + "epoch": 0.65, + "grad_norm": 2.09375, + "learning_rate": 0.00015185873815684237, + "loss": 2.1697, + "step": 277445 + }, + { + "epoch": 0.65, + "grad_norm": 2.046875, + "learning_rate": 0.00015185715781324333, + "loss": 2.1611, + "step": 277450 + }, + { + "epoch": 0.65, + "grad_norm": 2.140625, + "learning_rate": 0.0001518555774519289, + "loss": 2.1759, + "step": 277455 + }, + { + "epoch": 0.65, + "grad_norm": 2.203125, + "learning_rate": 0.00015185399707289964, + "loss": 2.094, + "step": 277460 + }, + { + "epoch": 0.65, + "grad_norm": 1.984375, + "learning_rate": 0.00015185241667615618, + "loss": 2.1604, + "step": 277465 + }, + { + "epoch": 0.65, + "grad_norm": 1.8515625, + "learning_rate": 0.00015185083626169897, + "loss": 2.1859, + "step": 277470 + }, + { + "epoch": 0.65, + "grad_norm": 2.578125, + "learning_rate": 0.00015184925582952858, + "loss": 1.9737, + "step": 277475 + }, + { + "epoch": 0.65, + "grad_norm": 2.203125, + "learning_rate": 0.00015184767537964553, + "loss": 1.9271, + "step": 277480 + }, + { + "epoch": 0.65, + "grad_norm": 2.4375, + "learning_rate": 0.00015184609491205038, + "loss": 2.0175, + "step": 277485 + }, + { + "epoch": 0.65, + "grad_norm": 2.125, + "learning_rate": 0.00015184451442674365, + "loss": 2.1405, + "step": 277490 + }, + { + "epoch": 0.65, + "grad_norm": 2.0625, + "learning_rate": 0.0001518429339237259, + "loss": 2.1009, + "step": 277495 + }, + { + "epoch": 0.65, + "grad_norm": 1.875, + "learning_rate": 0.0001518413534029977, + "loss": 2.0, + "step": 277500 + }, + { + "epoch": 0.65, + "grad_norm": 2.28125, + "learning_rate": 0.00015183977286455954, + "loss": 2.1242, + "step": 277505 + }, + { + "epoch": 0.65, + "grad_norm": 2.015625, + "learning_rate": 0.00015183819230841193, + "loss": 2.1987, + "step": 277510 + }, + { + "epoch": 0.65, + "grad_norm": 2.09375, + "learning_rate": 0.00015183661173455548, + "loss": 2.1223, + "step": 277515 + }, + { + "epoch": 0.65, + "grad_norm": 2.578125, + "learning_rate": 0.0001518350311429907, + "loss": 1.9222, + "step": 277520 + }, + { + "epoch": 0.65, + "grad_norm": 2.265625, + "learning_rate": 0.00015183345053371814, + "loss": 2.0404, + "step": 277525 + }, + { + "epoch": 0.65, + "grad_norm": 2.0625, + "learning_rate": 0.00015183186990673834, + "loss": 2.2276, + "step": 277530 + }, + { + "epoch": 0.65, + "grad_norm": 2.0625, + "learning_rate": 0.00015183028926205183, + "loss": 2.3563, + "step": 277535 + }, + { + "epoch": 0.65, + "grad_norm": 2.140625, + "learning_rate": 0.00015182870859965913, + "loss": 2.1862, + "step": 277540 + }, + { + "epoch": 0.65, + "grad_norm": 2.09375, + "learning_rate": 0.00015182712791956082, + "loss": 2.1382, + "step": 277545 + }, + { + "epoch": 0.65, + "grad_norm": 1.9453125, + "learning_rate": 0.00015182554722175743, + "loss": 2.0686, + "step": 277550 + }, + { + "epoch": 0.65, + "grad_norm": 2.390625, + "learning_rate": 0.00015182396650624948, + "loss": 2.2443, + "step": 277555 + }, + { + "epoch": 0.65, + "grad_norm": 1.890625, + "learning_rate": 0.00015182238577303754, + "loss": 2.2119, + "step": 277560 + }, + { + "epoch": 0.65, + "grad_norm": 2.390625, + "learning_rate": 0.0001518208050221221, + "loss": 1.8551, + "step": 277565 + }, + { + "epoch": 0.65, + "grad_norm": 2.546875, + "learning_rate": 0.00015181922425350376, + "loss": 2.1758, + "step": 277570 + }, + { + "epoch": 0.65, + "grad_norm": 2.171875, + "learning_rate": 0.00015181764346718305, + "loss": 2.0443, + "step": 277575 + }, + { + "epoch": 0.65, + "grad_norm": 1.8515625, + "learning_rate": 0.00015181606266316045, + "loss": 2.1935, + "step": 277580 + }, + { + "epoch": 0.65, + "grad_norm": 1.875, + "learning_rate": 0.00015181448184143655, + "loss": 2.0345, + "step": 277585 + }, + { + "epoch": 0.65, + "grad_norm": 2.15625, + "learning_rate": 0.00015181290100201196, + "loss": 1.9034, + "step": 277590 + }, + { + "epoch": 0.65, + "grad_norm": 2.125, + "learning_rate": 0.00015181132014488707, + "loss": 2.0509, + "step": 277595 + }, + { + "epoch": 0.65, + "grad_norm": 2.15625, + "learning_rate": 0.00015180973927006247, + "loss": 1.9841, + "step": 277600 + }, + { + "epoch": 0.65, + "grad_norm": 2.796875, + "learning_rate": 0.00015180815837753878, + "loss": 2.067, + "step": 277605 + }, + { + "epoch": 0.65, + "grad_norm": 2.296875, + "learning_rate": 0.00015180657746731647, + "loss": 2.0735, + "step": 277610 + }, + { + "epoch": 0.65, + "grad_norm": 2.015625, + "learning_rate": 0.00015180499653939607, + "loss": 1.9398, + "step": 277615 + }, + { + "epoch": 0.65, + "grad_norm": 2.34375, + "learning_rate": 0.00015180341559377817, + "loss": 2.081, + "step": 277620 + }, + { + "epoch": 0.65, + "grad_norm": 1.9609375, + "learning_rate": 0.00015180183463046326, + "loss": 2.1254, + "step": 277625 + }, + { + "epoch": 0.65, + "grad_norm": 2.203125, + "learning_rate": 0.00015180025364945195, + "loss": 2.1257, + "step": 277630 + }, + { + "epoch": 0.65, + "grad_norm": 2.1875, + "learning_rate": 0.00015179867265074467, + "loss": 2.253, + "step": 277635 + }, + { + "epoch": 0.65, + "grad_norm": 2.171875, + "learning_rate": 0.00015179709163434208, + "loss": 2.0048, + "step": 277640 + }, + { + "epoch": 0.65, + "grad_norm": 2.15625, + "learning_rate": 0.00015179551060024463, + "loss": 1.9939, + "step": 277645 + }, + { + "epoch": 0.65, + "grad_norm": 2.359375, + "learning_rate": 0.0001517939295484529, + "loss": 2.0948, + "step": 277650 + }, + { + "epoch": 0.65, + "grad_norm": 2.359375, + "learning_rate": 0.00015179234847896743, + "loss": 2.1473, + "step": 277655 + }, + { + "epoch": 0.65, + "grad_norm": 2.1875, + "learning_rate": 0.00015179076739178874, + "loss": 2.1219, + "step": 277660 + }, + { + "epoch": 0.65, + "grad_norm": 2.015625, + "learning_rate": 0.00015178918628691742, + "loss": 1.9671, + "step": 277665 + }, + { + "epoch": 0.65, + "grad_norm": 2.0625, + "learning_rate": 0.00015178760516435395, + "loss": 2.2923, + "step": 277670 + }, + { + "epoch": 0.65, + "grad_norm": 2.421875, + "learning_rate": 0.00015178602402409887, + "loss": 2.1292, + "step": 277675 + }, + { + "epoch": 0.65, + "grad_norm": 2.078125, + "learning_rate": 0.0001517844428661528, + "loss": 2.1795, + "step": 277680 + }, + { + "epoch": 0.65, + "grad_norm": 2.046875, + "learning_rate": 0.0001517828616905162, + "loss": 2.134, + "step": 277685 + }, + { + "epoch": 0.65, + "grad_norm": 2.453125, + "learning_rate": 0.0001517812804971896, + "loss": 2.1909, + "step": 277690 + }, + { + "epoch": 0.65, + "grad_norm": 2.125, + "learning_rate": 0.00015177969928617362, + "loss": 1.9901, + "step": 277695 + }, + { + "epoch": 0.65, + "grad_norm": 2.140625, + "learning_rate": 0.00015177811805746876, + "loss": 2.1805, + "step": 277700 + }, + { + "epoch": 0.65, + "grad_norm": 2.609375, + "learning_rate": 0.00015177653681107552, + "loss": 2.2892, + "step": 277705 + }, + { + "epoch": 0.65, + "grad_norm": 2.390625, + "learning_rate": 0.0001517749555469945, + "loss": 1.9704, + "step": 277710 + }, + { + "epoch": 0.65, + "grad_norm": 2.28125, + "learning_rate": 0.00015177337426522622, + "loss": 2.3436, + "step": 277715 + }, + { + "epoch": 0.65, + "grad_norm": 2.03125, + "learning_rate": 0.0001517717929657712, + "loss": 2.1504, + "step": 277720 + }, + { + "epoch": 0.65, + "grad_norm": 2.25, + "learning_rate": 0.00015177021164863, + "loss": 1.9679, + "step": 277725 + }, + { + "epoch": 0.65, + "grad_norm": 2.734375, + "learning_rate": 0.00015176863031380315, + "loss": 2.1515, + "step": 277730 + }, + { + "epoch": 0.65, + "grad_norm": 2.09375, + "learning_rate": 0.0001517670489612912, + "loss": 2.1029, + "step": 277735 + }, + { + "epoch": 0.65, + "grad_norm": 2.71875, + "learning_rate": 0.00015176546759109472, + "loss": 2.1225, + "step": 277740 + }, + { + "epoch": 0.65, + "grad_norm": 2.0625, + "learning_rate": 0.0001517638862032142, + "loss": 2.2523, + "step": 277745 + }, + { + "epoch": 0.65, + "grad_norm": 1.9921875, + "learning_rate": 0.0001517623047976502, + "loss": 2.0655, + "step": 277750 + }, + { + "epoch": 0.65, + "grad_norm": 2.1875, + "learning_rate": 0.0001517607233744032, + "loss": 2.0509, + "step": 277755 + }, + { + "epoch": 0.65, + "grad_norm": 2.078125, + "learning_rate": 0.00015175914193347388, + "loss": 2.1004, + "step": 277760 + }, + { + "epoch": 0.65, + "grad_norm": 2.234375, + "learning_rate": 0.00015175756047486268, + "loss": 2.0871, + "step": 277765 + }, + { + "epoch": 0.65, + "grad_norm": 2.84375, + "learning_rate": 0.00015175597899857012, + "loss": 2.079, + "step": 277770 + }, + { + "epoch": 0.65, + "grad_norm": 2.171875, + "learning_rate": 0.00015175439750459682, + "loss": 1.986, + "step": 277775 + }, + { + "epoch": 0.65, + "grad_norm": 2.0, + "learning_rate": 0.00015175281599294325, + "loss": 2.1882, + "step": 277780 + }, + { + "epoch": 0.65, + "grad_norm": 2.109375, + "learning_rate": 0.00015175123446360998, + "loss": 2.0513, + "step": 277785 + }, + { + "epoch": 0.65, + "grad_norm": 1.90625, + "learning_rate": 0.0001517496529165976, + "loss": 2.1436, + "step": 277790 + }, + { + "epoch": 0.65, + "grad_norm": 2.34375, + "learning_rate": 0.00015174807135190653, + "loss": 2.1683, + "step": 277795 + }, + { + "epoch": 0.65, + "grad_norm": 2.109375, + "learning_rate": 0.00015174648976953742, + "loss": 2.0369, + "step": 277800 + }, + { + "epoch": 0.65, + "grad_norm": 2.125, + "learning_rate": 0.00015174490816949074, + "loss": 2.0563, + "step": 277805 + }, + { + "epoch": 0.65, + "grad_norm": 2.421875, + "learning_rate": 0.0001517433265517671, + "loss": 1.9162, + "step": 277810 + }, + { + "epoch": 0.65, + "grad_norm": 2.4375, + "learning_rate": 0.000151741744916367, + "loss": 2.0922, + "step": 277815 + }, + { + "epoch": 0.65, + "grad_norm": 2.0, + "learning_rate": 0.00015174016326329097, + "loss": 1.9348, + "step": 277820 + }, + { + "epoch": 0.65, + "grad_norm": 2.03125, + "learning_rate": 0.00015173858159253954, + "loss": 2.1282, + "step": 277825 + }, + { + "epoch": 0.65, + "grad_norm": 2.5625, + "learning_rate": 0.0001517369999041133, + "loss": 2.0123, + "step": 277830 + }, + { + "epoch": 0.65, + "grad_norm": 2.328125, + "learning_rate": 0.00015173541819801277, + "loss": 2.0898, + "step": 277835 + }, + { + "epoch": 0.65, + "grad_norm": 2.34375, + "learning_rate": 0.00015173383647423843, + "loss": 2.0611, + "step": 277840 + }, + { + "epoch": 0.65, + "grad_norm": 2.359375, + "learning_rate": 0.00015173225473279092, + "loss": 2.0447, + "step": 277845 + }, + { + "epoch": 0.65, + "grad_norm": 2.046875, + "learning_rate": 0.00015173067297367073, + "loss": 1.7669, + "step": 277850 + }, + { + "epoch": 0.65, + "grad_norm": 2.109375, + "learning_rate": 0.00015172909119687836, + "loss": 2.1605, + "step": 277855 + }, + { + "epoch": 0.65, + "grad_norm": 2.75, + "learning_rate": 0.00015172750940241445, + "loss": 1.9956, + "step": 277860 + }, + { + "epoch": 0.65, + "grad_norm": 2.046875, + "learning_rate": 0.00015172592759027945, + "loss": 2.3048, + "step": 277865 + }, + { + "epoch": 0.65, + "grad_norm": 2.59375, + "learning_rate": 0.00015172434576047396, + "loss": 2.0106, + "step": 277870 + }, + { + "epoch": 0.65, + "grad_norm": 2.203125, + "learning_rate": 0.0001517227639129985, + "loss": 1.9401, + "step": 277875 + }, + { + "epoch": 0.65, + "grad_norm": 2.078125, + "learning_rate": 0.0001517211820478536, + "loss": 1.933, + "step": 277880 + }, + { + "epoch": 0.65, + "grad_norm": 2.203125, + "learning_rate": 0.0001517196001650398, + "loss": 2.2263, + "step": 277885 + }, + { + "epoch": 0.65, + "grad_norm": 1.9375, + "learning_rate": 0.00015171801826455763, + "loss": 2.0162, + "step": 277890 + }, + { + "epoch": 0.65, + "grad_norm": 2.15625, + "learning_rate": 0.00015171643634640764, + "loss": 2.125, + "step": 277895 + }, + { + "epoch": 0.65, + "grad_norm": 2.3125, + "learning_rate": 0.0001517148544105904, + "loss": 2.1035, + "step": 277900 + }, + { + "epoch": 0.65, + "grad_norm": 2.421875, + "learning_rate": 0.00015171327245710645, + "loss": 2.1496, + "step": 277905 + }, + { + "epoch": 0.65, + "grad_norm": 1.96875, + "learning_rate": 0.0001517116904859563, + "loss": 2.1464, + "step": 277910 + }, + { + "epoch": 0.65, + "grad_norm": 2.359375, + "learning_rate": 0.00015171010849714044, + "loss": 2.2354, + "step": 277915 + }, + { + "epoch": 0.65, + "grad_norm": 1.859375, + "learning_rate": 0.00015170852649065952, + "loss": 2.2528, + "step": 277920 + }, + { + "epoch": 0.65, + "grad_norm": 2.109375, + "learning_rate": 0.00015170694446651404, + "loss": 2.0821, + "step": 277925 + }, + { + "epoch": 0.65, + "grad_norm": 2.359375, + "learning_rate": 0.0001517053624247045, + "loss": 1.9758, + "step": 277930 + }, + { + "epoch": 0.65, + "grad_norm": 1.90625, + "learning_rate": 0.0001517037803652315, + "loss": 2.104, + "step": 277935 + }, + { + "epoch": 0.65, + "grad_norm": 2.09375, + "learning_rate": 0.0001517021982880955, + "loss": 2.0189, + "step": 277940 + }, + { + "epoch": 0.65, + "grad_norm": 2.0625, + "learning_rate": 0.00015170061619329715, + "loss": 2.1742, + "step": 277945 + }, + { + "epoch": 0.65, + "grad_norm": 2.265625, + "learning_rate": 0.0001516990340808369, + "loss": 1.9241, + "step": 277950 + }, + { + "epoch": 0.65, + "grad_norm": 1.9921875, + "learning_rate": 0.00015169745195071533, + "loss": 2.1176, + "step": 277955 + }, + { + "epoch": 0.65, + "grad_norm": 1.875, + "learning_rate": 0.00015169586980293297, + "loss": 2.0853, + "step": 277960 + }, + { + "epoch": 0.65, + "grad_norm": 2.09375, + "learning_rate": 0.00015169428763749037, + "loss": 2.2531, + "step": 277965 + }, + { + "epoch": 0.65, + "grad_norm": 2.125, + "learning_rate": 0.00015169270545438806, + "loss": 2.1022, + "step": 277970 + }, + { + "epoch": 0.65, + "grad_norm": 1.9453125, + "learning_rate": 0.00015169112325362657, + "loss": 2.253, + "step": 277975 + }, + { + "epoch": 0.65, + "grad_norm": 2.1875, + "learning_rate": 0.00015168954103520648, + "loss": 2.1822, + "step": 277980 + }, + { + "epoch": 0.65, + "grad_norm": 2.234375, + "learning_rate": 0.0001516879587991283, + "loss": 2.0715, + "step": 277985 + }, + { + "epoch": 0.65, + "grad_norm": 2.375, + "learning_rate": 0.00015168637654539255, + "loss": 1.9929, + "step": 277990 + }, + { + "epoch": 0.65, + "grad_norm": 2.234375, + "learning_rate": 0.00015168479427399984, + "loss": 2.004, + "step": 277995 + }, + { + "epoch": 0.65, + "grad_norm": 2.078125, + "learning_rate": 0.00015168321198495065, + "loss": 2.1874, + "step": 278000 + }, + { + "epoch": 0.65, + "grad_norm": 2.25, + "learning_rate": 0.0001516816296782455, + "loss": 1.9252, + "step": 278005 + }, + { + "epoch": 0.65, + "grad_norm": 2.203125, + "learning_rate": 0.00015168004735388498, + "loss": 1.991, + "step": 278010 + }, + { + "epoch": 0.65, + "grad_norm": 2.375, + "learning_rate": 0.00015167846501186967, + "loss": 1.9951, + "step": 278015 + }, + { + "epoch": 0.65, + "grad_norm": 2.015625, + "learning_rate": 0.0001516768826522, + "loss": 2.0144, + "step": 278020 + }, + { + "epoch": 0.65, + "grad_norm": 2.1875, + "learning_rate": 0.00015167530027487662, + "loss": 2.068, + "step": 278025 + }, + { + "epoch": 0.65, + "grad_norm": 1.9609375, + "learning_rate": 0.00015167371787989998, + "loss": 2.0564, + "step": 278030 + }, + { + "epoch": 0.65, + "grad_norm": 2.328125, + "learning_rate": 0.00015167213546727067, + "loss": 2.1362, + "step": 278035 + }, + { + "epoch": 0.65, + "grad_norm": 2.46875, + "learning_rate": 0.00015167055303698924, + "loss": 2.0925, + "step": 278040 + }, + { + "epoch": 0.65, + "grad_norm": 1.9375, + "learning_rate": 0.00015166897058905621, + "loss": 2.0156, + "step": 278045 + }, + { + "epoch": 0.65, + "grad_norm": 2.0, + "learning_rate": 0.0001516673881234721, + "loss": 1.9817, + "step": 278050 + }, + { + "epoch": 0.65, + "grad_norm": 2.21875, + "learning_rate": 0.00015166580564023748, + "loss": 2.2014, + "step": 278055 + }, + { + "epoch": 0.65, + "grad_norm": 2.234375, + "learning_rate": 0.0001516642231393529, + "loss": 2.1613, + "step": 278060 + }, + { + "epoch": 0.65, + "grad_norm": 1.765625, + "learning_rate": 0.0001516626406208189, + "loss": 2.2499, + "step": 278065 + }, + { + "epoch": 0.65, + "grad_norm": 2.34375, + "learning_rate": 0.00015166105808463596, + "loss": 2.1864, + "step": 278070 + }, + { + "epoch": 0.65, + "grad_norm": 2.171875, + "learning_rate": 0.00015165947553080468, + "loss": 2.0859, + "step": 278075 + }, + { + "epoch": 0.65, + "grad_norm": 2.21875, + "learning_rate": 0.00015165789295932557, + "loss": 1.9064, + "step": 278080 + }, + { + "epoch": 0.65, + "grad_norm": 2.125, + "learning_rate": 0.00015165631037019925, + "loss": 2.1678, + "step": 278085 + }, + { + "epoch": 0.65, + "grad_norm": 2.0, + "learning_rate": 0.00015165472776342616, + "loss": 2.0375, + "step": 278090 + }, + { + "epoch": 0.65, + "grad_norm": 2.03125, + "learning_rate": 0.00015165314513900687, + "loss": 1.9646, + "step": 278095 + }, + { + "epoch": 0.65, + "grad_norm": 2.875, + "learning_rate": 0.0001516515624969419, + "loss": 2.1269, + "step": 278100 + }, + { + "epoch": 0.65, + "grad_norm": 2.265625, + "learning_rate": 0.00015164997983723187, + "loss": 2.0644, + "step": 278105 + }, + { + "epoch": 0.65, + "grad_norm": 1.9140625, + "learning_rate": 0.00015164839715987725, + "loss": 2.0215, + "step": 278110 + }, + { + "epoch": 0.65, + "grad_norm": 2.453125, + "learning_rate": 0.00015164681446487861, + "loss": 2.1823, + "step": 278115 + }, + { + "epoch": 0.65, + "grad_norm": 1.9140625, + "learning_rate": 0.00015164523175223646, + "loss": 1.9441, + "step": 278120 + }, + { + "epoch": 0.65, + "grad_norm": 2.484375, + "learning_rate": 0.00015164364902195136, + "loss": 2.0717, + "step": 278125 + }, + { + "epoch": 0.65, + "grad_norm": 2.09375, + "learning_rate": 0.00015164206627402388, + "loss": 1.97, + "step": 278130 + }, + { + "epoch": 0.65, + "grad_norm": 2.015625, + "learning_rate": 0.00015164048350845452, + "loss": 2.0659, + "step": 278135 + }, + { + "epoch": 0.65, + "grad_norm": 3.359375, + "learning_rate": 0.00015163890072524385, + "loss": 2.0776, + "step": 278140 + }, + { + "epoch": 0.65, + "grad_norm": 2.6875, + "learning_rate": 0.00015163731792439236, + "loss": 1.9971, + "step": 278145 + }, + { + "epoch": 0.65, + "grad_norm": 2.625, + "learning_rate": 0.00015163573510590066, + "loss": 2.2495, + "step": 278150 + }, + { + "epoch": 0.65, + "grad_norm": 1.703125, + "learning_rate": 0.00015163415226976924, + "loss": 2.0721, + "step": 278155 + }, + { + "epoch": 0.65, + "grad_norm": 2.546875, + "learning_rate": 0.00015163256941599866, + "loss": 2.1252, + "step": 278160 + }, + { + "epoch": 0.65, + "grad_norm": 2.5, + "learning_rate": 0.00015163098654458947, + "loss": 2.169, + "step": 278165 + }, + { + "epoch": 0.65, + "grad_norm": 1.7421875, + "learning_rate": 0.00015162940365554217, + "loss": 2.1533, + "step": 278170 + }, + { + "epoch": 0.65, + "grad_norm": 2.265625, + "learning_rate": 0.00015162782074885738, + "loss": 2.0202, + "step": 278175 + }, + { + "epoch": 0.65, + "grad_norm": 2.171875, + "learning_rate": 0.00015162623782453554, + "loss": 2.0608, + "step": 278180 + }, + { + "epoch": 0.65, + "grad_norm": 2.234375, + "learning_rate": 0.00015162465488257725, + "loss": 2.218, + "step": 278185 + }, + { + "epoch": 0.65, + "grad_norm": 2.015625, + "learning_rate": 0.00015162307192298307, + "loss": 1.9274, + "step": 278190 + }, + { + "epoch": 0.65, + "grad_norm": 2.125, + "learning_rate": 0.00015162148894575346, + "loss": 1.991, + "step": 278195 + }, + { + "epoch": 0.65, + "grad_norm": 2.421875, + "learning_rate": 0.00015161990595088907, + "loss": 2.1081, + "step": 278200 + }, + { + "epoch": 0.65, + "grad_norm": 2.234375, + "learning_rate": 0.00015161832293839037, + "loss": 2.1309, + "step": 278205 + }, + { + "epoch": 0.65, + "grad_norm": 2.03125, + "learning_rate": 0.00015161673990825788, + "loss": 2.2124, + "step": 278210 + }, + { + "epoch": 0.65, + "grad_norm": 2.125, + "learning_rate": 0.00015161515686049222, + "loss": 2.0981, + "step": 278215 + }, + { + "epoch": 0.65, + "grad_norm": 1.9140625, + "learning_rate": 0.00015161357379509386, + "loss": 2.0317, + "step": 278220 + }, + { + "epoch": 0.65, + "grad_norm": 2.96875, + "learning_rate": 0.00015161199071206337, + "loss": 2.0353, + "step": 278225 + }, + { + "epoch": 0.65, + "grad_norm": 2.21875, + "learning_rate": 0.00015161040761140128, + "loss": 2.1486, + "step": 278230 + }, + { + "epoch": 0.65, + "grad_norm": 2.421875, + "learning_rate": 0.00015160882449310816, + "loss": 2.0169, + "step": 278235 + }, + { + "epoch": 0.65, + "grad_norm": 2.484375, + "learning_rate": 0.0001516072413571845, + "loss": 2.2864, + "step": 278240 + }, + { + "epoch": 0.65, + "grad_norm": 2.328125, + "learning_rate": 0.00015160565820363092, + "loss": 2.0467, + "step": 278245 + }, + { + "epoch": 0.65, + "grad_norm": 2.28125, + "learning_rate": 0.0001516040750324479, + "loss": 2.0181, + "step": 278250 + }, + { + "epoch": 0.65, + "grad_norm": 1.84375, + "learning_rate": 0.00015160249184363592, + "loss": 2.2078, + "step": 278255 + }, + { + "epoch": 0.65, + "grad_norm": 2.53125, + "learning_rate": 0.00015160090863719568, + "loss": 1.9722, + "step": 278260 + }, + { + "epoch": 0.65, + "grad_norm": 2.421875, + "learning_rate": 0.0001515993254131276, + "loss": 2.1311, + "step": 278265 + }, + { + "epoch": 0.65, + "grad_norm": 2.078125, + "learning_rate": 0.00015159774217143225, + "loss": 1.8855, + "step": 278270 + }, + { + "epoch": 0.65, + "grad_norm": 1.765625, + "learning_rate": 0.00015159615891211016, + "loss": 2.0777, + "step": 278275 + }, + { + "epoch": 0.65, + "grad_norm": 1.9921875, + "learning_rate": 0.00015159457563516192, + "loss": 1.9872, + "step": 278280 + }, + { + "epoch": 0.65, + "grad_norm": 2.265625, + "learning_rate": 0.00015159299234058802, + "loss": 2.0361, + "step": 278285 + }, + { + "epoch": 0.65, + "grad_norm": 2.125, + "learning_rate": 0.00015159140902838903, + "loss": 2.0511, + "step": 278290 + }, + { + "epoch": 0.65, + "grad_norm": 2.984375, + "learning_rate": 0.00015158982569856544, + "loss": 1.9724, + "step": 278295 + }, + { + "epoch": 0.65, + "grad_norm": 2.21875, + "learning_rate": 0.00015158824235111788, + "loss": 1.9398, + "step": 278300 + }, + { + "epoch": 0.65, + "grad_norm": 2.125, + "learning_rate": 0.00015158665898604682, + "loss": 2.0153, + "step": 278305 + }, + { + "epoch": 0.65, + "grad_norm": 1.9140625, + "learning_rate": 0.00015158507560335282, + "loss": 2.0954, + "step": 278310 + }, + { + "epoch": 0.65, + "grad_norm": 2.046875, + "learning_rate": 0.00015158349220303644, + "loss": 1.96, + "step": 278315 + }, + { + "epoch": 0.65, + "grad_norm": 2.15625, + "learning_rate": 0.00015158190878509817, + "loss": 2.1304, + "step": 278320 + }, + { + "epoch": 0.65, + "grad_norm": 2.25, + "learning_rate": 0.0001515803253495386, + "loss": 2.0528, + "step": 278325 + }, + { + "epoch": 0.65, + "grad_norm": 2.03125, + "learning_rate": 0.00015157874189635826, + "loss": 2.0532, + "step": 278330 + }, + { + "epoch": 0.66, + "grad_norm": 2.265625, + "learning_rate": 0.00015157715842555774, + "loss": 2.2158, + "step": 278335 + }, + { + "epoch": 0.66, + "grad_norm": 2.09375, + "learning_rate": 0.00015157557493713743, + "loss": 2.1898, + "step": 278340 + }, + { + "epoch": 0.66, + "grad_norm": 2.3125, + "learning_rate": 0.000151573991431098, + "loss": 2.153, + "step": 278345 + }, + { + "epoch": 0.66, + "grad_norm": 2.328125, + "learning_rate": 0.00015157240790744, + "loss": 1.9486, + "step": 278350 + }, + { + "epoch": 0.66, + "grad_norm": 2.1875, + "learning_rate": 0.0001515708243661639, + "loss": 2.1575, + "step": 278355 + }, + { + "epoch": 0.66, + "grad_norm": 1.7734375, + "learning_rate": 0.00015156924080727025, + "loss": 2.0667, + "step": 278360 + }, + { + "epoch": 0.66, + "grad_norm": 2.3125, + "learning_rate": 0.00015156765723075966, + "loss": 2.0492, + "step": 278365 + }, + { + "epoch": 0.66, + "grad_norm": 2.09375, + "learning_rate": 0.00015156607363663258, + "loss": 1.8874, + "step": 278370 + }, + { + "epoch": 0.66, + "grad_norm": 2.40625, + "learning_rate": 0.0001515644900248896, + "loss": 2.0442, + "step": 278375 + }, + { + "epoch": 0.66, + "grad_norm": 2.0625, + "learning_rate": 0.00015156290639553127, + "loss": 1.9767, + "step": 278380 + }, + { + "epoch": 0.66, + "grad_norm": 1.8671875, + "learning_rate": 0.0001515613227485581, + "loss": 2.0712, + "step": 278385 + }, + { + "epoch": 0.66, + "grad_norm": 2.140625, + "learning_rate": 0.00015155973908397068, + "loss": 1.9865, + "step": 278390 + }, + { + "epoch": 0.66, + "grad_norm": 1.9921875, + "learning_rate": 0.00015155815540176948, + "loss": 1.9637, + "step": 278395 + }, + { + "epoch": 0.66, + "grad_norm": 2.25, + "learning_rate": 0.0001515565717019551, + "loss": 2.1947, + "step": 278400 + }, + { + "epoch": 0.66, + "grad_norm": 2.28125, + "learning_rate": 0.00015155498798452807, + "loss": 2.1287, + "step": 278405 + }, + { + "epoch": 0.66, + "grad_norm": 2.359375, + "learning_rate": 0.0001515534042494889, + "loss": 1.974, + "step": 278410 + }, + { + "epoch": 0.66, + "grad_norm": 2.09375, + "learning_rate": 0.00015155182049683816, + "loss": 2.0918, + "step": 278415 + }, + { + "epoch": 0.66, + "grad_norm": 2.234375, + "learning_rate": 0.00015155023672657638, + "loss": 2.1426, + "step": 278420 + }, + { + "epoch": 0.66, + "grad_norm": 2.53125, + "learning_rate": 0.0001515486529387041, + "loss": 2.0611, + "step": 278425 + }, + { + "epoch": 0.66, + "grad_norm": 2.328125, + "learning_rate": 0.00015154706913322188, + "loss": 2.047, + "step": 278430 + }, + { + "epoch": 0.66, + "grad_norm": 2.125, + "learning_rate": 0.00015154548531013024, + "loss": 2.0525, + "step": 278435 + }, + { + "epoch": 0.66, + "grad_norm": 1.984375, + "learning_rate": 0.00015154390146942973, + "loss": 1.9371, + "step": 278440 + }, + { + "epoch": 0.66, + "grad_norm": 2.203125, + "learning_rate": 0.00015154231761112087, + "loss": 2.1613, + "step": 278445 + }, + { + "epoch": 0.66, + "grad_norm": 2.5625, + "learning_rate": 0.00015154073373520424, + "loss": 2.1343, + "step": 278450 + }, + { + "epoch": 0.66, + "grad_norm": 2.171875, + "learning_rate": 0.00015153914984168036, + "loss": 2.1025, + "step": 278455 + }, + { + "epoch": 0.66, + "grad_norm": 2.140625, + "learning_rate": 0.00015153756593054975, + "loss": 2.2083, + "step": 278460 + }, + { + "epoch": 0.66, + "grad_norm": 2.40625, + "learning_rate": 0.00015153598200181301, + "loss": 1.9944, + "step": 278465 + }, + { + "epoch": 0.66, + "grad_norm": 2.296875, + "learning_rate": 0.0001515343980554706, + "loss": 2.0165, + "step": 278470 + }, + { + "epoch": 0.66, + "grad_norm": 2.171875, + "learning_rate": 0.00015153281409152313, + "loss": 2.1744, + "step": 278475 + }, + { + "epoch": 0.66, + "grad_norm": 2.421875, + "learning_rate": 0.0001515312301099711, + "loss": 2.0568, + "step": 278480 + }, + { + "epoch": 0.66, + "grad_norm": 2.0625, + "learning_rate": 0.0001515296461108151, + "loss": 1.7919, + "step": 278485 + }, + { + "epoch": 0.66, + "grad_norm": 2.328125, + "learning_rate": 0.00015152806209405562, + "loss": 1.9578, + "step": 278490 + }, + { + "epoch": 0.66, + "grad_norm": 1.8984375, + "learning_rate": 0.00015152647805969323, + "loss": 1.9154, + "step": 278495 + }, + { + "epoch": 0.66, + "grad_norm": 2.078125, + "learning_rate": 0.00015152489400772846, + "loss": 1.9994, + "step": 278500 + }, + { + "epoch": 0.66, + "grad_norm": 2.296875, + "learning_rate": 0.0001515233099381618, + "loss": 2.1138, + "step": 278505 + }, + { + "epoch": 0.66, + "grad_norm": 2.171875, + "learning_rate": 0.00015152172585099392, + "loss": 1.9912, + "step": 278510 + }, + { + "epoch": 0.66, + "grad_norm": 2.40625, + "learning_rate": 0.00015152014174622523, + "loss": 1.9527, + "step": 278515 + }, + { + "epoch": 0.66, + "grad_norm": 2.234375, + "learning_rate": 0.00015151855762385636, + "loss": 1.8812, + "step": 278520 + }, + { + "epoch": 0.66, + "grad_norm": 2.265625, + "learning_rate": 0.0001515169734838878, + "loss": 2.1094, + "step": 278525 + }, + { + "epoch": 0.66, + "grad_norm": 2.046875, + "learning_rate": 0.00015151538932632009, + "loss": 1.8166, + "step": 278530 + }, + { + "epoch": 0.66, + "grad_norm": 1.7890625, + "learning_rate": 0.00015151380515115382, + "loss": 1.8836, + "step": 278535 + }, + { + "epoch": 0.66, + "grad_norm": 2.578125, + "learning_rate": 0.0001515122209583895, + "loss": 1.8861, + "step": 278540 + }, + { + "epoch": 0.66, + "grad_norm": 2.640625, + "learning_rate": 0.00015151063674802764, + "loss": 2.0534, + "step": 278545 + }, + { + "epoch": 0.66, + "grad_norm": 2.578125, + "learning_rate": 0.0001515090525200688, + "loss": 2.0276, + "step": 278550 + }, + { + "epoch": 0.66, + "grad_norm": 2.515625, + "learning_rate": 0.00015150746827451356, + "loss": 2.1488, + "step": 278555 + }, + { + "epoch": 0.66, + "grad_norm": 2.203125, + "learning_rate": 0.00015150588401136248, + "loss": 2.0651, + "step": 278560 + }, + { + "epoch": 0.66, + "grad_norm": 2.296875, + "learning_rate": 0.00015150429973061598, + "loss": 2.1913, + "step": 278565 + }, + { + "epoch": 0.66, + "grad_norm": 2.140625, + "learning_rate": 0.00015150271543227476, + "loss": 1.8607, + "step": 278570 + }, + { + "epoch": 0.66, + "grad_norm": 1.84375, + "learning_rate": 0.00015150113111633918, + "loss": 2.1047, + "step": 278575 + }, + { + "epoch": 0.66, + "grad_norm": 1.7890625, + "learning_rate": 0.00015149954678280993, + "loss": 2.132, + "step": 278580 + }, + { + "epoch": 0.66, + "grad_norm": 2.328125, + "learning_rate": 0.0001514979624316875, + "loss": 2.0214, + "step": 278585 + }, + { + "epoch": 0.66, + "grad_norm": 2.21875, + "learning_rate": 0.00015149637806297244, + "loss": 2.1416, + "step": 278590 + }, + { + "epoch": 0.66, + "grad_norm": 2.5, + "learning_rate": 0.00015149479367666528, + "loss": 1.9654, + "step": 278595 + }, + { + "epoch": 0.66, + "grad_norm": 2.234375, + "learning_rate": 0.00015149320927276656, + "loss": 2.1833, + "step": 278600 + }, + { + "epoch": 0.66, + "grad_norm": 2.171875, + "learning_rate": 0.0001514916248512768, + "loss": 2.0843, + "step": 278605 + }, + { + "epoch": 0.66, + "grad_norm": 1.921875, + "learning_rate": 0.00015149004041219662, + "loss": 1.8981, + "step": 278610 + }, + { + "epoch": 0.66, + "grad_norm": 2.453125, + "learning_rate": 0.00015148845595552648, + "loss": 2.0124, + "step": 278615 + }, + { + "epoch": 0.66, + "grad_norm": 2.484375, + "learning_rate": 0.00015148687148126694, + "loss": 2.2497, + "step": 278620 + }, + { + "epoch": 0.66, + "grad_norm": 1.9765625, + "learning_rate": 0.00015148528698941856, + "loss": 2.0369, + "step": 278625 + }, + { + "epoch": 0.66, + "grad_norm": 2.546875, + "learning_rate": 0.0001514837024799819, + "loss": 2.1533, + "step": 278630 + }, + { + "epoch": 0.66, + "grad_norm": 2.125, + "learning_rate": 0.00015148211795295744, + "loss": 2.2164, + "step": 278635 + }, + { + "epoch": 0.66, + "grad_norm": 2.203125, + "learning_rate": 0.00015148053340834574, + "loss": 2.0765, + "step": 278640 + }, + { + "epoch": 0.66, + "grad_norm": 2.1875, + "learning_rate": 0.00015147894884614738, + "loss": 2.0912, + "step": 278645 + }, + { + "epoch": 0.66, + "grad_norm": 2.109375, + "learning_rate": 0.00015147736426636288, + "loss": 1.9863, + "step": 278650 + }, + { + "epoch": 0.66, + "grad_norm": 2.171875, + "learning_rate": 0.0001514757796689928, + "loss": 2.311, + "step": 278655 + }, + { + "epoch": 0.66, + "grad_norm": 2.421875, + "learning_rate": 0.0001514741950540376, + "loss": 2.0787, + "step": 278660 + }, + { + "epoch": 0.66, + "grad_norm": 2.203125, + "learning_rate": 0.00015147261042149792, + "loss": 1.9267, + "step": 278665 + }, + { + "epoch": 0.66, + "grad_norm": 2.484375, + "learning_rate": 0.00015147102577137424, + "loss": 2.1601, + "step": 278670 + }, + { + "epoch": 0.66, + "grad_norm": 2.359375, + "learning_rate": 0.00015146944110366718, + "loss": 1.8748, + "step": 278675 + }, + { + "epoch": 0.66, + "grad_norm": 1.984375, + "learning_rate": 0.00015146785641837718, + "loss": 2.135, + "step": 278680 + }, + { + "epoch": 0.66, + "grad_norm": 2.1875, + "learning_rate": 0.00015146627171550484, + "loss": 2.117, + "step": 278685 + }, + { + "epoch": 0.66, + "grad_norm": 2.125, + "learning_rate": 0.00015146468699505066, + "loss": 2.0628, + "step": 278690 + }, + { + "epoch": 0.66, + "grad_norm": 2.09375, + "learning_rate": 0.00015146310225701526, + "loss": 2.2179, + "step": 278695 + }, + { + "epoch": 0.66, + "grad_norm": 2.65625, + "learning_rate": 0.0001514615175013991, + "loss": 1.8264, + "step": 278700 + }, + { + "epoch": 0.66, + "grad_norm": 2.890625, + "learning_rate": 0.00015145993272820273, + "loss": 2.1473, + "step": 278705 + }, + { + "epoch": 0.66, + "grad_norm": 1.828125, + "learning_rate": 0.00015145834793742674, + "loss": 2.0548, + "step": 278710 + }, + { + "epoch": 0.66, + "grad_norm": 2.265625, + "learning_rate": 0.00015145676312907166, + "loss": 2.0588, + "step": 278715 + }, + { + "epoch": 0.66, + "grad_norm": 2.03125, + "learning_rate": 0.000151455178303138, + "loss": 1.9602, + "step": 278720 + }, + { + "epoch": 0.66, + "grad_norm": 2.359375, + "learning_rate": 0.00015145359345962632, + "loss": 2.038, + "step": 278725 + }, + { + "epoch": 0.66, + "grad_norm": 1.9140625, + "learning_rate": 0.00015145200859853714, + "loss": 2.1575, + "step": 278730 + }, + { + "epoch": 0.66, + "grad_norm": 2.046875, + "learning_rate": 0.00015145042371987106, + "loss": 2.1271, + "step": 278735 + }, + { + "epoch": 0.66, + "grad_norm": 2.34375, + "learning_rate": 0.00015144883882362855, + "loss": 2.1181, + "step": 278740 + }, + { + "epoch": 0.66, + "grad_norm": 2.515625, + "learning_rate": 0.00015144725390981017, + "loss": 2.0942, + "step": 278745 + }, + { + "epoch": 0.66, + "grad_norm": 2.015625, + "learning_rate": 0.00015144566897841652, + "loss": 2.1082, + "step": 278750 + }, + { + "epoch": 0.66, + "grad_norm": 2.515625, + "learning_rate": 0.00015144408402944807, + "loss": 2.1213, + "step": 278755 + }, + { + "epoch": 0.66, + "grad_norm": 2.140625, + "learning_rate": 0.0001514424990629054, + "loss": 2.1055, + "step": 278760 + }, + { + "epoch": 0.66, + "grad_norm": 1.796875, + "learning_rate": 0.00015144091407878903, + "loss": 1.9072, + "step": 278765 + }, + { + "epoch": 0.66, + "grad_norm": 1.8984375, + "learning_rate": 0.0001514393290770995, + "loss": 2.0698, + "step": 278770 + }, + { + "epoch": 0.66, + "grad_norm": 2.75, + "learning_rate": 0.0001514377440578374, + "loss": 2.0331, + "step": 278775 + }, + { + "epoch": 0.66, + "grad_norm": 2.21875, + "learning_rate": 0.0001514361590210032, + "loss": 2.089, + "step": 278780 + }, + { + "epoch": 0.66, + "grad_norm": 2.09375, + "learning_rate": 0.0001514345739665975, + "loss": 2.0014, + "step": 278785 + }, + { + "epoch": 0.66, + "grad_norm": 2.375, + "learning_rate": 0.0001514329888946208, + "loss": 2.1553, + "step": 278790 + }, + { + "epoch": 0.66, + "grad_norm": 2.4375, + "learning_rate": 0.00015143140380507366, + "loss": 2.1141, + "step": 278795 + }, + { + "epoch": 0.66, + "grad_norm": 1.765625, + "learning_rate": 0.00015142981869795664, + "loss": 2.1606, + "step": 278800 + }, + { + "epoch": 0.66, + "grad_norm": 1.9453125, + "learning_rate": 0.00015142823357327024, + "loss": 1.9037, + "step": 278805 + }, + { + "epoch": 0.66, + "grad_norm": 1.8203125, + "learning_rate": 0.00015142664843101501, + "loss": 2.1459, + "step": 278810 + }, + { + "epoch": 0.66, + "grad_norm": 2.859375, + "learning_rate": 0.00015142506327119152, + "loss": 2.04, + "step": 278815 + }, + { + "epoch": 0.66, + "grad_norm": 2.296875, + "learning_rate": 0.00015142347809380028, + "loss": 1.9481, + "step": 278820 + }, + { + "epoch": 0.66, + "grad_norm": 2.359375, + "learning_rate": 0.00015142189289884186, + "loss": 2.0393, + "step": 278825 + }, + { + "epoch": 0.66, + "grad_norm": 2.1875, + "learning_rate": 0.00015142030768631678, + "loss": 1.9496, + "step": 278830 + }, + { + "epoch": 0.66, + "grad_norm": 2.234375, + "learning_rate": 0.00015141872245622563, + "loss": 2.1523, + "step": 278835 + }, + { + "epoch": 0.66, + "grad_norm": 2.390625, + "learning_rate": 0.00015141713720856888, + "loss": 2.1367, + "step": 278840 + }, + { + "epoch": 0.66, + "grad_norm": 2.078125, + "learning_rate": 0.0001514155519433471, + "loss": 2.0978, + "step": 278845 + }, + { + "epoch": 0.66, + "grad_norm": 2.046875, + "learning_rate": 0.00015141396666056082, + "loss": 2.043, + "step": 278850 + }, + { + "epoch": 0.66, + "grad_norm": 2.15625, + "learning_rate": 0.00015141238136021063, + "loss": 2.0978, + "step": 278855 + }, + { + "epoch": 0.66, + "grad_norm": 2.0, + "learning_rate": 0.000151410796042297, + "loss": 2.0879, + "step": 278860 + }, + { + "epoch": 0.66, + "grad_norm": 2.25, + "learning_rate": 0.00015140921070682056, + "loss": 2.2489, + "step": 278865 + }, + { + "epoch": 0.66, + "grad_norm": 2.0, + "learning_rate": 0.0001514076253537818, + "loss": 2.0905, + "step": 278870 + }, + { + "epoch": 0.66, + "grad_norm": 2.109375, + "learning_rate": 0.00015140603998318124, + "loss": 2.1103, + "step": 278875 + }, + { + "epoch": 0.66, + "grad_norm": 2.1875, + "learning_rate": 0.00015140445459501943, + "loss": 2.157, + "step": 278880 + }, + { + "epoch": 0.66, + "grad_norm": 2.5625, + "learning_rate": 0.00015140286918929692, + "loss": 2.1865, + "step": 278885 + }, + { + "epoch": 0.66, + "grad_norm": 2.515625, + "learning_rate": 0.00015140128376601428, + "loss": 2.2374, + "step": 278890 + }, + { + "epoch": 0.66, + "grad_norm": 2.046875, + "learning_rate": 0.00015139969832517203, + "loss": 2.1157, + "step": 278895 + }, + { + "epoch": 0.66, + "grad_norm": 2.5, + "learning_rate": 0.00015139811286677071, + "loss": 2.0958, + "step": 278900 + }, + { + "epoch": 0.66, + "grad_norm": 1.9609375, + "learning_rate": 0.00015139652739081086, + "loss": 2.0665, + "step": 278905 + }, + { + "epoch": 0.66, + "grad_norm": 1.8515625, + "learning_rate": 0.00015139494189729305, + "loss": 2.2528, + "step": 278910 + }, + { + "epoch": 0.66, + "grad_norm": 2.078125, + "learning_rate": 0.00015139335638621776, + "loss": 2.1382, + "step": 278915 + }, + { + "epoch": 0.66, + "grad_norm": 2.03125, + "learning_rate": 0.00015139177085758557, + "loss": 2.0213, + "step": 278920 + }, + { + "epoch": 0.66, + "grad_norm": 2.421875, + "learning_rate": 0.00015139018531139703, + "loss": 2.2045, + "step": 278925 + }, + { + "epoch": 0.66, + "grad_norm": 2.140625, + "learning_rate": 0.00015138859974765266, + "loss": 1.8991, + "step": 278930 + }, + { + "epoch": 0.66, + "grad_norm": 2.3125, + "learning_rate": 0.00015138701416635302, + "loss": 2.1788, + "step": 278935 + }, + { + "epoch": 0.66, + "grad_norm": 2.109375, + "learning_rate": 0.00015138542856749863, + "loss": 1.9534, + "step": 278940 + }, + { + "epoch": 0.66, + "grad_norm": 2.015625, + "learning_rate": 0.00015138384295109008, + "loss": 2.0668, + "step": 278945 + }, + { + "epoch": 0.66, + "grad_norm": 2.15625, + "learning_rate": 0.00015138225731712787, + "loss": 2.0175, + "step": 278950 + }, + { + "epoch": 0.66, + "grad_norm": 1.8984375, + "learning_rate": 0.00015138067166561254, + "loss": 2.1675, + "step": 278955 + }, + { + "epoch": 0.66, + "grad_norm": 1.9375, + "learning_rate": 0.0001513790859965446, + "loss": 1.9149, + "step": 278960 + }, + { + "epoch": 0.66, + "grad_norm": 2.046875, + "learning_rate": 0.00015137750030992466, + "loss": 2.1058, + "step": 278965 + }, + { + "epoch": 0.66, + "grad_norm": 2.3125, + "learning_rate": 0.00015137591460575325, + "loss": 2.0654, + "step": 278970 + }, + { + "epoch": 0.66, + "grad_norm": 2.140625, + "learning_rate": 0.00015137432888403092, + "loss": 2.0777, + "step": 278975 + }, + { + "epoch": 0.66, + "grad_norm": 2.34375, + "learning_rate": 0.00015137274314475815, + "loss": 2.1426, + "step": 278980 + }, + { + "epoch": 0.66, + "grad_norm": 2.03125, + "learning_rate": 0.0001513711573879355, + "loss": 2.1675, + "step": 278985 + }, + { + "epoch": 0.66, + "grad_norm": 2.046875, + "learning_rate": 0.00015136957161356358, + "loss": 2.1925, + "step": 278990 + }, + { + "epoch": 0.66, + "grad_norm": 2.40625, + "learning_rate": 0.00015136798582164284, + "loss": 1.9291, + "step": 278995 + }, + { + "epoch": 0.66, + "grad_norm": 2.203125, + "learning_rate": 0.0001513664000121739, + "loss": 2.2082, + "step": 279000 + }, + { + "epoch": 0.66, + "grad_norm": 2.203125, + "learning_rate": 0.00015136481418515725, + "loss": 2.301, + "step": 279005 + }, + { + "epoch": 0.66, + "grad_norm": 2.640625, + "learning_rate": 0.00015136322834059346, + "loss": 2.1112, + "step": 279010 + }, + { + "epoch": 0.66, + "grad_norm": 2.578125, + "learning_rate": 0.00015136164247848304, + "loss": 2.0164, + "step": 279015 + }, + { + "epoch": 0.66, + "grad_norm": 2.015625, + "learning_rate": 0.00015136005659882655, + "loss": 2.2421, + "step": 279020 + }, + { + "epoch": 0.66, + "grad_norm": 2.03125, + "learning_rate": 0.00015135847070162455, + "loss": 2.0244, + "step": 279025 + }, + { + "epoch": 0.66, + "grad_norm": 2.0625, + "learning_rate": 0.00015135688478687756, + "loss": 2.0056, + "step": 279030 + }, + { + "epoch": 0.66, + "grad_norm": 2.359375, + "learning_rate": 0.00015135529885458611, + "loss": 2.1171, + "step": 279035 + }, + { + "epoch": 0.66, + "grad_norm": 2.03125, + "learning_rate": 0.0001513537129047508, + "loss": 2.1717, + "step": 279040 + }, + { + "epoch": 0.66, + "grad_norm": 1.9375, + "learning_rate": 0.00015135212693737207, + "loss": 2.175, + "step": 279045 + }, + { + "epoch": 0.66, + "grad_norm": 1.9140625, + "learning_rate": 0.00015135054095245052, + "loss": 1.9412, + "step": 279050 + }, + { + "epoch": 0.66, + "grad_norm": 1.875, + "learning_rate": 0.00015134895494998675, + "loss": 2.0855, + "step": 279055 + }, + { + "epoch": 0.66, + "grad_norm": 2.515625, + "learning_rate": 0.00015134736892998124, + "loss": 2.0172, + "step": 279060 + }, + { + "epoch": 0.66, + "grad_norm": 2.828125, + "learning_rate": 0.0001513457828924345, + "loss": 2.1815, + "step": 279065 + }, + { + "epoch": 0.66, + "grad_norm": 2.5625, + "learning_rate": 0.00015134419683734716, + "loss": 1.7729, + "step": 279070 + }, + { + "epoch": 0.66, + "grad_norm": 2.53125, + "learning_rate": 0.00015134261076471964, + "loss": 2.1312, + "step": 279075 + }, + { + "epoch": 0.66, + "grad_norm": 2.140625, + "learning_rate": 0.00015134102467455262, + "loss": 2.2101, + "step": 279080 + }, + { + "epoch": 0.66, + "grad_norm": 2.359375, + "learning_rate": 0.00015133943856684656, + "loss": 2.1648, + "step": 279085 + }, + { + "epoch": 0.66, + "grad_norm": 1.96875, + "learning_rate": 0.000151337852441602, + "loss": 2.1464, + "step": 279090 + }, + { + "epoch": 0.66, + "grad_norm": 2.28125, + "learning_rate": 0.0001513362662988195, + "loss": 1.975, + "step": 279095 + }, + { + "epoch": 0.66, + "grad_norm": 2.15625, + "learning_rate": 0.0001513346801384996, + "loss": 2.0141, + "step": 279100 + }, + { + "epoch": 0.66, + "grad_norm": 2.515625, + "learning_rate": 0.00015133309396064286, + "loss": 1.842, + "step": 279105 + }, + { + "epoch": 0.66, + "grad_norm": 2.03125, + "learning_rate": 0.00015133150776524981, + "loss": 2.0262, + "step": 279110 + }, + { + "epoch": 0.66, + "grad_norm": 2.265625, + "learning_rate": 0.00015132992155232096, + "loss": 2.2324, + "step": 279115 + }, + { + "epoch": 0.66, + "grad_norm": 2.75, + "learning_rate": 0.00015132833532185687, + "loss": 2.2336, + "step": 279120 + }, + { + "epoch": 0.66, + "grad_norm": 2.34375, + "learning_rate": 0.00015132674907385808, + "loss": 2.1742, + "step": 279125 + }, + { + "epoch": 0.66, + "grad_norm": 2.25, + "learning_rate": 0.0001513251628083252, + "loss": 1.9535, + "step": 279130 + }, + { + "epoch": 0.66, + "grad_norm": 1.8828125, + "learning_rate": 0.00015132357652525871, + "loss": 2.0687, + "step": 279135 + }, + { + "epoch": 0.66, + "grad_norm": 2.0625, + "learning_rate": 0.0001513219902246591, + "loss": 2.1036, + "step": 279140 + }, + { + "epoch": 0.66, + "grad_norm": 1.9609375, + "learning_rate": 0.00015132040390652697, + "loss": 2.0646, + "step": 279145 + }, + { + "epoch": 0.66, + "grad_norm": 2.203125, + "learning_rate": 0.0001513188175708629, + "loss": 1.7562, + "step": 279150 + }, + { + "epoch": 0.66, + "grad_norm": 1.9296875, + "learning_rate": 0.0001513172312176674, + "loss": 1.9907, + "step": 279155 + }, + { + "epoch": 0.66, + "grad_norm": 1.9453125, + "learning_rate": 0.00015131564484694097, + "loss": 1.9552, + "step": 279160 + }, + { + "epoch": 0.66, + "grad_norm": 2.046875, + "learning_rate": 0.00015131405845868418, + "loss": 2.0565, + "step": 279165 + }, + { + "epoch": 0.66, + "grad_norm": 2.03125, + "learning_rate": 0.0001513124720528976, + "loss": 2.2389, + "step": 279170 + }, + { + "epoch": 0.66, + "grad_norm": 2.140625, + "learning_rate": 0.00015131088562958173, + "loss": 1.9077, + "step": 279175 + }, + { + "epoch": 0.66, + "grad_norm": 2.453125, + "learning_rate": 0.00015130929918873712, + "loss": 2.1468, + "step": 279180 + }, + { + "epoch": 0.66, + "grad_norm": 1.859375, + "learning_rate": 0.00015130771273036436, + "loss": 2.1846, + "step": 279185 + }, + { + "epoch": 0.66, + "grad_norm": 2.484375, + "learning_rate": 0.00015130612625446395, + "loss": 2.2275, + "step": 279190 + }, + { + "epoch": 0.66, + "grad_norm": 2.515625, + "learning_rate": 0.0001513045397610364, + "loss": 2.1993, + "step": 279195 + }, + { + "epoch": 0.66, + "grad_norm": 2.25, + "learning_rate": 0.0001513029532500823, + "loss": 2.019, + "step": 279200 + }, + { + "epoch": 0.66, + "grad_norm": 2.203125, + "learning_rate": 0.0001513013667216022, + "loss": 2.1201, + "step": 279205 + }, + { + "epoch": 0.66, + "grad_norm": 1.9609375, + "learning_rate": 0.00015129978017559663, + "loss": 2.0326, + "step": 279210 + }, + { + "epoch": 0.66, + "grad_norm": 2.375, + "learning_rate": 0.0001512981936120661, + "loss": 1.8774, + "step": 279215 + }, + { + "epoch": 0.66, + "grad_norm": 1.9921875, + "learning_rate": 0.00015129660703101121, + "loss": 2.1024, + "step": 279220 + }, + { + "epoch": 0.66, + "grad_norm": 1.984375, + "learning_rate": 0.00015129502043243243, + "loss": 2.0716, + "step": 279225 + }, + { + "epoch": 0.66, + "grad_norm": 2.0, + "learning_rate": 0.00015129343381633037, + "loss": 2.1036, + "step": 279230 + }, + { + "epoch": 0.66, + "grad_norm": 1.9296875, + "learning_rate": 0.00015129184718270554, + "loss": 1.9206, + "step": 279235 + }, + { + "epoch": 0.66, + "grad_norm": 2.375, + "learning_rate": 0.00015129026053155843, + "loss": 2.0993, + "step": 279240 + }, + { + "epoch": 0.66, + "grad_norm": 1.953125, + "learning_rate": 0.00015128867386288972, + "loss": 2.1855, + "step": 279245 + }, + { + "epoch": 0.66, + "grad_norm": 2.90625, + "learning_rate": 0.0001512870871766998, + "loss": 2.2613, + "step": 279250 + }, + { + "epoch": 0.66, + "grad_norm": 2.25, + "learning_rate": 0.0001512855004729893, + "loss": 1.9908, + "step": 279255 + }, + { + "epoch": 0.66, + "grad_norm": 2.34375, + "learning_rate": 0.00015128391375175877, + "loss": 1.9227, + "step": 279260 + }, + { + "epoch": 0.66, + "grad_norm": 2.265625, + "learning_rate": 0.00015128232701300872, + "loss": 2.0496, + "step": 279265 + }, + { + "epoch": 0.66, + "grad_norm": 2.1875, + "learning_rate": 0.0001512807402567397, + "loss": 2.0613, + "step": 279270 + }, + { + "epoch": 0.66, + "grad_norm": 2.1875, + "learning_rate": 0.00015127915348295222, + "loss": 1.9688, + "step": 279275 + }, + { + "epoch": 0.66, + "grad_norm": 2.25, + "learning_rate": 0.00015127756669164685, + "loss": 2.0107, + "step": 279280 + }, + { + "epoch": 0.66, + "grad_norm": 2.125, + "learning_rate": 0.0001512759798828242, + "loss": 2.0665, + "step": 279285 + }, + { + "epoch": 0.66, + "grad_norm": 2.09375, + "learning_rate": 0.00015127439305648465, + "loss": 2.0867, + "step": 279290 + }, + { + "epoch": 0.66, + "grad_norm": 2.0, + "learning_rate": 0.0001512728062126289, + "loss": 2.1343, + "step": 279295 + }, + { + "epoch": 0.66, + "grad_norm": 1.7734375, + "learning_rate": 0.00015127121935125742, + "loss": 2.1532, + "step": 279300 + }, + { + "epoch": 0.66, + "grad_norm": 2.109375, + "learning_rate": 0.00015126963247237075, + "loss": 2.2071, + "step": 279305 + }, + { + "epoch": 0.66, + "grad_norm": 2.015625, + "learning_rate": 0.00015126804557596946, + "loss": 2.217, + "step": 279310 + }, + { + "epoch": 0.66, + "grad_norm": 2.265625, + "learning_rate": 0.00015126645866205403, + "loss": 1.9515, + "step": 279315 + }, + { + "epoch": 0.66, + "grad_norm": 2.640625, + "learning_rate": 0.0001512648717306251, + "loss": 2.0341, + "step": 279320 + }, + { + "epoch": 0.66, + "grad_norm": 1.9765625, + "learning_rate": 0.00015126328478168316, + "loss": 2.1269, + "step": 279325 + }, + { + "epoch": 0.66, + "grad_norm": 2.21875, + "learning_rate": 0.0001512616978152287, + "loss": 1.9368, + "step": 279330 + }, + { + "epoch": 0.66, + "grad_norm": 1.78125, + "learning_rate": 0.00015126011083126234, + "loss": 2.0175, + "step": 279335 + }, + { + "epoch": 0.66, + "grad_norm": 2.34375, + "learning_rate": 0.0001512585238297846, + "loss": 1.9762, + "step": 279340 + }, + { + "epoch": 0.66, + "grad_norm": 2.234375, + "learning_rate": 0.00015125693681079601, + "loss": 2.0756, + "step": 279345 + }, + { + "epoch": 0.66, + "grad_norm": 1.9296875, + "learning_rate": 0.00015125534977429716, + "loss": 2.1649, + "step": 279350 + }, + { + "epoch": 0.66, + "grad_norm": 2.65625, + "learning_rate": 0.00015125376272028852, + "loss": 2.0927, + "step": 279355 + }, + { + "epoch": 0.66, + "grad_norm": 2.0625, + "learning_rate": 0.00015125217564877067, + "loss": 2.1987, + "step": 279360 + }, + { + "epoch": 0.66, + "grad_norm": 2.203125, + "learning_rate": 0.00015125058855974417, + "loss": 2.1091, + "step": 279365 + }, + { + "epoch": 0.66, + "grad_norm": 2.40625, + "learning_rate": 0.0001512490014532095, + "loss": 2.1139, + "step": 279370 + }, + { + "epoch": 0.66, + "grad_norm": 2.265625, + "learning_rate": 0.00015124741432916725, + "loss": 2.2736, + "step": 279375 + }, + { + "epoch": 0.66, + "grad_norm": 2.234375, + "learning_rate": 0.00015124582718761798, + "loss": 2.2853, + "step": 279380 + }, + { + "epoch": 0.66, + "grad_norm": 2.5625, + "learning_rate": 0.00015124424002856222, + "loss": 1.9248, + "step": 279385 + }, + { + "epoch": 0.66, + "grad_norm": 2.25, + "learning_rate": 0.00015124265285200043, + "loss": 2.0837, + "step": 279390 + }, + { + "epoch": 0.66, + "grad_norm": 1.9609375, + "learning_rate": 0.00015124106565793326, + "loss": 2.2657, + "step": 279395 + }, + { + "epoch": 0.66, + "grad_norm": 2.21875, + "learning_rate": 0.00015123947844636123, + "loss": 2.0746, + "step": 279400 + }, + { + "epoch": 0.66, + "grad_norm": 1.953125, + "learning_rate": 0.00015123789121728484, + "loss": 1.9987, + "step": 279405 + }, + { + "epoch": 0.66, + "grad_norm": 2.4375, + "learning_rate": 0.00015123630397070469, + "loss": 2.0845, + "step": 279410 + }, + { + "epoch": 0.66, + "grad_norm": 2.15625, + "learning_rate": 0.00015123471670662123, + "loss": 2.1967, + "step": 279415 + }, + { + "epoch": 0.66, + "grad_norm": 1.9140625, + "learning_rate": 0.00015123312942503508, + "loss": 2.2084, + "step": 279420 + }, + { + "epoch": 0.66, + "grad_norm": 2.015625, + "learning_rate": 0.0001512315421259468, + "loss": 2.0644, + "step": 279425 + }, + { + "epoch": 0.66, + "grad_norm": 2.140625, + "learning_rate": 0.00015122995480935686, + "loss": 2.0321, + "step": 279430 + }, + { + "epoch": 0.66, + "grad_norm": 2.46875, + "learning_rate": 0.00015122836747526584, + "loss": 1.8781, + "step": 279435 + }, + { + "epoch": 0.66, + "grad_norm": 2.171875, + "learning_rate": 0.00015122678012367428, + "loss": 2.087, + "step": 279440 + }, + { + "epoch": 0.66, + "grad_norm": 2.453125, + "learning_rate": 0.00015122519275458278, + "loss": 2.054, + "step": 279445 + }, + { + "epoch": 0.66, + "grad_norm": 2.28125, + "learning_rate": 0.00015122360536799176, + "loss": 2.1881, + "step": 279450 + }, + { + "epoch": 0.66, + "grad_norm": 2.46875, + "learning_rate": 0.00015122201796390186, + "loss": 2.1157, + "step": 279455 + }, + { + "epoch": 0.66, + "grad_norm": 1.9296875, + "learning_rate": 0.00015122043054231358, + "loss": 2.0315, + "step": 279460 + }, + { + "epoch": 0.66, + "grad_norm": 2.21875, + "learning_rate": 0.00015121884310322745, + "loss": 1.9869, + "step": 279465 + }, + { + "epoch": 0.66, + "grad_norm": 2.421875, + "learning_rate": 0.00015121725564664406, + "loss": 2.0892, + "step": 279470 + }, + { + "epoch": 0.66, + "grad_norm": 2.328125, + "learning_rate": 0.00015121566817256392, + "loss": 2.0175, + "step": 279475 + }, + { + "epoch": 0.66, + "grad_norm": 2.203125, + "learning_rate": 0.0001512140806809876, + "loss": 2.0751, + "step": 279480 + }, + { + "epoch": 0.66, + "grad_norm": 2.453125, + "learning_rate": 0.00015121249317191557, + "loss": 1.9965, + "step": 279485 + }, + { + "epoch": 0.66, + "grad_norm": 2.265625, + "learning_rate": 0.00015121090564534847, + "loss": 2.1551, + "step": 279490 + }, + { + "epoch": 0.66, + "grad_norm": 2.4375, + "learning_rate": 0.0001512093181012868, + "loss": 1.922, + "step": 279495 + }, + { + "epoch": 0.66, + "grad_norm": 2.171875, + "learning_rate": 0.00015120773053973105, + "loss": 2.1125, + "step": 279500 + }, + { + "epoch": 0.66, + "grad_norm": 2.0625, + "learning_rate": 0.00015120614296068184, + "loss": 2.0739, + "step": 279505 + }, + { + "epoch": 0.66, + "grad_norm": 2.890625, + "learning_rate": 0.00015120455536413968, + "loss": 2.0949, + "step": 279510 + }, + { + "epoch": 0.66, + "grad_norm": 2.203125, + "learning_rate": 0.00015120296775010514, + "loss": 2.0411, + "step": 279515 + }, + { + "epoch": 0.66, + "grad_norm": 2.296875, + "learning_rate": 0.0001512013801185787, + "loss": 2.2261, + "step": 279520 + }, + { + "epoch": 0.66, + "grad_norm": 2.078125, + "learning_rate": 0.00015119979246956095, + "loss": 2.1653, + "step": 279525 + }, + { + "epoch": 0.66, + "grad_norm": 2.15625, + "learning_rate": 0.00015119820480305243, + "loss": 2.0855, + "step": 279530 + }, + { + "epoch": 0.66, + "grad_norm": 2.3125, + "learning_rate": 0.00015119661711905367, + "loss": 2.1138, + "step": 279535 + }, + { + "epoch": 0.66, + "grad_norm": 1.890625, + "learning_rate": 0.00015119502941756524, + "loss": 2.088, + "step": 279540 + }, + { + "epoch": 0.66, + "grad_norm": 2.234375, + "learning_rate": 0.00015119344169858763, + "loss": 2.0581, + "step": 279545 + }, + { + "epoch": 0.66, + "grad_norm": 2.703125, + "learning_rate": 0.0001511918539621214, + "loss": 2.059, + "step": 279550 + }, + { + "epoch": 0.66, + "grad_norm": 1.8671875, + "learning_rate": 0.00015119026620816713, + "loss": 2.0104, + "step": 279555 + }, + { + "epoch": 0.66, + "grad_norm": 1.9453125, + "learning_rate": 0.00015118867843672535, + "loss": 1.9884, + "step": 279560 + }, + { + "epoch": 0.66, + "grad_norm": 2.21875, + "learning_rate": 0.00015118709064779656, + "loss": 1.9918, + "step": 279565 + }, + { + "epoch": 0.66, + "grad_norm": 2.578125, + "learning_rate": 0.00015118550284138132, + "loss": 1.9807, + "step": 279570 + }, + { + "epoch": 0.66, + "grad_norm": 2.40625, + "learning_rate": 0.0001511839150174802, + "loss": 2.0596, + "step": 279575 + }, + { + "epoch": 0.66, + "grad_norm": 2.3125, + "learning_rate": 0.00015118232717609376, + "loss": 2.0067, + "step": 279580 + }, + { + "epoch": 0.66, + "grad_norm": 1.8359375, + "learning_rate": 0.00015118073931722249, + "loss": 1.9328, + "step": 279585 + }, + { + "epoch": 0.66, + "grad_norm": 2.28125, + "learning_rate": 0.00015117915144086694, + "loss": 2.1713, + "step": 279590 + }, + { + "epoch": 0.66, + "grad_norm": 2.296875, + "learning_rate": 0.00015117756354702765, + "loss": 1.99, + "step": 279595 + }, + { + "epoch": 0.66, + "grad_norm": 2.4375, + "learning_rate": 0.00015117597563570517, + "loss": 1.9788, + "step": 279600 + }, + { + "epoch": 0.66, + "grad_norm": 2.109375, + "learning_rate": 0.0001511743877069001, + "loss": 2.0777, + "step": 279605 + }, + { + "epoch": 0.66, + "grad_norm": 2.25, + "learning_rate": 0.0001511727997606129, + "loss": 2.1742, + "step": 279610 + }, + { + "epoch": 0.66, + "grad_norm": 2.390625, + "learning_rate": 0.00015117121179684413, + "loss": 2.0427, + "step": 279615 + }, + { + "epoch": 0.66, + "grad_norm": 2.421875, + "learning_rate": 0.00015116962381559436, + "loss": 2.145, + "step": 279620 + }, + { + "epoch": 0.66, + "grad_norm": 2.6875, + "learning_rate": 0.00015116803581686415, + "loss": 2.071, + "step": 279625 + }, + { + "epoch": 0.66, + "grad_norm": 2.34375, + "learning_rate": 0.00015116644780065398, + "loss": 2.004, + "step": 279630 + }, + { + "epoch": 0.66, + "grad_norm": 2.6875, + "learning_rate": 0.00015116485976696443, + "loss": 2.0482, + "step": 279635 + }, + { + "epoch": 0.66, + "grad_norm": 2.40625, + "learning_rate": 0.000151163271715796, + "loss": 1.9753, + "step": 279640 + }, + { + "epoch": 0.66, + "grad_norm": 2.375, + "learning_rate": 0.0001511616836471493, + "loss": 1.9553, + "step": 279645 + }, + { + "epoch": 0.66, + "grad_norm": 2.5625, + "learning_rate": 0.00015116009556102484, + "loss": 2.0743, + "step": 279650 + }, + { + "epoch": 0.66, + "grad_norm": 2.359375, + "learning_rate": 0.00015115850745742317, + "loss": 2.1255, + "step": 279655 + }, + { + "epoch": 0.66, + "grad_norm": 2.234375, + "learning_rate": 0.00015115691933634482, + "loss": 1.8672, + "step": 279660 + }, + { + "epoch": 0.66, + "grad_norm": 2.0, + "learning_rate": 0.00015115533119779032, + "loss": 1.8849, + "step": 279665 + }, + { + "epoch": 0.66, + "grad_norm": 2.8125, + "learning_rate": 0.00015115374304176026, + "loss": 1.8779, + "step": 279670 + }, + { + "epoch": 0.66, + "grad_norm": 2.0625, + "learning_rate": 0.00015115215486825515, + "loss": 2.0814, + "step": 279675 + }, + { + "epoch": 0.66, + "grad_norm": 2.4375, + "learning_rate": 0.00015115056667727554, + "loss": 1.9823, + "step": 279680 + }, + { + "epoch": 0.66, + "grad_norm": 2.15625, + "learning_rate": 0.00015114897846882195, + "loss": 2.1167, + "step": 279685 + }, + { + "epoch": 0.66, + "grad_norm": 2.078125, + "learning_rate": 0.00015114739024289496, + "loss": 2.0921, + "step": 279690 + }, + { + "epoch": 0.66, + "grad_norm": 2.265625, + "learning_rate": 0.0001511458019994951, + "loss": 2.0025, + "step": 279695 + }, + { + "epoch": 0.66, + "grad_norm": 1.7890625, + "learning_rate": 0.00015114421373862286, + "loss": 2.0718, + "step": 279700 + }, + { + "epoch": 0.66, + "grad_norm": 2.359375, + "learning_rate": 0.00015114262546027886, + "loss": 2.2334, + "step": 279705 + }, + { + "epoch": 0.66, + "grad_norm": 2.25, + "learning_rate": 0.0001511410371644636, + "loss": 2.0838, + "step": 279710 + }, + { + "epoch": 0.66, + "grad_norm": 2.015625, + "learning_rate": 0.00015113944885117764, + "loss": 1.9171, + "step": 279715 + }, + { + "epoch": 0.66, + "grad_norm": 2.65625, + "learning_rate": 0.00015113786052042155, + "loss": 2.2149, + "step": 279720 + }, + { + "epoch": 0.66, + "grad_norm": 2.40625, + "learning_rate": 0.0001511362721721958, + "loss": 1.8883, + "step": 279725 + }, + { + "epoch": 0.66, + "grad_norm": 1.828125, + "learning_rate": 0.00015113468380650099, + "loss": 2.1753, + "step": 279730 + }, + { + "epoch": 0.66, + "grad_norm": 2.0625, + "learning_rate": 0.0001511330954233376, + "loss": 2.0662, + "step": 279735 + }, + { + "epoch": 0.66, + "grad_norm": 2.40625, + "learning_rate": 0.00015113150702270628, + "loss": 1.8561, + "step": 279740 + }, + { + "epoch": 0.66, + "grad_norm": 2.265625, + "learning_rate": 0.00015112991860460748, + "loss": 1.9543, + "step": 279745 + }, + { + "epoch": 0.66, + "grad_norm": 1.90625, + "learning_rate": 0.00015112833016904179, + "loss": 1.9901, + "step": 279750 + }, + { + "epoch": 0.66, + "grad_norm": 2.4375, + "learning_rate": 0.0001511267417160097, + "loss": 2.1552, + "step": 279755 + }, + { + "epoch": 0.66, + "grad_norm": 2.515625, + "learning_rate": 0.00015112515324551182, + "loss": 2.2352, + "step": 279760 + }, + { + "epoch": 0.66, + "grad_norm": 2.046875, + "learning_rate": 0.00015112356475754864, + "loss": 2.1452, + "step": 279765 + }, + { + "epoch": 0.66, + "grad_norm": 2.453125, + "learning_rate": 0.00015112197625212072, + "loss": 1.9732, + "step": 279770 + }, + { + "epoch": 0.66, + "grad_norm": 1.984375, + "learning_rate": 0.00015112038772922866, + "loss": 2.1432, + "step": 279775 + }, + { + "epoch": 0.66, + "grad_norm": 2.171875, + "learning_rate": 0.00015111879918887287, + "loss": 2.1414, + "step": 279780 + }, + { + "epoch": 0.66, + "grad_norm": 2.0625, + "learning_rate": 0.00015111721063105403, + "loss": 2.0032, + "step": 279785 + }, + { + "epoch": 0.66, + "grad_norm": 2.25, + "learning_rate": 0.0001511156220557726, + "loss": 2.0603, + "step": 279790 + }, + { + "epoch": 0.66, + "grad_norm": 2.96875, + "learning_rate": 0.00015111403346302913, + "loss": 2.0597, + "step": 279795 + }, + { + "epoch": 0.66, + "grad_norm": 2.265625, + "learning_rate": 0.00015111244485282421, + "loss": 2.2274, + "step": 279800 + }, + { + "epoch": 0.66, + "grad_norm": 1.7890625, + "learning_rate": 0.00015111085622515833, + "loss": 2.1682, + "step": 279805 + }, + { + "epoch": 0.66, + "grad_norm": 2.25, + "learning_rate": 0.00015110926758003207, + "loss": 1.8763, + "step": 279810 + }, + { + "epoch": 0.66, + "grad_norm": 1.9296875, + "learning_rate": 0.00015110767891744596, + "loss": 2.0655, + "step": 279815 + }, + { + "epoch": 0.66, + "grad_norm": 2.140625, + "learning_rate": 0.00015110609023740053, + "loss": 2.199, + "step": 279820 + }, + { + "epoch": 0.66, + "grad_norm": 2.125, + "learning_rate": 0.00015110450153989633, + "loss": 2.053, + "step": 279825 + }, + { + "epoch": 0.66, + "grad_norm": 2.953125, + "learning_rate": 0.0001511029128249339, + "loss": 2.0228, + "step": 279830 + }, + { + "epoch": 0.66, + "grad_norm": 2.53125, + "learning_rate": 0.00015110132409251384, + "loss": 1.9898, + "step": 279835 + }, + { + "epoch": 0.66, + "grad_norm": 2.03125, + "learning_rate": 0.00015109973534263658, + "loss": 2.1194, + "step": 279840 + }, + { + "epoch": 0.66, + "grad_norm": 2.0625, + "learning_rate": 0.0001510981465753027, + "loss": 2.0479, + "step": 279845 + }, + { + "epoch": 0.66, + "grad_norm": 1.9609375, + "learning_rate": 0.00015109655779051284, + "loss": 2.1333, + "step": 279850 + }, + { + "epoch": 0.66, + "grad_norm": 2.171875, + "learning_rate": 0.00015109496898826745, + "loss": 2.1561, + "step": 279855 + }, + { + "epoch": 0.66, + "grad_norm": 1.8828125, + "learning_rate": 0.00015109338016856708, + "loss": 2.0786, + "step": 279860 + }, + { + "epoch": 0.66, + "grad_norm": 1.921875, + "learning_rate": 0.00015109179133141227, + "loss": 1.9783, + "step": 279865 + }, + { + "epoch": 0.66, + "grad_norm": 2.421875, + "learning_rate": 0.0001510902024768036, + "loss": 2.0628, + "step": 279870 + }, + { + "epoch": 0.66, + "grad_norm": 2.25, + "learning_rate": 0.00015108861360474158, + "loss": 2.1823, + "step": 279875 + }, + { + "epoch": 0.66, + "grad_norm": 1.953125, + "learning_rate": 0.00015108702471522677, + "loss": 1.993, + "step": 279880 + }, + { + "epoch": 0.66, + "grad_norm": 2.296875, + "learning_rate": 0.0001510854358082597, + "loss": 2.2977, + "step": 279885 + }, + { + "epoch": 0.66, + "grad_norm": 2.09375, + "learning_rate": 0.00015108384688384093, + "loss": 1.9839, + "step": 279890 + }, + { + "epoch": 0.66, + "grad_norm": 2.078125, + "learning_rate": 0.00015108225794197095, + "loss": 1.9797, + "step": 279895 + }, + { + "epoch": 0.66, + "grad_norm": 2.046875, + "learning_rate": 0.00015108066898265041, + "loss": 2.0321, + "step": 279900 + }, + { + "epoch": 0.66, + "grad_norm": 1.875, + "learning_rate": 0.00015107908000587972, + "loss": 2.1513, + "step": 279905 + }, + { + "epoch": 0.66, + "grad_norm": 2.4375, + "learning_rate": 0.00015107749101165953, + "loss": 2.0133, + "step": 279910 + }, + { + "epoch": 0.66, + "grad_norm": 2.203125, + "learning_rate": 0.00015107590199999032, + "loss": 1.9245, + "step": 279915 + }, + { + "epoch": 0.66, + "grad_norm": 2.59375, + "learning_rate": 0.0001510743129708727, + "loss": 2.0943, + "step": 279920 + }, + { + "epoch": 0.66, + "grad_norm": 2.71875, + "learning_rate": 0.00015107272392430714, + "loss": 2.1469, + "step": 279925 + }, + { + "epoch": 0.66, + "grad_norm": 2.15625, + "learning_rate": 0.00015107113486029417, + "loss": 2.1876, + "step": 279930 + }, + { + "epoch": 0.66, + "grad_norm": 1.9921875, + "learning_rate": 0.00015106954577883444, + "loss": 1.9908, + "step": 279935 + }, + { + "epoch": 0.66, + "grad_norm": 2.078125, + "learning_rate": 0.0001510679566799284, + "loss": 2.0294, + "step": 279940 + }, + { + "epoch": 0.66, + "grad_norm": 2.03125, + "learning_rate": 0.00015106636756357663, + "loss": 2.1468, + "step": 279945 + }, + { + "epoch": 0.66, + "grad_norm": 1.890625, + "learning_rate": 0.00015106477842977966, + "loss": 2.0676, + "step": 279950 + }, + { + "epoch": 0.66, + "grad_norm": 2.21875, + "learning_rate": 0.00015106318927853802, + "loss": 2.1481, + "step": 279955 + }, + { + "epoch": 0.66, + "grad_norm": 2.625, + "learning_rate": 0.0001510616001098523, + "loss": 2.1247, + "step": 279960 + }, + { + "epoch": 0.66, + "grad_norm": 2.21875, + "learning_rate": 0.00015106001092372298, + "loss": 2.1088, + "step": 279965 + }, + { + "epoch": 0.66, + "grad_norm": 1.8203125, + "learning_rate": 0.00015105842172015064, + "loss": 2.1384, + "step": 279970 + }, + { + "epoch": 0.66, + "grad_norm": 2.34375, + "learning_rate": 0.00015105683249913582, + "loss": 2.1676, + "step": 279975 + }, + { + "epoch": 0.66, + "grad_norm": 2.578125, + "learning_rate": 0.00015105524326067905, + "loss": 2.1594, + "step": 279980 + }, + { + "epoch": 0.66, + "grad_norm": 2.109375, + "learning_rate": 0.0001510536540047809, + "loss": 2.0673, + "step": 279985 + }, + { + "epoch": 0.66, + "grad_norm": 2.078125, + "learning_rate": 0.0001510520647314419, + "loss": 2.2745, + "step": 279990 + }, + { + "epoch": 0.66, + "grad_norm": 2.203125, + "learning_rate": 0.00015105047544066257, + "loss": 2.1319, + "step": 279995 + }, + { + "epoch": 0.66, + "grad_norm": 2.203125, + "learning_rate": 0.0001510488861324435, + "loss": 2.1052, + "step": 280000 + }, + { + "epoch": 0.66, + "grad_norm": 2.234375, + "learning_rate": 0.00015104729680678514, + "loss": 1.9988, + "step": 280005 + }, + { + "epoch": 0.66, + "grad_norm": 2.40625, + "learning_rate": 0.00015104570746368815, + "loss": 2.1499, + "step": 280010 + }, + { + "epoch": 0.66, + "grad_norm": 1.9765625, + "learning_rate": 0.00015104411810315304, + "loss": 2.0863, + "step": 280015 + }, + { + "epoch": 0.66, + "grad_norm": 2.390625, + "learning_rate": 0.0001510425287251803, + "loss": 2.0665, + "step": 280020 + }, + { + "epoch": 0.66, + "grad_norm": 2.359375, + "learning_rate": 0.00015104093932977053, + "loss": 2.0485, + "step": 280025 + }, + { + "epoch": 0.66, + "grad_norm": 2.09375, + "learning_rate": 0.00015103934991692421, + "loss": 2.1913, + "step": 280030 + }, + { + "epoch": 0.66, + "grad_norm": 2.0625, + "learning_rate": 0.00015103776048664195, + "loss": 2.1339, + "step": 280035 + }, + { + "epoch": 0.66, + "grad_norm": 2.0, + "learning_rate": 0.00015103617103892427, + "loss": 2.1047, + "step": 280040 + }, + { + "epoch": 0.66, + "grad_norm": 2.265625, + "learning_rate": 0.0001510345815737717, + "loss": 2.107, + "step": 280045 + }, + { + "epoch": 0.66, + "grad_norm": 1.7890625, + "learning_rate": 0.00015103299209118478, + "loss": 2.1135, + "step": 280050 + }, + { + "epoch": 0.66, + "grad_norm": 2.109375, + "learning_rate": 0.00015103140259116406, + "loss": 2.0994, + "step": 280055 + }, + { + "epoch": 0.66, + "grad_norm": 2.046875, + "learning_rate": 0.00015102981307371012, + "loss": 1.9768, + "step": 280060 + }, + { + "epoch": 0.66, + "grad_norm": 2.15625, + "learning_rate": 0.00015102822353882346, + "loss": 2.112, + "step": 280065 + }, + { + "epoch": 0.66, + "grad_norm": 2.0, + "learning_rate": 0.0001510266339865046, + "loss": 2.1066, + "step": 280070 + }, + { + "epoch": 0.66, + "grad_norm": 2.015625, + "learning_rate": 0.00015102504441675415, + "loss": 2.0017, + "step": 280075 + }, + { + "epoch": 0.66, + "grad_norm": 2.4375, + "learning_rate": 0.00015102345482957263, + "loss": 2.2347, + "step": 280080 + }, + { + "epoch": 0.66, + "grad_norm": 2.171875, + "learning_rate": 0.00015102186522496054, + "loss": 1.9701, + "step": 280085 + }, + { + "epoch": 0.66, + "grad_norm": 2.6875, + "learning_rate": 0.00015102027560291847, + "loss": 2.1143, + "step": 280090 + }, + { + "epoch": 0.66, + "grad_norm": 2.203125, + "learning_rate": 0.00015101868596344696, + "loss": 2.12, + "step": 280095 + }, + { + "epoch": 0.66, + "grad_norm": 2.609375, + "learning_rate": 0.0001510170963065465, + "loss": 1.9431, + "step": 280100 + }, + { + "epoch": 0.66, + "grad_norm": 2.171875, + "learning_rate": 0.00015101550663221774, + "loss": 2.028, + "step": 280105 + }, + { + "epoch": 0.66, + "grad_norm": 2.21875, + "learning_rate": 0.00015101391694046109, + "loss": 2.1246, + "step": 280110 + }, + { + "epoch": 0.66, + "grad_norm": 1.9765625, + "learning_rate": 0.00015101232723127721, + "loss": 1.9259, + "step": 280115 + }, + { + "epoch": 0.66, + "grad_norm": 2.1875, + "learning_rate": 0.00015101073750466656, + "loss": 2.1089, + "step": 280120 + }, + { + "epoch": 0.66, + "grad_norm": 2.640625, + "learning_rate": 0.00015100914776062973, + "loss": 2.0315, + "step": 280125 + }, + { + "epoch": 0.66, + "grad_norm": 1.7578125, + "learning_rate": 0.00015100755799916728, + "loss": 2.1158, + "step": 280130 + }, + { + "epoch": 0.66, + "grad_norm": 1.875, + "learning_rate": 0.0001510059682202797, + "loss": 2.1149, + "step": 280135 + }, + { + "epoch": 0.66, + "grad_norm": 2.578125, + "learning_rate": 0.00015100437842396753, + "loss": 2.0997, + "step": 280140 + }, + { + "epoch": 0.66, + "grad_norm": 1.9140625, + "learning_rate": 0.00015100278861023136, + "loss": 2.1572, + "step": 280145 + }, + { + "epoch": 0.66, + "grad_norm": 1.84375, + "learning_rate": 0.00015100119877907173, + "loss": 2.1977, + "step": 280150 + }, + { + "epoch": 0.66, + "grad_norm": 2.4375, + "learning_rate": 0.00015099960893048917, + "loss": 2.1168, + "step": 280155 + }, + { + "epoch": 0.66, + "grad_norm": 1.8515625, + "learning_rate": 0.00015099801906448418, + "loss": 1.9788, + "step": 280160 + }, + { + "epoch": 0.66, + "grad_norm": 2.296875, + "learning_rate": 0.00015099642918105737, + "loss": 1.8175, + "step": 280165 + }, + { + "epoch": 0.66, + "grad_norm": 1.84375, + "learning_rate": 0.00015099483928020924, + "loss": 2.2267, + "step": 280170 + }, + { + "epoch": 0.66, + "grad_norm": 2.0, + "learning_rate": 0.00015099324936194037, + "loss": 2.0846, + "step": 280175 + }, + { + "epoch": 0.66, + "grad_norm": 2.375, + "learning_rate": 0.00015099165942625127, + "loss": 1.9378, + "step": 280180 + }, + { + "epoch": 0.66, + "grad_norm": 2.59375, + "learning_rate": 0.00015099006947314248, + "loss": 1.9812, + "step": 280185 + }, + { + "epoch": 0.66, + "grad_norm": 1.875, + "learning_rate": 0.00015098847950261455, + "loss": 2.0296, + "step": 280190 + }, + { + "epoch": 0.66, + "grad_norm": 2.609375, + "learning_rate": 0.00015098688951466804, + "loss": 2.0057, + "step": 280195 + }, + { + "epoch": 0.66, + "grad_norm": 2.015625, + "learning_rate": 0.0001509852995093035, + "loss": 2.0586, + "step": 280200 + }, + { + "epoch": 0.66, + "grad_norm": 2.125, + "learning_rate": 0.00015098370948652144, + "loss": 2.1135, + "step": 280205 + }, + { + "epoch": 0.66, + "grad_norm": 2.046875, + "learning_rate": 0.00015098211944632243, + "loss": 2.0429, + "step": 280210 + }, + { + "epoch": 0.66, + "grad_norm": 1.8828125, + "learning_rate": 0.000150980529388707, + "loss": 1.975, + "step": 280215 + }, + { + "epoch": 0.66, + "grad_norm": 2.515625, + "learning_rate": 0.0001509789393136757, + "loss": 1.9172, + "step": 280220 + }, + { + "epoch": 0.66, + "grad_norm": 2.015625, + "learning_rate": 0.00015097734922122906, + "loss": 2.2361, + "step": 280225 + }, + { + "epoch": 0.66, + "grad_norm": 2.203125, + "learning_rate": 0.00015097575911136765, + "loss": 1.9149, + "step": 280230 + }, + { + "epoch": 0.66, + "grad_norm": 2.484375, + "learning_rate": 0.000150974168984092, + "loss": 2.0895, + "step": 280235 + }, + { + "epoch": 0.66, + "grad_norm": 1.90625, + "learning_rate": 0.00015097257883940263, + "loss": 2.0964, + "step": 280240 + }, + { + "epoch": 0.66, + "grad_norm": 2.21875, + "learning_rate": 0.0001509709886773001, + "loss": 1.9495, + "step": 280245 + }, + { + "epoch": 0.66, + "grad_norm": 2.234375, + "learning_rate": 0.00015096939849778497, + "loss": 2.2756, + "step": 280250 + }, + { + "epoch": 0.66, + "grad_norm": 2.390625, + "learning_rate": 0.00015096780830085776, + "loss": 1.9771, + "step": 280255 + }, + { + "epoch": 0.66, + "grad_norm": 2.15625, + "learning_rate": 0.00015096621808651902, + "loss": 2.0282, + "step": 280260 + }, + { + "epoch": 0.66, + "grad_norm": 1.7734375, + "learning_rate": 0.00015096462785476936, + "loss": 1.927, + "step": 280265 + }, + { + "epoch": 0.66, + "grad_norm": 2.046875, + "learning_rate": 0.00015096303760560916, + "loss": 2.1296, + "step": 280270 + }, + { + "epoch": 0.66, + "grad_norm": 2.359375, + "learning_rate": 0.0001509614473390391, + "loss": 2.0056, + "step": 280275 + }, + { + "epoch": 0.66, + "grad_norm": 2.59375, + "learning_rate": 0.0001509598570550597, + "loss": 2.0621, + "step": 280280 + }, + { + "epoch": 0.66, + "grad_norm": 2.03125, + "learning_rate": 0.00015095826675367148, + "loss": 2.0747, + "step": 280285 + }, + { + "epoch": 0.66, + "grad_norm": 2.234375, + "learning_rate": 0.000150956676434875, + "loss": 2.2144, + "step": 280290 + }, + { + "epoch": 0.66, + "grad_norm": 2.59375, + "learning_rate": 0.00015095508609867076, + "loss": 1.9731, + "step": 280295 + }, + { + "epoch": 0.66, + "grad_norm": 2.9375, + "learning_rate": 0.00015095349574505935, + "loss": 1.9891, + "step": 280300 + }, + { + "epoch": 0.66, + "grad_norm": 2.203125, + "learning_rate": 0.0001509519053740413, + "loss": 2.1127, + "step": 280305 + }, + { + "epoch": 0.66, + "grad_norm": 1.9921875, + "learning_rate": 0.00015095031498561722, + "loss": 1.9864, + "step": 280310 + }, + { + "epoch": 0.66, + "grad_norm": 2.125, + "learning_rate": 0.00015094872457978753, + "loss": 2.2069, + "step": 280315 + }, + { + "epoch": 0.66, + "grad_norm": 1.96875, + "learning_rate": 0.00015094713415655283, + "loss": 2.213, + "step": 280320 + }, + { + "epoch": 0.66, + "grad_norm": 1.7890625, + "learning_rate": 0.00015094554371591364, + "loss": 1.9479, + "step": 280325 + }, + { + "epoch": 0.66, + "grad_norm": 1.90625, + "learning_rate": 0.0001509439532578706, + "loss": 2.1665, + "step": 280330 + }, + { + "epoch": 0.66, + "grad_norm": 2.09375, + "learning_rate": 0.00015094236278242416, + "loss": 2.0, + "step": 280335 + }, + { + "epoch": 0.66, + "grad_norm": 2.171875, + "learning_rate": 0.00015094077228957486, + "loss": 2.0782, + "step": 280340 + }, + { + "epoch": 0.66, + "grad_norm": 2.59375, + "learning_rate": 0.00015093918177932326, + "loss": 2.2282, + "step": 280345 + }, + { + "epoch": 0.66, + "grad_norm": 2.109375, + "learning_rate": 0.00015093759125166996, + "loss": 2.3543, + "step": 280350 + }, + { + "epoch": 0.66, + "grad_norm": 2.328125, + "learning_rate": 0.0001509360007066154, + "loss": 2.0192, + "step": 280355 + }, + { + "epoch": 0.66, + "grad_norm": 2.375, + "learning_rate": 0.0001509344101441602, + "loss": 2.0617, + "step": 280360 + }, + { + "epoch": 0.66, + "grad_norm": 2.265625, + "learning_rate": 0.0001509328195643049, + "loss": 2.051, + "step": 280365 + }, + { + "epoch": 0.66, + "grad_norm": 2.0625, + "learning_rate": 0.00015093122896705, + "loss": 2.085, + "step": 280370 + }, + { + "epoch": 0.66, + "grad_norm": 2.03125, + "learning_rate": 0.0001509296383523961, + "loss": 2.0486, + "step": 280375 + }, + { + "epoch": 0.66, + "grad_norm": 2.515625, + "learning_rate": 0.00015092804772034367, + "loss": 2.1733, + "step": 280380 + }, + { + "epoch": 0.66, + "grad_norm": 2.234375, + "learning_rate": 0.0001509264570708933, + "loss": 2.2519, + "step": 280385 + }, + { + "epoch": 0.66, + "grad_norm": 2.453125, + "learning_rate": 0.00015092486640404555, + "loss": 1.8923, + "step": 280390 + }, + { + "epoch": 0.66, + "grad_norm": 1.9609375, + "learning_rate": 0.00015092327571980096, + "loss": 2.2902, + "step": 280395 + }, + { + "epoch": 0.66, + "grad_norm": 2.0, + "learning_rate": 0.00015092168501816002, + "loss": 2.2061, + "step": 280400 + }, + { + "epoch": 0.66, + "grad_norm": 2.125, + "learning_rate": 0.0001509200942991233, + "loss": 2.1187, + "step": 280405 + }, + { + "epoch": 0.66, + "grad_norm": 2.421875, + "learning_rate": 0.0001509185035626914, + "loss": 1.9346, + "step": 280410 + }, + { + "epoch": 0.66, + "grad_norm": 1.890625, + "learning_rate": 0.00015091691280886478, + "loss": 2.0256, + "step": 280415 + }, + { + "epoch": 0.66, + "grad_norm": 2.46875, + "learning_rate": 0.00015091532203764403, + "loss": 1.9466, + "step": 280420 + }, + { + "epoch": 0.66, + "grad_norm": 2.203125, + "learning_rate": 0.00015091373124902967, + "loss": 1.9766, + "step": 280425 + }, + { + "epoch": 0.66, + "grad_norm": 2.546875, + "learning_rate": 0.0001509121404430223, + "loss": 1.9732, + "step": 280430 + }, + { + "epoch": 0.66, + "grad_norm": 1.9609375, + "learning_rate": 0.00015091054961962236, + "loss": 2.2366, + "step": 280435 + }, + { + "epoch": 0.66, + "grad_norm": 2.484375, + "learning_rate": 0.00015090895877883047, + "loss": 1.9482, + "step": 280440 + }, + { + "epoch": 0.66, + "grad_norm": 2.34375, + "learning_rate": 0.0001509073679206472, + "loss": 2.2519, + "step": 280445 + }, + { + "epoch": 0.66, + "grad_norm": 2.140625, + "learning_rate": 0.000150905777045073, + "loss": 2.1356, + "step": 280450 + }, + { + "epoch": 0.66, + "grad_norm": 2.265625, + "learning_rate": 0.00015090418615210848, + "loss": 1.9514, + "step": 280455 + }, + { + "epoch": 0.66, + "grad_norm": 2.203125, + "learning_rate": 0.00015090259524175415, + "loss": 2.1443, + "step": 280460 + }, + { + "epoch": 0.66, + "grad_norm": 2.6875, + "learning_rate": 0.0001509010043140106, + "loss": 2.0992, + "step": 280465 + }, + { + "epoch": 0.66, + "grad_norm": 2.21875, + "learning_rate": 0.00015089941336887832, + "loss": 2.1656, + "step": 280470 + }, + { + "epoch": 0.66, + "grad_norm": 1.9921875, + "learning_rate": 0.00015089782240635787, + "loss": 2.0396, + "step": 280475 + }, + { + "epoch": 0.66, + "grad_norm": 2.78125, + "learning_rate": 0.00015089623142644984, + "loss": 2.0296, + "step": 280480 + }, + { + "epoch": 0.66, + "grad_norm": 2.296875, + "learning_rate": 0.0001508946404291547, + "loss": 2.127, + "step": 280485 + }, + { + "epoch": 0.66, + "grad_norm": 2.125, + "learning_rate": 0.00015089304941447304, + "loss": 2.0985, + "step": 280490 + }, + { + "epoch": 0.66, + "grad_norm": 2.078125, + "learning_rate": 0.00015089145838240539, + "loss": 2.1184, + "step": 280495 + }, + { + "epoch": 0.66, + "grad_norm": 2.0, + "learning_rate": 0.0001508898673329523, + "loss": 1.9901, + "step": 280500 + }, + { + "epoch": 0.66, + "grad_norm": 2.34375, + "learning_rate": 0.0001508882762661143, + "loss": 2.1551, + "step": 280505 + }, + { + "epoch": 0.66, + "grad_norm": 2.15625, + "learning_rate": 0.00015088668518189197, + "loss": 2.0511, + "step": 280510 + }, + { + "epoch": 0.66, + "grad_norm": 2.0, + "learning_rate": 0.00015088509408028576, + "loss": 2.0613, + "step": 280515 + }, + { + "epoch": 0.66, + "grad_norm": 2.375, + "learning_rate": 0.00015088350296129638, + "loss": 2.2777, + "step": 280520 + }, + { + "epoch": 0.66, + "grad_norm": 2.53125, + "learning_rate": 0.0001508819118249242, + "loss": 2.0636, + "step": 280525 + }, + { + "epoch": 0.66, + "grad_norm": 1.703125, + "learning_rate": 0.00015088032067116984, + "loss": 1.9337, + "step": 280530 + }, + { + "epoch": 0.66, + "grad_norm": 2.640625, + "learning_rate": 0.00015087872950003383, + "loss": 2.1256, + "step": 280535 + }, + { + "epoch": 0.66, + "grad_norm": 1.984375, + "learning_rate": 0.00015087713831151675, + "loss": 2.1734, + "step": 280540 + }, + { + "epoch": 0.66, + "grad_norm": 1.9921875, + "learning_rate": 0.0001508755471056191, + "loss": 2.0976, + "step": 280545 + }, + { + "epoch": 0.66, + "grad_norm": 2.21875, + "learning_rate": 0.00015087395588234145, + "loss": 2.0961, + "step": 280550 + }, + { + "epoch": 0.66, + "grad_norm": 2.5, + "learning_rate": 0.00015087236464168436, + "loss": 2.0851, + "step": 280555 + }, + { + "epoch": 0.66, + "grad_norm": 2.21875, + "learning_rate": 0.00015087077338364833, + "loss": 2.0534, + "step": 280560 + }, + { + "epoch": 0.66, + "grad_norm": 2.25, + "learning_rate": 0.00015086918210823388, + "loss": 2.1071, + "step": 280565 + }, + { + "epoch": 0.66, + "grad_norm": 2.40625, + "learning_rate": 0.00015086759081544163, + "loss": 2.1015, + "step": 280570 + }, + { + "epoch": 0.66, + "grad_norm": 2.375, + "learning_rate": 0.00015086599950527209, + "loss": 2.1385, + "step": 280575 + }, + { + "epoch": 0.66, + "grad_norm": 2.453125, + "learning_rate": 0.00015086440817772582, + "loss": 2.2376, + "step": 280580 + }, + { + "epoch": 0.66, + "grad_norm": 2.1875, + "learning_rate": 0.00015086281683280333, + "loss": 1.9725, + "step": 280585 + }, + { + "epoch": 0.66, + "grad_norm": 2.5, + "learning_rate": 0.00015086122547050516, + "loss": 2.0724, + "step": 280590 + }, + { + "epoch": 0.66, + "grad_norm": 2.34375, + "learning_rate": 0.00015085963409083186, + "loss": 2.057, + "step": 280595 + }, + { + "epoch": 0.66, + "grad_norm": 3.921875, + "learning_rate": 0.000150858042693784, + "loss": 2.103, + "step": 280600 + }, + { + "epoch": 0.66, + "grad_norm": 2.015625, + "learning_rate": 0.00015085645127936215, + "loss": 2.0559, + "step": 280605 + }, + { + "epoch": 0.66, + "grad_norm": 2.265625, + "learning_rate": 0.0001508548598475668, + "loss": 1.991, + "step": 280610 + }, + { + "epoch": 0.66, + "grad_norm": 2.265625, + "learning_rate": 0.00015085326839839848, + "loss": 2.1343, + "step": 280615 + }, + { + "epoch": 0.66, + "grad_norm": 2.296875, + "learning_rate": 0.00015085167693185776, + "loss": 2.0279, + "step": 280620 + }, + { + "epoch": 0.66, + "grad_norm": 1.9453125, + "learning_rate": 0.00015085008544794522, + "loss": 2.2106, + "step": 280625 + }, + { + "epoch": 0.66, + "grad_norm": 2.0, + "learning_rate": 0.00015084849394666133, + "loss": 2.1701, + "step": 280630 + }, + { + "epoch": 0.66, + "grad_norm": 1.9765625, + "learning_rate": 0.00015084690242800672, + "loss": 1.8385, + "step": 280635 + }, + { + "epoch": 0.66, + "grad_norm": 2.5, + "learning_rate": 0.00015084531089198185, + "loss": 2.1329, + "step": 280640 + }, + { + "epoch": 0.66, + "grad_norm": 2.140625, + "learning_rate": 0.00015084371933858726, + "loss": 2.1389, + "step": 280645 + }, + { + "epoch": 0.66, + "grad_norm": 2.078125, + "learning_rate": 0.00015084212776782363, + "loss": 2.0442, + "step": 280650 + }, + { + "epoch": 0.66, + "grad_norm": 2.09375, + "learning_rate": 0.00015084053617969137, + "loss": 1.9913, + "step": 280655 + }, + { + "epoch": 0.66, + "grad_norm": 2.5625, + "learning_rate": 0.00015083894457419103, + "loss": 2.1503, + "step": 280660 + }, + { + "epoch": 0.66, + "grad_norm": 2.640625, + "learning_rate": 0.0001508373529513232, + "loss": 2.1614, + "step": 280665 + }, + { + "epoch": 0.66, + "grad_norm": 2.40625, + "learning_rate": 0.0001508357613110884, + "loss": 1.9971, + "step": 280670 + }, + { + "epoch": 0.66, + "grad_norm": 1.984375, + "learning_rate": 0.0001508341696534872, + "loss": 2.0319, + "step": 280675 + }, + { + "epoch": 0.66, + "grad_norm": 2.234375, + "learning_rate": 0.0001508325779785201, + "loss": 2.1737, + "step": 280680 + }, + { + "epoch": 0.66, + "grad_norm": 1.8125, + "learning_rate": 0.0001508309862861877, + "loss": 2.0415, + "step": 280685 + }, + { + "epoch": 0.66, + "grad_norm": 2.375, + "learning_rate": 0.0001508293945764905, + "loss": 2.1731, + "step": 280690 + }, + { + "epoch": 0.66, + "grad_norm": 1.78125, + "learning_rate": 0.00015082780284942908, + "loss": 2.2296, + "step": 280695 + }, + { + "epoch": 0.66, + "grad_norm": 1.8828125, + "learning_rate": 0.00015082621110500393, + "loss": 2.1623, + "step": 280700 + }, + { + "epoch": 0.66, + "grad_norm": 2.0625, + "learning_rate": 0.00015082461934321563, + "loss": 2.0791, + "step": 280705 + }, + { + "epoch": 0.66, + "grad_norm": 1.9921875, + "learning_rate": 0.0001508230275640647, + "loss": 1.9342, + "step": 280710 + }, + { + "epoch": 0.66, + "grad_norm": 1.9140625, + "learning_rate": 0.00015082143576755175, + "loss": 2.151, + "step": 280715 + }, + { + "epoch": 0.66, + "grad_norm": 2.1875, + "learning_rate": 0.00015081984395367724, + "loss": 1.9352, + "step": 280720 + }, + { + "epoch": 0.66, + "grad_norm": 2.21875, + "learning_rate": 0.00015081825212244176, + "loss": 1.9909, + "step": 280725 + }, + { + "epoch": 0.66, + "grad_norm": 1.859375, + "learning_rate": 0.00015081666027384584, + "loss": 1.9739, + "step": 280730 + }, + { + "epoch": 0.66, + "grad_norm": 2.34375, + "learning_rate": 0.00015081506840789003, + "loss": 2.0141, + "step": 280735 + }, + { + "epoch": 0.66, + "grad_norm": 1.6796875, + "learning_rate": 0.00015081347652457487, + "loss": 2.17, + "step": 280740 + }, + { + "epoch": 0.66, + "grad_norm": 2.203125, + "learning_rate": 0.00015081188462390092, + "loss": 1.994, + "step": 280745 + }, + { + "epoch": 0.66, + "grad_norm": 2.0, + "learning_rate": 0.00015081029270586869, + "loss": 1.9177, + "step": 280750 + }, + { + "epoch": 0.66, + "grad_norm": 1.6171875, + "learning_rate": 0.00015080870077047872, + "loss": 1.944, + "step": 280755 + }, + { + "epoch": 0.66, + "grad_norm": 2.21875, + "learning_rate": 0.00015080710881773163, + "loss": 2.2111, + "step": 280760 + }, + { + "epoch": 0.66, + "grad_norm": 2.203125, + "learning_rate": 0.0001508055168476279, + "loss": 2.0841, + "step": 280765 + }, + { + "epoch": 0.66, + "grad_norm": 2.140625, + "learning_rate": 0.00015080392486016802, + "loss": 1.9479, + "step": 280770 + }, + { + "epoch": 0.66, + "grad_norm": 2.453125, + "learning_rate": 0.00015080233285535267, + "loss": 1.8282, + "step": 280775 + }, + { + "epoch": 0.66, + "grad_norm": 1.9921875, + "learning_rate": 0.00015080074083318227, + "loss": 1.9559, + "step": 280780 + }, + { + "epoch": 0.66, + "grad_norm": 2.171875, + "learning_rate": 0.00015079914879365748, + "loss": 2.1023, + "step": 280785 + }, + { + "epoch": 0.66, + "grad_norm": 2.25, + "learning_rate": 0.00015079755673677873, + "loss": 1.9695, + "step": 280790 + }, + { + "epoch": 0.66, + "grad_norm": 2.40625, + "learning_rate": 0.00015079596466254663, + "loss": 2.0561, + "step": 280795 + }, + { + "epoch": 0.66, + "grad_norm": 2.046875, + "learning_rate": 0.0001507943725709617, + "loss": 2.0795, + "step": 280800 + }, + { + "epoch": 0.66, + "grad_norm": 2.171875, + "learning_rate": 0.0001507927804620245, + "loss": 1.9127, + "step": 280805 + }, + { + "epoch": 0.66, + "grad_norm": 2.28125, + "learning_rate": 0.00015079118833573554, + "loss": 2.26, + "step": 280810 + }, + { + "epoch": 0.66, + "grad_norm": 2.375, + "learning_rate": 0.00015078959619209542, + "loss": 1.9745, + "step": 280815 + }, + { + "epoch": 0.66, + "grad_norm": 2.46875, + "learning_rate": 0.00015078800403110464, + "loss": 1.9876, + "step": 280820 + }, + { + "epoch": 0.66, + "grad_norm": 2.1875, + "learning_rate": 0.00015078641185276376, + "loss": 2.0494, + "step": 280825 + }, + { + "epoch": 0.66, + "grad_norm": 2.328125, + "learning_rate": 0.0001507848196570733, + "loss": 2.0936, + "step": 280830 + }, + { + "epoch": 0.66, + "grad_norm": 2.015625, + "learning_rate": 0.00015078322744403386, + "loss": 2.1693, + "step": 280835 + }, + { + "epoch": 0.66, + "grad_norm": 2.484375, + "learning_rate": 0.00015078163521364593, + "loss": 1.9775, + "step": 280840 + }, + { + "epoch": 0.66, + "grad_norm": 2.046875, + "learning_rate": 0.0001507800429659101, + "loss": 2.1967, + "step": 280845 + }, + { + "epoch": 0.66, + "grad_norm": 2.1875, + "learning_rate": 0.00015077845070082682, + "loss": 2.0717, + "step": 280850 + }, + { + "epoch": 0.66, + "grad_norm": 1.828125, + "learning_rate": 0.00015077685841839678, + "loss": 2.1101, + "step": 280855 + }, + { + "epoch": 0.66, + "grad_norm": 2.046875, + "learning_rate": 0.00015077526611862035, + "loss": 2.0538, + "step": 280860 + }, + { + "epoch": 0.66, + "grad_norm": 2.375, + "learning_rate": 0.00015077367380149825, + "loss": 1.991, + "step": 280865 + }, + { + "epoch": 0.66, + "grad_norm": 2.359375, + "learning_rate": 0.0001507720814670309, + "loss": 2.1092, + "step": 280870 + }, + { + "epoch": 0.66, + "grad_norm": 2.5, + "learning_rate": 0.00015077048911521894, + "loss": 1.9701, + "step": 280875 + }, + { + "epoch": 0.66, + "grad_norm": 2.375, + "learning_rate": 0.0001507688967460628, + "loss": 2.0693, + "step": 280880 + }, + { + "epoch": 0.66, + "grad_norm": 2.75, + "learning_rate": 0.00015076730435956313, + "loss": 2.0389, + "step": 280885 + }, + { + "epoch": 0.66, + "grad_norm": 2.1875, + "learning_rate": 0.00015076571195572037, + "loss": 2.1125, + "step": 280890 + }, + { + "epoch": 0.66, + "grad_norm": 1.8671875, + "learning_rate": 0.00015076411953453517, + "loss": 1.8861, + "step": 280895 + }, + { + "epoch": 0.66, + "grad_norm": 2.3125, + "learning_rate": 0.000150762527096008, + "loss": 2.1972, + "step": 280900 + }, + { + "epoch": 0.66, + "grad_norm": 2.25, + "learning_rate": 0.00015076093464013948, + "loss": 2.1661, + "step": 280905 + }, + { + "epoch": 0.66, + "grad_norm": 2.109375, + "learning_rate": 0.00015075934216693003, + "loss": 2.0685, + "step": 280910 + }, + { + "epoch": 0.66, + "grad_norm": 1.984375, + "learning_rate": 0.0001507577496763803, + "loss": 2.1006, + "step": 280915 + }, + { + "epoch": 0.66, + "grad_norm": 2.3125, + "learning_rate": 0.00015075615716849083, + "loss": 2.0974, + "step": 280920 + }, + { + "epoch": 0.66, + "grad_norm": 1.9453125, + "learning_rate": 0.00015075456464326212, + "loss": 2.0954, + "step": 280925 + }, + { + "epoch": 0.66, + "grad_norm": 2.140625, + "learning_rate": 0.0001507529721006947, + "loss": 2.0975, + "step": 280930 + }, + { + "epoch": 0.66, + "grad_norm": 2.4375, + "learning_rate": 0.00015075137954078917, + "loss": 1.885, + "step": 280935 + }, + { + "epoch": 0.66, + "grad_norm": 2.25, + "learning_rate": 0.00015074978696354602, + "loss": 2.2031, + "step": 280940 + }, + { + "epoch": 0.66, + "grad_norm": 2.171875, + "learning_rate": 0.00015074819436896588, + "loss": 2.1341, + "step": 280945 + }, + { + "epoch": 0.66, + "grad_norm": 2.625, + "learning_rate": 0.0001507466017570492, + "loss": 2.0353, + "step": 280950 + }, + { + "epoch": 0.66, + "grad_norm": 2.53125, + "learning_rate": 0.00015074500912779656, + "loss": 2.0577, + "step": 280955 + }, + { + "epoch": 0.66, + "grad_norm": 2.171875, + "learning_rate": 0.00015074341648120852, + "loss": 2.0409, + "step": 280960 + }, + { + "epoch": 0.66, + "grad_norm": 2.0, + "learning_rate": 0.0001507418238172856, + "loss": 2.0336, + "step": 280965 + }, + { + "epoch": 0.66, + "grad_norm": 1.9609375, + "learning_rate": 0.00015074023113602832, + "loss": 1.995, + "step": 280970 + }, + { + "epoch": 0.66, + "grad_norm": 2.0625, + "learning_rate": 0.0001507386384374373, + "loss": 2.0327, + "step": 280975 + }, + { + "epoch": 0.66, + "grad_norm": 2.03125, + "learning_rate": 0.00015073704572151305, + "loss": 1.8432, + "step": 280980 + }, + { + "epoch": 0.66, + "grad_norm": 2.125, + "learning_rate": 0.00015073545298825608, + "loss": 2.0309, + "step": 280985 + }, + { + "epoch": 0.66, + "grad_norm": 2.640625, + "learning_rate": 0.00015073386023766697, + "loss": 2.0344, + "step": 280990 + }, + { + "epoch": 0.66, + "grad_norm": 2.328125, + "learning_rate": 0.00015073226746974621, + "loss": 1.965, + "step": 280995 + }, + { + "epoch": 0.66, + "grad_norm": 2.21875, + "learning_rate": 0.00015073067468449443, + "loss": 2.1389, + "step": 281000 + }, + { + "epoch": 0.66, + "grad_norm": 2.3125, + "learning_rate": 0.00015072908188191214, + "loss": 2.1483, + "step": 281005 + }, + { + "epoch": 0.66, + "grad_norm": 1.8671875, + "learning_rate": 0.00015072748906199985, + "loss": 1.9117, + "step": 281010 + }, + { + "epoch": 0.66, + "grad_norm": 2.375, + "learning_rate": 0.00015072589622475813, + "loss": 2.1697, + "step": 281015 + }, + { + "epoch": 0.66, + "grad_norm": 2.21875, + "learning_rate": 0.00015072430337018753, + "loss": 2.167, + "step": 281020 + }, + { + "epoch": 0.66, + "grad_norm": 1.9609375, + "learning_rate": 0.0001507227104982886, + "loss": 1.999, + "step": 281025 + }, + { + "epoch": 0.66, + "grad_norm": 2.515625, + "learning_rate": 0.00015072111760906188, + "loss": 2.0139, + "step": 281030 + }, + { + "epoch": 0.66, + "grad_norm": 2.171875, + "learning_rate": 0.0001507195247025079, + "loss": 2.0337, + "step": 281035 + }, + { + "epoch": 0.66, + "grad_norm": 2.203125, + "learning_rate": 0.00015071793177862718, + "loss": 2.0934, + "step": 281040 + }, + { + "epoch": 0.66, + "grad_norm": 1.90625, + "learning_rate": 0.00015071633883742035, + "loss": 1.95, + "step": 281045 + }, + { + "epoch": 0.66, + "grad_norm": 2.21875, + "learning_rate": 0.00015071474587888782, + "loss": 2.0502, + "step": 281050 + }, + { + "epoch": 0.66, + "grad_norm": 2.25, + "learning_rate": 0.0001507131529030303, + "loss": 2.1712, + "step": 281055 + }, + { + "epoch": 0.66, + "grad_norm": 2.84375, + "learning_rate": 0.00015071155990984818, + "loss": 1.9204, + "step": 281060 + }, + { + "epoch": 0.66, + "grad_norm": 2.1875, + "learning_rate": 0.0001507099668993421, + "loss": 1.9803, + "step": 281065 + }, + { + "epoch": 0.66, + "grad_norm": 2.5, + "learning_rate": 0.00015070837387151257, + "loss": 2.0891, + "step": 281070 + }, + { + "epoch": 0.66, + "grad_norm": 2.546875, + "learning_rate": 0.00015070678082636013, + "loss": 2.1909, + "step": 281075 + }, + { + "epoch": 0.66, + "grad_norm": 2.515625, + "learning_rate": 0.00015070518776388536, + "loss": 2.016, + "step": 281080 + }, + { + "epoch": 0.66, + "grad_norm": 1.9296875, + "learning_rate": 0.00015070359468408875, + "loss": 2.3092, + "step": 281085 + }, + { + "epoch": 0.66, + "grad_norm": 1.9453125, + "learning_rate": 0.0001507020015869709, + "loss": 1.9802, + "step": 281090 + }, + { + "epoch": 0.66, + "grad_norm": 1.8671875, + "learning_rate": 0.0001507004084725323, + "loss": 2.0433, + "step": 281095 + }, + { + "epoch": 0.66, + "grad_norm": 2.78125, + "learning_rate": 0.00015069881534077355, + "loss": 1.9137, + "step": 281100 + }, + { + "epoch": 0.66, + "grad_norm": 2.140625, + "learning_rate": 0.00015069722219169512, + "loss": 2.0053, + "step": 281105 + }, + { + "epoch": 0.66, + "grad_norm": 2.421875, + "learning_rate": 0.00015069562902529765, + "loss": 2.1689, + "step": 281110 + }, + { + "epoch": 0.66, + "grad_norm": 2.75, + "learning_rate": 0.00015069403584158162, + "loss": 2.0873, + "step": 281115 + }, + { + "epoch": 0.66, + "grad_norm": 2.0625, + "learning_rate": 0.00015069244264054755, + "loss": 1.9273, + "step": 281120 + }, + { + "epoch": 0.66, + "grad_norm": 2.171875, + "learning_rate": 0.00015069084942219608, + "loss": 2.046, + "step": 281125 + }, + { + "epoch": 0.66, + "grad_norm": 1.984375, + "learning_rate": 0.00015068925618652766, + "loss": 2.0476, + "step": 281130 + }, + { + "epoch": 0.66, + "grad_norm": 2.296875, + "learning_rate": 0.00015068766293354288, + "loss": 1.9741, + "step": 281135 + }, + { + "epoch": 0.66, + "grad_norm": 2.359375, + "learning_rate": 0.00015068606966324227, + "loss": 1.9969, + "step": 281140 + }, + { + "epoch": 0.66, + "grad_norm": 1.7890625, + "learning_rate": 0.0001506844763756264, + "loss": 2.0414, + "step": 281145 + }, + { + "epoch": 0.66, + "grad_norm": 2.296875, + "learning_rate": 0.0001506828830706958, + "loss": 2.0162, + "step": 281150 + }, + { + "epoch": 0.66, + "grad_norm": 2.359375, + "learning_rate": 0.00015068128974845096, + "loss": 2.2992, + "step": 281155 + }, + { + "epoch": 0.66, + "grad_norm": 2.140625, + "learning_rate": 0.00015067969640889247, + "loss": 2.072, + "step": 281160 + }, + { + "epoch": 0.66, + "grad_norm": 1.8828125, + "learning_rate": 0.00015067810305202092, + "loss": 2.2015, + "step": 281165 + }, + { + "epoch": 0.66, + "grad_norm": 2.484375, + "learning_rate": 0.0001506765096778368, + "loss": 1.9867, + "step": 281170 + }, + { + "epoch": 0.66, + "grad_norm": 2.125, + "learning_rate": 0.0001506749162863407, + "loss": 1.9318, + "step": 281175 + }, + { + "epoch": 0.66, + "grad_norm": 2.421875, + "learning_rate": 0.00015067332287753307, + "loss": 2.0486, + "step": 281180 + }, + { + "epoch": 0.66, + "grad_norm": 2.640625, + "learning_rate": 0.00015067172945141454, + "loss": 2.0511, + "step": 281185 + }, + { + "epoch": 0.66, + "grad_norm": 2.578125, + "learning_rate": 0.0001506701360079856, + "loss": 1.9652, + "step": 281190 + }, + { + "epoch": 0.66, + "grad_norm": 2.3125, + "learning_rate": 0.00015066854254724686, + "loss": 2.1673, + "step": 281195 + }, + { + "epoch": 0.66, + "grad_norm": 2.1875, + "learning_rate": 0.0001506669490691988, + "loss": 2.1183, + "step": 281200 + }, + { + "epoch": 0.66, + "grad_norm": 1.8828125, + "learning_rate": 0.00015066535557384204, + "loss": 1.9187, + "step": 281205 + }, + { + "epoch": 0.66, + "grad_norm": 2.21875, + "learning_rate": 0.000150663762061177, + "loss": 2.0745, + "step": 281210 + }, + { + "epoch": 0.66, + "grad_norm": 1.875, + "learning_rate": 0.00015066216853120437, + "loss": 1.9705, + "step": 281215 + }, + { + "epoch": 0.66, + "grad_norm": 1.796875, + "learning_rate": 0.0001506605749839246, + "loss": 2.1761, + "step": 281220 + }, + { + "epoch": 0.66, + "grad_norm": 2.046875, + "learning_rate": 0.00015065898141933823, + "loss": 2.0641, + "step": 281225 + }, + { + "epoch": 0.66, + "grad_norm": 2.046875, + "learning_rate": 0.00015065738783744584, + "loss": 2.0497, + "step": 281230 + }, + { + "epoch": 0.66, + "grad_norm": 2.203125, + "learning_rate": 0.00015065579423824798, + "loss": 2.0331, + "step": 281235 + }, + { + "epoch": 0.66, + "grad_norm": 2.90625, + "learning_rate": 0.0001506542006217452, + "loss": 2.0064, + "step": 281240 + }, + { + "epoch": 0.66, + "grad_norm": 2.328125, + "learning_rate": 0.00015065260698793803, + "loss": 2.0987, + "step": 281245 + }, + { + "epoch": 0.66, + "grad_norm": 2.09375, + "learning_rate": 0.000150651013336827, + "loss": 2.0199, + "step": 281250 + }, + { + "epoch": 0.66, + "grad_norm": 2.328125, + "learning_rate": 0.00015064941966841264, + "loss": 2.0933, + "step": 281255 + }, + { + "epoch": 0.66, + "grad_norm": 2.234375, + "learning_rate": 0.00015064782598269553, + "loss": 2.0345, + "step": 281260 + }, + { + "epoch": 0.66, + "grad_norm": 2.328125, + "learning_rate": 0.00015064623227967623, + "loss": 1.9712, + "step": 281265 + }, + { + "epoch": 0.66, + "grad_norm": 2.125, + "learning_rate": 0.00015064463855935523, + "loss": 2.1037, + "step": 281270 + }, + { + "epoch": 0.66, + "grad_norm": 2.078125, + "learning_rate": 0.0001506430448217331, + "loss": 1.9872, + "step": 281275 + }, + { + "epoch": 0.66, + "grad_norm": 2.21875, + "learning_rate": 0.0001506414510668104, + "loss": 2.2342, + "step": 281280 + }, + { + "epoch": 0.66, + "grad_norm": 2.40625, + "learning_rate": 0.0001506398572945877, + "loss": 2.0468, + "step": 281285 + }, + { + "epoch": 0.66, + "grad_norm": 2.046875, + "learning_rate": 0.00015063826350506543, + "loss": 2.0494, + "step": 281290 + }, + { + "epoch": 0.66, + "grad_norm": 2.4375, + "learning_rate": 0.00015063666969824426, + "loss": 2.0953, + "step": 281295 + }, + { + "epoch": 0.66, + "grad_norm": 2.265625, + "learning_rate": 0.00015063507587412467, + "loss": 2.1474, + "step": 281300 + }, + { + "epoch": 0.66, + "grad_norm": 2.34375, + "learning_rate": 0.00015063348203270724, + "loss": 1.9296, + "step": 281305 + }, + { + "epoch": 0.66, + "grad_norm": 2.5625, + "learning_rate": 0.0001506318881739925, + "loss": 2.3524, + "step": 281310 + }, + { + "epoch": 0.66, + "grad_norm": 2.109375, + "learning_rate": 0.00015063029429798094, + "loss": 2.0854, + "step": 281315 + }, + { + "epoch": 0.66, + "grad_norm": 2.265625, + "learning_rate": 0.00015062870040467318, + "loss": 2.1716, + "step": 281320 + }, + { + "epoch": 0.66, + "grad_norm": 2.078125, + "learning_rate": 0.0001506271064940697, + "loss": 2.2238, + "step": 281325 + }, + { + "epoch": 0.66, + "grad_norm": 2.109375, + "learning_rate": 0.00015062551256617116, + "loss": 2.1375, + "step": 281330 + }, + { + "epoch": 0.66, + "grad_norm": 2.015625, + "learning_rate": 0.000150623918620978, + "loss": 1.9742, + "step": 281335 + }, + { + "epoch": 0.66, + "grad_norm": 2.171875, + "learning_rate": 0.0001506223246584908, + "loss": 2.0194, + "step": 281340 + }, + { + "epoch": 0.66, + "grad_norm": 2.234375, + "learning_rate": 0.00015062073067871002, + "loss": 1.9578, + "step": 281345 + }, + { + "epoch": 0.66, + "grad_norm": 1.921875, + "learning_rate": 0.00015061913668163633, + "loss": 2.0269, + "step": 281350 + }, + { + "epoch": 0.66, + "grad_norm": 2.359375, + "learning_rate": 0.00015061754266727025, + "loss": 2.1332, + "step": 281355 + }, + { + "epoch": 0.66, + "grad_norm": 2.046875, + "learning_rate": 0.00015061594863561228, + "loss": 2.1693, + "step": 281360 + }, + { + "epoch": 0.66, + "grad_norm": 2.203125, + "learning_rate": 0.000150614354586663, + "loss": 2.2332, + "step": 281365 + }, + { + "epoch": 0.66, + "grad_norm": 2.0625, + "learning_rate": 0.00015061276052042287, + "loss": 2.0042, + "step": 281370 + }, + { + "epoch": 0.66, + "grad_norm": 2.828125, + "learning_rate": 0.0001506111664368926, + "loss": 1.9944, + "step": 281375 + }, + { + "epoch": 0.66, + "grad_norm": 2.46875, + "learning_rate": 0.00015060957233607259, + "loss": 2.103, + "step": 281380 + }, + { + "epoch": 0.66, + "grad_norm": 2.34375, + "learning_rate": 0.00015060797821796342, + "loss": 2.0514, + "step": 281385 + }, + { + "epoch": 0.66, + "grad_norm": 2.375, + "learning_rate": 0.00015060638408256568, + "loss": 2.1691, + "step": 281390 + }, + { + "epoch": 0.66, + "grad_norm": 1.8828125, + "learning_rate": 0.00015060478992987985, + "loss": 2.0235, + "step": 281395 + }, + { + "epoch": 0.66, + "grad_norm": 2.484375, + "learning_rate": 0.00015060319575990652, + "loss": 2.1324, + "step": 281400 + }, + { + "epoch": 0.66, + "grad_norm": 2.0, + "learning_rate": 0.00015060160157264623, + "loss": 1.761, + "step": 281405 + }, + { + "epoch": 0.66, + "grad_norm": 2.296875, + "learning_rate": 0.00015060000736809949, + "loss": 2.2398, + "step": 281410 + }, + { + "epoch": 0.66, + "grad_norm": 2.390625, + "learning_rate": 0.0001505984131462669, + "loss": 2.1367, + "step": 281415 + }, + { + "epoch": 0.66, + "grad_norm": 2.25, + "learning_rate": 0.00015059681890714895, + "loss": 2.1085, + "step": 281420 + }, + { + "epoch": 0.66, + "grad_norm": 2.125, + "learning_rate": 0.00015059522465074622, + "loss": 1.8977, + "step": 281425 + }, + { + "epoch": 0.66, + "grad_norm": 2.328125, + "learning_rate": 0.00015059363037705925, + "loss": 1.9386, + "step": 281430 + }, + { + "epoch": 0.66, + "grad_norm": 2.203125, + "learning_rate": 0.00015059203608608856, + "loss": 2.0119, + "step": 281435 + }, + { + "epoch": 0.66, + "grad_norm": 2.484375, + "learning_rate": 0.00015059044177783473, + "loss": 2.0176, + "step": 281440 + }, + { + "epoch": 0.66, + "grad_norm": 2.125, + "learning_rate": 0.00015058884745229828, + "loss": 2.1649, + "step": 281445 + }, + { + "epoch": 0.66, + "grad_norm": 2.515625, + "learning_rate": 0.00015058725310947976, + "loss": 2.0268, + "step": 281450 + }, + { + "epoch": 0.66, + "grad_norm": 1.890625, + "learning_rate": 0.0001505856587493797, + "loss": 1.9633, + "step": 281455 + }, + { + "epoch": 0.66, + "grad_norm": 2.078125, + "learning_rate": 0.0001505840643719987, + "loss": 1.9397, + "step": 281460 + }, + { + "epoch": 0.66, + "grad_norm": 2.0, + "learning_rate": 0.00015058246997733728, + "loss": 2.1091, + "step": 281465 + }, + { + "epoch": 0.66, + "grad_norm": 2.34375, + "learning_rate": 0.00015058087556539591, + "loss": 2.0924, + "step": 281470 + }, + { + "epoch": 0.66, + "grad_norm": 2.421875, + "learning_rate": 0.0001505792811361752, + "loss": 2.0805, + "step": 281475 + }, + { + "epoch": 0.66, + "grad_norm": 2.0625, + "learning_rate": 0.0001505776866896757, + "loss": 2.1114, + "step": 281480 + }, + { + "epoch": 0.66, + "grad_norm": 2.375, + "learning_rate": 0.00015057609222589798, + "loss": 2.193, + "step": 281485 + }, + { + "epoch": 0.66, + "grad_norm": 2.46875, + "learning_rate": 0.00015057449774484255, + "loss": 2.2144, + "step": 281490 + }, + { + "epoch": 0.66, + "grad_norm": 1.9609375, + "learning_rate": 0.00015057290324650992, + "loss": 1.9594, + "step": 281495 + }, + { + "epoch": 0.66, + "grad_norm": 2.453125, + "learning_rate": 0.00015057130873090069, + "loss": 1.9758, + "step": 281500 + }, + { + "epoch": 0.66, + "grad_norm": 1.9765625, + "learning_rate": 0.00015056971419801535, + "loss": 1.9887, + "step": 281505 + }, + { + "epoch": 0.66, + "grad_norm": 2.484375, + "learning_rate": 0.00015056811964785454, + "loss": 2.1143, + "step": 281510 + }, + { + "epoch": 0.66, + "grad_norm": 2.234375, + "learning_rate": 0.00015056652508041871, + "loss": 2.2196, + "step": 281515 + }, + { + "epoch": 0.66, + "grad_norm": 1.8046875, + "learning_rate": 0.00015056493049570843, + "loss": 1.9996, + "step": 281520 + }, + { + "epoch": 0.66, + "grad_norm": 2.21875, + "learning_rate": 0.00015056333589372425, + "loss": 2.0603, + "step": 281525 + }, + { + "epoch": 0.66, + "grad_norm": 2.59375, + "learning_rate": 0.0001505617412744667, + "loss": 2.1191, + "step": 281530 + }, + { + "epoch": 0.66, + "grad_norm": 2.421875, + "learning_rate": 0.0001505601466379364, + "loss": 1.9475, + "step": 281535 + }, + { + "epoch": 0.66, + "grad_norm": 1.7578125, + "learning_rate": 0.0001505585519841338, + "loss": 2.0374, + "step": 281540 + }, + { + "epoch": 0.66, + "grad_norm": 2.5625, + "learning_rate": 0.0001505569573130595, + "loss": 2.1045, + "step": 281545 + }, + { + "epoch": 0.66, + "grad_norm": 2.15625, + "learning_rate": 0.000150555362624714, + "loss": 2.0231, + "step": 281550 + }, + { + "epoch": 0.66, + "grad_norm": 2.234375, + "learning_rate": 0.0001505537679190979, + "loss": 2.0027, + "step": 281555 + }, + { + "epoch": 0.66, + "grad_norm": 2.421875, + "learning_rate": 0.0001505521731962117, + "loss": 2.0418, + "step": 281560 + }, + { + "epoch": 0.66, + "grad_norm": 2.171875, + "learning_rate": 0.00015055057845605595, + "loss": 1.8496, + "step": 281565 + }, + { + "epoch": 0.66, + "grad_norm": 2.5, + "learning_rate": 0.00015054898369863123, + "loss": 2.1291, + "step": 281570 + }, + { + "epoch": 0.66, + "grad_norm": 2.078125, + "learning_rate": 0.00015054738892393804, + "loss": 1.9476, + "step": 281575 + }, + { + "epoch": 0.66, + "grad_norm": 2.09375, + "learning_rate": 0.00015054579413197695, + "loss": 2.0028, + "step": 281580 + }, + { + "epoch": 0.66, + "grad_norm": 2.4375, + "learning_rate": 0.0001505441993227485, + "loss": 2.0401, + "step": 281585 + }, + { + "epoch": 0.66, + "grad_norm": 2.359375, + "learning_rate": 0.00015054260449625325, + "loss": 2.1156, + "step": 281590 + }, + { + "epoch": 0.66, + "grad_norm": 2.921875, + "learning_rate": 0.0001505410096524917, + "loss": 2.1383, + "step": 281595 + }, + { + "epoch": 0.66, + "grad_norm": 2.171875, + "learning_rate": 0.00015053941479146446, + "loss": 2.0436, + "step": 281600 + }, + { + "epoch": 0.66, + "grad_norm": 2.03125, + "learning_rate": 0.000150537819913172, + "loss": 2.0245, + "step": 281605 + }, + { + "epoch": 0.66, + "grad_norm": 2.0625, + "learning_rate": 0.00015053622501761492, + "loss": 2.0791, + "step": 281610 + }, + { + "epoch": 0.66, + "grad_norm": 2.28125, + "learning_rate": 0.00015053463010479377, + "loss": 2.0327, + "step": 281615 + }, + { + "epoch": 0.66, + "grad_norm": 1.8828125, + "learning_rate": 0.00015053303517470904, + "loss": 1.9347, + "step": 281620 + }, + { + "epoch": 0.66, + "grad_norm": 2.53125, + "learning_rate": 0.00015053144022736135, + "loss": 1.9448, + "step": 281625 + }, + { + "epoch": 0.66, + "grad_norm": 1.9765625, + "learning_rate": 0.00015052984526275117, + "loss": 2.0687, + "step": 281630 + }, + { + "epoch": 0.66, + "grad_norm": 2.375, + "learning_rate": 0.0001505282502808791, + "loss": 2.1464, + "step": 281635 + }, + { + "epoch": 0.66, + "grad_norm": 3.109375, + "learning_rate": 0.00015052665528174564, + "loss": 2.0364, + "step": 281640 + }, + { + "epoch": 0.66, + "grad_norm": 2.265625, + "learning_rate": 0.00015052506026535137, + "loss": 2.0888, + "step": 281645 + }, + { + "epoch": 0.66, + "grad_norm": 2.078125, + "learning_rate": 0.00015052346523169682, + "loss": 2.0637, + "step": 281650 + }, + { + "epoch": 0.66, + "grad_norm": 2.65625, + "learning_rate": 0.00015052187018078255, + "loss": 2.2682, + "step": 281655 + }, + { + "epoch": 0.66, + "grad_norm": 2.046875, + "learning_rate": 0.0001505202751126091, + "loss": 2.0296, + "step": 281660 + }, + { + "epoch": 0.66, + "grad_norm": 2.125, + "learning_rate": 0.00015051868002717694, + "loss": 2.1171, + "step": 281665 + }, + { + "epoch": 0.66, + "grad_norm": 2.25, + "learning_rate": 0.00015051708492448676, + "loss": 2.1062, + "step": 281670 + }, + { + "epoch": 0.66, + "grad_norm": 1.7890625, + "learning_rate": 0.00015051548980453901, + "loss": 1.9663, + "step": 281675 + }, + { + "epoch": 0.66, + "grad_norm": 2.234375, + "learning_rate": 0.00015051389466733426, + "loss": 2.1431, + "step": 281680 + }, + { + "epoch": 0.66, + "grad_norm": 2.203125, + "learning_rate": 0.00015051229951287302, + "loss": 2.1463, + "step": 281685 + }, + { + "epoch": 0.66, + "grad_norm": 2.09375, + "learning_rate": 0.00015051070434115587, + "loss": 1.9957, + "step": 281690 + }, + { + "epoch": 0.66, + "grad_norm": 3.203125, + "learning_rate": 0.00015050910915218334, + "loss": 2.1155, + "step": 281695 + }, + { + "epoch": 0.66, + "grad_norm": 2.0625, + "learning_rate": 0.00015050751394595603, + "loss": 2.0093, + "step": 281700 + }, + { + "epoch": 0.66, + "grad_norm": 2.125, + "learning_rate": 0.0001505059187224744, + "loss": 2.2871, + "step": 281705 + }, + { + "epoch": 0.66, + "grad_norm": 2.5, + "learning_rate": 0.00015050432348173902, + "loss": 1.9559, + "step": 281710 + }, + { + "epoch": 0.66, + "grad_norm": 2.296875, + "learning_rate": 0.00015050272822375046, + "loss": 2.0486, + "step": 281715 + }, + { + "epoch": 0.66, + "grad_norm": 1.8359375, + "learning_rate": 0.00015050113294850926, + "loss": 1.9092, + "step": 281720 + }, + { + "epoch": 0.66, + "grad_norm": 2.515625, + "learning_rate": 0.0001504995376560159, + "loss": 2.0627, + "step": 281725 + }, + { + "epoch": 0.66, + "grad_norm": 2.234375, + "learning_rate": 0.0001504979423462711, + "loss": 2.1536, + "step": 281730 + }, + { + "epoch": 0.66, + "grad_norm": 2.859375, + "learning_rate": 0.0001504963470192752, + "loss": 1.9627, + "step": 281735 + }, + { + "epoch": 0.66, + "grad_norm": 2.171875, + "learning_rate": 0.00015049475167502885, + "loss": 2.158, + "step": 281740 + }, + { + "epoch": 0.66, + "grad_norm": 2.375, + "learning_rate": 0.00015049315631353256, + "loss": 1.9753, + "step": 281745 + }, + { + "epoch": 0.66, + "grad_norm": 2.125, + "learning_rate": 0.00015049156093478692, + "loss": 1.8833, + "step": 281750 + }, + { + "epoch": 0.66, + "grad_norm": 2.25, + "learning_rate": 0.00015048996553879243, + "loss": 2.0888, + "step": 281755 + }, + { + "epoch": 0.66, + "grad_norm": 2.390625, + "learning_rate": 0.00015048837012554968, + "loss": 1.9075, + "step": 281760 + }, + { + "epoch": 0.66, + "grad_norm": 2.28125, + "learning_rate": 0.00015048677469505916, + "loss": 1.9358, + "step": 281765 + }, + { + "epoch": 0.66, + "grad_norm": 2.515625, + "learning_rate": 0.00015048517924732145, + "loss": 2.0388, + "step": 281770 + }, + { + "epoch": 0.66, + "grad_norm": 2.53125, + "learning_rate": 0.00015048358378233706, + "loss": 2.2378, + "step": 281775 + }, + { + "epoch": 0.66, + "grad_norm": 1.921875, + "learning_rate": 0.0001504819883001066, + "loss": 2.221, + "step": 281780 + }, + { + "epoch": 0.66, + "grad_norm": 2.140625, + "learning_rate": 0.00015048039280063057, + "loss": 2.1294, + "step": 281785 + }, + { + "epoch": 0.66, + "grad_norm": 2.0625, + "learning_rate": 0.00015047879728390955, + "loss": 2.0229, + "step": 281790 + }, + { + "epoch": 0.66, + "grad_norm": 2.09375, + "learning_rate": 0.00015047720174994402, + "loss": 1.9843, + "step": 281795 + }, + { + "epoch": 0.66, + "grad_norm": 2.0625, + "learning_rate": 0.00015047560619873453, + "loss": 2.1782, + "step": 281800 + }, + { + "epoch": 0.66, + "grad_norm": 1.9609375, + "learning_rate": 0.00015047401063028173, + "loss": 2.0704, + "step": 281805 + }, + { + "epoch": 0.66, + "grad_norm": 2.0, + "learning_rate": 0.00015047241504458604, + "loss": 2.0447, + "step": 281810 + }, + { + "epoch": 0.66, + "grad_norm": 2.09375, + "learning_rate": 0.00015047081944164807, + "loss": 2.1753, + "step": 281815 + }, + { + "epoch": 0.66, + "grad_norm": 2.265625, + "learning_rate": 0.00015046922382146836, + "loss": 2.2313, + "step": 281820 + }, + { + "epoch": 0.66, + "grad_norm": 2.40625, + "learning_rate": 0.00015046762818404742, + "loss": 1.9811, + "step": 281825 + }, + { + "epoch": 0.66, + "grad_norm": 2.140625, + "learning_rate": 0.0001504660325293859, + "loss": 1.8547, + "step": 281830 + }, + { + "epoch": 0.66, + "grad_norm": 2.203125, + "learning_rate": 0.0001504644368574842, + "loss": 2.084, + "step": 281835 + }, + { + "epoch": 0.66, + "grad_norm": 2.015625, + "learning_rate": 0.00015046284116834296, + "loss": 2.1705, + "step": 281840 + }, + { + "epoch": 0.66, + "grad_norm": 3.546875, + "learning_rate": 0.00015046124546196267, + "loss": 2.0263, + "step": 281845 + }, + { + "epoch": 0.66, + "grad_norm": 2.09375, + "learning_rate": 0.00015045964973834391, + "loss": 2.0224, + "step": 281850 + }, + { + "epoch": 0.66, + "grad_norm": 2.15625, + "learning_rate": 0.00015045805399748724, + "loss": 2.2329, + "step": 281855 + }, + { + "epoch": 0.66, + "grad_norm": 2.109375, + "learning_rate": 0.00015045645823939316, + "loss": 1.9047, + "step": 281860 + }, + { + "epoch": 0.66, + "grad_norm": 2.203125, + "learning_rate": 0.00015045486246406225, + "loss": 2.0395, + "step": 281865 + }, + { + "epoch": 0.66, + "grad_norm": 2.140625, + "learning_rate": 0.00015045326667149507, + "loss": 1.9806, + "step": 281870 + }, + { + "epoch": 0.66, + "grad_norm": 2.171875, + "learning_rate": 0.00015045167086169214, + "loss": 2.229, + "step": 281875 + }, + { + "epoch": 0.66, + "grad_norm": 2.40625, + "learning_rate": 0.00015045007503465394, + "loss": 2.0338, + "step": 281880 + }, + { + "epoch": 0.66, + "grad_norm": 2.21875, + "learning_rate": 0.0001504484791903811, + "loss": 2.11, + "step": 281885 + }, + { + "epoch": 0.66, + "grad_norm": 2.890625, + "learning_rate": 0.00015044688332887417, + "loss": 1.9366, + "step": 281890 + }, + { + "epoch": 0.66, + "grad_norm": 2.5, + "learning_rate": 0.00015044528745013368, + "loss": 2.0464, + "step": 281895 + }, + { + "epoch": 0.66, + "grad_norm": 2.859375, + "learning_rate": 0.00015044369155416018, + "loss": 2.12, + "step": 281900 + }, + { + "epoch": 0.66, + "grad_norm": 2.65625, + "learning_rate": 0.00015044209564095415, + "loss": 2.0592, + "step": 281905 + }, + { + "epoch": 0.66, + "grad_norm": 2.5, + "learning_rate": 0.0001504404997105162, + "loss": 2.0561, + "step": 281910 + }, + { + "epoch": 0.66, + "grad_norm": 1.890625, + "learning_rate": 0.00015043890376284687, + "loss": 2.1079, + "step": 281915 + }, + { + "epoch": 0.66, + "grad_norm": 2.515625, + "learning_rate": 0.0001504373077979467, + "loss": 2.1147, + "step": 281920 + }, + { + "epoch": 0.66, + "grad_norm": 1.8125, + "learning_rate": 0.00015043571181581623, + "loss": 2.0815, + "step": 281925 + }, + { + "epoch": 0.66, + "grad_norm": 2.09375, + "learning_rate": 0.000150434115816456, + "loss": 2.208, + "step": 281930 + }, + { + "epoch": 0.66, + "grad_norm": 2.3125, + "learning_rate": 0.00015043251979986652, + "loss": 1.9852, + "step": 281935 + }, + { + "epoch": 0.66, + "grad_norm": 2.234375, + "learning_rate": 0.00015043092376604842, + "loss": 2.0403, + "step": 281940 + }, + { + "epoch": 0.66, + "grad_norm": 2.484375, + "learning_rate": 0.0001504293277150022, + "loss": 1.7871, + "step": 281945 + }, + { + "epoch": 0.66, + "grad_norm": 2.21875, + "learning_rate": 0.0001504277316467284, + "loss": 2.2782, + "step": 281950 + }, + { + "epoch": 0.66, + "grad_norm": 2.15625, + "learning_rate": 0.00015042613556122758, + "loss": 2.1437, + "step": 281955 + }, + { + "epoch": 0.66, + "grad_norm": 2.421875, + "learning_rate": 0.00015042453945850025, + "loss": 2.0393, + "step": 281960 + }, + { + "epoch": 0.66, + "grad_norm": 2.234375, + "learning_rate": 0.00015042294333854702, + "loss": 1.9463, + "step": 281965 + }, + { + "epoch": 0.66, + "grad_norm": 2.03125, + "learning_rate": 0.0001504213472013684, + "loss": 2.1368, + "step": 281970 + }, + { + "epoch": 0.66, + "grad_norm": 2.328125, + "learning_rate": 0.00015041975104696493, + "loss": 1.9031, + "step": 281975 + }, + { + "epoch": 0.66, + "grad_norm": 2.5, + "learning_rate": 0.00015041815487533712, + "loss": 2.008, + "step": 281980 + }, + { + "epoch": 0.66, + "grad_norm": 2.328125, + "learning_rate": 0.0001504165586864856, + "loss": 2.1866, + "step": 281985 + }, + { + "epoch": 0.66, + "grad_norm": 2.921875, + "learning_rate": 0.00015041496248041086, + "loss": 2.1119, + "step": 281990 + }, + { + "epoch": 0.66, + "grad_norm": 2.4375, + "learning_rate": 0.00015041336625711344, + "loss": 1.9706, + "step": 281995 + }, + { + "epoch": 0.66, + "grad_norm": 2.265625, + "learning_rate": 0.0001504117700165939, + "loss": 2.0391, + "step": 282000 + }, + { + "epoch": 0.66, + "grad_norm": 2.390625, + "learning_rate": 0.0001504101737588528, + "loss": 1.9842, + "step": 282005 + }, + { + "epoch": 0.66, + "grad_norm": 2.21875, + "learning_rate": 0.00015040857748389064, + "loss": 2.0332, + "step": 282010 + }, + { + "epoch": 0.66, + "grad_norm": 2.3125, + "learning_rate": 0.00015040698119170802, + "loss": 1.9035, + "step": 282015 + }, + { + "epoch": 0.66, + "grad_norm": 2.609375, + "learning_rate": 0.00015040538488230544, + "loss": 2.0202, + "step": 282020 + }, + { + "epoch": 0.66, + "grad_norm": 2.15625, + "learning_rate": 0.0001504037885556835, + "loss": 2.1448, + "step": 282025 + }, + { + "epoch": 0.66, + "grad_norm": 2.25, + "learning_rate": 0.0001504021922118427, + "loss": 2.027, + "step": 282030 + }, + { + "epoch": 0.66, + "grad_norm": 2.375, + "learning_rate": 0.0001504005958507836, + "loss": 1.9568, + "step": 282035 + }, + { + "epoch": 0.66, + "grad_norm": 2.296875, + "learning_rate": 0.0001503989994725067, + "loss": 2.0867, + "step": 282040 + }, + { + "epoch": 0.66, + "grad_norm": 2.484375, + "learning_rate": 0.00015039740307701263, + "loss": 2.141, + "step": 282045 + }, + { + "epoch": 0.66, + "grad_norm": 2.125, + "learning_rate": 0.0001503958066643019, + "loss": 2.0438, + "step": 282050 + }, + { + "epoch": 0.66, + "grad_norm": 2.078125, + "learning_rate": 0.00015039421023437503, + "loss": 2.1556, + "step": 282055 + }, + { + "epoch": 0.66, + "grad_norm": 2.046875, + "learning_rate": 0.0001503926137872326, + "loss": 2.1521, + "step": 282060 + }, + { + "epoch": 0.66, + "grad_norm": 1.765625, + "learning_rate": 0.0001503910173228751, + "loss": 1.863, + "step": 282065 + }, + { + "epoch": 0.66, + "grad_norm": 2.21875, + "learning_rate": 0.00015038942084130317, + "loss": 2.0311, + "step": 282070 + }, + { + "epoch": 0.66, + "grad_norm": 2.1875, + "learning_rate": 0.00015038782434251724, + "loss": 2.0547, + "step": 282075 + }, + { + "epoch": 0.66, + "grad_norm": 2.59375, + "learning_rate": 0.00015038622782651798, + "loss": 2.1886, + "step": 282080 + }, + { + "epoch": 0.66, + "grad_norm": 2.125, + "learning_rate": 0.00015038463129330584, + "loss": 1.9982, + "step": 282085 + }, + { + "epoch": 0.66, + "grad_norm": 2.15625, + "learning_rate": 0.00015038303474288137, + "loss": 1.9882, + "step": 282090 + }, + { + "epoch": 0.66, + "grad_norm": 2.3125, + "learning_rate": 0.00015038143817524516, + "loss": 2.1045, + "step": 282095 + }, + { + "epoch": 0.66, + "grad_norm": 2.328125, + "learning_rate": 0.00015037984159039779, + "loss": 2.1834, + "step": 282100 + }, + { + "epoch": 0.66, + "grad_norm": 2.03125, + "learning_rate": 0.00015037824498833968, + "loss": 2.1751, + "step": 282105 + }, + { + "epoch": 0.66, + "grad_norm": 2.078125, + "learning_rate": 0.00015037664836907146, + "loss": 1.9776, + "step": 282110 + }, + { + "epoch": 0.66, + "grad_norm": 2.203125, + "learning_rate": 0.00015037505173259367, + "loss": 2.0438, + "step": 282115 + }, + { + "epoch": 0.66, + "grad_norm": 2.203125, + "learning_rate": 0.00015037345507890683, + "loss": 1.8402, + "step": 282120 + }, + { + "epoch": 0.66, + "grad_norm": 2.296875, + "learning_rate": 0.00015037185840801156, + "loss": 2.0695, + "step": 282125 + }, + { + "epoch": 0.66, + "grad_norm": 2.046875, + "learning_rate": 0.00015037026171990832, + "loss": 1.8788, + "step": 282130 + }, + { + "epoch": 0.66, + "grad_norm": 2.0625, + "learning_rate": 0.00015036866501459768, + "loss": 2.1142, + "step": 282135 + }, + { + "epoch": 0.66, + "grad_norm": 2.484375, + "learning_rate": 0.00015036706829208018, + "loss": 2.1177, + "step": 282140 + }, + { + "epoch": 0.66, + "grad_norm": 2.25, + "learning_rate": 0.0001503654715523564, + "loss": 2.1305, + "step": 282145 + }, + { + "epoch": 0.66, + "grad_norm": 2.125, + "learning_rate": 0.00015036387479542684, + "loss": 2.0481, + "step": 282150 + }, + { + "epoch": 0.66, + "grad_norm": 2.1875, + "learning_rate": 0.0001503622780212921, + "loss": 2.0329, + "step": 282155 + }, + { + "epoch": 0.66, + "grad_norm": 2.171875, + "learning_rate": 0.00015036068122995265, + "loss": 2.0531, + "step": 282160 + }, + { + "epoch": 0.66, + "grad_norm": 2.015625, + "learning_rate": 0.0001503590844214091, + "loss": 1.9704, + "step": 282165 + }, + { + "epoch": 0.66, + "grad_norm": 2.0625, + "learning_rate": 0.000150357487595662, + "loss": 2.1182, + "step": 282170 + }, + { + "epoch": 0.66, + "grad_norm": 2.453125, + "learning_rate": 0.00015035589075271184, + "loss": 2.0976, + "step": 282175 + }, + { + "epoch": 0.66, + "grad_norm": 2.28125, + "learning_rate": 0.00015035429389255916, + "loss": 2.1044, + "step": 282180 + }, + { + "epoch": 0.66, + "grad_norm": 1.8046875, + "learning_rate": 0.0001503526970152046, + "loss": 1.9968, + "step": 282185 + }, + { + "epoch": 0.66, + "grad_norm": 2.46875, + "learning_rate": 0.00015035110012064863, + "loss": 2.0532, + "step": 282190 + }, + { + "epoch": 0.66, + "grad_norm": 2.609375, + "learning_rate": 0.0001503495032088918, + "loss": 2.1174, + "step": 282195 + }, + { + "epoch": 0.66, + "grad_norm": 2.09375, + "learning_rate": 0.00015034790627993465, + "loss": 2.181, + "step": 282200 + }, + { + "epoch": 0.66, + "grad_norm": 2.5, + "learning_rate": 0.00015034630933377776, + "loss": 2.1013, + "step": 282205 + }, + { + "epoch": 0.66, + "grad_norm": 2.515625, + "learning_rate": 0.00015034471237042167, + "loss": 1.9908, + "step": 282210 + }, + { + "epoch": 0.66, + "grad_norm": 2.265625, + "learning_rate": 0.0001503431153898669, + "loss": 2.2246, + "step": 282215 + }, + { + "epoch": 0.66, + "grad_norm": 2.0625, + "learning_rate": 0.000150341518392114, + "loss": 2.159, + "step": 282220 + }, + { + "epoch": 0.66, + "grad_norm": 2.0, + "learning_rate": 0.00015033992137716351, + "loss": 1.9949, + "step": 282225 + }, + { + "epoch": 0.66, + "grad_norm": 2.296875, + "learning_rate": 0.00015033832434501604, + "loss": 2.0452, + "step": 282230 + }, + { + "epoch": 0.66, + "grad_norm": 2.546875, + "learning_rate": 0.00015033672729567206, + "loss": 2.1578, + "step": 282235 + }, + { + "epoch": 0.66, + "grad_norm": 1.8125, + "learning_rate": 0.00015033513022913217, + "loss": 2.1684, + "step": 282240 + }, + { + "epoch": 0.66, + "grad_norm": 2.125, + "learning_rate": 0.00015033353314539683, + "loss": 2.1607, + "step": 282245 + }, + { + "epoch": 0.66, + "grad_norm": 2.03125, + "learning_rate": 0.0001503319360444667, + "loss": 2.057, + "step": 282250 + }, + { + "epoch": 0.66, + "grad_norm": 2.21875, + "learning_rate": 0.00015033033892634218, + "loss": 2.0383, + "step": 282255 + }, + { + "epoch": 0.66, + "grad_norm": 1.8515625, + "learning_rate": 0.000150328741791024, + "loss": 2.0724, + "step": 282260 + }, + { + "epoch": 0.66, + "grad_norm": 2.40625, + "learning_rate": 0.00015032714463851255, + "loss": 2.1594, + "step": 282265 + }, + { + "epoch": 0.66, + "grad_norm": 1.9375, + "learning_rate": 0.00015032554746880847, + "loss": 2.0362, + "step": 282270 + }, + { + "epoch": 0.66, + "grad_norm": 2.125, + "learning_rate": 0.00015032395028191227, + "loss": 1.9325, + "step": 282275 + }, + { + "epoch": 0.66, + "grad_norm": 2.3125, + "learning_rate": 0.00015032235307782448, + "loss": 2.0092, + "step": 282280 + }, + { + "epoch": 0.66, + "grad_norm": 2.125, + "learning_rate": 0.00015032075585654563, + "loss": 2.102, + "step": 282285 + }, + { + "epoch": 0.66, + "grad_norm": 2.21875, + "learning_rate": 0.00015031915861807635, + "loss": 2.2028, + "step": 282290 + }, + { + "epoch": 0.66, + "grad_norm": 2.0, + "learning_rate": 0.00015031756136241713, + "loss": 2.0015, + "step": 282295 + }, + { + "epoch": 0.66, + "grad_norm": 2.421875, + "learning_rate": 0.00015031596408956847, + "loss": 1.9713, + "step": 282300 + }, + { + "epoch": 0.66, + "grad_norm": 2.25, + "learning_rate": 0.00015031436679953099, + "loss": 2.0638, + "step": 282305 + }, + { + "epoch": 0.66, + "grad_norm": 2.28125, + "learning_rate": 0.0001503127694923052, + "loss": 2.1819, + "step": 282310 + }, + { + "epoch": 0.66, + "grad_norm": 2.515625, + "learning_rate": 0.0001503111721678917, + "loss": 1.8671, + "step": 282315 + }, + { + "epoch": 0.66, + "grad_norm": 2.234375, + "learning_rate": 0.00015030957482629095, + "loss": 1.9631, + "step": 282320 + }, + { + "epoch": 0.66, + "grad_norm": 1.9609375, + "learning_rate": 0.00015030797746750355, + "loss": 2.0789, + "step": 282325 + }, + { + "epoch": 0.66, + "grad_norm": 1.8671875, + "learning_rate": 0.00015030638009153006, + "loss": 2.1037, + "step": 282330 + }, + { + "epoch": 0.66, + "grad_norm": 2.21875, + "learning_rate": 0.00015030478269837096, + "loss": 2.0837, + "step": 282335 + }, + { + "epoch": 0.66, + "grad_norm": 2.4375, + "learning_rate": 0.00015030318528802683, + "loss": 1.9947, + "step": 282340 + }, + { + "epoch": 0.66, + "grad_norm": 2.5625, + "learning_rate": 0.00015030158786049825, + "loss": 1.9252, + "step": 282345 + }, + { + "epoch": 0.66, + "grad_norm": 2.125, + "learning_rate": 0.0001502999904157857, + "loss": 1.9092, + "step": 282350 + }, + { + "epoch": 0.66, + "grad_norm": 2.265625, + "learning_rate": 0.0001502983929538898, + "loss": 2.0285, + "step": 282355 + }, + { + "epoch": 0.66, + "grad_norm": 2.34375, + "learning_rate": 0.000150296795474811, + "loss": 2.047, + "step": 282360 + }, + { + "epoch": 0.66, + "grad_norm": 2.5, + "learning_rate": 0.00015029519797854995, + "loss": 1.959, + "step": 282365 + }, + { + "epoch": 0.66, + "grad_norm": 2.09375, + "learning_rate": 0.00015029360046510715, + "loss": 2.2173, + "step": 282370 + }, + { + "epoch": 0.66, + "grad_norm": 2.28125, + "learning_rate": 0.00015029200293448315, + "loss": 2.1548, + "step": 282375 + }, + { + "epoch": 0.66, + "grad_norm": 2.6875, + "learning_rate": 0.00015029040538667844, + "loss": 2.1157, + "step": 282380 + }, + { + "epoch": 0.66, + "grad_norm": 2.5, + "learning_rate": 0.00015028880782169365, + "loss": 1.9291, + "step": 282385 + }, + { + "epoch": 0.66, + "grad_norm": 2.421875, + "learning_rate": 0.00015028721023952927, + "loss": 2.0599, + "step": 282390 + }, + { + "epoch": 0.66, + "grad_norm": 2.171875, + "learning_rate": 0.00015028561264018593, + "loss": 2.1243, + "step": 282395 + }, + { + "epoch": 0.66, + "grad_norm": 1.90625, + "learning_rate": 0.00015028401502366405, + "loss": 1.9465, + "step": 282400 + }, + { + "epoch": 0.66, + "grad_norm": 2.5, + "learning_rate": 0.00015028241738996423, + "loss": 2.0756, + "step": 282405 + }, + { + "epoch": 0.66, + "grad_norm": 2.46875, + "learning_rate": 0.00015028081973908708, + "loss": 2.1605, + "step": 282410 + }, + { + "epoch": 0.66, + "grad_norm": 2.078125, + "learning_rate": 0.00015027922207103305, + "loss": 1.9433, + "step": 282415 + }, + { + "epoch": 0.66, + "grad_norm": 2.640625, + "learning_rate": 0.00015027762438580276, + "loss": 2.1721, + "step": 282420 + }, + { + "epoch": 0.66, + "grad_norm": 2.453125, + "learning_rate": 0.0001502760266833967, + "loss": 1.9653, + "step": 282425 + }, + { + "epoch": 0.66, + "grad_norm": 2.25, + "learning_rate": 0.00015027442896381541, + "loss": 1.8863, + "step": 282430 + }, + { + "epoch": 0.66, + "grad_norm": 2.296875, + "learning_rate": 0.00015027283122705947, + "loss": 2.2313, + "step": 282435 + }, + { + "epoch": 0.66, + "grad_norm": 2.28125, + "learning_rate": 0.00015027123347312945, + "loss": 2.0559, + "step": 282440 + }, + { + "epoch": 0.66, + "grad_norm": 1.9609375, + "learning_rate": 0.00015026963570202584, + "loss": 2.0609, + "step": 282445 + }, + { + "epoch": 0.66, + "grad_norm": 2.15625, + "learning_rate": 0.00015026803791374924, + "loss": 2.1571, + "step": 282450 + }, + { + "epoch": 0.66, + "grad_norm": 2.28125, + "learning_rate": 0.00015026644010830012, + "loss": 2.1227, + "step": 282455 + }, + { + "epoch": 0.66, + "grad_norm": 2.21875, + "learning_rate": 0.0001502648422856791, + "loss": 2.0119, + "step": 282460 + }, + { + "epoch": 0.66, + "grad_norm": 2.25, + "learning_rate": 0.00015026324444588672, + "loss": 2.0847, + "step": 282465 + }, + { + "epoch": 0.66, + "grad_norm": 1.9609375, + "learning_rate": 0.0001502616465889235, + "loss": 1.9806, + "step": 282470 + }, + { + "epoch": 0.66, + "grad_norm": 2.140625, + "learning_rate": 0.00015026004871478995, + "loss": 2.1275, + "step": 282475 + }, + { + "epoch": 0.66, + "grad_norm": 1.9921875, + "learning_rate": 0.00015025845082348668, + "loss": 2.087, + "step": 282480 + }, + { + "epoch": 0.66, + "grad_norm": 2.21875, + "learning_rate": 0.0001502568529150142, + "loss": 2.1793, + "step": 282485 + }, + { + "epoch": 0.66, + "grad_norm": 2.25, + "learning_rate": 0.0001502552549893731, + "loss": 2.1336, + "step": 282490 + }, + { + "epoch": 0.66, + "grad_norm": 2.15625, + "learning_rate": 0.00015025365704656387, + "loss": 1.6673, + "step": 282495 + }, + { + "epoch": 0.66, + "grad_norm": 2.296875, + "learning_rate": 0.00015025205908658707, + "loss": 2.0694, + "step": 282500 + }, + { + "epoch": 0.66, + "grad_norm": 2.28125, + "learning_rate": 0.00015025046110944326, + "loss": 2.0524, + "step": 282505 + }, + { + "epoch": 0.66, + "grad_norm": 2.515625, + "learning_rate": 0.000150248863115133, + "loss": 2.2037, + "step": 282510 + }, + { + "epoch": 0.66, + "grad_norm": 2.140625, + "learning_rate": 0.00015024726510365683, + "loss": 2.0237, + "step": 282515 + }, + { + "epoch": 0.66, + "grad_norm": 2.75, + "learning_rate": 0.00015024566707501523, + "loss": 2.1862, + "step": 282520 + }, + { + "epoch": 0.66, + "grad_norm": 2.609375, + "learning_rate": 0.00015024406902920884, + "loss": 2.2127, + "step": 282525 + }, + { + "epoch": 0.66, + "grad_norm": 2.125, + "learning_rate": 0.00015024247096623812, + "loss": 1.9989, + "step": 282530 + }, + { + "epoch": 0.66, + "grad_norm": 2.28125, + "learning_rate": 0.00015024087288610373, + "loss": 2.1396, + "step": 282535 + }, + { + "epoch": 0.66, + "grad_norm": 2.28125, + "learning_rate": 0.00015023927478880614, + "loss": 2.1179, + "step": 282540 + }, + { + "epoch": 0.66, + "grad_norm": 2.046875, + "learning_rate": 0.00015023767667434585, + "loss": 2.2315, + "step": 282545 + }, + { + "epoch": 0.66, + "grad_norm": 2.140625, + "learning_rate": 0.00015023607854272345, + "loss": 2.0714, + "step": 282550 + }, + { + "epoch": 0.66, + "grad_norm": 2.421875, + "learning_rate": 0.00015023448039393954, + "loss": 2.1516, + "step": 282555 + }, + { + "epoch": 0.66, + "grad_norm": 2.171875, + "learning_rate": 0.00015023288222799463, + "loss": 2.1989, + "step": 282560 + }, + { + "epoch": 0.66, + "grad_norm": 1.6328125, + "learning_rate": 0.00015023128404488922, + "loss": 1.935, + "step": 282565 + }, + { + "epoch": 0.66, + "grad_norm": 2.0625, + "learning_rate": 0.0001502296858446239, + "loss": 1.9654, + "step": 282570 + }, + { + "epoch": 0.66, + "grad_norm": 1.8828125, + "learning_rate": 0.00015022808762719922, + "loss": 1.9279, + "step": 282575 + }, + { + "epoch": 0.67, + "grad_norm": 2.6875, + "learning_rate": 0.0001502264893926157, + "loss": 2.0306, + "step": 282580 + }, + { + "epoch": 0.67, + "grad_norm": 1.78125, + "learning_rate": 0.0001502248911408739, + "loss": 1.946, + "step": 282585 + }, + { + "epoch": 0.67, + "grad_norm": 2.125, + "learning_rate": 0.00015022329287197437, + "loss": 2.0327, + "step": 282590 + }, + { + "epoch": 0.67, + "grad_norm": 2.359375, + "learning_rate": 0.00015022169458591767, + "loss": 2.1597, + "step": 282595 + }, + { + "epoch": 0.67, + "grad_norm": 2.359375, + "learning_rate": 0.0001502200962827043, + "loss": 2.3629, + "step": 282600 + }, + { + "epoch": 0.67, + "grad_norm": 2.453125, + "learning_rate": 0.00015021849796233484, + "loss": 2.1481, + "step": 282605 + }, + { + "epoch": 0.67, + "grad_norm": 2.25, + "learning_rate": 0.00015021689962480984, + "loss": 2.0651, + "step": 282610 + }, + { + "epoch": 0.67, + "grad_norm": 1.7890625, + "learning_rate": 0.00015021530127012987, + "loss": 1.8853, + "step": 282615 + }, + { + "epoch": 0.67, + "grad_norm": 2.359375, + "learning_rate": 0.00015021370289829537, + "loss": 2.1362, + "step": 282620 + }, + { + "epoch": 0.67, + "grad_norm": 2.5625, + "learning_rate": 0.00015021210450930702, + "loss": 2.1583, + "step": 282625 + }, + { + "epoch": 0.67, + "grad_norm": 2.65625, + "learning_rate": 0.00015021050610316528, + "loss": 2.1588, + "step": 282630 + }, + { + "epoch": 0.67, + "grad_norm": 2.21875, + "learning_rate": 0.00015020890767987073, + "loss": 2.0294, + "step": 282635 + }, + { + "epoch": 0.67, + "grad_norm": 2.25, + "learning_rate": 0.00015020730923942388, + "loss": 1.992, + "step": 282640 + }, + { + "epoch": 0.67, + "grad_norm": 2.296875, + "learning_rate": 0.0001502057107818253, + "loss": 1.9025, + "step": 282645 + }, + { + "epoch": 0.67, + "grad_norm": 2.28125, + "learning_rate": 0.0001502041123070756, + "loss": 2.0011, + "step": 282650 + }, + { + "epoch": 0.67, + "grad_norm": 2.578125, + "learning_rate": 0.00015020251381517522, + "loss": 2.11, + "step": 282655 + }, + { + "epoch": 0.67, + "grad_norm": 1.890625, + "learning_rate": 0.00015020091530612475, + "loss": 2.0483, + "step": 282660 + }, + { + "epoch": 0.67, + "grad_norm": 2.125, + "learning_rate": 0.00015019931677992472, + "loss": 2.0881, + "step": 282665 + }, + { + "epoch": 0.67, + "grad_norm": 2.578125, + "learning_rate": 0.00015019771823657574, + "loss": 1.9694, + "step": 282670 + }, + { + "epoch": 0.67, + "grad_norm": 2.0, + "learning_rate": 0.00015019611967607828, + "loss": 2.297, + "step": 282675 + }, + { + "epoch": 0.67, + "grad_norm": 2.15625, + "learning_rate": 0.00015019452109843294, + "loss": 2.0965, + "step": 282680 + }, + { + "epoch": 0.67, + "grad_norm": 2.578125, + "learning_rate": 0.00015019292250364016, + "loss": 2.1769, + "step": 282685 + }, + { + "epoch": 0.67, + "grad_norm": 2.359375, + "learning_rate": 0.00015019132389170066, + "loss": 2.0017, + "step": 282690 + }, + { + "epoch": 0.67, + "grad_norm": 2.15625, + "learning_rate": 0.00015018972526261487, + "loss": 2.0542, + "step": 282695 + }, + { + "epoch": 0.67, + "grad_norm": 2.671875, + "learning_rate": 0.00015018812661638333, + "loss": 2.0384, + "step": 282700 + }, + { + "epoch": 0.67, + "grad_norm": 2.25, + "learning_rate": 0.00015018652795300666, + "loss": 2.0548, + "step": 282705 + }, + { + "epoch": 0.67, + "grad_norm": 1.9921875, + "learning_rate": 0.0001501849292724853, + "loss": 1.9457, + "step": 282710 + }, + { + "epoch": 0.67, + "grad_norm": 2.375, + "learning_rate": 0.00015018333057481993, + "loss": 2.1457, + "step": 282715 + }, + { + "epoch": 0.67, + "grad_norm": 2.359375, + "learning_rate": 0.00015018173186001098, + "loss": 2.1142, + "step": 282720 + }, + { + "epoch": 0.67, + "grad_norm": 2.140625, + "learning_rate": 0.00015018013312805906, + "loss": 2.1208, + "step": 282725 + }, + { + "epoch": 0.67, + "grad_norm": 2.34375, + "learning_rate": 0.0001501785343789647, + "loss": 1.9113, + "step": 282730 + }, + { + "epoch": 0.67, + "grad_norm": 2.640625, + "learning_rate": 0.0001501769356127284, + "loss": 2.2905, + "step": 282735 + }, + { + "epoch": 0.67, + "grad_norm": 2.234375, + "learning_rate": 0.00015017533682935077, + "loss": 2.058, + "step": 282740 + }, + { + "epoch": 0.67, + "grad_norm": 2.25, + "learning_rate": 0.00015017373802883236, + "loss": 2.0544, + "step": 282745 + }, + { + "epoch": 0.67, + "grad_norm": 2.125, + "learning_rate": 0.0001501721392111737, + "loss": 1.9467, + "step": 282750 + }, + { + "epoch": 0.67, + "grad_norm": 2.03125, + "learning_rate": 0.00015017054037637525, + "loss": 2.033, + "step": 282755 + }, + { + "epoch": 0.67, + "grad_norm": 2.40625, + "learning_rate": 0.0001501689415244377, + "loss": 2.3167, + "step": 282760 + }, + { + "epoch": 0.67, + "grad_norm": 2.0625, + "learning_rate": 0.0001501673426553615, + "loss": 1.9267, + "step": 282765 + }, + { + "epoch": 0.67, + "grad_norm": 2.25, + "learning_rate": 0.00015016574376914727, + "loss": 1.8744, + "step": 282770 + }, + { + "epoch": 0.67, + "grad_norm": 2.296875, + "learning_rate": 0.00015016414486579546, + "loss": 2.0631, + "step": 282775 + }, + { + "epoch": 0.67, + "grad_norm": 1.640625, + "learning_rate": 0.0001501625459453067, + "loss": 2.041, + "step": 282780 + }, + { + "epoch": 0.67, + "grad_norm": 2.3125, + "learning_rate": 0.00015016094700768151, + "loss": 2.173, + "step": 282785 + }, + { + "epoch": 0.67, + "grad_norm": 2.4375, + "learning_rate": 0.0001501593480529204, + "loss": 2.024, + "step": 282790 + }, + { + "epoch": 0.67, + "grad_norm": 2.5625, + "learning_rate": 0.00015015774908102395, + "loss": 2.1099, + "step": 282795 + }, + { + "epoch": 0.67, + "grad_norm": 2.125, + "learning_rate": 0.00015015615009199274, + "loss": 1.829, + "step": 282800 + }, + { + "epoch": 0.67, + "grad_norm": 2.296875, + "learning_rate": 0.00015015455108582728, + "loss": 2.0364, + "step": 282805 + }, + { + "epoch": 0.67, + "grad_norm": 2.109375, + "learning_rate": 0.0001501529520625281, + "loss": 2.0347, + "step": 282810 + }, + { + "epoch": 0.67, + "grad_norm": 2.421875, + "learning_rate": 0.00015015135302209572, + "loss": 2.208, + "step": 282815 + }, + { + "epoch": 0.67, + "grad_norm": 2.09375, + "learning_rate": 0.00015014975396453076, + "loss": 1.9431, + "step": 282820 + }, + { + "epoch": 0.67, + "grad_norm": 2.25, + "learning_rate": 0.00015014815488983372, + "loss": 1.9515, + "step": 282825 + }, + { + "epoch": 0.67, + "grad_norm": 2.203125, + "learning_rate": 0.0001501465557980052, + "loss": 2.1838, + "step": 282830 + }, + { + "epoch": 0.67, + "grad_norm": 2.5, + "learning_rate": 0.0001501449566890457, + "loss": 2.1702, + "step": 282835 + }, + { + "epoch": 0.67, + "grad_norm": 2.25, + "learning_rate": 0.00015014335756295574, + "loss": 2.0736, + "step": 282840 + }, + { + "epoch": 0.67, + "grad_norm": 2.390625, + "learning_rate": 0.0001501417584197359, + "loss": 2.0132, + "step": 282845 + }, + { + "epoch": 0.67, + "grad_norm": 2.125, + "learning_rate": 0.00015014015925938676, + "loss": 2.0914, + "step": 282850 + }, + { + "epoch": 0.67, + "grad_norm": 1.984375, + "learning_rate": 0.00015013856008190882, + "loss": 1.9765, + "step": 282855 + }, + { + "epoch": 0.67, + "grad_norm": 2.34375, + "learning_rate": 0.00015013696088730263, + "loss": 2.1018, + "step": 282860 + }, + { + "epoch": 0.67, + "grad_norm": 2.4375, + "learning_rate": 0.00015013536167556875, + "loss": 2.0076, + "step": 282865 + }, + { + "epoch": 0.67, + "grad_norm": 2.109375, + "learning_rate": 0.0001501337624467077, + "loss": 1.9992, + "step": 282870 + }, + { + "epoch": 0.67, + "grad_norm": 2.328125, + "learning_rate": 0.0001501321632007201, + "loss": 2.0243, + "step": 282875 + }, + { + "epoch": 0.67, + "grad_norm": 2.125, + "learning_rate": 0.0001501305639376064, + "loss": 2.2273, + "step": 282880 + }, + { + "epoch": 0.67, + "grad_norm": 2.265625, + "learning_rate": 0.0001501289646573672, + "loss": 1.9689, + "step": 282885 + }, + { + "epoch": 0.67, + "grad_norm": 2.1875, + "learning_rate": 0.00015012736536000306, + "loss": 1.9333, + "step": 282890 + }, + { + "epoch": 0.67, + "grad_norm": 2.046875, + "learning_rate": 0.00015012576604551446, + "loss": 2.0639, + "step": 282895 + }, + { + "epoch": 0.67, + "grad_norm": 2.109375, + "learning_rate": 0.000150124166713902, + "loss": 2.0561, + "step": 282900 + }, + { + "epoch": 0.67, + "grad_norm": 1.90625, + "learning_rate": 0.00015012256736516623, + "loss": 1.9417, + "step": 282905 + }, + { + "epoch": 0.67, + "grad_norm": 2.09375, + "learning_rate": 0.00015012096799930767, + "loss": 2.0101, + "step": 282910 + }, + { + "epoch": 0.67, + "grad_norm": 2.46875, + "learning_rate": 0.0001501193686163269, + "loss": 2.0507, + "step": 282915 + }, + { + "epoch": 0.67, + "grad_norm": 2.0625, + "learning_rate": 0.00015011776921622445, + "loss": 2.0746, + "step": 282920 + }, + { + "epoch": 0.67, + "grad_norm": 2.46875, + "learning_rate": 0.00015011616979900084, + "loss": 1.9378, + "step": 282925 + }, + { + "epoch": 0.67, + "grad_norm": 2.046875, + "learning_rate": 0.00015011457036465662, + "loss": 2.1924, + "step": 282930 + }, + { + "epoch": 0.67, + "grad_norm": 2.3125, + "learning_rate": 0.00015011297091319236, + "loss": 2.1116, + "step": 282935 + }, + { + "epoch": 0.67, + "grad_norm": 1.5625, + "learning_rate": 0.00015011137144460863, + "loss": 1.9912, + "step": 282940 + }, + { + "epoch": 0.67, + "grad_norm": 2.59375, + "learning_rate": 0.00015010977195890597, + "loss": 2.0725, + "step": 282945 + }, + { + "epoch": 0.67, + "grad_norm": 2.046875, + "learning_rate": 0.00015010817245608485, + "loss": 1.8939, + "step": 282950 + }, + { + "epoch": 0.67, + "grad_norm": 2.109375, + "learning_rate": 0.00015010657293614585, + "loss": 1.9718, + "step": 282955 + }, + { + "epoch": 0.67, + "grad_norm": 2.609375, + "learning_rate": 0.00015010497339908957, + "loss": 2.0274, + "step": 282960 + }, + { + "epoch": 0.67, + "grad_norm": 2.1875, + "learning_rate": 0.00015010337384491654, + "loss": 1.9619, + "step": 282965 + }, + { + "epoch": 0.67, + "grad_norm": 2.53125, + "learning_rate": 0.00015010177427362726, + "loss": 2.1048, + "step": 282970 + }, + { + "epoch": 0.67, + "grad_norm": 2.078125, + "learning_rate": 0.00015010017468522233, + "loss": 2.0161, + "step": 282975 + }, + { + "epoch": 0.67, + "grad_norm": 2.078125, + "learning_rate": 0.00015009857507970224, + "loss": 1.9194, + "step": 282980 + }, + { + "epoch": 0.67, + "grad_norm": 2.109375, + "learning_rate": 0.0001500969754570676, + "loss": 2.044, + "step": 282985 + }, + { + "epoch": 0.67, + "grad_norm": 2.640625, + "learning_rate": 0.0001500953758173189, + "loss": 1.9214, + "step": 282990 + }, + { + "epoch": 0.67, + "grad_norm": 2.328125, + "learning_rate": 0.00015009377616045675, + "loss": 2.1057, + "step": 282995 + }, + { + "epoch": 0.67, + "grad_norm": 2.828125, + "learning_rate": 0.00015009217648648162, + "loss": 2.1803, + "step": 283000 + }, + { + "epoch": 0.67, + "grad_norm": 2.265625, + "learning_rate": 0.0001500905767953941, + "loss": 2.0629, + "step": 283005 + }, + { + "epoch": 0.67, + "grad_norm": 2.4375, + "learning_rate": 0.00015008897708719473, + "loss": 2.0778, + "step": 283010 + }, + { + "epoch": 0.67, + "grad_norm": 2.328125, + "learning_rate": 0.00015008737736188404, + "loss": 2.0699, + "step": 283015 + }, + { + "epoch": 0.67, + "grad_norm": 2.109375, + "learning_rate": 0.00015008577761946263, + "loss": 2.2923, + "step": 283020 + }, + { + "epoch": 0.67, + "grad_norm": 2.28125, + "learning_rate": 0.00015008417785993097, + "loss": 2.0917, + "step": 283025 + }, + { + "epoch": 0.67, + "grad_norm": 2.46875, + "learning_rate": 0.00015008257808328968, + "loss": 2.1562, + "step": 283030 + }, + { + "epoch": 0.67, + "grad_norm": 2.515625, + "learning_rate": 0.00015008097828953928, + "loss": 2.2408, + "step": 283035 + }, + { + "epoch": 0.67, + "grad_norm": 2.109375, + "learning_rate": 0.0001500793784786803, + "loss": 2.0086, + "step": 283040 + }, + { + "epoch": 0.67, + "grad_norm": 2.296875, + "learning_rate": 0.00015007777865071328, + "loss": 1.908, + "step": 283045 + }, + { + "epoch": 0.67, + "grad_norm": 2.03125, + "learning_rate": 0.0001500761788056388, + "loss": 2.1503, + "step": 283050 + }, + { + "epoch": 0.67, + "grad_norm": 1.84375, + "learning_rate": 0.00015007457894345737, + "loss": 2.1166, + "step": 283055 + }, + { + "epoch": 0.67, + "grad_norm": 2.25, + "learning_rate": 0.00015007297906416956, + "loss": 2.1915, + "step": 283060 + }, + { + "epoch": 0.67, + "grad_norm": 2.0625, + "learning_rate": 0.00015007137916777592, + "loss": 2.1373, + "step": 283065 + }, + { + "epoch": 0.67, + "grad_norm": 2.0625, + "learning_rate": 0.00015006977925427698, + "loss": 2.1239, + "step": 283070 + }, + { + "epoch": 0.67, + "grad_norm": 1.7890625, + "learning_rate": 0.00015006817932367332, + "loss": 2.0731, + "step": 283075 + }, + { + "epoch": 0.67, + "grad_norm": 2.3125, + "learning_rate": 0.00015006657937596547, + "loss": 2.0068, + "step": 283080 + }, + { + "epoch": 0.67, + "grad_norm": 2.109375, + "learning_rate": 0.00015006497941115392, + "loss": 2.1029, + "step": 283085 + }, + { + "epoch": 0.67, + "grad_norm": 2.453125, + "learning_rate": 0.0001500633794292393, + "loss": 2.1842, + "step": 283090 + }, + { + "epoch": 0.67, + "grad_norm": 2.078125, + "learning_rate": 0.0001500617794302221, + "loss": 2.0183, + "step": 283095 + }, + { + "epoch": 0.67, + "grad_norm": 2.078125, + "learning_rate": 0.00015006017941410291, + "loss": 2.095, + "step": 283100 + }, + { + "epoch": 0.67, + "grad_norm": 2.09375, + "learning_rate": 0.00015005857938088222, + "loss": 2.1721, + "step": 283105 + }, + { + "epoch": 0.67, + "grad_norm": 2.171875, + "learning_rate": 0.00015005697933056063, + "loss": 2.1653, + "step": 283110 + }, + { + "epoch": 0.67, + "grad_norm": 2.234375, + "learning_rate": 0.00015005537926313868, + "loss": 2.0928, + "step": 283115 + }, + { + "epoch": 0.67, + "grad_norm": 2.4375, + "learning_rate": 0.0001500537791786169, + "loss": 1.9898, + "step": 283120 + }, + { + "epoch": 0.67, + "grad_norm": 2.578125, + "learning_rate": 0.00015005217907699585, + "loss": 2.2207, + "step": 283125 + }, + { + "epoch": 0.67, + "grad_norm": 2.046875, + "learning_rate": 0.00015005057895827606, + "loss": 2.0031, + "step": 283130 + }, + { + "epoch": 0.67, + "grad_norm": 2.328125, + "learning_rate": 0.00015004897882245807, + "loss": 2.1355, + "step": 283135 + }, + { + "epoch": 0.67, + "grad_norm": 1.921875, + "learning_rate": 0.00015004737866954244, + "loss": 2.1333, + "step": 283140 + }, + { + "epoch": 0.67, + "grad_norm": 2.359375, + "learning_rate": 0.00015004577849952978, + "loss": 2.0772, + "step": 283145 + }, + { + "epoch": 0.67, + "grad_norm": 2.421875, + "learning_rate": 0.0001500441783124205, + "loss": 1.8625, + "step": 283150 + }, + { + "epoch": 0.67, + "grad_norm": 2.46875, + "learning_rate": 0.00015004257810821521, + "loss": 2.0791, + "step": 283155 + }, + { + "epoch": 0.67, + "grad_norm": 2.4375, + "learning_rate": 0.00015004097788691453, + "loss": 1.9828, + "step": 283160 + }, + { + "epoch": 0.67, + "grad_norm": 2.0625, + "learning_rate": 0.0001500393776485189, + "loss": 2.1906, + "step": 283165 + }, + { + "epoch": 0.67, + "grad_norm": 2.1875, + "learning_rate": 0.00015003777739302897, + "loss": 2.1346, + "step": 283170 + }, + { + "epoch": 0.67, + "grad_norm": 2.109375, + "learning_rate": 0.00015003617712044517, + "loss": 1.9198, + "step": 283175 + }, + { + "epoch": 0.67, + "grad_norm": 1.96875, + "learning_rate": 0.0001500345768307681, + "loss": 1.9517, + "step": 283180 + }, + { + "epoch": 0.67, + "grad_norm": 1.875, + "learning_rate": 0.00015003297652399836, + "loss": 1.9264, + "step": 283185 + }, + { + "epoch": 0.67, + "grad_norm": 2.234375, + "learning_rate": 0.0001500313762001364, + "loss": 2.0968, + "step": 283190 + }, + { + "epoch": 0.67, + "grad_norm": 1.7578125, + "learning_rate": 0.00015002977585918282, + "loss": 1.9218, + "step": 283195 + }, + { + "epoch": 0.67, + "grad_norm": 2.0625, + "learning_rate": 0.0001500281755011382, + "loss": 2.1003, + "step": 283200 + }, + { + "epoch": 0.67, + "grad_norm": 1.9921875, + "learning_rate": 0.00015002657512600302, + "loss": 2.2077, + "step": 283205 + }, + { + "epoch": 0.67, + "grad_norm": 1.859375, + "learning_rate": 0.00015002497473377786, + "loss": 1.9425, + "step": 283210 + }, + { + "epoch": 0.67, + "grad_norm": 2.140625, + "learning_rate": 0.00015002337432446327, + "loss": 2.0597, + "step": 283215 + }, + { + "epoch": 0.67, + "grad_norm": 2.25, + "learning_rate": 0.00015002177389805975, + "loss": 2.057, + "step": 283220 + }, + { + "epoch": 0.67, + "grad_norm": 2.1875, + "learning_rate": 0.0001500201734545679, + "loss": 1.9236, + "step": 283225 + }, + { + "epoch": 0.67, + "grad_norm": 1.984375, + "learning_rate": 0.0001500185729939883, + "loss": 2.1076, + "step": 283230 + }, + { + "epoch": 0.67, + "grad_norm": 2.125, + "learning_rate": 0.0001500169725163214, + "loss": 2.0507, + "step": 283235 + }, + { + "epoch": 0.67, + "grad_norm": 1.9921875, + "learning_rate": 0.00015001537202156783, + "loss": 2.011, + "step": 283240 + }, + { + "epoch": 0.67, + "grad_norm": 2.21875, + "learning_rate": 0.00015001377150972807, + "loss": 2.1903, + "step": 283245 + }, + { + "epoch": 0.67, + "grad_norm": 2.125, + "learning_rate": 0.0001500121709808027, + "loss": 1.9467, + "step": 283250 + }, + { + "epoch": 0.67, + "grad_norm": 2.109375, + "learning_rate": 0.00015001057043479227, + "loss": 2.0779, + "step": 283255 + }, + { + "epoch": 0.67, + "grad_norm": 2.21875, + "learning_rate": 0.00015000896987169734, + "loss": 2.2855, + "step": 283260 + }, + { + "epoch": 0.67, + "grad_norm": 2.78125, + "learning_rate": 0.00015000736929151841, + "loss": 2.044, + "step": 283265 + }, + { + "epoch": 0.67, + "grad_norm": 3.109375, + "learning_rate": 0.00015000576869425607, + "loss": 1.8615, + "step": 283270 + }, + { + "epoch": 0.67, + "grad_norm": 2.171875, + "learning_rate": 0.00015000416807991085, + "loss": 1.9316, + "step": 283275 + }, + { + "epoch": 0.67, + "grad_norm": 1.75, + "learning_rate": 0.00015000256744848332, + "loss": 2.0519, + "step": 283280 + }, + { + "epoch": 0.67, + "grad_norm": 2.5, + "learning_rate": 0.000150000966799974, + "loss": 2.1039, + "step": 283285 + }, + { + "epoch": 0.67, + "grad_norm": 2.25, + "learning_rate": 0.00014999936613438342, + "loss": 2.0245, + "step": 283290 + }, + { + "epoch": 0.67, + "grad_norm": 2.515625, + "learning_rate": 0.00014999776545171215, + "loss": 2.1619, + "step": 283295 + }, + { + "epoch": 0.67, + "grad_norm": 2.125, + "learning_rate": 0.00014999616475196072, + "loss": 1.9897, + "step": 283300 + }, + { + "epoch": 0.67, + "grad_norm": 2.34375, + "learning_rate": 0.00014999456403512975, + "loss": 2.1671, + "step": 283305 + }, + { + "epoch": 0.67, + "grad_norm": 1.8671875, + "learning_rate": 0.0001499929633012197, + "loss": 1.9049, + "step": 283310 + }, + { + "epoch": 0.67, + "grad_norm": 2.140625, + "learning_rate": 0.00014999136255023114, + "loss": 2.027, + "step": 283315 + }, + { + "epoch": 0.67, + "grad_norm": 1.9921875, + "learning_rate": 0.00014998976178216464, + "loss": 1.9525, + "step": 283320 + }, + { + "epoch": 0.67, + "grad_norm": 2.21875, + "learning_rate": 0.00014998816099702072, + "loss": 2.0413, + "step": 283325 + }, + { + "epoch": 0.67, + "grad_norm": 2.8125, + "learning_rate": 0.00014998656019479993, + "loss": 1.8537, + "step": 283330 + }, + { + "epoch": 0.67, + "grad_norm": 2.203125, + "learning_rate": 0.00014998495937550285, + "loss": 1.9691, + "step": 283335 + }, + { + "epoch": 0.67, + "grad_norm": 2.1875, + "learning_rate": 0.00014998335853912996, + "loss": 2.1501, + "step": 283340 + }, + { + "epoch": 0.67, + "grad_norm": 1.828125, + "learning_rate": 0.0001499817576856819, + "loss": 2.0322, + "step": 283345 + }, + { + "epoch": 0.67, + "grad_norm": 2.1875, + "learning_rate": 0.00014998015681515914, + "loss": 2.1209, + "step": 283350 + }, + { + "epoch": 0.67, + "grad_norm": 1.796875, + "learning_rate": 0.00014997855592756225, + "loss": 1.9343, + "step": 283355 + }, + { + "epoch": 0.67, + "grad_norm": 2.296875, + "learning_rate": 0.00014997695502289178, + "loss": 1.8597, + "step": 283360 + }, + { + "epoch": 0.67, + "grad_norm": 2.359375, + "learning_rate": 0.00014997535410114828, + "loss": 2.132, + "step": 283365 + }, + { + "epoch": 0.67, + "grad_norm": 2.3125, + "learning_rate": 0.00014997375316233227, + "loss": 2.1974, + "step": 283370 + }, + { + "epoch": 0.67, + "grad_norm": 4.0625, + "learning_rate": 0.00014997215220644437, + "loss": 1.9936, + "step": 283375 + }, + { + "epoch": 0.67, + "grad_norm": 2.375, + "learning_rate": 0.00014997055123348502, + "loss": 1.9493, + "step": 283380 + }, + { + "epoch": 0.67, + "grad_norm": 2.3125, + "learning_rate": 0.00014996895024345486, + "loss": 2.0803, + "step": 283385 + }, + { + "epoch": 0.67, + "grad_norm": 2.265625, + "learning_rate": 0.0001499673492363544, + "loss": 2.1553, + "step": 283390 + }, + { + "epoch": 0.67, + "grad_norm": 2.203125, + "learning_rate": 0.00014996574821218416, + "loss": 2.1593, + "step": 283395 + }, + { + "epoch": 0.67, + "grad_norm": 2.078125, + "learning_rate": 0.00014996414717094475, + "loss": 2.1169, + "step": 283400 + }, + { + "epoch": 0.67, + "grad_norm": 2.171875, + "learning_rate": 0.00014996254611263662, + "loss": 2.0219, + "step": 283405 + }, + { + "epoch": 0.67, + "grad_norm": 2.203125, + "learning_rate": 0.00014996094503726042, + "loss": 2.2482, + "step": 283410 + }, + { + "epoch": 0.67, + "grad_norm": 1.8671875, + "learning_rate": 0.00014995934394481668, + "loss": 2.1566, + "step": 283415 + }, + { + "epoch": 0.67, + "grad_norm": 1.9765625, + "learning_rate": 0.0001499577428353059, + "loss": 2.144, + "step": 283420 + }, + { + "epoch": 0.67, + "grad_norm": 2.046875, + "learning_rate": 0.00014995614170872865, + "loss": 2.0828, + "step": 283425 + }, + { + "epoch": 0.67, + "grad_norm": 1.9765625, + "learning_rate": 0.00014995454056508547, + "loss": 2.036, + "step": 283430 + }, + { + "epoch": 0.67, + "grad_norm": 1.8203125, + "learning_rate": 0.0001499529394043769, + "loss": 1.9529, + "step": 283435 + }, + { + "epoch": 0.67, + "grad_norm": 2.1875, + "learning_rate": 0.00014995133822660352, + "loss": 1.9679, + "step": 283440 + }, + { + "epoch": 0.67, + "grad_norm": 2.234375, + "learning_rate": 0.00014994973703176586, + "loss": 1.9731, + "step": 283445 + }, + { + "epoch": 0.67, + "grad_norm": 2.328125, + "learning_rate": 0.00014994813581986445, + "loss": 2.109, + "step": 283450 + }, + { + "epoch": 0.67, + "grad_norm": 2.328125, + "learning_rate": 0.00014994653459089987, + "loss": 2.0608, + "step": 283455 + }, + { + "epoch": 0.67, + "grad_norm": 2.125, + "learning_rate": 0.00014994493334487263, + "loss": 2.1019, + "step": 283460 + }, + { + "epoch": 0.67, + "grad_norm": 2.078125, + "learning_rate": 0.00014994333208178328, + "loss": 1.8931, + "step": 283465 + }, + { + "epoch": 0.67, + "grad_norm": 2.359375, + "learning_rate": 0.0001499417308016324, + "loss": 1.9933, + "step": 283470 + }, + { + "epoch": 0.67, + "grad_norm": 2.46875, + "learning_rate": 0.00014994012950442052, + "loss": 2.0572, + "step": 283475 + }, + { + "epoch": 0.67, + "grad_norm": 2.171875, + "learning_rate": 0.0001499385281901482, + "loss": 1.9835, + "step": 283480 + }, + { + "epoch": 0.67, + "grad_norm": 2.1875, + "learning_rate": 0.00014993692685881594, + "loss": 2.0896, + "step": 283485 + }, + { + "epoch": 0.67, + "grad_norm": 2.375, + "learning_rate": 0.00014993532551042435, + "loss": 1.9922, + "step": 283490 + }, + { + "epoch": 0.67, + "grad_norm": 2.375, + "learning_rate": 0.00014993372414497395, + "loss": 2.0771, + "step": 283495 + }, + { + "epoch": 0.67, + "grad_norm": 1.765625, + "learning_rate": 0.00014993212276246526, + "loss": 2.1818, + "step": 283500 + }, + { + "epoch": 0.67, + "grad_norm": 2.28125, + "learning_rate": 0.00014993052136289887, + "loss": 2.0127, + "step": 283505 + }, + { + "epoch": 0.67, + "grad_norm": 2.234375, + "learning_rate": 0.00014992891994627532, + "loss": 2.049, + "step": 283510 + }, + { + "epoch": 0.67, + "grad_norm": 2.328125, + "learning_rate": 0.0001499273185125951, + "loss": 1.9394, + "step": 283515 + }, + { + "epoch": 0.67, + "grad_norm": 2.203125, + "learning_rate": 0.00014992571706185884, + "loss": 2.149, + "step": 283520 + }, + { + "epoch": 0.67, + "grad_norm": 2.578125, + "learning_rate": 0.00014992411559406702, + "loss": 1.9651, + "step": 283525 + }, + { + "epoch": 0.67, + "grad_norm": 2.0625, + "learning_rate": 0.00014992251410922023, + "loss": 1.9981, + "step": 283530 + }, + { + "epoch": 0.67, + "grad_norm": 2.109375, + "learning_rate": 0.00014992091260731903, + "loss": 1.929, + "step": 283535 + }, + { + "epoch": 0.67, + "grad_norm": 2.59375, + "learning_rate": 0.0001499193110883639, + "loss": 2.2989, + "step": 283540 + }, + { + "epoch": 0.67, + "grad_norm": 2.015625, + "learning_rate": 0.00014991770955235544, + "loss": 1.9478, + "step": 283545 + }, + { + "epoch": 0.67, + "grad_norm": 2.125, + "learning_rate": 0.0001499161079992942, + "loss": 1.9709, + "step": 283550 + }, + { + "epoch": 0.67, + "grad_norm": 2.453125, + "learning_rate": 0.00014991450642918072, + "loss": 2.1096, + "step": 283555 + }, + { + "epoch": 0.67, + "grad_norm": 1.8203125, + "learning_rate": 0.00014991290484201553, + "loss": 1.9763, + "step": 283560 + }, + { + "epoch": 0.67, + "grad_norm": 1.8828125, + "learning_rate": 0.00014991130323779915, + "loss": 1.9771, + "step": 283565 + }, + { + "epoch": 0.67, + "grad_norm": 2.078125, + "learning_rate": 0.00014990970161653222, + "loss": 2.0463, + "step": 283570 + }, + { + "epoch": 0.67, + "grad_norm": 1.953125, + "learning_rate": 0.0001499080999782152, + "loss": 2.1998, + "step": 283575 + }, + { + "epoch": 0.67, + "grad_norm": 2.59375, + "learning_rate": 0.0001499064983228487, + "loss": 1.9798, + "step": 283580 + }, + { + "epoch": 0.67, + "grad_norm": 2.21875, + "learning_rate": 0.0001499048966504332, + "loss": 2.1203, + "step": 283585 + }, + { + "epoch": 0.67, + "grad_norm": 2.140625, + "learning_rate": 0.00014990329496096928, + "loss": 1.8935, + "step": 283590 + }, + { + "epoch": 0.67, + "grad_norm": 2.296875, + "learning_rate": 0.00014990169325445748, + "loss": 2.0797, + "step": 283595 + }, + { + "epoch": 0.67, + "grad_norm": 2.1875, + "learning_rate": 0.00014990009153089838, + "loss": 2.0573, + "step": 283600 + }, + { + "epoch": 0.67, + "grad_norm": 2.015625, + "learning_rate": 0.0001498984897902925, + "loss": 1.8536, + "step": 283605 + }, + { + "epoch": 0.67, + "grad_norm": 2.09375, + "learning_rate": 0.00014989688803264043, + "loss": 2.214, + "step": 283610 + }, + { + "epoch": 0.67, + "grad_norm": 2.078125, + "learning_rate": 0.00014989528625794263, + "loss": 1.9664, + "step": 283615 + }, + { + "epoch": 0.67, + "grad_norm": 2.296875, + "learning_rate": 0.00014989368446619968, + "loss": 2.0879, + "step": 283620 + }, + { + "epoch": 0.67, + "grad_norm": 1.9453125, + "learning_rate": 0.00014989208265741216, + "loss": 1.9108, + "step": 283625 + }, + { + "epoch": 0.67, + "grad_norm": 2.484375, + "learning_rate": 0.00014989048083158062, + "loss": 2.0927, + "step": 283630 + }, + { + "epoch": 0.67, + "grad_norm": 1.9765625, + "learning_rate": 0.0001498888789887056, + "loss": 2.1684, + "step": 283635 + }, + { + "epoch": 0.67, + "grad_norm": 2.25, + "learning_rate": 0.00014988727712878758, + "loss": 2.1169, + "step": 283640 + }, + { + "epoch": 0.67, + "grad_norm": 2.203125, + "learning_rate": 0.00014988567525182718, + "loss": 2.1424, + "step": 283645 + }, + { + "epoch": 0.67, + "grad_norm": 2.296875, + "learning_rate": 0.00014988407335782495, + "loss": 2.0175, + "step": 283650 + }, + { + "epoch": 0.67, + "grad_norm": 2.0625, + "learning_rate": 0.0001498824714467814, + "loss": 2.0123, + "step": 283655 + }, + { + "epoch": 0.67, + "grad_norm": 2.171875, + "learning_rate": 0.0001498808695186971, + "loss": 2.1895, + "step": 283660 + }, + { + "epoch": 0.67, + "grad_norm": 2.234375, + "learning_rate": 0.00014987926757357257, + "loss": 2.1047, + "step": 283665 + }, + { + "epoch": 0.67, + "grad_norm": 2.265625, + "learning_rate": 0.00014987766561140844, + "loss": 2.2978, + "step": 283670 + }, + { + "epoch": 0.67, + "grad_norm": 2.0, + "learning_rate": 0.00014987606363220512, + "loss": 1.9828, + "step": 283675 + }, + { + "epoch": 0.67, + "grad_norm": 2.203125, + "learning_rate": 0.00014987446163596327, + "loss": 2.1057, + "step": 283680 + }, + { + "epoch": 0.67, + "grad_norm": 2.09375, + "learning_rate": 0.00014987285962268337, + "loss": 2.0478, + "step": 283685 + }, + { + "epoch": 0.67, + "grad_norm": 2.390625, + "learning_rate": 0.00014987125759236605, + "loss": 2.1739, + "step": 283690 + }, + { + "epoch": 0.67, + "grad_norm": 2.375, + "learning_rate": 0.00014986965554501179, + "loss": 2.0739, + "step": 283695 + }, + { + "epoch": 0.67, + "grad_norm": 1.9765625, + "learning_rate": 0.0001498680534806211, + "loss": 2.1969, + "step": 283700 + }, + { + "epoch": 0.67, + "grad_norm": 2.078125, + "learning_rate": 0.0001498664513991946, + "loss": 1.8855, + "step": 283705 + }, + { + "epoch": 0.67, + "grad_norm": 2.59375, + "learning_rate": 0.00014986484930073284, + "loss": 2.1997, + "step": 283710 + }, + { + "epoch": 0.67, + "grad_norm": 2.09375, + "learning_rate": 0.00014986324718523633, + "loss": 2.0362, + "step": 283715 + }, + { + "epoch": 0.67, + "grad_norm": 2.71875, + "learning_rate": 0.00014986164505270563, + "loss": 1.985, + "step": 283720 + }, + { + "epoch": 0.67, + "grad_norm": 1.9921875, + "learning_rate": 0.00014986004290314128, + "loss": 2.1277, + "step": 283725 + }, + { + "epoch": 0.67, + "grad_norm": 1.9453125, + "learning_rate": 0.00014985844073654386, + "loss": 2.0667, + "step": 283730 + }, + { + "epoch": 0.67, + "grad_norm": 2.109375, + "learning_rate": 0.00014985683855291387, + "loss": 2.0583, + "step": 283735 + }, + { + "epoch": 0.67, + "grad_norm": 2.625, + "learning_rate": 0.0001498552363522519, + "loss": 1.9627, + "step": 283740 + }, + { + "epoch": 0.67, + "grad_norm": 1.8984375, + "learning_rate": 0.00014985363413455845, + "loss": 2.0692, + "step": 283745 + }, + { + "epoch": 0.67, + "grad_norm": 2.25, + "learning_rate": 0.00014985203189983413, + "loss": 2.1495, + "step": 283750 + }, + { + "epoch": 0.67, + "grad_norm": 2.5625, + "learning_rate": 0.0001498504296480794, + "loss": 2.1569, + "step": 283755 + }, + { + "epoch": 0.67, + "grad_norm": 1.984375, + "learning_rate": 0.00014984882737929488, + "loss": 1.9405, + "step": 283760 + }, + { + "epoch": 0.67, + "grad_norm": 2.15625, + "learning_rate": 0.00014984722509348112, + "loss": 1.9094, + "step": 283765 + }, + { + "epoch": 0.67, + "grad_norm": 2.34375, + "learning_rate": 0.00014984562279063863, + "loss": 2.2717, + "step": 283770 + }, + { + "epoch": 0.67, + "grad_norm": 2.125, + "learning_rate": 0.00014984402047076798, + "loss": 2.2722, + "step": 283775 + }, + { + "epoch": 0.67, + "grad_norm": 2.5, + "learning_rate": 0.00014984241813386967, + "loss": 2.2878, + "step": 283780 + }, + { + "epoch": 0.67, + "grad_norm": 2.03125, + "learning_rate": 0.00014984081577994434, + "loss": 1.9615, + "step": 283785 + }, + { + "epoch": 0.67, + "grad_norm": 2.234375, + "learning_rate": 0.00014983921340899246, + "loss": 2.0803, + "step": 283790 + }, + { + "epoch": 0.67, + "grad_norm": 2.265625, + "learning_rate": 0.0001498376110210146, + "loss": 2.0707, + "step": 283795 + }, + { + "epoch": 0.67, + "grad_norm": 2.015625, + "learning_rate": 0.00014983600861601129, + "loss": 1.9351, + "step": 283800 + }, + { + "epoch": 0.67, + "grad_norm": 2.390625, + "learning_rate": 0.00014983440619398316, + "loss": 1.9858, + "step": 283805 + }, + { + "epoch": 0.67, + "grad_norm": 2.34375, + "learning_rate": 0.0001498328037549306, + "loss": 2.2445, + "step": 283810 + }, + { + "epoch": 0.67, + "grad_norm": 2.34375, + "learning_rate": 0.0001498312012988543, + "loss": 2.1613, + "step": 283815 + }, + { + "epoch": 0.67, + "grad_norm": 2.359375, + "learning_rate": 0.00014982959882575475, + "loss": 1.9371, + "step": 283820 + }, + { + "epoch": 0.67, + "grad_norm": 2.15625, + "learning_rate": 0.00014982799633563251, + "loss": 2.1939, + "step": 283825 + }, + { + "epoch": 0.67, + "grad_norm": 2.1875, + "learning_rate": 0.00014982639382848814, + "loss": 2.0852, + "step": 283830 + }, + { + "epoch": 0.67, + "grad_norm": 2.125, + "learning_rate": 0.00014982479130432212, + "loss": 2.0347, + "step": 283835 + }, + { + "epoch": 0.67, + "grad_norm": 2.59375, + "learning_rate": 0.00014982318876313508, + "loss": 1.8946, + "step": 283840 + }, + { + "epoch": 0.67, + "grad_norm": 2.078125, + "learning_rate": 0.00014982158620492755, + "loss": 2.0157, + "step": 283845 + }, + { + "epoch": 0.67, + "grad_norm": 2.4375, + "learning_rate": 0.00014981998362970005, + "loss": 1.8138, + "step": 283850 + }, + { + "epoch": 0.67, + "grad_norm": 2.53125, + "learning_rate": 0.00014981838103745316, + "loss": 2.0937, + "step": 283855 + }, + { + "epoch": 0.67, + "grad_norm": 1.9375, + "learning_rate": 0.00014981677842818735, + "loss": 1.9147, + "step": 283860 + }, + { + "epoch": 0.67, + "grad_norm": 2.28125, + "learning_rate": 0.00014981517580190326, + "loss": 1.9871, + "step": 283865 + }, + { + "epoch": 0.67, + "grad_norm": 2.3125, + "learning_rate": 0.00014981357315860141, + "loss": 1.9195, + "step": 283870 + }, + { + "epoch": 0.67, + "grad_norm": 1.921875, + "learning_rate": 0.00014981197049828234, + "loss": 2.0863, + "step": 283875 + }, + { + "epoch": 0.67, + "grad_norm": 2.34375, + "learning_rate": 0.00014981036782094658, + "loss": 2.0632, + "step": 283880 + }, + { + "epoch": 0.67, + "grad_norm": 2.265625, + "learning_rate": 0.00014980876512659473, + "loss": 2.1605, + "step": 283885 + }, + { + "epoch": 0.67, + "grad_norm": 2.265625, + "learning_rate": 0.00014980716241522725, + "loss": 2.0905, + "step": 283890 + }, + { + "epoch": 0.67, + "grad_norm": 2.34375, + "learning_rate": 0.0001498055596868448, + "loss": 2.1671, + "step": 283895 + }, + { + "epoch": 0.67, + "grad_norm": 2.03125, + "learning_rate": 0.00014980395694144782, + "loss": 2.048, + "step": 283900 + }, + { + "epoch": 0.67, + "grad_norm": 1.9921875, + "learning_rate": 0.00014980235417903692, + "loss": 1.9487, + "step": 283905 + }, + { + "epoch": 0.67, + "grad_norm": 2.40625, + "learning_rate": 0.00014980075139961262, + "loss": 1.9886, + "step": 283910 + }, + { + "epoch": 0.67, + "grad_norm": 2.28125, + "learning_rate": 0.0001497991486031755, + "loss": 2.1308, + "step": 283915 + }, + { + "epoch": 0.67, + "grad_norm": 2.140625, + "learning_rate": 0.0001497975457897261, + "loss": 2.0982, + "step": 283920 + }, + { + "epoch": 0.67, + "grad_norm": 2.53125, + "learning_rate": 0.00014979594295926494, + "loss": 1.9668, + "step": 283925 + }, + { + "epoch": 0.67, + "grad_norm": 1.8671875, + "learning_rate": 0.00014979434011179257, + "loss": 2.0462, + "step": 283930 + }, + { + "epoch": 0.67, + "grad_norm": 2.4375, + "learning_rate": 0.00014979273724730955, + "loss": 2.0648, + "step": 283935 + }, + { + "epoch": 0.67, + "grad_norm": 1.703125, + "learning_rate": 0.00014979113436581644, + "loss": 2.0116, + "step": 283940 + }, + { + "epoch": 0.67, + "grad_norm": 2.453125, + "learning_rate": 0.00014978953146731376, + "loss": 2.0056, + "step": 283945 + }, + { + "epoch": 0.67, + "grad_norm": 2.640625, + "learning_rate": 0.0001497879285518021, + "loss": 2.05, + "step": 283950 + }, + { + "epoch": 0.67, + "grad_norm": 2.46875, + "learning_rate": 0.00014978632561928198, + "loss": 2.0319, + "step": 283955 + }, + { + "epoch": 0.67, + "grad_norm": 2.03125, + "learning_rate": 0.00014978472266975393, + "loss": 2.1434, + "step": 283960 + }, + { + "epoch": 0.67, + "grad_norm": 2.046875, + "learning_rate": 0.00014978311970321851, + "loss": 2.0019, + "step": 283965 + }, + { + "epoch": 0.67, + "grad_norm": 2.53125, + "learning_rate": 0.00014978151671967628, + "loss": 2.1525, + "step": 283970 + }, + { + "epoch": 0.67, + "grad_norm": 2.09375, + "learning_rate": 0.0001497799137191278, + "loss": 1.8941, + "step": 283975 + }, + { + "epoch": 0.67, + "grad_norm": 2.109375, + "learning_rate": 0.00014977831070157356, + "loss": 2.0652, + "step": 283980 + }, + { + "epoch": 0.67, + "grad_norm": 2.21875, + "learning_rate": 0.0001497767076670142, + "loss": 1.9895, + "step": 283985 + }, + { + "epoch": 0.67, + "grad_norm": 2.078125, + "learning_rate": 0.00014977510461545017, + "loss": 2.1039, + "step": 283990 + }, + { + "epoch": 0.67, + "grad_norm": 2.1875, + "learning_rate": 0.00014977350154688206, + "loss": 2.4192, + "step": 283995 + }, + { + "epoch": 0.67, + "grad_norm": 1.984375, + "learning_rate": 0.00014977189846131044, + "loss": 2.0782, + "step": 284000 + }, + { + "epoch": 0.67, + "grad_norm": 1.8046875, + "learning_rate": 0.00014977029535873584, + "loss": 2.1571, + "step": 284005 + }, + { + "epoch": 0.67, + "grad_norm": 1.8359375, + "learning_rate": 0.00014976869223915878, + "loss": 2.0706, + "step": 284010 + }, + { + "epoch": 0.67, + "grad_norm": 1.796875, + "learning_rate": 0.0001497670891025799, + "loss": 1.9985, + "step": 284015 + }, + { + "epoch": 0.67, + "grad_norm": 2.296875, + "learning_rate": 0.00014976548594899957, + "loss": 2.0878, + "step": 284020 + }, + { + "epoch": 0.67, + "grad_norm": 2.1875, + "learning_rate": 0.0001497638827784185, + "loss": 1.9845, + "step": 284025 + }, + { + "epoch": 0.67, + "grad_norm": 2.09375, + "learning_rate": 0.00014976227959083722, + "loss": 2.1317, + "step": 284030 + }, + { + "epoch": 0.67, + "grad_norm": 2.21875, + "learning_rate": 0.00014976067638625622, + "loss": 2.0546, + "step": 284035 + }, + { + "epoch": 0.67, + "grad_norm": 1.703125, + "learning_rate": 0.00014975907316467605, + "loss": 1.9706, + "step": 284040 + }, + { + "epoch": 0.67, + "grad_norm": 2.1875, + "learning_rate": 0.00014975746992609729, + "loss": 2.2042, + "step": 284045 + }, + { + "epoch": 0.67, + "grad_norm": 1.8515625, + "learning_rate": 0.00014975586667052048, + "loss": 2.0434, + "step": 284050 + }, + { + "epoch": 0.67, + "grad_norm": 2.09375, + "learning_rate": 0.00014975426339794616, + "loss": 2.0026, + "step": 284055 + }, + { + "epoch": 0.67, + "grad_norm": 2.015625, + "learning_rate": 0.00014975266010837487, + "loss": 1.8852, + "step": 284060 + }, + { + "epoch": 0.67, + "grad_norm": 1.9296875, + "learning_rate": 0.00014975105680180718, + "loss": 1.9901, + "step": 284065 + }, + { + "epoch": 0.67, + "grad_norm": 2.484375, + "learning_rate": 0.00014974945347824366, + "loss": 2.0505, + "step": 284070 + }, + { + "epoch": 0.67, + "grad_norm": 2.21875, + "learning_rate": 0.00014974785013768476, + "loss": 2.0742, + "step": 284075 + }, + { + "epoch": 0.67, + "grad_norm": 1.7109375, + "learning_rate": 0.00014974624678013113, + "loss": 2.12, + "step": 284080 + }, + { + "epoch": 0.67, + "grad_norm": 2.09375, + "learning_rate": 0.0001497446434055833, + "loss": 2.2379, + "step": 284085 + }, + { + "epoch": 0.67, + "grad_norm": 2.1875, + "learning_rate": 0.00014974304001404176, + "loss": 2.0458, + "step": 284090 + }, + { + "epoch": 0.67, + "grad_norm": 1.890625, + "learning_rate": 0.0001497414366055071, + "loss": 1.9977, + "step": 284095 + }, + { + "epoch": 0.67, + "grad_norm": 2.046875, + "learning_rate": 0.0001497398331799799, + "loss": 2.0899, + "step": 284100 + }, + { + "epoch": 0.67, + "grad_norm": 2.5, + "learning_rate": 0.00014973822973746062, + "loss": 2.1728, + "step": 284105 + }, + { + "epoch": 0.67, + "grad_norm": 2.1875, + "learning_rate": 0.00014973662627794986, + "loss": 2.1159, + "step": 284110 + }, + { + "epoch": 0.67, + "grad_norm": 2.5, + "learning_rate": 0.0001497350228014482, + "loss": 1.8965, + "step": 284115 + }, + { + "epoch": 0.67, + "grad_norm": 2.078125, + "learning_rate": 0.00014973341930795615, + "loss": 2.2302, + "step": 284120 + }, + { + "epoch": 0.67, + "grad_norm": 1.921875, + "learning_rate": 0.00014973181579747426, + "loss": 2.1445, + "step": 284125 + }, + { + "epoch": 0.67, + "grad_norm": 2.578125, + "learning_rate": 0.00014973021227000305, + "loss": 2.0435, + "step": 284130 + }, + { + "epoch": 0.67, + "grad_norm": 2.171875, + "learning_rate": 0.00014972860872554313, + "loss": 2.0776, + "step": 284135 + }, + { + "epoch": 0.67, + "grad_norm": 2.3125, + "learning_rate": 0.000149727005164095, + "loss": 1.9753, + "step": 284140 + }, + { + "epoch": 0.67, + "grad_norm": 2.4375, + "learning_rate": 0.00014972540158565925, + "loss": 2.2005, + "step": 284145 + }, + { + "epoch": 0.67, + "grad_norm": 2.09375, + "learning_rate": 0.00014972379799023638, + "loss": 2.2622, + "step": 284150 + }, + { + "epoch": 0.67, + "grad_norm": 2.5, + "learning_rate": 0.00014972219437782696, + "loss": 2.0071, + "step": 284155 + }, + { + "epoch": 0.67, + "grad_norm": 2.09375, + "learning_rate": 0.00014972059074843153, + "loss": 2.1454, + "step": 284160 + }, + { + "epoch": 0.67, + "grad_norm": 2.1875, + "learning_rate": 0.00014971898710205064, + "loss": 1.9854, + "step": 284165 + }, + { + "epoch": 0.67, + "grad_norm": 2.328125, + "learning_rate": 0.00014971738343868486, + "loss": 2.1887, + "step": 284170 + }, + { + "epoch": 0.67, + "grad_norm": 2.03125, + "learning_rate": 0.00014971577975833472, + "loss": 2.1524, + "step": 284175 + }, + { + "epoch": 0.67, + "grad_norm": 2.21875, + "learning_rate": 0.00014971417606100075, + "loss": 2.0271, + "step": 284180 + }, + { + "epoch": 0.67, + "grad_norm": 2.015625, + "learning_rate": 0.00014971257234668354, + "loss": 2.0472, + "step": 284185 + }, + { + "epoch": 0.67, + "grad_norm": 2.390625, + "learning_rate": 0.0001497109686153836, + "loss": 2.0662, + "step": 284190 + }, + { + "epoch": 0.67, + "grad_norm": 2.1875, + "learning_rate": 0.00014970936486710147, + "loss": 2.0563, + "step": 284195 + }, + { + "epoch": 0.67, + "grad_norm": 1.859375, + "learning_rate": 0.00014970776110183776, + "loss": 2.168, + "step": 284200 + }, + { + "epoch": 0.67, + "grad_norm": 2.03125, + "learning_rate": 0.00014970615731959295, + "loss": 2.0103, + "step": 284205 + }, + { + "epoch": 0.67, + "grad_norm": 2.125, + "learning_rate": 0.00014970455352036763, + "loss": 2.0595, + "step": 284210 + }, + { + "epoch": 0.67, + "grad_norm": 2.109375, + "learning_rate": 0.00014970294970416233, + "loss": 2.1787, + "step": 284215 + }, + { + "epoch": 0.67, + "grad_norm": 2.125, + "learning_rate": 0.0001497013458709776, + "loss": 2.0363, + "step": 284220 + }, + { + "epoch": 0.67, + "grad_norm": 2.46875, + "learning_rate": 0.00014969974202081398, + "loss": 2.1624, + "step": 284225 + }, + { + "epoch": 0.67, + "grad_norm": 2.03125, + "learning_rate": 0.00014969813815367204, + "loss": 2.1189, + "step": 284230 + }, + { + "epoch": 0.67, + "grad_norm": 2.140625, + "learning_rate": 0.0001496965342695523, + "loss": 1.904, + "step": 284235 + }, + { + "epoch": 0.67, + "grad_norm": 2.109375, + "learning_rate": 0.0001496949303684553, + "loss": 2.1007, + "step": 284240 + }, + { + "epoch": 0.67, + "grad_norm": 2.0625, + "learning_rate": 0.00014969332645038168, + "loss": 1.9622, + "step": 284245 + }, + { + "epoch": 0.67, + "grad_norm": 2.0625, + "learning_rate": 0.00014969172251533185, + "loss": 1.9821, + "step": 284250 + }, + { + "epoch": 0.67, + "grad_norm": 1.734375, + "learning_rate": 0.00014969011856330648, + "loss": 2.1754, + "step": 284255 + }, + { + "epoch": 0.67, + "grad_norm": 2.390625, + "learning_rate": 0.00014968851459430606, + "loss": 2.0528, + "step": 284260 + }, + { + "epoch": 0.67, + "grad_norm": 2.078125, + "learning_rate": 0.0001496869106083311, + "loss": 1.8538, + "step": 284265 + }, + { + "epoch": 0.67, + "grad_norm": 2.328125, + "learning_rate": 0.0001496853066053822, + "loss": 2.0503, + "step": 284270 + }, + { + "epoch": 0.67, + "grad_norm": 1.765625, + "learning_rate": 0.00014968370258545993, + "loss": 1.8764, + "step": 284275 + }, + { + "epoch": 0.67, + "grad_norm": 2.078125, + "learning_rate": 0.00014968209854856478, + "loss": 2.1815, + "step": 284280 + }, + { + "epoch": 0.67, + "grad_norm": 2.0625, + "learning_rate": 0.00014968049449469735, + "loss": 1.9166, + "step": 284285 + }, + { + "epoch": 0.67, + "grad_norm": 2.109375, + "learning_rate": 0.0001496788904238581, + "loss": 2.0305, + "step": 284290 + }, + { + "epoch": 0.67, + "grad_norm": 2.375, + "learning_rate": 0.0001496772863360477, + "loss": 2.2339, + "step": 284295 + }, + { + "epoch": 0.67, + "grad_norm": 2.296875, + "learning_rate": 0.00014967568223126662, + "loss": 2.2442, + "step": 284300 + }, + { + "epoch": 0.67, + "grad_norm": 3.0625, + "learning_rate": 0.00014967407810951544, + "loss": 2.0581, + "step": 284305 + }, + { + "epoch": 0.67, + "grad_norm": 1.9921875, + "learning_rate": 0.00014967247397079469, + "loss": 1.9846, + "step": 284310 + }, + { + "epoch": 0.67, + "grad_norm": 2.1875, + "learning_rate": 0.0001496708698151049, + "loss": 2.1452, + "step": 284315 + }, + { + "epoch": 0.67, + "grad_norm": 1.96875, + "learning_rate": 0.00014966926564244663, + "loss": 2.1053, + "step": 284320 + }, + { + "epoch": 0.67, + "grad_norm": 2.078125, + "learning_rate": 0.0001496676614528205, + "loss": 2.2353, + "step": 284325 + }, + { + "epoch": 0.67, + "grad_norm": 2.34375, + "learning_rate": 0.00014966605724622695, + "loss": 2.0818, + "step": 284330 + }, + { + "epoch": 0.67, + "grad_norm": 2.25, + "learning_rate": 0.0001496644530226666, + "loss": 2.2582, + "step": 284335 + }, + { + "epoch": 0.67, + "grad_norm": 2.203125, + "learning_rate": 0.00014966284878213996, + "loss": 2.0249, + "step": 284340 + }, + { + "epoch": 0.67, + "grad_norm": 2.171875, + "learning_rate": 0.00014966124452464756, + "loss": 2.0355, + "step": 284345 + }, + { + "epoch": 0.67, + "grad_norm": 2.53125, + "learning_rate": 0.00014965964025019, + "loss": 2.0164, + "step": 284350 + }, + { + "epoch": 0.67, + "grad_norm": 2.015625, + "learning_rate": 0.0001496580359587678, + "loss": 2.1147, + "step": 284355 + }, + { + "epoch": 0.67, + "grad_norm": 2.0625, + "learning_rate": 0.00014965643165038153, + "loss": 1.9631, + "step": 284360 + }, + { + "epoch": 0.67, + "grad_norm": 2.421875, + "learning_rate": 0.0001496548273250317, + "loss": 2.1518, + "step": 284365 + }, + { + "epoch": 0.67, + "grad_norm": 2.46875, + "learning_rate": 0.0001496532229827189, + "loss": 2.1172, + "step": 284370 + }, + { + "epoch": 0.67, + "grad_norm": 2.0625, + "learning_rate": 0.00014965161862344364, + "loss": 1.9388, + "step": 284375 + }, + { + "epoch": 0.67, + "grad_norm": 2.453125, + "learning_rate": 0.00014965001424720653, + "loss": 1.9229, + "step": 284380 + }, + { + "epoch": 0.67, + "grad_norm": 2.09375, + "learning_rate": 0.00014964840985400802, + "loss": 1.9689, + "step": 284385 + }, + { + "epoch": 0.67, + "grad_norm": 2.0625, + "learning_rate": 0.00014964680544384877, + "loss": 1.9912, + "step": 284390 + }, + { + "epoch": 0.67, + "grad_norm": 2.09375, + "learning_rate": 0.00014964520101672923, + "loss": 2.1599, + "step": 284395 + }, + { + "epoch": 0.67, + "grad_norm": 2.109375, + "learning_rate": 0.00014964359657264998, + "loss": 2.0462, + "step": 284400 + }, + { + "epoch": 0.67, + "grad_norm": 2.53125, + "learning_rate": 0.0001496419921116116, + "loss": 1.9837, + "step": 284405 + }, + { + "epoch": 0.67, + "grad_norm": 1.7890625, + "learning_rate": 0.00014964038763361462, + "loss": 2.0593, + "step": 284410 + }, + { + "epoch": 0.67, + "grad_norm": 2.265625, + "learning_rate": 0.00014963878313865959, + "loss": 2.1376, + "step": 284415 + }, + { + "epoch": 0.67, + "grad_norm": 2.46875, + "learning_rate": 0.000149637178626747, + "loss": 2.2003, + "step": 284420 + }, + { + "epoch": 0.67, + "grad_norm": 2.40625, + "learning_rate": 0.00014963557409787749, + "loss": 1.9717, + "step": 284425 + }, + { + "epoch": 0.67, + "grad_norm": 2.03125, + "learning_rate": 0.00014963396955205158, + "loss": 1.8948, + "step": 284430 + }, + { + "epoch": 0.67, + "grad_norm": 2.28125, + "learning_rate": 0.00014963236498926978, + "loss": 2.1074, + "step": 284435 + }, + { + "epoch": 0.67, + "grad_norm": 1.7109375, + "learning_rate": 0.0001496307604095327, + "loss": 1.9334, + "step": 284440 + }, + { + "epoch": 0.67, + "grad_norm": 1.953125, + "learning_rate": 0.0001496291558128408, + "loss": 2.0375, + "step": 284445 + }, + { + "epoch": 0.67, + "grad_norm": 2.640625, + "learning_rate": 0.00014962755119919468, + "loss": 2.0466, + "step": 284450 + }, + { + "epoch": 0.67, + "grad_norm": 2.84375, + "learning_rate": 0.00014962594656859493, + "loss": 1.9957, + "step": 284455 + }, + { + "epoch": 0.67, + "grad_norm": 2.34375, + "learning_rate": 0.00014962434192104207, + "loss": 2.1141, + "step": 284460 + }, + { + "epoch": 0.67, + "grad_norm": 2.765625, + "learning_rate": 0.00014962273725653658, + "loss": 2.1662, + "step": 284465 + }, + { + "epoch": 0.67, + "grad_norm": 2.15625, + "learning_rate": 0.0001496211325750791, + "loss": 2.1925, + "step": 284470 + }, + { + "epoch": 0.67, + "grad_norm": 2.265625, + "learning_rate": 0.00014961952787667014, + "loss": 2.1591, + "step": 284475 + }, + { + "epoch": 0.67, + "grad_norm": 2.171875, + "learning_rate": 0.0001496179231613102, + "loss": 2.2824, + "step": 284480 + }, + { + "epoch": 0.67, + "grad_norm": 3.140625, + "learning_rate": 0.00014961631842899995, + "loss": 2.1453, + "step": 284485 + }, + { + "epoch": 0.67, + "grad_norm": 2.390625, + "learning_rate": 0.00014961471367973983, + "loss": 2.2024, + "step": 284490 + }, + { + "epoch": 0.67, + "grad_norm": 1.859375, + "learning_rate": 0.00014961310891353042, + "loss": 2.1706, + "step": 284495 + }, + { + "epoch": 0.67, + "grad_norm": 1.8671875, + "learning_rate": 0.00014961150413037228, + "loss": 2.0181, + "step": 284500 + }, + { + "epoch": 0.67, + "grad_norm": 1.8515625, + "learning_rate": 0.00014960989933026595, + "loss": 2.1326, + "step": 284505 + }, + { + "epoch": 0.67, + "grad_norm": 2.1875, + "learning_rate": 0.00014960829451321196, + "loss": 2.0934, + "step": 284510 + }, + { + "epoch": 0.67, + "grad_norm": 2.203125, + "learning_rate": 0.0001496066896792109, + "loss": 2.0248, + "step": 284515 + }, + { + "epoch": 0.67, + "grad_norm": 2.34375, + "learning_rate": 0.00014960508482826328, + "loss": 2.0593, + "step": 284520 + }, + { + "epoch": 0.67, + "grad_norm": 2.3125, + "learning_rate": 0.00014960347996036968, + "loss": 1.9641, + "step": 284525 + }, + { + "epoch": 0.67, + "grad_norm": 1.9921875, + "learning_rate": 0.00014960187507553064, + "loss": 1.9798, + "step": 284530 + }, + { + "epoch": 0.67, + "grad_norm": 2.1875, + "learning_rate": 0.00014960027017374666, + "loss": 2.0766, + "step": 284535 + }, + { + "epoch": 0.67, + "grad_norm": 2.375, + "learning_rate": 0.00014959866525501836, + "loss": 2.0055, + "step": 284540 + }, + { + "epoch": 0.67, + "grad_norm": 1.8515625, + "learning_rate": 0.00014959706031934624, + "loss": 1.949, + "step": 284545 + }, + { + "epoch": 0.67, + "grad_norm": 2.265625, + "learning_rate": 0.00014959545536673087, + "loss": 2.0622, + "step": 284550 + }, + { + "epoch": 0.67, + "grad_norm": 2.125, + "learning_rate": 0.00014959385039717282, + "loss": 1.9621, + "step": 284555 + }, + { + "epoch": 0.67, + "grad_norm": 2.125, + "learning_rate": 0.0001495922454106726, + "loss": 2.0607, + "step": 284560 + }, + { + "epoch": 0.67, + "grad_norm": 2.25, + "learning_rate": 0.00014959064040723073, + "loss": 2.1879, + "step": 284565 + }, + { + "epoch": 0.67, + "grad_norm": 2.578125, + "learning_rate": 0.00014958903538684782, + "loss": 1.9887, + "step": 284570 + }, + { + "epoch": 0.67, + "grad_norm": 2.296875, + "learning_rate": 0.00014958743034952442, + "loss": 2.1257, + "step": 284575 + }, + { + "epoch": 0.67, + "grad_norm": 2.578125, + "learning_rate": 0.00014958582529526104, + "loss": 2.2942, + "step": 284580 + }, + { + "epoch": 0.67, + "grad_norm": 2.265625, + "learning_rate": 0.00014958422022405822, + "loss": 2.118, + "step": 284585 + }, + { + "epoch": 0.67, + "grad_norm": 2.03125, + "learning_rate": 0.00014958261513591653, + "loss": 1.9145, + "step": 284590 + }, + { + "epoch": 0.67, + "grad_norm": 2.0, + "learning_rate": 0.00014958101003083654, + "loss": 2.1003, + "step": 284595 + }, + { + "epoch": 0.67, + "grad_norm": 2.484375, + "learning_rate": 0.00014957940490881882, + "loss": 2.1131, + "step": 284600 + }, + { + "epoch": 0.67, + "grad_norm": 2.203125, + "learning_rate": 0.0001495777997698638, + "loss": 2.1404, + "step": 284605 + }, + { + "epoch": 0.67, + "grad_norm": 2.328125, + "learning_rate": 0.00014957619461397213, + "loss": 1.9354, + "step": 284610 + }, + { + "epoch": 0.67, + "grad_norm": 2.90625, + "learning_rate": 0.00014957458944114436, + "loss": 2.0461, + "step": 284615 + }, + { + "epoch": 0.67, + "grad_norm": 2.34375, + "learning_rate": 0.000149572984251381, + "loss": 2.1182, + "step": 284620 + }, + { + "epoch": 0.67, + "grad_norm": 2.265625, + "learning_rate": 0.00014957137904468259, + "loss": 2.131, + "step": 284625 + }, + { + "epoch": 0.67, + "grad_norm": 2.359375, + "learning_rate": 0.0001495697738210497, + "loss": 1.9367, + "step": 284630 + }, + { + "epoch": 0.67, + "grad_norm": 2.734375, + "learning_rate": 0.0001495681685804829, + "loss": 2.0652, + "step": 284635 + }, + { + "epoch": 0.67, + "grad_norm": 2.46875, + "learning_rate": 0.0001495665633229827, + "loss": 2.0246, + "step": 284640 + }, + { + "epoch": 0.67, + "grad_norm": 2.03125, + "learning_rate": 0.00014956495804854965, + "loss": 2.208, + "step": 284645 + }, + { + "epoch": 0.67, + "grad_norm": 2.296875, + "learning_rate": 0.00014956335275718435, + "loss": 2.0376, + "step": 284650 + }, + { + "epoch": 0.67, + "grad_norm": 2.578125, + "learning_rate": 0.00014956174744888728, + "loss": 1.9314, + "step": 284655 + }, + { + "epoch": 0.67, + "grad_norm": 2.328125, + "learning_rate": 0.000149560142123659, + "loss": 2.1227, + "step": 284660 + }, + { + "epoch": 0.67, + "grad_norm": 2.203125, + "learning_rate": 0.0001495585367815001, + "loss": 2.0503, + "step": 284665 + }, + { + "epoch": 0.67, + "grad_norm": 2.15625, + "learning_rate": 0.00014955693142241108, + "loss": 1.8338, + "step": 284670 + }, + { + "epoch": 0.67, + "grad_norm": 2.1875, + "learning_rate": 0.00014955532604639255, + "loss": 2.0359, + "step": 284675 + }, + { + "epoch": 0.67, + "grad_norm": 1.9453125, + "learning_rate": 0.000149553720653445, + "loss": 2.2133, + "step": 284680 + }, + { + "epoch": 0.67, + "grad_norm": 2.1875, + "learning_rate": 0.00014955211524356904, + "loss": 2.1572, + "step": 284685 + }, + { + "epoch": 0.67, + "grad_norm": 2.125, + "learning_rate": 0.00014955050981676511, + "loss": 2.1528, + "step": 284690 + }, + { + "epoch": 0.67, + "grad_norm": 2.015625, + "learning_rate": 0.00014954890437303387, + "loss": 2.0798, + "step": 284695 + }, + { + "epoch": 0.67, + "grad_norm": 2.15625, + "learning_rate": 0.00014954729891237585, + "loss": 1.9378, + "step": 284700 + }, + { + "epoch": 0.67, + "grad_norm": 2.140625, + "learning_rate": 0.00014954569343479155, + "loss": 2.0357, + "step": 284705 + }, + { + "epoch": 0.67, + "grad_norm": 2.125, + "learning_rate": 0.0001495440879402815, + "loss": 2.035, + "step": 284710 + }, + { + "epoch": 0.67, + "grad_norm": 2.578125, + "learning_rate": 0.00014954248242884637, + "loss": 2.0432, + "step": 284715 + }, + { + "epoch": 0.67, + "grad_norm": 2.5, + "learning_rate": 0.00014954087690048656, + "loss": 1.9679, + "step": 284720 + }, + { + "epoch": 0.67, + "grad_norm": 2.046875, + "learning_rate": 0.0001495392713552027, + "loss": 2.0366, + "step": 284725 + }, + { + "epoch": 0.67, + "grad_norm": 2.390625, + "learning_rate": 0.00014953766579299535, + "loss": 1.958, + "step": 284730 + }, + { + "epoch": 0.67, + "grad_norm": 2.0625, + "learning_rate": 0.00014953606021386506, + "loss": 2.1156, + "step": 284735 + }, + { + "epoch": 0.67, + "grad_norm": 2.296875, + "learning_rate": 0.00014953445461781233, + "loss": 2.2891, + "step": 284740 + }, + { + "epoch": 0.67, + "grad_norm": 2.640625, + "learning_rate": 0.00014953284900483767, + "loss": 2.0624, + "step": 284745 + }, + { + "epoch": 0.67, + "grad_norm": 2.046875, + "learning_rate": 0.00014953124337494176, + "loss": 2.1925, + "step": 284750 + }, + { + "epoch": 0.67, + "grad_norm": 2.203125, + "learning_rate": 0.0001495296377281251, + "loss": 2.1718, + "step": 284755 + }, + { + "epoch": 0.67, + "grad_norm": 2.25, + "learning_rate": 0.00014952803206438818, + "loss": 2.161, + "step": 284760 + }, + { + "epoch": 0.67, + "grad_norm": 2.28125, + "learning_rate": 0.00014952642638373158, + "loss": 2.0018, + "step": 284765 + }, + { + "epoch": 0.67, + "grad_norm": 2.40625, + "learning_rate": 0.00014952482068615583, + "loss": 1.9566, + "step": 284770 + }, + { + "epoch": 0.67, + "grad_norm": 2.40625, + "learning_rate": 0.00014952321497166152, + "loss": 2.1449, + "step": 284775 + }, + { + "epoch": 0.67, + "grad_norm": 1.890625, + "learning_rate": 0.00014952160924024924, + "loss": 2.0833, + "step": 284780 + }, + { + "epoch": 0.67, + "grad_norm": 2.078125, + "learning_rate": 0.00014952000349191942, + "loss": 1.873, + "step": 284785 + }, + { + "epoch": 0.67, + "grad_norm": 1.9140625, + "learning_rate": 0.00014951839772667268, + "loss": 2.0323, + "step": 284790 + }, + { + "epoch": 0.67, + "grad_norm": 1.875, + "learning_rate": 0.00014951679194450957, + "loss": 1.9167, + "step": 284795 + }, + { + "epoch": 0.67, + "grad_norm": 2.03125, + "learning_rate": 0.0001495151861454306, + "loss": 1.9089, + "step": 284800 + }, + { + "epoch": 0.67, + "grad_norm": 2.09375, + "learning_rate": 0.00014951358032943635, + "loss": 2.0343, + "step": 284805 + }, + { + "epoch": 0.67, + "grad_norm": 2.390625, + "learning_rate": 0.00014951197449652738, + "loss": 2.0829, + "step": 284810 + }, + { + "epoch": 0.67, + "grad_norm": 2.296875, + "learning_rate": 0.00014951036864670422, + "loss": 2.0742, + "step": 284815 + }, + { + "epoch": 0.67, + "grad_norm": 2.75, + "learning_rate": 0.00014950876277996744, + "loss": 2.0549, + "step": 284820 + }, + { + "epoch": 0.67, + "grad_norm": 2.484375, + "learning_rate": 0.00014950715689631753, + "loss": 2.2657, + "step": 284825 + }, + { + "epoch": 0.67, + "grad_norm": 2.375, + "learning_rate": 0.0001495055509957551, + "loss": 2.0292, + "step": 284830 + }, + { + "epoch": 0.67, + "grad_norm": 2.015625, + "learning_rate": 0.00014950394507828066, + "loss": 2.0655, + "step": 284835 + }, + { + "epoch": 0.67, + "grad_norm": 2.125, + "learning_rate": 0.00014950233914389478, + "loss": 1.9157, + "step": 284840 + }, + { + "epoch": 0.67, + "grad_norm": 1.953125, + "learning_rate": 0.000149500733192598, + "loss": 1.8636, + "step": 284845 + }, + { + "epoch": 0.67, + "grad_norm": 2.15625, + "learning_rate": 0.0001494991272243909, + "loss": 2.1721, + "step": 284850 + }, + { + "epoch": 0.67, + "grad_norm": 2.203125, + "learning_rate": 0.00014949752123927397, + "loss": 2.1427, + "step": 284855 + }, + { + "epoch": 0.67, + "grad_norm": 2.546875, + "learning_rate": 0.0001494959152372478, + "loss": 1.9416, + "step": 284860 + }, + { + "epoch": 0.67, + "grad_norm": 1.7109375, + "learning_rate": 0.00014949430921831292, + "loss": 2.193, + "step": 284865 + }, + { + "epoch": 0.67, + "grad_norm": 3.203125, + "learning_rate": 0.00014949270318246992, + "loss": 1.9905, + "step": 284870 + }, + { + "epoch": 0.67, + "grad_norm": 2.046875, + "learning_rate": 0.00014949109712971928, + "loss": 2.2089, + "step": 284875 + }, + { + "epoch": 0.67, + "grad_norm": 2.03125, + "learning_rate": 0.00014948949106006156, + "loss": 1.8498, + "step": 284880 + }, + { + "epoch": 0.67, + "grad_norm": 2.234375, + "learning_rate": 0.00014948788497349736, + "loss": 2.2484, + "step": 284885 + }, + { + "epoch": 0.67, + "grad_norm": 2.109375, + "learning_rate": 0.0001494862788700272, + "loss": 2.168, + "step": 284890 + }, + { + "epoch": 0.67, + "grad_norm": 2.109375, + "learning_rate": 0.00014948467274965166, + "loss": 2.0159, + "step": 284895 + }, + { + "epoch": 0.67, + "grad_norm": 2.09375, + "learning_rate": 0.00014948306661237122, + "loss": 1.9494, + "step": 284900 + }, + { + "epoch": 0.67, + "grad_norm": 2.078125, + "learning_rate": 0.00014948146045818645, + "loss": 2.0689, + "step": 284905 + }, + { + "epoch": 0.67, + "grad_norm": 1.78125, + "learning_rate": 0.00014947985428709795, + "loss": 1.9392, + "step": 284910 + }, + { + "epoch": 0.67, + "grad_norm": 2.109375, + "learning_rate": 0.00014947824809910624, + "loss": 2.1445, + "step": 284915 + }, + { + "epoch": 0.67, + "grad_norm": 1.9375, + "learning_rate": 0.00014947664189421186, + "loss": 2.3029, + "step": 284920 + }, + { + "epoch": 0.67, + "grad_norm": 2.1875, + "learning_rate": 0.00014947503567241537, + "loss": 2.092, + "step": 284925 + }, + { + "epoch": 0.67, + "grad_norm": 2.171875, + "learning_rate": 0.00014947342943371727, + "loss": 2.1465, + "step": 284930 + }, + { + "epoch": 0.67, + "grad_norm": 2.34375, + "learning_rate": 0.00014947182317811815, + "loss": 2.2725, + "step": 284935 + }, + { + "epoch": 0.67, + "grad_norm": 2.875, + "learning_rate": 0.00014947021690561862, + "loss": 2.2341, + "step": 284940 + }, + { + "epoch": 0.67, + "grad_norm": 1.65625, + "learning_rate": 0.0001494686106162191, + "loss": 1.9781, + "step": 284945 + }, + { + "epoch": 0.67, + "grad_norm": 2.03125, + "learning_rate": 0.00014946700430992026, + "loss": 2.1043, + "step": 284950 + }, + { + "epoch": 0.67, + "grad_norm": 2.109375, + "learning_rate": 0.00014946539798672256, + "loss": 2.1627, + "step": 284955 + }, + { + "epoch": 0.67, + "grad_norm": 2.09375, + "learning_rate": 0.00014946379164662658, + "loss": 1.9966, + "step": 284960 + }, + { + "epoch": 0.67, + "grad_norm": 1.84375, + "learning_rate": 0.00014946218528963287, + "loss": 1.9962, + "step": 284965 + }, + { + "epoch": 0.67, + "grad_norm": 1.78125, + "learning_rate": 0.000149460578915742, + "loss": 1.9825, + "step": 284970 + }, + { + "epoch": 0.67, + "grad_norm": 1.9296875, + "learning_rate": 0.0001494589725249545, + "loss": 2.0495, + "step": 284975 + }, + { + "epoch": 0.67, + "grad_norm": 2.21875, + "learning_rate": 0.00014945736611727092, + "loss": 2.099, + "step": 284980 + }, + { + "epoch": 0.67, + "grad_norm": 2.296875, + "learning_rate": 0.00014945575969269178, + "loss": 2.0245, + "step": 284985 + }, + { + "epoch": 0.67, + "grad_norm": 2.4375, + "learning_rate": 0.0001494541532512177, + "loss": 2.1754, + "step": 284990 + }, + { + "epoch": 0.67, + "grad_norm": 2.203125, + "learning_rate": 0.00014945254679284912, + "loss": 2.1451, + "step": 284995 + }, + { + "epoch": 0.67, + "grad_norm": 2.28125, + "learning_rate": 0.00014945094031758668, + "loss": 2.1734, + "step": 285000 + }, + { + "epoch": 0.67, + "grad_norm": 2.390625, + "learning_rate": 0.00014944933382543093, + "loss": 2.2233, + "step": 285005 + }, + { + "epoch": 0.67, + "grad_norm": 2.21875, + "learning_rate": 0.0001494477273163824, + "loss": 2.0507, + "step": 285010 + }, + { + "epoch": 0.67, + "grad_norm": 2.375, + "learning_rate": 0.00014944612079044158, + "loss": 2.0729, + "step": 285015 + }, + { + "epoch": 0.67, + "grad_norm": 2.109375, + "learning_rate": 0.00014944451424760908, + "loss": 1.9555, + "step": 285020 + }, + { + "epoch": 0.67, + "grad_norm": 1.90625, + "learning_rate": 0.00014944290768788545, + "loss": 2.0287, + "step": 285025 + }, + { + "epoch": 0.67, + "grad_norm": 2.9375, + "learning_rate": 0.00014944130111127125, + "loss": 2.0694, + "step": 285030 + }, + { + "epoch": 0.67, + "grad_norm": 1.75, + "learning_rate": 0.00014943969451776696, + "loss": 2.1913, + "step": 285035 + }, + { + "epoch": 0.67, + "grad_norm": 2.359375, + "learning_rate": 0.00014943808790737317, + "loss": 2.2566, + "step": 285040 + }, + { + "epoch": 0.67, + "grad_norm": 1.96875, + "learning_rate": 0.00014943648128009046, + "loss": 2.034, + "step": 285045 + }, + { + "epoch": 0.67, + "grad_norm": 1.921875, + "learning_rate": 0.00014943487463591937, + "loss": 2.207, + "step": 285050 + }, + { + "epoch": 0.67, + "grad_norm": 2.5625, + "learning_rate": 0.0001494332679748604, + "loss": 2.0535, + "step": 285055 + }, + { + "epoch": 0.67, + "grad_norm": 2.046875, + "learning_rate": 0.00014943166129691412, + "loss": 1.9971, + "step": 285060 + }, + { + "epoch": 0.67, + "grad_norm": 2.53125, + "learning_rate": 0.00014943005460208111, + "loss": 2.0744, + "step": 285065 + }, + { + "epoch": 0.67, + "grad_norm": 2.0625, + "learning_rate": 0.00014942844789036188, + "loss": 2.0194, + "step": 285070 + }, + { + "epoch": 0.67, + "grad_norm": 2.0625, + "learning_rate": 0.00014942684116175703, + "loss": 2.1271, + "step": 285075 + }, + { + "epoch": 0.67, + "grad_norm": 1.9140625, + "learning_rate": 0.00014942523441626704, + "loss": 1.9973, + "step": 285080 + }, + { + "epoch": 0.67, + "grad_norm": 2.0, + "learning_rate": 0.00014942362765389252, + "loss": 2.3208, + "step": 285085 + }, + { + "epoch": 0.67, + "grad_norm": 2.28125, + "learning_rate": 0.00014942202087463396, + "loss": 2.0323, + "step": 285090 + }, + { + "epoch": 0.67, + "grad_norm": 1.8984375, + "learning_rate": 0.00014942041407849197, + "loss": 2.0312, + "step": 285095 + }, + { + "epoch": 0.67, + "grad_norm": 2.375, + "learning_rate": 0.00014941880726546705, + "loss": 2.1805, + "step": 285100 + }, + { + "epoch": 0.67, + "grad_norm": 2.09375, + "learning_rate": 0.00014941720043555977, + "loss": 2.1901, + "step": 285105 + }, + { + "epoch": 0.67, + "grad_norm": 2.09375, + "learning_rate": 0.00014941559358877068, + "loss": 2.0915, + "step": 285110 + }, + { + "epoch": 0.67, + "grad_norm": 2.40625, + "learning_rate": 0.00014941398672510035, + "loss": 2.0581, + "step": 285115 + }, + { + "epoch": 0.67, + "grad_norm": 2.53125, + "learning_rate": 0.00014941237984454929, + "loss": 2.0184, + "step": 285120 + }, + { + "epoch": 0.67, + "grad_norm": 2.546875, + "learning_rate": 0.00014941077294711806, + "loss": 2.0523, + "step": 285125 + }, + { + "epoch": 0.67, + "grad_norm": 2.359375, + "learning_rate": 0.00014940916603280722, + "loss": 2.0019, + "step": 285130 + }, + { + "epoch": 0.67, + "grad_norm": 1.84375, + "learning_rate": 0.0001494075591016173, + "loss": 1.9233, + "step": 285135 + }, + { + "epoch": 0.67, + "grad_norm": 2.25, + "learning_rate": 0.00014940595215354887, + "loss": 2.0573, + "step": 285140 + }, + { + "epoch": 0.67, + "grad_norm": 2.078125, + "learning_rate": 0.00014940434518860246, + "loss": 1.9516, + "step": 285145 + }, + { + "epoch": 0.67, + "grad_norm": 2.203125, + "learning_rate": 0.00014940273820677867, + "loss": 2.1229, + "step": 285150 + }, + { + "epoch": 0.67, + "grad_norm": 2.390625, + "learning_rate": 0.00014940113120807797, + "loss": 2.132, + "step": 285155 + }, + { + "epoch": 0.67, + "grad_norm": 2.140625, + "learning_rate": 0.00014939952419250097, + "loss": 2.0995, + "step": 285160 + }, + { + "epoch": 0.67, + "grad_norm": 2.0, + "learning_rate": 0.0001493979171600482, + "loss": 2.0987, + "step": 285165 + }, + { + "epoch": 0.67, + "grad_norm": 2.390625, + "learning_rate": 0.00014939631011072016, + "loss": 2.002, + "step": 285170 + }, + { + "epoch": 0.67, + "grad_norm": 2.09375, + "learning_rate": 0.00014939470304451747, + "loss": 2.0334, + "step": 285175 + }, + { + "epoch": 0.67, + "grad_norm": 2.296875, + "learning_rate": 0.00014939309596144067, + "loss": 2.077, + "step": 285180 + }, + { + "epoch": 0.67, + "grad_norm": 2.203125, + "learning_rate": 0.0001493914888614903, + "loss": 2.0619, + "step": 285185 + }, + { + "epoch": 0.67, + "grad_norm": 2.484375, + "learning_rate": 0.00014938988174466688, + "loss": 2.0983, + "step": 285190 + }, + { + "epoch": 0.67, + "grad_norm": 2.46875, + "learning_rate": 0.00014938827461097098, + "loss": 2.2156, + "step": 285195 + }, + { + "epoch": 0.67, + "grad_norm": 2.59375, + "learning_rate": 0.00014938666746040313, + "loss": 2.0806, + "step": 285200 + }, + { + "epoch": 0.67, + "grad_norm": 1.7578125, + "learning_rate": 0.00014938506029296393, + "loss": 2.0568, + "step": 285205 + }, + { + "epoch": 0.67, + "grad_norm": 2.34375, + "learning_rate": 0.0001493834531086539, + "loss": 2.118, + "step": 285210 + }, + { + "epoch": 0.67, + "grad_norm": 2.09375, + "learning_rate": 0.00014938184590747357, + "loss": 2.0075, + "step": 285215 + }, + { + "epoch": 0.67, + "grad_norm": 1.90625, + "learning_rate": 0.00014938023868942351, + "loss": 2.1031, + "step": 285220 + }, + { + "epoch": 0.67, + "grad_norm": 1.96875, + "learning_rate": 0.00014937863145450428, + "loss": 2.092, + "step": 285225 + }, + { + "epoch": 0.67, + "grad_norm": 2.015625, + "learning_rate": 0.0001493770242027164, + "loss": 1.877, + "step": 285230 + }, + { + "epoch": 0.67, + "grad_norm": 2.15625, + "learning_rate": 0.00014937541693406042, + "loss": 2.129, + "step": 285235 + }, + { + "epoch": 0.67, + "grad_norm": 2.0, + "learning_rate": 0.00014937380964853694, + "loss": 2.1432, + "step": 285240 + }, + { + "epoch": 0.67, + "grad_norm": 1.9765625, + "learning_rate": 0.00014937220234614645, + "loss": 2.0416, + "step": 285245 + }, + { + "epoch": 0.67, + "grad_norm": 2.21875, + "learning_rate": 0.0001493705950268895, + "loss": 2.014, + "step": 285250 + }, + { + "epoch": 0.67, + "grad_norm": 2.296875, + "learning_rate": 0.0001493689876907667, + "loss": 2.3325, + "step": 285255 + }, + { + "epoch": 0.67, + "grad_norm": 2.1875, + "learning_rate": 0.00014936738033777852, + "loss": 2.01, + "step": 285260 + }, + { + "epoch": 0.67, + "grad_norm": 2.046875, + "learning_rate": 0.00014936577296792555, + "loss": 2.0111, + "step": 285265 + }, + { + "epoch": 0.67, + "grad_norm": 2.265625, + "learning_rate": 0.00014936416558120836, + "loss": 2.2066, + "step": 285270 + }, + { + "epoch": 0.67, + "grad_norm": 2.765625, + "learning_rate": 0.00014936255817762746, + "loss": 1.9998, + "step": 285275 + }, + { + "epoch": 0.67, + "grad_norm": 2.203125, + "learning_rate": 0.00014936095075718342, + "loss": 2.1258, + "step": 285280 + }, + { + "epoch": 0.67, + "grad_norm": 2.234375, + "learning_rate": 0.0001493593433198768, + "loss": 2.0108, + "step": 285285 + }, + { + "epoch": 0.67, + "grad_norm": 1.9375, + "learning_rate": 0.0001493577358657081, + "loss": 1.9463, + "step": 285290 + }, + { + "epoch": 0.67, + "grad_norm": 2.109375, + "learning_rate": 0.00014935612839467793, + "loss": 2.2392, + "step": 285295 + }, + { + "epoch": 0.67, + "grad_norm": 2.0625, + "learning_rate": 0.00014935452090678681, + "loss": 2.2141, + "step": 285300 + }, + { + "epoch": 0.67, + "grad_norm": 1.9921875, + "learning_rate": 0.00014935291340203527, + "loss": 1.9908, + "step": 285305 + }, + { + "epoch": 0.67, + "grad_norm": 1.8671875, + "learning_rate": 0.0001493513058804239, + "loss": 1.8483, + "step": 285310 + }, + { + "epoch": 0.67, + "grad_norm": 2.125, + "learning_rate": 0.0001493496983419532, + "loss": 2.3083, + "step": 285315 + }, + { + "epoch": 0.67, + "grad_norm": 1.875, + "learning_rate": 0.00014934809078662376, + "loss": 1.9466, + "step": 285320 + }, + { + "epoch": 0.67, + "grad_norm": 2.09375, + "learning_rate": 0.00014934648321443617, + "loss": 1.9668, + "step": 285325 + }, + { + "epoch": 0.67, + "grad_norm": 2.078125, + "learning_rate": 0.0001493448756253909, + "loss": 2.064, + "step": 285330 + }, + { + "epoch": 0.67, + "grad_norm": 1.9140625, + "learning_rate": 0.00014934326801948846, + "loss": 1.9551, + "step": 285335 + }, + { + "epoch": 0.67, + "grad_norm": 2.09375, + "learning_rate": 0.0001493416603967295, + "loss": 2.0287, + "step": 285340 + }, + { + "epoch": 0.67, + "grad_norm": 2.0, + "learning_rate": 0.00014934005275711457, + "loss": 2.1311, + "step": 285345 + }, + { + "epoch": 0.67, + "grad_norm": 2.15625, + "learning_rate": 0.0001493384451006442, + "loss": 1.8585, + "step": 285350 + }, + { + "epoch": 0.67, + "grad_norm": 2.078125, + "learning_rate": 0.00014933683742731886, + "loss": 2.088, + "step": 285355 + }, + { + "epoch": 0.67, + "grad_norm": 2.171875, + "learning_rate": 0.00014933522973713915, + "loss": 1.9681, + "step": 285360 + }, + { + "epoch": 0.67, + "grad_norm": 2.421875, + "learning_rate": 0.00014933362203010567, + "loss": 2.2422, + "step": 285365 + }, + { + "epoch": 0.67, + "grad_norm": 2.25, + "learning_rate": 0.00014933201430621896, + "loss": 1.9644, + "step": 285370 + }, + { + "epoch": 0.67, + "grad_norm": 2.0, + "learning_rate": 0.0001493304065654795, + "loss": 1.9913, + "step": 285375 + }, + { + "epoch": 0.67, + "grad_norm": 2.65625, + "learning_rate": 0.00014932879880788787, + "loss": 2.03, + "step": 285380 + }, + { + "epoch": 0.67, + "grad_norm": 2.671875, + "learning_rate": 0.00014932719103344464, + "loss": 1.9798, + "step": 285385 + }, + { + "epoch": 0.67, + "grad_norm": 2.59375, + "learning_rate": 0.00014932558324215033, + "loss": 2.0829, + "step": 285390 + }, + { + "epoch": 0.67, + "grad_norm": 2.453125, + "learning_rate": 0.00014932397543400555, + "loss": 2.1369, + "step": 285395 + }, + { + "epoch": 0.67, + "grad_norm": 2.046875, + "learning_rate": 0.00014932236760901077, + "loss": 2.2261, + "step": 285400 + }, + { + "epoch": 0.67, + "grad_norm": 2.0625, + "learning_rate": 0.00014932075976716657, + "loss": 2.046, + "step": 285405 + }, + { + "epoch": 0.67, + "grad_norm": 2.40625, + "learning_rate": 0.0001493191519084735, + "loss": 2.1696, + "step": 285410 + }, + { + "epoch": 0.67, + "grad_norm": 1.9765625, + "learning_rate": 0.00014931754403293217, + "loss": 1.9055, + "step": 285415 + }, + { + "epoch": 0.67, + "grad_norm": 1.8984375, + "learning_rate": 0.00014931593614054303, + "loss": 2.1825, + "step": 285420 + }, + { + "epoch": 0.67, + "grad_norm": 2.03125, + "learning_rate": 0.00014931432823130666, + "loss": 1.9982, + "step": 285425 + }, + { + "epoch": 0.67, + "grad_norm": 2.265625, + "learning_rate": 0.00014931272030522364, + "loss": 2.0837, + "step": 285430 + }, + { + "epoch": 0.67, + "grad_norm": 2.453125, + "learning_rate": 0.0001493111123622945, + "loss": 2.0892, + "step": 285435 + }, + { + "epoch": 0.67, + "grad_norm": 2.65625, + "learning_rate": 0.00014930950440251978, + "loss": 1.9648, + "step": 285440 + }, + { + "epoch": 0.67, + "grad_norm": 2.265625, + "learning_rate": 0.00014930789642590003, + "loss": 2.0652, + "step": 285445 + }, + { + "epoch": 0.67, + "grad_norm": 2.203125, + "learning_rate": 0.00014930628843243585, + "loss": 2.049, + "step": 285450 + }, + { + "epoch": 0.67, + "grad_norm": 2.3125, + "learning_rate": 0.00014930468042212772, + "loss": 2.0631, + "step": 285455 + }, + { + "epoch": 0.67, + "grad_norm": 1.8984375, + "learning_rate": 0.00014930307239497623, + "loss": 2.0404, + "step": 285460 + }, + { + "epoch": 0.67, + "grad_norm": 2.609375, + "learning_rate": 0.00014930146435098189, + "loss": 2.2594, + "step": 285465 + }, + { + "epoch": 0.67, + "grad_norm": 2.140625, + "learning_rate": 0.00014929985629014527, + "loss": 2.0785, + "step": 285470 + }, + { + "epoch": 0.67, + "grad_norm": 2.484375, + "learning_rate": 0.00014929824821246696, + "loss": 1.9794, + "step": 285475 + }, + { + "epoch": 0.67, + "grad_norm": 2.171875, + "learning_rate": 0.00014929664011794746, + "loss": 2.1382, + "step": 285480 + }, + { + "epoch": 0.67, + "grad_norm": 2.421875, + "learning_rate": 0.00014929503200658735, + "loss": 1.9623, + "step": 285485 + }, + { + "epoch": 0.67, + "grad_norm": 2.1875, + "learning_rate": 0.00014929342387838714, + "loss": 2.0935, + "step": 285490 + }, + { + "epoch": 0.67, + "grad_norm": 2.109375, + "learning_rate": 0.0001492918157333474, + "loss": 2.0266, + "step": 285495 + }, + { + "epoch": 0.67, + "grad_norm": 1.8828125, + "learning_rate": 0.00014929020757146867, + "loss": 2.0317, + "step": 285500 + }, + { + "epoch": 0.67, + "grad_norm": 1.921875, + "learning_rate": 0.00014928859939275158, + "loss": 1.9559, + "step": 285505 + }, + { + "epoch": 0.67, + "grad_norm": 2.421875, + "learning_rate": 0.00014928699119719653, + "loss": 2.0004, + "step": 285510 + }, + { + "epoch": 0.67, + "grad_norm": 2.125, + "learning_rate": 0.0001492853829848042, + "loss": 2.1329, + "step": 285515 + }, + { + "epoch": 0.67, + "grad_norm": 1.921875, + "learning_rate": 0.00014928377475557504, + "loss": 1.9545, + "step": 285520 + }, + { + "epoch": 0.67, + "grad_norm": 2.015625, + "learning_rate": 0.0001492821665095097, + "loss": 1.9827, + "step": 285525 + }, + { + "epoch": 0.67, + "grad_norm": 2.53125, + "learning_rate": 0.00014928055824660867, + "loss": 2.0144, + "step": 285530 + }, + { + "epoch": 0.67, + "grad_norm": 2.125, + "learning_rate": 0.00014927894996687248, + "loss": 2.09, + "step": 285535 + }, + { + "epoch": 0.67, + "grad_norm": 1.6875, + "learning_rate": 0.0001492773416703017, + "loss": 1.7355, + "step": 285540 + }, + { + "epoch": 0.67, + "grad_norm": 2.515625, + "learning_rate": 0.00014927573335689695, + "loss": 1.8066, + "step": 285545 + }, + { + "epoch": 0.67, + "grad_norm": 2.265625, + "learning_rate": 0.00014927412502665867, + "loss": 2.1735, + "step": 285550 + }, + { + "epoch": 0.67, + "grad_norm": 2.09375, + "learning_rate": 0.00014927251667958745, + "loss": 2.0498, + "step": 285555 + }, + { + "epoch": 0.67, + "grad_norm": 2.21875, + "learning_rate": 0.00014927090831568386, + "loss": 1.9479, + "step": 285560 + }, + { + "epoch": 0.67, + "grad_norm": 1.9140625, + "learning_rate": 0.00014926929993494843, + "loss": 2.0133, + "step": 285565 + }, + { + "epoch": 0.67, + "grad_norm": 2.578125, + "learning_rate": 0.0001492676915373817, + "loss": 2.1224, + "step": 285570 + }, + { + "epoch": 0.67, + "grad_norm": 1.65625, + "learning_rate": 0.00014926608312298428, + "loss": 1.9344, + "step": 285575 + }, + { + "epoch": 0.67, + "grad_norm": 2.296875, + "learning_rate": 0.0001492644746917566, + "loss": 2.1833, + "step": 285580 + }, + { + "epoch": 0.67, + "grad_norm": 3.203125, + "learning_rate": 0.00014926286624369935, + "loss": 1.9728, + "step": 285585 + }, + { + "epoch": 0.67, + "grad_norm": 2.296875, + "learning_rate": 0.00014926125777881298, + "loss": 2.0455, + "step": 285590 + }, + { + "epoch": 0.67, + "grad_norm": 2.609375, + "learning_rate": 0.00014925964929709805, + "loss": 2.1661, + "step": 285595 + }, + { + "epoch": 0.67, + "grad_norm": 2.5, + "learning_rate": 0.0001492580407985552, + "loss": 2.051, + "step": 285600 + }, + { + "epoch": 0.67, + "grad_norm": 2.25, + "learning_rate": 0.00014925643228318482, + "loss": 1.984, + "step": 285605 + }, + { + "epoch": 0.67, + "grad_norm": 2.0625, + "learning_rate": 0.0001492548237509876, + "loss": 2.0661, + "step": 285610 + }, + { + "epoch": 0.67, + "grad_norm": 2.390625, + "learning_rate": 0.000149253215201964, + "loss": 2.1767, + "step": 285615 + }, + { + "epoch": 0.67, + "grad_norm": 2.25, + "learning_rate": 0.00014925160663611467, + "loss": 1.9278, + "step": 285620 + }, + { + "epoch": 0.67, + "grad_norm": 2.328125, + "learning_rate": 0.00014924999805344008, + "loss": 2.0344, + "step": 285625 + }, + { + "epoch": 0.67, + "grad_norm": 2.21875, + "learning_rate": 0.00014924838945394074, + "loss": 2.006, + "step": 285630 + }, + { + "epoch": 0.67, + "grad_norm": 2.3125, + "learning_rate": 0.00014924678083761732, + "loss": 1.995, + "step": 285635 + }, + { + "epoch": 0.67, + "grad_norm": 1.9921875, + "learning_rate": 0.00014924517220447033, + "loss": 2.1624, + "step": 285640 + }, + { + "epoch": 0.67, + "grad_norm": 2.203125, + "learning_rate": 0.00014924356355450023, + "loss": 1.8534, + "step": 285645 + }, + { + "epoch": 0.67, + "grad_norm": 1.765625, + "learning_rate": 0.00014924195488770766, + "loss": 2.1462, + "step": 285650 + }, + { + "epoch": 0.67, + "grad_norm": 2.265625, + "learning_rate": 0.00014924034620409312, + "loss": 2.0181, + "step": 285655 + }, + { + "epoch": 0.67, + "grad_norm": 1.9296875, + "learning_rate": 0.00014923873750365723, + "loss": 2.0077, + "step": 285660 + }, + { + "epoch": 0.67, + "grad_norm": 2.09375, + "learning_rate": 0.00014923712878640047, + "loss": 1.975, + "step": 285665 + }, + { + "epoch": 0.67, + "grad_norm": 2.09375, + "learning_rate": 0.00014923552005232345, + "loss": 1.9959, + "step": 285670 + }, + { + "epoch": 0.67, + "grad_norm": 2.25, + "learning_rate": 0.00014923391130142663, + "loss": 2.0228, + "step": 285675 + }, + { + "epoch": 0.67, + "grad_norm": 2.359375, + "learning_rate": 0.00014923230253371063, + "loss": 2.0569, + "step": 285680 + }, + { + "epoch": 0.67, + "grad_norm": 2.578125, + "learning_rate": 0.000149230693749176, + "loss": 2.0817, + "step": 285685 + }, + { + "epoch": 0.67, + "grad_norm": 2.375, + "learning_rate": 0.00014922908494782326, + "loss": 2.0941, + "step": 285690 + }, + { + "epoch": 0.67, + "grad_norm": 2.90625, + "learning_rate": 0.00014922747612965294, + "loss": 2.1912, + "step": 285695 + }, + { + "epoch": 0.67, + "grad_norm": 1.875, + "learning_rate": 0.00014922586729466567, + "loss": 2.0319, + "step": 285700 + }, + { + "epoch": 0.67, + "grad_norm": 2.65625, + "learning_rate": 0.00014922425844286194, + "loss": 1.9665, + "step": 285705 + }, + { + "epoch": 0.67, + "grad_norm": 2.03125, + "learning_rate": 0.0001492226495742423, + "loss": 2.1544, + "step": 285710 + }, + { + "epoch": 0.67, + "grad_norm": 2.59375, + "learning_rate": 0.0001492210406888073, + "loss": 2.015, + "step": 285715 + }, + { + "epoch": 0.67, + "grad_norm": 2.3125, + "learning_rate": 0.00014921943178655754, + "loss": 2.3014, + "step": 285720 + }, + { + "epoch": 0.67, + "grad_norm": 2.203125, + "learning_rate": 0.0001492178228674935, + "loss": 2.0575, + "step": 285725 + }, + { + "epoch": 0.67, + "grad_norm": 2.140625, + "learning_rate": 0.00014921621393161575, + "loss": 2.0531, + "step": 285730 + }, + { + "epoch": 0.67, + "grad_norm": 2.140625, + "learning_rate": 0.00014921460497892486, + "loss": 2.1194, + "step": 285735 + }, + { + "epoch": 0.67, + "grad_norm": 2.390625, + "learning_rate": 0.0001492129960094214, + "loss": 2.0892, + "step": 285740 + }, + { + "epoch": 0.67, + "grad_norm": 2.0625, + "learning_rate": 0.00014921138702310584, + "loss": 2.0918, + "step": 285745 + }, + { + "epoch": 0.67, + "grad_norm": 2.5625, + "learning_rate": 0.0001492097780199788, + "loss": 2.1344, + "step": 285750 + }, + { + "epoch": 0.67, + "grad_norm": 2.015625, + "learning_rate": 0.0001492081690000408, + "loss": 2.0247, + "step": 285755 + }, + { + "epoch": 0.67, + "grad_norm": 2.4375, + "learning_rate": 0.0001492065599632924, + "loss": 2.0623, + "step": 285760 + }, + { + "epoch": 0.67, + "grad_norm": 2.078125, + "learning_rate": 0.00014920495090973415, + "loss": 2.0308, + "step": 285765 + }, + { + "epoch": 0.67, + "grad_norm": 1.6875, + "learning_rate": 0.00014920334183936656, + "loss": 2.1371, + "step": 285770 + }, + { + "epoch": 0.67, + "grad_norm": 3.0625, + "learning_rate": 0.00014920173275219026, + "loss": 2.0054, + "step": 285775 + }, + { + "epoch": 0.67, + "grad_norm": 2.3125, + "learning_rate": 0.00014920012364820576, + "loss": 2.0855, + "step": 285780 + }, + { + "epoch": 0.67, + "grad_norm": 2.6875, + "learning_rate": 0.0001491985145274136, + "loss": 2.2298, + "step": 285785 + }, + { + "epoch": 0.67, + "grad_norm": 2.90625, + "learning_rate": 0.0001491969053898143, + "loss": 2.1307, + "step": 285790 + }, + { + "epoch": 0.67, + "grad_norm": 2.484375, + "learning_rate": 0.0001491952962354085, + "loss": 2.1149, + "step": 285795 + }, + { + "epoch": 0.67, + "grad_norm": 2.015625, + "learning_rate": 0.00014919368706419665, + "loss": 2.0503, + "step": 285800 + }, + { + "epoch": 0.67, + "grad_norm": 1.9296875, + "learning_rate": 0.00014919207787617936, + "loss": 2.1975, + "step": 285805 + }, + { + "epoch": 0.67, + "grad_norm": 2.328125, + "learning_rate": 0.00014919046867135718, + "loss": 2.1021, + "step": 285810 + }, + { + "epoch": 0.67, + "grad_norm": 1.859375, + "learning_rate": 0.0001491888594497306, + "loss": 1.8436, + "step": 285815 + }, + { + "epoch": 0.67, + "grad_norm": 2.390625, + "learning_rate": 0.00014918725021130025, + "loss": 2.1674, + "step": 285820 + }, + { + "epoch": 0.67, + "grad_norm": 2.234375, + "learning_rate": 0.00014918564095606665, + "loss": 1.9608, + "step": 285825 + }, + { + "epoch": 0.67, + "grad_norm": 2.109375, + "learning_rate": 0.00014918403168403033, + "loss": 2.0427, + "step": 285830 + }, + { + "epoch": 0.67, + "grad_norm": 1.9609375, + "learning_rate": 0.00014918242239519186, + "loss": 2.0082, + "step": 285835 + }, + { + "epoch": 0.67, + "grad_norm": 2.046875, + "learning_rate": 0.00014918081308955177, + "loss": 2.163, + "step": 285840 + }, + { + "epoch": 0.67, + "grad_norm": 2.609375, + "learning_rate": 0.0001491792037671106, + "loss": 2.0499, + "step": 285845 + }, + { + "epoch": 0.67, + "grad_norm": 2.03125, + "learning_rate": 0.00014917759442786897, + "loss": 1.9098, + "step": 285850 + }, + { + "epoch": 0.67, + "grad_norm": 2.484375, + "learning_rate": 0.00014917598507182734, + "loss": 1.95, + "step": 285855 + }, + { + "epoch": 0.67, + "grad_norm": 2.34375, + "learning_rate": 0.00014917437569898634, + "loss": 2.1404, + "step": 285860 + }, + { + "epoch": 0.67, + "grad_norm": 1.8203125, + "learning_rate": 0.00014917276630934645, + "loss": 1.8905, + "step": 285865 + }, + { + "epoch": 0.67, + "grad_norm": 2.234375, + "learning_rate": 0.00014917115690290825, + "loss": 2.0998, + "step": 285870 + }, + { + "epoch": 0.67, + "grad_norm": 1.75, + "learning_rate": 0.00014916954747967232, + "loss": 1.8316, + "step": 285875 + }, + { + "epoch": 0.67, + "grad_norm": 1.8671875, + "learning_rate": 0.00014916793803963915, + "loss": 2.1059, + "step": 285880 + }, + { + "epoch": 0.67, + "grad_norm": 2.125, + "learning_rate": 0.00014916632858280935, + "loss": 2.253, + "step": 285885 + }, + { + "epoch": 0.67, + "grad_norm": 2.171875, + "learning_rate": 0.0001491647191091834, + "loss": 1.9479, + "step": 285890 + }, + { + "epoch": 0.67, + "grad_norm": 2.46875, + "learning_rate": 0.0001491631096187619, + "loss": 2.0529, + "step": 285895 + }, + { + "epoch": 0.67, + "grad_norm": 2.28125, + "learning_rate": 0.00014916150011154544, + "loss": 1.9682, + "step": 285900 + }, + { + "epoch": 0.67, + "grad_norm": 2.375, + "learning_rate": 0.0001491598905875345, + "loss": 1.9603, + "step": 285905 + }, + { + "epoch": 0.67, + "grad_norm": 2.03125, + "learning_rate": 0.00014915828104672963, + "loss": 1.9347, + "step": 285910 + }, + { + "epoch": 0.67, + "grad_norm": 2.65625, + "learning_rate": 0.00014915667148913142, + "loss": 2.0571, + "step": 285915 + }, + { + "epoch": 0.67, + "grad_norm": 2.15625, + "learning_rate": 0.0001491550619147404, + "loss": 2.0906, + "step": 285920 + }, + { + "epoch": 0.67, + "grad_norm": 2.3125, + "learning_rate": 0.00014915345232355707, + "loss": 2.154, + "step": 285925 + }, + { + "epoch": 0.67, + "grad_norm": 2.078125, + "learning_rate": 0.00014915184271558206, + "loss": 2.1482, + "step": 285930 + }, + { + "epoch": 0.67, + "grad_norm": 1.9765625, + "learning_rate": 0.0001491502330908159, + "loss": 2.1995, + "step": 285935 + }, + { + "epoch": 0.67, + "grad_norm": 2.328125, + "learning_rate": 0.00014914862344925914, + "loss": 2.096, + "step": 285940 + }, + { + "epoch": 0.67, + "grad_norm": 2.3125, + "learning_rate": 0.0001491470137909123, + "loss": 2.0986, + "step": 285945 + }, + { + "epoch": 0.67, + "grad_norm": 2.21875, + "learning_rate": 0.0001491454041157759, + "loss": 2.2783, + "step": 285950 + }, + { + "epoch": 0.67, + "grad_norm": 2.515625, + "learning_rate": 0.00014914379442385061, + "loss": 2.1464, + "step": 285955 + }, + { + "epoch": 0.67, + "grad_norm": 2.171875, + "learning_rate": 0.0001491421847151369, + "loss": 2.1225, + "step": 285960 + }, + { + "epoch": 0.67, + "grad_norm": 2.15625, + "learning_rate": 0.00014914057498963533, + "loss": 1.8193, + "step": 285965 + }, + { + "epoch": 0.67, + "grad_norm": 2.75, + "learning_rate": 0.00014913896524734643, + "loss": 1.9793, + "step": 285970 + }, + { + "epoch": 0.67, + "grad_norm": 2.53125, + "learning_rate": 0.00014913735548827077, + "loss": 1.8763, + "step": 285975 + }, + { + "epoch": 0.67, + "grad_norm": 2.21875, + "learning_rate": 0.0001491357457124089, + "loss": 2.003, + "step": 285980 + }, + { + "epoch": 0.67, + "grad_norm": 2.0625, + "learning_rate": 0.00014913413591976138, + "loss": 2.0456, + "step": 285985 + }, + { + "epoch": 0.67, + "grad_norm": 2.359375, + "learning_rate": 0.00014913252611032873, + "loss": 1.9706, + "step": 285990 + }, + { + "epoch": 0.67, + "grad_norm": 1.9296875, + "learning_rate": 0.00014913091628411152, + "loss": 2.105, + "step": 285995 + }, + { + "epoch": 0.67, + "grad_norm": 1.7421875, + "learning_rate": 0.0001491293064411103, + "loss": 2.0566, + "step": 286000 + }, + { + "epoch": 0.67, + "grad_norm": 2.328125, + "learning_rate": 0.00014912769658132562, + "loss": 1.9846, + "step": 286005 + }, + { + "epoch": 0.67, + "grad_norm": 2.125, + "learning_rate": 0.00014912608670475803, + "loss": 2.2364, + "step": 286010 + }, + { + "epoch": 0.67, + "grad_norm": 2.3125, + "learning_rate": 0.00014912447681140806, + "loss": 2.0184, + "step": 286015 + }, + { + "epoch": 0.67, + "grad_norm": 2.484375, + "learning_rate": 0.0001491228669012763, + "loss": 2.0162, + "step": 286020 + }, + { + "epoch": 0.67, + "grad_norm": 2.34375, + "learning_rate": 0.00014912125697436328, + "loss": 2.0216, + "step": 286025 + }, + { + "epoch": 0.67, + "grad_norm": 1.7734375, + "learning_rate": 0.00014911964703066954, + "loss": 1.8627, + "step": 286030 + }, + { + "epoch": 0.67, + "grad_norm": 2.40625, + "learning_rate": 0.00014911803707019563, + "loss": 2.0433, + "step": 286035 + }, + { + "epoch": 0.67, + "grad_norm": 2.640625, + "learning_rate": 0.00014911642709294212, + "loss": 2.2288, + "step": 286040 + }, + { + "epoch": 0.67, + "grad_norm": 2.03125, + "learning_rate": 0.00014911481709890953, + "loss": 2.0113, + "step": 286045 + }, + { + "epoch": 0.67, + "grad_norm": 2.15625, + "learning_rate": 0.00014911320708809847, + "loss": 1.9796, + "step": 286050 + }, + { + "epoch": 0.67, + "grad_norm": 2.328125, + "learning_rate": 0.0001491115970605094, + "loss": 2.092, + "step": 286055 + }, + { + "epoch": 0.67, + "grad_norm": 1.96875, + "learning_rate": 0.00014910998701614294, + "loss": 1.9866, + "step": 286060 + }, + { + "epoch": 0.67, + "grad_norm": 2.40625, + "learning_rate": 0.0001491083769549996, + "loss": 2.042, + "step": 286065 + }, + { + "epoch": 0.67, + "grad_norm": 2.34375, + "learning_rate": 0.00014910676687707996, + "loss": 1.8939, + "step": 286070 + }, + { + "epoch": 0.67, + "grad_norm": 2.15625, + "learning_rate": 0.0001491051567823846, + "loss": 2.0911, + "step": 286075 + }, + { + "epoch": 0.67, + "grad_norm": 2.3125, + "learning_rate": 0.00014910354667091398, + "loss": 2.0269, + "step": 286080 + }, + { + "epoch": 0.67, + "grad_norm": 2.71875, + "learning_rate": 0.00014910193654266867, + "loss": 2.0927, + "step": 286085 + }, + { + "epoch": 0.67, + "grad_norm": 2.53125, + "learning_rate": 0.00014910032639764927, + "loss": 1.8961, + "step": 286090 + }, + { + "epoch": 0.67, + "grad_norm": 2.140625, + "learning_rate": 0.00014909871623585636, + "loss": 2.128, + "step": 286095 + }, + { + "epoch": 0.67, + "grad_norm": 2.453125, + "learning_rate": 0.0001490971060572904, + "loss": 1.916, + "step": 286100 + }, + { + "epoch": 0.67, + "grad_norm": 2.171875, + "learning_rate": 0.00014909549586195197, + "loss": 2.1098, + "step": 286105 + }, + { + "epoch": 0.67, + "grad_norm": 1.9296875, + "learning_rate": 0.00014909388564984162, + "loss": 2.1127, + "step": 286110 + }, + { + "epoch": 0.67, + "grad_norm": 2.21875, + "learning_rate": 0.00014909227542095994, + "loss": 1.9936, + "step": 286115 + }, + { + "epoch": 0.67, + "grad_norm": 2.109375, + "learning_rate": 0.00014909066517530744, + "loss": 1.9629, + "step": 286120 + }, + { + "epoch": 0.67, + "grad_norm": 2.03125, + "learning_rate": 0.00014908905491288469, + "loss": 2.0824, + "step": 286125 + }, + { + "epoch": 0.67, + "grad_norm": 1.890625, + "learning_rate": 0.00014908744463369217, + "loss": 2.0301, + "step": 286130 + }, + { + "epoch": 0.67, + "grad_norm": 2.328125, + "learning_rate": 0.00014908583433773052, + "loss": 2.1512, + "step": 286135 + }, + { + "epoch": 0.67, + "grad_norm": 2.234375, + "learning_rate": 0.00014908422402500028, + "loss": 2.0153, + "step": 286140 + }, + { + "epoch": 0.67, + "grad_norm": 2.140625, + "learning_rate": 0.00014908261369550195, + "loss": 2.0511, + "step": 286145 + }, + { + "epoch": 0.67, + "grad_norm": 2.5625, + "learning_rate": 0.00014908100334923614, + "loss": 2.1, + "step": 286150 + }, + { + "epoch": 0.67, + "grad_norm": 2.0, + "learning_rate": 0.00014907939298620338, + "loss": 1.862, + "step": 286155 + }, + { + "epoch": 0.67, + "grad_norm": 2.578125, + "learning_rate": 0.00014907778260640417, + "loss": 1.8825, + "step": 286160 + }, + { + "epoch": 0.67, + "grad_norm": 2.015625, + "learning_rate": 0.0001490761722098391, + "loss": 2.1039, + "step": 286165 + }, + { + "epoch": 0.67, + "grad_norm": 2.390625, + "learning_rate": 0.00014907456179650874, + "loss": 1.8696, + "step": 286170 + }, + { + "epoch": 0.67, + "grad_norm": 2.03125, + "learning_rate": 0.0001490729513664136, + "loss": 2.0303, + "step": 286175 + }, + { + "epoch": 0.67, + "grad_norm": 1.828125, + "learning_rate": 0.00014907134091955425, + "loss": 1.9936, + "step": 286180 + }, + { + "epoch": 0.67, + "grad_norm": 1.859375, + "learning_rate": 0.00014906973045593127, + "loss": 1.9465, + "step": 286185 + }, + { + "epoch": 0.67, + "grad_norm": 2.328125, + "learning_rate": 0.00014906811997554515, + "loss": 2.1803, + "step": 286190 + }, + { + "epoch": 0.67, + "grad_norm": 2.03125, + "learning_rate": 0.00014906650947839648, + "loss": 2.0062, + "step": 286195 + }, + { + "epoch": 0.67, + "grad_norm": 2.1875, + "learning_rate": 0.0001490648989644858, + "loss": 1.9475, + "step": 286200 + }, + { + "epoch": 0.67, + "grad_norm": 2.203125, + "learning_rate": 0.00014906328843381368, + "loss": 2.1886, + "step": 286205 + }, + { + "epoch": 0.67, + "grad_norm": 2.140625, + "learning_rate": 0.00014906167788638066, + "loss": 2.1699, + "step": 286210 + }, + { + "epoch": 0.67, + "grad_norm": 2.734375, + "learning_rate": 0.00014906006732218725, + "loss": 2.1163, + "step": 286215 + }, + { + "epoch": 0.67, + "grad_norm": 2.609375, + "learning_rate": 0.000149058456741234, + "loss": 2.2578, + "step": 286220 + }, + { + "epoch": 0.67, + "grad_norm": 2.09375, + "learning_rate": 0.00014905684614352155, + "loss": 2.2271, + "step": 286225 + }, + { + "epoch": 0.67, + "grad_norm": 2.171875, + "learning_rate": 0.00014905523552905035, + "loss": 1.8495, + "step": 286230 + }, + { + "epoch": 0.67, + "grad_norm": 2.34375, + "learning_rate": 0.00014905362489782103, + "loss": 2.0842, + "step": 286235 + }, + { + "epoch": 0.67, + "grad_norm": 2.203125, + "learning_rate": 0.00014905201424983408, + "loss": 1.9964, + "step": 286240 + }, + { + "epoch": 0.67, + "grad_norm": 2.765625, + "learning_rate": 0.00014905040358509006, + "loss": 2.0403, + "step": 286245 + }, + { + "epoch": 0.67, + "grad_norm": 2.078125, + "learning_rate": 0.00014904879290358956, + "loss": 2.1392, + "step": 286250 + }, + { + "epoch": 0.67, + "grad_norm": 1.9453125, + "learning_rate": 0.0001490471822053331, + "loss": 1.8434, + "step": 286255 + }, + { + "epoch": 0.67, + "grad_norm": 2.625, + "learning_rate": 0.0001490455714903212, + "loss": 1.9977, + "step": 286260 + }, + { + "epoch": 0.67, + "grad_norm": 2.671875, + "learning_rate": 0.0001490439607585545, + "loss": 2.0133, + "step": 286265 + }, + { + "epoch": 0.67, + "grad_norm": 2.203125, + "learning_rate": 0.00014904235001003344, + "loss": 2.0551, + "step": 286270 + }, + { + "epoch": 0.67, + "grad_norm": 1.6171875, + "learning_rate": 0.00014904073924475867, + "loss": 2.0038, + "step": 286275 + }, + { + "epoch": 0.67, + "grad_norm": 2.03125, + "learning_rate": 0.00014903912846273064, + "loss": 2.0177, + "step": 286280 + }, + { + "epoch": 0.67, + "grad_norm": 2.15625, + "learning_rate": 0.00014903751766395, + "loss": 2.0335, + "step": 286285 + }, + { + "epoch": 0.67, + "grad_norm": 2.109375, + "learning_rate": 0.00014903590684841724, + "loss": 2.1778, + "step": 286290 + }, + { + "epoch": 0.67, + "grad_norm": 1.96875, + "learning_rate": 0.00014903429601613292, + "loss": 2.0288, + "step": 286295 + }, + { + "epoch": 0.67, + "grad_norm": 2.3125, + "learning_rate": 0.0001490326851670976, + "loss": 2.1999, + "step": 286300 + }, + { + "epoch": 0.67, + "grad_norm": 1.53125, + "learning_rate": 0.0001490310743013118, + "loss": 1.9933, + "step": 286305 + }, + { + "epoch": 0.67, + "grad_norm": 2.171875, + "learning_rate": 0.00014902946341877613, + "loss": 2.2019, + "step": 286310 + }, + { + "epoch": 0.67, + "grad_norm": 2.234375, + "learning_rate": 0.0001490278525194911, + "loss": 2.0162, + "step": 286315 + }, + { + "epoch": 0.67, + "grad_norm": 2.53125, + "learning_rate": 0.00014902624160345726, + "loss": 2.0445, + "step": 286320 + }, + { + "epoch": 0.67, + "grad_norm": 1.890625, + "learning_rate": 0.00014902463067067515, + "loss": 2.2054, + "step": 286325 + }, + { + "epoch": 0.67, + "grad_norm": 2.09375, + "learning_rate": 0.00014902301972114537, + "loss": 2.1599, + "step": 286330 + }, + { + "epoch": 0.67, + "grad_norm": 2.1875, + "learning_rate": 0.00014902140875486844, + "loss": 2.0099, + "step": 286335 + }, + { + "epoch": 0.67, + "grad_norm": 2.328125, + "learning_rate": 0.00014901979777184486, + "loss": 1.9726, + "step": 286340 + }, + { + "epoch": 0.67, + "grad_norm": 2.265625, + "learning_rate": 0.0001490181867720753, + "loss": 2.1637, + "step": 286345 + }, + { + "epoch": 0.67, + "grad_norm": 2.125, + "learning_rate": 0.0001490165757555602, + "loss": 2.066, + "step": 286350 + }, + { + "epoch": 0.67, + "grad_norm": 2.0, + "learning_rate": 0.0001490149647223001, + "loss": 2.1277, + "step": 286355 + }, + { + "epoch": 0.67, + "grad_norm": 2.203125, + "learning_rate": 0.00014901335367229566, + "loss": 2.051, + "step": 286360 + }, + { + "epoch": 0.67, + "grad_norm": 1.8515625, + "learning_rate": 0.00014901174260554737, + "loss": 2.1448, + "step": 286365 + }, + { + "epoch": 0.67, + "grad_norm": 2.796875, + "learning_rate": 0.00014901013152205576, + "loss": 1.9897, + "step": 286370 + }, + { + "epoch": 0.67, + "grad_norm": 2.296875, + "learning_rate": 0.0001490085204218214, + "loss": 2.1118, + "step": 286375 + }, + { + "epoch": 0.67, + "grad_norm": 2.109375, + "learning_rate": 0.00014900690930484483, + "loss": 2.243, + "step": 286380 + }, + { + "epoch": 0.67, + "grad_norm": 1.9765625, + "learning_rate": 0.00014900529817112664, + "loss": 2.0834, + "step": 286385 + }, + { + "epoch": 0.67, + "grad_norm": 2.5, + "learning_rate": 0.00014900368702066734, + "loss": 2.0903, + "step": 286390 + }, + { + "epoch": 0.67, + "grad_norm": 2.765625, + "learning_rate": 0.00014900207585346752, + "loss": 2.1335, + "step": 286395 + }, + { + "epoch": 0.67, + "grad_norm": 1.90625, + "learning_rate": 0.00014900046466952764, + "loss": 2.0855, + "step": 286400 + }, + { + "epoch": 0.67, + "grad_norm": 2.15625, + "learning_rate": 0.00014899885346884831, + "loss": 2.0914, + "step": 286405 + }, + { + "epoch": 0.67, + "grad_norm": 2.015625, + "learning_rate": 0.00014899724225143015, + "loss": 2.0239, + "step": 286410 + }, + { + "epoch": 0.67, + "grad_norm": 2.34375, + "learning_rate": 0.0001489956310172736, + "loss": 2.1873, + "step": 286415 + }, + { + "epoch": 0.67, + "grad_norm": 3.203125, + "learning_rate": 0.00014899401976637927, + "loss": 2.2065, + "step": 286420 + }, + { + "epoch": 0.67, + "grad_norm": 2.359375, + "learning_rate": 0.00014899240849874769, + "loss": 2.0292, + "step": 286425 + }, + { + "epoch": 0.67, + "grad_norm": 2.171875, + "learning_rate": 0.0001489907972143794, + "loss": 2.1157, + "step": 286430 + }, + { + "epoch": 0.67, + "grad_norm": 3.0, + "learning_rate": 0.000148989185913275, + "loss": 2.0965, + "step": 286435 + }, + { + "epoch": 0.67, + "grad_norm": 2.09375, + "learning_rate": 0.00014898757459543498, + "loss": 2.0459, + "step": 286440 + }, + { + "epoch": 0.67, + "grad_norm": 1.7265625, + "learning_rate": 0.0001489859632608599, + "loss": 2.076, + "step": 286445 + }, + { + "epoch": 0.67, + "grad_norm": 2.3125, + "learning_rate": 0.00014898435190955036, + "loss": 2.2639, + "step": 286450 + }, + { + "epoch": 0.67, + "grad_norm": 2.15625, + "learning_rate": 0.00014898274054150686, + "loss": 2.1635, + "step": 286455 + }, + { + "epoch": 0.67, + "grad_norm": 1.9453125, + "learning_rate": 0.00014898112915672998, + "loss": 2.1584, + "step": 286460 + }, + { + "epoch": 0.67, + "grad_norm": 2.25, + "learning_rate": 0.00014897951775522022, + "loss": 1.934, + "step": 286465 + }, + { + "epoch": 0.67, + "grad_norm": 2.03125, + "learning_rate": 0.00014897790633697824, + "loss": 2.1097, + "step": 286470 + }, + { + "epoch": 0.67, + "grad_norm": 1.8125, + "learning_rate": 0.00014897629490200447, + "loss": 2.0175, + "step": 286475 + }, + { + "epoch": 0.67, + "grad_norm": 1.9140625, + "learning_rate": 0.0001489746834502995, + "loss": 2.0678, + "step": 286480 + }, + { + "epoch": 0.67, + "grad_norm": 2.171875, + "learning_rate": 0.0001489730719818639, + "loss": 2.0914, + "step": 286485 + }, + { + "epoch": 0.67, + "grad_norm": 2.140625, + "learning_rate": 0.00014897146049669822, + "loss": 2.2293, + "step": 286490 + }, + { + "epoch": 0.67, + "grad_norm": 1.9296875, + "learning_rate": 0.000148969848994803, + "loss": 2.0633, + "step": 286495 + }, + { + "epoch": 0.67, + "grad_norm": 2.078125, + "learning_rate": 0.00014896823747617878, + "loss": 2.0015, + "step": 286500 + }, + { + "epoch": 0.67, + "grad_norm": 2.109375, + "learning_rate": 0.00014896662594082616, + "loss": 2.0683, + "step": 286505 + }, + { + "epoch": 0.67, + "grad_norm": 2.25, + "learning_rate": 0.00014896501438874563, + "loss": 2.045, + "step": 286510 + }, + { + "epoch": 0.67, + "grad_norm": 2.09375, + "learning_rate": 0.00014896340281993773, + "loss": 2.1251, + "step": 286515 + }, + { + "epoch": 0.67, + "grad_norm": 2.0625, + "learning_rate": 0.00014896179123440306, + "loss": 2.1164, + "step": 286520 + }, + { + "epoch": 0.67, + "grad_norm": 2.0625, + "learning_rate": 0.0001489601796321422, + "loss": 2.0994, + "step": 286525 + }, + { + "epoch": 0.67, + "grad_norm": 2.390625, + "learning_rate": 0.0001489585680131556, + "loss": 2.0398, + "step": 286530 + }, + { + "epoch": 0.67, + "grad_norm": 2.3125, + "learning_rate": 0.0001489569563774439, + "loss": 1.9845, + "step": 286535 + }, + { + "epoch": 0.67, + "grad_norm": 2.328125, + "learning_rate": 0.00014895534472500756, + "loss": 2.1545, + "step": 286540 + }, + { + "epoch": 0.67, + "grad_norm": 2.234375, + "learning_rate": 0.00014895373305584723, + "loss": 1.9729, + "step": 286545 + }, + { + "epoch": 0.67, + "grad_norm": 2.359375, + "learning_rate": 0.00014895212136996344, + "loss": 1.8699, + "step": 286550 + }, + { + "epoch": 0.67, + "grad_norm": 2.1875, + "learning_rate": 0.00014895050966735667, + "loss": 2.0259, + "step": 286555 + }, + { + "epoch": 0.67, + "grad_norm": 2.0, + "learning_rate": 0.00014894889794802755, + "loss": 2.0097, + "step": 286560 + }, + { + "epoch": 0.67, + "grad_norm": 1.8125, + "learning_rate": 0.00014894728621197659, + "loss": 2.0494, + "step": 286565 + }, + { + "epoch": 0.67, + "grad_norm": 2.296875, + "learning_rate": 0.00014894567445920435, + "loss": 1.9146, + "step": 286570 + }, + { + "epoch": 0.67, + "grad_norm": 2.375, + "learning_rate": 0.00014894406268971136, + "loss": 2.06, + "step": 286575 + }, + { + "epoch": 0.67, + "grad_norm": 2.40625, + "learning_rate": 0.00014894245090349824, + "loss": 2.0192, + "step": 286580 + }, + { + "epoch": 0.67, + "grad_norm": 2.109375, + "learning_rate": 0.00014894083910056545, + "loss": 1.9157, + "step": 286585 + }, + { + "epoch": 0.67, + "grad_norm": 2.0, + "learning_rate": 0.00014893922728091358, + "loss": 2.0819, + "step": 286590 + }, + { + "epoch": 0.67, + "grad_norm": 1.8359375, + "learning_rate": 0.0001489376154445432, + "loss": 1.9799, + "step": 286595 + }, + { + "epoch": 0.67, + "grad_norm": 1.84375, + "learning_rate": 0.00014893600359145484, + "loss": 2.1209, + "step": 286600 + }, + { + "epoch": 0.67, + "grad_norm": 2.34375, + "learning_rate": 0.00014893439172164905, + "loss": 2.0662, + "step": 286605 + }, + { + "epoch": 0.67, + "grad_norm": 2.921875, + "learning_rate": 0.00014893277983512635, + "loss": 1.9604, + "step": 286610 + }, + { + "epoch": 0.67, + "grad_norm": 2.140625, + "learning_rate": 0.0001489311679318874, + "loss": 2.0433, + "step": 286615 + }, + { + "epoch": 0.67, + "grad_norm": 2.34375, + "learning_rate": 0.0001489295560119326, + "loss": 2.0523, + "step": 286620 + }, + { + "epoch": 0.67, + "grad_norm": 2.28125, + "learning_rate": 0.00014892794407526263, + "loss": 2.0878, + "step": 286625 + }, + { + "epoch": 0.67, + "grad_norm": 2.125, + "learning_rate": 0.00014892633212187798, + "loss": 2.0061, + "step": 286630 + }, + { + "epoch": 0.67, + "grad_norm": 2.203125, + "learning_rate": 0.00014892472015177918, + "loss": 1.9734, + "step": 286635 + }, + { + "epoch": 0.67, + "grad_norm": 2.125, + "learning_rate": 0.00014892310816496683, + "loss": 1.929, + "step": 286640 + }, + { + "epoch": 0.67, + "grad_norm": 2.078125, + "learning_rate": 0.00014892149616144143, + "loss": 1.9946, + "step": 286645 + }, + { + "epoch": 0.67, + "grad_norm": 2.421875, + "learning_rate": 0.0001489198841412036, + "loss": 2.0981, + "step": 286650 + }, + { + "epoch": 0.67, + "grad_norm": 2.375, + "learning_rate": 0.00014891827210425381, + "loss": 2.3805, + "step": 286655 + }, + { + "epoch": 0.67, + "grad_norm": 2.40625, + "learning_rate": 0.00014891666005059268, + "loss": 1.9942, + "step": 286660 + }, + { + "epoch": 0.67, + "grad_norm": 2.125, + "learning_rate": 0.00014891504798022077, + "loss": 1.9769, + "step": 286665 + }, + { + "epoch": 0.67, + "grad_norm": 1.9453125, + "learning_rate": 0.00014891343589313853, + "loss": 2.1354, + "step": 286670 + }, + { + "epoch": 0.67, + "grad_norm": 2.265625, + "learning_rate": 0.00014891182378934658, + "loss": 1.8241, + "step": 286675 + }, + { + "epoch": 0.67, + "grad_norm": 3.421875, + "learning_rate": 0.00014891021166884546, + "loss": 2.1936, + "step": 286680 + }, + { + "epoch": 0.67, + "grad_norm": 2.328125, + "learning_rate": 0.00014890859953163577, + "loss": 1.9066, + "step": 286685 + }, + { + "epoch": 0.67, + "grad_norm": 2.109375, + "learning_rate": 0.00014890698737771797, + "loss": 2.2228, + "step": 286690 + }, + { + "epoch": 0.67, + "grad_norm": 2.09375, + "learning_rate": 0.00014890537520709268, + "loss": 1.9526, + "step": 286695 + }, + { + "epoch": 0.67, + "grad_norm": 2.265625, + "learning_rate": 0.00014890376301976042, + "loss": 2.1585, + "step": 286700 + }, + { + "epoch": 0.67, + "grad_norm": 3.328125, + "learning_rate": 0.00014890215081572172, + "loss": 1.9834, + "step": 286705 + }, + { + "epoch": 0.67, + "grad_norm": 2.21875, + "learning_rate": 0.00014890053859497723, + "loss": 2.1535, + "step": 286710 + }, + { + "epoch": 0.67, + "grad_norm": 3.15625, + "learning_rate": 0.00014889892635752734, + "loss": 2.134, + "step": 286715 + }, + { + "epoch": 0.67, + "grad_norm": 2.40625, + "learning_rate": 0.00014889731410337275, + "loss": 1.9773, + "step": 286720 + }, + { + "epoch": 0.67, + "grad_norm": 2.25, + "learning_rate": 0.00014889570183251394, + "loss": 2.2333, + "step": 286725 + }, + { + "epoch": 0.67, + "grad_norm": 2.546875, + "learning_rate": 0.00014889408954495145, + "loss": 2.0746, + "step": 286730 + }, + { + "epoch": 0.67, + "grad_norm": 2.140625, + "learning_rate": 0.00014889247724068588, + "loss": 2.0421, + "step": 286735 + }, + { + "epoch": 0.67, + "grad_norm": 2.46875, + "learning_rate": 0.00014889086491971773, + "loss": 1.9059, + "step": 286740 + }, + { + "epoch": 0.67, + "grad_norm": 2.25, + "learning_rate": 0.0001488892525820476, + "loss": 2.1248, + "step": 286745 + }, + { + "epoch": 0.67, + "grad_norm": 2.046875, + "learning_rate": 0.000148887640227676, + "loss": 2.0575, + "step": 286750 + }, + { + "epoch": 0.67, + "grad_norm": 1.9765625, + "learning_rate": 0.00014888602785660348, + "loss": 2.0982, + "step": 286755 + }, + { + "epoch": 0.67, + "grad_norm": 2.140625, + "learning_rate": 0.00014888441546883062, + "loss": 2.1581, + "step": 286760 + }, + { + "epoch": 0.67, + "grad_norm": 2.359375, + "learning_rate": 0.00014888280306435796, + "loss": 2.1631, + "step": 286765 + }, + { + "epoch": 0.67, + "grad_norm": 2.03125, + "learning_rate": 0.00014888119064318604, + "loss": 2.0094, + "step": 286770 + }, + { + "epoch": 0.67, + "grad_norm": 2.140625, + "learning_rate": 0.00014887957820531545, + "loss": 2.162, + "step": 286775 + }, + { + "epoch": 0.67, + "grad_norm": 1.984375, + "learning_rate": 0.00014887796575074666, + "loss": 1.9946, + "step": 286780 + }, + { + "epoch": 0.67, + "grad_norm": 2.265625, + "learning_rate": 0.0001488763532794803, + "loss": 1.8707, + "step": 286785 + }, + { + "epoch": 0.67, + "grad_norm": 2.203125, + "learning_rate": 0.0001488747407915169, + "loss": 1.9873, + "step": 286790 + }, + { + "epoch": 0.67, + "grad_norm": 2.59375, + "learning_rate": 0.000148873128286857, + "loss": 2.0101, + "step": 286795 + }, + { + "epoch": 0.67, + "grad_norm": 2.28125, + "learning_rate": 0.00014887151576550113, + "loss": 2.0559, + "step": 286800 + }, + { + "epoch": 0.67, + "grad_norm": 2.140625, + "learning_rate": 0.00014886990322744988, + "loss": 1.9813, + "step": 286805 + }, + { + "epoch": 0.67, + "grad_norm": 2.078125, + "learning_rate": 0.00014886829067270376, + "loss": 1.9829, + "step": 286810 + }, + { + "epoch": 0.67, + "grad_norm": 1.859375, + "learning_rate": 0.00014886667810126337, + "loss": 1.8183, + "step": 286815 + }, + { + "epoch": 0.67, + "grad_norm": 2.25, + "learning_rate": 0.00014886506551312924, + "loss": 1.9452, + "step": 286820 + }, + { + "epoch": 0.67, + "grad_norm": 1.8203125, + "learning_rate": 0.0001488634529083019, + "loss": 1.8934, + "step": 286825 + }, + { + "epoch": 0.68, + "grad_norm": 1.9921875, + "learning_rate": 0.00014886184028678194, + "loss": 2.1226, + "step": 286830 + }, + { + "epoch": 0.68, + "grad_norm": 2.40625, + "learning_rate": 0.00014886022764856987, + "loss": 2.0742, + "step": 286835 + }, + { + "epoch": 0.68, + "grad_norm": 2.78125, + "learning_rate": 0.00014885861499366624, + "loss": 1.8283, + "step": 286840 + }, + { + "epoch": 0.68, + "grad_norm": 2.4375, + "learning_rate": 0.00014885700232207167, + "loss": 1.9794, + "step": 286845 + }, + { + "epoch": 0.68, + "grad_norm": 2.296875, + "learning_rate": 0.00014885538963378664, + "loss": 2.2563, + "step": 286850 + }, + { + "epoch": 0.68, + "grad_norm": 2.578125, + "learning_rate": 0.00014885377692881174, + "loss": 2.0329, + "step": 286855 + }, + { + "epoch": 0.68, + "grad_norm": 2.421875, + "learning_rate": 0.00014885216420714746, + "loss": 2.0846, + "step": 286860 + }, + { + "epoch": 0.68, + "grad_norm": 2.359375, + "learning_rate": 0.00014885055146879442, + "loss": 2.0164, + "step": 286865 + }, + { + "epoch": 0.68, + "grad_norm": 2.328125, + "learning_rate": 0.00014884893871375317, + "loss": 2.1014, + "step": 286870 + }, + { + "epoch": 0.68, + "grad_norm": 1.84375, + "learning_rate": 0.00014884732594202423, + "loss": 1.958, + "step": 286875 + }, + { + "epoch": 0.68, + "grad_norm": 2.140625, + "learning_rate": 0.00014884571315360812, + "loss": 2.0533, + "step": 286880 + }, + { + "epoch": 0.68, + "grad_norm": 2.109375, + "learning_rate": 0.00014884410034850544, + "loss": 2.0734, + "step": 286885 + }, + { + "epoch": 0.68, + "grad_norm": 2.15625, + "learning_rate": 0.00014884248752671675, + "loss": 2.0687, + "step": 286890 + }, + { + "epoch": 0.68, + "grad_norm": 2.078125, + "learning_rate": 0.00014884087468824256, + "loss": 1.9702, + "step": 286895 + }, + { + "epoch": 0.68, + "grad_norm": 1.8671875, + "learning_rate": 0.00014883926183308346, + "loss": 1.9385, + "step": 286900 + }, + { + "epoch": 0.68, + "grad_norm": 2.1875, + "learning_rate": 0.00014883764896123997, + "loss": 2.0146, + "step": 286905 + }, + { + "epoch": 0.68, + "grad_norm": 1.9921875, + "learning_rate": 0.00014883603607271267, + "loss": 2.1144, + "step": 286910 + }, + { + "epoch": 0.68, + "grad_norm": 2.328125, + "learning_rate": 0.00014883442316750208, + "loss": 1.9551, + "step": 286915 + }, + { + "epoch": 0.68, + "grad_norm": 1.9921875, + "learning_rate": 0.00014883281024560875, + "loss": 1.856, + "step": 286920 + }, + { + "epoch": 0.68, + "grad_norm": 2.328125, + "learning_rate": 0.00014883119730703327, + "loss": 2.0571, + "step": 286925 + }, + { + "epoch": 0.68, + "grad_norm": 2.125, + "learning_rate": 0.00014882958435177618, + "loss": 2.1542, + "step": 286930 + }, + { + "epoch": 0.68, + "grad_norm": 2.25, + "learning_rate": 0.000148827971379838, + "loss": 2.0585, + "step": 286935 + }, + { + "epoch": 0.68, + "grad_norm": 2.0, + "learning_rate": 0.00014882635839121928, + "loss": 2.0596, + "step": 286940 + }, + { + "epoch": 0.68, + "grad_norm": 2.109375, + "learning_rate": 0.00014882474538592059, + "loss": 1.9263, + "step": 286945 + }, + { + "epoch": 0.68, + "grad_norm": 2.421875, + "learning_rate": 0.00014882313236394253, + "loss": 2.0044, + "step": 286950 + }, + { + "epoch": 0.68, + "grad_norm": 1.90625, + "learning_rate": 0.00014882151932528554, + "loss": 2.0176, + "step": 286955 + }, + { + "epoch": 0.68, + "grad_norm": 2.03125, + "learning_rate": 0.0001488199062699503, + "loss": 2.0286, + "step": 286960 + }, + { + "epoch": 0.68, + "grad_norm": 2.265625, + "learning_rate": 0.00014881829319793725, + "loss": 1.9409, + "step": 286965 + }, + { + "epoch": 0.68, + "grad_norm": 2.375, + "learning_rate": 0.000148816680109247, + "loss": 2.051, + "step": 286970 + }, + { + "epoch": 0.68, + "grad_norm": 1.84375, + "learning_rate": 0.00014881506700388007, + "loss": 2.0902, + "step": 286975 + }, + { + "epoch": 0.68, + "grad_norm": 2.46875, + "learning_rate": 0.00014881345388183706, + "loss": 2.0637, + "step": 286980 + }, + { + "epoch": 0.68, + "grad_norm": 2.25, + "learning_rate": 0.00014881184074311846, + "loss": 2.064, + "step": 286985 + }, + { + "epoch": 0.68, + "grad_norm": 1.9453125, + "learning_rate": 0.00014881022758772488, + "loss": 1.9321, + "step": 286990 + }, + { + "epoch": 0.68, + "grad_norm": 2.359375, + "learning_rate": 0.00014880861441565678, + "loss": 2.1388, + "step": 286995 + }, + { + "epoch": 0.68, + "grad_norm": 2.234375, + "learning_rate": 0.00014880700122691482, + "loss": 2.0306, + "step": 287000 + }, + { + "epoch": 0.68, + "grad_norm": 2.234375, + "learning_rate": 0.00014880538802149952, + "loss": 2.1273, + "step": 287005 + }, + { + "epoch": 0.68, + "grad_norm": 2.328125, + "learning_rate": 0.0001488037747994114, + "loss": 2.1329, + "step": 287010 + }, + { + "epoch": 0.68, + "grad_norm": 2.1875, + "learning_rate": 0.00014880216156065102, + "loss": 2.2063, + "step": 287015 + }, + { + "epoch": 0.68, + "grad_norm": 2.296875, + "learning_rate": 0.0001488005483052189, + "loss": 1.9213, + "step": 287020 + }, + { + "epoch": 0.68, + "grad_norm": 1.6875, + "learning_rate": 0.0001487989350331157, + "loss": 2.0655, + "step": 287025 + }, + { + "epoch": 0.68, + "grad_norm": 2.453125, + "learning_rate": 0.00014879732174434183, + "loss": 2.1328, + "step": 287030 + }, + { + "epoch": 0.68, + "grad_norm": 2.09375, + "learning_rate": 0.00014879570843889795, + "loss": 2.0111, + "step": 287035 + }, + { + "epoch": 0.68, + "grad_norm": 2.4375, + "learning_rate": 0.00014879409511678454, + "loss": 2.1546, + "step": 287040 + }, + { + "epoch": 0.68, + "grad_norm": 2.0, + "learning_rate": 0.0001487924817780022, + "loss": 1.9865, + "step": 287045 + }, + { + "epoch": 0.68, + "grad_norm": 2.046875, + "learning_rate": 0.00014879086842255147, + "loss": 2.1589, + "step": 287050 + }, + { + "epoch": 0.68, + "grad_norm": 2.40625, + "learning_rate": 0.0001487892550504329, + "loss": 2.1453, + "step": 287055 + }, + { + "epoch": 0.68, + "grad_norm": 2.234375, + "learning_rate": 0.00014878764166164702, + "loss": 2.1156, + "step": 287060 + }, + { + "epoch": 0.68, + "grad_norm": 1.9765625, + "learning_rate": 0.0001487860282561944, + "loss": 1.945, + "step": 287065 + }, + { + "epoch": 0.68, + "grad_norm": 2.03125, + "learning_rate": 0.0001487844148340756, + "loss": 1.957, + "step": 287070 + }, + { + "epoch": 0.68, + "grad_norm": 2.15625, + "learning_rate": 0.00014878280139529114, + "loss": 1.9931, + "step": 287075 + }, + { + "epoch": 0.68, + "grad_norm": 2.34375, + "learning_rate": 0.00014878118793984162, + "loss": 2.1414, + "step": 287080 + }, + { + "epoch": 0.68, + "grad_norm": 2.109375, + "learning_rate": 0.00014877957446772752, + "loss": 1.9917, + "step": 287085 + }, + { + "epoch": 0.68, + "grad_norm": 2.109375, + "learning_rate": 0.00014877796097894947, + "loss": 2.0293, + "step": 287090 + }, + { + "epoch": 0.68, + "grad_norm": 1.9921875, + "learning_rate": 0.00014877634747350796, + "loss": 2.2156, + "step": 287095 + }, + { + "epoch": 0.68, + "grad_norm": 1.953125, + "learning_rate": 0.00014877473395140357, + "loss": 2.0253, + "step": 287100 + }, + { + "epoch": 0.68, + "grad_norm": 2.484375, + "learning_rate": 0.00014877312041263686, + "loss": 2.2171, + "step": 287105 + }, + { + "epoch": 0.68, + "grad_norm": 1.984375, + "learning_rate": 0.00014877150685720835, + "loss": 2.1698, + "step": 287110 + }, + { + "epoch": 0.68, + "grad_norm": 2.1875, + "learning_rate": 0.00014876989328511863, + "loss": 2.0515, + "step": 287115 + }, + { + "epoch": 0.68, + "grad_norm": 2.078125, + "learning_rate": 0.00014876827969636822, + "loss": 2.1362, + "step": 287120 + }, + { + "epoch": 0.68, + "grad_norm": 2.296875, + "learning_rate": 0.0001487666660909577, + "loss": 2.1321, + "step": 287125 + }, + { + "epoch": 0.68, + "grad_norm": 2.8125, + "learning_rate": 0.00014876505246888755, + "loss": 2.0309, + "step": 287130 + }, + { + "epoch": 0.68, + "grad_norm": 2.546875, + "learning_rate": 0.00014876343883015838, + "loss": 2.0425, + "step": 287135 + }, + { + "epoch": 0.68, + "grad_norm": 2.265625, + "learning_rate": 0.00014876182517477077, + "loss": 1.9568, + "step": 287140 + }, + { + "epoch": 0.68, + "grad_norm": 2.15625, + "learning_rate": 0.00014876021150272522, + "loss": 1.9866, + "step": 287145 + }, + { + "epoch": 0.68, + "grad_norm": 2.0625, + "learning_rate": 0.00014875859781402233, + "loss": 1.9999, + "step": 287150 + }, + { + "epoch": 0.68, + "grad_norm": 1.90625, + "learning_rate": 0.00014875698410866253, + "loss": 2.1394, + "step": 287155 + }, + { + "epoch": 0.68, + "grad_norm": 2.0, + "learning_rate": 0.00014875537038664657, + "loss": 2.0899, + "step": 287160 + }, + { + "epoch": 0.68, + "grad_norm": 2.09375, + "learning_rate": 0.00014875375664797483, + "loss": 2.0617, + "step": 287165 + }, + { + "epoch": 0.68, + "grad_norm": 2.40625, + "learning_rate": 0.00014875214289264795, + "loss": 2.1056, + "step": 287170 + }, + { + "epoch": 0.68, + "grad_norm": 2.546875, + "learning_rate": 0.00014875052912066643, + "loss": 1.9404, + "step": 287175 + }, + { + "epoch": 0.68, + "grad_norm": 2.34375, + "learning_rate": 0.00014874891533203085, + "loss": 2.0795, + "step": 287180 + }, + { + "epoch": 0.68, + "grad_norm": 2.546875, + "learning_rate": 0.00014874730152674177, + "loss": 2.1358, + "step": 287185 + }, + { + "epoch": 0.68, + "grad_norm": 1.90625, + "learning_rate": 0.0001487456877047997, + "loss": 2.1057, + "step": 287190 + }, + { + "epoch": 0.68, + "grad_norm": 1.984375, + "learning_rate": 0.00014874407386620522, + "loss": 2.1643, + "step": 287195 + }, + { + "epoch": 0.68, + "grad_norm": 2.5, + "learning_rate": 0.0001487424600109589, + "loss": 1.9791, + "step": 287200 + }, + { + "epoch": 0.68, + "grad_norm": 1.8515625, + "learning_rate": 0.00014874084613906128, + "loss": 2.0807, + "step": 287205 + }, + { + "epoch": 0.68, + "grad_norm": 2.25, + "learning_rate": 0.0001487392322505129, + "loss": 2.1393, + "step": 287210 + }, + { + "epoch": 0.68, + "grad_norm": 2.3125, + "learning_rate": 0.0001487376183453143, + "loss": 2.1449, + "step": 287215 + }, + { + "epoch": 0.68, + "grad_norm": 2.015625, + "learning_rate": 0.00014873600442346605, + "loss": 2.0983, + "step": 287220 + }, + { + "epoch": 0.68, + "grad_norm": 2.5, + "learning_rate": 0.0001487343904849687, + "loss": 2.0572, + "step": 287225 + }, + { + "epoch": 0.68, + "grad_norm": 2.125, + "learning_rate": 0.0001487327765298228, + "loss": 1.9213, + "step": 287230 + }, + { + "epoch": 0.68, + "grad_norm": 2.390625, + "learning_rate": 0.00014873116255802887, + "loss": 2.1142, + "step": 287235 + }, + { + "epoch": 0.68, + "grad_norm": 2.078125, + "learning_rate": 0.00014872954856958755, + "loss": 2.0757, + "step": 287240 + }, + { + "epoch": 0.68, + "grad_norm": 2.390625, + "learning_rate": 0.00014872793456449928, + "loss": 1.9368, + "step": 287245 + }, + { + "epoch": 0.68, + "grad_norm": 2.125, + "learning_rate": 0.0001487263205427647, + "loss": 2.0938, + "step": 287250 + }, + { + "epoch": 0.68, + "grad_norm": 2.3125, + "learning_rate": 0.00014872470650438433, + "loss": 2.0719, + "step": 287255 + }, + { + "epoch": 0.68, + "grad_norm": 2.28125, + "learning_rate": 0.00014872309244935868, + "loss": 2.0855, + "step": 287260 + }, + { + "epoch": 0.68, + "grad_norm": 2.09375, + "learning_rate": 0.00014872147837768833, + "loss": 1.7879, + "step": 287265 + }, + { + "epoch": 0.68, + "grad_norm": 1.953125, + "learning_rate": 0.00014871986428937386, + "loss": 1.8989, + "step": 287270 + }, + { + "epoch": 0.68, + "grad_norm": 2.46875, + "learning_rate": 0.00014871825018441582, + "loss": 2.2007, + "step": 287275 + }, + { + "epoch": 0.68, + "grad_norm": 2.40625, + "learning_rate": 0.00014871663606281474, + "loss": 2.0556, + "step": 287280 + }, + { + "epoch": 0.68, + "grad_norm": 2.890625, + "learning_rate": 0.00014871502192457116, + "loss": 2.0066, + "step": 287285 + }, + { + "epoch": 0.68, + "grad_norm": 1.9375, + "learning_rate": 0.0001487134077696856, + "loss": 1.996, + "step": 287290 + }, + { + "epoch": 0.68, + "grad_norm": 2.515625, + "learning_rate": 0.00014871179359815873, + "loss": 2.0585, + "step": 287295 + }, + { + "epoch": 0.68, + "grad_norm": 2.0, + "learning_rate": 0.000148710179409991, + "loss": 2.224, + "step": 287300 + }, + { + "epoch": 0.68, + "grad_norm": 1.765625, + "learning_rate": 0.000148708565205183, + "loss": 2.0057, + "step": 287305 + }, + { + "epoch": 0.68, + "grad_norm": 2.3125, + "learning_rate": 0.00014870695098373525, + "loss": 2.0852, + "step": 287310 + }, + { + "epoch": 0.68, + "grad_norm": 2.21875, + "learning_rate": 0.0001487053367456483, + "loss": 2.0377, + "step": 287315 + }, + { + "epoch": 0.68, + "grad_norm": 2.265625, + "learning_rate": 0.0001487037224909228, + "loss": 2.2839, + "step": 287320 + }, + { + "epoch": 0.68, + "grad_norm": 1.8984375, + "learning_rate": 0.00014870210821955917, + "loss": 1.9052, + "step": 287325 + }, + { + "epoch": 0.68, + "grad_norm": 2.3125, + "learning_rate": 0.00014870049393155802, + "loss": 1.9834, + "step": 287330 + }, + { + "epoch": 0.68, + "grad_norm": 2.34375, + "learning_rate": 0.00014869887962691992, + "loss": 2.1106, + "step": 287335 + }, + { + "epoch": 0.68, + "grad_norm": 1.8984375, + "learning_rate": 0.00014869726530564536, + "loss": 2.1391, + "step": 287340 + }, + { + "epoch": 0.68, + "grad_norm": 2.09375, + "learning_rate": 0.00014869565096773497, + "loss": 2.0177, + "step": 287345 + }, + { + "epoch": 0.68, + "grad_norm": 1.78125, + "learning_rate": 0.00014869403661318923, + "loss": 2.0803, + "step": 287350 + }, + { + "epoch": 0.68, + "grad_norm": 2.109375, + "learning_rate": 0.00014869242224200874, + "loss": 2.0139, + "step": 287355 + }, + { + "epoch": 0.68, + "grad_norm": 2.25, + "learning_rate": 0.00014869080785419404, + "loss": 2.169, + "step": 287360 + }, + { + "epoch": 0.68, + "grad_norm": 2.359375, + "learning_rate": 0.00014868919344974567, + "loss": 2.1505, + "step": 287365 + }, + { + "epoch": 0.68, + "grad_norm": 2.265625, + "learning_rate": 0.0001486875790286642, + "loss": 2.0679, + "step": 287370 + }, + { + "epoch": 0.68, + "grad_norm": 2.203125, + "learning_rate": 0.00014868596459095015, + "loss": 1.9704, + "step": 287375 + }, + { + "epoch": 0.68, + "grad_norm": 2.21875, + "learning_rate": 0.00014868435013660412, + "loss": 2.0123, + "step": 287380 + }, + { + "epoch": 0.68, + "grad_norm": 2.421875, + "learning_rate": 0.0001486827356656266, + "loss": 2.066, + "step": 287385 + }, + { + "epoch": 0.68, + "grad_norm": 2.59375, + "learning_rate": 0.00014868112117801823, + "loss": 2.1625, + "step": 287390 + }, + { + "epoch": 0.68, + "grad_norm": 2.09375, + "learning_rate": 0.00014867950667377943, + "loss": 2.1811, + "step": 287395 + }, + { + "epoch": 0.68, + "grad_norm": 2.234375, + "learning_rate": 0.00014867789215291087, + "loss": 2.0655, + "step": 287400 + }, + { + "epoch": 0.68, + "grad_norm": 2.328125, + "learning_rate": 0.00014867627761541306, + "loss": 2.0962, + "step": 287405 + }, + { + "epoch": 0.68, + "grad_norm": 2.3125, + "learning_rate": 0.00014867466306128655, + "loss": 2.1553, + "step": 287410 + }, + { + "epoch": 0.68, + "grad_norm": 2.453125, + "learning_rate": 0.0001486730484905319, + "loss": 2.1064, + "step": 287415 + }, + { + "epoch": 0.68, + "grad_norm": 2.328125, + "learning_rate": 0.00014867143390314964, + "loss": 2.0984, + "step": 287420 + }, + { + "epoch": 0.68, + "grad_norm": 2.625, + "learning_rate": 0.0001486698192991403, + "loss": 2.0501, + "step": 287425 + }, + { + "epoch": 0.68, + "grad_norm": 2.140625, + "learning_rate": 0.00014866820467850453, + "loss": 2.3108, + "step": 287430 + }, + { + "epoch": 0.68, + "grad_norm": 2.484375, + "learning_rate": 0.0001486665900412428, + "loss": 2.0886, + "step": 287435 + }, + { + "epoch": 0.68, + "grad_norm": 1.9609375, + "learning_rate": 0.00014866497538735567, + "loss": 2.1072, + "step": 287440 + }, + { + "epoch": 0.68, + "grad_norm": 2.15625, + "learning_rate": 0.0001486633607168437, + "loss": 2.0162, + "step": 287445 + }, + { + "epoch": 0.68, + "grad_norm": 1.7109375, + "learning_rate": 0.00014866174602970743, + "loss": 2.0529, + "step": 287450 + }, + { + "epoch": 0.68, + "grad_norm": 2.21875, + "learning_rate": 0.00014866013132594746, + "loss": 2.0902, + "step": 287455 + }, + { + "epoch": 0.68, + "grad_norm": 2.140625, + "learning_rate": 0.0001486585166055643, + "loss": 2.058, + "step": 287460 + }, + { + "epoch": 0.68, + "grad_norm": 2.015625, + "learning_rate": 0.0001486569018685585, + "loss": 2.1272, + "step": 287465 + }, + { + "epoch": 0.68, + "grad_norm": 2.28125, + "learning_rate": 0.00014865528711493063, + "loss": 2.0738, + "step": 287470 + }, + { + "epoch": 0.68, + "grad_norm": 2.171875, + "learning_rate": 0.0001486536723446812, + "loss": 2.2041, + "step": 287475 + }, + { + "epoch": 0.68, + "grad_norm": 2.21875, + "learning_rate": 0.0001486520575578108, + "loss": 2.0972, + "step": 287480 + }, + { + "epoch": 0.68, + "grad_norm": 1.9375, + "learning_rate": 0.00014865044275432, + "loss": 1.9929, + "step": 287485 + }, + { + "epoch": 0.68, + "grad_norm": 2.21875, + "learning_rate": 0.0001486488279342093, + "loss": 2.1555, + "step": 287490 + }, + { + "epoch": 0.68, + "grad_norm": 1.96875, + "learning_rate": 0.00014864721309747932, + "loss": 2.1084, + "step": 287495 + }, + { + "epoch": 0.68, + "grad_norm": 2.375, + "learning_rate": 0.00014864559824413051, + "loss": 2.1228, + "step": 287500 + }, + { + "epoch": 0.68, + "grad_norm": 1.9921875, + "learning_rate": 0.00014864398337416352, + "loss": 2.2229, + "step": 287505 + }, + { + "epoch": 0.68, + "grad_norm": 2.234375, + "learning_rate": 0.00014864236848757887, + "loss": 1.8981, + "step": 287510 + }, + { + "epoch": 0.68, + "grad_norm": 2.296875, + "learning_rate": 0.00014864075358437707, + "loss": 1.9858, + "step": 287515 + }, + { + "epoch": 0.68, + "grad_norm": 2.21875, + "learning_rate": 0.00014863913866455873, + "loss": 2.0638, + "step": 287520 + }, + { + "epoch": 0.68, + "grad_norm": 2.234375, + "learning_rate": 0.0001486375237281244, + "loss": 2.0762, + "step": 287525 + }, + { + "epoch": 0.68, + "grad_norm": 1.7265625, + "learning_rate": 0.00014863590877507455, + "loss": 1.9999, + "step": 287530 + }, + { + "epoch": 0.68, + "grad_norm": 2.421875, + "learning_rate": 0.00014863429380540985, + "loss": 2.1136, + "step": 287535 + }, + { + "epoch": 0.68, + "grad_norm": 2.234375, + "learning_rate": 0.00014863267881913076, + "loss": 2.0794, + "step": 287540 + }, + { + "epoch": 0.68, + "grad_norm": 1.9765625, + "learning_rate": 0.0001486310638162379, + "loss": 2.0326, + "step": 287545 + }, + { + "epoch": 0.68, + "grad_norm": 2.078125, + "learning_rate": 0.00014862944879673174, + "loss": 2.1329, + "step": 287550 + }, + { + "epoch": 0.68, + "grad_norm": 1.890625, + "learning_rate": 0.0001486278337606129, + "loss": 1.9843, + "step": 287555 + }, + { + "epoch": 0.68, + "grad_norm": 1.7890625, + "learning_rate": 0.00014862621870788192, + "loss": 2.0741, + "step": 287560 + }, + { + "epoch": 0.68, + "grad_norm": 2.171875, + "learning_rate": 0.0001486246036385393, + "loss": 2.0701, + "step": 287565 + }, + { + "epoch": 0.68, + "grad_norm": 2.171875, + "learning_rate": 0.00014862298855258568, + "loss": 2.1295, + "step": 287570 + }, + { + "epoch": 0.68, + "grad_norm": 2.078125, + "learning_rate": 0.00014862137345002157, + "loss": 2.174, + "step": 287575 + }, + { + "epoch": 0.68, + "grad_norm": 2.65625, + "learning_rate": 0.00014861975833084748, + "loss": 1.9444, + "step": 287580 + }, + { + "epoch": 0.68, + "grad_norm": 2.21875, + "learning_rate": 0.000148618143195064, + "loss": 1.9636, + "step": 287585 + }, + { + "epoch": 0.68, + "grad_norm": 2.46875, + "learning_rate": 0.00014861652804267172, + "loss": 2.202, + "step": 287590 + }, + { + "epoch": 0.68, + "grad_norm": 1.8671875, + "learning_rate": 0.00014861491287367114, + "loss": 2.0489, + "step": 287595 + }, + { + "epoch": 0.68, + "grad_norm": 2.140625, + "learning_rate": 0.0001486132976880628, + "loss": 2.0919, + "step": 287600 + }, + { + "epoch": 0.68, + "grad_norm": 2.34375, + "learning_rate": 0.0001486116824858473, + "loss": 1.9271, + "step": 287605 + }, + { + "epoch": 0.68, + "grad_norm": 2.34375, + "learning_rate": 0.00014861006726702512, + "loss": 2.0198, + "step": 287610 + }, + { + "epoch": 0.68, + "grad_norm": 2.125, + "learning_rate": 0.00014860845203159694, + "loss": 1.8816, + "step": 287615 + }, + { + "epoch": 0.68, + "grad_norm": 2.390625, + "learning_rate": 0.00014860683677956317, + "loss": 1.9384, + "step": 287620 + }, + { + "epoch": 0.68, + "grad_norm": 2.25, + "learning_rate": 0.00014860522151092443, + "loss": 2.1074, + "step": 287625 + }, + { + "epoch": 0.68, + "grad_norm": 2.078125, + "learning_rate": 0.0001486036062256813, + "loss": 2.0486, + "step": 287630 + }, + { + "epoch": 0.68, + "grad_norm": 2.046875, + "learning_rate": 0.00014860199092383428, + "loss": 1.9882, + "step": 287635 + }, + { + "epoch": 0.68, + "grad_norm": 2.234375, + "learning_rate": 0.0001486003756053839, + "loss": 1.8785, + "step": 287640 + }, + { + "epoch": 0.68, + "grad_norm": 2.171875, + "learning_rate": 0.0001485987602703308, + "loss": 1.9851, + "step": 287645 + }, + { + "epoch": 0.68, + "grad_norm": 1.984375, + "learning_rate": 0.00014859714491867546, + "loss": 1.9891, + "step": 287650 + }, + { + "epoch": 0.68, + "grad_norm": 2.40625, + "learning_rate": 0.00014859552955041844, + "loss": 2.1448, + "step": 287655 + }, + { + "epoch": 0.68, + "grad_norm": 1.7265625, + "learning_rate": 0.00014859391416556034, + "loss": 2.1005, + "step": 287660 + }, + { + "epoch": 0.68, + "grad_norm": 1.8359375, + "learning_rate": 0.00014859229876410166, + "loss": 2.0462, + "step": 287665 + }, + { + "epoch": 0.68, + "grad_norm": 1.9140625, + "learning_rate": 0.00014859068334604296, + "loss": 2.1134, + "step": 287670 + }, + { + "epoch": 0.68, + "grad_norm": 2.421875, + "learning_rate": 0.00014858906791138482, + "loss": 2.2317, + "step": 287675 + }, + { + "epoch": 0.68, + "grad_norm": 1.859375, + "learning_rate": 0.00014858745246012776, + "loss": 1.818, + "step": 287680 + }, + { + "epoch": 0.68, + "grad_norm": 2.296875, + "learning_rate": 0.00014858583699227235, + "loss": 2.0769, + "step": 287685 + }, + { + "epoch": 0.68, + "grad_norm": 1.859375, + "learning_rate": 0.0001485842215078191, + "loss": 1.9404, + "step": 287690 + }, + { + "epoch": 0.68, + "grad_norm": 2.09375, + "learning_rate": 0.0001485826060067687, + "loss": 2.1169, + "step": 287695 + }, + { + "epoch": 0.68, + "grad_norm": 2.125, + "learning_rate": 0.0001485809904891215, + "loss": 2.087, + "step": 287700 + }, + { + "epoch": 0.68, + "grad_norm": 2.265625, + "learning_rate": 0.00014857937495487818, + "loss": 1.9912, + "step": 287705 + }, + { + "epoch": 0.68, + "grad_norm": 2.3125, + "learning_rate": 0.0001485777594040393, + "loss": 2.1028, + "step": 287710 + }, + { + "epoch": 0.68, + "grad_norm": 2.171875, + "learning_rate": 0.00014857614383660534, + "loss": 2.051, + "step": 287715 + }, + { + "epoch": 0.68, + "grad_norm": 2.203125, + "learning_rate": 0.00014857452825257689, + "loss": 1.9221, + "step": 287720 + }, + { + "epoch": 0.68, + "grad_norm": 1.9921875, + "learning_rate": 0.0001485729126519545, + "loss": 1.8901, + "step": 287725 + }, + { + "epoch": 0.68, + "grad_norm": 2.328125, + "learning_rate": 0.00014857129703473875, + "loss": 2.1195, + "step": 287730 + }, + { + "epoch": 0.68, + "grad_norm": 2.125, + "learning_rate": 0.00014856968140093015, + "loss": 2.1747, + "step": 287735 + }, + { + "epoch": 0.68, + "grad_norm": 2.125, + "learning_rate": 0.00014856806575052925, + "loss": 1.8392, + "step": 287740 + }, + { + "epoch": 0.68, + "grad_norm": 2.375, + "learning_rate": 0.0001485664500835366, + "loss": 2.1239, + "step": 287745 + }, + { + "epoch": 0.68, + "grad_norm": 2.109375, + "learning_rate": 0.00014856483439995284, + "loss": 2.1344, + "step": 287750 + }, + { + "epoch": 0.68, + "grad_norm": 2.203125, + "learning_rate": 0.0001485632186997784, + "loss": 2.0999, + "step": 287755 + }, + { + "epoch": 0.68, + "grad_norm": 2.296875, + "learning_rate": 0.0001485616029830139, + "loss": 1.9849, + "step": 287760 + }, + { + "epoch": 0.68, + "grad_norm": 2.21875, + "learning_rate": 0.00014855998724965986, + "loss": 2.1215, + "step": 287765 + }, + { + "epoch": 0.68, + "grad_norm": 2.53125, + "learning_rate": 0.00014855837149971686, + "loss": 2.0287, + "step": 287770 + }, + { + "epoch": 0.68, + "grad_norm": 2.109375, + "learning_rate": 0.00014855675573318541, + "loss": 2.1408, + "step": 287775 + }, + { + "epoch": 0.68, + "grad_norm": 2.21875, + "learning_rate": 0.00014855513995006613, + "loss": 1.9422, + "step": 287780 + }, + { + "epoch": 0.68, + "grad_norm": 2.046875, + "learning_rate": 0.00014855352415035953, + "loss": 2.0853, + "step": 287785 + }, + { + "epoch": 0.68, + "grad_norm": 1.984375, + "learning_rate": 0.00014855190833406616, + "loss": 2.0218, + "step": 287790 + }, + { + "epoch": 0.68, + "grad_norm": 2.421875, + "learning_rate": 0.00014855029250118658, + "loss": 1.876, + "step": 287795 + }, + { + "epoch": 0.68, + "grad_norm": 2.15625, + "learning_rate": 0.0001485486766517213, + "loss": 1.988, + "step": 287800 + }, + { + "epoch": 0.68, + "grad_norm": 2.46875, + "learning_rate": 0.00014854706078567094, + "loss": 2.1161, + "step": 287805 + }, + { + "epoch": 0.68, + "grad_norm": 2.28125, + "learning_rate": 0.00014854544490303605, + "loss": 2.1674, + "step": 287810 + }, + { + "epoch": 0.68, + "grad_norm": 2.21875, + "learning_rate": 0.00014854382900381715, + "loss": 2.0382, + "step": 287815 + }, + { + "epoch": 0.68, + "grad_norm": 2.078125, + "learning_rate": 0.0001485422130880148, + "loss": 2.0491, + "step": 287820 + }, + { + "epoch": 0.68, + "grad_norm": 2.109375, + "learning_rate": 0.00014854059715562948, + "loss": 1.925, + "step": 287825 + }, + { + "epoch": 0.68, + "grad_norm": 2.59375, + "learning_rate": 0.00014853898120666188, + "loss": 1.9961, + "step": 287830 + }, + { + "epoch": 0.68, + "grad_norm": 2.3125, + "learning_rate": 0.0001485373652411125, + "loss": 2.0367, + "step": 287835 + }, + { + "epoch": 0.68, + "grad_norm": 2.046875, + "learning_rate": 0.0001485357492589818, + "loss": 1.9309, + "step": 287840 + }, + { + "epoch": 0.68, + "grad_norm": 2.25, + "learning_rate": 0.0001485341332602705, + "loss": 2.0246, + "step": 287845 + }, + { + "epoch": 0.68, + "grad_norm": 2.265625, + "learning_rate": 0.000148532517244979, + "loss": 2.1224, + "step": 287850 + }, + { + "epoch": 0.68, + "grad_norm": 1.90625, + "learning_rate": 0.0001485309012131079, + "loss": 2.0802, + "step": 287855 + }, + { + "epoch": 0.68, + "grad_norm": 2.328125, + "learning_rate": 0.00014852928516465778, + "loss": 2.0643, + "step": 287860 + }, + { + "epoch": 0.68, + "grad_norm": 1.9609375, + "learning_rate": 0.00014852766909962921, + "loss": 2.071, + "step": 287865 + }, + { + "epoch": 0.68, + "grad_norm": 2.296875, + "learning_rate": 0.00014852605301802268, + "loss": 1.9547, + "step": 287870 + }, + { + "epoch": 0.68, + "grad_norm": 1.78125, + "learning_rate": 0.00014852443691983877, + "loss": 1.9975, + "step": 287875 + }, + { + "epoch": 0.68, + "grad_norm": 2.40625, + "learning_rate": 0.000148522820805078, + "loss": 2.1623, + "step": 287880 + }, + { + "epoch": 0.68, + "grad_norm": 2.21875, + "learning_rate": 0.000148521204673741, + "loss": 2.1905, + "step": 287885 + }, + { + "epoch": 0.68, + "grad_norm": 2.515625, + "learning_rate": 0.00014851958852582828, + "loss": 2.1391, + "step": 287890 + }, + { + "epoch": 0.68, + "grad_norm": 2.390625, + "learning_rate": 0.00014851797236134037, + "loss": 2.0087, + "step": 287895 + }, + { + "epoch": 0.68, + "grad_norm": 2.171875, + "learning_rate": 0.00014851635618027783, + "loss": 2.0563, + "step": 287900 + }, + { + "epoch": 0.68, + "grad_norm": 1.8515625, + "learning_rate": 0.00014851473998264122, + "loss": 2.1523, + "step": 287905 + }, + { + "epoch": 0.68, + "grad_norm": 2.234375, + "learning_rate": 0.00014851312376843115, + "loss": 2.0776, + "step": 287910 + }, + { + "epoch": 0.68, + "grad_norm": 2.484375, + "learning_rate": 0.00014851150753764807, + "loss": 2.1336, + "step": 287915 + }, + { + "epoch": 0.68, + "grad_norm": 1.9375, + "learning_rate": 0.00014850989129029259, + "loss": 1.9629, + "step": 287920 + }, + { + "epoch": 0.68, + "grad_norm": 2.125, + "learning_rate": 0.00014850827502636523, + "loss": 2.2015, + "step": 287925 + }, + { + "epoch": 0.68, + "grad_norm": 2.4375, + "learning_rate": 0.0001485066587458666, + "loss": 2.176, + "step": 287930 + }, + { + "epoch": 0.68, + "grad_norm": 2.21875, + "learning_rate": 0.0001485050424487972, + "loss": 2.0393, + "step": 287935 + }, + { + "epoch": 0.68, + "grad_norm": 2.4375, + "learning_rate": 0.00014850342613515757, + "loss": 2.1495, + "step": 287940 + }, + { + "epoch": 0.68, + "grad_norm": 2.0625, + "learning_rate": 0.00014850180980494835, + "loss": 2.0136, + "step": 287945 + }, + { + "epoch": 0.68, + "grad_norm": 2.203125, + "learning_rate": 0.00014850019345816998, + "loss": 2.0472, + "step": 287950 + }, + { + "epoch": 0.68, + "grad_norm": 1.875, + "learning_rate": 0.0001484985770948231, + "loss": 2.005, + "step": 287955 + }, + { + "epoch": 0.68, + "grad_norm": 1.9921875, + "learning_rate": 0.0001484969607149082, + "loss": 1.8915, + "step": 287960 + }, + { + "epoch": 0.68, + "grad_norm": 2.046875, + "learning_rate": 0.00014849534431842586, + "loss": 1.8865, + "step": 287965 + }, + { + "epoch": 0.68, + "grad_norm": 2.078125, + "learning_rate": 0.00014849372790537663, + "loss": 2.3355, + "step": 287970 + }, + { + "epoch": 0.68, + "grad_norm": 1.875, + "learning_rate": 0.00014849211147576106, + "loss": 2.0504, + "step": 287975 + }, + { + "epoch": 0.68, + "grad_norm": 5.6875, + "learning_rate": 0.00014849049502957976, + "loss": 2.1585, + "step": 287980 + }, + { + "epoch": 0.68, + "grad_norm": 2.171875, + "learning_rate": 0.00014848887856683317, + "loss": 2.0194, + "step": 287985 + }, + { + "epoch": 0.68, + "grad_norm": 2.3125, + "learning_rate": 0.0001484872620875219, + "loss": 2.0734, + "step": 287990 + }, + { + "epoch": 0.68, + "grad_norm": 2.328125, + "learning_rate": 0.00014848564559164653, + "loss": 2.0963, + "step": 287995 + }, + { + "epoch": 0.68, + "grad_norm": 2.15625, + "learning_rate": 0.00014848402907920754, + "loss": 1.9471, + "step": 288000 + }, + { + "epoch": 0.68, + "grad_norm": 1.9453125, + "learning_rate": 0.00014848241255020558, + "loss": 2.0466, + "step": 288005 + }, + { + "epoch": 0.68, + "grad_norm": 2.28125, + "learning_rate": 0.00014848079600464112, + "loss": 2.1084, + "step": 288010 + }, + { + "epoch": 0.68, + "grad_norm": 2.046875, + "learning_rate": 0.00014847917944251475, + "loss": 2.1905, + "step": 288015 + }, + { + "epoch": 0.68, + "grad_norm": 2.09375, + "learning_rate": 0.000148477562863827, + "loss": 1.9626, + "step": 288020 + }, + { + "epoch": 0.68, + "grad_norm": 2.203125, + "learning_rate": 0.00014847594626857846, + "loss": 2.1633, + "step": 288025 + }, + { + "epoch": 0.68, + "grad_norm": 1.8359375, + "learning_rate": 0.00014847432965676963, + "loss": 2.1002, + "step": 288030 + }, + { + "epoch": 0.68, + "grad_norm": 2.375, + "learning_rate": 0.0001484727130284011, + "loss": 2.0625, + "step": 288035 + }, + { + "epoch": 0.68, + "grad_norm": 2.28125, + "learning_rate": 0.0001484710963834734, + "loss": 2.0641, + "step": 288040 + }, + { + "epoch": 0.68, + "grad_norm": 2.125, + "learning_rate": 0.00014846947972198713, + "loss": 2.1707, + "step": 288045 + }, + { + "epoch": 0.68, + "grad_norm": 2.0625, + "learning_rate": 0.0001484678630439428, + "loss": 1.8971, + "step": 288050 + }, + { + "epoch": 0.68, + "grad_norm": 2.265625, + "learning_rate": 0.00014846624634934095, + "loss": 2.0816, + "step": 288055 + }, + { + "epoch": 0.68, + "grad_norm": 2.203125, + "learning_rate": 0.00014846462963818215, + "loss": 1.8752, + "step": 288060 + }, + { + "epoch": 0.68, + "grad_norm": 2.15625, + "learning_rate": 0.00014846301291046693, + "loss": 2.045, + "step": 288065 + }, + { + "epoch": 0.68, + "grad_norm": 2.3125, + "learning_rate": 0.00014846139616619592, + "loss": 1.9078, + "step": 288070 + }, + { + "epoch": 0.68, + "grad_norm": 2.296875, + "learning_rate": 0.0001484597794053696, + "loss": 2.0303, + "step": 288075 + }, + { + "epoch": 0.68, + "grad_norm": 2.34375, + "learning_rate": 0.00014845816262798852, + "loss": 2.0354, + "step": 288080 + }, + { + "epoch": 0.68, + "grad_norm": 2.328125, + "learning_rate": 0.00014845654583405327, + "loss": 2.1084, + "step": 288085 + }, + { + "epoch": 0.68, + "grad_norm": 2.265625, + "learning_rate": 0.0001484549290235644, + "loss": 1.9247, + "step": 288090 + }, + { + "epoch": 0.68, + "grad_norm": 2.4375, + "learning_rate": 0.0001484533121965224, + "loss": 2.0936, + "step": 288095 + }, + { + "epoch": 0.68, + "grad_norm": 2.546875, + "learning_rate": 0.0001484516953529279, + "loss": 2.1746, + "step": 288100 + }, + { + "epoch": 0.68, + "grad_norm": 2.328125, + "learning_rate": 0.00014845007849278142, + "loss": 2.1391, + "step": 288105 + }, + { + "epoch": 0.68, + "grad_norm": 1.890625, + "learning_rate": 0.00014844846161608352, + "loss": 2.0215, + "step": 288110 + }, + { + "epoch": 0.68, + "grad_norm": 2.359375, + "learning_rate": 0.00014844684472283475, + "loss": 1.976, + "step": 288115 + }, + { + "epoch": 0.68, + "grad_norm": 2.078125, + "learning_rate": 0.00014844522781303564, + "loss": 1.9423, + "step": 288120 + }, + { + "epoch": 0.68, + "grad_norm": 2.15625, + "learning_rate": 0.00014844361088668678, + "loss": 2.0509, + "step": 288125 + }, + { + "epoch": 0.68, + "grad_norm": 2.421875, + "learning_rate": 0.0001484419939437887, + "loss": 2.2192, + "step": 288130 + }, + { + "epoch": 0.68, + "grad_norm": 1.9609375, + "learning_rate": 0.00014844037698434195, + "loss": 1.7791, + "step": 288135 + }, + { + "epoch": 0.68, + "grad_norm": 2.21875, + "learning_rate": 0.0001484387600083471, + "loss": 2.0001, + "step": 288140 + }, + { + "epoch": 0.68, + "grad_norm": 2.28125, + "learning_rate": 0.0001484371430158047, + "loss": 1.9371, + "step": 288145 + }, + { + "epoch": 0.68, + "grad_norm": 1.8671875, + "learning_rate": 0.00014843552600671527, + "loss": 1.8301, + "step": 288150 + }, + { + "epoch": 0.68, + "grad_norm": 2.4375, + "learning_rate": 0.00014843390898107938, + "loss": 2.1416, + "step": 288155 + }, + { + "epoch": 0.68, + "grad_norm": 2.109375, + "learning_rate": 0.0001484322919388976, + "loss": 2.1268, + "step": 288160 + }, + { + "epoch": 0.68, + "grad_norm": 2.578125, + "learning_rate": 0.00014843067488017048, + "loss": 2.0986, + "step": 288165 + }, + { + "epoch": 0.68, + "grad_norm": 2.234375, + "learning_rate": 0.00014842905780489856, + "loss": 1.9788, + "step": 288170 + }, + { + "epoch": 0.68, + "grad_norm": 2.640625, + "learning_rate": 0.00014842744071308237, + "loss": 2.2046, + "step": 288175 + }, + { + "epoch": 0.68, + "grad_norm": 1.8828125, + "learning_rate": 0.0001484258236047225, + "loss": 2.1621, + "step": 288180 + }, + { + "epoch": 0.68, + "grad_norm": 2.25, + "learning_rate": 0.00014842420647981952, + "loss": 2.1536, + "step": 288185 + }, + { + "epoch": 0.68, + "grad_norm": 1.953125, + "learning_rate": 0.00014842258933837394, + "loss": 1.9739, + "step": 288190 + }, + { + "epoch": 0.68, + "grad_norm": 1.984375, + "learning_rate": 0.00014842097218038632, + "loss": 2.2179, + "step": 288195 + }, + { + "epoch": 0.68, + "grad_norm": 2.53125, + "learning_rate": 0.0001484193550058572, + "loss": 2.1328, + "step": 288200 + }, + { + "epoch": 0.68, + "grad_norm": 2.03125, + "learning_rate": 0.00014841773781478716, + "loss": 2.178, + "step": 288205 + }, + { + "epoch": 0.68, + "grad_norm": 2.046875, + "learning_rate": 0.00014841612060717676, + "loss": 2.327, + "step": 288210 + }, + { + "epoch": 0.68, + "grad_norm": 2.0, + "learning_rate": 0.0001484145033830265, + "loss": 2.2772, + "step": 288215 + }, + { + "epoch": 0.68, + "grad_norm": 2.765625, + "learning_rate": 0.00014841288614233703, + "loss": 2.2987, + "step": 288220 + }, + { + "epoch": 0.68, + "grad_norm": 2.125, + "learning_rate": 0.00014841126888510877, + "loss": 2.1056, + "step": 288225 + }, + { + "epoch": 0.68, + "grad_norm": 2.171875, + "learning_rate": 0.00014840965161134237, + "loss": 1.93, + "step": 288230 + }, + { + "epoch": 0.68, + "grad_norm": 2.328125, + "learning_rate": 0.00014840803432103835, + "loss": 1.8221, + "step": 288235 + }, + { + "epoch": 0.68, + "grad_norm": 2.53125, + "learning_rate": 0.00014840641701419728, + "loss": 2.1287, + "step": 288240 + }, + { + "epoch": 0.68, + "grad_norm": 2.390625, + "learning_rate": 0.0001484047996908197, + "loss": 2.0096, + "step": 288245 + }, + { + "epoch": 0.68, + "grad_norm": 2.015625, + "learning_rate": 0.00014840318235090615, + "loss": 1.9797, + "step": 288250 + }, + { + "epoch": 0.68, + "grad_norm": 2.296875, + "learning_rate": 0.00014840156499445722, + "loss": 1.8473, + "step": 288255 + }, + { + "epoch": 0.68, + "grad_norm": 2.3125, + "learning_rate": 0.0001483999476214734, + "loss": 2.0515, + "step": 288260 + }, + { + "epoch": 0.68, + "grad_norm": 2.109375, + "learning_rate": 0.0001483983302319553, + "loss": 1.938, + "step": 288265 + }, + { + "epoch": 0.68, + "grad_norm": 2.171875, + "learning_rate": 0.00014839671282590347, + "loss": 2.2187, + "step": 288270 + }, + { + "epoch": 0.68, + "grad_norm": 2.09375, + "learning_rate": 0.00014839509540331844, + "loss": 2.1782, + "step": 288275 + }, + { + "epoch": 0.68, + "grad_norm": 2.34375, + "learning_rate": 0.00014839347796420074, + "loss": 2.1551, + "step": 288280 + }, + { + "epoch": 0.68, + "grad_norm": 1.96875, + "learning_rate": 0.00014839186050855097, + "loss": 2.1462, + "step": 288285 + }, + { + "epoch": 0.68, + "grad_norm": 2.90625, + "learning_rate": 0.00014839024303636966, + "loss": 1.9843, + "step": 288290 + }, + { + "epoch": 0.68, + "grad_norm": 2.203125, + "learning_rate": 0.00014838862554765735, + "loss": 2.0747, + "step": 288295 + }, + { + "epoch": 0.68, + "grad_norm": 2.390625, + "learning_rate": 0.00014838700804241465, + "loss": 2.1598, + "step": 288300 + }, + { + "epoch": 0.68, + "grad_norm": 2.390625, + "learning_rate": 0.00014838539052064204, + "loss": 2.0588, + "step": 288305 + }, + { + "epoch": 0.68, + "grad_norm": 2.140625, + "learning_rate": 0.0001483837729823401, + "loss": 1.9024, + "step": 288310 + }, + { + "epoch": 0.68, + "grad_norm": 2.046875, + "learning_rate": 0.00014838215542750937, + "loss": 1.935, + "step": 288315 + }, + { + "epoch": 0.68, + "grad_norm": 2.203125, + "learning_rate": 0.00014838053785615047, + "loss": 1.9711, + "step": 288320 + }, + { + "epoch": 0.68, + "grad_norm": 2.1875, + "learning_rate": 0.00014837892026826386, + "loss": 2.1685, + "step": 288325 + }, + { + "epoch": 0.68, + "grad_norm": 3.234375, + "learning_rate": 0.00014837730266385014, + "loss": 1.966, + "step": 288330 + }, + { + "epoch": 0.68, + "grad_norm": 2.0625, + "learning_rate": 0.00014837568504290984, + "loss": 1.8928, + "step": 288335 + }, + { + "epoch": 0.68, + "grad_norm": 2.28125, + "learning_rate": 0.00014837406740544359, + "loss": 1.9809, + "step": 288340 + }, + { + "epoch": 0.68, + "grad_norm": 2.0, + "learning_rate": 0.00014837244975145181, + "loss": 1.878, + "step": 288345 + }, + { + "epoch": 0.68, + "grad_norm": 2.25, + "learning_rate": 0.00014837083208093515, + "loss": 1.9739, + "step": 288350 + }, + { + "epoch": 0.68, + "grad_norm": 2.1875, + "learning_rate": 0.0001483692143938942, + "loss": 1.9, + "step": 288355 + }, + { + "epoch": 0.68, + "grad_norm": 2.515625, + "learning_rate": 0.00014836759669032933, + "loss": 1.9503, + "step": 288360 + }, + { + "epoch": 0.68, + "grad_norm": 2.234375, + "learning_rate": 0.0001483659789702413, + "loss": 1.9805, + "step": 288365 + }, + { + "epoch": 0.68, + "grad_norm": 2.015625, + "learning_rate": 0.00014836436123363052, + "loss": 2.158, + "step": 288370 + }, + { + "epoch": 0.68, + "grad_norm": 1.9609375, + "learning_rate": 0.00014836274348049763, + "loss": 2.2122, + "step": 288375 + }, + { + "epoch": 0.68, + "grad_norm": 2.65625, + "learning_rate": 0.00014836112571084314, + "loss": 2.212, + "step": 288380 + }, + { + "epoch": 0.68, + "grad_norm": 2.078125, + "learning_rate": 0.0001483595079246676, + "loss": 2.0373, + "step": 288385 + }, + { + "epoch": 0.68, + "grad_norm": 1.9765625, + "learning_rate": 0.0001483578901219716, + "loss": 2.104, + "step": 288390 + }, + { + "epoch": 0.68, + "grad_norm": 1.9296875, + "learning_rate": 0.00014835627230275566, + "loss": 2.2241, + "step": 288395 + }, + { + "epoch": 0.68, + "grad_norm": 2.03125, + "learning_rate": 0.00014835465446702036, + "loss": 2.1763, + "step": 288400 + }, + { + "epoch": 0.68, + "grad_norm": 2.390625, + "learning_rate": 0.0001483530366147662, + "loss": 2.0355, + "step": 288405 + }, + { + "epoch": 0.68, + "grad_norm": 2.15625, + "learning_rate": 0.0001483514187459938, + "loss": 2.1415, + "step": 288410 + }, + { + "epoch": 0.68, + "grad_norm": 2.5, + "learning_rate": 0.0001483498008607036, + "loss": 2.0217, + "step": 288415 + }, + { + "epoch": 0.68, + "grad_norm": 2.34375, + "learning_rate": 0.00014834818295889632, + "loss": 2.2492, + "step": 288420 + }, + { + "epoch": 0.68, + "grad_norm": 2.09375, + "learning_rate": 0.00014834656504057237, + "loss": 2.1358, + "step": 288425 + }, + { + "epoch": 0.68, + "grad_norm": 1.875, + "learning_rate": 0.0001483449471057324, + "loss": 2.0581, + "step": 288430 + }, + { + "epoch": 0.68, + "grad_norm": 2.59375, + "learning_rate": 0.0001483433291543769, + "loss": 2.1389, + "step": 288435 + }, + { + "epoch": 0.68, + "grad_norm": 1.7421875, + "learning_rate": 0.0001483417111865064, + "loss": 1.779, + "step": 288440 + }, + { + "epoch": 0.68, + "grad_norm": 2.640625, + "learning_rate": 0.00014834009320212156, + "loss": 2.1022, + "step": 288445 + }, + { + "epoch": 0.68, + "grad_norm": 2.625, + "learning_rate": 0.00014833847520122283, + "loss": 2.1192, + "step": 288450 + }, + { + "epoch": 0.68, + "grad_norm": 2.390625, + "learning_rate": 0.00014833685718381085, + "loss": 2.0312, + "step": 288455 + }, + { + "epoch": 0.68, + "grad_norm": 2.09375, + "learning_rate": 0.00014833523914988607, + "loss": 2.0928, + "step": 288460 + }, + { + "epoch": 0.68, + "grad_norm": 2.3125, + "learning_rate": 0.00014833362109944914, + "loss": 2.0261, + "step": 288465 + }, + { + "epoch": 0.68, + "grad_norm": 2.078125, + "learning_rate": 0.0001483320030325005, + "loss": 1.947, + "step": 288470 + }, + { + "epoch": 0.68, + "grad_norm": 1.8125, + "learning_rate": 0.0001483303849490408, + "loss": 1.9743, + "step": 288475 + }, + { + "epoch": 0.68, + "grad_norm": 2.171875, + "learning_rate": 0.0001483287668490706, + "loss": 2.2023, + "step": 288480 + }, + { + "epoch": 0.68, + "grad_norm": 2.140625, + "learning_rate": 0.0001483271487325904, + "loss": 2.0412, + "step": 288485 + }, + { + "epoch": 0.68, + "grad_norm": 2.0625, + "learning_rate": 0.00014832553059960076, + "loss": 2.195, + "step": 288490 + }, + { + "epoch": 0.68, + "grad_norm": 2.25, + "learning_rate": 0.0001483239124501022, + "loss": 2.0253, + "step": 288495 + }, + { + "epoch": 0.68, + "grad_norm": 1.90625, + "learning_rate": 0.00014832229428409538, + "loss": 2.0558, + "step": 288500 + }, + { + "epoch": 0.68, + "grad_norm": 2.1875, + "learning_rate": 0.00014832067610158077, + "loss": 2.0698, + "step": 288505 + }, + { + "epoch": 0.68, + "grad_norm": 2.265625, + "learning_rate": 0.00014831905790255894, + "loss": 2.1234, + "step": 288510 + }, + { + "epoch": 0.68, + "grad_norm": 2.234375, + "learning_rate": 0.00014831743968703043, + "loss": 2.1892, + "step": 288515 + }, + { + "epoch": 0.68, + "grad_norm": 2.3125, + "learning_rate": 0.00014831582145499581, + "loss": 2.0648, + "step": 288520 + }, + { + "epoch": 0.68, + "grad_norm": 2.0625, + "learning_rate": 0.00014831420320645563, + "loss": 1.9325, + "step": 288525 + }, + { + "epoch": 0.68, + "grad_norm": 1.9375, + "learning_rate": 0.00014831258494141044, + "loss": 1.9965, + "step": 288530 + }, + { + "epoch": 0.68, + "grad_norm": 2.96875, + "learning_rate": 0.00014831096665986082, + "loss": 2.133, + "step": 288535 + }, + { + "epoch": 0.68, + "grad_norm": 2.234375, + "learning_rate": 0.0001483093483618073, + "loss": 2.1575, + "step": 288540 + }, + { + "epoch": 0.68, + "grad_norm": 1.9296875, + "learning_rate": 0.00014830773004725038, + "loss": 2.0926, + "step": 288545 + }, + { + "epoch": 0.68, + "grad_norm": 2.359375, + "learning_rate": 0.0001483061117161907, + "loss": 2.0247, + "step": 288550 + }, + { + "epoch": 0.68, + "grad_norm": 2.515625, + "learning_rate": 0.00014830449336862876, + "loss": 2.1212, + "step": 288555 + }, + { + "epoch": 0.68, + "grad_norm": 2.25, + "learning_rate": 0.00014830287500456515, + "loss": 2.0288, + "step": 288560 + }, + { + "epoch": 0.68, + "grad_norm": 2.15625, + "learning_rate": 0.00014830125662400039, + "loss": 2.0397, + "step": 288565 + }, + { + "epoch": 0.68, + "grad_norm": 2.015625, + "learning_rate": 0.00014829963822693504, + "loss": 2.108, + "step": 288570 + }, + { + "epoch": 0.68, + "grad_norm": 1.8828125, + "learning_rate": 0.00014829801981336966, + "loss": 2.1811, + "step": 288575 + }, + { + "epoch": 0.68, + "grad_norm": 1.65625, + "learning_rate": 0.00014829640138330479, + "loss": 1.9335, + "step": 288580 + }, + { + "epoch": 0.68, + "grad_norm": 2.15625, + "learning_rate": 0.000148294782936741, + "loss": 1.9293, + "step": 288585 + }, + { + "epoch": 0.68, + "grad_norm": 2.34375, + "learning_rate": 0.00014829316447367884, + "loss": 2.0183, + "step": 288590 + }, + { + "epoch": 0.68, + "grad_norm": 2.546875, + "learning_rate": 0.0001482915459941189, + "loss": 2.0858, + "step": 288595 + }, + { + "epoch": 0.68, + "grad_norm": 2.609375, + "learning_rate": 0.00014828992749806163, + "loss": 2.1189, + "step": 288600 + }, + { + "epoch": 0.68, + "grad_norm": 2.5, + "learning_rate": 0.00014828830898550764, + "loss": 2.1182, + "step": 288605 + }, + { + "epoch": 0.68, + "grad_norm": 2.140625, + "learning_rate": 0.00014828669045645752, + "loss": 2.0014, + "step": 288610 + }, + { + "epoch": 0.68, + "grad_norm": 1.640625, + "learning_rate": 0.0001482850719109118, + "loss": 1.9891, + "step": 288615 + }, + { + "epoch": 0.68, + "grad_norm": 1.84375, + "learning_rate": 0.00014828345334887103, + "loss": 2.0641, + "step": 288620 + }, + { + "epoch": 0.68, + "grad_norm": 2.3125, + "learning_rate": 0.00014828183477033573, + "loss": 1.9306, + "step": 288625 + }, + { + "epoch": 0.68, + "grad_norm": 2.125, + "learning_rate": 0.00014828021617530648, + "loss": 1.9892, + "step": 288630 + }, + { + "epoch": 0.68, + "grad_norm": 2.09375, + "learning_rate": 0.00014827859756378386, + "loss": 1.9248, + "step": 288635 + }, + { + "epoch": 0.68, + "grad_norm": 2.046875, + "learning_rate": 0.00014827697893576839, + "loss": 2.1287, + "step": 288640 + }, + { + "epoch": 0.68, + "grad_norm": 2.140625, + "learning_rate": 0.0001482753602912606, + "loss": 2.0652, + "step": 288645 + }, + { + "epoch": 0.68, + "grad_norm": 2.140625, + "learning_rate": 0.0001482737416302611, + "loss": 2.042, + "step": 288650 + }, + { + "epoch": 0.68, + "grad_norm": 1.953125, + "learning_rate": 0.00014827212295277035, + "loss": 1.9051, + "step": 288655 + }, + { + "epoch": 0.68, + "grad_norm": 2.25, + "learning_rate": 0.00014827050425878904, + "loss": 2.1671, + "step": 288660 + }, + { + "epoch": 0.68, + "grad_norm": 2.15625, + "learning_rate": 0.00014826888554831763, + "loss": 2.1377, + "step": 288665 + }, + { + "epoch": 0.68, + "grad_norm": 5.0, + "learning_rate": 0.00014826726682135667, + "loss": 1.9481, + "step": 288670 + }, + { + "epoch": 0.68, + "grad_norm": 2.390625, + "learning_rate": 0.00014826564807790676, + "loss": 2.0913, + "step": 288675 + }, + { + "epoch": 0.68, + "grad_norm": 2.21875, + "learning_rate": 0.00014826402931796844, + "loss": 2.0825, + "step": 288680 + }, + { + "epoch": 0.68, + "grad_norm": 2.390625, + "learning_rate": 0.00014826241054154225, + "loss": 2.0824, + "step": 288685 + }, + { + "epoch": 0.68, + "grad_norm": 1.75, + "learning_rate": 0.00014826079174862873, + "loss": 1.9085, + "step": 288690 + }, + { + "epoch": 0.68, + "grad_norm": 1.9921875, + "learning_rate": 0.00014825917293922845, + "loss": 1.9803, + "step": 288695 + }, + { + "epoch": 0.68, + "grad_norm": 2.0625, + "learning_rate": 0.00014825755411334194, + "loss": 2.0204, + "step": 288700 + }, + { + "epoch": 0.68, + "grad_norm": 2.171875, + "learning_rate": 0.00014825593527096982, + "loss": 2.1092, + "step": 288705 + }, + { + "epoch": 0.68, + "grad_norm": 2.109375, + "learning_rate": 0.00014825431641211256, + "loss": 1.9115, + "step": 288710 + }, + { + "epoch": 0.68, + "grad_norm": 2.640625, + "learning_rate": 0.00014825269753677075, + "loss": 1.9509, + "step": 288715 + }, + { + "epoch": 0.68, + "grad_norm": 2.28125, + "learning_rate": 0.00014825107864494498, + "loss": 2.1244, + "step": 288720 + }, + { + "epoch": 0.68, + "grad_norm": 2.34375, + "learning_rate": 0.00014824945973663573, + "loss": 2.0387, + "step": 288725 + }, + { + "epoch": 0.68, + "grad_norm": 2.0625, + "learning_rate": 0.00014824784081184363, + "loss": 2.1236, + "step": 288730 + }, + { + "epoch": 0.68, + "grad_norm": 3.015625, + "learning_rate": 0.00014824622187056916, + "loss": 2.1441, + "step": 288735 + }, + { + "epoch": 0.68, + "grad_norm": 2.265625, + "learning_rate": 0.0001482446029128129, + "loss": 2.165, + "step": 288740 + }, + { + "epoch": 0.68, + "grad_norm": 2.265625, + "learning_rate": 0.00014824298393857543, + "loss": 2.2282, + "step": 288745 + }, + { + "epoch": 0.68, + "grad_norm": 2.046875, + "learning_rate": 0.0001482413649478573, + "loss": 2.0906, + "step": 288750 + }, + { + "epoch": 0.68, + "grad_norm": 1.7109375, + "learning_rate": 0.000148239745940659, + "loss": 2.1082, + "step": 288755 + }, + { + "epoch": 0.68, + "grad_norm": 2.546875, + "learning_rate": 0.00014823812691698116, + "loss": 1.9321, + "step": 288760 + }, + { + "epoch": 0.68, + "grad_norm": 2.265625, + "learning_rate": 0.00014823650787682425, + "loss": 2.114, + "step": 288765 + }, + { + "epoch": 0.68, + "grad_norm": 2.3125, + "learning_rate": 0.00014823488882018893, + "loss": 2.0703, + "step": 288770 + }, + { + "epoch": 0.68, + "grad_norm": 2.34375, + "learning_rate": 0.00014823326974707567, + "loss": 2.1485, + "step": 288775 + }, + { + "epoch": 0.68, + "grad_norm": 2.40625, + "learning_rate": 0.00014823165065748508, + "loss": 2.1779, + "step": 288780 + }, + { + "epoch": 0.68, + "grad_norm": 2.34375, + "learning_rate": 0.00014823003155141766, + "loss": 2.1752, + "step": 288785 + }, + { + "epoch": 0.68, + "grad_norm": 2.296875, + "learning_rate": 0.00014822841242887396, + "loss": 2.1409, + "step": 288790 + }, + { + "epoch": 0.68, + "grad_norm": 2.140625, + "learning_rate": 0.0001482267932898546, + "loss": 1.9634, + "step": 288795 + }, + { + "epoch": 0.68, + "grad_norm": 2.578125, + "learning_rate": 0.00014822517413436007, + "loss": 1.9039, + "step": 288800 + }, + { + "epoch": 0.68, + "grad_norm": 2.546875, + "learning_rate": 0.00014822355496239096, + "loss": 2.0709, + "step": 288805 + }, + { + "epoch": 0.68, + "grad_norm": 2.53125, + "learning_rate": 0.00014822193577394783, + "loss": 1.925, + "step": 288810 + }, + { + "epoch": 0.68, + "grad_norm": 2.546875, + "learning_rate": 0.00014822031656903118, + "loss": 1.8884, + "step": 288815 + }, + { + "epoch": 0.68, + "grad_norm": 2.53125, + "learning_rate": 0.0001482186973476416, + "loss": 1.9721, + "step": 288820 + }, + { + "epoch": 0.68, + "grad_norm": 1.8515625, + "learning_rate": 0.00014821707810977962, + "loss": 1.8981, + "step": 288825 + }, + { + "epoch": 0.68, + "grad_norm": 2.28125, + "learning_rate": 0.00014821545885544584, + "loss": 2.0787, + "step": 288830 + }, + { + "epoch": 0.68, + "grad_norm": 2.203125, + "learning_rate": 0.00014821383958464078, + "loss": 2.0044, + "step": 288835 + }, + { + "epoch": 0.68, + "grad_norm": 3.234375, + "learning_rate": 0.000148212220297365, + "loss": 2.2192, + "step": 288840 + }, + { + "epoch": 0.68, + "grad_norm": 2.265625, + "learning_rate": 0.00014821060099361907, + "loss": 2.048, + "step": 288845 + }, + { + "epoch": 0.68, + "grad_norm": 2.265625, + "learning_rate": 0.0001482089816734035, + "loss": 2.1376, + "step": 288850 + }, + { + "epoch": 0.68, + "grad_norm": 2.3125, + "learning_rate": 0.00014820736233671887, + "loss": 2.1911, + "step": 288855 + }, + { + "epoch": 0.68, + "grad_norm": 2.109375, + "learning_rate": 0.0001482057429835657, + "loss": 2.1245, + "step": 288860 + }, + { + "epoch": 0.68, + "grad_norm": 1.984375, + "learning_rate": 0.00014820412361394463, + "loss": 2.1671, + "step": 288865 + }, + { + "epoch": 0.68, + "grad_norm": 2.53125, + "learning_rate": 0.0001482025042278561, + "loss": 2.0764, + "step": 288870 + }, + { + "epoch": 0.68, + "grad_norm": 2.265625, + "learning_rate": 0.00014820088482530078, + "loss": 2.0126, + "step": 288875 + }, + { + "epoch": 0.68, + "grad_norm": 2.0, + "learning_rate": 0.0001481992654062791, + "loss": 2.0629, + "step": 288880 + }, + { + "epoch": 0.68, + "grad_norm": 2.0, + "learning_rate": 0.00014819764597079173, + "loss": 2.0246, + "step": 288885 + }, + { + "epoch": 0.68, + "grad_norm": 2.234375, + "learning_rate": 0.00014819602651883917, + "loss": 2.1383, + "step": 288890 + }, + { + "epoch": 0.68, + "grad_norm": 2.859375, + "learning_rate": 0.00014819440705042194, + "loss": 2.0809, + "step": 288895 + }, + { + "epoch": 0.68, + "grad_norm": 2.703125, + "learning_rate": 0.00014819278756554062, + "loss": 2.2841, + "step": 288900 + }, + { + "epoch": 0.68, + "grad_norm": 2.03125, + "learning_rate": 0.00014819116806419582, + "loss": 1.8853, + "step": 288905 + }, + { + "epoch": 0.68, + "grad_norm": 2.125, + "learning_rate": 0.00014818954854638801, + "loss": 1.9967, + "step": 288910 + }, + { + "epoch": 0.68, + "grad_norm": 3.234375, + "learning_rate": 0.00014818792901211779, + "loss": 2.2039, + "step": 288915 + }, + { + "epoch": 0.68, + "grad_norm": 2.53125, + "learning_rate": 0.0001481863094613857, + "loss": 2.0609, + "step": 288920 + }, + { + "epoch": 0.68, + "grad_norm": 1.984375, + "learning_rate": 0.00014818468989419225, + "loss": 2.0408, + "step": 288925 + }, + { + "epoch": 0.68, + "grad_norm": 2.25, + "learning_rate": 0.00014818307031053807, + "loss": 2.1632, + "step": 288930 + }, + { + "epoch": 0.68, + "grad_norm": 2.546875, + "learning_rate": 0.00014818145071042368, + "loss": 2.1738, + "step": 288935 + }, + { + "epoch": 0.68, + "grad_norm": 2.546875, + "learning_rate": 0.00014817983109384966, + "loss": 2.1186, + "step": 288940 + }, + { + "epoch": 0.68, + "grad_norm": 2.421875, + "learning_rate": 0.0001481782114608165, + "loss": 2.0095, + "step": 288945 + }, + { + "epoch": 0.68, + "grad_norm": 2.390625, + "learning_rate": 0.00014817659181132477, + "loss": 2.0565, + "step": 288950 + }, + { + "epoch": 0.68, + "grad_norm": 2.25, + "learning_rate": 0.00014817497214537507, + "loss": 1.8928, + "step": 288955 + }, + { + "epoch": 0.68, + "grad_norm": 2.203125, + "learning_rate": 0.00014817335246296794, + "loss": 2.1012, + "step": 288960 + }, + { + "epoch": 0.68, + "grad_norm": 2.109375, + "learning_rate": 0.0001481717327641039, + "loss": 1.9898, + "step": 288965 + }, + { + "epoch": 0.68, + "grad_norm": 2.609375, + "learning_rate": 0.00014817011304878354, + "loss": 2.0683, + "step": 288970 + }, + { + "epoch": 0.68, + "grad_norm": 2.609375, + "learning_rate": 0.00014816849331700737, + "loss": 2.1807, + "step": 288975 + }, + { + "epoch": 0.68, + "grad_norm": 2.453125, + "learning_rate": 0.000148166873568776, + "loss": 2.055, + "step": 288980 + }, + { + "epoch": 0.68, + "grad_norm": 2.265625, + "learning_rate": 0.00014816525380408993, + "loss": 2.0925, + "step": 288985 + }, + { + "epoch": 0.68, + "grad_norm": 2.015625, + "learning_rate": 0.00014816363402294974, + "loss": 2.1145, + "step": 288990 + }, + { + "epoch": 0.68, + "grad_norm": 2.046875, + "learning_rate": 0.00014816201422535597, + "loss": 1.8308, + "step": 288995 + }, + { + "epoch": 0.68, + "grad_norm": 2.09375, + "learning_rate": 0.00014816039441130919, + "loss": 2.1832, + "step": 289000 + }, + { + "epoch": 0.68, + "grad_norm": 1.7109375, + "learning_rate": 0.00014815877458080993, + "loss": 2.1523, + "step": 289005 + }, + { + "epoch": 0.68, + "grad_norm": 2.0625, + "learning_rate": 0.00014815715473385878, + "loss": 2.3392, + "step": 289010 + }, + { + "epoch": 0.68, + "grad_norm": 2.125, + "learning_rate": 0.00014815553487045627, + "loss": 2.0749, + "step": 289015 + }, + { + "epoch": 0.68, + "grad_norm": 2.234375, + "learning_rate": 0.00014815391499060297, + "loss": 2.1894, + "step": 289020 + }, + { + "epoch": 0.68, + "grad_norm": 2.1875, + "learning_rate": 0.0001481522950942994, + "loss": 2.265, + "step": 289025 + }, + { + "epoch": 0.68, + "grad_norm": 2.296875, + "learning_rate": 0.00014815067518154614, + "loss": 2.1327, + "step": 289030 + }, + { + "epoch": 0.68, + "grad_norm": 2.09375, + "learning_rate": 0.0001481490552523437, + "loss": 1.9493, + "step": 289035 + }, + { + "epoch": 0.68, + "grad_norm": 1.8671875, + "learning_rate": 0.0001481474353066927, + "loss": 2.1597, + "step": 289040 + }, + { + "epoch": 0.68, + "grad_norm": 1.984375, + "learning_rate": 0.00014814581534459368, + "loss": 1.9404, + "step": 289045 + }, + { + "epoch": 0.68, + "grad_norm": 2.125, + "learning_rate": 0.0001481441953660472, + "loss": 2.0281, + "step": 289050 + }, + { + "epoch": 0.68, + "grad_norm": 2.40625, + "learning_rate": 0.00014814257537105373, + "loss": 2.1323, + "step": 289055 + }, + { + "epoch": 0.68, + "grad_norm": 2.046875, + "learning_rate": 0.00014814095535961388, + "loss": 1.9008, + "step": 289060 + }, + { + "epoch": 0.68, + "grad_norm": 2.390625, + "learning_rate": 0.00014813933533172825, + "loss": 2.0264, + "step": 289065 + }, + { + "epoch": 0.68, + "grad_norm": 2.234375, + "learning_rate": 0.00014813771528739733, + "loss": 1.8395, + "step": 289070 + }, + { + "epoch": 0.68, + "grad_norm": 2.140625, + "learning_rate": 0.00014813609522662172, + "loss": 2.2078, + "step": 289075 + }, + { + "epoch": 0.68, + "grad_norm": 2.15625, + "learning_rate": 0.0001481344751494019, + "loss": 1.9756, + "step": 289080 + }, + { + "epoch": 0.68, + "grad_norm": 2.1875, + "learning_rate": 0.00014813285505573846, + "loss": 2.1752, + "step": 289085 + }, + { + "epoch": 0.68, + "grad_norm": 2.359375, + "learning_rate": 0.000148131234945632, + "loss": 2.0376, + "step": 289090 + }, + { + "epoch": 0.68, + "grad_norm": 2.453125, + "learning_rate": 0.00014812961481908306, + "loss": 2.0496, + "step": 289095 + }, + { + "epoch": 0.68, + "grad_norm": 2.375, + "learning_rate": 0.00014812799467609213, + "loss": 1.9962, + "step": 289100 + }, + { + "epoch": 0.68, + "grad_norm": 1.9453125, + "learning_rate": 0.00014812637451665982, + "loss": 2.2423, + "step": 289105 + }, + { + "epoch": 0.68, + "grad_norm": 2.4375, + "learning_rate": 0.00014812475434078666, + "loss": 2.096, + "step": 289110 + }, + { + "epoch": 0.68, + "grad_norm": 2.140625, + "learning_rate": 0.00014812313414847323, + "loss": 2.0973, + "step": 289115 + }, + { + "epoch": 0.68, + "grad_norm": 1.875, + "learning_rate": 0.00014812151393972002, + "loss": 2.0734, + "step": 289120 + }, + { + "epoch": 0.68, + "grad_norm": 1.9765625, + "learning_rate": 0.00014811989371452767, + "loss": 2.0613, + "step": 289125 + }, + { + "epoch": 0.68, + "grad_norm": 2.25, + "learning_rate": 0.00014811827347289668, + "loss": 2.0847, + "step": 289130 + }, + { + "epoch": 0.68, + "grad_norm": 2.296875, + "learning_rate": 0.0001481166532148276, + "loss": 1.988, + "step": 289135 + }, + { + "epoch": 0.68, + "grad_norm": 1.9609375, + "learning_rate": 0.00014811503294032103, + "loss": 2.1379, + "step": 289140 + }, + { + "epoch": 0.68, + "grad_norm": 2.125, + "learning_rate": 0.0001481134126493775, + "loss": 2.0613, + "step": 289145 + }, + { + "epoch": 0.68, + "grad_norm": 2.609375, + "learning_rate": 0.0001481117923419975, + "loss": 2.158, + "step": 289150 + }, + { + "epoch": 0.68, + "grad_norm": 2.203125, + "learning_rate": 0.00014811017201818167, + "loss": 2.085, + "step": 289155 + }, + { + "epoch": 0.68, + "grad_norm": 2.328125, + "learning_rate": 0.00014810855167793056, + "loss": 1.968, + "step": 289160 + }, + { + "epoch": 0.68, + "grad_norm": 2.34375, + "learning_rate": 0.00014810693132124466, + "loss": 2.1183, + "step": 289165 + }, + { + "epoch": 0.68, + "grad_norm": 2.46875, + "learning_rate": 0.00014810531094812455, + "loss": 2.0253, + "step": 289170 + }, + { + "epoch": 0.68, + "grad_norm": 2.09375, + "learning_rate": 0.00014810369055857082, + "loss": 1.915, + "step": 289175 + }, + { + "epoch": 0.68, + "grad_norm": 2.4375, + "learning_rate": 0.000148102070152584, + "loss": 1.9652, + "step": 289180 + }, + { + "epoch": 0.68, + "grad_norm": 2.015625, + "learning_rate": 0.00014810044973016464, + "loss": 2.2082, + "step": 289185 + }, + { + "epoch": 0.68, + "grad_norm": 2.203125, + "learning_rate": 0.00014809882929131326, + "loss": 1.8267, + "step": 289190 + }, + { + "epoch": 0.68, + "grad_norm": 2.15625, + "learning_rate": 0.00014809720883603047, + "loss": 2.0796, + "step": 289195 + }, + { + "epoch": 0.68, + "grad_norm": 2.203125, + "learning_rate": 0.00014809558836431682, + "loss": 2.1846, + "step": 289200 + }, + { + "epoch": 0.68, + "grad_norm": 2.15625, + "learning_rate": 0.00014809396787617284, + "loss": 1.9883, + "step": 289205 + }, + { + "epoch": 0.68, + "grad_norm": 2.15625, + "learning_rate": 0.00014809234737159907, + "loss": 1.8953, + "step": 289210 + }, + { + "epoch": 0.68, + "grad_norm": 2.1875, + "learning_rate": 0.0001480907268505961, + "loss": 2.2189, + "step": 289215 + }, + { + "epoch": 0.68, + "grad_norm": 1.9921875, + "learning_rate": 0.00014808910631316442, + "loss": 2.031, + "step": 289220 + }, + { + "epoch": 0.68, + "grad_norm": 1.921875, + "learning_rate": 0.0001480874857593047, + "loss": 1.8659, + "step": 289225 + }, + { + "epoch": 0.68, + "grad_norm": 2.546875, + "learning_rate": 0.00014808586518901738, + "loss": 2.1095, + "step": 289230 + }, + { + "epoch": 0.68, + "grad_norm": 2.140625, + "learning_rate": 0.00014808424460230305, + "loss": 2.0041, + "step": 289235 + }, + { + "epoch": 0.68, + "grad_norm": 2.453125, + "learning_rate": 0.00014808262399916227, + "loss": 2.1611, + "step": 289240 + }, + { + "epoch": 0.68, + "grad_norm": 2.609375, + "learning_rate": 0.0001480810033795956, + "loss": 2.2393, + "step": 289245 + }, + { + "epoch": 0.68, + "grad_norm": 2.21875, + "learning_rate": 0.0001480793827436036, + "loss": 2.1002, + "step": 289250 + }, + { + "epoch": 0.68, + "grad_norm": 2.296875, + "learning_rate": 0.0001480777620911868, + "loss": 2.1218, + "step": 289255 + }, + { + "epoch": 0.68, + "grad_norm": 2.734375, + "learning_rate": 0.00014807614142234578, + "loss": 1.7466, + "step": 289260 + }, + { + "epoch": 0.68, + "grad_norm": 2.25, + "learning_rate": 0.00014807452073708105, + "loss": 1.9956, + "step": 289265 + }, + { + "epoch": 0.68, + "grad_norm": 2.15625, + "learning_rate": 0.00014807290003539323, + "loss": 1.9415, + "step": 289270 + }, + { + "epoch": 0.68, + "grad_norm": 1.921875, + "learning_rate": 0.0001480712793172828, + "loss": 2.1741, + "step": 289275 + }, + { + "epoch": 0.68, + "grad_norm": 2.109375, + "learning_rate": 0.00014806965858275037, + "loss": 2.1604, + "step": 289280 + }, + { + "epoch": 0.68, + "grad_norm": 2.328125, + "learning_rate": 0.00014806803783179646, + "loss": 1.9478, + "step": 289285 + }, + { + "epoch": 0.68, + "grad_norm": 2.046875, + "learning_rate": 0.00014806641706442163, + "loss": 2.3317, + "step": 289290 + }, + { + "epoch": 0.68, + "grad_norm": 1.96875, + "learning_rate": 0.00014806479628062645, + "loss": 2.1365, + "step": 289295 + }, + { + "epoch": 0.68, + "grad_norm": 1.8203125, + "learning_rate": 0.00014806317548041146, + "loss": 2.0435, + "step": 289300 + }, + { + "epoch": 0.68, + "grad_norm": 2.15625, + "learning_rate": 0.00014806155466377723, + "loss": 2.0654, + "step": 289305 + }, + { + "epoch": 0.68, + "grad_norm": 2.46875, + "learning_rate": 0.0001480599338307243, + "loss": 1.9876, + "step": 289310 + }, + { + "epoch": 0.68, + "grad_norm": 2.5, + "learning_rate": 0.00014805831298125323, + "loss": 2.3093, + "step": 289315 + }, + { + "epoch": 0.68, + "grad_norm": 2.21875, + "learning_rate": 0.00014805669211536457, + "loss": 2.2273, + "step": 289320 + }, + { + "epoch": 0.68, + "grad_norm": 2.171875, + "learning_rate": 0.00014805507123305883, + "loss": 2.2142, + "step": 289325 + }, + { + "epoch": 0.68, + "grad_norm": 2.34375, + "learning_rate": 0.00014805345033433668, + "loss": 1.9817, + "step": 289330 + }, + { + "epoch": 0.68, + "grad_norm": 2.265625, + "learning_rate": 0.00014805182941919853, + "loss": 2.076, + "step": 289335 + }, + { + "epoch": 0.68, + "grad_norm": 2.0, + "learning_rate": 0.00014805020848764503, + "loss": 2.1142, + "step": 289340 + }, + { + "epoch": 0.68, + "grad_norm": 2.046875, + "learning_rate": 0.00014804858753967673, + "loss": 2.0818, + "step": 289345 + }, + { + "epoch": 0.68, + "grad_norm": 2.453125, + "learning_rate": 0.00014804696657529413, + "loss": 2.1564, + "step": 289350 + }, + { + "epoch": 0.68, + "grad_norm": 2.34375, + "learning_rate": 0.0001480453455944978, + "loss": 1.9898, + "step": 289355 + }, + { + "epoch": 0.68, + "grad_norm": 2.703125, + "learning_rate": 0.00014804372459728836, + "loss": 2.1346, + "step": 289360 + }, + { + "epoch": 0.68, + "grad_norm": 1.7578125, + "learning_rate": 0.00014804210358366628, + "loss": 2.1986, + "step": 289365 + }, + { + "epoch": 0.68, + "grad_norm": 1.9921875, + "learning_rate": 0.00014804048255363217, + "loss": 2.0453, + "step": 289370 + }, + { + "epoch": 0.68, + "grad_norm": 2.140625, + "learning_rate": 0.00014803886150718652, + "loss": 2.0335, + "step": 289375 + }, + { + "epoch": 0.68, + "grad_norm": 2.4375, + "learning_rate": 0.00014803724044432993, + "loss": 1.9132, + "step": 289380 + }, + { + "epoch": 0.68, + "grad_norm": 1.7734375, + "learning_rate": 0.00014803561936506298, + "loss": 1.9926, + "step": 289385 + }, + { + "epoch": 0.68, + "grad_norm": 2.28125, + "learning_rate": 0.00014803399826938618, + "loss": 2.0849, + "step": 289390 + }, + { + "epoch": 0.68, + "grad_norm": 2.15625, + "learning_rate": 0.00014803237715730008, + "loss": 2.0981, + "step": 289395 + }, + { + "epoch": 0.68, + "grad_norm": 2.421875, + "learning_rate": 0.00014803075602880524, + "loss": 1.9478, + "step": 289400 + }, + { + "epoch": 0.68, + "grad_norm": 2.25, + "learning_rate": 0.00014802913488390227, + "loss": 2.1635, + "step": 289405 + }, + { + "epoch": 0.68, + "grad_norm": 2.4375, + "learning_rate": 0.00014802751372259163, + "loss": 2.1715, + "step": 289410 + }, + { + "epoch": 0.68, + "grad_norm": 2.265625, + "learning_rate": 0.00014802589254487393, + "loss": 2.0613, + "step": 289415 + }, + { + "epoch": 0.68, + "grad_norm": 2.15625, + "learning_rate": 0.00014802427135074972, + "loss": 2.0784, + "step": 289420 + }, + { + "epoch": 0.68, + "grad_norm": 2.109375, + "learning_rate": 0.00014802265014021956, + "loss": 2.017, + "step": 289425 + }, + { + "epoch": 0.68, + "grad_norm": 2.203125, + "learning_rate": 0.00014802102891328398, + "loss": 2.0443, + "step": 289430 + }, + { + "epoch": 0.68, + "grad_norm": 2.4375, + "learning_rate": 0.00014801940766994353, + "loss": 2.1038, + "step": 289435 + }, + { + "epoch": 0.68, + "grad_norm": 2.171875, + "learning_rate": 0.0001480177864101988, + "loss": 2.021, + "step": 289440 + }, + { + "epoch": 0.68, + "grad_norm": 2.328125, + "learning_rate": 0.00014801616513405031, + "loss": 2.0931, + "step": 289445 + }, + { + "epoch": 0.68, + "grad_norm": 2.1875, + "learning_rate": 0.00014801454384149863, + "loss": 2.0265, + "step": 289450 + }, + { + "epoch": 0.68, + "grad_norm": 2.46875, + "learning_rate": 0.00014801292253254435, + "loss": 1.8311, + "step": 289455 + }, + { + "epoch": 0.68, + "grad_norm": 2.234375, + "learning_rate": 0.00014801130120718794, + "loss": 1.9507, + "step": 289460 + }, + { + "epoch": 0.68, + "grad_norm": 2.734375, + "learning_rate": 0.00014800967986543, + "loss": 2.1257, + "step": 289465 + }, + { + "epoch": 0.68, + "grad_norm": 2.703125, + "learning_rate": 0.00014800805850727111, + "loss": 2.163, + "step": 289470 + }, + { + "epoch": 0.68, + "grad_norm": 2.109375, + "learning_rate": 0.00014800643713271176, + "loss": 2.1355, + "step": 289475 + }, + { + "epoch": 0.68, + "grad_norm": 2.375, + "learning_rate": 0.0001480048157417526, + "loss": 2.2366, + "step": 289480 + }, + { + "epoch": 0.68, + "grad_norm": 2.28125, + "learning_rate": 0.00014800319433439408, + "loss": 1.9038, + "step": 289485 + }, + { + "epoch": 0.68, + "grad_norm": 2.125, + "learning_rate": 0.0001480015729106368, + "loss": 2.0795, + "step": 289490 + }, + { + "epoch": 0.68, + "grad_norm": 3.234375, + "learning_rate": 0.0001479999514704813, + "loss": 2.0242, + "step": 289495 + }, + { + "epoch": 0.68, + "grad_norm": 2.125, + "learning_rate": 0.00014799833001392817, + "loss": 2.0679, + "step": 289500 + }, + { + "epoch": 0.68, + "grad_norm": 1.9765625, + "learning_rate": 0.00014799670854097793, + "loss": 2.0669, + "step": 289505 + }, + { + "epoch": 0.68, + "grad_norm": 2.140625, + "learning_rate": 0.00014799508705163114, + "loss": 2.0433, + "step": 289510 + }, + { + "epoch": 0.68, + "grad_norm": 2.40625, + "learning_rate": 0.00014799346554588834, + "loss": 2.1028, + "step": 289515 + }, + { + "epoch": 0.68, + "grad_norm": 2.40625, + "learning_rate": 0.00014799184402375016, + "loss": 2.0257, + "step": 289520 + }, + { + "epoch": 0.68, + "grad_norm": 2.0, + "learning_rate": 0.00014799022248521707, + "loss": 2.016, + "step": 289525 + }, + { + "epoch": 0.68, + "grad_norm": 2.0625, + "learning_rate": 0.00014798860093028963, + "loss": 2.0972, + "step": 289530 + }, + { + "epoch": 0.68, + "grad_norm": 2.296875, + "learning_rate": 0.00014798697935896842, + "loss": 1.9544, + "step": 289535 + }, + { + "epoch": 0.68, + "grad_norm": 2.296875, + "learning_rate": 0.00014798535777125397, + "loss": 2.1681, + "step": 289540 + }, + { + "epoch": 0.68, + "grad_norm": 2.265625, + "learning_rate": 0.0001479837361671469, + "loss": 2.1712, + "step": 289545 + }, + { + "epoch": 0.68, + "grad_norm": 2.109375, + "learning_rate": 0.00014798211454664767, + "loss": 2.114, + "step": 289550 + }, + { + "epoch": 0.68, + "grad_norm": 2.296875, + "learning_rate": 0.0001479804929097569, + "loss": 2.0499, + "step": 289555 + }, + { + "epoch": 0.68, + "grad_norm": 2.125, + "learning_rate": 0.00014797887125647514, + "loss": 2.0455, + "step": 289560 + }, + { + "epoch": 0.68, + "grad_norm": 2.421875, + "learning_rate": 0.0001479772495868029, + "loss": 2.1412, + "step": 289565 + }, + { + "epoch": 0.68, + "grad_norm": 2.671875, + "learning_rate": 0.00014797562790074077, + "loss": 2.058, + "step": 289570 + }, + { + "epoch": 0.68, + "grad_norm": 2.40625, + "learning_rate": 0.0001479740061982893, + "loss": 2.2402, + "step": 289575 + }, + { + "epoch": 0.68, + "grad_norm": 1.9296875, + "learning_rate": 0.00014797238447944902, + "loss": 2.0853, + "step": 289580 + }, + { + "epoch": 0.68, + "grad_norm": 2.671875, + "learning_rate": 0.00014797076274422052, + "loss": 2.1705, + "step": 289585 + }, + { + "epoch": 0.68, + "grad_norm": 2.5, + "learning_rate": 0.00014796914099260433, + "loss": 2.0109, + "step": 289590 + }, + { + "epoch": 0.68, + "grad_norm": 2.671875, + "learning_rate": 0.00014796751922460102, + "loss": 2.2378, + "step": 289595 + }, + { + "epoch": 0.68, + "grad_norm": 2.34375, + "learning_rate": 0.00014796589744021113, + "loss": 1.9695, + "step": 289600 + }, + { + "epoch": 0.68, + "grad_norm": 1.8828125, + "learning_rate": 0.00014796427563943524, + "loss": 2.0328, + "step": 289605 + }, + { + "epoch": 0.68, + "grad_norm": 1.84375, + "learning_rate": 0.00014796265382227385, + "loss": 2.0862, + "step": 289610 + }, + { + "epoch": 0.68, + "grad_norm": 1.7265625, + "learning_rate": 0.0001479610319887276, + "loss": 1.9318, + "step": 289615 + }, + { + "epoch": 0.68, + "grad_norm": 2.125, + "learning_rate": 0.00014795941013879692, + "loss": 2.1722, + "step": 289620 + }, + { + "epoch": 0.68, + "grad_norm": 2.234375, + "learning_rate": 0.0001479577882724825, + "loss": 2.0993, + "step": 289625 + }, + { + "epoch": 0.68, + "grad_norm": 2.21875, + "learning_rate": 0.00014795616638978477, + "loss": 1.9838, + "step": 289630 + }, + { + "epoch": 0.68, + "grad_norm": 2.09375, + "learning_rate": 0.0001479545444907044, + "loss": 2.0963, + "step": 289635 + }, + { + "epoch": 0.68, + "grad_norm": 2.0, + "learning_rate": 0.00014795292257524184, + "loss": 1.9779, + "step": 289640 + }, + { + "epoch": 0.68, + "grad_norm": 2.40625, + "learning_rate": 0.00014795130064339772, + "loss": 2.0123, + "step": 289645 + }, + { + "epoch": 0.68, + "grad_norm": 2.375, + "learning_rate": 0.00014794967869517252, + "loss": 2.1953, + "step": 289650 + }, + { + "epoch": 0.68, + "grad_norm": 1.921875, + "learning_rate": 0.0001479480567305669, + "loss": 1.9833, + "step": 289655 + }, + { + "epoch": 0.68, + "grad_norm": 2.09375, + "learning_rate": 0.00014794643474958137, + "loss": 2.1847, + "step": 289660 + }, + { + "epoch": 0.68, + "grad_norm": 2.6875, + "learning_rate": 0.00014794481275221642, + "loss": 1.9859, + "step": 289665 + }, + { + "epoch": 0.68, + "grad_norm": 2.640625, + "learning_rate": 0.00014794319073847267, + "loss": 2.0885, + "step": 289670 + }, + { + "epoch": 0.68, + "grad_norm": 2.40625, + "learning_rate": 0.00014794156870835063, + "loss": 2.0574, + "step": 289675 + }, + { + "epoch": 0.68, + "grad_norm": 2.625, + "learning_rate": 0.0001479399466618509, + "loss": 2.0762, + "step": 289680 + }, + { + "epoch": 0.68, + "grad_norm": 1.8515625, + "learning_rate": 0.00014793832459897402, + "loss": 2.072, + "step": 289685 + }, + { + "epoch": 0.68, + "grad_norm": 2.28125, + "learning_rate": 0.00014793670251972055, + "loss": 1.9777, + "step": 289690 + }, + { + "epoch": 0.68, + "grad_norm": 2.25, + "learning_rate": 0.000147935080424091, + "loss": 1.8646, + "step": 289695 + }, + { + "epoch": 0.68, + "grad_norm": 1.9296875, + "learning_rate": 0.000147933458312086, + "loss": 2.0999, + "step": 289700 + }, + { + "epoch": 0.68, + "grad_norm": 2.1875, + "learning_rate": 0.00014793183618370603, + "loss": 1.9364, + "step": 289705 + }, + { + "epoch": 0.68, + "grad_norm": 1.8671875, + "learning_rate": 0.00014793021403895168, + "loss": 2.01, + "step": 289710 + }, + { + "epoch": 0.68, + "grad_norm": 2.25, + "learning_rate": 0.0001479285918778235, + "loss": 2.0784, + "step": 289715 + }, + { + "epoch": 0.68, + "grad_norm": 1.9140625, + "learning_rate": 0.00014792696970032204, + "loss": 2.024, + "step": 289720 + }, + { + "epoch": 0.68, + "grad_norm": 2.09375, + "learning_rate": 0.00014792534750644788, + "loss": 1.9636, + "step": 289725 + }, + { + "epoch": 0.68, + "grad_norm": 2.53125, + "learning_rate": 0.00014792372529620154, + "loss": 2.1108, + "step": 289730 + }, + { + "epoch": 0.68, + "grad_norm": 2.265625, + "learning_rate": 0.0001479221030695836, + "loss": 2.2024, + "step": 289735 + }, + { + "epoch": 0.68, + "grad_norm": 2.328125, + "learning_rate": 0.00014792048082659456, + "loss": 2.0575, + "step": 289740 + }, + { + "epoch": 0.68, + "grad_norm": 2.140625, + "learning_rate": 0.00014791885856723505, + "loss": 2.0068, + "step": 289745 + }, + { + "epoch": 0.68, + "grad_norm": 2.109375, + "learning_rate": 0.00014791723629150558, + "loss": 2.103, + "step": 289750 + }, + { + "epoch": 0.68, + "grad_norm": 1.6640625, + "learning_rate": 0.0001479156139994067, + "loss": 2.1814, + "step": 289755 + }, + { + "epoch": 0.68, + "grad_norm": 2.28125, + "learning_rate": 0.00014791399169093898, + "loss": 1.992, + "step": 289760 + }, + { + "epoch": 0.68, + "grad_norm": 2.5625, + "learning_rate": 0.000147912369366103, + "loss": 2.1038, + "step": 289765 + }, + { + "epoch": 0.68, + "grad_norm": 1.9765625, + "learning_rate": 0.00014791074702489925, + "loss": 1.9707, + "step": 289770 + }, + { + "epoch": 0.68, + "grad_norm": 1.9921875, + "learning_rate": 0.00014790912466732835, + "loss": 2.118, + "step": 289775 + }, + { + "epoch": 0.68, + "grad_norm": 2.15625, + "learning_rate": 0.0001479075022933908, + "loss": 2.1178, + "step": 289780 + }, + { + "epoch": 0.68, + "grad_norm": 1.5546875, + "learning_rate": 0.00014790587990308717, + "loss": 1.8205, + "step": 289785 + }, + { + "epoch": 0.68, + "grad_norm": 2.171875, + "learning_rate": 0.00014790425749641804, + "loss": 2.0708, + "step": 289790 + }, + { + "epoch": 0.68, + "grad_norm": 2.453125, + "learning_rate": 0.00014790263507338396, + "loss": 2.2374, + "step": 289795 + }, + { + "epoch": 0.68, + "grad_norm": 2.0625, + "learning_rate": 0.00014790101263398547, + "loss": 2.1479, + "step": 289800 + }, + { + "epoch": 0.68, + "grad_norm": 2.1875, + "learning_rate": 0.00014789939017822312, + "loss": 1.9809, + "step": 289805 + }, + { + "epoch": 0.68, + "grad_norm": 2.34375, + "learning_rate": 0.00014789776770609742, + "loss": 2.1853, + "step": 289810 + }, + { + "epoch": 0.68, + "grad_norm": 2.0625, + "learning_rate": 0.00014789614521760905, + "loss": 1.988, + "step": 289815 + }, + { + "epoch": 0.68, + "grad_norm": 2.25, + "learning_rate": 0.00014789452271275844, + "loss": 2.0071, + "step": 289820 + }, + { + "epoch": 0.68, + "grad_norm": 2.15625, + "learning_rate": 0.00014789290019154622, + "loss": 2.1485, + "step": 289825 + }, + { + "epoch": 0.68, + "grad_norm": 2.359375, + "learning_rate": 0.0001478912776539729, + "loss": 2.2276, + "step": 289830 + }, + { + "epoch": 0.68, + "grad_norm": 2.046875, + "learning_rate": 0.00014788965510003903, + "loss": 2.0651, + "step": 289835 + }, + { + "epoch": 0.68, + "grad_norm": 2.015625, + "learning_rate": 0.00014788803252974525, + "loss": 2.1506, + "step": 289840 + }, + { + "epoch": 0.68, + "grad_norm": 2.5, + "learning_rate": 0.000147886409943092, + "loss": 2.032, + "step": 289845 + }, + { + "epoch": 0.68, + "grad_norm": 2.03125, + "learning_rate": 0.0001478847873400799, + "loss": 2.1896, + "step": 289850 + }, + { + "epoch": 0.68, + "grad_norm": 1.9921875, + "learning_rate": 0.00014788316472070948, + "loss": 1.8782, + "step": 289855 + }, + { + "epoch": 0.68, + "grad_norm": 1.9765625, + "learning_rate": 0.00014788154208498128, + "loss": 2.0291, + "step": 289860 + }, + { + "epoch": 0.68, + "grad_norm": 2.484375, + "learning_rate": 0.00014787991943289593, + "loss": 2.0867, + "step": 289865 + }, + { + "epoch": 0.68, + "grad_norm": 1.890625, + "learning_rate": 0.00014787829676445388, + "loss": 2.0127, + "step": 289870 + }, + { + "epoch": 0.68, + "grad_norm": 1.96875, + "learning_rate": 0.00014787667407965575, + "loss": 1.9675, + "step": 289875 + }, + { + "epoch": 0.68, + "grad_norm": 1.875, + "learning_rate": 0.0001478750513785021, + "loss": 2.0402, + "step": 289880 + }, + { + "epoch": 0.68, + "grad_norm": 1.8984375, + "learning_rate": 0.00014787342866099345, + "loss": 2.2714, + "step": 289885 + }, + { + "epoch": 0.68, + "grad_norm": 2.28125, + "learning_rate": 0.00014787180592713036, + "loss": 2.0531, + "step": 289890 + }, + { + "epoch": 0.68, + "grad_norm": 1.7734375, + "learning_rate": 0.00014787018317691342, + "loss": 1.9605, + "step": 289895 + }, + { + "epoch": 0.68, + "grad_norm": 2.203125, + "learning_rate": 0.00014786856041034313, + "loss": 2.111, + "step": 289900 + }, + { + "epoch": 0.68, + "grad_norm": 2.40625, + "learning_rate": 0.00014786693762742007, + "loss": 1.9644, + "step": 289905 + }, + { + "epoch": 0.68, + "grad_norm": 1.9375, + "learning_rate": 0.00014786531482814482, + "loss": 1.94, + "step": 289910 + }, + { + "epoch": 0.68, + "grad_norm": 2.59375, + "learning_rate": 0.0001478636920125179, + "loss": 1.9961, + "step": 289915 + }, + { + "epoch": 0.68, + "grad_norm": 1.7265625, + "learning_rate": 0.00014786206918053987, + "loss": 1.9675, + "step": 289920 + }, + { + "epoch": 0.68, + "grad_norm": 2.578125, + "learning_rate": 0.0001478604463322113, + "loss": 2.066, + "step": 289925 + }, + { + "epoch": 0.68, + "grad_norm": 1.7265625, + "learning_rate": 0.00014785882346753275, + "loss": 1.9302, + "step": 289930 + }, + { + "epoch": 0.68, + "grad_norm": 2.046875, + "learning_rate": 0.00014785720058650473, + "loss": 2.1209, + "step": 289935 + }, + { + "epoch": 0.68, + "grad_norm": 2.5625, + "learning_rate": 0.00014785557768912778, + "loss": 2.1254, + "step": 289940 + }, + { + "epoch": 0.68, + "grad_norm": 2.03125, + "learning_rate": 0.00014785395477540255, + "loss": 2.1264, + "step": 289945 + }, + { + "epoch": 0.68, + "grad_norm": 2.09375, + "learning_rate": 0.00014785233184532953, + "loss": 2.1495, + "step": 289950 + }, + { + "epoch": 0.68, + "grad_norm": 2.5, + "learning_rate": 0.00014785070889890932, + "loss": 1.8619, + "step": 289955 + }, + { + "epoch": 0.68, + "grad_norm": 2.015625, + "learning_rate": 0.00014784908593614243, + "loss": 1.8672, + "step": 289960 + }, + { + "epoch": 0.68, + "grad_norm": 2.296875, + "learning_rate": 0.00014784746295702937, + "loss": 2.139, + "step": 289965 + }, + { + "epoch": 0.68, + "grad_norm": 2.4375, + "learning_rate": 0.00014784583996157078, + "loss": 2.109, + "step": 289970 + }, + { + "epoch": 0.68, + "grad_norm": 2.09375, + "learning_rate": 0.0001478442169497672, + "loss": 2.2069, + "step": 289975 + }, + { + "epoch": 0.68, + "grad_norm": 2.28125, + "learning_rate": 0.00014784259392161916, + "loss": 1.8893, + "step": 289980 + }, + { + "epoch": 0.68, + "grad_norm": 2.03125, + "learning_rate": 0.0001478409708771272, + "loss": 2.1866, + "step": 289985 + }, + { + "epoch": 0.68, + "grad_norm": 2.375, + "learning_rate": 0.00014783934781629191, + "loss": 2.1302, + "step": 289990 + }, + { + "epoch": 0.68, + "grad_norm": 2.4375, + "learning_rate": 0.00014783772473911383, + "loss": 1.9571, + "step": 289995 + }, + { + "epoch": 0.68, + "grad_norm": 2.296875, + "learning_rate": 0.00014783610164559354, + "loss": 2.0109, + "step": 290000 + }, + { + "epoch": 0.68, + "grad_norm": 1.875, + "learning_rate": 0.00014783447853573156, + "loss": 2.1529, + "step": 290005 + }, + { + "epoch": 0.68, + "grad_norm": 2.421875, + "learning_rate": 0.00014783285540952842, + "loss": 1.9412, + "step": 290010 + }, + { + "epoch": 0.68, + "grad_norm": 1.90625, + "learning_rate": 0.00014783123226698474, + "loss": 2.1154, + "step": 290015 + }, + { + "epoch": 0.68, + "grad_norm": 2.359375, + "learning_rate": 0.00014782960910810104, + "loss": 2.0621, + "step": 290020 + }, + { + "epoch": 0.68, + "grad_norm": 2.21875, + "learning_rate": 0.00014782798593287788, + "loss": 1.9615, + "step": 290025 + }, + { + "epoch": 0.68, + "grad_norm": 2.09375, + "learning_rate": 0.00014782636274131582, + "loss": 2.1032, + "step": 290030 + }, + { + "epoch": 0.68, + "grad_norm": 2.625, + "learning_rate": 0.00014782473953341537, + "loss": 2.0525, + "step": 290035 + }, + { + "epoch": 0.68, + "grad_norm": 1.9375, + "learning_rate": 0.00014782311630917716, + "loss": 2.017, + "step": 290040 + }, + { + "epoch": 0.68, + "grad_norm": 2.1875, + "learning_rate": 0.0001478214930686017, + "loss": 2.1274, + "step": 290045 + }, + { + "epoch": 0.68, + "grad_norm": 2.015625, + "learning_rate": 0.00014781986981168954, + "loss": 2.1064, + "step": 290050 + }, + { + "epoch": 0.68, + "grad_norm": 1.9140625, + "learning_rate": 0.00014781824653844122, + "loss": 2.1063, + "step": 290055 + }, + { + "epoch": 0.68, + "grad_norm": 2.1875, + "learning_rate": 0.00014781662324885736, + "loss": 1.9687, + "step": 290060 + }, + { + "epoch": 0.68, + "grad_norm": 2.625, + "learning_rate": 0.00014781499994293845, + "loss": 1.9815, + "step": 290065 + }, + { + "epoch": 0.68, + "grad_norm": 2.328125, + "learning_rate": 0.0001478133766206851, + "loss": 1.9637, + "step": 290070 + }, + { + "epoch": 0.68, + "grad_norm": 2.515625, + "learning_rate": 0.00014781175328209783, + "loss": 2.1889, + "step": 290075 + }, + { + "epoch": 0.68, + "grad_norm": 2.265625, + "learning_rate": 0.00014781012992717715, + "loss": 2.1351, + "step": 290080 + }, + { + "epoch": 0.68, + "grad_norm": 1.9375, + "learning_rate": 0.0001478085065559237, + "loss": 1.9935, + "step": 290085 + }, + { + "epoch": 0.68, + "grad_norm": 2.171875, + "learning_rate": 0.00014780688316833802, + "loss": 1.9575, + "step": 290090 + }, + { + "epoch": 0.68, + "grad_norm": 2.25, + "learning_rate": 0.00014780525976442062, + "loss": 2.0797, + "step": 290095 + }, + { + "epoch": 0.68, + "grad_norm": 2.546875, + "learning_rate": 0.00014780363634417208, + "loss": 2.1107, + "step": 290100 + }, + { + "epoch": 0.68, + "grad_norm": 2.390625, + "learning_rate": 0.0001478020129075929, + "loss": 2.2973, + "step": 290105 + }, + { + "epoch": 0.68, + "grad_norm": 2.15625, + "learning_rate": 0.00014780038945468374, + "loss": 2.1227, + "step": 290110 + }, + { + "epoch": 0.68, + "grad_norm": 2.484375, + "learning_rate": 0.00014779876598544509, + "loss": 1.8211, + "step": 290115 + }, + { + "epoch": 0.68, + "grad_norm": 2.421875, + "learning_rate": 0.00014779714249987753, + "loss": 2.1783, + "step": 290120 + }, + { + "epoch": 0.68, + "grad_norm": 1.8125, + "learning_rate": 0.00014779551899798157, + "loss": 2.1042, + "step": 290125 + }, + { + "epoch": 0.68, + "grad_norm": 2.25, + "learning_rate": 0.0001477938954797578, + "loss": 1.9236, + "step": 290130 + }, + { + "epoch": 0.68, + "grad_norm": 2.0625, + "learning_rate": 0.00014779227194520677, + "loss": 2.1181, + "step": 290135 + }, + { + "epoch": 0.68, + "grad_norm": 2.0625, + "learning_rate": 0.00014779064839432902, + "loss": 2.2312, + "step": 290140 + }, + { + "epoch": 0.68, + "grad_norm": 2.828125, + "learning_rate": 0.00014778902482712517, + "loss": 2.1128, + "step": 290145 + }, + { + "epoch": 0.68, + "grad_norm": 2.046875, + "learning_rate": 0.00014778740124359567, + "loss": 2.1782, + "step": 290150 + }, + { + "epoch": 0.68, + "grad_norm": 2.265625, + "learning_rate": 0.00014778577764374114, + "loss": 2.0381, + "step": 290155 + }, + { + "epoch": 0.68, + "grad_norm": 1.734375, + "learning_rate": 0.00014778415402756214, + "loss": 1.7969, + "step": 290160 + }, + { + "epoch": 0.68, + "grad_norm": 3.796875, + "learning_rate": 0.00014778253039505917, + "loss": 2.1679, + "step": 290165 + }, + { + "epoch": 0.68, + "grad_norm": 2.078125, + "learning_rate": 0.00014778090674623287, + "loss": 2.1413, + "step": 290170 + }, + { + "epoch": 0.68, + "grad_norm": 2.25, + "learning_rate": 0.0001477792830810837, + "loss": 2.1161, + "step": 290175 + }, + { + "epoch": 0.68, + "grad_norm": 2.21875, + "learning_rate": 0.00014777765939961228, + "loss": 2.1326, + "step": 290180 + }, + { + "epoch": 0.68, + "grad_norm": 1.96875, + "learning_rate": 0.00014777603570181915, + "loss": 2.1471, + "step": 290185 + }, + { + "epoch": 0.68, + "grad_norm": 2.140625, + "learning_rate": 0.00014777441198770484, + "loss": 2.1466, + "step": 290190 + }, + { + "epoch": 0.68, + "grad_norm": 1.8359375, + "learning_rate": 0.00014777278825726996, + "loss": 1.8255, + "step": 290195 + }, + { + "epoch": 0.68, + "grad_norm": 2.53125, + "learning_rate": 0.000147771164510515, + "loss": 1.9496, + "step": 290200 + }, + { + "epoch": 0.68, + "grad_norm": 2.28125, + "learning_rate": 0.00014776954074744057, + "loss": 2.0726, + "step": 290205 + }, + { + "epoch": 0.68, + "grad_norm": 2.453125, + "learning_rate": 0.00014776791696804716, + "loss": 2.1367, + "step": 290210 + }, + { + "epoch": 0.68, + "grad_norm": 1.8203125, + "learning_rate": 0.00014776629317233536, + "loss": 1.9671, + "step": 290215 + }, + { + "epoch": 0.68, + "grad_norm": 2.5, + "learning_rate": 0.00014776466936030578, + "loss": 2.0181, + "step": 290220 + }, + { + "epoch": 0.68, + "grad_norm": 1.890625, + "learning_rate": 0.0001477630455319589, + "loss": 1.9437, + "step": 290225 + }, + { + "epoch": 0.68, + "grad_norm": 2.453125, + "learning_rate": 0.0001477614216872953, + "loss": 1.8935, + "step": 290230 + }, + { + "epoch": 0.68, + "grad_norm": 2.375, + "learning_rate": 0.00014775979782631548, + "loss": 2.0806, + "step": 290235 + }, + { + "epoch": 0.68, + "grad_norm": 2.40625, + "learning_rate": 0.0001477581739490201, + "loss": 2.178, + "step": 290240 + }, + { + "epoch": 0.68, + "grad_norm": 2.3125, + "learning_rate": 0.00014775655005540964, + "loss": 2.1003, + "step": 290245 + }, + { + "epoch": 0.68, + "grad_norm": 2.328125, + "learning_rate": 0.0001477549261454847, + "loss": 2.0298, + "step": 290250 + }, + { + "epoch": 0.68, + "grad_norm": 2.453125, + "learning_rate": 0.0001477533022192458, + "loss": 1.9888, + "step": 290255 + }, + { + "epoch": 0.68, + "grad_norm": 2.203125, + "learning_rate": 0.00014775167827669352, + "loss": 1.9965, + "step": 290260 + }, + { + "epoch": 0.68, + "grad_norm": 2.21875, + "learning_rate": 0.00014775005431782838, + "loss": 2.0493, + "step": 290265 + }, + { + "epoch": 0.68, + "grad_norm": 1.8671875, + "learning_rate": 0.00014774843034265098, + "loss": 2.1263, + "step": 290270 + }, + { + "epoch": 0.68, + "grad_norm": 2.3125, + "learning_rate": 0.00014774680635116182, + "loss": 2.2496, + "step": 290275 + }, + { + "epoch": 0.68, + "grad_norm": 2.609375, + "learning_rate": 0.0001477451823433615, + "loss": 1.9411, + "step": 290280 + }, + { + "epoch": 0.68, + "grad_norm": 2.109375, + "learning_rate": 0.00014774355831925054, + "loss": 2.0062, + "step": 290285 + }, + { + "epoch": 0.68, + "grad_norm": 2.390625, + "learning_rate": 0.00014774193427882955, + "loss": 2.2843, + "step": 290290 + }, + { + "epoch": 0.68, + "grad_norm": 1.890625, + "learning_rate": 0.00014774031022209903, + "loss": 2.1563, + "step": 290295 + }, + { + "epoch": 0.68, + "grad_norm": 2.609375, + "learning_rate": 0.00014773868614905954, + "loss": 2.2372, + "step": 290300 + }, + { + "epoch": 0.68, + "grad_norm": 2.4375, + "learning_rate": 0.00014773706205971167, + "loss": 2.1728, + "step": 290305 + }, + { + "epoch": 0.68, + "grad_norm": 1.9453125, + "learning_rate": 0.00014773543795405596, + "loss": 2.1428, + "step": 290310 + }, + { + "epoch": 0.68, + "grad_norm": 2.34375, + "learning_rate": 0.00014773381383209293, + "loss": 2.0485, + "step": 290315 + }, + { + "epoch": 0.68, + "grad_norm": 2.28125, + "learning_rate": 0.00014773218969382318, + "loss": 2.0234, + "step": 290320 + }, + { + "epoch": 0.68, + "grad_norm": 2.03125, + "learning_rate": 0.00014773056553924724, + "loss": 2.0604, + "step": 290325 + }, + { + "epoch": 0.68, + "grad_norm": 2.375, + "learning_rate": 0.00014772894136836568, + "loss": 1.9763, + "step": 290330 + }, + { + "epoch": 0.68, + "grad_norm": 2.328125, + "learning_rate": 0.00014772731718117906, + "loss": 1.9478, + "step": 290335 + }, + { + "epoch": 0.68, + "grad_norm": 2.03125, + "learning_rate": 0.00014772569297768793, + "loss": 2.0118, + "step": 290340 + }, + { + "epoch": 0.68, + "grad_norm": 2.09375, + "learning_rate": 0.0001477240687578928, + "loss": 2.0703, + "step": 290345 + }, + { + "epoch": 0.68, + "grad_norm": 1.8203125, + "learning_rate": 0.00014772244452179427, + "loss": 2.0019, + "step": 290350 + }, + { + "epoch": 0.68, + "grad_norm": 2.15625, + "learning_rate": 0.0001477208202693929, + "loss": 2.1966, + "step": 290355 + }, + { + "epoch": 0.68, + "grad_norm": 2.203125, + "learning_rate": 0.00014771919600068924, + "loss": 2.0923, + "step": 290360 + }, + { + "epoch": 0.68, + "grad_norm": 2.125, + "learning_rate": 0.00014771757171568386, + "loss": 2.0411, + "step": 290365 + }, + { + "epoch": 0.68, + "grad_norm": 2.171875, + "learning_rate": 0.00014771594741437723, + "loss": 2.0, + "step": 290370 + }, + { + "epoch": 0.68, + "grad_norm": 2.109375, + "learning_rate": 0.00014771432309677, + "loss": 2.024, + "step": 290375 + }, + { + "epoch": 0.68, + "grad_norm": 2.265625, + "learning_rate": 0.0001477126987628627, + "loss": 1.9914, + "step": 290380 + }, + { + "epoch": 0.68, + "grad_norm": 1.90625, + "learning_rate": 0.00014771107441265588, + "loss": 2.0143, + "step": 290385 + }, + { + "epoch": 0.68, + "grad_norm": 1.9296875, + "learning_rate": 0.00014770945004615008, + "loss": 2.0819, + "step": 290390 + }, + { + "epoch": 0.68, + "grad_norm": 2.125, + "learning_rate": 0.00014770782566334587, + "loss": 1.9761, + "step": 290395 + }, + { + "epoch": 0.68, + "grad_norm": 1.9296875, + "learning_rate": 0.00014770620126424377, + "loss": 2.1221, + "step": 290400 + }, + { + "epoch": 0.68, + "grad_norm": 2.109375, + "learning_rate": 0.0001477045768488444, + "loss": 1.997, + "step": 290405 + }, + { + "epoch": 0.68, + "grad_norm": 2.34375, + "learning_rate": 0.00014770295241714826, + "loss": 2.1679, + "step": 290410 + }, + { + "epoch": 0.68, + "grad_norm": 2.328125, + "learning_rate": 0.00014770132796915596, + "loss": 1.9729, + "step": 290415 + }, + { + "epoch": 0.68, + "grad_norm": 1.984375, + "learning_rate": 0.000147699703504868, + "loss": 2.1617, + "step": 290420 + }, + { + "epoch": 0.68, + "grad_norm": 1.8828125, + "learning_rate": 0.00014769807902428495, + "loss": 2.1339, + "step": 290425 + }, + { + "epoch": 0.68, + "grad_norm": 2.46875, + "learning_rate": 0.0001476964545274074, + "loss": 2.1113, + "step": 290430 + }, + { + "epoch": 0.68, + "grad_norm": 2.515625, + "learning_rate": 0.00014769483001423583, + "loss": 2.1535, + "step": 290435 + }, + { + "epoch": 0.68, + "grad_norm": 2.296875, + "learning_rate": 0.0001476932054847709, + "loss": 2.098, + "step": 290440 + }, + { + "epoch": 0.68, + "grad_norm": 2.390625, + "learning_rate": 0.00014769158093901305, + "loss": 1.9504, + "step": 290445 + }, + { + "epoch": 0.68, + "grad_norm": 2.609375, + "learning_rate": 0.00014768995637696292, + "loss": 1.9791, + "step": 290450 + }, + { + "epoch": 0.68, + "grad_norm": 2.015625, + "learning_rate": 0.00014768833179862103, + "loss": 2.0718, + "step": 290455 + }, + { + "epoch": 0.68, + "grad_norm": 2.15625, + "learning_rate": 0.00014768670720398795, + "loss": 2.0795, + "step": 290460 + }, + { + "epoch": 0.68, + "grad_norm": 2.265625, + "learning_rate": 0.0001476850825930642, + "loss": 2.107, + "step": 290465 + }, + { + "epoch": 0.68, + "grad_norm": 1.7734375, + "learning_rate": 0.00014768345796585037, + "loss": 2.0554, + "step": 290470 + }, + { + "epoch": 0.68, + "grad_norm": 2.234375, + "learning_rate": 0.000147681833322347, + "loss": 2.1674, + "step": 290475 + }, + { + "epoch": 0.68, + "grad_norm": 2.78125, + "learning_rate": 0.0001476802086625547, + "loss": 2.1337, + "step": 290480 + }, + { + "epoch": 0.68, + "grad_norm": 2.25, + "learning_rate": 0.00014767858398647392, + "loss": 1.9264, + "step": 290485 + }, + { + "epoch": 0.68, + "grad_norm": 2.28125, + "learning_rate": 0.0001476769592941053, + "loss": 2.1091, + "step": 290490 + }, + { + "epoch": 0.68, + "grad_norm": 2.1875, + "learning_rate": 0.00014767533458544935, + "loss": 2.0194, + "step": 290495 + }, + { + "epoch": 0.68, + "grad_norm": 2.6875, + "learning_rate": 0.0001476737098605067, + "loss": 2.0567, + "step": 290500 + }, + { + "epoch": 0.68, + "grad_norm": 2.5625, + "learning_rate": 0.00014767208511927776, + "loss": 2.0105, + "step": 290505 + }, + { + "epoch": 0.68, + "grad_norm": 2.484375, + "learning_rate": 0.0001476704603617632, + "loss": 2.0617, + "step": 290510 + }, + { + "epoch": 0.68, + "grad_norm": 1.921875, + "learning_rate": 0.00014766883558796354, + "loss": 2.0237, + "step": 290515 + }, + { + "epoch": 0.68, + "grad_norm": 2.03125, + "learning_rate": 0.00014766721079787938, + "loss": 1.7857, + "step": 290520 + }, + { + "epoch": 0.68, + "grad_norm": 2.46875, + "learning_rate": 0.0001476655859915112, + "loss": 2.0672, + "step": 290525 + }, + { + "epoch": 0.68, + "grad_norm": 2.515625, + "learning_rate": 0.0001476639611688596, + "loss": 2.1913, + "step": 290530 + }, + { + "epoch": 0.68, + "grad_norm": 2.109375, + "learning_rate": 0.00014766233632992513, + "loss": 2.2452, + "step": 290535 + }, + { + "epoch": 0.68, + "grad_norm": 2.5625, + "learning_rate": 0.00014766071147470834, + "loss": 1.9204, + "step": 290540 + }, + { + "epoch": 0.68, + "grad_norm": 2.0625, + "learning_rate": 0.00014765908660320978, + "loss": 2.1318, + "step": 290545 + }, + { + "epoch": 0.68, + "grad_norm": 2.125, + "learning_rate": 0.00014765746171543003, + "loss": 2.1899, + "step": 290550 + }, + { + "epoch": 0.68, + "grad_norm": 1.9375, + "learning_rate": 0.0001476558368113696, + "loss": 2.0291, + "step": 290555 + }, + { + "epoch": 0.68, + "grad_norm": 2.390625, + "learning_rate": 0.00014765421189102908, + "loss": 2.0983, + "step": 290560 + }, + { + "epoch": 0.68, + "grad_norm": 2.046875, + "learning_rate": 0.00014765258695440906, + "loss": 2.0438, + "step": 290565 + }, + { + "epoch": 0.68, + "grad_norm": 2.0625, + "learning_rate": 0.00014765096200150998, + "loss": 2.0543, + "step": 290570 + }, + { + "epoch": 0.68, + "grad_norm": 2.109375, + "learning_rate": 0.0001476493370323325, + "loss": 2.1205, + "step": 290575 + }, + { + "epoch": 0.68, + "grad_norm": 2.390625, + "learning_rate": 0.00014764771204687715, + "loss": 2.0361, + "step": 290580 + }, + { + "epoch": 0.68, + "grad_norm": 2.828125, + "learning_rate": 0.00014764608704514448, + "loss": 1.9713, + "step": 290585 + }, + { + "epoch": 0.68, + "grad_norm": 2.375, + "learning_rate": 0.00014764446202713503, + "loss": 2.0454, + "step": 290590 + }, + { + "epoch": 0.68, + "grad_norm": 2.0625, + "learning_rate": 0.00014764283699284937, + "loss": 2.165, + "step": 290595 + }, + { + "epoch": 0.68, + "grad_norm": 2.78125, + "learning_rate": 0.00014764121194228805, + "loss": 2.1788, + "step": 290600 + }, + { + "epoch": 0.68, + "grad_norm": 2.53125, + "learning_rate": 0.00014763958687545167, + "loss": 2.1036, + "step": 290605 + }, + { + "epoch": 0.68, + "grad_norm": 2.0625, + "learning_rate": 0.00014763796179234067, + "loss": 1.8695, + "step": 290610 + }, + { + "epoch": 0.68, + "grad_norm": 2.125, + "learning_rate": 0.00014763633669295572, + "loss": 2.044, + "step": 290615 + }, + { + "epoch": 0.68, + "grad_norm": 2.125, + "learning_rate": 0.00014763471157729733, + "loss": 2.0265, + "step": 290620 + }, + { + "epoch": 0.68, + "grad_norm": 2.015625, + "learning_rate": 0.00014763308644536607, + "loss": 2.1693, + "step": 290625 + }, + { + "epoch": 0.68, + "grad_norm": 2.359375, + "learning_rate": 0.00014763146129716245, + "loss": 1.8268, + "step": 290630 + }, + { + "epoch": 0.68, + "grad_norm": 3.34375, + "learning_rate": 0.0001476298361326871, + "loss": 1.9716, + "step": 290635 + }, + { + "epoch": 0.68, + "grad_norm": 2.359375, + "learning_rate": 0.0001476282109519405, + "loss": 2.0728, + "step": 290640 + }, + { + "epoch": 0.68, + "grad_norm": 2.171875, + "learning_rate": 0.00014762658575492326, + "loss": 2.0394, + "step": 290645 + }, + { + "epoch": 0.68, + "grad_norm": 2.3125, + "learning_rate": 0.0001476249605416359, + "loss": 2.0894, + "step": 290650 + }, + { + "epoch": 0.68, + "grad_norm": 2.453125, + "learning_rate": 0.00014762333531207899, + "loss": 2.0031, + "step": 290655 + }, + { + "epoch": 0.68, + "grad_norm": 1.7578125, + "learning_rate": 0.00014762171006625313, + "loss": 1.9392, + "step": 290660 + }, + { + "epoch": 0.68, + "grad_norm": 2.265625, + "learning_rate": 0.00014762008480415876, + "loss": 2.1078, + "step": 290665 + }, + { + "epoch": 0.68, + "grad_norm": 2.625, + "learning_rate": 0.00014761845952579653, + "loss": 2.1169, + "step": 290670 + }, + { + "epoch": 0.68, + "grad_norm": 2.0, + "learning_rate": 0.00014761683423116697, + "loss": 2.1176, + "step": 290675 + }, + { + "epoch": 0.68, + "grad_norm": 1.7890625, + "learning_rate": 0.00014761520892027068, + "loss": 1.8721, + "step": 290680 + }, + { + "epoch": 0.68, + "grad_norm": 2.046875, + "learning_rate": 0.00014761358359310814, + "loss": 1.9661, + "step": 290685 + }, + { + "epoch": 0.68, + "grad_norm": 1.921875, + "learning_rate": 0.0001476119582496799, + "loss": 2.1672, + "step": 290690 + }, + { + "epoch": 0.68, + "grad_norm": 2.234375, + "learning_rate": 0.00014761033288998658, + "loss": 2.1266, + "step": 290695 + }, + { + "epoch": 0.68, + "grad_norm": 2.125, + "learning_rate": 0.00014760870751402873, + "loss": 1.9789, + "step": 290700 + }, + { + "epoch": 0.68, + "grad_norm": 2.703125, + "learning_rate": 0.00014760708212180687, + "loss": 1.9439, + "step": 290705 + }, + { + "epoch": 0.68, + "grad_norm": 2.15625, + "learning_rate": 0.00014760545671332155, + "loss": 2.0445, + "step": 290710 + }, + { + "epoch": 0.68, + "grad_norm": 2.25, + "learning_rate": 0.00014760383128857335, + "loss": 2.1246, + "step": 290715 + }, + { + "epoch": 0.68, + "grad_norm": 2.40625, + "learning_rate": 0.00014760220584756278, + "loss": 2.0576, + "step": 290720 + }, + { + "epoch": 0.68, + "grad_norm": 2.203125, + "learning_rate": 0.00014760058039029053, + "loss": 2.0773, + "step": 290725 + }, + { + "epoch": 0.68, + "grad_norm": 2.296875, + "learning_rate": 0.000147598954916757, + "loss": 2.0268, + "step": 290730 + }, + { + "epoch": 0.68, + "grad_norm": 2.046875, + "learning_rate": 0.0001475973294269628, + "loss": 1.8506, + "step": 290735 + }, + { + "epoch": 0.68, + "grad_norm": 1.703125, + "learning_rate": 0.0001475957039209085, + "loss": 2.0985, + "step": 290740 + }, + { + "epoch": 0.68, + "grad_norm": 2.515625, + "learning_rate": 0.00014759407839859463, + "loss": 2.1325, + "step": 290745 + }, + { + "epoch": 0.68, + "grad_norm": 1.9375, + "learning_rate": 0.00014759245286002177, + "loss": 1.9736, + "step": 290750 + }, + { + "epoch": 0.68, + "grad_norm": 2.0625, + "learning_rate": 0.00014759082730519046, + "loss": 2.0876, + "step": 290755 + }, + { + "epoch": 0.68, + "grad_norm": 1.90625, + "learning_rate": 0.00014758920173410127, + "loss": 1.9171, + "step": 290760 + }, + { + "epoch": 0.68, + "grad_norm": 2.390625, + "learning_rate": 0.0001475875761467547, + "loss": 2.1952, + "step": 290765 + }, + { + "epoch": 0.68, + "grad_norm": 2.28125, + "learning_rate": 0.0001475859505431514, + "loss": 2.1153, + "step": 290770 + }, + { + "epoch": 0.68, + "grad_norm": 1.7421875, + "learning_rate": 0.0001475843249232919, + "loss": 2.1364, + "step": 290775 + }, + { + "epoch": 0.68, + "grad_norm": 2.203125, + "learning_rate": 0.00014758269928717666, + "loss": 2.0794, + "step": 290780 + }, + { + "epoch": 0.68, + "grad_norm": 2.296875, + "learning_rate": 0.00014758107363480635, + "loss": 2.0686, + "step": 290785 + }, + { + "epoch": 0.68, + "grad_norm": 2.375, + "learning_rate": 0.0001475794479661815, + "loss": 2.139, + "step": 290790 + }, + { + "epoch": 0.68, + "grad_norm": 2.28125, + "learning_rate": 0.00014757782228130265, + "loss": 2.1372, + "step": 290795 + }, + { + "epoch": 0.68, + "grad_norm": 2.21875, + "learning_rate": 0.0001475761965801703, + "loss": 1.9307, + "step": 290800 + }, + { + "epoch": 0.68, + "grad_norm": 1.8828125, + "learning_rate": 0.00014757457086278508, + "loss": 1.9966, + "step": 290805 + }, + { + "epoch": 0.68, + "grad_norm": 3.421875, + "learning_rate": 0.00014757294512914753, + "loss": 2.1019, + "step": 290810 + }, + { + "epoch": 0.68, + "grad_norm": 2.078125, + "learning_rate": 0.00014757131937925822, + "loss": 2.0595, + "step": 290815 + }, + { + "epoch": 0.68, + "grad_norm": 2.296875, + "learning_rate": 0.00014756969361311766, + "loss": 2.0393, + "step": 290820 + }, + { + "epoch": 0.68, + "grad_norm": 2.34375, + "learning_rate": 0.0001475680678307264, + "loss": 2.1918, + "step": 290825 + }, + { + "epoch": 0.68, + "grad_norm": 2.140625, + "learning_rate": 0.00014756644203208508, + "loss": 2.1909, + "step": 290830 + }, + { + "epoch": 0.68, + "grad_norm": 2.21875, + "learning_rate": 0.0001475648162171942, + "loss": 2.0191, + "step": 290835 + }, + { + "epoch": 0.68, + "grad_norm": 2.140625, + "learning_rate": 0.00014756319038605428, + "loss": 2.0719, + "step": 290840 + }, + { + "epoch": 0.68, + "grad_norm": 2.859375, + "learning_rate": 0.00014756156453866592, + "loss": 2.0012, + "step": 290845 + }, + { + "epoch": 0.68, + "grad_norm": 2.203125, + "learning_rate": 0.00014755993867502967, + "loss": 2.2247, + "step": 290850 + }, + { + "epoch": 0.68, + "grad_norm": 2.40625, + "learning_rate": 0.0001475583127951461, + "loss": 2.2957, + "step": 290855 + }, + { + "epoch": 0.68, + "grad_norm": 2.21875, + "learning_rate": 0.00014755668689901574, + "loss": 1.9757, + "step": 290860 + }, + { + "epoch": 0.68, + "grad_norm": 2.015625, + "learning_rate": 0.00014755506098663914, + "loss": 2.0426, + "step": 290865 + }, + { + "epoch": 0.68, + "grad_norm": 1.8671875, + "learning_rate": 0.00014755343505801688, + "loss": 1.9824, + "step": 290870 + }, + { + "epoch": 0.68, + "grad_norm": 2.015625, + "learning_rate": 0.0001475518091131495, + "loss": 2.0069, + "step": 290875 + }, + { + "epoch": 0.68, + "grad_norm": 1.9453125, + "learning_rate": 0.00014755018315203757, + "loss": 2.1486, + "step": 290880 + }, + { + "epoch": 0.68, + "grad_norm": 2.140625, + "learning_rate": 0.00014754855717468161, + "loss": 2.2583, + "step": 290885 + }, + { + "epoch": 0.68, + "grad_norm": 2.390625, + "learning_rate": 0.00014754693118108222, + "loss": 2.1297, + "step": 290890 + }, + { + "epoch": 0.68, + "grad_norm": 2.328125, + "learning_rate": 0.00014754530517123993, + "loss": 2.3206, + "step": 290895 + }, + { + "epoch": 0.68, + "grad_norm": 2.1875, + "learning_rate": 0.0001475436791451553, + "loss": 1.9638, + "step": 290900 + }, + { + "epoch": 0.68, + "grad_norm": 1.9296875, + "learning_rate": 0.0001475420531028289, + "loss": 2.1254, + "step": 290905 + }, + { + "epoch": 0.68, + "grad_norm": 2.234375, + "learning_rate": 0.00014754042704426124, + "loss": 2.0826, + "step": 290910 + }, + { + "epoch": 0.68, + "grad_norm": 2.109375, + "learning_rate": 0.00014753880096945294, + "loss": 2.1241, + "step": 290915 + }, + { + "epoch": 0.68, + "grad_norm": 2.46875, + "learning_rate": 0.00014753717487840453, + "loss": 1.8282, + "step": 290920 + }, + { + "epoch": 0.68, + "grad_norm": 2.046875, + "learning_rate": 0.00014753554877111654, + "loss": 1.7986, + "step": 290925 + }, + { + "epoch": 0.68, + "grad_norm": 2.0625, + "learning_rate": 0.00014753392264758955, + "loss": 1.898, + "step": 290930 + }, + { + "epoch": 0.68, + "grad_norm": 1.5546875, + "learning_rate": 0.0001475322965078241, + "loss": 1.8202, + "step": 290935 + }, + { + "epoch": 0.68, + "grad_norm": 2.21875, + "learning_rate": 0.00014753067035182076, + "loss": 2.2058, + "step": 290940 + }, + { + "epoch": 0.68, + "grad_norm": 2.078125, + "learning_rate": 0.00014752904417958008, + "loss": 2.1073, + "step": 290945 + }, + { + "epoch": 0.68, + "grad_norm": 2.421875, + "learning_rate": 0.00014752741799110264, + "loss": 2.1171, + "step": 290950 + }, + { + "epoch": 0.68, + "grad_norm": 2.046875, + "learning_rate": 0.00014752579178638897, + "loss": 1.9546, + "step": 290955 + }, + { + "epoch": 0.68, + "grad_norm": 2.0, + "learning_rate": 0.00014752416556543957, + "loss": 2.0803, + "step": 290960 + }, + { + "epoch": 0.68, + "grad_norm": 1.953125, + "learning_rate": 0.0001475225393282551, + "loss": 1.9398, + "step": 290965 + }, + { + "epoch": 0.68, + "grad_norm": 2.40625, + "learning_rate": 0.00014752091307483606, + "loss": 2.1438, + "step": 290970 + }, + { + "epoch": 0.68, + "grad_norm": 2.125, + "learning_rate": 0.00014751928680518305, + "loss": 2.0844, + "step": 290975 + }, + { + "epoch": 0.68, + "grad_norm": 2.28125, + "learning_rate": 0.00014751766051929654, + "loss": 2.0167, + "step": 290980 + }, + { + "epoch": 0.68, + "grad_norm": 2.5, + "learning_rate": 0.00014751603421717715, + "loss": 2.1149, + "step": 290985 + }, + { + "epoch": 0.68, + "grad_norm": 2.21875, + "learning_rate": 0.0001475144078988254, + "loss": 1.8893, + "step": 290990 + }, + { + "epoch": 0.68, + "grad_norm": 2.078125, + "learning_rate": 0.0001475127815642419, + "loss": 2.0283, + "step": 290995 + }, + { + "epoch": 0.68, + "grad_norm": 2.65625, + "learning_rate": 0.00014751115521342717, + "loss": 2.0474, + "step": 291000 + }, + { + "epoch": 0.68, + "grad_norm": 2.125, + "learning_rate": 0.00014750952884638177, + "loss": 2.19, + "step": 291005 + }, + { + "epoch": 0.68, + "grad_norm": 2.3125, + "learning_rate": 0.00014750790246310623, + "loss": 2.0297, + "step": 291010 + }, + { + "epoch": 0.68, + "grad_norm": 2.984375, + "learning_rate": 0.00014750627606360112, + "loss": 2.1464, + "step": 291015 + }, + { + "epoch": 0.68, + "grad_norm": 2.15625, + "learning_rate": 0.00014750464964786707, + "loss": 1.9837, + "step": 291020 + }, + { + "epoch": 0.68, + "grad_norm": 2.1875, + "learning_rate": 0.00014750302321590451, + "loss": 1.9082, + "step": 291025 + }, + { + "epoch": 0.68, + "grad_norm": 2.046875, + "learning_rate": 0.00014750139676771408, + "loss": 2.1552, + "step": 291030 + }, + { + "epoch": 0.68, + "grad_norm": 1.8984375, + "learning_rate": 0.0001474997703032963, + "loss": 1.9556, + "step": 291035 + }, + { + "epoch": 0.68, + "grad_norm": 2.15625, + "learning_rate": 0.00014749814382265173, + "loss": 2.0143, + "step": 291040 + }, + { + "epoch": 0.68, + "grad_norm": 2.28125, + "learning_rate": 0.00014749651732578095, + "loss": 1.9868, + "step": 291045 + }, + { + "epoch": 0.68, + "grad_norm": 1.7734375, + "learning_rate": 0.0001474948908126845, + "loss": 1.9234, + "step": 291050 + }, + { + "epoch": 0.68, + "grad_norm": 2.328125, + "learning_rate": 0.0001474932642833629, + "loss": 2.0811, + "step": 291055 + }, + { + "epoch": 0.68, + "grad_norm": 2.359375, + "learning_rate": 0.00014749163773781677, + "loss": 2.1942, + "step": 291060 + }, + { + "epoch": 0.68, + "grad_norm": 2.140625, + "learning_rate": 0.00014749001117604665, + "loss": 2.0454, + "step": 291065 + }, + { + "epoch": 0.68, + "grad_norm": 2.515625, + "learning_rate": 0.00014748838459805307, + "loss": 2.0156, + "step": 291070 + }, + { + "epoch": 0.68, + "grad_norm": 2.03125, + "learning_rate": 0.00014748675800383657, + "loss": 2.1289, + "step": 291075 + }, + { + "epoch": 0.69, + "grad_norm": 2.46875, + "learning_rate": 0.00014748513139339775, + "loss": 2.0703, + "step": 291080 + }, + { + "epoch": 0.69, + "grad_norm": 3.0625, + "learning_rate": 0.00014748350476673713, + "loss": 2.0508, + "step": 291085 + }, + { + "epoch": 0.69, + "grad_norm": 2.140625, + "learning_rate": 0.00014748187812385532, + "loss": 2.1226, + "step": 291090 + }, + { + "epoch": 0.69, + "grad_norm": 2.09375, + "learning_rate": 0.00014748025146475282, + "loss": 2.1159, + "step": 291095 + }, + { + "epoch": 0.69, + "grad_norm": 2.46875, + "learning_rate": 0.0001474786247894302, + "loss": 2.0498, + "step": 291100 + }, + { + "epoch": 0.69, + "grad_norm": 2.234375, + "learning_rate": 0.000147476998097888, + "loss": 1.9467, + "step": 291105 + }, + { + "epoch": 0.69, + "grad_norm": 2.296875, + "learning_rate": 0.00014747537139012687, + "loss": 1.9193, + "step": 291110 + }, + { + "epoch": 0.69, + "grad_norm": 1.96875, + "learning_rate": 0.00014747374466614723, + "loss": 2.0895, + "step": 291115 + }, + { + "epoch": 0.69, + "grad_norm": 2.328125, + "learning_rate": 0.0001474721179259497, + "loss": 2.083, + "step": 291120 + }, + { + "epoch": 0.69, + "grad_norm": 2.109375, + "learning_rate": 0.00014747049116953484, + "loss": 2.0368, + "step": 291125 + }, + { + "epoch": 0.69, + "grad_norm": 2.40625, + "learning_rate": 0.0001474688643969032, + "loss": 1.9404, + "step": 291130 + }, + { + "epoch": 0.69, + "grad_norm": 2.140625, + "learning_rate": 0.00014746723760805536, + "loss": 2.0913, + "step": 291135 + }, + { + "epoch": 0.69, + "grad_norm": 1.7890625, + "learning_rate": 0.00014746561080299184, + "loss": 1.8342, + "step": 291140 + }, + { + "epoch": 0.69, + "grad_norm": 2.453125, + "learning_rate": 0.0001474639839817132, + "loss": 2.0361, + "step": 291145 + }, + { + "epoch": 0.69, + "grad_norm": 2.203125, + "learning_rate": 0.00014746235714421997, + "loss": 1.9887, + "step": 291150 + }, + { + "epoch": 0.69, + "grad_norm": 2.25, + "learning_rate": 0.0001474607302905128, + "loss": 2.1351, + "step": 291155 + }, + { + "epoch": 0.69, + "grad_norm": 2.25, + "learning_rate": 0.00014745910342059214, + "loss": 1.9967, + "step": 291160 + }, + { + "epoch": 0.69, + "grad_norm": 2.21875, + "learning_rate": 0.0001474574765344586, + "loss": 2.2381, + "step": 291165 + }, + { + "epoch": 0.69, + "grad_norm": 2.453125, + "learning_rate": 0.00014745584963211274, + "loss": 2.0748, + "step": 291170 + }, + { + "epoch": 0.69, + "grad_norm": 2.234375, + "learning_rate": 0.00014745422271355508, + "loss": 2.0227, + "step": 291175 + }, + { + "epoch": 0.69, + "grad_norm": 2.4375, + "learning_rate": 0.0001474525957787862, + "loss": 2.2086, + "step": 291180 + }, + { + "epoch": 0.69, + "grad_norm": 2.3125, + "learning_rate": 0.00014745096882780664, + "loss": 2.254, + "step": 291185 + }, + { + "epoch": 0.69, + "grad_norm": 2.1875, + "learning_rate": 0.000147449341860617, + "loss": 2.1756, + "step": 291190 + }, + { + "epoch": 0.69, + "grad_norm": 2.203125, + "learning_rate": 0.0001474477148772178, + "loss": 2.1692, + "step": 291195 + }, + { + "epoch": 0.69, + "grad_norm": 2.28125, + "learning_rate": 0.0001474460878776096, + "loss": 1.9198, + "step": 291200 + }, + { + "epoch": 0.69, + "grad_norm": 2.40625, + "learning_rate": 0.00014744446086179292, + "loss": 1.9515, + "step": 291205 + }, + { + "epoch": 0.69, + "grad_norm": 2.34375, + "learning_rate": 0.00014744283382976838, + "loss": 1.8736, + "step": 291210 + }, + { + "epoch": 0.69, + "grad_norm": 2.4375, + "learning_rate": 0.0001474412067815365, + "loss": 1.9426, + "step": 291215 + }, + { + "epoch": 0.69, + "grad_norm": 2.1875, + "learning_rate": 0.00014743957971709786, + "loss": 2.1883, + "step": 291220 + }, + { + "epoch": 0.69, + "grad_norm": 2.53125, + "learning_rate": 0.000147437952636453, + "loss": 2.1136, + "step": 291225 + }, + { + "epoch": 0.69, + "grad_norm": 2.703125, + "learning_rate": 0.00014743632553960244, + "loss": 2.0747, + "step": 291230 + }, + { + "epoch": 0.69, + "grad_norm": 2.125, + "learning_rate": 0.0001474346984265468, + "loss": 2.0396, + "step": 291235 + }, + { + "epoch": 0.69, + "grad_norm": 2.234375, + "learning_rate": 0.00014743307129728658, + "loss": 2.0595, + "step": 291240 + }, + { + "epoch": 0.69, + "grad_norm": 2.125, + "learning_rate": 0.0001474314441518224, + "loss": 2.0465, + "step": 291245 + }, + { + "epoch": 0.69, + "grad_norm": 2.578125, + "learning_rate": 0.00014742981699015476, + "loss": 1.955, + "step": 291250 + }, + { + "epoch": 0.69, + "grad_norm": 2.109375, + "learning_rate": 0.00014742818981228423, + "loss": 2.128, + "step": 291255 + }, + { + "epoch": 0.69, + "grad_norm": 2.296875, + "learning_rate": 0.00014742656261821139, + "loss": 1.943, + "step": 291260 + }, + { + "epoch": 0.69, + "grad_norm": 2.34375, + "learning_rate": 0.00014742493540793677, + "loss": 1.9821, + "step": 291265 + }, + { + "epoch": 0.69, + "grad_norm": 2.0, + "learning_rate": 0.00014742330818146092, + "loss": 2.0736, + "step": 291270 + }, + { + "epoch": 0.69, + "grad_norm": 1.6484375, + "learning_rate": 0.0001474216809387844, + "loss": 1.9589, + "step": 291275 + }, + { + "epoch": 0.69, + "grad_norm": 2.046875, + "learning_rate": 0.0001474200536799078, + "loss": 2.1766, + "step": 291280 + }, + { + "epoch": 0.69, + "grad_norm": 2.171875, + "learning_rate": 0.00014741842640483163, + "loss": 2.1239, + "step": 291285 + }, + { + "epoch": 0.69, + "grad_norm": 2.203125, + "learning_rate": 0.00014741679911355648, + "loss": 2.163, + "step": 291290 + }, + { + "epoch": 0.69, + "grad_norm": 2.203125, + "learning_rate": 0.0001474151718060829, + "loss": 2.0595, + "step": 291295 + }, + { + "epoch": 0.69, + "grad_norm": 2.53125, + "learning_rate": 0.0001474135444824114, + "loss": 1.9926, + "step": 291300 + }, + { + "epoch": 0.69, + "grad_norm": 2.015625, + "learning_rate": 0.0001474119171425426, + "loss": 2.03, + "step": 291305 + }, + { + "epoch": 0.69, + "grad_norm": 2.453125, + "learning_rate": 0.000147410289786477, + "loss": 2.2281, + "step": 291310 + }, + { + "epoch": 0.69, + "grad_norm": 2.71875, + "learning_rate": 0.00014740866241421522, + "loss": 2.0982, + "step": 291315 + }, + { + "epoch": 0.69, + "grad_norm": 2.1875, + "learning_rate": 0.00014740703502575777, + "loss": 2.0449, + "step": 291320 + }, + { + "epoch": 0.69, + "grad_norm": 2.265625, + "learning_rate": 0.00014740540762110522, + "loss": 1.9843, + "step": 291325 + }, + { + "epoch": 0.69, + "grad_norm": 1.6640625, + "learning_rate": 0.0001474037802002581, + "loss": 1.8693, + "step": 291330 + }, + { + "epoch": 0.69, + "grad_norm": 2.75, + "learning_rate": 0.00014740215276321702, + "loss": 2.2938, + "step": 291335 + }, + { + "epoch": 0.69, + "grad_norm": 1.8359375, + "learning_rate": 0.0001474005253099825, + "loss": 1.8911, + "step": 291340 + }, + { + "epoch": 0.69, + "grad_norm": 1.90625, + "learning_rate": 0.0001473988978405551, + "loss": 1.9358, + "step": 291345 + }, + { + "epoch": 0.69, + "grad_norm": 2.078125, + "learning_rate": 0.00014739727035493536, + "loss": 2.1203, + "step": 291350 + }, + { + "epoch": 0.69, + "grad_norm": 2.703125, + "learning_rate": 0.00014739564285312385, + "loss": 2.0754, + "step": 291355 + }, + { + "epoch": 0.69, + "grad_norm": 2.0625, + "learning_rate": 0.00014739401533512116, + "loss": 2.1326, + "step": 291360 + }, + { + "epoch": 0.69, + "grad_norm": 2.09375, + "learning_rate": 0.00014739238780092778, + "loss": 2.0637, + "step": 291365 + }, + { + "epoch": 0.69, + "grad_norm": 2.375, + "learning_rate": 0.00014739076025054433, + "loss": 2.1323, + "step": 291370 + }, + { + "epoch": 0.69, + "grad_norm": 2.28125, + "learning_rate": 0.0001473891326839713, + "loss": 2.2153, + "step": 291375 + }, + { + "epoch": 0.69, + "grad_norm": 1.890625, + "learning_rate": 0.00014738750510120933, + "loss": 1.8468, + "step": 291380 + }, + { + "epoch": 0.69, + "grad_norm": 3.578125, + "learning_rate": 0.0001473858775022589, + "loss": 2.0204, + "step": 291385 + }, + { + "epoch": 0.69, + "grad_norm": 2.125, + "learning_rate": 0.0001473842498871206, + "loss": 2.0071, + "step": 291390 + }, + { + "epoch": 0.69, + "grad_norm": 1.8125, + "learning_rate": 0.00014738262225579498, + "loss": 2.1125, + "step": 291395 + }, + { + "epoch": 0.69, + "grad_norm": 2.34375, + "learning_rate": 0.0001473809946082826, + "loss": 1.8938, + "step": 291400 + }, + { + "epoch": 0.69, + "grad_norm": 1.9140625, + "learning_rate": 0.00014737936694458404, + "loss": 1.9864, + "step": 291405 + }, + { + "epoch": 0.69, + "grad_norm": 2.03125, + "learning_rate": 0.0001473777392646998, + "loss": 2.2015, + "step": 291410 + }, + { + "epoch": 0.69, + "grad_norm": 1.9453125, + "learning_rate": 0.00014737611156863043, + "loss": 1.9981, + "step": 291415 + }, + { + "epoch": 0.69, + "grad_norm": 1.640625, + "learning_rate": 0.00014737448385637654, + "loss": 1.7972, + "step": 291420 + }, + { + "epoch": 0.69, + "grad_norm": 2.296875, + "learning_rate": 0.0001473728561279387, + "loss": 2.1641, + "step": 291425 + }, + { + "epoch": 0.69, + "grad_norm": 2.03125, + "learning_rate": 0.00014737122838331745, + "loss": 2.2098, + "step": 291430 + }, + { + "epoch": 0.69, + "grad_norm": 2.03125, + "learning_rate": 0.0001473696006225133, + "loss": 2.1111, + "step": 291435 + }, + { + "epoch": 0.69, + "grad_norm": 2.390625, + "learning_rate": 0.0001473679728455268, + "loss": 2.0767, + "step": 291440 + }, + { + "epoch": 0.69, + "grad_norm": 2.21875, + "learning_rate": 0.00014736634505235858, + "loss": 2.1855, + "step": 291445 + }, + { + "epoch": 0.69, + "grad_norm": 2.796875, + "learning_rate": 0.0001473647172430092, + "loss": 2.0139, + "step": 291450 + }, + { + "epoch": 0.69, + "grad_norm": 2.546875, + "learning_rate": 0.00014736308941747908, + "loss": 2.132, + "step": 291455 + }, + { + "epoch": 0.69, + "grad_norm": 2.171875, + "learning_rate": 0.00014736146157576894, + "loss": 2.2099, + "step": 291460 + }, + { + "epoch": 0.69, + "grad_norm": 2.015625, + "learning_rate": 0.00014735983371787922, + "loss": 1.9548, + "step": 291465 + }, + { + "epoch": 0.69, + "grad_norm": 2.140625, + "learning_rate": 0.00014735820584381055, + "loss": 2.1045, + "step": 291470 + }, + { + "epoch": 0.69, + "grad_norm": 2.234375, + "learning_rate": 0.00014735657795356344, + "loss": 1.9745, + "step": 291475 + }, + { + "epoch": 0.69, + "grad_norm": 2.34375, + "learning_rate": 0.00014735495004713847, + "loss": 2.1109, + "step": 291480 + }, + { + "epoch": 0.69, + "grad_norm": 2.03125, + "learning_rate": 0.0001473533221245362, + "loss": 2.1091, + "step": 291485 + }, + { + "epoch": 0.69, + "grad_norm": 2.078125, + "learning_rate": 0.00014735169418575716, + "loss": 2.2913, + "step": 291490 + }, + { + "epoch": 0.69, + "grad_norm": 2.171875, + "learning_rate": 0.00014735006623080193, + "loss": 2.0047, + "step": 291495 + }, + { + "epoch": 0.69, + "grad_norm": 2.203125, + "learning_rate": 0.0001473484382596711, + "loss": 2.136, + "step": 291500 + }, + { + "epoch": 0.69, + "grad_norm": 2.140625, + "learning_rate": 0.00014734681027236512, + "loss": 2.0114, + "step": 291505 + }, + { + "epoch": 0.69, + "grad_norm": 2.03125, + "learning_rate": 0.00014734518226888462, + "loss": 1.9485, + "step": 291510 + }, + { + "epoch": 0.69, + "grad_norm": 2.5, + "learning_rate": 0.0001473435542492302, + "loss": 2.1374, + "step": 291515 + }, + { + "epoch": 0.69, + "grad_norm": 2.0625, + "learning_rate": 0.00014734192621340233, + "loss": 2.0593, + "step": 291520 + }, + { + "epoch": 0.69, + "grad_norm": 2.25, + "learning_rate": 0.00014734029816140158, + "loss": 2.1205, + "step": 291525 + }, + { + "epoch": 0.69, + "grad_norm": 2.359375, + "learning_rate": 0.00014733867009322852, + "loss": 2.0648, + "step": 291530 + }, + { + "epoch": 0.69, + "grad_norm": 2.09375, + "learning_rate": 0.00014733704200888375, + "loss": 2.0227, + "step": 291535 + }, + { + "epoch": 0.69, + "grad_norm": 2.0625, + "learning_rate": 0.00014733541390836777, + "loss": 2.0113, + "step": 291540 + }, + { + "epoch": 0.69, + "grad_norm": 2.484375, + "learning_rate": 0.0001473337857916812, + "loss": 2.1411, + "step": 291545 + }, + { + "epoch": 0.69, + "grad_norm": 2.03125, + "learning_rate": 0.00014733215765882446, + "loss": 2.0993, + "step": 291550 + }, + { + "epoch": 0.69, + "grad_norm": 2.171875, + "learning_rate": 0.00014733052950979825, + "loss": 2.1166, + "step": 291555 + }, + { + "epoch": 0.69, + "grad_norm": 2.1875, + "learning_rate": 0.00014732890134460308, + "loss": 2.2276, + "step": 291560 + }, + { + "epoch": 0.69, + "grad_norm": 1.9375, + "learning_rate": 0.0001473272731632395, + "loss": 2.1236, + "step": 291565 + }, + { + "epoch": 0.69, + "grad_norm": 2.09375, + "learning_rate": 0.00014732564496570803, + "loss": 2.2373, + "step": 291570 + }, + { + "epoch": 0.69, + "grad_norm": 2.828125, + "learning_rate": 0.00014732401675200926, + "loss": 1.9864, + "step": 291575 + }, + { + "epoch": 0.69, + "grad_norm": 2.4375, + "learning_rate": 0.00014732238852214376, + "loss": 2.0033, + "step": 291580 + }, + { + "epoch": 0.69, + "grad_norm": 2.34375, + "learning_rate": 0.00014732076027611213, + "loss": 2.1013, + "step": 291585 + }, + { + "epoch": 0.69, + "grad_norm": 1.546875, + "learning_rate": 0.0001473191320139148, + "loss": 2.0247, + "step": 291590 + }, + { + "epoch": 0.69, + "grad_norm": 2.21875, + "learning_rate": 0.00014731750373555243, + "loss": 2.1009, + "step": 291595 + }, + { + "epoch": 0.69, + "grad_norm": 2.46875, + "learning_rate": 0.00014731587544102555, + "loss": 2.0769, + "step": 291600 + }, + { + "epoch": 0.69, + "grad_norm": 2.921875, + "learning_rate": 0.00014731424713033465, + "loss": 2.0795, + "step": 291605 + }, + { + "epoch": 0.69, + "grad_norm": 2.265625, + "learning_rate": 0.00014731261880348038, + "loss": 1.9951, + "step": 291610 + }, + { + "epoch": 0.69, + "grad_norm": 2.015625, + "learning_rate": 0.0001473109904604633, + "loss": 2.0679, + "step": 291615 + }, + { + "epoch": 0.69, + "grad_norm": 2.296875, + "learning_rate": 0.0001473093621012839, + "loss": 1.9908, + "step": 291620 + }, + { + "epoch": 0.69, + "grad_norm": 2.390625, + "learning_rate": 0.00014730773372594275, + "loss": 2.0596, + "step": 291625 + }, + { + "epoch": 0.69, + "grad_norm": 2.015625, + "learning_rate": 0.00014730610533444043, + "loss": 2.1011, + "step": 291630 + }, + { + "epoch": 0.69, + "grad_norm": 2.234375, + "learning_rate": 0.00014730447692677746, + "loss": 1.9171, + "step": 291635 + }, + { + "epoch": 0.69, + "grad_norm": 1.984375, + "learning_rate": 0.00014730284850295446, + "loss": 1.9372, + "step": 291640 + }, + { + "epoch": 0.69, + "grad_norm": 2.828125, + "learning_rate": 0.00014730122006297193, + "loss": 1.9045, + "step": 291645 + }, + { + "epoch": 0.69, + "grad_norm": 2.421875, + "learning_rate": 0.00014729959160683046, + "loss": 2.199, + "step": 291650 + }, + { + "epoch": 0.69, + "grad_norm": 2.28125, + "learning_rate": 0.00014729796313453058, + "loss": 2.088, + "step": 291655 + }, + { + "epoch": 0.69, + "grad_norm": 2.609375, + "learning_rate": 0.00014729633464607286, + "loss": 2.1017, + "step": 291660 + }, + { + "epoch": 0.69, + "grad_norm": 1.9140625, + "learning_rate": 0.00014729470614145785, + "loss": 2.1463, + "step": 291665 + }, + { + "epoch": 0.69, + "grad_norm": 2.171875, + "learning_rate": 0.00014729307762068614, + "loss": 2.0956, + "step": 291670 + }, + { + "epoch": 0.69, + "grad_norm": 2.5, + "learning_rate": 0.00014729144908375821, + "loss": 1.873, + "step": 291675 + }, + { + "epoch": 0.69, + "grad_norm": 2.484375, + "learning_rate": 0.00014728982053067473, + "loss": 2.2226, + "step": 291680 + }, + { + "epoch": 0.69, + "grad_norm": 2.46875, + "learning_rate": 0.00014728819196143612, + "loss": 2.1163, + "step": 291685 + }, + { + "epoch": 0.69, + "grad_norm": 1.9375, + "learning_rate": 0.00014728656337604303, + "loss": 2.0468, + "step": 291690 + }, + { + "epoch": 0.69, + "grad_norm": 2.125, + "learning_rate": 0.000147284934774496, + "loss": 2.0848, + "step": 291695 + }, + { + "epoch": 0.69, + "grad_norm": 2.171875, + "learning_rate": 0.00014728330615679559, + "loss": 2.0496, + "step": 291700 + }, + { + "epoch": 0.69, + "grad_norm": 2.234375, + "learning_rate": 0.00014728167752294232, + "loss": 1.9736, + "step": 291705 + }, + { + "epoch": 0.69, + "grad_norm": 2.28125, + "learning_rate": 0.00014728004887293677, + "loss": 2.0643, + "step": 291710 + }, + { + "epoch": 0.69, + "grad_norm": 2.328125, + "learning_rate": 0.0001472784202067795, + "loss": 2.1175, + "step": 291715 + }, + { + "epoch": 0.69, + "grad_norm": 2.359375, + "learning_rate": 0.00014727679152447108, + "loss": 2.1751, + "step": 291720 + }, + { + "epoch": 0.69, + "grad_norm": 2.5625, + "learning_rate": 0.00014727516282601205, + "loss": 2.0728, + "step": 291725 + }, + { + "epoch": 0.69, + "grad_norm": 1.7578125, + "learning_rate": 0.00014727353411140296, + "loss": 2.1559, + "step": 291730 + }, + { + "epoch": 0.69, + "grad_norm": 2.71875, + "learning_rate": 0.00014727190538064438, + "loss": 2.3552, + "step": 291735 + }, + { + "epoch": 0.69, + "grad_norm": 2.25, + "learning_rate": 0.00014727027663373685, + "loss": 2.1101, + "step": 291740 + }, + { + "epoch": 0.69, + "grad_norm": 2.3125, + "learning_rate": 0.00014726864787068095, + "loss": 2.1794, + "step": 291745 + }, + { + "epoch": 0.69, + "grad_norm": 2.140625, + "learning_rate": 0.0001472670190914772, + "loss": 2.2333, + "step": 291750 + }, + { + "epoch": 0.69, + "grad_norm": 1.90625, + "learning_rate": 0.0001472653902961262, + "loss": 2.1094, + "step": 291755 + }, + { + "epoch": 0.69, + "grad_norm": 2.0, + "learning_rate": 0.0001472637614846285, + "loss": 2.0824, + "step": 291760 + }, + { + "epoch": 0.69, + "grad_norm": 2.40625, + "learning_rate": 0.0001472621326569846, + "loss": 1.9375, + "step": 291765 + }, + { + "epoch": 0.69, + "grad_norm": 2.015625, + "learning_rate": 0.00014726050381319513, + "loss": 1.9747, + "step": 291770 + }, + { + "epoch": 0.69, + "grad_norm": 2.109375, + "learning_rate": 0.00014725887495326058, + "loss": 2.1584, + "step": 291775 + }, + { + "epoch": 0.69, + "grad_norm": 2.015625, + "learning_rate": 0.00014725724607718156, + "loss": 2.0716, + "step": 291780 + }, + { + "epoch": 0.69, + "grad_norm": 2.015625, + "learning_rate": 0.00014725561718495863, + "loss": 1.8858, + "step": 291785 + }, + { + "epoch": 0.69, + "grad_norm": 2.078125, + "learning_rate": 0.00014725398827659228, + "loss": 2.2853, + "step": 291790 + }, + { + "epoch": 0.69, + "grad_norm": 2.359375, + "learning_rate": 0.00014725235935208314, + "loss": 2.1884, + "step": 291795 + }, + { + "epoch": 0.69, + "grad_norm": 2.0625, + "learning_rate": 0.00014725073041143172, + "loss": 1.9967, + "step": 291800 + }, + { + "epoch": 0.69, + "grad_norm": 2.234375, + "learning_rate": 0.00014724910145463862, + "loss": 2.3649, + "step": 291805 + }, + { + "epoch": 0.69, + "grad_norm": 2.03125, + "learning_rate": 0.00014724747248170435, + "loss": 1.9294, + "step": 291810 + }, + { + "epoch": 0.69, + "grad_norm": 2.328125, + "learning_rate": 0.0001472458434926295, + "loss": 2.0284, + "step": 291815 + }, + { + "epoch": 0.69, + "grad_norm": 1.9375, + "learning_rate": 0.0001472442144874146, + "loss": 1.9786, + "step": 291820 + }, + { + "epoch": 0.69, + "grad_norm": 2.296875, + "learning_rate": 0.00014724258546606022, + "loss": 2.0111, + "step": 291825 + }, + { + "epoch": 0.69, + "grad_norm": 2.390625, + "learning_rate": 0.00014724095642856694, + "loss": 2.0194, + "step": 291830 + }, + { + "epoch": 0.69, + "grad_norm": 2.421875, + "learning_rate": 0.00014723932737493526, + "loss": 2.114, + "step": 291835 + }, + { + "epoch": 0.69, + "grad_norm": 2.140625, + "learning_rate": 0.0001472376983051658, + "loss": 2.2148, + "step": 291840 + }, + { + "epoch": 0.69, + "grad_norm": 1.9609375, + "learning_rate": 0.00014723606921925907, + "loss": 2.1582, + "step": 291845 + }, + { + "epoch": 0.69, + "grad_norm": 1.8984375, + "learning_rate": 0.00014723444011721564, + "loss": 1.9124, + "step": 291850 + }, + { + "epoch": 0.69, + "grad_norm": 2.484375, + "learning_rate": 0.00014723281099903606, + "loss": 2.1378, + "step": 291855 + }, + { + "epoch": 0.69, + "grad_norm": 2.28125, + "learning_rate": 0.0001472311818647209, + "loss": 2.019, + "step": 291860 + }, + { + "epoch": 0.69, + "grad_norm": 2.328125, + "learning_rate": 0.00014722955271427075, + "loss": 2.2297, + "step": 291865 + }, + { + "epoch": 0.69, + "grad_norm": 2.625, + "learning_rate": 0.00014722792354768607, + "loss": 1.9924, + "step": 291870 + }, + { + "epoch": 0.69, + "grad_norm": 2.0625, + "learning_rate": 0.0001472262943649675, + "loss": 2.0195, + "step": 291875 + }, + { + "epoch": 0.69, + "grad_norm": 2.4375, + "learning_rate": 0.0001472246651661156, + "loss": 2.069, + "step": 291880 + }, + { + "epoch": 0.69, + "grad_norm": 2.046875, + "learning_rate": 0.00014722303595113086, + "loss": 2.1195, + "step": 291885 + }, + { + "epoch": 0.69, + "grad_norm": 2.53125, + "learning_rate": 0.00014722140672001387, + "loss": 1.9247, + "step": 291890 + }, + { + "epoch": 0.69, + "grad_norm": 2.703125, + "learning_rate": 0.0001472197774727652, + "loss": 2.0338, + "step": 291895 + }, + { + "epoch": 0.69, + "grad_norm": 2.203125, + "learning_rate": 0.00014721814820938537, + "loss": 2.0654, + "step": 291900 + }, + { + "epoch": 0.69, + "grad_norm": 2.078125, + "learning_rate": 0.00014721651892987502, + "loss": 2.1753, + "step": 291905 + }, + { + "epoch": 0.69, + "grad_norm": 2.078125, + "learning_rate": 0.00014721488963423464, + "loss": 2.2191, + "step": 291910 + }, + { + "epoch": 0.69, + "grad_norm": 2.015625, + "learning_rate": 0.0001472132603224648, + "loss": 2.2043, + "step": 291915 + }, + { + "epoch": 0.69, + "grad_norm": 2.53125, + "learning_rate": 0.00014721163099456602, + "loss": 2.1386, + "step": 291920 + }, + { + "epoch": 0.69, + "grad_norm": 2.09375, + "learning_rate": 0.0001472100016505389, + "loss": 1.9045, + "step": 291925 + }, + { + "epoch": 0.69, + "grad_norm": 2.5625, + "learning_rate": 0.000147208372290384, + "loss": 2.1457, + "step": 291930 + }, + { + "epoch": 0.69, + "grad_norm": 2.640625, + "learning_rate": 0.00014720674291410186, + "loss": 2.0055, + "step": 291935 + }, + { + "epoch": 0.69, + "grad_norm": 1.9296875, + "learning_rate": 0.00014720511352169305, + "loss": 2.3818, + "step": 291940 + }, + { + "epoch": 0.69, + "grad_norm": 2.078125, + "learning_rate": 0.0001472034841131581, + "loss": 2.14, + "step": 291945 + }, + { + "epoch": 0.69, + "grad_norm": 2.0625, + "learning_rate": 0.0001472018546884976, + "loss": 1.9401, + "step": 291950 + }, + { + "epoch": 0.69, + "grad_norm": 2.15625, + "learning_rate": 0.00014720022524771206, + "loss": 2.1382, + "step": 291955 + }, + { + "epoch": 0.69, + "grad_norm": 2.375, + "learning_rate": 0.0001471985957908021, + "loss": 2.0124, + "step": 291960 + }, + { + "epoch": 0.69, + "grad_norm": 1.984375, + "learning_rate": 0.00014719696631776823, + "loss": 2.0346, + "step": 291965 + }, + { + "epoch": 0.69, + "grad_norm": 2.484375, + "learning_rate": 0.000147195336828611, + "loss": 2.1541, + "step": 291970 + }, + { + "epoch": 0.69, + "grad_norm": 2.40625, + "learning_rate": 0.00014719370732333106, + "loss": 2.1333, + "step": 291975 + }, + { + "epoch": 0.69, + "grad_norm": 2.5625, + "learning_rate": 0.0001471920778019288, + "loss": 2.1391, + "step": 291980 + }, + { + "epoch": 0.69, + "grad_norm": 2.515625, + "learning_rate": 0.00014719044826440493, + "loss": 2.049, + "step": 291985 + }, + { + "epoch": 0.69, + "grad_norm": 1.9453125, + "learning_rate": 0.00014718881871075994, + "loss": 2.0646, + "step": 291990 + }, + { + "epoch": 0.69, + "grad_norm": 2.1875, + "learning_rate": 0.00014718718914099438, + "loss": 2.0675, + "step": 291995 + }, + { + "epoch": 0.69, + "grad_norm": 2.359375, + "learning_rate": 0.00014718555955510885, + "loss": 2.1418, + "step": 292000 + }, + { + "epoch": 0.69, + "grad_norm": 2.078125, + "learning_rate": 0.0001471839299531038, + "loss": 1.8898, + "step": 292005 + }, + { + "epoch": 0.69, + "grad_norm": 2.46875, + "learning_rate": 0.00014718230033497994, + "loss": 1.9219, + "step": 292010 + }, + { + "epoch": 0.69, + "grad_norm": 2.296875, + "learning_rate": 0.00014718067070073772, + "loss": 2.0619, + "step": 292015 + }, + { + "epoch": 0.69, + "grad_norm": 2.40625, + "learning_rate": 0.00014717904105037777, + "loss": 2.011, + "step": 292020 + }, + { + "epoch": 0.69, + "grad_norm": 2.328125, + "learning_rate": 0.00014717741138390054, + "loss": 2.209, + "step": 292025 + }, + { + "epoch": 0.69, + "grad_norm": 2.515625, + "learning_rate": 0.00014717578170130667, + "loss": 2.1159, + "step": 292030 + }, + { + "epoch": 0.69, + "grad_norm": 2.53125, + "learning_rate": 0.0001471741520025967, + "loss": 2.0276, + "step": 292035 + }, + { + "epoch": 0.69, + "grad_norm": 2.328125, + "learning_rate": 0.0001471725222877712, + "loss": 2.1565, + "step": 292040 + }, + { + "epoch": 0.69, + "grad_norm": 1.8984375, + "learning_rate": 0.0001471708925568307, + "loss": 1.8948, + "step": 292045 + }, + { + "epoch": 0.69, + "grad_norm": 2.515625, + "learning_rate": 0.00014716926280977579, + "loss": 2.151, + "step": 292050 + }, + { + "epoch": 0.69, + "grad_norm": 2.375, + "learning_rate": 0.00014716763304660697, + "loss": 2.057, + "step": 292055 + }, + { + "epoch": 0.69, + "grad_norm": 2.34375, + "learning_rate": 0.00014716600326732484, + "loss": 1.9094, + "step": 292060 + }, + { + "epoch": 0.69, + "grad_norm": 2.15625, + "learning_rate": 0.00014716437347192995, + "loss": 2.0566, + "step": 292065 + }, + { + "epoch": 0.69, + "grad_norm": 1.90625, + "learning_rate": 0.00014716274366042287, + "loss": 2.0064, + "step": 292070 + }, + { + "epoch": 0.69, + "grad_norm": 2.40625, + "learning_rate": 0.0001471611138328041, + "loss": 2.2413, + "step": 292075 + }, + { + "epoch": 0.69, + "grad_norm": 2.0, + "learning_rate": 0.0001471594839890743, + "loss": 2.0359, + "step": 292080 + }, + { + "epoch": 0.69, + "grad_norm": 2.203125, + "learning_rate": 0.00014715785412923392, + "loss": 1.9864, + "step": 292085 + }, + { + "epoch": 0.69, + "grad_norm": 2.125, + "learning_rate": 0.00014715622425328357, + "loss": 2.1607, + "step": 292090 + }, + { + "epoch": 0.69, + "grad_norm": 2.1875, + "learning_rate": 0.0001471545943612238, + "loss": 2.063, + "step": 292095 + }, + { + "epoch": 0.69, + "grad_norm": 2.0625, + "learning_rate": 0.00014715296445305516, + "loss": 1.9295, + "step": 292100 + }, + { + "epoch": 0.69, + "grad_norm": 2.03125, + "learning_rate": 0.00014715133452877823, + "loss": 2.1282, + "step": 292105 + }, + { + "epoch": 0.69, + "grad_norm": 2.484375, + "learning_rate": 0.00014714970458839353, + "loss": 2.1758, + "step": 292110 + }, + { + "epoch": 0.69, + "grad_norm": 2.828125, + "learning_rate": 0.00014714807463190165, + "loss": 2.2134, + "step": 292115 + }, + { + "epoch": 0.69, + "grad_norm": 2.125, + "learning_rate": 0.00014714644465930313, + "loss": 2.0425, + "step": 292120 + }, + { + "epoch": 0.69, + "grad_norm": 2.90625, + "learning_rate": 0.00014714481467059852, + "loss": 1.9881, + "step": 292125 + }, + { + "epoch": 0.69, + "grad_norm": 2.640625, + "learning_rate": 0.00014714318466578838, + "loss": 2.0976, + "step": 292130 + }, + { + "epoch": 0.69, + "grad_norm": 2.09375, + "learning_rate": 0.0001471415546448733, + "loss": 2.1505, + "step": 292135 + }, + { + "epoch": 0.69, + "grad_norm": 2.09375, + "learning_rate": 0.00014713992460785378, + "loss": 2.0746, + "step": 292140 + }, + { + "epoch": 0.69, + "grad_norm": 2.328125, + "learning_rate": 0.00014713829455473043, + "loss": 1.977, + "step": 292145 + }, + { + "epoch": 0.69, + "grad_norm": 2.578125, + "learning_rate": 0.00014713666448550375, + "loss": 1.8669, + "step": 292150 + }, + { + "epoch": 0.69, + "grad_norm": 2.34375, + "learning_rate": 0.00014713503440017437, + "loss": 1.9328, + "step": 292155 + }, + { + "epoch": 0.69, + "grad_norm": 2.34375, + "learning_rate": 0.00014713340429874282, + "loss": 2.2676, + "step": 292160 + }, + { + "epoch": 0.69, + "grad_norm": 2.3125, + "learning_rate": 0.0001471317741812096, + "loss": 2.2073, + "step": 292165 + }, + { + "epoch": 0.69, + "grad_norm": 2.09375, + "learning_rate": 0.00014713014404757534, + "loss": 2.0983, + "step": 292170 + }, + { + "epoch": 0.69, + "grad_norm": 2.125, + "learning_rate": 0.00014712851389784055, + "loss": 2.0743, + "step": 292175 + }, + { + "epoch": 0.69, + "grad_norm": 1.9296875, + "learning_rate": 0.00014712688373200581, + "loss": 1.9158, + "step": 292180 + }, + { + "epoch": 0.69, + "grad_norm": 1.9296875, + "learning_rate": 0.00014712525355007168, + "loss": 2.0911, + "step": 292185 + }, + { + "epoch": 0.69, + "grad_norm": 1.8828125, + "learning_rate": 0.0001471236233520387, + "loss": 2.1262, + "step": 292190 + }, + { + "epoch": 0.69, + "grad_norm": 2.09375, + "learning_rate": 0.00014712199313790742, + "loss": 2.1597, + "step": 292195 + }, + { + "epoch": 0.69, + "grad_norm": 2.109375, + "learning_rate": 0.00014712036290767845, + "loss": 1.8985, + "step": 292200 + }, + { + "epoch": 0.69, + "grad_norm": 2.859375, + "learning_rate": 0.0001471187326613523, + "loss": 1.9901, + "step": 292205 + }, + { + "epoch": 0.69, + "grad_norm": 2.0, + "learning_rate": 0.0001471171023989295, + "loss": 2.1529, + "step": 292210 + }, + { + "epoch": 0.69, + "grad_norm": 1.9375, + "learning_rate": 0.00014711547212041068, + "loss": 2.1219, + "step": 292215 + }, + { + "epoch": 0.69, + "grad_norm": 2.109375, + "learning_rate": 0.00014711384182579634, + "loss": 1.8513, + "step": 292220 + }, + { + "epoch": 0.69, + "grad_norm": 1.90625, + "learning_rate": 0.00014711221151508707, + "loss": 2.1127, + "step": 292225 + }, + { + "epoch": 0.69, + "grad_norm": 2.046875, + "learning_rate": 0.0001471105811882834, + "loss": 1.938, + "step": 292230 + }, + { + "epoch": 0.69, + "grad_norm": 1.984375, + "learning_rate": 0.00014710895084538592, + "loss": 2.0194, + "step": 292235 + }, + { + "epoch": 0.69, + "grad_norm": 2.484375, + "learning_rate": 0.00014710732048639518, + "loss": 2.0095, + "step": 292240 + }, + { + "epoch": 0.69, + "grad_norm": 3.34375, + "learning_rate": 0.0001471056901113117, + "loss": 1.9108, + "step": 292245 + }, + { + "epoch": 0.69, + "grad_norm": 2.28125, + "learning_rate": 0.00014710405972013607, + "loss": 2.0631, + "step": 292250 + }, + { + "epoch": 0.69, + "grad_norm": 1.859375, + "learning_rate": 0.0001471024293128688, + "loss": 2.0053, + "step": 292255 + }, + { + "epoch": 0.69, + "grad_norm": 1.546875, + "learning_rate": 0.00014710079888951053, + "loss": 2.0057, + "step": 292260 + }, + { + "epoch": 0.69, + "grad_norm": 2.515625, + "learning_rate": 0.00014709916845006177, + "loss": 2.043, + "step": 292265 + }, + { + "epoch": 0.69, + "grad_norm": 2.46875, + "learning_rate": 0.0001470975379945231, + "loss": 2.0327, + "step": 292270 + }, + { + "epoch": 0.69, + "grad_norm": 2.828125, + "learning_rate": 0.00014709590752289502, + "loss": 2.1187, + "step": 292275 + }, + { + "epoch": 0.69, + "grad_norm": 2.125, + "learning_rate": 0.00014709427703517812, + "loss": 2.0958, + "step": 292280 + }, + { + "epoch": 0.69, + "grad_norm": 1.8828125, + "learning_rate": 0.000147092646531373, + "loss": 2.2107, + "step": 292285 + }, + { + "epoch": 0.69, + "grad_norm": 2.484375, + "learning_rate": 0.00014709101601148016, + "loss": 2.3567, + "step": 292290 + }, + { + "epoch": 0.69, + "grad_norm": 1.984375, + "learning_rate": 0.00014708938547550017, + "loss": 2.0771, + "step": 292295 + }, + { + "epoch": 0.69, + "grad_norm": 2.265625, + "learning_rate": 0.00014708775492343356, + "loss": 1.9915, + "step": 292300 + }, + { + "epoch": 0.69, + "grad_norm": 2.375, + "learning_rate": 0.00014708612435528095, + "loss": 2.0417, + "step": 292305 + }, + { + "epoch": 0.69, + "grad_norm": 1.8984375, + "learning_rate": 0.00014708449377104287, + "loss": 2.004, + "step": 292310 + }, + { + "epoch": 0.69, + "grad_norm": 2.375, + "learning_rate": 0.0001470828631707199, + "loss": 2.1805, + "step": 292315 + }, + { + "epoch": 0.69, + "grad_norm": 2.234375, + "learning_rate": 0.00014708123255431255, + "loss": 2.1357, + "step": 292320 + }, + { + "epoch": 0.69, + "grad_norm": 2.09375, + "learning_rate": 0.00014707960192182135, + "loss": 1.9854, + "step": 292325 + }, + { + "epoch": 0.69, + "grad_norm": 2.25, + "learning_rate": 0.00014707797127324693, + "loss": 2.01, + "step": 292330 + }, + { + "epoch": 0.69, + "grad_norm": 1.921875, + "learning_rate": 0.00014707634060858986, + "loss": 2.1394, + "step": 292335 + }, + { + "epoch": 0.69, + "grad_norm": 2.078125, + "learning_rate": 0.0001470747099278506, + "loss": 2.1985, + "step": 292340 + }, + { + "epoch": 0.69, + "grad_norm": 2.140625, + "learning_rate": 0.0001470730792310298, + "loss": 2.0354, + "step": 292345 + }, + { + "epoch": 0.69, + "grad_norm": 2.203125, + "learning_rate": 0.000147071448518128, + "loss": 2.0834, + "step": 292350 + }, + { + "epoch": 0.69, + "grad_norm": 2.28125, + "learning_rate": 0.00014706981778914567, + "loss": 1.8954, + "step": 292355 + }, + { + "epoch": 0.69, + "grad_norm": 2.296875, + "learning_rate": 0.00014706818704408355, + "loss": 2.0423, + "step": 292360 + }, + { + "epoch": 0.69, + "grad_norm": 2.375, + "learning_rate": 0.000147066556282942, + "loss": 2.151, + "step": 292365 + }, + { + "epoch": 0.69, + "grad_norm": 2.421875, + "learning_rate": 0.00014706492550572168, + "loss": 1.9024, + "step": 292370 + }, + { + "epoch": 0.69, + "grad_norm": 2.390625, + "learning_rate": 0.00014706329471242312, + "loss": 2.133, + "step": 292375 + }, + { + "epoch": 0.69, + "grad_norm": 1.9765625, + "learning_rate": 0.00014706166390304688, + "loss": 2.3522, + "step": 292380 + }, + { + "epoch": 0.69, + "grad_norm": 2.203125, + "learning_rate": 0.00014706003307759353, + "loss": 2.1247, + "step": 292385 + }, + { + "epoch": 0.69, + "grad_norm": 2.875, + "learning_rate": 0.00014705840223606362, + "loss": 1.9568, + "step": 292390 + }, + { + "epoch": 0.69, + "grad_norm": 2.015625, + "learning_rate": 0.00014705677137845773, + "loss": 2.3078, + "step": 292395 + }, + { + "epoch": 0.69, + "grad_norm": 2.53125, + "learning_rate": 0.00014705514050477635, + "loss": 1.9398, + "step": 292400 + }, + { + "epoch": 0.69, + "grad_norm": 1.9921875, + "learning_rate": 0.00014705350961502012, + "loss": 1.9886, + "step": 292405 + }, + { + "epoch": 0.69, + "grad_norm": 1.7265625, + "learning_rate": 0.00014705187870918952, + "loss": 2.0814, + "step": 292410 + }, + { + "epoch": 0.69, + "grad_norm": 2.453125, + "learning_rate": 0.00014705024778728518, + "loss": 2.1841, + "step": 292415 + }, + { + "epoch": 0.69, + "grad_norm": 2.3125, + "learning_rate": 0.0001470486168493076, + "loss": 2.2216, + "step": 292420 + }, + { + "epoch": 0.69, + "grad_norm": 2.625, + "learning_rate": 0.00014704698589525735, + "loss": 2.1894, + "step": 292425 + }, + { + "epoch": 0.69, + "grad_norm": 1.828125, + "learning_rate": 0.00014704535492513504, + "loss": 1.9841, + "step": 292430 + }, + { + "epoch": 0.69, + "grad_norm": 2.671875, + "learning_rate": 0.00014704372393894115, + "loss": 1.9694, + "step": 292435 + }, + { + "epoch": 0.69, + "grad_norm": 2.71875, + "learning_rate": 0.0001470420929366763, + "loss": 1.9953, + "step": 292440 + }, + { + "epoch": 0.69, + "grad_norm": 2.203125, + "learning_rate": 0.00014704046191834098, + "loss": 2.014, + "step": 292445 + }, + { + "epoch": 0.69, + "grad_norm": 2.078125, + "learning_rate": 0.0001470388308839358, + "loss": 2.1242, + "step": 292450 + }, + { + "epoch": 0.69, + "grad_norm": 2.578125, + "learning_rate": 0.0001470371998334613, + "loss": 1.9385, + "step": 292455 + }, + { + "epoch": 0.69, + "grad_norm": 2.046875, + "learning_rate": 0.00014703556876691803, + "loss": 1.9116, + "step": 292460 + }, + { + "epoch": 0.69, + "grad_norm": 2.34375, + "learning_rate": 0.00014703393768430658, + "loss": 2.0591, + "step": 292465 + }, + { + "epoch": 0.69, + "grad_norm": 2.203125, + "learning_rate": 0.00014703230658562747, + "loss": 2.0715, + "step": 292470 + }, + { + "epoch": 0.69, + "grad_norm": 2.046875, + "learning_rate": 0.00014703067547088128, + "loss": 2.1623, + "step": 292475 + }, + { + "epoch": 0.69, + "grad_norm": 2.15625, + "learning_rate": 0.00014702904434006855, + "loss": 2.1252, + "step": 292480 + }, + { + "epoch": 0.69, + "grad_norm": 2.359375, + "learning_rate": 0.00014702741319318983, + "loss": 2.1026, + "step": 292485 + }, + { + "epoch": 0.69, + "grad_norm": 2.046875, + "learning_rate": 0.00014702578203024572, + "loss": 2.0184, + "step": 292490 + }, + { + "epoch": 0.69, + "grad_norm": 2.203125, + "learning_rate": 0.00014702415085123674, + "loss": 2.1951, + "step": 292495 + }, + { + "epoch": 0.69, + "grad_norm": 1.890625, + "learning_rate": 0.00014702251965616347, + "loss": 2.0868, + "step": 292500 + }, + { + "epoch": 0.69, + "grad_norm": 1.9140625, + "learning_rate": 0.00014702088844502644, + "loss": 1.9911, + "step": 292505 + }, + { + "epoch": 0.69, + "grad_norm": 2.234375, + "learning_rate": 0.00014701925721782622, + "loss": 2.1182, + "step": 292510 + }, + { + "epoch": 0.69, + "grad_norm": 2.28125, + "learning_rate": 0.00014701762597456338, + "loss": 2.3499, + "step": 292515 + }, + { + "epoch": 0.69, + "grad_norm": 2.109375, + "learning_rate": 0.00014701599471523847, + "loss": 2.0094, + "step": 292520 + }, + { + "epoch": 0.69, + "grad_norm": 1.96875, + "learning_rate": 0.00014701436343985202, + "loss": 1.9931, + "step": 292525 + }, + { + "epoch": 0.69, + "grad_norm": 2.375, + "learning_rate": 0.00014701273214840464, + "loss": 1.9847, + "step": 292530 + }, + { + "epoch": 0.69, + "grad_norm": 1.8515625, + "learning_rate": 0.0001470111008408968, + "loss": 1.9792, + "step": 292535 + }, + { + "epoch": 0.69, + "grad_norm": 2.140625, + "learning_rate": 0.00014700946951732918, + "loss": 2.0501, + "step": 292540 + }, + { + "epoch": 0.69, + "grad_norm": 1.8203125, + "learning_rate": 0.00014700783817770226, + "loss": 1.9533, + "step": 292545 + }, + { + "epoch": 0.69, + "grad_norm": 2.265625, + "learning_rate": 0.00014700620682201658, + "loss": 2.1355, + "step": 292550 + }, + { + "epoch": 0.69, + "grad_norm": 2.65625, + "learning_rate": 0.00014700457545027275, + "loss": 2.2191, + "step": 292555 + }, + { + "epoch": 0.69, + "grad_norm": 2.078125, + "learning_rate": 0.0001470029440624713, + "loss": 2.1768, + "step": 292560 + }, + { + "epoch": 0.69, + "grad_norm": 2.21875, + "learning_rate": 0.00014700131265861282, + "loss": 1.856, + "step": 292565 + }, + { + "epoch": 0.69, + "grad_norm": 2.4375, + "learning_rate": 0.00014699968123869778, + "loss": 1.9628, + "step": 292570 + }, + { + "epoch": 0.69, + "grad_norm": 2.421875, + "learning_rate": 0.00014699804980272685, + "loss": 1.9583, + "step": 292575 + }, + { + "epoch": 0.69, + "grad_norm": 1.9453125, + "learning_rate": 0.00014699641835070048, + "loss": 1.945, + "step": 292580 + }, + { + "epoch": 0.69, + "grad_norm": 1.796875, + "learning_rate": 0.00014699478688261935, + "loss": 2.0333, + "step": 292585 + }, + { + "epoch": 0.69, + "grad_norm": 2.484375, + "learning_rate": 0.0001469931553984839, + "loss": 2.084, + "step": 292590 + }, + { + "epoch": 0.69, + "grad_norm": 2.71875, + "learning_rate": 0.00014699152389829474, + "loss": 1.9977, + "step": 292595 + }, + { + "epoch": 0.69, + "grad_norm": 2.015625, + "learning_rate": 0.00014698989238205243, + "loss": 1.9774, + "step": 292600 + }, + { + "epoch": 0.69, + "grad_norm": 2.515625, + "learning_rate": 0.0001469882608497575, + "loss": 2.1866, + "step": 292605 + }, + { + "epoch": 0.69, + "grad_norm": 2.1875, + "learning_rate": 0.0001469866293014106, + "loss": 2.122, + "step": 292610 + }, + { + "epoch": 0.69, + "grad_norm": 1.9140625, + "learning_rate": 0.00014698499773701216, + "loss": 2.1892, + "step": 292615 + }, + { + "epoch": 0.69, + "grad_norm": 2.359375, + "learning_rate": 0.00014698336615656275, + "loss": 1.9326, + "step": 292620 + }, + { + "epoch": 0.69, + "grad_norm": 1.953125, + "learning_rate": 0.00014698173456006303, + "loss": 1.9094, + "step": 292625 + }, + { + "epoch": 0.69, + "grad_norm": 2.15625, + "learning_rate": 0.00014698010294751353, + "loss": 1.8793, + "step": 292630 + }, + { + "epoch": 0.69, + "grad_norm": 2.515625, + "learning_rate": 0.00014697847131891472, + "loss": 2.1072, + "step": 292635 + }, + { + "epoch": 0.69, + "grad_norm": 2.078125, + "learning_rate": 0.0001469768396742672, + "loss": 2.0889, + "step": 292640 + }, + { + "epoch": 0.69, + "grad_norm": 2.359375, + "learning_rate": 0.00014697520801357157, + "loss": 2.0222, + "step": 292645 + }, + { + "epoch": 0.69, + "grad_norm": 2.640625, + "learning_rate": 0.00014697357633682834, + "loss": 2.1163, + "step": 292650 + }, + { + "epoch": 0.69, + "grad_norm": 2.234375, + "learning_rate": 0.0001469719446440381, + "loss": 2.1646, + "step": 292655 + }, + { + "epoch": 0.69, + "grad_norm": 3.09375, + "learning_rate": 0.0001469703129352014, + "loss": 2.0314, + "step": 292660 + }, + { + "epoch": 0.69, + "grad_norm": 2.046875, + "learning_rate": 0.00014696868121031876, + "loss": 2.0592, + "step": 292665 + }, + { + "epoch": 0.69, + "grad_norm": 2.875, + "learning_rate": 0.00014696704946939076, + "loss": 2.1034, + "step": 292670 + }, + { + "epoch": 0.69, + "grad_norm": 2.21875, + "learning_rate": 0.000146965417712418, + "loss": 1.7618, + "step": 292675 + }, + { + "epoch": 0.69, + "grad_norm": 2.421875, + "learning_rate": 0.000146963785939401, + "loss": 2.0782, + "step": 292680 + }, + { + "epoch": 0.69, + "grad_norm": 2.171875, + "learning_rate": 0.0001469621541503403, + "loss": 2.17, + "step": 292685 + }, + { + "epoch": 0.69, + "grad_norm": 1.8125, + "learning_rate": 0.00014696052234523647, + "loss": 2.0414, + "step": 292690 + }, + { + "epoch": 0.69, + "grad_norm": 2.078125, + "learning_rate": 0.00014695889052409008, + "loss": 2.0383, + "step": 292695 + }, + { + "epoch": 0.69, + "grad_norm": 2.09375, + "learning_rate": 0.00014695725868690168, + "loss": 2.2203, + "step": 292700 + }, + { + "epoch": 0.69, + "grad_norm": 1.9609375, + "learning_rate": 0.00014695562683367185, + "loss": 2.0533, + "step": 292705 + }, + { + "epoch": 0.69, + "grad_norm": 2.015625, + "learning_rate": 0.0001469539949644011, + "loss": 2.0269, + "step": 292710 + }, + { + "epoch": 0.69, + "grad_norm": 2.5625, + "learning_rate": 0.00014695236307909004, + "loss": 2.0168, + "step": 292715 + }, + { + "epoch": 0.69, + "grad_norm": 2.078125, + "learning_rate": 0.0001469507311777392, + "loss": 2.1567, + "step": 292720 + }, + { + "epoch": 0.69, + "grad_norm": 2.265625, + "learning_rate": 0.00014694909926034913, + "loss": 1.9902, + "step": 292725 + }, + { + "epoch": 0.69, + "grad_norm": 2.359375, + "learning_rate": 0.00014694746732692038, + "loss": 2.1333, + "step": 292730 + }, + { + "epoch": 0.69, + "grad_norm": 2.015625, + "learning_rate": 0.00014694583537745351, + "loss": 1.9251, + "step": 292735 + }, + { + "epoch": 0.69, + "grad_norm": 2.375, + "learning_rate": 0.00014694420341194913, + "loss": 2.1392, + "step": 292740 + }, + { + "epoch": 0.69, + "grad_norm": 2.40625, + "learning_rate": 0.00014694257143040777, + "loss": 2.0946, + "step": 292745 + }, + { + "epoch": 0.69, + "grad_norm": 2.171875, + "learning_rate": 0.00014694093943282995, + "loss": 1.9931, + "step": 292750 + }, + { + "epoch": 0.69, + "grad_norm": 2.125, + "learning_rate": 0.00014693930741921623, + "loss": 2.1486, + "step": 292755 + }, + { + "epoch": 0.69, + "grad_norm": 1.6953125, + "learning_rate": 0.00014693767538956722, + "loss": 1.9611, + "step": 292760 + }, + { + "epoch": 0.69, + "grad_norm": 2.0625, + "learning_rate": 0.00014693604334388346, + "loss": 1.9706, + "step": 292765 + }, + { + "epoch": 0.69, + "grad_norm": 2.25, + "learning_rate": 0.00014693441128216548, + "loss": 2.1405, + "step": 292770 + }, + { + "epoch": 0.69, + "grad_norm": 2.171875, + "learning_rate": 0.00014693277920441386, + "loss": 2.1331, + "step": 292775 + }, + { + "epoch": 0.69, + "grad_norm": 2.09375, + "learning_rate": 0.00014693114711062915, + "loss": 2.0749, + "step": 292780 + }, + { + "epoch": 0.69, + "grad_norm": 1.8515625, + "learning_rate": 0.0001469295150008119, + "loss": 2.0934, + "step": 292785 + }, + { + "epoch": 0.69, + "grad_norm": 2.28125, + "learning_rate": 0.00014692788287496268, + "loss": 2.1104, + "step": 292790 + }, + { + "epoch": 0.69, + "grad_norm": 2.34375, + "learning_rate": 0.00014692625073308205, + "loss": 1.9999, + "step": 292795 + }, + { + "epoch": 0.69, + "grad_norm": 2.3125, + "learning_rate": 0.00014692461857517057, + "loss": 2.0156, + "step": 292800 + }, + { + "epoch": 0.69, + "grad_norm": 2.1875, + "learning_rate": 0.00014692298640122878, + "loss": 2.2179, + "step": 292805 + }, + { + "epoch": 0.69, + "grad_norm": 2.265625, + "learning_rate": 0.00014692135421125725, + "loss": 2.1675, + "step": 292810 + }, + { + "epoch": 0.69, + "grad_norm": 2.8125, + "learning_rate": 0.00014691972200525652, + "loss": 2.1551, + "step": 292815 + }, + { + "epoch": 0.69, + "grad_norm": 2.453125, + "learning_rate": 0.00014691808978322716, + "loss": 2.0947, + "step": 292820 + }, + { + "epoch": 0.69, + "grad_norm": 2.453125, + "learning_rate": 0.00014691645754516975, + "loss": 2.0532, + "step": 292825 + }, + { + "epoch": 0.69, + "grad_norm": 2.296875, + "learning_rate": 0.00014691482529108482, + "loss": 2.1545, + "step": 292830 + }, + { + "epoch": 0.69, + "grad_norm": 2.328125, + "learning_rate": 0.00014691319302097294, + "loss": 1.9934, + "step": 292835 + }, + { + "epoch": 0.69, + "grad_norm": 1.9375, + "learning_rate": 0.00014691156073483465, + "loss": 2.1104, + "step": 292840 + }, + { + "epoch": 0.69, + "grad_norm": 2.59375, + "learning_rate": 0.00014690992843267053, + "loss": 2.0948, + "step": 292845 + }, + { + "epoch": 0.69, + "grad_norm": 1.890625, + "learning_rate": 0.00014690829611448112, + "loss": 2.0288, + "step": 292850 + }, + { + "epoch": 0.69, + "grad_norm": 2.671875, + "learning_rate": 0.00014690666378026698, + "loss": 2.0962, + "step": 292855 + }, + { + "epoch": 0.69, + "grad_norm": 2.640625, + "learning_rate": 0.00014690503143002868, + "loss": 1.9361, + "step": 292860 + }, + { + "epoch": 0.69, + "grad_norm": 2.234375, + "learning_rate": 0.00014690339906376677, + "loss": 2.1479, + "step": 292865 + }, + { + "epoch": 0.69, + "grad_norm": 1.8984375, + "learning_rate": 0.00014690176668148182, + "loss": 1.965, + "step": 292870 + }, + { + "epoch": 0.69, + "grad_norm": 1.7890625, + "learning_rate": 0.00014690013428317436, + "loss": 2.0662, + "step": 292875 + }, + { + "epoch": 0.69, + "grad_norm": 2.234375, + "learning_rate": 0.000146898501868845, + "loss": 2.0799, + "step": 292880 + }, + { + "epoch": 0.69, + "grad_norm": 2.25, + "learning_rate": 0.00014689686943849422, + "loss": 2.1047, + "step": 292885 + }, + { + "epoch": 0.69, + "grad_norm": 2.734375, + "learning_rate": 0.00014689523699212262, + "loss": 2.1834, + "step": 292890 + }, + { + "epoch": 0.69, + "grad_norm": 2.03125, + "learning_rate": 0.00014689360452973076, + "loss": 2.0078, + "step": 292895 + }, + { + "epoch": 0.69, + "grad_norm": 2.5, + "learning_rate": 0.0001468919720513192, + "loss": 2.0569, + "step": 292900 + }, + { + "epoch": 0.69, + "grad_norm": 2.1875, + "learning_rate": 0.00014689033955688852, + "loss": 2.1412, + "step": 292905 + }, + { + "epoch": 0.69, + "grad_norm": 1.6953125, + "learning_rate": 0.0001468887070464392, + "loss": 1.7825, + "step": 292910 + }, + { + "epoch": 0.69, + "grad_norm": 2.3125, + "learning_rate": 0.00014688707451997188, + "loss": 1.9783, + "step": 292915 + }, + { + "epoch": 0.69, + "grad_norm": 2.1875, + "learning_rate": 0.00014688544197748705, + "loss": 2.0836, + "step": 292920 + }, + { + "epoch": 0.69, + "grad_norm": 2.25, + "learning_rate": 0.00014688380941898537, + "loss": 1.9762, + "step": 292925 + }, + { + "epoch": 0.69, + "grad_norm": 2.265625, + "learning_rate": 0.00014688217684446727, + "loss": 2.0384, + "step": 292930 + }, + { + "epoch": 0.69, + "grad_norm": 1.96875, + "learning_rate": 0.00014688054425393337, + "loss": 1.9675, + "step": 292935 + }, + { + "epoch": 0.69, + "grad_norm": 2.203125, + "learning_rate": 0.00014687891164738426, + "loss": 2.1104, + "step": 292940 + }, + { + "epoch": 0.69, + "grad_norm": 2.625, + "learning_rate": 0.00014687727902482043, + "loss": 1.9819, + "step": 292945 + }, + { + "epoch": 0.69, + "grad_norm": 2.21875, + "learning_rate": 0.0001468756463862425, + "loss": 2.0765, + "step": 292950 + }, + { + "epoch": 0.69, + "grad_norm": 1.875, + "learning_rate": 0.000146874013731651, + "loss": 2.1899, + "step": 292955 + }, + { + "epoch": 0.69, + "grad_norm": 2.515625, + "learning_rate": 0.00014687238106104647, + "loss": 2.0268, + "step": 292960 + }, + { + "epoch": 0.69, + "grad_norm": 2.859375, + "learning_rate": 0.00014687074837442947, + "loss": 1.9239, + "step": 292965 + }, + { + "epoch": 0.69, + "grad_norm": 2.25, + "learning_rate": 0.00014686911567180058, + "loss": 1.9354, + "step": 292970 + }, + { + "epoch": 0.69, + "grad_norm": 2.015625, + "learning_rate": 0.00014686748295316036, + "loss": 1.8826, + "step": 292975 + }, + { + "epoch": 0.69, + "grad_norm": 1.90625, + "learning_rate": 0.00014686585021850935, + "loss": 2.1738, + "step": 292980 + }, + { + "epoch": 0.69, + "grad_norm": 2.0625, + "learning_rate": 0.00014686421746784814, + "loss": 2.1681, + "step": 292985 + }, + { + "epoch": 0.69, + "grad_norm": 2.46875, + "learning_rate": 0.00014686258470117728, + "loss": 2.1899, + "step": 292990 + }, + { + "epoch": 0.69, + "grad_norm": 2.78125, + "learning_rate": 0.00014686095191849725, + "loss": 2.1977, + "step": 292995 + }, + { + "epoch": 0.69, + "grad_norm": 2.1875, + "learning_rate": 0.0001468593191198087, + "loss": 1.8427, + "step": 293000 + }, + { + "epoch": 0.69, + "grad_norm": 2.140625, + "learning_rate": 0.00014685768630511215, + "loss": 2.1958, + "step": 293005 + }, + { + "epoch": 0.69, + "grad_norm": 2.34375, + "learning_rate": 0.00014685605347440817, + "loss": 1.9853, + "step": 293010 + }, + { + "epoch": 0.69, + "grad_norm": 1.9765625, + "learning_rate": 0.0001468544206276973, + "loss": 2.0991, + "step": 293015 + }, + { + "epoch": 0.69, + "grad_norm": 2.359375, + "learning_rate": 0.00014685278776498014, + "loss": 2.0181, + "step": 293020 + }, + { + "epoch": 0.69, + "grad_norm": 1.984375, + "learning_rate": 0.00014685115488625717, + "loss": 2.0521, + "step": 293025 + }, + { + "epoch": 0.69, + "grad_norm": 2.4375, + "learning_rate": 0.00014684952199152902, + "loss": 2.176, + "step": 293030 + }, + { + "epoch": 0.69, + "grad_norm": 2.75, + "learning_rate": 0.00014684788908079623, + "loss": 2.1405, + "step": 293035 + }, + { + "epoch": 0.69, + "grad_norm": 2.03125, + "learning_rate": 0.00014684625615405937, + "loss": 2.0763, + "step": 293040 + }, + { + "epoch": 0.69, + "grad_norm": 1.8671875, + "learning_rate": 0.00014684462321131898, + "loss": 2.082, + "step": 293045 + }, + { + "epoch": 0.69, + "grad_norm": 2.0625, + "learning_rate": 0.00014684299025257555, + "loss": 2.1861, + "step": 293050 + }, + { + "epoch": 0.69, + "grad_norm": 2.09375, + "learning_rate": 0.00014684135727782978, + "loss": 2.2044, + "step": 293055 + }, + { + "epoch": 0.69, + "grad_norm": 2.109375, + "learning_rate": 0.00014683972428708211, + "loss": 2.1845, + "step": 293060 + }, + { + "epoch": 0.69, + "grad_norm": 2.0625, + "learning_rate": 0.00014683809128033317, + "loss": 1.9585, + "step": 293065 + }, + { + "epoch": 0.69, + "grad_norm": 2.15625, + "learning_rate": 0.00014683645825758348, + "loss": 2.1411, + "step": 293070 + }, + { + "epoch": 0.69, + "grad_norm": 2.125, + "learning_rate": 0.00014683482521883357, + "loss": 2.0181, + "step": 293075 + }, + { + "epoch": 0.69, + "grad_norm": 2.25, + "learning_rate": 0.00014683319216408407, + "loss": 2.1342, + "step": 293080 + }, + { + "epoch": 0.69, + "grad_norm": 2.875, + "learning_rate": 0.00014683155909333553, + "loss": 1.9418, + "step": 293085 + }, + { + "epoch": 0.69, + "grad_norm": 1.9375, + "learning_rate": 0.00014682992600658843, + "loss": 2.0281, + "step": 293090 + }, + { + "epoch": 0.69, + "grad_norm": 2.46875, + "learning_rate": 0.0001468282929038434, + "loss": 2.0795, + "step": 293095 + }, + { + "epoch": 0.69, + "grad_norm": 2.453125, + "learning_rate": 0.00014682665978510095, + "loss": 2.119, + "step": 293100 + }, + { + "epoch": 0.69, + "grad_norm": 1.875, + "learning_rate": 0.0001468250266503617, + "loss": 2.1258, + "step": 293105 + }, + { + "epoch": 0.69, + "grad_norm": 2.15625, + "learning_rate": 0.00014682339349962614, + "loss": 2.2476, + "step": 293110 + }, + { + "epoch": 0.69, + "grad_norm": 1.75, + "learning_rate": 0.00014682176033289487, + "loss": 2.0079, + "step": 293115 + }, + { + "epoch": 0.69, + "grad_norm": 2.25, + "learning_rate": 0.00014682012715016844, + "loss": 2.0578, + "step": 293120 + }, + { + "epoch": 0.69, + "grad_norm": 2.109375, + "learning_rate": 0.00014681849395144743, + "loss": 2.0251, + "step": 293125 + }, + { + "epoch": 0.69, + "grad_norm": 2.765625, + "learning_rate": 0.00014681686073673235, + "loss": 2.0141, + "step": 293130 + }, + { + "epoch": 0.69, + "grad_norm": 2.3125, + "learning_rate": 0.00014681522750602377, + "loss": 2.113, + "step": 293135 + }, + { + "epoch": 0.69, + "grad_norm": 2.21875, + "learning_rate": 0.00014681359425932228, + "loss": 2.1054, + "step": 293140 + }, + { + "epoch": 0.69, + "grad_norm": 2.71875, + "learning_rate": 0.00014681196099662839, + "loss": 2.2413, + "step": 293145 + }, + { + "epoch": 0.69, + "grad_norm": 2.296875, + "learning_rate": 0.00014681032771794272, + "loss": 2.0245, + "step": 293150 + }, + { + "epoch": 0.69, + "grad_norm": 2.359375, + "learning_rate": 0.00014680869442326578, + "loss": 1.9411, + "step": 293155 + }, + { + "epoch": 0.69, + "grad_norm": 2.3125, + "learning_rate": 0.00014680706111259813, + "loss": 2.2148, + "step": 293160 + }, + { + "epoch": 0.69, + "grad_norm": 2.375, + "learning_rate": 0.00014680542778594034, + "loss": 2.0298, + "step": 293165 + }, + { + "epoch": 0.69, + "grad_norm": 2.09375, + "learning_rate": 0.00014680379444329298, + "loss": 2.0962, + "step": 293170 + }, + { + "epoch": 0.69, + "grad_norm": 2.328125, + "learning_rate": 0.00014680216108465662, + "loss": 2.2339, + "step": 293175 + }, + { + "epoch": 0.69, + "grad_norm": 2.15625, + "learning_rate": 0.00014680052771003175, + "loss": 2.1242, + "step": 293180 + }, + { + "epoch": 0.69, + "grad_norm": 2.34375, + "learning_rate": 0.00014679889431941896, + "loss": 2.1274, + "step": 293185 + }, + { + "epoch": 0.69, + "grad_norm": 1.828125, + "learning_rate": 0.00014679726091281885, + "loss": 1.9925, + "step": 293190 + }, + { + "epoch": 0.69, + "grad_norm": 2.28125, + "learning_rate": 0.00014679562749023191, + "loss": 2.1638, + "step": 293195 + }, + { + "epoch": 0.69, + "grad_norm": 2.25, + "learning_rate": 0.00014679399405165882, + "loss": 2.0426, + "step": 293200 + }, + { + "epoch": 0.69, + "grad_norm": 2.296875, + "learning_rate": 0.00014679236059709998, + "loss": 2.148, + "step": 293205 + }, + { + "epoch": 0.69, + "grad_norm": 1.9453125, + "learning_rate": 0.000146790727126556, + "loss": 2.0991, + "step": 293210 + }, + { + "epoch": 0.69, + "grad_norm": 2.125, + "learning_rate": 0.00014678909364002752, + "loss": 1.8932, + "step": 293215 + }, + { + "epoch": 0.69, + "grad_norm": 2.140625, + "learning_rate": 0.00014678746013751503, + "loss": 2.1791, + "step": 293220 + }, + { + "epoch": 0.69, + "grad_norm": 2.03125, + "learning_rate": 0.00014678582661901908, + "loss": 2.0208, + "step": 293225 + }, + { + "epoch": 0.69, + "grad_norm": 2.3125, + "learning_rate": 0.00014678419308454023, + "loss": 2.1249, + "step": 293230 + }, + { + "epoch": 0.69, + "grad_norm": 2.140625, + "learning_rate": 0.00014678255953407904, + "loss": 1.9752, + "step": 293235 + }, + { + "epoch": 0.69, + "grad_norm": 1.9921875, + "learning_rate": 0.00014678092596763612, + "loss": 1.9958, + "step": 293240 + }, + { + "epoch": 0.69, + "grad_norm": 2.515625, + "learning_rate": 0.00014677929238521197, + "loss": 1.8546, + "step": 293245 + }, + { + "epoch": 0.69, + "grad_norm": 2.375, + "learning_rate": 0.00014677765878680718, + "loss": 2.2039, + "step": 293250 + }, + { + "epoch": 0.69, + "grad_norm": 2.078125, + "learning_rate": 0.00014677602517242227, + "loss": 2.0745, + "step": 293255 + }, + { + "epoch": 0.69, + "grad_norm": 2.390625, + "learning_rate": 0.00014677439154205782, + "loss": 1.9693, + "step": 293260 + }, + { + "epoch": 0.69, + "grad_norm": 1.984375, + "learning_rate": 0.0001467727578957144, + "loss": 2.0961, + "step": 293265 + }, + { + "epoch": 0.69, + "grad_norm": 2.375, + "learning_rate": 0.00014677112423339256, + "loss": 2.1754, + "step": 293270 + }, + { + "epoch": 0.69, + "grad_norm": 2.078125, + "learning_rate": 0.00014676949055509285, + "loss": 2.1341, + "step": 293275 + }, + { + "epoch": 0.69, + "grad_norm": 2.328125, + "learning_rate": 0.00014676785686081585, + "loss": 2.0705, + "step": 293280 + }, + { + "epoch": 0.69, + "grad_norm": 2.078125, + "learning_rate": 0.00014676622315056207, + "loss": 2.0066, + "step": 293285 + }, + { + "epoch": 0.69, + "grad_norm": 2.296875, + "learning_rate": 0.00014676458942433214, + "loss": 2.1265, + "step": 293290 + }, + { + "epoch": 0.69, + "grad_norm": 2.140625, + "learning_rate": 0.00014676295568212655, + "loss": 2.037, + "step": 293295 + }, + { + "epoch": 0.69, + "grad_norm": 2.109375, + "learning_rate": 0.0001467613219239459, + "loss": 2.0438, + "step": 293300 + }, + { + "epoch": 0.69, + "grad_norm": 2.125, + "learning_rate": 0.0001467596881497907, + "loss": 2.0985, + "step": 293305 + }, + { + "epoch": 0.69, + "grad_norm": 2.15625, + "learning_rate": 0.00014675805435966158, + "loss": 2.0537, + "step": 293310 + }, + { + "epoch": 0.69, + "grad_norm": 2.046875, + "learning_rate": 0.00014675642055355907, + "loss": 2.1657, + "step": 293315 + }, + { + "epoch": 0.69, + "grad_norm": 2.1875, + "learning_rate": 0.0001467547867314837, + "loss": 1.9894, + "step": 293320 + }, + { + "epoch": 0.69, + "grad_norm": 2.171875, + "learning_rate": 0.00014675315289343604, + "loss": 2.1148, + "step": 293325 + }, + { + "epoch": 0.69, + "grad_norm": 2.28125, + "learning_rate": 0.00014675151903941668, + "loss": 1.947, + "step": 293330 + }, + { + "epoch": 0.69, + "grad_norm": 1.890625, + "learning_rate": 0.00014674988516942615, + "loss": 2.0847, + "step": 293335 + }, + { + "epoch": 0.69, + "grad_norm": 1.96875, + "learning_rate": 0.000146748251283465, + "loss": 2.1465, + "step": 293340 + }, + { + "epoch": 0.69, + "grad_norm": 2.015625, + "learning_rate": 0.00014674661738153373, + "loss": 1.9506, + "step": 293345 + }, + { + "epoch": 0.69, + "grad_norm": 1.5390625, + "learning_rate": 0.00014674498346363304, + "loss": 1.9151, + "step": 293350 + }, + { + "epoch": 0.69, + "grad_norm": 2.328125, + "learning_rate": 0.00014674334952976344, + "loss": 2.1883, + "step": 293355 + }, + { + "epoch": 0.69, + "grad_norm": 1.953125, + "learning_rate": 0.00014674171557992543, + "loss": 2.3021, + "step": 293360 + }, + { + "epoch": 0.69, + "grad_norm": 1.8203125, + "learning_rate": 0.00014674008161411962, + "loss": 1.9765, + "step": 293365 + }, + { + "epoch": 0.69, + "grad_norm": 2.234375, + "learning_rate": 0.0001467384476323465, + "loss": 2.025, + "step": 293370 + }, + { + "epoch": 0.69, + "grad_norm": 2.578125, + "learning_rate": 0.00014673681363460672, + "loss": 2.1649, + "step": 293375 + }, + { + "epoch": 0.69, + "grad_norm": 2.59375, + "learning_rate": 0.0001467351796209008, + "loss": 2.174, + "step": 293380 + }, + { + "epoch": 0.69, + "grad_norm": 2.578125, + "learning_rate": 0.0001467335455912293, + "loss": 1.9229, + "step": 293385 + }, + { + "epoch": 0.69, + "grad_norm": 2.84375, + "learning_rate": 0.00014673191154559277, + "loss": 1.9328, + "step": 293390 + }, + { + "epoch": 0.69, + "grad_norm": 2.296875, + "learning_rate": 0.00014673027748399176, + "loss": 2.0821, + "step": 293395 + }, + { + "epoch": 0.69, + "grad_norm": 2.328125, + "learning_rate": 0.00014672864340642682, + "loss": 1.9234, + "step": 293400 + }, + { + "epoch": 0.69, + "grad_norm": 2.703125, + "learning_rate": 0.00014672700931289855, + "loss": 2.1491, + "step": 293405 + }, + { + "epoch": 0.69, + "grad_norm": 2.234375, + "learning_rate": 0.0001467253752034075, + "loss": 2.0381, + "step": 293410 + }, + { + "epoch": 0.69, + "grad_norm": 2.890625, + "learning_rate": 0.00014672374107795418, + "loss": 2.1967, + "step": 293415 + }, + { + "epoch": 0.69, + "grad_norm": 2.671875, + "learning_rate": 0.0001467221069365392, + "loss": 2.1248, + "step": 293420 + }, + { + "epoch": 0.69, + "grad_norm": 2.28125, + "learning_rate": 0.00014672047277916313, + "loss": 2.053, + "step": 293425 + }, + { + "epoch": 0.69, + "grad_norm": 2.3125, + "learning_rate": 0.00014671883860582644, + "loss": 2.1066, + "step": 293430 + }, + { + "epoch": 0.69, + "grad_norm": 2.25, + "learning_rate": 0.00014671720441652978, + "loss": 2.0073, + "step": 293435 + }, + { + "epoch": 0.69, + "grad_norm": 2.140625, + "learning_rate": 0.00014671557021127369, + "loss": 2.0147, + "step": 293440 + }, + { + "epoch": 0.69, + "grad_norm": 2.203125, + "learning_rate": 0.00014671393599005868, + "loss": 2.1234, + "step": 293445 + }, + { + "epoch": 0.69, + "grad_norm": 2.15625, + "learning_rate": 0.00014671230175288536, + "loss": 2.0343, + "step": 293450 + }, + { + "epoch": 0.69, + "grad_norm": 2.296875, + "learning_rate": 0.0001467106674997543, + "loss": 1.9905, + "step": 293455 + }, + { + "epoch": 0.69, + "grad_norm": 2.515625, + "learning_rate": 0.000146709033230666, + "loss": 2.2096, + "step": 293460 + }, + { + "epoch": 0.69, + "grad_norm": 2.03125, + "learning_rate": 0.00014670739894562105, + "loss": 2.051, + "step": 293465 + }, + { + "epoch": 0.69, + "grad_norm": 2.296875, + "learning_rate": 0.00014670576464462001, + "loss": 1.9251, + "step": 293470 + }, + { + "epoch": 0.69, + "grad_norm": 2.078125, + "learning_rate": 0.00014670413032766343, + "loss": 2.1033, + "step": 293475 + }, + { + "epoch": 0.69, + "grad_norm": 2.71875, + "learning_rate": 0.00014670249599475183, + "loss": 1.8704, + "step": 293480 + }, + { + "epoch": 0.69, + "grad_norm": 2.359375, + "learning_rate": 0.00014670086164588587, + "loss": 1.9529, + "step": 293485 + }, + { + "epoch": 0.69, + "grad_norm": 2.234375, + "learning_rate": 0.000146699227281066, + "loss": 2.0671, + "step": 293490 + }, + { + "epoch": 0.69, + "grad_norm": 2.125, + "learning_rate": 0.0001466975929002929, + "loss": 1.9971, + "step": 293495 + }, + { + "epoch": 0.69, + "grad_norm": 2.40625, + "learning_rate": 0.000146695958503567, + "loss": 2.1562, + "step": 293500 + }, + { + "epoch": 0.69, + "grad_norm": 2.53125, + "learning_rate": 0.0001466943240908889, + "loss": 2.1294, + "step": 293505 + }, + { + "epoch": 0.69, + "grad_norm": 2.15625, + "learning_rate": 0.0001466926896622592, + "loss": 2.0116, + "step": 293510 + }, + { + "epoch": 0.69, + "grad_norm": 2.171875, + "learning_rate": 0.00014669105521767845, + "loss": 2.2365, + "step": 293515 + }, + { + "epoch": 0.69, + "grad_norm": 2.15625, + "learning_rate": 0.00014668942075714715, + "loss": 1.8543, + "step": 293520 + }, + { + "epoch": 0.69, + "grad_norm": 2.546875, + "learning_rate": 0.00014668778628066593, + "loss": 2.1305, + "step": 293525 + }, + { + "epoch": 0.69, + "grad_norm": 2.21875, + "learning_rate": 0.00014668615178823529, + "loss": 2.197, + "step": 293530 + }, + { + "epoch": 0.69, + "grad_norm": 1.84375, + "learning_rate": 0.00014668451727985578, + "loss": 2.2201, + "step": 293535 + }, + { + "epoch": 0.69, + "grad_norm": 2.140625, + "learning_rate": 0.00014668288275552807, + "loss": 2.183, + "step": 293540 + }, + { + "epoch": 0.69, + "grad_norm": 2.0, + "learning_rate": 0.00014668124821525262, + "loss": 2.1202, + "step": 293545 + }, + { + "epoch": 0.69, + "grad_norm": 1.859375, + "learning_rate": 0.00014667961365902999, + "loss": 2.0081, + "step": 293550 + }, + { + "epoch": 0.69, + "grad_norm": 2.328125, + "learning_rate": 0.00014667797908686077, + "loss": 2.0046, + "step": 293555 + }, + { + "epoch": 0.69, + "grad_norm": 1.953125, + "learning_rate": 0.00014667634449874547, + "loss": 1.9418, + "step": 293560 + }, + { + "epoch": 0.69, + "grad_norm": 2.03125, + "learning_rate": 0.00014667470989468472, + "loss": 2.0383, + "step": 293565 + }, + { + "epoch": 0.69, + "grad_norm": 2.25, + "learning_rate": 0.00014667307527467904, + "loss": 1.93, + "step": 293570 + }, + { + "epoch": 0.69, + "grad_norm": 3.09375, + "learning_rate": 0.00014667144063872897, + "loss": 2.093, + "step": 293575 + }, + { + "epoch": 0.69, + "grad_norm": 2.25, + "learning_rate": 0.00014666980598683512, + "loss": 2.0712, + "step": 293580 + }, + { + "epoch": 0.69, + "grad_norm": 2.171875, + "learning_rate": 0.000146668171318998, + "loss": 2.0494, + "step": 293585 + }, + { + "epoch": 0.69, + "grad_norm": 2.921875, + "learning_rate": 0.0001466665366352182, + "loss": 2.1669, + "step": 293590 + }, + { + "epoch": 0.69, + "grad_norm": 2.328125, + "learning_rate": 0.00014666490193549625, + "loss": 2.0158, + "step": 293595 + }, + { + "epoch": 0.69, + "grad_norm": 2.25, + "learning_rate": 0.00014666326721983272, + "loss": 2.023, + "step": 293600 + }, + { + "epoch": 0.69, + "grad_norm": 1.984375, + "learning_rate": 0.00014666163248822818, + "loss": 2.3136, + "step": 293605 + }, + { + "epoch": 0.69, + "grad_norm": 2.140625, + "learning_rate": 0.00014665999774068317, + "loss": 1.9693, + "step": 293610 + }, + { + "epoch": 0.69, + "grad_norm": 2.5625, + "learning_rate": 0.00014665836297719827, + "loss": 2.2001, + "step": 293615 + }, + { + "epoch": 0.69, + "grad_norm": 2.03125, + "learning_rate": 0.00014665672819777403, + "loss": 2.1328, + "step": 293620 + }, + { + "epoch": 0.69, + "grad_norm": 2.59375, + "learning_rate": 0.00014665509340241098, + "loss": 2.1356, + "step": 293625 + }, + { + "epoch": 0.69, + "grad_norm": 2.546875, + "learning_rate": 0.00014665345859110975, + "loss": 2.1182, + "step": 293630 + }, + { + "epoch": 0.69, + "grad_norm": 2.984375, + "learning_rate": 0.00014665182376387084, + "loss": 2.0475, + "step": 293635 + }, + { + "epoch": 0.69, + "grad_norm": 2.140625, + "learning_rate": 0.00014665018892069478, + "loss": 2.0784, + "step": 293640 + }, + { + "epoch": 0.69, + "grad_norm": 2.8125, + "learning_rate": 0.0001466485540615822, + "loss": 2.0731, + "step": 293645 + }, + { + "epoch": 0.69, + "grad_norm": 2.265625, + "learning_rate": 0.00014664691918653361, + "loss": 2.1437, + "step": 293650 + }, + { + "epoch": 0.69, + "grad_norm": 2.25, + "learning_rate": 0.00014664528429554963, + "loss": 1.9879, + "step": 293655 + }, + { + "epoch": 0.69, + "grad_norm": 2.15625, + "learning_rate": 0.00014664364938863074, + "loss": 1.9458, + "step": 293660 + }, + { + "epoch": 0.69, + "grad_norm": 2.5625, + "learning_rate": 0.00014664201446577753, + "loss": 2.1187, + "step": 293665 + }, + { + "epoch": 0.69, + "grad_norm": 2.203125, + "learning_rate": 0.00014664037952699056, + "loss": 2.1028, + "step": 293670 + }, + { + "epoch": 0.69, + "grad_norm": 2.484375, + "learning_rate": 0.00014663874457227042, + "loss": 1.9527, + "step": 293675 + }, + { + "epoch": 0.69, + "grad_norm": 2.125, + "learning_rate": 0.0001466371096016176, + "loss": 1.9897, + "step": 293680 + }, + { + "epoch": 0.69, + "grad_norm": 1.9609375, + "learning_rate": 0.00014663547461503274, + "loss": 1.8836, + "step": 293685 + }, + { + "epoch": 0.69, + "grad_norm": 2.4375, + "learning_rate": 0.00014663383961251633, + "loss": 1.9516, + "step": 293690 + }, + { + "epoch": 0.69, + "grad_norm": 2.234375, + "learning_rate": 0.00014663220459406896, + "loss": 1.9457, + "step": 293695 + }, + { + "epoch": 0.69, + "grad_norm": 2.234375, + "learning_rate": 0.00014663056955969116, + "loss": 2.04, + "step": 293700 + }, + { + "epoch": 0.69, + "grad_norm": 2.640625, + "learning_rate": 0.00014662893450938355, + "loss": 1.74, + "step": 293705 + }, + { + "epoch": 0.69, + "grad_norm": 2.140625, + "learning_rate": 0.00014662729944314663, + "loss": 2.0676, + "step": 293710 + }, + { + "epoch": 0.69, + "grad_norm": 2.3125, + "learning_rate": 0.00014662566436098097, + "loss": 2.1303, + "step": 293715 + }, + { + "epoch": 0.69, + "grad_norm": 2.328125, + "learning_rate": 0.00014662402926288715, + "loss": 2.1791, + "step": 293720 + }, + { + "epoch": 0.69, + "grad_norm": 2.4375, + "learning_rate": 0.0001466223941488657, + "loss": 1.8714, + "step": 293725 + }, + { + "epoch": 0.69, + "grad_norm": 2.859375, + "learning_rate": 0.00014662075901891722, + "loss": 2.0898, + "step": 293730 + }, + { + "epoch": 0.69, + "grad_norm": 2.375, + "learning_rate": 0.00014661912387304224, + "loss": 2.0547, + "step": 293735 + }, + { + "epoch": 0.69, + "grad_norm": 2.4375, + "learning_rate": 0.00014661748871124133, + "loss": 2.1153, + "step": 293740 + }, + { + "epoch": 0.69, + "grad_norm": 2.25, + "learning_rate": 0.000146615853533515, + "loss": 2.1577, + "step": 293745 + }, + { + "epoch": 0.69, + "grad_norm": 1.7265625, + "learning_rate": 0.00014661421833986392, + "loss": 1.9827, + "step": 293750 + }, + { + "epoch": 0.69, + "grad_norm": 2.34375, + "learning_rate": 0.0001466125831302885, + "loss": 2.1311, + "step": 293755 + }, + { + "epoch": 0.69, + "grad_norm": 2.34375, + "learning_rate": 0.00014661094790478942, + "loss": 1.9198, + "step": 293760 + }, + { + "epoch": 0.69, + "grad_norm": 2.4375, + "learning_rate": 0.00014660931266336717, + "loss": 1.9705, + "step": 293765 + }, + { + "epoch": 0.69, + "grad_norm": 2.046875, + "learning_rate": 0.00014660767740602238, + "loss": 2.134, + "step": 293770 + }, + { + "epoch": 0.69, + "grad_norm": 2.71875, + "learning_rate": 0.0001466060421327555, + "loss": 2.2236, + "step": 293775 + }, + { + "epoch": 0.69, + "grad_norm": 2.0625, + "learning_rate": 0.00014660440684356718, + "loss": 2.1618, + "step": 293780 + }, + { + "epoch": 0.69, + "grad_norm": 1.9140625, + "learning_rate": 0.00014660277153845795, + "loss": 1.9043, + "step": 293785 + }, + { + "epoch": 0.69, + "grad_norm": 2.203125, + "learning_rate": 0.0001466011362174284, + "loss": 1.954, + "step": 293790 + }, + { + "epoch": 0.69, + "grad_norm": 2.703125, + "learning_rate": 0.00014659950088047902, + "loss": 2.0366, + "step": 293795 + }, + { + "epoch": 0.69, + "grad_norm": 2.0, + "learning_rate": 0.00014659786552761038, + "loss": 1.9641, + "step": 293800 + }, + { + "epoch": 0.69, + "grad_norm": 2.0625, + "learning_rate": 0.0001465962301588231, + "loss": 2.149, + "step": 293805 + }, + { + "epoch": 0.69, + "grad_norm": 2.34375, + "learning_rate": 0.00014659459477411773, + "loss": 2.221, + "step": 293810 + }, + { + "epoch": 0.69, + "grad_norm": 2.140625, + "learning_rate": 0.00014659295937349476, + "loss": 2.1186, + "step": 293815 + }, + { + "epoch": 0.69, + "grad_norm": 2.171875, + "learning_rate": 0.00014659132395695477, + "loss": 1.9141, + "step": 293820 + }, + { + "epoch": 0.69, + "grad_norm": 1.9921875, + "learning_rate": 0.00014658968852449838, + "loss": 1.9637, + "step": 293825 + }, + { + "epoch": 0.69, + "grad_norm": 2.15625, + "learning_rate": 0.00014658805307612605, + "loss": 1.9468, + "step": 293830 + }, + { + "epoch": 0.69, + "grad_norm": 2.171875, + "learning_rate": 0.00014658641761183848, + "loss": 2.1122, + "step": 293835 + }, + { + "epoch": 0.69, + "grad_norm": 1.9609375, + "learning_rate": 0.0001465847821316361, + "loss": 1.9548, + "step": 293840 + }, + { + "epoch": 0.69, + "grad_norm": 1.9296875, + "learning_rate": 0.00014658314663551953, + "loss": 2.0485, + "step": 293845 + }, + { + "epoch": 0.69, + "grad_norm": 2.34375, + "learning_rate": 0.0001465815111234893, + "loss": 2.1687, + "step": 293850 + }, + { + "epoch": 0.69, + "grad_norm": 2.234375, + "learning_rate": 0.00014657987559554597, + "loss": 1.942, + "step": 293855 + }, + { + "epoch": 0.69, + "grad_norm": 2.21875, + "learning_rate": 0.00014657824005169013, + "loss": 1.9962, + "step": 293860 + }, + { + "epoch": 0.69, + "grad_norm": 2.90625, + "learning_rate": 0.0001465766044919223, + "loss": 2.1195, + "step": 293865 + }, + { + "epoch": 0.69, + "grad_norm": 1.9453125, + "learning_rate": 0.00014657496891624307, + "loss": 2.0047, + "step": 293870 + }, + { + "epoch": 0.69, + "grad_norm": 2.734375, + "learning_rate": 0.000146573333324653, + "loss": 2.0234, + "step": 293875 + }, + { + "epoch": 0.69, + "grad_norm": 2.03125, + "learning_rate": 0.0001465716977171526, + "loss": 2.0866, + "step": 293880 + }, + { + "epoch": 0.69, + "grad_norm": 2.3125, + "learning_rate": 0.00014657006209374248, + "loss": 2.1241, + "step": 293885 + }, + { + "epoch": 0.69, + "grad_norm": 2.203125, + "learning_rate": 0.0001465684264544232, + "loss": 2.0007, + "step": 293890 + }, + { + "epoch": 0.69, + "grad_norm": 2.03125, + "learning_rate": 0.00014656679079919527, + "loss": 2.2066, + "step": 293895 + }, + { + "epoch": 0.69, + "grad_norm": 2.203125, + "learning_rate": 0.0001465651551280593, + "loss": 2.1885, + "step": 293900 + }, + { + "epoch": 0.69, + "grad_norm": 2.546875, + "learning_rate": 0.00014656351944101583, + "loss": 1.9781, + "step": 293905 + }, + { + "epoch": 0.69, + "grad_norm": 2.546875, + "learning_rate": 0.00014656188373806541, + "loss": 2.0048, + "step": 293910 + }, + { + "epoch": 0.69, + "grad_norm": 2.03125, + "learning_rate": 0.0001465602480192086, + "loss": 2.1176, + "step": 293915 + }, + { + "epoch": 0.69, + "grad_norm": 2.296875, + "learning_rate": 0.00014655861228444597, + "loss": 2.0549, + "step": 293920 + }, + { + "epoch": 0.69, + "grad_norm": 1.984375, + "learning_rate": 0.0001465569765337781, + "loss": 1.9916, + "step": 293925 + }, + { + "epoch": 0.69, + "grad_norm": 2.3125, + "learning_rate": 0.0001465553407672055, + "loss": 2.1407, + "step": 293930 + }, + { + "epoch": 0.69, + "grad_norm": 1.9765625, + "learning_rate": 0.00014655370498472872, + "loss": 2.1061, + "step": 293935 + }, + { + "epoch": 0.69, + "grad_norm": 2.390625, + "learning_rate": 0.00014655206918634837, + "loss": 2.0303, + "step": 293940 + }, + { + "epoch": 0.69, + "grad_norm": 2.40625, + "learning_rate": 0.000146550433372065, + "loss": 1.9592, + "step": 293945 + }, + { + "epoch": 0.69, + "grad_norm": 2.234375, + "learning_rate": 0.00014654879754187917, + "loss": 2.288, + "step": 293950 + }, + { + "epoch": 0.69, + "grad_norm": 1.9921875, + "learning_rate": 0.0001465471616957914, + "loss": 1.98, + "step": 293955 + }, + { + "epoch": 0.69, + "grad_norm": 2.25, + "learning_rate": 0.00014654552583380227, + "loss": 2.0916, + "step": 293960 + }, + { + "epoch": 0.69, + "grad_norm": 2.3125, + "learning_rate": 0.00014654388995591236, + "loss": 2.2368, + "step": 293965 + }, + { + "epoch": 0.69, + "grad_norm": 2.390625, + "learning_rate": 0.00014654225406212225, + "loss": 1.9486, + "step": 293970 + }, + { + "epoch": 0.69, + "grad_norm": 1.9921875, + "learning_rate": 0.00014654061815243244, + "loss": 1.9405, + "step": 293975 + }, + { + "epoch": 0.69, + "grad_norm": 2.4375, + "learning_rate": 0.0001465389822268435, + "loss": 2.0267, + "step": 293980 + }, + { + "epoch": 0.69, + "grad_norm": 2.125, + "learning_rate": 0.00014653734628535596, + "loss": 2.052, + "step": 293985 + }, + { + "epoch": 0.69, + "grad_norm": 2.03125, + "learning_rate": 0.00014653571032797045, + "loss": 2.027, + "step": 293990 + }, + { + "epoch": 0.69, + "grad_norm": 2.5, + "learning_rate": 0.0001465340743546875, + "loss": 2.0229, + "step": 293995 + }, + { + "epoch": 0.69, + "grad_norm": 2.09375, + "learning_rate": 0.00014653243836550768, + "loss": 2.0594, + "step": 294000 + }, + { + "epoch": 0.69, + "grad_norm": 1.9140625, + "learning_rate": 0.00014653080236043154, + "loss": 2.1758, + "step": 294005 + }, + { + "epoch": 0.69, + "grad_norm": 2.265625, + "learning_rate": 0.0001465291663394596, + "loss": 2.3119, + "step": 294010 + }, + { + "epoch": 0.69, + "grad_norm": 2.140625, + "learning_rate": 0.00014652753030259248, + "loss": 1.8493, + "step": 294015 + }, + { + "epoch": 0.69, + "grad_norm": 1.9296875, + "learning_rate": 0.0001465258942498307, + "loss": 2.0218, + "step": 294020 + }, + { + "epoch": 0.69, + "grad_norm": 2.125, + "learning_rate": 0.00014652425818117483, + "loss": 2.3503, + "step": 294025 + }, + { + "epoch": 0.69, + "grad_norm": 2.4375, + "learning_rate": 0.00014652262209662544, + "loss": 2.1704, + "step": 294030 + }, + { + "epoch": 0.69, + "grad_norm": 2.203125, + "learning_rate": 0.00014652098599618306, + "loss": 1.9782, + "step": 294035 + }, + { + "epoch": 0.69, + "grad_norm": 2.75, + "learning_rate": 0.00014651934987984828, + "loss": 2.1147, + "step": 294040 + }, + { + "epoch": 0.69, + "grad_norm": 2.59375, + "learning_rate": 0.00014651771374762164, + "loss": 2.0421, + "step": 294045 + }, + { + "epoch": 0.69, + "grad_norm": 2.265625, + "learning_rate": 0.00014651607759950372, + "loss": 2.024, + "step": 294050 + }, + { + "epoch": 0.69, + "grad_norm": 1.875, + "learning_rate": 0.00014651444143549508, + "loss": 2.1233, + "step": 294055 + }, + { + "epoch": 0.69, + "grad_norm": 2.21875, + "learning_rate": 0.00014651280525559624, + "loss": 2.239, + "step": 294060 + }, + { + "epoch": 0.69, + "grad_norm": 1.9375, + "learning_rate": 0.00014651116905980779, + "loss": 1.9557, + "step": 294065 + }, + { + "epoch": 0.69, + "grad_norm": 2.71875, + "learning_rate": 0.00014650953284813025, + "loss": 2.0345, + "step": 294070 + }, + { + "epoch": 0.69, + "grad_norm": 2.09375, + "learning_rate": 0.00014650789662056424, + "loss": 2.1088, + "step": 294075 + }, + { + "epoch": 0.69, + "grad_norm": 3.109375, + "learning_rate": 0.00014650626037711025, + "loss": 2.1816, + "step": 294080 + }, + { + "epoch": 0.69, + "grad_norm": 2.203125, + "learning_rate": 0.00014650462411776896, + "loss": 2.1371, + "step": 294085 + }, + { + "epoch": 0.69, + "grad_norm": 2.265625, + "learning_rate": 0.00014650298784254077, + "loss": 1.9498, + "step": 294090 + }, + { + "epoch": 0.69, + "grad_norm": 2.125, + "learning_rate": 0.00014650135155142631, + "loss": 2.0981, + "step": 294095 + }, + { + "epoch": 0.69, + "grad_norm": 2.203125, + "learning_rate": 0.0001464997152444262, + "loss": 1.9344, + "step": 294100 + }, + { + "epoch": 0.69, + "grad_norm": 2.3125, + "learning_rate": 0.00014649807892154094, + "loss": 2.0352, + "step": 294105 + }, + { + "epoch": 0.69, + "grad_norm": 2.4375, + "learning_rate": 0.00014649644258277105, + "loss": 2.0764, + "step": 294110 + }, + { + "epoch": 0.69, + "grad_norm": 2.296875, + "learning_rate": 0.00014649480622811717, + "loss": 2.1361, + "step": 294115 + }, + { + "epoch": 0.69, + "grad_norm": 1.90625, + "learning_rate": 0.0001464931698575798, + "loss": 1.9639, + "step": 294120 + }, + { + "epoch": 0.69, + "grad_norm": 2.015625, + "learning_rate": 0.0001464915334711595, + "loss": 2.1715, + "step": 294125 + }, + { + "epoch": 0.69, + "grad_norm": 2.046875, + "learning_rate": 0.0001464898970688569, + "loss": 2.1763, + "step": 294130 + }, + { + "epoch": 0.69, + "grad_norm": 2.0625, + "learning_rate": 0.0001464882606506725, + "loss": 1.9713, + "step": 294135 + }, + { + "epoch": 0.69, + "grad_norm": 2.203125, + "learning_rate": 0.00014648662421660685, + "loss": 2.0337, + "step": 294140 + }, + { + "epoch": 0.69, + "grad_norm": 2.015625, + "learning_rate": 0.00014648498776666054, + "loss": 1.9942, + "step": 294145 + }, + { + "epoch": 0.69, + "grad_norm": 1.875, + "learning_rate": 0.00014648335130083409, + "loss": 2.0347, + "step": 294150 + }, + { + "epoch": 0.69, + "grad_norm": 2.46875, + "learning_rate": 0.00014648171481912808, + "loss": 2.2812, + "step": 294155 + }, + { + "epoch": 0.69, + "grad_norm": 2.359375, + "learning_rate": 0.0001464800783215431, + "loss": 2.1866, + "step": 294160 + }, + { + "epoch": 0.69, + "grad_norm": 2.484375, + "learning_rate": 0.00014647844180807967, + "loss": 1.8475, + "step": 294165 + }, + { + "epoch": 0.69, + "grad_norm": 2.34375, + "learning_rate": 0.00014647680527873834, + "loss": 2.1903, + "step": 294170 + }, + { + "epoch": 0.69, + "grad_norm": 2.171875, + "learning_rate": 0.00014647516873351975, + "loss": 2.2304, + "step": 294175 + }, + { + "epoch": 0.69, + "grad_norm": 2.0625, + "learning_rate": 0.00014647353217242434, + "loss": 1.8911, + "step": 294180 + }, + { + "epoch": 0.69, + "grad_norm": 2.859375, + "learning_rate": 0.00014647189559545277, + "loss": 1.9637, + "step": 294185 + }, + { + "epoch": 0.69, + "grad_norm": 2.421875, + "learning_rate": 0.00014647025900260552, + "loss": 2.01, + "step": 294190 + }, + { + "epoch": 0.69, + "grad_norm": 2.203125, + "learning_rate": 0.00014646862239388323, + "loss": 1.9023, + "step": 294195 + }, + { + "epoch": 0.69, + "grad_norm": 2.734375, + "learning_rate": 0.00014646698576928637, + "loss": 2.1864, + "step": 294200 + }, + { + "epoch": 0.69, + "grad_norm": 2.3125, + "learning_rate": 0.00014646534912881557, + "loss": 2.0432, + "step": 294205 + }, + { + "epoch": 0.69, + "grad_norm": 1.9296875, + "learning_rate": 0.00014646371247247134, + "loss": 1.9422, + "step": 294210 + }, + { + "epoch": 0.69, + "grad_norm": 2.390625, + "learning_rate": 0.00014646207580025432, + "loss": 2.0742, + "step": 294215 + }, + { + "epoch": 0.69, + "grad_norm": 2.046875, + "learning_rate": 0.00014646043911216498, + "loss": 2.1263, + "step": 294220 + }, + { + "epoch": 0.69, + "grad_norm": 2.109375, + "learning_rate": 0.0001464588024082039, + "loss": 2.1013, + "step": 294225 + }, + { + "epoch": 0.69, + "grad_norm": 2.265625, + "learning_rate": 0.00014645716568837164, + "loss": 2.1594, + "step": 294230 + }, + { + "epoch": 0.69, + "grad_norm": 2.265625, + "learning_rate": 0.00014645552895266878, + "loss": 2.0243, + "step": 294235 + }, + { + "epoch": 0.69, + "grad_norm": 2.4375, + "learning_rate": 0.00014645389220109588, + "loss": 1.9857, + "step": 294240 + }, + { + "epoch": 0.69, + "grad_norm": 2.46875, + "learning_rate": 0.00014645225543365352, + "loss": 2.0498, + "step": 294245 + }, + { + "epoch": 0.69, + "grad_norm": 5.65625, + "learning_rate": 0.00014645061865034217, + "loss": 2.1475, + "step": 294250 + }, + { + "epoch": 0.69, + "grad_norm": 2.0625, + "learning_rate": 0.00014644898185116244, + "loss": 2.0638, + "step": 294255 + }, + { + "epoch": 0.69, + "grad_norm": 2.453125, + "learning_rate": 0.00014644734503611493, + "loss": 2.163, + "step": 294260 + }, + { + "epoch": 0.69, + "grad_norm": 2.125, + "learning_rate": 0.00014644570820520017, + "loss": 2.0631, + "step": 294265 + }, + { + "epoch": 0.69, + "grad_norm": 1.6953125, + "learning_rate": 0.0001464440713584187, + "loss": 2.0729, + "step": 294270 + }, + { + "epoch": 0.69, + "grad_norm": 2.0, + "learning_rate": 0.00014644243449577111, + "loss": 1.9356, + "step": 294275 + }, + { + "epoch": 0.69, + "grad_norm": 2.078125, + "learning_rate": 0.0001464407976172579, + "loss": 1.8899, + "step": 294280 + }, + { + "epoch": 0.69, + "grad_norm": 2.21875, + "learning_rate": 0.00014643916072287972, + "loss": 2.1545, + "step": 294285 + }, + { + "epoch": 0.69, + "grad_norm": 1.9921875, + "learning_rate": 0.00014643752381263706, + "loss": 2.1275, + "step": 294290 + }, + { + "epoch": 0.69, + "grad_norm": 2.125, + "learning_rate": 0.0001464358868865305, + "loss": 2.1328, + "step": 294295 + }, + { + "epoch": 0.69, + "grad_norm": 2.65625, + "learning_rate": 0.00014643424994456057, + "loss": 2.1102, + "step": 294300 + }, + { + "epoch": 0.69, + "grad_norm": 1.90625, + "learning_rate": 0.0001464326129867279, + "loss": 2.0375, + "step": 294305 + }, + { + "epoch": 0.69, + "grad_norm": 2.46875, + "learning_rate": 0.000146430976013033, + "loss": 2.1929, + "step": 294310 + }, + { + "epoch": 0.69, + "grad_norm": 2.3125, + "learning_rate": 0.00014642933902347642, + "loss": 2.1936, + "step": 294315 + }, + { + "epoch": 0.69, + "grad_norm": 3.125, + "learning_rate": 0.00014642770201805876, + "loss": 1.9113, + "step": 294320 + }, + { + "epoch": 0.69, + "grad_norm": 1.984375, + "learning_rate": 0.00014642606499678054, + "loss": 1.951, + "step": 294325 + }, + { + "epoch": 0.69, + "grad_norm": 2.34375, + "learning_rate": 0.00014642442795964234, + "loss": 2.3737, + "step": 294330 + }, + { + "epoch": 0.69, + "grad_norm": 2.21875, + "learning_rate": 0.00014642279090664468, + "loss": 2.1603, + "step": 294335 + }, + { + "epoch": 0.69, + "grad_norm": 1.421875, + "learning_rate": 0.00014642115383778817, + "loss": 1.9611, + "step": 294340 + }, + { + "epoch": 0.69, + "grad_norm": 2.34375, + "learning_rate": 0.00014641951675307336, + "loss": 1.946, + "step": 294345 + }, + { + "epoch": 0.69, + "grad_norm": 2.328125, + "learning_rate": 0.00014641787965250083, + "loss": 2.261, + "step": 294350 + }, + { + "epoch": 0.69, + "grad_norm": 2.140625, + "learning_rate": 0.00014641624253607108, + "loss": 2.1597, + "step": 294355 + }, + { + "epoch": 0.69, + "grad_norm": 2.65625, + "learning_rate": 0.00014641460540378468, + "loss": 2.1301, + "step": 294360 + }, + { + "epoch": 0.69, + "grad_norm": 2.546875, + "learning_rate": 0.00014641296825564223, + "loss": 2.2058, + "step": 294365 + }, + { + "epoch": 0.69, + "grad_norm": 2.09375, + "learning_rate": 0.0001464113310916443, + "loss": 2.118, + "step": 294370 + }, + { + "epoch": 0.69, + "grad_norm": 2.203125, + "learning_rate": 0.00014640969391179138, + "loss": 2.0802, + "step": 294375 + }, + { + "epoch": 0.69, + "grad_norm": 2.015625, + "learning_rate": 0.00014640805671608408, + "loss": 2.1912, + "step": 294380 + }, + { + "epoch": 0.69, + "grad_norm": 2.296875, + "learning_rate": 0.00014640641950452294, + "loss": 2.0107, + "step": 294385 + }, + { + "epoch": 0.69, + "grad_norm": 2.328125, + "learning_rate": 0.00014640478227710853, + "loss": 2.1063, + "step": 294390 + }, + { + "epoch": 0.69, + "grad_norm": 2.328125, + "learning_rate": 0.00014640314503384138, + "loss": 1.9344, + "step": 294395 + }, + { + "epoch": 0.69, + "grad_norm": 2.421875, + "learning_rate": 0.00014640150777472211, + "loss": 2.105, + "step": 294400 + }, + { + "epoch": 0.69, + "grad_norm": 2.75, + "learning_rate": 0.00014639987049975124, + "loss": 2.0556, + "step": 294405 + }, + { + "epoch": 0.69, + "grad_norm": 2.140625, + "learning_rate": 0.0001463982332089293, + "loss": 1.9236, + "step": 294410 + }, + { + "epoch": 0.69, + "grad_norm": 2.28125, + "learning_rate": 0.0001463965959022569, + "loss": 1.9831, + "step": 294415 + }, + { + "epoch": 0.69, + "grad_norm": 2.390625, + "learning_rate": 0.00014639495857973457, + "loss": 1.9807, + "step": 294420 + }, + { + "epoch": 0.69, + "grad_norm": 2.3125, + "learning_rate": 0.00014639332124136294, + "loss": 1.9299, + "step": 294425 + }, + { + "epoch": 0.69, + "grad_norm": 2.296875, + "learning_rate": 0.00014639168388714247, + "loss": 2.0862, + "step": 294430 + }, + { + "epoch": 0.69, + "grad_norm": 2.078125, + "learning_rate": 0.00014639004651707373, + "loss": 2.0049, + "step": 294435 + }, + { + "epoch": 0.69, + "grad_norm": 2.28125, + "learning_rate": 0.00014638840913115735, + "loss": 2.0214, + "step": 294440 + }, + { + "epoch": 0.69, + "grad_norm": 2.515625, + "learning_rate": 0.00014638677172939383, + "loss": 2.0806, + "step": 294445 + }, + { + "epoch": 0.69, + "grad_norm": 2.484375, + "learning_rate": 0.00014638513431178374, + "loss": 2.0247, + "step": 294450 + }, + { + "epoch": 0.69, + "grad_norm": 2.09375, + "learning_rate": 0.00014638349687832766, + "loss": 1.9869, + "step": 294455 + }, + { + "epoch": 0.69, + "grad_norm": 2.296875, + "learning_rate": 0.00014638185942902613, + "loss": 1.9471, + "step": 294460 + }, + { + "epoch": 0.69, + "grad_norm": 2.203125, + "learning_rate": 0.00014638022196387972, + "loss": 1.9599, + "step": 294465 + }, + { + "epoch": 0.69, + "grad_norm": 2.28125, + "learning_rate": 0.000146378584482889, + "loss": 2.1081, + "step": 294470 + }, + { + "epoch": 0.69, + "grad_norm": 2.328125, + "learning_rate": 0.0001463769469860545, + "loss": 1.9504, + "step": 294475 + }, + { + "epoch": 0.69, + "grad_norm": 2.125, + "learning_rate": 0.00014637530947337677, + "loss": 2.1153, + "step": 294480 + }, + { + "epoch": 0.69, + "grad_norm": 2.4375, + "learning_rate": 0.00014637367194485643, + "loss": 2.0015, + "step": 294485 + }, + { + "epoch": 0.69, + "grad_norm": 2.640625, + "learning_rate": 0.00014637203440049396, + "loss": 2.0335, + "step": 294490 + }, + { + "epoch": 0.69, + "grad_norm": 2.234375, + "learning_rate": 0.00014637039684028998, + "loss": 1.9324, + "step": 294495 + }, + { + "epoch": 0.69, + "grad_norm": 1.8359375, + "learning_rate": 0.00014636875926424508, + "loss": 2.0157, + "step": 294500 + }, + { + "epoch": 0.69, + "grad_norm": 2.375, + "learning_rate": 0.00014636712167235972, + "loss": 1.9984, + "step": 294505 + }, + { + "epoch": 0.69, + "grad_norm": 2.203125, + "learning_rate": 0.0001463654840646345, + "loss": 2.1011, + "step": 294510 + }, + { + "epoch": 0.69, + "grad_norm": 2.3125, + "learning_rate": 0.00014636384644107006, + "loss": 1.946, + "step": 294515 + }, + { + "epoch": 0.69, + "grad_norm": 2.15625, + "learning_rate": 0.00014636220880166683, + "loss": 2.0832, + "step": 294520 + }, + { + "epoch": 0.69, + "grad_norm": 2.34375, + "learning_rate": 0.0001463605711464254, + "loss": 2.1882, + "step": 294525 + }, + { + "epoch": 0.69, + "grad_norm": 2.40625, + "learning_rate": 0.00014635893347534642, + "loss": 1.9025, + "step": 294530 + }, + { + "epoch": 0.69, + "grad_norm": 1.9140625, + "learning_rate": 0.00014635729578843035, + "loss": 1.9689, + "step": 294535 + }, + { + "epoch": 0.69, + "grad_norm": 2.328125, + "learning_rate": 0.00014635565808567783, + "loss": 2.212, + "step": 294540 + }, + { + "epoch": 0.69, + "grad_norm": 2.125, + "learning_rate": 0.00014635402036708935, + "loss": 2.0379, + "step": 294545 + }, + { + "epoch": 0.69, + "grad_norm": 2.25, + "learning_rate": 0.0001463523826326655, + "loss": 2.2936, + "step": 294550 + }, + { + "epoch": 0.69, + "grad_norm": 1.78125, + "learning_rate": 0.00014635074488240682, + "loss": 2.0335, + "step": 294555 + }, + { + "epoch": 0.69, + "grad_norm": 2.4375, + "learning_rate": 0.0001463491071163139, + "loss": 1.997, + "step": 294560 + }, + { + "epoch": 0.69, + "grad_norm": 2.796875, + "learning_rate": 0.00014634746933438729, + "loss": 2.1175, + "step": 294565 + }, + { + "epoch": 0.69, + "grad_norm": 2.140625, + "learning_rate": 0.00014634583153662754, + "loss": 1.991, + "step": 294570 + }, + { + "epoch": 0.69, + "grad_norm": 2.09375, + "learning_rate": 0.00014634419372303517, + "loss": 2.1226, + "step": 294575 + }, + { + "epoch": 0.69, + "grad_norm": 2.53125, + "learning_rate": 0.00014634255589361086, + "loss": 2.0898, + "step": 294580 + }, + { + "epoch": 0.69, + "grad_norm": 1.984375, + "learning_rate": 0.00014634091804835504, + "loss": 2.0788, + "step": 294585 + }, + { + "epoch": 0.69, + "grad_norm": 2.25, + "learning_rate": 0.00014633928018726832, + "loss": 2.2044, + "step": 294590 + }, + { + "epoch": 0.69, + "grad_norm": 1.9453125, + "learning_rate": 0.00014633764231035127, + "loss": 2.0352, + "step": 294595 + }, + { + "epoch": 0.69, + "grad_norm": 2.203125, + "learning_rate": 0.00014633600441760447, + "loss": 2.1464, + "step": 294600 + }, + { + "epoch": 0.69, + "grad_norm": 1.9453125, + "learning_rate": 0.00014633436650902842, + "loss": 2.0369, + "step": 294605 + }, + { + "epoch": 0.69, + "grad_norm": 2.09375, + "learning_rate": 0.0001463327285846237, + "loss": 2.1375, + "step": 294610 + }, + { + "epoch": 0.69, + "grad_norm": 2.21875, + "learning_rate": 0.00014633109064439092, + "loss": 2.1598, + "step": 294615 + }, + { + "epoch": 0.69, + "grad_norm": 1.9375, + "learning_rate": 0.00014632945268833056, + "loss": 2.0214, + "step": 294620 + }, + { + "epoch": 0.69, + "grad_norm": 2.265625, + "learning_rate": 0.00014632781471644323, + "loss": 2.0738, + "step": 294625 + }, + { + "epoch": 0.69, + "grad_norm": 2.140625, + "learning_rate": 0.00014632617672872948, + "loss": 2.1661, + "step": 294630 + }, + { + "epoch": 0.69, + "grad_norm": 2.21875, + "learning_rate": 0.00014632453872518987, + "loss": 2.0947, + "step": 294635 + }, + { + "epoch": 0.69, + "grad_norm": 2.140625, + "learning_rate": 0.00014632290070582497, + "loss": 1.9727, + "step": 294640 + }, + { + "epoch": 0.69, + "grad_norm": 2.375, + "learning_rate": 0.0001463212626706353, + "loss": 2.2444, + "step": 294645 + }, + { + "epoch": 0.69, + "grad_norm": 2.5, + "learning_rate": 0.00014631962461962147, + "loss": 2.2327, + "step": 294650 + }, + { + "epoch": 0.69, + "grad_norm": 2.375, + "learning_rate": 0.00014631798655278402, + "loss": 2.1357, + "step": 294655 + }, + { + "epoch": 0.69, + "grad_norm": 2.09375, + "learning_rate": 0.0001463163484701235, + "loss": 2.218, + "step": 294660 + }, + { + "epoch": 0.69, + "grad_norm": 2.21875, + "learning_rate": 0.00014631471037164045, + "loss": 2.1605, + "step": 294665 + }, + { + "epoch": 0.69, + "grad_norm": 1.9296875, + "learning_rate": 0.00014631307225733547, + "loss": 2.1369, + "step": 294670 + }, + { + "epoch": 0.69, + "grad_norm": 2.65625, + "learning_rate": 0.00014631143412720916, + "loss": 2.0144, + "step": 294675 + }, + { + "epoch": 0.69, + "grad_norm": 2.3125, + "learning_rate": 0.00014630979598126195, + "loss": 2.0591, + "step": 294680 + }, + { + "epoch": 0.69, + "grad_norm": 2.046875, + "learning_rate": 0.00014630815781949447, + "loss": 1.942, + "step": 294685 + }, + { + "epoch": 0.69, + "grad_norm": 2.0, + "learning_rate": 0.00014630651964190735, + "loss": 2.0279, + "step": 294690 + }, + { + "epoch": 0.69, + "grad_norm": 2.21875, + "learning_rate": 0.00014630488144850104, + "loss": 1.9907, + "step": 294695 + }, + { + "epoch": 0.69, + "grad_norm": 2.125, + "learning_rate": 0.00014630324323927613, + "loss": 2.1086, + "step": 294700 + }, + { + "epoch": 0.69, + "grad_norm": 2.125, + "learning_rate": 0.00014630160501423323, + "loss": 2.0931, + "step": 294705 + }, + { + "epoch": 0.69, + "grad_norm": 2.015625, + "learning_rate": 0.0001462999667733728, + "loss": 2.0286, + "step": 294710 + }, + { + "epoch": 0.69, + "grad_norm": 2.140625, + "learning_rate": 0.0001462983285166955, + "loss": 2.1243, + "step": 294715 + }, + { + "epoch": 0.69, + "grad_norm": 2.3125, + "learning_rate": 0.0001462966902442019, + "loss": 2.2631, + "step": 294720 + }, + { + "epoch": 0.69, + "grad_norm": 1.6796875, + "learning_rate": 0.00014629505195589245, + "loss": 2.0368, + "step": 294725 + }, + { + "epoch": 0.69, + "grad_norm": 2.40625, + "learning_rate": 0.0001462934136517678, + "loss": 2.2673, + "step": 294730 + }, + { + "epoch": 0.69, + "grad_norm": 2.0625, + "learning_rate": 0.00014629177533182842, + "loss": 2.1506, + "step": 294735 + }, + { + "epoch": 0.69, + "grad_norm": 2.1875, + "learning_rate": 0.000146290136996075, + "loss": 2.0219, + "step": 294740 + }, + { + "epoch": 0.69, + "grad_norm": 2.015625, + "learning_rate": 0.00014628849864450801, + "loss": 2.067, + "step": 294745 + }, + { + "epoch": 0.69, + "grad_norm": 2.015625, + "learning_rate": 0.00014628686027712805, + "loss": 2.2296, + "step": 294750 + }, + { + "epoch": 0.69, + "grad_norm": 2.140625, + "learning_rate": 0.0001462852218939356, + "loss": 2.0886, + "step": 294755 + }, + { + "epoch": 0.69, + "grad_norm": 2.25, + "learning_rate": 0.0001462835834949313, + "loss": 2.0967, + "step": 294760 + }, + { + "epoch": 0.69, + "grad_norm": 2.21875, + "learning_rate": 0.00014628194508011573, + "loss": 1.9892, + "step": 294765 + }, + { + "epoch": 0.69, + "grad_norm": 1.953125, + "learning_rate": 0.00014628030664948937, + "loss": 1.8561, + "step": 294770 + }, + { + "epoch": 0.69, + "grad_norm": 2.140625, + "learning_rate": 0.00014627866820305282, + "loss": 2.0649, + "step": 294775 + }, + { + "epoch": 0.69, + "grad_norm": 1.8125, + "learning_rate": 0.00014627702974080666, + "loss": 2.2404, + "step": 294780 + }, + { + "epoch": 0.69, + "grad_norm": 2.109375, + "learning_rate": 0.0001462753912627514, + "loss": 2.0597, + "step": 294785 + }, + { + "epoch": 0.69, + "grad_norm": 1.96875, + "learning_rate": 0.00014627375276888763, + "loss": 1.9256, + "step": 294790 + }, + { + "epoch": 0.69, + "grad_norm": 2.234375, + "learning_rate": 0.0001462721142592159, + "loss": 1.9476, + "step": 294795 + }, + { + "epoch": 0.69, + "grad_norm": 1.9296875, + "learning_rate": 0.00014627047573373682, + "loss": 2.0638, + "step": 294800 + }, + { + "epoch": 0.69, + "grad_norm": 2.5, + "learning_rate": 0.00014626883719245088, + "loss": 2.0248, + "step": 294805 + }, + { + "epoch": 0.69, + "grad_norm": 1.859375, + "learning_rate": 0.00014626719863535868, + "loss": 2.0419, + "step": 294810 + }, + { + "epoch": 0.69, + "grad_norm": 2.125, + "learning_rate": 0.00014626556006246076, + "loss": 1.8024, + "step": 294815 + }, + { + "epoch": 0.69, + "grad_norm": 2.09375, + "learning_rate": 0.00014626392147375764, + "loss": 2.0281, + "step": 294820 + }, + { + "epoch": 0.69, + "grad_norm": 2.65625, + "learning_rate": 0.00014626228286924997, + "loss": 2.1412, + "step": 294825 + }, + { + "epoch": 0.69, + "grad_norm": 2.203125, + "learning_rate": 0.00014626064424893827, + "loss": 2.0827, + "step": 294830 + }, + { + "epoch": 0.69, + "grad_norm": 2.265625, + "learning_rate": 0.0001462590056128231, + "loss": 2.2078, + "step": 294835 + }, + { + "epoch": 0.69, + "grad_norm": 1.5703125, + "learning_rate": 0.000146257366960905, + "loss": 1.9817, + "step": 294840 + }, + { + "epoch": 0.69, + "grad_norm": 2.078125, + "learning_rate": 0.0001462557282931845, + "loss": 1.8911, + "step": 294845 + }, + { + "epoch": 0.69, + "grad_norm": 2.5, + "learning_rate": 0.00014625408960966226, + "loss": 2.038, + "step": 294850 + }, + { + "epoch": 0.69, + "grad_norm": 2.203125, + "learning_rate": 0.00014625245091033878, + "loss": 2.2508, + "step": 294855 + }, + { + "epoch": 0.69, + "grad_norm": 2.296875, + "learning_rate": 0.00014625081219521462, + "loss": 2.0599, + "step": 294860 + }, + { + "epoch": 0.69, + "grad_norm": 2.390625, + "learning_rate": 0.00014624917346429034, + "loss": 2.0407, + "step": 294865 + }, + { + "epoch": 0.69, + "grad_norm": 2.359375, + "learning_rate": 0.00014624753471756646, + "loss": 1.936, + "step": 294870 + }, + { + "epoch": 0.69, + "grad_norm": 2.328125, + "learning_rate": 0.00014624589595504364, + "loss": 2.134, + "step": 294875 + }, + { + "epoch": 0.69, + "grad_norm": 1.9609375, + "learning_rate": 0.0001462442571767224, + "loss": 2.0711, + "step": 294880 + }, + { + "epoch": 0.69, + "grad_norm": 2.390625, + "learning_rate": 0.00014624261838260325, + "loss": 2.0659, + "step": 294885 + }, + { + "epoch": 0.69, + "grad_norm": 2.25, + "learning_rate": 0.00014624097957268676, + "loss": 1.9891, + "step": 294890 + }, + { + "epoch": 0.69, + "grad_norm": 2.125, + "learning_rate": 0.00014623934074697355, + "loss": 2.078, + "step": 294895 + }, + { + "epoch": 0.69, + "grad_norm": 2.0, + "learning_rate": 0.0001462377019054641, + "loss": 2.1341, + "step": 294900 + }, + { + "epoch": 0.69, + "grad_norm": 2.421875, + "learning_rate": 0.00014623606304815906, + "loss": 2.2032, + "step": 294905 + }, + { + "epoch": 0.69, + "grad_norm": 2.46875, + "learning_rate": 0.00014623442417505892, + "loss": 2.1354, + "step": 294910 + }, + { + "epoch": 0.69, + "grad_norm": 2.265625, + "learning_rate": 0.00014623278528616427, + "loss": 2.1309, + "step": 294915 + }, + { + "epoch": 0.69, + "grad_norm": 2.484375, + "learning_rate": 0.00014623114638147566, + "loss": 2.0614, + "step": 294920 + }, + { + "epoch": 0.69, + "grad_norm": 1.8671875, + "learning_rate": 0.00014622950746099364, + "loss": 2.0148, + "step": 294925 + }, + { + "epoch": 0.69, + "grad_norm": 1.6953125, + "learning_rate": 0.0001462278685247188, + "loss": 2.0138, + "step": 294930 + }, + { + "epoch": 0.69, + "grad_norm": 2.296875, + "learning_rate": 0.00014622622957265167, + "loss": 2.0642, + "step": 294935 + }, + { + "epoch": 0.69, + "grad_norm": 1.9921875, + "learning_rate": 0.00014622459060479283, + "loss": 2.0793, + "step": 294940 + }, + { + "epoch": 0.69, + "grad_norm": 2.453125, + "learning_rate": 0.0001462229516211428, + "loss": 2.1056, + "step": 294945 + }, + { + "epoch": 0.69, + "grad_norm": 2.453125, + "learning_rate": 0.00014622131262170222, + "loss": 1.8472, + "step": 294950 + }, + { + "epoch": 0.69, + "grad_norm": 1.8828125, + "learning_rate": 0.00014621967360647158, + "loss": 2.0083, + "step": 294955 + }, + { + "epoch": 0.69, + "grad_norm": 2.109375, + "learning_rate": 0.00014621803457545146, + "loss": 1.8632, + "step": 294960 + }, + { + "epoch": 0.69, + "grad_norm": 2.3125, + "learning_rate": 0.0001462163955286424, + "loss": 2.0457, + "step": 294965 + }, + { + "epoch": 0.69, + "grad_norm": 2.375, + "learning_rate": 0.00014621475646604503, + "loss": 2.018, + "step": 294970 + }, + { + "epoch": 0.69, + "grad_norm": 1.828125, + "learning_rate": 0.00014621311738765983, + "loss": 2.0102, + "step": 294975 + }, + { + "epoch": 0.69, + "grad_norm": 1.7890625, + "learning_rate": 0.0001462114782934874, + "loss": 1.9433, + "step": 294980 + }, + { + "epoch": 0.69, + "grad_norm": 2.21875, + "learning_rate": 0.00014620983918352829, + "loss": 2.177, + "step": 294985 + }, + { + "epoch": 0.69, + "grad_norm": 2.25, + "learning_rate": 0.00014620820005778305, + "loss": 1.9279, + "step": 294990 + }, + { + "epoch": 0.69, + "grad_norm": 2.046875, + "learning_rate": 0.00014620656091625226, + "loss": 1.9938, + "step": 294995 + }, + { + "epoch": 0.69, + "grad_norm": 2.046875, + "learning_rate": 0.00014620492175893647, + "loss": 2.0342, + "step": 295000 + }, + { + "epoch": 0.69, + "grad_norm": 2.5, + "learning_rate": 0.00014620328258583624, + "loss": 1.9657, + "step": 295005 + }, + { + "epoch": 0.69, + "grad_norm": 2.21875, + "learning_rate": 0.00014620164339695214, + "loss": 2.1787, + "step": 295010 + }, + { + "epoch": 0.69, + "grad_norm": 2.046875, + "learning_rate": 0.00014620000419228473, + "loss": 2.0203, + "step": 295015 + }, + { + "epoch": 0.69, + "grad_norm": 2.265625, + "learning_rate": 0.00014619836497183452, + "loss": 2.129, + "step": 295020 + }, + { + "epoch": 0.69, + "grad_norm": 2.03125, + "learning_rate": 0.00014619672573560211, + "loss": 2.1426, + "step": 295025 + }, + { + "epoch": 0.69, + "grad_norm": 3.140625, + "learning_rate": 0.0001461950864835881, + "loss": 1.9499, + "step": 295030 + }, + { + "epoch": 0.69, + "grad_norm": 2.078125, + "learning_rate": 0.000146193447215793, + "loss": 1.8405, + "step": 295035 + }, + { + "epoch": 0.69, + "grad_norm": 2.125, + "learning_rate": 0.00014619180793221735, + "loss": 2.1169, + "step": 295040 + }, + { + "epoch": 0.69, + "grad_norm": 2.34375, + "learning_rate": 0.00014619016863286178, + "loss": 2.0878, + "step": 295045 + }, + { + "epoch": 0.69, + "grad_norm": 1.9921875, + "learning_rate": 0.0001461885293177268, + "loss": 1.9847, + "step": 295050 + }, + { + "epoch": 0.69, + "grad_norm": 2.265625, + "learning_rate": 0.00014618688998681298, + "loss": 2.2943, + "step": 295055 + }, + { + "epoch": 0.69, + "grad_norm": 2.25, + "learning_rate": 0.00014618525064012086, + "loss": 2.0599, + "step": 295060 + }, + { + "epoch": 0.69, + "grad_norm": 2.296875, + "learning_rate": 0.00014618361127765106, + "loss": 2.0795, + "step": 295065 + }, + { + "epoch": 0.69, + "grad_norm": 2.046875, + "learning_rate": 0.00014618197189940406, + "loss": 2.0041, + "step": 295070 + }, + { + "epoch": 0.69, + "grad_norm": 2.203125, + "learning_rate": 0.00014618033250538047, + "loss": 2.1644, + "step": 295075 + }, + { + "epoch": 0.69, + "grad_norm": 2.515625, + "learning_rate": 0.00014617869309558086, + "loss": 2.1522, + "step": 295080 + }, + { + "epoch": 0.69, + "grad_norm": 2.078125, + "learning_rate": 0.00014617705367000577, + "loss": 2.0522, + "step": 295085 + }, + { + "epoch": 0.69, + "grad_norm": 2.265625, + "learning_rate": 0.00014617541422865576, + "loss": 2.0605, + "step": 295090 + }, + { + "epoch": 0.69, + "grad_norm": 2.234375, + "learning_rate": 0.00014617377477153137, + "loss": 2.0009, + "step": 295095 + }, + { + "epoch": 0.69, + "grad_norm": 2.34375, + "learning_rate": 0.00014617213529863321, + "loss": 1.9361, + "step": 295100 + }, + { + "epoch": 0.69, + "grad_norm": 1.9453125, + "learning_rate": 0.00014617049580996182, + "loss": 1.9666, + "step": 295105 + }, + { + "epoch": 0.69, + "grad_norm": 2.609375, + "learning_rate": 0.0001461688563055177, + "loss": 2.0356, + "step": 295110 + }, + { + "epoch": 0.69, + "grad_norm": 1.9921875, + "learning_rate": 0.0001461672167853015, + "loss": 1.9689, + "step": 295115 + }, + { + "epoch": 0.69, + "grad_norm": 2.21875, + "learning_rate": 0.0001461655772493137, + "loss": 2.0973, + "step": 295120 + }, + { + "epoch": 0.69, + "grad_norm": 2.0625, + "learning_rate": 0.00014616393769755496, + "loss": 1.9105, + "step": 295125 + }, + { + "epoch": 0.69, + "grad_norm": 2.078125, + "learning_rate": 0.00014616229813002574, + "loss": 2.2012, + "step": 295130 + }, + { + "epoch": 0.69, + "grad_norm": 2.484375, + "learning_rate": 0.00014616065854672664, + "loss": 1.9342, + "step": 295135 + }, + { + "epoch": 0.69, + "grad_norm": 2.125, + "learning_rate": 0.00014615901894765823, + "loss": 1.9189, + "step": 295140 + }, + { + "epoch": 0.69, + "grad_norm": 2.125, + "learning_rate": 0.00014615737933282107, + "loss": 2.2029, + "step": 295145 + }, + { + "epoch": 0.69, + "grad_norm": 2.78125, + "learning_rate": 0.00014615573970221574, + "loss": 2.1697, + "step": 295150 + }, + { + "epoch": 0.69, + "grad_norm": 2.59375, + "learning_rate": 0.00014615410005584274, + "loss": 2.0951, + "step": 295155 + }, + { + "epoch": 0.69, + "grad_norm": 2.21875, + "learning_rate": 0.00014615246039370266, + "loss": 2.1038, + "step": 295160 + }, + { + "epoch": 0.69, + "grad_norm": 2.1875, + "learning_rate": 0.00014615082071579605, + "loss": 1.9351, + "step": 295165 + }, + { + "epoch": 0.69, + "grad_norm": 2.140625, + "learning_rate": 0.00014614918102212352, + "loss": 1.8971, + "step": 295170 + }, + { + "epoch": 0.69, + "grad_norm": 1.953125, + "learning_rate": 0.00014614754131268558, + "loss": 2.0205, + "step": 295175 + }, + { + "epoch": 0.69, + "grad_norm": 2.265625, + "learning_rate": 0.0001461459015874828, + "loss": 1.9504, + "step": 295180 + }, + { + "epoch": 0.69, + "grad_norm": 2.5, + "learning_rate": 0.00014614426184651573, + "loss": 2.0637, + "step": 295185 + }, + { + "epoch": 0.69, + "grad_norm": 2.09375, + "learning_rate": 0.00014614262208978494, + "loss": 1.9554, + "step": 295190 + }, + { + "epoch": 0.69, + "grad_norm": 2.234375, + "learning_rate": 0.00014614098231729102, + "loss": 1.9834, + "step": 295195 + }, + { + "epoch": 0.69, + "grad_norm": 2.296875, + "learning_rate": 0.00014613934252903448, + "loss": 1.9065, + "step": 295200 + }, + { + "epoch": 0.69, + "grad_norm": 2.09375, + "learning_rate": 0.00014613770272501591, + "loss": 2.1183, + "step": 295205 + }, + { + "epoch": 0.69, + "grad_norm": 1.953125, + "learning_rate": 0.00014613606290523587, + "loss": 2.2045, + "step": 295210 + }, + { + "epoch": 0.69, + "grad_norm": 2.09375, + "learning_rate": 0.0001461344230696949, + "loss": 1.9943, + "step": 295215 + }, + { + "epoch": 0.69, + "grad_norm": 2.265625, + "learning_rate": 0.00014613278321839359, + "loss": 2.0215, + "step": 295220 + }, + { + "epoch": 0.69, + "grad_norm": 2.125, + "learning_rate": 0.0001461311433513325, + "loss": 1.9447, + "step": 295225 + }, + { + "epoch": 0.69, + "grad_norm": 2.3125, + "learning_rate": 0.00014612950346851213, + "loss": 2.0307, + "step": 295230 + }, + { + "epoch": 0.69, + "grad_norm": 1.9765625, + "learning_rate": 0.0001461278635699331, + "loss": 2.0791, + "step": 295235 + }, + { + "epoch": 0.69, + "grad_norm": 2.203125, + "learning_rate": 0.00014612622365559597, + "loss": 2.1108, + "step": 295240 + }, + { + "epoch": 0.69, + "grad_norm": 2.46875, + "learning_rate": 0.00014612458372550128, + "loss": 2.0905, + "step": 295245 + }, + { + "epoch": 0.69, + "grad_norm": 2.078125, + "learning_rate": 0.00014612294377964961, + "loss": 2.1133, + "step": 295250 + }, + { + "epoch": 0.69, + "grad_norm": 2.46875, + "learning_rate": 0.0001461213038180415, + "loss": 1.7809, + "step": 295255 + }, + { + "epoch": 0.69, + "grad_norm": 2.21875, + "learning_rate": 0.0001461196638406775, + "loss": 2.1204, + "step": 295260 + }, + { + "epoch": 0.69, + "grad_norm": 2.296875, + "learning_rate": 0.00014611802384755822, + "loss": 1.87, + "step": 295265 + }, + { + "epoch": 0.69, + "grad_norm": 2.375, + "learning_rate": 0.00014611638383868415, + "loss": 1.8477, + "step": 295270 + }, + { + "epoch": 0.69, + "grad_norm": 2.0, + "learning_rate": 0.0001461147438140559, + "loss": 2.1887, + "step": 295275 + }, + { + "epoch": 0.69, + "grad_norm": 2.125, + "learning_rate": 0.000146113103773674, + "loss": 2.2084, + "step": 295280 + }, + { + "epoch": 0.69, + "grad_norm": 2.03125, + "learning_rate": 0.00014611146371753904, + "loss": 1.9487, + "step": 295285 + }, + { + "epoch": 0.69, + "grad_norm": 2.046875, + "learning_rate": 0.0001461098236456516, + "loss": 1.881, + "step": 295290 + }, + { + "epoch": 0.69, + "grad_norm": 2.0625, + "learning_rate": 0.00014610818355801218, + "loss": 2.0552, + "step": 295295 + }, + { + "epoch": 0.69, + "grad_norm": 1.7890625, + "learning_rate": 0.00014610654345462132, + "loss": 1.8144, + "step": 295300 + }, + { + "epoch": 0.69, + "grad_norm": 2.359375, + "learning_rate": 0.0001461049033354797, + "loss": 2.0449, + "step": 295305 + }, + { + "epoch": 0.69, + "grad_norm": 2.40625, + "learning_rate": 0.0001461032632005878, + "loss": 1.9471, + "step": 295310 + }, + { + "epoch": 0.69, + "grad_norm": 2.28125, + "learning_rate": 0.00014610162304994618, + "loss": 2.0001, + "step": 295315 + }, + { + "epoch": 0.69, + "grad_norm": 2.28125, + "learning_rate": 0.0001460999828835554, + "loss": 2.1622, + "step": 295320 + }, + { + "epoch": 0.69, + "grad_norm": 2.109375, + "learning_rate": 0.00014609834270141602, + "loss": 2.1826, + "step": 295325 + }, + { + "epoch": 0.7, + "grad_norm": 2.296875, + "learning_rate": 0.00014609670250352865, + "loss": 1.9573, + "step": 295330 + }, + { + "epoch": 0.7, + "grad_norm": 2.3125, + "learning_rate": 0.00014609506228989379, + "loss": 2.1025, + "step": 295335 + }, + { + "epoch": 0.7, + "grad_norm": 2.046875, + "learning_rate": 0.000146093422060512, + "loss": 2.0974, + "step": 295340 + }, + { + "epoch": 0.7, + "grad_norm": 2.515625, + "learning_rate": 0.00014609178181538388, + "loss": 2.0447, + "step": 295345 + }, + { + "epoch": 0.7, + "grad_norm": 2.390625, + "learning_rate": 0.00014609014155450995, + "loss": 2.1272, + "step": 295350 + }, + { + "epoch": 0.7, + "grad_norm": 2.265625, + "learning_rate": 0.0001460885012778908, + "loss": 2.0973, + "step": 295355 + }, + { + "epoch": 0.7, + "grad_norm": 2.140625, + "learning_rate": 0.00014608686098552698, + "loss": 2.2806, + "step": 295360 + }, + { + "epoch": 0.7, + "grad_norm": 1.9453125, + "learning_rate": 0.00014608522067741908, + "loss": 1.8242, + "step": 295365 + }, + { + "epoch": 0.7, + "grad_norm": 2.8125, + "learning_rate": 0.00014608358035356762, + "loss": 2.14, + "step": 295370 + }, + { + "epoch": 0.7, + "grad_norm": 2.203125, + "learning_rate": 0.00014608194001397318, + "loss": 2.1072, + "step": 295375 + }, + { + "epoch": 0.7, + "grad_norm": 2.234375, + "learning_rate": 0.0001460802996586363, + "loss": 2.1253, + "step": 295380 + }, + { + "epoch": 0.7, + "grad_norm": 1.8046875, + "learning_rate": 0.00014607865928755755, + "loss": 1.977, + "step": 295385 + }, + { + "epoch": 0.7, + "grad_norm": 2.171875, + "learning_rate": 0.0001460770189007375, + "loss": 2.1594, + "step": 295390 + }, + { + "epoch": 0.7, + "grad_norm": 3.25, + "learning_rate": 0.00014607537849817672, + "loss": 2.3058, + "step": 295395 + }, + { + "epoch": 0.7, + "grad_norm": 1.9921875, + "learning_rate": 0.00014607373807987574, + "loss": 2.0789, + "step": 295400 + }, + { + "epoch": 0.7, + "grad_norm": 2.421875, + "learning_rate": 0.00014607209764583512, + "loss": 2.0859, + "step": 295405 + }, + { + "epoch": 0.7, + "grad_norm": 2.390625, + "learning_rate": 0.00014607045719605544, + "loss": 2.0071, + "step": 295410 + }, + { + "epoch": 0.7, + "grad_norm": 2.234375, + "learning_rate": 0.0001460688167305373, + "loss": 2.0234, + "step": 295415 + }, + { + "epoch": 0.7, + "grad_norm": 2.203125, + "learning_rate": 0.00014606717624928117, + "loss": 1.9266, + "step": 295420 + }, + { + "epoch": 0.7, + "grad_norm": 2.3125, + "learning_rate": 0.00014606553575228769, + "loss": 2.0769, + "step": 295425 + }, + { + "epoch": 0.7, + "grad_norm": 2.453125, + "learning_rate": 0.00014606389523955736, + "loss": 2.141, + "step": 295430 + }, + { + "epoch": 0.7, + "grad_norm": 2.328125, + "learning_rate": 0.00014606225471109076, + "loss": 1.8745, + "step": 295435 + }, + { + "epoch": 0.7, + "grad_norm": 2.46875, + "learning_rate": 0.00014606061416688848, + "loss": 2.0109, + "step": 295440 + }, + { + "epoch": 0.7, + "grad_norm": 2.359375, + "learning_rate": 0.0001460589736069511, + "loss": 2.158, + "step": 295445 + }, + { + "epoch": 0.7, + "grad_norm": 2.015625, + "learning_rate": 0.00014605733303127907, + "loss": 1.9667, + "step": 295450 + }, + { + "epoch": 0.7, + "grad_norm": 1.7421875, + "learning_rate": 0.00014605569243987304, + "loss": 2.2008, + "step": 295455 + }, + { + "epoch": 0.7, + "grad_norm": 2.1875, + "learning_rate": 0.00014605405183273353, + "loss": 2.0115, + "step": 295460 + }, + { + "epoch": 0.7, + "grad_norm": 2.0625, + "learning_rate": 0.00014605241120986117, + "loss": 2.0261, + "step": 295465 + }, + { + "epoch": 0.7, + "grad_norm": 2.4375, + "learning_rate": 0.00014605077057125643, + "loss": 1.9564, + "step": 295470 + }, + { + "epoch": 0.7, + "grad_norm": 2.578125, + "learning_rate": 0.00014604912991691995, + "loss": 2.0964, + "step": 295475 + }, + { + "epoch": 0.7, + "grad_norm": 2.546875, + "learning_rate": 0.00014604748924685224, + "loss": 2.1318, + "step": 295480 + }, + { + "epoch": 0.7, + "grad_norm": 2.890625, + "learning_rate": 0.00014604584856105384, + "loss": 1.9976, + "step": 295485 + }, + { + "epoch": 0.7, + "grad_norm": 3.625, + "learning_rate": 0.00014604420785952537, + "loss": 2.0669, + "step": 295490 + }, + { + "epoch": 0.7, + "grad_norm": 2.71875, + "learning_rate": 0.00014604256714226735, + "loss": 2.2223, + "step": 295495 + }, + { + "epoch": 0.7, + "grad_norm": 2.359375, + "learning_rate": 0.00014604092640928037, + "loss": 2.3097, + "step": 295500 + }, + { + "epoch": 0.7, + "grad_norm": 2.140625, + "learning_rate": 0.00014603928566056496, + "loss": 2.0428, + "step": 295505 + }, + { + "epoch": 0.7, + "grad_norm": 2.09375, + "learning_rate": 0.0001460376448961217, + "loss": 2.1708, + "step": 295510 + }, + { + "epoch": 0.7, + "grad_norm": 2.125, + "learning_rate": 0.00014603600411595115, + "loss": 2.0694, + "step": 295515 + }, + { + "epoch": 0.7, + "grad_norm": 2.34375, + "learning_rate": 0.00014603436332005386, + "loss": 2.1306, + "step": 295520 + }, + { + "epoch": 0.7, + "grad_norm": 2.34375, + "learning_rate": 0.0001460327225084304, + "loss": 2.2084, + "step": 295525 + }, + { + "epoch": 0.7, + "grad_norm": 2.5, + "learning_rate": 0.00014603108168108136, + "loss": 1.9919, + "step": 295530 + }, + { + "epoch": 0.7, + "grad_norm": 1.96875, + "learning_rate": 0.00014602944083800723, + "loss": 2.1738, + "step": 295535 + }, + { + "epoch": 0.7, + "grad_norm": 1.9609375, + "learning_rate": 0.0001460277999792086, + "loss": 2.1446, + "step": 295540 + }, + { + "epoch": 0.7, + "grad_norm": 1.8046875, + "learning_rate": 0.00014602615910468605, + "loss": 2.1398, + "step": 295545 + }, + { + "epoch": 0.7, + "grad_norm": 2.078125, + "learning_rate": 0.00014602451821444013, + "loss": 2.1362, + "step": 295550 + }, + { + "epoch": 0.7, + "grad_norm": 2.34375, + "learning_rate": 0.00014602287730847139, + "loss": 2.0641, + "step": 295555 + }, + { + "epoch": 0.7, + "grad_norm": 1.796875, + "learning_rate": 0.00014602123638678043, + "loss": 2.0692, + "step": 295560 + }, + { + "epoch": 0.7, + "grad_norm": 2.03125, + "learning_rate": 0.0001460195954493678, + "loss": 2.0878, + "step": 295565 + }, + { + "epoch": 0.7, + "grad_norm": 2.21875, + "learning_rate": 0.00014601795449623397, + "loss": 2.1627, + "step": 295570 + }, + { + "epoch": 0.7, + "grad_norm": 1.921875, + "learning_rate": 0.0001460163135273796, + "loss": 2.0833, + "step": 295575 + }, + { + "epoch": 0.7, + "grad_norm": 2.21875, + "learning_rate": 0.00014601467254280525, + "loss": 2.0062, + "step": 295580 + }, + { + "epoch": 0.7, + "grad_norm": 2.03125, + "learning_rate": 0.00014601303154251143, + "loss": 1.8978, + "step": 295585 + }, + { + "epoch": 0.7, + "grad_norm": 2.0625, + "learning_rate": 0.00014601139052649873, + "loss": 2.1559, + "step": 295590 + }, + { + "epoch": 0.7, + "grad_norm": 1.9765625, + "learning_rate": 0.00014600974949476769, + "loss": 2.1073, + "step": 295595 + }, + { + "epoch": 0.7, + "grad_norm": 2.515625, + "learning_rate": 0.0001460081084473189, + "loss": 2.0012, + "step": 295600 + }, + { + "epoch": 0.7, + "grad_norm": 2.25, + "learning_rate": 0.0001460064673841529, + "loss": 2.1144, + "step": 295605 + }, + { + "epoch": 0.7, + "grad_norm": 2.25, + "learning_rate": 0.00014600482630527025, + "loss": 2.0623, + "step": 295610 + }, + { + "epoch": 0.7, + "grad_norm": 2.140625, + "learning_rate": 0.00014600318521067152, + "loss": 2.1243, + "step": 295615 + }, + { + "epoch": 0.7, + "grad_norm": 2.296875, + "learning_rate": 0.00014600154410035726, + "loss": 2.1376, + "step": 295620 + }, + { + "epoch": 0.7, + "grad_norm": 2.140625, + "learning_rate": 0.00014599990297432808, + "loss": 2.1643, + "step": 295625 + }, + { + "epoch": 0.7, + "grad_norm": 2.078125, + "learning_rate": 0.00014599826183258447, + "loss": 2.0473, + "step": 295630 + }, + { + "epoch": 0.7, + "grad_norm": 2.25, + "learning_rate": 0.00014599662067512703, + "loss": 2.047, + "step": 295635 + }, + { + "epoch": 0.7, + "grad_norm": 1.84375, + "learning_rate": 0.00014599497950195628, + "loss": 2.13, + "step": 295640 + }, + { + "epoch": 0.7, + "grad_norm": 1.953125, + "learning_rate": 0.00014599333831307284, + "loss": 2.0592, + "step": 295645 + }, + { + "epoch": 0.7, + "grad_norm": 2.25, + "learning_rate": 0.00014599169710847724, + "loss": 2.1739, + "step": 295650 + }, + { + "epoch": 0.7, + "grad_norm": 2.640625, + "learning_rate": 0.00014599005588817002, + "loss": 2.0227, + "step": 295655 + }, + { + "epoch": 0.7, + "grad_norm": 2.203125, + "learning_rate": 0.00014598841465215178, + "loss": 2.0194, + "step": 295660 + }, + { + "epoch": 0.7, + "grad_norm": 2.0625, + "learning_rate": 0.00014598677340042307, + "loss": 2.0916, + "step": 295665 + }, + { + "epoch": 0.7, + "grad_norm": 2.078125, + "learning_rate": 0.00014598513213298444, + "loss": 2.1347, + "step": 295670 + }, + { + "epoch": 0.7, + "grad_norm": 2.171875, + "learning_rate": 0.00014598349084983645, + "loss": 2.1066, + "step": 295675 + }, + { + "epoch": 0.7, + "grad_norm": 2.203125, + "learning_rate": 0.00014598184955097966, + "loss": 2.0741, + "step": 295680 + }, + { + "epoch": 0.7, + "grad_norm": 2.390625, + "learning_rate": 0.00014598020823641464, + "loss": 2.1416, + "step": 295685 + }, + { + "epoch": 0.7, + "grad_norm": 2.15625, + "learning_rate": 0.00014597856690614196, + "loss": 2.1072, + "step": 295690 + }, + { + "epoch": 0.7, + "grad_norm": 2.484375, + "learning_rate": 0.00014597692556016215, + "loss": 1.9601, + "step": 295695 + }, + { + "epoch": 0.7, + "grad_norm": 2.65625, + "learning_rate": 0.0001459752841984758, + "loss": 2.0568, + "step": 295700 + }, + { + "epoch": 0.7, + "grad_norm": 2.1875, + "learning_rate": 0.00014597364282108342, + "loss": 2.1031, + "step": 295705 + }, + { + "epoch": 0.7, + "grad_norm": 2.53125, + "learning_rate": 0.00014597200142798565, + "loss": 2.1406, + "step": 295710 + }, + { + "epoch": 0.7, + "grad_norm": 1.9296875, + "learning_rate": 0.00014597036001918302, + "loss": 2.107, + "step": 295715 + }, + { + "epoch": 0.7, + "grad_norm": 2.21875, + "learning_rate": 0.0001459687185946761, + "loss": 1.9504, + "step": 295720 + }, + { + "epoch": 0.7, + "grad_norm": 2.234375, + "learning_rate": 0.00014596707715446537, + "loss": 2.0776, + "step": 295725 + }, + { + "epoch": 0.7, + "grad_norm": 1.6796875, + "learning_rate": 0.00014596543569855147, + "loss": 2.0535, + "step": 295730 + }, + { + "epoch": 0.7, + "grad_norm": 2.53125, + "learning_rate": 0.00014596379422693496, + "loss": 2.1258, + "step": 295735 + }, + { + "epoch": 0.7, + "grad_norm": 1.859375, + "learning_rate": 0.0001459621527396164, + "loss": 2.0313, + "step": 295740 + }, + { + "epoch": 0.7, + "grad_norm": 2.078125, + "learning_rate": 0.00014596051123659632, + "loss": 2.0791, + "step": 295745 + }, + { + "epoch": 0.7, + "grad_norm": 2.078125, + "learning_rate": 0.00014595886971787526, + "loss": 2.0589, + "step": 295750 + }, + { + "epoch": 0.7, + "grad_norm": 2.203125, + "learning_rate": 0.00014595722818345385, + "loss": 2.0192, + "step": 295755 + }, + { + "epoch": 0.7, + "grad_norm": 2.265625, + "learning_rate": 0.00014595558663333264, + "loss": 2.0418, + "step": 295760 + }, + { + "epoch": 0.7, + "grad_norm": 3.078125, + "learning_rate": 0.0001459539450675121, + "loss": 2.0791, + "step": 295765 + }, + { + "epoch": 0.7, + "grad_norm": 2.109375, + "learning_rate": 0.00014595230348599293, + "loss": 2.0435, + "step": 295770 + }, + { + "epoch": 0.7, + "grad_norm": 2.015625, + "learning_rate": 0.0001459506618887756, + "loss": 2.212, + "step": 295775 + }, + { + "epoch": 0.7, + "grad_norm": 2.140625, + "learning_rate": 0.00014594902027586067, + "loss": 2.0358, + "step": 295780 + }, + { + "epoch": 0.7, + "grad_norm": 2.453125, + "learning_rate": 0.00014594737864724873, + "loss": 2.094, + "step": 295785 + }, + { + "epoch": 0.7, + "grad_norm": 2.09375, + "learning_rate": 0.00014594573700294034, + "loss": 2.1328, + "step": 295790 + }, + { + "epoch": 0.7, + "grad_norm": 1.890625, + "learning_rate": 0.00014594409534293606, + "loss": 2.0639, + "step": 295795 + }, + { + "epoch": 0.7, + "grad_norm": 2.25, + "learning_rate": 0.00014594245366723643, + "loss": 1.9395, + "step": 295800 + }, + { + "epoch": 0.7, + "grad_norm": 1.984375, + "learning_rate": 0.000145940811975842, + "loss": 1.9681, + "step": 295805 + }, + { + "epoch": 0.7, + "grad_norm": 2.40625, + "learning_rate": 0.0001459391702687534, + "loss": 2.0673, + "step": 295810 + }, + { + "epoch": 0.7, + "grad_norm": 2.34375, + "learning_rate": 0.0001459375285459711, + "loss": 1.9904, + "step": 295815 + }, + { + "epoch": 0.7, + "grad_norm": 2.59375, + "learning_rate": 0.00014593588680749574, + "loss": 1.9589, + "step": 295820 + }, + { + "epoch": 0.7, + "grad_norm": 2.296875, + "learning_rate": 0.00014593424505332785, + "loss": 2.1849, + "step": 295825 + }, + { + "epoch": 0.7, + "grad_norm": 2.078125, + "learning_rate": 0.00014593260328346798, + "loss": 2.1305, + "step": 295830 + }, + { + "epoch": 0.7, + "grad_norm": 2.25, + "learning_rate": 0.0001459309614979167, + "loss": 2.1338, + "step": 295835 + }, + { + "epoch": 0.7, + "grad_norm": 2.125, + "learning_rate": 0.0001459293196966746, + "loss": 2.1037, + "step": 295840 + }, + { + "epoch": 0.7, + "grad_norm": 1.953125, + "learning_rate": 0.00014592767787974215, + "loss": 1.9331, + "step": 295845 + }, + { + "epoch": 0.7, + "grad_norm": 2.203125, + "learning_rate": 0.00014592603604712, + "loss": 2.0264, + "step": 295850 + }, + { + "epoch": 0.7, + "grad_norm": 2.15625, + "learning_rate": 0.00014592439419880873, + "loss": 1.9648, + "step": 295855 + }, + { + "epoch": 0.7, + "grad_norm": 2.375, + "learning_rate": 0.00014592275233480878, + "loss": 2.1536, + "step": 295860 + }, + { + "epoch": 0.7, + "grad_norm": 2.984375, + "learning_rate": 0.00014592111045512083, + "loss": 1.9497, + "step": 295865 + }, + { + "epoch": 0.7, + "grad_norm": 2.5, + "learning_rate": 0.00014591946855974536, + "loss": 2.0075, + "step": 295870 + }, + { + "epoch": 0.7, + "grad_norm": 2.28125, + "learning_rate": 0.000145917826648683, + "loss": 1.9956, + "step": 295875 + }, + { + "epoch": 0.7, + "grad_norm": 1.859375, + "learning_rate": 0.00014591618472193429, + "loss": 2.2832, + "step": 295880 + }, + { + "epoch": 0.7, + "grad_norm": 2.171875, + "learning_rate": 0.00014591454277949976, + "loss": 1.9618, + "step": 295885 + }, + { + "epoch": 0.7, + "grad_norm": 2.546875, + "learning_rate": 0.00014591290082137997, + "loss": 2.2115, + "step": 295890 + }, + { + "epoch": 0.7, + "grad_norm": 2.171875, + "learning_rate": 0.00014591125884757551, + "loss": 2.1035, + "step": 295895 + }, + { + "epoch": 0.7, + "grad_norm": 2.265625, + "learning_rate": 0.00014590961685808694, + "loss": 2.0993, + "step": 295900 + }, + { + "epoch": 0.7, + "grad_norm": 2.3125, + "learning_rate": 0.00014590797485291483, + "loss": 2.1477, + "step": 295905 + }, + { + "epoch": 0.7, + "grad_norm": 2.5625, + "learning_rate": 0.0001459063328320597, + "loss": 2.0592, + "step": 295910 + }, + { + "epoch": 0.7, + "grad_norm": 2.625, + "learning_rate": 0.0001459046907955221, + "loss": 2.0419, + "step": 295915 + }, + { + "epoch": 0.7, + "grad_norm": 2.078125, + "learning_rate": 0.0001459030487433027, + "loss": 1.9668, + "step": 295920 + }, + { + "epoch": 0.7, + "grad_norm": 1.9765625, + "learning_rate": 0.00014590140667540194, + "loss": 2.1848, + "step": 295925 + }, + { + "epoch": 0.7, + "grad_norm": 2.09375, + "learning_rate": 0.00014589976459182045, + "loss": 2.134, + "step": 295930 + }, + { + "epoch": 0.7, + "grad_norm": 2.0, + "learning_rate": 0.00014589812249255873, + "loss": 2.0543, + "step": 295935 + }, + { + "epoch": 0.7, + "grad_norm": 2.03125, + "learning_rate": 0.00014589648037761742, + "loss": 1.8883, + "step": 295940 + }, + { + "epoch": 0.7, + "grad_norm": 2.15625, + "learning_rate": 0.00014589483824699703, + "loss": 2.1033, + "step": 295945 + }, + { + "epoch": 0.7, + "grad_norm": 2.03125, + "learning_rate": 0.0001458931961006981, + "loss": 2.0241, + "step": 295950 + }, + { + "epoch": 0.7, + "grad_norm": 2.328125, + "learning_rate": 0.00014589155393872124, + "loss": 2.2195, + "step": 295955 + }, + { + "epoch": 0.7, + "grad_norm": 2.125, + "learning_rate": 0.00014588991176106702, + "loss": 1.7733, + "step": 295960 + }, + { + "epoch": 0.7, + "grad_norm": 2.390625, + "learning_rate": 0.00014588826956773594, + "loss": 2.2185, + "step": 295965 + }, + { + "epoch": 0.7, + "grad_norm": 2.125, + "learning_rate": 0.0001458866273587286, + "loss": 2.1056, + "step": 295970 + }, + { + "epoch": 0.7, + "grad_norm": 1.8125, + "learning_rate": 0.00014588498513404556, + "loss": 2.0914, + "step": 295975 + }, + { + "epoch": 0.7, + "grad_norm": 2.09375, + "learning_rate": 0.0001458833428936874, + "loss": 2.033, + "step": 295980 + }, + { + "epoch": 0.7, + "grad_norm": 2.1875, + "learning_rate": 0.00014588170063765463, + "loss": 1.9549, + "step": 295985 + }, + { + "epoch": 0.7, + "grad_norm": 2.328125, + "learning_rate": 0.00014588005836594788, + "loss": 1.9232, + "step": 295990 + }, + { + "epoch": 0.7, + "grad_norm": 2.15625, + "learning_rate": 0.0001458784160785676, + "loss": 2.0805, + "step": 295995 + }, + { + "epoch": 0.7, + "grad_norm": 2.03125, + "learning_rate": 0.00014587677377551444, + "loss": 1.8755, + "step": 296000 + }, + { + "epoch": 0.7, + "grad_norm": 2.21875, + "learning_rate": 0.00014587513145678899, + "loss": 1.9354, + "step": 296005 + }, + { + "epoch": 0.7, + "grad_norm": 2.4375, + "learning_rate": 0.0001458734891223917, + "loss": 2.1416, + "step": 296010 + }, + { + "epoch": 0.7, + "grad_norm": 2.0625, + "learning_rate": 0.00014587184677232327, + "loss": 2.0616, + "step": 296015 + }, + { + "epoch": 0.7, + "grad_norm": 2.625, + "learning_rate": 0.00014587020440658415, + "loss": 2.1385, + "step": 296020 + }, + { + "epoch": 0.7, + "grad_norm": 2.0625, + "learning_rate": 0.0001458685620251749, + "loss": 2.0017, + "step": 296025 + }, + { + "epoch": 0.7, + "grad_norm": 2.4375, + "learning_rate": 0.00014586691962809614, + "loss": 1.9339, + "step": 296030 + }, + { + "epoch": 0.7, + "grad_norm": 2.390625, + "learning_rate": 0.00014586527721534843, + "loss": 2.0235, + "step": 296035 + }, + { + "epoch": 0.7, + "grad_norm": 2.03125, + "learning_rate": 0.0001458636347869323, + "loss": 2.0439, + "step": 296040 + }, + { + "epoch": 0.7, + "grad_norm": 2.546875, + "learning_rate": 0.0001458619923428483, + "loss": 2.1259, + "step": 296045 + }, + { + "epoch": 0.7, + "grad_norm": 1.9140625, + "learning_rate": 0.000145860349883097, + "loss": 2.0977, + "step": 296050 + }, + { + "epoch": 0.7, + "grad_norm": 2.671875, + "learning_rate": 0.00014585870740767902, + "loss": 2.1067, + "step": 296055 + }, + { + "epoch": 0.7, + "grad_norm": 2.1875, + "learning_rate": 0.00014585706491659486, + "loss": 1.9511, + "step": 296060 + }, + { + "epoch": 0.7, + "grad_norm": 2.59375, + "learning_rate": 0.00014585542240984508, + "loss": 1.9911, + "step": 296065 + }, + { + "epoch": 0.7, + "grad_norm": 2.84375, + "learning_rate": 0.00014585377988743028, + "loss": 1.8659, + "step": 296070 + }, + { + "epoch": 0.7, + "grad_norm": 2.3125, + "learning_rate": 0.00014585213734935094, + "loss": 2.025, + "step": 296075 + }, + { + "epoch": 0.7, + "grad_norm": 2.515625, + "learning_rate": 0.00014585049479560775, + "loss": 2.1924, + "step": 296080 + }, + { + "epoch": 0.7, + "grad_norm": 3.734375, + "learning_rate": 0.00014584885222620117, + "loss": 2.1188, + "step": 296085 + }, + { + "epoch": 0.7, + "grad_norm": 2.375, + "learning_rate": 0.0001458472096411318, + "loss": 2.1117, + "step": 296090 + }, + { + "epoch": 0.7, + "grad_norm": 3.078125, + "learning_rate": 0.00014584556704040015, + "loss": 2.1463, + "step": 296095 + }, + { + "epoch": 0.7, + "grad_norm": 2.171875, + "learning_rate": 0.00014584392442400687, + "loss": 2.0679, + "step": 296100 + }, + { + "epoch": 0.7, + "grad_norm": 2.375, + "learning_rate": 0.00014584228179195246, + "loss": 2.224, + "step": 296105 + }, + { + "epoch": 0.7, + "grad_norm": 2.609375, + "learning_rate": 0.00014584063914423748, + "loss": 2.2083, + "step": 296110 + }, + { + "epoch": 0.7, + "grad_norm": 2.21875, + "learning_rate": 0.00014583899648086253, + "loss": 2.1581, + "step": 296115 + }, + { + "epoch": 0.7, + "grad_norm": 2.34375, + "learning_rate": 0.00014583735380182812, + "loss": 1.9882, + "step": 296120 + }, + { + "epoch": 0.7, + "grad_norm": 2.234375, + "learning_rate": 0.00014583571110713485, + "loss": 2.019, + "step": 296125 + }, + { + "epoch": 0.7, + "grad_norm": 1.890625, + "learning_rate": 0.00014583406839678329, + "loss": 1.9681, + "step": 296130 + }, + { + "epoch": 0.7, + "grad_norm": 2.28125, + "learning_rate": 0.00014583242567077395, + "loss": 1.9932, + "step": 296135 + }, + { + "epoch": 0.7, + "grad_norm": 2.171875, + "learning_rate": 0.00014583078292910746, + "loss": 1.8908, + "step": 296140 + }, + { + "epoch": 0.7, + "grad_norm": 2.3125, + "learning_rate": 0.0001458291401717843, + "loss": 2.0751, + "step": 296145 + }, + { + "epoch": 0.7, + "grad_norm": 2.65625, + "learning_rate": 0.00014582749739880513, + "loss": 2.0345, + "step": 296150 + }, + { + "epoch": 0.7, + "grad_norm": 2.21875, + "learning_rate": 0.0001458258546101704, + "loss": 2.0798, + "step": 296155 + }, + { + "epoch": 0.7, + "grad_norm": 2.171875, + "learning_rate": 0.00014582421180588074, + "loss": 2.077, + "step": 296160 + }, + { + "epoch": 0.7, + "grad_norm": 2.484375, + "learning_rate": 0.00014582256898593672, + "loss": 1.9816, + "step": 296165 + }, + { + "epoch": 0.7, + "grad_norm": 2.21875, + "learning_rate": 0.00014582092615033888, + "loss": 2.0776, + "step": 296170 + }, + { + "epoch": 0.7, + "grad_norm": 2.234375, + "learning_rate": 0.00014581928329908777, + "loss": 2.1541, + "step": 296175 + }, + { + "epoch": 0.7, + "grad_norm": 2.046875, + "learning_rate": 0.00014581764043218395, + "loss": 2.1665, + "step": 296180 + }, + { + "epoch": 0.7, + "grad_norm": 2.0625, + "learning_rate": 0.00014581599754962798, + "loss": 2.0531, + "step": 296185 + }, + { + "epoch": 0.7, + "grad_norm": 2.375, + "learning_rate": 0.00014581435465142045, + "loss": 2.1888, + "step": 296190 + }, + { + "epoch": 0.7, + "grad_norm": 2.0625, + "learning_rate": 0.00014581271173756192, + "loss": 1.8995, + "step": 296195 + }, + { + "epoch": 0.7, + "grad_norm": 2.125, + "learning_rate": 0.00014581106880805295, + "loss": 2.0393, + "step": 296200 + }, + { + "epoch": 0.7, + "grad_norm": 2.203125, + "learning_rate": 0.00014580942586289405, + "loss": 2.1975, + "step": 296205 + }, + { + "epoch": 0.7, + "grad_norm": 2.140625, + "learning_rate": 0.00014580778290208582, + "loss": 2.0546, + "step": 296210 + }, + { + "epoch": 0.7, + "grad_norm": 2.171875, + "learning_rate": 0.00014580613992562887, + "loss": 2.0975, + "step": 296215 + }, + { + "epoch": 0.7, + "grad_norm": 1.9453125, + "learning_rate": 0.00014580449693352368, + "loss": 1.9794, + "step": 296220 + }, + { + "epoch": 0.7, + "grad_norm": 2.109375, + "learning_rate": 0.00014580285392577083, + "loss": 2.1517, + "step": 296225 + }, + { + "epoch": 0.7, + "grad_norm": 1.921875, + "learning_rate": 0.0001458012109023709, + "loss": 2.1757, + "step": 296230 + }, + { + "epoch": 0.7, + "grad_norm": 2.140625, + "learning_rate": 0.00014579956786332448, + "loss": 2.2011, + "step": 296235 + }, + { + "epoch": 0.7, + "grad_norm": 2.5625, + "learning_rate": 0.00014579792480863205, + "loss": 2.1134, + "step": 296240 + }, + { + "epoch": 0.7, + "grad_norm": 2.125, + "learning_rate": 0.00014579628173829422, + "loss": 2.3213, + "step": 296245 + }, + { + "epoch": 0.7, + "grad_norm": 2.546875, + "learning_rate": 0.00014579463865231157, + "loss": 2.0395, + "step": 296250 + }, + { + "epoch": 0.7, + "grad_norm": 2.5625, + "learning_rate": 0.00014579299555068464, + "loss": 1.9562, + "step": 296255 + }, + { + "epoch": 0.7, + "grad_norm": 2.078125, + "learning_rate": 0.000145791352433414, + "loss": 2.0121, + "step": 296260 + }, + { + "epoch": 0.7, + "grad_norm": 2.21875, + "learning_rate": 0.0001457897093005002, + "loss": 2.1117, + "step": 296265 + }, + { + "epoch": 0.7, + "grad_norm": 2.359375, + "learning_rate": 0.00014578806615194378, + "loss": 2.1087, + "step": 296270 + }, + { + "epoch": 0.7, + "grad_norm": 2.1875, + "learning_rate": 0.00014578642298774535, + "loss": 2.1857, + "step": 296275 + }, + { + "epoch": 0.7, + "grad_norm": 2.109375, + "learning_rate": 0.00014578477980790544, + "loss": 2.0252, + "step": 296280 + }, + { + "epoch": 0.7, + "grad_norm": 2.140625, + "learning_rate": 0.00014578313661242463, + "loss": 1.9996, + "step": 296285 + }, + { + "epoch": 0.7, + "grad_norm": 1.9296875, + "learning_rate": 0.00014578149340130345, + "loss": 2.0756, + "step": 296290 + }, + { + "epoch": 0.7, + "grad_norm": 2.09375, + "learning_rate": 0.0001457798501745425, + "loss": 1.9639, + "step": 296295 + }, + { + "epoch": 0.7, + "grad_norm": 2.640625, + "learning_rate": 0.0001457782069321423, + "loss": 2.1845, + "step": 296300 + }, + { + "epoch": 0.7, + "grad_norm": 2.28125, + "learning_rate": 0.00014577656367410345, + "loss": 2.1855, + "step": 296305 + }, + { + "epoch": 0.7, + "grad_norm": 2.484375, + "learning_rate": 0.00014577492040042652, + "loss": 2.0076, + "step": 296310 + }, + { + "epoch": 0.7, + "grad_norm": 2.375, + "learning_rate": 0.00014577327711111203, + "loss": 2.1103, + "step": 296315 + }, + { + "epoch": 0.7, + "grad_norm": 2.0625, + "learning_rate": 0.00014577163380616052, + "loss": 2.1145, + "step": 296320 + }, + { + "epoch": 0.7, + "grad_norm": 2.53125, + "learning_rate": 0.00014576999048557262, + "loss": 2.0087, + "step": 296325 + }, + { + "epoch": 0.7, + "grad_norm": 2.125, + "learning_rate": 0.00014576834714934888, + "loss": 2.1636, + "step": 296330 + }, + { + "epoch": 0.7, + "grad_norm": 2.125, + "learning_rate": 0.00014576670379748982, + "loss": 2.0928, + "step": 296335 + }, + { + "epoch": 0.7, + "grad_norm": 2.75, + "learning_rate": 0.00014576506042999603, + "loss": 2.1204, + "step": 296340 + }, + { + "epoch": 0.7, + "grad_norm": 2.453125, + "learning_rate": 0.00014576341704686805, + "loss": 1.8899, + "step": 296345 + }, + { + "epoch": 0.7, + "grad_norm": 2.078125, + "learning_rate": 0.0001457617736481065, + "loss": 2.0201, + "step": 296350 + }, + { + "epoch": 0.7, + "grad_norm": 2.140625, + "learning_rate": 0.00014576013023371185, + "loss": 2.021, + "step": 296355 + }, + { + "epoch": 0.7, + "grad_norm": 2.203125, + "learning_rate": 0.00014575848680368475, + "loss": 2.0405, + "step": 296360 + }, + { + "epoch": 0.7, + "grad_norm": 2.0625, + "learning_rate": 0.00014575684335802567, + "loss": 1.9987, + "step": 296365 + }, + { + "epoch": 0.7, + "grad_norm": 2.40625, + "learning_rate": 0.00014575519989673525, + "loss": 2.1186, + "step": 296370 + }, + { + "epoch": 0.7, + "grad_norm": 2.21875, + "learning_rate": 0.00014575355641981402, + "loss": 2.1357, + "step": 296375 + }, + { + "epoch": 0.7, + "grad_norm": 2.03125, + "learning_rate": 0.00014575191292726255, + "loss": 2.1946, + "step": 296380 + }, + { + "epoch": 0.7, + "grad_norm": 2.40625, + "learning_rate": 0.0001457502694190814, + "loss": 2.2856, + "step": 296385 + }, + { + "epoch": 0.7, + "grad_norm": 2.21875, + "learning_rate": 0.00014574862589527115, + "loss": 1.9553, + "step": 296390 + }, + { + "epoch": 0.7, + "grad_norm": 3.296875, + "learning_rate": 0.0001457469823558323, + "loss": 1.9781, + "step": 296395 + }, + { + "epoch": 0.7, + "grad_norm": 2.3125, + "learning_rate": 0.00014574533880076546, + "loss": 2.0928, + "step": 296400 + }, + { + "epoch": 0.7, + "grad_norm": 2.25, + "learning_rate": 0.0001457436952300712, + "loss": 1.9424, + "step": 296405 + }, + { + "epoch": 0.7, + "grad_norm": 2.203125, + "learning_rate": 0.00014574205164375004, + "loss": 2.1669, + "step": 296410 + }, + { + "epoch": 0.7, + "grad_norm": 2.21875, + "learning_rate": 0.00014574040804180257, + "loss": 2.0538, + "step": 296415 + }, + { + "epoch": 0.7, + "grad_norm": 2.25, + "learning_rate": 0.00014573876442422937, + "loss": 2.144, + "step": 296420 + }, + { + "epoch": 0.7, + "grad_norm": 2.3125, + "learning_rate": 0.00014573712079103096, + "loss": 2.0155, + "step": 296425 + }, + { + "epoch": 0.7, + "grad_norm": 2.125, + "learning_rate": 0.00014573547714220793, + "loss": 2.088, + "step": 296430 + }, + { + "epoch": 0.7, + "grad_norm": 2.125, + "learning_rate": 0.00014573383347776084, + "loss": 2.0076, + "step": 296435 + }, + { + "epoch": 0.7, + "grad_norm": 2.640625, + "learning_rate": 0.0001457321897976902, + "loss": 2.1598, + "step": 296440 + }, + { + "epoch": 0.7, + "grad_norm": 2.328125, + "learning_rate": 0.0001457305461019967, + "loss": 2.1877, + "step": 296445 + }, + { + "epoch": 0.7, + "grad_norm": 2.375, + "learning_rate": 0.0001457289023906807, + "loss": 2.038, + "step": 296450 + }, + { + "epoch": 0.7, + "grad_norm": 2.09375, + "learning_rate": 0.00014572725866374294, + "loss": 2.2334, + "step": 296455 + }, + { + "epoch": 0.7, + "grad_norm": 2.265625, + "learning_rate": 0.00014572561492118394, + "loss": 2.0104, + "step": 296460 + }, + { + "epoch": 0.7, + "grad_norm": 1.984375, + "learning_rate": 0.00014572397116300424, + "loss": 2.0945, + "step": 296465 + }, + { + "epoch": 0.7, + "grad_norm": 2.125, + "learning_rate": 0.00014572232738920438, + "loss": 2.0891, + "step": 296470 + }, + { + "epoch": 0.7, + "grad_norm": 2.234375, + "learning_rate": 0.0001457206835997849, + "loss": 2.1128, + "step": 296475 + }, + { + "epoch": 0.7, + "grad_norm": 2.0625, + "learning_rate": 0.00014571903979474646, + "loss": 2.0557, + "step": 296480 + }, + { + "epoch": 0.7, + "grad_norm": 2.1875, + "learning_rate": 0.00014571739597408955, + "loss": 2.1442, + "step": 296485 + }, + { + "epoch": 0.7, + "grad_norm": 2.390625, + "learning_rate": 0.0001457157521378148, + "loss": 2.278, + "step": 296490 + }, + { + "epoch": 0.7, + "grad_norm": 1.875, + "learning_rate": 0.00014571410828592265, + "loss": 2.0966, + "step": 296495 + }, + { + "epoch": 0.7, + "grad_norm": 2.390625, + "learning_rate": 0.00014571246441841376, + "loss": 2.0683, + "step": 296500 + }, + { + "epoch": 0.7, + "grad_norm": 1.8828125, + "learning_rate": 0.00014571082053528863, + "loss": 1.9146, + "step": 296505 + }, + { + "epoch": 0.7, + "grad_norm": 2.09375, + "learning_rate": 0.00014570917663654791, + "loss": 2.0812, + "step": 296510 + }, + { + "epoch": 0.7, + "grad_norm": 2.203125, + "learning_rate": 0.00014570753272219206, + "loss": 2.0878, + "step": 296515 + }, + { + "epoch": 0.7, + "grad_norm": 2.640625, + "learning_rate": 0.00014570588879222172, + "loss": 2.0001, + "step": 296520 + }, + { + "epoch": 0.7, + "grad_norm": 2.078125, + "learning_rate": 0.0001457042448466374, + "loss": 2.2266, + "step": 296525 + }, + { + "epoch": 0.7, + "grad_norm": 2.390625, + "learning_rate": 0.00014570260088543968, + "loss": 2.1113, + "step": 296530 + }, + { + "epoch": 0.7, + "grad_norm": 3.0625, + "learning_rate": 0.00014570095690862912, + "loss": 1.9704, + "step": 296535 + }, + { + "epoch": 0.7, + "grad_norm": 2.0625, + "learning_rate": 0.00014569931291620632, + "loss": 2.017, + "step": 296540 + }, + { + "epoch": 0.7, + "grad_norm": 2.234375, + "learning_rate": 0.00014569766890817177, + "loss": 2.1791, + "step": 296545 + }, + { + "epoch": 0.7, + "grad_norm": 2.1875, + "learning_rate": 0.0001456960248845261, + "loss": 2.0614, + "step": 296550 + }, + { + "epoch": 0.7, + "grad_norm": 2.734375, + "learning_rate": 0.0001456943808452698, + "loss": 2.0027, + "step": 296555 + }, + { + "epoch": 0.7, + "grad_norm": 2.203125, + "learning_rate": 0.00014569273679040348, + "loss": 1.8993, + "step": 296560 + }, + { + "epoch": 0.7, + "grad_norm": 2.390625, + "learning_rate": 0.00014569109271992772, + "loss": 2.0729, + "step": 296565 + }, + { + "epoch": 0.7, + "grad_norm": 2.375, + "learning_rate": 0.00014568944863384303, + "loss": 1.728, + "step": 296570 + }, + { + "epoch": 0.7, + "grad_norm": 2.203125, + "learning_rate": 0.00014568780453215, + "loss": 2.2141, + "step": 296575 + }, + { + "epoch": 0.7, + "grad_norm": 2.046875, + "learning_rate": 0.0001456861604148492, + "loss": 2.1806, + "step": 296580 + }, + { + "epoch": 0.7, + "grad_norm": 2.203125, + "learning_rate": 0.00014568451628194115, + "loss": 2.1782, + "step": 296585 + }, + { + "epoch": 0.7, + "grad_norm": 2.296875, + "learning_rate": 0.00014568287213342646, + "loss": 2.0128, + "step": 296590 + }, + { + "epoch": 0.7, + "grad_norm": 1.96875, + "learning_rate": 0.00014568122796930567, + "loss": 2.0817, + "step": 296595 + }, + { + "epoch": 0.7, + "grad_norm": 1.96875, + "learning_rate": 0.00014567958378957933, + "loss": 1.9612, + "step": 296600 + }, + { + "epoch": 0.7, + "grad_norm": 2.125, + "learning_rate": 0.00014567793959424804, + "loss": 2.1273, + "step": 296605 + }, + { + "epoch": 0.7, + "grad_norm": 2.03125, + "learning_rate": 0.00014567629538331234, + "loss": 2.0434, + "step": 296610 + }, + { + "epoch": 0.7, + "grad_norm": 1.953125, + "learning_rate": 0.00014567465115677278, + "loss": 1.8936, + "step": 296615 + }, + { + "epoch": 0.7, + "grad_norm": 2.1875, + "learning_rate": 0.00014567300691462992, + "loss": 1.8743, + "step": 296620 + }, + { + "epoch": 0.7, + "grad_norm": 1.8828125, + "learning_rate": 0.00014567136265688434, + "loss": 2.1582, + "step": 296625 + }, + { + "epoch": 0.7, + "grad_norm": 2.234375, + "learning_rate": 0.00014566971838353657, + "loss": 2.0617, + "step": 296630 + }, + { + "epoch": 0.7, + "grad_norm": 1.9140625, + "learning_rate": 0.00014566807409458724, + "loss": 1.9436, + "step": 296635 + }, + { + "epoch": 0.7, + "grad_norm": 2.0625, + "learning_rate": 0.00014566642979003684, + "loss": 2.0815, + "step": 296640 + }, + { + "epoch": 0.7, + "grad_norm": 2.515625, + "learning_rate": 0.00014566478546988599, + "loss": 2.0467, + "step": 296645 + }, + { + "epoch": 0.7, + "grad_norm": 1.9921875, + "learning_rate": 0.0001456631411341352, + "loss": 2.0819, + "step": 296650 + }, + { + "epoch": 0.7, + "grad_norm": 1.8125, + "learning_rate": 0.00014566149678278505, + "loss": 1.9648, + "step": 296655 + }, + { + "epoch": 0.7, + "grad_norm": 2.28125, + "learning_rate": 0.00014565985241583607, + "loss": 2.1248, + "step": 296660 + }, + { + "epoch": 0.7, + "grad_norm": 2.1875, + "learning_rate": 0.0001456582080332889, + "loss": 2.1674, + "step": 296665 + }, + { + "epoch": 0.7, + "grad_norm": 2.234375, + "learning_rate": 0.00014565656363514407, + "loss": 2.0659, + "step": 296670 + }, + { + "epoch": 0.7, + "grad_norm": 2.46875, + "learning_rate": 0.00014565491922140212, + "loss": 1.9888, + "step": 296675 + }, + { + "epoch": 0.7, + "grad_norm": 2.65625, + "learning_rate": 0.0001456532747920636, + "loss": 2.087, + "step": 296680 + }, + { + "epoch": 0.7, + "grad_norm": 1.9921875, + "learning_rate": 0.0001456516303471291, + "loss": 2.1308, + "step": 296685 + }, + { + "epoch": 0.7, + "grad_norm": 2.8125, + "learning_rate": 0.00014564998588659917, + "loss": 1.9874, + "step": 296690 + }, + { + "epoch": 0.7, + "grad_norm": 2.71875, + "learning_rate": 0.0001456483414104744, + "loss": 2.14, + "step": 296695 + }, + { + "epoch": 0.7, + "grad_norm": 2.140625, + "learning_rate": 0.0001456466969187553, + "loss": 2.2366, + "step": 296700 + }, + { + "epoch": 0.7, + "grad_norm": 2.03125, + "learning_rate": 0.00014564505241144247, + "loss": 2.0297, + "step": 296705 + }, + { + "epoch": 0.7, + "grad_norm": 2.234375, + "learning_rate": 0.00014564340788853647, + "loss": 2.1429, + "step": 296710 + }, + { + "epoch": 0.7, + "grad_norm": 2.34375, + "learning_rate": 0.00014564176335003784, + "loss": 2.0403, + "step": 296715 + }, + { + "epoch": 0.7, + "grad_norm": 2.3125, + "learning_rate": 0.0001456401187959472, + "loss": 2.0131, + "step": 296720 + }, + { + "epoch": 0.7, + "grad_norm": 2.203125, + "learning_rate": 0.00014563847422626502, + "loss": 2.0994, + "step": 296725 + }, + { + "epoch": 0.7, + "grad_norm": 2.34375, + "learning_rate": 0.00014563682964099193, + "loss": 2.1755, + "step": 296730 + }, + { + "epoch": 0.7, + "grad_norm": 2.25, + "learning_rate": 0.00014563518504012845, + "loss": 2.0147, + "step": 296735 + }, + { + "epoch": 0.7, + "grad_norm": 2.5625, + "learning_rate": 0.0001456335404236752, + "loss": 2.0981, + "step": 296740 + }, + { + "epoch": 0.7, + "grad_norm": 2.4375, + "learning_rate": 0.00014563189579163267, + "loss": 2.0178, + "step": 296745 + }, + { + "epoch": 0.7, + "grad_norm": 2.171875, + "learning_rate": 0.00014563025114400148, + "loss": 1.9577, + "step": 296750 + }, + { + "epoch": 0.7, + "grad_norm": 2.0625, + "learning_rate": 0.00014562860648078215, + "loss": 2.143, + "step": 296755 + }, + { + "epoch": 0.7, + "grad_norm": 2.734375, + "learning_rate": 0.0001456269618019753, + "loss": 2.2327, + "step": 296760 + }, + { + "epoch": 0.7, + "grad_norm": 2.421875, + "learning_rate": 0.0001456253171075814, + "loss": 2.1347, + "step": 296765 + }, + { + "epoch": 0.7, + "grad_norm": 2.421875, + "learning_rate": 0.0001456236723976011, + "loss": 2.1135, + "step": 296770 + }, + { + "epoch": 0.7, + "grad_norm": 2.1875, + "learning_rate": 0.0001456220276720349, + "loss": 2.1451, + "step": 296775 + }, + { + "epoch": 0.7, + "grad_norm": 1.84375, + "learning_rate": 0.0001456203829308834, + "loss": 1.9833, + "step": 296780 + }, + { + "epoch": 0.7, + "grad_norm": 2.40625, + "learning_rate": 0.00014561873817414716, + "loss": 2.1749, + "step": 296785 + }, + { + "epoch": 0.7, + "grad_norm": 2.09375, + "learning_rate": 0.00014561709340182673, + "loss": 2.1615, + "step": 296790 + }, + { + "epoch": 0.7, + "grad_norm": 2.15625, + "learning_rate": 0.00014561544861392266, + "loss": 1.9422, + "step": 296795 + }, + { + "epoch": 0.7, + "grad_norm": 2.046875, + "learning_rate": 0.0001456138038104355, + "loss": 2.0258, + "step": 296800 + }, + { + "epoch": 0.7, + "grad_norm": 2.1875, + "learning_rate": 0.00014561215899136588, + "loss": 2.0311, + "step": 296805 + }, + { + "epoch": 0.7, + "grad_norm": 2.09375, + "learning_rate": 0.00014561051415671434, + "loss": 2.0369, + "step": 296810 + }, + { + "epoch": 0.7, + "grad_norm": 1.8671875, + "learning_rate": 0.00014560886930648138, + "loss": 2.1256, + "step": 296815 + }, + { + "epoch": 0.7, + "grad_norm": 2.203125, + "learning_rate": 0.00014560722444066762, + "loss": 2.1245, + "step": 296820 + }, + { + "epoch": 0.7, + "grad_norm": 2.4375, + "learning_rate": 0.00014560557955927358, + "loss": 2.0654, + "step": 296825 + }, + { + "epoch": 0.7, + "grad_norm": 2.578125, + "learning_rate": 0.00014560393466229988, + "loss": 2.0461, + "step": 296830 + }, + { + "epoch": 0.7, + "grad_norm": 2.234375, + "learning_rate": 0.000145602289749747, + "loss": 2.2668, + "step": 296835 + }, + { + "epoch": 0.7, + "grad_norm": 2.25, + "learning_rate": 0.0001456006448216156, + "loss": 2.0072, + "step": 296840 + }, + { + "epoch": 0.7, + "grad_norm": 1.9453125, + "learning_rate": 0.0001455989998779062, + "loss": 2.1399, + "step": 296845 + }, + { + "epoch": 0.7, + "grad_norm": 2.125, + "learning_rate": 0.0001455973549186193, + "loss": 2.2191, + "step": 296850 + }, + { + "epoch": 0.7, + "grad_norm": 2.234375, + "learning_rate": 0.00014559570994375558, + "loss": 2.1066, + "step": 296855 + }, + { + "epoch": 0.7, + "grad_norm": 2.09375, + "learning_rate": 0.0001455940649533155, + "loss": 1.958, + "step": 296860 + }, + { + "epoch": 0.7, + "grad_norm": 2.015625, + "learning_rate": 0.00014559241994729967, + "loss": 1.9723, + "step": 296865 + }, + { + "epoch": 0.7, + "grad_norm": 2.125, + "learning_rate": 0.00014559077492570864, + "loss": 1.9401, + "step": 296870 + }, + { + "epoch": 0.7, + "grad_norm": 2.203125, + "learning_rate": 0.000145589129888543, + "loss": 1.9823, + "step": 296875 + }, + { + "epoch": 0.7, + "grad_norm": 2.390625, + "learning_rate": 0.00014558748483580323, + "loss": 2.1177, + "step": 296880 + }, + { + "epoch": 0.7, + "grad_norm": 2.21875, + "learning_rate": 0.00014558583976748997, + "loss": 2.1002, + "step": 296885 + }, + { + "epoch": 0.7, + "grad_norm": 2.265625, + "learning_rate": 0.00014558419468360375, + "loss": 2.2544, + "step": 296890 + }, + { + "epoch": 0.7, + "grad_norm": 2.375, + "learning_rate": 0.00014558254958414518, + "loss": 2.1169, + "step": 296895 + }, + { + "epoch": 0.7, + "grad_norm": 2.34375, + "learning_rate": 0.00014558090446911478, + "loss": 2.0655, + "step": 296900 + }, + { + "epoch": 0.7, + "grad_norm": 2.125, + "learning_rate": 0.00014557925933851308, + "loss": 2.0452, + "step": 296905 + }, + { + "epoch": 0.7, + "grad_norm": 1.9375, + "learning_rate": 0.0001455776141923407, + "loss": 2.212, + "step": 296910 + }, + { + "epoch": 0.7, + "grad_norm": 2.578125, + "learning_rate": 0.00014557596903059817, + "loss": 2.1596, + "step": 296915 + }, + { + "epoch": 0.7, + "grad_norm": 2.46875, + "learning_rate": 0.0001455743238532861, + "loss": 2.058, + "step": 296920 + }, + { + "epoch": 0.7, + "grad_norm": 1.9140625, + "learning_rate": 0.000145572678660405, + "loss": 2.0416, + "step": 296925 + }, + { + "epoch": 0.7, + "grad_norm": 2.09375, + "learning_rate": 0.00014557103345195542, + "loss": 2.1068, + "step": 296930 + }, + { + "epoch": 0.7, + "grad_norm": 2.203125, + "learning_rate": 0.00014556938822793796, + "loss": 1.9763, + "step": 296935 + }, + { + "epoch": 0.7, + "grad_norm": 2.28125, + "learning_rate": 0.00014556774298835317, + "loss": 2.0935, + "step": 296940 + }, + { + "epoch": 0.7, + "grad_norm": 2.09375, + "learning_rate": 0.00014556609773320163, + "loss": 2.24, + "step": 296945 + }, + { + "epoch": 0.7, + "grad_norm": 2.140625, + "learning_rate": 0.00014556445246248387, + "loss": 2.0368, + "step": 296950 + }, + { + "epoch": 0.7, + "grad_norm": 2.25, + "learning_rate": 0.00014556280717620049, + "loss": 2.1727, + "step": 296955 + }, + { + "epoch": 0.7, + "grad_norm": 2.203125, + "learning_rate": 0.00014556116187435195, + "loss": 2.0352, + "step": 296960 + }, + { + "epoch": 0.7, + "grad_norm": 1.953125, + "learning_rate": 0.00014555951655693898, + "loss": 2.1017, + "step": 296965 + }, + { + "epoch": 0.7, + "grad_norm": 2.125, + "learning_rate": 0.00014555787122396202, + "loss": 2.0211, + "step": 296970 + }, + { + "epoch": 0.7, + "grad_norm": 2.25, + "learning_rate": 0.00014555622587542167, + "loss": 1.9721, + "step": 296975 + }, + { + "epoch": 0.7, + "grad_norm": 2.453125, + "learning_rate": 0.00014555458051131848, + "loss": 1.959, + "step": 296980 + }, + { + "epoch": 0.7, + "grad_norm": 2.15625, + "learning_rate": 0.000145552935131653, + "loss": 2.0779, + "step": 296985 + }, + { + "epoch": 0.7, + "grad_norm": 1.8984375, + "learning_rate": 0.00014555128973642585, + "loss": 2.0733, + "step": 296990 + }, + { + "epoch": 0.7, + "grad_norm": 2.1875, + "learning_rate": 0.00014554964432563755, + "loss": 1.9458, + "step": 296995 + }, + { + "epoch": 0.7, + "grad_norm": 2.28125, + "learning_rate": 0.00014554799889928864, + "loss": 2.075, + "step": 297000 + }, + { + "epoch": 0.7, + "grad_norm": 2.40625, + "learning_rate": 0.00014554635345737972, + "loss": 1.814, + "step": 297005 + }, + { + "epoch": 0.7, + "grad_norm": 2.359375, + "learning_rate": 0.00014554470799991133, + "loss": 2.0921, + "step": 297010 + }, + { + "epoch": 0.7, + "grad_norm": 2.390625, + "learning_rate": 0.00014554306252688405, + "loss": 1.9911, + "step": 297015 + }, + { + "epoch": 0.7, + "grad_norm": 1.8671875, + "learning_rate": 0.00014554141703829843, + "loss": 2.1012, + "step": 297020 + }, + { + "epoch": 0.7, + "grad_norm": 2.140625, + "learning_rate": 0.00014553977153415504, + "loss": 2.1295, + "step": 297025 + }, + { + "epoch": 0.7, + "grad_norm": 2.578125, + "learning_rate": 0.00014553812601445445, + "loss": 2.1306, + "step": 297030 + }, + { + "epoch": 0.7, + "grad_norm": 2.453125, + "learning_rate": 0.0001455364804791972, + "loss": 1.9968, + "step": 297035 + }, + { + "epoch": 0.7, + "grad_norm": 1.9921875, + "learning_rate": 0.00014553483492838384, + "loss": 2.0565, + "step": 297040 + }, + { + "epoch": 0.7, + "grad_norm": 2.390625, + "learning_rate": 0.000145533189362015, + "loss": 2.1325, + "step": 297045 + }, + { + "epoch": 0.7, + "grad_norm": 1.9921875, + "learning_rate": 0.00014553154378009115, + "loss": 2.1441, + "step": 297050 + }, + { + "epoch": 0.7, + "grad_norm": 2.265625, + "learning_rate": 0.00014552989818261293, + "loss": 2.0104, + "step": 297055 + }, + { + "epoch": 0.7, + "grad_norm": 2.265625, + "learning_rate": 0.00014552825256958086, + "loss": 2.0524, + "step": 297060 + }, + { + "epoch": 0.7, + "grad_norm": 2.171875, + "learning_rate": 0.00014552660694099552, + "loss": 1.8391, + "step": 297065 + }, + { + "epoch": 0.7, + "grad_norm": 1.9375, + "learning_rate": 0.00014552496129685745, + "loss": 1.9407, + "step": 297070 + }, + { + "epoch": 0.7, + "grad_norm": 2.328125, + "learning_rate": 0.00014552331563716722, + "loss": 1.9923, + "step": 297075 + }, + { + "epoch": 0.7, + "grad_norm": 2.109375, + "learning_rate": 0.00014552166996192547, + "loss": 2.1553, + "step": 297080 + }, + { + "epoch": 0.7, + "grad_norm": 1.8125, + "learning_rate": 0.00014552002427113263, + "loss": 2.0872, + "step": 297085 + }, + { + "epoch": 0.7, + "grad_norm": 2.265625, + "learning_rate": 0.00014551837856478935, + "loss": 2.0989, + "step": 297090 + }, + { + "epoch": 0.7, + "grad_norm": 2.40625, + "learning_rate": 0.00014551673284289612, + "loss": 2.3049, + "step": 297095 + }, + { + "epoch": 0.7, + "grad_norm": 2.515625, + "learning_rate": 0.00014551508710545358, + "loss": 2.149, + "step": 297100 + }, + { + "epoch": 0.7, + "grad_norm": 2.21875, + "learning_rate": 0.00014551344135246228, + "loss": 2.1366, + "step": 297105 + }, + { + "epoch": 0.7, + "grad_norm": 2.265625, + "learning_rate": 0.00014551179558392274, + "loss": 2.0279, + "step": 297110 + }, + { + "epoch": 0.7, + "grad_norm": 2.46875, + "learning_rate": 0.00014551014979983552, + "loss": 1.7897, + "step": 297115 + }, + { + "epoch": 0.7, + "grad_norm": 2.390625, + "learning_rate": 0.00014550850400020125, + "loss": 1.9284, + "step": 297120 + }, + { + "epoch": 0.7, + "grad_norm": 2.078125, + "learning_rate": 0.00014550685818502044, + "loss": 2.0764, + "step": 297125 + }, + { + "epoch": 0.7, + "grad_norm": 2.15625, + "learning_rate": 0.00014550521235429367, + "loss": 2.022, + "step": 297130 + }, + { + "epoch": 0.7, + "grad_norm": 2.015625, + "learning_rate": 0.00014550356650802146, + "loss": 2.0919, + "step": 297135 + }, + { + "epoch": 0.7, + "grad_norm": 2.015625, + "learning_rate": 0.00014550192064620443, + "loss": 2.1956, + "step": 297140 + }, + { + "epoch": 0.7, + "grad_norm": 2.34375, + "learning_rate": 0.0001455002747688431, + "loss": 2.1424, + "step": 297145 + }, + { + "epoch": 0.7, + "grad_norm": 2.359375, + "learning_rate": 0.00014549862887593807, + "loss": 2.2304, + "step": 297150 + }, + { + "epoch": 0.7, + "grad_norm": 2.203125, + "learning_rate": 0.00014549698296748988, + "loss": 1.88, + "step": 297155 + }, + { + "epoch": 0.7, + "grad_norm": 2.09375, + "learning_rate": 0.00014549533704349908, + "loss": 2.1131, + "step": 297160 + }, + { + "epoch": 0.7, + "grad_norm": 2.25, + "learning_rate": 0.0001454936911039663, + "loss": 2.0737, + "step": 297165 + }, + { + "epoch": 0.7, + "grad_norm": 2.0625, + "learning_rate": 0.000145492045148892, + "loss": 2.0593, + "step": 297170 + }, + { + "epoch": 0.7, + "grad_norm": 1.953125, + "learning_rate": 0.0001454903991782768, + "loss": 2.0855, + "step": 297175 + }, + { + "epoch": 0.7, + "grad_norm": 2.1875, + "learning_rate": 0.00014548875319212126, + "loss": 1.9413, + "step": 297180 + }, + { + "epoch": 0.7, + "grad_norm": 2.203125, + "learning_rate": 0.00014548710719042594, + "loss": 2.1183, + "step": 297185 + }, + { + "epoch": 0.7, + "grad_norm": 2.15625, + "learning_rate": 0.0001454854611731914, + "loss": 2.143, + "step": 297190 + }, + { + "epoch": 0.7, + "grad_norm": 2.359375, + "learning_rate": 0.00014548381514041823, + "loss": 1.9071, + "step": 297195 + }, + { + "epoch": 0.7, + "grad_norm": 1.9140625, + "learning_rate": 0.0001454821690921069, + "loss": 2.0472, + "step": 297200 + }, + { + "epoch": 0.7, + "grad_norm": 2.453125, + "learning_rate": 0.00014548052302825807, + "loss": 2.1333, + "step": 297205 + }, + { + "epoch": 0.7, + "grad_norm": 2.109375, + "learning_rate": 0.0001454788769488723, + "loss": 2.0493, + "step": 297210 + }, + { + "epoch": 0.7, + "grad_norm": 2.296875, + "learning_rate": 0.00014547723085395005, + "loss": 2.0004, + "step": 297215 + }, + { + "epoch": 0.7, + "grad_norm": 2.25, + "learning_rate": 0.00014547558474349202, + "loss": 2.0019, + "step": 297220 + }, + { + "epoch": 0.7, + "grad_norm": 2.4375, + "learning_rate": 0.00014547393861749866, + "loss": 1.9532, + "step": 297225 + }, + { + "epoch": 0.7, + "grad_norm": 2.0, + "learning_rate": 0.00014547229247597059, + "loss": 2.12, + "step": 297230 + }, + { + "epoch": 0.7, + "grad_norm": 2.515625, + "learning_rate": 0.0001454706463189084, + "loss": 2.081, + "step": 297235 + }, + { + "epoch": 0.7, + "grad_norm": 2.078125, + "learning_rate": 0.00014546900014631255, + "loss": 1.9501, + "step": 297240 + }, + { + "epoch": 0.7, + "grad_norm": 2.15625, + "learning_rate": 0.00014546735395818368, + "loss": 2.0462, + "step": 297245 + }, + { + "epoch": 0.7, + "grad_norm": 2.328125, + "learning_rate": 0.00014546570775452236, + "loss": 1.8475, + "step": 297250 + }, + { + "epoch": 0.7, + "grad_norm": 1.8828125, + "learning_rate": 0.0001454640615353291, + "loss": 2.0791, + "step": 297255 + }, + { + "epoch": 0.7, + "grad_norm": 2.609375, + "learning_rate": 0.00014546241530060455, + "loss": 2.0866, + "step": 297260 + }, + { + "epoch": 0.7, + "grad_norm": 1.9921875, + "learning_rate": 0.00014546076905034915, + "loss": 2.0779, + "step": 297265 + }, + { + "epoch": 0.7, + "grad_norm": 2.046875, + "learning_rate": 0.00014545912278456354, + "loss": 1.897, + "step": 297270 + }, + { + "epoch": 0.7, + "grad_norm": 2.40625, + "learning_rate": 0.0001454574765032483, + "loss": 2.0447, + "step": 297275 + }, + { + "epoch": 0.7, + "grad_norm": 1.9296875, + "learning_rate": 0.00014545583020640393, + "loss": 2.0786, + "step": 297280 + }, + { + "epoch": 0.7, + "grad_norm": 2.71875, + "learning_rate": 0.00014545418389403105, + "loss": 1.8882, + "step": 297285 + }, + { + "epoch": 0.7, + "grad_norm": 2.015625, + "learning_rate": 0.0001454525375661302, + "loss": 1.9337, + "step": 297290 + }, + { + "epoch": 0.7, + "grad_norm": 2.015625, + "learning_rate": 0.0001454508912227019, + "loss": 2.1144, + "step": 297295 + }, + { + "epoch": 0.7, + "grad_norm": 1.9921875, + "learning_rate": 0.0001454492448637468, + "loss": 2.1493, + "step": 297300 + }, + { + "epoch": 0.7, + "grad_norm": 2.0, + "learning_rate": 0.00014544759848926538, + "loss": 2.2015, + "step": 297305 + }, + { + "epoch": 0.7, + "grad_norm": 2.578125, + "learning_rate": 0.00014544595209925823, + "loss": 1.8696, + "step": 297310 + }, + { + "epoch": 0.7, + "grad_norm": 2.09375, + "learning_rate": 0.00014544430569372595, + "loss": 2.1112, + "step": 297315 + }, + { + "epoch": 0.7, + "grad_norm": 1.890625, + "learning_rate": 0.00014544265927266903, + "loss": 2.0737, + "step": 297320 + }, + { + "epoch": 0.7, + "grad_norm": 2.203125, + "learning_rate": 0.00014544101283608807, + "loss": 2.0064, + "step": 297325 + }, + { + "epoch": 0.7, + "grad_norm": 2.203125, + "learning_rate": 0.0001454393663839837, + "loss": 1.928, + "step": 297330 + }, + { + "epoch": 0.7, + "grad_norm": 2.046875, + "learning_rate": 0.00014543771991635637, + "loss": 2.0366, + "step": 297335 + }, + { + "epoch": 0.7, + "grad_norm": 2.375, + "learning_rate": 0.0001454360734332067, + "loss": 2.0933, + "step": 297340 + }, + { + "epoch": 0.7, + "grad_norm": 2.1875, + "learning_rate": 0.00014543442693453523, + "loss": 2.0525, + "step": 297345 + }, + { + "epoch": 0.7, + "grad_norm": 1.9375, + "learning_rate": 0.00014543278042034258, + "loss": 2.0185, + "step": 297350 + }, + { + "epoch": 0.7, + "grad_norm": 2.296875, + "learning_rate": 0.00014543113389062923, + "loss": 2.0145, + "step": 297355 + }, + { + "epoch": 0.7, + "grad_norm": 2.296875, + "learning_rate": 0.00014542948734539582, + "loss": 2.0634, + "step": 297360 + }, + { + "epoch": 0.7, + "grad_norm": 2.203125, + "learning_rate": 0.0001454278407846428, + "loss": 2.1114, + "step": 297365 + }, + { + "epoch": 0.7, + "grad_norm": 2.328125, + "learning_rate": 0.00014542619420837083, + "loss": 1.8602, + "step": 297370 + }, + { + "epoch": 0.7, + "grad_norm": 2.03125, + "learning_rate": 0.0001454245476165805, + "loss": 1.708, + "step": 297375 + }, + { + "epoch": 0.7, + "grad_norm": 2.15625, + "learning_rate": 0.0001454229010092723, + "loss": 2.1208, + "step": 297380 + }, + { + "epoch": 0.7, + "grad_norm": 1.953125, + "learning_rate": 0.0001454212543864468, + "loss": 2.0, + "step": 297385 + }, + { + "epoch": 0.7, + "grad_norm": 2.390625, + "learning_rate": 0.00014541960774810454, + "loss": 2.0592, + "step": 297390 + }, + { + "epoch": 0.7, + "grad_norm": 2.4375, + "learning_rate": 0.0001454179610942462, + "loss": 2.138, + "step": 297395 + }, + { + "epoch": 0.7, + "grad_norm": 2.125, + "learning_rate": 0.0001454163144248722, + "loss": 2.0995, + "step": 297400 + }, + { + "epoch": 0.7, + "grad_norm": 1.9375, + "learning_rate": 0.00014541466773998317, + "loss": 2.1487, + "step": 297405 + }, + { + "epoch": 0.7, + "grad_norm": 2.203125, + "learning_rate": 0.0001454130210395797, + "loss": 2.1297, + "step": 297410 + }, + { + "epoch": 0.7, + "grad_norm": 2.390625, + "learning_rate": 0.00014541137432366228, + "loss": 1.9417, + "step": 297415 + }, + { + "epoch": 0.7, + "grad_norm": 2.171875, + "learning_rate": 0.00014540972759223152, + "loss": 2.1688, + "step": 297420 + }, + { + "epoch": 0.7, + "grad_norm": 2.21875, + "learning_rate": 0.00014540808084528796, + "loss": 2.2467, + "step": 297425 + }, + { + "epoch": 0.7, + "grad_norm": 2.359375, + "learning_rate": 0.0001454064340828322, + "loss": 2.1935, + "step": 297430 + }, + { + "epoch": 0.7, + "grad_norm": 2.375, + "learning_rate": 0.00014540478730486477, + "loss": 2.0934, + "step": 297435 + }, + { + "epoch": 0.7, + "grad_norm": 2.015625, + "learning_rate": 0.00014540314051138625, + "loss": 1.9375, + "step": 297440 + }, + { + "epoch": 0.7, + "grad_norm": 2.015625, + "learning_rate": 0.0001454014937023972, + "loss": 2.0092, + "step": 297445 + }, + { + "epoch": 0.7, + "grad_norm": 2.234375, + "learning_rate": 0.00014539984687789813, + "loss": 1.8989, + "step": 297450 + }, + { + "epoch": 0.7, + "grad_norm": 2.546875, + "learning_rate": 0.00014539820003788971, + "loss": 2.1008, + "step": 297455 + }, + { + "epoch": 0.7, + "grad_norm": 2.15625, + "learning_rate": 0.0001453965531823724, + "loss": 2.0244, + "step": 297460 + }, + { + "epoch": 0.7, + "grad_norm": 2.171875, + "learning_rate": 0.00014539490631134685, + "loss": 1.8716, + "step": 297465 + }, + { + "epoch": 0.7, + "grad_norm": 2.25, + "learning_rate": 0.0001453932594248135, + "loss": 1.9663, + "step": 297470 + }, + { + "epoch": 0.7, + "grad_norm": 2.4375, + "learning_rate": 0.00014539161252277304, + "loss": 2.1179, + "step": 297475 + }, + { + "epoch": 0.7, + "grad_norm": 2.015625, + "learning_rate": 0.00014538996560522599, + "loss": 2.0977, + "step": 297480 + }, + { + "epoch": 0.7, + "grad_norm": 1.7890625, + "learning_rate": 0.0001453883186721729, + "loss": 1.9906, + "step": 297485 + }, + { + "epoch": 0.7, + "grad_norm": 1.8828125, + "learning_rate": 0.00014538667172361432, + "loss": 2.1927, + "step": 297490 + }, + { + "epoch": 0.7, + "grad_norm": 1.6640625, + "learning_rate": 0.00014538502475955082, + "loss": 2.0448, + "step": 297495 + }, + { + "epoch": 0.7, + "grad_norm": 2.1875, + "learning_rate": 0.00014538337777998298, + "loss": 1.9577, + "step": 297500 + }, + { + "epoch": 0.7, + "grad_norm": 2.15625, + "learning_rate": 0.00014538173078491138, + "loss": 2.0173, + "step": 297505 + }, + { + "epoch": 0.7, + "grad_norm": 2.3125, + "learning_rate": 0.00014538008377433656, + "loss": 1.9548, + "step": 297510 + }, + { + "epoch": 0.7, + "grad_norm": 2.28125, + "learning_rate": 0.00014537843674825905, + "loss": 2.2366, + "step": 297515 + }, + { + "epoch": 0.7, + "grad_norm": 2.34375, + "learning_rate": 0.00014537678970667948, + "loss": 1.9846, + "step": 297520 + }, + { + "epoch": 0.7, + "grad_norm": 2.078125, + "learning_rate": 0.00014537514264959832, + "loss": 2.2309, + "step": 297525 + }, + { + "epoch": 0.7, + "grad_norm": 1.796875, + "learning_rate": 0.00014537349557701624, + "loss": 2.1135, + "step": 297530 + }, + { + "epoch": 0.7, + "grad_norm": 2.640625, + "learning_rate": 0.00014537184848893375, + "loss": 2.0057, + "step": 297535 + }, + { + "epoch": 0.7, + "grad_norm": 3.96875, + "learning_rate": 0.00014537020138535137, + "loss": 2.1721, + "step": 297540 + }, + { + "epoch": 0.7, + "grad_norm": 2.359375, + "learning_rate": 0.00014536855426626973, + "loss": 1.9848, + "step": 297545 + }, + { + "epoch": 0.7, + "grad_norm": 2.0625, + "learning_rate": 0.00014536690713168936, + "loss": 2.1488, + "step": 297550 + }, + { + "epoch": 0.7, + "grad_norm": 2.21875, + "learning_rate": 0.00014536525998161087, + "loss": 2.0696, + "step": 297555 + }, + { + "epoch": 0.7, + "grad_norm": 2.265625, + "learning_rate": 0.00014536361281603473, + "loss": 2.1289, + "step": 297560 + }, + { + "epoch": 0.7, + "grad_norm": 1.9609375, + "learning_rate": 0.0001453619656349616, + "loss": 2.1702, + "step": 297565 + }, + { + "epoch": 0.7, + "grad_norm": 1.8359375, + "learning_rate": 0.000145360318438392, + "loss": 2.2235, + "step": 297570 + }, + { + "epoch": 0.7, + "grad_norm": 2.09375, + "learning_rate": 0.00014535867122632645, + "loss": 2.1356, + "step": 297575 + }, + { + "epoch": 0.7, + "grad_norm": 2.234375, + "learning_rate": 0.0001453570239987656, + "loss": 2.139, + "step": 297580 + }, + { + "epoch": 0.7, + "grad_norm": 2.40625, + "learning_rate": 0.00014535537675570994, + "loss": 2.2983, + "step": 297585 + }, + { + "epoch": 0.7, + "grad_norm": 2.109375, + "learning_rate": 0.00014535372949716008, + "loss": 2.2345, + "step": 297590 + }, + { + "epoch": 0.7, + "grad_norm": 2.109375, + "learning_rate": 0.00014535208222311654, + "loss": 2.1794, + "step": 297595 + }, + { + "epoch": 0.7, + "grad_norm": 2.328125, + "learning_rate": 0.0001453504349335799, + "loss": 2.1574, + "step": 297600 + }, + { + "epoch": 0.7, + "grad_norm": 2.4375, + "learning_rate": 0.00014534878762855076, + "loss": 2.0627, + "step": 297605 + }, + { + "epoch": 0.7, + "grad_norm": 2.265625, + "learning_rate": 0.00014534714030802964, + "loss": 2.0695, + "step": 297610 + }, + { + "epoch": 0.7, + "grad_norm": 2.40625, + "learning_rate": 0.00014534549297201714, + "loss": 2.1055, + "step": 297615 + }, + { + "epoch": 0.7, + "grad_norm": 2.265625, + "learning_rate": 0.00014534384562051375, + "loss": 1.9986, + "step": 297620 + }, + { + "epoch": 0.7, + "grad_norm": 2.59375, + "learning_rate": 0.00014534219825352015, + "loss": 2.0309, + "step": 297625 + }, + { + "epoch": 0.7, + "grad_norm": 2.25, + "learning_rate": 0.00014534055087103676, + "loss": 2.0732, + "step": 297630 + }, + { + "epoch": 0.7, + "grad_norm": 2.265625, + "learning_rate": 0.00014533890347306422, + "loss": 2.0554, + "step": 297635 + }, + { + "epoch": 0.7, + "grad_norm": 1.984375, + "learning_rate": 0.00014533725605960312, + "loss": 2.0152, + "step": 297640 + }, + { + "epoch": 0.7, + "grad_norm": 2.09375, + "learning_rate": 0.00014533560863065398, + "loss": 1.9792, + "step": 297645 + }, + { + "epoch": 0.7, + "grad_norm": 2.234375, + "learning_rate": 0.00014533396118621738, + "loss": 1.9572, + "step": 297650 + }, + { + "epoch": 0.7, + "grad_norm": 2.390625, + "learning_rate": 0.00014533231372629386, + "loss": 2.1941, + "step": 297655 + }, + { + "epoch": 0.7, + "grad_norm": 2.3125, + "learning_rate": 0.00014533066625088402, + "loss": 2.027, + "step": 297660 + }, + { + "epoch": 0.7, + "grad_norm": 2.359375, + "learning_rate": 0.00014532901875998836, + "loss": 2.0722, + "step": 297665 + }, + { + "epoch": 0.7, + "grad_norm": 2.5, + "learning_rate": 0.00014532737125360755, + "loss": 2.015, + "step": 297670 + }, + { + "epoch": 0.7, + "grad_norm": 2.078125, + "learning_rate": 0.00014532572373174207, + "loss": 2.0486, + "step": 297675 + }, + { + "epoch": 0.7, + "grad_norm": 2.234375, + "learning_rate": 0.0001453240761943925, + "loss": 1.9447, + "step": 297680 + }, + { + "epoch": 0.7, + "grad_norm": 2.109375, + "learning_rate": 0.00014532242864155934, + "loss": 2.1624, + "step": 297685 + }, + { + "epoch": 0.7, + "grad_norm": 1.703125, + "learning_rate": 0.0001453207810732433, + "loss": 1.8601, + "step": 297690 + }, + { + "epoch": 0.7, + "grad_norm": 1.6875, + "learning_rate": 0.00014531913348944482, + "loss": 1.9567, + "step": 297695 + }, + { + "epoch": 0.7, + "grad_norm": 1.9921875, + "learning_rate": 0.0001453174858901645, + "loss": 2.0875, + "step": 297700 + }, + { + "epoch": 0.7, + "grad_norm": 2.453125, + "learning_rate": 0.00014531583827540292, + "loss": 2.1132, + "step": 297705 + }, + { + "epoch": 0.7, + "grad_norm": 2.203125, + "learning_rate": 0.0001453141906451606, + "loss": 2.0683, + "step": 297710 + }, + { + "epoch": 0.7, + "grad_norm": 2.296875, + "learning_rate": 0.00014531254299943815, + "loss": 2.0685, + "step": 297715 + }, + { + "epoch": 0.7, + "grad_norm": 2.1875, + "learning_rate": 0.00014531089533823612, + "loss": 2.0115, + "step": 297720 + }, + { + "epoch": 0.7, + "grad_norm": 1.9609375, + "learning_rate": 0.00014530924766155507, + "loss": 2.0733, + "step": 297725 + }, + { + "epoch": 0.7, + "grad_norm": 2.359375, + "learning_rate": 0.00014530759996939555, + "loss": 2.1705, + "step": 297730 + }, + { + "epoch": 0.7, + "grad_norm": 2.0625, + "learning_rate": 0.00014530595226175814, + "loss": 2.082, + "step": 297735 + }, + { + "epoch": 0.7, + "grad_norm": 2.03125, + "learning_rate": 0.00014530430453864337, + "loss": 1.988, + "step": 297740 + }, + { + "epoch": 0.7, + "grad_norm": 2.171875, + "learning_rate": 0.00014530265680005183, + "loss": 2.0552, + "step": 297745 + }, + { + "epoch": 0.7, + "grad_norm": 2.71875, + "learning_rate": 0.00014530100904598412, + "loss": 2.0289, + "step": 297750 + }, + { + "epoch": 0.7, + "grad_norm": 1.8671875, + "learning_rate": 0.00014529936127644072, + "loss": 1.9839, + "step": 297755 + }, + { + "epoch": 0.7, + "grad_norm": 2.328125, + "learning_rate": 0.0001452977134914223, + "loss": 2.0812, + "step": 297760 + }, + { + "epoch": 0.7, + "grad_norm": 2.28125, + "learning_rate": 0.00014529606569092929, + "loss": 2.0462, + "step": 297765 + }, + { + "epoch": 0.7, + "grad_norm": 2.296875, + "learning_rate": 0.00014529441787496233, + "loss": 2.161, + "step": 297770 + }, + { + "epoch": 0.7, + "grad_norm": 2.078125, + "learning_rate": 0.000145292770043522, + "loss": 2.0083, + "step": 297775 + }, + { + "epoch": 0.7, + "grad_norm": 2.296875, + "learning_rate": 0.00014529112219660882, + "loss": 2.0775, + "step": 297780 + }, + { + "epoch": 0.7, + "grad_norm": 2.171875, + "learning_rate": 0.00014528947433422343, + "loss": 2.0766, + "step": 297785 + }, + { + "epoch": 0.7, + "grad_norm": 2.671875, + "learning_rate": 0.00014528782645636625, + "loss": 2.0067, + "step": 297790 + }, + { + "epoch": 0.7, + "grad_norm": 1.9609375, + "learning_rate": 0.00014528617856303798, + "loss": 1.8469, + "step": 297795 + }, + { + "epoch": 0.7, + "grad_norm": 2.0, + "learning_rate": 0.0001452845306542391, + "loss": 2.0257, + "step": 297800 + }, + { + "epoch": 0.7, + "grad_norm": 2.421875, + "learning_rate": 0.00014528288272997022, + "loss": 1.9657, + "step": 297805 + }, + { + "epoch": 0.7, + "grad_norm": 2.078125, + "learning_rate": 0.00014528123479023191, + "loss": 1.9827, + "step": 297810 + }, + { + "epoch": 0.7, + "grad_norm": 2.046875, + "learning_rate": 0.00014527958683502467, + "loss": 1.9673, + "step": 297815 + }, + { + "epoch": 0.7, + "grad_norm": 2.171875, + "learning_rate": 0.0001452779388643491, + "loss": 1.9928, + "step": 297820 + }, + { + "epoch": 0.7, + "grad_norm": 2.046875, + "learning_rate": 0.0001452762908782058, + "loss": 1.9915, + "step": 297825 + }, + { + "epoch": 0.7, + "grad_norm": 2.375, + "learning_rate": 0.00014527464287659528, + "loss": 2.0399, + "step": 297830 + }, + { + "epoch": 0.7, + "grad_norm": 2.203125, + "learning_rate": 0.00014527299485951812, + "loss": 2.0777, + "step": 297835 + }, + { + "epoch": 0.7, + "grad_norm": 1.7890625, + "learning_rate": 0.00014527134682697487, + "loss": 2.1694, + "step": 297840 + }, + { + "epoch": 0.7, + "grad_norm": 2.046875, + "learning_rate": 0.0001452696987789661, + "loss": 2.0508, + "step": 297845 + }, + { + "epoch": 0.7, + "grad_norm": 2.453125, + "learning_rate": 0.00014526805071549243, + "loss": 2.1956, + "step": 297850 + }, + { + "epoch": 0.7, + "grad_norm": 2.25, + "learning_rate": 0.00014526640263655432, + "loss": 1.9472, + "step": 297855 + }, + { + "epoch": 0.7, + "grad_norm": 1.8359375, + "learning_rate": 0.00014526475454215243, + "loss": 2.0123, + "step": 297860 + }, + { + "epoch": 0.7, + "grad_norm": 2.1875, + "learning_rate": 0.00014526310643228724, + "loss": 2.0315, + "step": 297865 + }, + { + "epoch": 0.7, + "grad_norm": 2.3125, + "learning_rate": 0.00014526145830695938, + "loss": 2.0375, + "step": 297870 + }, + { + "epoch": 0.7, + "grad_norm": 2.1875, + "learning_rate": 0.00014525981016616935, + "loss": 1.9418, + "step": 297875 + }, + { + "epoch": 0.7, + "grad_norm": 2.3125, + "learning_rate": 0.0001452581620099178, + "loss": 1.9762, + "step": 297880 + }, + { + "epoch": 0.7, + "grad_norm": 2.203125, + "learning_rate": 0.0001452565138382052, + "loss": 2.23, + "step": 297885 + }, + { + "epoch": 0.7, + "grad_norm": 1.8984375, + "learning_rate": 0.00014525486565103218, + "loss": 2.1272, + "step": 297890 + }, + { + "epoch": 0.7, + "grad_norm": 2.171875, + "learning_rate": 0.00014525321744839927, + "loss": 2.1562, + "step": 297895 + }, + { + "epoch": 0.7, + "grad_norm": 2.265625, + "learning_rate": 0.000145251569230307, + "loss": 2.1673, + "step": 297900 + }, + { + "epoch": 0.7, + "grad_norm": 1.84375, + "learning_rate": 0.00014524992099675604, + "loss": 2.3556, + "step": 297905 + }, + { + "epoch": 0.7, + "grad_norm": 2.59375, + "learning_rate": 0.00014524827274774684, + "loss": 2.0839, + "step": 297910 + }, + { + "epoch": 0.7, + "grad_norm": 2.25, + "learning_rate": 0.00014524662448328003, + "loss": 2.2099, + "step": 297915 + }, + { + "epoch": 0.7, + "grad_norm": 2.1875, + "learning_rate": 0.00014524497620335615, + "loss": 2.054, + "step": 297920 + }, + { + "epoch": 0.7, + "grad_norm": 2.234375, + "learning_rate": 0.00014524332790797573, + "loss": 2.031, + "step": 297925 + }, + { + "epoch": 0.7, + "grad_norm": 3.1875, + "learning_rate": 0.00014524167959713942, + "loss": 2.1032, + "step": 297930 + }, + { + "epoch": 0.7, + "grad_norm": 2.234375, + "learning_rate": 0.0001452400312708477, + "loss": 1.9968, + "step": 297935 + }, + { + "epoch": 0.7, + "grad_norm": 2.21875, + "learning_rate": 0.00014523838292910122, + "loss": 1.9993, + "step": 297940 + }, + { + "epoch": 0.7, + "grad_norm": 2.296875, + "learning_rate": 0.00014523673457190043, + "loss": 2.3065, + "step": 297945 + }, + { + "epoch": 0.7, + "grad_norm": 2.4375, + "learning_rate": 0.00014523508619924592, + "loss": 2.027, + "step": 297950 + }, + { + "epoch": 0.7, + "grad_norm": 2.234375, + "learning_rate": 0.00014523343781113837, + "loss": 2.1574, + "step": 297955 + }, + { + "epoch": 0.7, + "grad_norm": 2.078125, + "learning_rate": 0.0001452317894075782, + "loss": 1.9224, + "step": 297960 + }, + { + "epoch": 0.7, + "grad_norm": 2.28125, + "learning_rate": 0.00014523014098856605, + "loss": 1.964, + "step": 297965 + }, + { + "epoch": 0.7, + "grad_norm": 2.09375, + "learning_rate": 0.00014522849255410247, + "loss": 2.0851, + "step": 297970 + }, + { + "epoch": 0.7, + "grad_norm": 2.03125, + "learning_rate": 0.000145226844104188, + "loss": 2.0784, + "step": 297975 + }, + { + "epoch": 0.7, + "grad_norm": 2.1875, + "learning_rate": 0.00014522519563882322, + "loss": 2.1749, + "step": 297980 + }, + { + "epoch": 0.7, + "grad_norm": 2.203125, + "learning_rate": 0.00014522354715800872, + "loss": 1.9379, + "step": 297985 + }, + { + "epoch": 0.7, + "grad_norm": 2.390625, + "learning_rate": 0.00014522189866174502, + "loss": 1.9814, + "step": 297990 + }, + { + "epoch": 0.7, + "grad_norm": 1.8203125, + "learning_rate": 0.00014522025015003268, + "loss": 1.974, + "step": 297995 + }, + { + "epoch": 0.7, + "grad_norm": 2.0625, + "learning_rate": 0.0001452186016228723, + "loss": 1.9096, + "step": 298000 + }, + { + "epoch": 0.7, + "grad_norm": 2.375, + "learning_rate": 0.0001452169530802644, + "loss": 2.1345, + "step": 298005 + }, + { + "epoch": 0.7, + "grad_norm": 2.375, + "learning_rate": 0.0001452153045222096, + "loss": 2.1092, + "step": 298010 + }, + { + "epoch": 0.7, + "grad_norm": 2.421875, + "learning_rate": 0.00014521365594870846, + "loss": 2.1036, + "step": 298015 + }, + { + "epoch": 0.7, + "grad_norm": 2.25, + "learning_rate": 0.00014521200735976146, + "loss": 2.1038, + "step": 298020 + }, + { + "epoch": 0.7, + "grad_norm": 2.25, + "learning_rate": 0.00014521035875536927, + "loss": 2.0833, + "step": 298025 + }, + { + "epoch": 0.7, + "grad_norm": 2.203125, + "learning_rate": 0.00014520871013553235, + "loss": 2.001, + "step": 298030 + }, + { + "epoch": 0.7, + "grad_norm": 2.1875, + "learning_rate": 0.00014520706150025135, + "loss": 1.9279, + "step": 298035 + }, + { + "epoch": 0.7, + "grad_norm": 2.203125, + "learning_rate": 0.00014520541284952676, + "loss": 2.0771, + "step": 298040 + }, + { + "epoch": 0.7, + "grad_norm": 2.484375, + "learning_rate": 0.0001452037641833592, + "loss": 1.9995, + "step": 298045 + }, + { + "epoch": 0.7, + "grad_norm": 1.9453125, + "learning_rate": 0.00014520211550174923, + "loss": 2.0737, + "step": 298050 + }, + { + "epoch": 0.7, + "grad_norm": 2.359375, + "learning_rate": 0.0001452004668046974, + "loss": 2.271, + "step": 298055 + }, + { + "epoch": 0.7, + "grad_norm": 2.265625, + "learning_rate": 0.00014519881809220422, + "loss": 2.1436, + "step": 298060 + }, + { + "epoch": 0.7, + "grad_norm": 2.390625, + "learning_rate": 0.00014519716936427034, + "loss": 2.0504, + "step": 298065 + }, + { + "epoch": 0.7, + "grad_norm": 2.1875, + "learning_rate": 0.0001451955206208963, + "loss": 2.0545, + "step": 298070 + }, + { + "epoch": 0.7, + "grad_norm": 1.921875, + "learning_rate": 0.00014519387186208262, + "loss": 1.9303, + "step": 298075 + }, + { + "epoch": 0.7, + "grad_norm": 2.28125, + "learning_rate": 0.00014519222308782994, + "loss": 2.2334, + "step": 298080 + }, + { + "epoch": 0.7, + "grad_norm": 2.5, + "learning_rate": 0.00014519057429813873, + "loss": 2.1213, + "step": 298085 + }, + { + "epoch": 0.7, + "grad_norm": 1.96875, + "learning_rate": 0.00014518892549300963, + "loss": 2.1447, + "step": 298090 + }, + { + "epoch": 0.7, + "grad_norm": 2.140625, + "learning_rate": 0.00014518727667244314, + "loss": 2.0387, + "step": 298095 + }, + { + "epoch": 0.7, + "grad_norm": 2.15625, + "learning_rate": 0.00014518562783643991, + "loss": 2.069, + "step": 298100 + }, + { + "epoch": 0.7, + "grad_norm": 1.875, + "learning_rate": 0.00014518397898500042, + "loss": 1.9998, + "step": 298105 + }, + { + "epoch": 0.7, + "grad_norm": 2.265625, + "learning_rate": 0.00014518233011812527, + "loss": 2.0952, + "step": 298110 + }, + { + "epoch": 0.7, + "grad_norm": 2.75, + "learning_rate": 0.00014518068123581498, + "loss": 1.9472, + "step": 298115 + }, + { + "epoch": 0.7, + "grad_norm": 1.8671875, + "learning_rate": 0.0001451790323380702, + "loss": 1.954, + "step": 298120 + }, + { + "epoch": 0.7, + "grad_norm": 2.0625, + "learning_rate": 0.00014517738342489142, + "loss": 2.0335, + "step": 298125 + }, + { + "epoch": 0.7, + "grad_norm": 1.9296875, + "learning_rate": 0.00014517573449627925, + "loss": 1.9098, + "step": 298130 + }, + { + "epoch": 0.7, + "grad_norm": 2.0625, + "learning_rate": 0.0001451740855522342, + "loss": 1.9916, + "step": 298135 + }, + { + "epoch": 0.7, + "grad_norm": 2.484375, + "learning_rate": 0.00014517243659275685, + "loss": 2.1127, + "step": 298140 + }, + { + "epoch": 0.7, + "grad_norm": 2.265625, + "learning_rate": 0.0001451707876178478, + "loss": 1.9996, + "step": 298145 + }, + { + "epoch": 0.7, + "grad_norm": 2.25, + "learning_rate": 0.0001451691386275076, + "loss": 1.9985, + "step": 298150 + }, + { + "epoch": 0.7, + "grad_norm": 2.328125, + "learning_rate": 0.0001451674896217368, + "loss": 1.9309, + "step": 298155 + }, + { + "epoch": 0.7, + "grad_norm": 2.359375, + "learning_rate": 0.00014516584060053593, + "loss": 1.9276, + "step": 298160 + }, + { + "epoch": 0.7, + "grad_norm": 2.25, + "learning_rate": 0.00014516419156390562, + "loss": 2.1501, + "step": 298165 + }, + { + "epoch": 0.7, + "grad_norm": 2.28125, + "learning_rate": 0.0001451625425118464, + "loss": 1.9115, + "step": 298170 + }, + { + "epoch": 0.7, + "grad_norm": 2.0, + "learning_rate": 0.00014516089344435883, + "loss": 2.116, + "step": 298175 + }, + { + "epoch": 0.7, + "grad_norm": 2.0, + "learning_rate": 0.0001451592443614435, + "loss": 2.0392, + "step": 298180 + }, + { + "epoch": 0.7, + "grad_norm": 2.46875, + "learning_rate": 0.00014515759526310094, + "loss": 2.2641, + "step": 298185 + }, + { + "epoch": 0.7, + "grad_norm": 1.9609375, + "learning_rate": 0.00014515594614933174, + "loss": 2.0268, + "step": 298190 + }, + { + "epoch": 0.7, + "grad_norm": 2.296875, + "learning_rate": 0.00014515429702013642, + "loss": 2.067, + "step": 298195 + }, + { + "epoch": 0.7, + "grad_norm": 2.609375, + "learning_rate": 0.0001451526478755156, + "loss": 2.0023, + "step": 298200 + }, + { + "epoch": 0.7, + "grad_norm": 2.59375, + "learning_rate": 0.00014515099871546979, + "loss": 1.9599, + "step": 298205 + }, + { + "epoch": 0.7, + "grad_norm": 1.8984375, + "learning_rate": 0.00014514934953999958, + "loss": 2.0792, + "step": 298210 + }, + { + "epoch": 0.7, + "grad_norm": 2.21875, + "learning_rate": 0.0001451477003491056, + "loss": 2.0655, + "step": 298215 + }, + { + "epoch": 0.7, + "grad_norm": 2.5625, + "learning_rate": 0.00014514605114278824, + "loss": 2.1133, + "step": 298220 + }, + { + "epoch": 0.7, + "grad_norm": 2.609375, + "learning_rate": 0.00014514440192104825, + "loss": 2.0932, + "step": 298225 + }, + { + "epoch": 0.7, + "grad_norm": 2.609375, + "learning_rate": 0.0001451427526838861, + "loss": 1.8561, + "step": 298230 + }, + { + "epoch": 0.7, + "grad_norm": 2.359375, + "learning_rate": 0.00014514110343130237, + "loss": 2.0509, + "step": 298235 + }, + { + "epoch": 0.7, + "grad_norm": 2.1875, + "learning_rate": 0.0001451394541632976, + "loss": 1.9725, + "step": 298240 + }, + { + "epoch": 0.7, + "grad_norm": 2.53125, + "learning_rate": 0.00014513780487987235, + "loss": 2.0164, + "step": 298245 + }, + { + "epoch": 0.7, + "grad_norm": 1.90625, + "learning_rate": 0.00014513615558102726, + "loss": 2.1809, + "step": 298250 + }, + { + "epoch": 0.7, + "grad_norm": 2.046875, + "learning_rate": 0.0001451345062667628, + "loss": 1.9367, + "step": 298255 + }, + { + "epoch": 0.7, + "grad_norm": 2.6875, + "learning_rate": 0.0001451328569370796, + "loss": 2.1591, + "step": 298260 + }, + { + "epoch": 0.7, + "grad_norm": 2.65625, + "learning_rate": 0.0001451312075919782, + "loss": 2.1099, + "step": 298265 + }, + { + "epoch": 0.7, + "grad_norm": 1.859375, + "learning_rate": 0.00014512955823145916, + "loss": 2.0872, + "step": 298270 + }, + { + "epoch": 0.7, + "grad_norm": 1.9140625, + "learning_rate": 0.000145127908855523, + "loss": 2.0354, + "step": 298275 + }, + { + "epoch": 0.7, + "grad_norm": 2.359375, + "learning_rate": 0.00014512625946417037, + "loss": 2.0524, + "step": 298280 + }, + { + "epoch": 0.7, + "grad_norm": 2.15625, + "learning_rate": 0.0001451246100574018, + "loss": 2.0776, + "step": 298285 + }, + { + "epoch": 0.7, + "grad_norm": 1.828125, + "learning_rate": 0.00014512296063521785, + "loss": 1.8152, + "step": 298290 + }, + { + "epoch": 0.7, + "grad_norm": 1.90625, + "learning_rate": 0.00014512131119761903, + "loss": 2.0976, + "step": 298295 + }, + { + "epoch": 0.7, + "grad_norm": 2.296875, + "learning_rate": 0.00014511966174460595, + "loss": 2.1479, + "step": 298300 + }, + { + "epoch": 0.7, + "grad_norm": 2.421875, + "learning_rate": 0.00014511801227617922, + "loss": 2.3119, + "step": 298305 + }, + { + "epoch": 0.7, + "grad_norm": 2.109375, + "learning_rate": 0.00014511636279233937, + "loss": 2.0432, + "step": 298310 + }, + { + "epoch": 0.7, + "grad_norm": 2.359375, + "learning_rate": 0.00014511471329308693, + "loss": 2.1391, + "step": 298315 + }, + { + "epoch": 0.7, + "grad_norm": 2.515625, + "learning_rate": 0.00014511306377842248, + "loss": 2.2375, + "step": 298320 + }, + { + "epoch": 0.7, + "grad_norm": 2.484375, + "learning_rate": 0.00014511141424834658, + "loss": 1.9407, + "step": 298325 + }, + { + "epoch": 0.7, + "grad_norm": 1.9140625, + "learning_rate": 0.00014510976470285983, + "loss": 2.1626, + "step": 298330 + }, + { + "epoch": 0.7, + "grad_norm": 2.421875, + "learning_rate": 0.00014510811514196274, + "loss": 2.0419, + "step": 298335 + }, + { + "epoch": 0.7, + "grad_norm": 2.21875, + "learning_rate": 0.0001451064655656559, + "loss": 2.0384, + "step": 298340 + }, + { + "epoch": 0.7, + "grad_norm": 2.28125, + "learning_rate": 0.00014510481597393988, + "loss": 2.1636, + "step": 298345 + }, + { + "epoch": 0.7, + "grad_norm": 2.328125, + "learning_rate": 0.00014510316636681526, + "loss": 2.0464, + "step": 298350 + }, + { + "epoch": 0.7, + "grad_norm": 2.109375, + "learning_rate": 0.00014510151674428253, + "loss": 2.1396, + "step": 298355 + }, + { + "epoch": 0.7, + "grad_norm": 2.0625, + "learning_rate": 0.00014509986710634234, + "loss": 2.0816, + "step": 298360 + }, + { + "epoch": 0.7, + "grad_norm": 2.171875, + "learning_rate": 0.0001450982174529952, + "loss": 1.9723, + "step": 298365 + }, + { + "epoch": 0.7, + "grad_norm": 2.28125, + "learning_rate": 0.0001450965677842417, + "loss": 1.9865, + "step": 298370 + }, + { + "epoch": 0.7, + "grad_norm": 1.9609375, + "learning_rate": 0.0001450949181000824, + "loss": 2.3117, + "step": 298375 + }, + { + "epoch": 0.7, + "grad_norm": 1.9453125, + "learning_rate": 0.00014509326840051788, + "loss": 2.2287, + "step": 298380 + }, + { + "epoch": 0.7, + "grad_norm": 2.5625, + "learning_rate": 0.00014509161868554867, + "loss": 2.0456, + "step": 298385 + }, + { + "epoch": 0.7, + "grad_norm": 2.4375, + "learning_rate": 0.0001450899689551753, + "loss": 2.0664, + "step": 298390 + }, + { + "epoch": 0.7, + "grad_norm": 2.46875, + "learning_rate": 0.00014508831920939845, + "loss": 2.1221, + "step": 298395 + }, + { + "epoch": 0.7, + "grad_norm": 1.9140625, + "learning_rate": 0.00014508666944821857, + "loss": 2.046, + "step": 298400 + }, + { + "epoch": 0.7, + "grad_norm": 2.421875, + "learning_rate": 0.00014508501967163624, + "loss": 2.1345, + "step": 298405 + }, + { + "epoch": 0.7, + "grad_norm": 1.9921875, + "learning_rate": 0.0001450833698796521, + "loss": 2.0835, + "step": 298410 + }, + { + "epoch": 0.7, + "grad_norm": 2.15625, + "learning_rate": 0.00014508172007226663, + "loss": 2.3323, + "step": 298415 + }, + { + "epoch": 0.7, + "grad_norm": 2.09375, + "learning_rate": 0.00014508007024948045, + "loss": 2.0294, + "step": 298420 + }, + { + "epoch": 0.7, + "grad_norm": 2.1875, + "learning_rate": 0.00014507842041129408, + "loss": 2.0496, + "step": 298425 + }, + { + "epoch": 0.7, + "grad_norm": 2.75, + "learning_rate": 0.0001450767705577081, + "loss": 2.2684, + "step": 298430 + }, + { + "epoch": 0.7, + "grad_norm": 1.9921875, + "learning_rate": 0.00014507512068872308, + "loss": 1.886, + "step": 298435 + }, + { + "epoch": 0.7, + "grad_norm": 2.34375, + "learning_rate": 0.00014507347080433962, + "loss": 2.1476, + "step": 298440 + }, + { + "epoch": 0.7, + "grad_norm": 2.203125, + "learning_rate": 0.00014507182090455823, + "loss": 2.0429, + "step": 298445 + }, + { + "epoch": 0.7, + "grad_norm": 2.15625, + "learning_rate": 0.00014507017098937947, + "loss": 2.0224, + "step": 298450 + }, + { + "epoch": 0.7, + "grad_norm": 2.265625, + "learning_rate": 0.00014506852105880392, + "loss": 2.1045, + "step": 298455 + }, + { + "epoch": 0.7, + "grad_norm": 2.046875, + "learning_rate": 0.00014506687111283216, + "loss": 2.0155, + "step": 298460 + }, + { + "epoch": 0.7, + "grad_norm": 2.59375, + "learning_rate": 0.00014506522115146473, + "loss": 2.0606, + "step": 298465 + }, + { + "epoch": 0.7, + "grad_norm": 2.296875, + "learning_rate": 0.0001450635711747022, + "loss": 2.1065, + "step": 298470 + }, + { + "epoch": 0.7, + "grad_norm": 1.90625, + "learning_rate": 0.00014506192118254516, + "loss": 2.0901, + "step": 298475 + }, + { + "epoch": 0.7, + "grad_norm": 2.515625, + "learning_rate": 0.00014506027117499413, + "loss": 2.1304, + "step": 298480 + }, + { + "epoch": 0.7, + "grad_norm": 1.9609375, + "learning_rate": 0.00014505862115204968, + "loss": 1.956, + "step": 298485 + }, + { + "epoch": 0.7, + "grad_norm": 2.375, + "learning_rate": 0.0001450569711137124, + "loss": 2.2155, + "step": 298490 + }, + { + "epoch": 0.7, + "grad_norm": 2.1875, + "learning_rate": 0.00014505532105998285, + "loss": 2.2412, + "step": 298495 + }, + { + "epoch": 0.7, + "grad_norm": 2.171875, + "learning_rate": 0.0001450536709908616, + "loss": 2.0703, + "step": 298500 + }, + { + "epoch": 0.7, + "grad_norm": 2.046875, + "learning_rate": 0.00014505202090634917, + "loss": 2.1965, + "step": 298505 + }, + { + "epoch": 0.7, + "grad_norm": 2.21875, + "learning_rate": 0.00014505037080644618, + "loss": 2.1495, + "step": 298510 + }, + { + "epoch": 0.7, + "grad_norm": 2.3125, + "learning_rate": 0.00014504872069115313, + "loss": 2.0461, + "step": 298515 + }, + { + "epoch": 0.7, + "grad_norm": 2.359375, + "learning_rate": 0.00014504707056047064, + "loss": 2.0151, + "step": 298520 + }, + { + "epoch": 0.7, + "grad_norm": 2.515625, + "learning_rate": 0.00014504542041439926, + "loss": 2.0071, + "step": 298525 + }, + { + "epoch": 0.7, + "grad_norm": 2.0, + "learning_rate": 0.00014504377025293956, + "loss": 2.2541, + "step": 298530 + }, + { + "epoch": 0.7, + "grad_norm": 2.296875, + "learning_rate": 0.0001450421200760921, + "loss": 2.1205, + "step": 298535 + }, + { + "epoch": 0.7, + "grad_norm": 2.265625, + "learning_rate": 0.00014504046988385736, + "loss": 1.9033, + "step": 298540 + }, + { + "epoch": 0.7, + "grad_norm": 1.890625, + "learning_rate": 0.00014503881967623601, + "loss": 1.9185, + "step": 298545 + }, + { + "epoch": 0.7, + "grad_norm": 2.203125, + "learning_rate": 0.00014503716945322862, + "loss": 2.0857, + "step": 298550 + }, + { + "epoch": 0.7, + "grad_norm": 2.109375, + "learning_rate": 0.00014503551921483571, + "loss": 2.0636, + "step": 298555 + }, + { + "epoch": 0.7, + "grad_norm": 2.1875, + "learning_rate": 0.00014503386896105785, + "loss": 1.9598, + "step": 298560 + }, + { + "epoch": 0.7, + "grad_norm": 2.21875, + "learning_rate": 0.00014503221869189558, + "loss": 1.9984, + "step": 298565 + }, + { + "epoch": 0.7, + "grad_norm": 2.359375, + "learning_rate": 0.00014503056840734947, + "loss": 2.1624, + "step": 298570 + }, + { + "epoch": 0.7, + "grad_norm": 1.9140625, + "learning_rate": 0.00014502891810742015, + "loss": 1.962, + "step": 298575 + }, + { + "epoch": 0.7, + "grad_norm": 2.28125, + "learning_rate": 0.00014502726779210812, + "loss": 2.0271, + "step": 298580 + }, + { + "epoch": 0.7, + "grad_norm": 2.296875, + "learning_rate": 0.00014502561746141394, + "loss": 2.1528, + "step": 298585 + }, + { + "epoch": 0.7, + "grad_norm": 2.125, + "learning_rate": 0.0001450239671153382, + "loss": 2.1037, + "step": 298590 + }, + { + "epoch": 0.7, + "grad_norm": 1.4765625, + "learning_rate": 0.00014502231675388145, + "loss": 2.0148, + "step": 298595 + }, + { + "epoch": 0.7, + "grad_norm": 2.0, + "learning_rate": 0.00014502066637704427, + "loss": 1.9646, + "step": 298600 + }, + { + "epoch": 0.7, + "grad_norm": 2.21875, + "learning_rate": 0.0001450190159848272, + "loss": 1.8253, + "step": 298605 + }, + { + "epoch": 0.7, + "grad_norm": 1.9921875, + "learning_rate": 0.00014501736557723084, + "loss": 1.989, + "step": 298610 + }, + { + "epoch": 0.7, + "grad_norm": 2.125, + "learning_rate": 0.00014501571515425574, + "loss": 2.0589, + "step": 298615 + }, + { + "epoch": 0.7, + "grad_norm": 2.421875, + "learning_rate": 0.00014501406471590245, + "loss": 2.1608, + "step": 298620 + }, + { + "epoch": 0.7, + "grad_norm": 2.3125, + "learning_rate": 0.00014501241426217152, + "loss": 2.1393, + "step": 298625 + }, + { + "epoch": 0.7, + "grad_norm": 2.546875, + "learning_rate": 0.00014501076379306354, + "loss": 2.1573, + "step": 298630 + }, + { + "epoch": 0.7, + "grad_norm": 2.28125, + "learning_rate": 0.00014500911330857905, + "loss": 2.0094, + "step": 298635 + }, + { + "epoch": 0.7, + "grad_norm": 2.8125, + "learning_rate": 0.00014500746280871865, + "loss": 1.9909, + "step": 298640 + }, + { + "epoch": 0.7, + "grad_norm": 2.28125, + "learning_rate": 0.00014500581229348288, + "loss": 2.1627, + "step": 298645 + }, + { + "epoch": 0.7, + "grad_norm": 2.359375, + "learning_rate": 0.0001450041617628723, + "loss": 1.9594, + "step": 298650 + }, + { + "epoch": 0.7, + "grad_norm": 2.0625, + "learning_rate": 0.0001450025112168875, + "loss": 2.2511, + "step": 298655 + }, + { + "epoch": 0.7, + "grad_norm": 2.15625, + "learning_rate": 0.00014500086065552902, + "loss": 2.1168, + "step": 298660 + }, + { + "epoch": 0.7, + "grad_norm": 2.25, + "learning_rate": 0.00014499921007879741, + "loss": 2.0833, + "step": 298665 + }, + { + "epoch": 0.7, + "grad_norm": 2.171875, + "learning_rate": 0.0001449975594866933, + "loss": 2.0062, + "step": 298670 + }, + { + "epoch": 0.7, + "grad_norm": 2.640625, + "learning_rate": 0.00014499590887921717, + "loss": 2.0754, + "step": 298675 + }, + { + "epoch": 0.7, + "grad_norm": 2.453125, + "learning_rate": 0.00014499425825636962, + "loss": 2.1076, + "step": 298680 + }, + { + "epoch": 0.7, + "grad_norm": 2.171875, + "learning_rate": 0.00014499260761815123, + "loss": 2.0818, + "step": 298685 + }, + { + "epoch": 0.7, + "grad_norm": 2.296875, + "learning_rate": 0.00014499095696456258, + "loss": 1.9677, + "step": 298690 + }, + { + "epoch": 0.7, + "grad_norm": 2.71875, + "learning_rate": 0.00014498930629560416, + "loss": 2.0665, + "step": 298695 + }, + { + "epoch": 0.7, + "grad_norm": 2.4375, + "learning_rate": 0.00014498765561127657, + "loss": 2.1529, + "step": 298700 + }, + { + "epoch": 0.7, + "grad_norm": 2.015625, + "learning_rate": 0.0001449860049115804, + "loss": 2.0391, + "step": 298705 + }, + { + "epoch": 0.7, + "grad_norm": 1.953125, + "learning_rate": 0.00014498435419651616, + "loss": 2.0388, + "step": 298710 + }, + { + "epoch": 0.7, + "grad_norm": 2.171875, + "learning_rate": 0.00014498270346608453, + "loss": 2.0304, + "step": 298715 + }, + { + "epoch": 0.7, + "grad_norm": 2.15625, + "learning_rate": 0.00014498105272028593, + "loss": 2.1089, + "step": 298720 + }, + { + "epoch": 0.7, + "grad_norm": 2.40625, + "learning_rate": 0.000144979401959121, + "loss": 2.1453, + "step": 298725 + }, + { + "epoch": 0.7, + "grad_norm": 2.265625, + "learning_rate": 0.00014497775118259027, + "loss": 2.1986, + "step": 298730 + }, + { + "epoch": 0.7, + "grad_norm": 2.25, + "learning_rate": 0.00014497610039069435, + "loss": 2.1697, + "step": 298735 + }, + { + "epoch": 0.7, + "grad_norm": 1.8046875, + "learning_rate": 0.0001449744495834338, + "loss": 2.021, + "step": 298740 + }, + { + "epoch": 0.7, + "grad_norm": 2.203125, + "learning_rate": 0.00014497279876080912, + "loss": 1.9615, + "step": 298745 + }, + { + "epoch": 0.7, + "grad_norm": 2.09375, + "learning_rate": 0.00014497114792282094, + "loss": 2.1615, + "step": 298750 + }, + { + "epoch": 0.7, + "grad_norm": 2.046875, + "learning_rate": 0.0001449694970694698, + "loss": 2.0744, + "step": 298755 + }, + { + "epoch": 0.7, + "grad_norm": 2.53125, + "learning_rate": 0.00014496784620075624, + "loss": 2.0175, + "step": 298760 + }, + { + "epoch": 0.7, + "grad_norm": 2.203125, + "learning_rate": 0.00014496619531668086, + "loss": 1.846, + "step": 298765 + }, + { + "epoch": 0.7, + "grad_norm": 2.390625, + "learning_rate": 0.00014496454441724423, + "loss": 2.0368, + "step": 298770 + }, + { + "epoch": 0.7, + "grad_norm": 2.546875, + "learning_rate": 0.00014496289350244686, + "loss": 2.2012, + "step": 298775 + }, + { + "epoch": 0.7, + "grad_norm": 2.1875, + "learning_rate": 0.0001449612425722894, + "loss": 2.0599, + "step": 298780 + }, + { + "epoch": 0.7, + "grad_norm": 2.1875, + "learning_rate": 0.00014495959162677232, + "loss": 2.0472, + "step": 298785 + }, + { + "epoch": 0.7, + "grad_norm": 1.9921875, + "learning_rate": 0.00014495794066589625, + "loss": 2.0029, + "step": 298790 + }, + { + "epoch": 0.7, + "grad_norm": 2.0625, + "learning_rate": 0.00014495628968966173, + "loss": 1.8681, + "step": 298795 + }, + { + "epoch": 0.7, + "grad_norm": 2.734375, + "learning_rate": 0.00014495463869806932, + "loss": 1.8748, + "step": 298800 + }, + { + "epoch": 0.7, + "grad_norm": 1.84375, + "learning_rate": 0.0001449529876911196, + "loss": 2.0012, + "step": 298805 + }, + { + "epoch": 0.7, + "grad_norm": 2.03125, + "learning_rate": 0.0001449513366688131, + "loss": 2.1023, + "step": 298810 + }, + { + "epoch": 0.7, + "grad_norm": 2.328125, + "learning_rate": 0.00014494968563115044, + "loss": 1.996, + "step": 298815 + }, + { + "epoch": 0.7, + "grad_norm": 2.015625, + "learning_rate": 0.00014494803457813213, + "loss": 2.1008, + "step": 298820 + }, + { + "epoch": 0.7, + "grad_norm": 1.984375, + "learning_rate": 0.00014494638350975877, + "loss": 2.0643, + "step": 298825 + }, + { + "epoch": 0.7, + "grad_norm": 2.390625, + "learning_rate": 0.00014494473242603092, + "loss": 1.9117, + "step": 298830 + }, + { + "epoch": 0.7, + "grad_norm": 2.515625, + "learning_rate": 0.0001449430813269491, + "loss": 1.9948, + "step": 298835 + }, + { + "epoch": 0.7, + "grad_norm": 2.734375, + "learning_rate": 0.0001449414302125139, + "loss": 2.1229, + "step": 298840 + }, + { + "epoch": 0.7, + "grad_norm": 2.25, + "learning_rate": 0.00014493977908272595, + "loss": 2.0799, + "step": 298845 + }, + { + "epoch": 0.7, + "grad_norm": 2.171875, + "learning_rate": 0.00014493812793758574, + "loss": 2.2356, + "step": 298850 + }, + { + "epoch": 0.7, + "grad_norm": 2.328125, + "learning_rate": 0.00014493647677709382, + "loss": 2.0868, + "step": 298855 + }, + { + "epoch": 0.7, + "grad_norm": 1.9609375, + "learning_rate": 0.00014493482560125082, + "loss": 2.1923, + "step": 298860 + }, + { + "epoch": 0.7, + "grad_norm": 2.390625, + "learning_rate": 0.00014493317441005723, + "loss": 2.2843, + "step": 298865 + }, + { + "epoch": 0.7, + "grad_norm": 2.3125, + "learning_rate": 0.0001449315232035137, + "loss": 1.9697, + "step": 298870 + }, + { + "epoch": 0.7, + "grad_norm": 1.8203125, + "learning_rate": 0.0001449298719816207, + "loss": 1.9665, + "step": 298875 + }, + { + "epoch": 0.7, + "grad_norm": 2.34375, + "learning_rate": 0.00014492822074437886, + "loss": 2.1288, + "step": 298880 + }, + { + "epoch": 0.7, + "grad_norm": 2.453125, + "learning_rate": 0.00014492656949178874, + "loss": 2.0615, + "step": 298885 + }, + { + "epoch": 0.7, + "grad_norm": 1.984375, + "learning_rate": 0.00014492491822385085, + "loss": 2.1146, + "step": 298890 + }, + { + "epoch": 0.7, + "grad_norm": 2.484375, + "learning_rate": 0.00014492326694056582, + "loss": 2.1673, + "step": 298895 + }, + { + "epoch": 0.7, + "grad_norm": 2.421875, + "learning_rate": 0.0001449216156419342, + "loss": 2.0483, + "step": 298900 + }, + { + "epoch": 0.7, + "grad_norm": 2.078125, + "learning_rate": 0.00014491996432795652, + "loss": 1.9971, + "step": 298905 + }, + { + "epoch": 0.7, + "grad_norm": 2.328125, + "learning_rate": 0.00014491831299863337, + "loss": 2.0837, + "step": 298910 + }, + { + "epoch": 0.7, + "grad_norm": 2.296875, + "learning_rate": 0.00014491666165396532, + "loss": 1.9118, + "step": 298915 + }, + { + "epoch": 0.7, + "grad_norm": 1.921875, + "learning_rate": 0.00014491501029395288, + "loss": 1.9908, + "step": 298920 + }, + { + "epoch": 0.7, + "grad_norm": 2.703125, + "learning_rate": 0.0001449133589185967, + "loss": 2.1858, + "step": 298925 + }, + { + "epoch": 0.7, + "grad_norm": 2.765625, + "learning_rate": 0.0001449117075278973, + "loss": 1.9746, + "step": 298930 + }, + { + "epoch": 0.7, + "grad_norm": 2.265625, + "learning_rate": 0.00014491005612185523, + "loss": 1.9842, + "step": 298935 + }, + { + "epoch": 0.7, + "grad_norm": 2.4375, + "learning_rate": 0.00014490840470047112, + "loss": 2.0731, + "step": 298940 + }, + { + "epoch": 0.7, + "grad_norm": 1.7734375, + "learning_rate": 0.00014490675326374542, + "loss": 2.0027, + "step": 298945 + }, + { + "epoch": 0.7, + "grad_norm": 2.359375, + "learning_rate": 0.00014490510181167877, + "loss": 2.0706, + "step": 298950 + }, + { + "epoch": 0.7, + "grad_norm": 2.21875, + "learning_rate": 0.00014490345034427174, + "loss": 2.0796, + "step": 298955 + }, + { + "epoch": 0.7, + "grad_norm": 2.0, + "learning_rate": 0.00014490179886152488, + "loss": 1.926, + "step": 298960 + }, + { + "epoch": 0.7, + "grad_norm": 1.8046875, + "learning_rate": 0.00014490014736343878, + "loss": 1.9193, + "step": 298965 + }, + { + "epoch": 0.7, + "grad_norm": 2.390625, + "learning_rate": 0.00014489849585001393, + "loss": 1.9301, + "step": 298970 + }, + { + "epoch": 0.7, + "grad_norm": 1.9609375, + "learning_rate": 0.00014489684432125093, + "loss": 2.1613, + "step": 298975 + }, + { + "epoch": 0.7, + "grad_norm": 1.9296875, + "learning_rate": 0.0001448951927771504, + "loss": 1.9359, + "step": 298980 + }, + { + "epoch": 0.7, + "grad_norm": 2.453125, + "learning_rate": 0.00014489354121771281, + "loss": 2.1231, + "step": 298985 + }, + { + "epoch": 0.7, + "grad_norm": 2.453125, + "learning_rate": 0.00014489188964293882, + "loss": 1.9272, + "step": 298990 + }, + { + "epoch": 0.7, + "grad_norm": 1.9921875, + "learning_rate": 0.0001448902380528289, + "loss": 1.9043, + "step": 298995 + }, + { + "epoch": 0.7, + "grad_norm": 1.9921875, + "learning_rate": 0.00014488858644738368, + "loss": 2.1137, + "step": 299000 + }, + { + "epoch": 0.7, + "grad_norm": 1.8359375, + "learning_rate": 0.00014488693482660368, + "loss": 1.9206, + "step": 299005 + }, + { + "epoch": 0.7, + "grad_norm": 2.078125, + "learning_rate": 0.00014488528319048954, + "loss": 2.1432, + "step": 299010 + }, + { + "epoch": 0.7, + "grad_norm": 2.25, + "learning_rate": 0.00014488363153904177, + "loss": 2.0609, + "step": 299015 + }, + { + "epoch": 0.7, + "grad_norm": 2.28125, + "learning_rate": 0.0001448819798722609, + "loss": 2.0871, + "step": 299020 + }, + { + "epoch": 0.7, + "grad_norm": 2.234375, + "learning_rate": 0.00014488032819014754, + "loss": 2.0023, + "step": 299025 + }, + { + "epoch": 0.7, + "grad_norm": 2.25, + "learning_rate": 0.00014487867649270225, + "loss": 2.0028, + "step": 299030 + }, + { + "epoch": 0.7, + "grad_norm": 2.03125, + "learning_rate": 0.0001448770247799256, + "loss": 2.0857, + "step": 299035 + }, + { + "epoch": 0.7, + "grad_norm": 2.296875, + "learning_rate": 0.00014487537305181815, + "loss": 2.0831, + "step": 299040 + }, + { + "epoch": 0.7, + "grad_norm": 2.328125, + "learning_rate": 0.00014487372130838044, + "loss": 1.9797, + "step": 299045 + }, + { + "epoch": 0.7, + "grad_norm": 2.265625, + "learning_rate": 0.00014487206954961305, + "loss": 2.0281, + "step": 299050 + }, + { + "epoch": 0.7, + "grad_norm": 2.203125, + "learning_rate": 0.00014487041777551656, + "loss": 2.0931, + "step": 299055 + }, + { + "epoch": 0.7, + "grad_norm": 2.765625, + "learning_rate": 0.00014486876598609151, + "loss": 2.0299, + "step": 299060 + }, + { + "epoch": 0.7, + "grad_norm": 2.265625, + "learning_rate": 0.0001448671141813385, + "loss": 2.078, + "step": 299065 + }, + { + "epoch": 0.7, + "grad_norm": 1.8671875, + "learning_rate": 0.00014486546236125805, + "loss": 2.1877, + "step": 299070 + }, + { + "epoch": 0.7, + "grad_norm": 2.140625, + "learning_rate": 0.00014486381052585074, + "loss": 1.9425, + "step": 299075 + }, + { + "epoch": 0.7, + "grad_norm": 2.515625, + "learning_rate": 0.00014486215867511715, + "loss": 2.0423, + "step": 299080 + }, + { + "epoch": 0.7, + "grad_norm": 2.640625, + "learning_rate": 0.00014486050680905782, + "loss": 2.079, + "step": 299085 + }, + { + "epoch": 0.7, + "grad_norm": 2.484375, + "learning_rate": 0.00014485885492767335, + "loss": 2.1085, + "step": 299090 + }, + { + "epoch": 0.7, + "grad_norm": 2.5, + "learning_rate": 0.00014485720303096427, + "loss": 1.9846, + "step": 299095 + }, + { + "epoch": 0.7, + "grad_norm": 2.25, + "learning_rate": 0.00014485555111893117, + "loss": 2.0253, + "step": 299100 + }, + { + "epoch": 0.7, + "grad_norm": 2.0625, + "learning_rate": 0.00014485389919157457, + "loss": 1.9491, + "step": 299105 + }, + { + "epoch": 0.7, + "grad_norm": 2.140625, + "learning_rate": 0.00014485224724889508, + "loss": 2.1036, + "step": 299110 + }, + { + "epoch": 0.7, + "grad_norm": 2.203125, + "learning_rate": 0.00014485059529089324, + "loss": 2.0262, + "step": 299115 + }, + { + "epoch": 0.7, + "grad_norm": 2.3125, + "learning_rate": 0.00014484894331756964, + "loss": 2.1226, + "step": 299120 + }, + { + "epoch": 0.7, + "grad_norm": 2.46875, + "learning_rate": 0.00014484729132892483, + "loss": 2.2178, + "step": 299125 + }, + { + "epoch": 0.7, + "grad_norm": 2.640625, + "learning_rate": 0.00014484563932495935, + "loss": 2.0665, + "step": 299130 + }, + { + "epoch": 0.7, + "grad_norm": 2.125, + "learning_rate": 0.0001448439873056738, + "loss": 2.0613, + "step": 299135 + }, + { + "epoch": 0.7, + "grad_norm": 2.0625, + "learning_rate": 0.00014484233527106873, + "loss": 1.8885, + "step": 299140 + }, + { + "epoch": 0.7, + "grad_norm": 2.140625, + "learning_rate": 0.00014484068322114473, + "loss": 2.1955, + "step": 299145 + }, + { + "epoch": 0.7, + "grad_norm": 2.546875, + "learning_rate": 0.00014483903115590228, + "loss": 2.0487, + "step": 299150 + }, + { + "epoch": 0.7, + "grad_norm": 2.140625, + "learning_rate": 0.00014483737907534203, + "loss": 2.0187, + "step": 299155 + }, + { + "epoch": 0.7, + "grad_norm": 2.421875, + "learning_rate": 0.00014483572697946453, + "loss": 1.9842, + "step": 299160 + }, + { + "epoch": 0.7, + "grad_norm": 2.34375, + "learning_rate": 0.00014483407486827035, + "loss": 1.9986, + "step": 299165 + }, + { + "epoch": 0.7, + "grad_norm": 2.0, + "learning_rate": 0.00014483242274176, + "loss": 2.0834, + "step": 299170 + }, + { + "epoch": 0.7, + "grad_norm": 2.203125, + "learning_rate": 0.0001448307705999341, + "loss": 2.1992, + "step": 299175 + }, + { + "epoch": 0.7, + "grad_norm": 2.3125, + "learning_rate": 0.0001448291184427932, + "loss": 2.0221, + "step": 299180 + }, + { + "epoch": 0.7, + "grad_norm": 1.9921875, + "learning_rate": 0.0001448274662703378, + "loss": 2.1396, + "step": 299185 + }, + { + "epoch": 0.7, + "grad_norm": 1.984375, + "learning_rate": 0.00014482581408256862, + "loss": 2.0231, + "step": 299190 + }, + { + "epoch": 0.7, + "grad_norm": 1.890625, + "learning_rate": 0.00014482416187948607, + "loss": 2.1891, + "step": 299195 + }, + { + "epoch": 0.7, + "grad_norm": 2.265625, + "learning_rate": 0.0001448225096610908, + "loss": 2.0914, + "step": 299200 + }, + { + "epoch": 0.7, + "grad_norm": 1.9453125, + "learning_rate": 0.00014482085742738334, + "loss": 2.2452, + "step": 299205 + }, + { + "epoch": 0.7, + "grad_norm": 1.8515625, + "learning_rate": 0.00014481920517836426, + "loss": 1.9875, + "step": 299210 + }, + { + "epoch": 0.7, + "grad_norm": 2.265625, + "learning_rate": 0.0001448175529140341, + "loss": 2.1956, + "step": 299215 + }, + { + "epoch": 0.7, + "grad_norm": 2.390625, + "learning_rate": 0.00014481590063439347, + "loss": 1.9685, + "step": 299220 + }, + { + "epoch": 0.7, + "grad_norm": 2.25, + "learning_rate": 0.00014481424833944292, + "loss": 1.9674, + "step": 299225 + }, + { + "epoch": 0.7, + "grad_norm": 2.15625, + "learning_rate": 0.00014481259602918304, + "loss": 2.036, + "step": 299230 + }, + { + "epoch": 0.7, + "grad_norm": 2.5625, + "learning_rate": 0.00014481094370361435, + "loss": 2.0804, + "step": 299235 + }, + { + "epoch": 0.7, + "grad_norm": 2.390625, + "learning_rate": 0.0001448092913627374, + "loss": 1.9574, + "step": 299240 + }, + { + "epoch": 0.7, + "grad_norm": 2.25, + "learning_rate": 0.00014480763900655277, + "loss": 2.0445, + "step": 299245 + }, + { + "epoch": 0.7, + "grad_norm": 2.078125, + "learning_rate": 0.00014480598663506108, + "loss": 1.9908, + "step": 299250 + }, + { + "epoch": 0.7, + "grad_norm": 1.8125, + "learning_rate": 0.00014480433424826283, + "loss": 2.0168, + "step": 299255 + }, + { + "epoch": 0.7, + "grad_norm": 2.5625, + "learning_rate": 0.00014480268184615864, + "loss": 2.1347, + "step": 299260 + }, + { + "epoch": 0.7, + "grad_norm": 1.796875, + "learning_rate": 0.000144801029428749, + "loss": 2.0476, + "step": 299265 + }, + { + "epoch": 0.7, + "grad_norm": 2.0625, + "learning_rate": 0.00014479937699603455, + "loss": 1.8589, + "step": 299270 + }, + { + "epoch": 0.7, + "grad_norm": 1.8515625, + "learning_rate": 0.0001447977245480158, + "loss": 2.0451, + "step": 299275 + }, + { + "epoch": 0.7, + "grad_norm": 2.515625, + "learning_rate": 0.00014479607208469334, + "loss": 2.0505, + "step": 299280 + }, + { + "epoch": 0.7, + "grad_norm": 2.265625, + "learning_rate": 0.00014479441960606773, + "loss": 2.145, + "step": 299285 + }, + { + "epoch": 0.7, + "grad_norm": 2.34375, + "learning_rate": 0.00014479276711213952, + "loss": 1.9151, + "step": 299290 + }, + { + "epoch": 0.7, + "grad_norm": 2.390625, + "learning_rate": 0.0001447911146029093, + "loss": 2.1794, + "step": 299295 + }, + { + "epoch": 0.7, + "grad_norm": 2.265625, + "learning_rate": 0.00014478946207837764, + "loss": 2.2047, + "step": 299300 + }, + { + "epoch": 0.7, + "grad_norm": 2.203125, + "learning_rate": 0.00014478780953854504, + "loss": 2.3086, + "step": 299305 + }, + { + "epoch": 0.7, + "grad_norm": 2.4375, + "learning_rate": 0.00014478615698341216, + "loss": 1.9557, + "step": 299310 + }, + { + "epoch": 0.7, + "grad_norm": 2.15625, + "learning_rate": 0.0001447845044129795, + "loss": 1.9644, + "step": 299315 + }, + { + "epoch": 0.7, + "grad_norm": 2.234375, + "learning_rate": 0.0001447828518272476, + "loss": 2.0081, + "step": 299320 + }, + { + "epoch": 0.7, + "grad_norm": 2.734375, + "learning_rate": 0.00014478119922621714, + "loss": 2.1125, + "step": 299325 + }, + { + "epoch": 0.7, + "grad_norm": 2.25, + "learning_rate": 0.00014477954660988858, + "loss": 2.0288, + "step": 299330 + }, + { + "epoch": 0.7, + "grad_norm": 2.328125, + "learning_rate": 0.00014477789397826252, + "loss": 1.9972, + "step": 299335 + }, + { + "epoch": 0.7, + "grad_norm": 2.84375, + "learning_rate": 0.00014477624133133949, + "loss": 1.9067, + "step": 299340 + }, + { + "epoch": 0.7, + "grad_norm": 2.34375, + "learning_rate": 0.00014477458866912012, + "loss": 2.1423, + "step": 299345 + }, + { + "epoch": 0.7, + "grad_norm": 2.40625, + "learning_rate": 0.0001447729359916049, + "loss": 2.1295, + "step": 299350 + }, + { + "epoch": 0.7, + "grad_norm": 2.390625, + "learning_rate": 0.00014477128329879447, + "loss": 2.104, + "step": 299355 + }, + { + "epoch": 0.7, + "grad_norm": 2.578125, + "learning_rate": 0.00014476963059068933, + "loss": 1.9357, + "step": 299360 + }, + { + "epoch": 0.7, + "grad_norm": 2.109375, + "learning_rate": 0.0001447679778672901, + "loss": 2.0187, + "step": 299365 + }, + { + "epoch": 0.7, + "grad_norm": 2.21875, + "learning_rate": 0.00014476632512859728, + "loss": 1.896, + "step": 299370 + }, + { + "epoch": 0.7, + "grad_norm": 2.125, + "learning_rate": 0.0001447646723746115, + "loss": 1.9656, + "step": 299375 + }, + { + "epoch": 0.7, + "grad_norm": 2.21875, + "learning_rate": 0.0001447630196053333, + "loss": 2.1551, + "step": 299380 + }, + { + "epoch": 0.7, + "grad_norm": 2.15625, + "learning_rate": 0.00014476136682076322, + "loss": 2.0437, + "step": 299385 + }, + { + "epoch": 0.7, + "grad_norm": 2.296875, + "learning_rate": 0.00014475971402090184, + "loss": 2.3061, + "step": 299390 + }, + { + "epoch": 0.7, + "grad_norm": 2.171875, + "learning_rate": 0.00014475806120574976, + "loss": 2.0719, + "step": 299395 + }, + { + "epoch": 0.7, + "grad_norm": 2.125, + "learning_rate": 0.0001447564083753075, + "loss": 2.1862, + "step": 299400 + }, + { + "epoch": 0.7, + "grad_norm": 2.28125, + "learning_rate": 0.00014475475552957562, + "loss": 1.9867, + "step": 299405 + }, + { + "epoch": 0.7, + "grad_norm": 2.171875, + "learning_rate": 0.00014475310266855472, + "loss": 2.1259, + "step": 299410 + }, + { + "epoch": 0.7, + "grad_norm": 2.453125, + "learning_rate": 0.00014475144979224537, + "loss": 2.0781, + "step": 299415 + }, + { + "epoch": 0.7, + "grad_norm": 2.125, + "learning_rate": 0.00014474979690064812, + "loss": 2.0649, + "step": 299420 + }, + { + "epoch": 0.7, + "grad_norm": 2.28125, + "learning_rate": 0.00014474814399376346, + "loss": 1.9425, + "step": 299425 + }, + { + "epoch": 0.7, + "grad_norm": 2.25, + "learning_rate": 0.0001447464910715921, + "loss": 1.9254, + "step": 299430 + }, + { + "epoch": 0.7, + "grad_norm": 2.125, + "learning_rate": 0.00014474483813413445, + "loss": 2.1248, + "step": 299435 + }, + { + "epoch": 0.7, + "grad_norm": 2.375, + "learning_rate": 0.00014474318518139124, + "loss": 2.0268, + "step": 299440 + }, + { + "epoch": 0.7, + "grad_norm": 2.0625, + "learning_rate": 0.0001447415322133629, + "loss": 2.0786, + "step": 299445 + }, + { + "epoch": 0.7, + "grad_norm": 2.03125, + "learning_rate": 0.00014473987923005002, + "loss": 2.0847, + "step": 299450 + }, + { + "epoch": 0.7, + "grad_norm": 2.28125, + "learning_rate": 0.00014473822623145322, + "loss": 2.1307, + "step": 299455 + }, + { + "epoch": 0.7, + "grad_norm": 2.265625, + "learning_rate": 0.000144736573217573, + "loss": 2.3213, + "step": 299460 + }, + { + "epoch": 0.7, + "grad_norm": 2.375, + "learning_rate": 0.00014473492018841, + "loss": 2.2443, + "step": 299465 + }, + { + "epoch": 0.7, + "grad_norm": 2.265625, + "learning_rate": 0.0001447332671439647, + "loss": 2.0808, + "step": 299470 + }, + { + "epoch": 0.7, + "grad_norm": 2.515625, + "learning_rate": 0.00014473161408423772, + "loss": 1.9113, + "step": 299475 + }, + { + "epoch": 0.7, + "grad_norm": 1.9765625, + "learning_rate": 0.00014472996100922957, + "loss": 2.1397, + "step": 299480 + }, + { + "epoch": 0.7, + "grad_norm": 2.5, + "learning_rate": 0.00014472830791894092, + "loss": 1.88, + "step": 299485 + }, + { + "epoch": 0.7, + "grad_norm": 2.4375, + "learning_rate": 0.00014472665481337225, + "loss": 2.0879, + "step": 299490 + }, + { + "epoch": 0.7, + "grad_norm": 1.9921875, + "learning_rate": 0.0001447250016925241, + "loss": 2.0611, + "step": 299495 + }, + { + "epoch": 0.7, + "grad_norm": 1.8984375, + "learning_rate": 0.00014472334855639713, + "loss": 1.8597, + "step": 299500 + }, + { + "epoch": 0.7, + "grad_norm": 2.375, + "learning_rate": 0.00014472169540499184, + "loss": 1.9637, + "step": 299505 + }, + { + "epoch": 0.7, + "grad_norm": 2.0, + "learning_rate": 0.0001447200422383088, + "loss": 1.8324, + "step": 299510 + }, + { + "epoch": 0.7, + "grad_norm": 2.65625, + "learning_rate": 0.00014471838905634859, + "loss": 1.9766, + "step": 299515 + }, + { + "epoch": 0.7, + "grad_norm": 2.734375, + "learning_rate": 0.00014471673585911176, + "loss": 2.1551, + "step": 299520 + }, + { + "epoch": 0.7, + "grad_norm": 2.171875, + "learning_rate": 0.0001447150826465989, + "loss": 2.203, + "step": 299525 + }, + { + "epoch": 0.7, + "grad_norm": 1.8515625, + "learning_rate": 0.00014471342941881052, + "loss": 2.0691, + "step": 299530 + }, + { + "epoch": 0.7, + "grad_norm": 1.9375, + "learning_rate": 0.00014471177617574724, + "loss": 2.0737, + "step": 299535 + }, + { + "epoch": 0.7, + "grad_norm": 2.1875, + "learning_rate": 0.0001447101229174096, + "loss": 2.1328, + "step": 299540 + }, + { + "epoch": 0.7, + "grad_norm": 2.15625, + "learning_rate": 0.00014470846964379817, + "loss": 2.1536, + "step": 299545 + }, + { + "epoch": 0.7, + "grad_norm": 4.34375, + "learning_rate": 0.00014470681635491355, + "loss": 1.9822, + "step": 299550 + }, + { + "epoch": 0.7, + "grad_norm": 1.765625, + "learning_rate": 0.00014470516305075626, + "loss": 2.0299, + "step": 299555 + }, + { + "epoch": 0.7, + "grad_norm": 2.203125, + "learning_rate": 0.00014470350973132685, + "loss": 2.1045, + "step": 299560 + }, + { + "epoch": 0.7, + "grad_norm": 2.828125, + "learning_rate": 0.0001447018563966259, + "loss": 1.9548, + "step": 299565 + }, + { + "epoch": 0.7, + "grad_norm": 2.90625, + "learning_rate": 0.00014470020304665403, + "loss": 2.1457, + "step": 299570 + }, + { + "epoch": 0.7, + "grad_norm": 2.34375, + "learning_rate": 0.00014469854968141173, + "loss": 2.1927, + "step": 299575 + }, + { + "epoch": 0.71, + "grad_norm": 1.7421875, + "learning_rate": 0.00014469689630089961, + "loss": 2.0548, + "step": 299580 + }, + { + "epoch": 0.71, + "grad_norm": 2.265625, + "learning_rate": 0.0001446952429051182, + "loss": 1.9149, + "step": 299585 + }, + { + "epoch": 0.71, + "grad_norm": 2.5, + "learning_rate": 0.0001446935894940681, + "loss": 2.0819, + "step": 299590 + }, + { + "epoch": 0.71, + "grad_norm": 2.65625, + "learning_rate": 0.00014469193606774986, + "loss": 1.9106, + "step": 299595 + }, + { + "epoch": 0.71, + "grad_norm": 2.109375, + "learning_rate": 0.00014469028262616406, + "loss": 2.1119, + "step": 299600 + }, + { + "epoch": 0.71, + "grad_norm": 2.265625, + "learning_rate": 0.0001446886291693112, + "loss": 1.976, + "step": 299605 + }, + { + "epoch": 0.71, + "grad_norm": 2.46875, + "learning_rate": 0.00014468697569719195, + "loss": 2.03, + "step": 299610 + }, + { + "epoch": 0.71, + "grad_norm": 2.515625, + "learning_rate": 0.00014468532220980674, + "loss": 1.9508, + "step": 299615 + }, + { + "epoch": 0.71, + "grad_norm": 2.046875, + "learning_rate": 0.0001446836687071563, + "loss": 1.9776, + "step": 299620 + }, + { + "epoch": 0.71, + "grad_norm": 2.25, + "learning_rate": 0.00014468201518924107, + "loss": 2.1618, + "step": 299625 + }, + { + "epoch": 0.71, + "grad_norm": 2.15625, + "learning_rate": 0.00014468036165606166, + "loss": 2.0153, + "step": 299630 + }, + { + "epoch": 0.71, + "grad_norm": 2.28125, + "learning_rate": 0.00014467870810761863, + "loss": 2.1351, + "step": 299635 + }, + { + "epoch": 0.71, + "grad_norm": 1.6796875, + "learning_rate": 0.00014467705454391253, + "loss": 2.0981, + "step": 299640 + }, + { + "epoch": 0.71, + "grad_norm": 2.359375, + "learning_rate": 0.00014467540096494395, + "loss": 2.0544, + "step": 299645 + }, + { + "epoch": 0.71, + "grad_norm": 1.90625, + "learning_rate": 0.00014467374737071344, + "loss": 1.9488, + "step": 299650 + }, + { + "epoch": 0.71, + "grad_norm": 2.15625, + "learning_rate": 0.00014467209376122157, + "loss": 2.0518, + "step": 299655 + }, + { + "epoch": 0.71, + "grad_norm": 1.90625, + "learning_rate": 0.0001446704401364689, + "loss": 2.0799, + "step": 299660 + }, + { + "epoch": 0.71, + "grad_norm": 2.390625, + "learning_rate": 0.000144668786496456, + "loss": 2.2202, + "step": 299665 + }, + { + "epoch": 0.71, + "grad_norm": 2.015625, + "learning_rate": 0.0001446671328411834, + "loss": 1.9648, + "step": 299670 + }, + { + "epoch": 0.71, + "grad_norm": 2.265625, + "learning_rate": 0.00014466547917065174, + "loss": 1.9482, + "step": 299675 + }, + { + "epoch": 0.71, + "grad_norm": 2.046875, + "learning_rate": 0.00014466382548486152, + "loss": 2.0776, + "step": 299680 + }, + { + "epoch": 0.71, + "grad_norm": 2.03125, + "learning_rate": 0.00014466217178381335, + "loss": 2.0627, + "step": 299685 + }, + { + "epoch": 0.71, + "grad_norm": 2.3125, + "learning_rate": 0.00014466051806750777, + "loss": 2.073, + "step": 299690 + }, + { + "epoch": 0.71, + "grad_norm": 2.390625, + "learning_rate": 0.0001446588643359453, + "loss": 2.1171, + "step": 299695 + }, + { + "epoch": 0.71, + "grad_norm": 2.03125, + "learning_rate": 0.0001446572105891266, + "loss": 1.9878, + "step": 299700 + }, + { + "epoch": 0.71, + "grad_norm": 2.328125, + "learning_rate": 0.00014465555682705218, + "loss": 2.1329, + "step": 299705 + }, + { + "epoch": 0.71, + "grad_norm": 2.203125, + "learning_rate": 0.0001446539030497226, + "loss": 2.0724, + "step": 299710 + }, + { + "epoch": 0.71, + "grad_norm": 2.140625, + "learning_rate": 0.00014465224925713844, + "loss": 2.1735, + "step": 299715 + }, + { + "epoch": 0.71, + "grad_norm": 2.0, + "learning_rate": 0.00014465059544930027, + "loss": 2.1154, + "step": 299720 + }, + { + "epoch": 0.71, + "grad_norm": 2.1875, + "learning_rate": 0.00014464894162620863, + "loss": 2.0246, + "step": 299725 + }, + { + "epoch": 0.71, + "grad_norm": 2.5, + "learning_rate": 0.00014464728778786412, + "loss": 2.1185, + "step": 299730 + }, + { + "epoch": 0.71, + "grad_norm": 2.09375, + "learning_rate": 0.0001446456339342673, + "loss": 2.0966, + "step": 299735 + }, + { + "epoch": 0.71, + "grad_norm": 2.078125, + "learning_rate": 0.00014464398006541868, + "loss": 1.9356, + "step": 299740 + }, + { + "epoch": 0.71, + "grad_norm": 2.09375, + "learning_rate": 0.00014464232618131888, + "loss": 2.0709, + "step": 299745 + }, + { + "epoch": 0.71, + "grad_norm": 2.3125, + "learning_rate": 0.00014464067228196848, + "loss": 2.1348, + "step": 299750 + }, + { + "epoch": 0.71, + "grad_norm": 2.15625, + "learning_rate": 0.000144639018367368, + "loss": 2.0177, + "step": 299755 + }, + { + "epoch": 0.71, + "grad_norm": 2.203125, + "learning_rate": 0.000144637364437518, + "loss": 2.1645, + "step": 299760 + }, + { + "epoch": 0.71, + "grad_norm": 2.125, + "learning_rate": 0.0001446357104924191, + "loss": 2.2044, + "step": 299765 + }, + { + "epoch": 0.71, + "grad_norm": 2.0, + "learning_rate": 0.00014463405653207181, + "loss": 2.1049, + "step": 299770 + }, + { + "epoch": 0.71, + "grad_norm": 2.390625, + "learning_rate": 0.00014463240255647669, + "loss": 2.121, + "step": 299775 + }, + { + "epoch": 0.71, + "grad_norm": 2.359375, + "learning_rate": 0.00014463074856563442, + "loss": 1.8944, + "step": 299780 + }, + { + "epoch": 0.71, + "grad_norm": 2.734375, + "learning_rate": 0.0001446290945595454, + "loss": 2.1173, + "step": 299785 + }, + { + "epoch": 0.71, + "grad_norm": 2.296875, + "learning_rate": 0.00014462744053821028, + "loss": 2.029, + "step": 299790 + }, + { + "epoch": 0.71, + "grad_norm": 2.546875, + "learning_rate": 0.00014462578650162964, + "loss": 2.0914, + "step": 299795 + }, + { + "epoch": 0.71, + "grad_norm": 2.0625, + "learning_rate": 0.00014462413244980402, + "loss": 1.8696, + "step": 299800 + }, + { + "epoch": 0.71, + "grad_norm": 2.546875, + "learning_rate": 0.000144622478382734, + "loss": 1.913, + "step": 299805 + }, + { + "epoch": 0.71, + "grad_norm": 2.265625, + "learning_rate": 0.0001446208243004201, + "loss": 1.9559, + "step": 299810 + }, + { + "epoch": 0.71, + "grad_norm": 2.59375, + "learning_rate": 0.00014461917020286292, + "loss": 2.0538, + "step": 299815 + }, + { + "epoch": 0.71, + "grad_norm": 2.90625, + "learning_rate": 0.00014461751609006303, + "loss": 2.1384, + "step": 299820 + }, + { + "epoch": 0.71, + "grad_norm": 2.140625, + "learning_rate": 0.000144615861962021, + "loss": 2.1423, + "step": 299825 + }, + { + "epoch": 0.71, + "grad_norm": 2.28125, + "learning_rate": 0.00014461420781873737, + "loss": 2.0934, + "step": 299830 + }, + { + "epoch": 0.71, + "grad_norm": 2.0, + "learning_rate": 0.00014461255366021272, + "loss": 2.0339, + "step": 299835 + }, + { + "epoch": 0.71, + "grad_norm": 1.75, + "learning_rate": 0.0001446108994864476, + "loss": 2.1615, + "step": 299840 + }, + { + "epoch": 0.71, + "grad_norm": 2.015625, + "learning_rate": 0.0001446092452974426, + "loss": 2.0511, + "step": 299845 + }, + { + "epoch": 0.71, + "grad_norm": 2.125, + "learning_rate": 0.00014460759109319828, + "loss": 1.8502, + "step": 299850 + }, + { + "epoch": 0.71, + "grad_norm": 2.171875, + "learning_rate": 0.00014460593687371518, + "loss": 2.0598, + "step": 299855 + }, + { + "epoch": 0.71, + "grad_norm": 1.828125, + "learning_rate": 0.00014460428263899391, + "loss": 2.0241, + "step": 299860 + }, + { + "epoch": 0.71, + "grad_norm": 2.34375, + "learning_rate": 0.00014460262838903498, + "loss": 2.0507, + "step": 299865 + }, + { + "epoch": 0.71, + "grad_norm": 1.9296875, + "learning_rate": 0.00014460097412383898, + "loss": 1.9429, + "step": 299870 + }, + { + "epoch": 0.71, + "grad_norm": 2.09375, + "learning_rate": 0.00014459931984340654, + "loss": 2.0354, + "step": 299875 + }, + { + "epoch": 0.71, + "grad_norm": 2.125, + "learning_rate": 0.0001445976655477381, + "loss": 2.1628, + "step": 299880 + }, + { + "epoch": 0.71, + "grad_norm": 2.09375, + "learning_rate": 0.00014459601123683428, + "loss": 1.9994, + "step": 299885 + }, + { + "epoch": 0.71, + "grad_norm": 1.859375, + "learning_rate": 0.00014459435691069566, + "loss": 1.9021, + "step": 299890 + }, + { + "epoch": 0.71, + "grad_norm": 2.234375, + "learning_rate": 0.00014459270256932284, + "loss": 2.0342, + "step": 299895 + }, + { + "epoch": 0.71, + "grad_norm": 2.015625, + "learning_rate": 0.00014459104821271633, + "loss": 2.1744, + "step": 299900 + }, + { + "epoch": 0.71, + "grad_norm": 2.3125, + "learning_rate": 0.0001445893938408767, + "loss": 2.1009, + "step": 299905 + }, + { + "epoch": 0.71, + "grad_norm": 1.796875, + "learning_rate": 0.0001445877394538045, + "loss": 2.0459, + "step": 299910 + }, + { + "epoch": 0.71, + "grad_norm": 2.609375, + "learning_rate": 0.00014458608505150036, + "loss": 2.0491, + "step": 299915 + }, + { + "epoch": 0.71, + "grad_norm": 2.375, + "learning_rate": 0.0001445844306339648, + "loss": 2.0571, + "step": 299920 + }, + { + "epoch": 0.71, + "grad_norm": 2.125, + "learning_rate": 0.00014458277620119837, + "loss": 2.0791, + "step": 299925 + }, + { + "epoch": 0.71, + "grad_norm": 2.296875, + "learning_rate": 0.0001445811217532017, + "loss": 2.0729, + "step": 299930 + }, + { + "epoch": 0.71, + "grad_norm": 2.203125, + "learning_rate": 0.00014457946728997525, + "loss": 2.0889, + "step": 299935 + }, + { + "epoch": 0.71, + "grad_norm": 2.5625, + "learning_rate": 0.00014457781281151968, + "loss": 2.0496, + "step": 299940 + }, + { + "epoch": 0.71, + "grad_norm": 2.328125, + "learning_rate": 0.0001445761583178355, + "loss": 2.0774, + "step": 299945 + }, + { + "epoch": 0.71, + "grad_norm": 1.96875, + "learning_rate": 0.0001445745038089233, + "loss": 2.0697, + "step": 299950 + }, + { + "epoch": 0.71, + "grad_norm": 2.375, + "learning_rate": 0.00014457284928478365, + "loss": 1.9413, + "step": 299955 + }, + { + "epoch": 0.71, + "grad_norm": 2.234375, + "learning_rate": 0.00014457119474541708, + "loss": 2.0892, + "step": 299960 + }, + { + "epoch": 0.71, + "grad_norm": 2.53125, + "learning_rate": 0.00014456954019082422, + "loss": 2.1211, + "step": 299965 + }, + { + "epoch": 0.71, + "grad_norm": 1.90625, + "learning_rate": 0.00014456788562100558, + "loss": 2.1096, + "step": 299970 + }, + { + "epoch": 0.71, + "grad_norm": 2.21875, + "learning_rate": 0.00014456623103596176, + "loss": 2.0023, + "step": 299975 + }, + { + "epoch": 0.71, + "grad_norm": 2.46875, + "learning_rate": 0.00014456457643569328, + "loss": 2.0739, + "step": 299980 + }, + { + "epoch": 0.71, + "grad_norm": 2.234375, + "learning_rate": 0.00014456292182020078, + "loss": 2.1075, + "step": 299985 + }, + { + "epoch": 0.71, + "grad_norm": 2.59375, + "learning_rate": 0.0001445612671894847, + "loss": 2.0263, + "step": 299990 + }, + { + "epoch": 0.71, + "grad_norm": 1.7109375, + "learning_rate": 0.00014455961254354572, + "loss": 1.8767, + "step": 299995 + }, + { + "epoch": 0.71, + "grad_norm": 2.28125, + "learning_rate": 0.0001445579578823844, + "loss": 2.103, + "step": 300000 + }, + { + "epoch": 0.71, + "grad_norm": 2.203125, + "learning_rate": 0.00014455630320600124, + "loss": 2.0449, + "step": 300005 + }, + { + "epoch": 0.71, + "grad_norm": 2.859375, + "learning_rate": 0.00014455464851439686, + "loss": 2.1119, + "step": 300010 + }, + { + "epoch": 0.71, + "grad_norm": 2.34375, + "learning_rate": 0.00014455299380757177, + "loss": 2.0571, + "step": 300015 + }, + { + "epoch": 0.71, + "grad_norm": 2.203125, + "learning_rate": 0.0001445513390855266, + "loss": 2.1286, + "step": 300020 + }, + { + "epoch": 0.71, + "grad_norm": 2.09375, + "learning_rate": 0.00014454968434826187, + "loss": 2.1421, + "step": 300025 + }, + { + "epoch": 0.71, + "grad_norm": 2.359375, + "learning_rate": 0.00014454802959577818, + "loss": 2.2596, + "step": 300030 + }, + { + "epoch": 0.71, + "grad_norm": 2.359375, + "learning_rate": 0.00014454637482807606, + "loss": 2.0836, + "step": 300035 + }, + { + "epoch": 0.71, + "grad_norm": 2.171875, + "learning_rate": 0.00014454472004515604, + "loss": 2.0405, + "step": 300040 + }, + { + "epoch": 0.71, + "grad_norm": 1.8828125, + "learning_rate": 0.0001445430652470188, + "loss": 2.16, + "step": 300045 + }, + { + "epoch": 0.71, + "grad_norm": 2.203125, + "learning_rate": 0.00014454141043366485, + "loss": 2.1224, + "step": 300050 + }, + { + "epoch": 0.71, + "grad_norm": 2.109375, + "learning_rate": 0.00014453975560509472, + "loss": 1.9955, + "step": 300055 + }, + { + "epoch": 0.71, + "grad_norm": 2.25, + "learning_rate": 0.000144538100761309, + "loss": 2.1357, + "step": 300060 + }, + { + "epoch": 0.71, + "grad_norm": 1.796875, + "learning_rate": 0.00014453644590230825, + "loss": 2.1594, + "step": 300065 + }, + { + "epoch": 0.71, + "grad_norm": 1.890625, + "learning_rate": 0.00014453479102809305, + "loss": 2.133, + "step": 300070 + }, + { + "epoch": 0.71, + "grad_norm": 2.140625, + "learning_rate": 0.00014453313613866397, + "loss": 2.1819, + "step": 300075 + }, + { + "epoch": 0.71, + "grad_norm": 2.0, + "learning_rate": 0.00014453148123402156, + "loss": 1.9979, + "step": 300080 + }, + { + "epoch": 0.71, + "grad_norm": 2.1875, + "learning_rate": 0.00014452982631416638, + "loss": 1.9718, + "step": 300085 + }, + { + "epoch": 0.71, + "grad_norm": 1.84375, + "learning_rate": 0.000144528171379099, + "loss": 2.099, + "step": 300090 + }, + { + "epoch": 0.71, + "grad_norm": 2.203125, + "learning_rate": 0.00014452651642882, + "loss": 2.0763, + "step": 300095 + }, + { + "epoch": 0.71, + "grad_norm": 1.8984375, + "learning_rate": 0.00014452486146332994, + "loss": 2.0255, + "step": 300100 + }, + { + "epoch": 0.71, + "grad_norm": 1.6796875, + "learning_rate": 0.00014452320648262936, + "loss": 2.0161, + "step": 300105 + }, + { + "epoch": 0.71, + "grad_norm": 1.9453125, + "learning_rate": 0.00014452155148671883, + "loss": 2.1936, + "step": 300110 + }, + { + "epoch": 0.71, + "grad_norm": 2.125, + "learning_rate": 0.00014451989647559899, + "loss": 2.0552, + "step": 300115 + }, + { + "epoch": 0.71, + "grad_norm": 2.203125, + "learning_rate": 0.00014451824144927027, + "loss": 2.0532, + "step": 300120 + }, + { + "epoch": 0.71, + "grad_norm": 1.9609375, + "learning_rate": 0.00014451658640773338, + "loss": 1.9652, + "step": 300125 + }, + { + "epoch": 0.71, + "grad_norm": 2.609375, + "learning_rate": 0.00014451493135098878, + "loss": 2.1065, + "step": 300130 + }, + { + "epoch": 0.71, + "grad_norm": 2.40625, + "learning_rate": 0.0001445132762790371, + "loss": 1.9318, + "step": 300135 + }, + { + "epoch": 0.71, + "grad_norm": 2.15625, + "learning_rate": 0.00014451162119187883, + "loss": 2.2012, + "step": 300140 + }, + { + "epoch": 0.71, + "grad_norm": 1.5859375, + "learning_rate": 0.00014450996608951464, + "loss": 1.83, + "step": 300145 + }, + { + "epoch": 0.71, + "grad_norm": 2.578125, + "learning_rate": 0.000144508310971945, + "loss": 2.0622, + "step": 300150 + }, + { + "epoch": 0.71, + "grad_norm": 2.21875, + "learning_rate": 0.00014450665583917052, + "loss": 2.4116, + "step": 300155 + }, + { + "epoch": 0.71, + "grad_norm": 2.015625, + "learning_rate": 0.00014450500069119175, + "loss": 2.0308, + "step": 300160 + }, + { + "epoch": 0.71, + "grad_norm": 2.078125, + "learning_rate": 0.00014450334552800932, + "loss": 2.2322, + "step": 300165 + }, + { + "epoch": 0.71, + "grad_norm": 2.65625, + "learning_rate": 0.0001445016903496237, + "loss": 2.0447, + "step": 300170 + }, + { + "epoch": 0.71, + "grad_norm": 2.328125, + "learning_rate": 0.00014450003515603546, + "loss": 1.9991, + "step": 300175 + }, + { + "epoch": 0.71, + "grad_norm": 2.1875, + "learning_rate": 0.00014449837994724523, + "loss": 1.9798, + "step": 300180 + }, + { + "epoch": 0.71, + "grad_norm": 2.0625, + "learning_rate": 0.00014449672472325356, + "loss": 2.0868, + "step": 300185 + }, + { + "epoch": 0.71, + "grad_norm": 2.546875, + "learning_rate": 0.000144495069484061, + "loss": 2.0178, + "step": 300190 + }, + { + "epoch": 0.71, + "grad_norm": 2.46875, + "learning_rate": 0.0001444934142296681, + "loss": 1.8749, + "step": 300195 + }, + { + "epoch": 0.71, + "grad_norm": 2.375, + "learning_rate": 0.00014449175896007543, + "loss": 1.9573, + "step": 300200 + }, + { + "epoch": 0.71, + "grad_norm": 2.484375, + "learning_rate": 0.0001444901036752836, + "loss": 2.0543, + "step": 300205 + }, + { + "epoch": 0.71, + "grad_norm": 2.40625, + "learning_rate": 0.00014448844837529316, + "loss": 2.0754, + "step": 300210 + }, + { + "epoch": 0.71, + "grad_norm": 1.9921875, + "learning_rate": 0.00014448679306010463, + "loss": 2.0995, + "step": 300215 + }, + { + "epoch": 0.71, + "grad_norm": 2.359375, + "learning_rate": 0.0001444851377297186, + "loss": 2.1027, + "step": 300220 + }, + { + "epoch": 0.71, + "grad_norm": 2.046875, + "learning_rate": 0.00014448348238413564, + "loss": 2.174, + "step": 300225 + }, + { + "epoch": 0.71, + "grad_norm": 2.125, + "learning_rate": 0.00014448182702335633, + "loss": 2.1119, + "step": 300230 + }, + { + "epoch": 0.71, + "grad_norm": 2.1875, + "learning_rate": 0.00014448017164738123, + "loss": 2.1033, + "step": 300235 + }, + { + "epoch": 0.71, + "grad_norm": 2.3125, + "learning_rate": 0.00014447851625621087, + "loss": 2.0106, + "step": 300240 + }, + { + "epoch": 0.71, + "grad_norm": 2.109375, + "learning_rate": 0.00014447686084984585, + "loss": 2.0457, + "step": 300245 + }, + { + "epoch": 0.71, + "grad_norm": 2.046875, + "learning_rate": 0.00014447520542828674, + "loss": 2.0358, + "step": 300250 + }, + { + "epoch": 0.71, + "grad_norm": 1.8671875, + "learning_rate": 0.00014447354999153408, + "loss": 1.9872, + "step": 300255 + }, + { + "epoch": 0.71, + "grad_norm": 2.203125, + "learning_rate": 0.00014447189453958845, + "loss": 1.9258, + "step": 300260 + }, + { + "epoch": 0.71, + "grad_norm": 1.828125, + "learning_rate": 0.00014447023907245043, + "loss": 2.0827, + "step": 300265 + }, + { + "epoch": 0.71, + "grad_norm": 2.21875, + "learning_rate": 0.00014446858359012054, + "loss": 2.1595, + "step": 300270 + }, + { + "epoch": 0.71, + "grad_norm": 2.15625, + "learning_rate": 0.0001444669280925994, + "loss": 1.7923, + "step": 300275 + }, + { + "epoch": 0.71, + "grad_norm": 2.40625, + "learning_rate": 0.00014446527257988754, + "loss": 2.156, + "step": 300280 + }, + { + "epoch": 0.71, + "grad_norm": 2.15625, + "learning_rate": 0.00014446361705198555, + "loss": 2.2459, + "step": 300285 + }, + { + "epoch": 0.71, + "grad_norm": 2.09375, + "learning_rate": 0.00014446196150889396, + "loss": 2.0784, + "step": 300290 + }, + { + "epoch": 0.71, + "grad_norm": 2.34375, + "learning_rate": 0.00014446030595061338, + "loss": 2.0612, + "step": 300295 + }, + { + "epoch": 0.71, + "grad_norm": 2.140625, + "learning_rate": 0.00014445865037714434, + "loss": 2.0014, + "step": 300300 + }, + { + "epoch": 0.71, + "grad_norm": 2.265625, + "learning_rate": 0.00014445699478848745, + "loss": 1.7843, + "step": 300305 + }, + { + "epoch": 0.71, + "grad_norm": 2.125, + "learning_rate": 0.00014445533918464317, + "loss": 2.2314, + "step": 300310 + }, + { + "epoch": 0.71, + "grad_norm": 2.40625, + "learning_rate": 0.0001444536835656122, + "loss": 1.9773, + "step": 300315 + }, + { + "epoch": 0.71, + "grad_norm": 2.671875, + "learning_rate": 0.00014445202793139504, + "loss": 2.1626, + "step": 300320 + }, + { + "epoch": 0.71, + "grad_norm": 1.8515625, + "learning_rate": 0.00014445037228199228, + "loss": 1.9309, + "step": 300325 + }, + { + "epoch": 0.71, + "grad_norm": 2.03125, + "learning_rate": 0.00014444871661740442, + "loss": 2.0578, + "step": 300330 + }, + { + "epoch": 0.71, + "grad_norm": 2.875, + "learning_rate": 0.00014444706093763208, + "loss": 2.283, + "step": 300335 + }, + { + "epoch": 0.71, + "grad_norm": 2.09375, + "learning_rate": 0.00014444540524267583, + "loss": 2.0611, + "step": 300340 + }, + { + "epoch": 0.71, + "grad_norm": 2.125, + "learning_rate": 0.00014444374953253623, + "loss": 2.022, + "step": 300345 + }, + { + "epoch": 0.71, + "grad_norm": 2.609375, + "learning_rate": 0.00014444209380721384, + "loss": 2.0422, + "step": 300350 + }, + { + "epoch": 0.71, + "grad_norm": 2.296875, + "learning_rate": 0.00014444043806670923, + "loss": 2.1417, + "step": 300355 + }, + { + "epoch": 0.71, + "grad_norm": 2.234375, + "learning_rate": 0.00014443878231102292, + "loss": 1.9965, + "step": 300360 + }, + { + "epoch": 0.71, + "grad_norm": 2.0625, + "learning_rate": 0.00014443712654015552, + "loss": 2.0575, + "step": 300365 + }, + { + "epoch": 0.71, + "grad_norm": 2.234375, + "learning_rate": 0.00014443547075410765, + "loss": 2.183, + "step": 300370 + }, + { + "epoch": 0.71, + "grad_norm": 2.34375, + "learning_rate": 0.0001444338149528798, + "loss": 2.0179, + "step": 300375 + }, + { + "epoch": 0.71, + "grad_norm": 2.390625, + "learning_rate": 0.00014443215913647253, + "loss": 1.9702, + "step": 300380 + }, + { + "epoch": 0.71, + "grad_norm": 2.0, + "learning_rate": 0.0001444305033048864, + "loss": 1.967, + "step": 300385 + }, + { + "epoch": 0.71, + "grad_norm": 2.046875, + "learning_rate": 0.00014442884745812206, + "loss": 2.056, + "step": 300390 + }, + { + "epoch": 0.71, + "grad_norm": 2.15625, + "learning_rate": 0.00014442719159617998, + "loss": 1.946, + "step": 300395 + }, + { + "epoch": 0.71, + "grad_norm": 1.84375, + "learning_rate": 0.0001444255357190608, + "loss": 2.0061, + "step": 300400 + }, + { + "epoch": 0.71, + "grad_norm": 2.4375, + "learning_rate": 0.00014442387982676504, + "loss": 2.0636, + "step": 300405 + }, + { + "epoch": 0.71, + "grad_norm": 2.21875, + "learning_rate": 0.00014442222391929327, + "loss": 2.0977, + "step": 300410 + }, + { + "epoch": 0.71, + "grad_norm": 2.296875, + "learning_rate": 0.00014442056799664609, + "loss": 2.1007, + "step": 300415 + }, + { + "epoch": 0.71, + "grad_norm": 2.21875, + "learning_rate": 0.00014441891205882397, + "loss": 1.9834, + "step": 300420 + }, + { + "epoch": 0.71, + "grad_norm": 2.015625, + "learning_rate": 0.00014441725610582758, + "loss": 1.9191, + "step": 300425 + }, + { + "epoch": 0.71, + "grad_norm": 2.328125, + "learning_rate": 0.00014441560013765748, + "loss": 2.1495, + "step": 300430 + }, + { + "epoch": 0.71, + "grad_norm": 2.03125, + "learning_rate": 0.00014441394415431417, + "loss": 2.0898, + "step": 300435 + }, + { + "epoch": 0.71, + "grad_norm": 2.09375, + "learning_rate": 0.00014441228815579828, + "loss": 1.9713, + "step": 300440 + }, + { + "epoch": 0.71, + "grad_norm": 2.15625, + "learning_rate": 0.0001444106321421103, + "loss": 1.9258, + "step": 300445 + }, + { + "epoch": 0.71, + "grad_norm": 2.5, + "learning_rate": 0.00014440897611325091, + "loss": 2.0601, + "step": 300450 + }, + { + "epoch": 0.71, + "grad_norm": 2.25, + "learning_rate": 0.00014440732006922055, + "loss": 2.1237, + "step": 300455 + }, + { + "epoch": 0.71, + "grad_norm": 1.9296875, + "learning_rate": 0.00014440566401001989, + "loss": 2.1108, + "step": 300460 + }, + { + "epoch": 0.71, + "grad_norm": 1.90625, + "learning_rate": 0.00014440400793564942, + "loss": 2.1109, + "step": 300465 + }, + { + "epoch": 0.71, + "grad_norm": 2.265625, + "learning_rate": 0.00014440235184610972, + "loss": 2.1343, + "step": 300470 + }, + { + "epoch": 0.71, + "grad_norm": 2.03125, + "learning_rate": 0.0001444006957414014, + "loss": 2.2314, + "step": 300475 + }, + { + "epoch": 0.71, + "grad_norm": 1.953125, + "learning_rate": 0.000144399039621525, + "loss": 2.2577, + "step": 300480 + }, + { + "epoch": 0.71, + "grad_norm": 2.453125, + "learning_rate": 0.0001443973834864811, + "loss": 2.2752, + "step": 300485 + }, + { + "epoch": 0.71, + "grad_norm": 2.078125, + "learning_rate": 0.0001443957273362702, + "loss": 2.0226, + "step": 300490 + }, + { + "epoch": 0.71, + "grad_norm": 2.125, + "learning_rate": 0.0001443940711708929, + "loss": 2.0894, + "step": 300495 + }, + { + "epoch": 0.71, + "grad_norm": 2.21875, + "learning_rate": 0.00014439241499034983, + "loss": 2.1599, + "step": 300500 + }, + { + "epoch": 0.71, + "grad_norm": 2.140625, + "learning_rate": 0.0001443907587946415, + "loss": 2.0484, + "step": 300505 + }, + { + "epoch": 0.71, + "grad_norm": 2.1875, + "learning_rate": 0.00014438910258376847, + "loss": 1.85, + "step": 300510 + }, + { + "epoch": 0.71, + "grad_norm": 2.21875, + "learning_rate": 0.00014438744635773135, + "loss": 2.1406, + "step": 300515 + }, + { + "epoch": 0.71, + "grad_norm": 1.78125, + "learning_rate": 0.00014438579011653063, + "loss": 2.0739, + "step": 300520 + }, + { + "epoch": 0.71, + "grad_norm": 2.203125, + "learning_rate": 0.00014438413386016693, + "loss": 1.9739, + "step": 300525 + }, + { + "epoch": 0.71, + "grad_norm": 1.71875, + "learning_rate": 0.0001443824775886408, + "loss": 1.9861, + "step": 300530 + }, + { + "epoch": 0.71, + "grad_norm": 2.59375, + "learning_rate": 0.00014438082130195282, + "loss": 2.007, + "step": 300535 + }, + { + "epoch": 0.71, + "grad_norm": 2.265625, + "learning_rate": 0.00014437916500010357, + "loss": 1.9348, + "step": 300540 + }, + { + "epoch": 0.71, + "grad_norm": 2.015625, + "learning_rate": 0.00014437750868309354, + "loss": 1.8939, + "step": 300545 + }, + { + "epoch": 0.71, + "grad_norm": 2.1875, + "learning_rate": 0.0001443758523509234, + "loss": 2.0668, + "step": 300550 + }, + { + "epoch": 0.71, + "grad_norm": 2.109375, + "learning_rate": 0.00014437419600359363, + "loss": 2.3721, + "step": 300555 + }, + { + "epoch": 0.71, + "grad_norm": 1.9609375, + "learning_rate": 0.0001443725396411048, + "loss": 1.8579, + "step": 300560 + }, + { + "epoch": 0.71, + "grad_norm": 2.375, + "learning_rate": 0.00014437088326345757, + "loss": 2.2251, + "step": 300565 + }, + { + "epoch": 0.71, + "grad_norm": 1.984375, + "learning_rate": 0.00014436922687065243, + "loss": 2.1226, + "step": 300570 + }, + { + "epoch": 0.71, + "grad_norm": 2.0625, + "learning_rate": 0.00014436757046268993, + "loss": 1.8587, + "step": 300575 + }, + { + "epoch": 0.71, + "grad_norm": 2.609375, + "learning_rate": 0.00014436591403957068, + "loss": 2.0911, + "step": 300580 + }, + { + "epoch": 0.71, + "grad_norm": 2.515625, + "learning_rate": 0.00014436425760129524, + "loss": 2.1358, + "step": 300585 + }, + { + "epoch": 0.71, + "grad_norm": 2.421875, + "learning_rate": 0.00014436260114786417, + "loss": 2.0314, + "step": 300590 + }, + { + "epoch": 0.71, + "grad_norm": 2.328125, + "learning_rate": 0.00014436094467927798, + "loss": 2.0811, + "step": 300595 + }, + { + "epoch": 0.71, + "grad_norm": 2.078125, + "learning_rate": 0.00014435928819553736, + "loss": 2.042, + "step": 300600 + }, + { + "epoch": 0.71, + "grad_norm": 2.28125, + "learning_rate": 0.0001443576316966427, + "loss": 2.0023, + "step": 300605 + }, + { + "epoch": 0.71, + "grad_norm": 2.203125, + "learning_rate": 0.00014435597518259476, + "loss": 2.215, + "step": 300610 + }, + { + "epoch": 0.71, + "grad_norm": 2.125, + "learning_rate": 0.000144354318653394, + "loss": 2.0768, + "step": 300615 + }, + { + "epoch": 0.71, + "grad_norm": 2.046875, + "learning_rate": 0.000144352662109041, + "loss": 2.266, + "step": 300620 + }, + { + "epoch": 0.71, + "grad_norm": 2.140625, + "learning_rate": 0.00014435100554953628, + "loss": 2.0912, + "step": 300625 + }, + { + "epoch": 0.71, + "grad_norm": 3.125, + "learning_rate": 0.00014434934897488048, + "loss": 2.3051, + "step": 300630 + }, + { + "epoch": 0.71, + "grad_norm": 2.53125, + "learning_rate": 0.00014434769238507415, + "loss": 2.2578, + "step": 300635 + }, + { + "epoch": 0.71, + "grad_norm": 1.859375, + "learning_rate": 0.00014434603578011786, + "loss": 1.9423, + "step": 300640 + }, + { + "epoch": 0.71, + "grad_norm": 2.828125, + "learning_rate": 0.00014434437916001214, + "loss": 2.0769, + "step": 300645 + }, + { + "epoch": 0.71, + "grad_norm": 2.046875, + "learning_rate": 0.00014434272252475755, + "loss": 2.0386, + "step": 300650 + }, + { + "epoch": 0.71, + "grad_norm": 2.21875, + "learning_rate": 0.0001443410658743547, + "loss": 1.8368, + "step": 300655 + }, + { + "epoch": 0.71, + "grad_norm": 2.53125, + "learning_rate": 0.00014433940920880413, + "loss": 2.0595, + "step": 300660 + }, + { + "epoch": 0.71, + "grad_norm": 2.28125, + "learning_rate": 0.00014433775252810645, + "loss": 2.0333, + "step": 300665 + }, + { + "epoch": 0.71, + "grad_norm": 2.125, + "learning_rate": 0.00014433609583226216, + "loss": 2.0025, + "step": 300670 + }, + { + "epoch": 0.71, + "grad_norm": 2.3125, + "learning_rate": 0.00014433443912127185, + "loss": 2.0951, + "step": 300675 + }, + { + "epoch": 0.71, + "grad_norm": 2.1875, + "learning_rate": 0.0001443327823951361, + "loss": 1.9652, + "step": 300680 + }, + { + "epoch": 0.71, + "grad_norm": 1.984375, + "learning_rate": 0.0001443311256538555, + "loss": 2.2131, + "step": 300685 + }, + { + "epoch": 0.71, + "grad_norm": 2.234375, + "learning_rate": 0.00014432946889743052, + "loss": 2.0276, + "step": 300690 + }, + { + "epoch": 0.71, + "grad_norm": 2.40625, + "learning_rate": 0.00014432781212586184, + "loss": 1.9575, + "step": 300695 + }, + { + "epoch": 0.71, + "grad_norm": 1.9609375, + "learning_rate": 0.00014432615533914995, + "loss": 2.005, + "step": 300700 + }, + { + "epoch": 0.71, + "grad_norm": 2.515625, + "learning_rate": 0.00014432449853729544, + "loss": 1.9209, + "step": 300705 + }, + { + "epoch": 0.71, + "grad_norm": 1.90625, + "learning_rate": 0.0001443228417202989, + "loss": 2.0186, + "step": 300710 + }, + { + "epoch": 0.71, + "grad_norm": 1.8046875, + "learning_rate": 0.00014432118488816084, + "loss": 1.9122, + "step": 300715 + }, + { + "epoch": 0.71, + "grad_norm": 2.3125, + "learning_rate": 0.0001443195280408819, + "loss": 1.9964, + "step": 300720 + }, + { + "epoch": 0.71, + "grad_norm": 1.9921875, + "learning_rate": 0.00014431787117846257, + "loss": 1.9687, + "step": 300725 + }, + { + "epoch": 0.71, + "grad_norm": 2.296875, + "learning_rate": 0.00014431621430090348, + "loss": 2.1291, + "step": 300730 + }, + { + "epoch": 0.71, + "grad_norm": 2.0625, + "learning_rate": 0.00014431455740820517, + "loss": 2.1433, + "step": 300735 + }, + { + "epoch": 0.71, + "grad_norm": 2.265625, + "learning_rate": 0.00014431290050036816, + "loss": 2.0103, + "step": 300740 + }, + { + "epoch": 0.71, + "grad_norm": 1.9296875, + "learning_rate": 0.00014431124357739309, + "loss": 2.0354, + "step": 300745 + }, + { + "epoch": 0.71, + "grad_norm": 2.328125, + "learning_rate": 0.0001443095866392805, + "loss": 2.2623, + "step": 300750 + }, + { + "epoch": 0.71, + "grad_norm": 1.9765625, + "learning_rate": 0.00014430792968603094, + "loss": 2.0442, + "step": 300755 + }, + { + "epoch": 0.71, + "grad_norm": 2.5, + "learning_rate": 0.000144306272717645, + "loss": 2.0482, + "step": 300760 + }, + { + "epoch": 0.71, + "grad_norm": 2.4375, + "learning_rate": 0.0001443046157341232, + "loss": 2.0319, + "step": 300765 + }, + { + "epoch": 0.71, + "grad_norm": 2.46875, + "learning_rate": 0.00014430295873546617, + "loss": 2.0152, + "step": 300770 + }, + { + "epoch": 0.71, + "grad_norm": 2.34375, + "learning_rate": 0.00014430130172167445, + "loss": 2.1434, + "step": 300775 + }, + { + "epoch": 0.71, + "grad_norm": 2.4375, + "learning_rate": 0.00014429964469274862, + "loss": 2.1734, + "step": 300780 + }, + { + "epoch": 0.71, + "grad_norm": 1.828125, + "learning_rate": 0.00014429798764868917, + "loss": 2.1768, + "step": 300785 + }, + { + "epoch": 0.71, + "grad_norm": 2.453125, + "learning_rate": 0.00014429633058949675, + "loss": 2.1774, + "step": 300790 + }, + { + "epoch": 0.71, + "grad_norm": 2.109375, + "learning_rate": 0.0001442946735151719, + "loss": 2.1078, + "step": 300795 + }, + { + "epoch": 0.71, + "grad_norm": 2.25, + "learning_rate": 0.00014429301642571524, + "loss": 1.8448, + "step": 300800 + }, + { + "epoch": 0.71, + "grad_norm": 2.40625, + "learning_rate": 0.00014429135932112722, + "loss": 2.0236, + "step": 300805 + }, + { + "epoch": 0.71, + "grad_norm": 2.34375, + "learning_rate": 0.00014428970220140848, + "loss": 1.8759, + "step": 300810 + }, + { + "epoch": 0.71, + "grad_norm": 1.9609375, + "learning_rate": 0.00014428804506655957, + "loss": 2.0707, + "step": 300815 + }, + { + "epoch": 0.71, + "grad_norm": 2.296875, + "learning_rate": 0.00014428638791658108, + "loss": 1.9986, + "step": 300820 + }, + { + "epoch": 0.71, + "grad_norm": 1.9609375, + "learning_rate": 0.00014428473075147352, + "loss": 2.0626, + "step": 300825 + }, + { + "epoch": 0.71, + "grad_norm": 2.1875, + "learning_rate": 0.00014428307357123755, + "loss": 2.0861, + "step": 300830 + }, + { + "epoch": 0.71, + "grad_norm": 2.140625, + "learning_rate": 0.00014428141637587362, + "loss": 2.1786, + "step": 300835 + }, + { + "epoch": 0.71, + "grad_norm": 2.21875, + "learning_rate": 0.0001442797591653824, + "loss": 1.9346, + "step": 300840 + }, + { + "epoch": 0.71, + "grad_norm": 2.046875, + "learning_rate": 0.00014427810193976437, + "loss": 2.128, + "step": 300845 + }, + { + "epoch": 0.71, + "grad_norm": 1.8203125, + "learning_rate": 0.00014427644469902016, + "loss": 2.0568, + "step": 300850 + }, + { + "epoch": 0.71, + "grad_norm": 2.265625, + "learning_rate": 0.0001442747874431503, + "loss": 2.1947, + "step": 300855 + }, + { + "epoch": 0.71, + "grad_norm": 2.28125, + "learning_rate": 0.00014427313017215538, + "loss": 2.1062, + "step": 300860 + }, + { + "epoch": 0.71, + "grad_norm": 2.28125, + "learning_rate": 0.00014427147288603598, + "loss": 2.2094, + "step": 300865 + }, + { + "epoch": 0.71, + "grad_norm": 2.3125, + "learning_rate": 0.0001442698155847926, + "loss": 2.2155, + "step": 300870 + }, + { + "epoch": 0.71, + "grad_norm": 2.578125, + "learning_rate": 0.00014426815826842587, + "loss": 2.2123, + "step": 300875 + }, + { + "epoch": 0.71, + "grad_norm": 1.7109375, + "learning_rate": 0.00014426650093693635, + "loss": 2.0292, + "step": 300880 + }, + { + "epoch": 0.71, + "grad_norm": 1.8125, + "learning_rate": 0.00014426484359032456, + "loss": 1.9195, + "step": 300885 + }, + { + "epoch": 0.71, + "grad_norm": 2.203125, + "learning_rate": 0.0001442631862285911, + "loss": 2.0782, + "step": 300890 + }, + { + "epoch": 0.71, + "grad_norm": 2.234375, + "learning_rate": 0.00014426152885173657, + "loss": 2.1853, + "step": 300895 + }, + { + "epoch": 0.71, + "grad_norm": 2.03125, + "learning_rate": 0.00014425987145976147, + "loss": 2.0296, + "step": 300900 + }, + { + "epoch": 0.71, + "grad_norm": 2.59375, + "learning_rate": 0.00014425821405266638, + "loss": 2.026, + "step": 300905 + }, + { + "epoch": 0.71, + "grad_norm": 1.9140625, + "learning_rate": 0.0001442565566304519, + "loss": 2.0104, + "step": 300910 + }, + { + "epoch": 0.71, + "grad_norm": 2.03125, + "learning_rate": 0.00014425489919311858, + "loss": 1.9648, + "step": 300915 + }, + { + "epoch": 0.71, + "grad_norm": 2.15625, + "learning_rate": 0.00014425324174066698, + "loss": 2.1798, + "step": 300920 + }, + { + "epoch": 0.71, + "grad_norm": 2.140625, + "learning_rate": 0.00014425158427309768, + "loss": 2.1827, + "step": 300925 + }, + { + "epoch": 0.71, + "grad_norm": 2.046875, + "learning_rate": 0.0001442499267904112, + "loss": 1.9903, + "step": 300930 + }, + { + "epoch": 0.71, + "grad_norm": 2.234375, + "learning_rate": 0.00014424826929260818, + "loss": 1.8987, + "step": 300935 + }, + { + "epoch": 0.71, + "grad_norm": 2.5, + "learning_rate": 0.00014424661177968914, + "loss": 2.0555, + "step": 300940 + }, + { + "epoch": 0.71, + "grad_norm": 1.7109375, + "learning_rate": 0.00014424495425165466, + "loss": 2.087, + "step": 300945 + }, + { + "epoch": 0.71, + "grad_norm": 2.40625, + "learning_rate": 0.00014424329670850527, + "loss": 1.9888, + "step": 300950 + }, + { + "epoch": 0.71, + "grad_norm": 1.9140625, + "learning_rate": 0.0001442416391502416, + "loss": 1.9933, + "step": 300955 + }, + { + "epoch": 0.71, + "grad_norm": 2.1875, + "learning_rate": 0.00014423998157686417, + "loss": 2.0851, + "step": 300960 + }, + { + "epoch": 0.71, + "grad_norm": 2.234375, + "learning_rate": 0.00014423832398837357, + "loss": 2.1281, + "step": 300965 + }, + { + "epoch": 0.71, + "grad_norm": 2.296875, + "learning_rate": 0.00014423666638477037, + "loss": 2.1318, + "step": 300970 + }, + { + "epoch": 0.71, + "grad_norm": 2.328125, + "learning_rate": 0.0001442350087660551, + "loss": 2.1259, + "step": 300975 + }, + { + "epoch": 0.71, + "grad_norm": 2.171875, + "learning_rate": 0.00014423335113222834, + "loss": 2.1176, + "step": 300980 + }, + { + "epoch": 0.71, + "grad_norm": 2.84375, + "learning_rate": 0.00014423169348329069, + "loss": 2.2405, + "step": 300985 + }, + { + "epoch": 0.71, + "grad_norm": 2.8125, + "learning_rate": 0.0001442300358192427, + "loss": 2.0744, + "step": 300990 + }, + { + "epoch": 0.71, + "grad_norm": 1.9375, + "learning_rate": 0.00014422837814008488, + "loss": 2.0184, + "step": 300995 + }, + { + "epoch": 0.71, + "grad_norm": 2.109375, + "learning_rate": 0.00014422672044581787, + "loss": 2.1289, + "step": 301000 + }, + { + "epoch": 0.71, + "grad_norm": 1.8125, + "learning_rate": 0.0001442250627364422, + "loss": 2.0439, + "step": 301005 + }, + { + "epoch": 0.71, + "grad_norm": 2.640625, + "learning_rate": 0.00014422340501195845, + "loss": 2.151, + "step": 301010 + }, + { + "epoch": 0.71, + "grad_norm": 2.203125, + "learning_rate": 0.00014422174727236718, + "loss": 1.9269, + "step": 301015 + }, + { + "epoch": 0.71, + "grad_norm": 2.171875, + "learning_rate": 0.000144220089517669, + "loss": 1.8726, + "step": 301020 + }, + { + "epoch": 0.71, + "grad_norm": 2.3125, + "learning_rate": 0.00014421843174786437, + "loss": 2.1043, + "step": 301025 + }, + { + "epoch": 0.71, + "grad_norm": 1.9453125, + "learning_rate": 0.000144216773962954, + "loss": 2.0358, + "step": 301030 + }, + { + "epoch": 0.71, + "grad_norm": 2.359375, + "learning_rate": 0.0001442151161629383, + "loss": 1.9509, + "step": 301035 + }, + { + "epoch": 0.71, + "grad_norm": 2.015625, + "learning_rate": 0.00014421345834781795, + "loss": 2.0321, + "step": 301040 + }, + { + "epoch": 0.71, + "grad_norm": 2.03125, + "learning_rate": 0.00014421180051759348, + "loss": 1.9753, + "step": 301045 + }, + { + "epoch": 0.71, + "grad_norm": 2.390625, + "learning_rate": 0.00014421014267226547, + "loss": 1.9949, + "step": 301050 + }, + { + "epoch": 0.71, + "grad_norm": 1.8984375, + "learning_rate": 0.00014420848481183448, + "loss": 2.1222, + "step": 301055 + }, + { + "epoch": 0.71, + "grad_norm": 2.34375, + "learning_rate": 0.000144206826936301, + "loss": 2.0445, + "step": 301060 + }, + { + "epoch": 0.71, + "grad_norm": 1.9296875, + "learning_rate": 0.0001442051690456657, + "loss": 2.0871, + "step": 301065 + }, + { + "epoch": 0.71, + "grad_norm": 2.34375, + "learning_rate": 0.00014420351113992916, + "loss": 1.8495, + "step": 301070 + }, + { + "epoch": 0.71, + "grad_norm": 2.078125, + "learning_rate": 0.00014420185321909187, + "loss": 2.1453, + "step": 301075 + }, + { + "epoch": 0.71, + "grad_norm": 2.84375, + "learning_rate": 0.00014420019528315443, + "loss": 2.136, + "step": 301080 + }, + { + "epoch": 0.71, + "grad_norm": 1.65625, + "learning_rate": 0.0001441985373321174, + "loss": 1.7832, + "step": 301085 + }, + { + "epoch": 0.71, + "grad_norm": 1.9375, + "learning_rate": 0.00014419687936598132, + "loss": 2.0646, + "step": 301090 + }, + { + "epoch": 0.71, + "grad_norm": 2.1875, + "learning_rate": 0.00014419522138474684, + "loss": 2.2157, + "step": 301095 + }, + { + "epoch": 0.71, + "grad_norm": 2.3125, + "learning_rate": 0.00014419356338841443, + "loss": 2.1467, + "step": 301100 + }, + { + "epoch": 0.71, + "grad_norm": 2.140625, + "learning_rate": 0.00014419190537698468, + "loss": 2.0413, + "step": 301105 + }, + { + "epoch": 0.71, + "grad_norm": 2.28125, + "learning_rate": 0.00014419024735045822, + "loss": 1.9568, + "step": 301110 + }, + { + "epoch": 0.71, + "grad_norm": 2.25, + "learning_rate": 0.00014418858930883554, + "loss": 1.9542, + "step": 301115 + }, + { + "epoch": 0.71, + "grad_norm": 2.515625, + "learning_rate": 0.00014418693125211725, + "loss": 2.107, + "step": 301120 + }, + { + "epoch": 0.71, + "grad_norm": 2.296875, + "learning_rate": 0.00014418527318030392, + "loss": 2.2047, + "step": 301125 + }, + { + "epoch": 0.71, + "grad_norm": 2.0625, + "learning_rate": 0.00014418361509339604, + "loss": 2.2423, + "step": 301130 + }, + { + "epoch": 0.71, + "grad_norm": 2.5, + "learning_rate": 0.00014418195699139429, + "loss": 1.9168, + "step": 301135 + }, + { + "epoch": 0.71, + "grad_norm": 2.234375, + "learning_rate": 0.00014418029887429916, + "loss": 2.345, + "step": 301140 + }, + { + "epoch": 0.71, + "grad_norm": 2.1875, + "learning_rate": 0.00014417864074211124, + "loss": 2.0382, + "step": 301145 + }, + { + "epoch": 0.71, + "grad_norm": 1.890625, + "learning_rate": 0.0001441769825948311, + "loss": 2.0158, + "step": 301150 + }, + { + "epoch": 0.71, + "grad_norm": 2.203125, + "learning_rate": 0.0001441753244324593, + "loss": 2.1389, + "step": 301155 + }, + { + "epoch": 0.71, + "grad_norm": 2.234375, + "learning_rate": 0.0001441736662549964, + "loss": 2.155, + "step": 301160 + }, + { + "epoch": 0.71, + "grad_norm": 2.78125, + "learning_rate": 0.000144172008062443, + "loss": 2.0454, + "step": 301165 + }, + { + "epoch": 0.71, + "grad_norm": 2.09375, + "learning_rate": 0.00014417034985479961, + "loss": 2.1115, + "step": 301170 + }, + { + "epoch": 0.71, + "grad_norm": 2.15625, + "learning_rate": 0.00014416869163206682, + "loss": 2.2007, + "step": 301175 + }, + { + "epoch": 0.71, + "grad_norm": 2.546875, + "learning_rate": 0.00014416703339424525, + "loss": 2.0752, + "step": 301180 + }, + { + "epoch": 0.71, + "grad_norm": 2.03125, + "learning_rate": 0.0001441653751413354, + "loss": 2.3633, + "step": 301185 + }, + { + "epoch": 0.71, + "grad_norm": 2.28125, + "learning_rate": 0.00014416371687333784, + "loss": 1.9864, + "step": 301190 + }, + { + "epoch": 0.71, + "grad_norm": 1.8671875, + "learning_rate": 0.00014416205859025315, + "loss": 2.2019, + "step": 301195 + }, + { + "epoch": 0.71, + "grad_norm": 1.8828125, + "learning_rate": 0.00014416040029208193, + "loss": 1.9476, + "step": 301200 + }, + { + "epoch": 0.71, + "grad_norm": 2.25, + "learning_rate": 0.0001441587419788247, + "loss": 2.1225, + "step": 301205 + }, + { + "epoch": 0.71, + "grad_norm": 1.90625, + "learning_rate": 0.00014415708365048206, + "loss": 2.1395, + "step": 301210 + }, + { + "epoch": 0.71, + "grad_norm": 2.234375, + "learning_rate": 0.00014415542530705455, + "loss": 1.9568, + "step": 301215 + }, + { + "epoch": 0.71, + "grad_norm": 2.265625, + "learning_rate": 0.00014415376694854273, + "loss": 2.1086, + "step": 301220 + }, + { + "epoch": 0.71, + "grad_norm": 2.203125, + "learning_rate": 0.0001441521085749472, + "loss": 2.0689, + "step": 301225 + }, + { + "epoch": 0.71, + "grad_norm": 2.265625, + "learning_rate": 0.0001441504501862685, + "loss": 2.0082, + "step": 301230 + }, + { + "epoch": 0.71, + "grad_norm": 2.109375, + "learning_rate": 0.0001441487917825072, + "loss": 2.2367, + "step": 301235 + }, + { + "epoch": 0.71, + "grad_norm": 1.9140625, + "learning_rate": 0.00014414713336366388, + "loss": 1.9865, + "step": 301240 + }, + { + "epoch": 0.71, + "grad_norm": 2.1875, + "learning_rate": 0.0001441454749297391, + "loss": 1.9583, + "step": 301245 + }, + { + "epoch": 0.71, + "grad_norm": 2.375, + "learning_rate": 0.0001441438164807334, + "loss": 2.1798, + "step": 301250 + }, + { + "epoch": 0.71, + "grad_norm": 2.03125, + "learning_rate": 0.00014414215801664742, + "loss": 2.0835, + "step": 301255 + }, + { + "epoch": 0.71, + "grad_norm": 2.0, + "learning_rate": 0.00014414049953748165, + "loss": 1.8509, + "step": 301260 + }, + { + "epoch": 0.71, + "grad_norm": 2.890625, + "learning_rate": 0.0001441388410432367, + "loss": 2.1083, + "step": 301265 + }, + { + "epoch": 0.71, + "grad_norm": 2.515625, + "learning_rate": 0.0001441371825339131, + "loss": 2.0361, + "step": 301270 + }, + { + "epoch": 0.71, + "grad_norm": 2.40625, + "learning_rate": 0.00014413552400951144, + "loss": 2.1772, + "step": 301275 + }, + { + "epoch": 0.71, + "grad_norm": 2.171875, + "learning_rate": 0.0001441338654700323, + "loss": 2.2128, + "step": 301280 + }, + { + "epoch": 0.71, + "grad_norm": 2.296875, + "learning_rate": 0.00014413220691547623, + "loss": 1.9457, + "step": 301285 + }, + { + "epoch": 0.71, + "grad_norm": 2.421875, + "learning_rate": 0.00014413054834584377, + "loss": 2.0555, + "step": 301290 + }, + { + "epoch": 0.71, + "grad_norm": 2.046875, + "learning_rate": 0.00014412888976113555, + "loss": 1.9185, + "step": 301295 + }, + { + "epoch": 0.71, + "grad_norm": 2.484375, + "learning_rate": 0.00014412723116135207, + "loss": 2.0425, + "step": 301300 + }, + { + "epoch": 0.71, + "grad_norm": 2.375, + "learning_rate": 0.00014412557254649396, + "loss": 2.1298, + "step": 301305 + }, + { + "epoch": 0.71, + "grad_norm": 2.359375, + "learning_rate": 0.00014412391391656172, + "loss": 2.3018, + "step": 301310 + }, + { + "epoch": 0.71, + "grad_norm": 2.359375, + "learning_rate": 0.00014412225527155596, + "loss": 1.9313, + "step": 301315 + }, + { + "epoch": 0.71, + "grad_norm": 2.328125, + "learning_rate": 0.00014412059661147726, + "loss": 2.0149, + "step": 301320 + }, + { + "epoch": 0.71, + "grad_norm": 2.109375, + "learning_rate": 0.00014411893793632615, + "loss": 1.8797, + "step": 301325 + }, + { + "epoch": 0.71, + "grad_norm": 1.65625, + "learning_rate": 0.0001441172792461032, + "loss": 1.899, + "step": 301330 + }, + { + "epoch": 0.71, + "grad_norm": 1.8671875, + "learning_rate": 0.000144115620540809, + "loss": 2.1946, + "step": 301335 + }, + { + "epoch": 0.71, + "grad_norm": 2.546875, + "learning_rate": 0.0001441139618204441, + "loss": 1.8966, + "step": 301340 + }, + { + "epoch": 0.71, + "grad_norm": 2.125, + "learning_rate": 0.00014411230308500908, + "loss": 2.1031, + "step": 301345 + }, + { + "epoch": 0.71, + "grad_norm": 2.109375, + "learning_rate": 0.0001441106443345045, + "loss": 2.014, + "step": 301350 + }, + { + "epoch": 0.71, + "grad_norm": 2.15625, + "learning_rate": 0.00014410898556893088, + "loss": 2.0455, + "step": 301355 + }, + { + "epoch": 0.71, + "grad_norm": 2.390625, + "learning_rate": 0.00014410732678828887, + "loss": 2.0711, + "step": 301360 + }, + { + "epoch": 0.71, + "grad_norm": 1.9453125, + "learning_rate": 0.000144105667992579, + "loss": 1.912, + "step": 301365 + }, + { + "epoch": 0.71, + "grad_norm": 2.078125, + "learning_rate": 0.00014410400918180185, + "loss": 2.1989, + "step": 301370 + }, + { + "epoch": 0.71, + "grad_norm": 2.015625, + "learning_rate": 0.00014410235035595793, + "loss": 1.8502, + "step": 301375 + }, + { + "epoch": 0.71, + "grad_norm": 2.140625, + "learning_rate": 0.00014410069151504784, + "loss": 2.0739, + "step": 301380 + }, + { + "epoch": 0.71, + "grad_norm": 2.203125, + "learning_rate": 0.0001440990326590722, + "loss": 2.0024, + "step": 301385 + }, + { + "epoch": 0.71, + "grad_norm": 2.234375, + "learning_rate": 0.00014409737378803152, + "loss": 2.058, + "step": 301390 + }, + { + "epoch": 0.71, + "grad_norm": 2.234375, + "learning_rate": 0.00014409571490192635, + "loss": 1.9073, + "step": 301395 + }, + { + "epoch": 0.71, + "grad_norm": 2.3125, + "learning_rate": 0.00014409405600075733, + "loss": 2.0158, + "step": 301400 + }, + { + "epoch": 0.71, + "grad_norm": 2.234375, + "learning_rate": 0.00014409239708452494, + "loss": 1.9665, + "step": 301405 + }, + { + "epoch": 0.71, + "grad_norm": 2.046875, + "learning_rate": 0.00014409073815322977, + "loss": 2.0461, + "step": 301410 + }, + { + "epoch": 0.71, + "grad_norm": 2.484375, + "learning_rate": 0.00014408907920687245, + "loss": 1.8106, + "step": 301415 + }, + { + "epoch": 0.71, + "grad_norm": 2.234375, + "learning_rate": 0.00014408742024545347, + "loss": 2.0835, + "step": 301420 + }, + { + "epoch": 0.71, + "grad_norm": 2.4375, + "learning_rate": 0.00014408576126897344, + "loss": 2.061, + "step": 301425 + }, + { + "epoch": 0.71, + "grad_norm": 1.8671875, + "learning_rate": 0.0001440841022774329, + "loss": 2.1628, + "step": 301430 + }, + { + "epoch": 0.71, + "grad_norm": 2.09375, + "learning_rate": 0.00014408244327083247, + "loss": 2.0484, + "step": 301435 + }, + { + "epoch": 0.71, + "grad_norm": 2.265625, + "learning_rate": 0.00014408078424917265, + "loss": 1.9129, + "step": 301440 + }, + { + "epoch": 0.71, + "grad_norm": 2.4375, + "learning_rate": 0.00014407912521245404, + "loss": 2.1182, + "step": 301445 + }, + { + "epoch": 0.71, + "grad_norm": 2.46875, + "learning_rate": 0.0001440774661606772, + "loss": 2.1229, + "step": 301450 + }, + { + "epoch": 0.71, + "grad_norm": 2.21875, + "learning_rate": 0.0001440758070938427, + "loss": 2.0544, + "step": 301455 + }, + { + "epoch": 0.71, + "grad_norm": 1.875, + "learning_rate": 0.0001440741480119511, + "loss": 2.1041, + "step": 301460 + }, + { + "epoch": 0.71, + "grad_norm": 2.34375, + "learning_rate": 0.00014407248891500296, + "loss": 2.0991, + "step": 301465 + }, + { + "epoch": 0.71, + "grad_norm": 2.703125, + "learning_rate": 0.00014407082980299885, + "loss": 2.081, + "step": 301470 + }, + { + "epoch": 0.71, + "grad_norm": 2.0625, + "learning_rate": 0.0001440691706759394, + "loss": 2.0357, + "step": 301475 + }, + { + "epoch": 0.71, + "grad_norm": 2.546875, + "learning_rate": 0.00014406751153382507, + "loss": 2.0606, + "step": 301480 + }, + { + "epoch": 0.71, + "grad_norm": 2.5, + "learning_rate": 0.00014406585237665653, + "loss": 2.155, + "step": 301485 + }, + { + "epoch": 0.71, + "grad_norm": 2.453125, + "learning_rate": 0.00014406419320443426, + "loss": 2.1374, + "step": 301490 + }, + { + "epoch": 0.71, + "grad_norm": 2.3125, + "learning_rate": 0.00014406253401715887, + "loss": 2.0649, + "step": 301495 + }, + { + "epoch": 0.71, + "grad_norm": 2.21875, + "learning_rate": 0.0001440608748148309, + "loss": 1.919, + "step": 301500 + }, + { + "epoch": 0.71, + "grad_norm": 2.4375, + "learning_rate": 0.00014405921559745098, + "loss": 1.9968, + "step": 301505 + }, + { + "epoch": 0.71, + "grad_norm": 2.28125, + "learning_rate": 0.00014405755636501957, + "loss": 2.1487, + "step": 301510 + }, + { + "epoch": 0.71, + "grad_norm": 2.0, + "learning_rate": 0.00014405589711753732, + "loss": 1.8352, + "step": 301515 + }, + { + "epoch": 0.71, + "grad_norm": 2.59375, + "learning_rate": 0.0001440542378550048, + "loss": 1.9179, + "step": 301520 + }, + { + "epoch": 0.71, + "grad_norm": 2.1875, + "learning_rate": 0.00014405257857742258, + "loss": 2.0721, + "step": 301525 + }, + { + "epoch": 0.71, + "grad_norm": 2.328125, + "learning_rate": 0.00014405091928479114, + "loss": 2.0242, + "step": 301530 + }, + { + "epoch": 0.71, + "grad_norm": 1.96875, + "learning_rate": 0.00014404925997711114, + "loss": 2.1447, + "step": 301535 + }, + { + "epoch": 0.71, + "grad_norm": 2.28125, + "learning_rate": 0.0001440476006543831, + "loss": 2.2048, + "step": 301540 + }, + { + "epoch": 0.71, + "grad_norm": 2.0625, + "learning_rate": 0.0001440459413166076, + "loss": 2.2165, + "step": 301545 + }, + { + "epoch": 0.71, + "grad_norm": 2.265625, + "learning_rate": 0.00014404428196378524, + "loss": 1.7796, + "step": 301550 + }, + { + "epoch": 0.71, + "grad_norm": 2.1875, + "learning_rate": 0.00014404262259591651, + "loss": 1.9915, + "step": 301555 + }, + { + "epoch": 0.71, + "grad_norm": 1.8671875, + "learning_rate": 0.00014404096321300204, + "loss": 1.7891, + "step": 301560 + }, + { + "epoch": 0.71, + "grad_norm": 2.015625, + "learning_rate": 0.0001440393038150424, + "loss": 1.9355, + "step": 301565 + }, + { + "epoch": 0.71, + "grad_norm": 1.78125, + "learning_rate": 0.0001440376444020381, + "loss": 2.0312, + "step": 301570 + }, + { + "epoch": 0.71, + "grad_norm": 2.65625, + "learning_rate": 0.00014403598497398975, + "loss": 2.264, + "step": 301575 + }, + { + "epoch": 0.71, + "grad_norm": 2.4375, + "learning_rate": 0.00014403432553089792, + "loss": 2.1406, + "step": 301580 + }, + { + "epoch": 0.71, + "grad_norm": 2.03125, + "learning_rate": 0.00014403266607276315, + "loss": 1.9937, + "step": 301585 + }, + { + "epoch": 0.71, + "grad_norm": 1.8828125, + "learning_rate": 0.00014403100659958603, + "loss": 1.9978, + "step": 301590 + }, + { + "epoch": 0.71, + "grad_norm": 2.171875, + "learning_rate": 0.00014402934711136713, + "loss": 1.9976, + "step": 301595 + }, + { + "epoch": 0.71, + "grad_norm": 2.109375, + "learning_rate": 0.000144027687608107, + "loss": 2.0678, + "step": 301600 + }, + { + "epoch": 0.71, + "grad_norm": 2.5625, + "learning_rate": 0.0001440260280898062, + "loss": 2.03, + "step": 301605 + }, + { + "epoch": 0.71, + "grad_norm": 1.828125, + "learning_rate": 0.00014402436855646532, + "loss": 2.0382, + "step": 301610 + }, + { + "epoch": 0.71, + "grad_norm": 1.71875, + "learning_rate": 0.00014402270900808493, + "loss": 2.1582, + "step": 301615 + }, + { + "epoch": 0.71, + "grad_norm": 2.28125, + "learning_rate": 0.0001440210494446656, + "loss": 2.133, + "step": 301620 + }, + { + "epoch": 0.71, + "grad_norm": 2.078125, + "learning_rate": 0.00014401938986620784, + "loss": 2.0192, + "step": 301625 + }, + { + "epoch": 0.71, + "grad_norm": 3.09375, + "learning_rate": 0.00014401773027271228, + "loss": 2.1008, + "step": 301630 + }, + { + "epoch": 0.71, + "grad_norm": 2.171875, + "learning_rate": 0.00014401607066417946, + "loss": 2.1108, + "step": 301635 + }, + { + "epoch": 0.71, + "grad_norm": 2.453125, + "learning_rate": 0.00014401441104060995, + "loss": 2.1706, + "step": 301640 + }, + { + "epoch": 0.71, + "grad_norm": 2.25, + "learning_rate": 0.00014401275140200434, + "loss": 1.9741, + "step": 301645 + }, + { + "epoch": 0.71, + "grad_norm": 1.8984375, + "learning_rate": 0.00014401109174836313, + "loss": 1.8733, + "step": 301650 + }, + { + "epoch": 0.71, + "grad_norm": 2.328125, + "learning_rate": 0.00014400943207968695, + "loss": 1.901, + "step": 301655 + }, + { + "epoch": 0.71, + "grad_norm": 2.3125, + "learning_rate": 0.00014400777239597637, + "loss": 2.08, + "step": 301660 + }, + { + "epoch": 0.71, + "grad_norm": 1.984375, + "learning_rate": 0.00014400611269723193, + "loss": 2.0839, + "step": 301665 + }, + { + "epoch": 0.71, + "grad_norm": 2.09375, + "learning_rate": 0.0001440044529834542, + "loss": 2.187, + "step": 301670 + }, + { + "epoch": 0.71, + "grad_norm": 1.8515625, + "learning_rate": 0.00014400279325464373, + "loss": 2.0271, + "step": 301675 + }, + { + "epoch": 0.71, + "grad_norm": 2.234375, + "learning_rate": 0.00014400113351080112, + "loss": 2.1577, + "step": 301680 + }, + { + "epoch": 0.71, + "grad_norm": 2.359375, + "learning_rate": 0.00014399947375192697, + "loss": 2.0907, + "step": 301685 + }, + { + "epoch": 0.71, + "grad_norm": 2.25, + "learning_rate": 0.00014399781397802177, + "loss": 2.0273, + "step": 301690 + }, + { + "epoch": 0.71, + "grad_norm": 2.078125, + "learning_rate": 0.0001439961541890861, + "loss": 2.0307, + "step": 301695 + }, + { + "epoch": 0.71, + "grad_norm": 1.9453125, + "learning_rate": 0.00014399449438512053, + "loss": 2.1877, + "step": 301700 + }, + { + "epoch": 0.71, + "grad_norm": 2.515625, + "learning_rate": 0.00014399283456612572, + "loss": 1.9484, + "step": 301705 + }, + { + "epoch": 0.71, + "grad_norm": 2.1875, + "learning_rate": 0.00014399117473210208, + "loss": 2.0127, + "step": 301710 + }, + { + "epoch": 0.71, + "grad_norm": 2.53125, + "learning_rate": 0.0001439895148830503, + "loss": 2.1457, + "step": 301715 + }, + { + "epoch": 0.71, + "grad_norm": 2.4375, + "learning_rate": 0.0001439878550189709, + "loss": 2.2387, + "step": 301720 + }, + { + "epoch": 0.71, + "grad_norm": 2.5625, + "learning_rate": 0.00014398619513986445, + "loss": 1.9363, + "step": 301725 + }, + { + "epoch": 0.71, + "grad_norm": 2.203125, + "learning_rate": 0.0001439845352457315, + "loss": 2.2575, + "step": 301730 + }, + { + "epoch": 0.71, + "grad_norm": 2.328125, + "learning_rate": 0.00014398287533657264, + "loss": 2.1217, + "step": 301735 + }, + { + "epoch": 0.71, + "grad_norm": 2.703125, + "learning_rate": 0.00014398121541238842, + "loss": 1.9685, + "step": 301740 + }, + { + "epoch": 0.71, + "grad_norm": 2.234375, + "learning_rate": 0.00014397955547317944, + "loss": 2.0363, + "step": 301745 + }, + { + "epoch": 0.71, + "grad_norm": 2.515625, + "learning_rate": 0.00014397789551894624, + "loss": 2.1828, + "step": 301750 + }, + { + "epoch": 0.71, + "grad_norm": 2.109375, + "learning_rate": 0.00014397623554968943, + "loss": 2.0133, + "step": 301755 + }, + { + "epoch": 0.71, + "grad_norm": 2.203125, + "learning_rate": 0.0001439745755654095, + "loss": 2.2465, + "step": 301760 + }, + { + "epoch": 0.71, + "grad_norm": 2.0625, + "learning_rate": 0.00014397291556610706, + "loss": 2.1341, + "step": 301765 + }, + { + "epoch": 0.71, + "grad_norm": 2.53125, + "learning_rate": 0.00014397125555178268, + "loss": 2.054, + "step": 301770 + }, + { + "epoch": 0.71, + "grad_norm": 1.7734375, + "learning_rate": 0.00014396959552243692, + "loss": 2.0408, + "step": 301775 + }, + { + "epoch": 0.71, + "grad_norm": 2.28125, + "learning_rate": 0.00014396793547807036, + "loss": 2.0411, + "step": 301780 + }, + { + "epoch": 0.71, + "grad_norm": 2.015625, + "learning_rate": 0.00014396627541868353, + "loss": 1.9034, + "step": 301785 + }, + { + "epoch": 0.71, + "grad_norm": 2.140625, + "learning_rate": 0.00014396461534427703, + "loss": 2.2003, + "step": 301790 + }, + { + "epoch": 0.71, + "grad_norm": 1.921875, + "learning_rate": 0.00014396295525485143, + "loss": 1.9581, + "step": 301795 + }, + { + "epoch": 0.71, + "grad_norm": 2.515625, + "learning_rate": 0.0001439612951504073, + "loss": 2.2419, + "step": 301800 + }, + { + "epoch": 0.71, + "grad_norm": 1.9765625, + "learning_rate": 0.00014395963503094518, + "loss": 2.077, + "step": 301805 + }, + { + "epoch": 0.71, + "grad_norm": 1.8125, + "learning_rate": 0.00014395797489646564, + "loss": 2.0756, + "step": 301810 + }, + { + "epoch": 0.71, + "grad_norm": 2.15625, + "learning_rate": 0.00014395631474696928, + "loss": 1.9681, + "step": 301815 + }, + { + "epoch": 0.71, + "grad_norm": 2.3125, + "learning_rate": 0.00014395465458245664, + "loss": 1.9243, + "step": 301820 + }, + { + "epoch": 0.71, + "grad_norm": 2.40625, + "learning_rate": 0.00014395299440292828, + "loss": 2.0553, + "step": 301825 + }, + { + "epoch": 0.71, + "grad_norm": 2.078125, + "learning_rate": 0.0001439513342083848, + "loss": 2.144, + "step": 301830 + }, + { + "epoch": 0.71, + "grad_norm": 1.96875, + "learning_rate": 0.00014394967399882673, + "loss": 1.9469, + "step": 301835 + }, + { + "epoch": 0.71, + "grad_norm": 1.84375, + "learning_rate": 0.00014394801377425463, + "loss": 1.8691, + "step": 301840 + }, + { + "epoch": 0.71, + "grad_norm": 2.375, + "learning_rate": 0.00014394635353466917, + "loss": 1.8837, + "step": 301845 + }, + { + "epoch": 0.71, + "grad_norm": 2.046875, + "learning_rate": 0.0001439446932800708, + "loss": 1.9405, + "step": 301850 + }, + { + "epoch": 0.71, + "grad_norm": 2.09375, + "learning_rate": 0.0001439430330104601, + "loss": 2.221, + "step": 301855 + }, + { + "epoch": 0.71, + "grad_norm": 1.9921875, + "learning_rate": 0.00014394137272583768, + "loss": 2.0295, + "step": 301860 + }, + { + "epoch": 0.71, + "grad_norm": 2.265625, + "learning_rate": 0.00014393971242620407, + "loss": 2.2162, + "step": 301865 + }, + { + "epoch": 0.71, + "grad_norm": 2.09375, + "learning_rate": 0.0001439380521115599, + "loss": 2.0541, + "step": 301870 + }, + { + "epoch": 0.71, + "grad_norm": 2.578125, + "learning_rate": 0.00014393639178190566, + "loss": 2.0235, + "step": 301875 + }, + { + "epoch": 0.71, + "grad_norm": 2.171875, + "learning_rate": 0.00014393473143724196, + "loss": 2.2723, + "step": 301880 + }, + { + "epoch": 0.71, + "grad_norm": 2.515625, + "learning_rate": 0.00014393307107756935, + "loss": 1.9482, + "step": 301885 + }, + { + "epoch": 0.71, + "grad_norm": 2.625, + "learning_rate": 0.00014393141070288842, + "loss": 2.1, + "step": 301890 + }, + { + "epoch": 0.71, + "grad_norm": 2.109375, + "learning_rate": 0.00014392975031319972, + "loss": 2.1082, + "step": 301895 + }, + { + "epoch": 0.71, + "grad_norm": 2.296875, + "learning_rate": 0.00014392808990850383, + "loss": 2.0883, + "step": 301900 + }, + { + "epoch": 0.71, + "grad_norm": 1.9140625, + "learning_rate": 0.0001439264294888013, + "loss": 1.7879, + "step": 301905 + }, + { + "epoch": 0.71, + "grad_norm": 2.078125, + "learning_rate": 0.0001439247690540927, + "loss": 2.066, + "step": 301910 + }, + { + "epoch": 0.71, + "grad_norm": 2.109375, + "learning_rate": 0.00014392310860437862, + "loss": 1.9178, + "step": 301915 + }, + { + "epoch": 0.71, + "grad_norm": 2.203125, + "learning_rate": 0.00014392144813965958, + "loss": 2.0361, + "step": 301920 + }, + { + "epoch": 0.71, + "grad_norm": 2.171875, + "learning_rate": 0.00014391978765993622, + "loss": 1.9713, + "step": 301925 + }, + { + "epoch": 0.71, + "grad_norm": 2.875, + "learning_rate": 0.000143918127165209, + "loss": 2.1217, + "step": 301930 + }, + { + "epoch": 0.71, + "grad_norm": 2.140625, + "learning_rate": 0.00014391646665547862, + "loss": 2.1753, + "step": 301935 + }, + { + "epoch": 0.71, + "grad_norm": 2.296875, + "learning_rate": 0.00014391480613074555, + "loss": 2.0778, + "step": 301940 + }, + { + "epoch": 0.71, + "grad_norm": 2.328125, + "learning_rate": 0.00014391314559101037, + "loss": 2.022, + "step": 301945 + }, + { + "epoch": 0.71, + "grad_norm": 2.328125, + "learning_rate": 0.00014391148503627366, + "loss": 2.1964, + "step": 301950 + }, + { + "epoch": 0.71, + "grad_norm": 2.09375, + "learning_rate": 0.000143909824466536, + "loss": 2.0532, + "step": 301955 + }, + { + "epoch": 0.71, + "grad_norm": 2.625, + "learning_rate": 0.00014390816388179796, + "loss": 2.205, + "step": 301960 + }, + { + "epoch": 0.71, + "grad_norm": 2.40625, + "learning_rate": 0.0001439065032820601, + "loss": 1.9712, + "step": 301965 + }, + { + "epoch": 0.71, + "grad_norm": 2.203125, + "learning_rate": 0.00014390484266732295, + "loss": 2.0562, + "step": 301970 + }, + { + "epoch": 0.71, + "grad_norm": 2.25, + "learning_rate": 0.00014390318203758715, + "loss": 2.052, + "step": 301975 + }, + { + "epoch": 0.71, + "grad_norm": 1.9765625, + "learning_rate": 0.00014390152139285323, + "loss": 1.9835, + "step": 301980 + }, + { + "epoch": 0.71, + "grad_norm": 2.140625, + "learning_rate": 0.00014389986073312173, + "loss": 2.1629, + "step": 301985 + }, + { + "epoch": 0.71, + "grad_norm": 1.8046875, + "learning_rate": 0.00014389820005839322, + "loss": 1.9969, + "step": 301990 + }, + { + "epoch": 0.71, + "grad_norm": 2.21875, + "learning_rate": 0.0001438965393686683, + "loss": 2.1605, + "step": 301995 + }, + { + "epoch": 0.71, + "grad_norm": 1.8984375, + "learning_rate": 0.00014389487866394758, + "loss": 2.1004, + "step": 302000 + }, + { + "epoch": 0.71, + "grad_norm": 1.953125, + "learning_rate": 0.0001438932179442315, + "loss": 2.0142, + "step": 302005 + }, + { + "epoch": 0.71, + "grad_norm": 2.15625, + "learning_rate": 0.00014389155720952073, + "loss": 1.7588, + "step": 302010 + }, + { + "epoch": 0.71, + "grad_norm": 2.0625, + "learning_rate": 0.00014388989645981582, + "loss": 1.8952, + "step": 302015 + }, + { + "epoch": 0.71, + "grad_norm": 2.296875, + "learning_rate": 0.00014388823569511732, + "loss": 2.0815, + "step": 302020 + }, + { + "epoch": 0.71, + "grad_norm": 2.140625, + "learning_rate": 0.00014388657491542578, + "loss": 2.1646, + "step": 302025 + }, + { + "epoch": 0.71, + "grad_norm": 2.328125, + "learning_rate": 0.00014388491412074182, + "loss": 2.2651, + "step": 302030 + }, + { + "epoch": 0.71, + "grad_norm": 2.0, + "learning_rate": 0.00014388325331106598, + "loss": 2.0262, + "step": 302035 + }, + { + "epoch": 0.71, + "grad_norm": 2.21875, + "learning_rate": 0.0001438815924863988, + "loss": 2.0862, + "step": 302040 + }, + { + "epoch": 0.71, + "grad_norm": 2.078125, + "learning_rate": 0.00014387993164674087, + "loss": 2.0698, + "step": 302045 + }, + { + "epoch": 0.71, + "grad_norm": 2.453125, + "learning_rate": 0.00014387827079209278, + "loss": 2.0148, + "step": 302050 + }, + { + "epoch": 0.71, + "grad_norm": 2.140625, + "learning_rate": 0.00014387660992245507, + "loss": 1.9991, + "step": 302055 + }, + { + "epoch": 0.71, + "grad_norm": 2.0625, + "learning_rate": 0.0001438749490378283, + "loss": 2.0727, + "step": 302060 + }, + { + "epoch": 0.71, + "grad_norm": 2.28125, + "learning_rate": 0.0001438732881382131, + "loss": 2.042, + "step": 302065 + }, + { + "epoch": 0.71, + "grad_norm": 1.9921875, + "learning_rate": 0.00014387162722360994, + "loss": 2.1082, + "step": 302070 + }, + { + "epoch": 0.71, + "grad_norm": 2.25, + "learning_rate": 0.00014386996629401944, + "loss": 1.8999, + "step": 302075 + }, + { + "epoch": 0.71, + "grad_norm": 2.21875, + "learning_rate": 0.00014386830534944217, + "loss": 2.0707, + "step": 302080 + }, + { + "epoch": 0.71, + "grad_norm": 2.03125, + "learning_rate": 0.0001438666443898787, + "loss": 2.0118, + "step": 302085 + }, + { + "epoch": 0.71, + "grad_norm": 1.9453125, + "learning_rate": 0.00014386498341532956, + "loss": 2.1941, + "step": 302090 + }, + { + "epoch": 0.71, + "grad_norm": 1.984375, + "learning_rate": 0.00014386332242579541, + "loss": 2.0455, + "step": 302095 + }, + { + "epoch": 0.71, + "grad_norm": 2.015625, + "learning_rate": 0.0001438616614212767, + "loss": 2.1688, + "step": 302100 + }, + { + "epoch": 0.71, + "grad_norm": 2.328125, + "learning_rate": 0.00014386000040177404, + "loss": 2.0871, + "step": 302105 + }, + { + "epoch": 0.71, + "grad_norm": 2.28125, + "learning_rate": 0.00014385833936728806, + "loss": 2.1593, + "step": 302110 + }, + { + "epoch": 0.71, + "grad_norm": 2.1875, + "learning_rate": 0.00014385667831781927, + "loss": 2.1521, + "step": 302115 + }, + { + "epoch": 0.71, + "grad_norm": 2.3125, + "learning_rate": 0.00014385501725336822, + "loss": 2.0375, + "step": 302120 + }, + { + "epoch": 0.71, + "grad_norm": 2.4375, + "learning_rate": 0.0001438533561739355, + "loss": 2.0265, + "step": 302125 + }, + { + "epoch": 0.71, + "grad_norm": 2.28125, + "learning_rate": 0.00014385169507952165, + "loss": 1.9954, + "step": 302130 + }, + { + "epoch": 0.71, + "grad_norm": 2.328125, + "learning_rate": 0.0001438500339701273, + "loss": 2.1545, + "step": 302135 + }, + { + "epoch": 0.71, + "grad_norm": 2.28125, + "learning_rate": 0.00014384837284575302, + "loss": 2.175, + "step": 302140 + }, + { + "epoch": 0.71, + "grad_norm": 1.8359375, + "learning_rate": 0.00014384671170639926, + "loss": 2.1286, + "step": 302145 + }, + { + "epoch": 0.71, + "grad_norm": 2.25, + "learning_rate": 0.0001438450505520667, + "loss": 2.2242, + "step": 302150 + }, + { + "epoch": 0.71, + "grad_norm": 2.015625, + "learning_rate": 0.0001438433893827559, + "loss": 2.1277, + "step": 302155 + }, + { + "epoch": 0.71, + "grad_norm": 1.9921875, + "learning_rate": 0.0001438417281984674, + "loss": 2.0232, + "step": 302160 + }, + { + "epoch": 0.71, + "grad_norm": 2.15625, + "learning_rate": 0.00014384006699920176, + "loss": 1.9535, + "step": 302165 + }, + { + "epoch": 0.71, + "grad_norm": 2.453125, + "learning_rate": 0.00014383840578495956, + "loss": 2.111, + "step": 302170 + }, + { + "epoch": 0.71, + "grad_norm": 2.234375, + "learning_rate": 0.00014383674455574136, + "loss": 2.2384, + "step": 302175 + }, + { + "epoch": 0.71, + "grad_norm": 2.078125, + "learning_rate": 0.00014383508331154775, + "loss": 1.9745, + "step": 302180 + }, + { + "epoch": 0.71, + "grad_norm": 2.015625, + "learning_rate": 0.00014383342205237924, + "loss": 1.9801, + "step": 302185 + }, + { + "epoch": 0.71, + "grad_norm": 2.15625, + "learning_rate": 0.00014383176077823647, + "loss": 1.9629, + "step": 302190 + }, + { + "epoch": 0.71, + "grad_norm": 2.328125, + "learning_rate": 0.00014383009948911997, + "loss": 1.9855, + "step": 302195 + }, + { + "epoch": 0.71, + "grad_norm": 2.78125, + "learning_rate": 0.00014382843818503029, + "loss": 2.054, + "step": 302200 + }, + { + "epoch": 0.71, + "grad_norm": 2.25, + "learning_rate": 0.00014382677686596807, + "loss": 2.072, + "step": 302205 + }, + { + "epoch": 0.71, + "grad_norm": 2.5, + "learning_rate": 0.00014382511553193378, + "loss": 2.0798, + "step": 302210 + }, + { + "epoch": 0.71, + "grad_norm": 2.1875, + "learning_rate": 0.00014382345418292806, + "loss": 2.0473, + "step": 302215 + }, + { + "epoch": 0.71, + "grad_norm": 2.015625, + "learning_rate": 0.00014382179281895144, + "loss": 2.0555, + "step": 302220 + }, + { + "epoch": 0.71, + "grad_norm": 2.203125, + "learning_rate": 0.0001438201314400045, + "loss": 2.1463, + "step": 302225 + }, + { + "epoch": 0.71, + "grad_norm": 2.125, + "learning_rate": 0.00014381847004608787, + "loss": 2.2112, + "step": 302230 + }, + { + "epoch": 0.71, + "grad_norm": 1.96875, + "learning_rate": 0.000143816808637202, + "loss": 1.9447, + "step": 302235 + }, + { + "epoch": 0.71, + "grad_norm": 2.265625, + "learning_rate": 0.0001438151472133475, + "loss": 1.9712, + "step": 302240 + }, + { + "epoch": 0.71, + "grad_norm": 2.171875, + "learning_rate": 0.00014381348577452495, + "loss": 2.1419, + "step": 302245 + }, + { + "epoch": 0.71, + "grad_norm": 2.203125, + "learning_rate": 0.00014381182432073494, + "loss": 2.0318, + "step": 302250 + }, + { + "epoch": 0.71, + "grad_norm": 1.7265625, + "learning_rate": 0.00014381016285197804, + "loss": 2.1076, + "step": 302255 + }, + { + "epoch": 0.71, + "grad_norm": 2.15625, + "learning_rate": 0.00014380850136825476, + "loss": 2.0375, + "step": 302260 + }, + { + "epoch": 0.71, + "grad_norm": 2.46875, + "learning_rate": 0.0001438068398695657, + "loss": 1.9888, + "step": 302265 + }, + { + "epoch": 0.71, + "grad_norm": 2.203125, + "learning_rate": 0.00014380517835591142, + "loss": 2.1547, + "step": 302270 + }, + { + "epoch": 0.71, + "grad_norm": 2.0, + "learning_rate": 0.00014380351682729257, + "loss": 1.9662, + "step": 302275 + }, + { + "epoch": 0.71, + "grad_norm": 3.28125, + "learning_rate": 0.00014380185528370957, + "loss": 2.1602, + "step": 302280 + }, + { + "epoch": 0.71, + "grad_norm": 2.015625, + "learning_rate": 0.00014380019372516308, + "loss": 2.2227, + "step": 302285 + }, + { + "epoch": 0.71, + "grad_norm": 2.40625, + "learning_rate": 0.00014379853215165363, + "loss": 2.0515, + "step": 302290 + }, + { + "epoch": 0.71, + "grad_norm": 1.84375, + "learning_rate": 0.00014379687056318186, + "loss": 2.1324, + "step": 302295 + }, + { + "epoch": 0.71, + "grad_norm": 2.140625, + "learning_rate": 0.00014379520895974824, + "loss": 2.1468, + "step": 302300 + }, + { + "epoch": 0.71, + "grad_norm": 2.1875, + "learning_rate": 0.0001437935473413534, + "loss": 2.0577, + "step": 302305 + }, + { + "epoch": 0.71, + "grad_norm": 1.953125, + "learning_rate": 0.00014379188570799788, + "loss": 1.9009, + "step": 302310 + }, + { + "epoch": 0.71, + "grad_norm": 2.4375, + "learning_rate": 0.00014379022405968224, + "loss": 2.0827, + "step": 302315 + }, + { + "epoch": 0.71, + "grad_norm": 2.453125, + "learning_rate": 0.0001437885623964071, + "loss": 2.0028, + "step": 302320 + }, + { + "epoch": 0.71, + "grad_norm": 2.03125, + "learning_rate": 0.000143786900718173, + "loss": 2.282, + "step": 302325 + }, + { + "epoch": 0.71, + "grad_norm": 2.28125, + "learning_rate": 0.00014378523902498048, + "loss": 2.0882, + "step": 302330 + }, + { + "epoch": 0.71, + "grad_norm": 2.203125, + "learning_rate": 0.0001437835773168301, + "loss": 1.8877, + "step": 302335 + }, + { + "epoch": 0.71, + "grad_norm": 2.109375, + "learning_rate": 0.00014378191559372253, + "loss": 1.9561, + "step": 302340 + }, + { + "epoch": 0.71, + "grad_norm": 2.171875, + "learning_rate": 0.00014378025385565818, + "loss": 1.8603, + "step": 302345 + }, + { + "epoch": 0.71, + "grad_norm": 2.21875, + "learning_rate": 0.00014377859210263776, + "loss": 2.1111, + "step": 302350 + }, + { + "epoch": 0.71, + "grad_norm": 2.546875, + "learning_rate": 0.00014377693033466174, + "loss": 2.1495, + "step": 302355 + }, + { + "epoch": 0.71, + "grad_norm": 2.59375, + "learning_rate": 0.00014377526855173075, + "loss": 1.9495, + "step": 302360 + }, + { + "epoch": 0.71, + "grad_norm": 2.046875, + "learning_rate": 0.00014377360675384534, + "loss": 1.8594, + "step": 302365 + }, + { + "epoch": 0.71, + "grad_norm": 2.140625, + "learning_rate": 0.00014377194494100606, + "loss": 1.9872, + "step": 302370 + }, + { + "epoch": 0.71, + "grad_norm": 1.9140625, + "learning_rate": 0.0001437702831132135, + "loss": 1.827, + "step": 302375 + }, + { + "epoch": 0.71, + "grad_norm": 1.625, + "learning_rate": 0.00014376862127046819, + "loss": 2.1503, + "step": 302380 + }, + { + "epoch": 0.71, + "grad_norm": 2.140625, + "learning_rate": 0.00014376695941277075, + "loss": 2.1664, + "step": 302385 + }, + { + "epoch": 0.71, + "grad_norm": 2.1875, + "learning_rate": 0.00014376529754012174, + "loss": 1.8226, + "step": 302390 + }, + { + "epoch": 0.71, + "grad_norm": 2.140625, + "learning_rate": 0.00014376363565252168, + "loss": 2.0194, + "step": 302395 + }, + { + "epoch": 0.71, + "grad_norm": 1.6875, + "learning_rate": 0.00014376197374997115, + "loss": 2.0306, + "step": 302400 + }, + { + "epoch": 0.71, + "grad_norm": 2.484375, + "learning_rate": 0.00014376031183247078, + "loss": 2.316, + "step": 302405 + }, + { + "epoch": 0.71, + "grad_norm": 2.421875, + "learning_rate": 0.0001437586499000211, + "loss": 2.0858, + "step": 302410 + }, + { + "epoch": 0.71, + "grad_norm": 2.15625, + "learning_rate": 0.00014375698795262263, + "loss": 1.9992, + "step": 302415 + }, + { + "epoch": 0.71, + "grad_norm": 1.9921875, + "learning_rate": 0.000143755325990276, + "loss": 2.1278, + "step": 302420 + }, + { + "epoch": 0.71, + "grad_norm": 2.875, + "learning_rate": 0.00014375366401298174, + "loss": 2.0156, + "step": 302425 + }, + { + "epoch": 0.71, + "grad_norm": 1.6640625, + "learning_rate": 0.00014375200202074043, + "loss": 2.0484, + "step": 302430 + }, + { + "epoch": 0.71, + "grad_norm": 2.0625, + "learning_rate": 0.0001437503400135527, + "loss": 1.9304, + "step": 302435 + }, + { + "epoch": 0.71, + "grad_norm": 2.390625, + "learning_rate": 0.00014374867799141902, + "loss": 2.1502, + "step": 302440 + }, + { + "epoch": 0.71, + "grad_norm": 2.28125, + "learning_rate": 0.00014374701595434, + "loss": 2.1406, + "step": 302445 + }, + { + "epoch": 0.71, + "grad_norm": 1.9375, + "learning_rate": 0.00014374535390231617, + "loss": 2.0337, + "step": 302450 + }, + { + "epoch": 0.71, + "grad_norm": 2.421875, + "learning_rate": 0.00014374369183534823, + "loss": 2.0669, + "step": 302455 + }, + { + "epoch": 0.71, + "grad_norm": 2.359375, + "learning_rate": 0.0001437420297534366, + "loss": 2.1975, + "step": 302460 + }, + { + "epoch": 0.71, + "grad_norm": 1.9140625, + "learning_rate": 0.00014374036765658189, + "loss": 2.0933, + "step": 302465 + }, + { + "epoch": 0.71, + "grad_norm": 2.28125, + "learning_rate": 0.00014373870554478465, + "loss": 2.0059, + "step": 302470 + }, + { + "epoch": 0.71, + "grad_norm": 1.8515625, + "learning_rate": 0.0001437370434180455, + "loss": 2.1001, + "step": 302475 + }, + { + "epoch": 0.71, + "grad_norm": 2.109375, + "learning_rate": 0.00014373538127636498, + "loss": 2.0659, + "step": 302480 + }, + { + "epoch": 0.71, + "grad_norm": 2.390625, + "learning_rate": 0.00014373371911974368, + "loss": 2.2427, + "step": 302485 + }, + { + "epoch": 0.71, + "grad_norm": 2.046875, + "learning_rate": 0.00014373205694818213, + "loss": 2.2461, + "step": 302490 + }, + { + "epoch": 0.71, + "grad_norm": 2.484375, + "learning_rate": 0.00014373039476168093, + "loss": 2.1043, + "step": 302495 + }, + { + "epoch": 0.71, + "grad_norm": 2.265625, + "learning_rate": 0.0001437287325602406, + "loss": 1.8936, + "step": 302500 + }, + { + "epoch": 0.71, + "grad_norm": 2.3125, + "learning_rate": 0.0001437270703438618, + "loss": 2.1955, + "step": 302505 + }, + { + "epoch": 0.71, + "grad_norm": 2.421875, + "learning_rate": 0.000143725408112545, + "loss": 1.9919, + "step": 302510 + }, + { + "epoch": 0.71, + "grad_norm": 2.140625, + "learning_rate": 0.0001437237458662908, + "loss": 2.1238, + "step": 302515 + }, + { + "epoch": 0.71, + "grad_norm": 1.8828125, + "learning_rate": 0.0001437220836050998, + "loss": 1.8788, + "step": 302520 + }, + { + "epoch": 0.71, + "grad_norm": 1.9375, + "learning_rate": 0.00014372042132897254, + "loss": 2.0906, + "step": 302525 + }, + { + "epoch": 0.71, + "grad_norm": 2.40625, + "learning_rate": 0.0001437187590379096, + "loss": 2.1057, + "step": 302530 + }, + { + "epoch": 0.71, + "grad_norm": 2.265625, + "learning_rate": 0.00014371709673191148, + "loss": 2.2165, + "step": 302535 + }, + { + "epoch": 0.71, + "grad_norm": 1.9453125, + "learning_rate": 0.00014371543441097888, + "loss": 2.1617, + "step": 302540 + }, + { + "epoch": 0.71, + "grad_norm": 1.96875, + "learning_rate": 0.00014371377207511224, + "loss": 2.1244, + "step": 302545 + }, + { + "epoch": 0.71, + "grad_norm": 2.140625, + "learning_rate": 0.00014371210972431223, + "loss": 2.0533, + "step": 302550 + }, + { + "epoch": 0.71, + "grad_norm": 2.734375, + "learning_rate": 0.00014371044735857934, + "loss": 1.9455, + "step": 302555 + }, + { + "epoch": 0.71, + "grad_norm": 2.109375, + "learning_rate": 0.00014370878497791417, + "loss": 1.96, + "step": 302560 + }, + { + "epoch": 0.71, + "grad_norm": 2.40625, + "learning_rate": 0.0001437071225823173, + "loss": 2.0425, + "step": 302565 + }, + { + "epoch": 0.71, + "grad_norm": 2.0625, + "learning_rate": 0.00014370546017178929, + "loss": 1.7775, + "step": 302570 + }, + { + "epoch": 0.71, + "grad_norm": 2.015625, + "learning_rate": 0.00014370379774633068, + "loss": 2.0507, + "step": 302575 + }, + { + "epoch": 0.71, + "grad_norm": 2.578125, + "learning_rate": 0.00014370213530594207, + "loss": 2.1747, + "step": 302580 + }, + { + "epoch": 0.71, + "grad_norm": 2.203125, + "learning_rate": 0.000143700472850624, + "loss": 1.9506, + "step": 302585 + }, + { + "epoch": 0.71, + "grad_norm": 2.296875, + "learning_rate": 0.00014369881038037708, + "loss": 2.0291, + "step": 302590 + }, + { + "epoch": 0.71, + "grad_norm": 2.1875, + "learning_rate": 0.00014369714789520187, + "loss": 2.192, + "step": 302595 + }, + { + "epoch": 0.71, + "grad_norm": 2.375, + "learning_rate": 0.0001436954853950989, + "loss": 2.0311, + "step": 302600 + }, + { + "epoch": 0.71, + "grad_norm": 2.40625, + "learning_rate": 0.00014369382288006877, + "loss": 2.041, + "step": 302605 + }, + { + "epoch": 0.71, + "grad_norm": 2.328125, + "learning_rate": 0.00014369216035011202, + "loss": 1.9548, + "step": 302610 + }, + { + "epoch": 0.71, + "grad_norm": 1.9296875, + "learning_rate": 0.00014369049780522922, + "loss": 2.0882, + "step": 302615 + }, + { + "epoch": 0.71, + "grad_norm": 2.140625, + "learning_rate": 0.00014368883524542097, + "loss": 1.8838, + "step": 302620 + }, + { + "epoch": 0.71, + "grad_norm": 2.203125, + "learning_rate": 0.00014368717267068784, + "loss": 2.2452, + "step": 302625 + }, + { + "epoch": 0.71, + "grad_norm": 1.7421875, + "learning_rate": 0.00014368551008103037, + "loss": 1.9331, + "step": 302630 + }, + { + "epoch": 0.71, + "grad_norm": 2.03125, + "learning_rate": 0.00014368384747644915, + "loss": 1.9802, + "step": 302635 + }, + { + "epoch": 0.71, + "grad_norm": 2.734375, + "learning_rate": 0.0001436821848569447, + "loss": 1.9898, + "step": 302640 + }, + { + "epoch": 0.71, + "grad_norm": 1.859375, + "learning_rate": 0.00014368052222251765, + "loss": 2.1218, + "step": 302645 + }, + { + "epoch": 0.71, + "grad_norm": 2.09375, + "learning_rate": 0.00014367885957316853, + "loss": 2.0826, + "step": 302650 + }, + { + "epoch": 0.71, + "grad_norm": 2.234375, + "learning_rate": 0.00014367719690889792, + "loss": 2.0775, + "step": 302655 + }, + { + "epoch": 0.71, + "grad_norm": 2.171875, + "learning_rate": 0.00014367553422970638, + "loss": 2.043, + "step": 302660 + }, + { + "epoch": 0.71, + "grad_norm": 2.40625, + "learning_rate": 0.00014367387153559454, + "loss": 1.9526, + "step": 302665 + }, + { + "epoch": 0.71, + "grad_norm": 2.09375, + "learning_rate": 0.00014367220882656286, + "loss": 1.9605, + "step": 302670 + }, + { + "epoch": 0.71, + "grad_norm": 8.0, + "learning_rate": 0.00014367054610261196, + "loss": 2.121, + "step": 302675 + }, + { + "epoch": 0.71, + "grad_norm": 2.015625, + "learning_rate": 0.00014366888336374243, + "loss": 1.9296, + "step": 302680 + }, + { + "epoch": 0.71, + "grad_norm": 2.234375, + "learning_rate": 0.0001436672206099548, + "loss": 1.9865, + "step": 302685 + }, + { + "epoch": 0.71, + "grad_norm": 2.265625, + "learning_rate": 0.0001436655578412497, + "loss": 1.9863, + "step": 302690 + }, + { + "epoch": 0.71, + "grad_norm": 2.453125, + "learning_rate": 0.00014366389505762758, + "loss": 2.0307, + "step": 302695 + }, + { + "epoch": 0.71, + "grad_norm": 2.140625, + "learning_rate": 0.00014366223225908913, + "loss": 1.9064, + "step": 302700 + }, + { + "epoch": 0.71, + "grad_norm": 2.3125, + "learning_rate": 0.00014366056944563487, + "loss": 2.0478, + "step": 302705 + }, + { + "epoch": 0.71, + "grad_norm": 2.390625, + "learning_rate": 0.00014365890661726535, + "loss": 1.9755, + "step": 302710 + }, + { + "epoch": 0.71, + "grad_norm": 2.125, + "learning_rate": 0.00014365724377398114, + "loss": 2.0112, + "step": 302715 + }, + { + "epoch": 0.71, + "grad_norm": 2.296875, + "learning_rate": 0.00014365558091578286, + "loss": 2.0028, + "step": 302720 + }, + { + "epoch": 0.71, + "grad_norm": 2.15625, + "learning_rate": 0.000143653918042671, + "loss": 1.9888, + "step": 302725 + }, + { + "epoch": 0.71, + "grad_norm": 2.296875, + "learning_rate": 0.00014365225515464624, + "loss": 2.0216, + "step": 302730 + }, + { + "epoch": 0.71, + "grad_norm": 2.78125, + "learning_rate": 0.00014365059225170901, + "loss": 2.1933, + "step": 302735 + }, + { + "epoch": 0.71, + "grad_norm": 2.3125, + "learning_rate": 0.00014364892933386, + "loss": 2.0808, + "step": 302740 + }, + { + "epoch": 0.71, + "grad_norm": 2.71875, + "learning_rate": 0.00014364726640109967, + "loss": 1.9521, + "step": 302745 + }, + { + "epoch": 0.71, + "grad_norm": 2.375, + "learning_rate": 0.0001436456034534287, + "loss": 2.1219, + "step": 302750 + }, + { + "epoch": 0.71, + "grad_norm": 2.109375, + "learning_rate": 0.00014364394049084755, + "loss": 1.9927, + "step": 302755 + }, + { + "epoch": 0.71, + "grad_norm": 1.9375, + "learning_rate": 0.00014364227751335686, + "loss": 1.7838, + "step": 302760 + }, + { + "epoch": 0.71, + "grad_norm": 2.25, + "learning_rate": 0.00014364061452095718, + "loss": 1.9626, + "step": 302765 + }, + { + "epoch": 0.71, + "grad_norm": 2.25, + "learning_rate": 0.00014363895151364906, + "loss": 1.9932, + "step": 302770 + }, + { + "epoch": 0.71, + "grad_norm": 2.015625, + "learning_rate": 0.0001436372884914331, + "loss": 2.0343, + "step": 302775 + }, + { + "epoch": 0.71, + "grad_norm": 2.40625, + "learning_rate": 0.00014363562545430982, + "loss": 2.0765, + "step": 302780 + }, + { + "epoch": 0.71, + "grad_norm": 2.078125, + "learning_rate": 0.00014363396240227983, + "loss": 1.9189, + "step": 302785 + }, + { + "epoch": 0.71, + "grad_norm": 1.921875, + "learning_rate": 0.0001436322993353437, + "loss": 2.0661, + "step": 302790 + }, + { + "epoch": 0.71, + "grad_norm": 2.3125, + "learning_rate": 0.00014363063625350198, + "loss": 2.1437, + "step": 302795 + }, + { + "epoch": 0.71, + "grad_norm": 2.171875, + "learning_rate": 0.00014362897315675527, + "loss": 1.9171, + "step": 302800 + }, + { + "epoch": 0.71, + "grad_norm": 1.8359375, + "learning_rate": 0.00014362731004510408, + "loss": 2.1012, + "step": 302805 + }, + { + "epoch": 0.71, + "grad_norm": 2.359375, + "learning_rate": 0.00014362564691854904, + "loss": 1.993, + "step": 302810 + }, + { + "epoch": 0.71, + "grad_norm": 2.1875, + "learning_rate": 0.00014362398377709066, + "loss": 2.0525, + "step": 302815 + }, + { + "epoch": 0.71, + "grad_norm": 2.21875, + "learning_rate": 0.00014362232062072953, + "loss": 2.0836, + "step": 302820 + }, + { + "epoch": 0.71, + "grad_norm": 2.078125, + "learning_rate": 0.00014362065744946622, + "loss": 2.0775, + "step": 302825 + }, + { + "epoch": 0.71, + "grad_norm": 2.21875, + "learning_rate": 0.0001436189942633013, + "loss": 2.1824, + "step": 302830 + }, + { + "epoch": 0.71, + "grad_norm": 2.234375, + "learning_rate": 0.0001436173310622354, + "loss": 2.0118, + "step": 302835 + }, + { + "epoch": 0.71, + "grad_norm": 2.21875, + "learning_rate": 0.00014361566784626897, + "loss": 1.9457, + "step": 302840 + }, + { + "epoch": 0.71, + "grad_norm": 2.0625, + "learning_rate": 0.00014361400461540268, + "loss": 2.0286, + "step": 302845 + }, + { + "epoch": 0.71, + "grad_norm": 2.296875, + "learning_rate": 0.000143612341369637, + "loss": 1.9052, + "step": 302850 + }, + { + "epoch": 0.71, + "grad_norm": 2.09375, + "learning_rate": 0.0001436106781089726, + "loss": 2.1098, + "step": 302855 + }, + { + "epoch": 0.71, + "grad_norm": 2.078125, + "learning_rate": 0.00014360901483340997, + "loss": 2.1136, + "step": 302860 + }, + { + "epoch": 0.71, + "grad_norm": 2.28125, + "learning_rate": 0.00014360735154294972, + "loss": 2.0461, + "step": 302865 + }, + { + "epoch": 0.71, + "grad_norm": 2.25, + "learning_rate": 0.00014360568823759242, + "loss": 2.0413, + "step": 302870 + }, + { + "epoch": 0.71, + "grad_norm": 2.390625, + "learning_rate": 0.00014360402491733862, + "loss": 2.1111, + "step": 302875 + }, + { + "epoch": 0.71, + "grad_norm": 2.3125, + "learning_rate": 0.00014360236158218888, + "loss": 1.8854, + "step": 302880 + }, + { + "epoch": 0.71, + "grad_norm": 2.296875, + "learning_rate": 0.0001436006982321438, + "loss": 2.0745, + "step": 302885 + }, + { + "epoch": 0.71, + "grad_norm": 2.625, + "learning_rate": 0.00014359903486720394, + "loss": 2.0851, + "step": 302890 + }, + { + "epoch": 0.71, + "grad_norm": 2.203125, + "learning_rate": 0.00014359737148736984, + "loss": 1.9438, + "step": 302895 + }, + { + "epoch": 0.71, + "grad_norm": 1.96875, + "learning_rate": 0.00014359570809264208, + "loss": 1.9517, + "step": 302900 + }, + { + "epoch": 0.71, + "grad_norm": 2.21875, + "learning_rate": 0.00014359404468302123, + "loss": 1.9254, + "step": 302905 + }, + { + "epoch": 0.71, + "grad_norm": 2.5, + "learning_rate": 0.00014359238125850792, + "loss": 2.1105, + "step": 302910 + }, + { + "epoch": 0.71, + "grad_norm": 2.484375, + "learning_rate": 0.0001435907178191026, + "loss": 1.9546, + "step": 302915 + }, + { + "epoch": 0.71, + "grad_norm": 2.40625, + "learning_rate": 0.0001435890543648059, + "loss": 1.9142, + "step": 302920 + }, + { + "epoch": 0.71, + "grad_norm": 2.09375, + "learning_rate": 0.00014358739089561842, + "loss": 2.2005, + "step": 302925 + }, + { + "epoch": 0.71, + "grad_norm": 2.578125, + "learning_rate": 0.0001435857274115407, + "loss": 2.2002, + "step": 302930 + }, + { + "epoch": 0.71, + "grad_norm": 2.21875, + "learning_rate": 0.0001435840639125733, + "loss": 2.166, + "step": 302935 + }, + { + "epoch": 0.71, + "grad_norm": 2.359375, + "learning_rate": 0.00014358240039871677, + "loss": 2.0232, + "step": 302940 + }, + { + "epoch": 0.71, + "grad_norm": 2.3125, + "learning_rate": 0.00014358073686997173, + "loss": 2.0923, + "step": 302945 + }, + { + "epoch": 0.71, + "grad_norm": 2.265625, + "learning_rate": 0.0001435790733263387, + "loss": 2.0389, + "step": 302950 + }, + { + "epoch": 0.71, + "grad_norm": 1.9453125, + "learning_rate": 0.0001435774097678183, + "loss": 2.1296, + "step": 302955 + }, + { + "epoch": 0.71, + "grad_norm": 2.921875, + "learning_rate": 0.00014357574619441104, + "loss": 2.0698, + "step": 302960 + }, + { + "epoch": 0.71, + "grad_norm": 1.9453125, + "learning_rate": 0.00014357408260611752, + "loss": 2.1724, + "step": 302965 + }, + { + "epoch": 0.71, + "grad_norm": 2.140625, + "learning_rate": 0.00014357241900293826, + "loss": 2.0685, + "step": 302970 + }, + { + "epoch": 0.71, + "grad_norm": 2.203125, + "learning_rate": 0.0001435707553848739, + "loss": 2.0319, + "step": 302975 + }, + { + "epoch": 0.71, + "grad_norm": 2.140625, + "learning_rate": 0.00014356909175192503, + "loss": 2.016, + "step": 302980 + }, + { + "epoch": 0.71, + "grad_norm": 2.625, + "learning_rate": 0.00014356742810409215, + "loss": 2.0806, + "step": 302985 + }, + { + "epoch": 0.71, + "grad_norm": 2.234375, + "learning_rate": 0.0001435657644413758, + "loss": 2.0574, + "step": 302990 + }, + { + "epoch": 0.71, + "grad_norm": 2.078125, + "learning_rate": 0.0001435641007637766, + "loss": 1.9891, + "step": 302995 + }, + { + "epoch": 0.71, + "grad_norm": 2.453125, + "learning_rate": 0.00014356243707129516, + "loss": 2.0251, + "step": 303000 + }, + { + "epoch": 0.71, + "grad_norm": 2.078125, + "learning_rate": 0.00014356077336393196, + "loss": 2.1148, + "step": 303005 + }, + { + "epoch": 0.71, + "grad_norm": 2.015625, + "learning_rate": 0.00014355910964168763, + "loss": 2.2145, + "step": 303010 + }, + { + "epoch": 0.71, + "grad_norm": 1.8671875, + "learning_rate": 0.0001435574459045627, + "loss": 2.0412, + "step": 303015 + }, + { + "epoch": 0.71, + "grad_norm": 2.1875, + "learning_rate": 0.00014355578215255777, + "loss": 2.1226, + "step": 303020 + }, + { + "epoch": 0.71, + "grad_norm": 1.75, + "learning_rate": 0.00014355411838567342, + "loss": 2.1287, + "step": 303025 + }, + { + "epoch": 0.71, + "grad_norm": 2.203125, + "learning_rate": 0.00014355245460391015, + "loss": 2.0067, + "step": 303030 + }, + { + "epoch": 0.71, + "grad_norm": 2.15625, + "learning_rate": 0.00014355079080726862, + "loss": 2.0366, + "step": 303035 + }, + { + "epoch": 0.71, + "grad_norm": 2.109375, + "learning_rate": 0.00014354912699574926, + "loss": 2.1851, + "step": 303040 + }, + { + "epoch": 0.71, + "grad_norm": 2.296875, + "learning_rate": 0.00014354746316935283, + "loss": 2.0904, + "step": 303045 + }, + { + "epoch": 0.71, + "grad_norm": 2.078125, + "learning_rate": 0.00014354579932807977, + "loss": 2.2849, + "step": 303050 + }, + { + "epoch": 0.71, + "grad_norm": 2.109375, + "learning_rate": 0.00014354413547193064, + "loss": 2.1591, + "step": 303055 + }, + { + "epoch": 0.71, + "grad_norm": 2.703125, + "learning_rate": 0.00014354247160090606, + "loss": 2.1389, + "step": 303060 + }, + { + "epoch": 0.71, + "grad_norm": 2.0, + "learning_rate": 0.00014354080771500658, + "loss": 1.918, + "step": 303065 + }, + { + "epoch": 0.71, + "grad_norm": 2.46875, + "learning_rate": 0.0001435391438142328, + "loss": 2.0214, + "step": 303070 + }, + { + "epoch": 0.71, + "grad_norm": 2.546875, + "learning_rate": 0.0001435374798985852, + "loss": 2.2916, + "step": 303075 + }, + { + "epoch": 0.71, + "grad_norm": 2.71875, + "learning_rate": 0.00014353581596806447, + "loss": 2.0588, + "step": 303080 + }, + { + "epoch": 0.71, + "grad_norm": 2.703125, + "learning_rate": 0.00014353415202267107, + "loss": 2.086, + "step": 303085 + }, + { + "epoch": 0.71, + "grad_norm": 2.09375, + "learning_rate": 0.00014353248806240566, + "loss": 2.1723, + "step": 303090 + }, + { + "epoch": 0.71, + "grad_norm": 2.40625, + "learning_rate": 0.00014353082408726872, + "loss": 2.225, + "step": 303095 + }, + { + "epoch": 0.71, + "grad_norm": 2.640625, + "learning_rate": 0.00014352916009726088, + "loss": 1.9879, + "step": 303100 + }, + { + "epoch": 0.71, + "grad_norm": 1.90625, + "learning_rate": 0.00014352749609238268, + "loss": 1.9254, + "step": 303105 + }, + { + "epoch": 0.71, + "grad_norm": 2.109375, + "learning_rate": 0.0001435258320726347, + "loss": 2.1503, + "step": 303110 + }, + { + "epoch": 0.71, + "grad_norm": 2.0625, + "learning_rate": 0.00014352416803801755, + "loss": 2.1737, + "step": 303115 + }, + { + "epoch": 0.71, + "grad_norm": 2.328125, + "learning_rate": 0.00014352250398853167, + "loss": 2.1057, + "step": 303120 + }, + { + "epoch": 0.71, + "grad_norm": 2.46875, + "learning_rate": 0.00014352083992417774, + "loss": 2.2102, + "step": 303125 + }, + { + "epoch": 0.71, + "grad_norm": 2.25, + "learning_rate": 0.00014351917584495635, + "loss": 2.102, + "step": 303130 + }, + { + "epoch": 0.71, + "grad_norm": 2.359375, + "learning_rate": 0.000143517511750868, + "loss": 1.9824, + "step": 303135 + }, + { + "epoch": 0.71, + "grad_norm": 1.796875, + "learning_rate": 0.00014351584764191327, + "loss": 2.2433, + "step": 303140 + }, + { + "epoch": 0.71, + "grad_norm": 2.0, + "learning_rate": 0.00014351418351809273, + "loss": 2.2658, + "step": 303145 + }, + { + "epoch": 0.71, + "grad_norm": 2.8125, + "learning_rate": 0.00014351251937940693, + "loss": 1.8642, + "step": 303150 + }, + { + "epoch": 0.71, + "grad_norm": 1.7265625, + "learning_rate": 0.0001435108552258565, + "loss": 1.9965, + "step": 303155 + }, + { + "epoch": 0.71, + "grad_norm": 2.53125, + "learning_rate": 0.00014350919105744197, + "loss": 2.2188, + "step": 303160 + }, + { + "epoch": 0.71, + "grad_norm": 2.125, + "learning_rate": 0.00014350752687416392, + "loss": 1.9619, + "step": 303165 + }, + { + "epoch": 0.71, + "grad_norm": 2.46875, + "learning_rate": 0.00014350586267602292, + "loss": 2.0202, + "step": 303170 + }, + { + "epoch": 0.71, + "grad_norm": 2.296875, + "learning_rate": 0.00014350419846301948, + "loss": 2.0207, + "step": 303175 + }, + { + "epoch": 0.71, + "grad_norm": 2.453125, + "learning_rate": 0.00014350253423515424, + "loss": 2.1142, + "step": 303180 + }, + { + "epoch": 0.71, + "grad_norm": 2.265625, + "learning_rate": 0.00014350086999242775, + "loss": 2.2183, + "step": 303185 + }, + { + "epoch": 0.71, + "grad_norm": 2.15625, + "learning_rate": 0.0001434992057348406, + "loss": 2.1468, + "step": 303190 + }, + { + "epoch": 0.71, + "grad_norm": 2.125, + "learning_rate": 0.00014349754146239329, + "loss": 2.0229, + "step": 303195 + }, + { + "epoch": 0.71, + "grad_norm": 2.21875, + "learning_rate": 0.0001434958771750864, + "loss": 1.875, + "step": 303200 + }, + { + "epoch": 0.71, + "grad_norm": 2.140625, + "learning_rate": 0.00014349421287292063, + "loss": 2.0456, + "step": 303205 + }, + { + "epoch": 0.71, + "grad_norm": 2.21875, + "learning_rate": 0.00014349254855589637, + "loss": 2.0092, + "step": 303210 + }, + { + "epoch": 0.71, + "grad_norm": 1.8515625, + "learning_rate": 0.00014349088422401432, + "loss": 2.1809, + "step": 303215 + }, + { + "epoch": 0.71, + "grad_norm": 2.34375, + "learning_rate": 0.00014348921987727495, + "loss": 2.0563, + "step": 303220 + }, + { + "epoch": 0.71, + "grad_norm": 2.21875, + "learning_rate": 0.0001434875555156789, + "loss": 2.0745, + "step": 303225 + }, + { + "epoch": 0.71, + "grad_norm": 2.421875, + "learning_rate": 0.00014348589113922667, + "loss": 2.0272, + "step": 303230 + }, + { + "epoch": 0.71, + "grad_norm": 1.765625, + "learning_rate": 0.00014348422674791893, + "loss": 2.139, + "step": 303235 + }, + { + "epoch": 0.71, + "grad_norm": 2.65625, + "learning_rate": 0.00014348256234175617, + "loss": 2.0095, + "step": 303240 + }, + { + "epoch": 0.71, + "grad_norm": 2.25, + "learning_rate": 0.00014348089792073896, + "loss": 2.2463, + "step": 303245 + }, + { + "epoch": 0.71, + "grad_norm": 2.359375, + "learning_rate": 0.0001434792334848679, + "loss": 2.0426, + "step": 303250 + }, + { + "epoch": 0.71, + "grad_norm": 2.46875, + "learning_rate": 0.00014347756903414354, + "loss": 2.1813, + "step": 303255 + }, + { + "epoch": 0.71, + "grad_norm": 2.390625, + "learning_rate": 0.00014347590456856646, + "loss": 2.1211, + "step": 303260 + }, + { + "epoch": 0.71, + "grad_norm": 3.0, + "learning_rate": 0.0001434742400881372, + "loss": 2.0479, + "step": 303265 + }, + { + "epoch": 0.71, + "grad_norm": 2.03125, + "learning_rate": 0.00014347257559285638, + "loss": 2.0549, + "step": 303270 + }, + { + "epoch": 0.71, + "grad_norm": 2.265625, + "learning_rate": 0.00014347091108272454, + "loss": 2.0295, + "step": 303275 + }, + { + "epoch": 0.71, + "grad_norm": 2.453125, + "learning_rate": 0.00014346924655774226, + "loss": 2.0042, + "step": 303280 + }, + { + "epoch": 0.71, + "grad_norm": 2.296875, + "learning_rate": 0.00014346758201791004, + "loss": 2.0735, + "step": 303285 + }, + { + "epoch": 0.71, + "grad_norm": 2.671875, + "learning_rate": 0.00014346591746322855, + "loss": 2.262, + "step": 303290 + }, + { + "epoch": 0.71, + "grad_norm": 2.15625, + "learning_rate": 0.00014346425289369833, + "loss": 1.9951, + "step": 303295 + }, + { + "epoch": 0.71, + "grad_norm": 2.265625, + "learning_rate": 0.00014346258830931992, + "loss": 2.1666, + "step": 303300 + }, + { + "epoch": 0.71, + "grad_norm": 2.125, + "learning_rate": 0.00014346092371009389, + "loss": 1.9428, + "step": 303305 + }, + { + "epoch": 0.71, + "grad_norm": 2.25, + "learning_rate": 0.00014345925909602078, + "loss": 2.0147, + "step": 303310 + }, + { + "epoch": 0.71, + "grad_norm": 2.171875, + "learning_rate": 0.00014345759446710124, + "loss": 1.9458, + "step": 303315 + }, + { + "epoch": 0.71, + "grad_norm": 2.359375, + "learning_rate": 0.00014345592982333583, + "loss": 1.8953, + "step": 303320 + }, + { + "epoch": 0.71, + "grad_norm": 1.8828125, + "learning_rate": 0.00014345426516472506, + "loss": 2.0758, + "step": 303325 + }, + { + "epoch": 0.71, + "grad_norm": 2.203125, + "learning_rate": 0.0001434526004912695, + "loss": 2.1369, + "step": 303330 + }, + { + "epoch": 0.71, + "grad_norm": 2.140625, + "learning_rate": 0.00014345093580296975, + "loss": 2.124, + "step": 303335 + }, + { + "epoch": 0.71, + "grad_norm": 2.421875, + "learning_rate": 0.0001434492710998264, + "loss": 1.9712, + "step": 303340 + }, + { + "epoch": 0.71, + "grad_norm": 2.5, + "learning_rate": 0.00014344760638183997, + "loss": 2.1404, + "step": 303345 + }, + { + "epoch": 0.71, + "grad_norm": 2.46875, + "learning_rate": 0.00014344594164901104, + "loss": 1.9894, + "step": 303350 + }, + { + "epoch": 0.71, + "grad_norm": 2.296875, + "learning_rate": 0.0001434442769013402, + "loss": 1.9768, + "step": 303355 + }, + { + "epoch": 0.71, + "grad_norm": 2.296875, + "learning_rate": 0.00014344261213882802, + "loss": 2.1925, + "step": 303360 + }, + { + "epoch": 0.71, + "grad_norm": 1.953125, + "learning_rate": 0.00014344094736147504, + "loss": 1.9375, + "step": 303365 + }, + { + "epoch": 0.71, + "grad_norm": 2.28125, + "learning_rate": 0.00014343928256928186, + "loss": 2.203, + "step": 303370 + }, + { + "epoch": 0.71, + "grad_norm": 2.3125, + "learning_rate": 0.00014343761776224902, + "loss": 2.1978, + "step": 303375 + }, + { + "epoch": 0.71, + "grad_norm": 2.046875, + "learning_rate": 0.0001434359529403771, + "loss": 2.0539, + "step": 303380 + }, + { + "epoch": 0.71, + "grad_norm": 2.046875, + "learning_rate": 0.0001434342881036667, + "loss": 1.8214, + "step": 303385 + }, + { + "epoch": 0.71, + "grad_norm": 2.171875, + "learning_rate": 0.00014343262325211832, + "loss": 1.8581, + "step": 303390 + }, + { + "epoch": 0.71, + "grad_norm": 1.953125, + "learning_rate": 0.00014343095838573256, + "loss": 1.955, + "step": 303395 + }, + { + "epoch": 0.71, + "grad_norm": 1.828125, + "learning_rate": 0.00014342929350451004, + "loss": 1.8323, + "step": 303400 + }, + { + "epoch": 0.71, + "grad_norm": 2.34375, + "learning_rate": 0.00014342762860845127, + "loss": 2.1686, + "step": 303405 + }, + { + "epoch": 0.71, + "grad_norm": 2.0625, + "learning_rate": 0.00014342596369755684, + "loss": 2.0235, + "step": 303410 + }, + { + "epoch": 0.71, + "grad_norm": 2.234375, + "learning_rate": 0.00014342429877182728, + "loss": 2.0445, + "step": 303415 + }, + { + "epoch": 0.71, + "grad_norm": 2.796875, + "learning_rate": 0.00014342263383126323, + "loss": 2.134, + "step": 303420 + }, + { + "epoch": 0.71, + "grad_norm": 2.328125, + "learning_rate": 0.0001434209688758652, + "loss": 2.1552, + "step": 303425 + }, + { + "epoch": 0.71, + "grad_norm": 2.5625, + "learning_rate": 0.00014341930390563378, + "loss": 1.9434, + "step": 303430 + }, + { + "epoch": 0.71, + "grad_norm": 1.875, + "learning_rate": 0.00014341763892056957, + "loss": 1.7728, + "step": 303435 + }, + { + "epoch": 0.71, + "grad_norm": 2.3125, + "learning_rate": 0.00014341597392067307, + "loss": 1.9572, + "step": 303440 + }, + { + "epoch": 0.71, + "grad_norm": 2.25, + "learning_rate": 0.0001434143089059449, + "loss": 2.1185, + "step": 303445 + }, + { + "epoch": 0.71, + "grad_norm": 1.6171875, + "learning_rate": 0.0001434126438763856, + "loss": 2.2908, + "step": 303450 + }, + { + "epoch": 0.71, + "grad_norm": 2.578125, + "learning_rate": 0.0001434109788319958, + "loss": 1.9933, + "step": 303455 + }, + { + "epoch": 0.71, + "grad_norm": 1.84375, + "learning_rate": 0.00014340931377277599, + "loss": 2.2393, + "step": 303460 + }, + { + "epoch": 0.71, + "grad_norm": 2.453125, + "learning_rate": 0.00014340764869872675, + "loss": 1.9079, + "step": 303465 + }, + { + "epoch": 0.71, + "grad_norm": 1.921875, + "learning_rate": 0.00014340598360984867, + "loss": 2.1444, + "step": 303470 + }, + { + "epoch": 0.71, + "grad_norm": 2.125, + "learning_rate": 0.00014340431850614236, + "loss": 2.0699, + "step": 303475 + }, + { + "epoch": 0.71, + "grad_norm": 4.71875, + "learning_rate": 0.00014340265338760833, + "loss": 2.118, + "step": 303480 + }, + { + "epoch": 0.71, + "grad_norm": 2.015625, + "learning_rate": 0.00014340098825424717, + "loss": 2.1737, + "step": 303485 + }, + { + "epoch": 0.71, + "grad_norm": 1.796875, + "learning_rate": 0.00014339932310605943, + "loss": 1.8828, + "step": 303490 + }, + { + "epoch": 0.71, + "grad_norm": 2.21875, + "learning_rate": 0.0001433976579430457, + "loss": 2.0485, + "step": 303495 + }, + { + "epoch": 0.71, + "grad_norm": 1.953125, + "learning_rate": 0.00014339599276520654, + "loss": 2.132, + "step": 303500 + }, + { + "epoch": 0.71, + "grad_norm": 2.140625, + "learning_rate": 0.00014339432757254255, + "loss": 2.0095, + "step": 303505 + }, + { + "epoch": 0.71, + "grad_norm": 2.109375, + "learning_rate": 0.00014339266236505424, + "loss": 1.952, + "step": 303510 + }, + { + "epoch": 0.71, + "grad_norm": 2.234375, + "learning_rate": 0.0001433909971427422, + "loss": 2.0598, + "step": 303515 + }, + { + "epoch": 0.71, + "grad_norm": 2.171875, + "learning_rate": 0.00014338933190560703, + "loss": 2.216, + "step": 303520 + }, + { + "epoch": 0.71, + "grad_norm": 2.171875, + "learning_rate": 0.00014338766665364928, + "loss": 1.9016, + "step": 303525 + }, + { + "epoch": 0.71, + "grad_norm": 2.171875, + "learning_rate": 0.0001433860013868695, + "loss": 2.1628, + "step": 303530 + }, + { + "epoch": 0.71, + "grad_norm": 1.9375, + "learning_rate": 0.0001433843361052683, + "loss": 2.0023, + "step": 303535 + }, + { + "epoch": 0.71, + "grad_norm": 2.03125, + "learning_rate": 0.0001433826708088462, + "loss": 2.0137, + "step": 303540 + }, + { + "epoch": 0.71, + "grad_norm": 2.078125, + "learning_rate": 0.0001433810054976038, + "loss": 1.9752, + "step": 303545 + }, + { + "epoch": 0.71, + "grad_norm": 2.203125, + "learning_rate": 0.00014337934017154166, + "loss": 2.261, + "step": 303550 + }, + { + "epoch": 0.71, + "grad_norm": 1.90625, + "learning_rate": 0.00014337767483066035, + "loss": 2.0735, + "step": 303555 + }, + { + "epoch": 0.71, + "grad_norm": 1.8828125, + "learning_rate": 0.00014337600947496044, + "loss": 2.1116, + "step": 303560 + }, + { + "epoch": 0.71, + "grad_norm": 2.1875, + "learning_rate": 0.00014337434410444252, + "loss": 2.1812, + "step": 303565 + }, + { + "epoch": 0.71, + "grad_norm": 2.359375, + "learning_rate": 0.0001433726787191071, + "loss": 1.9873, + "step": 303570 + }, + { + "epoch": 0.71, + "grad_norm": 2.4375, + "learning_rate": 0.00014337101331895482, + "loss": 2.2001, + "step": 303575 + }, + { + "epoch": 0.71, + "grad_norm": 2.25, + "learning_rate": 0.0001433693479039862, + "loss": 2.0976, + "step": 303580 + }, + { + "epoch": 0.71, + "grad_norm": 2.046875, + "learning_rate": 0.0001433676824742018, + "loss": 1.9449, + "step": 303585 + }, + { + "epoch": 0.71, + "grad_norm": 2.1875, + "learning_rate": 0.00014336601702960228, + "loss": 2.0853, + "step": 303590 + }, + { + "epoch": 0.71, + "grad_norm": 2.40625, + "learning_rate": 0.00014336435157018807, + "loss": 2.0873, + "step": 303595 + }, + { + "epoch": 0.71, + "grad_norm": 2.0625, + "learning_rate": 0.00014336268609595987, + "loss": 2.0017, + "step": 303600 + }, + { + "epoch": 0.71, + "grad_norm": 2.4375, + "learning_rate": 0.00014336102060691813, + "loss": 1.9999, + "step": 303605 + }, + { + "epoch": 0.71, + "grad_norm": 2.296875, + "learning_rate": 0.0001433593551030635, + "loss": 2.2478, + "step": 303610 + }, + { + "epoch": 0.71, + "grad_norm": 2.375, + "learning_rate": 0.00014335768958439657, + "loss": 1.8364, + "step": 303615 + }, + { + "epoch": 0.71, + "grad_norm": 2.6875, + "learning_rate": 0.00014335602405091783, + "loss": 1.949, + "step": 303620 + }, + { + "epoch": 0.71, + "grad_norm": 1.890625, + "learning_rate": 0.0001433543585026279, + "loss": 1.8906, + "step": 303625 + }, + { + "epoch": 0.71, + "grad_norm": 1.8359375, + "learning_rate": 0.00014335269293952728, + "loss": 1.9855, + "step": 303630 + }, + { + "epoch": 0.71, + "grad_norm": 1.7734375, + "learning_rate": 0.00014335102736161667, + "loss": 2.0574, + "step": 303635 + }, + { + "epoch": 0.71, + "grad_norm": 2.34375, + "learning_rate": 0.00014334936176889652, + "loss": 2.1376, + "step": 303640 + }, + { + "epoch": 0.71, + "grad_norm": 2.0625, + "learning_rate": 0.00014334769616136748, + "loss": 1.9932, + "step": 303645 + }, + { + "epoch": 0.71, + "grad_norm": 2.265625, + "learning_rate": 0.00014334603053903005, + "loss": 1.9805, + "step": 303650 + }, + { + "epoch": 0.71, + "grad_norm": 2.109375, + "learning_rate": 0.00014334436490188483, + "loss": 1.9983, + "step": 303655 + }, + { + "epoch": 0.71, + "grad_norm": 2.546875, + "learning_rate": 0.0001433426992499324, + "loss": 2.0983, + "step": 303660 + }, + { + "epoch": 0.71, + "grad_norm": 2.390625, + "learning_rate": 0.0001433410335831733, + "loss": 2.1676, + "step": 303665 + }, + { + "epoch": 0.71, + "grad_norm": 2.875, + "learning_rate": 0.00014333936790160814, + "loss": 2.0734, + "step": 303670 + }, + { + "epoch": 0.71, + "grad_norm": 2.34375, + "learning_rate": 0.00014333770220523745, + "loss": 1.9731, + "step": 303675 + }, + { + "epoch": 0.71, + "grad_norm": 1.828125, + "learning_rate": 0.00014333603649406183, + "loss": 2.2596, + "step": 303680 + }, + { + "epoch": 0.71, + "grad_norm": 2.328125, + "learning_rate": 0.0001433343707680818, + "loss": 2.0149, + "step": 303685 + }, + { + "epoch": 0.71, + "grad_norm": 2.28125, + "learning_rate": 0.000143332705027298, + "loss": 2.0909, + "step": 303690 + }, + { + "epoch": 0.71, + "grad_norm": 2.21875, + "learning_rate": 0.00014333103927171095, + "loss": 2.2844, + "step": 303695 + }, + { + "epoch": 0.71, + "grad_norm": 2.296875, + "learning_rate": 0.00014332937350132122, + "loss": 1.9932, + "step": 303700 + }, + { + "epoch": 0.71, + "grad_norm": 2.546875, + "learning_rate": 0.0001433277077161294, + "loss": 2.0298, + "step": 303705 + }, + { + "epoch": 0.71, + "grad_norm": 2.03125, + "learning_rate": 0.00014332604191613605, + "loss": 2.0068, + "step": 303710 + }, + { + "epoch": 0.71, + "grad_norm": 1.96875, + "learning_rate": 0.00014332437610134173, + "loss": 2.172, + "step": 303715 + }, + { + "epoch": 0.71, + "grad_norm": 2.0, + "learning_rate": 0.00014332271027174705, + "loss": 2.0211, + "step": 303720 + }, + { + "epoch": 0.71, + "grad_norm": 2.296875, + "learning_rate": 0.0001433210444273525, + "loss": 2.114, + "step": 303725 + }, + { + "epoch": 0.71, + "grad_norm": 2.03125, + "learning_rate": 0.00014331937856815874, + "loss": 2.1477, + "step": 303730 + }, + { + "epoch": 0.71, + "grad_norm": 2.484375, + "learning_rate": 0.00014331771269416628, + "loss": 2.0107, + "step": 303735 + }, + { + "epoch": 0.71, + "grad_norm": 2.03125, + "learning_rate": 0.00014331604680537568, + "loss": 2.0468, + "step": 303740 + }, + { + "epoch": 0.71, + "grad_norm": 1.9609375, + "learning_rate": 0.00014331438090178753, + "loss": 2.0531, + "step": 303745 + }, + { + "epoch": 0.71, + "grad_norm": 1.9296875, + "learning_rate": 0.00014331271498340244, + "loss": 1.9449, + "step": 303750 + }, + { + "epoch": 0.71, + "grad_norm": 2.046875, + "learning_rate": 0.0001433110490502209, + "loss": 2.1722, + "step": 303755 + }, + { + "epoch": 0.71, + "grad_norm": 2.578125, + "learning_rate": 0.00014330938310224358, + "loss": 2.0206, + "step": 303760 + }, + { + "epoch": 0.71, + "grad_norm": 2.484375, + "learning_rate": 0.0001433077171394709, + "loss": 2.2092, + "step": 303765 + }, + { + "epoch": 0.71, + "grad_norm": 2.09375, + "learning_rate": 0.0001433060511619036, + "loss": 1.9884, + "step": 303770 + }, + { + "epoch": 0.71, + "grad_norm": 2.140625, + "learning_rate": 0.00014330438516954214, + "loss": 2.014, + "step": 303775 + }, + { + "epoch": 0.71, + "grad_norm": 1.9765625, + "learning_rate": 0.0001433027191623871, + "loss": 2.05, + "step": 303780 + }, + { + "epoch": 0.71, + "grad_norm": 2.453125, + "learning_rate": 0.0001433010531404391, + "loss": 1.9961, + "step": 303785 + }, + { + "epoch": 0.71, + "grad_norm": 2.21875, + "learning_rate": 0.00014329938710369862, + "loss": 2.1853, + "step": 303790 + }, + { + "epoch": 0.71, + "grad_norm": 2.21875, + "learning_rate": 0.00014329772105216635, + "loss": 2.1806, + "step": 303795 + }, + { + "epoch": 0.71, + "grad_norm": 1.9375, + "learning_rate": 0.00014329605498584276, + "loss": 2.141, + "step": 303800 + }, + { + "epoch": 0.71, + "grad_norm": 2.296875, + "learning_rate": 0.00014329438890472844, + "loss": 2.2535, + "step": 303805 + }, + { + "epoch": 0.71, + "grad_norm": 2.15625, + "learning_rate": 0.000143292722808824, + "loss": 2.0436, + "step": 303810 + }, + { + "epoch": 0.71, + "grad_norm": 2.296875, + "learning_rate": 0.00014329105669812995, + "loss": 2.2039, + "step": 303815 + }, + { + "epoch": 0.71, + "grad_norm": 2.109375, + "learning_rate": 0.0001432893905726469, + "loss": 2.0445, + "step": 303820 + }, + { + "epoch": 0.71, + "grad_norm": 2.25, + "learning_rate": 0.00014328772443237541, + "loss": 2.3044, + "step": 303825 + }, + { + "epoch": 0.72, + "grad_norm": 2.21875, + "learning_rate": 0.00014328605827731606, + "loss": 2.0296, + "step": 303830 + }, + { + "epoch": 0.72, + "grad_norm": 2.8125, + "learning_rate": 0.0001432843921074694, + "loss": 2.1582, + "step": 303835 + }, + { + "epoch": 0.72, + "grad_norm": 2.09375, + "learning_rate": 0.00014328272592283603, + "loss": 2.0215, + "step": 303840 + }, + { + "epoch": 0.72, + "grad_norm": 1.796875, + "learning_rate": 0.00014328105972341646, + "loss": 2.1022, + "step": 303845 + }, + { + "epoch": 0.72, + "grad_norm": 2.265625, + "learning_rate": 0.0001432793935092113, + "loss": 2.11, + "step": 303850 + }, + { + "epoch": 0.72, + "grad_norm": 2.109375, + "learning_rate": 0.00014327772728022113, + "loss": 2.0909, + "step": 303855 + }, + { + "epoch": 0.72, + "grad_norm": 2.3125, + "learning_rate": 0.00014327606103644647, + "loss": 1.9843, + "step": 303860 + }, + { + "epoch": 0.72, + "grad_norm": 2.28125, + "learning_rate": 0.000143274394777888, + "loss": 1.9786, + "step": 303865 + }, + { + "epoch": 0.72, + "grad_norm": 2.703125, + "learning_rate": 0.00014327272850454614, + "loss": 2.0468, + "step": 303870 + }, + { + "epoch": 0.72, + "grad_norm": 2.640625, + "learning_rate": 0.00014327106221642154, + "loss": 1.898, + "step": 303875 + }, + { + "epoch": 0.72, + "grad_norm": 2.109375, + "learning_rate": 0.00014326939591351477, + "loss": 2.0834, + "step": 303880 + }, + { + "epoch": 0.72, + "grad_norm": 2.515625, + "learning_rate": 0.00014326772959582642, + "loss": 2.1002, + "step": 303885 + }, + { + "epoch": 0.72, + "grad_norm": 1.9765625, + "learning_rate": 0.00014326606326335698, + "loss": 2.0994, + "step": 303890 + }, + { + "epoch": 0.72, + "grad_norm": 2.203125, + "learning_rate": 0.00014326439691610712, + "loss": 2.1374, + "step": 303895 + }, + { + "epoch": 0.72, + "grad_norm": 2.703125, + "learning_rate": 0.0001432627305540773, + "loss": 2.0181, + "step": 303900 + }, + { + "epoch": 0.72, + "grad_norm": 2.265625, + "learning_rate": 0.00014326106417726818, + "loss": 2.0875, + "step": 303905 + }, + { + "epoch": 0.72, + "grad_norm": 2.9375, + "learning_rate": 0.00014325939778568028, + "loss": 2.0466, + "step": 303910 + }, + { + "epoch": 0.72, + "grad_norm": 2.15625, + "learning_rate": 0.0001432577313793142, + "loss": 2.3566, + "step": 303915 + }, + { + "epoch": 0.72, + "grad_norm": 2.125, + "learning_rate": 0.0001432560649581705, + "loss": 1.9995, + "step": 303920 + }, + { + "epoch": 0.72, + "grad_norm": 1.984375, + "learning_rate": 0.0001432543985222497, + "loss": 2.1571, + "step": 303925 + }, + { + "epoch": 0.72, + "grad_norm": 2.28125, + "learning_rate": 0.00014325273207155248, + "loss": 2.0677, + "step": 303930 + }, + { + "epoch": 0.72, + "grad_norm": 2.328125, + "learning_rate": 0.0001432510656060793, + "loss": 2.1883, + "step": 303935 + }, + { + "epoch": 0.72, + "grad_norm": 1.8984375, + "learning_rate": 0.00014324939912583077, + "loss": 2.2263, + "step": 303940 + }, + { + "epoch": 0.72, + "grad_norm": 2.359375, + "learning_rate": 0.00014324773263080747, + "loss": 2.1445, + "step": 303945 + }, + { + "epoch": 0.72, + "grad_norm": 2.265625, + "learning_rate": 0.00014324606612100996, + "loss": 1.8759, + "step": 303950 + }, + { + "epoch": 0.72, + "grad_norm": 2.359375, + "learning_rate": 0.0001432443995964388, + "loss": 1.925, + "step": 303955 + }, + { + "epoch": 0.72, + "grad_norm": 2.28125, + "learning_rate": 0.0001432427330570946, + "loss": 2.074, + "step": 303960 + }, + { + "epoch": 0.72, + "grad_norm": 2.828125, + "learning_rate": 0.00014324106650297785, + "loss": 2.1232, + "step": 303965 + }, + { + "epoch": 0.72, + "grad_norm": 2.28125, + "learning_rate": 0.0001432393999340892, + "loss": 1.9404, + "step": 303970 + }, + { + "epoch": 0.72, + "grad_norm": 2.46875, + "learning_rate": 0.00014323773335042918, + "loss": 2.0177, + "step": 303975 + }, + { + "epoch": 0.72, + "grad_norm": 2.34375, + "learning_rate": 0.00014323606675199836, + "loss": 2.1004, + "step": 303980 + }, + { + "epoch": 0.72, + "grad_norm": 2.671875, + "learning_rate": 0.00014323440013879734, + "loss": 1.9848, + "step": 303985 + }, + { + "epoch": 0.72, + "grad_norm": 2.03125, + "learning_rate": 0.00014323273351082665, + "loss": 2.2051, + "step": 303990 + }, + { + "epoch": 0.72, + "grad_norm": 2.4375, + "learning_rate": 0.00014323106686808687, + "loss": 2.0672, + "step": 303995 + }, + { + "epoch": 0.72, + "grad_norm": 2.171875, + "learning_rate": 0.00014322940021057858, + "loss": 2.2707, + "step": 304000 + }, + { + "epoch": 0.72, + "grad_norm": 1.96875, + "learning_rate": 0.00014322773353830231, + "loss": 2.0675, + "step": 304005 + }, + { + "epoch": 0.72, + "grad_norm": 2.1875, + "learning_rate": 0.0001432260668512587, + "loss": 2.1738, + "step": 304010 + }, + { + "epoch": 0.72, + "grad_norm": 2.453125, + "learning_rate": 0.00014322440014944828, + "loss": 1.9911, + "step": 304015 + }, + { + "epoch": 0.72, + "grad_norm": 1.921875, + "learning_rate": 0.0001432227334328716, + "loss": 2.0263, + "step": 304020 + }, + { + "epoch": 0.72, + "grad_norm": 2.171875, + "learning_rate": 0.00014322106670152927, + "loss": 1.8386, + "step": 304025 + }, + { + "epoch": 0.72, + "grad_norm": 2.09375, + "learning_rate": 0.00014321939995542185, + "loss": 1.9097, + "step": 304030 + }, + { + "epoch": 0.72, + "grad_norm": 2.53125, + "learning_rate": 0.00014321773319454984, + "loss": 2.1332, + "step": 304035 + }, + { + "epoch": 0.72, + "grad_norm": 2.203125, + "learning_rate": 0.00014321606641891393, + "loss": 2.1626, + "step": 304040 + }, + { + "epoch": 0.72, + "grad_norm": 1.7578125, + "learning_rate": 0.00014321439962851462, + "loss": 1.9062, + "step": 304045 + }, + { + "epoch": 0.72, + "grad_norm": 2.140625, + "learning_rate": 0.00014321273282335247, + "loss": 2.1051, + "step": 304050 + }, + { + "epoch": 0.72, + "grad_norm": 2.25, + "learning_rate": 0.0001432110660034281, + "loss": 2.0232, + "step": 304055 + }, + { + "epoch": 0.72, + "grad_norm": 2.1875, + "learning_rate": 0.000143209399168742, + "loss": 2.1878, + "step": 304060 + }, + { + "epoch": 0.72, + "grad_norm": 2.84375, + "learning_rate": 0.0001432077323192948, + "loss": 2.0063, + "step": 304065 + }, + { + "epoch": 0.72, + "grad_norm": 2.390625, + "learning_rate": 0.00014320606545508707, + "loss": 1.9283, + "step": 304070 + }, + { + "epoch": 0.72, + "grad_norm": 2.421875, + "learning_rate": 0.00014320439857611934, + "loss": 1.9346, + "step": 304075 + }, + { + "epoch": 0.72, + "grad_norm": 2.609375, + "learning_rate": 0.00014320273168239223, + "loss": 1.9911, + "step": 304080 + }, + { + "epoch": 0.72, + "grad_norm": 2.296875, + "learning_rate": 0.00014320106477390624, + "loss": 2.1609, + "step": 304085 + }, + { + "epoch": 0.72, + "grad_norm": 2.859375, + "learning_rate": 0.00014319939785066202, + "loss": 1.8027, + "step": 304090 + }, + { + "epoch": 0.72, + "grad_norm": 2.453125, + "learning_rate": 0.0001431977309126601, + "loss": 1.8936, + "step": 304095 + }, + { + "epoch": 0.72, + "grad_norm": 2.421875, + "learning_rate": 0.00014319606395990107, + "loss": 2.1335, + "step": 304100 + }, + { + "epoch": 0.72, + "grad_norm": 2.046875, + "learning_rate": 0.00014319439699238544, + "loss": 2.1104, + "step": 304105 + }, + { + "epoch": 0.72, + "grad_norm": 2.03125, + "learning_rate": 0.00014319273001011385, + "loss": 2.1344, + "step": 304110 + }, + { + "epoch": 0.72, + "grad_norm": 2.078125, + "learning_rate": 0.00014319106301308683, + "loss": 1.9648, + "step": 304115 + }, + { + "epoch": 0.72, + "grad_norm": 2.453125, + "learning_rate": 0.00014318939600130494, + "loss": 1.9399, + "step": 304120 + }, + { + "epoch": 0.72, + "grad_norm": 2.078125, + "learning_rate": 0.0001431877289747688, + "loss": 1.8413, + "step": 304125 + }, + { + "epoch": 0.72, + "grad_norm": 2.25, + "learning_rate": 0.00014318606193347892, + "loss": 1.9813, + "step": 304130 + }, + { + "epoch": 0.72, + "grad_norm": 2.640625, + "learning_rate": 0.00014318439487743593, + "loss": 2.0324, + "step": 304135 + }, + { + "epoch": 0.72, + "grad_norm": 2.921875, + "learning_rate": 0.00014318272780664034, + "loss": 2.0843, + "step": 304140 + }, + { + "epoch": 0.72, + "grad_norm": 2.1875, + "learning_rate": 0.00014318106072109277, + "loss": 2.2884, + "step": 304145 + }, + { + "epoch": 0.72, + "grad_norm": 2.203125, + "learning_rate": 0.00014317939362079376, + "loss": 2.0579, + "step": 304150 + }, + { + "epoch": 0.72, + "grad_norm": 2.375, + "learning_rate": 0.00014317772650574389, + "loss": 1.9667, + "step": 304155 + }, + { + "epoch": 0.72, + "grad_norm": 2.328125, + "learning_rate": 0.0001431760593759437, + "loss": 2.113, + "step": 304160 + }, + { + "epoch": 0.72, + "grad_norm": 2.171875, + "learning_rate": 0.00014317439223139383, + "loss": 2.1486, + "step": 304165 + }, + { + "epoch": 0.72, + "grad_norm": 2.078125, + "learning_rate": 0.00014317272507209476, + "loss": 2.0995, + "step": 304170 + }, + { + "epoch": 0.72, + "grad_norm": 1.8515625, + "learning_rate": 0.0001431710578980471, + "loss": 1.8435, + "step": 304175 + }, + { + "epoch": 0.72, + "grad_norm": 2.21875, + "learning_rate": 0.00014316939070925145, + "loss": 2.1553, + "step": 304180 + }, + { + "epoch": 0.72, + "grad_norm": 1.953125, + "learning_rate": 0.00014316772350570837, + "loss": 2.0621, + "step": 304185 + }, + { + "epoch": 0.72, + "grad_norm": 2.40625, + "learning_rate": 0.0001431660562874184, + "loss": 1.9746, + "step": 304190 + }, + { + "epoch": 0.72, + "grad_norm": 2.609375, + "learning_rate": 0.00014316438905438207, + "loss": 2.1889, + "step": 304195 + }, + { + "epoch": 0.72, + "grad_norm": 1.984375, + "learning_rate": 0.00014316272180660007, + "loss": 2.1378, + "step": 304200 + }, + { + "epoch": 0.72, + "grad_norm": 2.484375, + "learning_rate": 0.00014316105454407287, + "loss": 2.217, + "step": 304205 + }, + { + "epoch": 0.72, + "grad_norm": 2.609375, + "learning_rate": 0.00014315938726680108, + "loss": 2.1669, + "step": 304210 + }, + { + "epoch": 0.72, + "grad_norm": 2.265625, + "learning_rate": 0.00014315771997478526, + "loss": 2.2743, + "step": 304215 + }, + { + "epoch": 0.72, + "grad_norm": 1.9765625, + "learning_rate": 0.00014315605266802597, + "loss": 2.1374, + "step": 304220 + }, + { + "epoch": 0.72, + "grad_norm": 2.171875, + "learning_rate": 0.0001431543853465238, + "loss": 1.9643, + "step": 304225 + }, + { + "epoch": 0.72, + "grad_norm": 2.65625, + "learning_rate": 0.0001431527180102793, + "loss": 2.1674, + "step": 304230 + }, + { + "epoch": 0.72, + "grad_norm": 2.3125, + "learning_rate": 0.00014315105065929306, + "loss": 1.9656, + "step": 304235 + }, + { + "epoch": 0.72, + "grad_norm": 2.296875, + "learning_rate": 0.00014314938329356564, + "loss": 2.1254, + "step": 304240 + }, + { + "epoch": 0.72, + "grad_norm": 1.984375, + "learning_rate": 0.0001431477159130976, + "loss": 2.07, + "step": 304245 + }, + { + "epoch": 0.72, + "grad_norm": 2.265625, + "learning_rate": 0.00014314604851788952, + "loss": 2.1308, + "step": 304250 + }, + { + "epoch": 0.72, + "grad_norm": 2.25, + "learning_rate": 0.00014314438110794197, + "loss": 2.2004, + "step": 304255 + }, + { + "epoch": 0.72, + "grad_norm": 1.7578125, + "learning_rate": 0.00014314271368325553, + "loss": 2.0972, + "step": 304260 + }, + { + "epoch": 0.72, + "grad_norm": 2.046875, + "learning_rate": 0.00014314104624383074, + "loss": 2.0829, + "step": 304265 + }, + { + "epoch": 0.72, + "grad_norm": 2.171875, + "learning_rate": 0.00014313937878966817, + "loss": 2.053, + "step": 304270 + }, + { + "epoch": 0.72, + "grad_norm": 2.0, + "learning_rate": 0.00014313771132076842, + "loss": 2.0005, + "step": 304275 + }, + { + "epoch": 0.72, + "grad_norm": 2.59375, + "learning_rate": 0.00014313604383713206, + "loss": 2.1288, + "step": 304280 + }, + { + "epoch": 0.72, + "grad_norm": 2.140625, + "learning_rate": 0.00014313437633875964, + "loss": 1.8792, + "step": 304285 + }, + { + "epoch": 0.72, + "grad_norm": 2.28125, + "learning_rate": 0.00014313270882565174, + "loss": 2.0933, + "step": 304290 + }, + { + "epoch": 0.72, + "grad_norm": 2.125, + "learning_rate": 0.00014313104129780893, + "loss": 2.1801, + "step": 304295 + }, + { + "epoch": 0.72, + "grad_norm": 2.359375, + "learning_rate": 0.0001431293737552317, + "loss": 2.2623, + "step": 304300 + }, + { + "epoch": 0.72, + "grad_norm": 1.9375, + "learning_rate": 0.00014312770619792078, + "loss": 2.0246, + "step": 304305 + }, + { + "epoch": 0.72, + "grad_norm": 2.0625, + "learning_rate": 0.00014312603862587662, + "loss": 2.0999, + "step": 304310 + }, + { + "epoch": 0.72, + "grad_norm": 2.078125, + "learning_rate": 0.00014312437103909982, + "loss": 2.1738, + "step": 304315 + }, + { + "epoch": 0.72, + "grad_norm": 2.15625, + "learning_rate": 0.00014312270343759098, + "loss": 1.9748, + "step": 304320 + }, + { + "epoch": 0.72, + "grad_norm": 1.9453125, + "learning_rate": 0.00014312103582135062, + "loss": 1.9437, + "step": 304325 + }, + { + "epoch": 0.72, + "grad_norm": 2.28125, + "learning_rate": 0.00014311936819037932, + "loss": 2.0798, + "step": 304330 + }, + { + "epoch": 0.72, + "grad_norm": 2.203125, + "learning_rate": 0.0001431177005446777, + "loss": 1.8808, + "step": 304335 + }, + { + "epoch": 0.72, + "grad_norm": 2.359375, + "learning_rate": 0.00014311603288424626, + "loss": 2.1885, + "step": 304340 + }, + { + "epoch": 0.72, + "grad_norm": 2.234375, + "learning_rate": 0.00014311436520908561, + "loss": 2.0243, + "step": 304345 + }, + { + "epoch": 0.72, + "grad_norm": 2.3125, + "learning_rate": 0.00014311269751919633, + "loss": 1.9265, + "step": 304350 + }, + { + "epoch": 0.72, + "grad_norm": 2.21875, + "learning_rate": 0.0001431110298145789, + "loss": 2.0052, + "step": 304355 + }, + { + "epoch": 0.72, + "grad_norm": 2.3125, + "learning_rate": 0.00014310936209523402, + "loss": 2.0764, + "step": 304360 + }, + { + "epoch": 0.72, + "grad_norm": 2.421875, + "learning_rate": 0.0001431076943611622, + "loss": 1.9999, + "step": 304365 + }, + { + "epoch": 0.72, + "grad_norm": 2.125, + "learning_rate": 0.000143106026612364, + "loss": 2.1506, + "step": 304370 + }, + { + "epoch": 0.72, + "grad_norm": 2.28125, + "learning_rate": 0.00014310435884884, + "loss": 2.0238, + "step": 304375 + }, + { + "epoch": 0.72, + "grad_norm": 2.390625, + "learning_rate": 0.00014310269107059077, + "loss": 1.9471, + "step": 304380 + }, + { + "epoch": 0.72, + "grad_norm": 2.484375, + "learning_rate": 0.00014310102327761686, + "loss": 2.2502, + "step": 304385 + }, + { + "epoch": 0.72, + "grad_norm": 2.046875, + "learning_rate": 0.0001430993554699189, + "loss": 2.0177, + "step": 304390 + }, + { + "epoch": 0.72, + "grad_norm": 2.578125, + "learning_rate": 0.0001430976876474974, + "loss": 2.2974, + "step": 304395 + }, + { + "epoch": 0.72, + "grad_norm": 2.1875, + "learning_rate": 0.00014309601981035296, + "loss": 2.1106, + "step": 304400 + }, + { + "epoch": 0.72, + "grad_norm": 2.234375, + "learning_rate": 0.00014309435195848611, + "loss": 2.1441, + "step": 304405 + }, + { + "epoch": 0.72, + "grad_norm": 2.640625, + "learning_rate": 0.00014309268409189743, + "loss": 2.1681, + "step": 304410 + }, + { + "epoch": 0.72, + "grad_norm": 2.296875, + "learning_rate": 0.00014309101621058756, + "loss": 2.198, + "step": 304415 + }, + { + "epoch": 0.72, + "grad_norm": 2.453125, + "learning_rate": 0.000143089348314557, + "loss": 1.9463, + "step": 304420 + }, + { + "epoch": 0.72, + "grad_norm": 2.140625, + "learning_rate": 0.00014308768040380634, + "loss": 2.1842, + "step": 304425 + }, + { + "epoch": 0.72, + "grad_norm": 2.0, + "learning_rate": 0.00014308601247833613, + "loss": 2.119, + "step": 304430 + }, + { + "epoch": 0.72, + "grad_norm": 2.28125, + "learning_rate": 0.00014308434453814696, + "loss": 2.2007, + "step": 304435 + }, + { + "epoch": 0.72, + "grad_norm": 2.390625, + "learning_rate": 0.00014308267658323938, + "loss": 2.1789, + "step": 304440 + }, + { + "epoch": 0.72, + "grad_norm": 2.03125, + "learning_rate": 0.000143081008613614, + "loss": 1.9052, + "step": 304445 + }, + { + "epoch": 0.72, + "grad_norm": 2.0625, + "learning_rate": 0.00014307934062927137, + "loss": 2.0705, + "step": 304450 + }, + { + "epoch": 0.72, + "grad_norm": 2.390625, + "learning_rate": 0.00014307767263021208, + "loss": 1.8782, + "step": 304455 + }, + { + "epoch": 0.72, + "grad_norm": 1.7578125, + "learning_rate": 0.00014307600461643664, + "loss": 1.9987, + "step": 304460 + }, + { + "epoch": 0.72, + "grad_norm": 2.21875, + "learning_rate": 0.00014307433658794565, + "loss": 1.8904, + "step": 304465 + }, + { + "epoch": 0.72, + "grad_norm": 2.40625, + "learning_rate": 0.0001430726685447397, + "loss": 1.932, + "step": 304470 + }, + { + "epoch": 0.72, + "grad_norm": 2.25, + "learning_rate": 0.00014307100048681937, + "loss": 2.0982, + "step": 304475 + }, + { + "epoch": 0.72, + "grad_norm": 2.109375, + "learning_rate": 0.00014306933241418516, + "loss": 2.0138, + "step": 304480 + }, + { + "epoch": 0.72, + "grad_norm": 2.125, + "learning_rate": 0.0001430676643268377, + "loss": 2.0082, + "step": 304485 + }, + { + "epoch": 0.72, + "grad_norm": 2.5, + "learning_rate": 0.00014306599622477753, + "loss": 2.0378, + "step": 304490 + }, + { + "epoch": 0.72, + "grad_norm": 2.46875, + "learning_rate": 0.00014306432810800524, + "loss": 2.0569, + "step": 304495 + }, + { + "epoch": 0.72, + "grad_norm": 2.484375, + "learning_rate": 0.00014306265997652144, + "loss": 2.0449, + "step": 304500 + }, + { + "epoch": 0.72, + "grad_norm": 2.359375, + "learning_rate": 0.00014306099183032664, + "loss": 2.0991, + "step": 304505 + }, + { + "epoch": 0.72, + "grad_norm": 2.390625, + "learning_rate": 0.00014305932366942138, + "loss": 2.0694, + "step": 304510 + }, + { + "epoch": 0.72, + "grad_norm": 2.046875, + "learning_rate": 0.0001430576554938063, + "loss": 1.9928, + "step": 304515 + }, + { + "epoch": 0.72, + "grad_norm": 2.390625, + "learning_rate": 0.00014305598730348195, + "loss": 2.0836, + "step": 304520 + }, + { + "epoch": 0.72, + "grad_norm": 2.140625, + "learning_rate": 0.00014305431909844888, + "loss": 2.0593, + "step": 304525 + }, + { + "epoch": 0.72, + "grad_norm": 2.34375, + "learning_rate": 0.0001430526508787077, + "loss": 2.0441, + "step": 304530 + }, + { + "epoch": 0.72, + "grad_norm": 2.125, + "learning_rate": 0.00014305098264425894, + "loss": 1.8469, + "step": 304535 + }, + { + "epoch": 0.72, + "grad_norm": 1.9921875, + "learning_rate": 0.00014304931439510315, + "loss": 1.9566, + "step": 304540 + }, + { + "epoch": 0.72, + "grad_norm": 1.9296875, + "learning_rate": 0.00014304764613124097, + "loss": 1.9755, + "step": 304545 + }, + { + "epoch": 0.72, + "grad_norm": 1.703125, + "learning_rate": 0.00014304597785267292, + "loss": 2.1876, + "step": 304550 + }, + { + "epoch": 0.72, + "grad_norm": 2.34375, + "learning_rate": 0.0001430443095593996, + "loss": 2.1827, + "step": 304555 + }, + { + "epoch": 0.72, + "grad_norm": 2.515625, + "learning_rate": 0.00014304264125142155, + "loss": 1.9064, + "step": 304560 + }, + { + "epoch": 0.72, + "grad_norm": 2.515625, + "learning_rate": 0.00014304097292873935, + "loss": 2.1059, + "step": 304565 + }, + { + "epoch": 0.72, + "grad_norm": 2.5625, + "learning_rate": 0.00014303930459135357, + "loss": 1.9911, + "step": 304570 + }, + { + "epoch": 0.72, + "grad_norm": 2.234375, + "learning_rate": 0.0001430376362392648, + "loss": 1.9724, + "step": 304575 + }, + { + "epoch": 0.72, + "grad_norm": 2.03125, + "learning_rate": 0.00014303596787247356, + "loss": 1.9517, + "step": 304580 + }, + { + "epoch": 0.72, + "grad_norm": 2.171875, + "learning_rate": 0.00014303429949098052, + "loss": 2.2353, + "step": 304585 + }, + { + "epoch": 0.72, + "grad_norm": 2.28125, + "learning_rate": 0.00014303263109478613, + "loss": 2.084, + "step": 304590 + }, + { + "epoch": 0.72, + "grad_norm": 2.078125, + "learning_rate": 0.000143030962683891, + "loss": 2.0283, + "step": 304595 + }, + { + "epoch": 0.72, + "grad_norm": 2.640625, + "learning_rate": 0.00014302929425829574, + "loss": 2.1915, + "step": 304600 + }, + { + "epoch": 0.72, + "grad_norm": 2.140625, + "learning_rate": 0.00014302762581800088, + "loss": 2.1898, + "step": 304605 + }, + { + "epoch": 0.72, + "grad_norm": 2.640625, + "learning_rate": 0.000143025957363007, + "loss": 2.2147, + "step": 304610 + }, + { + "epoch": 0.72, + "grad_norm": 2.40625, + "learning_rate": 0.00014302428889331474, + "loss": 2.1922, + "step": 304615 + }, + { + "epoch": 0.72, + "grad_norm": 2.390625, + "learning_rate": 0.0001430226204089245, + "loss": 2.0287, + "step": 304620 + }, + { + "epoch": 0.72, + "grad_norm": 2.5, + "learning_rate": 0.00014302095190983703, + "loss": 2.0247, + "step": 304625 + }, + { + "epoch": 0.72, + "grad_norm": 2.015625, + "learning_rate": 0.00014301928339605278, + "loss": 2.3038, + "step": 304630 + }, + { + "epoch": 0.72, + "grad_norm": 2.328125, + "learning_rate": 0.0001430176148675724, + "loss": 2.063, + "step": 304635 + }, + { + "epoch": 0.72, + "grad_norm": 2.53125, + "learning_rate": 0.0001430159463243964, + "loss": 2.1162, + "step": 304640 + }, + { + "epoch": 0.72, + "grad_norm": 1.65625, + "learning_rate": 0.00014301427776652534, + "loss": 1.8824, + "step": 304645 + }, + { + "epoch": 0.72, + "grad_norm": 1.875, + "learning_rate": 0.00014301260919395985, + "loss": 1.8499, + "step": 304650 + }, + { + "epoch": 0.72, + "grad_norm": 2.234375, + "learning_rate": 0.00014301094060670047, + "loss": 1.8817, + "step": 304655 + }, + { + "epoch": 0.72, + "grad_norm": 2.53125, + "learning_rate": 0.00014300927200474779, + "loss": 2.0574, + "step": 304660 + }, + { + "epoch": 0.72, + "grad_norm": 2.015625, + "learning_rate": 0.00014300760338810235, + "loss": 1.8891, + "step": 304665 + }, + { + "epoch": 0.72, + "grad_norm": 1.8515625, + "learning_rate": 0.00014300593475676473, + "loss": 1.9338, + "step": 304670 + }, + { + "epoch": 0.72, + "grad_norm": 2.703125, + "learning_rate": 0.00014300426611073549, + "loss": 2.0774, + "step": 304675 + }, + { + "epoch": 0.72, + "grad_norm": 1.8359375, + "learning_rate": 0.00014300259745001523, + "loss": 2.0321, + "step": 304680 + }, + { + "epoch": 0.72, + "grad_norm": 2.328125, + "learning_rate": 0.0001430009287746045, + "loss": 2.098, + "step": 304685 + }, + { + "epoch": 0.72, + "grad_norm": 2.015625, + "learning_rate": 0.0001429992600845039, + "loss": 2.0476, + "step": 304690 + }, + { + "epoch": 0.72, + "grad_norm": 3.375, + "learning_rate": 0.00014299759137971393, + "loss": 2.0167, + "step": 304695 + }, + { + "epoch": 0.72, + "grad_norm": 2.15625, + "learning_rate": 0.00014299592266023522, + "loss": 2.3073, + "step": 304700 + }, + { + "epoch": 0.72, + "grad_norm": 2.078125, + "learning_rate": 0.00014299425392606832, + "loss": 2.061, + "step": 304705 + }, + { + "epoch": 0.72, + "grad_norm": 2.328125, + "learning_rate": 0.0001429925851772138, + "loss": 1.9975, + "step": 304710 + }, + { + "epoch": 0.72, + "grad_norm": 2.046875, + "learning_rate": 0.00014299091641367223, + "loss": 1.9167, + "step": 304715 + }, + { + "epoch": 0.72, + "grad_norm": 2.46875, + "learning_rate": 0.00014298924763544418, + "loss": 2.1343, + "step": 304720 + }, + { + "epoch": 0.72, + "grad_norm": 1.9296875, + "learning_rate": 0.00014298757884253024, + "loss": 2.1778, + "step": 304725 + }, + { + "epoch": 0.72, + "grad_norm": 2.234375, + "learning_rate": 0.00014298591003493097, + "loss": 2.033, + "step": 304730 + }, + { + "epoch": 0.72, + "grad_norm": 2.703125, + "learning_rate": 0.00014298424121264694, + "loss": 2.025, + "step": 304735 + }, + { + "epoch": 0.72, + "grad_norm": 2.390625, + "learning_rate": 0.00014298257237567867, + "loss": 1.9926, + "step": 304740 + }, + { + "epoch": 0.72, + "grad_norm": 2.1875, + "learning_rate": 0.00014298090352402683, + "loss": 1.9044, + "step": 304745 + }, + { + "epoch": 0.72, + "grad_norm": 2.1875, + "learning_rate": 0.0001429792346576919, + "loss": 1.9574, + "step": 304750 + }, + { + "epoch": 0.72, + "grad_norm": 2.421875, + "learning_rate": 0.00014297756577667448, + "loss": 2.1493, + "step": 304755 + }, + { + "epoch": 0.72, + "grad_norm": 2.1875, + "learning_rate": 0.00014297589688097513, + "loss": 1.9625, + "step": 304760 + }, + { + "epoch": 0.72, + "grad_norm": 2.4375, + "learning_rate": 0.00014297422797059447, + "loss": 2.0777, + "step": 304765 + }, + { + "epoch": 0.72, + "grad_norm": 2.109375, + "learning_rate": 0.000142972559045533, + "loss": 2.0199, + "step": 304770 + }, + { + "epoch": 0.72, + "grad_norm": 2.421875, + "learning_rate": 0.0001429708901057914, + "loss": 1.9284, + "step": 304775 + }, + { + "epoch": 0.72, + "grad_norm": 2.296875, + "learning_rate": 0.0001429692211513701, + "loss": 2.1889, + "step": 304780 + }, + { + "epoch": 0.72, + "grad_norm": 2.328125, + "learning_rate": 0.00014296755218226972, + "loss": 2.0549, + "step": 304785 + }, + { + "epoch": 0.72, + "grad_norm": 2.328125, + "learning_rate": 0.00014296588319849085, + "loss": 2.2451, + "step": 304790 + }, + { + "epoch": 0.72, + "grad_norm": 2.171875, + "learning_rate": 0.0001429642142000341, + "loss": 1.9775, + "step": 304795 + }, + { + "epoch": 0.72, + "grad_norm": 2.171875, + "learning_rate": 0.00014296254518689997, + "loss": 1.8866, + "step": 304800 + }, + { + "epoch": 0.72, + "grad_norm": 2.421875, + "learning_rate": 0.00014296087615908905, + "loss": 2.0408, + "step": 304805 + }, + { + "epoch": 0.72, + "grad_norm": 2.59375, + "learning_rate": 0.0001429592071166019, + "loss": 2.0228, + "step": 304810 + }, + { + "epoch": 0.72, + "grad_norm": 2.015625, + "learning_rate": 0.00014295753805943913, + "loss": 2.0833, + "step": 304815 + }, + { + "epoch": 0.72, + "grad_norm": 2.5, + "learning_rate": 0.00014295586898760128, + "loss": 2.2544, + "step": 304820 + }, + { + "epoch": 0.72, + "grad_norm": 2.59375, + "learning_rate": 0.00014295419990108897, + "loss": 2.1203, + "step": 304825 + }, + { + "epoch": 0.72, + "grad_norm": 1.90625, + "learning_rate": 0.00014295253079990268, + "loss": 2.0007, + "step": 304830 + }, + { + "epoch": 0.72, + "grad_norm": 2.421875, + "learning_rate": 0.000142950861684043, + "loss": 2.1935, + "step": 304835 + }, + { + "epoch": 0.72, + "grad_norm": 2.140625, + "learning_rate": 0.00014294919255351056, + "loss": 2.0185, + "step": 304840 + }, + { + "epoch": 0.72, + "grad_norm": 2.046875, + "learning_rate": 0.0001429475234083059, + "loss": 1.9652, + "step": 304845 + }, + { + "epoch": 0.72, + "grad_norm": 2.25, + "learning_rate": 0.0001429458542484296, + "loss": 1.9285, + "step": 304850 + }, + { + "epoch": 0.72, + "grad_norm": 2.0, + "learning_rate": 0.00014294418507388218, + "loss": 2.1764, + "step": 304855 + }, + { + "epoch": 0.72, + "grad_norm": 2.015625, + "learning_rate": 0.00014294251588466427, + "loss": 2.1113, + "step": 304860 + }, + { + "epoch": 0.72, + "grad_norm": 2.015625, + "learning_rate": 0.00014294084668077642, + "loss": 2.0657, + "step": 304865 + }, + { + "epoch": 0.72, + "grad_norm": 1.96875, + "learning_rate": 0.00014293917746221918, + "loss": 2.2615, + "step": 304870 + }, + { + "epoch": 0.72, + "grad_norm": 2.359375, + "learning_rate": 0.00014293750822899314, + "loss": 2.0575, + "step": 304875 + }, + { + "epoch": 0.72, + "grad_norm": 2.015625, + "learning_rate": 0.0001429358389810989, + "loss": 1.8993, + "step": 304880 + }, + { + "epoch": 0.72, + "grad_norm": 1.9375, + "learning_rate": 0.000142934169718537, + "loss": 2.0791, + "step": 304885 + }, + { + "epoch": 0.72, + "grad_norm": 2.21875, + "learning_rate": 0.00014293250044130796, + "loss": 2.0526, + "step": 304890 + }, + { + "epoch": 0.72, + "grad_norm": 2.21875, + "learning_rate": 0.0001429308311494124, + "loss": 2.0555, + "step": 304895 + }, + { + "epoch": 0.72, + "grad_norm": 2.3125, + "learning_rate": 0.00014292916184285092, + "loss": 2.0618, + "step": 304900 + }, + { + "epoch": 0.72, + "grad_norm": 2.4375, + "learning_rate": 0.00014292749252162404, + "loss": 1.8578, + "step": 304905 + }, + { + "epoch": 0.72, + "grad_norm": 1.953125, + "learning_rate": 0.0001429258231857324, + "loss": 2.2018, + "step": 304910 + }, + { + "epoch": 0.72, + "grad_norm": 2.546875, + "learning_rate": 0.00014292415383517648, + "loss": 2.0685, + "step": 304915 + }, + { + "epoch": 0.72, + "grad_norm": 2.15625, + "learning_rate": 0.00014292248446995688, + "loss": 2.0399, + "step": 304920 + }, + { + "epoch": 0.72, + "grad_norm": 2.484375, + "learning_rate": 0.0001429208150900742, + "loss": 2.1109, + "step": 304925 + }, + { + "epoch": 0.72, + "grad_norm": 2.0625, + "learning_rate": 0.000142919145695529, + "loss": 2.0897, + "step": 304930 + }, + { + "epoch": 0.72, + "grad_norm": 1.90625, + "learning_rate": 0.00014291747628632185, + "loss": 2.1197, + "step": 304935 + }, + { + "epoch": 0.72, + "grad_norm": 2.265625, + "learning_rate": 0.00014291580686245328, + "loss": 2.0543, + "step": 304940 + }, + { + "epoch": 0.72, + "grad_norm": 2.828125, + "learning_rate": 0.00014291413742392386, + "loss": 2.1713, + "step": 304945 + }, + { + "epoch": 0.72, + "grad_norm": 2.15625, + "learning_rate": 0.00014291246797073424, + "loss": 2.075, + "step": 304950 + }, + { + "epoch": 0.72, + "grad_norm": 1.84375, + "learning_rate": 0.000142910798502885, + "loss": 2.0614, + "step": 304955 + }, + { + "epoch": 0.72, + "grad_norm": 2.3125, + "learning_rate": 0.00014290912902037656, + "loss": 1.9064, + "step": 304960 + }, + { + "epoch": 0.72, + "grad_norm": 2.109375, + "learning_rate": 0.0001429074595232096, + "loss": 1.9285, + "step": 304965 + }, + { + "epoch": 0.72, + "grad_norm": 2.09375, + "learning_rate": 0.00014290579001138468, + "loss": 2.0665, + "step": 304970 + }, + { + "epoch": 0.72, + "grad_norm": 2.359375, + "learning_rate": 0.0001429041204849024, + "loss": 2.0989, + "step": 304975 + }, + { + "epoch": 0.72, + "grad_norm": 2.03125, + "learning_rate": 0.00014290245094376327, + "loss": 1.885, + "step": 304980 + }, + { + "epoch": 0.72, + "grad_norm": 2.0, + "learning_rate": 0.00014290078138796787, + "loss": 1.921, + "step": 304985 + }, + { + "epoch": 0.72, + "grad_norm": 2.3125, + "learning_rate": 0.0001428991118175168, + "loss": 2.0689, + "step": 304990 + }, + { + "epoch": 0.72, + "grad_norm": 2.46875, + "learning_rate": 0.00014289744223241061, + "loss": 1.9754, + "step": 304995 + }, + { + "epoch": 0.72, + "grad_norm": 2.0625, + "learning_rate": 0.0001428957726326499, + "loss": 1.7303, + "step": 305000 + }, + { + "epoch": 0.72, + "grad_norm": 2.28125, + "learning_rate": 0.00014289410301823518, + "loss": 2.0573, + "step": 305005 + }, + { + "epoch": 0.72, + "grad_norm": 2.234375, + "learning_rate": 0.00014289243338916707, + "loss": 2.0512, + "step": 305010 + }, + { + "epoch": 0.72, + "grad_norm": 2.28125, + "learning_rate": 0.00014289076374544613, + "loss": 2.1155, + "step": 305015 + }, + { + "epoch": 0.72, + "grad_norm": 2.390625, + "learning_rate": 0.00014288909408707292, + "loss": 2.08, + "step": 305020 + }, + { + "epoch": 0.72, + "grad_norm": 2.328125, + "learning_rate": 0.000142887424414048, + "loss": 1.9861, + "step": 305025 + }, + { + "epoch": 0.72, + "grad_norm": 2.34375, + "learning_rate": 0.00014288575472637201, + "loss": 2.1788, + "step": 305030 + }, + { + "epoch": 0.72, + "grad_norm": 1.9453125, + "learning_rate": 0.00014288408502404542, + "loss": 2.1183, + "step": 305035 + }, + { + "epoch": 0.72, + "grad_norm": 2.3125, + "learning_rate": 0.00014288241530706886, + "loss": 2.1404, + "step": 305040 + }, + { + "epoch": 0.72, + "grad_norm": 2.15625, + "learning_rate": 0.00014288074557544292, + "loss": 1.9056, + "step": 305045 + }, + { + "epoch": 0.72, + "grad_norm": 2.515625, + "learning_rate": 0.00014287907582916808, + "loss": 2.0771, + "step": 305050 + }, + { + "epoch": 0.72, + "grad_norm": 2.296875, + "learning_rate": 0.00014287740606824502, + "loss": 2.0497, + "step": 305055 + }, + { + "epoch": 0.72, + "grad_norm": 2.765625, + "learning_rate": 0.00014287573629267425, + "loss": 2.2853, + "step": 305060 + }, + { + "epoch": 0.72, + "grad_norm": 2.1875, + "learning_rate": 0.00014287406650245635, + "loss": 2.1943, + "step": 305065 + }, + { + "epoch": 0.72, + "grad_norm": 2.265625, + "learning_rate": 0.0001428723966975919, + "loss": 2.0439, + "step": 305070 + }, + { + "epoch": 0.72, + "grad_norm": 2.03125, + "learning_rate": 0.00014287072687808145, + "loss": 2.1368, + "step": 305075 + }, + { + "epoch": 0.72, + "grad_norm": 2.28125, + "learning_rate": 0.00014286905704392555, + "loss": 1.8581, + "step": 305080 + }, + { + "epoch": 0.72, + "grad_norm": 2.640625, + "learning_rate": 0.00014286738719512484, + "loss": 2.0301, + "step": 305085 + }, + { + "epoch": 0.72, + "grad_norm": 2.453125, + "learning_rate": 0.00014286571733167986, + "loss": 2.1314, + "step": 305090 + }, + { + "epoch": 0.72, + "grad_norm": 2.53125, + "learning_rate": 0.00014286404745359117, + "loss": 2.1, + "step": 305095 + }, + { + "epoch": 0.72, + "grad_norm": 2.421875, + "learning_rate": 0.00014286237756085934, + "loss": 2.0653, + "step": 305100 + }, + { + "epoch": 0.72, + "grad_norm": 2.171875, + "learning_rate": 0.0001428607076534849, + "loss": 2.143, + "step": 305105 + }, + { + "epoch": 0.72, + "grad_norm": 2.1875, + "learning_rate": 0.00014285903773146854, + "loss": 2.1858, + "step": 305110 + }, + { + "epoch": 0.72, + "grad_norm": 1.7109375, + "learning_rate": 0.00014285736779481073, + "loss": 2.0581, + "step": 305115 + }, + { + "epoch": 0.72, + "grad_norm": 2.125, + "learning_rate": 0.00014285569784351206, + "loss": 2.1275, + "step": 305120 + }, + { + "epoch": 0.72, + "grad_norm": 2.234375, + "learning_rate": 0.0001428540278775731, + "loss": 2.0709, + "step": 305125 + }, + { + "epoch": 0.72, + "grad_norm": 2.265625, + "learning_rate": 0.0001428523578969944, + "loss": 1.9986, + "step": 305130 + }, + { + "epoch": 0.72, + "grad_norm": 2.1875, + "learning_rate": 0.00014285068790177661, + "loss": 1.9354, + "step": 305135 + }, + { + "epoch": 0.72, + "grad_norm": 2.84375, + "learning_rate": 0.00014284901789192022, + "loss": 2.0215, + "step": 305140 + }, + { + "epoch": 0.72, + "grad_norm": 2.171875, + "learning_rate": 0.00014284734786742585, + "loss": 1.8991, + "step": 305145 + }, + { + "epoch": 0.72, + "grad_norm": 2.34375, + "learning_rate": 0.00014284567782829401, + "loss": 1.9935, + "step": 305150 + }, + { + "epoch": 0.72, + "grad_norm": 2.15625, + "learning_rate": 0.00014284400777452534, + "loss": 2.0718, + "step": 305155 + }, + { + "epoch": 0.72, + "grad_norm": 2.25, + "learning_rate": 0.00014284233770612038, + "loss": 1.8293, + "step": 305160 + }, + { + "epoch": 0.72, + "grad_norm": 2.046875, + "learning_rate": 0.00014284066762307968, + "loss": 1.8396, + "step": 305165 + }, + { + "epoch": 0.72, + "grad_norm": 2.234375, + "learning_rate": 0.00014283899752540383, + "loss": 1.9917, + "step": 305170 + }, + { + "epoch": 0.72, + "grad_norm": 2.015625, + "learning_rate": 0.00014283732741309342, + "loss": 1.8675, + "step": 305175 + }, + { + "epoch": 0.72, + "grad_norm": 1.8515625, + "learning_rate": 0.00014283565728614902, + "loss": 2.0443, + "step": 305180 + }, + { + "epoch": 0.72, + "grad_norm": 2.1875, + "learning_rate": 0.00014283398714457114, + "loss": 2.1443, + "step": 305185 + }, + { + "epoch": 0.72, + "grad_norm": 1.90625, + "learning_rate": 0.0001428323169883604, + "loss": 2.0559, + "step": 305190 + }, + { + "epoch": 0.72, + "grad_norm": 2.390625, + "learning_rate": 0.0001428306468175174, + "loss": 2.1528, + "step": 305195 + }, + { + "epoch": 0.72, + "grad_norm": 2.3125, + "learning_rate": 0.00014282897663204263, + "loss": 1.9936, + "step": 305200 + }, + { + "epoch": 0.72, + "grad_norm": 2.28125, + "learning_rate": 0.00014282730643193676, + "loss": 2.2637, + "step": 305205 + }, + { + "epoch": 0.72, + "grad_norm": 2.4375, + "learning_rate": 0.00014282563621720025, + "loss": 2.0981, + "step": 305210 + }, + { + "epoch": 0.72, + "grad_norm": 2.328125, + "learning_rate": 0.00014282396598783373, + "loss": 2.1883, + "step": 305215 + }, + { + "epoch": 0.72, + "grad_norm": 2.4375, + "learning_rate": 0.00014282229574383778, + "loss": 1.9366, + "step": 305220 + }, + { + "epoch": 0.72, + "grad_norm": 2.171875, + "learning_rate": 0.00014282062548521299, + "loss": 2.0208, + "step": 305225 + }, + { + "epoch": 0.72, + "grad_norm": 2.21875, + "learning_rate": 0.00014281895521195986, + "loss": 1.848, + "step": 305230 + }, + { + "epoch": 0.72, + "grad_norm": 2.359375, + "learning_rate": 0.000142817284924079, + "loss": 2.1894, + "step": 305235 + }, + { + "epoch": 0.72, + "grad_norm": 2.0625, + "learning_rate": 0.00014281561462157097, + "loss": 2.034, + "step": 305240 + }, + { + "epoch": 0.72, + "grad_norm": 2.34375, + "learning_rate": 0.00014281394430443637, + "loss": 2.0276, + "step": 305245 + }, + { + "epoch": 0.72, + "grad_norm": 2.40625, + "learning_rate": 0.00014281227397267576, + "loss": 1.9916, + "step": 305250 + }, + { + "epoch": 0.72, + "grad_norm": 2.125, + "learning_rate": 0.00014281060362628968, + "loss": 2.0603, + "step": 305255 + }, + { + "epoch": 0.72, + "grad_norm": 2.328125, + "learning_rate": 0.00014280893326527874, + "loss": 2.0501, + "step": 305260 + }, + { + "epoch": 0.72, + "grad_norm": 2.375, + "learning_rate": 0.00014280726288964346, + "loss": 2.2883, + "step": 305265 + }, + { + "epoch": 0.72, + "grad_norm": 2.09375, + "learning_rate": 0.0001428055924993845, + "loss": 1.961, + "step": 305270 + }, + { + "epoch": 0.72, + "grad_norm": 2.203125, + "learning_rate": 0.0001428039220945023, + "loss": 2.1463, + "step": 305275 + }, + { + "epoch": 0.72, + "grad_norm": 2.34375, + "learning_rate": 0.00014280225167499755, + "loss": 2.0295, + "step": 305280 + }, + { + "epoch": 0.72, + "grad_norm": 2.625, + "learning_rate": 0.00014280058124087076, + "loss": 2.1542, + "step": 305285 + }, + { + "epoch": 0.72, + "grad_norm": 2.46875, + "learning_rate": 0.00014279891079212252, + "loss": 2.0429, + "step": 305290 + }, + { + "epoch": 0.72, + "grad_norm": 2.421875, + "learning_rate": 0.00014279724032875338, + "loss": 2.0286, + "step": 305295 + }, + { + "epoch": 0.72, + "grad_norm": 2.421875, + "learning_rate": 0.00014279556985076394, + "loss": 2.2127, + "step": 305300 + }, + { + "epoch": 0.72, + "grad_norm": 2.046875, + "learning_rate": 0.00014279389935815476, + "loss": 2.0455, + "step": 305305 + }, + { + "epoch": 0.72, + "grad_norm": 2.5625, + "learning_rate": 0.00014279222885092642, + "loss": 2.1161, + "step": 305310 + }, + { + "epoch": 0.72, + "grad_norm": 2.21875, + "learning_rate": 0.00014279055832907946, + "loss": 2.0209, + "step": 305315 + }, + { + "epoch": 0.72, + "grad_norm": 2.296875, + "learning_rate": 0.00014278888779261446, + "loss": 2.0598, + "step": 305320 + }, + { + "epoch": 0.72, + "grad_norm": 2.25, + "learning_rate": 0.00014278721724153201, + "loss": 1.7997, + "step": 305325 + }, + { + "epoch": 0.72, + "grad_norm": 2.34375, + "learning_rate": 0.00014278554667583268, + "loss": 2.0749, + "step": 305330 + }, + { + "epoch": 0.72, + "grad_norm": 2.328125, + "learning_rate": 0.00014278387609551705, + "loss": 1.9958, + "step": 305335 + }, + { + "epoch": 0.72, + "grad_norm": 2.15625, + "learning_rate": 0.00014278220550058564, + "loss": 1.8407, + "step": 305340 + }, + { + "epoch": 0.72, + "grad_norm": 2.140625, + "learning_rate": 0.00014278053489103905, + "loss": 2.0234, + "step": 305345 + }, + { + "epoch": 0.72, + "grad_norm": 2.09375, + "learning_rate": 0.00014277886426687788, + "loss": 1.9844, + "step": 305350 + }, + { + "epoch": 0.72, + "grad_norm": 2.28125, + "learning_rate": 0.00014277719362810264, + "loss": 2.2593, + "step": 305355 + }, + { + "epoch": 0.72, + "grad_norm": 2.34375, + "learning_rate": 0.00014277552297471395, + "loss": 2.1204, + "step": 305360 + }, + { + "epoch": 0.72, + "grad_norm": 2.25, + "learning_rate": 0.00014277385230671239, + "loss": 2.0554, + "step": 305365 + }, + { + "epoch": 0.72, + "grad_norm": 1.9296875, + "learning_rate": 0.00014277218162409846, + "loss": 2.0188, + "step": 305370 + }, + { + "epoch": 0.72, + "grad_norm": 2.0625, + "learning_rate": 0.00014277051092687282, + "loss": 2.0113, + "step": 305375 + }, + { + "epoch": 0.72, + "grad_norm": 2.265625, + "learning_rate": 0.00014276884021503596, + "loss": 1.9837, + "step": 305380 + }, + { + "epoch": 0.72, + "grad_norm": 2.234375, + "learning_rate": 0.00014276716948858853, + "loss": 2.0445, + "step": 305385 + }, + { + "epoch": 0.72, + "grad_norm": 2.046875, + "learning_rate": 0.00014276549874753103, + "loss": 2.0035, + "step": 305390 + }, + { + "epoch": 0.72, + "grad_norm": 2.671875, + "learning_rate": 0.00014276382799186408, + "loss": 1.9279, + "step": 305395 + }, + { + "epoch": 0.72, + "grad_norm": 2.375, + "learning_rate": 0.00014276215722158818, + "loss": 2.1856, + "step": 305400 + }, + { + "epoch": 0.72, + "grad_norm": 2.03125, + "learning_rate": 0.00014276048643670404, + "loss": 2.0384, + "step": 305405 + }, + { + "epoch": 0.72, + "grad_norm": 2.078125, + "learning_rate": 0.00014275881563721209, + "loss": 1.9465, + "step": 305410 + }, + { + "epoch": 0.72, + "grad_norm": 2.15625, + "learning_rate": 0.00014275714482311292, + "loss": 1.8414, + "step": 305415 + }, + { + "epoch": 0.72, + "grad_norm": 2.0, + "learning_rate": 0.0001427554739944072, + "loss": 2.1596, + "step": 305420 + }, + { + "epoch": 0.72, + "grad_norm": 2.265625, + "learning_rate": 0.00014275380315109537, + "loss": 1.9489, + "step": 305425 + }, + { + "epoch": 0.72, + "grad_norm": 1.9921875, + "learning_rate": 0.00014275213229317813, + "loss": 2.0721, + "step": 305430 + }, + { + "epoch": 0.72, + "grad_norm": 2.375, + "learning_rate": 0.00014275046142065594, + "loss": 2.0342, + "step": 305435 + }, + { + "epoch": 0.72, + "grad_norm": 1.8203125, + "learning_rate": 0.00014274879053352942, + "loss": 2.0073, + "step": 305440 + }, + { + "epoch": 0.72, + "grad_norm": 1.9921875, + "learning_rate": 0.00014274711963179916, + "loss": 2.1736, + "step": 305445 + }, + { + "epoch": 0.72, + "grad_norm": 1.9140625, + "learning_rate": 0.00014274544871546572, + "loss": 1.9776, + "step": 305450 + }, + { + "epoch": 0.72, + "grad_norm": 2.203125, + "learning_rate": 0.0001427437777845296, + "loss": 1.8193, + "step": 305455 + }, + { + "epoch": 0.72, + "grad_norm": 2.1875, + "learning_rate": 0.0001427421068389915, + "loss": 2.0315, + "step": 305460 + }, + { + "epoch": 0.72, + "grad_norm": 2.171875, + "learning_rate": 0.0001427404358788519, + "loss": 1.9829, + "step": 305465 + }, + { + "epoch": 0.72, + "grad_norm": 2.421875, + "learning_rate": 0.00014273876490411135, + "loss": 2.0701, + "step": 305470 + }, + { + "epoch": 0.72, + "grad_norm": 2.453125, + "learning_rate": 0.0001427370939147705, + "loss": 1.9032, + "step": 305475 + }, + { + "epoch": 0.72, + "grad_norm": 2.265625, + "learning_rate": 0.0001427354229108299, + "loss": 2.0902, + "step": 305480 + }, + { + "epoch": 0.72, + "grad_norm": 2.265625, + "learning_rate": 0.00014273375189229005, + "loss": 2.0386, + "step": 305485 + }, + { + "epoch": 0.72, + "grad_norm": 1.8203125, + "learning_rate": 0.0001427320808591516, + "loss": 1.9379, + "step": 305490 + }, + { + "epoch": 0.72, + "grad_norm": 2.109375, + "learning_rate": 0.00014273040981141512, + "loss": 2.0708, + "step": 305495 + }, + { + "epoch": 0.72, + "grad_norm": 2.359375, + "learning_rate": 0.00014272873874908117, + "loss": 1.9508, + "step": 305500 + }, + { + "epoch": 0.72, + "grad_norm": 2.1875, + "learning_rate": 0.00014272706767215024, + "loss": 2.2197, + "step": 305505 + }, + { + "epoch": 0.72, + "grad_norm": 2.421875, + "learning_rate": 0.00014272539658062304, + "loss": 2.0039, + "step": 305510 + }, + { + "epoch": 0.72, + "grad_norm": 2.0625, + "learning_rate": 0.0001427237254745, + "loss": 2.0224, + "step": 305515 + }, + { + "epoch": 0.72, + "grad_norm": 1.9453125, + "learning_rate": 0.00014272205435378183, + "loss": 2.0896, + "step": 305520 + }, + { + "epoch": 0.72, + "grad_norm": 2.515625, + "learning_rate": 0.00014272038321846902, + "loss": 2.0003, + "step": 305525 + }, + { + "epoch": 0.72, + "grad_norm": 2.078125, + "learning_rate": 0.0001427187120685621, + "loss": 1.8718, + "step": 305530 + }, + { + "epoch": 0.72, + "grad_norm": 2.25, + "learning_rate": 0.00014271704090406174, + "loss": 2.1247, + "step": 305535 + }, + { + "epoch": 0.72, + "grad_norm": 2.21875, + "learning_rate": 0.00014271536972496843, + "loss": 2.0178, + "step": 305540 + }, + { + "epoch": 0.72, + "grad_norm": 2.3125, + "learning_rate": 0.00014271369853128283, + "loss": 1.976, + "step": 305545 + }, + { + "epoch": 0.72, + "grad_norm": 2.265625, + "learning_rate": 0.0001427120273230054, + "loss": 2.0434, + "step": 305550 + }, + { + "epoch": 0.72, + "grad_norm": 2.453125, + "learning_rate": 0.00014271035610013679, + "loss": 2.0453, + "step": 305555 + }, + { + "epoch": 0.72, + "grad_norm": 2.4375, + "learning_rate": 0.00014270868486267752, + "loss": 1.9549, + "step": 305560 + }, + { + "epoch": 0.72, + "grad_norm": 1.90625, + "learning_rate": 0.00014270701361062825, + "loss": 2.0223, + "step": 305565 + }, + { + "epoch": 0.72, + "grad_norm": 2.578125, + "learning_rate": 0.00014270534234398944, + "loss": 2.0031, + "step": 305570 + }, + { + "epoch": 0.72, + "grad_norm": 2.09375, + "learning_rate": 0.00014270367106276172, + "loss": 2.2809, + "step": 305575 + }, + { + "epoch": 0.72, + "grad_norm": 3.328125, + "learning_rate": 0.00014270199976694565, + "loss": 2.0019, + "step": 305580 + }, + { + "epoch": 0.72, + "grad_norm": 2.3125, + "learning_rate": 0.00014270032845654183, + "loss": 2.1415, + "step": 305585 + }, + { + "epoch": 0.72, + "grad_norm": 2.3125, + "learning_rate": 0.00014269865713155077, + "loss": 1.9583, + "step": 305590 + }, + { + "epoch": 0.72, + "grad_norm": 2.46875, + "learning_rate": 0.00014269698579197308, + "loss": 2.1183, + "step": 305595 + }, + { + "epoch": 0.72, + "grad_norm": 2.3125, + "learning_rate": 0.00014269531443780932, + "loss": 2.1609, + "step": 305600 + }, + { + "epoch": 0.72, + "grad_norm": 2.25, + "learning_rate": 0.0001426936430690601, + "loss": 2.1395, + "step": 305605 + }, + { + "epoch": 0.72, + "grad_norm": 2.515625, + "learning_rate": 0.00014269197168572593, + "loss": 1.9055, + "step": 305610 + }, + { + "epoch": 0.72, + "grad_norm": 2.078125, + "learning_rate": 0.0001426903002878074, + "loss": 1.8918, + "step": 305615 + }, + { + "epoch": 0.72, + "grad_norm": 2.140625, + "learning_rate": 0.0001426886288753051, + "loss": 2.1959, + "step": 305620 + }, + { + "epoch": 0.72, + "grad_norm": 2.84375, + "learning_rate": 0.0001426869574482196, + "loss": 2.026, + "step": 305625 + }, + { + "epoch": 0.72, + "grad_norm": 2.6875, + "learning_rate": 0.00014268528600655146, + "loss": 2.3067, + "step": 305630 + }, + { + "epoch": 0.72, + "grad_norm": 3.0625, + "learning_rate": 0.00014268361455030125, + "loss": 2.2303, + "step": 305635 + }, + { + "epoch": 0.72, + "grad_norm": 2.203125, + "learning_rate": 0.00014268194307946952, + "loss": 2.0329, + "step": 305640 + }, + { + "epoch": 0.72, + "grad_norm": 2.078125, + "learning_rate": 0.00014268027159405688, + "loss": 2.0766, + "step": 305645 + }, + { + "epoch": 0.72, + "grad_norm": 2.296875, + "learning_rate": 0.00014267860009406388, + "loss": 1.9874, + "step": 305650 + }, + { + "epoch": 0.72, + "grad_norm": 2.1875, + "learning_rate": 0.0001426769285794911, + "loss": 1.8967, + "step": 305655 + }, + { + "epoch": 0.72, + "grad_norm": 2.046875, + "learning_rate": 0.00014267525705033914, + "loss": 2.1365, + "step": 305660 + }, + { + "epoch": 0.72, + "grad_norm": 1.953125, + "learning_rate": 0.0001426735855066085, + "loss": 2.1902, + "step": 305665 + }, + { + "epoch": 0.72, + "grad_norm": 2.140625, + "learning_rate": 0.0001426719139482998, + "loss": 1.9084, + "step": 305670 + }, + { + "epoch": 0.72, + "grad_norm": 2.375, + "learning_rate": 0.0001426702423754136, + "loss": 1.9825, + "step": 305675 + }, + { + "epoch": 0.72, + "grad_norm": 2.046875, + "learning_rate": 0.00014266857078795047, + "loss": 1.8793, + "step": 305680 + }, + { + "epoch": 0.72, + "grad_norm": 2.703125, + "learning_rate": 0.00014266689918591098, + "loss": 2.0058, + "step": 305685 + }, + { + "epoch": 0.72, + "grad_norm": 2.171875, + "learning_rate": 0.00014266522756929573, + "loss": 2.1004, + "step": 305690 + }, + { + "epoch": 0.72, + "grad_norm": 2.203125, + "learning_rate": 0.00014266355593810522, + "loss": 1.9713, + "step": 305695 + }, + { + "epoch": 0.72, + "grad_norm": 2.59375, + "learning_rate": 0.0001426618842923401, + "loss": 2.1484, + "step": 305700 + }, + { + "epoch": 0.72, + "grad_norm": 2.53125, + "learning_rate": 0.0001426602126320009, + "loss": 1.8532, + "step": 305705 + }, + { + "epoch": 0.72, + "grad_norm": 1.796875, + "learning_rate": 0.00014265854095708817, + "loss": 2.205, + "step": 305710 + }, + { + "epoch": 0.72, + "grad_norm": 2.1875, + "learning_rate": 0.00014265686926760254, + "loss": 1.9625, + "step": 305715 + }, + { + "epoch": 0.72, + "grad_norm": 2.84375, + "learning_rate": 0.0001426551975635445, + "loss": 2.1501, + "step": 305720 + }, + { + "epoch": 0.72, + "grad_norm": 2.296875, + "learning_rate": 0.00014265352584491475, + "loss": 1.9473, + "step": 305725 + }, + { + "epoch": 0.72, + "grad_norm": 2.1875, + "learning_rate": 0.00014265185411171374, + "loss": 2.1919, + "step": 305730 + }, + { + "epoch": 0.72, + "grad_norm": 1.9765625, + "learning_rate": 0.0001426501823639421, + "loss": 2.0877, + "step": 305735 + }, + { + "epoch": 0.72, + "grad_norm": 2.515625, + "learning_rate": 0.00014264851060160033, + "loss": 2.1246, + "step": 305740 + }, + { + "epoch": 0.72, + "grad_norm": 2.140625, + "learning_rate": 0.0001426468388246891, + "loss": 2.0364, + "step": 305745 + }, + { + "epoch": 0.72, + "grad_norm": 2.015625, + "learning_rate": 0.00014264516703320893, + "loss": 1.7748, + "step": 305750 + }, + { + "epoch": 0.72, + "grad_norm": 2.09375, + "learning_rate": 0.0001426434952271604, + "loss": 2.168, + "step": 305755 + }, + { + "epoch": 0.72, + "grad_norm": 2.3125, + "learning_rate": 0.00014264182340654408, + "loss": 2.009, + "step": 305760 + }, + { + "epoch": 0.72, + "grad_norm": 2.46875, + "learning_rate": 0.00014264015157136055, + "loss": 2.0931, + "step": 305765 + }, + { + "epoch": 0.72, + "grad_norm": 2.390625, + "learning_rate": 0.00014263847972161036, + "loss": 2.0359, + "step": 305770 + }, + { + "epoch": 0.72, + "grad_norm": 2.296875, + "learning_rate": 0.00014263680785729408, + "loss": 1.9563, + "step": 305775 + }, + { + "epoch": 0.72, + "grad_norm": 2.53125, + "learning_rate": 0.0001426351359784123, + "loss": 2.0761, + "step": 305780 + }, + { + "epoch": 0.72, + "grad_norm": 3.203125, + "learning_rate": 0.00014263346408496558, + "loss": 2.09, + "step": 305785 + }, + { + "epoch": 0.72, + "grad_norm": 1.96875, + "learning_rate": 0.0001426317921769545, + "loss": 2.0959, + "step": 305790 + }, + { + "epoch": 0.72, + "grad_norm": 1.9609375, + "learning_rate": 0.00014263012025437965, + "loss": 2.1594, + "step": 305795 + }, + { + "epoch": 0.72, + "grad_norm": 2.375, + "learning_rate": 0.00014262844831724154, + "loss": 2.0904, + "step": 305800 + }, + { + "epoch": 0.72, + "grad_norm": 1.8203125, + "learning_rate": 0.0001426267763655408, + "loss": 1.8305, + "step": 305805 + }, + { + "epoch": 0.72, + "grad_norm": 2.265625, + "learning_rate": 0.000142625104399278, + "loss": 1.9154, + "step": 305810 + }, + { + "epoch": 0.72, + "grad_norm": 1.9375, + "learning_rate": 0.00014262343241845367, + "loss": 2.095, + "step": 305815 + }, + { + "epoch": 0.72, + "grad_norm": 2.03125, + "learning_rate": 0.00014262176042306837, + "loss": 2.0738, + "step": 305820 + }, + { + "epoch": 0.72, + "grad_norm": 2.375, + "learning_rate": 0.00014262008841312276, + "loss": 2.1114, + "step": 305825 + }, + { + "epoch": 0.72, + "grad_norm": 1.859375, + "learning_rate": 0.0001426184163886173, + "loss": 2.0326, + "step": 305830 + }, + { + "epoch": 0.72, + "grad_norm": 1.8984375, + "learning_rate": 0.00014261674434955265, + "loss": 1.9979, + "step": 305835 + }, + { + "epoch": 0.72, + "grad_norm": 1.8515625, + "learning_rate": 0.00014261507229592938, + "loss": 1.9829, + "step": 305840 + }, + { + "epoch": 0.72, + "grad_norm": 2.0625, + "learning_rate": 0.00014261340022774795, + "loss": 1.8016, + "step": 305845 + }, + { + "epoch": 0.72, + "grad_norm": 2.078125, + "learning_rate": 0.00014261172814500908, + "loss": 2.1739, + "step": 305850 + }, + { + "epoch": 0.72, + "grad_norm": 2.109375, + "learning_rate": 0.0001426100560477132, + "loss": 2.0814, + "step": 305855 + }, + { + "epoch": 0.72, + "grad_norm": 2.046875, + "learning_rate": 0.000142608383935861, + "loss": 2.0181, + "step": 305860 + }, + { + "epoch": 0.72, + "grad_norm": 2.078125, + "learning_rate": 0.00014260671180945302, + "loss": 2.0295, + "step": 305865 + }, + { + "epoch": 0.72, + "grad_norm": 2.4375, + "learning_rate": 0.0001426050396684898, + "loss": 2.1096, + "step": 305870 + }, + { + "epoch": 0.72, + "grad_norm": 2.21875, + "learning_rate": 0.0001426033675129719, + "loss": 2.0045, + "step": 305875 + }, + { + "epoch": 0.72, + "grad_norm": 2.75, + "learning_rate": 0.00014260169534289994, + "loss": 2.0676, + "step": 305880 + }, + { + "epoch": 0.72, + "grad_norm": 2.328125, + "learning_rate": 0.00014260002315827445, + "loss": 2.0804, + "step": 305885 + }, + { + "epoch": 0.72, + "grad_norm": 1.90625, + "learning_rate": 0.00014259835095909603, + "loss": 2.1243, + "step": 305890 + }, + { + "epoch": 0.72, + "grad_norm": 2.40625, + "learning_rate": 0.00014259667874536524, + "loss": 2.1184, + "step": 305895 + }, + { + "epoch": 0.72, + "grad_norm": 2.0, + "learning_rate": 0.00014259500651708266, + "loss": 2.0611, + "step": 305900 + }, + { + "epoch": 0.72, + "grad_norm": 2.0625, + "learning_rate": 0.00014259333427424885, + "loss": 2.0786, + "step": 305905 + }, + { + "epoch": 0.72, + "grad_norm": 1.7734375, + "learning_rate": 0.0001425916620168644, + "loss": 2.1187, + "step": 305910 + }, + { + "epoch": 0.72, + "grad_norm": 2.40625, + "learning_rate": 0.00014258998974492983, + "loss": 2.1257, + "step": 305915 + }, + { + "epoch": 0.72, + "grad_norm": 2.125, + "learning_rate": 0.00014258831745844576, + "loss": 1.8331, + "step": 305920 + }, + { + "epoch": 0.72, + "grad_norm": 2.234375, + "learning_rate": 0.00014258664515741276, + "loss": 2.0179, + "step": 305925 + }, + { + "epoch": 0.72, + "grad_norm": 2.109375, + "learning_rate": 0.0001425849728418314, + "loss": 1.9512, + "step": 305930 + }, + { + "epoch": 0.72, + "grad_norm": 2.03125, + "learning_rate": 0.0001425833005117022, + "loss": 2.1009, + "step": 305935 + }, + { + "epoch": 0.72, + "grad_norm": 2.34375, + "learning_rate": 0.0001425816281670258, + "loss": 2.057, + "step": 305940 + }, + { + "epoch": 0.72, + "grad_norm": 2.015625, + "learning_rate": 0.00014257995580780272, + "loss": 2.0407, + "step": 305945 + }, + { + "epoch": 0.72, + "grad_norm": 2.03125, + "learning_rate": 0.0001425782834340336, + "loss": 2.1009, + "step": 305950 + }, + { + "epoch": 0.72, + "grad_norm": 2.484375, + "learning_rate": 0.00014257661104571896, + "loss": 2.1147, + "step": 305955 + }, + { + "epoch": 0.72, + "grad_norm": 2.34375, + "learning_rate": 0.00014257493864285934, + "loss": 2.0533, + "step": 305960 + }, + { + "epoch": 0.72, + "grad_norm": 2.109375, + "learning_rate": 0.00014257326622545534, + "loss": 1.9176, + "step": 305965 + }, + { + "epoch": 0.72, + "grad_norm": 2.28125, + "learning_rate": 0.00014257159379350758, + "loss": 2.1908, + "step": 305970 + }, + { + "epoch": 0.72, + "grad_norm": 2.015625, + "learning_rate": 0.00014256992134701658, + "loss": 1.9391, + "step": 305975 + }, + { + "epoch": 0.72, + "grad_norm": 2.359375, + "learning_rate": 0.00014256824888598293, + "loss": 1.9269, + "step": 305980 + }, + { + "epoch": 0.72, + "grad_norm": 2.046875, + "learning_rate": 0.00014256657641040717, + "loss": 2.0502, + "step": 305985 + }, + { + "epoch": 0.72, + "grad_norm": 2.234375, + "learning_rate": 0.0001425649039202899, + "loss": 2.0766, + "step": 305990 + }, + { + "epoch": 0.72, + "grad_norm": 2.515625, + "learning_rate": 0.00014256323141563173, + "loss": 2.0318, + "step": 305995 + }, + { + "epoch": 0.72, + "grad_norm": 2.21875, + "learning_rate": 0.00014256155889643315, + "loss": 2.1487, + "step": 306000 + }, + { + "epoch": 0.72, + "grad_norm": 2.15625, + "learning_rate": 0.00014255988636269478, + "loss": 2.0505, + "step": 306005 + }, + { + "epoch": 0.72, + "grad_norm": 2.375, + "learning_rate": 0.00014255821381441717, + "loss": 1.9849, + "step": 306010 + }, + { + "epoch": 0.72, + "grad_norm": 2.78125, + "learning_rate": 0.0001425565412516009, + "loss": 1.9156, + "step": 306015 + }, + { + "epoch": 0.72, + "grad_norm": 2.328125, + "learning_rate": 0.00014255486867424658, + "loss": 2.1229, + "step": 306020 + }, + { + "epoch": 0.72, + "grad_norm": 2.421875, + "learning_rate": 0.0001425531960823547, + "loss": 2.214, + "step": 306025 + }, + { + "epoch": 0.72, + "grad_norm": 2.125, + "learning_rate": 0.0001425515234759259, + "loss": 1.7641, + "step": 306030 + }, + { + "epoch": 0.72, + "grad_norm": 2.359375, + "learning_rate": 0.0001425498508549607, + "loss": 1.9728, + "step": 306035 + }, + { + "epoch": 0.72, + "grad_norm": 2.09375, + "learning_rate": 0.00014254817821945974, + "loss": 1.984, + "step": 306040 + }, + { + "epoch": 0.72, + "grad_norm": 2.171875, + "learning_rate": 0.00014254650556942352, + "loss": 2.0935, + "step": 306045 + }, + { + "epoch": 0.72, + "grad_norm": 2.125, + "learning_rate": 0.00014254483290485265, + "loss": 2.2011, + "step": 306050 + }, + { + "epoch": 0.72, + "grad_norm": 1.8984375, + "learning_rate": 0.0001425431602257477, + "loss": 2.0504, + "step": 306055 + }, + { + "epoch": 0.72, + "grad_norm": 1.875, + "learning_rate": 0.00014254148753210924, + "loss": 1.9915, + "step": 306060 + }, + { + "epoch": 0.72, + "grad_norm": 2.28125, + "learning_rate": 0.00014253981482393783, + "loss": 2.0682, + "step": 306065 + }, + { + "epoch": 0.72, + "grad_norm": 2.4375, + "learning_rate": 0.00014253814210123404, + "loss": 2.034, + "step": 306070 + }, + { + "epoch": 0.72, + "grad_norm": 2.34375, + "learning_rate": 0.00014253646936399848, + "loss": 2.1686, + "step": 306075 + }, + { + "epoch": 0.72, + "grad_norm": 2.625, + "learning_rate": 0.00014253479661223162, + "loss": 2.0676, + "step": 306080 + }, + { + "epoch": 0.72, + "grad_norm": 2.09375, + "learning_rate": 0.00014253312384593416, + "loss": 2.032, + "step": 306085 + }, + { + "epoch": 0.72, + "grad_norm": 2.09375, + "learning_rate": 0.00014253145106510665, + "loss": 2.3853, + "step": 306090 + }, + { + "epoch": 0.72, + "grad_norm": 2.171875, + "learning_rate": 0.00014252977826974953, + "loss": 1.8601, + "step": 306095 + }, + { + "epoch": 0.72, + "grad_norm": 2.34375, + "learning_rate": 0.00014252810545986353, + "loss": 2.1498, + "step": 306100 + }, + { + "epoch": 0.72, + "grad_norm": 2.375, + "learning_rate": 0.00014252643263544914, + "loss": 2.0231, + "step": 306105 + }, + { + "epoch": 0.72, + "grad_norm": 2.125, + "learning_rate": 0.00014252475979650698, + "loss": 2.0369, + "step": 306110 + }, + { + "epoch": 0.72, + "grad_norm": 2.078125, + "learning_rate": 0.00014252308694303754, + "loss": 2.0741, + "step": 306115 + }, + { + "epoch": 0.72, + "grad_norm": 2.125, + "learning_rate": 0.00014252141407504145, + "loss": 2.2675, + "step": 306120 + }, + { + "epoch": 0.72, + "grad_norm": 2.265625, + "learning_rate": 0.0001425197411925193, + "loss": 2.1687, + "step": 306125 + }, + { + "epoch": 0.72, + "grad_norm": 2.15625, + "learning_rate": 0.0001425180682954716, + "loss": 2.0113, + "step": 306130 + }, + { + "epoch": 0.72, + "grad_norm": 2.046875, + "learning_rate": 0.00014251639538389902, + "loss": 1.9988, + "step": 306135 + }, + { + "epoch": 0.72, + "grad_norm": 2.015625, + "learning_rate": 0.00014251472245780202, + "loss": 2.2544, + "step": 306140 + }, + { + "epoch": 0.72, + "grad_norm": 1.859375, + "learning_rate": 0.00014251304951718122, + "loss": 1.9769, + "step": 306145 + }, + { + "epoch": 0.72, + "grad_norm": 2.078125, + "learning_rate": 0.00014251137656203717, + "loss": 2.1715, + "step": 306150 + }, + { + "epoch": 0.72, + "grad_norm": 1.96875, + "learning_rate": 0.00014250970359237054, + "loss": 1.8032, + "step": 306155 + }, + { + "epoch": 0.72, + "grad_norm": 3.046875, + "learning_rate": 0.00014250803060818177, + "loss": 1.9579, + "step": 306160 + }, + { + "epoch": 0.72, + "grad_norm": 2.21875, + "learning_rate": 0.00014250635760947148, + "loss": 2.2319, + "step": 306165 + }, + { + "epoch": 0.72, + "grad_norm": 2.484375, + "learning_rate": 0.00014250468459624024, + "loss": 2.0749, + "step": 306170 + }, + { + "epoch": 0.72, + "grad_norm": 2.09375, + "learning_rate": 0.00014250301156848867, + "loss": 2.1534, + "step": 306175 + }, + { + "epoch": 0.72, + "grad_norm": 1.9921875, + "learning_rate": 0.00014250133852621727, + "loss": 1.969, + "step": 306180 + }, + { + "epoch": 0.72, + "grad_norm": 2.03125, + "learning_rate": 0.00014249966546942667, + "loss": 2.1639, + "step": 306185 + }, + { + "epoch": 0.72, + "grad_norm": 2.359375, + "learning_rate": 0.00014249799239811735, + "loss": 1.9573, + "step": 306190 + }, + { + "epoch": 0.72, + "grad_norm": 2.171875, + "learning_rate": 0.00014249631931229003, + "loss": 2.0881, + "step": 306195 + }, + { + "epoch": 0.72, + "grad_norm": 2.6875, + "learning_rate": 0.00014249464621194513, + "loss": 2.201, + "step": 306200 + }, + { + "epoch": 0.72, + "grad_norm": 2.078125, + "learning_rate": 0.0001424929730970833, + "loss": 2.1951, + "step": 306205 + }, + { + "epoch": 0.72, + "grad_norm": 2.171875, + "learning_rate": 0.00014249129996770515, + "loss": 2.0685, + "step": 306210 + }, + { + "epoch": 0.72, + "grad_norm": 2.109375, + "learning_rate": 0.00014248962682381116, + "loss": 2.1371, + "step": 306215 + }, + { + "epoch": 0.72, + "grad_norm": 2.171875, + "learning_rate": 0.00014248795366540192, + "loss": 2.0096, + "step": 306220 + }, + { + "epoch": 0.72, + "grad_norm": 2.328125, + "learning_rate": 0.00014248628049247808, + "loss": 1.92, + "step": 306225 + }, + { + "epoch": 0.72, + "grad_norm": 2.53125, + "learning_rate": 0.0001424846073050401, + "loss": 2.083, + "step": 306230 + }, + { + "epoch": 0.72, + "grad_norm": 2.375, + "learning_rate": 0.00014248293410308864, + "loss": 1.8994, + "step": 306235 + }, + { + "epoch": 0.72, + "grad_norm": 2.25, + "learning_rate": 0.00014248126088662423, + "loss": 2.2307, + "step": 306240 + }, + { + "epoch": 0.72, + "grad_norm": 2.09375, + "learning_rate": 0.00014247958765564747, + "loss": 2.1603, + "step": 306245 + }, + { + "epoch": 0.72, + "grad_norm": 1.953125, + "learning_rate": 0.00014247791441015893, + "loss": 2.094, + "step": 306250 + }, + { + "epoch": 0.72, + "grad_norm": 2.0, + "learning_rate": 0.00014247624115015908, + "loss": 2.1525, + "step": 306255 + }, + { + "epoch": 0.72, + "grad_norm": 2.046875, + "learning_rate": 0.00014247456787564862, + "loss": 1.9755, + "step": 306260 + }, + { + "epoch": 0.72, + "grad_norm": 2.109375, + "learning_rate": 0.0001424728945866281, + "loss": 2.0953, + "step": 306265 + }, + { + "epoch": 0.72, + "grad_norm": 1.8203125, + "learning_rate": 0.0001424712212830981, + "loss": 2.2256, + "step": 306270 + }, + { + "epoch": 0.72, + "grad_norm": 2.09375, + "learning_rate": 0.0001424695479650591, + "loss": 2.1783, + "step": 306275 + }, + { + "epoch": 0.72, + "grad_norm": 2.15625, + "learning_rate": 0.00014246787463251175, + "loss": 1.9831, + "step": 306280 + }, + { + "epoch": 0.72, + "grad_norm": 2.21875, + "learning_rate": 0.00014246620128545657, + "loss": 2.0407, + "step": 306285 + }, + { + "epoch": 0.72, + "grad_norm": 2.453125, + "learning_rate": 0.0001424645279238942, + "loss": 1.9447, + "step": 306290 + }, + { + "epoch": 0.72, + "grad_norm": 2.359375, + "learning_rate": 0.00014246285454782518, + "loss": 2.243, + "step": 306295 + }, + { + "epoch": 0.72, + "grad_norm": 2.3125, + "learning_rate": 0.0001424611811572501, + "loss": 2.0215, + "step": 306300 + }, + { + "epoch": 0.72, + "grad_norm": 2.15625, + "learning_rate": 0.00014245950775216946, + "loss": 2.2021, + "step": 306305 + }, + { + "epoch": 0.72, + "grad_norm": 2.015625, + "learning_rate": 0.0001424578343325839, + "loss": 2.0898, + "step": 306310 + }, + { + "epoch": 0.72, + "grad_norm": 2.109375, + "learning_rate": 0.000142456160898494, + "loss": 2.0362, + "step": 306315 + }, + { + "epoch": 0.72, + "grad_norm": 2.046875, + "learning_rate": 0.00014245448744990027, + "loss": 2.246, + "step": 306320 + }, + { + "epoch": 0.72, + "grad_norm": 2.328125, + "learning_rate": 0.00014245281398680333, + "loss": 1.9747, + "step": 306325 + }, + { + "epoch": 0.72, + "grad_norm": 2.0, + "learning_rate": 0.00014245114050920373, + "loss": 2.0885, + "step": 306330 + }, + { + "epoch": 0.72, + "grad_norm": 2.328125, + "learning_rate": 0.00014244946701710208, + "loss": 2.3433, + "step": 306335 + }, + { + "epoch": 0.72, + "grad_norm": 2.171875, + "learning_rate": 0.00014244779351049887, + "loss": 1.9501, + "step": 306340 + }, + { + "epoch": 0.72, + "grad_norm": 2.125, + "learning_rate": 0.00014244611998939476, + "loss": 2.0771, + "step": 306345 + }, + { + "epoch": 0.72, + "grad_norm": 2.390625, + "learning_rate": 0.00014244444645379029, + "loss": 2.0419, + "step": 306350 + }, + { + "epoch": 0.72, + "grad_norm": 1.9921875, + "learning_rate": 0.00014244277290368604, + "loss": 2.0653, + "step": 306355 + }, + { + "epoch": 0.72, + "grad_norm": 2.015625, + "learning_rate": 0.00014244109933908255, + "loss": 2.0489, + "step": 306360 + }, + { + "epoch": 0.72, + "grad_norm": 2.609375, + "learning_rate": 0.0001424394257599804, + "loss": 1.9689, + "step": 306365 + }, + { + "epoch": 0.72, + "grad_norm": 2.34375, + "learning_rate": 0.00014243775216638018, + "loss": 2.0661, + "step": 306370 + }, + { + "epoch": 0.72, + "grad_norm": 2.234375, + "learning_rate": 0.00014243607855828245, + "loss": 1.9177, + "step": 306375 + }, + { + "epoch": 0.72, + "grad_norm": 4.0625, + "learning_rate": 0.00014243440493568778, + "loss": 2.0416, + "step": 306380 + }, + { + "epoch": 0.72, + "grad_norm": 1.6484375, + "learning_rate": 0.00014243273129859677, + "loss": 2.114, + "step": 306385 + }, + { + "epoch": 0.72, + "grad_norm": 2.03125, + "learning_rate": 0.00014243105764700993, + "loss": 2.1021, + "step": 306390 + }, + { + "epoch": 0.72, + "grad_norm": 2.1875, + "learning_rate": 0.00014242938398092793, + "loss": 2.0092, + "step": 306395 + }, + { + "epoch": 0.72, + "grad_norm": 2.25, + "learning_rate": 0.00014242771030035123, + "loss": 2.0144, + "step": 306400 + }, + { + "epoch": 0.72, + "grad_norm": 2.125, + "learning_rate": 0.00014242603660528051, + "loss": 2.1447, + "step": 306405 + }, + { + "epoch": 0.72, + "grad_norm": 2.234375, + "learning_rate": 0.00014242436289571626, + "loss": 2.0395, + "step": 306410 + }, + { + "epoch": 0.72, + "grad_norm": 1.84375, + "learning_rate": 0.00014242268917165904, + "loss": 1.9939, + "step": 306415 + }, + { + "epoch": 0.72, + "grad_norm": 2.1875, + "learning_rate": 0.0001424210154331095, + "loss": 1.8705, + "step": 306420 + }, + { + "epoch": 0.72, + "grad_norm": 2.375, + "learning_rate": 0.00014241934168006816, + "loss": 2.0268, + "step": 306425 + }, + { + "epoch": 0.72, + "grad_norm": 2.671875, + "learning_rate": 0.00014241766791253562, + "loss": 2.266, + "step": 306430 + }, + { + "epoch": 0.72, + "grad_norm": 2.1875, + "learning_rate": 0.00014241599413051243, + "loss": 2.1171, + "step": 306435 + }, + { + "epoch": 0.72, + "grad_norm": 2.0, + "learning_rate": 0.00014241432033399918, + "loss": 2.0286, + "step": 306440 + }, + { + "epoch": 0.72, + "grad_norm": 2.03125, + "learning_rate": 0.00014241264652299637, + "loss": 2.1976, + "step": 306445 + }, + { + "epoch": 0.72, + "grad_norm": 2.203125, + "learning_rate": 0.0001424109726975047, + "loss": 2.027, + "step": 306450 + }, + { + "epoch": 0.72, + "grad_norm": 2.28125, + "learning_rate": 0.00014240929885752464, + "loss": 2.105, + "step": 306455 + }, + { + "epoch": 0.72, + "grad_norm": 2.0625, + "learning_rate": 0.0001424076250030568, + "loss": 2.1266, + "step": 306460 + }, + { + "epoch": 0.72, + "grad_norm": 2.03125, + "learning_rate": 0.00014240595113410173, + "loss": 2.0246, + "step": 306465 + }, + { + "epoch": 0.72, + "grad_norm": 2.140625, + "learning_rate": 0.00014240427725066004, + "loss": 2.0327, + "step": 306470 + }, + { + "epoch": 0.72, + "grad_norm": 2.078125, + "learning_rate": 0.00014240260335273227, + "loss": 2.0333, + "step": 306475 + }, + { + "epoch": 0.72, + "grad_norm": 1.8828125, + "learning_rate": 0.000142400929440319, + "loss": 2.0971, + "step": 306480 + }, + { + "epoch": 0.72, + "grad_norm": 1.765625, + "learning_rate": 0.0001423992555134208, + "loss": 1.9831, + "step": 306485 + }, + { + "epoch": 0.72, + "grad_norm": 2.015625, + "learning_rate": 0.00014239758157203827, + "loss": 2.15, + "step": 306490 + }, + { + "epoch": 0.72, + "grad_norm": 1.984375, + "learning_rate": 0.0001423959076161719, + "loss": 1.9907, + "step": 306495 + }, + { + "epoch": 0.72, + "grad_norm": 2.015625, + "learning_rate": 0.0001423942336458224, + "loss": 2.1017, + "step": 306500 + }, + { + "epoch": 0.72, + "grad_norm": 2.09375, + "learning_rate": 0.00014239255966099021, + "loss": 2.1811, + "step": 306505 + }, + { + "epoch": 0.72, + "grad_norm": 2.59375, + "learning_rate": 0.00014239088566167597, + "loss": 2.1168, + "step": 306510 + }, + { + "epoch": 0.72, + "grad_norm": 2.3125, + "learning_rate": 0.00014238921164788022, + "loss": 2.0894, + "step": 306515 + }, + { + "epoch": 0.72, + "grad_norm": 2.125, + "learning_rate": 0.00014238753761960354, + "loss": 1.9981, + "step": 306520 + }, + { + "epoch": 0.72, + "grad_norm": 2.0, + "learning_rate": 0.0001423858635768465, + "loss": 2.1288, + "step": 306525 + }, + { + "epoch": 0.72, + "grad_norm": 1.84375, + "learning_rate": 0.00014238418951960973, + "loss": 2.2083, + "step": 306530 + }, + { + "epoch": 0.72, + "grad_norm": 1.921875, + "learning_rate": 0.0001423825154478937, + "loss": 2.0736, + "step": 306535 + }, + { + "epoch": 0.72, + "grad_norm": 2.296875, + "learning_rate": 0.00014238084136169906, + "loss": 2.0542, + "step": 306540 + }, + { + "epoch": 0.72, + "grad_norm": 2.53125, + "learning_rate": 0.00014237916726102639, + "loss": 2.0983, + "step": 306545 + }, + { + "epoch": 0.72, + "grad_norm": 2.265625, + "learning_rate": 0.00014237749314587617, + "loss": 1.9974, + "step": 306550 + }, + { + "epoch": 0.72, + "grad_norm": 2.28125, + "learning_rate": 0.00014237581901624906, + "loss": 1.9806, + "step": 306555 + }, + { + "epoch": 0.72, + "grad_norm": 2.03125, + "learning_rate": 0.00014237414487214557, + "loss": 1.8767, + "step": 306560 + }, + { + "epoch": 0.72, + "grad_norm": 2.421875, + "learning_rate": 0.00014237247071356636, + "loss": 2.1457, + "step": 306565 + }, + { + "epoch": 0.72, + "grad_norm": 2.078125, + "learning_rate": 0.00014237079654051191, + "loss": 2.1272, + "step": 306570 + }, + { + "epoch": 0.72, + "grad_norm": 2.34375, + "learning_rate": 0.00014236912235298283, + "loss": 2.0126, + "step": 306575 + }, + { + "epoch": 0.72, + "grad_norm": 2.390625, + "learning_rate": 0.00014236744815097967, + "loss": 2.2385, + "step": 306580 + }, + { + "epoch": 0.72, + "grad_norm": 2.828125, + "learning_rate": 0.00014236577393450306, + "loss": 2.2125, + "step": 306585 + }, + { + "epoch": 0.72, + "grad_norm": 2.1875, + "learning_rate": 0.0001423640997035535, + "loss": 2.179, + "step": 306590 + }, + { + "epoch": 0.72, + "grad_norm": 2.25, + "learning_rate": 0.0001423624254581316, + "loss": 2.0254, + "step": 306595 + }, + { + "epoch": 0.72, + "grad_norm": 1.84375, + "learning_rate": 0.00014236075119823796, + "loss": 2.2306, + "step": 306600 + }, + { + "epoch": 0.72, + "grad_norm": 2.40625, + "learning_rate": 0.00014235907692387306, + "loss": 2.0234, + "step": 306605 + }, + { + "epoch": 0.72, + "grad_norm": 2.109375, + "learning_rate": 0.00014235740263503758, + "loss": 2.168, + "step": 306610 + }, + { + "epoch": 0.72, + "grad_norm": 1.8203125, + "learning_rate": 0.00014235572833173204, + "loss": 1.9963, + "step": 306615 + }, + { + "epoch": 0.72, + "grad_norm": 3.171875, + "learning_rate": 0.00014235405401395702, + "loss": 2.0895, + "step": 306620 + }, + { + "epoch": 0.72, + "grad_norm": 2.0625, + "learning_rate": 0.00014235237968171304, + "loss": 2.0755, + "step": 306625 + }, + { + "epoch": 0.72, + "grad_norm": 2.421875, + "learning_rate": 0.00014235070533500076, + "loss": 1.9801, + "step": 306630 + }, + { + "epoch": 0.72, + "grad_norm": 2.375, + "learning_rate": 0.0001423490309738207, + "loss": 2.017, + "step": 306635 + }, + { + "epoch": 0.72, + "grad_norm": 2.578125, + "learning_rate": 0.00014234735659817343, + "loss": 2.0778, + "step": 306640 + }, + { + "epoch": 0.72, + "grad_norm": 2.34375, + "learning_rate": 0.00014234568220805957, + "loss": 2.1082, + "step": 306645 + }, + { + "epoch": 0.72, + "grad_norm": 2.28125, + "learning_rate": 0.00014234400780347963, + "loss": 2.098, + "step": 306650 + }, + { + "epoch": 0.72, + "grad_norm": 1.9609375, + "learning_rate": 0.0001423423333844342, + "loss": 2.1073, + "step": 306655 + }, + { + "epoch": 0.72, + "grad_norm": 2.234375, + "learning_rate": 0.0001423406589509239, + "loss": 2.0155, + "step": 306660 + }, + { + "epoch": 0.72, + "grad_norm": 2.375, + "learning_rate": 0.00014233898450294923, + "loss": 2.0212, + "step": 306665 + }, + { + "epoch": 0.72, + "grad_norm": 2.390625, + "learning_rate": 0.00014233731004051082, + "loss": 1.9977, + "step": 306670 + }, + { + "epoch": 0.72, + "grad_norm": 2.609375, + "learning_rate": 0.00014233563556360917, + "loss": 2.0275, + "step": 306675 + }, + { + "epoch": 0.72, + "grad_norm": 2.171875, + "learning_rate": 0.00014233396107224497, + "loss": 2.0744, + "step": 306680 + }, + { + "epoch": 0.72, + "grad_norm": 2.03125, + "learning_rate": 0.00014233228656641866, + "loss": 2.1551, + "step": 306685 + }, + { + "epoch": 0.72, + "grad_norm": 2.078125, + "learning_rate": 0.0001423306120461309, + "loss": 2.1631, + "step": 306690 + }, + { + "epoch": 0.72, + "grad_norm": 2.1875, + "learning_rate": 0.00014232893751138224, + "loss": 1.9037, + "step": 306695 + }, + { + "epoch": 0.72, + "grad_norm": 2.609375, + "learning_rate": 0.00014232726296217326, + "loss": 2.0947, + "step": 306700 + }, + { + "epoch": 0.72, + "grad_norm": 7.09375, + "learning_rate": 0.00014232558839850448, + "loss": 2.0989, + "step": 306705 + }, + { + "epoch": 0.72, + "grad_norm": 2.171875, + "learning_rate": 0.00014232391382037652, + "loss": 1.9526, + "step": 306710 + }, + { + "epoch": 0.72, + "grad_norm": 2.0625, + "learning_rate": 0.00014232223922778993, + "loss": 2.094, + "step": 306715 + }, + { + "epoch": 0.72, + "grad_norm": 1.7265625, + "learning_rate": 0.00014232056462074532, + "loss": 2.1423, + "step": 306720 + }, + { + "epoch": 0.72, + "grad_norm": 2.140625, + "learning_rate": 0.00014231888999924325, + "loss": 1.8558, + "step": 306725 + }, + { + "epoch": 0.72, + "grad_norm": 2.453125, + "learning_rate": 0.00014231721536328427, + "loss": 2.2498, + "step": 306730 + }, + { + "epoch": 0.72, + "grad_norm": 2.328125, + "learning_rate": 0.00014231554071286894, + "loss": 2.2636, + "step": 306735 + }, + { + "epoch": 0.72, + "grad_norm": 1.8203125, + "learning_rate": 0.00014231386604799786, + "loss": 2.2013, + "step": 306740 + }, + { + "epoch": 0.72, + "grad_norm": 2.5, + "learning_rate": 0.00014231219136867163, + "loss": 1.9799, + "step": 306745 + }, + { + "epoch": 0.72, + "grad_norm": 2.28125, + "learning_rate": 0.00014231051667489076, + "loss": 2.0728, + "step": 306750 + }, + { + "epoch": 0.72, + "grad_norm": 2.15625, + "learning_rate": 0.00014230884196665585, + "loss": 2.0707, + "step": 306755 + }, + { + "epoch": 0.72, + "grad_norm": 2.265625, + "learning_rate": 0.0001423071672439675, + "loss": 2.1145, + "step": 306760 + }, + { + "epoch": 0.72, + "grad_norm": 2.1875, + "learning_rate": 0.00014230549250682622, + "loss": 2.2162, + "step": 306765 + }, + { + "epoch": 0.72, + "grad_norm": 2.078125, + "learning_rate": 0.00014230381775523263, + "loss": 2.1509, + "step": 306770 + }, + { + "epoch": 0.72, + "grad_norm": 2.046875, + "learning_rate": 0.00014230214298918727, + "loss": 2.2256, + "step": 306775 + }, + { + "epoch": 0.72, + "grad_norm": 2.078125, + "learning_rate": 0.00014230046820869077, + "loss": 1.8277, + "step": 306780 + }, + { + "epoch": 0.72, + "grad_norm": 2.140625, + "learning_rate": 0.00014229879341374363, + "loss": 1.838, + "step": 306785 + }, + { + "epoch": 0.72, + "grad_norm": 2.71875, + "learning_rate": 0.00014229711860434647, + "loss": 2.0238, + "step": 306790 + }, + { + "epoch": 0.72, + "grad_norm": 2.421875, + "learning_rate": 0.00014229544378049983, + "loss": 1.9516, + "step": 306795 + }, + { + "epoch": 0.72, + "grad_norm": 2.25, + "learning_rate": 0.00014229376894220433, + "loss": 2.1837, + "step": 306800 + }, + { + "epoch": 0.72, + "grad_norm": 2.84375, + "learning_rate": 0.00014229209408946048, + "loss": 1.9971, + "step": 306805 + }, + { + "epoch": 0.72, + "grad_norm": 1.8046875, + "learning_rate": 0.0001422904192222689, + "loss": 2.0864, + "step": 306810 + }, + { + "epoch": 0.72, + "grad_norm": 2.109375, + "learning_rate": 0.00014228874434063017, + "loss": 2.0827, + "step": 306815 + }, + { + "epoch": 0.72, + "grad_norm": 2.15625, + "learning_rate": 0.00014228706944454476, + "loss": 1.9223, + "step": 306820 + }, + { + "epoch": 0.72, + "grad_norm": 1.875, + "learning_rate": 0.00014228539453401337, + "loss": 2.1777, + "step": 306825 + }, + { + "epoch": 0.72, + "grad_norm": 2.359375, + "learning_rate": 0.00014228371960903652, + "loss": 2.112, + "step": 306830 + }, + { + "epoch": 0.72, + "grad_norm": 2.296875, + "learning_rate": 0.0001422820446696148, + "loss": 2.1983, + "step": 306835 + }, + { + "epoch": 0.72, + "grad_norm": 2.328125, + "learning_rate": 0.00014228036971574877, + "loss": 2.0763, + "step": 306840 + }, + { + "epoch": 0.72, + "grad_norm": 2.4375, + "learning_rate": 0.00014227869474743896, + "loss": 1.9672, + "step": 306845 + }, + { + "epoch": 0.72, + "grad_norm": 2.328125, + "learning_rate": 0.00014227701976468602, + "loss": 2.0481, + "step": 306850 + }, + { + "epoch": 0.72, + "grad_norm": 2.1875, + "learning_rate": 0.00014227534476749044, + "loss": 2.1471, + "step": 306855 + }, + { + "epoch": 0.72, + "grad_norm": 2.328125, + "learning_rate": 0.0001422736697558529, + "loss": 2.1801, + "step": 306860 + }, + { + "epoch": 0.72, + "grad_norm": 2.09375, + "learning_rate": 0.00014227199472977386, + "loss": 2.0111, + "step": 306865 + }, + { + "epoch": 0.72, + "grad_norm": 2.265625, + "learning_rate": 0.00014227031968925392, + "loss": 2.0648, + "step": 306870 + }, + { + "epoch": 0.72, + "grad_norm": 2.703125, + "learning_rate": 0.0001422686446342937, + "loss": 1.9948, + "step": 306875 + }, + { + "epoch": 0.72, + "grad_norm": 2.390625, + "learning_rate": 0.00014226696956489378, + "loss": 2.0115, + "step": 306880 + }, + { + "epoch": 0.72, + "grad_norm": 1.8046875, + "learning_rate": 0.00014226529448105465, + "loss": 1.9614, + "step": 306885 + }, + { + "epoch": 0.72, + "grad_norm": 2.25, + "learning_rate": 0.00014226361938277698, + "loss": 2.0787, + "step": 306890 + }, + { + "epoch": 0.72, + "grad_norm": 2.203125, + "learning_rate": 0.00014226194427006123, + "loss": 2.0385, + "step": 306895 + }, + { + "epoch": 0.72, + "grad_norm": 2.140625, + "learning_rate": 0.00014226026914290803, + "loss": 2.1556, + "step": 306900 + }, + { + "epoch": 0.72, + "grad_norm": 3.375, + "learning_rate": 0.000142258594001318, + "loss": 2.1414, + "step": 306905 + }, + { + "epoch": 0.72, + "grad_norm": 2.109375, + "learning_rate": 0.00014225691884529166, + "loss": 2.0507, + "step": 306910 + }, + { + "epoch": 0.72, + "grad_norm": 1.9921875, + "learning_rate": 0.0001422552436748296, + "loss": 1.9775, + "step": 306915 + }, + { + "epoch": 0.72, + "grad_norm": 2.25, + "learning_rate": 0.00014225356848993236, + "loss": 1.93, + "step": 306920 + }, + { + "epoch": 0.72, + "grad_norm": 2.640625, + "learning_rate": 0.0001422518932906005, + "loss": 2.0978, + "step": 306925 + }, + { + "epoch": 0.72, + "grad_norm": 2.171875, + "learning_rate": 0.00014225021807683468, + "loss": 1.9915, + "step": 306930 + }, + { + "epoch": 0.72, + "grad_norm": 1.8203125, + "learning_rate": 0.0001422485428486354, + "loss": 1.9093, + "step": 306935 + }, + { + "epoch": 0.72, + "grad_norm": 2.28125, + "learning_rate": 0.00014224686760600324, + "loss": 1.9428, + "step": 306940 + }, + { + "epoch": 0.72, + "grad_norm": 2.3125, + "learning_rate": 0.00014224519234893882, + "loss": 2.1301, + "step": 306945 + }, + { + "epoch": 0.72, + "grad_norm": 2.484375, + "learning_rate": 0.00014224351707744264, + "loss": 2.0916, + "step": 306950 + }, + { + "epoch": 0.72, + "grad_norm": 2.078125, + "learning_rate": 0.00014224184179151533, + "loss": 2.1727, + "step": 306955 + }, + { + "epoch": 0.72, + "grad_norm": 2.265625, + "learning_rate": 0.00014224016649115744, + "loss": 2.1369, + "step": 306960 + }, + { + "epoch": 0.72, + "grad_norm": 1.984375, + "learning_rate": 0.00014223849117636953, + "loss": 2.2109, + "step": 306965 + }, + { + "epoch": 0.72, + "grad_norm": 2.34375, + "learning_rate": 0.00014223681584715222, + "loss": 2.0362, + "step": 306970 + }, + { + "epoch": 0.72, + "grad_norm": 2.25, + "learning_rate": 0.00014223514050350602, + "loss": 1.9996, + "step": 306975 + }, + { + "epoch": 0.72, + "grad_norm": 2.140625, + "learning_rate": 0.00014223346514543152, + "loss": 2.1, + "step": 306980 + }, + { + "epoch": 0.72, + "grad_norm": 2.25, + "learning_rate": 0.0001422317897729293, + "loss": 2.2105, + "step": 306985 + }, + { + "epoch": 0.72, + "grad_norm": 1.796875, + "learning_rate": 0.00014223011438599997, + "loss": 2.0259, + "step": 306990 + }, + { + "epoch": 0.72, + "grad_norm": 2.265625, + "learning_rate": 0.00014222843898464404, + "loss": 2.2608, + "step": 306995 + }, + { + "epoch": 0.72, + "grad_norm": 2.375, + "learning_rate": 0.0001422267635688621, + "loss": 2.0546, + "step": 307000 + }, + { + "epoch": 0.72, + "grad_norm": 2.78125, + "learning_rate": 0.00014222508813865474, + "loss": 2.0183, + "step": 307005 + }, + { + "epoch": 0.72, + "grad_norm": 2.671875, + "learning_rate": 0.00014222341269402256, + "loss": 1.7579, + "step": 307010 + }, + { + "epoch": 0.72, + "grad_norm": 1.8828125, + "learning_rate": 0.00014222173723496605, + "loss": 1.9995, + "step": 307015 + }, + { + "epoch": 0.72, + "grad_norm": 1.6796875, + "learning_rate": 0.00014222006176148585, + "loss": 1.7573, + "step": 307020 + }, + { + "epoch": 0.72, + "grad_norm": 1.875, + "learning_rate": 0.0001422183862735825, + "loss": 2.0603, + "step": 307025 + }, + { + "epoch": 0.72, + "grad_norm": 2.078125, + "learning_rate": 0.0001422167107712566, + "loss": 2.0378, + "step": 307030 + }, + { + "epoch": 0.72, + "grad_norm": 1.9453125, + "learning_rate": 0.00014221503525450868, + "loss": 2.1051, + "step": 307035 + }, + { + "epoch": 0.72, + "grad_norm": 2.015625, + "learning_rate": 0.0001422133597233394, + "loss": 1.8501, + "step": 307040 + }, + { + "epoch": 0.72, + "grad_norm": 2.03125, + "learning_rate": 0.0001422116841777492, + "loss": 2.045, + "step": 307045 + }, + { + "epoch": 0.72, + "grad_norm": 2.28125, + "learning_rate": 0.00014221000861773875, + "loss": 2.1019, + "step": 307050 + }, + { + "epoch": 0.72, + "grad_norm": 2.203125, + "learning_rate": 0.0001422083330433086, + "loss": 2.0943, + "step": 307055 + }, + { + "epoch": 0.72, + "grad_norm": 2.3125, + "learning_rate": 0.00014220665745445928, + "loss": 1.9273, + "step": 307060 + }, + { + "epoch": 0.72, + "grad_norm": 1.9765625, + "learning_rate": 0.00014220498185119147, + "loss": 2.14, + "step": 307065 + }, + { + "epoch": 0.72, + "grad_norm": 2.21875, + "learning_rate": 0.00014220330623350563, + "loss": 2.0034, + "step": 307070 + }, + { + "epoch": 0.72, + "grad_norm": 2.171875, + "learning_rate": 0.00014220163060140237, + "loss": 2.074, + "step": 307075 + }, + { + "epoch": 0.72, + "grad_norm": 2.078125, + "learning_rate": 0.00014219995495488229, + "loss": 2.1715, + "step": 307080 + }, + { + "epoch": 0.72, + "grad_norm": 2.234375, + "learning_rate": 0.00014219827929394592, + "loss": 2.1297, + "step": 307085 + }, + { + "epoch": 0.72, + "grad_norm": 1.9140625, + "learning_rate": 0.00014219660361859386, + "loss": 2.0782, + "step": 307090 + }, + { + "epoch": 0.72, + "grad_norm": 1.875, + "learning_rate": 0.00014219492792882666, + "loss": 2.1358, + "step": 307095 + }, + { + "epoch": 0.72, + "grad_norm": 2.3125, + "learning_rate": 0.00014219325222464494, + "loss": 2.1588, + "step": 307100 + }, + { + "epoch": 0.72, + "grad_norm": 2.046875, + "learning_rate": 0.0001421915765060492, + "loss": 2.0829, + "step": 307105 + }, + { + "epoch": 0.72, + "grad_norm": 2.4375, + "learning_rate": 0.00014218990077304012, + "loss": 1.9811, + "step": 307110 + }, + { + "epoch": 0.72, + "grad_norm": 2.0625, + "learning_rate": 0.00014218822502561814, + "loss": 2.0916, + "step": 307115 + }, + { + "epoch": 0.72, + "grad_norm": 2.171875, + "learning_rate": 0.0001421865492637839, + "loss": 2.2201, + "step": 307120 + }, + { + "epoch": 0.72, + "grad_norm": 2.09375, + "learning_rate": 0.000142184873487538, + "loss": 1.8683, + "step": 307125 + }, + { + "epoch": 0.72, + "grad_norm": 2.1875, + "learning_rate": 0.00014218319769688096, + "loss": 1.9774, + "step": 307130 + }, + { + "epoch": 0.72, + "grad_norm": 2.046875, + "learning_rate": 0.0001421815218918134, + "loss": 2.0426, + "step": 307135 + }, + { + "epoch": 0.72, + "grad_norm": 2.65625, + "learning_rate": 0.00014217984607233584, + "loss": 2.1482, + "step": 307140 + }, + { + "epoch": 0.72, + "grad_norm": 2.578125, + "learning_rate": 0.00014217817023844887, + "loss": 1.8868, + "step": 307145 + }, + { + "epoch": 0.72, + "grad_norm": 2.25, + "learning_rate": 0.0001421764943901531, + "loss": 2.1178, + "step": 307150 + }, + { + "epoch": 0.72, + "grad_norm": 2.59375, + "learning_rate": 0.0001421748185274491, + "loss": 2.1465, + "step": 307155 + }, + { + "epoch": 0.72, + "grad_norm": 2.3125, + "learning_rate": 0.00014217314265033737, + "loss": 2.0034, + "step": 307160 + }, + { + "epoch": 0.72, + "grad_norm": 1.9375, + "learning_rate": 0.00014217146675881852, + "loss": 2.2005, + "step": 307165 + }, + { + "epoch": 0.72, + "grad_norm": 2.734375, + "learning_rate": 0.00014216979085289317, + "loss": 1.9612, + "step": 307170 + }, + { + "epoch": 0.72, + "grad_norm": 2.03125, + "learning_rate": 0.00014216811493256187, + "loss": 1.9639, + "step": 307175 + }, + { + "epoch": 0.72, + "grad_norm": 2.0, + "learning_rate": 0.00014216643899782515, + "loss": 2.0272, + "step": 307180 + }, + { + "epoch": 0.72, + "grad_norm": 2.21875, + "learning_rate": 0.00014216476304868358, + "loss": 2.1207, + "step": 307185 + }, + { + "epoch": 0.72, + "grad_norm": 2.578125, + "learning_rate": 0.0001421630870851378, + "loss": 1.9147, + "step": 307190 + }, + { + "epoch": 0.72, + "grad_norm": 1.8359375, + "learning_rate": 0.0001421614111071883, + "loss": 2.076, + "step": 307195 + }, + { + "epoch": 0.72, + "grad_norm": 3.046875, + "learning_rate": 0.00014215973511483575, + "loss": 1.9494, + "step": 307200 + }, + { + "epoch": 0.72, + "grad_norm": 1.9140625, + "learning_rate": 0.00014215805910808066, + "loss": 2.1219, + "step": 307205 + }, + { + "epoch": 0.72, + "grad_norm": 1.828125, + "learning_rate": 0.00014215638308692362, + "loss": 1.9933, + "step": 307210 + }, + { + "epoch": 0.72, + "grad_norm": 2.890625, + "learning_rate": 0.00014215470705136515, + "loss": 2.1863, + "step": 307215 + }, + { + "epoch": 0.72, + "grad_norm": 1.4921875, + "learning_rate": 0.0001421530310014059, + "loss": 1.8774, + "step": 307220 + }, + { + "epoch": 0.72, + "grad_norm": 2.046875, + "learning_rate": 0.00014215135493704642, + "loss": 2.1705, + "step": 307225 + }, + { + "epoch": 0.72, + "grad_norm": 2.34375, + "learning_rate": 0.00014214967885828726, + "loss": 1.9952, + "step": 307230 + }, + { + "epoch": 0.72, + "grad_norm": 2.03125, + "learning_rate": 0.00014214800276512898, + "loss": 2.0042, + "step": 307235 + }, + { + "epoch": 0.72, + "grad_norm": 2.171875, + "learning_rate": 0.00014214632665757223, + "loss": 2.1847, + "step": 307240 + }, + { + "epoch": 0.72, + "grad_norm": 2.1875, + "learning_rate": 0.00014214465053561748, + "loss": 2.079, + "step": 307245 + }, + { + "epoch": 0.72, + "grad_norm": 2.078125, + "learning_rate": 0.00014214297439926537, + "loss": 1.992, + "step": 307250 + }, + { + "epoch": 0.72, + "grad_norm": 2.203125, + "learning_rate": 0.00014214129824851647, + "loss": 2.0311, + "step": 307255 + }, + { + "epoch": 0.72, + "grad_norm": 2.296875, + "learning_rate": 0.00014213962208337134, + "loss": 1.9889, + "step": 307260 + }, + { + "epoch": 0.72, + "grad_norm": 2.375, + "learning_rate": 0.00014213794590383055, + "loss": 2.1466, + "step": 307265 + }, + { + "epoch": 0.72, + "grad_norm": 2.171875, + "learning_rate": 0.00014213626970989467, + "loss": 1.8897, + "step": 307270 + }, + { + "epoch": 0.72, + "grad_norm": 1.8203125, + "learning_rate": 0.00014213459350156425, + "loss": 2.1372, + "step": 307275 + }, + { + "epoch": 0.72, + "grad_norm": 2.234375, + "learning_rate": 0.0001421329172788399, + "loss": 2.2553, + "step": 307280 + }, + { + "epoch": 0.72, + "grad_norm": 2.296875, + "learning_rate": 0.00014213124104172218, + "loss": 2.0687, + "step": 307285 + }, + { + "epoch": 0.72, + "grad_norm": 2.03125, + "learning_rate": 0.0001421295647902117, + "loss": 2.0037, + "step": 307290 + }, + { + "epoch": 0.72, + "grad_norm": 2.0625, + "learning_rate": 0.00014212788852430898, + "loss": 2.0936, + "step": 307295 + }, + { + "epoch": 0.72, + "grad_norm": 2.0, + "learning_rate": 0.00014212621224401456, + "loss": 2.0266, + "step": 307300 + }, + { + "epoch": 0.72, + "grad_norm": 2.46875, + "learning_rate": 0.00014212453594932912, + "loss": 2.0477, + "step": 307305 + }, + { + "epoch": 0.72, + "grad_norm": 1.765625, + "learning_rate": 0.00014212285964025317, + "loss": 1.9398, + "step": 307310 + }, + { + "epoch": 0.72, + "grad_norm": 1.9140625, + "learning_rate": 0.00014212118331678728, + "loss": 2.1865, + "step": 307315 + }, + { + "epoch": 0.72, + "grad_norm": 2.140625, + "learning_rate": 0.00014211950697893202, + "loss": 1.9747, + "step": 307320 + }, + { + "epoch": 0.72, + "grad_norm": 2.1875, + "learning_rate": 0.00014211783062668796, + "loss": 2.1192, + "step": 307325 + }, + { + "epoch": 0.72, + "grad_norm": 2.21875, + "learning_rate": 0.00014211615426005568, + "loss": 2.0284, + "step": 307330 + }, + { + "epoch": 0.72, + "grad_norm": 2.296875, + "learning_rate": 0.0001421144778790358, + "loss": 2.0242, + "step": 307335 + }, + { + "epoch": 0.72, + "grad_norm": 1.890625, + "learning_rate": 0.00014211280148362885, + "loss": 2.0037, + "step": 307340 + }, + { + "epoch": 0.72, + "grad_norm": 2.328125, + "learning_rate": 0.00014211112507383537, + "loss": 1.9769, + "step": 307345 + }, + { + "epoch": 0.72, + "grad_norm": 2.3125, + "learning_rate": 0.000142109448649656, + "loss": 2.0693, + "step": 307350 + }, + { + "epoch": 0.72, + "grad_norm": 2.640625, + "learning_rate": 0.00014210777221109123, + "loss": 2.0508, + "step": 307355 + }, + { + "epoch": 0.72, + "grad_norm": 2.125, + "learning_rate": 0.0001421060957581417, + "loss": 2.158, + "step": 307360 + }, + { + "epoch": 0.72, + "grad_norm": 2.109375, + "learning_rate": 0.000142104419290808, + "loss": 2.0911, + "step": 307365 + }, + { + "epoch": 0.72, + "grad_norm": 2.296875, + "learning_rate": 0.00014210274280909065, + "loss": 2.0367, + "step": 307370 + }, + { + "epoch": 0.72, + "grad_norm": 1.953125, + "learning_rate": 0.0001421010663129902, + "loss": 2.0421, + "step": 307375 + }, + { + "epoch": 0.72, + "grad_norm": 1.9375, + "learning_rate": 0.0001420993898025073, + "loss": 1.9567, + "step": 307380 + }, + { + "epoch": 0.72, + "grad_norm": 2.265625, + "learning_rate": 0.00014209771327764248, + "loss": 2.1819, + "step": 307385 + }, + { + "epoch": 0.72, + "grad_norm": 1.859375, + "learning_rate": 0.00014209603673839632, + "loss": 1.9078, + "step": 307390 + }, + { + "epoch": 0.72, + "grad_norm": 2.75, + "learning_rate": 0.00014209436018476938, + "loss": 2.0479, + "step": 307395 + }, + { + "epoch": 0.72, + "grad_norm": 2.71875, + "learning_rate": 0.00014209268361676223, + "loss": 2.1354, + "step": 307400 + }, + { + "epoch": 0.72, + "grad_norm": 2.25, + "learning_rate": 0.0001420910070343755, + "loss": 2.0707, + "step": 307405 + }, + { + "epoch": 0.72, + "grad_norm": 2.421875, + "learning_rate": 0.00014208933043760968, + "loss": 2.1169, + "step": 307410 + }, + { + "epoch": 0.72, + "grad_norm": 2.078125, + "learning_rate": 0.00014208765382646538, + "loss": 2.0005, + "step": 307415 + }, + { + "epoch": 0.72, + "grad_norm": 2.375, + "learning_rate": 0.00014208597720094318, + "loss": 2.0352, + "step": 307420 + }, + { + "epoch": 0.72, + "grad_norm": 2.328125, + "learning_rate": 0.00014208430056104367, + "loss": 1.8952, + "step": 307425 + }, + { + "epoch": 0.72, + "grad_norm": 2.015625, + "learning_rate": 0.00014208262390676742, + "loss": 2.1481, + "step": 307430 + }, + { + "epoch": 0.72, + "grad_norm": 1.734375, + "learning_rate": 0.00014208094723811493, + "loss": 2.0451, + "step": 307435 + }, + { + "epoch": 0.72, + "grad_norm": 2.359375, + "learning_rate": 0.0001420792705550868, + "loss": 1.9457, + "step": 307440 + }, + { + "epoch": 0.72, + "grad_norm": 2.28125, + "learning_rate": 0.00014207759385768368, + "loss": 1.9524, + "step": 307445 + }, + { + "epoch": 0.72, + "grad_norm": 2.640625, + "learning_rate": 0.00014207591714590612, + "loss": 1.9931, + "step": 307450 + }, + { + "epoch": 0.72, + "grad_norm": 2.171875, + "learning_rate": 0.00014207424041975462, + "loss": 2.197, + "step": 307455 + }, + { + "epoch": 0.72, + "grad_norm": 2.046875, + "learning_rate": 0.00014207256367922976, + "loss": 2.1122, + "step": 307460 + }, + { + "epoch": 0.72, + "grad_norm": 2.171875, + "learning_rate": 0.0001420708869243322, + "loss": 1.9753, + "step": 307465 + }, + { + "epoch": 0.72, + "grad_norm": 2.453125, + "learning_rate": 0.00014206921015506247, + "loss": 2.058, + "step": 307470 + }, + { + "epoch": 0.72, + "grad_norm": 2.125, + "learning_rate": 0.00014206753337142112, + "loss": 2.0225, + "step": 307475 + }, + { + "epoch": 0.72, + "grad_norm": 2.25, + "learning_rate": 0.00014206585657340875, + "loss": 2.1496, + "step": 307480 + }, + { + "epoch": 0.72, + "grad_norm": 2.1875, + "learning_rate": 0.00014206417976102588, + "loss": 2.076, + "step": 307485 + }, + { + "epoch": 0.72, + "grad_norm": 2.140625, + "learning_rate": 0.00014206250293427312, + "loss": 2.1654, + "step": 307490 + }, + { + "epoch": 0.72, + "grad_norm": 2.109375, + "learning_rate": 0.0001420608260931511, + "loss": 2.1877, + "step": 307495 + }, + { + "epoch": 0.72, + "grad_norm": 1.9921875, + "learning_rate": 0.00014205914923766033, + "loss": 2.1117, + "step": 307500 + }, + { + "epoch": 0.72, + "grad_norm": 2.03125, + "learning_rate": 0.00014205747236780138, + "loss": 2.0772, + "step": 307505 + }, + { + "epoch": 0.72, + "grad_norm": 2.640625, + "learning_rate": 0.00014205579548357483, + "loss": 1.8994, + "step": 307510 + }, + { + "epoch": 0.72, + "grad_norm": 2.296875, + "learning_rate": 0.00014205411858498126, + "loss": 2.0576, + "step": 307515 + }, + { + "epoch": 0.72, + "grad_norm": 2.125, + "learning_rate": 0.0001420524416720212, + "loss": 2.1224, + "step": 307520 + }, + { + "epoch": 0.72, + "grad_norm": 1.984375, + "learning_rate": 0.00014205076474469531, + "loss": 2.1177, + "step": 307525 + }, + { + "epoch": 0.72, + "grad_norm": 2.0, + "learning_rate": 0.00014204908780300412, + "loss": 2.0071, + "step": 307530 + }, + { + "epoch": 0.72, + "grad_norm": 2.421875, + "learning_rate": 0.0001420474108469482, + "loss": 2.0652, + "step": 307535 + }, + { + "epoch": 0.72, + "grad_norm": 2.28125, + "learning_rate": 0.00014204573387652808, + "loss": 1.9956, + "step": 307540 + }, + { + "epoch": 0.72, + "grad_norm": 2.453125, + "learning_rate": 0.0001420440568917444, + "loss": 2.0481, + "step": 307545 + }, + { + "epoch": 0.72, + "grad_norm": 2.25, + "learning_rate": 0.00014204237989259773, + "loss": 2.0868, + "step": 307550 + }, + { + "epoch": 0.72, + "grad_norm": 2.265625, + "learning_rate": 0.0001420407028790886, + "loss": 2.0504, + "step": 307555 + }, + { + "epoch": 0.72, + "grad_norm": 2.515625, + "learning_rate": 0.0001420390258512176, + "loss": 2.0278, + "step": 307560 + }, + { + "epoch": 0.72, + "grad_norm": 2.234375, + "learning_rate": 0.00014203734880898533, + "loss": 2.0046, + "step": 307565 + }, + { + "epoch": 0.72, + "grad_norm": 2.34375, + "learning_rate": 0.0001420356717523923, + "loss": 2.1482, + "step": 307570 + }, + { + "epoch": 0.72, + "grad_norm": 2.15625, + "learning_rate": 0.00014203399468143914, + "loss": 2.2381, + "step": 307575 + }, + { + "epoch": 0.72, + "grad_norm": 2.46875, + "learning_rate": 0.0001420323175961264, + "loss": 2.071, + "step": 307580 + }, + { + "epoch": 0.72, + "grad_norm": 1.984375, + "learning_rate": 0.0001420306404964547, + "loss": 2.0733, + "step": 307585 + }, + { + "epoch": 0.72, + "grad_norm": 2.28125, + "learning_rate": 0.00014202896338242452, + "loss": 2.0434, + "step": 307590 + }, + { + "epoch": 0.72, + "grad_norm": 2.515625, + "learning_rate": 0.0001420272862540365, + "loss": 2.1594, + "step": 307595 + }, + { + "epoch": 0.72, + "grad_norm": 2.203125, + "learning_rate": 0.0001420256091112912, + "loss": 1.8767, + "step": 307600 + }, + { + "epoch": 0.72, + "grad_norm": 1.9140625, + "learning_rate": 0.00014202393195418916, + "loss": 2.0036, + "step": 307605 + }, + { + "epoch": 0.72, + "grad_norm": 2.015625, + "learning_rate": 0.00014202225478273102, + "loss": 2.0806, + "step": 307610 + }, + { + "epoch": 0.72, + "grad_norm": 2.09375, + "learning_rate": 0.00014202057759691732, + "loss": 2.0757, + "step": 307615 + }, + { + "epoch": 0.72, + "grad_norm": 2.59375, + "learning_rate": 0.0001420189003967486, + "loss": 1.9769, + "step": 307620 + }, + { + "epoch": 0.72, + "grad_norm": 2.359375, + "learning_rate": 0.00014201722318222547, + "loss": 2.1965, + "step": 307625 + }, + { + "epoch": 0.72, + "grad_norm": 1.9296875, + "learning_rate": 0.00014201554595334853, + "loss": 1.9432, + "step": 307630 + }, + { + "epoch": 0.72, + "grad_norm": 2.3125, + "learning_rate": 0.00014201386871011827, + "loss": 2.0181, + "step": 307635 + }, + { + "epoch": 0.72, + "grad_norm": 2.140625, + "learning_rate": 0.00014201219145253534, + "loss": 2.0905, + "step": 307640 + }, + { + "epoch": 0.72, + "grad_norm": 2.046875, + "learning_rate": 0.00014201051418060028, + "loss": 2.0198, + "step": 307645 + }, + { + "epoch": 0.72, + "grad_norm": 2.03125, + "learning_rate": 0.00014200883689431364, + "loss": 2.1285, + "step": 307650 + }, + { + "epoch": 0.72, + "grad_norm": 2.34375, + "learning_rate": 0.00014200715959367604, + "loss": 2.0758, + "step": 307655 + }, + { + "epoch": 0.72, + "grad_norm": 1.90625, + "learning_rate": 0.00014200548227868805, + "loss": 1.8613, + "step": 307660 + }, + { + "epoch": 0.72, + "grad_norm": 2.6875, + "learning_rate": 0.0001420038049493502, + "loss": 2.0923, + "step": 307665 + }, + { + "epoch": 0.72, + "grad_norm": 2.0625, + "learning_rate": 0.00014200212760566308, + "loss": 2.0248, + "step": 307670 + }, + { + "epoch": 0.72, + "grad_norm": 2.609375, + "learning_rate": 0.0001420004502476273, + "loss": 2.0426, + "step": 307675 + }, + { + "epoch": 0.72, + "grad_norm": 1.9453125, + "learning_rate": 0.00014199877287524336, + "loss": 2.034, + "step": 307680 + }, + { + "epoch": 0.72, + "grad_norm": 2.03125, + "learning_rate": 0.00014199709548851192, + "loss": 2.066, + "step": 307685 + }, + { + "epoch": 0.72, + "grad_norm": 2.375, + "learning_rate": 0.0001419954180874335, + "loss": 2.2157, + "step": 307690 + }, + { + "epoch": 0.72, + "grad_norm": 2.59375, + "learning_rate": 0.00014199374067200868, + "loss": 2.0644, + "step": 307695 + }, + { + "epoch": 0.72, + "grad_norm": 2.09375, + "learning_rate": 0.00014199206324223807, + "loss": 1.9759, + "step": 307700 + }, + { + "epoch": 0.72, + "grad_norm": 2.515625, + "learning_rate": 0.00014199038579812214, + "loss": 2.0066, + "step": 307705 + }, + { + "epoch": 0.72, + "grad_norm": 2.390625, + "learning_rate": 0.00014198870833966157, + "loss": 2.1027, + "step": 307710 + }, + { + "epoch": 0.72, + "grad_norm": 2.171875, + "learning_rate": 0.0001419870308668569, + "loss": 2.0748, + "step": 307715 + }, + { + "epoch": 0.72, + "grad_norm": 2.0, + "learning_rate": 0.00014198535337970868, + "loss": 2.0338, + "step": 307720 + }, + { + "epoch": 0.72, + "grad_norm": 3.328125, + "learning_rate": 0.00014198367587821756, + "loss": 2.2258, + "step": 307725 + }, + { + "epoch": 0.72, + "grad_norm": 1.984375, + "learning_rate": 0.00014198199836238398, + "loss": 2.1615, + "step": 307730 + }, + { + "epoch": 0.72, + "grad_norm": 2.171875, + "learning_rate": 0.0001419803208322086, + "loss": 2.229, + "step": 307735 + }, + { + "epoch": 0.72, + "grad_norm": 2.375, + "learning_rate": 0.000141978643287692, + "loss": 2.1686, + "step": 307740 + }, + { + "epoch": 0.72, + "grad_norm": 2.390625, + "learning_rate": 0.00014197696572883478, + "loss": 1.9168, + "step": 307745 + }, + { + "epoch": 0.72, + "grad_norm": 2.34375, + "learning_rate": 0.00014197528815563742, + "loss": 2.067, + "step": 307750 + }, + { + "epoch": 0.72, + "grad_norm": 1.8671875, + "learning_rate": 0.00014197361056810052, + "loss": 2.1104, + "step": 307755 + }, + { + "epoch": 0.72, + "grad_norm": 2.5625, + "learning_rate": 0.0001419719329662247, + "loss": 2.2372, + "step": 307760 + }, + { + "epoch": 0.72, + "grad_norm": 2.0625, + "learning_rate": 0.00014197025535001053, + "loss": 2.0997, + "step": 307765 + }, + { + "epoch": 0.72, + "grad_norm": 2.4375, + "learning_rate": 0.0001419685777194585, + "loss": 2.0104, + "step": 307770 + }, + { + "epoch": 0.72, + "grad_norm": 2.34375, + "learning_rate": 0.00014196690007456928, + "loss": 1.9983, + "step": 307775 + }, + { + "epoch": 0.72, + "grad_norm": 1.5703125, + "learning_rate": 0.0001419652224153434, + "loss": 2.2685, + "step": 307780 + }, + { + "epoch": 0.72, + "grad_norm": 2.171875, + "learning_rate": 0.0001419635447417814, + "loss": 2.0323, + "step": 307785 + }, + { + "epoch": 0.72, + "grad_norm": 2.265625, + "learning_rate": 0.00014196186705388398, + "loss": 2.2758, + "step": 307790 + }, + { + "epoch": 0.72, + "grad_norm": 2.59375, + "learning_rate": 0.00014196018935165158, + "loss": 2.1411, + "step": 307795 + }, + { + "epoch": 0.72, + "grad_norm": 2.578125, + "learning_rate": 0.0001419585116350848, + "loss": 2.0269, + "step": 307800 + }, + { + "epoch": 0.72, + "grad_norm": 2.234375, + "learning_rate": 0.00014195683390418425, + "loss": 2.077, + "step": 307805 + }, + { + "epoch": 0.72, + "grad_norm": 2.359375, + "learning_rate": 0.00014195515615895047, + "loss": 2.0756, + "step": 307810 + }, + { + "epoch": 0.72, + "grad_norm": 1.953125, + "learning_rate": 0.00014195347839938407, + "loss": 2.0107, + "step": 307815 + }, + { + "epoch": 0.72, + "grad_norm": 1.9296875, + "learning_rate": 0.00014195180062548558, + "loss": 2.1103, + "step": 307820 + }, + { + "epoch": 0.72, + "grad_norm": 2.359375, + "learning_rate": 0.00014195012283725562, + "loss": 2.1362, + "step": 307825 + }, + { + "epoch": 0.72, + "grad_norm": 2.59375, + "learning_rate": 0.0001419484450346947, + "loss": 2.0683, + "step": 307830 + }, + { + "epoch": 0.72, + "grad_norm": 1.9609375, + "learning_rate": 0.00014194676721780347, + "loss": 1.9119, + "step": 307835 + }, + { + "epoch": 0.72, + "grad_norm": 2.0625, + "learning_rate": 0.00014194508938658243, + "loss": 2.042, + "step": 307840 + }, + { + "epoch": 0.72, + "grad_norm": 2.234375, + "learning_rate": 0.00014194341154103222, + "loss": 2.123, + "step": 307845 + }, + { + "epoch": 0.72, + "grad_norm": 2.15625, + "learning_rate": 0.00014194173368115335, + "loss": 2.173, + "step": 307850 + }, + { + "epoch": 0.72, + "grad_norm": 2.21875, + "learning_rate": 0.00014194005580694643, + "loss": 1.9493, + "step": 307855 + }, + { + "epoch": 0.72, + "grad_norm": 2.1875, + "learning_rate": 0.00014193837791841204, + "loss": 2.0162, + "step": 307860 + }, + { + "epoch": 0.72, + "grad_norm": 2.28125, + "learning_rate": 0.00014193670001555074, + "loss": 2.1068, + "step": 307865 + }, + { + "epoch": 0.72, + "grad_norm": 2.0, + "learning_rate": 0.00014193502209836306, + "loss": 2.07, + "step": 307870 + }, + { + "epoch": 0.72, + "grad_norm": 2.34375, + "learning_rate": 0.00014193334416684968, + "loss": 1.9413, + "step": 307875 + }, + { + "epoch": 0.72, + "grad_norm": 2.1875, + "learning_rate": 0.0001419316662210111, + "loss": 2.2146, + "step": 307880 + }, + { + "epoch": 0.72, + "grad_norm": 2.359375, + "learning_rate": 0.00014192998826084783, + "loss": 2.0751, + "step": 307885 + }, + { + "epoch": 0.72, + "grad_norm": 2.5, + "learning_rate": 0.00014192831028636058, + "loss": 2.0957, + "step": 307890 + }, + { + "epoch": 0.72, + "grad_norm": 2.046875, + "learning_rate": 0.00014192663229754983, + "loss": 2.0452, + "step": 307895 + }, + { + "epoch": 0.72, + "grad_norm": 2.171875, + "learning_rate": 0.0001419249542944162, + "loss": 2.1321, + "step": 307900 + }, + { + "epoch": 0.72, + "grad_norm": 1.953125, + "learning_rate": 0.00014192327627696028, + "loss": 1.9427, + "step": 307905 + }, + { + "epoch": 0.72, + "grad_norm": 2.328125, + "learning_rate": 0.00014192159824518257, + "loss": 1.9236, + "step": 307910 + }, + { + "epoch": 0.72, + "grad_norm": 2.453125, + "learning_rate": 0.00014191992019908363, + "loss": 2.0897, + "step": 307915 + }, + { + "epoch": 0.72, + "grad_norm": 2.078125, + "learning_rate": 0.00014191824213866415, + "loss": 2.1245, + "step": 307920 + }, + { + "epoch": 0.72, + "grad_norm": 1.9140625, + "learning_rate": 0.00014191656406392465, + "loss": 2.0426, + "step": 307925 + }, + { + "epoch": 0.72, + "grad_norm": 2.28125, + "learning_rate": 0.00014191488597486567, + "loss": 2.1568, + "step": 307930 + }, + { + "epoch": 0.72, + "grad_norm": 2.265625, + "learning_rate": 0.0001419132078714878, + "loss": 2.0573, + "step": 307935 + }, + { + "epoch": 0.72, + "grad_norm": 2.234375, + "learning_rate": 0.00014191152975379163, + "loss": 1.9785, + "step": 307940 + }, + { + "epoch": 0.72, + "grad_norm": 1.9921875, + "learning_rate": 0.0001419098516217777, + "loss": 2.0116, + "step": 307945 + }, + { + "epoch": 0.72, + "grad_norm": 2.078125, + "learning_rate": 0.0001419081734754466, + "loss": 1.9184, + "step": 307950 + }, + { + "epoch": 0.72, + "grad_norm": 2.59375, + "learning_rate": 0.00014190649531479893, + "loss": 2.0956, + "step": 307955 + }, + { + "epoch": 0.72, + "grad_norm": 2.125, + "learning_rate": 0.00014190481713983522, + "loss": 2.11, + "step": 307960 + }, + { + "epoch": 0.72, + "grad_norm": 2.015625, + "learning_rate": 0.00014190313895055608, + "loss": 2.0874, + "step": 307965 + }, + { + "epoch": 0.72, + "grad_norm": 1.8984375, + "learning_rate": 0.0001419014607469621, + "loss": 1.8444, + "step": 307970 + }, + { + "epoch": 0.72, + "grad_norm": 2.1875, + "learning_rate": 0.00014189978252905378, + "loss": 2.1079, + "step": 307975 + }, + { + "epoch": 0.72, + "grad_norm": 1.96875, + "learning_rate": 0.00014189810429683175, + "loss": 1.9769, + "step": 307980 + }, + { + "epoch": 0.72, + "grad_norm": 2.25, + "learning_rate": 0.00014189642605029655, + "loss": 2.1337, + "step": 307985 + }, + { + "epoch": 0.72, + "grad_norm": 2.265625, + "learning_rate": 0.0001418947477894488, + "loss": 2.1529, + "step": 307990 + }, + { + "epoch": 0.72, + "grad_norm": 2.234375, + "learning_rate": 0.000141893069514289, + "loss": 1.9985, + "step": 307995 + }, + { + "epoch": 0.72, + "grad_norm": 2.21875, + "learning_rate": 0.0001418913912248178, + "loss": 1.9396, + "step": 308000 + }, + { + "epoch": 0.72, + "grad_norm": 2.15625, + "learning_rate": 0.00014188971292103573, + "loss": 1.9756, + "step": 308005 + }, + { + "epoch": 0.72, + "grad_norm": 2.296875, + "learning_rate": 0.00014188803460294338, + "loss": 2.1082, + "step": 308010 + }, + { + "epoch": 0.72, + "grad_norm": 1.9921875, + "learning_rate": 0.0001418863562705413, + "loss": 1.9945, + "step": 308015 + }, + { + "epoch": 0.72, + "grad_norm": 2.328125, + "learning_rate": 0.00014188467792383014, + "loss": 2.071, + "step": 308020 + }, + { + "epoch": 0.72, + "grad_norm": 2.25, + "learning_rate": 0.00014188299956281034, + "loss": 2.0529, + "step": 308025 + }, + { + "epoch": 0.72, + "grad_norm": 1.90625, + "learning_rate": 0.0001418813211874826, + "loss": 1.9986, + "step": 308030 + }, + { + "epoch": 0.72, + "grad_norm": 1.796875, + "learning_rate": 0.0001418796427978474, + "loss": 2.1705, + "step": 308035 + }, + { + "epoch": 0.72, + "grad_norm": 2.375, + "learning_rate": 0.0001418779643939054, + "loss": 1.8616, + "step": 308040 + }, + { + "epoch": 0.72, + "grad_norm": 2.3125, + "learning_rate": 0.0001418762859756571, + "loss": 2.214, + "step": 308045 + }, + { + "epoch": 0.72, + "grad_norm": 2.25, + "learning_rate": 0.0001418746075431031, + "loss": 2.045, + "step": 308050 + }, + { + "epoch": 0.72, + "grad_norm": 2.171875, + "learning_rate": 0.00014187292909624398, + "loss": 2.0789, + "step": 308055 + }, + { + "epoch": 0.72, + "grad_norm": 2.453125, + "learning_rate": 0.0001418712506350803, + "loss": 2.1661, + "step": 308060 + }, + { + "epoch": 0.72, + "grad_norm": 2.328125, + "learning_rate": 0.00014186957215961266, + "loss": 2.1239, + "step": 308065 + }, + { + "epoch": 0.72, + "grad_norm": 2.140625, + "learning_rate": 0.0001418678936698416, + "loss": 2.1894, + "step": 308070 + }, + { + "epoch": 0.72, + "grad_norm": 2.0625, + "learning_rate": 0.0001418662151657677, + "loss": 2.1172, + "step": 308075 + }, + { + "epoch": 0.73, + "grad_norm": 2.125, + "learning_rate": 0.00014186453664739155, + "loss": 2.0888, + "step": 308080 + }, + { + "epoch": 0.73, + "grad_norm": 2.53125, + "learning_rate": 0.00014186285811471373, + "loss": 2.0477, + "step": 308085 + }, + { + "epoch": 0.73, + "grad_norm": 2.125, + "learning_rate": 0.0001418611795677348, + "loss": 2.1378, + "step": 308090 + }, + { + "epoch": 0.73, + "grad_norm": 2.171875, + "learning_rate": 0.0001418595010064553, + "loss": 2.0086, + "step": 308095 + }, + { + "epoch": 0.73, + "grad_norm": 2.171875, + "learning_rate": 0.00014185782243087585, + "loss": 2.0966, + "step": 308100 + }, + { + "epoch": 0.73, + "grad_norm": 2.25, + "learning_rate": 0.00014185614384099702, + "loss": 1.9019, + "step": 308105 + }, + { + "epoch": 0.73, + "grad_norm": 2.375, + "learning_rate": 0.00014185446523681935, + "loss": 2.0056, + "step": 308110 + }, + { + "epoch": 0.73, + "grad_norm": 2.109375, + "learning_rate": 0.00014185278661834347, + "loss": 2.1476, + "step": 308115 + }, + { + "epoch": 0.73, + "grad_norm": 2.28125, + "learning_rate": 0.0001418511079855699, + "loss": 2.0993, + "step": 308120 + }, + { + "epoch": 0.73, + "grad_norm": 2.4375, + "learning_rate": 0.00014184942933849923, + "loss": 2.1496, + "step": 308125 + }, + { + "epoch": 0.73, + "grad_norm": 1.9453125, + "learning_rate": 0.00014184775067713204, + "loss": 2.1136, + "step": 308130 + }, + { + "epoch": 0.73, + "grad_norm": 2.15625, + "learning_rate": 0.00014184607200146888, + "loss": 2.1856, + "step": 308135 + }, + { + "epoch": 0.73, + "grad_norm": 1.9453125, + "learning_rate": 0.00014184439331151036, + "loss": 1.975, + "step": 308140 + }, + { + "epoch": 0.73, + "grad_norm": 2.03125, + "learning_rate": 0.00014184271460725704, + "loss": 2.1651, + "step": 308145 + }, + { + "epoch": 0.73, + "grad_norm": 2.109375, + "learning_rate": 0.00014184103588870948, + "loss": 1.8362, + "step": 308150 + }, + { + "epoch": 0.73, + "grad_norm": 2.25, + "learning_rate": 0.0001418393571558683, + "loss": 2.208, + "step": 308155 + }, + { + "epoch": 0.73, + "grad_norm": 2.3125, + "learning_rate": 0.000141837678408734, + "loss": 1.8671, + "step": 308160 + }, + { + "epoch": 0.73, + "grad_norm": 2.203125, + "learning_rate": 0.0001418359996473072, + "loss": 1.9932, + "step": 308165 + }, + { + "epoch": 0.73, + "grad_norm": 2.109375, + "learning_rate": 0.00014183432087158844, + "loss": 2.0188, + "step": 308170 + }, + { + "epoch": 0.73, + "grad_norm": 2.515625, + "learning_rate": 0.00014183264208157836, + "loss": 2.0217, + "step": 308175 + }, + { + "epoch": 0.73, + "grad_norm": 2.578125, + "learning_rate": 0.00014183096327727746, + "loss": 2.3162, + "step": 308180 + }, + { + "epoch": 0.73, + "grad_norm": 2.453125, + "learning_rate": 0.00014182928445868634, + "loss": 1.8573, + "step": 308185 + }, + { + "epoch": 0.73, + "grad_norm": 1.84375, + "learning_rate": 0.0001418276056258056, + "loss": 2.07, + "step": 308190 + }, + { + "epoch": 0.73, + "grad_norm": 1.78125, + "learning_rate": 0.00014182592677863576, + "loss": 2.1616, + "step": 308195 + }, + { + "epoch": 0.73, + "grad_norm": 2.140625, + "learning_rate": 0.00014182424791717749, + "loss": 2.0807, + "step": 308200 + }, + { + "epoch": 0.73, + "grad_norm": 2.109375, + "learning_rate": 0.00014182256904143124, + "loss": 2.009, + "step": 308205 + }, + { + "epoch": 0.73, + "grad_norm": 2.09375, + "learning_rate": 0.00014182089015139763, + "loss": 2.1523, + "step": 308210 + }, + { + "epoch": 0.73, + "grad_norm": 2.0, + "learning_rate": 0.00014181921124707726, + "loss": 2.1681, + "step": 308215 + }, + { + "epoch": 0.73, + "grad_norm": 2.40625, + "learning_rate": 0.00014181753232847072, + "loss": 2.1335, + "step": 308220 + }, + { + "epoch": 0.73, + "grad_norm": 2.28125, + "learning_rate": 0.00014181585339557854, + "loss": 2.1431, + "step": 308225 + }, + { + "epoch": 0.73, + "grad_norm": 2.359375, + "learning_rate": 0.00014181417444840132, + "loss": 1.9933, + "step": 308230 + }, + { + "epoch": 0.73, + "grad_norm": 1.8984375, + "learning_rate": 0.00014181249548693957, + "loss": 2.2013, + "step": 308235 + }, + { + "epoch": 0.73, + "grad_norm": 2.484375, + "learning_rate": 0.00014181081651119397, + "loss": 2.1446, + "step": 308240 + }, + { + "epoch": 0.73, + "grad_norm": 2.34375, + "learning_rate": 0.000141809137521165, + "loss": 1.9853, + "step": 308245 + }, + { + "epoch": 0.73, + "grad_norm": 2.328125, + "learning_rate": 0.0001418074585168533, + "loss": 1.9958, + "step": 308250 + }, + { + "epoch": 0.73, + "grad_norm": 2.125, + "learning_rate": 0.00014180577949825937, + "loss": 2.1646, + "step": 308255 + }, + { + "epoch": 0.73, + "grad_norm": 2.359375, + "learning_rate": 0.00014180410046538386, + "loss": 2.064, + "step": 308260 + }, + { + "epoch": 0.73, + "grad_norm": 2.375, + "learning_rate": 0.0001418024214182273, + "loss": 1.9802, + "step": 308265 + }, + { + "epoch": 0.73, + "grad_norm": 2.234375, + "learning_rate": 0.00014180074235679027, + "loss": 2.113, + "step": 308270 + }, + { + "epoch": 0.73, + "grad_norm": 2.484375, + "learning_rate": 0.00014179906328107338, + "loss": 1.9463, + "step": 308275 + }, + { + "epoch": 0.73, + "grad_norm": 1.9765625, + "learning_rate": 0.00014179738419107713, + "loss": 1.9172, + "step": 308280 + }, + { + "epoch": 0.73, + "grad_norm": 2.46875, + "learning_rate": 0.00014179570508680216, + "loss": 2.1901, + "step": 308285 + }, + { + "epoch": 0.73, + "grad_norm": 2.125, + "learning_rate": 0.000141794025968249, + "loss": 2.069, + "step": 308290 + }, + { + "epoch": 0.73, + "grad_norm": 2.296875, + "learning_rate": 0.00014179234683541826, + "loss": 2.1296, + "step": 308295 + }, + { + "epoch": 0.73, + "grad_norm": 2.53125, + "learning_rate": 0.0001417906676883105, + "loss": 2.2515, + "step": 308300 + }, + { + "epoch": 0.73, + "grad_norm": 2.296875, + "learning_rate": 0.00014178898852692625, + "loss": 1.9494, + "step": 308305 + }, + { + "epoch": 0.73, + "grad_norm": 1.9765625, + "learning_rate": 0.0001417873093512662, + "loss": 2.045, + "step": 308310 + }, + { + "epoch": 0.73, + "grad_norm": 1.9296875, + "learning_rate": 0.0001417856301613308, + "loss": 2.0793, + "step": 308315 + }, + { + "epoch": 0.73, + "grad_norm": 2.140625, + "learning_rate": 0.00014178395095712065, + "loss": 2.1179, + "step": 308320 + }, + { + "epoch": 0.73, + "grad_norm": 2.515625, + "learning_rate": 0.00014178227173863636, + "loss": 2.2616, + "step": 308325 + }, + { + "epoch": 0.73, + "grad_norm": 2.34375, + "learning_rate": 0.00014178059250587852, + "loss": 1.9005, + "step": 308330 + }, + { + "epoch": 0.73, + "grad_norm": 2.265625, + "learning_rate": 0.00014177891325884765, + "loss": 1.9414, + "step": 308335 + }, + { + "epoch": 0.73, + "grad_norm": 2.046875, + "learning_rate": 0.00014177723399754434, + "loss": 2.0322, + "step": 308340 + }, + { + "epoch": 0.73, + "grad_norm": 2.15625, + "learning_rate": 0.00014177555472196914, + "loss": 1.8751, + "step": 308345 + }, + { + "epoch": 0.73, + "grad_norm": 2.234375, + "learning_rate": 0.0001417738754321227, + "loss": 2.1964, + "step": 308350 + }, + { + "epoch": 0.73, + "grad_norm": 1.765625, + "learning_rate": 0.00014177219612800557, + "loss": 1.8828, + "step": 308355 + }, + { + "epoch": 0.73, + "grad_norm": 1.9921875, + "learning_rate": 0.00014177051680961826, + "loss": 2.2443, + "step": 308360 + }, + { + "epoch": 0.73, + "grad_norm": 2.359375, + "learning_rate": 0.00014176883747696136, + "loss": 1.9601, + "step": 308365 + }, + { + "epoch": 0.73, + "grad_norm": 1.9375, + "learning_rate": 0.0001417671581300355, + "loss": 2.0808, + "step": 308370 + }, + { + "epoch": 0.73, + "grad_norm": 2.078125, + "learning_rate": 0.00014176547876884122, + "loss": 1.8701, + "step": 308375 + }, + { + "epoch": 0.73, + "grad_norm": 1.921875, + "learning_rate": 0.0001417637993933791, + "loss": 2.0831, + "step": 308380 + }, + { + "epoch": 0.73, + "grad_norm": 2.53125, + "learning_rate": 0.00014176212000364968, + "loss": 2.1975, + "step": 308385 + }, + { + "epoch": 0.73, + "grad_norm": 2.46875, + "learning_rate": 0.00014176044059965357, + "loss": 1.9129, + "step": 308390 + }, + { + "epoch": 0.73, + "grad_norm": 2.515625, + "learning_rate": 0.00014175876118139138, + "loss": 2.3301, + "step": 308395 + }, + { + "epoch": 0.73, + "grad_norm": 2.015625, + "learning_rate": 0.0001417570817488636, + "loss": 2.1304, + "step": 308400 + }, + { + "epoch": 0.73, + "grad_norm": 2.0625, + "learning_rate": 0.00014175540230207086, + "loss": 2.0708, + "step": 308405 + }, + { + "epoch": 0.73, + "grad_norm": 1.9921875, + "learning_rate": 0.00014175372284101369, + "loss": 1.9443, + "step": 308410 + }, + { + "epoch": 0.73, + "grad_norm": 2.375, + "learning_rate": 0.0001417520433656927, + "loss": 2.1282, + "step": 308415 + }, + { + "epoch": 0.73, + "grad_norm": 2.484375, + "learning_rate": 0.00014175036387610847, + "loss": 2.0165, + "step": 308420 + }, + { + "epoch": 0.73, + "grad_norm": 2.453125, + "learning_rate": 0.00014174868437226155, + "loss": 2.2265, + "step": 308425 + }, + { + "epoch": 0.73, + "grad_norm": 2.109375, + "learning_rate": 0.00014174700485415254, + "loss": 2.0626, + "step": 308430 + }, + { + "epoch": 0.73, + "grad_norm": 2.0, + "learning_rate": 0.00014174532532178197, + "loss": 2.0242, + "step": 308435 + }, + { + "epoch": 0.73, + "grad_norm": 2.671875, + "learning_rate": 0.00014174364577515045, + "loss": 2.204, + "step": 308440 + }, + { + "epoch": 0.73, + "grad_norm": 2.390625, + "learning_rate": 0.00014174196621425856, + "loss": 1.9903, + "step": 308445 + }, + { + "epoch": 0.73, + "grad_norm": 2.03125, + "learning_rate": 0.00014174028663910683, + "loss": 2.0076, + "step": 308450 + }, + { + "epoch": 0.73, + "grad_norm": 2.03125, + "learning_rate": 0.00014173860704969586, + "loss": 2.1546, + "step": 308455 + }, + { + "epoch": 0.73, + "grad_norm": 2.21875, + "learning_rate": 0.00014173692744602625, + "loss": 2.0079, + "step": 308460 + }, + { + "epoch": 0.73, + "grad_norm": 1.859375, + "learning_rate": 0.00014173524782809852, + "loss": 1.8796, + "step": 308465 + }, + { + "epoch": 0.73, + "grad_norm": 2.609375, + "learning_rate": 0.0001417335681959133, + "loss": 2.1328, + "step": 308470 + }, + { + "epoch": 0.73, + "grad_norm": 1.6875, + "learning_rate": 0.00014173188854947114, + "loss": 1.8814, + "step": 308475 + }, + { + "epoch": 0.73, + "grad_norm": 1.7578125, + "learning_rate": 0.00014173020888877259, + "loss": 2.0342, + "step": 308480 + }, + { + "epoch": 0.73, + "grad_norm": 2.140625, + "learning_rate": 0.00014172852921381823, + "loss": 1.957, + "step": 308485 + }, + { + "epoch": 0.73, + "grad_norm": 2.34375, + "learning_rate": 0.00014172684952460868, + "loss": 1.9217, + "step": 308490 + }, + { + "epoch": 0.73, + "grad_norm": 2.40625, + "learning_rate": 0.00014172516982114447, + "loss": 2.1634, + "step": 308495 + }, + { + "epoch": 0.73, + "grad_norm": 2.28125, + "learning_rate": 0.0001417234901034262, + "loss": 2.1081, + "step": 308500 + }, + { + "epoch": 0.73, + "grad_norm": 2.078125, + "learning_rate": 0.00014172181037145438, + "loss": 2.2182, + "step": 308505 + }, + { + "epoch": 0.73, + "grad_norm": 2.03125, + "learning_rate": 0.00014172013062522967, + "loss": 2.1414, + "step": 308510 + }, + { + "epoch": 0.73, + "grad_norm": 2.15625, + "learning_rate": 0.0001417184508647526, + "loss": 2.1312, + "step": 308515 + }, + { + "epoch": 0.73, + "grad_norm": 2.234375, + "learning_rate": 0.00014171677109002376, + "loss": 2.3455, + "step": 308520 + }, + { + "epoch": 0.73, + "grad_norm": 2.078125, + "learning_rate": 0.00014171509130104373, + "loss": 2.0831, + "step": 308525 + }, + { + "epoch": 0.73, + "grad_norm": 2.296875, + "learning_rate": 0.00014171341149781303, + "loss": 1.9672, + "step": 308530 + }, + { + "epoch": 0.73, + "grad_norm": 2.4375, + "learning_rate": 0.0001417117316803323, + "loss": 2.016, + "step": 308535 + }, + { + "epoch": 0.73, + "grad_norm": 1.984375, + "learning_rate": 0.00014171005184860207, + "loss": 1.9579, + "step": 308540 + }, + { + "epoch": 0.73, + "grad_norm": 2.203125, + "learning_rate": 0.00014170837200262293, + "loss": 2.0024, + "step": 308545 + }, + { + "epoch": 0.73, + "grad_norm": 2.3125, + "learning_rate": 0.00014170669214239544, + "loss": 2.0647, + "step": 308550 + }, + { + "epoch": 0.73, + "grad_norm": 2.09375, + "learning_rate": 0.0001417050122679202, + "loss": 2.1378, + "step": 308555 + }, + { + "epoch": 0.73, + "grad_norm": 1.9453125, + "learning_rate": 0.00014170333237919778, + "loss": 2.0357, + "step": 308560 + }, + { + "epoch": 0.73, + "grad_norm": 2.546875, + "learning_rate": 0.00014170165247622874, + "loss": 2.085, + "step": 308565 + }, + { + "epoch": 0.73, + "grad_norm": 2.3125, + "learning_rate": 0.00014169997255901367, + "loss": 2.1297, + "step": 308570 + }, + { + "epoch": 0.73, + "grad_norm": 2.03125, + "learning_rate": 0.00014169829262755313, + "loss": 2.1373, + "step": 308575 + }, + { + "epoch": 0.73, + "grad_norm": 2.09375, + "learning_rate": 0.00014169661268184769, + "loss": 1.9733, + "step": 308580 + }, + { + "epoch": 0.73, + "grad_norm": 2.4375, + "learning_rate": 0.00014169493272189794, + "loss": 2.0616, + "step": 308585 + }, + { + "epoch": 0.73, + "grad_norm": 2.671875, + "learning_rate": 0.0001416932527477044, + "loss": 2.0784, + "step": 308590 + }, + { + "epoch": 0.73, + "grad_norm": 2.296875, + "learning_rate": 0.00014169157275926774, + "loss": 2.0703, + "step": 308595 + }, + { + "epoch": 0.73, + "grad_norm": 1.9921875, + "learning_rate": 0.00014168989275658845, + "loss": 1.965, + "step": 308600 + }, + { + "epoch": 0.73, + "grad_norm": 2.03125, + "learning_rate": 0.00014168821273966717, + "loss": 2.0358, + "step": 308605 + }, + { + "epoch": 0.73, + "grad_norm": 2.171875, + "learning_rate": 0.00014168653270850445, + "loss": 2.1016, + "step": 308610 + }, + { + "epoch": 0.73, + "grad_norm": 2.375, + "learning_rate": 0.00014168485266310082, + "loss": 1.9742, + "step": 308615 + }, + { + "epoch": 0.73, + "grad_norm": 2.0, + "learning_rate": 0.00014168317260345687, + "loss": 1.8794, + "step": 308620 + }, + { + "epoch": 0.73, + "grad_norm": 1.859375, + "learning_rate": 0.00014168149252957323, + "loss": 2.0351, + "step": 308625 + }, + { + "epoch": 0.73, + "grad_norm": 2.28125, + "learning_rate": 0.00014167981244145046, + "loss": 2.2531, + "step": 308630 + }, + { + "epoch": 0.73, + "grad_norm": 2.46875, + "learning_rate": 0.00014167813233908908, + "loss": 2.0755, + "step": 308635 + }, + { + "epoch": 0.73, + "grad_norm": 2.484375, + "learning_rate": 0.00014167645222248963, + "loss": 1.982, + "step": 308640 + }, + { + "epoch": 0.73, + "grad_norm": 2.40625, + "learning_rate": 0.00014167477209165282, + "loss": 2.0269, + "step": 308645 + }, + { + "epoch": 0.73, + "grad_norm": 2.234375, + "learning_rate": 0.00014167309194657916, + "loss": 2.1454, + "step": 308650 + }, + { + "epoch": 0.73, + "grad_norm": 2.234375, + "learning_rate": 0.0001416714117872692, + "loss": 2.1731, + "step": 308655 + }, + { + "epoch": 0.73, + "grad_norm": 2.140625, + "learning_rate": 0.00014166973161372352, + "loss": 2.0906, + "step": 308660 + }, + { + "epoch": 0.73, + "grad_norm": 2.171875, + "learning_rate": 0.0001416680514259427, + "loss": 2.2052, + "step": 308665 + }, + { + "epoch": 0.73, + "grad_norm": 2.28125, + "learning_rate": 0.0001416663712239273, + "loss": 2.1899, + "step": 308670 + }, + { + "epoch": 0.73, + "grad_norm": 2.1875, + "learning_rate": 0.00014166469100767798, + "loss": 1.9303, + "step": 308675 + }, + { + "epoch": 0.73, + "grad_norm": 1.8828125, + "learning_rate": 0.0001416630107771952, + "loss": 2.0164, + "step": 308680 + }, + { + "epoch": 0.73, + "grad_norm": 2.140625, + "learning_rate": 0.0001416613305324796, + "loss": 1.8905, + "step": 308685 + }, + { + "epoch": 0.73, + "grad_norm": 1.9765625, + "learning_rate": 0.00014165965027353172, + "loss": 2.0525, + "step": 308690 + }, + { + "epoch": 0.73, + "grad_norm": 2.1875, + "learning_rate": 0.00014165797000035215, + "loss": 2.0695, + "step": 308695 + }, + { + "epoch": 0.73, + "grad_norm": 2.40625, + "learning_rate": 0.00014165628971294145, + "loss": 2.0872, + "step": 308700 + }, + { + "epoch": 0.73, + "grad_norm": 2.34375, + "learning_rate": 0.00014165460941130021, + "loss": 2.0565, + "step": 308705 + }, + { + "epoch": 0.73, + "grad_norm": 2.109375, + "learning_rate": 0.000141652929095429, + "loss": 2.0491, + "step": 308710 + }, + { + "epoch": 0.73, + "grad_norm": 2.296875, + "learning_rate": 0.0001416512487653284, + "loss": 1.7744, + "step": 308715 + }, + { + "epoch": 0.73, + "grad_norm": 3.4375, + "learning_rate": 0.000141649568420999, + "loss": 1.9187, + "step": 308720 + }, + { + "epoch": 0.73, + "grad_norm": 2.125, + "learning_rate": 0.00014164788806244133, + "loss": 2.0944, + "step": 308725 + }, + { + "epoch": 0.73, + "grad_norm": 2.1875, + "learning_rate": 0.00014164620768965597, + "loss": 1.9678, + "step": 308730 + }, + { + "epoch": 0.73, + "grad_norm": 2.09375, + "learning_rate": 0.0001416445273026435, + "loss": 2.2234, + "step": 308735 + }, + { + "epoch": 0.73, + "grad_norm": 2.21875, + "learning_rate": 0.00014164284690140454, + "loss": 1.9943, + "step": 308740 + }, + { + "epoch": 0.73, + "grad_norm": 2.25, + "learning_rate": 0.0001416411664859396, + "loss": 2.0335, + "step": 308745 + }, + { + "epoch": 0.73, + "grad_norm": 1.8359375, + "learning_rate": 0.00014163948605624933, + "loss": 2.0915, + "step": 308750 + }, + { + "epoch": 0.73, + "grad_norm": 2.21875, + "learning_rate": 0.0001416378056123342, + "loss": 2.199, + "step": 308755 + }, + { + "epoch": 0.73, + "grad_norm": 2.359375, + "learning_rate": 0.0001416361251541949, + "loss": 1.9705, + "step": 308760 + }, + { + "epoch": 0.73, + "grad_norm": 2.40625, + "learning_rate": 0.00014163444468183193, + "loss": 2.2712, + "step": 308765 + }, + { + "epoch": 0.73, + "grad_norm": 2.078125, + "learning_rate": 0.00014163276419524584, + "loss": 2.1422, + "step": 308770 + }, + { + "epoch": 0.73, + "grad_norm": 2.28125, + "learning_rate": 0.00014163108369443726, + "loss": 2.181, + "step": 308775 + }, + { + "epoch": 0.73, + "grad_norm": 2.59375, + "learning_rate": 0.00014162940317940674, + "loss": 2.1475, + "step": 308780 + }, + { + "epoch": 0.73, + "grad_norm": 2.390625, + "learning_rate": 0.00014162772265015488, + "loss": 2.055, + "step": 308785 + }, + { + "epoch": 0.73, + "grad_norm": 2.234375, + "learning_rate": 0.00014162604210668224, + "loss": 2.0824, + "step": 308790 + }, + { + "epoch": 0.73, + "grad_norm": 2.21875, + "learning_rate": 0.00014162436154898937, + "loss": 2.0578, + "step": 308795 + }, + { + "epoch": 0.73, + "grad_norm": 2.90625, + "learning_rate": 0.00014162268097707686, + "loss": 2.1348, + "step": 308800 + }, + { + "epoch": 0.73, + "grad_norm": 2.328125, + "learning_rate": 0.00014162100039094528, + "loss": 1.8607, + "step": 308805 + }, + { + "epoch": 0.73, + "grad_norm": 3.03125, + "learning_rate": 0.00014161931979059524, + "loss": 2.0658, + "step": 308810 + }, + { + "epoch": 0.73, + "grad_norm": 2.171875, + "learning_rate": 0.0001416176391760273, + "loss": 1.8439, + "step": 308815 + }, + { + "epoch": 0.73, + "grad_norm": 2.203125, + "learning_rate": 0.00014161595854724198, + "loss": 2.0698, + "step": 308820 + }, + { + "epoch": 0.73, + "grad_norm": 2.765625, + "learning_rate": 0.00014161427790423988, + "loss": 2.0879, + "step": 308825 + }, + { + "epoch": 0.73, + "grad_norm": 2.53125, + "learning_rate": 0.00014161259724702164, + "loss": 2.1598, + "step": 308830 + }, + { + "epoch": 0.73, + "grad_norm": 2.625, + "learning_rate": 0.00014161091657558775, + "loss": 2.152, + "step": 308835 + }, + { + "epoch": 0.73, + "grad_norm": 2.125, + "learning_rate": 0.00014160923588993883, + "loss": 2.207, + "step": 308840 + }, + { + "epoch": 0.73, + "grad_norm": 2.65625, + "learning_rate": 0.00014160755519007542, + "loss": 2.336, + "step": 308845 + }, + { + "epoch": 0.73, + "grad_norm": 2.1875, + "learning_rate": 0.00014160587447599815, + "loss": 2.0893, + "step": 308850 + }, + { + "epoch": 0.73, + "grad_norm": 2.65625, + "learning_rate": 0.0001416041937477075, + "loss": 1.9451, + "step": 308855 + }, + { + "epoch": 0.73, + "grad_norm": 1.828125, + "learning_rate": 0.00014160251300520416, + "loss": 2.1173, + "step": 308860 + }, + { + "epoch": 0.73, + "grad_norm": 2.34375, + "learning_rate": 0.0001416008322484886, + "loss": 2.0965, + "step": 308865 + }, + { + "epoch": 0.73, + "grad_norm": 2.21875, + "learning_rate": 0.0001415991514775615, + "loss": 2.1907, + "step": 308870 + }, + { + "epoch": 0.73, + "grad_norm": 2.1875, + "learning_rate": 0.00014159747069242333, + "loss": 1.9255, + "step": 308875 + }, + { + "epoch": 0.73, + "grad_norm": 2.203125, + "learning_rate": 0.00014159578989307472, + "loss": 2.0638, + "step": 308880 + }, + { + "epoch": 0.73, + "grad_norm": 1.9453125, + "learning_rate": 0.0001415941090795162, + "loss": 2.199, + "step": 308885 + }, + { + "epoch": 0.73, + "grad_norm": 2.390625, + "learning_rate": 0.00014159242825174844, + "loss": 2.0251, + "step": 308890 + }, + { + "epoch": 0.73, + "grad_norm": 2.265625, + "learning_rate": 0.00014159074740977194, + "loss": 2.1067, + "step": 308895 + }, + { + "epoch": 0.73, + "grad_norm": 2.40625, + "learning_rate": 0.00014158906655358728, + "loss": 2.0874, + "step": 308900 + }, + { + "epoch": 0.73, + "grad_norm": 2.296875, + "learning_rate": 0.00014158738568319503, + "loss": 2.123, + "step": 308905 + }, + { + "epoch": 0.73, + "grad_norm": 2.421875, + "learning_rate": 0.00014158570479859577, + "loss": 2.1167, + "step": 308910 + }, + { + "epoch": 0.73, + "grad_norm": 2.234375, + "learning_rate": 0.0001415840238997901, + "loss": 1.9579, + "step": 308915 + }, + { + "epoch": 0.73, + "grad_norm": 2.234375, + "learning_rate": 0.00014158234298677856, + "loss": 2.0642, + "step": 308920 + }, + { + "epoch": 0.73, + "grad_norm": 2.515625, + "learning_rate": 0.00014158066205956175, + "loss": 2.0618, + "step": 308925 + }, + { + "epoch": 0.73, + "grad_norm": 2.015625, + "learning_rate": 0.00014157898111814021, + "loss": 2.0968, + "step": 308930 + }, + { + "epoch": 0.73, + "grad_norm": 2.125, + "learning_rate": 0.00014157730016251452, + "loss": 2.1109, + "step": 308935 + }, + { + "epoch": 0.73, + "grad_norm": 2.359375, + "learning_rate": 0.00014157561919268528, + "loss": 2.0617, + "step": 308940 + }, + { + "epoch": 0.73, + "grad_norm": 2.265625, + "learning_rate": 0.0001415739382086531, + "loss": 2.089, + "step": 308945 + }, + { + "epoch": 0.73, + "grad_norm": 2.484375, + "learning_rate": 0.00014157225721041848, + "loss": 2.2277, + "step": 308950 + }, + { + "epoch": 0.73, + "grad_norm": 2.203125, + "learning_rate": 0.00014157057619798203, + "loss": 1.9504, + "step": 308955 + }, + { + "epoch": 0.73, + "grad_norm": 2.546875, + "learning_rate": 0.0001415688951713443, + "loss": 1.967, + "step": 308960 + }, + { + "epoch": 0.73, + "grad_norm": 2.421875, + "learning_rate": 0.00014156721413050586, + "loss": 2.2124, + "step": 308965 + }, + { + "epoch": 0.73, + "grad_norm": 1.9921875, + "learning_rate": 0.00014156553307546736, + "loss": 2.2691, + "step": 308970 + }, + { + "epoch": 0.73, + "grad_norm": 1.921875, + "learning_rate": 0.0001415638520062293, + "loss": 1.9947, + "step": 308975 + }, + { + "epoch": 0.73, + "grad_norm": 2.0, + "learning_rate": 0.0001415621709227923, + "loss": 1.8837, + "step": 308980 + }, + { + "epoch": 0.73, + "grad_norm": 2.375, + "learning_rate": 0.00014156048982515686, + "loss": 2.1334, + "step": 308985 + }, + { + "epoch": 0.73, + "grad_norm": 2.390625, + "learning_rate": 0.00014155880871332362, + "loss": 2.0356, + "step": 308990 + }, + { + "epoch": 0.73, + "grad_norm": 2.40625, + "learning_rate": 0.00014155712758729313, + "loss": 2.0894, + "step": 308995 + }, + { + "epoch": 0.73, + "grad_norm": 2.34375, + "learning_rate": 0.000141555446447066, + "loss": 1.8553, + "step": 309000 + }, + { + "epoch": 0.73, + "grad_norm": 2.3125, + "learning_rate": 0.00014155376529264275, + "loss": 2.1614, + "step": 309005 + }, + { + "epoch": 0.73, + "grad_norm": 2.4375, + "learning_rate": 0.000141552084124024, + "loss": 2.0444, + "step": 309010 + }, + { + "epoch": 0.73, + "grad_norm": 2.34375, + "learning_rate": 0.00014155040294121026, + "loss": 1.8577, + "step": 309015 + }, + { + "epoch": 0.73, + "grad_norm": 2.1875, + "learning_rate": 0.00014154872174420218, + "loss": 2.2832, + "step": 309020 + }, + { + "epoch": 0.73, + "grad_norm": 2.5, + "learning_rate": 0.00014154704053300033, + "loss": 2.0673, + "step": 309025 + }, + { + "epoch": 0.73, + "grad_norm": 2.46875, + "learning_rate": 0.0001415453593076052, + "loss": 2.0492, + "step": 309030 + }, + { + "epoch": 0.73, + "grad_norm": 2.203125, + "learning_rate": 0.00014154367806801748, + "loss": 2.1357, + "step": 309035 + }, + { + "epoch": 0.73, + "grad_norm": 2.15625, + "learning_rate": 0.00014154199681423764, + "loss": 2.1556, + "step": 309040 + }, + { + "epoch": 0.73, + "grad_norm": 2.53125, + "learning_rate": 0.0001415403155462663, + "loss": 2.0067, + "step": 309045 + }, + { + "epoch": 0.73, + "grad_norm": 2.234375, + "learning_rate": 0.00014153863426410407, + "loss": 2.0231, + "step": 309050 + }, + { + "epoch": 0.73, + "grad_norm": 2.140625, + "learning_rate": 0.0001415369529677515, + "loss": 2.0981, + "step": 309055 + }, + { + "epoch": 0.73, + "grad_norm": 2.34375, + "learning_rate": 0.00014153527165720913, + "loss": 2.1488, + "step": 309060 + }, + { + "epoch": 0.73, + "grad_norm": 1.96875, + "learning_rate": 0.00014153359033247754, + "loss": 2.0406, + "step": 309065 + }, + { + "epoch": 0.73, + "grad_norm": 2.109375, + "learning_rate": 0.00014153190899355732, + "loss": 1.9992, + "step": 309070 + }, + { + "epoch": 0.73, + "grad_norm": 1.8046875, + "learning_rate": 0.0001415302276404491, + "loss": 1.9487, + "step": 309075 + }, + { + "epoch": 0.73, + "grad_norm": 3.03125, + "learning_rate": 0.00014152854627315334, + "loss": 1.9964, + "step": 309080 + }, + { + "epoch": 0.73, + "grad_norm": 2.03125, + "learning_rate": 0.00014152686489167073, + "loss": 2.0403, + "step": 309085 + }, + { + "epoch": 0.73, + "grad_norm": 2.078125, + "learning_rate": 0.00014152518349600174, + "loss": 2.1361, + "step": 309090 + }, + { + "epoch": 0.73, + "grad_norm": 2.0, + "learning_rate": 0.000141523502086147, + "loss": 1.8395, + "step": 309095 + }, + { + "epoch": 0.73, + "grad_norm": 2.234375, + "learning_rate": 0.0001415218206621071, + "loss": 1.8927, + "step": 309100 + }, + { + "epoch": 0.73, + "grad_norm": 2.0625, + "learning_rate": 0.0001415201392238826, + "loss": 1.8661, + "step": 309105 + }, + { + "epoch": 0.73, + "grad_norm": 1.9296875, + "learning_rate": 0.00014151845777147406, + "loss": 2.0881, + "step": 309110 + }, + { + "epoch": 0.73, + "grad_norm": 2.15625, + "learning_rate": 0.00014151677630488204, + "loss": 2.1383, + "step": 309115 + }, + { + "epoch": 0.73, + "grad_norm": 2.265625, + "learning_rate": 0.00014151509482410713, + "loss": 2.0076, + "step": 309120 + }, + { + "epoch": 0.73, + "grad_norm": 2.359375, + "learning_rate": 0.00014151341332914994, + "loss": 2.0311, + "step": 309125 + }, + { + "epoch": 0.73, + "grad_norm": 2.265625, + "learning_rate": 0.00014151173182001104, + "loss": 2.2378, + "step": 309130 + }, + { + "epoch": 0.73, + "grad_norm": 2.265625, + "learning_rate": 0.00014151005029669093, + "loss": 1.9951, + "step": 309135 + }, + { + "epoch": 0.73, + "grad_norm": 2.265625, + "learning_rate": 0.00014150836875919027, + "loss": 2.2302, + "step": 309140 + }, + { + "epoch": 0.73, + "grad_norm": 1.96875, + "learning_rate": 0.00014150668720750958, + "loss": 1.9963, + "step": 309145 + }, + { + "epoch": 0.73, + "grad_norm": 2.1875, + "learning_rate": 0.00014150500564164944, + "loss": 2.1928, + "step": 309150 + }, + { + "epoch": 0.73, + "grad_norm": 1.75, + "learning_rate": 0.00014150332406161044, + "loss": 1.9875, + "step": 309155 + }, + { + "epoch": 0.73, + "grad_norm": 2.390625, + "learning_rate": 0.00014150164246739318, + "loss": 2.1267, + "step": 309160 + }, + { + "epoch": 0.73, + "grad_norm": 2.359375, + "learning_rate": 0.00014149996085899817, + "loss": 1.9665, + "step": 309165 + }, + { + "epoch": 0.73, + "grad_norm": 1.953125, + "learning_rate": 0.00014149827923642605, + "loss": 2.0251, + "step": 309170 + }, + { + "epoch": 0.73, + "grad_norm": 2.015625, + "learning_rate": 0.00014149659759967736, + "loss": 2.0322, + "step": 309175 + }, + { + "epoch": 0.73, + "grad_norm": 2.265625, + "learning_rate": 0.00014149491594875265, + "loss": 2.0473, + "step": 309180 + }, + { + "epoch": 0.73, + "grad_norm": 2.125, + "learning_rate": 0.00014149323428365256, + "loss": 1.8494, + "step": 309185 + }, + { + "epoch": 0.73, + "grad_norm": 2.21875, + "learning_rate": 0.0001414915526043776, + "loss": 1.9241, + "step": 309190 + }, + { + "epoch": 0.73, + "grad_norm": 2.15625, + "learning_rate": 0.0001414898709109284, + "loss": 1.9742, + "step": 309195 + }, + { + "epoch": 0.73, + "grad_norm": 2.359375, + "learning_rate": 0.0001414881892033055, + "loss": 1.9935, + "step": 309200 + }, + { + "epoch": 0.73, + "grad_norm": 1.96875, + "learning_rate": 0.00014148650748150947, + "loss": 1.9344, + "step": 309205 + }, + { + "epoch": 0.73, + "grad_norm": 2.34375, + "learning_rate": 0.00014148482574554092, + "loss": 2.0839, + "step": 309210 + }, + { + "epoch": 0.73, + "grad_norm": 2.609375, + "learning_rate": 0.00014148314399540037, + "loss": 2.1187, + "step": 309215 + }, + { + "epoch": 0.73, + "grad_norm": 2.40625, + "learning_rate": 0.00014148146223108847, + "loss": 2.0093, + "step": 309220 + }, + { + "epoch": 0.73, + "grad_norm": 1.984375, + "learning_rate": 0.00014147978045260572, + "loss": 1.9449, + "step": 309225 + }, + { + "epoch": 0.73, + "grad_norm": 2.390625, + "learning_rate": 0.0001414780986599527, + "loss": 2.0988, + "step": 309230 + }, + { + "epoch": 0.73, + "grad_norm": 2.265625, + "learning_rate": 0.00014147641685313006, + "loss": 2.0427, + "step": 309235 + }, + { + "epoch": 0.73, + "grad_norm": 2.109375, + "learning_rate": 0.0001414747350321383, + "loss": 2.1225, + "step": 309240 + }, + { + "epoch": 0.73, + "grad_norm": 2.453125, + "learning_rate": 0.00014147305319697802, + "loss": 1.9709, + "step": 309245 + }, + { + "epoch": 0.73, + "grad_norm": 2.609375, + "learning_rate": 0.0001414713713476498, + "loss": 1.955, + "step": 309250 + }, + { + "epoch": 0.73, + "grad_norm": 2.0, + "learning_rate": 0.00014146968948415417, + "loss": 2.033, + "step": 309255 + }, + { + "epoch": 0.73, + "grad_norm": 2.140625, + "learning_rate": 0.00014146800760649178, + "loss": 2.0924, + "step": 309260 + }, + { + "epoch": 0.73, + "grad_norm": 1.8828125, + "learning_rate": 0.00014146632571466317, + "loss": 1.9762, + "step": 309265 + }, + { + "epoch": 0.73, + "grad_norm": 1.8828125, + "learning_rate": 0.00014146464380866892, + "loss": 2.1541, + "step": 309270 + }, + { + "epoch": 0.73, + "grad_norm": 2.171875, + "learning_rate": 0.00014146296188850954, + "loss": 1.9964, + "step": 309275 + }, + { + "epoch": 0.73, + "grad_norm": 2.15625, + "learning_rate": 0.0001414612799541857, + "loss": 1.9357, + "step": 309280 + }, + { + "epoch": 0.73, + "grad_norm": 2.21875, + "learning_rate": 0.00014145959800569794, + "loss": 2.0604, + "step": 309285 + }, + { + "epoch": 0.73, + "grad_norm": 2.015625, + "learning_rate": 0.0001414579160430468, + "loss": 2.0698, + "step": 309290 + }, + { + "epoch": 0.73, + "grad_norm": 2.109375, + "learning_rate": 0.00014145623406623293, + "loss": 2.129, + "step": 309295 + }, + { + "epoch": 0.73, + "grad_norm": 2.234375, + "learning_rate": 0.00014145455207525683, + "loss": 1.9774, + "step": 309300 + }, + { + "epoch": 0.73, + "grad_norm": 2.171875, + "learning_rate": 0.0001414528700701191, + "loss": 2.0647, + "step": 309305 + }, + { + "epoch": 0.73, + "grad_norm": 2.203125, + "learning_rate": 0.00014145118805082032, + "loss": 2.1046, + "step": 309310 + }, + { + "epoch": 0.73, + "grad_norm": 1.8125, + "learning_rate": 0.00014144950601736108, + "loss": 1.9494, + "step": 309315 + }, + { + "epoch": 0.73, + "grad_norm": 2.3125, + "learning_rate": 0.00014144782396974189, + "loss": 1.9881, + "step": 309320 + }, + { + "epoch": 0.73, + "grad_norm": 2.46875, + "learning_rate": 0.00014144614190796342, + "loss": 2.1433, + "step": 309325 + }, + { + "epoch": 0.73, + "grad_norm": 2.078125, + "learning_rate": 0.00014144445983202617, + "loss": 2.115, + "step": 309330 + }, + { + "epoch": 0.73, + "grad_norm": 2.265625, + "learning_rate": 0.00014144277774193076, + "loss": 2.0635, + "step": 309335 + }, + { + "epoch": 0.73, + "grad_norm": 2.5, + "learning_rate": 0.0001414410956376777, + "loss": 2.0376, + "step": 309340 + }, + { + "epoch": 0.73, + "grad_norm": 2.28125, + "learning_rate": 0.0001414394135192677, + "loss": 2.0419, + "step": 309345 + }, + { + "epoch": 0.73, + "grad_norm": 2.375, + "learning_rate": 0.00014143773138670116, + "loss": 2.0365, + "step": 309350 + }, + { + "epoch": 0.73, + "grad_norm": 1.84375, + "learning_rate": 0.00014143604923997879, + "loss": 2.0086, + "step": 309355 + }, + { + "epoch": 0.73, + "grad_norm": 2.890625, + "learning_rate": 0.0001414343670791011, + "loss": 2.143, + "step": 309360 + }, + { + "epoch": 0.73, + "grad_norm": 1.9609375, + "learning_rate": 0.00014143268490406864, + "loss": 2.0886, + "step": 309365 + }, + { + "epoch": 0.73, + "grad_norm": 2.265625, + "learning_rate": 0.00014143100271488205, + "loss": 2.0092, + "step": 309370 + }, + { + "epoch": 0.73, + "grad_norm": 2.171875, + "learning_rate": 0.00014142932051154189, + "loss": 1.9458, + "step": 309375 + }, + { + "epoch": 0.73, + "grad_norm": 2.375, + "learning_rate": 0.00014142763829404876, + "loss": 2.088, + "step": 309380 + }, + { + "epoch": 0.73, + "grad_norm": 1.8828125, + "learning_rate": 0.00014142595606240314, + "loss": 2.0591, + "step": 309385 + }, + { + "epoch": 0.73, + "grad_norm": 2.078125, + "learning_rate": 0.00014142427381660567, + "loss": 1.7999, + "step": 309390 + }, + { + "epoch": 0.73, + "grad_norm": 1.96875, + "learning_rate": 0.00014142259155665692, + "loss": 2.1118, + "step": 309395 + }, + { + "epoch": 0.73, + "grad_norm": 2.15625, + "learning_rate": 0.00014142090928255746, + "loss": 2.0561, + "step": 309400 + }, + { + "epoch": 0.73, + "grad_norm": 2.28125, + "learning_rate": 0.0001414192269943079, + "loss": 1.9729, + "step": 309405 + }, + { + "epoch": 0.73, + "grad_norm": 2.15625, + "learning_rate": 0.00014141754469190873, + "loss": 1.9541, + "step": 309410 + }, + { + "epoch": 0.73, + "grad_norm": 2.515625, + "learning_rate": 0.00014141586237536056, + "loss": 2.1902, + "step": 309415 + }, + { + "epoch": 0.73, + "grad_norm": 2.171875, + "learning_rate": 0.00014141418004466407, + "loss": 1.9393, + "step": 309420 + }, + { + "epoch": 0.73, + "grad_norm": 3.109375, + "learning_rate": 0.0001414124976998197, + "loss": 1.9575, + "step": 309425 + }, + { + "epoch": 0.73, + "grad_norm": 2.125, + "learning_rate": 0.00014141081534082805, + "loss": 2.0718, + "step": 309430 + }, + { + "epoch": 0.73, + "grad_norm": 2.25, + "learning_rate": 0.00014140913296768973, + "loss": 2.1135, + "step": 309435 + }, + { + "epoch": 0.73, + "grad_norm": 2.234375, + "learning_rate": 0.00014140745058040529, + "loss": 2.1708, + "step": 309440 + }, + { + "epoch": 0.73, + "grad_norm": 2.171875, + "learning_rate": 0.00014140576817897534, + "loss": 1.9195, + "step": 309445 + }, + { + "epoch": 0.73, + "grad_norm": 2.265625, + "learning_rate": 0.00014140408576340037, + "loss": 2.1304, + "step": 309450 + }, + { + "epoch": 0.73, + "grad_norm": 2.671875, + "learning_rate": 0.00014140240333368107, + "loss": 2.1389, + "step": 309455 + }, + { + "epoch": 0.73, + "grad_norm": 2.46875, + "learning_rate": 0.00014140072088981794, + "loss": 2.0326, + "step": 309460 + }, + { + "epoch": 0.73, + "grad_norm": 2.21875, + "learning_rate": 0.00014139903843181156, + "loss": 2.0156, + "step": 309465 + }, + { + "epoch": 0.73, + "grad_norm": 2.140625, + "learning_rate": 0.00014139735595966254, + "loss": 2.1072, + "step": 309470 + }, + { + "epoch": 0.73, + "grad_norm": 2.203125, + "learning_rate": 0.00014139567347337144, + "loss": 2.0909, + "step": 309475 + }, + { + "epoch": 0.73, + "grad_norm": 2.5625, + "learning_rate": 0.0001413939909729388, + "loss": 2.1726, + "step": 309480 + }, + { + "epoch": 0.73, + "grad_norm": 2.703125, + "learning_rate": 0.00014139230845836525, + "loss": 2.0175, + "step": 309485 + }, + { + "epoch": 0.73, + "grad_norm": 2.328125, + "learning_rate": 0.00014139062592965132, + "loss": 2.0015, + "step": 309490 + }, + { + "epoch": 0.73, + "grad_norm": 1.984375, + "learning_rate": 0.0001413889433867976, + "loss": 2.0649, + "step": 309495 + }, + { + "epoch": 0.73, + "grad_norm": 2.359375, + "learning_rate": 0.00014138726082980465, + "loss": 2.0011, + "step": 309500 + }, + { + "epoch": 0.73, + "grad_norm": 2.578125, + "learning_rate": 0.0001413855782586731, + "loss": 1.9747, + "step": 309505 + }, + { + "epoch": 0.73, + "grad_norm": 2.203125, + "learning_rate": 0.00014138389567340344, + "loss": 2.1428, + "step": 309510 + }, + { + "epoch": 0.73, + "grad_norm": 2.65625, + "learning_rate": 0.00014138221307399636, + "loss": 2.1136, + "step": 309515 + }, + { + "epoch": 0.73, + "grad_norm": 2.328125, + "learning_rate": 0.0001413805304604523, + "loss": 2.1428, + "step": 309520 + }, + { + "epoch": 0.73, + "grad_norm": 2.125, + "learning_rate": 0.0001413788478327719, + "loss": 2.1433, + "step": 309525 + }, + { + "epoch": 0.73, + "grad_norm": 2.265625, + "learning_rate": 0.00014137716519095576, + "loss": 1.9556, + "step": 309530 + }, + { + "epoch": 0.73, + "grad_norm": 2.21875, + "learning_rate": 0.00014137548253500442, + "loss": 1.9537, + "step": 309535 + }, + { + "epoch": 0.73, + "grad_norm": 2.234375, + "learning_rate": 0.0001413737998649185, + "loss": 1.9592, + "step": 309540 + }, + { + "epoch": 0.73, + "grad_norm": 1.75, + "learning_rate": 0.00014137211718069848, + "loss": 1.9271, + "step": 309545 + }, + { + "epoch": 0.73, + "grad_norm": 2.234375, + "learning_rate": 0.00014137043448234505, + "loss": 2.0909, + "step": 309550 + }, + { + "epoch": 0.73, + "grad_norm": 2.171875, + "learning_rate": 0.0001413687517698587, + "loss": 2.1084, + "step": 309555 + }, + { + "epoch": 0.73, + "grad_norm": 1.5546875, + "learning_rate": 0.00014136706904324003, + "loss": 2.2522, + "step": 309560 + }, + { + "epoch": 0.73, + "grad_norm": 1.9921875, + "learning_rate": 0.00014136538630248963, + "loss": 2.2584, + "step": 309565 + }, + { + "epoch": 0.73, + "grad_norm": 2.03125, + "learning_rate": 0.00014136370354760807, + "loss": 2.0604, + "step": 309570 + }, + { + "epoch": 0.73, + "grad_norm": 1.96875, + "learning_rate": 0.0001413620207785959, + "loss": 1.9303, + "step": 309575 + }, + { + "epoch": 0.73, + "grad_norm": 2.5625, + "learning_rate": 0.00014136033799545374, + "loss": 2.1378, + "step": 309580 + }, + { + "epoch": 0.73, + "grad_norm": 1.9296875, + "learning_rate": 0.00014135865519818212, + "loss": 1.985, + "step": 309585 + }, + { + "epoch": 0.73, + "grad_norm": 1.828125, + "learning_rate": 0.00014135697238678162, + "loss": 2.1098, + "step": 309590 + }, + { + "epoch": 0.73, + "grad_norm": 2.328125, + "learning_rate": 0.00014135528956125287, + "loss": 2.078, + "step": 309595 + }, + { + "epoch": 0.73, + "grad_norm": 1.9609375, + "learning_rate": 0.00014135360672159637, + "loss": 2.1252, + "step": 309600 + }, + { + "epoch": 0.73, + "grad_norm": 2.609375, + "learning_rate": 0.00014135192386781272, + "loss": 2.1282, + "step": 309605 + }, + { + "epoch": 0.73, + "grad_norm": 2.1875, + "learning_rate": 0.00014135024099990254, + "loss": 2.0878, + "step": 309610 + }, + { + "epoch": 0.73, + "grad_norm": 2.328125, + "learning_rate": 0.0001413485581178663, + "loss": 2.2952, + "step": 309615 + }, + { + "epoch": 0.73, + "grad_norm": 2.140625, + "learning_rate": 0.00014134687522170472, + "loss": 2.084, + "step": 309620 + }, + { + "epoch": 0.73, + "grad_norm": 2.359375, + "learning_rate": 0.00014134519231141825, + "loss": 2.0577, + "step": 309625 + }, + { + "epoch": 0.73, + "grad_norm": 2.15625, + "learning_rate": 0.00014134350938700753, + "loss": 1.9772, + "step": 309630 + }, + { + "epoch": 0.73, + "grad_norm": 1.8203125, + "learning_rate": 0.00014134182644847312, + "loss": 1.9254, + "step": 309635 + }, + { + "epoch": 0.73, + "grad_norm": 2.375, + "learning_rate": 0.0001413401434958156, + "loss": 1.9763, + "step": 309640 + }, + { + "epoch": 0.73, + "grad_norm": 1.90625, + "learning_rate": 0.0001413384605290355, + "loss": 1.9914, + "step": 309645 + }, + { + "epoch": 0.73, + "grad_norm": 2.265625, + "learning_rate": 0.00014133677754813348, + "loss": 2.1289, + "step": 309650 + }, + { + "epoch": 0.73, + "grad_norm": 2.203125, + "learning_rate": 0.00014133509455311005, + "loss": 1.9459, + "step": 309655 + }, + { + "epoch": 0.73, + "grad_norm": 2.078125, + "learning_rate": 0.00014133341154396579, + "loss": 2.1704, + "step": 309660 + }, + { + "epoch": 0.73, + "grad_norm": 2.484375, + "learning_rate": 0.00014133172852070128, + "loss": 2.1741, + "step": 309665 + }, + { + "epoch": 0.73, + "grad_norm": 2.21875, + "learning_rate": 0.00014133004548331712, + "loss": 2.083, + "step": 309670 + }, + { + "epoch": 0.73, + "grad_norm": 2.109375, + "learning_rate": 0.00014132836243181386, + "loss": 2.1788, + "step": 309675 + }, + { + "epoch": 0.73, + "grad_norm": 2.078125, + "learning_rate": 0.0001413266793661921, + "loss": 1.8915, + "step": 309680 + }, + { + "epoch": 0.73, + "grad_norm": 2.0, + "learning_rate": 0.00014132499628645236, + "loss": 2.2053, + "step": 309685 + }, + { + "epoch": 0.73, + "grad_norm": 2.0, + "learning_rate": 0.00014132331319259526, + "loss": 2.3304, + "step": 309690 + }, + { + "epoch": 0.73, + "grad_norm": 2.28125, + "learning_rate": 0.0001413216300846214, + "loss": 2.1826, + "step": 309695 + }, + { + "epoch": 0.73, + "grad_norm": 2.53125, + "learning_rate": 0.0001413199469625313, + "loss": 1.9743, + "step": 309700 + }, + { + "epoch": 0.73, + "grad_norm": 2.09375, + "learning_rate": 0.00014131826382632556, + "loss": 2.0583, + "step": 309705 + }, + { + "epoch": 0.73, + "grad_norm": 2.671875, + "learning_rate": 0.00014131658067600473, + "loss": 2.0015, + "step": 309710 + }, + { + "epoch": 0.73, + "grad_norm": 1.671875, + "learning_rate": 0.00014131489751156943, + "loss": 2.1044, + "step": 309715 + }, + { + "epoch": 0.73, + "grad_norm": 2.078125, + "learning_rate": 0.00014131321433302022, + "loss": 2.0903, + "step": 309720 + }, + { + "epoch": 0.73, + "grad_norm": 2.109375, + "learning_rate": 0.00014131153114035766, + "loss": 2.0721, + "step": 309725 + }, + { + "epoch": 0.73, + "grad_norm": 1.9765625, + "learning_rate": 0.00014130984793358234, + "loss": 1.9553, + "step": 309730 + }, + { + "epoch": 0.73, + "grad_norm": 2.1875, + "learning_rate": 0.0001413081647126948, + "loss": 2.0734, + "step": 309735 + }, + { + "epoch": 0.73, + "grad_norm": 2.484375, + "learning_rate": 0.00014130648147769566, + "loss": 2.2237, + "step": 309740 + }, + { + "epoch": 0.73, + "grad_norm": 2.28125, + "learning_rate": 0.00014130479822858547, + "loss": 2.0596, + "step": 309745 + }, + { + "epoch": 0.73, + "grad_norm": 2.140625, + "learning_rate": 0.00014130311496536482, + "loss": 2.2711, + "step": 309750 + }, + { + "epoch": 0.73, + "grad_norm": 2.703125, + "learning_rate": 0.00014130143168803426, + "loss": 2.0312, + "step": 309755 + }, + { + "epoch": 0.73, + "grad_norm": 2.453125, + "learning_rate": 0.0001412997483965944, + "loss": 2.0623, + "step": 309760 + }, + { + "epoch": 0.73, + "grad_norm": 2.25, + "learning_rate": 0.00014129806509104577, + "loss": 2.0215, + "step": 309765 + }, + { + "epoch": 0.73, + "grad_norm": 2.359375, + "learning_rate": 0.000141296381771389, + "loss": 2.1743, + "step": 309770 + }, + { + "epoch": 0.73, + "grad_norm": 2.546875, + "learning_rate": 0.0001412946984376246, + "loss": 2.1605, + "step": 309775 + }, + { + "epoch": 0.73, + "grad_norm": 2.484375, + "learning_rate": 0.00014129301508975322, + "loss": 2.0299, + "step": 309780 + }, + { + "epoch": 0.73, + "grad_norm": 2.25, + "learning_rate": 0.0001412913317277754, + "loss": 2.0316, + "step": 309785 + }, + { + "epoch": 0.73, + "grad_norm": 2.265625, + "learning_rate": 0.0001412896483516917, + "loss": 2.1311, + "step": 309790 + }, + { + "epoch": 0.73, + "grad_norm": 2.265625, + "learning_rate": 0.0001412879649615027, + "loss": 2.115, + "step": 309795 + }, + { + "epoch": 0.73, + "grad_norm": 1.953125, + "learning_rate": 0.000141286281557209, + "loss": 1.9487, + "step": 309800 + }, + { + "epoch": 0.73, + "grad_norm": 2.359375, + "learning_rate": 0.00014128459813881114, + "loss": 2.0673, + "step": 309805 + }, + { + "epoch": 0.73, + "grad_norm": 2.21875, + "learning_rate": 0.00014128291470630975, + "loss": 2.0647, + "step": 309810 + }, + { + "epoch": 0.73, + "grad_norm": 2.34375, + "learning_rate": 0.00014128123125970533, + "loss": 2.0089, + "step": 309815 + }, + { + "epoch": 0.73, + "grad_norm": 2.65625, + "learning_rate": 0.00014127954779899848, + "loss": 1.9743, + "step": 309820 + }, + { + "epoch": 0.73, + "grad_norm": 2.078125, + "learning_rate": 0.0001412778643241898, + "loss": 2.2316, + "step": 309825 + }, + { + "epoch": 0.73, + "grad_norm": 2.5, + "learning_rate": 0.0001412761808352799, + "loss": 2.3167, + "step": 309830 + }, + { + "epoch": 0.73, + "grad_norm": 2.0, + "learning_rate": 0.00014127449733226924, + "loss": 1.9494, + "step": 309835 + }, + { + "epoch": 0.73, + "grad_norm": 2.421875, + "learning_rate": 0.0001412728138151585, + "loss": 2.0158, + "step": 309840 + }, + { + "epoch": 0.73, + "grad_norm": 2.265625, + "learning_rate": 0.00014127113028394818, + "loss": 2.1581, + "step": 309845 + }, + { + "epoch": 0.73, + "grad_norm": 2.5625, + "learning_rate": 0.00014126944673863894, + "loss": 1.9529, + "step": 309850 + }, + { + "epoch": 0.73, + "grad_norm": 1.96875, + "learning_rate": 0.0001412677631792313, + "loss": 2.0644, + "step": 309855 + }, + { + "epoch": 0.73, + "grad_norm": 2.546875, + "learning_rate": 0.00014126607960572585, + "loss": 2.0548, + "step": 309860 + }, + { + "epoch": 0.73, + "grad_norm": 2.1875, + "learning_rate": 0.00014126439601812313, + "loss": 2.0673, + "step": 309865 + }, + { + "epoch": 0.73, + "grad_norm": 1.8203125, + "learning_rate": 0.00014126271241642378, + "loss": 2.0916, + "step": 309870 + }, + { + "epoch": 0.73, + "grad_norm": 2.375, + "learning_rate": 0.0001412610288006283, + "loss": 2.1034, + "step": 309875 + }, + { + "epoch": 0.73, + "grad_norm": 2.359375, + "learning_rate": 0.0001412593451707373, + "loss": 2.1747, + "step": 309880 + }, + { + "epoch": 0.73, + "grad_norm": 1.921875, + "learning_rate": 0.0001412576615267514, + "loss": 2.0577, + "step": 309885 + }, + { + "epoch": 0.73, + "grad_norm": 1.828125, + "learning_rate": 0.00014125597786867112, + "loss": 1.9913, + "step": 309890 + }, + { + "epoch": 0.73, + "grad_norm": 2.15625, + "learning_rate": 0.00014125429419649705, + "loss": 2.0143, + "step": 309895 + }, + { + "epoch": 0.73, + "grad_norm": 2.28125, + "learning_rate": 0.00014125261051022977, + "loss": 1.8833, + "step": 309900 + }, + { + "epoch": 0.73, + "grad_norm": 2.1875, + "learning_rate": 0.0001412509268098698, + "loss": 2.0259, + "step": 309905 + }, + { + "epoch": 0.73, + "grad_norm": 2.3125, + "learning_rate": 0.00014124924309541782, + "loss": 1.8769, + "step": 309910 + }, + { + "epoch": 0.73, + "grad_norm": 2.125, + "learning_rate": 0.00014124755936687435, + "loss": 2.0665, + "step": 309915 + }, + { + "epoch": 0.73, + "grad_norm": 2.359375, + "learning_rate": 0.00014124587562423994, + "loss": 1.8048, + "step": 309920 + }, + { + "epoch": 0.73, + "grad_norm": 2.015625, + "learning_rate": 0.0001412441918675152, + "loss": 2.0347, + "step": 309925 + }, + { + "epoch": 0.73, + "grad_norm": 2.53125, + "learning_rate": 0.0001412425080967007, + "loss": 2.0129, + "step": 309930 + }, + { + "epoch": 0.73, + "grad_norm": 2.578125, + "learning_rate": 0.000141240824311797, + "loss": 2.0882, + "step": 309935 + }, + { + "epoch": 0.73, + "grad_norm": 2.078125, + "learning_rate": 0.00014123914051280471, + "loss": 2.0515, + "step": 309940 + }, + { + "epoch": 0.73, + "grad_norm": 2.296875, + "learning_rate": 0.0001412374566997244, + "loss": 2.0215, + "step": 309945 + }, + { + "epoch": 0.73, + "grad_norm": 1.984375, + "learning_rate": 0.0001412357728725566, + "loss": 2.0512, + "step": 309950 + }, + { + "epoch": 0.73, + "grad_norm": 2.015625, + "learning_rate": 0.00014123408903130187, + "loss": 2.0495, + "step": 309955 + }, + { + "epoch": 0.73, + "grad_norm": 1.9375, + "learning_rate": 0.00014123240517596085, + "loss": 2.1959, + "step": 309960 + }, + { + "epoch": 0.73, + "grad_norm": 2.046875, + "learning_rate": 0.00014123072130653412, + "loss": 2.199, + "step": 309965 + }, + { + "epoch": 0.73, + "grad_norm": 2.4375, + "learning_rate": 0.00014122903742302224, + "loss": 1.8983, + "step": 309970 + }, + { + "epoch": 0.73, + "grad_norm": 1.6796875, + "learning_rate": 0.00014122735352542574, + "loss": 1.9094, + "step": 309975 + }, + { + "epoch": 0.73, + "grad_norm": 2.078125, + "learning_rate": 0.0001412256696137452, + "loss": 2.0158, + "step": 309980 + }, + { + "epoch": 0.73, + "grad_norm": 2.34375, + "learning_rate": 0.00014122398568798127, + "loss": 2.018, + "step": 309985 + }, + { + "epoch": 0.73, + "grad_norm": 2.03125, + "learning_rate": 0.0001412223017481345, + "loss": 1.9216, + "step": 309990 + }, + { + "epoch": 0.73, + "grad_norm": 2.046875, + "learning_rate": 0.00014122061779420542, + "loss": 2.1633, + "step": 309995 + }, + { + "epoch": 0.73, + "grad_norm": 2.015625, + "learning_rate": 0.00014121893382619462, + "loss": 1.991, + "step": 310000 + }, + { + "epoch": 0.73, + "grad_norm": 1.8203125, + "learning_rate": 0.00014121724984410264, + "loss": 2.0588, + "step": 310005 + }, + { + "epoch": 0.73, + "grad_norm": 2.328125, + "learning_rate": 0.00014121556584793015, + "loss": 2.1599, + "step": 310010 + }, + { + "epoch": 0.73, + "grad_norm": 2.390625, + "learning_rate": 0.0001412138818376777, + "loss": 1.926, + "step": 310015 + }, + { + "epoch": 0.73, + "grad_norm": 2.484375, + "learning_rate": 0.00014121219781334582, + "loss": 1.9187, + "step": 310020 + }, + { + "epoch": 0.73, + "grad_norm": 2.140625, + "learning_rate": 0.0001412105137749351, + "loss": 2.0023, + "step": 310025 + }, + { + "epoch": 0.73, + "grad_norm": 2.015625, + "learning_rate": 0.0001412088297224461, + "loss": 2.1391, + "step": 310030 + }, + { + "epoch": 0.73, + "grad_norm": 2.09375, + "learning_rate": 0.00014120714565587944, + "loss": 2.0153, + "step": 310035 + }, + { + "epoch": 0.73, + "grad_norm": 2.046875, + "learning_rate": 0.00014120546157523568, + "loss": 2.2174, + "step": 310040 + }, + { + "epoch": 0.73, + "grad_norm": 2.15625, + "learning_rate": 0.00014120377748051536, + "loss": 2.2453, + "step": 310045 + }, + { + "epoch": 0.73, + "grad_norm": 2.21875, + "learning_rate": 0.0001412020933717191, + "loss": 1.8181, + "step": 310050 + }, + { + "epoch": 0.73, + "grad_norm": 2.375, + "learning_rate": 0.00014120040924884745, + "loss": 1.9952, + "step": 310055 + }, + { + "epoch": 0.73, + "grad_norm": 2.65625, + "learning_rate": 0.000141198725111901, + "loss": 2.1409, + "step": 310060 + }, + { + "epoch": 0.73, + "grad_norm": 2.65625, + "learning_rate": 0.0001411970409608803, + "loss": 1.9052, + "step": 310065 + }, + { + "epoch": 0.73, + "grad_norm": 2.296875, + "learning_rate": 0.000141195356795786, + "loss": 2.0695, + "step": 310070 + }, + { + "epoch": 0.73, + "grad_norm": 2.140625, + "learning_rate": 0.00014119367261661856, + "loss": 2.08, + "step": 310075 + }, + { + "epoch": 0.73, + "grad_norm": 2.09375, + "learning_rate": 0.00014119198842337864, + "loss": 1.984, + "step": 310080 + }, + { + "epoch": 0.73, + "grad_norm": 1.96875, + "learning_rate": 0.00014119030421606677, + "loss": 2.0594, + "step": 310085 + }, + { + "epoch": 0.73, + "grad_norm": 2.28125, + "learning_rate": 0.00014118861999468357, + "loss": 1.9138, + "step": 310090 + }, + { + "epoch": 0.73, + "grad_norm": 2.265625, + "learning_rate": 0.00014118693575922956, + "loss": 2.0594, + "step": 310095 + }, + { + "epoch": 0.73, + "grad_norm": 2.1875, + "learning_rate": 0.00014118525150970537, + "loss": 2.0503, + "step": 310100 + }, + { + "epoch": 0.73, + "grad_norm": 1.953125, + "learning_rate": 0.00014118356724611157, + "loss": 1.9208, + "step": 310105 + }, + { + "epoch": 0.73, + "grad_norm": 2.765625, + "learning_rate": 0.0001411818829684487, + "loss": 2.0861, + "step": 310110 + }, + { + "epoch": 0.73, + "grad_norm": 2.328125, + "learning_rate": 0.0001411801986767173, + "loss": 2.0737, + "step": 310115 + }, + { + "epoch": 0.73, + "grad_norm": 2.28125, + "learning_rate": 0.00014117851437091803, + "loss": 2.0656, + "step": 310120 + }, + { + "epoch": 0.73, + "grad_norm": 2.359375, + "learning_rate": 0.00014117683005105146, + "loss": 2.0797, + "step": 310125 + }, + { + "epoch": 0.73, + "grad_norm": 1.9375, + "learning_rate": 0.00014117514571711817, + "loss": 1.8852, + "step": 310130 + }, + { + "epoch": 0.73, + "grad_norm": 2.09375, + "learning_rate": 0.00014117346136911863, + "loss": 1.9806, + "step": 310135 + }, + { + "epoch": 0.73, + "grad_norm": 2.203125, + "learning_rate": 0.00014117177700705347, + "loss": 2.1293, + "step": 310140 + }, + { + "epoch": 0.73, + "grad_norm": 3.0, + "learning_rate": 0.00014117009263092336, + "loss": 2.0153, + "step": 310145 + }, + { + "epoch": 0.73, + "grad_norm": 1.9609375, + "learning_rate": 0.0001411684082407288, + "loss": 1.9879, + "step": 310150 + }, + { + "epoch": 0.73, + "grad_norm": 2.296875, + "learning_rate": 0.0001411667238364703, + "loss": 2.013, + "step": 310155 + }, + { + "epoch": 0.73, + "grad_norm": 2.125, + "learning_rate": 0.00014116503941814855, + "loss": 2.0602, + "step": 310160 + }, + { + "epoch": 0.73, + "grad_norm": 1.8515625, + "learning_rate": 0.00014116335498576404, + "loss": 2.032, + "step": 310165 + }, + { + "epoch": 0.73, + "grad_norm": 2.25, + "learning_rate": 0.0001411616705393174, + "loss": 1.9323, + "step": 310170 + }, + { + "epoch": 0.73, + "grad_norm": 2.265625, + "learning_rate": 0.0001411599860788092, + "loss": 2.0903, + "step": 310175 + }, + { + "epoch": 0.73, + "grad_norm": 2.34375, + "learning_rate": 0.00014115830160424, + "loss": 1.9449, + "step": 310180 + }, + { + "epoch": 0.73, + "grad_norm": 1.796875, + "learning_rate": 0.00014115661711561038, + "loss": 2.0122, + "step": 310185 + }, + { + "epoch": 0.73, + "grad_norm": 2.5, + "learning_rate": 0.0001411549326129209, + "loss": 2.1732, + "step": 310190 + }, + { + "epoch": 0.73, + "grad_norm": 1.7734375, + "learning_rate": 0.00014115324809617214, + "loss": 2.2343, + "step": 310195 + }, + { + "epoch": 0.73, + "grad_norm": 1.9921875, + "learning_rate": 0.00014115156356536468, + "loss": 2.2054, + "step": 310200 + }, + { + "epoch": 0.73, + "grad_norm": 2.046875, + "learning_rate": 0.00014114987902049913, + "loss": 1.8409, + "step": 310205 + }, + { + "epoch": 0.73, + "grad_norm": 2.3125, + "learning_rate": 0.000141148194461576, + "loss": 2.1267, + "step": 310210 + }, + { + "epoch": 0.73, + "grad_norm": 2.3125, + "learning_rate": 0.00014114650988859592, + "loss": 2.0997, + "step": 310215 + }, + { + "epoch": 0.73, + "grad_norm": 2.359375, + "learning_rate": 0.00014114482530155944, + "loss": 2.1108, + "step": 310220 + }, + { + "epoch": 0.73, + "grad_norm": 2.0, + "learning_rate": 0.00014114314070046716, + "loss": 2.0856, + "step": 310225 + }, + { + "epoch": 0.73, + "grad_norm": 2.1875, + "learning_rate": 0.0001411414560853196, + "loss": 2.0321, + "step": 310230 + }, + { + "epoch": 0.73, + "grad_norm": 2.09375, + "learning_rate": 0.00014113977145611742, + "loss": 2.1631, + "step": 310235 + }, + { + "epoch": 0.73, + "grad_norm": 2.25, + "learning_rate": 0.00014113808681286114, + "loss": 1.9568, + "step": 310240 + }, + { + "epoch": 0.73, + "grad_norm": 1.8828125, + "learning_rate": 0.00014113640215555133, + "loss": 2.2242, + "step": 310245 + }, + { + "epoch": 0.73, + "grad_norm": 2.234375, + "learning_rate": 0.00014113471748418855, + "loss": 2.0336, + "step": 310250 + }, + { + "epoch": 0.73, + "grad_norm": 2.203125, + "learning_rate": 0.00014113303279877343, + "loss": 2.1207, + "step": 310255 + }, + { + "epoch": 0.73, + "grad_norm": 1.8046875, + "learning_rate": 0.00014113134809930653, + "loss": 1.9371, + "step": 310260 + }, + { + "epoch": 0.73, + "grad_norm": 2.25, + "learning_rate": 0.00014112966338578842, + "loss": 1.8675, + "step": 310265 + }, + { + "epoch": 0.73, + "grad_norm": 2.453125, + "learning_rate": 0.00014112797865821966, + "loss": 2.2609, + "step": 310270 + }, + { + "epoch": 0.73, + "grad_norm": 2.359375, + "learning_rate": 0.0001411262939166008, + "loss": 1.9676, + "step": 310275 + }, + { + "epoch": 0.73, + "grad_norm": 2.40625, + "learning_rate": 0.00014112460916093248, + "loss": 1.973, + "step": 310280 + }, + { + "epoch": 0.73, + "grad_norm": 2.625, + "learning_rate": 0.00014112292439121527, + "loss": 2.1766, + "step": 310285 + }, + { + "epoch": 0.73, + "grad_norm": 2.015625, + "learning_rate": 0.0001411212396074497, + "loss": 1.8915, + "step": 310290 + }, + { + "epoch": 0.73, + "grad_norm": 2.078125, + "learning_rate": 0.0001411195548096364, + "loss": 1.8715, + "step": 310295 + }, + { + "epoch": 0.73, + "grad_norm": 1.9453125, + "learning_rate": 0.00014111786999777584, + "loss": 2.187, + "step": 310300 + }, + { + "epoch": 0.73, + "grad_norm": 2.5, + "learning_rate": 0.00014111618517186875, + "loss": 2.0308, + "step": 310305 + }, + { + "epoch": 0.73, + "grad_norm": 2.3125, + "learning_rate": 0.0001411145003319156, + "loss": 2.032, + "step": 310310 + }, + { + "epoch": 0.73, + "grad_norm": 2.921875, + "learning_rate": 0.000141112815477917, + "loss": 2.2433, + "step": 310315 + }, + { + "epoch": 0.73, + "grad_norm": 2.328125, + "learning_rate": 0.0001411111306098735, + "loss": 2.1285, + "step": 310320 + }, + { + "epoch": 0.73, + "grad_norm": 1.9765625, + "learning_rate": 0.00014110944572778567, + "loss": 2.1333, + "step": 310325 + }, + { + "epoch": 0.73, + "grad_norm": 2.28125, + "learning_rate": 0.00014110776083165415, + "loss": 2.0992, + "step": 310330 + }, + { + "epoch": 0.73, + "grad_norm": 2.4375, + "learning_rate": 0.00014110607592147947, + "loss": 2.1908, + "step": 310335 + }, + { + "epoch": 0.73, + "grad_norm": 2.015625, + "learning_rate": 0.00014110439099726218, + "loss": 2.1117, + "step": 310340 + }, + { + "epoch": 0.73, + "grad_norm": 2.0625, + "learning_rate": 0.00014110270605900292, + "loss": 1.8922, + "step": 310345 + }, + { + "epoch": 0.73, + "grad_norm": 2.375, + "learning_rate": 0.0001411010211067022, + "loss": 2.0072, + "step": 310350 + }, + { + "epoch": 0.73, + "grad_norm": 2.3125, + "learning_rate": 0.0001410993361403606, + "loss": 1.9878, + "step": 310355 + }, + { + "epoch": 0.73, + "grad_norm": 2.296875, + "learning_rate": 0.00014109765115997877, + "loss": 2.1247, + "step": 310360 + }, + { + "epoch": 0.73, + "grad_norm": 2.390625, + "learning_rate": 0.00014109596616555722, + "loss": 2.2126, + "step": 310365 + }, + { + "epoch": 0.73, + "grad_norm": 2.234375, + "learning_rate": 0.00014109428115709655, + "loss": 2.1285, + "step": 310370 + }, + { + "epoch": 0.73, + "grad_norm": 1.828125, + "learning_rate": 0.00014109259613459733, + "loss": 1.9566, + "step": 310375 + }, + { + "epoch": 0.73, + "grad_norm": 2.1875, + "learning_rate": 0.00014109091109806013, + "loss": 1.9893, + "step": 310380 + }, + { + "epoch": 0.73, + "grad_norm": 2.484375, + "learning_rate": 0.00014108922604748553, + "loss": 2.045, + "step": 310385 + }, + { + "epoch": 0.73, + "grad_norm": 1.7890625, + "learning_rate": 0.0001410875409828741, + "loss": 1.9483, + "step": 310390 + }, + { + "epoch": 0.73, + "grad_norm": 1.9921875, + "learning_rate": 0.00014108585590422642, + "loss": 1.9291, + "step": 310395 + }, + { + "epoch": 0.73, + "grad_norm": 2.046875, + "learning_rate": 0.0001410841708115431, + "loss": 2.2473, + "step": 310400 + }, + { + "epoch": 0.73, + "grad_norm": 2.453125, + "learning_rate": 0.00014108248570482464, + "loss": 2.0282, + "step": 310405 + }, + { + "epoch": 0.73, + "grad_norm": 2.328125, + "learning_rate": 0.00014108080058407166, + "loss": 2.1569, + "step": 310410 + }, + { + "epoch": 0.73, + "grad_norm": 1.890625, + "learning_rate": 0.00014107911544928477, + "loss": 1.9637, + "step": 310415 + }, + { + "epoch": 0.73, + "grad_norm": 2.421875, + "learning_rate": 0.00014107743030046445, + "loss": 1.8795, + "step": 310420 + }, + { + "epoch": 0.73, + "grad_norm": 2.15625, + "learning_rate": 0.00014107574513761142, + "loss": 2.2491, + "step": 310425 + }, + { + "epoch": 0.73, + "grad_norm": 2.046875, + "learning_rate": 0.0001410740599607261, + "loss": 1.9842, + "step": 310430 + }, + { + "epoch": 0.73, + "grad_norm": 2.0625, + "learning_rate": 0.00014107237476980913, + "loss": 1.8908, + "step": 310435 + }, + { + "epoch": 0.73, + "grad_norm": 2.21875, + "learning_rate": 0.0001410706895648611, + "loss": 1.9453, + "step": 310440 + }, + { + "epoch": 0.73, + "grad_norm": 1.9296875, + "learning_rate": 0.00014106900434588264, + "loss": 2.1839, + "step": 310445 + }, + { + "epoch": 0.73, + "grad_norm": 2.140625, + "learning_rate": 0.0001410673191128742, + "loss": 1.9266, + "step": 310450 + }, + { + "epoch": 0.73, + "grad_norm": 2.28125, + "learning_rate": 0.00014106563386583645, + "loss": 2.0553, + "step": 310455 + }, + { + "epoch": 0.73, + "grad_norm": 1.7578125, + "learning_rate": 0.0001410639486047699, + "loss": 2.1159, + "step": 310460 + }, + { + "epoch": 0.73, + "grad_norm": 2.265625, + "learning_rate": 0.00014106226332967522, + "loss": 1.9159, + "step": 310465 + }, + { + "epoch": 0.73, + "grad_norm": 2.453125, + "learning_rate": 0.00014106057804055285, + "loss": 2.1065, + "step": 310470 + }, + { + "epoch": 0.73, + "grad_norm": 2.125, + "learning_rate": 0.0001410588927374035, + "loss": 2.222, + "step": 310475 + }, + { + "epoch": 0.73, + "grad_norm": 2.03125, + "learning_rate": 0.00014105720742022766, + "loss": 2.1206, + "step": 310480 + }, + { + "epoch": 0.73, + "grad_norm": 2.4375, + "learning_rate": 0.00014105552208902594, + "loss": 1.9379, + "step": 310485 + }, + { + "epoch": 0.73, + "grad_norm": 2.328125, + "learning_rate": 0.00014105383674379893, + "loss": 2.1984, + "step": 310490 + }, + { + "epoch": 0.73, + "grad_norm": 2.1875, + "learning_rate": 0.00014105215138454714, + "loss": 2.0076, + "step": 310495 + }, + { + "epoch": 0.73, + "grad_norm": 2.21875, + "learning_rate": 0.00014105046601127122, + "loss": 1.9984, + "step": 310500 + }, + { + "epoch": 0.73, + "grad_norm": 2.203125, + "learning_rate": 0.00014104878062397172, + "loss": 2.1374, + "step": 310505 + }, + { + "epoch": 0.73, + "grad_norm": 2.703125, + "learning_rate": 0.0001410470952226492, + "loss": 2.0027, + "step": 310510 + }, + { + "epoch": 0.73, + "grad_norm": 2.5625, + "learning_rate": 0.00014104540980730426, + "loss": 2.0259, + "step": 310515 + }, + { + "epoch": 0.73, + "grad_norm": 2.109375, + "learning_rate": 0.00014104372437793745, + "loss": 1.9928, + "step": 310520 + }, + { + "epoch": 0.73, + "grad_norm": 2.140625, + "learning_rate": 0.00014104203893454937, + "loss": 2.3619, + "step": 310525 + }, + { + "epoch": 0.73, + "grad_norm": 2.03125, + "learning_rate": 0.00014104035347714056, + "loss": 1.7744, + "step": 310530 + }, + { + "epoch": 0.73, + "grad_norm": 2.125, + "learning_rate": 0.0001410386680057117, + "loss": 1.8933, + "step": 310535 + }, + { + "epoch": 0.73, + "grad_norm": 2.140625, + "learning_rate": 0.00014103698252026318, + "loss": 2.1744, + "step": 310540 + }, + { + "epoch": 0.73, + "grad_norm": 1.953125, + "learning_rate": 0.00014103529702079573, + "loss": 2.0311, + "step": 310545 + }, + { + "epoch": 0.73, + "grad_norm": 2.625, + "learning_rate": 0.00014103361150730987, + "loss": 2.0541, + "step": 310550 + }, + { + "epoch": 0.73, + "grad_norm": 2.546875, + "learning_rate": 0.00014103192597980622, + "loss": 2.1781, + "step": 310555 + }, + { + "epoch": 0.73, + "grad_norm": 2.109375, + "learning_rate": 0.0001410302404382853, + "loss": 2.1332, + "step": 310560 + }, + { + "epoch": 0.73, + "grad_norm": 2.21875, + "learning_rate": 0.00014102855488274768, + "loss": 2.1752, + "step": 310565 + }, + { + "epoch": 0.73, + "grad_norm": 2.59375, + "learning_rate": 0.00014102686931319398, + "loss": 2.1758, + "step": 310570 + }, + { + "epoch": 0.73, + "grad_norm": 2.15625, + "learning_rate": 0.00014102518372962475, + "loss": 1.8907, + "step": 310575 + }, + { + "epoch": 0.73, + "grad_norm": 2.171875, + "learning_rate": 0.00014102349813204058, + "loss": 1.9501, + "step": 310580 + }, + { + "epoch": 0.73, + "grad_norm": 1.9375, + "learning_rate": 0.00014102181252044206, + "loss": 1.9955, + "step": 310585 + }, + { + "epoch": 0.73, + "grad_norm": 2.046875, + "learning_rate": 0.00014102012689482974, + "loss": 2.1638, + "step": 310590 + }, + { + "epoch": 0.73, + "grad_norm": 2.28125, + "learning_rate": 0.00014101844125520414, + "loss": 2.0197, + "step": 310595 + }, + { + "epoch": 0.73, + "grad_norm": 2.0625, + "learning_rate": 0.00014101675560156593, + "loss": 2.1648, + "step": 310600 + }, + { + "epoch": 0.73, + "grad_norm": 2.421875, + "learning_rate": 0.00014101506993391568, + "loss": 1.9561, + "step": 310605 + }, + { + "epoch": 0.73, + "grad_norm": 2.390625, + "learning_rate": 0.00014101338425225396, + "loss": 2.0561, + "step": 310610 + }, + { + "epoch": 0.73, + "grad_norm": 2.234375, + "learning_rate": 0.00014101169855658126, + "loss": 2.1648, + "step": 310615 + }, + { + "epoch": 0.73, + "grad_norm": 2.1875, + "learning_rate": 0.00014101001284689825, + "loss": 1.9621, + "step": 310620 + }, + { + "epoch": 0.73, + "grad_norm": 2.359375, + "learning_rate": 0.00014100832712320548, + "loss": 2.1195, + "step": 310625 + }, + { + "epoch": 0.73, + "grad_norm": 2.375, + "learning_rate": 0.0001410066413855035, + "loss": 2.0315, + "step": 310630 + }, + { + "epoch": 0.73, + "grad_norm": 2.296875, + "learning_rate": 0.00014100495563379293, + "loss": 1.9878, + "step": 310635 + }, + { + "epoch": 0.73, + "grad_norm": 2.203125, + "learning_rate": 0.00014100326986807432, + "loss": 2.0611, + "step": 310640 + }, + { + "epoch": 0.73, + "grad_norm": 2.421875, + "learning_rate": 0.00014100158408834825, + "loss": 2.1709, + "step": 310645 + }, + { + "epoch": 0.73, + "grad_norm": 2.4375, + "learning_rate": 0.00014099989829461528, + "loss": 1.9097, + "step": 310650 + }, + { + "epoch": 0.73, + "grad_norm": 2.3125, + "learning_rate": 0.000140998212486876, + "loss": 2.0865, + "step": 310655 + }, + { + "epoch": 0.73, + "grad_norm": 2.328125, + "learning_rate": 0.00014099652666513102, + "loss": 1.8678, + "step": 310660 + }, + { + "epoch": 0.73, + "grad_norm": 2.140625, + "learning_rate": 0.00014099484082938084, + "loss": 2.0772, + "step": 310665 + }, + { + "epoch": 0.73, + "grad_norm": 2.09375, + "learning_rate": 0.00014099315497962612, + "loss": 2.0485, + "step": 310670 + }, + { + "epoch": 0.73, + "grad_norm": 2.34375, + "learning_rate": 0.00014099146911586736, + "loss": 1.9675, + "step": 310675 + }, + { + "epoch": 0.73, + "grad_norm": 2.265625, + "learning_rate": 0.00014098978323810518, + "loss": 2.2583, + "step": 310680 + }, + { + "epoch": 0.73, + "grad_norm": 2.21875, + "learning_rate": 0.00014098809734634015, + "loss": 2.0149, + "step": 310685 + }, + { + "epoch": 0.73, + "grad_norm": 2.6875, + "learning_rate": 0.00014098641144057286, + "loss": 2.1087, + "step": 310690 + }, + { + "epoch": 0.73, + "grad_norm": 2.140625, + "learning_rate": 0.00014098472552080387, + "loss": 2.0753, + "step": 310695 + }, + { + "epoch": 0.73, + "grad_norm": 2.328125, + "learning_rate": 0.00014098303958703373, + "loss": 2.1296, + "step": 310700 + }, + { + "epoch": 0.73, + "grad_norm": 2.4375, + "learning_rate": 0.000140981353639263, + "loss": 1.9939, + "step": 310705 + }, + { + "epoch": 0.73, + "grad_norm": 2.46875, + "learning_rate": 0.00014097966767749238, + "loss": 2.0096, + "step": 310710 + }, + { + "epoch": 0.73, + "grad_norm": 1.828125, + "learning_rate": 0.00014097798170172233, + "loss": 2.0288, + "step": 310715 + }, + { + "epoch": 0.73, + "grad_norm": 2.546875, + "learning_rate": 0.00014097629571195345, + "loss": 2.1277, + "step": 310720 + }, + { + "epoch": 0.73, + "grad_norm": 2.21875, + "learning_rate": 0.00014097460970818635, + "loss": 2.0787, + "step": 310725 + }, + { + "epoch": 0.73, + "grad_norm": 1.734375, + "learning_rate": 0.00014097292369042152, + "loss": 1.9619, + "step": 310730 + }, + { + "epoch": 0.73, + "grad_norm": 2.34375, + "learning_rate": 0.00014097123765865964, + "loss": 2.1489, + "step": 310735 + }, + { + "epoch": 0.73, + "grad_norm": 1.7734375, + "learning_rate": 0.00014096955161290126, + "loss": 1.9336, + "step": 310740 + }, + { + "epoch": 0.73, + "grad_norm": 2.28125, + "learning_rate": 0.0001409678655531469, + "loss": 1.9862, + "step": 310745 + }, + { + "epoch": 0.73, + "grad_norm": 1.8828125, + "learning_rate": 0.0001409661794793972, + "loss": 2.0355, + "step": 310750 + }, + { + "epoch": 0.73, + "grad_norm": 2.8125, + "learning_rate": 0.0001409644933916527, + "loss": 2.0412, + "step": 310755 + }, + { + "epoch": 0.73, + "grad_norm": 2.0625, + "learning_rate": 0.000140962807289914, + "loss": 1.8925, + "step": 310760 + }, + { + "epoch": 0.73, + "grad_norm": 2.34375, + "learning_rate": 0.00014096112117418163, + "loss": 2.0775, + "step": 310765 + }, + { + "epoch": 0.73, + "grad_norm": 1.8671875, + "learning_rate": 0.0001409594350444562, + "loss": 1.9486, + "step": 310770 + }, + { + "epoch": 0.73, + "grad_norm": 2.34375, + "learning_rate": 0.0001409577489007383, + "loss": 2.1687, + "step": 310775 + }, + { + "epoch": 0.73, + "grad_norm": 1.9453125, + "learning_rate": 0.00014095606274302852, + "loss": 2.0429, + "step": 310780 + }, + { + "epoch": 0.73, + "grad_norm": 1.765625, + "learning_rate": 0.00014095437657132736, + "loss": 1.8943, + "step": 310785 + }, + { + "epoch": 0.73, + "grad_norm": 2.078125, + "learning_rate": 0.00014095269038563548, + "loss": 2.0122, + "step": 310790 + }, + { + "epoch": 0.73, + "grad_norm": 2.265625, + "learning_rate": 0.00014095100418595337, + "loss": 2.2209, + "step": 310795 + }, + { + "epoch": 0.73, + "grad_norm": 1.71875, + "learning_rate": 0.0001409493179722817, + "loss": 1.9174, + "step": 310800 + }, + { + "epoch": 0.73, + "grad_norm": 2.421875, + "learning_rate": 0.000140947631744621, + "loss": 2.2101, + "step": 310805 + }, + { + "epoch": 0.73, + "grad_norm": 1.9296875, + "learning_rate": 0.0001409459455029718, + "loss": 2.0286, + "step": 310810 + }, + { + "epoch": 0.73, + "grad_norm": 2.1875, + "learning_rate": 0.00014094425924733475, + "loss": 2.1121, + "step": 310815 + }, + { + "epoch": 0.73, + "grad_norm": 2.125, + "learning_rate": 0.0001409425729777104, + "loss": 2.1502, + "step": 310820 + }, + { + "epoch": 0.73, + "grad_norm": 2.203125, + "learning_rate": 0.00014094088669409932, + "loss": 1.9417, + "step": 310825 + }, + { + "epoch": 0.73, + "grad_norm": 1.8671875, + "learning_rate": 0.0001409392003965021, + "loss": 2.1415, + "step": 310830 + }, + { + "epoch": 0.73, + "grad_norm": 1.9765625, + "learning_rate": 0.0001409375140849193, + "loss": 2.1452, + "step": 310835 + }, + { + "epoch": 0.73, + "grad_norm": 2.046875, + "learning_rate": 0.0001409358277593515, + "loss": 2.0734, + "step": 310840 + }, + { + "epoch": 0.73, + "grad_norm": 2.1875, + "learning_rate": 0.00014093414141979927, + "loss": 1.9049, + "step": 310845 + }, + { + "epoch": 0.73, + "grad_norm": 2.078125, + "learning_rate": 0.00014093245506626323, + "loss": 2.0427, + "step": 310850 + }, + { + "epoch": 0.73, + "grad_norm": 3.265625, + "learning_rate": 0.00014093076869874392, + "loss": 1.994, + "step": 310855 + }, + { + "epoch": 0.73, + "grad_norm": 2.09375, + "learning_rate": 0.0001409290823172419, + "loss": 1.9386, + "step": 310860 + }, + { + "epoch": 0.73, + "grad_norm": 2.1875, + "learning_rate": 0.00014092739592175773, + "loss": 2.1206, + "step": 310865 + }, + { + "epoch": 0.73, + "grad_norm": 2.234375, + "learning_rate": 0.00014092570951229204, + "loss": 2.0298, + "step": 310870 + }, + { + "epoch": 0.73, + "grad_norm": 2.15625, + "learning_rate": 0.00014092402308884543, + "loss": 2.2455, + "step": 310875 + }, + { + "epoch": 0.73, + "grad_norm": 2.59375, + "learning_rate": 0.0001409223366514184, + "loss": 2.2073, + "step": 310880 + }, + { + "epoch": 0.73, + "grad_norm": 2.3125, + "learning_rate": 0.00014092065020001156, + "loss": 2.0186, + "step": 310885 + }, + { + "epoch": 0.73, + "grad_norm": 2.734375, + "learning_rate": 0.00014091896373462544, + "loss": 2.2604, + "step": 310890 + }, + { + "epoch": 0.73, + "grad_norm": 1.9765625, + "learning_rate": 0.0001409172772552607, + "loss": 2.0627, + "step": 310895 + }, + { + "epoch": 0.73, + "grad_norm": 2.296875, + "learning_rate": 0.00014091559076191788, + "loss": 1.9509, + "step": 310900 + }, + { + "epoch": 0.73, + "grad_norm": 2.28125, + "learning_rate": 0.00014091390425459755, + "loss": 2.0211, + "step": 310905 + }, + { + "epoch": 0.73, + "grad_norm": 2.453125, + "learning_rate": 0.00014091221773330028, + "loss": 2.0376, + "step": 310910 + }, + { + "epoch": 0.73, + "grad_norm": 2.59375, + "learning_rate": 0.00014091053119802665, + "loss": 2.1711, + "step": 310915 + }, + { + "epoch": 0.73, + "grad_norm": 2.046875, + "learning_rate": 0.00014090884464877724, + "loss": 1.992, + "step": 310920 + }, + { + "epoch": 0.73, + "grad_norm": 1.96875, + "learning_rate": 0.00014090715808555264, + "loss": 1.953, + "step": 310925 + }, + { + "epoch": 0.73, + "grad_norm": 2.609375, + "learning_rate": 0.0001409054715083534, + "loss": 2.0523, + "step": 310930 + }, + { + "epoch": 0.73, + "grad_norm": 2.359375, + "learning_rate": 0.00014090378491718014, + "loss": 2.0938, + "step": 310935 + }, + { + "epoch": 0.73, + "grad_norm": 2.34375, + "learning_rate": 0.00014090209831203336, + "loss": 2.1237, + "step": 310940 + }, + { + "epoch": 0.73, + "grad_norm": 2.3125, + "learning_rate": 0.0001409004116929137, + "loss": 2.1056, + "step": 310945 + }, + { + "epoch": 0.73, + "grad_norm": 2.609375, + "learning_rate": 0.00014089872505982173, + "loss": 1.9321, + "step": 310950 + }, + { + "epoch": 0.73, + "grad_norm": 2.453125, + "learning_rate": 0.00014089703841275802, + "loss": 2.0175, + "step": 310955 + }, + { + "epoch": 0.73, + "grad_norm": 1.875, + "learning_rate": 0.00014089535175172314, + "loss": 2.1325, + "step": 310960 + }, + { + "epoch": 0.73, + "grad_norm": 2.71875, + "learning_rate": 0.0001408936650767176, + "loss": 2.0983, + "step": 310965 + }, + { + "epoch": 0.73, + "grad_norm": 2.265625, + "learning_rate": 0.00014089197838774214, + "loss": 2.0361, + "step": 310970 + }, + { + "epoch": 0.73, + "grad_norm": 2.140625, + "learning_rate": 0.00014089029168479718, + "loss": 1.9938, + "step": 310975 + }, + { + "epoch": 0.73, + "grad_norm": 2.359375, + "learning_rate": 0.00014088860496788333, + "loss": 2.1632, + "step": 310980 + }, + { + "epoch": 0.73, + "grad_norm": 2.296875, + "learning_rate": 0.00014088691823700125, + "loss": 2.0497, + "step": 310985 + }, + { + "epoch": 0.73, + "grad_norm": 2.046875, + "learning_rate": 0.00014088523149215145, + "loss": 2.1439, + "step": 310990 + }, + { + "epoch": 0.73, + "grad_norm": 2.078125, + "learning_rate": 0.0001408835447333345, + "loss": 2.0631, + "step": 310995 + }, + { + "epoch": 0.73, + "grad_norm": 2.390625, + "learning_rate": 0.00014088185796055097, + "loss": 2.0334, + "step": 311000 + }, + { + "epoch": 0.73, + "grad_norm": 2.609375, + "learning_rate": 0.00014088017117380147, + "loss": 1.9015, + "step": 311005 + }, + { + "epoch": 0.73, + "grad_norm": 2.359375, + "learning_rate": 0.00014087848437308658, + "loss": 1.994, + "step": 311010 + }, + { + "epoch": 0.73, + "grad_norm": 2.1875, + "learning_rate": 0.00014087679755840686, + "loss": 1.9376, + "step": 311015 + }, + { + "epoch": 0.73, + "grad_norm": 1.84375, + "learning_rate": 0.00014087511072976287, + "loss": 2.1685, + "step": 311020 + }, + { + "epoch": 0.73, + "grad_norm": 2.1875, + "learning_rate": 0.00014087342388715516, + "loss": 2.1558, + "step": 311025 + }, + { + "epoch": 0.73, + "grad_norm": 2.1875, + "learning_rate": 0.00014087173703058442, + "loss": 2.2732, + "step": 311030 + }, + { + "epoch": 0.73, + "grad_norm": 2.453125, + "learning_rate": 0.0001408700501600511, + "loss": 2.3617, + "step": 311035 + }, + { + "epoch": 0.73, + "grad_norm": 2.09375, + "learning_rate": 0.00014086836327555587, + "loss": 2.1857, + "step": 311040 + }, + { + "epoch": 0.73, + "grad_norm": 2.515625, + "learning_rate": 0.00014086667637709926, + "loss": 1.9589, + "step": 311045 + }, + { + "epoch": 0.73, + "grad_norm": 2.4375, + "learning_rate": 0.0001408649894646818, + "loss": 1.9715, + "step": 311050 + }, + { + "epoch": 0.73, + "grad_norm": 1.8984375, + "learning_rate": 0.00014086330253830417, + "loss": 1.9203, + "step": 311055 + }, + { + "epoch": 0.73, + "grad_norm": 1.9296875, + "learning_rate": 0.00014086161559796688, + "loss": 1.9966, + "step": 311060 + }, + { + "epoch": 0.73, + "grad_norm": 2.375, + "learning_rate": 0.00014085992864367052, + "loss": 2.1574, + "step": 311065 + }, + { + "epoch": 0.73, + "grad_norm": 1.640625, + "learning_rate": 0.00014085824167541568, + "loss": 1.9909, + "step": 311070 + }, + { + "epoch": 0.73, + "grad_norm": 2.171875, + "learning_rate": 0.0001408565546932029, + "loss": 2.2353, + "step": 311075 + }, + { + "epoch": 0.73, + "grad_norm": 2.1875, + "learning_rate": 0.0001408548676970328, + "loss": 2.0228, + "step": 311080 + }, + { + "epoch": 0.73, + "grad_norm": 2.34375, + "learning_rate": 0.00014085318068690595, + "loss": 2.0614, + "step": 311085 + }, + { + "epoch": 0.73, + "grad_norm": 2.203125, + "learning_rate": 0.00014085149366282286, + "loss": 2.116, + "step": 311090 + }, + { + "epoch": 0.73, + "grad_norm": 2.53125, + "learning_rate": 0.00014084980662478418, + "loss": 1.9909, + "step": 311095 + }, + { + "epoch": 0.73, + "grad_norm": 2.640625, + "learning_rate": 0.0001408481195727905, + "loss": 2.1973, + "step": 311100 + }, + { + "epoch": 0.73, + "grad_norm": 2.109375, + "learning_rate": 0.00014084643250684232, + "loss": 2.0732, + "step": 311105 + }, + { + "epoch": 0.73, + "grad_norm": 2.671875, + "learning_rate": 0.00014084474542694028, + "loss": 1.9664, + "step": 311110 + }, + { + "epoch": 0.73, + "grad_norm": 2.421875, + "learning_rate": 0.00014084305833308492, + "loss": 2.1132, + "step": 311115 + }, + { + "epoch": 0.73, + "grad_norm": 2.515625, + "learning_rate": 0.00014084137122527684, + "loss": 1.9879, + "step": 311120 + }, + { + "epoch": 0.73, + "grad_norm": 2.078125, + "learning_rate": 0.00014083968410351662, + "loss": 2.0762, + "step": 311125 + }, + { + "epoch": 0.73, + "grad_norm": 2.015625, + "learning_rate": 0.00014083799696780483, + "loss": 1.8844, + "step": 311130 + }, + { + "epoch": 0.73, + "grad_norm": 2.046875, + "learning_rate": 0.00014083630981814198, + "loss": 2.0325, + "step": 311135 + }, + { + "epoch": 0.73, + "grad_norm": 2.328125, + "learning_rate": 0.00014083462265452877, + "loss": 1.9021, + "step": 311140 + }, + { + "epoch": 0.73, + "grad_norm": 2.578125, + "learning_rate": 0.0001408329354769657, + "loss": 2.1062, + "step": 311145 + }, + { + "epoch": 0.73, + "grad_norm": 2.40625, + "learning_rate": 0.00014083124828545335, + "loss": 2.2416, + "step": 311150 + }, + { + "epoch": 0.73, + "grad_norm": 2.09375, + "learning_rate": 0.0001408295610799923, + "loss": 1.86, + "step": 311155 + }, + { + "epoch": 0.73, + "grad_norm": 2.265625, + "learning_rate": 0.00014082787386058312, + "loss": 1.7782, + "step": 311160 + }, + { + "epoch": 0.73, + "grad_norm": 2.359375, + "learning_rate": 0.00014082618662722643, + "loss": 2.1064, + "step": 311165 + }, + { + "epoch": 0.73, + "grad_norm": 2.21875, + "learning_rate": 0.00014082449937992278, + "loss": 2.2043, + "step": 311170 + }, + { + "epoch": 0.73, + "grad_norm": 2.09375, + "learning_rate": 0.00014082281211867269, + "loss": 1.9792, + "step": 311175 + }, + { + "epoch": 0.73, + "grad_norm": 2.078125, + "learning_rate": 0.0001408211248434768, + "loss": 1.9514, + "step": 311180 + }, + { + "epoch": 0.73, + "grad_norm": 2.21875, + "learning_rate": 0.00014081943755433568, + "loss": 2.1021, + "step": 311185 + }, + { + "epoch": 0.73, + "grad_norm": 2.28125, + "learning_rate": 0.0001408177502512499, + "loss": 2.0371, + "step": 311190 + }, + { + "epoch": 0.73, + "grad_norm": 2.125, + "learning_rate": 0.00014081606293422008, + "loss": 2.0888, + "step": 311195 + }, + { + "epoch": 0.73, + "grad_norm": 2.046875, + "learning_rate": 0.0001408143756032467, + "loss": 2.1001, + "step": 311200 + }, + { + "epoch": 0.73, + "grad_norm": 1.859375, + "learning_rate": 0.0001408126882583304, + "loss": 2.0082, + "step": 311205 + }, + { + "epoch": 0.73, + "grad_norm": 2.21875, + "learning_rate": 0.00014081100089947173, + "loss": 2.1309, + "step": 311210 + }, + { + "epoch": 0.73, + "grad_norm": 2.34375, + "learning_rate": 0.00014080931352667133, + "loss": 2.2285, + "step": 311215 + }, + { + "epoch": 0.73, + "grad_norm": 2.1875, + "learning_rate": 0.0001408076261399297, + "loss": 2.0303, + "step": 311220 + }, + { + "epoch": 0.73, + "grad_norm": 2.53125, + "learning_rate": 0.00014080593873924745, + "loss": 2.0967, + "step": 311225 + }, + { + "epoch": 0.73, + "grad_norm": 2.53125, + "learning_rate": 0.00014080425132462514, + "loss": 2.0459, + "step": 311230 + }, + { + "epoch": 0.73, + "grad_norm": 2.390625, + "learning_rate": 0.00014080256389606337, + "loss": 1.9571, + "step": 311235 + }, + { + "epoch": 0.73, + "grad_norm": 2.640625, + "learning_rate": 0.0001408008764535627, + "loss": 1.984, + "step": 311240 + }, + { + "epoch": 0.73, + "grad_norm": 2.390625, + "learning_rate": 0.0001407991889971237, + "loss": 1.9546, + "step": 311245 + }, + { + "epoch": 0.73, + "grad_norm": 2.53125, + "learning_rate": 0.00014079750152674696, + "loss": 2.0141, + "step": 311250 + }, + { + "epoch": 0.73, + "grad_norm": 2.421875, + "learning_rate": 0.00014079581404243308, + "loss": 2.0943, + "step": 311255 + }, + { + "epoch": 0.73, + "grad_norm": 2.546875, + "learning_rate": 0.00014079412654418257, + "loss": 2.0708, + "step": 311260 + }, + { + "epoch": 0.73, + "grad_norm": 2.546875, + "learning_rate": 0.00014079243903199605, + "loss": 1.9387, + "step": 311265 + }, + { + "epoch": 0.73, + "grad_norm": 2.3125, + "learning_rate": 0.00014079075150587412, + "loss": 2.2126, + "step": 311270 + }, + { + "epoch": 0.73, + "grad_norm": 2.21875, + "learning_rate": 0.0001407890639658173, + "loss": 2.0537, + "step": 311275 + }, + { + "epoch": 0.73, + "grad_norm": 2.21875, + "learning_rate": 0.00014078737641182623, + "loss": 2.2569, + "step": 311280 + }, + { + "epoch": 0.73, + "grad_norm": 2.625, + "learning_rate": 0.00014078568884390148, + "loss": 2.0687, + "step": 311285 + }, + { + "epoch": 0.73, + "grad_norm": 1.6953125, + "learning_rate": 0.00014078400126204353, + "loss": 2.13, + "step": 311290 + }, + { + "epoch": 0.73, + "grad_norm": 2.4375, + "learning_rate": 0.00014078231366625304, + "loss": 2.0663, + "step": 311295 + }, + { + "epoch": 0.73, + "grad_norm": 2.25, + "learning_rate": 0.00014078062605653058, + "loss": 2.1355, + "step": 311300 + }, + { + "epoch": 0.73, + "grad_norm": 2.1875, + "learning_rate": 0.00014077893843287672, + "loss": 2.0596, + "step": 311305 + }, + { + "epoch": 0.73, + "grad_norm": 2.28125, + "learning_rate": 0.00014077725079529208, + "loss": 2.0044, + "step": 311310 + }, + { + "epoch": 0.73, + "grad_norm": 2.484375, + "learning_rate": 0.00014077556314377713, + "loss": 2.1727, + "step": 311315 + }, + { + "epoch": 0.73, + "grad_norm": 2.234375, + "learning_rate": 0.0001407738754783325, + "loss": 1.9928, + "step": 311320 + }, + { + "epoch": 0.73, + "grad_norm": 2.40625, + "learning_rate": 0.0001407721877989588, + "loss": 1.9738, + "step": 311325 + }, + { + "epoch": 0.73, + "grad_norm": 2.296875, + "learning_rate": 0.0001407705001056566, + "loss": 1.8597, + "step": 311330 + }, + { + "epoch": 0.73, + "grad_norm": 3.34375, + "learning_rate": 0.00014076881239842646, + "loss": 2.0824, + "step": 311335 + }, + { + "epoch": 0.73, + "grad_norm": 2.09375, + "learning_rate": 0.00014076712467726895, + "loss": 1.9909, + "step": 311340 + }, + { + "epoch": 0.73, + "grad_norm": 2.046875, + "learning_rate": 0.0001407654369421846, + "loss": 1.8706, + "step": 311345 + }, + { + "epoch": 0.73, + "grad_norm": 2.140625, + "learning_rate": 0.0001407637491931741, + "loss": 2.1056, + "step": 311350 + }, + { + "epoch": 0.73, + "grad_norm": 2.375, + "learning_rate": 0.00014076206143023794, + "loss": 1.9862, + "step": 311355 + }, + { + "epoch": 0.73, + "grad_norm": 2.046875, + "learning_rate": 0.0001407603736533767, + "loss": 2.0361, + "step": 311360 + }, + { + "epoch": 0.73, + "grad_norm": 2.34375, + "learning_rate": 0.000140758685862591, + "loss": 1.9183, + "step": 311365 + }, + { + "epoch": 0.73, + "grad_norm": 2.125, + "learning_rate": 0.0001407569980578814, + "loss": 1.9763, + "step": 311370 + }, + { + "epoch": 0.73, + "grad_norm": 2.046875, + "learning_rate": 0.00014075531023924844, + "loss": 1.8788, + "step": 311375 + }, + { + "epoch": 0.73, + "grad_norm": 2.09375, + "learning_rate": 0.00014075362240669275, + "loss": 2.1484, + "step": 311380 + }, + { + "epoch": 0.73, + "grad_norm": 2.296875, + "learning_rate": 0.00014075193456021488, + "loss": 2.0132, + "step": 311385 + }, + { + "epoch": 0.73, + "grad_norm": 2.53125, + "learning_rate": 0.0001407502466998154, + "loss": 1.8556, + "step": 311390 + }, + { + "epoch": 0.73, + "grad_norm": 2.625, + "learning_rate": 0.0001407485588254949, + "loss": 2.1365, + "step": 311395 + }, + { + "epoch": 0.73, + "grad_norm": 2.09375, + "learning_rate": 0.00014074687093725395, + "loss": 1.9183, + "step": 311400 + }, + { + "epoch": 0.73, + "grad_norm": 1.9140625, + "learning_rate": 0.00014074518303509317, + "loss": 2.0257, + "step": 311405 + }, + { + "epoch": 0.73, + "grad_norm": 2.359375, + "learning_rate": 0.00014074349511901307, + "loss": 2.0245, + "step": 311410 + }, + { + "epoch": 0.73, + "grad_norm": 2.4375, + "learning_rate": 0.00014074180718901422, + "loss": 1.9688, + "step": 311415 + }, + { + "epoch": 0.73, + "grad_norm": 2.5, + "learning_rate": 0.00014074011924509726, + "loss": 1.9689, + "step": 311420 + }, + { + "epoch": 0.73, + "grad_norm": 2.140625, + "learning_rate": 0.00014073843128726275, + "loss": 2.0354, + "step": 311425 + }, + { + "epoch": 0.73, + "grad_norm": 2.171875, + "learning_rate": 0.00014073674331551122, + "loss": 1.9555, + "step": 311430 + }, + { + "epoch": 0.73, + "grad_norm": 2.296875, + "learning_rate": 0.00014073505532984331, + "loss": 1.9233, + "step": 311435 + }, + { + "epoch": 0.73, + "grad_norm": 2.296875, + "learning_rate": 0.00014073336733025954, + "loss": 2.1147, + "step": 311440 + }, + { + "epoch": 0.73, + "grad_norm": 2.21875, + "learning_rate": 0.00014073167931676056, + "loss": 2.0311, + "step": 311445 + }, + { + "epoch": 0.73, + "grad_norm": 2.15625, + "learning_rate": 0.00014072999128934686, + "loss": 1.9867, + "step": 311450 + }, + { + "epoch": 0.73, + "grad_norm": 1.8203125, + "learning_rate": 0.00014072830324801905, + "loss": 1.8945, + "step": 311455 + }, + { + "epoch": 0.73, + "grad_norm": 2.40625, + "learning_rate": 0.00014072661519277772, + "loss": 2.3013, + "step": 311460 + }, + { + "epoch": 0.73, + "grad_norm": 2.265625, + "learning_rate": 0.00014072492712362345, + "loss": 2.1123, + "step": 311465 + }, + { + "epoch": 0.73, + "grad_norm": 2.5625, + "learning_rate": 0.0001407232390405568, + "loss": 2.202, + "step": 311470 + }, + { + "epoch": 0.73, + "grad_norm": 2.3125, + "learning_rate": 0.00014072155094357837, + "loss": 2.1844, + "step": 311475 + }, + { + "epoch": 0.73, + "grad_norm": 2.359375, + "learning_rate": 0.00014071986283268869, + "loss": 2.0373, + "step": 311480 + }, + { + "epoch": 0.73, + "grad_norm": 2.1875, + "learning_rate": 0.00014071817470788836, + "loss": 2.2328, + "step": 311485 + }, + { + "epoch": 0.73, + "grad_norm": 2.234375, + "learning_rate": 0.00014071648656917803, + "loss": 2.1429, + "step": 311490 + }, + { + "epoch": 0.73, + "grad_norm": 2.15625, + "learning_rate": 0.00014071479841655816, + "loss": 1.9968, + "step": 311495 + }, + { + "epoch": 0.73, + "grad_norm": 2.15625, + "learning_rate": 0.00014071311025002937, + "loss": 1.9879, + "step": 311500 + }, + { + "epoch": 0.73, + "grad_norm": 2.609375, + "learning_rate": 0.00014071142206959224, + "loss": 2.0949, + "step": 311505 + }, + { + "epoch": 0.73, + "grad_norm": 2.140625, + "learning_rate": 0.0001407097338752474, + "loss": 1.9668, + "step": 311510 + }, + { + "epoch": 0.73, + "grad_norm": 2.203125, + "learning_rate": 0.00014070804566699533, + "loss": 1.9487, + "step": 311515 + }, + { + "epoch": 0.73, + "grad_norm": 2.40625, + "learning_rate": 0.00014070635744483666, + "loss": 2.0666, + "step": 311520 + }, + { + "epoch": 0.73, + "grad_norm": 2.390625, + "learning_rate": 0.00014070466920877193, + "loss": 1.7787, + "step": 311525 + }, + { + "epoch": 0.73, + "grad_norm": 2.203125, + "learning_rate": 0.0001407029809588018, + "loss": 2.0618, + "step": 311530 + }, + { + "epoch": 0.73, + "grad_norm": 2.515625, + "learning_rate": 0.00014070129269492677, + "loss": 1.9755, + "step": 311535 + }, + { + "epoch": 0.73, + "grad_norm": 1.8984375, + "learning_rate": 0.00014069960441714745, + "loss": 2.0284, + "step": 311540 + }, + { + "epoch": 0.73, + "grad_norm": 2.53125, + "learning_rate": 0.0001406979161254644, + "loss": 1.8223, + "step": 311545 + }, + { + "epoch": 0.73, + "grad_norm": 2.3125, + "learning_rate": 0.00014069622781987822, + "loss": 2.0687, + "step": 311550 + }, + { + "epoch": 0.73, + "grad_norm": 1.9296875, + "learning_rate": 0.00014069453950038945, + "loss": 2.1628, + "step": 311555 + }, + { + "epoch": 0.73, + "grad_norm": 2.015625, + "learning_rate": 0.0001406928511669987, + "loss": 2.0162, + "step": 311560 + }, + { + "epoch": 0.73, + "grad_norm": 2.4375, + "learning_rate": 0.0001406911628197065, + "loss": 2.0495, + "step": 311565 + }, + { + "epoch": 0.73, + "grad_norm": 2.40625, + "learning_rate": 0.00014068947445851351, + "loss": 1.9747, + "step": 311570 + }, + { + "epoch": 0.73, + "grad_norm": 2.28125, + "learning_rate": 0.00014068778608342022, + "loss": 2.1292, + "step": 311575 + }, + { + "epoch": 0.73, + "grad_norm": 2.34375, + "learning_rate": 0.00014068609769442728, + "loss": 2.1377, + "step": 311580 + }, + { + "epoch": 0.73, + "grad_norm": 2.359375, + "learning_rate": 0.0001406844092915352, + "loss": 2.0144, + "step": 311585 + }, + { + "epoch": 0.73, + "grad_norm": 2.21875, + "learning_rate": 0.0001406827208747446, + "loss": 2.072, + "step": 311590 + }, + { + "epoch": 0.73, + "grad_norm": 2.125, + "learning_rate": 0.00014068103244405604, + "loss": 2.0005, + "step": 311595 + }, + { + "epoch": 0.73, + "grad_norm": 2.671875, + "learning_rate": 0.0001406793439994701, + "loss": 2.1253, + "step": 311600 + }, + { + "epoch": 0.73, + "grad_norm": 2.453125, + "learning_rate": 0.00014067765554098737, + "loss": 1.83, + "step": 311605 + }, + { + "epoch": 0.73, + "grad_norm": 2.28125, + "learning_rate": 0.00014067596706860842, + "loss": 1.9618, + "step": 311610 + }, + { + "epoch": 0.73, + "grad_norm": 2.78125, + "learning_rate": 0.0001406742785823338, + "loss": 2.073, + "step": 311615 + }, + { + "epoch": 0.73, + "grad_norm": 2.109375, + "learning_rate": 0.00014067259008216412, + "loss": 2.0592, + "step": 311620 + }, + { + "epoch": 0.73, + "grad_norm": 2.125, + "learning_rate": 0.00014067090156809995, + "loss": 2.0906, + "step": 311625 + }, + { + "epoch": 0.73, + "grad_norm": 2.921875, + "learning_rate": 0.00014066921304014185, + "loss": 2.0946, + "step": 311630 + }, + { + "epoch": 0.73, + "grad_norm": 1.8671875, + "learning_rate": 0.00014066752449829042, + "loss": 2.1209, + "step": 311635 + }, + { + "epoch": 0.73, + "grad_norm": 2.171875, + "learning_rate": 0.00014066583594254622, + "loss": 2.021, + "step": 311640 + }, + { + "epoch": 0.73, + "grad_norm": 2.53125, + "learning_rate": 0.00014066414737290984, + "loss": 2.1149, + "step": 311645 + }, + { + "epoch": 0.73, + "grad_norm": 2.234375, + "learning_rate": 0.00014066245878938184, + "loss": 2.1295, + "step": 311650 + }, + { + "epoch": 0.73, + "grad_norm": 2.078125, + "learning_rate": 0.0001406607701919628, + "loss": 2.0626, + "step": 311655 + }, + { + "epoch": 0.73, + "grad_norm": 2.171875, + "learning_rate": 0.00014065908158065332, + "loss": 2.0601, + "step": 311660 + }, + { + "epoch": 0.73, + "grad_norm": 2.296875, + "learning_rate": 0.00014065739295545396, + "loss": 1.9598, + "step": 311665 + }, + { + "epoch": 0.73, + "grad_norm": 1.9140625, + "learning_rate": 0.0001406557043163653, + "loss": 2.2565, + "step": 311670 + }, + { + "epoch": 0.73, + "grad_norm": 1.8671875, + "learning_rate": 0.0001406540156633879, + "loss": 2.1406, + "step": 311675 + }, + { + "epoch": 0.73, + "grad_norm": 2.546875, + "learning_rate": 0.00014065232699652234, + "loss": 2.1709, + "step": 311680 + }, + { + "epoch": 0.73, + "grad_norm": 2.25, + "learning_rate": 0.00014065063831576922, + "loss": 1.974, + "step": 311685 + }, + { + "epoch": 0.73, + "grad_norm": 2.265625, + "learning_rate": 0.00014064894962112913, + "loss": 2.16, + "step": 311690 + }, + { + "epoch": 0.73, + "grad_norm": 2.1875, + "learning_rate": 0.0001406472609126026, + "loss": 2.0177, + "step": 311695 + }, + { + "epoch": 0.73, + "grad_norm": 2.125, + "learning_rate": 0.00014064557219019023, + "loss": 1.9297, + "step": 311700 + }, + { + "epoch": 0.73, + "grad_norm": 1.875, + "learning_rate": 0.0001406438834538926, + "loss": 2.1236, + "step": 311705 + }, + { + "epoch": 0.73, + "grad_norm": 2.28125, + "learning_rate": 0.00014064219470371028, + "loss": 2.0604, + "step": 311710 + }, + { + "epoch": 0.73, + "grad_norm": 2.09375, + "learning_rate": 0.00014064050593964384, + "loss": 2.2391, + "step": 311715 + }, + { + "epoch": 0.73, + "grad_norm": 1.9453125, + "learning_rate": 0.00014063881716169387, + "loss": 2.0703, + "step": 311720 + }, + { + "epoch": 0.73, + "grad_norm": 2.328125, + "learning_rate": 0.00014063712836986093, + "loss": 2.2794, + "step": 311725 + }, + { + "epoch": 0.73, + "grad_norm": 2.4375, + "learning_rate": 0.00014063543956414565, + "loss": 2.0923, + "step": 311730 + }, + { + "epoch": 0.73, + "grad_norm": 1.9921875, + "learning_rate": 0.00014063375074454854, + "loss": 2.1983, + "step": 311735 + }, + { + "epoch": 0.73, + "grad_norm": 2.296875, + "learning_rate": 0.0001406320619110702, + "loss": 2.1389, + "step": 311740 + }, + { + "epoch": 0.73, + "grad_norm": 2.296875, + "learning_rate": 0.00014063037306371123, + "loss": 2.0968, + "step": 311745 + }, + { + "epoch": 0.73, + "grad_norm": 2.375, + "learning_rate": 0.00014062868420247217, + "loss": 2.069, + "step": 311750 + }, + { + "epoch": 0.73, + "grad_norm": 2.375, + "learning_rate": 0.0001406269953273536, + "loss": 2.099, + "step": 311755 + }, + { + "epoch": 0.73, + "grad_norm": 1.734375, + "learning_rate": 0.00014062530643835615, + "loss": 1.9047, + "step": 311760 + }, + { + "epoch": 0.73, + "grad_norm": 2.515625, + "learning_rate": 0.00014062361753548034, + "loss": 2.0767, + "step": 311765 + }, + { + "epoch": 0.73, + "grad_norm": 1.7890625, + "learning_rate": 0.0001406219286187268, + "loss": 1.8477, + "step": 311770 + }, + { + "epoch": 0.73, + "grad_norm": 1.96875, + "learning_rate": 0.000140620239688096, + "loss": 2.1138, + "step": 311775 + }, + { + "epoch": 0.73, + "grad_norm": 1.9453125, + "learning_rate": 0.0001406185507435886, + "loss": 2.14, + "step": 311780 + }, + { + "epoch": 0.73, + "grad_norm": 2.046875, + "learning_rate": 0.00014061686178520525, + "loss": 2.004, + "step": 311785 + }, + { + "epoch": 0.73, + "grad_norm": 3.0, + "learning_rate": 0.00014061517281294637, + "loss": 1.9124, + "step": 311790 + }, + { + "epoch": 0.73, + "grad_norm": 2.0625, + "learning_rate": 0.00014061348382681264, + "loss": 2.0773, + "step": 311795 + }, + { + "epoch": 0.73, + "grad_norm": 2.03125, + "learning_rate": 0.00014061179482680457, + "loss": 2.0697, + "step": 311800 + }, + { + "epoch": 0.73, + "grad_norm": 2.5, + "learning_rate": 0.00014061010581292282, + "loss": 2.1809, + "step": 311805 + }, + { + "epoch": 0.73, + "grad_norm": 2.546875, + "learning_rate": 0.0001406084167851679, + "loss": 2.0552, + "step": 311810 + }, + { + "epoch": 0.73, + "grad_norm": 2.65625, + "learning_rate": 0.0001406067277435404, + "loss": 2.043, + "step": 311815 + }, + { + "epoch": 0.73, + "grad_norm": 2.171875, + "learning_rate": 0.00014060503868804092, + "loss": 2.0821, + "step": 311820 + }, + { + "epoch": 0.73, + "grad_norm": 2.03125, + "learning_rate": 0.00014060334961867, + "loss": 2.0539, + "step": 311825 + }, + { + "epoch": 0.73, + "grad_norm": 2.25, + "learning_rate": 0.00014060166053542827, + "loss": 2.0312, + "step": 311830 + }, + { + "epoch": 0.73, + "grad_norm": 2.328125, + "learning_rate": 0.00014059997143831625, + "loss": 1.8779, + "step": 311835 + }, + { + "epoch": 0.73, + "grad_norm": 2.234375, + "learning_rate": 0.00014059828232733455, + "loss": 2.1381, + "step": 311840 + }, + { + "epoch": 0.73, + "grad_norm": 2.0625, + "learning_rate": 0.00014059659320248376, + "loss": 2.0748, + "step": 311845 + }, + { + "epoch": 0.73, + "grad_norm": 2.59375, + "learning_rate": 0.0001405949040637644, + "loss": 2.0634, + "step": 311850 + }, + { + "epoch": 0.73, + "grad_norm": 2.03125, + "learning_rate": 0.00014059321491117715, + "loss": 1.9781, + "step": 311855 + }, + { + "epoch": 0.73, + "grad_norm": 2.046875, + "learning_rate": 0.00014059152574472244, + "loss": 2.0124, + "step": 311860 + }, + { + "epoch": 0.73, + "grad_norm": 2.078125, + "learning_rate": 0.000140589836564401, + "loss": 2.0086, + "step": 311865 + }, + { + "epoch": 0.73, + "grad_norm": 2.609375, + "learning_rate": 0.00014058814737021329, + "loss": 2.055, + "step": 311870 + }, + { + "epoch": 0.73, + "grad_norm": 2.0625, + "learning_rate": 0.00014058645816215996, + "loss": 1.9749, + "step": 311875 + }, + { + "epoch": 0.73, + "grad_norm": 1.9296875, + "learning_rate": 0.00014058476894024153, + "loss": 1.9608, + "step": 311880 + }, + { + "epoch": 0.73, + "grad_norm": 2.703125, + "learning_rate": 0.00014058307970445864, + "loss": 1.85, + "step": 311885 + }, + { + "epoch": 0.73, + "grad_norm": 1.9296875, + "learning_rate": 0.0001405813904548118, + "loss": 2.3322, + "step": 311890 + }, + { + "epoch": 0.73, + "grad_norm": 2.09375, + "learning_rate": 0.00014057970119130163, + "loss": 1.966, + "step": 311895 + }, + { + "epoch": 0.73, + "grad_norm": 2.078125, + "learning_rate": 0.00014057801191392875, + "loss": 1.9992, + "step": 311900 + }, + { + "epoch": 0.73, + "grad_norm": 2.421875, + "learning_rate": 0.00014057632262269362, + "loss": 1.9568, + "step": 311905 + }, + { + "epoch": 0.73, + "grad_norm": 2.078125, + "learning_rate": 0.00014057463331759688, + "loss": 2.1665, + "step": 311910 + }, + { + "epoch": 0.73, + "grad_norm": 2.28125, + "learning_rate": 0.00014057294399863913, + "loss": 2.0122, + "step": 311915 + }, + { + "epoch": 0.73, + "grad_norm": 2.0625, + "learning_rate": 0.00014057125466582096, + "loss": 2.1264, + "step": 311920 + }, + { + "epoch": 0.73, + "grad_norm": 2.625, + "learning_rate": 0.0001405695653191429, + "loss": 2.0864, + "step": 311925 + }, + { + "epoch": 0.73, + "grad_norm": 2.09375, + "learning_rate": 0.00014056787595860553, + "loss": 2.0736, + "step": 311930 + }, + { + "epoch": 0.73, + "grad_norm": 2.0625, + "learning_rate": 0.00014056618658420938, + "loss": 1.9496, + "step": 311935 + }, + { + "epoch": 0.73, + "grad_norm": 2.734375, + "learning_rate": 0.00014056449719595517, + "loss": 2.0372, + "step": 311940 + }, + { + "epoch": 0.73, + "grad_norm": 2.03125, + "learning_rate": 0.00014056280779384337, + "loss": 1.9992, + "step": 311945 + }, + { + "epoch": 0.73, + "grad_norm": 2.015625, + "learning_rate": 0.00014056111837787454, + "loss": 2.2241, + "step": 311950 + }, + { + "epoch": 0.73, + "grad_norm": 2.09375, + "learning_rate": 0.00014055942894804934, + "loss": 1.9529, + "step": 311955 + }, + { + "epoch": 0.73, + "grad_norm": 2.578125, + "learning_rate": 0.00014055773950436828, + "loss": 1.9354, + "step": 311960 + }, + { + "epoch": 0.73, + "grad_norm": 2.09375, + "learning_rate": 0.00014055605004683198, + "loss": 2.065, + "step": 311965 + }, + { + "epoch": 0.73, + "grad_norm": 2.453125, + "learning_rate": 0.000140554360575441, + "loss": 2.032, + "step": 311970 + }, + { + "epoch": 0.73, + "grad_norm": 2.453125, + "learning_rate": 0.0001405526710901959, + "loss": 1.9719, + "step": 311975 + }, + { + "epoch": 0.73, + "grad_norm": 3.0, + "learning_rate": 0.00014055098159109725, + "loss": 2.0001, + "step": 311980 + }, + { + "epoch": 0.73, + "grad_norm": 2.640625, + "learning_rate": 0.0001405492920781457, + "loss": 2.1347, + "step": 311985 + }, + { + "epoch": 0.73, + "grad_norm": 1.9921875, + "learning_rate": 0.00014054760255134173, + "loss": 2.0949, + "step": 311990 + }, + { + "epoch": 0.73, + "grad_norm": 2.390625, + "learning_rate": 0.000140545913010686, + "loss": 2.0332, + "step": 311995 + }, + { + "epoch": 0.73, + "grad_norm": 2.296875, + "learning_rate": 0.00014054422345617903, + "loss": 1.9674, + "step": 312000 + }, + { + "epoch": 0.73, + "grad_norm": 2.109375, + "learning_rate": 0.00014054253388782141, + "loss": 2.1024, + "step": 312005 + }, + { + "epoch": 0.73, + "grad_norm": 2.28125, + "learning_rate": 0.00014054084430561374, + "loss": 2.0017, + "step": 312010 + }, + { + "epoch": 0.73, + "grad_norm": 1.84375, + "learning_rate": 0.0001405391547095566, + "loss": 2.0368, + "step": 312015 + }, + { + "epoch": 0.73, + "grad_norm": 2.296875, + "learning_rate": 0.0001405374650996505, + "loss": 2.0388, + "step": 312020 + }, + { + "epoch": 0.73, + "grad_norm": 2.296875, + "learning_rate": 0.00014053577547589613, + "loss": 1.987, + "step": 312025 + }, + { + "epoch": 0.73, + "grad_norm": 2.5, + "learning_rate": 0.00014053408583829396, + "loss": 2.1168, + "step": 312030 + }, + { + "epoch": 0.73, + "grad_norm": 2.0625, + "learning_rate": 0.0001405323961868446, + "loss": 2.0276, + "step": 312035 + }, + { + "epoch": 0.73, + "grad_norm": 2.1875, + "learning_rate": 0.00014053070652154866, + "loss": 2.1134, + "step": 312040 + }, + { + "epoch": 0.73, + "grad_norm": 2.140625, + "learning_rate": 0.0001405290168424067, + "loss": 2.1148, + "step": 312045 + }, + { + "epoch": 0.73, + "grad_norm": 2.3125, + "learning_rate": 0.00014052732714941928, + "loss": 1.9146, + "step": 312050 + }, + { + "epoch": 0.73, + "grad_norm": 2.515625, + "learning_rate": 0.000140525637442587, + "loss": 2.05, + "step": 312055 + }, + { + "epoch": 0.73, + "grad_norm": 2.296875, + "learning_rate": 0.00014052394772191045, + "loss": 1.9831, + "step": 312060 + }, + { + "epoch": 0.73, + "grad_norm": 2.46875, + "learning_rate": 0.00014052225798739015, + "loss": 1.999, + "step": 312065 + }, + { + "epoch": 0.73, + "grad_norm": 2.140625, + "learning_rate": 0.0001405205682390267, + "loss": 2.1413, + "step": 312070 + }, + { + "epoch": 0.73, + "grad_norm": 2.6875, + "learning_rate": 0.00014051887847682073, + "loss": 2.0204, + "step": 312075 + }, + { + "epoch": 0.73, + "grad_norm": 2.28125, + "learning_rate": 0.00014051718870077275, + "loss": 1.9951, + "step": 312080 + }, + { + "epoch": 0.73, + "grad_norm": 2.1875, + "learning_rate": 0.00014051549891088338, + "loss": 2.1769, + "step": 312085 + }, + { + "epoch": 0.73, + "grad_norm": 1.9296875, + "learning_rate": 0.00014051380910715317, + "loss": 2.0164, + "step": 312090 + }, + { + "epoch": 0.73, + "grad_norm": 2.296875, + "learning_rate": 0.0001405121192895827, + "loss": 2.0201, + "step": 312095 + }, + { + "epoch": 0.73, + "grad_norm": 1.921875, + "learning_rate": 0.00014051042945817258, + "loss": 2.0068, + "step": 312100 + }, + { + "epoch": 0.73, + "grad_norm": 2.328125, + "learning_rate": 0.00014050873961292333, + "loss": 1.917, + "step": 312105 + }, + { + "epoch": 0.73, + "grad_norm": 2.015625, + "learning_rate": 0.00014050704975383557, + "loss": 2.1057, + "step": 312110 + }, + { + "epoch": 0.73, + "grad_norm": 2.359375, + "learning_rate": 0.0001405053598809099, + "loss": 2.2134, + "step": 312115 + }, + { + "epoch": 0.73, + "grad_norm": 1.75, + "learning_rate": 0.00014050366999414683, + "loss": 2.0821, + "step": 312120 + }, + { + "epoch": 0.73, + "grad_norm": 2.109375, + "learning_rate": 0.00014050198009354697, + "loss": 1.8858, + "step": 312125 + }, + { + "epoch": 0.73, + "grad_norm": 1.8828125, + "learning_rate": 0.0001405002901791109, + "loss": 2.1362, + "step": 312130 + }, + { + "epoch": 0.73, + "grad_norm": 2.1875, + "learning_rate": 0.00014049860025083924, + "loss": 2.0453, + "step": 312135 + }, + { + "epoch": 0.73, + "grad_norm": 2.265625, + "learning_rate": 0.00014049691030873246, + "loss": 2.1956, + "step": 312140 + }, + { + "epoch": 0.73, + "grad_norm": 2.078125, + "learning_rate": 0.00014049522035279125, + "loss": 2.0151, + "step": 312145 + }, + { + "epoch": 0.73, + "grad_norm": 1.6875, + "learning_rate": 0.00014049353038301612, + "loss": 2.0936, + "step": 312150 + }, + { + "epoch": 0.73, + "grad_norm": 1.9375, + "learning_rate": 0.00014049184039940764, + "loss": 1.9126, + "step": 312155 + }, + { + "epoch": 0.73, + "grad_norm": 2.078125, + "learning_rate": 0.00014049015040196644, + "loss": 2.0582, + "step": 312160 + }, + { + "epoch": 0.73, + "grad_norm": 2.15625, + "learning_rate": 0.00014048846039069307, + "loss": 2.1001, + "step": 312165 + }, + { + "epoch": 0.73, + "grad_norm": 2.015625, + "learning_rate": 0.00014048677036558813, + "loss": 2.1094, + "step": 312170 + }, + { + "epoch": 0.73, + "grad_norm": 2.21875, + "learning_rate": 0.00014048508032665213, + "loss": 2.1804, + "step": 312175 + }, + { + "epoch": 0.73, + "grad_norm": 2.109375, + "learning_rate": 0.0001404833902738857, + "loss": 1.8663, + "step": 312180 + }, + { + "epoch": 0.73, + "grad_norm": 2.25, + "learning_rate": 0.00014048170020728945, + "loss": 1.8988, + "step": 312185 + }, + { + "epoch": 0.73, + "grad_norm": 2.234375, + "learning_rate": 0.00014048001012686388, + "loss": 2.0792, + "step": 312190 + }, + { + "epoch": 0.73, + "grad_norm": 2.5, + "learning_rate": 0.00014047832003260963, + "loss": 2.1726, + "step": 312195 + }, + { + "epoch": 0.73, + "grad_norm": 2.375, + "learning_rate": 0.0001404766299245272, + "loss": 2.1858, + "step": 312200 + }, + { + "epoch": 0.73, + "grad_norm": 2.375, + "learning_rate": 0.00014047493980261726, + "loss": 2.0444, + "step": 312205 + }, + { + "epoch": 0.73, + "grad_norm": 2.328125, + "learning_rate": 0.00014047324966688033, + "loss": 1.9588, + "step": 312210 + }, + { + "epoch": 0.73, + "grad_norm": 2.984375, + "learning_rate": 0.00014047155951731706, + "loss": 1.9745, + "step": 312215 + }, + { + "epoch": 0.73, + "grad_norm": 2.8125, + "learning_rate": 0.00014046986935392793, + "loss": 2.0182, + "step": 312220 + }, + { + "epoch": 0.73, + "grad_norm": 2.0625, + "learning_rate": 0.00014046817917671354, + "loss": 2.1107, + "step": 312225 + }, + { + "epoch": 0.73, + "grad_norm": 1.953125, + "learning_rate": 0.00014046648898567447, + "loss": 1.9296, + "step": 312230 + }, + { + "epoch": 0.73, + "grad_norm": 1.90625, + "learning_rate": 0.00014046479878081138, + "loss": 1.8954, + "step": 312235 + }, + { + "epoch": 0.73, + "grad_norm": 1.953125, + "learning_rate": 0.00014046310856212472, + "loss": 1.9768, + "step": 312240 + }, + { + "epoch": 0.73, + "grad_norm": 2.59375, + "learning_rate": 0.00014046141832961518, + "loss": 2.0274, + "step": 312245 + }, + { + "epoch": 0.73, + "grad_norm": 2.46875, + "learning_rate": 0.00014045972808328326, + "loss": 2.145, + "step": 312250 + }, + { + "epoch": 0.73, + "grad_norm": 2.0, + "learning_rate": 0.00014045803782312955, + "loss": 2.066, + "step": 312255 + }, + { + "epoch": 0.73, + "grad_norm": 2.40625, + "learning_rate": 0.00014045634754915464, + "loss": 2.0586, + "step": 312260 + }, + { + "epoch": 0.73, + "grad_norm": 2.0625, + "learning_rate": 0.00014045465726135914, + "loss": 2.0962, + "step": 312265 + }, + { + "epoch": 0.73, + "grad_norm": 2.3125, + "learning_rate": 0.00014045296695974356, + "loss": 1.9921, + "step": 312270 + }, + { + "epoch": 0.73, + "grad_norm": 1.9765625, + "learning_rate": 0.00014045127664430852, + "loss": 2.0573, + "step": 312275 + }, + { + "epoch": 0.73, + "grad_norm": 2.703125, + "learning_rate": 0.0001404495863150546, + "loss": 2.0269, + "step": 312280 + }, + { + "epoch": 0.73, + "grad_norm": 2.0, + "learning_rate": 0.00014044789597198238, + "loss": 1.9732, + "step": 312285 + }, + { + "epoch": 0.73, + "grad_norm": 2.125, + "learning_rate": 0.0001404462056150924, + "loss": 2.0074, + "step": 312290 + }, + { + "epoch": 0.73, + "grad_norm": 2.0625, + "learning_rate": 0.00014044451524438528, + "loss": 2.0741, + "step": 312295 + }, + { + "epoch": 0.73, + "grad_norm": 2.0625, + "learning_rate": 0.00014044282485986158, + "loss": 1.9177, + "step": 312300 + }, + { + "epoch": 0.73, + "grad_norm": 2.65625, + "learning_rate": 0.00014044113446152188, + "loss": 2.1475, + "step": 312305 + }, + { + "epoch": 0.73, + "grad_norm": 2.421875, + "learning_rate": 0.00014043944404936674, + "loss": 2.0221, + "step": 312310 + }, + { + "epoch": 0.73, + "grad_norm": 2.265625, + "learning_rate": 0.00014043775362339674, + "loss": 1.8899, + "step": 312315 + }, + { + "epoch": 0.73, + "grad_norm": 2.34375, + "learning_rate": 0.0001404360631836125, + "loss": 2.1414, + "step": 312320 + }, + { + "epoch": 0.73, + "grad_norm": 2.015625, + "learning_rate": 0.00014043437273001457, + "loss": 1.94, + "step": 312325 + }, + { + "epoch": 0.74, + "grad_norm": 3.515625, + "learning_rate": 0.00014043268226260352, + "loss": 1.8897, + "step": 312330 + }, + { + "epoch": 0.74, + "grad_norm": 2.421875, + "learning_rate": 0.0001404309917813799, + "loss": 2.1275, + "step": 312335 + }, + { + "epoch": 0.74, + "grad_norm": 1.9765625, + "learning_rate": 0.00014042930128634436, + "loss": 1.9628, + "step": 312340 + }, + { + "epoch": 0.74, + "grad_norm": 2.03125, + "learning_rate": 0.00014042761077749741, + "loss": 2.153, + "step": 312345 + }, + { + "epoch": 0.74, + "grad_norm": 2.203125, + "learning_rate": 0.00014042592025483968, + "loss": 1.9513, + "step": 312350 + }, + { + "epoch": 0.74, + "grad_norm": 1.9765625, + "learning_rate": 0.00014042422971837173, + "loss": 2.1037, + "step": 312355 + }, + { + "epoch": 0.74, + "grad_norm": 2.3125, + "learning_rate": 0.0001404225391680941, + "loss": 2.054, + "step": 312360 + }, + { + "epoch": 0.74, + "grad_norm": 2.546875, + "learning_rate": 0.0001404208486040074, + "loss": 2.0746, + "step": 312365 + }, + { + "epoch": 0.74, + "grad_norm": 2.0625, + "learning_rate": 0.0001404191580261122, + "loss": 1.9522, + "step": 312370 + }, + { + "epoch": 0.74, + "grad_norm": 2.28125, + "learning_rate": 0.0001404174674344091, + "loss": 2.1171, + "step": 312375 + }, + { + "epoch": 0.74, + "grad_norm": 2.265625, + "learning_rate": 0.00014041577682889867, + "loss": 2.0965, + "step": 312380 + }, + { + "epoch": 0.74, + "grad_norm": 2.3125, + "learning_rate": 0.00014041408620958144, + "loss": 2.1547, + "step": 312385 + }, + { + "epoch": 0.74, + "grad_norm": 2.046875, + "learning_rate": 0.00014041239557645803, + "loss": 1.9255, + "step": 312390 + }, + { + "epoch": 0.74, + "grad_norm": 2.203125, + "learning_rate": 0.00014041070492952905, + "loss": 2.0039, + "step": 312395 + }, + { + "epoch": 0.74, + "grad_norm": 2.203125, + "learning_rate": 0.00014040901426879505, + "loss": 2.0633, + "step": 312400 + }, + { + "epoch": 0.74, + "grad_norm": 1.859375, + "learning_rate": 0.00014040732359425653, + "loss": 2.1808, + "step": 312405 + }, + { + "epoch": 0.74, + "grad_norm": 1.921875, + "learning_rate": 0.0001404056329059142, + "loss": 2.0215, + "step": 312410 + }, + { + "epoch": 0.74, + "grad_norm": 2.53125, + "learning_rate": 0.00014040394220376853, + "loss": 2.0815, + "step": 312415 + }, + { + "epoch": 0.74, + "grad_norm": 2.484375, + "learning_rate": 0.00014040225148782018, + "loss": 2.1964, + "step": 312420 + }, + { + "epoch": 0.74, + "grad_norm": 2.640625, + "learning_rate": 0.00014040056075806967, + "loss": 1.9954, + "step": 312425 + }, + { + "epoch": 0.74, + "grad_norm": 2.03125, + "learning_rate": 0.00014039887001451756, + "loss": 1.9893, + "step": 312430 + }, + { + "epoch": 0.74, + "grad_norm": 2.015625, + "learning_rate": 0.0001403971792571645, + "loss": 2.1127, + "step": 312435 + }, + { + "epoch": 0.74, + "grad_norm": 2.109375, + "learning_rate": 0.00014039548848601103, + "loss": 2.0701, + "step": 312440 + }, + { + "epoch": 0.74, + "grad_norm": 2.265625, + "learning_rate": 0.00014039379770105774, + "loss": 1.9469, + "step": 312445 + }, + { + "epoch": 0.74, + "grad_norm": 2.609375, + "learning_rate": 0.00014039210690230518, + "loss": 2.264, + "step": 312450 + }, + { + "epoch": 0.74, + "grad_norm": 2.3125, + "learning_rate": 0.00014039041608975394, + "loss": 2.1374, + "step": 312455 + }, + { + "epoch": 0.74, + "grad_norm": 2.421875, + "learning_rate": 0.00014038872526340462, + "loss": 2.1204, + "step": 312460 + }, + { + "epoch": 0.74, + "grad_norm": 2.140625, + "learning_rate": 0.00014038703442325777, + "loss": 2.0804, + "step": 312465 + }, + { + "epoch": 0.74, + "grad_norm": 3.046875, + "learning_rate": 0.00014038534356931395, + "loss": 1.9279, + "step": 312470 + }, + { + "epoch": 0.74, + "grad_norm": 3.078125, + "learning_rate": 0.00014038365270157378, + "loss": 1.9665, + "step": 312475 + }, + { + "epoch": 0.74, + "grad_norm": 2.09375, + "learning_rate": 0.0001403819618200378, + "loss": 1.9452, + "step": 312480 + }, + { + "epoch": 0.74, + "grad_norm": 2.40625, + "learning_rate": 0.00014038027092470667, + "loss": 1.9861, + "step": 312485 + }, + { + "epoch": 0.74, + "grad_norm": 2.25, + "learning_rate": 0.0001403785800155809, + "loss": 2.0643, + "step": 312490 + }, + { + "epoch": 0.74, + "grad_norm": 2.21875, + "learning_rate": 0.000140376889092661, + "loss": 2.0422, + "step": 312495 + }, + { + "epoch": 0.74, + "grad_norm": 1.7890625, + "learning_rate": 0.0001403751981559477, + "loss": 2.1028, + "step": 312500 + }, + { + "epoch": 0.74, + "grad_norm": 2.3125, + "learning_rate": 0.00014037350720544144, + "loss": 2.1087, + "step": 312505 + }, + { + "epoch": 0.74, + "grad_norm": 2.4375, + "learning_rate": 0.00014037181624114293, + "loss": 2.1124, + "step": 312510 + }, + { + "epoch": 0.74, + "grad_norm": 2.578125, + "learning_rate": 0.00014037012526305263, + "loss": 2.1141, + "step": 312515 + }, + { + "epoch": 0.74, + "grad_norm": 2.46875, + "learning_rate": 0.00014036843427117117, + "loss": 1.9035, + "step": 312520 + }, + { + "epoch": 0.74, + "grad_norm": 2.5625, + "learning_rate": 0.0001403667432654991, + "loss": 2.0428, + "step": 312525 + }, + { + "epoch": 0.74, + "grad_norm": 1.7265625, + "learning_rate": 0.00014036505224603705, + "loss": 2.0792, + "step": 312530 + }, + { + "epoch": 0.74, + "grad_norm": 2.0625, + "learning_rate": 0.00014036336121278555, + "loss": 2.1084, + "step": 312535 + }, + { + "epoch": 0.74, + "grad_norm": 2.046875, + "learning_rate": 0.0001403616701657452, + "loss": 2.0357, + "step": 312540 + }, + { + "epoch": 0.74, + "grad_norm": 2.453125, + "learning_rate": 0.00014035997910491656, + "loss": 1.9395, + "step": 312545 + }, + { + "epoch": 0.74, + "grad_norm": 2.296875, + "learning_rate": 0.0001403582880303002, + "loss": 2.0202, + "step": 312550 + }, + { + "epoch": 0.74, + "grad_norm": 1.734375, + "learning_rate": 0.00014035659694189677, + "loss": 2.058, + "step": 312555 + }, + { + "epoch": 0.74, + "grad_norm": 1.828125, + "learning_rate": 0.00014035490583970677, + "loss": 2.0859, + "step": 312560 + }, + { + "epoch": 0.74, + "grad_norm": 2.15625, + "learning_rate": 0.0001403532147237308, + "loss": 1.9122, + "step": 312565 + }, + { + "epoch": 0.74, + "grad_norm": 2.203125, + "learning_rate": 0.00014035152359396944, + "loss": 2.0424, + "step": 312570 + }, + { + "epoch": 0.74, + "grad_norm": 2.265625, + "learning_rate": 0.00014034983245042324, + "loss": 2.1108, + "step": 312575 + }, + { + "epoch": 0.74, + "grad_norm": 2.109375, + "learning_rate": 0.00014034814129309286, + "loss": 1.9044, + "step": 312580 + }, + { + "epoch": 0.74, + "grad_norm": 2.359375, + "learning_rate": 0.0001403464501219788, + "loss": 2.208, + "step": 312585 + }, + { + "epoch": 0.74, + "grad_norm": 2.40625, + "learning_rate": 0.00014034475893708165, + "loss": 2.0517, + "step": 312590 + }, + { + "epoch": 0.74, + "grad_norm": 2.6875, + "learning_rate": 0.000140343067738402, + "loss": 2.0437, + "step": 312595 + }, + { + "epoch": 0.74, + "grad_norm": 2.078125, + "learning_rate": 0.00014034137652594043, + "loss": 1.9969, + "step": 312600 + }, + { + "epoch": 0.74, + "grad_norm": 2.328125, + "learning_rate": 0.0001403396852996975, + "loss": 1.9782, + "step": 312605 + }, + { + "epoch": 0.74, + "grad_norm": 2.265625, + "learning_rate": 0.00014033799405967386, + "loss": 2.0565, + "step": 312610 + }, + { + "epoch": 0.74, + "grad_norm": 2.1875, + "learning_rate": 0.00014033630280586997, + "loss": 2.083, + "step": 312615 + }, + { + "epoch": 0.74, + "grad_norm": 2.078125, + "learning_rate": 0.0001403346115382865, + "loss": 2.1442, + "step": 312620 + }, + { + "epoch": 0.74, + "grad_norm": 2.265625, + "learning_rate": 0.000140332920256924, + "loss": 1.9074, + "step": 312625 + }, + { + "epoch": 0.74, + "grad_norm": 2.203125, + "learning_rate": 0.000140331228961783, + "loss": 2.2662, + "step": 312630 + }, + { + "epoch": 0.74, + "grad_norm": 2.25, + "learning_rate": 0.00014032953765286414, + "loss": 2.087, + "step": 312635 + }, + { + "epoch": 0.74, + "grad_norm": 2.453125, + "learning_rate": 0.000140327846330168, + "loss": 2.0689, + "step": 312640 + }, + { + "epoch": 0.74, + "grad_norm": 2.015625, + "learning_rate": 0.00014032615499369512, + "loss": 2.101, + "step": 312645 + }, + { + "epoch": 0.74, + "grad_norm": 2.421875, + "learning_rate": 0.00014032446364344608, + "loss": 2.073, + "step": 312650 + }, + { + "epoch": 0.74, + "grad_norm": 2.25, + "learning_rate": 0.0001403227722794215, + "loss": 2.0987, + "step": 312655 + }, + { + "epoch": 0.74, + "grad_norm": 2.796875, + "learning_rate": 0.0001403210809016219, + "loss": 2.0869, + "step": 312660 + }, + { + "epoch": 0.74, + "grad_norm": 2.046875, + "learning_rate": 0.0001403193895100479, + "loss": 1.966, + "step": 312665 + }, + { + "epoch": 0.74, + "grad_norm": 1.984375, + "learning_rate": 0.00014031769810470007, + "loss": 2.0896, + "step": 312670 + }, + { + "epoch": 0.74, + "grad_norm": 2.046875, + "learning_rate": 0.00014031600668557901, + "loss": 2.0868, + "step": 312675 + }, + { + "epoch": 0.74, + "grad_norm": 2.390625, + "learning_rate": 0.0001403143152526852, + "loss": 1.9057, + "step": 312680 + }, + { + "epoch": 0.74, + "grad_norm": 2.234375, + "learning_rate": 0.00014031262380601934, + "loss": 2.0622, + "step": 312685 + }, + { + "epoch": 0.74, + "grad_norm": 2.671875, + "learning_rate": 0.00014031093234558197, + "loss": 2.0003, + "step": 312690 + }, + { + "epoch": 0.74, + "grad_norm": 2.4375, + "learning_rate": 0.00014030924087137363, + "loss": 2.0294, + "step": 312695 + }, + { + "epoch": 0.74, + "grad_norm": 2.109375, + "learning_rate": 0.00014030754938339492, + "loss": 1.9236, + "step": 312700 + }, + { + "epoch": 0.74, + "grad_norm": 2.171875, + "learning_rate": 0.00014030585788164644, + "loss": 2.1508, + "step": 312705 + }, + { + "epoch": 0.74, + "grad_norm": 2.1875, + "learning_rate": 0.00014030416636612872, + "loss": 2.0618, + "step": 312710 + }, + { + "epoch": 0.74, + "grad_norm": 2.375, + "learning_rate": 0.00014030247483684237, + "loss": 2.1755, + "step": 312715 + }, + { + "epoch": 0.74, + "grad_norm": 2.21875, + "learning_rate": 0.00014030078329378798, + "loss": 2.0146, + "step": 312720 + }, + { + "epoch": 0.74, + "grad_norm": 2.15625, + "learning_rate": 0.0001402990917369661, + "loss": 2.1186, + "step": 312725 + }, + { + "epoch": 0.74, + "grad_norm": 2.21875, + "learning_rate": 0.0001402974001663773, + "loss": 2.0382, + "step": 312730 + }, + { + "epoch": 0.74, + "grad_norm": 2.21875, + "learning_rate": 0.00014029570858202224, + "loss": 1.9733, + "step": 312735 + }, + { + "epoch": 0.74, + "grad_norm": 2.25, + "learning_rate": 0.00014029401698390135, + "loss": 1.9928, + "step": 312740 + }, + { + "epoch": 0.74, + "grad_norm": 2.4375, + "learning_rate": 0.0001402923253720154, + "loss": 2.1487, + "step": 312745 + }, + { + "epoch": 0.74, + "grad_norm": 2.171875, + "learning_rate": 0.00014029063374636474, + "loss": 1.9821, + "step": 312750 + }, + { + "epoch": 0.74, + "grad_norm": 2.078125, + "learning_rate": 0.00014028894210695017, + "loss": 2.1997, + "step": 312755 + }, + { + "epoch": 0.74, + "grad_norm": 2.109375, + "learning_rate": 0.00014028725045377212, + "loss": 2.1083, + "step": 312760 + }, + { + "epoch": 0.74, + "grad_norm": 2.484375, + "learning_rate": 0.0001402855587868312, + "loss": 2.0421, + "step": 312765 + }, + { + "epoch": 0.74, + "grad_norm": 2.28125, + "learning_rate": 0.00014028386710612803, + "loss": 2.1893, + "step": 312770 + }, + { + "epoch": 0.74, + "grad_norm": 2.40625, + "learning_rate": 0.00014028217541166315, + "loss": 2.0568, + "step": 312775 + }, + { + "epoch": 0.74, + "grad_norm": 2.109375, + "learning_rate": 0.00014028048370343715, + "loss": 1.8704, + "step": 312780 + }, + { + "epoch": 0.74, + "grad_norm": 2.328125, + "learning_rate": 0.0001402787919814506, + "loss": 2.1324, + "step": 312785 + }, + { + "epoch": 0.74, + "grad_norm": 2.09375, + "learning_rate": 0.00014027710024570408, + "loss": 1.9734, + "step": 312790 + }, + { + "epoch": 0.74, + "grad_norm": 1.96875, + "learning_rate": 0.0001402754084961982, + "loss": 2.112, + "step": 312795 + }, + { + "epoch": 0.74, + "grad_norm": 2.046875, + "learning_rate": 0.00014027371673293348, + "loss": 1.9841, + "step": 312800 + }, + { + "epoch": 0.74, + "grad_norm": 2.4375, + "learning_rate": 0.00014027202495591056, + "loss": 2.0964, + "step": 312805 + }, + { + "epoch": 0.74, + "grad_norm": 2.25, + "learning_rate": 0.00014027033316512996, + "loss": 2.1198, + "step": 312810 + }, + { + "epoch": 0.74, + "grad_norm": 2.390625, + "learning_rate": 0.0001402686413605923, + "loss": 2.1592, + "step": 312815 + }, + { + "epoch": 0.74, + "grad_norm": 2.890625, + "learning_rate": 0.00014026694954229812, + "loss": 2.167, + "step": 312820 + }, + { + "epoch": 0.74, + "grad_norm": 2.21875, + "learning_rate": 0.00014026525771024807, + "loss": 2.0929, + "step": 312825 + }, + { + "epoch": 0.74, + "grad_norm": 2.1875, + "learning_rate": 0.00014026356586444263, + "loss": 2.1734, + "step": 312830 + }, + { + "epoch": 0.74, + "grad_norm": 2.171875, + "learning_rate": 0.00014026187400488241, + "loss": 2.1551, + "step": 312835 + }, + { + "epoch": 0.74, + "grad_norm": 2.265625, + "learning_rate": 0.00014026018213156804, + "loss": 2.0228, + "step": 312840 + }, + { + "epoch": 0.74, + "grad_norm": 2.34375, + "learning_rate": 0.00014025849024450004, + "loss": 2.055, + "step": 312845 + }, + { + "epoch": 0.74, + "grad_norm": 2.515625, + "learning_rate": 0.00014025679834367904, + "loss": 2.086, + "step": 312850 + }, + { + "epoch": 0.74, + "grad_norm": 2.125, + "learning_rate": 0.00014025510642910556, + "loss": 2.0722, + "step": 312855 + }, + { + "epoch": 0.74, + "grad_norm": 1.8828125, + "learning_rate": 0.00014025341450078023, + "loss": 1.9982, + "step": 312860 + }, + { + "epoch": 0.74, + "grad_norm": 2.453125, + "learning_rate": 0.00014025172255870358, + "loss": 2.1073, + "step": 312865 + }, + { + "epoch": 0.74, + "grad_norm": 2.3125, + "learning_rate": 0.0001402500306028762, + "loss": 2.0293, + "step": 312870 + }, + { + "epoch": 0.74, + "grad_norm": 2.078125, + "learning_rate": 0.00014024833863329872, + "loss": 2.2458, + "step": 312875 + }, + { + "epoch": 0.74, + "grad_norm": 2.359375, + "learning_rate": 0.00014024664664997165, + "loss": 2.0627, + "step": 312880 + }, + { + "epoch": 0.74, + "grad_norm": 2.375, + "learning_rate": 0.0001402449546528956, + "loss": 2.1068, + "step": 312885 + }, + { + "epoch": 0.74, + "grad_norm": 2.5625, + "learning_rate": 0.00014024326264207114, + "loss": 1.9209, + "step": 312890 + }, + { + "epoch": 0.74, + "grad_norm": 2.40625, + "learning_rate": 0.00014024157061749885, + "loss": 1.9399, + "step": 312895 + }, + { + "epoch": 0.74, + "grad_norm": 2.421875, + "learning_rate": 0.00014023987857917934, + "loss": 2.0432, + "step": 312900 + }, + { + "epoch": 0.74, + "grad_norm": 2.40625, + "learning_rate": 0.0001402381865271131, + "loss": 2.19, + "step": 312905 + }, + { + "epoch": 0.74, + "grad_norm": 2.375, + "learning_rate": 0.00014023649446130083, + "loss": 2.0245, + "step": 312910 + }, + { + "epoch": 0.74, + "grad_norm": 2.34375, + "learning_rate": 0.000140234802381743, + "loss": 2.2104, + "step": 312915 + }, + { + "epoch": 0.74, + "grad_norm": 2.34375, + "learning_rate": 0.00014023311028844027, + "loss": 1.9822, + "step": 312920 + }, + { + "epoch": 0.74, + "grad_norm": 2.21875, + "learning_rate": 0.00014023141818139313, + "loss": 1.9011, + "step": 312925 + }, + { + "epoch": 0.74, + "grad_norm": 2.234375, + "learning_rate": 0.00014022972606060223, + "loss": 2.0542, + "step": 312930 + }, + { + "epoch": 0.74, + "grad_norm": 2.03125, + "learning_rate": 0.00014022803392606814, + "loss": 2.0843, + "step": 312935 + }, + { + "epoch": 0.74, + "grad_norm": 2.671875, + "learning_rate": 0.00014022634177779142, + "loss": 2.0627, + "step": 312940 + }, + { + "epoch": 0.74, + "grad_norm": 2.34375, + "learning_rate": 0.00014022464961577265, + "loss": 1.8436, + "step": 312945 + }, + { + "epoch": 0.74, + "grad_norm": 2.484375, + "learning_rate": 0.0001402229574400124, + "loss": 1.8681, + "step": 312950 + }, + { + "epoch": 0.74, + "grad_norm": 1.7578125, + "learning_rate": 0.00014022126525051125, + "loss": 2.0911, + "step": 312955 + }, + { + "epoch": 0.74, + "grad_norm": 2.125, + "learning_rate": 0.0001402195730472698, + "loss": 2.1245, + "step": 312960 + }, + { + "epoch": 0.74, + "grad_norm": 2.328125, + "learning_rate": 0.0001402178808302886, + "loss": 2.0141, + "step": 312965 + }, + { + "epoch": 0.74, + "grad_norm": 2.109375, + "learning_rate": 0.00014021618859956828, + "loss": 2.0739, + "step": 312970 + }, + { + "epoch": 0.74, + "grad_norm": 2.078125, + "learning_rate": 0.00014021449635510938, + "loss": 2.023, + "step": 312975 + }, + { + "epoch": 0.74, + "grad_norm": 2.0, + "learning_rate": 0.00014021280409691242, + "loss": 2.053, + "step": 312980 + }, + { + "epoch": 0.74, + "grad_norm": 1.9921875, + "learning_rate": 0.00014021111182497807, + "loss": 2.0583, + "step": 312985 + }, + { + "epoch": 0.74, + "grad_norm": 2.203125, + "learning_rate": 0.0001402094195393069, + "loss": 2.0138, + "step": 312990 + }, + { + "epoch": 0.74, + "grad_norm": 2.25, + "learning_rate": 0.00014020772723989943, + "loss": 1.868, + "step": 312995 + }, + { + "epoch": 0.74, + "grad_norm": 2.5625, + "learning_rate": 0.0001402060349267563, + "loss": 1.9877, + "step": 313000 + }, + { + "epoch": 0.74, + "grad_norm": 2.25, + "learning_rate": 0.00014020434259987802, + "loss": 1.9986, + "step": 313005 + }, + { + "epoch": 0.74, + "grad_norm": 2.328125, + "learning_rate": 0.00014020265025926524, + "loss": 1.9699, + "step": 313010 + }, + { + "epoch": 0.74, + "grad_norm": 2.640625, + "learning_rate": 0.0001402009579049185, + "loss": 2.0221, + "step": 313015 + }, + { + "epoch": 0.74, + "grad_norm": 2.046875, + "learning_rate": 0.00014019926553683837, + "loss": 2.0388, + "step": 313020 + }, + { + "epoch": 0.74, + "grad_norm": 2.15625, + "learning_rate": 0.00014019757315502547, + "loss": 1.8873, + "step": 313025 + }, + { + "epoch": 0.74, + "grad_norm": 2.203125, + "learning_rate": 0.00014019588075948032, + "loss": 2.1656, + "step": 313030 + }, + { + "epoch": 0.74, + "grad_norm": 2.0, + "learning_rate": 0.00014019418835020352, + "loss": 1.9583, + "step": 313035 + }, + { + "epoch": 0.74, + "grad_norm": 1.8359375, + "learning_rate": 0.0001401924959271957, + "loss": 2.0732, + "step": 313040 + }, + { + "epoch": 0.74, + "grad_norm": 2.625, + "learning_rate": 0.00014019080349045738, + "loss": 2.1857, + "step": 313045 + }, + { + "epoch": 0.74, + "grad_norm": 2.5, + "learning_rate": 0.00014018911103998912, + "loss": 2.1229, + "step": 313050 + }, + { + "epoch": 0.74, + "grad_norm": 2.59375, + "learning_rate": 0.00014018741857579157, + "loss": 2.0277, + "step": 313055 + }, + { + "epoch": 0.74, + "grad_norm": 2.234375, + "learning_rate": 0.0001401857260978652, + "loss": 2.0659, + "step": 313060 + }, + { + "epoch": 0.74, + "grad_norm": 2.109375, + "learning_rate": 0.00014018403360621076, + "loss": 2.1322, + "step": 313065 + }, + { + "epoch": 0.74, + "grad_norm": 2.515625, + "learning_rate": 0.00014018234110082863, + "loss": 2.1446, + "step": 313070 + }, + { + "epoch": 0.74, + "grad_norm": 2.0625, + "learning_rate": 0.00014018064858171956, + "loss": 2.0343, + "step": 313075 + }, + { + "epoch": 0.74, + "grad_norm": 2.359375, + "learning_rate": 0.00014017895604888402, + "loss": 2.1415, + "step": 313080 + }, + { + "epoch": 0.74, + "grad_norm": 2.046875, + "learning_rate": 0.00014017726350232262, + "loss": 2.2303, + "step": 313085 + }, + { + "epoch": 0.74, + "grad_norm": 2.140625, + "learning_rate": 0.00014017557094203592, + "loss": 2.0111, + "step": 313090 + }, + { + "epoch": 0.74, + "grad_norm": 2.28125, + "learning_rate": 0.00014017387836802453, + "loss": 2.172, + "step": 313095 + }, + { + "epoch": 0.74, + "grad_norm": 2.765625, + "learning_rate": 0.00014017218578028903, + "loss": 1.7543, + "step": 313100 + }, + { + "epoch": 0.74, + "grad_norm": 2.140625, + "learning_rate": 0.00014017049317883, + "loss": 2.0292, + "step": 313105 + }, + { + "epoch": 0.74, + "grad_norm": 2.5625, + "learning_rate": 0.00014016880056364796, + "loss": 2.0369, + "step": 313110 + }, + { + "epoch": 0.74, + "grad_norm": 2.796875, + "learning_rate": 0.00014016710793474351, + "loss": 2.0782, + "step": 313115 + }, + { + "epoch": 0.74, + "grad_norm": 2.25, + "learning_rate": 0.0001401654152921173, + "loss": 1.8096, + "step": 313120 + }, + { + "epoch": 0.74, + "grad_norm": 2.125, + "learning_rate": 0.00014016372263576983, + "loss": 2.2998, + "step": 313125 + }, + { + "epoch": 0.74, + "grad_norm": 3.09375, + "learning_rate": 0.0001401620299657017, + "loss": 1.8842, + "step": 313130 + }, + { + "epoch": 0.74, + "grad_norm": 2.953125, + "learning_rate": 0.0001401603372819135, + "loss": 1.9707, + "step": 313135 + }, + { + "epoch": 0.74, + "grad_norm": 2.40625, + "learning_rate": 0.0001401586445844058, + "loss": 2.2389, + "step": 313140 + }, + { + "epoch": 0.74, + "grad_norm": 2.125, + "learning_rate": 0.00014015695187317921, + "loss": 2.0178, + "step": 313145 + }, + { + "epoch": 0.74, + "grad_norm": 2.015625, + "learning_rate": 0.00014015525914823425, + "loss": 2.0391, + "step": 313150 + }, + { + "epoch": 0.74, + "grad_norm": 2.34375, + "learning_rate": 0.00014015356640957152, + "loss": 2.1745, + "step": 313155 + }, + { + "epoch": 0.74, + "grad_norm": 2.265625, + "learning_rate": 0.00014015187365719157, + "loss": 2.1091, + "step": 313160 + }, + { + "epoch": 0.74, + "grad_norm": 1.8984375, + "learning_rate": 0.00014015018089109506, + "loss": 2.1041, + "step": 313165 + }, + { + "epoch": 0.74, + "grad_norm": 2.390625, + "learning_rate": 0.00014014848811128253, + "loss": 2.0675, + "step": 313170 + }, + { + "epoch": 0.74, + "grad_norm": 2.640625, + "learning_rate": 0.0001401467953177545, + "loss": 2.038, + "step": 313175 + }, + { + "epoch": 0.74, + "grad_norm": 2.703125, + "learning_rate": 0.00014014510251051163, + "loss": 2.127, + "step": 313180 + }, + { + "epoch": 0.74, + "grad_norm": 2.59375, + "learning_rate": 0.00014014340968955446, + "loss": 2.0868, + "step": 313185 + }, + { + "epoch": 0.74, + "grad_norm": 2.75, + "learning_rate": 0.00014014171685488357, + "loss": 1.9824, + "step": 313190 + }, + { + "epoch": 0.74, + "grad_norm": 2.875, + "learning_rate": 0.00014014002400649953, + "loss": 1.9973, + "step": 313195 + }, + { + "epoch": 0.74, + "grad_norm": 2.265625, + "learning_rate": 0.00014013833114440296, + "loss": 2.0931, + "step": 313200 + }, + { + "epoch": 0.74, + "grad_norm": 1.8671875, + "learning_rate": 0.00014013663826859438, + "loss": 2.137, + "step": 313205 + }, + { + "epoch": 0.74, + "grad_norm": 2.265625, + "learning_rate": 0.0001401349453790744, + "loss": 1.973, + "step": 313210 + }, + { + "epoch": 0.74, + "grad_norm": 2.40625, + "learning_rate": 0.00014013325247584364, + "loss": 2.0949, + "step": 313215 + }, + { + "epoch": 0.74, + "grad_norm": 2.15625, + "learning_rate": 0.00014013155955890257, + "loss": 1.8513, + "step": 313220 + }, + { + "epoch": 0.74, + "grad_norm": 1.8984375, + "learning_rate": 0.00014012986662825184, + "loss": 2.1607, + "step": 313225 + }, + { + "epoch": 0.74, + "grad_norm": 2.046875, + "learning_rate": 0.00014012817368389203, + "loss": 1.9641, + "step": 313230 + }, + { + "epoch": 0.74, + "grad_norm": 2.546875, + "learning_rate": 0.00014012648072582375, + "loss": 2.1066, + "step": 313235 + }, + { + "epoch": 0.74, + "grad_norm": 2.328125, + "learning_rate": 0.0001401247877540475, + "loss": 2.1741, + "step": 313240 + }, + { + "epoch": 0.74, + "grad_norm": 2.25, + "learning_rate": 0.0001401230947685639, + "loss": 1.9266, + "step": 313245 + }, + { + "epoch": 0.74, + "grad_norm": 2.21875, + "learning_rate": 0.0001401214017693735, + "loss": 2.0776, + "step": 313250 + }, + { + "epoch": 0.74, + "grad_norm": 1.984375, + "learning_rate": 0.00014011970875647692, + "loss": 2.129, + "step": 313255 + }, + { + "epoch": 0.74, + "grad_norm": 2.8125, + "learning_rate": 0.00014011801572987473, + "loss": 1.9461, + "step": 313260 + }, + { + "epoch": 0.74, + "grad_norm": 2.171875, + "learning_rate": 0.00014011632268956747, + "loss": 2.0272, + "step": 313265 + }, + { + "epoch": 0.74, + "grad_norm": 2.390625, + "learning_rate": 0.00014011462963555576, + "loss": 2.0919, + "step": 313270 + }, + { + "epoch": 0.74, + "grad_norm": 2.234375, + "learning_rate": 0.00014011293656784014, + "loss": 2.0681, + "step": 313275 + }, + { + "epoch": 0.74, + "grad_norm": 2.0625, + "learning_rate": 0.0001401112434864213, + "loss": 1.8803, + "step": 313280 + }, + { + "epoch": 0.74, + "grad_norm": 2.203125, + "learning_rate": 0.00014010955039129964, + "loss": 1.8602, + "step": 313285 + }, + { + "epoch": 0.74, + "grad_norm": 2.109375, + "learning_rate": 0.00014010785728247588, + "loss": 2.094, + "step": 313290 + }, + { + "epoch": 0.74, + "grad_norm": 2.015625, + "learning_rate": 0.00014010616415995053, + "loss": 2.1274, + "step": 313295 + }, + { + "epoch": 0.74, + "grad_norm": 2.203125, + "learning_rate": 0.0001401044710237242, + "loss": 2.1351, + "step": 313300 + }, + { + "epoch": 0.74, + "grad_norm": 2.28125, + "learning_rate": 0.00014010277787379743, + "loss": 2.1861, + "step": 313305 + }, + { + "epoch": 0.74, + "grad_norm": 2.125, + "learning_rate": 0.00014010108471017085, + "loss": 2.0692, + "step": 313310 + }, + { + "epoch": 0.74, + "grad_norm": 2.078125, + "learning_rate": 0.000140099391532845, + "loss": 1.9614, + "step": 313315 + }, + { + "epoch": 0.74, + "grad_norm": 2.453125, + "learning_rate": 0.00014009769834182048, + "loss": 2.0078, + "step": 313320 + }, + { + "epoch": 0.74, + "grad_norm": 2.265625, + "learning_rate": 0.00014009600513709784, + "loss": 1.8866, + "step": 313325 + }, + { + "epoch": 0.74, + "grad_norm": 2.34375, + "learning_rate": 0.00014009431191867769, + "loss": 1.9175, + "step": 313330 + }, + { + "epoch": 0.74, + "grad_norm": 2.109375, + "learning_rate": 0.0001400926186865606, + "loss": 1.9294, + "step": 313335 + }, + { + "epoch": 0.74, + "grad_norm": 2.203125, + "learning_rate": 0.00014009092544074714, + "loss": 2.2064, + "step": 313340 + }, + { + "epoch": 0.74, + "grad_norm": 3.34375, + "learning_rate": 0.0001400892321812379, + "loss": 2.1082, + "step": 313345 + }, + { + "epoch": 0.74, + "grad_norm": 2.515625, + "learning_rate": 0.00014008753890803344, + "loss": 1.9794, + "step": 313350 + }, + { + "epoch": 0.74, + "grad_norm": 2.28125, + "learning_rate": 0.00014008584562113434, + "loss": 2.0722, + "step": 313355 + }, + { + "epoch": 0.74, + "grad_norm": 2.90625, + "learning_rate": 0.0001400841523205412, + "loss": 2.1601, + "step": 313360 + }, + { + "epoch": 0.74, + "grad_norm": 2.140625, + "learning_rate": 0.0001400824590062546, + "loss": 1.9771, + "step": 313365 + }, + { + "epoch": 0.74, + "grad_norm": 2.359375, + "learning_rate": 0.0001400807656782751, + "loss": 2.0729, + "step": 313370 + }, + { + "epoch": 0.74, + "grad_norm": 2.140625, + "learning_rate": 0.00014007907233660327, + "loss": 2.0765, + "step": 313375 + }, + { + "epoch": 0.74, + "grad_norm": 2.234375, + "learning_rate": 0.00014007737898123968, + "loss": 2.0954, + "step": 313380 + }, + { + "epoch": 0.74, + "grad_norm": 2.484375, + "learning_rate": 0.00014007568561218495, + "loss": 2.1821, + "step": 313385 + }, + { + "epoch": 0.74, + "grad_norm": 2.046875, + "learning_rate": 0.00014007399222943967, + "loss": 1.7763, + "step": 313390 + }, + { + "epoch": 0.74, + "grad_norm": 2.53125, + "learning_rate": 0.00014007229883300436, + "loss": 2.1034, + "step": 313395 + }, + { + "epoch": 0.74, + "grad_norm": 2.0625, + "learning_rate": 0.00014007060542287964, + "loss": 1.9684, + "step": 313400 + }, + { + "epoch": 0.74, + "grad_norm": 1.9921875, + "learning_rate": 0.00014006891199906606, + "loss": 2.1443, + "step": 313405 + }, + { + "epoch": 0.74, + "grad_norm": 2.25, + "learning_rate": 0.0001400672185615642, + "loss": 2.1975, + "step": 313410 + }, + { + "epoch": 0.74, + "grad_norm": 2.5, + "learning_rate": 0.0001400655251103747, + "loss": 1.9406, + "step": 313415 + }, + { + "epoch": 0.74, + "grad_norm": 1.875, + "learning_rate": 0.00014006383164549805, + "loss": 2.1813, + "step": 313420 + }, + { + "epoch": 0.74, + "grad_norm": 2.53125, + "learning_rate": 0.00014006213816693489, + "loss": 2.0219, + "step": 313425 + }, + { + "epoch": 0.74, + "grad_norm": 2.078125, + "learning_rate": 0.00014006044467468576, + "loss": 2.0792, + "step": 313430 + }, + { + "epoch": 0.74, + "grad_norm": 2.265625, + "learning_rate": 0.00014005875116875123, + "loss": 2.2115, + "step": 313435 + }, + { + "epoch": 0.74, + "grad_norm": 2.109375, + "learning_rate": 0.00014005705764913196, + "loss": 2.028, + "step": 313440 + }, + { + "epoch": 0.74, + "grad_norm": 2.0, + "learning_rate": 0.00014005536411582846, + "loss": 2.1391, + "step": 313445 + }, + { + "epoch": 0.74, + "grad_norm": 2.296875, + "learning_rate": 0.0001400536705688413, + "loss": 1.8622, + "step": 313450 + }, + { + "epoch": 0.74, + "grad_norm": 2.46875, + "learning_rate": 0.00014005197700817107, + "loss": 2.1769, + "step": 313455 + }, + { + "epoch": 0.74, + "grad_norm": 2.171875, + "learning_rate": 0.0001400502834338184, + "loss": 2.0014, + "step": 313460 + }, + { + "epoch": 0.74, + "grad_norm": 2.390625, + "learning_rate": 0.0001400485898457838, + "loss": 2.1277, + "step": 313465 + }, + { + "epoch": 0.74, + "grad_norm": 2.140625, + "learning_rate": 0.00014004689624406787, + "loss": 2.1285, + "step": 313470 + }, + { + "epoch": 0.74, + "grad_norm": 2.171875, + "learning_rate": 0.0001400452026286712, + "loss": 1.9428, + "step": 313475 + }, + { + "epoch": 0.74, + "grad_norm": 2.515625, + "learning_rate": 0.00014004350899959436, + "loss": 1.9515, + "step": 313480 + }, + { + "epoch": 0.74, + "grad_norm": 2.125, + "learning_rate": 0.00014004181535683794, + "loss": 2.1589, + "step": 313485 + }, + { + "epoch": 0.74, + "grad_norm": 1.875, + "learning_rate": 0.00014004012170040252, + "loss": 2.0059, + "step": 313490 + }, + { + "epoch": 0.74, + "grad_norm": 2.125, + "learning_rate": 0.00014003842803028864, + "loss": 2.2311, + "step": 313495 + }, + { + "epoch": 0.74, + "grad_norm": 2.078125, + "learning_rate": 0.0001400367343464969, + "loss": 1.9741, + "step": 313500 + }, + { + "epoch": 0.74, + "grad_norm": 2.265625, + "learning_rate": 0.00014003504064902792, + "loss": 2.0913, + "step": 313505 + }, + { + "epoch": 0.74, + "grad_norm": 2.46875, + "learning_rate": 0.00014003334693788225, + "loss": 1.9161, + "step": 313510 + }, + { + "epoch": 0.74, + "grad_norm": 1.8671875, + "learning_rate": 0.00014003165321306043, + "loss": 2.0513, + "step": 313515 + }, + { + "epoch": 0.74, + "grad_norm": 2.296875, + "learning_rate": 0.00014002995947456307, + "loss": 1.985, + "step": 313520 + }, + { + "epoch": 0.74, + "grad_norm": 2.53125, + "learning_rate": 0.00014002826572239078, + "loss": 2.0666, + "step": 313525 + }, + { + "epoch": 0.74, + "grad_norm": 2.078125, + "learning_rate": 0.00014002657195654413, + "loss": 1.9619, + "step": 313530 + }, + { + "epoch": 0.74, + "grad_norm": 2.171875, + "learning_rate": 0.0001400248781770236, + "loss": 1.9512, + "step": 313535 + }, + { + "epoch": 0.74, + "grad_norm": 2.328125, + "learning_rate": 0.0001400231843838299, + "loss": 2.0893, + "step": 313540 + }, + { + "epoch": 0.74, + "grad_norm": 2.25, + "learning_rate": 0.0001400214905769635, + "loss": 2.1436, + "step": 313545 + }, + { + "epoch": 0.74, + "grad_norm": 2.296875, + "learning_rate": 0.0001400197967564251, + "loss": 2.0474, + "step": 313550 + }, + { + "epoch": 0.74, + "grad_norm": 2.203125, + "learning_rate": 0.0001400181029222152, + "loss": 1.9573, + "step": 313555 + }, + { + "epoch": 0.74, + "grad_norm": 2.265625, + "learning_rate": 0.00014001640907433438, + "loss": 1.9812, + "step": 313560 + }, + { + "epoch": 0.74, + "grad_norm": 2.0, + "learning_rate": 0.00014001471521278322, + "loss": 2.1125, + "step": 313565 + }, + { + "epoch": 0.74, + "grad_norm": 2.09375, + "learning_rate": 0.00014001302133756228, + "loss": 2.1568, + "step": 313570 + }, + { + "epoch": 0.74, + "grad_norm": 2.359375, + "learning_rate": 0.00014001132744867223, + "loss": 2.1455, + "step": 313575 + }, + { + "epoch": 0.74, + "grad_norm": 2.1875, + "learning_rate": 0.00014000963354611354, + "loss": 2.153, + "step": 313580 + }, + { + "epoch": 0.74, + "grad_norm": 2.328125, + "learning_rate": 0.00014000793962988685, + "loss": 2.0273, + "step": 313585 + }, + { + "epoch": 0.74, + "grad_norm": 2.03125, + "learning_rate": 0.00014000624569999271, + "loss": 2.2196, + "step": 313590 + }, + { + "epoch": 0.74, + "grad_norm": 1.6875, + "learning_rate": 0.00014000455175643172, + "loss": 1.8045, + "step": 313595 + }, + { + "epoch": 0.74, + "grad_norm": 2.015625, + "learning_rate": 0.00014000285779920446, + "loss": 1.9229, + "step": 313600 + }, + { + "epoch": 0.74, + "grad_norm": 2.546875, + "learning_rate": 0.0001400011638283115, + "loss": 1.9668, + "step": 313605 + }, + { + "epoch": 0.74, + "grad_norm": 2.203125, + "learning_rate": 0.0001399994698437534, + "loss": 1.9956, + "step": 313610 + }, + { + "epoch": 0.74, + "grad_norm": 2.015625, + "learning_rate": 0.00013999777584553075, + "loss": 2.0268, + "step": 313615 + }, + { + "epoch": 0.74, + "grad_norm": 2.640625, + "learning_rate": 0.00013999608183364414, + "loss": 2.0452, + "step": 313620 + }, + { + "epoch": 0.74, + "grad_norm": 1.8515625, + "learning_rate": 0.00013999438780809415, + "loss": 1.94, + "step": 313625 + }, + { + "epoch": 0.74, + "grad_norm": 2.3125, + "learning_rate": 0.00013999269376888134, + "loss": 1.9379, + "step": 313630 + }, + { + "epoch": 0.74, + "grad_norm": 2.234375, + "learning_rate": 0.00013999099971600631, + "loss": 2.3153, + "step": 313635 + }, + { + "epoch": 0.74, + "grad_norm": 2.34375, + "learning_rate": 0.00013998930564946963, + "loss": 2.2154, + "step": 313640 + }, + { + "epoch": 0.74, + "grad_norm": 1.9296875, + "learning_rate": 0.0001399876115692719, + "loss": 2.1086, + "step": 313645 + }, + { + "epoch": 0.74, + "grad_norm": 2.15625, + "learning_rate": 0.00013998591747541362, + "loss": 2.0073, + "step": 313650 + }, + { + "epoch": 0.74, + "grad_norm": 2.421875, + "learning_rate": 0.00013998422336789548, + "loss": 1.9732, + "step": 313655 + }, + { + "epoch": 0.74, + "grad_norm": 2.34375, + "learning_rate": 0.00013998252924671799, + "loss": 2.284, + "step": 313660 + }, + { + "epoch": 0.74, + "grad_norm": 2.78125, + "learning_rate": 0.00013998083511188172, + "loss": 1.9536, + "step": 313665 + }, + { + "epoch": 0.74, + "grad_norm": 1.96875, + "learning_rate": 0.00013997914096338732, + "loss": 2.0578, + "step": 313670 + }, + { + "epoch": 0.74, + "grad_norm": 2.0, + "learning_rate": 0.00013997744680123526, + "loss": 2.0165, + "step": 313675 + }, + { + "epoch": 0.74, + "grad_norm": 3.40625, + "learning_rate": 0.0001399757526254262, + "loss": 2.1024, + "step": 313680 + }, + { + "epoch": 0.74, + "grad_norm": 2.296875, + "learning_rate": 0.0001399740584359607, + "loss": 1.8011, + "step": 313685 + }, + { + "epoch": 0.74, + "grad_norm": 2.078125, + "learning_rate": 0.00013997236423283937, + "loss": 2.0995, + "step": 313690 + }, + { + "epoch": 0.74, + "grad_norm": 2.578125, + "learning_rate": 0.0001399706700160627, + "loss": 2.0192, + "step": 313695 + }, + { + "epoch": 0.74, + "grad_norm": 2.4375, + "learning_rate": 0.00013996897578563139, + "loss": 2.0573, + "step": 313700 + }, + { + "epoch": 0.74, + "grad_norm": 2.328125, + "learning_rate": 0.0001399672815415459, + "loss": 2.0886, + "step": 313705 + }, + { + "epoch": 0.74, + "grad_norm": 2.171875, + "learning_rate": 0.00013996558728380688, + "loss": 2.2194, + "step": 313710 + }, + { + "epoch": 0.74, + "grad_norm": 2.0625, + "learning_rate": 0.00013996389301241488, + "loss": 1.9694, + "step": 313715 + }, + { + "epoch": 0.74, + "grad_norm": 1.9609375, + "learning_rate": 0.00013996219872737048, + "loss": 1.9903, + "step": 313720 + }, + { + "epoch": 0.74, + "grad_norm": 2.078125, + "learning_rate": 0.0001399605044286743, + "loss": 1.9312, + "step": 313725 + }, + { + "epoch": 0.74, + "grad_norm": 2.40625, + "learning_rate": 0.00013995881011632686, + "loss": 2.1133, + "step": 313730 + }, + { + "epoch": 0.74, + "grad_norm": 2.109375, + "learning_rate": 0.00013995711579032878, + "loss": 2.029, + "step": 313735 + }, + { + "epoch": 0.74, + "grad_norm": 2.921875, + "learning_rate": 0.00013995542145068063, + "loss": 2.197, + "step": 313740 + }, + { + "epoch": 0.74, + "grad_norm": 1.8359375, + "learning_rate": 0.00013995372709738296, + "loss": 2.0656, + "step": 313745 + }, + { + "epoch": 0.74, + "grad_norm": 2.40625, + "learning_rate": 0.0001399520327304364, + "loss": 2.0695, + "step": 313750 + }, + { + "epoch": 0.74, + "grad_norm": 2.171875, + "learning_rate": 0.00013995033834984148, + "loss": 2.0674, + "step": 313755 + }, + { + "epoch": 0.74, + "grad_norm": 2.46875, + "learning_rate": 0.00013994864395559881, + "loss": 2.191, + "step": 313760 + }, + { + "epoch": 0.74, + "grad_norm": 2.234375, + "learning_rate": 0.00013994694954770894, + "loss": 2.0162, + "step": 313765 + }, + { + "epoch": 0.74, + "grad_norm": 2.28125, + "learning_rate": 0.00013994525512617248, + "loss": 2.1247, + "step": 313770 + }, + { + "epoch": 0.74, + "grad_norm": 2.09375, + "learning_rate": 0.00013994356069099001, + "loss": 2.0223, + "step": 313775 + }, + { + "epoch": 0.74, + "grad_norm": 2.390625, + "learning_rate": 0.0001399418662421621, + "loss": 2.0319, + "step": 313780 + }, + { + "epoch": 0.74, + "grad_norm": 2.4375, + "learning_rate": 0.0001399401717796893, + "loss": 2.0114, + "step": 313785 + }, + { + "epoch": 0.74, + "grad_norm": 1.78125, + "learning_rate": 0.00013993847730357222, + "loss": 2.0603, + "step": 313790 + }, + { + "epoch": 0.74, + "grad_norm": 2.75, + "learning_rate": 0.00013993678281381145, + "loss": 1.9283, + "step": 313795 + }, + { + "epoch": 0.74, + "grad_norm": 1.9921875, + "learning_rate": 0.00013993508831040754, + "loss": 1.9392, + "step": 313800 + }, + { + "epoch": 0.74, + "grad_norm": 2.234375, + "learning_rate": 0.00013993339379336108, + "loss": 2.2613, + "step": 313805 + }, + { + "epoch": 0.74, + "grad_norm": 2.5, + "learning_rate": 0.00013993169926267263, + "loss": 2.1119, + "step": 313810 + }, + { + "epoch": 0.74, + "grad_norm": 2.28125, + "learning_rate": 0.0001399300047183428, + "loss": 2.1239, + "step": 313815 + }, + { + "epoch": 0.74, + "grad_norm": 2.28125, + "learning_rate": 0.00013992831016037215, + "loss": 2.1857, + "step": 313820 + }, + { + "epoch": 0.74, + "grad_norm": 2.4375, + "learning_rate": 0.0001399266155887613, + "loss": 2.1572, + "step": 313825 + }, + { + "epoch": 0.74, + "grad_norm": 2.171875, + "learning_rate": 0.00013992492100351074, + "loss": 1.8742, + "step": 313830 + }, + { + "epoch": 0.74, + "grad_norm": 2.421875, + "learning_rate": 0.00013992322640462112, + "loss": 2.1125, + "step": 313835 + }, + { + "epoch": 0.74, + "grad_norm": 2.515625, + "learning_rate": 0.00013992153179209302, + "loss": 2.0013, + "step": 313840 + }, + { + "epoch": 0.74, + "grad_norm": 2.015625, + "learning_rate": 0.00013991983716592698, + "loss": 2.0695, + "step": 313845 + }, + { + "epoch": 0.74, + "grad_norm": 2.09375, + "learning_rate": 0.00013991814252612366, + "loss": 1.9703, + "step": 313850 + }, + { + "epoch": 0.74, + "grad_norm": 2.109375, + "learning_rate": 0.00013991644787268349, + "loss": 1.9969, + "step": 313855 + }, + { + "epoch": 0.74, + "grad_norm": 2.09375, + "learning_rate": 0.0001399147532056072, + "loss": 1.8635, + "step": 313860 + }, + { + "epoch": 0.74, + "grad_norm": 2.171875, + "learning_rate": 0.00013991305852489524, + "loss": 2.0233, + "step": 313865 + }, + { + "epoch": 0.74, + "grad_norm": 2.078125, + "learning_rate": 0.00013991136383054834, + "loss": 2.1323, + "step": 313870 + }, + { + "epoch": 0.74, + "grad_norm": 1.9296875, + "learning_rate": 0.00013990966912256694, + "loss": 1.9536, + "step": 313875 + }, + { + "epoch": 0.74, + "grad_norm": 1.953125, + "learning_rate": 0.0001399079744009517, + "loss": 1.8464, + "step": 313880 + }, + { + "epoch": 0.74, + "grad_norm": 2.21875, + "learning_rate": 0.00013990627966570316, + "loss": 2.0209, + "step": 313885 + }, + { + "epoch": 0.74, + "grad_norm": 2.171875, + "learning_rate": 0.0001399045849168219, + "loss": 1.9495, + "step": 313890 + }, + { + "epoch": 0.74, + "grad_norm": 2.078125, + "learning_rate": 0.00013990289015430854, + "loss": 1.9503, + "step": 313895 + }, + { + "epoch": 0.74, + "grad_norm": 2.140625, + "learning_rate": 0.0001399011953781636, + "loss": 2.0021, + "step": 313900 + }, + { + "epoch": 0.74, + "grad_norm": 2.265625, + "learning_rate": 0.0001398995005883877, + "loss": 1.8722, + "step": 313905 + }, + { + "epoch": 0.74, + "grad_norm": 1.8359375, + "learning_rate": 0.0001398978057849814, + "loss": 2.0861, + "step": 313910 + }, + { + "epoch": 0.74, + "grad_norm": 1.7421875, + "learning_rate": 0.00013989611096794529, + "loss": 2.0635, + "step": 313915 + }, + { + "epoch": 0.74, + "grad_norm": 2.125, + "learning_rate": 0.00013989441613727993, + "loss": 2.0665, + "step": 313920 + }, + { + "epoch": 0.74, + "grad_norm": 2.25, + "learning_rate": 0.00013989272129298595, + "loss": 1.9682, + "step": 313925 + }, + { + "epoch": 0.74, + "grad_norm": 2.203125, + "learning_rate": 0.00013989102643506386, + "loss": 1.929, + "step": 313930 + }, + { + "epoch": 0.74, + "grad_norm": 2.6875, + "learning_rate": 0.00013988933156351428, + "loss": 2.0634, + "step": 313935 + }, + { + "epoch": 0.74, + "grad_norm": 2.234375, + "learning_rate": 0.0001398876366783378, + "loss": 1.9099, + "step": 313940 + }, + { + "epoch": 0.74, + "grad_norm": 2.125, + "learning_rate": 0.00013988594177953494, + "loss": 2.2076, + "step": 313945 + }, + { + "epoch": 0.74, + "grad_norm": 2.359375, + "learning_rate": 0.00013988424686710635, + "loss": 2.2455, + "step": 313950 + }, + { + "epoch": 0.74, + "grad_norm": 2.3125, + "learning_rate": 0.00013988255194105257, + "loss": 1.9822, + "step": 313955 + }, + { + "epoch": 0.74, + "grad_norm": 2.609375, + "learning_rate": 0.00013988085700137419, + "loss": 1.9191, + "step": 313960 + }, + { + "epoch": 0.74, + "grad_norm": 2.1875, + "learning_rate": 0.0001398791620480718, + "loss": 2.119, + "step": 313965 + }, + { + "epoch": 0.74, + "grad_norm": 2.296875, + "learning_rate": 0.00013987746708114593, + "loss": 1.9746, + "step": 313970 + }, + { + "epoch": 0.74, + "grad_norm": 2.171875, + "learning_rate": 0.00013987577210059721, + "loss": 2.0094, + "step": 313975 + }, + { + "epoch": 0.74, + "grad_norm": 2.359375, + "learning_rate": 0.0001398740771064262, + "loss": 2.059, + "step": 313980 + }, + { + "epoch": 0.74, + "grad_norm": 2.375, + "learning_rate": 0.00013987238209863353, + "loss": 2.0788, + "step": 313985 + }, + { + "epoch": 0.74, + "grad_norm": 2.59375, + "learning_rate": 0.0001398706870772197, + "loss": 2.1805, + "step": 313990 + }, + { + "epoch": 0.74, + "grad_norm": 2.109375, + "learning_rate": 0.0001398689920421853, + "loss": 2.175, + "step": 313995 + }, + { + "epoch": 0.74, + "grad_norm": 2.1875, + "learning_rate": 0.0001398672969935309, + "loss": 2.1906, + "step": 314000 + }, + { + "epoch": 0.74, + "grad_norm": 2.609375, + "learning_rate": 0.00013986560193125717, + "loss": 2.163, + "step": 314005 + }, + { + "epoch": 0.74, + "grad_norm": 2.203125, + "learning_rate": 0.00013986390685536462, + "loss": 1.9125, + "step": 314010 + }, + { + "epoch": 0.74, + "grad_norm": 2.6875, + "learning_rate": 0.0001398622117658538, + "loss": 1.9854, + "step": 314015 + }, + { + "epoch": 0.74, + "grad_norm": 2.078125, + "learning_rate": 0.00013986051666272536, + "loss": 2.1075, + "step": 314020 + }, + { + "epoch": 0.74, + "grad_norm": 2.75, + "learning_rate": 0.0001398588215459798, + "loss": 2.1242, + "step": 314025 + }, + { + "epoch": 0.74, + "grad_norm": 2.515625, + "learning_rate": 0.0001398571264156178, + "loss": 2.1067, + "step": 314030 + }, + { + "epoch": 0.74, + "grad_norm": 2.34375, + "learning_rate": 0.00013985543127163986, + "loss": 1.8131, + "step": 314035 + }, + { + "epoch": 0.74, + "grad_norm": 2.25, + "learning_rate": 0.00013985373611404657, + "loss": 2.0777, + "step": 314040 + }, + { + "epoch": 0.74, + "grad_norm": 2.21875, + "learning_rate": 0.00013985204094283852, + "loss": 1.912, + "step": 314045 + }, + { + "epoch": 0.74, + "grad_norm": 2.203125, + "learning_rate": 0.0001398503457580163, + "loss": 2.0165, + "step": 314050 + }, + { + "epoch": 0.74, + "grad_norm": 2.15625, + "learning_rate": 0.0001398486505595805, + "loss": 2.0494, + "step": 314055 + }, + { + "epoch": 0.74, + "grad_norm": 2.296875, + "learning_rate": 0.00013984695534753164, + "loss": 2.0728, + "step": 314060 + }, + { + "epoch": 0.74, + "grad_norm": 2.21875, + "learning_rate": 0.00013984526012187035, + "loss": 1.9224, + "step": 314065 + }, + { + "epoch": 0.74, + "grad_norm": 2.15625, + "learning_rate": 0.00013984356488259722, + "loss": 2.0683, + "step": 314070 + }, + { + "epoch": 0.74, + "grad_norm": 2.140625, + "learning_rate": 0.0001398418696297128, + "loss": 2.1094, + "step": 314075 + }, + { + "epoch": 0.74, + "grad_norm": 2.296875, + "learning_rate": 0.00013984017436321765, + "loss": 1.9236, + "step": 314080 + }, + { + "epoch": 0.74, + "grad_norm": 2.421875, + "learning_rate": 0.00013983847908311239, + "loss": 2.0429, + "step": 314085 + }, + { + "epoch": 0.74, + "grad_norm": 2.390625, + "learning_rate": 0.00013983678378939757, + "loss": 2.0222, + "step": 314090 + }, + { + "epoch": 0.74, + "grad_norm": 2.15625, + "learning_rate": 0.0001398350884820738, + "loss": 2.1696, + "step": 314095 + }, + { + "epoch": 0.74, + "grad_norm": 2.109375, + "learning_rate": 0.00013983339316114162, + "loss": 2.01, + "step": 314100 + }, + { + "epoch": 0.74, + "grad_norm": 2.078125, + "learning_rate": 0.00013983169782660164, + "loss": 1.9622, + "step": 314105 + }, + { + "epoch": 0.74, + "grad_norm": 2.4375, + "learning_rate": 0.00013983000247845445, + "loss": 2.0412, + "step": 314110 + }, + { + "epoch": 0.74, + "grad_norm": 2.5625, + "learning_rate": 0.00013982830711670058, + "loss": 2.012, + "step": 314115 + }, + { + "epoch": 0.74, + "grad_norm": 2.46875, + "learning_rate": 0.00013982661174134067, + "loss": 1.9221, + "step": 314120 + }, + { + "epoch": 0.74, + "grad_norm": 2.203125, + "learning_rate": 0.00013982491635237525, + "loss": 2.2102, + "step": 314125 + }, + { + "epoch": 0.74, + "grad_norm": 2.671875, + "learning_rate": 0.00013982322094980489, + "loss": 1.9848, + "step": 314130 + }, + { + "epoch": 0.74, + "grad_norm": 2.90625, + "learning_rate": 0.00013982152553363022, + "loss": 1.9333, + "step": 314135 + }, + { + "epoch": 0.74, + "grad_norm": 2.28125, + "learning_rate": 0.0001398198301038518, + "loss": 1.9317, + "step": 314140 + }, + { + "epoch": 0.74, + "grad_norm": 2.140625, + "learning_rate": 0.0001398181346604702, + "loss": 2.1481, + "step": 314145 + }, + { + "epoch": 0.74, + "grad_norm": 2.0, + "learning_rate": 0.00013981643920348602, + "loss": 2.0453, + "step": 314150 + }, + { + "epoch": 0.74, + "grad_norm": 2.5, + "learning_rate": 0.00013981474373289977, + "loss": 1.9858, + "step": 314155 + }, + { + "epoch": 0.74, + "grad_norm": 2.09375, + "learning_rate": 0.0001398130482487121, + "loss": 2.1378, + "step": 314160 + }, + { + "epoch": 0.74, + "grad_norm": 2.453125, + "learning_rate": 0.0001398113527509236, + "loss": 1.9046, + "step": 314165 + }, + { + "epoch": 0.74, + "grad_norm": 2.171875, + "learning_rate": 0.00013980965723953484, + "loss": 1.94, + "step": 314170 + }, + { + "epoch": 0.74, + "grad_norm": 2.34375, + "learning_rate": 0.00013980796171454634, + "loss": 2.1915, + "step": 314175 + }, + { + "epoch": 0.74, + "grad_norm": 2.78125, + "learning_rate": 0.0001398062661759587, + "loss": 2.1093, + "step": 314180 + }, + { + "epoch": 0.74, + "grad_norm": 2.765625, + "learning_rate": 0.00013980457062377254, + "loss": 2.0002, + "step": 314185 + }, + { + "epoch": 0.74, + "grad_norm": 2.09375, + "learning_rate": 0.0001398028750579884, + "loss": 1.9389, + "step": 314190 + }, + { + "epoch": 0.74, + "grad_norm": 1.96875, + "learning_rate": 0.00013980117947860692, + "loss": 1.8632, + "step": 314195 + }, + { + "epoch": 0.74, + "grad_norm": 2.328125, + "learning_rate": 0.0001397994838856286, + "loss": 2.0259, + "step": 314200 + }, + { + "epoch": 0.74, + "grad_norm": 1.9140625, + "learning_rate": 0.00013979778827905405, + "loss": 1.9318, + "step": 314205 + }, + { + "epoch": 0.74, + "grad_norm": 2.140625, + "learning_rate": 0.00013979609265888388, + "loss": 2.1204, + "step": 314210 + }, + { + "epoch": 0.74, + "grad_norm": 2.1875, + "learning_rate": 0.00013979439702511862, + "loss": 2.0346, + "step": 314215 + }, + { + "epoch": 0.74, + "grad_norm": 2.1875, + "learning_rate": 0.0001397927013777589, + "loss": 2.1593, + "step": 314220 + }, + { + "epoch": 0.74, + "grad_norm": 2.0625, + "learning_rate": 0.00013979100571680523, + "loss": 1.9798, + "step": 314225 + }, + { + "epoch": 0.74, + "grad_norm": 2.40625, + "learning_rate": 0.00013978931004225825, + "loss": 2.1925, + "step": 314230 + }, + { + "epoch": 0.74, + "grad_norm": 1.90625, + "learning_rate": 0.00013978761435411854, + "loss": 1.961, + "step": 314235 + }, + { + "epoch": 0.74, + "grad_norm": 2.5625, + "learning_rate": 0.00013978591865238663, + "loss": 1.9341, + "step": 314240 + }, + { + "epoch": 0.74, + "grad_norm": 2.078125, + "learning_rate": 0.00013978422293706315, + "loss": 1.9665, + "step": 314245 + }, + { + "epoch": 0.74, + "grad_norm": 2.78125, + "learning_rate": 0.00013978252720814864, + "loss": 2.0537, + "step": 314250 + }, + { + "epoch": 0.74, + "grad_norm": 2.328125, + "learning_rate": 0.00013978083146564372, + "loss": 2.1746, + "step": 314255 + }, + { + "epoch": 0.74, + "grad_norm": 2.15625, + "learning_rate": 0.00013977913570954894, + "loss": 2.2409, + "step": 314260 + }, + { + "epoch": 0.74, + "grad_norm": 2.515625, + "learning_rate": 0.00013977743993986484, + "loss": 1.9311, + "step": 314265 + }, + { + "epoch": 0.74, + "grad_norm": 2.0625, + "learning_rate": 0.0001397757441565921, + "loss": 2.1198, + "step": 314270 + }, + { + "epoch": 0.74, + "grad_norm": 1.984375, + "learning_rate": 0.0001397740483597312, + "loss": 2.0212, + "step": 314275 + }, + { + "epoch": 0.74, + "grad_norm": 1.8359375, + "learning_rate": 0.00013977235254928285, + "loss": 1.8878, + "step": 314280 + }, + { + "epoch": 0.74, + "grad_norm": 2.390625, + "learning_rate": 0.00013977065672524748, + "loss": 2.0428, + "step": 314285 + }, + { + "epoch": 0.74, + "grad_norm": 1.9609375, + "learning_rate": 0.00013976896088762573, + "loss": 2.0502, + "step": 314290 + }, + { + "epoch": 0.74, + "grad_norm": 2.265625, + "learning_rate": 0.00013976726503641816, + "loss": 1.8989, + "step": 314295 + }, + { + "epoch": 0.74, + "grad_norm": 2.75, + "learning_rate": 0.00013976556917162544, + "loss": 1.7787, + "step": 314300 + }, + { + "epoch": 0.74, + "grad_norm": 2.5625, + "learning_rate": 0.00013976387329324802, + "loss": 2.144, + "step": 314305 + }, + { + "epoch": 0.74, + "grad_norm": 2.203125, + "learning_rate": 0.00013976217740128657, + "loss": 1.9948, + "step": 314310 + }, + { + "epoch": 0.74, + "grad_norm": 2.203125, + "learning_rate": 0.00013976048149574164, + "loss": 1.8543, + "step": 314315 + }, + { + "epoch": 0.74, + "grad_norm": 2.078125, + "learning_rate": 0.00013975878557661377, + "loss": 1.9034, + "step": 314320 + }, + { + "epoch": 0.74, + "grad_norm": 1.921875, + "learning_rate": 0.0001397570896439036, + "loss": 2.1288, + "step": 314325 + }, + { + "epoch": 0.74, + "grad_norm": 2.265625, + "learning_rate": 0.0001397553936976117, + "loss": 2.0271, + "step": 314330 + }, + { + "epoch": 0.74, + "grad_norm": 2.1875, + "learning_rate": 0.00013975369773773863, + "loss": 2.2458, + "step": 314335 + }, + { + "epoch": 0.74, + "grad_norm": 2.3125, + "learning_rate": 0.000139752001764285, + "loss": 2.1337, + "step": 314340 + }, + { + "epoch": 0.74, + "grad_norm": 2.640625, + "learning_rate": 0.00013975030577725132, + "loss": 2.1891, + "step": 314345 + }, + { + "epoch": 0.74, + "grad_norm": 1.875, + "learning_rate": 0.00013974860977663823, + "loss": 1.9703, + "step": 314350 + }, + { + "epoch": 0.74, + "grad_norm": 2.265625, + "learning_rate": 0.0001397469137624463, + "loss": 2.1441, + "step": 314355 + }, + { + "epoch": 0.74, + "grad_norm": 1.8984375, + "learning_rate": 0.00013974521773467614, + "loss": 2.0462, + "step": 314360 + }, + { + "epoch": 0.74, + "grad_norm": 2.09375, + "learning_rate": 0.00013974352169332822, + "loss": 2.1202, + "step": 314365 + }, + { + "epoch": 0.74, + "grad_norm": 2.53125, + "learning_rate": 0.00013974182563840324, + "loss": 2.0575, + "step": 314370 + }, + { + "epoch": 0.74, + "grad_norm": 1.9765625, + "learning_rate": 0.00013974012956990172, + "loss": 2.0944, + "step": 314375 + }, + { + "epoch": 0.74, + "grad_norm": 1.8984375, + "learning_rate": 0.00013973843348782426, + "loss": 2.2356, + "step": 314380 + }, + { + "epoch": 0.74, + "grad_norm": 3.25, + "learning_rate": 0.00013973673739217142, + "loss": 2.184, + "step": 314385 + }, + { + "epoch": 0.74, + "grad_norm": 2.609375, + "learning_rate": 0.00013973504128294377, + "loss": 2.0109, + "step": 314390 + }, + { + "epoch": 0.74, + "grad_norm": 2.140625, + "learning_rate": 0.00013973334516014196, + "loss": 2.0907, + "step": 314395 + }, + { + "epoch": 0.74, + "grad_norm": 1.8125, + "learning_rate": 0.0001397316490237665, + "loss": 2.0668, + "step": 314400 + }, + { + "epoch": 0.74, + "grad_norm": 2.15625, + "learning_rate": 0.00013972995287381794, + "loss": 2.04, + "step": 314405 + }, + { + "epoch": 0.74, + "grad_norm": 2.15625, + "learning_rate": 0.00013972825671029694, + "loss": 2.0727, + "step": 314410 + }, + { + "epoch": 0.74, + "grad_norm": 1.8671875, + "learning_rate": 0.00013972656053320406, + "loss": 2.0683, + "step": 314415 + }, + { + "epoch": 0.74, + "grad_norm": 2.265625, + "learning_rate": 0.00013972486434253988, + "loss": 1.9314, + "step": 314420 + }, + { + "epoch": 0.74, + "grad_norm": 1.6796875, + "learning_rate": 0.00013972316813830492, + "loss": 2.1087, + "step": 314425 + }, + { + "epoch": 0.74, + "grad_norm": 2.25, + "learning_rate": 0.00013972147192049982, + "loss": 2.209, + "step": 314430 + }, + { + "epoch": 0.74, + "grad_norm": 1.84375, + "learning_rate": 0.00013971977568912516, + "loss": 1.9951, + "step": 314435 + }, + { + "epoch": 0.74, + "grad_norm": 2.65625, + "learning_rate": 0.0001397180794441815, + "loss": 1.9143, + "step": 314440 + }, + { + "epoch": 0.74, + "grad_norm": 2.09375, + "learning_rate": 0.00013971638318566944, + "loss": 1.9732, + "step": 314445 + }, + { + "epoch": 0.74, + "grad_norm": 2.078125, + "learning_rate": 0.0001397146869135895, + "loss": 2.1261, + "step": 314450 + }, + { + "epoch": 0.74, + "grad_norm": 2.1875, + "learning_rate": 0.00013971299062794231, + "loss": 2.0774, + "step": 314455 + }, + { + "epoch": 0.74, + "grad_norm": 2.640625, + "learning_rate": 0.00013971129432872848, + "loss": 2.1416, + "step": 314460 + }, + { + "epoch": 0.74, + "grad_norm": 2.578125, + "learning_rate": 0.00013970959801594854, + "loss": 2.0838, + "step": 314465 + }, + { + "epoch": 0.74, + "grad_norm": 2.0625, + "learning_rate": 0.0001397079016896031, + "loss": 1.9121, + "step": 314470 + }, + { + "epoch": 0.74, + "grad_norm": 2.015625, + "learning_rate": 0.00013970620534969267, + "loss": 2.154, + "step": 314475 + }, + { + "epoch": 0.74, + "grad_norm": 2.375, + "learning_rate": 0.00013970450899621788, + "loss": 2.0071, + "step": 314480 + }, + { + "epoch": 0.74, + "grad_norm": 2.046875, + "learning_rate": 0.00013970281262917935, + "loss": 1.8741, + "step": 314485 + }, + { + "epoch": 0.74, + "grad_norm": 2.5, + "learning_rate": 0.0001397011162485776, + "loss": 2.0016, + "step": 314490 + }, + { + "epoch": 0.74, + "grad_norm": 2.546875, + "learning_rate": 0.00013969941985441325, + "loss": 2.0844, + "step": 314495 + }, + { + "epoch": 0.74, + "grad_norm": 2.171875, + "learning_rate": 0.00013969772344668682, + "loss": 2.1758, + "step": 314500 + }, + { + "epoch": 0.74, + "grad_norm": 2.03125, + "learning_rate": 0.00013969602702539895, + "loss": 2.0232, + "step": 314505 + }, + { + "epoch": 0.74, + "grad_norm": 2.0625, + "learning_rate": 0.00013969433059055018, + "loss": 2.1213, + "step": 314510 + }, + { + "epoch": 0.74, + "grad_norm": 2.25, + "learning_rate": 0.00013969263414214113, + "loss": 2.2223, + "step": 314515 + }, + { + "epoch": 0.74, + "grad_norm": 2.375, + "learning_rate": 0.00013969093768017237, + "loss": 1.9846, + "step": 314520 + }, + { + "epoch": 0.74, + "grad_norm": 2.328125, + "learning_rate": 0.00013968924120464442, + "loss": 2.1647, + "step": 314525 + }, + { + "epoch": 0.74, + "grad_norm": 2.015625, + "learning_rate": 0.00013968754471555794, + "loss": 1.9769, + "step": 314530 + }, + { + "epoch": 0.74, + "grad_norm": 2.265625, + "learning_rate": 0.00013968584821291347, + "loss": 2.0596, + "step": 314535 + }, + { + "epoch": 0.74, + "grad_norm": 1.84375, + "learning_rate": 0.00013968415169671156, + "loss": 1.8695, + "step": 314540 + }, + { + "epoch": 0.74, + "grad_norm": 2.234375, + "learning_rate": 0.00013968245516695287, + "loss": 2.0496, + "step": 314545 + }, + { + "epoch": 0.74, + "grad_norm": 2.375, + "learning_rate": 0.00013968075862363788, + "loss": 2.0411, + "step": 314550 + }, + { + "epoch": 0.74, + "grad_norm": 2.109375, + "learning_rate": 0.0001396790620667673, + "loss": 2.076, + "step": 314555 + }, + { + "epoch": 0.74, + "grad_norm": 2.375, + "learning_rate": 0.00013967736549634158, + "loss": 2.0146, + "step": 314560 + }, + { + "epoch": 0.74, + "grad_norm": 2.015625, + "learning_rate": 0.00013967566891236137, + "loss": 2.139, + "step": 314565 + }, + { + "epoch": 0.74, + "grad_norm": 1.8203125, + "learning_rate": 0.00013967397231482722, + "loss": 1.8887, + "step": 314570 + }, + { + "epoch": 0.74, + "grad_norm": 2.140625, + "learning_rate": 0.00013967227570373973, + "loss": 2.1034, + "step": 314575 + }, + { + "epoch": 0.74, + "grad_norm": 2.015625, + "learning_rate": 0.00013967057907909947, + "loss": 1.9658, + "step": 314580 + }, + { + "epoch": 0.74, + "grad_norm": 2.15625, + "learning_rate": 0.000139668882440907, + "loss": 2.127, + "step": 314585 + }, + { + "epoch": 0.74, + "grad_norm": 1.7265625, + "learning_rate": 0.00013966718578916295, + "loss": 2.0771, + "step": 314590 + }, + { + "epoch": 0.74, + "grad_norm": 2.046875, + "learning_rate": 0.00013966548912386787, + "loss": 2.0594, + "step": 314595 + }, + { + "epoch": 0.74, + "grad_norm": 2.265625, + "learning_rate": 0.00013966379244502233, + "loss": 2.0177, + "step": 314600 + }, + { + "epoch": 0.74, + "grad_norm": 2.171875, + "learning_rate": 0.00013966209575262692, + "loss": 1.8708, + "step": 314605 + }, + { + "epoch": 0.74, + "grad_norm": 2.5625, + "learning_rate": 0.00013966039904668223, + "loss": 2.0614, + "step": 314610 + }, + { + "epoch": 0.74, + "grad_norm": 2.125, + "learning_rate": 0.00013965870232718878, + "loss": 1.9389, + "step": 314615 + }, + { + "epoch": 0.74, + "grad_norm": 2.046875, + "learning_rate": 0.00013965700559414725, + "loss": 2.049, + "step": 314620 + }, + { + "epoch": 0.74, + "grad_norm": 2.15625, + "learning_rate": 0.00013965530884755815, + "loss": 2.0635, + "step": 314625 + }, + { + "epoch": 0.74, + "grad_norm": 2.03125, + "learning_rate": 0.00013965361208742206, + "loss": 2.0543, + "step": 314630 + }, + { + "epoch": 0.74, + "grad_norm": 2.078125, + "learning_rate": 0.00013965191531373962, + "loss": 1.9461, + "step": 314635 + }, + { + "epoch": 0.74, + "grad_norm": 2.15625, + "learning_rate": 0.00013965021852651135, + "loss": 1.9777, + "step": 314640 + }, + { + "epoch": 0.74, + "grad_norm": 2.984375, + "learning_rate": 0.0001396485217257378, + "loss": 2.1752, + "step": 314645 + }, + { + "epoch": 0.74, + "grad_norm": 1.9921875, + "learning_rate": 0.00013964682491141966, + "loss": 2.2704, + "step": 314650 + }, + { + "epoch": 0.74, + "grad_norm": 2.21875, + "learning_rate": 0.00013964512808355744, + "loss": 1.9545, + "step": 314655 + }, + { + "epoch": 0.74, + "grad_norm": 2.140625, + "learning_rate": 0.0001396434312421517, + "loss": 2.0234, + "step": 314660 + }, + { + "epoch": 0.74, + "grad_norm": 2.015625, + "learning_rate": 0.00013964173438720301, + "loss": 1.9115, + "step": 314665 + }, + { + "epoch": 0.74, + "grad_norm": 2.09375, + "learning_rate": 0.00013964003751871205, + "loss": 2.1756, + "step": 314670 + }, + { + "epoch": 0.74, + "grad_norm": 2.0625, + "learning_rate": 0.0001396383406366793, + "loss": 2.0899, + "step": 314675 + }, + { + "epoch": 0.74, + "grad_norm": 2.15625, + "learning_rate": 0.0001396366437411054, + "loss": 2.0531, + "step": 314680 + }, + { + "epoch": 0.74, + "grad_norm": 1.9765625, + "learning_rate": 0.00013963494683199087, + "loss": 2.006, + "step": 314685 + }, + { + "epoch": 0.74, + "grad_norm": 1.9921875, + "learning_rate": 0.00013963324990933637, + "loss": 1.9208, + "step": 314690 + }, + { + "epoch": 0.74, + "grad_norm": 2.734375, + "learning_rate": 0.00013963155297314238, + "loss": 2.1718, + "step": 314695 + }, + { + "epoch": 0.74, + "grad_norm": 2.09375, + "learning_rate": 0.00013962985602340955, + "loss": 1.9322, + "step": 314700 + }, + { + "epoch": 0.74, + "grad_norm": 2.484375, + "learning_rate": 0.00013962815906013845, + "loss": 2.065, + "step": 314705 + }, + { + "epoch": 0.74, + "grad_norm": 2.390625, + "learning_rate": 0.00013962646208332967, + "loss": 2.1818, + "step": 314710 + }, + { + "epoch": 0.74, + "grad_norm": 2.078125, + "learning_rate": 0.00013962476509298374, + "loss": 2.2466, + "step": 314715 + }, + { + "epoch": 0.74, + "grad_norm": 2.203125, + "learning_rate": 0.00013962306808910127, + "loss": 2.09, + "step": 314720 + }, + { + "epoch": 0.74, + "grad_norm": 2.0625, + "learning_rate": 0.00013962137107168286, + "loss": 1.9053, + "step": 314725 + }, + { + "epoch": 0.74, + "grad_norm": 2.328125, + "learning_rate": 0.00013961967404072905, + "loss": 1.9092, + "step": 314730 + }, + { + "epoch": 0.74, + "grad_norm": 1.9765625, + "learning_rate": 0.00013961797699624048, + "loss": 1.9739, + "step": 314735 + }, + { + "epoch": 0.74, + "grad_norm": 2.171875, + "learning_rate": 0.00013961627993821767, + "loss": 2.1788, + "step": 314740 + }, + { + "epoch": 0.74, + "grad_norm": 2.203125, + "learning_rate": 0.00013961458286666122, + "loss": 2.0317, + "step": 314745 + }, + { + "epoch": 0.74, + "grad_norm": 2.328125, + "learning_rate": 0.00013961288578157168, + "loss": 1.887, + "step": 314750 + }, + { + "epoch": 0.74, + "grad_norm": 2.265625, + "learning_rate": 0.0001396111886829497, + "loss": 2.1182, + "step": 314755 + }, + { + "epoch": 0.74, + "grad_norm": 2.359375, + "learning_rate": 0.00013960949157079583, + "loss": 1.936, + "step": 314760 + }, + { + "epoch": 0.74, + "grad_norm": 1.9609375, + "learning_rate": 0.00013960779444511062, + "loss": 2.0762, + "step": 314765 + }, + { + "epoch": 0.74, + "grad_norm": 2.296875, + "learning_rate": 0.00013960609730589464, + "loss": 1.8449, + "step": 314770 + }, + { + "epoch": 0.74, + "grad_norm": 1.984375, + "learning_rate": 0.00013960440015314853, + "loss": 1.9138, + "step": 314775 + }, + { + "epoch": 0.74, + "grad_norm": 1.8671875, + "learning_rate": 0.00013960270298687282, + "loss": 2.0483, + "step": 314780 + }, + { + "epoch": 0.74, + "grad_norm": 2.234375, + "learning_rate": 0.00013960100580706814, + "loss": 2.093, + "step": 314785 + }, + { + "epoch": 0.74, + "grad_norm": 2.046875, + "learning_rate": 0.000139599308613735, + "loss": 2.0868, + "step": 314790 + }, + { + "epoch": 0.74, + "grad_norm": 1.9296875, + "learning_rate": 0.00013959761140687403, + "loss": 2.1219, + "step": 314795 + }, + { + "epoch": 0.74, + "grad_norm": 2.3125, + "learning_rate": 0.00013959591418648582, + "loss": 2.191, + "step": 314800 + }, + { + "epoch": 0.74, + "grad_norm": 2.5625, + "learning_rate": 0.00013959421695257094, + "loss": 2.0228, + "step": 314805 + }, + { + "epoch": 0.74, + "grad_norm": 2.078125, + "learning_rate": 0.0001395925197051299, + "loss": 2.1035, + "step": 314810 + }, + { + "epoch": 0.74, + "grad_norm": 2.6875, + "learning_rate": 0.00013959082244416337, + "loss": 2.0285, + "step": 314815 + }, + { + "epoch": 0.74, + "grad_norm": 2.703125, + "learning_rate": 0.0001395891251696719, + "loss": 2.1068, + "step": 314820 + }, + { + "epoch": 0.74, + "grad_norm": 2.15625, + "learning_rate": 0.0001395874278816561, + "loss": 2.0419, + "step": 314825 + }, + { + "epoch": 0.74, + "grad_norm": 2.109375, + "learning_rate": 0.00013958573058011644, + "loss": 1.9987, + "step": 314830 + }, + { + "epoch": 0.74, + "grad_norm": 2.546875, + "learning_rate": 0.00013958403326505363, + "loss": 2.0928, + "step": 314835 + }, + { + "epoch": 0.74, + "grad_norm": 1.75, + "learning_rate": 0.00013958233593646817, + "loss": 2.1372, + "step": 314840 + }, + { + "epoch": 0.74, + "grad_norm": 1.6640625, + "learning_rate": 0.00013958063859436067, + "loss": 2.0133, + "step": 314845 + }, + { + "epoch": 0.74, + "grad_norm": 2.078125, + "learning_rate": 0.00013957894123873174, + "loss": 2.0618, + "step": 314850 + }, + { + "epoch": 0.74, + "grad_norm": 2.09375, + "learning_rate": 0.00013957724386958186, + "loss": 2.0129, + "step": 314855 + }, + { + "epoch": 0.74, + "grad_norm": 2.4375, + "learning_rate": 0.00013957554648691173, + "loss": 2.0593, + "step": 314860 + }, + { + "epoch": 0.74, + "grad_norm": 2.390625, + "learning_rate": 0.00013957384909072187, + "loss": 2.164, + "step": 314865 + }, + { + "epoch": 0.74, + "grad_norm": 2.46875, + "learning_rate": 0.00013957215168101285, + "loss": 2.083, + "step": 314870 + }, + { + "epoch": 0.74, + "grad_norm": 2.34375, + "learning_rate": 0.00013957045425778527, + "loss": 2.0168, + "step": 314875 + }, + { + "epoch": 0.74, + "grad_norm": 2.21875, + "learning_rate": 0.0001395687568210397, + "loss": 2.0736, + "step": 314880 + }, + { + "epoch": 0.74, + "grad_norm": 2.0625, + "learning_rate": 0.00013956705937077671, + "loss": 1.8651, + "step": 314885 + }, + { + "epoch": 0.74, + "grad_norm": 1.8984375, + "learning_rate": 0.00013956536190699696, + "loss": 1.9436, + "step": 314890 + }, + { + "epoch": 0.74, + "grad_norm": 2.296875, + "learning_rate": 0.00013956366442970092, + "loss": 2.2618, + "step": 314895 + }, + { + "epoch": 0.74, + "grad_norm": 2.28125, + "learning_rate": 0.00013956196693888922, + "loss": 2.139, + "step": 314900 + }, + { + "epoch": 0.74, + "grad_norm": 2.078125, + "learning_rate": 0.00013956026943456241, + "loss": 2.117, + "step": 314905 + }, + { + "epoch": 0.74, + "grad_norm": 2.3125, + "learning_rate": 0.00013955857191672112, + "loss": 2.0124, + "step": 314910 + }, + { + "epoch": 0.74, + "grad_norm": 2.578125, + "learning_rate": 0.00013955687438536591, + "loss": 1.8469, + "step": 314915 + }, + { + "epoch": 0.74, + "grad_norm": 2.546875, + "learning_rate": 0.00013955517684049733, + "loss": 2.2393, + "step": 314920 + }, + { + "epoch": 0.74, + "grad_norm": 2.25, + "learning_rate": 0.000139553479282116, + "loss": 2.1253, + "step": 314925 + }, + { + "epoch": 0.74, + "grad_norm": 2.296875, + "learning_rate": 0.0001395517817102225, + "loss": 2.0219, + "step": 314930 + }, + { + "epoch": 0.74, + "grad_norm": 2.265625, + "learning_rate": 0.00013955008412481736, + "loss": 2.1967, + "step": 314935 + }, + { + "epoch": 0.74, + "grad_norm": 1.96875, + "learning_rate": 0.0001395483865259012, + "loss": 2.0986, + "step": 314940 + }, + { + "epoch": 0.74, + "grad_norm": 2.53125, + "learning_rate": 0.00013954668891347463, + "loss": 2.0738, + "step": 314945 + }, + { + "epoch": 0.74, + "grad_norm": 2.375, + "learning_rate": 0.00013954499128753816, + "loss": 2.2301, + "step": 314950 + }, + { + "epoch": 0.74, + "grad_norm": 2.328125, + "learning_rate": 0.0001395432936480924, + "loss": 2.1177, + "step": 314955 + }, + { + "epoch": 0.74, + "grad_norm": 2.359375, + "learning_rate": 0.00013954159599513795, + "loss": 2.0039, + "step": 314960 + }, + { + "epoch": 0.74, + "grad_norm": 2.453125, + "learning_rate": 0.00013953989832867536, + "loss": 2.0167, + "step": 314965 + }, + { + "epoch": 0.74, + "grad_norm": 2.203125, + "learning_rate": 0.00013953820064870523, + "loss": 2.1491, + "step": 314970 + }, + { + "epoch": 0.74, + "grad_norm": 2.328125, + "learning_rate": 0.00013953650295522814, + "loss": 2.2768, + "step": 314975 + }, + { + "epoch": 0.74, + "grad_norm": 2.1875, + "learning_rate": 0.00013953480524824468, + "loss": 2.0108, + "step": 314980 + }, + { + "epoch": 0.74, + "grad_norm": 2.25, + "learning_rate": 0.0001395331075277554, + "loss": 1.9892, + "step": 314985 + }, + { + "epoch": 0.74, + "grad_norm": 2.328125, + "learning_rate": 0.00013953140979376086, + "loss": 1.866, + "step": 314990 + }, + { + "epoch": 0.74, + "grad_norm": 2.375, + "learning_rate": 0.00013952971204626171, + "loss": 2.0812, + "step": 314995 + }, + { + "epoch": 0.74, + "grad_norm": 2.453125, + "learning_rate": 0.00013952801428525848, + "loss": 2.0969, + "step": 315000 + }, + { + "epoch": 0.74, + "grad_norm": 2.5, + "learning_rate": 0.00013952631651075178, + "loss": 2.0958, + "step": 315005 + }, + { + "epoch": 0.74, + "grad_norm": 2.1875, + "learning_rate": 0.00013952461872274215, + "loss": 1.9733, + "step": 315010 + }, + { + "epoch": 0.74, + "grad_norm": 2.203125, + "learning_rate": 0.0001395229209212302, + "loss": 2.3071, + "step": 315015 + }, + { + "epoch": 0.74, + "grad_norm": 2.28125, + "learning_rate": 0.00013952122310621648, + "loss": 2.1198, + "step": 315020 + }, + { + "epoch": 0.74, + "grad_norm": 1.9921875, + "learning_rate": 0.00013951952527770166, + "loss": 1.9093, + "step": 315025 + }, + { + "epoch": 0.74, + "grad_norm": 2.09375, + "learning_rate": 0.0001395178274356862, + "loss": 1.877, + "step": 315030 + }, + { + "epoch": 0.74, + "grad_norm": 2.46875, + "learning_rate": 0.00013951612958017074, + "loss": 2.3177, + "step": 315035 + }, + { + "epoch": 0.74, + "grad_norm": 1.71875, + "learning_rate": 0.00013951443171115587, + "loss": 2.2049, + "step": 315040 + }, + { + "epoch": 0.74, + "grad_norm": 2.0625, + "learning_rate": 0.00013951273382864214, + "loss": 1.9602, + "step": 315045 + }, + { + "epoch": 0.74, + "grad_norm": 1.9375, + "learning_rate": 0.00013951103593263015, + "loss": 2.0363, + "step": 315050 + }, + { + "epoch": 0.74, + "grad_norm": 1.796875, + "learning_rate": 0.0001395093380231205, + "loss": 1.8837, + "step": 315055 + }, + { + "epoch": 0.74, + "grad_norm": 2.34375, + "learning_rate": 0.0001395076401001137, + "loss": 1.9458, + "step": 315060 + }, + { + "epoch": 0.74, + "grad_norm": 2.03125, + "learning_rate": 0.0001395059421636104, + "loss": 2.1441, + "step": 315065 + }, + { + "epoch": 0.74, + "grad_norm": 2.359375, + "learning_rate": 0.0001395042442136111, + "loss": 2.1082, + "step": 315070 + }, + { + "epoch": 0.74, + "grad_norm": 2.375, + "learning_rate": 0.0001395025462501165, + "loss": 1.8648, + "step": 315075 + }, + { + "epoch": 0.74, + "grad_norm": 2.40625, + "learning_rate": 0.0001395008482731271, + "loss": 2.0243, + "step": 315080 + }, + { + "epoch": 0.74, + "grad_norm": 3.0, + "learning_rate": 0.0001394991502826435, + "loss": 2.0439, + "step": 315085 + }, + { + "epoch": 0.74, + "grad_norm": 2.3125, + "learning_rate": 0.00013949745227866624, + "loss": 2.1568, + "step": 315090 + }, + { + "epoch": 0.74, + "grad_norm": 2.171875, + "learning_rate": 0.00013949575426119593, + "loss": 1.976, + "step": 315095 + }, + { + "epoch": 0.74, + "grad_norm": 1.8203125, + "learning_rate": 0.0001394940562302332, + "loss": 1.8357, + "step": 315100 + }, + { + "epoch": 0.74, + "grad_norm": 2.078125, + "learning_rate": 0.00013949235818577855, + "loss": 2.2017, + "step": 315105 + }, + { + "epoch": 0.74, + "grad_norm": 1.9296875, + "learning_rate": 0.0001394906601278326, + "loss": 2.1111, + "step": 315110 + }, + { + "epoch": 0.74, + "grad_norm": 1.921875, + "learning_rate": 0.00013948896205639595, + "loss": 2.1258, + "step": 315115 + }, + { + "epoch": 0.74, + "grad_norm": 2.421875, + "learning_rate": 0.00013948726397146913, + "loss": 2.0993, + "step": 315120 + }, + { + "epoch": 0.74, + "grad_norm": 2.859375, + "learning_rate": 0.00013948556587305272, + "loss": 2.061, + "step": 315125 + }, + { + "epoch": 0.74, + "grad_norm": 3.09375, + "learning_rate": 0.00013948386776114737, + "loss": 2.2526, + "step": 315130 + }, + { + "epoch": 0.74, + "grad_norm": 2.953125, + "learning_rate": 0.0001394821696357536, + "loss": 1.9022, + "step": 315135 + }, + { + "epoch": 0.74, + "grad_norm": 2.09375, + "learning_rate": 0.000139480471496872, + "loss": 2.0156, + "step": 315140 + }, + { + "epoch": 0.74, + "grad_norm": 2.484375, + "learning_rate": 0.00013947877334450316, + "loss": 1.9036, + "step": 315145 + }, + { + "epoch": 0.74, + "grad_norm": 2.078125, + "learning_rate": 0.00013947707517864764, + "loss": 2.2255, + "step": 315150 + }, + { + "epoch": 0.74, + "grad_norm": 2.09375, + "learning_rate": 0.00013947537699930603, + "loss": 2.1094, + "step": 315155 + }, + { + "epoch": 0.74, + "grad_norm": 2.125, + "learning_rate": 0.00013947367880647895, + "loss": 2.1068, + "step": 315160 + }, + { + "epoch": 0.74, + "grad_norm": 2.296875, + "learning_rate": 0.00013947198060016696, + "loss": 1.9713, + "step": 315165 + }, + { + "epoch": 0.74, + "grad_norm": 2.453125, + "learning_rate": 0.0001394702823803706, + "loss": 2.102, + "step": 315170 + }, + { + "epoch": 0.74, + "grad_norm": 2.4375, + "learning_rate": 0.00013946858414709044, + "loss": 2.0824, + "step": 315175 + }, + { + "epoch": 0.74, + "grad_norm": 2.21875, + "learning_rate": 0.0001394668859003271, + "loss": 2.0141, + "step": 315180 + }, + { + "epoch": 0.74, + "grad_norm": 2.0, + "learning_rate": 0.0001394651876400812, + "loss": 1.9078, + "step": 315185 + }, + { + "epoch": 0.74, + "grad_norm": 2.546875, + "learning_rate": 0.00013946348936635326, + "loss": 1.9366, + "step": 315190 + }, + { + "epoch": 0.74, + "grad_norm": 2.265625, + "learning_rate": 0.00013946179107914388, + "loss": 1.9278, + "step": 315195 + }, + { + "epoch": 0.74, + "grad_norm": 2.734375, + "learning_rate": 0.00013946009277845362, + "loss": 2.0512, + "step": 315200 + }, + { + "epoch": 0.74, + "grad_norm": 2.203125, + "learning_rate": 0.00013945839446428306, + "loss": 1.874, + "step": 315205 + }, + { + "epoch": 0.74, + "grad_norm": 2.34375, + "learning_rate": 0.00013945669613663286, + "loss": 2.0148, + "step": 315210 + }, + { + "epoch": 0.74, + "grad_norm": 2.109375, + "learning_rate": 0.00013945499779550349, + "loss": 2.0729, + "step": 315215 + }, + { + "epoch": 0.74, + "grad_norm": 2.234375, + "learning_rate": 0.0001394532994408956, + "loss": 2.0293, + "step": 315220 + }, + { + "epoch": 0.74, + "grad_norm": 2.78125, + "learning_rate": 0.00013945160107280973, + "loss": 2.2021, + "step": 315225 + }, + { + "epoch": 0.74, + "grad_norm": 2.28125, + "learning_rate": 0.00013944990269124647, + "loss": 2.0547, + "step": 315230 + }, + { + "epoch": 0.74, + "grad_norm": 2.03125, + "learning_rate": 0.00013944820429620641, + "loss": 2.2381, + "step": 315235 + }, + { + "epoch": 0.74, + "grad_norm": 2.203125, + "learning_rate": 0.00013944650588769016, + "loss": 2.1912, + "step": 315240 + }, + { + "epoch": 0.74, + "grad_norm": 2.34375, + "learning_rate": 0.00013944480746569824, + "loss": 2.0468, + "step": 315245 + }, + { + "epoch": 0.74, + "grad_norm": 2.234375, + "learning_rate": 0.00013944310903023127, + "loss": 2.1185, + "step": 315250 + }, + { + "epoch": 0.74, + "grad_norm": 2.15625, + "learning_rate": 0.0001394414105812898, + "loss": 2.2475, + "step": 315255 + }, + { + "epoch": 0.74, + "grad_norm": 2.53125, + "learning_rate": 0.00013943971211887447, + "loss": 2.1287, + "step": 315260 + }, + { + "epoch": 0.74, + "grad_norm": 2.5, + "learning_rate": 0.00013943801364298578, + "loss": 2.1956, + "step": 315265 + }, + { + "epoch": 0.74, + "grad_norm": 2.4375, + "learning_rate": 0.00013943631515362433, + "loss": 1.9657, + "step": 315270 + }, + { + "epoch": 0.74, + "grad_norm": 1.84375, + "learning_rate": 0.00013943461665079075, + "loss": 2.0123, + "step": 315275 + }, + { + "epoch": 0.74, + "grad_norm": 2.15625, + "learning_rate": 0.00013943291813448564, + "loss": 1.9851, + "step": 315280 + }, + { + "epoch": 0.74, + "grad_norm": 2.171875, + "learning_rate": 0.00013943121960470944, + "loss": 2.1371, + "step": 315285 + }, + { + "epoch": 0.74, + "grad_norm": 1.9765625, + "learning_rate": 0.00013942952106146285, + "loss": 1.8978, + "step": 315290 + }, + { + "epoch": 0.74, + "grad_norm": 2.546875, + "learning_rate": 0.00013942782250474643, + "loss": 2.1034, + "step": 315295 + }, + { + "epoch": 0.74, + "grad_norm": 2.40625, + "learning_rate": 0.00013942612393456078, + "loss": 2.2184, + "step": 315300 + }, + { + "epoch": 0.74, + "grad_norm": 1.9453125, + "learning_rate": 0.00013942442535090643, + "loss": 1.9673, + "step": 315305 + }, + { + "epoch": 0.74, + "grad_norm": 2.0625, + "learning_rate": 0.00013942272675378394, + "loss": 2.184, + "step": 315310 + }, + { + "epoch": 0.74, + "grad_norm": 1.96875, + "learning_rate": 0.00013942102814319397, + "loss": 2.0787, + "step": 315315 + }, + { + "epoch": 0.74, + "grad_norm": 1.9921875, + "learning_rate": 0.00013941932951913705, + "loss": 1.9061, + "step": 315320 + }, + { + "epoch": 0.74, + "grad_norm": 2.171875, + "learning_rate": 0.00013941763088161382, + "loss": 2.0452, + "step": 315325 + }, + { + "epoch": 0.74, + "grad_norm": 1.90625, + "learning_rate": 0.00013941593223062477, + "loss": 2.1896, + "step": 315330 + }, + { + "epoch": 0.74, + "grad_norm": 2.234375, + "learning_rate": 0.0001394142335661705, + "loss": 2.093, + "step": 315335 + }, + { + "epoch": 0.74, + "grad_norm": 2.28125, + "learning_rate": 0.00013941253488825164, + "loss": 2.1197, + "step": 315340 + }, + { + "epoch": 0.74, + "grad_norm": 2.515625, + "learning_rate": 0.00013941083619686874, + "loss": 1.9908, + "step": 315345 + }, + { + "epoch": 0.74, + "grad_norm": 2.421875, + "learning_rate": 0.0001394091374920224, + "loss": 2.0457, + "step": 315350 + }, + { + "epoch": 0.74, + "grad_norm": 2.0, + "learning_rate": 0.00013940743877371317, + "loss": 2.1311, + "step": 315355 + }, + { + "epoch": 0.74, + "grad_norm": 2.296875, + "learning_rate": 0.00013940574004194165, + "loss": 2.0926, + "step": 315360 + }, + { + "epoch": 0.74, + "grad_norm": 1.8359375, + "learning_rate": 0.00013940404129670836, + "loss": 1.9385, + "step": 315365 + }, + { + "epoch": 0.74, + "grad_norm": 1.921875, + "learning_rate": 0.00013940234253801402, + "loss": 1.8896, + "step": 315370 + }, + { + "epoch": 0.74, + "grad_norm": 2.1875, + "learning_rate": 0.0001394006437658591, + "loss": 1.9616, + "step": 315375 + }, + { + "epoch": 0.74, + "grad_norm": 2.375, + "learning_rate": 0.00013939894498024417, + "loss": 2.0896, + "step": 315380 + }, + { + "epoch": 0.74, + "grad_norm": 1.921875, + "learning_rate": 0.00013939724618116988, + "loss": 1.949, + "step": 315385 + }, + { + "epoch": 0.74, + "grad_norm": 2.546875, + "learning_rate": 0.00013939554736863675, + "loss": 1.9946, + "step": 315390 + }, + { + "epoch": 0.74, + "grad_norm": 2.015625, + "learning_rate": 0.0001393938485426454, + "loss": 2.2353, + "step": 315395 + }, + { + "epoch": 0.74, + "grad_norm": 2.125, + "learning_rate": 0.00013939214970319638, + "loss": 1.9866, + "step": 315400 + }, + { + "epoch": 0.74, + "grad_norm": 2.21875, + "learning_rate": 0.00013939045085029032, + "loss": 2.1228, + "step": 315405 + }, + { + "epoch": 0.74, + "grad_norm": 2.125, + "learning_rate": 0.00013938875198392776, + "loss": 2.1075, + "step": 315410 + }, + { + "epoch": 0.74, + "grad_norm": 2.5, + "learning_rate": 0.00013938705310410928, + "loss": 2.1405, + "step": 315415 + }, + { + "epoch": 0.74, + "grad_norm": 2.15625, + "learning_rate": 0.00013938535421083548, + "loss": 2.1389, + "step": 315420 + }, + { + "epoch": 0.74, + "grad_norm": 1.8671875, + "learning_rate": 0.0001393836553041069, + "loss": 2.1343, + "step": 315425 + }, + { + "epoch": 0.74, + "grad_norm": 1.890625, + "learning_rate": 0.00013938195638392418, + "loss": 2.0621, + "step": 315430 + }, + { + "epoch": 0.74, + "grad_norm": 2.25, + "learning_rate": 0.00013938025745028785, + "loss": 1.9646, + "step": 315435 + }, + { + "epoch": 0.74, + "grad_norm": 2.40625, + "learning_rate": 0.00013937855850319853, + "loss": 2.029, + "step": 315440 + }, + { + "epoch": 0.74, + "grad_norm": 2.03125, + "learning_rate": 0.00013937685954265672, + "loss": 2.0478, + "step": 315445 + }, + { + "epoch": 0.74, + "grad_norm": 2.125, + "learning_rate": 0.00013937516056866314, + "loss": 2.0273, + "step": 315450 + }, + { + "epoch": 0.74, + "grad_norm": 2.953125, + "learning_rate": 0.00013937346158121824, + "loss": 2.1401, + "step": 315455 + }, + { + "epoch": 0.74, + "grad_norm": 2.3125, + "learning_rate": 0.0001393717625803227, + "loss": 1.8775, + "step": 315460 + }, + { + "epoch": 0.74, + "grad_norm": 2.046875, + "learning_rate": 0.00013937006356597702, + "loss": 2.0684, + "step": 315465 + }, + { + "epoch": 0.74, + "grad_norm": 1.8671875, + "learning_rate": 0.00013936836453818177, + "loss": 2.0758, + "step": 315470 + }, + { + "epoch": 0.74, + "grad_norm": 2.5, + "learning_rate": 0.00013936666549693765, + "loss": 1.9324, + "step": 315475 + }, + { + "epoch": 0.74, + "grad_norm": 2.25, + "learning_rate": 0.00013936496644224514, + "loss": 1.9682, + "step": 315480 + }, + { + "epoch": 0.74, + "grad_norm": 2.359375, + "learning_rate": 0.00013936326737410483, + "loss": 2.127, + "step": 315485 + }, + { + "epoch": 0.74, + "grad_norm": 2.140625, + "learning_rate": 0.00013936156829251733, + "loss": 1.9885, + "step": 315490 + }, + { + "epoch": 0.74, + "grad_norm": 2.359375, + "learning_rate": 0.00013935986919748317, + "loss": 1.9388, + "step": 315495 + }, + { + "epoch": 0.74, + "grad_norm": 1.9921875, + "learning_rate": 0.00013935817008900298, + "loss": 2.1552, + "step": 315500 + }, + { + "epoch": 0.74, + "grad_norm": 2.125, + "learning_rate": 0.00013935647096707733, + "loss": 2.0863, + "step": 315505 + }, + { + "epoch": 0.74, + "grad_norm": 2.203125, + "learning_rate": 0.0001393547718317068, + "loss": 2.1312, + "step": 315510 + }, + { + "epoch": 0.74, + "grad_norm": 2.171875, + "learning_rate": 0.00013935307268289198, + "loss": 1.9673, + "step": 315515 + }, + { + "epoch": 0.74, + "grad_norm": 2.59375, + "learning_rate": 0.0001393513735206334, + "loss": 2.1459, + "step": 315520 + }, + { + "epoch": 0.74, + "grad_norm": 2.0625, + "learning_rate": 0.0001393496743449317, + "loss": 2.1605, + "step": 315525 + }, + { + "epoch": 0.74, + "grad_norm": 2.328125, + "learning_rate": 0.00013934797515578742, + "loss": 2.1476, + "step": 315530 + }, + { + "epoch": 0.74, + "grad_norm": 1.765625, + "learning_rate": 0.0001393462759532012, + "loss": 1.9487, + "step": 315535 + }, + { + "epoch": 0.74, + "grad_norm": 2.25, + "learning_rate": 0.00013934457673717355, + "loss": 2.1248, + "step": 315540 + }, + { + "epoch": 0.74, + "grad_norm": 2.296875, + "learning_rate": 0.00013934287750770507, + "loss": 2.0494, + "step": 315545 + }, + { + "epoch": 0.74, + "grad_norm": 2.421875, + "learning_rate": 0.00013934117826479636, + "loss": 2.1585, + "step": 315550 + }, + { + "epoch": 0.74, + "grad_norm": 2.234375, + "learning_rate": 0.000139339479008448, + "loss": 1.9951, + "step": 315555 + }, + { + "epoch": 0.74, + "grad_norm": 2.1875, + "learning_rate": 0.00013933777973866053, + "loss": 2.0247, + "step": 315560 + }, + { + "epoch": 0.74, + "grad_norm": 2.15625, + "learning_rate": 0.0001393360804554346, + "loss": 2.1088, + "step": 315565 + }, + { + "epoch": 0.74, + "grad_norm": 1.9609375, + "learning_rate": 0.00013933438115877074, + "loss": 2.0677, + "step": 315570 + }, + { + "epoch": 0.74, + "grad_norm": 2.140625, + "learning_rate": 0.00013933268184866955, + "loss": 2.008, + "step": 315575 + }, + { + "epoch": 0.74, + "grad_norm": 2.015625, + "learning_rate": 0.00013933098252513158, + "loss": 1.9474, + "step": 315580 + }, + { + "epoch": 0.74, + "grad_norm": 2.171875, + "learning_rate": 0.0001393292831881574, + "loss": 1.958, + "step": 315585 + }, + { + "epoch": 0.74, + "grad_norm": 2.328125, + "learning_rate": 0.0001393275838377477, + "loss": 2.045, + "step": 315590 + }, + { + "epoch": 0.74, + "grad_norm": 2.359375, + "learning_rate": 0.00013932588447390296, + "loss": 2.0961, + "step": 315595 + }, + { + "epoch": 0.74, + "grad_norm": 2.078125, + "learning_rate": 0.00013932418509662376, + "loss": 2.1137, + "step": 315600 + }, + { + "epoch": 0.74, + "grad_norm": 2.25, + "learning_rate": 0.0001393224857059107, + "loss": 2.0209, + "step": 315605 + }, + { + "epoch": 0.74, + "grad_norm": 2.1875, + "learning_rate": 0.00013932078630176438, + "loss": 2.054, + "step": 315610 + }, + { + "epoch": 0.74, + "grad_norm": 1.96875, + "learning_rate": 0.0001393190868841854, + "loss": 2.0003, + "step": 315615 + }, + { + "epoch": 0.74, + "grad_norm": 2.4375, + "learning_rate": 0.00013931738745317428, + "loss": 2.0062, + "step": 315620 + }, + { + "epoch": 0.74, + "grad_norm": 2.328125, + "learning_rate": 0.00013931568800873163, + "loss": 2.0653, + "step": 315625 + }, + { + "epoch": 0.74, + "grad_norm": 2.03125, + "learning_rate": 0.000139313988550858, + "loss": 2.0978, + "step": 315630 + }, + { + "epoch": 0.74, + "grad_norm": 2.546875, + "learning_rate": 0.000139312289079554, + "loss": 1.9378, + "step": 315635 + }, + { + "epoch": 0.74, + "grad_norm": 2.203125, + "learning_rate": 0.00013931058959482025, + "loss": 2.0373, + "step": 315640 + }, + { + "epoch": 0.74, + "grad_norm": 2.328125, + "learning_rate": 0.00013930889009665728, + "loss": 2.1094, + "step": 315645 + }, + { + "epoch": 0.74, + "grad_norm": 2.59375, + "learning_rate": 0.00013930719058506566, + "loss": 2.0908, + "step": 315650 + }, + { + "epoch": 0.74, + "grad_norm": 1.8984375, + "learning_rate": 0.000139305491060046, + "loss": 1.9403, + "step": 315655 + }, + { + "epoch": 0.74, + "grad_norm": 1.984375, + "learning_rate": 0.00013930379152159885, + "loss": 1.9834, + "step": 315660 + }, + { + "epoch": 0.74, + "grad_norm": 2.09375, + "learning_rate": 0.00013930209196972486, + "loss": 1.8003, + "step": 315665 + }, + { + "epoch": 0.74, + "grad_norm": 2.171875, + "learning_rate": 0.00013930039240442454, + "loss": 2.004, + "step": 315670 + }, + { + "epoch": 0.74, + "grad_norm": 2.203125, + "learning_rate": 0.0001392986928256985, + "loss": 2.1475, + "step": 315675 + }, + { + "epoch": 0.74, + "grad_norm": 2.390625, + "learning_rate": 0.0001392969932335473, + "loss": 1.9874, + "step": 315680 + }, + { + "epoch": 0.74, + "grad_norm": 2.03125, + "learning_rate": 0.00013929529362797155, + "loss": 1.9749, + "step": 315685 + }, + { + "epoch": 0.74, + "grad_norm": 2.296875, + "learning_rate": 0.00013929359400897178, + "loss": 2.1691, + "step": 315690 + }, + { + "epoch": 0.74, + "grad_norm": 2.546875, + "learning_rate": 0.00013929189437654864, + "loss": 1.742, + "step": 315695 + }, + { + "epoch": 0.74, + "grad_norm": 2.109375, + "learning_rate": 0.00013929019473070265, + "loss": 2.1102, + "step": 315700 + }, + { + "epoch": 0.74, + "grad_norm": 1.953125, + "learning_rate": 0.00013928849507143445, + "loss": 2.1623, + "step": 315705 + }, + { + "epoch": 0.74, + "grad_norm": 2.1875, + "learning_rate": 0.00013928679539874456, + "loss": 2.0589, + "step": 315710 + }, + { + "epoch": 0.74, + "grad_norm": 2.265625, + "learning_rate": 0.0001392850957126336, + "loss": 1.9673, + "step": 315715 + }, + { + "epoch": 0.74, + "grad_norm": 2.421875, + "learning_rate": 0.00013928339601310214, + "loss": 1.8089, + "step": 315720 + }, + { + "epoch": 0.74, + "grad_norm": 2.09375, + "learning_rate": 0.00013928169630015073, + "loss": 1.9133, + "step": 315725 + }, + { + "epoch": 0.74, + "grad_norm": 2.046875, + "learning_rate": 0.00013927999657378, + "loss": 2.086, + "step": 315730 + }, + { + "epoch": 0.74, + "grad_norm": 1.9453125, + "learning_rate": 0.00013927829683399055, + "loss": 2.2008, + "step": 315735 + }, + { + "epoch": 0.74, + "grad_norm": 1.921875, + "learning_rate": 0.00013927659708078287, + "loss": 1.984, + "step": 315740 + }, + { + "epoch": 0.74, + "grad_norm": 2.234375, + "learning_rate": 0.00013927489731415762, + "loss": 2.1221, + "step": 315745 + }, + { + "epoch": 0.74, + "grad_norm": 2.5, + "learning_rate": 0.00013927319753411535, + "loss": 2.0527, + "step": 315750 + }, + { + "epoch": 0.74, + "grad_norm": 2.03125, + "learning_rate": 0.00013927149774065664, + "loss": 2.0521, + "step": 315755 + }, + { + "epoch": 0.74, + "grad_norm": 1.6953125, + "learning_rate": 0.0001392697979337821, + "loss": 1.843, + "step": 315760 + }, + { + "epoch": 0.74, + "grad_norm": 2.25, + "learning_rate": 0.00013926809811349222, + "loss": 2.268, + "step": 315765 + }, + { + "epoch": 0.74, + "grad_norm": 2.625, + "learning_rate": 0.00013926639827978768, + "loss": 1.7456, + "step": 315770 + }, + { + "epoch": 0.74, + "grad_norm": 2.078125, + "learning_rate": 0.000139264698432669, + "loss": 2.119, + "step": 315775 + }, + { + "epoch": 0.74, + "grad_norm": 2.21875, + "learning_rate": 0.00013926299857213684, + "loss": 2.1571, + "step": 315780 + }, + { + "epoch": 0.74, + "grad_norm": 2.15625, + "learning_rate": 0.0001392612986981917, + "loss": 1.9243, + "step": 315785 + }, + { + "epoch": 0.74, + "grad_norm": 2.53125, + "learning_rate": 0.0001392595988108342, + "loss": 1.9797, + "step": 315790 + }, + { + "epoch": 0.74, + "grad_norm": 1.96875, + "learning_rate": 0.0001392578989100649, + "loss": 1.9491, + "step": 315795 + }, + { + "epoch": 0.74, + "grad_norm": 2.0625, + "learning_rate": 0.0001392561989958844, + "loss": 2.0346, + "step": 315800 + }, + { + "epoch": 0.74, + "grad_norm": 2.46875, + "learning_rate": 0.00013925449906829326, + "loss": 2.0837, + "step": 315805 + }, + { + "epoch": 0.74, + "grad_norm": 2.765625, + "learning_rate": 0.00013925279912729206, + "loss": 2.054, + "step": 315810 + }, + { + "epoch": 0.74, + "grad_norm": 2.109375, + "learning_rate": 0.0001392510991728814, + "loss": 2.0093, + "step": 315815 + }, + { + "epoch": 0.74, + "grad_norm": 2.125, + "learning_rate": 0.00013924939920506186, + "loss": 2.1789, + "step": 315820 + }, + { + "epoch": 0.74, + "grad_norm": 2.5, + "learning_rate": 0.000139247699223834, + "loss": 2.0648, + "step": 315825 + }, + { + "epoch": 0.74, + "grad_norm": 1.9296875, + "learning_rate": 0.00013924599922919843, + "loss": 1.9335, + "step": 315830 + }, + { + "epoch": 0.74, + "grad_norm": 2.265625, + "learning_rate": 0.0001392442992211557, + "loss": 1.9979, + "step": 315835 + }, + { + "epoch": 0.74, + "grad_norm": 2.0625, + "learning_rate": 0.0001392425991997064, + "loss": 2.2048, + "step": 315840 + }, + { + "epoch": 0.74, + "grad_norm": 2.3125, + "learning_rate": 0.00013924089916485114, + "loss": 2.2043, + "step": 315845 + }, + { + "epoch": 0.74, + "grad_norm": 2.1875, + "learning_rate": 0.0001392391991165905, + "loss": 2.0745, + "step": 315850 + }, + { + "epoch": 0.74, + "grad_norm": 1.8828125, + "learning_rate": 0.00013923749905492497, + "loss": 2.1054, + "step": 315855 + }, + { + "epoch": 0.74, + "grad_norm": 2.078125, + "learning_rate": 0.00013923579897985526, + "loss": 1.6906, + "step": 315860 + }, + { + "epoch": 0.74, + "grad_norm": 2.265625, + "learning_rate": 0.00013923409889138183, + "loss": 2.1495, + "step": 315865 + }, + { + "epoch": 0.74, + "grad_norm": 2.359375, + "learning_rate": 0.0001392323987895054, + "loss": 2.0138, + "step": 315870 + }, + { + "epoch": 0.74, + "grad_norm": 2.75, + "learning_rate": 0.0001392306986742264, + "loss": 2.1011, + "step": 315875 + }, + { + "epoch": 0.74, + "grad_norm": 2.25, + "learning_rate": 0.0001392289985455455, + "loss": 2.01, + "step": 315880 + }, + { + "epoch": 0.74, + "grad_norm": 2.265625, + "learning_rate": 0.00013922729840346328, + "loss": 2.0033, + "step": 315885 + }, + { + "epoch": 0.74, + "grad_norm": 1.9375, + "learning_rate": 0.0001392255982479803, + "loss": 1.979, + "step": 315890 + }, + { + "epoch": 0.74, + "grad_norm": 2.5, + "learning_rate": 0.00013922389807909715, + "loss": 2.1229, + "step": 315895 + }, + { + "epoch": 0.74, + "grad_norm": 2.140625, + "learning_rate": 0.00013922219789681438, + "loss": 2.1141, + "step": 315900 + }, + { + "epoch": 0.74, + "grad_norm": 2.046875, + "learning_rate": 0.0001392204977011326, + "loss": 1.9996, + "step": 315905 + }, + { + "epoch": 0.74, + "grad_norm": 2.515625, + "learning_rate": 0.00013921879749205238, + "loss": 1.9678, + "step": 315910 + }, + { + "epoch": 0.74, + "grad_norm": 1.7265625, + "learning_rate": 0.00013921709726957436, + "loss": 1.9303, + "step": 315915 + }, + { + "epoch": 0.74, + "grad_norm": 2.140625, + "learning_rate": 0.000139215397033699, + "loss": 2.1592, + "step": 315920 + }, + { + "epoch": 0.74, + "grad_norm": 2.015625, + "learning_rate": 0.00013921369678442696, + "loss": 2.0805, + "step": 315925 + }, + { + "epoch": 0.74, + "grad_norm": 2.203125, + "learning_rate": 0.00013921199652175883, + "loss": 2.1463, + "step": 315930 + }, + { + "epoch": 0.74, + "grad_norm": 2.28125, + "learning_rate": 0.0001392102962456952, + "loss": 2.2038, + "step": 315935 + }, + { + "epoch": 0.74, + "grad_norm": 2.421875, + "learning_rate": 0.00013920859595623658, + "loss": 2.0508, + "step": 315940 + }, + { + "epoch": 0.74, + "grad_norm": 2.21875, + "learning_rate": 0.00013920689565338363, + "loss": 2.0095, + "step": 315945 + }, + { + "epoch": 0.74, + "grad_norm": 2.15625, + "learning_rate": 0.00013920519533713684, + "loss": 2.0682, + "step": 315950 + }, + { + "epoch": 0.74, + "grad_norm": 1.546875, + "learning_rate": 0.00013920349500749687, + "loss": 1.8277, + "step": 315955 + }, + { + "epoch": 0.74, + "grad_norm": 2.28125, + "learning_rate": 0.0001392017946644643, + "loss": 2.1436, + "step": 315960 + }, + { + "epoch": 0.74, + "grad_norm": 2.109375, + "learning_rate": 0.00013920009430803964, + "loss": 2.2354, + "step": 315965 + }, + { + "epoch": 0.74, + "grad_norm": 2.0, + "learning_rate": 0.00013919839393822356, + "loss": 2.1968, + "step": 315970 + }, + { + "epoch": 0.74, + "grad_norm": 2.328125, + "learning_rate": 0.00013919669355501657, + "loss": 1.9707, + "step": 315975 + }, + { + "epoch": 0.74, + "grad_norm": 2.640625, + "learning_rate": 0.00013919499315841928, + "loss": 2.0979, + "step": 315980 + }, + { + "epoch": 0.74, + "grad_norm": 2.25, + "learning_rate": 0.0001391932927484323, + "loss": 2.0286, + "step": 315985 + }, + { + "epoch": 0.74, + "grad_norm": 2.21875, + "learning_rate": 0.00013919159232505615, + "loss": 2.0141, + "step": 315990 + }, + { + "epoch": 0.74, + "grad_norm": 2.140625, + "learning_rate": 0.00013918989188829142, + "loss": 2.0362, + "step": 315995 + }, + { + "epoch": 0.74, + "grad_norm": 2.53125, + "learning_rate": 0.00013918819143813875, + "loss": 2.029, + "step": 316000 + }, + { + "epoch": 0.74, + "grad_norm": 2.171875, + "learning_rate": 0.00013918649097459867, + "loss": 1.7999, + "step": 316005 + }, + { + "epoch": 0.74, + "grad_norm": 2.5625, + "learning_rate": 0.00013918479049767178, + "loss": 2.0764, + "step": 316010 + }, + { + "epoch": 0.74, + "grad_norm": 2.078125, + "learning_rate": 0.00013918309000735866, + "loss": 1.8564, + "step": 316015 + }, + { + "epoch": 0.74, + "grad_norm": 2.046875, + "learning_rate": 0.00013918138950365987, + "loss": 1.9746, + "step": 316020 + }, + { + "epoch": 0.74, + "grad_norm": 2.34375, + "learning_rate": 0.000139179688986576, + "loss": 2.2395, + "step": 316025 + }, + { + "epoch": 0.74, + "grad_norm": 2.4375, + "learning_rate": 0.0001391779884561077, + "loss": 1.988, + "step": 316030 + }, + { + "epoch": 0.74, + "grad_norm": 2.140625, + "learning_rate": 0.00013917628791225542, + "loss": 2.1088, + "step": 316035 + }, + { + "epoch": 0.74, + "grad_norm": 2.109375, + "learning_rate": 0.00013917458735501984, + "loss": 2.0341, + "step": 316040 + }, + { + "epoch": 0.74, + "grad_norm": 2.140625, + "learning_rate": 0.0001391728867844015, + "loss": 2.132, + "step": 316045 + }, + { + "epoch": 0.74, + "grad_norm": 2.21875, + "learning_rate": 0.000139171186200401, + "loss": 1.8746, + "step": 316050 + }, + { + "epoch": 0.74, + "grad_norm": 2.421875, + "learning_rate": 0.0001391694856030189, + "loss": 2.1894, + "step": 316055 + }, + { + "epoch": 0.74, + "grad_norm": 2.625, + "learning_rate": 0.00013916778499225579, + "loss": 2.0643, + "step": 316060 + }, + { + "epoch": 0.74, + "grad_norm": 2.203125, + "learning_rate": 0.00013916608436811224, + "loss": 2.1085, + "step": 316065 + }, + { + "epoch": 0.74, + "grad_norm": 2.1875, + "learning_rate": 0.00013916438373058886, + "loss": 2.0595, + "step": 316070 + }, + { + "epoch": 0.74, + "grad_norm": 2.171875, + "learning_rate": 0.00013916268307968626, + "loss": 2.0386, + "step": 316075 + }, + { + "epoch": 0.74, + "grad_norm": 1.875, + "learning_rate": 0.00013916098241540493, + "loss": 2.0899, + "step": 316080 + }, + { + "epoch": 0.74, + "grad_norm": 2.328125, + "learning_rate": 0.0001391592817377455, + "loss": 1.98, + "step": 316085 + }, + { + "epoch": 0.74, + "grad_norm": 2.4375, + "learning_rate": 0.00013915758104670855, + "loss": 1.9416, + "step": 316090 + }, + { + "epoch": 0.74, + "grad_norm": 2.8125, + "learning_rate": 0.0001391558803422947, + "loss": 1.9844, + "step": 316095 + }, + { + "epoch": 0.74, + "grad_norm": 2.390625, + "learning_rate": 0.00013915417962450446, + "loss": 2.0733, + "step": 316100 + }, + { + "epoch": 0.74, + "grad_norm": 2.265625, + "learning_rate": 0.00013915247889333842, + "loss": 2.0596, + "step": 316105 + }, + { + "epoch": 0.74, + "grad_norm": 2.40625, + "learning_rate": 0.0001391507781487972, + "loss": 2.1638, + "step": 316110 + }, + { + "epoch": 0.74, + "grad_norm": 2.09375, + "learning_rate": 0.0001391490773908814, + "loss": 2.0841, + "step": 316115 + }, + { + "epoch": 0.74, + "grad_norm": 1.953125, + "learning_rate": 0.00013914737661959153, + "loss": 2.003, + "step": 316120 + }, + { + "epoch": 0.74, + "grad_norm": 2.03125, + "learning_rate": 0.0001391456758349282, + "loss": 2.074, + "step": 316125 + }, + { + "epoch": 0.74, + "grad_norm": 2.328125, + "learning_rate": 0.000139143975036892, + "loss": 2.0774, + "step": 316130 + }, + { + "epoch": 0.74, + "grad_norm": 2.15625, + "learning_rate": 0.00013914227422548352, + "loss": 1.9178, + "step": 316135 + }, + { + "epoch": 0.74, + "grad_norm": 2.203125, + "learning_rate": 0.00013914057340070333, + "loss": 2.2371, + "step": 316140 + }, + { + "epoch": 0.74, + "grad_norm": 2.21875, + "learning_rate": 0.000139138872562552, + "loss": 2.1349, + "step": 316145 + }, + { + "epoch": 0.74, + "grad_norm": 2.046875, + "learning_rate": 0.00013913717171103014, + "loss": 1.9333, + "step": 316150 + }, + { + "epoch": 0.74, + "grad_norm": 2.328125, + "learning_rate": 0.00013913547084613828, + "loss": 1.9923, + "step": 316155 + }, + { + "epoch": 0.74, + "grad_norm": 2.421875, + "learning_rate": 0.00013913376996787705, + "loss": 2.0615, + "step": 316160 + }, + { + "epoch": 0.74, + "grad_norm": 2.109375, + "learning_rate": 0.00013913206907624704, + "loss": 2.0499, + "step": 316165 + }, + { + "epoch": 0.74, + "grad_norm": 2.078125, + "learning_rate": 0.00013913036817124876, + "loss": 1.9598, + "step": 316170 + }, + { + "epoch": 0.74, + "grad_norm": 2.109375, + "learning_rate": 0.00013912866725288288, + "loss": 2.0177, + "step": 316175 + }, + { + "epoch": 0.74, + "grad_norm": 2.109375, + "learning_rate": 0.0001391269663211499, + "loss": 1.7614, + "step": 316180 + }, + { + "epoch": 0.74, + "grad_norm": 2.4375, + "learning_rate": 0.00013912526537605046, + "loss": 2.1663, + "step": 316185 + }, + { + "epoch": 0.74, + "grad_norm": 2.234375, + "learning_rate": 0.00013912356441758513, + "loss": 2.1345, + "step": 316190 + }, + { + "epoch": 0.74, + "grad_norm": 2.515625, + "learning_rate": 0.00013912186344575443, + "loss": 2.1053, + "step": 316195 + }, + { + "epoch": 0.74, + "grad_norm": 2.125, + "learning_rate": 0.00013912016246055904, + "loss": 2.1117, + "step": 316200 + }, + { + "epoch": 0.74, + "grad_norm": 2.546875, + "learning_rate": 0.00013911846146199948, + "loss": 1.9235, + "step": 316205 + }, + { + "epoch": 0.74, + "grad_norm": 2.453125, + "learning_rate": 0.00013911676045007636, + "loss": 2.1242, + "step": 316210 + }, + { + "epoch": 0.74, + "grad_norm": 2.328125, + "learning_rate": 0.00013911505942479021, + "loss": 2.0367, + "step": 316215 + }, + { + "epoch": 0.74, + "grad_norm": 1.859375, + "learning_rate": 0.00013911335838614165, + "loss": 1.9718, + "step": 316220 + }, + { + "epoch": 0.74, + "grad_norm": 2.421875, + "learning_rate": 0.0001391116573341313, + "loss": 2.2537, + "step": 316225 + }, + { + "epoch": 0.74, + "grad_norm": 2.15625, + "learning_rate": 0.00013910995626875968, + "loss": 2.0181, + "step": 316230 + }, + { + "epoch": 0.74, + "grad_norm": 2.25, + "learning_rate": 0.0001391082551900274, + "loss": 2.0732, + "step": 316235 + }, + { + "epoch": 0.74, + "grad_norm": 2.59375, + "learning_rate": 0.000139106554097935, + "loss": 1.949, + "step": 316240 + }, + { + "epoch": 0.74, + "grad_norm": 2.40625, + "learning_rate": 0.0001391048529924831, + "loss": 2.1342, + "step": 316245 + }, + { + "epoch": 0.74, + "grad_norm": 1.8515625, + "learning_rate": 0.00013910315187367223, + "loss": 2.1391, + "step": 316250 + }, + { + "epoch": 0.74, + "grad_norm": 2.296875, + "learning_rate": 0.0001391014507415031, + "loss": 2.0198, + "step": 316255 + }, + { + "epoch": 0.74, + "grad_norm": 2.28125, + "learning_rate": 0.00013909974959597617, + "loss": 1.9632, + "step": 316260 + }, + { + "epoch": 0.74, + "grad_norm": 2.515625, + "learning_rate": 0.00013909804843709205, + "loss": 1.8683, + "step": 316265 + }, + { + "epoch": 0.74, + "grad_norm": 2.0625, + "learning_rate": 0.00013909634726485135, + "loss": 2.1695, + "step": 316270 + }, + { + "epoch": 0.74, + "grad_norm": 2.0625, + "learning_rate": 0.0001390946460792546, + "loss": 2.0439, + "step": 316275 + }, + { + "epoch": 0.74, + "grad_norm": 2.53125, + "learning_rate": 0.00013909294488030242, + "loss": 2.0876, + "step": 316280 + }, + { + "epoch": 0.74, + "grad_norm": 2.609375, + "learning_rate": 0.00013909124366799538, + "loss": 2.0898, + "step": 316285 + }, + { + "epoch": 0.74, + "grad_norm": 2.28125, + "learning_rate": 0.00013908954244233406, + "loss": 2.0102, + "step": 316290 + }, + { + "epoch": 0.74, + "grad_norm": 1.890625, + "learning_rate": 0.00013908784120331903, + "loss": 1.901, + "step": 316295 + }, + { + "epoch": 0.74, + "grad_norm": 2.28125, + "learning_rate": 0.0001390861399509509, + "loss": 2.14, + "step": 316300 + }, + { + "epoch": 0.74, + "grad_norm": 2.15625, + "learning_rate": 0.00013908443868523024, + "loss": 2.0668, + "step": 316305 + }, + { + "epoch": 0.74, + "grad_norm": 2.078125, + "learning_rate": 0.00013908273740615763, + "loss": 1.9429, + "step": 316310 + }, + { + "epoch": 0.74, + "grad_norm": 1.984375, + "learning_rate": 0.00013908103611373363, + "loss": 2.1515, + "step": 316315 + }, + { + "epoch": 0.74, + "grad_norm": 2.25, + "learning_rate": 0.00013907933480795888, + "loss": 2.2328, + "step": 316320 + }, + { + "epoch": 0.74, + "grad_norm": 2.65625, + "learning_rate": 0.00013907763348883388, + "loss": 2.0226, + "step": 316325 + }, + { + "epoch": 0.74, + "grad_norm": 2.5, + "learning_rate": 0.00013907593215635927, + "loss": 1.905, + "step": 316330 + }, + { + "epoch": 0.74, + "grad_norm": 2.34375, + "learning_rate": 0.00013907423081053558, + "loss": 1.8658, + "step": 316335 + }, + { + "epoch": 0.74, + "grad_norm": 2.25, + "learning_rate": 0.00013907252945136346, + "loss": 2.0055, + "step": 316340 + }, + { + "epoch": 0.74, + "grad_norm": 2.3125, + "learning_rate": 0.00013907082807884346, + "loss": 2.1869, + "step": 316345 + }, + { + "epoch": 0.74, + "grad_norm": 2.390625, + "learning_rate": 0.00013906912669297614, + "loss": 2.1037, + "step": 316350 + }, + { + "epoch": 0.74, + "grad_norm": 2.046875, + "learning_rate": 0.00013906742529376208, + "loss": 2.1432, + "step": 316355 + }, + { + "epoch": 0.74, + "grad_norm": 2.0625, + "learning_rate": 0.00013906572388120193, + "loss": 2.2124, + "step": 316360 + }, + { + "epoch": 0.74, + "grad_norm": 2.296875, + "learning_rate": 0.00013906402245529616, + "loss": 1.954, + "step": 316365 + }, + { + "epoch": 0.74, + "grad_norm": 1.984375, + "learning_rate": 0.00013906232101604547, + "loss": 2.0774, + "step": 316370 + }, + { + "epoch": 0.74, + "grad_norm": 1.9296875, + "learning_rate": 0.00013906061956345033, + "loss": 2.1366, + "step": 316375 + }, + { + "epoch": 0.74, + "grad_norm": 2.90625, + "learning_rate": 0.00013905891809751138, + "loss": 2.0223, + "step": 316380 + }, + { + "epoch": 0.74, + "grad_norm": 2.28125, + "learning_rate": 0.0001390572166182292, + "loss": 1.9915, + "step": 316385 + }, + { + "epoch": 0.74, + "grad_norm": 2.3125, + "learning_rate": 0.0001390555151256044, + "loss": 1.9747, + "step": 316390 + }, + { + "epoch": 0.74, + "grad_norm": 2.46875, + "learning_rate": 0.00013905381361963755, + "loss": 2.0388, + "step": 316395 + }, + { + "epoch": 0.74, + "grad_norm": 2.0625, + "learning_rate": 0.00013905211210032915, + "loss": 1.9941, + "step": 316400 + }, + { + "epoch": 0.74, + "grad_norm": 2.34375, + "learning_rate": 0.00013905041056767983, + "loss": 1.9722, + "step": 316405 + }, + { + "epoch": 0.74, + "grad_norm": 2.15625, + "learning_rate": 0.00013904870902169022, + "loss": 2.1387, + "step": 316410 + }, + { + "epoch": 0.74, + "grad_norm": 2.25, + "learning_rate": 0.00013904700746236086, + "loss": 2.1175, + "step": 316415 + }, + { + "epoch": 0.74, + "grad_norm": 2.1875, + "learning_rate": 0.00013904530588969231, + "loss": 2.2265, + "step": 316420 + }, + { + "epoch": 0.74, + "grad_norm": 2.328125, + "learning_rate": 0.00013904360430368518, + "loss": 2.1757, + "step": 316425 + }, + { + "epoch": 0.74, + "grad_norm": 2.03125, + "learning_rate": 0.00013904190270434008, + "loss": 2.1211, + "step": 316430 + }, + { + "epoch": 0.74, + "grad_norm": 2.359375, + "learning_rate": 0.0001390402010916575, + "loss": 2.0956, + "step": 316435 + }, + { + "epoch": 0.74, + "grad_norm": 2.125, + "learning_rate": 0.00013903849946563814, + "loss": 2.0713, + "step": 316440 + }, + { + "epoch": 0.74, + "grad_norm": 2.046875, + "learning_rate": 0.00013903679782628249, + "loss": 1.9866, + "step": 316445 + }, + { + "epoch": 0.74, + "grad_norm": 1.8671875, + "learning_rate": 0.00013903509617359117, + "loss": 2.0784, + "step": 316450 + }, + { + "epoch": 0.74, + "grad_norm": 2.265625, + "learning_rate": 0.00013903339450756474, + "loss": 2.1612, + "step": 316455 + }, + { + "epoch": 0.74, + "grad_norm": 2.234375, + "learning_rate": 0.00013903169282820382, + "loss": 2.0512, + "step": 316460 + }, + { + "epoch": 0.74, + "grad_norm": 2.421875, + "learning_rate": 0.00013902999113550894, + "loss": 2.0099, + "step": 316465 + }, + { + "epoch": 0.74, + "grad_norm": 2.328125, + "learning_rate": 0.0001390282894294807, + "loss": 2.0709, + "step": 316470 + }, + { + "epoch": 0.74, + "grad_norm": 1.8671875, + "learning_rate": 0.0001390265877101197, + "loss": 2.0683, + "step": 316475 + }, + { + "epoch": 0.74, + "grad_norm": 2.484375, + "learning_rate": 0.00013902488597742653, + "loss": 1.9658, + "step": 316480 + }, + { + "epoch": 0.74, + "grad_norm": 1.8671875, + "learning_rate": 0.00013902318423140178, + "loss": 2.0673, + "step": 316485 + }, + { + "epoch": 0.74, + "grad_norm": 1.75, + "learning_rate": 0.00013902148247204593, + "loss": 2.0186, + "step": 316490 + }, + { + "epoch": 0.74, + "grad_norm": 1.96875, + "learning_rate": 0.00013901978069935967, + "loss": 1.9797, + "step": 316495 + }, + { + "epoch": 0.74, + "grad_norm": 2.53125, + "learning_rate": 0.0001390180789133435, + "loss": 2.1156, + "step": 316500 + }, + { + "epoch": 0.74, + "grad_norm": 2.34375, + "learning_rate": 0.00013901637711399812, + "loss": 2.1188, + "step": 316505 + }, + { + "epoch": 0.74, + "grad_norm": 1.953125, + "learning_rate": 0.000139014675301324, + "loss": 2.0307, + "step": 316510 + }, + { + "epoch": 0.74, + "grad_norm": 2.296875, + "learning_rate": 0.00013901297347532175, + "loss": 2.2283, + "step": 316515 + }, + { + "epoch": 0.74, + "grad_norm": 2.171875, + "learning_rate": 0.00013901127163599197, + "loss": 2.2101, + "step": 316520 + }, + { + "epoch": 0.74, + "grad_norm": 1.8828125, + "learning_rate": 0.00013900956978333526, + "loss": 1.9316, + "step": 316525 + }, + { + "epoch": 0.74, + "grad_norm": 1.9765625, + "learning_rate": 0.00013900786791735213, + "loss": 2.1053, + "step": 316530 + }, + { + "epoch": 0.74, + "grad_norm": 2.734375, + "learning_rate": 0.00013900616603804324, + "loss": 1.8825, + "step": 316535 + }, + { + "epoch": 0.74, + "grad_norm": 2.8125, + "learning_rate": 0.00013900446414540912, + "loss": 2.1188, + "step": 316540 + }, + { + "epoch": 0.74, + "grad_norm": 1.9765625, + "learning_rate": 0.00013900276223945035, + "loss": 2.0936, + "step": 316545 + }, + { + "epoch": 0.74, + "grad_norm": 2.109375, + "learning_rate": 0.00013900106032016754, + "loss": 2.0844, + "step": 316550 + }, + { + "epoch": 0.74, + "grad_norm": 2.46875, + "learning_rate": 0.00013899935838756127, + "loss": 2.13, + "step": 316555 + }, + { + "epoch": 0.74, + "grad_norm": 2.25, + "learning_rate": 0.00013899765644163212, + "loss": 2.2154, + "step": 316560 + }, + { + "epoch": 0.74, + "grad_norm": 2.515625, + "learning_rate": 0.00013899595448238066, + "loss": 2.167, + "step": 316565 + }, + { + "epoch": 0.74, + "grad_norm": 2.140625, + "learning_rate": 0.00013899425250980745, + "loss": 2.1845, + "step": 316570 + }, + { + "epoch": 0.75, + "grad_norm": 2.296875, + "learning_rate": 0.0001389925505239131, + "loss": 2.103, + "step": 316575 + }, + { + "epoch": 0.75, + "grad_norm": 1.9140625, + "learning_rate": 0.00013899084852469823, + "loss": 2.0808, + "step": 316580 + }, + { + "epoch": 0.75, + "grad_norm": 1.953125, + "learning_rate": 0.00013898914651216333, + "loss": 1.8303, + "step": 316585 + }, + { + "epoch": 0.75, + "grad_norm": 2.375, + "learning_rate": 0.00013898744448630904, + "loss": 1.9683, + "step": 316590 + }, + { + "epoch": 0.75, + "grad_norm": 2.359375, + "learning_rate": 0.00013898574244713597, + "loss": 2.0691, + "step": 316595 + }, + { + "epoch": 0.75, + "grad_norm": 2.265625, + "learning_rate": 0.00013898404039464462, + "loss": 1.9708, + "step": 316600 + }, + { + "epoch": 0.75, + "grad_norm": 2.625, + "learning_rate": 0.00013898233832883563, + "loss": 1.9077, + "step": 316605 + }, + { + "epoch": 0.75, + "grad_norm": 2.390625, + "learning_rate": 0.00013898063624970955, + "loss": 2.0098, + "step": 316610 + }, + { + "epoch": 0.75, + "grad_norm": 2.40625, + "learning_rate": 0.00013897893415726703, + "loss": 2.0773, + "step": 316615 + }, + { + "epoch": 0.75, + "grad_norm": 2.515625, + "learning_rate": 0.00013897723205150858, + "loss": 2.0938, + "step": 316620 + }, + { + "epoch": 0.75, + "grad_norm": 2.390625, + "learning_rate": 0.00013897552993243474, + "loss": 2.1138, + "step": 316625 + }, + { + "epoch": 0.75, + "grad_norm": 2.484375, + "learning_rate": 0.0001389738278000462, + "loss": 2.1869, + "step": 316630 + }, + { + "epoch": 0.75, + "grad_norm": 2.09375, + "learning_rate": 0.00013897212565434347, + "loss": 2.0545, + "step": 316635 + }, + { + "epoch": 0.75, + "grad_norm": 1.953125, + "learning_rate": 0.00013897042349532722, + "loss": 1.9861, + "step": 316640 + }, + { + "epoch": 0.75, + "grad_norm": 2.171875, + "learning_rate": 0.0001389687213229979, + "loss": 2.1374, + "step": 316645 + }, + { + "epoch": 0.75, + "grad_norm": 2.265625, + "learning_rate": 0.00013896701913735618, + "loss": 2.0546, + "step": 316650 + }, + { + "epoch": 0.75, + "grad_norm": 2.328125, + "learning_rate": 0.00013896531693840261, + "loss": 1.8563, + "step": 316655 + }, + { + "epoch": 0.75, + "grad_norm": 2.5, + "learning_rate": 0.00013896361472613778, + "loss": 2.0803, + "step": 316660 + }, + { + "epoch": 0.75, + "grad_norm": 2.109375, + "learning_rate": 0.0001389619125005623, + "loss": 2.0351, + "step": 316665 + }, + { + "epoch": 0.75, + "grad_norm": 2.484375, + "learning_rate": 0.0001389602102616767, + "loss": 1.9298, + "step": 316670 + }, + { + "epoch": 0.75, + "grad_norm": 2.34375, + "learning_rate": 0.00013895850800948157, + "loss": 1.837, + "step": 316675 + }, + { + "epoch": 0.75, + "grad_norm": 2.046875, + "learning_rate": 0.00013895680574397754, + "loss": 2.1504, + "step": 316680 + }, + { + "epoch": 0.75, + "grad_norm": 2.5, + "learning_rate": 0.00013895510346516516, + "loss": 1.9509, + "step": 316685 + }, + { + "epoch": 0.75, + "grad_norm": 2.359375, + "learning_rate": 0.00013895340117304502, + "loss": 2.175, + "step": 316690 + }, + { + "epoch": 0.75, + "grad_norm": 2.015625, + "learning_rate": 0.00013895169886761764, + "loss": 2.245, + "step": 316695 + }, + { + "epoch": 0.75, + "grad_norm": 2.5625, + "learning_rate": 0.0001389499965488837, + "loss": 2.1019, + "step": 316700 + }, + { + "epoch": 0.75, + "grad_norm": 2.109375, + "learning_rate": 0.00013894829421684372, + "loss": 2.1974, + "step": 316705 + }, + { + "epoch": 0.75, + "grad_norm": 2.03125, + "learning_rate": 0.00013894659187149827, + "loss": 2.174, + "step": 316710 + }, + { + "epoch": 0.75, + "grad_norm": 2.359375, + "learning_rate": 0.00013894488951284799, + "loss": 2.1208, + "step": 316715 + }, + { + "epoch": 0.75, + "grad_norm": 2.234375, + "learning_rate": 0.00013894318714089343, + "loss": 2.2079, + "step": 316720 + }, + { + "epoch": 0.75, + "grad_norm": 2.25, + "learning_rate": 0.00013894148475563517, + "loss": 2.2999, + "step": 316725 + }, + { + "epoch": 0.75, + "grad_norm": 2.328125, + "learning_rate": 0.00013893978235707377, + "loss": 2.0852, + "step": 316730 + }, + { + "epoch": 0.75, + "grad_norm": 2.265625, + "learning_rate": 0.00013893807994520984, + "loss": 2.0331, + "step": 316735 + }, + { + "epoch": 0.75, + "grad_norm": 2.1875, + "learning_rate": 0.000138936377520044, + "loss": 1.9475, + "step": 316740 + }, + { + "epoch": 0.75, + "grad_norm": 2.171875, + "learning_rate": 0.00013893467508157672, + "loss": 2.0546, + "step": 316745 + }, + { + "epoch": 0.75, + "grad_norm": 2.515625, + "learning_rate": 0.0001389329726298087, + "loss": 1.888, + "step": 316750 + }, + { + "epoch": 0.75, + "grad_norm": 2.09375, + "learning_rate": 0.00013893127016474045, + "loss": 1.9384, + "step": 316755 + }, + { + "epoch": 0.75, + "grad_norm": 2.078125, + "learning_rate": 0.00013892956768637256, + "loss": 2.1004, + "step": 316760 + }, + { + "epoch": 0.75, + "grad_norm": 2.5, + "learning_rate": 0.00013892786519470565, + "loss": 2.0537, + "step": 316765 + }, + { + "epoch": 0.75, + "grad_norm": 2.796875, + "learning_rate": 0.00013892616268974022, + "loss": 1.9304, + "step": 316770 + }, + { + "epoch": 0.75, + "grad_norm": 2.328125, + "learning_rate": 0.00013892446017147697, + "loss": 1.972, + "step": 316775 + }, + { + "epoch": 0.75, + "grad_norm": 1.8671875, + "learning_rate": 0.00013892275763991642, + "loss": 2.2111, + "step": 316780 + }, + { + "epoch": 0.75, + "grad_norm": 2.15625, + "learning_rate": 0.0001389210550950591, + "loss": 2.0782, + "step": 316785 + }, + { + "epoch": 0.75, + "grad_norm": 2.296875, + "learning_rate": 0.00013891935253690566, + "loss": 2.2416, + "step": 316790 + }, + { + "epoch": 0.75, + "grad_norm": 2.234375, + "learning_rate": 0.00013891764996545668, + "loss": 2.0455, + "step": 316795 + }, + { + "epoch": 0.75, + "grad_norm": 3.015625, + "learning_rate": 0.00013891594738071273, + "loss": 1.981, + "step": 316800 + }, + { + "epoch": 0.75, + "grad_norm": 2.71875, + "learning_rate": 0.00013891424478267437, + "loss": 2.0419, + "step": 316805 + }, + { + "epoch": 0.75, + "grad_norm": 2.09375, + "learning_rate": 0.0001389125421713422, + "loss": 1.8724, + "step": 316810 + }, + { + "epoch": 0.75, + "grad_norm": 2.203125, + "learning_rate": 0.0001389108395467168, + "loss": 2.0298, + "step": 316815 + }, + { + "epoch": 0.75, + "grad_norm": 2.546875, + "learning_rate": 0.00013890913690879877, + "loss": 2.0468, + "step": 316820 + }, + { + "epoch": 0.75, + "grad_norm": 2.171875, + "learning_rate": 0.00013890743425758865, + "loss": 2.2007, + "step": 316825 + }, + { + "epoch": 0.75, + "grad_norm": 2.1875, + "learning_rate": 0.00013890573159308707, + "loss": 1.9932, + "step": 316830 + }, + { + "epoch": 0.75, + "grad_norm": 2.03125, + "learning_rate": 0.00013890402891529453, + "loss": 1.746, + "step": 316835 + }, + { + "epoch": 0.75, + "grad_norm": 2.25, + "learning_rate": 0.0001389023262242117, + "loss": 2.0964, + "step": 316840 + }, + { + "epoch": 0.75, + "grad_norm": 2.171875, + "learning_rate": 0.00013890062351983916, + "loss": 2.1516, + "step": 316845 + }, + { + "epoch": 0.75, + "grad_norm": 2.125, + "learning_rate": 0.00013889892080217746, + "loss": 2.0315, + "step": 316850 + }, + { + "epoch": 0.75, + "grad_norm": 2.3125, + "learning_rate": 0.00013889721807122714, + "loss": 1.84, + "step": 316855 + }, + { + "epoch": 0.75, + "grad_norm": 2.375, + "learning_rate": 0.00013889551532698886, + "loss": 1.963, + "step": 316860 + }, + { + "epoch": 0.75, + "grad_norm": 2.515625, + "learning_rate": 0.00013889381256946315, + "loss": 1.9971, + "step": 316865 + }, + { + "epoch": 0.75, + "grad_norm": 2.140625, + "learning_rate": 0.00013889210979865058, + "loss": 2.1185, + "step": 316870 + }, + { + "epoch": 0.75, + "grad_norm": 1.9609375, + "learning_rate": 0.0001388904070145518, + "loss": 2.0252, + "step": 316875 + }, + { + "epoch": 0.75, + "grad_norm": 2.0, + "learning_rate": 0.00013888870421716734, + "loss": 2.2292, + "step": 316880 + }, + { + "epoch": 0.75, + "grad_norm": 2.28125, + "learning_rate": 0.0001388870014064978, + "loss": 1.9784, + "step": 316885 + }, + { + "epoch": 0.75, + "grad_norm": 2.0, + "learning_rate": 0.00013888529858254375, + "loss": 2.1194, + "step": 316890 + }, + { + "epoch": 0.75, + "grad_norm": 2.375, + "learning_rate": 0.00013888359574530578, + "loss": 2.0486, + "step": 316895 + }, + { + "epoch": 0.75, + "grad_norm": 2.015625, + "learning_rate": 0.00013888189289478446, + "loss": 1.959, + "step": 316900 + }, + { + "epoch": 0.75, + "grad_norm": 1.921875, + "learning_rate": 0.0001388801900309804, + "loss": 2.036, + "step": 316905 + }, + { + "epoch": 0.75, + "grad_norm": 2.109375, + "learning_rate": 0.00013887848715389412, + "loss": 2.0172, + "step": 316910 + }, + { + "epoch": 0.75, + "grad_norm": 2.390625, + "learning_rate": 0.00013887678426352632, + "loss": 1.9825, + "step": 316915 + }, + { + "epoch": 0.75, + "grad_norm": 2.28125, + "learning_rate": 0.00013887508135987744, + "loss": 2.2337, + "step": 316920 + }, + { + "epoch": 0.75, + "grad_norm": 2.53125, + "learning_rate": 0.00013887337844294815, + "loss": 1.923, + "step": 316925 + }, + { + "epoch": 0.75, + "grad_norm": 2.234375, + "learning_rate": 0.00013887167551273897, + "loss": 2.056, + "step": 316930 + }, + { + "epoch": 0.75, + "grad_norm": 2.21875, + "learning_rate": 0.00013886997256925057, + "loss": 2.2229, + "step": 316935 + }, + { + "epoch": 0.75, + "grad_norm": 2.109375, + "learning_rate": 0.0001388682696124835, + "loss": 2.0645, + "step": 316940 + }, + { + "epoch": 0.75, + "grad_norm": 2.03125, + "learning_rate": 0.00013886656664243823, + "loss": 2.1449, + "step": 316945 + }, + { + "epoch": 0.75, + "grad_norm": 2.09375, + "learning_rate": 0.00013886486365911554, + "loss": 2.0011, + "step": 316950 + }, + { + "epoch": 0.75, + "grad_norm": 2.03125, + "learning_rate": 0.00013886316066251585, + "loss": 2.1457, + "step": 316955 + }, + { + "epoch": 0.75, + "grad_norm": 1.90625, + "learning_rate": 0.00013886145765263982, + "loss": 2.1601, + "step": 316960 + }, + { + "epoch": 0.75, + "grad_norm": 1.875, + "learning_rate": 0.00013885975462948801, + "loss": 2.2699, + "step": 316965 + }, + { + "epoch": 0.75, + "grad_norm": 2.421875, + "learning_rate": 0.000138858051593061, + "loss": 2.1433, + "step": 316970 + }, + { + "epoch": 0.75, + "grad_norm": 2.515625, + "learning_rate": 0.00013885634854335935, + "loss": 2.128, + "step": 316975 + }, + { + "epoch": 0.75, + "grad_norm": 2.40625, + "learning_rate": 0.0001388546454803837, + "loss": 2.1265, + "step": 316980 + }, + { + "epoch": 0.75, + "grad_norm": 1.8984375, + "learning_rate": 0.0001388529424041346, + "loss": 1.8645, + "step": 316985 + }, + { + "epoch": 0.75, + "grad_norm": 2.65625, + "learning_rate": 0.00013885123931461262, + "loss": 1.9728, + "step": 316990 + }, + { + "epoch": 0.75, + "grad_norm": 2.171875, + "learning_rate": 0.00013884953621181835, + "loss": 2.3124, + "step": 316995 + }, + { + "epoch": 0.75, + "grad_norm": 1.59375, + "learning_rate": 0.00013884783309575237, + "loss": 1.9776, + "step": 317000 + }, + { + "epoch": 0.75, + "grad_norm": 2.375, + "learning_rate": 0.00013884612996641527, + "loss": 2.123, + "step": 317005 + }, + { + "epoch": 0.75, + "grad_norm": 2.21875, + "learning_rate": 0.00013884442682380764, + "loss": 2.1136, + "step": 317010 + }, + { + "epoch": 0.75, + "grad_norm": 1.6796875, + "learning_rate": 0.00013884272366793002, + "loss": 1.8866, + "step": 317015 + }, + { + "epoch": 0.75, + "grad_norm": 2.703125, + "learning_rate": 0.00013884102049878304, + "loss": 1.9845, + "step": 317020 + }, + { + "epoch": 0.75, + "grad_norm": 2.375, + "learning_rate": 0.00013883931731636725, + "loss": 2.0884, + "step": 317025 + }, + { + "epoch": 0.75, + "grad_norm": 2.40625, + "learning_rate": 0.00013883761412068326, + "loss": 1.9436, + "step": 317030 + }, + { + "epoch": 0.75, + "grad_norm": 2.3125, + "learning_rate": 0.00013883591091173165, + "loss": 2.0157, + "step": 317035 + }, + { + "epoch": 0.75, + "grad_norm": 2.234375, + "learning_rate": 0.00013883420768951297, + "loss": 2.0766, + "step": 317040 + }, + { + "epoch": 0.75, + "grad_norm": 2.4375, + "learning_rate": 0.00013883250445402781, + "loss": 1.9534, + "step": 317045 + }, + { + "epoch": 0.75, + "grad_norm": 2.171875, + "learning_rate": 0.00013883080120527676, + "loss": 2.054, + "step": 317050 + }, + { + "epoch": 0.75, + "grad_norm": 2.84375, + "learning_rate": 0.00013882909794326042, + "loss": 2.227, + "step": 317055 + }, + { + "epoch": 0.75, + "grad_norm": 2.34375, + "learning_rate": 0.00013882739466797937, + "loss": 2.1147, + "step": 317060 + }, + { + "epoch": 0.75, + "grad_norm": 1.8828125, + "learning_rate": 0.00013882569137943414, + "loss": 1.9919, + "step": 317065 + }, + { + "epoch": 0.75, + "grad_norm": 1.90625, + "learning_rate": 0.00013882398807762538, + "loss": 1.8527, + "step": 317070 + }, + { + "epoch": 0.75, + "grad_norm": 2.390625, + "learning_rate": 0.00013882228476255366, + "loss": 2.1346, + "step": 317075 + }, + { + "epoch": 0.75, + "grad_norm": 2.03125, + "learning_rate": 0.00013882058143421949, + "loss": 1.8181, + "step": 317080 + }, + { + "epoch": 0.75, + "grad_norm": 2.203125, + "learning_rate": 0.00013881887809262354, + "loss": 2.1036, + "step": 317085 + }, + { + "epoch": 0.75, + "grad_norm": 2.109375, + "learning_rate": 0.0001388171747377663, + "loss": 1.9452, + "step": 317090 + }, + { + "epoch": 0.75, + "grad_norm": 2.21875, + "learning_rate": 0.00013881547136964853, + "loss": 2.1165, + "step": 317095 + }, + { + "epoch": 0.75, + "grad_norm": 1.9765625, + "learning_rate": 0.00013881376798827058, + "loss": 2.068, + "step": 317100 + }, + { + "epoch": 0.75, + "grad_norm": 2.46875, + "learning_rate": 0.0001388120645936332, + "loss": 1.9953, + "step": 317105 + }, + { + "epoch": 0.75, + "grad_norm": 2.25, + "learning_rate": 0.00013881036118573687, + "loss": 1.9464, + "step": 317110 + }, + { + "epoch": 0.75, + "grad_norm": 2.59375, + "learning_rate": 0.00013880865776458228, + "loss": 2.0217, + "step": 317115 + }, + { + "epoch": 0.75, + "grad_norm": 2.28125, + "learning_rate": 0.0001388069543301699, + "loss": 1.9806, + "step": 317120 + }, + { + "epoch": 0.75, + "grad_norm": 2.328125, + "learning_rate": 0.00013880525088250036, + "loss": 1.9682, + "step": 317125 + }, + { + "epoch": 0.75, + "grad_norm": 2.59375, + "learning_rate": 0.00013880354742157425, + "loss": 2.1195, + "step": 317130 + }, + { + "epoch": 0.75, + "grad_norm": 1.8203125, + "learning_rate": 0.00013880184394739214, + "loss": 2.0211, + "step": 317135 + }, + { + "epoch": 0.75, + "grad_norm": 2.328125, + "learning_rate": 0.00013880014045995463, + "loss": 2.0742, + "step": 317140 + }, + { + "epoch": 0.75, + "grad_norm": 2.375, + "learning_rate": 0.00013879843695926232, + "loss": 2.023, + "step": 317145 + }, + { + "epoch": 0.75, + "grad_norm": 2.25, + "learning_rate": 0.0001387967334453157, + "loss": 2.1795, + "step": 317150 + }, + { + "epoch": 0.75, + "grad_norm": 2.125, + "learning_rate": 0.00013879502991811545, + "loss": 2.1466, + "step": 317155 + }, + { + "epoch": 0.75, + "grad_norm": 2.296875, + "learning_rate": 0.0001387933263776621, + "loss": 2.1975, + "step": 317160 + }, + { + "epoch": 0.75, + "grad_norm": 2.203125, + "learning_rate": 0.00013879162282395623, + "loss": 2.033, + "step": 317165 + }, + { + "epoch": 0.75, + "grad_norm": 2.3125, + "learning_rate": 0.00013878991925699846, + "loss": 1.8387, + "step": 317170 + }, + { + "epoch": 0.75, + "grad_norm": 2.296875, + "learning_rate": 0.00013878821567678935, + "loss": 2.0774, + "step": 317175 + }, + { + "epoch": 0.75, + "grad_norm": 1.828125, + "learning_rate": 0.00013878651208332947, + "loss": 2.0895, + "step": 317180 + }, + { + "epoch": 0.75, + "grad_norm": 2.25, + "learning_rate": 0.0001387848084766194, + "loss": 2.0093, + "step": 317185 + }, + { + "epoch": 0.75, + "grad_norm": 2.109375, + "learning_rate": 0.00013878310485665978, + "loss": 2.1236, + "step": 317190 + }, + { + "epoch": 0.75, + "grad_norm": 2.390625, + "learning_rate": 0.00013878140122345112, + "loss": 1.9883, + "step": 317195 + }, + { + "epoch": 0.75, + "grad_norm": 1.953125, + "learning_rate": 0.00013877969757699403, + "loss": 2.1804, + "step": 317200 + }, + { + "epoch": 0.75, + "grad_norm": 2.625, + "learning_rate": 0.0001387779939172891, + "loss": 1.835, + "step": 317205 + }, + { + "epoch": 0.75, + "grad_norm": 2.234375, + "learning_rate": 0.00013877629024433688, + "loss": 2.21, + "step": 317210 + }, + { + "epoch": 0.75, + "grad_norm": 2.140625, + "learning_rate": 0.000138774586558138, + "loss": 2.0559, + "step": 317215 + }, + { + "epoch": 0.75, + "grad_norm": 2.28125, + "learning_rate": 0.00013877288285869303, + "loss": 1.9306, + "step": 317220 + }, + { + "epoch": 0.75, + "grad_norm": 2.28125, + "learning_rate": 0.00013877117914600252, + "loss": 2.1345, + "step": 317225 + }, + { + "epoch": 0.75, + "grad_norm": 2.140625, + "learning_rate": 0.0001387694754200671, + "loss": 2.0266, + "step": 317230 + }, + { + "epoch": 0.75, + "grad_norm": 2.21875, + "learning_rate": 0.0001387677716808873, + "loss": 2.0276, + "step": 317235 + }, + { + "epoch": 0.75, + "grad_norm": 2.03125, + "learning_rate": 0.0001387660679284637, + "loss": 2.1208, + "step": 317240 + }, + { + "epoch": 0.75, + "grad_norm": 2.21875, + "learning_rate": 0.00013876436416279694, + "loss": 2.0044, + "step": 317245 + }, + { + "epoch": 0.75, + "grad_norm": 2.125, + "learning_rate": 0.00013876266038388759, + "loss": 1.8857, + "step": 317250 + }, + { + "epoch": 0.75, + "grad_norm": 2.5, + "learning_rate": 0.0001387609565917362, + "loss": 2.1172, + "step": 317255 + }, + { + "epoch": 0.75, + "grad_norm": 2.296875, + "learning_rate": 0.00013875925278634333, + "loss": 2.0844, + "step": 317260 + }, + { + "epoch": 0.75, + "grad_norm": 2.109375, + "learning_rate": 0.00013875754896770963, + "loss": 1.9893, + "step": 317265 + }, + { + "epoch": 0.75, + "grad_norm": 2.5625, + "learning_rate": 0.00013875584513583562, + "loss": 2.0848, + "step": 317270 + }, + { + "epoch": 0.75, + "grad_norm": 2.734375, + "learning_rate": 0.00013875414129072195, + "loss": 1.9644, + "step": 317275 + }, + { + "epoch": 0.75, + "grad_norm": 1.9453125, + "learning_rate": 0.00013875243743236915, + "loss": 2.0445, + "step": 317280 + }, + { + "epoch": 0.75, + "grad_norm": 2.15625, + "learning_rate": 0.00013875073356077778, + "loss": 1.9782, + "step": 317285 + }, + { + "epoch": 0.75, + "grad_norm": 3.203125, + "learning_rate": 0.0001387490296759485, + "loss": 2.0648, + "step": 317290 + }, + { + "epoch": 0.75, + "grad_norm": 2.1875, + "learning_rate": 0.00013874732577788182, + "loss": 2.1915, + "step": 317295 + }, + { + "epoch": 0.75, + "grad_norm": 2.390625, + "learning_rate": 0.00013874562186657835, + "loss": 2.182, + "step": 317300 + }, + { + "epoch": 0.75, + "grad_norm": 2.59375, + "learning_rate": 0.0001387439179420387, + "loss": 2.0657, + "step": 317305 + }, + { + "epoch": 0.75, + "grad_norm": 1.9609375, + "learning_rate": 0.0001387422140042634, + "loss": 1.9357, + "step": 317310 + }, + { + "epoch": 0.75, + "grad_norm": 1.890625, + "learning_rate": 0.00013874051005325307, + "loss": 2.2048, + "step": 317315 + }, + { + "epoch": 0.75, + "grad_norm": 2.21875, + "learning_rate": 0.00013873880608900827, + "loss": 2.1056, + "step": 317320 + }, + { + "epoch": 0.75, + "grad_norm": 2.375, + "learning_rate": 0.0001387371021115296, + "loss": 2.3252, + "step": 317325 + }, + { + "epoch": 0.75, + "grad_norm": 1.921875, + "learning_rate": 0.0001387353981208176, + "loss": 1.9647, + "step": 317330 + }, + { + "epoch": 0.75, + "grad_norm": 2.734375, + "learning_rate": 0.00013873369411687294, + "loss": 2.0688, + "step": 317335 + }, + { + "epoch": 0.75, + "grad_norm": 2.453125, + "learning_rate": 0.00013873199009969614, + "loss": 2.0764, + "step": 317340 + }, + { + "epoch": 0.75, + "grad_norm": 2.25, + "learning_rate": 0.00013873028606928775, + "loss": 1.9801, + "step": 317345 + }, + { + "epoch": 0.75, + "grad_norm": 1.9765625, + "learning_rate": 0.0001387285820256484, + "loss": 2.1203, + "step": 317350 + }, + { + "epoch": 0.75, + "grad_norm": 2.390625, + "learning_rate": 0.0001387268779687787, + "loss": 2.2275, + "step": 317355 + }, + { + "epoch": 0.75, + "grad_norm": 2.328125, + "learning_rate": 0.00013872517389867917, + "loss": 2.1762, + "step": 317360 + }, + { + "epoch": 0.75, + "grad_norm": 1.9296875, + "learning_rate": 0.00013872346981535043, + "loss": 1.9096, + "step": 317365 + }, + { + "epoch": 0.75, + "grad_norm": 1.859375, + "learning_rate": 0.00013872176571879305, + "loss": 1.9065, + "step": 317370 + }, + { + "epoch": 0.75, + "grad_norm": 1.9609375, + "learning_rate": 0.00013872006160900758, + "loss": 1.9721, + "step": 317375 + }, + { + "epoch": 0.75, + "grad_norm": 2.0625, + "learning_rate": 0.00013871835748599466, + "loss": 2.0815, + "step": 317380 + }, + { + "epoch": 0.75, + "grad_norm": 2.375, + "learning_rate": 0.00013871665334975485, + "loss": 2.0882, + "step": 317385 + }, + { + "epoch": 0.75, + "grad_norm": 2.890625, + "learning_rate": 0.00013871494920028872, + "loss": 2.1949, + "step": 317390 + }, + { + "epoch": 0.75, + "grad_norm": 2.5, + "learning_rate": 0.00013871324503759687, + "loss": 2.1304, + "step": 317395 + }, + { + "epoch": 0.75, + "grad_norm": 2.015625, + "learning_rate": 0.00013871154086167987, + "loss": 2.1682, + "step": 317400 + }, + { + "epoch": 0.75, + "grad_norm": 1.90625, + "learning_rate": 0.0001387098366725383, + "loss": 2.1131, + "step": 317405 + }, + { + "epoch": 0.75, + "grad_norm": 1.734375, + "learning_rate": 0.00013870813247017277, + "loss": 2.0017, + "step": 317410 + }, + { + "epoch": 0.75, + "grad_norm": 2.125, + "learning_rate": 0.00013870642825458384, + "loss": 1.9117, + "step": 317415 + }, + { + "epoch": 0.75, + "grad_norm": 2.3125, + "learning_rate": 0.00013870472402577208, + "loss": 2.0879, + "step": 317420 + }, + { + "epoch": 0.75, + "grad_norm": 2.296875, + "learning_rate": 0.00013870301978373805, + "loss": 2.0976, + "step": 317425 + }, + { + "epoch": 0.75, + "grad_norm": 2.140625, + "learning_rate": 0.0001387013155284824, + "loss": 2.1143, + "step": 317430 + }, + { + "epoch": 0.75, + "grad_norm": 5.21875, + "learning_rate": 0.0001386996112600057, + "loss": 2.0487, + "step": 317435 + }, + { + "epoch": 0.75, + "grad_norm": 2.03125, + "learning_rate": 0.0001386979069783085, + "loss": 2.0214, + "step": 317440 + }, + { + "epoch": 0.75, + "grad_norm": 2.265625, + "learning_rate": 0.00013869620268339137, + "loss": 2.0744, + "step": 317445 + }, + { + "epoch": 0.75, + "grad_norm": 2.40625, + "learning_rate": 0.0001386944983752549, + "loss": 2.0602, + "step": 317450 + }, + { + "epoch": 0.75, + "grad_norm": 2.078125, + "learning_rate": 0.00013869279405389972, + "loss": 2.0265, + "step": 317455 + }, + { + "epoch": 0.75, + "grad_norm": 2.359375, + "learning_rate": 0.00013869108971932637, + "loss": 1.7865, + "step": 317460 + }, + { + "epoch": 0.75, + "grad_norm": 2.203125, + "learning_rate": 0.00013868938537153546, + "loss": 1.97, + "step": 317465 + }, + { + "epoch": 0.75, + "grad_norm": 2.0625, + "learning_rate": 0.00013868768101052753, + "loss": 2.1675, + "step": 317470 + }, + { + "epoch": 0.75, + "grad_norm": 2.515625, + "learning_rate": 0.0001386859766363032, + "loss": 2.0509, + "step": 317475 + }, + { + "epoch": 0.75, + "grad_norm": 2.125, + "learning_rate": 0.00013868427224886302, + "loss": 1.9905, + "step": 317480 + }, + { + "epoch": 0.75, + "grad_norm": 2.1875, + "learning_rate": 0.0001386825678482076, + "loss": 1.9731, + "step": 317485 + }, + { + "epoch": 0.75, + "grad_norm": 2.21875, + "learning_rate": 0.00013868086343433753, + "loss": 2.2212, + "step": 317490 + }, + { + "epoch": 0.75, + "grad_norm": 2.0625, + "learning_rate": 0.00013867915900725336, + "loss": 2.1144, + "step": 317495 + }, + { + "epoch": 0.75, + "grad_norm": 2.6875, + "learning_rate": 0.0001386774545669557, + "loss": 2.0202, + "step": 317500 + }, + { + "epoch": 0.75, + "grad_norm": 2.390625, + "learning_rate": 0.00013867575011344512, + "loss": 1.8506, + "step": 317505 + }, + { + "epoch": 0.75, + "grad_norm": 2.140625, + "learning_rate": 0.00013867404564672217, + "loss": 2.1337, + "step": 317510 + }, + { + "epoch": 0.75, + "grad_norm": 2.25, + "learning_rate": 0.00013867234116678748, + "loss": 2.138, + "step": 317515 + }, + { + "epoch": 0.75, + "grad_norm": 2.140625, + "learning_rate": 0.00013867063667364165, + "loss": 1.9195, + "step": 317520 + }, + { + "epoch": 0.75, + "grad_norm": 2.234375, + "learning_rate": 0.00013866893216728522, + "loss": 2.0587, + "step": 317525 + }, + { + "epoch": 0.75, + "grad_norm": 2.28125, + "learning_rate": 0.00013866722764771874, + "loss": 1.9931, + "step": 317530 + }, + { + "epoch": 0.75, + "grad_norm": 2.109375, + "learning_rate": 0.00013866552311494288, + "loss": 2.2054, + "step": 317535 + }, + { + "epoch": 0.75, + "grad_norm": 1.8359375, + "learning_rate": 0.00013866381856895814, + "loss": 2.1571, + "step": 317540 + }, + { + "epoch": 0.75, + "grad_norm": 2.234375, + "learning_rate": 0.00013866211400976515, + "loss": 2.2192, + "step": 317545 + }, + { + "epoch": 0.75, + "grad_norm": 2.53125, + "learning_rate": 0.00013866040943736452, + "loss": 2.0642, + "step": 317550 + }, + { + "epoch": 0.75, + "grad_norm": 2.578125, + "learning_rate": 0.00013865870485175674, + "loss": 2.0134, + "step": 317555 + }, + { + "epoch": 0.75, + "grad_norm": 1.953125, + "learning_rate": 0.00013865700025294245, + "loss": 2.0118, + "step": 317560 + }, + { + "epoch": 0.75, + "grad_norm": 2.46875, + "learning_rate": 0.00013865529564092223, + "loss": 2.1207, + "step": 317565 + }, + { + "epoch": 0.75, + "grad_norm": 2.390625, + "learning_rate": 0.0001386535910156967, + "loss": 2.2306, + "step": 317570 + }, + { + "epoch": 0.75, + "grad_norm": 2.609375, + "learning_rate": 0.00013865188637726637, + "loss": 1.8087, + "step": 317575 + }, + { + "epoch": 0.75, + "grad_norm": 1.984375, + "learning_rate": 0.00013865018172563187, + "loss": 2.2047, + "step": 317580 + }, + { + "epoch": 0.75, + "grad_norm": 2.296875, + "learning_rate": 0.00013864847706079373, + "loss": 2.0438, + "step": 317585 + }, + { + "epoch": 0.75, + "grad_norm": 2.21875, + "learning_rate": 0.00013864677238275262, + "loss": 1.9669, + "step": 317590 + }, + { + "epoch": 0.75, + "grad_norm": 1.90625, + "learning_rate": 0.000138645067691509, + "loss": 2.0202, + "step": 317595 + }, + { + "epoch": 0.75, + "grad_norm": 2.40625, + "learning_rate": 0.0001386433629870636, + "loss": 2.0662, + "step": 317600 + }, + { + "epoch": 0.75, + "grad_norm": 2.171875, + "learning_rate": 0.0001386416582694169, + "loss": 2.1706, + "step": 317605 + }, + { + "epoch": 0.75, + "grad_norm": 4.03125, + "learning_rate": 0.0001386399535385695, + "loss": 2.1224, + "step": 317610 + }, + { + "epoch": 0.75, + "grad_norm": 2.5625, + "learning_rate": 0.000138638248794522, + "loss": 1.973, + "step": 317615 + }, + { + "epoch": 0.75, + "grad_norm": 2.328125, + "learning_rate": 0.00013863654403727498, + "loss": 2.2069, + "step": 317620 + }, + { + "epoch": 0.75, + "grad_norm": 2.21875, + "learning_rate": 0.000138634839266829, + "loss": 2.0361, + "step": 317625 + }, + { + "epoch": 0.75, + "grad_norm": 2.015625, + "learning_rate": 0.00013863313448318464, + "loss": 2.0813, + "step": 317630 + }, + { + "epoch": 0.75, + "grad_norm": 2.203125, + "learning_rate": 0.00013863142968634256, + "loss": 2.0996, + "step": 317635 + }, + { + "epoch": 0.75, + "grad_norm": 2.5, + "learning_rate": 0.00013862972487630325, + "loss": 2.1112, + "step": 317640 + }, + { + "epoch": 0.75, + "grad_norm": 2.265625, + "learning_rate": 0.0001386280200530673, + "loss": 2.0318, + "step": 317645 + }, + { + "epoch": 0.75, + "grad_norm": 2.328125, + "learning_rate": 0.00013862631521663533, + "loss": 2.0392, + "step": 317650 + }, + { + "epoch": 0.75, + "grad_norm": 2.34375, + "learning_rate": 0.00013862461036700795, + "loss": 2.1042, + "step": 317655 + }, + { + "epoch": 0.75, + "grad_norm": 2.21875, + "learning_rate": 0.00013862290550418566, + "loss": 2.0562, + "step": 317660 + }, + { + "epoch": 0.75, + "grad_norm": 2.359375, + "learning_rate": 0.00013862120062816912, + "loss": 2.0858, + "step": 317665 + }, + { + "epoch": 0.75, + "grad_norm": 2.40625, + "learning_rate": 0.00013861949573895882, + "loss": 2.2178, + "step": 317670 + }, + { + "epoch": 0.75, + "grad_norm": 1.71875, + "learning_rate": 0.00013861779083655543, + "loss": 1.9561, + "step": 317675 + }, + { + "epoch": 0.75, + "grad_norm": 2.15625, + "learning_rate": 0.00013861608592095952, + "loss": 2.19, + "step": 317680 + }, + { + "epoch": 0.75, + "grad_norm": 2.84375, + "learning_rate": 0.00013861438099217166, + "loss": 2.1558, + "step": 317685 + }, + { + "epoch": 0.75, + "grad_norm": 2.03125, + "learning_rate": 0.00013861267605019242, + "loss": 1.9152, + "step": 317690 + }, + { + "epoch": 0.75, + "grad_norm": 2.4375, + "learning_rate": 0.00013861097109502235, + "loss": 2.0675, + "step": 317695 + }, + { + "epoch": 0.75, + "grad_norm": 2.625, + "learning_rate": 0.0001386092661266621, + "loss": 1.92, + "step": 317700 + }, + { + "epoch": 0.75, + "grad_norm": 2.484375, + "learning_rate": 0.00013860756114511224, + "loss": 2.1675, + "step": 317705 + }, + { + "epoch": 0.75, + "grad_norm": 2.078125, + "learning_rate": 0.0001386058561503733, + "loss": 1.8531, + "step": 317710 + }, + { + "epoch": 0.75, + "grad_norm": 2.046875, + "learning_rate": 0.00013860415114244593, + "loss": 2.1271, + "step": 317715 + }, + { + "epoch": 0.75, + "grad_norm": 2.640625, + "learning_rate": 0.00013860244612133067, + "loss": 2.126, + "step": 317720 + }, + { + "epoch": 0.75, + "grad_norm": 2.484375, + "learning_rate": 0.00013860074108702812, + "loss": 2.1411, + "step": 317725 + }, + { + "epoch": 0.75, + "grad_norm": 2.4375, + "learning_rate": 0.00013859903603953889, + "loss": 2.035, + "step": 317730 + }, + { + "epoch": 0.75, + "grad_norm": 2.46875, + "learning_rate": 0.00013859733097886347, + "loss": 1.9872, + "step": 317735 + }, + { + "epoch": 0.75, + "grad_norm": 2.5, + "learning_rate": 0.00013859562590500253, + "loss": 1.9374, + "step": 317740 + }, + { + "epoch": 0.75, + "grad_norm": 2.53125, + "learning_rate": 0.0001385939208179566, + "loss": 1.8371, + "step": 317745 + }, + { + "epoch": 0.75, + "grad_norm": 2.34375, + "learning_rate": 0.00013859221571772633, + "loss": 2.2557, + "step": 317750 + }, + { + "epoch": 0.75, + "grad_norm": 2.21875, + "learning_rate": 0.00013859051060431224, + "loss": 2.0367, + "step": 317755 + }, + { + "epoch": 0.75, + "grad_norm": 2.125, + "learning_rate": 0.00013858880547771492, + "loss": 2.0, + "step": 317760 + }, + { + "epoch": 0.75, + "grad_norm": 2.609375, + "learning_rate": 0.00013858710033793498, + "loss": 2.1003, + "step": 317765 + }, + { + "epoch": 0.75, + "grad_norm": 2.15625, + "learning_rate": 0.00013858539518497298, + "loss": 2.1315, + "step": 317770 + }, + { + "epoch": 0.75, + "grad_norm": 2.390625, + "learning_rate": 0.0001385836900188295, + "loss": 2.096, + "step": 317775 + }, + { + "epoch": 0.75, + "grad_norm": 1.8984375, + "learning_rate": 0.00013858198483950512, + "loss": 2.1372, + "step": 317780 + }, + { + "epoch": 0.75, + "grad_norm": 2.5625, + "learning_rate": 0.00013858027964700045, + "loss": 1.8471, + "step": 317785 + }, + { + "epoch": 0.75, + "grad_norm": 2.03125, + "learning_rate": 0.0001385785744413161, + "loss": 2.1003, + "step": 317790 + }, + { + "epoch": 0.75, + "grad_norm": 1.875, + "learning_rate": 0.00013857686922245254, + "loss": 2.0179, + "step": 317795 + }, + { + "epoch": 0.75, + "grad_norm": 2.21875, + "learning_rate": 0.00013857516399041046, + "loss": 2.1322, + "step": 317800 + }, + { + "epoch": 0.75, + "grad_norm": 2.234375, + "learning_rate": 0.0001385734587451904, + "loss": 1.7456, + "step": 317805 + }, + { + "epoch": 0.75, + "grad_norm": 2.328125, + "learning_rate": 0.00013857175348679294, + "loss": 2.1089, + "step": 317810 + }, + { + "epoch": 0.75, + "grad_norm": 2.1875, + "learning_rate": 0.00013857004821521868, + "loss": 2.0292, + "step": 317815 + }, + { + "epoch": 0.75, + "grad_norm": 2.15625, + "learning_rate": 0.00013856834293046822, + "loss": 1.9199, + "step": 317820 + }, + { + "epoch": 0.75, + "grad_norm": 2.359375, + "learning_rate": 0.00013856663763254208, + "loss": 1.9573, + "step": 317825 + }, + { + "epoch": 0.75, + "grad_norm": 1.8046875, + "learning_rate": 0.00013856493232144084, + "loss": 1.999, + "step": 317830 + }, + { + "epoch": 0.75, + "grad_norm": 2.578125, + "learning_rate": 0.00013856322699716515, + "loss": 2.193, + "step": 317835 + }, + { + "epoch": 0.75, + "grad_norm": 2.375, + "learning_rate": 0.00013856152165971558, + "loss": 2.1662, + "step": 317840 + }, + { + "epoch": 0.75, + "grad_norm": 2.15625, + "learning_rate": 0.00013855981630909268, + "loss": 1.9857, + "step": 317845 + }, + { + "epoch": 0.75, + "grad_norm": 1.734375, + "learning_rate": 0.00013855811094529708, + "loss": 2.0123, + "step": 317850 + }, + { + "epoch": 0.75, + "grad_norm": 2.015625, + "learning_rate": 0.00013855640556832927, + "loss": 1.9876, + "step": 317855 + }, + { + "epoch": 0.75, + "grad_norm": 2.546875, + "learning_rate": 0.0001385547001781899, + "loss": 2.2317, + "step": 317860 + }, + { + "epoch": 0.75, + "grad_norm": 2.671875, + "learning_rate": 0.0001385529947748796, + "loss": 2.1368, + "step": 317865 + }, + { + "epoch": 0.75, + "grad_norm": 2.078125, + "learning_rate": 0.00013855128935839886, + "loss": 1.9063, + "step": 317870 + }, + { + "epoch": 0.75, + "grad_norm": 2.125, + "learning_rate": 0.00013854958392874834, + "loss": 2.0874, + "step": 317875 + }, + { + "epoch": 0.75, + "grad_norm": 2.046875, + "learning_rate": 0.00013854787848592852, + "loss": 1.9904, + "step": 317880 + }, + { + "epoch": 0.75, + "grad_norm": 2.265625, + "learning_rate": 0.0001385461730299401, + "loss": 2.2234, + "step": 317885 + }, + { + "epoch": 0.75, + "grad_norm": 2.265625, + "learning_rate": 0.00013854446756078357, + "loss": 1.9563, + "step": 317890 + }, + { + "epoch": 0.75, + "grad_norm": 2.0625, + "learning_rate": 0.00013854276207845954, + "loss": 1.8418, + "step": 317895 + }, + { + "epoch": 0.75, + "grad_norm": 2.96875, + "learning_rate": 0.00013854105658296864, + "loss": 2.1225, + "step": 317900 + }, + { + "epoch": 0.75, + "grad_norm": 1.9296875, + "learning_rate": 0.0001385393510743114, + "loss": 2.1857, + "step": 317905 + }, + { + "epoch": 0.75, + "grad_norm": 2.40625, + "learning_rate": 0.00013853764555248844, + "loss": 2.0665, + "step": 317910 + }, + { + "epoch": 0.75, + "grad_norm": 2.015625, + "learning_rate": 0.0001385359400175003, + "loss": 2.0824, + "step": 317915 + }, + { + "epoch": 0.75, + "grad_norm": 2.828125, + "learning_rate": 0.00013853423446934755, + "loss": 2.0119, + "step": 317920 + }, + { + "epoch": 0.75, + "grad_norm": 2.265625, + "learning_rate": 0.0001385325289080309, + "loss": 1.966, + "step": 317925 + }, + { + "epoch": 0.75, + "grad_norm": 2.515625, + "learning_rate": 0.00013853082333355077, + "loss": 2.0578, + "step": 317930 + }, + { + "epoch": 0.75, + "grad_norm": 2.25, + "learning_rate": 0.00013852911774590783, + "loss": 2.0004, + "step": 317935 + }, + { + "epoch": 0.75, + "grad_norm": 2.59375, + "learning_rate": 0.00013852741214510263, + "loss": 2.1673, + "step": 317940 + }, + { + "epoch": 0.75, + "grad_norm": 2.109375, + "learning_rate": 0.00013852570653113578, + "loss": 2.1706, + "step": 317945 + }, + { + "epoch": 0.75, + "grad_norm": 1.8359375, + "learning_rate": 0.00013852400090400783, + "loss": 2.1153, + "step": 317950 + }, + { + "epoch": 0.75, + "grad_norm": 2.109375, + "learning_rate": 0.0001385222952637194, + "loss": 2.1854, + "step": 317955 + }, + { + "epoch": 0.75, + "grad_norm": 2.25, + "learning_rate": 0.00013852058961027108, + "loss": 2.0686, + "step": 317960 + }, + { + "epoch": 0.75, + "grad_norm": 2.375, + "learning_rate": 0.0001385188839436634, + "loss": 2.1313, + "step": 317965 + }, + { + "epoch": 0.75, + "grad_norm": 2.109375, + "learning_rate": 0.00013851717826389697, + "loss": 1.8341, + "step": 317970 + }, + { + "epoch": 0.75, + "grad_norm": 2.109375, + "learning_rate": 0.00013851547257097237, + "loss": 2.0078, + "step": 317975 + }, + { + "epoch": 0.75, + "grad_norm": 2.25, + "learning_rate": 0.0001385137668648902, + "loss": 1.9002, + "step": 317980 + }, + { + "epoch": 0.75, + "grad_norm": 2.21875, + "learning_rate": 0.00013851206114565102, + "loss": 2.0386, + "step": 317985 + }, + { + "epoch": 0.75, + "grad_norm": 1.9453125, + "learning_rate": 0.0001385103554132554, + "loss": 2.1533, + "step": 317990 + }, + { + "epoch": 0.75, + "grad_norm": 1.96875, + "learning_rate": 0.000138508649667704, + "loss": 2.3078, + "step": 317995 + }, + { + "epoch": 0.75, + "grad_norm": 2.328125, + "learning_rate": 0.00013850694390899732, + "loss": 1.9827, + "step": 318000 + }, + { + "epoch": 0.75, + "grad_norm": 2.40625, + "learning_rate": 0.00013850523813713596, + "loss": 2.2097, + "step": 318005 + }, + { + "epoch": 0.75, + "grad_norm": 1.9609375, + "learning_rate": 0.00013850353235212053, + "loss": 2.0922, + "step": 318010 + }, + { + "epoch": 0.75, + "grad_norm": 2.453125, + "learning_rate": 0.00013850182655395155, + "loss": 2.0487, + "step": 318015 + }, + { + "epoch": 0.75, + "grad_norm": 2.125, + "learning_rate": 0.00013850012074262969, + "loss": 2.0614, + "step": 318020 + }, + { + "epoch": 0.75, + "grad_norm": 2.28125, + "learning_rate": 0.0001384984149181555, + "loss": 2.0094, + "step": 318025 + }, + { + "epoch": 0.75, + "grad_norm": 1.9765625, + "learning_rate": 0.0001384967090805295, + "loss": 2.2159, + "step": 318030 + }, + { + "epoch": 0.75, + "grad_norm": 2.046875, + "learning_rate": 0.00013849500322975238, + "loss": 2.1334, + "step": 318035 + }, + { + "epoch": 0.75, + "grad_norm": 2.140625, + "learning_rate": 0.00013849329736582465, + "loss": 1.9564, + "step": 318040 + }, + { + "epoch": 0.75, + "grad_norm": 2.109375, + "learning_rate": 0.0001384915914887469, + "loss": 2.1807, + "step": 318045 + }, + { + "epoch": 0.75, + "grad_norm": 2.296875, + "learning_rate": 0.00013848988559851975, + "loss": 2.0922, + "step": 318050 + }, + { + "epoch": 0.75, + "grad_norm": 2.0, + "learning_rate": 0.00013848817969514373, + "loss": 1.9175, + "step": 318055 + }, + { + "epoch": 0.75, + "grad_norm": 2.515625, + "learning_rate": 0.00013848647377861947, + "loss": 2.1604, + "step": 318060 + }, + { + "epoch": 0.75, + "grad_norm": 2.203125, + "learning_rate": 0.00013848476784894754, + "loss": 1.9563, + "step": 318065 + }, + { + "epoch": 0.75, + "grad_norm": 1.9375, + "learning_rate": 0.00013848306190612848, + "loss": 2.0717, + "step": 318070 + }, + { + "epoch": 0.75, + "grad_norm": 2.375, + "learning_rate": 0.0001384813559501629, + "loss": 1.9844, + "step": 318075 + }, + { + "epoch": 0.75, + "grad_norm": 2.1875, + "learning_rate": 0.00013847964998105146, + "loss": 2.0258, + "step": 318080 + }, + { + "epoch": 0.75, + "grad_norm": 2.015625, + "learning_rate": 0.0001384779439987946, + "loss": 1.9278, + "step": 318085 + }, + { + "epoch": 0.75, + "grad_norm": 2.09375, + "learning_rate": 0.00013847623800339302, + "loss": 2.0004, + "step": 318090 + }, + { + "epoch": 0.75, + "grad_norm": 2.3125, + "learning_rate": 0.00013847453199484724, + "loss": 2.1593, + "step": 318095 + }, + { + "epoch": 0.75, + "grad_norm": 2.90625, + "learning_rate": 0.00013847282597315787, + "loss": 2.0258, + "step": 318100 + }, + { + "epoch": 0.75, + "grad_norm": 2.71875, + "learning_rate": 0.00013847111993832548, + "loss": 1.9866, + "step": 318105 + }, + { + "epoch": 0.75, + "grad_norm": 2.359375, + "learning_rate": 0.00013846941389035065, + "loss": 2.1177, + "step": 318110 + }, + { + "epoch": 0.75, + "grad_norm": 1.8671875, + "learning_rate": 0.00013846770782923403, + "loss": 1.946, + "step": 318115 + }, + { + "epoch": 0.75, + "grad_norm": 2.28125, + "learning_rate": 0.0001384660017549761, + "loss": 1.981, + "step": 318120 + }, + { + "epoch": 0.75, + "grad_norm": 2.125, + "learning_rate": 0.00013846429566757745, + "loss": 2.1219, + "step": 318125 + }, + { + "epoch": 0.75, + "grad_norm": 2.171875, + "learning_rate": 0.00013846258956703872, + "loss": 2.0925, + "step": 318130 + }, + { + "epoch": 0.75, + "grad_norm": 1.8203125, + "learning_rate": 0.0001384608834533605, + "loss": 2.0827, + "step": 318135 + }, + { + "epoch": 0.75, + "grad_norm": 2.03125, + "learning_rate": 0.00013845917732654335, + "loss": 2.0407, + "step": 318140 + }, + { + "epoch": 0.75, + "grad_norm": 2.1875, + "learning_rate": 0.0001384574711865878, + "loss": 2.0689, + "step": 318145 + }, + { + "epoch": 0.75, + "grad_norm": 2.015625, + "learning_rate": 0.0001384557650334945, + "loss": 2.1043, + "step": 318150 + }, + { + "epoch": 0.75, + "grad_norm": 2.28125, + "learning_rate": 0.00013845405886726402, + "loss": 2.0402, + "step": 318155 + }, + { + "epoch": 0.75, + "grad_norm": 2.109375, + "learning_rate": 0.00013845235268789696, + "loss": 2.1804, + "step": 318160 + }, + { + "epoch": 0.75, + "grad_norm": 2.359375, + "learning_rate": 0.00013845064649539384, + "loss": 2.091, + "step": 318165 + }, + { + "epoch": 0.75, + "grad_norm": 2.015625, + "learning_rate": 0.0001384489402897553, + "loss": 1.9544, + "step": 318170 + }, + { + "epoch": 0.75, + "grad_norm": 2.234375, + "learning_rate": 0.00013844723407098187, + "loss": 2.0141, + "step": 318175 + }, + { + "epoch": 0.75, + "grad_norm": 2.796875, + "learning_rate": 0.00013844552783907424, + "loss": 2.1105, + "step": 318180 + }, + { + "epoch": 0.75, + "grad_norm": 2.25, + "learning_rate": 0.00013844382159403285, + "loss": 1.9513, + "step": 318185 + }, + { + "epoch": 0.75, + "grad_norm": 2.25, + "learning_rate": 0.0001384421153358584, + "loss": 2.1272, + "step": 318190 + }, + { + "epoch": 0.75, + "grad_norm": 2.234375, + "learning_rate": 0.0001384404090645514, + "loss": 2.0499, + "step": 318195 + }, + { + "epoch": 0.75, + "grad_norm": 2.375, + "learning_rate": 0.00013843870278011248, + "loss": 2.1205, + "step": 318200 + }, + { + "epoch": 0.75, + "grad_norm": 2.3125, + "learning_rate": 0.00013843699648254218, + "loss": 2.1011, + "step": 318205 + }, + { + "epoch": 0.75, + "grad_norm": 2.296875, + "learning_rate": 0.0001384352901718411, + "loss": 2.1703, + "step": 318210 + }, + { + "epoch": 0.75, + "grad_norm": 1.9921875, + "learning_rate": 0.00013843358384800985, + "loss": 2.003, + "step": 318215 + }, + { + "epoch": 0.75, + "grad_norm": 1.921875, + "learning_rate": 0.000138431877511049, + "loss": 2.1327, + "step": 318220 + }, + { + "epoch": 0.75, + "grad_norm": 1.8984375, + "learning_rate": 0.0001384301711609591, + "loss": 1.8119, + "step": 318225 + }, + { + "epoch": 0.75, + "grad_norm": 2.046875, + "learning_rate": 0.00013842846479774075, + "loss": 1.9827, + "step": 318230 + }, + { + "epoch": 0.75, + "grad_norm": 2.046875, + "learning_rate": 0.00013842675842139457, + "loss": 2.0111, + "step": 318235 + }, + { + "epoch": 0.75, + "grad_norm": 2.09375, + "learning_rate": 0.00013842505203192108, + "loss": 2.1757, + "step": 318240 + }, + { + "epoch": 0.75, + "grad_norm": 2.0, + "learning_rate": 0.0001384233456293209, + "loss": 2.278, + "step": 318245 + }, + { + "epoch": 0.75, + "grad_norm": 2.265625, + "learning_rate": 0.00013842163921359463, + "loss": 2.2035, + "step": 318250 + }, + { + "epoch": 0.75, + "grad_norm": 2.421875, + "learning_rate": 0.00013841993278474282, + "loss": 2.1811, + "step": 318255 + }, + { + "epoch": 0.75, + "grad_norm": 3.15625, + "learning_rate": 0.00013841822634276608, + "loss": 1.9101, + "step": 318260 + }, + { + "epoch": 0.75, + "grad_norm": 1.8828125, + "learning_rate": 0.00013841651988766494, + "loss": 2.1333, + "step": 318265 + }, + { + "epoch": 0.75, + "grad_norm": 2.203125, + "learning_rate": 0.00013841481341944005, + "loss": 2.24, + "step": 318270 + }, + { + "epoch": 0.75, + "grad_norm": 2.15625, + "learning_rate": 0.00013841310693809197, + "loss": 1.9648, + "step": 318275 + }, + { + "epoch": 0.75, + "grad_norm": 2.515625, + "learning_rate": 0.00013841140044362126, + "loss": 1.9709, + "step": 318280 + }, + { + "epoch": 0.75, + "grad_norm": 2.203125, + "learning_rate": 0.0001384096939360285, + "loss": 1.9658, + "step": 318285 + }, + { + "epoch": 0.75, + "grad_norm": 2.1875, + "learning_rate": 0.0001384079874153143, + "loss": 2.0171, + "step": 318290 + }, + { + "epoch": 0.75, + "grad_norm": 1.828125, + "learning_rate": 0.00013840628088147928, + "loss": 1.8423, + "step": 318295 + }, + { + "epoch": 0.75, + "grad_norm": 2.375, + "learning_rate": 0.00013840457433452393, + "loss": 2.0421, + "step": 318300 + }, + { + "epoch": 0.75, + "grad_norm": 2.453125, + "learning_rate": 0.00013840286777444888, + "loss": 1.9461, + "step": 318305 + }, + { + "epoch": 0.75, + "grad_norm": 2.375, + "learning_rate": 0.00013840116120125473, + "loss": 1.8834, + "step": 318310 + }, + { + "epoch": 0.75, + "grad_norm": 2.453125, + "learning_rate": 0.00013839945461494203, + "loss": 2.0081, + "step": 318315 + }, + { + "epoch": 0.75, + "grad_norm": 2.0625, + "learning_rate": 0.0001383977480155114, + "loss": 1.9661, + "step": 318320 + }, + { + "epoch": 0.75, + "grad_norm": 2.3125, + "learning_rate": 0.00013839604140296344, + "loss": 1.8342, + "step": 318325 + }, + { + "epoch": 0.75, + "grad_norm": 2.375, + "learning_rate": 0.00013839433477729865, + "loss": 2.1288, + "step": 318330 + }, + { + "epoch": 0.75, + "grad_norm": 2.6875, + "learning_rate": 0.00013839262813851767, + "loss": 2.0489, + "step": 318335 + }, + { + "epoch": 0.75, + "grad_norm": 2.125, + "learning_rate": 0.00013839092148662104, + "loss": 2.0996, + "step": 318340 + }, + { + "epoch": 0.75, + "grad_norm": 2.125, + "learning_rate": 0.00013838921482160941, + "loss": 1.9158, + "step": 318345 + }, + { + "epoch": 0.75, + "grad_norm": 1.9453125, + "learning_rate": 0.0001383875081434833, + "loss": 2.0101, + "step": 318350 + }, + { + "epoch": 0.75, + "grad_norm": 2.421875, + "learning_rate": 0.00013838580145224337, + "loss": 2.1227, + "step": 318355 + }, + { + "epoch": 0.75, + "grad_norm": 2.421875, + "learning_rate": 0.00013838409474789008, + "loss": 2.1693, + "step": 318360 + }, + { + "epoch": 0.75, + "grad_norm": 2.421875, + "learning_rate": 0.00013838238803042416, + "loss": 2.204, + "step": 318365 + }, + { + "epoch": 0.75, + "grad_norm": 2.109375, + "learning_rate": 0.0001383806812998461, + "loss": 2.016, + "step": 318370 + }, + { + "epoch": 0.75, + "grad_norm": 2.75, + "learning_rate": 0.00013837897455615646, + "loss": 1.9447, + "step": 318375 + }, + { + "epoch": 0.75, + "grad_norm": 2.359375, + "learning_rate": 0.00013837726779935593, + "loss": 2.1334, + "step": 318380 + }, + { + "epoch": 0.75, + "grad_norm": 1.953125, + "learning_rate": 0.00013837556102944498, + "loss": 2.0582, + "step": 318385 + }, + { + "epoch": 0.75, + "grad_norm": 1.9765625, + "learning_rate": 0.00013837385424642426, + "loss": 2.1205, + "step": 318390 + }, + { + "epoch": 0.75, + "grad_norm": 1.953125, + "learning_rate": 0.00013837214745029435, + "loss": 1.7432, + "step": 318395 + }, + { + "epoch": 0.75, + "grad_norm": 2.421875, + "learning_rate": 0.0001383704406410558, + "loss": 2.1725, + "step": 318400 + }, + { + "epoch": 0.75, + "grad_norm": 2.0, + "learning_rate": 0.0001383687338187092, + "loss": 1.9749, + "step": 318405 + }, + { + "epoch": 0.75, + "grad_norm": 2.453125, + "learning_rate": 0.0001383670269832552, + "loss": 2.1955, + "step": 318410 + }, + { + "epoch": 0.75, + "grad_norm": 2.015625, + "learning_rate": 0.00013836532013469427, + "loss": 1.9412, + "step": 318415 + }, + { + "epoch": 0.75, + "grad_norm": 2.59375, + "learning_rate": 0.00013836361327302705, + "loss": 2.0904, + "step": 318420 + }, + { + "epoch": 0.75, + "grad_norm": 2.34375, + "learning_rate": 0.00013836190639825415, + "loss": 2.2316, + "step": 318425 + }, + { + "epoch": 0.75, + "grad_norm": 3.078125, + "learning_rate": 0.00013836019951037613, + "loss": 2.1608, + "step": 318430 + }, + { + "epoch": 0.75, + "grad_norm": 2.328125, + "learning_rate": 0.00013835849260939356, + "loss": 2.0408, + "step": 318435 + }, + { + "epoch": 0.75, + "grad_norm": 2.015625, + "learning_rate": 0.00013835678569530706, + "loss": 1.9904, + "step": 318440 + }, + { + "epoch": 0.75, + "grad_norm": 2.46875, + "learning_rate": 0.00013835507876811714, + "loss": 1.8573, + "step": 318445 + }, + { + "epoch": 0.75, + "grad_norm": 2.640625, + "learning_rate": 0.00013835337182782444, + "loss": 1.9193, + "step": 318450 + }, + { + "epoch": 0.75, + "grad_norm": 2.15625, + "learning_rate": 0.00013835166487442956, + "loss": 2.1091, + "step": 318455 + }, + { + "epoch": 0.75, + "grad_norm": 2.21875, + "learning_rate": 0.00013834995790793303, + "loss": 2.0739, + "step": 318460 + }, + { + "epoch": 0.75, + "grad_norm": 2.359375, + "learning_rate": 0.00013834825092833548, + "loss": 1.9448, + "step": 318465 + }, + { + "epoch": 0.75, + "grad_norm": 2.0625, + "learning_rate": 0.00013834654393563744, + "loss": 2.2009, + "step": 318470 + }, + { + "epoch": 0.75, + "grad_norm": 1.9453125, + "learning_rate": 0.00013834483692983955, + "loss": 2.0936, + "step": 318475 + }, + { + "epoch": 0.75, + "grad_norm": 2.15625, + "learning_rate": 0.00013834312991094238, + "loss": 2.113, + "step": 318480 + }, + { + "epoch": 0.75, + "grad_norm": 2.5625, + "learning_rate": 0.00013834142287894648, + "loss": 2.0506, + "step": 318485 + }, + { + "epoch": 0.75, + "grad_norm": 2.5, + "learning_rate": 0.00013833971583385244, + "loss": 2.1353, + "step": 318490 + }, + { + "epoch": 0.75, + "grad_norm": 2.390625, + "learning_rate": 0.0001383380087756609, + "loss": 2.2414, + "step": 318495 + }, + { + "epoch": 0.75, + "grad_norm": 2.09375, + "learning_rate": 0.00013833630170437237, + "loss": 2.0656, + "step": 318500 + }, + { + "epoch": 0.75, + "grad_norm": 2.15625, + "learning_rate": 0.00013833459461998747, + "loss": 2.2656, + "step": 318505 + }, + { + "epoch": 0.75, + "grad_norm": 2.390625, + "learning_rate": 0.00013833288752250678, + "loss": 2.1197, + "step": 318510 + }, + { + "epoch": 0.75, + "grad_norm": 2.0625, + "learning_rate": 0.0001383311804119309, + "loss": 2.0764, + "step": 318515 + }, + { + "epoch": 0.75, + "grad_norm": 2.109375, + "learning_rate": 0.00013832947328826036, + "loss": 1.9672, + "step": 318520 + }, + { + "epoch": 0.75, + "grad_norm": 2.328125, + "learning_rate": 0.00013832776615149578, + "loss": 1.8989, + "step": 318525 + }, + { + "epoch": 0.75, + "grad_norm": 2.09375, + "learning_rate": 0.00013832605900163775, + "loss": 1.9773, + "step": 318530 + }, + { + "epoch": 0.75, + "grad_norm": 2.515625, + "learning_rate": 0.00013832435183868685, + "loss": 1.9141, + "step": 318535 + }, + { + "epoch": 0.75, + "grad_norm": 2.03125, + "learning_rate": 0.00013832264466264363, + "loss": 2.1038, + "step": 318540 + }, + { + "epoch": 0.75, + "grad_norm": 2.234375, + "learning_rate": 0.00013832093747350875, + "loss": 2.1511, + "step": 318545 + }, + { + "epoch": 0.75, + "grad_norm": 1.9453125, + "learning_rate": 0.0001383192302712827, + "loss": 2.0235, + "step": 318550 + }, + { + "epoch": 0.75, + "grad_norm": 2.21875, + "learning_rate": 0.00013831752305596613, + "loss": 2.2055, + "step": 318555 + }, + { + "epoch": 0.75, + "grad_norm": 2.09375, + "learning_rate": 0.00013831581582755958, + "loss": 2.0329, + "step": 318560 + }, + { + "epoch": 0.75, + "grad_norm": 2.171875, + "learning_rate": 0.00013831410858606365, + "loss": 2.1101, + "step": 318565 + }, + { + "epoch": 0.75, + "grad_norm": 1.7578125, + "learning_rate": 0.00013831240133147895, + "loss": 1.9406, + "step": 318570 + }, + { + "epoch": 0.75, + "grad_norm": 1.703125, + "learning_rate": 0.00013831069406380602, + "loss": 2.0938, + "step": 318575 + }, + { + "epoch": 0.75, + "grad_norm": 2.75, + "learning_rate": 0.00013830898678304546, + "loss": 2.1094, + "step": 318580 + }, + { + "epoch": 0.75, + "grad_norm": 2.28125, + "learning_rate": 0.00013830727948919785, + "loss": 2.0772, + "step": 318585 + }, + { + "epoch": 0.75, + "grad_norm": 2.09375, + "learning_rate": 0.0001383055721822638, + "loss": 2.1595, + "step": 318590 + }, + { + "epoch": 0.75, + "grad_norm": 2.359375, + "learning_rate": 0.00013830386486224388, + "loss": 2.0825, + "step": 318595 + }, + { + "epoch": 0.75, + "grad_norm": 2.234375, + "learning_rate": 0.00013830215752913864, + "loss": 1.9523, + "step": 318600 + }, + { + "epoch": 0.75, + "grad_norm": 2.15625, + "learning_rate": 0.00013830045018294867, + "loss": 2.2017, + "step": 318605 + }, + { + "epoch": 0.75, + "grad_norm": 2.125, + "learning_rate": 0.0001382987428236746, + "loss": 2.1229, + "step": 318610 + }, + { + "epoch": 0.75, + "grad_norm": 2.5, + "learning_rate": 0.000138297035451317, + "loss": 2.0518, + "step": 318615 + }, + { + "epoch": 0.75, + "grad_norm": 2.140625, + "learning_rate": 0.00013829532806587642, + "loss": 2.0516, + "step": 318620 + }, + { + "epoch": 0.75, + "grad_norm": 2.578125, + "learning_rate": 0.00013829362066735348, + "loss": 2.0879, + "step": 318625 + }, + { + "epoch": 0.75, + "grad_norm": 2.03125, + "learning_rate": 0.0001382919132557487, + "loss": 2.0643, + "step": 318630 + }, + { + "epoch": 0.75, + "grad_norm": 2.1875, + "learning_rate": 0.00013829020583106273, + "loss": 2.0672, + "step": 318635 + }, + { + "epoch": 0.75, + "grad_norm": 1.890625, + "learning_rate": 0.00013828849839329614, + "loss": 2.07, + "step": 318640 + }, + { + "epoch": 0.75, + "grad_norm": 2.3125, + "learning_rate": 0.0001382867909424495, + "loss": 2.0047, + "step": 318645 + }, + { + "epoch": 0.75, + "grad_norm": 2.5, + "learning_rate": 0.0001382850834785234, + "loss": 2.0154, + "step": 318650 + }, + { + "epoch": 0.75, + "grad_norm": 2.21875, + "learning_rate": 0.00013828337600151842, + "loss": 1.9683, + "step": 318655 + }, + { + "epoch": 0.75, + "grad_norm": 2.40625, + "learning_rate": 0.00013828166851143515, + "loss": 1.8491, + "step": 318660 + }, + { + "epoch": 0.75, + "grad_norm": 2.234375, + "learning_rate": 0.00013827996100827417, + "loss": 1.9504, + "step": 318665 + }, + { + "epoch": 0.75, + "grad_norm": 2.046875, + "learning_rate": 0.00013827825349203602, + "loss": 1.8281, + "step": 318670 + }, + { + "epoch": 0.75, + "grad_norm": 2.53125, + "learning_rate": 0.00013827654596272138, + "loss": 2.0009, + "step": 318675 + }, + { + "epoch": 0.75, + "grad_norm": 2.359375, + "learning_rate": 0.00013827483842033075, + "loss": 1.8745, + "step": 318680 + }, + { + "epoch": 0.75, + "grad_norm": 2.4375, + "learning_rate": 0.00013827313086486474, + "loss": 2.0289, + "step": 318685 + }, + { + "epoch": 0.75, + "grad_norm": 2.25, + "learning_rate": 0.00013827142329632394, + "loss": 2.2325, + "step": 318690 + }, + { + "epoch": 0.75, + "grad_norm": 2.25, + "learning_rate": 0.00013826971571470893, + "loss": 2.0055, + "step": 318695 + }, + { + "epoch": 0.75, + "grad_norm": 2.015625, + "learning_rate": 0.00013826800812002027, + "loss": 2.0224, + "step": 318700 + }, + { + "epoch": 0.75, + "grad_norm": 2.3125, + "learning_rate": 0.0001382663005122586, + "loss": 2.1821, + "step": 318705 + }, + { + "epoch": 0.75, + "grad_norm": 1.8046875, + "learning_rate": 0.00013826459289142446, + "loss": 2.1042, + "step": 318710 + }, + { + "epoch": 0.75, + "grad_norm": 2.0625, + "learning_rate": 0.00013826288525751844, + "loss": 2.0231, + "step": 318715 + }, + { + "epoch": 0.75, + "grad_norm": 2.09375, + "learning_rate": 0.00013826117761054108, + "loss": 2.0111, + "step": 318720 + }, + { + "epoch": 0.75, + "grad_norm": 2.4375, + "learning_rate": 0.00013825946995049305, + "loss": 2.1284, + "step": 318725 + }, + { + "epoch": 0.75, + "grad_norm": 2.25, + "learning_rate": 0.0001382577622773749, + "loss": 2.0302, + "step": 318730 + }, + { + "epoch": 0.75, + "grad_norm": 2.03125, + "learning_rate": 0.0001382560545911872, + "loss": 2.0889, + "step": 318735 + }, + { + "epoch": 0.75, + "grad_norm": 2.421875, + "learning_rate": 0.0001382543468919305, + "loss": 2.1489, + "step": 318740 + }, + { + "epoch": 0.75, + "grad_norm": 2.234375, + "learning_rate": 0.00013825263917960546, + "loss": 2.0214, + "step": 318745 + }, + { + "epoch": 0.75, + "grad_norm": 2.578125, + "learning_rate": 0.00013825093145421262, + "loss": 2.0021, + "step": 318750 + }, + { + "epoch": 0.75, + "grad_norm": 2.34375, + "learning_rate": 0.00013824922371575259, + "loss": 2.1251, + "step": 318755 + }, + { + "epoch": 0.75, + "grad_norm": 2.234375, + "learning_rate": 0.00013824751596422586, + "loss": 2.1591, + "step": 318760 + }, + { + "epoch": 0.75, + "grad_norm": 2.390625, + "learning_rate": 0.00013824580819963314, + "loss": 1.976, + "step": 318765 + }, + { + "epoch": 0.75, + "grad_norm": 2.3125, + "learning_rate": 0.00013824410042197495, + "loss": 2.1862, + "step": 318770 + }, + { + "epoch": 0.75, + "grad_norm": 2.234375, + "learning_rate": 0.00013824239263125187, + "loss": 2.0558, + "step": 318775 + }, + { + "epoch": 0.75, + "grad_norm": 2.203125, + "learning_rate": 0.00013824068482746452, + "loss": 1.841, + "step": 318780 + }, + { + "epoch": 0.75, + "grad_norm": 1.8984375, + "learning_rate": 0.00013823897701061344, + "loss": 1.8765, + "step": 318785 + }, + { + "epoch": 0.75, + "grad_norm": 2.3125, + "learning_rate": 0.00013823726918069923, + "loss": 2.0366, + "step": 318790 + }, + { + "epoch": 0.75, + "grad_norm": 2.28125, + "learning_rate": 0.00013823556133772246, + "loss": 2.0623, + "step": 318795 + }, + { + "epoch": 0.75, + "grad_norm": 2.46875, + "learning_rate": 0.00013823385348168377, + "loss": 2.166, + "step": 318800 + }, + { + "epoch": 0.75, + "grad_norm": 2.359375, + "learning_rate": 0.00013823214561258366, + "loss": 2.0745, + "step": 318805 + }, + { + "epoch": 0.75, + "grad_norm": 2.15625, + "learning_rate": 0.0001382304377304228, + "loss": 2.0927, + "step": 318810 + }, + { + "epoch": 0.75, + "grad_norm": 2.171875, + "learning_rate": 0.0001382287298352017, + "loss": 2.1488, + "step": 318815 + }, + { + "epoch": 0.75, + "grad_norm": 2.359375, + "learning_rate": 0.00013822702192692096, + "loss": 1.9412, + "step": 318820 + }, + { + "epoch": 0.75, + "grad_norm": 2.40625, + "learning_rate": 0.0001382253140055812, + "loss": 2.0689, + "step": 318825 + }, + { + "epoch": 0.75, + "grad_norm": 2.890625, + "learning_rate": 0.00013822360607118297, + "loss": 2.1018, + "step": 318830 + }, + { + "epoch": 0.75, + "grad_norm": 2.5625, + "learning_rate": 0.00013822189812372687, + "loss": 1.9371, + "step": 318835 + }, + { + "epoch": 0.75, + "grad_norm": 2.53125, + "learning_rate": 0.00013822019016321345, + "loss": 2.0033, + "step": 318840 + }, + { + "epoch": 0.75, + "grad_norm": 2.140625, + "learning_rate": 0.00013821848218964337, + "loss": 2.013, + "step": 318845 + }, + { + "epoch": 0.75, + "grad_norm": 2.40625, + "learning_rate": 0.00013821677420301713, + "loss": 2.1278, + "step": 318850 + }, + { + "epoch": 0.75, + "grad_norm": 2.453125, + "learning_rate": 0.00013821506620333537, + "loss": 2.0949, + "step": 318855 + }, + { + "epoch": 0.75, + "grad_norm": 2.34375, + "learning_rate": 0.00013821335819059861, + "loss": 2.0574, + "step": 318860 + }, + { + "epoch": 0.75, + "grad_norm": 2.15625, + "learning_rate": 0.0001382116501648075, + "loss": 2.0092, + "step": 318865 + }, + { + "epoch": 0.75, + "grad_norm": 2.25, + "learning_rate": 0.00013820994212596262, + "loss": 2.1322, + "step": 318870 + }, + { + "epoch": 0.75, + "grad_norm": 2.203125, + "learning_rate": 0.00013820823407406448, + "loss": 2.0091, + "step": 318875 + }, + { + "epoch": 0.75, + "grad_norm": 2.46875, + "learning_rate": 0.00013820652600911372, + "loss": 2.1914, + "step": 318880 + }, + { + "epoch": 0.75, + "grad_norm": 2.421875, + "learning_rate": 0.00013820481793111098, + "loss": 2.129, + "step": 318885 + }, + { + "epoch": 0.75, + "grad_norm": 2.1875, + "learning_rate": 0.00013820310984005672, + "loss": 1.9623, + "step": 318890 + }, + { + "epoch": 0.75, + "grad_norm": 2.578125, + "learning_rate": 0.0001382014017359516, + "loss": 2.0108, + "step": 318895 + }, + { + "epoch": 0.75, + "grad_norm": 2.203125, + "learning_rate": 0.0001381996936187962, + "loss": 2.1383, + "step": 318900 + }, + { + "epoch": 0.75, + "grad_norm": 2.140625, + "learning_rate": 0.00013819798548859107, + "loss": 2.0637, + "step": 318905 + }, + { + "epoch": 0.75, + "grad_norm": 2.234375, + "learning_rate": 0.00013819627734533684, + "loss": 2.0763, + "step": 318910 + }, + { + "epoch": 0.75, + "grad_norm": 2.203125, + "learning_rate": 0.00013819456918903405, + "loss": 2.0456, + "step": 318915 + }, + { + "epoch": 0.75, + "grad_norm": 2.28125, + "learning_rate": 0.00013819286101968332, + "loss": 2.268, + "step": 318920 + }, + { + "epoch": 0.75, + "grad_norm": 2.34375, + "learning_rate": 0.0001381911528372852, + "loss": 2.0806, + "step": 318925 + }, + { + "epoch": 0.75, + "grad_norm": 2.40625, + "learning_rate": 0.0001381894446418403, + "loss": 2.0762, + "step": 318930 + }, + { + "epoch": 0.75, + "grad_norm": 2.078125, + "learning_rate": 0.00013818773643334918, + "loss": 2.0549, + "step": 318935 + }, + { + "epoch": 0.75, + "grad_norm": 2.15625, + "learning_rate": 0.00013818602821181247, + "loss": 2.2012, + "step": 318940 + }, + { + "epoch": 0.75, + "grad_norm": 2.40625, + "learning_rate": 0.00013818431997723068, + "loss": 2.1529, + "step": 318945 + }, + { + "epoch": 0.75, + "grad_norm": 2.171875, + "learning_rate": 0.00013818261172960443, + "loss": 1.8279, + "step": 318950 + }, + { + "epoch": 0.75, + "grad_norm": 2.125, + "learning_rate": 0.00013818090346893433, + "loss": 2.0629, + "step": 318955 + }, + { + "epoch": 0.75, + "grad_norm": 2.25, + "learning_rate": 0.00013817919519522093, + "loss": 2.0512, + "step": 318960 + }, + { + "epoch": 0.75, + "grad_norm": 2.34375, + "learning_rate": 0.00013817748690846484, + "loss": 2.0377, + "step": 318965 + }, + { + "epoch": 0.75, + "grad_norm": 2.46875, + "learning_rate": 0.00013817577860866662, + "loss": 2.0061, + "step": 318970 + }, + { + "epoch": 0.75, + "grad_norm": 3.4375, + "learning_rate": 0.00013817407029582688, + "loss": 2.0434, + "step": 318975 + }, + { + "epoch": 0.75, + "grad_norm": 1.953125, + "learning_rate": 0.00013817236196994617, + "loss": 2.1159, + "step": 318980 + }, + { + "epoch": 0.75, + "grad_norm": 2.015625, + "learning_rate": 0.00013817065363102505, + "loss": 1.8943, + "step": 318985 + }, + { + "epoch": 0.75, + "grad_norm": 2.59375, + "learning_rate": 0.00013816894527906418, + "loss": 2.1331, + "step": 318990 + }, + { + "epoch": 0.75, + "grad_norm": 2.640625, + "learning_rate": 0.00013816723691406413, + "loss": 1.9816, + "step": 318995 + }, + { + "epoch": 0.75, + "grad_norm": 1.9765625, + "learning_rate": 0.00013816552853602545, + "loss": 2.1519, + "step": 319000 + }, + { + "epoch": 0.75, + "grad_norm": 2.21875, + "learning_rate": 0.00013816382014494868, + "loss": 2.1005, + "step": 319005 + }, + { + "epoch": 0.75, + "grad_norm": 2.8125, + "learning_rate": 0.00013816211174083452, + "loss": 1.9958, + "step": 319010 + }, + { + "epoch": 0.75, + "grad_norm": 2.140625, + "learning_rate": 0.00013816040332368344, + "loss": 2.0203, + "step": 319015 + }, + { + "epoch": 0.75, + "grad_norm": 2.171875, + "learning_rate": 0.0001381586948934961, + "loss": 2.0929, + "step": 319020 + }, + { + "epoch": 0.75, + "grad_norm": 1.875, + "learning_rate": 0.0001381569864502731, + "loss": 1.9591, + "step": 319025 + }, + { + "epoch": 0.75, + "grad_norm": 1.796875, + "learning_rate": 0.0001381552779940149, + "loss": 1.9367, + "step": 319030 + }, + { + "epoch": 0.75, + "grad_norm": 2.390625, + "learning_rate": 0.0001381535695247222, + "loss": 2.1413, + "step": 319035 + }, + { + "epoch": 0.75, + "grad_norm": 2.28125, + "learning_rate": 0.00013815186104239555, + "loss": 1.9818, + "step": 319040 + }, + { + "epoch": 0.75, + "grad_norm": 2.109375, + "learning_rate": 0.00013815015254703555, + "loss": 2.0507, + "step": 319045 + }, + { + "epoch": 0.75, + "grad_norm": 2.203125, + "learning_rate": 0.00013814844403864276, + "loss": 2.0749, + "step": 319050 + }, + { + "epoch": 0.75, + "grad_norm": 2.21875, + "learning_rate": 0.00013814673551721776, + "loss": 1.9719, + "step": 319055 + }, + { + "epoch": 0.75, + "grad_norm": 2.140625, + "learning_rate": 0.0001381450269827611, + "loss": 2.0045, + "step": 319060 + }, + { + "epoch": 0.75, + "grad_norm": 2.421875, + "learning_rate": 0.00013814331843527348, + "loss": 2.2596, + "step": 319065 + }, + { + "epoch": 0.75, + "grad_norm": 2.234375, + "learning_rate": 0.0001381416098747554, + "loss": 2.2431, + "step": 319070 + }, + { + "epoch": 0.75, + "grad_norm": 1.9375, + "learning_rate": 0.00013813990130120743, + "loss": 2.0204, + "step": 319075 + }, + { + "epoch": 0.75, + "grad_norm": 2.59375, + "learning_rate": 0.00013813819271463017, + "loss": 2.0807, + "step": 319080 + }, + { + "epoch": 0.75, + "grad_norm": 2.484375, + "learning_rate": 0.0001381364841150242, + "loss": 2.0338, + "step": 319085 + }, + { + "epoch": 0.75, + "grad_norm": 2.078125, + "learning_rate": 0.00013813477550239019, + "loss": 1.9019, + "step": 319090 + }, + { + "epoch": 0.75, + "grad_norm": 1.9609375, + "learning_rate": 0.00013813306687672857, + "loss": 2.0419, + "step": 319095 + }, + { + "epoch": 0.75, + "grad_norm": 2.015625, + "learning_rate": 0.00013813135823804003, + "loss": 2.11, + "step": 319100 + }, + { + "epoch": 0.75, + "grad_norm": 2.03125, + "learning_rate": 0.00013812964958632512, + "loss": 2.1409, + "step": 319105 + }, + { + "epoch": 0.75, + "grad_norm": 2.703125, + "learning_rate": 0.00013812794092158445, + "loss": 1.9814, + "step": 319110 + }, + { + "epoch": 0.75, + "grad_norm": 2.171875, + "learning_rate": 0.00013812623224381858, + "loss": 2.2027, + "step": 319115 + }, + { + "epoch": 0.75, + "grad_norm": 2.390625, + "learning_rate": 0.0001381245235530281, + "loss": 1.9276, + "step": 319120 + }, + { + "epoch": 0.75, + "grad_norm": 2.609375, + "learning_rate": 0.00013812281484921357, + "loss": 1.9355, + "step": 319125 + }, + { + "epoch": 0.75, + "grad_norm": 2.078125, + "learning_rate": 0.00013812110613237562, + "loss": 2.098, + "step": 319130 + }, + { + "epoch": 0.75, + "grad_norm": 1.9765625, + "learning_rate": 0.00013811939740251478, + "loss": 2.1929, + "step": 319135 + }, + { + "epoch": 0.75, + "grad_norm": 2.125, + "learning_rate": 0.00013811768865963167, + "loss": 2.0082, + "step": 319140 + }, + { + "epoch": 0.75, + "grad_norm": 2.484375, + "learning_rate": 0.0001381159799037269, + "loss": 2.1707, + "step": 319145 + }, + { + "epoch": 0.75, + "grad_norm": 2.140625, + "learning_rate": 0.00013811427113480097, + "loss": 1.9655, + "step": 319150 + }, + { + "epoch": 0.75, + "grad_norm": 2.5625, + "learning_rate": 0.00013811256235285454, + "loss": 2.0392, + "step": 319155 + }, + { + "epoch": 0.75, + "grad_norm": 2.578125, + "learning_rate": 0.0001381108535578882, + "loss": 2.0477, + "step": 319160 + }, + { + "epoch": 0.75, + "grad_norm": 2.390625, + "learning_rate": 0.00013810914474990247, + "loss": 1.9349, + "step": 319165 + }, + { + "epoch": 0.75, + "grad_norm": 2.15625, + "learning_rate": 0.00013810743592889793, + "loss": 2.167, + "step": 319170 + }, + { + "epoch": 0.75, + "grad_norm": 2.34375, + "learning_rate": 0.00013810572709487521, + "loss": 1.9428, + "step": 319175 + }, + { + "epoch": 0.75, + "grad_norm": 2.46875, + "learning_rate": 0.00013810401824783494, + "loss": 2.1224, + "step": 319180 + }, + { + "epoch": 0.75, + "grad_norm": 2.453125, + "learning_rate": 0.0001381023093877776, + "loss": 2.0226, + "step": 319185 + }, + { + "epoch": 0.75, + "grad_norm": 2.640625, + "learning_rate": 0.00013810060051470385, + "loss": 2.1163, + "step": 319190 + }, + { + "epoch": 0.75, + "grad_norm": 2.296875, + "learning_rate": 0.00013809889162861417, + "loss": 2.0462, + "step": 319195 + }, + { + "epoch": 0.75, + "grad_norm": 2.296875, + "learning_rate": 0.0001380971827295093, + "loss": 2.1127, + "step": 319200 + }, + { + "epoch": 0.75, + "grad_norm": 2.078125, + "learning_rate": 0.00013809547381738973, + "loss": 1.9319, + "step": 319205 + }, + { + "epoch": 0.75, + "grad_norm": 2.734375, + "learning_rate": 0.00013809376489225603, + "loss": 1.9913, + "step": 319210 + }, + { + "epoch": 0.75, + "grad_norm": 2.203125, + "learning_rate": 0.0001380920559541088, + "loss": 2.1658, + "step": 319215 + }, + { + "epoch": 0.75, + "grad_norm": 2.140625, + "learning_rate": 0.00013809034700294865, + "loss": 2.1713, + "step": 319220 + }, + { + "epoch": 0.75, + "grad_norm": 2.46875, + "learning_rate": 0.00013808863803877616, + "loss": 2.0313, + "step": 319225 + }, + { + "epoch": 0.75, + "grad_norm": 2.296875, + "learning_rate": 0.0001380869290615919, + "loss": 2.1838, + "step": 319230 + }, + { + "epoch": 0.75, + "grad_norm": 2.171875, + "learning_rate": 0.0001380852200713964, + "loss": 2.1282, + "step": 319235 + }, + { + "epoch": 0.75, + "grad_norm": 1.9765625, + "learning_rate": 0.00013808351106819034, + "loss": 2.0424, + "step": 319240 + }, + { + "epoch": 0.75, + "grad_norm": 2.234375, + "learning_rate": 0.0001380818020519743, + "loss": 2.1204, + "step": 319245 + }, + { + "epoch": 0.75, + "grad_norm": 1.9140625, + "learning_rate": 0.00013808009302274877, + "loss": 1.9087, + "step": 319250 + }, + { + "epoch": 0.75, + "grad_norm": 2.640625, + "learning_rate": 0.0001380783839805144, + "loss": 2.1735, + "step": 319255 + }, + { + "epoch": 0.75, + "grad_norm": 2.5, + "learning_rate": 0.00013807667492527176, + "loss": 2.0389, + "step": 319260 + }, + { + "epoch": 0.75, + "grad_norm": 2.328125, + "learning_rate": 0.00013807496585702147, + "loss": 2.0386, + "step": 319265 + }, + { + "epoch": 0.75, + "grad_norm": 2.28125, + "learning_rate": 0.00013807325677576407, + "loss": 1.9908, + "step": 319270 + }, + { + "epoch": 0.75, + "grad_norm": 1.8828125, + "learning_rate": 0.0001380715476815001, + "loss": 1.889, + "step": 319275 + }, + { + "epoch": 0.75, + "grad_norm": 2.5, + "learning_rate": 0.00013806983857423026, + "loss": 2.2165, + "step": 319280 + }, + { + "epoch": 0.75, + "grad_norm": 2.5, + "learning_rate": 0.00013806812945395508, + "loss": 2.1393, + "step": 319285 + }, + { + "epoch": 0.75, + "grad_norm": 2.546875, + "learning_rate": 0.0001380664203206751, + "loss": 2.1557, + "step": 319290 + }, + { + "epoch": 0.75, + "grad_norm": 1.9375, + "learning_rate": 0.00013806471117439094, + "loss": 2.0561, + "step": 319295 + }, + { + "epoch": 0.75, + "grad_norm": 2.25, + "learning_rate": 0.0001380630020151032, + "loss": 2.1088, + "step": 319300 + }, + { + "epoch": 0.75, + "grad_norm": 2.53125, + "learning_rate": 0.00013806129284281245, + "loss": 2.1845, + "step": 319305 + }, + { + "epoch": 0.75, + "grad_norm": 1.6328125, + "learning_rate": 0.00013805958365751926, + "loss": 2.0074, + "step": 319310 + }, + { + "epoch": 0.75, + "grad_norm": 2.140625, + "learning_rate": 0.0001380578744592242, + "loss": 2.0887, + "step": 319315 + }, + { + "epoch": 0.75, + "grad_norm": 2.609375, + "learning_rate": 0.00013805616524792798, + "loss": 2.025, + "step": 319320 + }, + { + "epoch": 0.75, + "grad_norm": 1.921875, + "learning_rate": 0.000138054456023631, + "loss": 2.0617, + "step": 319325 + }, + { + "epoch": 0.75, + "grad_norm": 2.171875, + "learning_rate": 0.00013805274678633392, + "loss": 2.0576, + "step": 319330 + }, + { + "epoch": 0.75, + "grad_norm": 2.609375, + "learning_rate": 0.00013805103753603736, + "loss": 2.1035, + "step": 319335 + }, + { + "epoch": 0.75, + "grad_norm": 2.109375, + "learning_rate": 0.0001380493282727419, + "loss": 2.0273, + "step": 319340 + }, + { + "epoch": 0.75, + "grad_norm": 2.171875, + "learning_rate": 0.00013804761899644804, + "loss": 2.0854, + "step": 319345 + }, + { + "epoch": 0.75, + "grad_norm": 3.734375, + "learning_rate": 0.00013804590970715647, + "loss": 2.0809, + "step": 319350 + }, + { + "epoch": 0.75, + "grad_norm": 2.34375, + "learning_rate": 0.0001380442004048677, + "loss": 2.177, + "step": 319355 + }, + { + "epoch": 0.75, + "grad_norm": 2.328125, + "learning_rate": 0.00013804249108958236, + "loss": 1.9278, + "step": 319360 + }, + { + "epoch": 0.75, + "grad_norm": 2.3125, + "learning_rate": 0.00013804078176130098, + "loss": 2.1551, + "step": 319365 + }, + { + "epoch": 0.75, + "grad_norm": 2.15625, + "learning_rate": 0.00013803907242002423, + "loss": 2.0816, + "step": 319370 + }, + { + "epoch": 0.75, + "grad_norm": 2.296875, + "learning_rate": 0.0001380373630657526, + "loss": 2.0021, + "step": 319375 + }, + { + "epoch": 0.75, + "grad_norm": 2.453125, + "learning_rate": 0.00013803565369848673, + "loss": 2.0554, + "step": 319380 + }, + { + "epoch": 0.75, + "grad_norm": 2.25, + "learning_rate": 0.00013803394431822724, + "loss": 2.1472, + "step": 319385 + }, + { + "epoch": 0.75, + "grad_norm": 2.140625, + "learning_rate": 0.0001380322349249746, + "loss": 1.8729, + "step": 319390 + }, + { + "epoch": 0.75, + "grad_norm": 2.578125, + "learning_rate": 0.00013803052551872948, + "loss": 2.0511, + "step": 319395 + }, + { + "epoch": 0.75, + "grad_norm": 2.203125, + "learning_rate": 0.00013802881609949243, + "loss": 1.9807, + "step": 319400 + }, + { + "epoch": 0.75, + "grad_norm": 2.25, + "learning_rate": 0.0001380271066672641, + "loss": 1.8149, + "step": 319405 + }, + { + "epoch": 0.75, + "grad_norm": 2.34375, + "learning_rate": 0.00013802539722204495, + "loss": 2.187, + "step": 319410 + }, + { + "epoch": 0.75, + "grad_norm": 2.140625, + "learning_rate": 0.00013802368776383569, + "loss": 1.9173, + "step": 319415 + }, + { + "epoch": 0.75, + "grad_norm": 2.25, + "learning_rate": 0.0001380219782926368, + "loss": 2.0777, + "step": 319420 + }, + { + "epoch": 0.75, + "grad_norm": 1.96875, + "learning_rate": 0.00013802026880844893, + "loss": 1.8601, + "step": 319425 + }, + { + "epoch": 0.75, + "grad_norm": 2.21875, + "learning_rate": 0.00013801855931127265, + "loss": 1.9145, + "step": 319430 + }, + { + "epoch": 0.75, + "grad_norm": 2.078125, + "learning_rate": 0.00013801684980110855, + "loss": 2.079, + "step": 319435 + }, + { + "epoch": 0.75, + "grad_norm": 2.1875, + "learning_rate": 0.0001380151402779572, + "loss": 2.0776, + "step": 319440 + }, + { + "epoch": 0.75, + "grad_norm": 2.0625, + "learning_rate": 0.00013801343074181917, + "loss": 2.0372, + "step": 319445 + }, + { + "epoch": 0.75, + "grad_norm": 2.1875, + "learning_rate": 0.00013801172119269509, + "loss": 1.9351, + "step": 319450 + }, + { + "epoch": 0.75, + "grad_norm": 3.296875, + "learning_rate": 0.00013801001163058553, + "loss": 2.0277, + "step": 319455 + }, + { + "epoch": 0.75, + "grad_norm": 2.25, + "learning_rate": 0.00013800830205549104, + "loss": 2.0171, + "step": 319460 + }, + { + "epoch": 0.75, + "grad_norm": 2.421875, + "learning_rate": 0.0001380065924674122, + "loss": 2.1002, + "step": 319465 + }, + { + "epoch": 0.75, + "grad_norm": 2.40625, + "learning_rate": 0.00013800488286634964, + "loss": 2.0601, + "step": 319470 + }, + { + "epoch": 0.75, + "grad_norm": 2.609375, + "learning_rate": 0.00013800317325230393, + "loss": 1.9443, + "step": 319475 + }, + { + "epoch": 0.75, + "grad_norm": 2.125, + "learning_rate": 0.00013800146362527566, + "loss": 2.0785, + "step": 319480 + }, + { + "epoch": 0.75, + "grad_norm": 2.0625, + "learning_rate": 0.00013799975398526537, + "loss": 1.9641, + "step": 319485 + }, + { + "epoch": 0.75, + "grad_norm": 2.1875, + "learning_rate": 0.00013799804433227365, + "loss": 1.8688, + "step": 319490 + }, + { + "epoch": 0.75, + "grad_norm": 2.84375, + "learning_rate": 0.00013799633466630117, + "loss": 1.981, + "step": 319495 + }, + { + "epoch": 0.75, + "grad_norm": 2.375, + "learning_rate": 0.00013799462498734845, + "loss": 1.9519, + "step": 319500 + }, + { + "epoch": 0.75, + "grad_norm": 1.9921875, + "learning_rate": 0.00013799291529541604, + "loss": 1.9437, + "step": 319505 + }, + { + "epoch": 0.75, + "grad_norm": 1.8203125, + "learning_rate": 0.00013799120559050457, + "loss": 2.0512, + "step": 319510 + }, + { + "epoch": 0.75, + "grad_norm": 2.65625, + "learning_rate": 0.0001379894958726146, + "loss": 2.0459, + "step": 319515 + }, + { + "epoch": 0.75, + "grad_norm": 3.15625, + "learning_rate": 0.00013798778614174678, + "loss": 2.0332, + "step": 319520 + }, + { + "epoch": 0.75, + "grad_norm": 2.125, + "learning_rate": 0.0001379860763979016, + "loss": 2.0824, + "step": 319525 + }, + { + "epoch": 0.75, + "grad_norm": 2.1875, + "learning_rate": 0.00013798436664107973, + "loss": 2.0607, + "step": 319530 + }, + { + "epoch": 0.75, + "grad_norm": 2.203125, + "learning_rate": 0.00013798265687128166, + "loss": 2.2402, + "step": 319535 + }, + { + "epoch": 0.75, + "grad_norm": 1.875, + "learning_rate": 0.00013798094708850807, + "loss": 2.0914, + "step": 319540 + }, + { + "epoch": 0.75, + "grad_norm": 2.0625, + "learning_rate": 0.00013797923729275946, + "loss": 2.0016, + "step": 319545 + }, + { + "epoch": 0.75, + "grad_norm": 2.25, + "learning_rate": 0.00013797752748403647, + "loss": 2.2192, + "step": 319550 + }, + { + "epoch": 0.75, + "grad_norm": 2.203125, + "learning_rate": 0.00013797581766233967, + "loss": 2.0461, + "step": 319555 + }, + { + "epoch": 0.75, + "grad_norm": 2.375, + "learning_rate": 0.00013797410782766964, + "loss": 2.0374, + "step": 319560 + }, + { + "epoch": 0.75, + "grad_norm": 1.9765625, + "learning_rate": 0.00013797239798002696, + "loss": 2.1244, + "step": 319565 + }, + { + "epoch": 0.75, + "grad_norm": 2.265625, + "learning_rate": 0.00013797068811941224, + "loss": 2.0155, + "step": 319570 + }, + { + "epoch": 0.75, + "grad_norm": 2.140625, + "learning_rate": 0.000137968978245826, + "loss": 1.9737, + "step": 319575 + }, + { + "epoch": 0.75, + "grad_norm": 2.109375, + "learning_rate": 0.00013796726835926893, + "loss": 1.8929, + "step": 319580 + }, + { + "epoch": 0.75, + "grad_norm": 2.484375, + "learning_rate": 0.00013796555845974152, + "loss": 2.1634, + "step": 319585 + }, + { + "epoch": 0.75, + "grad_norm": 1.9765625, + "learning_rate": 0.0001379638485472444, + "loss": 2.1519, + "step": 319590 + }, + { + "epoch": 0.75, + "grad_norm": 1.8984375, + "learning_rate": 0.0001379621386217781, + "loss": 2.1483, + "step": 319595 + }, + { + "epoch": 0.75, + "grad_norm": 2.203125, + "learning_rate": 0.00013796042868334329, + "loss": 2.0438, + "step": 319600 + }, + { + "epoch": 0.75, + "grad_norm": 2.328125, + "learning_rate": 0.00013795871873194048, + "loss": 2.0066, + "step": 319605 + }, + { + "epoch": 0.75, + "grad_norm": 2.265625, + "learning_rate": 0.00013795700876757032, + "loss": 2.1803, + "step": 319610 + }, + { + "epoch": 0.75, + "grad_norm": 2.015625, + "learning_rate": 0.00013795529879023335, + "loss": 2.0101, + "step": 319615 + }, + { + "epoch": 0.75, + "grad_norm": 1.8984375, + "learning_rate": 0.00013795358879993016, + "loss": 2.1003, + "step": 319620 + }, + { + "epoch": 0.75, + "grad_norm": 2.0625, + "learning_rate": 0.0001379518787966613, + "loss": 1.8627, + "step": 319625 + }, + { + "epoch": 0.75, + "grad_norm": 2.15625, + "learning_rate": 0.0001379501687804274, + "loss": 2.0869, + "step": 319630 + }, + { + "epoch": 0.75, + "grad_norm": 2.25, + "learning_rate": 0.00013794845875122905, + "loss": 2.0954, + "step": 319635 + }, + { + "epoch": 0.75, + "grad_norm": 2.40625, + "learning_rate": 0.00013794674870906684, + "loss": 2.0979, + "step": 319640 + }, + { + "epoch": 0.75, + "grad_norm": 1.9921875, + "learning_rate": 0.0001379450386539413, + "loss": 2.1211, + "step": 319645 + }, + { + "epoch": 0.75, + "grad_norm": 2.609375, + "learning_rate": 0.00013794332858585304, + "loss": 1.9911, + "step": 319650 + }, + { + "epoch": 0.75, + "grad_norm": 2.03125, + "learning_rate": 0.00013794161850480267, + "loss": 1.9096, + "step": 319655 + }, + { + "epoch": 0.75, + "grad_norm": 2.09375, + "learning_rate": 0.00013793990841079077, + "loss": 2.1018, + "step": 319660 + }, + { + "epoch": 0.75, + "grad_norm": 2.453125, + "learning_rate": 0.00013793819830381786, + "loss": 1.9979, + "step": 319665 + }, + { + "epoch": 0.75, + "grad_norm": 1.828125, + "learning_rate": 0.0001379364881838846, + "loss": 2.0435, + "step": 319670 + }, + { + "epoch": 0.75, + "grad_norm": 2.28125, + "learning_rate": 0.00013793477805099157, + "loss": 2.0494, + "step": 319675 + }, + { + "epoch": 0.75, + "grad_norm": 2.453125, + "learning_rate": 0.0001379330679051393, + "loss": 2.0241, + "step": 319680 + }, + { + "epoch": 0.75, + "grad_norm": 2.25, + "learning_rate": 0.00013793135774632843, + "loss": 2.1732, + "step": 319685 + }, + { + "epoch": 0.75, + "grad_norm": 2.15625, + "learning_rate": 0.0001379296475745595, + "loss": 1.9122, + "step": 319690 + }, + { + "epoch": 0.75, + "grad_norm": 2.109375, + "learning_rate": 0.0001379279373898331, + "loss": 1.9294, + "step": 319695 + }, + { + "epoch": 0.75, + "grad_norm": 2.609375, + "learning_rate": 0.00013792622719214988, + "loss": 1.9288, + "step": 319700 + }, + { + "epoch": 0.75, + "grad_norm": 2.609375, + "learning_rate": 0.00013792451698151035, + "loss": 2.0994, + "step": 319705 + }, + { + "epoch": 0.75, + "grad_norm": 2.140625, + "learning_rate": 0.0001379228067579151, + "loss": 1.9875, + "step": 319710 + }, + { + "epoch": 0.75, + "grad_norm": 2.078125, + "learning_rate": 0.00013792109652136474, + "loss": 1.9278, + "step": 319715 + }, + { + "epoch": 0.75, + "grad_norm": 2.59375, + "learning_rate": 0.00013791938627185984, + "loss": 1.9397, + "step": 319720 + }, + { + "epoch": 0.75, + "grad_norm": 2.140625, + "learning_rate": 0.000137917676009401, + "loss": 2.0985, + "step": 319725 + }, + { + "epoch": 0.75, + "grad_norm": 2.109375, + "learning_rate": 0.0001379159657339888, + "loss": 2.0606, + "step": 319730 + }, + { + "epoch": 0.75, + "grad_norm": 2.078125, + "learning_rate": 0.0001379142554456238, + "loss": 2.2689, + "step": 319735 + }, + { + "epoch": 0.75, + "grad_norm": 2.28125, + "learning_rate": 0.00013791254514430664, + "loss": 2.1666, + "step": 319740 + }, + { + "epoch": 0.75, + "grad_norm": 1.96875, + "learning_rate": 0.00013791083483003782, + "loss": 2.0355, + "step": 319745 + }, + { + "epoch": 0.75, + "grad_norm": 2.171875, + "learning_rate": 0.00013790912450281802, + "loss": 2.0606, + "step": 319750 + }, + { + "epoch": 0.75, + "grad_norm": 2.65625, + "learning_rate": 0.00013790741416264772, + "loss": 1.9541, + "step": 319755 + }, + { + "epoch": 0.75, + "grad_norm": 1.9921875, + "learning_rate": 0.00013790570380952757, + "loss": 2.0102, + "step": 319760 + }, + { + "epoch": 0.75, + "grad_norm": 2.1875, + "learning_rate": 0.00013790399344345818, + "loss": 2.0339, + "step": 319765 + }, + { + "epoch": 0.75, + "grad_norm": 2.03125, + "learning_rate": 0.00013790228306444007, + "loss": 2.1188, + "step": 319770 + }, + { + "epoch": 0.75, + "grad_norm": 2.3125, + "learning_rate": 0.00013790057267247389, + "loss": 2.0678, + "step": 319775 + }, + { + "epoch": 0.75, + "grad_norm": 2.1875, + "learning_rate": 0.00013789886226756016, + "loss": 2.0352, + "step": 319780 + }, + { + "epoch": 0.75, + "grad_norm": 1.84375, + "learning_rate": 0.00013789715184969948, + "loss": 2.1972, + "step": 319785 + }, + { + "epoch": 0.75, + "grad_norm": 2.296875, + "learning_rate": 0.00013789544141889243, + "loss": 2.0438, + "step": 319790 + }, + { + "epoch": 0.75, + "grad_norm": 1.8046875, + "learning_rate": 0.00013789373097513967, + "loss": 1.9849, + "step": 319795 + }, + { + "epoch": 0.75, + "grad_norm": 2.4375, + "learning_rate": 0.0001378920205184417, + "loss": 2.1098, + "step": 319800 + }, + { + "epoch": 0.75, + "grad_norm": 1.8046875, + "learning_rate": 0.0001378903100487991, + "loss": 2.1397, + "step": 319805 + }, + { + "epoch": 0.75, + "grad_norm": 2.25, + "learning_rate": 0.0001378885995662125, + "loss": 2.1166, + "step": 319810 + }, + { + "epoch": 0.75, + "grad_norm": 2.1875, + "learning_rate": 0.0001378868890706825, + "loss": 2.1671, + "step": 319815 + }, + { + "epoch": 0.75, + "grad_norm": 2.09375, + "learning_rate": 0.00013788517856220959, + "loss": 2.1821, + "step": 319820 + }, + { + "epoch": 0.75, + "grad_norm": 2.46875, + "learning_rate": 0.00013788346804079446, + "loss": 2.0807, + "step": 319825 + }, + { + "epoch": 0.75, + "grad_norm": 2.25, + "learning_rate": 0.00013788175750643764, + "loss": 2.1227, + "step": 319830 + }, + { + "epoch": 0.75, + "grad_norm": 2.40625, + "learning_rate": 0.00013788004695913973, + "loss": 2.0198, + "step": 319835 + }, + { + "epoch": 0.75, + "grad_norm": 2.703125, + "learning_rate": 0.0001378783363989013, + "loss": 2.0672, + "step": 319840 + }, + { + "epoch": 0.75, + "grad_norm": 2.1875, + "learning_rate": 0.00013787662582572292, + "loss": 2.0569, + "step": 319845 + }, + { + "epoch": 0.75, + "grad_norm": 2.09375, + "learning_rate": 0.00013787491523960523, + "loss": 2.0761, + "step": 319850 + }, + { + "epoch": 0.75, + "grad_norm": 2.203125, + "learning_rate": 0.00013787320464054878, + "loss": 2.1572, + "step": 319855 + }, + { + "epoch": 0.75, + "grad_norm": 2.234375, + "learning_rate": 0.00013787149402855414, + "loss": 2.0158, + "step": 319860 + }, + { + "epoch": 0.75, + "grad_norm": 2.125, + "learning_rate": 0.0001378697834036219, + "loss": 1.8954, + "step": 319865 + }, + { + "epoch": 0.75, + "grad_norm": 2.25, + "learning_rate": 0.00013786807276575268, + "loss": 2.0535, + "step": 319870 + }, + { + "epoch": 0.75, + "grad_norm": 2.078125, + "learning_rate": 0.00013786636211494705, + "loss": 2.0417, + "step": 319875 + }, + { + "epoch": 0.75, + "grad_norm": 2.140625, + "learning_rate": 0.00013786465145120558, + "loss": 1.9827, + "step": 319880 + }, + { + "epoch": 0.75, + "grad_norm": 2.34375, + "learning_rate": 0.00013786294077452885, + "loss": 2.1667, + "step": 319885 + }, + { + "epoch": 0.75, + "grad_norm": 2.90625, + "learning_rate": 0.00013786123008491745, + "loss": 2.0316, + "step": 319890 + }, + { + "epoch": 0.75, + "grad_norm": 2.15625, + "learning_rate": 0.00013785951938237196, + "loss": 2.0474, + "step": 319895 + }, + { + "epoch": 0.75, + "grad_norm": 3.015625, + "learning_rate": 0.00013785780866689296, + "loss": 2.0636, + "step": 319900 + }, + { + "epoch": 0.75, + "grad_norm": 2.21875, + "learning_rate": 0.00013785609793848109, + "loss": 1.9017, + "step": 319905 + }, + { + "epoch": 0.75, + "grad_norm": 2.5625, + "learning_rate": 0.00013785438719713687, + "loss": 2.0562, + "step": 319910 + }, + { + "epoch": 0.75, + "grad_norm": 2.609375, + "learning_rate": 0.0001378526764428609, + "loss": 1.9887, + "step": 319915 + }, + { + "epoch": 0.75, + "grad_norm": 2.015625, + "learning_rate": 0.00013785096567565377, + "loss": 2.035, + "step": 319920 + }, + { + "epoch": 0.75, + "grad_norm": 2.328125, + "learning_rate": 0.00013784925489551605, + "loss": 2.1453, + "step": 319925 + }, + { + "epoch": 0.75, + "grad_norm": 2.125, + "learning_rate": 0.00013784754410244837, + "loss": 2.0263, + "step": 319930 + }, + { + "epoch": 0.75, + "grad_norm": 2.296875, + "learning_rate": 0.00013784583329645126, + "loss": 2.0834, + "step": 319935 + }, + { + "epoch": 0.75, + "grad_norm": 2.671875, + "learning_rate": 0.00013784412247752533, + "loss": 2.0806, + "step": 319940 + }, + { + "epoch": 0.75, + "grad_norm": 2.203125, + "learning_rate": 0.00013784241164567116, + "loss": 1.9421, + "step": 319945 + }, + { + "epoch": 0.75, + "grad_norm": 2.34375, + "learning_rate": 0.00013784070080088935, + "loss": 1.9642, + "step": 319950 + }, + { + "epoch": 0.75, + "grad_norm": 2.078125, + "learning_rate": 0.00013783898994318045, + "loss": 2.0441, + "step": 319955 + }, + { + "epoch": 0.75, + "grad_norm": 1.90625, + "learning_rate": 0.00013783727907254508, + "loss": 2.0018, + "step": 319960 + }, + { + "epoch": 0.75, + "grad_norm": 2.0625, + "learning_rate": 0.0001378355681889838, + "loss": 2.1243, + "step": 319965 + }, + { + "epoch": 0.75, + "grad_norm": 2.09375, + "learning_rate": 0.00013783385729249718, + "loss": 2.0664, + "step": 319970 + }, + { + "epoch": 0.75, + "grad_norm": 2.28125, + "learning_rate": 0.0001378321463830859, + "loss": 1.904, + "step": 319975 + }, + { + "epoch": 0.75, + "grad_norm": 2.578125, + "learning_rate": 0.0001378304354607504, + "loss": 2.0623, + "step": 319980 + }, + { + "epoch": 0.75, + "grad_norm": 2.578125, + "learning_rate": 0.00013782872452549138, + "loss": 1.9234, + "step": 319985 + }, + { + "epoch": 0.75, + "grad_norm": 2.390625, + "learning_rate": 0.00013782701357730937, + "loss": 2.0157, + "step": 319990 + }, + { + "epoch": 0.75, + "grad_norm": 2.28125, + "learning_rate": 0.00013782530261620494, + "loss": 2.1985, + "step": 319995 + }, + { + "epoch": 0.75, + "grad_norm": 2.296875, + "learning_rate": 0.00013782359164217874, + "loss": 1.8921, + "step": 320000 + }, + { + "epoch": 0.75, + "grad_norm": 2.15625, + "learning_rate": 0.0001378218806552313, + "loss": 2.1171, + "step": 320005 + }, + { + "epoch": 0.75, + "grad_norm": 2.203125, + "learning_rate": 0.00013782016965536322, + "loss": 2.1368, + "step": 320010 + }, + { + "epoch": 0.75, + "grad_norm": 2.546875, + "learning_rate": 0.00013781845864257506, + "loss": 2.1106, + "step": 320015 + }, + { + "epoch": 0.75, + "grad_norm": 2.1875, + "learning_rate": 0.00013781674761686747, + "loss": 1.965, + "step": 320020 + }, + { + "epoch": 0.75, + "grad_norm": 2.078125, + "learning_rate": 0.000137815036578241, + "loss": 2.0495, + "step": 320025 + }, + { + "epoch": 0.75, + "grad_norm": 2.28125, + "learning_rate": 0.00013781332552669618, + "loss": 2.0396, + "step": 320030 + }, + { + "epoch": 0.75, + "grad_norm": 2.6875, + "learning_rate": 0.00013781161446223364, + "loss": 2.0229, + "step": 320035 + }, + { + "epoch": 0.75, + "grad_norm": 2.015625, + "learning_rate": 0.000137809903384854, + "loss": 2.0236, + "step": 320040 + }, + { + "epoch": 0.75, + "grad_norm": 2.4375, + "learning_rate": 0.0001378081922945578, + "loss": 2.1404, + "step": 320045 + }, + { + "epoch": 0.75, + "grad_norm": 1.9765625, + "learning_rate": 0.00013780648119134564, + "loss": 2.0946, + "step": 320050 + }, + { + "epoch": 0.75, + "grad_norm": 2.546875, + "learning_rate": 0.0001378047700752181, + "loss": 2.0714, + "step": 320055 + }, + { + "epoch": 0.75, + "grad_norm": 2.625, + "learning_rate": 0.00013780305894617575, + "loss": 2.1347, + "step": 320060 + }, + { + "epoch": 0.75, + "grad_norm": 2.21875, + "learning_rate": 0.00013780134780421924, + "loss": 2.0365, + "step": 320065 + }, + { + "epoch": 0.75, + "grad_norm": 2.09375, + "learning_rate": 0.00013779963664934907, + "loss": 2.1065, + "step": 320070 + }, + { + "epoch": 0.75, + "grad_norm": 2.296875, + "learning_rate": 0.00013779792548156584, + "loss": 2.0113, + "step": 320075 + }, + { + "epoch": 0.75, + "grad_norm": 1.984375, + "learning_rate": 0.00013779621430087016, + "loss": 2.0138, + "step": 320080 + }, + { + "epoch": 0.75, + "grad_norm": 2.109375, + "learning_rate": 0.0001377945031072626, + "loss": 2.1362, + "step": 320085 + }, + { + "epoch": 0.75, + "grad_norm": 2.34375, + "learning_rate": 0.0001377927919007438, + "loss": 1.9901, + "step": 320090 + }, + { + "epoch": 0.75, + "grad_norm": 2.171875, + "learning_rate": 0.00013779108068131426, + "loss": 2.0131, + "step": 320095 + }, + { + "epoch": 0.75, + "grad_norm": 2.03125, + "learning_rate": 0.0001377893694489746, + "loss": 2.0974, + "step": 320100 + }, + { + "epoch": 0.75, + "grad_norm": 2.21875, + "learning_rate": 0.0001377876582037254, + "loss": 2.3394, + "step": 320105 + }, + { + "epoch": 0.75, + "grad_norm": 2.4375, + "learning_rate": 0.00013778594694556728, + "loss": 2.0122, + "step": 320110 + }, + { + "epoch": 0.75, + "grad_norm": 2.0625, + "learning_rate": 0.00013778423567450078, + "loss": 2.0192, + "step": 320115 + }, + { + "epoch": 0.75, + "grad_norm": 2.0, + "learning_rate": 0.0001377825243905265, + "loss": 2.0635, + "step": 320120 + }, + { + "epoch": 0.75, + "grad_norm": 2.09375, + "learning_rate": 0.000137780813093645, + "loss": 2.1731, + "step": 320125 + }, + { + "epoch": 0.75, + "grad_norm": 2.359375, + "learning_rate": 0.00013777910178385693, + "loss": 2.0581, + "step": 320130 + }, + { + "epoch": 0.75, + "grad_norm": 2.046875, + "learning_rate": 0.00013777739046116284, + "loss": 2.1059, + "step": 320135 + }, + { + "epoch": 0.75, + "grad_norm": 2.0, + "learning_rate": 0.00013777567912556327, + "loss": 2.0172, + "step": 320140 + }, + { + "epoch": 0.75, + "grad_norm": 2.125, + "learning_rate": 0.00013777396777705885, + "loss": 2.0394, + "step": 320145 + }, + { + "epoch": 0.75, + "grad_norm": 2.4375, + "learning_rate": 0.00013777225641565014, + "loss": 2.0756, + "step": 320150 + }, + { + "epoch": 0.75, + "grad_norm": 2.1875, + "learning_rate": 0.0001377705450413378, + "loss": 2.106, + "step": 320155 + }, + { + "epoch": 0.75, + "grad_norm": 2.34375, + "learning_rate": 0.0001377688336541223, + "loss": 2.1548, + "step": 320160 + }, + { + "epoch": 0.75, + "grad_norm": 2.53125, + "learning_rate": 0.00013776712225400432, + "loss": 2.0737, + "step": 320165 + }, + { + "epoch": 0.75, + "grad_norm": 2.421875, + "learning_rate": 0.00013776541084098437, + "loss": 2.1827, + "step": 320170 + }, + { + "epoch": 0.75, + "grad_norm": 2.4375, + "learning_rate": 0.0001377636994150631, + "loss": 2.0465, + "step": 320175 + }, + { + "epoch": 0.75, + "grad_norm": 2.15625, + "learning_rate": 0.00013776198797624107, + "loss": 2.0526, + "step": 320180 + }, + { + "epoch": 0.75, + "grad_norm": 2.140625, + "learning_rate": 0.00013776027652451884, + "loss": 2.0331, + "step": 320185 + }, + { + "epoch": 0.75, + "grad_norm": 2.15625, + "learning_rate": 0.00013775856505989702, + "loss": 2.0409, + "step": 320190 + }, + { + "epoch": 0.75, + "grad_norm": 1.7734375, + "learning_rate": 0.0001377568535823762, + "loss": 1.9298, + "step": 320195 + }, + { + "epoch": 0.75, + "grad_norm": 2.25, + "learning_rate": 0.00013775514209195695, + "loss": 2.152, + "step": 320200 + }, + { + "epoch": 0.75, + "grad_norm": 2.3125, + "learning_rate": 0.00013775343058863986, + "loss": 2.0754, + "step": 320205 + }, + { + "epoch": 0.75, + "grad_norm": 2.609375, + "learning_rate": 0.0001377517190724255, + "loss": 2.1889, + "step": 320210 + }, + { + "epoch": 0.75, + "grad_norm": 2.34375, + "learning_rate": 0.00013775000754331446, + "loss": 2.0453, + "step": 320215 + }, + { + "epoch": 0.75, + "grad_norm": 2.9375, + "learning_rate": 0.00013774829600130735, + "loss": 2.1094, + "step": 320220 + }, + { + "epoch": 0.75, + "grad_norm": 2.578125, + "learning_rate": 0.00013774658444640477, + "loss": 2.1821, + "step": 320225 + }, + { + "epoch": 0.75, + "grad_norm": 2.078125, + "learning_rate": 0.00013774487287860723, + "loss": 2.2008, + "step": 320230 + }, + { + "epoch": 0.75, + "grad_norm": 2.328125, + "learning_rate": 0.00013774316129791536, + "loss": 2.1677, + "step": 320235 + }, + { + "epoch": 0.75, + "grad_norm": 2.390625, + "learning_rate": 0.00013774144970432972, + "loss": 2.2784, + "step": 320240 + }, + { + "epoch": 0.75, + "grad_norm": 2.3125, + "learning_rate": 0.00013773973809785096, + "loss": 1.9794, + "step": 320245 + }, + { + "epoch": 0.75, + "grad_norm": 2.59375, + "learning_rate": 0.0001377380264784796, + "loss": 2.2254, + "step": 320250 + }, + { + "epoch": 0.75, + "grad_norm": 2.15625, + "learning_rate": 0.00013773631484621625, + "loss": 2.0075, + "step": 320255 + }, + { + "epoch": 0.75, + "grad_norm": 1.9921875, + "learning_rate": 0.00013773460320106148, + "loss": 2.181, + "step": 320260 + }, + { + "epoch": 0.75, + "grad_norm": 2.3125, + "learning_rate": 0.00013773289154301587, + "loss": 2.1019, + "step": 320265 + }, + { + "epoch": 0.75, + "grad_norm": 2.125, + "learning_rate": 0.00013773117987208008, + "loss": 2.0096, + "step": 320270 + }, + { + "epoch": 0.75, + "grad_norm": 2.375, + "learning_rate": 0.0001377294681882546, + "loss": 2.0581, + "step": 320275 + }, + { + "epoch": 0.75, + "grad_norm": 2.359375, + "learning_rate": 0.00013772775649154007, + "loss": 2.1924, + "step": 320280 + }, + { + "epoch": 0.75, + "grad_norm": 3.015625, + "learning_rate": 0.00013772604478193702, + "loss": 2.2016, + "step": 320285 + }, + { + "epoch": 0.75, + "grad_norm": 2.234375, + "learning_rate": 0.00013772433305944608, + "loss": 1.8552, + "step": 320290 + }, + { + "epoch": 0.75, + "grad_norm": 1.671875, + "learning_rate": 0.0001377226213240678, + "loss": 1.9848, + "step": 320295 + }, + { + "epoch": 0.75, + "grad_norm": 1.8125, + "learning_rate": 0.00013772090957580284, + "loss": 1.9077, + "step": 320300 + }, + { + "epoch": 0.75, + "grad_norm": 3.5625, + "learning_rate": 0.0001377191978146517, + "loss": 2.0817, + "step": 320305 + }, + { + "epoch": 0.75, + "grad_norm": 2.15625, + "learning_rate": 0.00013771748604061502, + "loss": 1.9923, + "step": 320310 + }, + { + "epoch": 0.75, + "grad_norm": 1.9140625, + "learning_rate": 0.00013771577425369334, + "loss": 2.0529, + "step": 320315 + }, + { + "epoch": 0.75, + "grad_norm": 1.8046875, + "learning_rate": 0.00013771406245388727, + "loss": 2.0376, + "step": 320320 + }, + { + "epoch": 0.75, + "grad_norm": 1.9609375, + "learning_rate": 0.00013771235064119738, + "loss": 2.045, + "step": 320325 + }, + { + "epoch": 0.75, + "grad_norm": 2.25, + "learning_rate": 0.0001377106388156243, + "loss": 2.225, + "step": 320330 + }, + { + "epoch": 0.75, + "grad_norm": 1.875, + "learning_rate": 0.00013770892697716859, + "loss": 1.9828, + "step": 320335 + }, + { + "epoch": 0.75, + "grad_norm": 2.4375, + "learning_rate": 0.0001377072151258308, + "loss": 1.9178, + "step": 320340 + }, + { + "epoch": 0.75, + "grad_norm": 2.21875, + "learning_rate": 0.0001377055032616115, + "loss": 2.0459, + "step": 320345 + }, + { + "epoch": 0.75, + "grad_norm": 2.21875, + "learning_rate": 0.0001377037913845114, + "loss": 2.1144, + "step": 320350 + }, + { + "epoch": 0.75, + "grad_norm": 2.546875, + "learning_rate": 0.00013770207949453096, + "loss": 2.0108, + "step": 320355 + }, + { + "epoch": 0.75, + "grad_norm": 2.109375, + "learning_rate": 0.00013770036759167082, + "loss": 1.9748, + "step": 320360 + }, + { + "epoch": 0.75, + "grad_norm": 2.453125, + "learning_rate": 0.00013769865567593153, + "loss": 2.0862, + "step": 320365 + }, + { + "epoch": 0.75, + "grad_norm": 1.84375, + "learning_rate": 0.0001376969437473137, + "loss": 1.9887, + "step": 320370 + }, + { + "epoch": 0.75, + "grad_norm": 2.234375, + "learning_rate": 0.00013769523180581788, + "loss": 2.1046, + "step": 320375 + }, + { + "epoch": 0.75, + "grad_norm": 2.296875, + "learning_rate": 0.00013769351985144475, + "loss": 2.1295, + "step": 320380 + }, + { + "epoch": 0.75, + "grad_norm": 2.203125, + "learning_rate": 0.0001376918078841948, + "loss": 2.2, + "step": 320385 + }, + { + "epoch": 0.75, + "grad_norm": 2.5, + "learning_rate": 0.00013769009590406864, + "loss": 2.1754, + "step": 320390 + }, + { + "epoch": 0.75, + "grad_norm": 2.203125, + "learning_rate": 0.00013768838391106688, + "loss": 2.2428, + "step": 320395 + }, + { + "epoch": 0.75, + "grad_norm": 1.9375, + "learning_rate": 0.00013768667190519005, + "loss": 2.0384, + "step": 320400 + }, + { + "epoch": 0.75, + "grad_norm": 2.484375, + "learning_rate": 0.00013768495988643879, + "loss": 2.0979, + "step": 320405 + }, + { + "epoch": 0.75, + "grad_norm": 2.5625, + "learning_rate": 0.00013768324785481368, + "loss": 2.2275, + "step": 320410 + }, + { + "epoch": 0.75, + "grad_norm": 2.625, + "learning_rate": 0.00013768153581031526, + "loss": 2.1649, + "step": 320415 + }, + { + "epoch": 0.75, + "grad_norm": 2.015625, + "learning_rate": 0.00013767982375294415, + "loss": 2.0503, + "step": 320420 + }, + { + "epoch": 0.75, + "grad_norm": 2.21875, + "learning_rate": 0.00013767811168270095, + "loss": 2.0143, + "step": 320425 + }, + { + "epoch": 0.75, + "grad_norm": 2.234375, + "learning_rate": 0.0001376763995995862, + "loss": 2.0349, + "step": 320430 + }, + { + "epoch": 0.75, + "grad_norm": 2.078125, + "learning_rate": 0.00013767468750360053, + "loss": 1.8837, + "step": 320435 + }, + { + "epoch": 0.75, + "grad_norm": 3.1875, + "learning_rate": 0.0001376729753947445, + "loss": 2.0333, + "step": 320440 + }, + { + "epoch": 0.75, + "grad_norm": 2.390625, + "learning_rate": 0.00013767126327301866, + "loss": 1.921, + "step": 320445 + }, + { + "epoch": 0.75, + "grad_norm": 2.875, + "learning_rate": 0.00013766955113842365, + "loss": 2.0262, + "step": 320450 + }, + { + "epoch": 0.75, + "grad_norm": 1.984375, + "learning_rate": 0.00013766783899096007, + "loss": 2.1043, + "step": 320455 + }, + { + "epoch": 0.75, + "grad_norm": 2.015625, + "learning_rate": 0.00013766612683062846, + "loss": 1.9687, + "step": 320460 + }, + { + "epoch": 0.75, + "grad_norm": 2.21875, + "learning_rate": 0.00013766441465742942, + "loss": 2.0633, + "step": 320465 + }, + { + "epoch": 0.75, + "grad_norm": 2.28125, + "learning_rate": 0.0001376627024713635, + "loss": 2.0749, + "step": 320470 + }, + { + "epoch": 0.75, + "grad_norm": 1.9921875, + "learning_rate": 0.0001376609902724314, + "loss": 2.0455, + "step": 320475 + }, + { + "epoch": 0.75, + "grad_norm": 2.34375, + "learning_rate": 0.0001376592780606335, + "loss": 1.9083, + "step": 320480 + }, + { + "epoch": 0.75, + "grad_norm": 2.53125, + "learning_rate": 0.0001376575658359706, + "loss": 1.9159, + "step": 320485 + }, + { + "epoch": 0.75, + "grad_norm": 1.7421875, + "learning_rate": 0.00013765585359844316, + "loss": 2.0172, + "step": 320490 + }, + { + "epoch": 0.75, + "grad_norm": 2.546875, + "learning_rate": 0.0001376541413480518, + "loss": 2.1123, + "step": 320495 + }, + { + "epoch": 0.75, + "grad_norm": 2.390625, + "learning_rate": 0.00013765242908479712, + "loss": 1.707, + "step": 320500 + }, + { + "epoch": 0.75, + "grad_norm": 2.234375, + "learning_rate": 0.00013765071680867964, + "loss": 1.999, + "step": 320505 + }, + { + "epoch": 0.75, + "grad_norm": 2.265625, + "learning_rate": 0.0001376490045197, + "loss": 1.8681, + "step": 320510 + }, + { + "epoch": 0.75, + "grad_norm": 2.21875, + "learning_rate": 0.00013764729221785884, + "loss": 2.2371, + "step": 320515 + }, + { + "epoch": 0.75, + "grad_norm": 2.4375, + "learning_rate": 0.00013764557990315665, + "loss": 2.1088, + "step": 320520 + }, + { + "epoch": 0.75, + "grad_norm": 1.9921875, + "learning_rate": 0.00013764386757559403, + "loss": 2.0234, + "step": 320525 + }, + { + "epoch": 0.75, + "grad_norm": 1.796875, + "learning_rate": 0.0001376421552351716, + "loss": 1.9503, + "step": 320530 + }, + { + "epoch": 0.75, + "grad_norm": 1.9296875, + "learning_rate": 0.0001376404428818899, + "loss": 2.1106, + "step": 320535 + }, + { + "epoch": 0.75, + "grad_norm": 2.40625, + "learning_rate": 0.00013763873051574958, + "loss": 1.8967, + "step": 320540 + }, + { + "epoch": 0.75, + "grad_norm": 2.171875, + "learning_rate": 0.00013763701813675119, + "loss": 1.902, + "step": 320545 + }, + { + "epoch": 0.75, + "grad_norm": 2.046875, + "learning_rate": 0.00013763530574489527, + "loss": 2.1302, + "step": 320550 + }, + { + "epoch": 0.75, + "grad_norm": 2.578125, + "learning_rate": 0.00013763359334018248, + "loss": 2.1649, + "step": 320555 + }, + { + "epoch": 0.75, + "grad_norm": 1.8125, + "learning_rate": 0.00013763188092261332, + "loss": 1.9687, + "step": 320560 + }, + { + "epoch": 0.75, + "grad_norm": 2.46875, + "learning_rate": 0.0001376301684921885, + "loss": 2.126, + "step": 320565 + }, + { + "epoch": 0.75, + "grad_norm": 2.359375, + "learning_rate": 0.0001376284560489085, + "loss": 2.1818, + "step": 320570 + }, + { + "epoch": 0.75, + "grad_norm": 2.21875, + "learning_rate": 0.00013762674359277393, + "loss": 2.0715, + "step": 320575 + }, + { + "epoch": 0.75, + "grad_norm": 2.640625, + "learning_rate": 0.0001376250311237854, + "loss": 2.0801, + "step": 320580 + }, + { + "epoch": 0.75, + "grad_norm": 2.4375, + "learning_rate": 0.00013762331864194347, + "loss": 2.046, + "step": 320585 + }, + { + "epoch": 0.75, + "grad_norm": 2.15625, + "learning_rate": 0.0001376216061472487, + "loss": 2.0389, + "step": 320590 + }, + { + "epoch": 0.75, + "grad_norm": 2.25, + "learning_rate": 0.00013761989363970175, + "loss": 2.254, + "step": 320595 + }, + { + "epoch": 0.75, + "grad_norm": 2.765625, + "learning_rate": 0.00013761818111930315, + "loss": 2.0666, + "step": 320600 + }, + { + "epoch": 0.75, + "grad_norm": 1.859375, + "learning_rate": 0.00013761646858605347, + "loss": 2.0959, + "step": 320605 + }, + { + "epoch": 0.75, + "grad_norm": 1.984375, + "learning_rate": 0.00013761475603995334, + "loss": 1.8921, + "step": 320610 + }, + { + "epoch": 0.75, + "grad_norm": 2.296875, + "learning_rate": 0.00013761304348100336, + "loss": 2.1687, + "step": 320615 + }, + { + "epoch": 0.75, + "grad_norm": 2.03125, + "learning_rate": 0.00013761133090920406, + "loss": 2.0635, + "step": 320620 + }, + { + "epoch": 0.75, + "grad_norm": 2.234375, + "learning_rate": 0.00013760961832455604, + "loss": 1.953, + "step": 320625 + }, + { + "epoch": 0.75, + "grad_norm": 2.28125, + "learning_rate": 0.00013760790572705988, + "loss": 2.2266, + "step": 320630 + }, + { + "epoch": 0.75, + "grad_norm": 2.109375, + "learning_rate": 0.00013760619311671622, + "loss": 2.0528, + "step": 320635 + }, + { + "epoch": 0.75, + "grad_norm": 2.328125, + "learning_rate": 0.00013760448049352555, + "loss": 2.1056, + "step": 320640 + }, + { + "epoch": 0.75, + "grad_norm": 2.28125, + "learning_rate": 0.00013760276785748855, + "loss": 1.7276, + "step": 320645 + }, + { + "epoch": 0.75, + "grad_norm": 2.078125, + "learning_rate": 0.00013760105520860573, + "loss": 1.8968, + "step": 320650 + }, + { + "epoch": 0.75, + "grad_norm": 2.34375, + "learning_rate": 0.00013759934254687774, + "loss": 2.1753, + "step": 320655 + }, + { + "epoch": 0.75, + "grad_norm": 2.3125, + "learning_rate": 0.0001375976298723051, + "loss": 1.7129, + "step": 320660 + }, + { + "epoch": 0.75, + "grad_norm": 2.328125, + "learning_rate": 0.00013759591718488844, + "loss": 1.955, + "step": 320665 + }, + { + "epoch": 0.75, + "grad_norm": 1.8984375, + "learning_rate": 0.0001375942044846283, + "loss": 2.1198, + "step": 320670 + }, + { + "epoch": 0.75, + "grad_norm": 2.578125, + "learning_rate": 0.00013759249177152535, + "loss": 1.9572, + "step": 320675 + }, + { + "epoch": 0.75, + "grad_norm": 2.28125, + "learning_rate": 0.00013759077904558012, + "loss": 1.9927, + "step": 320680 + }, + { + "epoch": 0.75, + "grad_norm": 2.421875, + "learning_rate": 0.0001375890663067932, + "loss": 2.103, + "step": 320685 + }, + { + "epoch": 0.75, + "grad_norm": 2.140625, + "learning_rate": 0.00013758735355516515, + "loss": 1.833, + "step": 320690 + }, + { + "epoch": 0.75, + "grad_norm": 2.28125, + "learning_rate": 0.0001375856407906966, + "loss": 2.1103, + "step": 320695 + }, + { + "epoch": 0.75, + "grad_norm": 2.28125, + "learning_rate": 0.00013758392801338807, + "loss": 1.9645, + "step": 320700 + }, + { + "epoch": 0.75, + "grad_norm": 2.328125, + "learning_rate": 0.00013758221522324024, + "loss": 2.1812, + "step": 320705 + }, + { + "epoch": 0.75, + "grad_norm": 1.9375, + "learning_rate": 0.00013758050242025362, + "loss": 1.9885, + "step": 320710 + }, + { + "epoch": 0.75, + "grad_norm": 2.4375, + "learning_rate": 0.00013757878960442881, + "loss": 2.029, + "step": 320715 + }, + { + "epoch": 0.75, + "grad_norm": 2.171875, + "learning_rate": 0.00013757707677576638, + "loss": 1.8888, + "step": 320720 + }, + { + "epoch": 0.75, + "grad_norm": 2.1875, + "learning_rate": 0.000137575363934267, + "loss": 2.0852, + "step": 320725 + }, + { + "epoch": 0.75, + "grad_norm": 1.8203125, + "learning_rate": 0.0001375736510799312, + "loss": 2.0533, + "step": 320730 + }, + { + "epoch": 0.75, + "grad_norm": 2.421875, + "learning_rate": 0.0001375719382127595, + "loss": 2.1489, + "step": 320735 + }, + { + "epoch": 0.75, + "grad_norm": 2.28125, + "learning_rate": 0.00013757022533275256, + "loss": 1.9415, + "step": 320740 + }, + { + "epoch": 0.75, + "grad_norm": 2.4375, + "learning_rate": 0.00013756851243991097, + "loss": 2.0847, + "step": 320745 + }, + { + "epoch": 0.75, + "grad_norm": 2.09375, + "learning_rate": 0.0001375667995342353, + "loss": 2.1251, + "step": 320750 + }, + { + "epoch": 0.75, + "grad_norm": 1.859375, + "learning_rate": 0.00013756508661572612, + "loss": 2.0687, + "step": 320755 + }, + { + "epoch": 0.75, + "grad_norm": 1.9921875, + "learning_rate": 0.00013756337368438403, + "loss": 1.8498, + "step": 320760 + }, + { + "epoch": 0.75, + "grad_norm": 2.203125, + "learning_rate": 0.00013756166074020958, + "loss": 2.0532, + "step": 320765 + }, + { + "epoch": 0.75, + "grad_norm": 2.015625, + "learning_rate": 0.00013755994778320342, + "loss": 2.2224, + "step": 320770 + }, + { + "epoch": 0.75, + "grad_norm": 2.640625, + "learning_rate": 0.00013755823481336607, + "loss": 2.0445, + "step": 320775 + }, + { + "epoch": 0.75, + "grad_norm": 2.078125, + "learning_rate": 0.00013755652183069817, + "loss": 2.1248, + "step": 320780 + }, + { + "epoch": 0.75, + "grad_norm": 2.296875, + "learning_rate": 0.00013755480883520028, + "loss": 1.9722, + "step": 320785 + }, + { + "epoch": 0.75, + "grad_norm": 2.28125, + "learning_rate": 0.00013755309582687297, + "loss": 2.0651, + "step": 320790 + }, + { + "epoch": 0.75, + "grad_norm": 1.859375, + "learning_rate": 0.00013755138280571687, + "loss": 2.2227, + "step": 320795 + }, + { + "epoch": 0.75, + "grad_norm": 1.8671875, + "learning_rate": 0.00013754966977173248, + "loss": 1.9315, + "step": 320800 + }, + { + "epoch": 0.75, + "grad_norm": 2.453125, + "learning_rate": 0.0001375479567249205, + "loss": 1.9596, + "step": 320805 + }, + { + "epoch": 0.75, + "grad_norm": 2.125, + "learning_rate": 0.00013754624366528145, + "loss": 2.151, + "step": 320810 + }, + { + "epoch": 0.75, + "grad_norm": 1.9609375, + "learning_rate": 0.0001375445305928159, + "loss": 2.1701, + "step": 320815 + }, + { + "epoch": 0.75, + "grad_norm": 2.53125, + "learning_rate": 0.0001375428175075245, + "loss": 1.7625, + "step": 320820 + }, + { + "epoch": 0.76, + "grad_norm": 2.203125, + "learning_rate": 0.00013754110440940775, + "loss": 2.1134, + "step": 320825 + }, + { + "epoch": 0.76, + "grad_norm": 2.234375, + "learning_rate": 0.00013753939129846626, + "loss": 1.8056, + "step": 320830 + }, + { + "epoch": 0.76, + "grad_norm": 2.375, + "learning_rate": 0.00013753767817470066, + "loss": 2.0717, + "step": 320835 + }, + { + "epoch": 0.76, + "grad_norm": 2.359375, + "learning_rate": 0.00013753596503811153, + "loss": 2.0964, + "step": 320840 + }, + { + "epoch": 0.76, + "grad_norm": 2.28125, + "learning_rate": 0.0001375342518886994, + "loss": 2.0399, + "step": 320845 + }, + { + "epoch": 0.76, + "grad_norm": 2.109375, + "learning_rate": 0.0001375325387264649, + "loss": 2.0083, + "step": 320850 + }, + { + "epoch": 0.76, + "grad_norm": 2.578125, + "learning_rate": 0.00013753082555140859, + "loss": 2.2864, + "step": 320855 + }, + { + "epoch": 0.76, + "grad_norm": 2.359375, + "learning_rate": 0.0001375291123635311, + "loss": 2.0066, + "step": 320860 + }, + { + "epoch": 0.76, + "grad_norm": 2.171875, + "learning_rate": 0.00013752739916283297, + "loss": 2.0218, + "step": 320865 + }, + { + "epoch": 0.76, + "grad_norm": 2.328125, + "learning_rate": 0.00013752568594931481, + "loss": 1.9826, + "step": 320870 + }, + { + "epoch": 0.76, + "grad_norm": 1.703125, + "learning_rate": 0.00013752397272297716, + "loss": 2.0984, + "step": 320875 + }, + { + "epoch": 0.76, + "grad_norm": 2.09375, + "learning_rate": 0.00013752225948382066, + "loss": 2.0847, + "step": 320880 + }, + { + "epoch": 0.76, + "grad_norm": 2.375, + "learning_rate": 0.00013752054623184589, + "loss": 2.1493, + "step": 320885 + }, + { + "epoch": 0.76, + "grad_norm": 1.921875, + "learning_rate": 0.00013751883296705343, + "loss": 1.9687, + "step": 320890 + }, + { + "epoch": 0.76, + "grad_norm": 2.125, + "learning_rate": 0.00013751711968944383, + "loss": 1.9832, + "step": 320895 + }, + { + "epoch": 0.76, + "grad_norm": 2.171875, + "learning_rate": 0.0001375154063990177, + "loss": 2.1227, + "step": 320900 + }, + { + "epoch": 0.76, + "grad_norm": 2.5, + "learning_rate": 0.00013751369309577565, + "loss": 1.8871, + "step": 320905 + }, + { + "epoch": 0.76, + "grad_norm": 2.375, + "learning_rate": 0.0001375119797797182, + "loss": 2.1564, + "step": 320910 + }, + { + "epoch": 0.76, + "grad_norm": 1.8046875, + "learning_rate": 0.00013751026645084604, + "loss": 1.9514, + "step": 320915 + }, + { + "epoch": 0.76, + "grad_norm": 2.09375, + "learning_rate": 0.00013750855310915965, + "loss": 2.0971, + "step": 320920 + }, + { + "epoch": 0.76, + "grad_norm": 2.25, + "learning_rate": 0.00013750683975465966, + "loss": 1.8963, + "step": 320925 + }, + { + "epoch": 0.76, + "grad_norm": 2.484375, + "learning_rate": 0.00013750512638734667, + "loss": 1.9475, + "step": 320930 + }, + { + "epoch": 0.76, + "grad_norm": 2.15625, + "learning_rate": 0.00013750341300722122, + "loss": 2.1222, + "step": 320935 + }, + { + "epoch": 0.76, + "grad_norm": 2.265625, + "learning_rate": 0.00013750169961428393, + "loss": 2.0389, + "step": 320940 + }, + { + "epoch": 0.76, + "grad_norm": 2.203125, + "learning_rate": 0.0001374999862085354, + "loss": 2.0357, + "step": 320945 + }, + { + "epoch": 0.76, + "grad_norm": 1.7890625, + "learning_rate": 0.0001374982727899762, + "loss": 2.1171, + "step": 320950 + }, + { + "epoch": 0.76, + "grad_norm": 2.140625, + "learning_rate": 0.0001374965593586069, + "loss": 2.1302, + "step": 320955 + }, + { + "epoch": 0.76, + "grad_norm": 2.28125, + "learning_rate": 0.0001374948459144281, + "loss": 2.1587, + "step": 320960 + }, + { + "epoch": 0.76, + "grad_norm": 2.3125, + "learning_rate": 0.00013749313245744037, + "loss": 2.187, + "step": 320965 + }, + { + "epoch": 0.76, + "grad_norm": 2.25, + "learning_rate": 0.00013749141898764428, + "loss": 2.1212, + "step": 320970 + }, + { + "epoch": 0.76, + "grad_norm": 2.078125, + "learning_rate": 0.0001374897055050405, + "loss": 1.9866, + "step": 320975 + }, + { + "epoch": 0.76, + "grad_norm": 2.65625, + "learning_rate": 0.00013748799200962952, + "loss": 2.0184, + "step": 320980 + }, + { + "epoch": 0.76, + "grad_norm": 2.125, + "learning_rate": 0.000137486278501412, + "loss": 2.14, + "step": 320985 + }, + { + "epoch": 0.76, + "grad_norm": 1.8515625, + "learning_rate": 0.0001374845649803884, + "loss": 2.1121, + "step": 320990 + }, + { + "epoch": 0.76, + "grad_norm": 2.078125, + "learning_rate": 0.00013748285144655947, + "loss": 1.9806, + "step": 320995 + }, + { + "epoch": 0.76, + "grad_norm": 2.390625, + "learning_rate": 0.0001374811378999257, + "loss": 2.0842, + "step": 321000 + }, + { + "epoch": 0.76, + "grad_norm": 3.015625, + "learning_rate": 0.0001374794243404877, + "loss": 2.094, + "step": 321005 + }, + { + "epoch": 0.76, + "grad_norm": 2.1875, + "learning_rate": 0.00013747771076824602, + "loss": 2.1545, + "step": 321010 + }, + { + "epoch": 0.76, + "grad_norm": 2.0625, + "learning_rate": 0.00013747599718320127, + "loss": 2.1117, + "step": 321015 + }, + { + "epoch": 0.76, + "grad_norm": 2.703125, + "learning_rate": 0.0001374742835853541, + "loss": 2.2203, + "step": 321020 + }, + { + "epoch": 0.76, + "grad_norm": 2.40625, + "learning_rate": 0.000137472569974705, + "loss": 2.0489, + "step": 321025 + }, + { + "epoch": 0.76, + "grad_norm": 2.109375, + "learning_rate": 0.00013747085635125455, + "loss": 2.1539, + "step": 321030 + }, + { + "epoch": 0.76, + "grad_norm": 2.640625, + "learning_rate": 0.00013746914271500345, + "loss": 2.1429, + "step": 321035 + }, + { + "epoch": 0.76, + "grad_norm": 1.8359375, + "learning_rate": 0.00013746742906595215, + "loss": 1.943, + "step": 321040 + }, + { + "epoch": 0.76, + "grad_norm": 2.421875, + "learning_rate": 0.00013746571540410133, + "loss": 2.129, + "step": 321045 + }, + { + "epoch": 0.76, + "grad_norm": 2.1875, + "learning_rate": 0.00013746400172945153, + "loss": 2.1701, + "step": 321050 + }, + { + "epoch": 0.76, + "grad_norm": 2.046875, + "learning_rate": 0.00013746228804200337, + "loss": 2.217, + "step": 321055 + }, + { + "epoch": 0.76, + "grad_norm": 2.328125, + "learning_rate": 0.0001374605743417574, + "loss": 2.0114, + "step": 321060 + }, + { + "epoch": 0.76, + "grad_norm": 2.5, + "learning_rate": 0.0001374588606287142, + "loss": 1.8611, + "step": 321065 + }, + { + "epoch": 0.76, + "grad_norm": 2.875, + "learning_rate": 0.0001374571469028744, + "loss": 2.0997, + "step": 321070 + }, + { + "epoch": 0.76, + "grad_norm": 2.1875, + "learning_rate": 0.00013745543316423853, + "loss": 2.0759, + "step": 321075 + }, + { + "epoch": 0.76, + "grad_norm": 2.203125, + "learning_rate": 0.0001374537194128072, + "loss": 1.7896, + "step": 321080 + }, + { + "epoch": 0.76, + "grad_norm": 2.25, + "learning_rate": 0.00013745200564858103, + "loss": 2.0873, + "step": 321085 + }, + { + "epoch": 0.76, + "grad_norm": 2.46875, + "learning_rate": 0.0001374502918715606, + "loss": 2.1834, + "step": 321090 + }, + { + "epoch": 0.76, + "grad_norm": 3.015625, + "learning_rate": 0.0001374485780817464, + "loss": 1.9863, + "step": 321095 + }, + { + "epoch": 0.76, + "grad_norm": 1.96875, + "learning_rate": 0.00013744686427913914, + "loss": 2.1169, + "step": 321100 + }, + { + "epoch": 0.76, + "grad_norm": 2.0, + "learning_rate": 0.00013744515046373932, + "loss": 1.9799, + "step": 321105 + }, + { + "epoch": 0.76, + "grad_norm": 1.890625, + "learning_rate": 0.00013744343663554763, + "loss": 2.0223, + "step": 321110 + }, + { + "epoch": 0.76, + "grad_norm": 1.96875, + "learning_rate": 0.00013744172279456452, + "loss": 2.0019, + "step": 321115 + }, + { + "epoch": 0.76, + "grad_norm": 2.15625, + "learning_rate": 0.00013744000894079064, + "loss": 2.1575, + "step": 321120 + }, + { + "epoch": 0.76, + "grad_norm": 2.15625, + "learning_rate": 0.00013743829507422654, + "loss": 2.0454, + "step": 321125 + }, + { + "epoch": 0.76, + "grad_norm": 2.953125, + "learning_rate": 0.0001374365811948729, + "loss": 1.8668, + "step": 321130 + }, + { + "epoch": 0.76, + "grad_norm": 2.046875, + "learning_rate": 0.00013743486730273023, + "loss": 2.1503, + "step": 321135 + }, + { + "epoch": 0.76, + "grad_norm": 2.78125, + "learning_rate": 0.0001374331533977991, + "loss": 1.9964, + "step": 321140 + }, + { + "epoch": 0.76, + "grad_norm": 2.15625, + "learning_rate": 0.00013743143948008018, + "loss": 1.9969, + "step": 321145 + }, + { + "epoch": 0.76, + "grad_norm": 2.546875, + "learning_rate": 0.00013742972554957393, + "loss": 1.9979, + "step": 321150 + }, + { + "epoch": 0.76, + "grad_norm": 2.109375, + "learning_rate": 0.00013742801160628108, + "loss": 1.9164, + "step": 321155 + }, + { + "epoch": 0.76, + "grad_norm": 2.140625, + "learning_rate": 0.00013742629765020213, + "loss": 1.9416, + "step": 321160 + }, + { + "epoch": 0.76, + "grad_norm": 2.234375, + "learning_rate": 0.00013742458368133764, + "loss": 2.0493, + "step": 321165 + }, + { + "epoch": 0.76, + "grad_norm": 2.390625, + "learning_rate": 0.00013742286969968824, + "loss": 2.0528, + "step": 321170 + }, + { + "epoch": 0.76, + "grad_norm": 2.59375, + "learning_rate": 0.00013742115570525453, + "loss": 2.0519, + "step": 321175 + }, + { + "epoch": 0.76, + "grad_norm": 2.234375, + "learning_rate": 0.00013741944169803707, + "loss": 2.0411, + "step": 321180 + }, + { + "epoch": 0.76, + "grad_norm": 2.15625, + "learning_rate": 0.00013741772767803646, + "loss": 2.1985, + "step": 321185 + }, + { + "epoch": 0.76, + "grad_norm": 2.28125, + "learning_rate": 0.00013741601364525326, + "loss": 2.215, + "step": 321190 + }, + { + "epoch": 0.76, + "grad_norm": 2.21875, + "learning_rate": 0.00013741429959968806, + "loss": 1.8511, + "step": 321195 + }, + { + "epoch": 0.76, + "grad_norm": 2.34375, + "learning_rate": 0.00013741258554134147, + "loss": 2.1449, + "step": 321200 + }, + { + "epoch": 0.76, + "grad_norm": 2.140625, + "learning_rate": 0.00013741087147021405, + "loss": 2.0026, + "step": 321205 + }, + { + "epoch": 0.76, + "grad_norm": 1.9296875, + "learning_rate": 0.00013740915738630642, + "loss": 2.2351, + "step": 321210 + }, + { + "epoch": 0.76, + "grad_norm": 2.1875, + "learning_rate": 0.00013740744328961913, + "loss": 2.2662, + "step": 321215 + }, + { + "epoch": 0.76, + "grad_norm": 2.09375, + "learning_rate": 0.00013740572918015279, + "loss": 2.1153, + "step": 321220 + }, + { + "epoch": 0.76, + "grad_norm": 2.5625, + "learning_rate": 0.00013740401505790798, + "loss": 1.8895, + "step": 321225 + }, + { + "epoch": 0.76, + "grad_norm": 2.203125, + "learning_rate": 0.00013740230092288525, + "loss": 2.0517, + "step": 321230 + }, + { + "epoch": 0.76, + "grad_norm": 2.109375, + "learning_rate": 0.00013740058677508523, + "loss": 2.0884, + "step": 321235 + }, + { + "epoch": 0.76, + "grad_norm": 1.9140625, + "learning_rate": 0.00013739887261450848, + "loss": 1.9734, + "step": 321240 + }, + { + "epoch": 0.76, + "grad_norm": 2.15625, + "learning_rate": 0.00013739715844115563, + "loss": 1.8819, + "step": 321245 + }, + { + "epoch": 0.76, + "grad_norm": 1.96875, + "learning_rate": 0.00013739544425502722, + "loss": 1.9931, + "step": 321250 + }, + { + "epoch": 0.76, + "grad_norm": 2.015625, + "learning_rate": 0.00013739373005612382, + "loss": 2.1233, + "step": 321255 + }, + { + "epoch": 0.76, + "grad_norm": 1.96875, + "learning_rate": 0.00013739201584444608, + "loss": 1.9353, + "step": 321260 + }, + { + "epoch": 0.76, + "grad_norm": 2.03125, + "learning_rate": 0.0001373903016199945, + "loss": 2.1086, + "step": 321265 + }, + { + "epoch": 0.76, + "grad_norm": 2.34375, + "learning_rate": 0.0001373885873827698, + "loss": 2.0375, + "step": 321270 + }, + { + "epoch": 0.76, + "grad_norm": 2.25, + "learning_rate": 0.0001373868731327724, + "loss": 2.0369, + "step": 321275 + }, + { + "epoch": 0.76, + "grad_norm": 2.171875, + "learning_rate": 0.000137385158870003, + "loss": 2.0161, + "step": 321280 + }, + { + "epoch": 0.76, + "grad_norm": 2.015625, + "learning_rate": 0.00013738344459446214, + "loss": 2.14, + "step": 321285 + }, + { + "epoch": 0.76, + "grad_norm": 2.484375, + "learning_rate": 0.00013738173030615044, + "loss": 2.0576, + "step": 321290 + }, + { + "epoch": 0.76, + "grad_norm": 2.125, + "learning_rate": 0.00013738001600506843, + "loss": 2.052, + "step": 321295 + }, + { + "epoch": 0.76, + "grad_norm": 2.15625, + "learning_rate": 0.00013737830169121678, + "loss": 1.9822, + "step": 321300 + }, + { + "epoch": 0.76, + "grad_norm": 2.1875, + "learning_rate": 0.000137376587364596, + "loss": 2.0131, + "step": 321305 + }, + { + "epoch": 0.76, + "grad_norm": 2.03125, + "learning_rate": 0.00013737487302520663, + "loss": 2.0807, + "step": 321310 + }, + { + "epoch": 0.76, + "grad_norm": 1.8046875, + "learning_rate": 0.00013737315867304943, + "loss": 1.9992, + "step": 321315 + }, + { + "epoch": 0.76, + "grad_norm": 2.34375, + "learning_rate": 0.00013737144430812483, + "loss": 2.0219, + "step": 321320 + }, + { + "epoch": 0.76, + "grad_norm": 2.09375, + "learning_rate": 0.00013736972993043346, + "loss": 1.944, + "step": 321325 + }, + { + "epoch": 0.76, + "grad_norm": 2.578125, + "learning_rate": 0.00013736801553997593, + "loss": 2.1611, + "step": 321330 + }, + { + "epoch": 0.76, + "grad_norm": 2.21875, + "learning_rate": 0.0001373663011367528, + "loss": 1.9345, + "step": 321335 + }, + { + "epoch": 0.76, + "grad_norm": 2.3125, + "learning_rate": 0.00013736458672076467, + "loss": 1.9864, + "step": 321340 + }, + { + "epoch": 0.76, + "grad_norm": 1.8046875, + "learning_rate": 0.00013736287229201212, + "loss": 1.9027, + "step": 321345 + }, + { + "epoch": 0.76, + "grad_norm": 1.921875, + "learning_rate": 0.00013736115785049574, + "loss": 1.9175, + "step": 321350 + }, + { + "epoch": 0.76, + "grad_norm": 2.078125, + "learning_rate": 0.0001373594433962161, + "loss": 2.0666, + "step": 321355 + }, + { + "epoch": 0.76, + "grad_norm": 2.78125, + "learning_rate": 0.00013735772892917384, + "loss": 2.1382, + "step": 321360 + }, + { + "epoch": 0.76, + "grad_norm": 2.390625, + "learning_rate": 0.0001373560144493694, + "loss": 1.9762, + "step": 321365 + }, + { + "epoch": 0.76, + "grad_norm": 2.078125, + "learning_rate": 0.00013735429995680355, + "loss": 1.9619, + "step": 321370 + }, + { + "epoch": 0.76, + "grad_norm": 2.09375, + "learning_rate": 0.00013735258545147678, + "loss": 1.9308, + "step": 321375 + }, + { + "epoch": 0.76, + "grad_norm": 3.6875, + "learning_rate": 0.00013735087093338968, + "loss": 1.9549, + "step": 321380 + }, + { + "epoch": 0.76, + "grad_norm": 2.296875, + "learning_rate": 0.00013734915640254287, + "loss": 2.3312, + "step": 321385 + }, + { + "epoch": 0.76, + "grad_norm": 2.46875, + "learning_rate": 0.00013734744185893687, + "loss": 2.1771, + "step": 321390 + }, + { + "epoch": 0.76, + "grad_norm": 1.96875, + "learning_rate": 0.00013734572730257231, + "loss": 2.1597, + "step": 321395 + }, + { + "epoch": 0.76, + "grad_norm": 2.140625, + "learning_rate": 0.00013734401273344978, + "loss": 2.0732, + "step": 321400 + }, + { + "epoch": 0.76, + "grad_norm": 1.90625, + "learning_rate": 0.00013734229815156988, + "loss": 2.0918, + "step": 321405 + }, + { + "epoch": 0.76, + "grad_norm": 2.609375, + "learning_rate": 0.00013734058355693317, + "loss": 2.0763, + "step": 321410 + }, + { + "epoch": 0.76, + "grad_norm": 2.453125, + "learning_rate": 0.00013733886894954026, + "loss": 2.1891, + "step": 321415 + }, + { + "epoch": 0.76, + "grad_norm": 2.328125, + "learning_rate": 0.00013733715432939165, + "loss": 1.8824, + "step": 321420 + }, + { + "epoch": 0.76, + "grad_norm": 2.28125, + "learning_rate": 0.000137335439696488, + "loss": 2.1087, + "step": 321425 + }, + { + "epoch": 0.76, + "grad_norm": 2.328125, + "learning_rate": 0.00013733372505082996, + "loss": 1.9722, + "step": 321430 + }, + { + "epoch": 0.76, + "grad_norm": 1.921875, + "learning_rate": 0.00013733201039241798, + "loss": 1.9684, + "step": 321435 + }, + { + "epoch": 0.76, + "grad_norm": 2.296875, + "learning_rate": 0.00013733029572125273, + "loss": 1.9529, + "step": 321440 + }, + { + "epoch": 0.76, + "grad_norm": 2.515625, + "learning_rate": 0.00013732858103733474, + "loss": 2.0482, + "step": 321445 + }, + { + "epoch": 0.76, + "grad_norm": 2.03125, + "learning_rate": 0.00013732686634066469, + "loss": 2.084, + "step": 321450 + }, + { + "epoch": 0.76, + "grad_norm": 2.53125, + "learning_rate": 0.00013732515163124306, + "loss": 1.9993, + "step": 321455 + }, + { + "epoch": 0.76, + "grad_norm": 2.359375, + "learning_rate": 0.00013732343690907049, + "loss": 1.9983, + "step": 321460 + }, + { + "epoch": 0.76, + "grad_norm": 2.140625, + "learning_rate": 0.00013732172217414756, + "loss": 1.9136, + "step": 321465 + }, + { + "epoch": 0.76, + "grad_norm": 2.546875, + "learning_rate": 0.00013732000742647487, + "loss": 2.1449, + "step": 321470 + }, + { + "epoch": 0.76, + "grad_norm": 2.3125, + "learning_rate": 0.00013731829266605295, + "loss": 2.0422, + "step": 321475 + }, + { + "epoch": 0.76, + "grad_norm": 1.984375, + "learning_rate": 0.00013731657789288246, + "loss": 2.0829, + "step": 321480 + }, + { + "epoch": 0.76, + "grad_norm": 2.421875, + "learning_rate": 0.00013731486310696392, + "loss": 2.0106, + "step": 321485 + }, + { + "epoch": 0.76, + "grad_norm": 2.421875, + "learning_rate": 0.00013731314830829798, + "loss": 2.0104, + "step": 321490 + }, + { + "epoch": 0.76, + "grad_norm": 2.359375, + "learning_rate": 0.00013731143349688516, + "loss": 2.0724, + "step": 321495 + }, + { + "epoch": 0.76, + "grad_norm": 2.15625, + "learning_rate": 0.00013730971867272608, + "loss": 2.0835, + "step": 321500 + }, + { + "epoch": 0.76, + "grad_norm": 2.734375, + "learning_rate": 0.00013730800383582133, + "loss": 2.1756, + "step": 321505 + }, + { + "epoch": 0.76, + "grad_norm": 1.9609375, + "learning_rate": 0.00013730628898617152, + "loss": 2.0742, + "step": 321510 + }, + { + "epoch": 0.76, + "grad_norm": 2.03125, + "learning_rate": 0.00013730457412377718, + "loss": 1.7845, + "step": 321515 + }, + { + "epoch": 0.76, + "grad_norm": 2.421875, + "learning_rate": 0.00013730285924863894, + "loss": 2.1035, + "step": 321520 + }, + { + "epoch": 0.76, + "grad_norm": 2.21875, + "learning_rate": 0.0001373011443607573, + "loss": 1.9823, + "step": 321525 + }, + { + "epoch": 0.76, + "grad_norm": 2.140625, + "learning_rate": 0.000137299429460133, + "loss": 1.9318, + "step": 321530 + }, + { + "epoch": 0.76, + "grad_norm": 2.640625, + "learning_rate": 0.00013729771454676647, + "loss": 1.9554, + "step": 321535 + }, + { + "epoch": 0.76, + "grad_norm": 1.8671875, + "learning_rate": 0.00013729599962065842, + "loss": 2.1192, + "step": 321540 + }, + { + "epoch": 0.76, + "grad_norm": 1.8046875, + "learning_rate": 0.00013729428468180937, + "loss": 2.0561, + "step": 321545 + }, + { + "epoch": 0.76, + "grad_norm": 2.46875, + "learning_rate": 0.00013729256973021986, + "loss": 2.071, + "step": 321550 + }, + { + "epoch": 0.76, + "grad_norm": 2.1875, + "learning_rate": 0.00013729085476589056, + "loss": 1.9493, + "step": 321555 + }, + { + "epoch": 0.76, + "grad_norm": 2.09375, + "learning_rate": 0.00013728913978882206, + "loss": 2.1172, + "step": 321560 + }, + { + "epoch": 0.76, + "grad_norm": 2.28125, + "learning_rate": 0.0001372874247990149, + "loss": 2.0217, + "step": 321565 + }, + { + "epoch": 0.76, + "grad_norm": 2.25, + "learning_rate": 0.00013728570979646966, + "loss": 2.1586, + "step": 321570 + }, + { + "epoch": 0.76, + "grad_norm": 2.203125, + "learning_rate": 0.00013728399478118696, + "loss": 2.0631, + "step": 321575 + }, + { + "epoch": 0.76, + "grad_norm": 2.140625, + "learning_rate": 0.00013728227975316737, + "loss": 2.2205, + "step": 321580 + }, + { + "epoch": 0.76, + "grad_norm": 2.375, + "learning_rate": 0.00013728056471241148, + "loss": 2.2129, + "step": 321585 + }, + { + "epoch": 0.76, + "grad_norm": 2.15625, + "learning_rate": 0.00013727884965891986, + "loss": 2.296, + "step": 321590 + }, + { + "epoch": 0.76, + "grad_norm": 2.296875, + "learning_rate": 0.00013727713459269313, + "loss": 1.8303, + "step": 321595 + }, + { + "epoch": 0.76, + "grad_norm": 2.34375, + "learning_rate": 0.00013727541951373185, + "loss": 2.0647, + "step": 321600 + }, + { + "epoch": 0.76, + "grad_norm": 2.265625, + "learning_rate": 0.00013727370442203658, + "loss": 2.1039, + "step": 321605 + }, + { + "epoch": 0.76, + "grad_norm": 2.25, + "learning_rate": 0.00013727198931760796, + "loss": 2.0669, + "step": 321610 + }, + { + "epoch": 0.76, + "grad_norm": 2.0625, + "learning_rate": 0.00013727027420044655, + "loss": 2.1526, + "step": 321615 + }, + { + "epoch": 0.76, + "grad_norm": 2.296875, + "learning_rate": 0.00013726855907055297, + "loss": 2.0036, + "step": 321620 + }, + { + "epoch": 0.76, + "grad_norm": 2.21875, + "learning_rate": 0.0001372668439279277, + "loss": 1.9883, + "step": 321625 + }, + { + "epoch": 0.76, + "grad_norm": 2.703125, + "learning_rate": 0.00013726512877257149, + "loss": 2.047, + "step": 321630 + }, + { + "epoch": 0.76, + "grad_norm": 2.78125, + "learning_rate": 0.00013726341360448477, + "loss": 2.0613, + "step": 321635 + }, + { + "epoch": 0.76, + "grad_norm": 1.9296875, + "learning_rate": 0.0001372616984236682, + "loss": 1.9424, + "step": 321640 + }, + { + "epoch": 0.76, + "grad_norm": 2.34375, + "learning_rate": 0.00013725998323012236, + "loss": 1.9956, + "step": 321645 + }, + { + "epoch": 0.76, + "grad_norm": 2.0, + "learning_rate": 0.00013725826802384787, + "loss": 1.8482, + "step": 321650 + }, + { + "epoch": 0.76, + "grad_norm": 2.234375, + "learning_rate": 0.00013725655280484526, + "loss": 1.9584, + "step": 321655 + }, + { + "epoch": 0.76, + "grad_norm": 2.59375, + "learning_rate": 0.00013725483757311512, + "loss": 1.8678, + "step": 321660 + }, + { + "epoch": 0.76, + "grad_norm": 2.46875, + "learning_rate": 0.00013725312232865805, + "loss": 2.1016, + "step": 321665 + }, + { + "epoch": 0.76, + "grad_norm": 1.8984375, + "learning_rate": 0.00013725140707147466, + "loss": 2.0471, + "step": 321670 + }, + { + "epoch": 0.76, + "grad_norm": 2.34375, + "learning_rate": 0.0001372496918015655, + "loss": 1.9761, + "step": 321675 + }, + { + "epoch": 0.76, + "grad_norm": 2.375, + "learning_rate": 0.0001372479765189312, + "loss": 1.9402, + "step": 321680 + }, + { + "epoch": 0.76, + "grad_norm": 2.234375, + "learning_rate": 0.00013724626122357228, + "loss": 2.0978, + "step": 321685 + }, + { + "epoch": 0.76, + "grad_norm": 2.125, + "learning_rate": 0.00013724454591548937, + "loss": 2.1433, + "step": 321690 + }, + { + "epoch": 0.76, + "grad_norm": 2.265625, + "learning_rate": 0.00013724283059468302, + "loss": 2.0017, + "step": 321695 + }, + { + "epoch": 0.76, + "grad_norm": 2.46875, + "learning_rate": 0.00013724111526115392, + "loss": 2.1788, + "step": 321700 + }, + { + "epoch": 0.76, + "grad_norm": 2.625, + "learning_rate": 0.0001372393999149025, + "loss": 2.1986, + "step": 321705 + }, + { + "epoch": 0.76, + "grad_norm": 1.9609375, + "learning_rate": 0.00013723768455592948, + "loss": 2.178, + "step": 321710 + }, + { + "epoch": 0.76, + "grad_norm": 2.28125, + "learning_rate": 0.00013723596918423535, + "loss": 2.1646, + "step": 321715 + }, + { + "epoch": 0.76, + "grad_norm": 2.0625, + "learning_rate": 0.00013723425379982075, + "loss": 2.2036, + "step": 321720 + }, + { + "epoch": 0.76, + "grad_norm": 2.359375, + "learning_rate": 0.00013723253840268627, + "loss": 2.0652, + "step": 321725 + }, + { + "epoch": 0.76, + "grad_norm": 2.359375, + "learning_rate": 0.00013723082299283248, + "loss": 2.1624, + "step": 321730 + }, + { + "epoch": 0.76, + "grad_norm": 2.46875, + "learning_rate": 0.00013722910757025996, + "loss": 2.0738, + "step": 321735 + }, + { + "epoch": 0.76, + "grad_norm": 2.15625, + "learning_rate": 0.00013722739213496926, + "loss": 2.1004, + "step": 321740 + }, + { + "epoch": 0.76, + "grad_norm": 2.203125, + "learning_rate": 0.00013722567668696106, + "loss": 1.97, + "step": 321745 + }, + { + "epoch": 0.76, + "grad_norm": 2.3125, + "learning_rate": 0.00013722396122623588, + "loss": 2.1035, + "step": 321750 + }, + { + "epoch": 0.76, + "grad_norm": 2.0625, + "learning_rate": 0.00013722224575279433, + "loss": 2.1586, + "step": 321755 + }, + { + "epoch": 0.76, + "grad_norm": 1.9765625, + "learning_rate": 0.00013722053026663696, + "loss": 2.0369, + "step": 321760 + }, + { + "epoch": 0.76, + "grad_norm": 2.09375, + "learning_rate": 0.00013721881476776442, + "loss": 1.862, + "step": 321765 + }, + { + "epoch": 0.76, + "grad_norm": 2.46875, + "learning_rate": 0.00013721709925617722, + "loss": 2.1141, + "step": 321770 + }, + { + "epoch": 0.76, + "grad_norm": 2.421875, + "learning_rate": 0.00013721538373187602, + "loss": 1.9768, + "step": 321775 + }, + { + "epoch": 0.76, + "grad_norm": 2.328125, + "learning_rate": 0.00013721366819486133, + "loss": 1.7181, + "step": 321780 + }, + { + "epoch": 0.76, + "grad_norm": 2.328125, + "learning_rate": 0.00013721195264513377, + "loss": 1.8729, + "step": 321785 + }, + { + "epoch": 0.76, + "grad_norm": 2.421875, + "learning_rate": 0.000137210237082694, + "loss": 2.0263, + "step": 321790 + }, + { + "epoch": 0.76, + "grad_norm": 2.375, + "learning_rate": 0.00013720852150754246, + "loss": 1.9096, + "step": 321795 + }, + { + "epoch": 0.76, + "grad_norm": 2.015625, + "learning_rate": 0.00013720680591967988, + "loss": 2.0953, + "step": 321800 + }, + { + "epoch": 0.76, + "grad_norm": 2.265625, + "learning_rate": 0.00013720509031910674, + "loss": 2.0151, + "step": 321805 + }, + { + "epoch": 0.76, + "grad_norm": 2.109375, + "learning_rate": 0.00013720337470582368, + "loss": 2.0131, + "step": 321810 + }, + { + "epoch": 0.76, + "grad_norm": 2.265625, + "learning_rate": 0.00013720165907983126, + "loss": 1.9724, + "step": 321815 + }, + { + "epoch": 0.76, + "grad_norm": 2.15625, + "learning_rate": 0.00013719994344113008, + "loss": 2.0586, + "step": 321820 + }, + { + "epoch": 0.76, + "grad_norm": 2.3125, + "learning_rate": 0.0001371982277897207, + "loss": 2.0492, + "step": 321825 + }, + { + "epoch": 0.76, + "grad_norm": 2.46875, + "learning_rate": 0.00013719651212560377, + "loss": 1.8386, + "step": 321830 + }, + { + "epoch": 0.76, + "grad_norm": 2.234375, + "learning_rate": 0.00013719479644877983, + "loss": 2.0894, + "step": 321835 + }, + { + "epoch": 0.76, + "grad_norm": 1.7578125, + "learning_rate": 0.0001371930807592495, + "loss": 2.0346, + "step": 321840 + }, + { + "epoch": 0.76, + "grad_norm": 2.28125, + "learning_rate": 0.00013719136505701329, + "loss": 1.9482, + "step": 321845 + }, + { + "epoch": 0.76, + "grad_norm": 2.171875, + "learning_rate": 0.00013718964934207186, + "loss": 2.0387, + "step": 321850 + }, + { + "epoch": 0.76, + "grad_norm": 2.359375, + "learning_rate": 0.00013718793361442577, + "loss": 1.9958, + "step": 321855 + }, + { + "epoch": 0.76, + "grad_norm": 2.234375, + "learning_rate": 0.00013718621787407563, + "loss": 2.0765, + "step": 321860 + }, + { + "epoch": 0.76, + "grad_norm": 2.40625, + "learning_rate": 0.00013718450212102197, + "loss": 2.1682, + "step": 321865 + }, + { + "epoch": 0.76, + "grad_norm": 2.609375, + "learning_rate": 0.00013718278635526542, + "loss": 1.948, + "step": 321870 + }, + { + "epoch": 0.76, + "grad_norm": 2.71875, + "learning_rate": 0.00013718107057680652, + "loss": 1.9342, + "step": 321875 + }, + { + "epoch": 0.76, + "grad_norm": 2.453125, + "learning_rate": 0.00013717935478564595, + "loss": 2.0077, + "step": 321880 + }, + { + "epoch": 0.76, + "grad_norm": 2.484375, + "learning_rate": 0.00013717763898178422, + "loss": 2.1662, + "step": 321885 + }, + { + "epoch": 0.76, + "grad_norm": 1.9921875, + "learning_rate": 0.00013717592316522193, + "loss": 2.1495, + "step": 321890 + }, + { + "epoch": 0.76, + "grad_norm": 2.515625, + "learning_rate": 0.00013717420733595968, + "loss": 2.1362, + "step": 321895 + }, + { + "epoch": 0.76, + "grad_norm": 2.390625, + "learning_rate": 0.000137172491493998, + "loss": 2.0997, + "step": 321900 + }, + { + "epoch": 0.76, + "grad_norm": 2.703125, + "learning_rate": 0.00013717077563933758, + "loss": 2.201, + "step": 321905 + }, + { + "epoch": 0.76, + "grad_norm": 2.046875, + "learning_rate": 0.00013716905977197893, + "loss": 2.0113, + "step": 321910 + }, + { + "epoch": 0.76, + "grad_norm": 2.46875, + "learning_rate": 0.00013716734389192264, + "loss": 2.0903, + "step": 321915 + }, + { + "epoch": 0.76, + "grad_norm": 2.1875, + "learning_rate": 0.00013716562799916931, + "loss": 2.1844, + "step": 321920 + }, + { + "epoch": 0.76, + "grad_norm": 1.9375, + "learning_rate": 0.00013716391209371955, + "loss": 1.9622, + "step": 321925 + }, + { + "epoch": 0.76, + "grad_norm": 1.90625, + "learning_rate": 0.00013716219617557394, + "loss": 1.9922, + "step": 321930 + }, + { + "epoch": 0.76, + "grad_norm": 2.0625, + "learning_rate": 0.000137160480244733, + "loss": 2.1362, + "step": 321935 + }, + { + "epoch": 0.76, + "grad_norm": 2.453125, + "learning_rate": 0.00013715876430119738, + "loss": 1.9644, + "step": 321940 + }, + { + "epoch": 0.76, + "grad_norm": 1.9609375, + "learning_rate": 0.00013715704834496768, + "loss": 2.0505, + "step": 321945 + }, + { + "epoch": 0.76, + "grad_norm": 2.859375, + "learning_rate": 0.00013715533237604446, + "loss": 2.0554, + "step": 321950 + }, + { + "epoch": 0.76, + "grad_norm": 2.65625, + "learning_rate": 0.00013715361639442825, + "loss": 2.0433, + "step": 321955 + }, + { + "epoch": 0.76, + "grad_norm": 3.109375, + "learning_rate": 0.00013715190040011974, + "loss": 2.0244, + "step": 321960 + }, + { + "epoch": 0.76, + "grad_norm": 2.15625, + "learning_rate": 0.00013715018439311945, + "loss": 2.0958, + "step": 321965 + }, + { + "epoch": 0.76, + "grad_norm": 2.515625, + "learning_rate": 0.00013714846837342798, + "loss": 2.0823, + "step": 321970 + }, + { + "epoch": 0.76, + "grad_norm": 2.390625, + "learning_rate": 0.00013714675234104595, + "loss": 2.1349, + "step": 321975 + }, + { + "epoch": 0.76, + "grad_norm": 2.90625, + "learning_rate": 0.00013714503629597385, + "loss": 2.0664, + "step": 321980 + }, + { + "epoch": 0.76, + "grad_norm": 2.21875, + "learning_rate": 0.00013714332023821238, + "loss": 2.0518, + "step": 321985 + }, + { + "epoch": 0.76, + "grad_norm": 1.984375, + "learning_rate": 0.0001371416041677621, + "loss": 2.1079, + "step": 321990 + }, + { + "epoch": 0.76, + "grad_norm": 2.359375, + "learning_rate": 0.00013713988808462354, + "loss": 2.2346, + "step": 321995 + }, + { + "epoch": 0.76, + "grad_norm": 1.84375, + "learning_rate": 0.00013713817198879734, + "loss": 2.0544, + "step": 322000 + }, + { + "epoch": 0.76, + "grad_norm": 1.9375, + "learning_rate": 0.00013713645588028404, + "loss": 2.0881, + "step": 322005 + }, + { + "epoch": 0.76, + "grad_norm": 2.09375, + "learning_rate": 0.00013713473975908428, + "loss": 2.112, + "step": 322010 + }, + { + "epoch": 0.76, + "grad_norm": 2.171875, + "learning_rate": 0.0001371330236251986, + "loss": 2.1287, + "step": 322015 + }, + { + "epoch": 0.76, + "grad_norm": 2.203125, + "learning_rate": 0.0001371313074786276, + "loss": 2.136, + "step": 322020 + }, + { + "epoch": 0.76, + "grad_norm": 1.7265625, + "learning_rate": 0.00013712959131937188, + "loss": 1.9907, + "step": 322025 + }, + { + "epoch": 0.76, + "grad_norm": 2.296875, + "learning_rate": 0.00013712787514743205, + "loss": 2.1875, + "step": 322030 + }, + { + "epoch": 0.76, + "grad_norm": 2.421875, + "learning_rate": 0.0001371261589628086, + "loss": 2.0272, + "step": 322035 + }, + { + "epoch": 0.76, + "grad_norm": 2.203125, + "learning_rate": 0.00013712444276550224, + "loss": 2.0997, + "step": 322040 + }, + { + "epoch": 0.76, + "grad_norm": 2.453125, + "learning_rate": 0.00013712272655551347, + "loss": 2.1318, + "step": 322045 + }, + { + "epoch": 0.76, + "grad_norm": 2.125, + "learning_rate": 0.0001371210103328429, + "loss": 2.0078, + "step": 322050 + }, + { + "epoch": 0.76, + "grad_norm": 2.140625, + "learning_rate": 0.00013711929409749112, + "loss": 2.1265, + "step": 322055 + }, + { + "epoch": 0.76, + "grad_norm": 2.109375, + "learning_rate": 0.00013711757784945873, + "loss": 2.0405, + "step": 322060 + }, + { + "epoch": 0.76, + "grad_norm": 2.265625, + "learning_rate": 0.0001371158615887463, + "loss": 2.0841, + "step": 322065 + }, + { + "epoch": 0.76, + "grad_norm": 2.078125, + "learning_rate": 0.00013711414531535442, + "loss": 2.2387, + "step": 322070 + }, + { + "epoch": 0.76, + "grad_norm": 1.90625, + "learning_rate": 0.00013711242902928364, + "loss": 2.0792, + "step": 322075 + }, + { + "epoch": 0.76, + "grad_norm": 2.0, + "learning_rate": 0.00013711071273053463, + "loss": 2.1299, + "step": 322080 + }, + { + "epoch": 0.76, + "grad_norm": 2.34375, + "learning_rate": 0.0001371089964191079, + "loss": 2.0522, + "step": 322085 + }, + { + "epoch": 0.76, + "grad_norm": 2.59375, + "learning_rate": 0.00013710728009500408, + "loss": 1.982, + "step": 322090 + }, + { + "epoch": 0.76, + "grad_norm": 2.828125, + "learning_rate": 0.00013710556375822375, + "loss": 2.0962, + "step": 322095 + }, + { + "epoch": 0.76, + "grad_norm": 2.390625, + "learning_rate": 0.00013710384740876744, + "loss": 1.8861, + "step": 322100 + }, + { + "epoch": 0.76, + "grad_norm": 2.375, + "learning_rate": 0.00013710213104663584, + "loss": 2.1682, + "step": 322105 + }, + { + "epoch": 0.76, + "grad_norm": 1.84375, + "learning_rate": 0.00013710041467182945, + "loss": 1.9793, + "step": 322110 + }, + { + "epoch": 0.76, + "grad_norm": 2.453125, + "learning_rate": 0.00013709869828434888, + "loss": 1.9911, + "step": 322115 + }, + { + "epoch": 0.76, + "grad_norm": 2.546875, + "learning_rate": 0.0001370969818841947, + "loss": 2.0413, + "step": 322120 + }, + { + "epoch": 0.76, + "grad_norm": 1.9375, + "learning_rate": 0.00013709526547136755, + "loss": 2.0455, + "step": 322125 + }, + { + "epoch": 0.76, + "grad_norm": 1.8984375, + "learning_rate": 0.00013709354904586798, + "loss": 2.0326, + "step": 322130 + }, + { + "epoch": 0.76, + "grad_norm": 2.21875, + "learning_rate": 0.0001370918326076966, + "loss": 2.1407, + "step": 322135 + }, + { + "epoch": 0.76, + "grad_norm": 2.28125, + "learning_rate": 0.00013709011615685394, + "loss": 2.0958, + "step": 322140 + }, + { + "epoch": 0.76, + "grad_norm": 2.515625, + "learning_rate": 0.00013708839969334065, + "loss": 2.0314, + "step": 322145 + }, + { + "epoch": 0.76, + "grad_norm": 2.171875, + "learning_rate": 0.00013708668321715727, + "loss": 2.1452, + "step": 322150 + }, + { + "epoch": 0.76, + "grad_norm": 2.015625, + "learning_rate": 0.00013708496672830444, + "loss": 1.9863, + "step": 322155 + }, + { + "epoch": 0.76, + "grad_norm": 2.171875, + "learning_rate": 0.0001370832502267827, + "loss": 2.0619, + "step": 322160 + }, + { + "epoch": 0.76, + "grad_norm": 2.109375, + "learning_rate": 0.00013708153371259262, + "loss": 2.0381, + "step": 322165 + }, + { + "epoch": 0.76, + "grad_norm": 2.421875, + "learning_rate": 0.00013707981718573485, + "loss": 2.1321, + "step": 322170 + }, + { + "epoch": 0.76, + "grad_norm": 2.328125, + "learning_rate": 0.00013707810064620993, + "loss": 1.9641, + "step": 322175 + }, + { + "epoch": 0.76, + "grad_norm": 2.40625, + "learning_rate": 0.00013707638409401847, + "loss": 2.1092, + "step": 322180 + }, + { + "epoch": 0.76, + "grad_norm": 2.078125, + "learning_rate": 0.00013707466752916104, + "loss": 1.8891, + "step": 322185 + }, + { + "epoch": 0.76, + "grad_norm": 2.125, + "learning_rate": 0.0001370729509516382, + "loss": 2.081, + "step": 322190 + }, + { + "epoch": 0.76, + "grad_norm": 2.421875, + "learning_rate": 0.00013707123436145058, + "loss": 2.1579, + "step": 322195 + }, + { + "epoch": 0.76, + "grad_norm": 2.65625, + "learning_rate": 0.00013706951775859877, + "loss": 1.8796, + "step": 322200 + }, + { + "epoch": 0.76, + "grad_norm": 2.28125, + "learning_rate": 0.00013706780114308332, + "loss": 1.9407, + "step": 322205 + }, + { + "epoch": 0.76, + "grad_norm": 2.484375, + "learning_rate": 0.00013706608451490485, + "loss": 2.0733, + "step": 322210 + }, + { + "epoch": 0.76, + "grad_norm": 2.671875, + "learning_rate": 0.00013706436787406395, + "loss": 1.9701, + "step": 322215 + }, + { + "epoch": 0.76, + "grad_norm": 2.03125, + "learning_rate": 0.00013706265122056117, + "loss": 2.0383, + "step": 322220 + }, + { + "epoch": 0.76, + "grad_norm": 2.296875, + "learning_rate": 0.00013706093455439713, + "loss": 1.9555, + "step": 322225 + }, + { + "epoch": 0.76, + "grad_norm": 2.453125, + "learning_rate": 0.00013705921787557238, + "loss": 1.9786, + "step": 322230 + }, + { + "epoch": 0.76, + "grad_norm": 2.421875, + "learning_rate": 0.00013705750118408756, + "loss": 2.081, + "step": 322235 + }, + { + "epoch": 0.76, + "grad_norm": 2.421875, + "learning_rate": 0.0001370557844799432, + "loss": 1.755, + "step": 322240 + }, + { + "epoch": 0.76, + "grad_norm": 2.203125, + "learning_rate": 0.00013705406776313994, + "loss": 1.9954, + "step": 322245 + }, + { + "epoch": 0.76, + "grad_norm": 1.8125, + "learning_rate": 0.00013705235103367828, + "loss": 1.9945, + "step": 322250 + }, + { + "epoch": 0.76, + "grad_norm": 2.5, + "learning_rate": 0.00013705063429155892, + "loss": 2.0351, + "step": 322255 + }, + { + "epoch": 0.76, + "grad_norm": 2.03125, + "learning_rate": 0.00013704891753678238, + "loss": 2.1214, + "step": 322260 + }, + { + "epoch": 0.76, + "grad_norm": 2.265625, + "learning_rate": 0.00013704720076934926, + "loss": 2.03, + "step": 322265 + }, + { + "epoch": 0.76, + "grad_norm": 2.171875, + "learning_rate": 0.00013704548398926013, + "loss": 2.0411, + "step": 322270 + }, + { + "epoch": 0.76, + "grad_norm": 1.890625, + "learning_rate": 0.0001370437671965156, + "loss": 1.9141, + "step": 322275 + }, + { + "epoch": 0.76, + "grad_norm": 2.109375, + "learning_rate": 0.00013704205039111623, + "loss": 2.0179, + "step": 322280 + }, + { + "epoch": 0.76, + "grad_norm": 1.7109375, + "learning_rate": 0.00013704033357306262, + "loss": 1.9805, + "step": 322285 + }, + { + "epoch": 0.76, + "grad_norm": 2.421875, + "learning_rate": 0.0001370386167423554, + "loss": 2.0006, + "step": 322290 + }, + { + "epoch": 0.76, + "grad_norm": 1.9296875, + "learning_rate": 0.0001370368998989951, + "loss": 1.9541, + "step": 322295 + }, + { + "epoch": 0.76, + "grad_norm": 2.296875, + "learning_rate": 0.0001370351830429823, + "loss": 2.0691, + "step": 322300 + }, + { + "epoch": 0.76, + "grad_norm": 2.484375, + "learning_rate": 0.00013703346617431763, + "loss": 1.9467, + "step": 322305 + }, + { + "epoch": 0.76, + "grad_norm": 2.265625, + "learning_rate": 0.00013703174929300164, + "loss": 2.2653, + "step": 322310 + }, + { + "epoch": 0.76, + "grad_norm": 2.0, + "learning_rate": 0.000137030032399035, + "loss": 2.1412, + "step": 322315 + }, + { + "epoch": 0.76, + "grad_norm": 2.046875, + "learning_rate": 0.00013702831549241816, + "loss": 2.0149, + "step": 322320 + }, + { + "epoch": 0.76, + "grad_norm": 2.15625, + "learning_rate": 0.0001370265985731518, + "loss": 1.9887, + "step": 322325 + }, + { + "epoch": 0.76, + "grad_norm": 2.296875, + "learning_rate": 0.00013702488164123647, + "loss": 2.1265, + "step": 322330 + }, + { + "epoch": 0.76, + "grad_norm": 2.375, + "learning_rate": 0.00013702316469667278, + "loss": 1.9979, + "step": 322335 + }, + { + "epoch": 0.76, + "grad_norm": 2.25, + "learning_rate": 0.00013702144773946129, + "loss": 2.1012, + "step": 322340 + }, + { + "epoch": 0.76, + "grad_norm": 2.21875, + "learning_rate": 0.00013701973076960262, + "loss": 2.2177, + "step": 322345 + }, + { + "epoch": 0.76, + "grad_norm": 2.3125, + "learning_rate": 0.00013701801378709734, + "loss": 1.9174, + "step": 322350 + }, + { + "epoch": 0.76, + "grad_norm": 2.46875, + "learning_rate": 0.000137016296791946, + "loss": 1.9924, + "step": 322355 + }, + { + "epoch": 0.76, + "grad_norm": 2.203125, + "learning_rate": 0.00013701457978414924, + "loss": 2.0084, + "step": 322360 + }, + { + "epoch": 0.76, + "grad_norm": 2.21875, + "learning_rate": 0.00013701286276370764, + "loss": 2.2101, + "step": 322365 + }, + { + "epoch": 0.76, + "grad_norm": 2.484375, + "learning_rate": 0.0001370111457306218, + "loss": 2.0392, + "step": 322370 + }, + { + "epoch": 0.76, + "grad_norm": 2.390625, + "learning_rate": 0.00013700942868489225, + "loss": 1.8422, + "step": 322375 + }, + { + "epoch": 0.76, + "grad_norm": 2.0625, + "learning_rate": 0.0001370077116265196, + "loss": 2.0607, + "step": 322380 + }, + { + "epoch": 0.76, + "grad_norm": 1.984375, + "learning_rate": 0.00013700599455550443, + "loss": 2.0768, + "step": 322385 + }, + { + "epoch": 0.76, + "grad_norm": 2.171875, + "learning_rate": 0.00013700427747184737, + "loss": 1.9989, + "step": 322390 + }, + { + "epoch": 0.76, + "grad_norm": 2.046875, + "learning_rate": 0.00013700256037554897, + "loss": 2.0628, + "step": 322395 + }, + { + "epoch": 0.76, + "grad_norm": 1.8515625, + "learning_rate": 0.00013700084326660986, + "loss": 2.0249, + "step": 322400 + }, + { + "epoch": 0.76, + "grad_norm": 2.265625, + "learning_rate": 0.00013699912614503057, + "loss": 2.0099, + "step": 322405 + }, + { + "epoch": 0.76, + "grad_norm": 2.125, + "learning_rate": 0.00013699740901081166, + "loss": 1.9647, + "step": 322410 + }, + { + "epoch": 0.76, + "grad_norm": 2.28125, + "learning_rate": 0.0001369956918639538, + "loss": 2.048, + "step": 322415 + }, + { + "epoch": 0.76, + "grad_norm": 2.53125, + "learning_rate": 0.00013699397470445755, + "loss": 1.9485, + "step": 322420 + }, + { + "epoch": 0.76, + "grad_norm": 2.5625, + "learning_rate": 0.0001369922575323235, + "loss": 2.1254, + "step": 322425 + }, + { + "epoch": 0.76, + "grad_norm": 1.8984375, + "learning_rate": 0.00013699054034755219, + "loss": 2.025, + "step": 322430 + }, + { + "epoch": 0.76, + "grad_norm": 1.90625, + "learning_rate": 0.00013698882315014424, + "loss": 1.9513, + "step": 322435 + }, + { + "epoch": 0.76, + "grad_norm": 2.609375, + "learning_rate": 0.00013698710594010023, + "loss": 2.0205, + "step": 322440 + }, + { + "epoch": 0.76, + "grad_norm": 2.15625, + "learning_rate": 0.0001369853887174208, + "loss": 2.0159, + "step": 322445 + }, + { + "epoch": 0.76, + "grad_norm": 2.15625, + "learning_rate": 0.00013698367148210646, + "loss": 1.9579, + "step": 322450 + }, + { + "epoch": 0.76, + "grad_norm": 2.40625, + "learning_rate": 0.00013698195423415783, + "loss": 2.2388, + "step": 322455 + }, + { + "epoch": 0.76, + "grad_norm": 2.28125, + "learning_rate": 0.00013698023697357548, + "loss": 1.8657, + "step": 322460 + }, + { + "epoch": 0.76, + "grad_norm": 2.03125, + "learning_rate": 0.00013697851970036003, + "loss": 2.061, + "step": 322465 + }, + { + "epoch": 0.76, + "grad_norm": 2.5, + "learning_rate": 0.00013697680241451206, + "loss": 2.142, + "step": 322470 + }, + { + "epoch": 0.76, + "grad_norm": 2.625, + "learning_rate": 0.00013697508511603212, + "loss": 2.1184, + "step": 322475 + }, + { + "epoch": 0.76, + "grad_norm": 2.453125, + "learning_rate": 0.00013697336780492083, + "loss": 2.0317, + "step": 322480 + }, + { + "epoch": 0.76, + "grad_norm": 2.234375, + "learning_rate": 0.00013697165048117874, + "loss": 1.9949, + "step": 322485 + }, + { + "epoch": 0.76, + "grad_norm": 2.1875, + "learning_rate": 0.00013696993314480648, + "loss": 1.9279, + "step": 322490 + }, + { + "epoch": 0.76, + "grad_norm": 2.203125, + "learning_rate": 0.00013696821579580465, + "loss": 2.0243, + "step": 322495 + }, + { + "epoch": 0.76, + "grad_norm": 2.296875, + "learning_rate": 0.00013696649843417377, + "loss": 2.013, + "step": 322500 + }, + { + "epoch": 0.76, + "grad_norm": 2.375, + "learning_rate": 0.00013696478105991447, + "loss": 1.8687, + "step": 322505 + }, + { + "epoch": 0.76, + "grad_norm": 2.28125, + "learning_rate": 0.00013696306367302736, + "loss": 2.0902, + "step": 322510 + }, + { + "epoch": 0.76, + "grad_norm": 1.703125, + "learning_rate": 0.000136961346273513, + "loss": 2.0263, + "step": 322515 + }, + { + "epoch": 0.76, + "grad_norm": 1.9765625, + "learning_rate": 0.0001369596288613719, + "loss": 1.9694, + "step": 322520 + }, + { + "epoch": 0.76, + "grad_norm": 1.8515625, + "learning_rate": 0.0001369579114366048, + "loss": 1.9543, + "step": 322525 + }, + { + "epoch": 0.76, + "grad_norm": 2.8125, + "learning_rate": 0.00013695619399921216, + "loss": 2.0505, + "step": 322530 + }, + { + "epoch": 0.76, + "grad_norm": 2.046875, + "learning_rate": 0.00013695447654919464, + "loss": 1.8995, + "step": 322535 + }, + { + "epoch": 0.76, + "grad_norm": 1.7109375, + "learning_rate": 0.00013695275908655278, + "loss": 2.0044, + "step": 322540 + }, + { + "epoch": 0.76, + "grad_norm": 2.171875, + "learning_rate": 0.0001369510416112872, + "loss": 2.0548, + "step": 322545 + }, + { + "epoch": 0.76, + "grad_norm": 2.25, + "learning_rate": 0.00013694932412339848, + "loss": 1.8683, + "step": 322550 + }, + { + "epoch": 0.76, + "grad_norm": 2.28125, + "learning_rate": 0.0001369476066228872, + "loss": 2.0833, + "step": 322555 + }, + { + "epoch": 0.76, + "grad_norm": 2.1875, + "learning_rate": 0.00013694588910975392, + "loss": 1.934, + "step": 322560 + }, + { + "epoch": 0.76, + "grad_norm": 2.296875, + "learning_rate": 0.0001369441715839993, + "loss": 2.011, + "step": 322565 + }, + { + "epoch": 0.76, + "grad_norm": 2.0, + "learning_rate": 0.00013694245404562386, + "loss": 2.0762, + "step": 322570 + }, + { + "epoch": 0.76, + "grad_norm": 2.53125, + "learning_rate": 0.0001369407364946282, + "loss": 2.0677, + "step": 322575 + }, + { + "epoch": 0.76, + "grad_norm": 2.0625, + "learning_rate": 0.0001369390189310129, + "loss": 2.1899, + "step": 322580 + }, + { + "epoch": 0.76, + "grad_norm": 2.078125, + "learning_rate": 0.0001369373013547786, + "loss": 1.9981, + "step": 322585 + }, + { + "epoch": 0.76, + "grad_norm": 1.8359375, + "learning_rate": 0.00013693558376592585, + "loss": 1.6322, + "step": 322590 + }, + { + "epoch": 0.76, + "grad_norm": 1.8984375, + "learning_rate": 0.00013693386616445515, + "loss": 2.0558, + "step": 322595 + }, + { + "epoch": 0.76, + "grad_norm": 2.546875, + "learning_rate": 0.00013693214855036728, + "loss": 2.049, + "step": 322600 + }, + { + "epoch": 0.76, + "grad_norm": 2.640625, + "learning_rate": 0.00013693043092366264, + "loss": 1.971, + "step": 322605 + }, + { + "epoch": 0.76, + "grad_norm": 2.015625, + "learning_rate": 0.00013692871328434197, + "loss": 2.1592, + "step": 322610 + }, + { + "epoch": 0.76, + "grad_norm": 2.5625, + "learning_rate": 0.00013692699563240573, + "loss": 2.1308, + "step": 322615 + }, + { + "epoch": 0.76, + "grad_norm": 1.875, + "learning_rate": 0.00013692527796785458, + "loss": 2.036, + "step": 322620 + }, + { + "epoch": 0.76, + "grad_norm": 1.7734375, + "learning_rate": 0.000136923560290689, + "loss": 1.7691, + "step": 322625 + }, + { + "epoch": 0.76, + "grad_norm": 2.203125, + "learning_rate": 0.00013692184260090978, + "loss": 1.8554, + "step": 322630 + }, + { + "epoch": 0.76, + "grad_norm": 2.203125, + "learning_rate": 0.00013692012489851735, + "loss": 2.0558, + "step": 322635 + }, + { + "epoch": 0.76, + "grad_norm": 2.078125, + "learning_rate": 0.0001369184071835123, + "loss": 2.0272, + "step": 322640 + }, + { + "epoch": 0.76, + "grad_norm": 2.40625, + "learning_rate": 0.00013691668945589532, + "loss": 1.9478, + "step": 322645 + }, + { + "epoch": 0.76, + "grad_norm": 2.28125, + "learning_rate": 0.00013691497171566689, + "loss": 1.9625, + "step": 322650 + }, + { + "epoch": 0.76, + "grad_norm": 2.40625, + "learning_rate": 0.00013691325396282764, + "loss": 2.2362, + "step": 322655 + }, + { + "epoch": 0.76, + "grad_norm": 2.15625, + "learning_rate": 0.00013691153619737814, + "loss": 2.1404, + "step": 322660 + }, + { + "epoch": 0.76, + "grad_norm": 2.015625, + "learning_rate": 0.000136909818419319, + "loss": 2.0754, + "step": 322665 + }, + { + "epoch": 0.76, + "grad_norm": 2.734375, + "learning_rate": 0.00013690810062865077, + "loss": 1.9141, + "step": 322670 + }, + { + "epoch": 0.76, + "grad_norm": 2.15625, + "learning_rate": 0.0001369063828253741, + "loss": 2.0887, + "step": 322675 + }, + { + "epoch": 0.76, + "grad_norm": 2.046875, + "learning_rate": 0.00013690466500948954, + "loss": 2.0503, + "step": 322680 + }, + { + "epoch": 0.76, + "grad_norm": 2.0625, + "learning_rate": 0.00013690294718099767, + "loss": 1.942, + "step": 322685 + }, + { + "epoch": 0.76, + "grad_norm": 2.234375, + "learning_rate": 0.00013690122933989908, + "loss": 2.0463, + "step": 322690 + }, + { + "epoch": 0.76, + "grad_norm": 3.125, + "learning_rate": 0.00013689951148619436, + "loss": 1.9802, + "step": 322695 + }, + { + "epoch": 0.76, + "grad_norm": 2.265625, + "learning_rate": 0.00013689779361988414, + "loss": 1.7974, + "step": 322700 + }, + { + "epoch": 0.76, + "grad_norm": 2.015625, + "learning_rate": 0.00013689607574096887, + "loss": 2.0573, + "step": 322705 + }, + { + "epoch": 0.76, + "grad_norm": 2.5, + "learning_rate": 0.0001368943578494493, + "loss": 2.1852, + "step": 322710 + }, + { + "epoch": 0.76, + "grad_norm": 1.8828125, + "learning_rate": 0.00013689263994532596, + "loss": 2.1178, + "step": 322715 + }, + { + "epoch": 0.76, + "grad_norm": 2.109375, + "learning_rate": 0.00013689092202859938, + "loss": 1.9853, + "step": 322720 + }, + { + "epoch": 0.76, + "grad_norm": 2.671875, + "learning_rate": 0.00013688920409927024, + "loss": 2.0337, + "step": 322725 + }, + { + "epoch": 0.76, + "grad_norm": 2.46875, + "learning_rate": 0.00013688748615733903, + "loss": 2.0511, + "step": 322730 + }, + { + "epoch": 0.76, + "grad_norm": 2.171875, + "learning_rate": 0.0001368857682028064, + "loss": 2.0899, + "step": 322735 + }, + { + "epoch": 0.76, + "grad_norm": 2.1875, + "learning_rate": 0.00013688405023567292, + "loss": 2.0655, + "step": 322740 + }, + { + "epoch": 0.76, + "grad_norm": 2.46875, + "learning_rate": 0.00013688233225593921, + "loss": 1.7745, + "step": 322745 + }, + { + "epoch": 0.76, + "grad_norm": 2.25, + "learning_rate": 0.0001368806142636058, + "loss": 2.0681, + "step": 322750 + }, + { + "epoch": 0.76, + "grad_norm": 2.0, + "learning_rate": 0.00013687889625867332, + "loss": 1.9719, + "step": 322755 + }, + { + "epoch": 0.76, + "grad_norm": 2.125, + "learning_rate": 0.0001368771782411423, + "loss": 2.1242, + "step": 322760 + }, + { + "epoch": 0.76, + "grad_norm": 1.9453125, + "learning_rate": 0.00013687546021101343, + "loss": 2.0498, + "step": 322765 + }, + { + "epoch": 0.76, + "grad_norm": 2.3125, + "learning_rate": 0.0001368737421682872, + "loss": 2.0691, + "step": 322770 + }, + { + "epoch": 0.76, + "grad_norm": 2.296875, + "learning_rate": 0.00013687202411296422, + "loss": 1.9745, + "step": 322775 + }, + { + "epoch": 0.76, + "grad_norm": 1.984375, + "learning_rate": 0.00013687030604504508, + "loss": 1.9033, + "step": 322780 + }, + { + "epoch": 0.76, + "grad_norm": 2.609375, + "learning_rate": 0.0001368685879645304, + "loss": 2.2233, + "step": 322785 + }, + { + "epoch": 0.76, + "grad_norm": 2.078125, + "learning_rate": 0.00013686686987142074, + "loss": 1.7821, + "step": 322790 + }, + { + "epoch": 0.76, + "grad_norm": 2.421875, + "learning_rate": 0.00013686515176571668, + "loss": 2.0706, + "step": 322795 + }, + { + "epoch": 0.76, + "grad_norm": 1.734375, + "learning_rate": 0.00013686343364741882, + "loss": 2.0445, + "step": 322800 + }, + { + "epoch": 0.76, + "grad_norm": 2.171875, + "learning_rate": 0.00013686171551652776, + "loss": 1.9458, + "step": 322805 + }, + { + "epoch": 0.76, + "grad_norm": 2.015625, + "learning_rate": 0.00013685999737304402, + "loss": 1.8961, + "step": 322810 + }, + { + "epoch": 0.76, + "grad_norm": 2.0, + "learning_rate": 0.00013685827921696827, + "loss": 2.0545, + "step": 322815 + }, + { + "epoch": 0.76, + "grad_norm": 2.40625, + "learning_rate": 0.00013685656104830106, + "loss": 1.7797, + "step": 322820 + }, + { + "epoch": 0.76, + "grad_norm": 2.125, + "learning_rate": 0.000136854842867043, + "loss": 2.1536, + "step": 322825 + }, + { + "epoch": 0.76, + "grad_norm": 2.09375, + "learning_rate": 0.00013685312467319463, + "loss": 2.1261, + "step": 322830 + }, + { + "epoch": 0.76, + "grad_norm": 1.765625, + "learning_rate": 0.00013685140646675658, + "loss": 2.0349, + "step": 322835 + }, + { + "epoch": 0.76, + "grad_norm": 2.25, + "learning_rate": 0.0001368496882477294, + "loss": 1.9127, + "step": 322840 + }, + { + "epoch": 0.76, + "grad_norm": 2.1875, + "learning_rate": 0.0001368479700161137, + "loss": 1.9568, + "step": 322845 + }, + { + "epoch": 0.76, + "grad_norm": 2.03125, + "learning_rate": 0.00013684625177191008, + "loss": 1.992, + "step": 322850 + }, + { + "epoch": 0.76, + "grad_norm": 2.390625, + "learning_rate": 0.00013684453351511913, + "loss": 2.0157, + "step": 322855 + }, + { + "epoch": 0.76, + "grad_norm": 2.234375, + "learning_rate": 0.00013684281524574141, + "loss": 2.081, + "step": 322860 + }, + { + "epoch": 0.76, + "grad_norm": 2.25, + "learning_rate": 0.0001368410969637775, + "loss": 2.0348, + "step": 322865 + }, + { + "epoch": 0.76, + "grad_norm": 2.296875, + "learning_rate": 0.00013683937866922798, + "loss": 2.1399, + "step": 322870 + }, + { + "epoch": 0.76, + "grad_norm": 2.46875, + "learning_rate": 0.0001368376603620935, + "loss": 2.0483, + "step": 322875 + }, + { + "epoch": 0.76, + "grad_norm": 2.40625, + "learning_rate": 0.0001368359420423746, + "loss": 1.9935, + "step": 322880 + }, + { + "epoch": 0.76, + "grad_norm": 2.515625, + "learning_rate": 0.00013683422371007188, + "loss": 2.1062, + "step": 322885 + }, + { + "epoch": 0.76, + "grad_norm": 2.140625, + "learning_rate": 0.00013683250536518588, + "loss": 1.8679, + "step": 322890 + }, + { + "epoch": 0.76, + "grad_norm": 2.359375, + "learning_rate": 0.00013683078700771727, + "loss": 2.101, + "step": 322895 + }, + { + "epoch": 0.76, + "grad_norm": 2.0, + "learning_rate": 0.00013682906863766658, + "loss": 2.0178, + "step": 322900 + }, + { + "epoch": 0.76, + "grad_norm": 2.421875, + "learning_rate": 0.00013682735025503443, + "loss": 1.9944, + "step": 322905 + }, + { + "epoch": 0.76, + "grad_norm": 2.890625, + "learning_rate": 0.0001368256318598214, + "loss": 2.018, + "step": 322910 + }, + { + "epoch": 0.76, + "grad_norm": 2.71875, + "learning_rate": 0.00013682391345202802, + "loss": 1.9998, + "step": 322915 + }, + { + "epoch": 0.76, + "grad_norm": 2.015625, + "learning_rate": 0.00013682219503165493, + "loss": 1.9769, + "step": 322920 + }, + { + "epoch": 0.76, + "grad_norm": 2.0625, + "learning_rate": 0.00013682047659870273, + "loss": 2.1037, + "step": 322925 + }, + { + "epoch": 0.76, + "grad_norm": 2.8125, + "learning_rate": 0.000136818758153172, + "loss": 1.9826, + "step": 322930 + }, + { + "epoch": 0.76, + "grad_norm": 2.015625, + "learning_rate": 0.0001368170396950633, + "loss": 2.146, + "step": 322935 + }, + { + "epoch": 0.76, + "grad_norm": 1.859375, + "learning_rate": 0.00013681532122437721, + "loss": 1.9678, + "step": 322940 + }, + { + "epoch": 0.76, + "grad_norm": 2.578125, + "learning_rate": 0.00013681360274111438, + "loss": 2.1285, + "step": 322945 + }, + { + "epoch": 0.76, + "grad_norm": 2.25, + "learning_rate": 0.0001368118842452753, + "loss": 2.1888, + "step": 322950 + }, + { + "epoch": 0.76, + "grad_norm": 1.8984375, + "learning_rate": 0.00013681016573686066, + "loss": 2.0053, + "step": 322955 + }, + { + "epoch": 0.76, + "grad_norm": 2.234375, + "learning_rate": 0.00013680844721587098, + "loss": 1.9898, + "step": 322960 + }, + { + "epoch": 0.76, + "grad_norm": 2.125, + "learning_rate": 0.00013680672868230688, + "loss": 1.8542, + "step": 322965 + }, + { + "epoch": 0.76, + "grad_norm": 2.125, + "learning_rate": 0.0001368050101361689, + "loss": 2.1288, + "step": 322970 + }, + { + "epoch": 0.76, + "grad_norm": 2.84375, + "learning_rate": 0.0001368032915774577, + "loss": 1.7506, + "step": 322975 + }, + { + "epoch": 0.76, + "grad_norm": 2.484375, + "learning_rate": 0.00013680157300617382, + "loss": 1.9544, + "step": 322980 + }, + { + "epoch": 0.76, + "grad_norm": 2.015625, + "learning_rate": 0.00013679985442231787, + "loss": 2.1208, + "step": 322985 + }, + { + "epoch": 0.76, + "grad_norm": 2.109375, + "learning_rate": 0.00013679813582589037, + "loss": 1.9308, + "step": 322990 + }, + { + "epoch": 0.76, + "grad_norm": 2.359375, + "learning_rate": 0.000136796417216892, + "loss": 1.9952, + "step": 322995 + }, + { + "epoch": 0.76, + "grad_norm": 2.15625, + "learning_rate": 0.0001367946985953233, + "loss": 2.0697, + "step": 323000 + }, + { + "epoch": 0.76, + "grad_norm": 1.953125, + "learning_rate": 0.00013679297996118485, + "loss": 2.0372, + "step": 323005 + }, + { + "epoch": 0.76, + "grad_norm": 2.40625, + "learning_rate": 0.00013679126131447728, + "loss": 2.15, + "step": 323010 + }, + { + "epoch": 0.76, + "grad_norm": 1.8984375, + "learning_rate": 0.0001367895426552011, + "loss": 2.0232, + "step": 323015 + }, + { + "epoch": 0.76, + "grad_norm": 2.1875, + "learning_rate": 0.000136787823983357, + "loss": 1.9165, + "step": 323020 + }, + { + "epoch": 0.76, + "grad_norm": 2.28125, + "learning_rate": 0.0001367861052989455, + "loss": 1.9867, + "step": 323025 + }, + { + "epoch": 0.76, + "grad_norm": 1.9921875, + "learning_rate": 0.00013678438660196717, + "loss": 2.163, + "step": 323030 + }, + { + "epoch": 0.76, + "grad_norm": 1.9296875, + "learning_rate": 0.00013678266789242263, + "loss": 2.0037, + "step": 323035 + }, + { + "epoch": 0.76, + "grad_norm": 2.609375, + "learning_rate": 0.00013678094917031253, + "loss": 2.0683, + "step": 323040 + }, + { + "epoch": 0.76, + "grad_norm": 2.75, + "learning_rate": 0.00013677923043563733, + "loss": 2.0912, + "step": 323045 + }, + { + "epoch": 0.76, + "grad_norm": 2.109375, + "learning_rate": 0.00013677751168839766, + "loss": 2.0372, + "step": 323050 + }, + { + "epoch": 0.76, + "grad_norm": 2.125, + "learning_rate": 0.00013677579292859415, + "loss": 2.0152, + "step": 323055 + }, + { + "epoch": 0.76, + "grad_norm": 2.359375, + "learning_rate": 0.00013677407415622737, + "loss": 2.0868, + "step": 323060 + }, + { + "epoch": 0.76, + "grad_norm": 2.4375, + "learning_rate": 0.0001367723553712979, + "loss": 2.1293, + "step": 323065 + }, + { + "epoch": 0.76, + "grad_norm": 2.28125, + "learning_rate": 0.00013677063657380634, + "loss": 1.9573, + "step": 323070 + }, + { + "epoch": 0.76, + "grad_norm": 2.1875, + "learning_rate": 0.00013676891776375323, + "loss": 2.0637, + "step": 323075 + }, + { + "epoch": 0.76, + "grad_norm": 2.703125, + "learning_rate": 0.00013676719894113918, + "loss": 1.9784, + "step": 323080 + }, + { + "epoch": 0.76, + "grad_norm": 2.109375, + "learning_rate": 0.00013676548010596484, + "loss": 2.1741, + "step": 323085 + }, + { + "epoch": 0.76, + "grad_norm": 2.203125, + "learning_rate": 0.0001367637612582307, + "loss": 2.1776, + "step": 323090 + }, + { + "epoch": 0.76, + "grad_norm": 2.296875, + "learning_rate": 0.00013676204239793742, + "loss": 1.863, + "step": 323095 + }, + { + "epoch": 0.76, + "grad_norm": 1.65625, + "learning_rate": 0.00013676032352508553, + "loss": 2.0479, + "step": 323100 + }, + { + "epoch": 0.76, + "grad_norm": 2.546875, + "learning_rate": 0.00013675860463967568, + "loss": 1.9564, + "step": 323105 + }, + { + "epoch": 0.76, + "grad_norm": 2.375, + "learning_rate": 0.0001367568857417084, + "loss": 2.1269, + "step": 323110 + }, + { + "epoch": 0.76, + "grad_norm": 2.1875, + "learning_rate": 0.00013675516683118433, + "loss": 2.0228, + "step": 323115 + }, + { + "epoch": 0.76, + "grad_norm": 2.171875, + "learning_rate": 0.000136753447908104, + "loss": 2.0493, + "step": 323120 + }, + { + "epoch": 0.76, + "grad_norm": 2.125, + "learning_rate": 0.000136751728972468, + "loss": 1.9412, + "step": 323125 + }, + { + "epoch": 0.76, + "grad_norm": 2.234375, + "learning_rate": 0.000136750010024277, + "loss": 1.9517, + "step": 323130 + }, + { + "epoch": 0.76, + "grad_norm": 1.6171875, + "learning_rate": 0.0001367482910635315, + "loss": 2.0952, + "step": 323135 + }, + { + "epoch": 0.76, + "grad_norm": 2.328125, + "learning_rate": 0.00013674657209023213, + "loss": 2.0047, + "step": 323140 + }, + { + "epoch": 0.76, + "grad_norm": 2.0, + "learning_rate": 0.0001367448531043795, + "loss": 1.9721, + "step": 323145 + }, + { + "epoch": 0.76, + "grad_norm": 2.640625, + "learning_rate": 0.0001367431341059741, + "loss": 2.0325, + "step": 323150 + }, + { + "epoch": 0.76, + "grad_norm": 2.453125, + "learning_rate": 0.0001367414150950166, + "loss": 1.9115, + "step": 323155 + }, + { + "epoch": 0.76, + "grad_norm": 1.8046875, + "learning_rate": 0.00013673969607150757, + "loss": 2.0549, + "step": 323160 + }, + { + "epoch": 0.76, + "grad_norm": 2.46875, + "learning_rate": 0.00013673797703544758, + "loss": 2.125, + "step": 323165 + }, + { + "epoch": 0.76, + "grad_norm": 2.015625, + "learning_rate": 0.00013673625798683726, + "loss": 2.0705, + "step": 323170 + }, + { + "epoch": 0.76, + "grad_norm": 1.90625, + "learning_rate": 0.00013673453892567715, + "loss": 2.1653, + "step": 323175 + }, + { + "epoch": 0.76, + "grad_norm": 2.203125, + "learning_rate": 0.00013673281985196787, + "loss": 1.9867, + "step": 323180 + }, + { + "epoch": 0.76, + "grad_norm": 1.9453125, + "learning_rate": 0.00013673110076570995, + "loss": 1.9673, + "step": 323185 + }, + { + "epoch": 0.76, + "grad_norm": 2.0, + "learning_rate": 0.00013672938166690404, + "loss": 2.1672, + "step": 323190 + }, + { + "epoch": 0.76, + "grad_norm": 2.078125, + "learning_rate": 0.0001367276625555507, + "loss": 1.9955, + "step": 323195 + }, + { + "epoch": 0.76, + "grad_norm": 2.28125, + "learning_rate": 0.00013672594343165057, + "loss": 2.0635, + "step": 323200 + }, + { + "epoch": 0.76, + "grad_norm": 2.609375, + "learning_rate": 0.00013672422429520414, + "loss": 2.0762, + "step": 323205 + }, + { + "epoch": 0.76, + "grad_norm": 2.3125, + "learning_rate": 0.0001367225051462121, + "loss": 2.1005, + "step": 323210 + }, + { + "epoch": 0.76, + "grad_norm": 2.15625, + "learning_rate": 0.0001367207859846749, + "loss": 2.152, + "step": 323215 + }, + { + "epoch": 0.76, + "grad_norm": 2.171875, + "learning_rate": 0.0001367190668105933, + "loss": 1.8852, + "step": 323220 + }, + { + "epoch": 0.76, + "grad_norm": 2.75, + "learning_rate": 0.00013671734762396776, + "loss": 2.0361, + "step": 323225 + }, + { + "epoch": 0.76, + "grad_norm": 2.296875, + "learning_rate": 0.00013671562842479891, + "loss": 2.0049, + "step": 323230 + }, + { + "epoch": 0.76, + "grad_norm": 2.390625, + "learning_rate": 0.00013671390921308736, + "loss": 1.9481, + "step": 323235 + }, + { + "epoch": 0.76, + "grad_norm": 2.546875, + "learning_rate": 0.00013671218998883365, + "loss": 1.9818, + "step": 323240 + }, + { + "epoch": 0.76, + "grad_norm": 2.328125, + "learning_rate": 0.0001367104707520384, + "loss": 2.0411, + "step": 323245 + }, + { + "epoch": 0.76, + "grad_norm": 2.171875, + "learning_rate": 0.0001367087515027022, + "loss": 1.9531, + "step": 323250 + }, + { + "epoch": 0.76, + "grad_norm": 2.296875, + "learning_rate": 0.0001367070322408256, + "loss": 2.0018, + "step": 323255 + }, + { + "epoch": 0.76, + "grad_norm": 2.59375, + "learning_rate": 0.0001367053129664092, + "loss": 2.1192, + "step": 323260 + }, + { + "epoch": 0.76, + "grad_norm": 1.9140625, + "learning_rate": 0.00013670359367945362, + "loss": 2.0583, + "step": 323265 + }, + { + "epoch": 0.76, + "grad_norm": 2.171875, + "learning_rate": 0.00013670187437995942, + "loss": 2.1003, + "step": 323270 + }, + { + "epoch": 0.76, + "grad_norm": 2.234375, + "learning_rate": 0.0001367001550679272, + "loss": 1.9781, + "step": 323275 + }, + { + "epoch": 0.76, + "grad_norm": 2.296875, + "learning_rate": 0.00013669843574335756, + "loss": 1.9481, + "step": 323280 + }, + { + "epoch": 0.76, + "grad_norm": 2.203125, + "learning_rate": 0.00013669671640625106, + "loss": 2.0565, + "step": 323285 + }, + { + "epoch": 0.76, + "grad_norm": 2.46875, + "learning_rate": 0.00013669499705660829, + "loss": 2.0673, + "step": 323290 + }, + { + "epoch": 0.76, + "grad_norm": 2.34375, + "learning_rate": 0.0001366932776944298, + "loss": 1.9783, + "step": 323295 + }, + { + "epoch": 0.76, + "grad_norm": 1.9453125, + "learning_rate": 0.00013669155831971626, + "loss": 1.9362, + "step": 323300 + }, + { + "epoch": 0.76, + "grad_norm": 2.15625, + "learning_rate": 0.00013668983893246826, + "loss": 2.0066, + "step": 323305 + }, + { + "epoch": 0.76, + "grad_norm": 2.4375, + "learning_rate": 0.0001366881195326863, + "loss": 2.1769, + "step": 323310 + }, + { + "epoch": 0.76, + "grad_norm": 2.203125, + "learning_rate": 0.000136686400120371, + "loss": 2.136, + "step": 323315 + }, + { + "epoch": 0.76, + "grad_norm": 2.65625, + "learning_rate": 0.00013668468069552299, + "loss": 2.1083, + "step": 323320 + }, + { + "epoch": 0.76, + "grad_norm": 2.625, + "learning_rate": 0.0001366829612581428, + "loss": 2.0091, + "step": 323325 + }, + { + "epoch": 0.76, + "grad_norm": 2.171875, + "learning_rate": 0.0001366812418082311, + "loss": 1.9502, + "step": 323330 + }, + { + "epoch": 0.76, + "grad_norm": 1.78125, + "learning_rate": 0.00013667952234578838, + "loss": 1.9041, + "step": 323335 + }, + { + "epoch": 0.76, + "grad_norm": 2.140625, + "learning_rate": 0.00013667780287081527, + "loss": 2.102, + "step": 323340 + }, + { + "epoch": 0.76, + "grad_norm": 2.453125, + "learning_rate": 0.00013667608338331232, + "loss": 2.0809, + "step": 323345 + }, + { + "epoch": 0.76, + "grad_norm": 1.7578125, + "learning_rate": 0.0001366743638832802, + "loss": 1.7681, + "step": 323350 + }, + { + "epoch": 0.76, + "grad_norm": 1.9765625, + "learning_rate": 0.00013667264437071947, + "loss": 2.0821, + "step": 323355 + }, + { + "epoch": 0.76, + "grad_norm": 2.125, + "learning_rate": 0.0001366709248456307, + "loss": 2.216, + "step": 323360 + }, + { + "epoch": 0.76, + "grad_norm": 2.3125, + "learning_rate": 0.00013666920530801445, + "loss": 2.2091, + "step": 323365 + }, + { + "epoch": 0.76, + "grad_norm": 2.078125, + "learning_rate": 0.00013666748575787133, + "loss": 2.0434, + "step": 323370 + }, + { + "epoch": 0.76, + "grad_norm": 2.28125, + "learning_rate": 0.0001366657661952019, + "loss": 2.134, + "step": 323375 + }, + { + "epoch": 0.76, + "grad_norm": 2.5625, + "learning_rate": 0.00013666404662000686, + "loss": 2.2622, + "step": 323380 + }, + { + "epoch": 0.76, + "grad_norm": 2.546875, + "learning_rate": 0.00013666232703228668, + "loss": 2.1257, + "step": 323385 + }, + { + "epoch": 0.76, + "grad_norm": 2.40625, + "learning_rate": 0.00013666060743204196, + "loss": 2.0878, + "step": 323390 + }, + { + "epoch": 0.76, + "grad_norm": 1.9140625, + "learning_rate": 0.00013665888781927334, + "loss": 1.884, + "step": 323395 + }, + { + "epoch": 0.76, + "grad_norm": 2.21875, + "learning_rate": 0.00013665716819398138, + "loss": 2.0473, + "step": 323400 + }, + { + "epoch": 0.76, + "grad_norm": 2.625, + "learning_rate": 0.00013665544855616664, + "loss": 2.0051, + "step": 323405 + }, + { + "epoch": 0.76, + "grad_norm": 1.9140625, + "learning_rate": 0.00013665372890582977, + "loss": 2.1571, + "step": 323410 + }, + { + "epoch": 0.76, + "grad_norm": 2.28125, + "learning_rate": 0.0001366520092429713, + "loss": 1.9801, + "step": 323415 + }, + { + "epoch": 0.76, + "grad_norm": 2.21875, + "learning_rate": 0.00013665028956759184, + "loss": 1.9955, + "step": 323420 + }, + { + "epoch": 0.76, + "grad_norm": 2.46875, + "learning_rate": 0.000136648569879692, + "loss": 2.2271, + "step": 323425 + }, + { + "epoch": 0.76, + "grad_norm": 2.046875, + "learning_rate": 0.0001366468501792723, + "loss": 1.9059, + "step": 323430 + }, + { + "epoch": 0.76, + "grad_norm": 2.25, + "learning_rate": 0.0001366451304663334, + "loss": 2.269, + "step": 323435 + }, + { + "epoch": 0.76, + "grad_norm": 1.828125, + "learning_rate": 0.00013664341074087587, + "loss": 1.9058, + "step": 323440 + }, + { + "epoch": 0.76, + "grad_norm": 2.0, + "learning_rate": 0.00013664169100290025, + "loss": 2.1443, + "step": 323445 + }, + { + "epoch": 0.76, + "grad_norm": 2.453125, + "learning_rate": 0.00013663997125240723, + "loss": 1.9791, + "step": 323450 + }, + { + "epoch": 0.76, + "grad_norm": 2.15625, + "learning_rate": 0.00013663825148939725, + "loss": 2.0497, + "step": 323455 + }, + { + "epoch": 0.76, + "grad_norm": 1.96875, + "learning_rate": 0.000136636531713871, + "loss": 2.1124, + "step": 323460 + }, + { + "epoch": 0.76, + "grad_norm": 2.1875, + "learning_rate": 0.00013663481192582907, + "loss": 1.9461, + "step": 323465 + }, + { + "epoch": 0.76, + "grad_norm": 2.6875, + "learning_rate": 0.00013663309212527203, + "loss": 2.1705, + "step": 323470 + }, + { + "epoch": 0.76, + "grad_norm": 2.09375, + "learning_rate": 0.00013663137231220045, + "loss": 2.0576, + "step": 323475 + }, + { + "epoch": 0.76, + "grad_norm": 2.453125, + "learning_rate": 0.0001366296524866149, + "loss": 2.1117, + "step": 323480 + }, + { + "epoch": 0.76, + "grad_norm": 2.21875, + "learning_rate": 0.000136627932648516, + "loss": 2.1302, + "step": 323485 + }, + { + "epoch": 0.76, + "grad_norm": 2.203125, + "learning_rate": 0.00013662621279790437, + "loss": 1.992, + "step": 323490 + }, + { + "epoch": 0.76, + "grad_norm": 2.265625, + "learning_rate": 0.00013662449293478057, + "loss": 2.0161, + "step": 323495 + }, + { + "epoch": 0.76, + "grad_norm": 2.75, + "learning_rate": 0.00013662277305914516, + "loss": 2.0651, + "step": 323500 + }, + { + "epoch": 0.76, + "grad_norm": 2.21875, + "learning_rate": 0.00013662105317099874, + "loss": 1.9635, + "step": 323505 + }, + { + "epoch": 0.76, + "grad_norm": 2.59375, + "learning_rate": 0.00013661933327034187, + "loss": 2.1743, + "step": 323510 + }, + { + "epoch": 0.76, + "grad_norm": 2.25, + "learning_rate": 0.00013661761335717523, + "loss": 2.0038, + "step": 323515 + }, + { + "epoch": 0.76, + "grad_norm": 2.296875, + "learning_rate": 0.0001366158934314993, + "loss": 1.9689, + "step": 323520 + }, + { + "epoch": 0.76, + "grad_norm": 2.765625, + "learning_rate": 0.00013661417349331476, + "loss": 2.113, + "step": 323525 + }, + { + "epoch": 0.76, + "grad_norm": 1.8125, + "learning_rate": 0.00013661245354262215, + "loss": 2.0011, + "step": 323530 + }, + { + "epoch": 0.76, + "grad_norm": 2.015625, + "learning_rate": 0.00013661073357942204, + "loss": 2.0093, + "step": 323535 + }, + { + "epoch": 0.76, + "grad_norm": 2.0625, + "learning_rate": 0.00013660901360371504, + "loss": 2.001, + "step": 323540 + }, + { + "epoch": 0.76, + "grad_norm": 1.828125, + "learning_rate": 0.00013660729361550175, + "loss": 1.806, + "step": 323545 + }, + { + "epoch": 0.76, + "grad_norm": 2.25, + "learning_rate": 0.00013660557361478275, + "loss": 1.9455, + "step": 323550 + }, + { + "epoch": 0.76, + "grad_norm": 2.203125, + "learning_rate": 0.0001366038536015586, + "loss": 2.1808, + "step": 323555 + }, + { + "epoch": 0.76, + "grad_norm": 2.046875, + "learning_rate": 0.00013660213357582994, + "loss": 1.9889, + "step": 323560 + }, + { + "epoch": 0.76, + "grad_norm": 4.78125, + "learning_rate": 0.0001366004135375973, + "loss": 2.2451, + "step": 323565 + }, + { + "epoch": 0.76, + "grad_norm": 2.0, + "learning_rate": 0.0001365986934868613, + "loss": 2.1408, + "step": 323570 + }, + { + "epoch": 0.76, + "grad_norm": 2.328125, + "learning_rate": 0.00013659697342362253, + "loss": 2.0747, + "step": 323575 + }, + { + "epoch": 0.76, + "grad_norm": 2.09375, + "learning_rate": 0.00013659525334788154, + "loss": 2.0152, + "step": 323580 + }, + { + "epoch": 0.76, + "grad_norm": 2.609375, + "learning_rate": 0.000136593533259639, + "loss": 1.9615, + "step": 323585 + }, + { + "epoch": 0.76, + "grad_norm": 2.234375, + "learning_rate": 0.00013659181315889542, + "loss": 2.0215, + "step": 323590 + }, + { + "epoch": 0.76, + "grad_norm": 2.40625, + "learning_rate": 0.0001365900930456514, + "loss": 1.8751, + "step": 323595 + }, + { + "epoch": 0.76, + "grad_norm": 2.046875, + "learning_rate": 0.00013658837291990756, + "loss": 2.1004, + "step": 323600 + }, + { + "epoch": 0.76, + "grad_norm": 2.375, + "learning_rate": 0.00013658665278166448, + "loss": 2.1084, + "step": 323605 + }, + { + "epoch": 0.76, + "grad_norm": 1.953125, + "learning_rate": 0.00013658493263092272, + "loss": 2.0413, + "step": 323610 + }, + { + "epoch": 0.76, + "grad_norm": 2.125, + "learning_rate": 0.00013658321246768287, + "loss": 2.0961, + "step": 323615 + }, + { + "epoch": 0.76, + "grad_norm": 2.578125, + "learning_rate": 0.00013658149229194553, + "loss": 1.9662, + "step": 323620 + }, + { + "epoch": 0.76, + "grad_norm": 2.0, + "learning_rate": 0.0001365797721037113, + "loss": 2.0212, + "step": 323625 + }, + { + "epoch": 0.76, + "grad_norm": 2.25, + "learning_rate": 0.0001365780519029808, + "loss": 2.0882, + "step": 323630 + }, + { + "epoch": 0.76, + "grad_norm": 2.59375, + "learning_rate": 0.0001365763316897545, + "loss": 1.8475, + "step": 323635 + }, + { + "epoch": 0.76, + "grad_norm": 2.546875, + "learning_rate": 0.00013657461146403306, + "loss": 2.0917, + "step": 323640 + }, + { + "epoch": 0.76, + "grad_norm": 3.015625, + "learning_rate": 0.0001365728912258171, + "loss": 2.2899, + "step": 323645 + }, + { + "epoch": 0.76, + "grad_norm": 2.203125, + "learning_rate": 0.00013657117097510725, + "loss": 2.0915, + "step": 323650 + }, + { + "epoch": 0.76, + "grad_norm": 2.3125, + "learning_rate": 0.00013656945071190393, + "loss": 2.1957, + "step": 323655 + }, + { + "epoch": 0.76, + "grad_norm": 2.0625, + "learning_rate": 0.00013656773043620787, + "loss": 2.153, + "step": 323660 + }, + { + "epoch": 0.76, + "grad_norm": 1.90625, + "learning_rate": 0.00013656601014801954, + "loss": 1.9534, + "step": 323665 + }, + { + "epoch": 0.76, + "grad_norm": 1.84375, + "learning_rate": 0.00013656428984733964, + "loss": 2.1629, + "step": 323670 + }, + { + "epoch": 0.76, + "grad_norm": 2.8125, + "learning_rate": 0.00013656256953416875, + "loss": 1.9586, + "step": 323675 + }, + { + "epoch": 0.76, + "grad_norm": 2.34375, + "learning_rate": 0.0001365608492085074, + "loss": 1.858, + "step": 323680 + }, + { + "epoch": 0.76, + "grad_norm": 2.359375, + "learning_rate": 0.0001365591288703562, + "loss": 2.0874, + "step": 323685 + }, + { + "epoch": 0.76, + "grad_norm": 1.9921875, + "learning_rate": 0.00013655740851971574, + "loss": 1.8747, + "step": 323690 + }, + { + "epoch": 0.76, + "grad_norm": 2.828125, + "learning_rate": 0.00013655568815658657, + "loss": 2.0059, + "step": 323695 + }, + { + "epoch": 0.76, + "grad_norm": 2.234375, + "learning_rate": 0.00013655396778096933, + "loss": 2.19, + "step": 323700 + }, + { + "epoch": 0.76, + "grad_norm": 1.84375, + "learning_rate": 0.00013655224739286462, + "loss": 1.9412, + "step": 323705 + }, + { + "epoch": 0.76, + "grad_norm": 2.171875, + "learning_rate": 0.00013655052699227297, + "loss": 1.9738, + "step": 323710 + }, + { + "epoch": 0.76, + "grad_norm": 2.265625, + "learning_rate": 0.00013654880657919502, + "loss": 2.0166, + "step": 323715 + }, + { + "epoch": 0.76, + "grad_norm": 2.234375, + "learning_rate": 0.0001365470861536313, + "loss": 2.0087, + "step": 323720 + }, + { + "epoch": 0.76, + "grad_norm": 2.171875, + "learning_rate": 0.0001365453657155825, + "loss": 2.2131, + "step": 323725 + }, + { + "epoch": 0.76, + "grad_norm": 2.21875, + "learning_rate": 0.00013654364526504909, + "loss": 2.0688, + "step": 323730 + }, + { + "epoch": 0.76, + "grad_norm": 2.171875, + "learning_rate": 0.00013654192480203173, + "loss": 2.2557, + "step": 323735 + }, + { + "epoch": 0.76, + "grad_norm": 2.171875, + "learning_rate": 0.00013654020432653097, + "loss": 2.1001, + "step": 323740 + }, + { + "epoch": 0.76, + "grad_norm": 2.40625, + "learning_rate": 0.00013653848383854742, + "loss": 2.149, + "step": 323745 + }, + { + "epoch": 0.76, + "grad_norm": 1.9609375, + "learning_rate": 0.00013653676333808164, + "loss": 1.9575, + "step": 323750 + }, + { + "epoch": 0.76, + "grad_norm": 2.265625, + "learning_rate": 0.00013653504282513425, + "loss": 1.8919, + "step": 323755 + }, + { + "epoch": 0.76, + "grad_norm": 2.140625, + "learning_rate": 0.00013653332229970585, + "loss": 1.915, + "step": 323760 + }, + { + "epoch": 0.76, + "grad_norm": 2.234375, + "learning_rate": 0.000136531601761797, + "loss": 1.9356, + "step": 323765 + }, + { + "epoch": 0.76, + "grad_norm": 2.546875, + "learning_rate": 0.0001365298812114083, + "loss": 2.0665, + "step": 323770 + }, + { + "epoch": 0.76, + "grad_norm": 2.34375, + "learning_rate": 0.00013652816064854028, + "loss": 2.082, + "step": 323775 + }, + { + "epoch": 0.76, + "grad_norm": 2.328125, + "learning_rate": 0.00013652644007319358, + "loss": 1.9467, + "step": 323780 + }, + { + "epoch": 0.76, + "grad_norm": 1.8671875, + "learning_rate": 0.00013652471948536883, + "loss": 1.9753, + "step": 323785 + }, + { + "epoch": 0.76, + "grad_norm": 2.453125, + "learning_rate": 0.00013652299888506657, + "loss": 2.1746, + "step": 323790 + }, + { + "epoch": 0.76, + "grad_norm": 2.265625, + "learning_rate": 0.0001365212782722874, + "loss": 2.1859, + "step": 323795 + }, + { + "epoch": 0.76, + "grad_norm": 2.296875, + "learning_rate": 0.00013651955764703183, + "loss": 1.9945, + "step": 323800 + }, + { + "epoch": 0.76, + "grad_norm": 2.1875, + "learning_rate": 0.00013651783700930058, + "loss": 1.9206, + "step": 323805 + }, + { + "epoch": 0.76, + "grad_norm": 2.3125, + "learning_rate": 0.00013651611635909417, + "loss": 2.0478, + "step": 323810 + }, + { + "epoch": 0.76, + "grad_norm": 2.09375, + "learning_rate": 0.00013651439569641318, + "loss": 2.0702, + "step": 323815 + }, + { + "epoch": 0.76, + "grad_norm": 2.4375, + "learning_rate": 0.00013651267502125822, + "loss": 2.0662, + "step": 323820 + }, + { + "epoch": 0.76, + "grad_norm": 2.09375, + "learning_rate": 0.00013651095433362987, + "loss": 1.9911, + "step": 323825 + }, + { + "epoch": 0.76, + "grad_norm": 2.28125, + "learning_rate": 0.0001365092336335287, + "loss": 1.8892, + "step": 323830 + }, + { + "epoch": 0.76, + "grad_norm": 2.390625, + "learning_rate": 0.0001365075129209553, + "loss": 1.9736, + "step": 323835 + }, + { + "epoch": 0.76, + "grad_norm": 2.484375, + "learning_rate": 0.0001365057921959103, + "loss": 2.0144, + "step": 323840 + }, + { + "epoch": 0.76, + "grad_norm": 2.5, + "learning_rate": 0.00013650407145839423, + "loss": 2.0612, + "step": 323845 + }, + { + "epoch": 0.76, + "grad_norm": 2.140625, + "learning_rate": 0.00013650235070840772, + "loss": 1.9693, + "step": 323850 + }, + { + "epoch": 0.76, + "grad_norm": 2.328125, + "learning_rate": 0.0001365006299459514, + "loss": 1.919, + "step": 323855 + }, + { + "epoch": 0.76, + "grad_norm": 1.8984375, + "learning_rate": 0.00013649890917102572, + "loss": 1.8929, + "step": 323860 + }, + { + "epoch": 0.76, + "grad_norm": 2.15625, + "learning_rate": 0.00013649718838363138, + "loss": 2.2077, + "step": 323865 + }, + { + "epoch": 0.76, + "grad_norm": 1.9609375, + "learning_rate": 0.00013649546758376895, + "loss": 2.0786, + "step": 323870 + }, + { + "epoch": 0.76, + "grad_norm": 2.09375, + "learning_rate": 0.000136493746771439, + "loss": 1.9401, + "step": 323875 + }, + { + "epoch": 0.76, + "grad_norm": 2.578125, + "learning_rate": 0.00013649202594664215, + "loss": 2.1649, + "step": 323880 + }, + { + "epoch": 0.76, + "grad_norm": 2.5625, + "learning_rate": 0.0001364903051093789, + "loss": 2.0835, + "step": 323885 + }, + { + "epoch": 0.76, + "grad_norm": 1.9921875, + "learning_rate": 0.00013648858425964995, + "loss": 1.9396, + "step": 323890 + }, + { + "epoch": 0.76, + "grad_norm": 2.078125, + "learning_rate": 0.00013648686339745583, + "loss": 2.003, + "step": 323895 + }, + { + "epoch": 0.76, + "grad_norm": 2.125, + "learning_rate": 0.00013648514252279713, + "loss": 2.0527, + "step": 323900 + }, + { + "epoch": 0.76, + "grad_norm": 2.53125, + "learning_rate": 0.00013648342163567447, + "loss": 2.1819, + "step": 323905 + }, + { + "epoch": 0.76, + "grad_norm": 2.03125, + "learning_rate": 0.00013648170073608836, + "loss": 2.0081, + "step": 323910 + }, + { + "epoch": 0.76, + "grad_norm": 2.390625, + "learning_rate": 0.00013647997982403946, + "loss": 2.0308, + "step": 323915 + }, + { + "epoch": 0.76, + "grad_norm": 2.03125, + "learning_rate": 0.00013647825889952835, + "loss": 2.0862, + "step": 323920 + }, + { + "epoch": 0.76, + "grad_norm": 1.828125, + "learning_rate": 0.00013647653796255562, + "loss": 1.9551, + "step": 323925 + }, + { + "epoch": 0.76, + "grad_norm": 2.34375, + "learning_rate": 0.00013647481701312182, + "loss": 1.9121, + "step": 323930 + }, + { + "epoch": 0.76, + "grad_norm": 2.09375, + "learning_rate": 0.00013647309605122754, + "loss": 2.1015, + "step": 323935 + }, + { + "epoch": 0.76, + "grad_norm": 2.15625, + "learning_rate": 0.0001364713750768734, + "loss": 1.9953, + "step": 323940 + }, + { + "epoch": 0.76, + "grad_norm": 2.140625, + "learning_rate": 0.00013646965409006, + "loss": 1.8526, + "step": 323945 + }, + { + "epoch": 0.76, + "grad_norm": 2.109375, + "learning_rate": 0.00013646793309078788, + "loss": 2.0585, + "step": 323950 + }, + { + "epoch": 0.76, + "grad_norm": 2.3125, + "learning_rate": 0.00013646621207905768, + "loss": 1.8597, + "step": 323955 + }, + { + "epoch": 0.76, + "grad_norm": 2.078125, + "learning_rate": 0.00013646449105486993, + "loss": 2.0165, + "step": 323960 + }, + { + "epoch": 0.76, + "grad_norm": 2.203125, + "learning_rate": 0.00013646277001822524, + "loss": 2.1251, + "step": 323965 + }, + { + "epoch": 0.76, + "grad_norm": 2.65625, + "learning_rate": 0.00013646104896912427, + "loss": 2.0787, + "step": 323970 + }, + { + "epoch": 0.76, + "grad_norm": 2.25, + "learning_rate": 0.00013645932790756752, + "loss": 1.9013, + "step": 323975 + }, + { + "epoch": 0.76, + "grad_norm": 2.6875, + "learning_rate": 0.00013645760683355558, + "loss": 2.0962, + "step": 323980 + }, + { + "epoch": 0.76, + "grad_norm": 2.203125, + "learning_rate": 0.00013645588574708904, + "loss": 2.0269, + "step": 323985 + }, + { + "epoch": 0.76, + "grad_norm": 2.109375, + "learning_rate": 0.00013645416464816857, + "loss": 2.0945, + "step": 323990 + }, + { + "epoch": 0.76, + "grad_norm": 2.78125, + "learning_rate": 0.00013645244353679465, + "loss": 2.0481, + "step": 323995 + }, + { + "epoch": 0.76, + "grad_norm": 1.828125, + "learning_rate": 0.00013645072241296792, + "loss": 1.9309, + "step": 324000 + }, + { + "epoch": 0.76, + "grad_norm": 2.125, + "learning_rate": 0.00013644900127668895, + "loss": 2.0878, + "step": 324005 + }, + { + "epoch": 0.76, + "grad_norm": 1.96875, + "learning_rate": 0.0001364472801279584, + "loss": 1.9371, + "step": 324010 + }, + { + "epoch": 0.76, + "grad_norm": 2.390625, + "learning_rate": 0.00013644555896677674, + "loss": 1.8159, + "step": 324015 + }, + { + "epoch": 0.76, + "grad_norm": 2.25, + "learning_rate": 0.00013644383779314463, + "loss": 2.157, + "step": 324020 + }, + { + "epoch": 0.76, + "grad_norm": 2.375, + "learning_rate": 0.00013644211660706262, + "loss": 2.1891, + "step": 324025 + }, + { + "epoch": 0.76, + "grad_norm": 2.109375, + "learning_rate": 0.00013644039540853137, + "loss": 2.2136, + "step": 324030 + }, + { + "epoch": 0.76, + "grad_norm": 2.140625, + "learning_rate": 0.0001364386741975514, + "loss": 2.1987, + "step": 324035 + }, + { + "epoch": 0.76, + "grad_norm": 2.234375, + "learning_rate": 0.00013643695297412334, + "loss": 2.1495, + "step": 324040 + }, + { + "epoch": 0.76, + "grad_norm": 2.375, + "learning_rate": 0.0001364352317382477, + "loss": 2.1212, + "step": 324045 + }, + { + "epoch": 0.76, + "grad_norm": 2.359375, + "learning_rate": 0.00013643351048992517, + "loss": 2.2711, + "step": 324050 + }, + { + "epoch": 0.76, + "grad_norm": 2.03125, + "learning_rate": 0.00013643178922915627, + "loss": 2.0207, + "step": 324055 + }, + { + "epoch": 0.76, + "grad_norm": 2.296875, + "learning_rate": 0.00013643006795594165, + "loss": 2.0313, + "step": 324060 + }, + { + "epoch": 0.76, + "grad_norm": 2.1875, + "learning_rate": 0.00013642834667028183, + "loss": 1.9722, + "step": 324065 + }, + { + "epoch": 0.76, + "grad_norm": 3.234375, + "learning_rate": 0.0001364266253721774, + "loss": 2.0155, + "step": 324070 + }, + { + "epoch": 0.76, + "grad_norm": 2.1875, + "learning_rate": 0.00013642490406162898, + "loss": 2.0832, + "step": 324075 + }, + { + "epoch": 0.76, + "grad_norm": 2.6875, + "learning_rate": 0.00013642318273863717, + "loss": 2.0348, + "step": 324080 + }, + { + "epoch": 0.76, + "grad_norm": 2.375, + "learning_rate": 0.00013642146140320255, + "loss": 2.0137, + "step": 324085 + }, + { + "epoch": 0.76, + "grad_norm": 2.109375, + "learning_rate": 0.0001364197400553257, + "loss": 1.9277, + "step": 324090 + }, + { + "epoch": 0.76, + "grad_norm": 2.171875, + "learning_rate": 0.00013641801869500718, + "loss": 2.1481, + "step": 324095 + }, + { + "epoch": 0.76, + "grad_norm": 2.78125, + "learning_rate": 0.00013641629732224762, + "loss": 1.9119, + "step": 324100 + }, + { + "epoch": 0.76, + "grad_norm": 2.46875, + "learning_rate": 0.0001364145759370476, + "loss": 2.1785, + "step": 324105 + }, + { + "epoch": 0.76, + "grad_norm": 2.015625, + "learning_rate": 0.0001364128545394077, + "loss": 2.065, + "step": 324110 + }, + { + "epoch": 0.76, + "grad_norm": 2.296875, + "learning_rate": 0.00013641113312932846, + "loss": 2.0173, + "step": 324115 + }, + { + "epoch": 0.76, + "grad_norm": 2.359375, + "learning_rate": 0.00013640941170681059, + "loss": 2.0988, + "step": 324120 + }, + { + "epoch": 0.76, + "grad_norm": 2.65625, + "learning_rate": 0.00013640769027185455, + "loss": 2.1024, + "step": 324125 + }, + { + "epoch": 0.76, + "grad_norm": 2.25, + "learning_rate": 0.000136405968824461, + "loss": 2.0436, + "step": 324130 + }, + { + "epoch": 0.76, + "grad_norm": 2.171875, + "learning_rate": 0.00013640424736463053, + "loss": 1.9063, + "step": 324135 + }, + { + "epoch": 0.76, + "grad_norm": 2.453125, + "learning_rate": 0.00013640252589236369, + "loss": 1.998, + "step": 324140 + }, + { + "epoch": 0.76, + "grad_norm": 1.7265625, + "learning_rate": 0.0001364008044076611, + "loss": 1.9043, + "step": 324145 + }, + { + "epoch": 0.76, + "grad_norm": 1.7109375, + "learning_rate": 0.00013639908291052333, + "loss": 1.8844, + "step": 324150 + }, + { + "epoch": 0.76, + "grad_norm": 2.125, + "learning_rate": 0.00013639736140095096, + "loss": 1.8192, + "step": 324155 + }, + { + "epoch": 0.76, + "grad_norm": 2.03125, + "learning_rate": 0.0001363956398789446, + "loss": 1.9701, + "step": 324160 + }, + { + "epoch": 0.76, + "grad_norm": 2.015625, + "learning_rate": 0.00013639391834450483, + "loss": 2.0212, + "step": 324165 + }, + { + "epoch": 0.76, + "grad_norm": 2.765625, + "learning_rate": 0.00013639219679763222, + "loss": 2.1567, + "step": 324170 + }, + { + "epoch": 0.76, + "grad_norm": 2.09375, + "learning_rate": 0.0001363904752383274, + "loss": 2.0165, + "step": 324175 + }, + { + "epoch": 0.76, + "grad_norm": 1.9921875, + "learning_rate": 0.00013638875366659092, + "loss": 2.0888, + "step": 324180 + }, + { + "epoch": 0.76, + "grad_norm": 2.125, + "learning_rate": 0.00013638703208242338, + "loss": 2.0256, + "step": 324185 + }, + { + "epoch": 0.76, + "grad_norm": 2.234375, + "learning_rate": 0.00013638531048582538, + "loss": 2.1613, + "step": 324190 + }, + { + "epoch": 0.76, + "grad_norm": 2.03125, + "learning_rate": 0.0001363835888767975, + "loss": 2.0175, + "step": 324195 + }, + { + "epoch": 0.76, + "grad_norm": 2.171875, + "learning_rate": 0.00013638186725534036, + "loss": 2.1266, + "step": 324200 + }, + { + "epoch": 0.76, + "grad_norm": 2.828125, + "learning_rate": 0.00013638014562145445, + "loss": 2.1422, + "step": 324205 + }, + { + "epoch": 0.76, + "grad_norm": 1.9453125, + "learning_rate": 0.00013637842397514044, + "loss": 1.933, + "step": 324210 + }, + { + "epoch": 0.76, + "grad_norm": 2.5, + "learning_rate": 0.00013637670231639892, + "loss": 2.0464, + "step": 324215 + }, + { + "epoch": 0.76, + "grad_norm": 2.484375, + "learning_rate": 0.00013637498064523044, + "loss": 2.0246, + "step": 324220 + }, + { + "epoch": 0.76, + "grad_norm": 2.34375, + "learning_rate": 0.00013637325896163565, + "loss": 2.0368, + "step": 324225 + }, + { + "epoch": 0.76, + "grad_norm": 2.1875, + "learning_rate": 0.000136371537265615, + "loss": 2.2101, + "step": 324230 + }, + { + "epoch": 0.76, + "grad_norm": 2.234375, + "learning_rate": 0.00013636981555716925, + "loss": 2.0927, + "step": 324235 + }, + { + "epoch": 0.76, + "grad_norm": 1.890625, + "learning_rate": 0.0001363680938362989, + "loss": 2.146, + "step": 324240 + }, + { + "epoch": 0.76, + "grad_norm": 2.078125, + "learning_rate": 0.00013636637210300457, + "loss": 2.2074, + "step": 324245 + }, + { + "epoch": 0.76, + "grad_norm": 2.546875, + "learning_rate": 0.0001363646503572868, + "loss": 2.0592, + "step": 324250 + }, + { + "epoch": 0.76, + "grad_norm": 2.453125, + "learning_rate": 0.0001363629285991462, + "loss": 2.1548, + "step": 324255 + }, + { + "epoch": 0.76, + "grad_norm": 2.46875, + "learning_rate": 0.00013636120682858336, + "loss": 2.228, + "step": 324260 + }, + { + "epoch": 0.76, + "grad_norm": 2.359375, + "learning_rate": 0.0001363594850455989, + "loss": 2.0415, + "step": 324265 + }, + { + "epoch": 0.76, + "grad_norm": 1.9609375, + "learning_rate": 0.00013635776325019336, + "loss": 1.8727, + "step": 324270 + }, + { + "epoch": 0.76, + "grad_norm": 2.109375, + "learning_rate": 0.00013635604144236735, + "loss": 2.1575, + "step": 324275 + }, + { + "epoch": 0.76, + "grad_norm": 2.140625, + "learning_rate": 0.00013635431962212146, + "loss": 1.9858, + "step": 324280 + }, + { + "epoch": 0.76, + "grad_norm": 2.625, + "learning_rate": 0.0001363525977894563, + "loss": 2.0602, + "step": 324285 + }, + { + "epoch": 0.76, + "grad_norm": 2.1875, + "learning_rate": 0.00013635087594437238, + "loss": 2.0862, + "step": 324290 + }, + { + "epoch": 0.76, + "grad_norm": 2.3125, + "learning_rate": 0.00013634915408687037, + "loss": 2.1287, + "step": 324295 + }, + { + "epoch": 0.76, + "grad_norm": 2.40625, + "learning_rate": 0.00013634743221695083, + "loss": 2.1982, + "step": 324300 + }, + { + "epoch": 0.76, + "grad_norm": 2.125, + "learning_rate": 0.00013634571033461435, + "loss": 2.1352, + "step": 324305 + }, + { + "epoch": 0.76, + "grad_norm": 2.21875, + "learning_rate": 0.00013634398843986151, + "loss": 1.9936, + "step": 324310 + }, + { + "epoch": 0.76, + "grad_norm": 2.671875, + "learning_rate": 0.0001363422665326929, + "loss": 2.0874, + "step": 324315 + }, + { + "epoch": 0.76, + "grad_norm": 2.53125, + "learning_rate": 0.00013634054461310915, + "loss": 2.062, + "step": 324320 + }, + { + "epoch": 0.76, + "grad_norm": 2.25, + "learning_rate": 0.00013633882268111077, + "loss": 2.2664, + "step": 324325 + }, + { + "epoch": 0.76, + "grad_norm": 2.234375, + "learning_rate": 0.00013633710073669838, + "loss": 2.0214, + "step": 324330 + }, + { + "epoch": 0.76, + "grad_norm": 2.0, + "learning_rate": 0.00013633537877987264, + "loss": 2.1573, + "step": 324335 + }, + { + "epoch": 0.76, + "grad_norm": 2.453125, + "learning_rate": 0.00013633365681063402, + "loss": 2.1872, + "step": 324340 + }, + { + "epoch": 0.76, + "grad_norm": 2.046875, + "learning_rate": 0.00013633193482898316, + "loss": 1.9724, + "step": 324345 + }, + { + "epoch": 0.76, + "grad_norm": 2.140625, + "learning_rate": 0.00013633021283492066, + "loss": 1.9975, + "step": 324350 + }, + { + "epoch": 0.76, + "grad_norm": 2.46875, + "learning_rate": 0.00013632849082844715, + "loss": 2.0803, + "step": 324355 + }, + { + "epoch": 0.76, + "grad_norm": 2.484375, + "learning_rate": 0.0001363267688095631, + "loss": 1.8616, + "step": 324360 + }, + { + "epoch": 0.76, + "grad_norm": 2.1875, + "learning_rate": 0.0001363250467782692, + "loss": 2.0324, + "step": 324365 + }, + { + "epoch": 0.76, + "grad_norm": 2.421875, + "learning_rate": 0.000136323324734566, + "loss": 2.0652, + "step": 324370 + }, + { + "epoch": 0.76, + "grad_norm": 2.03125, + "learning_rate": 0.00013632160267845408, + "loss": 2.0396, + "step": 324375 + }, + { + "epoch": 0.76, + "grad_norm": 2.0, + "learning_rate": 0.00013631988060993407, + "loss": 1.9979, + "step": 324380 + }, + { + "epoch": 0.76, + "grad_norm": 2.4375, + "learning_rate": 0.00013631815852900654, + "loss": 2.1029, + "step": 324385 + }, + { + "epoch": 0.76, + "grad_norm": 1.9609375, + "learning_rate": 0.000136316436435672, + "loss": 2.0567, + "step": 324390 + }, + { + "epoch": 0.76, + "grad_norm": 2.46875, + "learning_rate": 0.00013631471432993118, + "loss": 1.804, + "step": 324395 + }, + { + "epoch": 0.76, + "grad_norm": 2.375, + "learning_rate": 0.00013631299221178458, + "loss": 1.923, + "step": 324400 + }, + { + "epoch": 0.76, + "grad_norm": 2.109375, + "learning_rate": 0.00013631127008123279, + "loss": 2.0113, + "step": 324405 + }, + { + "epoch": 0.76, + "grad_norm": 2.296875, + "learning_rate": 0.00013630954793827642, + "loss": 2.0941, + "step": 324410 + }, + { + "epoch": 0.76, + "grad_norm": 2.390625, + "learning_rate": 0.00013630782578291604, + "loss": 1.943, + "step": 324415 + }, + { + "epoch": 0.76, + "grad_norm": 2.421875, + "learning_rate": 0.00013630610361515223, + "loss": 1.9505, + "step": 324420 + }, + { + "epoch": 0.76, + "grad_norm": 2.6875, + "learning_rate": 0.00013630438143498564, + "loss": 2.1476, + "step": 324425 + }, + { + "epoch": 0.76, + "grad_norm": 2.375, + "learning_rate": 0.0001363026592424168, + "loss": 1.9975, + "step": 324430 + }, + { + "epoch": 0.76, + "grad_norm": 2.125, + "learning_rate": 0.00013630093703744629, + "loss": 2.126, + "step": 324435 + }, + { + "epoch": 0.76, + "grad_norm": 1.9453125, + "learning_rate": 0.00013629921482007472, + "loss": 2.0196, + "step": 324440 + }, + { + "epoch": 0.76, + "grad_norm": 2.140625, + "learning_rate": 0.0001362974925903027, + "loss": 2.0846, + "step": 324445 + }, + { + "epoch": 0.76, + "grad_norm": 2.484375, + "learning_rate": 0.00013629577034813078, + "loss": 2.1047, + "step": 324450 + }, + { + "epoch": 0.76, + "grad_norm": 2.53125, + "learning_rate": 0.0001362940480935596, + "loss": 2.0496, + "step": 324455 + }, + { + "epoch": 0.76, + "grad_norm": 2.125, + "learning_rate": 0.0001362923258265897, + "loss": 2.1519, + "step": 324460 + }, + { + "epoch": 0.76, + "grad_norm": 2.046875, + "learning_rate": 0.00013629060354722168, + "loss": 2.0998, + "step": 324465 + }, + { + "epoch": 0.76, + "grad_norm": 2.640625, + "learning_rate": 0.00013628888125545615, + "loss": 2.1906, + "step": 324470 + }, + { + "epoch": 0.76, + "grad_norm": 2.375, + "learning_rate": 0.00013628715895129366, + "loss": 2.068, + "step": 324475 + }, + { + "epoch": 0.76, + "grad_norm": 2.40625, + "learning_rate": 0.00013628543663473478, + "loss": 2.0334, + "step": 324480 + }, + { + "epoch": 0.76, + "grad_norm": 2.3125, + "learning_rate": 0.0001362837143057802, + "loss": 2.0255, + "step": 324485 + }, + { + "epoch": 0.76, + "grad_norm": 2.375, + "learning_rate": 0.00013628199196443042, + "loss": 1.9494, + "step": 324490 + }, + { + "epoch": 0.76, + "grad_norm": 2.25, + "learning_rate": 0.00013628026961068606, + "loss": 2.0271, + "step": 324495 + }, + { + "epoch": 0.76, + "grad_norm": 2.515625, + "learning_rate": 0.00013627854724454768, + "loss": 1.9079, + "step": 324500 + }, + { + "epoch": 0.76, + "grad_norm": 2.203125, + "learning_rate": 0.0001362768248660159, + "loss": 2.0297, + "step": 324505 + }, + { + "epoch": 0.76, + "grad_norm": 2.140625, + "learning_rate": 0.0001362751024750913, + "loss": 2.1798, + "step": 324510 + }, + { + "epoch": 0.76, + "grad_norm": 2.015625, + "learning_rate": 0.0001362733800717745, + "loss": 2.174, + "step": 324515 + }, + { + "epoch": 0.76, + "grad_norm": 2.515625, + "learning_rate": 0.00013627165765606604, + "loss": 2.1409, + "step": 324520 + }, + { + "epoch": 0.76, + "grad_norm": 2.0625, + "learning_rate": 0.00013626993522796647, + "loss": 1.9948, + "step": 324525 + }, + { + "epoch": 0.76, + "grad_norm": 2.046875, + "learning_rate": 0.0001362682127874765, + "loss": 2.2069, + "step": 324530 + }, + { + "epoch": 0.76, + "grad_norm": 2.171875, + "learning_rate": 0.00013626649033459664, + "loss": 2.1142, + "step": 324535 + }, + { + "epoch": 0.76, + "grad_norm": 2.3125, + "learning_rate": 0.00013626476786932746, + "loss": 2.042, + "step": 324540 + }, + { + "epoch": 0.76, + "grad_norm": 2.21875, + "learning_rate": 0.0001362630453916696, + "loss": 2.0376, + "step": 324545 + }, + { + "epoch": 0.76, + "grad_norm": 2.328125, + "learning_rate": 0.0001362613229016236, + "loss": 2.1757, + "step": 324550 + }, + { + "epoch": 0.76, + "grad_norm": 2.390625, + "learning_rate": 0.0001362596003991901, + "loss": 2.0361, + "step": 324555 + }, + { + "epoch": 0.76, + "grad_norm": 2.09375, + "learning_rate": 0.00013625787788436967, + "loss": 2.1195, + "step": 324560 + }, + { + "epoch": 0.76, + "grad_norm": 1.703125, + "learning_rate": 0.00013625615535716286, + "loss": 1.9081, + "step": 324565 + }, + { + "epoch": 0.76, + "grad_norm": 2.15625, + "learning_rate": 0.00013625443281757032, + "loss": 2.2242, + "step": 324570 + }, + { + "epoch": 0.76, + "grad_norm": 2.34375, + "learning_rate": 0.00013625271026559258, + "loss": 2.0999, + "step": 324575 + }, + { + "epoch": 0.76, + "grad_norm": 2.09375, + "learning_rate": 0.00013625098770123027, + "loss": 2.07, + "step": 324580 + }, + { + "epoch": 0.76, + "grad_norm": 1.953125, + "learning_rate": 0.00013624926512448396, + "loss": 1.9275, + "step": 324585 + }, + { + "epoch": 0.76, + "grad_norm": 2.1875, + "learning_rate": 0.00013624754253535425, + "loss": 2.0356, + "step": 324590 + }, + { + "epoch": 0.76, + "grad_norm": 2.078125, + "learning_rate": 0.00013624581993384172, + "loss": 2.2796, + "step": 324595 + }, + { + "epoch": 0.76, + "grad_norm": 1.8046875, + "learning_rate": 0.000136244097319947, + "loss": 2.0499, + "step": 324600 + }, + { + "epoch": 0.76, + "grad_norm": 2.265625, + "learning_rate": 0.00013624237469367058, + "loss": 2.0366, + "step": 324605 + }, + { + "epoch": 0.76, + "grad_norm": 2.421875, + "learning_rate": 0.00013624065205501313, + "loss": 2.0635, + "step": 324610 + }, + { + "epoch": 0.76, + "grad_norm": 2.109375, + "learning_rate": 0.00013623892940397523, + "loss": 1.9449, + "step": 324615 + }, + { + "epoch": 0.76, + "grad_norm": 2.25, + "learning_rate": 0.00013623720674055742, + "loss": 1.8571, + "step": 324620 + }, + { + "epoch": 0.76, + "grad_norm": 2.296875, + "learning_rate": 0.00013623548406476035, + "loss": 2.1099, + "step": 324625 + }, + { + "epoch": 0.76, + "grad_norm": 2.3125, + "learning_rate": 0.0001362337613765846, + "loss": 1.8834, + "step": 324630 + }, + { + "epoch": 0.76, + "grad_norm": 2.421875, + "learning_rate": 0.0001362320386760307, + "loss": 2.0047, + "step": 324635 + }, + { + "epoch": 0.76, + "grad_norm": 2.09375, + "learning_rate": 0.0001362303159630993, + "loss": 2.051, + "step": 324640 + }, + { + "epoch": 0.76, + "grad_norm": 2.296875, + "learning_rate": 0.00013622859323779093, + "loss": 2.0934, + "step": 324645 + }, + { + "epoch": 0.76, + "grad_norm": 2.421875, + "learning_rate": 0.00013622687050010628, + "loss": 2.0841, + "step": 324650 + }, + { + "epoch": 0.76, + "grad_norm": 2.59375, + "learning_rate": 0.00013622514775004586, + "loss": 2.1582, + "step": 324655 + }, + { + "epoch": 0.76, + "grad_norm": 2.46875, + "learning_rate": 0.0001362234249876102, + "loss": 2.0591, + "step": 324660 + }, + { + "epoch": 0.76, + "grad_norm": 2.3125, + "learning_rate": 0.00013622170221280002, + "loss": 1.9943, + "step": 324665 + }, + { + "epoch": 0.76, + "grad_norm": 2.0625, + "learning_rate": 0.00013621997942561584, + "loss": 2.1328, + "step": 324670 + }, + { + "epoch": 0.76, + "grad_norm": 1.984375, + "learning_rate": 0.00013621825662605828, + "loss": 2.1331, + "step": 324675 + }, + { + "epoch": 0.76, + "grad_norm": 2.359375, + "learning_rate": 0.0001362165338141279, + "loss": 2.1386, + "step": 324680 + }, + { + "epoch": 0.76, + "grad_norm": 3.40625, + "learning_rate": 0.00013621481098982524, + "loss": 1.9159, + "step": 324685 + }, + { + "epoch": 0.76, + "grad_norm": 2.5, + "learning_rate": 0.000136213088153151, + "loss": 2.2214, + "step": 324690 + }, + { + "epoch": 0.76, + "grad_norm": 1.890625, + "learning_rate": 0.00013621136530410572, + "loss": 2.156, + "step": 324695 + }, + { + "epoch": 0.76, + "grad_norm": 2.140625, + "learning_rate": 0.00013620964244268996, + "loss": 2.0734, + "step": 324700 + }, + { + "epoch": 0.76, + "grad_norm": 2.375, + "learning_rate": 0.00013620791956890432, + "loss": 2.1835, + "step": 324705 + }, + { + "epoch": 0.76, + "grad_norm": 2.265625, + "learning_rate": 0.00013620619668274942, + "loss": 2.0557, + "step": 324710 + }, + { + "epoch": 0.76, + "grad_norm": 1.8828125, + "learning_rate": 0.0001362044737842258, + "loss": 2.1614, + "step": 324715 + }, + { + "epoch": 0.76, + "grad_norm": 2.03125, + "learning_rate": 0.00013620275087333409, + "loss": 2.1053, + "step": 324720 + }, + { + "epoch": 0.76, + "grad_norm": 1.9296875, + "learning_rate": 0.00013620102795007486, + "loss": 2.016, + "step": 324725 + }, + { + "epoch": 0.76, + "grad_norm": 1.5859375, + "learning_rate": 0.00013619930501444872, + "loss": 2.021, + "step": 324730 + }, + { + "epoch": 0.76, + "grad_norm": 2.265625, + "learning_rate": 0.00013619758206645622, + "loss": 1.9448, + "step": 324735 + }, + { + "epoch": 0.76, + "grad_norm": 2.328125, + "learning_rate": 0.00013619585910609798, + "loss": 2.0766, + "step": 324740 + }, + { + "epoch": 0.76, + "grad_norm": 1.984375, + "learning_rate": 0.00013619413613337456, + "loss": 1.977, + "step": 324745 + }, + { + "epoch": 0.76, + "grad_norm": 2.09375, + "learning_rate": 0.00013619241314828656, + "loss": 2.0425, + "step": 324750 + }, + { + "epoch": 0.76, + "grad_norm": 2.53125, + "learning_rate": 0.0001361906901508346, + "loss": 1.9604, + "step": 324755 + }, + { + "epoch": 0.76, + "grad_norm": 2.578125, + "learning_rate": 0.00013618896714101927, + "loss": 2.0327, + "step": 324760 + }, + { + "epoch": 0.76, + "grad_norm": 2.359375, + "learning_rate": 0.00013618724411884107, + "loss": 2.083, + "step": 324765 + }, + { + "epoch": 0.76, + "grad_norm": 2.09375, + "learning_rate": 0.0001361855210843007, + "loss": 2.0697, + "step": 324770 + }, + { + "epoch": 0.76, + "grad_norm": 2.03125, + "learning_rate": 0.0001361837980373987, + "loss": 2.1585, + "step": 324775 + }, + { + "epoch": 0.76, + "grad_norm": 1.875, + "learning_rate": 0.00013618207497813562, + "loss": 1.9741, + "step": 324780 + }, + { + "epoch": 0.76, + "grad_norm": 1.8515625, + "learning_rate": 0.0001361803519065121, + "loss": 2.1063, + "step": 324785 + }, + { + "epoch": 0.76, + "grad_norm": 2.296875, + "learning_rate": 0.00013617862882252875, + "loss": 2.1238, + "step": 324790 + }, + { + "epoch": 0.76, + "grad_norm": 2.125, + "learning_rate": 0.0001361769057261861, + "loss": 1.9914, + "step": 324795 + }, + { + "epoch": 0.76, + "grad_norm": 2.140625, + "learning_rate": 0.00013617518261748474, + "loss": 1.8335, + "step": 324800 + }, + { + "epoch": 0.76, + "grad_norm": 2.203125, + "learning_rate": 0.0001361734594964253, + "loss": 2.0747, + "step": 324805 + }, + { + "epoch": 0.76, + "grad_norm": 2.46875, + "learning_rate": 0.00013617173636300836, + "loss": 1.9058, + "step": 324810 + }, + { + "epoch": 0.76, + "grad_norm": 2.359375, + "learning_rate": 0.0001361700132172345, + "loss": 2.1615, + "step": 324815 + }, + { + "epoch": 0.76, + "grad_norm": 2.21875, + "learning_rate": 0.00013616829005910427, + "loss": 2.2113, + "step": 324820 + }, + { + "epoch": 0.76, + "grad_norm": 2.03125, + "learning_rate": 0.00013616656688861833, + "loss": 1.9144, + "step": 324825 + }, + { + "epoch": 0.76, + "grad_norm": 2.359375, + "learning_rate": 0.00013616484370577723, + "loss": 1.9042, + "step": 324830 + }, + { + "epoch": 0.76, + "grad_norm": 2.125, + "learning_rate": 0.0001361631205105816, + "loss": 2.0292, + "step": 324835 + }, + { + "epoch": 0.76, + "grad_norm": 2.34375, + "learning_rate": 0.00013616139730303196, + "loss": 2.2444, + "step": 324840 + }, + { + "epoch": 0.76, + "grad_norm": 2.640625, + "learning_rate": 0.0001361596740831289, + "loss": 2.1539, + "step": 324845 + }, + { + "epoch": 0.76, + "grad_norm": 2.25, + "learning_rate": 0.00013615795085087306, + "loss": 2.1957, + "step": 324850 + }, + { + "epoch": 0.76, + "grad_norm": 2.328125, + "learning_rate": 0.00013615622760626504, + "loss": 2.1332, + "step": 324855 + }, + { + "epoch": 0.76, + "grad_norm": 2.3125, + "learning_rate": 0.00013615450434930536, + "loss": 2.1179, + "step": 324860 + }, + { + "epoch": 0.76, + "grad_norm": 2.140625, + "learning_rate": 0.00013615278107999466, + "loss": 1.9881, + "step": 324865 + }, + { + "epoch": 0.76, + "grad_norm": 2.515625, + "learning_rate": 0.0001361510577983335, + "loss": 1.9755, + "step": 324870 + }, + { + "epoch": 0.76, + "grad_norm": 2.625, + "learning_rate": 0.0001361493345043225, + "loss": 2.0998, + "step": 324875 + }, + { + "epoch": 0.76, + "grad_norm": 2.203125, + "learning_rate": 0.00013614761119796223, + "loss": 2.1146, + "step": 324880 + }, + { + "epoch": 0.76, + "grad_norm": 2.234375, + "learning_rate": 0.00013614588787925328, + "loss": 2.145, + "step": 324885 + }, + { + "epoch": 0.76, + "grad_norm": 1.9765625, + "learning_rate": 0.00013614416454819624, + "loss": 2.0117, + "step": 324890 + }, + { + "epoch": 0.76, + "grad_norm": 2.1875, + "learning_rate": 0.00013614244120479168, + "loss": 2.0817, + "step": 324895 + }, + { + "epoch": 0.76, + "grad_norm": 2.390625, + "learning_rate": 0.00013614071784904023, + "loss": 2.078, + "step": 324900 + }, + { + "epoch": 0.76, + "grad_norm": 2.140625, + "learning_rate": 0.00013613899448094242, + "loss": 1.9294, + "step": 324905 + }, + { + "epoch": 0.76, + "grad_norm": 1.9765625, + "learning_rate": 0.00013613727110049892, + "loss": 1.9885, + "step": 324910 + }, + { + "epoch": 0.76, + "grad_norm": 1.9765625, + "learning_rate": 0.00013613554770771025, + "loss": 1.8491, + "step": 324915 + }, + { + "epoch": 0.76, + "grad_norm": 2.890625, + "learning_rate": 0.00013613382430257702, + "loss": 1.9539, + "step": 324920 + }, + { + "epoch": 0.76, + "grad_norm": 2.0, + "learning_rate": 0.00013613210088509982, + "loss": 2.0472, + "step": 324925 + }, + { + "epoch": 0.76, + "grad_norm": 1.96875, + "learning_rate": 0.00013613037745527926, + "loss": 2.14, + "step": 324930 + }, + { + "epoch": 0.76, + "grad_norm": 2.203125, + "learning_rate": 0.00013612865401311586, + "loss": 1.9934, + "step": 324935 + }, + { + "epoch": 0.76, + "grad_norm": 2.5625, + "learning_rate": 0.0001361269305586103, + "loss": 2.0407, + "step": 324940 + }, + { + "epoch": 0.76, + "grad_norm": 1.953125, + "learning_rate": 0.00013612520709176313, + "loss": 2.0683, + "step": 324945 + }, + { + "epoch": 0.76, + "grad_norm": 2.46875, + "learning_rate": 0.0001361234836125749, + "loss": 2.0789, + "step": 324950 + }, + { + "epoch": 0.76, + "grad_norm": 1.8671875, + "learning_rate": 0.00013612176012104625, + "loss": 2.1221, + "step": 324955 + }, + { + "epoch": 0.76, + "grad_norm": 2.265625, + "learning_rate": 0.00013612003661717776, + "loss": 2.1159, + "step": 324960 + }, + { + "epoch": 0.76, + "grad_norm": 2.09375, + "learning_rate": 0.00013611831310097, + "loss": 2.1126, + "step": 324965 + }, + { + "epoch": 0.76, + "grad_norm": 2.28125, + "learning_rate": 0.00013611658957242358, + "loss": 2.097, + "step": 324970 + }, + { + "epoch": 0.76, + "grad_norm": 2.140625, + "learning_rate": 0.00013611486603153908, + "loss": 2.2772, + "step": 324975 + }, + { + "epoch": 0.76, + "grad_norm": 2.09375, + "learning_rate": 0.00013611314247831703, + "loss": 2.0614, + "step": 324980 + }, + { + "epoch": 0.76, + "grad_norm": 1.7890625, + "learning_rate": 0.00013611141891275815, + "loss": 1.9271, + "step": 324985 + }, + { + "epoch": 0.76, + "grad_norm": 2.21875, + "learning_rate": 0.00013610969533486294, + "loss": 2.1745, + "step": 324990 + }, + { + "epoch": 0.76, + "grad_norm": 2.640625, + "learning_rate": 0.000136107971744632, + "loss": 2.0511, + "step": 324995 + }, + { + "epoch": 0.76, + "grad_norm": 2.265625, + "learning_rate": 0.00013610624814206588, + "loss": 2.0689, + "step": 325000 + }, + { + "epoch": 0.76, + "grad_norm": 2.015625, + "learning_rate": 0.00013610452452716526, + "loss": 1.9004, + "step": 325005 + }, + { + "epoch": 0.76, + "grad_norm": 2.09375, + "learning_rate": 0.00013610280089993066, + "loss": 2.133, + "step": 325010 + }, + { + "epoch": 0.76, + "grad_norm": 1.9453125, + "learning_rate": 0.00013610107726036268, + "loss": 2.0598, + "step": 325015 + }, + { + "epoch": 0.76, + "grad_norm": 2.359375, + "learning_rate": 0.00013609935360846194, + "loss": 2.2012, + "step": 325020 + }, + { + "epoch": 0.76, + "grad_norm": 2.109375, + "learning_rate": 0.00013609762994422898, + "loss": 2.0288, + "step": 325025 + }, + { + "epoch": 0.76, + "grad_norm": 2.0625, + "learning_rate": 0.0001360959062676644, + "loss": 1.9626, + "step": 325030 + }, + { + "epoch": 0.76, + "grad_norm": 2.828125, + "learning_rate": 0.00013609418257876888, + "loss": 2.0341, + "step": 325035 + }, + { + "epoch": 0.76, + "grad_norm": 2.40625, + "learning_rate": 0.00013609245887754288, + "loss": 1.9689, + "step": 325040 + }, + { + "epoch": 0.76, + "grad_norm": 2.546875, + "learning_rate": 0.00013609073516398704, + "loss": 1.8944, + "step": 325045 + }, + { + "epoch": 0.76, + "grad_norm": 2.03125, + "learning_rate": 0.00013608901143810195, + "loss": 1.9932, + "step": 325050 + }, + { + "epoch": 0.76, + "grad_norm": 2.171875, + "learning_rate": 0.0001360872876998882, + "loss": 2.0129, + "step": 325055 + }, + { + "epoch": 0.76, + "grad_norm": 2.890625, + "learning_rate": 0.0001360855639493464, + "loss": 2.0407, + "step": 325060 + }, + { + "epoch": 0.76, + "grad_norm": 2.15625, + "learning_rate": 0.0001360838401864771, + "loss": 2.035, + "step": 325065 + }, + { + "epoch": 0.76, + "grad_norm": 2.21875, + "learning_rate": 0.00013608211641128089, + "loss": 2.041, + "step": 325070 + }, + { + "epoch": 0.77, + "grad_norm": 2.234375, + "learning_rate": 0.0001360803926237584, + "loss": 2.1204, + "step": 325075 + }, + { + "epoch": 0.77, + "grad_norm": 2.109375, + "learning_rate": 0.0001360786688239102, + "loss": 2.086, + "step": 325080 + }, + { + "epoch": 0.77, + "grad_norm": 2.234375, + "learning_rate": 0.00013607694501173684, + "loss": 2.2307, + "step": 325085 + }, + { + "epoch": 0.77, + "grad_norm": 2.0625, + "learning_rate": 0.00013607522118723895, + "loss": 2.0228, + "step": 325090 + }, + { + "epoch": 0.77, + "grad_norm": 2.421875, + "learning_rate": 0.00013607349735041713, + "loss": 2.16, + "step": 325095 + }, + { + "epoch": 0.77, + "grad_norm": 2.53125, + "learning_rate": 0.0001360717735012719, + "loss": 1.9951, + "step": 325100 + }, + { + "epoch": 0.77, + "grad_norm": 1.7734375, + "learning_rate": 0.00013607004963980398, + "loss": 2.0687, + "step": 325105 + }, + { + "epoch": 0.77, + "grad_norm": 2.0625, + "learning_rate": 0.0001360683257660138, + "loss": 2.162, + "step": 325110 + }, + { + "epoch": 0.77, + "grad_norm": 2.015625, + "learning_rate": 0.00013606660187990207, + "loss": 2.0448, + "step": 325115 + }, + { + "epoch": 0.77, + "grad_norm": 2.015625, + "learning_rate": 0.00013606487798146932, + "loss": 1.9325, + "step": 325120 + }, + { + "epoch": 0.77, + "grad_norm": 2.109375, + "learning_rate": 0.00013606315407071616, + "loss": 1.9843, + "step": 325125 + }, + { + "epoch": 0.77, + "grad_norm": 2.265625, + "learning_rate": 0.00013606143014764318, + "loss": 2.1809, + "step": 325130 + }, + { + "epoch": 0.77, + "grad_norm": 1.8203125, + "learning_rate": 0.00013605970621225096, + "loss": 2.0262, + "step": 325135 + }, + { + "epoch": 0.77, + "grad_norm": 2.21875, + "learning_rate": 0.00013605798226454005, + "loss": 1.9935, + "step": 325140 + }, + { + "epoch": 0.77, + "grad_norm": 2.453125, + "learning_rate": 0.00013605625830451112, + "loss": 2.0215, + "step": 325145 + }, + { + "epoch": 0.77, + "grad_norm": 2.4375, + "learning_rate": 0.00013605453433216473, + "loss": 2.0972, + "step": 325150 + }, + { + "epoch": 0.77, + "grad_norm": 1.90625, + "learning_rate": 0.00013605281034750143, + "loss": 1.9392, + "step": 325155 + }, + { + "epoch": 0.77, + "grad_norm": 2.0, + "learning_rate": 0.00013605108635052183, + "loss": 2.0612, + "step": 325160 + }, + { + "epoch": 0.77, + "grad_norm": 2.0625, + "learning_rate": 0.00013604936234122653, + "loss": 1.9372, + "step": 325165 + }, + { + "epoch": 0.77, + "grad_norm": 2.234375, + "learning_rate": 0.00013604763831961614, + "loss": 2.1126, + "step": 325170 + }, + { + "epoch": 0.77, + "grad_norm": 1.9140625, + "learning_rate": 0.0001360459142856912, + "loss": 2.0888, + "step": 325175 + }, + { + "epoch": 0.77, + "grad_norm": 2.03125, + "learning_rate": 0.00013604419023945232, + "loss": 2.0958, + "step": 325180 + }, + { + "epoch": 0.77, + "grad_norm": 2.609375, + "learning_rate": 0.0001360424661809001, + "loss": 2.1048, + "step": 325185 + }, + { + "epoch": 0.77, + "grad_norm": 2.28125, + "learning_rate": 0.0001360407421100351, + "loss": 2.0608, + "step": 325190 + }, + { + "epoch": 0.77, + "grad_norm": 2.25, + "learning_rate": 0.00013603901802685796, + "loss": 2.0154, + "step": 325195 + }, + { + "epoch": 0.77, + "grad_norm": 1.90625, + "learning_rate": 0.00013603729393136922, + "loss": 2.0365, + "step": 325200 + }, + { + "epoch": 0.77, + "grad_norm": 1.9921875, + "learning_rate": 0.0001360355698235695, + "loss": 2.1935, + "step": 325205 + }, + { + "epoch": 0.77, + "grad_norm": 2.203125, + "learning_rate": 0.00013603384570345936, + "loss": 2.0316, + "step": 325210 + }, + { + "epoch": 0.77, + "grad_norm": 2.3125, + "learning_rate": 0.00013603212157103938, + "loss": 2.1128, + "step": 325215 + }, + { + "epoch": 0.77, + "grad_norm": 2.109375, + "learning_rate": 0.00013603039742631024, + "loss": 1.9318, + "step": 325220 + }, + { + "epoch": 0.77, + "grad_norm": 2.046875, + "learning_rate": 0.00013602867326927242, + "loss": 1.9107, + "step": 325225 + }, + { + "epoch": 0.77, + "grad_norm": 2.34375, + "learning_rate": 0.00013602694909992655, + "loss": 2.1539, + "step": 325230 + }, + { + "epoch": 0.77, + "grad_norm": 2.421875, + "learning_rate": 0.00013602522491827323, + "loss": 1.9453, + "step": 325235 + }, + { + "epoch": 0.77, + "grad_norm": 1.8359375, + "learning_rate": 0.00013602350072431307, + "loss": 1.9413, + "step": 325240 + }, + { + "epoch": 0.77, + "grad_norm": 2.203125, + "learning_rate": 0.0001360217765180466, + "loss": 2.0606, + "step": 325245 + }, + { + "epoch": 0.77, + "grad_norm": 2.765625, + "learning_rate": 0.00013602005229947442, + "loss": 2.034, + "step": 325250 + }, + { + "epoch": 0.77, + "grad_norm": 2.234375, + "learning_rate": 0.00013601832806859712, + "loss": 2.0464, + "step": 325255 + }, + { + "epoch": 0.77, + "grad_norm": 2.1875, + "learning_rate": 0.00013601660382541538, + "loss": 2.0601, + "step": 325260 + }, + { + "epoch": 0.77, + "grad_norm": 2.328125, + "learning_rate": 0.00013601487956992967, + "loss": 2.1224, + "step": 325265 + }, + { + "epoch": 0.77, + "grad_norm": 2.359375, + "learning_rate": 0.00013601315530214063, + "loss": 2.0931, + "step": 325270 + }, + { + "epoch": 0.77, + "grad_norm": 2.359375, + "learning_rate": 0.00013601143102204882, + "loss": 2.1331, + "step": 325275 + }, + { + "epoch": 0.77, + "grad_norm": 2.203125, + "learning_rate": 0.00013600970672965486, + "loss": 2.1507, + "step": 325280 + }, + { + "epoch": 0.77, + "grad_norm": 2.46875, + "learning_rate": 0.0001360079824249594, + "loss": 2.1304, + "step": 325285 + }, + { + "epoch": 0.77, + "grad_norm": 2.5, + "learning_rate": 0.00013600625810796288, + "loss": 2.1636, + "step": 325290 + }, + { + "epoch": 0.77, + "grad_norm": 2.328125, + "learning_rate": 0.000136004533778666, + "loss": 2.1241, + "step": 325295 + }, + { + "epoch": 0.77, + "grad_norm": 2.625, + "learning_rate": 0.0001360028094370693, + "loss": 2.2981, + "step": 325300 + }, + { + "epoch": 0.77, + "grad_norm": 2.34375, + "learning_rate": 0.0001360010850831734, + "loss": 2.1318, + "step": 325305 + }, + { + "epoch": 0.77, + "grad_norm": 2.203125, + "learning_rate": 0.00013599936071697887, + "loss": 2.1049, + "step": 325310 + }, + { + "epoch": 0.77, + "grad_norm": 2.265625, + "learning_rate": 0.00013599763633848633, + "loss": 2.0813, + "step": 325315 + }, + { + "epoch": 0.77, + "grad_norm": 2.15625, + "learning_rate": 0.00013599591194769631, + "loss": 2.0511, + "step": 325320 + }, + { + "epoch": 0.77, + "grad_norm": 2.21875, + "learning_rate": 0.00013599418754460944, + "loss": 2.1205, + "step": 325325 + }, + { + "epoch": 0.77, + "grad_norm": 2.265625, + "learning_rate": 0.00013599246312922633, + "loss": 2.1824, + "step": 325330 + }, + { + "epoch": 0.77, + "grad_norm": 2.5, + "learning_rate": 0.00013599073870154753, + "loss": 2.029, + "step": 325335 + }, + { + "epoch": 0.77, + "grad_norm": 1.9140625, + "learning_rate": 0.00013598901426157363, + "loss": 2.1176, + "step": 325340 + }, + { + "epoch": 0.77, + "grad_norm": 1.921875, + "learning_rate": 0.00013598728980930523, + "loss": 1.9683, + "step": 325345 + }, + { + "epoch": 0.77, + "grad_norm": 2.15625, + "learning_rate": 0.00013598556534474292, + "loss": 1.9667, + "step": 325350 + }, + { + "epoch": 0.77, + "grad_norm": 2.359375, + "learning_rate": 0.00013598384086788728, + "loss": 2.1446, + "step": 325355 + }, + { + "epoch": 0.77, + "grad_norm": 2.46875, + "learning_rate": 0.00013598211637873893, + "loss": 2.0304, + "step": 325360 + }, + { + "epoch": 0.77, + "grad_norm": 2.234375, + "learning_rate": 0.00013598039187729844, + "loss": 1.9738, + "step": 325365 + }, + { + "epoch": 0.77, + "grad_norm": 2.078125, + "learning_rate": 0.00013597866736356638, + "loss": 2.1528, + "step": 325370 + }, + { + "epoch": 0.77, + "grad_norm": 2.171875, + "learning_rate": 0.00013597694283754334, + "loss": 2.035, + "step": 325375 + }, + { + "epoch": 0.77, + "grad_norm": 2.71875, + "learning_rate": 0.00013597521829922997, + "loss": 2.1267, + "step": 325380 + }, + { + "epoch": 0.77, + "grad_norm": 1.9140625, + "learning_rate": 0.00013597349374862676, + "loss": 2.1688, + "step": 325385 + }, + { + "epoch": 0.77, + "grad_norm": 2.28125, + "learning_rate": 0.00013597176918573437, + "loss": 1.9544, + "step": 325390 + }, + { + "epoch": 0.77, + "grad_norm": 2.140625, + "learning_rate": 0.00013597004461055338, + "loss": 1.9744, + "step": 325395 + }, + { + "epoch": 0.77, + "grad_norm": 2.5625, + "learning_rate": 0.0001359683200230844, + "loss": 2.0239, + "step": 325400 + }, + { + "epoch": 0.77, + "grad_norm": 2.203125, + "learning_rate": 0.00013596659542332794, + "loss": 1.9878, + "step": 325405 + }, + { + "epoch": 0.77, + "grad_norm": 2.046875, + "learning_rate": 0.00013596487081128463, + "loss": 2.0339, + "step": 325410 + }, + { + "epoch": 0.77, + "grad_norm": 2.015625, + "learning_rate": 0.0001359631461869551, + "loss": 1.9417, + "step": 325415 + }, + { + "epoch": 0.77, + "grad_norm": 2.515625, + "learning_rate": 0.00013596142155033992, + "loss": 1.9991, + "step": 325420 + }, + { + "epoch": 0.77, + "grad_norm": 1.9296875, + "learning_rate": 0.00013595969690143964, + "loss": 1.9436, + "step": 325425 + }, + { + "epoch": 0.77, + "grad_norm": 2.265625, + "learning_rate": 0.00013595797224025488, + "loss": 2.1122, + "step": 325430 + }, + { + "epoch": 0.77, + "grad_norm": 1.703125, + "learning_rate": 0.0001359562475667862, + "loss": 2.0047, + "step": 325435 + }, + { + "epoch": 0.77, + "grad_norm": 2.421875, + "learning_rate": 0.00013595452288103424, + "loss": 2.0069, + "step": 325440 + }, + { + "epoch": 0.77, + "grad_norm": 2.296875, + "learning_rate": 0.0001359527981829996, + "loss": 1.8845, + "step": 325445 + }, + { + "epoch": 0.77, + "grad_norm": 2.4375, + "learning_rate": 0.00013595107347268279, + "loss": 2.0964, + "step": 325450 + }, + { + "epoch": 0.77, + "grad_norm": 2.421875, + "learning_rate": 0.00013594934875008446, + "loss": 2.1278, + "step": 325455 + }, + { + "epoch": 0.77, + "grad_norm": 2.8125, + "learning_rate": 0.00013594762401520515, + "loss": 2.1096, + "step": 325460 + }, + { + "epoch": 0.77, + "grad_norm": 2.28125, + "learning_rate": 0.00013594589926804547, + "loss": 2.0487, + "step": 325465 + }, + { + "epoch": 0.77, + "grad_norm": 1.9921875, + "learning_rate": 0.0001359441745086061, + "loss": 2.2051, + "step": 325470 + }, + { + "epoch": 0.77, + "grad_norm": 3.078125, + "learning_rate": 0.00013594244973688747, + "loss": 1.8307, + "step": 325475 + }, + { + "epoch": 0.77, + "grad_norm": 2.21875, + "learning_rate": 0.00013594072495289026, + "loss": 1.9932, + "step": 325480 + }, + { + "epoch": 0.77, + "grad_norm": 2.03125, + "learning_rate": 0.00013593900015661508, + "loss": 1.9379, + "step": 325485 + }, + { + "epoch": 0.77, + "grad_norm": 2.21875, + "learning_rate": 0.00013593727534806245, + "loss": 2.0221, + "step": 325490 + }, + { + "epoch": 0.77, + "grad_norm": 2.0, + "learning_rate": 0.00013593555052723302, + "loss": 1.8123, + "step": 325495 + }, + { + "epoch": 0.77, + "grad_norm": 1.9140625, + "learning_rate": 0.00013593382569412733, + "loss": 2.002, + "step": 325500 + }, + { + "epoch": 0.77, + "grad_norm": 2.296875, + "learning_rate": 0.00013593210084874598, + "loss": 2.0256, + "step": 325505 + }, + { + "epoch": 0.77, + "grad_norm": 2.234375, + "learning_rate": 0.00013593037599108962, + "loss": 2.0992, + "step": 325510 + }, + { + "epoch": 0.77, + "grad_norm": 2.4375, + "learning_rate": 0.00013592865112115878, + "loss": 2.0432, + "step": 325515 + }, + { + "epoch": 0.77, + "grad_norm": 2.09375, + "learning_rate": 0.00013592692623895405, + "loss": 2.0845, + "step": 325520 + }, + { + "epoch": 0.77, + "grad_norm": 2.328125, + "learning_rate": 0.00013592520134447601, + "loss": 2.1279, + "step": 325525 + }, + { + "epoch": 0.77, + "grad_norm": 2.09375, + "learning_rate": 0.0001359234764377253, + "loss": 2.1934, + "step": 325530 + }, + { + "epoch": 0.77, + "grad_norm": 2.28125, + "learning_rate": 0.00013592175151870247, + "loss": 2.0866, + "step": 325535 + }, + { + "epoch": 0.77, + "grad_norm": 2.640625, + "learning_rate": 0.00013592002658740815, + "loss": 2.0352, + "step": 325540 + }, + { + "epoch": 0.77, + "grad_norm": 1.9765625, + "learning_rate": 0.00013591830164384283, + "loss": 2.1174, + "step": 325545 + }, + { + "epoch": 0.77, + "grad_norm": 2.40625, + "learning_rate": 0.00013591657668800722, + "loss": 2.0489, + "step": 325550 + }, + { + "epoch": 0.77, + "grad_norm": 1.75, + "learning_rate": 0.00013591485171990183, + "loss": 1.9433, + "step": 325555 + }, + { + "epoch": 0.77, + "grad_norm": 2.140625, + "learning_rate": 0.0001359131267395273, + "loss": 1.9811, + "step": 325560 + }, + { + "epoch": 0.77, + "grad_norm": 2.125, + "learning_rate": 0.0001359114017468842, + "loss": 2.1458, + "step": 325565 + }, + { + "epoch": 0.77, + "grad_norm": 2.171875, + "learning_rate": 0.00013590967674197306, + "loss": 2.0828, + "step": 325570 + }, + { + "epoch": 0.77, + "grad_norm": 1.890625, + "learning_rate": 0.00013590795172479454, + "loss": 2.1474, + "step": 325575 + }, + { + "epoch": 0.77, + "grad_norm": 2.109375, + "learning_rate": 0.00013590622669534924, + "loss": 1.969, + "step": 325580 + }, + { + "epoch": 0.77, + "grad_norm": 2.34375, + "learning_rate": 0.00013590450165363772, + "loss": 2.0258, + "step": 325585 + }, + { + "epoch": 0.77, + "grad_norm": 2.5, + "learning_rate": 0.00013590277659966056, + "loss": 2.0823, + "step": 325590 + }, + { + "epoch": 0.77, + "grad_norm": 2.375, + "learning_rate": 0.00013590105153341833, + "loss": 1.9534, + "step": 325595 + }, + { + "epoch": 0.77, + "grad_norm": 2.578125, + "learning_rate": 0.00013589932645491172, + "loss": 2.0445, + "step": 325600 + }, + { + "epoch": 0.77, + "grad_norm": 2.28125, + "learning_rate": 0.0001358976013641412, + "loss": 1.9743, + "step": 325605 + }, + { + "epoch": 0.77, + "grad_norm": 1.9921875, + "learning_rate": 0.00013589587626110743, + "loss": 2.3149, + "step": 325610 + }, + { + "epoch": 0.77, + "grad_norm": 1.9375, + "learning_rate": 0.00013589415114581097, + "loss": 1.965, + "step": 325615 + }, + { + "epoch": 0.77, + "grad_norm": 1.96875, + "learning_rate": 0.0001358924260182524, + "loss": 2.1743, + "step": 325620 + }, + { + "epoch": 0.77, + "grad_norm": 2.234375, + "learning_rate": 0.00013589070087843233, + "loss": 2.0208, + "step": 325625 + }, + { + "epoch": 0.77, + "grad_norm": 2.203125, + "learning_rate": 0.00013588897572635138, + "loss": 2.0415, + "step": 325630 + }, + { + "epoch": 0.77, + "grad_norm": 2.171875, + "learning_rate": 0.0001358872505620101, + "loss": 2.0664, + "step": 325635 + }, + { + "epoch": 0.77, + "grad_norm": 1.8984375, + "learning_rate": 0.00013588552538540904, + "loss": 2.0945, + "step": 325640 + }, + { + "epoch": 0.77, + "grad_norm": 2.421875, + "learning_rate": 0.00013588380019654886, + "loss": 2.1762, + "step": 325645 + }, + { + "epoch": 0.77, + "grad_norm": 2.3125, + "learning_rate": 0.0001358820749954301, + "loss": 2.0621, + "step": 325650 + }, + { + "epoch": 0.77, + "grad_norm": 2.578125, + "learning_rate": 0.00013588034978205343, + "loss": 1.9372, + "step": 325655 + }, + { + "epoch": 0.77, + "grad_norm": 2.203125, + "learning_rate": 0.00013587862455641933, + "loss": 2.0106, + "step": 325660 + }, + { + "epoch": 0.77, + "grad_norm": 2.21875, + "learning_rate": 0.00013587689931852844, + "loss": 2.0545, + "step": 325665 + }, + { + "epoch": 0.77, + "grad_norm": 2.28125, + "learning_rate": 0.00013587517406838138, + "loss": 2.0098, + "step": 325670 + }, + { + "epoch": 0.77, + "grad_norm": 2.359375, + "learning_rate": 0.0001358734488059787, + "loss": 2.198, + "step": 325675 + }, + { + "epoch": 0.77, + "grad_norm": 2.125, + "learning_rate": 0.00013587172353132098, + "loss": 2.0555, + "step": 325680 + }, + { + "epoch": 0.77, + "grad_norm": 2.421875, + "learning_rate": 0.00013586999824440884, + "loss": 2.0742, + "step": 325685 + }, + { + "epoch": 0.77, + "grad_norm": 2.65625, + "learning_rate": 0.00013586827294524286, + "loss": 2.1204, + "step": 325690 + }, + { + "epoch": 0.77, + "grad_norm": 1.8203125, + "learning_rate": 0.00013586654763382368, + "loss": 1.8114, + "step": 325695 + }, + { + "epoch": 0.77, + "grad_norm": 2.15625, + "learning_rate": 0.00013586482231015177, + "loss": 1.8934, + "step": 325700 + }, + { + "epoch": 0.77, + "grad_norm": 2.125, + "learning_rate": 0.00013586309697422778, + "loss": 2.17, + "step": 325705 + }, + { + "epoch": 0.77, + "grad_norm": 2.25, + "learning_rate": 0.00013586137162605234, + "loss": 2.0193, + "step": 325710 + }, + { + "epoch": 0.77, + "grad_norm": 2.09375, + "learning_rate": 0.000135859646265626, + "loss": 1.9272, + "step": 325715 + }, + { + "epoch": 0.77, + "grad_norm": 2.25, + "learning_rate": 0.00013585792089294935, + "loss": 2.0571, + "step": 325720 + }, + { + "epoch": 0.77, + "grad_norm": 2.015625, + "learning_rate": 0.000135856195508023, + "loss": 2.0269, + "step": 325725 + }, + { + "epoch": 0.77, + "grad_norm": 2.453125, + "learning_rate": 0.00013585447011084747, + "loss": 1.8911, + "step": 325730 + }, + { + "epoch": 0.77, + "grad_norm": 2.109375, + "learning_rate": 0.00013585274470142344, + "loss": 2.1408, + "step": 325735 + }, + { + "epoch": 0.77, + "grad_norm": 2.0625, + "learning_rate": 0.00013585101927975147, + "loss": 2.0325, + "step": 325740 + }, + { + "epoch": 0.77, + "grad_norm": 2.328125, + "learning_rate": 0.00013584929384583214, + "loss": 1.9142, + "step": 325745 + }, + { + "epoch": 0.77, + "grad_norm": 2.1875, + "learning_rate": 0.00013584756839966604, + "loss": 1.9604, + "step": 325750 + }, + { + "epoch": 0.77, + "grad_norm": 2.015625, + "learning_rate": 0.00013584584294125374, + "loss": 2.0169, + "step": 325755 + }, + { + "epoch": 0.77, + "grad_norm": 1.8828125, + "learning_rate": 0.0001358441174705959, + "loss": 2.1834, + "step": 325760 + }, + { + "epoch": 0.77, + "grad_norm": 2.578125, + "learning_rate": 0.000135842391987693, + "loss": 2.0485, + "step": 325765 + }, + { + "epoch": 0.77, + "grad_norm": 2.5, + "learning_rate": 0.00013584066649254572, + "loss": 2.1201, + "step": 325770 + }, + { + "epoch": 0.77, + "grad_norm": 1.90625, + "learning_rate": 0.0001358389409851546, + "loss": 1.9832, + "step": 325775 + }, + { + "epoch": 0.77, + "grad_norm": 2.453125, + "learning_rate": 0.00013583721546552027, + "loss": 1.909, + "step": 325780 + }, + { + "epoch": 0.77, + "grad_norm": 3.21875, + "learning_rate": 0.00013583548993364327, + "loss": 1.9838, + "step": 325785 + }, + { + "epoch": 0.77, + "grad_norm": 2.09375, + "learning_rate": 0.00013583376438952425, + "loss": 2.1111, + "step": 325790 + }, + { + "epoch": 0.77, + "grad_norm": 2.421875, + "learning_rate": 0.00013583203883316375, + "loss": 2.0942, + "step": 325795 + }, + { + "epoch": 0.77, + "grad_norm": 2.640625, + "learning_rate": 0.00013583031326456238, + "loss": 2.1418, + "step": 325800 + }, + { + "epoch": 0.77, + "grad_norm": 2.28125, + "learning_rate": 0.0001358285876837207, + "loss": 1.9898, + "step": 325805 + }, + { + "epoch": 0.77, + "grad_norm": 1.75, + "learning_rate": 0.00013582686209063935, + "loss": 1.7625, + "step": 325810 + }, + { + "epoch": 0.77, + "grad_norm": 1.9375, + "learning_rate": 0.0001358251364853189, + "loss": 2.0496, + "step": 325815 + }, + { + "epoch": 0.77, + "grad_norm": 2.1875, + "learning_rate": 0.0001358234108677599, + "loss": 2.0346, + "step": 325820 + }, + { + "epoch": 0.77, + "grad_norm": 2.5625, + "learning_rate": 0.00013582168523796302, + "loss": 2.147, + "step": 325825 + }, + { + "epoch": 0.77, + "grad_norm": 1.8984375, + "learning_rate": 0.00013581995959592878, + "loss": 2.2766, + "step": 325830 + }, + { + "epoch": 0.77, + "grad_norm": 2.484375, + "learning_rate": 0.00013581823394165777, + "loss": 2.0651, + "step": 325835 + }, + { + "epoch": 0.77, + "grad_norm": 2.453125, + "learning_rate": 0.00013581650827515062, + "loss": 2.1317, + "step": 325840 + }, + { + "epoch": 0.77, + "grad_norm": 2.1875, + "learning_rate": 0.00013581478259640792, + "loss": 2.2203, + "step": 325845 + }, + { + "epoch": 0.77, + "grad_norm": 2.40625, + "learning_rate": 0.0001358130569054302, + "loss": 2.019, + "step": 325850 + }, + { + "epoch": 0.77, + "grad_norm": 2.140625, + "learning_rate": 0.00013581133120221814, + "loss": 2.1446, + "step": 325855 + }, + { + "epoch": 0.77, + "grad_norm": 2.015625, + "learning_rate": 0.00013580960548677227, + "loss": 2.037, + "step": 325860 + }, + { + "epoch": 0.77, + "grad_norm": 1.5703125, + "learning_rate": 0.00013580787975909315, + "loss": 1.9365, + "step": 325865 + }, + { + "epoch": 0.77, + "grad_norm": 1.9765625, + "learning_rate": 0.00013580615401918142, + "loss": 1.8891, + "step": 325870 + }, + { + "epoch": 0.77, + "grad_norm": 2.0625, + "learning_rate": 0.0001358044282670377, + "loss": 2.0926, + "step": 325875 + }, + { + "epoch": 0.77, + "grad_norm": 2.4375, + "learning_rate": 0.00013580270250266249, + "loss": 1.9898, + "step": 325880 + }, + { + "epoch": 0.77, + "grad_norm": 2.40625, + "learning_rate": 0.00013580097672605645, + "loss": 1.926, + "step": 325885 + }, + { + "epoch": 0.77, + "grad_norm": 2.0625, + "learning_rate": 0.00013579925093722012, + "loss": 1.9245, + "step": 325890 + }, + { + "epoch": 0.77, + "grad_norm": 2.625, + "learning_rate": 0.00013579752513615415, + "loss": 2.0167, + "step": 325895 + }, + { + "epoch": 0.77, + "grad_norm": 2.125, + "learning_rate": 0.00013579579932285912, + "loss": 1.9888, + "step": 325900 + }, + { + "epoch": 0.77, + "grad_norm": 2.359375, + "learning_rate": 0.00013579407349733556, + "loss": 1.9222, + "step": 325905 + }, + { + "epoch": 0.77, + "grad_norm": 2.265625, + "learning_rate": 0.0001357923476595841, + "loss": 2.146, + "step": 325910 + }, + { + "epoch": 0.77, + "grad_norm": 2.28125, + "learning_rate": 0.00013579062180960532, + "loss": 2.1543, + "step": 325915 + }, + { + "epoch": 0.77, + "grad_norm": 2.03125, + "learning_rate": 0.00013578889594739982, + "loss": 2.0618, + "step": 325920 + }, + { + "epoch": 0.77, + "grad_norm": 2.59375, + "learning_rate": 0.00013578717007296817, + "loss": 2.1158, + "step": 325925 + }, + { + "epoch": 0.77, + "grad_norm": 2.578125, + "learning_rate": 0.00013578544418631097, + "loss": 2.1245, + "step": 325930 + }, + { + "epoch": 0.77, + "grad_norm": 2.234375, + "learning_rate": 0.00013578371828742883, + "loss": 2.1881, + "step": 325935 + }, + { + "epoch": 0.77, + "grad_norm": 2.453125, + "learning_rate": 0.00013578199237632233, + "loss": 2.0515, + "step": 325940 + }, + { + "epoch": 0.77, + "grad_norm": 2.359375, + "learning_rate": 0.00013578026645299205, + "loss": 1.9292, + "step": 325945 + }, + { + "epoch": 0.77, + "grad_norm": 2.578125, + "learning_rate": 0.00013577854051743853, + "loss": 2.22, + "step": 325950 + }, + { + "epoch": 0.77, + "grad_norm": 2.0625, + "learning_rate": 0.00013577681456966248, + "loss": 2.2213, + "step": 325955 + }, + { + "epoch": 0.77, + "grad_norm": 2.515625, + "learning_rate": 0.0001357750886096644, + "loss": 2.0271, + "step": 325960 + }, + { + "epoch": 0.77, + "grad_norm": 1.9140625, + "learning_rate": 0.0001357733626374449, + "loss": 1.9872, + "step": 325965 + }, + { + "epoch": 0.77, + "grad_norm": 2.34375, + "learning_rate": 0.00013577163665300456, + "loss": 2.226, + "step": 325970 + }, + { + "epoch": 0.77, + "grad_norm": 2.359375, + "learning_rate": 0.000135769910656344, + "loss": 2.0048, + "step": 325975 + }, + { + "epoch": 0.77, + "grad_norm": 2.15625, + "learning_rate": 0.00013576818464746377, + "loss": 2.0416, + "step": 325980 + }, + { + "epoch": 0.77, + "grad_norm": 2.1875, + "learning_rate": 0.0001357664586263645, + "loss": 2.157, + "step": 325985 + }, + { + "epoch": 0.77, + "grad_norm": 2.3125, + "learning_rate": 0.00013576473259304677, + "loss": 2.0697, + "step": 325990 + }, + { + "epoch": 0.77, + "grad_norm": 2.34375, + "learning_rate": 0.00013576300654751113, + "loss": 2.1463, + "step": 325995 + }, + { + "epoch": 0.77, + "grad_norm": 2.140625, + "learning_rate": 0.0001357612804897582, + "loss": 2.1213, + "step": 326000 + }, + { + "epoch": 0.77, + "grad_norm": 2.25, + "learning_rate": 0.00013575955441978856, + "loss": 2.0619, + "step": 326005 + }, + { + "epoch": 0.77, + "grad_norm": 2.28125, + "learning_rate": 0.00013575782833760282, + "loss": 2.0394, + "step": 326010 + }, + { + "epoch": 0.77, + "grad_norm": 2.15625, + "learning_rate": 0.00013575610224320157, + "loss": 2.0808, + "step": 326015 + }, + { + "epoch": 0.77, + "grad_norm": 2.234375, + "learning_rate": 0.00013575437613658537, + "loss": 2.0749, + "step": 326020 + }, + { + "epoch": 0.77, + "grad_norm": 2.203125, + "learning_rate": 0.00013575265001775482, + "loss": 2.1514, + "step": 326025 + }, + { + "epoch": 0.77, + "grad_norm": 2.15625, + "learning_rate": 0.0001357509238867105, + "loss": 2.1439, + "step": 326030 + }, + { + "epoch": 0.77, + "grad_norm": 1.953125, + "learning_rate": 0.0001357491977434531, + "loss": 2.0116, + "step": 326035 + }, + { + "epoch": 0.77, + "grad_norm": 2.671875, + "learning_rate": 0.00013574747158798305, + "loss": 2.2415, + "step": 326040 + }, + { + "epoch": 0.77, + "grad_norm": 2.46875, + "learning_rate": 0.00013574574542030106, + "loss": 1.9819, + "step": 326045 + }, + { + "epoch": 0.77, + "grad_norm": 2.671875, + "learning_rate": 0.00013574401924040761, + "loss": 1.9423, + "step": 326050 + }, + { + "epoch": 0.77, + "grad_norm": 2.3125, + "learning_rate": 0.00013574229304830343, + "loss": 2.0538, + "step": 326055 + }, + { + "epoch": 0.77, + "grad_norm": 2.140625, + "learning_rate": 0.00013574056684398898, + "loss": 1.8603, + "step": 326060 + }, + { + "epoch": 0.77, + "grad_norm": 2.109375, + "learning_rate": 0.00013573884062746493, + "loss": 2.0951, + "step": 326065 + }, + { + "epoch": 0.77, + "grad_norm": 1.765625, + "learning_rate": 0.0001357371143987318, + "loss": 1.9718, + "step": 326070 + }, + { + "epoch": 0.77, + "grad_norm": 2.3125, + "learning_rate": 0.0001357353881577903, + "loss": 2.1041, + "step": 326075 + }, + { + "epoch": 0.77, + "grad_norm": 2.21875, + "learning_rate": 0.0001357336619046409, + "loss": 1.9978, + "step": 326080 + }, + { + "epoch": 0.77, + "grad_norm": 2.34375, + "learning_rate": 0.00013573193563928425, + "loss": 2.0197, + "step": 326085 + }, + { + "epoch": 0.77, + "grad_norm": 2.3125, + "learning_rate": 0.0001357302093617209, + "loss": 2.0282, + "step": 326090 + }, + { + "epoch": 0.77, + "grad_norm": 2.171875, + "learning_rate": 0.00013572848307195147, + "loss": 2.1434, + "step": 326095 + }, + { + "epoch": 0.77, + "grad_norm": 2.59375, + "learning_rate": 0.00013572675676997654, + "loss": 2.0226, + "step": 326100 + }, + { + "epoch": 0.77, + "grad_norm": 2.03125, + "learning_rate": 0.0001357250304557967, + "loss": 2.0761, + "step": 326105 + }, + { + "epoch": 0.77, + "grad_norm": 1.703125, + "learning_rate": 0.00013572330412941255, + "loss": 1.8581, + "step": 326110 + }, + { + "epoch": 0.77, + "grad_norm": 2.078125, + "learning_rate": 0.00013572157779082468, + "loss": 2.2203, + "step": 326115 + }, + { + "epoch": 0.77, + "grad_norm": 2.140625, + "learning_rate": 0.00013571985144003365, + "loss": 2.1146, + "step": 326120 + }, + { + "epoch": 0.77, + "grad_norm": 2.171875, + "learning_rate": 0.00013571812507704012, + "loss": 1.9125, + "step": 326125 + }, + { + "epoch": 0.77, + "grad_norm": 2.140625, + "learning_rate": 0.00013571639870184457, + "loss": 1.9834, + "step": 326130 + }, + { + "epoch": 0.77, + "grad_norm": 2.015625, + "learning_rate": 0.00013571467231444765, + "loss": 2.078, + "step": 326135 + }, + { + "epoch": 0.77, + "grad_norm": 2.328125, + "learning_rate": 0.00013571294591484997, + "loss": 2.1256, + "step": 326140 + }, + { + "epoch": 0.77, + "grad_norm": 2.21875, + "learning_rate": 0.00013571121950305213, + "loss": 2.0974, + "step": 326145 + }, + { + "epoch": 0.77, + "grad_norm": 2.0, + "learning_rate": 0.00013570949307905466, + "loss": 1.968, + "step": 326150 + }, + { + "epoch": 0.77, + "grad_norm": 2.34375, + "learning_rate": 0.0001357077666428582, + "loss": 1.9582, + "step": 326155 + }, + { + "epoch": 0.77, + "grad_norm": 2.296875, + "learning_rate": 0.00013570604019446327, + "loss": 2.0532, + "step": 326160 + }, + { + "epoch": 0.77, + "grad_norm": 2.015625, + "learning_rate": 0.00013570431373387053, + "loss": 2.0746, + "step": 326165 + }, + { + "epoch": 0.77, + "grad_norm": 2.046875, + "learning_rate": 0.0001357025872610806, + "loss": 2.0943, + "step": 326170 + }, + { + "epoch": 0.77, + "grad_norm": 1.796875, + "learning_rate": 0.00013570086077609398, + "loss": 2.1035, + "step": 326175 + }, + { + "epoch": 0.77, + "grad_norm": 2.421875, + "learning_rate": 0.0001356991342789113, + "loss": 2.2169, + "step": 326180 + }, + { + "epoch": 0.77, + "grad_norm": 2.09375, + "learning_rate": 0.00013569740776953312, + "loss": 1.7846, + "step": 326185 + }, + { + "epoch": 0.77, + "grad_norm": 2.109375, + "learning_rate": 0.0001356956812479601, + "loss": 2.0153, + "step": 326190 + }, + { + "epoch": 0.77, + "grad_norm": 2.4375, + "learning_rate": 0.00013569395471419278, + "loss": 2.0075, + "step": 326195 + }, + { + "epoch": 0.77, + "grad_norm": 2.15625, + "learning_rate": 0.00013569222816823176, + "loss": 1.9103, + "step": 326200 + }, + { + "epoch": 0.77, + "grad_norm": 2.1875, + "learning_rate": 0.00013569050161007758, + "loss": 2.014, + "step": 326205 + }, + { + "epoch": 0.77, + "grad_norm": 2.328125, + "learning_rate": 0.00013568877503973093, + "loss": 2.0915, + "step": 326210 + }, + { + "epoch": 0.77, + "grad_norm": 2.453125, + "learning_rate": 0.00013568704845719234, + "loss": 1.9827, + "step": 326215 + }, + { + "epoch": 0.77, + "grad_norm": 2.171875, + "learning_rate": 0.00013568532186246238, + "loss": 1.9526, + "step": 326220 + }, + { + "epoch": 0.77, + "grad_norm": 2.40625, + "learning_rate": 0.00013568359525554168, + "loss": 1.9847, + "step": 326225 + }, + { + "epoch": 0.77, + "grad_norm": 1.8046875, + "learning_rate": 0.00013568186863643081, + "loss": 2.1087, + "step": 326230 + }, + { + "epoch": 0.77, + "grad_norm": 2.1875, + "learning_rate": 0.0001356801420051304, + "loss": 2.212, + "step": 326235 + }, + { + "epoch": 0.77, + "grad_norm": 2.046875, + "learning_rate": 0.000135678415361641, + "loss": 2.0268, + "step": 326240 + }, + { + "epoch": 0.77, + "grad_norm": 2.765625, + "learning_rate": 0.00013567668870596318, + "loss": 2.0258, + "step": 326245 + }, + { + "epoch": 0.77, + "grad_norm": 2.40625, + "learning_rate": 0.00013567496203809758, + "loss": 2.0989, + "step": 326250 + }, + { + "epoch": 0.77, + "grad_norm": 2.546875, + "learning_rate": 0.0001356732353580447, + "loss": 1.9757, + "step": 326255 + }, + { + "epoch": 0.77, + "grad_norm": 2.203125, + "learning_rate": 0.00013567150866580527, + "loss": 2.0783, + "step": 326260 + }, + { + "epoch": 0.77, + "grad_norm": 2.21875, + "learning_rate": 0.0001356697819613798, + "loss": 1.9894, + "step": 326265 + }, + { + "epoch": 0.77, + "grad_norm": 2.15625, + "learning_rate": 0.00013566805524476886, + "loss": 1.8124, + "step": 326270 + }, + { + "epoch": 0.77, + "grad_norm": 2.53125, + "learning_rate": 0.00013566632851597308, + "loss": 1.9219, + "step": 326275 + }, + { + "epoch": 0.77, + "grad_norm": 2.421875, + "learning_rate": 0.00013566460177499303, + "loss": 2.2138, + "step": 326280 + }, + { + "epoch": 0.77, + "grad_norm": 2.203125, + "learning_rate": 0.00013566287502182932, + "loss": 1.9694, + "step": 326285 + }, + { + "epoch": 0.77, + "grad_norm": 4.0, + "learning_rate": 0.0001356611482564825, + "loss": 2.036, + "step": 326290 + }, + { + "epoch": 0.77, + "grad_norm": 2.59375, + "learning_rate": 0.0001356594214789532, + "loss": 1.9612, + "step": 326295 + }, + { + "epoch": 0.77, + "grad_norm": 1.96875, + "learning_rate": 0.000135657694689242, + "loss": 2.1318, + "step": 326300 + }, + { + "epoch": 0.77, + "grad_norm": 2.59375, + "learning_rate": 0.0001356559678873495, + "loss": 2.0029, + "step": 326305 + }, + { + "epoch": 0.77, + "grad_norm": 2.1875, + "learning_rate": 0.00013565424107327622, + "loss": 2.1174, + "step": 326310 + }, + { + "epoch": 0.77, + "grad_norm": 2.078125, + "learning_rate": 0.00013565251424702286, + "loss": 2.0528, + "step": 326315 + }, + { + "epoch": 0.77, + "grad_norm": 2.1875, + "learning_rate": 0.0001356507874085899, + "loss": 2.0921, + "step": 326320 + }, + { + "epoch": 0.77, + "grad_norm": 2.53125, + "learning_rate": 0.000135649060557978, + "loss": 2.1297, + "step": 326325 + }, + { + "epoch": 0.77, + "grad_norm": 1.9765625, + "learning_rate": 0.00013564733369518777, + "loss": 2.0741, + "step": 326330 + }, + { + "epoch": 0.77, + "grad_norm": 2.5, + "learning_rate": 0.00013564560682021974, + "loss": 1.973, + "step": 326335 + }, + { + "epoch": 0.77, + "grad_norm": 1.9609375, + "learning_rate": 0.00013564387993307455, + "loss": 1.8746, + "step": 326340 + }, + { + "epoch": 0.77, + "grad_norm": 2.3125, + "learning_rate": 0.00013564215303375272, + "loss": 1.8854, + "step": 326345 + }, + { + "epoch": 0.77, + "grad_norm": 1.9609375, + "learning_rate": 0.0001356404261222549, + "loss": 2.0413, + "step": 326350 + }, + { + "epoch": 0.77, + "grad_norm": 2.25, + "learning_rate": 0.00013563869919858167, + "loss": 2.086, + "step": 326355 + }, + { + "epoch": 0.77, + "grad_norm": 2.328125, + "learning_rate": 0.0001356369722627336, + "loss": 1.9946, + "step": 326360 + }, + { + "epoch": 0.77, + "grad_norm": 2.09375, + "learning_rate": 0.00013563524531471133, + "loss": 1.9343, + "step": 326365 + }, + { + "epoch": 0.77, + "grad_norm": 2.3125, + "learning_rate": 0.0001356335183545154, + "loss": 2.195, + "step": 326370 + }, + { + "epoch": 0.77, + "grad_norm": 2.15625, + "learning_rate": 0.0001356317913821464, + "loss": 1.9983, + "step": 326375 + }, + { + "epoch": 0.77, + "grad_norm": 2.484375, + "learning_rate": 0.00013563006439760495, + "loss": 1.9688, + "step": 326380 + }, + { + "epoch": 0.77, + "grad_norm": 2.3125, + "learning_rate": 0.0001356283374008916, + "loss": 2.0468, + "step": 326385 + }, + { + "epoch": 0.77, + "grad_norm": 2.15625, + "learning_rate": 0.00013562661039200698, + "loss": 2.0047, + "step": 326390 + }, + { + "epoch": 0.77, + "grad_norm": 2.25, + "learning_rate": 0.00013562488337095165, + "loss": 2.12, + "step": 326395 + }, + { + "epoch": 0.77, + "grad_norm": 2.0, + "learning_rate": 0.00013562315633772624, + "loss": 2.257, + "step": 326400 + }, + { + "epoch": 0.77, + "grad_norm": 2.4375, + "learning_rate": 0.00013562142929233128, + "loss": 2.2096, + "step": 326405 + }, + { + "epoch": 0.77, + "grad_norm": 2.984375, + "learning_rate": 0.00013561970223476742, + "loss": 2.1611, + "step": 326410 + }, + { + "epoch": 0.77, + "grad_norm": 2.328125, + "learning_rate": 0.00013561797516503521, + "loss": 2.1214, + "step": 326415 + }, + { + "epoch": 0.77, + "grad_norm": 1.984375, + "learning_rate": 0.00013561624808313528, + "loss": 2.0844, + "step": 326420 + }, + { + "epoch": 0.77, + "grad_norm": 2.28125, + "learning_rate": 0.00013561452098906815, + "loss": 1.957, + "step": 326425 + }, + { + "epoch": 0.77, + "grad_norm": 2.125, + "learning_rate": 0.0001356127938828345, + "loss": 1.9685, + "step": 326430 + }, + { + "epoch": 0.77, + "grad_norm": 2.140625, + "learning_rate": 0.00013561106676443485, + "loss": 2.164, + "step": 326435 + }, + { + "epoch": 0.77, + "grad_norm": 2.171875, + "learning_rate": 0.0001356093396338698, + "loss": 1.9371, + "step": 326440 + }, + { + "epoch": 0.77, + "grad_norm": 2.46875, + "learning_rate": 0.00013560761249114, + "loss": 1.8786, + "step": 326445 + }, + { + "epoch": 0.77, + "grad_norm": 2.390625, + "learning_rate": 0.00013560588533624595, + "loss": 2.2007, + "step": 326450 + }, + { + "epoch": 0.77, + "grad_norm": 2.296875, + "learning_rate": 0.00013560415816918828, + "loss": 2.0784, + "step": 326455 + }, + { + "epoch": 0.77, + "grad_norm": 2.421875, + "learning_rate": 0.00013560243098996759, + "loss": 2.1259, + "step": 326460 + }, + { + "epoch": 0.77, + "grad_norm": 1.859375, + "learning_rate": 0.0001356007037985845, + "loss": 2.2063, + "step": 326465 + }, + { + "epoch": 0.77, + "grad_norm": 2.40625, + "learning_rate": 0.00013559897659503952, + "loss": 2.1841, + "step": 326470 + }, + { + "epoch": 0.77, + "grad_norm": 2.078125, + "learning_rate": 0.00013559724937933331, + "loss": 1.9976, + "step": 326475 + }, + { + "epoch": 0.77, + "grad_norm": 2.0625, + "learning_rate": 0.0001355955221514664, + "loss": 1.9569, + "step": 326480 + }, + { + "epoch": 0.77, + "grad_norm": 1.84375, + "learning_rate": 0.00013559379491143948, + "loss": 2.1132, + "step": 326485 + }, + { + "epoch": 0.77, + "grad_norm": 1.96875, + "learning_rate": 0.00013559206765925304, + "loss": 1.8713, + "step": 326490 + }, + { + "epoch": 0.77, + "grad_norm": 2.40625, + "learning_rate": 0.00013559034039490768, + "loss": 2.1139, + "step": 326495 + }, + { + "epoch": 0.77, + "grad_norm": 2.046875, + "learning_rate": 0.00013558861311840402, + "loss": 2.0727, + "step": 326500 + }, + { + "epoch": 0.77, + "grad_norm": 2.453125, + "learning_rate": 0.00013558688582974266, + "loss": 1.8911, + "step": 326505 + }, + { + "epoch": 0.77, + "grad_norm": 2.203125, + "learning_rate": 0.00013558515852892417, + "loss": 2.2295, + "step": 326510 + }, + { + "epoch": 0.77, + "grad_norm": 2.0625, + "learning_rate": 0.00013558343121594914, + "loss": 2.0131, + "step": 326515 + }, + { + "epoch": 0.77, + "grad_norm": 2.34375, + "learning_rate": 0.00013558170389081818, + "loss": 1.8346, + "step": 326520 + }, + { + "epoch": 0.77, + "grad_norm": 2.015625, + "learning_rate": 0.00013557997655353184, + "loss": 2.1708, + "step": 326525 + }, + { + "epoch": 0.77, + "grad_norm": 2.375, + "learning_rate": 0.00013557824920409076, + "loss": 2.2608, + "step": 326530 + }, + { + "epoch": 0.77, + "grad_norm": 2.1875, + "learning_rate": 0.0001355765218424955, + "loss": 2.0374, + "step": 326535 + }, + { + "epoch": 0.77, + "grad_norm": 2.21875, + "learning_rate": 0.00013557479446874664, + "loss": 2.1104, + "step": 326540 + }, + { + "epoch": 0.77, + "grad_norm": 2.03125, + "learning_rate": 0.00013557306708284478, + "loss": 2.003, + "step": 326545 + }, + { + "epoch": 0.77, + "grad_norm": 2.34375, + "learning_rate": 0.00013557133968479054, + "loss": 2.0557, + "step": 326550 + }, + { + "epoch": 0.77, + "grad_norm": 2.421875, + "learning_rate": 0.00013556961227458446, + "loss": 1.9983, + "step": 326555 + }, + { + "epoch": 0.77, + "grad_norm": 2.1875, + "learning_rate": 0.00013556788485222717, + "loss": 1.9475, + "step": 326560 + }, + { + "epoch": 0.77, + "grad_norm": 2.25, + "learning_rate": 0.00013556615741771926, + "loss": 2.0597, + "step": 326565 + }, + { + "epoch": 0.77, + "grad_norm": 2.34375, + "learning_rate": 0.0001355644299710613, + "loss": 2.0415, + "step": 326570 + }, + { + "epoch": 0.77, + "grad_norm": 2.21875, + "learning_rate": 0.00013556270251225387, + "loss": 1.8487, + "step": 326575 + }, + { + "epoch": 0.77, + "grad_norm": 2.15625, + "learning_rate": 0.0001355609750412976, + "loss": 2.0091, + "step": 326580 + }, + { + "epoch": 0.77, + "grad_norm": 1.828125, + "learning_rate": 0.00013555924755819304, + "loss": 2.0057, + "step": 326585 + }, + { + "epoch": 0.77, + "grad_norm": 2.640625, + "learning_rate": 0.00013555752006294078, + "loss": 2.024, + "step": 326590 + }, + { + "epoch": 0.77, + "grad_norm": 2.265625, + "learning_rate": 0.00013555579255554142, + "loss": 2.0776, + "step": 326595 + }, + { + "epoch": 0.77, + "grad_norm": 2.484375, + "learning_rate": 0.0001355540650359956, + "loss": 1.836, + "step": 326600 + }, + { + "epoch": 0.77, + "grad_norm": 2.4375, + "learning_rate": 0.00013555233750430385, + "loss": 2.1515, + "step": 326605 + }, + { + "epoch": 0.77, + "grad_norm": 1.8515625, + "learning_rate": 0.00013555060996046678, + "loss": 1.9913, + "step": 326610 + }, + { + "epoch": 0.77, + "grad_norm": 2.1875, + "learning_rate": 0.00013554888240448494, + "loss": 2.1724, + "step": 326615 + }, + { + "epoch": 0.77, + "grad_norm": 2.578125, + "learning_rate": 0.00013554715483635898, + "loss": 2.0835, + "step": 326620 + }, + { + "epoch": 0.77, + "grad_norm": 2.65625, + "learning_rate": 0.0001355454272560895, + "loss": 2.0435, + "step": 326625 + }, + { + "epoch": 0.77, + "grad_norm": 2.09375, + "learning_rate": 0.00013554369966367702, + "loss": 2.0508, + "step": 326630 + }, + { + "epoch": 0.77, + "grad_norm": 2.375, + "learning_rate": 0.00013554197205912217, + "loss": 2.1205, + "step": 326635 + }, + { + "epoch": 0.77, + "grad_norm": 2.328125, + "learning_rate": 0.00013554024444242555, + "loss": 2.0841, + "step": 326640 + }, + { + "epoch": 0.77, + "grad_norm": 2.265625, + "learning_rate": 0.00013553851681358774, + "loss": 2.0326, + "step": 326645 + }, + { + "epoch": 0.77, + "grad_norm": 2.625, + "learning_rate": 0.00013553678917260933, + "loss": 2.0197, + "step": 326650 + }, + { + "epoch": 0.77, + "grad_norm": 2.0625, + "learning_rate": 0.00013553506151949089, + "loss": 2.03, + "step": 326655 + }, + { + "epoch": 0.77, + "grad_norm": 2.21875, + "learning_rate": 0.00013553333385423304, + "loss": 1.9887, + "step": 326660 + }, + { + "epoch": 0.77, + "grad_norm": 2.25, + "learning_rate": 0.00013553160617683636, + "loss": 2.1054, + "step": 326665 + }, + { + "epoch": 0.77, + "grad_norm": 1.984375, + "learning_rate": 0.0001355298784873014, + "loss": 2.2566, + "step": 326670 + }, + { + "epoch": 0.77, + "grad_norm": 2.15625, + "learning_rate": 0.00013552815078562885, + "loss": 1.935, + "step": 326675 + }, + { + "epoch": 0.77, + "grad_norm": 2.265625, + "learning_rate": 0.0001355264230718192, + "loss": 1.9683, + "step": 326680 + }, + { + "epoch": 0.77, + "grad_norm": 2.515625, + "learning_rate": 0.0001355246953458731, + "loss": 1.8304, + "step": 326685 + }, + { + "epoch": 0.77, + "grad_norm": 2.296875, + "learning_rate": 0.0001355229676077911, + "loss": 1.97, + "step": 326690 + }, + { + "epoch": 0.77, + "grad_norm": 2.515625, + "learning_rate": 0.00013552123985757384, + "loss": 2.1799, + "step": 326695 + }, + { + "epoch": 0.77, + "grad_norm": 1.9921875, + "learning_rate": 0.00013551951209522187, + "loss": 2.0996, + "step": 326700 + }, + { + "epoch": 0.77, + "grad_norm": 2.421875, + "learning_rate": 0.00013551778432073576, + "loss": 2.029, + "step": 326705 + }, + { + "epoch": 0.77, + "grad_norm": 2.015625, + "learning_rate": 0.00013551605653411617, + "loss": 2.1246, + "step": 326710 + }, + { + "epoch": 0.77, + "grad_norm": 2.09375, + "learning_rate": 0.00013551432873536366, + "loss": 2.2044, + "step": 326715 + }, + { + "epoch": 0.77, + "grad_norm": 2.09375, + "learning_rate": 0.00013551260092447876, + "loss": 2.0314, + "step": 326720 + }, + { + "epoch": 0.77, + "grad_norm": 2.453125, + "learning_rate": 0.00013551087310146212, + "loss": 2.1531, + "step": 326725 + }, + { + "epoch": 0.77, + "grad_norm": 2.28125, + "learning_rate": 0.00013550914526631435, + "loss": 2.0463, + "step": 326730 + }, + { + "epoch": 0.77, + "grad_norm": 2.359375, + "learning_rate": 0.000135507417419036, + "loss": 2.0488, + "step": 326735 + }, + { + "epoch": 0.77, + "grad_norm": 1.984375, + "learning_rate": 0.0001355056895596277, + "loss": 2.0461, + "step": 326740 + }, + { + "epoch": 0.77, + "grad_norm": 1.9375, + "learning_rate": 0.00013550396168808998, + "loss": 2.0945, + "step": 326745 + }, + { + "epoch": 0.77, + "grad_norm": 2.046875, + "learning_rate": 0.00013550223380442343, + "loss": 2.1023, + "step": 326750 + }, + { + "epoch": 0.77, + "grad_norm": 2.9375, + "learning_rate": 0.00013550050590862872, + "loss": 2.1536, + "step": 326755 + }, + { + "epoch": 0.77, + "grad_norm": 2.140625, + "learning_rate": 0.00013549877800070643, + "loss": 2.0211, + "step": 326760 + }, + { + "epoch": 0.77, + "grad_norm": 2.171875, + "learning_rate": 0.00013549705008065706, + "loss": 2.1107, + "step": 326765 + }, + { + "epoch": 0.77, + "grad_norm": 2.1875, + "learning_rate": 0.00013549532214848127, + "loss": 2.0683, + "step": 326770 + }, + { + "epoch": 0.77, + "grad_norm": 1.9375, + "learning_rate": 0.0001354935942041796, + "loss": 2.0624, + "step": 326775 + }, + { + "epoch": 0.77, + "grad_norm": 1.8984375, + "learning_rate": 0.0001354918662477527, + "loss": 2.1175, + "step": 326780 + }, + { + "epoch": 0.77, + "grad_norm": 1.9296875, + "learning_rate": 0.00013549013827920115, + "loss": 2.0871, + "step": 326785 + }, + { + "epoch": 0.77, + "grad_norm": 2.5, + "learning_rate": 0.00013548841029852553, + "loss": 1.9869, + "step": 326790 + }, + { + "epoch": 0.77, + "grad_norm": 1.9609375, + "learning_rate": 0.0001354866823057264, + "loss": 2.117, + "step": 326795 + }, + { + "epoch": 0.77, + "grad_norm": 2.328125, + "learning_rate": 0.00013548495430080437, + "loss": 2.0447, + "step": 326800 + }, + { + "epoch": 0.77, + "grad_norm": 2.25, + "learning_rate": 0.00013548322628376006, + "loss": 2.1441, + "step": 326805 + }, + { + "epoch": 0.77, + "grad_norm": 2.53125, + "learning_rate": 0.000135481498254594, + "loss": 2.1977, + "step": 326810 + }, + { + "epoch": 0.77, + "grad_norm": 2.3125, + "learning_rate": 0.00013547977021330688, + "loss": 2.0181, + "step": 326815 + }, + { + "epoch": 0.77, + "grad_norm": 2.25, + "learning_rate": 0.00013547804215989915, + "loss": 2.0849, + "step": 326820 + }, + { + "epoch": 0.77, + "grad_norm": 1.8359375, + "learning_rate": 0.00013547631409437154, + "loss": 1.8653, + "step": 326825 + }, + { + "epoch": 0.77, + "grad_norm": 2.546875, + "learning_rate": 0.00013547458601672452, + "loss": 1.895, + "step": 326830 + }, + { + "epoch": 0.77, + "grad_norm": 2.09375, + "learning_rate": 0.0001354728579269588, + "loss": 2.0402, + "step": 326835 + }, + { + "epoch": 0.77, + "grad_norm": 2.0, + "learning_rate": 0.0001354711298250749, + "loss": 2.0518, + "step": 326840 + }, + { + "epoch": 0.77, + "grad_norm": 1.8671875, + "learning_rate": 0.00013546940171107338, + "loss": 2.093, + "step": 326845 + }, + { + "epoch": 0.77, + "grad_norm": 2.421875, + "learning_rate": 0.00013546767358495489, + "loss": 1.9902, + "step": 326850 + }, + { + "epoch": 0.77, + "grad_norm": 2.53125, + "learning_rate": 0.00013546594544672, + "loss": 2.0297, + "step": 326855 + }, + { + "epoch": 0.77, + "grad_norm": 2.015625, + "learning_rate": 0.00013546421729636928, + "loss": 1.9027, + "step": 326860 + }, + { + "epoch": 0.77, + "grad_norm": 1.796875, + "learning_rate": 0.00013546248913390337, + "loss": 1.9985, + "step": 326865 + }, + { + "epoch": 0.77, + "grad_norm": 2.359375, + "learning_rate": 0.0001354607609593228, + "loss": 2.1296, + "step": 326870 + }, + { + "epoch": 0.77, + "grad_norm": 2.53125, + "learning_rate": 0.00013545903277262827, + "loss": 1.971, + "step": 326875 + }, + { + "epoch": 0.77, + "grad_norm": 2.71875, + "learning_rate": 0.0001354573045738202, + "loss": 2.1734, + "step": 326880 + }, + { + "epoch": 0.77, + "grad_norm": 1.96875, + "learning_rate": 0.00013545557636289928, + "loss": 2.0255, + "step": 326885 + }, + { + "epoch": 0.77, + "grad_norm": 2.8125, + "learning_rate": 0.00013545384813986612, + "loss": 2.1632, + "step": 326890 + }, + { + "epoch": 0.77, + "grad_norm": 2.25, + "learning_rate": 0.00013545211990472131, + "loss": 2.2336, + "step": 326895 + }, + { + "epoch": 0.77, + "grad_norm": 2.140625, + "learning_rate": 0.00013545039165746537, + "loss": 1.9237, + "step": 326900 + }, + { + "epoch": 0.77, + "grad_norm": 2.40625, + "learning_rate": 0.00013544866339809894, + "loss": 2.0115, + "step": 326905 + }, + { + "epoch": 0.77, + "grad_norm": 1.90625, + "learning_rate": 0.0001354469351266226, + "loss": 2.007, + "step": 326910 + }, + { + "epoch": 0.77, + "grad_norm": 2.265625, + "learning_rate": 0.00013544520684303694, + "loss": 2.1553, + "step": 326915 + }, + { + "epoch": 0.77, + "grad_norm": 2.03125, + "learning_rate": 0.00013544347854734258, + "loss": 2.0573, + "step": 326920 + }, + { + "epoch": 0.77, + "grad_norm": 2.234375, + "learning_rate": 0.00013544175023954007, + "loss": 1.9618, + "step": 326925 + }, + { + "epoch": 0.77, + "grad_norm": 3.0, + "learning_rate": 0.00013544002191963003, + "loss": 1.9997, + "step": 326930 + }, + { + "epoch": 0.77, + "grad_norm": 2.15625, + "learning_rate": 0.000135438293587613, + "loss": 1.7491, + "step": 326935 + }, + { + "epoch": 0.77, + "grad_norm": 2.09375, + "learning_rate": 0.00013543656524348964, + "loss": 2.1919, + "step": 326940 + }, + { + "epoch": 0.77, + "grad_norm": 2.203125, + "learning_rate": 0.00013543483688726048, + "loss": 2.108, + "step": 326945 + }, + { + "epoch": 0.77, + "grad_norm": 2.34375, + "learning_rate": 0.00013543310851892617, + "loss": 2.2433, + "step": 326950 + }, + { + "epoch": 0.77, + "grad_norm": 2.65625, + "learning_rate": 0.00013543138013848724, + "loss": 2.2084, + "step": 326955 + }, + { + "epoch": 0.77, + "grad_norm": 1.96875, + "learning_rate": 0.0001354296517459443, + "loss": 2.0217, + "step": 326960 + }, + { + "epoch": 0.77, + "grad_norm": 2.1875, + "learning_rate": 0.00013542792334129798, + "loss": 1.8986, + "step": 326965 + }, + { + "epoch": 0.77, + "grad_norm": 2.546875, + "learning_rate": 0.00013542619492454882, + "loss": 2.2219, + "step": 326970 + }, + { + "epoch": 0.77, + "grad_norm": 3.078125, + "learning_rate": 0.00013542446649569744, + "loss": 2.0713, + "step": 326975 + }, + { + "epoch": 0.77, + "grad_norm": 2.140625, + "learning_rate": 0.00013542273805474441, + "loss": 2.0172, + "step": 326980 + }, + { + "epoch": 0.77, + "grad_norm": 2.375, + "learning_rate": 0.00013542100960169034, + "loss": 1.9612, + "step": 326985 + }, + { + "epoch": 0.77, + "grad_norm": 2.40625, + "learning_rate": 0.0001354192811365358, + "loss": 1.8921, + "step": 326990 + }, + { + "epoch": 0.77, + "grad_norm": 2.015625, + "learning_rate": 0.0001354175526592814, + "loss": 2.1884, + "step": 326995 + }, + { + "epoch": 0.77, + "grad_norm": 2.046875, + "learning_rate": 0.0001354158241699277, + "loss": 2.1095, + "step": 327000 + }, + { + "epoch": 0.77, + "grad_norm": 2.265625, + "learning_rate": 0.00013541409566847533, + "loss": 2.0897, + "step": 327005 + }, + { + "epoch": 0.77, + "grad_norm": 2.046875, + "learning_rate": 0.00013541236715492488, + "loss": 1.8019, + "step": 327010 + }, + { + "epoch": 0.77, + "grad_norm": 2.625, + "learning_rate": 0.0001354106386292769, + "loss": 2.0191, + "step": 327015 + }, + { + "epoch": 0.77, + "grad_norm": 2.015625, + "learning_rate": 0.000135408910091532, + "loss": 2.005, + "step": 327020 + }, + { + "epoch": 0.77, + "grad_norm": 2.265625, + "learning_rate": 0.0001354071815416908, + "loss": 2.0119, + "step": 327025 + }, + { + "epoch": 0.77, + "grad_norm": 2.53125, + "learning_rate": 0.00013540545297975384, + "loss": 2.0457, + "step": 327030 + }, + { + "epoch": 0.77, + "grad_norm": 2.296875, + "learning_rate": 0.00013540372440572175, + "loss": 2.0518, + "step": 327035 + }, + { + "epoch": 0.77, + "grad_norm": 1.9921875, + "learning_rate": 0.0001354019958195951, + "loss": 2.0504, + "step": 327040 + }, + { + "epoch": 0.77, + "grad_norm": 2.328125, + "learning_rate": 0.00013540026722137446, + "loss": 2.1219, + "step": 327045 + }, + { + "epoch": 0.77, + "grad_norm": 2.203125, + "learning_rate": 0.0001353985386110605, + "loss": 1.9177, + "step": 327050 + }, + { + "epoch": 0.77, + "grad_norm": 2.3125, + "learning_rate": 0.00013539680998865373, + "loss": 2.1058, + "step": 327055 + }, + { + "epoch": 0.77, + "grad_norm": 2.09375, + "learning_rate": 0.00013539508135415476, + "loss": 1.8445, + "step": 327060 + }, + { + "epoch": 0.77, + "grad_norm": 3.0, + "learning_rate": 0.0001353933527075642, + "loss": 2.0609, + "step": 327065 + }, + { + "epoch": 0.77, + "grad_norm": 2.828125, + "learning_rate": 0.00013539162404888262, + "loss": 2.0236, + "step": 327070 + }, + { + "epoch": 0.77, + "grad_norm": 2.15625, + "learning_rate": 0.0001353898953781106, + "loss": 2.113, + "step": 327075 + }, + { + "epoch": 0.77, + "grad_norm": 2.203125, + "learning_rate": 0.0001353881666952488, + "loss": 2.0175, + "step": 327080 + }, + { + "epoch": 0.77, + "grad_norm": 2.265625, + "learning_rate": 0.00013538643800029774, + "loss": 1.9991, + "step": 327085 + }, + { + "epoch": 0.77, + "grad_norm": 2.234375, + "learning_rate": 0.00013538470929325803, + "loss": 1.9605, + "step": 327090 + }, + { + "epoch": 0.77, + "grad_norm": 2.078125, + "learning_rate": 0.00013538298057413022, + "loss": 2.032, + "step": 327095 + }, + { + "epoch": 0.77, + "grad_norm": 2.109375, + "learning_rate": 0.000135381251842915, + "loss": 2.0907, + "step": 327100 + }, + { + "epoch": 0.77, + "grad_norm": 2.265625, + "learning_rate": 0.0001353795230996129, + "loss": 2.1103, + "step": 327105 + }, + { + "epoch": 0.77, + "grad_norm": 2.1875, + "learning_rate": 0.00013537779434422446, + "loss": 2.0408, + "step": 327110 + }, + { + "epoch": 0.77, + "grad_norm": 2.03125, + "learning_rate": 0.00013537606557675037, + "loss": 1.9827, + "step": 327115 + }, + { + "epoch": 0.77, + "grad_norm": 1.796875, + "learning_rate": 0.00013537433679719116, + "loss": 1.9975, + "step": 327120 + }, + { + "epoch": 0.77, + "grad_norm": 2.203125, + "learning_rate": 0.00013537260800554744, + "loss": 2.0377, + "step": 327125 + }, + { + "epoch": 0.77, + "grad_norm": 2.21875, + "learning_rate": 0.0001353708792018198, + "loss": 2.1673, + "step": 327130 + }, + { + "epoch": 0.77, + "grad_norm": 2.40625, + "learning_rate": 0.00013536915038600878, + "loss": 2.1222, + "step": 327135 + }, + { + "epoch": 0.77, + "grad_norm": 1.734375, + "learning_rate": 0.00013536742155811508, + "loss": 2.0292, + "step": 327140 + }, + { + "epoch": 0.77, + "grad_norm": 1.953125, + "learning_rate": 0.00013536569271813923, + "loss": 2.088, + "step": 327145 + }, + { + "epoch": 0.77, + "grad_norm": 2.453125, + "learning_rate": 0.00013536396386608176, + "loss": 2.1617, + "step": 327150 + }, + { + "epoch": 0.77, + "grad_norm": 2.125, + "learning_rate": 0.00013536223500194337, + "loss": 1.9908, + "step": 327155 + }, + { + "epoch": 0.77, + "grad_norm": 2.5, + "learning_rate": 0.00013536050612572456, + "loss": 1.9125, + "step": 327160 + }, + { + "epoch": 0.77, + "grad_norm": 2.0625, + "learning_rate": 0.00013535877723742598, + "loss": 2.0293, + "step": 327165 + }, + { + "epoch": 0.77, + "grad_norm": 2.515625, + "learning_rate": 0.00013535704833704824, + "loss": 2.2507, + "step": 327170 + }, + { + "epoch": 0.77, + "grad_norm": 2.140625, + "learning_rate": 0.00013535531942459182, + "loss": 2.1311, + "step": 327175 + }, + { + "epoch": 0.77, + "grad_norm": 2.375, + "learning_rate": 0.0001353535905000574, + "loss": 2.0037, + "step": 327180 + }, + { + "epoch": 0.77, + "grad_norm": 1.5625, + "learning_rate": 0.00013535186156344556, + "loss": 1.7844, + "step": 327185 + }, + { + "epoch": 0.77, + "grad_norm": 2.46875, + "learning_rate": 0.00013535013261475691, + "loss": 2.0179, + "step": 327190 + }, + { + "epoch": 0.77, + "grad_norm": 2.390625, + "learning_rate": 0.000135348403653992, + "loss": 2.1549, + "step": 327195 + }, + { + "epoch": 0.77, + "grad_norm": 2.28125, + "learning_rate": 0.00013534667468115142, + "loss": 2.0306, + "step": 327200 + }, + { + "epoch": 0.77, + "grad_norm": 2.1875, + "learning_rate": 0.00013534494569623576, + "loss": 2.0568, + "step": 327205 + }, + { + "epoch": 0.77, + "grad_norm": 2.015625, + "learning_rate": 0.00013534321669924563, + "loss": 2.0708, + "step": 327210 + }, + { + "epoch": 0.77, + "grad_norm": 2.671875, + "learning_rate": 0.00013534148769018165, + "loss": 2.1081, + "step": 327215 + }, + { + "epoch": 0.77, + "grad_norm": 4.125, + "learning_rate": 0.00013533975866904436, + "loss": 2.2165, + "step": 327220 + }, + { + "epoch": 0.77, + "grad_norm": 2.40625, + "learning_rate": 0.00013533802963583438, + "loss": 2.0132, + "step": 327225 + }, + { + "epoch": 0.77, + "grad_norm": 2.359375, + "learning_rate": 0.00013533630059055224, + "loss": 1.8638, + "step": 327230 + }, + { + "epoch": 0.77, + "grad_norm": 2.15625, + "learning_rate": 0.00013533457153319865, + "loss": 1.9573, + "step": 327235 + }, + { + "epoch": 0.77, + "grad_norm": 2.21875, + "learning_rate": 0.0001353328424637741, + "loss": 1.8982, + "step": 327240 + }, + { + "epoch": 0.77, + "grad_norm": 1.6015625, + "learning_rate": 0.00013533111338227916, + "loss": 1.8444, + "step": 327245 + }, + { + "epoch": 0.77, + "grad_norm": 2.375, + "learning_rate": 0.00013532938428871454, + "loss": 2.0668, + "step": 327250 + }, + { + "epoch": 0.77, + "grad_norm": 2.796875, + "learning_rate": 0.00013532765518308072, + "loss": 1.9384, + "step": 327255 + }, + { + "epoch": 0.77, + "grad_norm": 2.4375, + "learning_rate": 0.00013532592606537835, + "loss": 2.0799, + "step": 327260 + }, + { + "epoch": 0.77, + "grad_norm": 2.25, + "learning_rate": 0.000135324196935608, + "loss": 1.9764, + "step": 327265 + }, + { + "epoch": 0.77, + "grad_norm": 1.875, + "learning_rate": 0.00013532246779377028, + "loss": 2.0286, + "step": 327270 + }, + { + "epoch": 0.77, + "grad_norm": 2.4375, + "learning_rate": 0.00013532073863986574, + "loss": 2.1748, + "step": 327275 + }, + { + "epoch": 0.77, + "grad_norm": 2.5, + "learning_rate": 0.00013531900947389499, + "loss": 2.2375, + "step": 327280 + }, + { + "epoch": 0.77, + "grad_norm": 2.359375, + "learning_rate": 0.00013531728029585866, + "loss": 2.0472, + "step": 327285 + }, + { + "epoch": 0.77, + "grad_norm": 2.109375, + "learning_rate": 0.00013531555110575728, + "loss": 2.1504, + "step": 327290 + }, + { + "epoch": 0.77, + "grad_norm": 1.84375, + "learning_rate": 0.0001353138219035915, + "loss": 2.0508, + "step": 327295 + }, + { + "epoch": 0.77, + "grad_norm": 2.375, + "learning_rate": 0.00013531209268936184, + "loss": 2.0644, + "step": 327300 + }, + { + "epoch": 0.77, + "grad_norm": 2.53125, + "learning_rate": 0.00013531036346306894, + "loss": 2.1351, + "step": 327305 + }, + { + "epoch": 0.77, + "grad_norm": 2.609375, + "learning_rate": 0.00013530863422471336, + "loss": 2.0763, + "step": 327310 + }, + { + "epoch": 0.77, + "grad_norm": 2.234375, + "learning_rate": 0.00013530690497429572, + "loss": 2.1816, + "step": 327315 + }, + { + "epoch": 0.77, + "grad_norm": 2.09375, + "learning_rate": 0.00013530517571181663, + "loss": 1.9257, + "step": 327320 + }, + { + "epoch": 0.77, + "grad_norm": 2.15625, + "learning_rate": 0.0001353034464372766, + "loss": 1.7969, + "step": 327325 + }, + { + "epoch": 0.77, + "grad_norm": 2.421875, + "learning_rate": 0.00013530171715067635, + "loss": 2.242, + "step": 327330 + }, + { + "epoch": 0.77, + "grad_norm": 2.359375, + "learning_rate": 0.00013529998785201635, + "loss": 2.0714, + "step": 327335 + }, + { + "epoch": 0.77, + "grad_norm": 1.9921875, + "learning_rate": 0.0001352982585412972, + "loss": 1.9164, + "step": 327340 + }, + { + "epoch": 0.77, + "grad_norm": 2.375, + "learning_rate": 0.00013529652921851957, + "loss": 2.0107, + "step": 327345 + }, + { + "epoch": 0.77, + "grad_norm": 2.09375, + "learning_rate": 0.000135294799883684, + "loss": 1.9781, + "step": 327350 + }, + { + "epoch": 0.77, + "grad_norm": 2.171875, + "learning_rate": 0.00013529307053679107, + "loss": 1.9621, + "step": 327355 + }, + { + "epoch": 0.77, + "grad_norm": 2.390625, + "learning_rate": 0.0001352913411778414, + "loss": 2.0518, + "step": 327360 + }, + { + "epoch": 0.77, + "grad_norm": 2.734375, + "learning_rate": 0.00013528961180683555, + "loss": 2.1946, + "step": 327365 + }, + { + "epoch": 0.77, + "grad_norm": 2.28125, + "learning_rate": 0.00013528788242377413, + "loss": 2.3454, + "step": 327370 + }, + { + "epoch": 0.77, + "grad_norm": 1.875, + "learning_rate": 0.00013528615302865776, + "loss": 2.0168, + "step": 327375 + }, + { + "epoch": 0.77, + "grad_norm": 2.4375, + "learning_rate": 0.00013528442362148698, + "loss": 2.0516, + "step": 327380 + }, + { + "epoch": 0.77, + "grad_norm": 2.5, + "learning_rate": 0.0001352826942022624, + "loss": 2.1455, + "step": 327385 + }, + { + "epoch": 0.77, + "grad_norm": 2.40625, + "learning_rate": 0.00013528096477098458, + "loss": 2.0668, + "step": 327390 + }, + { + "epoch": 0.77, + "grad_norm": 2.59375, + "learning_rate": 0.00013527923532765423, + "loss": 2.2093, + "step": 327395 + }, + { + "epoch": 0.77, + "grad_norm": 2.25, + "learning_rate": 0.0001352775058722718, + "loss": 2.1564, + "step": 327400 } ], "logging_steps": 5, @@ -312494,7 +458374,7 @@ "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 200, - "total_flos": 1.3479703781281943e+20, + "total_flos": 1.976116670110095e+20, "train_batch_size": 16, "trial_name": null, "trial_params": null