| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 1.0, |
| "eval_steps": 500, |
| "global_step": 62500, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.00032, |
| "grad_norm": 0.39110425114631653, |
| "learning_rate": 0.0001999456, |
| "loss": 2.0348, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.00064, |
| "grad_norm": 0.17977817356586456, |
| "learning_rate": 0.0001998816, |
| "loss": 0.9027, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.00096, |
| "grad_norm": 0.1247190609574318, |
| "learning_rate": 0.00019981760000000002, |
| "loss": 0.9304, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.00128, |
| "grad_norm": 0.1341821253299713, |
| "learning_rate": 0.0001997536, |
| "loss": 0.8894, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.0016, |
| "grad_norm": 0.1190188005566597, |
| "learning_rate": 0.0001996896, |
| "loss": 0.924, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.00192, |
| "grad_norm": 0.12215295433998108, |
| "learning_rate": 0.0001996256, |
| "loss": 0.896, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.00224, |
| "grad_norm": 0.12413129210472107, |
| "learning_rate": 0.00019956160000000002, |
| "loss": 0.9179, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.00256, |
| "grad_norm": 0.119780533015728, |
| "learning_rate": 0.00019949760000000002, |
| "loss": 0.9253, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.00288, |
| "grad_norm": 0.12296301126480103, |
| "learning_rate": 0.00019943360000000001, |
| "loss": 0.934, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.0032, |
| "grad_norm": 0.10840147733688354, |
| "learning_rate": 0.0001993696, |
| "loss": 0.9346, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.00352, |
| "grad_norm": 0.11459454894065857, |
| "learning_rate": 0.0001993056, |
| "loss": 0.9282, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.00384, |
| "grad_norm": 0.1370021402835846, |
| "learning_rate": 0.0001992416, |
| "loss": 0.8889, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.00416, |
| "grad_norm": 0.12761865556240082, |
| "learning_rate": 0.0001991776, |
| "loss": 0.9154, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.00448, |
| "grad_norm": 0.10725900530815125, |
| "learning_rate": 0.00019911360000000002, |
| "loss": 0.8488, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.0048, |
| "grad_norm": 0.12192831188440323, |
| "learning_rate": 0.0001990496, |
| "loss": 0.9009, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.00512, |
| "grad_norm": 0.1291641741991043, |
| "learning_rate": 0.0001989856, |
| "loss": 0.8447, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.00544, |
| "grad_norm": 0.1057133749127388, |
| "learning_rate": 0.00019892160000000003, |
| "loss": 0.8813, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.00576, |
| "grad_norm": 0.1342541128396988, |
| "learning_rate": 0.0001988576, |
| "loss": 0.8812, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.00608, |
| "grad_norm": 0.11218508332967758, |
| "learning_rate": 0.0001987936, |
| "loss": 0.9021, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.0064, |
| "grad_norm": 0.16854852437973022, |
| "learning_rate": 0.00019872960000000002, |
| "loss": 0.9124, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.00672, |
| "grad_norm": 0.11709938943386078, |
| "learning_rate": 0.0001986656, |
| "loss": 0.8405, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.00704, |
| "grad_norm": 0.12850341200828552, |
| "learning_rate": 0.0001986016, |
| "loss": 0.8414, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.00736, |
| "grad_norm": 0.14519518613815308, |
| "learning_rate": 0.0001985376, |
| "loss": 0.8812, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.00768, |
| "grad_norm": 0.12263692915439606, |
| "learning_rate": 0.00019847360000000002, |
| "loss": 0.8983, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.008, |
| "grad_norm": 0.13009226322174072, |
| "learning_rate": 0.00019840960000000002, |
| "loss": 0.9046, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.00832, |
| "grad_norm": 0.11333148181438446, |
| "learning_rate": 0.00019834560000000001, |
| "loss": 0.9265, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.00864, |
| "grad_norm": 0.1445179581642151, |
| "learning_rate": 0.0001982816, |
| "loss": 0.8138, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.00896, |
| "grad_norm": 0.12200130522251129, |
| "learning_rate": 0.0001982176, |
| "loss": 0.9048, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.00928, |
| "grad_norm": 0.13597029447555542, |
| "learning_rate": 0.0001981536, |
| "loss": 0.8862, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.0096, |
| "grad_norm": 0.130451962351799, |
| "learning_rate": 0.00019808960000000002, |
| "loss": 0.9051, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.00992, |
| "grad_norm": 0.12426720559597015, |
| "learning_rate": 0.00019802560000000002, |
| "loss": 0.8529, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.01024, |
| "grad_norm": 0.13389454782009125, |
| "learning_rate": 0.0001979616, |
| "loss": 0.8612, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.01056, |
| "grad_norm": 0.14324034750461578, |
| "learning_rate": 0.0001978976, |
| "loss": 0.8377, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.01088, |
| "grad_norm": 0.13510741293430328, |
| "learning_rate": 0.00019783360000000003, |
| "loss": 0.8786, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.0112, |
| "grad_norm": 0.13480916619300842, |
| "learning_rate": 0.0001977696, |
| "loss": 0.8832, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.01152, |
| "grad_norm": 0.14060954749584198, |
| "learning_rate": 0.0001977056, |
| "loss": 0.9028, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.01184, |
| "grad_norm": 0.1472562700510025, |
| "learning_rate": 0.00019764160000000002, |
| "loss": 0.8943, |
| "step": 740 |
| }, |
| { |
| "epoch": 0.01216, |
| "grad_norm": 0.15105944871902466, |
| "learning_rate": 0.0001975776, |
| "loss": 0.8931, |
| "step": 760 |
| }, |
| { |
| "epoch": 0.01248, |
| "grad_norm": 0.14458748698234558, |
| "learning_rate": 0.0001975136, |
| "loss": 0.8901, |
| "step": 780 |
| }, |
| { |
| "epoch": 0.0128, |
| "grad_norm": 0.14904917776584625, |
| "learning_rate": 0.0001974496, |
| "loss": 0.8967, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.01312, |
| "grad_norm": 0.1423230618238449, |
| "learning_rate": 0.00019738560000000002, |
| "loss": 0.891, |
| "step": 820 |
| }, |
| { |
| "epoch": 0.01344, |
| "grad_norm": 0.16175036132335663, |
| "learning_rate": 0.00019732160000000002, |
| "loss": 0.9115, |
| "step": 840 |
| }, |
| { |
| "epoch": 0.01376, |
| "grad_norm": 0.1510002315044403, |
| "learning_rate": 0.00019725760000000001, |
| "loss": 0.8754, |
| "step": 860 |
| }, |
| { |
| "epoch": 0.01408, |
| "grad_norm": 0.13992249965667725, |
| "learning_rate": 0.0001971936, |
| "loss": 0.8817, |
| "step": 880 |
| }, |
| { |
| "epoch": 0.0144, |
| "grad_norm": 0.17271611094474792, |
| "learning_rate": 0.0001971296, |
| "loss": 0.8966, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.01472, |
| "grad_norm": 0.13203832507133484, |
| "learning_rate": 0.0001970656, |
| "loss": 0.9368, |
| "step": 920 |
| }, |
| { |
| "epoch": 0.01504, |
| "grad_norm": 0.1591019332408905, |
| "learning_rate": 0.00019700160000000002, |
| "loss": 0.8906, |
| "step": 940 |
| }, |
| { |
| "epoch": 0.01536, |
| "grad_norm": 0.14198246598243713, |
| "learning_rate": 0.00019693760000000002, |
| "loss": 0.8643, |
| "step": 960 |
| }, |
| { |
| "epoch": 0.01568, |
| "grad_norm": 0.17151997983455658, |
| "learning_rate": 0.0001968736, |
| "loss": 0.8888, |
| "step": 980 |
| }, |
| { |
| "epoch": 0.016, |
| "grad_norm": 0.14477385580539703, |
| "learning_rate": 0.0001968096, |
| "loss": 0.8929, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.01632, |
| "grad_norm": 0.18318715691566467, |
| "learning_rate": 0.00019674560000000003, |
| "loss": 0.8512, |
| "step": 1020 |
| }, |
| { |
| "epoch": 0.01664, |
| "grad_norm": 0.13533398509025574, |
| "learning_rate": 0.0001966816, |
| "loss": 0.8791, |
| "step": 1040 |
| }, |
| { |
| "epoch": 0.01696, |
| "grad_norm": 0.15874390304088593, |
| "learning_rate": 0.0001966176, |
| "loss": 0.8834, |
| "step": 1060 |
| }, |
| { |
| "epoch": 0.01728, |
| "grad_norm": 0.15447142720222473, |
| "learning_rate": 0.00019655360000000001, |
| "loss": 0.9151, |
| "step": 1080 |
| }, |
| { |
| "epoch": 0.0176, |
| "grad_norm": 0.1866873800754547, |
| "learning_rate": 0.0001964896, |
| "loss": 0.9385, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.01792, |
| "grad_norm": 0.14127598702907562, |
| "learning_rate": 0.0001964256, |
| "loss": 0.9416, |
| "step": 1120 |
| }, |
| { |
| "epoch": 0.01824, |
| "grad_norm": 0.17219585180282593, |
| "learning_rate": 0.0001963616, |
| "loss": 0.9493, |
| "step": 1140 |
| }, |
| { |
| "epoch": 0.01856, |
| "grad_norm": 0.1528492569923401, |
| "learning_rate": 0.00019629760000000002, |
| "loss": 0.8794, |
| "step": 1160 |
| }, |
| { |
| "epoch": 0.01888, |
| "grad_norm": 0.1348702758550644, |
| "learning_rate": 0.00019623360000000002, |
| "loss": 0.9214, |
| "step": 1180 |
| }, |
| { |
| "epoch": 0.0192, |
| "grad_norm": 0.16409145295619965, |
| "learning_rate": 0.0001961696, |
| "loss": 0.9187, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.01952, |
| "grad_norm": 0.15834647417068481, |
| "learning_rate": 0.0001961056, |
| "loss": 0.8428, |
| "step": 1220 |
| }, |
| { |
| "epoch": 0.01984, |
| "grad_norm": 0.18810701370239258, |
| "learning_rate": 0.0001960416, |
| "loss": 0.9302, |
| "step": 1240 |
| }, |
| { |
| "epoch": 0.02016, |
| "grad_norm": 0.1573120802640915, |
| "learning_rate": 0.0001959776, |
| "loss": 0.8962, |
| "step": 1260 |
| }, |
| { |
| "epoch": 0.02048, |
| "grad_norm": 0.1655122935771942, |
| "learning_rate": 0.00019591360000000002, |
| "loss": 0.927, |
| "step": 1280 |
| }, |
| { |
| "epoch": 0.0208, |
| "grad_norm": 0.1711716651916504, |
| "learning_rate": 0.00019584960000000002, |
| "loss": 0.8683, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.02112, |
| "grad_norm": 0.1399732083082199, |
| "learning_rate": 0.0001957856, |
| "loss": 0.8753, |
| "step": 1320 |
| }, |
| { |
| "epoch": 0.02144, |
| "grad_norm": 0.19218869507312775, |
| "learning_rate": 0.0001957216, |
| "loss": 0.8943, |
| "step": 1340 |
| }, |
| { |
| "epoch": 0.02176, |
| "grad_norm": 0.15489013493061066, |
| "learning_rate": 0.00019565760000000003, |
| "loss": 0.8872, |
| "step": 1360 |
| }, |
| { |
| "epoch": 0.02208, |
| "grad_norm": 0.17431455850601196, |
| "learning_rate": 0.0001955936, |
| "loss": 0.9016, |
| "step": 1380 |
| }, |
| { |
| "epoch": 0.0224, |
| "grad_norm": 0.13751237094402313, |
| "learning_rate": 0.0001955296, |
| "loss": 0.8206, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.02272, |
| "grad_norm": 0.15201833844184875, |
| "learning_rate": 0.00019546560000000001, |
| "loss": 0.824, |
| "step": 1420 |
| }, |
| { |
| "epoch": 0.02304, |
| "grad_norm": 0.1994636058807373, |
| "learning_rate": 0.0001954016, |
| "loss": 0.8673, |
| "step": 1440 |
| }, |
| { |
| "epoch": 0.02336, |
| "grad_norm": 0.17202576994895935, |
| "learning_rate": 0.0001953376, |
| "loss": 0.834, |
| "step": 1460 |
| }, |
| { |
| "epoch": 0.02368, |
| "grad_norm": 0.19487006962299347, |
| "learning_rate": 0.0001952736, |
| "loss": 0.9347, |
| "step": 1480 |
| }, |
| { |
| "epoch": 0.024, |
| "grad_norm": 0.16891010105609894, |
| "learning_rate": 0.00019520960000000002, |
| "loss": 0.8873, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.02432, |
| "grad_norm": 0.18789614737033844, |
| "learning_rate": 0.00019514560000000002, |
| "loss": 0.8883, |
| "step": 1520 |
| }, |
| { |
| "epoch": 0.02464, |
| "grad_norm": 0.19695357978343964, |
| "learning_rate": 0.0001950816, |
| "loss": 0.9197, |
| "step": 1540 |
| }, |
| { |
| "epoch": 0.02496, |
| "grad_norm": 0.13254858553409576, |
| "learning_rate": 0.0001950176, |
| "loss": 0.8877, |
| "step": 1560 |
| }, |
| { |
| "epoch": 0.02528, |
| "grad_norm": 0.1932552009820938, |
| "learning_rate": 0.0001949536, |
| "loss": 0.8719, |
| "step": 1580 |
| }, |
| { |
| "epoch": 0.0256, |
| "grad_norm": 0.18450401723384857, |
| "learning_rate": 0.0001948896, |
| "loss": 0.8889, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.02592, |
| "grad_norm": 0.14565275609493256, |
| "learning_rate": 0.00019482560000000002, |
| "loss": 0.8882, |
| "step": 1620 |
| }, |
| { |
| "epoch": 0.02624, |
| "grad_norm": 0.17898815870285034, |
| "learning_rate": 0.00019476160000000002, |
| "loss": 0.9387, |
| "step": 1640 |
| }, |
| { |
| "epoch": 0.02656, |
| "grad_norm": 0.1418757438659668, |
| "learning_rate": 0.0001946976, |
| "loss": 0.835, |
| "step": 1660 |
| }, |
| { |
| "epoch": 0.02688, |
| "grad_norm": 0.1738288700580597, |
| "learning_rate": 0.0001946336, |
| "loss": 0.8402, |
| "step": 1680 |
| }, |
| { |
| "epoch": 0.0272, |
| "grad_norm": 0.15658064186573029, |
| "learning_rate": 0.00019456960000000003, |
| "loss": 0.8626, |
| "step": 1700 |
| }, |
| { |
| "epoch": 0.02752, |
| "grad_norm": 0.1640857756137848, |
| "learning_rate": 0.0001945056, |
| "loss": 0.9208, |
| "step": 1720 |
| }, |
| { |
| "epoch": 0.02784, |
| "grad_norm": 0.18185724318027496, |
| "learning_rate": 0.0001944416, |
| "loss": 0.9001, |
| "step": 1740 |
| }, |
| { |
| "epoch": 0.02816, |
| "grad_norm": 0.1771153062582016, |
| "learning_rate": 0.00019437760000000001, |
| "loss": 0.9175, |
| "step": 1760 |
| }, |
| { |
| "epoch": 0.02848, |
| "grad_norm": 0.1369091272354126, |
| "learning_rate": 0.0001943136, |
| "loss": 0.9019, |
| "step": 1780 |
| }, |
| { |
| "epoch": 0.0288, |
| "grad_norm": 0.18259896337985992, |
| "learning_rate": 0.0001942496, |
| "loss": 0.9223, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.02912, |
| "grad_norm": 0.15459062159061432, |
| "learning_rate": 0.0001941856, |
| "loss": 0.9302, |
| "step": 1820 |
| }, |
| { |
| "epoch": 0.02944, |
| "grad_norm": 0.19653448462486267, |
| "learning_rate": 0.00019412160000000002, |
| "loss": 0.9116, |
| "step": 1840 |
| }, |
| { |
| "epoch": 0.02976, |
| "grad_norm": 0.18698687851428986, |
| "learning_rate": 0.00019405760000000002, |
| "loss": 0.8874, |
| "step": 1860 |
| }, |
| { |
| "epoch": 0.03008, |
| "grad_norm": 0.21010226011276245, |
| "learning_rate": 0.0001939936, |
| "loss": 0.8956, |
| "step": 1880 |
| }, |
| { |
| "epoch": 0.0304, |
| "grad_norm": 0.15704917907714844, |
| "learning_rate": 0.0001939296, |
| "loss": 0.8816, |
| "step": 1900 |
| }, |
| { |
| "epoch": 0.03072, |
| "grad_norm": 0.16836212575435638, |
| "learning_rate": 0.0001938656, |
| "loss": 0.8419, |
| "step": 1920 |
| }, |
| { |
| "epoch": 0.03104, |
| "grad_norm": 0.15333925187587738, |
| "learning_rate": 0.0001938016, |
| "loss": 0.9068, |
| "step": 1940 |
| }, |
| { |
| "epoch": 0.03136, |
| "grad_norm": 0.18370755016803741, |
| "learning_rate": 0.00019373760000000002, |
| "loss": 0.8932, |
| "step": 1960 |
| }, |
| { |
| "epoch": 0.03168, |
| "grad_norm": 0.16511815786361694, |
| "learning_rate": 0.00019367360000000002, |
| "loss": 0.9328, |
| "step": 1980 |
| }, |
| { |
| "epoch": 0.032, |
| "grad_norm": 0.16475580632686615, |
| "learning_rate": 0.0001936096, |
| "loss": 0.9234, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.03232, |
| "grad_norm": 0.17609569430351257, |
| "learning_rate": 0.0001935456, |
| "loss": 0.9281, |
| "step": 2020 |
| }, |
| { |
| "epoch": 0.03264, |
| "grad_norm": 0.1759602576494217, |
| "learning_rate": 0.00019348160000000003, |
| "loss": 0.8592, |
| "step": 2040 |
| }, |
| { |
| "epoch": 0.03296, |
| "grad_norm": 0.1785658448934555, |
| "learning_rate": 0.0001934176, |
| "loss": 0.9156, |
| "step": 2060 |
| }, |
| { |
| "epoch": 0.03328, |
| "grad_norm": 0.20041823387145996, |
| "learning_rate": 0.0001933536, |
| "loss": 0.8585, |
| "step": 2080 |
| }, |
| { |
| "epoch": 0.0336, |
| "grad_norm": 0.2025129646062851, |
| "learning_rate": 0.00019328960000000001, |
| "loss": 0.883, |
| "step": 2100 |
| }, |
| { |
| "epoch": 0.03392, |
| "grad_norm": 0.1853547841310501, |
| "learning_rate": 0.0001932256, |
| "loss": 0.9493, |
| "step": 2120 |
| }, |
| { |
| "epoch": 0.03424, |
| "grad_norm": 0.1714346706867218, |
| "learning_rate": 0.0001931616, |
| "loss": 0.9102, |
| "step": 2140 |
| }, |
| { |
| "epoch": 0.03456, |
| "grad_norm": 0.14508432149887085, |
| "learning_rate": 0.0001930976, |
| "loss": 0.8636, |
| "step": 2160 |
| }, |
| { |
| "epoch": 0.03488, |
| "grad_norm": 0.15658248960971832, |
| "learning_rate": 0.00019303360000000002, |
| "loss": 0.8495, |
| "step": 2180 |
| }, |
| { |
| "epoch": 0.0352, |
| "grad_norm": 0.1980847865343094, |
| "learning_rate": 0.00019296960000000002, |
| "loss": 0.8814, |
| "step": 2200 |
| }, |
| { |
| "epoch": 0.03552, |
| "grad_norm": 0.18244528770446777, |
| "learning_rate": 0.0001929056, |
| "loss": 0.896, |
| "step": 2220 |
| }, |
| { |
| "epoch": 0.03584, |
| "grad_norm": 0.19880063831806183, |
| "learning_rate": 0.0001928416, |
| "loss": 0.8725, |
| "step": 2240 |
| }, |
| { |
| "epoch": 0.03616, |
| "grad_norm": 0.2539379894733429, |
| "learning_rate": 0.0001927776, |
| "loss": 0.936, |
| "step": 2260 |
| }, |
| { |
| "epoch": 0.03648, |
| "grad_norm": 0.17734292149543762, |
| "learning_rate": 0.0001927136, |
| "loss": 0.8839, |
| "step": 2280 |
| }, |
| { |
| "epoch": 0.0368, |
| "grad_norm": 0.15432968735694885, |
| "learning_rate": 0.00019264960000000002, |
| "loss": 0.8977, |
| "step": 2300 |
| }, |
| { |
| "epoch": 0.03712, |
| "grad_norm": 0.17004595696926117, |
| "learning_rate": 0.00019258560000000001, |
| "loss": 0.8974, |
| "step": 2320 |
| }, |
| { |
| "epoch": 0.03744, |
| "grad_norm": 0.16686637699604034, |
| "learning_rate": 0.0001925216, |
| "loss": 0.8822, |
| "step": 2340 |
| }, |
| { |
| "epoch": 0.03776, |
| "grad_norm": 0.16283023357391357, |
| "learning_rate": 0.0001924576, |
| "loss": 0.9273, |
| "step": 2360 |
| }, |
| { |
| "epoch": 0.03808, |
| "grad_norm": 0.1839868277311325, |
| "learning_rate": 0.00019239360000000003, |
| "loss": 0.8829, |
| "step": 2380 |
| }, |
| { |
| "epoch": 0.0384, |
| "grad_norm": 0.1701708436012268, |
| "learning_rate": 0.0001923296, |
| "loss": 0.9052, |
| "step": 2400 |
| }, |
| { |
| "epoch": 0.03872, |
| "grad_norm": 0.16713082790374756, |
| "learning_rate": 0.0001922656, |
| "loss": 0.8653, |
| "step": 2420 |
| }, |
| { |
| "epoch": 0.03904, |
| "grad_norm": 0.16699771583080292, |
| "learning_rate": 0.0001922016, |
| "loss": 0.8644, |
| "step": 2440 |
| }, |
| { |
| "epoch": 0.03936, |
| "grad_norm": 0.15876609086990356, |
| "learning_rate": 0.0001921376, |
| "loss": 0.8703, |
| "step": 2460 |
| }, |
| { |
| "epoch": 0.03968, |
| "grad_norm": 0.1910441368818283, |
| "learning_rate": 0.0001920736, |
| "loss": 0.8637, |
| "step": 2480 |
| }, |
| { |
| "epoch": 0.04, |
| "grad_norm": 0.18075905740261078, |
| "learning_rate": 0.0001920096, |
| "loss": 0.8566, |
| "step": 2500 |
| }, |
| { |
| "epoch": 0.04032, |
| "grad_norm": 0.19470706582069397, |
| "learning_rate": 0.00019194560000000002, |
| "loss": 0.9013, |
| "step": 2520 |
| }, |
| { |
| "epoch": 0.04064, |
| "grad_norm": 0.19072532653808594, |
| "learning_rate": 0.00019188160000000002, |
| "loss": 0.9026, |
| "step": 2540 |
| }, |
| { |
| "epoch": 0.04096, |
| "grad_norm": 0.17622806131839752, |
| "learning_rate": 0.0001918176, |
| "loss": 0.8705, |
| "step": 2560 |
| }, |
| { |
| "epoch": 0.04128, |
| "grad_norm": 0.19638915359973907, |
| "learning_rate": 0.0001917536, |
| "loss": 0.8606, |
| "step": 2580 |
| }, |
| { |
| "epoch": 0.0416, |
| "grad_norm": 0.18957193195819855, |
| "learning_rate": 0.0001916896, |
| "loss": 0.9041, |
| "step": 2600 |
| }, |
| { |
| "epoch": 0.04192, |
| "grad_norm": 0.1762382835149765, |
| "learning_rate": 0.0001916256, |
| "loss": 0.8593, |
| "step": 2620 |
| }, |
| { |
| "epoch": 0.04224, |
| "grad_norm": 0.16159483790397644, |
| "learning_rate": 0.00019156160000000002, |
| "loss": 0.9049, |
| "step": 2640 |
| }, |
| { |
| "epoch": 0.04256, |
| "grad_norm": 0.19137801229953766, |
| "learning_rate": 0.00019149760000000001, |
| "loss": 0.9062, |
| "step": 2660 |
| }, |
| { |
| "epoch": 0.04288, |
| "grad_norm": 0.19132420420646667, |
| "learning_rate": 0.0001914336, |
| "loss": 0.8678, |
| "step": 2680 |
| }, |
| { |
| "epoch": 0.0432, |
| "grad_norm": 0.1738004982471466, |
| "learning_rate": 0.0001913696, |
| "loss": 0.898, |
| "step": 2700 |
| }, |
| { |
| "epoch": 0.04352, |
| "grad_norm": 0.19048957526683807, |
| "learning_rate": 0.00019130560000000003, |
| "loss": 0.8471, |
| "step": 2720 |
| }, |
| { |
| "epoch": 0.04384, |
| "grad_norm": 0.19051052629947662, |
| "learning_rate": 0.0001912416, |
| "loss": 0.8878, |
| "step": 2740 |
| }, |
| { |
| "epoch": 0.04416, |
| "grad_norm": 0.18549174070358276, |
| "learning_rate": 0.0001911776, |
| "loss": 0.9166, |
| "step": 2760 |
| }, |
| { |
| "epoch": 0.04448, |
| "grad_norm": 0.20678356289863586, |
| "learning_rate": 0.0001911136, |
| "loss": 0.8624, |
| "step": 2780 |
| }, |
| { |
| "epoch": 0.0448, |
| "grad_norm": 0.20438261330127716, |
| "learning_rate": 0.0001910496, |
| "loss": 0.882, |
| "step": 2800 |
| }, |
| { |
| "epoch": 0.04512, |
| "grad_norm": 0.1805305778980255, |
| "learning_rate": 0.0001909856, |
| "loss": 0.8867, |
| "step": 2820 |
| }, |
| { |
| "epoch": 0.04544, |
| "grad_norm": 0.2102346122264862, |
| "learning_rate": 0.0001909216, |
| "loss": 0.8605, |
| "step": 2840 |
| }, |
| { |
| "epoch": 0.04576, |
| "grad_norm": 0.17274044454097748, |
| "learning_rate": 0.00019085760000000002, |
| "loss": 0.8529, |
| "step": 2860 |
| }, |
| { |
| "epoch": 0.04608, |
| "grad_norm": 0.19794899225234985, |
| "learning_rate": 0.00019079360000000002, |
| "loss": 0.8778, |
| "step": 2880 |
| }, |
| { |
| "epoch": 0.0464, |
| "grad_norm": 0.19638848304748535, |
| "learning_rate": 0.0001907296, |
| "loss": 0.848, |
| "step": 2900 |
| }, |
| { |
| "epoch": 0.04672, |
| "grad_norm": 0.20513470470905304, |
| "learning_rate": 0.0001906656, |
| "loss": 0.8791, |
| "step": 2920 |
| }, |
| { |
| "epoch": 0.04704, |
| "grad_norm": 0.18168902397155762, |
| "learning_rate": 0.0001906016, |
| "loss": 0.9258, |
| "step": 2940 |
| }, |
| { |
| "epoch": 0.04736, |
| "grad_norm": 0.1906946301460266, |
| "learning_rate": 0.0001905376, |
| "loss": 0.9339, |
| "step": 2960 |
| }, |
| { |
| "epoch": 0.04768, |
| "grad_norm": 0.20983171463012695, |
| "learning_rate": 0.00019047360000000002, |
| "loss": 0.9209, |
| "step": 2980 |
| }, |
| { |
| "epoch": 0.048, |
| "grad_norm": 0.18700706958770752, |
| "learning_rate": 0.00019040960000000001, |
| "loss": 0.8569, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.04832, |
| "grad_norm": 0.18951478600502014, |
| "learning_rate": 0.0001903456, |
| "loss": 0.9087, |
| "step": 3020 |
| }, |
| { |
| "epoch": 0.04864, |
| "grad_norm": 0.18202978372573853, |
| "learning_rate": 0.0001902816, |
| "loss": 0.9239, |
| "step": 3040 |
| }, |
| { |
| "epoch": 0.04896, |
| "grad_norm": 0.21562401950359344, |
| "learning_rate": 0.00019021760000000003, |
| "loss": 0.8721, |
| "step": 3060 |
| }, |
| { |
| "epoch": 0.04928, |
| "grad_norm": 0.18537688255310059, |
| "learning_rate": 0.0001901536, |
| "loss": 0.8798, |
| "step": 3080 |
| }, |
| { |
| "epoch": 0.0496, |
| "grad_norm": 0.1878584325313568, |
| "learning_rate": 0.0001900896, |
| "loss": 0.9315, |
| "step": 3100 |
| }, |
| { |
| "epoch": 0.04992, |
| "grad_norm": 0.1872929185628891, |
| "learning_rate": 0.0001900256, |
| "loss": 0.9202, |
| "step": 3120 |
| }, |
| { |
| "epoch": 0.05024, |
| "grad_norm": 0.1833094209432602, |
| "learning_rate": 0.0001899616, |
| "loss": 0.8837, |
| "step": 3140 |
| }, |
| { |
| "epoch": 0.05056, |
| "grad_norm": 0.18516699969768524, |
| "learning_rate": 0.0001898976, |
| "loss": 0.892, |
| "step": 3160 |
| }, |
| { |
| "epoch": 0.05088, |
| "grad_norm": 0.1559123992919922, |
| "learning_rate": 0.0001898336, |
| "loss": 0.8998, |
| "step": 3180 |
| }, |
| { |
| "epoch": 0.0512, |
| "grad_norm": 0.17760765552520752, |
| "learning_rate": 0.00018976960000000002, |
| "loss": 0.9178, |
| "step": 3200 |
| }, |
| { |
| "epoch": 0.05152, |
| "grad_norm": 0.1603628695011139, |
| "learning_rate": 0.00018970560000000002, |
| "loss": 0.8732, |
| "step": 3220 |
| }, |
| { |
| "epoch": 0.05184, |
| "grad_norm": 0.17330580949783325, |
| "learning_rate": 0.0001896416, |
| "loss": 0.9528, |
| "step": 3240 |
| }, |
| { |
| "epoch": 0.05216, |
| "grad_norm": 0.1774517297744751, |
| "learning_rate": 0.0001895776, |
| "loss": 0.9112, |
| "step": 3260 |
| }, |
| { |
| "epoch": 0.05248, |
| "grad_norm": 0.19834113121032715, |
| "learning_rate": 0.0001895136, |
| "loss": 0.8799, |
| "step": 3280 |
| }, |
| { |
| "epoch": 0.0528, |
| "grad_norm": 0.197114035487175, |
| "learning_rate": 0.0001894496, |
| "loss": 0.8898, |
| "step": 3300 |
| }, |
| { |
| "epoch": 0.05312, |
| "grad_norm": 0.21631449460983276, |
| "learning_rate": 0.00018938560000000002, |
| "loss": 0.8889, |
| "step": 3320 |
| }, |
| { |
| "epoch": 0.05344, |
| "grad_norm": 0.1554328352212906, |
| "learning_rate": 0.00018932160000000001, |
| "loss": 0.9055, |
| "step": 3340 |
| }, |
| { |
| "epoch": 0.05376, |
| "grad_norm": 0.17191193997859955, |
| "learning_rate": 0.0001892576, |
| "loss": 0.8713, |
| "step": 3360 |
| }, |
| { |
| "epoch": 0.05408, |
| "grad_norm": 0.18753254413604736, |
| "learning_rate": 0.0001891936, |
| "loss": 0.9223, |
| "step": 3380 |
| }, |
| { |
| "epoch": 0.0544, |
| "grad_norm": 0.172084778547287, |
| "learning_rate": 0.00018912960000000003, |
| "loss": 0.931, |
| "step": 3400 |
| }, |
| { |
| "epoch": 0.05472, |
| "grad_norm": 0.19548653066158295, |
| "learning_rate": 0.0001890656, |
| "loss": 0.8795, |
| "step": 3420 |
| }, |
| { |
| "epoch": 0.05504, |
| "grad_norm": 0.19771696627140045, |
| "learning_rate": 0.0001890016, |
| "loss": 0.8904, |
| "step": 3440 |
| }, |
| { |
| "epoch": 0.05536, |
| "grad_norm": 0.18042775988578796, |
| "learning_rate": 0.0001889376, |
| "loss": 0.8289, |
| "step": 3460 |
| }, |
| { |
| "epoch": 0.05568, |
| "grad_norm": 0.20334866642951965, |
| "learning_rate": 0.0001888736, |
| "loss": 0.8988, |
| "step": 3480 |
| }, |
| { |
| "epoch": 0.056, |
| "grad_norm": 0.2053702026605606, |
| "learning_rate": 0.0001888096, |
| "loss": 0.85, |
| "step": 3500 |
| }, |
| { |
| "epoch": 0.05632, |
| "grad_norm": 0.18091996014118195, |
| "learning_rate": 0.00018874560000000002, |
| "loss": 0.8816, |
| "step": 3520 |
| }, |
| { |
| "epoch": 0.05664, |
| "grad_norm": 0.1538042575120926, |
| "learning_rate": 0.00018868160000000002, |
| "loss": 0.8792, |
| "step": 3540 |
| }, |
| { |
| "epoch": 0.05696, |
| "grad_norm": 0.21067845821380615, |
| "learning_rate": 0.00018861760000000002, |
| "loss": 0.9104, |
| "step": 3560 |
| }, |
| { |
| "epoch": 0.05728, |
| "grad_norm": 0.17531852424144745, |
| "learning_rate": 0.0001885536, |
| "loss": 0.9073, |
| "step": 3580 |
| }, |
| { |
| "epoch": 0.0576, |
| "grad_norm": 0.16701558232307434, |
| "learning_rate": 0.0001884896, |
| "loss": 0.8782, |
| "step": 3600 |
| }, |
| { |
| "epoch": 0.05792, |
| "grad_norm": 0.20766527950763702, |
| "learning_rate": 0.0001884256, |
| "loss": 0.8751, |
| "step": 3620 |
| }, |
| { |
| "epoch": 0.05824, |
| "grad_norm": 0.19526097178459167, |
| "learning_rate": 0.0001883616, |
| "loss": 0.87, |
| "step": 3640 |
| }, |
| { |
| "epoch": 0.05856, |
| "grad_norm": 0.16312770545482635, |
| "learning_rate": 0.00018829760000000002, |
| "loss": 0.8904, |
| "step": 3660 |
| }, |
| { |
| "epoch": 0.05888, |
| "grad_norm": 0.18951712548732758, |
| "learning_rate": 0.0001882336, |
| "loss": 0.9183, |
| "step": 3680 |
| }, |
| { |
| "epoch": 0.0592, |
| "grad_norm": 0.1615159958600998, |
| "learning_rate": 0.0001881696, |
| "loss": 0.8602, |
| "step": 3700 |
| }, |
| { |
| "epoch": 0.05952, |
| "grad_norm": 0.20840367674827576, |
| "learning_rate": 0.0001881056, |
| "loss": 0.9207, |
| "step": 3720 |
| }, |
| { |
| "epoch": 0.05984, |
| "grad_norm": 0.19745437800884247, |
| "learning_rate": 0.00018804160000000003, |
| "loss": 0.8962, |
| "step": 3740 |
| }, |
| { |
| "epoch": 0.06016, |
| "grad_norm": 0.1767299473285675, |
| "learning_rate": 0.0001879776, |
| "loss": 0.8357, |
| "step": 3760 |
| }, |
| { |
| "epoch": 0.06048, |
| "grad_norm": 0.16729581356048584, |
| "learning_rate": 0.0001879136, |
| "loss": 0.8993, |
| "step": 3780 |
| }, |
| { |
| "epoch": 0.0608, |
| "grad_norm": 0.1816299855709076, |
| "learning_rate": 0.0001878496, |
| "loss": 0.8775, |
| "step": 3800 |
| }, |
| { |
| "epoch": 0.06112, |
| "grad_norm": 0.17500704526901245, |
| "learning_rate": 0.0001877856, |
| "loss": 0.8829, |
| "step": 3820 |
| }, |
| { |
| "epoch": 0.06144, |
| "grad_norm": 0.1851237714290619, |
| "learning_rate": 0.0001877216, |
| "loss": 0.8478, |
| "step": 3840 |
| }, |
| { |
| "epoch": 0.06176, |
| "grad_norm": 0.19113439321517944, |
| "learning_rate": 0.00018765760000000002, |
| "loss": 0.9024, |
| "step": 3860 |
| }, |
| { |
| "epoch": 0.06208, |
| "grad_norm": 0.1793053150177002, |
| "learning_rate": 0.00018759360000000002, |
| "loss": 0.9191, |
| "step": 3880 |
| }, |
| { |
| "epoch": 0.0624, |
| "grad_norm": 0.19696858525276184, |
| "learning_rate": 0.00018752960000000001, |
| "loss": 0.9023, |
| "step": 3900 |
| }, |
| { |
| "epoch": 0.06272, |
| "grad_norm": 0.19326741993427277, |
| "learning_rate": 0.0001874656, |
| "loss": 0.8434, |
| "step": 3920 |
| }, |
| { |
| "epoch": 0.06304, |
| "grad_norm": 0.1995677947998047, |
| "learning_rate": 0.0001874016, |
| "loss": 0.8569, |
| "step": 3940 |
| }, |
| { |
| "epoch": 0.06336, |
| "grad_norm": 0.1579284369945526, |
| "learning_rate": 0.0001873376, |
| "loss": 0.8722, |
| "step": 3960 |
| }, |
| { |
| "epoch": 0.06368, |
| "grad_norm": 0.20145860314369202, |
| "learning_rate": 0.0001872736, |
| "loss": 0.8586, |
| "step": 3980 |
| }, |
| { |
| "epoch": 0.064, |
| "grad_norm": 0.16962005198001862, |
| "learning_rate": 0.00018720960000000002, |
| "loss": 0.8256, |
| "step": 4000 |
| }, |
| { |
| "epoch": 0.06432, |
| "grad_norm": 0.14154337346553802, |
| "learning_rate": 0.0001871456, |
| "loss": 0.9169, |
| "step": 4020 |
| }, |
| { |
| "epoch": 0.06464, |
| "grad_norm": 0.18831445276737213, |
| "learning_rate": 0.0001870816, |
| "loss": 0.8717, |
| "step": 4040 |
| }, |
| { |
| "epoch": 0.06496, |
| "grad_norm": 0.2613060176372528, |
| "learning_rate": 0.0001870176, |
| "loss": 0.8946, |
| "step": 4060 |
| }, |
| { |
| "epoch": 0.06528, |
| "grad_norm": 0.1657022088766098, |
| "learning_rate": 0.00018695360000000003, |
| "loss": 0.8721, |
| "step": 4080 |
| }, |
| { |
| "epoch": 0.0656, |
| "grad_norm": 0.17723548412322998, |
| "learning_rate": 0.0001868896, |
| "loss": 0.8481, |
| "step": 4100 |
| }, |
| { |
| "epoch": 0.06592, |
| "grad_norm": 0.1840563416481018, |
| "learning_rate": 0.0001868256, |
| "loss": 0.8963, |
| "step": 4120 |
| }, |
| { |
| "epoch": 0.06624, |
| "grad_norm": 0.19427619874477386, |
| "learning_rate": 0.0001867616, |
| "loss": 0.8473, |
| "step": 4140 |
| }, |
| { |
| "epoch": 0.06656, |
| "grad_norm": 0.20632588863372803, |
| "learning_rate": 0.0001866976, |
| "loss": 0.8962, |
| "step": 4160 |
| }, |
| { |
| "epoch": 0.06688, |
| "grad_norm": 0.17780327796936035, |
| "learning_rate": 0.0001866336, |
| "loss": 0.9016, |
| "step": 4180 |
| }, |
| { |
| "epoch": 0.0672, |
| "grad_norm": 0.17626479268074036, |
| "learning_rate": 0.00018656960000000002, |
| "loss": 0.8949, |
| "step": 4200 |
| }, |
| { |
| "epoch": 0.06752, |
| "grad_norm": 0.19475996494293213, |
| "learning_rate": 0.00018650560000000002, |
| "loss": 0.9152, |
| "step": 4220 |
| }, |
| { |
| "epoch": 0.06784, |
| "grad_norm": 0.2053624838590622, |
| "learning_rate": 0.00018644160000000001, |
| "loss": 0.9467, |
| "step": 4240 |
| }, |
| { |
| "epoch": 0.06816, |
| "grad_norm": 0.17303887009620667, |
| "learning_rate": 0.0001863776, |
| "loss": 0.9104, |
| "step": 4260 |
| }, |
| { |
| "epoch": 0.06848, |
| "grad_norm": 0.19969859719276428, |
| "learning_rate": 0.0001863136, |
| "loss": 0.8578, |
| "step": 4280 |
| }, |
| { |
| "epoch": 0.0688, |
| "grad_norm": 0.23917217552661896, |
| "learning_rate": 0.0001862496, |
| "loss": 0.8999, |
| "step": 4300 |
| }, |
| { |
| "epoch": 0.06912, |
| "grad_norm": 0.18194426596164703, |
| "learning_rate": 0.0001861856, |
| "loss": 0.9014, |
| "step": 4320 |
| }, |
| { |
| "epoch": 0.06944, |
| "grad_norm": 0.21291664242744446, |
| "learning_rate": 0.00018612160000000002, |
| "loss": 0.9131, |
| "step": 4340 |
| }, |
| { |
| "epoch": 0.06976, |
| "grad_norm": 0.18465067446231842, |
| "learning_rate": 0.0001860576, |
| "loss": 0.8859, |
| "step": 4360 |
| }, |
| { |
| "epoch": 0.07008, |
| "grad_norm": 0.22093325853347778, |
| "learning_rate": 0.0001859936, |
| "loss": 0.9038, |
| "step": 4380 |
| }, |
| { |
| "epoch": 0.0704, |
| "grad_norm": 0.1888457089662552, |
| "learning_rate": 0.0001859296, |
| "loss": 0.8468, |
| "step": 4400 |
| }, |
| { |
| "epoch": 0.07072, |
| "grad_norm": 0.19705061614513397, |
| "learning_rate": 0.00018586560000000003, |
| "loss": 0.8871, |
| "step": 4420 |
| }, |
| { |
| "epoch": 0.07104, |
| "grad_norm": 0.20150603353977203, |
| "learning_rate": 0.0001858016, |
| "loss": 0.8391, |
| "step": 4440 |
| }, |
| { |
| "epoch": 0.07136, |
| "grad_norm": 0.21136346459388733, |
| "learning_rate": 0.0001857376, |
| "loss": 0.8528, |
| "step": 4460 |
| }, |
| { |
| "epoch": 0.07168, |
| "grad_norm": 0.20985183119773865, |
| "learning_rate": 0.0001856736, |
| "loss": 0.9093, |
| "step": 4480 |
| }, |
| { |
| "epoch": 0.072, |
| "grad_norm": 0.1725299060344696, |
| "learning_rate": 0.0001856096, |
| "loss": 0.849, |
| "step": 4500 |
| }, |
| { |
| "epoch": 0.07232, |
| "grad_norm": 0.19184072315692902, |
| "learning_rate": 0.0001855456, |
| "loss": 0.8414, |
| "step": 4520 |
| }, |
| { |
| "epoch": 0.07264, |
| "grad_norm": 0.1758476197719574, |
| "learning_rate": 0.00018548160000000002, |
| "loss": 0.9081, |
| "step": 4540 |
| }, |
| { |
| "epoch": 0.07296, |
| "grad_norm": 0.1840459555387497, |
| "learning_rate": 0.00018541760000000002, |
| "loss": 0.9149, |
| "step": 4560 |
| }, |
| { |
| "epoch": 0.07328, |
| "grad_norm": 0.1862034946680069, |
| "learning_rate": 0.00018535360000000001, |
| "loss": 0.8879, |
| "step": 4580 |
| }, |
| { |
| "epoch": 0.0736, |
| "grad_norm": 0.21543624997138977, |
| "learning_rate": 0.0001852896, |
| "loss": 0.8753, |
| "step": 4600 |
| }, |
| { |
| "epoch": 0.07392, |
| "grad_norm": 0.18351414799690247, |
| "learning_rate": 0.0001852256, |
| "loss": 0.8977, |
| "step": 4620 |
| }, |
| { |
| "epoch": 0.07424, |
| "grad_norm": 0.2166828215122223, |
| "learning_rate": 0.0001851616, |
| "loss": 0.8669, |
| "step": 4640 |
| }, |
| { |
| "epoch": 0.07456, |
| "grad_norm": 0.19744159281253815, |
| "learning_rate": 0.0001850976, |
| "loss": 0.8846, |
| "step": 4660 |
| }, |
| { |
| "epoch": 0.07488, |
| "grad_norm": 0.19065077602863312, |
| "learning_rate": 0.00018503360000000002, |
| "loss": 0.8715, |
| "step": 4680 |
| }, |
| { |
| "epoch": 0.0752, |
| "grad_norm": 0.17913594841957092, |
| "learning_rate": 0.0001849696, |
| "loss": 0.8777, |
| "step": 4700 |
| }, |
| { |
| "epoch": 0.07552, |
| "grad_norm": 0.2282969057559967, |
| "learning_rate": 0.0001849056, |
| "loss": 0.8598, |
| "step": 4720 |
| }, |
| { |
| "epoch": 0.07584, |
| "grad_norm": 0.2031577080488205, |
| "learning_rate": 0.0001848416, |
| "loss": 0.928, |
| "step": 4740 |
| }, |
| { |
| "epoch": 0.07616, |
| "grad_norm": 0.24187202751636505, |
| "learning_rate": 0.00018477760000000002, |
| "loss": 0.9169, |
| "step": 4760 |
| }, |
| { |
| "epoch": 0.07648, |
| "grad_norm": 0.2227555513381958, |
| "learning_rate": 0.0001847136, |
| "loss": 0.914, |
| "step": 4780 |
| }, |
| { |
| "epoch": 0.0768, |
| "grad_norm": 0.2157488912343979, |
| "learning_rate": 0.0001846496, |
| "loss": 0.8697, |
| "step": 4800 |
| }, |
| { |
| "epoch": 0.07712, |
| "grad_norm": 0.19421465694904327, |
| "learning_rate": 0.0001845856, |
| "loss": 0.9358, |
| "step": 4820 |
| }, |
| { |
| "epoch": 0.07744, |
| "grad_norm": 0.2111523300409317, |
| "learning_rate": 0.0001845216, |
| "loss": 0.8708, |
| "step": 4840 |
| }, |
| { |
| "epoch": 0.07776, |
| "grad_norm": 0.23789940774440765, |
| "learning_rate": 0.0001844576, |
| "loss": 0.9036, |
| "step": 4860 |
| }, |
| { |
| "epoch": 0.07808, |
| "grad_norm": 0.19063900411128998, |
| "learning_rate": 0.00018439360000000002, |
| "loss": 0.8825, |
| "step": 4880 |
| }, |
| { |
| "epoch": 0.0784, |
| "grad_norm": 0.18922486901283264, |
| "learning_rate": 0.00018432960000000002, |
| "loss": 0.9094, |
| "step": 4900 |
| }, |
| { |
| "epoch": 0.07872, |
| "grad_norm": 0.19124048948287964, |
| "learning_rate": 0.0001842656, |
| "loss": 0.9422, |
| "step": 4920 |
| }, |
| { |
| "epoch": 0.07904, |
| "grad_norm": 0.19916868209838867, |
| "learning_rate": 0.0001842016, |
| "loss": 0.9341, |
| "step": 4940 |
| }, |
| { |
| "epoch": 0.07936, |
| "grad_norm": 0.19486361742019653, |
| "learning_rate": 0.0001841376, |
| "loss": 0.8836, |
| "step": 4960 |
| }, |
| { |
| "epoch": 0.07968, |
| "grad_norm": 0.20217594504356384, |
| "learning_rate": 0.0001840736, |
| "loss": 0.8485, |
| "step": 4980 |
| }, |
| { |
| "epoch": 0.08, |
| "grad_norm": 0.18520930409431458, |
| "learning_rate": 0.0001840096, |
| "loss": 0.8887, |
| "step": 5000 |
| }, |
| { |
| "epoch": 0.08032, |
| "grad_norm": 0.1816449910402298, |
| "learning_rate": 0.00018394560000000002, |
| "loss": 0.8668, |
| "step": 5020 |
| }, |
| { |
| "epoch": 0.08064, |
| "grad_norm": 0.21598085761070251, |
| "learning_rate": 0.0001838816, |
| "loss": 0.947, |
| "step": 5040 |
| }, |
| { |
| "epoch": 0.08096, |
| "grad_norm": 0.21336813271045685, |
| "learning_rate": 0.0001838176, |
| "loss": 0.928, |
| "step": 5060 |
| }, |
| { |
| "epoch": 0.08128, |
| "grad_norm": 0.18636910617351532, |
| "learning_rate": 0.0001837536, |
| "loss": 0.858, |
| "step": 5080 |
| }, |
| { |
| "epoch": 0.0816, |
| "grad_norm": 0.20049895346164703, |
| "learning_rate": 0.00018368960000000002, |
| "loss": 0.8937, |
| "step": 5100 |
| }, |
| { |
| "epoch": 0.08192, |
| "grad_norm": 0.2153417468070984, |
| "learning_rate": 0.00018362560000000002, |
| "loss": 0.9052, |
| "step": 5120 |
| }, |
| { |
| "epoch": 0.08224, |
| "grad_norm": 0.2149072140455246, |
| "learning_rate": 0.0001835616, |
| "loss": 0.8961, |
| "step": 5140 |
| }, |
| { |
| "epoch": 0.08256, |
| "grad_norm": 0.19339273869991302, |
| "learning_rate": 0.0001834976, |
| "loss": 0.9128, |
| "step": 5160 |
| }, |
| { |
| "epoch": 0.08288, |
| "grad_norm": 0.23768258094787598, |
| "learning_rate": 0.0001834336, |
| "loss": 0.877, |
| "step": 5180 |
| }, |
| { |
| "epoch": 0.0832, |
| "grad_norm": 0.20677222311496735, |
| "learning_rate": 0.0001833696, |
| "loss": 0.8989, |
| "step": 5200 |
| }, |
| { |
| "epoch": 0.08352, |
| "grad_norm": 0.2008122056722641, |
| "learning_rate": 0.00018330560000000002, |
| "loss": 0.8911, |
| "step": 5220 |
| }, |
| { |
| "epoch": 0.08384, |
| "grad_norm": 0.1981019526720047, |
| "learning_rate": 0.00018324160000000002, |
| "loss": 0.9085, |
| "step": 5240 |
| }, |
| { |
| "epoch": 0.08416, |
| "grad_norm": 0.22739489376544952, |
| "learning_rate": 0.0001831776, |
| "loss": 0.8804, |
| "step": 5260 |
| }, |
| { |
| "epoch": 0.08448, |
| "grad_norm": 0.2044532150030136, |
| "learning_rate": 0.0001831136, |
| "loss": 0.8438, |
| "step": 5280 |
| }, |
| { |
| "epoch": 0.0848, |
| "grad_norm": 0.23086583614349365, |
| "learning_rate": 0.0001830496, |
| "loss": 0.8904, |
| "step": 5300 |
| }, |
| { |
| "epoch": 0.08512, |
| "grad_norm": 0.1737246811389923, |
| "learning_rate": 0.0001829856, |
| "loss": 0.8399, |
| "step": 5320 |
| }, |
| { |
| "epoch": 0.08544, |
| "grad_norm": 0.19789084792137146, |
| "learning_rate": 0.0001829216, |
| "loss": 0.8928, |
| "step": 5340 |
| }, |
| { |
| "epoch": 0.08576, |
| "grad_norm": 0.19274166226387024, |
| "learning_rate": 0.00018285760000000002, |
| "loss": 0.9071, |
| "step": 5360 |
| }, |
| { |
| "epoch": 0.08608, |
| "grad_norm": 0.18289533257484436, |
| "learning_rate": 0.0001827936, |
| "loss": 0.885, |
| "step": 5380 |
| }, |
| { |
| "epoch": 0.0864, |
| "grad_norm": 0.20274992287158966, |
| "learning_rate": 0.0001827296, |
| "loss": 0.8716, |
| "step": 5400 |
| }, |
| { |
| "epoch": 0.08672, |
| "grad_norm": 0.20618405938148499, |
| "learning_rate": 0.0001826656, |
| "loss": 0.9022, |
| "step": 5420 |
| }, |
| { |
| "epoch": 0.08704, |
| "grad_norm": 0.18017026782035828, |
| "learning_rate": 0.00018260160000000002, |
| "loss": 0.8997, |
| "step": 5440 |
| }, |
| { |
| "epoch": 0.08736, |
| "grad_norm": 0.17250943183898926, |
| "learning_rate": 0.00018253760000000002, |
| "loss": 0.8778, |
| "step": 5460 |
| }, |
| { |
| "epoch": 0.08768, |
| "grad_norm": 0.21039535105228424, |
| "learning_rate": 0.0001824736, |
| "loss": 0.8629, |
| "step": 5480 |
| }, |
| { |
| "epoch": 0.088, |
| "grad_norm": 0.1946125328540802, |
| "learning_rate": 0.0001824096, |
| "loss": 0.9527, |
| "step": 5500 |
| }, |
| { |
| "epoch": 0.08832, |
| "grad_norm": 0.20565049350261688, |
| "learning_rate": 0.0001823456, |
| "loss": 0.8627, |
| "step": 5520 |
| }, |
| { |
| "epoch": 0.08864, |
| "grad_norm": 0.16778771579265594, |
| "learning_rate": 0.0001822816, |
| "loss": 0.879, |
| "step": 5540 |
| }, |
| { |
| "epoch": 0.08896, |
| "grad_norm": 0.1957644522190094, |
| "learning_rate": 0.00018221760000000002, |
| "loss": 0.9253, |
| "step": 5560 |
| }, |
| { |
| "epoch": 0.08928, |
| "grad_norm": 0.20745377242565155, |
| "learning_rate": 0.00018215360000000002, |
| "loss": 0.9006, |
| "step": 5580 |
| }, |
| { |
| "epoch": 0.0896, |
| "grad_norm": 0.19847019016742706, |
| "learning_rate": 0.0001820896, |
| "loss": 0.9176, |
| "step": 5600 |
| }, |
| { |
| "epoch": 0.08992, |
| "grad_norm": 0.22231200337409973, |
| "learning_rate": 0.0001820256, |
| "loss": 0.9174, |
| "step": 5620 |
| }, |
| { |
| "epoch": 0.09024, |
| "grad_norm": 0.21002036333084106, |
| "learning_rate": 0.0001819616, |
| "loss": 0.8773, |
| "step": 5640 |
| }, |
| { |
| "epoch": 0.09056, |
| "grad_norm": 0.18204717338085175, |
| "learning_rate": 0.0001818976, |
| "loss": 0.9038, |
| "step": 5660 |
| }, |
| { |
| "epoch": 0.09088, |
| "grad_norm": 0.21081459522247314, |
| "learning_rate": 0.0001818336, |
| "loss": 0.8409, |
| "step": 5680 |
| }, |
| { |
| "epoch": 0.0912, |
| "grad_norm": 0.1905379593372345, |
| "learning_rate": 0.00018176960000000002, |
| "loss": 0.9125, |
| "step": 5700 |
| }, |
| { |
| "epoch": 0.09152, |
| "grad_norm": 0.17761899530887604, |
| "learning_rate": 0.0001817056, |
| "loss": 0.8617, |
| "step": 5720 |
| }, |
| { |
| "epoch": 0.09184, |
| "grad_norm": 0.20881423354148865, |
| "learning_rate": 0.0001816416, |
| "loss": 0.8769, |
| "step": 5740 |
| }, |
| { |
| "epoch": 0.09216, |
| "grad_norm": 0.22868691384792328, |
| "learning_rate": 0.0001815776, |
| "loss": 0.8426, |
| "step": 5760 |
| }, |
| { |
| "epoch": 0.09248, |
| "grad_norm": 0.2537609040737152, |
| "learning_rate": 0.00018151360000000002, |
| "loss": 0.9347, |
| "step": 5780 |
| }, |
| { |
| "epoch": 0.0928, |
| "grad_norm": 0.2280977964401245, |
| "learning_rate": 0.00018144960000000002, |
| "loss": 0.89, |
| "step": 5800 |
| }, |
| { |
| "epoch": 0.09312, |
| "grad_norm": 0.22828595340251923, |
| "learning_rate": 0.0001813856, |
| "loss": 0.8818, |
| "step": 5820 |
| }, |
| { |
| "epoch": 0.09344, |
| "grad_norm": 0.19653092324733734, |
| "learning_rate": 0.0001813216, |
| "loss": 0.8944, |
| "step": 5840 |
| }, |
| { |
| "epoch": 0.09376, |
| "grad_norm": 0.2112797498703003, |
| "learning_rate": 0.0001812576, |
| "loss": 0.8945, |
| "step": 5860 |
| }, |
| { |
| "epoch": 0.09408, |
| "grad_norm": 0.21034376323223114, |
| "learning_rate": 0.0001811936, |
| "loss": 0.877, |
| "step": 5880 |
| }, |
| { |
| "epoch": 0.0944, |
| "grad_norm": 0.20544138550758362, |
| "learning_rate": 0.00018112960000000002, |
| "loss": 0.8955, |
| "step": 5900 |
| }, |
| { |
| "epoch": 0.09472, |
| "grad_norm": 0.18214848637580872, |
| "learning_rate": 0.00018106560000000002, |
| "loss": 0.8538, |
| "step": 5920 |
| }, |
| { |
| "epoch": 0.09504, |
| "grad_norm": 0.19273880124092102, |
| "learning_rate": 0.0001810016, |
| "loss": 0.9267, |
| "step": 5940 |
| }, |
| { |
| "epoch": 0.09536, |
| "grad_norm": 0.16388094425201416, |
| "learning_rate": 0.0001809376, |
| "loss": 0.8903, |
| "step": 5960 |
| }, |
| { |
| "epoch": 0.09568, |
| "grad_norm": 0.19152410328388214, |
| "learning_rate": 0.0001808736, |
| "loss": 0.8994, |
| "step": 5980 |
| }, |
| { |
| "epoch": 0.096, |
| "grad_norm": 0.20129649341106415, |
| "learning_rate": 0.0001808096, |
| "loss": 0.9065, |
| "step": 6000 |
| }, |
| { |
| "epoch": 0.09632, |
| "grad_norm": 0.2275884598493576, |
| "learning_rate": 0.0001807456, |
| "loss": 0.8745, |
| "step": 6020 |
| }, |
| { |
| "epoch": 0.09664, |
| "grad_norm": 0.1939428150653839, |
| "learning_rate": 0.00018068160000000002, |
| "loss": 0.9147, |
| "step": 6040 |
| }, |
| { |
| "epoch": 0.09696, |
| "grad_norm": 0.21504884958267212, |
| "learning_rate": 0.0001806176, |
| "loss": 0.8575, |
| "step": 6060 |
| }, |
| { |
| "epoch": 0.09728, |
| "grad_norm": 0.21252253651618958, |
| "learning_rate": 0.0001805536, |
| "loss": 0.8554, |
| "step": 6080 |
| }, |
| { |
| "epoch": 0.0976, |
| "grad_norm": 0.213465616106987, |
| "learning_rate": 0.0001804896, |
| "loss": 0.9016, |
| "step": 6100 |
| }, |
| { |
| "epoch": 0.09792, |
| "grad_norm": 0.19815479218959808, |
| "learning_rate": 0.00018042560000000002, |
| "loss": 0.9675, |
| "step": 6120 |
| }, |
| { |
| "epoch": 0.09824, |
| "grad_norm": 0.19477008283138275, |
| "learning_rate": 0.00018036160000000002, |
| "loss": 0.9025, |
| "step": 6140 |
| }, |
| { |
| "epoch": 0.09856, |
| "grad_norm": 0.20203906297683716, |
| "learning_rate": 0.0001802976, |
| "loss": 0.8952, |
| "step": 6160 |
| }, |
| { |
| "epoch": 0.09888, |
| "grad_norm": 0.2099459171295166, |
| "learning_rate": 0.0001802336, |
| "loss": 0.9044, |
| "step": 6180 |
| }, |
| { |
| "epoch": 0.0992, |
| "grad_norm": 0.2077176868915558, |
| "learning_rate": 0.0001801696, |
| "loss": 0.8826, |
| "step": 6200 |
| }, |
| { |
| "epoch": 0.09952, |
| "grad_norm": 0.18981848657131195, |
| "learning_rate": 0.0001801056, |
| "loss": 0.8455, |
| "step": 6220 |
| }, |
| { |
| "epoch": 0.09984, |
| "grad_norm": 0.20933973789215088, |
| "learning_rate": 0.00018004160000000002, |
| "loss": 0.902, |
| "step": 6240 |
| }, |
| { |
| "epoch": 0.10016, |
| "grad_norm": 0.20591773092746735, |
| "learning_rate": 0.00017997760000000002, |
| "loss": 0.8667, |
| "step": 6260 |
| }, |
| { |
| "epoch": 0.10048, |
| "grad_norm": 0.258956640958786, |
| "learning_rate": 0.0001799136, |
| "loss": 0.8949, |
| "step": 6280 |
| }, |
| { |
| "epoch": 0.1008, |
| "grad_norm": 0.19157810509204865, |
| "learning_rate": 0.0001798496, |
| "loss": 0.8713, |
| "step": 6300 |
| }, |
| { |
| "epoch": 0.10112, |
| "grad_norm": 0.21302878856658936, |
| "learning_rate": 0.0001797856, |
| "loss": 0.8584, |
| "step": 6320 |
| }, |
| { |
| "epoch": 0.10144, |
| "grad_norm": 0.1915074735879898, |
| "learning_rate": 0.0001797216, |
| "loss": 0.9583, |
| "step": 6340 |
| }, |
| { |
| "epoch": 0.10176, |
| "grad_norm": 0.22054611146450043, |
| "learning_rate": 0.0001796576, |
| "loss": 0.9125, |
| "step": 6360 |
| }, |
| { |
| "epoch": 0.10208, |
| "grad_norm": 0.22295401990413666, |
| "learning_rate": 0.00017959360000000001, |
| "loss": 0.8893, |
| "step": 6380 |
| }, |
| { |
| "epoch": 0.1024, |
| "grad_norm": 0.19963820278644562, |
| "learning_rate": 0.0001795296, |
| "loss": 0.8944, |
| "step": 6400 |
| }, |
| { |
| "epoch": 0.10272, |
| "grad_norm": 0.17585329711437225, |
| "learning_rate": 0.0001794656, |
| "loss": 0.869, |
| "step": 6420 |
| }, |
| { |
| "epoch": 0.10304, |
| "grad_norm": 0.20457583665847778, |
| "learning_rate": 0.00017940160000000003, |
| "loss": 0.8894, |
| "step": 6440 |
| }, |
| { |
| "epoch": 0.10336, |
| "grad_norm": 0.2085409164428711, |
| "learning_rate": 0.00017933760000000002, |
| "loss": 0.9218, |
| "step": 6460 |
| }, |
| { |
| "epoch": 0.10368, |
| "grad_norm": 0.14747366309165955, |
| "learning_rate": 0.00017927360000000002, |
| "loss": 0.8441, |
| "step": 6480 |
| }, |
| { |
| "epoch": 0.104, |
| "grad_norm": 0.24237246811389923, |
| "learning_rate": 0.0001792096, |
| "loss": 0.9292, |
| "step": 6500 |
| }, |
| { |
| "epoch": 0.10432, |
| "grad_norm": 0.2079431265592575, |
| "learning_rate": 0.0001791456, |
| "loss": 0.8364, |
| "step": 6520 |
| }, |
| { |
| "epoch": 0.10464, |
| "grad_norm": 0.2067815363407135, |
| "learning_rate": 0.0001790816, |
| "loss": 0.9069, |
| "step": 6540 |
| }, |
| { |
| "epoch": 0.10496, |
| "grad_norm": 0.18671968579292297, |
| "learning_rate": 0.0001790176, |
| "loss": 0.8582, |
| "step": 6560 |
| }, |
| { |
| "epoch": 0.10528, |
| "grad_norm": 0.18874432146549225, |
| "learning_rate": 0.00017895360000000002, |
| "loss": 0.8791, |
| "step": 6580 |
| }, |
| { |
| "epoch": 0.1056, |
| "grad_norm": 0.22563117742538452, |
| "learning_rate": 0.00017888960000000002, |
| "loss": 0.8395, |
| "step": 6600 |
| }, |
| { |
| "epoch": 0.10592, |
| "grad_norm": 0.19527731835842133, |
| "learning_rate": 0.0001788256, |
| "loss": 0.8675, |
| "step": 6620 |
| }, |
| { |
| "epoch": 0.10624, |
| "grad_norm": 0.21411758661270142, |
| "learning_rate": 0.0001787616, |
| "loss": 0.9045, |
| "step": 6640 |
| }, |
| { |
| "epoch": 0.10656, |
| "grad_norm": 0.2257653772830963, |
| "learning_rate": 0.0001786976, |
| "loss": 0.9009, |
| "step": 6660 |
| }, |
| { |
| "epoch": 0.10688, |
| "grad_norm": 0.18150146305561066, |
| "learning_rate": 0.0001786336, |
| "loss": 0.9246, |
| "step": 6680 |
| }, |
| { |
| "epoch": 0.1072, |
| "grad_norm": 0.1973322033882141, |
| "learning_rate": 0.0001785696, |
| "loss": 0.9191, |
| "step": 6700 |
| }, |
| { |
| "epoch": 0.10752, |
| "grad_norm": 0.19496308267116547, |
| "learning_rate": 0.00017850560000000001, |
| "loss": 0.8449, |
| "step": 6720 |
| }, |
| { |
| "epoch": 0.10784, |
| "grad_norm": 0.19810955226421356, |
| "learning_rate": 0.0001784416, |
| "loss": 0.8846, |
| "step": 6740 |
| }, |
| { |
| "epoch": 0.10816, |
| "grad_norm": 0.24701924622058868, |
| "learning_rate": 0.0001783776, |
| "loss": 0.8716, |
| "step": 6760 |
| }, |
| { |
| "epoch": 0.10848, |
| "grad_norm": 0.22664742171764374, |
| "learning_rate": 0.00017831360000000003, |
| "loss": 0.884, |
| "step": 6780 |
| }, |
| { |
| "epoch": 0.1088, |
| "grad_norm": 0.228456512093544, |
| "learning_rate": 0.00017824960000000002, |
| "loss": 0.8975, |
| "step": 6800 |
| }, |
| { |
| "epoch": 0.10912, |
| "grad_norm": 0.21849101781845093, |
| "learning_rate": 0.00017818560000000002, |
| "loss": 0.9255, |
| "step": 6820 |
| }, |
| { |
| "epoch": 0.10944, |
| "grad_norm": 0.2064104974269867, |
| "learning_rate": 0.0001781216, |
| "loss": 0.8829, |
| "step": 6840 |
| }, |
| { |
| "epoch": 0.10976, |
| "grad_norm": 0.22377945482730865, |
| "learning_rate": 0.0001780576, |
| "loss": 0.8715, |
| "step": 6860 |
| }, |
| { |
| "epoch": 0.11008, |
| "grad_norm": 0.202182337641716, |
| "learning_rate": 0.0001779936, |
| "loss": 0.9154, |
| "step": 6880 |
| }, |
| { |
| "epoch": 0.1104, |
| "grad_norm": 0.15783466398715973, |
| "learning_rate": 0.0001779296, |
| "loss": 0.9463, |
| "step": 6900 |
| }, |
| { |
| "epoch": 0.11072, |
| "grad_norm": 0.2259039580821991, |
| "learning_rate": 0.00017786560000000002, |
| "loss": 0.8754, |
| "step": 6920 |
| }, |
| { |
| "epoch": 0.11104, |
| "grad_norm": 0.23525789380073547, |
| "learning_rate": 0.00017780160000000002, |
| "loss": 0.8665, |
| "step": 6940 |
| }, |
| { |
| "epoch": 0.11136, |
| "grad_norm": 0.2006695419549942, |
| "learning_rate": 0.0001777376, |
| "loss": 0.8832, |
| "step": 6960 |
| }, |
| { |
| "epoch": 0.11168, |
| "grad_norm": 0.2209470272064209, |
| "learning_rate": 0.0001776736, |
| "loss": 0.8867, |
| "step": 6980 |
| }, |
| { |
| "epoch": 0.112, |
| "grad_norm": 0.22054742276668549, |
| "learning_rate": 0.0001776096, |
| "loss": 0.8876, |
| "step": 7000 |
| }, |
| { |
| "epoch": 0.11232, |
| "grad_norm": 0.24601756036281586, |
| "learning_rate": 0.0001775456, |
| "loss": 0.8667, |
| "step": 7020 |
| }, |
| { |
| "epoch": 0.11264, |
| "grad_norm": 0.20692676305770874, |
| "learning_rate": 0.0001774816, |
| "loss": 0.8659, |
| "step": 7040 |
| }, |
| { |
| "epoch": 0.11296, |
| "grad_norm": 0.18839353322982788, |
| "learning_rate": 0.00017741760000000001, |
| "loss": 0.8503, |
| "step": 7060 |
| }, |
| { |
| "epoch": 0.11328, |
| "grad_norm": 0.2029074728488922, |
| "learning_rate": 0.0001773536, |
| "loss": 0.8643, |
| "step": 7080 |
| }, |
| { |
| "epoch": 0.1136, |
| "grad_norm": 0.22685612738132477, |
| "learning_rate": 0.0001772896, |
| "loss": 0.9198, |
| "step": 7100 |
| }, |
| { |
| "epoch": 0.11392, |
| "grad_norm": 0.22184133529663086, |
| "learning_rate": 0.00017722560000000003, |
| "loss": 0.8725, |
| "step": 7120 |
| }, |
| { |
| "epoch": 0.11424, |
| "grad_norm": 0.19977827370166779, |
| "learning_rate": 0.00017716160000000002, |
| "loss": 0.8217, |
| "step": 7140 |
| }, |
| { |
| "epoch": 0.11456, |
| "grad_norm": 0.22433121502399445, |
| "learning_rate": 0.00017709760000000002, |
| "loss": 0.9014, |
| "step": 7160 |
| }, |
| { |
| "epoch": 0.11488, |
| "grad_norm": 0.2040790170431137, |
| "learning_rate": 0.0001770336, |
| "loss": 0.9144, |
| "step": 7180 |
| }, |
| { |
| "epoch": 0.1152, |
| "grad_norm": 0.22500857710838318, |
| "learning_rate": 0.0001769696, |
| "loss": 0.8332, |
| "step": 7200 |
| }, |
| { |
| "epoch": 0.11552, |
| "grad_norm": 0.2294531762599945, |
| "learning_rate": 0.0001769056, |
| "loss": 0.9003, |
| "step": 7220 |
| }, |
| { |
| "epoch": 0.11584, |
| "grad_norm": 0.2060810774564743, |
| "learning_rate": 0.0001768416, |
| "loss": 0.9065, |
| "step": 7240 |
| }, |
| { |
| "epoch": 0.11616, |
| "grad_norm": 0.21327152848243713, |
| "learning_rate": 0.00017677760000000002, |
| "loss": 0.9172, |
| "step": 7260 |
| }, |
| { |
| "epoch": 0.11648, |
| "grad_norm": 0.2296830266714096, |
| "learning_rate": 0.00017671360000000002, |
| "loss": 0.9246, |
| "step": 7280 |
| }, |
| { |
| "epoch": 0.1168, |
| "grad_norm": 0.18748362362384796, |
| "learning_rate": 0.0001766496, |
| "loss": 0.8971, |
| "step": 7300 |
| }, |
| { |
| "epoch": 0.11712, |
| "grad_norm": 0.1924070417881012, |
| "learning_rate": 0.0001765856, |
| "loss": 0.8685, |
| "step": 7320 |
| }, |
| { |
| "epoch": 0.11744, |
| "grad_norm": 0.2428852766752243, |
| "learning_rate": 0.0001765216, |
| "loss": 0.9398, |
| "step": 7340 |
| }, |
| { |
| "epoch": 0.11776, |
| "grad_norm": 0.24050328135490417, |
| "learning_rate": 0.0001764576, |
| "loss": 0.8048, |
| "step": 7360 |
| }, |
| { |
| "epoch": 0.11808, |
| "grad_norm": 0.2360570877790451, |
| "learning_rate": 0.0001763936, |
| "loss": 0.8465, |
| "step": 7380 |
| }, |
| { |
| "epoch": 0.1184, |
| "grad_norm": 0.21176236867904663, |
| "learning_rate": 0.0001763296, |
| "loss": 0.8985, |
| "step": 7400 |
| }, |
| { |
| "epoch": 0.11872, |
| "grad_norm": 0.20678134262561798, |
| "learning_rate": 0.0001762656, |
| "loss": 0.8958, |
| "step": 7420 |
| }, |
| { |
| "epoch": 0.11904, |
| "grad_norm": 0.28033092617988586, |
| "learning_rate": 0.0001762016, |
| "loss": 0.8752, |
| "step": 7440 |
| }, |
| { |
| "epoch": 0.11936, |
| "grad_norm": 0.1989385336637497, |
| "learning_rate": 0.00017613760000000003, |
| "loss": 0.9008, |
| "step": 7460 |
| }, |
| { |
| "epoch": 0.11968, |
| "grad_norm": 0.22315728664398193, |
| "learning_rate": 0.00017607360000000002, |
| "loss": 0.8795, |
| "step": 7480 |
| }, |
| { |
| "epoch": 0.12, |
| "grad_norm": 0.2524365186691284, |
| "learning_rate": 0.00017600960000000002, |
| "loss": 0.9486, |
| "step": 7500 |
| }, |
| { |
| "epoch": 0.12032, |
| "grad_norm": 0.25160396099090576, |
| "learning_rate": 0.0001759456, |
| "loss": 0.9099, |
| "step": 7520 |
| }, |
| { |
| "epoch": 0.12064, |
| "grad_norm": 0.22552479803562164, |
| "learning_rate": 0.0001758816, |
| "loss": 0.8352, |
| "step": 7540 |
| }, |
| { |
| "epoch": 0.12096, |
| "grad_norm": 0.17683327198028564, |
| "learning_rate": 0.0001758176, |
| "loss": 0.8771, |
| "step": 7560 |
| }, |
| { |
| "epoch": 0.12128, |
| "grad_norm": 0.21366801857948303, |
| "learning_rate": 0.0001757536, |
| "loss": 0.9409, |
| "step": 7580 |
| }, |
| { |
| "epoch": 0.1216, |
| "grad_norm": 0.19283446669578552, |
| "learning_rate": 0.00017568960000000002, |
| "loss": 0.9305, |
| "step": 7600 |
| }, |
| { |
| "epoch": 0.12192, |
| "grad_norm": 0.22334997355937958, |
| "learning_rate": 0.00017562560000000001, |
| "loss": 0.8974, |
| "step": 7620 |
| }, |
| { |
| "epoch": 0.12224, |
| "grad_norm": 0.252670019865036, |
| "learning_rate": 0.0001755616, |
| "loss": 0.8787, |
| "step": 7640 |
| }, |
| { |
| "epoch": 0.12256, |
| "grad_norm": 0.2769858241081238, |
| "learning_rate": 0.0001754976, |
| "loss": 0.898, |
| "step": 7660 |
| }, |
| { |
| "epoch": 0.12288, |
| "grad_norm": 0.1979377120733261, |
| "learning_rate": 0.0001754336, |
| "loss": 0.8994, |
| "step": 7680 |
| }, |
| { |
| "epoch": 0.1232, |
| "grad_norm": 0.2033649981021881, |
| "learning_rate": 0.0001753696, |
| "loss": 0.8465, |
| "step": 7700 |
| }, |
| { |
| "epoch": 0.12352, |
| "grad_norm": 0.19611379504203796, |
| "learning_rate": 0.0001753056, |
| "loss": 0.9224, |
| "step": 7720 |
| }, |
| { |
| "epoch": 0.12384, |
| "grad_norm": 0.33501213788986206, |
| "learning_rate": 0.0001752416, |
| "loss": 0.9225, |
| "step": 7740 |
| }, |
| { |
| "epoch": 0.12416, |
| "grad_norm": 0.17307236790657043, |
| "learning_rate": 0.0001751776, |
| "loss": 0.9069, |
| "step": 7760 |
| }, |
| { |
| "epoch": 0.12448, |
| "grad_norm": 0.21077322959899902, |
| "learning_rate": 0.0001751136, |
| "loss": 0.9084, |
| "step": 7780 |
| }, |
| { |
| "epoch": 0.1248, |
| "grad_norm": 0.2217060923576355, |
| "learning_rate": 0.00017504960000000003, |
| "loss": 0.8567, |
| "step": 7800 |
| }, |
| { |
| "epoch": 0.12512, |
| "grad_norm": 0.2257986068725586, |
| "learning_rate": 0.00017498560000000002, |
| "loss": 0.8508, |
| "step": 7820 |
| }, |
| { |
| "epoch": 0.12544, |
| "grad_norm": 0.2513684332370758, |
| "learning_rate": 0.00017492160000000002, |
| "loss": 0.8808, |
| "step": 7840 |
| }, |
| { |
| "epoch": 0.12576, |
| "grad_norm": 0.3284933865070343, |
| "learning_rate": 0.0001748576, |
| "loss": 0.8912, |
| "step": 7860 |
| }, |
| { |
| "epoch": 0.12608, |
| "grad_norm": 0.20665164291858673, |
| "learning_rate": 0.0001747936, |
| "loss": 0.8869, |
| "step": 7880 |
| }, |
| { |
| "epoch": 0.1264, |
| "grad_norm": 0.2463517189025879, |
| "learning_rate": 0.0001747296, |
| "loss": 0.9119, |
| "step": 7900 |
| }, |
| { |
| "epoch": 0.12672, |
| "grad_norm": 0.19471873342990875, |
| "learning_rate": 0.0001746656, |
| "loss": 0.898, |
| "step": 7920 |
| }, |
| { |
| "epoch": 0.12704, |
| "grad_norm": 0.2780425250530243, |
| "learning_rate": 0.00017460160000000002, |
| "loss": 0.9174, |
| "step": 7940 |
| }, |
| { |
| "epoch": 0.12736, |
| "grad_norm": 0.22313277423381805, |
| "learning_rate": 0.00017453760000000001, |
| "loss": 0.9054, |
| "step": 7960 |
| }, |
| { |
| "epoch": 0.12768, |
| "grad_norm": 0.22709155082702637, |
| "learning_rate": 0.0001744736, |
| "loss": 0.887, |
| "step": 7980 |
| }, |
| { |
| "epoch": 0.128, |
| "grad_norm": 0.22096025943756104, |
| "learning_rate": 0.0001744096, |
| "loss": 0.8977, |
| "step": 8000 |
| }, |
| { |
| "epoch": 0.12832, |
| "grad_norm": 0.2423054575920105, |
| "learning_rate": 0.0001743456, |
| "loss": 0.9106, |
| "step": 8020 |
| }, |
| { |
| "epoch": 0.12864, |
| "grad_norm": 0.20658574998378754, |
| "learning_rate": 0.0001742816, |
| "loss": 0.8476, |
| "step": 8040 |
| }, |
| { |
| "epoch": 0.12896, |
| "grad_norm": 0.22077764570713043, |
| "learning_rate": 0.0001742176, |
| "loss": 0.911, |
| "step": 8060 |
| }, |
| { |
| "epoch": 0.12928, |
| "grad_norm": 0.22980265319347382, |
| "learning_rate": 0.0001741536, |
| "loss": 0.9451, |
| "step": 8080 |
| }, |
| { |
| "epoch": 0.1296, |
| "grad_norm": 0.25283125042915344, |
| "learning_rate": 0.0001740896, |
| "loss": 0.8582, |
| "step": 8100 |
| }, |
| { |
| "epoch": 0.12992, |
| "grad_norm": 0.22836875915527344, |
| "learning_rate": 0.0001740256, |
| "loss": 0.8644, |
| "step": 8120 |
| }, |
| { |
| "epoch": 0.13024, |
| "grad_norm": 0.20451593399047852, |
| "learning_rate": 0.00017396160000000003, |
| "loss": 0.9361, |
| "step": 8140 |
| }, |
| { |
| "epoch": 0.13056, |
| "grad_norm": 0.20466330647468567, |
| "learning_rate": 0.00017389760000000002, |
| "loss": 0.9134, |
| "step": 8160 |
| }, |
| { |
| "epoch": 0.13088, |
| "grad_norm": 0.20562607049942017, |
| "learning_rate": 0.00017383360000000002, |
| "loss": 0.9157, |
| "step": 8180 |
| }, |
| { |
| "epoch": 0.1312, |
| "grad_norm": 0.23010079562664032, |
| "learning_rate": 0.0001737696, |
| "loss": 0.8571, |
| "step": 8200 |
| }, |
| { |
| "epoch": 0.13152, |
| "grad_norm": 0.2761363387107849, |
| "learning_rate": 0.0001737056, |
| "loss": 0.863, |
| "step": 8220 |
| }, |
| { |
| "epoch": 0.13184, |
| "grad_norm": 0.19927144050598145, |
| "learning_rate": 0.0001736416, |
| "loss": 0.9056, |
| "step": 8240 |
| }, |
| { |
| "epoch": 0.13216, |
| "grad_norm": 0.21809734404087067, |
| "learning_rate": 0.0001735776, |
| "loss": 0.8547, |
| "step": 8260 |
| }, |
| { |
| "epoch": 0.13248, |
| "grad_norm": 0.2040037214756012, |
| "learning_rate": 0.00017351360000000002, |
| "loss": 0.8567, |
| "step": 8280 |
| }, |
| { |
| "epoch": 0.1328, |
| "grad_norm": 0.19414140284061432, |
| "learning_rate": 0.00017344960000000001, |
| "loss": 0.8773, |
| "step": 8300 |
| }, |
| { |
| "epoch": 0.13312, |
| "grad_norm": 0.17483866214752197, |
| "learning_rate": 0.0001733856, |
| "loss": 0.9026, |
| "step": 8320 |
| }, |
| { |
| "epoch": 0.13344, |
| "grad_norm": 0.2505808472633362, |
| "learning_rate": 0.0001733216, |
| "loss": 0.8348, |
| "step": 8340 |
| }, |
| { |
| "epoch": 0.13376, |
| "grad_norm": 0.2515566051006317, |
| "learning_rate": 0.0001732576, |
| "loss": 0.8657, |
| "step": 8360 |
| }, |
| { |
| "epoch": 0.13408, |
| "grad_norm": 0.2105536013841629, |
| "learning_rate": 0.0001731936, |
| "loss": 0.8864, |
| "step": 8380 |
| }, |
| { |
| "epoch": 0.1344, |
| "grad_norm": 0.22910176217556, |
| "learning_rate": 0.0001731296, |
| "loss": 0.8379, |
| "step": 8400 |
| }, |
| { |
| "epoch": 0.13472, |
| "grad_norm": 0.20737454295158386, |
| "learning_rate": 0.0001730656, |
| "loss": 0.8684, |
| "step": 8420 |
| }, |
| { |
| "epoch": 0.13504, |
| "grad_norm": 0.22466444969177246, |
| "learning_rate": 0.0001730016, |
| "loss": 0.9522, |
| "step": 8440 |
| }, |
| { |
| "epoch": 0.13536, |
| "grad_norm": 0.19258467853069305, |
| "learning_rate": 0.0001729376, |
| "loss": 0.8525, |
| "step": 8460 |
| }, |
| { |
| "epoch": 0.13568, |
| "grad_norm": 0.2092629224061966, |
| "learning_rate": 0.00017287360000000002, |
| "loss": 0.8658, |
| "step": 8480 |
| }, |
| { |
| "epoch": 0.136, |
| "grad_norm": 0.20756912231445312, |
| "learning_rate": 0.00017280960000000002, |
| "loss": 0.9148, |
| "step": 8500 |
| }, |
| { |
| "epoch": 0.13632, |
| "grad_norm": 0.22604379057884216, |
| "learning_rate": 0.00017274560000000002, |
| "loss": 0.8489, |
| "step": 8520 |
| }, |
| { |
| "epoch": 0.13664, |
| "grad_norm": 0.2140427976846695, |
| "learning_rate": 0.0001726816, |
| "loss": 0.8702, |
| "step": 8540 |
| }, |
| { |
| "epoch": 0.13696, |
| "grad_norm": 0.22593297064304352, |
| "learning_rate": 0.0001726176, |
| "loss": 0.8572, |
| "step": 8560 |
| }, |
| { |
| "epoch": 0.13728, |
| "grad_norm": 0.2053360491991043, |
| "learning_rate": 0.0001725536, |
| "loss": 0.8283, |
| "step": 8580 |
| }, |
| { |
| "epoch": 0.1376, |
| "grad_norm": 0.2059011608362198, |
| "learning_rate": 0.0001724896, |
| "loss": 0.9218, |
| "step": 8600 |
| }, |
| { |
| "epoch": 0.13792, |
| "grad_norm": 0.19691585004329681, |
| "learning_rate": 0.00017242560000000002, |
| "loss": 0.8718, |
| "step": 8620 |
| }, |
| { |
| "epoch": 0.13824, |
| "grad_norm": 0.2076309472322464, |
| "learning_rate": 0.00017236480000000002, |
| "loss": 0.9297, |
| "step": 8640 |
| }, |
| { |
| "epoch": 0.13856, |
| "grad_norm": 0.26082372665405273, |
| "learning_rate": 0.00017230080000000002, |
| "loss": 0.8568, |
| "step": 8660 |
| }, |
| { |
| "epoch": 0.13888, |
| "grad_norm": 0.22894443571567535, |
| "learning_rate": 0.0001722368, |
| "loss": 0.858, |
| "step": 8680 |
| }, |
| { |
| "epoch": 0.1392, |
| "grad_norm": 0.2583048939704895, |
| "learning_rate": 0.0001721728, |
| "loss": 0.9089, |
| "step": 8700 |
| }, |
| { |
| "epoch": 0.13952, |
| "grad_norm": 0.23365485668182373, |
| "learning_rate": 0.0001721088, |
| "loss": 0.8283, |
| "step": 8720 |
| }, |
| { |
| "epoch": 0.13984, |
| "grad_norm": 0.23852278292179108, |
| "learning_rate": 0.0001720448, |
| "loss": 0.8573, |
| "step": 8740 |
| }, |
| { |
| "epoch": 0.14016, |
| "grad_norm": 0.22304783761501312, |
| "learning_rate": 0.00017198080000000002, |
| "loss": 0.8772, |
| "step": 8760 |
| }, |
| { |
| "epoch": 0.14048, |
| "grad_norm": 0.2686362862586975, |
| "learning_rate": 0.00017191680000000001, |
| "loss": 0.8857, |
| "step": 8780 |
| }, |
| { |
| "epoch": 0.1408, |
| "grad_norm": 0.17005324363708496, |
| "learning_rate": 0.0001718528, |
| "loss": 0.9015, |
| "step": 8800 |
| }, |
| { |
| "epoch": 0.14112, |
| "grad_norm": 0.22986558079719543, |
| "learning_rate": 0.0001717888, |
| "loss": 0.9192, |
| "step": 8820 |
| }, |
| { |
| "epoch": 0.14144, |
| "grad_norm": 0.21427962183952332, |
| "learning_rate": 0.00017172480000000003, |
| "loss": 0.8947, |
| "step": 8840 |
| }, |
| { |
| "epoch": 0.14176, |
| "grad_norm": 0.262226402759552, |
| "learning_rate": 0.0001716608, |
| "loss": 0.8878, |
| "step": 8860 |
| }, |
| { |
| "epoch": 0.14208, |
| "grad_norm": 0.23082557320594788, |
| "learning_rate": 0.0001715968, |
| "loss": 0.9263, |
| "step": 8880 |
| }, |
| { |
| "epoch": 0.1424, |
| "grad_norm": 0.2226615846157074, |
| "learning_rate": 0.0001715328, |
| "loss": 0.9526, |
| "step": 8900 |
| }, |
| { |
| "epoch": 0.14272, |
| "grad_norm": 0.2389681190252304, |
| "learning_rate": 0.0001714688, |
| "loss": 0.8784, |
| "step": 8920 |
| }, |
| { |
| "epoch": 0.14304, |
| "grad_norm": 0.20122146606445312, |
| "learning_rate": 0.0001714048, |
| "loss": 0.9487, |
| "step": 8940 |
| }, |
| { |
| "epoch": 0.14336, |
| "grad_norm": 0.24507276713848114, |
| "learning_rate": 0.00017134080000000002, |
| "loss": 0.8501, |
| "step": 8960 |
| }, |
| { |
| "epoch": 0.14368, |
| "grad_norm": 0.23927843570709229, |
| "learning_rate": 0.00017127680000000002, |
| "loss": 0.8898, |
| "step": 8980 |
| }, |
| { |
| "epoch": 0.144, |
| "grad_norm": 0.22527576982975006, |
| "learning_rate": 0.00017121280000000002, |
| "loss": 0.8939, |
| "step": 9000 |
| }, |
| { |
| "epoch": 0.14432, |
| "grad_norm": 0.23542018234729767, |
| "learning_rate": 0.0001711488, |
| "loss": 0.9004, |
| "step": 9020 |
| }, |
| { |
| "epoch": 0.14464, |
| "grad_norm": 0.21746650338172913, |
| "learning_rate": 0.0001710848, |
| "loss": 0.8618, |
| "step": 9040 |
| }, |
| { |
| "epoch": 0.14496, |
| "grad_norm": 0.2594437003135681, |
| "learning_rate": 0.0001710208, |
| "loss": 0.9052, |
| "step": 9060 |
| }, |
| { |
| "epoch": 0.14528, |
| "grad_norm": 0.23847267031669617, |
| "learning_rate": 0.0001709568, |
| "loss": 0.8654, |
| "step": 9080 |
| }, |
| { |
| "epoch": 0.1456, |
| "grad_norm": 0.2352636456489563, |
| "learning_rate": 0.00017089280000000002, |
| "loss": 0.8685, |
| "step": 9100 |
| }, |
| { |
| "epoch": 0.14592, |
| "grad_norm": 0.21218867599964142, |
| "learning_rate": 0.0001708288, |
| "loss": 0.8865, |
| "step": 9120 |
| }, |
| { |
| "epoch": 0.14624, |
| "grad_norm": 0.22339680790901184, |
| "learning_rate": 0.0001707648, |
| "loss": 0.9414, |
| "step": 9140 |
| }, |
| { |
| "epoch": 0.14656, |
| "grad_norm": 0.2145155370235443, |
| "learning_rate": 0.0001707008, |
| "loss": 0.871, |
| "step": 9160 |
| }, |
| { |
| "epoch": 0.14688, |
| "grad_norm": 0.24632301926612854, |
| "learning_rate": 0.00017063680000000003, |
| "loss": 0.9003, |
| "step": 9180 |
| }, |
| { |
| "epoch": 0.1472, |
| "grad_norm": 0.21344535052776337, |
| "learning_rate": 0.0001705728, |
| "loss": 0.9132, |
| "step": 9200 |
| }, |
| { |
| "epoch": 0.14752, |
| "grad_norm": 0.2178122103214264, |
| "learning_rate": 0.0001705088, |
| "loss": 0.9213, |
| "step": 9220 |
| }, |
| { |
| "epoch": 0.14784, |
| "grad_norm": 0.23042111098766327, |
| "learning_rate": 0.0001704448, |
| "loss": 0.9325, |
| "step": 9240 |
| }, |
| { |
| "epoch": 0.14816, |
| "grad_norm": 0.246158629655838, |
| "learning_rate": 0.0001703808, |
| "loss": 0.87, |
| "step": 9260 |
| }, |
| { |
| "epoch": 0.14848, |
| "grad_norm": 0.22557534277439117, |
| "learning_rate": 0.0001703168, |
| "loss": 0.8192, |
| "step": 9280 |
| }, |
| { |
| "epoch": 0.1488, |
| "grad_norm": 0.20784518122673035, |
| "learning_rate": 0.00017025280000000002, |
| "loss": 0.8372, |
| "step": 9300 |
| }, |
| { |
| "epoch": 0.14912, |
| "grad_norm": 0.23057977855205536, |
| "learning_rate": 0.00017018880000000002, |
| "loss": 0.9297, |
| "step": 9320 |
| }, |
| { |
| "epoch": 0.14944, |
| "grad_norm": 0.2289903163909912, |
| "learning_rate": 0.00017012480000000001, |
| "loss": 0.9167, |
| "step": 9340 |
| }, |
| { |
| "epoch": 0.14976, |
| "grad_norm": 0.22998815774917603, |
| "learning_rate": 0.0001700608, |
| "loss": 0.889, |
| "step": 9360 |
| }, |
| { |
| "epoch": 0.15008, |
| "grad_norm": 0.22863976657390594, |
| "learning_rate": 0.0001699968, |
| "loss": 0.8884, |
| "step": 9380 |
| }, |
| { |
| "epoch": 0.1504, |
| "grad_norm": 0.24748341739177704, |
| "learning_rate": 0.0001699328, |
| "loss": 0.8961, |
| "step": 9400 |
| }, |
| { |
| "epoch": 0.15072, |
| "grad_norm": 0.21250346302986145, |
| "learning_rate": 0.0001698688, |
| "loss": 0.8683, |
| "step": 9420 |
| }, |
| { |
| "epoch": 0.15104, |
| "grad_norm": 0.239846333861351, |
| "learning_rate": 0.00016980480000000002, |
| "loss": 0.8927, |
| "step": 9440 |
| }, |
| { |
| "epoch": 0.15136, |
| "grad_norm": 0.2487175464630127, |
| "learning_rate": 0.0001697408, |
| "loss": 0.9144, |
| "step": 9460 |
| }, |
| { |
| "epoch": 0.15168, |
| "grad_norm": 0.23323270678520203, |
| "learning_rate": 0.0001696768, |
| "loss": 0.9251, |
| "step": 9480 |
| }, |
| { |
| "epoch": 0.152, |
| "grad_norm": 0.19210824370384216, |
| "learning_rate": 0.0001696128, |
| "loss": 0.9244, |
| "step": 9500 |
| }, |
| { |
| "epoch": 0.15232, |
| "grad_norm": 0.23382435739040375, |
| "learning_rate": 0.00016954880000000003, |
| "loss": 0.9249, |
| "step": 9520 |
| }, |
| { |
| "epoch": 0.15264, |
| "grad_norm": 0.20494690537452698, |
| "learning_rate": 0.0001694848, |
| "loss": 0.878, |
| "step": 9540 |
| }, |
| { |
| "epoch": 0.15296, |
| "grad_norm": 0.23017622530460358, |
| "learning_rate": 0.0001694208, |
| "loss": 0.8656, |
| "step": 9560 |
| }, |
| { |
| "epoch": 0.15328, |
| "grad_norm": 0.26027923822402954, |
| "learning_rate": 0.0001693568, |
| "loss": 0.912, |
| "step": 9580 |
| }, |
| { |
| "epoch": 0.1536, |
| "grad_norm": 0.19583414494991302, |
| "learning_rate": 0.0001692928, |
| "loss": 0.8411, |
| "step": 9600 |
| }, |
| { |
| "epoch": 0.15392, |
| "grad_norm": 0.25373271107673645, |
| "learning_rate": 0.0001692288, |
| "loss": 0.8781, |
| "step": 9620 |
| }, |
| { |
| "epoch": 0.15424, |
| "grad_norm": 0.27190205454826355, |
| "learning_rate": 0.00016916480000000002, |
| "loss": 0.8691, |
| "step": 9640 |
| }, |
| { |
| "epoch": 0.15456, |
| "grad_norm": 0.22996129095554352, |
| "learning_rate": 0.00016910080000000002, |
| "loss": 0.8277, |
| "step": 9660 |
| }, |
| { |
| "epoch": 0.15488, |
| "grad_norm": 0.1947249174118042, |
| "learning_rate": 0.00016903680000000001, |
| "loss": 0.8873, |
| "step": 9680 |
| }, |
| { |
| "epoch": 0.1552, |
| "grad_norm": 0.18230539560317993, |
| "learning_rate": 0.0001689728, |
| "loss": 0.8315, |
| "step": 9700 |
| }, |
| { |
| "epoch": 0.15552, |
| "grad_norm": 0.25768032670021057, |
| "learning_rate": 0.0001689088, |
| "loss": 0.8645, |
| "step": 9720 |
| }, |
| { |
| "epoch": 0.15584, |
| "grad_norm": 0.2460031509399414, |
| "learning_rate": 0.0001688448, |
| "loss": 0.9031, |
| "step": 9740 |
| }, |
| { |
| "epoch": 0.15616, |
| "grad_norm": 0.22613097727298737, |
| "learning_rate": 0.0001687808, |
| "loss": 0.9065, |
| "step": 9760 |
| }, |
| { |
| "epoch": 0.15648, |
| "grad_norm": 0.2073383629322052, |
| "learning_rate": 0.00016871680000000002, |
| "loss": 0.8825, |
| "step": 9780 |
| }, |
| { |
| "epoch": 0.1568, |
| "grad_norm": 0.2087622731924057, |
| "learning_rate": 0.0001686528, |
| "loss": 0.9253, |
| "step": 9800 |
| }, |
| { |
| "epoch": 0.15712, |
| "grad_norm": 0.2113562375307083, |
| "learning_rate": 0.0001685888, |
| "loss": 0.8639, |
| "step": 9820 |
| }, |
| { |
| "epoch": 0.15744, |
| "grad_norm": 0.23061156272888184, |
| "learning_rate": 0.0001685248, |
| "loss": 0.8818, |
| "step": 9840 |
| }, |
| { |
| "epoch": 0.15776, |
| "grad_norm": 0.2453097254037857, |
| "learning_rate": 0.00016846080000000003, |
| "loss": 0.91, |
| "step": 9860 |
| }, |
| { |
| "epoch": 0.15808, |
| "grad_norm": 0.2568601071834564, |
| "learning_rate": 0.0001683968, |
| "loss": 0.9147, |
| "step": 9880 |
| }, |
| { |
| "epoch": 0.1584, |
| "grad_norm": 0.238372802734375, |
| "learning_rate": 0.0001683328, |
| "loss": 0.8514, |
| "step": 9900 |
| }, |
| { |
| "epoch": 0.15872, |
| "grad_norm": 0.2500544786453247, |
| "learning_rate": 0.0001682688, |
| "loss": 0.9219, |
| "step": 9920 |
| }, |
| { |
| "epoch": 0.15904, |
| "grad_norm": 0.22526203095912933, |
| "learning_rate": 0.0001682048, |
| "loss": 0.8553, |
| "step": 9940 |
| }, |
| { |
| "epoch": 0.15936, |
| "grad_norm": 0.2296661138534546, |
| "learning_rate": 0.0001681408, |
| "loss": 0.8867, |
| "step": 9960 |
| }, |
| { |
| "epoch": 0.15968, |
| "grad_norm": 0.19159358739852905, |
| "learning_rate": 0.00016807680000000002, |
| "loss": 0.8231, |
| "step": 9980 |
| }, |
| { |
| "epoch": 0.16, |
| "grad_norm": 0.21099399030208588, |
| "learning_rate": 0.00016801280000000002, |
| "loss": 0.9614, |
| "step": 10000 |
| }, |
| { |
| "epoch": 0.16032, |
| "grad_norm": 0.19851434230804443, |
| "learning_rate": 0.00016794880000000001, |
| "loss": 0.8711, |
| "step": 10020 |
| }, |
| { |
| "epoch": 0.16064, |
| "grad_norm": 0.255908340215683, |
| "learning_rate": 0.0001678848, |
| "loss": 0.8584, |
| "step": 10040 |
| }, |
| { |
| "epoch": 0.16096, |
| "grad_norm": 0.17037171125411987, |
| "learning_rate": 0.0001678208, |
| "loss": 0.8858, |
| "step": 10060 |
| }, |
| { |
| "epoch": 0.16128, |
| "grad_norm": 0.18440371751785278, |
| "learning_rate": 0.0001677568, |
| "loss": 0.8785, |
| "step": 10080 |
| }, |
| { |
| "epoch": 0.1616, |
| "grad_norm": 0.22271201014518738, |
| "learning_rate": 0.0001676928, |
| "loss": 0.8777, |
| "step": 10100 |
| }, |
| { |
| "epoch": 0.16192, |
| "grad_norm": 0.23368695378303528, |
| "learning_rate": 0.00016762880000000002, |
| "loss": 0.9383, |
| "step": 10120 |
| }, |
| { |
| "epoch": 0.16224, |
| "grad_norm": 0.2024698108434677, |
| "learning_rate": 0.0001675648, |
| "loss": 0.8235, |
| "step": 10140 |
| }, |
| { |
| "epoch": 0.16256, |
| "grad_norm": 0.24644511938095093, |
| "learning_rate": 0.0001675008, |
| "loss": 0.9375, |
| "step": 10160 |
| }, |
| { |
| "epoch": 0.16288, |
| "grad_norm": 0.21530281007289886, |
| "learning_rate": 0.0001674368, |
| "loss": 0.8697, |
| "step": 10180 |
| }, |
| { |
| "epoch": 0.1632, |
| "grad_norm": 0.2107221782207489, |
| "learning_rate": 0.00016737280000000002, |
| "loss": 0.8798, |
| "step": 10200 |
| }, |
| { |
| "epoch": 0.16352, |
| "grad_norm": 0.18811015784740448, |
| "learning_rate": 0.0001673088, |
| "loss": 0.9518, |
| "step": 10220 |
| }, |
| { |
| "epoch": 0.16384, |
| "grad_norm": 0.20447804033756256, |
| "learning_rate": 0.0001672448, |
| "loss": 0.8528, |
| "step": 10240 |
| }, |
| { |
| "epoch": 0.16416, |
| "grad_norm": 0.22877538204193115, |
| "learning_rate": 0.0001671808, |
| "loss": 0.9376, |
| "step": 10260 |
| }, |
| { |
| "epoch": 0.16448, |
| "grad_norm": 0.24324432015419006, |
| "learning_rate": 0.0001671168, |
| "loss": 0.8818, |
| "step": 10280 |
| }, |
| { |
| "epoch": 0.1648, |
| "grad_norm": 0.20559096336364746, |
| "learning_rate": 0.0001670528, |
| "loss": 0.9382, |
| "step": 10300 |
| }, |
| { |
| "epoch": 0.16512, |
| "grad_norm": 0.23329490423202515, |
| "learning_rate": 0.00016698880000000002, |
| "loss": 0.9457, |
| "step": 10320 |
| }, |
| { |
| "epoch": 0.16544, |
| "grad_norm": 0.23040834069252014, |
| "learning_rate": 0.00016692480000000002, |
| "loss": 0.8943, |
| "step": 10340 |
| }, |
| { |
| "epoch": 0.16576, |
| "grad_norm": 0.21570099890232086, |
| "learning_rate": 0.0001668608, |
| "loss": 0.8714, |
| "step": 10360 |
| }, |
| { |
| "epoch": 0.16608, |
| "grad_norm": 0.20824502408504486, |
| "learning_rate": 0.0001667968, |
| "loss": 0.8851, |
| "step": 10380 |
| }, |
| { |
| "epoch": 0.1664, |
| "grad_norm": 0.19650331139564514, |
| "learning_rate": 0.0001667328, |
| "loss": 0.8649, |
| "step": 10400 |
| }, |
| { |
| "epoch": 0.16672, |
| "grad_norm": 0.22227755188941956, |
| "learning_rate": 0.0001666688, |
| "loss": 0.9556, |
| "step": 10420 |
| }, |
| { |
| "epoch": 0.16704, |
| "grad_norm": 0.21929942071437836, |
| "learning_rate": 0.0001666048, |
| "loss": 0.9107, |
| "step": 10440 |
| }, |
| { |
| "epoch": 0.16736, |
| "grad_norm": 0.21728375554084778, |
| "learning_rate": 0.00016654080000000002, |
| "loss": 0.9389, |
| "step": 10460 |
| }, |
| { |
| "epoch": 0.16768, |
| "grad_norm": 0.257927805185318, |
| "learning_rate": 0.0001664768, |
| "loss": 0.852, |
| "step": 10480 |
| }, |
| { |
| "epoch": 0.168, |
| "grad_norm": 0.23964323103427887, |
| "learning_rate": 0.0001664128, |
| "loss": 0.8966, |
| "step": 10500 |
| }, |
| { |
| "epoch": 0.16832, |
| "grad_norm": 0.21869444847106934, |
| "learning_rate": 0.00016634880000000003, |
| "loss": 0.9496, |
| "step": 10520 |
| }, |
| { |
| "epoch": 0.16864, |
| "grad_norm": 0.2491443157196045, |
| "learning_rate": 0.00016628480000000002, |
| "loss": 0.8853, |
| "step": 10540 |
| }, |
| { |
| "epoch": 0.16896, |
| "grad_norm": 0.19421234726905823, |
| "learning_rate": 0.0001662208, |
| "loss": 0.9094, |
| "step": 10560 |
| }, |
| { |
| "epoch": 0.16928, |
| "grad_norm": 0.2546538710594177, |
| "learning_rate": 0.00016615680000000001, |
| "loss": 0.9055, |
| "step": 10580 |
| }, |
| { |
| "epoch": 0.1696, |
| "grad_norm": 0.21943865716457367, |
| "learning_rate": 0.0001660928, |
| "loss": 0.9036, |
| "step": 10600 |
| }, |
| { |
| "epoch": 0.16992, |
| "grad_norm": 0.26403695344924927, |
| "learning_rate": 0.0001660288, |
| "loss": 0.8961, |
| "step": 10620 |
| }, |
| { |
| "epoch": 0.17024, |
| "grad_norm": 0.2386874556541443, |
| "learning_rate": 0.0001659648, |
| "loss": 0.8756, |
| "step": 10640 |
| }, |
| { |
| "epoch": 0.17056, |
| "grad_norm": 0.2226932942867279, |
| "learning_rate": 0.00016590080000000002, |
| "loss": 0.847, |
| "step": 10660 |
| }, |
| { |
| "epoch": 0.17088, |
| "grad_norm": 0.19772516191005707, |
| "learning_rate": 0.00016583680000000002, |
| "loss": 0.8771, |
| "step": 10680 |
| }, |
| { |
| "epoch": 0.1712, |
| "grad_norm": 0.20000356435775757, |
| "learning_rate": 0.0001657728, |
| "loss": 0.922, |
| "step": 10700 |
| }, |
| { |
| "epoch": 0.17152, |
| "grad_norm": 0.24227920174598694, |
| "learning_rate": 0.0001657088, |
| "loss": 0.8792, |
| "step": 10720 |
| }, |
| { |
| "epoch": 0.17184, |
| "grad_norm": 0.2312862128019333, |
| "learning_rate": 0.0001656448, |
| "loss": 0.8606, |
| "step": 10740 |
| }, |
| { |
| "epoch": 0.17216, |
| "grad_norm": 0.229568749666214, |
| "learning_rate": 0.0001655808, |
| "loss": 0.8763, |
| "step": 10760 |
| }, |
| { |
| "epoch": 0.17248, |
| "grad_norm": 0.22286683320999146, |
| "learning_rate": 0.0001655168, |
| "loss": 0.9215, |
| "step": 10780 |
| }, |
| { |
| "epoch": 0.1728, |
| "grad_norm": 0.21545717120170593, |
| "learning_rate": 0.00016545280000000002, |
| "loss": 0.8683, |
| "step": 10800 |
| }, |
| { |
| "epoch": 0.17312, |
| "grad_norm": 0.2119383066892624, |
| "learning_rate": 0.0001653888, |
| "loss": 0.9104, |
| "step": 10820 |
| }, |
| { |
| "epoch": 0.17344, |
| "grad_norm": 0.25230464339256287, |
| "learning_rate": 0.0001653248, |
| "loss": 0.9178, |
| "step": 10840 |
| }, |
| { |
| "epoch": 0.17376, |
| "grad_norm": 0.20645944774150848, |
| "learning_rate": 0.00016526080000000003, |
| "loss": 0.8743, |
| "step": 10860 |
| }, |
| { |
| "epoch": 0.17408, |
| "grad_norm": 0.24283145368099213, |
| "learning_rate": 0.00016519680000000002, |
| "loss": 0.917, |
| "step": 10880 |
| }, |
| { |
| "epoch": 0.1744, |
| "grad_norm": 0.24862386286258698, |
| "learning_rate": 0.0001651328, |
| "loss": 0.8957, |
| "step": 10900 |
| }, |
| { |
| "epoch": 0.17472, |
| "grad_norm": 0.16515551507472992, |
| "learning_rate": 0.00016506880000000001, |
| "loss": 0.9213, |
| "step": 10920 |
| }, |
| { |
| "epoch": 0.17504, |
| "grad_norm": 0.21619679033756256, |
| "learning_rate": 0.0001650048, |
| "loss": 0.8658, |
| "step": 10940 |
| }, |
| { |
| "epoch": 0.17536, |
| "grad_norm": 0.19346758723258972, |
| "learning_rate": 0.0001649408, |
| "loss": 0.8456, |
| "step": 10960 |
| }, |
| { |
| "epoch": 0.17568, |
| "grad_norm": 0.21540650725364685, |
| "learning_rate": 0.0001648768, |
| "loss": 0.9633, |
| "step": 10980 |
| }, |
| { |
| "epoch": 0.176, |
| "grad_norm": 0.21067962050437927, |
| "learning_rate": 0.00016481280000000002, |
| "loss": 0.8907, |
| "step": 11000 |
| }, |
| { |
| "epoch": 0.17632, |
| "grad_norm": 0.2155253291130066, |
| "learning_rate": 0.00016474880000000002, |
| "loss": 0.8985, |
| "step": 11020 |
| }, |
| { |
| "epoch": 0.17664, |
| "grad_norm": 0.27138301730155945, |
| "learning_rate": 0.0001646848, |
| "loss": 0.8525, |
| "step": 11040 |
| }, |
| { |
| "epoch": 0.17696, |
| "grad_norm": 0.20680946111679077, |
| "learning_rate": 0.0001646208, |
| "loss": 0.9124, |
| "step": 11060 |
| }, |
| { |
| "epoch": 0.17728, |
| "grad_norm": 0.2446873039007187, |
| "learning_rate": 0.0001645568, |
| "loss": 0.8922, |
| "step": 11080 |
| }, |
| { |
| "epoch": 0.1776, |
| "grad_norm": 0.19545750319957733, |
| "learning_rate": 0.0001644928, |
| "loss": 0.8919, |
| "step": 11100 |
| }, |
| { |
| "epoch": 0.17792, |
| "grad_norm": 0.20573855936527252, |
| "learning_rate": 0.0001644288, |
| "loss": 0.8495, |
| "step": 11120 |
| }, |
| { |
| "epoch": 0.17824, |
| "grad_norm": 0.1951497346162796, |
| "learning_rate": 0.00016436480000000002, |
| "loss": 0.8981, |
| "step": 11140 |
| }, |
| { |
| "epoch": 0.17856, |
| "grad_norm": 0.25471144914627075, |
| "learning_rate": 0.0001643008, |
| "loss": 0.9582, |
| "step": 11160 |
| }, |
| { |
| "epoch": 0.17888, |
| "grad_norm": 0.22080758213996887, |
| "learning_rate": 0.0001642368, |
| "loss": 0.9398, |
| "step": 11180 |
| }, |
| { |
| "epoch": 0.1792, |
| "grad_norm": 0.23357786238193512, |
| "learning_rate": 0.00016417280000000003, |
| "loss": 0.8585, |
| "step": 11200 |
| }, |
| { |
| "epoch": 0.17952, |
| "grad_norm": 0.3059156537055969, |
| "learning_rate": 0.00016410880000000002, |
| "loss": 0.9087, |
| "step": 11220 |
| }, |
| { |
| "epoch": 0.17984, |
| "grad_norm": 0.21788957715034485, |
| "learning_rate": 0.0001640448, |
| "loss": 0.9112, |
| "step": 11240 |
| }, |
| { |
| "epoch": 0.18016, |
| "grad_norm": 0.2401525229215622, |
| "learning_rate": 0.00016398080000000001, |
| "loss": 0.9099, |
| "step": 11260 |
| }, |
| { |
| "epoch": 0.18048, |
| "grad_norm": 0.22227467596530914, |
| "learning_rate": 0.0001639168, |
| "loss": 0.8272, |
| "step": 11280 |
| }, |
| { |
| "epoch": 0.1808, |
| "grad_norm": 0.21627697348594666, |
| "learning_rate": 0.0001638528, |
| "loss": 0.8753, |
| "step": 11300 |
| }, |
| { |
| "epoch": 0.18112, |
| "grad_norm": 0.21134355664253235, |
| "learning_rate": 0.0001637888, |
| "loss": 0.9182, |
| "step": 11320 |
| }, |
| { |
| "epoch": 0.18144, |
| "grad_norm": 0.22719112038612366, |
| "learning_rate": 0.00016372480000000002, |
| "loss": 0.8454, |
| "step": 11340 |
| }, |
| { |
| "epoch": 0.18176, |
| "grad_norm": 0.22609511017799377, |
| "learning_rate": 0.00016366080000000002, |
| "loss": 0.888, |
| "step": 11360 |
| }, |
| { |
| "epoch": 0.18208, |
| "grad_norm": 0.19711975753307343, |
| "learning_rate": 0.0001635968, |
| "loss": 0.942, |
| "step": 11380 |
| }, |
| { |
| "epoch": 0.1824, |
| "grad_norm": 0.2588805854320526, |
| "learning_rate": 0.0001635328, |
| "loss": 0.9463, |
| "step": 11400 |
| }, |
| { |
| "epoch": 0.18272, |
| "grad_norm": 0.25787708163261414, |
| "learning_rate": 0.0001634688, |
| "loss": 0.9114, |
| "step": 11420 |
| }, |
| { |
| "epoch": 0.18304, |
| "grad_norm": 0.2743508219718933, |
| "learning_rate": 0.0001634048, |
| "loss": 0.873, |
| "step": 11440 |
| }, |
| { |
| "epoch": 0.18336, |
| "grad_norm": 0.23172695934772491, |
| "learning_rate": 0.0001633408, |
| "loss": 0.8495, |
| "step": 11460 |
| }, |
| { |
| "epoch": 0.18368, |
| "grad_norm": 0.18422289192676544, |
| "learning_rate": 0.00016327680000000002, |
| "loss": 0.8821, |
| "step": 11480 |
| }, |
| { |
| "epoch": 0.184, |
| "grad_norm": 0.2328750044107437, |
| "learning_rate": 0.0001632128, |
| "loss": 0.8885, |
| "step": 11500 |
| }, |
| { |
| "epoch": 0.18432, |
| "grad_norm": 0.26465412974357605, |
| "learning_rate": 0.0001631488, |
| "loss": 0.8943, |
| "step": 11520 |
| }, |
| { |
| "epoch": 0.18464, |
| "grad_norm": 0.27734020352363586, |
| "learning_rate": 0.00016308480000000003, |
| "loss": 0.855, |
| "step": 11540 |
| }, |
| { |
| "epoch": 0.18496, |
| "grad_norm": 0.24460507929325104, |
| "learning_rate": 0.00016302080000000002, |
| "loss": 0.8996, |
| "step": 11560 |
| }, |
| { |
| "epoch": 0.18528, |
| "grad_norm": 0.2152118980884552, |
| "learning_rate": 0.0001629568, |
| "loss": 0.839, |
| "step": 11580 |
| }, |
| { |
| "epoch": 0.1856, |
| "grad_norm": 0.22813241183757782, |
| "learning_rate": 0.00016289280000000001, |
| "loss": 0.9257, |
| "step": 11600 |
| }, |
| { |
| "epoch": 0.18592, |
| "grad_norm": 0.2076783925294876, |
| "learning_rate": 0.0001628288, |
| "loss": 0.88, |
| "step": 11620 |
| }, |
| { |
| "epoch": 0.18624, |
| "grad_norm": 0.23828792572021484, |
| "learning_rate": 0.0001627648, |
| "loss": 0.9087, |
| "step": 11640 |
| }, |
| { |
| "epoch": 0.18656, |
| "grad_norm": 0.24277402460575104, |
| "learning_rate": 0.0001627008, |
| "loss": 0.9419, |
| "step": 11660 |
| }, |
| { |
| "epoch": 0.18688, |
| "grad_norm": 0.24770581722259521, |
| "learning_rate": 0.00016263680000000002, |
| "loss": 0.9184, |
| "step": 11680 |
| }, |
| { |
| "epoch": 0.1872, |
| "grad_norm": 0.23547635972499847, |
| "learning_rate": 0.00016257280000000002, |
| "loss": 0.9069, |
| "step": 11700 |
| }, |
| { |
| "epoch": 0.18752, |
| "grad_norm": 0.17838741838932037, |
| "learning_rate": 0.0001625088, |
| "loss": 0.9483, |
| "step": 11720 |
| }, |
| { |
| "epoch": 0.18784, |
| "grad_norm": 0.23091432452201843, |
| "learning_rate": 0.0001624448, |
| "loss": 0.9225, |
| "step": 11740 |
| }, |
| { |
| "epoch": 0.18816, |
| "grad_norm": 0.2132597118616104, |
| "learning_rate": 0.0001623808, |
| "loss": 0.8979, |
| "step": 11760 |
| }, |
| { |
| "epoch": 0.18848, |
| "grad_norm": 0.2296367734670639, |
| "learning_rate": 0.0001623168, |
| "loss": 0.8762, |
| "step": 11780 |
| }, |
| { |
| "epoch": 0.1888, |
| "grad_norm": 0.20997250080108643, |
| "learning_rate": 0.0001622528, |
| "loss": 0.9156, |
| "step": 11800 |
| }, |
| { |
| "epoch": 0.18912, |
| "grad_norm": 0.2033025026321411, |
| "learning_rate": 0.00016218880000000001, |
| "loss": 0.8847, |
| "step": 11820 |
| }, |
| { |
| "epoch": 0.18944, |
| "grad_norm": 0.21794314682483673, |
| "learning_rate": 0.0001621248, |
| "loss": 0.8564, |
| "step": 11840 |
| }, |
| { |
| "epoch": 0.18976, |
| "grad_norm": 0.23999591171741486, |
| "learning_rate": 0.0001620608, |
| "loss": 0.8581, |
| "step": 11860 |
| }, |
| { |
| "epoch": 0.19008, |
| "grad_norm": 0.2366144210100174, |
| "learning_rate": 0.00016199680000000003, |
| "loss": 0.8745, |
| "step": 11880 |
| }, |
| { |
| "epoch": 0.1904, |
| "grad_norm": 0.2415480762720108, |
| "learning_rate": 0.00016193280000000002, |
| "loss": 0.9004, |
| "step": 11900 |
| }, |
| { |
| "epoch": 0.19072, |
| "grad_norm": 0.22656038403511047, |
| "learning_rate": 0.0001618688, |
| "loss": 0.8871, |
| "step": 11920 |
| }, |
| { |
| "epoch": 0.19104, |
| "grad_norm": 0.2326974719762802, |
| "learning_rate": 0.0001618048, |
| "loss": 0.954, |
| "step": 11940 |
| }, |
| { |
| "epoch": 0.19136, |
| "grad_norm": 0.212848499417305, |
| "learning_rate": 0.0001617408, |
| "loss": 0.9154, |
| "step": 11960 |
| }, |
| { |
| "epoch": 0.19168, |
| "grad_norm": 0.16706988215446472, |
| "learning_rate": 0.0001616768, |
| "loss": 0.9052, |
| "step": 11980 |
| }, |
| { |
| "epoch": 0.192, |
| "grad_norm": 0.2651592791080475, |
| "learning_rate": 0.0001616128, |
| "loss": 0.9448, |
| "step": 12000 |
| }, |
| { |
| "epoch": 0.19232, |
| "grad_norm": 0.24427416920661926, |
| "learning_rate": 0.00016154880000000002, |
| "loss": 0.8794, |
| "step": 12020 |
| }, |
| { |
| "epoch": 0.19264, |
| "grad_norm": 0.19025467336177826, |
| "learning_rate": 0.00016148480000000002, |
| "loss": 0.8535, |
| "step": 12040 |
| }, |
| { |
| "epoch": 0.19296, |
| "grad_norm": 0.21214129030704498, |
| "learning_rate": 0.0001614208, |
| "loss": 0.8756, |
| "step": 12060 |
| }, |
| { |
| "epoch": 0.19328, |
| "grad_norm": 0.2451871931552887, |
| "learning_rate": 0.0001613568, |
| "loss": 0.8858, |
| "step": 12080 |
| }, |
| { |
| "epoch": 0.1936, |
| "grad_norm": 0.23217494785785675, |
| "learning_rate": 0.0001612928, |
| "loss": 0.9066, |
| "step": 12100 |
| }, |
| { |
| "epoch": 0.19392, |
| "grad_norm": 0.2479615956544876, |
| "learning_rate": 0.0001612288, |
| "loss": 0.8477, |
| "step": 12120 |
| }, |
| { |
| "epoch": 0.19424, |
| "grad_norm": 0.20965996384620667, |
| "learning_rate": 0.0001611648, |
| "loss": 0.8573, |
| "step": 12140 |
| }, |
| { |
| "epoch": 0.19456, |
| "grad_norm": 0.19635817408561707, |
| "learning_rate": 0.00016110080000000001, |
| "loss": 0.9182, |
| "step": 12160 |
| }, |
| { |
| "epoch": 0.19488, |
| "grad_norm": 0.2266317903995514, |
| "learning_rate": 0.0001610368, |
| "loss": 0.9243, |
| "step": 12180 |
| }, |
| { |
| "epoch": 0.1952, |
| "grad_norm": 0.24232080578804016, |
| "learning_rate": 0.0001609728, |
| "loss": 0.8944, |
| "step": 12200 |
| }, |
| { |
| "epoch": 0.19552, |
| "grad_norm": 0.18726186454296112, |
| "learning_rate": 0.00016090880000000003, |
| "loss": 0.9084, |
| "step": 12220 |
| }, |
| { |
| "epoch": 0.19584, |
| "grad_norm": 0.25809457898139954, |
| "learning_rate": 0.00016084480000000002, |
| "loss": 0.8629, |
| "step": 12240 |
| }, |
| { |
| "epoch": 0.19616, |
| "grad_norm": 0.24405358731746674, |
| "learning_rate": 0.0001607808, |
| "loss": 0.9071, |
| "step": 12260 |
| }, |
| { |
| "epoch": 0.19648, |
| "grad_norm": 0.21723495423793793, |
| "learning_rate": 0.0001607168, |
| "loss": 0.8814, |
| "step": 12280 |
| }, |
| { |
| "epoch": 0.1968, |
| "grad_norm": 0.23140837252140045, |
| "learning_rate": 0.0001606528, |
| "loss": 0.8499, |
| "step": 12300 |
| }, |
| { |
| "epoch": 0.19712, |
| "grad_norm": 0.22470901906490326, |
| "learning_rate": 0.0001605888, |
| "loss": 0.9249, |
| "step": 12320 |
| }, |
| { |
| "epoch": 0.19744, |
| "grad_norm": 0.19264104962348938, |
| "learning_rate": 0.0001605248, |
| "loss": 0.9057, |
| "step": 12340 |
| }, |
| { |
| "epoch": 0.19776, |
| "grad_norm": 0.23376864194869995, |
| "learning_rate": 0.00016046080000000002, |
| "loss": 0.9535, |
| "step": 12360 |
| }, |
| { |
| "epoch": 0.19808, |
| "grad_norm": 0.2225295752286911, |
| "learning_rate": 0.00016039680000000002, |
| "loss": 0.865, |
| "step": 12380 |
| }, |
| { |
| "epoch": 0.1984, |
| "grad_norm": 0.23474235832691193, |
| "learning_rate": 0.0001603328, |
| "loss": 0.9137, |
| "step": 12400 |
| }, |
| { |
| "epoch": 0.19872, |
| "grad_norm": 0.29955846071243286, |
| "learning_rate": 0.0001602688, |
| "loss": 0.8618, |
| "step": 12420 |
| }, |
| { |
| "epoch": 0.19904, |
| "grad_norm": 0.25170376896858215, |
| "learning_rate": 0.0001602048, |
| "loss": 0.9341, |
| "step": 12440 |
| }, |
| { |
| "epoch": 0.19936, |
| "grad_norm": 0.23932316899299622, |
| "learning_rate": 0.0001601408, |
| "loss": 0.876, |
| "step": 12460 |
| }, |
| { |
| "epoch": 0.19968, |
| "grad_norm": 0.24285189807415009, |
| "learning_rate": 0.0001600768, |
| "loss": 0.8858, |
| "step": 12480 |
| }, |
| { |
| "epoch": 0.2, |
| "grad_norm": 0.23304852843284607, |
| "learning_rate": 0.00016001280000000001, |
| "loss": 0.8659, |
| "step": 12500 |
| }, |
| { |
| "epoch": 0.20032, |
| "grad_norm": 0.21106384694576263, |
| "learning_rate": 0.0001599488, |
| "loss": 0.8626, |
| "step": 12520 |
| }, |
| { |
| "epoch": 0.20064, |
| "grad_norm": 0.20884625613689423, |
| "learning_rate": 0.0001598848, |
| "loss": 0.8872, |
| "step": 12540 |
| }, |
| { |
| "epoch": 0.20096, |
| "grad_norm": 0.20588114857673645, |
| "learning_rate": 0.00015982080000000003, |
| "loss": 0.8462, |
| "step": 12560 |
| }, |
| { |
| "epoch": 0.20128, |
| "grad_norm": 0.2657853066921234, |
| "learning_rate": 0.00015975680000000002, |
| "loss": 0.9246, |
| "step": 12580 |
| }, |
| { |
| "epoch": 0.2016, |
| "grad_norm": 0.22846530377864838, |
| "learning_rate": 0.0001596928, |
| "loss": 0.8847, |
| "step": 12600 |
| }, |
| { |
| "epoch": 0.20192, |
| "grad_norm": 0.20565031468868256, |
| "learning_rate": 0.0001596288, |
| "loss": 0.8966, |
| "step": 12620 |
| }, |
| { |
| "epoch": 0.20224, |
| "grad_norm": 0.19185014069080353, |
| "learning_rate": 0.0001595648, |
| "loss": 0.9018, |
| "step": 12640 |
| }, |
| { |
| "epoch": 0.20256, |
| "grad_norm": 0.23399049043655396, |
| "learning_rate": 0.0001595008, |
| "loss": 0.9258, |
| "step": 12660 |
| }, |
| { |
| "epoch": 0.20288, |
| "grad_norm": 0.2446955144405365, |
| "learning_rate": 0.0001594368, |
| "loss": 0.8574, |
| "step": 12680 |
| }, |
| { |
| "epoch": 0.2032, |
| "grad_norm": 0.2344285249710083, |
| "learning_rate": 0.00015937280000000002, |
| "loss": 0.9078, |
| "step": 12700 |
| }, |
| { |
| "epoch": 0.20352, |
| "grad_norm": 0.2038036733865738, |
| "learning_rate": 0.00015930880000000002, |
| "loss": 0.9086, |
| "step": 12720 |
| }, |
| { |
| "epoch": 0.20384, |
| "grad_norm": 0.23228555917739868, |
| "learning_rate": 0.0001592448, |
| "loss": 0.931, |
| "step": 12740 |
| }, |
| { |
| "epoch": 0.20416, |
| "grad_norm": 0.2811441719532013, |
| "learning_rate": 0.0001591808, |
| "loss": 0.8438, |
| "step": 12760 |
| }, |
| { |
| "epoch": 0.20448, |
| "grad_norm": 0.2014266848564148, |
| "learning_rate": 0.0001591168, |
| "loss": 0.9311, |
| "step": 12780 |
| }, |
| { |
| "epoch": 0.2048, |
| "grad_norm": 0.23992010951042175, |
| "learning_rate": 0.0001590528, |
| "loss": 0.8965, |
| "step": 12800 |
| }, |
| { |
| "epoch": 0.20512, |
| "grad_norm": 0.25870153307914734, |
| "learning_rate": 0.0001589888, |
| "loss": 0.8959, |
| "step": 12820 |
| }, |
| { |
| "epoch": 0.20544, |
| "grad_norm": 0.24375873804092407, |
| "learning_rate": 0.00015892480000000001, |
| "loss": 0.8786, |
| "step": 12840 |
| }, |
| { |
| "epoch": 0.20576, |
| "grad_norm": 0.20621752738952637, |
| "learning_rate": 0.0001588608, |
| "loss": 0.8796, |
| "step": 12860 |
| }, |
| { |
| "epoch": 0.20608, |
| "grad_norm": 0.23437882959842682, |
| "learning_rate": 0.0001587968, |
| "loss": 0.8592, |
| "step": 12880 |
| }, |
| { |
| "epoch": 0.2064, |
| "grad_norm": 0.23581136763095856, |
| "learning_rate": 0.00015873280000000003, |
| "loss": 0.8651, |
| "step": 12900 |
| }, |
| { |
| "epoch": 0.20672, |
| "grad_norm": 0.24483484029769897, |
| "learning_rate": 0.00015866880000000002, |
| "loss": 0.9199, |
| "step": 12920 |
| }, |
| { |
| "epoch": 0.20704, |
| "grad_norm": 0.3012985289096832, |
| "learning_rate": 0.0001586048, |
| "loss": 0.859, |
| "step": 12940 |
| }, |
| { |
| "epoch": 0.20736, |
| "grad_norm": 0.26789209246635437, |
| "learning_rate": 0.0001585408, |
| "loss": 0.8816, |
| "step": 12960 |
| }, |
| { |
| "epoch": 0.20768, |
| "grad_norm": 0.21916130185127258, |
| "learning_rate": 0.0001584768, |
| "loss": 0.9345, |
| "step": 12980 |
| }, |
| { |
| "epoch": 0.208, |
| "grad_norm": 0.8556731343269348, |
| "learning_rate": 0.0001584128, |
| "loss": 0.9199, |
| "step": 13000 |
| }, |
| { |
| "epoch": 0.20832, |
| "grad_norm": 0.22015364468097687, |
| "learning_rate": 0.0001583488, |
| "loss": 0.8742, |
| "step": 13020 |
| }, |
| { |
| "epoch": 0.20864, |
| "grad_norm": 0.2598000168800354, |
| "learning_rate": 0.00015828480000000002, |
| "loss": 0.8665, |
| "step": 13040 |
| }, |
| { |
| "epoch": 0.20896, |
| "grad_norm": 0.22221586108207703, |
| "learning_rate": 0.00015822080000000001, |
| "loss": 0.892, |
| "step": 13060 |
| }, |
| { |
| "epoch": 0.20928, |
| "grad_norm": 0.2682360112667084, |
| "learning_rate": 0.0001581568, |
| "loss": 0.91, |
| "step": 13080 |
| }, |
| { |
| "epoch": 0.2096, |
| "grad_norm": 0.24058601260185242, |
| "learning_rate": 0.0001580928, |
| "loss": 0.8826, |
| "step": 13100 |
| }, |
| { |
| "epoch": 0.20992, |
| "grad_norm": 0.25506773591041565, |
| "learning_rate": 0.0001580288, |
| "loss": 0.8979, |
| "step": 13120 |
| }, |
| { |
| "epoch": 0.21024, |
| "grad_norm": 0.2581631541252136, |
| "learning_rate": 0.0001579648, |
| "loss": 0.9204, |
| "step": 13140 |
| }, |
| { |
| "epoch": 0.21056, |
| "grad_norm": 0.2511695623397827, |
| "learning_rate": 0.0001579008, |
| "loss": 0.8796, |
| "step": 13160 |
| }, |
| { |
| "epoch": 0.21088, |
| "grad_norm": 0.20950326323509216, |
| "learning_rate": 0.0001578368, |
| "loss": 0.8267, |
| "step": 13180 |
| }, |
| { |
| "epoch": 0.2112, |
| "grad_norm": 0.2644106149673462, |
| "learning_rate": 0.0001577728, |
| "loss": 0.9124, |
| "step": 13200 |
| }, |
| { |
| "epoch": 0.21152, |
| "grad_norm": 0.1935633271932602, |
| "learning_rate": 0.0001577088, |
| "loss": 0.8468, |
| "step": 13220 |
| }, |
| { |
| "epoch": 0.21184, |
| "grad_norm": 0.2543448507785797, |
| "learning_rate": 0.00015764480000000003, |
| "loss": 0.8965, |
| "step": 13240 |
| }, |
| { |
| "epoch": 0.21216, |
| "grad_norm": 0.27806851267814636, |
| "learning_rate": 0.00015758080000000002, |
| "loss": 0.894, |
| "step": 13260 |
| }, |
| { |
| "epoch": 0.21248, |
| "grad_norm": 0.18095877766609192, |
| "learning_rate": 0.0001575168, |
| "loss": 0.8876, |
| "step": 13280 |
| }, |
| { |
| "epoch": 0.2128, |
| "grad_norm": 0.21904884278774261, |
| "learning_rate": 0.0001574528, |
| "loss": 0.8835, |
| "step": 13300 |
| }, |
| { |
| "epoch": 0.21312, |
| "grad_norm": 0.25367972254753113, |
| "learning_rate": 0.0001573888, |
| "loss": 0.8688, |
| "step": 13320 |
| }, |
| { |
| "epoch": 0.21344, |
| "grad_norm": 0.261203408241272, |
| "learning_rate": 0.0001573248, |
| "loss": 0.9173, |
| "step": 13340 |
| }, |
| { |
| "epoch": 0.21376, |
| "grad_norm": 0.25779855251312256, |
| "learning_rate": 0.0001572608, |
| "loss": 0.9127, |
| "step": 13360 |
| }, |
| { |
| "epoch": 0.21408, |
| "grad_norm": 0.20082098245620728, |
| "learning_rate": 0.00015719680000000002, |
| "loss": 0.9433, |
| "step": 13380 |
| }, |
| { |
| "epoch": 0.2144, |
| "grad_norm": 0.22630241513252258, |
| "learning_rate": 0.00015713280000000001, |
| "loss": 0.9027, |
| "step": 13400 |
| }, |
| { |
| "epoch": 0.21472, |
| "grad_norm": 0.2328576296567917, |
| "learning_rate": 0.0001570688, |
| "loss": 0.8736, |
| "step": 13420 |
| }, |
| { |
| "epoch": 0.21504, |
| "grad_norm": 0.24743099510669708, |
| "learning_rate": 0.00015700480000000003, |
| "loss": 0.9568, |
| "step": 13440 |
| }, |
| { |
| "epoch": 0.21536, |
| "grad_norm": 0.23386693000793457, |
| "learning_rate": 0.0001569408, |
| "loss": 0.9131, |
| "step": 13460 |
| }, |
| { |
| "epoch": 0.21568, |
| "grad_norm": 0.2177802473306656, |
| "learning_rate": 0.0001568768, |
| "loss": 0.8948, |
| "step": 13480 |
| }, |
| { |
| "epoch": 0.216, |
| "grad_norm": 0.19793163239955902, |
| "learning_rate": 0.00015681280000000002, |
| "loss": 0.9037, |
| "step": 13500 |
| }, |
| { |
| "epoch": 0.21632, |
| "grad_norm": 0.6092952489852905, |
| "learning_rate": 0.0001567488, |
| "loss": 0.8912, |
| "step": 13520 |
| }, |
| { |
| "epoch": 0.21664, |
| "grad_norm": 0.21942676603794098, |
| "learning_rate": 0.0001566848, |
| "loss": 0.8476, |
| "step": 13540 |
| }, |
| { |
| "epoch": 0.21696, |
| "grad_norm": 0.2475002408027649, |
| "learning_rate": 0.000156624, |
| "loss": 0.8796, |
| "step": 13560 |
| }, |
| { |
| "epoch": 0.21728, |
| "grad_norm": 0.25338417291641235, |
| "learning_rate": 0.00015656, |
| "loss": 0.9155, |
| "step": 13580 |
| }, |
| { |
| "epoch": 0.2176, |
| "grad_norm": 0.22608576714992523, |
| "learning_rate": 0.000156496, |
| "loss": 0.8685, |
| "step": 13600 |
| }, |
| { |
| "epoch": 0.21792, |
| "grad_norm": 0.20519301295280457, |
| "learning_rate": 0.000156432, |
| "loss": 0.8913, |
| "step": 13620 |
| }, |
| { |
| "epoch": 0.21824, |
| "grad_norm": 0.20905616879463196, |
| "learning_rate": 0.000156368, |
| "loss": 0.9382, |
| "step": 13640 |
| }, |
| { |
| "epoch": 0.21856, |
| "grad_norm": 0.21286025643348694, |
| "learning_rate": 0.000156304, |
| "loss": 0.8659, |
| "step": 13660 |
| }, |
| { |
| "epoch": 0.21888, |
| "grad_norm": 0.23173551261425018, |
| "learning_rate": 0.00015624, |
| "loss": 0.845, |
| "step": 13680 |
| }, |
| { |
| "epoch": 0.2192, |
| "grad_norm": 0.2360743284225464, |
| "learning_rate": 0.000156176, |
| "loss": 0.9332, |
| "step": 13700 |
| }, |
| { |
| "epoch": 0.21952, |
| "grad_norm": 0.23367565870285034, |
| "learning_rate": 0.00015611200000000003, |
| "loss": 0.8805, |
| "step": 13720 |
| }, |
| { |
| "epoch": 0.21984, |
| "grad_norm": 0.2483336627483368, |
| "learning_rate": 0.00015604800000000002, |
| "loss": 0.9189, |
| "step": 13740 |
| }, |
| { |
| "epoch": 0.22016, |
| "grad_norm": 0.23518161475658417, |
| "learning_rate": 0.000155984, |
| "loss": 0.8927, |
| "step": 13760 |
| }, |
| { |
| "epoch": 0.22048, |
| "grad_norm": 0.2596130073070526, |
| "learning_rate": 0.00015592, |
| "loss": 0.8879, |
| "step": 13780 |
| }, |
| { |
| "epoch": 0.2208, |
| "grad_norm": 0.20567701756954193, |
| "learning_rate": 0.000155856, |
| "loss": 0.8677, |
| "step": 13800 |
| }, |
| { |
| "epoch": 0.22112, |
| "grad_norm": 0.21333087980747223, |
| "learning_rate": 0.000155792, |
| "loss": 0.8599, |
| "step": 13820 |
| }, |
| { |
| "epoch": 0.22144, |
| "grad_norm": 0.21102353930473328, |
| "learning_rate": 0.000155728, |
| "loss": 0.955, |
| "step": 13840 |
| }, |
| { |
| "epoch": 0.22176, |
| "grad_norm": 0.23368091881275177, |
| "learning_rate": 0.00015566400000000002, |
| "loss": 0.9107, |
| "step": 13860 |
| }, |
| { |
| "epoch": 0.22208, |
| "grad_norm": 0.2646392285823822, |
| "learning_rate": 0.00015560000000000001, |
| "loss": 0.8605, |
| "step": 13880 |
| }, |
| { |
| "epoch": 0.2224, |
| "grad_norm": 0.2340191900730133, |
| "learning_rate": 0.000155536, |
| "loss": 0.8651, |
| "step": 13900 |
| }, |
| { |
| "epoch": 0.22272, |
| "grad_norm": 0.22169966995716095, |
| "learning_rate": 0.000155472, |
| "loss": 0.8232, |
| "step": 13920 |
| }, |
| { |
| "epoch": 0.22304, |
| "grad_norm": 0.2382878214120865, |
| "learning_rate": 0.000155408, |
| "loss": 0.8581, |
| "step": 13940 |
| }, |
| { |
| "epoch": 0.22336, |
| "grad_norm": 0.22548457980155945, |
| "learning_rate": 0.000155344, |
| "loss": 0.9245, |
| "step": 13960 |
| }, |
| { |
| "epoch": 0.22368, |
| "grad_norm": 0.2386041283607483, |
| "learning_rate": 0.00015528, |
| "loss": 0.9159, |
| "step": 13980 |
| }, |
| { |
| "epoch": 0.224, |
| "grad_norm": 0.2749132812023163, |
| "learning_rate": 0.000155216, |
| "loss": 0.9209, |
| "step": 14000 |
| }, |
| { |
| "epoch": 0.22432, |
| "grad_norm": 0.21053732931613922, |
| "learning_rate": 0.000155152, |
| "loss": 0.8694, |
| "step": 14020 |
| }, |
| { |
| "epoch": 0.22464, |
| "grad_norm": 0.21479672193527222, |
| "learning_rate": 0.000155088, |
| "loss": 0.8775, |
| "step": 14040 |
| }, |
| { |
| "epoch": 0.22496, |
| "grad_norm": 0.21168935298919678, |
| "learning_rate": 0.00015502400000000003, |
| "loss": 0.886, |
| "step": 14060 |
| }, |
| { |
| "epoch": 0.22528, |
| "grad_norm": 0.23790377378463745, |
| "learning_rate": 0.00015496000000000002, |
| "loss": 0.9171, |
| "step": 14080 |
| }, |
| { |
| "epoch": 0.2256, |
| "grad_norm": 0.2546534240245819, |
| "learning_rate": 0.000154896, |
| "loss": 0.9024, |
| "step": 14100 |
| }, |
| { |
| "epoch": 0.22592, |
| "grad_norm": 0.21047984063625336, |
| "learning_rate": 0.000154832, |
| "loss": 0.9181, |
| "step": 14120 |
| }, |
| { |
| "epoch": 0.22624, |
| "grad_norm": 0.18703001737594604, |
| "learning_rate": 0.000154768, |
| "loss": 0.9229, |
| "step": 14140 |
| }, |
| { |
| "epoch": 0.22656, |
| "grad_norm": 0.2910281717777252, |
| "learning_rate": 0.000154704, |
| "loss": 0.8769, |
| "step": 14160 |
| }, |
| { |
| "epoch": 0.22688, |
| "grad_norm": 0.253282368183136, |
| "learning_rate": 0.00015464, |
| "loss": 0.8899, |
| "step": 14180 |
| }, |
| { |
| "epoch": 0.2272, |
| "grad_norm": 0.23244041204452515, |
| "learning_rate": 0.00015457600000000002, |
| "loss": 0.8847, |
| "step": 14200 |
| }, |
| { |
| "epoch": 0.22752, |
| "grad_norm": 0.2044428586959839, |
| "learning_rate": 0.00015451200000000001, |
| "loss": 0.8558, |
| "step": 14220 |
| }, |
| { |
| "epoch": 0.22784, |
| "grad_norm": 0.2259109914302826, |
| "learning_rate": 0.000154448, |
| "loss": 0.9359, |
| "step": 14240 |
| }, |
| { |
| "epoch": 0.22816, |
| "grad_norm": 0.19026106595993042, |
| "learning_rate": 0.000154384, |
| "loss": 0.888, |
| "step": 14260 |
| }, |
| { |
| "epoch": 0.22848, |
| "grad_norm": 0.26393407583236694, |
| "learning_rate": 0.00015432, |
| "loss": 0.9065, |
| "step": 14280 |
| }, |
| { |
| "epoch": 0.2288, |
| "grad_norm": 0.23802846670150757, |
| "learning_rate": 0.000154256, |
| "loss": 0.859, |
| "step": 14300 |
| }, |
| { |
| "epoch": 0.22912, |
| "grad_norm": 0.20962855219841003, |
| "learning_rate": 0.000154192, |
| "loss": 0.9316, |
| "step": 14320 |
| }, |
| { |
| "epoch": 0.22944, |
| "grad_norm": 0.24111364781856537, |
| "learning_rate": 0.000154128, |
| "loss": 0.8761, |
| "step": 14340 |
| }, |
| { |
| "epoch": 0.22976, |
| "grad_norm": 0.24475687742233276, |
| "learning_rate": 0.000154064, |
| "loss": 0.8639, |
| "step": 14360 |
| }, |
| { |
| "epoch": 0.23008, |
| "grad_norm": 0.2179078459739685, |
| "learning_rate": 0.000154, |
| "loss": 0.9153, |
| "step": 14380 |
| }, |
| { |
| "epoch": 0.2304, |
| "grad_norm": 0.21389590203762054, |
| "learning_rate": 0.00015393600000000003, |
| "loss": 0.8965, |
| "step": 14400 |
| }, |
| { |
| "epoch": 0.23072, |
| "grad_norm": 0.25422388315200806, |
| "learning_rate": 0.00015387200000000002, |
| "loss": 0.9019, |
| "step": 14420 |
| }, |
| { |
| "epoch": 0.23104, |
| "grad_norm": 0.25789642333984375, |
| "learning_rate": 0.000153808, |
| "loss": 0.8862, |
| "step": 14440 |
| }, |
| { |
| "epoch": 0.23136, |
| "grad_norm": 0.24445413053035736, |
| "learning_rate": 0.000153744, |
| "loss": 0.8686, |
| "step": 14460 |
| }, |
| { |
| "epoch": 0.23168, |
| "grad_norm": 0.2562089264392853, |
| "learning_rate": 0.00015368, |
| "loss": 0.8724, |
| "step": 14480 |
| }, |
| { |
| "epoch": 0.232, |
| "grad_norm": 0.22422178089618683, |
| "learning_rate": 0.000153616, |
| "loss": 0.8126, |
| "step": 14500 |
| }, |
| { |
| "epoch": 0.23232, |
| "grad_norm": 0.2669355571269989, |
| "learning_rate": 0.000153552, |
| "loss": 0.9321, |
| "step": 14520 |
| }, |
| { |
| "epoch": 0.23264, |
| "grad_norm": 0.22260543704032898, |
| "learning_rate": 0.00015348800000000002, |
| "loss": 0.9001, |
| "step": 14540 |
| }, |
| { |
| "epoch": 0.23296, |
| "grad_norm": 0.2247844934463501, |
| "learning_rate": 0.00015342400000000001, |
| "loss": 0.8703, |
| "step": 14560 |
| }, |
| { |
| "epoch": 0.23328, |
| "grad_norm": 0.21349264681339264, |
| "learning_rate": 0.00015336, |
| "loss": 0.9012, |
| "step": 14580 |
| }, |
| { |
| "epoch": 0.2336, |
| "grad_norm": 0.20764821767807007, |
| "learning_rate": 0.000153296, |
| "loss": 0.8589, |
| "step": 14600 |
| }, |
| { |
| "epoch": 0.23392, |
| "grad_norm": 0.2439945936203003, |
| "learning_rate": 0.000153232, |
| "loss": 0.8685, |
| "step": 14620 |
| }, |
| { |
| "epoch": 0.23424, |
| "grad_norm": 0.189644455909729, |
| "learning_rate": 0.000153168, |
| "loss": 0.8888, |
| "step": 14640 |
| }, |
| { |
| "epoch": 0.23456, |
| "grad_norm": 0.2418312132358551, |
| "learning_rate": 0.000153104, |
| "loss": 0.8894, |
| "step": 14660 |
| }, |
| { |
| "epoch": 0.23488, |
| "grad_norm": 0.2261509746313095, |
| "learning_rate": 0.00015304, |
| "loss": 0.8834, |
| "step": 14680 |
| }, |
| { |
| "epoch": 0.2352, |
| "grad_norm": 0.20159967243671417, |
| "learning_rate": 0.000152976, |
| "loss": 0.8805, |
| "step": 14700 |
| }, |
| { |
| "epoch": 0.23552, |
| "grad_norm": 0.20319266617298126, |
| "learning_rate": 0.000152912, |
| "loss": 0.9304, |
| "step": 14720 |
| }, |
| { |
| "epoch": 0.23584, |
| "grad_norm": 0.26556146144866943, |
| "learning_rate": 0.00015284800000000002, |
| "loss": 0.8703, |
| "step": 14740 |
| }, |
| { |
| "epoch": 0.23616, |
| "grad_norm": 0.2388124316930771, |
| "learning_rate": 0.00015278400000000002, |
| "loss": 0.9027, |
| "step": 14760 |
| }, |
| { |
| "epoch": 0.23648, |
| "grad_norm": 0.2560880184173584, |
| "learning_rate": 0.00015272, |
| "loss": 0.9153, |
| "step": 14780 |
| }, |
| { |
| "epoch": 0.2368, |
| "grad_norm": 0.2266043722629547, |
| "learning_rate": 0.000152656, |
| "loss": 0.915, |
| "step": 14800 |
| }, |
| { |
| "epoch": 0.23712, |
| "grad_norm": 0.21880818903446198, |
| "learning_rate": 0.000152592, |
| "loss": 0.8509, |
| "step": 14820 |
| }, |
| { |
| "epoch": 0.23744, |
| "grad_norm": 0.2733529806137085, |
| "learning_rate": 0.000152528, |
| "loss": 0.8412, |
| "step": 14840 |
| }, |
| { |
| "epoch": 0.23776, |
| "grad_norm": 0.2371928095817566, |
| "learning_rate": 0.000152464, |
| "loss": 0.9024, |
| "step": 14860 |
| }, |
| { |
| "epoch": 0.23808, |
| "grad_norm": 0.21131671965122223, |
| "learning_rate": 0.00015240000000000002, |
| "loss": 0.9014, |
| "step": 14880 |
| }, |
| { |
| "epoch": 0.2384, |
| "grad_norm": 0.22599981725215912, |
| "learning_rate": 0.000152336, |
| "loss": 0.8418, |
| "step": 14900 |
| }, |
| { |
| "epoch": 0.23872, |
| "grad_norm": 0.210512176156044, |
| "learning_rate": 0.000152272, |
| "loss": 0.8215, |
| "step": 14920 |
| }, |
| { |
| "epoch": 0.23904, |
| "grad_norm": 0.24387352168560028, |
| "learning_rate": 0.00015220800000000003, |
| "loss": 0.9528, |
| "step": 14940 |
| }, |
| { |
| "epoch": 0.23936, |
| "grad_norm": 0.23596692085266113, |
| "learning_rate": 0.000152144, |
| "loss": 0.9321, |
| "step": 14960 |
| }, |
| { |
| "epoch": 0.23968, |
| "grad_norm": 0.2662867307662964, |
| "learning_rate": 0.00015208, |
| "loss": 0.8687, |
| "step": 14980 |
| }, |
| { |
| "epoch": 0.24, |
| "grad_norm": 0.27276721596717834, |
| "learning_rate": 0.00015201600000000002, |
| "loss": 0.8885, |
| "step": 15000 |
| }, |
| { |
| "epoch": 0.24032, |
| "grad_norm": 0.2904922366142273, |
| "learning_rate": 0.000151952, |
| "loss": 0.905, |
| "step": 15020 |
| }, |
| { |
| "epoch": 0.24064, |
| "grad_norm": 0.22744856774806976, |
| "learning_rate": 0.000151888, |
| "loss": 0.875, |
| "step": 15040 |
| }, |
| { |
| "epoch": 0.24096, |
| "grad_norm": 0.21145053207874298, |
| "learning_rate": 0.000151824, |
| "loss": 0.9173, |
| "step": 15060 |
| }, |
| { |
| "epoch": 0.24128, |
| "grad_norm": 0.2397310584783554, |
| "learning_rate": 0.00015176000000000002, |
| "loss": 0.9416, |
| "step": 15080 |
| }, |
| { |
| "epoch": 0.2416, |
| "grad_norm": 0.255487322807312, |
| "learning_rate": 0.00015169600000000002, |
| "loss": 0.9074, |
| "step": 15100 |
| }, |
| { |
| "epoch": 0.24192, |
| "grad_norm": 0.20825912058353424, |
| "learning_rate": 0.000151632, |
| "loss": 0.9081, |
| "step": 15120 |
| }, |
| { |
| "epoch": 0.24224, |
| "grad_norm": 0.21789099276065826, |
| "learning_rate": 0.000151568, |
| "loss": 0.8599, |
| "step": 15140 |
| }, |
| { |
| "epoch": 0.24256, |
| "grad_norm": 0.26202690601348877, |
| "learning_rate": 0.000151504, |
| "loss": 0.9323, |
| "step": 15160 |
| }, |
| { |
| "epoch": 0.24288, |
| "grad_norm": 0.24351023137569427, |
| "learning_rate": 0.00015144, |
| "loss": 0.8613, |
| "step": 15180 |
| }, |
| { |
| "epoch": 0.2432, |
| "grad_norm": 0.24698816239833832, |
| "learning_rate": 0.000151376, |
| "loss": 0.9229, |
| "step": 15200 |
| }, |
| { |
| "epoch": 0.24352, |
| "grad_norm": 0.28698813915252686, |
| "learning_rate": 0.00015131200000000002, |
| "loss": 0.902, |
| "step": 15220 |
| }, |
| { |
| "epoch": 0.24384, |
| "grad_norm": 0.2190207839012146, |
| "learning_rate": 0.000151248, |
| "loss": 0.8347, |
| "step": 15240 |
| }, |
| { |
| "epoch": 0.24416, |
| "grad_norm": 0.25162091851234436, |
| "learning_rate": 0.000151184, |
| "loss": 0.9738, |
| "step": 15260 |
| }, |
| { |
| "epoch": 0.24448, |
| "grad_norm": 0.22512441873550415, |
| "learning_rate": 0.00015112000000000003, |
| "loss": 0.8972, |
| "step": 15280 |
| }, |
| { |
| "epoch": 0.2448, |
| "grad_norm": 0.2120593637228012, |
| "learning_rate": 0.000151056, |
| "loss": 0.9371, |
| "step": 15300 |
| }, |
| { |
| "epoch": 0.24512, |
| "grad_norm": 0.26042282581329346, |
| "learning_rate": 0.000150992, |
| "loss": 0.9792, |
| "step": 15320 |
| }, |
| { |
| "epoch": 0.24544, |
| "grad_norm": 0.23215824365615845, |
| "learning_rate": 0.00015092800000000002, |
| "loss": 0.8731, |
| "step": 15340 |
| }, |
| { |
| "epoch": 0.24576, |
| "grad_norm": 0.23564760386943817, |
| "learning_rate": 0.000150864, |
| "loss": 0.8924, |
| "step": 15360 |
| }, |
| { |
| "epoch": 0.24608, |
| "grad_norm": 0.234059140086174, |
| "learning_rate": 0.0001508, |
| "loss": 0.8987, |
| "step": 15380 |
| }, |
| { |
| "epoch": 0.2464, |
| "grad_norm": 0.24413174390792847, |
| "learning_rate": 0.000150736, |
| "loss": 0.8814, |
| "step": 15400 |
| }, |
| { |
| "epoch": 0.24672, |
| "grad_norm": 0.19461284577846527, |
| "learning_rate": 0.00015067200000000002, |
| "loss": 0.8885, |
| "step": 15420 |
| }, |
| { |
| "epoch": 0.24704, |
| "grad_norm": 0.21257640421390533, |
| "learning_rate": 0.00015060800000000002, |
| "loss": 0.8726, |
| "step": 15440 |
| }, |
| { |
| "epoch": 0.24736, |
| "grad_norm": 0.19599197804927826, |
| "learning_rate": 0.000150544, |
| "loss": 0.8769, |
| "step": 15460 |
| }, |
| { |
| "epoch": 0.24768, |
| "grad_norm": 0.2362959086894989, |
| "learning_rate": 0.00015048, |
| "loss": 0.9387, |
| "step": 15480 |
| }, |
| { |
| "epoch": 0.248, |
| "grad_norm": 0.22756963968276978, |
| "learning_rate": 0.000150416, |
| "loss": 0.8997, |
| "step": 15500 |
| }, |
| { |
| "epoch": 0.24832, |
| "grad_norm": 0.20421338081359863, |
| "learning_rate": 0.000150352, |
| "loss": 0.8393, |
| "step": 15520 |
| }, |
| { |
| "epoch": 0.24864, |
| "grad_norm": 0.22076188027858734, |
| "learning_rate": 0.000150288, |
| "loss": 0.9485, |
| "step": 15540 |
| }, |
| { |
| "epoch": 0.24896, |
| "grad_norm": 0.2736372649669647, |
| "learning_rate": 0.00015022400000000002, |
| "loss": 0.9198, |
| "step": 15560 |
| }, |
| { |
| "epoch": 0.24928, |
| "grad_norm": 0.21683241426944733, |
| "learning_rate": 0.00015016, |
| "loss": 0.896, |
| "step": 15580 |
| }, |
| { |
| "epoch": 0.2496, |
| "grad_norm": 0.2821408212184906, |
| "learning_rate": 0.000150096, |
| "loss": 0.9136, |
| "step": 15600 |
| }, |
| { |
| "epoch": 0.24992, |
| "grad_norm": 0.2574373781681061, |
| "learning_rate": 0.00015003200000000003, |
| "loss": 0.867, |
| "step": 15620 |
| }, |
| { |
| "epoch": 0.25024, |
| "grad_norm": 0.25345656275749207, |
| "learning_rate": 0.000149968, |
| "loss": 0.8558, |
| "step": 15640 |
| }, |
| { |
| "epoch": 0.25056, |
| "grad_norm": 0.22354301810264587, |
| "learning_rate": 0.000149904, |
| "loss": 0.8877, |
| "step": 15660 |
| }, |
| { |
| "epoch": 0.25088, |
| "grad_norm": 0.24617154896259308, |
| "learning_rate": 0.00014984000000000002, |
| "loss": 0.9129, |
| "step": 15680 |
| }, |
| { |
| "epoch": 0.2512, |
| "grad_norm": 0.2457919418811798, |
| "learning_rate": 0.000149776, |
| "loss": 0.8808, |
| "step": 15700 |
| }, |
| { |
| "epoch": 0.25152, |
| "grad_norm": 0.24559831619262695, |
| "learning_rate": 0.000149712, |
| "loss": 0.8843, |
| "step": 15720 |
| }, |
| { |
| "epoch": 0.25184, |
| "grad_norm": 0.23059086501598358, |
| "learning_rate": 0.0001496512, |
| "loss": 0.8913, |
| "step": 15740 |
| }, |
| { |
| "epoch": 0.25216, |
| "grad_norm": 0.23776483535766602, |
| "learning_rate": 0.0001495872, |
| "loss": 0.8903, |
| "step": 15760 |
| }, |
| { |
| "epoch": 0.25248, |
| "grad_norm": 0.22387710213661194, |
| "learning_rate": 0.0001495232, |
| "loss": 0.8357, |
| "step": 15780 |
| }, |
| { |
| "epoch": 0.2528, |
| "grad_norm": 0.25012654066085815, |
| "learning_rate": 0.0001494592, |
| "loss": 0.9193, |
| "step": 15800 |
| }, |
| { |
| "epoch": 0.25312, |
| "grad_norm": 0.24608831107616425, |
| "learning_rate": 0.0001493952, |
| "loss": 0.9439, |
| "step": 15820 |
| }, |
| { |
| "epoch": 0.25344, |
| "grad_norm": 0.2951606512069702, |
| "learning_rate": 0.0001493312, |
| "loss": 0.9266, |
| "step": 15840 |
| }, |
| { |
| "epoch": 0.25376, |
| "grad_norm": 0.2593064606189728, |
| "learning_rate": 0.0001492672, |
| "loss": 0.8889, |
| "step": 15860 |
| }, |
| { |
| "epoch": 0.25408, |
| "grad_norm": 0.26488035917282104, |
| "learning_rate": 0.0001492032, |
| "loss": 0.8961, |
| "step": 15880 |
| }, |
| { |
| "epoch": 0.2544, |
| "grad_norm": 0.23882393538951874, |
| "learning_rate": 0.00014913920000000002, |
| "loss": 0.8546, |
| "step": 15900 |
| }, |
| { |
| "epoch": 0.25472, |
| "grad_norm": 0.22206124663352966, |
| "learning_rate": 0.00014907520000000002, |
| "loss": 0.8872, |
| "step": 15920 |
| }, |
| { |
| "epoch": 0.25504, |
| "grad_norm": 0.2286020815372467, |
| "learning_rate": 0.00014901120000000001, |
| "loss": 0.8716, |
| "step": 15940 |
| }, |
| { |
| "epoch": 0.25536, |
| "grad_norm": 0.2474469095468521, |
| "learning_rate": 0.0001489472, |
| "loss": 0.876, |
| "step": 15960 |
| }, |
| { |
| "epoch": 0.25568, |
| "grad_norm": 0.24693526327610016, |
| "learning_rate": 0.0001488832, |
| "loss": 0.8774, |
| "step": 15980 |
| }, |
| { |
| "epoch": 0.256, |
| "grad_norm": 0.25829070806503296, |
| "learning_rate": 0.0001488192, |
| "loss": 0.9659, |
| "step": 16000 |
| }, |
| { |
| "epoch": 0.25632, |
| "grad_norm": 0.19258326292037964, |
| "learning_rate": 0.0001487552, |
| "loss": 0.8895, |
| "step": 16020 |
| }, |
| { |
| "epoch": 0.25664, |
| "grad_norm": 0.22530287504196167, |
| "learning_rate": 0.00014869120000000002, |
| "loss": 0.8625, |
| "step": 16040 |
| }, |
| { |
| "epoch": 0.25696, |
| "grad_norm": 0.23685386776924133, |
| "learning_rate": 0.0001486272, |
| "loss": 0.8744, |
| "step": 16060 |
| }, |
| { |
| "epoch": 0.25728, |
| "grad_norm": 0.2815619111061096, |
| "learning_rate": 0.0001485632, |
| "loss": 0.9046, |
| "step": 16080 |
| }, |
| { |
| "epoch": 0.2576, |
| "grad_norm": 0.2278144359588623, |
| "learning_rate": 0.0001484992, |
| "loss": 0.9233, |
| "step": 16100 |
| }, |
| { |
| "epoch": 0.25792, |
| "grad_norm": 0.23006929457187653, |
| "learning_rate": 0.0001484352, |
| "loss": 0.8888, |
| "step": 16120 |
| }, |
| { |
| "epoch": 0.25824, |
| "grad_norm": 0.23313170671463013, |
| "learning_rate": 0.0001483712, |
| "loss": 0.8838, |
| "step": 16140 |
| }, |
| { |
| "epoch": 0.25856, |
| "grad_norm": 0.1918276846408844, |
| "learning_rate": 0.0001483072, |
| "loss": 0.9102, |
| "step": 16160 |
| }, |
| { |
| "epoch": 0.25888, |
| "grad_norm": 0.22128306329250336, |
| "learning_rate": 0.0001482432, |
| "loss": 0.9111, |
| "step": 16180 |
| }, |
| { |
| "epoch": 0.2592, |
| "grad_norm": 0.2663705348968506, |
| "learning_rate": 0.0001481792, |
| "loss": 0.8658, |
| "step": 16200 |
| }, |
| { |
| "epoch": 0.25952, |
| "grad_norm": 0.2246493250131607, |
| "learning_rate": 0.0001481152, |
| "loss": 0.9076, |
| "step": 16220 |
| }, |
| { |
| "epoch": 0.25984, |
| "grad_norm": 0.276429146528244, |
| "learning_rate": 0.00014805120000000002, |
| "loss": 0.893, |
| "step": 16240 |
| }, |
| { |
| "epoch": 0.26016, |
| "grad_norm": 0.25722208619117737, |
| "learning_rate": 0.00014798720000000002, |
| "loss": 0.9124, |
| "step": 16260 |
| }, |
| { |
| "epoch": 0.26048, |
| "grad_norm": 0.2293781042098999, |
| "learning_rate": 0.00014792320000000001, |
| "loss": 0.9055, |
| "step": 16280 |
| }, |
| { |
| "epoch": 0.2608, |
| "grad_norm": 0.2425844967365265, |
| "learning_rate": 0.0001478592, |
| "loss": 0.9552, |
| "step": 16300 |
| }, |
| { |
| "epoch": 0.26112, |
| "grad_norm": 0.21894103288650513, |
| "learning_rate": 0.0001477952, |
| "loss": 0.9094, |
| "step": 16320 |
| }, |
| { |
| "epoch": 0.26144, |
| "grad_norm": 0.18762537837028503, |
| "learning_rate": 0.0001477312, |
| "loss": 0.864, |
| "step": 16340 |
| }, |
| { |
| "epoch": 0.26176, |
| "grad_norm": 0.24802207946777344, |
| "learning_rate": 0.0001476672, |
| "loss": 0.94, |
| "step": 16360 |
| }, |
| { |
| "epoch": 0.26208, |
| "grad_norm": 0.2168876677751541, |
| "learning_rate": 0.00014760320000000002, |
| "loss": 0.8795, |
| "step": 16380 |
| }, |
| { |
| "epoch": 0.2624, |
| "grad_norm": 0.21225641667842865, |
| "learning_rate": 0.0001475392, |
| "loss": 0.8783, |
| "step": 16400 |
| }, |
| { |
| "epoch": 0.26272, |
| "grad_norm": 0.22440536320209503, |
| "learning_rate": 0.0001474752, |
| "loss": 0.8984, |
| "step": 16420 |
| }, |
| { |
| "epoch": 0.26304, |
| "grad_norm": 0.21807946264743805, |
| "learning_rate": 0.0001474112, |
| "loss": 0.8949, |
| "step": 16440 |
| }, |
| { |
| "epoch": 0.26336, |
| "grad_norm": 0.27320024371147156, |
| "learning_rate": 0.0001473472, |
| "loss": 0.9225, |
| "step": 16460 |
| }, |
| { |
| "epoch": 0.26368, |
| "grad_norm": 0.2062409669160843, |
| "learning_rate": 0.0001472832, |
| "loss": 0.8705, |
| "step": 16480 |
| }, |
| { |
| "epoch": 0.264, |
| "grad_norm": 0.2158362716436386, |
| "learning_rate": 0.00014721920000000002, |
| "loss": 0.8971, |
| "step": 16500 |
| }, |
| { |
| "epoch": 0.26432, |
| "grad_norm": 0.38786885142326355, |
| "learning_rate": 0.0001471552, |
| "loss": 0.9119, |
| "step": 16520 |
| }, |
| { |
| "epoch": 0.26464, |
| "grad_norm": 0.17009785771369934, |
| "learning_rate": 0.0001470912, |
| "loss": 0.8909, |
| "step": 16540 |
| }, |
| { |
| "epoch": 0.26496, |
| "grad_norm": 0.1993030607700348, |
| "learning_rate": 0.0001470272, |
| "loss": 0.8962, |
| "step": 16560 |
| }, |
| { |
| "epoch": 0.26528, |
| "grad_norm": 0.20363526046276093, |
| "learning_rate": 0.00014696320000000002, |
| "loss": 0.8829, |
| "step": 16580 |
| }, |
| { |
| "epoch": 0.2656, |
| "grad_norm": 0.21756501495838165, |
| "learning_rate": 0.00014689920000000002, |
| "loss": 0.8796, |
| "step": 16600 |
| }, |
| { |
| "epoch": 0.26592, |
| "grad_norm": 0.23114700615406036, |
| "learning_rate": 0.0001468352, |
| "loss": 0.8857, |
| "step": 16620 |
| }, |
| { |
| "epoch": 0.26624, |
| "grad_norm": 0.2643290162086487, |
| "learning_rate": 0.0001467712, |
| "loss": 0.8865, |
| "step": 16640 |
| }, |
| { |
| "epoch": 0.26656, |
| "grad_norm": 0.2651253640651703, |
| "learning_rate": 0.0001467072, |
| "loss": 0.9395, |
| "step": 16660 |
| }, |
| { |
| "epoch": 0.26688, |
| "grad_norm": 0.2604687511920929, |
| "learning_rate": 0.0001466432, |
| "loss": 0.9293, |
| "step": 16680 |
| }, |
| { |
| "epoch": 0.2672, |
| "grad_norm": 0.26383012533187866, |
| "learning_rate": 0.0001465792, |
| "loss": 0.9427, |
| "step": 16700 |
| }, |
| { |
| "epoch": 0.26752, |
| "grad_norm": 0.21108990907669067, |
| "learning_rate": 0.00014651520000000002, |
| "loss": 0.878, |
| "step": 16720 |
| }, |
| { |
| "epoch": 0.26784, |
| "grad_norm": 0.19841574132442474, |
| "learning_rate": 0.0001464512, |
| "loss": 0.9178, |
| "step": 16740 |
| }, |
| { |
| "epoch": 0.26816, |
| "grad_norm": 0.18484389781951904, |
| "learning_rate": 0.0001463872, |
| "loss": 0.8609, |
| "step": 16760 |
| }, |
| { |
| "epoch": 0.26848, |
| "grad_norm": 0.22383546829223633, |
| "learning_rate": 0.00014632320000000003, |
| "loss": 0.8582, |
| "step": 16780 |
| }, |
| { |
| "epoch": 0.2688, |
| "grad_norm": 0.23623542487621307, |
| "learning_rate": 0.0001462592, |
| "loss": 0.9065, |
| "step": 16800 |
| }, |
| { |
| "epoch": 0.26912, |
| "grad_norm": 0.20453502237796783, |
| "learning_rate": 0.0001461952, |
| "loss": 0.873, |
| "step": 16820 |
| }, |
| { |
| "epoch": 0.26944, |
| "grad_norm": 0.2092786431312561, |
| "learning_rate": 0.00014613120000000001, |
| "loss": 0.8592, |
| "step": 16840 |
| }, |
| { |
| "epoch": 0.26976, |
| "grad_norm": 0.35000941157341003, |
| "learning_rate": 0.0001460672, |
| "loss": 0.9135, |
| "step": 16860 |
| }, |
| { |
| "epoch": 0.27008, |
| "grad_norm": 0.21759675443172455, |
| "learning_rate": 0.0001460032, |
| "loss": 0.9318, |
| "step": 16880 |
| }, |
| { |
| "epoch": 0.2704, |
| "grad_norm": 0.21129532158374786, |
| "learning_rate": 0.0001459392, |
| "loss": 0.8726, |
| "step": 16900 |
| }, |
| { |
| "epoch": 0.27072, |
| "grad_norm": 0.2558363974094391, |
| "learning_rate": 0.00014587520000000002, |
| "loss": 0.8606, |
| "step": 16920 |
| }, |
| { |
| "epoch": 0.27104, |
| "grad_norm": 0.24021393060684204, |
| "learning_rate": 0.00014581120000000002, |
| "loss": 0.9545, |
| "step": 16940 |
| }, |
| { |
| "epoch": 0.27136, |
| "grad_norm": 0.22521884739398956, |
| "learning_rate": 0.0001457472, |
| "loss": 0.8983, |
| "step": 16960 |
| }, |
| { |
| "epoch": 0.27168, |
| "grad_norm": 0.19116589426994324, |
| "learning_rate": 0.0001456832, |
| "loss": 0.8628, |
| "step": 16980 |
| }, |
| { |
| "epoch": 0.272, |
| "grad_norm": 0.25496795773506165, |
| "learning_rate": 0.0001456192, |
| "loss": 0.8648, |
| "step": 17000 |
| }, |
| { |
| "epoch": 0.27232, |
| "grad_norm": 0.21997754275798798, |
| "learning_rate": 0.0001455552, |
| "loss": 0.8864, |
| "step": 17020 |
| }, |
| { |
| "epoch": 0.27264, |
| "grad_norm": 0.2255709022283554, |
| "learning_rate": 0.0001454912, |
| "loss": 0.8818, |
| "step": 17040 |
| }, |
| { |
| "epoch": 0.27296, |
| "grad_norm": 0.2319864183664322, |
| "learning_rate": 0.00014542720000000002, |
| "loss": 0.9234, |
| "step": 17060 |
| }, |
| { |
| "epoch": 0.27328, |
| "grad_norm": 0.25236740708351135, |
| "learning_rate": 0.0001453632, |
| "loss": 0.9205, |
| "step": 17080 |
| }, |
| { |
| "epoch": 0.2736, |
| "grad_norm": 0.25668826699256897, |
| "learning_rate": 0.0001452992, |
| "loss": 0.8697, |
| "step": 17100 |
| }, |
| { |
| "epoch": 0.27392, |
| "grad_norm": 0.23997314274311066, |
| "learning_rate": 0.00014523520000000003, |
| "loss": 0.8752, |
| "step": 17120 |
| }, |
| { |
| "epoch": 0.27424, |
| "grad_norm": 0.26037994027137756, |
| "learning_rate": 0.0001451712, |
| "loss": 0.8955, |
| "step": 17140 |
| }, |
| { |
| "epoch": 0.27456, |
| "grad_norm": 0.2793257534503937, |
| "learning_rate": 0.0001451072, |
| "loss": 0.9114, |
| "step": 17160 |
| }, |
| { |
| "epoch": 0.27488, |
| "grad_norm": 0.2247275561094284, |
| "learning_rate": 0.00014504320000000001, |
| "loss": 0.8595, |
| "step": 17180 |
| }, |
| { |
| "epoch": 0.2752, |
| "grad_norm": 0.17990809679031372, |
| "learning_rate": 0.0001449792, |
| "loss": 0.8705, |
| "step": 17200 |
| }, |
| { |
| "epoch": 0.27552, |
| "grad_norm": 0.25493231415748596, |
| "learning_rate": 0.0001449152, |
| "loss": 0.8974, |
| "step": 17220 |
| }, |
| { |
| "epoch": 0.27584, |
| "grad_norm": 0.2407209277153015, |
| "learning_rate": 0.0001448512, |
| "loss": 0.8411, |
| "step": 17240 |
| }, |
| { |
| "epoch": 0.27616, |
| "grad_norm": 0.23049895465373993, |
| "learning_rate": 0.00014478720000000002, |
| "loss": 0.8879, |
| "step": 17260 |
| }, |
| { |
| "epoch": 0.27648, |
| "grad_norm": 0.2609173059463501, |
| "learning_rate": 0.00014472320000000002, |
| "loss": 0.841, |
| "step": 17280 |
| }, |
| { |
| "epoch": 0.2768, |
| "grad_norm": 0.25201836228370667, |
| "learning_rate": 0.0001446592, |
| "loss": 0.9323, |
| "step": 17300 |
| }, |
| { |
| "epoch": 0.27712, |
| "grad_norm": 0.2098960429430008, |
| "learning_rate": 0.0001445952, |
| "loss": 0.9157, |
| "step": 17320 |
| }, |
| { |
| "epoch": 0.27744, |
| "grad_norm": 0.20928995311260223, |
| "learning_rate": 0.0001445312, |
| "loss": 0.9038, |
| "step": 17340 |
| }, |
| { |
| "epoch": 0.27776, |
| "grad_norm": 0.27435973286628723, |
| "learning_rate": 0.0001444672, |
| "loss": 0.9151, |
| "step": 17360 |
| }, |
| { |
| "epoch": 0.27808, |
| "grad_norm": 0.21046458184719086, |
| "learning_rate": 0.0001444032, |
| "loss": 0.916, |
| "step": 17380 |
| }, |
| { |
| "epoch": 0.2784, |
| "grad_norm": 0.21415123343467712, |
| "learning_rate": 0.00014433920000000002, |
| "loss": 0.8068, |
| "step": 17400 |
| }, |
| { |
| "epoch": 0.27872, |
| "grad_norm": 0.23788805305957794, |
| "learning_rate": 0.0001442752, |
| "loss": 0.897, |
| "step": 17420 |
| }, |
| { |
| "epoch": 0.27904, |
| "grad_norm": 0.27183711528778076, |
| "learning_rate": 0.0001442112, |
| "loss": 0.9256, |
| "step": 17440 |
| }, |
| { |
| "epoch": 0.27936, |
| "grad_norm": 0.23290027678012848, |
| "learning_rate": 0.00014414720000000003, |
| "loss": 0.8824, |
| "step": 17460 |
| }, |
| { |
| "epoch": 0.27968, |
| "grad_norm": 0.228986918926239, |
| "learning_rate": 0.0001440832, |
| "loss": 0.8986, |
| "step": 17480 |
| }, |
| { |
| "epoch": 0.28, |
| "grad_norm": 0.2455400675535202, |
| "learning_rate": 0.0001440192, |
| "loss": 0.9284, |
| "step": 17500 |
| }, |
| { |
| "epoch": 0.28032, |
| "grad_norm": 0.23576192557811737, |
| "learning_rate": 0.00014395520000000001, |
| "loss": 0.9038, |
| "step": 17520 |
| }, |
| { |
| "epoch": 0.28064, |
| "grad_norm": 0.24957694113254547, |
| "learning_rate": 0.0001438912, |
| "loss": 0.8735, |
| "step": 17540 |
| }, |
| { |
| "epoch": 0.28096, |
| "grad_norm": 0.2399289608001709, |
| "learning_rate": 0.0001438272, |
| "loss": 0.8819, |
| "step": 17560 |
| }, |
| { |
| "epoch": 0.28128, |
| "grad_norm": 0.2208934873342514, |
| "learning_rate": 0.0001437664, |
| "loss": 0.8615, |
| "step": 17580 |
| }, |
| { |
| "epoch": 0.2816, |
| "grad_norm": 0.2641635835170746, |
| "learning_rate": 0.0001437024, |
| "loss": 0.8801, |
| "step": 17600 |
| }, |
| { |
| "epoch": 0.28192, |
| "grad_norm": 0.21068689227104187, |
| "learning_rate": 0.00014363840000000002, |
| "loss": 0.8928, |
| "step": 17620 |
| }, |
| { |
| "epoch": 0.28224, |
| "grad_norm": 0.22611364722251892, |
| "learning_rate": 0.0001435744, |
| "loss": 0.8997, |
| "step": 17640 |
| }, |
| { |
| "epoch": 0.28256, |
| "grad_norm": 0.21559686958789825, |
| "learning_rate": 0.0001435104, |
| "loss": 0.8757, |
| "step": 17660 |
| }, |
| { |
| "epoch": 0.28288, |
| "grad_norm": 0.2164774090051651, |
| "learning_rate": 0.0001434464, |
| "loss": 0.8621, |
| "step": 17680 |
| }, |
| { |
| "epoch": 0.2832, |
| "grad_norm": 0.2710246443748474, |
| "learning_rate": 0.0001433824, |
| "loss": 0.8788, |
| "step": 17700 |
| }, |
| { |
| "epoch": 0.28352, |
| "grad_norm": 0.22402608394622803, |
| "learning_rate": 0.0001433184, |
| "loss": 0.8655, |
| "step": 17720 |
| }, |
| { |
| "epoch": 0.28384, |
| "grad_norm": 0.240563303232193, |
| "learning_rate": 0.00014325440000000002, |
| "loss": 0.831, |
| "step": 17740 |
| }, |
| { |
| "epoch": 0.28416, |
| "grad_norm": 0.2877085208892822, |
| "learning_rate": 0.00014319040000000002, |
| "loss": 0.8939, |
| "step": 17760 |
| }, |
| { |
| "epoch": 0.28448, |
| "grad_norm": 0.21592877805233002, |
| "learning_rate": 0.0001431264, |
| "loss": 0.8441, |
| "step": 17780 |
| }, |
| { |
| "epoch": 0.2848, |
| "grad_norm": 1.138694167137146, |
| "learning_rate": 0.0001430624, |
| "loss": 0.921, |
| "step": 17800 |
| }, |
| { |
| "epoch": 0.28512, |
| "grad_norm": 0.22125540673732758, |
| "learning_rate": 0.0001429984, |
| "loss": 0.8695, |
| "step": 17820 |
| }, |
| { |
| "epoch": 0.28544, |
| "grad_norm": 0.2059083729982376, |
| "learning_rate": 0.0001429344, |
| "loss": 0.8554, |
| "step": 17840 |
| }, |
| { |
| "epoch": 0.28576, |
| "grad_norm": 0.2463064044713974, |
| "learning_rate": 0.0001428704, |
| "loss": 0.9121, |
| "step": 17860 |
| }, |
| { |
| "epoch": 0.28608, |
| "grad_norm": 0.228254035115242, |
| "learning_rate": 0.00014280640000000002, |
| "loss": 0.8945, |
| "step": 17880 |
| }, |
| { |
| "epoch": 0.2864, |
| "grad_norm": 0.23662838339805603, |
| "learning_rate": 0.0001427424, |
| "loss": 0.9431, |
| "step": 17900 |
| }, |
| { |
| "epoch": 0.28672, |
| "grad_norm": 0.22641515731811523, |
| "learning_rate": 0.0001426784, |
| "loss": 0.9033, |
| "step": 17920 |
| }, |
| { |
| "epoch": 0.28704, |
| "grad_norm": 0.23892265558242798, |
| "learning_rate": 0.0001426144, |
| "loss": 0.8623, |
| "step": 17940 |
| }, |
| { |
| "epoch": 0.28736, |
| "grad_norm": 0.2975479066371918, |
| "learning_rate": 0.00014255040000000002, |
| "loss": 0.8474, |
| "step": 17960 |
| }, |
| { |
| "epoch": 0.28768, |
| "grad_norm": 0.23718002438545227, |
| "learning_rate": 0.0001424864, |
| "loss": 0.9099, |
| "step": 17980 |
| }, |
| { |
| "epoch": 0.288, |
| "grad_norm": 0.2706455886363983, |
| "learning_rate": 0.0001424224, |
| "loss": 0.9357, |
| "step": 18000 |
| }, |
| { |
| "epoch": 0.28832, |
| "grad_norm": 0.260484516620636, |
| "learning_rate": 0.0001423584, |
| "loss": 0.9144, |
| "step": 18020 |
| }, |
| { |
| "epoch": 0.28864, |
| "grad_norm": 0.2651614844799042, |
| "learning_rate": 0.0001422944, |
| "loss": 0.8764, |
| "step": 18040 |
| }, |
| { |
| "epoch": 0.28896, |
| "grad_norm": 0.18904343247413635, |
| "learning_rate": 0.0001422304, |
| "loss": 0.9312, |
| "step": 18060 |
| }, |
| { |
| "epoch": 0.28928, |
| "grad_norm": 0.22476926445960999, |
| "learning_rate": 0.00014216640000000002, |
| "loss": 0.8732, |
| "step": 18080 |
| }, |
| { |
| "epoch": 0.2896, |
| "grad_norm": 0.25984928011894226, |
| "learning_rate": 0.00014210240000000002, |
| "loss": 0.8511, |
| "step": 18100 |
| }, |
| { |
| "epoch": 0.28992, |
| "grad_norm": 0.24901899695396423, |
| "learning_rate": 0.0001420384, |
| "loss": 0.8844, |
| "step": 18120 |
| }, |
| { |
| "epoch": 0.29024, |
| "grad_norm": 0.2536833882331848, |
| "learning_rate": 0.0001419744, |
| "loss": 0.9403, |
| "step": 18140 |
| }, |
| { |
| "epoch": 0.29056, |
| "grad_norm": 0.24617129564285278, |
| "learning_rate": 0.0001419104, |
| "loss": 0.9463, |
| "step": 18160 |
| }, |
| { |
| "epoch": 0.29088, |
| "grad_norm": 0.23471851646900177, |
| "learning_rate": 0.0001418464, |
| "loss": 0.8977, |
| "step": 18180 |
| }, |
| { |
| "epoch": 0.2912, |
| "grad_norm": 0.22955600917339325, |
| "learning_rate": 0.0001417824, |
| "loss": 0.87, |
| "step": 18200 |
| }, |
| { |
| "epoch": 0.29152, |
| "grad_norm": 0.24333995580673218, |
| "learning_rate": 0.00014171840000000002, |
| "loss": 0.8879, |
| "step": 18220 |
| }, |
| { |
| "epoch": 0.29184, |
| "grad_norm": 0.20572280883789062, |
| "learning_rate": 0.0001416544, |
| "loss": 0.8896, |
| "step": 18240 |
| }, |
| { |
| "epoch": 0.29216, |
| "grad_norm": 0.26233595609664917, |
| "learning_rate": 0.0001415904, |
| "loss": 0.9508, |
| "step": 18260 |
| }, |
| { |
| "epoch": 0.29248, |
| "grad_norm": 0.25485649704933167, |
| "learning_rate": 0.00014152640000000003, |
| "loss": 0.8671, |
| "step": 18280 |
| }, |
| { |
| "epoch": 0.2928, |
| "grad_norm": 0.2335824966430664, |
| "learning_rate": 0.00014146240000000002, |
| "loss": 0.8607, |
| "step": 18300 |
| }, |
| { |
| "epoch": 0.29312, |
| "grad_norm": 0.23609523475170135, |
| "learning_rate": 0.0001413984, |
| "loss": 0.8851, |
| "step": 18320 |
| }, |
| { |
| "epoch": 0.29344, |
| "grad_norm": 0.23384864628314972, |
| "learning_rate": 0.0001413344, |
| "loss": 0.8432, |
| "step": 18340 |
| }, |
| { |
| "epoch": 0.29376, |
| "grad_norm": 0.20632806420326233, |
| "learning_rate": 0.0001412704, |
| "loss": 0.8929, |
| "step": 18360 |
| }, |
| { |
| "epoch": 0.29408, |
| "grad_norm": 0.23396439850330353, |
| "learning_rate": 0.0001412064, |
| "loss": 0.9085, |
| "step": 18380 |
| }, |
| { |
| "epoch": 0.2944, |
| "grad_norm": 0.22463780641555786, |
| "learning_rate": 0.0001411424, |
| "loss": 0.882, |
| "step": 18400 |
| }, |
| { |
| "epoch": 0.29472, |
| "grad_norm": 0.2543090581893921, |
| "learning_rate": 0.00014107840000000002, |
| "loss": 0.8529, |
| "step": 18420 |
| }, |
| { |
| "epoch": 0.29504, |
| "grad_norm": 0.23137034475803375, |
| "learning_rate": 0.00014101440000000002, |
| "loss": 0.8763, |
| "step": 18440 |
| }, |
| { |
| "epoch": 0.29536, |
| "grad_norm": 0.2112882137298584, |
| "learning_rate": 0.0001409504, |
| "loss": 0.8764, |
| "step": 18460 |
| }, |
| { |
| "epoch": 0.29568, |
| "grad_norm": 0.26310333609580994, |
| "learning_rate": 0.0001408864, |
| "loss": 0.8922, |
| "step": 18480 |
| }, |
| { |
| "epoch": 0.296, |
| "grad_norm": 0.17188023030757904, |
| "learning_rate": 0.0001408224, |
| "loss": 0.8667, |
| "step": 18500 |
| }, |
| { |
| "epoch": 0.29632, |
| "grad_norm": 0.21832036972045898, |
| "learning_rate": 0.0001407584, |
| "loss": 0.8365, |
| "step": 18520 |
| }, |
| { |
| "epoch": 0.29664, |
| "grad_norm": 0.22188645601272583, |
| "learning_rate": 0.0001406944, |
| "loss": 0.8851, |
| "step": 18540 |
| }, |
| { |
| "epoch": 0.29696, |
| "grad_norm": 0.21572217345237732, |
| "learning_rate": 0.00014063040000000001, |
| "loss": 0.9154, |
| "step": 18560 |
| }, |
| { |
| "epoch": 0.29728, |
| "grad_norm": 0.27105310559272766, |
| "learning_rate": 0.0001405664, |
| "loss": 0.9377, |
| "step": 18580 |
| }, |
| { |
| "epoch": 0.2976, |
| "grad_norm": 0.24213473498821259, |
| "learning_rate": 0.0001405024, |
| "loss": 0.8637, |
| "step": 18600 |
| }, |
| { |
| "epoch": 0.29792, |
| "grad_norm": 0.2412949949502945, |
| "learning_rate": 0.00014043840000000003, |
| "loss": 0.8406, |
| "step": 18620 |
| }, |
| { |
| "epoch": 0.29824, |
| "grad_norm": 0.22756318747997284, |
| "learning_rate": 0.00014037440000000002, |
| "loss": 0.9136, |
| "step": 18640 |
| }, |
| { |
| "epoch": 0.29856, |
| "grad_norm": 0.27023327350616455, |
| "learning_rate": 0.0001403104, |
| "loss": 0.903, |
| "step": 18660 |
| }, |
| { |
| "epoch": 0.29888, |
| "grad_norm": 0.2557690739631653, |
| "learning_rate": 0.0001402464, |
| "loss": 0.91, |
| "step": 18680 |
| }, |
| { |
| "epoch": 0.2992, |
| "grad_norm": 0.2063320279121399, |
| "learning_rate": 0.0001401824, |
| "loss": 0.8563, |
| "step": 18700 |
| }, |
| { |
| "epoch": 0.29952, |
| "grad_norm": 0.2544916570186615, |
| "learning_rate": 0.0001401184, |
| "loss": 0.874, |
| "step": 18720 |
| }, |
| { |
| "epoch": 0.29984, |
| "grad_norm": 0.2347075492143631, |
| "learning_rate": 0.0001400544, |
| "loss": 0.9113, |
| "step": 18740 |
| }, |
| { |
| "epoch": 0.30016, |
| "grad_norm": 0.22835484147071838, |
| "learning_rate": 0.00013999040000000002, |
| "loss": 0.9441, |
| "step": 18760 |
| }, |
| { |
| "epoch": 0.30048, |
| "grad_norm": 0.2611948847770691, |
| "learning_rate": 0.00013992640000000002, |
| "loss": 0.8588, |
| "step": 18780 |
| }, |
| { |
| "epoch": 0.3008, |
| "grad_norm": 0.2237863838672638, |
| "learning_rate": 0.0001398624, |
| "loss": 0.8968, |
| "step": 18800 |
| }, |
| { |
| "epoch": 0.30112, |
| "grad_norm": 0.25444677472114563, |
| "learning_rate": 0.0001397984, |
| "loss": 0.8699, |
| "step": 18820 |
| }, |
| { |
| "epoch": 0.30144, |
| "grad_norm": 0.22238390147686005, |
| "learning_rate": 0.0001397344, |
| "loss": 0.8958, |
| "step": 18840 |
| }, |
| { |
| "epoch": 0.30176, |
| "grad_norm": 0.2588092088699341, |
| "learning_rate": 0.0001396704, |
| "loss": 0.8379, |
| "step": 18860 |
| }, |
| { |
| "epoch": 0.30208, |
| "grad_norm": 0.32045722007751465, |
| "learning_rate": 0.0001396064, |
| "loss": 0.8985, |
| "step": 18880 |
| }, |
| { |
| "epoch": 0.3024, |
| "grad_norm": 0.277260959148407, |
| "learning_rate": 0.00013954240000000001, |
| "loss": 0.9429, |
| "step": 18900 |
| }, |
| { |
| "epoch": 0.30272, |
| "grad_norm": 0.21729040145874023, |
| "learning_rate": 0.0001394784, |
| "loss": 0.8797, |
| "step": 18920 |
| }, |
| { |
| "epoch": 0.30304, |
| "grad_norm": 0.21643051505088806, |
| "learning_rate": 0.0001394144, |
| "loss": 0.8601, |
| "step": 18940 |
| }, |
| { |
| "epoch": 0.30336, |
| "grad_norm": 0.2492791712284088, |
| "learning_rate": 0.00013935040000000003, |
| "loss": 0.8796, |
| "step": 18960 |
| }, |
| { |
| "epoch": 0.30368, |
| "grad_norm": 0.22013212740421295, |
| "learning_rate": 0.00013928640000000002, |
| "loss": 0.8746, |
| "step": 18980 |
| }, |
| { |
| "epoch": 0.304, |
| "grad_norm": 0.25737157464027405, |
| "learning_rate": 0.0001392224, |
| "loss": 0.9122, |
| "step": 19000 |
| }, |
| { |
| "epoch": 0.30432, |
| "grad_norm": 0.2505769729614258, |
| "learning_rate": 0.0001391584, |
| "loss": 0.9256, |
| "step": 19020 |
| }, |
| { |
| "epoch": 0.30464, |
| "grad_norm": 0.23468899726867676, |
| "learning_rate": 0.0001390944, |
| "loss": 0.9104, |
| "step": 19040 |
| }, |
| { |
| "epoch": 0.30496, |
| "grad_norm": 0.2267698347568512, |
| "learning_rate": 0.0001390304, |
| "loss": 0.9157, |
| "step": 19060 |
| }, |
| { |
| "epoch": 0.30528, |
| "grad_norm": 0.25038978457450867, |
| "learning_rate": 0.0001389664, |
| "loss": 0.9413, |
| "step": 19080 |
| }, |
| { |
| "epoch": 0.3056, |
| "grad_norm": 0.2678689658641815, |
| "learning_rate": 0.00013890240000000002, |
| "loss": 0.879, |
| "step": 19100 |
| }, |
| { |
| "epoch": 0.30592, |
| "grad_norm": 0.2314527928829193, |
| "learning_rate": 0.00013883840000000002, |
| "loss": 0.8693, |
| "step": 19120 |
| }, |
| { |
| "epoch": 0.30624, |
| "grad_norm": 0.2365267276763916, |
| "learning_rate": 0.0001387744, |
| "loss": 0.8498, |
| "step": 19140 |
| }, |
| { |
| "epoch": 0.30656, |
| "grad_norm": 0.24371282756328583, |
| "learning_rate": 0.0001387104, |
| "loss": 0.8604, |
| "step": 19160 |
| }, |
| { |
| "epoch": 0.30688, |
| "grad_norm": 0.3142736554145813, |
| "learning_rate": 0.0001386464, |
| "loss": 0.8808, |
| "step": 19180 |
| }, |
| { |
| "epoch": 0.3072, |
| "grad_norm": 0.29017287492752075, |
| "learning_rate": 0.0001385824, |
| "loss": 0.9096, |
| "step": 19200 |
| }, |
| { |
| "epoch": 0.30752, |
| "grad_norm": 0.26269444823265076, |
| "learning_rate": 0.0001385184, |
| "loss": 0.8378, |
| "step": 19220 |
| }, |
| { |
| "epoch": 0.30784, |
| "grad_norm": 0.22420069575309753, |
| "learning_rate": 0.00013845440000000001, |
| "loss": 0.8637, |
| "step": 19240 |
| }, |
| { |
| "epoch": 0.30816, |
| "grad_norm": 0.237029567360878, |
| "learning_rate": 0.0001383904, |
| "loss": 0.9015, |
| "step": 19260 |
| }, |
| { |
| "epoch": 0.30848, |
| "grad_norm": 0.2280898541212082, |
| "learning_rate": 0.0001383264, |
| "loss": 0.888, |
| "step": 19280 |
| }, |
| { |
| "epoch": 0.3088, |
| "grad_norm": 0.27057039737701416, |
| "learning_rate": 0.00013826240000000003, |
| "loss": 0.8871, |
| "step": 19300 |
| }, |
| { |
| "epoch": 0.30912, |
| "grad_norm": 0.20367485284805298, |
| "learning_rate": 0.00013819840000000002, |
| "loss": 0.8596, |
| "step": 19320 |
| }, |
| { |
| "epoch": 0.30944, |
| "grad_norm": 0.2941890060901642, |
| "learning_rate": 0.0001381344, |
| "loss": 0.9363, |
| "step": 19340 |
| }, |
| { |
| "epoch": 0.30976, |
| "grad_norm": 0.2736791968345642, |
| "learning_rate": 0.0001380704, |
| "loss": 0.8547, |
| "step": 19360 |
| }, |
| { |
| "epoch": 0.31008, |
| "grad_norm": 0.26755064725875854, |
| "learning_rate": 0.0001380064, |
| "loss": 0.8846, |
| "step": 19380 |
| }, |
| { |
| "epoch": 0.3104, |
| "grad_norm": 0.2217639535665512, |
| "learning_rate": 0.0001379424, |
| "loss": 0.9354, |
| "step": 19400 |
| }, |
| { |
| "epoch": 0.31072, |
| "grad_norm": 0.26769986748695374, |
| "learning_rate": 0.0001378784, |
| "loss": 0.8951, |
| "step": 19420 |
| }, |
| { |
| "epoch": 0.31104, |
| "grad_norm": 0.2696346938610077, |
| "learning_rate": 0.00013781440000000002, |
| "loss": 0.886, |
| "step": 19440 |
| }, |
| { |
| "epoch": 0.31136, |
| "grad_norm": 0.24080106616020203, |
| "learning_rate": 0.00013775040000000002, |
| "loss": 0.8534, |
| "step": 19460 |
| }, |
| { |
| "epoch": 0.31168, |
| "grad_norm": 0.2640572786331177, |
| "learning_rate": 0.0001376864, |
| "loss": 0.9043, |
| "step": 19480 |
| }, |
| { |
| "epoch": 0.312, |
| "grad_norm": 0.22785134613513947, |
| "learning_rate": 0.0001376224, |
| "loss": 0.9025, |
| "step": 19500 |
| }, |
| { |
| "epoch": 0.31232, |
| "grad_norm": 0.25118863582611084, |
| "learning_rate": 0.0001375584, |
| "loss": 0.9085, |
| "step": 19520 |
| }, |
| { |
| "epoch": 0.31264, |
| "grad_norm": 0.25034481287002563, |
| "learning_rate": 0.0001374944, |
| "loss": 0.9055, |
| "step": 19540 |
| }, |
| { |
| "epoch": 0.31296, |
| "grad_norm": 0.2614835798740387, |
| "learning_rate": 0.0001374304, |
| "loss": 0.8826, |
| "step": 19560 |
| }, |
| { |
| "epoch": 0.31328, |
| "grad_norm": 0.24192634224891663, |
| "learning_rate": 0.0001373664, |
| "loss": 0.8654, |
| "step": 19580 |
| }, |
| { |
| "epoch": 0.3136, |
| "grad_norm": 0.21241247653961182, |
| "learning_rate": 0.0001373024, |
| "loss": 0.8953, |
| "step": 19600 |
| }, |
| { |
| "epoch": 0.31392, |
| "grad_norm": 0.2508241832256317, |
| "learning_rate": 0.0001372384, |
| "loss": 0.8859, |
| "step": 19620 |
| }, |
| { |
| "epoch": 0.31424, |
| "grad_norm": 0.21773919463157654, |
| "learning_rate": 0.00013717440000000003, |
| "loss": 0.9142, |
| "step": 19640 |
| }, |
| { |
| "epoch": 0.31456, |
| "grad_norm": 0.24207216501235962, |
| "learning_rate": 0.00013711040000000002, |
| "loss": 0.9151, |
| "step": 19660 |
| }, |
| { |
| "epoch": 0.31488, |
| "grad_norm": 0.2574104368686676, |
| "learning_rate": 0.0001370464, |
| "loss": 0.9325, |
| "step": 19680 |
| }, |
| { |
| "epoch": 0.3152, |
| "grad_norm": 0.27459943294525146, |
| "learning_rate": 0.0001369824, |
| "loss": 0.9121, |
| "step": 19700 |
| }, |
| { |
| "epoch": 0.31552, |
| "grad_norm": 0.29292821884155273, |
| "learning_rate": 0.0001369184, |
| "loss": 0.8728, |
| "step": 19720 |
| }, |
| { |
| "epoch": 0.31584, |
| "grad_norm": 0.24154408276081085, |
| "learning_rate": 0.0001368544, |
| "loss": 0.9036, |
| "step": 19740 |
| }, |
| { |
| "epoch": 0.31616, |
| "grad_norm": 0.24163678288459778, |
| "learning_rate": 0.0001367904, |
| "loss": 0.894, |
| "step": 19760 |
| }, |
| { |
| "epoch": 0.31648, |
| "grad_norm": 0.24694600701332092, |
| "learning_rate": 0.00013672640000000002, |
| "loss": 0.8377, |
| "step": 19780 |
| }, |
| { |
| "epoch": 0.3168, |
| "grad_norm": 0.24963949620723724, |
| "learning_rate": 0.00013666240000000001, |
| "loss": 0.8749, |
| "step": 19800 |
| }, |
| { |
| "epoch": 0.31712, |
| "grad_norm": 0.22724537551403046, |
| "learning_rate": 0.0001365984, |
| "loss": 0.8355, |
| "step": 19820 |
| }, |
| { |
| "epoch": 0.31744, |
| "grad_norm": 0.25323203206062317, |
| "learning_rate": 0.0001365344, |
| "loss": 0.8674, |
| "step": 19840 |
| }, |
| { |
| "epoch": 0.31776, |
| "grad_norm": 0.23880919814109802, |
| "learning_rate": 0.0001364704, |
| "loss": 0.8944, |
| "step": 19860 |
| }, |
| { |
| "epoch": 0.31808, |
| "grad_norm": 0.27053314447402954, |
| "learning_rate": 0.0001364064, |
| "loss": 0.9309, |
| "step": 19880 |
| }, |
| { |
| "epoch": 0.3184, |
| "grad_norm": 0.23681063950061798, |
| "learning_rate": 0.0001363424, |
| "loss": 0.9299, |
| "step": 19900 |
| }, |
| { |
| "epoch": 0.31872, |
| "grad_norm": 0.23930998146533966, |
| "learning_rate": 0.0001362784, |
| "loss": 0.87, |
| "step": 19920 |
| }, |
| { |
| "epoch": 0.31904, |
| "grad_norm": 0.2663067877292633, |
| "learning_rate": 0.0001362144, |
| "loss": 0.8916, |
| "step": 19940 |
| }, |
| { |
| "epoch": 0.31936, |
| "grad_norm": 0.24097581207752228, |
| "learning_rate": 0.0001361504, |
| "loss": 0.8894, |
| "step": 19960 |
| }, |
| { |
| "epoch": 0.31968, |
| "grad_norm": 0.2457173615694046, |
| "learning_rate": 0.00013608640000000003, |
| "loss": 0.882, |
| "step": 19980 |
| }, |
| { |
| "epoch": 0.32, |
| "grad_norm": 0.24961303174495697, |
| "learning_rate": 0.00013602240000000002, |
| "loss": 0.9111, |
| "step": 20000 |
| }, |
| { |
| "epoch": 0.32032, |
| "grad_norm": 0.2094404697418213, |
| "learning_rate": 0.0001359584, |
| "loss": 0.8509, |
| "step": 20020 |
| }, |
| { |
| "epoch": 0.32064, |
| "grad_norm": 0.259491890668869, |
| "learning_rate": 0.0001358944, |
| "loss": 0.9049, |
| "step": 20040 |
| }, |
| { |
| "epoch": 0.32096, |
| "grad_norm": 0.23153385519981384, |
| "learning_rate": 0.0001358304, |
| "loss": 0.9332, |
| "step": 20060 |
| }, |
| { |
| "epoch": 0.32128, |
| "grad_norm": 0.22706463932991028, |
| "learning_rate": 0.0001357664, |
| "loss": 0.9113, |
| "step": 20080 |
| }, |
| { |
| "epoch": 0.3216, |
| "grad_norm": 0.24029311537742615, |
| "learning_rate": 0.0001357024, |
| "loss": 0.8789, |
| "step": 20100 |
| }, |
| { |
| "epoch": 0.32192, |
| "grad_norm": 0.2065390646457672, |
| "learning_rate": 0.00013563840000000002, |
| "loss": 0.8984, |
| "step": 20120 |
| }, |
| { |
| "epoch": 0.32224, |
| "grad_norm": 0.2206522673368454, |
| "learning_rate": 0.00013557440000000001, |
| "loss": 0.8654, |
| "step": 20140 |
| }, |
| { |
| "epoch": 0.32256, |
| "grad_norm": 0.25168558955192566, |
| "learning_rate": 0.0001355104, |
| "loss": 0.8857, |
| "step": 20160 |
| }, |
| { |
| "epoch": 0.32288, |
| "grad_norm": 0.33041876554489136, |
| "learning_rate": 0.0001354464, |
| "loss": 0.8478, |
| "step": 20180 |
| }, |
| { |
| "epoch": 0.3232, |
| "grad_norm": 0.24307800829410553, |
| "learning_rate": 0.0001353824, |
| "loss": 0.8422, |
| "step": 20200 |
| }, |
| { |
| "epoch": 0.32352, |
| "grad_norm": 0.21959048509597778, |
| "learning_rate": 0.0001353184, |
| "loss": 0.9439, |
| "step": 20220 |
| }, |
| { |
| "epoch": 0.32384, |
| "grad_norm": 0.2573561668395996, |
| "learning_rate": 0.0001352544, |
| "loss": 0.9243, |
| "step": 20240 |
| }, |
| { |
| "epoch": 0.32416, |
| "grad_norm": 0.21520912647247314, |
| "learning_rate": 0.0001351904, |
| "loss": 0.8948, |
| "step": 20260 |
| }, |
| { |
| "epoch": 0.32448, |
| "grad_norm": 0.1714809685945511, |
| "learning_rate": 0.0001351264, |
| "loss": 0.8826, |
| "step": 20280 |
| }, |
| { |
| "epoch": 0.3248, |
| "grad_norm": 0.24606648087501526, |
| "learning_rate": 0.0001350624, |
| "loss": 0.8686, |
| "step": 20300 |
| }, |
| { |
| "epoch": 0.32512, |
| "grad_norm": 0.2979150116443634, |
| "learning_rate": 0.00013499840000000003, |
| "loss": 0.9369, |
| "step": 20320 |
| }, |
| { |
| "epoch": 0.32544, |
| "grad_norm": 0.2468058317899704, |
| "learning_rate": 0.00013493440000000002, |
| "loss": 0.9155, |
| "step": 20340 |
| }, |
| { |
| "epoch": 0.32576, |
| "grad_norm": 0.24256190657615662, |
| "learning_rate": 0.0001348704, |
| "loss": 0.8497, |
| "step": 20360 |
| }, |
| { |
| "epoch": 0.32608, |
| "grad_norm": 0.18389752507209778, |
| "learning_rate": 0.0001348064, |
| "loss": 0.904, |
| "step": 20380 |
| }, |
| { |
| "epoch": 0.3264, |
| "grad_norm": 0.3476732075214386, |
| "learning_rate": 0.0001347424, |
| "loss": 0.8942, |
| "step": 20400 |
| }, |
| { |
| "epoch": 0.32672, |
| "grad_norm": 0.24657464027404785, |
| "learning_rate": 0.0001346784, |
| "loss": 0.8558, |
| "step": 20420 |
| }, |
| { |
| "epoch": 0.32704, |
| "grad_norm": 0.23047630488872528, |
| "learning_rate": 0.0001346144, |
| "loss": 0.8671, |
| "step": 20440 |
| }, |
| { |
| "epoch": 0.32736, |
| "grad_norm": 0.26599422097206116, |
| "learning_rate": 0.00013455040000000002, |
| "loss": 0.8681, |
| "step": 20460 |
| }, |
| { |
| "epoch": 0.32768, |
| "grad_norm": 0.21226702630519867, |
| "learning_rate": 0.00013448640000000001, |
| "loss": 0.8737, |
| "step": 20480 |
| }, |
| { |
| "epoch": 0.328, |
| "grad_norm": 0.24917374551296234, |
| "learning_rate": 0.0001344224, |
| "loss": 0.8631, |
| "step": 20500 |
| }, |
| { |
| "epoch": 0.32832, |
| "grad_norm": 0.23581089079380035, |
| "learning_rate": 0.0001343584, |
| "loss": 0.8835, |
| "step": 20520 |
| }, |
| { |
| "epoch": 0.32864, |
| "grad_norm": 0.20437762141227722, |
| "learning_rate": 0.0001342944, |
| "loss": 0.8462, |
| "step": 20540 |
| }, |
| { |
| "epoch": 0.32896, |
| "grad_norm": 0.2745993733406067, |
| "learning_rate": 0.0001342304, |
| "loss": 0.8921, |
| "step": 20560 |
| }, |
| { |
| "epoch": 0.32928, |
| "grad_norm": 0.2283765822649002, |
| "learning_rate": 0.0001341664, |
| "loss": 0.9724, |
| "step": 20580 |
| }, |
| { |
| "epoch": 0.3296, |
| "grad_norm": 0.23891101777553558, |
| "learning_rate": 0.0001341024, |
| "loss": 0.8836, |
| "step": 20600 |
| }, |
| { |
| "epoch": 0.32992, |
| "grad_norm": 0.29253366589546204, |
| "learning_rate": 0.0001340384, |
| "loss": 0.8708, |
| "step": 20620 |
| }, |
| { |
| "epoch": 0.33024, |
| "grad_norm": 0.21287086606025696, |
| "learning_rate": 0.0001339744, |
| "loss": 0.8523, |
| "step": 20640 |
| }, |
| { |
| "epoch": 0.33056, |
| "grad_norm": 0.25029948353767395, |
| "learning_rate": 0.00013391040000000002, |
| "loss": 0.93, |
| "step": 20660 |
| }, |
| { |
| "epoch": 0.33088, |
| "grad_norm": 0.2445763796567917, |
| "learning_rate": 0.00013384640000000002, |
| "loss": 0.8435, |
| "step": 20680 |
| }, |
| { |
| "epoch": 0.3312, |
| "grad_norm": 0.23780953884124756, |
| "learning_rate": 0.0001337824, |
| "loss": 0.8524, |
| "step": 20700 |
| }, |
| { |
| "epoch": 0.33152, |
| "grad_norm": 0.24498897790908813, |
| "learning_rate": 0.0001337184, |
| "loss": 0.8806, |
| "step": 20720 |
| }, |
| { |
| "epoch": 0.33184, |
| "grad_norm": 0.24283845722675323, |
| "learning_rate": 0.0001336544, |
| "loss": 0.9041, |
| "step": 20740 |
| }, |
| { |
| "epoch": 0.33216, |
| "grad_norm": 0.25686827301979065, |
| "learning_rate": 0.0001335904, |
| "loss": 0.8836, |
| "step": 20760 |
| }, |
| { |
| "epoch": 0.33248, |
| "grad_norm": 0.19892160594463348, |
| "learning_rate": 0.0001335264, |
| "loss": 0.846, |
| "step": 20780 |
| }, |
| { |
| "epoch": 0.3328, |
| "grad_norm": 0.22868989408016205, |
| "learning_rate": 0.00013346240000000002, |
| "loss": 0.9193, |
| "step": 20800 |
| }, |
| { |
| "epoch": 0.33312, |
| "grad_norm": 0.22255489230155945, |
| "learning_rate": 0.0001333984, |
| "loss": 0.898, |
| "step": 20820 |
| }, |
| { |
| "epoch": 0.33344, |
| "grad_norm": 0.20357553660869598, |
| "learning_rate": 0.0001333344, |
| "loss": 0.8621, |
| "step": 20840 |
| }, |
| { |
| "epoch": 0.33376, |
| "grad_norm": 0.21813243627548218, |
| "learning_rate": 0.0001332704, |
| "loss": 0.8745, |
| "step": 20860 |
| }, |
| { |
| "epoch": 0.33408, |
| "grad_norm": 0.2878071069717407, |
| "learning_rate": 0.0001332064, |
| "loss": 0.8639, |
| "step": 20880 |
| }, |
| { |
| "epoch": 0.3344, |
| "grad_norm": 0.2555156946182251, |
| "learning_rate": 0.0001331424, |
| "loss": 0.8748, |
| "step": 20900 |
| }, |
| { |
| "epoch": 0.33472, |
| "grad_norm": 0.23196488618850708, |
| "learning_rate": 0.00013307840000000002, |
| "loss": 0.8921, |
| "step": 20920 |
| }, |
| { |
| "epoch": 0.33504, |
| "grad_norm": 0.24895872175693512, |
| "learning_rate": 0.0001330144, |
| "loss": 0.9024, |
| "step": 20940 |
| }, |
| { |
| "epoch": 0.33536, |
| "grad_norm": 0.26333311200141907, |
| "learning_rate": 0.0001329504, |
| "loss": 0.9119, |
| "step": 20960 |
| }, |
| { |
| "epoch": 0.33568, |
| "grad_norm": 0.23664534091949463, |
| "learning_rate": 0.0001328864, |
| "loss": 0.8766, |
| "step": 20980 |
| }, |
| { |
| "epoch": 0.336, |
| "grad_norm": 0.2326672226190567, |
| "learning_rate": 0.00013282240000000002, |
| "loss": 0.8263, |
| "step": 21000 |
| }, |
| { |
| "epoch": 0.33632, |
| "grad_norm": 0.23202668130397797, |
| "learning_rate": 0.00013275840000000002, |
| "loss": 0.9158, |
| "step": 21020 |
| }, |
| { |
| "epoch": 0.33664, |
| "grad_norm": 0.2137191891670227, |
| "learning_rate": 0.0001326944, |
| "loss": 0.9182, |
| "step": 21040 |
| }, |
| { |
| "epoch": 0.33696, |
| "grad_norm": 0.22848786413669586, |
| "learning_rate": 0.0001326304, |
| "loss": 0.8647, |
| "step": 21060 |
| }, |
| { |
| "epoch": 0.33728, |
| "grad_norm": 0.2685950994491577, |
| "learning_rate": 0.0001325664, |
| "loss": 0.8808, |
| "step": 21080 |
| }, |
| { |
| "epoch": 0.3376, |
| "grad_norm": 0.22779199481010437, |
| "learning_rate": 0.0001325024, |
| "loss": 0.9204, |
| "step": 21100 |
| }, |
| { |
| "epoch": 0.33792, |
| "grad_norm": 0.23398426175117493, |
| "learning_rate": 0.0001324384, |
| "loss": 0.8765, |
| "step": 21120 |
| }, |
| { |
| "epoch": 0.33824, |
| "grad_norm": 0.2746240794658661, |
| "learning_rate": 0.00013237440000000002, |
| "loss": 0.9218, |
| "step": 21140 |
| }, |
| { |
| "epoch": 0.33856, |
| "grad_norm": 0.21966886520385742, |
| "learning_rate": 0.0001323104, |
| "loss": 0.8135, |
| "step": 21160 |
| }, |
| { |
| "epoch": 0.33888, |
| "grad_norm": 0.24707438051700592, |
| "learning_rate": 0.0001322464, |
| "loss": 0.9269, |
| "step": 21180 |
| }, |
| { |
| "epoch": 0.3392, |
| "grad_norm": 0.23954932391643524, |
| "learning_rate": 0.00013218240000000003, |
| "loss": 0.8461, |
| "step": 21200 |
| }, |
| { |
| "epoch": 0.33952, |
| "grad_norm": 0.2673095762729645, |
| "learning_rate": 0.0001321184, |
| "loss": 0.8173, |
| "step": 21220 |
| }, |
| { |
| "epoch": 0.33984, |
| "grad_norm": 0.2744886577129364, |
| "learning_rate": 0.0001320544, |
| "loss": 0.9024, |
| "step": 21240 |
| }, |
| { |
| "epoch": 0.34016, |
| "grad_norm": 0.2099352777004242, |
| "learning_rate": 0.00013199040000000002, |
| "loss": 0.8744, |
| "step": 21260 |
| }, |
| { |
| "epoch": 0.34048, |
| "grad_norm": 0.24516913294792175, |
| "learning_rate": 0.0001319264, |
| "loss": 0.9172, |
| "step": 21280 |
| }, |
| { |
| "epoch": 0.3408, |
| "grad_norm": 0.2523512840270996, |
| "learning_rate": 0.0001318624, |
| "loss": 0.8656, |
| "step": 21300 |
| }, |
| { |
| "epoch": 0.34112, |
| "grad_norm": 0.23334811627864838, |
| "learning_rate": 0.0001317984, |
| "loss": 0.9486, |
| "step": 21320 |
| }, |
| { |
| "epoch": 0.34144, |
| "grad_norm": 0.2607022225856781, |
| "learning_rate": 0.00013173440000000002, |
| "loss": 0.9262, |
| "step": 21340 |
| }, |
| { |
| "epoch": 0.34176, |
| "grad_norm": 0.2326965034008026, |
| "learning_rate": 0.00013167040000000002, |
| "loss": 0.88, |
| "step": 21360 |
| }, |
| { |
| "epoch": 0.34208, |
| "grad_norm": 0.26053765416145325, |
| "learning_rate": 0.0001316064, |
| "loss": 0.926, |
| "step": 21380 |
| }, |
| { |
| "epoch": 0.3424, |
| "grad_norm": 0.24075692892074585, |
| "learning_rate": 0.0001315424, |
| "loss": 0.8301, |
| "step": 21400 |
| }, |
| { |
| "epoch": 0.34272, |
| "grad_norm": 0.2621121108531952, |
| "learning_rate": 0.0001314784, |
| "loss": 0.8895, |
| "step": 21420 |
| }, |
| { |
| "epoch": 0.34304, |
| "grad_norm": 0.26628807187080383, |
| "learning_rate": 0.0001314144, |
| "loss": 0.8634, |
| "step": 21440 |
| }, |
| { |
| "epoch": 0.34336, |
| "grad_norm": 0.301937073469162, |
| "learning_rate": 0.0001313504, |
| "loss": 0.8876, |
| "step": 21460 |
| }, |
| { |
| "epoch": 0.34368, |
| "grad_norm": 0.2092495709657669, |
| "learning_rate": 0.00013128640000000002, |
| "loss": 0.917, |
| "step": 21480 |
| }, |
| { |
| "epoch": 0.344, |
| "grad_norm": 0.2542109191417694, |
| "learning_rate": 0.0001312224, |
| "loss": 0.8835, |
| "step": 21500 |
| }, |
| { |
| "epoch": 0.34432, |
| "grad_norm": 0.3914921283721924, |
| "learning_rate": 0.0001311584, |
| "loss": 0.8868, |
| "step": 21520 |
| }, |
| { |
| "epoch": 0.34464, |
| "grad_norm": 0.23783642053604126, |
| "learning_rate": 0.00013109440000000003, |
| "loss": 0.9062, |
| "step": 21540 |
| }, |
| { |
| "epoch": 0.34496, |
| "grad_norm": 0.22890570759773254, |
| "learning_rate": 0.0001310304, |
| "loss": 0.914, |
| "step": 21560 |
| }, |
| { |
| "epoch": 0.34528, |
| "grad_norm": 0.23722144961357117, |
| "learning_rate": 0.0001309664, |
| "loss": 0.9393, |
| "step": 21580 |
| }, |
| { |
| "epoch": 0.3456, |
| "grad_norm": 0.264466255903244, |
| "learning_rate": 0.00013090240000000002, |
| "loss": 0.9294, |
| "step": 21600 |
| }, |
| { |
| "epoch": 0.34592, |
| "grad_norm": 0.23583443462848663, |
| "learning_rate": 0.0001308384, |
| "loss": 0.8729, |
| "step": 21620 |
| }, |
| { |
| "epoch": 0.34624, |
| "grad_norm": 0.25647860765457153, |
| "learning_rate": 0.0001307776, |
| "loss": 0.8512, |
| "step": 21640 |
| }, |
| { |
| "epoch": 0.34656, |
| "grad_norm": 0.2854239344596863, |
| "learning_rate": 0.0001307136, |
| "loss": 0.925, |
| "step": 21660 |
| }, |
| { |
| "epoch": 0.34688, |
| "grad_norm": 0.26097196340560913, |
| "learning_rate": 0.0001306496, |
| "loss": 0.9127, |
| "step": 21680 |
| }, |
| { |
| "epoch": 0.3472, |
| "grad_norm": 0.20337998867034912, |
| "learning_rate": 0.0001305856, |
| "loss": 0.9048, |
| "step": 21700 |
| }, |
| { |
| "epoch": 0.34752, |
| "grad_norm": 0.2021179497241974, |
| "learning_rate": 0.0001305216, |
| "loss": 0.9213, |
| "step": 21720 |
| }, |
| { |
| "epoch": 0.34784, |
| "grad_norm": 0.2213645875453949, |
| "learning_rate": 0.0001304576, |
| "loss": 0.8891, |
| "step": 21740 |
| }, |
| { |
| "epoch": 0.34816, |
| "grad_norm": 0.2201976180076599, |
| "learning_rate": 0.0001303936, |
| "loss": 0.8425, |
| "step": 21760 |
| }, |
| { |
| "epoch": 0.34848, |
| "grad_norm": 0.21747338771820068, |
| "learning_rate": 0.0001303296, |
| "loss": 0.9035, |
| "step": 21780 |
| }, |
| { |
| "epoch": 0.3488, |
| "grad_norm": 0.23967601358890533, |
| "learning_rate": 0.0001302656, |
| "loss": 0.9168, |
| "step": 21800 |
| }, |
| { |
| "epoch": 0.34912, |
| "grad_norm": 0.2570231854915619, |
| "learning_rate": 0.00013020160000000002, |
| "loss": 0.8813, |
| "step": 21820 |
| }, |
| { |
| "epoch": 0.34944, |
| "grad_norm": 0.24394749104976654, |
| "learning_rate": 0.00013013760000000002, |
| "loss": 0.884, |
| "step": 21840 |
| }, |
| { |
| "epoch": 0.34976, |
| "grad_norm": 0.25966569781303406, |
| "learning_rate": 0.0001300736, |
| "loss": 0.8981, |
| "step": 21860 |
| }, |
| { |
| "epoch": 0.35008, |
| "grad_norm": 0.24895374476909637, |
| "learning_rate": 0.0001300096, |
| "loss": 0.9225, |
| "step": 21880 |
| }, |
| { |
| "epoch": 0.3504, |
| "grad_norm": 0.22816093266010284, |
| "learning_rate": 0.0001299456, |
| "loss": 0.8729, |
| "step": 21900 |
| }, |
| { |
| "epoch": 0.35072, |
| "grad_norm": 0.22073081135749817, |
| "learning_rate": 0.0001298816, |
| "loss": 0.8505, |
| "step": 21920 |
| }, |
| { |
| "epoch": 0.35104, |
| "grad_norm": 0.24581517279148102, |
| "learning_rate": 0.0001298176, |
| "loss": 0.8963, |
| "step": 21940 |
| }, |
| { |
| "epoch": 0.35136, |
| "grad_norm": 0.22976480424404144, |
| "learning_rate": 0.00012975360000000002, |
| "loss": 0.8694, |
| "step": 21960 |
| }, |
| { |
| "epoch": 0.35168, |
| "grad_norm": 0.2811223566532135, |
| "learning_rate": 0.0001296896, |
| "loss": 0.9338, |
| "step": 21980 |
| }, |
| { |
| "epoch": 0.352, |
| "grad_norm": 0.2338571697473526, |
| "learning_rate": 0.0001296256, |
| "loss": 0.883, |
| "step": 22000 |
| }, |
| { |
| "epoch": 0.35232, |
| "grad_norm": 0.2589928209781647, |
| "learning_rate": 0.0001295616, |
| "loss": 0.8674, |
| "step": 22020 |
| }, |
| { |
| "epoch": 0.35264, |
| "grad_norm": 0.2552894949913025, |
| "learning_rate": 0.0001294976, |
| "loss": 0.9449, |
| "step": 22040 |
| }, |
| { |
| "epoch": 0.35296, |
| "grad_norm": 0.2796124219894409, |
| "learning_rate": 0.0001294336, |
| "loss": 0.9292, |
| "step": 22060 |
| }, |
| { |
| "epoch": 0.35328, |
| "grad_norm": 0.25079146027565, |
| "learning_rate": 0.0001293696, |
| "loss": 0.8741, |
| "step": 22080 |
| }, |
| { |
| "epoch": 0.3536, |
| "grad_norm": 0.25996407866477966, |
| "learning_rate": 0.0001293056, |
| "loss": 0.8642, |
| "step": 22100 |
| }, |
| { |
| "epoch": 0.35392, |
| "grad_norm": 0.24768070876598358, |
| "learning_rate": 0.0001292416, |
| "loss": 0.8876, |
| "step": 22120 |
| }, |
| { |
| "epoch": 0.35424, |
| "grad_norm": 0.2748354375362396, |
| "learning_rate": 0.0001291776, |
| "loss": 0.8686, |
| "step": 22140 |
| }, |
| { |
| "epoch": 0.35456, |
| "grad_norm": 0.26020100712776184, |
| "learning_rate": 0.00012911360000000002, |
| "loss": 0.8978, |
| "step": 22160 |
| }, |
| { |
| "epoch": 0.35488, |
| "grad_norm": 0.22740164399147034, |
| "learning_rate": 0.00012904960000000002, |
| "loss": 0.8806, |
| "step": 22180 |
| }, |
| { |
| "epoch": 0.3552, |
| "grad_norm": 0.22539575397968292, |
| "learning_rate": 0.0001289856, |
| "loss": 0.9586, |
| "step": 22200 |
| }, |
| { |
| "epoch": 0.35552, |
| "grad_norm": 0.224917471408844, |
| "learning_rate": 0.0001289216, |
| "loss": 0.929, |
| "step": 22220 |
| }, |
| { |
| "epoch": 0.35584, |
| "grad_norm": 0.2811584174633026, |
| "learning_rate": 0.0001288576, |
| "loss": 0.8816, |
| "step": 22240 |
| }, |
| { |
| "epoch": 0.35616, |
| "grad_norm": 0.2050054520368576, |
| "learning_rate": 0.0001287936, |
| "loss": 0.886, |
| "step": 22260 |
| }, |
| { |
| "epoch": 0.35648, |
| "grad_norm": 0.26352056860923767, |
| "learning_rate": 0.0001287296, |
| "loss": 0.8631, |
| "step": 22280 |
| }, |
| { |
| "epoch": 0.3568, |
| "grad_norm": 0.2189260572195053, |
| "learning_rate": 0.00012866560000000002, |
| "loss": 0.8886, |
| "step": 22300 |
| }, |
| { |
| "epoch": 0.35712, |
| "grad_norm": 0.20122426748275757, |
| "learning_rate": 0.0001286016, |
| "loss": 0.8899, |
| "step": 22320 |
| }, |
| { |
| "epoch": 0.35744, |
| "grad_norm": 0.2483946979045868, |
| "learning_rate": 0.0001285376, |
| "loss": 0.9277, |
| "step": 22340 |
| }, |
| { |
| "epoch": 0.35776, |
| "grad_norm": 0.2784961462020874, |
| "learning_rate": 0.0001284736, |
| "loss": 0.8697, |
| "step": 22360 |
| }, |
| { |
| "epoch": 0.35808, |
| "grad_norm": 0.253579318523407, |
| "learning_rate": 0.0001284096, |
| "loss": 0.9001, |
| "step": 22380 |
| }, |
| { |
| "epoch": 0.3584, |
| "grad_norm": 0.24485689401626587, |
| "learning_rate": 0.0001283456, |
| "loss": 0.8837, |
| "step": 22400 |
| }, |
| { |
| "epoch": 0.35872, |
| "grad_norm": 0.28353968262672424, |
| "learning_rate": 0.0001282816, |
| "loss": 0.8641, |
| "step": 22420 |
| }, |
| { |
| "epoch": 0.35904, |
| "grad_norm": 0.22458963096141815, |
| "learning_rate": 0.0001282176, |
| "loss": 0.8424, |
| "step": 22440 |
| }, |
| { |
| "epoch": 0.35936, |
| "grad_norm": 0.21924658119678497, |
| "learning_rate": 0.0001281536, |
| "loss": 0.9238, |
| "step": 22460 |
| }, |
| { |
| "epoch": 0.35968, |
| "grad_norm": 0.2481856644153595, |
| "learning_rate": 0.0001280896, |
| "loss": 0.914, |
| "step": 22480 |
| }, |
| { |
| "epoch": 0.36, |
| "grad_norm": 0.22141766548156738, |
| "learning_rate": 0.00012802560000000002, |
| "loss": 0.883, |
| "step": 22500 |
| }, |
| { |
| "epoch": 0.36032, |
| "grad_norm": 0.241195410490036, |
| "learning_rate": 0.00012796160000000002, |
| "loss": 0.8758, |
| "step": 22520 |
| }, |
| { |
| "epoch": 0.36064, |
| "grad_norm": 0.21804146468639374, |
| "learning_rate": 0.0001278976, |
| "loss": 0.892, |
| "step": 22540 |
| }, |
| { |
| "epoch": 0.36096, |
| "grad_norm": 0.21555864810943604, |
| "learning_rate": 0.0001278336, |
| "loss": 0.9042, |
| "step": 22560 |
| }, |
| { |
| "epoch": 0.36128, |
| "grad_norm": 0.2855897545814514, |
| "learning_rate": 0.0001277696, |
| "loss": 0.8698, |
| "step": 22580 |
| }, |
| { |
| "epoch": 0.3616, |
| "grad_norm": 0.18174555897712708, |
| "learning_rate": 0.0001277056, |
| "loss": 0.9076, |
| "step": 22600 |
| }, |
| { |
| "epoch": 0.36192, |
| "grad_norm": 0.24485164880752563, |
| "learning_rate": 0.0001276416, |
| "loss": 0.8773, |
| "step": 22620 |
| }, |
| { |
| "epoch": 0.36224, |
| "grad_norm": 0.24945834279060364, |
| "learning_rate": 0.00012757760000000002, |
| "loss": 0.8947, |
| "step": 22640 |
| }, |
| { |
| "epoch": 0.36256, |
| "grad_norm": 0.27107083797454834, |
| "learning_rate": 0.0001275136, |
| "loss": 0.8908, |
| "step": 22660 |
| }, |
| { |
| "epoch": 0.36288, |
| "grad_norm": 0.2725388705730438, |
| "learning_rate": 0.0001274496, |
| "loss": 0.9117, |
| "step": 22680 |
| }, |
| { |
| "epoch": 0.3632, |
| "grad_norm": 0.2100275754928589, |
| "learning_rate": 0.00012738560000000003, |
| "loss": 0.841, |
| "step": 22700 |
| }, |
| { |
| "epoch": 0.36352, |
| "grad_norm": 0.18476144969463348, |
| "learning_rate": 0.0001273216, |
| "loss": 0.8546, |
| "step": 22720 |
| }, |
| { |
| "epoch": 0.36384, |
| "grad_norm": 0.2054418921470642, |
| "learning_rate": 0.0001272576, |
| "loss": 0.9456, |
| "step": 22740 |
| }, |
| { |
| "epoch": 0.36416, |
| "grad_norm": 0.23518037796020508, |
| "learning_rate": 0.00012719360000000001, |
| "loss": 0.8499, |
| "step": 22760 |
| }, |
| { |
| "epoch": 0.36448, |
| "grad_norm": 0.22273370623588562, |
| "learning_rate": 0.0001271296, |
| "loss": 0.8961, |
| "step": 22780 |
| }, |
| { |
| "epoch": 0.3648, |
| "grad_norm": 0.2261369377374649, |
| "learning_rate": 0.0001270656, |
| "loss": 0.8655, |
| "step": 22800 |
| }, |
| { |
| "epoch": 0.36512, |
| "grad_norm": 0.2787102460861206, |
| "learning_rate": 0.0001270016, |
| "loss": 0.8457, |
| "step": 22820 |
| }, |
| { |
| "epoch": 0.36544, |
| "grad_norm": 0.287702351808548, |
| "learning_rate": 0.00012693760000000002, |
| "loss": 0.8998, |
| "step": 22840 |
| }, |
| { |
| "epoch": 0.36576, |
| "grad_norm": 0.24665424227714539, |
| "learning_rate": 0.00012687360000000002, |
| "loss": 0.9337, |
| "step": 22860 |
| }, |
| { |
| "epoch": 0.36608, |
| "grad_norm": 0.23870235681533813, |
| "learning_rate": 0.00012680959999999999, |
| "loss": 0.9023, |
| "step": 22880 |
| }, |
| { |
| "epoch": 0.3664, |
| "grad_norm": 0.2987755835056305, |
| "learning_rate": 0.0001267456, |
| "loss": 0.9055, |
| "step": 22900 |
| }, |
| { |
| "epoch": 0.36672, |
| "grad_norm": 0.271150678396225, |
| "learning_rate": 0.0001266816, |
| "loss": 0.8995, |
| "step": 22920 |
| }, |
| { |
| "epoch": 0.36704, |
| "grad_norm": 0.2605392038822174, |
| "learning_rate": 0.0001266176, |
| "loss": 0.8712, |
| "step": 22940 |
| }, |
| { |
| "epoch": 0.36736, |
| "grad_norm": 0.20183727145195007, |
| "learning_rate": 0.0001265536, |
| "loss": 0.9121, |
| "step": 22960 |
| }, |
| { |
| "epoch": 0.36768, |
| "grad_norm": 0.22325585782527924, |
| "learning_rate": 0.00012648960000000002, |
| "loss": 0.9287, |
| "step": 22980 |
| }, |
| { |
| "epoch": 0.368, |
| "grad_norm": 0.2461370825767517, |
| "learning_rate": 0.0001264256, |
| "loss": 0.9411, |
| "step": 23000 |
| }, |
| { |
| "epoch": 0.36832, |
| "grad_norm": 0.26270198822021484, |
| "learning_rate": 0.0001263616, |
| "loss": 0.8686, |
| "step": 23020 |
| }, |
| { |
| "epoch": 0.36864, |
| "grad_norm": 0.2455090433359146, |
| "learning_rate": 0.00012629760000000003, |
| "loss": 0.8735, |
| "step": 23040 |
| }, |
| { |
| "epoch": 0.36896, |
| "grad_norm": 0.22023002803325653, |
| "learning_rate": 0.0001262336, |
| "loss": 0.9235, |
| "step": 23060 |
| }, |
| { |
| "epoch": 0.36928, |
| "grad_norm": 0.19227732717990875, |
| "learning_rate": 0.0001261696, |
| "loss": 0.8781, |
| "step": 23080 |
| }, |
| { |
| "epoch": 0.3696, |
| "grad_norm": 0.230510413646698, |
| "learning_rate": 0.00012610560000000001, |
| "loss": 0.8865, |
| "step": 23100 |
| }, |
| { |
| "epoch": 0.36992, |
| "grad_norm": 0.2508642077445984, |
| "learning_rate": 0.0001260416, |
| "loss": 0.9123, |
| "step": 23120 |
| }, |
| { |
| "epoch": 0.37024, |
| "grad_norm": 0.22809596359729767, |
| "learning_rate": 0.0001259776, |
| "loss": 0.8799, |
| "step": 23140 |
| }, |
| { |
| "epoch": 0.37056, |
| "grad_norm": 0.2615736722946167, |
| "learning_rate": 0.0001259136, |
| "loss": 0.8927, |
| "step": 23160 |
| }, |
| { |
| "epoch": 0.37088, |
| "grad_norm": 0.23064741492271423, |
| "learning_rate": 0.00012584960000000002, |
| "loss": 0.9142, |
| "step": 23180 |
| }, |
| { |
| "epoch": 0.3712, |
| "grad_norm": 0.2514834403991699, |
| "learning_rate": 0.00012578560000000002, |
| "loss": 0.9439, |
| "step": 23200 |
| }, |
| { |
| "epoch": 0.37152, |
| "grad_norm": 0.27186328172683716, |
| "learning_rate": 0.00012572159999999999, |
| "loss": 0.8197, |
| "step": 23220 |
| }, |
| { |
| "epoch": 0.37184, |
| "grad_norm": 0.2310495674610138, |
| "learning_rate": 0.0001256576, |
| "loss": 0.925, |
| "step": 23240 |
| }, |
| { |
| "epoch": 0.37216, |
| "grad_norm": 0.21828240156173706, |
| "learning_rate": 0.0001255936, |
| "loss": 0.8944, |
| "step": 23260 |
| }, |
| { |
| "epoch": 0.37248, |
| "grad_norm": 0.25522857904434204, |
| "learning_rate": 0.0001255296, |
| "loss": 0.8338, |
| "step": 23280 |
| }, |
| { |
| "epoch": 0.3728, |
| "grad_norm": 0.2428404837846756, |
| "learning_rate": 0.0001254656, |
| "loss": 0.8788, |
| "step": 23300 |
| }, |
| { |
| "epoch": 0.37312, |
| "grad_norm": 0.2573212683200836, |
| "learning_rate": 0.00012540160000000002, |
| "loss": 0.8674, |
| "step": 23320 |
| }, |
| { |
| "epoch": 0.37344, |
| "grad_norm": 0.26089292764663696, |
| "learning_rate": 0.0001253376, |
| "loss": 0.8811, |
| "step": 23340 |
| }, |
| { |
| "epoch": 0.37376, |
| "grad_norm": 0.2231331765651703, |
| "learning_rate": 0.0001252736, |
| "loss": 0.8762, |
| "step": 23360 |
| }, |
| { |
| "epoch": 0.37408, |
| "grad_norm": 0.22179879248142242, |
| "learning_rate": 0.00012520960000000003, |
| "loss": 0.9226, |
| "step": 23380 |
| }, |
| { |
| "epoch": 0.3744, |
| "grad_norm": 0.29986852407455444, |
| "learning_rate": 0.0001251456, |
| "loss": 0.8889, |
| "step": 23400 |
| }, |
| { |
| "epoch": 0.37472, |
| "grad_norm": 0.30950990319252014, |
| "learning_rate": 0.0001250816, |
| "loss": 0.9255, |
| "step": 23420 |
| }, |
| { |
| "epoch": 0.37504, |
| "grad_norm": 0.2493577003479004, |
| "learning_rate": 0.00012501760000000001, |
| "loss": 0.8823, |
| "step": 23440 |
| }, |
| { |
| "epoch": 0.37536, |
| "grad_norm": 0.20476320385932922, |
| "learning_rate": 0.0001249536, |
| "loss": 0.9095, |
| "step": 23460 |
| }, |
| { |
| "epoch": 0.37568, |
| "grad_norm": 0.22957487404346466, |
| "learning_rate": 0.0001248896, |
| "loss": 0.8832, |
| "step": 23480 |
| }, |
| { |
| "epoch": 0.376, |
| "grad_norm": 0.20240527391433716, |
| "learning_rate": 0.0001248256, |
| "loss": 0.8569, |
| "step": 23500 |
| }, |
| { |
| "epoch": 0.37632, |
| "grad_norm": 0.2227647453546524, |
| "learning_rate": 0.00012476160000000002, |
| "loss": 0.8729, |
| "step": 23520 |
| }, |
| { |
| "epoch": 0.37664, |
| "grad_norm": 0.2590673565864563, |
| "learning_rate": 0.00012469760000000002, |
| "loss": 0.9006, |
| "step": 23540 |
| }, |
| { |
| "epoch": 0.37696, |
| "grad_norm": 0.2456459403038025, |
| "learning_rate": 0.0001246336, |
| "loss": 0.9238, |
| "step": 23560 |
| }, |
| { |
| "epoch": 0.37728, |
| "grad_norm": 0.23964524269104004, |
| "learning_rate": 0.0001245696, |
| "loss": 0.9148, |
| "step": 23580 |
| }, |
| { |
| "epoch": 0.3776, |
| "grad_norm": 0.2621648907661438, |
| "learning_rate": 0.0001245056, |
| "loss": 0.91, |
| "step": 23600 |
| }, |
| { |
| "epoch": 0.37792, |
| "grad_norm": 0.2700936794281006, |
| "learning_rate": 0.0001244416, |
| "loss": 0.8256, |
| "step": 23620 |
| }, |
| { |
| "epoch": 0.37824, |
| "grad_norm": 0.2757120430469513, |
| "learning_rate": 0.0001243776, |
| "loss": 0.9158, |
| "step": 23640 |
| }, |
| { |
| "epoch": 0.37856, |
| "grad_norm": 0.25307512283325195, |
| "learning_rate": 0.00012431360000000002, |
| "loss": 0.9564, |
| "step": 23660 |
| }, |
| { |
| "epoch": 0.37888, |
| "grad_norm": 0.22811047732830048, |
| "learning_rate": 0.0001242496, |
| "loss": 0.9005, |
| "step": 23680 |
| }, |
| { |
| "epoch": 0.3792, |
| "grad_norm": 0.2570401430130005, |
| "learning_rate": 0.0001241856, |
| "loss": 0.903, |
| "step": 23700 |
| }, |
| { |
| "epoch": 0.37952, |
| "grad_norm": 0.2455683946609497, |
| "learning_rate": 0.00012412160000000003, |
| "loss": 0.8849, |
| "step": 23720 |
| }, |
| { |
| "epoch": 0.37984, |
| "grad_norm": 0.2513890564441681, |
| "learning_rate": 0.0001240576, |
| "loss": 0.916, |
| "step": 23740 |
| }, |
| { |
| "epoch": 0.38016, |
| "grad_norm": 0.2542913556098938, |
| "learning_rate": 0.0001239936, |
| "loss": 0.8913, |
| "step": 23760 |
| }, |
| { |
| "epoch": 0.38048, |
| "grad_norm": 0.23636704683303833, |
| "learning_rate": 0.00012392960000000001, |
| "loss": 0.9071, |
| "step": 23780 |
| }, |
| { |
| "epoch": 0.3808, |
| "grad_norm": 0.23358510434627533, |
| "learning_rate": 0.00012386880000000002, |
| "loss": 0.8727, |
| "step": 23800 |
| }, |
| { |
| "epoch": 0.38112, |
| "grad_norm": 0.2075459063053131, |
| "learning_rate": 0.0001238048, |
| "loss": 0.8602, |
| "step": 23820 |
| }, |
| { |
| "epoch": 0.38144, |
| "grad_norm": 0.24009796977043152, |
| "learning_rate": 0.0001237408, |
| "loss": 0.8787, |
| "step": 23840 |
| }, |
| { |
| "epoch": 0.38176, |
| "grad_norm": 0.20129810273647308, |
| "learning_rate": 0.0001236768, |
| "loss": 0.9057, |
| "step": 23860 |
| }, |
| { |
| "epoch": 0.38208, |
| "grad_norm": 0.2360943704843521, |
| "learning_rate": 0.0001236128, |
| "loss": 0.8914, |
| "step": 23880 |
| }, |
| { |
| "epoch": 0.3824, |
| "grad_norm": 0.2023976743221283, |
| "learning_rate": 0.0001235488, |
| "loss": 0.8671, |
| "step": 23900 |
| }, |
| { |
| "epoch": 0.38272, |
| "grad_norm": 0.28492018580436707, |
| "learning_rate": 0.0001234848, |
| "loss": 0.8466, |
| "step": 23920 |
| }, |
| { |
| "epoch": 0.38304, |
| "grad_norm": 0.2340991497039795, |
| "learning_rate": 0.0001234208, |
| "loss": 0.8719, |
| "step": 23940 |
| }, |
| { |
| "epoch": 0.38336, |
| "grad_norm": 0.2417367696762085, |
| "learning_rate": 0.0001233568, |
| "loss": 0.8661, |
| "step": 23960 |
| }, |
| { |
| "epoch": 0.38368, |
| "grad_norm": 0.2661448121070862, |
| "learning_rate": 0.0001232928, |
| "loss": 0.9087, |
| "step": 23980 |
| }, |
| { |
| "epoch": 0.384, |
| "grad_norm": 0.2731974720954895, |
| "learning_rate": 0.00012322880000000002, |
| "loss": 0.8759, |
| "step": 24000 |
| }, |
| { |
| "epoch": 0.38432, |
| "grad_norm": 0.25823774933815, |
| "learning_rate": 0.00012316480000000002, |
| "loss": 0.8722, |
| "step": 24020 |
| }, |
| { |
| "epoch": 0.38464, |
| "grad_norm": 0.25573599338531494, |
| "learning_rate": 0.0001231008, |
| "loss": 0.9005, |
| "step": 24040 |
| }, |
| { |
| "epoch": 0.38496, |
| "grad_norm": 0.22409121692180634, |
| "learning_rate": 0.0001230368, |
| "loss": 0.8797, |
| "step": 24060 |
| }, |
| { |
| "epoch": 0.38528, |
| "grad_norm": 0.2514369785785675, |
| "learning_rate": 0.0001229728, |
| "loss": 0.9225, |
| "step": 24080 |
| }, |
| { |
| "epoch": 0.3856, |
| "grad_norm": 0.2252058982849121, |
| "learning_rate": 0.0001229088, |
| "loss": 0.8665, |
| "step": 24100 |
| }, |
| { |
| "epoch": 0.38592, |
| "grad_norm": 0.29812994599342346, |
| "learning_rate": 0.0001228448, |
| "loss": 0.8914, |
| "step": 24120 |
| }, |
| { |
| "epoch": 0.38624, |
| "grad_norm": 0.23113328218460083, |
| "learning_rate": 0.00012278080000000002, |
| "loss": 0.9096, |
| "step": 24140 |
| }, |
| { |
| "epoch": 0.38656, |
| "grad_norm": 0.26779311895370483, |
| "learning_rate": 0.0001227168, |
| "loss": 0.8901, |
| "step": 24160 |
| }, |
| { |
| "epoch": 0.38688, |
| "grad_norm": 0.26052671670913696, |
| "learning_rate": 0.0001226528, |
| "loss": 0.8874, |
| "step": 24180 |
| }, |
| { |
| "epoch": 0.3872, |
| "grad_norm": 0.24518652260303497, |
| "learning_rate": 0.00012258880000000003, |
| "loss": 0.8906, |
| "step": 24200 |
| }, |
| { |
| "epoch": 0.38752, |
| "grad_norm": 0.22408343851566315, |
| "learning_rate": 0.0001225248, |
| "loss": 0.9066, |
| "step": 24220 |
| }, |
| { |
| "epoch": 0.38784, |
| "grad_norm": 0.24065548181533813, |
| "learning_rate": 0.0001224608, |
| "loss": 0.8831, |
| "step": 24240 |
| }, |
| { |
| "epoch": 0.38816, |
| "grad_norm": 0.2512281835079193, |
| "learning_rate": 0.00012239680000000001, |
| "loss": 0.9191, |
| "step": 24260 |
| }, |
| { |
| "epoch": 0.38848, |
| "grad_norm": 0.212532177567482, |
| "learning_rate": 0.0001223328, |
| "loss": 0.8254, |
| "step": 24280 |
| }, |
| { |
| "epoch": 0.3888, |
| "grad_norm": 0.27028560638427734, |
| "learning_rate": 0.0001222688, |
| "loss": 0.9266, |
| "step": 24300 |
| }, |
| { |
| "epoch": 0.38912, |
| "grad_norm": 0.25514838099479675, |
| "learning_rate": 0.0001222048, |
| "loss": 0.8876, |
| "step": 24320 |
| }, |
| { |
| "epoch": 0.38944, |
| "grad_norm": 0.23313501477241516, |
| "learning_rate": 0.00012214080000000002, |
| "loss": 0.8999, |
| "step": 24340 |
| }, |
| { |
| "epoch": 0.38976, |
| "grad_norm": 0.24142177402973175, |
| "learning_rate": 0.00012207680000000002, |
| "loss": 0.8689, |
| "step": 24360 |
| }, |
| { |
| "epoch": 0.39008, |
| "grad_norm": 0.26181671023368835, |
| "learning_rate": 0.00012201280000000001, |
| "loss": 0.9077, |
| "step": 24380 |
| }, |
| { |
| "epoch": 0.3904, |
| "grad_norm": 0.20765413343906403, |
| "learning_rate": 0.0001219488, |
| "loss": 0.8583, |
| "step": 24400 |
| }, |
| { |
| "epoch": 0.39072, |
| "grad_norm": 0.23421642184257507, |
| "learning_rate": 0.0001218848, |
| "loss": 0.8741, |
| "step": 24420 |
| }, |
| { |
| "epoch": 0.39104, |
| "grad_norm": 0.24042420089244843, |
| "learning_rate": 0.0001218208, |
| "loss": 0.8836, |
| "step": 24440 |
| }, |
| { |
| "epoch": 0.39136, |
| "grad_norm": 0.23314060270786285, |
| "learning_rate": 0.0001217568, |
| "loss": 0.8494, |
| "step": 24460 |
| }, |
| { |
| "epoch": 0.39168, |
| "grad_norm": 0.21119163930416107, |
| "learning_rate": 0.0001216928, |
| "loss": 0.8662, |
| "step": 24480 |
| }, |
| { |
| "epoch": 0.392, |
| "grad_norm": 0.2608548700809479, |
| "learning_rate": 0.00012163200000000002, |
| "loss": 0.8893, |
| "step": 24500 |
| }, |
| { |
| "epoch": 0.39232, |
| "grad_norm": 0.21324241161346436, |
| "learning_rate": 0.00012156800000000001, |
| "loss": 0.9002, |
| "step": 24520 |
| }, |
| { |
| "epoch": 0.39264, |
| "grad_norm": 0.22912296652793884, |
| "learning_rate": 0.000121504, |
| "loss": 0.866, |
| "step": 24540 |
| }, |
| { |
| "epoch": 0.39296, |
| "grad_norm": 0.21215850114822388, |
| "learning_rate": 0.00012144, |
| "loss": 0.8513, |
| "step": 24560 |
| }, |
| { |
| "epoch": 0.39328, |
| "grad_norm": 0.24554443359375, |
| "learning_rate": 0.000121376, |
| "loss": 0.8919, |
| "step": 24580 |
| }, |
| { |
| "epoch": 0.3936, |
| "grad_norm": 0.20404468476772308, |
| "learning_rate": 0.000121312, |
| "loss": 0.8908, |
| "step": 24600 |
| }, |
| { |
| "epoch": 0.39392, |
| "grad_norm": 0.22551661729812622, |
| "learning_rate": 0.000121248, |
| "loss": 0.8854, |
| "step": 24620 |
| }, |
| { |
| "epoch": 0.39424, |
| "grad_norm": 0.21961207687854767, |
| "learning_rate": 0.00012118400000000001, |
| "loss": 0.9231, |
| "step": 24640 |
| }, |
| { |
| "epoch": 0.39456, |
| "grad_norm": 0.22458186745643616, |
| "learning_rate": 0.00012112, |
| "loss": 0.9015, |
| "step": 24660 |
| }, |
| { |
| "epoch": 0.39488, |
| "grad_norm": 0.29484352469444275, |
| "learning_rate": 0.00012105600000000001, |
| "loss": 0.8869, |
| "step": 24680 |
| }, |
| { |
| "epoch": 0.3952, |
| "grad_norm": 0.228530153632164, |
| "learning_rate": 0.00012099200000000001, |
| "loss": 0.8622, |
| "step": 24700 |
| }, |
| { |
| "epoch": 0.39552, |
| "grad_norm": 0.2094821333885193, |
| "learning_rate": 0.00012092799999999999, |
| "loss": 0.8878, |
| "step": 24720 |
| }, |
| { |
| "epoch": 0.39584, |
| "grad_norm": 0.22119556367397308, |
| "learning_rate": 0.000120864, |
| "loss": 0.8235, |
| "step": 24740 |
| }, |
| { |
| "epoch": 0.39616, |
| "grad_norm": 0.23812732100486755, |
| "learning_rate": 0.0001208, |
| "loss": 0.8358, |
| "step": 24760 |
| }, |
| { |
| "epoch": 0.39648, |
| "grad_norm": 0.26022717356681824, |
| "learning_rate": 0.000120736, |
| "loss": 0.9271, |
| "step": 24780 |
| }, |
| { |
| "epoch": 0.3968, |
| "grad_norm": 0.21341145038604736, |
| "learning_rate": 0.00012067200000000001, |
| "loss": 0.7994, |
| "step": 24800 |
| }, |
| { |
| "epoch": 0.39712, |
| "grad_norm": 0.24168658256530762, |
| "learning_rate": 0.00012060800000000001, |
| "loss": 0.8575, |
| "step": 24820 |
| }, |
| { |
| "epoch": 0.39744, |
| "grad_norm": 0.24363411962985992, |
| "learning_rate": 0.00012054400000000002, |
| "loss": 0.9, |
| "step": 24840 |
| }, |
| { |
| "epoch": 0.39776, |
| "grad_norm": 0.23571257293224335, |
| "learning_rate": 0.00012048000000000001, |
| "loss": 0.8862, |
| "step": 24860 |
| }, |
| { |
| "epoch": 0.39808, |
| "grad_norm": 0.24681545794010162, |
| "learning_rate": 0.000120416, |
| "loss": 0.8989, |
| "step": 24880 |
| }, |
| { |
| "epoch": 0.3984, |
| "grad_norm": 0.2692868113517761, |
| "learning_rate": 0.000120352, |
| "loss": 0.9006, |
| "step": 24900 |
| }, |
| { |
| "epoch": 0.39872, |
| "grad_norm": 0.21049508452415466, |
| "learning_rate": 0.000120288, |
| "loss": 0.8785, |
| "step": 24920 |
| }, |
| { |
| "epoch": 0.39904, |
| "grad_norm": 0.23448914289474487, |
| "learning_rate": 0.000120224, |
| "loss": 0.8677, |
| "step": 24940 |
| }, |
| { |
| "epoch": 0.39936, |
| "grad_norm": 0.21739456057548523, |
| "learning_rate": 0.00012016, |
| "loss": 0.8595, |
| "step": 24960 |
| }, |
| { |
| "epoch": 0.39968, |
| "grad_norm": 0.2921290397644043, |
| "learning_rate": 0.00012009600000000001, |
| "loss": 0.8987, |
| "step": 24980 |
| }, |
| { |
| "epoch": 0.4, |
| "grad_norm": 0.2572174668312073, |
| "learning_rate": 0.000120032, |
| "loss": 0.8608, |
| "step": 25000 |
| }, |
| { |
| "epoch": 0.40032, |
| "grad_norm": 0.22153301537036896, |
| "learning_rate": 0.00011996800000000001, |
| "loss": 0.924, |
| "step": 25020 |
| }, |
| { |
| "epoch": 0.40064, |
| "grad_norm": 0.25618571043014526, |
| "learning_rate": 0.00011990400000000002, |
| "loss": 0.9082, |
| "step": 25040 |
| }, |
| { |
| "epoch": 0.40096, |
| "grad_norm": 0.23285400867462158, |
| "learning_rate": 0.00011983999999999999, |
| "loss": 0.8467, |
| "step": 25060 |
| }, |
| { |
| "epoch": 0.40128, |
| "grad_norm": 0.22959965467453003, |
| "learning_rate": 0.000119776, |
| "loss": 0.8813, |
| "step": 25080 |
| }, |
| { |
| "epoch": 0.4016, |
| "grad_norm": 0.2747071385383606, |
| "learning_rate": 0.00011971200000000001, |
| "loss": 0.8736, |
| "step": 25100 |
| }, |
| { |
| "epoch": 0.40192, |
| "grad_norm": 0.22911617159843445, |
| "learning_rate": 0.000119648, |
| "loss": 0.8696, |
| "step": 25120 |
| }, |
| { |
| "epoch": 0.40224, |
| "grad_norm": 0.2593874931335449, |
| "learning_rate": 0.00011958400000000001, |
| "loss": 0.8975, |
| "step": 25140 |
| }, |
| { |
| "epoch": 0.40256, |
| "grad_norm": 0.25185978412628174, |
| "learning_rate": 0.00011952000000000001, |
| "loss": 0.9214, |
| "step": 25160 |
| }, |
| { |
| "epoch": 0.40288, |
| "grad_norm": 0.20723138749599457, |
| "learning_rate": 0.00011945600000000002, |
| "loss": 0.8532, |
| "step": 25180 |
| }, |
| { |
| "epoch": 0.4032, |
| "grad_norm": 0.2371417135000229, |
| "learning_rate": 0.00011939200000000001, |
| "loss": 0.9435, |
| "step": 25200 |
| }, |
| { |
| "epoch": 0.40352, |
| "grad_norm": 0.22036102414131165, |
| "learning_rate": 0.00011932799999999999, |
| "loss": 0.9591, |
| "step": 25220 |
| }, |
| { |
| "epoch": 0.40384, |
| "grad_norm": 0.1885063499212265, |
| "learning_rate": 0.000119264, |
| "loss": 0.8855, |
| "step": 25240 |
| }, |
| { |
| "epoch": 0.40416, |
| "grad_norm": 0.22148434817790985, |
| "learning_rate": 0.0001192, |
| "loss": 0.8941, |
| "step": 25260 |
| }, |
| { |
| "epoch": 0.40448, |
| "grad_norm": 0.245897576212883, |
| "learning_rate": 0.000119136, |
| "loss": 0.8979, |
| "step": 25280 |
| }, |
| { |
| "epoch": 0.4048, |
| "grad_norm": 0.2093392014503479, |
| "learning_rate": 0.000119072, |
| "loss": 0.8549, |
| "step": 25300 |
| }, |
| { |
| "epoch": 0.40512, |
| "grad_norm": 0.20682351291179657, |
| "learning_rate": 0.00011900800000000001, |
| "loss": 0.8412, |
| "step": 25320 |
| }, |
| { |
| "epoch": 0.40544, |
| "grad_norm": 0.39095112681388855, |
| "learning_rate": 0.000118944, |
| "loss": 0.9389, |
| "step": 25340 |
| }, |
| { |
| "epoch": 0.40576, |
| "grad_norm": 0.2154461294412613, |
| "learning_rate": 0.00011888000000000001, |
| "loss": 0.9047, |
| "step": 25360 |
| }, |
| { |
| "epoch": 0.40608, |
| "grad_norm": 0.2192692905664444, |
| "learning_rate": 0.00011881600000000002, |
| "loss": 0.8371, |
| "step": 25380 |
| }, |
| { |
| "epoch": 0.4064, |
| "grad_norm": 0.30516675114631653, |
| "learning_rate": 0.00011875199999999999, |
| "loss": 0.9068, |
| "step": 25400 |
| }, |
| { |
| "epoch": 0.40672, |
| "grad_norm": 0.24160155653953552, |
| "learning_rate": 0.000118688, |
| "loss": 0.8778, |
| "step": 25420 |
| }, |
| { |
| "epoch": 0.40704, |
| "grad_norm": 0.2394413948059082, |
| "learning_rate": 0.00011862400000000001, |
| "loss": 0.9009, |
| "step": 25440 |
| }, |
| { |
| "epoch": 0.40736, |
| "grad_norm": 0.2312084585428238, |
| "learning_rate": 0.00011856, |
| "loss": 0.9599, |
| "step": 25460 |
| }, |
| { |
| "epoch": 0.40768, |
| "grad_norm": 0.24847859144210815, |
| "learning_rate": 0.00011849600000000001, |
| "loss": 0.9424, |
| "step": 25480 |
| }, |
| { |
| "epoch": 0.408, |
| "grad_norm": 0.2651779055595398, |
| "learning_rate": 0.00011843200000000001, |
| "loss": 0.8898, |
| "step": 25500 |
| }, |
| { |
| "epoch": 0.40832, |
| "grad_norm": 0.22847053408622742, |
| "learning_rate": 0.00011836800000000002, |
| "loss": 0.8775, |
| "step": 25520 |
| }, |
| { |
| "epoch": 0.40864, |
| "grad_norm": 0.25370457768440247, |
| "learning_rate": 0.00011830400000000001, |
| "loss": 0.8691, |
| "step": 25540 |
| }, |
| { |
| "epoch": 0.40896, |
| "grad_norm": 0.24085932970046997, |
| "learning_rate": 0.00011823999999999999, |
| "loss": 0.8863, |
| "step": 25560 |
| }, |
| { |
| "epoch": 0.40928, |
| "grad_norm": 0.2516380548477173, |
| "learning_rate": 0.000118176, |
| "loss": 0.8686, |
| "step": 25580 |
| }, |
| { |
| "epoch": 0.4096, |
| "grad_norm": 0.24218106269836426, |
| "learning_rate": 0.000118112, |
| "loss": 0.9093, |
| "step": 25600 |
| }, |
| { |
| "epoch": 0.40992, |
| "grad_norm": 0.22466421127319336, |
| "learning_rate": 0.000118048, |
| "loss": 0.8652, |
| "step": 25620 |
| }, |
| { |
| "epoch": 0.41024, |
| "grad_norm": 0.2240326702594757, |
| "learning_rate": 0.000117984, |
| "loss": 0.8873, |
| "step": 25640 |
| }, |
| { |
| "epoch": 0.41056, |
| "grad_norm": 0.24201804399490356, |
| "learning_rate": 0.00011792000000000001, |
| "loss": 0.8556, |
| "step": 25660 |
| }, |
| { |
| "epoch": 0.41088, |
| "grad_norm": 0.2758803963661194, |
| "learning_rate": 0.000117856, |
| "loss": 0.9015, |
| "step": 25680 |
| }, |
| { |
| "epoch": 0.4112, |
| "grad_norm": 0.23030854761600494, |
| "learning_rate": 0.00011779200000000001, |
| "loss": 0.8259, |
| "step": 25700 |
| }, |
| { |
| "epoch": 0.41152, |
| "grad_norm": 0.21517449617385864, |
| "learning_rate": 0.00011772800000000002, |
| "loss": 0.9246, |
| "step": 25720 |
| }, |
| { |
| "epoch": 0.41184, |
| "grad_norm": 0.2662043571472168, |
| "learning_rate": 0.00011766399999999999, |
| "loss": 0.9175, |
| "step": 25740 |
| }, |
| { |
| "epoch": 0.41216, |
| "grad_norm": 0.23844832181930542, |
| "learning_rate": 0.0001176, |
| "loss": 0.8416, |
| "step": 25760 |
| }, |
| { |
| "epoch": 0.41248, |
| "grad_norm": 0.23714718222618103, |
| "learning_rate": 0.00011753600000000001, |
| "loss": 0.8802, |
| "step": 25780 |
| }, |
| { |
| "epoch": 0.4128, |
| "grad_norm": 0.2341051995754242, |
| "learning_rate": 0.000117472, |
| "loss": 0.8629, |
| "step": 25800 |
| }, |
| { |
| "epoch": 0.41312, |
| "grad_norm": 0.2298164963722229, |
| "learning_rate": 0.00011740800000000001, |
| "loss": 0.8794, |
| "step": 25820 |
| }, |
| { |
| "epoch": 0.41344, |
| "grad_norm": 0.26338857412338257, |
| "learning_rate": 0.00011734400000000001, |
| "loss": 0.8953, |
| "step": 25840 |
| }, |
| { |
| "epoch": 0.41376, |
| "grad_norm": 0.2441425770521164, |
| "learning_rate": 0.00011728000000000002, |
| "loss": 0.888, |
| "step": 25860 |
| }, |
| { |
| "epoch": 0.41408, |
| "grad_norm": 0.22573915123939514, |
| "learning_rate": 0.00011721600000000001, |
| "loss": 0.8417, |
| "step": 25880 |
| }, |
| { |
| "epoch": 0.4144, |
| "grad_norm": 0.24974480271339417, |
| "learning_rate": 0.00011715199999999999, |
| "loss": 0.8313, |
| "step": 25900 |
| }, |
| { |
| "epoch": 0.41472, |
| "grad_norm": 0.22177425026893616, |
| "learning_rate": 0.000117088, |
| "loss": 0.8615, |
| "step": 25920 |
| }, |
| { |
| "epoch": 0.41504, |
| "grad_norm": 0.25715529918670654, |
| "learning_rate": 0.000117024, |
| "loss": 0.9622, |
| "step": 25940 |
| }, |
| { |
| "epoch": 0.41536, |
| "grad_norm": 0.22982242703437805, |
| "learning_rate": 0.00011696, |
| "loss": 0.8944, |
| "step": 25960 |
| }, |
| { |
| "epoch": 0.41568, |
| "grad_norm": 0.2524280250072479, |
| "learning_rate": 0.000116896, |
| "loss": 0.9148, |
| "step": 25980 |
| }, |
| { |
| "epoch": 0.416, |
| "grad_norm": 0.19068706035614014, |
| "learning_rate": 0.00011683200000000001, |
| "loss": 0.8383, |
| "step": 26000 |
| }, |
| { |
| "epoch": 0.41632, |
| "grad_norm": 0.20955216884613037, |
| "learning_rate": 0.000116768, |
| "loss": 0.8297, |
| "step": 26020 |
| }, |
| { |
| "epoch": 0.41664, |
| "grad_norm": 0.28669893741607666, |
| "learning_rate": 0.00011670400000000001, |
| "loss": 0.9302, |
| "step": 26040 |
| }, |
| { |
| "epoch": 0.41696, |
| "grad_norm": 0.2136538028717041, |
| "learning_rate": 0.00011664000000000002, |
| "loss": 0.8628, |
| "step": 26060 |
| }, |
| { |
| "epoch": 0.41728, |
| "grad_norm": 0.24216507375240326, |
| "learning_rate": 0.00011657599999999999, |
| "loss": 0.8645, |
| "step": 26080 |
| }, |
| { |
| "epoch": 0.4176, |
| "grad_norm": 0.23418830335140228, |
| "learning_rate": 0.000116512, |
| "loss": 0.8744, |
| "step": 26100 |
| }, |
| { |
| "epoch": 0.41792, |
| "grad_norm": 0.21107226610183716, |
| "learning_rate": 0.00011644800000000001, |
| "loss": 0.882, |
| "step": 26120 |
| }, |
| { |
| "epoch": 0.41824, |
| "grad_norm": 0.25925180315971375, |
| "learning_rate": 0.000116384, |
| "loss": 0.9287, |
| "step": 26140 |
| }, |
| { |
| "epoch": 0.41856, |
| "grad_norm": 0.24545122683048248, |
| "learning_rate": 0.00011632000000000001, |
| "loss": 0.871, |
| "step": 26160 |
| }, |
| { |
| "epoch": 0.41888, |
| "grad_norm": 0.24683259427547455, |
| "learning_rate": 0.000116256, |
| "loss": 0.9309, |
| "step": 26180 |
| }, |
| { |
| "epoch": 0.4192, |
| "grad_norm": 0.271581768989563, |
| "learning_rate": 0.00011619200000000002, |
| "loss": 0.8901, |
| "step": 26200 |
| }, |
| { |
| "epoch": 0.41952, |
| "grad_norm": 0.19227302074432373, |
| "learning_rate": 0.00011612800000000001, |
| "loss": 0.8457, |
| "step": 26220 |
| }, |
| { |
| "epoch": 0.41984, |
| "grad_norm": 0.2621937692165375, |
| "learning_rate": 0.00011606399999999999, |
| "loss": 0.9007, |
| "step": 26240 |
| }, |
| { |
| "epoch": 0.42016, |
| "grad_norm": 0.23038643598556519, |
| "learning_rate": 0.000116, |
| "loss": 0.8869, |
| "step": 26260 |
| }, |
| { |
| "epoch": 0.42048, |
| "grad_norm": 0.18889521062374115, |
| "learning_rate": 0.000115936, |
| "loss": 0.8939, |
| "step": 26280 |
| }, |
| { |
| "epoch": 0.4208, |
| "grad_norm": 0.22690792381763458, |
| "learning_rate": 0.000115872, |
| "loss": 0.8544, |
| "step": 26300 |
| }, |
| { |
| "epoch": 0.42112, |
| "grad_norm": 0.23775628209114075, |
| "learning_rate": 0.000115808, |
| "loss": 0.8847, |
| "step": 26320 |
| }, |
| { |
| "epoch": 0.42144, |
| "grad_norm": 0.22833390533924103, |
| "learning_rate": 0.00011574400000000001, |
| "loss": 0.8963, |
| "step": 26340 |
| }, |
| { |
| "epoch": 0.42176, |
| "grad_norm": 0.26199871301651, |
| "learning_rate": 0.00011568000000000002, |
| "loss": 0.8717, |
| "step": 26360 |
| }, |
| { |
| "epoch": 0.42208, |
| "grad_norm": 0.21491499245166779, |
| "learning_rate": 0.00011561600000000001, |
| "loss": 0.8355, |
| "step": 26380 |
| }, |
| { |
| "epoch": 0.4224, |
| "grad_norm": 0.24193742871284485, |
| "learning_rate": 0.00011555200000000002, |
| "loss": 0.8916, |
| "step": 26400 |
| }, |
| { |
| "epoch": 0.42272, |
| "grad_norm": 0.23363493382930756, |
| "learning_rate": 0.000115488, |
| "loss": 0.882, |
| "step": 26420 |
| }, |
| { |
| "epoch": 0.42304, |
| "grad_norm": 0.2511495053768158, |
| "learning_rate": 0.000115424, |
| "loss": 0.856, |
| "step": 26440 |
| }, |
| { |
| "epoch": 0.42336, |
| "grad_norm": 0.2527294456958771, |
| "learning_rate": 0.00011536000000000001, |
| "loss": 0.9213, |
| "step": 26460 |
| }, |
| { |
| "epoch": 0.42368, |
| "grad_norm": 0.24384371936321259, |
| "learning_rate": 0.000115296, |
| "loss": 0.8766, |
| "step": 26480 |
| }, |
| { |
| "epoch": 0.424, |
| "grad_norm": 0.22618679702281952, |
| "learning_rate": 0.00011523200000000001, |
| "loss": 0.8849, |
| "step": 26500 |
| }, |
| { |
| "epoch": 0.42432, |
| "grad_norm": 0.2192445695400238, |
| "learning_rate": 0.000115168, |
| "loss": 0.8945, |
| "step": 26520 |
| }, |
| { |
| "epoch": 0.42464, |
| "grad_norm": 0.21766866743564606, |
| "learning_rate": 0.00011510400000000001, |
| "loss": 0.9218, |
| "step": 26540 |
| }, |
| { |
| "epoch": 0.42496, |
| "grad_norm": 0.25323477387428284, |
| "learning_rate": 0.00011504000000000001, |
| "loss": 0.8973, |
| "step": 26560 |
| }, |
| { |
| "epoch": 0.42528, |
| "grad_norm": 0.22795149683952332, |
| "learning_rate": 0.0001149792, |
| "loss": 0.842, |
| "step": 26580 |
| }, |
| { |
| "epoch": 0.4256, |
| "grad_norm": 0.26310163736343384, |
| "learning_rate": 0.00011491520000000001, |
| "loss": 0.9459, |
| "step": 26600 |
| }, |
| { |
| "epoch": 0.42592, |
| "grad_norm": 0.2246372550725937, |
| "learning_rate": 0.0001148512, |
| "loss": 0.9001, |
| "step": 26620 |
| }, |
| { |
| "epoch": 0.42624, |
| "grad_norm": 0.23721113801002502, |
| "learning_rate": 0.00011478720000000001, |
| "loss": 0.9489, |
| "step": 26640 |
| }, |
| { |
| "epoch": 0.42656, |
| "grad_norm": 0.2320554107427597, |
| "learning_rate": 0.0001147232, |
| "loss": 0.8728, |
| "step": 26660 |
| }, |
| { |
| "epoch": 0.42688, |
| "grad_norm": 0.24293909966945648, |
| "learning_rate": 0.00011465920000000002, |
| "loss": 0.8921, |
| "step": 26680 |
| }, |
| { |
| "epoch": 0.4272, |
| "grad_norm": 0.23679333925247192, |
| "learning_rate": 0.00011459520000000001, |
| "loss": 0.8383, |
| "step": 26700 |
| }, |
| { |
| "epoch": 0.42752, |
| "grad_norm": 0.27467820048332214, |
| "learning_rate": 0.00011453120000000002, |
| "loss": 0.8646, |
| "step": 26720 |
| }, |
| { |
| "epoch": 0.42784, |
| "grad_norm": 0.2692321538925171, |
| "learning_rate": 0.0001144672, |
| "loss": 0.9117, |
| "step": 26740 |
| }, |
| { |
| "epoch": 0.42816, |
| "grad_norm": 0.2061707228422165, |
| "learning_rate": 0.0001144032, |
| "loss": 0.8794, |
| "step": 26760 |
| }, |
| { |
| "epoch": 0.42848, |
| "grad_norm": 0.24439223110675812, |
| "learning_rate": 0.0001143392, |
| "loss": 0.8522, |
| "step": 26780 |
| }, |
| { |
| "epoch": 0.4288, |
| "grad_norm": 0.2587136924266815, |
| "learning_rate": 0.0001142752, |
| "loss": 0.8729, |
| "step": 26800 |
| }, |
| { |
| "epoch": 0.42912, |
| "grad_norm": 0.2559018135070801, |
| "learning_rate": 0.00011421120000000001, |
| "loss": 0.8797, |
| "step": 26820 |
| }, |
| { |
| "epoch": 0.42944, |
| "grad_norm": 0.2468901127576828, |
| "learning_rate": 0.0001141472, |
| "loss": 0.9022, |
| "step": 26840 |
| }, |
| { |
| "epoch": 0.42976, |
| "grad_norm": 0.24873799085617065, |
| "learning_rate": 0.00011408320000000001, |
| "loss": 0.8634, |
| "step": 26860 |
| }, |
| { |
| "epoch": 0.43008, |
| "grad_norm": 0.23509488999843597, |
| "learning_rate": 0.00011401920000000002, |
| "loss": 0.8995, |
| "step": 26880 |
| }, |
| { |
| "epoch": 0.4304, |
| "grad_norm": 0.22719904780387878, |
| "learning_rate": 0.00011395519999999999, |
| "loss": 0.8288, |
| "step": 26900 |
| }, |
| { |
| "epoch": 0.43072, |
| "grad_norm": 0.26962873339653015, |
| "learning_rate": 0.0001138912, |
| "loss": 0.9099, |
| "step": 26920 |
| }, |
| { |
| "epoch": 0.43104, |
| "grad_norm": 0.2308361977338791, |
| "learning_rate": 0.00011382720000000001, |
| "loss": 0.8927, |
| "step": 26940 |
| }, |
| { |
| "epoch": 0.43136, |
| "grad_norm": 0.23821701109409332, |
| "learning_rate": 0.0001137632, |
| "loss": 0.8926, |
| "step": 26960 |
| }, |
| { |
| "epoch": 0.43168, |
| "grad_norm": 0.2621578574180603, |
| "learning_rate": 0.00011369920000000001, |
| "loss": 0.8996, |
| "step": 26980 |
| }, |
| { |
| "epoch": 0.432, |
| "grad_norm": 0.1956038624048233, |
| "learning_rate": 0.0001136352, |
| "loss": 0.8429, |
| "step": 27000 |
| }, |
| { |
| "epoch": 0.43232, |
| "grad_norm": 0.25825899839401245, |
| "learning_rate": 0.00011357120000000001, |
| "loss": 0.8912, |
| "step": 27020 |
| }, |
| { |
| "epoch": 0.43264, |
| "grad_norm": 0.2325858324766159, |
| "learning_rate": 0.00011350720000000001, |
| "loss": 0.9094, |
| "step": 27040 |
| }, |
| { |
| "epoch": 0.43296, |
| "grad_norm": 0.24896900355815887, |
| "learning_rate": 0.00011344320000000002, |
| "loss": 0.9526, |
| "step": 27060 |
| }, |
| { |
| "epoch": 0.43328, |
| "grad_norm": 0.2929576337337494, |
| "learning_rate": 0.0001133792, |
| "loss": 0.8513, |
| "step": 27080 |
| }, |
| { |
| "epoch": 0.4336, |
| "grad_norm": 0.23895148932933807, |
| "learning_rate": 0.0001133152, |
| "loss": 0.8708, |
| "step": 27100 |
| }, |
| { |
| "epoch": 0.43392, |
| "grad_norm": 0.22470715641975403, |
| "learning_rate": 0.0001132512, |
| "loss": 0.9009, |
| "step": 27120 |
| }, |
| { |
| "epoch": 0.43424, |
| "grad_norm": 0.25981777906417847, |
| "learning_rate": 0.0001131872, |
| "loss": 0.9252, |
| "step": 27140 |
| }, |
| { |
| "epoch": 0.43456, |
| "grad_norm": 0.26591363549232483, |
| "learning_rate": 0.00011312320000000001, |
| "loss": 0.878, |
| "step": 27160 |
| }, |
| { |
| "epoch": 0.43488, |
| "grad_norm": 0.24026769399642944, |
| "learning_rate": 0.0001130592, |
| "loss": 0.838, |
| "step": 27180 |
| }, |
| { |
| "epoch": 0.4352, |
| "grad_norm": 0.243183895945549, |
| "learning_rate": 0.00011299520000000001, |
| "loss": 0.945, |
| "step": 27200 |
| }, |
| { |
| "epoch": 0.43552, |
| "grad_norm": 0.28983068466186523, |
| "learning_rate": 0.00011293120000000002, |
| "loss": 0.8779, |
| "step": 27220 |
| }, |
| { |
| "epoch": 0.43584, |
| "grad_norm": 0.25985220074653625, |
| "learning_rate": 0.00011286719999999999, |
| "loss": 0.8852, |
| "step": 27240 |
| }, |
| { |
| "epoch": 0.43616, |
| "grad_norm": 0.2521764934062958, |
| "learning_rate": 0.0001128032, |
| "loss": 0.8459, |
| "step": 27260 |
| }, |
| { |
| "epoch": 0.43648, |
| "grad_norm": 0.2260691374540329, |
| "learning_rate": 0.0001127392, |
| "loss": 0.8478, |
| "step": 27280 |
| }, |
| { |
| "epoch": 0.4368, |
| "grad_norm": 0.24375227093696594, |
| "learning_rate": 0.0001126752, |
| "loss": 0.8566, |
| "step": 27300 |
| }, |
| { |
| "epoch": 0.43712, |
| "grad_norm": 0.23803727328777313, |
| "learning_rate": 0.00011261120000000001, |
| "loss": 0.8832, |
| "step": 27320 |
| }, |
| { |
| "epoch": 0.43744, |
| "grad_norm": 0.35262176394462585, |
| "learning_rate": 0.0001125472, |
| "loss": 0.9305, |
| "step": 27340 |
| }, |
| { |
| "epoch": 0.43776, |
| "grad_norm": 0.22310085594654083, |
| "learning_rate": 0.00011248320000000001, |
| "loss": 0.8806, |
| "step": 27360 |
| }, |
| { |
| "epoch": 0.43808, |
| "grad_norm": 0.22547666728496552, |
| "learning_rate": 0.00011241920000000001, |
| "loss": 0.8617, |
| "step": 27380 |
| }, |
| { |
| "epoch": 0.4384, |
| "grad_norm": 0.23973605036735535, |
| "learning_rate": 0.00011235520000000002, |
| "loss": 0.8415, |
| "step": 27400 |
| }, |
| { |
| "epoch": 0.43872, |
| "grad_norm": 0.2396925538778305, |
| "learning_rate": 0.0001122912, |
| "loss": 0.9221, |
| "step": 27420 |
| }, |
| { |
| "epoch": 0.43904, |
| "grad_norm": 0.2549417018890381, |
| "learning_rate": 0.0001122272, |
| "loss": 0.8289, |
| "step": 27440 |
| }, |
| { |
| "epoch": 0.43936, |
| "grad_norm": 0.3001738488674164, |
| "learning_rate": 0.0001121632, |
| "loss": 0.8951, |
| "step": 27460 |
| }, |
| { |
| "epoch": 0.43968, |
| "grad_norm": 0.28858518600463867, |
| "learning_rate": 0.0001120992, |
| "loss": 0.9271, |
| "step": 27480 |
| }, |
| { |
| "epoch": 0.44, |
| "grad_norm": 0.22798220813274384, |
| "learning_rate": 0.00011203520000000001, |
| "loss": 0.9133, |
| "step": 27500 |
| }, |
| { |
| "epoch": 0.44032, |
| "grad_norm": 0.2488940805196762, |
| "learning_rate": 0.0001119712, |
| "loss": 0.9054, |
| "step": 27520 |
| }, |
| { |
| "epoch": 0.44064, |
| "grad_norm": 0.26057055592536926, |
| "learning_rate": 0.00011190720000000001, |
| "loss": 0.9136, |
| "step": 27540 |
| }, |
| { |
| "epoch": 0.44096, |
| "grad_norm": 0.21583379805088043, |
| "learning_rate": 0.00011184320000000002, |
| "loss": 0.8559, |
| "step": 27560 |
| }, |
| { |
| "epoch": 0.44128, |
| "grad_norm": 0.2284022718667984, |
| "learning_rate": 0.00011177919999999999, |
| "loss": 0.8976, |
| "step": 27580 |
| }, |
| { |
| "epoch": 0.4416, |
| "grad_norm": 0.25823116302490234, |
| "learning_rate": 0.0001117152, |
| "loss": 0.863, |
| "step": 27600 |
| }, |
| { |
| "epoch": 0.44192, |
| "grad_norm": 0.206822007894516, |
| "learning_rate": 0.0001116512, |
| "loss": 0.8877, |
| "step": 27620 |
| }, |
| { |
| "epoch": 0.44224, |
| "grad_norm": 0.21545498073101044, |
| "learning_rate": 0.0001115872, |
| "loss": 0.907, |
| "step": 27640 |
| }, |
| { |
| "epoch": 0.44256, |
| "grad_norm": 0.20267954468727112, |
| "learning_rate": 0.00011152320000000001, |
| "loss": 0.8363, |
| "step": 27660 |
| }, |
| { |
| "epoch": 0.44288, |
| "grad_norm": 0.22508728504180908, |
| "learning_rate": 0.0001114592, |
| "loss": 0.8913, |
| "step": 27680 |
| }, |
| { |
| "epoch": 0.4432, |
| "grad_norm": 0.2211364060640335, |
| "learning_rate": 0.00011139520000000001, |
| "loss": 0.8492, |
| "step": 27700 |
| }, |
| { |
| "epoch": 0.44352, |
| "grad_norm": 0.23222848773002625, |
| "learning_rate": 0.00011133120000000001, |
| "loss": 0.8941, |
| "step": 27720 |
| }, |
| { |
| "epoch": 0.44384, |
| "grad_norm": 0.2355644702911377, |
| "learning_rate": 0.00011126720000000002, |
| "loss": 0.9167, |
| "step": 27740 |
| }, |
| { |
| "epoch": 0.44416, |
| "grad_norm": 0.25646787881851196, |
| "learning_rate": 0.0001112032, |
| "loss": 0.8852, |
| "step": 27760 |
| }, |
| { |
| "epoch": 0.44448, |
| "grad_norm": 0.24109818041324615, |
| "learning_rate": 0.0001111392, |
| "loss": 0.8864, |
| "step": 27780 |
| }, |
| { |
| "epoch": 0.4448, |
| "grad_norm": 0.2541385591030121, |
| "learning_rate": 0.0001110752, |
| "loss": 0.8674, |
| "step": 27800 |
| }, |
| { |
| "epoch": 0.44512, |
| "grad_norm": 0.2090396136045456, |
| "learning_rate": 0.0001110112, |
| "loss": 0.8601, |
| "step": 27820 |
| }, |
| { |
| "epoch": 0.44544, |
| "grad_norm": 0.249611958861351, |
| "learning_rate": 0.00011094720000000001, |
| "loss": 0.8973, |
| "step": 27840 |
| }, |
| { |
| "epoch": 0.44576, |
| "grad_norm": 0.24944797158241272, |
| "learning_rate": 0.00011088320000000002, |
| "loss": 0.9308, |
| "step": 27860 |
| }, |
| { |
| "epoch": 0.44608, |
| "grad_norm": 0.2585296332836151, |
| "learning_rate": 0.00011081920000000001, |
| "loss": 0.8865, |
| "step": 27880 |
| }, |
| { |
| "epoch": 0.4464, |
| "grad_norm": 0.2178046554327011, |
| "learning_rate": 0.00011075520000000002, |
| "loss": 0.8511, |
| "step": 27900 |
| }, |
| { |
| "epoch": 0.44672, |
| "grad_norm": 0.2413053810596466, |
| "learning_rate": 0.0001106912, |
| "loss": 0.9125, |
| "step": 27920 |
| }, |
| { |
| "epoch": 0.44704, |
| "grad_norm": 0.23110991716384888, |
| "learning_rate": 0.0001106272, |
| "loss": 0.9167, |
| "step": 27940 |
| }, |
| { |
| "epoch": 0.44736, |
| "grad_norm": 0.2292502075433731, |
| "learning_rate": 0.0001105632, |
| "loss": 0.8573, |
| "step": 27960 |
| }, |
| { |
| "epoch": 0.44768, |
| "grad_norm": 0.2460576891899109, |
| "learning_rate": 0.0001104992, |
| "loss": 0.8469, |
| "step": 27980 |
| }, |
| { |
| "epoch": 0.448, |
| "grad_norm": 0.2802644371986389, |
| "learning_rate": 0.00011043520000000001, |
| "loss": 0.8421, |
| "step": 28000 |
| }, |
| { |
| "epoch": 0.44832, |
| "grad_norm": 0.21653008460998535, |
| "learning_rate": 0.0001103712, |
| "loss": 0.9343, |
| "step": 28020 |
| }, |
| { |
| "epoch": 0.44864, |
| "grad_norm": 0.21708372235298157, |
| "learning_rate": 0.00011030720000000001, |
| "loss": 0.8866, |
| "step": 28040 |
| }, |
| { |
| "epoch": 0.44896, |
| "grad_norm": 0.2733645737171173, |
| "learning_rate": 0.00011024320000000001, |
| "loss": 0.9055, |
| "step": 28060 |
| }, |
| { |
| "epoch": 0.44928, |
| "grad_norm": 0.2751225531101227, |
| "learning_rate": 0.00011017920000000002, |
| "loss": 0.8615, |
| "step": 28080 |
| }, |
| { |
| "epoch": 0.4496, |
| "grad_norm": 0.23991945385932922, |
| "learning_rate": 0.0001101152, |
| "loss": 0.8963, |
| "step": 28100 |
| }, |
| { |
| "epoch": 0.44992, |
| "grad_norm": 0.3005094528198242, |
| "learning_rate": 0.0001100512, |
| "loss": 0.8711, |
| "step": 28120 |
| }, |
| { |
| "epoch": 0.45024, |
| "grad_norm": 0.2634584903717041, |
| "learning_rate": 0.0001099872, |
| "loss": 0.8764, |
| "step": 28140 |
| }, |
| { |
| "epoch": 0.45056, |
| "grad_norm": 0.22322441637516022, |
| "learning_rate": 0.0001099232, |
| "loss": 0.8467, |
| "step": 28160 |
| }, |
| { |
| "epoch": 0.45088, |
| "grad_norm": 0.21676741540431976, |
| "learning_rate": 0.00010985920000000001, |
| "loss": 0.8433, |
| "step": 28180 |
| }, |
| { |
| "epoch": 0.4512, |
| "grad_norm": 0.26323333382606506, |
| "learning_rate": 0.00010979520000000002, |
| "loss": 0.9486, |
| "step": 28200 |
| }, |
| { |
| "epoch": 0.45152, |
| "grad_norm": 0.21226100623607635, |
| "learning_rate": 0.00010973120000000001, |
| "loss": 0.8896, |
| "step": 28220 |
| }, |
| { |
| "epoch": 0.45184, |
| "grad_norm": 0.22679319977760315, |
| "learning_rate": 0.00010966720000000002, |
| "loss": 0.8528, |
| "step": 28240 |
| }, |
| { |
| "epoch": 0.45216, |
| "grad_norm": 0.18700149655342102, |
| "learning_rate": 0.0001096032, |
| "loss": 0.8368, |
| "step": 28260 |
| }, |
| { |
| "epoch": 0.45248, |
| "grad_norm": 0.23498637974262238, |
| "learning_rate": 0.0001095392, |
| "loss": 0.9194, |
| "step": 28280 |
| }, |
| { |
| "epoch": 0.4528, |
| "grad_norm": 0.22683313488960266, |
| "learning_rate": 0.0001094752, |
| "loss": 0.906, |
| "step": 28300 |
| }, |
| { |
| "epoch": 0.45312, |
| "grad_norm": 0.23284801840782166, |
| "learning_rate": 0.0001094112, |
| "loss": 0.9314, |
| "step": 28320 |
| }, |
| { |
| "epoch": 0.45344, |
| "grad_norm": 0.24406535923480988, |
| "learning_rate": 0.00010934720000000001, |
| "loss": 0.9017, |
| "step": 28340 |
| }, |
| { |
| "epoch": 0.45376, |
| "grad_norm": 0.22531583905220032, |
| "learning_rate": 0.0001092832, |
| "loss": 0.8906, |
| "step": 28360 |
| }, |
| { |
| "epoch": 0.45408, |
| "grad_norm": 0.21459725499153137, |
| "learning_rate": 0.00010921920000000001, |
| "loss": 0.8495, |
| "step": 28380 |
| }, |
| { |
| "epoch": 0.4544, |
| "grad_norm": 0.2690097689628601, |
| "learning_rate": 0.00010915520000000001, |
| "loss": 0.959, |
| "step": 28400 |
| }, |
| { |
| "epoch": 0.45472, |
| "grad_norm": 0.3007669448852539, |
| "learning_rate": 0.00010909120000000002, |
| "loss": 0.8716, |
| "step": 28420 |
| }, |
| { |
| "epoch": 0.45504, |
| "grad_norm": 0.2667557895183563, |
| "learning_rate": 0.0001090272, |
| "loss": 0.8909, |
| "step": 28440 |
| }, |
| { |
| "epoch": 0.45536, |
| "grad_norm": 0.24151913821697235, |
| "learning_rate": 0.0001089632, |
| "loss": 0.8808, |
| "step": 28460 |
| }, |
| { |
| "epoch": 0.45568, |
| "grad_norm": 0.2797807455062866, |
| "learning_rate": 0.0001088992, |
| "loss": 0.872, |
| "step": 28480 |
| }, |
| { |
| "epoch": 0.456, |
| "grad_norm": 0.3146251440048218, |
| "learning_rate": 0.0001088352, |
| "loss": 0.8632, |
| "step": 28500 |
| }, |
| { |
| "epoch": 0.45632, |
| "grad_norm": 0.26347050070762634, |
| "learning_rate": 0.0001087712, |
| "loss": 0.8863, |
| "step": 28520 |
| }, |
| { |
| "epoch": 0.45664, |
| "grad_norm": 0.24761775135993958, |
| "learning_rate": 0.00010870720000000002, |
| "loss": 0.8692, |
| "step": 28540 |
| }, |
| { |
| "epoch": 0.45696, |
| "grad_norm": 0.258346825838089, |
| "learning_rate": 0.00010864320000000001, |
| "loss": 0.8733, |
| "step": 28560 |
| }, |
| { |
| "epoch": 0.45728, |
| "grad_norm": 0.2539924085140228, |
| "learning_rate": 0.00010857920000000002, |
| "loss": 0.8691, |
| "step": 28580 |
| }, |
| { |
| "epoch": 0.4576, |
| "grad_norm": 0.5414292216300964, |
| "learning_rate": 0.0001085152, |
| "loss": 0.9207, |
| "step": 28600 |
| }, |
| { |
| "epoch": 0.45792, |
| "grad_norm": 0.21679574251174927, |
| "learning_rate": 0.0001084512, |
| "loss": 0.8953, |
| "step": 28620 |
| }, |
| { |
| "epoch": 0.45824, |
| "grad_norm": 0.236148864030838, |
| "learning_rate": 0.0001083872, |
| "loss": 0.8793, |
| "step": 28640 |
| }, |
| { |
| "epoch": 0.45856, |
| "grad_norm": 0.21311239898204803, |
| "learning_rate": 0.0001083232, |
| "loss": 0.8276, |
| "step": 28660 |
| }, |
| { |
| "epoch": 0.45888, |
| "grad_norm": 0.23734121024608612, |
| "learning_rate": 0.00010825920000000001, |
| "loss": 0.8464, |
| "step": 28680 |
| }, |
| { |
| "epoch": 0.4592, |
| "grad_norm": 0.2556433081626892, |
| "learning_rate": 0.0001081952, |
| "loss": 0.8775, |
| "step": 28700 |
| }, |
| { |
| "epoch": 0.45952, |
| "grad_norm": 0.25167569518089294, |
| "learning_rate": 0.00010813120000000001, |
| "loss": 0.8801, |
| "step": 28720 |
| }, |
| { |
| "epoch": 0.45984, |
| "grad_norm": 0.21533095836639404, |
| "learning_rate": 0.00010806720000000001, |
| "loss": 0.9013, |
| "step": 28740 |
| }, |
| { |
| "epoch": 0.46016, |
| "grad_norm": 0.2581512928009033, |
| "learning_rate": 0.00010800320000000002, |
| "loss": 0.8842, |
| "step": 28760 |
| }, |
| { |
| "epoch": 0.46048, |
| "grad_norm": 0.22151516377925873, |
| "learning_rate": 0.0001079392, |
| "loss": 0.9151, |
| "step": 28780 |
| }, |
| { |
| "epoch": 0.4608, |
| "grad_norm": 0.2598574459552765, |
| "learning_rate": 0.0001078752, |
| "loss": 0.8817, |
| "step": 28800 |
| }, |
| { |
| "epoch": 0.46112, |
| "grad_norm": 0.22162817418575287, |
| "learning_rate": 0.0001078112, |
| "loss": 0.8887, |
| "step": 28820 |
| }, |
| { |
| "epoch": 0.46144, |
| "grad_norm": 0.2202143371105194, |
| "learning_rate": 0.0001077472, |
| "loss": 0.9206, |
| "step": 28840 |
| }, |
| { |
| "epoch": 0.46176, |
| "grad_norm": 0.24929936230182648, |
| "learning_rate": 0.0001076832, |
| "loss": 0.9279, |
| "step": 28860 |
| }, |
| { |
| "epoch": 0.46208, |
| "grad_norm": 0.26587414741516113, |
| "learning_rate": 0.00010761920000000001, |
| "loss": 0.9087, |
| "step": 28880 |
| }, |
| { |
| "epoch": 0.4624, |
| "grad_norm": 0.2536023259162903, |
| "learning_rate": 0.00010755520000000001, |
| "loss": 0.9128, |
| "step": 28900 |
| }, |
| { |
| "epoch": 0.46272, |
| "grad_norm": 0.1964925229549408, |
| "learning_rate": 0.00010749120000000002, |
| "loss": 0.8445, |
| "step": 28920 |
| }, |
| { |
| "epoch": 0.46304, |
| "grad_norm": 0.21544238924980164, |
| "learning_rate": 0.0001074272, |
| "loss": 0.856, |
| "step": 28940 |
| }, |
| { |
| "epoch": 0.46336, |
| "grad_norm": 0.25481197237968445, |
| "learning_rate": 0.0001073632, |
| "loss": 0.8555, |
| "step": 28960 |
| }, |
| { |
| "epoch": 0.46368, |
| "grad_norm": 0.24622678756713867, |
| "learning_rate": 0.0001072992, |
| "loss": 0.8975, |
| "step": 28980 |
| }, |
| { |
| "epoch": 0.464, |
| "grad_norm": 0.2316320389509201, |
| "learning_rate": 0.0001072352, |
| "loss": 0.8802, |
| "step": 29000 |
| }, |
| { |
| "epoch": 0.46432, |
| "grad_norm": 0.22140583395957947, |
| "learning_rate": 0.00010717120000000001, |
| "loss": 0.8845, |
| "step": 29020 |
| }, |
| { |
| "epoch": 0.46464, |
| "grad_norm": 0.21848374605178833, |
| "learning_rate": 0.0001071072, |
| "loss": 0.8766, |
| "step": 29040 |
| }, |
| { |
| "epoch": 0.46496, |
| "grad_norm": 0.2609255015850067, |
| "learning_rate": 0.00010704320000000001, |
| "loss": 0.9335, |
| "step": 29060 |
| }, |
| { |
| "epoch": 0.46528, |
| "grad_norm": 0.25037628412246704, |
| "learning_rate": 0.00010697920000000001, |
| "loss": 0.8705, |
| "step": 29080 |
| }, |
| { |
| "epoch": 0.4656, |
| "grad_norm": 0.2289629876613617, |
| "learning_rate": 0.00010691520000000002, |
| "loss": 0.9401, |
| "step": 29100 |
| }, |
| { |
| "epoch": 0.46592, |
| "grad_norm": 0.28347960114479065, |
| "learning_rate": 0.0001068512, |
| "loss": 0.9032, |
| "step": 29120 |
| }, |
| { |
| "epoch": 0.46624, |
| "grad_norm": 0.26885560154914856, |
| "learning_rate": 0.00010678719999999999, |
| "loss": 0.8777, |
| "step": 29140 |
| }, |
| { |
| "epoch": 0.46656, |
| "grad_norm": 0.240605428814888, |
| "learning_rate": 0.0001067232, |
| "loss": 0.8381, |
| "step": 29160 |
| }, |
| { |
| "epoch": 0.46688, |
| "grad_norm": 0.21176287531852722, |
| "learning_rate": 0.0001066592, |
| "loss": 0.8988, |
| "step": 29180 |
| }, |
| { |
| "epoch": 0.4672, |
| "grad_norm": 0.2735714316368103, |
| "learning_rate": 0.0001065952, |
| "loss": 0.8879, |
| "step": 29200 |
| }, |
| { |
| "epoch": 0.46752, |
| "grad_norm": 0.22908750176429749, |
| "learning_rate": 0.00010653120000000001, |
| "loss": 0.9259, |
| "step": 29220 |
| }, |
| { |
| "epoch": 0.46784, |
| "grad_norm": 0.2262643426656723, |
| "learning_rate": 0.00010646720000000001, |
| "loss": 0.8774, |
| "step": 29240 |
| }, |
| { |
| "epoch": 0.46816, |
| "grad_norm": 0.1967533677816391, |
| "learning_rate": 0.00010640320000000002, |
| "loss": 0.8512, |
| "step": 29260 |
| }, |
| { |
| "epoch": 0.46848, |
| "grad_norm": 0.23780053853988647, |
| "learning_rate": 0.0001063392, |
| "loss": 0.8703, |
| "step": 29280 |
| }, |
| { |
| "epoch": 0.4688, |
| "grad_norm": 0.2447732537984848, |
| "learning_rate": 0.0001062752, |
| "loss": 0.8656, |
| "step": 29300 |
| }, |
| { |
| "epoch": 0.46912, |
| "grad_norm": 0.23653922975063324, |
| "learning_rate": 0.0001062112, |
| "loss": 0.9124, |
| "step": 29320 |
| }, |
| { |
| "epoch": 0.46944, |
| "grad_norm": 0.22936119139194489, |
| "learning_rate": 0.0001061472, |
| "loss": 0.9262, |
| "step": 29340 |
| }, |
| { |
| "epoch": 0.46976, |
| "grad_norm": 0.26746055483818054, |
| "learning_rate": 0.00010608320000000001, |
| "loss": 0.9279, |
| "step": 29360 |
| }, |
| { |
| "epoch": 0.47008, |
| "grad_norm": 0.27925965189933777, |
| "learning_rate": 0.0001060192, |
| "loss": 0.9249, |
| "step": 29380 |
| }, |
| { |
| "epoch": 0.4704, |
| "grad_norm": 0.25772443413734436, |
| "learning_rate": 0.00010595520000000001, |
| "loss": 0.857, |
| "step": 29400 |
| }, |
| { |
| "epoch": 0.47072, |
| "grad_norm": 0.31103163957595825, |
| "learning_rate": 0.00010589120000000001, |
| "loss": 0.8858, |
| "step": 29420 |
| }, |
| { |
| "epoch": 0.47104, |
| "grad_norm": 0.2527211010456085, |
| "learning_rate": 0.00010582720000000002, |
| "loss": 0.8851, |
| "step": 29440 |
| }, |
| { |
| "epoch": 0.47136, |
| "grad_norm": 0.2501220405101776, |
| "learning_rate": 0.0001057632, |
| "loss": 0.8829, |
| "step": 29460 |
| }, |
| { |
| "epoch": 0.47168, |
| "grad_norm": 0.20719179511070251, |
| "learning_rate": 0.00010569919999999999, |
| "loss": 0.8934, |
| "step": 29480 |
| }, |
| { |
| "epoch": 0.472, |
| "grad_norm": 0.24948135018348694, |
| "learning_rate": 0.0001056352, |
| "loss": 0.9229, |
| "step": 29500 |
| }, |
| { |
| "epoch": 0.47232, |
| "grad_norm": 0.2544534206390381, |
| "learning_rate": 0.00010557120000000001, |
| "loss": 0.9075, |
| "step": 29520 |
| }, |
| { |
| "epoch": 0.47264, |
| "grad_norm": 0.24410614371299744, |
| "learning_rate": 0.0001055072, |
| "loss": 0.8189, |
| "step": 29540 |
| }, |
| { |
| "epoch": 0.47296, |
| "grad_norm": 0.2736496925354004, |
| "learning_rate": 0.00010544320000000001, |
| "loss": 0.8778, |
| "step": 29560 |
| }, |
| { |
| "epoch": 0.47328, |
| "grad_norm": 0.2509610652923584, |
| "learning_rate": 0.00010537920000000001, |
| "loss": 0.9481, |
| "step": 29580 |
| }, |
| { |
| "epoch": 0.4736, |
| "grad_norm": 0.23943458497524261, |
| "learning_rate": 0.00010531520000000002, |
| "loss": 0.8921, |
| "step": 29600 |
| }, |
| { |
| "epoch": 0.47392, |
| "grad_norm": 0.2580275237560272, |
| "learning_rate": 0.0001052512, |
| "loss": 0.9293, |
| "step": 29620 |
| }, |
| { |
| "epoch": 0.47424, |
| "grad_norm": 0.27012938261032104, |
| "learning_rate": 0.0001051872, |
| "loss": 0.8833, |
| "step": 29640 |
| }, |
| { |
| "epoch": 0.47456, |
| "grad_norm": 0.25766292214393616, |
| "learning_rate": 0.0001051232, |
| "loss": 0.8901, |
| "step": 29660 |
| }, |
| { |
| "epoch": 0.47488, |
| "grad_norm": 0.2404147982597351, |
| "learning_rate": 0.0001050592, |
| "loss": 0.875, |
| "step": 29680 |
| }, |
| { |
| "epoch": 0.4752, |
| "grad_norm": 0.24435456097126007, |
| "learning_rate": 0.00010499520000000001, |
| "loss": 0.9058, |
| "step": 29700 |
| }, |
| { |
| "epoch": 0.47552, |
| "grad_norm": 0.21276427805423737, |
| "learning_rate": 0.0001049312, |
| "loss": 0.851, |
| "step": 29720 |
| }, |
| { |
| "epoch": 0.47584, |
| "grad_norm": 0.26436007022857666, |
| "learning_rate": 0.00010486720000000001, |
| "loss": 0.8794, |
| "step": 29740 |
| }, |
| { |
| "epoch": 0.47616, |
| "grad_norm": 0.2028064727783203, |
| "learning_rate": 0.0001048032, |
| "loss": 0.9011, |
| "step": 29760 |
| }, |
| { |
| "epoch": 0.47648, |
| "grad_norm": 0.2643204629421234, |
| "learning_rate": 0.00010473920000000002, |
| "loss": 0.8951, |
| "step": 29780 |
| }, |
| { |
| "epoch": 0.4768, |
| "grad_norm": 0.2519238293170929, |
| "learning_rate": 0.0001046752, |
| "loss": 0.9099, |
| "step": 29800 |
| }, |
| { |
| "epoch": 0.47712, |
| "grad_norm": 0.2304890900850296, |
| "learning_rate": 0.00010461119999999999, |
| "loss": 0.9054, |
| "step": 29820 |
| }, |
| { |
| "epoch": 0.47744, |
| "grad_norm": 0.24808572232723236, |
| "learning_rate": 0.0001045472, |
| "loss": 0.9119, |
| "step": 29840 |
| }, |
| { |
| "epoch": 0.47776, |
| "grad_norm": 0.23621508479118347, |
| "learning_rate": 0.00010448320000000001, |
| "loss": 0.9018, |
| "step": 29860 |
| }, |
| { |
| "epoch": 0.47808, |
| "grad_norm": 0.23834584653377533, |
| "learning_rate": 0.0001044192, |
| "loss": 0.8939, |
| "step": 29880 |
| }, |
| { |
| "epoch": 0.4784, |
| "grad_norm": 0.2608813941478729, |
| "learning_rate": 0.00010435520000000001, |
| "loss": 0.898, |
| "step": 29900 |
| }, |
| { |
| "epoch": 0.47872, |
| "grad_norm": 0.22944658994674683, |
| "learning_rate": 0.00010429120000000001, |
| "loss": 0.8535, |
| "step": 29920 |
| }, |
| { |
| "epoch": 0.47904, |
| "grad_norm": 0.18182271718978882, |
| "learning_rate": 0.00010422720000000002, |
| "loss": 0.9109, |
| "step": 29940 |
| }, |
| { |
| "epoch": 0.47936, |
| "grad_norm": 0.25479796528816223, |
| "learning_rate": 0.0001041632, |
| "loss": 0.8445, |
| "step": 29960 |
| }, |
| { |
| "epoch": 0.47968, |
| "grad_norm": 0.2711828052997589, |
| "learning_rate": 0.0001040992, |
| "loss": 0.8993, |
| "step": 29980 |
| }, |
| { |
| "epoch": 0.48, |
| "grad_norm": 0.23804309964179993, |
| "learning_rate": 0.0001040352, |
| "loss": 0.826, |
| "step": 30000 |
| }, |
| { |
| "epoch": 0.48032, |
| "grad_norm": 0.2019588053226471, |
| "learning_rate": 0.0001039712, |
| "loss": 0.8777, |
| "step": 30020 |
| }, |
| { |
| "epoch": 0.48064, |
| "grad_norm": 0.2302328497171402, |
| "learning_rate": 0.00010390720000000001, |
| "loss": 0.9068, |
| "step": 30040 |
| }, |
| { |
| "epoch": 0.48096, |
| "grad_norm": 0.20539428293704987, |
| "learning_rate": 0.0001038432, |
| "loss": 0.894, |
| "step": 30060 |
| }, |
| { |
| "epoch": 0.48128, |
| "grad_norm": 0.3114759624004364, |
| "learning_rate": 0.00010377920000000001, |
| "loss": 0.8664, |
| "step": 30080 |
| }, |
| { |
| "epoch": 0.4816, |
| "grad_norm": 0.22987259924411774, |
| "learning_rate": 0.0001037152, |
| "loss": 0.8831, |
| "step": 30100 |
| }, |
| { |
| "epoch": 0.48192, |
| "grad_norm": 0.29275548458099365, |
| "learning_rate": 0.00010365120000000001, |
| "loss": 0.9069, |
| "step": 30120 |
| }, |
| { |
| "epoch": 0.48224, |
| "grad_norm": 0.21175888180732727, |
| "learning_rate": 0.0001035872, |
| "loss": 0.9181, |
| "step": 30140 |
| }, |
| { |
| "epoch": 0.48256, |
| "grad_norm": 0.32916828989982605, |
| "learning_rate": 0.00010352319999999999, |
| "loss": 0.9072, |
| "step": 30160 |
| }, |
| { |
| "epoch": 0.48288, |
| "grad_norm": 0.24162223935127258, |
| "learning_rate": 0.0001034592, |
| "loss": 0.8855, |
| "step": 30180 |
| }, |
| { |
| "epoch": 0.4832, |
| "grad_norm": 0.21086947619915009, |
| "learning_rate": 0.00010339520000000001, |
| "loss": 0.9223, |
| "step": 30200 |
| }, |
| { |
| "epoch": 0.48352, |
| "grad_norm": 0.24102705717086792, |
| "learning_rate": 0.0001033312, |
| "loss": 0.8887, |
| "step": 30220 |
| }, |
| { |
| "epoch": 0.48384, |
| "grad_norm": 0.24661841988563538, |
| "learning_rate": 0.00010326720000000001, |
| "loss": 0.9, |
| "step": 30240 |
| }, |
| { |
| "epoch": 0.48416, |
| "grad_norm": 0.2146749347448349, |
| "learning_rate": 0.00010320320000000001, |
| "loss": 0.9188, |
| "step": 30260 |
| }, |
| { |
| "epoch": 0.48448, |
| "grad_norm": 0.31963422894477844, |
| "learning_rate": 0.00010313920000000002, |
| "loss": 0.8681, |
| "step": 30280 |
| }, |
| { |
| "epoch": 0.4848, |
| "grad_norm": 0.25737327337265015, |
| "learning_rate": 0.0001030752, |
| "loss": 0.866, |
| "step": 30300 |
| }, |
| { |
| "epoch": 0.48512, |
| "grad_norm": 0.25805580615997314, |
| "learning_rate": 0.0001030112, |
| "loss": 0.9203, |
| "step": 30320 |
| }, |
| { |
| "epoch": 0.48544, |
| "grad_norm": 0.2447681874036789, |
| "learning_rate": 0.0001029472, |
| "loss": 0.8897, |
| "step": 30340 |
| }, |
| { |
| "epoch": 0.48576, |
| "grad_norm": 0.2577648460865021, |
| "learning_rate": 0.0001028832, |
| "loss": 0.8548, |
| "step": 30360 |
| }, |
| { |
| "epoch": 0.48608, |
| "grad_norm": 0.24042245745658875, |
| "learning_rate": 0.0001028192, |
| "loss": 0.9512, |
| "step": 30380 |
| }, |
| { |
| "epoch": 0.4864, |
| "grad_norm": 0.24115043878555298, |
| "learning_rate": 0.0001027552, |
| "loss": 0.9036, |
| "step": 30400 |
| }, |
| { |
| "epoch": 0.48672, |
| "grad_norm": 0.2144307792186737, |
| "learning_rate": 0.00010269120000000001, |
| "loss": 0.9172, |
| "step": 30420 |
| }, |
| { |
| "epoch": 0.48704, |
| "grad_norm": 0.21385303139686584, |
| "learning_rate": 0.0001026272, |
| "loss": 0.8796, |
| "step": 30440 |
| }, |
| { |
| "epoch": 0.48736, |
| "grad_norm": 0.21479547023773193, |
| "learning_rate": 0.00010256320000000001, |
| "loss": 0.8923, |
| "step": 30460 |
| }, |
| { |
| "epoch": 0.48768, |
| "grad_norm": 0.24400416016578674, |
| "learning_rate": 0.0001024992, |
| "loss": 0.9339, |
| "step": 30480 |
| }, |
| { |
| "epoch": 0.488, |
| "grad_norm": 0.19768428802490234, |
| "learning_rate": 0.00010243519999999999, |
| "loss": 0.8924, |
| "step": 30500 |
| }, |
| { |
| "epoch": 0.48832, |
| "grad_norm": 0.23109054565429688, |
| "learning_rate": 0.0001023712, |
| "loss": 0.9223, |
| "step": 30520 |
| }, |
| { |
| "epoch": 0.48864, |
| "grad_norm": 0.1906929314136505, |
| "learning_rate": 0.00010230720000000001, |
| "loss": 0.8876, |
| "step": 30540 |
| }, |
| { |
| "epoch": 0.48896, |
| "grad_norm": 0.24491210281848907, |
| "learning_rate": 0.0001022432, |
| "loss": 0.8911, |
| "step": 30560 |
| }, |
| { |
| "epoch": 0.48928, |
| "grad_norm": 0.21576926112174988, |
| "learning_rate": 0.00010217920000000001, |
| "loss": 0.8618, |
| "step": 30580 |
| }, |
| { |
| "epoch": 0.4896, |
| "grad_norm": 0.2178792953491211, |
| "learning_rate": 0.00010211520000000001, |
| "loss": 0.8744, |
| "step": 30600 |
| }, |
| { |
| "epoch": 0.48992, |
| "grad_norm": 0.2320430725812912, |
| "learning_rate": 0.00010205120000000002, |
| "loss": 0.8853, |
| "step": 30620 |
| }, |
| { |
| "epoch": 0.49024, |
| "grad_norm": 0.2017570436000824, |
| "learning_rate": 0.0001019872, |
| "loss": 0.878, |
| "step": 30640 |
| }, |
| { |
| "epoch": 0.49056, |
| "grad_norm": 0.26477140188217163, |
| "learning_rate": 0.0001019232, |
| "loss": 0.9664, |
| "step": 30660 |
| }, |
| { |
| "epoch": 0.49088, |
| "grad_norm": 0.22188952565193176, |
| "learning_rate": 0.0001018592, |
| "loss": 0.8757, |
| "step": 30680 |
| }, |
| { |
| "epoch": 0.4912, |
| "grad_norm": 0.2646230161190033, |
| "learning_rate": 0.0001017952, |
| "loss": 0.8465, |
| "step": 30700 |
| }, |
| { |
| "epoch": 0.49152, |
| "grad_norm": 0.19764351844787598, |
| "learning_rate": 0.0001017312, |
| "loss": 0.8602, |
| "step": 30720 |
| }, |
| { |
| "epoch": 0.49184, |
| "grad_norm": 0.2683072090148926, |
| "learning_rate": 0.0001016672, |
| "loss": 0.9249, |
| "step": 30740 |
| }, |
| { |
| "epoch": 0.49216, |
| "grad_norm": 0.2290680855512619, |
| "learning_rate": 0.00010160320000000001, |
| "loss": 0.8733, |
| "step": 30760 |
| }, |
| { |
| "epoch": 0.49248, |
| "grad_norm": 0.2023002654314041, |
| "learning_rate": 0.00010153920000000002, |
| "loss": 0.9078, |
| "step": 30780 |
| }, |
| { |
| "epoch": 0.4928, |
| "grad_norm": 0.2146545648574829, |
| "learning_rate": 0.00010147520000000001, |
| "loss": 0.901, |
| "step": 30800 |
| }, |
| { |
| "epoch": 0.49312, |
| "grad_norm": 0.2241715043783188, |
| "learning_rate": 0.0001014112, |
| "loss": 0.8551, |
| "step": 30820 |
| }, |
| { |
| "epoch": 0.49344, |
| "grad_norm": 0.21123602986335754, |
| "learning_rate": 0.0001013472, |
| "loss": 0.8821, |
| "step": 30840 |
| }, |
| { |
| "epoch": 0.49376, |
| "grad_norm": 0.2702392637729645, |
| "learning_rate": 0.0001012832, |
| "loss": 0.9438, |
| "step": 30860 |
| }, |
| { |
| "epoch": 0.49408, |
| "grad_norm": 0.22311244904994965, |
| "learning_rate": 0.00010121920000000001, |
| "loss": 0.8835, |
| "step": 30880 |
| }, |
| { |
| "epoch": 0.4944, |
| "grad_norm": 0.20762741565704346, |
| "learning_rate": 0.0001011552, |
| "loss": 0.8668, |
| "step": 30900 |
| }, |
| { |
| "epoch": 0.49472, |
| "grad_norm": 0.2043907791376114, |
| "learning_rate": 0.00010109120000000001, |
| "loss": 0.8684, |
| "step": 30920 |
| }, |
| { |
| "epoch": 0.49504, |
| "grad_norm": 0.23084862530231476, |
| "learning_rate": 0.00010102720000000001, |
| "loss": 0.9253, |
| "step": 30940 |
| }, |
| { |
| "epoch": 0.49536, |
| "grad_norm": 0.23380133509635925, |
| "learning_rate": 0.00010096320000000002, |
| "loss": 0.8374, |
| "step": 30960 |
| }, |
| { |
| "epoch": 0.49568, |
| "grad_norm": 0.23766806721687317, |
| "learning_rate": 0.00010089920000000001, |
| "loss": 0.8712, |
| "step": 30980 |
| }, |
| { |
| "epoch": 0.496, |
| "grad_norm": 0.24725081026554108, |
| "learning_rate": 0.00010083519999999999, |
| "loss": 0.8836, |
| "step": 31000 |
| }, |
| { |
| "epoch": 0.49632, |
| "grad_norm": 0.25081491470336914, |
| "learning_rate": 0.0001007712, |
| "loss": 0.8037, |
| "step": 31020 |
| }, |
| { |
| "epoch": 0.49664, |
| "grad_norm": 0.3002113401889801, |
| "learning_rate": 0.0001007072, |
| "loss": 0.8463, |
| "step": 31040 |
| }, |
| { |
| "epoch": 0.49696, |
| "grad_norm": 0.24691928923130035, |
| "learning_rate": 0.0001006432, |
| "loss": 0.8808, |
| "step": 31060 |
| }, |
| { |
| "epoch": 0.49728, |
| "grad_norm": 0.22657333314418793, |
| "learning_rate": 0.0001005792, |
| "loss": 0.8678, |
| "step": 31080 |
| }, |
| { |
| "epoch": 0.4976, |
| "grad_norm": 0.2616662085056305, |
| "learning_rate": 0.00010051520000000001, |
| "loss": 0.8786, |
| "step": 31100 |
| }, |
| { |
| "epoch": 0.49792, |
| "grad_norm": 0.23406346142292023, |
| "learning_rate": 0.00010045120000000002, |
| "loss": 0.8488, |
| "step": 31120 |
| }, |
| { |
| "epoch": 0.49824, |
| "grad_norm": 0.2088574469089508, |
| "learning_rate": 0.00010038720000000001, |
| "loss": 0.8433, |
| "step": 31140 |
| }, |
| { |
| "epoch": 0.49856, |
| "grad_norm": 0.24721549451351166, |
| "learning_rate": 0.0001003264, |
| "loss": 0.8545, |
| "step": 31160 |
| }, |
| { |
| "epoch": 0.49888, |
| "grad_norm": 0.2351522445678711, |
| "learning_rate": 0.0001002624, |
| "loss": 0.929, |
| "step": 31180 |
| }, |
| { |
| "epoch": 0.4992, |
| "grad_norm": 0.24752940237522125, |
| "learning_rate": 0.0001001984, |
| "loss": 0.8709, |
| "step": 31200 |
| }, |
| { |
| "epoch": 0.49952, |
| "grad_norm": 0.2309713363647461, |
| "learning_rate": 0.0001001344, |
| "loss": 0.8782, |
| "step": 31220 |
| }, |
| { |
| "epoch": 0.49984, |
| "grad_norm": 0.24925391376018524, |
| "learning_rate": 0.00010007040000000001, |
| "loss": 0.8542, |
| "step": 31240 |
| }, |
| { |
| "epoch": 0.50016, |
| "grad_norm": 0.22123312950134277, |
| "learning_rate": 0.0001000064, |
| "loss": 0.8859, |
| "step": 31260 |
| }, |
| { |
| "epoch": 0.50048, |
| "grad_norm": 0.24795830249786377, |
| "learning_rate": 9.99424e-05, |
| "loss": 0.8789, |
| "step": 31280 |
| }, |
| { |
| "epoch": 0.5008, |
| "grad_norm": 0.22997990250587463, |
| "learning_rate": 9.987840000000001e-05, |
| "loss": 0.8454, |
| "step": 31300 |
| }, |
| { |
| "epoch": 0.50112, |
| "grad_norm": 0.24946443736553192, |
| "learning_rate": 9.98144e-05, |
| "loss": 0.9236, |
| "step": 31320 |
| }, |
| { |
| "epoch": 0.50144, |
| "grad_norm": 0.21945710480213165, |
| "learning_rate": 9.97504e-05, |
| "loss": 0.9018, |
| "step": 31340 |
| }, |
| { |
| "epoch": 0.50176, |
| "grad_norm": 0.23725731670856476, |
| "learning_rate": 9.968640000000001e-05, |
| "loss": 0.8932, |
| "step": 31360 |
| }, |
| { |
| "epoch": 0.50208, |
| "grad_norm": 0.256510466337204, |
| "learning_rate": 9.96224e-05, |
| "loss": 0.8957, |
| "step": 31380 |
| }, |
| { |
| "epoch": 0.5024, |
| "grad_norm": 0.2660234272480011, |
| "learning_rate": 9.955840000000001e-05, |
| "loss": 0.8573, |
| "step": 31400 |
| }, |
| { |
| "epoch": 0.50272, |
| "grad_norm": 0.2343997359275818, |
| "learning_rate": 9.949440000000001e-05, |
| "loss": 0.8419, |
| "step": 31420 |
| }, |
| { |
| "epoch": 0.50304, |
| "grad_norm": 0.25852516293525696, |
| "learning_rate": 9.94304e-05, |
| "loss": 0.8699, |
| "step": 31440 |
| }, |
| { |
| "epoch": 0.50336, |
| "grad_norm": 0.21607396006584167, |
| "learning_rate": 9.93664e-05, |
| "loss": 0.8752, |
| "step": 31460 |
| }, |
| { |
| "epoch": 0.50368, |
| "grad_norm": 0.23309437930583954, |
| "learning_rate": 9.93024e-05, |
| "loss": 0.8992, |
| "step": 31480 |
| }, |
| { |
| "epoch": 0.504, |
| "grad_norm": 0.24827729165554047, |
| "learning_rate": 9.923840000000002e-05, |
| "loss": 0.9009, |
| "step": 31500 |
| }, |
| { |
| "epoch": 0.50432, |
| "grad_norm": 0.22215944528579712, |
| "learning_rate": 9.91744e-05, |
| "loss": 0.8935, |
| "step": 31520 |
| }, |
| { |
| "epoch": 0.50464, |
| "grad_norm": 0.23230740427970886, |
| "learning_rate": 9.91104e-05, |
| "loss": 0.9087, |
| "step": 31540 |
| }, |
| { |
| "epoch": 0.50496, |
| "grad_norm": 0.25434383749961853, |
| "learning_rate": 9.90464e-05, |
| "loss": 0.9246, |
| "step": 31560 |
| }, |
| { |
| "epoch": 0.50528, |
| "grad_norm": 0.22664053738117218, |
| "learning_rate": 9.898240000000001e-05, |
| "loss": 0.8454, |
| "step": 31580 |
| }, |
| { |
| "epoch": 0.5056, |
| "grad_norm": 0.2654976546764374, |
| "learning_rate": 9.89184e-05, |
| "loss": 0.8788, |
| "step": 31600 |
| }, |
| { |
| "epoch": 0.50592, |
| "grad_norm": 0.23799720406532288, |
| "learning_rate": 9.88544e-05, |
| "loss": 0.9061, |
| "step": 31620 |
| }, |
| { |
| "epoch": 0.50624, |
| "grad_norm": 0.22206011414527893, |
| "learning_rate": 9.879040000000001e-05, |
| "loss": 0.9175, |
| "step": 31640 |
| }, |
| { |
| "epoch": 0.50656, |
| "grad_norm": 0.20660768449306488, |
| "learning_rate": 9.87264e-05, |
| "loss": 0.9249, |
| "step": 31660 |
| }, |
| { |
| "epoch": 0.50688, |
| "grad_norm": 0.2204529196023941, |
| "learning_rate": 9.86624e-05, |
| "loss": 0.8608, |
| "step": 31680 |
| }, |
| { |
| "epoch": 0.5072, |
| "grad_norm": 0.23114994168281555, |
| "learning_rate": 9.859840000000001e-05, |
| "loss": 0.8453, |
| "step": 31700 |
| }, |
| { |
| "epoch": 0.50752, |
| "grad_norm": 0.23731987178325653, |
| "learning_rate": 9.85344e-05, |
| "loss": 0.8556, |
| "step": 31720 |
| }, |
| { |
| "epoch": 0.50784, |
| "grad_norm": 0.2295464277267456, |
| "learning_rate": 9.847040000000001e-05, |
| "loss": 0.8361, |
| "step": 31740 |
| }, |
| { |
| "epoch": 0.50816, |
| "grad_norm": 0.2154739946126938, |
| "learning_rate": 9.840640000000001e-05, |
| "loss": 0.9396, |
| "step": 31760 |
| }, |
| { |
| "epoch": 0.50848, |
| "grad_norm": 0.2496858537197113, |
| "learning_rate": 9.83424e-05, |
| "loss": 0.9095, |
| "step": 31780 |
| }, |
| { |
| "epoch": 0.5088, |
| "grad_norm": 0.2576257288455963, |
| "learning_rate": 9.82784e-05, |
| "loss": 0.928, |
| "step": 31800 |
| }, |
| { |
| "epoch": 0.50912, |
| "grad_norm": 0.2754540741443634, |
| "learning_rate": 9.82144e-05, |
| "loss": 0.9274, |
| "step": 31820 |
| }, |
| { |
| "epoch": 0.50944, |
| "grad_norm": 0.21684108674526215, |
| "learning_rate": 9.815040000000001e-05, |
| "loss": 0.9536, |
| "step": 31840 |
| }, |
| { |
| "epoch": 0.50976, |
| "grad_norm": 0.20123428106307983, |
| "learning_rate": 9.80864e-05, |
| "loss": 0.8948, |
| "step": 31860 |
| }, |
| { |
| "epoch": 0.51008, |
| "grad_norm": 0.19840183854103088, |
| "learning_rate": 9.80224e-05, |
| "loss": 0.8843, |
| "step": 31880 |
| }, |
| { |
| "epoch": 0.5104, |
| "grad_norm": 0.21900126338005066, |
| "learning_rate": 9.79584e-05, |
| "loss": 0.861, |
| "step": 31900 |
| }, |
| { |
| "epoch": 0.51072, |
| "grad_norm": 0.27311161160469055, |
| "learning_rate": 9.789440000000001e-05, |
| "loss": 0.8824, |
| "step": 31920 |
| }, |
| { |
| "epoch": 0.51104, |
| "grad_norm": 0.2307424545288086, |
| "learning_rate": 9.78304e-05, |
| "loss": 0.9077, |
| "step": 31940 |
| }, |
| { |
| "epoch": 0.51136, |
| "grad_norm": 0.23477308452129364, |
| "learning_rate": 9.77664e-05, |
| "loss": 0.8819, |
| "step": 31960 |
| }, |
| { |
| "epoch": 0.51168, |
| "grad_norm": 0.24617180228233337, |
| "learning_rate": 9.770240000000001e-05, |
| "loss": 0.8832, |
| "step": 31980 |
| }, |
| { |
| "epoch": 0.512, |
| "grad_norm": 0.28253304958343506, |
| "learning_rate": 9.76384e-05, |
| "loss": 0.9143, |
| "step": 32000 |
| }, |
| { |
| "epoch": 0.51232, |
| "grad_norm": 0.21168233454227448, |
| "learning_rate": 9.75744e-05, |
| "loss": 0.8644, |
| "step": 32020 |
| }, |
| { |
| "epoch": 0.51264, |
| "grad_norm": 0.2240704596042633, |
| "learning_rate": 9.751040000000001e-05, |
| "loss": 0.8768, |
| "step": 32040 |
| }, |
| { |
| "epoch": 0.51296, |
| "grad_norm": 0.26020580530166626, |
| "learning_rate": 9.74464e-05, |
| "loss": 0.8708, |
| "step": 32060 |
| }, |
| { |
| "epoch": 0.51328, |
| "grad_norm": 0.2664453387260437, |
| "learning_rate": 9.738240000000001e-05, |
| "loss": 0.8823, |
| "step": 32080 |
| }, |
| { |
| "epoch": 0.5136, |
| "grad_norm": 0.24020379781723022, |
| "learning_rate": 9.73184e-05, |
| "loss": 0.9324, |
| "step": 32100 |
| }, |
| { |
| "epoch": 0.51392, |
| "grad_norm": 0.25187745690345764, |
| "learning_rate": 9.72544e-05, |
| "loss": 0.9079, |
| "step": 32120 |
| }, |
| { |
| "epoch": 0.51424, |
| "grad_norm": 0.2086835503578186, |
| "learning_rate": 9.71904e-05, |
| "loss": 0.9136, |
| "step": 32140 |
| }, |
| { |
| "epoch": 0.51456, |
| "grad_norm": 0.20220904052257538, |
| "learning_rate": 9.71264e-05, |
| "loss": 0.9192, |
| "step": 32160 |
| }, |
| { |
| "epoch": 0.51488, |
| "grad_norm": 0.245720773935318, |
| "learning_rate": 9.706240000000001e-05, |
| "loss": 0.8716, |
| "step": 32180 |
| }, |
| { |
| "epoch": 0.5152, |
| "grad_norm": 0.24418127536773682, |
| "learning_rate": 9.69984e-05, |
| "loss": 0.9469, |
| "step": 32200 |
| }, |
| { |
| "epoch": 0.51552, |
| "grad_norm": 0.20389395952224731, |
| "learning_rate": 9.69344e-05, |
| "loss": 0.8574, |
| "step": 32220 |
| }, |
| { |
| "epoch": 0.51584, |
| "grad_norm": 0.25827401876449585, |
| "learning_rate": 9.68704e-05, |
| "loss": 0.8792, |
| "step": 32240 |
| }, |
| { |
| "epoch": 0.51616, |
| "grad_norm": 0.24777017533779144, |
| "learning_rate": 9.680640000000001e-05, |
| "loss": 0.9179, |
| "step": 32260 |
| }, |
| { |
| "epoch": 0.51648, |
| "grad_norm": 0.24638600647449493, |
| "learning_rate": 9.67424e-05, |
| "loss": 0.8133, |
| "step": 32280 |
| }, |
| { |
| "epoch": 0.5168, |
| "grad_norm": 0.24801717698574066, |
| "learning_rate": 9.66784e-05, |
| "loss": 0.9329, |
| "step": 32300 |
| }, |
| { |
| "epoch": 0.51712, |
| "grad_norm": 0.23096071183681488, |
| "learning_rate": 9.661440000000001e-05, |
| "loss": 0.8527, |
| "step": 32320 |
| }, |
| { |
| "epoch": 0.51744, |
| "grad_norm": 0.25937584042549133, |
| "learning_rate": 9.65504e-05, |
| "loss": 0.8984, |
| "step": 32340 |
| }, |
| { |
| "epoch": 0.51776, |
| "grad_norm": 0.22245679795742035, |
| "learning_rate": 9.648640000000001e-05, |
| "loss": 0.888, |
| "step": 32360 |
| }, |
| { |
| "epoch": 0.51808, |
| "grad_norm": 0.24738770723342896, |
| "learning_rate": 9.642240000000001e-05, |
| "loss": 0.9783, |
| "step": 32380 |
| }, |
| { |
| "epoch": 0.5184, |
| "grad_norm": 0.24137365818023682, |
| "learning_rate": 9.63584e-05, |
| "loss": 0.9105, |
| "step": 32400 |
| }, |
| { |
| "epoch": 0.51872, |
| "grad_norm": 0.2397020161151886, |
| "learning_rate": 9.629440000000001e-05, |
| "loss": 0.8195, |
| "step": 32420 |
| }, |
| { |
| "epoch": 0.51904, |
| "grad_norm": 0.2638731598854065, |
| "learning_rate": 9.62304e-05, |
| "loss": 0.94, |
| "step": 32440 |
| }, |
| { |
| "epoch": 0.51936, |
| "grad_norm": 0.24911172688007355, |
| "learning_rate": 9.61664e-05, |
| "loss": 0.9078, |
| "step": 32460 |
| }, |
| { |
| "epoch": 0.51968, |
| "grad_norm": 0.2063673883676529, |
| "learning_rate": 9.610240000000001e-05, |
| "loss": 0.8501, |
| "step": 32480 |
| }, |
| { |
| "epoch": 0.52, |
| "grad_norm": 0.2300567924976349, |
| "learning_rate": 9.60384e-05, |
| "loss": 0.8857, |
| "step": 32500 |
| }, |
| { |
| "epoch": 0.52032, |
| "grad_norm": 0.26897576451301575, |
| "learning_rate": 9.597440000000001e-05, |
| "loss": 0.9256, |
| "step": 32520 |
| }, |
| { |
| "epoch": 0.52064, |
| "grad_norm": 0.25261190533638, |
| "learning_rate": 9.59104e-05, |
| "loss": 0.8997, |
| "step": 32540 |
| }, |
| { |
| "epoch": 0.52096, |
| "grad_norm": 0.24318355321884155, |
| "learning_rate": 9.58464e-05, |
| "loss": 0.8751, |
| "step": 32560 |
| }, |
| { |
| "epoch": 0.52128, |
| "grad_norm": 0.24040265381336212, |
| "learning_rate": 9.57824e-05, |
| "loss": 0.8543, |
| "step": 32580 |
| }, |
| { |
| "epoch": 0.5216, |
| "grad_norm": 0.22509385645389557, |
| "learning_rate": 9.571840000000001e-05, |
| "loss": 0.8858, |
| "step": 32600 |
| }, |
| { |
| "epoch": 0.52192, |
| "grad_norm": 0.2367594838142395, |
| "learning_rate": 9.56544e-05, |
| "loss": 0.8506, |
| "step": 32620 |
| }, |
| { |
| "epoch": 0.52224, |
| "grad_norm": 0.2382354438304901, |
| "learning_rate": 9.55904e-05, |
| "loss": 0.875, |
| "step": 32640 |
| }, |
| { |
| "epoch": 0.52256, |
| "grad_norm": 0.24895018339157104, |
| "learning_rate": 9.552640000000001e-05, |
| "loss": 0.9271, |
| "step": 32660 |
| }, |
| { |
| "epoch": 0.52288, |
| "grad_norm": 0.2637403905391693, |
| "learning_rate": 9.54624e-05, |
| "loss": 0.8485, |
| "step": 32680 |
| }, |
| { |
| "epoch": 0.5232, |
| "grad_norm": 0.2029896229505539, |
| "learning_rate": 9.539840000000001e-05, |
| "loss": 0.8569, |
| "step": 32700 |
| }, |
| { |
| "epoch": 0.52352, |
| "grad_norm": 0.28035101294517517, |
| "learning_rate": 9.53344e-05, |
| "loss": 0.8745, |
| "step": 32720 |
| }, |
| { |
| "epoch": 0.52384, |
| "grad_norm": 0.24843214452266693, |
| "learning_rate": 9.52704e-05, |
| "loss": 0.8853, |
| "step": 32740 |
| }, |
| { |
| "epoch": 0.52416, |
| "grad_norm": 0.26682519912719727, |
| "learning_rate": 9.520640000000001e-05, |
| "loss": 0.9163, |
| "step": 32760 |
| }, |
| { |
| "epoch": 0.52448, |
| "grad_norm": 0.2349347323179245, |
| "learning_rate": 9.51424e-05, |
| "loss": 0.91, |
| "step": 32780 |
| }, |
| { |
| "epoch": 0.5248, |
| "grad_norm": 0.2493859827518463, |
| "learning_rate": 9.50784e-05, |
| "loss": 0.8412, |
| "step": 32800 |
| }, |
| { |
| "epoch": 0.52512, |
| "grad_norm": 0.2364472597837448, |
| "learning_rate": 9.501440000000001e-05, |
| "loss": 0.8777, |
| "step": 32820 |
| }, |
| { |
| "epoch": 0.52544, |
| "grad_norm": 0.2579153776168823, |
| "learning_rate": 9.49504e-05, |
| "loss": 0.9061, |
| "step": 32840 |
| }, |
| { |
| "epoch": 0.52576, |
| "grad_norm": 0.23014868795871735, |
| "learning_rate": 9.488640000000001e-05, |
| "loss": 0.9311, |
| "step": 32860 |
| }, |
| { |
| "epoch": 0.52608, |
| "grad_norm": 0.2836184501647949, |
| "learning_rate": 9.48224e-05, |
| "loss": 0.8641, |
| "step": 32880 |
| }, |
| { |
| "epoch": 0.5264, |
| "grad_norm": 0.36536288261413574, |
| "learning_rate": 9.47584e-05, |
| "loss": 0.8722, |
| "step": 32900 |
| }, |
| { |
| "epoch": 0.52672, |
| "grad_norm": 0.23661687970161438, |
| "learning_rate": 9.46944e-05, |
| "loss": 0.8349, |
| "step": 32920 |
| }, |
| { |
| "epoch": 0.52704, |
| "grad_norm": 0.24428099393844604, |
| "learning_rate": 9.463040000000001e-05, |
| "loss": 0.8688, |
| "step": 32940 |
| }, |
| { |
| "epoch": 0.52736, |
| "grad_norm": 0.24511151015758514, |
| "learning_rate": 9.45664e-05, |
| "loss": 0.88, |
| "step": 32960 |
| }, |
| { |
| "epoch": 0.52768, |
| "grad_norm": 0.244853213429451, |
| "learning_rate": 9.45024e-05, |
| "loss": 0.8739, |
| "step": 32980 |
| }, |
| { |
| "epoch": 0.528, |
| "grad_norm": 0.2549150884151459, |
| "learning_rate": 9.443840000000001e-05, |
| "loss": 0.8941, |
| "step": 33000 |
| }, |
| { |
| "epoch": 0.52832, |
| "grad_norm": 0.24175408482551575, |
| "learning_rate": 9.43744e-05, |
| "loss": 0.8919, |
| "step": 33020 |
| }, |
| { |
| "epoch": 0.52864, |
| "grad_norm": 0.25348398089408875, |
| "learning_rate": 9.431040000000001e-05, |
| "loss": 0.8914, |
| "step": 33040 |
| }, |
| { |
| "epoch": 0.52896, |
| "grad_norm": 0.21426767110824585, |
| "learning_rate": 9.42464e-05, |
| "loss": 0.8783, |
| "step": 33060 |
| }, |
| { |
| "epoch": 0.52928, |
| "grad_norm": 0.2478022277355194, |
| "learning_rate": 9.41824e-05, |
| "loss": 0.866, |
| "step": 33080 |
| }, |
| { |
| "epoch": 0.5296, |
| "grad_norm": 0.21202678978443146, |
| "learning_rate": 9.411840000000001e-05, |
| "loss": 0.8285, |
| "step": 33100 |
| }, |
| { |
| "epoch": 0.52992, |
| "grad_norm": 0.2358037382364273, |
| "learning_rate": 9.40544e-05, |
| "loss": 0.8479, |
| "step": 33120 |
| }, |
| { |
| "epoch": 0.53024, |
| "grad_norm": 0.2295175939798355, |
| "learning_rate": 9.39904e-05, |
| "loss": 0.8979, |
| "step": 33140 |
| }, |
| { |
| "epoch": 0.53056, |
| "grad_norm": 0.22576481103897095, |
| "learning_rate": 9.392640000000001e-05, |
| "loss": 0.919, |
| "step": 33160 |
| }, |
| { |
| "epoch": 0.53088, |
| "grad_norm": 0.20744270086288452, |
| "learning_rate": 9.38624e-05, |
| "loss": 0.9019, |
| "step": 33180 |
| }, |
| { |
| "epoch": 0.5312, |
| "grad_norm": 0.26612403988838196, |
| "learning_rate": 9.379840000000001e-05, |
| "loss": 0.8855, |
| "step": 33200 |
| }, |
| { |
| "epoch": 0.53152, |
| "grad_norm": 0.24272586405277252, |
| "learning_rate": 9.37344e-05, |
| "loss": 0.8907, |
| "step": 33220 |
| }, |
| { |
| "epoch": 0.53184, |
| "grad_norm": 0.25545746088027954, |
| "learning_rate": 9.36704e-05, |
| "loss": 0.8881, |
| "step": 33240 |
| }, |
| { |
| "epoch": 0.53216, |
| "grad_norm": 0.22786740958690643, |
| "learning_rate": 9.36064e-05, |
| "loss": 0.8739, |
| "step": 33260 |
| }, |
| { |
| "epoch": 0.53248, |
| "grad_norm": 0.2257343977689743, |
| "learning_rate": 9.354240000000001e-05, |
| "loss": 0.8873, |
| "step": 33280 |
| }, |
| { |
| "epoch": 0.5328, |
| "grad_norm": 0.23881380259990692, |
| "learning_rate": 9.34784e-05, |
| "loss": 0.8811, |
| "step": 33300 |
| }, |
| { |
| "epoch": 0.53312, |
| "grad_norm": 0.2408117800951004, |
| "learning_rate": 9.34144e-05, |
| "loss": 0.8528, |
| "step": 33320 |
| }, |
| { |
| "epoch": 0.53344, |
| "grad_norm": 0.20471978187561035, |
| "learning_rate": 9.33504e-05, |
| "loss": 0.8313, |
| "step": 33340 |
| }, |
| { |
| "epoch": 0.53376, |
| "grad_norm": 0.24219338595867157, |
| "learning_rate": 9.32864e-05, |
| "loss": 0.8821, |
| "step": 33360 |
| }, |
| { |
| "epoch": 0.53408, |
| "grad_norm": 0.24901708960533142, |
| "learning_rate": 9.322240000000001e-05, |
| "loss": 0.9017, |
| "step": 33380 |
| }, |
| { |
| "epoch": 0.5344, |
| "grad_norm": 0.2642413079738617, |
| "learning_rate": 9.31584e-05, |
| "loss": 0.8627, |
| "step": 33400 |
| }, |
| { |
| "epoch": 0.53472, |
| "grad_norm": 0.20527620613574982, |
| "learning_rate": 9.30944e-05, |
| "loss": 0.8729, |
| "step": 33420 |
| }, |
| { |
| "epoch": 0.53504, |
| "grad_norm": 0.2573811411857605, |
| "learning_rate": 9.303040000000001e-05, |
| "loss": 0.8767, |
| "step": 33440 |
| }, |
| { |
| "epoch": 0.53536, |
| "grad_norm": 0.2389804571866989, |
| "learning_rate": 9.29664e-05, |
| "loss": 0.8553, |
| "step": 33460 |
| }, |
| { |
| "epoch": 0.53568, |
| "grad_norm": 0.25284579396247864, |
| "learning_rate": 9.29024e-05, |
| "loss": 0.9157, |
| "step": 33480 |
| }, |
| { |
| "epoch": 0.536, |
| "grad_norm": 0.24692294001579285, |
| "learning_rate": 9.283840000000001e-05, |
| "loss": 0.8612, |
| "step": 33500 |
| }, |
| { |
| "epoch": 0.53632, |
| "grad_norm": 0.23540472984313965, |
| "learning_rate": 9.27744e-05, |
| "loss": 0.9049, |
| "step": 33520 |
| }, |
| { |
| "epoch": 0.53664, |
| "grad_norm": 0.23751689493656158, |
| "learning_rate": 9.271040000000001e-05, |
| "loss": 0.837, |
| "step": 33540 |
| }, |
| { |
| "epoch": 0.53696, |
| "grad_norm": 0.24152640998363495, |
| "learning_rate": 9.26464e-05, |
| "loss": 0.8772, |
| "step": 33560 |
| }, |
| { |
| "epoch": 0.53728, |
| "grad_norm": 0.2199302613735199, |
| "learning_rate": 9.25824e-05, |
| "loss": 0.9278, |
| "step": 33580 |
| }, |
| { |
| "epoch": 0.5376, |
| "grad_norm": 0.24747338891029358, |
| "learning_rate": 9.25184e-05, |
| "loss": 0.8802, |
| "step": 33600 |
| }, |
| { |
| "epoch": 0.53792, |
| "grad_norm": 0.21488319337368011, |
| "learning_rate": 9.245440000000001e-05, |
| "loss": 0.9052, |
| "step": 33620 |
| }, |
| { |
| "epoch": 0.53824, |
| "grad_norm": 0.2535870671272278, |
| "learning_rate": 9.23904e-05, |
| "loss": 0.8781, |
| "step": 33640 |
| }, |
| { |
| "epoch": 0.53856, |
| "grad_norm": 0.2381758987903595, |
| "learning_rate": 9.23264e-05, |
| "loss": 0.8088, |
| "step": 33660 |
| }, |
| { |
| "epoch": 0.53888, |
| "grad_norm": 0.2485072910785675, |
| "learning_rate": 9.226560000000001e-05, |
| "loss": 0.8592, |
| "step": 33680 |
| }, |
| { |
| "epoch": 0.5392, |
| "grad_norm": 0.21843871474266052, |
| "learning_rate": 9.22016e-05, |
| "loss": 0.8427, |
| "step": 33700 |
| }, |
| { |
| "epoch": 0.53952, |
| "grad_norm": 0.2665683627128601, |
| "learning_rate": 9.21376e-05, |
| "loss": 0.879, |
| "step": 33720 |
| }, |
| { |
| "epoch": 0.53984, |
| "grad_norm": 0.22822092473506927, |
| "learning_rate": 9.20736e-05, |
| "loss": 0.8936, |
| "step": 33740 |
| }, |
| { |
| "epoch": 0.54016, |
| "grad_norm": 0.22950272262096405, |
| "learning_rate": 9.200960000000001e-05, |
| "loss": 0.838, |
| "step": 33760 |
| }, |
| { |
| "epoch": 0.54048, |
| "grad_norm": 0.2490173578262329, |
| "learning_rate": 9.19456e-05, |
| "loss": 0.8914, |
| "step": 33780 |
| }, |
| { |
| "epoch": 0.5408, |
| "grad_norm": 0.2716629207134247, |
| "learning_rate": 9.18816e-05, |
| "loss": 0.8936, |
| "step": 33800 |
| }, |
| { |
| "epoch": 0.54112, |
| "grad_norm": 0.28845784068107605, |
| "learning_rate": 9.18176e-05, |
| "loss": 0.9278, |
| "step": 33820 |
| }, |
| { |
| "epoch": 0.54144, |
| "grad_norm": 0.2092408537864685, |
| "learning_rate": 9.17536e-05, |
| "loss": 0.8818, |
| "step": 33840 |
| }, |
| { |
| "epoch": 0.54176, |
| "grad_norm": 0.19223183393478394, |
| "learning_rate": 9.168960000000001e-05, |
| "loss": 0.8626, |
| "step": 33860 |
| }, |
| { |
| "epoch": 0.54208, |
| "grad_norm": 0.2201627492904663, |
| "learning_rate": 9.16256e-05, |
| "loss": 0.8946, |
| "step": 33880 |
| }, |
| { |
| "epoch": 0.5424, |
| "grad_norm": 0.22081099450588226, |
| "learning_rate": 9.15616e-05, |
| "loss": 0.8691, |
| "step": 33900 |
| }, |
| { |
| "epoch": 0.54272, |
| "grad_norm": 0.229042649269104, |
| "learning_rate": 9.149760000000001e-05, |
| "loss": 0.856, |
| "step": 33920 |
| }, |
| { |
| "epoch": 0.54304, |
| "grad_norm": 0.23319824039936066, |
| "learning_rate": 9.14336e-05, |
| "loss": 0.8748, |
| "step": 33940 |
| }, |
| { |
| "epoch": 0.54336, |
| "grad_norm": 0.21704263985157013, |
| "learning_rate": 9.13696e-05, |
| "loss": 0.8736, |
| "step": 33960 |
| }, |
| { |
| "epoch": 0.54368, |
| "grad_norm": 0.2763427197933197, |
| "learning_rate": 9.130560000000001e-05, |
| "loss": 0.8392, |
| "step": 33980 |
| }, |
| { |
| "epoch": 0.544, |
| "grad_norm": 0.24619615077972412, |
| "learning_rate": 9.12416e-05, |
| "loss": 0.9046, |
| "step": 34000 |
| }, |
| { |
| "epoch": 0.54432, |
| "grad_norm": 0.2581463158130646, |
| "learning_rate": 9.117760000000001e-05, |
| "loss": 0.8873, |
| "step": 34020 |
| }, |
| { |
| "epoch": 0.54464, |
| "grad_norm": 0.25280770659446716, |
| "learning_rate": 9.11136e-05, |
| "loss": 0.9039, |
| "step": 34040 |
| }, |
| { |
| "epoch": 0.54496, |
| "grad_norm": 0.22729690372943878, |
| "learning_rate": 9.10496e-05, |
| "loss": 0.9558, |
| "step": 34060 |
| }, |
| { |
| "epoch": 0.54528, |
| "grad_norm": 0.25227198004722595, |
| "learning_rate": 9.09856e-05, |
| "loss": 0.8474, |
| "step": 34080 |
| }, |
| { |
| "epoch": 0.5456, |
| "grad_norm": 0.22505003213882446, |
| "learning_rate": 9.092160000000001e-05, |
| "loss": 0.8884, |
| "step": 34100 |
| }, |
| { |
| "epoch": 0.54592, |
| "grad_norm": 0.19984784722328186, |
| "learning_rate": 9.085760000000002e-05, |
| "loss": 0.864, |
| "step": 34120 |
| }, |
| { |
| "epoch": 0.54624, |
| "grad_norm": 0.22763599455356598, |
| "learning_rate": 9.07936e-05, |
| "loss": 0.8398, |
| "step": 34140 |
| }, |
| { |
| "epoch": 0.54656, |
| "grad_norm": 0.2618582248687744, |
| "learning_rate": 9.07296e-05, |
| "loss": 0.9464, |
| "step": 34160 |
| }, |
| { |
| "epoch": 0.54688, |
| "grad_norm": 0.2371044158935547, |
| "learning_rate": 9.06656e-05, |
| "loss": 0.8829, |
| "step": 34180 |
| }, |
| { |
| "epoch": 0.5472, |
| "grad_norm": 0.21414311230182648, |
| "learning_rate": 9.060160000000001e-05, |
| "loss": 0.8997, |
| "step": 34200 |
| }, |
| { |
| "epoch": 0.54752, |
| "grad_norm": 0.24445602297782898, |
| "learning_rate": 9.05376e-05, |
| "loss": 0.8741, |
| "step": 34220 |
| }, |
| { |
| "epoch": 0.54784, |
| "grad_norm": 0.22683311998844147, |
| "learning_rate": 9.04736e-05, |
| "loss": 0.905, |
| "step": 34240 |
| }, |
| { |
| "epoch": 0.54816, |
| "grad_norm": 0.23930495977401733, |
| "learning_rate": 9.040960000000001e-05, |
| "loss": 0.8318, |
| "step": 34260 |
| }, |
| { |
| "epoch": 0.54848, |
| "grad_norm": 0.2092512547969818, |
| "learning_rate": 9.03456e-05, |
| "loss": 0.9077, |
| "step": 34280 |
| }, |
| { |
| "epoch": 0.5488, |
| "grad_norm": 0.24851293861865997, |
| "learning_rate": 9.02816e-05, |
| "loss": 0.8201, |
| "step": 34300 |
| }, |
| { |
| "epoch": 0.54912, |
| "grad_norm": 0.25093013048171997, |
| "learning_rate": 9.021760000000001e-05, |
| "loss": 0.9003, |
| "step": 34320 |
| }, |
| { |
| "epoch": 0.54944, |
| "grad_norm": 0.22838135063648224, |
| "learning_rate": 9.01536e-05, |
| "loss": 0.8897, |
| "step": 34340 |
| }, |
| { |
| "epoch": 0.54976, |
| "grad_norm": 0.23337380588054657, |
| "learning_rate": 9.008960000000001e-05, |
| "loss": 0.8721, |
| "step": 34360 |
| }, |
| { |
| "epoch": 0.55008, |
| "grad_norm": 0.2049025148153305, |
| "learning_rate": 9.00256e-05, |
| "loss": 0.8554, |
| "step": 34380 |
| }, |
| { |
| "epoch": 0.5504, |
| "grad_norm": 0.24730657041072845, |
| "learning_rate": 8.99616e-05, |
| "loss": 0.8799, |
| "step": 34400 |
| }, |
| { |
| "epoch": 0.55072, |
| "grad_norm": 0.24854011833667755, |
| "learning_rate": 8.98976e-05, |
| "loss": 0.7941, |
| "step": 34420 |
| }, |
| { |
| "epoch": 0.55104, |
| "grad_norm": 0.2421095371246338, |
| "learning_rate": 8.98336e-05, |
| "loss": 0.874, |
| "step": 34440 |
| }, |
| { |
| "epoch": 0.55136, |
| "grad_norm": 0.23288048803806305, |
| "learning_rate": 8.976960000000002e-05, |
| "loss": 0.8883, |
| "step": 34460 |
| }, |
| { |
| "epoch": 0.55168, |
| "grad_norm": 0.227716326713562, |
| "learning_rate": 8.97056e-05, |
| "loss": 0.9004, |
| "step": 34480 |
| }, |
| { |
| "epoch": 0.552, |
| "grad_norm": 0.24987129867076874, |
| "learning_rate": 8.96416e-05, |
| "loss": 0.8758, |
| "step": 34500 |
| }, |
| { |
| "epoch": 0.55232, |
| "grad_norm": 0.24900184571743011, |
| "learning_rate": 8.95776e-05, |
| "loss": 0.8657, |
| "step": 34520 |
| }, |
| { |
| "epoch": 0.55264, |
| "grad_norm": 0.2685990035533905, |
| "learning_rate": 8.951360000000001e-05, |
| "loss": 0.8833, |
| "step": 34540 |
| }, |
| { |
| "epoch": 0.55296, |
| "grad_norm": 0.23856236040592194, |
| "learning_rate": 8.94496e-05, |
| "loss": 0.9229, |
| "step": 34560 |
| }, |
| { |
| "epoch": 0.55328, |
| "grad_norm": 0.24043366312980652, |
| "learning_rate": 8.93856e-05, |
| "loss": 0.8985, |
| "step": 34580 |
| }, |
| { |
| "epoch": 0.5536, |
| "grad_norm": 0.26800286769866943, |
| "learning_rate": 8.932160000000001e-05, |
| "loss": 0.9129, |
| "step": 34600 |
| }, |
| { |
| "epoch": 0.55392, |
| "grad_norm": 0.2030034214258194, |
| "learning_rate": 8.92576e-05, |
| "loss": 0.8625, |
| "step": 34620 |
| }, |
| { |
| "epoch": 0.55424, |
| "grad_norm": 0.21988297998905182, |
| "learning_rate": 8.91936e-05, |
| "loss": 0.8638, |
| "step": 34640 |
| }, |
| { |
| "epoch": 0.55456, |
| "grad_norm": 0.24161766469478607, |
| "learning_rate": 8.912960000000001e-05, |
| "loss": 0.9061, |
| "step": 34660 |
| }, |
| { |
| "epoch": 0.55488, |
| "grad_norm": 0.24580594897270203, |
| "learning_rate": 8.90656e-05, |
| "loss": 0.8638, |
| "step": 34680 |
| }, |
| { |
| "epoch": 0.5552, |
| "grad_norm": 0.23829227685928345, |
| "learning_rate": 8.900160000000001e-05, |
| "loss": 0.8996, |
| "step": 34700 |
| }, |
| { |
| "epoch": 0.55552, |
| "grad_norm": 0.2329644411802292, |
| "learning_rate": 8.893760000000001e-05, |
| "loss": 0.8453, |
| "step": 34720 |
| }, |
| { |
| "epoch": 0.55584, |
| "grad_norm": 0.22091227769851685, |
| "learning_rate": 8.88736e-05, |
| "loss": 0.8795, |
| "step": 34740 |
| }, |
| { |
| "epoch": 0.55616, |
| "grad_norm": 0.21813109517097473, |
| "learning_rate": 8.88096e-05, |
| "loss": 0.8929, |
| "step": 34760 |
| }, |
| { |
| "epoch": 0.55648, |
| "grad_norm": 0.2232077419757843, |
| "learning_rate": 8.87456e-05, |
| "loss": 0.8585, |
| "step": 34780 |
| }, |
| { |
| "epoch": 0.5568, |
| "grad_norm": 0.27467960119247437, |
| "learning_rate": 8.868160000000002e-05, |
| "loss": 0.9287, |
| "step": 34800 |
| }, |
| { |
| "epoch": 0.55712, |
| "grad_norm": 0.22533021867275238, |
| "learning_rate": 8.86176e-05, |
| "loss": 0.8408, |
| "step": 34820 |
| }, |
| { |
| "epoch": 0.55744, |
| "grad_norm": 0.2704051733016968, |
| "learning_rate": 8.85536e-05, |
| "loss": 0.8606, |
| "step": 34840 |
| }, |
| { |
| "epoch": 0.55776, |
| "grad_norm": 0.2197883278131485, |
| "learning_rate": 8.84896e-05, |
| "loss": 0.8271, |
| "step": 34860 |
| }, |
| { |
| "epoch": 0.55808, |
| "grad_norm": 0.2424239218235016, |
| "learning_rate": 8.842560000000001e-05, |
| "loss": 0.9006, |
| "step": 34880 |
| }, |
| { |
| "epoch": 0.5584, |
| "grad_norm": 0.21460562944412231, |
| "learning_rate": 8.83616e-05, |
| "loss": 0.8882, |
| "step": 34900 |
| }, |
| { |
| "epoch": 0.55872, |
| "grad_norm": 0.2590295076370239, |
| "learning_rate": 8.82976e-05, |
| "loss": 0.906, |
| "step": 34920 |
| }, |
| { |
| "epoch": 0.55904, |
| "grad_norm": 0.2667274475097656, |
| "learning_rate": 8.823360000000001e-05, |
| "loss": 0.8522, |
| "step": 34940 |
| }, |
| { |
| "epoch": 0.55936, |
| "grad_norm": 0.20394787192344666, |
| "learning_rate": 8.81696e-05, |
| "loss": 0.9169, |
| "step": 34960 |
| }, |
| { |
| "epoch": 0.55968, |
| "grad_norm": 0.2314232587814331, |
| "learning_rate": 8.81056e-05, |
| "loss": 0.847, |
| "step": 34980 |
| }, |
| { |
| "epoch": 0.56, |
| "grad_norm": 0.25091394782066345, |
| "learning_rate": 8.804160000000001e-05, |
| "loss": 0.9109, |
| "step": 35000 |
| }, |
| { |
| "epoch": 0.56032, |
| "grad_norm": 0.26859068870544434, |
| "learning_rate": 8.79776e-05, |
| "loss": 0.879, |
| "step": 35020 |
| }, |
| { |
| "epoch": 0.56064, |
| "grad_norm": 0.2730168104171753, |
| "learning_rate": 8.791360000000001e-05, |
| "loss": 0.8548, |
| "step": 35040 |
| }, |
| { |
| "epoch": 0.56096, |
| "grad_norm": 0.21965494751930237, |
| "learning_rate": 8.78496e-05, |
| "loss": 0.8965, |
| "step": 35060 |
| }, |
| { |
| "epoch": 0.56128, |
| "grad_norm": 0.2608742117881775, |
| "learning_rate": 8.77856e-05, |
| "loss": 0.9028, |
| "step": 35080 |
| }, |
| { |
| "epoch": 0.5616, |
| "grad_norm": 0.19259628653526306, |
| "learning_rate": 8.77216e-05, |
| "loss": 0.8716, |
| "step": 35100 |
| }, |
| { |
| "epoch": 0.56192, |
| "grad_norm": 0.2763505280017853, |
| "learning_rate": 8.766080000000001e-05, |
| "loss": 0.8783, |
| "step": 35120 |
| }, |
| { |
| "epoch": 0.56224, |
| "grad_norm": 0.23790399730205536, |
| "learning_rate": 8.75968e-05, |
| "loss": 0.8931, |
| "step": 35140 |
| }, |
| { |
| "epoch": 0.56256, |
| "grad_norm": 0.21415534615516663, |
| "learning_rate": 8.75328e-05, |
| "loss": 0.8931, |
| "step": 35160 |
| }, |
| { |
| "epoch": 0.56288, |
| "grad_norm": 0.23109756410121918, |
| "learning_rate": 8.746880000000001e-05, |
| "loss": 0.9333, |
| "step": 35180 |
| }, |
| { |
| "epoch": 0.5632, |
| "grad_norm": 0.22907577455043793, |
| "learning_rate": 8.740480000000001e-05, |
| "loss": 0.8739, |
| "step": 35200 |
| }, |
| { |
| "epoch": 0.56352, |
| "grad_norm": 0.22961348295211792, |
| "learning_rate": 8.73408e-05, |
| "loss": 0.883, |
| "step": 35220 |
| }, |
| { |
| "epoch": 0.56384, |
| "grad_norm": 0.19997790455818176, |
| "learning_rate": 8.72768e-05, |
| "loss": 0.8698, |
| "step": 35240 |
| }, |
| { |
| "epoch": 0.56416, |
| "grad_norm": 0.22530966997146606, |
| "learning_rate": 8.72128e-05, |
| "loss": 0.8538, |
| "step": 35260 |
| }, |
| { |
| "epoch": 0.56448, |
| "grad_norm": 0.2729052007198334, |
| "learning_rate": 8.71488e-05, |
| "loss": 0.8882, |
| "step": 35280 |
| }, |
| { |
| "epoch": 0.5648, |
| "grad_norm": 0.23470643162727356, |
| "learning_rate": 8.70848e-05, |
| "loss": 0.8615, |
| "step": 35300 |
| }, |
| { |
| "epoch": 0.56512, |
| "grad_norm": 0.24945303797721863, |
| "learning_rate": 8.70208e-05, |
| "loss": 0.8993, |
| "step": 35320 |
| }, |
| { |
| "epoch": 0.56544, |
| "grad_norm": 0.18795213103294373, |
| "learning_rate": 8.69568e-05, |
| "loss": 0.8676, |
| "step": 35340 |
| }, |
| { |
| "epoch": 0.56576, |
| "grad_norm": 0.2676798403263092, |
| "learning_rate": 8.689280000000001e-05, |
| "loss": 0.9417, |
| "step": 35360 |
| }, |
| { |
| "epoch": 0.56608, |
| "grad_norm": 0.2519102096557617, |
| "learning_rate": 8.68288e-05, |
| "loss": 0.8696, |
| "step": 35380 |
| }, |
| { |
| "epoch": 0.5664, |
| "grad_norm": 0.2239411622285843, |
| "learning_rate": 8.67648e-05, |
| "loss": 0.8518, |
| "step": 35400 |
| }, |
| { |
| "epoch": 0.56672, |
| "grad_norm": 0.25220707058906555, |
| "learning_rate": 8.670080000000001e-05, |
| "loss": 0.8406, |
| "step": 35420 |
| }, |
| { |
| "epoch": 0.56704, |
| "grad_norm": 0.24866600334644318, |
| "learning_rate": 8.66368e-05, |
| "loss": 0.8726, |
| "step": 35440 |
| }, |
| { |
| "epoch": 0.56736, |
| "grad_norm": 0.23390260338783264, |
| "learning_rate": 8.657280000000001e-05, |
| "loss": 0.8616, |
| "step": 35460 |
| }, |
| { |
| "epoch": 0.56768, |
| "grad_norm": 0.23465383052825928, |
| "learning_rate": 8.650880000000001e-05, |
| "loss": 0.8897, |
| "step": 35480 |
| }, |
| { |
| "epoch": 0.568, |
| "grad_norm": 0.23219779133796692, |
| "learning_rate": 8.64448e-05, |
| "loss": 0.892, |
| "step": 35500 |
| }, |
| { |
| "epoch": 0.56832, |
| "grad_norm": 0.23238353431224823, |
| "learning_rate": 8.638080000000001e-05, |
| "loss": 0.8887, |
| "step": 35520 |
| }, |
| { |
| "epoch": 0.56864, |
| "grad_norm": 0.23893140256404877, |
| "learning_rate": 8.631680000000001e-05, |
| "loss": 0.868, |
| "step": 35540 |
| }, |
| { |
| "epoch": 0.56896, |
| "grad_norm": 0.23007500171661377, |
| "learning_rate": 8.62528e-05, |
| "loss": 0.8564, |
| "step": 35560 |
| }, |
| { |
| "epoch": 0.56928, |
| "grad_norm": 0.2285536676645279, |
| "learning_rate": 8.61888e-05, |
| "loss": 0.8967, |
| "step": 35580 |
| }, |
| { |
| "epoch": 0.5696, |
| "grad_norm": 0.2179461121559143, |
| "learning_rate": 8.61248e-05, |
| "loss": 0.8468, |
| "step": 35600 |
| }, |
| { |
| "epoch": 0.56992, |
| "grad_norm": 0.26246190071105957, |
| "learning_rate": 8.606080000000001e-05, |
| "loss": 0.866, |
| "step": 35620 |
| }, |
| { |
| "epoch": 0.57024, |
| "grad_norm": 0.2367112636566162, |
| "learning_rate": 8.59968e-05, |
| "loss": 0.8862, |
| "step": 35640 |
| }, |
| { |
| "epoch": 0.57056, |
| "grad_norm": 0.26484036445617676, |
| "learning_rate": 8.59328e-05, |
| "loss": 0.8992, |
| "step": 35660 |
| }, |
| { |
| "epoch": 0.57088, |
| "grad_norm": 0.2554921507835388, |
| "learning_rate": 8.58688e-05, |
| "loss": 0.9095, |
| "step": 35680 |
| }, |
| { |
| "epoch": 0.5712, |
| "grad_norm": 0.20814883708953857, |
| "learning_rate": 8.580480000000001e-05, |
| "loss": 0.8306, |
| "step": 35700 |
| }, |
| { |
| "epoch": 0.57152, |
| "grad_norm": 0.22327591478824615, |
| "learning_rate": 8.57408e-05, |
| "loss": 0.8366, |
| "step": 35720 |
| }, |
| { |
| "epoch": 0.57184, |
| "grad_norm": 0.2438846230506897, |
| "learning_rate": 8.56768e-05, |
| "loss": 0.8424, |
| "step": 35740 |
| }, |
| { |
| "epoch": 0.57216, |
| "grad_norm": 0.20804153382778168, |
| "learning_rate": 8.561280000000001e-05, |
| "loss": 0.8852, |
| "step": 35760 |
| }, |
| { |
| "epoch": 0.57248, |
| "grad_norm": 0.22227492928504944, |
| "learning_rate": 8.55488e-05, |
| "loss": 0.8586, |
| "step": 35780 |
| }, |
| { |
| "epoch": 0.5728, |
| "grad_norm": 0.28952184319496155, |
| "learning_rate": 8.548480000000001e-05, |
| "loss": 0.9095, |
| "step": 35800 |
| }, |
| { |
| "epoch": 0.57312, |
| "grad_norm": 0.22480730712413788, |
| "learning_rate": 8.542080000000001e-05, |
| "loss": 0.8834, |
| "step": 35820 |
| }, |
| { |
| "epoch": 0.57344, |
| "grad_norm": 0.19908693432807922, |
| "learning_rate": 8.53568e-05, |
| "loss": 0.84, |
| "step": 35840 |
| }, |
| { |
| "epoch": 0.57376, |
| "grad_norm": 0.24293170869350433, |
| "learning_rate": 8.529280000000001e-05, |
| "loss": 0.9019, |
| "step": 35860 |
| }, |
| { |
| "epoch": 0.57408, |
| "grad_norm": 0.1839456409215927, |
| "learning_rate": 8.52288e-05, |
| "loss": 0.923, |
| "step": 35880 |
| }, |
| { |
| "epoch": 0.5744, |
| "grad_norm": 0.23249760270118713, |
| "learning_rate": 8.51648e-05, |
| "loss": 0.8667, |
| "step": 35900 |
| }, |
| { |
| "epoch": 0.57472, |
| "grad_norm": 0.2784735858440399, |
| "learning_rate": 8.51008e-05, |
| "loss": 0.9178, |
| "step": 35920 |
| }, |
| { |
| "epoch": 0.57504, |
| "grad_norm": 0.23797404766082764, |
| "learning_rate": 8.50368e-05, |
| "loss": 0.9291, |
| "step": 35940 |
| }, |
| { |
| "epoch": 0.57536, |
| "grad_norm": 0.241110160946846, |
| "learning_rate": 8.497280000000001e-05, |
| "loss": 0.8447, |
| "step": 35960 |
| }, |
| { |
| "epoch": 0.57568, |
| "grad_norm": 0.2260485291481018, |
| "learning_rate": 8.49088e-05, |
| "loss": 0.8532, |
| "step": 35980 |
| }, |
| { |
| "epoch": 0.576, |
| "grad_norm": 0.22092276811599731, |
| "learning_rate": 8.48448e-05, |
| "loss": 0.8768, |
| "step": 36000 |
| }, |
| { |
| "epoch": 0.57632, |
| "grad_norm": 0.2721387445926666, |
| "learning_rate": 8.47808e-05, |
| "loss": 0.863, |
| "step": 36020 |
| }, |
| { |
| "epoch": 0.57664, |
| "grad_norm": 0.21206919848918915, |
| "learning_rate": 8.471680000000001e-05, |
| "loss": 0.8636, |
| "step": 36040 |
| }, |
| { |
| "epoch": 0.57696, |
| "grad_norm": 0.23496432602405548, |
| "learning_rate": 8.46528e-05, |
| "loss": 0.8745, |
| "step": 36060 |
| }, |
| { |
| "epoch": 0.57728, |
| "grad_norm": 0.2214774489402771, |
| "learning_rate": 8.45888e-05, |
| "loss": 0.873, |
| "step": 36080 |
| }, |
| { |
| "epoch": 0.5776, |
| "grad_norm": 0.2665559649467468, |
| "learning_rate": 8.452480000000001e-05, |
| "loss": 0.9061, |
| "step": 36100 |
| }, |
| { |
| "epoch": 0.57792, |
| "grad_norm": 0.25471359491348267, |
| "learning_rate": 8.44608e-05, |
| "loss": 0.8592, |
| "step": 36120 |
| }, |
| { |
| "epoch": 0.57824, |
| "grad_norm": 0.24169199168682098, |
| "learning_rate": 8.439680000000001e-05, |
| "loss": 0.8867, |
| "step": 36140 |
| }, |
| { |
| "epoch": 0.57856, |
| "grad_norm": 0.25281229615211487, |
| "learning_rate": 8.433280000000001e-05, |
| "loss": 0.8702, |
| "step": 36160 |
| }, |
| { |
| "epoch": 0.57888, |
| "grad_norm": 0.2565683424472809, |
| "learning_rate": 8.42688e-05, |
| "loss": 0.8812, |
| "step": 36180 |
| }, |
| { |
| "epoch": 0.5792, |
| "grad_norm": 0.2228873074054718, |
| "learning_rate": 8.420480000000001e-05, |
| "loss": 0.8556, |
| "step": 36200 |
| }, |
| { |
| "epoch": 0.57952, |
| "grad_norm": 0.23793266713619232, |
| "learning_rate": 8.41408e-05, |
| "loss": 0.8878, |
| "step": 36220 |
| }, |
| { |
| "epoch": 0.57984, |
| "grad_norm": 0.24681511521339417, |
| "learning_rate": 8.40768e-05, |
| "loss": 0.9124, |
| "step": 36240 |
| }, |
| { |
| "epoch": 0.58016, |
| "grad_norm": 0.2208309918642044, |
| "learning_rate": 8.40128e-05, |
| "loss": 0.8988, |
| "step": 36260 |
| }, |
| { |
| "epoch": 0.58048, |
| "grad_norm": 0.2527472972869873, |
| "learning_rate": 8.39488e-05, |
| "loss": 0.886, |
| "step": 36280 |
| }, |
| { |
| "epoch": 0.5808, |
| "grad_norm": 0.23893190920352936, |
| "learning_rate": 8.388480000000001e-05, |
| "loss": 0.8487, |
| "step": 36300 |
| }, |
| { |
| "epoch": 0.58112, |
| "grad_norm": 0.22206924855709076, |
| "learning_rate": 8.38208e-05, |
| "loss": 0.9371, |
| "step": 36320 |
| }, |
| { |
| "epoch": 0.58144, |
| "grad_norm": 0.28712591528892517, |
| "learning_rate": 8.37568e-05, |
| "loss": 0.8959, |
| "step": 36340 |
| }, |
| { |
| "epoch": 0.58176, |
| "grad_norm": 0.20586076378822327, |
| "learning_rate": 8.36928e-05, |
| "loss": 0.8394, |
| "step": 36360 |
| }, |
| { |
| "epoch": 0.58208, |
| "grad_norm": 0.20877981185913086, |
| "learning_rate": 8.362880000000001e-05, |
| "loss": 0.8793, |
| "step": 36380 |
| }, |
| { |
| "epoch": 0.5824, |
| "grad_norm": 0.2150806486606598, |
| "learning_rate": 8.35648e-05, |
| "loss": 0.8575, |
| "step": 36400 |
| }, |
| { |
| "epoch": 0.58272, |
| "grad_norm": 0.2607544958591461, |
| "learning_rate": 8.35008e-05, |
| "loss": 0.8492, |
| "step": 36420 |
| }, |
| { |
| "epoch": 0.58304, |
| "grad_norm": 0.21660766005516052, |
| "learning_rate": 8.343680000000001e-05, |
| "loss": 0.8599, |
| "step": 36440 |
| }, |
| { |
| "epoch": 0.58336, |
| "grad_norm": 0.19799566268920898, |
| "learning_rate": 8.33728e-05, |
| "loss": 0.8754, |
| "step": 36460 |
| }, |
| { |
| "epoch": 0.58368, |
| "grad_norm": 0.2376536726951599, |
| "learning_rate": 8.330880000000001e-05, |
| "loss": 0.8645, |
| "step": 36480 |
| }, |
| { |
| "epoch": 0.584, |
| "grad_norm": 0.2775854766368866, |
| "learning_rate": 8.32448e-05, |
| "loss": 0.8341, |
| "step": 36500 |
| }, |
| { |
| "epoch": 0.58432, |
| "grad_norm": 0.2808210849761963, |
| "learning_rate": 8.31808e-05, |
| "loss": 0.9145, |
| "step": 36520 |
| }, |
| { |
| "epoch": 0.58464, |
| "grad_norm": 0.2942677140235901, |
| "learning_rate": 8.311680000000001e-05, |
| "loss": 0.8645, |
| "step": 36540 |
| }, |
| { |
| "epoch": 0.58496, |
| "grad_norm": 0.24506719410419464, |
| "learning_rate": 8.30528e-05, |
| "loss": 0.9266, |
| "step": 36560 |
| }, |
| { |
| "epoch": 0.58528, |
| "grad_norm": 0.23690511286258698, |
| "learning_rate": 8.29888e-05, |
| "loss": 0.8701, |
| "step": 36580 |
| }, |
| { |
| "epoch": 0.5856, |
| "grad_norm": 0.2386239618062973, |
| "learning_rate": 8.29248e-05, |
| "loss": 0.894, |
| "step": 36600 |
| }, |
| { |
| "epoch": 0.58592, |
| "grad_norm": 0.23407141864299774, |
| "learning_rate": 8.28608e-05, |
| "loss": 0.8795, |
| "step": 36620 |
| }, |
| { |
| "epoch": 0.58624, |
| "grad_norm": 0.2837297320365906, |
| "learning_rate": 8.279680000000001e-05, |
| "loss": 0.8844, |
| "step": 36640 |
| }, |
| { |
| "epoch": 0.58656, |
| "grad_norm": 0.18929681181907654, |
| "learning_rate": 8.27328e-05, |
| "loss": 0.881, |
| "step": 36660 |
| }, |
| { |
| "epoch": 0.58688, |
| "grad_norm": 0.2538854479789734, |
| "learning_rate": 8.26688e-05, |
| "loss": 0.8326, |
| "step": 36680 |
| }, |
| { |
| "epoch": 0.5872, |
| "grad_norm": 0.23304085433483124, |
| "learning_rate": 8.26048e-05, |
| "loss": 0.8535, |
| "step": 36700 |
| }, |
| { |
| "epoch": 0.58752, |
| "grad_norm": 0.2668757736682892, |
| "learning_rate": 8.254080000000001e-05, |
| "loss": 0.9134, |
| "step": 36720 |
| }, |
| { |
| "epoch": 0.58784, |
| "grad_norm": 0.2611462473869324, |
| "learning_rate": 8.24768e-05, |
| "loss": 0.865, |
| "step": 36740 |
| }, |
| { |
| "epoch": 0.58816, |
| "grad_norm": 0.20947878062725067, |
| "learning_rate": 8.24128e-05, |
| "loss": 0.8719, |
| "step": 36760 |
| }, |
| { |
| "epoch": 0.58848, |
| "grad_norm": 0.23722214996814728, |
| "learning_rate": 8.234880000000001e-05, |
| "loss": 0.8801, |
| "step": 36780 |
| }, |
| { |
| "epoch": 0.5888, |
| "grad_norm": 0.2517217993736267, |
| "learning_rate": 8.22848e-05, |
| "loss": 0.929, |
| "step": 36800 |
| }, |
| { |
| "epoch": 0.58912, |
| "grad_norm": 0.21423856914043427, |
| "learning_rate": 8.222080000000001e-05, |
| "loss": 0.8452, |
| "step": 36820 |
| }, |
| { |
| "epoch": 0.58944, |
| "grad_norm": 0.224490687251091, |
| "learning_rate": 8.21568e-05, |
| "loss": 0.8617, |
| "step": 36840 |
| }, |
| { |
| "epoch": 0.58976, |
| "grad_norm": 0.1989527940750122, |
| "learning_rate": 8.20928e-05, |
| "loss": 0.9057, |
| "step": 36860 |
| }, |
| { |
| "epoch": 0.59008, |
| "grad_norm": 0.20397096872329712, |
| "learning_rate": 8.202880000000001e-05, |
| "loss": 0.8504, |
| "step": 36880 |
| }, |
| { |
| "epoch": 0.5904, |
| "grad_norm": 0.22488151490688324, |
| "learning_rate": 8.19648e-05, |
| "loss": 0.8383, |
| "step": 36900 |
| }, |
| { |
| "epoch": 0.59072, |
| "grad_norm": 0.23070698976516724, |
| "learning_rate": 8.19008e-05, |
| "loss": 0.8249, |
| "step": 36920 |
| }, |
| { |
| "epoch": 0.59104, |
| "grad_norm": 0.23242853581905365, |
| "learning_rate": 8.183680000000001e-05, |
| "loss": 0.8153, |
| "step": 36940 |
| }, |
| { |
| "epoch": 0.59136, |
| "grad_norm": 0.2361801415681839, |
| "learning_rate": 8.17728e-05, |
| "loss": 0.913, |
| "step": 36960 |
| }, |
| { |
| "epoch": 0.59168, |
| "grad_norm": 0.20463335514068604, |
| "learning_rate": 8.170880000000001e-05, |
| "loss": 0.8239, |
| "step": 36980 |
| }, |
| { |
| "epoch": 0.592, |
| "grad_norm": 0.2715516984462738, |
| "learning_rate": 8.16448e-05, |
| "loss": 0.8807, |
| "step": 37000 |
| }, |
| { |
| "epoch": 0.59232, |
| "grad_norm": 0.260455459356308, |
| "learning_rate": 8.15808e-05, |
| "loss": 0.874, |
| "step": 37020 |
| }, |
| { |
| "epoch": 0.59264, |
| "grad_norm": 0.23766261339187622, |
| "learning_rate": 8.15168e-05, |
| "loss": 0.8706, |
| "step": 37040 |
| }, |
| { |
| "epoch": 0.59296, |
| "grad_norm": 0.2649995684623718, |
| "learning_rate": 8.145280000000001e-05, |
| "loss": 0.8938, |
| "step": 37060 |
| }, |
| { |
| "epoch": 0.59328, |
| "grad_norm": 0.20467999577522278, |
| "learning_rate": 8.13888e-05, |
| "loss": 0.8686, |
| "step": 37080 |
| }, |
| { |
| "epoch": 0.5936, |
| "grad_norm": 0.2401207685470581, |
| "learning_rate": 8.13248e-05, |
| "loss": 0.8919, |
| "step": 37100 |
| }, |
| { |
| "epoch": 0.59392, |
| "grad_norm": 0.2349073439836502, |
| "learning_rate": 8.12608e-05, |
| "loss": 0.8998, |
| "step": 37120 |
| }, |
| { |
| "epoch": 0.59424, |
| "grad_norm": 0.22856348752975464, |
| "learning_rate": 8.11968e-05, |
| "loss": 0.8892, |
| "step": 37140 |
| }, |
| { |
| "epoch": 0.59456, |
| "grad_norm": 0.22828173637390137, |
| "learning_rate": 8.113280000000001e-05, |
| "loss": 0.8411, |
| "step": 37160 |
| }, |
| { |
| "epoch": 0.59488, |
| "grad_norm": 0.21084712445735931, |
| "learning_rate": 8.10688e-05, |
| "loss": 0.8682, |
| "step": 37180 |
| }, |
| { |
| "epoch": 0.5952, |
| "grad_norm": 0.2080162763595581, |
| "learning_rate": 8.10048e-05, |
| "loss": 0.9049, |
| "step": 37200 |
| }, |
| { |
| "epoch": 0.59552, |
| "grad_norm": 0.2635326385498047, |
| "learning_rate": 8.094080000000001e-05, |
| "loss": 0.8978, |
| "step": 37220 |
| }, |
| { |
| "epoch": 0.59584, |
| "grad_norm": 0.2089320570230484, |
| "learning_rate": 8.08768e-05, |
| "loss": 0.9324, |
| "step": 37240 |
| }, |
| { |
| "epoch": 0.59616, |
| "grad_norm": 0.2738167643547058, |
| "learning_rate": 8.08128e-05, |
| "loss": 0.8635, |
| "step": 37260 |
| }, |
| { |
| "epoch": 0.59648, |
| "grad_norm": 0.19724565744400024, |
| "learning_rate": 8.074880000000001e-05, |
| "loss": 0.8171, |
| "step": 37280 |
| }, |
| { |
| "epoch": 0.5968, |
| "grad_norm": 0.208595871925354, |
| "learning_rate": 8.06848e-05, |
| "loss": 0.8812, |
| "step": 37300 |
| }, |
| { |
| "epoch": 0.59712, |
| "grad_norm": 0.22307556867599487, |
| "learning_rate": 8.062080000000001e-05, |
| "loss": 0.8931, |
| "step": 37320 |
| }, |
| { |
| "epoch": 0.59744, |
| "grad_norm": 0.23288871347904205, |
| "learning_rate": 8.05568e-05, |
| "loss": 0.8764, |
| "step": 37340 |
| }, |
| { |
| "epoch": 0.59776, |
| "grad_norm": 0.228603333234787, |
| "learning_rate": 8.04928e-05, |
| "loss": 0.8742, |
| "step": 37360 |
| }, |
| { |
| "epoch": 0.59808, |
| "grad_norm": 0.2657196521759033, |
| "learning_rate": 8.04288e-05, |
| "loss": 0.8955, |
| "step": 37380 |
| }, |
| { |
| "epoch": 0.5984, |
| "grad_norm": 0.21586276590824127, |
| "learning_rate": 8.036480000000001e-05, |
| "loss": 0.8867, |
| "step": 37400 |
| }, |
| { |
| "epoch": 0.59872, |
| "grad_norm": 0.24376487731933594, |
| "learning_rate": 8.030080000000002e-05, |
| "loss": 0.8941, |
| "step": 37420 |
| }, |
| { |
| "epoch": 0.59904, |
| "grad_norm": 0.24991095066070557, |
| "learning_rate": 8.02368e-05, |
| "loss": 0.8832, |
| "step": 37440 |
| }, |
| { |
| "epoch": 0.59936, |
| "grad_norm": 0.2291015237569809, |
| "learning_rate": 8.01728e-05, |
| "loss": 0.8427, |
| "step": 37460 |
| }, |
| { |
| "epoch": 0.59968, |
| "grad_norm": 0.2516835331916809, |
| "learning_rate": 8.01088e-05, |
| "loss": 0.8526, |
| "step": 37480 |
| }, |
| { |
| "epoch": 0.6, |
| "grad_norm": 0.2229020595550537, |
| "learning_rate": 8.004480000000001e-05, |
| "loss": 0.8766, |
| "step": 37500 |
| }, |
| { |
| "epoch": 0.60032, |
| "grad_norm": 0.24501658976078033, |
| "learning_rate": 7.99808e-05, |
| "loss": 0.9391, |
| "step": 37520 |
| }, |
| { |
| "epoch": 0.60064, |
| "grad_norm": 0.21306291222572327, |
| "learning_rate": 7.99168e-05, |
| "loss": 0.8318, |
| "step": 37540 |
| }, |
| { |
| "epoch": 0.60096, |
| "grad_norm": 0.28489160537719727, |
| "learning_rate": 7.985280000000001e-05, |
| "loss": 0.8845, |
| "step": 37560 |
| }, |
| { |
| "epoch": 0.60128, |
| "grad_norm": 0.24311847984790802, |
| "learning_rate": 7.97888e-05, |
| "loss": 0.8619, |
| "step": 37580 |
| }, |
| { |
| "epoch": 0.6016, |
| "grad_norm": 0.2386752963066101, |
| "learning_rate": 7.97248e-05, |
| "loss": 0.8554, |
| "step": 37600 |
| }, |
| { |
| "epoch": 0.60192, |
| "grad_norm": 0.22845840454101562, |
| "learning_rate": 7.966080000000001e-05, |
| "loss": 0.8505, |
| "step": 37620 |
| }, |
| { |
| "epoch": 0.60224, |
| "grad_norm": 0.21545352041721344, |
| "learning_rate": 7.95968e-05, |
| "loss": 0.8725, |
| "step": 37640 |
| }, |
| { |
| "epoch": 0.60256, |
| "grad_norm": 0.24536795914173126, |
| "learning_rate": 7.953280000000001e-05, |
| "loss": 0.9144, |
| "step": 37660 |
| }, |
| { |
| "epoch": 0.60288, |
| "grad_norm": 0.29689520597457886, |
| "learning_rate": 7.94688e-05, |
| "loss": 0.8949, |
| "step": 37680 |
| }, |
| { |
| "epoch": 0.6032, |
| "grad_norm": 0.22102287411689758, |
| "learning_rate": 7.94048e-05, |
| "loss": 0.8847, |
| "step": 37700 |
| }, |
| { |
| "epoch": 0.60352, |
| "grad_norm": 0.2358902543783188, |
| "learning_rate": 7.93408e-05, |
| "loss": 0.8841, |
| "step": 37720 |
| }, |
| { |
| "epoch": 0.60384, |
| "grad_norm": 0.23542214930057526, |
| "learning_rate": 7.92768e-05, |
| "loss": 0.8899, |
| "step": 37740 |
| }, |
| { |
| "epoch": 0.60416, |
| "grad_norm": 0.23006409406661987, |
| "learning_rate": 7.921280000000002e-05, |
| "loss": 0.824, |
| "step": 37760 |
| }, |
| { |
| "epoch": 0.60448, |
| "grad_norm": 0.29189014434814453, |
| "learning_rate": 7.91488e-05, |
| "loss": 0.9226, |
| "step": 37780 |
| }, |
| { |
| "epoch": 0.6048, |
| "grad_norm": 0.23212820291519165, |
| "learning_rate": 7.90848e-05, |
| "loss": 0.883, |
| "step": 37800 |
| }, |
| { |
| "epoch": 0.60512, |
| "grad_norm": 0.2882430851459503, |
| "learning_rate": 7.90208e-05, |
| "loss": 0.8327, |
| "step": 37820 |
| }, |
| { |
| "epoch": 0.60544, |
| "grad_norm": 0.21909599006175995, |
| "learning_rate": 7.895680000000001e-05, |
| "loss": 0.8446, |
| "step": 37840 |
| }, |
| { |
| "epoch": 0.60576, |
| "grad_norm": 0.23385344445705414, |
| "learning_rate": 7.88928e-05, |
| "loss": 0.8822, |
| "step": 37860 |
| }, |
| { |
| "epoch": 0.60608, |
| "grad_norm": 0.23137474060058594, |
| "learning_rate": 7.88288e-05, |
| "loss": 0.8669, |
| "step": 37880 |
| }, |
| { |
| "epoch": 0.6064, |
| "grad_norm": 0.234337717294693, |
| "learning_rate": 7.876480000000001e-05, |
| "loss": 0.9464, |
| "step": 37900 |
| }, |
| { |
| "epoch": 0.60672, |
| "grad_norm": 0.19100037217140198, |
| "learning_rate": 7.87008e-05, |
| "loss": 0.8538, |
| "step": 37920 |
| }, |
| { |
| "epoch": 0.60704, |
| "grad_norm": 0.231523334980011, |
| "learning_rate": 7.86368e-05, |
| "loss": 0.8869, |
| "step": 37940 |
| }, |
| { |
| "epoch": 0.60736, |
| "grad_norm": 0.19973509013652802, |
| "learning_rate": 7.857280000000001e-05, |
| "loss": 0.8765, |
| "step": 37960 |
| }, |
| { |
| "epoch": 0.60768, |
| "grad_norm": 0.2176535725593567, |
| "learning_rate": 7.85088e-05, |
| "loss": 0.864, |
| "step": 37980 |
| }, |
| { |
| "epoch": 0.608, |
| "grad_norm": 0.2557581961154938, |
| "learning_rate": 7.844480000000001e-05, |
| "loss": 0.8782, |
| "step": 38000 |
| }, |
| { |
| "epoch": 0.60832, |
| "grad_norm": 0.273478627204895, |
| "learning_rate": 7.83808e-05, |
| "loss": 0.9086, |
| "step": 38020 |
| }, |
| { |
| "epoch": 0.60864, |
| "grad_norm": 0.26042431592941284, |
| "learning_rate": 7.83168e-05, |
| "loss": 0.8562, |
| "step": 38040 |
| }, |
| { |
| "epoch": 0.60896, |
| "grad_norm": 0.23801887035369873, |
| "learning_rate": 7.82528e-05, |
| "loss": 0.9028, |
| "step": 38060 |
| }, |
| { |
| "epoch": 0.60928, |
| "grad_norm": 0.22711488604545593, |
| "learning_rate": 7.81888e-05, |
| "loss": 0.9305, |
| "step": 38080 |
| }, |
| { |
| "epoch": 0.6096, |
| "grad_norm": 0.19922931492328644, |
| "learning_rate": 7.812480000000001e-05, |
| "loss": 0.8843, |
| "step": 38100 |
| }, |
| { |
| "epoch": 0.60992, |
| "grad_norm": 0.20743116736412048, |
| "learning_rate": 7.80608e-05, |
| "loss": 0.8837, |
| "step": 38120 |
| }, |
| { |
| "epoch": 0.61024, |
| "grad_norm": 0.2508273422718048, |
| "learning_rate": 7.79968e-05, |
| "loss": 0.8681, |
| "step": 38140 |
| }, |
| { |
| "epoch": 0.61056, |
| "grad_norm": 0.24501731991767883, |
| "learning_rate": 7.79328e-05, |
| "loss": 0.8787, |
| "step": 38160 |
| }, |
| { |
| "epoch": 0.61088, |
| "grad_norm": 0.258413702249527, |
| "learning_rate": 7.786880000000001e-05, |
| "loss": 0.9128, |
| "step": 38180 |
| }, |
| { |
| "epoch": 0.6112, |
| "grad_norm": 0.22945356369018555, |
| "learning_rate": 7.78048e-05, |
| "loss": 0.8835, |
| "step": 38200 |
| }, |
| { |
| "epoch": 0.61152, |
| "grad_norm": 0.2546984553337097, |
| "learning_rate": 7.77408e-05, |
| "loss": 0.8806, |
| "step": 38220 |
| }, |
| { |
| "epoch": 0.61184, |
| "grad_norm": 0.2060491144657135, |
| "learning_rate": 7.767680000000001e-05, |
| "loss": 0.8551, |
| "step": 38240 |
| }, |
| { |
| "epoch": 0.61216, |
| "grad_norm": 0.21388421952724457, |
| "learning_rate": 7.76128e-05, |
| "loss": 0.8576, |
| "step": 38260 |
| }, |
| { |
| "epoch": 0.61248, |
| "grad_norm": 0.22443564236164093, |
| "learning_rate": 7.75488e-05, |
| "loss": 0.8807, |
| "step": 38280 |
| }, |
| { |
| "epoch": 0.6128, |
| "grad_norm": 0.24119791388511658, |
| "learning_rate": 7.748480000000001e-05, |
| "loss": 0.9135, |
| "step": 38300 |
| }, |
| { |
| "epoch": 0.61312, |
| "grad_norm": 0.22703927755355835, |
| "learning_rate": 7.74208e-05, |
| "loss": 0.9121, |
| "step": 38320 |
| }, |
| { |
| "epoch": 0.61344, |
| "grad_norm": 0.23478037118911743, |
| "learning_rate": 7.735680000000001e-05, |
| "loss": 0.9024, |
| "step": 38340 |
| }, |
| { |
| "epoch": 0.61376, |
| "grad_norm": 0.22331403195858002, |
| "learning_rate": 7.729279999999999e-05, |
| "loss": 0.9034, |
| "step": 38360 |
| }, |
| { |
| "epoch": 0.61408, |
| "grad_norm": 0.22578079998493195, |
| "learning_rate": 7.72288e-05, |
| "loss": 0.855, |
| "step": 38380 |
| }, |
| { |
| "epoch": 0.6144, |
| "grad_norm": 0.22246988117694855, |
| "learning_rate": 7.716480000000001e-05, |
| "loss": 0.8515, |
| "step": 38400 |
| }, |
| { |
| "epoch": 0.61472, |
| "grad_norm": 0.20506010949611664, |
| "learning_rate": 7.71008e-05, |
| "loss": 0.8626, |
| "step": 38420 |
| }, |
| { |
| "epoch": 0.61504, |
| "grad_norm": 0.23380227386951447, |
| "learning_rate": 7.703680000000001e-05, |
| "loss": 0.8963, |
| "step": 38440 |
| }, |
| { |
| "epoch": 0.61536, |
| "grad_norm": 0.2194732278585434, |
| "learning_rate": 7.69728e-05, |
| "loss": 0.835, |
| "step": 38460 |
| }, |
| { |
| "epoch": 0.61568, |
| "grad_norm": 0.20710738003253937, |
| "learning_rate": 7.69088e-05, |
| "loss": 0.8783, |
| "step": 38480 |
| }, |
| { |
| "epoch": 0.616, |
| "grad_norm": 0.23248372972011566, |
| "learning_rate": 7.68448e-05, |
| "loss": 0.8603, |
| "step": 38500 |
| }, |
| { |
| "epoch": 0.61632, |
| "grad_norm": 0.21198534965515137, |
| "learning_rate": 7.678080000000001e-05, |
| "loss": 0.8963, |
| "step": 38520 |
| }, |
| { |
| "epoch": 0.61664, |
| "grad_norm": 0.2391924113035202, |
| "learning_rate": 7.67168e-05, |
| "loss": 0.8964, |
| "step": 38540 |
| }, |
| { |
| "epoch": 0.61696, |
| "grad_norm": 0.21630340814590454, |
| "learning_rate": 7.66528e-05, |
| "loss": 0.8468, |
| "step": 38560 |
| }, |
| { |
| "epoch": 0.61728, |
| "grad_norm": 0.22936932742595673, |
| "learning_rate": 7.658880000000001e-05, |
| "loss": 0.8638, |
| "step": 38580 |
| }, |
| { |
| "epoch": 0.6176, |
| "grad_norm": 0.15402059257030487, |
| "learning_rate": 7.65248e-05, |
| "loss": 0.8881, |
| "step": 38600 |
| }, |
| { |
| "epoch": 0.61792, |
| "grad_norm": 0.18893806636333466, |
| "learning_rate": 7.64608e-05, |
| "loss": 0.9137, |
| "step": 38620 |
| }, |
| { |
| "epoch": 0.61824, |
| "grad_norm": 0.19027528166770935, |
| "learning_rate": 7.639680000000001e-05, |
| "loss": 0.8402, |
| "step": 38640 |
| }, |
| { |
| "epoch": 0.61856, |
| "grad_norm": 0.2610226571559906, |
| "learning_rate": 7.63328e-05, |
| "loss": 0.8644, |
| "step": 38660 |
| }, |
| { |
| "epoch": 0.61888, |
| "grad_norm": 0.22377745807170868, |
| "learning_rate": 7.626880000000001e-05, |
| "loss": 0.8621, |
| "step": 38680 |
| }, |
| { |
| "epoch": 0.6192, |
| "grad_norm": 0.2550973892211914, |
| "learning_rate": 7.620479999999999e-05, |
| "loss": 0.8982, |
| "step": 38700 |
| }, |
| { |
| "epoch": 0.61952, |
| "grad_norm": 0.21758711338043213, |
| "learning_rate": 7.61408e-05, |
| "loss": 0.8616, |
| "step": 38720 |
| }, |
| { |
| "epoch": 0.61984, |
| "grad_norm": 0.2632993459701538, |
| "learning_rate": 7.607680000000001e-05, |
| "loss": 0.8565, |
| "step": 38740 |
| }, |
| { |
| "epoch": 0.62016, |
| "grad_norm": 0.26186972856521606, |
| "learning_rate": 7.60128e-05, |
| "loss": 0.883, |
| "step": 38760 |
| }, |
| { |
| "epoch": 0.62048, |
| "grad_norm": 0.22264538705348969, |
| "learning_rate": 7.594880000000001e-05, |
| "loss": 0.8625, |
| "step": 38780 |
| }, |
| { |
| "epoch": 0.6208, |
| "grad_norm": 0.2717147171497345, |
| "learning_rate": 7.58848e-05, |
| "loss": 0.9014, |
| "step": 38800 |
| }, |
| { |
| "epoch": 0.62112, |
| "grad_norm": 0.2638401687145233, |
| "learning_rate": 7.58208e-05, |
| "loss": 0.933, |
| "step": 38820 |
| }, |
| { |
| "epoch": 0.62144, |
| "grad_norm": 0.20931828022003174, |
| "learning_rate": 7.57568e-05, |
| "loss": 0.8697, |
| "step": 38840 |
| }, |
| { |
| "epoch": 0.62176, |
| "grad_norm": 0.22345200181007385, |
| "learning_rate": 7.569280000000001e-05, |
| "loss": 0.8404, |
| "step": 38860 |
| }, |
| { |
| "epoch": 0.62208, |
| "grad_norm": 0.21052759885787964, |
| "learning_rate": 7.56288e-05, |
| "loss": 0.8458, |
| "step": 38880 |
| }, |
| { |
| "epoch": 0.6224, |
| "grad_norm": 0.24614858627319336, |
| "learning_rate": 7.55648e-05, |
| "loss": 0.9028, |
| "step": 38900 |
| }, |
| { |
| "epoch": 0.62272, |
| "grad_norm": 0.276304692029953, |
| "learning_rate": 7.550080000000001e-05, |
| "loss": 0.9097, |
| "step": 38920 |
| }, |
| { |
| "epoch": 0.62304, |
| "grad_norm": 0.27636557817459106, |
| "learning_rate": 7.54368e-05, |
| "loss": 0.8969, |
| "step": 38940 |
| }, |
| { |
| "epoch": 0.62336, |
| "grad_norm": 0.2127619832754135, |
| "learning_rate": 7.53728e-05, |
| "loss": 0.9, |
| "step": 38960 |
| }, |
| { |
| "epoch": 0.62368, |
| "grad_norm": 0.23187151551246643, |
| "learning_rate": 7.53088e-05, |
| "loss": 0.9081, |
| "step": 38980 |
| }, |
| { |
| "epoch": 0.624, |
| "grad_norm": 0.2539467215538025, |
| "learning_rate": 7.52448e-05, |
| "loss": 0.8887, |
| "step": 39000 |
| }, |
| { |
| "epoch": 0.62432, |
| "grad_norm": 0.30993226170539856, |
| "learning_rate": 7.518080000000001e-05, |
| "loss": 0.9242, |
| "step": 39020 |
| }, |
| { |
| "epoch": 0.62464, |
| "grad_norm": 0.18882299959659576, |
| "learning_rate": 7.511679999999999e-05, |
| "loss": 0.8939, |
| "step": 39040 |
| }, |
| { |
| "epoch": 0.62496, |
| "grad_norm": 0.21862895786762238, |
| "learning_rate": 7.50528e-05, |
| "loss": 0.8707, |
| "step": 39060 |
| }, |
| { |
| "epoch": 0.62528, |
| "grad_norm": 0.26119473576545715, |
| "learning_rate": 7.498880000000001e-05, |
| "loss": 0.8699, |
| "step": 39080 |
| }, |
| { |
| "epoch": 0.6256, |
| "grad_norm": 0.24526222050189972, |
| "learning_rate": 7.49248e-05, |
| "loss": 0.8787, |
| "step": 39100 |
| }, |
| { |
| "epoch": 0.62592, |
| "grad_norm": 0.2376752644777298, |
| "learning_rate": 7.486080000000001e-05, |
| "loss": 0.9359, |
| "step": 39120 |
| }, |
| { |
| "epoch": 0.62624, |
| "grad_norm": 0.27865490317344666, |
| "learning_rate": 7.47968e-05, |
| "loss": 0.8842, |
| "step": 39140 |
| }, |
| { |
| "epoch": 0.62656, |
| "grad_norm": 0.22411134839057922, |
| "learning_rate": 7.47328e-05, |
| "loss": 0.9039, |
| "step": 39160 |
| }, |
| { |
| "epoch": 0.62688, |
| "grad_norm": 0.24417252838611603, |
| "learning_rate": 7.46688e-05, |
| "loss": 0.915, |
| "step": 39180 |
| }, |
| { |
| "epoch": 0.6272, |
| "grad_norm": 0.21417266130447388, |
| "learning_rate": 7.460480000000001e-05, |
| "loss": 0.8408, |
| "step": 39200 |
| }, |
| { |
| "epoch": 0.62752, |
| "grad_norm": 0.21754087507724762, |
| "learning_rate": 7.45408e-05, |
| "loss": 0.8663, |
| "step": 39220 |
| }, |
| { |
| "epoch": 0.62784, |
| "grad_norm": 0.2340565174818039, |
| "learning_rate": 7.44768e-05, |
| "loss": 0.8353, |
| "step": 39240 |
| }, |
| { |
| "epoch": 0.62816, |
| "grad_norm": 0.25035715103149414, |
| "learning_rate": 7.441280000000001e-05, |
| "loss": 0.8729, |
| "step": 39260 |
| }, |
| { |
| "epoch": 0.62848, |
| "grad_norm": 0.2600226104259491, |
| "learning_rate": 7.43488e-05, |
| "loss": 0.8794, |
| "step": 39280 |
| }, |
| { |
| "epoch": 0.6288, |
| "grad_norm": 0.27380433678627014, |
| "learning_rate": 7.42848e-05, |
| "loss": 0.8578, |
| "step": 39300 |
| }, |
| { |
| "epoch": 0.62912, |
| "grad_norm": 0.18544994294643402, |
| "learning_rate": 7.42208e-05, |
| "loss": 0.9026, |
| "step": 39320 |
| }, |
| { |
| "epoch": 0.62944, |
| "grad_norm": 0.23277778923511505, |
| "learning_rate": 7.41568e-05, |
| "loss": 0.8759, |
| "step": 39340 |
| }, |
| { |
| "epoch": 0.62976, |
| "grad_norm": 0.21839508414268494, |
| "learning_rate": 7.409280000000001e-05, |
| "loss": 0.884, |
| "step": 39360 |
| }, |
| { |
| "epoch": 0.63008, |
| "grad_norm": 0.20547005534172058, |
| "learning_rate": 7.402879999999999e-05, |
| "loss": 0.8755, |
| "step": 39380 |
| }, |
| { |
| "epoch": 0.6304, |
| "grad_norm": 0.21153508126735687, |
| "learning_rate": 7.39648e-05, |
| "loss": 0.8551, |
| "step": 39400 |
| }, |
| { |
| "epoch": 0.63072, |
| "grad_norm": 0.22079287469387054, |
| "learning_rate": 7.390080000000001e-05, |
| "loss": 0.8424, |
| "step": 39420 |
| }, |
| { |
| "epoch": 0.63104, |
| "grad_norm": 0.25068333745002747, |
| "learning_rate": 7.38368e-05, |
| "loss": 0.893, |
| "step": 39440 |
| }, |
| { |
| "epoch": 0.63136, |
| "grad_norm": 0.18888238072395325, |
| "learning_rate": 7.377280000000001e-05, |
| "loss": 0.8803, |
| "step": 39460 |
| }, |
| { |
| "epoch": 0.63168, |
| "grad_norm": 0.258759081363678, |
| "learning_rate": 7.37088e-05, |
| "loss": 0.921, |
| "step": 39480 |
| }, |
| { |
| "epoch": 0.632, |
| "grad_norm": 0.22620119154453278, |
| "learning_rate": 7.36448e-05, |
| "loss": 0.8552, |
| "step": 39500 |
| }, |
| { |
| "epoch": 0.63232, |
| "grad_norm": 0.2477254718542099, |
| "learning_rate": 7.35808e-05, |
| "loss": 0.8884, |
| "step": 39520 |
| }, |
| { |
| "epoch": 0.63264, |
| "grad_norm": 0.23816423118114471, |
| "learning_rate": 7.351680000000001e-05, |
| "loss": 0.8148, |
| "step": 39540 |
| }, |
| { |
| "epoch": 0.63296, |
| "grad_norm": 0.19741494953632355, |
| "learning_rate": 7.34528e-05, |
| "loss": 0.8668, |
| "step": 39560 |
| }, |
| { |
| "epoch": 0.63328, |
| "grad_norm": 0.25120246410369873, |
| "learning_rate": 7.33888e-05, |
| "loss": 0.9149, |
| "step": 39580 |
| }, |
| { |
| "epoch": 0.6336, |
| "grad_norm": 0.21695558726787567, |
| "learning_rate": 7.33248e-05, |
| "loss": 0.8762, |
| "step": 39600 |
| }, |
| { |
| "epoch": 0.63392, |
| "grad_norm": 0.2556038498878479, |
| "learning_rate": 7.32608e-05, |
| "loss": 0.8438, |
| "step": 39620 |
| }, |
| { |
| "epoch": 0.63424, |
| "grad_norm": 0.265425443649292, |
| "learning_rate": 7.31968e-05, |
| "loss": 0.913, |
| "step": 39640 |
| }, |
| { |
| "epoch": 0.63456, |
| "grad_norm": 0.23394323885440826, |
| "learning_rate": 7.31328e-05, |
| "loss": 0.8572, |
| "step": 39660 |
| }, |
| { |
| "epoch": 0.63488, |
| "grad_norm": 0.25647103786468506, |
| "learning_rate": 7.30688e-05, |
| "loss": 0.9088, |
| "step": 39680 |
| }, |
| { |
| "epoch": 0.6352, |
| "grad_norm": 0.2558782994747162, |
| "learning_rate": 7.300480000000001e-05, |
| "loss": 0.887, |
| "step": 39700 |
| }, |
| { |
| "epoch": 0.63552, |
| "grad_norm": 0.23038125038146973, |
| "learning_rate": 7.29408e-05, |
| "loss": 0.8505, |
| "step": 39720 |
| }, |
| { |
| "epoch": 0.63584, |
| "grad_norm": 0.22082971036434174, |
| "learning_rate": 7.28768e-05, |
| "loss": 0.8159, |
| "step": 39740 |
| }, |
| { |
| "epoch": 0.63616, |
| "grad_norm": 0.2407037317752838, |
| "learning_rate": 7.281280000000001e-05, |
| "loss": 0.8703, |
| "step": 39760 |
| }, |
| { |
| "epoch": 0.63648, |
| "grad_norm": 0.2498258352279663, |
| "learning_rate": 7.27488e-05, |
| "loss": 0.902, |
| "step": 39780 |
| }, |
| { |
| "epoch": 0.6368, |
| "grad_norm": 0.212127685546875, |
| "learning_rate": 7.268480000000001e-05, |
| "loss": 0.8748, |
| "step": 39800 |
| }, |
| { |
| "epoch": 0.63712, |
| "grad_norm": 0.2286374717950821, |
| "learning_rate": 7.26208e-05, |
| "loss": 0.8932, |
| "step": 39820 |
| }, |
| { |
| "epoch": 0.63744, |
| "grad_norm": 0.23190194368362427, |
| "learning_rate": 7.25568e-05, |
| "loss": 0.8772, |
| "step": 39840 |
| }, |
| { |
| "epoch": 0.63776, |
| "grad_norm": 0.2264523208141327, |
| "learning_rate": 7.249280000000001e-05, |
| "loss": 0.8824, |
| "step": 39860 |
| }, |
| { |
| "epoch": 0.63808, |
| "grad_norm": 0.267228901386261, |
| "learning_rate": 7.242880000000001e-05, |
| "loss": 0.808, |
| "step": 39880 |
| }, |
| { |
| "epoch": 0.6384, |
| "grad_norm": 0.25287920236587524, |
| "learning_rate": 7.23648e-05, |
| "loss": 0.8901, |
| "step": 39900 |
| }, |
| { |
| "epoch": 0.63872, |
| "grad_norm": 0.22771823406219482, |
| "learning_rate": 7.23008e-05, |
| "loss": 0.9111, |
| "step": 39920 |
| }, |
| { |
| "epoch": 0.63904, |
| "grad_norm": 0.20720939338207245, |
| "learning_rate": 7.22368e-05, |
| "loss": 0.89, |
| "step": 39940 |
| }, |
| { |
| "epoch": 0.63936, |
| "grad_norm": 0.24383579194545746, |
| "learning_rate": 7.21728e-05, |
| "loss": 0.9141, |
| "step": 39960 |
| }, |
| { |
| "epoch": 0.63968, |
| "grad_norm": 0.22674813866615295, |
| "learning_rate": 7.21088e-05, |
| "loss": 0.8842, |
| "step": 39980 |
| }, |
| { |
| "epoch": 0.64, |
| "grad_norm": 0.22404788434505463, |
| "learning_rate": 7.20448e-05, |
| "loss": 0.8501, |
| "step": 40000 |
| }, |
| { |
| "epoch": 0.64032, |
| "grad_norm": 0.21472766995429993, |
| "learning_rate": 7.19808e-05, |
| "loss": 0.9002, |
| "step": 40020 |
| }, |
| { |
| "epoch": 0.64064, |
| "grad_norm": 0.25677719712257385, |
| "learning_rate": 7.191680000000001e-05, |
| "loss": 0.9173, |
| "step": 40040 |
| }, |
| { |
| "epoch": 0.64096, |
| "grad_norm": 0.21479903161525726, |
| "learning_rate": 7.18528e-05, |
| "loss": 0.8685, |
| "step": 40060 |
| }, |
| { |
| "epoch": 0.64128, |
| "grad_norm": 0.24473252892494202, |
| "learning_rate": 7.17888e-05, |
| "loss": 0.8955, |
| "step": 40080 |
| }, |
| { |
| "epoch": 0.6416, |
| "grad_norm": 0.23189175128936768, |
| "learning_rate": 7.172480000000001e-05, |
| "loss": 0.8438, |
| "step": 40100 |
| }, |
| { |
| "epoch": 0.64192, |
| "grad_norm": 0.25505882501602173, |
| "learning_rate": 7.16608e-05, |
| "loss": 0.8654, |
| "step": 40120 |
| }, |
| { |
| "epoch": 0.64224, |
| "grad_norm": 0.23652783036231995, |
| "learning_rate": 7.159680000000001e-05, |
| "loss": 0.907, |
| "step": 40140 |
| }, |
| { |
| "epoch": 0.64256, |
| "grad_norm": 0.26799604296684265, |
| "learning_rate": 7.15328e-05, |
| "loss": 0.8599, |
| "step": 40160 |
| }, |
| { |
| "epoch": 0.64288, |
| "grad_norm": 0.2281332015991211, |
| "learning_rate": 7.14688e-05, |
| "loss": 0.9087, |
| "step": 40180 |
| }, |
| { |
| "epoch": 0.6432, |
| "grad_norm": 0.22313934564590454, |
| "learning_rate": 7.140480000000001e-05, |
| "loss": 0.8987, |
| "step": 40200 |
| }, |
| { |
| "epoch": 0.64352, |
| "grad_norm": 0.21266809105873108, |
| "learning_rate": 7.13408e-05, |
| "loss": 0.9406, |
| "step": 40220 |
| }, |
| { |
| "epoch": 0.64384, |
| "grad_norm": 0.21366780996322632, |
| "learning_rate": 7.12768e-05, |
| "loss": 0.8144, |
| "step": 40240 |
| }, |
| { |
| "epoch": 0.64416, |
| "grad_norm": 0.2068609744310379, |
| "learning_rate": 7.12128e-05, |
| "loss": 0.8527, |
| "step": 40260 |
| }, |
| { |
| "epoch": 0.64448, |
| "grad_norm": 0.2570587396621704, |
| "learning_rate": 7.11488e-05, |
| "loss": 0.8938, |
| "step": 40280 |
| }, |
| { |
| "epoch": 0.6448, |
| "grad_norm": 0.21306006610393524, |
| "learning_rate": 7.10848e-05, |
| "loss": 0.8601, |
| "step": 40300 |
| }, |
| { |
| "epoch": 0.64512, |
| "grad_norm": 0.23386195302009583, |
| "learning_rate": 7.10208e-05, |
| "loss": 0.8709, |
| "step": 40320 |
| }, |
| { |
| "epoch": 0.64544, |
| "grad_norm": 0.2133599817752838, |
| "learning_rate": 7.09568e-05, |
| "loss": 0.9178, |
| "step": 40340 |
| }, |
| { |
| "epoch": 0.64576, |
| "grad_norm": 0.23202918469905853, |
| "learning_rate": 7.08928e-05, |
| "loss": 0.8588, |
| "step": 40360 |
| }, |
| { |
| "epoch": 0.64608, |
| "grad_norm": 0.246184840798378, |
| "learning_rate": 7.082880000000001e-05, |
| "loss": 0.8732, |
| "step": 40380 |
| }, |
| { |
| "epoch": 0.6464, |
| "grad_norm": 0.22465592622756958, |
| "learning_rate": 7.07648e-05, |
| "loss": 0.8496, |
| "step": 40400 |
| }, |
| { |
| "epoch": 0.64672, |
| "grad_norm": 0.27027273178100586, |
| "learning_rate": 7.07008e-05, |
| "loss": 0.8732, |
| "step": 40420 |
| }, |
| { |
| "epoch": 0.64704, |
| "grad_norm": 0.23711097240447998, |
| "learning_rate": 7.063680000000001e-05, |
| "loss": 0.9601, |
| "step": 40440 |
| }, |
| { |
| "epoch": 0.64736, |
| "grad_norm": 0.21715228259563446, |
| "learning_rate": 7.05728e-05, |
| "loss": 0.93, |
| "step": 40460 |
| }, |
| { |
| "epoch": 0.64768, |
| "grad_norm": 0.18533211946487427, |
| "learning_rate": 7.050880000000001e-05, |
| "loss": 0.8923, |
| "step": 40480 |
| }, |
| { |
| "epoch": 0.648, |
| "grad_norm": 0.2325373739004135, |
| "learning_rate": 7.04448e-05, |
| "loss": 0.8445, |
| "step": 40500 |
| }, |
| { |
| "epoch": 0.64832, |
| "grad_norm": 0.20923930406570435, |
| "learning_rate": 7.03808e-05, |
| "loss": 0.8714, |
| "step": 40520 |
| }, |
| { |
| "epoch": 0.64864, |
| "grad_norm": 0.16755761206150055, |
| "learning_rate": 7.031680000000001e-05, |
| "loss": 0.856, |
| "step": 40540 |
| }, |
| { |
| "epoch": 0.64896, |
| "grad_norm": 0.20746345818042755, |
| "learning_rate": 7.02528e-05, |
| "loss": 0.9456, |
| "step": 40560 |
| }, |
| { |
| "epoch": 0.64928, |
| "grad_norm": 0.2445952445268631, |
| "learning_rate": 7.01888e-05, |
| "loss": 0.8962, |
| "step": 40580 |
| }, |
| { |
| "epoch": 0.6496, |
| "grad_norm": 0.21696268022060394, |
| "learning_rate": 7.01248e-05, |
| "loss": 0.8501, |
| "step": 40600 |
| }, |
| { |
| "epoch": 0.64992, |
| "grad_norm": 0.18578511476516724, |
| "learning_rate": 7.00608e-05, |
| "loss": 0.8168, |
| "step": 40620 |
| }, |
| { |
| "epoch": 0.65024, |
| "grad_norm": 0.25534483790397644, |
| "learning_rate": 6.99968e-05, |
| "loss": 0.8867, |
| "step": 40640 |
| }, |
| { |
| "epoch": 0.65056, |
| "grad_norm": 0.24341151118278503, |
| "learning_rate": 6.99328e-05, |
| "loss": 0.8426, |
| "step": 40660 |
| }, |
| { |
| "epoch": 0.65088, |
| "grad_norm": 0.229604572057724, |
| "learning_rate": 6.98688e-05, |
| "loss": 0.9253, |
| "step": 40680 |
| }, |
| { |
| "epoch": 0.6512, |
| "grad_norm": 0.24505998194217682, |
| "learning_rate": 6.98048e-05, |
| "loss": 0.9244, |
| "step": 40700 |
| }, |
| { |
| "epoch": 0.65152, |
| "grad_norm": 0.19099657237529755, |
| "learning_rate": 6.974080000000001e-05, |
| "loss": 0.8625, |
| "step": 40720 |
| }, |
| { |
| "epoch": 0.65184, |
| "grad_norm": 0.24071238934993744, |
| "learning_rate": 6.96768e-05, |
| "loss": 0.8457, |
| "step": 40740 |
| }, |
| { |
| "epoch": 0.65216, |
| "grad_norm": 0.2676192820072174, |
| "learning_rate": 6.96128e-05, |
| "loss": 0.8621, |
| "step": 40760 |
| }, |
| { |
| "epoch": 0.65248, |
| "grad_norm": 0.2141886055469513, |
| "learning_rate": 6.954880000000001e-05, |
| "loss": 0.8369, |
| "step": 40780 |
| }, |
| { |
| "epoch": 0.6528, |
| "grad_norm": 0.24451769888401031, |
| "learning_rate": 6.94848e-05, |
| "loss": 0.8665, |
| "step": 40800 |
| }, |
| { |
| "epoch": 0.65312, |
| "grad_norm": 0.20462900400161743, |
| "learning_rate": 6.942080000000001e-05, |
| "loss": 0.9034, |
| "step": 40820 |
| }, |
| { |
| "epoch": 0.65344, |
| "grad_norm": 0.2098025679588318, |
| "learning_rate": 6.935679999999999e-05, |
| "loss": 0.8874, |
| "step": 40840 |
| }, |
| { |
| "epoch": 0.65376, |
| "grad_norm": 0.2127532809972763, |
| "learning_rate": 6.92928e-05, |
| "loss": 0.8651, |
| "step": 40860 |
| }, |
| { |
| "epoch": 0.65408, |
| "grad_norm": 0.23097112774848938, |
| "learning_rate": 6.922880000000001e-05, |
| "loss": 0.8587, |
| "step": 40880 |
| }, |
| { |
| "epoch": 0.6544, |
| "grad_norm": 0.23748517036437988, |
| "learning_rate": 6.91648e-05, |
| "loss": 0.8306, |
| "step": 40900 |
| }, |
| { |
| "epoch": 0.65472, |
| "grad_norm": 0.2403116524219513, |
| "learning_rate": 6.91008e-05, |
| "loss": 0.8822, |
| "step": 40920 |
| }, |
| { |
| "epoch": 0.65504, |
| "grad_norm": 0.25235334038734436, |
| "learning_rate": 6.90368e-05, |
| "loss": 0.9339, |
| "step": 40940 |
| }, |
| { |
| "epoch": 0.65536, |
| "grad_norm": 0.23442967236042023, |
| "learning_rate": 6.89728e-05, |
| "loss": 0.9065, |
| "step": 40960 |
| }, |
| { |
| "epoch": 0.65568, |
| "grad_norm": 0.22697308659553528, |
| "learning_rate": 6.89088e-05, |
| "loss": 0.9111, |
| "step": 40980 |
| }, |
| { |
| "epoch": 0.656, |
| "grad_norm": 0.21100306510925293, |
| "learning_rate": 6.88448e-05, |
| "loss": 0.8536, |
| "step": 41000 |
| }, |
| { |
| "epoch": 0.65632, |
| "grad_norm": 0.24288100004196167, |
| "learning_rate": 6.87808e-05, |
| "loss": 0.8757, |
| "step": 41020 |
| }, |
| { |
| "epoch": 0.65664, |
| "grad_norm": 0.26507681608200073, |
| "learning_rate": 6.87168e-05, |
| "loss": 0.8786, |
| "step": 41040 |
| }, |
| { |
| "epoch": 0.65696, |
| "grad_norm": 0.2742130160331726, |
| "learning_rate": 6.865280000000001e-05, |
| "loss": 0.8888, |
| "step": 41060 |
| }, |
| { |
| "epoch": 0.65728, |
| "grad_norm": 0.23026636242866516, |
| "learning_rate": 6.85888e-05, |
| "loss": 0.8212, |
| "step": 41080 |
| }, |
| { |
| "epoch": 0.6576, |
| "grad_norm": 0.20454558730125427, |
| "learning_rate": 6.85248e-05, |
| "loss": 0.8761, |
| "step": 41100 |
| }, |
| { |
| "epoch": 0.65792, |
| "grad_norm": 0.20581161975860596, |
| "learning_rate": 6.846080000000001e-05, |
| "loss": 0.8897, |
| "step": 41120 |
| }, |
| { |
| "epoch": 0.65824, |
| "grad_norm": 0.24633437395095825, |
| "learning_rate": 6.83968e-05, |
| "loss": 0.8448, |
| "step": 41140 |
| }, |
| { |
| "epoch": 0.65856, |
| "grad_norm": 0.246739000082016, |
| "learning_rate": 6.833280000000001e-05, |
| "loss": 0.8769, |
| "step": 41160 |
| }, |
| { |
| "epoch": 0.65888, |
| "grad_norm": 0.22334247827529907, |
| "learning_rate": 6.82688e-05, |
| "loss": 0.8975, |
| "step": 41180 |
| }, |
| { |
| "epoch": 0.6592, |
| "grad_norm": 0.23122014105319977, |
| "learning_rate": 6.82048e-05, |
| "loss": 0.9277, |
| "step": 41200 |
| }, |
| { |
| "epoch": 0.65952, |
| "grad_norm": 0.26595011353492737, |
| "learning_rate": 6.8144e-05, |
| "loss": 0.8685, |
| "step": 41220 |
| }, |
| { |
| "epoch": 0.65984, |
| "grad_norm": 0.22354891896247864, |
| "learning_rate": 6.808e-05, |
| "loss": 0.8693, |
| "step": 41240 |
| }, |
| { |
| "epoch": 0.66016, |
| "grad_norm": 0.23355019092559814, |
| "learning_rate": 6.801600000000001e-05, |
| "loss": 0.8979, |
| "step": 41260 |
| }, |
| { |
| "epoch": 0.66048, |
| "grad_norm": 0.2354181855916977, |
| "learning_rate": 6.7952e-05, |
| "loss": 0.8735, |
| "step": 41280 |
| }, |
| { |
| "epoch": 0.6608, |
| "grad_norm": 0.26578792929649353, |
| "learning_rate": 6.788800000000001e-05, |
| "loss": 0.8971, |
| "step": 41300 |
| }, |
| { |
| "epoch": 0.66112, |
| "grad_norm": 0.26312100887298584, |
| "learning_rate": 6.782399999999999e-05, |
| "loss": 0.9185, |
| "step": 41320 |
| }, |
| { |
| "epoch": 0.66144, |
| "grad_norm": 0.22569258511066437, |
| "learning_rate": 6.776e-05, |
| "loss": 0.8838, |
| "step": 41340 |
| }, |
| { |
| "epoch": 0.66176, |
| "grad_norm": 0.30978450179100037, |
| "learning_rate": 6.769600000000001e-05, |
| "loss": 0.9312, |
| "step": 41360 |
| }, |
| { |
| "epoch": 0.66208, |
| "grad_norm": 0.23343753814697266, |
| "learning_rate": 6.7632e-05, |
| "loss": 0.8593, |
| "step": 41380 |
| }, |
| { |
| "epoch": 0.6624, |
| "grad_norm": 0.22287525236606598, |
| "learning_rate": 6.7568e-05, |
| "loss": 0.9196, |
| "step": 41400 |
| }, |
| { |
| "epoch": 0.66272, |
| "grad_norm": 0.20180395245552063, |
| "learning_rate": 6.7504e-05, |
| "loss": 0.8573, |
| "step": 41420 |
| }, |
| { |
| "epoch": 0.66304, |
| "grad_norm": 0.24834296107292175, |
| "learning_rate": 6.744e-05, |
| "loss": 0.8839, |
| "step": 41440 |
| }, |
| { |
| "epoch": 0.66336, |
| "grad_norm": 0.2042527049779892, |
| "learning_rate": 6.7376e-05, |
| "loss": 0.8805, |
| "step": 41460 |
| }, |
| { |
| "epoch": 0.66368, |
| "grad_norm": 0.22171486914157867, |
| "learning_rate": 6.731200000000001e-05, |
| "loss": 0.8742, |
| "step": 41480 |
| }, |
| { |
| "epoch": 0.664, |
| "grad_norm": 0.27567192912101746, |
| "learning_rate": 6.7248e-05, |
| "loss": 0.8997, |
| "step": 41500 |
| }, |
| { |
| "epoch": 0.66432, |
| "grad_norm": 0.22618427872657776, |
| "learning_rate": 6.7184e-05, |
| "loss": 0.8679, |
| "step": 41520 |
| }, |
| { |
| "epoch": 0.66464, |
| "grad_norm": 0.24690526723861694, |
| "learning_rate": 6.712000000000001e-05, |
| "loss": 0.8564, |
| "step": 41540 |
| }, |
| { |
| "epoch": 0.66496, |
| "grad_norm": 0.23084665834903717, |
| "learning_rate": 6.7056e-05, |
| "loss": 0.9033, |
| "step": 41560 |
| }, |
| { |
| "epoch": 0.66528, |
| "grad_norm": 0.2343718707561493, |
| "learning_rate": 6.6992e-05, |
| "loss": 0.8987, |
| "step": 41580 |
| }, |
| { |
| "epoch": 0.6656, |
| "grad_norm": 0.24334469437599182, |
| "learning_rate": 6.692800000000001e-05, |
| "loss": 0.8943, |
| "step": 41600 |
| }, |
| { |
| "epoch": 0.66592, |
| "grad_norm": 0.24616220593452454, |
| "learning_rate": 6.6864e-05, |
| "loss": 0.831, |
| "step": 41620 |
| }, |
| { |
| "epoch": 0.66624, |
| "grad_norm": 0.21528421342372894, |
| "learning_rate": 6.680000000000001e-05, |
| "loss": 0.8642, |
| "step": 41640 |
| }, |
| { |
| "epoch": 0.66656, |
| "grad_norm": 0.20237964391708374, |
| "learning_rate": 6.673599999999999e-05, |
| "loss": 0.9171, |
| "step": 41660 |
| }, |
| { |
| "epoch": 0.66688, |
| "grad_norm": 0.2018793821334839, |
| "learning_rate": 6.6672e-05, |
| "loss": 0.8975, |
| "step": 41680 |
| }, |
| { |
| "epoch": 0.6672, |
| "grad_norm": 0.2155706137418747, |
| "learning_rate": 6.660800000000001e-05, |
| "loss": 0.8354, |
| "step": 41700 |
| }, |
| { |
| "epoch": 0.66752, |
| "grad_norm": 0.23168103396892548, |
| "learning_rate": 6.6544e-05, |
| "loss": 0.916, |
| "step": 41720 |
| }, |
| { |
| "epoch": 0.66784, |
| "grad_norm": 0.20231425762176514, |
| "learning_rate": 6.648e-05, |
| "loss": 0.8639, |
| "step": 41740 |
| }, |
| { |
| "epoch": 0.66816, |
| "grad_norm": 0.20722989737987518, |
| "learning_rate": 6.6416e-05, |
| "loss": 0.8197, |
| "step": 41760 |
| }, |
| { |
| "epoch": 0.66848, |
| "grad_norm": 0.20583872497081757, |
| "learning_rate": 6.6352e-05, |
| "loss": 0.8644, |
| "step": 41780 |
| }, |
| { |
| "epoch": 0.6688, |
| "grad_norm": 0.22469474375247955, |
| "learning_rate": 6.6288e-05, |
| "loss": 0.8431, |
| "step": 41800 |
| }, |
| { |
| "epoch": 0.66912, |
| "grad_norm": 0.22593587636947632, |
| "learning_rate": 6.622400000000001e-05, |
| "loss": 0.8428, |
| "step": 41820 |
| }, |
| { |
| "epoch": 0.66944, |
| "grad_norm": 0.21532204747200012, |
| "learning_rate": 6.616e-05, |
| "loss": 0.8634, |
| "step": 41840 |
| }, |
| { |
| "epoch": 0.66976, |
| "grad_norm": 0.1992572546005249, |
| "learning_rate": 6.6096e-05, |
| "loss": 0.8744, |
| "step": 41860 |
| }, |
| { |
| "epoch": 0.67008, |
| "grad_norm": 0.23626761138439178, |
| "learning_rate": 6.603200000000001e-05, |
| "loss": 0.8928, |
| "step": 41880 |
| }, |
| { |
| "epoch": 0.6704, |
| "grad_norm": 0.2587644159793854, |
| "learning_rate": 6.5968e-05, |
| "loss": 0.8501, |
| "step": 41900 |
| }, |
| { |
| "epoch": 0.67072, |
| "grad_norm": 0.23042425513267517, |
| "learning_rate": 6.5904e-05, |
| "loss": 0.8317, |
| "step": 41920 |
| }, |
| { |
| "epoch": 0.67104, |
| "grad_norm": 0.20776692032814026, |
| "learning_rate": 6.584e-05, |
| "loss": 0.8842, |
| "step": 41940 |
| }, |
| { |
| "epoch": 0.67136, |
| "grad_norm": 0.2233342081308365, |
| "learning_rate": 6.5776e-05, |
| "loss": 0.9062, |
| "step": 41960 |
| }, |
| { |
| "epoch": 0.67168, |
| "grad_norm": 0.22281095385551453, |
| "learning_rate": 6.571200000000001e-05, |
| "loss": 0.8696, |
| "step": 41980 |
| }, |
| { |
| "epoch": 0.672, |
| "grad_norm": 0.2269035130739212, |
| "learning_rate": 6.564799999999999e-05, |
| "loss": 0.8983, |
| "step": 42000 |
| }, |
| { |
| "epoch": 0.67232, |
| "grad_norm": 0.21187534928321838, |
| "learning_rate": 6.5584e-05, |
| "loss": 0.8666, |
| "step": 42020 |
| }, |
| { |
| "epoch": 0.67264, |
| "grad_norm": 0.2288711965084076, |
| "learning_rate": 6.552000000000001e-05, |
| "loss": 0.8519, |
| "step": 42040 |
| }, |
| { |
| "epoch": 0.67296, |
| "grad_norm": 0.24002696573734283, |
| "learning_rate": 6.5456e-05, |
| "loss": 0.843, |
| "step": 42060 |
| }, |
| { |
| "epoch": 0.67328, |
| "grad_norm": 0.19838085770606995, |
| "learning_rate": 6.5392e-05, |
| "loss": 0.8429, |
| "step": 42080 |
| }, |
| { |
| "epoch": 0.6736, |
| "grad_norm": 0.24605266749858856, |
| "learning_rate": 6.5328e-05, |
| "loss": 0.9016, |
| "step": 42100 |
| }, |
| { |
| "epoch": 0.67392, |
| "grad_norm": 0.273473858833313, |
| "learning_rate": 6.5264e-05, |
| "loss": 0.9013, |
| "step": 42120 |
| }, |
| { |
| "epoch": 0.67424, |
| "grad_norm": 0.2528668940067291, |
| "learning_rate": 6.52e-05, |
| "loss": 0.9072, |
| "step": 42140 |
| }, |
| { |
| "epoch": 0.67456, |
| "grad_norm": 0.23695510625839233, |
| "learning_rate": 6.513600000000001e-05, |
| "loss": 0.8652, |
| "step": 42160 |
| }, |
| { |
| "epoch": 0.67488, |
| "grad_norm": 0.23263618350028992, |
| "learning_rate": 6.5072e-05, |
| "loss": 0.9238, |
| "step": 42180 |
| }, |
| { |
| "epoch": 0.6752, |
| "grad_norm": 0.2174840271472931, |
| "learning_rate": 6.5008e-05, |
| "loss": 0.8806, |
| "step": 42200 |
| }, |
| { |
| "epoch": 0.67552, |
| "grad_norm": 0.22841788828372955, |
| "learning_rate": 6.494400000000001e-05, |
| "loss": 0.8422, |
| "step": 42220 |
| }, |
| { |
| "epoch": 0.67584, |
| "grad_norm": 0.24447223544120789, |
| "learning_rate": 6.488e-05, |
| "loss": 0.8361, |
| "step": 42240 |
| }, |
| { |
| "epoch": 0.67616, |
| "grad_norm": 0.24879607558250427, |
| "learning_rate": 6.4816e-05, |
| "loss": 0.8944, |
| "step": 42260 |
| }, |
| { |
| "epoch": 0.67648, |
| "grad_norm": 0.26324090361595154, |
| "learning_rate": 6.4752e-05, |
| "loss": 0.8424, |
| "step": 42280 |
| }, |
| { |
| "epoch": 0.6768, |
| "grad_norm": 0.28680363297462463, |
| "learning_rate": 6.4688e-05, |
| "loss": 0.8763, |
| "step": 42300 |
| }, |
| { |
| "epoch": 0.67712, |
| "grad_norm": 0.2222435623407364, |
| "learning_rate": 6.462400000000001e-05, |
| "loss": 0.8901, |
| "step": 42320 |
| }, |
| { |
| "epoch": 0.67744, |
| "grad_norm": 0.2362917810678482, |
| "learning_rate": 6.455999999999999e-05, |
| "loss": 0.9079, |
| "step": 42340 |
| }, |
| { |
| "epoch": 0.67776, |
| "grad_norm": 0.23679310083389282, |
| "learning_rate": 6.4496e-05, |
| "loss": 0.832, |
| "step": 42360 |
| }, |
| { |
| "epoch": 0.67808, |
| "grad_norm": 0.23579975962638855, |
| "learning_rate": 6.443200000000001e-05, |
| "loss": 0.8957, |
| "step": 42380 |
| }, |
| { |
| "epoch": 0.6784, |
| "grad_norm": 0.2289842814207077, |
| "learning_rate": 6.4368e-05, |
| "loss": 0.9169, |
| "step": 42400 |
| }, |
| { |
| "epoch": 0.67872, |
| "grad_norm": 0.2322479486465454, |
| "learning_rate": 6.4304e-05, |
| "loss": 0.8729, |
| "step": 42420 |
| }, |
| { |
| "epoch": 0.67904, |
| "grad_norm": 0.2532987892627716, |
| "learning_rate": 6.42432e-05, |
| "loss": 0.8365, |
| "step": 42440 |
| }, |
| { |
| "epoch": 0.67936, |
| "grad_norm": 0.31229642033576965, |
| "learning_rate": 6.417920000000001e-05, |
| "loss": 0.9106, |
| "step": 42460 |
| }, |
| { |
| "epoch": 0.67968, |
| "grad_norm": 0.19338229298591614, |
| "learning_rate": 6.41152e-05, |
| "loss": 0.8812, |
| "step": 42480 |
| }, |
| { |
| "epoch": 0.68, |
| "grad_norm": 0.2138776332139969, |
| "learning_rate": 6.40512e-05, |
| "loss": 0.8538, |
| "step": 42500 |
| }, |
| { |
| "epoch": 0.68032, |
| "grad_norm": 0.2549976408481598, |
| "learning_rate": 6.39872e-05, |
| "loss": 0.8651, |
| "step": 42520 |
| }, |
| { |
| "epoch": 0.68064, |
| "grad_norm": 0.24992278218269348, |
| "learning_rate": 6.39232e-05, |
| "loss": 0.9094, |
| "step": 42540 |
| }, |
| { |
| "epoch": 0.68096, |
| "grad_norm": 0.2768593728542328, |
| "learning_rate": 6.385920000000001e-05, |
| "loss": 0.9047, |
| "step": 42560 |
| }, |
| { |
| "epoch": 0.68128, |
| "grad_norm": 0.2133874148130417, |
| "learning_rate": 6.37952e-05, |
| "loss": 0.9259, |
| "step": 42580 |
| }, |
| { |
| "epoch": 0.6816, |
| "grad_norm": 0.24320970475673676, |
| "learning_rate": 6.37312e-05, |
| "loss": 0.875, |
| "step": 42600 |
| }, |
| { |
| "epoch": 0.68192, |
| "grad_norm": 0.21263545751571655, |
| "learning_rate": 6.36672e-05, |
| "loss": 0.8896, |
| "step": 42620 |
| }, |
| { |
| "epoch": 0.68224, |
| "grad_norm": 0.18488876521587372, |
| "learning_rate": 6.360320000000001e-05, |
| "loss": 0.8557, |
| "step": 42640 |
| }, |
| { |
| "epoch": 0.68256, |
| "grad_norm": 0.2456846386194229, |
| "learning_rate": 6.35392e-05, |
| "loss": 0.8991, |
| "step": 42660 |
| }, |
| { |
| "epoch": 0.68288, |
| "grad_norm": 0.20528970658779144, |
| "learning_rate": 6.34752e-05, |
| "loss": 0.8908, |
| "step": 42680 |
| }, |
| { |
| "epoch": 0.6832, |
| "grad_norm": 0.225137397646904, |
| "learning_rate": 6.341120000000001e-05, |
| "loss": 0.8635, |
| "step": 42700 |
| }, |
| { |
| "epoch": 0.68352, |
| "grad_norm": 0.24640017747879028, |
| "learning_rate": 6.33472e-05, |
| "loss": 0.8551, |
| "step": 42720 |
| }, |
| { |
| "epoch": 0.68384, |
| "grad_norm": 0.22672517597675323, |
| "learning_rate": 6.32832e-05, |
| "loss": 0.8665, |
| "step": 42740 |
| }, |
| { |
| "epoch": 0.68416, |
| "grad_norm": 0.229408398270607, |
| "learning_rate": 6.32192e-05, |
| "loss": 0.9198, |
| "step": 42760 |
| }, |
| { |
| "epoch": 0.68448, |
| "grad_norm": 0.19723407924175262, |
| "learning_rate": 6.31552e-05, |
| "loss": 0.8959, |
| "step": 42780 |
| }, |
| { |
| "epoch": 0.6848, |
| "grad_norm": 0.2351776361465454, |
| "learning_rate": 6.309120000000001e-05, |
| "loss": 0.8286, |
| "step": 42800 |
| }, |
| { |
| "epoch": 0.68512, |
| "grad_norm": 0.17581576108932495, |
| "learning_rate": 6.30272e-05, |
| "loss": 0.8849, |
| "step": 42820 |
| }, |
| { |
| "epoch": 0.68544, |
| "grad_norm": 0.22729769349098206, |
| "learning_rate": 6.29632e-05, |
| "loss": 0.8592, |
| "step": 42840 |
| }, |
| { |
| "epoch": 0.68576, |
| "grad_norm": 0.25973424315452576, |
| "learning_rate": 6.289920000000001e-05, |
| "loss": 0.8797, |
| "step": 42860 |
| }, |
| { |
| "epoch": 0.68608, |
| "grad_norm": 0.24774223566055298, |
| "learning_rate": 6.28352e-05, |
| "loss": 0.8542, |
| "step": 42880 |
| }, |
| { |
| "epoch": 0.6864, |
| "grad_norm": 0.25668323040008545, |
| "learning_rate": 6.277120000000001e-05, |
| "loss": 0.9054, |
| "step": 42900 |
| }, |
| { |
| "epoch": 0.68672, |
| "grad_norm": 0.26286524534225464, |
| "learning_rate": 6.27072e-05, |
| "loss": 0.8654, |
| "step": 42920 |
| }, |
| { |
| "epoch": 0.68704, |
| "grad_norm": 0.24494454264640808, |
| "learning_rate": 6.26432e-05, |
| "loss": 0.9516, |
| "step": 42940 |
| }, |
| { |
| "epoch": 0.68736, |
| "grad_norm": 0.2337479293346405, |
| "learning_rate": 6.25792e-05, |
| "loss": 0.8931, |
| "step": 42960 |
| }, |
| { |
| "epoch": 0.68768, |
| "grad_norm": 0.2087046056985855, |
| "learning_rate": 6.251520000000001e-05, |
| "loss": 0.9421, |
| "step": 42980 |
| }, |
| { |
| "epoch": 0.688, |
| "grad_norm": 0.21605008840560913, |
| "learning_rate": 6.24512e-05, |
| "loss": 0.883, |
| "step": 43000 |
| }, |
| { |
| "epoch": 0.68832, |
| "grad_norm": 0.21600419282913208, |
| "learning_rate": 6.23872e-05, |
| "loss": 0.8408, |
| "step": 43020 |
| }, |
| { |
| "epoch": 0.68864, |
| "grad_norm": 0.2731294333934784, |
| "learning_rate": 6.23232e-05, |
| "loss": 0.8508, |
| "step": 43040 |
| }, |
| { |
| "epoch": 0.68896, |
| "grad_norm": 0.20644868910312653, |
| "learning_rate": 6.22592e-05, |
| "loss": 0.8578, |
| "step": 43060 |
| }, |
| { |
| "epoch": 0.68928, |
| "grad_norm": 0.22507797181606293, |
| "learning_rate": 6.21952e-05, |
| "loss": 0.8556, |
| "step": 43080 |
| }, |
| { |
| "epoch": 0.6896, |
| "grad_norm": 0.20173804461956024, |
| "learning_rate": 6.21312e-05, |
| "loss": 0.8631, |
| "step": 43100 |
| }, |
| { |
| "epoch": 0.68992, |
| "grad_norm": 0.2198924422264099, |
| "learning_rate": 6.20672e-05, |
| "loss": 0.9, |
| "step": 43120 |
| }, |
| { |
| "epoch": 0.69024, |
| "grad_norm": 0.2248951494693756, |
| "learning_rate": 6.200320000000001e-05, |
| "loss": 0.9064, |
| "step": 43140 |
| }, |
| { |
| "epoch": 0.69056, |
| "grad_norm": 0.23556740581989288, |
| "learning_rate": 6.19392e-05, |
| "loss": 0.866, |
| "step": 43160 |
| }, |
| { |
| "epoch": 0.69088, |
| "grad_norm": 0.2064543068408966, |
| "learning_rate": 6.18752e-05, |
| "loss": 0.8798, |
| "step": 43180 |
| }, |
| { |
| "epoch": 0.6912, |
| "grad_norm": 0.22137311100959778, |
| "learning_rate": 6.181120000000001e-05, |
| "loss": 0.8844, |
| "step": 43200 |
| }, |
| { |
| "epoch": 0.69152, |
| "grad_norm": 0.21415813267230988, |
| "learning_rate": 6.17472e-05, |
| "loss": 0.8576, |
| "step": 43220 |
| }, |
| { |
| "epoch": 0.69184, |
| "grad_norm": 0.22798651456832886, |
| "learning_rate": 6.168320000000001e-05, |
| "loss": 0.8471, |
| "step": 43240 |
| }, |
| { |
| "epoch": 0.69216, |
| "grad_norm": 0.233371764421463, |
| "learning_rate": 6.16192e-05, |
| "loss": 0.8734, |
| "step": 43260 |
| }, |
| { |
| "epoch": 0.69248, |
| "grad_norm": 0.20008385181427002, |
| "learning_rate": 6.15552e-05, |
| "loss": 0.8683, |
| "step": 43280 |
| }, |
| { |
| "epoch": 0.6928, |
| "grad_norm": 0.22969180345535278, |
| "learning_rate": 6.14912e-05, |
| "loss": 0.8694, |
| "step": 43300 |
| }, |
| { |
| "epoch": 0.69312, |
| "grad_norm": 0.2556081712245941, |
| "learning_rate": 6.142720000000001e-05, |
| "loss": 0.8439, |
| "step": 43320 |
| }, |
| { |
| "epoch": 0.69344, |
| "grad_norm": 0.2534750699996948, |
| "learning_rate": 6.13632e-05, |
| "loss": 0.9054, |
| "step": 43340 |
| }, |
| { |
| "epoch": 0.69376, |
| "grad_norm": 0.2144964188337326, |
| "learning_rate": 6.12992e-05, |
| "loss": 0.9026, |
| "step": 43360 |
| }, |
| { |
| "epoch": 0.69408, |
| "grad_norm": 0.21919748187065125, |
| "learning_rate": 6.12352e-05, |
| "loss": 0.901, |
| "step": 43380 |
| }, |
| { |
| "epoch": 0.6944, |
| "grad_norm": 0.19432856142520905, |
| "learning_rate": 6.11712e-05, |
| "loss": 0.9081, |
| "step": 43400 |
| }, |
| { |
| "epoch": 0.69472, |
| "grad_norm": 0.25123217701911926, |
| "learning_rate": 6.11072e-05, |
| "loss": 0.9019, |
| "step": 43420 |
| }, |
| { |
| "epoch": 0.69504, |
| "grad_norm": 0.29103386402130127, |
| "learning_rate": 6.10432e-05, |
| "loss": 0.9118, |
| "step": 43440 |
| }, |
| { |
| "epoch": 0.69536, |
| "grad_norm": 0.2519950270652771, |
| "learning_rate": 6.09792e-05, |
| "loss": 0.8581, |
| "step": 43460 |
| }, |
| { |
| "epoch": 0.69568, |
| "grad_norm": 0.2215908318758011, |
| "learning_rate": 6.09152e-05, |
| "loss": 0.8958, |
| "step": 43480 |
| }, |
| { |
| "epoch": 0.696, |
| "grad_norm": 0.23915638029575348, |
| "learning_rate": 6.085120000000001e-05, |
| "loss": 0.8523, |
| "step": 43500 |
| }, |
| { |
| "epoch": 0.69632, |
| "grad_norm": 0.2282445728778839, |
| "learning_rate": 6.07872e-05, |
| "loss": 0.8991, |
| "step": 43520 |
| }, |
| { |
| "epoch": 0.69664, |
| "grad_norm": 0.2502846419811249, |
| "learning_rate": 6.07232e-05, |
| "loss": 0.8664, |
| "step": 43540 |
| }, |
| { |
| "epoch": 0.69696, |
| "grad_norm": 0.208401620388031, |
| "learning_rate": 6.0659200000000004e-05, |
| "loss": 0.8687, |
| "step": 43560 |
| }, |
| { |
| "epoch": 0.69728, |
| "grad_norm": 0.20891068875789642, |
| "learning_rate": 6.0595200000000006e-05, |
| "loss": 0.9093, |
| "step": 43580 |
| }, |
| { |
| "epoch": 0.6976, |
| "grad_norm": 0.26877716183662415, |
| "learning_rate": 6.05312e-05, |
| "loss": 0.847, |
| "step": 43600 |
| }, |
| { |
| "epoch": 0.69792, |
| "grad_norm": 0.21002227067947388, |
| "learning_rate": 6.04672e-05, |
| "loss": 0.8587, |
| "step": 43620 |
| }, |
| { |
| "epoch": 0.69824, |
| "grad_norm": 0.19834822416305542, |
| "learning_rate": 6.0403200000000005e-05, |
| "loss": 0.8871, |
| "step": 43640 |
| }, |
| { |
| "epoch": 0.69856, |
| "grad_norm": 0.23921220004558563, |
| "learning_rate": 6.033920000000001e-05, |
| "loss": 0.8202, |
| "step": 43660 |
| }, |
| { |
| "epoch": 0.69888, |
| "grad_norm": 0.23094278573989868, |
| "learning_rate": 6.0275199999999995e-05, |
| "loss": 0.8753, |
| "step": 43680 |
| }, |
| { |
| "epoch": 0.6992, |
| "grad_norm": 0.25601616501808167, |
| "learning_rate": 6.0211200000000004e-05, |
| "loss": 0.8652, |
| "step": 43700 |
| }, |
| { |
| "epoch": 0.69952, |
| "grad_norm": 0.25069522857666016, |
| "learning_rate": 6.0147200000000006e-05, |
| "loss": 0.8942, |
| "step": 43720 |
| }, |
| { |
| "epoch": 0.69984, |
| "grad_norm": 0.19572977721691132, |
| "learning_rate": 6.008320000000001e-05, |
| "loss": 0.9245, |
| "step": 43740 |
| }, |
| { |
| "epoch": 0.70016, |
| "grad_norm": 0.24047626554965973, |
| "learning_rate": 6.0019199999999996e-05, |
| "loss": 0.899, |
| "step": 43760 |
| }, |
| { |
| "epoch": 0.70048, |
| "grad_norm": 0.21386469900608063, |
| "learning_rate": 5.99552e-05, |
| "loss": 0.874, |
| "step": 43780 |
| }, |
| { |
| "epoch": 0.7008, |
| "grad_norm": 0.22829948365688324, |
| "learning_rate": 5.98912e-05, |
| "loss": 0.9313, |
| "step": 43800 |
| }, |
| { |
| "epoch": 0.70112, |
| "grad_norm": 0.23667655885219574, |
| "learning_rate": 5.98272e-05, |
| "loss": 0.8878, |
| "step": 43820 |
| }, |
| { |
| "epoch": 0.70144, |
| "grad_norm": 0.2182048112154007, |
| "learning_rate": 5.976320000000001e-05, |
| "loss": 0.8397, |
| "step": 43840 |
| }, |
| { |
| "epoch": 0.70176, |
| "grad_norm": 0.20164678990840912, |
| "learning_rate": 5.96992e-05, |
| "loss": 0.8768, |
| "step": 43860 |
| }, |
| { |
| "epoch": 0.70208, |
| "grad_norm": 0.23960982263088226, |
| "learning_rate": 5.96352e-05, |
| "loss": 0.8932, |
| "step": 43880 |
| }, |
| { |
| "epoch": 0.7024, |
| "grad_norm": 0.20772390067577362, |
| "learning_rate": 5.95712e-05, |
| "loss": 0.9277, |
| "step": 43900 |
| }, |
| { |
| "epoch": 0.70272, |
| "grad_norm": 0.24492938816547394, |
| "learning_rate": 5.9507200000000005e-05, |
| "loss": 0.8923, |
| "step": 43920 |
| }, |
| { |
| "epoch": 0.70304, |
| "grad_norm": 0.23545905947685242, |
| "learning_rate": 5.94432e-05, |
| "loss": 0.9141, |
| "step": 43940 |
| }, |
| { |
| "epoch": 0.70336, |
| "grad_norm": 0.2978091239929199, |
| "learning_rate": 5.93792e-05, |
| "loss": 0.9643, |
| "step": 43960 |
| }, |
| { |
| "epoch": 0.70368, |
| "grad_norm": 0.19800467789173126, |
| "learning_rate": 5.9315200000000004e-05, |
| "loss": 0.8799, |
| "step": 43980 |
| }, |
| { |
| "epoch": 0.704, |
| "grad_norm": 0.24483546614646912, |
| "learning_rate": 5.9251200000000006e-05, |
| "loss": 0.8587, |
| "step": 44000 |
| }, |
| { |
| "epoch": 0.70432, |
| "grad_norm": 0.32727476954460144, |
| "learning_rate": 5.91872e-05, |
| "loss": 0.8876, |
| "step": 44020 |
| }, |
| { |
| "epoch": 0.70464, |
| "grad_norm": 0.21823062002658844, |
| "learning_rate": 5.9123200000000003e-05, |
| "loss": 0.8695, |
| "step": 44040 |
| }, |
| { |
| "epoch": 0.70496, |
| "grad_norm": 0.2308553159236908, |
| "learning_rate": 5.9059200000000005e-05, |
| "loss": 0.8926, |
| "step": 44060 |
| }, |
| { |
| "epoch": 0.70528, |
| "grad_norm": 0.219979926943779, |
| "learning_rate": 5.899520000000001e-05, |
| "loss": 0.9193, |
| "step": 44080 |
| }, |
| { |
| "epoch": 0.7056, |
| "grad_norm": 0.24282580614089966, |
| "learning_rate": 5.8931199999999996e-05, |
| "loss": 0.8812, |
| "step": 44100 |
| }, |
| { |
| "epoch": 0.70592, |
| "grad_norm": 0.20185592770576477, |
| "learning_rate": 5.88672e-05, |
| "loss": 0.8588, |
| "step": 44120 |
| }, |
| { |
| "epoch": 0.70624, |
| "grad_norm": 0.24580541253089905, |
| "learning_rate": 5.88032e-05, |
| "loss": 0.849, |
| "step": 44140 |
| }, |
| { |
| "epoch": 0.70656, |
| "grad_norm": 0.2542431950569153, |
| "learning_rate": 5.873920000000001e-05, |
| "loss": 0.882, |
| "step": 44160 |
| }, |
| { |
| "epoch": 0.70688, |
| "grad_norm": 0.24872715771198273, |
| "learning_rate": 5.867520000000001e-05, |
| "loss": 0.8544, |
| "step": 44180 |
| }, |
| { |
| "epoch": 0.7072, |
| "grad_norm": 0.19842933118343353, |
| "learning_rate": 5.86112e-05, |
| "loss": 0.8803, |
| "step": 44200 |
| }, |
| { |
| "epoch": 0.70752, |
| "grad_norm": 0.2545991539955139, |
| "learning_rate": 5.85472e-05, |
| "loss": 0.9178, |
| "step": 44220 |
| }, |
| { |
| "epoch": 0.70784, |
| "grad_norm": 0.2342890352010727, |
| "learning_rate": 5.84832e-05, |
| "loss": 0.8403, |
| "step": 44240 |
| }, |
| { |
| "epoch": 0.70816, |
| "grad_norm": 0.2353144884109497, |
| "learning_rate": 5.8419200000000005e-05, |
| "loss": 0.9219, |
| "step": 44260 |
| }, |
| { |
| "epoch": 0.70848, |
| "grad_norm": 0.21412351727485657, |
| "learning_rate": 5.83552e-05, |
| "loss": 0.8837, |
| "step": 44280 |
| }, |
| { |
| "epoch": 0.7088, |
| "grad_norm": 0.18827536702156067, |
| "learning_rate": 5.82912e-05, |
| "loss": 0.8567, |
| "step": 44300 |
| }, |
| { |
| "epoch": 0.70912, |
| "grad_norm": 0.23062194883823395, |
| "learning_rate": 5.8227200000000004e-05, |
| "loss": 0.903, |
| "step": 44320 |
| }, |
| { |
| "epoch": 0.70944, |
| "grad_norm": 0.23226912319660187, |
| "learning_rate": 5.8163200000000006e-05, |
| "loss": 0.8446, |
| "step": 44340 |
| }, |
| { |
| "epoch": 0.70976, |
| "grad_norm": 0.23661820590496063, |
| "learning_rate": 5.80992e-05, |
| "loss": 0.9511, |
| "step": 44360 |
| }, |
| { |
| "epoch": 0.71008, |
| "grad_norm": 0.2356158196926117, |
| "learning_rate": 5.80352e-05, |
| "loss": 0.8817, |
| "step": 44380 |
| }, |
| { |
| "epoch": 0.7104, |
| "grad_norm": 0.23160752654075623, |
| "learning_rate": 5.7971200000000005e-05, |
| "loss": 0.8637, |
| "step": 44400 |
| }, |
| { |
| "epoch": 0.71072, |
| "grad_norm": 0.20803622901439667, |
| "learning_rate": 5.790720000000001e-05, |
| "loss": 0.8552, |
| "step": 44420 |
| }, |
| { |
| "epoch": 0.71104, |
| "grad_norm": 0.22061729431152344, |
| "learning_rate": 5.7843199999999995e-05, |
| "loss": 0.8751, |
| "step": 44440 |
| }, |
| { |
| "epoch": 0.71136, |
| "grad_norm": 0.233897864818573, |
| "learning_rate": 5.77792e-05, |
| "loss": 0.8686, |
| "step": 44460 |
| }, |
| { |
| "epoch": 0.71168, |
| "grad_norm": 0.21677446365356445, |
| "learning_rate": 5.77152e-05, |
| "loss": 0.8967, |
| "step": 44480 |
| }, |
| { |
| "epoch": 0.712, |
| "grad_norm": 0.24504272639751434, |
| "learning_rate": 5.765120000000001e-05, |
| "loss": 0.8934, |
| "step": 44500 |
| }, |
| { |
| "epoch": 0.71232, |
| "grad_norm": 0.21646228432655334, |
| "learning_rate": 5.758720000000001e-05, |
| "loss": 0.8452, |
| "step": 44520 |
| }, |
| { |
| "epoch": 0.71264, |
| "grad_norm": 0.22801847755908966, |
| "learning_rate": 5.75232e-05, |
| "loss": 0.8388, |
| "step": 44540 |
| }, |
| { |
| "epoch": 0.71296, |
| "grad_norm": 0.19865715503692627, |
| "learning_rate": 5.74592e-05, |
| "loss": 0.9077, |
| "step": 44560 |
| }, |
| { |
| "epoch": 0.71328, |
| "grad_norm": 0.24044495820999146, |
| "learning_rate": 5.73952e-05, |
| "loss": 0.9122, |
| "step": 44580 |
| }, |
| { |
| "epoch": 0.7136, |
| "grad_norm": 0.23846623301506042, |
| "learning_rate": 5.7331200000000004e-05, |
| "loss": 0.8906, |
| "step": 44600 |
| }, |
| { |
| "epoch": 0.71392, |
| "grad_norm": 0.21420036256313324, |
| "learning_rate": 5.72672e-05, |
| "loss": 0.877, |
| "step": 44620 |
| }, |
| { |
| "epoch": 0.71424, |
| "grad_norm": 0.2217768281698227, |
| "learning_rate": 5.72032e-05, |
| "loss": 0.8717, |
| "step": 44640 |
| }, |
| { |
| "epoch": 0.71456, |
| "grad_norm": 0.23392203450202942, |
| "learning_rate": 5.7139200000000003e-05, |
| "loss": 0.898, |
| "step": 44660 |
| }, |
| { |
| "epoch": 0.71488, |
| "grad_norm": 0.22015775740146637, |
| "learning_rate": 5.7075200000000005e-05, |
| "loss": 0.8809, |
| "step": 44680 |
| }, |
| { |
| "epoch": 0.7152, |
| "grad_norm": 0.21397672593593597, |
| "learning_rate": 5.70112e-05, |
| "loss": 0.8441, |
| "step": 44700 |
| }, |
| { |
| "epoch": 0.71552, |
| "grad_norm": 0.25773394107818604, |
| "learning_rate": 5.69472e-05, |
| "loss": 0.8623, |
| "step": 44720 |
| }, |
| { |
| "epoch": 0.71584, |
| "grad_norm": 0.24330535531044006, |
| "learning_rate": 5.6883200000000005e-05, |
| "loss": 0.8889, |
| "step": 44740 |
| }, |
| { |
| "epoch": 0.71616, |
| "grad_norm": 0.20773817598819733, |
| "learning_rate": 5.6819200000000006e-05, |
| "loss": 0.8711, |
| "step": 44760 |
| }, |
| { |
| "epoch": 0.71648, |
| "grad_norm": 0.21590672433376312, |
| "learning_rate": 5.6755199999999995e-05, |
| "loss": 0.8426, |
| "step": 44780 |
| }, |
| { |
| "epoch": 0.7168, |
| "grad_norm": 0.1878194808959961, |
| "learning_rate": 5.66912e-05, |
| "loss": 0.873, |
| "step": 44800 |
| }, |
| { |
| "epoch": 0.71712, |
| "grad_norm": 0.2268812656402588, |
| "learning_rate": 5.6627200000000006e-05, |
| "loss": 0.8769, |
| "step": 44820 |
| }, |
| { |
| "epoch": 0.71744, |
| "grad_norm": 0.24054917693138123, |
| "learning_rate": 5.6566399999999994e-05, |
| "loss": 0.8766, |
| "step": 44840 |
| }, |
| { |
| "epoch": 0.71776, |
| "grad_norm": 0.2115447223186493, |
| "learning_rate": 5.65024e-05, |
| "loss": 0.8531, |
| "step": 44860 |
| }, |
| { |
| "epoch": 0.71808, |
| "grad_norm": 0.20368890464305878, |
| "learning_rate": 5.6438400000000005e-05, |
| "loss": 0.8418, |
| "step": 44880 |
| }, |
| { |
| "epoch": 0.7184, |
| "grad_norm": 0.2356366366147995, |
| "learning_rate": 5.637440000000001e-05, |
| "loss": 0.9, |
| "step": 44900 |
| }, |
| { |
| "epoch": 0.71872, |
| "grad_norm": 0.22393269836902618, |
| "learning_rate": 5.631040000000001e-05, |
| "loss": 0.8868, |
| "step": 44920 |
| }, |
| { |
| "epoch": 0.71904, |
| "grad_norm": 0.2569195032119751, |
| "learning_rate": 5.62464e-05, |
| "loss": 0.895, |
| "step": 44940 |
| }, |
| { |
| "epoch": 0.71936, |
| "grad_norm": 0.20783191919326782, |
| "learning_rate": 5.61824e-05, |
| "loss": 0.8627, |
| "step": 44960 |
| }, |
| { |
| "epoch": 0.71968, |
| "grad_norm": 0.2164582461118698, |
| "learning_rate": 5.61184e-05, |
| "loss": 0.8982, |
| "step": 44980 |
| }, |
| { |
| "epoch": 0.72, |
| "grad_norm": 0.38153156638145447, |
| "learning_rate": 5.605440000000001e-05, |
| "loss": 0.9014, |
| "step": 45000 |
| }, |
| { |
| "epoch": 0.72032, |
| "grad_norm": 0.2400229126214981, |
| "learning_rate": 5.59904e-05, |
| "loss": 0.8997, |
| "step": 45020 |
| }, |
| { |
| "epoch": 0.72064, |
| "grad_norm": 0.25559934973716736, |
| "learning_rate": 5.59264e-05, |
| "loss": 0.8984, |
| "step": 45040 |
| }, |
| { |
| "epoch": 0.72096, |
| "grad_norm": 0.2528096139431, |
| "learning_rate": 5.58624e-05, |
| "loss": 0.914, |
| "step": 45060 |
| }, |
| { |
| "epoch": 0.72128, |
| "grad_norm": 0.18854907155036926, |
| "learning_rate": 5.5798400000000004e-05, |
| "loss": 0.8876, |
| "step": 45080 |
| }, |
| { |
| "epoch": 0.7216, |
| "grad_norm": 0.22515028715133667, |
| "learning_rate": 5.57344e-05, |
| "loss": 0.8684, |
| "step": 45100 |
| }, |
| { |
| "epoch": 0.72192, |
| "grad_norm": 0.2182977795600891, |
| "learning_rate": 5.56704e-05, |
| "loss": 0.8838, |
| "step": 45120 |
| }, |
| { |
| "epoch": 0.72224, |
| "grad_norm": 0.21532991528511047, |
| "learning_rate": 5.5606400000000003e-05, |
| "loss": 0.8298, |
| "step": 45140 |
| }, |
| { |
| "epoch": 0.72256, |
| "grad_norm": 0.2378109246492386, |
| "learning_rate": 5.5542400000000005e-05, |
| "loss": 0.867, |
| "step": 45160 |
| }, |
| { |
| "epoch": 0.72288, |
| "grad_norm": 0.22187520563602448, |
| "learning_rate": 5.547840000000001e-05, |
| "loss": 0.87, |
| "step": 45180 |
| }, |
| { |
| "epoch": 0.7232, |
| "grad_norm": 0.2259528785943985, |
| "learning_rate": 5.54144e-05, |
| "loss": 0.885, |
| "step": 45200 |
| }, |
| { |
| "epoch": 0.72352, |
| "grad_norm": 0.19351425766944885, |
| "learning_rate": 5.5350400000000005e-05, |
| "loss": 0.8984, |
| "step": 45220 |
| }, |
| { |
| "epoch": 0.72384, |
| "grad_norm": 0.2292325645685196, |
| "learning_rate": 5.5286400000000007e-05, |
| "loss": 0.8794, |
| "step": 45240 |
| }, |
| { |
| "epoch": 0.72416, |
| "grad_norm": 0.20458444952964783, |
| "learning_rate": 5.522240000000001e-05, |
| "loss": 0.8585, |
| "step": 45260 |
| }, |
| { |
| "epoch": 0.72448, |
| "grad_norm": 0.22770562767982483, |
| "learning_rate": 5.51584e-05, |
| "loss": 0.9062, |
| "step": 45280 |
| }, |
| { |
| "epoch": 0.7248, |
| "grad_norm": 0.21661782264709473, |
| "learning_rate": 5.50944e-05, |
| "loss": 0.8842, |
| "step": 45300 |
| }, |
| { |
| "epoch": 0.72512, |
| "grad_norm": 0.19377048313617706, |
| "learning_rate": 5.50304e-05, |
| "loss": 0.8405, |
| "step": 45320 |
| }, |
| { |
| "epoch": 0.72544, |
| "grad_norm": 0.2309509813785553, |
| "learning_rate": 5.496640000000001e-05, |
| "loss": 0.8799, |
| "step": 45340 |
| }, |
| { |
| "epoch": 0.72576, |
| "grad_norm": 0.18839353322982788, |
| "learning_rate": 5.49024e-05, |
| "loss": 0.878, |
| "step": 45360 |
| }, |
| { |
| "epoch": 0.72608, |
| "grad_norm": 0.248517245054245, |
| "learning_rate": 5.48384e-05, |
| "loss": 0.9094, |
| "step": 45380 |
| }, |
| { |
| "epoch": 0.7264, |
| "grad_norm": 0.21810860931873322, |
| "learning_rate": 5.47744e-05, |
| "loss": 0.8679, |
| "step": 45400 |
| }, |
| { |
| "epoch": 0.72672, |
| "grad_norm": 0.2429954707622528, |
| "learning_rate": 5.4710400000000004e-05, |
| "loss": 0.8592, |
| "step": 45420 |
| }, |
| { |
| "epoch": 0.72704, |
| "grad_norm": 0.20929422974586487, |
| "learning_rate": 5.46464e-05, |
| "loss": 0.9011, |
| "step": 45440 |
| }, |
| { |
| "epoch": 0.72736, |
| "grad_norm": 0.2323046624660492, |
| "learning_rate": 5.45824e-05, |
| "loss": 0.8614, |
| "step": 45460 |
| }, |
| { |
| "epoch": 0.72768, |
| "grad_norm": 0.22738327085971832, |
| "learning_rate": 5.45184e-05, |
| "loss": 0.8795, |
| "step": 45480 |
| }, |
| { |
| "epoch": 0.728, |
| "grad_norm": 0.2241695523262024, |
| "learning_rate": 5.4454400000000005e-05, |
| "loss": 0.9009, |
| "step": 45500 |
| }, |
| { |
| "epoch": 0.72832, |
| "grad_norm": 0.21020178496837616, |
| "learning_rate": 5.439040000000001e-05, |
| "loss": 0.8568, |
| "step": 45520 |
| }, |
| { |
| "epoch": 0.72864, |
| "grad_norm": 0.24524196982383728, |
| "learning_rate": 5.43264e-05, |
| "loss": 0.8667, |
| "step": 45540 |
| }, |
| { |
| "epoch": 0.72896, |
| "grad_norm": 0.2374972701072693, |
| "learning_rate": 5.4262400000000004e-05, |
| "loss": 0.9086, |
| "step": 45560 |
| }, |
| { |
| "epoch": 0.72928, |
| "grad_norm": 0.2362067550420761, |
| "learning_rate": 5.4198400000000006e-05, |
| "loss": 0.876, |
| "step": 45580 |
| }, |
| { |
| "epoch": 0.7296, |
| "grad_norm": 0.21441881358623505, |
| "learning_rate": 5.413440000000001e-05, |
| "loss": 0.8941, |
| "step": 45600 |
| }, |
| { |
| "epoch": 0.72992, |
| "grad_norm": 0.22504673898220062, |
| "learning_rate": 5.4070399999999996e-05, |
| "loss": 0.9268, |
| "step": 45620 |
| }, |
| { |
| "epoch": 0.73024, |
| "grad_norm": 0.20583686232566833, |
| "learning_rate": 5.40064e-05, |
| "loss": 0.8298, |
| "step": 45640 |
| }, |
| { |
| "epoch": 0.73056, |
| "grad_norm": 0.21706163883209229, |
| "learning_rate": 5.394240000000001e-05, |
| "loss": 0.8833, |
| "step": 45660 |
| }, |
| { |
| "epoch": 0.73088, |
| "grad_norm": 0.202799990773201, |
| "learning_rate": 5.387840000000001e-05, |
| "loss": 0.8792, |
| "step": 45680 |
| }, |
| { |
| "epoch": 0.7312, |
| "grad_norm": 0.2602541446685791, |
| "learning_rate": 5.38144e-05, |
| "loss": 0.9037, |
| "step": 45700 |
| }, |
| { |
| "epoch": 0.73152, |
| "grad_norm": 0.22036013007164001, |
| "learning_rate": 5.37504e-05, |
| "loss": 0.8917, |
| "step": 45720 |
| }, |
| { |
| "epoch": 0.73184, |
| "grad_norm": 0.22023898363113403, |
| "learning_rate": 5.36864e-05, |
| "loss": 0.8671, |
| "step": 45740 |
| }, |
| { |
| "epoch": 0.73216, |
| "grad_norm": 0.23420779407024384, |
| "learning_rate": 5.3622400000000003e-05, |
| "loss": 0.9288, |
| "step": 45760 |
| }, |
| { |
| "epoch": 0.73248, |
| "grad_norm": 0.20039279758930206, |
| "learning_rate": 5.35584e-05, |
| "loss": 0.8755, |
| "step": 45780 |
| }, |
| { |
| "epoch": 0.7328, |
| "grad_norm": 0.2586964964866638, |
| "learning_rate": 5.34944e-05, |
| "loss": 0.8896, |
| "step": 45800 |
| }, |
| { |
| "epoch": 0.73312, |
| "grad_norm": 0.2525421380996704, |
| "learning_rate": 5.34304e-05, |
| "loss": 0.8514, |
| "step": 45820 |
| }, |
| { |
| "epoch": 0.73344, |
| "grad_norm": 0.2144252359867096, |
| "learning_rate": 5.3366400000000005e-05, |
| "loss": 0.9074, |
| "step": 45840 |
| }, |
| { |
| "epoch": 0.73376, |
| "grad_norm": 0.21878720819950104, |
| "learning_rate": 5.3302400000000007e-05, |
| "loss": 0.9488, |
| "step": 45860 |
| }, |
| { |
| "epoch": 0.73408, |
| "grad_norm": 0.24089403450489044, |
| "learning_rate": 5.32384e-05, |
| "loss": 0.9008, |
| "step": 45880 |
| }, |
| { |
| "epoch": 0.7344, |
| "grad_norm": 0.25092679262161255, |
| "learning_rate": 5.3174400000000004e-05, |
| "loss": 0.8905, |
| "step": 45900 |
| }, |
| { |
| "epoch": 0.73472, |
| "grad_norm": 0.24005566537380219, |
| "learning_rate": 5.3110400000000006e-05, |
| "loss": 0.8812, |
| "step": 45920 |
| }, |
| { |
| "epoch": 0.73504, |
| "grad_norm": 0.2381397932767868, |
| "learning_rate": 5.304640000000001e-05, |
| "loss": 0.8944, |
| "step": 45940 |
| }, |
| { |
| "epoch": 0.73536, |
| "grad_norm": 0.23799841105937958, |
| "learning_rate": 5.2982399999999996e-05, |
| "loss": 0.8684, |
| "step": 45960 |
| }, |
| { |
| "epoch": 0.73568, |
| "grad_norm": 0.22827275097370148, |
| "learning_rate": 5.29184e-05, |
| "loss": 0.8657, |
| "step": 45980 |
| }, |
| { |
| "epoch": 0.736, |
| "grad_norm": 0.24510063230991364, |
| "learning_rate": 5.285440000000001e-05, |
| "loss": 0.9598, |
| "step": 46000 |
| }, |
| { |
| "epoch": 0.73632, |
| "grad_norm": 0.22655485570430756, |
| "learning_rate": 5.279040000000001e-05, |
| "loss": 0.8511, |
| "step": 46020 |
| }, |
| { |
| "epoch": 0.73664, |
| "grad_norm": 0.23105552792549133, |
| "learning_rate": 5.27264e-05, |
| "loss": 0.8809, |
| "step": 46040 |
| }, |
| { |
| "epoch": 0.73696, |
| "grad_norm": 0.21182331442832947, |
| "learning_rate": 5.26624e-05, |
| "loss": 0.8786, |
| "step": 46060 |
| }, |
| { |
| "epoch": 0.73728, |
| "grad_norm": 0.2535363435745239, |
| "learning_rate": 5.25984e-05, |
| "loss": 0.8697, |
| "step": 46080 |
| }, |
| { |
| "epoch": 0.7376, |
| "grad_norm": 0.2286081463098526, |
| "learning_rate": 5.25344e-05, |
| "loss": 0.8694, |
| "step": 46100 |
| }, |
| { |
| "epoch": 0.73792, |
| "grad_norm": 0.2347458302974701, |
| "learning_rate": 5.24704e-05, |
| "loss": 0.9251, |
| "step": 46120 |
| }, |
| { |
| "epoch": 0.73824, |
| "grad_norm": 0.21052898466587067, |
| "learning_rate": 5.24064e-05, |
| "loss": 0.8944, |
| "step": 46140 |
| }, |
| { |
| "epoch": 0.73856, |
| "grad_norm": 0.23154202103614807, |
| "learning_rate": 5.23424e-05, |
| "loss": 0.8367, |
| "step": 46160 |
| }, |
| { |
| "epoch": 0.73888, |
| "grad_norm": 0.23162192106246948, |
| "learning_rate": 5.2278400000000004e-05, |
| "loss": 0.8762, |
| "step": 46180 |
| }, |
| { |
| "epoch": 0.7392, |
| "grad_norm": 0.2610846757888794, |
| "learning_rate": 5.2214400000000006e-05, |
| "loss": 0.9275, |
| "step": 46200 |
| }, |
| { |
| "epoch": 0.73952, |
| "grad_norm": 0.20983512699604034, |
| "learning_rate": 5.21504e-05, |
| "loss": 0.8411, |
| "step": 46220 |
| }, |
| { |
| "epoch": 0.73984, |
| "grad_norm": 0.27023133635520935, |
| "learning_rate": 5.20864e-05, |
| "loss": 0.8825, |
| "step": 46240 |
| }, |
| { |
| "epoch": 0.74016, |
| "grad_norm": 0.22272150218486786, |
| "learning_rate": 5.2022400000000005e-05, |
| "loss": 0.8826, |
| "step": 46260 |
| }, |
| { |
| "epoch": 0.74048, |
| "grad_norm": 0.22320957481861115, |
| "learning_rate": 5.195840000000001e-05, |
| "loss": 0.8204, |
| "step": 46280 |
| }, |
| { |
| "epoch": 0.7408, |
| "grad_norm": 0.20854775607585907, |
| "learning_rate": 5.1894399999999996e-05, |
| "loss": 0.8643, |
| "step": 46300 |
| }, |
| { |
| "epoch": 0.74112, |
| "grad_norm": 0.23853574693202972, |
| "learning_rate": 5.1830400000000004e-05, |
| "loss": 0.8504, |
| "step": 46320 |
| }, |
| { |
| "epoch": 0.74144, |
| "grad_norm": 0.2031133770942688, |
| "learning_rate": 5.1766400000000006e-05, |
| "loss": 0.8798, |
| "step": 46340 |
| }, |
| { |
| "epoch": 0.74176, |
| "grad_norm": 0.23090733587741852, |
| "learning_rate": 5.170240000000001e-05, |
| "loss": 0.8771, |
| "step": 46360 |
| }, |
| { |
| "epoch": 0.74208, |
| "grad_norm": 0.22893227636814117, |
| "learning_rate": 5.16384e-05, |
| "loss": 0.9161, |
| "step": 46380 |
| }, |
| { |
| "epoch": 0.7424, |
| "grad_norm": 0.24600179493427277, |
| "learning_rate": 5.15744e-05, |
| "loss": 0.9158, |
| "step": 46400 |
| }, |
| { |
| "epoch": 0.74272, |
| "grad_norm": 0.22234416007995605, |
| "learning_rate": 5.15104e-05, |
| "loss": 0.9224, |
| "step": 46420 |
| }, |
| { |
| "epoch": 0.74304, |
| "grad_norm": 0.22974424064159393, |
| "learning_rate": 5.14464e-05, |
| "loss": 0.8871, |
| "step": 46440 |
| }, |
| { |
| "epoch": 0.74336, |
| "grad_norm": 0.2495729774236679, |
| "learning_rate": 5.13824e-05, |
| "loss": 0.9271, |
| "step": 46460 |
| }, |
| { |
| "epoch": 0.74368, |
| "grad_norm": 0.2178795486688614, |
| "learning_rate": 5.13184e-05, |
| "loss": 0.86, |
| "step": 46480 |
| }, |
| { |
| "epoch": 0.744, |
| "grad_norm": 0.2375311255455017, |
| "learning_rate": 5.12544e-05, |
| "loss": 0.8524, |
| "step": 46500 |
| }, |
| { |
| "epoch": 0.74432, |
| "grad_norm": 0.21281583607196808, |
| "learning_rate": 5.1190400000000004e-05, |
| "loss": 0.9261, |
| "step": 46520 |
| }, |
| { |
| "epoch": 0.74464, |
| "grad_norm": 0.26535019278526306, |
| "learning_rate": 5.1126400000000006e-05, |
| "loss": 0.9067, |
| "step": 46540 |
| }, |
| { |
| "epoch": 0.74496, |
| "grad_norm": 0.1832839846611023, |
| "learning_rate": 5.10624e-05, |
| "loss": 0.8801, |
| "step": 46560 |
| }, |
| { |
| "epoch": 0.74528, |
| "grad_norm": 0.21736547350883484, |
| "learning_rate": 5.09984e-05, |
| "loss": 0.8953, |
| "step": 46580 |
| }, |
| { |
| "epoch": 0.7456, |
| "grad_norm": 0.2637736201286316, |
| "learning_rate": 5.0934400000000005e-05, |
| "loss": 0.8551, |
| "step": 46600 |
| }, |
| { |
| "epoch": 0.74592, |
| "grad_norm": 0.19663706421852112, |
| "learning_rate": 5.087040000000001e-05, |
| "loss": 0.9004, |
| "step": 46620 |
| }, |
| { |
| "epoch": 0.74624, |
| "grad_norm": 0.21443675458431244, |
| "learning_rate": 5.0806399999999995e-05, |
| "loss": 0.8947, |
| "step": 46640 |
| }, |
| { |
| "epoch": 0.74656, |
| "grad_norm": 0.2313489317893982, |
| "learning_rate": 5.0742400000000004e-05, |
| "loss": 0.8798, |
| "step": 46660 |
| }, |
| { |
| "epoch": 0.74688, |
| "grad_norm": 0.2411520630121231, |
| "learning_rate": 5.0678400000000006e-05, |
| "loss": 0.8833, |
| "step": 46680 |
| }, |
| { |
| "epoch": 0.7472, |
| "grad_norm": 0.24178458750247955, |
| "learning_rate": 5.061440000000001e-05, |
| "loss": 0.8768, |
| "step": 46700 |
| }, |
| { |
| "epoch": 0.74752, |
| "grad_norm": 0.24031583964824677, |
| "learning_rate": 5.0550399999999996e-05, |
| "loss": 0.9053, |
| "step": 46720 |
| }, |
| { |
| "epoch": 0.74784, |
| "grad_norm": 0.24462060630321503, |
| "learning_rate": 5.04864e-05, |
| "loss": 0.8677, |
| "step": 46740 |
| }, |
| { |
| "epoch": 0.74816, |
| "grad_norm": 0.18988333642482758, |
| "learning_rate": 5.04224e-05, |
| "loss": 0.877, |
| "step": 46760 |
| }, |
| { |
| "epoch": 0.74848, |
| "grad_norm": 0.23754200339317322, |
| "learning_rate": 5.03584e-05, |
| "loss": 0.8808, |
| "step": 46780 |
| }, |
| { |
| "epoch": 0.7488, |
| "grad_norm": 0.2371503710746765, |
| "learning_rate": 5.02944e-05, |
| "loss": 0.9283, |
| "step": 46800 |
| }, |
| { |
| "epoch": 0.74912, |
| "grad_norm": 0.21101176738739014, |
| "learning_rate": 5.02304e-05, |
| "loss": 0.8769, |
| "step": 46820 |
| }, |
| { |
| "epoch": 0.74944, |
| "grad_norm": 0.23707903921604156, |
| "learning_rate": 5.01664e-05, |
| "loss": 0.876, |
| "step": 46840 |
| }, |
| { |
| "epoch": 0.74976, |
| "grad_norm": 0.25081855058670044, |
| "learning_rate": 5.01024e-05, |
| "loss": 0.8429, |
| "step": 46860 |
| }, |
| { |
| "epoch": 0.75008, |
| "grad_norm": 0.2517668604850769, |
| "learning_rate": 5.0038400000000005e-05, |
| "loss": 0.8936, |
| "step": 46880 |
| }, |
| { |
| "epoch": 0.7504, |
| "grad_norm": 0.2321518510580063, |
| "learning_rate": 4.997440000000001e-05, |
| "loss": 0.9427, |
| "step": 46900 |
| }, |
| { |
| "epoch": 0.75072, |
| "grad_norm": 0.3038017749786377, |
| "learning_rate": 4.99104e-05, |
| "loss": 0.9116, |
| "step": 46920 |
| }, |
| { |
| "epoch": 0.75104, |
| "grad_norm": 0.22047431766986847, |
| "learning_rate": 4.9846400000000004e-05, |
| "loss": 0.9356, |
| "step": 46940 |
| }, |
| { |
| "epoch": 0.75136, |
| "grad_norm": 0.2446911334991455, |
| "learning_rate": 4.97824e-05, |
| "loss": 0.8855, |
| "step": 46960 |
| }, |
| { |
| "epoch": 0.75168, |
| "grad_norm": 0.23208874464035034, |
| "learning_rate": 4.97184e-05, |
| "loss": 0.9064, |
| "step": 46980 |
| }, |
| { |
| "epoch": 0.752, |
| "grad_norm": 0.22263742983341217, |
| "learning_rate": 4.9654400000000004e-05, |
| "loss": 0.9262, |
| "step": 47000 |
| }, |
| { |
| "epoch": 0.75232, |
| "grad_norm": 0.24141012132167816, |
| "learning_rate": 4.9590400000000006e-05, |
| "loss": 0.8755, |
| "step": 47020 |
| }, |
| { |
| "epoch": 0.75264, |
| "grad_norm": 0.21403875946998596, |
| "learning_rate": 4.95264e-05, |
| "loss": 0.9046, |
| "step": 47040 |
| }, |
| { |
| "epoch": 0.75296, |
| "grad_norm": 0.22028857469558716, |
| "learning_rate": 4.94624e-05, |
| "loss": 0.8604, |
| "step": 47060 |
| }, |
| { |
| "epoch": 0.75328, |
| "grad_norm": 0.23686060309410095, |
| "learning_rate": 4.93984e-05, |
| "loss": 0.9034, |
| "step": 47080 |
| }, |
| { |
| "epoch": 0.7536, |
| "grad_norm": 0.21621714532375336, |
| "learning_rate": 4.93344e-05, |
| "loss": 0.8753, |
| "step": 47100 |
| }, |
| { |
| "epoch": 0.75392, |
| "grad_norm": 0.19985179603099823, |
| "learning_rate": 4.92704e-05, |
| "loss": 0.8533, |
| "step": 47120 |
| }, |
| { |
| "epoch": 0.75424, |
| "grad_norm": 0.25167474150657654, |
| "learning_rate": 4.9206400000000004e-05, |
| "loss": 0.899, |
| "step": 47140 |
| }, |
| { |
| "epoch": 0.75456, |
| "grad_norm": 0.22282272577285767, |
| "learning_rate": 4.9142400000000006e-05, |
| "loss": 0.8312, |
| "step": 47160 |
| }, |
| { |
| "epoch": 0.75488, |
| "grad_norm": 0.219001904129982, |
| "learning_rate": 4.90784e-05, |
| "loss": 0.9131, |
| "step": 47180 |
| }, |
| { |
| "epoch": 0.7552, |
| "grad_norm": 0.244069442152977, |
| "learning_rate": 4.90144e-05, |
| "loss": 0.8767, |
| "step": 47200 |
| }, |
| { |
| "epoch": 0.75552, |
| "grad_norm": 0.2010125070810318, |
| "learning_rate": 4.8950400000000005e-05, |
| "loss": 0.8617, |
| "step": 47220 |
| }, |
| { |
| "epoch": 0.75584, |
| "grad_norm": 0.19826588034629822, |
| "learning_rate": 4.888640000000001e-05, |
| "loss": 0.8655, |
| "step": 47240 |
| }, |
| { |
| "epoch": 0.75616, |
| "grad_norm": 0.2718552052974701, |
| "learning_rate": 4.88224e-05, |
| "loss": 0.8918, |
| "step": 47260 |
| }, |
| { |
| "epoch": 0.75648, |
| "grad_norm": 0.1939408779144287, |
| "learning_rate": 4.8758400000000004e-05, |
| "loss": 0.8851, |
| "step": 47280 |
| }, |
| { |
| "epoch": 0.7568, |
| "grad_norm": 0.2180645763874054, |
| "learning_rate": 4.86944e-05, |
| "loss": 0.8959, |
| "step": 47300 |
| }, |
| { |
| "epoch": 0.75712, |
| "grad_norm": 0.18682503700256348, |
| "learning_rate": 4.86304e-05, |
| "loss": 0.8892, |
| "step": 47320 |
| }, |
| { |
| "epoch": 0.75744, |
| "grad_norm": 0.24464449286460876, |
| "learning_rate": 4.85664e-05, |
| "loss": 0.8513, |
| "step": 47340 |
| }, |
| { |
| "epoch": 0.75776, |
| "grad_norm": 0.22836542129516602, |
| "learning_rate": 4.8502400000000005e-05, |
| "loss": 0.7955, |
| "step": 47360 |
| }, |
| { |
| "epoch": 0.75808, |
| "grad_norm": 0.236654594540596, |
| "learning_rate": 4.84384e-05, |
| "loss": 0.9108, |
| "step": 47380 |
| }, |
| { |
| "epoch": 0.7584, |
| "grad_norm": 0.23842214047908783, |
| "learning_rate": 4.83744e-05, |
| "loss": 0.9084, |
| "step": 47400 |
| }, |
| { |
| "epoch": 0.75872, |
| "grad_norm": 0.26700836420059204, |
| "learning_rate": 4.83104e-05, |
| "loss": 0.8792, |
| "step": 47420 |
| }, |
| { |
| "epoch": 0.75904, |
| "grad_norm": 0.19707651436328888, |
| "learning_rate": 4.82464e-05, |
| "loss": 0.885, |
| "step": 47440 |
| }, |
| { |
| "epoch": 0.75936, |
| "grad_norm": 0.2712419927120209, |
| "learning_rate": 4.81824e-05, |
| "loss": 0.891, |
| "step": 47460 |
| }, |
| { |
| "epoch": 0.75968, |
| "grad_norm": 0.22818304598331451, |
| "learning_rate": 4.81184e-05, |
| "loss": 0.8668, |
| "step": 47480 |
| }, |
| { |
| "epoch": 0.76, |
| "grad_norm": 0.24705687165260315, |
| "learning_rate": 4.8054400000000005e-05, |
| "loss": 0.853, |
| "step": 47500 |
| }, |
| { |
| "epoch": 0.76032, |
| "grad_norm": 0.2136003077030182, |
| "learning_rate": 4.79904e-05, |
| "loss": 0.8795, |
| "step": 47520 |
| }, |
| { |
| "epoch": 0.76064, |
| "grad_norm": 0.22492119669914246, |
| "learning_rate": 4.79264e-05, |
| "loss": 0.8962, |
| "step": 47540 |
| }, |
| { |
| "epoch": 0.76096, |
| "grad_norm": 0.21469563245773315, |
| "learning_rate": 4.7862400000000004e-05, |
| "loss": 0.8804, |
| "step": 47560 |
| }, |
| { |
| "epoch": 0.76128, |
| "grad_norm": 0.229572594165802, |
| "learning_rate": 4.7798400000000006e-05, |
| "loss": 0.898, |
| "step": 47580 |
| }, |
| { |
| "epoch": 0.7616, |
| "grad_norm": 0.2185087352991104, |
| "learning_rate": 4.77344e-05, |
| "loss": 0.9312, |
| "step": 47600 |
| }, |
| { |
| "epoch": 0.76192, |
| "grad_norm": 0.24852368235588074, |
| "learning_rate": 4.7670400000000004e-05, |
| "loss": 0.8916, |
| "step": 47620 |
| }, |
| { |
| "epoch": 0.76224, |
| "grad_norm": 0.20700128376483917, |
| "learning_rate": 4.76064e-05, |
| "loss": 0.8553, |
| "step": 47640 |
| }, |
| { |
| "epoch": 0.76256, |
| "grad_norm": 0.1880226880311966, |
| "learning_rate": 4.75424e-05, |
| "loss": 0.8134, |
| "step": 47660 |
| }, |
| { |
| "epoch": 0.76288, |
| "grad_norm": 0.24719256162643433, |
| "learning_rate": 4.74784e-05, |
| "loss": 0.9169, |
| "step": 47680 |
| }, |
| { |
| "epoch": 0.7632, |
| "grad_norm": 0.2389199137687683, |
| "learning_rate": 4.7414400000000005e-05, |
| "loss": 0.8902, |
| "step": 47700 |
| }, |
| { |
| "epoch": 0.76352, |
| "grad_norm": 0.26046285033226013, |
| "learning_rate": 4.73504e-05, |
| "loss": 0.9075, |
| "step": 47720 |
| }, |
| { |
| "epoch": 0.76384, |
| "grad_norm": 0.2406904399394989, |
| "learning_rate": 4.72864e-05, |
| "loss": 0.8481, |
| "step": 47740 |
| }, |
| { |
| "epoch": 0.76416, |
| "grad_norm": 0.24346570670604706, |
| "learning_rate": 4.72224e-05, |
| "loss": 0.8482, |
| "step": 47760 |
| }, |
| { |
| "epoch": 0.76448, |
| "grad_norm": 0.2557404935359955, |
| "learning_rate": 4.7158400000000006e-05, |
| "loss": 0.9096, |
| "step": 47780 |
| }, |
| { |
| "epoch": 0.7648, |
| "grad_norm": 0.22144544124603271, |
| "learning_rate": 4.70944e-05, |
| "loss": 0.8489, |
| "step": 47800 |
| }, |
| { |
| "epoch": 0.76512, |
| "grad_norm": 0.2356208860874176, |
| "learning_rate": 4.70304e-05, |
| "loss": 0.8703, |
| "step": 47820 |
| }, |
| { |
| "epoch": 0.76544, |
| "grad_norm": 0.20454536378383636, |
| "learning_rate": 4.6966400000000005e-05, |
| "loss": 0.9135, |
| "step": 47840 |
| }, |
| { |
| "epoch": 0.76576, |
| "grad_norm": 0.2013743370771408, |
| "learning_rate": 4.69024e-05, |
| "loss": 0.8641, |
| "step": 47860 |
| }, |
| { |
| "epoch": 0.76608, |
| "grad_norm": 0.20594638586044312, |
| "learning_rate": 4.68384e-05, |
| "loss": 0.9012, |
| "step": 47880 |
| }, |
| { |
| "epoch": 0.7664, |
| "grad_norm": 0.23454588651657104, |
| "learning_rate": 4.6774400000000004e-05, |
| "loss": 0.8529, |
| "step": 47900 |
| }, |
| { |
| "epoch": 0.76672, |
| "grad_norm": 0.2404514104127884, |
| "learning_rate": 4.6710400000000006e-05, |
| "loss": 0.9, |
| "step": 47920 |
| }, |
| { |
| "epoch": 0.76704, |
| "grad_norm": 0.21869786083698273, |
| "learning_rate": 4.66496e-05, |
| "loss": 0.8843, |
| "step": 47940 |
| }, |
| { |
| "epoch": 0.76736, |
| "grad_norm": 0.228584423661232, |
| "learning_rate": 4.65856e-05, |
| "loss": 0.935, |
| "step": 47960 |
| }, |
| { |
| "epoch": 0.76768, |
| "grad_norm": 0.2123897522687912, |
| "learning_rate": 4.6521600000000005e-05, |
| "loss": 0.8897, |
| "step": 47980 |
| }, |
| { |
| "epoch": 0.768, |
| "grad_norm": 0.27578243613243103, |
| "learning_rate": 4.64576e-05, |
| "loss": 0.8803, |
| "step": 48000 |
| }, |
| { |
| "epoch": 0.76832, |
| "grad_norm": 0.2598460614681244, |
| "learning_rate": 4.63936e-05, |
| "loss": 0.8164, |
| "step": 48020 |
| }, |
| { |
| "epoch": 0.76864, |
| "grad_norm": 0.21342791616916656, |
| "learning_rate": 4.63296e-05, |
| "loss": 0.9123, |
| "step": 48040 |
| }, |
| { |
| "epoch": 0.76896, |
| "grad_norm": 0.2282058596611023, |
| "learning_rate": 4.6265600000000006e-05, |
| "loss": 0.8956, |
| "step": 48060 |
| }, |
| { |
| "epoch": 0.76928, |
| "grad_norm": 0.21980886161327362, |
| "learning_rate": 4.62016e-05, |
| "loss": 0.8567, |
| "step": 48080 |
| }, |
| { |
| "epoch": 0.7696, |
| "grad_norm": 0.24570724368095398, |
| "learning_rate": 4.6137600000000004e-05, |
| "loss": 0.9293, |
| "step": 48100 |
| }, |
| { |
| "epoch": 0.76992, |
| "grad_norm": 0.21538405120372772, |
| "learning_rate": 4.60736e-05, |
| "loss": 0.8453, |
| "step": 48120 |
| }, |
| { |
| "epoch": 0.77024, |
| "grad_norm": 0.19840775430202484, |
| "learning_rate": 4.60096e-05, |
| "loss": 0.8911, |
| "step": 48140 |
| }, |
| { |
| "epoch": 0.77056, |
| "grad_norm": 0.24362660944461823, |
| "learning_rate": 4.59456e-05, |
| "loss": 0.8839, |
| "step": 48160 |
| }, |
| { |
| "epoch": 0.77088, |
| "grad_norm": 0.23664100468158722, |
| "learning_rate": 4.5881600000000005e-05, |
| "loss": 0.9147, |
| "step": 48180 |
| }, |
| { |
| "epoch": 0.7712, |
| "grad_norm": 0.22470878064632416, |
| "learning_rate": 4.581760000000001e-05, |
| "loss": 0.9081, |
| "step": 48200 |
| }, |
| { |
| "epoch": 0.77152, |
| "grad_norm": 0.25879278779029846, |
| "learning_rate": 4.57536e-05, |
| "loss": 0.8678, |
| "step": 48220 |
| }, |
| { |
| "epoch": 0.77184, |
| "grad_norm": 0.22820644080638885, |
| "learning_rate": 4.5689600000000004e-05, |
| "loss": 0.8437, |
| "step": 48240 |
| }, |
| { |
| "epoch": 0.77216, |
| "grad_norm": 0.24052444100379944, |
| "learning_rate": 4.56256e-05, |
| "loss": 0.8222, |
| "step": 48260 |
| }, |
| { |
| "epoch": 0.77248, |
| "grad_norm": 0.2304847240447998, |
| "learning_rate": 4.55616e-05, |
| "loss": 0.9303, |
| "step": 48280 |
| }, |
| { |
| "epoch": 0.7728, |
| "grad_norm": 0.2518431544303894, |
| "learning_rate": 4.54976e-05, |
| "loss": 0.8785, |
| "step": 48300 |
| }, |
| { |
| "epoch": 0.77312, |
| "grad_norm": 0.2376391738653183, |
| "learning_rate": 4.5433600000000005e-05, |
| "loss": 0.8469, |
| "step": 48320 |
| }, |
| { |
| "epoch": 0.77344, |
| "grad_norm": 0.24182195961475372, |
| "learning_rate": 4.53696e-05, |
| "loss": 0.916, |
| "step": 48340 |
| }, |
| { |
| "epoch": 0.77376, |
| "grad_norm": 0.25106081366539, |
| "learning_rate": 4.53056e-05, |
| "loss": 0.8624, |
| "step": 48360 |
| }, |
| { |
| "epoch": 0.77408, |
| "grad_norm": 0.22700931131839752, |
| "learning_rate": 4.52416e-05, |
| "loss": 0.9024, |
| "step": 48380 |
| }, |
| { |
| "epoch": 0.7744, |
| "grad_norm": 0.21767041087150574, |
| "learning_rate": 4.5177600000000006e-05, |
| "loss": 0.9082, |
| "step": 48400 |
| }, |
| { |
| "epoch": 0.77472, |
| "grad_norm": 0.2539537250995636, |
| "learning_rate": 4.51136e-05, |
| "loss": 0.8893, |
| "step": 48420 |
| }, |
| { |
| "epoch": 0.77504, |
| "grad_norm": 0.27352043986320496, |
| "learning_rate": 4.50496e-05, |
| "loss": 0.8678, |
| "step": 48440 |
| }, |
| { |
| "epoch": 0.77536, |
| "grad_norm": 0.22831988334655762, |
| "learning_rate": 4.49856e-05, |
| "loss": 0.8654, |
| "step": 48460 |
| }, |
| { |
| "epoch": 0.77568, |
| "grad_norm": 0.24554172158241272, |
| "learning_rate": 4.49216e-05, |
| "loss": 0.8604, |
| "step": 48480 |
| }, |
| { |
| "epoch": 0.776, |
| "grad_norm": 0.22556883096694946, |
| "learning_rate": 4.48576e-05, |
| "loss": 0.9112, |
| "step": 48500 |
| }, |
| { |
| "epoch": 0.77632, |
| "grad_norm": 0.22238677740097046, |
| "learning_rate": 4.4793600000000004e-05, |
| "loss": 0.8963, |
| "step": 48520 |
| }, |
| { |
| "epoch": 0.77664, |
| "grad_norm": 0.18963344395160675, |
| "learning_rate": 4.4729600000000006e-05, |
| "loss": 0.8464, |
| "step": 48540 |
| }, |
| { |
| "epoch": 0.77696, |
| "grad_norm": 0.21553830802440643, |
| "learning_rate": 4.46656e-05, |
| "loss": 0.8701, |
| "step": 48560 |
| }, |
| { |
| "epoch": 0.77728, |
| "grad_norm": 0.25254547595977783, |
| "learning_rate": 4.4601600000000003e-05, |
| "loss": 0.8503, |
| "step": 48580 |
| }, |
| { |
| "epoch": 0.7776, |
| "grad_norm": 0.21796059608459473, |
| "learning_rate": 4.45376e-05, |
| "loss": 0.8817, |
| "step": 48600 |
| }, |
| { |
| "epoch": 0.77792, |
| "grad_norm": 0.19532719254493713, |
| "learning_rate": 4.447360000000001e-05, |
| "loss": 0.9274, |
| "step": 48620 |
| }, |
| { |
| "epoch": 0.77824, |
| "grad_norm": 0.19615907967090607, |
| "learning_rate": 4.44096e-05, |
| "loss": 0.8781, |
| "step": 48640 |
| }, |
| { |
| "epoch": 0.77856, |
| "grad_norm": 0.20943795144557953, |
| "learning_rate": 4.4345600000000004e-05, |
| "loss": 0.8958, |
| "step": 48660 |
| }, |
| { |
| "epoch": 0.77888, |
| "grad_norm": 0.1854809671640396, |
| "learning_rate": 4.42816e-05, |
| "loss": 0.8712, |
| "step": 48680 |
| }, |
| { |
| "epoch": 0.7792, |
| "grad_norm": 0.23485055565834045, |
| "learning_rate": 4.42208e-05, |
| "loss": 0.8504, |
| "step": 48700 |
| }, |
| { |
| "epoch": 0.77952, |
| "grad_norm": 0.2727571725845337, |
| "learning_rate": 4.4156800000000004e-05, |
| "loss": 0.8882, |
| "step": 48720 |
| }, |
| { |
| "epoch": 0.77984, |
| "grad_norm": 0.2016323059797287, |
| "learning_rate": 4.40928e-05, |
| "loss": 0.8817, |
| "step": 48740 |
| }, |
| { |
| "epoch": 0.78016, |
| "grad_norm": 0.22555996477603912, |
| "learning_rate": 4.40288e-05, |
| "loss": 0.8778, |
| "step": 48760 |
| }, |
| { |
| "epoch": 0.78048, |
| "grad_norm": 0.19512006640434265, |
| "learning_rate": 4.39648e-05, |
| "loss": 0.8641, |
| "step": 48780 |
| }, |
| { |
| "epoch": 0.7808, |
| "grad_norm": 0.21679182350635529, |
| "learning_rate": 4.3900800000000005e-05, |
| "loss": 0.8819, |
| "step": 48800 |
| }, |
| { |
| "epoch": 0.78112, |
| "grad_norm": 0.23714877665042877, |
| "learning_rate": 4.38368e-05, |
| "loss": 0.8771, |
| "step": 48820 |
| }, |
| { |
| "epoch": 0.78144, |
| "grad_norm": 0.2215253859758377, |
| "learning_rate": 4.37728e-05, |
| "loss": 0.8592, |
| "step": 48840 |
| }, |
| { |
| "epoch": 0.78176, |
| "grad_norm": 0.21237672865390778, |
| "learning_rate": 4.37088e-05, |
| "loss": 0.8735, |
| "step": 48860 |
| }, |
| { |
| "epoch": 0.78208, |
| "grad_norm": 0.24682950973510742, |
| "learning_rate": 4.36448e-05, |
| "loss": 0.8988, |
| "step": 48880 |
| }, |
| { |
| "epoch": 0.7824, |
| "grad_norm": 0.27274882793426514, |
| "learning_rate": 4.35808e-05, |
| "loss": 0.9076, |
| "step": 48900 |
| }, |
| { |
| "epoch": 0.78272, |
| "grad_norm": 0.20632825791835785, |
| "learning_rate": 4.35168e-05, |
| "loss": 0.8541, |
| "step": 48920 |
| }, |
| { |
| "epoch": 0.78304, |
| "grad_norm": 0.21251200139522552, |
| "learning_rate": 4.3452800000000005e-05, |
| "loss": 0.8965, |
| "step": 48940 |
| }, |
| { |
| "epoch": 0.78336, |
| "grad_norm": 0.2018088847398758, |
| "learning_rate": 4.33888e-05, |
| "loss": 0.8742, |
| "step": 48960 |
| }, |
| { |
| "epoch": 0.78368, |
| "grad_norm": 0.24776096642017365, |
| "learning_rate": 4.33248e-05, |
| "loss": 0.8693, |
| "step": 48980 |
| }, |
| { |
| "epoch": 0.784, |
| "grad_norm": 0.24149677157402039, |
| "learning_rate": 4.3260800000000004e-05, |
| "loss": 0.883, |
| "step": 49000 |
| }, |
| { |
| "epoch": 0.78432, |
| "grad_norm": 0.2117341160774231, |
| "learning_rate": 4.3196800000000006e-05, |
| "loss": 0.8831, |
| "step": 49020 |
| }, |
| { |
| "epoch": 0.78464, |
| "grad_norm": 0.25594037771224976, |
| "learning_rate": 4.31328e-05, |
| "loss": 0.9239, |
| "step": 49040 |
| }, |
| { |
| "epoch": 0.78496, |
| "grad_norm": 0.20546288788318634, |
| "learning_rate": 4.3068800000000003e-05, |
| "loss": 0.8446, |
| "step": 49060 |
| }, |
| { |
| "epoch": 0.78528, |
| "grad_norm": 0.23239131271839142, |
| "learning_rate": 4.30048e-05, |
| "loss": 0.8878, |
| "step": 49080 |
| }, |
| { |
| "epoch": 0.7856, |
| "grad_norm": 0.24074342846870422, |
| "learning_rate": 4.29408e-05, |
| "loss": 0.9033, |
| "step": 49100 |
| }, |
| { |
| "epoch": 0.78592, |
| "grad_norm": 0.24424532055854797, |
| "learning_rate": 4.28768e-05, |
| "loss": 0.9051, |
| "step": 49120 |
| }, |
| { |
| "epoch": 0.78624, |
| "grad_norm": 0.23116187751293182, |
| "learning_rate": 4.2812800000000005e-05, |
| "loss": 0.911, |
| "step": 49140 |
| }, |
| { |
| "epoch": 0.78656, |
| "grad_norm": 0.2513030171394348, |
| "learning_rate": 4.27488e-05, |
| "loss": 0.8569, |
| "step": 49160 |
| }, |
| { |
| "epoch": 0.78688, |
| "grad_norm": 0.2296024113893509, |
| "learning_rate": 4.26848e-05, |
| "loss": 0.8666, |
| "step": 49180 |
| }, |
| { |
| "epoch": 0.7872, |
| "grad_norm": 0.2069111168384552, |
| "learning_rate": 4.26208e-05, |
| "loss": 0.9025, |
| "step": 49200 |
| }, |
| { |
| "epoch": 0.78752, |
| "grad_norm": 0.21525107324123383, |
| "learning_rate": 4.25568e-05, |
| "loss": 0.8218, |
| "step": 49220 |
| }, |
| { |
| "epoch": 0.78784, |
| "grad_norm": 0.21345154941082, |
| "learning_rate": 4.24928e-05, |
| "loss": 0.8456, |
| "step": 49240 |
| }, |
| { |
| "epoch": 0.78816, |
| "grad_norm": 0.25389420986175537, |
| "learning_rate": 4.24288e-05, |
| "loss": 0.842, |
| "step": 49260 |
| }, |
| { |
| "epoch": 0.78848, |
| "grad_norm": 0.2326725423336029, |
| "learning_rate": 4.2364800000000005e-05, |
| "loss": 0.8504, |
| "step": 49280 |
| }, |
| { |
| "epoch": 0.7888, |
| "grad_norm": 0.21930308640003204, |
| "learning_rate": 4.23008e-05, |
| "loss": 0.8697, |
| "step": 49300 |
| }, |
| { |
| "epoch": 0.78912, |
| "grad_norm": 0.23466825485229492, |
| "learning_rate": 4.22368e-05, |
| "loss": 0.9128, |
| "step": 49320 |
| }, |
| { |
| "epoch": 0.78944, |
| "grad_norm": 0.24129875004291534, |
| "learning_rate": 4.2172800000000004e-05, |
| "loss": 0.8499, |
| "step": 49340 |
| }, |
| { |
| "epoch": 0.78976, |
| "grad_norm": 0.17660856246948242, |
| "learning_rate": 4.2108800000000006e-05, |
| "loss": 0.8559, |
| "step": 49360 |
| }, |
| { |
| "epoch": 0.79008, |
| "grad_norm": 0.24038086831569672, |
| "learning_rate": 4.20448e-05, |
| "loss": 0.8669, |
| "step": 49380 |
| }, |
| { |
| "epoch": 0.7904, |
| "grad_norm": 0.23702336847782135, |
| "learning_rate": 4.19808e-05, |
| "loss": 0.846, |
| "step": 49400 |
| }, |
| { |
| "epoch": 0.79072, |
| "grad_norm": 0.2305484116077423, |
| "learning_rate": 4.19168e-05, |
| "loss": 0.8462, |
| "step": 49420 |
| }, |
| { |
| "epoch": 0.79104, |
| "grad_norm": 0.24989739060401917, |
| "learning_rate": 4.18528e-05, |
| "loss": 0.9404, |
| "step": 49440 |
| }, |
| { |
| "epoch": 0.79136, |
| "grad_norm": 0.23767246305942535, |
| "learning_rate": 4.17888e-05, |
| "loss": 0.9021, |
| "step": 49460 |
| }, |
| { |
| "epoch": 0.79168, |
| "grad_norm": 0.244027242064476, |
| "learning_rate": 4.1724800000000004e-05, |
| "loss": 0.8757, |
| "step": 49480 |
| }, |
| { |
| "epoch": 0.792, |
| "grad_norm": 0.21049901843070984, |
| "learning_rate": 4.16608e-05, |
| "loss": 0.8554, |
| "step": 49500 |
| }, |
| { |
| "epoch": 0.79232, |
| "grad_norm": 0.2375907301902771, |
| "learning_rate": 4.15968e-05, |
| "loss": 0.8256, |
| "step": 49520 |
| }, |
| { |
| "epoch": 0.79264, |
| "grad_norm": 0.23760604858398438, |
| "learning_rate": 4.1532799999999996e-05, |
| "loss": 0.898, |
| "step": 49540 |
| }, |
| { |
| "epoch": 0.79296, |
| "grad_norm": 0.16031509637832642, |
| "learning_rate": 4.1468800000000005e-05, |
| "loss": 0.8516, |
| "step": 49560 |
| }, |
| { |
| "epoch": 0.79328, |
| "grad_norm": 0.21544058620929718, |
| "learning_rate": 4.140480000000001e-05, |
| "loss": 0.857, |
| "step": 49580 |
| }, |
| { |
| "epoch": 0.7936, |
| "grad_norm": 0.23034314811229706, |
| "learning_rate": 4.13408e-05, |
| "loss": 0.8784, |
| "step": 49600 |
| }, |
| { |
| "epoch": 0.79392, |
| "grad_norm": 0.23492272198200226, |
| "learning_rate": 4.1276800000000004e-05, |
| "loss": 0.9037, |
| "step": 49620 |
| }, |
| { |
| "epoch": 0.79424, |
| "grad_norm": 0.2450007051229477, |
| "learning_rate": 4.12128e-05, |
| "loss": 0.8969, |
| "step": 49640 |
| }, |
| { |
| "epoch": 0.79456, |
| "grad_norm": 0.21207576990127563, |
| "learning_rate": 4.11488e-05, |
| "loss": 0.8492, |
| "step": 49660 |
| }, |
| { |
| "epoch": 0.79488, |
| "grad_norm": 0.21204914152622223, |
| "learning_rate": 4.1084800000000003e-05, |
| "loss": 0.9025, |
| "step": 49680 |
| }, |
| { |
| "epoch": 0.7952, |
| "grad_norm": 0.2355094999074936, |
| "learning_rate": 4.1020800000000005e-05, |
| "loss": 0.8533, |
| "step": 49700 |
| }, |
| { |
| "epoch": 0.79552, |
| "grad_norm": 0.21224915981292725, |
| "learning_rate": 4.09568e-05, |
| "loss": 0.8374, |
| "step": 49720 |
| }, |
| { |
| "epoch": 0.79584, |
| "grad_norm": 0.2114105373620987, |
| "learning_rate": 4.08928e-05, |
| "loss": 0.8823, |
| "step": 49740 |
| }, |
| { |
| "epoch": 0.79616, |
| "grad_norm": 0.3748084306716919, |
| "learning_rate": 4.08288e-05, |
| "loss": 0.8607, |
| "step": 49760 |
| }, |
| { |
| "epoch": 0.79648, |
| "grad_norm": 0.2285369336605072, |
| "learning_rate": 4.07648e-05, |
| "loss": 0.8654, |
| "step": 49780 |
| }, |
| { |
| "epoch": 0.7968, |
| "grad_norm": 0.17516812682151794, |
| "learning_rate": 4.07008e-05, |
| "loss": 0.8728, |
| "step": 49800 |
| }, |
| { |
| "epoch": 0.79712, |
| "grad_norm": 0.25664079189300537, |
| "learning_rate": 4.0636800000000004e-05, |
| "loss": 0.9342, |
| "step": 49820 |
| }, |
| { |
| "epoch": 0.79744, |
| "grad_norm": 0.2027619630098343, |
| "learning_rate": 4.05728e-05, |
| "loss": 0.8644, |
| "step": 49840 |
| }, |
| { |
| "epoch": 0.79776, |
| "grad_norm": 0.22048500180244446, |
| "learning_rate": 4.05088e-05, |
| "loss": 0.8586, |
| "step": 49860 |
| }, |
| { |
| "epoch": 0.79808, |
| "grad_norm": 0.2323845624923706, |
| "learning_rate": 4.0444799999999996e-05, |
| "loss": 0.8298, |
| "step": 49880 |
| }, |
| { |
| "epoch": 0.7984, |
| "grad_norm": 0.21872085332870483, |
| "learning_rate": 4.0380800000000005e-05, |
| "loss": 0.8584, |
| "step": 49900 |
| }, |
| { |
| "epoch": 0.79872, |
| "grad_norm": 0.20625688135623932, |
| "learning_rate": 4.031680000000001e-05, |
| "loss": 0.8122, |
| "step": 49920 |
| }, |
| { |
| "epoch": 0.79904, |
| "grad_norm": 0.20388665795326233, |
| "learning_rate": 4.02528e-05, |
| "loss": 0.8838, |
| "step": 49940 |
| }, |
| { |
| "epoch": 0.79936, |
| "grad_norm": 0.2362195998430252, |
| "learning_rate": 4.0188800000000004e-05, |
| "loss": 0.9139, |
| "step": 49960 |
| }, |
| { |
| "epoch": 0.79968, |
| "grad_norm": 0.19558613002300262, |
| "learning_rate": 4.01248e-05, |
| "loss": 0.8761, |
| "step": 49980 |
| }, |
| { |
| "epoch": 0.8, |
| "grad_norm": 0.18318922817707062, |
| "learning_rate": 4.00608e-05, |
| "loss": 0.8705, |
| "step": 50000 |
| }, |
| { |
| "epoch": 0.80032, |
| "grad_norm": 0.2216624766588211, |
| "learning_rate": 3.99968e-05, |
| "loss": 0.891, |
| "step": 50020 |
| }, |
| { |
| "epoch": 0.80064, |
| "grad_norm": 0.22886547446250916, |
| "learning_rate": 3.9932800000000005e-05, |
| "loss": 0.8926, |
| "step": 50040 |
| }, |
| { |
| "epoch": 0.80096, |
| "grad_norm": 0.2386888712644577, |
| "learning_rate": 3.98688e-05, |
| "loss": 0.8946, |
| "step": 50060 |
| }, |
| { |
| "epoch": 0.80128, |
| "grad_norm": 0.22732418775558472, |
| "learning_rate": 3.98048e-05, |
| "loss": 0.8874, |
| "step": 50080 |
| }, |
| { |
| "epoch": 0.8016, |
| "grad_norm": 0.19502227008342743, |
| "learning_rate": 3.97408e-05, |
| "loss": 0.8628, |
| "step": 50100 |
| }, |
| { |
| "epoch": 0.80192, |
| "grad_norm": 0.22442220151424408, |
| "learning_rate": 3.9676800000000006e-05, |
| "loss": 0.8888, |
| "step": 50120 |
| }, |
| { |
| "epoch": 0.80224, |
| "grad_norm": 0.2765730917453766, |
| "learning_rate": 3.96128e-05, |
| "loss": 0.8692, |
| "step": 50140 |
| }, |
| { |
| "epoch": 0.80256, |
| "grad_norm": 0.20345774292945862, |
| "learning_rate": 3.95488e-05, |
| "loss": 0.8961, |
| "step": 50160 |
| }, |
| { |
| "epoch": 0.80288, |
| "grad_norm": 0.17965848743915558, |
| "learning_rate": 3.94848e-05, |
| "loss": 0.8693, |
| "step": 50180 |
| }, |
| { |
| "epoch": 0.8032, |
| "grad_norm": 0.2365620881319046, |
| "learning_rate": 3.94208e-05, |
| "loss": 0.8645, |
| "step": 50200 |
| }, |
| { |
| "epoch": 0.80352, |
| "grad_norm": 0.22703875601291656, |
| "learning_rate": 3.93568e-05, |
| "loss": 0.9219, |
| "step": 50220 |
| }, |
| { |
| "epoch": 0.80384, |
| "grad_norm": 0.21852591633796692, |
| "learning_rate": 3.9292800000000004e-05, |
| "loss": 0.8594, |
| "step": 50240 |
| }, |
| { |
| "epoch": 0.80416, |
| "grad_norm": 0.2254893034696579, |
| "learning_rate": 3.9228800000000006e-05, |
| "loss": 0.8786, |
| "step": 50260 |
| }, |
| { |
| "epoch": 0.80448, |
| "grad_norm": 0.21090517938137054, |
| "learning_rate": 3.91648e-05, |
| "loss": 0.8894, |
| "step": 50280 |
| }, |
| { |
| "epoch": 0.8048, |
| "grad_norm": 0.21594619750976562, |
| "learning_rate": 3.9100800000000003e-05, |
| "loss": 0.8921, |
| "step": 50300 |
| }, |
| { |
| "epoch": 0.80512, |
| "grad_norm": 0.1902090162038803, |
| "learning_rate": 3.90368e-05, |
| "loss": 0.8454, |
| "step": 50320 |
| }, |
| { |
| "epoch": 0.80544, |
| "grad_norm": 0.3519250452518463, |
| "learning_rate": 3.89728e-05, |
| "loss": 0.8987, |
| "step": 50340 |
| }, |
| { |
| "epoch": 0.80576, |
| "grad_norm": 0.2199210673570633, |
| "learning_rate": 3.89088e-05, |
| "loss": 0.8827, |
| "step": 50360 |
| }, |
| { |
| "epoch": 0.80608, |
| "grad_norm": 0.23292851448059082, |
| "learning_rate": 3.8844800000000005e-05, |
| "loss": 0.8713, |
| "step": 50380 |
| }, |
| { |
| "epoch": 0.8064, |
| "grad_norm": 0.21949096024036407, |
| "learning_rate": 3.87808e-05, |
| "loss": 0.8707, |
| "step": 50400 |
| }, |
| { |
| "epoch": 0.80672, |
| "grad_norm": 0.25161877274513245, |
| "learning_rate": 3.87168e-05, |
| "loss": 0.9223, |
| "step": 50420 |
| }, |
| { |
| "epoch": 0.80704, |
| "grad_norm": 0.23557806015014648, |
| "learning_rate": 3.86528e-05, |
| "loss": 0.8855, |
| "step": 50440 |
| }, |
| { |
| "epoch": 0.80736, |
| "grad_norm": 0.2316737025976181, |
| "learning_rate": 3.8588800000000006e-05, |
| "loss": 0.8749, |
| "step": 50460 |
| }, |
| { |
| "epoch": 0.80768, |
| "grad_norm": 0.20035359263420105, |
| "learning_rate": 3.85248e-05, |
| "loss": 0.838, |
| "step": 50480 |
| }, |
| { |
| "epoch": 0.808, |
| "grad_norm": 0.17797014117240906, |
| "learning_rate": 3.84608e-05, |
| "loss": 0.839, |
| "step": 50500 |
| }, |
| { |
| "epoch": 0.80832, |
| "grad_norm": 0.2226208597421646, |
| "learning_rate": 3.83968e-05, |
| "loss": 0.8729, |
| "step": 50520 |
| }, |
| { |
| "epoch": 0.80864, |
| "grad_norm": 0.2194611132144928, |
| "learning_rate": 3.83328e-05, |
| "loss": 0.9066, |
| "step": 50540 |
| }, |
| { |
| "epoch": 0.80896, |
| "grad_norm": 0.21009878814220428, |
| "learning_rate": 3.82688e-05, |
| "loss": 0.8454, |
| "step": 50560 |
| }, |
| { |
| "epoch": 0.80928, |
| "grad_norm": 0.24309992790222168, |
| "learning_rate": 3.8204800000000004e-05, |
| "loss": 0.8612, |
| "step": 50580 |
| }, |
| { |
| "epoch": 0.8096, |
| "grad_norm": 0.19760167598724365, |
| "learning_rate": 3.8140800000000006e-05, |
| "loss": 0.9226, |
| "step": 50600 |
| }, |
| { |
| "epoch": 0.80992, |
| "grad_norm": 0.24959954619407654, |
| "learning_rate": 3.80768e-05, |
| "loss": 0.8667, |
| "step": 50620 |
| }, |
| { |
| "epoch": 0.81024, |
| "grad_norm": 0.2227480560541153, |
| "learning_rate": 3.80128e-05, |
| "loss": 0.9303, |
| "step": 50640 |
| }, |
| { |
| "epoch": 0.81056, |
| "grad_norm": 0.2228361815214157, |
| "learning_rate": 3.79488e-05, |
| "loss": 0.8365, |
| "step": 50660 |
| }, |
| { |
| "epoch": 0.81088, |
| "grad_norm": 0.22961002588272095, |
| "learning_rate": 3.788480000000001e-05, |
| "loss": 0.8257, |
| "step": 50680 |
| }, |
| { |
| "epoch": 0.8112, |
| "grad_norm": 0.19517934322357178, |
| "learning_rate": 3.78208e-05, |
| "loss": 0.8486, |
| "step": 50700 |
| }, |
| { |
| "epoch": 0.81152, |
| "grad_norm": 0.21257704496383667, |
| "learning_rate": 3.7756800000000004e-05, |
| "loss": 0.8665, |
| "step": 50720 |
| }, |
| { |
| "epoch": 0.81184, |
| "grad_norm": 0.17308840155601501, |
| "learning_rate": 3.76928e-05, |
| "loss": 0.8682, |
| "step": 50740 |
| }, |
| { |
| "epoch": 0.81216, |
| "grad_norm": 0.2145150750875473, |
| "learning_rate": 3.76288e-05, |
| "loss": 0.8652, |
| "step": 50760 |
| }, |
| { |
| "epoch": 0.81248, |
| "grad_norm": 0.20915599167346954, |
| "learning_rate": 3.75648e-05, |
| "loss": 0.881, |
| "step": 50780 |
| }, |
| { |
| "epoch": 0.8128, |
| "grad_norm": 0.2703164219856262, |
| "learning_rate": 3.7500800000000005e-05, |
| "loss": 0.8451, |
| "step": 50800 |
| }, |
| { |
| "epoch": 0.81312, |
| "grad_norm": 0.218171164393425, |
| "learning_rate": 3.74368e-05, |
| "loss": 0.8879, |
| "step": 50820 |
| }, |
| { |
| "epoch": 0.81344, |
| "grad_norm": 0.2015322595834732, |
| "learning_rate": 3.73728e-05, |
| "loss": 0.8583, |
| "step": 50840 |
| }, |
| { |
| "epoch": 0.81376, |
| "grad_norm": 0.2532670199871063, |
| "learning_rate": 3.73088e-05, |
| "loss": 0.9111, |
| "step": 50860 |
| }, |
| { |
| "epoch": 0.81408, |
| "grad_norm": 0.22806140780448914, |
| "learning_rate": 3.72448e-05, |
| "loss": 0.8677, |
| "step": 50880 |
| }, |
| { |
| "epoch": 0.8144, |
| "grad_norm": 0.20651741325855255, |
| "learning_rate": 3.71808e-05, |
| "loss": 0.9084, |
| "step": 50900 |
| }, |
| { |
| "epoch": 0.81472, |
| "grad_norm": 0.23538829386234283, |
| "learning_rate": 3.7116800000000004e-05, |
| "loss": 0.888, |
| "step": 50920 |
| }, |
| { |
| "epoch": 0.81504, |
| "grad_norm": 0.23798079788684845, |
| "learning_rate": 3.7052800000000005e-05, |
| "loss": 0.8974, |
| "step": 50940 |
| }, |
| { |
| "epoch": 0.81536, |
| "grad_norm": 0.24513110518455505, |
| "learning_rate": 3.69888e-05, |
| "loss": 0.9018, |
| "step": 50960 |
| }, |
| { |
| "epoch": 0.81568, |
| "grad_norm": 0.24939313530921936, |
| "learning_rate": 3.69248e-05, |
| "loss": 0.9033, |
| "step": 50980 |
| }, |
| { |
| "epoch": 0.816, |
| "grad_norm": 0.20573210716247559, |
| "learning_rate": 3.68608e-05, |
| "loss": 0.874, |
| "step": 51000 |
| }, |
| { |
| "epoch": 0.81632, |
| "grad_norm": 0.22649157047271729, |
| "learning_rate": 3.6796800000000007e-05, |
| "loss": 0.9008, |
| "step": 51020 |
| }, |
| { |
| "epoch": 0.81664, |
| "grad_norm": 0.24870529770851135, |
| "learning_rate": 3.67328e-05, |
| "loss": 0.8949, |
| "step": 51040 |
| }, |
| { |
| "epoch": 0.81696, |
| "grad_norm": 0.22699517011642456, |
| "learning_rate": 3.6668800000000004e-05, |
| "loss": 0.9049, |
| "step": 51060 |
| }, |
| { |
| "epoch": 0.81728, |
| "grad_norm": 0.24480411410331726, |
| "learning_rate": 3.66048e-05, |
| "loss": 0.8855, |
| "step": 51080 |
| }, |
| { |
| "epoch": 0.8176, |
| "grad_norm": 0.2394665777683258, |
| "learning_rate": 3.65408e-05, |
| "loss": 0.8868, |
| "step": 51100 |
| }, |
| { |
| "epoch": 0.81792, |
| "grad_norm": 0.20132653415203094, |
| "learning_rate": 3.64768e-05, |
| "loss": 0.8209, |
| "step": 51120 |
| }, |
| { |
| "epoch": 0.81824, |
| "grad_norm": 0.2139676958322525, |
| "learning_rate": 3.6412800000000005e-05, |
| "loss": 0.8852, |
| "step": 51140 |
| }, |
| { |
| "epoch": 0.81856, |
| "grad_norm": 0.23041175305843353, |
| "learning_rate": 3.63488e-05, |
| "loss": 0.8864, |
| "step": 51160 |
| }, |
| { |
| "epoch": 0.81888, |
| "grad_norm": 0.18776430189609528, |
| "learning_rate": 3.62848e-05, |
| "loss": 0.8675, |
| "step": 51180 |
| }, |
| { |
| "epoch": 0.8192, |
| "grad_norm": 0.24612362682819366, |
| "learning_rate": 3.62208e-05, |
| "loss": 0.9132, |
| "step": 51200 |
| }, |
| { |
| "epoch": 0.81952, |
| "grad_norm": 0.235810786485672, |
| "learning_rate": 3.61568e-05, |
| "loss": 0.8947, |
| "step": 51220 |
| }, |
| { |
| "epoch": 0.81984, |
| "grad_norm": 0.20968469977378845, |
| "learning_rate": 3.60928e-05, |
| "loss": 0.8943, |
| "step": 51240 |
| }, |
| { |
| "epoch": 0.82016, |
| "grad_norm": 0.20469830930233002, |
| "learning_rate": 3.60288e-05, |
| "loss": 0.8561, |
| "step": 51260 |
| }, |
| { |
| "epoch": 0.82048, |
| "grad_norm": 0.19832849502563477, |
| "learning_rate": 3.5964800000000005e-05, |
| "loss": 0.8784, |
| "step": 51280 |
| }, |
| { |
| "epoch": 0.8208, |
| "grad_norm": 0.2282322198152542, |
| "learning_rate": 3.59008e-05, |
| "loss": 0.8491, |
| "step": 51300 |
| }, |
| { |
| "epoch": 0.82112, |
| "grad_norm": 0.21050651371479034, |
| "learning_rate": 3.58368e-05, |
| "loss": 0.8547, |
| "step": 51320 |
| }, |
| { |
| "epoch": 0.82144, |
| "grad_norm": 0.21298284828662872, |
| "learning_rate": 3.57728e-05, |
| "loss": 0.8696, |
| "step": 51340 |
| }, |
| { |
| "epoch": 0.82176, |
| "grad_norm": 0.1952245980501175, |
| "learning_rate": 3.5708800000000006e-05, |
| "loss": 0.8184, |
| "step": 51360 |
| }, |
| { |
| "epoch": 0.82208, |
| "grad_norm": 0.23376013338565826, |
| "learning_rate": 3.56448e-05, |
| "loss": 0.8348, |
| "step": 51380 |
| }, |
| { |
| "epoch": 0.8224, |
| "grad_norm": 0.22029918432235718, |
| "learning_rate": 3.55808e-05, |
| "loss": 0.8956, |
| "step": 51400 |
| }, |
| { |
| "epoch": 0.82272, |
| "grad_norm": 0.23580487072467804, |
| "learning_rate": 3.55168e-05, |
| "loss": 0.9059, |
| "step": 51420 |
| }, |
| { |
| "epoch": 0.82304, |
| "grad_norm": 0.2074773609638214, |
| "learning_rate": 3.54528e-05, |
| "loss": 0.8591, |
| "step": 51440 |
| }, |
| { |
| "epoch": 0.82336, |
| "grad_norm": 0.21169452369213104, |
| "learning_rate": 3.53888e-05, |
| "loss": 0.8603, |
| "step": 51460 |
| }, |
| { |
| "epoch": 0.82368, |
| "grad_norm": 0.21765758097171783, |
| "learning_rate": 3.5324800000000004e-05, |
| "loss": 0.9107, |
| "step": 51480 |
| }, |
| { |
| "epoch": 0.824, |
| "grad_norm": 0.23709143698215485, |
| "learning_rate": 3.52608e-05, |
| "loss": 0.8909, |
| "step": 51500 |
| }, |
| { |
| "epoch": 0.82432, |
| "grad_norm": 0.18285530805587769, |
| "learning_rate": 3.51968e-05, |
| "loss": 0.8699, |
| "step": 51520 |
| }, |
| { |
| "epoch": 0.82464, |
| "grad_norm": 0.19453024864196777, |
| "learning_rate": 3.51328e-05, |
| "loss": 0.8841, |
| "step": 51540 |
| }, |
| { |
| "epoch": 0.82496, |
| "grad_norm": 0.20472079515457153, |
| "learning_rate": 3.50688e-05, |
| "loss": 0.8651, |
| "step": 51560 |
| }, |
| { |
| "epoch": 0.82528, |
| "grad_norm": 0.20934510231018066, |
| "learning_rate": 3.500480000000001e-05, |
| "loss": 0.9298, |
| "step": 51580 |
| }, |
| { |
| "epoch": 0.8256, |
| "grad_norm": 0.20935463905334473, |
| "learning_rate": 3.49408e-05, |
| "loss": 0.8238, |
| "step": 51600 |
| }, |
| { |
| "epoch": 0.82592, |
| "grad_norm": 0.19985993206501007, |
| "learning_rate": 3.4876800000000005e-05, |
| "loss": 0.8879, |
| "step": 51620 |
| }, |
| { |
| "epoch": 0.82624, |
| "grad_norm": 0.21266649663448334, |
| "learning_rate": 3.48128e-05, |
| "loss": 0.8754, |
| "step": 51640 |
| }, |
| { |
| "epoch": 0.82656, |
| "grad_norm": 0.21522794663906097, |
| "learning_rate": 3.47488e-05, |
| "loss": 0.8851, |
| "step": 51660 |
| }, |
| { |
| "epoch": 0.82688, |
| "grad_norm": 0.21246004104614258, |
| "learning_rate": 3.4684800000000004e-05, |
| "loss": 0.8588, |
| "step": 51680 |
| }, |
| { |
| "epoch": 0.8272, |
| "grad_norm": 0.23577255010604858, |
| "learning_rate": 3.4620800000000006e-05, |
| "loss": 0.8502, |
| "step": 51700 |
| }, |
| { |
| "epoch": 0.82752, |
| "grad_norm": 0.22873230278491974, |
| "learning_rate": 3.45568e-05, |
| "loss": 0.9434, |
| "step": 51720 |
| }, |
| { |
| "epoch": 0.82784, |
| "grad_norm": 0.24500973522663116, |
| "learning_rate": 3.44928e-05, |
| "loss": 0.8575, |
| "step": 51740 |
| }, |
| { |
| "epoch": 0.82816, |
| "grad_norm": 0.24861827492713928, |
| "learning_rate": 3.44288e-05, |
| "loss": 0.8601, |
| "step": 51760 |
| }, |
| { |
| "epoch": 0.82848, |
| "grad_norm": 0.25244006514549255, |
| "learning_rate": 3.43648e-05, |
| "loss": 0.8957, |
| "step": 51780 |
| }, |
| { |
| "epoch": 0.8288, |
| "grad_norm": 0.24348974227905273, |
| "learning_rate": 3.43008e-05, |
| "loss": 0.8433, |
| "step": 51800 |
| }, |
| { |
| "epoch": 0.82912, |
| "grad_norm": 0.23009903728961945, |
| "learning_rate": 3.4236800000000004e-05, |
| "loss": 0.9117, |
| "step": 51820 |
| }, |
| { |
| "epoch": 0.82944, |
| "grad_norm": 0.2047446370124817, |
| "learning_rate": 3.41728e-05, |
| "loss": 0.8102, |
| "step": 51840 |
| }, |
| { |
| "epoch": 0.82976, |
| "grad_norm": 0.2244807630777359, |
| "learning_rate": 3.41088e-05, |
| "loss": 0.8878, |
| "step": 51860 |
| }, |
| { |
| "epoch": 0.83008, |
| "grad_norm": 0.23874083161354065, |
| "learning_rate": 3.4044799999999996e-05, |
| "loss": 0.9476, |
| "step": 51880 |
| }, |
| { |
| "epoch": 0.8304, |
| "grad_norm": 0.21802900731563568, |
| "learning_rate": 3.39808e-05, |
| "loss": 0.8457, |
| "step": 51900 |
| }, |
| { |
| "epoch": 0.83072, |
| "grad_norm": 0.32707545161247253, |
| "learning_rate": 3.391680000000001e-05, |
| "loss": 0.8705, |
| "step": 51920 |
| }, |
| { |
| "epoch": 0.83104, |
| "grad_norm": 0.22364617884159088, |
| "learning_rate": 3.38528e-05, |
| "loss": 0.9154, |
| "step": 51940 |
| }, |
| { |
| "epoch": 0.83136, |
| "grad_norm": 0.23513180017471313, |
| "learning_rate": 3.3788800000000004e-05, |
| "loss": 0.863, |
| "step": 51960 |
| }, |
| { |
| "epoch": 0.83168, |
| "grad_norm": 0.24189570546150208, |
| "learning_rate": 3.37248e-05, |
| "loss": 0.9223, |
| "step": 51980 |
| }, |
| { |
| "epoch": 0.832, |
| "grad_norm": 0.2324758619070053, |
| "learning_rate": 3.36608e-05, |
| "loss": 0.8858, |
| "step": 52000 |
| }, |
| { |
| "epoch": 0.83232, |
| "grad_norm": 0.21225537359714508, |
| "learning_rate": 3.35968e-05, |
| "loss": 0.8573, |
| "step": 52020 |
| }, |
| { |
| "epoch": 0.83264, |
| "grad_norm": 0.2314990609884262, |
| "learning_rate": 3.3532800000000005e-05, |
| "loss": 0.9258, |
| "step": 52040 |
| }, |
| { |
| "epoch": 0.83296, |
| "grad_norm": 0.22183631360530853, |
| "learning_rate": 3.34688e-05, |
| "loss": 0.8842, |
| "step": 52060 |
| }, |
| { |
| "epoch": 0.83328, |
| "grad_norm": 0.23704120516777039, |
| "learning_rate": 3.34048e-05, |
| "loss": 0.8513, |
| "step": 52080 |
| }, |
| { |
| "epoch": 0.8336, |
| "grad_norm": 0.21080462634563446, |
| "learning_rate": 3.33408e-05, |
| "loss": 0.8745, |
| "step": 52100 |
| }, |
| { |
| "epoch": 0.83392, |
| "grad_norm": 0.25182968378067017, |
| "learning_rate": 3.32768e-05, |
| "loss": 0.8492, |
| "step": 52120 |
| }, |
| { |
| "epoch": 0.83424, |
| "grad_norm": 0.22719983756542206, |
| "learning_rate": 3.32128e-05, |
| "loss": 0.8727, |
| "step": 52140 |
| }, |
| { |
| "epoch": 0.83456, |
| "grad_norm": 0.36781367659568787, |
| "learning_rate": 3.3148800000000004e-05, |
| "loss": 0.9106, |
| "step": 52160 |
| }, |
| { |
| "epoch": 0.83488, |
| "grad_norm": 0.23190808296203613, |
| "learning_rate": 3.30848e-05, |
| "loss": 0.8574, |
| "step": 52180 |
| }, |
| { |
| "epoch": 0.8352, |
| "grad_norm": 0.19346857070922852, |
| "learning_rate": 3.30208e-05, |
| "loss": 0.8695, |
| "step": 52200 |
| }, |
| { |
| "epoch": 0.83552, |
| "grad_norm": 0.21294453740119934, |
| "learning_rate": 3.29568e-05, |
| "loss": 0.8724, |
| "step": 52220 |
| }, |
| { |
| "epoch": 0.83584, |
| "grad_norm": 0.18836593627929688, |
| "learning_rate": 3.2892800000000005e-05, |
| "loss": 0.8623, |
| "step": 52240 |
| }, |
| { |
| "epoch": 0.83616, |
| "grad_norm": 0.21922753751277924, |
| "learning_rate": 3.2828800000000007e-05, |
| "loss": 0.8477, |
| "step": 52260 |
| }, |
| { |
| "epoch": 0.83648, |
| "grad_norm": 0.2198483943939209, |
| "learning_rate": 3.27648e-05, |
| "loss": 0.9069, |
| "step": 52280 |
| }, |
| { |
| "epoch": 0.8368, |
| "grad_norm": 0.20438086986541748, |
| "learning_rate": 3.2700800000000004e-05, |
| "loss": 0.8833, |
| "step": 52300 |
| }, |
| { |
| "epoch": 0.83712, |
| "grad_norm": 0.2795603573322296, |
| "learning_rate": 3.26368e-05, |
| "loss": 0.9141, |
| "step": 52320 |
| }, |
| { |
| "epoch": 0.83744, |
| "grad_norm": 0.22276397049427032, |
| "learning_rate": 3.25728e-05, |
| "loss": 0.8741, |
| "step": 52340 |
| }, |
| { |
| "epoch": 0.83776, |
| "grad_norm": 0.21769918501377106, |
| "learning_rate": 3.25088e-05, |
| "loss": 0.88, |
| "step": 52360 |
| }, |
| { |
| "epoch": 0.83808, |
| "grad_norm": 0.22383011877536774, |
| "learning_rate": 3.2444800000000005e-05, |
| "loss": 0.8504, |
| "step": 52380 |
| }, |
| { |
| "epoch": 0.8384, |
| "grad_norm": 0.23771865665912628, |
| "learning_rate": 3.23808e-05, |
| "loss": 0.9323, |
| "step": 52400 |
| }, |
| { |
| "epoch": 0.83872, |
| "grad_norm": 0.20924776792526245, |
| "learning_rate": 3.23168e-05, |
| "loss": 0.8668, |
| "step": 52420 |
| }, |
| { |
| "epoch": 0.83904, |
| "grad_norm": 0.27309560775756836, |
| "learning_rate": 3.22528e-05, |
| "loss": 0.9336, |
| "step": 52440 |
| }, |
| { |
| "epoch": 0.83936, |
| "grad_norm": 0.24357040226459503, |
| "learning_rate": 3.21888e-05, |
| "loss": 0.9199, |
| "step": 52460 |
| }, |
| { |
| "epoch": 0.83968, |
| "grad_norm": 0.23052769899368286, |
| "learning_rate": 3.21248e-05, |
| "loss": 0.9209, |
| "step": 52480 |
| }, |
| { |
| "epoch": 0.84, |
| "grad_norm": 0.23305866122245789, |
| "learning_rate": 3.20608e-05, |
| "loss": 0.9089, |
| "step": 52500 |
| }, |
| { |
| "epoch": 0.84032, |
| "grad_norm": 0.21440325677394867, |
| "learning_rate": 3.19968e-05, |
| "loss": 0.8483, |
| "step": 52520 |
| }, |
| { |
| "epoch": 0.84064, |
| "grad_norm": 0.18805745244026184, |
| "learning_rate": 3.19328e-05, |
| "loss": 0.8436, |
| "step": 52540 |
| }, |
| { |
| "epoch": 0.84096, |
| "grad_norm": 0.24525205790996552, |
| "learning_rate": 3.18688e-05, |
| "loss": 0.8988, |
| "step": 52560 |
| }, |
| { |
| "epoch": 0.84128, |
| "grad_norm": 0.3257310688495636, |
| "learning_rate": 3.1804800000000004e-05, |
| "loss": 0.8461, |
| "step": 52580 |
| }, |
| { |
| "epoch": 0.8416, |
| "grad_norm": 0.21542541682720184, |
| "learning_rate": 3.1740800000000006e-05, |
| "loss": 0.908, |
| "step": 52600 |
| }, |
| { |
| "epoch": 0.84192, |
| "grad_norm": 0.18117113411426544, |
| "learning_rate": 3.16768e-05, |
| "loss": 0.9024, |
| "step": 52620 |
| }, |
| { |
| "epoch": 0.84224, |
| "grad_norm": 0.21288667619228363, |
| "learning_rate": 3.16128e-05, |
| "loss": 0.8981, |
| "step": 52640 |
| }, |
| { |
| "epoch": 0.84256, |
| "grad_norm": 0.20366021990776062, |
| "learning_rate": 3.15488e-05, |
| "loss": 0.8251, |
| "step": 52660 |
| }, |
| { |
| "epoch": 0.84288, |
| "grad_norm": 0.21234050393104553, |
| "learning_rate": 3.14848e-05, |
| "loss": 0.8548, |
| "step": 52680 |
| }, |
| { |
| "epoch": 0.8432, |
| "grad_norm": 0.23660403490066528, |
| "learning_rate": 3.14208e-05, |
| "loss": 0.8707, |
| "step": 52700 |
| }, |
| { |
| "epoch": 0.84352, |
| "grad_norm": 0.22134044766426086, |
| "learning_rate": 3.1356800000000004e-05, |
| "loss": 0.8742, |
| "step": 52720 |
| }, |
| { |
| "epoch": 0.84384, |
| "grad_norm": 0.23551689088344574, |
| "learning_rate": 3.12928e-05, |
| "loss": 0.8716, |
| "step": 52740 |
| }, |
| { |
| "epoch": 0.84416, |
| "grad_norm": 0.184543177485466, |
| "learning_rate": 3.12288e-05, |
| "loss": 0.8352, |
| "step": 52760 |
| }, |
| { |
| "epoch": 0.84448, |
| "grad_norm": 0.2306751012802124, |
| "learning_rate": 3.11648e-05, |
| "loss": 0.9043, |
| "step": 52780 |
| }, |
| { |
| "epoch": 0.8448, |
| "grad_norm": 0.2092065066099167, |
| "learning_rate": 3.1100800000000006e-05, |
| "loss": 0.9103, |
| "step": 52800 |
| }, |
| { |
| "epoch": 0.84512, |
| "grad_norm": 0.1914425492286682, |
| "learning_rate": 3.10368e-05, |
| "loss": 0.8702, |
| "step": 52820 |
| }, |
| { |
| "epoch": 0.84544, |
| "grad_norm": 0.24771615862846375, |
| "learning_rate": 3.09728e-05, |
| "loss": 0.8429, |
| "step": 52840 |
| }, |
| { |
| "epoch": 0.84576, |
| "grad_norm": 0.1887243092060089, |
| "learning_rate": 3.09088e-05, |
| "loss": 0.8835, |
| "step": 52860 |
| }, |
| { |
| "epoch": 0.84608, |
| "grad_norm": 0.2107224017381668, |
| "learning_rate": 3.08448e-05, |
| "loss": 0.8995, |
| "step": 52880 |
| }, |
| { |
| "epoch": 0.8464, |
| "grad_norm": 0.2114105075597763, |
| "learning_rate": 3.07808e-05, |
| "loss": 0.8316, |
| "step": 52900 |
| }, |
| { |
| "epoch": 0.84672, |
| "grad_norm": 0.22698649764060974, |
| "learning_rate": 3.0716800000000004e-05, |
| "loss": 0.917, |
| "step": 52920 |
| }, |
| { |
| "epoch": 0.84704, |
| "grad_norm": 0.23728908598423004, |
| "learning_rate": 3.0652800000000006e-05, |
| "loss": 0.8641, |
| "step": 52940 |
| }, |
| { |
| "epoch": 0.84736, |
| "grad_norm": 0.24342834949493408, |
| "learning_rate": 3.05888e-05, |
| "loss": 0.9067, |
| "step": 52960 |
| }, |
| { |
| "epoch": 0.84768, |
| "grad_norm": 0.23248563706874847, |
| "learning_rate": 3.05248e-05, |
| "loss": 0.8918, |
| "step": 52980 |
| }, |
| { |
| "epoch": 0.848, |
| "grad_norm": 0.22036781907081604, |
| "learning_rate": 3.04608e-05, |
| "loss": 0.8689, |
| "step": 53000 |
| }, |
| { |
| "epoch": 0.84832, |
| "grad_norm": 0.21692028641700745, |
| "learning_rate": 3.0396800000000003e-05, |
| "loss": 0.8941, |
| "step": 53020 |
| }, |
| { |
| "epoch": 0.84864, |
| "grad_norm": 0.21200671792030334, |
| "learning_rate": 3.03328e-05, |
| "loss": 0.8787, |
| "step": 53040 |
| }, |
| { |
| "epoch": 0.84896, |
| "grad_norm": 0.31840503215789795, |
| "learning_rate": 3.0268800000000004e-05, |
| "loss": 0.9181, |
| "step": 53060 |
| }, |
| { |
| "epoch": 0.84928, |
| "grad_norm": 0.2347812056541443, |
| "learning_rate": 3.02048e-05, |
| "loss": 0.885, |
| "step": 53080 |
| }, |
| { |
| "epoch": 0.8496, |
| "grad_norm": 0.22629977762699127, |
| "learning_rate": 3.01408e-05, |
| "loss": 0.9208, |
| "step": 53100 |
| }, |
| { |
| "epoch": 0.84992, |
| "grad_norm": 0.20610420405864716, |
| "learning_rate": 3.00768e-05, |
| "loss": 0.8698, |
| "step": 53120 |
| }, |
| { |
| "epoch": 0.85024, |
| "grad_norm": 0.17655342817306519, |
| "learning_rate": 3.0012800000000002e-05, |
| "loss": 0.8767, |
| "step": 53140 |
| }, |
| { |
| "epoch": 0.85056, |
| "grad_norm": 0.2303926944732666, |
| "learning_rate": 2.99488e-05, |
| "loss": 0.9493, |
| "step": 53160 |
| }, |
| { |
| "epoch": 0.85088, |
| "grad_norm": 0.20119711756706238, |
| "learning_rate": 2.9884800000000002e-05, |
| "loss": 0.8769, |
| "step": 53180 |
| }, |
| { |
| "epoch": 0.8512, |
| "grad_norm": 0.2533680200576782, |
| "learning_rate": 2.9820799999999997e-05, |
| "loss": 0.9568, |
| "step": 53200 |
| }, |
| { |
| "epoch": 0.85152, |
| "grad_norm": 0.2610546350479126, |
| "learning_rate": 2.9756800000000003e-05, |
| "loss": 0.9215, |
| "step": 53220 |
| }, |
| { |
| "epoch": 0.85184, |
| "grad_norm": 0.2684386074542999, |
| "learning_rate": 2.9692800000000005e-05, |
| "loss": 0.8739, |
| "step": 53240 |
| }, |
| { |
| "epoch": 0.85216, |
| "grad_norm": 0.2054203301668167, |
| "learning_rate": 2.96288e-05, |
| "loss": 0.8909, |
| "step": 53260 |
| }, |
| { |
| "epoch": 0.85248, |
| "grad_norm": 0.24058941006660461, |
| "learning_rate": 2.9564800000000002e-05, |
| "loss": 0.8314, |
| "step": 53280 |
| }, |
| { |
| "epoch": 0.8528, |
| "grad_norm": 0.2767840325832367, |
| "learning_rate": 2.95008e-05, |
| "loss": 0.8588, |
| "step": 53300 |
| }, |
| { |
| "epoch": 0.85312, |
| "grad_norm": 0.1769871711730957, |
| "learning_rate": 2.9436800000000002e-05, |
| "loss": 0.8978, |
| "step": 53320 |
| }, |
| { |
| "epoch": 0.85344, |
| "grad_norm": 0.23680894076824188, |
| "learning_rate": 2.93728e-05, |
| "loss": 0.879, |
| "step": 53340 |
| }, |
| { |
| "epoch": 0.85376, |
| "grad_norm": 0.2106921374797821, |
| "learning_rate": 2.9308800000000003e-05, |
| "loss": 0.8792, |
| "step": 53360 |
| }, |
| { |
| "epoch": 0.85408, |
| "grad_norm": 0.20546332001686096, |
| "learning_rate": 2.9248000000000002e-05, |
| "loss": 0.8953, |
| "step": 53380 |
| }, |
| { |
| "epoch": 0.8544, |
| "grad_norm": 0.2114826738834381, |
| "learning_rate": 2.9184e-05, |
| "loss": 0.889, |
| "step": 53400 |
| }, |
| { |
| "epoch": 0.85472, |
| "grad_norm": 0.2232695072889328, |
| "learning_rate": 2.9120000000000002e-05, |
| "loss": 0.8441, |
| "step": 53420 |
| }, |
| { |
| "epoch": 0.85504, |
| "grad_norm": 0.25499632954597473, |
| "learning_rate": 2.9056e-05, |
| "loss": 0.9132, |
| "step": 53440 |
| }, |
| { |
| "epoch": 0.85536, |
| "grad_norm": 0.22726675868034363, |
| "learning_rate": 2.8992000000000003e-05, |
| "loss": 0.8905, |
| "step": 53460 |
| }, |
| { |
| "epoch": 0.85568, |
| "grad_norm": 0.18875838816165924, |
| "learning_rate": 2.8927999999999998e-05, |
| "loss": 0.8665, |
| "step": 53480 |
| }, |
| { |
| "epoch": 0.856, |
| "grad_norm": 0.23504668474197388, |
| "learning_rate": 2.8864000000000004e-05, |
| "loss": 0.9092, |
| "step": 53500 |
| }, |
| { |
| "epoch": 0.85632, |
| "grad_norm": 0.23671472072601318, |
| "learning_rate": 2.88e-05, |
| "loss": 0.8858, |
| "step": 53520 |
| }, |
| { |
| "epoch": 0.85664, |
| "grad_norm": 0.22494633495807648, |
| "learning_rate": 2.8736e-05, |
| "loss": 0.8988, |
| "step": 53540 |
| }, |
| { |
| "epoch": 0.85696, |
| "grad_norm": 0.20699791610240936, |
| "learning_rate": 2.8672e-05, |
| "loss": 0.8523, |
| "step": 53560 |
| }, |
| { |
| "epoch": 0.85728, |
| "grad_norm": 0.24414889514446259, |
| "learning_rate": 2.8608e-05, |
| "loss": 0.8734, |
| "step": 53580 |
| }, |
| { |
| "epoch": 0.8576, |
| "grad_norm": 0.23562034964561462, |
| "learning_rate": 2.8544000000000003e-05, |
| "loss": 0.8349, |
| "step": 53600 |
| }, |
| { |
| "epoch": 0.85792, |
| "grad_norm": 0.18858417868614197, |
| "learning_rate": 2.8480000000000002e-05, |
| "loss": 0.8784, |
| "step": 53620 |
| }, |
| { |
| "epoch": 0.85824, |
| "grad_norm": 0.23060615360736847, |
| "learning_rate": 2.8416000000000004e-05, |
| "loss": 0.8874, |
| "step": 53640 |
| }, |
| { |
| "epoch": 0.85856, |
| "grad_norm": 0.2229340374469757, |
| "learning_rate": 2.8352000000000002e-05, |
| "loss": 0.8772, |
| "step": 53660 |
| }, |
| { |
| "epoch": 0.85888, |
| "grad_norm": 0.23819968104362488, |
| "learning_rate": 2.8288000000000004e-05, |
| "loss": 0.9138, |
| "step": 53680 |
| }, |
| { |
| "epoch": 0.8592, |
| "grad_norm": 0.22570669651031494, |
| "learning_rate": 2.8224e-05, |
| "loss": 0.8543, |
| "step": 53700 |
| }, |
| { |
| "epoch": 0.85952, |
| "grad_norm": 0.1779906302690506, |
| "learning_rate": 2.816e-05, |
| "loss": 0.8453, |
| "step": 53720 |
| }, |
| { |
| "epoch": 0.85984, |
| "grad_norm": 0.18111290037631989, |
| "learning_rate": 2.8096e-05, |
| "loss": 0.8571, |
| "step": 53740 |
| }, |
| { |
| "epoch": 0.86016, |
| "grad_norm": 0.23945818841457367, |
| "learning_rate": 2.8032000000000002e-05, |
| "loss": 0.9347, |
| "step": 53760 |
| }, |
| { |
| "epoch": 0.86048, |
| "grad_norm": 0.2050149291753769, |
| "learning_rate": 2.7968e-05, |
| "loss": 0.8871, |
| "step": 53780 |
| }, |
| { |
| "epoch": 0.8608, |
| "grad_norm": 0.20724190771579742, |
| "learning_rate": 2.7904000000000003e-05, |
| "loss": 0.8506, |
| "step": 53800 |
| }, |
| { |
| "epoch": 0.86112, |
| "grad_norm": 0.21259485185146332, |
| "learning_rate": 2.7839999999999998e-05, |
| "loss": 0.8642, |
| "step": 53820 |
| }, |
| { |
| "epoch": 0.86144, |
| "grad_norm": 0.2348015457391739, |
| "learning_rate": 2.7776000000000003e-05, |
| "loss": 0.8883, |
| "step": 53840 |
| }, |
| { |
| "epoch": 0.86176, |
| "grad_norm": 0.2268034815788269, |
| "learning_rate": 2.7711999999999998e-05, |
| "loss": 0.842, |
| "step": 53860 |
| }, |
| { |
| "epoch": 0.86208, |
| "grad_norm": 0.20903776586055756, |
| "learning_rate": 2.7648e-05, |
| "loss": 0.898, |
| "step": 53880 |
| }, |
| { |
| "epoch": 0.8624, |
| "grad_norm": 0.2246081382036209, |
| "learning_rate": 2.7584e-05, |
| "loss": 0.9053, |
| "step": 53900 |
| }, |
| { |
| "epoch": 0.86272, |
| "grad_norm": 0.20664174854755402, |
| "learning_rate": 2.752e-05, |
| "loss": 0.9123, |
| "step": 53920 |
| }, |
| { |
| "epoch": 0.86304, |
| "grad_norm": 0.199870765209198, |
| "learning_rate": 2.7456000000000003e-05, |
| "loss": 0.8782, |
| "step": 53940 |
| }, |
| { |
| "epoch": 0.86336, |
| "grad_norm": 0.2337988168001175, |
| "learning_rate": 2.7392e-05, |
| "loss": 0.8961, |
| "step": 53960 |
| }, |
| { |
| "epoch": 0.86368, |
| "grad_norm": 0.24039073288440704, |
| "learning_rate": 2.7328000000000003e-05, |
| "loss": 0.9463, |
| "step": 53980 |
| }, |
| { |
| "epoch": 0.864, |
| "grad_norm": 0.21727830171585083, |
| "learning_rate": 2.7264000000000002e-05, |
| "loss": 0.8852, |
| "step": 54000 |
| }, |
| { |
| "epoch": 0.86432, |
| "grad_norm": 0.25286540389060974, |
| "learning_rate": 2.7200000000000004e-05, |
| "loss": 0.8877, |
| "step": 54020 |
| }, |
| { |
| "epoch": 0.86464, |
| "grad_norm": 0.2132069319486618, |
| "learning_rate": 2.7136e-05, |
| "loss": 0.8378, |
| "step": 54040 |
| }, |
| { |
| "epoch": 0.86496, |
| "grad_norm": 0.19726885855197906, |
| "learning_rate": 2.7072000000000004e-05, |
| "loss": 0.8827, |
| "step": 54060 |
| }, |
| { |
| "epoch": 0.86528, |
| "grad_norm": 0.19055317342281342, |
| "learning_rate": 2.7008e-05, |
| "loss": 0.845, |
| "step": 54080 |
| }, |
| { |
| "epoch": 0.8656, |
| "grad_norm": 0.21198777854442596, |
| "learning_rate": 2.6944e-05, |
| "loss": 0.8468, |
| "step": 54100 |
| }, |
| { |
| "epoch": 0.86592, |
| "grad_norm": 0.1932942420244217, |
| "learning_rate": 2.688e-05, |
| "loss": 0.8163, |
| "step": 54120 |
| }, |
| { |
| "epoch": 0.86624, |
| "grad_norm": 0.21990489959716797, |
| "learning_rate": 2.6816000000000002e-05, |
| "loss": 0.8721, |
| "step": 54140 |
| }, |
| { |
| "epoch": 0.86656, |
| "grad_norm": 0.2552103102207184, |
| "learning_rate": 2.6752e-05, |
| "loss": 0.9031, |
| "step": 54160 |
| }, |
| { |
| "epoch": 0.86688, |
| "grad_norm": 0.20564943552017212, |
| "learning_rate": 2.6688000000000003e-05, |
| "loss": 0.8891, |
| "step": 54180 |
| }, |
| { |
| "epoch": 0.8672, |
| "grad_norm": 0.21163496375083923, |
| "learning_rate": 2.6623999999999998e-05, |
| "loss": 0.8529, |
| "step": 54200 |
| }, |
| { |
| "epoch": 0.86752, |
| "grad_norm": 0.19944727420806885, |
| "learning_rate": 2.6560000000000003e-05, |
| "loss": 0.8493, |
| "step": 54220 |
| }, |
| { |
| "epoch": 0.86784, |
| "grad_norm": 0.20173463225364685, |
| "learning_rate": 2.6496e-05, |
| "loss": 0.8716, |
| "step": 54240 |
| }, |
| { |
| "epoch": 0.86816, |
| "grad_norm": 0.23707176744937897, |
| "learning_rate": 2.6432e-05, |
| "loss": 0.9124, |
| "step": 54260 |
| }, |
| { |
| "epoch": 0.86848, |
| "grad_norm": 0.24114908277988434, |
| "learning_rate": 2.6368000000000002e-05, |
| "loss": 0.8897, |
| "step": 54280 |
| }, |
| { |
| "epoch": 0.8688, |
| "grad_norm": 0.2588618993759155, |
| "learning_rate": 2.6304e-05, |
| "loss": 0.8567, |
| "step": 54300 |
| }, |
| { |
| "epoch": 0.86912, |
| "grad_norm": 0.21841663122177124, |
| "learning_rate": 2.6240000000000003e-05, |
| "loss": 0.8804, |
| "step": 54320 |
| }, |
| { |
| "epoch": 0.86944, |
| "grad_norm": 0.20472241938114166, |
| "learning_rate": 2.6176e-05, |
| "loss": 0.8423, |
| "step": 54340 |
| }, |
| { |
| "epoch": 0.86976, |
| "grad_norm": 0.21747057139873505, |
| "learning_rate": 2.6112000000000003e-05, |
| "loss": 0.8352, |
| "step": 54360 |
| }, |
| { |
| "epoch": 0.87008, |
| "grad_norm": 0.26556268334388733, |
| "learning_rate": 2.6048e-05, |
| "loss": 0.8188, |
| "step": 54380 |
| }, |
| { |
| "epoch": 0.8704, |
| "grad_norm": 0.20089897513389587, |
| "learning_rate": 2.5984000000000004e-05, |
| "loss": 0.8916, |
| "step": 54400 |
| }, |
| { |
| "epoch": 0.87072, |
| "grad_norm": 0.24367238581180573, |
| "learning_rate": 2.592e-05, |
| "loss": 0.9158, |
| "step": 54420 |
| }, |
| { |
| "epoch": 0.87104, |
| "grad_norm": 0.24030858278274536, |
| "learning_rate": 2.5856e-05, |
| "loss": 0.9347, |
| "step": 54440 |
| }, |
| { |
| "epoch": 0.87136, |
| "grad_norm": 0.22043825685977936, |
| "learning_rate": 2.5792e-05, |
| "loss": 0.8796, |
| "step": 54460 |
| }, |
| { |
| "epoch": 0.87168, |
| "grad_norm": 0.243258535861969, |
| "learning_rate": 2.5728e-05, |
| "loss": 0.9063, |
| "step": 54480 |
| }, |
| { |
| "epoch": 0.872, |
| "grad_norm": 0.2250966578722, |
| "learning_rate": 2.5664e-05, |
| "loss": 0.8686, |
| "step": 54500 |
| }, |
| { |
| "epoch": 0.87232, |
| "grad_norm": 0.24469095468521118, |
| "learning_rate": 2.5600000000000002e-05, |
| "loss": 0.8392, |
| "step": 54520 |
| }, |
| { |
| "epoch": 0.87264, |
| "grad_norm": 0.2351893037557602, |
| "learning_rate": 2.5535999999999997e-05, |
| "loss": 0.861, |
| "step": 54540 |
| }, |
| { |
| "epoch": 0.87296, |
| "grad_norm": 0.22012612223625183, |
| "learning_rate": 2.5472000000000003e-05, |
| "loss": 0.8925, |
| "step": 54560 |
| }, |
| { |
| "epoch": 0.87328, |
| "grad_norm": 0.22451180219650269, |
| "learning_rate": 2.5407999999999998e-05, |
| "loss": 0.8572, |
| "step": 54580 |
| }, |
| { |
| "epoch": 0.8736, |
| "grad_norm": 0.2381567507982254, |
| "learning_rate": 2.5344e-05, |
| "loss": 0.9039, |
| "step": 54600 |
| }, |
| { |
| "epoch": 0.87392, |
| "grad_norm": 0.2319832742214203, |
| "learning_rate": 2.5280000000000005e-05, |
| "loss": 0.898, |
| "step": 54620 |
| }, |
| { |
| "epoch": 0.87424, |
| "grad_norm": 0.24306103587150574, |
| "learning_rate": 2.5216e-05, |
| "loss": 0.8999, |
| "step": 54640 |
| }, |
| { |
| "epoch": 0.87456, |
| "grad_norm": 0.22463731467723846, |
| "learning_rate": 2.5152000000000002e-05, |
| "loss": 0.8828, |
| "step": 54660 |
| }, |
| { |
| "epoch": 0.87488, |
| "grad_norm": 0.2395946979522705, |
| "learning_rate": 2.5088e-05, |
| "loss": 0.898, |
| "step": 54680 |
| }, |
| { |
| "epoch": 0.8752, |
| "grad_norm": 0.22378131747245789, |
| "learning_rate": 2.5024000000000003e-05, |
| "loss": 0.8967, |
| "step": 54700 |
| }, |
| { |
| "epoch": 0.87552, |
| "grad_norm": 0.24939024448394775, |
| "learning_rate": 2.496e-05, |
| "loss": 0.8861, |
| "step": 54720 |
| }, |
| { |
| "epoch": 0.87584, |
| "grad_norm": 0.23700961470603943, |
| "learning_rate": 2.4896e-05, |
| "loss": 0.9274, |
| "step": 54740 |
| }, |
| { |
| "epoch": 0.87616, |
| "grad_norm": 0.2053232043981552, |
| "learning_rate": 2.4832000000000002e-05, |
| "loss": 0.8325, |
| "step": 54760 |
| }, |
| { |
| "epoch": 0.87648, |
| "grad_norm": 0.20047616958618164, |
| "learning_rate": 2.4768e-05, |
| "loss": 0.9201, |
| "step": 54780 |
| }, |
| { |
| "epoch": 0.8768, |
| "grad_norm": 0.30408886075019836, |
| "learning_rate": 2.4704000000000003e-05, |
| "loss": 0.8911, |
| "step": 54800 |
| }, |
| { |
| "epoch": 0.87712, |
| "grad_norm": 0.20753300189971924, |
| "learning_rate": 2.464e-05, |
| "loss": 0.8867, |
| "step": 54820 |
| }, |
| { |
| "epoch": 0.87744, |
| "grad_norm": 0.2093687355518341, |
| "learning_rate": 2.4576000000000003e-05, |
| "loss": 0.8762, |
| "step": 54840 |
| }, |
| { |
| "epoch": 0.87776, |
| "grad_norm": 0.226734921336174, |
| "learning_rate": 2.4512000000000002e-05, |
| "loss": 0.8893, |
| "step": 54860 |
| }, |
| { |
| "epoch": 0.87808, |
| "grad_norm": 0.21793030202388763, |
| "learning_rate": 2.4448e-05, |
| "loss": 0.8665, |
| "step": 54880 |
| }, |
| { |
| "epoch": 0.8784, |
| "grad_norm": 0.2046462595462799, |
| "learning_rate": 2.4384000000000002e-05, |
| "loss": 0.8999, |
| "step": 54900 |
| }, |
| { |
| "epoch": 0.87872, |
| "grad_norm": 0.2325984686613083, |
| "learning_rate": 2.432e-05, |
| "loss": 0.9291, |
| "step": 54920 |
| }, |
| { |
| "epoch": 0.87904, |
| "grad_norm": 0.20662260055541992, |
| "learning_rate": 2.4256e-05, |
| "loss": 0.8944, |
| "step": 54940 |
| }, |
| { |
| "epoch": 0.87936, |
| "grad_norm": 0.2617480158805847, |
| "learning_rate": 2.4192e-05, |
| "loss": 0.9573, |
| "step": 54960 |
| }, |
| { |
| "epoch": 0.87968, |
| "grad_norm": 0.18719272315502167, |
| "learning_rate": 2.4128e-05, |
| "loss": 0.8767, |
| "step": 54980 |
| }, |
| { |
| "epoch": 0.88, |
| "grad_norm": 0.27199268341064453, |
| "learning_rate": 2.4064000000000002e-05, |
| "loss": 0.9066, |
| "step": 55000 |
| }, |
| { |
| "epoch": 0.88032, |
| "grad_norm": 0.21330726146697998, |
| "learning_rate": 2.4e-05, |
| "loss": 0.8594, |
| "step": 55020 |
| }, |
| { |
| "epoch": 0.88064, |
| "grad_norm": 0.1909005492925644, |
| "learning_rate": 2.3936e-05, |
| "loss": 0.8807, |
| "step": 55040 |
| }, |
| { |
| "epoch": 0.88096, |
| "grad_norm": 0.21157494187355042, |
| "learning_rate": 2.3872e-05, |
| "loss": 0.9019, |
| "step": 55060 |
| }, |
| { |
| "epoch": 0.88128, |
| "grad_norm": 0.19137850403785706, |
| "learning_rate": 2.3808000000000003e-05, |
| "loss": 0.8873, |
| "step": 55080 |
| }, |
| { |
| "epoch": 0.8816, |
| "grad_norm": 0.17469841241836548, |
| "learning_rate": 2.3744000000000002e-05, |
| "loss": 0.8225, |
| "step": 55100 |
| }, |
| { |
| "epoch": 0.88192, |
| "grad_norm": 0.21227526664733887, |
| "learning_rate": 2.3680000000000004e-05, |
| "loss": 0.9273, |
| "step": 55120 |
| }, |
| { |
| "epoch": 0.88224, |
| "grad_norm": 0.22155140340328217, |
| "learning_rate": 2.3616000000000002e-05, |
| "loss": 0.8716, |
| "step": 55140 |
| }, |
| { |
| "epoch": 0.88256, |
| "grad_norm": 0.2392839938402176, |
| "learning_rate": 2.3552e-05, |
| "loss": 0.8905, |
| "step": 55160 |
| }, |
| { |
| "epoch": 0.88288, |
| "grad_norm": 0.2480785995721817, |
| "learning_rate": 2.3488000000000003e-05, |
| "loss": 0.8906, |
| "step": 55180 |
| }, |
| { |
| "epoch": 0.8832, |
| "grad_norm": 0.23919089138507843, |
| "learning_rate": 2.3424e-05, |
| "loss": 0.8912, |
| "step": 55200 |
| }, |
| { |
| "epoch": 0.88352, |
| "grad_norm": 0.2384289801120758, |
| "learning_rate": 2.336e-05, |
| "loss": 0.8556, |
| "step": 55220 |
| }, |
| { |
| "epoch": 0.88384, |
| "grad_norm": 0.23314107954502106, |
| "learning_rate": 2.3296000000000002e-05, |
| "loss": 0.8638, |
| "step": 55240 |
| }, |
| { |
| "epoch": 0.88416, |
| "grad_norm": 0.20176903903484344, |
| "learning_rate": 2.3232e-05, |
| "loss": 0.8767, |
| "step": 55260 |
| }, |
| { |
| "epoch": 0.88448, |
| "grad_norm": 0.23449799418449402, |
| "learning_rate": 2.3168000000000002e-05, |
| "loss": 0.8811, |
| "step": 55280 |
| }, |
| { |
| "epoch": 0.8848, |
| "grad_norm": 0.2195407897233963, |
| "learning_rate": 2.3104e-05, |
| "loss": 0.8956, |
| "step": 55300 |
| }, |
| { |
| "epoch": 0.88512, |
| "grad_norm": 0.2260621041059494, |
| "learning_rate": 2.304e-05, |
| "loss": 0.8781, |
| "step": 55320 |
| }, |
| { |
| "epoch": 0.88544, |
| "grad_norm": 0.21167124807834625, |
| "learning_rate": 2.2976e-05, |
| "loss": 0.8251, |
| "step": 55340 |
| }, |
| { |
| "epoch": 0.88576, |
| "grad_norm": 0.219034343957901, |
| "learning_rate": 2.2912e-05, |
| "loss": 0.8413, |
| "step": 55360 |
| }, |
| { |
| "epoch": 0.88608, |
| "grad_norm": 0.24565385282039642, |
| "learning_rate": 2.2848e-05, |
| "loss": 0.9033, |
| "step": 55380 |
| }, |
| { |
| "epoch": 0.8864, |
| "grad_norm": 0.2161465734243393, |
| "learning_rate": 2.2784e-05, |
| "loss": 0.884, |
| "step": 55400 |
| }, |
| { |
| "epoch": 0.88672, |
| "grad_norm": 0.2424006164073944, |
| "learning_rate": 2.2720000000000003e-05, |
| "loss": 0.9424, |
| "step": 55420 |
| }, |
| { |
| "epoch": 0.88704, |
| "grad_norm": 0.2734192907810211, |
| "learning_rate": 2.2656e-05, |
| "loss": 0.9217, |
| "step": 55440 |
| }, |
| { |
| "epoch": 0.88736, |
| "grad_norm": 0.2107728272676468, |
| "learning_rate": 2.2592000000000003e-05, |
| "loss": 0.8789, |
| "step": 55460 |
| }, |
| { |
| "epoch": 0.88768, |
| "grad_norm": 0.2616425156593323, |
| "learning_rate": 2.2528000000000002e-05, |
| "loss": 0.918, |
| "step": 55480 |
| }, |
| { |
| "epoch": 0.888, |
| "grad_norm": 0.23145994544029236, |
| "learning_rate": 2.2464e-05, |
| "loss": 0.8626, |
| "step": 55500 |
| }, |
| { |
| "epoch": 0.88832, |
| "grad_norm": 0.22918421030044556, |
| "learning_rate": 2.2400000000000002e-05, |
| "loss": 0.8656, |
| "step": 55520 |
| }, |
| { |
| "epoch": 0.88864, |
| "grad_norm": 0.2246193289756775, |
| "learning_rate": 2.2336e-05, |
| "loss": 0.8569, |
| "step": 55540 |
| }, |
| { |
| "epoch": 0.88896, |
| "grad_norm": 0.23850585520267487, |
| "learning_rate": 2.2272e-05, |
| "loss": 0.8612, |
| "step": 55560 |
| }, |
| { |
| "epoch": 0.88928, |
| "grad_norm": 0.21243111789226532, |
| "learning_rate": 2.2208e-05, |
| "loss": 0.8612, |
| "step": 55580 |
| }, |
| { |
| "epoch": 0.8896, |
| "grad_norm": 0.1799386590719223, |
| "learning_rate": 2.2144e-05, |
| "loss": 0.8601, |
| "step": 55600 |
| }, |
| { |
| "epoch": 0.88992, |
| "grad_norm": 0.20039919018745422, |
| "learning_rate": 2.2080000000000002e-05, |
| "loss": 0.8781, |
| "step": 55620 |
| }, |
| { |
| "epoch": 0.89024, |
| "grad_norm": 0.20583823323249817, |
| "learning_rate": 2.2016e-05, |
| "loss": 0.8552, |
| "step": 55640 |
| }, |
| { |
| "epoch": 0.89056, |
| "grad_norm": 0.25878387689590454, |
| "learning_rate": 2.1952e-05, |
| "loss": 0.8694, |
| "step": 55660 |
| }, |
| { |
| "epoch": 0.89088, |
| "grad_norm": 0.26383817195892334, |
| "learning_rate": 2.1888e-05, |
| "loss": 0.916, |
| "step": 55680 |
| }, |
| { |
| "epoch": 0.8912, |
| "grad_norm": 0.2270156741142273, |
| "learning_rate": 2.1824e-05, |
| "loss": 0.9219, |
| "step": 55700 |
| }, |
| { |
| "epoch": 0.89152, |
| "grad_norm": 0.22747768461704254, |
| "learning_rate": 2.176e-05, |
| "loss": 0.9066, |
| "step": 55720 |
| }, |
| { |
| "epoch": 0.89184, |
| "grad_norm": 0.2369069904088974, |
| "learning_rate": 2.1696e-05, |
| "loss": 0.9127, |
| "step": 55740 |
| }, |
| { |
| "epoch": 0.89216, |
| "grad_norm": 0.22584250569343567, |
| "learning_rate": 2.1632000000000002e-05, |
| "loss": 0.8571, |
| "step": 55760 |
| }, |
| { |
| "epoch": 0.89248, |
| "grad_norm": 0.2100561112165451, |
| "learning_rate": 2.1568e-05, |
| "loss": 0.8911, |
| "step": 55780 |
| }, |
| { |
| "epoch": 0.8928, |
| "grad_norm": 0.24614600837230682, |
| "learning_rate": 2.1504000000000003e-05, |
| "loss": 0.8826, |
| "step": 55800 |
| }, |
| { |
| "epoch": 0.89312, |
| "grad_norm": 0.25491416454315186, |
| "learning_rate": 2.144e-05, |
| "loss": 0.8619, |
| "step": 55820 |
| }, |
| { |
| "epoch": 0.89344, |
| "grad_norm": 0.22057655453681946, |
| "learning_rate": 2.1376e-05, |
| "loss": 0.8607, |
| "step": 55840 |
| }, |
| { |
| "epoch": 0.89376, |
| "grad_norm": 0.2257709503173828, |
| "learning_rate": 2.1312000000000002e-05, |
| "loss": 0.8979, |
| "step": 55860 |
| }, |
| { |
| "epoch": 0.89408, |
| "grad_norm": 0.20789384841918945, |
| "learning_rate": 2.1248e-05, |
| "loss": 0.8385, |
| "step": 55880 |
| }, |
| { |
| "epoch": 0.8944, |
| "grad_norm": 0.22959135472774506, |
| "learning_rate": 2.1184000000000002e-05, |
| "loss": 0.866, |
| "step": 55900 |
| }, |
| { |
| "epoch": 0.89472, |
| "grad_norm": 0.21270857751369476, |
| "learning_rate": 2.112e-05, |
| "loss": 0.8747, |
| "step": 55920 |
| }, |
| { |
| "epoch": 0.89504, |
| "grad_norm": 0.22662252187728882, |
| "learning_rate": 2.1056e-05, |
| "loss": 0.8836, |
| "step": 55940 |
| }, |
| { |
| "epoch": 0.89536, |
| "grad_norm": 0.22201156616210938, |
| "learning_rate": 2.0992e-05, |
| "loss": 0.924, |
| "step": 55960 |
| }, |
| { |
| "epoch": 0.89568, |
| "grad_norm": 0.21177421510219574, |
| "learning_rate": 2.0928e-05, |
| "loss": 0.8794, |
| "step": 55980 |
| }, |
| { |
| "epoch": 0.896, |
| "grad_norm": 0.23896725475788116, |
| "learning_rate": 2.0864e-05, |
| "loss": 0.8967, |
| "step": 56000 |
| }, |
| { |
| "epoch": 0.89632, |
| "grad_norm": 0.21801073849201202, |
| "learning_rate": 2.08e-05, |
| "loss": 0.9033, |
| "step": 56020 |
| }, |
| { |
| "epoch": 0.89664, |
| "grad_norm": 0.19565339386463165, |
| "learning_rate": 2.0736e-05, |
| "loss": 0.8857, |
| "step": 56040 |
| }, |
| { |
| "epoch": 0.89696, |
| "grad_norm": 0.21768838167190552, |
| "learning_rate": 2.0672e-05, |
| "loss": 0.8463, |
| "step": 56060 |
| }, |
| { |
| "epoch": 0.89728, |
| "grad_norm": 0.20696651935577393, |
| "learning_rate": 2.0608000000000003e-05, |
| "loss": 0.8725, |
| "step": 56080 |
| }, |
| { |
| "epoch": 0.8976, |
| "grad_norm": 0.2298385500907898, |
| "learning_rate": 2.0544000000000002e-05, |
| "loss": 0.902, |
| "step": 56100 |
| }, |
| { |
| "epoch": 0.89792, |
| "grad_norm": 0.22712761163711548, |
| "learning_rate": 2.048e-05, |
| "loss": 0.8109, |
| "step": 56120 |
| }, |
| { |
| "epoch": 0.89824, |
| "grad_norm": 0.23746636509895325, |
| "learning_rate": 2.0416000000000002e-05, |
| "loss": 0.9398, |
| "step": 56140 |
| }, |
| { |
| "epoch": 0.89856, |
| "grad_norm": 0.2161879539489746, |
| "learning_rate": 2.0352e-05, |
| "loss": 0.9345, |
| "step": 56160 |
| }, |
| { |
| "epoch": 0.89888, |
| "grad_norm": 0.18641656637191772, |
| "learning_rate": 2.0288000000000003e-05, |
| "loss": 0.8868, |
| "step": 56180 |
| }, |
| { |
| "epoch": 0.8992, |
| "grad_norm": 0.21114569902420044, |
| "learning_rate": 2.0224e-05, |
| "loss": 0.8638, |
| "step": 56200 |
| }, |
| { |
| "epoch": 0.89952, |
| "grad_norm": 0.2475218027830124, |
| "learning_rate": 2.016e-05, |
| "loss": 0.8695, |
| "step": 56220 |
| }, |
| { |
| "epoch": 0.89984, |
| "grad_norm": 0.2249087691307068, |
| "learning_rate": 2.0096000000000002e-05, |
| "loss": 0.8671, |
| "step": 56240 |
| }, |
| { |
| "epoch": 0.90016, |
| "grad_norm": 0.23106272518634796, |
| "learning_rate": 2.0032e-05, |
| "loss": 0.8591, |
| "step": 56260 |
| }, |
| { |
| "epoch": 0.90048, |
| "grad_norm": 0.23417605459690094, |
| "learning_rate": 1.9968e-05, |
| "loss": 0.8675, |
| "step": 56280 |
| }, |
| { |
| "epoch": 0.9008, |
| "grad_norm": 0.25275781750679016, |
| "learning_rate": 1.9904e-05, |
| "loss": 0.9146, |
| "step": 56300 |
| }, |
| { |
| "epoch": 0.90112, |
| "grad_norm": 0.2217005491256714, |
| "learning_rate": 1.984e-05, |
| "loss": 0.9167, |
| "step": 56320 |
| }, |
| { |
| "epoch": 0.90144, |
| "grad_norm": 0.2137831449508667, |
| "learning_rate": 1.9776000000000002e-05, |
| "loss": 0.8699, |
| "step": 56340 |
| }, |
| { |
| "epoch": 0.90176, |
| "grad_norm": 0.2010769098997116, |
| "learning_rate": 1.9712e-05, |
| "loss": 0.8862, |
| "step": 56360 |
| }, |
| { |
| "epoch": 0.90208, |
| "grad_norm": 0.23932960629463196, |
| "learning_rate": 1.9648e-05, |
| "loss": 0.866, |
| "step": 56380 |
| }, |
| { |
| "epoch": 0.9024, |
| "grad_norm": 0.22078342735767365, |
| "learning_rate": 1.9584e-05, |
| "loss": 0.914, |
| "step": 56400 |
| }, |
| { |
| "epoch": 0.90272, |
| "grad_norm": 0.2117830365896225, |
| "learning_rate": 1.9520000000000003e-05, |
| "loss": 0.8706, |
| "step": 56420 |
| }, |
| { |
| "epoch": 0.90304, |
| "grad_norm": 0.22050690650939941, |
| "learning_rate": 1.9456e-05, |
| "loss": 0.8842, |
| "step": 56440 |
| }, |
| { |
| "epoch": 0.90336, |
| "grad_norm": 0.21533265709877014, |
| "learning_rate": 1.9392000000000003e-05, |
| "loss": 0.8478, |
| "step": 56460 |
| }, |
| { |
| "epoch": 0.90368, |
| "grad_norm": 0.23256616294384003, |
| "learning_rate": 1.9328000000000002e-05, |
| "loss": 0.8912, |
| "step": 56480 |
| }, |
| { |
| "epoch": 0.904, |
| "grad_norm": 0.24610291421413422, |
| "learning_rate": 1.9264e-05, |
| "loss": 0.8493, |
| "step": 56500 |
| }, |
| { |
| "epoch": 0.90432, |
| "grad_norm": 0.2145715057849884, |
| "learning_rate": 1.9200000000000003e-05, |
| "loss": 0.8626, |
| "step": 56520 |
| }, |
| { |
| "epoch": 0.90464, |
| "grad_norm": 0.21198943257331848, |
| "learning_rate": 1.91392e-05, |
| "loss": 0.9013, |
| "step": 56540 |
| }, |
| { |
| "epoch": 0.90496, |
| "grad_norm": 0.2638920843601227, |
| "learning_rate": 1.90752e-05, |
| "loss": 0.9176, |
| "step": 56560 |
| }, |
| { |
| "epoch": 0.90528, |
| "grad_norm": 0.17651067674160004, |
| "learning_rate": 1.9011200000000002e-05, |
| "loss": 0.9293, |
| "step": 56580 |
| }, |
| { |
| "epoch": 0.9056, |
| "grad_norm": 0.20234732329845428, |
| "learning_rate": 1.89472e-05, |
| "loss": 0.8781, |
| "step": 56600 |
| }, |
| { |
| "epoch": 0.90592, |
| "grad_norm": 0.2263152003288269, |
| "learning_rate": 1.8883200000000002e-05, |
| "loss": 0.9071, |
| "step": 56620 |
| }, |
| { |
| "epoch": 0.90624, |
| "grad_norm": 0.2451239675283432, |
| "learning_rate": 1.88192e-05, |
| "loss": 0.9174, |
| "step": 56640 |
| }, |
| { |
| "epoch": 0.90656, |
| "grad_norm": 0.22056809067726135, |
| "learning_rate": 1.87552e-05, |
| "loss": 0.9132, |
| "step": 56660 |
| }, |
| { |
| "epoch": 0.90688, |
| "grad_norm": 0.2163904458284378, |
| "learning_rate": 1.86912e-05, |
| "loss": 0.9208, |
| "step": 56680 |
| }, |
| { |
| "epoch": 0.9072, |
| "grad_norm": 0.23965485394001007, |
| "learning_rate": 1.86272e-05, |
| "loss": 0.8778, |
| "step": 56700 |
| }, |
| { |
| "epoch": 0.90752, |
| "grad_norm": 0.2319948375225067, |
| "learning_rate": 1.85632e-05, |
| "loss": 0.8673, |
| "step": 56720 |
| }, |
| { |
| "epoch": 0.90784, |
| "grad_norm": 0.17709515988826752, |
| "learning_rate": 1.84992e-05, |
| "loss": 0.8635, |
| "step": 56740 |
| }, |
| { |
| "epoch": 0.90816, |
| "grad_norm": 0.22696132957935333, |
| "learning_rate": 1.84352e-05, |
| "loss": 0.8179, |
| "step": 56760 |
| }, |
| { |
| "epoch": 0.90848, |
| "grad_norm": 0.25801581144332886, |
| "learning_rate": 1.8371199999999998e-05, |
| "loss": 0.9357, |
| "step": 56780 |
| }, |
| { |
| "epoch": 0.9088, |
| "grad_norm": 0.20928123593330383, |
| "learning_rate": 1.8307200000000003e-05, |
| "loss": 0.8228, |
| "step": 56800 |
| }, |
| { |
| "epoch": 0.90912, |
| "grad_norm": 0.18093565106391907, |
| "learning_rate": 1.8243200000000002e-05, |
| "loss": 0.8613, |
| "step": 56820 |
| }, |
| { |
| "epoch": 0.90944, |
| "grad_norm": 0.21541033685207367, |
| "learning_rate": 1.81792e-05, |
| "loss": 0.8624, |
| "step": 56840 |
| }, |
| { |
| "epoch": 0.90976, |
| "grad_norm": 0.1991436630487442, |
| "learning_rate": 1.8115200000000002e-05, |
| "loss": 0.8876, |
| "step": 56860 |
| }, |
| { |
| "epoch": 0.91008, |
| "grad_norm": 0.2240942418575287, |
| "learning_rate": 1.80512e-05, |
| "loss": 0.9089, |
| "step": 56880 |
| }, |
| { |
| "epoch": 0.9104, |
| "grad_norm": 0.2239503711462021, |
| "learning_rate": 1.7987200000000003e-05, |
| "loss": 0.8968, |
| "step": 56900 |
| }, |
| { |
| "epoch": 0.91072, |
| "grad_norm": 0.20320284366607666, |
| "learning_rate": 1.79232e-05, |
| "loss": 0.8932, |
| "step": 56920 |
| }, |
| { |
| "epoch": 0.91104, |
| "grad_norm": 0.2082601636648178, |
| "learning_rate": 1.78592e-05, |
| "loss": 0.8576, |
| "step": 56940 |
| }, |
| { |
| "epoch": 0.91136, |
| "grad_norm": 0.24213466048240662, |
| "learning_rate": 1.7795200000000002e-05, |
| "loss": 0.9186, |
| "step": 56960 |
| }, |
| { |
| "epoch": 0.91168, |
| "grad_norm": 0.21737167239189148, |
| "learning_rate": 1.77312e-05, |
| "loss": 0.8754, |
| "step": 56980 |
| }, |
| { |
| "epoch": 0.912, |
| "grad_norm": 0.22130608558654785, |
| "learning_rate": 1.76672e-05, |
| "loss": 0.8952, |
| "step": 57000 |
| }, |
| { |
| "epoch": 0.91232, |
| "grad_norm": 0.24348904192447662, |
| "learning_rate": 1.76032e-05, |
| "loss": 0.8855, |
| "step": 57020 |
| }, |
| { |
| "epoch": 0.91264, |
| "grad_norm": 0.2024615854024887, |
| "learning_rate": 1.75392e-05, |
| "loss": 0.8279, |
| "step": 57040 |
| }, |
| { |
| "epoch": 0.91296, |
| "grad_norm": 0.23168401420116425, |
| "learning_rate": 1.7475199999999998e-05, |
| "loss": 0.833, |
| "step": 57060 |
| }, |
| { |
| "epoch": 0.91328, |
| "grad_norm": 0.2201065719127655, |
| "learning_rate": 1.74112e-05, |
| "loss": 0.9255, |
| "step": 57080 |
| }, |
| { |
| "epoch": 0.9136, |
| "grad_norm": 0.22319328784942627, |
| "learning_rate": 1.73472e-05, |
| "loss": 0.8326, |
| "step": 57100 |
| }, |
| { |
| "epoch": 0.91392, |
| "grad_norm": 0.1963931918144226, |
| "learning_rate": 1.72832e-05, |
| "loss": 0.8416, |
| "step": 57120 |
| }, |
| { |
| "epoch": 0.91424, |
| "grad_norm": 0.23881016671657562, |
| "learning_rate": 1.7219200000000003e-05, |
| "loss": 0.8786, |
| "step": 57140 |
| }, |
| { |
| "epoch": 0.91456, |
| "grad_norm": 0.20957258343696594, |
| "learning_rate": 1.71552e-05, |
| "loss": 0.8972, |
| "step": 57160 |
| }, |
| { |
| "epoch": 0.91488, |
| "grad_norm": 0.25064921379089355, |
| "learning_rate": 1.7091200000000003e-05, |
| "loss": 0.8594, |
| "step": 57180 |
| }, |
| { |
| "epoch": 0.9152, |
| "grad_norm": 0.22620342671871185, |
| "learning_rate": 1.7027200000000002e-05, |
| "loss": 0.855, |
| "step": 57200 |
| }, |
| { |
| "epoch": 0.91552, |
| "grad_norm": 0.22465457022190094, |
| "learning_rate": 1.69632e-05, |
| "loss": 0.8507, |
| "step": 57220 |
| }, |
| { |
| "epoch": 0.91584, |
| "grad_norm": 0.33410021662712097, |
| "learning_rate": 1.6899200000000002e-05, |
| "loss": 0.8608, |
| "step": 57240 |
| }, |
| { |
| "epoch": 0.91616, |
| "grad_norm": 0.20130544900894165, |
| "learning_rate": 1.68352e-05, |
| "loss": 0.8672, |
| "step": 57260 |
| }, |
| { |
| "epoch": 0.91648, |
| "grad_norm": 0.2350778877735138, |
| "learning_rate": 1.67712e-05, |
| "loss": 0.8942, |
| "step": 57280 |
| }, |
| { |
| "epoch": 0.9168, |
| "grad_norm": 0.21875150501728058, |
| "learning_rate": 1.67072e-05, |
| "loss": 0.9244, |
| "step": 57300 |
| }, |
| { |
| "epoch": 0.91712, |
| "grad_norm": 0.18828479945659637, |
| "learning_rate": 1.66432e-05, |
| "loss": 0.9132, |
| "step": 57320 |
| }, |
| { |
| "epoch": 0.91744, |
| "grad_norm": 0.19180278480052948, |
| "learning_rate": 1.65792e-05, |
| "loss": 0.8833, |
| "step": 57340 |
| }, |
| { |
| "epoch": 0.91776, |
| "grad_norm": 0.2215159833431244, |
| "learning_rate": 1.65152e-05, |
| "loss": 0.909, |
| "step": 57360 |
| }, |
| { |
| "epoch": 0.91808, |
| "grad_norm": 0.19516055285930634, |
| "learning_rate": 1.64512e-05, |
| "loss": 0.801, |
| "step": 57380 |
| }, |
| { |
| "epoch": 0.9184, |
| "grad_norm": 0.19184818863868713, |
| "learning_rate": 1.63872e-05, |
| "loss": 0.8942, |
| "step": 57400 |
| }, |
| { |
| "epoch": 0.91872, |
| "grad_norm": 0.21622811257839203, |
| "learning_rate": 1.63232e-05, |
| "loss": 0.8772, |
| "step": 57420 |
| }, |
| { |
| "epoch": 0.91904, |
| "grad_norm": 0.19087673723697662, |
| "learning_rate": 1.62592e-05, |
| "loss": 0.8736, |
| "step": 57440 |
| }, |
| { |
| "epoch": 0.91936, |
| "grad_norm": 0.21483179926872253, |
| "learning_rate": 1.61952e-05, |
| "loss": 0.8696, |
| "step": 57460 |
| }, |
| { |
| "epoch": 0.91968, |
| "grad_norm": 0.18231695890426636, |
| "learning_rate": 1.6131200000000002e-05, |
| "loss": 0.8429, |
| "step": 57480 |
| }, |
| { |
| "epoch": 0.92, |
| "grad_norm": 0.24777543544769287, |
| "learning_rate": 1.60672e-05, |
| "loss": 0.9207, |
| "step": 57500 |
| }, |
| { |
| "epoch": 0.92032, |
| "grad_norm": 0.21233485639095306, |
| "learning_rate": 1.6003200000000003e-05, |
| "loss": 0.9358, |
| "step": 57520 |
| }, |
| { |
| "epoch": 0.92064, |
| "grad_norm": 0.257861465215683, |
| "learning_rate": 1.59392e-05, |
| "loss": 0.9123, |
| "step": 57540 |
| }, |
| { |
| "epoch": 0.92096, |
| "grad_norm": 0.18959666788578033, |
| "learning_rate": 1.58752e-05, |
| "loss": 0.8834, |
| "step": 57560 |
| }, |
| { |
| "epoch": 0.92128, |
| "grad_norm": 0.22325819730758667, |
| "learning_rate": 1.5811200000000002e-05, |
| "loss": 0.8358, |
| "step": 57580 |
| }, |
| { |
| "epoch": 0.9216, |
| "grad_norm": 0.25303423404693604, |
| "learning_rate": 1.57472e-05, |
| "loss": 0.8783, |
| "step": 57600 |
| }, |
| { |
| "epoch": 0.92192, |
| "grad_norm": 0.18842332065105438, |
| "learning_rate": 1.56832e-05, |
| "loss": 0.8752, |
| "step": 57620 |
| }, |
| { |
| "epoch": 0.92224, |
| "grad_norm": 0.20843419432640076, |
| "learning_rate": 1.56192e-05, |
| "loss": 0.8911, |
| "step": 57640 |
| }, |
| { |
| "epoch": 0.92256, |
| "grad_norm": 0.2596145570278168, |
| "learning_rate": 1.55552e-05, |
| "loss": 0.8641, |
| "step": 57660 |
| }, |
| { |
| "epoch": 0.92288, |
| "grad_norm": 0.22827741503715515, |
| "learning_rate": 1.5491200000000002e-05, |
| "loss": 0.8859, |
| "step": 57680 |
| }, |
| { |
| "epoch": 0.9232, |
| "grad_norm": 0.21393606066703796, |
| "learning_rate": 1.54272e-05, |
| "loss": 0.9042, |
| "step": 57700 |
| }, |
| { |
| "epoch": 0.92352, |
| "grad_norm": 0.1834941953420639, |
| "learning_rate": 1.53632e-05, |
| "loss": 0.8484, |
| "step": 57720 |
| }, |
| { |
| "epoch": 0.92384, |
| "grad_norm": 0.21815276145935059, |
| "learning_rate": 1.52992e-05, |
| "loss": 0.9284, |
| "step": 57740 |
| }, |
| { |
| "epoch": 0.92416, |
| "grad_norm": 0.20257651805877686, |
| "learning_rate": 1.52352e-05, |
| "loss": 0.8286, |
| "step": 57760 |
| }, |
| { |
| "epoch": 0.92448, |
| "grad_norm": 0.258215993642807, |
| "learning_rate": 1.5171200000000001e-05, |
| "loss": 0.9197, |
| "step": 57780 |
| }, |
| { |
| "epoch": 0.9248, |
| "grad_norm": 0.21582092344760895, |
| "learning_rate": 1.5107200000000002e-05, |
| "loss": 0.8859, |
| "step": 57800 |
| }, |
| { |
| "epoch": 0.92512, |
| "grad_norm": 0.2228287011384964, |
| "learning_rate": 1.5043200000000002e-05, |
| "loss": 0.8918, |
| "step": 57820 |
| }, |
| { |
| "epoch": 0.92544, |
| "grad_norm": 0.22294846177101135, |
| "learning_rate": 1.4979200000000002e-05, |
| "loss": 0.9276, |
| "step": 57840 |
| }, |
| { |
| "epoch": 0.92576, |
| "grad_norm": 0.19540980458259583, |
| "learning_rate": 1.49152e-05, |
| "loss": 0.8801, |
| "step": 57860 |
| }, |
| { |
| "epoch": 0.92608, |
| "grad_norm": 0.19074346125125885, |
| "learning_rate": 1.4851200000000001e-05, |
| "loss": 0.8475, |
| "step": 57880 |
| }, |
| { |
| "epoch": 0.9264, |
| "grad_norm": 0.19179295003414154, |
| "learning_rate": 1.4787200000000001e-05, |
| "loss": 0.9089, |
| "step": 57900 |
| }, |
| { |
| "epoch": 0.92672, |
| "grad_norm": 0.20641054213047028, |
| "learning_rate": 1.4723200000000002e-05, |
| "loss": 0.8313, |
| "step": 57920 |
| }, |
| { |
| "epoch": 0.92704, |
| "grad_norm": 0.19102010130882263, |
| "learning_rate": 1.46592e-05, |
| "loss": 0.8915, |
| "step": 57940 |
| }, |
| { |
| "epoch": 0.92736, |
| "grad_norm": 0.1892133504152298, |
| "learning_rate": 1.45952e-05, |
| "loss": 0.8708, |
| "step": 57960 |
| }, |
| { |
| "epoch": 0.92768, |
| "grad_norm": 0.21764519810676575, |
| "learning_rate": 1.45312e-05, |
| "loss": 0.9107, |
| "step": 57980 |
| }, |
| { |
| "epoch": 0.928, |
| "grad_norm": 0.2371160387992859, |
| "learning_rate": 1.4467200000000001e-05, |
| "loss": 0.9397, |
| "step": 58000 |
| }, |
| { |
| "epoch": 0.92832, |
| "grad_norm": 0.2032940536737442, |
| "learning_rate": 1.44032e-05, |
| "loss": 0.8406, |
| "step": 58020 |
| }, |
| { |
| "epoch": 0.92864, |
| "grad_norm": 0.20223209261894226, |
| "learning_rate": 1.43392e-05, |
| "loss": 0.8387, |
| "step": 58040 |
| }, |
| { |
| "epoch": 0.92896, |
| "grad_norm": 0.19026170670986176, |
| "learning_rate": 1.42752e-05, |
| "loss": 0.8826, |
| "step": 58060 |
| }, |
| { |
| "epoch": 0.92928, |
| "grad_norm": 0.23420760035514832, |
| "learning_rate": 1.4211199999999999e-05, |
| "loss": 0.8779, |
| "step": 58080 |
| }, |
| { |
| "epoch": 0.9296, |
| "grad_norm": 0.19909054040908813, |
| "learning_rate": 1.4147199999999999e-05, |
| "loss": 0.8931, |
| "step": 58100 |
| }, |
| { |
| "epoch": 0.92992, |
| "grad_norm": 0.23734207451343536, |
| "learning_rate": 1.4083200000000003e-05, |
| "loss": 0.8522, |
| "step": 58120 |
| }, |
| { |
| "epoch": 0.93024, |
| "grad_norm": 0.1749676764011383, |
| "learning_rate": 1.4019200000000001e-05, |
| "loss": 0.8714, |
| "step": 58140 |
| }, |
| { |
| "epoch": 0.93056, |
| "grad_norm": 0.21267655491828918, |
| "learning_rate": 1.3955200000000002e-05, |
| "loss": 0.8442, |
| "step": 58160 |
| }, |
| { |
| "epoch": 0.93088, |
| "grad_norm": 0.25414636731147766, |
| "learning_rate": 1.3891200000000002e-05, |
| "loss": 0.9268, |
| "step": 58180 |
| }, |
| { |
| "epoch": 0.9312, |
| "grad_norm": 0.20942141115665436, |
| "learning_rate": 1.3827200000000002e-05, |
| "loss": 0.8509, |
| "step": 58200 |
| }, |
| { |
| "epoch": 0.93152, |
| "grad_norm": 0.2121083289384842, |
| "learning_rate": 1.37632e-05, |
| "loss": 0.8751, |
| "step": 58220 |
| }, |
| { |
| "epoch": 0.93184, |
| "grad_norm": 0.21758875250816345, |
| "learning_rate": 1.3699200000000001e-05, |
| "loss": 0.8875, |
| "step": 58240 |
| }, |
| { |
| "epoch": 0.93216, |
| "grad_norm": 0.188105046749115, |
| "learning_rate": 1.3635200000000001e-05, |
| "loss": 0.8876, |
| "step": 58260 |
| }, |
| { |
| "epoch": 0.93248, |
| "grad_norm": 0.18917614221572876, |
| "learning_rate": 1.35712e-05, |
| "loss": 0.8453, |
| "step": 58280 |
| }, |
| { |
| "epoch": 0.9328, |
| "grad_norm": 0.19364288449287415, |
| "learning_rate": 1.35072e-05, |
| "loss": 0.829, |
| "step": 58300 |
| }, |
| { |
| "epoch": 0.93312, |
| "grad_norm": 0.19416366517543793, |
| "learning_rate": 1.34432e-05, |
| "loss": 0.8625, |
| "step": 58320 |
| }, |
| { |
| "epoch": 0.93344, |
| "grad_norm": 0.2294871211051941, |
| "learning_rate": 1.33792e-05, |
| "loss": 0.8696, |
| "step": 58340 |
| }, |
| { |
| "epoch": 0.93376, |
| "grad_norm": 0.21654824912548065, |
| "learning_rate": 1.33152e-05, |
| "loss": 0.8733, |
| "step": 58360 |
| }, |
| { |
| "epoch": 0.93408, |
| "grad_norm": 0.2053345888853073, |
| "learning_rate": 1.32512e-05, |
| "loss": 0.9046, |
| "step": 58380 |
| }, |
| { |
| "epoch": 0.9344, |
| "grad_norm": 0.18881118297576904, |
| "learning_rate": 1.31872e-05, |
| "loss": 0.8252, |
| "step": 58400 |
| }, |
| { |
| "epoch": 0.93472, |
| "grad_norm": 0.23716068267822266, |
| "learning_rate": 1.31232e-05, |
| "loss": 0.8928, |
| "step": 58420 |
| }, |
| { |
| "epoch": 0.93504, |
| "grad_norm": 0.19801940023899078, |
| "learning_rate": 1.3059200000000002e-05, |
| "loss": 0.8692, |
| "step": 58440 |
| }, |
| { |
| "epoch": 0.93536, |
| "grad_norm": 0.2585828900337219, |
| "learning_rate": 1.2995200000000002e-05, |
| "loss": 0.9274, |
| "step": 58460 |
| }, |
| { |
| "epoch": 0.93568, |
| "grad_norm": 0.31189459562301636, |
| "learning_rate": 1.29312e-05, |
| "loss": 0.8347, |
| "step": 58480 |
| }, |
| { |
| "epoch": 0.936, |
| "grad_norm": 0.24190790951251984, |
| "learning_rate": 1.2867200000000001e-05, |
| "loss": 0.8476, |
| "step": 58500 |
| }, |
| { |
| "epoch": 0.93632, |
| "grad_norm": 0.2098657637834549, |
| "learning_rate": 1.2803200000000001e-05, |
| "loss": 0.8735, |
| "step": 58520 |
| }, |
| { |
| "epoch": 0.93664, |
| "grad_norm": 0.2108910232782364, |
| "learning_rate": 1.2739200000000002e-05, |
| "loss": 0.874, |
| "step": 58540 |
| }, |
| { |
| "epoch": 0.93696, |
| "grad_norm": 0.2129533439874649, |
| "learning_rate": 1.26752e-05, |
| "loss": 0.8792, |
| "step": 58560 |
| }, |
| { |
| "epoch": 0.93728, |
| "grad_norm": 0.20992882549762726, |
| "learning_rate": 1.26112e-05, |
| "loss": 0.9094, |
| "step": 58580 |
| }, |
| { |
| "epoch": 0.9376, |
| "grad_norm": 0.24041472375392914, |
| "learning_rate": 1.25472e-05, |
| "loss": 0.927, |
| "step": 58600 |
| }, |
| { |
| "epoch": 0.93792, |
| "grad_norm": 0.20306578278541565, |
| "learning_rate": 1.2483200000000001e-05, |
| "loss": 0.8776, |
| "step": 58620 |
| }, |
| { |
| "epoch": 0.93824, |
| "grad_norm": 0.22501681745052338, |
| "learning_rate": 1.2422400000000002e-05, |
| "loss": 0.9315, |
| "step": 58640 |
| }, |
| { |
| "epoch": 0.93856, |
| "grad_norm": 0.23260138928890228, |
| "learning_rate": 1.23584e-05, |
| "loss": 0.8216, |
| "step": 58660 |
| }, |
| { |
| "epoch": 0.93888, |
| "grad_norm": 0.18364718556404114, |
| "learning_rate": 1.22944e-05, |
| "loss": 0.8805, |
| "step": 58680 |
| }, |
| { |
| "epoch": 0.9392, |
| "grad_norm": 0.23733025789260864, |
| "learning_rate": 1.22304e-05, |
| "loss": 0.8966, |
| "step": 58700 |
| }, |
| { |
| "epoch": 0.93952, |
| "grad_norm": 0.19902455806732178, |
| "learning_rate": 1.21664e-05, |
| "loss": 0.9097, |
| "step": 58720 |
| }, |
| { |
| "epoch": 0.93984, |
| "grad_norm": 0.25381171703338623, |
| "learning_rate": 1.2102400000000001e-05, |
| "loss": 0.9078, |
| "step": 58740 |
| }, |
| { |
| "epoch": 0.94016, |
| "grad_norm": 0.22020556032657623, |
| "learning_rate": 1.2038400000000001e-05, |
| "loss": 0.8428, |
| "step": 58760 |
| }, |
| { |
| "epoch": 0.94048, |
| "grad_norm": 0.21431581676006317, |
| "learning_rate": 1.19744e-05, |
| "loss": 0.8228, |
| "step": 58780 |
| }, |
| { |
| "epoch": 0.9408, |
| "grad_norm": 0.24102531373500824, |
| "learning_rate": 1.19104e-05, |
| "loss": 0.886, |
| "step": 58800 |
| }, |
| { |
| "epoch": 0.94112, |
| "grad_norm": 0.20612405240535736, |
| "learning_rate": 1.18464e-05, |
| "loss": 0.8438, |
| "step": 58820 |
| }, |
| { |
| "epoch": 0.94144, |
| "grad_norm": 0.2482701539993286, |
| "learning_rate": 1.1782400000000001e-05, |
| "loss": 0.9063, |
| "step": 58840 |
| }, |
| { |
| "epoch": 0.94176, |
| "grad_norm": 0.2273882031440735, |
| "learning_rate": 1.17184e-05, |
| "loss": 0.8267, |
| "step": 58860 |
| }, |
| { |
| "epoch": 0.94208, |
| "grad_norm": 0.19269466400146484, |
| "learning_rate": 1.16544e-05, |
| "loss": 0.8889, |
| "step": 58880 |
| }, |
| { |
| "epoch": 0.9424, |
| "grad_norm": 0.24158763885498047, |
| "learning_rate": 1.15904e-05, |
| "loss": 0.8983, |
| "step": 58900 |
| }, |
| { |
| "epoch": 0.94272, |
| "grad_norm": 0.18431192636489868, |
| "learning_rate": 1.15264e-05, |
| "loss": 0.8202, |
| "step": 58920 |
| }, |
| { |
| "epoch": 0.94304, |
| "grad_norm": 0.2194778025150299, |
| "learning_rate": 1.14624e-05, |
| "loss": 0.8591, |
| "step": 58940 |
| }, |
| { |
| "epoch": 0.94336, |
| "grad_norm": 0.2087930589914322, |
| "learning_rate": 1.13984e-05, |
| "loss": 0.882, |
| "step": 58960 |
| }, |
| { |
| "epoch": 0.94368, |
| "grad_norm": 0.20091105997562408, |
| "learning_rate": 1.1334400000000001e-05, |
| "loss": 0.8729, |
| "step": 58980 |
| }, |
| { |
| "epoch": 0.944, |
| "grad_norm": 0.22069287300109863, |
| "learning_rate": 1.12704e-05, |
| "loss": 0.8489, |
| "step": 59000 |
| }, |
| { |
| "epoch": 0.94432, |
| "grad_norm": 0.19174374639987946, |
| "learning_rate": 1.12064e-05, |
| "loss": 0.8284, |
| "step": 59020 |
| }, |
| { |
| "epoch": 0.94464, |
| "grad_norm": 0.20969411730766296, |
| "learning_rate": 1.11424e-05, |
| "loss": 0.8967, |
| "step": 59040 |
| }, |
| { |
| "epoch": 0.94496, |
| "grad_norm": 0.21621152758598328, |
| "learning_rate": 1.10784e-05, |
| "loss": 0.8646, |
| "step": 59060 |
| }, |
| { |
| "epoch": 0.94528, |
| "grad_norm": 0.2245556265115738, |
| "learning_rate": 1.10144e-05, |
| "loss": 0.961, |
| "step": 59080 |
| }, |
| { |
| "epoch": 0.9456, |
| "grad_norm": 0.2487545758485794, |
| "learning_rate": 1.0950400000000001e-05, |
| "loss": 0.9026, |
| "step": 59100 |
| }, |
| { |
| "epoch": 0.94592, |
| "grad_norm": 0.2444257289171219, |
| "learning_rate": 1.0886400000000001e-05, |
| "loss": 0.861, |
| "step": 59120 |
| }, |
| { |
| "epoch": 0.94624, |
| "grad_norm": 0.19312791526317596, |
| "learning_rate": 1.08224e-05, |
| "loss": 0.8604, |
| "step": 59140 |
| }, |
| { |
| "epoch": 0.94656, |
| "grad_norm": 0.20798088610172272, |
| "learning_rate": 1.07584e-05, |
| "loss": 0.8886, |
| "step": 59160 |
| }, |
| { |
| "epoch": 0.94688, |
| "grad_norm": 0.22413522005081177, |
| "learning_rate": 1.06944e-05, |
| "loss": 0.8656, |
| "step": 59180 |
| }, |
| { |
| "epoch": 0.9472, |
| "grad_norm": 0.19229187071323395, |
| "learning_rate": 1.06304e-05, |
| "loss": 0.8742, |
| "step": 59200 |
| }, |
| { |
| "epoch": 0.94752, |
| "grad_norm": 0.21749311685562134, |
| "learning_rate": 1.05664e-05, |
| "loss": 0.9038, |
| "step": 59220 |
| }, |
| { |
| "epoch": 0.94784, |
| "grad_norm": 0.21696510910987854, |
| "learning_rate": 1.0502400000000001e-05, |
| "loss": 0.8729, |
| "step": 59240 |
| }, |
| { |
| "epoch": 0.94816, |
| "grad_norm": 0.21962523460388184, |
| "learning_rate": 1.0438400000000002e-05, |
| "loss": 0.9075, |
| "step": 59260 |
| }, |
| { |
| "epoch": 0.94848, |
| "grad_norm": 0.2188694030046463, |
| "learning_rate": 1.03744e-05, |
| "loss": 0.8401, |
| "step": 59280 |
| }, |
| { |
| "epoch": 0.9488, |
| "grad_norm": 0.17384332418441772, |
| "learning_rate": 1.03104e-05, |
| "loss": 0.8886, |
| "step": 59300 |
| }, |
| { |
| "epoch": 0.94912, |
| "grad_norm": 0.18422313034534454, |
| "learning_rate": 1.02464e-05, |
| "loss": 0.9251, |
| "step": 59320 |
| }, |
| { |
| "epoch": 0.94944, |
| "grad_norm": 0.2241748720407486, |
| "learning_rate": 1.0182400000000001e-05, |
| "loss": 0.8966, |
| "step": 59340 |
| }, |
| { |
| "epoch": 0.94976, |
| "grad_norm": 0.2653813660144806, |
| "learning_rate": 1.01184e-05, |
| "loss": 0.853, |
| "step": 59360 |
| }, |
| { |
| "epoch": 0.95008, |
| "grad_norm": 0.2018147110939026, |
| "learning_rate": 1.00544e-05, |
| "loss": 0.8624, |
| "step": 59380 |
| }, |
| { |
| "epoch": 0.9504, |
| "grad_norm": 0.23722539842128754, |
| "learning_rate": 9.990400000000002e-06, |
| "loss": 0.943, |
| "step": 59400 |
| }, |
| { |
| "epoch": 0.95072, |
| "grad_norm": 0.2132965326309204, |
| "learning_rate": 9.9264e-06, |
| "loss": 0.8511, |
| "step": 59420 |
| }, |
| { |
| "epoch": 0.95104, |
| "grad_norm": 0.19337671995162964, |
| "learning_rate": 9.8624e-06, |
| "loss": 0.8419, |
| "step": 59440 |
| }, |
| { |
| "epoch": 0.95136, |
| "grad_norm": 0.1993594765663147, |
| "learning_rate": 9.798400000000001e-06, |
| "loss": 0.8809, |
| "step": 59460 |
| }, |
| { |
| "epoch": 0.95168, |
| "grad_norm": 0.20051348209381104, |
| "learning_rate": 9.734400000000001e-06, |
| "loss": 0.8246, |
| "step": 59480 |
| }, |
| { |
| "epoch": 0.952, |
| "grad_norm": 0.2141229212284088, |
| "learning_rate": 9.6704e-06, |
| "loss": 0.8575, |
| "step": 59500 |
| }, |
| { |
| "epoch": 0.95232, |
| "grad_norm": 0.20707592368125916, |
| "learning_rate": 9.6096e-06, |
| "loss": 0.8953, |
| "step": 59520 |
| }, |
| { |
| "epoch": 0.95264, |
| "grad_norm": 0.2202858179807663, |
| "learning_rate": 9.5456e-06, |
| "loss": 0.893, |
| "step": 59540 |
| }, |
| { |
| "epoch": 0.95296, |
| "grad_norm": 0.22861933708190918, |
| "learning_rate": 9.4816e-06, |
| "loss": 0.8534, |
| "step": 59560 |
| }, |
| { |
| "epoch": 0.95328, |
| "grad_norm": 0.2285875827074051, |
| "learning_rate": 9.4176e-06, |
| "loss": 0.8686, |
| "step": 59580 |
| }, |
| { |
| "epoch": 0.9536, |
| "grad_norm": 0.19898247718811035, |
| "learning_rate": 9.3536e-06, |
| "loss": 0.8573, |
| "step": 59600 |
| }, |
| { |
| "epoch": 0.95392, |
| "grad_norm": 0.19063091278076172, |
| "learning_rate": 9.289600000000002e-06, |
| "loss": 0.8889, |
| "step": 59620 |
| }, |
| { |
| "epoch": 0.95424, |
| "grad_norm": 0.2302456945180893, |
| "learning_rate": 9.2256e-06, |
| "loss": 0.8927, |
| "step": 59640 |
| }, |
| { |
| "epoch": 0.95456, |
| "grad_norm": 0.23316220939159393, |
| "learning_rate": 9.1616e-06, |
| "loss": 0.8461, |
| "step": 59660 |
| }, |
| { |
| "epoch": 0.95488, |
| "grad_norm": 0.21499593555927277, |
| "learning_rate": 9.0976e-06, |
| "loss": 0.847, |
| "step": 59680 |
| }, |
| { |
| "epoch": 0.9552, |
| "grad_norm": 0.2145620733499527, |
| "learning_rate": 9.033600000000001e-06, |
| "loss": 0.9328, |
| "step": 59700 |
| }, |
| { |
| "epoch": 0.95552, |
| "grad_norm": 0.22874926030635834, |
| "learning_rate": 8.9696e-06, |
| "loss": 0.8883, |
| "step": 59720 |
| }, |
| { |
| "epoch": 0.95584, |
| "grad_norm": 0.2054702192544937, |
| "learning_rate": 8.9056e-06, |
| "loss": 0.9, |
| "step": 59740 |
| }, |
| { |
| "epoch": 0.95616, |
| "grad_norm": 0.24557848274707794, |
| "learning_rate": 8.8416e-06, |
| "loss": 0.8828, |
| "step": 59760 |
| }, |
| { |
| "epoch": 0.95648, |
| "grad_norm": 0.21342433989048004, |
| "learning_rate": 8.7776e-06, |
| "loss": 0.8664, |
| "step": 59780 |
| }, |
| { |
| "epoch": 0.9568, |
| "grad_norm": 0.21812336146831512, |
| "learning_rate": 8.7136e-06, |
| "loss": 0.8613, |
| "step": 59800 |
| }, |
| { |
| "epoch": 0.95712, |
| "grad_norm": 0.21000798046588898, |
| "learning_rate": 8.649600000000001e-06, |
| "loss": 0.8553, |
| "step": 59820 |
| }, |
| { |
| "epoch": 0.95744, |
| "grad_norm": 0.23609398305416107, |
| "learning_rate": 8.585600000000001e-06, |
| "loss": 0.8952, |
| "step": 59840 |
| }, |
| { |
| "epoch": 0.95776, |
| "grad_norm": 0.23479294776916504, |
| "learning_rate": 8.5216e-06, |
| "loss": 0.9345, |
| "step": 59860 |
| }, |
| { |
| "epoch": 0.95808, |
| "grad_norm": 0.1910203993320465, |
| "learning_rate": 8.4576e-06, |
| "loss": 0.891, |
| "step": 59880 |
| }, |
| { |
| "epoch": 0.9584, |
| "grad_norm": 0.21888737380504608, |
| "learning_rate": 8.3936e-06, |
| "loss": 0.8615, |
| "step": 59900 |
| }, |
| { |
| "epoch": 0.95872, |
| "grad_norm": 0.2583872973918915, |
| "learning_rate": 8.3296e-06, |
| "loss": 0.8452, |
| "step": 59920 |
| }, |
| { |
| "epoch": 0.95904, |
| "grad_norm": 0.21396338939666748, |
| "learning_rate": 8.2656e-06, |
| "loss": 0.8582, |
| "step": 59940 |
| }, |
| { |
| "epoch": 0.95936, |
| "grad_norm": 0.18018275499343872, |
| "learning_rate": 8.201600000000001e-06, |
| "loss": 0.9053, |
| "step": 59960 |
| }, |
| { |
| "epoch": 0.95968, |
| "grad_norm": 0.24706120789051056, |
| "learning_rate": 8.137600000000002e-06, |
| "loss": 0.8794, |
| "step": 59980 |
| }, |
| { |
| "epoch": 0.96, |
| "grad_norm": 0.19996990263462067, |
| "learning_rate": 8.0736e-06, |
| "loss": 0.9093, |
| "step": 60000 |
| }, |
| { |
| "epoch": 0.96032, |
| "grad_norm": 0.1947757452726364, |
| "learning_rate": 8.0096e-06, |
| "loss": 0.871, |
| "step": 60020 |
| }, |
| { |
| "epoch": 0.96064, |
| "grad_norm": 0.19846396148204803, |
| "learning_rate": 7.9456e-06, |
| "loss": 0.8555, |
| "step": 60040 |
| }, |
| { |
| "epoch": 0.96096, |
| "grad_norm": 0.2101791501045227, |
| "learning_rate": 7.881600000000001e-06, |
| "loss": 0.8805, |
| "step": 60060 |
| }, |
| { |
| "epoch": 0.96128, |
| "grad_norm": 0.2531636953353882, |
| "learning_rate": 7.8176e-06, |
| "loss": 0.8721, |
| "step": 60080 |
| }, |
| { |
| "epoch": 0.9616, |
| "grad_norm": 0.21089531481266022, |
| "learning_rate": 7.7536e-06, |
| "loss": 0.8266, |
| "step": 60100 |
| }, |
| { |
| "epoch": 0.96192, |
| "grad_norm": 0.16842779517173767, |
| "learning_rate": 7.689600000000002e-06, |
| "loss": 0.8531, |
| "step": 60120 |
| }, |
| { |
| "epoch": 0.96224, |
| "grad_norm": 0.21880729496479034, |
| "learning_rate": 7.625600000000001e-06, |
| "loss": 0.9024, |
| "step": 60140 |
| }, |
| { |
| "epoch": 0.96256, |
| "grad_norm": 0.196882426738739, |
| "learning_rate": 7.561600000000001e-06, |
| "loss": 0.8672, |
| "step": 60160 |
| }, |
| { |
| "epoch": 0.96288, |
| "grad_norm": 0.16088998317718506, |
| "learning_rate": 7.497600000000001e-06, |
| "loss": 0.8431, |
| "step": 60180 |
| }, |
| { |
| "epoch": 0.9632, |
| "grad_norm": 0.19500739872455597, |
| "learning_rate": 7.4336e-06, |
| "loss": 0.8772, |
| "step": 60200 |
| }, |
| { |
| "epoch": 0.96352, |
| "grad_norm": 0.22621487081050873, |
| "learning_rate": 7.3696e-06, |
| "loss": 0.89, |
| "step": 60220 |
| }, |
| { |
| "epoch": 0.96384, |
| "grad_norm": 0.20538878440856934, |
| "learning_rate": 7.3056e-06, |
| "loss": 0.9046, |
| "step": 60240 |
| }, |
| { |
| "epoch": 0.96416, |
| "grad_norm": 0.21844108402729034, |
| "learning_rate": 7.241599999999999e-06, |
| "loss": 0.8146, |
| "step": 60260 |
| }, |
| { |
| "epoch": 0.96448, |
| "grad_norm": 0.19265195727348328, |
| "learning_rate": 7.177600000000001e-06, |
| "loss": 0.919, |
| "step": 60280 |
| }, |
| { |
| "epoch": 0.9648, |
| "grad_norm": 0.2025534063577652, |
| "learning_rate": 7.113600000000001e-06, |
| "loss": 0.8966, |
| "step": 60300 |
| }, |
| { |
| "epoch": 0.96512, |
| "grad_norm": 0.2134266346693039, |
| "learning_rate": 7.0496e-06, |
| "loss": 0.91, |
| "step": 60320 |
| }, |
| { |
| "epoch": 0.96544, |
| "grad_norm": 0.25129690766334534, |
| "learning_rate": 6.9856000000000005e-06, |
| "loss": 0.8821, |
| "step": 60340 |
| }, |
| { |
| "epoch": 0.96576, |
| "grad_norm": 0.23355615139007568, |
| "learning_rate": 6.9216e-06, |
| "loss": 0.8755, |
| "step": 60360 |
| }, |
| { |
| "epoch": 0.96608, |
| "grad_norm": 0.22555451095104218, |
| "learning_rate": 6.8576e-06, |
| "loss": 0.8959, |
| "step": 60380 |
| }, |
| { |
| "epoch": 0.9664, |
| "grad_norm": 0.23309843242168427, |
| "learning_rate": 6.7936e-06, |
| "loss": 0.8482, |
| "step": 60400 |
| }, |
| { |
| "epoch": 0.96672, |
| "grad_norm": 0.2550283372402191, |
| "learning_rate": 6.7296e-06, |
| "loss": 0.8756, |
| "step": 60420 |
| }, |
| { |
| "epoch": 0.96704, |
| "grad_norm": 0.24770893156528473, |
| "learning_rate": 6.665600000000001e-06, |
| "loss": 0.8266, |
| "step": 60440 |
| }, |
| { |
| "epoch": 0.96736, |
| "grad_norm": 0.19504638016223907, |
| "learning_rate": 6.6016000000000005e-06, |
| "loss": 0.8924, |
| "step": 60460 |
| }, |
| { |
| "epoch": 0.96768, |
| "grad_norm": 0.1870088279247284, |
| "learning_rate": 6.537600000000001e-06, |
| "loss": 0.8819, |
| "step": 60480 |
| }, |
| { |
| "epoch": 0.968, |
| "grad_norm": 0.19606348872184753, |
| "learning_rate": 6.4736e-06, |
| "loss": 0.8583, |
| "step": 60500 |
| }, |
| { |
| "epoch": 0.96832, |
| "grad_norm": 0.21062546968460083, |
| "learning_rate": 6.4096000000000004e-06, |
| "loss": 0.8318, |
| "step": 60520 |
| }, |
| { |
| "epoch": 0.96864, |
| "grad_norm": 0.2244090735912323, |
| "learning_rate": 6.3456e-06, |
| "loss": 0.8503, |
| "step": 60540 |
| }, |
| { |
| "epoch": 0.96896, |
| "grad_norm": 0.2132522016763687, |
| "learning_rate": 6.2816e-06, |
| "loss": 0.8805, |
| "step": 60560 |
| }, |
| { |
| "epoch": 0.96928, |
| "grad_norm": 0.21387845277786255, |
| "learning_rate": 6.2176e-06, |
| "loss": 0.8843, |
| "step": 60580 |
| }, |
| { |
| "epoch": 0.9696, |
| "grad_norm": 0.20109587907791138, |
| "learning_rate": 6.153600000000001e-06, |
| "loss": 0.8487, |
| "step": 60600 |
| }, |
| { |
| "epoch": 0.96992, |
| "grad_norm": 0.20364026725292206, |
| "learning_rate": 6.0896e-06, |
| "loss": 0.8737, |
| "step": 60620 |
| }, |
| { |
| "epoch": 0.97024, |
| "grad_norm": 0.22072643041610718, |
| "learning_rate": 6.0256e-06, |
| "loss": 0.881, |
| "step": 60640 |
| }, |
| { |
| "epoch": 0.97056, |
| "grad_norm": 0.24078206717967987, |
| "learning_rate": 5.961600000000001e-06, |
| "loss": 0.8459, |
| "step": 60660 |
| }, |
| { |
| "epoch": 0.97088, |
| "grad_norm": 0.24617841839790344, |
| "learning_rate": 5.8976e-06, |
| "loss": 0.894, |
| "step": 60680 |
| }, |
| { |
| "epoch": 0.9712, |
| "grad_norm": 0.2186897248029709, |
| "learning_rate": 5.8336e-06, |
| "loss": 0.8612, |
| "step": 60700 |
| }, |
| { |
| "epoch": 0.97152, |
| "grad_norm": 0.1752256453037262, |
| "learning_rate": 5.7696e-06, |
| "loss": 0.8619, |
| "step": 60720 |
| }, |
| { |
| "epoch": 0.97184, |
| "grad_norm": 0.2205258458852768, |
| "learning_rate": 5.705600000000001e-06, |
| "loss": 0.8847, |
| "step": 60740 |
| }, |
| { |
| "epoch": 0.97216, |
| "grad_norm": 0.20608656108379364, |
| "learning_rate": 5.6416e-06, |
| "loss": 0.8728, |
| "step": 60760 |
| }, |
| { |
| "epoch": 0.97248, |
| "grad_norm": 0.18989993631839752, |
| "learning_rate": 5.577600000000001e-06, |
| "loss": 0.8496, |
| "step": 60780 |
| }, |
| { |
| "epoch": 0.9728, |
| "grad_norm": 0.2024667114019394, |
| "learning_rate": 5.5136e-06, |
| "loss": 0.8883, |
| "step": 60800 |
| }, |
| { |
| "epoch": 0.97312, |
| "grad_norm": 0.17910663783550262, |
| "learning_rate": 5.4496e-06, |
| "loss": 0.8628, |
| "step": 60820 |
| }, |
| { |
| "epoch": 0.97344, |
| "grad_norm": 0.19510726630687714, |
| "learning_rate": 5.385600000000001e-06, |
| "loss": 0.8632, |
| "step": 60840 |
| }, |
| { |
| "epoch": 0.97376, |
| "grad_norm": 0.20516471564769745, |
| "learning_rate": 5.3216e-06, |
| "loss": 0.9158, |
| "step": 60860 |
| }, |
| { |
| "epoch": 0.97408, |
| "grad_norm": 0.2369288057088852, |
| "learning_rate": 5.2576e-06, |
| "loss": 0.9067, |
| "step": 60880 |
| }, |
| { |
| "epoch": 0.9744, |
| "grad_norm": 0.23473519086837769, |
| "learning_rate": 5.1936000000000006e-06, |
| "loss": 0.8675, |
| "step": 60900 |
| }, |
| { |
| "epoch": 0.97472, |
| "grad_norm": 0.20721520483493805, |
| "learning_rate": 5.1296e-06, |
| "loss": 0.857, |
| "step": 60920 |
| }, |
| { |
| "epoch": 0.97504, |
| "grad_norm": 0.25758302211761475, |
| "learning_rate": 5.0656e-06, |
| "loss": 0.854, |
| "step": 60940 |
| }, |
| { |
| "epoch": 0.97536, |
| "grad_norm": 0.23826448619365692, |
| "learning_rate": 5.0016e-06, |
| "loss": 0.865, |
| "step": 60960 |
| }, |
| { |
| "epoch": 0.97568, |
| "grad_norm": 0.21797384321689606, |
| "learning_rate": 4.937600000000001e-06, |
| "loss": 0.8956, |
| "step": 60980 |
| }, |
| { |
| "epoch": 0.976, |
| "grad_norm": 0.2073042243719101, |
| "learning_rate": 4.8736e-06, |
| "loss": 0.8522, |
| "step": 61000 |
| }, |
| { |
| "epoch": 0.97632, |
| "grad_norm": 0.1850445568561554, |
| "learning_rate": 4.8096000000000005e-06, |
| "loss": 0.945, |
| "step": 61020 |
| }, |
| { |
| "epoch": 0.97664, |
| "grad_norm": 0.2104647010564804, |
| "learning_rate": 4.7456e-06, |
| "loss": 0.8828, |
| "step": 61040 |
| }, |
| { |
| "epoch": 0.97696, |
| "grad_norm": 0.17378270626068115, |
| "learning_rate": 4.6816e-06, |
| "loss": 0.896, |
| "step": 61060 |
| }, |
| { |
| "epoch": 0.97728, |
| "grad_norm": 0.21518754959106445, |
| "learning_rate": 4.6176000000000005e-06, |
| "loss": 0.8639, |
| "step": 61080 |
| }, |
| { |
| "epoch": 0.9776, |
| "grad_norm": 0.22130274772644043, |
| "learning_rate": 4.5536e-06, |
| "loss": 0.8755, |
| "step": 61100 |
| }, |
| { |
| "epoch": 0.97792, |
| "grad_norm": 0.24225564301013947, |
| "learning_rate": 4.4896e-06, |
| "loss": 0.8747, |
| "step": 61120 |
| }, |
| { |
| "epoch": 0.97824, |
| "grad_norm": 0.19240306317806244, |
| "learning_rate": 4.4256e-06, |
| "loss": 0.9133, |
| "step": 61140 |
| }, |
| { |
| "epoch": 0.97856, |
| "grad_norm": 0.18395653367042542, |
| "learning_rate": 4.361600000000001e-06, |
| "loss": 0.8321, |
| "step": 61160 |
| }, |
| { |
| "epoch": 0.97888, |
| "grad_norm": 0.21432001888751984, |
| "learning_rate": 4.2976e-06, |
| "loss": 0.9217, |
| "step": 61180 |
| }, |
| { |
| "epoch": 0.9792, |
| "grad_norm": 0.22985559701919556, |
| "learning_rate": 4.2336000000000004e-06, |
| "loss": 0.8819, |
| "step": 61200 |
| }, |
| { |
| "epoch": 0.97952, |
| "grad_norm": 0.18436340987682343, |
| "learning_rate": 4.1696e-06, |
| "loss": 0.9066, |
| "step": 61220 |
| }, |
| { |
| "epoch": 0.97984, |
| "grad_norm": 0.2279936820268631, |
| "learning_rate": 4.1056e-06, |
| "loss": 0.8828, |
| "step": 61240 |
| }, |
| { |
| "epoch": 0.98016, |
| "grad_norm": 0.23359614610671997, |
| "learning_rate": 4.0416e-06, |
| "loss": 0.873, |
| "step": 61260 |
| }, |
| { |
| "epoch": 0.98048, |
| "grad_norm": 0.21372786164283752, |
| "learning_rate": 3.9776e-06, |
| "loss": 0.8726, |
| "step": 61280 |
| }, |
| { |
| "epoch": 0.9808, |
| "grad_norm": 0.17267848551273346, |
| "learning_rate": 3.9136e-06, |
| "loss": 0.8408, |
| "step": 61300 |
| }, |
| { |
| "epoch": 0.98112, |
| "grad_norm": 0.19871732592582703, |
| "learning_rate": 3.8496e-06, |
| "loss": 0.8697, |
| "step": 61320 |
| }, |
| { |
| "epoch": 0.98144, |
| "grad_norm": 0.22875666618347168, |
| "learning_rate": 3.7856000000000002e-06, |
| "loss": 0.9083, |
| "step": 61340 |
| }, |
| { |
| "epoch": 0.98176, |
| "grad_norm": 0.22814328968524933, |
| "learning_rate": 3.7216e-06, |
| "loss": 0.8542, |
| "step": 61360 |
| }, |
| { |
| "epoch": 0.98208, |
| "grad_norm": 0.19292208552360535, |
| "learning_rate": 3.6576e-06, |
| "loss": 0.7783, |
| "step": 61380 |
| }, |
| { |
| "epoch": 0.9824, |
| "grad_norm": 0.20703838765621185, |
| "learning_rate": 3.5936000000000006e-06, |
| "loss": 0.8342, |
| "step": 61400 |
| }, |
| { |
| "epoch": 0.98272, |
| "grad_norm": 0.1766250729560852, |
| "learning_rate": 3.5296000000000005e-06, |
| "loss": 0.9091, |
| "step": 61420 |
| }, |
| { |
| "epoch": 0.98304, |
| "grad_norm": 0.1929435431957245, |
| "learning_rate": 3.4656e-06, |
| "loss": 0.8704, |
| "step": 61440 |
| }, |
| { |
| "epoch": 0.98336, |
| "grad_norm": 0.269422709941864, |
| "learning_rate": 3.4015999999999998e-06, |
| "loss": 0.8677, |
| "step": 61460 |
| }, |
| { |
| "epoch": 0.98368, |
| "grad_norm": 0.17369483411312103, |
| "learning_rate": 3.3376000000000005e-06, |
| "loss": 0.8388, |
| "step": 61480 |
| }, |
| { |
| "epoch": 0.984, |
| "grad_norm": 0.24999183416366577, |
| "learning_rate": 3.2736000000000003e-06, |
| "loss": 0.8884, |
| "step": 61500 |
| }, |
| { |
| "epoch": 0.98432, |
| "grad_norm": 0.2378091812133789, |
| "learning_rate": 3.2096e-06, |
| "loss": 0.8644, |
| "step": 61520 |
| }, |
| { |
| "epoch": 0.98464, |
| "grad_norm": 0.23447105288505554, |
| "learning_rate": 3.1456e-06, |
| "loss": 0.8962, |
| "step": 61540 |
| }, |
| { |
| "epoch": 0.98496, |
| "grad_norm": 0.22000399231910706, |
| "learning_rate": 3.0816000000000003e-06, |
| "loss": 0.8297, |
| "step": 61560 |
| }, |
| { |
| "epoch": 0.98528, |
| "grad_norm": 0.20318488776683807, |
| "learning_rate": 3.0176e-06, |
| "loss": 0.8639, |
| "step": 61580 |
| }, |
| { |
| "epoch": 0.9856, |
| "grad_norm": 0.22560527920722961, |
| "learning_rate": 2.9536e-06, |
| "loss": 0.8701, |
| "step": 61600 |
| }, |
| { |
| "epoch": 0.98592, |
| "grad_norm": 0.2516303062438965, |
| "learning_rate": 2.8896000000000003e-06, |
| "loss": 0.9267, |
| "step": 61620 |
| }, |
| { |
| "epoch": 0.98624, |
| "grad_norm": 0.2284599393606186, |
| "learning_rate": 2.8256e-06, |
| "loss": 0.858, |
| "step": 61640 |
| }, |
| { |
| "epoch": 0.98656, |
| "grad_norm": 0.22970664501190186, |
| "learning_rate": 2.7616000000000004e-06, |
| "loss": 0.8553, |
| "step": 61660 |
| }, |
| { |
| "epoch": 0.98688, |
| "grad_norm": 0.19490259885787964, |
| "learning_rate": 2.6976000000000002e-06, |
| "loss": 0.8, |
| "step": 61680 |
| }, |
| { |
| "epoch": 0.9872, |
| "grad_norm": 0.2025730013847351, |
| "learning_rate": 2.6336e-06, |
| "loss": 0.8664, |
| "step": 61700 |
| }, |
| { |
| "epoch": 0.98752, |
| "grad_norm": 0.202545627951622, |
| "learning_rate": 2.5696e-06, |
| "loss": 0.8784, |
| "step": 61720 |
| }, |
| { |
| "epoch": 0.98784, |
| "grad_norm": 0.21990488469600677, |
| "learning_rate": 2.5055999999999998e-06, |
| "loss": 0.8768, |
| "step": 61740 |
| }, |
| { |
| "epoch": 0.98816, |
| "grad_norm": 0.2364804446697235, |
| "learning_rate": 2.4416e-06, |
| "loss": 0.9087, |
| "step": 61760 |
| }, |
| { |
| "epoch": 0.98848, |
| "grad_norm": 0.20863565802574158, |
| "learning_rate": 2.3776e-06, |
| "loss": 0.8558, |
| "step": 61780 |
| }, |
| { |
| "epoch": 0.9888, |
| "grad_norm": 0.21479357779026031, |
| "learning_rate": 2.3136e-06, |
| "loss": 0.8801, |
| "step": 61800 |
| }, |
| { |
| "epoch": 0.98912, |
| "grad_norm": 0.2164417803287506, |
| "learning_rate": 2.2496e-06, |
| "loss": 0.8483, |
| "step": 61820 |
| }, |
| { |
| "epoch": 0.98944, |
| "grad_norm": 0.2229296714067459, |
| "learning_rate": 2.1856000000000003e-06, |
| "loss": 0.8708, |
| "step": 61840 |
| }, |
| { |
| "epoch": 0.98976, |
| "grad_norm": 0.15766002237796783, |
| "learning_rate": 2.1216e-06, |
| "loss": 0.8485, |
| "step": 61860 |
| }, |
| { |
| "epoch": 0.99008, |
| "grad_norm": 0.1808682680130005, |
| "learning_rate": 2.0576e-06, |
| "loss": 0.8397, |
| "step": 61880 |
| }, |
| { |
| "epoch": 0.9904, |
| "grad_norm": 0.23974210023880005, |
| "learning_rate": 1.9936e-06, |
| "loss": 0.8173, |
| "step": 61900 |
| }, |
| { |
| "epoch": 0.99072, |
| "grad_norm": 0.1957724243402481, |
| "learning_rate": 1.9296e-06, |
| "loss": 0.8513, |
| "step": 61920 |
| }, |
| { |
| "epoch": 0.99104, |
| "grad_norm": 0.1994854062795639, |
| "learning_rate": 1.8656e-06, |
| "loss": 0.8988, |
| "step": 61940 |
| }, |
| { |
| "epoch": 0.99136, |
| "grad_norm": 0.21945121884346008, |
| "learning_rate": 1.8016000000000003e-06, |
| "loss": 0.8666, |
| "step": 61960 |
| }, |
| { |
| "epoch": 0.99168, |
| "grad_norm": 0.23173430562019348, |
| "learning_rate": 1.7375999999999999e-06, |
| "loss": 0.8751, |
| "step": 61980 |
| }, |
| { |
| "epoch": 0.992, |
| "grad_norm": 0.23692530393600464, |
| "learning_rate": 1.6736000000000002e-06, |
| "loss": 0.8905, |
| "step": 62000 |
| }, |
| { |
| "epoch": 0.99232, |
| "grad_norm": 0.2442493587732315, |
| "learning_rate": 1.6096e-06, |
| "loss": 0.8955, |
| "step": 62020 |
| }, |
| { |
| "epoch": 0.99264, |
| "grad_norm": 0.22609354555606842, |
| "learning_rate": 1.5456e-06, |
| "loss": 0.8639, |
| "step": 62040 |
| }, |
| { |
| "epoch": 0.99296, |
| "grad_norm": 0.19429120421409607, |
| "learning_rate": 1.4816e-06, |
| "loss": 0.8911, |
| "step": 62060 |
| }, |
| { |
| "epoch": 0.99328, |
| "grad_norm": 0.23542903363704681, |
| "learning_rate": 1.4176e-06, |
| "loss": 0.9274, |
| "step": 62080 |
| }, |
| { |
| "epoch": 0.9936, |
| "grad_norm": 0.21632255613803864, |
| "learning_rate": 1.3536e-06, |
| "loss": 0.9395, |
| "step": 62100 |
| }, |
| { |
| "epoch": 0.99392, |
| "grad_norm": 0.21293993294239044, |
| "learning_rate": 1.2896000000000001e-06, |
| "loss": 0.8639, |
| "step": 62120 |
| }, |
| { |
| "epoch": 0.99424, |
| "grad_norm": 0.20467883348464966, |
| "learning_rate": 1.2256e-06, |
| "loss": 0.837, |
| "step": 62140 |
| }, |
| { |
| "epoch": 0.99456, |
| "grad_norm": 0.2215726226568222, |
| "learning_rate": 1.1616e-06, |
| "loss": 0.9212, |
| "step": 62160 |
| }, |
| { |
| "epoch": 0.99488, |
| "grad_norm": 0.2058229297399521, |
| "learning_rate": 1.0976e-06, |
| "loss": 0.8627, |
| "step": 62180 |
| }, |
| { |
| "epoch": 0.9952, |
| "grad_norm": 0.19800324738025665, |
| "learning_rate": 1.0336e-06, |
| "loss": 0.8536, |
| "step": 62200 |
| }, |
| { |
| "epoch": 0.99552, |
| "grad_norm": 0.24008409678936005, |
| "learning_rate": 9.696e-07, |
| "loss": 0.9235, |
| "step": 62220 |
| }, |
| { |
| "epoch": 0.99584, |
| "grad_norm": 0.1996374875307083, |
| "learning_rate": 9.056000000000001e-07, |
| "loss": 0.8699, |
| "step": 62240 |
| }, |
| { |
| "epoch": 0.99616, |
| "grad_norm": 0.20383226871490479, |
| "learning_rate": 8.416e-07, |
| "loss": 0.9042, |
| "step": 62260 |
| }, |
| { |
| "epoch": 0.99648, |
| "grad_norm": 0.23892252147197723, |
| "learning_rate": 7.776000000000001e-07, |
| "loss": 0.8895, |
| "step": 62280 |
| }, |
| { |
| "epoch": 0.9968, |
| "grad_norm": 0.23595763742923737, |
| "learning_rate": 7.136e-07, |
| "loss": 0.9093, |
| "step": 62300 |
| }, |
| { |
| "epoch": 0.99712, |
| "grad_norm": 0.168944850564003, |
| "learning_rate": 6.496e-07, |
| "loss": 0.8377, |
| "step": 62320 |
| }, |
| { |
| "epoch": 0.99744, |
| "grad_norm": 0.22417670488357544, |
| "learning_rate": 5.856000000000001e-07, |
| "loss": 0.9073, |
| "step": 62340 |
| }, |
| { |
| "epoch": 0.99776, |
| "grad_norm": 0.25286805629730225, |
| "learning_rate": 5.216e-07, |
| "loss": 0.8761, |
| "step": 62360 |
| }, |
| { |
| "epoch": 0.99808, |
| "grad_norm": 0.25818583369255066, |
| "learning_rate": 4.576e-07, |
| "loss": 0.9176, |
| "step": 62380 |
| }, |
| { |
| "epoch": 0.9984, |
| "grad_norm": 0.20294295251369476, |
| "learning_rate": 3.9360000000000003e-07, |
| "loss": 0.9181, |
| "step": 62400 |
| }, |
| { |
| "epoch": 0.99872, |
| "grad_norm": 0.2515595555305481, |
| "learning_rate": 3.296e-07, |
| "loss": 0.8951, |
| "step": 62420 |
| }, |
| { |
| "epoch": 0.99904, |
| "grad_norm": 0.21728461980819702, |
| "learning_rate": 2.656e-07, |
| "loss": 0.8226, |
| "step": 62440 |
| }, |
| { |
| "epoch": 0.99936, |
| "grad_norm": 0.2443869262933731, |
| "learning_rate": 2.016e-07, |
| "loss": 0.8448, |
| "step": 62460 |
| }, |
| { |
| "epoch": 0.99968, |
| "grad_norm": 0.2046569287776947, |
| "learning_rate": 1.3760000000000001e-07, |
| "loss": 0.881, |
| "step": 62480 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 0.21948722004890442, |
| "learning_rate": 7.36e-08, |
| "loss": 0.9002, |
| "step": 62500 |
| } |
| ], |
| "logging_steps": 20, |
| "max_steps": 62500, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 600, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 1.1005725769728e+19, |
| "train_batch_size": 8, |
| "trial_name": null, |
| "trial_params": null |
| } |