{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 83.33333333333333, "eval_steps": 500, "global_step": 30000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.027777777777777776, "grad_norm": 3.6200122833251953, "learning_rate": 3.0000000000000004e-07, "loss": 2.9396, "step": 10 }, { "epoch": 0.05555555555555555, "grad_norm": 2.3466956615448, "learning_rate": 6.333333333333333e-07, "loss": 2.8445, "step": 20 }, { "epoch": 0.08333333333333333, "grad_norm": 2.187380313873291, "learning_rate": 9.666666666666668e-07, "loss": 2.7856, "step": 30 }, { "epoch": 0.1111111111111111, "grad_norm": 1.784609079360962, "learning_rate": 1.3e-06, "loss": 2.7604, "step": 40 }, { "epoch": 0.1388888888888889, "grad_norm": 1.5115187168121338, "learning_rate": 1.6333333333333333e-06, "loss": 2.7513, "step": 50 }, { "epoch": 0.16666666666666666, "grad_norm": 1.7017112970352173, "learning_rate": 1.9666666666666668e-06, "loss": 2.7358, "step": 60 }, { "epoch": 0.19444444444444445, "grad_norm": 1.5316331386566162, "learning_rate": 2.3e-06, "loss": 2.7075, "step": 70 }, { "epoch": 0.2222222222222222, "grad_norm": 1.3454697132110596, "learning_rate": 2.6333333333333337e-06, "loss": 2.701, "step": 80 }, { "epoch": 0.25, "grad_norm": 1.5326608419418335, "learning_rate": 2.966666666666667e-06, "loss": 2.7028, "step": 90 }, { "epoch": 0.2777777777777778, "grad_norm": 1.5651161670684814, "learning_rate": 3.3e-06, "loss": 2.6727, "step": 100 }, { "epoch": 0.3055555555555556, "grad_norm": 1.9064019918441772, "learning_rate": 3.633333333333334e-06, "loss": 2.6455, "step": 110 }, { "epoch": 0.3333333333333333, "grad_norm": 2.2731173038482666, "learning_rate": 3.966666666666667e-06, "loss": 2.6241, "step": 120 }, { "epoch": 0.3611111111111111, "grad_norm": 1.7849955558776855, "learning_rate": 4.2999999999999995e-06, "loss": 2.6229, "step": 130 }, { "epoch": 0.3888888888888889, "grad_norm": 1.699859619140625, "learning_rate": 4.633333333333334e-06, "loss": 2.61, "step": 140 }, { "epoch": 0.4166666666666667, "grad_norm": 1.7330888509750366, "learning_rate": 4.966666666666667e-06, "loss": 2.5782, "step": 150 }, { "epoch": 0.4444444444444444, "grad_norm": 2.555720329284668, "learning_rate": 5.3e-06, "loss": 2.5311, "step": 160 }, { "epoch": 0.4722222222222222, "grad_norm": 2.3042714595794678, "learning_rate": 5.633333333333333e-06, "loss": 2.5008, "step": 170 }, { "epoch": 0.5, "grad_norm": 2.398834705352783, "learning_rate": 5.9666666666666666e-06, "loss": 2.4775, "step": 180 }, { "epoch": 0.5277777777777778, "grad_norm": 4.011292934417725, "learning_rate": 6.300000000000001e-06, "loss": 2.4185, "step": 190 }, { "epoch": 0.5555555555555556, "grad_norm": 3.643602132797241, "learning_rate": 6.633333333333333e-06, "loss": 2.369, "step": 200 }, { "epoch": 0.5833333333333334, "grad_norm": 3.1978588104248047, "learning_rate": 6.966666666666667e-06, "loss": 2.3334, "step": 210 }, { "epoch": 0.6111111111111112, "grad_norm": 5.300952911376953, "learning_rate": 7.2999999999999996e-06, "loss": 2.2668, "step": 220 }, { "epoch": 0.6388888888888888, "grad_norm": 4.286801338195801, "learning_rate": 7.633333333333334e-06, "loss": 2.1861, "step": 230 }, { "epoch": 0.6666666666666666, "grad_norm": 5.042446613311768, "learning_rate": 7.966666666666666e-06, "loss": 2.0861, "step": 240 }, { "epoch": 0.6944444444444444, "grad_norm": 4.708366870880127, "learning_rate": 8.3e-06, "loss": 2.0229, "step": 250 }, { "epoch": 0.7222222222222222, "grad_norm": 5.741185665130615, "learning_rate": 8.633333333333334e-06, "loss": 1.9385, "step": 260 }, { "epoch": 0.75, "grad_norm": 4.5229597091674805, "learning_rate": 8.966666666666668e-06, "loss": 1.922, "step": 270 }, { "epoch": 0.7777777777777778, "grad_norm": 7.926031112670898, "learning_rate": 9.3e-06, "loss": 1.9412, "step": 280 }, { "epoch": 0.8055555555555556, "grad_norm": 5.669072151184082, "learning_rate": 9.633333333333335e-06, "loss": 1.8577, "step": 290 }, { "epoch": 0.8333333333333334, "grad_norm": 3.7943708896636963, "learning_rate": 9.966666666666667e-06, "loss": 1.8047, "step": 300 }, { "epoch": 0.8611111111111112, "grad_norm": 5.149256229400635, "learning_rate": 1.03e-05, "loss": 1.8163, "step": 310 }, { "epoch": 0.8888888888888888, "grad_norm": 4.666692733764648, "learning_rate": 1.0633333333333334e-05, "loss": 1.792, "step": 320 }, { "epoch": 0.9166666666666666, "grad_norm": 6.126015663146973, "learning_rate": 1.0966666666666666e-05, "loss": 1.7664, "step": 330 }, { "epoch": 0.9444444444444444, "grad_norm": 4.395339012145996, "learning_rate": 1.13e-05, "loss": 1.719, "step": 340 }, { "epoch": 0.9722222222222222, "grad_norm": 4.163234710693359, "learning_rate": 1.1633333333333334e-05, "loss": 1.6872, "step": 350 }, { "epoch": 1.0, "grad_norm": 3.1048901081085205, "learning_rate": 1.1966666666666668e-05, "loss": 1.6613, "step": 360 }, { "epoch": 1.0277777777777777, "grad_norm": 3.7725777626037598, "learning_rate": 1.23e-05, "loss": 1.6648, "step": 370 }, { "epoch": 1.0555555555555556, "grad_norm": 3.4087767601013184, "learning_rate": 1.2633333333333333e-05, "loss": 1.6349, "step": 380 }, { "epoch": 1.0833333333333333, "grad_norm": 1.8151607513427734, "learning_rate": 1.2966666666666669e-05, "loss": 1.6163, "step": 390 }, { "epoch": 1.1111111111111112, "grad_norm": 5.23177433013916, "learning_rate": 1.3300000000000001e-05, "loss": 1.6062, "step": 400 }, { "epoch": 1.1388888888888888, "grad_norm": 3.2614917755126953, "learning_rate": 1.3633333333333334e-05, "loss": 1.6103, "step": 410 }, { "epoch": 1.1666666666666667, "grad_norm": 2.306703805923462, "learning_rate": 1.3966666666666666e-05, "loss": 1.5806, "step": 420 }, { "epoch": 1.1944444444444444, "grad_norm": 2.7506439685821533, "learning_rate": 1.43e-05, "loss": 1.5273, "step": 430 }, { "epoch": 1.2222222222222223, "grad_norm": 3.2531795501708984, "learning_rate": 1.4633333333333334e-05, "loss": 1.5517, "step": 440 }, { "epoch": 1.25, "grad_norm": 2.335641860961914, "learning_rate": 1.4966666666666668e-05, "loss": 1.5184, "step": 450 }, { "epoch": 1.2777777777777777, "grad_norm": 2.769528865814209, "learning_rate": 1.53e-05, "loss": 1.5302, "step": 460 }, { "epoch": 1.3055555555555556, "grad_norm": 3.517038583755493, "learning_rate": 1.563333333333333e-05, "loss": 1.5163, "step": 470 }, { "epoch": 1.3333333333333333, "grad_norm": 2.3602278232574463, "learning_rate": 1.5966666666666667e-05, "loss": 1.5083, "step": 480 }, { "epoch": 1.3611111111111112, "grad_norm": 2.4284496307373047, "learning_rate": 1.63e-05, "loss": 1.5008, "step": 490 }, { "epoch": 1.3888888888888888, "grad_norm": 1.480750560760498, "learning_rate": 1.6633333333333336e-05, "loss": 1.472, "step": 500 }, { "epoch": 1.4166666666666667, "grad_norm": 2.429313898086548, "learning_rate": 1.6966666666666668e-05, "loss": 1.4934, "step": 510 }, { "epoch": 1.4444444444444444, "grad_norm": 1.8658791780471802, "learning_rate": 1.73e-05, "loss": 1.4618, "step": 520 }, { "epoch": 1.4722222222222223, "grad_norm": 2.241239070892334, "learning_rate": 1.7633333333333336e-05, "loss": 1.4668, "step": 530 }, { "epoch": 1.5, "grad_norm": 2.2530484199523926, "learning_rate": 1.796666666666667e-05, "loss": 1.4299, "step": 540 }, { "epoch": 1.5277777777777777, "grad_norm": 1.4986937046051025, "learning_rate": 1.83e-05, "loss": 1.4394, "step": 550 }, { "epoch": 1.5555555555555556, "grad_norm": 1.8272508382797241, "learning_rate": 1.8633333333333333e-05, "loss": 1.4264, "step": 560 }, { "epoch": 1.5833333333333335, "grad_norm": 2.144418954849243, "learning_rate": 1.896666666666667e-05, "loss": 1.4063, "step": 570 }, { "epoch": 1.6111111111111112, "grad_norm": 2.49948787689209, "learning_rate": 1.93e-05, "loss": 1.401, "step": 580 }, { "epoch": 1.6388888888888888, "grad_norm": 1.9806621074676514, "learning_rate": 1.9633333333333334e-05, "loss": 1.4283, "step": 590 }, { "epoch": 1.6666666666666665, "grad_norm": 2.2134459018707275, "learning_rate": 1.9966666666666666e-05, "loss": 1.3966, "step": 600 }, { "epoch": 1.6944444444444444, "grad_norm": 1.7755744457244873, "learning_rate": 2.0300000000000002e-05, "loss": 1.4458, "step": 610 }, { "epoch": 1.7222222222222223, "grad_norm": 2.27699613571167, "learning_rate": 2.0633333333333335e-05, "loss": 1.3777, "step": 620 }, { "epoch": 1.75, "grad_norm": 1.6288598775863647, "learning_rate": 2.0966666666666667e-05, "loss": 1.3849, "step": 630 }, { "epoch": 1.7777777777777777, "grad_norm": 1.8395355939865112, "learning_rate": 2.13e-05, "loss": 1.3811, "step": 640 }, { "epoch": 1.8055555555555556, "grad_norm": 1.4840824604034424, "learning_rate": 2.1633333333333332e-05, "loss": 1.3643, "step": 650 }, { "epoch": 1.8333333333333335, "grad_norm": 2.3580636978149414, "learning_rate": 2.1966666666666668e-05, "loss": 1.3762, "step": 660 }, { "epoch": 1.8611111111111112, "grad_norm": 1.9879131317138672, "learning_rate": 2.23e-05, "loss": 1.3855, "step": 670 }, { "epoch": 1.8888888888888888, "grad_norm": 2.0003507137298584, "learning_rate": 2.2633333333333336e-05, "loss": 1.3583, "step": 680 }, { "epoch": 1.9166666666666665, "grad_norm": 1.6904516220092773, "learning_rate": 2.2966666666666668e-05, "loss": 1.3509, "step": 690 }, { "epoch": 1.9444444444444444, "grad_norm": 1.71620512008667, "learning_rate": 2.3300000000000004e-05, "loss": 1.3406, "step": 700 }, { "epoch": 1.9722222222222223, "grad_norm": 2.494544267654419, "learning_rate": 2.3633333333333336e-05, "loss": 1.389, "step": 710 }, { "epoch": 2.0, "grad_norm": 1.6472359895706177, "learning_rate": 2.396666666666667e-05, "loss": 1.3577, "step": 720 }, { "epoch": 2.0277777777777777, "grad_norm": 1.9563602209091187, "learning_rate": 2.43e-05, "loss": 1.3721, "step": 730 }, { "epoch": 2.0555555555555554, "grad_norm": 2.158012866973877, "learning_rate": 2.4633333333333334e-05, "loss": 1.3224, "step": 740 }, { "epoch": 2.0833333333333335, "grad_norm": 1.3827286958694458, "learning_rate": 2.496666666666667e-05, "loss": 1.3467, "step": 750 }, { "epoch": 2.111111111111111, "grad_norm": 1.7081470489501953, "learning_rate": 2.5300000000000002e-05, "loss": 1.33, "step": 760 }, { "epoch": 2.138888888888889, "grad_norm": 1.575785756111145, "learning_rate": 2.5633333333333338e-05, "loss": 1.3255, "step": 770 }, { "epoch": 2.1666666666666665, "grad_norm": 1.9013493061065674, "learning_rate": 2.5966666666666667e-05, "loss": 1.335, "step": 780 }, { "epoch": 2.1944444444444446, "grad_norm": 1.608219861984253, "learning_rate": 2.6300000000000002e-05, "loss": 1.3128, "step": 790 }, { "epoch": 2.2222222222222223, "grad_norm": 1.48382568359375, "learning_rate": 2.663333333333333e-05, "loss": 1.3105, "step": 800 }, { "epoch": 2.25, "grad_norm": 1.8131403923034668, "learning_rate": 2.6966666666666667e-05, "loss": 1.3305, "step": 810 }, { "epoch": 2.2777777777777777, "grad_norm": 1.4801263809204102, "learning_rate": 2.7300000000000003e-05, "loss": 1.3001, "step": 820 }, { "epoch": 2.3055555555555554, "grad_norm": 1.695723533630371, "learning_rate": 2.7633333333333332e-05, "loss": 1.2983, "step": 830 }, { "epoch": 2.3333333333333335, "grad_norm": 1.7370522022247314, "learning_rate": 2.7966666666666668e-05, "loss": 1.3045, "step": 840 }, { "epoch": 2.361111111111111, "grad_norm": 1.485021710395813, "learning_rate": 2.83e-05, "loss": 1.3084, "step": 850 }, { "epoch": 2.388888888888889, "grad_norm": 1.6745744943618774, "learning_rate": 2.8633333333333336e-05, "loss": 1.2976, "step": 860 }, { "epoch": 2.4166666666666665, "grad_norm": 1.2909083366394043, "learning_rate": 2.8966666666666668e-05, "loss": 1.3041, "step": 870 }, { "epoch": 2.4444444444444446, "grad_norm": 1.5151005983352661, "learning_rate": 2.93e-05, "loss": 1.3002, "step": 880 }, { "epoch": 2.4722222222222223, "grad_norm": 1.2823742628097534, "learning_rate": 2.9633333333333336e-05, "loss": 1.2998, "step": 890 }, { "epoch": 2.5, "grad_norm": 1.2469843626022339, "learning_rate": 2.9966666666666672e-05, "loss": 1.2771, "step": 900 }, { "epoch": 2.5277777777777777, "grad_norm": 1.677615761756897, "learning_rate": 3.03e-05, "loss": 1.2785, "step": 910 }, { "epoch": 2.5555555555555554, "grad_norm": 1.0909905433654785, "learning_rate": 3.063333333333334e-05, "loss": 1.286, "step": 920 }, { "epoch": 2.5833333333333335, "grad_norm": 1.1445268392562866, "learning_rate": 3.096666666666666e-05, "loss": 1.2719, "step": 930 }, { "epoch": 2.611111111111111, "grad_norm": 1.4749085903167725, "learning_rate": 3.13e-05, "loss": 1.2709, "step": 940 }, { "epoch": 2.638888888888889, "grad_norm": 1.649418592453003, "learning_rate": 3.1633333333333334e-05, "loss": 1.2918, "step": 950 }, { "epoch": 2.6666666666666665, "grad_norm": 1.5605149269104004, "learning_rate": 3.196666666666667e-05, "loss": 1.237, "step": 960 }, { "epoch": 2.6944444444444446, "grad_norm": 1.3925927877426147, "learning_rate": 3.2300000000000006e-05, "loss": 1.2629, "step": 970 }, { "epoch": 2.7222222222222223, "grad_norm": 1.0373307466506958, "learning_rate": 3.263333333333333e-05, "loss": 1.2659, "step": 980 }, { "epoch": 2.75, "grad_norm": 1.2308710813522339, "learning_rate": 3.296666666666667e-05, "loss": 1.2521, "step": 990 }, { "epoch": 2.7777777777777777, "grad_norm": 1.2633785009384155, "learning_rate": 3.33e-05, "loss": 1.2669, "step": 1000 }, { "epoch": 2.8055555555555554, "grad_norm": 1.3349947929382324, "learning_rate": 3.3633333333333335e-05, "loss": 1.2386, "step": 1010 }, { "epoch": 2.8333333333333335, "grad_norm": 1.4729466438293457, "learning_rate": 3.396666666666667e-05, "loss": 1.2413, "step": 1020 }, { "epoch": 2.861111111111111, "grad_norm": 1.7489209175109863, "learning_rate": 3.430000000000001e-05, "loss": 1.2409, "step": 1030 }, { "epoch": 2.888888888888889, "grad_norm": 1.6537836790084839, "learning_rate": 3.463333333333333e-05, "loss": 1.2434, "step": 1040 }, { "epoch": 2.9166666666666665, "grad_norm": 1.1275861263275146, "learning_rate": 3.496666666666667e-05, "loss": 1.2476, "step": 1050 }, { "epoch": 2.9444444444444446, "grad_norm": 1.3504081964492798, "learning_rate": 3.53e-05, "loss": 1.2551, "step": 1060 }, { "epoch": 2.9722222222222223, "grad_norm": 1.8836020231246948, "learning_rate": 3.563333333333334e-05, "loss": 1.2489, "step": 1070 }, { "epoch": 3.0, "grad_norm": 1.4962594509124756, "learning_rate": 3.596666666666667e-05, "loss": 1.2527, "step": 1080 }, { "epoch": 3.0277777777777777, "grad_norm": 1.666877031326294, "learning_rate": 3.63e-05, "loss": 1.2328, "step": 1090 }, { "epoch": 3.0555555555555554, "grad_norm": 1.4704580307006836, "learning_rate": 3.6633333333333334e-05, "loss": 1.2484, "step": 1100 }, { "epoch": 3.0833333333333335, "grad_norm": 1.6218695640563965, "learning_rate": 3.6966666666666666e-05, "loss": 1.2472, "step": 1110 }, { "epoch": 3.111111111111111, "grad_norm": 1.3495756387710571, "learning_rate": 3.73e-05, "loss": 1.2318, "step": 1120 }, { "epoch": 3.138888888888889, "grad_norm": 1.523292064666748, "learning_rate": 3.763333333333334e-05, "loss": 1.2542, "step": 1130 }, { "epoch": 3.1666666666666665, "grad_norm": 1.2762353420257568, "learning_rate": 3.796666666666667e-05, "loss": 1.2361, "step": 1140 }, { "epoch": 3.1944444444444446, "grad_norm": 1.0399117469787598, "learning_rate": 3.83e-05, "loss": 1.2367, "step": 1150 }, { "epoch": 3.2222222222222223, "grad_norm": 1.4306637048721313, "learning_rate": 3.8633333333333335e-05, "loss": 1.2434, "step": 1160 }, { "epoch": 3.25, "grad_norm": 1.30075204372406, "learning_rate": 3.896666666666667e-05, "loss": 1.2397, "step": 1170 }, { "epoch": 3.2777777777777777, "grad_norm": 1.0488041639328003, "learning_rate": 3.9300000000000007e-05, "loss": 1.2367, "step": 1180 }, { "epoch": 3.3055555555555554, "grad_norm": 1.1713590621948242, "learning_rate": 3.963333333333333e-05, "loss": 1.2361, "step": 1190 }, { "epoch": 3.3333333333333335, "grad_norm": 1.3348190784454346, "learning_rate": 3.996666666666667e-05, "loss": 1.2254, "step": 1200 }, { "epoch": 3.361111111111111, "grad_norm": 1.2350366115570068, "learning_rate": 4.0300000000000004e-05, "loss": 1.237, "step": 1210 }, { "epoch": 3.388888888888889, "grad_norm": 1.672656774520874, "learning_rate": 4.0633333333333336e-05, "loss": 1.2291, "step": 1220 }, { "epoch": 3.4166666666666665, "grad_norm": 0.9432772994041443, "learning_rate": 4.096666666666667e-05, "loss": 1.2113, "step": 1230 }, { "epoch": 3.4444444444444446, "grad_norm": 1.0585707426071167, "learning_rate": 4.13e-05, "loss": 1.2108, "step": 1240 }, { "epoch": 3.4722222222222223, "grad_norm": 1.360561490058899, "learning_rate": 4.1633333333333333e-05, "loss": 1.234, "step": 1250 }, { "epoch": 3.5, "grad_norm": 0.9114920496940613, "learning_rate": 4.196666666666667e-05, "loss": 1.2092, "step": 1260 }, { "epoch": 3.5277777777777777, "grad_norm": 1.5361149311065674, "learning_rate": 4.23e-05, "loss": 1.239, "step": 1270 }, { "epoch": 3.5555555555555554, "grad_norm": 1.1818305253982544, "learning_rate": 4.263333333333334e-05, "loss": 1.2153, "step": 1280 }, { "epoch": 3.5833333333333335, "grad_norm": 1.420333981513977, "learning_rate": 4.296666666666666e-05, "loss": 1.2106, "step": 1290 }, { "epoch": 3.611111111111111, "grad_norm": 0.9809475541114807, "learning_rate": 4.33e-05, "loss": 1.2065, "step": 1300 }, { "epoch": 3.638888888888889, "grad_norm": 1.4664013385772705, "learning_rate": 4.3633333333333335e-05, "loss": 1.1975, "step": 1310 }, { "epoch": 3.6666666666666665, "grad_norm": 1.4665195941925049, "learning_rate": 4.396666666666667e-05, "loss": 1.2026, "step": 1320 }, { "epoch": 3.6944444444444446, "grad_norm": 1.0431784391403198, "learning_rate": 4.43e-05, "loss": 1.2078, "step": 1330 }, { "epoch": 3.7222222222222223, "grad_norm": 1.146155834197998, "learning_rate": 4.463333333333334e-05, "loss": 1.212, "step": 1340 }, { "epoch": 3.75, "grad_norm": 0.9077619314193726, "learning_rate": 4.496666666666667e-05, "loss": 1.1931, "step": 1350 }, { "epoch": 3.7777777777777777, "grad_norm": 1.0436832904815674, "learning_rate": 4.53e-05, "loss": 1.199, "step": 1360 }, { "epoch": 3.8055555555555554, "grad_norm": 1.7593790292739868, "learning_rate": 4.5633333333333336e-05, "loss": 1.1831, "step": 1370 }, { "epoch": 3.8333333333333335, "grad_norm": 1.146198034286499, "learning_rate": 4.596666666666667e-05, "loss": 1.201, "step": 1380 }, { "epoch": 3.861111111111111, "grad_norm": 1.2821046113967896, "learning_rate": 4.630000000000001e-05, "loss": 1.1764, "step": 1390 }, { "epoch": 3.888888888888889, "grad_norm": 1.2905700206756592, "learning_rate": 4.663333333333333e-05, "loss": 1.1794, "step": 1400 }, { "epoch": 3.9166666666666665, "grad_norm": 0.9740247130393982, "learning_rate": 4.696666666666667e-05, "loss": 1.1798, "step": 1410 }, { "epoch": 3.9444444444444446, "grad_norm": 0.8712648153305054, "learning_rate": 4.73e-05, "loss": 1.1778, "step": 1420 }, { "epoch": 3.9722222222222223, "grad_norm": 0.935690701007843, "learning_rate": 4.763333333333334e-05, "loss": 1.181, "step": 1430 }, { "epoch": 4.0, "grad_norm": 1.7301629781723022, "learning_rate": 4.796666666666667e-05, "loss": 1.1833, "step": 1440 }, { "epoch": 4.027777777777778, "grad_norm": 1.3405653238296509, "learning_rate": 4.83e-05, "loss": 1.1778, "step": 1450 }, { "epoch": 4.055555555555555, "grad_norm": 1.2406929731369019, "learning_rate": 4.8633333333333334e-05, "loss": 1.173, "step": 1460 }, { "epoch": 4.083333333333333, "grad_norm": 1.2655417919158936, "learning_rate": 4.8966666666666667e-05, "loss": 1.1782, "step": 1470 }, { "epoch": 4.111111111111111, "grad_norm": 1.5031538009643555, "learning_rate": 4.93e-05, "loss": 1.171, "step": 1480 }, { "epoch": 4.138888888888889, "grad_norm": 0.9648705124855042, "learning_rate": 4.963333333333334e-05, "loss": 1.1625, "step": 1490 }, { "epoch": 4.166666666666667, "grad_norm": 1.3373568058013916, "learning_rate": 4.996666666666667e-05, "loss": 1.1915, "step": 1500 }, { "epoch": 4.194444444444445, "grad_norm": 1.2972426414489746, "learning_rate": 5.03e-05, "loss": 1.1625, "step": 1510 }, { "epoch": 4.222222222222222, "grad_norm": 1.203098177909851, "learning_rate": 5.0633333333333335e-05, "loss": 1.1551, "step": 1520 }, { "epoch": 4.25, "grad_norm": 1.1645056009292603, "learning_rate": 5.0966666666666674e-05, "loss": 1.1455, "step": 1530 }, { "epoch": 4.277777777777778, "grad_norm": 1.3108216524124146, "learning_rate": 5.130000000000001e-05, "loss": 1.163, "step": 1540 }, { "epoch": 4.305555555555555, "grad_norm": 0.9834801554679871, "learning_rate": 5.163333333333333e-05, "loss": 1.1462, "step": 1550 }, { "epoch": 4.333333333333333, "grad_norm": 1.182983636856079, "learning_rate": 5.196666666666667e-05, "loss": 1.1325, "step": 1560 }, { "epoch": 4.361111111111111, "grad_norm": 1.125453233718872, "learning_rate": 5.2300000000000004e-05, "loss": 1.1411, "step": 1570 }, { "epoch": 4.388888888888889, "grad_norm": 1.0888134241104126, "learning_rate": 5.2633333333333336e-05, "loss": 1.1244, "step": 1580 }, { "epoch": 4.416666666666667, "grad_norm": 1.4513331651687622, "learning_rate": 5.296666666666666e-05, "loss": 1.1399, "step": 1590 }, { "epoch": 4.444444444444445, "grad_norm": 1.247117042541504, "learning_rate": 5.330000000000001e-05, "loss": 1.1447, "step": 1600 }, { "epoch": 4.472222222222222, "grad_norm": 1.3565462827682495, "learning_rate": 5.3633333333333334e-05, "loss": 1.1271, "step": 1610 }, { "epoch": 4.5, "grad_norm": 1.104219675064087, "learning_rate": 5.3966666666666666e-05, "loss": 1.1247, "step": 1620 }, { "epoch": 4.527777777777778, "grad_norm": 0.95891934633255, "learning_rate": 5.4300000000000005e-05, "loss": 1.1221, "step": 1630 }, { "epoch": 4.555555555555555, "grad_norm": 1.187212586402893, "learning_rate": 5.463333333333334e-05, "loss": 1.1173, "step": 1640 }, { "epoch": 4.583333333333333, "grad_norm": 1.3385809659957886, "learning_rate": 5.496666666666666e-05, "loss": 1.1433, "step": 1650 }, { "epoch": 4.611111111111111, "grad_norm": 1.2376090288162231, "learning_rate": 5.530000000000001e-05, "loss": 1.1036, "step": 1660 }, { "epoch": 4.638888888888889, "grad_norm": 1.2190152406692505, "learning_rate": 5.5633333333333335e-05, "loss": 1.1133, "step": 1670 }, { "epoch": 4.666666666666667, "grad_norm": 1.3089593648910522, "learning_rate": 5.596666666666667e-05, "loss": 1.1132, "step": 1680 }, { "epoch": 4.694444444444445, "grad_norm": 1.1893470287322998, "learning_rate": 5.63e-05, "loss": 1.1124, "step": 1690 }, { "epoch": 4.722222222222222, "grad_norm": 1.2936846017837524, "learning_rate": 5.663333333333334e-05, "loss": 1.1052, "step": 1700 }, { "epoch": 4.75, "grad_norm": 1.5605329275131226, "learning_rate": 5.696666666666667e-05, "loss": 1.0985, "step": 1710 }, { "epoch": 4.777777777777778, "grad_norm": 1.218218207359314, "learning_rate": 5.73e-05, "loss": 1.1104, "step": 1720 }, { "epoch": 4.805555555555555, "grad_norm": 1.3115607500076294, "learning_rate": 5.7633333333333336e-05, "loss": 1.0966, "step": 1730 }, { "epoch": 4.833333333333333, "grad_norm": 1.1123130321502686, "learning_rate": 5.796666666666667e-05, "loss": 1.1003, "step": 1740 }, { "epoch": 4.861111111111111, "grad_norm": 1.4585541486740112, "learning_rate": 5.83e-05, "loss": 1.1042, "step": 1750 }, { "epoch": 4.888888888888889, "grad_norm": 1.290058970451355, "learning_rate": 5.863333333333334e-05, "loss": 1.1037, "step": 1760 }, { "epoch": 4.916666666666667, "grad_norm": 1.029157042503357, "learning_rate": 5.896666666666667e-05, "loss": 1.0843, "step": 1770 }, { "epoch": 4.944444444444445, "grad_norm": 1.0369641780853271, "learning_rate": 5.93e-05, "loss": 1.0877, "step": 1780 }, { "epoch": 4.972222222222222, "grad_norm": 1.2939940690994263, "learning_rate": 5.9633333333333344e-05, "loss": 1.0821, "step": 1790 }, { "epoch": 5.0, "grad_norm": 1.1348316669464111, "learning_rate": 5.996666666666667e-05, "loss": 1.0789, "step": 1800 }, { "epoch": 5.027777777777778, "grad_norm": 1.336199402809143, "learning_rate": 6.03e-05, "loss": 1.065, "step": 1810 }, { "epoch": 5.055555555555555, "grad_norm": 0.9330359101295471, "learning_rate": 6.063333333333333e-05, "loss": 1.0709, "step": 1820 }, { "epoch": 5.083333333333333, "grad_norm": 1.2433722019195557, "learning_rate": 6.0966666666666674e-05, "loss": 1.0647, "step": 1830 }, { "epoch": 5.111111111111111, "grad_norm": 1.1839278936386108, "learning_rate": 6.13e-05, "loss": 1.0463, "step": 1840 }, { "epoch": 5.138888888888889, "grad_norm": 1.2285066843032837, "learning_rate": 6.163333333333333e-05, "loss": 1.0642, "step": 1850 }, { "epoch": 5.166666666666667, "grad_norm": 1.7072560787200928, "learning_rate": 6.196666666666668e-05, "loss": 1.0777, "step": 1860 }, { "epoch": 5.194444444444445, "grad_norm": 1.1880024671554565, "learning_rate": 6.23e-05, "loss": 1.0608, "step": 1870 }, { "epoch": 5.222222222222222, "grad_norm": 1.2595951557159424, "learning_rate": 6.263333333333333e-05, "loss": 1.054, "step": 1880 }, { "epoch": 5.25, "grad_norm": 1.204685091972351, "learning_rate": 6.296666666666667e-05, "loss": 1.0541, "step": 1890 }, { "epoch": 5.277777777777778, "grad_norm": 1.1668659448623657, "learning_rate": 6.330000000000001e-05, "loss": 1.0525, "step": 1900 }, { "epoch": 5.305555555555555, "grad_norm": 1.1574805974960327, "learning_rate": 6.363333333333334e-05, "loss": 1.0551, "step": 1910 }, { "epoch": 5.333333333333333, "grad_norm": 1.5936880111694336, "learning_rate": 6.396666666666667e-05, "loss": 1.0515, "step": 1920 }, { "epoch": 5.361111111111111, "grad_norm": 1.2019188404083252, "learning_rate": 6.43e-05, "loss": 1.0465, "step": 1930 }, { "epoch": 5.388888888888889, "grad_norm": 1.1796774864196777, "learning_rate": 6.463333333333334e-05, "loss": 1.0427, "step": 1940 }, { "epoch": 5.416666666666667, "grad_norm": 1.1139317750930786, "learning_rate": 6.496666666666667e-05, "loss": 1.0341, "step": 1950 }, { "epoch": 5.444444444444445, "grad_norm": 1.3176546096801758, "learning_rate": 6.53e-05, "loss": 1.0258, "step": 1960 }, { "epoch": 5.472222222222222, "grad_norm": 1.3508331775665283, "learning_rate": 6.563333333333333e-05, "loss": 1.0534, "step": 1970 }, { "epoch": 5.5, "grad_norm": 1.261190414428711, "learning_rate": 6.596666666666667e-05, "loss": 1.0557, "step": 1980 }, { "epoch": 5.527777777777778, "grad_norm": 1.15389883518219, "learning_rate": 6.630000000000001e-05, "loss": 1.0294, "step": 1990 }, { "epoch": 5.555555555555555, "grad_norm": 1.1387341022491455, "learning_rate": 6.663333333333333e-05, "loss": 1.0137, "step": 2000 }, { "epoch": 5.583333333333333, "grad_norm": 1.061601161956787, "learning_rate": 6.696666666666666e-05, "loss": 1.0165, "step": 2010 }, { "epoch": 5.611111111111111, "grad_norm": 1.0728416442871094, "learning_rate": 6.730000000000001e-05, "loss": 1.0142, "step": 2020 }, { "epoch": 5.638888888888889, "grad_norm": 1.4977203607559204, "learning_rate": 6.763333333333334e-05, "loss": 1.0208, "step": 2030 }, { "epoch": 5.666666666666667, "grad_norm": 1.2097450494766235, "learning_rate": 6.796666666666666e-05, "loss": 1.0359, "step": 2040 }, { "epoch": 5.694444444444445, "grad_norm": 1.2591958045959473, "learning_rate": 6.83e-05, "loss": 1.0252, "step": 2050 }, { "epoch": 5.722222222222222, "grad_norm": 1.1869854927062988, "learning_rate": 6.863333333333334e-05, "loss": 0.9932, "step": 2060 }, { "epoch": 5.75, "grad_norm": 1.0285825729370117, "learning_rate": 6.896666666666667e-05, "loss": 0.9973, "step": 2070 }, { "epoch": 5.777777777777778, "grad_norm": 1.2501593828201294, "learning_rate": 6.93e-05, "loss": 1.0037, "step": 2080 }, { "epoch": 5.805555555555555, "grad_norm": 1.215832233428955, "learning_rate": 6.963333333333334e-05, "loss": 1.0023, "step": 2090 }, { "epoch": 5.833333333333333, "grad_norm": 1.2830485105514526, "learning_rate": 6.996666666666667e-05, "loss": 1.0014, "step": 2100 }, { "epoch": 5.861111111111111, "grad_norm": 1.250379204750061, "learning_rate": 7.03e-05, "loss": 1.0014, "step": 2110 }, { "epoch": 5.888888888888889, "grad_norm": 0.976373016834259, "learning_rate": 7.063333333333333e-05, "loss": 0.983, "step": 2120 }, { "epoch": 5.916666666666667, "grad_norm": 1.1424741744995117, "learning_rate": 7.096666666666667e-05, "loss": 0.9978, "step": 2130 }, { "epoch": 5.944444444444445, "grad_norm": 0.9735451340675354, "learning_rate": 7.13e-05, "loss": 0.9902, "step": 2140 }, { "epoch": 5.972222222222222, "grad_norm": 1.0421696901321411, "learning_rate": 7.163333333333334e-05, "loss": 0.9971, "step": 2150 }, { "epoch": 6.0, "grad_norm": 1.3814692497253418, "learning_rate": 7.196666666666668e-05, "loss": 0.9804, "step": 2160 }, { "epoch": 6.027777777777778, "grad_norm": 1.4130151271820068, "learning_rate": 7.23e-05, "loss": 0.9704, "step": 2170 }, { "epoch": 6.055555555555555, "grad_norm": 1.4542014598846436, "learning_rate": 7.263333333333334e-05, "loss": 0.9948, "step": 2180 }, { "epoch": 6.083333333333333, "grad_norm": 1.2243306636810303, "learning_rate": 7.296666666666667e-05, "loss": 0.9744, "step": 2190 }, { "epoch": 6.111111111111111, "grad_norm": 1.0937132835388184, "learning_rate": 7.33e-05, "loss": 0.9625, "step": 2200 }, { "epoch": 6.138888888888889, "grad_norm": 0.9170511960983276, "learning_rate": 7.363333333333334e-05, "loss": 0.9614, "step": 2210 }, { "epoch": 6.166666666666667, "grad_norm": 1.1496657133102417, "learning_rate": 7.396666666666667e-05, "loss": 0.9636, "step": 2220 }, { "epoch": 6.194444444444445, "grad_norm": 1.266990303993225, "learning_rate": 7.43e-05, "loss": 0.9675, "step": 2230 }, { "epoch": 6.222222222222222, "grad_norm": 1.2259392738342285, "learning_rate": 7.463333333333334e-05, "loss": 0.9643, "step": 2240 }, { "epoch": 6.25, "grad_norm": 1.0929317474365234, "learning_rate": 7.496666666666667e-05, "loss": 0.9786, "step": 2250 }, { "epoch": 6.277777777777778, "grad_norm": 1.1210495233535767, "learning_rate": 7.53e-05, "loss": 0.9726, "step": 2260 }, { "epoch": 6.305555555555555, "grad_norm": 1.2045217752456665, "learning_rate": 7.563333333333333e-05, "loss": 0.9436, "step": 2270 }, { "epoch": 6.333333333333333, "grad_norm": 1.4050379991531372, "learning_rate": 7.596666666666668e-05, "loss": 0.974, "step": 2280 }, { "epoch": 6.361111111111111, "grad_norm": 1.2034026384353638, "learning_rate": 7.630000000000001e-05, "loss": 0.9561, "step": 2290 }, { "epoch": 6.388888888888889, "grad_norm": 1.33096182346344, "learning_rate": 7.663333333333333e-05, "loss": 0.9397, "step": 2300 }, { "epoch": 6.416666666666667, "grad_norm": 1.1243939399719238, "learning_rate": 7.696666666666668e-05, "loss": 0.9586, "step": 2310 }, { "epoch": 6.444444444444445, "grad_norm": 1.2552531957626343, "learning_rate": 7.730000000000001e-05, "loss": 0.9366, "step": 2320 }, { "epoch": 6.472222222222222, "grad_norm": 1.2096279859542847, "learning_rate": 7.763333333333334e-05, "loss": 0.9575, "step": 2330 }, { "epoch": 6.5, "grad_norm": 1.3202650547027588, "learning_rate": 7.796666666666666e-05, "loss": 0.9354, "step": 2340 }, { "epoch": 6.527777777777778, "grad_norm": 1.280335783958435, "learning_rate": 7.83e-05, "loss": 0.9471, "step": 2350 }, { "epoch": 6.555555555555555, "grad_norm": 1.2733498811721802, "learning_rate": 7.863333333333334e-05, "loss": 0.9366, "step": 2360 }, { "epoch": 6.583333333333333, "grad_norm": 1.2128610610961914, "learning_rate": 7.896666666666667e-05, "loss": 0.9437, "step": 2370 }, { "epoch": 6.611111111111111, "grad_norm": 1.1639047861099243, "learning_rate": 7.93e-05, "loss": 0.9265, "step": 2380 }, { "epoch": 6.638888888888889, "grad_norm": 1.1112380027770996, "learning_rate": 7.963333333333334e-05, "loss": 0.9394, "step": 2390 }, { "epoch": 6.666666666666667, "grad_norm": 1.3601691722869873, "learning_rate": 7.996666666666667e-05, "loss": 0.9328, "step": 2400 }, { "epoch": 6.694444444444445, "grad_norm": 1.4039982557296753, "learning_rate": 8.030000000000001e-05, "loss": 0.9274, "step": 2410 }, { "epoch": 6.722222222222222, "grad_norm": 1.2199487686157227, "learning_rate": 8.063333333333333e-05, "loss": 0.9236, "step": 2420 }, { "epoch": 6.75, "grad_norm": 1.2982662916183472, "learning_rate": 8.096666666666667e-05, "loss": 0.916, "step": 2430 }, { "epoch": 6.777777777777778, "grad_norm": 1.1791383028030396, "learning_rate": 8.13e-05, "loss": 0.9386, "step": 2440 }, { "epoch": 6.805555555555555, "grad_norm": 1.2707198858261108, "learning_rate": 8.163333333333334e-05, "loss": 0.9245, "step": 2450 }, { "epoch": 6.833333333333333, "grad_norm": 1.2379072904586792, "learning_rate": 8.196666666666668e-05, "loss": 0.9354, "step": 2460 }, { "epoch": 6.861111111111111, "grad_norm": 1.2228095531463623, "learning_rate": 8.23e-05, "loss": 0.9267, "step": 2470 }, { "epoch": 6.888888888888889, "grad_norm": 1.4087618589401245, "learning_rate": 8.263333333333334e-05, "loss": 0.9172, "step": 2480 }, { "epoch": 6.916666666666667, "grad_norm": 1.3872863054275513, "learning_rate": 8.296666666666667e-05, "loss": 0.9095, "step": 2490 }, { "epoch": 6.944444444444445, "grad_norm": 1.2615960836410522, "learning_rate": 8.33e-05, "loss": 0.9193, "step": 2500 }, { "epoch": 6.972222222222222, "grad_norm": 1.1909763813018799, "learning_rate": 8.363333333333334e-05, "loss": 0.9087, "step": 2510 }, { "epoch": 7.0, "grad_norm": 1.3041459321975708, "learning_rate": 8.396666666666667e-05, "loss": 0.9004, "step": 2520 }, { "epoch": 7.027777777777778, "grad_norm": 1.563923716545105, "learning_rate": 8.43e-05, "loss": 0.8902, "step": 2530 }, { "epoch": 7.055555555555555, "grad_norm": 1.435655951499939, "learning_rate": 8.463333333333335e-05, "loss": 0.9236, "step": 2540 }, { "epoch": 7.083333333333333, "grad_norm": 1.2600359916687012, "learning_rate": 8.496666666666667e-05, "loss": 0.9157, "step": 2550 }, { "epoch": 7.111111111111111, "grad_norm": 1.3055298328399658, "learning_rate": 8.53e-05, "loss": 0.9012, "step": 2560 }, { "epoch": 7.138888888888889, "grad_norm": 1.216810941696167, "learning_rate": 8.563333333333333e-05, "loss": 0.9036, "step": 2570 }, { "epoch": 7.166666666666667, "grad_norm": 1.3752663135528564, "learning_rate": 8.596666666666668e-05, "loss": 0.9009, "step": 2580 }, { "epoch": 7.194444444444445, "grad_norm": 1.2534518241882324, "learning_rate": 8.63e-05, "loss": 0.8958, "step": 2590 }, { "epoch": 7.222222222222222, "grad_norm": 1.2014307975769043, "learning_rate": 8.663333333333333e-05, "loss": 0.897, "step": 2600 }, { "epoch": 7.25, "grad_norm": 1.1947122812271118, "learning_rate": 8.696666666666668e-05, "loss": 0.8752, "step": 2610 }, { "epoch": 7.277777777777778, "grad_norm": 1.401790976524353, "learning_rate": 8.730000000000001e-05, "loss": 0.8842, "step": 2620 }, { "epoch": 7.305555555555555, "grad_norm": 1.0160242319107056, "learning_rate": 8.763333333333334e-05, "loss": 0.8784, "step": 2630 }, { "epoch": 7.333333333333333, "grad_norm": 1.2184860706329346, "learning_rate": 8.796666666666667e-05, "loss": 0.8698, "step": 2640 }, { "epoch": 7.361111111111111, "grad_norm": 1.1553876399993896, "learning_rate": 8.83e-05, "loss": 0.8802, "step": 2650 }, { "epoch": 7.388888888888889, "grad_norm": 1.2448911666870117, "learning_rate": 8.863333333333334e-05, "loss": 0.8982, "step": 2660 }, { "epoch": 7.416666666666667, "grad_norm": 1.2201918363571167, "learning_rate": 8.896666666666667e-05, "loss": 0.8883, "step": 2670 }, { "epoch": 7.444444444444445, "grad_norm": 1.050972819328308, "learning_rate": 8.93e-05, "loss": 0.8825, "step": 2680 }, { "epoch": 7.472222222222222, "grad_norm": 1.2422785758972168, "learning_rate": 8.963333333333333e-05, "loss": 0.8793, "step": 2690 }, { "epoch": 7.5, "grad_norm": 1.3267887830734253, "learning_rate": 8.996666666666667e-05, "loss": 0.8796, "step": 2700 }, { "epoch": 7.527777777777778, "grad_norm": 1.2436256408691406, "learning_rate": 9.030000000000001e-05, "loss": 0.8765, "step": 2710 }, { "epoch": 7.555555555555555, "grad_norm": 1.1524627208709717, "learning_rate": 9.063333333333333e-05, "loss": 0.8609, "step": 2720 }, { "epoch": 7.583333333333333, "grad_norm": 1.334136962890625, "learning_rate": 9.096666666666666e-05, "loss": 0.8706, "step": 2730 }, { "epoch": 7.611111111111111, "grad_norm": 1.1278340816497803, "learning_rate": 9.130000000000001e-05, "loss": 0.8609, "step": 2740 }, { "epoch": 7.638888888888889, "grad_norm": 1.2580872774124146, "learning_rate": 9.163333333333334e-05, "loss": 0.8548, "step": 2750 }, { "epoch": 7.666666666666667, "grad_norm": 1.2709156274795532, "learning_rate": 9.196666666666666e-05, "loss": 0.8544, "step": 2760 }, { "epoch": 7.694444444444445, "grad_norm": 1.3311725854873657, "learning_rate": 9.230000000000001e-05, "loss": 0.8606, "step": 2770 }, { "epoch": 7.722222222222222, "grad_norm": 1.036661982536316, "learning_rate": 9.263333333333334e-05, "loss": 0.8639, "step": 2780 }, { "epoch": 7.75, "grad_norm": 1.2781813144683838, "learning_rate": 9.296666666666667e-05, "loss": 0.8464, "step": 2790 }, { "epoch": 7.777777777777778, "grad_norm": 1.3302937746047974, "learning_rate": 9.33e-05, "loss": 0.8573, "step": 2800 }, { "epoch": 7.805555555555555, "grad_norm": 1.0992307662963867, "learning_rate": 9.363333333333334e-05, "loss": 0.8467, "step": 2810 }, { "epoch": 7.833333333333333, "grad_norm": 1.5055903196334839, "learning_rate": 9.396666666666667e-05, "loss": 0.846, "step": 2820 }, { "epoch": 7.861111111111111, "grad_norm": 1.1639738082885742, "learning_rate": 9.43e-05, "loss": 0.8468, "step": 2830 }, { "epoch": 7.888888888888889, "grad_norm": 1.372176170349121, "learning_rate": 9.463333333333333e-05, "loss": 0.8481, "step": 2840 }, { "epoch": 7.916666666666667, "grad_norm": 1.1805696487426758, "learning_rate": 9.496666666666667e-05, "loss": 0.8498, "step": 2850 }, { "epoch": 7.944444444444445, "grad_norm": 1.2137709856033325, "learning_rate": 9.53e-05, "loss": 0.8403, "step": 2860 }, { "epoch": 7.972222222222222, "grad_norm": 1.247732400894165, "learning_rate": 9.563333333333334e-05, "loss": 0.8398, "step": 2870 }, { "epoch": 8.0, "grad_norm": 1.3063246011734009, "learning_rate": 9.596666666666668e-05, "loss": 0.834, "step": 2880 }, { "epoch": 8.027777777777779, "grad_norm": 1.1005151271820068, "learning_rate": 9.63e-05, "loss": 0.8334, "step": 2890 }, { "epoch": 8.055555555555555, "grad_norm": 1.3689954280853271, "learning_rate": 9.663333333333334e-05, "loss": 0.8502, "step": 2900 }, { "epoch": 8.083333333333334, "grad_norm": 1.2509393692016602, "learning_rate": 9.696666666666667e-05, "loss": 0.8535, "step": 2910 }, { "epoch": 8.11111111111111, "grad_norm": 1.2867627143859863, "learning_rate": 9.730000000000001e-05, "loss": 0.833, "step": 2920 }, { "epoch": 8.13888888888889, "grad_norm": 1.2973523139953613, "learning_rate": 9.763333333333334e-05, "loss": 0.826, "step": 2930 }, { "epoch": 8.166666666666666, "grad_norm": 1.3697179555892944, "learning_rate": 9.796666666666667e-05, "loss": 0.8265, "step": 2940 }, { "epoch": 8.194444444444445, "grad_norm": 1.2723517417907715, "learning_rate": 9.83e-05, "loss": 0.8235, "step": 2950 }, { "epoch": 8.222222222222221, "grad_norm": 1.1682614088058472, "learning_rate": 9.863333333333334e-05, "loss": 0.8519, "step": 2960 }, { "epoch": 8.25, "grad_norm": 1.3073581457138062, "learning_rate": 9.896666666666667e-05, "loss": 0.8332, "step": 2970 }, { "epoch": 8.277777777777779, "grad_norm": 1.4415756464004517, "learning_rate": 9.93e-05, "loss": 0.8218, "step": 2980 }, { "epoch": 8.305555555555555, "grad_norm": 1.0432311296463013, "learning_rate": 9.963333333333333e-05, "loss": 0.8216, "step": 2990 }, { "epoch": 8.333333333333334, "grad_norm": 1.2918546199798584, "learning_rate": 9.996666666666668e-05, "loss": 0.837, "step": 3000 }, { "epoch": 8.36111111111111, "grad_norm": 1.2721909284591675, "learning_rate": 9.999999384858465e-05, "loss": 0.8124, "step": 3010 }, { "epoch": 8.38888888888889, "grad_norm": 1.167109727859497, "learning_rate": 9.999997258443473e-05, "loss": 0.8245, "step": 3020 }, { "epoch": 8.416666666666666, "grad_norm": 1.2446646690368652, "learning_rate": 9.999993613161331e-05, "loss": 0.8061, "step": 3030 }, { "epoch": 8.444444444444445, "grad_norm": 1.2718597650527954, "learning_rate": 9.999988449013146e-05, "loss": 0.8074, "step": 3040 }, { "epoch": 8.472222222222221, "grad_norm": 1.2912003993988037, "learning_rate": 9.99998176600049e-05, "loss": 0.8174, "step": 3050 }, { "epoch": 8.5, "grad_norm": 1.2147102355957031, "learning_rate": 9.999973564125389e-05, "loss": 0.8196, "step": 3060 }, { "epoch": 8.527777777777779, "grad_norm": 1.2203881740570068, "learning_rate": 9.999963843390335e-05, "loss": 0.8243, "step": 3070 }, { "epoch": 8.555555555555555, "grad_norm": 1.1825766563415527, "learning_rate": 9.999952603798282e-05, "loss": 0.8146, "step": 3080 }, { "epoch": 8.583333333333334, "grad_norm": 1.481148362159729, "learning_rate": 9.999939845352646e-05, "loss": 0.8147, "step": 3090 }, { "epoch": 8.61111111111111, "grad_norm": 1.1481742858886719, "learning_rate": 9.999925568057298e-05, "loss": 0.8235, "step": 3100 }, { "epoch": 8.63888888888889, "grad_norm": 1.3712589740753174, "learning_rate": 9.999909771916578e-05, "loss": 0.8345, "step": 3110 }, { "epoch": 8.666666666666666, "grad_norm": 1.2849491834640503, "learning_rate": 9.999892456935285e-05, "loss": 0.7947, "step": 3120 }, { "epoch": 8.694444444444445, "grad_norm": 1.2848420143127441, "learning_rate": 9.999873623118679e-05, "loss": 0.817, "step": 3130 }, { "epoch": 8.722222222222221, "grad_norm": 1.2145476341247559, "learning_rate": 9.999853270472479e-05, "loss": 0.8186, "step": 3140 }, { "epoch": 8.75, "grad_norm": 1.1127598285675049, "learning_rate": 9.999831399002871e-05, "loss": 0.806, "step": 3150 }, { "epoch": 8.777777777777779, "grad_norm": 1.5591439008712769, "learning_rate": 9.999808008716494e-05, "loss": 0.8031, "step": 3160 }, { "epoch": 8.805555555555555, "grad_norm": 1.2247047424316406, "learning_rate": 9.999783099620459e-05, "loss": 0.8126, "step": 3170 }, { "epoch": 8.833333333333334, "grad_norm": 1.329619288444519, "learning_rate": 9.999756671722328e-05, "loss": 0.8194, "step": 3180 }, { "epoch": 8.86111111111111, "grad_norm": 1.1339442729949951, "learning_rate": 9.99972872503013e-05, "loss": 0.7935, "step": 3190 }, { "epoch": 8.88888888888889, "grad_norm": 1.205561876296997, "learning_rate": 9.999699259552359e-05, "loss": 0.8113, "step": 3200 }, { "epoch": 8.916666666666666, "grad_norm": 1.265486478805542, "learning_rate": 9.99966827529796e-05, "loss": 0.7982, "step": 3210 }, { "epoch": 8.944444444444445, "grad_norm": 1.2949241399765015, "learning_rate": 9.999635772276348e-05, "loss": 0.7902, "step": 3220 }, { "epoch": 8.972222222222221, "grad_norm": 1.2392288446426392, "learning_rate": 9.999601750497396e-05, "loss": 0.7817, "step": 3230 }, { "epoch": 9.0, "grad_norm": 1.483309268951416, "learning_rate": 9.99956620997144e-05, "loss": 0.7769, "step": 3240 }, { "epoch": 9.027777777777779, "grad_norm": 1.4052700996398926, "learning_rate": 9.999529150709275e-05, "loss": 0.7752, "step": 3250 }, { "epoch": 9.055555555555555, "grad_norm": 1.6613481044769287, "learning_rate": 9.999490572722158e-05, "loss": 0.807, "step": 3260 }, { "epoch": 9.083333333333334, "grad_norm": 1.3529011011123657, "learning_rate": 9.99945047602181e-05, "loss": 0.7892, "step": 3270 }, { "epoch": 9.11111111111111, "grad_norm": 1.1968965530395508, "learning_rate": 9.99940886062041e-05, "loss": 0.7707, "step": 3280 }, { "epoch": 9.13888888888889, "grad_norm": 1.196085810661316, "learning_rate": 9.999365726530599e-05, "loss": 0.7874, "step": 3290 }, { "epoch": 9.166666666666666, "grad_norm": 1.3550405502319336, "learning_rate": 9.999321073765481e-05, "loss": 0.7746, "step": 3300 }, { "epoch": 9.194444444444445, "grad_norm": 1.1852507591247559, "learning_rate": 9.99927490233862e-05, "loss": 0.7675, "step": 3310 }, { "epoch": 9.222222222222221, "grad_norm": 1.2583191394805908, "learning_rate": 9.999227212264043e-05, "loss": 0.7823, "step": 3320 }, { "epoch": 9.25, "grad_norm": 1.337010145187378, "learning_rate": 9.999178003556236e-05, "loss": 0.7813, "step": 3330 }, { "epoch": 9.277777777777779, "grad_norm": 1.5079622268676758, "learning_rate": 9.999127276230146e-05, "loss": 0.7749, "step": 3340 }, { "epoch": 9.305555555555555, "grad_norm": 1.3082525730133057, "learning_rate": 9.999075030301184e-05, "loss": 0.7768, "step": 3350 }, { "epoch": 9.333333333333334, "grad_norm": 1.4168179035186768, "learning_rate": 9.999021265785221e-05, "loss": 0.7648, "step": 3360 }, { "epoch": 9.36111111111111, "grad_norm": 1.2451118230819702, "learning_rate": 9.998965982698589e-05, "loss": 0.8002, "step": 3370 }, { "epoch": 9.38888888888889, "grad_norm": 1.240040898323059, "learning_rate": 9.998909181058082e-05, "loss": 0.7765, "step": 3380 }, { "epoch": 9.416666666666666, "grad_norm": 1.2922608852386475, "learning_rate": 9.998850860880953e-05, "loss": 0.775, "step": 3390 }, { "epoch": 9.444444444444445, "grad_norm": 1.3247323036193848, "learning_rate": 9.998791022184922e-05, "loss": 0.7776, "step": 3400 }, { "epoch": 9.472222222222221, "grad_norm": 1.192779541015625, "learning_rate": 9.99872966498816e-05, "loss": 0.7779, "step": 3410 }, { "epoch": 9.5, "grad_norm": 1.1327825784683228, "learning_rate": 9.998666789309313e-05, "loss": 0.7742, "step": 3420 }, { "epoch": 9.527777777777779, "grad_norm": 1.03059983253479, "learning_rate": 9.998602395167475e-05, "loss": 0.758, "step": 3430 }, { "epoch": 9.555555555555555, "grad_norm": 1.3057835102081299, "learning_rate": 9.998536482582213e-05, "loss": 0.7599, "step": 3440 }, { "epoch": 9.583333333333334, "grad_norm": 1.2292028665542603, "learning_rate": 9.998469051573544e-05, "loss": 0.7533, "step": 3450 }, { "epoch": 9.61111111111111, "grad_norm": 1.2319155931472778, "learning_rate": 9.998400102161954e-05, "loss": 0.7653, "step": 3460 }, { "epoch": 9.63888888888889, "grad_norm": 1.1550360918045044, "learning_rate": 9.998329634368388e-05, "loss": 0.7599, "step": 3470 }, { "epoch": 9.666666666666666, "grad_norm": 1.5233073234558105, "learning_rate": 9.998257648214253e-05, "loss": 0.7528, "step": 3480 }, { "epoch": 9.694444444444445, "grad_norm": 1.0926159620285034, "learning_rate": 9.998184143721417e-05, "loss": 0.7551, "step": 3490 }, { "epoch": 9.722222222222221, "grad_norm": 1.2708762884140015, "learning_rate": 9.998109120912206e-05, "loss": 0.7653, "step": 3500 }, { "epoch": 9.75, "grad_norm": 1.1657437086105347, "learning_rate": 9.998032579809411e-05, "loss": 0.7522, "step": 3510 }, { "epoch": 9.777777777777779, "grad_norm": 1.1221734285354614, "learning_rate": 9.997954520436286e-05, "loss": 0.751, "step": 3520 }, { "epoch": 9.805555555555555, "grad_norm": 1.1171677112579346, "learning_rate": 9.997874942816538e-05, "loss": 0.7508, "step": 3530 }, { "epoch": 9.833333333333334, "grad_norm": 1.277343988418579, "learning_rate": 9.997793846974345e-05, "loss": 0.7671, "step": 3540 }, { "epoch": 9.86111111111111, "grad_norm": 1.2727237939834595, "learning_rate": 9.997711232934341e-05, "loss": 0.7609, "step": 3550 }, { "epoch": 9.88888888888889, "grad_norm": 1.5112128257751465, "learning_rate": 9.99762710072162e-05, "loss": 0.7566, "step": 3560 }, { "epoch": 9.916666666666666, "grad_norm": 1.2569708824157715, "learning_rate": 9.997541450361743e-05, "loss": 0.7687, "step": 3570 }, { "epoch": 9.944444444444445, "grad_norm": 1.1912360191345215, "learning_rate": 9.997454281880723e-05, "loss": 0.74, "step": 3580 }, { "epoch": 9.972222222222221, "grad_norm": 1.2829766273498535, "learning_rate": 9.997365595305044e-05, "loss": 0.7443, "step": 3590 }, { "epoch": 10.0, "grad_norm": 1.2287217378616333, "learning_rate": 9.997275390661644e-05, "loss": 0.7517, "step": 3600 }, { "epoch": 10.027777777777779, "grad_norm": 1.3096051216125488, "learning_rate": 9.997183667977926e-05, "loss": 0.7286, "step": 3610 }, { "epoch": 10.055555555555555, "grad_norm": 1.2435790300369263, "learning_rate": 9.997090427281752e-05, "loss": 0.7394, "step": 3620 }, { "epoch": 10.083333333333334, "grad_norm": 1.1507556438446045, "learning_rate": 9.996995668601448e-05, "loss": 0.7394, "step": 3630 }, { "epoch": 10.11111111111111, "grad_norm": 1.471458911895752, "learning_rate": 9.996899391965798e-05, "loss": 0.7359, "step": 3640 }, { "epoch": 10.13888888888889, "grad_norm": 1.2459975481033325, "learning_rate": 9.996801597404048e-05, "loss": 0.7428, "step": 3650 }, { "epoch": 10.166666666666666, "grad_norm": 1.594538927078247, "learning_rate": 9.996702284945905e-05, "loss": 0.7273, "step": 3660 }, { "epoch": 10.194444444444445, "grad_norm": 1.221850037574768, "learning_rate": 9.996601454621539e-05, "loss": 0.7392, "step": 3670 }, { "epoch": 10.222222222222221, "grad_norm": 1.0810716152191162, "learning_rate": 9.996499106461577e-05, "loss": 0.7373, "step": 3680 }, { "epoch": 10.25, "grad_norm": 1.3578838109970093, "learning_rate": 9.996395240497112e-05, "loss": 0.7499, "step": 3690 }, { "epoch": 10.277777777777779, "grad_norm": 1.2521731853485107, "learning_rate": 9.996289856759696e-05, "loss": 0.7377, "step": 3700 }, { "epoch": 10.305555555555555, "grad_norm": 1.1124193668365479, "learning_rate": 9.996182955281342e-05, "loss": 0.7325, "step": 3710 }, { "epoch": 10.333333333333334, "grad_norm": 1.3353127241134644, "learning_rate": 9.996074536094519e-05, "loss": 0.737, "step": 3720 }, { "epoch": 10.36111111111111, "grad_norm": 1.3498177528381348, "learning_rate": 9.995964599232168e-05, "loss": 0.7267, "step": 3730 }, { "epoch": 10.38888888888889, "grad_norm": 1.2040518522262573, "learning_rate": 9.995853144727683e-05, "loss": 0.7307, "step": 3740 }, { "epoch": 10.416666666666666, "grad_norm": 1.3221021890640259, "learning_rate": 9.99574017261492e-05, "loss": 0.7264, "step": 3750 }, { "epoch": 10.444444444444445, "grad_norm": 1.2317562103271484, "learning_rate": 9.995625682928198e-05, "loss": 0.7165, "step": 3760 }, { "epoch": 10.472222222222221, "grad_norm": 1.2875711917877197, "learning_rate": 9.995509675702295e-05, "loss": 0.7391, "step": 3770 }, { "epoch": 10.5, "grad_norm": 1.0899977684020996, "learning_rate": 9.995392150972451e-05, "loss": 0.7299, "step": 3780 }, { "epoch": 10.527777777777779, "grad_norm": 1.1078256368637085, "learning_rate": 9.995273108774366e-05, "loss": 0.7115, "step": 3790 }, { "epoch": 10.555555555555555, "grad_norm": 1.5005935430526733, "learning_rate": 9.995152549144205e-05, "loss": 0.7292, "step": 3800 }, { "epoch": 10.583333333333334, "grad_norm": 1.134880542755127, "learning_rate": 9.995030472118587e-05, "loss": 0.7101, "step": 3810 }, { "epoch": 10.61111111111111, "grad_norm": 1.1370619535446167, "learning_rate": 9.9949068777346e-05, "loss": 0.7345, "step": 3820 }, { "epoch": 10.63888888888889, "grad_norm": 1.206141710281372, "learning_rate": 9.994781766029786e-05, "loss": 0.7365, "step": 3830 }, { "epoch": 10.666666666666666, "grad_norm": 1.5181033611297607, "learning_rate": 9.994655137042151e-05, "loss": 0.7182, "step": 3840 }, { "epoch": 10.694444444444445, "grad_norm": 1.2956494092941284, "learning_rate": 9.99452699081016e-05, "loss": 0.722, "step": 3850 }, { "epoch": 10.722222222222221, "grad_norm": 1.1034553050994873, "learning_rate": 9.994397327372743e-05, "loss": 0.7172, "step": 3860 }, { "epoch": 10.75, "grad_norm": 1.364801287651062, "learning_rate": 9.994266146769286e-05, "loss": 0.726, "step": 3870 }, { "epoch": 10.777777777777779, "grad_norm": 1.1844483613967896, "learning_rate": 9.994133449039642e-05, "loss": 0.7159, "step": 3880 }, { "epoch": 10.805555555555555, "grad_norm": 1.2133851051330566, "learning_rate": 9.993999234224118e-05, "loss": 0.7139, "step": 3890 }, { "epoch": 10.833333333333334, "grad_norm": 1.2311289310455322, "learning_rate": 9.993863502363485e-05, "loss": 0.7204, "step": 3900 }, { "epoch": 10.86111111111111, "grad_norm": 1.190416693687439, "learning_rate": 9.993726253498976e-05, "loss": 0.719, "step": 3910 }, { "epoch": 10.88888888888889, "grad_norm": 1.251173496246338, "learning_rate": 9.993587487672282e-05, "loss": 0.7116, "step": 3920 }, { "epoch": 10.916666666666666, "grad_norm": 1.2383418083190918, "learning_rate": 9.993447204925558e-05, "loss": 0.711, "step": 3930 }, { "epoch": 10.944444444444445, "grad_norm": 1.2677409648895264, "learning_rate": 9.993305405301416e-05, "loss": 0.7283, "step": 3940 }, { "epoch": 10.972222222222221, "grad_norm": 1.3600077629089355, "learning_rate": 9.993162088842935e-05, "loss": 0.72, "step": 3950 }, { "epoch": 11.0, "grad_norm": 1.164926290512085, "learning_rate": 9.993017255593646e-05, "loss": 0.714, "step": 3960 }, { "epoch": 11.027777777777779, "grad_norm": 1.3604539632797241, "learning_rate": 9.992870905597548e-05, "loss": 0.7231, "step": 3970 }, { "epoch": 11.055555555555555, "grad_norm": 1.397487998008728, "learning_rate": 9.9927230388991e-05, "loss": 0.7045, "step": 3980 }, { "epoch": 11.083333333333334, "grad_norm": 1.2842170000076294, "learning_rate": 9.992573655543215e-05, "loss": 0.6983, "step": 3990 }, { "epoch": 11.11111111111111, "grad_norm": 1.2127407789230347, "learning_rate": 9.992422755575277e-05, "loss": 0.7266, "step": 4000 }, { "epoch": 11.13888888888889, "grad_norm": 1.0597946643829346, "learning_rate": 9.992270339041123e-05, "loss": 0.7027, "step": 4010 }, { "epoch": 11.166666666666666, "grad_norm": 1.5789223909378052, "learning_rate": 9.992116405987053e-05, "loss": 0.7128, "step": 4020 }, { "epoch": 11.194444444444445, "grad_norm": 1.1750586032867432, "learning_rate": 9.991960956459828e-05, "loss": 0.7092, "step": 4030 }, { "epoch": 11.222222222222221, "grad_norm": 1.1561870574951172, "learning_rate": 9.991803990506669e-05, "loss": 0.7083, "step": 4040 }, { "epoch": 11.25, "grad_norm": 1.1936837434768677, "learning_rate": 9.991645508175258e-05, "loss": 0.6886, "step": 4050 }, { "epoch": 11.277777777777779, "grad_norm": 1.1427656412124634, "learning_rate": 9.99148550951374e-05, "loss": 0.7128, "step": 4060 }, { "epoch": 11.305555555555555, "grad_norm": 1.164657473564148, "learning_rate": 9.991323994570716e-05, "loss": 0.7072, "step": 4070 }, { "epoch": 11.333333333333334, "grad_norm": 1.2976696491241455, "learning_rate": 9.99116096339525e-05, "loss": 0.696, "step": 4080 }, { "epoch": 11.36111111111111, "grad_norm": 1.3549981117248535, "learning_rate": 9.990996416036869e-05, "loss": 0.7001, "step": 4090 }, { "epoch": 11.38888888888889, "grad_norm": 1.2320959568023682, "learning_rate": 9.990830352545555e-05, "loss": 0.7093, "step": 4100 }, { "epoch": 11.416666666666666, "grad_norm": 1.1773102283477783, "learning_rate": 9.990662772971756e-05, "loss": 0.7015, "step": 4110 }, { "epoch": 11.444444444444445, "grad_norm": 1.2570750713348389, "learning_rate": 9.990493677366376e-05, "loss": 0.6921, "step": 4120 }, { "epoch": 11.472222222222221, "grad_norm": 1.1962249279022217, "learning_rate": 9.990323065780786e-05, "loss": 0.6983, "step": 4130 }, { "epoch": 11.5, "grad_norm": 1.3490025997161865, "learning_rate": 9.990150938266808e-05, "loss": 0.6969, "step": 4140 }, { "epoch": 11.527777777777779, "grad_norm": 1.1250478029251099, "learning_rate": 9.989977294876733e-05, "loss": 0.6922, "step": 4150 }, { "epoch": 11.555555555555555, "grad_norm": 1.130831241607666, "learning_rate": 9.989802135663308e-05, "loss": 0.67, "step": 4160 }, { "epoch": 11.583333333333334, "grad_norm": 1.3539743423461914, "learning_rate": 9.989625460679743e-05, "loss": 0.696, "step": 4170 }, { "epoch": 11.61111111111111, "grad_norm": 1.2021878957748413, "learning_rate": 9.989447269979706e-05, "loss": 0.706, "step": 4180 }, { "epoch": 11.63888888888889, "grad_norm": 1.1680912971496582, "learning_rate": 9.989267563617328e-05, "loss": 0.6883, "step": 4190 }, { "epoch": 11.666666666666666, "grad_norm": 1.4946765899658203, "learning_rate": 9.989086341647198e-05, "loss": 0.6828, "step": 4200 }, { "epoch": 11.694444444444445, "grad_norm": 1.2536123991012573, "learning_rate": 9.988903604124366e-05, "loss": 0.6888, "step": 4210 }, { "epoch": 11.722222222222221, "grad_norm": 1.3186373710632324, "learning_rate": 9.988719351104343e-05, "loss": 0.6794, "step": 4220 }, { "epoch": 11.75, "grad_norm": 1.260280728340149, "learning_rate": 9.9885335826431e-05, "loss": 0.6773, "step": 4230 }, { "epoch": 11.777777777777779, "grad_norm": 1.2543504238128662, "learning_rate": 9.988346298797071e-05, "loss": 0.7083, "step": 4240 }, { "epoch": 11.805555555555555, "grad_norm": 1.2063319683074951, "learning_rate": 9.988157499623146e-05, "loss": 0.7021, "step": 4250 }, { "epoch": 11.833333333333334, "grad_norm": 1.239200234413147, "learning_rate": 9.987967185178677e-05, "loss": 0.6822, "step": 4260 }, { "epoch": 11.86111111111111, "grad_norm": 1.1404041051864624, "learning_rate": 9.987775355521476e-05, "loss": 0.6709, "step": 4270 }, { "epoch": 11.88888888888889, "grad_norm": 1.2487589120864868, "learning_rate": 9.987582010709817e-05, "loss": 0.6819, "step": 4280 }, { "epoch": 11.916666666666666, "grad_norm": 1.158599853515625, "learning_rate": 9.987387150802431e-05, "loss": 0.6767, "step": 4290 }, { "epoch": 11.944444444444445, "grad_norm": 1.0902138948440552, "learning_rate": 9.987190775858517e-05, "loss": 0.6841, "step": 4300 }, { "epoch": 11.972222222222221, "grad_norm": 1.3908904790878296, "learning_rate": 9.98699288593772e-05, "loss": 0.6731, "step": 4310 }, { "epoch": 12.0, "grad_norm": 1.1968767642974854, "learning_rate": 9.986793481100161e-05, "loss": 0.6721, "step": 4320 }, { "epoch": 12.027777777777779, "grad_norm": 1.3465301990509033, "learning_rate": 9.986592561406412e-05, "loss": 0.6674, "step": 4330 }, { "epoch": 12.055555555555555, "grad_norm": 1.2743972539901733, "learning_rate": 9.986390126917503e-05, "loss": 0.6747, "step": 4340 }, { "epoch": 12.083333333333334, "grad_norm": 1.4832299947738647, "learning_rate": 9.986186177694933e-05, "loss": 0.6675, "step": 4350 }, { "epoch": 12.11111111111111, "grad_norm": 1.362992286682129, "learning_rate": 9.985980713800656e-05, "loss": 0.6617, "step": 4360 }, { "epoch": 12.13888888888889, "grad_norm": 1.1944453716278076, "learning_rate": 9.985773735297084e-05, "loss": 0.6813, "step": 4370 }, { "epoch": 12.166666666666666, "grad_norm": 1.4186241626739502, "learning_rate": 9.985565242247092e-05, "loss": 0.671, "step": 4380 }, { "epoch": 12.194444444444445, "grad_norm": 1.408324122428894, "learning_rate": 9.985355234714016e-05, "loss": 0.6757, "step": 4390 }, { "epoch": 12.222222222222221, "grad_norm": 1.329076886177063, "learning_rate": 9.985143712761652e-05, "loss": 0.6978, "step": 4400 }, { "epoch": 12.25, "grad_norm": 1.1950397491455078, "learning_rate": 9.984930676454252e-05, "loss": 0.6647, "step": 4410 }, { "epoch": 12.277777777777779, "grad_norm": 1.4175212383270264, "learning_rate": 9.984716125856532e-05, "loss": 0.6775, "step": 4420 }, { "epoch": 12.305555555555555, "grad_norm": 1.2138293981552124, "learning_rate": 9.984500061033667e-05, "loss": 0.6594, "step": 4430 }, { "epoch": 12.333333333333334, "grad_norm": 1.128646731376648, "learning_rate": 9.984282482051293e-05, "loss": 0.6835, "step": 4440 }, { "epoch": 12.36111111111111, "grad_norm": 1.1491400003433228, "learning_rate": 9.9840633889755e-05, "loss": 0.6571, "step": 4450 }, { "epoch": 12.38888888888889, "grad_norm": 1.1971176862716675, "learning_rate": 9.983842781872848e-05, "loss": 0.6675, "step": 4460 }, { "epoch": 12.416666666666666, "grad_norm": 1.1708976030349731, "learning_rate": 9.98362066081035e-05, "loss": 0.6612, "step": 4470 }, { "epoch": 12.444444444444445, "grad_norm": 1.1857138872146606, "learning_rate": 9.983397025855479e-05, "loss": 0.6784, "step": 4480 }, { "epoch": 12.472222222222221, "grad_norm": 1.2693556547164917, "learning_rate": 9.983171877076171e-05, "loss": 0.6776, "step": 4490 }, { "epoch": 12.5, "grad_norm": 1.1636408567428589, "learning_rate": 9.98294521454082e-05, "loss": 0.6642, "step": 4500 }, { "epoch": 12.527777777777779, "grad_norm": 1.340755820274353, "learning_rate": 9.98271703831828e-05, "loss": 0.6884, "step": 4510 }, { "epoch": 12.555555555555555, "grad_norm": 1.1607826948165894, "learning_rate": 9.982487348477865e-05, "loss": 0.663, "step": 4520 }, { "epoch": 12.583333333333334, "grad_norm": 1.383907675743103, "learning_rate": 9.982256145089347e-05, "loss": 0.6702, "step": 4530 }, { "epoch": 12.61111111111111, "grad_norm": 1.2341861724853516, "learning_rate": 9.982023428222962e-05, "loss": 0.6659, "step": 4540 }, { "epoch": 12.63888888888889, "grad_norm": 1.270688533782959, "learning_rate": 9.981789197949403e-05, "loss": 0.6539, "step": 4550 }, { "epoch": 12.666666666666666, "grad_norm": 1.1440987586975098, "learning_rate": 9.98155345433982e-05, "loss": 0.647, "step": 4560 }, { "epoch": 12.694444444444445, "grad_norm": 1.2991887331008911, "learning_rate": 9.981316197465831e-05, "loss": 0.6573, "step": 4570 }, { "epoch": 12.722222222222221, "grad_norm": 1.3380275964736938, "learning_rate": 9.981077427399504e-05, "loss": 0.6585, "step": 4580 }, { "epoch": 12.75, "grad_norm": 1.1082689762115479, "learning_rate": 9.980837144213371e-05, "loss": 0.6638, "step": 4590 }, { "epoch": 12.777777777777779, "grad_norm": 1.2130868434906006, "learning_rate": 9.980595347980426e-05, "loss": 0.6598, "step": 4600 }, { "epoch": 12.805555555555555, "grad_norm": 1.2371426820755005, "learning_rate": 9.980352038774119e-05, "loss": 0.6664, "step": 4610 }, { "epoch": 12.833333333333334, "grad_norm": 1.1907418966293335, "learning_rate": 9.98010721666836e-05, "loss": 0.6681, "step": 4620 }, { "epoch": 12.86111111111111, "grad_norm": 1.4543386697769165, "learning_rate": 9.979860881737523e-05, "loss": 0.6608, "step": 4630 }, { "epoch": 12.88888888888889, "grad_norm": 1.181145429611206, "learning_rate": 9.979613034056434e-05, "loss": 0.6607, "step": 4640 }, { "epoch": 12.916666666666666, "grad_norm": 1.1935237646102905, "learning_rate": 9.979363673700386e-05, "loss": 0.6584, "step": 4650 }, { "epoch": 12.944444444444445, "grad_norm": 1.07270085811615, "learning_rate": 9.979112800745124e-05, "loss": 0.677, "step": 4660 }, { "epoch": 12.972222222222221, "grad_norm": 1.21428382396698, "learning_rate": 9.978860415266861e-05, "loss": 0.6597, "step": 4670 }, { "epoch": 13.0, "grad_norm": 1.2500523328781128, "learning_rate": 9.978606517342262e-05, "loss": 0.6271, "step": 4680 }, { "epoch": 13.027777777777779, "grad_norm": 1.2784935235977173, "learning_rate": 9.978351107048456e-05, "loss": 0.6606, "step": 4690 }, { "epoch": 13.055555555555555, "grad_norm": 1.3298035860061646, "learning_rate": 9.978094184463029e-05, "loss": 0.6589, "step": 4700 }, { "epoch": 13.083333333333334, "grad_norm": 1.2409018278121948, "learning_rate": 9.977835749664029e-05, "loss": 0.6588, "step": 4710 }, { "epoch": 13.11111111111111, "grad_norm": 1.1070401668548584, "learning_rate": 9.97757580272996e-05, "loss": 0.6562, "step": 4720 }, { "epoch": 13.13888888888889, "grad_norm": 1.1339036226272583, "learning_rate": 9.977314343739786e-05, "loss": 0.6519, "step": 4730 }, { "epoch": 13.166666666666666, "grad_norm": 1.1602897644042969, "learning_rate": 9.977051372772934e-05, "loss": 0.6452, "step": 4740 }, { "epoch": 13.194444444444445, "grad_norm": 1.0913641452789307, "learning_rate": 9.976786889909286e-05, "loss": 0.6644, "step": 4750 }, { "epoch": 13.222222222222221, "grad_norm": 1.296343445777893, "learning_rate": 9.976520895229185e-05, "loss": 0.6481, "step": 4760 }, { "epoch": 13.25, "grad_norm": 1.3616904020309448, "learning_rate": 9.976253388813433e-05, "loss": 0.6537, "step": 4770 }, { "epoch": 13.277777777777779, "grad_norm": 1.2027065753936768, "learning_rate": 9.975984370743293e-05, "loss": 0.646, "step": 4780 }, { "epoch": 13.305555555555555, "grad_norm": 1.2545757293701172, "learning_rate": 9.975713841100485e-05, "loss": 0.6383, "step": 4790 }, { "epoch": 13.333333333333334, "grad_norm": 1.1692466735839844, "learning_rate": 9.975441799967187e-05, "loss": 0.6468, "step": 4800 }, { "epoch": 13.36111111111111, "grad_norm": 1.104870080947876, "learning_rate": 9.975168247426039e-05, "loss": 0.6388, "step": 4810 }, { "epoch": 13.38888888888889, "grad_norm": 1.1212408542633057, "learning_rate": 9.974893183560139e-05, "loss": 0.628, "step": 4820 }, { "epoch": 13.416666666666666, "grad_norm": 1.3058899641036987, "learning_rate": 9.974616608453045e-05, "loss": 0.6609, "step": 4830 }, { "epoch": 13.444444444444445, "grad_norm": 1.1818193197250366, "learning_rate": 9.974338522188772e-05, "loss": 0.6666, "step": 4840 }, { "epoch": 13.472222222222221, "grad_norm": 1.130802869796753, "learning_rate": 9.974058924851797e-05, "loss": 0.6431, "step": 4850 }, { "epoch": 13.5, "grad_norm": 1.0923845767974854, "learning_rate": 9.973777816527051e-05, "loss": 0.6418, "step": 4860 }, { "epoch": 13.527777777777779, "grad_norm": 1.1727876663208008, "learning_rate": 9.973495197299931e-05, "loss": 0.6341, "step": 4870 }, { "epoch": 13.555555555555555, "grad_norm": 1.0344693660736084, "learning_rate": 9.973211067256287e-05, "loss": 0.6457, "step": 4880 }, { "epoch": 13.583333333333334, "grad_norm": 1.1077991724014282, "learning_rate": 9.97292542648243e-05, "loss": 0.6479, "step": 4890 }, { "epoch": 13.61111111111111, "grad_norm": 1.1743274927139282, "learning_rate": 9.972638275065131e-05, "loss": 0.646, "step": 4900 }, { "epoch": 13.63888888888889, "grad_norm": 1.1578128337860107, "learning_rate": 9.972349613091621e-05, "loss": 0.6319, "step": 4910 }, { "epoch": 13.666666666666666, "grad_norm": 1.2376030683517456, "learning_rate": 9.972059440649584e-05, "loss": 0.6324, "step": 4920 }, { "epoch": 13.694444444444445, "grad_norm": 1.3264914751052856, "learning_rate": 9.971767757827168e-05, "loss": 0.6592, "step": 4930 }, { "epoch": 13.722222222222221, "grad_norm": 1.406721830368042, "learning_rate": 9.971474564712982e-05, "loss": 0.6368, "step": 4940 }, { "epoch": 13.75, "grad_norm": 1.2855011224746704, "learning_rate": 9.971179861396084e-05, "loss": 0.6378, "step": 4950 }, { "epoch": 13.777777777777779, "grad_norm": 1.3084901571273804, "learning_rate": 9.970883647966003e-05, "loss": 0.6214, "step": 4960 }, { "epoch": 13.805555555555555, "grad_norm": 1.170527458190918, "learning_rate": 9.970585924512717e-05, "loss": 0.6485, "step": 4970 }, { "epoch": 13.833333333333334, "grad_norm": 1.2109428644180298, "learning_rate": 9.970286691126669e-05, "loss": 0.6438, "step": 4980 }, { "epoch": 13.86111111111111, "grad_norm": 1.1302319765090942, "learning_rate": 9.969985947898756e-05, "loss": 0.6362, "step": 4990 }, { "epoch": 13.88888888888889, "grad_norm": 1.1009302139282227, "learning_rate": 9.969683694920337e-05, "loss": 0.6549, "step": 5000 }, { "epoch": 13.916666666666666, "grad_norm": 1.1526658535003662, "learning_rate": 9.969379932283228e-05, "loss": 0.6393, "step": 5010 }, { "epoch": 13.944444444444445, "grad_norm": 1.19945228099823, "learning_rate": 9.969074660079704e-05, "loss": 0.6385, "step": 5020 }, { "epoch": 13.972222222222221, "grad_norm": 1.2384370565414429, "learning_rate": 9.968767878402501e-05, "loss": 0.6399, "step": 5030 }, { "epoch": 14.0, "grad_norm": 1.194100260734558, "learning_rate": 9.968459587344808e-05, "loss": 0.6318, "step": 5040 }, { "epoch": 14.027777777777779, "grad_norm": 1.3250720500946045, "learning_rate": 9.968149787000278e-05, "loss": 0.6376, "step": 5050 }, { "epoch": 14.055555555555555, "grad_norm": 1.248313307762146, "learning_rate": 9.967838477463018e-05, "loss": 0.6334, "step": 5060 }, { "epoch": 14.083333333333334, "grad_norm": 1.161307692527771, "learning_rate": 9.967525658827597e-05, "loss": 0.6072, "step": 5070 }, { "epoch": 14.11111111111111, "grad_norm": 1.2186607122421265, "learning_rate": 9.967211331189042e-05, "loss": 0.6238, "step": 5080 }, { "epoch": 14.13888888888889, "grad_norm": 1.1017577648162842, "learning_rate": 9.966895494642834e-05, "loss": 0.639, "step": 5090 }, { "epoch": 14.166666666666666, "grad_norm": 1.2417526245117188, "learning_rate": 9.96657814928492e-05, "loss": 0.6378, "step": 5100 }, { "epoch": 14.194444444444445, "grad_norm": 1.2280120849609375, "learning_rate": 9.966259295211697e-05, "loss": 0.6271, "step": 5110 }, { "epoch": 14.222222222222221, "grad_norm": 1.435137152671814, "learning_rate": 9.965938932520028e-05, "loss": 0.6615, "step": 5120 }, { "epoch": 14.25, "grad_norm": 1.3084973096847534, "learning_rate": 9.965617061307229e-05, "loss": 0.6174, "step": 5130 }, { "epoch": 14.277777777777779, "grad_norm": 1.1713831424713135, "learning_rate": 9.965293681671077e-05, "loss": 0.6287, "step": 5140 }, { "epoch": 14.305555555555555, "grad_norm": 1.1595520973205566, "learning_rate": 9.964968793709804e-05, "loss": 0.6229, "step": 5150 }, { "epoch": 14.333333333333334, "grad_norm": 1.173557162284851, "learning_rate": 9.964642397522106e-05, "loss": 0.6298, "step": 5160 }, { "epoch": 14.36111111111111, "grad_norm": 1.3562917709350586, "learning_rate": 9.96431449320713e-05, "loss": 0.6063, "step": 5170 }, { "epoch": 14.38888888888889, "grad_norm": 1.2009810209274292, "learning_rate": 9.963985080864486e-05, "loss": 0.6228, "step": 5180 }, { "epoch": 14.416666666666666, "grad_norm": 1.3763115406036377, "learning_rate": 9.96365416059424e-05, "loss": 0.6354, "step": 5190 }, { "epoch": 14.444444444444445, "grad_norm": 1.2150744199752808, "learning_rate": 9.963321732496919e-05, "loss": 0.639, "step": 5200 }, { "epoch": 14.472222222222221, "grad_norm": 1.3129730224609375, "learning_rate": 9.962987796673506e-05, "loss": 0.631, "step": 5210 }, { "epoch": 14.5, "grad_norm": 1.1869406700134277, "learning_rate": 9.962652353225438e-05, "loss": 0.6277, "step": 5220 }, { "epoch": 14.527777777777779, "grad_norm": 1.215285062789917, "learning_rate": 9.962315402254619e-05, "loss": 0.6298, "step": 5230 }, { "epoch": 14.555555555555555, "grad_norm": 1.1564832925796509, "learning_rate": 9.9619769438634e-05, "loss": 0.6195, "step": 5240 }, { "epoch": 14.583333333333334, "grad_norm": 1.3304599523544312, "learning_rate": 9.9616369781546e-05, "loss": 0.6082, "step": 5250 }, { "epoch": 14.61111111111111, "grad_norm": 1.151483416557312, "learning_rate": 9.961295505231491e-05, "loss": 0.6293, "step": 5260 }, { "epoch": 14.63888888888889, "grad_norm": 1.2510981559753418, "learning_rate": 9.960952525197804e-05, "loss": 0.6323, "step": 5270 }, { "epoch": 14.666666666666666, "grad_norm": 1.2678437232971191, "learning_rate": 9.960608038157724e-05, "loss": 0.6247, "step": 5280 }, { "epoch": 14.694444444444445, "grad_norm": 1.2830488681793213, "learning_rate": 9.960262044215901e-05, "loss": 0.6338, "step": 5290 }, { "epoch": 14.722222222222221, "grad_norm": 1.2665530443191528, "learning_rate": 9.959914543477435e-05, "loss": 0.6249, "step": 5300 }, { "epoch": 14.75, "grad_norm": 1.1956958770751953, "learning_rate": 9.959565536047892e-05, "loss": 0.6196, "step": 5310 }, { "epoch": 14.777777777777779, "grad_norm": 1.5132970809936523, "learning_rate": 9.959215022033288e-05, "loss": 0.6288, "step": 5320 }, { "epoch": 14.805555555555555, "grad_norm": 1.3162704706192017, "learning_rate": 9.9588630015401e-05, "loss": 0.6079, "step": 5330 }, { "epoch": 14.833333333333334, "grad_norm": 1.1207201480865479, "learning_rate": 9.958509474675264e-05, "loss": 0.6253, "step": 5340 }, { "epoch": 14.86111111111111, "grad_norm": 1.249407410621643, "learning_rate": 9.958154441546171e-05, "loss": 0.6185, "step": 5350 }, { "epoch": 14.88888888888889, "grad_norm": 1.1821695566177368, "learning_rate": 9.957797902260673e-05, "loss": 0.6274, "step": 5360 }, { "epoch": 14.916666666666666, "grad_norm": 1.2748252153396606, "learning_rate": 9.957439856927073e-05, "loss": 0.6318, "step": 5370 }, { "epoch": 14.944444444444445, "grad_norm": 1.208673119544983, "learning_rate": 9.957080305654139e-05, "loss": 0.6124, "step": 5380 }, { "epoch": 14.972222222222221, "grad_norm": 1.0565599203109741, "learning_rate": 9.956719248551092e-05, "loss": 0.6119, "step": 5390 }, { "epoch": 15.0, "grad_norm": 1.2003182172775269, "learning_rate": 9.956356685727612e-05, "loss": 0.6218, "step": 5400 }, { "epoch": 15.027777777777779, "grad_norm": 1.1781203746795654, "learning_rate": 9.955992617293836e-05, "loss": 0.6152, "step": 5410 }, { "epoch": 15.055555555555555, "grad_norm": 1.2306654453277588, "learning_rate": 9.955627043360358e-05, "loss": 0.5959, "step": 5420 }, { "epoch": 15.083333333333334, "grad_norm": 1.298953652381897, "learning_rate": 9.955259964038231e-05, "loss": 0.6109, "step": 5430 }, { "epoch": 15.11111111111111, "grad_norm": 1.3775755167007446, "learning_rate": 9.954891379438962e-05, "loss": 0.6152, "step": 5440 }, { "epoch": 15.13888888888889, "grad_norm": 1.4353796243667603, "learning_rate": 9.954521289674519e-05, "loss": 0.6264, "step": 5450 }, { "epoch": 15.166666666666666, "grad_norm": 1.2062591314315796, "learning_rate": 9.954149694857325e-05, "loss": 0.6039, "step": 5460 }, { "epoch": 15.194444444444445, "grad_norm": 1.1779621839523315, "learning_rate": 9.953776595100258e-05, "loss": 0.6082, "step": 5470 }, { "epoch": 15.222222222222221, "grad_norm": 1.3064370155334473, "learning_rate": 9.95340199051666e-05, "loss": 0.6034, "step": 5480 }, { "epoch": 15.25, "grad_norm": 1.3259559869766235, "learning_rate": 9.953025881220325e-05, "loss": 0.5886, "step": 5490 }, { "epoch": 15.277777777777779, "grad_norm": 1.2012325525283813, "learning_rate": 9.952648267325504e-05, "loss": 0.5943, "step": 5500 }, { "epoch": 15.305555555555555, "grad_norm": 1.1187747716903687, "learning_rate": 9.952269148946905e-05, "loss": 0.6021, "step": 5510 }, { "epoch": 15.333333333333334, "grad_norm": 1.1905851364135742, "learning_rate": 9.951888526199697e-05, "loss": 0.592, "step": 5520 }, { "epoch": 15.36111111111111, "grad_norm": 1.1974834203720093, "learning_rate": 9.951506399199501e-05, "loss": 0.6053, "step": 5530 }, { "epoch": 15.38888888888889, "grad_norm": 1.213539958000183, "learning_rate": 9.951122768062399e-05, "loss": 0.6, "step": 5540 }, { "epoch": 15.416666666666666, "grad_norm": 1.0668610334396362, "learning_rate": 9.950737632904927e-05, "loss": 0.5893, "step": 5550 }, { "epoch": 15.444444444444445, "grad_norm": 1.2233221530914307, "learning_rate": 9.950350993844077e-05, "loss": 0.6131, "step": 5560 }, { "epoch": 15.472222222222221, "grad_norm": 1.294845700263977, "learning_rate": 9.949962850997303e-05, "loss": 0.5959, "step": 5570 }, { "epoch": 15.5, "grad_norm": 1.229358434677124, "learning_rate": 9.949573204482512e-05, "loss": 0.6125, "step": 5580 }, { "epoch": 15.527777777777779, "grad_norm": 1.3590190410614014, "learning_rate": 9.949182054418064e-05, "loss": 0.6131, "step": 5590 }, { "epoch": 15.555555555555555, "grad_norm": 1.2650511264801025, "learning_rate": 9.948789400922787e-05, "loss": 0.6145, "step": 5600 }, { "epoch": 15.583333333333334, "grad_norm": 1.294121503829956, "learning_rate": 9.948395244115953e-05, "loss": 0.6181, "step": 5610 }, { "epoch": 15.61111111111111, "grad_norm": 1.0389759540557861, "learning_rate": 9.9479995841173e-05, "loss": 0.6087, "step": 5620 }, { "epoch": 15.63888888888889, "grad_norm": 1.028079628944397, "learning_rate": 9.947602421047017e-05, "loss": 0.5874, "step": 5630 }, { "epoch": 15.666666666666666, "grad_norm": 1.2107807397842407, "learning_rate": 9.947203755025753e-05, "loss": 0.6048, "step": 5640 }, { "epoch": 15.694444444444445, "grad_norm": 1.1780753135681152, "learning_rate": 9.946803586174611e-05, "loss": 0.5964, "step": 5650 }, { "epoch": 15.722222222222221, "grad_norm": 1.2091032266616821, "learning_rate": 9.946401914615151e-05, "loss": 0.6103, "step": 5660 }, { "epoch": 15.75, "grad_norm": 1.5116530656814575, "learning_rate": 9.945998740469394e-05, "loss": 0.6155, "step": 5670 }, { "epoch": 15.777777777777779, "grad_norm": 1.267134428024292, "learning_rate": 9.945594063859809e-05, "loss": 0.6041, "step": 5680 }, { "epoch": 15.805555555555555, "grad_norm": 1.1372641324996948, "learning_rate": 9.94518788490933e-05, "loss": 0.6068, "step": 5690 }, { "epoch": 15.833333333333334, "grad_norm": 1.1277731657028198, "learning_rate": 9.944780203741341e-05, "loss": 0.5805, "step": 5700 }, { "epoch": 15.86111111111111, "grad_norm": 1.3784693479537964, "learning_rate": 9.944371020479686e-05, "loss": 0.6131, "step": 5710 }, { "epoch": 15.88888888888889, "grad_norm": 1.099439024925232, "learning_rate": 9.943960335248662e-05, "loss": 0.6142, "step": 5720 }, { "epoch": 15.916666666666666, "grad_norm": 1.3226360082626343, "learning_rate": 9.943548148173027e-05, "loss": 0.6199, "step": 5730 }, { "epoch": 15.944444444444445, "grad_norm": 1.2230520248413086, "learning_rate": 9.943134459377992e-05, "loss": 0.5937, "step": 5740 }, { "epoch": 15.972222222222221, "grad_norm": 1.051131248474121, "learning_rate": 9.942719268989222e-05, "loss": 0.589, "step": 5750 }, { "epoch": 16.0, "grad_norm": 1.0790318250656128, "learning_rate": 9.942302577132844e-05, "loss": 0.5998, "step": 5760 }, { "epoch": 16.02777777777778, "grad_norm": 1.0603973865509033, "learning_rate": 9.941884383935438e-05, "loss": 0.5967, "step": 5770 }, { "epoch": 16.055555555555557, "grad_norm": 1.158576250076294, "learning_rate": 9.941464689524039e-05, "loss": 0.6094, "step": 5780 }, { "epoch": 16.083333333333332, "grad_norm": 1.1545666456222534, "learning_rate": 9.941043494026139e-05, "loss": 0.5855, "step": 5790 }, { "epoch": 16.11111111111111, "grad_norm": 1.2810382843017578, "learning_rate": 9.940620797569685e-05, "loss": 0.597, "step": 5800 }, { "epoch": 16.13888888888889, "grad_norm": 1.152550220489502, "learning_rate": 9.940196600283082e-05, "loss": 0.5915, "step": 5810 }, { "epoch": 16.166666666666668, "grad_norm": 1.5216678380966187, "learning_rate": 9.939770902295192e-05, "loss": 0.5858, "step": 5820 }, { "epoch": 16.194444444444443, "grad_norm": 1.39157235622406, "learning_rate": 9.939343703735329e-05, "loss": 0.608, "step": 5830 }, { "epoch": 16.22222222222222, "grad_norm": 1.2798048257827759, "learning_rate": 9.938915004733264e-05, "loss": 0.5987, "step": 5840 }, { "epoch": 16.25, "grad_norm": 1.1656420230865479, "learning_rate": 9.938484805419224e-05, "loss": 0.6126, "step": 5850 }, { "epoch": 16.27777777777778, "grad_norm": 1.1434552669525146, "learning_rate": 9.938053105923894e-05, "loss": 0.5824, "step": 5860 }, { "epoch": 16.305555555555557, "grad_norm": 1.10191011428833, "learning_rate": 9.937619906378413e-05, "loss": 0.5859, "step": 5870 }, { "epoch": 16.333333333333332, "grad_norm": 1.4009121656417847, "learning_rate": 9.937185206914374e-05, "loss": 0.6264, "step": 5880 }, { "epoch": 16.36111111111111, "grad_norm": 1.2470879554748535, "learning_rate": 9.936749007663829e-05, "loss": 0.5869, "step": 5890 }, { "epoch": 16.38888888888889, "grad_norm": 1.2567635774612427, "learning_rate": 9.93631130875928e-05, "loss": 0.584, "step": 5900 }, { "epoch": 16.416666666666668, "grad_norm": 1.1588325500488281, "learning_rate": 9.935872110333692e-05, "loss": 0.5882, "step": 5910 }, { "epoch": 16.444444444444443, "grad_norm": 1.1975598335266113, "learning_rate": 9.935431412520484e-05, "loss": 0.5979, "step": 5920 }, { "epoch": 16.47222222222222, "grad_norm": 1.0360472202301025, "learning_rate": 9.934989215453523e-05, "loss": 0.5891, "step": 5930 }, { "epoch": 16.5, "grad_norm": 1.0935102701187134, "learning_rate": 9.934545519267139e-05, "loss": 0.5821, "step": 5940 }, { "epoch": 16.52777777777778, "grad_norm": 1.116794228553772, "learning_rate": 9.934100324096117e-05, "loss": 0.5933, "step": 5950 }, { "epoch": 16.555555555555557, "grad_norm": 1.2792224884033203, "learning_rate": 9.933653630075692e-05, "loss": 0.5816, "step": 5960 }, { "epoch": 16.583333333333332, "grad_norm": 1.2866162061691284, "learning_rate": 9.93320543734156e-05, "loss": 0.5672, "step": 5970 }, { "epoch": 16.61111111111111, "grad_norm": 1.3107761144638062, "learning_rate": 9.932755746029871e-05, "loss": 0.5839, "step": 5980 }, { "epoch": 16.63888888888889, "grad_norm": 1.117617130279541, "learning_rate": 9.932304556277228e-05, "loss": 0.581, "step": 5990 }, { "epoch": 16.666666666666668, "grad_norm": 1.1642649173736572, "learning_rate": 9.93185186822069e-05, "loss": 0.5749, "step": 6000 }, { "epoch": 16.694444444444443, "grad_norm": 1.1085569858551025, "learning_rate": 9.931397681997773e-05, "loss": 0.5686, "step": 6010 }, { "epoch": 16.72222222222222, "grad_norm": 1.2127892971038818, "learning_rate": 9.930941997746446e-05, "loss": 0.5868, "step": 6020 }, { "epoch": 16.75, "grad_norm": 1.0864081382751465, "learning_rate": 9.930484815605134e-05, "loss": 0.5695, "step": 6030 }, { "epoch": 16.77777777777778, "grad_norm": 1.2557265758514404, "learning_rate": 9.930026135712717e-05, "loss": 0.5826, "step": 6040 }, { "epoch": 16.805555555555557, "grad_norm": 1.1733394861221313, "learning_rate": 9.92956595820853e-05, "loss": 0.5668, "step": 6050 }, { "epoch": 16.833333333333332, "grad_norm": 1.2490787506103516, "learning_rate": 9.929104283232362e-05, "loss": 0.5738, "step": 6060 }, { "epoch": 16.86111111111111, "grad_norm": 1.3205763101577759, "learning_rate": 9.92864111092446e-05, "loss": 0.5785, "step": 6070 }, { "epoch": 16.88888888888889, "grad_norm": 1.1948721408843994, "learning_rate": 9.92817644142552e-05, "loss": 0.5791, "step": 6080 }, { "epoch": 16.916666666666668, "grad_norm": 1.1170341968536377, "learning_rate": 9.927710274876698e-05, "loss": 0.5773, "step": 6090 }, { "epoch": 16.944444444444443, "grad_norm": 1.1400526762008667, "learning_rate": 9.927242611419603e-05, "loss": 0.5722, "step": 6100 }, { "epoch": 16.97222222222222, "grad_norm": 1.041926383972168, "learning_rate": 9.926773451196301e-05, "loss": 0.5721, "step": 6110 }, { "epoch": 17.0, "grad_norm": 1.323742151260376, "learning_rate": 9.926302794349306e-05, "loss": 0.5854, "step": 6120 }, { "epoch": 17.02777777777778, "grad_norm": 1.2485435009002686, "learning_rate": 9.925830641021594e-05, "loss": 0.5844, "step": 6130 }, { "epoch": 17.055555555555557, "grad_norm": 1.13386070728302, "learning_rate": 9.925356991356593e-05, "loss": 0.5603, "step": 6140 }, { "epoch": 17.083333333333332, "grad_norm": 1.1103910207748413, "learning_rate": 9.924881845498184e-05, "loss": 0.5631, "step": 6150 }, { "epoch": 17.11111111111111, "grad_norm": 1.131513237953186, "learning_rate": 9.924405203590705e-05, "loss": 0.5829, "step": 6160 }, { "epoch": 17.13888888888889, "grad_norm": 1.2765164375305176, "learning_rate": 9.923927065778946e-05, "loss": 0.5804, "step": 6170 }, { "epoch": 17.166666666666668, "grad_norm": 1.2416433095932007, "learning_rate": 9.923447432208154e-05, "loss": 0.5945, "step": 6180 }, { "epoch": 17.194444444444443, "grad_norm": 1.22918701171875, "learning_rate": 9.922966303024027e-05, "loss": 0.5918, "step": 6190 }, { "epoch": 17.22222222222222, "grad_norm": 1.0790663957595825, "learning_rate": 9.922483678372721e-05, "loss": 0.581, "step": 6200 }, { "epoch": 17.25, "grad_norm": 1.15313720703125, "learning_rate": 9.921999558400845e-05, "loss": 0.5724, "step": 6210 }, { "epoch": 17.27777777777778, "grad_norm": 1.2147074937820435, "learning_rate": 9.92151394325546e-05, "loss": 0.5647, "step": 6220 }, { "epoch": 17.305555555555557, "grad_norm": 1.1836779117584229, "learning_rate": 9.921026833084084e-05, "loss": 0.578, "step": 6230 }, { "epoch": 17.333333333333332, "grad_norm": 1.0998460054397583, "learning_rate": 9.920538228034689e-05, "loss": 0.563, "step": 6240 }, { "epoch": 17.36111111111111, "grad_norm": 1.2452219724655151, "learning_rate": 9.920048128255699e-05, "loss": 0.5715, "step": 6250 }, { "epoch": 17.38888888888889, "grad_norm": 1.2286325693130493, "learning_rate": 9.919556533895995e-05, "loss": 0.5699, "step": 6260 }, { "epoch": 17.416666666666668, "grad_norm": 1.0937528610229492, "learning_rate": 9.919063445104907e-05, "loss": 0.5751, "step": 6270 }, { "epoch": 17.444444444444443, "grad_norm": 1.0686107873916626, "learning_rate": 9.918568862032227e-05, "loss": 0.5825, "step": 6280 }, { "epoch": 17.47222222222222, "grad_norm": 1.1919841766357422, "learning_rate": 9.918072784828194e-05, "loss": 0.576, "step": 6290 }, { "epoch": 17.5, "grad_norm": 1.1427522897720337, "learning_rate": 9.917575213643501e-05, "loss": 0.5577, "step": 6300 }, { "epoch": 17.52777777777778, "grad_norm": 1.2836369276046753, "learning_rate": 9.917076148629302e-05, "loss": 0.5687, "step": 6310 }, { "epoch": 17.555555555555557, "grad_norm": 1.2002949714660645, "learning_rate": 9.916575589937196e-05, "loss": 0.5553, "step": 6320 }, { "epoch": 17.583333333333332, "grad_norm": 1.1840379238128662, "learning_rate": 9.916073537719239e-05, "loss": 0.5671, "step": 6330 }, { "epoch": 17.61111111111111, "grad_norm": 1.2670693397521973, "learning_rate": 9.915569992127944e-05, "loss": 0.5703, "step": 6340 }, { "epoch": 17.63888888888889, "grad_norm": 1.176347017288208, "learning_rate": 9.915064953316273e-05, "loss": 0.5559, "step": 6350 }, { "epoch": 17.666666666666668, "grad_norm": 1.1663625240325928, "learning_rate": 9.914558421437645e-05, "loss": 0.5634, "step": 6360 }, { "epoch": 17.694444444444443, "grad_norm": 1.144925594329834, "learning_rate": 9.914050396645929e-05, "loss": 0.5682, "step": 6370 }, { "epoch": 17.72222222222222, "grad_norm": 1.3185532093048096, "learning_rate": 9.913540879095452e-05, "loss": 0.5671, "step": 6380 }, { "epoch": 17.75, "grad_norm": 1.0836809873580933, "learning_rate": 9.913029868940987e-05, "loss": 0.5795, "step": 6390 }, { "epoch": 17.77777777777778, "grad_norm": 1.2420070171356201, "learning_rate": 9.912517366337772e-05, "loss": 0.5707, "step": 6400 }, { "epoch": 17.805555555555557, "grad_norm": 1.3740668296813965, "learning_rate": 9.912003371441487e-05, "loss": 0.575, "step": 6410 }, { "epoch": 17.833333333333332, "grad_norm": 1.253722071647644, "learning_rate": 9.911487884408271e-05, "loss": 0.5792, "step": 6420 }, { "epoch": 17.86111111111111, "grad_norm": 1.0523384809494019, "learning_rate": 9.910970905394719e-05, "loss": 0.5617, "step": 6430 }, { "epoch": 17.88888888888889, "grad_norm": 1.235805869102478, "learning_rate": 9.91045243455787e-05, "loss": 0.5715, "step": 6440 }, { "epoch": 17.916666666666668, "grad_norm": 1.2393981218338013, "learning_rate": 9.909932472055225e-05, "loss": 0.5617, "step": 6450 }, { "epoch": 17.944444444444443, "grad_norm": 1.251813530921936, "learning_rate": 9.909411018044734e-05, "loss": 0.564, "step": 6460 }, { "epoch": 17.97222222222222, "grad_norm": 1.1064459085464478, "learning_rate": 9.908888072684802e-05, "loss": 0.5784, "step": 6470 }, { "epoch": 18.0, "grad_norm": 1.2050491571426392, "learning_rate": 9.908363636134285e-05, "loss": 0.5602, "step": 6480 }, { "epoch": 18.02777777777778, "grad_norm": 1.1400389671325684, "learning_rate": 9.907837708552493e-05, "loss": 0.5602, "step": 6490 }, { "epoch": 18.055555555555557, "grad_norm": 1.5332584381103516, "learning_rate": 9.90731029009919e-05, "loss": 0.5664, "step": 6500 }, { "epoch": 18.083333333333332, "grad_norm": 1.3170058727264404, "learning_rate": 9.906781380934589e-05, "loss": 0.5618, "step": 6510 }, { "epoch": 18.11111111111111, "grad_norm": 1.2195183038711548, "learning_rate": 9.906250981219362e-05, "loss": 0.5726, "step": 6520 }, { "epoch": 18.13888888888889, "grad_norm": 1.195555567741394, "learning_rate": 9.905719091114628e-05, "loss": 0.5811, "step": 6530 }, { "epoch": 18.166666666666668, "grad_norm": 1.1693966388702393, "learning_rate": 9.905185710781964e-05, "loss": 0.5558, "step": 6540 }, { "epoch": 18.194444444444443, "grad_norm": 1.2401199340820312, "learning_rate": 9.904650840383392e-05, "loss": 0.5719, "step": 6550 }, { "epoch": 18.22222222222222, "grad_norm": 1.1655353307724, "learning_rate": 9.904114480081397e-05, "loss": 0.5788, "step": 6560 }, { "epoch": 18.25, "grad_norm": 1.0566147565841675, "learning_rate": 9.903576630038906e-05, "loss": 0.574, "step": 6570 }, { "epoch": 18.27777777777778, "grad_norm": 1.2862606048583984, "learning_rate": 9.903037290419309e-05, "loss": 0.5974, "step": 6580 }, { "epoch": 18.305555555555557, "grad_norm": 1.441632866859436, "learning_rate": 9.902496461386439e-05, "loss": 0.5622, "step": 6590 }, { "epoch": 18.333333333333332, "grad_norm": 1.3150852918624878, "learning_rate": 9.901954143104588e-05, "loss": 0.5629, "step": 6600 }, { "epoch": 18.36111111111111, "grad_norm": 1.2959171533584595, "learning_rate": 9.901410335738496e-05, "loss": 0.5713, "step": 6610 }, { "epoch": 18.38888888888889, "grad_norm": 1.4694809913635254, "learning_rate": 9.900865039453358e-05, "loss": 0.5611, "step": 6620 }, { "epoch": 18.416666666666668, "grad_norm": 1.1448307037353516, "learning_rate": 9.900318254414821e-05, "loss": 0.5718, "step": 6630 }, { "epoch": 18.444444444444443, "grad_norm": 1.1558316946029663, "learning_rate": 9.899769980788985e-05, "loss": 0.5742, "step": 6640 }, { "epoch": 18.47222222222222, "grad_norm": 1.1240488290786743, "learning_rate": 9.899220218742398e-05, "loss": 0.5517, "step": 6650 }, { "epoch": 18.5, "grad_norm": 1.2328622341156006, "learning_rate": 9.898668968442066e-05, "loss": 0.5584, "step": 6660 }, { "epoch": 18.52777777777778, "grad_norm": 1.22172212600708, "learning_rate": 9.898116230055443e-05, "loss": 0.5558, "step": 6670 }, { "epoch": 18.555555555555557, "grad_norm": 1.205459713935852, "learning_rate": 9.897562003750437e-05, "loss": 0.5597, "step": 6680 }, { "epoch": 18.583333333333332, "grad_norm": 1.1809521913528442, "learning_rate": 9.897006289695407e-05, "loss": 0.5642, "step": 6690 }, { "epoch": 18.61111111111111, "grad_norm": 1.282579779624939, "learning_rate": 9.896449088059164e-05, "loss": 0.5582, "step": 6700 }, { "epoch": 18.63888888888889, "grad_norm": 1.3062695264816284, "learning_rate": 9.89589039901097e-05, "loss": 0.5619, "step": 6710 }, { "epoch": 18.666666666666668, "grad_norm": 1.2551175355911255, "learning_rate": 9.895330222720542e-05, "loss": 0.5479, "step": 6720 }, { "epoch": 18.694444444444443, "grad_norm": 1.279563069343567, "learning_rate": 9.894768559358047e-05, "loss": 0.5618, "step": 6730 }, { "epoch": 18.72222222222222, "grad_norm": 1.1957341432571411, "learning_rate": 9.894205409094101e-05, "loss": 0.5745, "step": 6740 }, { "epoch": 18.75, "grad_norm": 1.1110496520996094, "learning_rate": 9.893640772099777e-05, "loss": 0.5438, "step": 6750 }, { "epoch": 18.77777777777778, "grad_norm": 1.2478408813476562, "learning_rate": 9.893074648546595e-05, "loss": 0.5544, "step": 6760 }, { "epoch": 18.805555555555557, "grad_norm": 1.1568655967712402, "learning_rate": 9.892507038606528e-05, "loss": 0.5378, "step": 6770 }, { "epoch": 18.833333333333332, "grad_norm": 1.3075309991836548, "learning_rate": 9.891937942452003e-05, "loss": 0.554, "step": 6780 }, { "epoch": 18.86111111111111, "grad_norm": 1.2037347555160522, "learning_rate": 9.891367360255895e-05, "loss": 0.5649, "step": 6790 }, { "epoch": 18.88888888888889, "grad_norm": 1.1580568552017212, "learning_rate": 9.890795292191532e-05, "loss": 0.5833, "step": 6800 }, { "epoch": 18.916666666666668, "grad_norm": 1.129809856414795, "learning_rate": 9.890221738432694e-05, "loss": 0.5431, "step": 6810 }, { "epoch": 18.944444444444443, "grad_norm": 1.3236039876937866, "learning_rate": 9.88964669915361e-05, "loss": 0.5399, "step": 6820 }, { "epoch": 18.97222222222222, "grad_norm": 1.227782130241394, "learning_rate": 9.889070174528963e-05, "loss": 0.5465, "step": 6830 }, { "epoch": 19.0, "grad_norm": 1.1445332765579224, "learning_rate": 9.888492164733883e-05, "loss": 0.5459, "step": 6840 }, { "epoch": 19.02777777777778, "grad_norm": 1.2764918804168701, "learning_rate": 9.88791266994396e-05, "loss": 0.5427, "step": 6850 }, { "epoch": 19.055555555555557, "grad_norm": 1.2002711296081543, "learning_rate": 9.887331690335223e-05, "loss": 0.5592, "step": 6860 }, { "epoch": 19.083333333333332, "grad_norm": 1.2039692401885986, "learning_rate": 9.886749226084163e-05, "loss": 0.5582, "step": 6870 }, { "epoch": 19.11111111111111, "grad_norm": 1.2407231330871582, "learning_rate": 9.886165277367714e-05, "loss": 0.5596, "step": 6880 }, { "epoch": 19.13888888888889, "grad_norm": 1.0061967372894287, "learning_rate": 9.885579844363265e-05, "loss": 0.559, "step": 6890 }, { "epoch": 19.166666666666668, "grad_norm": 1.11925208568573, "learning_rate": 9.884992927248656e-05, "loss": 0.5459, "step": 6900 }, { "epoch": 19.194444444444443, "grad_norm": 1.2885053157806396, "learning_rate": 9.884404526202178e-05, "loss": 0.5568, "step": 6910 }, { "epoch": 19.22222222222222, "grad_norm": 1.2837600708007812, "learning_rate": 9.883814641402568e-05, "loss": 0.5661, "step": 6920 }, { "epoch": 19.25, "grad_norm": 1.1884775161743164, "learning_rate": 9.88322327302902e-05, "loss": 0.5464, "step": 6930 }, { "epoch": 19.27777777777778, "grad_norm": 1.1831237077713013, "learning_rate": 9.882630421261176e-05, "loss": 0.5347, "step": 6940 }, { "epoch": 19.305555555555557, "grad_norm": 1.2224942445755005, "learning_rate": 9.88203608627913e-05, "loss": 0.5598, "step": 6950 }, { "epoch": 19.333333333333332, "grad_norm": 1.2929342985153198, "learning_rate": 9.881440268263422e-05, "loss": 0.5655, "step": 6960 }, { "epoch": 19.36111111111111, "grad_norm": 1.1037460565567017, "learning_rate": 9.880842967395048e-05, "loss": 0.5353, "step": 6970 }, { "epoch": 19.38888888888889, "grad_norm": 1.1810617446899414, "learning_rate": 9.880244183855452e-05, "loss": 0.5415, "step": 6980 }, { "epoch": 19.416666666666668, "grad_norm": 1.1134600639343262, "learning_rate": 9.879643917826527e-05, "loss": 0.5504, "step": 6990 }, { "epoch": 19.444444444444443, "grad_norm": 1.2013177871704102, "learning_rate": 9.87904216949062e-05, "loss": 0.5473, "step": 7000 }, { "epoch": 19.47222222222222, "grad_norm": 1.2265827655792236, "learning_rate": 9.878438939030526e-05, "loss": 0.5527, "step": 7010 }, { "epoch": 19.5, "grad_norm": 1.084662675857544, "learning_rate": 9.877834226629489e-05, "loss": 0.529, "step": 7020 }, { "epoch": 19.52777777777778, "grad_norm": 1.2839806079864502, "learning_rate": 9.877228032471206e-05, "loss": 0.5603, "step": 7030 }, { "epoch": 19.555555555555557, "grad_norm": 1.2619349956512451, "learning_rate": 9.876620356739823e-05, "loss": 0.5574, "step": 7040 }, { "epoch": 19.583333333333332, "grad_norm": 1.392693042755127, "learning_rate": 9.876011199619935e-05, "loss": 0.5699, "step": 7050 }, { "epoch": 19.61111111111111, "grad_norm": 1.1074671745300293, "learning_rate": 9.875400561296589e-05, "loss": 0.549, "step": 7060 }, { "epoch": 19.63888888888889, "grad_norm": 1.2374407052993774, "learning_rate": 9.874788441955278e-05, "loss": 0.5449, "step": 7070 }, { "epoch": 19.666666666666668, "grad_norm": 1.1391758918762207, "learning_rate": 9.874174841781951e-05, "loss": 0.5256, "step": 7080 }, { "epoch": 19.694444444444443, "grad_norm": 1.110991358757019, "learning_rate": 9.873559760963003e-05, "loss": 0.5424, "step": 7090 }, { "epoch": 19.72222222222222, "grad_norm": 1.2436386346817017, "learning_rate": 9.872943199685278e-05, "loss": 0.5417, "step": 7100 }, { "epoch": 19.75, "grad_norm": 1.2204045057296753, "learning_rate": 9.872325158136071e-05, "loss": 0.5499, "step": 7110 }, { "epoch": 19.77777777777778, "grad_norm": 1.0128934383392334, "learning_rate": 9.871705636503128e-05, "loss": 0.556, "step": 7120 }, { "epoch": 19.805555555555557, "grad_norm": 1.1782664060592651, "learning_rate": 9.871084634974641e-05, "loss": 0.5307, "step": 7130 }, { "epoch": 19.833333333333332, "grad_norm": 1.2845556735992432, "learning_rate": 9.870462153739257e-05, "loss": 0.5531, "step": 7140 }, { "epoch": 19.86111111111111, "grad_norm": 1.121530532836914, "learning_rate": 9.869838192986067e-05, "loss": 0.5409, "step": 7150 }, { "epoch": 19.88888888888889, "grad_norm": 1.0899758338928223, "learning_rate": 9.869212752904616e-05, "loss": 0.5467, "step": 7160 }, { "epoch": 19.916666666666668, "grad_norm": 1.2179509401321411, "learning_rate": 9.868585833684894e-05, "loss": 0.5541, "step": 7170 }, { "epoch": 19.944444444444443, "grad_norm": 1.263163685798645, "learning_rate": 9.867957435517342e-05, "loss": 0.5395, "step": 7180 }, { "epoch": 19.97222222222222, "grad_norm": 1.2055844068527222, "learning_rate": 9.867327558592854e-05, "loss": 0.564, "step": 7190 }, { "epoch": 20.0, "grad_norm": 1.2429609298706055, "learning_rate": 9.866696203102766e-05, "loss": 0.5395, "step": 7200 }, { "epoch": 20.02777777777778, "grad_norm": 1.1275066137313843, "learning_rate": 9.86606336923887e-05, "loss": 0.5286, "step": 7210 }, { "epoch": 20.055555555555557, "grad_norm": 1.2431368827819824, "learning_rate": 9.865429057193403e-05, "loss": 0.56, "step": 7220 }, { "epoch": 20.083333333333332, "grad_norm": 1.0923770666122437, "learning_rate": 9.864793267159053e-05, "loss": 0.532, "step": 7230 }, { "epoch": 20.11111111111111, "grad_norm": 1.066215991973877, "learning_rate": 9.864155999328957e-05, "loss": 0.5388, "step": 7240 }, { "epoch": 20.13888888888889, "grad_norm": 1.3242290019989014, "learning_rate": 9.8635172538967e-05, "loss": 0.5338, "step": 7250 }, { "epoch": 20.166666666666668, "grad_norm": 1.139043927192688, "learning_rate": 9.862877031056312e-05, "loss": 0.5449, "step": 7260 }, { "epoch": 20.194444444444443, "grad_norm": 1.1564420461654663, "learning_rate": 9.862235331002279e-05, "loss": 0.5387, "step": 7270 }, { "epoch": 20.22222222222222, "grad_norm": 1.118118166923523, "learning_rate": 9.861592153929533e-05, "loss": 0.5292, "step": 7280 }, { "epoch": 20.25, "grad_norm": 1.113695740699768, "learning_rate": 9.860947500033455e-05, "loss": 0.5387, "step": 7290 }, { "epoch": 20.27777777777778, "grad_norm": 1.0894173383712769, "learning_rate": 9.86030136950987e-05, "loss": 0.5493, "step": 7300 }, { "epoch": 20.305555555555557, "grad_norm": 1.0512800216674805, "learning_rate": 9.85965376255506e-05, "loss": 0.5258, "step": 7310 }, { "epoch": 20.333333333333332, "grad_norm": 1.0533151626586914, "learning_rate": 9.859004679365747e-05, "loss": 0.5416, "step": 7320 }, { "epoch": 20.36111111111111, "grad_norm": 1.2534639835357666, "learning_rate": 9.858354120139108e-05, "loss": 0.5476, "step": 7330 }, { "epoch": 20.38888888888889, "grad_norm": 1.1458443403244019, "learning_rate": 9.857702085072764e-05, "loss": 0.5407, "step": 7340 }, { "epoch": 20.416666666666668, "grad_norm": 1.1308917999267578, "learning_rate": 9.857048574364787e-05, "loss": 0.5318, "step": 7350 }, { "epoch": 20.444444444444443, "grad_norm": 1.1285085678100586, "learning_rate": 9.856393588213698e-05, "loss": 0.5239, "step": 7360 }, { "epoch": 20.47222222222222, "grad_norm": 1.3232110738754272, "learning_rate": 9.855737126818458e-05, "loss": 0.5345, "step": 7370 }, { "epoch": 20.5, "grad_norm": 1.249216079711914, "learning_rate": 9.855079190378491e-05, "loss": 0.5418, "step": 7380 }, { "epoch": 20.52777777777778, "grad_norm": 1.069140911102295, "learning_rate": 9.854419779093655e-05, "loss": 0.5409, "step": 7390 }, { "epoch": 20.555555555555557, "grad_norm": 1.1623873710632324, "learning_rate": 9.853758893164264e-05, "loss": 0.5312, "step": 7400 }, { "epoch": 20.583333333333332, "grad_norm": 1.2340070009231567, "learning_rate": 9.853096532791078e-05, "loss": 0.5269, "step": 7410 }, { "epoch": 20.61111111111111, "grad_norm": 1.1459466218948364, "learning_rate": 9.852432698175304e-05, "loss": 0.5516, "step": 7420 }, { "epoch": 20.63888888888889, "grad_norm": 1.2168762683868408, "learning_rate": 9.851767389518597e-05, "loss": 0.5366, "step": 7430 }, { "epoch": 20.666666666666668, "grad_norm": 1.05327570438385, "learning_rate": 9.85110060702306e-05, "loss": 0.524, "step": 7440 }, { "epoch": 20.694444444444443, "grad_norm": 1.1967859268188477, "learning_rate": 9.850432350891245e-05, "loss": 0.5216, "step": 7450 }, { "epoch": 20.72222222222222, "grad_norm": 1.2417773008346558, "learning_rate": 9.84976262132615e-05, "loss": 0.5401, "step": 7460 }, { "epoch": 20.75, "grad_norm": 1.4932764768600464, "learning_rate": 9.849091418531222e-05, "loss": 0.5363, "step": 7470 }, { "epoch": 20.77777777777778, "grad_norm": 1.2558168172836304, "learning_rate": 9.848418742710353e-05, "loss": 0.5271, "step": 7480 }, { "epoch": 20.805555555555557, "grad_norm": 1.1808186769485474, "learning_rate": 9.847744594067885e-05, "loss": 0.5432, "step": 7490 }, { "epoch": 20.833333333333332, "grad_norm": 1.2268924713134766, "learning_rate": 9.847068972808607e-05, "loss": 0.5347, "step": 7500 }, { "epoch": 20.86111111111111, "grad_norm": 1.3834648132324219, "learning_rate": 9.846391879137756e-05, "loss": 0.5191, "step": 7510 }, { "epoch": 20.88888888888889, "grad_norm": 1.2358100414276123, "learning_rate": 9.845713313261012e-05, "loss": 0.5478, "step": 7520 }, { "epoch": 20.916666666666668, "grad_norm": 1.1571022272109985, "learning_rate": 9.845033275384505e-05, "loss": 0.55, "step": 7530 }, { "epoch": 20.944444444444443, "grad_norm": 1.248241662979126, "learning_rate": 9.844351765714818e-05, "loss": 0.5323, "step": 7540 }, { "epoch": 20.97222222222222, "grad_norm": 1.2667195796966553, "learning_rate": 9.843668784458971e-05, "loss": 0.5262, "step": 7550 }, { "epoch": 21.0, "grad_norm": 1.1570607423782349, "learning_rate": 9.842984331824437e-05, "loss": 0.5327, "step": 7560 }, { "epoch": 21.02777777777778, "grad_norm": 1.1553726196289062, "learning_rate": 9.842298408019133e-05, "loss": 0.5246, "step": 7570 }, { "epoch": 21.055555555555557, "grad_norm": 1.2531092166900635, "learning_rate": 9.841611013251429e-05, "loss": 0.548, "step": 7580 }, { "epoch": 21.083333333333332, "grad_norm": 1.1012104749679565, "learning_rate": 9.840922147730133e-05, "loss": 0.5243, "step": 7590 }, { "epoch": 21.11111111111111, "grad_norm": 1.1153162717819214, "learning_rate": 9.840231811664506e-05, "loss": 0.5363, "step": 7600 }, { "epoch": 21.13888888888889, "grad_norm": 1.2018601894378662, "learning_rate": 9.839540005264252e-05, "loss": 0.5175, "step": 7610 }, { "epoch": 21.166666666666668, "grad_norm": 1.0574321746826172, "learning_rate": 9.838846728739527e-05, "loss": 0.5201, "step": 7620 }, { "epoch": 21.194444444444443, "grad_norm": 1.169545292854309, "learning_rate": 9.838151982300927e-05, "loss": 0.5342, "step": 7630 }, { "epoch": 21.22222222222222, "grad_norm": 1.3900794982910156, "learning_rate": 9.8374557661595e-05, "loss": 0.5348, "step": 7640 }, { "epoch": 21.25, "grad_norm": 1.1706088781356812, "learning_rate": 9.836758080526735e-05, "loss": 0.5402, "step": 7650 }, { "epoch": 21.27777777777778, "grad_norm": 1.2080782651901245, "learning_rate": 9.836058925614575e-05, "loss": 0.5305, "step": 7660 }, { "epoch": 21.305555555555557, "grad_norm": 1.257055401802063, "learning_rate": 9.8353583016354e-05, "loss": 0.5258, "step": 7670 }, { "epoch": 21.333333333333332, "grad_norm": 1.1867486238479614, "learning_rate": 9.834656208802044e-05, "loss": 0.5276, "step": 7680 }, { "epoch": 21.36111111111111, "grad_norm": 1.2985363006591797, "learning_rate": 9.833952647327784e-05, "loss": 0.5156, "step": 7690 }, { "epoch": 21.38888888888889, "grad_norm": 1.1304078102111816, "learning_rate": 9.833247617426342e-05, "loss": 0.5299, "step": 7700 }, { "epoch": 21.416666666666668, "grad_norm": 1.4478213787078857, "learning_rate": 9.832541119311889e-05, "loss": 0.536, "step": 7710 }, { "epoch": 21.444444444444443, "grad_norm": 1.1176952123641968, "learning_rate": 9.83183315319904e-05, "loss": 0.5099, "step": 7720 }, { "epoch": 21.47222222222222, "grad_norm": 1.3548671007156372, "learning_rate": 9.831123719302855e-05, "loss": 0.5459, "step": 7730 }, { "epoch": 21.5, "grad_norm": 1.228055715560913, "learning_rate": 9.830412817838842e-05, "loss": 0.516, "step": 7740 }, { "epoch": 21.52777777777778, "grad_norm": 1.1308108568191528, "learning_rate": 9.829700449022956e-05, "loss": 0.5204, "step": 7750 }, { "epoch": 21.555555555555557, "grad_norm": 1.1929937601089478, "learning_rate": 9.828986613071593e-05, "loss": 0.5174, "step": 7760 }, { "epoch": 21.583333333333332, "grad_norm": 1.2259856462478638, "learning_rate": 9.828271310201601e-05, "loss": 0.5176, "step": 7770 }, { "epoch": 21.61111111111111, "grad_norm": 1.3301860094070435, "learning_rate": 9.827554540630268e-05, "loss": 0.5372, "step": 7780 }, { "epoch": 21.63888888888889, "grad_norm": 1.1224802732467651, "learning_rate": 9.826836304575329e-05, "loss": 0.5164, "step": 7790 }, { "epoch": 21.666666666666668, "grad_norm": 1.0725796222686768, "learning_rate": 9.826116602254966e-05, "loss": 0.5224, "step": 7800 }, { "epoch": 21.694444444444443, "grad_norm": 1.098864197731018, "learning_rate": 9.825395433887805e-05, "loss": 0.5202, "step": 7810 }, { "epoch": 21.72222222222222, "grad_norm": 1.1819063425064087, "learning_rate": 9.824672799692917e-05, "loss": 0.5307, "step": 7820 }, { "epoch": 21.75, "grad_norm": 1.2545263767242432, "learning_rate": 9.823948699889823e-05, "loss": 0.544, "step": 7830 }, { "epoch": 21.77777777777778, "grad_norm": 1.0350141525268555, "learning_rate": 9.823223134698483e-05, "loss": 0.5235, "step": 7840 }, { "epoch": 21.805555555555557, "grad_norm": 1.1730495691299438, "learning_rate": 9.822496104339303e-05, "loss": 0.5241, "step": 7850 }, { "epoch": 21.833333333333332, "grad_norm": 1.0705418586730957, "learning_rate": 9.821767609033138e-05, "loss": 0.5159, "step": 7860 }, { "epoch": 21.86111111111111, "grad_norm": 1.132726788520813, "learning_rate": 9.821037649001284e-05, "loss": 0.5138, "step": 7870 }, { "epoch": 21.88888888888889, "grad_norm": 1.177650809288025, "learning_rate": 9.820306224465486e-05, "loss": 0.5339, "step": 7880 }, { "epoch": 21.916666666666668, "grad_norm": 1.1833027601242065, "learning_rate": 9.819573335647928e-05, "loss": 0.5313, "step": 7890 }, { "epoch": 21.944444444444443, "grad_norm": 1.140980839729309, "learning_rate": 9.818838982771246e-05, "loss": 0.5154, "step": 7900 }, { "epoch": 21.97222222222222, "grad_norm": 1.1081398725509644, "learning_rate": 9.818103166058514e-05, "loss": 0.5421, "step": 7910 }, { "epoch": 22.0, "grad_norm": 1.0810844898223877, "learning_rate": 9.817365885733254e-05, "loss": 0.5126, "step": 7920 }, { "epoch": 22.02777777777778, "grad_norm": 1.1807750463485718, "learning_rate": 9.816627142019434e-05, "loss": 0.5255, "step": 7930 }, { "epoch": 22.055555555555557, "grad_norm": 1.176068902015686, "learning_rate": 9.815886935141463e-05, "loss": 0.5243, "step": 7940 }, { "epoch": 22.083333333333332, "grad_norm": 1.123746395111084, "learning_rate": 9.8151452653242e-05, "loss": 0.5416, "step": 7950 }, { "epoch": 22.11111111111111, "grad_norm": 1.2829951047897339, "learning_rate": 9.814402132792939e-05, "loss": 0.5474, "step": 7960 }, { "epoch": 22.13888888888889, "grad_norm": 1.4602495431900024, "learning_rate": 9.813657537773428e-05, "loss": 0.534, "step": 7970 }, { "epoch": 22.166666666666668, "grad_norm": 1.2636041641235352, "learning_rate": 9.812911480491854e-05, "loss": 0.5349, "step": 7980 }, { "epoch": 22.194444444444443, "grad_norm": 1.3402085304260254, "learning_rate": 9.81216396117485e-05, "loss": 0.5246, "step": 7990 }, { "epoch": 22.22222222222222, "grad_norm": 1.046677589416504, "learning_rate": 9.811414980049491e-05, "loss": 0.5297, "step": 8000 }, { "epoch": 22.25, "grad_norm": 1.15018630027771, "learning_rate": 9.810664537343301e-05, "loss": 0.5316, "step": 8010 }, { "epoch": 22.27777777777778, "grad_norm": 1.0783133506774902, "learning_rate": 9.809912633284243e-05, "loss": 0.5298, "step": 8020 }, { "epoch": 22.305555555555557, "grad_norm": 0.9926371574401855, "learning_rate": 9.809159268100725e-05, "loss": 0.5232, "step": 8030 }, { "epoch": 22.333333333333332, "grad_norm": 1.1487632989883423, "learning_rate": 9.808404442021599e-05, "loss": 0.5126, "step": 8040 }, { "epoch": 22.36111111111111, "grad_norm": 1.0883289575576782, "learning_rate": 9.807648155276163e-05, "loss": 0.5062, "step": 8050 }, { "epoch": 22.38888888888889, "grad_norm": 1.25315260887146, "learning_rate": 9.806890408094156e-05, "loss": 0.5221, "step": 8060 }, { "epoch": 22.416666666666668, "grad_norm": 1.1603981256484985, "learning_rate": 9.806131200705761e-05, "loss": 0.5098, "step": 8070 }, { "epoch": 22.444444444444443, "grad_norm": 1.2187186479568481, "learning_rate": 9.805370533341605e-05, "loss": 0.5202, "step": 8080 }, { "epoch": 22.47222222222222, "grad_norm": 1.2965457439422607, "learning_rate": 9.804608406232762e-05, "loss": 0.517, "step": 8090 }, { "epoch": 22.5, "grad_norm": 1.0187081098556519, "learning_rate": 9.803844819610741e-05, "loss": 0.5027, "step": 8100 }, { "epoch": 22.52777777777778, "grad_norm": 1.0690468549728394, "learning_rate": 9.803079773707504e-05, "loss": 0.5197, "step": 8110 }, { "epoch": 22.555555555555557, "grad_norm": 1.131892204284668, "learning_rate": 9.802313268755447e-05, "loss": 0.5242, "step": 8120 }, { "epoch": 22.583333333333332, "grad_norm": 1.109485149383545, "learning_rate": 9.801545304987419e-05, "loss": 0.5064, "step": 8130 }, { "epoch": 22.61111111111111, "grad_norm": 1.2259509563446045, "learning_rate": 9.800775882636704e-05, "loss": 0.5288, "step": 8140 }, { "epoch": 22.63888888888889, "grad_norm": 1.2200136184692383, "learning_rate": 9.800005001937034e-05, "loss": 0.5253, "step": 8150 }, { "epoch": 22.666666666666668, "grad_norm": 0.9970175623893738, "learning_rate": 9.79923266312258e-05, "loss": 0.5047, "step": 8160 }, { "epoch": 22.694444444444443, "grad_norm": 1.2110705375671387, "learning_rate": 9.79845886642796e-05, "loss": 0.5096, "step": 8170 }, { "epoch": 22.72222222222222, "grad_norm": 1.1075299978256226, "learning_rate": 9.797683612088233e-05, "loss": 0.5174, "step": 8180 }, { "epoch": 22.75, "grad_norm": 1.1512154340744019, "learning_rate": 9.796906900338898e-05, "loss": 0.5191, "step": 8190 }, { "epoch": 22.77777777777778, "grad_norm": 1.1367464065551758, "learning_rate": 9.796128731415903e-05, "loss": 0.5082, "step": 8200 }, { "epoch": 22.805555555555557, "grad_norm": 1.118614912033081, "learning_rate": 9.795349105555634e-05, "loss": 0.5056, "step": 8210 }, { "epoch": 22.833333333333332, "grad_norm": 1.1703966856002808, "learning_rate": 9.794568022994922e-05, "loss": 0.5146, "step": 8220 }, { "epoch": 22.86111111111111, "grad_norm": 1.2146638631820679, "learning_rate": 9.793785483971034e-05, "loss": 0.519, "step": 8230 }, { "epoch": 22.88888888888889, "grad_norm": 1.0551491975784302, "learning_rate": 9.793001488721691e-05, "loss": 0.5013, "step": 8240 }, { "epoch": 22.916666666666668, "grad_norm": 1.2454307079315186, "learning_rate": 9.792216037485047e-05, "loss": 0.5047, "step": 8250 }, { "epoch": 22.944444444444443, "grad_norm": 1.1746931076049805, "learning_rate": 9.791429130499704e-05, "loss": 0.5089, "step": 8260 }, { "epoch": 22.97222222222222, "grad_norm": 1.2266484498977661, "learning_rate": 9.790640768004698e-05, "loss": 0.5394, "step": 8270 }, { "epoch": 23.0, "grad_norm": 1.172802209854126, "learning_rate": 9.789850950239518e-05, "loss": 0.515, "step": 8280 }, { "epoch": 23.02777777777778, "grad_norm": 1.2074260711669922, "learning_rate": 9.789059677444089e-05, "loss": 0.5076, "step": 8290 }, { "epoch": 23.055555555555557, "grad_norm": 1.1931339502334595, "learning_rate": 9.788266949858776e-05, "loss": 0.5036, "step": 8300 }, { "epoch": 23.083333333333332, "grad_norm": 1.1589069366455078, "learning_rate": 9.787472767724392e-05, "loss": 0.5188, "step": 8310 }, { "epoch": 23.11111111111111, "grad_norm": 1.3810862302780151, "learning_rate": 9.786677131282185e-05, "loss": 0.5222, "step": 8320 }, { "epoch": 23.13888888888889, "grad_norm": 1.1194350719451904, "learning_rate": 9.785880040773853e-05, "loss": 0.5128, "step": 8330 }, { "epoch": 23.166666666666668, "grad_norm": 1.172373652458191, "learning_rate": 9.785081496441527e-05, "loss": 0.5147, "step": 8340 }, { "epoch": 23.194444444444443, "grad_norm": 1.0659799575805664, "learning_rate": 9.784281498527785e-05, "loss": 0.5255, "step": 8350 }, { "epoch": 23.22222222222222, "grad_norm": 1.1125810146331787, "learning_rate": 9.783480047275646e-05, "loss": 0.5085, "step": 8360 }, { "epoch": 23.25, "grad_norm": 1.4583806991577148, "learning_rate": 9.78267714292857e-05, "loss": 0.4945, "step": 8370 }, { "epoch": 23.27777777777778, "grad_norm": 1.1822012662887573, "learning_rate": 9.781872785730454e-05, "loss": 0.5113, "step": 8380 }, { "epoch": 23.305555555555557, "grad_norm": 1.0718395709991455, "learning_rate": 9.781066975925646e-05, "loss": 0.5042, "step": 8390 }, { "epoch": 23.333333333333332, "grad_norm": 1.025134801864624, "learning_rate": 9.780259713758928e-05, "loss": 0.4936, "step": 8400 }, { "epoch": 23.36111111111111, "grad_norm": 1.165482759475708, "learning_rate": 9.779450999475524e-05, "loss": 0.5156, "step": 8410 }, { "epoch": 23.38888888888889, "grad_norm": 1.0238288640975952, "learning_rate": 9.7786408333211e-05, "loss": 0.5068, "step": 8420 }, { "epoch": 23.416666666666668, "grad_norm": 1.055850863456726, "learning_rate": 9.777829215541764e-05, "loss": 0.5034, "step": 8430 }, { "epoch": 23.444444444444443, "grad_norm": 1.0263361930847168, "learning_rate": 9.777016146384064e-05, "loss": 0.4947, "step": 8440 }, { "epoch": 23.47222222222222, "grad_norm": 1.0844613313674927, "learning_rate": 9.776201626094988e-05, "loss": 0.5067, "step": 8450 }, { "epoch": 23.5, "grad_norm": 1.1585056781768799, "learning_rate": 9.775385654921965e-05, "loss": 0.4857, "step": 8460 }, { "epoch": 23.52777777777778, "grad_norm": 1.1516302824020386, "learning_rate": 9.774568233112868e-05, "loss": 0.5043, "step": 8470 }, { "epoch": 23.555555555555557, "grad_norm": 1.3080437183380127, "learning_rate": 9.773749360916007e-05, "loss": 0.5175, "step": 8480 }, { "epoch": 23.583333333333332, "grad_norm": 1.2240817546844482, "learning_rate": 9.772929038580134e-05, "loss": 0.5092, "step": 8490 }, { "epoch": 23.61111111111111, "grad_norm": 1.2890266180038452, "learning_rate": 9.772107266354439e-05, "loss": 0.496, "step": 8500 }, { "epoch": 23.63888888888889, "grad_norm": 1.0792149305343628, "learning_rate": 9.77128404448856e-05, "loss": 0.5117, "step": 8510 }, { "epoch": 23.666666666666668, "grad_norm": 1.3146337270736694, "learning_rate": 9.770459373232565e-05, "loss": 0.5102, "step": 8520 }, { "epoch": 23.694444444444443, "grad_norm": 1.2709676027297974, "learning_rate": 9.769633252836969e-05, "loss": 0.5033, "step": 8530 }, { "epoch": 23.72222222222222, "grad_norm": 1.0970818996429443, "learning_rate": 9.768805683552724e-05, "loss": 0.5098, "step": 8540 }, { "epoch": 23.75, "grad_norm": 1.1494886875152588, "learning_rate": 9.767976665631228e-05, "loss": 0.5065, "step": 8550 }, { "epoch": 23.77777777777778, "grad_norm": 1.1503586769104004, "learning_rate": 9.767146199324311e-05, "loss": 0.5042, "step": 8560 }, { "epoch": 23.805555555555557, "grad_norm": 1.1668504476547241, "learning_rate": 9.766314284884249e-05, "loss": 0.5188, "step": 8570 }, { "epoch": 23.833333333333332, "grad_norm": 1.112113356590271, "learning_rate": 9.765480922563752e-05, "loss": 0.5003, "step": 8580 }, { "epoch": 23.86111111111111, "grad_norm": 1.329634189605713, "learning_rate": 9.764646112615978e-05, "loss": 0.5027, "step": 8590 }, { "epoch": 23.88888888888889, "grad_norm": 1.2992053031921387, "learning_rate": 9.763809855294517e-05, "loss": 0.5186, "step": 8600 }, { "epoch": 23.916666666666668, "grad_norm": 1.153591275215149, "learning_rate": 9.762972150853404e-05, "loss": 0.5146, "step": 8610 }, { "epoch": 23.944444444444443, "grad_norm": 0.9665808081626892, "learning_rate": 9.762132999547111e-05, "loss": 0.5059, "step": 8620 }, { "epoch": 23.97222222222222, "grad_norm": 1.1426913738250732, "learning_rate": 9.761292401630549e-05, "loss": 0.5112, "step": 8630 }, { "epoch": 24.0, "grad_norm": 1.1235194206237793, "learning_rate": 9.76045035735907e-05, "loss": 0.4971, "step": 8640 }, { "epoch": 24.02777777777778, "grad_norm": 1.106667399406433, "learning_rate": 9.759606866988464e-05, "loss": 0.5067, "step": 8650 }, { "epoch": 24.055555555555557, "grad_norm": 1.2025728225708008, "learning_rate": 9.758761930774963e-05, "loss": 0.5039, "step": 8660 }, { "epoch": 24.083333333333332, "grad_norm": 1.33157479763031, "learning_rate": 9.757915548975235e-05, "loss": 0.5021, "step": 8670 }, { "epoch": 24.11111111111111, "grad_norm": 1.0366593599319458, "learning_rate": 9.757067721846389e-05, "loss": 0.5142, "step": 8680 }, { "epoch": 24.13888888888889, "grad_norm": 1.3004919290542603, "learning_rate": 9.756218449645971e-05, "loss": 0.5052, "step": 8690 }, { "epoch": 24.166666666666668, "grad_norm": 1.1049678325653076, "learning_rate": 9.75536773263197e-05, "loss": 0.5015, "step": 8700 }, { "epoch": 24.194444444444443, "grad_norm": 1.1924183368682861, "learning_rate": 9.75451557106281e-05, "loss": 0.4858, "step": 8710 }, { "epoch": 24.22222222222222, "grad_norm": 1.2282252311706543, "learning_rate": 9.753661965197354e-05, "loss": 0.5085, "step": 8720 }, { "epoch": 24.25, "grad_norm": 1.20048987865448, "learning_rate": 9.752806915294908e-05, "loss": 0.5104, "step": 8730 }, { "epoch": 24.27777777777778, "grad_norm": 1.2651985883712769, "learning_rate": 9.75195042161521e-05, "loss": 0.4984, "step": 8740 }, { "epoch": 24.305555555555557, "grad_norm": 1.0507692098617554, "learning_rate": 9.751092484418442e-05, "loss": 0.5014, "step": 8750 }, { "epoch": 24.333333333333332, "grad_norm": 1.1799396276474, "learning_rate": 9.750233103965224e-05, "loss": 0.517, "step": 8760 }, { "epoch": 24.36111111111111, "grad_norm": 1.1905460357666016, "learning_rate": 9.749372280516611e-05, "loss": 0.4969, "step": 8770 }, { "epoch": 24.38888888888889, "grad_norm": 1.245108723640442, "learning_rate": 9.748510014334097e-05, "loss": 0.5046, "step": 8780 }, { "epoch": 24.416666666666668, "grad_norm": 1.0740456581115723, "learning_rate": 9.747646305679621e-05, "loss": 0.509, "step": 8790 }, { "epoch": 24.444444444444443, "grad_norm": 1.0846309661865234, "learning_rate": 9.74678115481555e-05, "loss": 0.5089, "step": 8800 }, { "epoch": 24.47222222222222, "grad_norm": 1.1036077737808228, "learning_rate": 9.745914562004696e-05, "loss": 0.5142, "step": 8810 }, { "epoch": 24.5, "grad_norm": 1.2064894437789917, "learning_rate": 9.745046527510307e-05, "loss": 0.5087, "step": 8820 }, { "epoch": 24.52777777777778, "grad_norm": 1.2940833568572998, "learning_rate": 9.744177051596068e-05, "loss": 0.5175, "step": 8830 }, { "epoch": 24.555555555555557, "grad_norm": 1.1876754760742188, "learning_rate": 9.743306134526105e-05, "loss": 0.493, "step": 8840 }, { "epoch": 24.583333333333332, "grad_norm": 1.1567586660385132, "learning_rate": 9.742433776564977e-05, "loss": 0.5075, "step": 8850 }, { "epoch": 24.61111111111111, "grad_norm": 1.2008906602859497, "learning_rate": 9.741559977977683e-05, "loss": 0.5076, "step": 8860 }, { "epoch": 24.63888888888889, "grad_norm": 1.0581237077713013, "learning_rate": 9.740684739029661e-05, "loss": 0.4891, "step": 8870 }, { "epoch": 24.666666666666668, "grad_norm": 1.1310714483261108, "learning_rate": 9.739808059986789e-05, "loss": 0.4921, "step": 8880 }, { "epoch": 24.694444444444443, "grad_norm": 1.177487850189209, "learning_rate": 9.738929941115373e-05, "loss": 0.4979, "step": 8890 }, { "epoch": 24.72222222222222, "grad_norm": 1.1420097351074219, "learning_rate": 9.738050382682167e-05, "loss": 0.5125, "step": 8900 }, { "epoch": 24.75, "grad_norm": 1.1635606288909912, "learning_rate": 9.737169384954355e-05, "loss": 0.4857, "step": 8910 }, { "epoch": 24.77777777777778, "grad_norm": 1.1338754892349243, "learning_rate": 9.736286948199562e-05, "loss": 0.4844, "step": 8920 }, { "epoch": 24.805555555555557, "grad_norm": 1.0160101652145386, "learning_rate": 9.735403072685848e-05, "loss": 0.4862, "step": 8930 }, { "epoch": 24.833333333333332, "grad_norm": 1.3027561902999878, "learning_rate": 9.734517758681712e-05, "loss": 0.4958, "step": 8940 }, { "epoch": 24.86111111111111, "grad_norm": 1.156058430671692, "learning_rate": 9.733631006456088e-05, "loss": 0.5056, "step": 8950 }, { "epoch": 24.88888888888889, "grad_norm": 1.1430037021636963, "learning_rate": 9.732742816278348e-05, "loss": 0.4995, "step": 8960 }, { "epoch": 24.916666666666668, "grad_norm": 1.176511526107788, "learning_rate": 9.731853188418302e-05, "loss": 0.4853, "step": 8970 }, { "epoch": 24.944444444444443, "grad_norm": 1.0521537065505981, "learning_rate": 9.730962123146194e-05, "loss": 0.4964, "step": 8980 }, { "epoch": 24.97222222222222, "grad_norm": 1.243180274963379, "learning_rate": 9.730069620732709e-05, "loss": 0.5039, "step": 8990 }, { "epoch": 25.0, "grad_norm": 1.2540068626403809, "learning_rate": 9.72917568144896e-05, "loss": 0.5178, "step": 9000 }, { "epoch": 25.02777777777778, "grad_norm": 1.1301926374435425, "learning_rate": 9.728280305566509e-05, "loss": 0.4929, "step": 9010 }, { "epoch": 25.055555555555557, "grad_norm": 1.1833432912826538, "learning_rate": 9.727383493357343e-05, "loss": 0.4762, "step": 9020 }, { "epoch": 25.083333333333332, "grad_norm": 1.1008203029632568, "learning_rate": 9.726485245093891e-05, "loss": 0.4883, "step": 9030 }, { "epoch": 25.11111111111111, "grad_norm": 1.0815232992172241, "learning_rate": 9.725585561049018e-05, "loss": 0.4884, "step": 9040 }, { "epoch": 25.13888888888889, "grad_norm": 1.195497751235962, "learning_rate": 9.724684441496022e-05, "loss": 0.4954, "step": 9050 }, { "epoch": 25.166666666666668, "grad_norm": 0.9831058382987976, "learning_rate": 9.72378188670864e-05, "loss": 0.4869, "step": 9060 }, { "epoch": 25.194444444444443, "grad_norm": 1.3691295385360718, "learning_rate": 9.722877896961047e-05, "loss": 0.4885, "step": 9070 }, { "epoch": 25.22222222222222, "grad_norm": 1.1355762481689453, "learning_rate": 9.721972472527848e-05, "loss": 0.4786, "step": 9080 }, { "epoch": 25.25, "grad_norm": 1.0549838542938232, "learning_rate": 9.721065613684089e-05, "loss": 0.4931, "step": 9090 }, { "epoch": 25.27777777777778, "grad_norm": 1.0112565755844116, "learning_rate": 9.72015732070525e-05, "loss": 0.4983, "step": 9100 }, { "epoch": 25.305555555555557, "grad_norm": 1.1428492069244385, "learning_rate": 9.719247593867244e-05, "loss": 0.5013, "step": 9110 }, { "epoch": 25.333333333333332, "grad_norm": 1.0558217763900757, "learning_rate": 9.718336433446423e-05, "loss": 0.4948, "step": 9120 }, { "epoch": 25.36111111111111, "grad_norm": 1.2319598197937012, "learning_rate": 9.717423839719574e-05, "loss": 0.4727, "step": 9130 }, { "epoch": 25.38888888888889, "grad_norm": 1.1619044542312622, "learning_rate": 9.71650981296392e-05, "loss": 0.4995, "step": 9140 }, { "epoch": 25.416666666666668, "grad_norm": 1.3174179792404175, "learning_rate": 9.715594353457118e-05, "loss": 0.5126, "step": 9150 }, { "epoch": 25.444444444444443, "grad_norm": 1.2261581420898438, "learning_rate": 9.714677461477257e-05, "loss": 0.4948, "step": 9160 }, { "epoch": 25.47222222222222, "grad_norm": 1.2265998125076294, "learning_rate": 9.713759137302869e-05, "loss": 0.5093, "step": 9170 }, { "epoch": 25.5, "grad_norm": 1.2527679204940796, "learning_rate": 9.712839381212914e-05, "loss": 0.504, "step": 9180 }, { "epoch": 25.52777777777778, "grad_norm": 1.0595794916152954, "learning_rate": 9.71191819348679e-05, "loss": 0.4991, "step": 9190 }, { "epoch": 25.555555555555557, "grad_norm": 1.042569637298584, "learning_rate": 9.710995574404331e-05, "loss": 0.4998, "step": 9200 }, { "epoch": 25.583333333333332, "grad_norm": 1.1227022409439087, "learning_rate": 9.710071524245802e-05, "loss": 0.4959, "step": 9210 }, { "epoch": 25.61111111111111, "grad_norm": 1.0472147464752197, "learning_rate": 9.709146043291906e-05, "loss": 0.5128, "step": 9220 }, { "epoch": 25.63888888888889, "grad_norm": 1.234595775604248, "learning_rate": 9.70821913182378e-05, "loss": 0.4949, "step": 9230 }, { "epoch": 25.666666666666668, "grad_norm": 1.173993468284607, "learning_rate": 9.707290790122995e-05, "loss": 0.4889, "step": 9240 }, { "epoch": 25.694444444444443, "grad_norm": 1.2144410610198975, "learning_rate": 9.706361018471557e-05, "loss": 0.5072, "step": 9250 }, { "epoch": 25.72222222222222, "grad_norm": 1.0922741889953613, "learning_rate": 9.705429817151906e-05, "loss": 0.4869, "step": 9260 }, { "epoch": 25.75, "grad_norm": 1.0876251459121704, "learning_rate": 9.704497186446917e-05, "loss": 0.4923, "step": 9270 }, { "epoch": 25.77777777777778, "grad_norm": 1.165663719177246, "learning_rate": 9.703563126639896e-05, "loss": 0.4847, "step": 9280 }, { "epoch": 25.805555555555557, "grad_norm": 1.1280593872070312, "learning_rate": 9.70262763801459e-05, "loss": 0.4777, "step": 9290 }, { "epoch": 25.833333333333332, "grad_norm": 1.2034002542495728, "learning_rate": 9.701690720855171e-05, "loss": 0.4927, "step": 9300 }, { "epoch": 25.86111111111111, "grad_norm": 0.9706876873970032, "learning_rate": 9.700752375446253e-05, "loss": 0.4875, "step": 9310 }, { "epoch": 25.88888888888889, "grad_norm": 1.1609328985214233, "learning_rate": 9.69981260207288e-05, "loss": 0.4849, "step": 9320 }, { "epoch": 25.916666666666668, "grad_norm": 1.2954212427139282, "learning_rate": 9.698871401020529e-05, "loss": 0.5039, "step": 9330 }, { "epoch": 25.944444444444443, "grad_norm": 1.122140884399414, "learning_rate": 9.697928772575112e-05, "loss": 0.509, "step": 9340 }, { "epoch": 25.97222222222222, "grad_norm": 1.1387852430343628, "learning_rate": 9.696984717022976e-05, "loss": 0.48, "step": 9350 }, { "epoch": 26.0, "grad_norm": 1.1649469137191772, "learning_rate": 9.6960392346509e-05, "loss": 0.4938, "step": 9360 }, { "epoch": 26.02777777777778, "grad_norm": 1.1471428871154785, "learning_rate": 9.695092325746097e-05, "loss": 0.4979, "step": 9370 }, { "epoch": 26.055555555555557, "grad_norm": 1.163936972618103, "learning_rate": 9.694143990596211e-05, "loss": 0.5043, "step": 9380 }, { "epoch": 26.083333333333332, "grad_norm": 0.982515811920166, "learning_rate": 9.693194229489325e-05, "loss": 0.5049, "step": 9390 }, { "epoch": 26.11111111111111, "grad_norm": 1.2151527404785156, "learning_rate": 9.692243042713944e-05, "loss": 0.4908, "step": 9400 }, { "epoch": 26.13888888888889, "grad_norm": 1.2728848457336426, "learning_rate": 9.691290430559022e-05, "loss": 0.4957, "step": 9410 }, { "epoch": 26.166666666666668, "grad_norm": 1.2013332843780518, "learning_rate": 9.690336393313932e-05, "loss": 0.4999, "step": 9420 }, { "epoch": 26.194444444444443, "grad_norm": 1.193662166595459, "learning_rate": 9.689380931268487e-05, "loss": 0.4854, "step": 9430 }, { "epoch": 26.22222222222222, "grad_norm": 1.1414357423782349, "learning_rate": 9.688424044712932e-05, "loss": 0.4936, "step": 9440 }, { "epoch": 26.25, "grad_norm": 1.0911527872085571, "learning_rate": 9.687465733937942e-05, "loss": 0.4835, "step": 9450 }, { "epoch": 26.27777777777778, "grad_norm": 1.1892982721328735, "learning_rate": 9.686505999234627e-05, "loss": 0.5002, "step": 9460 }, { "epoch": 26.305555555555557, "grad_norm": 1.1462008953094482, "learning_rate": 9.685544840894529e-05, "loss": 0.5027, "step": 9470 }, { "epoch": 26.333333333333332, "grad_norm": 1.1347973346710205, "learning_rate": 9.684582259209624e-05, "loss": 0.4858, "step": 9480 }, { "epoch": 26.36111111111111, "grad_norm": 1.1346851587295532, "learning_rate": 9.683618254472317e-05, "loss": 0.4984, "step": 9490 }, { "epoch": 26.38888888888889, "grad_norm": 1.26644766330719, "learning_rate": 9.682652826975449e-05, "loss": 0.4922, "step": 9500 }, { "epoch": 26.416666666666668, "grad_norm": 1.0681781768798828, "learning_rate": 9.681685977012291e-05, "loss": 0.4866, "step": 9510 }, { "epoch": 26.444444444444443, "grad_norm": 1.1388882398605347, "learning_rate": 9.680717704876546e-05, "loss": 0.4938, "step": 9520 }, { "epoch": 26.47222222222222, "grad_norm": 1.0835952758789062, "learning_rate": 9.679748010862349e-05, "loss": 0.4969, "step": 9530 }, { "epoch": 26.5, "grad_norm": 1.1505986452102661, "learning_rate": 9.678776895264267e-05, "loss": 0.4907, "step": 9540 }, { "epoch": 26.52777777777778, "grad_norm": 1.1324480772018433, "learning_rate": 9.6778043583773e-05, "loss": 0.5013, "step": 9550 }, { "epoch": 26.555555555555557, "grad_norm": 0.996260404586792, "learning_rate": 9.67683040049688e-05, "loss": 0.4983, "step": 9560 }, { "epoch": 26.583333333333332, "grad_norm": 1.2236727476119995, "learning_rate": 9.675855021918869e-05, "loss": 0.4978, "step": 9570 }, { "epoch": 26.61111111111111, "grad_norm": 1.1010775566101074, "learning_rate": 9.674878222939561e-05, "loss": 0.4985, "step": 9580 }, { "epoch": 26.63888888888889, "grad_norm": 1.0154650211334229, "learning_rate": 9.673900003855681e-05, "loss": 0.4839, "step": 9590 }, { "epoch": 26.666666666666668, "grad_norm": 1.1624354124069214, "learning_rate": 9.672920364964389e-05, "loss": 0.4919, "step": 9600 }, { "epoch": 26.694444444444443, "grad_norm": 1.1866141557693481, "learning_rate": 9.671939306563269e-05, "loss": 0.5045, "step": 9610 }, { "epoch": 26.72222222222222, "grad_norm": 1.153336524963379, "learning_rate": 9.670956828950345e-05, "loss": 0.5073, "step": 9620 }, { "epoch": 26.75, "grad_norm": 1.1752012968063354, "learning_rate": 9.669972932424065e-05, "loss": 0.4955, "step": 9630 }, { "epoch": 26.77777777777778, "grad_norm": 0.988801121711731, "learning_rate": 9.668987617283312e-05, "loss": 0.4847, "step": 9640 }, { "epoch": 26.805555555555557, "grad_norm": 1.132846474647522, "learning_rate": 9.668000883827397e-05, "loss": 0.4889, "step": 9650 }, { "epoch": 26.833333333333332, "grad_norm": 1.0936012268066406, "learning_rate": 9.667012732356067e-05, "loss": 0.4893, "step": 9660 }, { "epoch": 26.86111111111111, "grad_norm": 1.1273730993270874, "learning_rate": 9.666023163169493e-05, "loss": 0.4897, "step": 9670 }, { "epoch": 26.88888888888889, "grad_norm": 1.3291267156600952, "learning_rate": 9.665032176568281e-05, "loss": 0.4953, "step": 9680 }, { "epoch": 26.916666666666668, "grad_norm": 1.125232458114624, "learning_rate": 9.664039772853469e-05, "loss": 0.482, "step": 9690 }, { "epoch": 26.944444444444443, "grad_norm": 1.1576279401779175, "learning_rate": 9.663045952326518e-05, "loss": 0.4844, "step": 9700 }, { "epoch": 26.97222222222222, "grad_norm": 1.1300530433654785, "learning_rate": 9.662050715289328e-05, "loss": 0.5039, "step": 9710 }, { "epoch": 27.0, "grad_norm": 1.0043938159942627, "learning_rate": 9.661054062044226e-05, "loss": 0.4922, "step": 9720 }, { "epoch": 27.02777777777778, "grad_norm": 1.1923110485076904, "learning_rate": 9.660055992893968e-05, "loss": 0.5008, "step": 9730 }, { "epoch": 27.055555555555557, "grad_norm": 1.0169575214385986, "learning_rate": 9.659056508141739e-05, "loss": 0.4811, "step": 9740 }, { "epoch": 27.083333333333332, "grad_norm": 1.2147600650787354, "learning_rate": 9.658055608091161e-05, "loss": 0.4767, "step": 9750 }, { "epoch": 27.11111111111111, "grad_norm": 1.0005812644958496, "learning_rate": 9.657053293046276e-05, "loss": 0.493, "step": 9760 }, { "epoch": 27.13888888888889, "grad_norm": 1.1928461790084839, "learning_rate": 9.656049563311564e-05, "loss": 0.4809, "step": 9770 }, { "epoch": 27.166666666666668, "grad_norm": 1.2224053144454956, "learning_rate": 9.655044419191929e-05, "loss": 0.474, "step": 9780 }, { "epoch": 27.194444444444443, "grad_norm": 1.1826022863388062, "learning_rate": 9.654037860992711e-05, "loss": 0.4878, "step": 9790 }, { "epoch": 27.22222222222222, "grad_norm": 1.205020785331726, "learning_rate": 9.653029889019672e-05, "loss": 0.4761, "step": 9800 }, { "epoch": 27.25, "grad_norm": 1.2009152173995972, "learning_rate": 9.65202050357901e-05, "loss": 0.4748, "step": 9810 }, { "epoch": 27.27777777777778, "grad_norm": 1.3741172552108765, "learning_rate": 9.651009704977347e-05, "loss": 0.4727, "step": 9820 }, { "epoch": 27.305555555555557, "grad_norm": 1.0534125566482544, "learning_rate": 9.649997493521738e-05, "loss": 0.4901, "step": 9830 }, { "epoch": 27.333333333333332, "grad_norm": 1.330483317375183, "learning_rate": 9.64898386951967e-05, "loss": 0.498, "step": 9840 }, { "epoch": 27.36111111111111, "grad_norm": 1.1288280487060547, "learning_rate": 9.647968833279049e-05, "loss": 0.4974, "step": 9850 }, { "epoch": 27.38888888888889, "grad_norm": 1.1841758489608765, "learning_rate": 9.646952385108218e-05, "loss": 0.483, "step": 9860 }, { "epoch": 27.416666666666668, "grad_norm": 1.1297965049743652, "learning_rate": 9.645934525315951e-05, "loss": 0.4761, "step": 9870 }, { "epoch": 27.444444444444443, "grad_norm": 1.0460224151611328, "learning_rate": 9.644915254211442e-05, "loss": 0.4807, "step": 9880 }, { "epoch": 27.47222222222222, "grad_norm": 1.148513674736023, "learning_rate": 9.643894572104321e-05, "loss": 0.5092, "step": 9890 }, { "epoch": 27.5, "grad_norm": 1.092671275138855, "learning_rate": 9.642872479304644e-05, "loss": 0.4933, "step": 9900 }, { "epoch": 27.52777777777778, "grad_norm": 1.202886939048767, "learning_rate": 9.641848976122895e-05, "loss": 0.4837, "step": 9910 }, { "epoch": 27.555555555555557, "grad_norm": 1.024741291999817, "learning_rate": 9.64082406286999e-05, "loss": 0.4799, "step": 9920 }, { "epoch": 27.583333333333332, "grad_norm": 1.1255911588668823, "learning_rate": 9.639797739857269e-05, "loss": 0.468, "step": 9930 }, { "epoch": 27.61111111111111, "grad_norm": 1.2408448457717896, "learning_rate": 9.638770007396498e-05, "loss": 0.5036, "step": 9940 }, { "epoch": 27.63888888888889, "grad_norm": 1.3000744581222534, "learning_rate": 9.63774086579988e-05, "loss": 0.5056, "step": 9950 }, { "epoch": 27.666666666666668, "grad_norm": 1.1080530881881714, "learning_rate": 9.63671031538004e-05, "loss": 0.4724, "step": 9960 }, { "epoch": 27.694444444444443, "grad_norm": 1.130543828010559, "learning_rate": 9.635678356450031e-05, "loss": 0.4904, "step": 9970 }, { "epoch": 27.72222222222222, "grad_norm": 1.2029622793197632, "learning_rate": 9.634644989323336e-05, "loss": 0.4798, "step": 9980 }, { "epoch": 27.75, "grad_norm": 1.1366149187088013, "learning_rate": 9.633610214313861e-05, "loss": 0.4859, "step": 9990 }, { "epoch": 27.77777777777778, "grad_norm": 1.1969281435012817, "learning_rate": 9.632574031735951e-05, "loss": 0.4873, "step": 10000 }, { "epoch": 27.805555555555557, "grad_norm": 1.06442391872406, "learning_rate": 9.631536441904364e-05, "loss": 0.4773, "step": 10010 }, { "epoch": 27.833333333333332, "grad_norm": 1.0137405395507812, "learning_rate": 9.630497445134293e-05, "loss": 0.4833, "step": 10020 }, { "epoch": 27.86111111111111, "grad_norm": 1.050545334815979, "learning_rate": 9.62945704174136e-05, "loss": 0.4816, "step": 10030 }, { "epoch": 27.88888888888889, "grad_norm": 1.0528615713119507, "learning_rate": 9.628415232041612e-05, "loss": 0.5016, "step": 10040 }, { "epoch": 27.916666666666668, "grad_norm": 1.0126546621322632, "learning_rate": 9.627372016351524e-05, "loss": 0.4835, "step": 10050 }, { "epoch": 27.944444444444443, "grad_norm": 1.04146409034729, "learning_rate": 9.626327394987995e-05, "loss": 0.4942, "step": 10060 }, { "epoch": 27.97222222222222, "grad_norm": 1.049981951713562, "learning_rate": 9.625281368268355e-05, "loss": 0.4841, "step": 10070 }, { "epoch": 28.0, "grad_norm": 1.0796629190444946, "learning_rate": 9.624233936510357e-05, "loss": 0.4777, "step": 10080 }, { "epoch": 28.02777777777778, "grad_norm": 1.047562837600708, "learning_rate": 9.623185100032187e-05, "loss": 0.4724, "step": 10090 }, { "epoch": 28.055555555555557, "grad_norm": 1.1197571754455566, "learning_rate": 9.62213485915245e-05, "loss": 0.4828, "step": 10100 }, { "epoch": 28.083333333333332, "grad_norm": 1.039741039276123, "learning_rate": 9.621083214190186e-05, "loss": 0.4797, "step": 10110 }, { "epoch": 28.11111111111111, "grad_norm": 1.2342488765716553, "learning_rate": 9.62003016546485e-05, "loss": 0.4735, "step": 10120 }, { "epoch": 28.13888888888889, "grad_norm": 1.2265441417694092, "learning_rate": 9.618975713296339e-05, "loss": 0.478, "step": 10130 }, { "epoch": 28.166666666666668, "grad_norm": 1.1104278564453125, "learning_rate": 9.61791985800496e-05, "loss": 0.4713, "step": 10140 }, { "epoch": 28.194444444444443, "grad_norm": 1.0849943161010742, "learning_rate": 9.616862599911458e-05, "loss": 0.4713, "step": 10150 }, { "epoch": 28.22222222222222, "grad_norm": 1.1836323738098145, "learning_rate": 9.615803939337e-05, "loss": 0.4813, "step": 10160 }, { "epoch": 28.25, "grad_norm": 1.0997586250305176, "learning_rate": 9.614743876603178e-05, "loss": 0.4751, "step": 10170 }, { "epoch": 28.27777777777778, "grad_norm": 1.2154889106750488, "learning_rate": 9.613682412032013e-05, "loss": 0.4747, "step": 10180 }, { "epoch": 28.305555555555557, "grad_norm": 1.0296413898468018, "learning_rate": 9.612619545945947e-05, "loss": 0.4819, "step": 10190 }, { "epoch": 28.333333333333332, "grad_norm": 1.1829516887664795, "learning_rate": 9.611555278667852e-05, "loss": 0.483, "step": 10200 }, { "epoch": 28.36111111111111, "grad_norm": 1.146872878074646, "learning_rate": 9.610489610521024e-05, "loss": 0.4736, "step": 10210 }, { "epoch": 28.38888888888889, "grad_norm": 1.009640097618103, "learning_rate": 9.609422541829187e-05, "loss": 0.4665, "step": 10220 }, { "epoch": 28.416666666666668, "grad_norm": 1.1527684926986694, "learning_rate": 9.608354072916486e-05, "loss": 0.4803, "step": 10230 }, { "epoch": 28.444444444444443, "grad_norm": 0.9860728979110718, "learning_rate": 9.607284204107493e-05, "loss": 0.4742, "step": 10240 }, { "epoch": 28.47222222222222, "grad_norm": 1.1745576858520508, "learning_rate": 9.606212935727208e-05, "loss": 0.4802, "step": 10250 }, { "epoch": 28.5, "grad_norm": 1.0860012769699097, "learning_rate": 9.605140268101052e-05, "loss": 0.4744, "step": 10260 }, { "epoch": 28.52777777777778, "grad_norm": 1.080436110496521, "learning_rate": 9.604066201554875e-05, "loss": 0.4895, "step": 10270 }, { "epoch": 28.555555555555557, "grad_norm": 1.0177855491638184, "learning_rate": 9.60299073641495e-05, "loss": 0.504, "step": 10280 }, { "epoch": 28.583333333333332, "grad_norm": 1.1114675998687744, "learning_rate": 9.601913873007974e-05, "loss": 0.4852, "step": 10290 }, { "epoch": 28.61111111111111, "grad_norm": 1.0669480562210083, "learning_rate": 9.60083561166107e-05, "loss": 0.4698, "step": 10300 }, { "epoch": 28.63888888888889, "grad_norm": 1.0468201637268066, "learning_rate": 9.599755952701783e-05, "loss": 0.4813, "step": 10310 }, { "epoch": 28.666666666666668, "grad_norm": 0.9592236280441284, "learning_rate": 9.598674896458089e-05, "loss": 0.4778, "step": 10320 }, { "epoch": 28.694444444444443, "grad_norm": 1.0029027462005615, "learning_rate": 9.597592443258383e-05, "loss": 0.4721, "step": 10330 }, { "epoch": 28.72222222222222, "grad_norm": 1.1900737285614014, "learning_rate": 9.596508593431483e-05, "loss": 0.4661, "step": 10340 }, { "epoch": 28.75, "grad_norm": 1.1422181129455566, "learning_rate": 9.59542334730664e-05, "loss": 0.474, "step": 10350 }, { "epoch": 28.77777777777778, "grad_norm": 0.9973180294036865, "learning_rate": 9.594336705213516e-05, "loss": 0.4663, "step": 10360 }, { "epoch": 28.805555555555557, "grad_norm": 1.2666646242141724, "learning_rate": 9.593248667482208e-05, "loss": 0.4826, "step": 10370 }, { "epoch": 28.833333333333332, "grad_norm": 1.216444730758667, "learning_rate": 9.592159234443233e-05, "loss": 0.4826, "step": 10380 }, { "epoch": 28.86111111111111, "grad_norm": 1.323413372039795, "learning_rate": 9.59106840642753e-05, "loss": 0.4685, "step": 10390 }, { "epoch": 28.88888888888889, "grad_norm": 0.971478283405304, "learning_rate": 9.589976183766467e-05, "loss": 0.4645, "step": 10400 }, { "epoch": 28.916666666666668, "grad_norm": 0.9798492789268494, "learning_rate": 9.58888256679183e-05, "loss": 0.4662, "step": 10410 }, { "epoch": 28.944444444444443, "grad_norm": 1.0107555389404297, "learning_rate": 9.587787555835832e-05, "loss": 0.4705, "step": 10420 }, { "epoch": 28.97222222222222, "grad_norm": 1.118080973625183, "learning_rate": 9.586691151231107e-05, "loss": 0.4677, "step": 10430 }, { "epoch": 29.0, "grad_norm": 1.0962576866149902, "learning_rate": 9.585593353310715e-05, "loss": 0.489, "step": 10440 }, { "epoch": 29.02777777777778, "grad_norm": 1.107435703277588, "learning_rate": 9.58449416240814e-05, "loss": 0.4701, "step": 10450 }, { "epoch": 29.055555555555557, "grad_norm": 1.2271331548690796, "learning_rate": 9.583393578857283e-05, "loss": 0.4711, "step": 10460 }, { "epoch": 29.083333333333332, "grad_norm": 1.1538604497909546, "learning_rate": 9.582291602992474e-05, "loss": 0.4705, "step": 10470 }, { "epoch": 29.11111111111111, "grad_norm": 1.0366886854171753, "learning_rate": 9.581188235148466e-05, "loss": 0.4733, "step": 10480 }, { "epoch": 29.13888888888889, "grad_norm": 1.0406254529953003, "learning_rate": 9.58008347566043e-05, "loss": 0.4624, "step": 10490 }, { "epoch": 29.166666666666668, "grad_norm": 1.130492925643921, "learning_rate": 9.578977324863965e-05, "loss": 0.469, "step": 10500 }, { "epoch": 29.194444444444443, "grad_norm": 0.9715937972068787, "learning_rate": 9.577869783095089e-05, "loss": 0.4765, "step": 10510 }, { "epoch": 29.22222222222222, "grad_norm": 1.1120985746383667, "learning_rate": 9.576760850690245e-05, "loss": 0.4834, "step": 10520 }, { "epoch": 29.25, "grad_norm": 1.3029093742370605, "learning_rate": 9.575650527986298e-05, "loss": 0.4846, "step": 10530 }, { "epoch": 29.27777777777778, "grad_norm": 1.0639647245407104, "learning_rate": 9.574538815320531e-05, "loss": 0.4734, "step": 10540 }, { "epoch": 29.305555555555557, "grad_norm": 1.0476982593536377, "learning_rate": 9.573425713030656e-05, "loss": 0.47, "step": 10550 }, { "epoch": 29.333333333333332, "grad_norm": 1.1224277019500732, "learning_rate": 9.572311221454806e-05, "loss": 0.4711, "step": 10560 }, { "epoch": 29.36111111111111, "grad_norm": 1.0360702276229858, "learning_rate": 9.57119534093153e-05, "loss": 0.4723, "step": 10570 }, { "epoch": 29.38888888888889, "grad_norm": 1.0914586782455444, "learning_rate": 9.570078071799806e-05, "loss": 0.4672, "step": 10580 }, { "epoch": 29.416666666666668, "grad_norm": 1.1180009841918945, "learning_rate": 9.568959414399028e-05, "loss": 0.4766, "step": 10590 }, { "epoch": 29.444444444444443, "grad_norm": 0.9802696704864502, "learning_rate": 9.567839369069018e-05, "loss": 0.475, "step": 10600 }, { "epoch": 29.47222222222222, "grad_norm": 1.0759350061416626, "learning_rate": 9.566717936150013e-05, "loss": 0.4776, "step": 10610 }, { "epoch": 29.5, "grad_norm": 0.9607139229774475, "learning_rate": 9.565595115982678e-05, "loss": 0.4699, "step": 10620 }, { "epoch": 29.52777777777778, "grad_norm": 1.0706286430358887, "learning_rate": 9.564470908908094e-05, "loss": 0.4802, "step": 10630 }, { "epoch": 29.555555555555557, "grad_norm": 1.1207245588302612, "learning_rate": 9.563345315267764e-05, "loss": 0.4625, "step": 10640 }, { "epoch": 29.583333333333332, "grad_norm": 1.012415885925293, "learning_rate": 9.562218335403616e-05, "loss": 0.4553, "step": 10650 }, { "epoch": 29.61111111111111, "grad_norm": 1.012382984161377, "learning_rate": 9.561089969657999e-05, "loss": 0.4666, "step": 10660 }, { "epoch": 29.63888888888889, "grad_norm": 1.1207733154296875, "learning_rate": 9.559960218373673e-05, "loss": 0.4614, "step": 10670 }, { "epoch": 29.666666666666668, "grad_norm": 1.1283265352249146, "learning_rate": 9.558829081893836e-05, "loss": 0.4635, "step": 10680 }, { "epoch": 29.694444444444443, "grad_norm": 1.05988609790802, "learning_rate": 9.55769656056209e-05, "loss": 0.4684, "step": 10690 }, { "epoch": 29.72222222222222, "grad_norm": 1.0229146480560303, "learning_rate": 9.556562654722469e-05, "loss": 0.4751, "step": 10700 }, { "epoch": 29.75, "grad_norm": 1.1140981912612915, "learning_rate": 9.555427364719422e-05, "loss": 0.4753, "step": 10710 }, { "epoch": 29.77777777777778, "grad_norm": 1.0121873617172241, "learning_rate": 9.55429069089782e-05, "loss": 0.4629, "step": 10720 }, { "epoch": 29.805555555555557, "grad_norm": 1.1142841577529907, "learning_rate": 9.553152633602956e-05, "loss": 0.4625, "step": 10730 }, { "epoch": 29.833333333333332, "grad_norm": 1.1790072917938232, "learning_rate": 9.552013193180543e-05, "loss": 0.4752, "step": 10740 }, { "epoch": 29.86111111111111, "grad_norm": 1.011474847793579, "learning_rate": 9.550872369976707e-05, "loss": 0.4756, "step": 10750 }, { "epoch": 29.88888888888889, "grad_norm": 1.083573818206787, "learning_rate": 9.549730164338007e-05, "loss": 0.4752, "step": 10760 }, { "epoch": 29.916666666666668, "grad_norm": 1.1197314262390137, "learning_rate": 9.548586576611408e-05, "loss": 0.4764, "step": 10770 }, { "epoch": 29.944444444444443, "grad_norm": 1.0212528705596924, "learning_rate": 9.54744160714431e-05, "loss": 0.4772, "step": 10780 }, { "epoch": 29.97222222222222, "grad_norm": 1.0464553833007812, "learning_rate": 9.546295256284516e-05, "loss": 0.4675, "step": 10790 }, { "epoch": 30.0, "grad_norm": 1.1594187021255493, "learning_rate": 9.545147524380265e-05, "loss": 0.4735, "step": 10800 }, { "epoch": 30.02777777777778, "grad_norm": 1.1205204725265503, "learning_rate": 9.543998411780201e-05, "loss": 0.4598, "step": 10810 }, { "epoch": 30.055555555555557, "grad_norm": 1.12398362159729, "learning_rate": 9.542847918833397e-05, "loss": 0.479, "step": 10820 }, { "epoch": 30.083333333333332, "grad_norm": 1.285439372062683, "learning_rate": 9.541696045889343e-05, "loss": 0.495, "step": 10830 }, { "epoch": 30.11111111111111, "grad_norm": 0.9967326521873474, "learning_rate": 9.540542793297947e-05, "loss": 0.4759, "step": 10840 }, { "epoch": 30.13888888888889, "grad_norm": 1.0587372779846191, "learning_rate": 9.539388161409537e-05, "loss": 0.4676, "step": 10850 }, { "epoch": 30.166666666666668, "grad_norm": 1.136666178703308, "learning_rate": 9.538232150574857e-05, "loss": 0.461, "step": 10860 }, { "epoch": 30.194444444444443, "grad_norm": 0.9398528337478638, "learning_rate": 9.537074761145076e-05, "loss": 0.4575, "step": 10870 }, { "epoch": 30.22222222222222, "grad_norm": 1.061644434928894, "learning_rate": 9.535915993471778e-05, "loss": 0.4745, "step": 10880 }, { "epoch": 30.25, "grad_norm": 1.1952366828918457, "learning_rate": 9.534755847906964e-05, "loss": 0.4606, "step": 10890 }, { "epoch": 30.27777777777778, "grad_norm": 1.127639889717102, "learning_rate": 9.533594324803057e-05, "loss": 0.4629, "step": 10900 }, { "epoch": 30.305555555555557, "grad_norm": 1.0295088291168213, "learning_rate": 9.532431424512895e-05, "loss": 0.4699, "step": 10910 }, { "epoch": 30.333333333333332, "grad_norm": 1.2557015419006348, "learning_rate": 9.531267147389741e-05, "loss": 0.4572, "step": 10920 }, { "epoch": 30.36111111111111, "grad_norm": 1.12253999710083, "learning_rate": 9.530101493787266e-05, "loss": 0.4729, "step": 10930 }, { "epoch": 30.38888888888889, "grad_norm": 1.1339937448501587, "learning_rate": 9.528934464059571e-05, "loss": 0.4566, "step": 10940 }, { "epoch": 30.416666666666668, "grad_norm": 1.079160213470459, "learning_rate": 9.527766058561163e-05, "loss": 0.4658, "step": 10950 }, { "epoch": 30.444444444444443, "grad_norm": 1.1184194087982178, "learning_rate": 9.526596277646976e-05, "loss": 0.4667, "step": 10960 }, { "epoch": 30.47222222222222, "grad_norm": 1.1378034353256226, "learning_rate": 9.525425121672358e-05, "loss": 0.4673, "step": 10970 }, { "epoch": 30.5, "grad_norm": 1.2054013013839722, "learning_rate": 9.524252590993074e-05, "loss": 0.4773, "step": 10980 }, { "epoch": 30.52777777777778, "grad_norm": 1.079720377922058, "learning_rate": 9.523078685965309e-05, "loss": 0.4799, "step": 10990 }, { "epoch": 30.555555555555557, "grad_norm": 0.9786263108253479, "learning_rate": 9.521903406945664e-05, "loss": 0.4883, "step": 11000 }, { "epoch": 30.583333333333332, "grad_norm": 1.1971220970153809, "learning_rate": 9.520726754291158e-05, "loss": 0.4594, "step": 11010 }, { "epoch": 30.61111111111111, "grad_norm": 1.214547872543335, "learning_rate": 9.519548728359227e-05, "loss": 0.4696, "step": 11020 }, { "epoch": 30.63888888888889, "grad_norm": 1.194610357284546, "learning_rate": 9.518369329507726e-05, "loss": 0.4925, "step": 11030 }, { "epoch": 30.666666666666668, "grad_norm": 1.0112541913986206, "learning_rate": 9.51718855809492e-05, "loss": 0.4571, "step": 11040 }, { "epoch": 30.694444444444443, "grad_norm": 1.105913519859314, "learning_rate": 9.516006414479502e-05, "loss": 0.4642, "step": 11050 }, { "epoch": 30.72222222222222, "grad_norm": 1.0552407503128052, "learning_rate": 9.514822899020572e-05, "loss": 0.4538, "step": 11060 }, { "epoch": 30.75, "grad_norm": 1.1747634410858154, "learning_rate": 9.513638012077654e-05, "loss": 0.4725, "step": 11070 }, { "epoch": 30.77777777777778, "grad_norm": 1.159045934677124, "learning_rate": 9.512451754010683e-05, "loss": 0.4627, "step": 11080 }, { "epoch": 30.805555555555557, "grad_norm": 1.1371920108795166, "learning_rate": 9.511264125180013e-05, "loss": 0.4648, "step": 11090 }, { "epoch": 30.833333333333332, "grad_norm": 1.1665030717849731, "learning_rate": 9.510075125946414e-05, "loss": 0.4777, "step": 11100 }, { "epoch": 30.86111111111111, "grad_norm": 1.0544878244400024, "learning_rate": 9.508884756671075e-05, "loss": 0.4872, "step": 11110 }, { "epoch": 30.88888888888889, "grad_norm": 1.1819250583648682, "learning_rate": 9.507693017715596e-05, "loss": 0.4723, "step": 11120 }, { "epoch": 30.916666666666668, "grad_norm": 1.0769087076187134, "learning_rate": 9.506499909441997e-05, "loss": 0.4644, "step": 11130 }, { "epoch": 30.944444444444443, "grad_norm": 0.9925504326820374, "learning_rate": 9.505305432212713e-05, "loss": 0.4806, "step": 11140 }, { "epoch": 30.97222222222222, "grad_norm": 1.147393822669983, "learning_rate": 9.504109586390595e-05, "loss": 0.4737, "step": 11150 }, { "epoch": 31.0, "grad_norm": 1.1155574321746826, "learning_rate": 9.502912372338908e-05, "loss": 0.4821, "step": 11160 }, { "epoch": 31.02777777777778, "grad_norm": 1.3038854598999023, "learning_rate": 9.501713790421335e-05, "loss": 0.465, "step": 11170 }, { "epoch": 31.055555555555557, "grad_norm": 1.2898101806640625, "learning_rate": 9.500513841001974e-05, "loss": 0.4646, "step": 11180 }, { "epoch": 31.083333333333332, "grad_norm": 1.073869228363037, "learning_rate": 9.499312524445336e-05, "loss": 0.4791, "step": 11190 }, { "epoch": 31.11111111111111, "grad_norm": 1.051421046257019, "learning_rate": 9.498109841116351e-05, "loss": 0.4662, "step": 11200 }, { "epoch": 31.13888888888889, "grad_norm": 1.1452996730804443, "learning_rate": 9.496905791380363e-05, "loss": 0.4789, "step": 11210 }, { "epoch": 31.166666666666668, "grad_norm": 1.0820488929748535, "learning_rate": 9.495700375603129e-05, "loss": 0.4631, "step": 11220 }, { "epoch": 31.194444444444443, "grad_norm": 1.1504907608032227, "learning_rate": 9.494493594150822e-05, "loss": 0.4706, "step": 11230 }, { "epoch": 31.22222222222222, "grad_norm": 1.0963772535324097, "learning_rate": 9.493285447390032e-05, "loss": 0.4605, "step": 11240 }, { "epoch": 31.25, "grad_norm": 1.0593972206115723, "learning_rate": 9.492075935687761e-05, "loss": 0.4789, "step": 11250 }, { "epoch": 31.27777777777778, "grad_norm": 1.195516586303711, "learning_rate": 9.490865059411427e-05, "loss": 0.4496, "step": 11260 }, { "epoch": 31.305555555555557, "grad_norm": 1.0942906141281128, "learning_rate": 9.489652818928863e-05, "loss": 0.4733, "step": 11270 }, { "epoch": 31.333333333333332, "grad_norm": 1.1319717168807983, "learning_rate": 9.488439214608315e-05, "loss": 0.4597, "step": 11280 }, { "epoch": 31.36111111111111, "grad_norm": 1.1214392185211182, "learning_rate": 9.487224246818444e-05, "loss": 0.4677, "step": 11290 }, { "epoch": 31.38888888888889, "grad_norm": 1.0253746509552002, "learning_rate": 9.486007915928325e-05, "loss": 0.4633, "step": 11300 }, { "epoch": 31.416666666666668, "grad_norm": 1.0580105781555176, "learning_rate": 9.484790222307448e-05, "loss": 0.456, "step": 11310 }, { "epoch": 31.444444444444443, "grad_norm": 1.0612961053848267, "learning_rate": 9.483571166325716e-05, "loss": 0.4606, "step": 11320 }, { "epoch": 31.47222222222222, "grad_norm": 0.9833794236183167, "learning_rate": 9.482350748353444e-05, "loss": 0.4716, "step": 11330 }, { "epoch": 31.5, "grad_norm": 1.1350574493408203, "learning_rate": 9.481128968761363e-05, "loss": 0.4816, "step": 11340 }, { "epoch": 31.52777777777778, "grad_norm": 0.9753798246383667, "learning_rate": 9.479905827920621e-05, "loss": 0.4725, "step": 11350 }, { "epoch": 31.555555555555557, "grad_norm": 1.1571884155273438, "learning_rate": 9.478681326202773e-05, "loss": 0.4546, "step": 11360 }, { "epoch": 31.583333333333332, "grad_norm": 1.083578109741211, "learning_rate": 9.477455463979791e-05, "loss": 0.4859, "step": 11370 }, { "epoch": 31.61111111111111, "grad_norm": 0.9999598860740662, "learning_rate": 9.476228241624059e-05, "loss": 0.4577, "step": 11380 }, { "epoch": 31.63888888888889, "grad_norm": 0.968291163444519, "learning_rate": 9.474999659508374e-05, "loss": 0.4544, "step": 11390 }, { "epoch": 31.666666666666668, "grad_norm": 1.1034234762191772, "learning_rate": 9.47376971800595e-05, "loss": 0.4559, "step": 11400 }, { "epoch": 31.694444444444443, "grad_norm": 1.0436636209487915, "learning_rate": 9.472538417490409e-05, "loss": 0.4618, "step": 11410 }, { "epoch": 31.72222222222222, "grad_norm": 1.035047173500061, "learning_rate": 9.471305758335784e-05, "loss": 0.4701, "step": 11420 }, { "epoch": 31.75, "grad_norm": 1.0148295164108276, "learning_rate": 9.47007174091653e-05, "loss": 0.4649, "step": 11430 }, { "epoch": 31.77777777777778, "grad_norm": 1.1562445163726807, "learning_rate": 9.468836365607507e-05, "loss": 0.4634, "step": 11440 }, { "epoch": 31.805555555555557, "grad_norm": 1.1988191604614258, "learning_rate": 9.467599632783988e-05, "loss": 0.4725, "step": 11450 }, { "epoch": 31.833333333333332, "grad_norm": 1.0014550685882568, "learning_rate": 9.466361542821662e-05, "loss": 0.4863, "step": 11460 }, { "epoch": 31.86111111111111, "grad_norm": 1.1457366943359375, "learning_rate": 9.465122096096625e-05, "loss": 0.4724, "step": 11470 }, { "epoch": 31.88888888888889, "grad_norm": 1.0954633951187134, "learning_rate": 9.463881292985391e-05, "loss": 0.4693, "step": 11480 }, { "epoch": 31.916666666666668, "grad_norm": 0.9716088175773621, "learning_rate": 9.462639133864881e-05, "loss": 0.4585, "step": 11490 }, { "epoch": 31.944444444444443, "grad_norm": 1.148139238357544, "learning_rate": 9.461395619112432e-05, "loss": 0.4652, "step": 11500 }, { "epoch": 31.97222222222222, "grad_norm": 1.0623798370361328, "learning_rate": 9.460150749105791e-05, "loss": 0.4595, "step": 11510 }, { "epoch": 32.0, "grad_norm": 1.068311333656311, "learning_rate": 9.458904524223116e-05, "loss": 0.4491, "step": 11520 }, { "epoch": 32.02777777777778, "grad_norm": 1.093930959701538, "learning_rate": 9.457656944842976e-05, "loss": 0.4716, "step": 11530 }, { "epoch": 32.05555555555556, "grad_norm": 1.0030279159545898, "learning_rate": 9.456408011344353e-05, "loss": 0.4559, "step": 11540 }, { "epoch": 32.083333333333336, "grad_norm": 1.243802785873413, "learning_rate": 9.455157724106643e-05, "loss": 0.4691, "step": 11550 }, { "epoch": 32.111111111111114, "grad_norm": 1.1864745616912842, "learning_rate": 9.453906083509647e-05, "loss": 0.4621, "step": 11560 }, { "epoch": 32.138888888888886, "grad_norm": 1.026894211769104, "learning_rate": 9.45265308993358e-05, "loss": 0.4601, "step": 11570 }, { "epoch": 32.166666666666664, "grad_norm": 1.0236951112747192, "learning_rate": 9.451398743759071e-05, "loss": 0.4444, "step": 11580 }, { "epoch": 32.19444444444444, "grad_norm": 1.2240664958953857, "learning_rate": 9.450143045367156e-05, "loss": 0.4745, "step": 11590 }, { "epoch": 32.22222222222222, "grad_norm": 0.9833422899246216, "learning_rate": 9.448885995139283e-05, "loss": 0.4583, "step": 11600 }, { "epoch": 32.25, "grad_norm": 1.0903602838516235, "learning_rate": 9.44762759345731e-05, "loss": 0.4564, "step": 11610 }, { "epoch": 32.27777777777778, "grad_norm": 1.0992469787597656, "learning_rate": 9.446367840703509e-05, "loss": 0.4614, "step": 11620 }, { "epoch": 32.30555555555556, "grad_norm": 1.0131362676620483, "learning_rate": 9.445106737260556e-05, "loss": 0.4543, "step": 11630 }, { "epoch": 32.333333333333336, "grad_norm": 1.0443813800811768, "learning_rate": 9.443844283511543e-05, "loss": 0.4532, "step": 11640 }, { "epoch": 32.361111111111114, "grad_norm": 1.061132788658142, "learning_rate": 9.442580479839968e-05, "loss": 0.4505, "step": 11650 }, { "epoch": 32.388888888888886, "grad_norm": 1.1600122451782227, "learning_rate": 9.441315326629745e-05, "loss": 0.4494, "step": 11660 }, { "epoch": 32.416666666666664, "grad_norm": 1.1435779333114624, "learning_rate": 9.44004882426519e-05, "loss": 0.4543, "step": 11670 }, { "epoch": 32.44444444444444, "grad_norm": 1.052506685256958, "learning_rate": 9.438780973131037e-05, "loss": 0.4544, "step": 11680 }, { "epoch": 32.47222222222222, "grad_norm": 1.07728910446167, "learning_rate": 9.437511773612423e-05, "loss": 0.4677, "step": 11690 }, { "epoch": 32.5, "grad_norm": 0.9934169054031372, "learning_rate": 9.436241226094896e-05, "loss": 0.4613, "step": 11700 }, { "epoch": 32.52777777777778, "grad_norm": 0.9426023960113525, "learning_rate": 9.434969330964418e-05, "loss": 0.4843, "step": 11710 }, { "epoch": 32.55555555555556, "grad_norm": 1.0600041151046753, "learning_rate": 9.433696088607356e-05, "loss": 0.4522, "step": 11720 }, { "epoch": 32.583333333333336, "grad_norm": 1.025963544845581, "learning_rate": 9.432421499410486e-05, "loss": 0.4576, "step": 11730 }, { "epoch": 32.611111111111114, "grad_norm": 1.0038188695907593, "learning_rate": 9.431145563760998e-05, "loss": 0.4531, "step": 11740 }, { "epoch": 32.638888888888886, "grad_norm": 1.0725425481796265, "learning_rate": 9.429868282046484e-05, "loss": 0.4612, "step": 11750 }, { "epoch": 32.666666666666664, "grad_norm": 0.9535105228424072, "learning_rate": 9.428589654654951e-05, "loss": 0.4536, "step": 11760 }, { "epoch": 32.69444444444444, "grad_norm": 1.1349048614501953, "learning_rate": 9.42730968197481e-05, "loss": 0.4482, "step": 11770 }, { "epoch": 32.72222222222222, "grad_norm": 1.0787104368209839, "learning_rate": 9.426028364394883e-05, "loss": 0.4478, "step": 11780 }, { "epoch": 32.75, "grad_norm": 1.042946457862854, "learning_rate": 9.424745702304402e-05, "loss": 0.4568, "step": 11790 }, { "epoch": 32.77777777777778, "grad_norm": 1.0017383098602295, "learning_rate": 9.423461696093006e-05, "loss": 0.4617, "step": 11800 }, { "epoch": 32.80555555555556, "grad_norm": 1.2160089015960693, "learning_rate": 9.422176346150741e-05, "loss": 0.4605, "step": 11810 }, { "epoch": 32.833333333333336, "grad_norm": 1.0851976871490479, "learning_rate": 9.420889652868063e-05, "loss": 0.4687, "step": 11820 }, { "epoch": 32.861111111111114, "grad_norm": 1.1702202558517456, "learning_rate": 9.419601616635836e-05, "loss": 0.4666, "step": 11830 }, { "epoch": 32.888888888888886, "grad_norm": 1.1032030582427979, "learning_rate": 9.418312237845331e-05, "loss": 0.4543, "step": 11840 }, { "epoch": 32.916666666666664, "grad_norm": 1.0236226320266724, "learning_rate": 9.417021516888225e-05, "loss": 0.4574, "step": 11850 }, { "epoch": 32.94444444444444, "grad_norm": 1.081404209136963, "learning_rate": 9.415729454156608e-05, "loss": 0.474, "step": 11860 }, { "epoch": 32.97222222222222, "grad_norm": 1.2017813920974731, "learning_rate": 9.414436050042973e-05, "loss": 0.472, "step": 11870 }, { "epoch": 33.0, "grad_norm": 1.1391186714172363, "learning_rate": 9.413141304940223e-05, "loss": 0.4451, "step": 11880 }, { "epoch": 33.02777777777778, "grad_norm": 1.1964545249938965, "learning_rate": 9.411845219241666e-05, "loss": 0.4557, "step": 11890 }, { "epoch": 33.05555555555556, "grad_norm": 1.0916777849197388, "learning_rate": 9.410547793341021e-05, "loss": 0.4634, "step": 11900 }, { "epoch": 33.083333333333336, "grad_norm": 0.9798954725265503, "learning_rate": 9.409249027632408e-05, "loss": 0.4575, "step": 11910 }, { "epoch": 33.111111111111114, "grad_norm": 0.9767063856124878, "learning_rate": 9.407948922510362e-05, "loss": 0.4477, "step": 11920 }, { "epoch": 33.138888888888886, "grad_norm": 1.0749108791351318, "learning_rate": 9.406647478369817e-05, "loss": 0.4466, "step": 11930 }, { "epoch": 33.166666666666664, "grad_norm": 0.9664678573608398, "learning_rate": 9.405344695606118e-05, "loss": 0.4656, "step": 11940 }, { "epoch": 33.19444444444444, "grad_norm": 0.9997530579566956, "learning_rate": 9.404040574615018e-05, "loss": 0.4566, "step": 11950 }, { "epoch": 33.22222222222222, "grad_norm": 1.0613455772399902, "learning_rate": 9.402735115792674e-05, "loss": 0.4505, "step": 11960 }, { "epoch": 33.25, "grad_norm": 1.1405795812606812, "learning_rate": 9.401428319535649e-05, "loss": 0.4533, "step": 11970 }, { "epoch": 33.27777777777778, "grad_norm": 1.1087145805358887, "learning_rate": 9.400120186240912e-05, "loss": 0.4525, "step": 11980 }, { "epoch": 33.30555555555556, "grad_norm": 0.9601554274559021, "learning_rate": 9.398810716305844e-05, "loss": 0.4529, "step": 11990 }, { "epoch": 33.333333333333336, "grad_norm": 0.9673436880111694, "learning_rate": 9.397499910128222e-05, "loss": 0.4533, "step": 12000 }, { "epoch": 33.361111111111114, "grad_norm": 1.1362271308898926, "learning_rate": 9.396187768106237e-05, "loss": 0.4562, "step": 12010 }, { "epoch": 33.388888888888886, "grad_norm": 1.0838239192962646, "learning_rate": 9.394874290638482e-05, "loss": 0.461, "step": 12020 }, { "epoch": 33.416666666666664, "grad_norm": 1.1425223350524902, "learning_rate": 9.393559478123959e-05, "loss": 0.4619, "step": 12030 }, { "epoch": 33.44444444444444, "grad_norm": 1.0179312229156494, "learning_rate": 9.39224333096207e-05, "loss": 0.4597, "step": 12040 }, { "epoch": 33.47222222222222, "grad_norm": 1.253058910369873, "learning_rate": 9.390925849552629e-05, "loss": 0.4769, "step": 12050 }, { "epoch": 33.5, "grad_norm": 1.1861222982406616, "learning_rate": 9.389607034295849e-05, "loss": 0.4708, "step": 12060 }, { "epoch": 33.52777777777778, "grad_norm": 1.0732221603393555, "learning_rate": 9.388286885592355e-05, "loss": 0.4548, "step": 12070 }, { "epoch": 33.55555555555556, "grad_norm": 1.229371190071106, "learning_rate": 9.386965403843168e-05, "loss": 0.4547, "step": 12080 }, { "epoch": 33.583333333333336, "grad_norm": 1.0561856031417847, "learning_rate": 9.385642589449726e-05, "loss": 0.4445, "step": 12090 }, { "epoch": 33.611111111111114, "grad_norm": 0.9751131534576416, "learning_rate": 9.38431844281386e-05, "loss": 0.4617, "step": 12100 }, { "epoch": 33.638888888888886, "grad_norm": 1.013195514678955, "learning_rate": 9.38299296433781e-05, "loss": 0.4524, "step": 12110 }, { "epoch": 33.666666666666664, "grad_norm": 1.0424195528030396, "learning_rate": 9.381666154424226e-05, "loss": 0.4564, "step": 12120 }, { "epoch": 33.69444444444444, "grad_norm": 1.05198073387146, "learning_rate": 9.380338013476157e-05, "loss": 0.4446, "step": 12130 }, { "epoch": 33.72222222222222, "grad_norm": 1.0077288150787354, "learning_rate": 9.379008541897054e-05, "loss": 0.4575, "step": 12140 }, { "epoch": 33.75, "grad_norm": 0.9994299411773682, "learning_rate": 9.377677740090777e-05, "loss": 0.456, "step": 12150 }, { "epoch": 33.77777777777778, "grad_norm": 1.0111653804779053, "learning_rate": 9.376345608461588e-05, "loss": 0.4653, "step": 12160 }, { "epoch": 33.80555555555556, "grad_norm": 1.099918007850647, "learning_rate": 9.375012147414155e-05, "loss": 0.4499, "step": 12170 }, { "epoch": 33.833333333333336, "grad_norm": 0.9776371121406555, "learning_rate": 9.373677357353545e-05, "loss": 0.4541, "step": 12180 }, { "epoch": 33.861111111111114, "grad_norm": 1.0665359497070312, "learning_rate": 9.372341238685237e-05, "loss": 0.4771, "step": 12190 }, { "epoch": 33.888888888888886, "grad_norm": 1.085218071937561, "learning_rate": 9.371003791815102e-05, "loss": 0.4431, "step": 12200 }, { "epoch": 33.916666666666664, "grad_norm": 1.0322829484939575, "learning_rate": 9.369665017149429e-05, "loss": 0.474, "step": 12210 }, { "epoch": 33.94444444444444, "grad_norm": 0.9948261976242065, "learning_rate": 9.368324915094895e-05, "loss": 0.455, "step": 12220 }, { "epoch": 33.97222222222222, "grad_norm": 1.0803265571594238, "learning_rate": 9.366983486058591e-05, "loss": 0.4651, "step": 12230 }, { "epoch": 34.0, "grad_norm": 1.0120294094085693, "learning_rate": 9.365640730448009e-05, "loss": 0.4465, "step": 12240 }, { "epoch": 34.02777777777778, "grad_norm": 1.112246036529541, "learning_rate": 9.36429664867104e-05, "loss": 0.4457, "step": 12250 }, { "epoch": 34.05555555555556, "grad_norm": 1.086799144744873, "learning_rate": 9.362951241135982e-05, "loss": 0.467, "step": 12260 }, { "epoch": 34.083333333333336, "grad_norm": 1.248932123184204, "learning_rate": 9.361604508251534e-05, "loss": 0.4634, "step": 12270 }, { "epoch": 34.111111111111114, "grad_norm": 1.005892038345337, "learning_rate": 9.360256450426799e-05, "loss": 0.4532, "step": 12280 }, { "epoch": 34.138888888888886, "grad_norm": 1.0680310726165771, "learning_rate": 9.358907068071279e-05, "loss": 0.4524, "step": 12290 }, { "epoch": 34.166666666666664, "grad_norm": 1.0496641397476196, "learning_rate": 9.357556361594882e-05, "loss": 0.4367, "step": 12300 }, { "epoch": 34.19444444444444, "grad_norm": 0.9217042326927185, "learning_rate": 9.356204331407917e-05, "loss": 0.4504, "step": 12310 }, { "epoch": 34.22222222222222, "grad_norm": 0.9972006678581238, "learning_rate": 9.354850977921094e-05, "loss": 0.452, "step": 12320 }, { "epoch": 34.25, "grad_norm": 1.1270267963409424, "learning_rate": 9.353496301545529e-05, "loss": 0.4496, "step": 12330 }, { "epoch": 34.27777777777778, "grad_norm": 1.0215107202529907, "learning_rate": 9.352140302692733e-05, "loss": 0.4494, "step": 12340 }, { "epoch": 34.30555555555556, "grad_norm": 0.9443902373313904, "learning_rate": 9.350782981774627e-05, "loss": 0.4532, "step": 12350 }, { "epoch": 34.333333333333336, "grad_norm": 1.0189152956008911, "learning_rate": 9.349424339203526e-05, "loss": 0.4397, "step": 12360 }, { "epoch": 34.361111111111114, "grad_norm": 1.0400813817977905, "learning_rate": 9.34806437539215e-05, "loss": 0.4401, "step": 12370 }, { "epoch": 34.388888888888886, "grad_norm": 0.9087105393409729, "learning_rate": 9.346703090753622e-05, "loss": 0.4635, "step": 12380 }, { "epoch": 34.416666666666664, "grad_norm": 0.9699682593345642, "learning_rate": 9.345340485701461e-05, "loss": 0.4365, "step": 12390 }, { "epoch": 34.44444444444444, "grad_norm": 1.0289716720581055, "learning_rate": 9.343976560649595e-05, "loss": 0.4713, "step": 12400 }, { "epoch": 34.47222222222222, "grad_norm": 1.2124781608581543, "learning_rate": 9.342611316012344e-05, "loss": 0.4708, "step": 12410 }, { "epoch": 34.5, "grad_norm": 1.0700753927230835, "learning_rate": 9.341244752204437e-05, "loss": 0.4631, "step": 12420 }, { "epoch": 34.52777777777778, "grad_norm": 0.9348348379135132, "learning_rate": 9.339876869640995e-05, "loss": 0.4614, "step": 12430 }, { "epoch": 34.55555555555556, "grad_norm": 1.0394774675369263, "learning_rate": 9.33850766873755e-05, "loss": 0.4484, "step": 12440 }, { "epoch": 34.583333333333336, "grad_norm": 0.9375025629997253, "learning_rate": 9.337137149910028e-05, "loss": 0.4326, "step": 12450 }, { "epoch": 34.611111111111114, "grad_norm": 1.024649977684021, "learning_rate": 9.335765313574753e-05, "loss": 0.4606, "step": 12460 }, { "epoch": 34.638888888888886, "grad_norm": 1.0202101469039917, "learning_rate": 9.334392160148457e-05, "loss": 0.4565, "step": 12470 }, { "epoch": 34.666666666666664, "grad_norm": 1.118481993675232, "learning_rate": 9.333017690048264e-05, "loss": 0.4546, "step": 12480 }, { "epoch": 34.69444444444444, "grad_norm": 1.0353014469146729, "learning_rate": 9.331641903691706e-05, "loss": 0.4526, "step": 12490 }, { "epoch": 34.72222222222222, "grad_norm": 1.073121190071106, "learning_rate": 9.330264801496707e-05, "loss": 0.4547, "step": 12500 }, { "epoch": 34.75, "grad_norm": 1.0874545574188232, "learning_rate": 9.328886383881594e-05, "loss": 0.4411, "step": 12510 }, { "epoch": 34.77777777777778, "grad_norm": 1.0936354398727417, "learning_rate": 9.327506651265095e-05, "loss": 0.4447, "step": 12520 }, { "epoch": 34.80555555555556, "grad_norm": 0.9314836859703064, "learning_rate": 9.326125604066338e-05, "loss": 0.4477, "step": 12530 }, { "epoch": 34.833333333333336, "grad_norm": 1.0441383123397827, "learning_rate": 9.324743242704847e-05, "loss": 0.4561, "step": 12540 }, { "epoch": 34.861111111111114, "grad_norm": 1.0141081809997559, "learning_rate": 9.323359567600546e-05, "loss": 0.4543, "step": 12550 }, { "epoch": 34.888888888888886, "grad_norm": 0.9611909985542297, "learning_rate": 9.321974579173761e-05, "loss": 0.4397, "step": 12560 }, { "epoch": 34.916666666666664, "grad_norm": 1.032280683517456, "learning_rate": 9.320588277845213e-05, "loss": 0.4553, "step": 12570 }, { "epoch": 34.94444444444444, "grad_norm": 1.0264511108398438, "learning_rate": 9.319200664036026e-05, "loss": 0.4558, "step": 12580 }, { "epoch": 34.97222222222222, "grad_norm": 1.021620512008667, "learning_rate": 9.31781173816772e-05, "loss": 0.4386, "step": 12590 }, { "epoch": 35.0, "grad_norm": 1.1263211965560913, "learning_rate": 9.316421500662212e-05, "loss": 0.4598, "step": 12600 }, { "epoch": 35.02777777777778, "grad_norm": 1.0368796586990356, "learning_rate": 9.31502995194182e-05, "loss": 0.4374, "step": 12610 }, { "epoch": 35.05555555555556, "grad_norm": 1.0738285779953003, "learning_rate": 9.31363709242926e-05, "loss": 0.4418, "step": 12620 }, { "epoch": 35.083333333333336, "grad_norm": 0.9605795741081238, "learning_rate": 9.312242922547647e-05, "loss": 0.4502, "step": 12630 }, { "epoch": 35.111111111111114, "grad_norm": 1.043331503868103, "learning_rate": 9.310847442720492e-05, "loss": 0.4456, "step": 12640 }, { "epoch": 35.138888888888886, "grad_norm": 1.0305379629135132, "learning_rate": 9.309450653371706e-05, "loss": 0.4409, "step": 12650 }, { "epoch": 35.166666666666664, "grad_norm": 1.04071044921875, "learning_rate": 9.308052554925595e-05, "loss": 0.4505, "step": 12660 }, { "epoch": 35.19444444444444, "grad_norm": 1.0461891889572144, "learning_rate": 9.306653147806867e-05, "loss": 0.4529, "step": 12670 }, { "epoch": 35.22222222222222, "grad_norm": 1.1076604127883911, "learning_rate": 9.305252432440622e-05, "loss": 0.4368, "step": 12680 }, { "epoch": 35.25, "grad_norm": 1.1267516613006592, "learning_rate": 9.303850409252361e-05, "loss": 0.4561, "step": 12690 }, { "epoch": 35.27777777777778, "grad_norm": 1.1552820205688477, "learning_rate": 9.302447078667985e-05, "loss": 0.4534, "step": 12700 }, { "epoch": 35.30555555555556, "grad_norm": 1.0563323497772217, "learning_rate": 9.301042441113783e-05, "loss": 0.4309, "step": 12710 }, { "epoch": 35.333333333333336, "grad_norm": 1.022520899772644, "learning_rate": 9.299636497016451e-05, "loss": 0.4492, "step": 12720 }, { "epoch": 35.361111111111114, "grad_norm": 1.0538161993026733, "learning_rate": 9.298229246803076e-05, "loss": 0.4477, "step": 12730 }, { "epoch": 35.388888888888886, "grad_norm": 1.0719826221466064, "learning_rate": 9.296820690901144e-05, "loss": 0.4465, "step": 12740 }, { "epoch": 35.416666666666664, "grad_norm": 0.990983784198761, "learning_rate": 9.295410829738539e-05, "loss": 0.4441, "step": 12750 }, { "epoch": 35.44444444444444, "grad_norm": 0.9841418862342834, "learning_rate": 9.293999663743535e-05, "loss": 0.4476, "step": 12760 }, { "epoch": 35.47222222222222, "grad_norm": 1.0757654905319214, "learning_rate": 9.292587193344813e-05, "loss": 0.445, "step": 12770 }, { "epoch": 35.5, "grad_norm": 1.0628470182418823, "learning_rate": 9.291173418971437e-05, "loss": 0.4588, "step": 12780 }, { "epoch": 35.52777777777778, "grad_norm": 1.1148340702056885, "learning_rate": 9.28975834105288e-05, "loss": 0.4555, "step": 12790 }, { "epoch": 35.55555555555556, "grad_norm": 1.125744104385376, "learning_rate": 9.288341960019004e-05, "loss": 0.4532, "step": 12800 }, { "epoch": 35.583333333333336, "grad_norm": 1.2228899002075195, "learning_rate": 9.286924276300067e-05, "loss": 0.4662, "step": 12810 }, { "epoch": 35.611111111111114, "grad_norm": 1.067991018295288, "learning_rate": 9.285505290326726e-05, "loss": 0.4598, "step": 12820 }, { "epoch": 35.638888888888886, "grad_norm": 1.0429739952087402, "learning_rate": 9.284085002530027e-05, "loss": 0.4585, "step": 12830 }, { "epoch": 35.666666666666664, "grad_norm": 1.008601427078247, "learning_rate": 9.282663413341422e-05, "loss": 0.4454, "step": 12840 }, { "epoch": 35.69444444444444, "grad_norm": 1.0699278116226196, "learning_rate": 9.281240523192747e-05, "loss": 0.4421, "step": 12850 }, { "epoch": 35.72222222222222, "grad_norm": 1.0408090353012085, "learning_rate": 9.279816332516242e-05, "loss": 0.4449, "step": 12860 }, { "epoch": 35.75, "grad_norm": 0.991386353969574, "learning_rate": 9.278390841744536e-05, "loss": 0.4574, "step": 12870 }, { "epoch": 35.77777777777778, "grad_norm": 1.018779993057251, "learning_rate": 9.276964051310658e-05, "loss": 0.4479, "step": 12880 }, { "epoch": 35.80555555555556, "grad_norm": 0.9781907796859741, "learning_rate": 9.275535961648027e-05, "loss": 0.4461, "step": 12890 }, { "epoch": 35.833333333333336, "grad_norm": 1.0041651725769043, "learning_rate": 9.274106573190459e-05, "loss": 0.435, "step": 12900 }, { "epoch": 35.861111111111114, "grad_norm": 1.0129191875457764, "learning_rate": 9.272675886372168e-05, "loss": 0.4485, "step": 12910 }, { "epoch": 35.888888888888886, "grad_norm": 1.0790419578552246, "learning_rate": 9.271243901627754e-05, "loss": 0.4551, "step": 12920 }, { "epoch": 35.916666666666664, "grad_norm": 1.1764365434646606, "learning_rate": 9.269810619392219e-05, "loss": 0.4456, "step": 12930 }, { "epoch": 35.94444444444444, "grad_norm": 1.0603920221328735, "learning_rate": 9.268376040100955e-05, "loss": 0.425, "step": 12940 }, { "epoch": 35.97222222222222, "grad_norm": 0.971967339515686, "learning_rate": 9.266940164189752e-05, "loss": 0.4488, "step": 12950 }, { "epoch": 36.0, "grad_norm": 1.0002821683883667, "learning_rate": 9.265502992094787e-05, "loss": 0.435, "step": 12960 }, { "epoch": 36.02777777777778, "grad_norm": 1.032763123512268, "learning_rate": 9.264064524252638e-05, "loss": 0.4528, "step": 12970 }, { "epoch": 36.05555555555556, "grad_norm": 1.0128145217895508, "learning_rate": 9.262624761100271e-05, "loss": 0.4441, "step": 12980 }, { "epoch": 36.083333333333336, "grad_norm": 1.0838338136672974, "learning_rate": 9.261183703075051e-05, "loss": 0.4452, "step": 12990 }, { "epoch": 36.111111111111114, "grad_norm": 1.1626358032226562, "learning_rate": 9.259741350614733e-05, "loss": 0.4444, "step": 13000 }, { "epoch": 36.138888888888886, "grad_norm": 0.9943220615386963, "learning_rate": 9.258297704157464e-05, "loss": 0.4492, "step": 13010 }, { "epoch": 36.166666666666664, "grad_norm": 1.0221469402313232, "learning_rate": 9.256852764141786e-05, "loss": 0.446, "step": 13020 }, { "epoch": 36.19444444444444, "grad_norm": 1.0502995252609253, "learning_rate": 9.255406531006634e-05, "loss": 0.4567, "step": 13030 }, { "epoch": 36.22222222222222, "grad_norm": 0.9853429198265076, "learning_rate": 9.253959005191335e-05, "loss": 0.4384, "step": 13040 }, { "epoch": 36.25, "grad_norm": 1.057911992073059, "learning_rate": 9.25251018713561e-05, "loss": 0.4517, "step": 13050 }, { "epoch": 36.27777777777778, "grad_norm": 0.9471535682678223, "learning_rate": 9.251060077279571e-05, "loss": 0.4457, "step": 13060 }, { "epoch": 36.30555555555556, "grad_norm": 0.9369986653327942, "learning_rate": 9.249608676063724e-05, "loss": 0.4212, "step": 13070 }, { "epoch": 36.333333333333336, "grad_norm": 1.0892534255981445, "learning_rate": 9.248155983928964e-05, "loss": 0.4457, "step": 13080 }, { "epoch": 36.361111111111114, "grad_norm": 1.0793671607971191, "learning_rate": 9.246702001316583e-05, "loss": 0.4469, "step": 13090 }, { "epoch": 36.388888888888886, "grad_norm": 1.0901334285736084, "learning_rate": 9.245246728668262e-05, "loss": 0.4612, "step": 13100 }, { "epoch": 36.416666666666664, "grad_norm": 0.9438841938972473, "learning_rate": 9.243790166426073e-05, "loss": 0.4451, "step": 13110 }, { "epoch": 36.44444444444444, "grad_norm": 1.1523778438568115, "learning_rate": 9.242332315032484e-05, "loss": 0.4559, "step": 13120 }, { "epoch": 36.47222222222222, "grad_norm": 1.1003468036651611, "learning_rate": 9.240873174930349e-05, "loss": 0.4455, "step": 13130 }, { "epoch": 36.5, "grad_norm": 1.011376142501831, "learning_rate": 9.239412746562917e-05, "loss": 0.4428, "step": 13140 }, { "epoch": 36.52777777777778, "grad_norm": 1.0534017086029053, "learning_rate": 9.237951030373828e-05, "loss": 0.4504, "step": 13150 }, { "epoch": 36.55555555555556, "grad_norm": 0.935117781162262, "learning_rate": 9.236488026807113e-05, "loss": 0.4329, "step": 13160 }, { "epoch": 36.583333333333336, "grad_norm": 1.0877517461776733, "learning_rate": 9.235023736307193e-05, "loss": 0.4496, "step": 13170 }, { "epoch": 36.611111111111114, "grad_norm": 0.9358137845993042, "learning_rate": 9.233558159318881e-05, "loss": 0.4487, "step": 13180 }, { "epoch": 36.638888888888886, "grad_norm": 1.0549784898757935, "learning_rate": 9.232091296287382e-05, "loss": 0.4362, "step": 13190 }, { "epoch": 36.666666666666664, "grad_norm": 0.9736929535865784, "learning_rate": 9.230623147658288e-05, "loss": 0.4628, "step": 13200 }, { "epoch": 36.69444444444444, "grad_norm": 1.1679389476776123, "learning_rate": 9.229153713877586e-05, "loss": 0.4506, "step": 13210 }, { "epoch": 36.72222222222222, "grad_norm": 0.995822548866272, "learning_rate": 9.227682995391649e-05, "loss": 0.4556, "step": 13220 }, { "epoch": 36.75, "grad_norm": 1.043515682220459, "learning_rate": 9.226210992647243e-05, "loss": 0.4508, "step": 13230 }, { "epoch": 36.77777777777778, "grad_norm": 0.9200074672698975, "learning_rate": 9.224737706091525e-05, "loss": 0.4485, "step": 13240 }, { "epoch": 36.80555555555556, "grad_norm": 0.9468806982040405, "learning_rate": 9.223263136172039e-05, "loss": 0.4495, "step": 13250 }, { "epoch": 36.833333333333336, "grad_norm": 0.9130278825759888, "learning_rate": 9.22178728333672e-05, "loss": 0.4596, "step": 13260 }, { "epoch": 36.861111111111114, "grad_norm": 1.1236158609390259, "learning_rate": 9.220310148033897e-05, "loss": 0.4592, "step": 13270 }, { "epoch": 36.888888888888886, "grad_norm": 0.9974984526634216, "learning_rate": 9.21883173071228e-05, "loss": 0.4539, "step": 13280 }, { "epoch": 36.916666666666664, "grad_norm": 0.9714788198471069, "learning_rate": 9.217352031820976e-05, "loss": 0.4566, "step": 13290 }, { "epoch": 36.94444444444444, "grad_norm": 1.0471323728561401, "learning_rate": 9.215871051809477e-05, "loss": 0.4669, "step": 13300 }, { "epoch": 36.97222222222222, "grad_norm": 0.9751743078231812, "learning_rate": 9.214388791127666e-05, "loss": 0.4266, "step": 13310 }, { "epoch": 37.0, "grad_norm": 1.0050173997879028, "learning_rate": 9.212905250225814e-05, "loss": 0.4436, "step": 13320 }, { "epoch": 37.02777777777778, "grad_norm": 0.996888279914856, "learning_rate": 9.211420429554583e-05, "loss": 0.4598, "step": 13330 }, { "epoch": 37.05555555555556, "grad_norm": 1.0348467826843262, "learning_rate": 9.209934329565022e-05, "loss": 0.4417, "step": 13340 }, { "epoch": 37.083333333333336, "grad_norm": 1.0750457048416138, "learning_rate": 9.208446950708568e-05, "loss": 0.4397, "step": 13350 }, { "epoch": 37.111111111111114, "grad_norm": 0.9658521413803101, "learning_rate": 9.20695829343705e-05, "loss": 0.4496, "step": 13360 }, { "epoch": 37.138888888888886, "grad_norm": 0.9339320063591003, "learning_rate": 9.205468358202678e-05, "loss": 0.4405, "step": 13370 }, { "epoch": 37.166666666666664, "grad_norm": 1.0171159505844116, "learning_rate": 9.203977145458059e-05, "loss": 0.4391, "step": 13380 }, { "epoch": 37.19444444444444, "grad_norm": 1.0416250228881836, "learning_rate": 9.202484655656182e-05, "loss": 0.4381, "step": 13390 }, { "epoch": 37.22222222222222, "grad_norm": 0.9483913779258728, "learning_rate": 9.200990889250427e-05, "loss": 0.4425, "step": 13400 }, { "epoch": 37.25, "grad_norm": 1.0587725639343262, "learning_rate": 9.19949584669456e-05, "loss": 0.4464, "step": 13410 }, { "epoch": 37.27777777777778, "grad_norm": 1.1353199481964111, "learning_rate": 9.197999528442738e-05, "loss": 0.45, "step": 13420 }, { "epoch": 37.30555555555556, "grad_norm": 1.0591360330581665, "learning_rate": 9.196501934949499e-05, "loss": 0.4596, "step": 13430 }, { "epoch": 37.333333333333336, "grad_norm": 1.0812792778015137, "learning_rate": 9.195003066669776e-05, "loss": 0.459, "step": 13440 }, { "epoch": 37.361111111111114, "grad_norm": 1.0084134340286255, "learning_rate": 9.193502924058884e-05, "loss": 0.4536, "step": 13450 }, { "epoch": 37.388888888888886, "grad_norm": 0.9250982403755188, "learning_rate": 9.192001507572526e-05, "loss": 0.4477, "step": 13460 }, { "epoch": 37.416666666666664, "grad_norm": 1.1447402238845825, "learning_rate": 9.190498817666793e-05, "loss": 0.4537, "step": 13470 }, { "epoch": 37.44444444444444, "grad_norm": 0.9728381037712097, "learning_rate": 9.188994854798163e-05, "loss": 0.4401, "step": 13480 }, { "epoch": 37.47222222222222, "grad_norm": 1.048888921737671, "learning_rate": 9.187489619423499e-05, "loss": 0.4651, "step": 13490 }, { "epoch": 37.5, "grad_norm": 1.0175195932388306, "learning_rate": 9.185983112000056e-05, "loss": 0.4428, "step": 13500 }, { "epoch": 37.52777777777778, "grad_norm": 0.989080011844635, "learning_rate": 9.184475332985464e-05, "loss": 0.4613, "step": 13510 }, { "epoch": 37.55555555555556, "grad_norm": 0.9652053713798523, "learning_rate": 9.182966282837754e-05, "loss": 0.4229, "step": 13520 }, { "epoch": 37.583333333333336, "grad_norm": 1.0293511152267456, "learning_rate": 9.18145596201533e-05, "loss": 0.4417, "step": 13530 }, { "epoch": 37.611111111111114, "grad_norm": 1.0348623991012573, "learning_rate": 9.179944370976991e-05, "loss": 0.4493, "step": 13540 }, { "epoch": 37.638888888888886, "grad_norm": 0.9909543395042419, "learning_rate": 9.178431510181918e-05, "loss": 0.4458, "step": 13550 }, { "epoch": 37.666666666666664, "grad_norm": 1.0738584995269775, "learning_rate": 9.176917380089675e-05, "loss": 0.439, "step": 13560 }, { "epoch": 37.69444444444444, "grad_norm": 0.9696876406669617, "learning_rate": 9.175401981160219e-05, "loss": 0.4258, "step": 13570 }, { "epoch": 37.72222222222222, "grad_norm": 0.9974539279937744, "learning_rate": 9.173885313853885e-05, "loss": 0.4491, "step": 13580 }, { "epoch": 37.75, "grad_norm": 0.9263717532157898, "learning_rate": 9.172367378631398e-05, "loss": 0.4376, "step": 13590 }, { "epoch": 37.77777777777778, "grad_norm": 1.0205576419830322, "learning_rate": 9.170848175953866e-05, "loss": 0.4448, "step": 13600 }, { "epoch": 37.80555555555556, "grad_norm": 1.0432108640670776, "learning_rate": 9.169327706282784e-05, "loss": 0.4409, "step": 13610 }, { "epoch": 37.833333333333336, "grad_norm": 1.0003827810287476, "learning_rate": 9.167805970080029e-05, "loss": 0.4503, "step": 13620 }, { "epoch": 37.861111111111114, "grad_norm": 1.1900774240493774, "learning_rate": 9.166282967807864e-05, "loss": 0.4562, "step": 13630 }, { "epoch": 37.888888888888886, "grad_norm": 0.9506798386573792, "learning_rate": 9.16475869992894e-05, "loss": 0.4459, "step": 13640 }, { "epoch": 37.916666666666664, "grad_norm": 1.1429461240768433, "learning_rate": 9.163233166906284e-05, "loss": 0.4371, "step": 13650 }, { "epoch": 37.94444444444444, "grad_norm": 0.9466935396194458, "learning_rate": 9.161706369203317e-05, "loss": 0.4386, "step": 13660 }, { "epoch": 37.97222222222222, "grad_norm": 0.9940795302391052, "learning_rate": 9.16017830728384e-05, "loss": 0.433, "step": 13670 }, { "epoch": 38.0, "grad_norm": 0.9072529077529907, "learning_rate": 9.158648981612035e-05, "loss": 0.4463, "step": 13680 }, { "epoch": 38.02777777777778, "grad_norm": 1.22565758228302, "learning_rate": 9.157118392652472e-05, "loss": 0.4472, "step": 13690 }, { "epoch": 38.05555555555556, "grad_norm": 1.0202702283859253, "learning_rate": 9.155586540870104e-05, "loss": 0.4439, "step": 13700 }, { "epoch": 38.083333333333336, "grad_norm": 1.0294607877731323, "learning_rate": 9.154053426730267e-05, "loss": 0.4541, "step": 13710 }, { "epoch": 38.111111111111114, "grad_norm": 1.0503731966018677, "learning_rate": 9.15251905069868e-05, "loss": 0.4349, "step": 13720 }, { "epoch": 38.138888888888886, "grad_norm": 1.1912750005722046, "learning_rate": 9.150983413241446e-05, "loss": 0.4551, "step": 13730 }, { "epoch": 38.166666666666664, "grad_norm": 1.0450938940048218, "learning_rate": 9.149446514825051e-05, "loss": 0.4403, "step": 13740 }, { "epoch": 38.19444444444444, "grad_norm": 1.1213607788085938, "learning_rate": 9.147908355916365e-05, "loss": 0.4539, "step": 13750 }, { "epoch": 38.22222222222222, "grad_norm": 0.9662550091743469, "learning_rate": 9.146368936982642e-05, "loss": 0.4426, "step": 13760 }, { "epoch": 38.25, "grad_norm": 1.0219544172286987, "learning_rate": 9.144828258491511e-05, "loss": 0.4386, "step": 13770 }, { "epoch": 38.27777777777778, "grad_norm": 1.097422480583191, "learning_rate": 9.143286320910996e-05, "loss": 0.4374, "step": 13780 }, { "epoch": 38.30555555555556, "grad_norm": 1.0803706645965576, "learning_rate": 9.141743124709491e-05, "loss": 0.4485, "step": 13790 }, { "epoch": 38.333333333333336, "grad_norm": 0.961184024810791, "learning_rate": 9.140198670355784e-05, "loss": 0.4439, "step": 13800 }, { "epoch": 38.361111111111114, "grad_norm": 0.9902667999267578, "learning_rate": 9.138652958319034e-05, "loss": 0.4572, "step": 13810 }, { "epoch": 38.388888888888886, "grad_norm": 1.1117254495620728, "learning_rate": 9.137105989068791e-05, "loss": 0.4441, "step": 13820 }, { "epoch": 38.416666666666664, "grad_norm": 1.1382683515548706, "learning_rate": 9.135557763074983e-05, "loss": 0.4409, "step": 13830 }, { "epoch": 38.44444444444444, "grad_norm": 0.9383972883224487, "learning_rate": 9.13400828080792e-05, "loss": 0.4447, "step": 13840 }, { "epoch": 38.47222222222222, "grad_norm": 0.9723703861236572, "learning_rate": 9.132457542738292e-05, "loss": 0.4381, "step": 13850 }, { "epoch": 38.5, "grad_norm": 1.0181328058242798, "learning_rate": 9.130905549337174e-05, "loss": 0.4473, "step": 13860 }, { "epoch": 38.52777777777778, "grad_norm": 0.9761826395988464, "learning_rate": 9.129352301076021e-05, "loss": 0.4332, "step": 13870 }, { "epoch": 38.55555555555556, "grad_norm": 0.9767125844955444, "learning_rate": 9.127797798426668e-05, "loss": 0.4317, "step": 13880 }, { "epoch": 38.583333333333336, "grad_norm": 1.0178205966949463, "learning_rate": 9.126242041861333e-05, "loss": 0.4477, "step": 13890 }, { "epoch": 38.611111111111114, "grad_norm": 0.9097136855125427, "learning_rate": 9.124685031852611e-05, "loss": 0.4468, "step": 13900 }, { "epoch": 38.638888888888886, "grad_norm": 1.0008841753005981, "learning_rate": 9.123126768873482e-05, "loss": 0.4324, "step": 13910 }, { "epoch": 38.666666666666664, "grad_norm": 1.0988702774047852, "learning_rate": 9.121567253397308e-05, "loss": 0.4515, "step": 13920 }, { "epoch": 38.69444444444444, "grad_norm": 1.021979570388794, "learning_rate": 9.120006485897824e-05, "loss": 0.4388, "step": 13930 }, { "epoch": 38.72222222222222, "grad_norm": 1.0394560098648071, "learning_rate": 9.118444466849152e-05, "loss": 0.439, "step": 13940 }, { "epoch": 38.75, "grad_norm": 1.0720101594924927, "learning_rate": 9.116881196725793e-05, "loss": 0.4581, "step": 13950 }, { "epoch": 38.77777777777778, "grad_norm": 0.9329380393028259, "learning_rate": 9.115316676002627e-05, "loss": 0.44, "step": 13960 }, { "epoch": 38.80555555555556, "grad_norm": 1.045322299003601, "learning_rate": 9.113750905154911e-05, "loss": 0.433, "step": 13970 }, { "epoch": 38.833333333333336, "grad_norm": 0.9711141586303711, "learning_rate": 9.112183884658289e-05, "loss": 0.4393, "step": 13980 }, { "epoch": 38.861111111111114, "grad_norm": 1.0033527612686157, "learning_rate": 9.11061561498878e-05, "loss": 0.4496, "step": 13990 }, { "epoch": 38.888888888888886, "grad_norm": 0.903581440448761, "learning_rate": 9.109046096622779e-05, "loss": 0.4275, "step": 14000 }, { "epoch": 38.916666666666664, "grad_norm": 0.9540365934371948, "learning_rate": 9.107475330037069e-05, "loss": 0.4397, "step": 14010 }, { "epoch": 38.94444444444444, "grad_norm": 1.0117900371551514, "learning_rate": 9.105903315708806e-05, "loss": 0.455, "step": 14020 }, { "epoch": 38.97222222222222, "grad_norm": 1.010347604751587, "learning_rate": 9.104330054115524e-05, "loss": 0.4373, "step": 14030 }, { "epoch": 39.0, "grad_norm": 1.0049464702606201, "learning_rate": 9.102755545735141e-05, "loss": 0.4356, "step": 14040 }, { "epoch": 39.02777777777778, "grad_norm": 0.9192337989807129, "learning_rate": 9.10117979104595e-05, "loss": 0.4231, "step": 14050 }, { "epoch": 39.05555555555556, "grad_norm": 1.0675286054611206, "learning_rate": 9.099602790526624e-05, "loss": 0.4377, "step": 14060 }, { "epoch": 39.083333333333336, "grad_norm": 1.0604379177093506, "learning_rate": 9.098024544656212e-05, "loss": 0.4498, "step": 14070 }, { "epoch": 39.111111111111114, "grad_norm": 1.01913321018219, "learning_rate": 9.096445053914148e-05, "loss": 0.435, "step": 14080 }, { "epoch": 39.138888888888886, "grad_norm": 1.2758145332336426, "learning_rate": 9.094864318780236e-05, "loss": 0.436, "step": 14090 }, { "epoch": 39.166666666666664, "grad_norm": 1.1649781465530396, "learning_rate": 9.093282339734663e-05, "loss": 0.4346, "step": 14100 }, { "epoch": 39.19444444444444, "grad_norm": 1.0223811864852905, "learning_rate": 9.091699117257992e-05, "loss": 0.4258, "step": 14110 }, { "epoch": 39.22222222222222, "grad_norm": 0.9740352630615234, "learning_rate": 9.090114651831163e-05, "loss": 0.4436, "step": 14120 }, { "epoch": 39.25, "grad_norm": 1.0258551836013794, "learning_rate": 9.088528943935497e-05, "loss": 0.447, "step": 14130 }, { "epoch": 39.27777777777778, "grad_norm": 1.0576980113983154, "learning_rate": 9.086941994052689e-05, "loss": 0.4539, "step": 14140 }, { "epoch": 39.30555555555556, "grad_norm": 0.9029607772827148, "learning_rate": 9.085353802664813e-05, "loss": 0.4382, "step": 14150 }, { "epoch": 39.333333333333336, "grad_norm": 0.921208918094635, "learning_rate": 9.08376437025432e-05, "loss": 0.4237, "step": 14160 }, { "epoch": 39.361111111111114, "grad_norm": 1.0218217372894287, "learning_rate": 9.082173697304035e-05, "loss": 0.4378, "step": 14170 }, { "epoch": 39.388888888888886, "grad_norm": 1.0523886680603027, "learning_rate": 9.080581784297166e-05, "loss": 0.4374, "step": 14180 }, { "epoch": 39.416666666666664, "grad_norm": 0.9428532123565674, "learning_rate": 9.078988631717291e-05, "loss": 0.4383, "step": 14190 }, { "epoch": 39.44444444444444, "grad_norm": 0.9810369610786438, "learning_rate": 9.077394240048369e-05, "loss": 0.4397, "step": 14200 }, { "epoch": 39.47222222222222, "grad_norm": 1.1360112428665161, "learning_rate": 9.075798609774736e-05, "loss": 0.4449, "step": 14210 }, { "epoch": 39.5, "grad_norm": 0.8716865181922913, "learning_rate": 9.0742017413811e-05, "loss": 0.4375, "step": 14220 }, { "epoch": 39.52777777777778, "grad_norm": 1.0279864072799683, "learning_rate": 9.072603635352548e-05, "loss": 0.4507, "step": 14230 }, { "epoch": 39.55555555555556, "grad_norm": 1.0018460750579834, "learning_rate": 9.071004292174541e-05, "loss": 0.4282, "step": 14240 }, { "epoch": 39.583333333333336, "grad_norm": 0.9820541739463806, "learning_rate": 9.06940371233292e-05, "loss": 0.4576, "step": 14250 }, { "epoch": 39.611111111111114, "grad_norm": 0.9664506912231445, "learning_rate": 9.067801896313898e-05, "loss": 0.4421, "step": 14260 }, { "epoch": 39.638888888888886, "grad_norm": 1.0212266445159912, "learning_rate": 9.066198844604064e-05, "loss": 0.434, "step": 14270 }, { "epoch": 39.666666666666664, "grad_norm": 0.9433786869049072, "learning_rate": 9.06459455769038e-05, "loss": 0.43, "step": 14280 }, { "epoch": 39.69444444444444, "grad_norm": 0.9419794082641602, "learning_rate": 9.062989036060193e-05, "loss": 0.4179, "step": 14290 }, { "epoch": 39.72222222222222, "grad_norm": 0.9126619696617126, "learning_rate": 9.061382280201212e-05, "loss": 0.4351, "step": 14300 }, { "epoch": 39.75, "grad_norm": 1.0441426038742065, "learning_rate": 9.059774290601528e-05, "loss": 0.4225, "step": 14310 }, { "epoch": 39.77777777777778, "grad_norm": 0.9826822280883789, "learning_rate": 9.058165067749606e-05, "loss": 0.4614, "step": 14320 }, { "epoch": 39.80555555555556, "grad_norm": 0.9918476939201355, "learning_rate": 9.056554612134288e-05, "loss": 0.4427, "step": 14330 }, { "epoch": 39.833333333333336, "grad_norm": 1.100233554840088, "learning_rate": 9.054942924244785e-05, "loss": 0.4493, "step": 14340 }, { "epoch": 39.861111111111114, "grad_norm": 0.9745161533355713, "learning_rate": 9.053330004570686e-05, "loss": 0.4431, "step": 14350 }, { "epoch": 39.888888888888886, "grad_norm": 0.8967203497886658, "learning_rate": 9.051715853601955e-05, "loss": 0.4496, "step": 14360 }, { "epoch": 39.916666666666664, "grad_norm": 0.9745586514472961, "learning_rate": 9.050100471828926e-05, "loss": 0.4379, "step": 14370 }, { "epoch": 39.94444444444444, "grad_norm": 0.9679124355316162, "learning_rate": 9.048483859742311e-05, "loss": 0.4447, "step": 14380 }, { "epoch": 39.97222222222222, "grad_norm": 1.0377804040908813, "learning_rate": 9.046866017833193e-05, "loss": 0.4337, "step": 14390 }, { "epoch": 40.0, "grad_norm": 0.9857845306396484, "learning_rate": 9.045246946593029e-05, "loss": 0.4437, "step": 14400 }, { "epoch": 40.02777777777778, "grad_norm": 0.8898565173149109, "learning_rate": 9.043626646513652e-05, "loss": 0.4496, "step": 14410 }, { "epoch": 40.05555555555556, "grad_norm": 0.9944568276405334, "learning_rate": 9.042005118087267e-05, "loss": 0.4354, "step": 14420 }, { "epoch": 40.083333333333336, "grad_norm": 0.9124097228050232, "learning_rate": 9.040382361806448e-05, "loss": 0.4243, "step": 14430 }, { "epoch": 40.111111111111114, "grad_norm": 1.136096715927124, "learning_rate": 9.038758378164148e-05, "loss": 0.4416, "step": 14440 }, { "epoch": 40.138888888888886, "grad_norm": 1.0787185430526733, "learning_rate": 9.037133167653691e-05, "loss": 0.4433, "step": 14450 }, { "epoch": 40.166666666666664, "grad_norm": 1.1060492992401123, "learning_rate": 9.035506730768771e-05, "loss": 0.4413, "step": 14460 }, { "epoch": 40.19444444444444, "grad_norm": 0.9852575659751892, "learning_rate": 9.033879068003458e-05, "loss": 0.4385, "step": 14470 }, { "epoch": 40.22222222222222, "grad_norm": 1.1431337594985962, "learning_rate": 9.032250179852193e-05, "loss": 0.4513, "step": 14480 }, { "epoch": 40.25, "grad_norm": 1.0808700323104858, "learning_rate": 9.030620066809787e-05, "loss": 0.4415, "step": 14490 }, { "epoch": 40.27777777777778, "grad_norm": 0.9883102774620056, "learning_rate": 9.028988729371428e-05, "loss": 0.4223, "step": 14500 }, { "epoch": 40.30555555555556, "grad_norm": 0.9723131656646729, "learning_rate": 9.027356168032673e-05, "loss": 0.4342, "step": 14510 }, { "epoch": 40.333333333333336, "grad_norm": 1.089882493019104, "learning_rate": 9.02572238328945e-05, "loss": 0.4263, "step": 14520 }, { "epoch": 40.361111111111114, "grad_norm": 0.9169453382492065, "learning_rate": 9.02408737563806e-05, "loss": 0.4279, "step": 14530 }, { "epoch": 40.388888888888886, "grad_norm": 1.040717363357544, "learning_rate": 9.022451145575174e-05, "loss": 0.4523, "step": 14540 }, { "epoch": 40.416666666666664, "grad_norm": 1.0248355865478516, "learning_rate": 9.02081369359784e-05, "loss": 0.4423, "step": 14550 }, { "epoch": 40.44444444444444, "grad_norm": 1.0749666690826416, "learning_rate": 9.019175020203465e-05, "loss": 0.4479, "step": 14560 }, { "epoch": 40.47222222222222, "grad_norm": 1.0955750942230225, "learning_rate": 9.017535125889842e-05, "loss": 0.4388, "step": 14570 }, { "epoch": 40.5, "grad_norm": 0.9083215594291687, "learning_rate": 9.015894011155124e-05, "loss": 0.4404, "step": 14580 }, { "epoch": 40.52777777777778, "grad_norm": 0.9532095193862915, "learning_rate": 9.014251676497838e-05, "loss": 0.4293, "step": 14590 }, { "epoch": 40.55555555555556, "grad_norm": 0.9454753994941711, "learning_rate": 9.012608122416884e-05, "loss": 0.4386, "step": 14600 }, { "epoch": 40.583333333333336, "grad_norm": 1.0758888721466064, "learning_rate": 9.010963349411529e-05, "loss": 0.442, "step": 14610 }, { "epoch": 40.611111111111114, "grad_norm": 1.0037920475006104, "learning_rate": 9.00931735798141e-05, "loss": 0.4396, "step": 14620 }, { "epoch": 40.638888888888886, "grad_norm": 1.0231634378433228, "learning_rate": 9.00767014862654e-05, "loss": 0.4659, "step": 14630 }, { "epoch": 40.666666666666664, "grad_norm": 0.9349000453948975, "learning_rate": 9.006021721847295e-05, "loss": 0.4458, "step": 14640 }, { "epoch": 40.69444444444444, "grad_norm": 0.979676365852356, "learning_rate": 9.004372078144423e-05, "loss": 0.4383, "step": 14650 }, { "epoch": 40.72222222222222, "grad_norm": 0.9191057085990906, "learning_rate": 9.002721218019043e-05, "loss": 0.4434, "step": 14660 }, { "epoch": 40.75, "grad_norm": 0.9880043864250183, "learning_rate": 9.001069141972642e-05, "loss": 0.4239, "step": 14670 }, { "epoch": 40.77777777777778, "grad_norm": 0.8863441348075867, "learning_rate": 8.99941585050708e-05, "loss": 0.429, "step": 14680 }, { "epoch": 40.80555555555556, "grad_norm": 0.9629117846488953, "learning_rate": 8.997761344124578e-05, "loss": 0.4262, "step": 14690 }, { "epoch": 40.833333333333336, "grad_norm": 0.9351202845573425, "learning_rate": 8.996105623327737e-05, "loss": 0.4387, "step": 14700 }, { "epoch": 40.861111111111114, "grad_norm": 0.9798956513404846, "learning_rate": 8.994448688619517e-05, "loss": 0.4568, "step": 14710 }, { "epoch": 40.888888888888886, "grad_norm": 0.9976133108139038, "learning_rate": 8.992790540503253e-05, "loss": 0.4484, "step": 14720 }, { "epoch": 40.916666666666664, "grad_norm": 1.1114243268966675, "learning_rate": 8.991131179482648e-05, "loss": 0.4459, "step": 14730 }, { "epoch": 40.94444444444444, "grad_norm": 0.9076769948005676, "learning_rate": 8.989470606061768e-05, "loss": 0.4539, "step": 14740 }, { "epoch": 40.97222222222222, "grad_norm": 0.9777836799621582, "learning_rate": 8.987808820745056e-05, "loss": 0.4469, "step": 14750 }, { "epoch": 41.0, "grad_norm": 0.8805384635925293, "learning_rate": 8.986145824037315e-05, "loss": 0.4471, "step": 14760 }, { "epoch": 41.02777777777778, "grad_norm": 1.0427556037902832, "learning_rate": 8.984481616443721e-05, "loss": 0.4623, "step": 14770 }, { "epoch": 41.05555555555556, "grad_norm": 0.9087859392166138, "learning_rate": 8.982816198469815e-05, "loss": 0.4375, "step": 14780 }, { "epoch": 41.083333333333336, "grad_norm": 0.9261074066162109, "learning_rate": 8.98114957062151e-05, "loss": 0.4393, "step": 14790 }, { "epoch": 41.111111111111114, "grad_norm": 1.0470553636550903, "learning_rate": 8.97948173340508e-05, "loss": 0.4162, "step": 14800 }, { "epoch": 41.138888888888886, "grad_norm": 1.114217758178711, "learning_rate": 8.977812687327172e-05, "loss": 0.4331, "step": 14810 }, { "epoch": 41.166666666666664, "grad_norm": 1.014365792274475, "learning_rate": 8.976142432894798e-05, "loss": 0.4331, "step": 14820 }, { "epoch": 41.19444444444444, "grad_norm": 1.1729148626327515, "learning_rate": 8.974470970615336e-05, "loss": 0.4445, "step": 14830 }, { "epoch": 41.22222222222222, "grad_norm": 0.8537477254867554, "learning_rate": 8.972798300996534e-05, "loss": 0.4199, "step": 14840 }, { "epoch": 41.25, "grad_norm": 1.044650673866272, "learning_rate": 8.971124424546504e-05, "loss": 0.4359, "step": 14850 }, { "epoch": 41.27777777777778, "grad_norm": 0.925240159034729, "learning_rate": 8.969449341773724e-05, "loss": 0.435, "step": 14860 }, { "epoch": 41.30555555555556, "grad_norm": 1.037763237953186, "learning_rate": 8.967773053187042e-05, "loss": 0.4358, "step": 14870 }, { "epoch": 41.333333333333336, "grad_norm": 1.0558542013168335, "learning_rate": 8.966095559295668e-05, "loss": 0.4338, "step": 14880 }, { "epoch": 41.361111111111114, "grad_norm": 0.9702829122543335, "learning_rate": 8.964416860609184e-05, "loss": 0.431, "step": 14890 }, { "epoch": 41.388888888888886, "grad_norm": 0.9076346158981323, "learning_rate": 8.962736957637532e-05, "loss": 0.4232, "step": 14900 }, { "epoch": 41.416666666666664, "grad_norm": 1.1050057411193848, "learning_rate": 8.96105585089102e-05, "loss": 0.4321, "step": 14910 }, { "epoch": 41.44444444444444, "grad_norm": 0.9568971991539001, "learning_rate": 8.959373540880329e-05, "loss": 0.4668, "step": 14920 }, { "epoch": 41.47222222222222, "grad_norm": 0.9520556926727295, "learning_rate": 8.957690028116495e-05, "loss": 0.4257, "step": 14930 }, { "epoch": 41.5, "grad_norm": 1.055200219154358, "learning_rate": 8.956005313110928e-05, "loss": 0.4441, "step": 14940 }, { "epoch": 41.52777777777778, "grad_norm": 1.1959421634674072, "learning_rate": 8.9543193963754e-05, "loss": 0.4321, "step": 14950 }, { "epoch": 41.55555555555556, "grad_norm": 1.0743354558944702, "learning_rate": 8.952632278422048e-05, "loss": 0.423, "step": 14960 }, { "epoch": 41.583333333333336, "grad_norm": 0.8804478645324707, "learning_rate": 8.95094395976337e-05, "loss": 0.4227, "step": 14970 }, { "epoch": 41.611111111111114, "grad_norm": 0.8712294697761536, "learning_rate": 8.949254440912239e-05, "loss": 0.426, "step": 14980 }, { "epoch": 41.638888888888886, "grad_norm": 1.0301003456115723, "learning_rate": 8.94756372238188e-05, "loss": 0.449, "step": 14990 }, { "epoch": 41.666666666666664, "grad_norm": 0.97358238697052, "learning_rate": 8.945871804685892e-05, "loss": 0.4359, "step": 15000 }, { "epoch": 41.69444444444444, "grad_norm": 0.8928020596504211, "learning_rate": 8.944178688338236e-05, "loss": 0.4255, "step": 15010 }, { "epoch": 41.72222222222222, "grad_norm": 0.9834263920783997, "learning_rate": 8.942484373853233e-05, "loss": 0.4256, "step": 15020 }, { "epoch": 41.75, "grad_norm": 1.0046405792236328, "learning_rate": 8.940788861745572e-05, "loss": 0.4446, "step": 15030 }, { "epoch": 41.77777777777778, "grad_norm": 0.893673837184906, "learning_rate": 8.939092152530308e-05, "loss": 0.4277, "step": 15040 }, { "epoch": 41.80555555555556, "grad_norm": 0.9838545322418213, "learning_rate": 8.937394246722853e-05, "loss": 0.4433, "step": 15050 }, { "epoch": 41.833333333333336, "grad_norm": 0.8939712047576904, "learning_rate": 8.935695144838984e-05, "loss": 0.4222, "step": 15060 }, { "epoch": 41.861111111111114, "grad_norm": 0.8532902002334595, "learning_rate": 8.933994847394849e-05, "loss": 0.4391, "step": 15070 }, { "epoch": 41.888888888888886, "grad_norm": 1.034393072128296, "learning_rate": 8.932293354906949e-05, "loss": 0.4249, "step": 15080 }, { "epoch": 41.916666666666664, "grad_norm": 0.8723398447036743, "learning_rate": 8.930590667892153e-05, "loss": 0.4273, "step": 15090 }, { "epoch": 41.94444444444444, "grad_norm": 1.038054347038269, "learning_rate": 8.928886786867696e-05, "loss": 0.4297, "step": 15100 }, { "epoch": 41.97222222222222, "grad_norm": 1.0389201641082764, "learning_rate": 8.927181712351168e-05, "loss": 0.4405, "step": 15110 }, { "epoch": 42.0, "grad_norm": 0.9169611930847168, "learning_rate": 8.925475444860527e-05, "loss": 0.4429, "step": 15120 }, { "epoch": 42.02777777777778, "grad_norm": 0.9127033352851868, "learning_rate": 8.923767984914092e-05, "loss": 0.4387, "step": 15130 }, { "epoch": 42.05555555555556, "grad_norm": 0.9212204217910767, "learning_rate": 8.922059333030545e-05, "loss": 0.4207, "step": 15140 }, { "epoch": 42.083333333333336, "grad_norm": 0.9324175715446472, "learning_rate": 8.920349489728928e-05, "loss": 0.4334, "step": 15150 }, { "epoch": 42.111111111111114, "grad_norm": 1.077994704246521, "learning_rate": 8.918638455528646e-05, "loss": 0.4307, "step": 15160 }, { "epoch": 42.138888888888886, "grad_norm": 0.9629296660423279, "learning_rate": 8.916926230949468e-05, "loss": 0.426, "step": 15170 }, { "epoch": 42.166666666666664, "grad_norm": 0.9928814768791199, "learning_rate": 8.915212816511522e-05, "loss": 0.4271, "step": 15180 }, { "epoch": 42.19444444444444, "grad_norm": 0.8867086172103882, "learning_rate": 8.913498212735296e-05, "loss": 0.4358, "step": 15190 }, { "epoch": 42.22222222222222, "grad_norm": 0.9365127682685852, "learning_rate": 8.911782420141643e-05, "loss": 0.4365, "step": 15200 }, { "epoch": 42.25, "grad_norm": 0.9903731346130371, "learning_rate": 8.910065439251775e-05, "loss": 0.4247, "step": 15210 }, { "epoch": 42.27777777777778, "grad_norm": 0.9391055703163147, "learning_rate": 8.908347270587268e-05, "loss": 0.4291, "step": 15220 }, { "epoch": 42.30555555555556, "grad_norm": 0.9451014399528503, "learning_rate": 8.906627914670054e-05, "loss": 0.4285, "step": 15230 }, { "epoch": 42.333333333333336, "grad_norm": 1.0026825666427612, "learning_rate": 8.904907372022427e-05, "loss": 0.4445, "step": 15240 }, { "epoch": 42.361111111111114, "grad_norm": 1.1900650262832642, "learning_rate": 8.903185643167042e-05, "loss": 0.4391, "step": 15250 }, { "epoch": 42.388888888888886, "grad_norm": 0.9341135621070862, "learning_rate": 8.901462728626919e-05, "loss": 0.4301, "step": 15260 }, { "epoch": 42.416666666666664, "grad_norm": 1.0235172510147095, "learning_rate": 8.899738628925429e-05, "loss": 0.4423, "step": 15270 }, { "epoch": 42.44444444444444, "grad_norm": 1.033046841621399, "learning_rate": 8.898013344586312e-05, "loss": 0.4331, "step": 15280 }, { "epoch": 42.47222222222222, "grad_norm": 1.1696375608444214, "learning_rate": 8.896286876133661e-05, "loss": 0.4417, "step": 15290 }, { "epoch": 42.5, "grad_norm": 0.9612084627151489, "learning_rate": 8.894559224091933e-05, "loss": 0.4331, "step": 15300 }, { "epoch": 42.52777777777778, "grad_norm": 1.0849716663360596, "learning_rate": 8.892830388985942e-05, "loss": 0.4487, "step": 15310 }, { "epoch": 42.55555555555556, "grad_norm": 0.9763560891151428, "learning_rate": 8.891100371340864e-05, "loss": 0.4478, "step": 15320 }, { "epoch": 42.583333333333336, "grad_norm": 1.0364196300506592, "learning_rate": 8.889369171682231e-05, "loss": 0.439, "step": 15330 }, { "epoch": 42.611111111111114, "grad_norm": 0.9484772682189941, "learning_rate": 8.887636790535936e-05, "loss": 0.4346, "step": 15340 }, { "epoch": 42.638888888888886, "grad_norm": 0.9369587898254395, "learning_rate": 8.885903228428231e-05, "loss": 0.4281, "step": 15350 }, { "epoch": 42.666666666666664, "grad_norm": 0.8985670804977417, "learning_rate": 8.884168485885727e-05, "loss": 0.4269, "step": 15360 }, { "epoch": 42.69444444444444, "grad_norm": 0.9177696108818054, "learning_rate": 8.882432563435393e-05, "loss": 0.4245, "step": 15370 }, { "epoch": 42.72222222222222, "grad_norm": 1.0097296237945557, "learning_rate": 8.880695461604556e-05, "loss": 0.4369, "step": 15380 }, { "epoch": 42.75, "grad_norm": 0.9163837432861328, "learning_rate": 8.878957180920901e-05, "loss": 0.4464, "step": 15390 }, { "epoch": 42.77777777777778, "grad_norm": 1.030058741569519, "learning_rate": 8.877217721912473e-05, "loss": 0.4482, "step": 15400 }, { "epoch": 42.80555555555556, "grad_norm": 0.9293743371963501, "learning_rate": 8.875477085107673e-05, "loss": 0.4228, "step": 15410 }, { "epoch": 42.833333333333336, "grad_norm": 0.9592161178588867, "learning_rate": 8.87373527103526e-05, "loss": 0.4381, "step": 15420 }, { "epoch": 42.861111111111114, "grad_norm": 0.822125256061554, "learning_rate": 8.871992280224353e-05, "loss": 0.426, "step": 15430 }, { "epoch": 42.888888888888886, "grad_norm": 0.9283614158630371, "learning_rate": 8.870248113204422e-05, "loss": 0.4329, "step": 15440 }, { "epoch": 42.916666666666664, "grad_norm": 0.9582603573799133, "learning_rate": 8.868502770505306e-05, "loss": 0.4501, "step": 15450 }, { "epoch": 42.94444444444444, "grad_norm": 0.9725087285041809, "learning_rate": 8.86675625265719e-05, "loss": 0.4359, "step": 15460 }, { "epoch": 42.97222222222222, "grad_norm": 1.0259579420089722, "learning_rate": 8.865008560190618e-05, "loss": 0.4398, "step": 15470 }, { "epoch": 43.0, "grad_norm": 1.0032180547714233, "learning_rate": 8.863259693636496e-05, "loss": 0.4226, "step": 15480 }, { "epoch": 43.02777777777778, "grad_norm": 0.9235767126083374, "learning_rate": 8.861509653526083e-05, "loss": 0.446, "step": 15490 }, { "epoch": 43.05555555555556, "grad_norm": 0.9338498115539551, "learning_rate": 8.859758440390993e-05, "loss": 0.4287, "step": 15500 }, { "epoch": 43.083333333333336, "grad_norm": 0.9206061363220215, "learning_rate": 8.858006054763202e-05, "loss": 0.4276, "step": 15510 }, { "epoch": 43.111111111111114, "grad_norm": 0.934994637966156, "learning_rate": 8.856252497175035e-05, "loss": 0.4308, "step": 15520 }, { "epoch": 43.138888888888886, "grad_norm": 1.0790443420410156, "learning_rate": 8.854497768159178e-05, "loss": 0.4456, "step": 15530 }, { "epoch": 43.166666666666664, "grad_norm": 1.05172598361969, "learning_rate": 8.852741868248671e-05, "loss": 0.4461, "step": 15540 }, { "epoch": 43.19444444444444, "grad_norm": 0.9872245788574219, "learning_rate": 8.85098479797691e-05, "loss": 0.4255, "step": 15550 }, { "epoch": 43.22222222222222, "grad_norm": 0.9344096183776855, "learning_rate": 8.849226557877646e-05, "loss": 0.4229, "step": 15560 }, { "epoch": 43.25, "grad_norm": 0.9625851511955261, "learning_rate": 8.84746714848499e-05, "loss": 0.423, "step": 15570 }, { "epoch": 43.27777777777778, "grad_norm": 0.9581050872802734, "learning_rate": 8.845706570333397e-05, "loss": 0.4419, "step": 15580 }, { "epoch": 43.30555555555556, "grad_norm": 1.0405375957489014, "learning_rate": 8.84394482395769e-05, "loss": 0.4341, "step": 15590 }, { "epoch": 43.333333333333336, "grad_norm": 0.8409476280212402, "learning_rate": 8.842181909893038e-05, "loss": 0.4216, "step": 15600 }, { "epoch": 43.361111111111114, "grad_norm": 1.0588722229003906, "learning_rate": 8.840417828674969e-05, "loss": 0.4265, "step": 15610 }, { "epoch": 43.388888888888886, "grad_norm": 0.9320085644721985, "learning_rate": 8.838652580839364e-05, "loss": 0.4315, "step": 15620 }, { "epoch": 43.416666666666664, "grad_norm": 0.9130483269691467, "learning_rate": 8.836886166922458e-05, "loss": 0.4129, "step": 15630 }, { "epoch": 43.44444444444444, "grad_norm": 0.9143326282501221, "learning_rate": 8.835118587460844e-05, "loss": 0.4319, "step": 15640 }, { "epoch": 43.47222222222222, "grad_norm": 1.1213890314102173, "learning_rate": 8.83334984299146e-05, "loss": 0.4393, "step": 15650 }, { "epoch": 43.5, "grad_norm": 0.9838744401931763, "learning_rate": 8.83157993405161e-05, "loss": 0.4253, "step": 15660 }, { "epoch": 43.52777777777778, "grad_norm": 0.9441247582435608, "learning_rate": 8.829808861178943e-05, "loss": 0.4325, "step": 15670 }, { "epoch": 43.55555555555556, "grad_norm": 1.0393775701522827, "learning_rate": 8.828036624911464e-05, "loss": 0.4185, "step": 15680 }, { "epoch": 43.583333333333336, "grad_norm": 0.9398859143257141, "learning_rate": 8.826263225787532e-05, "loss": 0.442, "step": 15690 }, { "epoch": 43.611111111111114, "grad_norm": 0.9302213191986084, "learning_rate": 8.824488664345858e-05, "loss": 0.4292, "step": 15700 }, { "epoch": 43.638888888888886, "grad_norm": 0.917304515838623, "learning_rate": 8.822712941125508e-05, "loss": 0.4376, "step": 15710 }, { "epoch": 43.666666666666664, "grad_norm": 0.9163775444030762, "learning_rate": 8.820936056665898e-05, "loss": 0.4315, "step": 15720 }, { "epoch": 43.69444444444444, "grad_norm": 1.0067847967147827, "learning_rate": 8.819158011506801e-05, "loss": 0.4213, "step": 15730 }, { "epoch": 43.72222222222222, "grad_norm": 0.9413975477218628, "learning_rate": 8.81737880618834e-05, "loss": 0.4395, "step": 15740 }, { "epoch": 43.75, "grad_norm": 1.04986572265625, "learning_rate": 8.815598441250987e-05, "loss": 0.4232, "step": 15750 }, { "epoch": 43.77777777777778, "grad_norm": 1.040224313735962, "learning_rate": 8.813816917235576e-05, "loss": 0.4402, "step": 15760 }, { "epoch": 43.80555555555556, "grad_norm": 0.9656763672828674, "learning_rate": 8.812034234683282e-05, "loss": 0.4428, "step": 15770 }, { "epoch": 43.833333333333336, "grad_norm": 1.0770927667617798, "learning_rate": 8.810250394135637e-05, "loss": 0.4439, "step": 15780 }, { "epoch": 43.861111111111114, "grad_norm": 0.968833327293396, "learning_rate": 8.808465396134529e-05, "loss": 0.4464, "step": 15790 }, { "epoch": 43.888888888888886, "grad_norm": 1.0621583461761475, "learning_rate": 8.806679241222189e-05, "loss": 0.4347, "step": 15800 }, { "epoch": 43.916666666666664, "grad_norm": 0.9696939587593079, "learning_rate": 8.804891929941203e-05, "loss": 0.4424, "step": 15810 }, { "epoch": 43.94444444444444, "grad_norm": 1.0380024909973145, "learning_rate": 8.803103462834514e-05, "loss": 0.4545, "step": 15820 }, { "epoch": 43.97222222222222, "grad_norm": 1.0151057243347168, "learning_rate": 8.801313840445408e-05, "loss": 0.4487, "step": 15830 }, { "epoch": 44.0, "grad_norm": 0.9208022952079773, "learning_rate": 8.799523063317524e-05, "loss": 0.4185, "step": 15840 }, { "epoch": 44.02777777777778, "grad_norm": 0.8839237093925476, "learning_rate": 8.797731131994854e-05, "loss": 0.4288, "step": 15850 }, { "epoch": 44.05555555555556, "grad_norm": 0.9086605310440063, "learning_rate": 8.795938047021739e-05, "loss": 0.4237, "step": 15860 }, { "epoch": 44.083333333333336, "grad_norm": 0.9699338674545288, "learning_rate": 8.794143808942872e-05, "loss": 0.4277, "step": 15870 }, { "epoch": 44.111111111111114, "grad_norm": 1.0166763067245483, "learning_rate": 8.792348418303296e-05, "loss": 0.4428, "step": 15880 }, { "epoch": 44.138888888888886, "grad_norm": 0.942548930644989, "learning_rate": 8.790551875648398e-05, "loss": 0.418, "step": 15890 }, { "epoch": 44.166666666666664, "grad_norm": 0.8962242007255554, "learning_rate": 8.788754181523926e-05, "loss": 0.4353, "step": 15900 }, { "epoch": 44.19444444444444, "grad_norm": 0.9304192662239075, "learning_rate": 8.78695533647597e-05, "loss": 0.4407, "step": 15910 }, { "epoch": 44.22222222222222, "grad_norm": 0.9040632247924805, "learning_rate": 8.785155341050972e-05, "loss": 0.4242, "step": 15920 }, { "epoch": 44.25, "grad_norm": 0.9436600804328918, "learning_rate": 8.783354195795721e-05, "loss": 0.4607, "step": 15930 }, { "epoch": 44.27777777777778, "grad_norm": 0.9845672845840454, "learning_rate": 8.78155190125736e-05, "loss": 0.4209, "step": 15940 }, { "epoch": 44.30555555555556, "grad_norm": 0.9383752942085266, "learning_rate": 8.779748457983378e-05, "loss": 0.4358, "step": 15950 }, { "epoch": 44.333333333333336, "grad_norm": 0.9188030362129211, "learning_rate": 8.777943866521612e-05, "loss": 0.4374, "step": 15960 }, { "epoch": 44.361111111111114, "grad_norm": 1.075286865234375, "learning_rate": 8.77613812742025e-05, "loss": 0.4268, "step": 15970 }, { "epoch": 44.388888888888886, "grad_norm": 1.0406895875930786, "learning_rate": 8.774331241227829e-05, "loss": 0.4296, "step": 15980 }, { "epoch": 44.416666666666664, "grad_norm": 0.9244762659072876, "learning_rate": 8.772523208493232e-05, "loss": 0.4202, "step": 15990 }, { "epoch": 44.44444444444444, "grad_norm": 0.9299258589744568, "learning_rate": 8.770714029765692e-05, "loss": 0.4334, "step": 16000 }, { "epoch": 44.47222222222222, "grad_norm": 0.9027380347251892, "learning_rate": 8.768903705594789e-05, "loss": 0.4229, "step": 16010 }, { "epoch": 44.5, "grad_norm": 0.9301236867904663, "learning_rate": 8.767092236530453e-05, "loss": 0.4154, "step": 16020 }, { "epoch": 44.52777777777778, "grad_norm": 0.9084728360176086, "learning_rate": 8.76527962312296e-05, "loss": 0.4231, "step": 16030 }, { "epoch": 44.55555555555556, "grad_norm": 0.9051267504692078, "learning_rate": 8.763465865922934e-05, "loss": 0.4299, "step": 16040 }, { "epoch": 44.583333333333336, "grad_norm": 0.9087299108505249, "learning_rate": 8.761650965481347e-05, "loss": 0.442, "step": 16050 }, { "epoch": 44.611111111111114, "grad_norm": 1.0740177631378174, "learning_rate": 8.759834922349516e-05, "loss": 0.4327, "step": 16060 }, { "epoch": 44.638888888888886, "grad_norm": 0.9739348888397217, "learning_rate": 8.758017737079108e-05, "loss": 0.4254, "step": 16070 }, { "epoch": 44.666666666666664, "grad_norm": 0.9383732676506042, "learning_rate": 8.756199410222137e-05, "loss": 0.446, "step": 16080 }, { "epoch": 44.69444444444444, "grad_norm": 0.9427518248558044, "learning_rate": 8.754379942330963e-05, "loss": 0.4331, "step": 16090 }, { "epoch": 44.72222222222222, "grad_norm": 0.9625463485717773, "learning_rate": 8.75255933395829e-05, "loss": 0.4299, "step": 16100 }, { "epoch": 44.75, "grad_norm": 0.9881193041801453, "learning_rate": 8.750737585657171e-05, "loss": 0.4233, "step": 16110 }, { "epoch": 44.77777777777778, "grad_norm": 0.9516944289207458, "learning_rate": 8.748914697981008e-05, "loss": 0.4278, "step": 16120 }, { "epoch": 44.80555555555556, "grad_norm": 0.9412126541137695, "learning_rate": 8.747090671483542e-05, "loss": 0.4229, "step": 16130 }, { "epoch": 44.833333333333336, "grad_norm": 0.9833455085754395, "learning_rate": 8.745265506718869e-05, "loss": 0.443, "step": 16140 }, { "epoch": 44.861111111111114, "grad_norm": 0.9460441470146179, "learning_rate": 8.74343920424142e-05, "loss": 0.4209, "step": 16150 }, { "epoch": 44.888888888888886, "grad_norm": 0.9416811466217041, "learning_rate": 8.741611764605982e-05, "loss": 0.443, "step": 16160 }, { "epoch": 44.916666666666664, "grad_norm": 0.9920147657394409, "learning_rate": 8.739783188367682e-05, "loss": 0.4241, "step": 16170 }, { "epoch": 44.94444444444444, "grad_norm": 0.959662675857544, "learning_rate": 8.737953476081991e-05, "loss": 0.4437, "step": 16180 }, { "epoch": 44.97222222222222, "grad_norm": 0.9234309196472168, "learning_rate": 8.73612262830473e-05, "loss": 0.4249, "step": 16190 }, { "epoch": 45.0, "grad_norm": 0.9198420643806458, "learning_rate": 8.734290645592061e-05, "loss": 0.4476, "step": 16200 }, { "epoch": 45.02777777777778, "grad_norm": 0.9408969879150391, "learning_rate": 8.732457528500493e-05, "loss": 0.4116, "step": 16210 }, { "epoch": 45.05555555555556, "grad_norm": 0.9941111207008362, "learning_rate": 8.730623277586875e-05, "loss": 0.4178, "step": 16220 }, { "epoch": 45.083333333333336, "grad_norm": 1.0800466537475586, "learning_rate": 8.72878789340841e-05, "loss": 0.4333, "step": 16230 }, { "epoch": 45.111111111111114, "grad_norm": 1.2430535554885864, "learning_rate": 8.726951376522635e-05, "loss": 0.4306, "step": 16240 }, { "epoch": 45.138888888888886, "grad_norm": 0.9304962754249573, "learning_rate": 8.725113727487435e-05, "loss": 0.4224, "step": 16250 }, { "epoch": 45.166666666666664, "grad_norm": 0.9021289348602295, "learning_rate": 8.723274946861042e-05, "loss": 0.4378, "step": 16260 }, { "epoch": 45.19444444444444, "grad_norm": 0.8518134355545044, "learning_rate": 8.721435035202026e-05, "loss": 0.4151, "step": 16270 }, { "epoch": 45.22222222222222, "grad_norm": 1.0034804344177246, "learning_rate": 8.719593993069306e-05, "loss": 0.4257, "step": 16280 }, { "epoch": 45.25, "grad_norm": 0.9683818221092224, "learning_rate": 8.717751821022139e-05, "loss": 0.4386, "step": 16290 }, { "epoch": 45.27777777777778, "grad_norm": 0.9008449912071228, "learning_rate": 8.715908519620134e-05, "loss": 0.4265, "step": 16300 }, { "epoch": 45.30555555555556, "grad_norm": 0.9532806873321533, "learning_rate": 8.71406408942323e-05, "loss": 0.4387, "step": 16310 }, { "epoch": 45.333333333333336, "grad_norm": 0.9065651297569275, "learning_rate": 8.712218530991723e-05, "loss": 0.4324, "step": 16320 }, { "epoch": 45.361111111111114, "grad_norm": 0.88138347864151, "learning_rate": 8.710371844886241e-05, "loss": 0.4302, "step": 16330 }, { "epoch": 45.388888888888886, "grad_norm": 1.317144751548767, "learning_rate": 8.708524031667758e-05, "loss": 0.4221, "step": 16340 }, { "epoch": 45.416666666666664, "grad_norm": 0.9918212890625, "learning_rate": 8.706675091897592e-05, "loss": 0.4209, "step": 16350 }, { "epoch": 45.44444444444444, "grad_norm": 0.8384910225868225, "learning_rate": 8.704825026137404e-05, "loss": 0.4262, "step": 16360 }, { "epoch": 45.47222222222222, "grad_norm": 0.9784674048423767, "learning_rate": 8.702973834949192e-05, "loss": 0.4246, "step": 16370 }, { "epoch": 45.5, "grad_norm": 1.0472217798233032, "learning_rate": 8.701121518895301e-05, "loss": 0.4338, "step": 16380 }, { "epoch": 45.52777777777778, "grad_norm": 0.9982578754425049, "learning_rate": 8.699268078538414e-05, "loss": 0.4261, "step": 16390 }, { "epoch": 45.55555555555556, "grad_norm": 0.9509432911872864, "learning_rate": 8.69741351444156e-05, "loss": 0.4286, "step": 16400 }, { "epoch": 45.583333333333336, "grad_norm": 0.9420770406723022, "learning_rate": 8.695557827168101e-05, "loss": 0.4367, "step": 16410 }, { "epoch": 45.611111111111114, "grad_norm": 0.9580206274986267, "learning_rate": 8.693701017281753e-05, "loss": 0.4204, "step": 16420 }, { "epoch": 45.638888888888886, "grad_norm": 0.8688709139823914, "learning_rate": 8.691843085346563e-05, "loss": 0.4152, "step": 16430 }, { "epoch": 45.666666666666664, "grad_norm": 1.0067473649978638, "learning_rate": 8.689984031926919e-05, "loss": 0.4229, "step": 16440 }, { "epoch": 45.69444444444444, "grad_norm": 0.8580542802810669, "learning_rate": 8.688123857587555e-05, "loss": 0.4145, "step": 16450 }, { "epoch": 45.72222222222222, "grad_norm": 1.005967378616333, "learning_rate": 8.686262562893544e-05, "loss": 0.4258, "step": 16460 }, { "epoch": 45.75, "grad_norm": 0.9415436387062073, "learning_rate": 8.684400148410294e-05, "loss": 0.4302, "step": 16470 }, { "epoch": 45.77777777777778, "grad_norm": 0.9164429903030396, "learning_rate": 8.682536614703562e-05, "loss": 0.4267, "step": 16480 }, { "epoch": 45.80555555555556, "grad_norm": 1.0147212743759155, "learning_rate": 8.680671962339437e-05, "loss": 0.4371, "step": 16490 }, { "epoch": 45.833333333333336, "grad_norm": 0.8767426609992981, "learning_rate": 8.678806191884352e-05, "loss": 0.4303, "step": 16500 }, { "epoch": 45.861111111111114, "grad_norm": 0.9417939782142639, "learning_rate": 8.67693930390508e-05, "loss": 0.4161, "step": 16510 }, { "epoch": 45.888888888888886, "grad_norm": 0.9227451682090759, "learning_rate": 8.67507129896873e-05, "loss": 0.4236, "step": 16520 }, { "epoch": 45.916666666666664, "grad_norm": 0.8221470713615417, "learning_rate": 8.673202177642757e-05, "loss": 0.4184, "step": 16530 }, { "epoch": 45.94444444444444, "grad_norm": 0.9904474020004272, "learning_rate": 8.671331940494945e-05, "loss": 0.4312, "step": 16540 }, { "epoch": 45.97222222222222, "grad_norm": 0.9185703992843628, "learning_rate": 8.669460588093427e-05, "loss": 0.4339, "step": 16550 }, { "epoch": 46.0, "grad_norm": 0.8771454691886902, "learning_rate": 8.667588121006667e-05, "loss": 0.4134, "step": 16560 }, { "epoch": 46.02777777777778, "grad_norm": 0.882063627243042, "learning_rate": 8.665714539803475e-05, "loss": 0.4288, "step": 16570 }, { "epoch": 46.05555555555556, "grad_norm": 0.94566810131073, "learning_rate": 8.663839845052993e-05, "loss": 0.4433, "step": 16580 }, { "epoch": 46.083333333333336, "grad_norm": 1.003836750984192, "learning_rate": 8.661964037324703e-05, "loss": 0.424, "step": 16590 }, { "epoch": 46.111111111111114, "grad_norm": 1.0045000314712524, "learning_rate": 8.660087117188427e-05, "loss": 0.4211, "step": 16600 }, { "epoch": 46.138888888888886, "grad_norm": 0.9710845351219177, "learning_rate": 8.658209085214325e-05, "loss": 0.4328, "step": 16610 }, { "epoch": 46.166666666666664, "grad_norm": 1.0418941974639893, "learning_rate": 8.656329941972891e-05, "loss": 0.424, "step": 16620 }, { "epoch": 46.19444444444444, "grad_norm": 0.9480881690979004, "learning_rate": 8.654449688034963e-05, "loss": 0.4224, "step": 16630 }, { "epoch": 46.22222222222222, "grad_norm": 0.9350104928016663, "learning_rate": 8.652568323971706e-05, "loss": 0.4249, "step": 16640 }, { "epoch": 46.25, "grad_norm": 0.9301325678825378, "learning_rate": 8.650685850354636e-05, "loss": 0.4274, "step": 16650 }, { "epoch": 46.27777777777778, "grad_norm": 0.8905588984489441, "learning_rate": 8.648802267755593e-05, "loss": 0.422, "step": 16660 }, { "epoch": 46.30555555555556, "grad_norm": 0.9650471806526184, "learning_rate": 8.646917576746764e-05, "loss": 0.4301, "step": 16670 }, { "epoch": 46.333333333333336, "grad_norm": 0.860927402973175, "learning_rate": 8.645031777900666e-05, "loss": 0.4223, "step": 16680 }, { "epoch": 46.361111111111114, "grad_norm": 0.9284091591835022, "learning_rate": 8.643144871790154e-05, "loss": 0.4151, "step": 16690 }, { "epoch": 46.388888888888886, "grad_norm": 1.015234112739563, "learning_rate": 8.641256858988424e-05, "loss": 0.4342, "step": 16700 }, { "epoch": 46.416666666666664, "grad_norm": 0.8521730303764343, "learning_rate": 8.639367740069e-05, "loss": 0.4159, "step": 16710 }, { "epoch": 46.44444444444444, "grad_norm": 0.9030424952507019, "learning_rate": 8.63747751560575e-05, "loss": 0.4268, "step": 16720 }, { "epoch": 46.47222222222222, "grad_norm": 1.011919617652893, "learning_rate": 8.635586186172871e-05, "loss": 0.4207, "step": 16730 }, { "epoch": 46.5, "grad_norm": 0.8715218305587769, "learning_rate": 8.633693752344902e-05, "loss": 0.4205, "step": 16740 }, { "epoch": 46.52777777777778, "grad_norm": 0.9386000633239746, "learning_rate": 8.631800214696713e-05, "loss": 0.419, "step": 16750 }, { "epoch": 46.55555555555556, "grad_norm": 1.0022157430648804, "learning_rate": 8.629905573803511e-05, "loss": 0.431, "step": 16760 }, { "epoch": 46.583333333333336, "grad_norm": 0.9710389971733093, "learning_rate": 8.628009830240839e-05, "loss": 0.4259, "step": 16770 }, { "epoch": 46.611111111111114, "grad_norm": 0.9531349539756775, "learning_rate": 8.626112984584571e-05, "loss": 0.4188, "step": 16780 }, { "epoch": 46.638888888888886, "grad_norm": 0.883183479309082, "learning_rate": 8.62421503741092e-05, "loss": 0.4235, "step": 16790 }, { "epoch": 46.666666666666664, "grad_norm": 0.8836976885795593, "learning_rate": 8.622315989296432e-05, "loss": 0.4246, "step": 16800 }, { "epoch": 46.69444444444444, "grad_norm": 0.9326961636543274, "learning_rate": 8.62041584081799e-05, "loss": 0.4261, "step": 16810 }, { "epoch": 46.72222222222222, "grad_norm": 0.8993636965751648, "learning_rate": 8.618514592552807e-05, "loss": 0.4339, "step": 16820 }, { "epoch": 46.75, "grad_norm": 1.202780842781067, "learning_rate": 8.616612245078431e-05, "loss": 0.4241, "step": 16830 }, { "epoch": 46.77777777777778, "grad_norm": 1.1146106719970703, "learning_rate": 8.614708798972746e-05, "loss": 0.4177, "step": 16840 }, { "epoch": 46.80555555555556, "grad_norm": 1.0546807050704956, "learning_rate": 8.61280425481397e-05, "loss": 0.4265, "step": 16850 }, { "epoch": 46.833333333333336, "grad_norm": 0.9110280275344849, "learning_rate": 8.61089861318065e-05, "loss": 0.4186, "step": 16860 }, { "epoch": 46.861111111111114, "grad_norm": 0.9186534285545349, "learning_rate": 8.608991874651673e-05, "loss": 0.4059, "step": 16870 }, { "epoch": 46.888888888888886, "grad_norm": 0.9660666584968567, "learning_rate": 8.607084039806255e-05, "loss": 0.4268, "step": 16880 }, { "epoch": 46.916666666666664, "grad_norm": 1.0355039834976196, "learning_rate": 8.605175109223944e-05, "loss": 0.4266, "step": 16890 }, { "epoch": 46.94444444444444, "grad_norm": 1.097741961479187, "learning_rate": 8.603265083484624e-05, "loss": 0.4502, "step": 16900 }, { "epoch": 46.97222222222222, "grad_norm": 0.96333909034729, "learning_rate": 8.60135396316851e-05, "loss": 0.426, "step": 16910 }, { "epoch": 47.0, "grad_norm": 1.0305421352386475, "learning_rate": 8.599441748856152e-05, "loss": 0.4166, "step": 16920 }, { "epoch": 47.02777777777778, "grad_norm": 0.965178370475769, "learning_rate": 8.597528441128427e-05, "loss": 0.4207, "step": 16930 }, { "epoch": 47.05555555555556, "grad_norm": 0.9455260634422302, "learning_rate": 8.595614040566549e-05, "loss": 0.4278, "step": 16940 }, { "epoch": 47.083333333333336, "grad_norm": 0.9581805467605591, "learning_rate": 8.593698547752063e-05, "loss": 0.4078, "step": 16950 }, { "epoch": 47.111111111111114, "grad_norm": 0.8906053900718689, "learning_rate": 8.591781963266843e-05, "loss": 0.4306, "step": 16960 }, { "epoch": 47.138888888888886, "grad_norm": 1.0253243446350098, "learning_rate": 8.5898642876931e-05, "loss": 0.4295, "step": 16970 }, { "epoch": 47.166666666666664, "grad_norm": 0.9197062849998474, "learning_rate": 8.587945521613369e-05, "loss": 0.4162, "step": 16980 }, { "epoch": 47.19444444444444, "grad_norm": 1.0166668891906738, "learning_rate": 8.586025665610524e-05, "loss": 0.422, "step": 16990 }, { "epoch": 47.22222222222222, "grad_norm": 0.9486523270606995, "learning_rate": 8.584104720267765e-05, "loss": 0.4384, "step": 17000 }, { "epoch": 47.25, "grad_norm": 0.880485475063324, "learning_rate": 8.582182686168625e-05, "loss": 0.4142, "step": 17010 }, { "epoch": 47.27777777777778, "grad_norm": 0.9764683842658997, "learning_rate": 8.580259563896967e-05, "loss": 0.429, "step": 17020 }, { "epoch": 47.30555555555556, "grad_norm": 0.992422878742218, "learning_rate": 8.578335354036983e-05, "loss": 0.4273, "step": 17030 }, { "epoch": 47.333333333333336, "grad_norm": 0.9280513525009155, "learning_rate": 8.576410057173201e-05, "loss": 0.4227, "step": 17040 }, { "epoch": 47.361111111111114, "grad_norm": 0.9325366020202637, "learning_rate": 8.574483673890474e-05, "loss": 0.4099, "step": 17050 }, { "epoch": 47.388888888888886, "grad_norm": 0.9867041707038879, "learning_rate": 8.572556204773983e-05, "loss": 0.4255, "step": 17060 }, { "epoch": 47.416666666666664, "grad_norm": 0.9657875299453735, "learning_rate": 8.570627650409246e-05, "loss": 0.4256, "step": 17070 }, { "epoch": 47.44444444444444, "grad_norm": 0.90130215883255, "learning_rate": 8.568698011382107e-05, "loss": 0.4406, "step": 17080 }, { "epoch": 47.47222222222222, "grad_norm": 0.9096490144729614, "learning_rate": 8.566767288278738e-05, "loss": 0.4101, "step": 17090 }, { "epoch": 47.5, "grad_norm": 0.8741828799247742, "learning_rate": 8.56483548168564e-05, "loss": 0.427, "step": 17100 }, { "epoch": 47.52777777777778, "grad_norm": 0.9453229904174805, "learning_rate": 8.562902592189648e-05, "loss": 0.4356, "step": 17110 }, { "epoch": 47.55555555555556, "grad_norm": 0.8867805004119873, "learning_rate": 8.560968620377921e-05, "loss": 0.4239, "step": 17120 }, { "epoch": 47.583333333333336, "grad_norm": 0.9935838580131531, "learning_rate": 8.559033566837951e-05, "loss": 0.4388, "step": 17130 }, { "epoch": 47.611111111111114, "grad_norm": 0.927003800868988, "learning_rate": 8.557097432157551e-05, "loss": 0.4211, "step": 17140 }, { "epoch": 47.638888888888886, "grad_norm": 0.9030565619468689, "learning_rate": 8.555160216924872e-05, "loss": 0.4294, "step": 17150 }, { "epoch": 47.666666666666664, "grad_norm": 0.8997489809989929, "learning_rate": 8.55322192172839e-05, "loss": 0.4226, "step": 17160 }, { "epoch": 47.69444444444444, "grad_norm": 0.9639886021614075, "learning_rate": 8.551282547156902e-05, "loss": 0.429, "step": 17170 }, { "epoch": 47.72222222222222, "grad_norm": 1.0806025266647339, "learning_rate": 8.549342093799544e-05, "loss": 0.4312, "step": 17180 }, { "epoch": 47.75, "grad_norm": 0.9444421529769897, "learning_rate": 8.547400562245773e-05, "loss": 0.4174, "step": 17190 }, { "epoch": 47.77777777777778, "grad_norm": 1.2358028888702393, "learning_rate": 8.545457953085374e-05, "loss": 0.4253, "step": 17200 }, { "epoch": 47.80555555555556, "grad_norm": 0.9483662247657776, "learning_rate": 8.543514266908463e-05, "loss": 0.4274, "step": 17210 }, { "epoch": 47.833333333333336, "grad_norm": 0.8504269123077393, "learning_rate": 8.541569504305478e-05, "loss": 0.4248, "step": 17220 }, { "epoch": 47.861111111111114, "grad_norm": 0.9647966623306274, "learning_rate": 8.539623665867187e-05, "loss": 0.4319, "step": 17230 }, { "epoch": 47.888888888888886, "grad_norm": 0.8794550895690918, "learning_rate": 8.537676752184685e-05, "loss": 0.4151, "step": 17240 }, { "epoch": 47.916666666666664, "grad_norm": 0.9000657796859741, "learning_rate": 8.53572876384939e-05, "loss": 0.4242, "step": 17250 }, { "epoch": 47.94444444444444, "grad_norm": 0.9433192014694214, "learning_rate": 8.533779701453056e-05, "loss": 0.4195, "step": 17260 }, { "epoch": 47.97222222222222, "grad_norm": 1.0135650634765625, "learning_rate": 8.53182956558775e-05, "loss": 0.433, "step": 17270 }, { "epoch": 48.0, "grad_norm": 0.8912717700004578, "learning_rate": 8.529878356845877e-05, "loss": 0.437, "step": 17280 }, { "epoch": 48.02777777777778, "grad_norm": 0.9315609931945801, "learning_rate": 8.527926075820158e-05, "loss": 0.4181, "step": 17290 }, { "epoch": 48.05555555555556, "grad_norm": 0.9264905452728271, "learning_rate": 8.525972723103648e-05, "loss": 0.4368, "step": 17300 }, { "epoch": 48.083333333333336, "grad_norm": 0.9959391951560974, "learning_rate": 8.524018299289722e-05, "loss": 0.4194, "step": 17310 }, { "epoch": 48.111111111111114, "grad_norm": 0.8369890451431274, "learning_rate": 8.522062804972083e-05, "loss": 0.4158, "step": 17320 }, { "epoch": 48.138888888888886, "grad_norm": 1.070787787437439, "learning_rate": 8.520106240744759e-05, "loss": 0.44, "step": 17330 }, { "epoch": 48.166666666666664, "grad_norm": 0.9919176697731018, "learning_rate": 8.518148607202102e-05, "loss": 0.4283, "step": 17340 }, { "epoch": 48.19444444444444, "grad_norm": 0.9451998472213745, "learning_rate": 8.51618990493879e-05, "loss": 0.43, "step": 17350 }, { "epoch": 48.22222222222222, "grad_norm": 0.8570332527160645, "learning_rate": 8.514230134549823e-05, "loss": 0.4298, "step": 17360 }, { "epoch": 48.25, "grad_norm": 0.8598370552062988, "learning_rate": 8.51226929663053e-05, "loss": 0.4095, "step": 17370 }, { "epoch": 48.27777777777778, "grad_norm": 0.8686737418174744, "learning_rate": 8.51030739177656e-05, "loss": 0.4158, "step": 17380 }, { "epoch": 48.30555555555556, "grad_norm": 0.9076867699623108, "learning_rate": 8.508344420583889e-05, "loss": 0.4178, "step": 17390 }, { "epoch": 48.333333333333336, "grad_norm": 0.8441874980926514, "learning_rate": 8.506380383648816e-05, "loss": 0.4115, "step": 17400 }, { "epoch": 48.361111111111114, "grad_norm": 1.0170507431030273, "learning_rate": 8.504415281567963e-05, "loss": 0.4322, "step": 17410 }, { "epoch": 48.388888888888886, "grad_norm": 1.0828900337219238, "learning_rate": 8.502449114938275e-05, "loss": 0.44, "step": 17420 }, { "epoch": 48.416666666666664, "grad_norm": 1.0051491260528564, "learning_rate": 8.500481884357025e-05, "loss": 0.4254, "step": 17430 }, { "epoch": 48.44444444444444, "grad_norm": 0.9548107385635376, "learning_rate": 8.498513590421801e-05, "loss": 0.4223, "step": 17440 }, { "epoch": 48.47222222222222, "grad_norm": 0.9325215816497803, "learning_rate": 8.496544233730522e-05, "loss": 0.4162, "step": 17450 }, { "epoch": 48.5, "grad_norm": 0.9446883201599121, "learning_rate": 8.494573814881426e-05, "loss": 0.4228, "step": 17460 }, { "epoch": 48.52777777777778, "grad_norm": 0.912699282169342, "learning_rate": 8.492602334473074e-05, "loss": 0.415, "step": 17470 }, { "epoch": 48.55555555555556, "grad_norm": 0.8835820555686951, "learning_rate": 8.49062979310435e-05, "loss": 0.4319, "step": 17480 }, { "epoch": 48.583333333333336, "grad_norm": 0.9277591109275818, "learning_rate": 8.488656191374458e-05, "loss": 0.4173, "step": 17490 }, { "epoch": 48.611111111111114, "grad_norm": 0.9737343788146973, "learning_rate": 8.48668152988293e-05, "loss": 0.4097, "step": 17500 }, { "epoch": 48.638888888888886, "grad_norm": 0.9362115859985352, "learning_rate": 8.484705809229612e-05, "loss": 0.407, "step": 17510 }, { "epoch": 48.666666666666664, "grad_norm": 0.9207050204277039, "learning_rate": 8.482729030014677e-05, "loss": 0.4158, "step": 17520 }, { "epoch": 48.69444444444444, "grad_norm": 1.119404673576355, "learning_rate": 8.48075119283862e-05, "loss": 0.4222, "step": 17530 }, { "epoch": 48.72222222222222, "grad_norm": 1.0955305099487305, "learning_rate": 8.478772298302254e-05, "loss": 0.4364, "step": 17540 }, { "epoch": 48.75, "grad_norm": 1.1370455026626587, "learning_rate": 8.476792347006716e-05, "loss": 0.4289, "step": 17550 }, { "epoch": 48.77777777777778, "grad_norm": 0.9810451865196228, "learning_rate": 8.474811339553462e-05, "loss": 0.4293, "step": 17560 }, { "epoch": 48.80555555555556, "grad_norm": 0.904763400554657, "learning_rate": 8.47282927654427e-05, "loss": 0.4248, "step": 17570 }, { "epoch": 48.833333333333336, "grad_norm": 0.961844801902771, "learning_rate": 8.470846158581238e-05, "loss": 0.4417, "step": 17580 }, { "epoch": 48.861111111111114, "grad_norm": 0.8774029016494751, "learning_rate": 8.468861986266787e-05, "loss": 0.4172, "step": 17590 }, { "epoch": 48.888888888888886, "grad_norm": 0.938909113407135, "learning_rate": 8.466876760203654e-05, "loss": 0.4105, "step": 17600 }, { "epoch": 48.916666666666664, "grad_norm": 0.8336055874824524, "learning_rate": 8.464890480994898e-05, "loss": 0.4041, "step": 17610 }, { "epoch": 48.94444444444444, "grad_norm": 0.920473575592041, "learning_rate": 8.462903149243899e-05, "loss": 0.4201, "step": 17620 }, { "epoch": 48.97222222222222, "grad_norm": 1.0206794738769531, "learning_rate": 8.460914765554357e-05, "loss": 0.4193, "step": 17630 }, { "epoch": 49.0, "grad_norm": 0.9569054841995239, "learning_rate": 8.458925330530288e-05, "loss": 0.4268, "step": 17640 }, { "epoch": 49.02777777777778, "grad_norm": 0.9164972901344299, "learning_rate": 8.456934844776032e-05, "loss": 0.415, "step": 17650 }, { "epoch": 49.05555555555556, "grad_norm": 0.9351978898048401, "learning_rate": 8.454943308896246e-05, "loss": 0.4244, "step": 17660 }, { "epoch": 49.083333333333336, "grad_norm": 0.9735944867134094, "learning_rate": 8.452950723495905e-05, "loss": 0.4299, "step": 17670 }, { "epoch": 49.111111111111114, "grad_norm": 0.8994882702827454, "learning_rate": 8.450957089180303e-05, "loss": 0.4143, "step": 17680 }, { "epoch": 49.138888888888886, "grad_norm": 0.9317067265510559, "learning_rate": 8.448962406555055e-05, "loss": 0.4086, "step": 17690 }, { "epoch": 49.166666666666664, "grad_norm": 1.0248125791549683, "learning_rate": 8.446966676226093e-05, "loss": 0.4407, "step": 17700 }, { "epoch": 49.19444444444444, "grad_norm": 0.9329915642738342, "learning_rate": 8.444969898799667e-05, "loss": 0.4259, "step": 17710 }, { "epoch": 49.22222222222222, "grad_norm": 0.8886683583259583, "learning_rate": 8.442972074882343e-05, "loss": 0.4243, "step": 17720 }, { "epoch": 49.25, "grad_norm": 0.9443854093551636, "learning_rate": 8.44097320508101e-05, "loss": 0.431, "step": 17730 }, { "epoch": 49.27777777777778, "grad_norm": 0.9470022916793823, "learning_rate": 8.43897329000287e-05, "loss": 0.4228, "step": 17740 }, { "epoch": 49.30555555555556, "grad_norm": 0.8920340538024902, "learning_rate": 8.436972330255448e-05, "loss": 0.4069, "step": 17750 }, { "epoch": 49.333333333333336, "grad_norm": 0.9840595126152039, "learning_rate": 8.434970326446579e-05, "loss": 0.4133, "step": 17760 }, { "epoch": 49.361111111111114, "grad_norm": 0.9624890685081482, "learning_rate": 8.432967279184418e-05, "loss": 0.4307, "step": 17770 }, { "epoch": 49.388888888888886, "grad_norm": 0.8972107768058777, "learning_rate": 8.430963189077441e-05, "loss": 0.4162, "step": 17780 }, { "epoch": 49.416666666666664, "grad_norm": 0.9980994462966919, "learning_rate": 8.428958056734437e-05, "loss": 0.4139, "step": 17790 }, { "epoch": 49.44444444444444, "grad_norm": 0.9171987175941467, "learning_rate": 8.426951882764513e-05, "loss": 0.4301, "step": 17800 }, { "epoch": 49.47222222222222, "grad_norm": 0.9370334148406982, "learning_rate": 8.424944667777089e-05, "loss": 0.4286, "step": 17810 }, { "epoch": 49.5, "grad_norm": 1.0136364698410034, "learning_rate": 8.422936412381905e-05, "loss": 0.4195, "step": 17820 }, { "epoch": 49.52777777777778, "grad_norm": 0.9237704873085022, "learning_rate": 8.420927117189017e-05, "loss": 0.4081, "step": 17830 }, { "epoch": 49.55555555555556, "grad_norm": 0.9769731163978577, "learning_rate": 8.418916782808795e-05, "loss": 0.4307, "step": 17840 }, { "epoch": 49.583333333333336, "grad_norm": 0.901103675365448, "learning_rate": 8.416905409851926e-05, "loss": 0.4396, "step": 17850 }, { "epoch": 49.611111111111114, "grad_norm": 1.0272295475006104, "learning_rate": 8.41489299892941e-05, "loss": 0.4174, "step": 17860 }, { "epoch": 49.638888888888886, "grad_norm": 1.0851842164993286, "learning_rate": 8.412879550652566e-05, "loss": 0.419, "step": 17870 }, { "epoch": 49.666666666666664, "grad_norm": 0.913102924823761, "learning_rate": 8.410865065633029e-05, "loss": 0.4263, "step": 17880 }, { "epoch": 49.69444444444444, "grad_norm": 0.8615071773529053, "learning_rate": 8.408849544482742e-05, "loss": 0.4176, "step": 17890 }, { "epoch": 49.72222222222222, "grad_norm": 0.8895493149757385, "learning_rate": 8.406832987813968e-05, "loss": 0.4166, "step": 17900 }, { "epoch": 49.75, "grad_norm": 0.8630537986755371, "learning_rate": 8.404815396239286e-05, "loss": 0.4142, "step": 17910 }, { "epoch": 49.77777777777778, "grad_norm": 0.8754549622535706, "learning_rate": 8.402796770371587e-05, "loss": 0.4308, "step": 17920 }, { "epoch": 49.80555555555556, "grad_norm": 0.9959356188774109, "learning_rate": 8.400777110824071e-05, "loss": 0.4204, "step": 17930 }, { "epoch": 49.833333333333336, "grad_norm": 0.8101836442947388, "learning_rate": 8.398756418210263e-05, "loss": 0.4179, "step": 17940 }, { "epoch": 49.861111111111114, "grad_norm": 0.9576742649078369, "learning_rate": 8.396734693143993e-05, "loss": 0.4116, "step": 17950 }, { "epoch": 49.888888888888886, "grad_norm": 1.0879943370819092, "learning_rate": 8.39471193623941e-05, "loss": 0.4282, "step": 17960 }, { "epoch": 49.916666666666664, "grad_norm": 1.011393666267395, "learning_rate": 8.392688148110974e-05, "loss": 0.4252, "step": 17970 }, { "epoch": 49.94444444444444, "grad_norm": 0.9468705058097839, "learning_rate": 8.390663329373456e-05, "loss": 0.4251, "step": 17980 }, { "epoch": 49.97222222222222, "grad_norm": 0.9098077416419983, "learning_rate": 8.388637480641944e-05, "loss": 0.4299, "step": 17990 }, { "epoch": 50.0, "grad_norm": 1.0253474712371826, "learning_rate": 8.386610602531837e-05, "loss": 0.4291, "step": 18000 }, { "epoch": 50.02777777777778, "grad_norm": 1.0010018348693848, "learning_rate": 8.384582695658847e-05, "loss": 0.4268, "step": 18010 }, { "epoch": 50.05555555555556, "grad_norm": 0.8713262677192688, "learning_rate": 8.382553760638999e-05, "loss": 0.4266, "step": 18020 }, { "epoch": 50.083333333333336, "grad_norm": 0.7707643508911133, "learning_rate": 8.380523798088631e-05, "loss": 0.4228, "step": 18030 }, { "epoch": 50.111111111111114, "grad_norm": 0.888548731803894, "learning_rate": 8.378492808624389e-05, "loss": 0.4214, "step": 18040 }, { "epoch": 50.138888888888886, "grad_norm": 0.8794466257095337, "learning_rate": 8.376460792863237e-05, "loss": 0.4237, "step": 18050 }, { "epoch": 50.166666666666664, "grad_norm": 0.9235266447067261, "learning_rate": 8.374427751422444e-05, "loss": 0.4219, "step": 18060 }, { "epoch": 50.19444444444444, "grad_norm": 0.8565009832382202, "learning_rate": 8.3723936849196e-05, "loss": 0.4142, "step": 18070 }, { "epoch": 50.22222222222222, "grad_norm": 0.945402204990387, "learning_rate": 8.370358593972595e-05, "loss": 0.4185, "step": 18080 }, { "epoch": 50.25, "grad_norm": 0.8532956838607788, "learning_rate": 8.36832247919964e-05, "loss": 0.4066, "step": 18090 }, { "epoch": 50.27777777777778, "grad_norm": 0.8816715478897095, "learning_rate": 8.36628534121925e-05, "loss": 0.4122, "step": 18100 }, { "epoch": 50.30555555555556, "grad_norm": 1.0124456882476807, "learning_rate": 8.364247180650254e-05, "loss": 0.4282, "step": 18110 }, { "epoch": 50.333333333333336, "grad_norm": 0.9936186075210571, "learning_rate": 8.362207998111794e-05, "loss": 0.4218, "step": 18120 }, { "epoch": 50.361111111111114, "grad_norm": 0.8877413272857666, "learning_rate": 8.360167794223318e-05, "loss": 0.4069, "step": 18130 }, { "epoch": 50.388888888888886, "grad_norm": 0.9249780178070068, "learning_rate": 8.358126569604586e-05, "loss": 0.429, "step": 18140 }, { "epoch": 50.416666666666664, "grad_norm": 0.8679052591323853, "learning_rate": 8.356084324875668e-05, "loss": 0.4081, "step": 18150 }, { "epoch": 50.44444444444444, "grad_norm": 0.9796388745307922, "learning_rate": 8.354041060656945e-05, "loss": 0.431, "step": 18160 }, { "epoch": 50.47222222222222, "grad_norm": 0.9257802367210388, "learning_rate": 8.351996777569106e-05, "loss": 0.4054, "step": 18170 }, { "epoch": 50.5, "grad_norm": 0.9250991940498352, "learning_rate": 8.349951476233148e-05, "loss": 0.4243, "step": 18180 }, { "epoch": 50.52777777777778, "grad_norm": 1.0100295543670654, "learning_rate": 8.347905157270386e-05, "loss": 0.432, "step": 18190 }, { "epoch": 50.55555555555556, "grad_norm": 0.9558270573616028, "learning_rate": 8.345857821302432e-05, "loss": 0.4116, "step": 18200 }, { "epoch": 50.583333333333336, "grad_norm": 0.8680241107940674, "learning_rate": 8.343809468951213e-05, "loss": 0.4053, "step": 18210 }, { "epoch": 50.611111111111114, "grad_norm": 0.9914184212684631, "learning_rate": 8.341760100838965e-05, "loss": 0.4172, "step": 18220 }, { "epoch": 50.638888888888886, "grad_norm": 0.9020833373069763, "learning_rate": 8.339709717588233e-05, "loss": 0.4337, "step": 18230 }, { "epoch": 50.666666666666664, "grad_norm": 0.9363406300544739, "learning_rate": 8.33765831982187e-05, "loss": 0.4156, "step": 18240 }, { "epoch": 50.69444444444444, "grad_norm": 0.9646679759025574, "learning_rate": 8.335605908163035e-05, "loss": 0.4306, "step": 18250 }, { "epoch": 50.72222222222222, "grad_norm": 0.9096387028694153, "learning_rate": 8.333552483235196e-05, "loss": 0.4192, "step": 18260 }, { "epoch": 50.75, "grad_norm": 0.9503023028373718, "learning_rate": 8.33149804566213e-05, "loss": 0.4104, "step": 18270 }, { "epoch": 50.77777777777778, "grad_norm": 0.8096219897270203, "learning_rate": 8.329442596067921e-05, "loss": 0.4046, "step": 18280 }, { "epoch": 50.80555555555556, "grad_norm": 0.8024914860725403, "learning_rate": 8.32738613507696e-05, "loss": 0.4179, "step": 18290 }, { "epoch": 50.833333333333336, "grad_norm": 1.11770498752594, "learning_rate": 8.325328663313946e-05, "loss": 0.4228, "step": 18300 }, { "epoch": 50.861111111111114, "grad_norm": 1.050220012664795, "learning_rate": 8.323270181403884e-05, "loss": 0.4192, "step": 18310 }, { "epoch": 50.888888888888886, "grad_norm": 0.8036817908287048, "learning_rate": 8.321210689972086e-05, "loss": 0.3938, "step": 18320 }, { "epoch": 50.916666666666664, "grad_norm": 1.0314773321151733, "learning_rate": 8.319150189644174e-05, "loss": 0.4223, "step": 18330 }, { "epoch": 50.94444444444444, "grad_norm": 0.855839729309082, "learning_rate": 8.31708868104607e-05, "loss": 0.4063, "step": 18340 }, { "epoch": 50.97222222222222, "grad_norm": 0.8422667980194092, "learning_rate": 8.315026164804007e-05, "loss": 0.4051, "step": 18350 }, { "epoch": 51.0, "grad_norm": 0.9332193732261658, "learning_rate": 8.312962641544524e-05, "loss": 0.4204, "step": 18360 }, { "epoch": 51.02777777777778, "grad_norm": 0.9694331288337708, "learning_rate": 8.310898111894465e-05, "loss": 0.4273, "step": 18370 }, { "epoch": 51.05555555555556, "grad_norm": 1.032362937927246, "learning_rate": 8.308832576480977e-05, "loss": 0.4232, "step": 18380 }, { "epoch": 51.083333333333336, "grad_norm": 0.8426670432090759, "learning_rate": 8.306766035931519e-05, "loss": 0.4152, "step": 18390 }, { "epoch": 51.111111111111114, "grad_norm": 0.8726053237915039, "learning_rate": 8.304698490873847e-05, "loss": 0.4311, "step": 18400 }, { "epoch": 51.138888888888886, "grad_norm": 0.9616475105285645, "learning_rate": 8.30262994193603e-05, "loss": 0.4263, "step": 18410 }, { "epoch": 51.166666666666664, "grad_norm": 0.8709384202957153, "learning_rate": 8.300560389746438e-05, "loss": 0.4194, "step": 18420 }, { "epoch": 51.19444444444444, "grad_norm": 0.9013974666595459, "learning_rate": 8.298489834933745e-05, "loss": 0.4198, "step": 18430 }, { "epoch": 51.22222222222222, "grad_norm": 1.0144869089126587, "learning_rate": 8.296418278126934e-05, "loss": 0.4193, "step": 18440 }, { "epoch": 51.25, "grad_norm": 0.9150227904319763, "learning_rate": 8.294345719955284e-05, "loss": 0.425, "step": 18450 }, { "epoch": 51.27777777777778, "grad_norm": 0.9569722414016724, "learning_rate": 8.29227216104839e-05, "loss": 0.4219, "step": 18460 }, { "epoch": 51.30555555555556, "grad_norm": 0.931233286857605, "learning_rate": 8.290197602036137e-05, "loss": 0.4099, "step": 18470 }, { "epoch": 51.333333333333336, "grad_norm": 0.9834687113761902, "learning_rate": 8.288122043548725e-05, "loss": 0.4058, "step": 18480 }, { "epoch": 51.361111111111114, "grad_norm": 0.8713189959526062, "learning_rate": 8.286045486216657e-05, "loss": 0.4237, "step": 18490 }, { "epoch": 51.388888888888886, "grad_norm": 0.9478954076766968, "learning_rate": 8.283967930670733e-05, "loss": 0.4216, "step": 18500 }, { "epoch": 51.416666666666664, "grad_norm": 0.9755882024765015, "learning_rate": 8.281889377542058e-05, "loss": 0.4278, "step": 18510 }, { "epoch": 51.44444444444444, "grad_norm": 0.9334542155265808, "learning_rate": 8.279809827462045e-05, "loss": 0.4166, "step": 18520 }, { "epoch": 51.47222222222222, "grad_norm": 0.8277208805084229, "learning_rate": 8.277729281062402e-05, "loss": 0.4201, "step": 18530 }, { "epoch": 51.5, "grad_norm": 0.9446923136711121, "learning_rate": 8.27564773897515e-05, "loss": 0.4308, "step": 18540 }, { "epoch": 51.52777777777778, "grad_norm": 0.9163891077041626, "learning_rate": 8.273565201832602e-05, "loss": 0.4146, "step": 18550 }, { "epoch": 51.55555555555556, "grad_norm": 0.9056198000907898, "learning_rate": 8.27148167026738e-05, "loss": 0.4273, "step": 18560 }, { "epoch": 51.583333333333336, "grad_norm": 0.8873075842857361, "learning_rate": 8.269397144912405e-05, "loss": 0.4163, "step": 18570 }, { "epoch": 51.611111111111114, "grad_norm": 0.8640052080154419, "learning_rate": 8.267311626400899e-05, "loss": 0.4063, "step": 18580 }, { "epoch": 51.638888888888886, "grad_norm": 1.1188410520553589, "learning_rate": 8.26522511536639e-05, "loss": 0.417, "step": 18590 }, { "epoch": 51.666666666666664, "grad_norm": 0.8533560633659363, "learning_rate": 8.263137612442706e-05, "loss": 0.4274, "step": 18600 }, { "epoch": 51.69444444444444, "grad_norm": 1.1565194129943848, "learning_rate": 8.261049118263971e-05, "loss": 0.427, "step": 18610 }, { "epoch": 51.72222222222222, "grad_norm": 0.9334111213684082, "learning_rate": 8.258959633464619e-05, "loss": 0.413, "step": 18620 }, { "epoch": 51.75, "grad_norm": 0.9353922009468079, "learning_rate": 8.256869158679377e-05, "loss": 0.4151, "step": 18630 }, { "epoch": 51.77777777777778, "grad_norm": 0.860293984413147, "learning_rate": 8.254777694543278e-05, "loss": 0.4072, "step": 18640 }, { "epoch": 51.80555555555556, "grad_norm": 0.9611693024635315, "learning_rate": 8.252685241691651e-05, "loss": 0.4084, "step": 18650 }, { "epoch": 51.833333333333336, "grad_norm": 0.8948472142219543, "learning_rate": 8.250591800760133e-05, "loss": 0.4167, "step": 18660 }, { "epoch": 51.861111111111114, "grad_norm": 1.0842273235321045, "learning_rate": 8.248497372384649e-05, "loss": 0.4396, "step": 18670 }, { "epoch": 51.888888888888886, "grad_norm": 0.8858404755592346, "learning_rate": 8.246401957201437e-05, "loss": 0.414, "step": 18680 }, { "epoch": 51.916666666666664, "grad_norm": 0.9961584210395813, "learning_rate": 8.244305555847027e-05, "loss": 0.4232, "step": 18690 }, { "epoch": 51.94444444444444, "grad_norm": 0.9215061664581299, "learning_rate": 8.24220816895825e-05, "loss": 0.4209, "step": 18700 }, { "epoch": 51.97222222222222, "grad_norm": 0.9649695754051208, "learning_rate": 8.240109797172237e-05, "loss": 0.4187, "step": 18710 }, { "epoch": 52.0, "grad_norm": 0.8251422047615051, "learning_rate": 8.238010441126416e-05, "loss": 0.413, "step": 18720 }, { "epoch": 52.02777777777778, "grad_norm": 0.9057660102844238, "learning_rate": 8.23591010145852e-05, "loss": 0.4114, "step": 18730 }, { "epoch": 52.05555555555556, "grad_norm": 0.9638702869415283, "learning_rate": 8.233808778806571e-05, "loss": 0.4216, "step": 18740 }, { "epoch": 52.083333333333336, "grad_norm": 0.8688040971755981, "learning_rate": 8.231706473808903e-05, "loss": 0.4079, "step": 18750 }, { "epoch": 52.111111111111114, "grad_norm": 0.9070979356765747, "learning_rate": 8.229603187104133e-05, "loss": 0.4092, "step": 18760 }, { "epoch": 52.138888888888886, "grad_norm": 0.9055222272872925, "learning_rate": 8.22749891933119e-05, "loss": 0.4165, "step": 18770 }, { "epoch": 52.166666666666664, "grad_norm": 0.915960967540741, "learning_rate": 8.225393671129291e-05, "loss": 0.411, "step": 18780 }, { "epoch": 52.19444444444444, "grad_norm": 0.8769609332084656, "learning_rate": 8.223287443137957e-05, "loss": 0.4172, "step": 18790 }, { "epoch": 52.22222222222222, "grad_norm": 1.0551645755767822, "learning_rate": 8.221180235997004e-05, "loss": 0.4095, "step": 18800 }, { "epoch": 52.25, "grad_norm": 0.8098524212837219, "learning_rate": 8.219072050346544e-05, "loss": 0.4244, "step": 18810 }, { "epoch": 52.27777777777778, "grad_norm": 0.9623449444770813, "learning_rate": 8.216962886826992e-05, "loss": 0.4264, "step": 18820 }, { "epoch": 52.30555555555556, "grad_norm": 1.0355383157730103, "learning_rate": 8.214852746079054e-05, "loss": 0.4115, "step": 18830 }, { "epoch": 52.333333333333336, "grad_norm": 0.8809351921081543, "learning_rate": 8.212741628743732e-05, "loss": 0.4071, "step": 18840 }, { "epoch": 52.361111111111114, "grad_norm": 0.9319239854812622, "learning_rate": 8.210629535462333e-05, "loss": 0.42, "step": 18850 }, { "epoch": 52.388888888888886, "grad_norm": 0.8748042583465576, "learning_rate": 8.208516466876453e-05, "loss": 0.4059, "step": 18860 }, { "epoch": 52.416666666666664, "grad_norm": 0.878713071346283, "learning_rate": 8.206402423627986e-05, "loss": 0.4116, "step": 18870 }, { "epoch": 52.44444444444444, "grad_norm": 0.8745724558830261, "learning_rate": 8.204287406359124e-05, "loss": 0.423, "step": 18880 }, { "epoch": 52.47222222222222, "grad_norm": 0.8920221328735352, "learning_rate": 8.20217141571235e-05, "loss": 0.4037, "step": 18890 }, { "epoch": 52.5, "grad_norm": 0.9143213033676147, "learning_rate": 8.200054452330449e-05, "loss": 0.4199, "step": 18900 }, { "epoch": 52.52777777777778, "grad_norm": 1.0100420713424683, "learning_rate": 8.197936516856499e-05, "loss": 0.4079, "step": 18910 }, { "epoch": 52.55555555555556, "grad_norm": 0.9464031457901001, "learning_rate": 8.195817609933871e-05, "loss": 0.4363, "step": 18920 }, { "epoch": 52.583333333333336, "grad_norm": 0.9695948362350464, "learning_rate": 8.193697732206233e-05, "loss": 0.4157, "step": 18930 }, { "epoch": 52.611111111111114, "grad_norm": 0.9687411785125732, "learning_rate": 8.19157688431755e-05, "loss": 0.4154, "step": 18940 }, { "epoch": 52.638888888888886, "grad_norm": 0.8963485956192017, "learning_rate": 8.189455066912077e-05, "loss": 0.4099, "step": 18950 }, { "epoch": 52.666666666666664, "grad_norm": 1.1669154167175293, "learning_rate": 8.187332280634369e-05, "loss": 0.4212, "step": 18960 }, { "epoch": 52.69444444444444, "grad_norm": 0.8450939059257507, "learning_rate": 8.18520852612927e-05, "loss": 0.4103, "step": 18970 }, { "epoch": 52.72222222222222, "grad_norm": 0.8724775910377502, "learning_rate": 8.183083804041921e-05, "loss": 0.405, "step": 18980 }, { "epoch": 52.75, "grad_norm": 0.8454248905181885, "learning_rate": 8.180958115017757e-05, "loss": 0.3993, "step": 18990 }, { "epoch": 52.77777777777778, "grad_norm": 0.891711950302124, "learning_rate": 8.178831459702505e-05, "loss": 0.4024, "step": 19000 }, { "epoch": 52.80555555555556, "grad_norm": 0.970490574836731, "learning_rate": 8.17670383874219e-05, "loss": 0.4128, "step": 19010 }, { "epoch": 52.833333333333336, "grad_norm": 0.9434512853622437, "learning_rate": 8.174575252783124e-05, "loss": 0.4171, "step": 19020 }, { "epoch": 52.861111111111114, "grad_norm": 0.9864383339881897, "learning_rate": 8.172445702471914e-05, "loss": 0.418, "step": 19030 }, { "epoch": 52.888888888888886, "grad_norm": 0.9048617482185364, "learning_rate": 8.170315188455466e-05, "loss": 0.4177, "step": 19040 }, { "epoch": 52.916666666666664, "grad_norm": 0.8970410823822021, "learning_rate": 8.168183711380969e-05, "loss": 0.4034, "step": 19050 }, { "epoch": 52.94444444444444, "grad_norm": 0.8246312737464905, "learning_rate": 8.166051271895913e-05, "loss": 0.3961, "step": 19060 }, { "epoch": 52.97222222222222, "grad_norm": 0.9078124165534973, "learning_rate": 8.163917870648075e-05, "loss": 0.4123, "step": 19070 }, { "epoch": 53.0, "grad_norm": 0.8463596701622009, "learning_rate": 8.161783508285526e-05, "loss": 0.4135, "step": 19080 }, { "epoch": 53.02777777777778, "grad_norm": 0.9845765233039856, "learning_rate": 8.159648185456628e-05, "loss": 0.4091, "step": 19090 }, { "epoch": 53.05555555555556, "grad_norm": 1.1391329765319824, "learning_rate": 8.157511902810038e-05, "loss": 0.4225, "step": 19100 }, { "epoch": 53.083333333333336, "grad_norm": 0.8940687775611877, "learning_rate": 8.155374660994701e-05, "loss": 0.4061, "step": 19110 }, { "epoch": 53.111111111111114, "grad_norm": 0.8360753059387207, "learning_rate": 8.153236460659857e-05, "loss": 0.4083, "step": 19120 }, { "epoch": 53.138888888888886, "grad_norm": 0.8998532891273499, "learning_rate": 8.151097302455031e-05, "loss": 0.4115, "step": 19130 }, { "epoch": 53.166666666666664, "grad_norm": 0.8937026858329773, "learning_rate": 8.148957187030044e-05, "loss": 0.4095, "step": 19140 }, { "epoch": 53.19444444444444, "grad_norm": 0.8352879881858826, "learning_rate": 8.146816115035006e-05, "loss": 0.413, "step": 19150 }, { "epoch": 53.22222222222222, "grad_norm": 0.9858429431915283, "learning_rate": 8.14467408712032e-05, "loss": 0.4179, "step": 19160 }, { "epoch": 53.25, "grad_norm": 0.8759896159172058, "learning_rate": 8.142531103936678e-05, "loss": 0.4107, "step": 19170 }, { "epoch": 53.27777777777778, "grad_norm": 1.0843819379806519, "learning_rate": 8.14038716613506e-05, "loss": 0.4137, "step": 19180 }, { "epoch": 53.30555555555556, "grad_norm": 0.9571003317832947, "learning_rate": 8.138242274366736e-05, "loss": 0.4114, "step": 19190 }, { "epoch": 53.333333333333336, "grad_norm": 0.879183828830719, "learning_rate": 8.136096429283271e-05, "loss": 0.4098, "step": 19200 }, { "epoch": 53.361111111111114, "grad_norm": 0.8082855939865112, "learning_rate": 8.133949631536515e-05, "loss": 0.3972, "step": 19210 }, { "epoch": 53.388888888888886, "grad_norm": 1.0756891965866089, "learning_rate": 8.131801881778607e-05, "loss": 0.4133, "step": 19220 }, { "epoch": 53.416666666666664, "grad_norm": 1.1014193296432495, "learning_rate": 8.129653180661978e-05, "loss": 0.4124, "step": 19230 }, { "epoch": 53.44444444444444, "grad_norm": 0.8833562731742859, "learning_rate": 8.127503528839346e-05, "loss": 0.4176, "step": 19240 }, { "epoch": 53.47222222222222, "grad_norm": 0.865941047668457, "learning_rate": 8.125352926963721e-05, "loss": 0.416, "step": 19250 }, { "epoch": 53.5, "grad_norm": 0.7916058897972107, "learning_rate": 8.123201375688395e-05, "loss": 0.4121, "step": 19260 }, { "epoch": 53.52777777777778, "grad_norm": 0.8568618297576904, "learning_rate": 8.121048875666954e-05, "loss": 0.4065, "step": 19270 }, { "epoch": 53.55555555555556, "grad_norm": 1.0087318420410156, "learning_rate": 8.118895427553274e-05, "loss": 0.4064, "step": 19280 }, { "epoch": 53.583333333333336, "grad_norm": 0.9849892258644104, "learning_rate": 8.116741032001511e-05, "loss": 0.4223, "step": 19290 }, { "epoch": 53.611111111111114, "grad_norm": 1.0062222480773926, "learning_rate": 8.114585689666114e-05, "loss": 0.4147, "step": 19300 }, { "epoch": 53.638888888888886, "grad_norm": 0.9486151337623596, "learning_rate": 8.112429401201821e-05, "loss": 0.4094, "step": 19310 }, { "epoch": 53.666666666666664, "grad_norm": 0.8162830471992493, "learning_rate": 8.110272167263656e-05, "loss": 0.3953, "step": 19320 }, { "epoch": 53.69444444444444, "grad_norm": 0.8938644528388977, "learning_rate": 8.108113988506929e-05, "loss": 0.4185, "step": 19330 }, { "epoch": 53.72222222222222, "grad_norm": 0.8699896335601807, "learning_rate": 8.105954865587235e-05, "loss": 0.4141, "step": 19340 }, { "epoch": 53.75, "grad_norm": 0.8811201453208923, "learning_rate": 8.103794799160463e-05, "loss": 0.4136, "step": 19350 }, { "epoch": 53.77777777777778, "grad_norm": 0.8444754481315613, "learning_rate": 8.101633789882781e-05, "loss": 0.4028, "step": 19360 }, { "epoch": 53.80555555555556, "grad_norm": 0.8844550251960754, "learning_rate": 8.099471838410648e-05, "loss": 0.4083, "step": 19370 }, { "epoch": 53.833333333333336, "grad_norm": 0.8328917026519775, "learning_rate": 8.097308945400806e-05, "loss": 0.4152, "step": 19380 }, { "epoch": 53.861111111111114, "grad_norm": 0.9253825545310974, "learning_rate": 8.095145111510288e-05, "loss": 0.4179, "step": 19390 }, { "epoch": 53.888888888888886, "grad_norm": 0.835314929485321, "learning_rate": 8.092980337396406e-05, "loss": 0.4031, "step": 19400 }, { "epoch": 53.916666666666664, "grad_norm": 0.93366938829422, "learning_rate": 8.090814623716763e-05, "loss": 0.4151, "step": 19410 }, { "epoch": 53.94444444444444, "grad_norm": 0.7700728178024292, "learning_rate": 8.088647971129246e-05, "loss": 0.3984, "step": 19420 }, { "epoch": 53.97222222222222, "grad_norm": 0.9065217971801758, "learning_rate": 8.086480380292026e-05, "loss": 0.4077, "step": 19430 }, { "epoch": 54.0, "grad_norm": 0.8761457204818726, "learning_rate": 8.084311851863562e-05, "loss": 0.42, "step": 19440 }, { "epoch": 54.02777777777778, "grad_norm": 0.8749204277992249, "learning_rate": 8.082142386502591e-05, "loss": 0.404, "step": 19450 }, { "epoch": 54.05555555555556, "grad_norm": 0.8522437214851379, "learning_rate": 8.079971984868145e-05, "loss": 0.4012, "step": 19460 }, { "epoch": 54.083333333333336, "grad_norm": 0.8202647566795349, "learning_rate": 8.077800647619532e-05, "loss": 0.4175, "step": 19470 }, { "epoch": 54.111111111111114, "grad_norm": 0.879338800907135, "learning_rate": 8.075628375416345e-05, "loss": 0.4246, "step": 19480 }, { "epoch": 54.138888888888886, "grad_norm": 0.8662365078926086, "learning_rate": 8.073455168918464e-05, "loss": 0.4207, "step": 19490 }, { "epoch": 54.166666666666664, "grad_norm": 0.9258108139038086, "learning_rate": 8.071281028786055e-05, "loss": 0.4019, "step": 19500 }, { "epoch": 54.19444444444444, "grad_norm": 0.8164238333702087, "learning_rate": 8.069105955679562e-05, "loss": 0.4006, "step": 19510 }, { "epoch": 54.22222222222222, "grad_norm": 0.8904544711112976, "learning_rate": 8.066929950259713e-05, "loss": 0.4156, "step": 19520 }, { "epoch": 54.25, "grad_norm": 0.8808101415634155, "learning_rate": 8.064753013187522e-05, "loss": 0.4247, "step": 19530 }, { "epoch": 54.27777777777778, "grad_norm": 0.9776115417480469, "learning_rate": 8.062575145124289e-05, "loss": 0.4081, "step": 19540 }, { "epoch": 54.30555555555556, "grad_norm": 0.8736504316329956, "learning_rate": 8.060396346731587e-05, "loss": 0.4187, "step": 19550 }, { "epoch": 54.333333333333336, "grad_norm": 0.8755725622177124, "learning_rate": 8.058216618671281e-05, "loss": 0.4044, "step": 19560 }, { "epoch": 54.361111111111114, "grad_norm": 0.8058453798294067, "learning_rate": 8.056035961605514e-05, "loss": 0.4209, "step": 19570 }, { "epoch": 54.388888888888886, "grad_norm": 0.8635553121566772, "learning_rate": 8.05385437619671e-05, "loss": 0.4146, "step": 19580 }, { "epoch": 54.416666666666664, "grad_norm": 0.8610352277755737, "learning_rate": 8.05167186310758e-05, "loss": 0.4078, "step": 19590 }, { "epoch": 54.44444444444444, "grad_norm": 0.7703658938407898, "learning_rate": 8.049488423001113e-05, "loss": 0.4111, "step": 19600 }, { "epoch": 54.47222222222222, "grad_norm": 0.8802044987678528, "learning_rate": 8.047304056540581e-05, "loss": 0.3985, "step": 19610 }, { "epoch": 54.5, "grad_norm": 0.9646197557449341, "learning_rate": 8.045118764389534e-05, "loss": 0.4064, "step": 19620 }, { "epoch": 54.52777777777778, "grad_norm": 0.8870508670806885, "learning_rate": 8.042932547211809e-05, "loss": 0.4185, "step": 19630 }, { "epoch": 54.55555555555556, "grad_norm": 0.9407327771186829, "learning_rate": 8.04074540567152e-05, "loss": 0.398, "step": 19640 }, { "epoch": 54.583333333333336, "grad_norm": 0.8933085203170776, "learning_rate": 8.038557340433063e-05, "loss": 0.4077, "step": 19650 }, { "epoch": 54.611111111111114, "grad_norm": 0.889249861240387, "learning_rate": 8.036368352161115e-05, "loss": 0.4218, "step": 19660 }, { "epoch": 54.638888888888886, "grad_norm": 0.9500078558921814, "learning_rate": 8.034178441520633e-05, "loss": 0.4276, "step": 19670 }, { "epoch": 54.666666666666664, "grad_norm": 0.8849400281906128, "learning_rate": 8.031987609176852e-05, "loss": 0.3956, "step": 19680 }, { "epoch": 54.69444444444444, "grad_norm": 0.9567634463310242, "learning_rate": 8.02979585579529e-05, "loss": 0.4082, "step": 19690 }, { "epoch": 54.72222222222222, "grad_norm": 0.9152423739433289, "learning_rate": 8.027603182041745e-05, "loss": 0.4171, "step": 19700 }, { "epoch": 54.75, "grad_norm": 0.9751508235931396, "learning_rate": 8.025409588582292e-05, "loss": 0.4024, "step": 19710 }, { "epoch": 54.77777777777778, "grad_norm": 0.9152700901031494, "learning_rate": 8.023215076083288e-05, "loss": 0.4205, "step": 19720 }, { "epoch": 54.80555555555556, "grad_norm": 0.922569215297699, "learning_rate": 8.021019645211367e-05, "loss": 0.4058, "step": 19730 }, { "epoch": 54.833333333333336, "grad_norm": 0.9516170620918274, "learning_rate": 8.018823296633441e-05, "loss": 0.4132, "step": 19740 }, { "epoch": 54.861111111111114, "grad_norm": 0.8759192824363708, "learning_rate": 8.016626031016708e-05, "loss": 0.419, "step": 19750 }, { "epoch": 54.888888888888886, "grad_norm": 0.8799887299537659, "learning_rate": 8.014427849028636e-05, "loss": 0.4014, "step": 19760 }, { "epoch": 54.916666666666664, "grad_norm": 1.0057440996170044, "learning_rate": 8.012228751336974e-05, "loss": 0.4213, "step": 19770 }, { "epoch": 54.94444444444444, "grad_norm": 0.8076949715614319, "learning_rate": 8.01002873860975e-05, "loss": 0.4147, "step": 19780 }, { "epoch": 54.97222222222222, "grad_norm": 0.9208512902259827, "learning_rate": 8.00782781151527e-05, "loss": 0.4032, "step": 19790 }, { "epoch": 55.0, "grad_norm": 0.8662780523300171, "learning_rate": 8.005625970722119e-05, "loss": 0.4118, "step": 19800 }, { "epoch": 55.02777777777778, "grad_norm": 0.8190547227859497, "learning_rate": 8.003423216899158e-05, "loss": 0.4174, "step": 19810 }, { "epoch": 55.05555555555556, "grad_norm": 0.8828972578048706, "learning_rate": 8.001219550715522e-05, "loss": 0.412, "step": 19820 }, { "epoch": 55.083333333333336, "grad_norm": 0.877909779548645, "learning_rate": 7.999014972840632e-05, "loss": 0.421, "step": 19830 }, { "epoch": 55.111111111111114, "grad_norm": 0.8879987597465515, "learning_rate": 7.996809483944174e-05, "loss": 0.4192, "step": 19840 }, { "epoch": 55.138888888888886, "grad_norm": 0.9768377542495728, "learning_rate": 7.994603084696124e-05, "loss": 0.4216, "step": 19850 }, { "epoch": 55.166666666666664, "grad_norm": 0.9530559778213501, "learning_rate": 7.992395775766724e-05, "loss": 0.4207, "step": 19860 }, { "epoch": 55.19444444444444, "grad_norm": 0.8994603157043457, "learning_rate": 7.990187557826497e-05, "loss": 0.4114, "step": 19870 }, { "epoch": 55.22222222222222, "grad_norm": 0.8357743620872498, "learning_rate": 7.987978431546242e-05, "loss": 0.4011, "step": 19880 }, { "epoch": 55.25, "grad_norm": 0.7802899479866028, "learning_rate": 7.985768397597031e-05, "loss": 0.4162, "step": 19890 }, { "epoch": 55.27777777777778, "grad_norm": 0.8305616974830627, "learning_rate": 7.983557456650216e-05, "loss": 0.4113, "step": 19900 }, { "epoch": 55.30555555555556, "grad_norm": 0.8794474005699158, "learning_rate": 7.981345609377422e-05, "loss": 0.3966, "step": 19910 }, { "epoch": 55.333333333333336, "grad_norm": 0.8503965735435486, "learning_rate": 7.97913285645055e-05, "loss": 0.4101, "step": 19920 }, { "epoch": 55.361111111111114, "grad_norm": 0.9946988821029663, "learning_rate": 7.976919198541776e-05, "loss": 0.4046, "step": 19930 }, { "epoch": 55.388888888888886, "grad_norm": 0.8550975322723389, "learning_rate": 7.974704636323548e-05, "loss": 0.4061, "step": 19940 }, { "epoch": 55.416666666666664, "grad_norm": 1.0610251426696777, "learning_rate": 7.972489170468597e-05, "loss": 0.4086, "step": 19950 }, { "epoch": 55.44444444444444, "grad_norm": 1.020185947418213, "learning_rate": 7.970272801649918e-05, "loss": 0.4097, "step": 19960 }, { "epoch": 55.47222222222222, "grad_norm": 0.970829427242279, "learning_rate": 7.96805553054079e-05, "loss": 0.4137, "step": 19970 }, { "epoch": 55.5, "grad_norm": 0.9542768597602844, "learning_rate": 7.965837357814756e-05, "loss": 0.4093, "step": 19980 }, { "epoch": 55.52777777777778, "grad_norm": 0.8546079993247986, "learning_rate": 7.963618284145643e-05, "loss": 0.4243, "step": 19990 }, { "epoch": 55.55555555555556, "grad_norm": 0.8714570999145508, "learning_rate": 7.961398310207544e-05, "loss": 0.4008, "step": 20000 }, { "epoch": 55.583333333333336, "grad_norm": 0.7939913272857666, "learning_rate": 7.95917743667483e-05, "loss": 0.4044, "step": 20010 }, { "epoch": 55.611111111111114, "grad_norm": 0.8933812379837036, "learning_rate": 7.956955664222144e-05, "loss": 0.4026, "step": 20020 }, { "epoch": 55.638888888888886, "grad_norm": 0.904510498046875, "learning_rate": 7.954732993524399e-05, "loss": 0.4126, "step": 20030 }, { "epoch": 55.666666666666664, "grad_norm": 0.9579504728317261, "learning_rate": 7.952509425256786e-05, "loss": 0.4042, "step": 20040 }, { "epoch": 55.69444444444444, "grad_norm": 0.7981271743774414, "learning_rate": 7.950284960094767e-05, "loss": 0.4058, "step": 20050 }, { "epoch": 55.72222222222222, "grad_norm": 0.911158561706543, "learning_rate": 7.948059598714076e-05, "loss": 0.4192, "step": 20060 }, { "epoch": 55.75, "grad_norm": 0.9741163849830627, "learning_rate": 7.945833341790717e-05, "loss": 0.4104, "step": 20070 }, { "epoch": 55.77777777777778, "grad_norm": 0.9079179763793945, "learning_rate": 7.94360619000097e-05, "loss": 0.4051, "step": 20080 }, { "epoch": 55.80555555555556, "grad_norm": 0.8538716435432434, "learning_rate": 7.941378144021381e-05, "loss": 0.4082, "step": 20090 }, { "epoch": 55.833333333333336, "grad_norm": 0.8753595352172852, "learning_rate": 7.939149204528777e-05, "loss": 0.4115, "step": 20100 }, { "epoch": 55.861111111111114, "grad_norm": 0.8823834657669067, "learning_rate": 7.936919372200246e-05, "loss": 0.3999, "step": 20110 }, { "epoch": 55.888888888888886, "grad_norm": 0.7877596616744995, "learning_rate": 7.934688647713158e-05, "loss": 0.3905, "step": 20120 }, { "epoch": 55.916666666666664, "grad_norm": 0.868342399597168, "learning_rate": 7.932457031745143e-05, "loss": 0.3952, "step": 20130 }, { "epoch": 55.94444444444444, "grad_norm": 1.025092363357544, "learning_rate": 7.930224524974108e-05, "loss": 0.4202, "step": 20140 }, { "epoch": 55.97222222222222, "grad_norm": 0.949741780757904, "learning_rate": 7.927991128078232e-05, "loss": 0.4113, "step": 20150 }, { "epoch": 56.0, "grad_norm": 1.0435035228729248, "learning_rate": 7.925756841735958e-05, "loss": 0.4157, "step": 20160 }, { "epoch": 56.02777777777778, "grad_norm": 0.9234961271286011, "learning_rate": 7.923521666626008e-05, "loss": 0.4018, "step": 20170 }, { "epoch": 56.05555555555556, "grad_norm": 0.9745360016822815, "learning_rate": 7.921285603427366e-05, "loss": 0.4137, "step": 20180 }, { "epoch": 56.083333333333336, "grad_norm": 0.8842001557350159, "learning_rate": 7.91904865281929e-05, "loss": 0.4113, "step": 20190 }, { "epoch": 56.111111111111114, "grad_norm": 1.0080811977386475, "learning_rate": 7.916810815481307e-05, "loss": 0.4146, "step": 20200 }, { "epoch": 56.138888888888886, "grad_norm": 0.8936883807182312, "learning_rate": 7.914572092093211e-05, "loss": 0.4107, "step": 20210 }, { "epoch": 56.166666666666664, "grad_norm": 0.9122526049613953, "learning_rate": 7.912332483335068e-05, "loss": 0.4181, "step": 20220 }, { "epoch": 56.19444444444444, "grad_norm": 0.8713816404342651, "learning_rate": 7.910091989887213e-05, "loss": 0.3988, "step": 20230 }, { "epoch": 56.22222222222222, "grad_norm": 0.8496429920196533, "learning_rate": 7.907850612430248e-05, "loss": 0.4193, "step": 20240 }, { "epoch": 56.25, "grad_norm": 0.9246876835823059, "learning_rate": 7.905608351645044e-05, "loss": 0.3965, "step": 20250 }, { "epoch": 56.27777777777778, "grad_norm": 0.7717999815940857, "learning_rate": 7.90336520821274e-05, "loss": 0.4139, "step": 20260 }, { "epoch": 56.30555555555556, "grad_norm": 0.8215273022651672, "learning_rate": 7.901121182814746e-05, "loss": 0.4183, "step": 20270 }, { "epoch": 56.333333333333336, "grad_norm": 0.8658959865570068, "learning_rate": 7.898876276132736e-05, "loss": 0.4007, "step": 20280 }, { "epoch": 56.361111111111114, "grad_norm": 1.0307732820510864, "learning_rate": 7.896630488848654e-05, "loss": 0.3997, "step": 20290 }, { "epoch": 56.388888888888886, "grad_norm": 0.8531832695007324, "learning_rate": 7.89438382164471e-05, "loss": 0.403, "step": 20300 }, { "epoch": 56.416666666666664, "grad_norm": 1.020642638206482, "learning_rate": 7.892136275203383e-05, "loss": 0.4063, "step": 20310 }, { "epoch": 56.44444444444444, "grad_norm": 0.826191246509552, "learning_rate": 7.889887850207418e-05, "loss": 0.3925, "step": 20320 }, { "epoch": 56.47222222222222, "grad_norm": 0.9198227524757385, "learning_rate": 7.887638547339827e-05, "loss": 0.4106, "step": 20330 }, { "epoch": 56.5, "grad_norm": 0.8201080560684204, "learning_rate": 7.885388367283891e-05, "loss": 0.4272, "step": 20340 }, { "epoch": 56.52777777777778, "grad_norm": 0.9618809819221497, "learning_rate": 7.88313731072315e-05, "loss": 0.4137, "step": 20350 }, { "epoch": 56.55555555555556, "grad_norm": 0.8320409655570984, "learning_rate": 7.88088537834142e-05, "loss": 0.4131, "step": 20360 }, { "epoch": 56.583333333333336, "grad_norm": 0.8569093942642212, "learning_rate": 7.878632570822778e-05, "loss": 0.3991, "step": 20370 }, { "epoch": 56.611111111111114, "grad_norm": 0.9061983227729797, "learning_rate": 7.876378888851567e-05, "loss": 0.3927, "step": 20380 }, { "epoch": 56.638888888888886, "grad_norm": 0.7939720153808594, "learning_rate": 7.874124333112396e-05, "loss": 0.4049, "step": 20390 }, { "epoch": 56.666666666666664, "grad_norm": 0.8610790967941284, "learning_rate": 7.871868904290138e-05, "loss": 0.3947, "step": 20400 }, { "epoch": 56.69444444444444, "grad_norm": 0.9530953764915466, "learning_rate": 7.869612603069935e-05, "loss": 0.4131, "step": 20410 }, { "epoch": 56.72222222222222, "grad_norm": 0.8622480034828186, "learning_rate": 7.867355430137192e-05, "loss": 0.3996, "step": 20420 }, { "epoch": 56.75, "grad_norm": 0.8349974751472473, "learning_rate": 7.865097386177577e-05, "loss": 0.4109, "step": 20430 }, { "epoch": 56.77777777777778, "grad_norm": 0.9026838541030884, "learning_rate": 7.862838471877023e-05, "loss": 0.4141, "step": 20440 }, { "epoch": 56.80555555555556, "grad_norm": 1.0089243650436401, "learning_rate": 7.860578687921731e-05, "loss": 0.3989, "step": 20450 }, { "epoch": 56.833333333333336, "grad_norm": 0.7984613180160522, "learning_rate": 7.858318034998164e-05, "loss": 0.3986, "step": 20460 }, { "epoch": 56.861111111111114, "grad_norm": 0.994134783744812, "learning_rate": 7.856056513793046e-05, "loss": 0.4072, "step": 20470 }, { "epoch": 56.888888888888886, "grad_norm": 0.8374465703964233, "learning_rate": 7.85379412499337e-05, "loss": 0.4014, "step": 20480 }, { "epoch": 56.916666666666664, "grad_norm": 0.7737146019935608, "learning_rate": 7.851530869286389e-05, "loss": 0.4002, "step": 20490 }, { "epoch": 56.94444444444444, "grad_norm": 0.8637639284133911, "learning_rate": 7.849266747359619e-05, "loss": 0.3966, "step": 20500 }, { "epoch": 56.97222222222222, "grad_norm": 0.9597125053405762, "learning_rate": 7.847001759900843e-05, "loss": 0.4064, "step": 20510 }, { "epoch": 57.0, "grad_norm": 0.8753880858421326, "learning_rate": 7.844735907598102e-05, "loss": 0.4028, "step": 20520 }, { "epoch": 57.02777777777778, "grad_norm": 0.7951539754867554, "learning_rate": 7.842469191139703e-05, "loss": 0.3977, "step": 20530 }, { "epoch": 57.05555555555556, "grad_norm": 0.7979432344436646, "learning_rate": 7.840201611214215e-05, "loss": 0.4108, "step": 20540 }, { "epoch": 57.083333333333336, "grad_norm": 0.8552292585372925, "learning_rate": 7.837933168510469e-05, "loss": 0.4092, "step": 20550 }, { "epoch": 57.111111111111114, "grad_norm": 0.9003222584724426, "learning_rate": 7.835663863717559e-05, "loss": 0.4121, "step": 20560 }, { "epoch": 57.138888888888886, "grad_norm": 0.8936198353767395, "learning_rate": 7.833393697524838e-05, "loss": 0.4075, "step": 20570 }, { "epoch": 57.166666666666664, "grad_norm": 0.9083646535873413, "learning_rate": 7.831122670621922e-05, "loss": 0.4112, "step": 20580 }, { "epoch": 57.19444444444444, "grad_norm": 0.8072107434272766, "learning_rate": 7.82885078369869e-05, "loss": 0.3955, "step": 20590 }, { "epoch": 57.22222222222222, "grad_norm": 0.9479065537452698, "learning_rate": 7.826578037445283e-05, "loss": 0.4114, "step": 20600 }, { "epoch": 57.25, "grad_norm": 0.9688078165054321, "learning_rate": 7.824304432552097e-05, "loss": 0.4145, "step": 20610 }, { "epoch": 57.27777777777778, "grad_norm": 0.9058818817138672, "learning_rate": 7.822029969709798e-05, "loss": 0.3958, "step": 20620 }, { "epoch": 57.30555555555556, "grad_norm": 0.8791967034339905, "learning_rate": 7.819754649609306e-05, "loss": 0.3993, "step": 20630 }, { "epoch": 57.333333333333336, "grad_norm": 0.9657634496688843, "learning_rate": 7.817478472941802e-05, "loss": 0.3996, "step": 20640 }, { "epoch": 57.361111111111114, "grad_norm": 0.9519778490066528, "learning_rate": 7.815201440398727e-05, "loss": 0.408, "step": 20650 }, { "epoch": 57.388888888888886, "grad_norm": 0.86173415184021, "learning_rate": 7.812923552671789e-05, "loss": 0.4119, "step": 20660 }, { "epoch": 57.416666666666664, "grad_norm": 0.9022930860519409, "learning_rate": 7.810644810452945e-05, "loss": 0.4097, "step": 20670 }, { "epoch": 57.44444444444444, "grad_norm": 0.9622611999511719, "learning_rate": 7.808365214434417e-05, "loss": 0.4082, "step": 20680 }, { "epoch": 57.47222222222222, "grad_norm": 0.8971050381660461, "learning_rate": 7.80608476530869e-05, "loss": 0.4087, "step": 20690 }, { "epoch": 57.5, "grad_norm": 0.9515594244003296, "learning_rate": 7.8038034637685e-05, "loss": 0.4036, "step": 20700 }, { "epoch": 57.52777777777778, "grad_norm": 0.9294024705886841, "learning_rate": 7.801521310506848e-05, "loss": 0.4027, "step": 20710 }, { "epoch": 57.55555555555556, "grad_norm": 0.8486034274101257, "learning_rate": 7.799238306216994e-05, "loss": 0.4127, "step": 20720 }, { "epoch": 57.583333333333336, "grad_norm": 0.9179788827896118, "learning_rate": 7.796954451592448e-05, "loss": 0.4077, "step": 20730 }, { "epoch": 57.611111111111114, "grad_norm": 0.808158814907074, "learning_rate": 7.794669747326992e-05, "loss": 0.4005, "step": 20740 }, { "epoch": 57.638888888888886, "grad_norm": 0.8117669820785522, "learning_rate": 7.792384194114654e-05, "loss": 0.3928, "step": 20750 }, { "epoch": 57.666666666666664, "grad_norm": 0.8595913052558899, "learning_rate": 7.790097792649729e-05, "loss": 0.3955, "step": 20760 }, { "epoch": 57.69444444444444, "grad_norm": 0.8493025898933411, "learning_rate": 7.787810543626762e-05, "loss": 0.4142, "step": 20770 }, { "epoch": 57.72222222222222, "grad_norm": 0.7851598262786865, "learning_rate": 7.785522447740558e-05, "loss": 0.4185, "step": 20780 }, { "epoch": 57.75, "grad_norm": 0.9101428985595703, "learning_rate": 7.783233505686182e-05, "loss": 0.4051, "step": 20790 }, { "epoch": 57.77777777777778, "grad_norm": 0.8508751392364502, "learning_rate": 7.780943718158955e-05, "loss": 0.4027, "step": 20800 }, { "epoch": 57.80555555555556, "grad_norm": 0.8744320273399353, "learning_rate": 7.778653085854453e-05, "loss": 0.4103, "step": 20810 }, { "epoch": 57.833333333333336, "grad_norm": 0.8528762459754944, "learning_rate": 7.77636160946851e-05, "loss": 0.4116, "step": 20820 }, { "epoch": 57.861111111111114, "grad_norm": 0.9470981359481812, "learning_rate": 7.774069289697215e-05, "loss": 0.4054, "step": 20830 }, { "epoch": 57.888888888888886, "grad_norm": 0.9275734424591064, "learning_rate": 7.771776127236913e-05, "loss": 0.4045, "step": 20840 }, { "epoch": 57.916666666666664, "grad_norm": 0.901168167591095, "learning_rate": 7.769482122784212e-05, "loss": 0.4104, "step": 20850 }, { "epoch": 57.94444444444444, "grad_norm": 0.8084326982498169, "learning_rate": 7.767187277035963e-05, "loss": 0.3883, "step": 20860 }, { "epoch": 57.97222222222222, "grad_norm": 0.7894762754440308, "learning_rate": 7.764891590689285e-05, "loss": 0.3907, "step": 20870 }, { "epoch": 58.0, "grad_norm": 0.8161562085151672, "learning_rate": 7.762595064441542e-05, "loss": 0.3935, "step": 20880 }, { "epoch": 58.02777777777778, "grad_norm": 0.8065366744995117, "learning_rate": 7.760297698990362e-05, "loss": 0.3957, "step": 20890 }, { "epoch": 58.05555555555556, "grad_norm": 0.8661063313484192, "learning_rate": 7.757999495033623e-05, "loss": 0.4162, "step": 20900 }, { "epoch": 58.083333333333336, "grad_norm": 0.8916553854942322, "learning_rate": 7.755700453269456e-05, "loss": 0.4028, "step": 20910 }, { "epoch": 58.111111111111114, "grad_norm": 0.9742231965065002, "learning_rate": 7.753400574396254e-05, "loss": 0.4063, "step": 20920 }, { "epoch": 58.138888888888886, "grad_norm": 0.9121001362800598, "learning_rate": 7.751099859112655e-05, "loss": 0.3968, "step": 20930 }, { "epoch": 58.166666666666664, "grad_norm": 0.9157317876815796, "learning_rate": 7.748798308117557e-05, "loss": 0.4011, "step": 20940 }, { "epoch": 58.19444444444444, "grad_norm": 0.840242862701416, "learning_rate": 7.746495922110112e-05, "loss": 0.3916, "step": 20950 }, { "epoch": 58.22222222222222, "grad_norm": 0.8732499480247498, "learning_rate": 7.744192701789723e-05, "loss": 0.4052, "step": 20960 }, { "epoch": 58.25, "grad_norm": 0.8842206597328186, "learning_rate": 7.741888647856046e-05, "loss": 0.4112, "step": 20970 }, { "epoch": 58.27777777777778, "grad_norm": 0.8751305937767029, "learning_rate": 7.739583761008994e-05, "loss": 0.4053, "step": 20980 }, { "epoch": 58.30555555555556, "grad_norm": 0.8715077638626099, "learning_rate": 7.73727804194873e-05, "loss": 0.4125, "step": 20990 }, { "epoch": 58.333333333333336, "grad_norm": 0.9068023562431335, "learning_rate": 7.734971491375671e-05, "loss": 0.4085, "step": 21000 }, { "epoch": 58.361111111111114, "grad_norm": 0.91440749168396, "learning_rate": 7.732664109990485e-05, "loss": 0.3913, "step": 21010 }, { "epoch": 58.388888888888886, "grad_norm": 0.9490864276885986, "learning_rate": 7.730355898494095e-05, "loss": 0.3934, "step": 21020 }, { "epoch": 58.416666666666664, "grad_norm": 0.7766602039337158, "learning_rate": 7.728046857587673e-05, "loss": 0.4009, "step": 21030 }, { "epoch": 58.44444444444444, "grad_norm": 0.8695651292800903, "learning_rate": 7.725736987972647e-05, "loss": 0.3975, "step": 21040 }, { "epoch": 58.47222222222222, "grad_norm": 0.9696753621101379, "learning_rate": 7.723426290350691e-05, "loss": 0.4096, "step": 21050 }, { "epoch": 58.5, "grad_norm": 0.9599329829216003, "learning_rate": 7.721114765423736e-05, "loss": 0.4083, "step": 21060 }, { "epoch": 58.52777777777778, "grad_norm": 0.9191943407058716, "learning_rate": 7.718802413893963e-05, "loss": 0.3975, "step": 21070 }, { "epoch": 58.55555555555556, "grad_norm": 0.9197735786437988, "learning_rate": 7.716489236463802e-05, "loss": 0.4143, "step": 21080 }, { "epoch": 58.583333333333336, "grad_norm": 0.8773924112319946, "learning_rate": 7.714175233835936e-05, "loss": 0.3967, "step": 21090 }, { "epoch": 58.611111111111114, "grad_norm": 0.8403290510177612, "learning_rate": 7.711860406713299e-05, "loss": 0.4191, "step": 21100 }, { "epoch": 58.638888888888886, "grad_norm": 0.8486591577529907, "learning_rate": 7.70954475579907e-05, "loss": 0.4022, "step": 21110 }, { "epoch": 58.666666666666664, "grad_norm": 0.9692311882972717, "learning_rate": 7.707228281796688e-05, "loss": 0.4096, "step": 21120 }, { "epoch": 58.69444444444444, "grad_norm": 0.8764861822128296, "learning_rate": 7.704910985409833e-05, "loss": 0.3962, "step": 21130 }, { "epoch": 58.72222222222222, "grad_norm": 0.8703640103340149, "learning_rate": 7.702592867342439e-05, "loss": 0.4039, "step": 21140 }, { "epoch": 58.75, "grad_norm": 0.8236069083213806, "learning_rate": 7.700273928298691e-05, "loss": 0.4071, "step": 21150 }, { "epoch": 58.77777777777778, "grad_norm": 0.866468608379364, "learning_rate": 7.697954168983021e-05, "loss": 0.3892, "step": 21160 }, { "epoch": 58.80555555555556, "grad_norm": 0.8417448997497559, "learning_rate": 7.695633590100109e-05, "loss": 0.402, "step": 21170 }, { "epoch": 58.833333333333336, "grad_norm": 0.7740654945373535, "learning_rate": 7.693312192354886e-05, "loss": 0.3924, "step": 21180 }, { "epoch": 58.861111111111114, "grad_norm": 0.8159924149513245, "learning_rate": 7.690989976452532e-05, "loss": 0.4016, "step": 21190 }, { "epoch": 58.888888888888886, "grad_norm": 1.0236567258834839, "learning_rate": 7.688666943098475e-05, "loss": 0.418, "step": 21200 }, { "epoch": 58.916666666666664, "grad_norm": 0.8758280873298645, "learning_rate": 7.686343092998389e-05, "loss": 0.3968, "step": 21210 }, { "epoch": 58.94444444444444, "grad_norm": 0.8869706392288208, "learning_rate": 7.684018426858202e-05, "loss": 0.4039, "step": 21220 }, { "epoch": 58.97222222222222, "grad_norm": 0.9229925870895386, "learning_rate": 7.681692945384084e-05, "loss": 0.413, "step": 21230 }, { "epoch": 59.0, "grad_norm": 0.9003004431724548, "learning_rate": 7.679366649282456e-05, "loss": 0.4022, "step": 21240 }, { "epoch": 59.02777777777778, "grad_norm": 0.874858558177948, "learning_rate": 7.677039539259983e-05, "loss": 0.4022, "step": 21250 }, { "epoch": 59.05555555555556, "grad_norm": 0.9833279848098755, "learning_rate": 7.674711616023581e-05, "loss": 0.4124, "step": 21260 }, { "epoch": 59.083333333333336, "grad_norm": 0.9101369380950928, "learning_rate": 7.672382880280413e-05, "loss": 0.4096, "step": 21270 }, { "epoch": 59.111111111111114, "grad_norm": 0.8458041548728943, "learning_rate": 7.670053332737885e-05, "loss": 0.4058, "step": 21280 }, { "epoch": 59.138888888888886, "grad_norm": 0.9649948477745056, "learning_rate": 7.667722974103654e-05, "loss": 0.396, "step": 21290 }, { "epoch": 59.166666666666664, "grad_norm": 0.8538283109664917, "learning_rate": 7.66539180508562e-05, "loss": 0.4186, "step": 21300 }, { "epoch": 59.19444444444444, "grad_norm": 0.9494044780731201, "learning_rate": 7.663059826391932e-05, "loss": 0.3831, "step": 21310 }, { "epoch": 59.22222222222222, "grad_norm": 0.8069197535514832, "learning_rate": 7.660727038730981e-05, "loss": 0.4028, "step": 21320 }, { "epoch": 59.25, "grad_norm": 0.95360267162323, "learning_rate": 7.65839344281141e-05, "loss": 0.3975, "step": 21330 }, { "epoch": 59.27777777777778, "grad_norm": 0.8960968255996704, "learning_rate": 7.656059039342101e-05, "loss": 0.4061, "step": 21340 }, { "epoch": 59.30555555555556, "grad_norm": 0.8159615993499756, "learning_rate": 7.653723829032187e-05, "loss": 0.3925, "step": 21350 }, { "epoch": 59.333333333333336, "grad_norm": 0.8315176367759705, "learning_rate": 7.65138781259104e-05, "loss": 0.4173, "step": 21360 }, { "epoch": 59.361111111111114, "grad_norm": 0.9094675183296204, "learning_rate": 7.649050990728279e-05, "loss": 0.4204, "step": 21370 }, { "epoch": 59.388888888888886, "grad_norm": 0.9452742338180542, "learning_rate": 7.646713364153774e-05, "loss": 0.4127, "step": 21380 }, { "epoch": 59.416666666666664, "grad_norm": 0.8962054252624512, "learning_rate": 7.64437493357763e-05, "loss": 0.4031, "step": 21390 }, { "epoch": 59.44444444444444, "grad_norm": 0.7909293174743652, "learning_rate": 7.642035699710202e-05, "loss": 0.4142, "step": 21400 }, { "epoch": 59.47222222222222, "grad_norm": 0.8440085649490356, "learning_rate": 7.639695663262089e-05, "loss": 0.3987, "step": 21410 }, { "epoch": 59.5, "grad_norm": 0.8110901117324829, "learning_rate": 7.637354824944128e-05, "loss": 0.4136, "step": 21420 }, { "epoch": 59.52777777777778, "grad_norm": 0.8907915949821472, "learning_rate": 7.635013185467408e-05, "loss": 0.41, "step": 21430 }, { "epoch": 59.55555555555556, "grad_norm": 0.8806754350662231, "learning_rate": 7.632670745543256e-05, "loss": 0.397, "step": 21440 }, { "epoch": 59.583333333333336, "grad_norm": 0.8994673490524292, "learning_rate": 7.630327505883242e-05, "loss": 0.4071, "step": 21450 }, { "epoch": 59.611111111111114, "grad_norm": 0.8292191624641418, "learning_rate": 7.627983467199182e-05, "loss": 0.3921, "step": 21460 }, { "epoch": 59.638888888888886, "grad_norm": 0.8828245997428894, "learning_rate": 7.625638630203132e-05, "loss": 0.404, "step": 21470 }, { "epoch": 59.666666666666664, "grad_norm": 0.8543248772621155, "learning_rate": 7.623292995607394e-05, "loss": 0.396, "step": 21480 }, { "epoch": 59.69444444444444, "grad_norm": 0.8483899831771851, "learning_rate": 7.620946564124507e-05, "loss": 0.4049, "step": 21490 }, { "epoch": 59.72222222222222, "grad_norm": 0.9482619166374207, "learning_rate": 7.618599336467256e-05, "loss": 0.4081, "step": 21500 }, { "epoch": 59.75, "grad_norm": 0.8357436060905457, "learning_rate": 7.616251313348666e-05, "loss": 0.3997, "step": 21510 }, { "epoch": 59.77777777777778, "grad_norm": 0.8382982611656189, "learning_rate": 7.613902495482005e-05, "loss": 0.4123, "step": 21520 }, { "epoch": 59.80555555555556, "grad_norm": 0.7778939604759216, "learning_rate": 7.611552883580784e-05, "loss": 0.3894, "step": 21530 }, { "epoch": 59.833333333333336, "grad_norm": 0.8129295706748962, "learning_rate": 7.609202478358748e-05, "loss": 0.3929, "step": 21540 }, { "epoch": 59.861111111111114, "grad_norm": 0.8272168636322021, "learning_rate": 7.606851280529895e-05, "loss": 0.3949, "step": 21550 }, { "epoch": 59.888888888888886, "grad_norm": 0.8040558099746704, "learning_rate": 7.604499290808449e-05, "loss": 0.3863, "step": 21560 }, { "epoch": 59.916666666666664, "grad_norm": 0.7883102893829346, "learning_rate": 7.602146509908888e-05, "loss": 0.3939, "step": 21570 }, { "epoch": 59.94444444444444, "grad_norm": 0.926847517490387, "learning_rate": 7.599792938545921e-05, "loss": 0.3986, "step": 21580 }, { "epoch": 59.97222222222222, "grad_norm": 0.85920250415802, "learning_rate": 7.597438577434506e-05, "loss": 0.4131, "step": 21590 }, { "epoch": 60.0, "grad_norm": 0.8090295791625977, "learning_rate": 7.595083427289831e-05, "loss": 0.3912, "step": 21600 }, { "epoch": 60.02777777777778, "grad_norm": 0.8924542665481567, "learning_rate": 7.59272748882733e-05, "loss": 0.414, "step": 21610 }, { "epoch": 60.05555555555556, "grad_norm": 1.0171163082122803, "learning_rate": 7.590370762762675e-05, "loss": 0.4071, "step": 21620 }, { "epoch": 60.083333333333336, "grad_norm": 0.8818901181221008, "learning_rate": 7.588013249811777e-05, "loss": 0.3967, "step": 21630 }, { "epoch": 60.111111111111114, "grad_norm": 0.9443721175193787, "learning_rate": 7.585654950690786e-05, "loss": 0.4055, "step": 21640 }, { "epoch": 60.138888888888886, "grad_norm": 0.7753686308860779, "learning_rate": 7.583295866116091e-05, "loss": 0.4012, "step": 21650 }, { "epoch": 60.166666666666664, "grad_norm": 0.8044214844703674, "learning_rate": 7.580935996804321e-05, "loss": 0.3896, "step": 21660 }, { "epoch": 60.19444444444444, "grad_norm": 0.9770222306251526, "learning_rate": 7.57857534347234e-05, "loss": 0.4085, "step": 21670 }, { "epoch": 60.22222222222222, "grad_norm": 0.7792853116989136, "learning_rate": 7.576213906837254e-05, "loss": 0.4034, "step": 21680 }, { "epoch": 60.25, "grad_norm": 0.7902606129646301, "learning_rate": 7.573851687616403e-05, "loss": 0.4044, "step": 21690 }, { "epoch": 60.27777777777778, "grad_norm": 0.8527215123176575, "learning_rate": 7.571488686527368e-05, "loss": 0.3994, "step": 21700 }, { "epoch": 60.30555555555556, "grad_norm": 0.8091486096382141, "learning_rate": 7.569124904287968e-05, "loss": 0.4045, "step": 21710 }, { "epoch": 60.333333333333336, "grad_norm": 0.892570972442627, "learning_rate": 7.566760341616254e-05, "loss": 0.41, "step": 21720 }, { "epoch": 60.361111111111114, "grad_norm": 0.8653004169464111, "learning_rate": 7.564394999230519e-05, "loss": 0.4049, "step": 21730 }, { "epoch": 60.388888888888886, "grad_norm": 0.8686509132385254, "learning_rate": 7.562028877849294e-05, "loss": 0.3962, "step": 21740 }, { "epoch": 60.416666666666664, "grad_norm": 0.831854522228241, "learning_rate": 7.559661978191341e-05, "loss": 0.385, "step": 21750 }, { "epoch": 60.44444444444444, "grad_norm": 0.8287072777748108, "learning_rate": 7.557294300975664e-05, "loss": 0.3964, "step": 21760 }, { "epoch": 60.47222222222222, "grad_norm": 0.8203486800193787, "learning_rate": 7.554925846921499e-05, "loss": 0.3931, "step": 21770 }, { "epoch": 60.5, "grad_norm": 0.8373599052429199, "learning_rate": 7.552556616748321e-05, "loss": 0.3959, "step": 21780 }, { "epoch": 60.52777777777778, "grad_norm": 0.9149885773658752, "learning_rate": 7.550186611175838e-05, "loss": 0.4071, "step": 21790 }, { "epoch": 60.55555555555556, "grad_norm": 0.956061840057373, "learning_rate": 7.547815830923998e-05, "loss": 0.3949, "step": 21800 }, { "epoch": 60.583333333333336, "grad_norm": 0.9469408392906189, "learning_rate": 7.54544427671298e-05, "loss": 0.4085, "step": 21810 }, { "epoch": 60.611111111111114, "grad_norm": 0.8741374611854553, "learning_rate": 7.543071949263198e-05, "loss": 0.3983, "step": 21820 }, { "epoch": 60.638888888888886, "grad_norm": 0.8857053518295288, "learning_rate": 7.540698849295305e-05, "loss": 0.4158, "step": 21830 }, { "epoch": 60.666666666666664, "grad_norm": 0.8584412932395935, "learning_rate": 7.538324977530183e-05, "loss": 0.3958, "step": 21840 }, { "epoch": 60.69444444444444, "grad_norm": 0.8889865279197693, "learning_rate": 7.535950334688955e-05, "loss": 0.4177, "step": 21850 }, { "epoch": 60.72222222222222, "grad_norm": 0.8700029850006104, "learning_rate": 7.533574921492972e-05, "loss": 0.3979, "step": 21860 }, { "epoch": 60.75, "grad_norm": 0.967856764793396, "learning_rate": 7.531198738663824e-05, "loss": 0.4009, "step": 21870 }, { "epoch": 60.77777777777778, "grad_norm": 0.8283471465110779, "learning_rate": 7.528821786923333e-05, "loss": 0.4002, "step": 21880 }, { "epoch": 60.80555555555556, "grad_norm": 0.7884073853492737, "learning_rate": 7.52644406699355e-05, "loss": 0.3986, "step": 21890 }, { "epoch": 60.833333333333336, "grad_norm": 0.839739203453064, "learning_rate": 7.524065579596766e-05, "loss": 0.3895, "step": 21900 }, { "epoch": 60.861111111111114, "grad_norm": 0.8334357142448425, "learning_rate": 7.521686325455506e-05, "loss": 0.4116, "step": 21910 }, { "epoch": 60.888888888888886, "grad_norm": 0.8598772287368774, "learning_rate": 7.51930630529252e-05, "loss": 0.389, "step": 21920 }, { "epoch": 60.916666666666664, "grad_norm": 0.85909104347229, "learning_rate": 7.516925519830797e-05, "loss": 0.4157, "step": 21930 }, { "epoch": 60.94444444444444, "grad_norm": 0.9725584983825684, "learning_rate": 7.514543969793557e-05, "loss": 0.4081, "step": 21940 }, { "epoch": 60.97222222222222, "grad_norm": 0.8423429131507874, "learning_rate": 7.512161655904251e-05, "loss": 0.3982, "step": 21950 }, { "epoch": 61.0, "grad_norm": 0.8423534631729126, "learning_rate": 7.509778578886563e-05, "loss": 0.3963, "step": 21960 }, { "epoch": 61.02777777777778, "grad_norm": 0.8803194165229797, "learning_rate": 7.507394739464412e-05, "loss": 0.3881, "step": 21970 }, { "epoch": 61.05555555555556, "grad_norm": 1.037484884262085, "learning_rate": 7.50501013836194e-05, "loss": 0.4191, "step": 21980 }, { "epoch": 61.083333333333336, "grad_norm": 0.8213266730308533, "learning_rate": 7.50262477630353e-05, "loss": 0.4059, "step": 21990 }, { "epoch": 61.111111111111114, "grad_norm": 0.8677218556404114, "learning_rate": 7.500238654013794e-05, "loss": 0.3963, "step": 22000 }, { "epoch": 61.138888888888886, "grad_norm": 0.9286115765571594, "learning_rate": 7.497851772217566e-05, "loss": 0.3953, "step": 22010 }, { "epoch": 61.166666666666664, "grad_norm": 0.8543211817741394, "learning_rate": 7.495464131639924e-05, "loss": 0.4066, "step": 22020 }, { "epoch": 61.19444444444444, "grad_norm": 0.7695619463920593, "learning_rate": 7.493075733006166e-05, "loss": 0.4092, "step": 22030 }, { "epoch": 61.22222222222222, "grad_norm": 0.9069320559501648, "learning_rate": 7.490686577041828e-05, "loss": 0.4011, "step": 22040 }, { "epoch": 61.25, "grad_norm": 0.8872517347335815, "learning_rate": 7.488296664472668e-05, "loss": 0.3949, "step": 22050 }, { "epoch": 61.27777777777778, "grad_norm": 0.8819825649261475, "learning_rate": 7.485905996024682e-05, "loss": 0.3841, "step": 22060 }, { "epoch": 61.30555555555556, "grad_norm": 0.8568851947784424, "learning_rate": 7.483514572424093e-05, "loss": 0.3925, "step": 22070 }, { "epoch": 61.333333333333336, "grad_norm": 0.8807728886604309, "learning_rate": 7.481122394397349e-05, "loss": 0.4017, "step": 22080 }, { "epoch": 61.361111111111114, "grad_norm": 0.9157502055168152, "learning_rate": 7.478729462671131e-05, "loss": 0.406, "step": 22090 }, { "epoch": 61.388888888888886, "grad_norm": 0.8473281264305115, "learning_rate": 7.47633577797235e-05, "loss": 0.3977, "step": 22100 }, { "epoch": 61.416666666666664, "grad_norm": 0.9473898410797119, "learning_rate": 7.473941341028144e-05, "loss": 0.4125, "step": 22110 }, { "epoch": 61.44444444444444, "grad_norm": 0.7982479333877563, "learning_rate": 7.471546152565879e-05, "loss": 0.3868, "step": 22120 }, { "epoch": 61.47222222222222, "grad_norm": 0.7827219367027283, "learning_rate": 7.46915021331315e-05, "loss": 0.3903, "step": 22130 }, { "epoch": 61.5, "grad_norm": 0.824442982673645, "learning_rate": 7.466753523997778e-05, "loss": 0.3967, "step": 22140 }, { "epoch": 61.52777777777778, "grad_norm": 0.8271493315696716, "learning_rate": 7.464356085347819e-05, "loss": 0.4027, "step": 22150 }, { "epoch": 61.55555555555556, "grad_norm": 0.8284518718719482, "learning_rate": 7.461957898091548e-05, "loss": 0.4036, "step": 22160 }, { "epoch": 61.583333333333336, "grad_norm": 0.8197233080863953, "learning_rate": 7.459558962957473e-05, "loss": 0.3948, "step": 22170 }, { "epoch": 61.611111111111114, "grad_norm": 0.808319091796875, "learning_rate": 7.457159280674326e-05, "loss": 0.3966, "step": 22180 }, { "epoch": 61.638888888888886, "grad_norm": 0.777766764163971, "learning_rate": 7.454758851971066e-05, "loss": 0.3947, "step": 22190 }, { "epoch": 61.666666666666664, "grad_norm": 0.9024808406829834, "learning_rate": 7.45235767757688e-05, "loss": 0.3927, "step": 22200 }, { "epoch": 61.69444444444444, "grad_norm": 0.8176521062850952, "learning_rate": 7.449955758221183e-05, "loss": 0.3902, "step": 22210 }, { "epoch": 61.72222222222222, "grad_norm": 0.832528829574585, "learning_rate": 7.447553094633615e-05, "loss": 0.4174, "step": 22220 }, { "epoch": 61.75, "grad_norm": 0.9110567569732666, "learning_rate": 7.445149687544039e-05, "loss": 0.3965, "step": 22230 }, { "epoch": 61.77777777777778, "grad_norm": 0.9078605771064758, "learning_rate": 7.44274553768255e-05, "loss": 0.4029, "step": 22240 }, { "epoch": 61.80555555555556, "grad_norm": 1.0054519176483154, "learning_rate": 7.440340645779464e-05, "loss": 0.4021, "step": 22250 }, { "epoch": 61.833333333333336, "grad_norm": 0.7932281494140625, "learning_rate": 7.437935012565322e-05, "loss": 0.3973, "step": 22260 }, { "epoch": 61.861111111111114, "grad_norm": 0.8625940680503845, "learning_rate": 7.435528638770893e-05, "loss": 0.4011, "step": 22270 }, { "epoch": 61.888888888888886, "grad_norm": 0.7961739897727966, "learning_rate": 7.433121525127171e-05, "loss": 0.4001, "step": 22280 }, { "epoch": 61.916666666666664, "grad_norm": 0.7978439927101135, "learning_rate": 7.430713672365371e-05, "loss": 0.3909, "step": 22290 }, { "epoch": 61.94444444444444, "grad_norm": 0.9067399501800537, "learning_rate": 7.428305081216938e-05, "loss": 0.397, "step": 22300 }, { "epoch": 61.97222222222222, "grad_norm": 0.9782360196113586, "learning_rate": 7.425895752413536e-05, "loss": 0.3926, "step": 22310 }, { "epoch": 62.0, "grad_norm": 0.9139953255653381, "learning_rate": 7.423485686687057e-05, "loss": 0.409, "step": 22320 }, { "epoch": 62.02777777777778, "grad_norm": 0.8494561910629272, "learning_rate": 7.421074884769616e-05, "loss": 0.4087, "step": 22330 }, { "epoch": 62.05555555555556, "grad_norm": 0.97252357006073, "learning_rate": 7.418663347393548e-05, "loss": 0.4103, "step": 22340 }, { "epoch": 62.083333333333336, "grad_norm": 0.804602861404419, "learning_rate": 7.416251075291418e-05, "loss": 0.407, "step": 22350 }, { "epoch": 62.111111111111114, "grad_norm": 0.9072726964950562, "learning_rate": 7.413838069196007e-05, "loss": 0.3883, "step": 22360 }, { "epoch": 62.138888888888886, "grad_norm": 0.906332790851593, "learning_rate": 7.411424329840324e-05, "loss": 0.4017, "step": 22370 }, { "epoch": 62.166666666666664, "grad_norm": 0.9086737632751465, "learning_rate": 7.409009857957601e-05, "loss": 0.4015, "step": 22380 }, { "epoch": 62.19444444444444, "grad_norm": 0.8510323166847229, "learning_rate": 7.40659465428129e-05, "loss": 0.3984, "step": 22390 }, { "epoch": 62.22222222222222, "grad_norm": 0.9512057304382324, "learning_rate": 7.404178719545063e-05, "loss": 0.4013, "step": 22400 }, { "epoch": 62.25, "grad_norm": 0.8429734706878662, "learning_rate": 7.401762054482822e-05, "loss": 0.4137, "step": 22410 }, { "epoch": 62.27777777777778, "grad_norm": 0.8271015286445618, "learning_rate": 7.39934465982868e-05, "loss": 0.3969, "step": 22420 }, { "epoch": 62.30555555555556, "grad_norm": 0.7811294198036194, "learning_rate": 7.396926536316984e-05, "loss": 0.3934, "step": 22430 }, { "epoch": 62.333333333333336, "grad_norm": 0.8782914280891418, "learning_rate": 7.394507684682293e-05, "loss": 0.3859, "step": 22440 }, { "epoch": 62.361111111111114, "grad_norm": 0.8475577235221863, "learning_rate": 7.392088105659393e-05, "loss": 0.4115, "step": 22450 }, { "epoch": 62.388888888888886, "grad_norm": 0.8046143054962158, "learning_rate": 7.389667799983284e-05, "loss": 0.4013, "step": 22460 }, { "epoch": 62.416666666666664, "grad_norm": 0.9243751168251038, "learning_rate": 7.387246768389193e-05, "loss": 0.3884, "step": 22470 }, { "epoch": 62.44444444444444, "grad_norm": 0.9921064376831055, "learning_rate": 7.384825011612563e-05, "loss": 0.4061, "step": 22480 }, { "epoch": 62.47222222222222, "grad_norm": 0.9193991422653198, "learning_rate": 7.382402530389066e-05, "loss": 0.4112, "step": 22490 }, { "epoch": 62.5, "grad_norm": 0.8948872685432434, "learning_rate": 7.379979325454582e-05, "loss": 0.4069, "step": 22500 }, { "epoch": 62.52777777777778, "grad_norm": 0.8263757824897766, "learning_rate": 7.37755539754522e-05, "loss": 0.3914, "step": 22510 }, { "epoch": 62.55555555555556, "grad_norm": 0.9979539513587952, "learning_rate": 7.375130747397302e-05, "loss": 0.4005, "step": 22520 }, { "epoch": 62.583333333333336, "grad_norm": 0.9329767823219299, "learning_rate": 7.372705375747377e-05, "loss": 0.4001, "step": 22530 }, { "epoch": 62.611111111111114, "grad_norm": 0.7794559597969055, "learning_rate": 7.370279283332205e-05, "loss": 0.387, "step": 22540 }, { "epoch": 62.638888888888886, "grad_norm": 0.8548510074615479, "learning_rate": 7.36785247088877e-05, "loss": 0.3849, "step": 22550 }, { "epoch": 62.666666666666664, "grad_norm": 0.8274439573287964, "learning_rate": 7.365424939154275e-05, "loss": 0.3918, "step": 22560 }, { "epoch": 62.69444444444444, "grad_norm": 0.8697026371955872, "learning_rate": 7.362996688866138e-05, "loss": 0.4031, "step": 22570 }, { "epoch": 62.72222222222222, "grad_norm": 0.9108673334121704, "learning_rate": 7.360567720761999e-05, "loss": 0.3813, "step": 22580 }, { "epoch": 62.75, "grad_norm": 0.9253430962562561, "learning_rate": 7.358138035579711e-05, "loss": 0.3895, "step": 22590 }, { "epoch": 62.77777777777778, "grad_norm": 0.7850391268730164, "learning_rate": 7.355707634057354e-05, "loss": 0.3917, "step": 22600 }, { "epoch": 62.80555555555556, "grad_norm": 0.8288474082946777, "learning_rate": 7.353276516933215e-05, "loss": 0.3902, "step": 22610 }, { "epoch": 62.833333333333336, "grad_norm": 0.8108002543449402, "learning_rate": 7.350844684945806e-05, "loss": 0.3868, "step": 22620 }, { "epoch": 62.861111111111114, "grad_norm": 0.8120394945144653, "learning_rate": 7.348412138833851e-05, "loss": 0.3813, "step": 22630 }, { "epoch": 62.888888888888886, "grad_norm": 0.9302880167961121, "learning_rate": 7.345978879336295e-05, "loss": 0.4075, "step": 22640 }, { "epoch": 62.916666666666664, "grad_norm": 0.8459628820419312, "learning_rate": 7.343544907192296e-05, "loss": 0.3888, "step": 22650 }, { "epoch": 62.94444444444444, "grad_norm": 0.9023866653442383, "learning_rate": 7.341110223141235e-05, "loss": 0.3949, "step": 22660 }, { "epoch": 62.97222222222222, "grad_norm": 0.8229860067367554, "learning_rate": 7.3386748279227e-05, "loss": 0.3959, "step": 22670 }, { "epoch": 63.0, "grad_norm": 0.8355605006217957, "learning_rate": 7.336238722276501e-05, "loss": 0.398, "step": 22680 }, { "epoch": 63.02777777777778, "grad_norm": 0.8296946287155151, "learning_rate": 7.333801906942663e-05, "loss": 0.4127, "step": 22690 }, { "epoch": 63.05555555555556, "grad_norm": 0.9676777124404907, "learning_rate": 7.331364382661428e-05, "loss": 0.4035, "step": 22700 }, { "epoch": 63.083333333333336, "grad_norm": 0.9035135507583618, "learning_rate": 7.328926150173248e-05, "loss": 0.3968, "step": 22710 }, { "epoch": 63.111111111111114, "grad_norm": 0.8889166712760925, "learning_rate": 7.326487210218795e-05, "loss": 0.4043, "step": 22720 }, { "epoch": 63.138888888888886, "grad_norm": 0.7869527339935303, "learning_rate": 7.324047563538955e-05, "loss": 0.402, "step": 22730 }, { "epoch": 63.166666666666664, "grad_norm": 0.7815321087837219, "learning_rate": 7.321607210874828e-05, "loss": 0.4021, "step": 22740 }, { "epoch": 63.19444444444444, "grad_norm": 0.8425294160842896, "learning_rate": 7.31916615296773e-05, "loss": 0.4093, "step": 22750 }, { "epoch": 63.22222222222222, "grad_norm": 0.8056963682174683, "learning_rate": 7.316724390559188e-05, "loss": 0.4084, "step": 22760 }, { "epoch": 63.25, "grad_norm": 0.8562192916870117, "learning_rate": 7.314281924390946e-05, "loss": 0.3949, "step": 22770 }, { "epoch": 63.27777777777778, "grad_norm": 0.9605845808982849, "learning_rate": 7.311838755204959e-05, "loss": 0.3957, "step": 22780 }, { "epoch": 63.30555555555556, "grad_norm": 0.8568516969680786, "learning_rate": 7.3093948837434e-05, "loss": 0.4021, "step": 22790 }, { "epoch": 63.333333333333336, "grad_norm": 0.8535754084587097, "learning_rate": 7.306950310748651e-05, "loss": 0.4013, "step": 22800 }, { "epoch": 63.361111111111114, "grad_norm": 0.7879639863967896, "learning_rate": 7.304505036963311e-05, "loss": 0.4072, "step": 22810 }, { "epoch": 63.388888888888886, "grad_norm": 0.8047844767570496, "learning_rate": 7.302059063130186e-05, "loss": 0.3939, "step": 22820 }, { "epoch": 63.416666666666664, "grad_norm": 0.846867024898529, "learning_rate": 7.2996123899923e-05, "loss": 0.3956, "step": 22830 }, { "epoch": 63.44444444444444, "grad_norm": 0.9565554261207581, "learning_rate": 7.297165018292886e-05, "loss": 0.4003, "step": 22840 }, { "epoch": 63.47222222222222, "grad_norm": 0.7686779499053955, "learning_rate": 7.294716948775396e-05, "loss": 0.39, "step": 22850 }, { "epoch": 63.5, "grad_norm": 0.9041710495948792, "learning_rate": 7.292268182183484e-05, "loss": 0.3911, "step": 22860 }, { "epoch": 63.52777777777778, "grad_norm": 0.8167824745178223, "learning_rate": 7.28981871926102e-05, "loss": 0.3833, "step": 22870 }, { "epoch": 63.55555555555556, "grad_norm": 0.825703501701355, "learning_rate": 7.28736856075209e-05, "loss": 0.395, "step": 22880 }, { "epoch": 63.583333333333336, "grad_norm": 0.9493511319160461, "learning_rate": 7.284917707400985e-05, "loss": 0.3994, "step": 22890 }, { "epoch": 63.611111111111114, "grad_norm": 0.8394533395767212, "learning_rate": 7.282466159952212e-05, "loss": 0.3915, "step": 22900 }, { "epoch": 63.638888888888886, "grad_norm": 0.8916902542114258, "learning_rate": 7.280013919150483e-05, "loss": 0.3976, "step": 22910 }, { "epoch": 63.666666666666664, "grad_norm": 0.8532298803329468, "learning_rate": 7.277560985740728e-05, "loss": 0.3954, "step": 22920 }, { "epoch": 63.69444444444444, "grad_norm": 0.9403172731399536, "learning_rate": 7.275107360468079e-05, "loss": 0.3969, "step": 22930 }, { "epoch": 63.72222222222222, "grad_norm": 0.7488793730735779, "learning_rate": 7.272653044077885e-05, "loss": 0.3871, "step": 22940 }, { "epoch": 63.75, "grad_norm": 0.9795345664024353, "learning_rate": 7.270198037315703e-05, "loss": 0.398, "step": 22950 }, { "epoch": 63.77777777777778, "grad_norm": 0.8884519934654236, "learning_rate": 7.267742340927297e-05, "loss": 0.3895, "step": 22960 }, { "epoch": 63.80555555555556, "grad_norm": 0.8206329941749573, "learning_rate": 7.265285955658645e-05, "loss": 0.3929, "step": 22970 }, { "epoch": 63.833333333333336, "grad_norm": 0.9733875393867493, "learning_rate": 7.26282888225593e-05, "loss": 0.4017, "step": 22980 }, { "epoch": 63.861111111111114, "grad_norm": 0.7732780575752258, "learning_rate": 7.260371121465548e-05, "loss": 0.3877, "step": 22990 }, { "epoch": 63.888888888888886, "grad_norm": 0.9261922240257263, "learning_rate": 7.2579126740341e-05, "loss": 0.3985, "step": 23000 }, { "epoch": 63.916666666666664, "grad_norm": 0.7983617186546326, "learning_rate": 7.2554535407084e-05, "loss": 0.4042, "step": 23010 }, { "epoch": 63.94444444444444, "grad_norm": 0.8351189494132996, "learning_rate": 7.252993722235464e-05, "loss": 0.4054, "step": 23020 }, { "epoch": 63.97222222222222, "grad_norm": 0.8219130635261536, "learning_rate": 7.250533219362523e-05, "loss": 0.4028, "step": 23030 }, { "epoch": 64.0, "grad_norm": 0.9081054329872131, "learning_rate": 7.248072032837012e-05, "loss": 0.3965, "step": 23040 }, { "epoch": 64.02777777777777, "grad_norm": 0.8695220351219177, "learning_rate": 7.245610163406575e-05, "loss": 0.4139, "step": 23050 }, { "epoch": 64.05555555555556, "grad_norm": 0.898693859577179, "learning_rate": 7.243147611819061e-05, "loss": 0.3889, "step": 23060 }, { "epoch": 64.08333333333333, "grad_norm": 0.9255906939506531, "learning_rate": 7.240684378822531e-05, "loss": 0.4028, "step": 23070 }, { "epoch": 64.11111111111111, "grad_norm": 0.9436472058296204, "learning_rate": 7.238220465165248e-05, "loss": 0.4017, "step": 23080 }, { "epoch": 64.13888888888889, "grad_norm": 0.9698681831359863, "learning_rate": 7.235755871595684e-05, "loss": 0.4006, "step": 23090 }, { "epoch": 64.16666666666667, "grad_norm": 0.8715750575065613, "learning_rate": 7.233290598862517e-05, "loss": 0.4133, "step": 23100 }, { "epoch": 64.19444444444444, "grad_norm": 0.9136176109313965, "learning_rate": 7.230824647714635e-05, "loss": 0.3948, "step": 23110 }, { "epoch": 64.22222222222223, "grad_norm": 0.8758148550987244, "learning_rate": 7.228358018901124e-05, "loss": 0.3952, "step": 23120 }, { "epoch": 64.25, "grad_norm": 0.8637894988059998, "learning_rate": 7.225890713171286e-05, "loss": 0.3823, "step": 23130 }, { "epoch": 64.27777777777777, "grad_norm": 0.8304360508918762, "learning_rate": 7.223422731274618e-05, "loss": 0.39, "step": 23140 }, { "epoch": 64.30555555555556, "grad_norm": 0.8772447109222412, "learning_rate": 7.220954073960832e-05, "loss": 0.3911, "step": 23150 }, { "epoch": 64.33333333333333, "grad_norm": 0.8357976078987122, "learning_rate": 7.218484741979838e-05, "loss": 0.4074, "step": 23160 }, { "epoch": 64.36111111111111, "grad_norm": 0.798031747341156, "learning_rate": 7.216014736081756e-05, "loss": 0.4035, "step": 23170 }, { "epoch": 64.38888888888889, "grad_norm": 0.8123219013214111, "learning_rate": 7.213544057016906e-05, "loss": 0.3828, "step": 23180 }, { "epoch": 64.41666666666667, "grad_norm": 0.795964777469635, "learning_rate": 7.211072705535819e-05, "loss": 0.3825, "step": 23190 }, { "epoch": 64.44444444444444, "grad_norm": 0.8541239500045776, "learning_rate": 7.208600682389224e-05, "loss": 0.4041, "step": 23200 }, { "epoch": 64.47222222222223, "grad_norm": 1.0199569463729858, "learning_rate": 7.206127988328055e-05, "loss": 0.4119, "step": 23210 }, { "epoch": 64.5, "grad_norm": 0.8350604176521301, "learning_rate": 7.203654624103453e-05, "loss": 0.3904, "step": 23220 }, { "epoch": 64.52777777777777, "grad_norm": 0.9108057022094727, "learning_rate": 7.201180590466761e-05, "loss": 0.4177, "step": 23230 }, { "epoch": 64.55555555555556, "grad_norm": 0.9412866830825806, "learning_rate": 7.198705888169523e-05, "loss": 0.3988, "step": 23240 }, { "epoch": 64.58333333333333, "grad_norm": 0.9235355257987976, "learning_rate": 7.196230517963491e-05, "loss": 0.4132, "step": 23250 }, { "epoch": 64.61111111111111, "grad_norm": 0.9135078191757202, "learning_rate": 7.193754480600615e-05, "loss": 0.4059, "step": 23260 }, { "epoch": 64.63888888888889, "grad_norm": 0.9100010395050049, "learning_rate": 7.19127777683305e-05, "loss": 0.399, "step": 23270 }, { "epoch": 64.66666666666667, "grad_norm": 0.7980075478553772, "learning_rate": 7.188800407413156e-05, "loss": 0.3899, "step": 23280 }, { "epoch": 64.69444444444444, "grad_norm": 0.7485405802726746, "learning_rate": 7.186322373093489e-05, "loss": 0.4003, "step": 23290 }, { "epoch": 64.72222222222223, "grad_norm": 0.8286911845207214, "learning_rate": 7.18384367462681e-05, "loss": 0.3896, "step": 23300 }, { "epoch": 64.75, "grad_norm": 0.8159041404724121, "learning_rate": 7.181364312766085e-05, "loss": 0.391, "step": 23310 }, { "epoch": 64.77777777777777, "grad_norm": 0.8508695363998413, "learning_rate": 7.178884288264477e-05, "loss": 0.3975, "step": 23320 }, { "epoch": 64.80555555555556, "grad_norm": 0.8398821949958801, "learning_rate": 7.176403601875353e-05, "loss": 0.3925, "step": 23330 }, { "epoch": 64.83333333333333, "grad_norm": 0.8682308197021484, "learning_rate": 7.173922254352279e-05, "loss": 0.3983, "step": 23340 }, { "epoch": 64.86111111111111, "grad_norm": 0.8466445803642273, "learning_rate": 7.171440246449024e-05, "loss": 0.3988, "step": 23350 }, { "epoch": 64.88888888888889, "grad_norm": 0.7544618248939514, "learning_rate": 7.168957578919555e-05, "loss": 0.3701, "step": 23360 }, { "epoch": 64.91666666666667, "grad_norm": 0.8028872609138489, "learning_rate": 7.16647425251804e-05, "loss": 0.3844, "step": 23370 }, { "epoch": 64.94444444444444, "grad_norm": 0.8298812508583069, "learning_rate": 7.163990267998852e-05, "loss": 0.3955, "step": 23380 }, { "epoch": 64.97222222222223, "grad_norm": 0.8120835423469543, "learning_rate": 7.161505626116556e-05, "loss": 0.4049, "step": 23390 }, { "epoch": 65.0, "grad_norm": 0.7943319082260132, "learning_rate": 7.159020327625923e-05, "loss": 0.3936, "step": 23400 }, { "epoch": 65.02777777777777, "grad_norm": 0.8191510438919067, "learning_rate": 7.15653437328192e-05, "loss": 0.3866, "step": 23410 }, { "epoch": 65.05555555555556, "grad_norm": 0.7912892699241638, "learning_rate": 7.154047763839713e-05, "loss": 0.3988, "step": 23420 }, { "epoch": 65.08333333333333, "grad_norm": 0.8320272564888, "learning_rate": 7.15156050005467e-05, "loss": 0.3879, "step": 23430 }, { "epoch": 65.11111111111111, "grad_norm": 0.8763775825500488, "learning_rate": 7.149072582682357e-05, "loss": 0.3974, "step": 23440 }, { "epoch": 65.13888888888889, "grad_norm": 0.7668279409408569, "learning_rate": 7.146584012478535e-05, "loss": 0.3862, "step": 23450 }, { "epoch": 65.16666666666667, "grad_norm": 0.8578150868415833, "learning_rate": 7.144094790199169e-05, "loss": 0.3932, "step": 23460 }, { "epoch": 65.19444444444444, "grad_norm": 0.8459358215332031, "learning_rate": 7.141604916600415e-05, "loss": 0.3954, "step": 23470 }, { "epoch": 65.22222222222223, "grad_norm": 0.8252513408660889, "learning_rate": 7.139114392438635e-05, "loss": 0.4002, "step": 23480 }, { "epoch": 65.25, "grad_norm": 0.8356480598449707, "learning_rate": 7.136623218470382e-05, "loss": 0.3916, "step": 23490 }, { "epoch": 65.27777777777777, "grad_norm": 0.826714277267456, "learning_rate": 7.13413139545241e-05, "loss": 0.3934, "step": 23500 }, { "epoch": 65.30555555555556, "grad_norm": 0.9037787318229675, "learning_rate": 7.131638924141668e-05, "loss": 0.3851, "step": 23510 }, { "epoch": 65.33333333333333, "grad_norm": 0.9085341691970825, "learning_rate": 7.129145805295304e-05, "loss": 0.4065, "step": 23520 }, { "epoch": 65.36111111111111, "grad_norm": 0.8033353686332703, "learning_rate": 7.126652039670661e-05, "loss": 0.386, "step": 23530 }, { "epoch": 65.38888888888889, "grad_norm": 0.8796432018280029, "learning_rate": 7.124157628025278e-05, "loss": 0.3925, "step": 23540 }, { "epoch": 65.41666666666667, "grad_norm": 0.8837464451789856, "learning_rate": 7.121662571116894e-05, "loss": 0.3782, "step": 23550 }, { "epoch": 65.44444444444444, "grad_norm": 0.8085775375366211, "learning_rate": 7.119166869703441e-05, "loss": 0.3873, "step": 23560 }, { "epoch": 65.47222222222223, "grad_norm": 0.9086360931396484, "learning_rate": 7.116670524543044e-05, "loss": 0.3963, "step": 23570 }, { "epoch": 65.5, "grad_norm": 0.8327445983886719, "learning_rate": 7.114173536394032e-05, "loss": 0.3913, "step": 23580 }, { "epoch": 65.52777777777777, "grad_norm": 0.9294596314430237, "learning_rate": 7.111675906014917e-05, "loss": 0.405, "step": 23590 }, { "epoch": 65.55555555555556, "grad_norm": 0.9827059507369995, "learning_rate": 7.109177634164421e-05, "loss": 0.4074, "step": 23600 }, { "epoch": 65.58333333333333, "grad_norm": 0.910786509513855, "learning_rate": 7.106678721601449e-05, "loss": 0.4019, "step": 23610 }, { "epoch": 65.61111111111111, "grad_norm": 0.8500734567642212, "learning_rate": 7.104179169085103e-05, "loss": 0.3954, "step": 23620 }, { "epoch": 65.63888888888889, "grad_norm": 0.7781490683555603, "learning_rate": 7.101678977374683e-05, "loss": 0.3938, "step": 23630 }, { "epoch": 65.66666666666667, "grad_norm": 0.9613387584686279, "learning_rate": 7.099178147229685e-05, "loss": 0.4036, "step": 23640 }, { "epoch": 65.69444444444444, "grad_norm": 0.8409623503684998, "learning_rate": 7.096676679409789e-05, "loss": 0.3977, "step": 23650 }, { "epoch": 65.72222222222223, "grad_norm": 0.7786349058151245, "learning_rate": 7.094174574674877e-05, "loss": 0.3919, "step": 23660 }, { "epoch": 65.75, "grad_norm": 0.8152868747711182, "learning_rate": 7.091671833785025e-05, "loss": 0.4026, "step": 23670 }, { "epoch": 65.77777777777777, "grad_norm": 1.1007286310195923, "learning_rate": 7.089168457500493e-05, "loss": 0.3944, "step": 23680 }, { "epoch": 65.80555555555556, "grad_norm": 0.8798183798789978, "learning_rate": 7.086664446581747e-05, "loss": 0.4127, "step": 23690 }, { "epoch": 65.83333333333333, "grad_norm": 0.9202383756637573, "learning_rate": 7.084159801789438e-05, "loss": 0.3845, "step": 23700 }, { "epoch": 65.86111111111111, "grad_norm": 0.7509032487869263, "learning_rate": 7.081654523884411e-05, "loss": 0.385, "step": 23710 }, { "epoch": 65.88888888888889, "grad_norm": 0.8100054860115051, "learning_rate": 7.0791486136277e-05, "loss": 0.396, "step": 23720 }, { "epoch": 65.91666666666667, "grad_norm": 0.7879800200462341, "learning_rate": 7.07664207178054e-05, "loss": 0.401, "step": 23730 }, { "epoch": 65.94444444444444, "grad_norm": 0.7657002210617065, "learning_rate": 7.074134899104345e-05, "loss": 0.3915, "step": 23740 }, { "epoch": 65.97222222222223, "grad_norm": 0.8946869373321533, "learning_rate": 7.071627096360735e-05, "loss": 0.3935, "step": 23750 }, { "epoch": 66.0, "grad_norm": 0.8677574992179871, "learning_rate": 7.069118664311511e-05, "loss": 0.3938, "step": 23760 }, { "epoch": 66.02777777777777, "grad_norm": 0.8319178223609924, "learning_rate": 7.06660960371867e-05, "loss": 0.3816, "step": 23770 }, { "epoch": 66.05555555555556, "grad_norm": 0.8183095455169678, "learning_rate": 7.064099915344396e-05, "loss": 0.3942, "step": 23780 }, { "epoch": 66.08333333333333, "grad_norm": 1.0256813764572144, "learning_rate": 7.061589599951066e-05, "loss": 0.4027, "step": 23790 }, { "epoch": 66.11111111111111, "grad_norm": 0.8382439017295837, "learning_rate": 7.05907865830125e-05, "loss": 0.3898, "step": 23800 }, { "epoch": 66.13888888888889, "grad_norm": 0.8996408581733704, "learning_rate": 7.056567091157703e-05, "loss": 0.3987, "step": 23810 }, { "epoch": 66.16666666666667, "grad_norm": 0.8379705548286438, "learning_rate": 7.054054899283375e-05, "loss": 0.3929, "step": 23820 }, { "epoch": 66.19444444444444, "grad_norm": 0.7782262563705444, "learning_rate": 7.051542083441403e-05, "loss": 0.3997, "step": 23830 }, { "epoch": 66.22222222222223, "grad_norm": 0.7667084336280823, "learning_rate": 7.049028644395113e-05, "loss": 0.3865, "step": 23840 }, { "epoch": 66.25, "grad_norm": 0.8524795770645142, "learning_rate": 7.046514582908024e-05, "loss": 0.4163, "step": 23850 }, { "epoch": 66.27777777777777, "grad_norm": 0.8519012331962585, "learning_rate": 7.043999899743838e-05, "loss": 0.404, "step": 23860 }, { "epoch": 66.30555555555556, "grad_norm": 0.7922749519348145, "learning_rate": 7.041484595666451e-05, "loss": 0.3921, "step": 23870 }, { "epoch": 66.33333333333333, "grad_norm": 0.8642224073410034, "learning_rate": 7.038968671439948e-05, "loss": 0.4026, "step": 23880 }, { "epoch": 66.36111111111111, "grad_norm": 0.8194354772567749, "learning_rate": 7.036452127828596e-05, "loss": 0.3921, "step": 23890 }, { "epoch": 66.38888888888889, "grad_norm": 0.7959164977073669, "learning_rate": 7.033934965596859e-05, "loss": 0.3879, "step": 23900 }, { "epoch": 66.41666666666667, "grad_norm": 0.8582066893577576, "learning_rate": 7.031417185509381e-05, "loss": 0.3983, "step": 23910 }, { "epoch": 66.44444444444444, "grad_norm": 0.9152886867523193, "learning_rate": 7.028898788331e-05, "loss": 0.4044, "step": 23920 }, { "epoch": 66.47222222222223, "grad_norm": 0.9277646541595459, "learning_rate": 7.026379774826736e-05, "loss": 0.3984, "step": 23930 }, { "epoch": 66.5, "grad_norm": 0.8627841472625732, "learning_rate": 7.0238601457618e-05, "loss": 0.3937, "step": 23940 }, { "epoch": 66.52777777777777, "grad_norm": 0.9159237742424011, "learning_rate": 7.02133990190159e-05, "loss": 0.4003, "step": 23950 }, { "epoch": 66.55555555555556, "grad_norm": 0.7756644487380981, "learning_rate": 7.018819044011687e-05, "loss": 0.3868, "step": 23960 }, { "epoch": 66.58333333333333, "grad_norm": 0.840053141117096, "learning_rate": 7.016297572857863e-05, "loss": 0.3825, "step": 23970 }, { "epoch": 66.61111111111111, "grad_norm": 0.7714797258377075, "learning_rate": 7.013775489206072e-05, "loss": 0.3877, "step": 23980 }, { "epoch": 66.63888888888889, "grad_norm": 0.8401614427566528, "learning_rate": 7.01125279382246e-05, "loss": 0.4014, "step": 23990 }, { "epoch": 66.66666666666667, "grad_norm": 0.837662398815155, "learning_rate": 7.008729487473351e-05, "loss": 0.3975, "step": 24000 }, { "epoch": 66.69444444444444, "grad_norm": 0.8576868772506714, "learning_rate": 7.006205570925263e-05, "loss": 0.3959, "step": 24010 }, { "epoch": 66.72222222222223, "grad_norm": 0.8367955684661865, "learning_rate": 7.003681044944892e-05, "loss": 0.397, "step": 24020 }, { "epoch": 66.75, "grad_norm": 0.8955130577087402, "learning_rate": 7.001155910299126e-05, "loss": 0.3794, "step": 24030 }, { "epoch": 66.77777777777777, "grad_norm": 0.8776747584342957, "learning_rate": 6.99863016775503e-05, "loss": 0.3869, "step": 24040 }, { "epoch": 66.80555555555556, "grad_norm": 0.7578858137130737, "learning_rate": 6.996103818079859e-05, "loss": 0.3929, "step": 24050 }, { "epoch": 66.83333333333333, "grad_norm": 0.8594916462898254, "learning_rate": 6.993576862041054e-05, "loss": 0.3903, "step": 24060 }, { "epoch": 66.86111111111111, "grad_norm": 0.8113401532173157, "learning_rate": 6.991049300406235e-05, "loss": 0.3924, "step": 24070 }, { "epoch": 66.88888888888889, "grad_norm": 0.816952109336853, "learning_rate": 6.988521133943209e-05, "loss": 0.3977, "step": 24080 }, { "epoch": 66.91666666666667, "grad_norm": 0.8556949496269226, "learning_rate": 6.985992363419966e-05, "loss": 0.4095, "step": 24090 }, { "epoch": 66.94444444444444, "grad_norm": 0.8407782912254333, "learning_rate": 6.983462989604682e-05, "loss": 0.3862, "step": 24100 }, { "epoch": 66.97222222222223, "grad_norm": 0.8673591017723083, "learning_rate": 6.980933013265709e-05, "loss": 0.4007, "step": 24110 }, { "epoch": 67.0, "grad_norm": 0.8677119016647339, "learning_rate": 6.978402435171592e-05, "loss": 0.3994, "step": 24120 }, { "epoch": 67.02777777777777, "grad_norm": 0.8772804737091064, "learning_rate": 6.975871256091052e-05, "loss": 0.3861, "step": 24130 }, { "epoch": 67.05555555555556, "grad_norm": 0.8100700974464417, "learning_rate": 6.973339476792995e-05, "loss": 0.3846, "step": 24140 }, { "epoch": 67.08333333333333, "grad_norm": 0.8092573881149292, "learning_rate": 6.970807098046505e-05, "loss": 0.384, "step": 24150 }, { "epoch": 67.11111111111111, "grad_norm": 0.7694796919822693, "learning_rate": 6.968274120620858e-05, "loss": 0.3823, "step": 24160 }, { "epoch": 67.13888888888889, "grad_norm": 0.8044880628585815, "learning_rate": 6.965740545285499e-05, "loss": 0.3865, "step": 24170 }, { "epoch": 67.16666666666667, "grad_norm": 0.785912811756134, "learning_rate": 6.963206372810068e-05, "loss": 0.3898, "step": 24180 }, { "epoch": 67.19444444444444, "grad_norm": 0.861036479473114, "learning_rate": 6.960671603964375e-05, "loss": 0.4033, "step": 24190 }, { "epoch": 67.22222222222223, "grad_norm": 0.8116509318351746, "learning_rate": 6.958136239518418e-05, "loss": 0.3894, "step": 24200 }, { "epoch": 67.25, "grad_norm": 0.7676807641983032, "learning_rate": 6.955600280242371e-05, "loss": 0.39, "step": 24210 }, { "epoch": 67.27777777777777, "grad_norm": 0.8142495155334473, "learning_rate": 6.953063726906596e-05, "loss": 0.3972, "step": 24220 }, { "epoch": 67.30555555555556, "grad_norm": 0.9176709055900574, "learning_rate": 6.950526580281626e-05, "loss": 0.3958, "step": 24230 }, { "epoch": 67.33333333333333, "grad_norm": 0.9225029945373535, "learning_rate": 6.947988841138184e-05, "loss": 0.395, "step": 24240 }, { "epoch": 67.36111111111111, "grad_norm": 0.8623054623603821, "learning_rate": 6.945450510247165e-05, "loss": 0.3872, "step": 24250 }, { "epoch": 67.38888888888889, "grad_norm": 0.8245826363563538, "learning_rate": 6.942911588379647e-05, "loss": 0.3969, "step": 24260 }, { "epoch": 67.41666666666667, "grad_norm": 1.0362855195999146, "learning_rate": 6.940372076306888e-05, "loss": 0.3922, "step": 24270 }, { "epoch": 67.44444444444444, "grad_norm": 0.8395399451255798, "learning_rate": 6.937831974800326e-05, "loss": 0.3847, "step": 24280 }, { "epoch": 67.47222222222223, "grad_norm": 0.8151178956031799, "learning_rate": 6.935291284631574e-05, "loss": 0.3909, "step": 24290 }, { "epoch": 67.5, "grad_norm": 0.8720090985298157, "learning_rate": 6.932750006572428e-05, "loss": 0.3999, "step": 24300 }, { "epoch": 67.52777777777777, "grad_norm": 0.7605141997337341, "learning_rate": 6.930208141394863e-05, "loss": 0.3903, "step": 24310 }, { "epoch": 67.55555555555556, "grad_norm": 0.7830259799957275, "learning_rate": 6.927665689871026e-05, "loss": 0.4031, "step": 24320 }, { "epoch": 67.58333333333333, "grad_norm": 0.7923908829689026, "learning_rate": 6.925122652773253e-05, "loss": 0.3851, "step": 24330 }, { "epoch": 67.61111111111111, "grad_norm": 0.8151420950889587, "learning_rate": 6.922579030874046e-05, "loss": 0.3867, "step": 24340 }, { "epoch": 67.63888888888889, "grad_norm": 0.8267762064933777, "learning_rate": 6.920034824946093e-05, "loss": 0.3942, "step": 24350 }, { "epoch": 67.66666666666667, "grad_norm": 1.0306706428527832, "learning_rate": 6.917490035762255e-05, "loss": 0.3958, "step": 24360 }, { "epoch": 67.69444444444444, "grad_norm": 0.8678660988807678, "learning_rate": 6.914944664095573e-05, "loss": 0.382, "step": 24370 }, { "epoch": 67.72222222222223, "grad_norm": 0.9921072721481323, "learning_rate": 6.912398710719264e-05, "loss": 0.3853, "step": 24380 }, { "epoch": 67.75, "grad_norm": 0.7950336933135986, "learning_rate": 6.90985217640672e-05, "loss": 0.4037, "step": 24390 }, { "epoch": 67.77777777777777, "grad_norm": 0.8051491975784302, "learning_rate": 6.90730506193151e-05, "loss": 0.3922, "step": 24400 }, { "epoch": 67.80555555555556, "grad_norm": 0.8988342881202698, "learning_rate": 6.904757368067384e-05, "loss": 0.3964, "step": 24410 }, { "epoch": 67.83333333333333, "grad_norm": 0.9481894969940186, "learning_rate": 6.90220909558826e-05, "loss": 0.4188, "step": 24420 }, { "epoch": 67.86111111111111, "grad_norm": 0.9220780730247498, "learning_rate": 6.899660245268237e-05, "loss": 0.3887, "step": 24430 }, { "epoch": 67.88888888888889, "grad_norm": 0.8599420189857483, "learning_rate": 6.897110817881592e-05, "loss": 0.3785, "step": 24440 }, { "epoch": 67.91666666666667, "grad_norm": 0.9558334350585938, "learning_rate": 6.894560814202769e-05, "loss": 0.3998, "step": 24450 }, { "epoch": 67.94444444444444, "grad_norm": 0.7356024384498596, "learning_rate": 6.892010235006394e-05, "loss": 0.3895, "step": 24460 }, { "epoch": 67.97222222222223, "grad_norm": 0.7758776545524597, "learning_rate": 6.889459081067264e-05, "loss": 0.4044, "step": 24470 }, { "epoch": 68.0, "grad_norm": 0.7989199161529541, "learning_rate": 6.886907353160356e-05, "loss": 0.3861, "step": 24480 }, { "epoch": 68.02777777777777, "grad_norm": 0.7369942665100098, "learning_rate": 6.884355052060814e-05, "loss": 0.3877, "step": 24490 }, { "epoch": 68.05555555555556, "grad_norm": 0.7261472940444946, "learning_rate": 6.88180217854396e-05, "loss": 0.374, "step": 24500 }, { "epoch": 68.08333333333333, "grad_norm": 0.7997725605964661, "learning_rate": 6.87924873338529e-05, "loss": 0.3775, "step": 24510 }, { "epoch": 68.11111111111111, "grad_norm": 0.7673945426940918, "learning_rate": 6.876694717360475e-05, "loss": 0.393, "step": 24520 }, { "epoch": 68.13888888888889, "grad_norm": 0.9699840545654297, "learning_rate": 6.874140131245355e-05, "loss": 0.415, "step": 24530 }, { "epoch": 68.16666666666667, "grad_norm": 0.9817547798156738, "learning_rate": 6.871584975815948e-05, "loss": 0.3966, "step": 24540 }, { "epoch": 68.19444444444444, "grad_norm": 0.8667738437652588, "learning_rate": 6.86902925184844e-05, "loss": 0.4014, "step": 24550 }, { "epoch": 68.22222222222223, "grad_norm": 0.8210436105728149, "learning_rate": 6.866472960119195e-05, "loss": 0.3837, "step": 24560 }, { "epoch": 68.25, "grad_norm": 0.8093900084495544, "learning_rate": 6.863916101404748e-05, "loss": 0.3882, "step": 24570 }, { "epoch": 68.27777777777777, "grad_norm": 0.7710138559341431, "learning_rate": 6.8613586764818e-05, "loss": 0.374, "step": 24580 }, { "epoch": 68.30555555555556, "grad_norm": 0.9629856944084167, "learning_rate": 6.858800686127233e-05, "loss": 0.4107, "step": 24590 }, { "epoch": 68.33333333333333, "grad_norm": 0.9160496592521667, "learning_rate": 6.856242131118097e-05, "loss": 0.3933, "step": 24600 }, { "epoch": 68.36111111111111, "grad_norm": 0.8321443796157837, "learning_rate": 6.853683012231614e-05, "loss": 0.3931, "step": 24610 }, { "epoch": 68.38888888888889, "grad_norm": 0.9828425049781799, "learning_rate": 6.851123330245173e-05, "loss": 0.4002, "step": 24620 }, { "epoch": 68.41666666666667, "grad_norm": 0.7996378540992737, "learning_rate": 6.848563085936343e-05, "loss": 0.3968, "step": 24630 }, { "epoch": 68.44444444444444, "grad_norm": 0.7837101221084595, "learning_rate": 6.846002280082853e-05, "loss": 0.374, "step": 24640 }, { "epoch": 68.47222222222223, "grad_norm": 0.7571370601654053, "learning_rate": 6.843440913462614e-05, "loss": 0.3863, "step": 24650 }, { "epoch": 68.5, "grad_norm": 0.8252092599868774, "learning_rate": 6.840878986853698e-05, "loss": 0.39, "step": 24660 }, { "epoch": 68.52777777777777, "grad_norm": 0.7675164937973022, "learning_rate": 6.838316501034352e-05, "loss": 0.3908, "step": 24670 }, { "epoch": 68.55555555555556, "grad_norm": 0.7596427798271179, "learning_rate": 6.83575345678299e-05, "loss": 0.3865, "step": 24680 }, { "epoch": 68.58333333333333, "grad_norm": 0.9289238452911377, "learning_rate": 6.833189854878196e-05, "loss": 0.4087, "step": 24690 }, { "epoch": 68.61111111111111, "grad_norm": 0.8499657511711121, "learning_rate": 6.83062569609873e-05, "loss": 0.3932, "step": 24700 }, { "epoch": 68.63888888888889, "grad_norm": 0.8900989294052124, "learning_rate": 6.828060981223512e-05, "loss": 0.3763, "step": 24710 }, { "epoch": 68.66666666666667, "grad_norm": 0.8171483874320984, "learning_rate": 6.825495711031634e-05, "loss": 0.3955, "step": 24720 }, { "epoch": 68.69444444444444, "grad_norm": 0.7413240671157837, "learning_rate": 6.822929886302359e-05, "loss": 0.3897, "step": 24730 }, { "epoch": 68.72222222222223, "grad_norm": 0.8287396430969238, "learning_rate": 6.820363507815116e-05, "loss": 0.3826, "step": 24740 }, { "epoch": 68.75, "grad_norm": 0.859183132648468, "learning_rate": 6.817796576349501e-05, "loss": 0.3883, "step": 24750 }, { "epoch": 68.77777777777777, "grad_norm": 0.7868667840957642, "learning_rate": 6.815229092685285e-05, "loss": 0.3698, "step": 24760 }, { "epoch": 68.80555555555556, "grad_norm": 0.8772388100624084, "learning_rate": 6.812661057602399e-05, "loss": 0.3808, "step": 24770 }, { "epoch": 68.83333333333333, "grad_norm": 0.7359911203384399, "learning_rate": 6.810092471880943e-05, "loss": 0.3902, "step": 24780 }, { "epoch": 68.86111111111111, "grad_norm": 0.8440349698066711, "learning_rate": 6.807523336301187e-05, "loss": 0.3997, "step": 24790 }, { "epoch": 68.88888888888889, "grad_norm": 0.7922499775886536, "learning_rate": 6.804953651643566e-05, "loss": 0.3913, "step": 24800 }, { "epoch": 68.91666666666667, "grad_norm": 0.7851448655128479, "learning_rate": 6.802383418688685e-05, "loss": 0.3793, "step": 24810 }, { "epoch": 68.94444444444444, "grad_norm": 0.7608336806297302, "learning_rate": 6.799812638217309e-05, "loss": 0.3788, "step": 24820 }, { "epoch": 68.97222222222223, "grad_norm": 0.7723075747489929, "learning_rate": 6.797241311010373e-05, "loss": 0.3844, "step": 24830 }, { "epoch": 69.0, "grad_norm": 0.9090803861618042, "learning_rate": 6.794669437848982e-05, "loss": 0.392, "step": 24840 }, { "epoch": 69.02777777777777, "grad_norm": 0.9109652042388916, "learning_rate": 6.792097019514402e-05, "loss": 0.3983, "step": 24850 }, { "epoch": 69.05555555555556, "grad_norm": 0.7598544359207153, "learning_rate": 6.789524056788064e-05, "loss": 0.3973, "step": 24860 }, { "epoch": 69.08333333333333, "grad_norm": 0.7109713554382324, "learning_rate": 6.786950550451567e-05, "loss": 0.3772, "step": 24870 }, { "epoch": 69.11111111111111, "grad_norm": 0.7326560616493225, "learning_rate": 6.784376501286676e-05, "loss": 0.3858, "step": 24880 }, { "epoch": 69.13888888888889, "grad_norm": 0.864432156085968, "learning_rate": 6.781801910075316e-05, "loss": 0.3953, "step": 24890 }, { "epoch": 69.16666666666667, "grad_norm": 0.8207216262817383, "learning_rate": 6.779226777599581e-05, "loss": 0.3814, "step": 24900 }, { "epoch": 69.19444444444444, "grad_norm": 0.7526924014091492, "learning_rate": 6.776651104641729e-05, "loss": 0.38, "step": 24910 }, { "epoch": 69.22222222222223, "grad_norm": 0.7732716202735901, "learning_rate": 6.774074891984183e-05, "loss": 0.3787, "step": 24920 }, { "epoch": 69.25, "grad_norm": 0.9075376391410828, "learning_rate": 6.771498140409526e-05, "loss": 0.3946, "step": 24930 }, { "epoch": 69.27777777777777, "grad_norm": 0.9069038033485413, "learning_rate": 6.768920850700506e-05, "loss": 0.4042, "step": 24940 }, { "epoch": 69.30555555555556, "grad_norm": 0.875665009021759, "learning_rate": 6.766343023640039e-05, "loss": 0.3889, "step": 24950 }, { "epoch": 69.33333333333333, "grad_norm": 0.8648170828819275, "learning_rate": 6.763764660011198e-05, "loss": 0.3829, "step": 24960 }, { "epoch": 69.36111111111111, "grad_norm": 0.9063073396682739, "learning_rate": 6.761185760597223e-05, "loss": 0.3982, "step": 24970 }, { "epoch": 69.38888888888889, "grad_norm": 0.8312014937400818, "learning_rate": 6.758606326181515e-05, "loss": 0.3984, "step": 24980 }, { "epoch": 69.41666666666667, "grad_norm": 0.8291789293289185, "learning_rate": 6.75602635754764e-05, "loss": 0.3951, "step": 24990 }, { "epoch": 69.44444444444444, "grad_norm": 0.9121516942977905, "learning_rate": 6.75344585547932e-05, "loss": 0.385, "step": 25000 }, { "epoch": 69.47222222222223, "grad_norm": 0.83965003490448, "learning_rate": 6.750864820760449e-05, "loss": 0.3952, "step": 25010 }, { "epoch": 69.5, "grad_norm": 0.8274394869804382, "learning_rate": 6.748283254175072e-05, "loss": 0.3895, "step": 25020 }, { "epoch": 69.52777777777777, "grad_norm": 0.8841075897216797, "learning_rate": 6.745701156507404e-05, "loss": 0.3861, "step": 25030 }, { "epoch": 69.55555555555556, "grad_norm": 0.7937182188034058, "learning_rate": 6.743118528541818e-05, "loss": 0.3824, "step": 25040 }, { "epoch": 69.58333333333333, "grad_norm": 0.7310532331466675, "learning_rate": 6.740535371062846e-05, "loss": 0.3839, "step": 25050 }, { "epoch": 69.61111111111111, "grad_norm": 0.9290516972541809, "learning_rate": 6.737951684855185e-05, "loss": 0.3935, "step": 25060 }, { "epoch": 69.63888888888889, "grad_norm": 0.8135287761688232, "learning_rate": 6.735367470703691e-05, "loss": 0.3806, "step": 25070 }, { "epoch": 69.66666666666667, "grad_norm": 0.7741101384162903, "learning_rate": 6.732782729393379e-05, "loss": 0.3767, "step": 25080 }, { "epoch": 69.69444444444444, "grad_norm": 0.8442894220352173, "learning_rate": 6.730197461709425e-05, "loss": 0.3839, "step": 25090 }, { "epoch": 69.72222222222223, "grad_norm": 0.9043867588043213, "learning_rate": 6.727611668437164e-05, "loss": 0.3895, "step": 25100 }, { "epoch": 69.75, "grad_norm": 7.398512840270996, "learning_rate": 6.725025350362094e-05, "loss": 0.4101, "step": 25110 }, { "epoch": 69.77777777777777, "grad_norm": 0.9239524006843567, "learning_rate": 6.72243850826987e-05, "loss": 0.3969, "step": 25120 }, { "epoch": 69.80555555555556, "grad_norm": 0.8983470797538757, "learning_rate": 6.719851142946305e-05, "loss": 0.3832, "step": 25130 }, { "epoch": 69.83333333333333, "grad_norm": 0.8021544218063354, "learning_rate": 6.717263255177372e-05, "loss": 0.3967, "step": 25140 }, { "epoch": 69.86111111111111, "grad_norm": 0.8929539322853088, "learning_rate": 6.714674845749205e-05, "loss": 0.4059, "step": 25150 }, { "epoch": 69.88888888888889, "grad_norm": 0.8382063508033752, "learning_rate": 6.712085915448092e-05, "loss": 0.3903, "step": 25160 }, { "epoch": 69.91666666666667, "grad_norm": 0.7762001752853394, "learning_rate": 6.709496465060486e-05, "loss": 0.3892, "step": 25170 }, { "epoch": 69.94444444444444, "grad_norm": 0.8711280226707458, "learning_rate": 6.706906495372987e-05, "loss": 0.3879, "step": 25180 }, { "epoch": 69.97222222222223, "grad_norm": 0.9657962918281555, "learning_rate": 6.704316007172365e-05, "loss": 0.3807, "step": 25190 }, { "epoch": 70.0, "grad_norm": 0.8286260366439819, "learning_rate": 6.701725001245539e-05, "loss": 0.3898, "step": 25200 }, { "epoch": 70.02777777777777, "grad_norm": 0.7943068146705627, "learning_rate": 6.699133478379588e-05, "loss": 0.3685, "step": 25210 }, { "epoch": 70.05555555555556, "grad_norm": 0.8414152264595032, "learning_rate": 6.69654143936175e-05, "loss": 0.3854, "step": 25220 }, { "epoch": 70.08333333333333, "grad_norm": 0.8384286165237427, "learning_rate": 6.693948884979419e-05, "loss": 0.3905, "step": 25230 }, { "epoch": 70.11111111111111, "grad_norm": 0.8176618218421936, "learning_rate": 6.691355816020142e-05, "loss": 0.3774, "step": 25240 }, { "epoch": 70.13888888888889, "grad_norm": 0.8584496378898621, "learning_rate": 6.688762233271624e-05, "loss": 0.399, "step": 25250 }, { "epoch": 70.16666666666667, "grad_norm": 0.8920627236366272, "learning_rate": 6.68616813752173e-05, "loss": 0.3826, "step": 25260 }, { "epoch": 70.19444444444444, "grad_norm": 0.9360004663467407, "learning_rate": 6.683573529558477e-05, "loss": 0.3986, "step": 25270 }, { "epoch": 70.22222222222223, "grad_norm": 0.8287175297737122, "learning_rate": 6.680978410170037e-05, "loss": 0.3964, "step": 25280 }, { "epoch": 70.25, "grad_norm": 0.8103188872337341, "learning_rate": 6.678382780144741e-05, "loss": 0.3811, "step": 25290 }, { "epoch": 70.27777777777777, "grad_norm": 0.7513006925582886, "learning_rate": 6.675786640271071e-05, "loss": 0.3843, "step": 25300 }, { "epoch": 70.30555555555556, "grad_norm": 0.7329114675521851, "learning_rate": 6.673189991337665e-05, "loss": 0.3889, "step": 25310 }, { "epoch": 70.33333333333333, "grad_norm": 0.7282760739326477, "learning_rate": 6.670592834133317e-05, "loss": 0.3856, "step": 25320 }, { "epoch": 70.36111111111111, "grad_norm": 0.8344529271125793, "learning_rate": 6.667995169446979e-05, "loss": 0.3831, "step": 25330 }, { "epoch": 70.38888888888889, "grad_norm": 0.7578366994857788, "learning_rate": 6.665396998067747e-05, "loss": 0.3786, "step": 25340 }, { "epoch": 70.41666666666667, "grad_norm": 0.7875884175300598, "learning_rate": 6.66279832078488e-05, "loss": 0.3997, "step": 25350 }, { "epoch": 70.44444444444444, "grad_norm": 0.8390287756919861, "learning_rate": 6.660199138387786e-05, "loss": 0.3752, "step": 25360 }, { "epoch": 70.47222222222223, "grad_norm": 0.8166279196739197, "learning_rate": 6.65759945166603e-05, "loss": 0.4112, "step": 25370 }, { "epoch": 70.5, "grad_norm": 0.9286925196647644, "learning_rate": 6.654999261409326e-05, "loss": 0.3854, "step": 25380 }, { "epoch": 70.52777777777777, "grad_norm": 0.7322827577590942, "learning_rate": 6.652398568407544e-05, "loss": 0.3918, "step": 25390 }, { "epoch": 70.55555555555556, "grad_norm": 0.7443730235099792, "learning_rate": 6.649797373450707e-05, "loss": 0.3748, "step": 25400 }, { "epoch": 70.58333333333333, "grad_norm": 0.8756852149963379, "learning_rate": 6.647195677328988e-05, "loss": 0.3788, "step": 25410 }, { "epoch": 70.61111111111111, "grad_norm": 0.745641827583313, "learning_rate": 6.644593480832712e-05, "loss": 0.3937, "step": 25420 }, { "epoch": 70.63888888888889, "grad_norm": 0.7606772780418396, "learning_rate": 6.641990784752363e-05, "loss": 0.3937, "step": 25430 }, { "epoch": 70.66666666666667, "grad_norm": 0.7894446849822998, "learning_rate": 6.639387589878566e-05, "loss": 0.3912, "step": 25440 }, { "epoch": 70.69444444444444, "grad_norm": 0.7556613087654114, "learning_rate": 6.636783897002103e-05, "loss": 0.4002, "step": 25450 }, { "epoch": 70.72222222222223, "grad_norm": 0.9101612567901611, "learning_rate": 6.63417970691391e-05, "loss": 0.3936, "step": 25460 }, { "epoch": 70.75, "grad_norm": 0.8851401805877686, "learning_rate": 6.63157502040507e-05, "loss": 0.3794, "step": 25470 }, { "epoch": 70.77777777777777, "grad_norm": 0.9335518479347229, "learning_rate": 6.628969838266819e-05, "loss": 0.3807, "step": 25480 }, { "epoch": 70.80555555555556, "grad_norm": 0.8551751375198364, "learning_rate": 6.626364161290541e-05, "loss": 0.3812, "step": 25490 }, { "epoch": 70.83333333333333, "grad_norm": 0.8234642148017883, "learning_rate": 6.623757990267774e-05, "loss": 0.3881, "step": 25500 }, { "epoch": 70.86111111111111, "grad_norm": 0.8500755429267883, "learning_rate": 6.621151325990201e-05, "loss": 0.4067, "step": 25510 }, { "epoch": 70.88888888888889, "grad_norm": 0.8579781651496887, "learning_rate": 6.618544169249657e-05, "loss": 0.3807, "step": 25520 }, { "epoch": 70.91666666666667, "grad_norm": 0.7698135375976562, "learning_rate": 6.615936520838133e-05, "loss": 0.4011, "step": 25530 }, { "epoch": 70.94444444444444, "grad_norm": 0.9166080951690674, "learning_rate": 6.613328381547759e-05, "loss": 0.3891, "step": 25540 }, { "epoch": 70.97222222222223, "grad_norm": 0.8891611695289612, "learning_rate": 6.610719752170821e-05, "loss": 0.3854, "step": 25550 }, { "epoch": 71.0, "grad_norm": 0.7578216195106506, "learning_rate": 6.60811063349975e-05, "loss": 0.4031, "step": 25560 }, { "epoch": 71.02777777777777, "grad_norm": 0.7708112001419067, "learning_rate": 6.605501026327127e-05, "loss": 0.4019, "step": 25570 }, { "epoch": 71.05555555555556, "grad_norm": 0.826268196105957, "learning_rate": 6.602890931445685e-05, "loss": 0.386, "step": 25580 }, { "epoch": 71.08333333333333, "grad_norm": 0.8181393146514893, "learning_rate": 6.6002803496483e-05, "loss": 0.3873, "step": 25590 }, { "epoch": 71.11111111111111, "grad_norm": 0.8405119180679321, "learning_rate": 6.597669281727997e-05, "loss": 0.3934, "step": 25600 }, { "epoch": 71.13888888888889, "grad_norm": 0.8367940187454224, "learning_rate": 6.595057728477949e-05, "loss": 0.3803, "step": 25610 }, { "epoch": 71.16666666666667, "grad_norm": 0.8532776832580566, "learning_rate": 6.59244569069148e-05, "loss": 0.3898, "step": 25620 }, { "epoch": 71.19444444444444, "grad_norm": 0.8700582981109619, "learning_rate": 6.589833169162054e-05, "loss": 0.3945, "step": 25630 }, { "epoch": 71.22222222222223, "grad_norm": 0.7763793468475342, "learning_rate": 6.587220164683291e-05, "loss": 0.3866, "step": 25640 }, { "epoch": 71.25, "grad_norm": 0.704367995262146, "learning_rate": 6.58460667804895e-05, "loss": 0.3784, "step": 25650 }, { "epoch": 71.27777777777777, "grad_norm": 0.8007232546806335, "learning_rate": 6.581992710052938e-05, "loss": 0.3794, "step": 25660 }, { "epoch": 71.30555555555556, "grad_norm": 0.8470344543457031, "learning_rate": 6.579378261489311e-05, "loss": 0.3936, "step": 25670 }, { "epoch": 71.33333333333333, "grad_norm": 0.7889506220817566, "learning_rate": 6.576763333152268e-05, "loss": 0.3822, "step": 25680 }, { "epoch": 71.36111111111111, "grad_norm": 0.7754158973693848, "learning_rate": 6.574147925836159e-05, "loss": 0.3895, "step": 25690 }, { "epoch": 71.38888888888889, "grad_norm": 0.902866005897522, "learning_rate": 6.571532040335472e-05, "loss": 0.4, "step": 25700 }, { "epoch": 71.41666666666667, "grad_norm": 0.9679892063140869, "learning_rate": 6.568915677444845e-05, "loss": 0.3794, "step": 25710 }, { "epoch": 71.44444444444444, "grad_norm": 0.8134178519248962, "learning_rate": 6.56629883795906e-05, "loss": 0.3852, "step": 25720 }, { "epoch": 71.47222222222223, "grad_norm": 0.8517280220985413, "learning_rate": 6.563681522673043e-05, "loss": 0.4055, "step": 25730 }, { "epoch": 71.5, "grad_norm": 0.8152915835380554, "learning_rate": 6.561063732381867e-05, "loss": 0.3934, "step": 25740 }, { "epoch": 71.52777777777777, "grad_norm": 0.8701452016830444, "learning_rate": 6.558445467880745e-05, "loss": 0.3726, "step": 25750 }, { "epoch": 71.55555555555556, "grad_norm": 0.7369703054428101, "learning_rate": 6.55582672996504e-05, "loss": 0.3819, "step": 25760 }, { "epoch": 71.58333333333333, "grad_norm": 0.8016225099563599, "learning_rate": 6.553207519430253e-05, "loss": 0.3731, "step": 25770 }, { "epoch": 71.61111111111111, "grad_norm": 0.961371898651123, "learning_rate": 6.550587837072032e-05, "loss": 0.4133, "step": 25780 }, { "epoch": 71.63888888888889, "grad_norm": 0.9388338923454285, "learning_rate": 6.547967683686166e-05, "loss": 0.3896, "step": 25790 }, { "epoch": 71.66666666666667, "grad_norm": 0.7835066318511963, "learning_rate": 6.545347060068591e-05, "loss": 0.379, "step": 25800 }, { "epoch": 71.69444444444444, "grad_norm": 0.9121574759483337, "learning_rate": 6.542725967015382e-05, "loss": 0.3886, "step": 25810 }, { "epoch": 71.72222222222223, "grad_norm": 0.7865225076675415, "learning_rate": 6.540104405322757e-05, "loss": 0.3724, "step": 25820 }, { "epoch": 71.75, "grad_norm": 0.8227207064628601, "learning_rate": 6.537482375787077e-05, "loss": 0.3869, "step": 25830 }, { "epoch": 71.77777777777777, "grad_norm": 0.7538423538208008, "learning_rate": 6.534859879204845e-05, "loss": 0.3976, "step": 25840 }, { "epoch": 71.80555555555556, "grad_norm": 0.8941071033477783, "learning_rate": 6.532236916372709e-05, "loss": 0.3947, "step": 25850 }, { "epoch": 71.83333333333333, "grad_norm": 0.7469039559364319, "learning_rate": 6.529613488087454e-05, "loss": 0.3926, "step": 25860 }, { "epoch": 71.86111111111111, "grad_norm": 0.8394835591316223, "learning_rate": 6.526989595146009e-05, "loss": 0.3905, "step": 25870 }, { "epoch": 71.88888888888889, "grad_norm": 0.8236760497093201, "learning_rate": 6.524365238345441e-05, "loss": 0.3872, "step": 25880 }, { "epoch": 71.91666666666667, "grad_norm": 0.8268040418624878, "learning_rate": 6.521740418482964e-05, "loss": 0.3888, "step": 25890 }, { "epoch": 71.94444444444444, "grad_norm": 0.8322372436523438, "learning_rate": 6.519115136355925e-05, "loss": 0.3921, "step": 25900 }, { "epoch": 71.97222222222223, "grad_norm": 0.8447415232658386, "learning_rate": 6.51648939276182e-05, "loss": 0.3857, "step": 25910 }, { "epoch": 72.0, "grad_norm": 0.7487422227859497, "learning_rate": 6.513863188498277e-05, "loss": 0.3724, "step": 25920 }, { "epoch": 72.02777777777777, "grad_norm": 0.7606449127197266, "learning_rate": 6.511236524363068e-05, "loss": 0.3777, "step": 25930 }, { "epoch": 72.05555555555556, "grad_norm": 0.7941218614578247, "learning_rate": 6.508609401154104e-05, "loss": 0.3718, "step": 25940 }, { "epoch": 72.08333333333333, "grad_norm": 0.8765971660614014, "learning_rate": 6.505981819669439e-05, "loss": 0.377, "step": 25950 }, { "epoch": 72.11111111111111, "grad_norm": 0.8810222148895264, "learning_rate": 6.503353780707258e-05, "loss": 0.3818, "step": 25960 }, { "epoch": 72.13888888888889, "grad_norm": 0.8740923404693604, "learning_rate": 6.500725285065895e-05, "loss": 0.3946, "step": 25970 }, { "epoch": 72.16666666666667, "grad_norm": 0.8189709782600403, "learning_rate": 6.498096333543813e-05, "loss": 0.3705, "step": 25980 }, { "epoch": 72.19444444444444, "grad_norm": 0.7568565011024475, "learning_rate": 6.49546692693962e-05, "loss": 0.3959, "step": 25990 }, { "epoch": 72.22222222222223, "grad_norm": 0.8561545014381409, "learning_rate": 6.492837066052059e-05, "loss": 0.4024, "step": 26000 }, { "epoch": 72.25, "grad_norm": 0.7905543446540833, "learning_rate": 6.490206751680014e-05, "loss": 0.3755, "step": 26010 }, { "epoch": 72.27777777777777, "grad_norm": 0.8568716645240784, "learning_rate": 6.487575984622505e-05, "loss": 0.39, "step": 26020 }, { "epoch": 72.30555555555556, "grad_norm": 0.8317221403121948, "learning_rate": 6.484944765678689e-05, "loss": 0.4013, "step": 26030 }, { "epoch": 72.33333333333333, "grad_norm": 0.7232362031936646, "learning_rate": 6.482313095647861e-05, "loss": 0.3762, "step": 26040 }, { "epoch": 72.36111111111111, "grad_norm": 0.7311893701553345, "learning_rate": 6.479680975329451e-05, "loss": 0.3696, "step": 26050 }, { "epoch": 72.38888888888889, "grad_norm": 0.8000879883766174, "learning_rate": 6.477048405523031e-05, "loss": 0.3869, "step": 26060 }, { "epoch": 72.41666666666667, "grad_norm": 0.7115476727485657, "learning_rate": 6.474415387028304e-05, "loss": 0.3692, "step": 26070 }, { "epoch": 72.44444444444444, "grad_norm": 0.7836112976074219, "learning_rate": 6.471781920645114e-05, "loss": 0.3817, "step": 26080 }, { "epoch": 72.47222222222223, "grad_norm": 0.8262609839439392, "learning_rate": 6.469148007173434e-05, "loss": 0.3887, "step": 26090 }, { "epoch": 72.5, "grad_norm": 0.7987886667251587, "learning_rate": 6.466513647413381e-05, "loss": 0.4023, "step": 26100 }, { "epoch": 72.52777777777777, "grad_norm": 0.9903879761695862, "learning_rate": 6.463878842165203e-05, "loss": 0.3888, "step": 26110 }, { "epoch": 72.55555555555556, "grad_norm": 0.8004825711250305, "learning_rate": 6.461243592229286e-05, "loss": 0.3979, "step": 26120 }, { "epoch": 72.58333333333333, "grad_norm": 0.7533220052719116, "learning_rate": 6.458607898406146e-05, "loss": 0.3807, "step": 26130 }, { "epoch": 72.61111111111111, "grad_norm": 0.7514380216598511, "learning_rate": 6.455971761496439e-05, "loss": 0.3824, "step": 26140 }, { "epoch": 72.63888888888889, "grad_norm": 0.8644043803215027, "learning_rate": 6.453335182300953e-05, "loss": 0.3999, "step": 26150 }, { "epoch": 72.66666666666667, "grad_norm": 0.8152493834495544, "learning_rate": 6.450698161620612e-05, "loss": 0.3903, "step": 26160 }, { "epoch": 72.69444444444444, "grad_norm": 0.828143835067749, "learning_rate": 6.448060700256473e-05, "loss": 0.3874, "step": 26170 }, { "epoch": 72.72222222222223, "grad_norm": 0.887808084487915, "learning_rate": 6.445422799009726e-05, "loss": 0.3801, "step": 26180 }, { "epoch": 72.75, "grad_norm": 0.8504675626754761, "learning_rate": 6.442784458681699e-05, "loss": 0.3724, "step": 26190 }, { "epoch": 72.77777777777777, "grad_norm": 0.9074897766113281, "learning_rate": 6.440145680073847e-05, "loss": 0.3901, "step": 26200 }, { "epoch": 72.80555555555556, "grad_norm": 0.8414016366004944, "learning_rate": 6.437506463987762e-05, "loss": 0.3944, "step": 26210 }, { "epoch": 72.83333333333333, "grad_norm": 0.7688679695129395, "learning_rate": 6.434866811225168e-05, "loss": 0.3906, "step": 26220 }, { "epoch": 72.86111111111111, "grad_norm": 0.8921936750411987, "learning_rate": 6.432226722587923e-05, "loss": 0.3918, "step": 26230 }, { "epoch": 72.88888888888889, "grad_norm": 0.7585747241973877, "learning_rate": 6.429586198878015e-05, "loss": 0.3829, "step": 26240 }, { "epoch": 72.91666666666667, "grad_norm": 0.7906227707862854, "learning_rate": 6.426945240897566e-05, "loss": 0.3902, "step": 26250 }, { "epoch": 72.94444444444444, "grad_norm": 0.7352809309959412, "learning_rate": 6.424303849448829e-05, "loss": 0.3786, "step": 26260 }, { "epoch": 72.97222222222223, "grad_norm": 0.833233118057251, "learning_rate": 6.42166202533419e-05, "loss": 0.3821, "step": 26270 }, { "epoch": 73.0, "grad_norm": 0.8266717791557312, "learning_rate": 6.419019769356164e-05, "loss": 0.3751, "step": 26280 }, { "epoch": 73.02777777777777, "grad_norm": 0.7733241319656372, "learning_rate": 6.416377082317398e-05, "loss": 0.3986, "step": 26290 }, { "epoch": 73.05555555555556, "grad_norm": 0.8874008059501648, "learning_rate": 6.413733965020674e-05, "loss": 0.3804, "step": 26300 }, { "epoch": 73.08333333333333, "grad_norm": 0.806169331073761, "learning_rate": 6.411090418268896e-05, "loss": 0.381, "step": 26310 }, { "epoch": 73.11111111111111, "grad_norm": 0.8593589067459106, "learning_rate": 6.408446442865109e-05, "loss": 0.3787, "step": 26320 }, { "epoch": 73.13888888888889, "grad_norm": 0.8037199974060059, "learning_rate": 6.405802039612479e-05, "loss": 0.3926, "step": 26330 }, { "epoch": 73.16666666666667, "grad_norm": 0.7991811037063599, "learning_rate": 6.403157209314308e-05, "loss": 0.3834, "step": 26340 }, { "epoch": 73.19444444444444, "grad_norm": 0.8217834234237671, "learning_rate": 6.400511952774024e-05, "loss": 0.3943, "step": 26350 }, { "epoch": 73.22222222222223, "grad_norm": 0.8507989048957825, "learning_rate": 6.397866270795187e-05, "loss": 0.3762, "step": 26360 }, { "epoch": 73.25, "grad_norm": 0.8679382801055908, "learning_rate": 6.395220164181489e-05, "loss": 0.3976, "step": 26370 }, { "epoch": 73.27777777777777, "grad_norm": 0.8661856651306152, "learning_rate": 6.39257363373674e-05, "loss": 0.389, "step": 26380 }, { "epoch": 73.30555555555556, "grad_norm": 0.7282438278198242, "learning_rate": 6.389926680264892e-05, "loss": 0.3714, "step": 26390 }, { "epoch": 73.33333333333333, "grad_norm": 0.7951355576515198, "learning_rate": 6.387279304570017e-05, "loss": 0.3837, "step": 26400 }, { "epoch": 73.36111111111111, "grad_norm": 1.0179892778396606, "learning_rate": 6.384631507456319e-05, "loss": 0.3916, "step": 26410 }, { "epoch": 73.38888888888889, "grad_norm": 0.9463052749633789, "learning_rate": 6.381983289728126e-05, "loss": 0.3885, "step": 26420 }, { "epoch": 73.41666666666667, "grad_norm": 0.737682044506073, "learning_rate": 6.3793346521899e-05, "loss": 0.3852, "step": 26430 }, { "epoch": 73.44444444444444, "grad_norm": 0.7276875972747803, "learning_rate": 6.376685595646226e-05, "loss": 0.3843, "step": 26440 }, { "epoch": 73.47222222222223, "grad_norm": 0.7627324461936951, "learning_rate": 6.374036120901816e-05, "loss": 0.3887, "step": 26450 }, { "epoch": 73.5, "grad_norm": 0.8244392275810242, "learning_rate": 6.371386228761514e-05, "loss": 0.3885, "step": 26460 }, { "epoch": 73.52777777777777, "grad_norm": 0.7737241387367249, "learning_rate": 6.368735920030283e-05, "loss": 0.374, "step": 26470 }, { "epoch": 73.55555555555556, "grad_norm": 0.8368585109710693, "learning_rate": 6.366085195513218e-05, "loss": 0.3803, "step": 26480 }, { "epoch": 73.58333333333333, "grad_norm": 0.7231385707855225, "learning_rate": 6.363434056015543e-05, "loss": 0.375, "step": 26490 }, { "epoch": 73.61111111111111, "grad_norm": 0.8207660913467407, "learning_rate": 6.360782502342599e-05, "loss": 0.3913, "step": 26500 }, { "epoch": 73.63888888888889, "grad_norm": 0.7237842082977295, "learning_rate": 6.358130535299862e-05, "loss": 0.3804, "step": 26510 }, { "epoch": 73.66666666666667, "grad_norm": 0.7587536573410034, "learning_rate": 6.355478155692926e-05, "loss": 0.3963, "step": 26520 }, { "epoch": 73.69444444444444, "grad_norm": 0.7757230401039124, "learning_rate": 6.352825364327517e-05, "loss": 0.3762, "step": 26530 }, { "epoch": 73.72222222222223, "grad_norm": 0.7538174390792847, "learning_rate": 6.350172162009482e-05, "loss": 0.3997, "step": 26540 }, { "epoch": 73.75, "grad_norm": 0.7553260326385498, "learning_rate": 6.347518549544793e-05, "loss": 0.3927, "step": 26550 }, { "epoch": 73.77777777777777, "grad_norm": 0.7489867806434631, "learning_rate": 6.344864527739547e-05, "loss": 0.3736, "step": 26560 }, { "epoch": 73.80555555555556, "grad_norm": 0.8466343283653259, "learning_rate": 6.342210097399966e-05, "loss": 0.3826, "step": 26570 }, { "epoch": 73.83333333333333, "grad_norm": 0.8372225165367126, "learning_rate": 6.339555259332398e-05, "loss": 0.3818, "step": 26580 }, { "epoch": 73.86111111111111, "grad_norm": 0.7545796632766724, "learning_rate": 6.33690001434331e-05, "loss": 0.3748, "step": 26590 }, { "epoch": 73.88888888888889, "grad_norm": 0.72734135389328, "learning_rate": 6.334244363239296e-05, "loss": 0.3831, "step": 26600 }, { "epoch": 73.91666666666667, "grad_norm": 0.8355863094329834, "learning_rate": 6.331588306827073e-05, "loss": 0.3861, "step": 26610 }, { "epoch": 73.94444444444444, "grad_norm": 0.764445424079895, "learning_rate": 6.328931845913483e-05, "loss": 0.3691, "step": 26620 }, { "epoch": 73.97222222222223, "grad_norm": 0.8746671676635742, "learning_rate": 6.326274981305484e-05, "loss": 0.3939, "step": 26630 }, { "epoch": 74.0, "grad_norm": 0.7607324123382568, "learning_rate": 6.323617713810166e-05, "loss": 0.3951, "step": 26640 }, { "epoch": 74.02777777777777, "grad_norm": 0.8954172134399414, "learning_rate": 6.320960044234734e-05, "loss": 0.3967, "step": 26650 }, { "epoch": 74.05555555555556, "grad_norm": 0.7919141054153442, "learning_rate": 6.318301973386518e-05, "loss": 0.3707, "step": 26660 }, { "epoch": 74.08333333333333, "grad_norm": 0.8043704032897949, "learning_rate": 6.315643502072971e-05, "loss": 0.3746, "step": 26670 }, { "epoch": 74.11111111111111, "grad_norm": 0.8118901252746582, "learning_rate": 6.312984631101667e-05, "loss": 0.3783, "step": 26680 }, { "epoch": 74.13888888888889, "grad_norm": 0.7834828495979309, "learning_rate": 6.310325361280297e-05, "loss": 0.3796, "step": 26690 }, { "epoch": 74.16666666666667, "grad_norm": 0.9602314233779907, "learning_rate": 6.30766569341668e-05, "loss": 0.3805, "step": 26700 }, { "epoch": 74.19444444444444, "grad_norm": 0.8300098776817322, "learning_rate": 6.305005628318753e-05, "loss": 0.395, "step": 26710 }, { "epoch": 74.22222222222223, "grad_norm": 0.7693149447441101, "learning_rate": 6.302345166794572e-05, "loss": 0.3816, "step": 26720 }, { "epoch": 74.25, "grad_norm": 0.8482222557067871, "learning_rate": 6.299684309652316e-05, "loss": 0.3726, "step": 26730 }, { "epoch": 74.27777777777777, "grad_norm": 0.7561185359954834, "learning_rate": 6.297023057700283e-05, "loss": 0.3812, "step": 26740 }, { "epoch": 74.30555555555556, "grad_norm": 0.779642641544342, "learning_rate": 6.294361411746891e-05, "loss": 0.3826, "step": 26750 }, { "epoch": 74.33333333333333, "grad_norm": 0.7654866576194763, "learning_rate": 6.291699372600677e-05, "loss": 0.3812, "step": 26760 }, { "epoch": 74.36111111111111, "grad_norm": 0.7494361400604248, "learning_rate": 6.2890369410703e-05, "loss": 0.3897, "step": 26770 }, { "epoch": 74.38888888888889, "grad_norm": 0.8179722428321838, "learning_rate": 6.286374117964534e-05, "loss": 0.3815, "step": 26780 }, { "epoch": 74.41666666666667, "grad_norm": 0.8496167659759521, "learning_rate": 6.283710904092277e-05, "loss": 0.3835, "step": 26790 }, { "epoch": 74.44444444444444, "grad_norm": 0.757094144821167, "learning_rate": 6.281047300262542e-05, "loss": 0.3893, "step": 26800 }, { "epoch": 74.47222222222223, "grad_norm": 0.865192174911499, "learning_rate": 6.278383307284461e-05, "loss": 0.3954, "step": 26810 }, { "epoch": 74.5, "grad_norm": 1.052403211593628, "learning_rate": 6.275718925967284e-05, "loss": 0.3824, "step": 26820 }, { "epoch": 74.52777777777777, "grad_norm": 0.7946756482124329, "learning_rate": 6.273054157120382e-05, "loss": 0.3817, "step": 26830 }, { "epoch": 74.55555555555556, "grad_norm": 0.8832020163536072, "learning_rate": 6.270389001553238e-05, "loss": 0.3969, "step": 26840 }, { "epoch": 74.58333333333333, "grad_norm": 0.8445005416870117, "learning_rate": 6.26772346007546e-05, "loss": 0.398, "step": 26850 }, { "epoch": 74.61111111111111, "grad_norm": 0.7530742287635803, "learning_rate": 6.265057533496767e-05, "loss": 0.3834, "step": 26860 }, { "epoch": 74.63888888888889, "grad_norm": 1.0111994743347168, "learning_rate": 6.262391222626997e-05, "loss": 0.377, "step": 26870 }, { "epoch": 74.66666666666667, "grad_norm": 0.7876585721969604, "learning_rate": 6.259724528276106e-05, "loss": 0.3931, "step": 26880 }, { "epoch": 74.69444444444444, "grad_norm": 0.8253551721572876, "learning_rate": 6.257057451254162e-05, "loss": 0.3726, "step": 26890 }, { "epoch": 74.72222222222223, "grad_norm": 0.7565832138061523, "learning_rate": 6.254389992371357e-05, "loss": 0.3834, "step": 26900 }, { "epoch": 74.75, "grad_norm": 0.8471810817718506, "learning_rate": 6.25172215243799e-05, "loss": 0.3881, "step": 26910 }, { "epoch": 74.77777777777777, "grad_norm": 0.8200857639312744, "learning_rate": 6.249053932264486e-05, "loss": 0.3662, "step": 26920 }, { "epoch": 74.80555555555556, "grad_norm": 0.7902987003326416, "learning_rate": 6.246385332661376e-05, "loss": 0.3608, "step": 26930 }, { "epoch": 74.83333333333333, "grad_norm": 0.8827223777770996, "learning_rate": 6.24371635443931e-05, "loss": 0.3928, "step": 26940 }, { "epoch": 74.86111111111111, "grad_norm": 0.7757554650306702, "learning_rate": 6.241046998409054e-05, "loss": 0.3867, "step": 26950 }, { "epoch": 74.88888888888889, "grad_norm": 0.7656170725822449, "learning_rate": 6.238377265381489e-05, "loss": 0.3882, "step": 26960 }, { "epoch": 74.91666666666667, "grad_norm": 0.8187659978866577, "learning_rate": 6.235707156167607e-05, "loss": 0.3886, "step": 26970 }, { "epoch": 74.94444444444444, "grad_norm": 0.8508791923522949, "learning_rate": 6.233036671578519e-05, "loss": 0.3776, "step": 26980 }, { "epoch": 74.97222222222223, "grad_norm": 0.8609371781349182, "learning_rate": 6.230365812425445e-05, "loss": 0.3861, "step": 26990 }, { "epoch": 75.0, "grad_norm": 0.7600507140159607, "learning_rate": 6.227694579519724e-05, "loss": 0.3879, "step": 27000 }, { "epoch": 75.02777777777777, "grad_norm": 0.8073087334632874, "learning_rate": 6.225022973672805e-05, "loss": 0.4047, "step": 27010 }, { "epoch": 75.05555555555556, "grad_norm": 0.8766651749610901, "learning_rate": 6.222350995696253e-05, "loss": 0.3785, "step": 27020 }, { "epoch": 75.08333333333333, "grad_norm": 0.8681967854499817, "learning_rate": 6.21967864640174e-05, "loss": 0.3758, "step": 27030 }, { "epoch": 75.11111111111111, "grad_norm": 0.7465468645095825, "learning_rate": 6.217005926601059e-05, "loss": 0.367, "step": 27040 }, { "epoch": 75.13888888888889, "grad_norm": 0.7531543970108032, "learning_rate": 6.214332837106111e-05, "loss": 0.384, "step": 27050 }, { "epoch": 75.16666666666667, "grad_norm": 0.733474612236023, "learning_rate": 6.21165937872891e-05, "loss": 0.3787, "step": 27060 }, { "epoch": 75.19444444444444, "grad_norm": 0.7874627113342285, "learning_rate": 6.208985552281582e-05, "loss": 0.3771, "step": 27070 }, { "epoch": 75.22222222222223, "grad_norm": 0.740445077419281, "learning_rate": 6.206311358576364e-05, "loss": 0.3765, "step": 27080 }, { "epoch": 75.25, "grad_norm": 0.7908750772476196, "learning_rate": 6.203636798425608e-05, "loss": 0.3915, "step": 27090 }, { "epoch": 75.27777777777777, "grad_norm": 0.7521921396255493, "learning_rate": 6.20096187264177e-05, "loss": 0.3893, "step": 27100 }, { "epoch": 75.30555555555556, "grad_norm": 0.9135710000991821, "learning_rate": 6.198286582037425e-05, "loss": 0.3835, "step": 27110 }, { "epoch": 75.33333333333333, "grad_norm": 0.8750967979431152, "learning_rate": 6.195610927425256e-05, "loss": 0.3826, "step": 27120 }, { "epoch": 75.36111111111111, "grad_norm": 0.8093649744987488, "learning_rate": 6.192934909618056e-05, "loss": 0.3902, "step": 27130 }, { "epoch": 75.38888888888889, "grad_norm": 0.8123215436935425, "learning_rate": 6.190258529428728e-05, "loss": 0.3883, "step": 27140 }, { "epoch": 75.41666666666667, "grad_norm": 0.8112093210220337, "learning_rate": 6.187581787670285e-05, "loss": 0.3859, "step": 27150 }, { "epoch": 75.44444444444444, "grad_norm": 0.815905749797821, "learning_rate": 6.184904685155852e-05, "loss": 0.3802, "step": 27160 }, { "epoch": 75.47222222222223, "grad_norm": 0.8294159770011902, "learning_rate": 6.18222722269866e-05, "loss": 0.3743, "step": 27170 }, { "epoch": 75.5, "grad_norm": 0.8200350999832153, "learning_rate": 6.179549401112053e-05, "loss": 0.3933, "step": 27180 }, { "epoch": 75.52777777777777, "grad_norm": 0.8109229803085327, "learning_rate": 6.176871221209482e-05, "loss": 0.3793, "step": 27190 }, { "epoch": 75.55555555555556, "grad_norm": 0.8697022199630737, "learning_rate": 6.174192683804508e-05, "loss": 0.378, "step": 27200 }, { "epoch": 75.58333333333333, "grad_norm": 0.8542343974113464, "learning_rate": 6.1715137897108e-05, "loss": 0.366, "step": 27210 }, { "epoch": 75.61111111111111, "grad_norm": 0.834006667137146, "learning_rate": 6.168834539742134e-05, "loss": 0.385, "step": 27220 }, { "epoch": 75.63888888888889, "grad_norm": 0.8230557441711426, "learning_rate": 6.166154934712397e-05, "loss": 0.3728, "step": 27230 }, { "epoch": 75.66666666666667, "grad_norm": 0.7962156534194946, "learning_rate": 6.163474975435581e-05, "loss": 0.3681, "step": 27240 }, { "epoch": 75.69444444444444, "grad_norm": 0.7175993323326111, "learning_rate": 6.160794662725787e-05, "loss": 0.3781, "step": 27250 }, { "epoch": 75.72222222222223, "grad_norm": 0.8228724598884583, "learning_rate": 6.158113997397222e-05, "loss": 0.3791, "step": 27260 }, { "epoch": 75.75, "grad_norm": 0.8357778191566467, "learning_rate": 6.155432980264205e-05, "loss": 0.3748, "step": 27270 }, { "epoch": 75.77777777777777, "grad_norm": 0.7700074315071106, "learning_rate": 6.152751612141156e-05, "loss": 0.3815, "step": 27280 }, { "epoch": 75.80555555555556, "grad_norm": 0.8152464628219604, "learning_rate": 6.150069893842602e-05, "loss": 0.3892, "step": 27290 }, { "epoch": 75.83333333333333, "grad_norm": 0.8933842778205872, "learning_rate": 6.147387826183182e-05, "loss": 0.3916, "step": 27300 }, { "epoch": 75.86111111111111, "grad_norm": 0.7309064865112305, "learning_rate": 6.144705409977635e-05, "loss": 0.3714, "step": 27310 }, { "epoch": 75.88888888888889, "grad_norm": 0.7328813076019287, "learning_rate": 6.142022646040808e-05, "loss": 0.3734, "step": 27320 }, { "epoch": 75.91666666666667, "grad_norm": 0.8790497779846191, "learning_rate": 6.139339535187653e-05, "loss": 0.3868, "step": 27330 }, { "epoch": 75.94444444444444, "grad_norm": 0.9082517027854919, "learning_rate": 6.136656078233232e-05, "loss": 0.3788, "step": 27340 }, { "epoch": 75.97222222222223, "grad_norm": 0.71174156665802, "learning_rate": 6.133972275992707e-05, "loss": 0.3955, "step": 27350 }, { "epoch": 76.0, "grad_norm": 0.840091347694397, "learning_rate": 6.131288129281342e-05, "loss": 0.3848, "step": 27360 }, { "epoch": 76.02777777777777, "grad_norm": 0.7913119196891785, "learning_rate": 6.128603638914516e-05, "loss": 0.3854, "step": 27370 }, { "epoch": 76.05555555555556, "grad_norm": 0.8506684303283691, "learning_rate": 6.125918805707704e-05, "loss": 0.3751, "step": 27380 }, { "epoch": 76.08333333333333, "grad_norm": 0.7834353446960449, "learning_rate": 6.123233630476485e-05, "loss": 0.3956, "step": 27390 }, { "epoch": 76.11111111111111, "grad_norm": 0.8143153190612793, "learning_rate": 6.120548114036547e-05, "loss": 0.3977, "step": 27400 }, { "epoch": 76.13888888888889, "grad_norm": 0.8780723810195923, "learning_rate": 6.117862257203679e-05, "loss": 0.3847, "step": 27410 }, { "epoch": 76.16666666666667, "grad_norm": 0.7510525584220886, "learning_rate": 6.115176060793771e-05, "loss": 0.3916, "step": 27420 }, { "epoch": 76.19444444444444, "grad_norm": 0.7442197203636169, "learning_rate": 6.112489525622822e-05, "loss": 0.3774, "step": 27430 }, { "epoch": 76.22222222222223, "grad_norm": 0.7229782342910767, "learning_rate": 6.109802652506928e-05, "loss": 0.3722, "step": 27440 }, { "epoch": 76.25, "grad_norm": 0.7038047909736633, "learning_rate": 6.107115442262291e-05, "loss": 0.3817, "step": 27450 }, { "epoch": 76.27777777777777, "grad_norm": 0.8004559278488159, "learning_rate": 6.104427895705214e-05, "loss": 0.3956, "step": 27460 }, { "epoch": 76.30555555555556, "grad_norm": 0.7582125663757324, "learning_rate": 6.101740013652103e-05, "loss": 0.3785, "step": 27470 }, { "epoch": 76.33333333333333, "grad_norm": 0.8473936319351196, "learning_rate": 6.099051796919465e-05, "loss": 0.379, "step": 27480 }, { "epoch": 76.36111111111111, "grad_norm": 0.7818582653999329, "learning_rate": 6.096363246323911e-05, "loss": 0.3916, "step": 27490 }, { "epoch": 76.38888888888889, "grad_norm": 0.8253767490386963, "learning_rate": 6.0936743626821504e-05, "loss": 0.3823, "step": 27500 }, { "epoch": 76.41666666666667, "grad_norm": 0.7983365654945374, "learning_rate": 6.090985146810996e-05, "loss": 0.3753, "step": 27510 }, { "epoch": 76.44444444444444, "grad_norm": 0.7979713678359985, "learning_rate": 6.088295599527357e-05, "loss": 0.3927, "step": 27520 }, { "epoch": 76.47222222222223, "grad_norm": 0.8414709568023682, "learning_rate": 6.085605721648252e-05, "loss": 0.3693, "step": 27530 }, { "epoch": 76.5, "grad_norm": 0.752540647983551, "learning_rate": 6.082915513990792e-05, "loss": 0.3856, "step": 27540 }, { "epoch": 76.52777777777777, "grad_norm": 0.7658264636993408, "learning_rate": 6.080224977372192e-05, "loss": 0.3702, "step": 27550 }, { "epoch": 76.55555555555556, "grad_norm": 0.7677528858184814, "learning_rate": 6.0775341126097666e-05, "loss": 0.3711, "step": 27560 }, { "epoch": 76.58333333333333, "grad_norm": 0.9730269312858582, "learning_rate": 6.074842920520926e-05, "loss": 0.3773, "step": 27570 }, { "epoch": 76.61111111111111, "grad_norm": 0.8675915598869324, "learning_rate": 6.072151401923186e-05, "loss": 0.3772, "step": 27580 }, { "epoch": 76.63888888888889, "grad_norm": 0.8337302803993225, "learning_rate": 6.069459557634159e-05, "loss": 0.3697, "step": 27590 }, { "epoch": 76.66666666666667, "grad_norm": 0.7338860034942627, "learning_rate": 6.066767388471557e-05, "loss": 0.3815, "step": 27600 }, { "epoch": 76.69444444444444, "grad_norm": 0.784747302532196, "learning_rate": 6.064074895253188e-05, "loss": 0.3786, "step": 27610 }, { "epoch": 76.72222222222223, "grad_norm": 0.8168798685073853, "learning_rate": 6.061382078796961e-05, "loss": 0.3738, "step": 27620 }, { "epoch": 76.75, "grad_norm": 0.7906957864761353, "learning_rate": 6.0586889399208814e-05, "loss": 0.3813, "step": 27630 }, { "epoch": 76.77777777777777, "grad_norm": 0.8207159042358398, "learning_rate": 6.0559954794430565e-05, "loss": 0.373, "step": 27640 }, { "epoch": 76.80555555555556, "grad_norm": 0.7771559357643127, "learning_rate": 6.053301698181687e-05, "loss": 0.3842, "step": 27650 }, { "epoch": 76.83333333333333, "grad_norm": 0.776150107383728, "learning_rate": 6.0506075969550725e-05, "loss": 0.3781, "step": 27660 }, { "epoch": 76.86111111111111, "grad_norm": 0.8406950831413269, "learning_rate": 6.047913176581609e-05, "loss": 0.3909, "step": 27670 }, { "epoch": 76.88888888888889, "grad_norm": 0.7683680057525635, "learning_rate": 6.0452184378797904e-05, "loss": 0.3725, "step": 27680 }, { "epoch": 76.91666666666667, "grad_norm": 0.8904539346694946, "learning_rate": 6.042523381668209e-05, "loss": 0.3797, "step": 27690 }, { "epoch": 76.94444444444444, "grad_norm": 0.8450145721435547, "learning_rate": 6.03982800876555e-05, "loss": 0.3763, "step": 27700 }, { "epoch": 76.97222222222223, "grad_norm": 0.7450411915779114, "learning_rate": 6.0371323199905975e-05, "loss": 0.3675, "step": 27710 }, { "epoch": 77.0, "grad_norm": 0.7221920490264893, "learning_rate": 6.03443631616223e-05, "loss": 0.3753, "step": 27720 }, { "epoch": 77.02777777777777, "grad_norm": 0.7869806885719299, "learning_rate": 6.031739998099421e-05, "loss": 0.3873, "step": 27730 }, { "epoch": 77.05555555555556, "grad_norm": 0.7590476870536804, "learning_rate": 6.029043366621243e-05, "loss": 0.3942, "step": 27740 }, { "epoch": 77.08333333333333, "grad_norm": 0.8513635396957397, "learning_rate": 6.0263464225468615e-05, "loss": 0.3783, "step": 27750 }, { "epoch": 77.11111111111111, "grad_norm": 0.923306405544281, "learning_rate": 6.023649166695534e-05, "loss": 0.381, "step": 27760 }, { "epoch": 77.13888888888889, "grad_norm": 0.9327143430709839, "learning_rate": 6.0209515998866186e-05, "loss": 0.3815, "step": 27770 }, { "epoch": 77.16666666666667, "grad_norm": 0.8044421672821045, "learning_rate": 6.018253722939563e-05, "loss": 0.3857, "step": 27780 }, { "epoch": 77.19444444444444, "grad_norm": 0.9315139651298523, "learning_rate": 6.015555536673914e-05, "loss": 0.3728, "step": 27790 }, { "epoch": 77.22222222222223, "grad_norm": 0.7689402103424072, "learning_rate": 6.0128570419093054e-05, "loss": 0.3674, "step": 27800 }, { "epoch": 77.25, "grad_norm": 0.7755511999130249, "learning_rate": 6.010158239465471e-05, "loss": 0.3855, "step": 27810 }, { "epoch": 77.27777777777777, "grad_norm": 0.927844762802124, "learning_rate": 6.007459130162235e-05, "loss": 0.3861, "step": 27820 }, { "epoch": 77.30555555555556, "grad_norm": 0.7647553086280823, "learning_rate": 6.004759714819516e-05, "loss": 0.3687, "step": 27830 }, { "epoch": 77.33333333333333, "grad_norm": 0.8372790813446045, "learning_rate": 6.002059994257323e-05, "loss": 0.3747, "step": 27840 }, { "epoch": 77.36111111111111, "grad_norm": 0.7917150855064392, "learning_rate": 5.999359969295764e-05, "loss": 0.3819, "step": 27850 }, { "epoch": 77.38888888888889, "grad_norm": 0.7130768895149231, "learning_rate": 5.9966596407550314e-05, "loss": 0.3642, "step": 27860 }, { "epoch": 77.41666666666667, "grad_norm": 0.7191554307937622, "learning_rate": 5.993959009455416e-05, "loss": 0.3712, "step": 27870 }, { "epoch": 77.44444444444444, "grad_norm": 0.7615761160850525, "learning_rate": 5.991258076217298e-05, "loss": 0.3677, "step": 27880 }, { "epoch": 77.47222222222223, "grad_norm": 0.939773678779602, "learning_rate": 5.988556841861147e-05, "loss": 0.3887, "step": 27890 }, { "epoch": 77.5, "grad_norm": 0.8476228713989258, "learning_rate": 5.985855307207531e-05, "loss": 0.377, "step": 27900 }, { "epoch": 77.52777777777777, "grad_norm": 0.8057718276977539, "learning_rate": 5.9831534730771e-05, "loss": 0.3881, "step": 27910 }, { "epoch": 77.55555555555556, "grad_norm": 0.7527903318405151, "learning_rate": 5.980451340290605e-05, "loss": 0.3808, "step": 27920 }, { "epoch": 77.58333333333333, "grad_norm": 0.9452918767929077, "learning_rate": 5.97774890966888e-05, "loss": 0.3854, "step": 27930 }, { "epoch": 77.61111111111111, "grad_norm": 0.6848639249801636, "learning_rate": 5.975046182032851e-05, "loss": 0.3855, "step": 27940 }, { "epoch": 77.63888888888889, "grad_norm": 0.7587316632270813, "learning_rate": 5.972343158203537e-05, "loss": 0.3781, "step": 27950 }, { "epoch": 77.66666666666667, "grad_norm": 0.7502270936965942, "learning_rate": 5.969639839002045e-05, "loss": 0.3684, "step": 27960 }, { "epoch": 77.69444444444444, "grad_norm": 0.7659494876861572, "learning_rate": 5.966936225249572e-05, "loss": 0.388, "step": 27970 }, { "epoch": 77.72222222222223, "grad_norm": 0.9282560348510742, "learning_rate": 5.9642323177674044e-05, "loss": 0.3759, "step": 27980 }, { "epoch": 77.75, "grad_norm": 0.7325099110603333, "learning_rate": 5.9615281173769154e-05, "loss": 0.3707, "step": 27990 }, { "epoch": 77.77777777777777, "grad_norm": 0.7780429720878601, "learning_rate": 5.958823624899574e-05, "loss": 0.3748, "step": 28000 }, { "epoch": 77.80555555555556, "grad_norm": 0.8254136443138123, "learning_rate": 5.956118841156933e-05, "loss": 0.3753, "step": 28010 }, { "epoch": 77.83333333333333, "grad_norm": 0.8624144196510315, "learning_rate": 5.953413766970631e-05, "loss": 0.3923, "step": 28020 }, { "epoch": 77.86111111111111, "grad_norm": 0.7758822441101074, "learning_rate": 5.9507084031624e-05, "loss": 0.3852, "step": 28030 }, { "epoch": 77.88888888888889, "grad_norm": 0.791984498500824, "learning_rate": 5.948002750554058e-05, "loss": 0.3868, "step": 28040 }, { "epoch": 77.91666666666667, "grad_norm": 0.8090484738349915, "learning_rate": 5.9452968099675124e-05, "loss": 0.3757, "step": 28050 }, { "epoch": 77.94444444444444, "grad_norm": 0.797244668006897, "learning_rate": 5.9425905822247527e-05, "loss": 0.377, "step": 28060 }, { "epoch": 77.97222222222223, "grad_norm": 0.7641376852989197, "learning_rate": 5.939884068147864e-05, "loss": 0.3588, "step": 28070 }, { "epoch": 78.0, "grad_norm": 0.7753786444664001, "learning_rate": 5.937177268559011e-05, "loss": 0.3883, "step": 28080 }, { "epoch": 78.02777777777777, "grad_norm": 0.7857624292373657, "learning_rate": 5.934470184280448e-05, "loss": 0.3833, "step": 28090 }, { "epoch": 78.05555555555556, "grad_norm": 0.8640322089195251, "learning_rate": 5.931762816134516e-05, "loss": 0.3919, "step": 28100 }, { "epoch": 78.08333333333333, "grad_norm": 0.7990588545799255, "learning_rate": 5.9290551649436434e-05, "loss": 0.3723, "step": 28110 }, { "epoch": 78.11111111111111, "grad_norm": 0.8830792903900146, "learning_rate": 5.9263472315303416e-05, "loss": 0.3892, "step": 28120 }, { "epoch": 78.13888888888889, "grad_norm": 0.7244244813919067, "learning_rate": 5.9236390167172096e-05, "loss": 0.3708, "step": 28130 }, { "epoch": 78.16666666666667, "grad_norm": 0.7572644948959351, "learning_rate": 5.920930521326932e-05, "loss": 0.361, "step": 28140 }, { "epoch": 78.19444444444444, "grad_norm": 0.6934698224067688, "learning_rate": 5.918221746182276e-05, "loss": 0.3797, "step": 28150 }, { "epoch": 78.22222222222223, "grad_norm": 0.7534027099609375, "learning_rate": 5.9155126921061e-05, "loss": 0.3668, "step": 28160 }, { "epoch": 78.25, "grad_norm": 0.7020094394683838, "learning_rate": 5.91280335992134e-05, "loss": 0.3806, "step": 28170 }, { "epoch": 78.27777777777777, "grad_norm": 0.7885091304779053, "learning_rate": 5.91009375045102e-05, "loss": 0.3746, "step": 28180 }, { "epoch": 78.30555555555556, "grad_norm": 0.7214508056640625, "learning_rate": 5.9073838645182476e-05, "loss": 0.3666, "step": 28190 }, { "epoch": 78.33333333333333, "grad_norm": 0.8139709830284119, "learning_rate": 5.904673702946217e-05, "loss": 0.388, "step": 28200 }, { "epoch": 78.36111111111111, "grad_norm": 0.8195705413818359, "learning_rate": 5.9019632665582004e-05, "loss": 0.3748, "step": 28210 }, { "epoch": 78.38888888888889, "grad_norm": 0.843584418296814, "learning_rate": 5.899252556177559e-05, "loss": 0.3771, "step": 28220 }, { "epoch": 78.41666666666667, "grad_norm": 0.728827953338623, "learning_rate": 5.896541572627735e-05, "loss": 0.3859, "step": 28230 }, { "epoch": 78.44444444444444, "grad_norm": 0.7415587306022644, "learning_rate": 5.893830316732253e-05, "loss": 0.3781, "step": 28240 }, { "epoch": 78.47222222222223, "grad_norm": 0.8006575107574463, "learning_rate": 5.8911187893147214e-05, "loss": 0.3864, "step": 28250 }, { "epoch": 78.5, "grad_norm": 1.0344663858413696, "learning_rate": 5.888406991198828e-05, "loss": 0.3748, "step": 28260 }, { "epoch": 78.52777777777777, "grad_norm": 0.7585006952285767, "learning_rate": 5.885694923208349e-05, "loss": 0.3763, "step": 28270 }, { "epoch": 78.55555555555556, "grad_norm": 0.8284279704093933, "learning_rate": 5.882982586167138e-05, "loss": 0.3669, "step": 28280 }, { "epoch": 78.58333333333333, "grad_norm": 0.8242248296737671, "learning_rate": 5.880269980899131e-05, "loss": 0.3624, "step": 28290 }, { "epoch": 78.61111111111111, "grad_norm": 0.7949168682098389, "learning_rate": 5.8775571082283465e-05, "loss": 0.37, "step": 28300 }, { "epoch": 78.63888888888889, "grad_norm": 0.7742564678192139, "learning_rate": 5.8748439689788824e-05, "loss": 0.3762, "step": 28310 }, { "epoch": 78.66666666666667, "grad_norm": 0.7771027088165283, "learning_rate": 5.87213056397492e-05, "loss": 0.376, "step": 28320 }, { "epoch": 78.69444444444444, "grad_norm": 0.7577527761459351, "learning_rate": 5.869416894040719e-05, "loss": 0.3844, "step": 28330 }, { "epoch": 78.72222222222223, "grad_norm": 0.8348276019096375, "learning_rate": 5.866702960000621e-05, "loss": 0.3661, "step": 28340 }, { "epoch": 78.75, "grad_norm": 0.7709356546401978, "learning_rate": 5.863988762679048e-05, "loss": 0.3731, "step": 28350 }, { "epoch": 78.77777777777777, "grad_norm": 0.9534432888031006, "learning_rate": 5.8612743029005e-05, "loss": 0.3794, "step": 28360 }, { "epoch": 78.80555555555556, "grad_norm": 0.7591077089309692, "learning_rate": 5.858559581489561e-05, "loss": 0.3809, "step": 28370 }, { "epoch": 78.83333333333333, "grad_norm": 0.8502004146575928, "learning_rate": 5.85584459927089e-05, "loss": 0.3722, "step": 28380 }, { "epoch": 78.86111111111111, "grad_norm": 0.835249125957489, "learning_rate": 5.853129357069227e-05, "loss": 0.3761, "step": 28390 }, { "epoch": 78.88888888888889, "grad_norm": 0.7739892601966858, "learning_rate": 5.8504138557093913e-05, "loss": 0.3824, "step": 28400 }, { "epoch": 78.91666666666667, "grad_norm": 0.9050560593605042, "learning_rate": 5.8476980960162784e-05, "loss": 0.3859, "step": 28410 }, { "epoch": 78.94444444444444, "grad_norm": 0.7723100185394287, "learning_rate": 5.844982078814868e-05, "loss": 0.375, "step": 28420 }, { "epoch": 78.97222222222223, "grad_norm": 0.8554949760437012, "learning_rate": 5.842265804930211e-05, "loss": 0.3694, "step": 28430 }, { "epoch": 79.0, "grad_norm": 0.7552803158760071, "learning_rate": 5.839549275187444e-05, "loss": 0.3901, "step": 28440 }, { "epoch": 79.02777777777777, "grad_norm": 0.7893080115318298, "learning_rate": 5.836832490411771e-05, "loss": 0.3649, "step": 28450 }, { "epoch": 79.05555555555556, "grad_norm": 0.7892082929611206, "learning_rate": 5.834115451428485e-05, "loss": 0.3869, "step": 28460 }, { "epoch": 79.08333333333333, "grad_norm": 0.8475852012634277, "learning_rate": 5.831398159062946e-05, "loss": 0.3605, "step": 28470 }, { "epoch": 79.11111111111111, "grad_norm": 0.8450709581375122, "learning_rate": 5.828680614140599e-05, "loss": 0.3899, "step": 28480 }, { "epoch": 79.13888888888889, "grad_norm": 0.8435484766960144, "learning_rate": 5.825962817486962e-05, "loss": 0.3868, "step": 28490 }, { "epoch": 79.16666666666667, "grad_norm": 0.9056557416915894, "learning_rate": 5.823244769927629e-05, "loss": 0.3886, "step": 28500 }, { "epoch": 79.19444444444444, "grad_norm": 0.7492390871047974, "learning_rate": 5.8205264722882716e-05, "loss": 0.3751, "step": 28510 }, { "epoch": 79.22222222222223, "grad_norm": 0.7846071124076843, "learning_rate": 5.817807925394636e-05, "loss": 0.3765, "step": 28520 }, { "epoch": 79.25, "grad_norm": 0.7682069540023804, "learning_rate": 5.815089130072546e-05, "loss": 0.3737, "step": 28530 }, { "epoch": 79.27777777777777, "grad_norm": 0.912541389465332, "learning_rate": 5.8123700871479e-05, "loss": 0.3873, "step": 28540 }, { "epoch": 79.30555555555556, "grad_norm": 0.8076422214508057, "learning_rate": 5.809650797446671e-05, "loss": 0.3786, "step": 28550 }, { "epoch": 79.33333333333333, "grad_norm": 0.7112845778465271, "learning_rate": 5.806931261794907e-05, "loss": 0.3643, "step": 28560 }, { "epoch": 79.36111111111111, "grad_norm": 0.6997553706169128, "learning_rate": 5.804211481018731e-05, "loss": 0.3684, "step": 28570 }, { "epoch": 79.38888888888889, "grad_norm": 0.6808909177780151, "learning_rate": 5.801491455944341e-05, "loss": 0.3805, "step": 28580 }, { "epoch": 79.41666666666667, "grad_norm": 0.6994345188140869, "learning_rate": 5.79877118739801e-05, "loss": 0.3631, "step": 28590 }, { "epoch": 79.44444444444444, "grad_norm": 0.8551114797592163, "learning_rate": 5.7960506762060816e-05, "loss": 0.387, "step": 28600 }, { "epoch": 79.47222222222223, "grad_norm": 0.7270412445068359, "learning_rate": 5.793329923194977e-05, "loss": 0.3827, "step": 28610 }, { "epoch": 79.5, "grad_norm": 0.9051022529602051, "learning_rate": 5.790608929191187e-05, "loss": 0.3879, "step": 28620 }, { "epoch": 79.52777777777777, "grad_norm": 0.885503351688385, "learning_rate": 5.78788769502128e-05, "loss": 0.3795, "step": 28630 }, { "epoch": 79.55555555555556, "grad_norm": 0.86806321144104, "learning_rate": 5.785166221511894e-05, "loss": 0.3797, "step": 28640 }, { "epoch": 79.58333333333333, "grad_norm": 1.0083943605422974, "learning_rate": 5.7824445094897415e-05, "loss": 0.3745, "step": 28650 }, { "epoch": 79.61111111111111, "grad_norm": 0.7210556864738464, "learning_rate": 5.7797225597816065e-05, "loss": 0.3636, "step": 28660 }, { "epoch": 79.63888888888889, "grad_norm": 0.8163444995880127, "learning_rate": 5.777000373214345e-05, "loss": 0.3906, "step": 28670 }, { "epoch": 79.66666666666667, "grad_norm": 0.7196439504623413, "learning_rate": 5.774277950614885e-05, "loss": 0.3785, "step": 28680 }, { "epoch": 79.69444444444444, "grad_norm": 0.7906640768051147, "learning_rate": 5.771555292810227e-05, "loss": 0.3784, "step": 28690 }, { "epoch": 79.72222222222223, "grad_norm": 0.8191994428634644, "learning_rate": 5.768832400627444e-05, "loss": 0.3796, "step": 28700 }, { "epoch": 79.75, "grad_norm": 0.721922755241394, "learning_rate": 5.7661092748936775e-05, "loss": 0.3671, "step": 28710 }, { "epoch": 79.77777777777777, "grad_norm": 0.7636218667030334, "learning_rate": 5.76338591643614e-05, "loss": 0.3821, "step": 28720 }, { "epoch": 79.80555555555556, "grad_norm": 0.8475291728973389, "learning_rate": 5.760662326082118e-05, "loss": 0.3802, "step": 28730 }, { "epoch": 79.83333333333333, "grad_norm": 0.7041158080101013, "learning_rate": 5.757938504658965e-05, "loss": 0.3707, "step": 28740 }, { "epoch": 79.86111111111111, "grad_norm": 0.8294655084609985, "learning_rate": 5.755214452994107e-05, "loss": 0.3692, "step": 28750 }, { "epoch": 79.88888888888889, "grad_norm": 0.7507734298706055, "learning_rate": 5.752490171915039e-05, "loss": 0.3902, "step": 28760 }, { "epoch": 79.91666666666667, "grad_norm": 0.8743959665298462, "learning_rate": 5.749765662249324e-05, "loss": 0.3799, "step": 28770 }, { "epoch": 79.94444444444444, "grad_norm": 0.8970789313316345, "learning_rate": 5.747040924824596e-05, "loss": 0.391, "step": 28780 }, { "epoch": 79.97222222222223, "grad_norm": 0.8117119073867798, "learning_rate": 5.7443159604685613e-05, "loss": 0.3825, "step": 28790 }, { "epoch": 80.0, "grad_norm": 0.7737615704536438, "learning_rate": 5.74159077000899e-05, "loss": 0.3685, "step": 28800 }, { "epoch": 80.02777777777777, "grad_norm": 0.8971832394599915, "learning_rate": 5.7388653542737235e-05, "loss": 0.3638, "step": 28810 }, { "epoch": 80.05555555555556, "grad_norm": 0.822492241859436, "learning_rate": 5.736139714090672e-05, "loss": 0.3725, "step": 28820 }, { "epoch": 80.08333333333333, "grad_norm": 0.7452799677848816, "learning_rate": 5.73341385028781e-05, "loss": 0.3609, "step": 28830 }, { "epoch": 80.11111111111111, "grad_norm": 0.6961538791656494, "learning_rate": 5.7306877636931855e-05, "loss": 0.3668, "step": 28840 }, { "epoch": 80.13888888888889, "grad_norm": 0.8937341570854187, "learning_rate": 5.7279614551349125e-05, "loss": 0.3767, "step": 28850 }, { "epoch": 80.16666666666667, "grad_norm": 0.7149876356124878, "learning_rate": 5.725234925441169e-05, "loss": 0.3662, "step": 28860 }, { "epoch": 80.19444444444444, "grad_norm": 0.7619442343711853, "learning_rate": 5.7225081754402044e-05, "loss": 0.3913, "step": 28870 }, { "epoch": 80.22222222222223, "grad_norm": 0.6898200511932373, "learning_rate": 5.7197812059603326e-05, "loss": 0.3668, "step": 28880 }, { "epoch": 80.25, "grad_norm": 0.7862520813941956, "learning_rate": 5.717054017829934e-05, "loss": 0.3688, "step": 28890 }, { "epoch": 80.27777777777777, "grad_norm": 0.7623870372772217, "learning_rate": 5.7143266118774584e-05, "loss": 0.3785, "step": 28900 }, { "epoch": 80.30555555555556, "grad_norm": 0.7023026347160339, "learning_rate": 5.711598988931418e-05, "loss": 0.364, "step": 28910 }, { "epoch": 80.33333333333333, "grad_norm": 0.7773314118385315, "learning_rate": 5.7088711498203954e-05, "loss": 0.3814, "step": 28920 }, { "epoch": 80.36111111111111, "grad_norm": 0.7923189997673035, "learning_rate": 5.706143095373033e-05, "loss": 0.3777, "step": 28930 }, { "epoch": 80.38888888888889, "grad_norm": 0.7071933746337891, "learning_rate": 5.703414826418042e-05, "loss": 0.3636, "step": 28940 }, { "epoch": 80.41666666666667, "grad_norm": 0.9234169125556946, "learning_rate": 5.7006863437842007e-05, "loss": 0.3646, "step": 28950 }, { "epoch": 80.44444444444444, "grad_norm": 0.7499669790267944, "learning_rate": 5.697957648300348e-05, "loss": 0.3693, "step": 28960 }, { "epoch": 80.47222222222223, "grad_norm": 0.7289141416549683, "learning_rate": 5.695228740795391e-05, "loss": 0.3768, "step": 28970 }, { "epoch": 80.5, "grad_norm": 0.8612245917320251, "learning_rate": 5.6924996220982985e-05, "loss": 0.378, "step": 28980 }, { "epoch": 80.52777777777777, "grad_norm": 0.9490052461624146, "learning_rate": 5.6897702930381045e-05, "loss": 0.3936, "step": 28990 }, { "epoch": 80.55555555555556, "grad_norm": 0.7846028804779053, "learning_rate": 5.687040754443908e-05, "loss": 0.3744, "step": 29000 }, { "epoch": 80.58333333333333, "grad_norm": 0.8430658578872681, "learning_rate": 5.6843110071448725e-05, "loss": 0.395, "step": 29010 }, { "epoch": 80.61111111111111, "grad_norm": 0.7320647835731506, "learning_rate": 5.6815810519702194e-05, "loss": 0.3741, "step": 29020 }, { "epoch": 80.63888888888889, "grad_norm": 0.7177789211273193, "learning_rate": 5.6788508897492396e-05, "loss": 0.3588, "step": 29030 }, { "epoch": 80.66666666666667, "grad_norm": 0.7251845598220825, "learning_rate": 5.676120521311282e-05, "loss": 0.3792, "step": 29040 }, { "epoch": 80.69444444444444, "grad_norm": 0.7917733192443848, "learning_rate": 5.6733899474857634e-05, "loss": 0.3694, "step": 29050 }, { "epoch": 80.72222222222223, "grad_norm": 0.7731283903121948, "learning_rate": 5.670659169102157e-05, "loss": 0.3804, "step": 29060 }, { "epoch": 80.75, "grad_norm": 0.7879687547683716, "learning_rate": 5.6679281869900044e-05, "loss": 0.3769, "step": 29070 }, { "epoch": 80.77777777777777, "grad_norm": 0.7872176766395569, "learning_rate": 5.6651970019789045e-05, "loss": 0.3662, "step": 29080 }, { "epoch": 80.80555555555556, "grad_norm": 0.8085471391677856, "learning_rate": 5.662465614898519e-05, "loss": 0.3925, "step": 29090 }, { "epoch": 80.83333333333333, "grad_norm": 0.7633122801780701, "learning_rate": 5.6597340265785695e-05, "loss": 0.3817, "step": 29100 }, { "epoch": 80.86111111111111, "grad_norm": 0.7685845494270325, "learning_rate": 5.657002237848843e-05, "loss": 0.3859, "step": 29110 }, { "epoch": 80.88888888888889, "grad_norm": 0.8270798921585083, "learning_rate": 5.654270249539183e-05, "loss": 0.3656, "step": 29120 }, { "epoch": 80.91666666666667, "grad_norm": 0.7319735884666443, "learning_rate": 5.651538062479498e-05, "loss": 0.3651, "step": 29130 }, { "epoch": 80.94444444444444, "grad_norm": 0.7542715668678284, "learning_rate": 5.648805677499751e-05, "loss": 0.3735, "step": 29140 }, { "epoch": 80.97222222222223, "grad_norm": 0.7226755619049072, "learning_rate": 5.646073095429969e-05, "loss": 0.3709, "step": 29150 }, { "epoch": 81.0, "grad_norm": 0.8117840886116028, "learning_rate": 5.643340317100241e-05, "loss": 0.3749, "step": 29160 }, { "epoch": 81.02777777777777, "grad_norm": 0.8066041469573975, "learning_rate": 5.64060734334071e-05, "loss": 0.3674, "step": 29170 }, { "epoch": 81.05555555555556, "grad_norm": 0.7704938054084778, "learning_rate": 5.637874174981583e-05, "loss": 0.3774, "step": 29180 }, { "epoch": 81.08333333333333, "grad_norm": 0.73977130651474, "learning_rate": 5.635140812853124e-05, "loss": 0.3635, "step": 29190 }, { "epoch": 81.11111111111111, "grad_norm": 0.7154613733291626, "learning_rate": 5.6324072577856544e-05, "loss": 0.3868, "step": 29200 }, { "epoch": 81.13888888888889, "grad_norm": 0.7517378926277161, "learning_rate": 5.629673510609559e-05, "loss": 0.369, "step": 29210 }, { "epoch": 81.16666666666667, "grad_norm": 0.8236134052276611, "learning_rate": 5.626939572155276e-05, "loss": 0.386, "step": 29220 }, { "epoch": 81.19444444444444, "grad_norm": 0.8307527303695679, "learning_rate": 5.6242054432533054e-05, "loss": 0.3698, "step": 29230 }, { "epoch": 81.22222222222223, "grad_norm": 0.7559710741043091, "learning_rate": 5.621471124734201e-05, "loss": 0.3758, "step": 29240 }, { "epoch": 81.25, "grad_norm": 0.7042670249938965, "learning_rate": 5.6187366174285794e-05, "loss": 0.3611, "step": 29250 }, { "epoch": 81.27777777777777, "grad_norm": 0.8153295516967773, "learning_rate": 5.616001922167109e-05, "loss": 0.379, "step": 29260 }, { "epoch": 81.30555555555556, "grad_norm": 0.7448877096176147, "learning_rate": 5.61326703978052e-05, "loss": 0.3864, "step": 29270 }, { "epoch": 81.33333333333333, "grad_norm": 0.8598271012306213, "learning_rate": 5.6105319710995964e-05, "loss": 0.3616, "step": 29280 }, { "epoch": 81.36111111111111, "grad_norm": 0.7834572792053223, "learning_rate": 5.60779671695518e-05, "loss": 0.3738, "step": 29290 }, { "epoch": 81.38888888888889, "grad_norm": 0.7091790437698364, "learning_rate": 5.6050612781781684e-05, "loss": 0.3658, "step": 29300 }, { "epoch": 81.41666666666667, "grad_norm": 0.7287483811378479, "learning_rate": 5.602325655599516e-05, "loss": 0.3629, "step": 29310 }, { "epoch": 81.44444444444444, "grad_norm": 0.7861401438713074, "learning_rate": 5.599589850050234e-05, "loss": 0.3818, "step": 29320 }, { "epoch": 81.47222222222223, "grad_norm": 0.8509084582328796, "learning_rate": 5.5968538623613874e-05, "loss": 0.3936, "step": 29330 }, { "epoch": 81.5, "grad_norm": 0.784953773021698, "learning_rate": 5.594117693364095e-05, "loss": 0.374, "step": 29340 }, { "epoch": 81.52777777777777, "grad_norm": 0.7665091753005981, "learning_rate": 5.591381343889535e-05, "loss": 0.3884, "step": 29350 }, { "epoch": 81.55555555555556, "grad_norm": 0.7292123436927795, "learning_rate": 5.5886448147689355e-05, "loss": 0.3674, "step": 29360 }, { "epoch": 81.58333333333333, "grad_norm": 0.7729705572128296, "learning_rate": 5.585908106833585e-05, "loss": 0.3597, "step": 29370 }, { "epoch": 81.61111111111111, "grad_norm": 0.7347968220710754, "learning_rate": 5.5831712209148226e-05, "loss": 0.3703, "step": 29380 }, { "epoch": 81.63888888888889, "grad_norm": 0.7521613836288452, "learning_rate": 5.58043415784404e-05, "loss": 0.3703, "step": 29390 }, { "epoch": 81.66666666666667, "grad_norm": 0.7806551456451416, "learning_rate": 5.577696918452686e-05, "loss": 0.3822, "step": 29400 }, { "epoch": 81.69444444444444, "grad_norm": 0.7992231249809265, "learning_rate": 5.5749595035722604e-05, "loss": 0.3706, "step": 29410 }, { "epoch": 81.72222222222223, "grad_norm": 0.750907838344574, "learning_rate": 5.5722219140343193e-05, "loss": 0.37, "step": 29420 }, { "epoch": 81.75, "grad_norm": 0.8213971257209778, "learning_rate": 5.56948415067047e-05, "loss": 0.3739, "step": 29430 }, { "epoch": 81.77777777777777, "grad_norm": 0.7579622864723206, "learning_rate": 5.5667462143123704e-05, "loss": 0.3565, "step": 29440 }, { "epoch": 81.80555555555556, "grad_norm": 0.7626963257789612, "learning_rate": 5.564008105791737e-05, "loss": 0.37, "step": 29450 }, { "epoch": 81.83333333333333, "grad_norm": 0.7768185138702393, "learning_rate": 5.5612698259403316e-05, "loss": 0.3801, "step": 29460 }, { "epoch": 81.86111111111111, "grad_norm": 0.7186532020568848, "learning_rate": 5.5585313755899724e-05, "loss": 0.3608, "step": 29470 }, { "epoch": 81.88888888888889, "grad_norm": 0.8420273065567017, "learning_rate": 5.5557927555725285e-05, "loss": 0.3812, "step": 29480 }, { "epoch": 81.91666666666667, "grad_norm": 0.8668802976608276, "learning_rate": 5.55305396671992e-05, "loss": 0.3644, "step": 29490 }, { "epoch": 81.94444444444444, "grad_norm": 0.8863128423690796, "learning_rate": 5.55031500986412e-05, "loss": 0.3667, "step": 29500 }, { "epoch": 81.97222222222223, "grad_norm": 0.8213383555412292, "learning_rate": 5.547575885837149e-05, "loss": 0.3804, "step": 29510 }, { "epoch": 82.0, "grad_norm": 0.7495995759963989, "learning_rate": 5.5448365954710825e-05, "loss": 0.3766, "step": 29520 }, { "epoch": 82.02777777777777, "grad_norm": 0.7557427883148193, "learning_rate": 5.5420971395980446e-05, "loss": 0.3726, "step": 29530 }, { "epoch": 82.05555555555556, "grad_norm": 0.7970104217529297, "learning_rate": 5.539357519050209e-05, "loss": 0.3631, "step": 29540 }, { "epoch": 82.08333333333333, "grad_norm": 0.7307514548301697, "learning_rate": 5.536617734659799e-05, "loss": 0.3729, "step": 29550 }, { "epoch": 82.11111111111111, "grad_norm": 0.7642149925231934, "learning_rate": 5.533877787259091e-05, "loss": 0.3901, "step": 29560 }, { "epoch": 82.13888888888889, "grad_norm": 0.7324491143226624, "learning_rate": 5.5311376776804044e-05, "loss": 0.368, "step": 29570 }, { "epoch": 82.16666666666667, "grad_norm": 0.8428247570991516, "learning_rate": 5.528397406756118e-05, "loss": 0.3706, "step": 29580 }, { "epoch": 82.19444444444444, "grad_norm": 0.7225236892700195, "learning_rate": 5.525656975318652e-05, "loss": 0.3692, "step": 29590 }, { "epoch": 82.22222222222223, "grad_norm": 0.7516540884971619, "learning_rate": 5.522916384200474e-05, "loss": 0.3837, "step": 29600 }, { "epoch": 82.25, "grad_norm": 0.8895572423934937, "learning_rate": 5.520175634234106e-05, "loss": 0.3812, "step": 29610 }, { "epoch": 82.27777777777777, "grad_norm": 0.6935630440711975, "learning_rate": 5.517434726252113e-05, "loss": 0.3635, "step": 29620 }, { "epoch": 82.30555555555556, "grad_norm": 0.7181688547134399, "learning_rate": 5.514693661087113e-05, "loss": 0.3701, "step": 29630 }, { "epoch": 82.33333333333333, "grad_norm": 0.7625977993011475, "learning_rate": 5.511952439571769e-05, "loss": 0.3682, "step": 29640 }, { "epoch": 82.36111111111111, "grad_norm": 0.757738471031189, "learning_rate": 5.509211062538791e-05, "loss": 0.3762, "step": 29650 }, { "epoch": 82.38888888888889, "grad_norm": 0.8410611152648926, "learning_rate": 5.506469530820939e-05, "loss": 0.3773, "step": 29660 }, { "epoch": 82.41666666666667, "grad_norm": 0.7514066100120544, "learning_rate": 5.503727845251014e-05, "loss": 0.366, "step": 29670 }, { "epoch": 82.44444444444444, "grad_norm": 0.8096616268157959, "learning_rate": 5.50098600666187e-05, "loss": 0.3773, "step": 29680 }, { "epoch": 82.47222222222223, "grad_norm": 0.830627977848053, "learning_rate": 5.498244015886406e-05, "loss": 0.3742, "step": 29690 }, { "epoch": 82.5, "grad_norm": 0.7567207217216492, "learning_rate": 5.495501873757565e-05, "loss": 0.3627, "step": 29700 }, { "epoch": 82.52777777777777, "grad_norm": 0.8561055660247803, "learning_rate": 5.492759581108336e-05, "loss": 0.3796, "step": 29710 }, { "epoch": 82.55555555555556, "grad_norm": 0.7670607566833496, "learning_rate": 5.490017138771759e-05, "loss": 0.376, "step": 29720 }, { "epoch": 82.58333333333333, "grad_norm": 0.8258269429206848, "learning_rate": 5.487274547580912e-05, "loss": 0.3738, "step": 29730 }, { "epoch": 82.61111111111111, "grad_norm": 0.8038049340248108, "learning_rate": 5.484531808368923e-05, "loss": 0.3759, "step": 29740 }, { "epoch": 82.63888888888889, "grad_norm": 0.763401448726654, "learning_rate": 5.4817889219689656e-05, "loss": 0.379, "step": 29750 }, { "epoch": 82.66666666666667, "grad_norm": 0.8800534605979919, "learning_rate": 5.4790458892142536e-05, "loss": 0.381, "step": 29760 }, { "epoch": 82.69444444444444, "grad_norm": 0.7392902374267578, "learning_rate": 5.476302710938048e-05, "loss": 0.3698, "step": 29770 }, { "epoch": 82.72222222222223, "grad_norm": 0.8826219439506531, "learning_rate": 5.473559387973657e-05, "loss": 0.3786, "step": 29780 }, { "epoch": 82.75, "grad_norm": 0.869041919708252, "learning_rate": 5.470815921154425e-05, "loss": 0.3794, "step": 29790 }, { "epoch": 82.77777777777777, "grad_norm": 0.8859379887580872, "learning_rate": 5.468072311313749e-05, "loss": 0.3852, "step": 29800 }, { "epoch": 82.80555555555556, "grad_norm": 0.9154105186462402, "learning_rate": 5.465328559285063e-05, "loss": 0.3823, "step": 29810 }, { "epoch": 82.83333333333333, "grad_norm": 0.8065362572669983, "learning_rate": 5.462584665901849e-05, "loss": 0.379, "step": 29820 }, { "epoch": 82.86111111111111, "grad_norm": 0.815925657749176, "learning_rate": 5.4598406319976235e-05, "loss": 0.3631, "step": 29830 }, { "epoch": 82.88888888888889, "grad_norm": 0.7671855688095093, "learning_rate": 5.457096458405958e-05, "loss": 0.3686, "step": 29840 }, { "epoch": 82.91666666666667, "grad_norm": 0.8006246089935303, "learning_rate": 5.454352145960457e-05, "loss": 0.3753, "step": 29850 }, { "epoch": 82.94444444444444, "grad_norm": 0.9156626462936401, "learning_rate": 5.4516076954947715e-05, "loss": 0.378, "step": 29860 }, { "epoch": 82.97222222222223, "grad_norm": 0.8835230469703674, "learning_rate": 5.448863107842591e-05, "loss": 0.3895, "step": 29870 }, { "epoch": 83.0, "grad_norm": 0.8060216307640076, "learning_rate": 5.446118383837651e-05, "loss": 0.367, "step": 29880 }, { "epoch": 83.02777777777777, "grad_norm": 0.7091156840324402, "learning_rate": 5.443373524313722e-05, "loss": 0.3637, "step": 29890 }, { "epoch": 83.05555555555556, "grad_norm": 0.7950685024261475, "learning_rate": 5.440628530104626e-05, "loss": 0.3799, "step": 29900 }, { "epoch": 83.08333333333333, "grad_norm": 0.6838985085487366, "learning_rate": 5.4378834020442146e-05, "loss": 0.3644, "step": 29910 }, { "epoch": 83.11111111111111, "grad_norm": 0.7898828983306885, "learning_rate": 5.4351381409663884e-05, "loss": 0.3626, "step": 29920 }, { "epoch": 83.13888888888889, "grad_norm": 0.8305829167366028, "learning_rate": 5.432392747705084e-05, "loss": 0.3824, "step": 29930 }, { "epoch": 83.16666666666667, "grad_norm": 0.7951960563659668, "learning_rate": 5.429647223094278e-05, "loss": 0.3777, "step": 29940 }, { "epoch": 83.19444444444444, "grad_norm": 0.7662842869758606, "learning_rate": 5.4269015679679924e-05, "loss": 0.3675, "step": 29950 }, { "epoch": 83.22222222222223, "grad_norm": 0.770594596862793, "learning_rate": 5.424155783160281e-05, "loss": 0.384, "step": 29960 }, { "epoch": 83.25, "grad_norm": 0.8420248627662659, "learning_rate": 5.4214098695052415e-05, "loss": 0.373, "step": 29970 }, { "epoch": 83.27777777777777, "grad_norm": 0.8190386891365051, "learning_rate": 5.418663827837012e-05, "loss": 0.3702, "step": 29980 }, { "epoch": 83.30555555555556, "grad_norm": 0.7443423867225647, "learning_rate": 5.415917658989763e-05, "loss": 0.3593, "step": 29990 }, { "epoch": 83.33333333333333, "grad_norm": 0.7934229969978333, "learning_rate": 5.413171363797713e-05, "loss": 0.3705, "step": 30000 } ], "logging_steps": 10, "max_steps": 60000, "num_input_tokens_seen": 0, "num_train_epochs": 167, "save_steps": 10000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 16, "trial_name": null, "trial_params": null }