| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 26.455026455026456, |
| "eval_steps": 500, |
| "global_step": 5000, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.05291005291005291, |
| "grad_norm": 3.041571855545044, |
| "learning_rate": 4.000000000000001e-06, |
| "loss": 0.9501, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.10582010582010581, |
| "grad_norm": 2.1699185371398926, |
| "learning_rate": 8.000000000000001e-06, |
| "loss": 0.8764, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.15873015873015872, |
| "grad_norm": 1.5091040134429932, |
| "learning_rate": 1.2e-05, |
| "loss": 0.6398, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.21164021164021163, |
| "grad_norm": 0.9642059206962585, |
| "learning_rate": 1.6000000000000003e-05, |
| "loss": 0.446, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.26455026455026454, |
| "grad_norm": 0.5779881477355957, |
| "learning_rate": 2e-05, |
| "loss": 0.3417, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.31746031746031744, |
| "grad_norm": 0.5219764709472656, |
| "learning_rate": 2.4e-05, |
| "loss": 0.2568, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.37037037037037035, |
| "grad_norm": 0.3546525239944458, |
| "learning_rate": 2.8000000000000003e-05, |
| "loss": 0.1985, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.42328042328042326, |
| "grad_norm": 0.3198654353618622, |
| "learning_rate": 3.2000000000000005e-05, |
| "loss": 0.1648, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.47619047619047616, |
| "grad_norm": 0.3067784011363983, |
| "learning_rate": 3.6e-05, |
| "loss": 0.1332, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.5291005291005291, |
| "grad_norm": 0.2967132031917572, |
| "learning_rate": 4e-05, |
| "loss": 0.1113, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.582010582010582, |
| "grad_norm": 0.233621284365654, |
| "learning_rate": 4.4000000000000006e-05, |
| "loss": 0.0984, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.6349206349206349, |
| "grad_norm": 0.28300589323043823, |
| "learning_rate": 4.8e-05, |
| "loss": 0.0869, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.6878306878306878, |
| "grad_norm": 0.23296041786670685, |
| "learning_rate": 5.2000000000000004e-05, |
| "loss": 0.0757, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.7407407407407407, |
| "grad_norm": 0.34818753600120544, |
| "learning_rate": 5.6000000000000006e-05, |
| "loss": 0.0695, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.7936507936507936, |
| "grad_norm": 0.2807776629924774, |
| "learning_rate": 6e-05, |
| "loss": 0.0592, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.8465608465608465, |
| "grad_norm": 0.2950199544429779, |
| "learning_rate": 6.400000000000001e-05, |
| "loss": 0.0557, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.8994708994708994, |
| "grad_norm": 0.22396177053451538, |
| "learning_rate": 6.800000000000001e-05, |
| "loss": 0.0474, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.9523809523809523, |
| "grad_norm": 0.20290444791316986, |
| "learning_rate": 7.2e-05, |
| "loss": 0.0449, |
| "step": 180 |
| }, |
| { |
| "epoch": 1.0052910052910053, |
| "grad_norm": 0.2324942648410797, |
| "learning_rate": 7.6e-05, |
| "loss": 0.0415, |
| "step": 190 |
| }, |
| { |
| "epoch": 1.0582010582010581, |
| "grad_norm": 0.2845875024795532, |
| "learning_rate": 8e-05, |
| "loss": 0.0387, |
| "step": 200 |
| }, |
| { |
| "epoch": 1.1111111111111112, |
| "grad_norm": 0.2593020498752594, |
| "learning_rate": 8.4e-05, |
| "loss": 0.0353, |
| "step": 210 |
| }, |
| { |
| "epoch": 1.164021164021164, |
| "grad_norm": 0.2940255105495453, |
| "learning_rate": 8.800000000000001e-05, |
| "loss": 0.038, |
| "step": 220 |
| }, |
| { |
| "epoch": 1.216931216931217, |
| "grad_norm": 0.25001412630081177, |
| "learning_rate": 9.200000000000001e-05, |
| "loss": 0.0364, |
| "step": 230 |
| }, |
| { |
| "epoch": 1.2698412698412698, |
| "grad_norm": 0.25423663854599, |
| "learning_rate": 9.6e-05, |
| "loss": 0.0323, |
| "step": 240 |
| }, |
| { |
| "epoch": 1.3227513227513228, |
| "grad_norm": 0.20486362278461456, |
| "learning_rate": 0.0001, |
| "loss": 0.0301, |
| "step": 250 |
| }, |
| { |
| "epoch": 1.3756613756613756, |
| "grad_norm": 0.21555016934871674, |
| "learning_rate": 0.00010400000000000001, |
| "loss": 0.0279, |
| "step": 260 |
| }, |
| { |
| "epoch": 1.4285714285714286, |
| "grad_norm": 0.2603663504123688, |
| "learning_rate": 0.00010800000000000001, |
| "loss": 0.0278, |
| "step": 270 |
| }, |
| { |
| "epoch": 1.4814814814814814, |
| "grad_norm": 0.2120589166879654, |
| "learning_rate": 0.00011200000000000001, |
| "loss": 0.0262, |
| "step": 280 |
| }, |
| { |
| "epoch": 1.5343915343915344, |
| "grad_norm": 0.19613762199878693, |
| "learning_rate": 0.000116, |
| "loss": 0.0246, |
| "step": 290 |
| }, |
| { |
| "epoch": 1.5873015873015874, |
| "grad_norm": 0.2326143980026245, |
| "learning_rate": 0.00012, |
| "loss": 0.0238, |
| "step": 300 |
| }, |
| { |
| "epoch": 1.6402116402116402, |
| "grad_norm": 0.22464260458946228, |
| "learning_rate": 0.000124, |
| "loss": 0.0233, |
| "step": 310 |
| }, |
| { |
| "epoch": 1.693121693121693, |
| "grad_norm": 0.21060794591903687, |
| "learning_rate": 0.00012800000000000002, |
| "loss": 0.0216, |
| "step": 320 |
| }, |
| { |
| "epoch": 1.746031746031746, |
| "grad_norm": 0.16642867028713226, |
| "learning_rate": 0.000132, |
| "loss": 0.02, |
| "step": 330 |
| }, |
| { |
| "epoch": 1.798941798941799, |
| "grad_norm": 0.2334732860326767, |
| "learning_rate": 0.00013600000000000003, |
| "loss": 0.0199, |
| "step": 340 |
| }, |
| { |
| "epoch": 1.8518518518518519, |
| "grad_norm": 0.2562713623046875, |
| "learning_rate": 0.00014, |
| "loss": 0.0198, |
| "step": 350 |
| }, |
| { |
| "epoch": 1.9047619047619047, |
| "grad_norm": 0.21125763654708862, |
| "learning_rate": 0.000144, |
| "loss": 0.0229, |
| "step": 360 |
| }, |
| { |
| "epoch": 1.9576719576719577, |
| "grad_norm": 0.1970241814851761, |
| "learning_rate": 0.000148, |
| "loss": 0.0218, |
| "step": 370 |
| }, |
| { |
| "epoch": 2.0105820105820107, |
| "grad_norm": 0.20859766006469727, |
| "learning_rate": 0.000152, |
| "loss": 0.0209, |
| "step": 380 |
| }, |
| { |
| "epoch": 2.0634920634920633, |
| "grad_norm": 0.24649283289909363, |
| "learning_rate": 0.00015600000000000002, |
| "loss": 0.02, |
| "step": 390 |
| }, |
| { |
| "epoch": 2.1164021164021163, |
| "grad_norm": 0.2227829247713089, |
| "learning_rate": 0.00016, |
| "loss": 0.0193, |
| "step": 400 |
| }, |
| { |
| "epoch": 2.1693121693121693, |
| "grad_norm": 0.18638582527637482, |
| "learning_rate": 0.000164, |
| "loss": 0.019, |
| "step": 410 |
| }, |
| { |
| "epoch": 2.2222222222222223, |
| "grad_norm": 0.17806550860404968, |
| "learning_rate": 0.000168, |
| "loss": 0.0172, |
| "step": 420 |
| }, |
| { |
| "epoch": 2.2751322751322753, |
| "grad_norm": 0.18050020933151245, |
| "learning_rate": 0.000172, |
| "loss": 0.0174, |
| "step": 430 |
| }, |
| { |
| "epoch": 2.328042328042328, |
| "grad_norm": 0.19839580357074738, |
| "learning_rate": 0.00017600000000000002, |
| "loss": 0.017, |
| "step": 440 |
| }, |
| { |
| "epoch": 2.380952380952381, |
| "grad_norm": 0.21962286531925201, |
| "learning_rate": 0.00018, |
| "loss": 0.0156, |
| "step": 450 |
| }, |
| { |
| "epoch": 2.433862433862434, |
| "grad_norm": 0.1837696135044098, |
| "learning_rate": 0.00018400000000000003, |
| "loss": 0.0156, |
| "step": 460 |
| }, |
| { |
| "epoch": 2.4867724867724865, |
| "grad_norm": 0.1841411590576172, |
| "learning_rate": 0.000188, |
| "loss": 0.0168, |
| "step": 470 |
| }, |
| { |
| "epoch": 2.5396825396825395, |
| "grad_norm": 0.1979677677154541, |
| "learning_rate": 0.000192, |
| "loss": 0.0204, |
| "step": 480 |
| }, |
| { |
| "epoch": 2.5925925925925926, |
| "grad_norm": 0.21689844131469727, |
| "learning_rate": 0.000196, |
| "loss": 0.0241, |
| "step": 490 |
| }, |
| { |
| "epoch": 2.6455026455026456, |
| "grad_norm": 0.1965128779411316, |
| "learning_rate": 0.0002, |
| "loss": 0.0194, |
| "step": 500 |
| }, |
| { |
| "epoch": 2.6984126984126986, |
| "grad_norm": 0.2420044094324112, |
| "learning_rate": 0.00020400000000000003, |
| "loss": 0.018, |
| "step": 510 |
| }, |
| { |
| "epoch": 2.751322751322751, |
| "grad_norm": 0.21214275062084198, |
| "learning_rate": 0.00020800000000000001, |
| "loss": 0.0163, |
| "step": 520 |
| }, |
| { |
| "epoch": 2.804232804232804, |
| "grad_norm": 0.19208742678165436, |
| "learning_rate": 0.00021200000000000003, |
| "loss": 0.0175, |
| "step": 530 |
| }, |
| { |
| "epoch": 2.857142857142857, |
| "grad_norm": 0.1883249282836914, |
| "learning_rate": 0.00021600000000000002, |
| "loss": 0.0161, |
| "step": 540 |
| }, |
| { |
| "epoch": 2.91005291005291, |
| "grad_norm": 0.18176694214344025, |
| "learning_rate": 0.00022000000000000003, |
| "loss": 0.0142, |
| "step": 550 |
| }, |
| { |
| "epoch": 2.962962962962963, |
| "grad_norm": 0.2530503273010254, |
| "learning_rate": 0.00022400000000000002, |
| "loss": 0.0132, |
| "step": 560 |
| }, |
| { |
| "epoch": 3.015873015873016, |
| "grad_norm": 0.23503687977790833, |
| "learning_rate": 0.00022799999999999999, |
| "loss": 0.0142, |
| "step": 570 |
| }, |
| { |
| "epoch": 3.068783068783069, |
| "grad_norm": 0.20202770829200745, |
| "learning_rate": 0.000232, |
| "loss": 0.0142, |
| "step": 580 |
| }, |
| { |
| "epoch": 3.121693121693122, |
| "grad_norm": 0.22023585438728333, |
| "learning_rate": 0.000236, |
| "loss": 0.0138, |
| "step": 590 |
| }, |
| { |
| "epoch": 3.1746031746031744, |
| "grad_norm": 0.17895381152629852, |
| "learning_rate": 0.00024, |
| "loss": 0.0123, |
| "step": 600 |
| }, |
| { |
| "epoch": 3.2275132275132274, |
| "grad_norm": 0.19829729199409485, |
| "learning_rate": 0.000244, |
| "loss": 0.0117, |
| "step": 610 |
| }, |
| { |
| "epoch": 3.2804232804232805, |
| "grad_norm": 0.27360883355140686, |
| "learning_rate": 0.000248, |
| "loss": 0.0118, |
| "step": 620 |
| }, |
| { |
| "epoch": 3.3333333333333335, |
| "grad_norm": 0.29356101155281067, |
| "learning_rate": 0.000252, |
| "loss": 0.0147, |
| "step": 630 |
| }, |
| { |
| "epoch": 3.386243386243386, |
| "grad_norm": 0.23610763251781464, |
| "learning_rate": 0.00025600000000000004, |
| "loss": 0.0137, |
| "step": 640 |
| }, |
| { |
| "epoch": 3.439153439153439, |
| "grad_norm": 0.18409186601638794, |
| "learning_rate": 0.00026000000000000003, |
| "loss": 0.0133, |
| "step": 650 |
| }, |
| { |
| "epoch": 3.492063492063492, |
| "grad_norm": 0.20087911188602448, |
| "learning_rate": 0.000264, |
| "loss": 0.0126, |
| "step": 660 |
| }, |
| { |
| "epoch": 3.544973544973545, |
| "grad_norm": 0.2380540668964386, |
| "learning_rate": 0.000268, |
| "loss": 0.0133, |
| "step": 670 |
| }, |
| { |
| "epoch": 3.597883597883598, |
| "grad_norm": 0.19549822807312012, |
| "learning_rate": 0.00027200000000000005, |
| "loss": 0.014, |
| "step": 680 |
| }, |
| { |
| "epoch": 3.6507936507936507, |
| "grad_norm": 0.17787310481071472, |
| "learning_rate": 0.000276, |
| "loss": 0.0128, |
| "step": 690 |
| }, |
| { |
| "epoch": 3.7037037037037037, |
| "grad_norm": 0.16858863830566406, |
| "learning_rate": 0.00028, |
| "loss": 0.012, |
| "step": 700 |
| }, |
| { |
| "epoch": 3.7566137566137567, |
| "grad_norm": 0.20191702246665955, |
| "learning_rate": 0.000284, |
| "loss": 0.012, |
| "step": 710 |
| }, |
| { |
| "epoch": 3.8095238095238093, |
| "grad_norm": 0.25565165281295776, |
| "learning_rate": 0.000288, |
| "loss": 0.0126, |
| "step": 720 |
| }, |
| { |
| "epoch": 3.8624338624338623, |
| "grad_norm": 0.16845475137233734, |
| "learning_rate": 0.000292, |
| "loss": 0.0124, |
| "step": 730 |
| }, |
| { |
| "epoch": 3.9153439153439153, |
| "grad_norm": 0.19665707647800446, |
| "learning_rate": 0.000296, |
| "loss": 0.0122, |
| "step": 740 |
| }, |
| { |
| "epoch": 3.9682539682539684, |
| "grad_norm": 0.2073742151260376, |
| "learning_rate": 0.00030000000000000003, |
| "loss": 0.0111, |
| "step": 750 |
| }, |
| { |
| "epoch": 4.021164021164021, |
| "grad_norm": 0.25036758184432983, |
| "learning_rate": 0.000304, |
| "loss": 0.0122, |
| "step": 760 |
| }, |
| { |
| "epoch": 4.074074074074074, |
| "grad_norm": 0.23370663821697235, |
| "learning_rate": 0.000308, |
| "loss": 0.0121, |
| "step": 770 |
| }, |
| { |
| "epoch": 4.1269841269841265, |
| "grad_norm": 0.219792902469635, |
| "learning_rate": 0.00031200000000000005, |
| "loss": 0.0113, |
| "step": 780 |
| }, |
| { |
| "epoch": 4.1798941798941796, |
| "grad_norm": 0.13535428047180176, |
| "learning_rate": 0.00031600000000000004, |
| "loss": 0.0099, |
| "step": 790 |
| }, |
| { |
| "epoch": 4.232804232804233, |
| "grad_norm": 0.1930338442325592, |
| "learning_rate": 0.00032, |
| "loss": 0.0099, |
| "step": 800 |
| }, |
| { |
| "epoch": 4.285714285714286, |
| "grad_norm": 0.1936197578907013, |
| "learning_rate": 0.000324, |
| "loss": 0.0103, |
| "step": 810 |
| }, |
| { |
| "epoch": 4.338624338624339, |
| "grad_norm": 0.16250453889369965, |
| "learning_rate": 0.000328, |
| "loss": 0.0102, |
| "step": 820 |
| }, |
| { |
| "epoch": 4.391534391534392, |
| "grad_norm": 0.19020062685012817, |
| "learning_rate": 0.000332, |
| "loss": 0.0105, |
| "step": 830 |
| }, |
| { |
| "epoch": 4.444444444444445, |
| "grad_norm": 0.1647062748670578, |
| "learning_rate": 0.000336, |
| "loss": 0.011, |
| "step": 840 |
| }, |
| { |
| "epoch": 4.497354497354498, |
| "grad_norm": 0.20398204028606415, |
| "learning_rate": 0.00034, |
| "loss": 0.0132, |
| "step": 850 |
| }, |
| { |
| "epoch": 4.550264550264551, |
| "grad_norm": 0.2011038213968277, |
| "learning_rate": 0.000344, |
| "loss": 0.0125, |
| "step": 860 |
| }, |
| { |
| "epoch": 4.603174603174603, |
| "grad_norm": 0.20113958418369293, |
| "learning_rate": 0.000348, |
| "loss": 0.013, |
| "step": 870 |
| }, |
| { |
| "epoch": 4.656084656084656, |
| "grad_norm": 0.21507562696933746, |
| "learning_rate": 0.00035200000000000005, |
| "loss": 0.0119, |
| "step": 880 |
| }, |
| { |
| "epoch": 4.708994708994709, |
| "grad_norm": 0.26027268171310425, |
| "learning_rate": 0.00035600000000000003, |
| "loss": 0.0153, |
| "step": 890 |
| }, |
| { |
| "epoch": 4.761904761904762, |
| "grad_norm": 0.24658744037151337, |
| "learning_rate": 0.00036, |
| "loss": 0.0156, |
| "step": 900 |
| }, |
| { |
| "epoch": 4.814814814814815, |
| "grad_norm": 0.24339796602725983, |
| "learning_rate": 0.000364, |
| "loss": 0.0151, |
| "step": 910 |
| }, |
| { |
| "epoch": 4.867724867724868, |
| "grad_norm": 0.7407196164131165, |
| "learning_rate": 0.00036800000000000005, |
| "loss": 0.0246, |
| "step": 920 |
| }, |
| { |
| "epoch": 4.920634920634921, |
| "grad_norm": 0.5323154330253601, |
| "learning_rate": 0.00037200000000000004, |
| "loss": 0.0428, |
| "step": 930 |
| }, |
| { |
| "epoch": 4.973544973544973, |
| "grad_norm": 0.3756681978702545, |
| "learning_rate": 0.000376, |
| "loss": 0.0337, |
| "step": 940 |
| }, |
| { |
| "epoch": 5.026455026455026, |
| "grad_norm": 0.2930818498134613, |
| "learning_rate": 0.00038, |
| "loss": 0.0262, |
| "step": 950 |
| }, |
| { |
| "epoch": 5.079365079365079, |
| "grad_norm": 0.2086581140756607, |
| "learning_rate": 0.000384, |
| "loss": 0.0214, |
| "step": 960 |
| }, |
| { |
| "epoch": 5.132275132275132, |
| "grad_norm": 0.221287801861763, |
| "learning_rate": 0.000388, |
| "loss": 0.0232, |
| "step": 970 |
| }, |
| { |
| "epoch": 5.185185185185185, |
| "grad_norm": 0.21039186418056488, |
| "learning_rate": 0.000392, |
| "loss": 0.0169, |
| "step": 980 |
| }, |
| { |
| "epoch": 5.238095238095238, |
| "grad_norm": 0.19493348896503448, |
| "learning_rate": 0.00039600000000000003, |
| "loss": 0.0137, |
| "step": 990 |
| }, |
| { |
| "epoch": 5.291005291005291, |
| "grad_norm": 0.17548353970050812, |
| "learning_rate": 0.0004, |
| "loss": 0.0123, |
| "step": 1000 |
| }, |
| { |
| "epoch": 5.343915343915344, |
| "grad_norm": 0.1651264727115631, |
| "learning_rate": 0.0003999996919696056, |
| "loss": 0.0122, |
| "step": 1010 |
| }, |
| { |
| "epoch": 5.396825396825397, |
| "grad_norm": 0.15470008552074432, |
| "learning_rate": 0.0003999987678793712, |
| "loss": 0.0112, |
| "step": 1020 |
| }, |
| { |
| "epoch": 5.449735449735449, |
| "grad_norm": 0.1465303599834442, |
| "learning_rate": 0.0003999972277321432, |
| "loss": 0.0122, |
| "step": 1030 |
| }, |
| { |
| "epoch": 5.502645502645502, |
| "grad_norm": 0.15165336430072784, |
| "learning_rate": 0.0003999950715326658, |
| "loss": 0.0101, |
| "step": 1040 |
| }, |
| { |
| "epoch": 5.555555555555555, |
| "grad_norm": 0.14673031866550446, |
| "learning_rate": 0.0003999922992875807, |
| "loss": 0.0096, |
| "step": 1050 |
| }, |
| { |
| "epoch": 5.608465608465608, |
| "grad_norm": 0.15030068159103394, |
| "learning_rate": 0.0003999889110054274, |
| "loss": 0.0102, |
| "step": 1060 |
| }, |
| { |
| "epoch": 5.661375661375661, |
| "grad_norm": 0.1340291053056717, |
| "learning_rate": 0.00039998490669664266, |
| "loss": 0.0099, |
| "step": 1070 |
| }, |
| { |
| "epoch": 5.714285714285714, |
| "grad_norm": 0.23277540504932404, |
| "learning_rate": 0.00039998028637356107, |
| "loss": 0.0105, |
| "step": 1080 |
| }, |
| { |
| "epoch": 5.767195767195767, |
| "grad_norm": 0.15930365025997162, |
| "learning_rate": 0.00039997505005041456, |
| "loss": 0.0115, |
| "step": 1090 |
| }, |
| { |
| "epoch": 5.8201058201058204, |
| "grad_norm": 0.22390137612819672, |
| "learning_rate": 0.0003999691977433327, |
| "loss": 0.0112, |
| "step": 1100 |
| }, |
| { |
| "epoch": 5.8730158730158735, |
| "grad_norm": 0.17377761006355286, |
| "learning_rate": 0.0003999627294703423, |
| "loss": 0.0108, |
| "step": 1110 |
| }, |
| { |
| "epoch": 5.925925925925926, |
| "grad_norm": 0.20633409917354584, |
| "learning_rate": 0.0003999556452513676, |
| "loss": 0.0111, |
| "step": 1120 |
| }, |
| { |
| "epoch": 5.978835978835979, |
| "grad_norm": 0.15734145045280457, |
| "learning_rate": 0.0003999479451082301, |
| "loss": 0.0098, |
| "step": 1130 |
| }, |
| { |
| "epoch": 6.031746031746032, |
| "grad_norm": 0.1325804442167282, |
| "learning_rate": 0.0003999396290646487, |
| "loss": 0.0095, |
| "step": 1140 |
| }, |
| { |
| "epoch": 6.084656084656085, |
| "grad_norm": 0.14096157252788544, |
| "learning_rate": 0.00039993069714623934, |
| "loss": 0.0088, |
| "step": 1150 |
| }, |
| { |
| "epoch": 6.137566137566138, |
| "grad_norm": 0.1900232434272766, |
| "learning_rate": 0.00039992114938051487, |
| "loss": 0.0091, |
| "step": 1160 |
| }, |
| { |
| "epoch": 6.190476190476191, |
| "grad_norm": 0.13816455006599426, |
| "learning_rate": 0.0003999109857968855, |
| "loss": 0.0081, |
| "step": 1170 |
| }, |
| { |
| "epoch": 6.243386243386244, |
| "grad_norm": 0.17951875925064087, |
| "learning_rate": 0.0003999002064266581, |
| "loss": 0.0086, |
| "step": 1180 |
| }, |
| { |
| "epoch": 6.296296296296296, |
| "grad_norm": 0.14678612351417542, |
| "learning_rate": 0.0003998888113030364, |
| "loss": 0.0086, |
| "step": 1190 |
| }, |
| { |
| "epoch": 6.349206349206349, |
| "grad_norm": 0.12591250240802765, |
| "learning_rate": 0.00039987680046112085, |
| "loss": 0.0081, |
| "step": 1200 |
| }, |
| { |
| "epoch": 6.402116402116402, |
| "grad_norm": 0.11895497143268585, |
| "learning_rate": 0.00039986417393790845, |
| "loss": 0.0073, |
| "step": 1210 |
| }, |
| { |
| "epoch": 6.455026455026455, |
| "grad_norm": 0.14046961069107056, |
| "learning_rate": 0.0003998509317722928, |
| "loss": 0.0073, |
| "step": 1220 |
| }, |
| { |
| "epoch": 6.507936507936508, |
| "grad_norm": 0.11593084037303925, |
| "learning_rate": 0.00039983707400506374, |
| "loss": 0.0077, |
| "step": 1230 |
| }, |
| { |
| "epoch": 6.560846560846561, |
| "grad_norm": 0.1327681690454483, |
| "learning_rate": 0.00039982260067890737, |
| "loss": 0.0071, |
| "step": 1240 |
| }, |
| { |
| "epoch": 6.613756613756614, |
| "grad_norm": 0.1821785569190979, |
| "learning_rate": 0.00039980751183840604, |
| "loss": 0.0084, |
| "step": 1250 |
| }, |
| { |
| "epoch": 6.666666666666667, |
| "grad_norm": 0.14916472136974335, |
| "learning_rate": 0.0003997918075300379, |
| "loss": 0.0078, |
| "step": 1260 |
| }, |
| { |
| "epoch": 6.71957671957672, |
| "grad_norm": 0.1373327672481537, |
| "learning_rate": 0.000399775487802177, |
| "loss": 0.0084, |
| "step": 1270 |
| }, |
| { |
| "epoch": 6.772486772486772, |
| "grad_norm": 0.1542566865682602, |
| "learning_rate": 0.0003997585527050931, |
| "loss": 0.0079, |
| "step": 1280 |
| }, |
| { |
| "epoch": 6.825396825396825, |
| "grad_norm": 0.1910487413406372, |
| "learning_rate": 0.0003997410022909514, |
| "loss": 0.009, |
| "step": 1290 |
| }, |
| { |
| "epoch": 6.878306878306878, |
| "grad_norm": 0.12972815334796906, |
| "learning_rate": 0.0003997228366138125, |
| "loss": 0.0079, |
| "step": 1300 |
| }, |
| { |
| "epoch": 6.931216931216931, |
| "grad_norm": 0.09012899547815323, |
| "learning_rate": 0.00039970405572963225, |
| "loss": 0.0064, |
| "step": 1310 |
| }, |
| { |
| "epoch": 6.984126984126984, |
| "grad_norm": 0.12821277976036072, |
| "learning_rate": 0.0003996846596962614, |
| "loss": 0.0069, |
| "step": 1320 |
| }, |
| { |
| "epoch": 7.037037037037037, |
| "grad_norm": 0.1257280707359314, |
| "learning_rate": 0.0003996646485734458, |
| "loss": 0.0063, |
| "step": 1330 |
| }, |
| { |
| "epoch": 7.08994708994709, |
| "grad_norm": 0.1407238394021988, |
| "learning_rate": 0.00039964402242282564, |
| "loss": 0.0067, |
| "step": 1340 |
| }, |
| { |
| "epoch": 7.142857142857143, |
| "grad_norm": 0.16583344340324402, |
| "learning_rate": 0.0003996227813079357, |
| "loss": 0.0063, |
| "step": 1350 |
| }, |
| { |
| "epoch": 7.195767195767195, |
| "grad_norm": 0.1333070546388626, |
| "learning_rate": 0.00039960092529420525, |
| "loss": 0.0069, |
| "step": 1360 |
| }, |
| { |
| "epoch": 7.248677248677248, |
| "grad_norm": 0.11976024508476257, |
| "learning_rate": 0.0003995784544489573, |
| "loss": 0.0066, |
| "step": 1370 |
| }, |
| { |
| "epoch": 7.301587301587301, |
| "grad_norm": 0.14129309356212616, |
| "learning_rate": 0.00039955536884140886, |
| "loss": 0.0069, |
| "step": 1380 |
| }, |
| { |
| "epoch": 7.354497354497354, |
| "grad_norm": 0.11559101939201355, |
| "learning_rate": 0.0003995316685426708, |
| "loss": 0.0068, |
| "step": 1390 |
| }, |
| { |
| "epoch": 7.407407407407407, |
| "grad_norm": 0.1609719693660736, |
| "learning_rate": 0.00039950735362574713, |
| "loss": 0.0068, |
| "step": 1400 |
| }, |
| { |
| "epoch": 7.4603174603174605, |
| "grad_norm": 0.14449729025363922, |
| "learning_rate": 0.00039948242416553516, |
| "loss": 0.0063, |
| "step": 1410 |
| }, |
| { |
| "epoch": 7.5132275132275135, |
| "grad_norm": 0.1491028368473053, |
| "learning_rate": 0.00039945688023882515, |
| "loss": 0.0067, |
| "step": 1420 |
| }, |
| { |
| "epoch": 7.5661375661375665, |
| "grad_norm": 0.14751970767974854, |
| "learning_rate": 0.00039943072192430033, |
| "loss": 0.0064, |
| "step": 1430 |
| }, |
| { |
| "epoch": 7.619047619047619, |
| "grad_norm": 0.15719734132289886, |
| "learning_rate": 0.00039940394930253615, |
| "loss": 0.0063, |
| "step": 1440 |
| }, |
| { |
| "epoch": 7.671957671957672, |
| "grad_norm": 0.13127799332141876, |
| "learning_rate": 0.00039937656245600045, |
| "loss": 0.0062, |
| "step": 1450 |
| }, |
| { |
| "epoch": 7.724867724867725, |
| "grad_norm": 0.11200472712516785, |
| "learning_rate": 0.00039934856146905305, |
| "loss": 0.006, |
| "step": 1460 |
| }, |
| { |
| "epoch": 7.777777777777778, |
| "grad_norm": 0.12920986115932465, |
| "learning_rate": 0.0003993199464279455, |
| "loss": 0.0057, |
| "step": 1470 |
| }, |
| { |
| "epoch": 7.830687830687831, |
| "grad_norm": 0.11783723533153534, |
| "learning_rate": 0.0003992907174208207, |
| "loss": 0.006, |
| "step": 1480 |
| }, |
| { |
| "epoch": 7.883597883597884, |
| "grad_norm": 0.13370218873023987, |
| "learning_rate": 0.00039926087453771305, |
| "loss": 0.0065, |
| "step": 1490 |
| }, |
| { |
| "epoch": 7.936507936507937, |
| "grad_norm": 0.13977813720703125, |
| "learning_rate": 0.00039923041787054766, |
| "loss": 0.0065, |
| "step": 1500 |
| }, |
| { |
| "epoch": 7.98941798941799, |
| "grad_norm": 0.0971393808722496, |
| "learning_rate": 0.00039919934751314026, |
| "loss": 0.0061, |
| "step": 1510 |
| }, |
| { |
| "epoch": 8.042328042328043, |
| "grad_norm": 0.1217234656214714, |
| "learning_rate": 0.00039916766356119706, |
| "loss": 0.0063, |
| "step": 1520 |
| }, |
| { |
| "epoch": 8.095238095238095, |
| "grad_norm": 0.10790567845106125, |
| "learning_rate": 0.00039913536611231425, |
| "loss": 0.0061, |
| "step": 1530 |
| }, |
| { |
| "epoch": 8.148148148148149, |
| "grad_norm": 0.13306866586208344, |
| "learning_rate": 0.0003991024552659777, |
| "loss": 0.0059, |
| "step": 1540 |
| }, |
| { |
| "epoch": 8.201058201058201, |
| "grad_norm": 0.11544477194547653, |
| "learning_rate": 0.000399068931123563, |
| "loss": 0.0059, |
| "step": 1550 |
| }, |
| { |
| "epoch": 8.253968253968253, |
| "grad_norm": 0.10638356953859329, |
| "learning_rate": 0.00039903479378833453, |
| "loss": 0.006, |
| "step": 1560 |
| }, |
| { |
| "epoch": 8.306878306878307, |
| "grad_norm": 0.0943189263343811, |
| "learning_rate": 0.00039900004336544567, |
| "loss": 0.0056, |
| "step": 1570 |
| }, |
| { |
| "epoch": 8.359788359788359, |
| "grad_norm": 0.14040158689022064, |
| "learning_rate": 0.0003989646799619384, |
| "loss": 0.0056, |
| "step": 1580 |
| }, |
| { |
| "epoch": 8.412698412698413, |
| "grad_norm": 0.12987872958183289, |
| "learning_rate": 0.00039892870368674266, |
| "loss": 0.0064, |
| "step": 1590 |
| }, |
| { |
| "epoch": 8.465608465608465, |
| "grad_norm": 0.1415356993675232, |
| "learning_rate": 0.00039889211465067635, |
| "loss": 0.0066, |
| "step": 1600 |
| }, |
| { |
| "epoch": 8.518518518518519, |
| "grad_norm": 0.10001295059919357, |
| "learning_rate": 0.0003988549129664448, |
| "loss": 0.0063, |
| "step": 1610 |
| }, |
| { |
| "epoch": 8.571428571428571, |
| "grad_norm": 0.1405894011259079, |
| "learning_rate": 0.0003988170987486405, |
| "loss": 0.0055, |
| "step": 1620 |
| }, |
| { |
| "epoch": 8.624338624338625, |
| "grad_norm": 0.1409602016210556, |
| "learning_rate": 0.0003987786721137428, |
| "loss": 0.0058, |
| "step": 1630 |
| }, |
| { |
| "epoch": 8.677248677248677, |
| "grad_norm": 0.16097617149353027, |
| "learning_rate": 0.00039873963318011734, |
| "loss": 0.0095, |
| "step": 1640 |
| }, |
| { |
| "epoch": 8.73015873015873, |
| "grad_norm": 0.16123740375041962, |
| "learning_rate": 0.0003986999820680159, |
| "loss": 0.0089, |
| "step": 1650 |
| }, |
| { |
| "epoch": 8.783068783068783, |
| "grad_norm": 0.12723775207996368, |
| "learning_rate": 0.00039865971889957604, |
| "loss": 0.0072, |
| "step": 1660 |
| }, |
| { |
| "epoch": 8.835978835978835, |
| "grad_norm": 0.14602605998516083, |
| "learning_rate": 0.0003986188437988204, |
| "loss": 0.0077, |
| "step": 1670 |
| }, |
| { |
| "epoch": 8.88888888888889, |
| "grad_norm": 0.11515416204929352, |
| "learning_rate": 0.00039857735689165685, |
| "loss": 0.0072, |
| "step": 1680 |
| }, |
| { |
| "epoch": 8.941798941798941, |
| "grad_norm": 0.1307208091020584, |
| "learning_rate": 0.00039853525830587766, |
| "loss": 0.0066, |
| "step": 1690 |
| }, |
| { |
| "epoch": 8.994708994708995, |
| "grad_norm": 0.11783187836408615, |
| "learning_rate": 0.00039849254817115925, |
| "loss": 0.0063, |
| "step": 1700 |
| }, |
| { |
| "epoch": 9.047619047619047, |
| "grad_norm": 0.12625868618488312, |
| "learning_rate": 0.00039844922661906183, |
| "loss": 0.0057, |
| "step": 1710 |
| }, |
| { |
| "epoch": 9.100529100529101, |
| "grad_norm": 0.14339777827262878, |
| "learning_rate": 0.0003984052937830289, |
| "loss": 0.0082, |
| "step": 1720 |
| }, |
| { |
| "epoch": 9.153439153439153, |
| "grad_norm": 0.14355403184890747, |
| "learning_rate": 0.00039836074979838695, |
| "loss": 0.0077, |
| "step": 1730 |
| }, |
| { |
| "epoch": 9.206349206349206, |
| "grad_norm": 0.1556662619113922, |
| "learning_rate": 0.00039831559480234507, |
| "loss": 0.0085, |
| "step": 1740 |
| }, |
| { |
| "epoch": 9.25925925925926, |
| "grad_norm": 0.1498294323682785, |
| "learning_rate": 0.0003982698289339943, |
| "loss": 0.0076, |
| "step": 1750 |
| }, |
| { |
| "epoch": 9.312169312169312, |
| "grad_norm": 0.28491827845573425, |
| "learning_rate": 0.0003982234523343074, |
| "loss": 0.0144, |
| "step": 1760 |
| }, |
| { |
| "epoch": 9.365079365079366, |
| "grad_norm": 0.2578721046447754, |
| "learning_rate": 0.0003981764651461385, |
| "loss": 0.0181, |
| "step": 1770 |
| }, |
| { |
| "epoch": 9.417989417989418, |
| "grad_norm": 0.21556247770786285, |
| "learning_rate": 0.00039812886751422233, |
| "loss": 0.0141, |
| "step": 1780 |
| }, |
| { |
| "epoch": 9.470899470899472, |
| "grad_norm": 0.15084554255008698, |
| "learning_rate": 0.00039808065958517417, |
| "loss": 0.0116, |
| "step": 1790 |
| }, |
| { |
| "epoch": 9.523809523809524, |
| "grad_norm": 0.179042786359787, |
| "learning_rate": 0.00039803184150748893, |
| "loss": 0.0097, |
| "step": 1800 |
| }, |
| { |
| "epoch": 9.576719576719576, |
| "grad_norm": 0.22134196758270264, |
| "learning_rate": 0.00039798241343154124, |
| "loss": 0.0102, |
| "step": 1810 |
| }, |
| { |
| "epoch": 9.62962962962963, |
| "grad_norm": 0.20731066167354584, |
| "learning_rate": 0.0003979323755095846, |
| "loss": 0.0132, |
| "step": 1820 |
| }, |
| { |
| "epoch": 9.682539682539682, |
| "grad_norm": 0.17891205847263336, |
| "learning_rate": 0.000397881727895751, |
| "loss": 0.0129, |
| "step": 1830 |
| }, |
| { |
| "epoch": 9.735449735449736, |
| "grad_norm": 0.18633806705474854, |
| "learning_rate": 0.00039783047074605043, |
| "loss": 0.0126, |
| "step": 1840 |
| }, |
| { |
| "epoch": 9.788359788359788, |
| "grad_norm": 0.13140429556369781, |
| "learning_rate": 0.00039777860421837054, |
| "loss": 0.0093, |
| "step": 1850 |
| }, |
| { |
| "epoch": 9.841269841269842, |
| "grad_norm": 0.10666497051715851, |
| "learning_rate": 0.000397726128472476, |
| "loss": 0.0076, |
| "step": 1860 |
| }, |
| { |
| "epoch": 9.894179894179894, |
| "grad_norm": 0.11088919639587402, |
| "learning_rate": 0.00039767304367000807, |
| "loss": 0.0067, |
| "step": 1870 |
| }, |
| { |
| "epoch": 9.947089947089948, |
| "grad_norm": 0.12122470140457153, |
| "learning_rate": 0.00039761934997448406, |
| "loss": 0.0066, |
| "step": 1880 |
| }, |
| { |
| "epoch": 10.0, |
| "grad_norm": 0.11925218254327774, |
| "learning_rate": 0.0003975650475512969, |
| "loss": 0.0065, |
| "step": 1890 |
| }, |
| { |
| "epoch": 10.052910052910052, |
| "grad_norm": 0.09949792176485062, |
| "learning_rate": 0.00039751013656771445, |
| "loss": 0.0054, |
| "step": 1900 |
| }, |
| { |
| "epoch": 10.105820105820106, |
| "grad_norm": 0.09856072068214417, |
| "learning_rate": 0.0003974546171928793, |
| "loss": 0.0056, |
| "step": 1910 |
| }, |
| { |
| "epoch": 10.158730158730158, |
| "grad_norm": 0.13713252544403076, |
| "learning_rate": 0.0003973984895978081, |
| "loss": 0.0057, |
| "step": 1920 |
| }, |
| { |
| "epoch": 10.211640211640212, |
| "grad_norm": 0.15290550887584686, |
| "learning_rate": 0.0003973417539553908, |
| "loss": 0.0061, |
| "step": 1930 |
| }, |
| { |
| "epoch": 10.264550264550264, |
| "grad_norm": 0.11667478084564209, |
| "learning_rate": 0.00039728441044039036, |
| "loss": 0.0059, |
| "step": 1940 |
| }, |
| { |
| "epoch": 10.317460317460318, |
| "grad_norm": 0.12303528189659119, |
| "learning_rate": 0.0003972264592294423, |
| "loss": 0.0065, |
| "step": 1950 |
| }, |
| { |
| "epoch": 10.37037037037037, |
| "grad_norm": 0.10757434368133545, |
| "learning_rate": 0.0003971679005010541, |
| "loss": 0.0065, |
| "step": 1960 |
| }, |
| { |
| "epoch": 10.423280423280424, |
| "grad_norm": 0.11240673065185547, |
| "learning_rate": 0.00039710873443560426, |
| "loss": 0.0057, |
| "step": 1970 |
| }, |
| { |
| "epoch": 10.476190476190476, |
| "grad_norm": 0.10644736886024475, |
| "learning_rate": 0.00039704896121534224, |
| "loss": 0.0065, |
| "step": 1980 |
| }, |
| { |
| "epoch": 10.529100529100528, |
| "grad_norm": 0.10123038291931152, |
| "learning_rate": 0.00039698858102438786, |
| "loss": 0.0059, |
| "step": 1990 |
| }, |
| { |
| "epoch": 10.582010582010582, |
| "grad_norm": 0.12077871710062027, |
| "learning_rate": 0.00039692759404873034, |
| "loss": 0.0061, |
| "step": 2000 |
| }, |
| { |
| "epoch": 10.634920634920634, |
| "grad_norm": 0.13381993770599365, |
| "learning_rate": 0.00039686600047622817, |
| "loss": 0.0058, |
| "step": 2010 |
| }, |
| { |
| "epoch": 10.687830687830688, |
| "grad_norm": 0.09834510833024979, |
| "learning_rate": 0.0003968038004966082, |
| "loss": 0.0052, |
| "step": 2020 |
| }, |
| { |
| "epoch": 10.74074074074074, |
| "grad_norm": 0.11062684655189514, |
| "learning_rate": 0.0003967409943014654, |
| "loss": 0.0047, |
| "step": 2030 |
| }, |
| { |
| "epoch": 10.793650793650794, |
| "grad_norm": 0.10785150527954102, |
| "learning_rate": 0.0003966775820842618, |
| "loss": 0.0049, |
| "step": 2040 |
| }, |
| { |
| "epoch": 10.846560846560847, |
| "grad_norm": 0.08249185979366302, |
| "learning_rate": 0.0003966135640403264, |
| "loss": 0.0047, |
| "step": 2050 |
| }, |
| { |
| "epoch": 10.899470899470899, |
| "grad_norm": 0.1601790189743042, |
| "learning_rate": 0.00039654894036685425, |
| "loss": 0.0068, |
| "step": 2060 |
| }, |
| { |
| "epoch": 10.952380952380953, |
| "grad_norm": 0.1896616369485855, |
| "learning_rate": 0.0003964837112629058, |
| "loss": 0.0113, |
| "step": 2070 |
| }, |
| { |
| "epoch": 11.005291005291005, |
| "grad_norm": 0.19728443026542664, |
| "learning_rate": 0.0003964178769294066, |
| "loss": 0.0111, |
| "step": 2080 |
| }, |
| { |
| "epoch": 11.058201058201059, |
| "grad_norm": 0.18131254613399506, |
| "learning_rate": 0.00039635143756914636, |
| "loss": 0.0104, |
| "step": 2090 |
| }, |
| { |
| "epoch": 11.11111111111111, |
| "grad_norm": 0.1904458999633789, |
| "learning_rate": 0.00039628439338677856, |
| "loss": 0.0086, |
| "step": 2100 |
| }, |
| { |
| "epoch": 11.164021164021165, |
| "grad_norm": 0.13467629253864288, |
| "learning_rate": 0.0003962167445888196, |
| "loss": 0.0076, |
| "step": 2110 |
| }, |
| { |
| "epoch": 11.216931216931217, |
| "grad_norm": 0.11170188337564468, |
| "learning_rate": 0.0003961484913836484, |
| "loss": 0.0074, |
| "step": 2120 |
| }, |
| { |
| "epoch": 11.26984126984127, |
| "grad_norm": 0.14301618933677673, |
| "learning_rate": 0.00039607963398150545, |
| "loss": 0.0086, |
| "step": 2130 |
| }, |
| { |
| "epoch": 11.322751322751323, |
| "grad_norm": 0.11310645937919617, |
| "learning_rate": 0.00039601017259449265, |
| "loss": 0.007, |
| "step": 2140 |
| }, |
| { |
| "epoch": 11.375661375661375, |
| "grad_norm": 0.15265315771102905, |
| "learning_rate": 0.00039594010743657207, |
| "loss": 0.0069, |
| "step": 2150 |
| }, |
| { |
| "epoch": 11.428571428571429, |
| "grad_norm": 0.1819579005241394, |
| "learning_rate": 0.0003958694387235657, |
| "loss": 0.0064, |
| "step": 2160 |
| }, |
| { |
| "epoch": 11.481481481481481, |
| "grad_norm": 0.12576521933078766, |
| "learning_rate": 0.00039579816667315466, |
| "loss": 0.0062, |
| "step": 2170 |
| }, |
| { |
| "epoch": 11.534391534391535, |
| "grad_norm": 0.13003787398338318, |
| "learning_rate": 0.0003957262915048786, |
| "loss": 0.0062, |
| "step": 2180 |
| }, |
| { |
| "epoch": 11.587301587301587, |
| "grad_norm": 0.13775634765625, |
| "learning_rate": 0.00039565381344013485, |
| "loss": 0.0057, |
| "step": 2190 |
| }, |
| { |
| "epoch": 11.640211640211641, |
| "grad_norm": 0.16895855963230133, |
| "learning_rate": 0.0003955807327021778, |
| "loss": 0.0057, |
| "step": 2200 |
| }, |
| { |
| "epoch": 11.693121693121693, |
| "grad_norm": 0.1317339539527893, |
| "learning_rate": 0.0003955070495161185, |
| "loss": 0.0055, |
| "step": 2210 |
| }, |
| { |
| "epoch": 11.746031746031747, |
| "grad_norm": 0.12059590220451355, |
| "learning_rate": 0.00039543276410892336, |
| "loss": 0.0074, |
| "step": 2220 |
| }, |
| { |
| "epoch": 11.798941798941799, |
| "grad_norm": 0.1815410554409027, |
| "learning_rate": 0.00039535787670941415, |
| "loss": 0.0068, |
| "step": 2230 |
| }, |
| { |
| "epoch": 11.851851851851851, |
| "grad_norm": 1.0670920610427856, |
| "learning_rate": 0.00039528238754826675, |
| "loss": 0.0335, |
| "step": 2240 |
| }, |
| { |
| "epoch": 11.904761904761905, |
| "grad_norm": 1.9259690046310425, |
| "learning_rate": 0.00039520629685801077, |
| "loss": 0.2453, |
| "step": 2250 |
| }, |
| { |
| "epoch": 11.957671957671957, |
| "grad_norm": 1.7839056253433228, |
| "learning_rate": 0.00039512960487302865, |
| "loss": 0.4239, |
| "step": 2260 |
| }, |
| { |
| "epoch": 12.010582010582011, |
| "grad_norm": 1.0937215089797974, |
| "learning_rate": 0.000395052311829555, |
| "loss": 0.2597, |
| "step": 2270 |
| }, |
| { |
| "epoch": 12.063492063492063, |
| "grad_norm": 0.693492591381073, |
| "learning_rate": 0.0003949744179656759, |
| "loss": 0.1695, |
| "step": 2280 |
| }, |
| { |
| "epoch": 12.116402116402117, |
| "grad_norm": 0.7162590026855469, |
| "learning_rate": 0.0003948959235213281, |
| "loss": 0.1095, |
| "step": 2290 |
| }, |
| { |
| "epoch": 12.16931216931217, |
| "grad_norm": 0.6560825109481812, |
| "learning_rate": 0.00039481682873829835, |
| "loss": 0.092, |
| "step": 2300 |
| }, |
| { |
| "epoch": 12.222222222222221, |
| "grad_norm": 0.49403268098831177, |
| "learning_rate": 0.0003947371338602227, |
| "loss": 0.0659, |
| "step": 2310 |
| }, |
| { |
| "epoch": 12.275132275132275, |
| "grad_norm": 0.4173797070980072, |
| "learning_rate": 0.0003946568391325855, |
| "loss": 0.0482, |
| "step": 2320 |
| }, |
| { |
| "epoch": 12.328042328042327, |
| "grad_norm": 0.3571629226207733, |
| "learning_rate": 0.00039457594480271895, |
| "loss": 0.0364, |
| "step": 2330 |
| }, |
| { |
| "epoch": 12.380952380952381, |
| "grad_norm": 0.25554782152175903, |
| "learning_rate": 0.00039449445111980217, |
| "loss": 0.027, |
| "step": 2340 |
| }, |
| { |
| "epoch": 12.433862433862434, |
| "grad_norm": 0.20498105883598328, |
| "learning_rate": 0.00039441235833486045, |
| "loss": 0.0205, |
| "step": 2350 |
| }, |
| { |
| "epoch": 12.486772486772487, |
| "grad_norm": 0.29463911056518555, |
| "learning_rate": 0.0003943296667007646, |
| "loss": 0.019, |
| "step": 2360 |
| }, |
| { |
| "epoch": 12.53968253968254, |
| "grad_norm": 0.16020773351192474, |
| "learning_rate": 0.0003942463764722299, |
| "loss": 0.0209, |
| "step": 2370 |
| }, |
| { |
| "epoch": 12.592592592592592, |
| "grad_norm": 0.17252512276172638, |
| "learning_rate": 0.00039416248790581567, |
| "loss": 0.0169, |
| "step": 2380 |
| }, |
| { |
| "epoch": 12.645502645502646, |
| "grad_norm": 0.18067197501659393, |
| "learning_rate": 0.0003940780012599241, |
| "loss": 0.0163, |
| "step": 2390 |
| }, |
| { |
| "epoch": 12.698412698412698, |
| "grad_norm": 0.12952257692813873, |
| "learning_rate": 0.0003939929167947997, |
| "loss": 0.0135, |
| "step": 2400 |
| }, |
| { |
| "epoch": 12.751322751322752, |
| "grad_norm": 0.14395594596862793, |
| "learning_rate": 0.00039390723477252866, |
| "loss": 0.0122, |
| "step": 2410 |
| }, |
| { |
| "epoch": 12.804232804232804, |
| "grad_norm": 0.11114363372325897, |
| "learning_rate": 0.00039382095545703746, |
| "loss": 0.0113, |
| "step": 2420 |
| }, |
| { |
| "epoch": 12.857142857142858, |
| "grad_norm": 0.10381834954023361, |
| "learning_rate": 0.0003937340791140927, |
| "loss": 0.0105, |
| "step": 2430 |
| }, |
| { |
| "epoch": 12.91005291005291, |
| "grad_norm": 0.1080816239118576, |
| "learning_rate": 0.00039364660601129994, |
| "loss": 0.0103, |
| "step": 2440 |
| }, |
| { |
| "epoch": 12.962962962962964, |
| "grad_norm": 0.09894987940788269, |
| "learning_rate": 0.0003935585364181029, |
| "loss": 0.0102, |
| "step": 2450 |
| }, |
| { |
| "epoch": 13.015873015873016, |
| "grad_norm": 0.10151786357164383, |
| "learning_rate": 0.00039346987060578264, |
| "loss": 0.0093, |
| "step": 2460 |
| }, |
| { |
| "epoch": 13.068783068783068, |
| "grad_norm": 0.09461592882871628, |
| "learning_rate": 0.0003933806088474569, |
| "loss": 0.0091, |
| "step": 2470 |
| }, |
| { |
| "epoch": 13.121693121693122, |
| "grad_norm": 0.10612160712480545, |
| "learning_rate": 0.00039329075141807904, |
| "loss": 0.0089, |
| "step": 2480 |
| }, |
| { |
| "epoch": 13.174603174603174, |
| "grad_norm": 0.11442465335130692, |
| "learning_rate": 0.00039320029859443717, |
| "loss": 0.01, |
| "step": 2490 |
| }, |
| { |
| "epoch": 13.227513227513228, |
| "grad_norm": 0.16065755486488342, |
| "learning_rate": 0.00039310925065515353, |
| "loss": 0.009, |
| "step": 2500 |
| }, |
| { |
| "epoch": 13.28042328042328, |
| "grad_norm": 0.11742605268955231, |
| "learning_rate": 0.00039301760788068346, |
| "loss": 0.009, |
| "step": 2510 |
| }, |
| { |
| "epoch": 13.333333333333334, |
| "grad_norm": 0.1333356350660324, |
| "learning_rate": 0.0003929253705533144, |
| "loss": 0.0095, |
| "step": 2520 |
| }, |
| { |
| "epoch": 13.386243386243386, |
| "grad_norm": 0.16482898592948914, |
| "learning_rate": 0.0003928325389571656, |
| "loss": 0.0118, |
| "step": 2530 |
| }, |
| { |
| "epoch": 13.43915343915344, |
| "grad_norm": 0.11537864804267883, |
| "learning_rate": 0.0003927391133781865, |
| "loss": 0.0105, |
| "step": 2540 |
| }, |
| { |
| "epoch": 13.492063492063492, |
| "grad_norm": 0.19208720326423645, |
| "learning_rate": 0.0003926450941041562, |
| "loss": 0.0097, |
| "step": 2550 |
| }, |
| { |
| "epoch": 13.544973544973544, |
| "grad_norm": 0.15091383457183838, |
| "learning_rate": 0.00039255048142468275, |
| "loss": 0.011, |
| "step": 2560 |
| }, |
| { |
| "epoch": 13.597883597883598, |
| "grad_norm": 0.11797965317964554, |
| "learning_rate": 0.0003924552756312019, |
| "loss": 0.0086, |
| "step": 2570 |
| }, |
| { |
| "epoch": 13.65079365079365, |
| "grad_norm": 0.09077858924865723, |
| "learning_rate": 0.00039235947701697643, |
| "loss": 0.0081, |
| "step": 2580 |
| }, |
| { |
| "epoch": 13.703703703703704, |
| "grad_norm": 0.09163253009319305, |
| "learning_rate": 0.0003922630858770951, |
| "loss": 0.0071, |
| "step": 2590 |
| }, |
| { |
| "epoch": 13.756613756613756, |
| "grad_norm": 0.09899520128965378, |
| "learning_rate": 0.00039216610250847214, |
| "loss": 0.0064, |
| "step": 2600 |
| }, |
| { |
| "epoch": 13.80952380952381, |
| "grad_norm": 0.13137751817703247, |
| "learning_rate": 0.0003920685272098457, |
| "loss": 0.0073, |
| "step": 2610 |
| }, |
| { |
| "epoch": 13.862433862433862, |
| "grad_norm": 0.1063733696937561, |
| "learning_rate": 0.0003919703602817772, |
| "loss": 0.007, |
| "step": 2620 |
| }, |
| { |
| "epoch": 13.915343915343914, |
| "grad_norm": 0.09142506867647171, |
| "learning_rate": 0.0003918716020266509, |
| "loss": 0.0068, |
| "step": 2630 |
| }, |
| { |
| "epoch": 13.968253968253968, |
| "grad_norm": 0.1315004676580429, |
| "learning_rate": 0.00039177225274867196, |
| "loss": 0.0065, |
| "step": 2640 |
| }, |
| { |
| "epoch": 14.02116402116402, |
| "grad_norm": 0.09566756337881088, |
| "learning_rate": 0.00039167231275386656, |
| "loss": 0.0061, |
| "step": 2650 |
| }, |
| { |
| "epoch": 14.074074074074074, |
| "grad_norm": 0.10887089371681213, |
| "learning_rate": 0.0003915717823500802, |
| "loss": 0.0055, |
| "step": 2660 |
| }, |
| { |
| "epoch": 14.126984126984127, |
| "grad_norm": 0.09512364864349365, |
| "learning_rate": 0.00039147066184697706, |
| "loss": 0.0057, |
| "step": 2670 |
| }, |
| { |
| "epoch": 14.17989417989418, |
| "grad_norm": 0.09984894841909409, |
| "learning_rate": 0.00039136895155603905, |
| "loss": 0.0058, |
| "step": 2680 |
| }, |
| { |
| "epoch": 14.232804232804233, |
| "grad_norm": 0.0963057205080986, |
| "learning_rate": 0.0003912666517905647, |
| "loss": 0.0055, |
| "step": 2690 |
| }, |
| { |
| "epoch": 14.285714285714286, |
| "grad_norm": 0.09049777686595917, |
| "learning_rate": 0.0003911637628656685, |
| "loss": 0.0062, |
| "step": 2700 |
| }, |
| { |
| "epoch": 14.338624338624339, |
| "grad_norm": 0.10670194029808044, |
| "learning_rate": 0.00039106028509827955, |
| "loss": 0.006, |
| "step": 2710 |
| }, |
| { |
| "epoch": 14.39153439153439, |
| "grad_norm": 0.0816589891910553, |
| "learning_rate": 0.0003909562188071408, |
| "loss": 0.0056, |
| "step": 2720 |
| }, |
| { |
| "epoch": 14.444444444444445, |
| "grad_norm": 0.10106207430362701, |
| "learning_rate": 0.00039085156431280813, |
| "loss": 0.0057, |
| "step": 2730 |
| }, |
| { |
| "epoch": 14.497354497354497, |
| "grad_norm": 0.08731462806463242, |
| "learning_rate": 0.0003907463219376491, |
| "loss": 0.0054, |
| "step": 2740 |
| }, |
| { |
| "epoch": 14.55026455026455, |
| "grad_norm": 0.09356683492660522, |
| "learning_rate": 0.00039064049200584225, |
| "loss": 0.0053, |
| "step": 2750 |
| }, |
| { |
| "epoch": 14.603174603174603, |
| "grad_norm": 0.10447552055120468, |
| "learning_rate": 0.000390534074843376, |
| "loss": 0.0054, |
| "step": 2760 |
| }, |
| { |
| "epoch": 14.656084656084657, |
| "grad_norm": 0.11416878551244736, |
| "learning_rate": 0.0003904270707780475, |
| "loss": 0.0055, |
| "step": 2770 |
| }, |
| { |
| "epoch": 14.708994708994709, |
| "grad_norm": 0.0949578583240509, |
| "learning_rate": 0.00039031948013946175, |
| "loss": 0.0053, |
| "step": 2780 |
| }, |
| { |
| "epoch": 14.761904761904763, |
| "grad_norm": 0.08923380821943283, |
| "learning_rate": 0.00039021130325903074, |
| "loss": 0.0052, |
| "step": 2790 |
| }, |
| { |
| "epoch": 14.814814814814815, |
| "grad_norm": 0.09841927140951157, |
| "learning_rate": 0.000390102540469972, |
| "loss": 0.005, |
| "step": 2800 |
| }, |
| { |
| "epoch": 14.867724867724867, |
| "grad_norm": 0.07885875552892685, |
| "learning_rate": 0.0003899931921073081, |
| "loss": 0.0049, |
| "step": 2810 |
| }, |
| { |
| "epoch": 14.920634920634921, |
| "grad_norm": 0.0910676047205925, |
| "learning_rate": 0.0003898832585078652, |
| "loss": 0.0048, |
| "step": 2820 |
| }, |
| { |
| "epoch": 14.973544973544973, |
| "grad_norm": 0.08391734957695007, |
| "learning_rate": 0.00038977274001027206, |
| "loss": 0.0049, |
| "step": 2830 |
| }, |
| { |
| "epoch": 15.026455026455027, |
| "grad_norm": 0.0831340104341507, |
| "learning_rate": 0.00038966163695495945, |
| "loss": 0.0049, |
| "step": 2840 |
| }, |
| { |
| "epoch": 15.079365079365079, |
| "grad_norm": 0.10346338152885437, |
| "learning_rate": 0.00038954994968415846, |
| "loss": 0.0055, |
| "step": 2850 |
| }, |
| { |
| "epoch": 15.132275132275133, |
| "grad_norm": 0.09366438537836075, |
| "learning_rate": 0.0003894376785418998, |
| "loss": 0.0054, |
| "step": 2860 |
| }, |
| { |
| "epoch": 15.185185185185185, |
| "grad_norm": 0.09279809892177582, |
| "learning_rate": 0.0003893248238740128, |
| "loss": 0.0056, |
| "step": 2870 |
| }, |
| { |
| "epoch": 15.238095238095237, |
| "grad_norm": 0.08680757135152817, |
| "learning_rate": 0.00038921138602812406, |
| "loss": 0.005, |
| "step": 2880 |
| }, |
| { |
| "epoch": 15.291005291005291, |
| "grad_norm": 0.08765140175819397, |
| "learning_rate": 0.0003890973653536566, |
| "loss": 0.0048, |
| "step": 2890 |
| }, |
| { |
| "epoch": 15.343915343915343, |
| "grad_norm": 0.08962959051132202, |
| "learning_rate": 0.0003889827622018289, |
| "loss": 0.0046, |
| "step": 2900 |
| }, |
| { |
| "epoch": 15.396825396825397, |
| "grad_norm": 0.0812198743224144, |
| "learning_rate": 0.00038886757692565334, |
| "loss": 0.005, |
| "step": 2910 |
| }, |
| { |
| "epoch": 15.44973544973545, |
| "grad_norm": 0.10678814351558685, |
| "learning_rate": 0.00038875180987993564, |
| "loss": 0.005, |
| "step": 2920 |
| }, |
| { |
| "epoch": 15.502645502645503, |
| "grad_norm": 0.06816115230321884, |
| "learning_rate": 0.0003886354614212735, |
| "loss": 0.0048, |
| "step": 2930 |
| }, |
| { |
| "epoch": 15.555555555555555, |
| "grad_norm": 0.09805359691381454, |
| "learning_rate": 0.0003885185319080555, |
| "loss": 0.0047, |
| "step": 2940 |
| }, |
| { |
| "epoch": 15.60846560846561, |
| "grad_norm": 0.082832932472229, |
| "learning_rate": 0.0003884010217004601, |
| "loss": 0.0046, |
| "step": 2950 |
| }, |
| { |
| "epoch": 15.661375661375661, |
| "grad_norm": 0.06807373464107513, |
| "learning_rate": 0.0003882829311604545, |
| "loss": 0.0047, |
| "step": 2960 |
| }, |
| { |
| "epoch": 15.714285714285714, |
| "grad_norm": 0.0932752788066864, |
| "learning_rate": 0.0003881642606517934, |
| "loss": 0.0051, |
| "step": 2970 |
| }, |
| { |
| "epoch": 15.767195767195767, |
| "grad_norm": 0.0888075903058052, |
| "learning_rate": 0.0003880450105400181, |
| "loss": 0.0046, |
| "step": 2980 |
| }, |
| { |
| "epoch": 15.82010582010582, |
| "grad_norm": 0.09350543469190598, |
| "learning_rate": 0.0003879251811924551, |
| "loss": 0.0044, |
| "step": 2990 |
| }, |
| { |
| "epoch": 15.873015873015873, |
| "grad_norm": 0.06647849828004837, |
| "learning_rate": 0.0003878047729782153, |
| "loss": 0.0041, |
| "step": 3000 |
| }, |
| { |
| "epoch": 15.925925925925926, |
| "grad_norm": 0.06874548643827438, |
| "learning_rate": 0.00038768378626819253, |
| "loss": 0.004, |
| "step": 3010 |
| }, |
| { |
| "epoch": 15.97883597883598, |
| "grad_norm": 0.08275860548019409, |
| "learning_rate": 0.0003875622214350626, |
| "loss": 0.0043, |
| "step": 3020 |
| }, |
| { |
| "epoch": 16.03174603174603, |
| "grad_norm": 0.07945531606674194, |
| "learning_rate": 0.00038744007885328227, |
| "loss": 0.0041, |
| "step": 3030 |
| }, |
| { |
| "epoch": 16.084656084656086, |
| "grad_norm": 0.06742502003908157, |
| "learning_rate": 0.00038731735889908773, |
| "loss": 0.0038, |
| "step": 3040 |
| }, |
| { |
| "epoch": 16.137566137566136, |
| "grad_norm": 0.08099709451198578, |
| "learning_rate": 0.0003871940619504938, |
| "loss": 0.004, |
| "step": 3050 |
| }, |
| { |
| "epoch": 16.19047619047619, |
| "grad_norm": 0.07479023188352585, |
| "learning_rate": 0.0003870701883872924, |
| "loss": 0.0041, |
| "step": 3060 |
| }, |
| { |
| "epoch": 16.243386243386244, |
| "grad_norm": 0.07875571399927139, |
| "learning_rate": 0.00038694573859105196, |
| "loss": 0.0041, |
| "step": 3070 |
| }, |
| { |
| "epoch": 16.296296296296298, |
| "grad_norm": 0.0765179693698883, |
| "learning_rate": 0.0003868207129451155, |
| "loss": 0.004, |
| "step": 3080 |
| }, |
| { |
| "epoch": 16.349206349206348, |
| "grad_norm": 0.07733309268951416, |
| "learning_rate": 0.0003866951118346001, |
| "loss": 0.0041, |
| "step": 3090 |
| }, |
| { |
| "epoch": 16.402116402116402, |
| "grad_norm": 0.11193585395812988, |
| "learning_rate": 0.00038656893564639543, |
| "loss": 0.0041, |
| "step": 3100 |
| }, |
| { |
| "epoch": 16.455026455026456, |
| "grad_norm": 0.0782993957400322, |
| "learning_rate": 0.0003864421847691624, |
| "loss": 0.0048, |
| "step": 3110 |
| }, |
| { |
| "epoch": 16.507936507936506, |
| "grad_norm": 0.11347425729036331, |
| "learning_rate": 0.00038631485959333224, |
| "loss": 0.0042, |
| "step": 3120 |
| }, |
| { |
| "epoch": 16.56084656084656, |
| "grad_norm": 0.12052897363901138, |
| "learning_rate": 0.0003861869605111053, |
| "loss": 0.0042, |
| "step": 3130 |
| }, |
| { |
| "epoch": 16.613756613756614, |
| "grad_norm": 0.1033359244465828, |
| "learning_rate": 0.00038605848791644937, |
| "loss": 0.0045, |
| "step": 3140 |
| }, |
| { |
| "epoch": 16.666666666666668, |
| "grad_norm": 0.083700992166996, |
| "learning_rate": 0.0003859294422050994, |
| "loss": 0.0045, |
| "step": 3150 |
| }, |
| { |
| "epoch": 16.719576719576718, |
| "grad_norm": 0.10319680720567703, |
| "learning_rate": 0.00038579982377455517, |
| "loss": 0.0039, |
| "step": 3160 |
| }, |
| { |
| "epoch": 16.772486772486772, |
| "grad_norm": 0.09331599622964859, |
| "learning_rate": 0.00038566963302408096, |
| "loss": 0.0039, |
| "step": 3170 |
| }, |
| { |
| "epoch": 16.825396825396826, |
| "grad_norm": 0.08213219791650772, |
| "learning_rate": 0.00038553887035470383, |
| "loss": 0.0038, |
| "step": 3180 |
| }, |
| { |
| "epoch": 16.87830687830688, |
| "grad_norm": 0.10324909538030624, |
| "learning_rate": 0.00038540753616921254, |
| "loss": 0.0045, |
| "step": 3190 |
| }, |
| { |
| "epoch": 16.93121693121693, |
| "grad_norm": 0.10563361644744873, |
| "learning_rate": 0.0003852756308721563, |
| "loss": 0.0044, |
| "step": 3200 |
| }, |
| { |
| "epoch": 16.984126984126984, |
| "grad_norm": 0.08582045137882233, |
| "learning_rate": 0.0003851431548698435, |
| "loss": 0.0038, |
| "step": 3210 |
| }, |
| { |
| "epoch": 17.037037037037038, |
| "grad_norm": 0.07344099879264832, |
| "learning_rate": 0.0003850101085703405, |
| "loss": 0.0038, |
| "step": 3220 |
| }, |
| { |
| "epoch": 17.08994708994709, |
| "grad_norm": 0.1084500402212143, |
| "learning_rate": 0.0003848764923834704, |
| "loss": 0.004, |
| "step": 3230 |
| }, |
| { |
| "epoch": 17.142857142857142, |
| "grad_norm": 0.0705375075340271, |
| "learning_rate": 0.00038474230672081166, |
| "loss": 0.0036, |
| "step": 3240 |
| }, |
| { |
| "epoch": 17.195767195767196, |
| "grad_norm": 0.0940157026052475, |
| "learning_rate": 0.00038460755199569684, |
| "loss": 0.0035, |
| "step": 3250 |
| }, |
| { |
| "epoch": 17.24867724867725, |
| "grad_norm": 0.062344059348106384, |
| "learning_rate": 0.0003844722286232114, |
| "loss": 0.0034, |
| "step": 3260 |
| }, |
| { |
| "epoch": 17.3015873015873, |
| "grad_norm": 0.06925034523010254, |
| "learning_rate": 0.00038433633702019256, |
| "loss": 0.0034, |
| "step": 3270 |
| }, |
| { |
| "epoch": 17.354497354497354, |
| "grad_norm": 0.08498704433441162, |
| "learning_rate": 0.0003841998776052278, |
| "loss": 0.0037, |
| "step": 3280 |
| }, |
| { |
| "epoch": 17.40740740740741, |
| "grad_norm": 0.0821915790438652, |
| "learning_rate": 0.00038406285079865344, |
| "loss": 0.0038, |
| "step": 3290 |
| }, |
| { |
| "epoch": 17.46031746031746, |
| "grad_norm": 0.08294922858476639, |
| "learning_rate": 0.00038392525702255387, |
| "loss": 0.0036, |
| "step": 3300 |
| }, |
| { |
| "epoch": 17.513227513227513, |
| "grad_norm": 0.08474984765052795, |
| "learning_rate": 0.0003837870967007596, |
| "loss": 0.0035, |
| "step": 3310 |
| }, |
| { |
| "epoch": 17.566137566137566, |
| "grad_norm": 0.06738153845071793, |
| "learning_rate": 0.00038364837025884653, |
| "loss": 0.0038, |
| "step": 3320 |
| }, |
| { |
| "epoch": 17.61904761904762, |
| "grad_norm": 0.08157813549041748, |
| "learning_rate": 0.00038350907812413415, |
| "loss": 0.0043, |
| "step": 3330 |
| }, |
| { |
| "epoch": 17.67195767195767, |
| "grad_norm": 0.11258726567029953, |
| "learning_rate": 0.00038336922072568463, |
| "loss": 0.0041, |
| "step": 3340 |
| }, |
| { |
| "epoch": 17.724867724867725, |
| "grad_norm": 0.08987373113632202, |
| "learning_rate": 0.0003832287984943013, |
| "loss": 0.0036, |
| "step": 3350 |
| }, |
| { |
| "epoch": 17.77777777777778, |
| "grad_norm": 0.07632698118686676, |
| "learning_rate": 0.0003830878118625272, |
| "loss": 0.0033, |
| "step": 3360 |
| }, |
| { |
| "epoch": 17.83068783068783, |
| "grad_norm": 0.07706711441278458, |
| "learning_rate": 0.0003829462612646441, |
| "loss": 0.0035, |
| "step": 3370 |
| }, |
| { |
| "epoch": 17.883597883597883, |
| "grad_norm": 0.06717482209205627, |
| "learning_rate": 0.0003828041471366709, |
| "loss": 0.0034, |
| "step": 3380 |
| }, |
| { |
| "epoch": 17.936507936507937, |
| "grad_norm": 0.08264566957950592, |
| "learning_rate": 0.00038266146991636224, |
| "loss": 0.0037, |
| "step": 3390 |
| }, |
| { |
| "epoch": 17.98941798941799, |
| "grad_norm": 0.08034229278564453, |
| "learning_rate": 0.0003825182300432073, |
| "loss": 0.0033, |
| "step": 3400 |
| }, |
| { |
| "epoch": 18.04232804232804, |
| "grad_norm": 0.08124092221260071, |
| "learning_rate": 0.0003823744279584285, |
| "loss": 0.0034, |
| "step": 3410 |
| }, |
| { |
| "epoch": 18.095238095238095, |
| "grad_norm": 0.06899471580982208, |
| "learning_rate": 0.00038223006410497996, |
| "loss": 0.0033, |
| "step": 3420 |
| }, |
| { |
| "epoch": 18.14814814814815, |
| "grad_norm": 0.12756113708019257, |
| "learning_rate": 0.00038208513892754617, |
| "loss": 0.0037, |
| "step": 3430 |
| }, |
| { |
| "epoch": 18.201058201058203, |
| "grad_norm": 0.08263306319713593, |
| "learning_rate": 0.0003819396528725408, |
| "loss": 0.0036, |
| "step": 3440 |
| }, |
| { |
| "epoch": 18.253968253968253, |
| "grad_norm": 0.07276030629873276, |
| "learning_rate": 0.00038179360638810505, |
| "loss": 0.0038, |
| "step": 3450 |
| }, |
| { |
| "epoch": 18.306878306878307, |
| "grad_norm": 0.060400381684303284, |
| "learning_rate": 0.0003816469999241065, |
| "loss": 0.0037, |
| "step": 3460 |
| }, |
| { |
| "epoch": 18.35978835978836, |
| "grad_norm": 0.09411425143480301, |
| "learning_rate": 0.00038149983393213763, |
| "loss": 0.0037, |
| "step": 3470 |
| }, |
| { |
| "epoch": 18.41269841269841, |
| "grad_norm": 0.07314950972795486, |
| "learning_rate": 0.0003813521088655144, |
| "loss": 0.0039, |
| "step": 3480 |
| }, |
| { |
| "epoch": 18.465608465608465, |
| "grad_norm": 0.0763072744011879, |
| "learning_rate": 0.000381203825179275, |
| "loss": 0.0033, |
| "step": 3490 |
| }, |
| { |
| "epoch": 18.51851851851852, |
| "grad_norm": 0.0973953828215599, |
| "learning_rate": 0.00038105498333017816, |
| "loss": 0.0033, |
| "step": 3500 |
| }, |
| { |
| "epoch": 18.571428571428573, |
| "grad_norm": 0.10556458681821823, |
| "learning_rate": 0.000380905583776702, |
| "loss": 0.0035, |
| "step": 3510 |
| }, |
| { |
| "epoch": 18.624338624338623, |
| "grad_norm": 0.11148570477962494, |
| "learning_rate": 0.0003807556269790427, |
| "loss": 0.0037, |
| "step": 3520 |
| }, |
| { |
| "epoch": 18.677248677248677, |
| "grad_norm": 0.09374136477708817, |
| "learning_rate": 0.0003806051133991127, |
| "loss": 0.0036, |
| "step": 3530 |
| }, |
| { |
| "epoch": 18.73015873015873, |
| "grad_norm": 0.08377573639154434, |
| "learning_rate": 0.0003804540435005395, |
| "loss": 0.0035, |
| "step": 3540 |
| }, |
| { |
| "epoch": 18.78306878306878, |
| "grad_norm": 0.09433193504810333, |
| "learning_rate": 0.00038030241774866433, |
| "loss": 0.0034, |
| "step": 3550 |
| }, |
| { |
| "epoch": 18.835978835978835, |
| "grad_norm": 0.07314567267894745, |
| "learning_rate": 0.00038015023661054057, |
| "loss": 0.0035, |
| "step": 3560 |
| }, |
| { |
| "epoch": 18.88888888888889, |
| "grad_norm": 0.10299256443977356, |
| "learning_rate": 0.00037999750055493255, |
| "loss": 0.0035, |
| "step": 3570 |
| }, |
| { |
| "epoch": 18.941798941798943, |
| "grad_norm": 0.06799434870481491, |
| "learning_rate": 0.00037984421005231355, |
| "loss": 0.0032, |
| "step": 3580 |
| }, |
| { |
| "epoch": 18.994708994708994, |
| "grad_norm": 0.058005448430776596, |
| "learning_rate": 0.000379690365574865, |
| "loss": 0.0029, |
| "step": 3590 |
| }, |
| { |
| "epoch": 19.047619047619047, |
| "grad_norm": 0.06635001301765442, |
| "learning_rate": 0.0003795359675964746, |
| "loss": 0.0029, |
| "step": 3600 |
| }, |
| { |
| "epoch": 19.1005291005291, |
| "grad_norm": 0.11281086504459381, |
| "learning_rate": 0.0003793810165927352, |
| "loss": 0.0033, |
| "step": 3610 |
| }, |
| { |
| "epoch": 19.15343915343915, |
| "grad_norm": 0.08719826489686966, |
| "learning_rate": 0.00037922551304094275, |
| "loss": 0.0034, |
| "step": 3620 |
| }, |
| { |
| "epoch": 19.206349206349206, |
| "grad_norm": 0.1060280054807663, |
| "learning_rate": 0.00037906945742009567, |
| "loss": 0.0031, |
| "step": 3630 |
| }, |
| { |
| "epoch": 19.25925925925926, |
| "grad_norm": 0.07241532951593399, |
| "learning_rate": 0.0003789128502108925, |
| "loss": 0.0031, |
| "step": 3640 |
| }, |
| { |
| "epoch": 19.312169312169313, |
| "grad_norm": 0.08218563348054886, |
| "learning_rate": 0.00037875569189573123, |
| "loss": 0.0033, |
| "step": 3650 |
| }, |
| { |
| "epoch": 19.365079365079364, |
| "grad_norm": 0.1528523564338684, |
| "learning_rate": 0.00037859798295870715, |
| "loss": 0.0036, |
| "step": 3660 |
| }, |
| { |
| "epoch": 19.417989417989418, |
| "grad_norm": 0.11239644885063171, |
| "learning_rate": 0.00037843972388561177, |
| "loss": 0.0039, |
| "step": 3670 |
| }, |
| { |
| "epoch": 19.47089947089947, |
| "grad_norm": 0.10018712282180786, |
| "learning_rate": 0.00037828091516393106, |
| "loss": 0.0036, |
| "step": 3680 |
| }, |
| { |
| "epoch": 19.523809523809526, |
| "grad_norm": 0.07450844347476959, |
| "learning_rate": 0.0003781215572828442, |
| "loss": 0.0033, |
| "step": 3690 |
| }, |
| { |
| "epoch": 19.576719576719576, |
| "grad_norm": 0.0734860748052597, |
| "learning_rate": 0.0003779616507332219, |
| "loss": 0.0032, |
| "step": 3700 |
| }, |
| { |
| "epoch": 19.62962962962963, |
| "grad_norm": 0.07196395099163055, |
| "learning_rate": 0.00037780119600762494, |
| "loss": 0.0031, |
| "step": 3710 |
| }, |
| { |
| "epoch": 19.682539682539684, |
| "grad_norm": 0.09709396213293076, |
| "learning_rate": 0.00037764019360030256, |
| "loss": 0.0032, |
| "step": 3720 |
| }, |
| { |
| "epoch": 19.735449735449734, |
| "grad_norm": 0.09168734401464462, |
| "learning_rate": 0.00037747864400719126, |
| "loss": 0.0033, |
| "step": 3730 |
| }, |
| { |
| "epoch": 19.788359788359788, |
| "grad_norm": 0.07125968486070633, |
| "learning_rate": 0.0003773165477259128, |
| "loss": 0.0034, |
| "step": 3740 |
| }, |
| { |
| "epoch": 19.841269841269842, |
| "grad_norm": 0.07378100603818893, |
| "learning_rate": 0.000377153905255773, |
| "loss": 0.0033, |
| "step": 3750 |
| }, |
| { |
| "epoch": 19.894179894179896, |
| "grad_norm": 0.07083296030759811, |
| "learning_rate": 0.0003769907170977601, |
| "loss": 0.0032, |
| "step": 3760 |
| }, |
| { |
| "epoch": 19.947089947089946, |
| "grad_norm": 0.06470996886491776, |
| "learning_rate": 0.00037682698375454324, |
| "loss": 0.003, |
| "step": 3770 |
| }, |
| { |
| "epoch": 20.0, |
| "grad_norm": 0.09688153117895126, |
| "learning_rate": 0.00037666270573047085, |
| "loss": 0.0033, |
| "step": 3780 |
| }, |
| { |
| "epoch": 20.052910052910054, |
| "grad_norm": 0.11122123897075653, |
| "learning_rate": 0.00037649788353156925, |
| "loss": 0.0036, |
| "step": 3790 |
| }, |
| { |
| "epoch": 20.105820105820104, |
| "grad_norm": 0.08080142736434937, |
| "learning_rate": 0.00037633251766554086, |
| "loss": 0.0033, |
| "step": 3800 |
| }, |
| { |
| "epoch": 20.158730158730158, |
| "grad_norm": 0.08291842043399811, |
| "learning_rate": 0.0003761666086417628, |
| "loss": 0.0034, |
| "step": 3810 |
| }, |
| { |
| "epoch": 20.211640211640212, |
| "grad_norm": 0.11718545109033585, |
| "learning_rate": 0.0003760001569712853, |
| "loss": 0.0037, |
| "step": 3820 |
| }, |
| { |
| "epoch": 20.264550264550266, |
| "grad_norm": 0.09750944375991821, |
| "learning_rate": 0.0003758331631668301, |
| "loss": 0.0037, |
| "step": 3830 |
| }, |
| { |
| "epoch": 20.317460317460316, |
| "grad_norm": 0.11106361448764801, |
| "learning_rate": 0.00037566562774278886, |
| "loss": 0.0032, |
| "step": 3840 |
| }, |
| { |
| "epoch": 20.37037037037037, |
| "grad_norm": 0.0689949169754982, |
| "learning_rate": 0.0003754975512152216, |
| "loss": 0.0035, |
| "step": 3850 |
| }, |
| { |
| "epoch": 20.423280423280424, |
| "grad_norm": 0.08527049422264099, |
| "learning_rate": 0.0003753289341018552, |
| "loss": 0.003, |
| "step": 3860 |
| }, |
| { |
| "epoch": 20.476190476190474, |
| "grad_norm": 0.0683789923787117, |
| "learning_rate": 0.00037515977692208154, |
| "loss": 0.0027, |
| "step": 3870 |
| }, |
| { |
| "epoch": 20.52910052910053, |
| "grad_norm": 0.06511671096086502, |
| "learning_rate": 0.00037499008019695613, |
| "loss": 0.0031, |
| "step": 3880 |
| }, |
| { |
| "epoch": 20.582010582010582, |
| "grad_norm": 0.09722796827554703, |
| "learning_rate": 0.00037481984444919647, |
| "loss": 0.0032, |
| "step": 3890 |
| }, |
| { |
| "epoch": 20.634920634920636, |
| "grad_norm": 0.0859525129199028, |
| "learning_rate": 0.0003746490702031805, |
| "loss": 0.0034, |
| "step": 3900 |
| }, |
| { |
| "epoch": 20.687830687830687, |
| "grad_norm": 0.07728194445371628, |
| "learning_rate": 0.0003744777579849447, |
| "loss": 0.003, |
| "step": 3910 |
| }, |
| { |
| "epoch": 20.74074074074074, |
| "grad_norm": 0.05846023187041283, |
| "learning_rate": 0.00037430590832218273, |
| "loss": 0.003, |
| "step": 3920 |
| }, |
| { |
| "epoch": 20.793650793650794, |
| "grad_norm": 0.06172306835651398, |
| "learning_rate": 0.00037413352174424396, |
| "loss": 0.0028, |
| "step": 3930 |
| }, |
| { |
| "epoch": 20.84656084656085, |
| "grad_norm": 0.08405909687280655, |
| "learning_rate": 0.0003739605987821313, |
| "loss": 0.0028, |
| "step": 3940 |
| }, |
| { |
| "epoch": 20.8994708994709, |
| "grad_norm": 0.07122877985239029, |
| "learning_rate": 0.0003737871399685001, |
| "loss": 0.0028, |
| "step": 3950 |
| }, |
| { |
| "epoch": 20.952380952380953, |
| "grad_norm": 0.06499795615673065, |
| "learning_rate": 0.00037361314583765615, |
| "loss": 0.0029, |
| "step": 3960 |
| }, |
| { |
| "epoch": 21.005291005291006, |
| "grad_norm": 0.09252621978521347, |
| "learning_rate": 0.0003734386169255544, |
| "loss": 0.0033, |
| "step": 3970 |
| }, |
| { |
| "epoch": 21.058201058201057, |
| "grad_norm": 0.0944569781422615, |
| "learning_rate": 0.00037326355376979676, |
| "loss": 0.003, |
| "step": 3980 |
| }, |
| { |
| "epoch": 21.11111111111111, |
| "grad_norm": 0.09000955522060394, |
| "learning_rate": 0.00037308795690963104, |
| "loss": 0.0031, |
| "step": 3990 |
| }, |
| { |
| "epoch": 21.164021164021165, |
| "grad_norm": 0.06754779070615768, |
| "learning_rate": 0.000372911826885949, |
| "loss": 0.0027, |
| "step": 4000 |
| }, |
| { |
| "epoch": 21.21693121693122, |
| "grad_norm": 0.08070409297943115, |
| "learning_rate": 0.00037273516424128465, |
| "loss": 0.0027, |
| "step": 4010 |
| }, |
| { |
| "epoch": 21.26984126984127, |
| "grad_norm": 0.09665092825889587, |
| "learning_rate": 0.00037255796951981255, |
| "loss": 0.003, |
| "step": 4020 |
| }, |
| { |
| "epoch": 21.322751322751323, |
| "grad_norm": 0.09719184041023254, |
| "learning_rate": 0.0003723802432673463, |
| "loss": 0.0031, |
| "step": 4030 |
| }, |
| { |
| "epoch": 21.375661375661377, |
| "grad_norm": 0.08830776065587997, |
| "learning_rate": 0.0003722019860313369, |
| "loss": 0.0031, |
| "step": 4040 |
| }, |
| { |
| "epoch": 21.428571428571427, |
| "grad_norm": 0.09745042771100998, |
| "learning_rate": 0.0003720231983608706, |
| "loss": 0.0034, |
| "step": 4050 |
| }, |
| { |
| "epoch": 21.48148148148148, |
| "grad_norm": 0.07377637177705765, |
| "learning_rate": 0.00037184388080666796, |
| "loss": 0.0027, |
| "step": 4060 |
| }, |
| { |
| "epoch": 21.534391534391535, |
| "grad_norm": 0.06053460016846657, |
| "learning_rate": 0.0003716640339210815, |
| "loss": 0.0028, |
| "step": 4070 |
| }, |
| { |
| "epoch": 21.58730158730159, |
| "grad_norm": 0.0902014896273613, |
| "learning_rate": 0.0003714836582580942, |
| "loss": 0.0031, |
| "step": 4080 |
| }, |
| { |
| "epoch": 21.64021164021164, |
| "grad_norm": 0.10117074847221375, |
| "learning_rate": 0.00037130275437331805, |
| "loss": 0.003, |
| "step": 4090 |
| }, |
| { |
| "epoch": 21.693121693121693, |
| "grad_norm": 0.0773254930973053, |
| "learning_rate": 0.000371121322823992, |
| "loss": 0.0029, |
| "step": 4100 |
| }, |
| { |
| "epoch": 21.746031746031747, |
| "grad_norm": 0.09695654362440109, |
| "learning_rate": 0.00037093936416898027, |
| "loss": 0.0032, |
| "step": 4110 |
| }, |
| { |
| "epoch": 21.798941798941797, |
| "grad_norm": 0.08522791415452957, |
| "learning_rate": 0.00037075687896877084, |
| "loss": 0.003, |
| "step": 4120 |
| }, |
| { |
| "epoch": 21.85185185185185, |
| "grad_norm": 0.06715506315231323, |
| "learning_rate": 0.0003705738677854737, |
| "loss": 0.0031, |
| "step": 4130 |
| }, |
| { |
| "epoch": 21.904761904761905, |
| "grad_norm": 0.07648234814405441, |
| "learning_rate": 0.0003703903311828188, |
| "loss": 0.0029, |
| "step": 4140 |
| }, |
| { |
| "epoch": 21.95767195767196, |
| "grad_norm": 0.07449754327535629, |
| "learning_rate": 0.00037020626972615465, |
| "loss": 0.0032, |
| "step": 4150 |
| }, |
| { |
| "epoch": 22.01058201058201, |
| "grad_norm": 0.09906895458698273, |
| "learning_rate": 0.00037002168398244664, |
| "loss": 0.0033, |
| "step": 4160 |
| }, |
| { |
| "epoch": 22.063492063492063, |
| "grad_norm": 0.0835467204451561, |
| "learning_rate": 0.00036983657452027474, |
| "loss": 0.0031, |
| "step": 4170 |
| }, |
| { |
| "epoch": 22.116402116402117, |
| "grad_norm": 0.09476753324270248, |
| "learning_rate": 0.00036965094190983253, |
| "loss": 0.003, |
| "step": 4180 |
| }, |
| { |
| "epoch": 22.16931216931217, |
| "grad_norm": 0.08117684721946716, |
| "learning_rate": 0.00036946478672292483, |
| "loss": 0.003, |
| "step": 4190 |
| }, |
| { |
| "epoch": 22.22222222222222, |
| "grad_norm": 0.11887771636247635, |
| "learning_rate": 0.00036927810953296615, |
| "loss": 0.0038, |
| "step": 4200 |
| }, |
| { |
| "epoch": 22.275132275132275, |
| "grad_norm": 0.23875179886817932, |
| "learning_rate": 0.000369090910914979, |
| "loss": 0.0056, |
| "step": 4210 |
| }, |
| { |
| "epoch": 22.32804232804233, |
| "grad_norm": 0.10867864638566971, |
| "learning_rate": 0.0003689031914455921, |
| "loss": 0.0053, |
| "step": 4220 |
| }, |
| { |
| "epoch": 22.38095238095238, |
| "grad_norm": 0.11437585949897766, |
| "learning_rate": 0.0003687149517030384, |
| "loss": 0.0043, |
| "step": 4230 |
| }, |
| { |
| "epoch": 22.433862433862434, |
| "grad_norm": 0.10560745000839233, |
| "learning_rate": 0.00036852619226715347, |
| "loss": 0.0041, |
| "step": 4240 |
| }, |
| { |
| "epoch": 22.486772486772487, |
| "grad_norm": 0.09894239902496338, |
| "learning_rate": 0.0003683369137193738, |
| "loss": 0.004, |
| "step": 4250 |
| }, |
| { |
| "epoch": 22.53968253968254, |
| "grad_norm": 0.10818574577569962, |
| "learning_rate": 0.00036814711664273484, |
| "loss": 0.0038, |
| "step": 4260 |
| }, |
| { |
| "epoch": 22.59259259259259, |
| "grad_norm": 0.0938066691160202, |
| "learning_rate": 0.0003679568016218693, |
| "loss": 0.0037, |
| "step": 4270 |
| }, |
| { |
| "epoch": 22.645502645502646, |
| "grad_norm": 0.08017753809690475, |
| "learning_rate": 0.00036776596924300523, |
| "loss": 0.0037, |
| "step": 4280 |
| }, |
| { |
| "epoch": 22.6984126984127, |
| "grad_norm": 0.09411531686782837, |
| "learning_rate": 0.0003675746200939644, |
| "loss": 0.0035, |
| "step": 4290 |
| }, |
| { |
| "epoch": 22.75132275132275, |
| "grad_norm": 0.07573402673006058, |
| "learning_rate": 0.0003673827547641604, |
| "loss": 0.0032, |
| "step": 4300 |
| }, |
| { |
| "epoch": 22.804232804232804, |
| "grad_norm": 0.07840324193239212, |
| "learning_rate": 0.00036719037384459665, |
| "loss": 0.0032, |
| "step": 4310 |
| }, |
| { |
| "epoch": 22.857142857142858, |
| "grad_norm": 0.08790623396635056, |
| "learning_rate": 0.00036699747792786496, |
| "loss": 0.0036, |
| "step": 4320 |
| }, |
| { |
| "epoch": 22.91005291005291, |
| "grad_norm": 0.17838507890701294, |
| "learning_rate": 0.0003668040676081433, |
| "loss": 0.0039, |
| "step": 4330 |
| }, |
| { |
| "epoch": 22.962962962962962, |
| "grad_norm": 0.4083213806152344, |
| "learning_rate": 0.0003666101434811943, |
| "loss": 0.0103, |
| "step": 4340 |
| }, |
| { |
| "epoch": 23.015873015873016, |
| "grad_norm": 0.42006683349609375, |
| "learning_rate": 0.0003664157061443632, |
| "loss": 0.0286, |
| "step": 4350 |
| }, |
| { |
| "epoch": 23.06878306878307, |
| "grad_norm": 0.44540899991989136, |
| "learning_rate": 0.00036622075619657606, |
| "loss": 0.0331, |
| "step": 4360 |
| }, |
| { |
| "epoch": 23.12169312169312, |
| "grad_norm": 0.5338546633720398, |
| "learning_rate": 0.00036602529423833803, |
| "loss": 0.0403, |
| "step": 4370 |
| }, |
| { |
| "epoch": 23.174603174603174, |
| "grad_norm": 0.4317573606967926, |
| "learning_rate": 0.00036582932087173123, |
| "loss": 0.0399, |
| "step": 4380 |
| }, |
| { |
| "epoch": 23.227513227513228, |
| "grad_norm": 0.36144527792930603, |
| "learning_rate": 0.0003656328367004134, |
| "loss": 0.0281, |
| "step": 4390 |
| }, |
| { |
| "epoch": 23.280423280423282, |
| "grad_norm": 0.2512885332107544, |
| "learning_rate": 0.0003654358423296153, |
| "loss": 0.0192, |
| "step": 4400 |
| }, |
| { |
| "epoch": 23.333333333333332, |
| "grad_norm": 0.176627978682518, |
| "learning_rate": 0.0003652383383661396, |
| "loss": 0.0135, |
| "step": 4410 |
| }, |
| { |
| "epoch": 23.386243386243386, |
| "grad_norm": 0.13672363758087158, |
| "learning_rate": 0.0003650403254183585, |
| "loss": 0.0101, |
| "step": 4420 |
| }, |
| { |
| "epoch": 23.43915343915344, |
| "grad_norm": 0.14289478957653046, |
| "learning_rate": 0.00036484180409621206, |
| "loss": 0.0078, |
| "step": 4430 |
| }, |
| { |
| "epoch": 23.49206349206349, |
| "grad_norm": 0.10004527121782303, |
| "learning_rate": 0.00036464277501120624, |
| "loss": 0.0061, |
| "step": 4440 |
| }, |
| { |
| "epoch": 23.544973544973544, |
| "grad_norm": 0.10518872737884521, |
| "learning_rate": 0.0003644432387764113, |
| "loss": 0.0057, |
| "step": 4450 |
| }, |
| { |
| "epoch": 23.597883597883598, |
| "grad_norm": 0.09654070436954498, |
| "learning_rate": 0.0003642431960064592, |
| "loss": 0.0049, |
| "step": 4460 |
| }, |
| { |
| "epoch": 23.650793650793652, |
| "grad_norm": 0.10660598427057266, |
| "learning_rate": 0.00036404264731754274, |
| "loss": 0.0049, |
| "step": 4470 |
| }, |
| { |
| "epoch": 23.703703703703702, |
| "grad_norm": 0.07862317562103271, |
| "learning_rate": 0.0003638415933274127, |
| "loss": 0.0045, |
| "step": 4480 |
| }, |
| { |
| "epoch": 23.756613756613756, |
| "grad_norm": 0.07714495062828064, |
| "learning_rate": 0.0003636400346553765, |
| "loss": 0.0042, |
| "step": 4490 |
| }, |
| { |
| "epoch": 23.80952380952381, |
| "grad_norm": 0.08995921164751053, |
| "learning_rate": 0.0003634379719222961, |
| "loss": 0.0042, |
| "step": 4500 |
| }, |
| { |
| "epoch": 23.862433862433864, |
| "grad_norm": 0.07015353441238403, |
| "learning_rate": 0.0003632354057505862, |
| "loss": 0.004, |
| "step": 4510 |
| }, |
| { |
| "epoch": 23.915343915343914, |
| "grad_norm": 0.07248852401971817, |
| "learning_rate": 0.00036303233676421206, |
| "loss": 0.0041, |
| "step": 4520 |
| }, |
| { |
| "epoch": 23.96825396825397, |
| "grad_norm": 0.07491960376501083, |
| "learning_rate": 0.000362828765588688, |
| "loss": 0.0037, |
| "step": 4530 |
| }, |
| { |
| "epoch": 24.021164021164022, |
| "grad_norm": 0.1126113086938858, |
| "learning_rate": 0.00036262469285107505, |
| "loss": 0.0043, |
| "step": 4540 |
| }, |
| { |
| "epoch": 24.074074074074073, |
| "grad_norm": 0.0801805779337883, |
| "learning_rate": 0.0003624201191799793, |
| "loss": 0.0036, |
| "step": 4550 |
| }, |
| { |
| "epoch": 24.126984126984127, |
| "grad_norm": 0.07318644970655441, |
| "learning_rate": 0.0003622150452055498, |
| "loss": 0.0034, |
| "step": 4560 |
| }, |
| { |
| "epoch": 24.17989417989418, |
| "grad_norm": 0.07393806427717209, |
| "learning_rate": 0.00036200947155947674, |
| "loss": 0.0035, |
| "step": 4570 |
| }, |
| { |
| "epoch": 24.232804232804234, |
| "grad_norm": 0.09606146067380905, |
| "learning_rate": 0.0003618033988749895, |
| "loss": 0.0036, |
| "step": 4580 |
| }, |
| { |
| "epoch": 24.285714285714285, |
| "grad_norm": 0.069748654961586, |
| "learning_rate": 0.00036159682778685447, |
| "loss": 0.0034, |
| "step": 4590 |
| }, |
| { |
| "epoch": 24.33862433862434, |
| "grad_norm": 0.11135067790746689, |
| "learning_rate": 0.00036138975893137347, |
| "loss": 0.0037, |
| "step": 4600 |
| }, |
| { |
| "epoch": 24.391534391534393, |
| "grad_norm": 0.07549436390399933, |
| "learning_rate": 0.00036118219294638146, |
| "loss": 0.0035, |
| "step": 4610 |
| }, |
| { |
| "epoch": 24.444444444444443, |
| "grad_norm": 0.07316253334283829, |
| "learning_rate": 0.0003609741304712448, |
| "loss": 0.0034, |
| "step": 4620 |
| }, |
| { |
| "epoch": 24.497354497354497, |
| "grad_norm": 0.0705738440155983, |
| "learning_rate": 0.000360765572146859, |
| "loss": 0.0037, |
| "step": 4630 |
| }, |
| { |
| "epoch": 24.55026455026455, |
| "grad_norm": 0.08228278160095215, |
| "learning_rate": 0.0003605565186156474, |
| "loss": 0.0034, |
| "step": 4640 |
| }, |
| { |
| "epoch": 24.603174603174605, |
| "grad_norm": 0.07518155127763748, |
| "learning_rate": 0.0003603469705215582, |
| "loss": 0.0032, |
| "step": 4650 |
| }, |
| { |
| "epoch": 24.656084656084655, |
| "grad_norm": 0.07805877178907394, |
| "learning_rate": 0.00036013692851006316, |
| "loss": 0.0037, |
| "step": 4660 |
| }, |
| { |
| "epoch": 24.70899470899471, |
| "grad_norm": 0.09085424989461899, |
| "learning_rate": 0.0003599263932281557, |
| "loss": 0.0043, |
| "step": 4670 |
| }, |
| { |
| "epoch": 24.761904761904763, |
| "grad_norm": 0.07668601721525192, |
| "learning_rate": 0.0003597153653243484, |
| "loss": 0.0045, |
| "step": 4680 |
| }, |
| { |
| "epoch": 24.814814814814813, |
| "grad_norm": 0.07025279849767685, |
| "learning_rate": 0.0003595038454486713, |
| "loss": 0.0042, |
| "step": 4690 |
| }, |
| { |
| "epoch": 24.867724867724867, |
| "grad_norm": 0.11927028745412827, |
| "learning_rate": 0.00035929183425266996, |
| "loss": 0.0075, |
| "step": 4700 |
| }, |
| { |
| "epoch": 24.92063492063492, |
| "grad_norm": 0.13428369164466858, |
| "learning_rate": 0.0003590793323894033, |
| "loss": 0.0104, |
| "step": 4710 |
| }, |
| { |
| "epoch": 24.973544973544975, |
| "grad_norm": 0.2348325103521347, |
| "learning_rate": 0.00035886634051344166, |
| "loss": 0.0096, |
| "step": 4720 |
| }, |
| { |
| "epoch": 25.026455026455025, |
| "grad_norm": 0.1860775500535965, |
| "learning_rate": 0.0003586528592808647, |
| "loss": 0.01, |
| "step": 4730 |
| }, |
| { |
| "epoch": 25.07936507936508, |
| "grad_norm": 0.12654832005500793, |
| "learning_rate": 0.00035843888934925964, |
| "loss": 0.0079, |
| "step": 4740 |
| }, |
| { |
| "epoch": 25.132275132275133, |
| "grad_norm": 0.13935686647891998, |
| "learning_rate": 0.0003582244313777187, |
| "loss": 0.0067, |
| "step": 4750 |
| }, |
| { |
| "epoch": 25.185185185185187, |
| "grad_norm": 0.1062978133559227, |
| "learning_rate": 0.0003580094860268377, |
| "loss": 0.0061, |
| "step": 4760 |
| }, |
| { |
| "epoch": 25.238095238095237, |
| "grad_norm": 0.13322557508945465, |
| "learning_rate": 0.0003577940539587137, |
| "loss": 0.0053, |
| "step": 4770 |
| }, |
| { |
| "epoch": 25.29100529100529, |
| "grad_norm": 0.16509148478507996, |
| "learning_rate": 0.00035757813583694293, |
| "loss": 0.0062, |
| "step": 4780 |
| }, |
| { |
| "epoch": 25.343915343915345, |
| "grad_norm": 0.07526130974292755, |
| "learning_rate": 0.0003573617323266188, |
| "loss": 0.0054, |
| "step": 4790 |
| }, |
| { |
| "epoch": 25.396825396825395, |
| "grad_norm": 0.09176628291606903, |
| "learning_rate": 0.0003571448440943299, |
| "loss": 0.0056, |
| "step": 4800 |
| }, |
| { |
| "epoch": 25.44973544973545, |
| "grad_norm": 0.09155473113059998, |
| "learning_rate": 0.00035692747180815797, |
| "loss": 0.005, |
| "step": 4810 |
| }, |
| { |
| "epoch": 25.502645502645503, |
| "grad_norm": 0.0963868498802185, |
| "learning_rate": 0.0003567096161376757, |
| "loss": 0.0047, |
| "step": 4820 |
| }, |
| { |
| "epoch": 25.555555555555557, |
| "grad_norm": 0.08302408456802368, |
| "learning_rate": 0.0003564912777539447, |
| "loss": 0.004, |
| "step": 4830 |
| }, |
| { |
| "epoch": 25.608465608465607, |
| "grad_norm": 0.06412070989608765, |
| "learning_rate": 0.0003562724573295136, |
| "loss": 0.0036, |
| "step": 4840 |
| }, |
| { |
| "epoch": 25.66137566137566, |
| "grad_norm": 0.07806012034416199, |
| "learning_rate": 0.00035605315553841575, |
| "loss": 0.0035, |
| "step": 4850 |
| }, |
| { |
| "epoch": 25.714285714285715, |
| "grad_norm": 0.08711991459131241, |
| "learning_rate": 0.0003558333730561675, |
| "loss": 0.0033, |
| "step": 4860 |
| }, |
| { |
| "epoch": 25.767195767195766, |
| "grad_norm": 0.0889301747083664, |
| "learning_rate": 0.00035561311055976556, |
| "loss": 0.0037, |
| "step": 4870 |
| }, |
| { |
| "epoch": 25.82010582010582, |
| "grad_norm": 0.11517832428216934, |
| "learning_rate": 0.0003553923687276854, |
| "loss": 0.0038, |
| "step": 4880 |
| }, |
| { |
| "epoch": 25.873015873015873, |
| "grad_norm": 0.07500556111335754, |
| "learning_rate": 0.00035517114823987886, |
| "loss": 0.0038, |
| "step": 4890 |
| }, |
| { |
| "epoch": 25.925925925925927, |
| "grad_norm": 0.06777866929769516, |
| "learning_rate": 0.00035494944977777245, |
| "loss": 0.0038, |
| "step": 4900 |
| }, |
| { |
| "epoch": 25.978835978835978, |
| "grad_norm": 0.0699387863278389, |
| "learning_rate": 0.00035472727402426475, |
| "loss": 0.0036, |
| "step": 4910 |
| }, |
| { |
| "epoch": 26.03174603174603, |
| "grad_norm": 0.06708484888076782, |
| "learning_rate": 0.0003545046216637245, |
| "loss": 0.0034, |
| "step": 4920 |
| }, |
| { |
| "epoch": 26.084656084656086, |
| "grad_norm": 0.08221206814050674, |
| "learning_rate": 0.0003542814933819888, |
| "loss": 0.0036, |
| "step": 4930 |
| }, |
| { |
| "epoch": 26.137566137566136, |
| "grad_norm": 0.08084139227867126, |
| "learning_rate": 0.00035405788986636057, |
| "loss": 0.0033, |
| "step": 4940 |
| }, |
| { |
| "epoch": 26.19047619047619, |
| "grad_norm": 0.05680591240525246, |
| "learning_rate": 0.00035383381180560653, |
| "loss": 0.0029, |
| "step": 4950 |
| }, |
| { |
| "epoch": 26.243386243386244, |
| "grad_norm": 0.06472446769475937, |
| "learning_rate": 0.00035360925988995516, |
| "loss": 0.0036, |
| "step": 4960 |
| }, |
| { |
| "epoch": 26.296296296296298, |
| "grad_norm": 0.07075701653957367, |
| "learning_rate": 0.0003533842348110947, |
| "loss": 0.003, |
| "step": 4970 |
| }, |
| { |
| "epoch": 26.349206349206348, |
| "grad_norm": 0.06456038355827332, |
| "learning_rate": 0.0003531587372621708, |
| "loss": 0.0027, |
| "step": 4980 |
| }, |
| { |
| "epoch": 26.402116402116402, |
| "grad_norm": 0.06482533365488052, |
| "learning_rate": 0.0003529327679377844, |
| "loss": 0.0031, |
| "step": 4990 |
| }, |
| { |
| "epoch": 26.455026455026456, |
| "grad_norm": 0.06165693700313568, |
| "learning_rate": 0.0003527063275339898, |
| "loss": 0.003, |
| "step": 5000 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 18900, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 100, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 2.7759626173505107e+22, |
| "train_batch_size": 64, |
| "trial_name": null, |
| "trial_params": null |
| } |