{ "best_metric": null, "best_model_checkpoint": null, "epoch": 142.04545454545453, "eval_steps": 500, "global_step": 50000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.028409090909090908, "grad_norm": 145.83482360839844, "learning_rate": 0.0001, "loss": 19.629, "step": 10 }, { "epoch": 0.056818181818181816, "grad_norm": 76.42560577392578, "learning_rate": 0.0001, "loss": 4.9944, "step": 20 }, { "epoch": 0.08522727272727272, "grad_norm": 75.80607604980469, "learning_rate": 0.0001, "loss": 4.0315, "step": 30 }, { "epoch": 0.11363636363636363, "grad_norm": 73.45606231689453, "learning_rate": 0.0001, "loss": 3.6022, "step": 40 }, { "epoch": 0.14204545454545456, "grad_norm": 63.843379974365234, "learning_rate": 0.0001, "loss": 3.4001, "step": 50 }, { "epoch": 0.17045454545454544, "grad_norm": 62.33827209472656, "learning_rate": 0.0001, "loss": 3.1379, "step": 60 }, { "epoch": 0.19886363636363635, "grad_norm": 48.174617767333984, "learning_rate": 0.0001, "loss": 2.9382, "step": 70 }, { "epoch": 0.22727272727272727, "grad_norm": 51.949546813964844, "learning_rate": 0.0001, "loss": 2.7898, "step": 80 }, { "epoch": 0.2556818181818182, "grad_norm": 55.129215240478516, "learning_rate": 0.0001, "loss": 2.6821, "step": 90 }, { "epoch": 0.2840909090909091, "grad_norm": 57.89436721801758, "learning_rate": 0.0001, "loss": 2.6053, "step": 100 }, { "epoch": 0.3125, "grad_norm": 60.28406524658203, "learning_rate": 0.0001, "loss": 2.4816, "step": 110 }, { "epoch": 0.3409090909090909, "grad_norm": 56.817726135253906, "learning_rate": 0.0001, "loss": 2.3979, "step": 120 }, { "epoch": 0.3693181818181818, "grad_norm": 50.40308380126953, "learning_rate": 0.0001, "loss": 2.2238, "step": 130 }, { "epoch": 0.3977272727272727, "grad_norm": 50.979698181152344, "learning_rate": 0.0001, "loss": 2.1042, "step": 140 }, { "epoch": 0.42613636363636365, "grad_norm": 49.087684631347656, "learning_rate": 0.0001, "loss": 1.9842, "step": 150 }, { "epoch": 0.45454545454545453, "grad_norm": 42.376319885253906, "learning_rate": 0.0001, "loss": 1.9271, "step": 160 }, { "epoch": 0.48295454545454547, "grad_norm": 52.510475158691406, "learning_rate": 0.0001, "loss": 1.8932, "step": 170 }, { "epoch": 0.5113636363636364, "grad_norm": 47.899627685546875, "learning_rate": 0.0001, "loss": 1.835, "step": 180 }, { "epoch": 0.5397727272727273, "grad_norm": 49.3372688293457, "learning_rate": 0.0001, "loss": 1.723, "step": 190 }, { "epoch": 0.5681818181818182, "grad_norm": 53.45163345336914, "learning_rate": 0.0001, "loss": 1.6617, "step": 200 }, { "epoch": 0.5965909090909091, "grad_norm": 40.74541091918945, "learning_rate": 0.0001, "loss": 1.6427, "step": 210 }, { "epoch": 0.625, "grad_norm": 45.38197708129883, "learning_rate": 0.0001, "loss": 1.5676, "step": 220 }, { "epoch": 0.6534090909090909, "grad_norm": 44.96558380126953, "learning_rate": 0.0001, "loss": 1.4756, "step": 230 }, { "epoch": 0.6818181818181818, "grad_norm": 39.52370071411133, "learning_rate": 0.0001, "loss": 1.4001, "step": 240 }, { "epoch": 0.7102272727272727, "grad_norm": 35.26641845703125, "learning_rate": 0.0001, "loss": 1.346, "step": 250 }, { "epoch": 0.7386363636363636, "grad_norm": 43.04871368408203, "learning_rate": 0.0001, "loss": 1.2858, "step": 260 }, { "epoch": 0.7670454545454546, "grad_norm": 41.421043395996094, "learning_rate": 0.0001, "loss": 1.2311, "step": 270 }, { "epoch": 0.7954545454545454, "grad_norm": 34.36821365356445, "learning_rate": 0.0001, "loss": 1.1698, "step": 280 }, { "epoch": 0.8238636363636364, "grad_norm": 28.939420700073242, "learning_rate": 0.0001, "loss": 1.1007, "step": 290 }, { "epoch": 0.8522727272727273, "grad_norm": 34.10892868041992, "learning_rate": 0.0001, "loss": 1.0837, "step": 300 }, { "epoch": 0.8806818181818182, "grad_norm": 32.707054138183594, "learning_rate": 0.0001, "loss": 1.037, "step": 310 }, { "epoch": 0.9090909090909091, "grad_norm": 35.907508850097656, "learning_rate": 0.0001, "loss": 1.0093, "step": 320 }, { "epoch": 0.9375, "grad_norm": 37.658451080322266, "learning_rate": 0.0001, "loss": 0.9501, "step": 330 }, { "epoch": 0.9659090909090909, "grad_norm": 29.12462615966797, "learning_rate": 0.0001, "loss": 0.9148, "step": 340 }, { "epoch": 0.9943181818181818, "grad_norm": 31.38657569885254, "learning_rate": 0.0001, "loss": 0.8863, "step": 350 }, { "epoch": 1.0227272727272727, "grad_norm": 29.956890106201172, "learning_rate": 0.0001, "loss": 0.8712, "step": 360 }, { "epoch": 1.0511363636363635, "grad_norm": 26.45981788635254, "learning_rate": 0.0001, "loss": 0.8092, "step": 370 }, { "epoch": 1.0795454545454546, "grad_norm": 27.67877197265625, "learning_rate": 0.0001, "loss": 0.8083, "step": 380 }, { "epoch": 1.1079545454545454, "grad_norm": 29.698911666870117, "learning_rate": 0.0001, "loss": 0.7937, "step": 390 }, { "epoch": 1.1363636363636362, "grad_norm": 25.87833595275879, "learning_rate": 0.0001, "loss": 0.7842, "step": 400 }, { "epoch": 1.1647727272727273, "grad_norm": 30.7982120513916, "learning_rate": 0.0001, "loss": 0.7507, "step": 410 }, { "epoch": 1.1931818181818181, "grad_norm": 25.391246795654297, "learning_rate": 0.0001, "loss": 0.7208, "step": 420 }, { "epoch": 1.2215909090909092, "grad_norm": 22.389162063598633, "learning_rate": 0.0001, "loss": 0.6947, "step": 430 }, { "epoch": 1.25, "grad_norm": 21.166950225830078, "learning_rate": 0.0001, "loss": 0.6735, "step": 440 }, { "epoch": 1.2784090909090908, "grad_norm": 20.702722549438477, "learning_rate": 0.0001, "loss": 0.6466, "step": 450 }, { "epoch": 1.3068181818181819, "grad_norm": 25.78806495666504, "learning_rate": 0.0001, "loss": 0.6449, "step": 460 }, { "epoch": 1.3352272727272727, "grad_norm": 19.319217681884766, "learning_rate": 0.0001, "loss": 0.6416, "step": 470 }, { "epoch": 1.3636363636363638, "grad_norm": 22.997730255126953, "learning_rate": 0.0001, "loss": 0.6296, "step": 480 }, { "epoch": 1.3920454545454546, "grad_norm": 21.263296127319336, "learning_rate": 0.0001, "loss": 0.586, "step": 490 }, { "epoch": 1.4204545454545454, "grad_norm": 18.75411605834961, "learning_rate": 0.0001, "loss": 0.565, "step": 500 }, { "epoch": 1.4488636363636362, "grad_norm": 21.52752113342285, "learning_rate": 0.0001, "loss": 0.5923, "step": 510 }, { "epoch": 1.4772727272727273, "grad_norm": 21.464553833007812, "learning_rate": 0.0001, "loss": 0.5726, "step": 520 }, { "epoch": 1.5056818181818183, "grad_norm": 19.978652954101562, "learning_rate": 0.0001, "loss": 0.5734, "step": 530 }, { "epoch": 1.5340909090909092, "grad_norm": 21.40723419189453, "learning_rate": 0.0001, "loss": 0.549, "step": 540 }, { "epoch": 1.5625, "grad_norm": 18.181068420410156, "learning_rate": 0.0001, "loss": 0.5337, "step": 550 }, { "epoch": 1.5909090909090908, "grad_norm": 16.99245262145996, "learning_rate": 0.0001, "loss": 0.528, "step": 560 }, { "epoch": 1.6193181818181817, "grad_norm": 18.75094223022461, "learning_rate": 0.0001, "loss": 0.5316, "step": 570 }, { "epoch": 1.6477272727272727, "grad_norm": 23.37386703491211, "learning_rate": 0.0001, "loss": 0.5116, "step": 580 }, { "epoch": 1.6761363636363638, "grad_norm": 20.599090576171875, "learning_rate": 0.0001, "loss": 0.5135, "step": 590 }, { "epoch": 1.7045454545454546, "grad_norm": 19.43827247619629, "learning_rate": 0.0001, "loss": 0.4833, "step": 600 }, { "epoch": 1.7329545454545454, "grad_norm": 16.123802185058594, "learning_rate": 0.0001, "loss": 0.486, "step": 610 }, { "epoch": 1.7613636363636362, "grad_norm": 18.867568969726562, "learning_rate": 0.0001, "loss": 0.47, "step": 620 }, { "epoch": 1.7897727272727273, "grad_norm": 16.580411911010742, "learning_rate": 0.0001, "loss": 0.4803, "step": 630 }, { "epoch": 1.8181818181818183, "grad_norm": 16.470767974853516, "learning_rate": 0.0001, "loss": 0.4763, "step": 640 }, { "epoch": 1.8465909090909092, "grad_norm": 15.845813751220703, "learning_rate": 0.0001, "loss": 0.4796, "step": 650 }, { "epoch": 1.875, "grad_norm": 16.313016891479492, "learning_rate": 0.0001, "loss": 0.4726, "step": 660 }, { "epoch": 1.9034090909090908, "grad_norm": 16.114171981811523, "learning_rate": 0.0001, "loss": 0.4657, "step": 670 }, { "epoch": 1.9318181818181817, "grad_norm": 18.153635025024414, "learning_rate": 0.0001, "loss": 0.4656, "step": 680 }, { "epoch": 1.9602272727272727, "grad_norm": 19.012916564941406, "learning_rate": 0.0001, "loss": 0.4543, "step": 690 }, { "epoch": 1.9886363636363638, "grad_norm": 15.661040306091309, "learning_rate": 0.0001, "loss": 0.4451, "step": 700 }, { "epoch": 2.0170454545454546, "grad_norm": 20.665252685546875, "learning_rate": 0.0001, "loss": 0.4542, "step": 710 }, { "epoch": 2.0454545454545454, "grad_norm": 18.220745086669922, "learning_rate": 0.0001, "loss": 0.4615, "step": 720 }, { "epoch": 2.0738636363636362, "grad_norm": 15.511617660522461, "learning_rate": 0.0001, "loss": 0.4379, "step": 730 }, { "epoch": 2.102272727272727, "grad_norm": 16.05436897277832, "learning_rate": 0.0001, "loss": 0.4457, "step": 740 }, { "epoch": 2.1306818181818183, "grad_norm": 14.067421913146973, "learning_rate": 0.0001, "loss": 0.4404, "step": 750 }, { "epoch": 2.159090909090909, "grad_norm": 17.595314025878906, "learning_rate": 0.0001, "loss": 0.4359, "step": 760 }, { "epoch": 2.1875, "grad_norm": 14.292813301086426, "learning_rate": 0.0001, "loss": 0.4265, "step": 770 }, { "epoch": 2.215909090909091, "grad_norm": 13.254941940307617, "learning_rate": 0.0001, "loss": 0.4282, "step": 780 }, { "epoch": 2.2443181818181817, "grad_norm": 14.131694793701172, "learning_rate": 0.0001, "loss": 0.4154, "step": 790 }, { "epoch": 2.2727272727272725, "grad_norm": 16.197458267211914, "learning_rate": 0.0001, "loss": 0.4271, "step": 800 }, { "epoch": 2.3011363636363638, "grad_norm": 13.791603088378906, "learning_rate": 0.0001, "loss": 0.4159, "step": 810 }, { "epoch": 2.3295454545454546, "grad_norm": 15.197473526000977, "learning_rate": 0.0001, "loss": 0.4011, "step": 820 }, { "epoch": 2.3579545454545454, "grad_norm": 16.548952102661133, "learning_rate": 0.0001, "loss": 0.4153, "step": 830 }, { "epoch": 2.3863636363636362, "grad_norm": 15.021014213562012, "learning_rate": 0.0001, "loss": 0.4077, "step": 840 }, { "epoch": 2.4147727272727275, "grad_norm": 11.394856452941895, "learning_rate": 0.0001, "loss": 0.412, "step": 850 }, { "epoch": 2.4431818181818183, "grad_norm": 14.868868827819824, "learning_rate": 0.0001, "loss": 0.3898, "step": 860 }, { "epoch": 2.471590909090909, "grad_norm": 15.362899780273438, "learning_rate": 0.0001, "loss": 0.3985, "step": 870 }, { "epoch": 2.5, "grad_norm": 12.927398681640625, "learning_rate": 0.0001, "loss": 0.3969, "step": 880 }, { "epoch": 2.528409090909091, "grad_norm": 12.813764572143555, "learning_rate": 0.0001, "loss": 0.3808, "step": 890 }, { "epoch": 2.5568181818181817, "grad_norm": 14.54391860961914, "learning_rate": 0.0001, "loss": 0.3786, "step": 900 }, { "epoch": 2.5852272727272725, "grad_norm": 12.791834831237793, "learning_rate": 0.0001, "loss": 0.3891, "step": 910 }, { "epoch": 2.6136363636363638, "grad_norm": 12.401715278625488, "learning_rate": 0.0001, "loss": 0.3903, "step": 920 }, { "epoch": 2.6420454545454546, "grad_norm": 12.847018241882324, "learning_rate": 0.0001, "loss": 0.3741, "step": 930 }, { "epoch": 2.6704545454545454, "grad_norm": 14.203393936157227, "learning_rate": 0.0001, "loss": 0.3827, "step": 940 }, { "epoch": 2.6988636363636362, "grad_norm": 14.513806343078613, "learning_rate": 0.0001, "loss": 0.3848, "step": 950 }, { "epoch": 2.7272727272727275, "grad_norm": 15.528099060058594, "learning_rate": 0.0001, "loss": 0.3861, "step": 960 }, { "epoch": 2.7556818181818183, "grad_norm": 16.94673728942871, "learning_rate": 0.0001, "loss": 0.3796, "step": 970 }, { "epoch": 2.784090909090909, "grad_norm": 14.714554786682129, "learning_rate": 0.0001, "loss": 0.3683, "step": 980 }, { "epoch": 2.8125, "grad_norm": 13.82036304473877, "learning_rate": 0.0001, "loss": 0.3751, "step": 990 }, { "epoch": 2.840909090909091, "grad_norm": 13.940563201904297, "learning_rate": 0.0001, "loss": 0.3719, "step": 1000 }, { "epoch": 2.8693181818181817, "grad_norm": 13.515235900878906, "learning_rate": 0.0001, "loss": 0.3761, "step": 1010 }, { "epoch": 2.8977272727272725, "grad_norm": 14.955562591552734, "learning_rate": 0.0001, "loss": 0.3576, "step": 1020 }, { "epoch": 2.9261363636363638, "grad_norm": 11.280851364135742, "learning_rate": 0.0001, "loss": 0.3614, "step": 1030 }, { "epoch": 2.9545454545454546, "grad_norm": 12.09704303741455, "learning_rate": 0.0001, "loss": 0.3661, "step": 1040 }, { "epoch": 2.9829545454545454, "grad_norm": 14.392845153808594, "learning_rate": 0.0001, "loss": 0.3722, "step": 1050 }, { "epoch": 3.0113636363636362, "grad_norm": 13.309704780578613, "learning_rate": 0.0001, "loss": 0.3595, "step": 1060 }, { "epoch": 3.039772727272727, "grad_norm": 15.42646312713623, "learning_rate": 0.0001, "loss": 0.3427, "step": 1070 }, { "epoch": 3.0681818181818183, "grad_norm": 10.911493301391602, "learning_rate": 0.0001, "loss": 0.3517, "step": 1080 }, { "epoch": 3.096590909090909, "grad_norm": 12.293902397155762, "learning_rate": 0.0001, "loss": 0.3398, "step": 1090 }, { "epoch": 3.125, "grad_norm": 13.060087203979492, "learning_rate": 0.0001, "loss": 0.3491, "step": 1100 }, { "epoch": 3.153409090909091, "grad_norm": 11.007771492004395, "learning_rate": 0.0001, "loss": 0.3566, "step": 1110 }, { "epoch": 3.1818181818181817, "grad_norm": 13.680668830871582, "learning_rate": 0.0001, "loss": 0.341, "step": 1120 }, { "epoch": 3.210227272727273, "grad_norm": 9.585054397583008, "learning_rate": 0.0001, "loss": 0.3582, "step": 1130 }, { "epoch": 3.2386363636363638, "grad_norm": 12.668915748596191, "learning_rate": 0.0001, "loss": 0.351, "step": 1140 }, { "epoch": 3.2670454545454546, "grad_norm": 12.355649948120117, "learning_rate": 0.0001, "loss": 0.3489, "step": 1150 }, { "epoch": 3.2954545454545454, "grad_norm": 11.911877632141113, "learning_rate": 0.0001, "loss": 0.3348, "step": 1160 }, { "epoch": 3.3238636363636362, "grad_norm": 11.719152450561523, "learning_rate": 0.0001, "loss": 0.3364, "step": 1170 }, { "epoch": 3.3522727272727275, "grad_norm": 10.242898941040039, "learning_rate": 0.0001, "loss": 0.3264, "step": 1180 }, { "epoch": 3.3806818181818183, "grad_norm": 9.679462432861328, "learning_rate": 0.0001, "loss": 0.3251, "step": 1190 }, { "epoch": 3.409090909090909, "grad_norm": 10.055230140686035, "learning_rate": 0.0001, "loss": 0.3275, "step": 1200 }, { "epoch": 3.4375, "grad_norm": 8.881628036499023, "learning_rate": 0.0001, "loss": 0.325, "step": 1210 }, { "epoch": 3.465909090909091, "grad_norm": 11.237067222595215, "learning_rate": 0.0001, "loss": 0.3497, "step": 1220 }, { "epoch": 3.4943181818181817, "grad_norm": 12.494010925292969, "learning_rate": 0.0001, "loss": 0.334, "step": 1230 }, { "epoch": 3.5227272727272725, "grad_norm": 11.651463508605957, "learning_rate": 0.0001, "loss": 0.3291, "step": 1240 }, { "epoch": 3.5511363636363638, "grad_norm": 11.586341857910156, "learning_rate": 0.0001, "loss": 0.3236, "step": 1250 }, { "epoch": 3.5795454545454546, "grad_norm": 10.351299285888672, "learning_rate": 0.0001, "loss": 0.3314, "step": 1260 }, { "epoch": 3.6079545454545454, "grad_norm": 11.262849807739258, "learning_rate": 0.0001, "loss": 0.3375, "step": 1270 }, { "epoch": 3.6363636363636362, "grad_norm": 10.904534339904785, "learning_rate": 0.0001, "loss": 0.3281, "step": 1280 }, { "epoch": 3.6647727272727275, "grad_norm": 8.963489532470703, "learning_rate": 0.0001, "loss": 0.3298, "step": 1290 }, { "epoch": 3.6931818181818183, "grad_norm": 10.735923767089844, "learning_rate": 0.0001, "loss": 0.3272, "step": 1300 }, { "epoch": 3.721590909090909, "grad_norm": 12.867420196533203, "learning_rate": 0.0001, "loss": 0.3234, "step": 1310 }, { "epoch": 3.75, "grad_norm": 11.347630500793457, "learning_rate": 0.0001, "loss": 0.3209, "step": 1320 }, { "epoch": 3.778409090909091, "grad_norm": 10.211435317993164, "learning_rate": 0.0001, "loss": 0.3173, "step": 1330 }, { "epoch": 3.8068181818181817, "grad_norm": 8.992242813110352, "learning_rate": 0.0001, "loss": 0.3199, "step": 1340 }, { "epoch": 3.8352272727272725, "grad_norm": 9.036025047302246, "learning_rate": 0.0001, "loss": 0.3124, "step": 1350 }, { "epoch": 3.8636363636363638, "grad_norm": 11.357304573059082, "learning_rate": 0.0001, "loss": 0.3256, "step": 1360 }, { "epoch": 3.8920454545454546, "grad_norm": 12.447697639465332, "learning_rate": 0.0001, "loss": 0.3234, "step": 1370 }, { "epoch": 3.9204545454545454, "grad_norm": 10.388401985168457, "learning_rate": 0.0001, "loss": 0.3134, "step": 1380 }, { "epoch": 3.9488636363636362, "grad_norm": 10.227154731750488, "learning_rate": 0.0001, "loss": 0.3036, "step": 1390 }, { "epoch": 3.9772727272727275, "grad_norm": 11.007589340209961, "learning_rate": 0.0001, "loss": 0.3087, "step": 1400 }, { "epoch": 4.005681818181818, "grad_norm": 9.138888359069824, "learning_rate": 0.0001, "loss": 0.3109, "step": 1410 }, { "epoch": 4.034090909090909, "grad_norm": 9.967912673950195, "learning_rate": 0.0001, "loss": 0.307, "step": 1420 }, { "epoch": 4.0625, "grad_norm": 10.28461742401123, "learning_rate": 0.0001, "loss": 0.2973, "step": 1430 }, { "epoch": 4.090909090909091, "grad_norm": 9.785955429077148, "learning_rate": 0.0001, "loss": 0.3004, "step": 1440 }, { "epoch": 4.119318181818182, "grad_norm": 10.850992202758789, "learning_rate": 0.0001, "loss": 0.3005, "step": 1450 }, { "epoch": 4.1477272727272725, "grad_norm": 12.91825008392334, "learning_rate": 0.0001, "loss": 0.3005, "step": 1460 }, { "epoch": 4.176136363636363, "grad_norm": 11.555893898010254, "learning_rate": 0.0001, "loss": 0.3107, "step": 1470 }, { "epoch": 4.204545454545454, "grad_norm": 10.81035327911377, "learning_rate": 0.0001, "loss": 0.2977, "step": 1480 }, { "epoch": 4.232954545454546, "grad_norm": 11.161906242370605, "learning_rate": 0.0001, "loss": 0.2962, "step": 1490 }, { "epoch": 4.261363636363637, "grad_norm": 12.040653228759766, "learning_rate": 0.0001, "loss": 0.3025, "step": 1500 }, { "epoch": 4.2897727272727275, "grad_norm": 9.015117645263672, "learning_rate": 0.0001, "loss": 0.292, "step": 1510 }, { "epoch": 4.318181818181818, "grad_norm": 10.438865661621094, "learning_rate": 0.0001, "loss": 0.2881, "step": 1520 }, { "epoch": 4.346590909090909, "grad_norm": 10.363481521606445, "learning_rate": 0.0001, "loss": 0.2919, "step": 1530 }, { "epoch": 4.375, "grad_norm": 10.898138999938965, "learning_rate": 0.0001, "loss": 0.3035, "step": 1540 }, { "epoch": 4.403409090909091, "grad_norm": 9.264910697937012, "learning_rate": 0.0001, "loss": 0.2866, "step": 1550 }, { "epoch": 4.431818181818182, "grad_norm": 9.535000801086426, "learning_rate": 0.0001, "loss": 0.2833, "step": 1560 }, { "epoch": 4.4602272727272725, "grad_norm": 10.703611373901367, "learning_rate": 0.0001, "loss": 0.2977, "step": 1570 }, { "epoch": 4.488636363636363, "grad_norm": 8.830336570739746, "learning_rate": 0.0001, "loss": 0.2822, "step": 1580 }, { "epoch": 4.517045454545455, "grad_norm": 9.80781364440918, "learning_rate": 0.0001, "loss": 0.2897, "step": 1590 }, { "epoch": 4.545454545454545, "grad_norm": 9.538243293762207, "learning_rate": 0.0001, "loss": 0.2893, "step": 1600 }, { "epoch": 4.573863636363637, "grad_norm": 7.803942680358887, "learning_rate": 0.0001, "loss": 0.2898, "step": 1610 }, { "epoch": 4.6022727272727275, "grad_norm": 9.329748153686523, "learning_rate": 0.0001, "loss": 0.288, "step": 1620 }, { "epoch": 4.630681818181818, "grad_norm": 9.706318855285645, "learning_rate": 0.0001, "loss": 0.2824, "step": 1630 }, { "epoch": 4.659090909090909, "grad_norm": 8.528480529785156, "learning_rate": 0.0001, "loss": 0.2969, "step": 1640 }, { "epoch": 4.6875, "grad_norm": 8.045533180236816, "learning_rate": 0.0001, "loss": 0.2995, "step": 1650 }, { "epoch": 4.715909090909091, "grad_norm": 8.474618911743164, "learning_rate": 0.0001, "loss": 0.2906, "step": 1660 }, { "epoch": 4.744318181818182, "grad_norm": 9.250617027282715, "learning_rate": 0.0001, "loss": 0.2915, "step": 1670 }, { "epoch": 4.7727272727272725, "grad_norm": 8.805644989013672, "learning_rate": 0.0001, "loss": 0.2835, "step": 1680 }, { "epoch": 4.801136363636363, "grad_norm": 8.365328788757324, "learning_rate": 0.0001, "loss": 0.2872, "step": 1690 }, { "epoch": 4.829545454545455, "grad_norm": 9.336677551269531, "learning_rate": 0.0001, "loss": 0.2822, "step": 1700 }, { "epoch": 4.857954545454545, "grad_norm": 8.8048095703125, "learning_rate": 0.0001, "loss": 0.2802, "step": 1710 }, { "epoch": 4.886363636363637, "grad_norm": 10.265268325805664, "learning_rate": 0.0001, "loss": 0.2718, "step": 1720 }, { "epoch": 4.9147727272727275, "grad_norm": 9.204639434814453, "learning_rate": 0.0001, "loss": 0.2874, "step": 1730 }, { "epoch": 4.943181818181818, "grad_norm": 8.516647338867188, "learning_rate": 0.0001, "loss": 0.2913, "step": 1740 }, { "epoch": 4.971590909090909, "grad_norm": 7.545566082000732, "learning_rate": 0.0001, "loss": 0.2799, "step": 1750 }, { "epoch": 5.0, "grad_norm": 9.611028671264648, "learning_rate": 0.0001, "loss": 0.2816, "step": 1760 }, { "epoch": 5.028409090909091, "grad_norm": 7.730203151702881, "learning_rate": 0.0001, "loss": 0.278, "step": 1770 }, { "epoch": 5.056818181818182, "grad_norm": 9.771706581115723, "learning_rate": 0.0001, "loss": 0.2764, "step": 1780 }, { "epoch": 5.0852272727272725, "grad_norm": 8.7466402053833, "learning_rate": 0.0001, "loss": 0.2823, "step": 1790 }, { "epoch": 5.113636363636363, "grad_norm": 9.843619346618652, "learning_rate": 0.0001, "loss": 0.2654, "step": 1800 }, { "epoch": 5.142045454545454, "grad_norm": 8.296882629394531, "learning_rate": 0.0001, "loss": 0.2691, "step": 1810 }, { "epoch": 5.170454545454546, "grad_norm": 8.18472957611084, "learning_rate": 0.0001, "loss": 0.2644, "step": 1820 }, { "epoch": 5.198863636363637, "grad_norm": 8.96210765838623, "learning_rate": 0.0001, "loss": 0.2688, "step": 1830 }, { "epoch": 5.2272727272727275, "grad_norm": 9.177153587341309, "learning_rate": 0.0001, "loss": 0.2683, "step": 1840 }, { "epoch": 5.255681818181818, "grad_norm": 7.267095565795898, "learning_rate": 0.0001, "loss": 0.2673, "step": 1850 }, { "epoch": 5.284090909090909, "grad_norm": 8.78824520111084, "learning_rate": 0.0001, "loss": 0.2629, "step": 1860 }, { "epoch": 5.3125, "grad_norm": 8.33309268951416, "learning_rate": 0.0001, "loss": 0.2635, "step": 1870 }, { "epoch": 5.340909090909091, "grad_norm": 9.574383735656738, "learning_rate": 0.0001, "loss": 0.2563, "step": 1880 }, { "epoch": 5.369318181818182, "grad_norm": 7.813918590545654, "learning_rate": 0.0001, "loss": 0.2467, "step": 1890 }, { "epoch": 5.3977272727272725, "grad_norm": 9.375533103942871, "learning_rate": 0.0001, "loss": 0.262, "step": 1900 }, { "epoch": 5.426136363636363, "grad_norm": 9.987363815307617, "learning_rate": 0.0001, "loss": 0.2618, "step": 1910 }, { "epoch": 5.454545454545454, "grad_norm": 10.02425479888916, "learning_rate": 0.0001, "loss": 0.2635, "step": 1920 }, { "epoch": 5.482954545454546, "grad_norm": 9.342535972595215, "learning_rate": 0.0001, "loss": 0.2497, "step": 1930 }, { "epoch": 5.511363636363637, "grad_norm": 9.32978343963623, "learning_rate": 0.0001, "loss": 0.2592, "step": 1940 }, { "epoch": 5.5397727272727275, "grad_norm": 7.348328113555908, "learning_rate": 0.0001, "loss": 0.2592, "step": 1950 }, { "epoch": 5.568181818181818, "grad_norm": 8.86340045928955, "learning_rate": 0.0001, "loss": 0.2541, "step": 1960 }, { "epoch": 5.596590909090909, "grad_norm": 8.326016426086426, "learning_rate": 0.0001, "loss": 0.2663, "step": 1970 }, { "epoch": 5.625, "grad_norm": 8.392045021057129, "learning_rate": 0.0001, "loss": 0.2682, "step": 1980 }, { "epoch": 5.653409090909091, "grad_norm": 8.57619571685791, "learning_rate": 0.0001, "loss": 0.2596, "step": 1990 }, { "epoch": 5.681818181818182, "grad_norm": 7.7515058517456055, "learning_rate": 0.0001, "loss": 0.2408, "step": 2000 }, { "epoch": 5.7102272727272725, "grad_norm": 8.581171989440918, "learning_rate": 0.0001, "loss": 0.2492, "step": 2010 }, { "epoch": 5.738636363636363, "grad_norm": 8.195562362670898, "learning_rate": 0.0001, "loss": 0.2497, "step": 2020 }, { "epoch": 5.767045454545455, "grad_norm": 7.793923854827881, "learning_rate": 0.0001, "loss": 0.2415, "step": 2030 }, { "epoch": 5.795454545454545, "grad_norm": 7.39900016784668, "learning_rate": 0.0001, "loss": 0.2436, "step": 2040 }, { "epoch": 5.823863636363637, "grad_norm": 8.420592308044434, "learning_rate": 0.0001, "loss": 0.2523, "step": 2050 }, { "epoch": 5.8522727272727275, "grad_norm": 8.713873863220215, "learning_rate": 0.0001, "loss": 0.2623, "step": 2060 }, { "epoch": 5.880681818181818, "grad_norm": 9.038602828979492, "learning_rate": 0.0001, "loss": 0.2543, "step": 2070 }, { "epoch": 5.909090909090909, "grad_norm": 8.842888832092285, "learning_rate": 0.0001, "loss": 0.2539, "step": 2080 }, { "epoch": 5.9375, "grad_norm": 9.234344482421875, "learning_rate": 0.0001, "loss": 0.2393, "step": 2090 }, { "epoch": 5.965909090909091, "grad_norm": 7.840005874633789, "learning_rate": 0.0001, "loss": 0.2418, "step": 2100 }, { "epoch": 5.994318181818182, "grad_norm": 8.143929481506348, "learning_rate": 0.0001, "loss": 0.2418, "step": 2110 }, { "epoch": 6.0227272727272725, "grad_norm": 7.842228412628174, "learning_rate": 0.0001, "loss": 0.2343, "step": 2120 }, { "epoch": 6.051136363636363, "grad_norm": 6.924618721008301, "learning_rate": 0.0001, "loss": 0.2437, "step": 2130 }, { "epoch": 6.079545454545454, "grad_norm": 7.25029993057251, "learning_rate": 0.0001, "loss": 0.2463, "step": 2140 }, { "epoch": 6.107954545454546, "grad_norm": 8.335989952087402, "learning_rate": 0.0001, "loss": 0.2473, "step": 2150 }, { "epoch": 6.136363636363637, "grad_norm": 6.865011215209961, "learning_rate": 0.0001, "loss": 0.2384, "step": 2160 }, { "epoch": 6.1647727272727275, "grad_norm": 8.29775619506836, "learning_rate": 0.0001, "loss": 0.2382, "step": 2170 }, { "epoch": 6.193181818181818, "grad_norm": 7.266998767852783, "learning_rate": 0.0001, "loss": 0.2383, "step": 2180 }, { "epoch": 6.221590909090909, "grad_norm": 7.584468364715576, "learning_rate": 0.0001, "loss": 0.2263, "step": 2190 }, { "epoch": 6.25, "grad_norm": 6.939903259277344, "learning_rate": 0.0001, "loss": 0.2418, "step": 2200 }, { "epoch": 6.278409090909091, "grad_norm": 6.492012023925781, "learning_rate": 0.0001, "loss": 0.2358, "step": 2210 }, { "epoch": 6.306818181818182, "grad_norm": 7.337180137634277, "learning_rate": 0.0001, "loss": 0.2336, "step": 2220 }, { "epoch": 6.3352272727272725, "grad_norm": 8.410757064819336, "learning_rate": 0.0001, "loss": 0.2292, "step": 2230 }, { "epoch": 6.363636363636363, "grad_norm": 7.204639911651611, "learning_rate": 0.0001, "loss": 0.2321, "step": 2240 }, { "epoch": 6.392045454545454, "grad_norm": 7.258450508117676, "learning_rate": 0.0001, "loss": 0.2356, "step": 2250 }, { "epoch": 6.420454545454546, "grad_norm": 8.304643630981445, "learning_rate": 0.0001, "loss": 0.2347, "step": 2260 }, { "epoch": 6.448863636363637, "grad_norm": 6.700302600860596, "learning_rate": 0.0001, "loss": 0.2309, "step": 2270 }, { "epoch": 6.4772727272727275, "grad_norm": 7.752438545227051, "learning_rate": 0.0001, "loss": 0.2225, "step": 2280 }, { "epoch": 6.505681818181818, "grad_norm": 7.962435245513916, "learning_rate": 0.0001, "loss": 0.2247, "step": 2290 }, { "epoch": 6.534090909090909, "grad_norm": 8.655714988708496, "learning_rate": 0.0001, "loss": 0.2299, "step": 2300 }, { "epoch": 6.5625, "grad_norm": 6.5540771484375, "learning_rate": 0.0001, "loss": 0.2243, "step": 2310 }, { "epoch": 6.590909090909091, "grad_norm": 7.325479507446289, "learning_rate": 0.0001, "loss": 0.2209, "step": 2320 }, { "epoch": 6.619318181818182, "grad_norm": 7.687260150909424, "learning_rate": 0.0001, "loss": 0.2219, "step": 2330 }, { "epoch": 6.6477272727272725, "grad_norm": 8.61622428894043, "learning_rate": 0.0001, "loss": 0.2177, "step": 2340 }, { "epoch": 6.676136363636363, "grad_norm": 7.550006866455078, "learning_rate": 0.0001, "loss": 0.2309, "step": 2350 }, { "epoch": 6.704545454545455, "grad_norm": 7.1695685386657715, "learning_rate": 0.0001, "loss": 0.2278, "step": 2360 }, { "epoch": 6.732954545454545, "grad_norm": 8.121203422546387, "learning_rate": 0.0001, "loss": 0.2192, "step": 2370 }, { "epoch": 6.761363636363637, "grad_norm": 9.03805923461914, "learning_rate": 0.0001, "loss": 0.2298, "step": 2380 }, { "epoch": 6.7897727272727275, "grad_norm": 7.608403205871582, "learning_rate": 0.0001, "loss": 0.23, "step": 2390 }, { "epoch": 6.818181818181818, "grad_norm": 7.704319953918457, "learning_rate": 0.0001, "loss": 0.228, "step": 2400 }, { "epoch": 6.846590909090909, "grad_norm": 6.52188777923584, "learning_rate": 0.0001, "loss": 0.2206, "step": 2410 }, { "epoch": 6.875, "grad_norm": 6.7469635009765625, "learning_rate": 0.0001, "loss": 0.2283, "step": 2420 }, { "epoch": 6.903409090909091, "grad_norm": 6.883518218994141, "learning_rate": 0.0001, "loss": 0.2123, "step": 2430 }, { "epoch": 6.931818181818182, "grad_norm": 7.054996013641357, "learning_rate": 0.0001, "loss": 0.2237, "step": 2440 }, { "epoch": 6.9602272727272725, "grad_norm": 7.665782451629639, "learning_rate": 0.0001, "loss": 0.2202, "step": 2450 }, { "epoch": 6.988636363636363, "grad_norm": 8.317813873291016, "learning_rate": 0.0001, "loss": 0.2252, "step": 2460 }, { "epoch": 7.017045454545454, "grad_norm": 7.469433307647705, "learning_rate": 0.0001, "loss": 0.2244, "step": 2470 }, { "epoch": 7.045454545454546, "grad_norm": 6.86864709854126, "learning_rate": 0.0001, "loss": 0.2132, "step": 2480 }, { "epoch": 7.073863636363637, "grad_norm": 7.019229412078857, "learning_rate": 0.0001, "loss": 0.2191, "step": 2490 }, { "epoch": 7.1022727272727275, "grad_norm": 6.337296962738037, "learning_rate": 0.0001, "loss": 0.2135, "step": 2500 }, { "epoch": 7.130681818181818, "grad_norm": 7.91449499130249, "learning_rate": 0.0001, "loss": 0.2132, "step": 2510 }, { "epoch": 7.159090909090909, "grad_norm": 7.0960283279418945, "learning_rate": 0.0001, "loss": 0.2058, "step": 2520 }, { "epoch": 7.1875, "grad_norm": 6.885858058929443, "learning_rate": 0.0001, "loss": 0.205, "step": 2530 }, { "epoch": 7.215909090909091, "grad_norm": 7.231472015380859, "learning_rate": 0.0001, "loss": 0.2125, "step": 2540 }, { "epoch": 7.244318181818182, "grad_norm": 6.965603351593018, "learning_rate": 0.0001, "loss": 0.209, "step": 2550 }, { "epoch": 7.2727272727272725, "grad_norm": 7.230012893676758, "learning_rate": 0.0001, "loss": 0.2062, "step": 2560 }, { "epoch": 7.301136363636363, "grad_norm": 6.389279842376709, "learning_rate": 0.0001, "loss": 0.2033, "step": 2570 }, { "epoch": 7.329545454545454, "grad_norm": 6.917042255401611, "learning_rate": 0.0001, "loss": 0.2089, "step": 2580 }, { "epoch": 7.357954545454546, "grad_norm": 6.476625919342041, "learning_rate": 0.0001, "loss": 0.203, "step": 2590 }, { "epoch": 7.386363636363637, "grad_norm": 6.501523494720459, "learning_rate": 0.0001, "loss": 0.2034, "step": 2600 }, { "epoch": 7.4147727272727275, "grad_norm": 4.998976230621338, "learning_rate": 0.0001, "loss": 0.2078, "step": 2610 }, { "epoch": 7.443181818181818, "grad_norm": 5.617987632751465, "learning_rate": 0.0001, "loss": 0.2065, "step": 2620 }, { "epoch": 7.471590909090909, "grad_norm": 7.509957313537598, "learning_rate": 0.0001, "loss": 0.2069, "step": 2630 }, { "epoch": 7.5, "grad_norm": 6.23819637298584, "learning_rate": 0.0001, "loss": 0.2067, "step": 2640 }, { "epoch": 7.528409090909091, "grad_norm": 6.4593119621276855, "learning_rate": 0.0001, "loss": 0.2081, "step": 2650 }, { "epoch": 7.556818181818182, "grad_norm": 7.757627964019775, "learning_rate": 0.0001, "loss": 0.2045, "step": 2660 }, { "epoch": 7.5852272727272725, "grad_norm": 7.729194641113281, "learning_rate": 0.0001, "loss": 0.2057, "step": 2670 }, { "epoch": 7.613636363636363, "grad_norm": 6.746730804443359, "learning_rate": 0.0001, "loss": 0.2055, "step": 2680 }, { "epoch": 7.642045454545455, "grad_norm": 6.76716947555542, "learning_rate": 0.0001, "loss": 0.2029, "step": 2690 }, { "epoch": 7.670454545454545, "grad_norm": 6.230428695678711, "learning_rate": 0.0001, "loss": 0.2086, "step": 2700 }, { "epoch": 7.698863636363637, "grad_norm": 6.170040607452393, "learning_rate": 0.0001, "loss": 0.2088, "step": 2710 }, { "epoch": 7.7272727272727275, "grad_norm": 6.0955491065979, "learning_rate": 0.0001, "loss": 0.1942, "step": 2720 }, { "epoch": 7.755681818181818, "grad_norm": 6.41675329208374, "learning_rate": 0.0001, "loss": 0.198, "step": 2730 }, { "epoch": 7.784090909090909, "grad_norm": 8.517492294311523, "learning_rate": 0.0001, "loss": 0.2057, "step": 2740 }, { "epoch": 7.8125, "grad_norm": 6.808162689208984, "learning_rate": 0.0001, "loss": 0.2017, "step": 2750 }, { "epoch": 7.840909090909091, "grad_norm": 6.75582218170166, "learning_rate": 0.0001, "loss": 0.2036, "step": 2760 }, { "epoch": 7.869318181818182, "grad_norm": 6.981121063232422, "learning_rate": 0.0001, "loss": 0.2075, "step": 2770 }, { "epoch": 7.8977272727272725, "grad_norm": 5.264399528503418, "learning_rate": 0.0001, "loss": 0.204, "step": 2780 }, { "epoch": 7.926136363636363, "grad_norm": 6.4845781326293945, "learning_rate": 0.0001, "loss": 0.2064, "step": 2790 }, { "epoch": 7.954545454545455, "grad_norm": 7.397743225097656, "learning_rate": 0.0001, "loss": 0.2047, "step": 2800 }, { "epoch": 7.982954545454545, "grad_norm": 8.000630378723145, "learning_rate": 0.0001, "loss": 0.2044, "step": 2810 }, { "epoch": 8.011363636363637, "grad_norm": 6.957930088043213, "learning_rate": 0.0001, "loss": 0.2003, "step": 2820 }, { "epoch": 8.039772727272727, "grad_norm": 6.850410461425781, "learning_rate": 0.0001, "loss": 0.1958, "step": 2830 }, { "epoch": 8.068181818181818, "grad_norm": 6.901455402374268, "learning_rate": 0.0001, "loss": 0.2006, "step": 2840 }, { "epoch": 8.096590909090908, "grad_norm": 5.5034871101379395, "learning_rate": 0.0001, "loss": 0.1876, "step": 2850 }, { "epoch": 8.125, "grad_norm": 6.498006343841553, "learning_rate": 0.0001, "loss": 0.191, "step": 2860 }, { "epoch": 8.153409090909092, "grad_norm": 5.930977821350098, "learning_rate": 0.0001, "loss": 0.1884, "step": 2870 }, { "epoch": 8.181818181818182, "grad_norm": 6.002486705780029, "learning_rate": 0.0001, "loss": 0.1952, "step": 2880 }, { "epoch": 8.210227272727273, "grad_norm": 5.113884925842285, "learning_rate": 0.0001, "loss": 0.1904, "step": 2890 }, { "epoch": 8.238636363636363, "grad_norm": 6.802750587463379, "learning_rate": 0.0001, "loss": 0.1887, "step": 2900 }, { "epoch": 8.267045454545455, "grad_norm": 5.978296756744385, "learning_rate": 0.0001, "loss": 0.1909, "step": 2910 }, { "epoch": 8.295454545454545, "grad_norm": 7.176412105560303, "learning_rate": 0.0001, "loss": 0.1913, "step": 2920 }, { "epoch": 8.323863636363637, "grad_norm": 6.965484619140625, "learning_rate": 0.0001, "loss": 0.1854, "step": 2930 }, { "epoch": 8.352272727272727, "grad_norm": 5.903598785400391, "learning_rate": 0.0001, "loss": 0.1797, "step": 2940 }, { "epoch": 8.380681818181818, "grad_norm": 6.34436559677124, "learning_rate": 0.0001, "loss": 0.1807, "step": 2950 }, { "epoch": 8.409090909090908, "grad_norm": 5.903111934661865, "learning_rate": 0.0001, "loss": 0.1851, "step": 2960 }, { "epoch": 8.4375, "grad_norm": 5.883657455444336, "learning_rate": 0.0001, "loss": 0.1826, "step": 2970 }, { "epoch": 8.465909090909092, "grad_norm": 5.767624378204346, "learning_rate": 0.0001, "loss": 0.1862, "step": 2980 }, { "epoch": 8.494318181818182, "grad_norm": 5.390651226043701, "learning_rate": 0.0001, "loss": 0.1906, "step": 2990 }, { "epoch": 8.522727272727273, "grad_norm": 5.619853496551514, "learning_rate": 0.0001, "loss": 0.1804, "step": 3000 }, { "epoch": 8.551136363636363, "grad_norm": 6.3636932373046875, "learning_rate": 0.0001, "loss": 0.1849, "step": 3010 }, { "epoch": 8.579545454545455, "grad_norm": 6.031747341156006, "learning_rate": 0.0001, "loss": 0.1884, "step": 3020 }, { "epoch": 8.607954545454545, "grad_norm": 5.940463066101074, "learning_rate": 0.0001, "loss": 0.188, "step": 3030 }, { "epoch": 8.636363636363637, "grad_norm": 5.887471675872803, "learning_rate": 0.0001, "loss": 0.1798, "step": 3040 }, { "epoch": 8.664772727272727, "grad_norm": 5.479545593261719, "learning_rate": 0.0001, "loss": 0.1778, "step": 3050 }, { "epoch": 8.693181818181818, "grad_norm": 6.690113544464111, "learning_rate": 0.0001, "loss": 0.1786, "step": 3060 }, { "epoch": 8.721590909090908, "grad_norm": 5.396069049835205, "learning_rate": 0.0001, "loss": 0.1785, "step": 3070 }, { "epoch": 8.75, "grad_norm": 5.759469509124756, "learning_rate": 0.0001, "loss": 0.1718, "step": 3080 }, { "epoch": 8.778409090909092, "grad_norm": 4.685205459594727, "learning_rate": 0.0001, "loss": 0.172, "step": 3090 }, { "epoch": 8.806818181818182, "grad_norm": 5.000999927520752, "learning_rate": 0.0001, "loss": 0.1808, "step": 3100 }, { "epoch": 8.835227272727273, "grad_norm": 5.158972263336182, "learning_rate": 0.0001, "loss": 0.18, "step": 3110 }, { "epoch": 8.863636363636363, "grad_norm": 5.847781658172607, "learning_rate": 0.0001, "loss": 0.1851, "step": 3120 }, { "epoch": 8.892045454545455, "grad_norm": 4.9706645011901855, "learning_rate": 0.0001, "loss": 0.1838, "step": 3130 }, { "epoch": 8.920454545454545, "grad_norm": 5.0156660079956055, "learning_rate": 0.0001, "loss": 0.1825, "step": 3140 }, { "epoch": 8.948863636363637, "grad_norm": 5.2722344398498535, "learning_rate": 0.0001, "loss": 0.1794, "step": 3150 }, { "epoch": 8.977272727272727, "grad_norm": 4.946606159210205, "learning_rate": 0.0001, "loss": 0.174, "step": 3160 }, { "epoch": 9.005681818181818, "grad_norm": 5.0111846923828125, "learning_rate": 0.0001, "loss": 0.1769, "step": 3170 }, { "epoch": 9.034090909090908, "grad_norm": 4.587785243988037, "learning_rate": 0.0001, "loss": 0.1725, "step": 3180 }, { "epoch": 9.0625, "grad_norm": 4.933738708496094, "learning_rate": 0.0001, "loss": 0.1812, "step": 3190 }, { "epoch": 9.090909090909092, "grad_norm": 5.0207037925720215, "learning_rate": 0.0001, "loss": 0.1804, "step": 3200 }, { "epoch": 9.119318181818182, "grad_norm": 6.469820022583008, "learning_rate": 0.0001, "loss": 0.1731, "step": 3210 }, { "epoch": 9.147727272727273, "grad_norm": 5.247611999511719, "learning_rate": 0.0001, "loss": 0.1785, "step": 3220 }, { "epoch": 9.176136363636363, "grad_norm": 4.957090854644775, "learning_rate": 0.0001, "loss": 0.1721, "step": 3230 }, { "epoch": 9.204545454545455, "grad_norm": 4.917489051818848, "learning_rate": 0.0001, "loss": 0.1767, "step": 3240 }, { "epoch": 9.232954545454545, "grad_norm": 6.98730993270874, "learning_rate": 0.0001, "loss": 0.1773, "step": 3250 }, { "epoch": 9.261363636363637, "grad_norm": 5.937990665435791, "learning_rate": 0.0001, "loss": 0.1737, "step": 3260 }, { "epoch": 9.289772727272727, "grad_norm": 6.112240791320801, "learning_rate": 0.0001, "loss": 0.1708, "step": 3270 }, { "epoch": 9.318181818181818, "grad_norm": 5.8593878746032715, "learning_rate": 0.0001, "loss": 0.174, "step": 3280 }, { "epoch": 9.346590909090908, "grad_norm": 6.075056552886963, "learning_rate": 0.0001, "loss": 0.1699, "step": 3290 }, { "epoch": 9.375, "grad_norm": 5.816572666168213, "learning_rate": 0.0001, "loss": 0.1722, "step": 3300 }, { "epoch": 9.403409090909092, "grad_norm": 6.339922904968262, "learning_rate": 0.0001, "loss": 0.1653, "step": 3310 }, { "epoch": 9.431818181818182, "grad_norm": 5.111523628234863, "learning_rate": 0.0001, "loss": 0.1618, "step": 3320 }, { "epoch": 9.460227272727273, "grad_norm": 5.104013442993164, "learning_rate": 0.0001, "loss": 0.1652, "step": 3330 }, { "epoch": 9.488636363636363, "grad_norm": 4.7531280517578125, "learning_rate": 0.0001, "loss": 0.1639, "step": 3340 }, { "epoch": 9.517045454545455, "grad_norm": 4.486930847167969, "learning_rate": 0.0001, "loss": 0.1729, "step": 3350 }, { "epoch": 9.545454545454545, "grad_norm": 5.003032684326172, "learning_rate": 0.0001, "loss": 0.173, "step": 3360 }, { "epoch": 9.573863636363637, "grad_norm": 5.644103050231934, "learning_rate": 0.0001, "loss": 0.1694, "step": 3370 }, { "epoch": 9.602272727272727, "grad_norm": 5.101214408874512, "learning_rate": 0.0001, "loss": 0.1711, "step": 3380 }, { "epoch": 9.630681818181818, "grad_norm": 5.529112815856934, "learning_rate": 0.0001, "loss": 0.1715, "step": 3390 }, { "epoch": 9.659090909090908, "grad_norm": 5.411925792694092, "learning_rate": 0.0001, "loss": 0.1675, "step": 3400 }, { "epoch": 9.6875, "grad_norm": 5.155153751373291, "learning_rate": 0.0001, "loss": 0.1648, "step": 3410 }, { "epoch": 9.715909090909092, "grad_norm": 4.77042293548584, "learning_rate": 0.0001, "loss": 0.1622, "step": 3420 }, { "epoch": 9.744318181818182, "grad_norm": 4.622435092926025, "learning_rate": 0.0001, "loss": 0.1678, "step": 3430 }, { "epoch": 9.772727272727273, "grad_norm": 5.802976131439209, "learning_rate": 0.0001, "loss": 0.1712, "step": 3440 }, { "epoch": 9.801136363636363, "grad_norm": 4.810296058654785, "learning_rate": 0.0001, "loss": 0.173, "step": 3450 }, { "epoch": 9.829545454545455, "grad_norm": 5.124487400054932, "learning_rate": 0.0001, "loss": 0.1689, "step": 3460 }, { "epoch": 9.857954545454545, "grad_norm": 5.081210136413574, "learning_rate": 0.0001, "loss": 0.1625, "step": 3470 }, { "epoch": 9.886363636363637, "grad_norm": 5.038453578948975, "learning_rate": 0.0001, "loss": 0.1571, "step": 3480 }, { "epoch": 9.914772727272727, "grad_norm": 4.524289608001709, "learning_rate": 0.0001, "loss": 0.1614, "step": 3490 }, { "epoch": 9.943181818181818, "grad_norm": 5.175899505615234, "learning_rate": 0.0001, "loss": 0.1601, "step": 3500 }, { "epoch": 9.971590909090908, "grad_norm": 4.064411640167236, "learning_rate": 0.0001, "loss": 0.1626, "step": 3510 }, { "epoch": 10.0, "grad_norm": 4.967013835906982, "learning_rate": 0.0001, "loss": 0.1653, "step": 3520 }, { "epoch": 10.028409090909092, "grad_norm": 5.4418535232543945, "learning_rate": 0.0001, "loss": 0.161, "step": 3530 }, { "epoch": 10.056818181818182, "grad_norm": 5.082826614379883, "learning_rate": 0.0001, "loss": 0.1602, "step": 3540 }, { "epoch": 10.085227272727273, "grad_norm": 4.592067241668701, "learning_rate": 0.0001, "loss": 0.1623, "step": 3550 }, { "epoch": 10.113636363636363, "grad_norm": 5.288888931274414, "learning_rate": 0.0001, "loss": 0.1576, "step": 3560 }, { "epoch": 10.142045454545455, "grad_norm": 5.104770660400391, "learning_rate": 0.0001, "loss": 0.1593, "step": 3570 }, { "epoch": 10.170454545454545, "grad_norm": 4.773959159851074, "learning_rate": 0.0001, "loss": 0.1545, "step": 3580 }, { "epoch": 10.198863636363637, "grad_norm": 4.410947799682617, "learning_rate": 0.0001, "loss": 0.161, "step": 3590 }, { "epoch": 10.227272727272727, "grad_norm": 4.374294281005859, "learning_rate": 0.0001, "loss": 0.158, "step": 3600 }, { "epoch": 10.255681818181818, "grad_norm": 4.402506351470947, "learning_rate": 0.0001, "loss": 0.1567, "step": 3610 }, { "epoch": 10.284090909090908, "grad_norm": 5.090147495269775, "learning_rate": 0.0001, "loss": 0.1603, "step": 3620 }, { "epoch": 10.3125, "grad_norm": 5.5478081703186035, "learning_rate": 0.0001, "loss": 0.1631, "step": 3630 }, { "epoch": 10.340909090909092, "grad_norm": 5.645622730255127, "learning_rate": 0.0001, "loss": 0.1597, "step": 3640 }, { "epoch": 10.369318181818182, "grad_norm": 4.826333999633789, "learning_rate": 0.0001, "loss": 0.1584, "step": 3650 }, { "epoch": 10.397727272727273, "grad_norm": 5.210224628448486, "learning_rate": 0.0001, "loss": 0.1553, "step": 3660 }, { "epoch": 10.426136363636363, "grad_norm": 3.516092300415039, "learning_rate": 0.0001, "loss": 0.1543, "step": 3670 }, { "epoch": 10.454545454545455, "grad_norm": 4.710558891296387, "learning_rate": 0.0001, "loss": 0.1507, "step": 3680 }, { "epoch": 10.482954545454545, "grad_norm": 4.940939903259277, "learning_rate": 0.0001, "loss": 0.1526, "step": 3690 }, { "epoch": 10.511363636363637, "grad_norm": 4.353475093841553, "learning_rate": 0.0001, "loss": 0.1537, "step": 3700 }, { "epoch": 10.539772727272727, "grad_norm": 3.7736759185791016, "learning_rate": 0.0001, "loss": 0.1557, "step": 3710 }, { "epoch": 10.568181818181818, "grad_norm": 4.482377529144287, "learning_rate": 0.0001, "loss": 0.1636, "step": 3720 }, { "epoch": 10.596590909090908, "grad_norm": 4.80997896194458, "learning_rate": 0.0001, "loss": 0.1611, "step": 3730 }, { "epoch": 10.625, "grad_norm": 6.185352802276611, "learning_rate": 0.0001, "loss": 0.1555, "step": 3740 }, { "epoch": 10.653409090909092, "grad_norm": 5.383978366851807, "learning_rate": 0.0001, "loss": 0.1609, "step": 3750 }, { "epoch": 10.681818181818182, "grad_norm": 6.075902938842773, "learning_rate": 0.0001, "loss": 0.1612, "step": 3760 }, { "epoch": 10.710227272727273, "grad_norm": 5.537624835968018, "learning_rate": 0.0001, "loss": 0.155, "step": 3770 }, { "epoch": 10.738636363636363, "grad_norm": 4.914467811584473, "learning_rate": 0.0001, "loss": 0.1555, "step": 3780 }, { "epoch": 10.767045454545455, "grad_norm": 4.567920684814453, "learning_rate": 0.0001, "loss": 0.1566, "step": 3790 }, { "epoch": 10.795454545454545, "grad_norm": 4.5670390129089355, "learning_rate": 0.0001, "loss": 0.1566, "step": 3800 }, { "epoch": 10.823863636363637, "grad_norm": 3.629544734954834, "learning_rate": 0.0001, "loss": 0.1502, "step": 3810 }, { "epoch": 10.852272727272727, "grad_norm": 4.088180065155029, "learning_rate": 0.0001, "loss": 0.1594, "step": 3820 }, { "epoch": 10.880681818181818, "grad_norm": 4.8524017333984375, "learning_rate": 0.0001, "loss": 0.1572, "step": 3830 }, { "epoch": 10.909090909090908, "grad_norm": 5.3502888679504395, "learning_rate": 0.0001, "loss": 0.1512, "step": 3840 }, { "epoch": 10.9375, "grad_norm": 4.959495544433594, "learning_rate": 0.0001, "loss": 0.1549, "step": 3850 }, { "epoch": 10.965909090909092, "grad_norm": 4.991962432861328, "learning_rate": 0.0001, "loss": 0.1504, "step": 3860 }, { "epoch": 10.994318181818182, "grad_norm": 4.054560661315918, "learning_rate": 0.0001, "loss": 0.1553, "step": 3870 }, { "epoch": 11.022727272727273, "grad_norm": 3.9775209426879883, "learning_rate": 0.0001, "loss": 0.149, "step": 3880 }, { "epoch": 11.051136363636363, "grad_norm": 4.538222312927246, "learning_rate": 0.0001, "loss": 0.1505, "step": 3890 }, { "epoch": 11.079545454545455, "grad_norm": 5.487000465393066, "learning_rate": 0.0001, "loss": 0.15, "step": 3900 }, { "epoch": 11.107954545454545, "grad_norm": 5.862754821777344, "learning_rate": 0.0001, "loss": 0.1493, "step": 3910 }, { "epoch": 11.136363636363637, "grad_norm": 4.4752302169799805, "learning_rate": 0.0001, "loss": 0.1524, "step": 3920 }, { "epoch": 11.164772727272727, "grad_norm": 4.51123571395874, "learning_rate": 0.0001, "loss": 0.1545, "step": 3930 }, { "epoch": 11.193181818181818, "grad_norm": 4.44078254699707, "learning_rate": 0.0001, "loss": 0.1481, "step": 3940 }, { "epoch": 11.221590909090908, "grad_norm": 4.542746067047119, "learning_rate": 0.0001, "loss": 0.1496, "step": 3950 }, { "epoch": 11.25, "grad_norm": 4.513556003570557, "learning_rate": 0.0001, "loss": 0.1456, "step": 3960 }, { "epoch": 11.278409090909092, "grad_norm": 5.227005958557129, "learning_rate": 0.0001, "loss": 0.146, "step": 3970 }, { "epoch": 11.306818181818182, "grad_norm": 4.134369850158691, "learning_rate": 0.0001, "loss": 0.1497, "step": 3980 }, { "epoch": 11.335227272727273, "grad_norm": 5.030073642730713, "learning_rate": 0.0001, "loss": 0.1496, "step": 3990 }, { "epoch": 11.363636363636363, "grad_norm": 4.397629737854004, "learning_rate": 0.0001, "loss": 0.1462, "step": 4000 }, { "epoch": 11.392045454545455, "grad_norm": 4.636000633239746, "learning_rate": 0.0001, "loss": 0.144, "step": 4010 }, { "epoch": 11.420454545454545, "grad_norm": 4.899885177612305, "learning_rate": 0.0001, "loss": 0.1445, "step": 4020 }, { "epoch": 11.448863636363637, "grad_norm": 4.209653377532959, "learning_rate": 0.0001, "loss": 0.1517, "step": 4030 }, { "epoch": 11.477272727272727, "grad_norm": 4.315791606903076, "learning_rate": 0.0001, "loss": 0.1423, "step": 4040 }, { "epoch": 11.505681818181818, "grad_norm": 4.065213203430176, "learning_rate": 0.0001, "loss": 0.1429, "step": 4050 }, { "epoch": 11.534090909090908, "grad_norm": 4.354069709777832, "learning_rate": 0.0001, "loss": 0.1452, "step": 4060 }, { "epoch": 11.5625, "grad_norm": 4.485837459564209, "learning_rate": 0.0001, "loss": 0.1429, "step": 4070 }, { "epoch": 11.590909090909092, "grad_norm": 4.509272575378418, "learning_rate": 0.0001, "loss": 0.1437, "step": 4080 }, { "epoch": 11.619318181818182, "grad_norm": 4.269772052764893, "learning_rate": 0.0001, "loss": 0.1468, "step": 4090 }, { "epoch": 11.647727272727273, "grad_norm": 4.422598361968994, "learning_rate": 0.0001, "loss": 0.151, "step": 4100 }, { "epoch": 11.676136363636363, "grad_norm": 4.730630874633789, "learning_rate": 0.0001, "loss": 0.1497, "step": 4110 }, { "epoch": 11.704545454545455, "grad_norm": 5.042013645172119, "learning_rate": 0.0001, "loss": 0.1476, "step": 4120 }, { "epoch": 11.732954545454545, "grad_norm": 4.182816982269287, "learning_rate": 0.0001, "loss": 0.1471, "step": 4130 }, { "epoch": 11.761363636363637, "grad_norm": 4.254685401916504, "learning_rate": 0.0001, "loss": 0.1371, "step": 4140 }, { "epoch": 11.789772727272727, "grad_norm": 4.958248138427734, "learning_rate": 0.0001, "loss": 0.1465, "step": 4150 }, { "epoch": 11.818181818181818, "grad_norm": 4.743212699890137, "learning_rate": 0.0001, "loss": 0.1448, "step": 4160 }, { "epoch": 11.846590909090908, "grad_norm": 4.2032084465026855, "learning_rate": 0.0001, "loss": 0.1389, "step": 4170 }, { "epoch": 11.875, "grad_norm": 4.244325637817383, "learning_rate": 0.0001, "loss": 0.1397, "step": 4180 }, { "epoch": 11.903409090909092, "grad_norm": 3.134256362915039, "learning_rate": 0.0001, "loss": 0.1443, "step": 4190 }, { "epoch": 11.931818181818182, "grad_norm": 4.238053321838379, "learning_rate": 0.0001, "loss": 0.1485, "step": 4200 }, { "epoch": 11.960227272727273, "grad_norm": 4.34376335144043, "learning_rate": 0.0001, "loss": 0.1421, "step": 4210 }, { "epoch": 11.988636363636363, "grad_norm": 3.7817201614379883, "learning_rate": 0.0001, "loss": 0.1441, "step": 4220 }, { "epoch": 12.017045454545455, "grad_norm": 3.5958733558654785, "learning_rate": 0.0001, "loss": 0.1373, "step": 4230 }, { "epoch": 12.045454545454545, "grad_norm": 4.10888147354126, "learning_rate": 0.0001, "loss": 0.1405, "step": 4240 }, { "epoch": 12.073863636363637, "grad_norm": 3.802342176437378, "learning_rate": 0.0001, "loss": 0.1393, "step": 4250 }, { "epoch": 12.102272727272727, "grad_norm": 4.85184907913208, "learning_rate": 0.0001, "loss": 0.1359, "step": 4260 }, { "epoch": 12.130681818181818, "grad_norm": 4.548974514007568, "learning_rate": 0.0001, "loss": 0.1402, "step": 4270 }, { "epoch": 12.159090909090908, "grad_norm": 4.047370433807373, "learning_rate": 0.0001, "loss": 0.143, "step": 4280 }, { "epoch": 12.1875, "grad_norm": 4.97476863861084, "learning_rate": 0.0001, "loss": 0.1439, "step": 4290 }, { "epoch": 12.215909090909092, "grad_norm": 4.076110363006592, "learning_rate": 0.0001, "loss": 0.145, "step": 4300 }, { "epoch": 12.244318181818182, "grad_norm": 4.098419189453125, "learning_rate": 0.0001, "loss": 0.1423, "step": 4310 }, { "epoch": 12.272727272727273, "grad_norm": 3.961846351623535, "learning_rate": 0.0001, "loss": 0.1369, "step": 4320 }, { "epoch": 12.301136363636363, "grad_norm": 4.079448223114014, "learning_rate": 0.0001, "loss": 0.141, "step": 4330 }, { "epoch": 12.329545454545455, "grad_norm": 3.375678777694702, "learning_rate": 0.0001, "loss": 0.1368, "step": 4340 }, { "epoch": 12.357954545454545, "grad_norm": 3.7309460639953613, "learning_rate": 0.0001, "loss": 0.1338, "step": 4350 }, { "epoch": 12.386363636363637, "grad_norm": 4.20289421081543, "learning_rate": 0.0001, "loss": 0.1429, "step": 4360 }, { "epoch": 12.414772727272727, "grad_norm": 4.175302982330322, "learning_rate": 0.0001, "loss": 0.139, "step": 4370 }, { "epoch": 12.443181818181818, "grad_norm": 3.7921714782714844, "learning_rate": 0.0001, "loss": 0.1408, "step": 4380 }, { "epoch": 12.471590909090908, "grad_norm": 4.009100437164307, "learning_rate": 0.0001, "loss": 0.1432, "step": 4390 }, { "epoch": 12.5, "grad_norm": 3.71403431892395, "learning_rate": 0.0001, "loss": 0.1381, "step": 4400 }, { "epoch": 12.528409090909092, "grad_norm": 4.153659820556641, "learning_rate": 0.0001, "loss": 0.1362, "step": 4410 }, { "epoch": 12.556818181818182, "grad_norm": 3.8363094329833984, "learning_rate": 0.0001, "loss": 0.1434, "step": 4420 }, { "epoch": 12.585227272727273, "grad_norm": 3.9091579914093018, "learning_rate": 0.0001, "loss": 0.1372, "step": 4430 }, { "epoch": 12.613636363636363, "grad_norm": 4.5517578125, "learning_rate": 0.0001, "loss": 0.1388, "step": 4440 }, { "epoch": 12.642045454545455, "grad_norm": 3.891643762588501, "learning_rate": 0.0001, "loss": 0.1361, "step": 4450 }, { "epoch": 12.670454545454545, "grad_norm": 3.9435248374938965, "learning_rate": 0.0001, "loss": 0.1417, "step": 4460 }, { "epoch": 12.698863636363637, "grad_norm": 3.625453472137451, "learning_rate": 0.0001, "loss": 0.1392, "step": 4470 }, { "epoch": 12.727272727272727, "grad_norm": 4.054428577423096, "learning_rate": 0.0001, "loss": 0.1335, "step": 4480 }, { "epoch": 12.755681818181818, "grad_norm": 4.017980098724365, "learning_rate": 0.0001, "loss": 0.1409, "step": 4490 }, { "epoch": 12.784090909090908, "grad_norm": 3.3853940963745117, "learning_rate": 0.0001, "loss": 0.1407, "step": 4500 }, { "epoch": 12.8125, "grad_norm": 3.403177261352539, "learning_rate": 0.0001, "loss": 0.1344, "step": 4510 }, { "epoch": 12.840909090909092, "grad_norm": 3.364267349243164, "learning_rate": 0.0001, "loss": 0.1379, "step": 4520 }, { "epoch": 12.869318181818182, "grad_norm": 4.48183012008667, "learning_rate": 0.0001, "loss": 0.1407, "step": 4530 }, { "epoch": 12.897727272727273, "grad_norm": 3.5637905597686768, "learning_rate": 0.0001, "loss": 0.1404, "step": 4540 }, { "epoch": 12.926136363636363, "grad_norm": 3.4277963638305664, "learning_rate": 0.0001, "loss": 0.1357, "step": 4550 }, { "epoch": 12.954545454545455, "grad_norm": 3.4155449867248535, "learning_rate": 0.0001, "loss": 0.1385, "step": 4560 }, { "epoch": 12.982954545454545, "grad_norm": 3.1836628913879395, "learning_rate": 0.0001, "loss": 0.1381, "step": 4570 }, { "epoch": 13.011363636363637, "grad_norm": 4.119326591491699, "learning_rate": 0.0001, "loss": 0.1311, "step": 4580 }, { "epoch": 13.039772727272727, "grad_norm": 3.324186086654663, "learning_rate": 0.0001, "loss": 0.1341, "step": 4590 }, { "epoch": 13.068181818181818, "grad_norm": 3.69582200050354, "learning_rate": 0.0001, "loss": 0.1373, "step": 4600 }, { "epoch": 13.096590909090908, "grad_norm": 3.6252574920654297, "learning_rate": 0.0001, "loss": 0.1368, "step": 4610 }, { "epoch": 13.125, "grad_norm": 3.5859949588775635, "learning_rate": 0.0001, "loss": 0.1303, "step": 4620 }, { "epoch": 13.153409090909092, "grad_norm": 4.536507606506348, "learning_rate": 0.0001, "loss": 0.1389, "step": 4630 }, { "epoch": 13.181818181818182, "grad_norm": 3.7678303718566895, "learning_rate": 0.0001, "loss": 0.1376, "step": 4640 }, { "epoch": 13.210227272727273, "grad_norm": 3.8305280208587646, "learning_rate": 0.0001, "loss": 0.1299, "step": 4650 }, { "epoch": 13.238636363636363, "grad_norm": 4.209882736206055, "learning_rate": 0.0001, "loss": 0.1366, "step": 4660 }, { "epoch": 13.267045454545455, "grad_norm": 3.751279354095459, "learning_rate": 0.0001, "loss": 0.1395, "step": 4670 }, { "epoch": 13.295454545454545, "grad_norm": 3.758382558822632, "learning_rate": 0.0001, "loss": 0.1371, "step": 4680 }, { "epoch": 13.323863636363637, "grad_norm": 4.068879127502441, "learning_rate": 0.0001, "loss": 0.1335, "step": 4690 }, { "epoch": 13.352272727272727, "grad_norm": 4.470997333526611, "learning_rate": 0.0001, "loss": 0.1349, "step": 4700 }, { "epoch": 13.380681818181818, "grad_norm": 3.5465259552001953, "learning_rate": 0.0001, "loss": 0.1336, "step": 4710 }, { "epoch": 13.409090909090908, "grad_norm": 3.6585092544555664, "learning_rate": 0.0001, "loss": 0.1279, "step": 4720 }, { "epoch": 13.4375, "grad_norm": 3.6728506088256836, "learning_rate": 0.0001, "loss": 0.1289, "step": 4730 }, { "epoch": 13.465909090909092, "grad_norm": 3.1070103645324707, "learning_rate": 0.0001, "loss": 0.1293, "step": 4740 }, { "epoch": 13.494318181818182, "grad_norm": 2.9372332096099854, "learning_rate": 0.0001, "loss": 0.1329, "step": 4750 }, { "epoch": 13.522727272727273, "grad_norm": 3.2514431476593018, "learning_rate": 0.0001, "loss": 0.1287, "step": 4760 }, { "epoch": 13.551136363636363, "grad_norm": 2.84192156791687, "learning_rate": 0.0001, "loss": 0.1372, "step": 4770 }, { "epoch": 13.579545454545455, "grad_norm": 3.847137928009033, "learning_rate": 0.0001, "loss": 0.1321, "step": 4780 }, { "epoch": 13.607954545454545, "grad_norm": 3.7606923580169678, "learning_rate": 0.0001, "loss": 0.1337, "step": 4790 }, { "epoch": 13.636363636363637, "grad_norm": 3.415740966796875, "learning_rate": 0.0001, "loss": 0.1327, "step": 4800 }, { "epoch": 13.664772727272727, "grad_norm": 3.71706485748291, "learning_rate": 0.0001, "loss": 0.1403, "step": 4810 }, { "epoch": 13.693181818181818, "grad_norm": 3.2357699871063232, "learning_rate": 0.0001, "loss": 0.1308, "step": 4820 }, { "epoch": 13.721590909090908, "grad_norm": 3.241356372833252, "learning_rate": 0.0001, "loss": 0.1369, "step": 4830 }, { "epoch": 13.75, "grad_norm": 3.0397732257843018, "learning_rate": 0.0001, "loss": 0.1356, "step": 4840 }, { "epoch": 13.778409090909092, "grad_norm": 3.939297914505005, "learning_rate": 0.0001, "loss": 0.1342, "step": 4850 }, { "epoch": 13.806818181818182, "grad_norm": 3.530168294906616, "learning_rate": 0.0001, "loss": 0.1345, "step": 4860 }, { "epoch": 13.835227272727273, "grad_norm": 3.2555956840515137, "learning_rate": 0.0001, "loss": 0.1304, "step": 4870 }, { "epoch": 13.863636363636363, "grad_norm": 3.490713357925415, "learning_rate": 0.0001, "loss": 0.132, "step": 4880 }, { "epoch": 13.892045454545455, "grad_norm": 3.034759521484375, "learning_rate": 0.0001, "loss": 0.1287, "step": 4890 }, { "epoch": 13.920454545454545, "grad_norm": 3.2557218074798584, "learning_rate": 0.0001, "loss": 0.1358, "step": 4900 }, { "epoch": 13.948863636363637, "grad_norm": 3.692721128463745, "learning_rate": 0.0001, "loss": 0.1305, "step": 4910 }, { "epoch": 13.977272727272727, "grad_norm": 3.3548946380615234, "learning_rate": 0.0001, "loss": 0.1306, "step": 4920 }, { "epoch": 14.005681818181818, "grad_norm": 2.9304184913635254, "learning_rate": 0.0001, "loss": 0.1268, "step": 4930 }, { "epoch": 14.034090909090908, "grad_norm": 2.7205934524536133, "learning_rate": 0.0001, "loss": 0.1267, "step": 4940 }, { "epoch": 14.0625, "grad_norm": 3.1881885528564453, "learning_rate": 0.0001, "loss": 0.1292, "step": 4950 }, { "epoch": 14.090909090909092, "grad_norm": 2.813159465789795, "learning_rate": 0.0001, "loss": 0.1281, "step": 4960 }, { "epoch": 14.119318181818182, "grad_norm": 3.351205348968506, "learning_rate": 0.0001, "loss": 0.1336, "step": 4970 }, { "epoch": 14.147727272727273, "grad_norm": 3.1499414443969727, "learning_rate": 0.0001, "loss": 0.1327, "step": 4980 }, { "epoch": 14.176136363636363, "grad_norm": 3.1446123123168945, "learning_rate": 0.0001, "loss": 0.1296, "step": 4990 }, { "epoch": 14.204545454545455, "grad_norm": 4.177588939666748, "learning_rate": 0.0001, "loss": 0.1269, "step": 5000 }, { "epoch": 14.232954545454545, "grad_norm": 3.2882914543151855, "learning_rate": 0.0001, "loss": 0.1294, "step": 5010 }, { "epoch": 14.261363636363637, "grad_norm": 3.151151657104492, "learning_rate": 0.0001, "loss": 0.128, "step": 5020 }, { "epoch": 14.289772727272727, "grad_norm": 3.507800340652466, "learning_rate": 0.0001, "loss": 0.1306, "step": 5030 }, { "epoch": 14.318181818181818, "grad_norm": 3.266287088394165, "learning_rate": 0.0001, "loss": 0.13, "step": 5040 }, { "epoch": 14.346590909090908, "grad_norm": 3.7392666339874268, "learning_rate": 0.0001, "loss": 0.1246, "step": 5050 }, { "epoch": 14.375, "grad_norm": 3.385209083557129, "learning_rate": 0.0001, "loss": 0.1206, "step": 5060 }, { "epoch": 14.403409090909092, "grad_norm": 3.0839014053344727, "learning_rate": 0.0001, "loss": 0.119, "step": 5070 }, { "epoch": 14.431818181818182, "grad_norm": 2.9895691871643066, "learning_rate": 0.0001, "loss": 0.13, "step": 5080 }, { "epoch": 14.460227272727273, "grad_norm": 3.4198343753814697, "learning_rate": 0.0001, "loss": 0.1325, "step": 5090 }, { "epoch": 14.488636363636363, "grad_norm": 3.2257754802703857, "learning_rate": 0.0001, "loss": 0.13, "step": 5100 }, { "epoch": 14.517045454545455, "grad_norm": 2.9251694679260254, "learning_rate": 0.0001, "loss": 0.1247, "step": 5110 }, { "epoch": 14.545454545454545, "grad_norm": 3.3132123947143555, "learning_rate": 0.0001, "loss": 0.1246, "step": 5120 }, { "epoch": 14.573863636363637, "grad_norm": 3.5017828941345215, "learning_rate": 0.0001, "loss": 0.1265, "step": 5130 }, { "epoch": 14.602272727272727, "grad_norm": 3.087315559387207, "learning_rate": 0.0001, "loss": 0.1275, "step": 5140 }, { "epoch": 14.630681818181818, "grad_norm": 2.8191609382629395, "learning_rate": 0.0001, "loss": 0.1278, "step": 5150 }, { "epoch": 14.659090909090908, "grad_norm": 3.038038492202759, "learning_rate": 0.0001, "loss": 0.1197, "step": 5160 }, { "epoch": 14.6875, "grad_norm": 2.9609692096710205, "learning_rate": 0.0001, "loss": 0.1212, "step": 5170 }, { "epoch": 14.715909090909092, "grad_norm": 3.029618263244629, "learning_rate": 0.0001, "loss": 0.1235, "step": 5180 }, { "epoch": 14.744318181818182, "grad_norm": 2.6114909648895264, "learning_rate": 0.0001, "loss": 0.1286, "step": 5190 }, { "epoch": 14.772727272727273, "grad_norm": 2.887552261352539, "learning_rate": 0.0001, "loss": 0.126, "step": 5200 }, { "epoch": 14.801136363636363, "grad_norm": 3.0050230026245117, "learning_rate": 0.0001, "loss": 0.1266, "step": 5210 }, { "epoch": 14.829545454545455, "grad_norm": 3.215804100036621, "learning_rate": 0.0001, "loss": 0.1281, "step": 5220 }, { "epoch": 14.857954545454545, "grad_norm": 3.709592819213867, "learning_rate": 0.0001, "loss": 0.1325, "step": 5230 }, { "epoch": 14.886363636363637, "grad_norm": 3.143139600753784, "learning_rate": 0.0001, "loss": 0.1323, "step": 5240 }, { "epoch": 14.914772727272727, "grad_norm": 3.504509925842285, "learning_rate": 0.0001, "loss": 0.1201, "step": 5250 }, { "epoch": 14.943181818181818, "grad_norm": 3.8694465160369873, "learning_rate": 0.0001, "loss": 0.1277, "step": 5260 }, { "epoch": 14.971590909090908, "grad_norm": 3.4749040603637695, "learning_rate": 0.0001, "loss": 0.1272, "step": 5270 }, { "epoch": 15.0, "grad_norm": 3.4868409633636475, "learning_rate": 0.0001, "loss": 0.1205, "step": 5280 }, { "epoch": 15.028409090909092, "grad_norm": 2.9750540256500244, "learning_rate": 0.0001, "loss": 0.126, "step": 5290 }, { "epoch": 15.056818181818182, "grad_norm": 3.6922764778137207, "learning_rate": 0.0001, "loss": 0.1256, "step": 5300 }, { "epoch": 15.085227272727273, "grad_norm": 2.812814712524414, "learning_rate": 0.0001, "loss": 0.1181, "step": 5310 }, { "epoch": 15.113636363636363, "grad_norm": 3.3117034435272217, "learning_rate": 0.0001, "loss": 0.1226, "step": 5320 }, { "epoch": 15.142045454545455, "grad_norm": 3.519850492477417, "learning_rate": 0.0001, "loss": 0.123, "step": 5330 }, { "epoch": 15.170454545454545, "grad_norm": 3.4698708057403564, "learning_rate": 0.0001, "loss": 0.1176, "step": 5340 }, { "epoch": 15.198863636363637, "grad_norm": 3.4124035835266113, "learning_rate": 0.0001, "loss": 0.12, "step": 5350 }, { "epoch": 15.227272727272727, "grad_norm": 3.1546342372894287, "learning_rate": 0.0001, "loss": 0.1215, "step": 5360 }, { "epoch": 15.255681818181818, "grad_norm": 3.2864038944244385, "learning_rate": 0.0001, "loss": 0.1178, "step": 5370 }, { "epoch": 15.284090909090908, "grad_norm": 3.288776397705078, "learning_rate": 0.0001, "loss": 0.1235, "step": 5380 }, { "epoch": 15.3125, "grad_norm": 3.0721805095672607, "learning_rate": 0.0001, "loss": 0.1166, "step": 5390 }, { "epoch": 15.340909090909092, "grad_norm": 2.994493246078491, "learning_rate": 0.0001, "loss": 0.119, "step": 5400 }, { "epoch": 15.369318181818182, "grad_norm": 3.0647480487823486, "learning_rate": 0.0001, "loss": 0.1199, "step": 5410 }, { "epoch": 15.397727272727273, "grad_norm": 3.24365496635437, "learning_rate": 0.0001, "loss": 0.1185, "step": 5420 }, { "epoch": 15.426136363636363, "grad_norm": 2.872796058654785, "learning_rate": 0.0001, "loss": 0.1239, "step": 5430 }, { "epoch": 15.454545454545455, "grad_norm": 3.0730972290039062, "learning_rate": 0.0001, "loss": 0.118, "step": 5440 }, { "epoch": 15.482954545454545, "grad_norm": 3.010819911956787, "learning_rate": 0.0001, "loss": 0.1282, "step": 5450 }, { "epoch": 15.511363636363637, "grad_norm": 3.111093521118164, "learning_rate": 0.0001, "loss": 0.1203, "step": 5460 }, { "epoch": 15.539772727272727, "grad_norm": 2.3254058361053467, "learning_rate": 0.0001, "loss": 0.1196, "step": 5470 }, { "epoch": 15.568181818181818, "grad_norm": 2.7858808040618896, "learning_rate": 0.0001, "loss": 0.1157, "step": 5480 }, { "epoch": 15.596590909090908, "grad_norm": 2.8064205646514893, "learning_rate": 0.0001, "loss": 0.1241, "step": 5490 }, { "epoch": 15.625, "grad_norm": 3.140082597732544, "learning_rate": 0.0001, "loss": 0.123, "step": 5500 }, { "epoch": 15.653409090909092, "grad_norm": 3.064652681350708, "learning_rate": 0.0001, "loss": 0.1263, "step": 5510 }, { "epoch": 15.681818181818182, "grad_norm": 3.274289131164551, "learning_rate": 0.0001, "loss": 0.1239, "step": 5520 }, { "epoch": 15.710227272727273, "grad_norm": 2.9138309955596924, "learning_rate": 0.0001, "loss": 0.1232, "step": 5530 }, { "epoch": 15.738636363636363, "grad_norm": 2.9141759872436523, "learning_rate": 0.0001, "loss": 0.1222, "step": 5540 }, { "epoch": 15.767045454545455, "grad_norm": 2.755699396133423, "learning_rate": 0.0001, "loss": 0.1209, "step": 5550 }, { "epoch": 15.795454545454545, "grad_norm": 2.7435543537139893, "learning_rate": 0.0001, "loss": 0.1205, "step": 5560 }, { "epoch": 15.823863636363637, "grad_norm": 2.868746519088745, "learning_rate": 0.0001, "loss": 0.1188, "step": 5570 }, { "epoch": 15.852272727272727, "grad_norm": 2.853201389312744, "learning_rate": 0.0001, "loss": 0.1154, "step": 5580 }, { "epoch": 15.880681818181818, "grad_norm": 3.2404487133026123, "learning_rate": 0.0001, "loss": 0.1174, "step": 5590 }, { "epoch": 15.909090909090908, "grad_norm": 3.210789203643799, "learning_rate": 0.0001, "loss": 0.1204, "step": 5600 }, { "epoch": 15.9375, "grad_norm": 2.9118998050689697, "learning_rate": 0.0001, "loss": 0.1183, "step": 5610 }, { "epoch": 15.965909090909092, "grad_norm": 2.8953421115875244, "learning_rate": 0.0001, "loss": 0.1182, "step": 5620 }, { "epoch": 15.994318181818182, "grad_norm": 2.942523241043091, "learning_rate": 0.0001, "loss": 0.1195, "step": 5630 }, { "epoch": 16.022727272727273, "grad_norm": 3.2362887859344482, "learning_rate": 0.0001, "loss": 0.1157, "step": 5640 }, { "epoch": 16.051136363636363, "grad_norm": 2.438734292984009, "learning_rate": 0.0001, "loss": 0.1124, "step": 5650 }, { "epoch": 16.079545454545453, "grad_norm": 2.5169425010681152, "learning_rate": 0.0001, "loss": 0.1113, "step": 5660 }, { "epoch": 16.107954545454547, "grad_norm": 2.644383668899536, "learning_rate": 0.0001, "loss": 0.1165, "step": 5670 }, { "epoch": 16.136363636363637, "grad_norm": 3.3263514041900635, "learning_rate": 0.0001, "loss": 0.1221, "step": 5680 }, { "epoch": 16.164772727272727, "grad_norm": 2.8352041244506836, "learning_rate": 0.0001, "loss": 0.1138, "step": 5690 }, { "epoch": 16.193181818181817, "grad_norm": 3.1213154792785645, "learning_rate": 0.0001, "loss": 0.1164, "step": 5700 }, { "epoch": 16.22159090909091, "grad_norm": 3.123992681503296, "learning_rate": 0.0001, "loss": 0.1163, "step": 5710 }, { "epoch": 16.25, "grad_norm": 3.104673385620117, "learning_rate": 0.0001, "loss": 0.1154, "step": 5720 }, { "epoch": 16.27840909090909, "grad_norm": 3.469186544418335, "learning_rate": 0.0001, "loss": 0.1137, "step": 5730 }, { "epoch": 16.306818181818183, "grad_norm": 3.1163649559020996, "learning_rate": 0.0001, "loss": 0.1164, "step": 5740 }, { "epoch": 16.335227272727273, "grad_norm": 2.9757080078125, "learning_rate": 0.0001, "loss": 0.1215, "step": 5750 }, { "epoch": 16.363636363636363, "grad_norm": 3.346102237701416, "learning_rate": 0.0001, "loss": 0.114, "step": 5760 }, { "epoch": 16.392045454545453, "grad_norm": 3.4598140716552734, "learning_rate": 0.0001, "loss": 0.1134, "step": 5770 }, { "epoch": 16.420454545454547, "grad_norm": 2.8395731449127197, "learning_rate": 0.0001, "loss": 0.1136, "step": 5780 }, { "epoch": 16.448863636363637, "grad_norm": 2.390820026397705, "learning_rate": 0.0001, "loss": 0.115, "step": 5790 }, { "epoch": 16.477272727272727, "grad_norm": 3.3408634662628174, "learning_rate": 0.0001, "loss": 0.1191, "step": 5800 }, { "epoch": 16.505681818181817, "grad_norm": 2.721245050430298, "learning_rate": 0.0001, "loss": 0.1187, "step": 5810 }, { "epoch": 16.53409090909091, "grad_norm": 3.057680130004883, "learning_rate": 0.0001, "loss": 0.1169, "step": 5820 }, { "epoch": 16.5625, "grad_norm": 2.8173437118530273, "learning_rate": 0.0001, "loss": 0.1178, "step": 5830 }, { "epoch": 16.59090909090909, "grad_norm": 2.824174404144287, "learning_rate": 0.0001, "loss": 0.1137, "step": 5840 }, { "epoch": 16.619318181818183, "grad_norm": 2.9729907512664795, "learning_rate": 0.0001, "loss": 0.1149, "step": 5850 }, { "epoch": 16.647727272727273, "grad_norm": 2.893472909927368, "learning_rate": 0.0001, "loss": 0.1147, "step": 5860 }, { "epoch": 16.676136363636363, "grad_norm": 2.6419155597686768, "learning_rate": 0.0001, "loss": 0.1166, "step": 5870 }, { "epoch": 16.704545454545453, "grad_norm": 2.341890811920166, "learning_rate": 0.0001, "loss": 0.1148, "step": 5880 }, { "epoch": 16.732954545454547, "grad_norm": 2.980921506881714, "learning_rate": 0.0001, "loss": 0.1134, "step": 5890 }, { "epoch": 16.761363636363637, "grad_norm": 2.975208044052124, "learning_rate": 0.0001, "loss": 0.1146, "step": 5900 }, { "epoch": 16.789772727272727, "grad_norm": 2.528339147567749, "learning_rate": 0.0001, "loss": 0.1155, "step": 5910 }, { "epoch": 16.818181818181817, "grad_norm": 2.539898633956909, "learning_rate": 0.0001, "loss": 0.1137, "step": 5920 }, { "epoch": 16.84659090909091, "grad_norm": 2.4367032051086426, "learning_rate": 0.0001, "loss": 0.1175, "step": 5930 }, { "epoch": 16.875, "grad_norm": 2.5197834968566895, "learning_rate": 0.0001, "loss": 0.1158, "step": 5940 }, { "epoch": 16.90340909090909, "grad_norm": 2.4279847145080566, "learning_rate": 0.0001, "loss": 0.1113, "step": 5950 }, { "epoch": 16.931818181818183, "grad_norm": 3.2526116371154785, "learning_rate": 0.0001, "loss": 0.1129, "step": 5960 }, { "epoch": 16.960227272727273, "grad_norm": 2.5634706020355225, "learning_rate": 0.0001, "loss": 0.1146, "step": 5970 }, { "epoch": 16.988636363636363, "grad_norm": 2.89918851852417, "learning_rate": 0.0001, "loss": 0.1147, "step": 5980 }, { "epoch": 17.017045454545453, "grad_norm": 2.8295469284057617, "learning_rate": 0.0001, "loss": 0.114, "step": 5990 }, { "epoch": 17.045454545454547, "grad_norm": 2.8802335262298584, "learning_rate": 0.0001, "loss": 0.1125, "step": 6000 }, { "epoch": 17.073863636363637, "grad_norm": 2.6155662536621094, "learning_rate": 0.0001, "loss": 0.1158, "step": 6010 }, { "epoch": 17.102272727272727, "grad_norm": 2.791156530380249, "learning_rate": 0.0001, "loss": 0.1147, "step": 6020 }, { "epoch": 17.130681818181817, "grad_norm": 2.7444076538085938, "learning_rate": 0.0001, "loss": 0.1172, "step": 6030 }, { "epoch": 17.15909090909091, "grad_norm": 3.0765230655670166, "learning_rate": 0.0001, "loss": 0.1114, "step": 6040 }, { "epoch": 17.1875, "grad_norm": 3.4001102447509766, "learning_rate": 0.0001, "loss": 0.1156, "step": 6050 }, { "epoch": 17.21590909090909, "grad_norm": 2.574037790298462, "learning_rate": 0.0001, "loss": 0.114, "step": 6060 }, { "epoch": 17.244318181818183, "grad_norm": 2.428994655609131, "learning_rate": 0.0001, "loss": 0.1059, "step": 6070 }, { "epoch": 17.272727272727273, "grad_norm": 2.552593469619751, "learning_rate": 0.0001, "loss": 0.1132, "step": 6080 }, { "epoch": 17.301136363636363, "grad_norm": 2.748263359069824, "learning_rate": 0.0001, "loss": 0.1149, "step": 6090 }, { "epoch": 17.329545454545453, "grad_norm": 2.565458059310913, "learning_rate": 0.0001, "loss": 0.1105, "step": 6100 }, { "epoch": 17.357954545454547, "grad_norm": 3.3726043701171875, "learning_rate": 0.0001, "loss": 0.1108, "step": 6110 }, { "epoch": 17.386363636363637, "grad_norm": 2.640763282775879, "learning_rate": 0.0001, "loss": 0.1049, "step": 6120 }, { "epoch": 17.414772727272727, "grad_norm": 2.3288469314575195, "learning_rate": 0.0001, "loss": 0.1073, "step": 6130 }, { "epoch": 17.443181818181817, "grad_norm": 2.47501802444458, "learning_rate": 0.0001, "loss": 0.1097, "step": 6140 }, { "epoch": 17.47159090909091, "grad_norm": 2.443122148513794, "learning_rate": 0.0001, "loss": 0.1167, "step": 6150 }, { "epoch": 17.5, "grad_norm": 3.0210578441619873, "learning_rate": 0.0001, "loss": 0.1185, "step": 6160 }, { "epoch": 17.52840909090909, "grad_norm": 2.7917838096618652, "learning_rate": 0.0001, "loss": 0.1176, "step": 6170 }, { "epoch": 17.556818181818183, "grad_norm": 2.502795457839966, "learning_rate": 0.0001, "loss": 0.1101, "step": 6180 }, { "epoch": 17.585227272727273, "grad_norm": 2.6011240482330322, "learning_rate": 0.0001, "loss": 0.111, "step": 6190 }, { "epoch": 17.613636363636363, "grad_norm": 2.917656183242798, "learning_rate": 0.0001, "loss": 0.1093, "step": 6200 }, { "epoch": 17.642045454545453, "grad_norm": 2.369063138961792, "learning_rate": 0.0001, "loss": 0.1116, "step": 6210 }, { "epoch": 17.670454545454547, "grad_norm": 2.5128045082092285, "learning_rate": 0.0001, "loss": 0.1111, "step": 6220 }, { "epoch": 17.698863636363637, "grad_norm": 3.0461254119873047, "learning_rate": 0.0001, "loss": 0.1147, "step": 6230 }, { "epoch": 17.727272727272727, "grad_norm": 2.1202504634857178, "learning_rate": 0.0001, "loss": 0.1107, "step": 6240 }, { "epoch": 17.755681818181817, "grad_norm": 2.3112752437591553, "learning_rate": 0.0001, "loss": 0.1086, "step": 6250 }, { "epoch": 17.78409090909091, "grad_norm": 2.8188629150390625, "learning_rate": 0.0001, "loss": 0.121, "step": 6260 }, { "epoch": 17.8125, "grad_norm": 2.883798599243164, "learning_rate": 0.0001, "loss": 0.1088, "step": 6270 }, { "epoch": 17.84090909090909, "grad_norm": 2.619675397872925, "learning_rate": 0.0001, "loss": 0.1121, "step": 6280 }, { "epoch": 17.869318181818183, "grad_norm": 2.5900354385375977, "learning_rate": 0.0001, "loss": 0.111, "step": 6290 }, { "epoch": 17.897727272727273, "grad_norm": 3.2702383995056152, "learning_rate": 0.0001, "loss": 0.1123, "step": 6300 }, { "epoch": 17.926136363636363, "grad_norm": 2.632286548614502, "learning_rate": 0.0001, "loss": 0.1096, "step": 6310 }, { "epoch": 17.954545454545453, "grad_norm": 2.598470687866211, "learning_rate": 0.0001, "loss": 0.109, "step": 6320 }, { "epoch": 17.982954545454547, "grad_norm": 2.3819477558135986, "learning_rate": 0.0001, "loss": 0.1118, "step": 6330 }, { "epoch": 18.011363636363637, "grad_norm": 2.4921939373016357, "learning_rate": 0.0001, "loss": 0.1116, "step": 6340 }, { "epoch": 18.039772727272727, "grad_norm": 2.820632219314575, "learning_rate": 0.0001, "loss": 0.1052, "step": 6350 }, { "epoch": 18.068181818181817, "grad_norm": 2.769113063812256, "learning_rate": 0.0001, "loss": 0.1141, "step": 6360 }, { "epoch": 18.09659090909091, "grad_norm": 2.58843731880188, "learning_rate": 0.0001, "loss": 0.1133, "step": 6370 }, { "epoch": 18.125, "grad_norm": 1.958970069885254, "learning_rate": 0.0001, "loss": 0.114, "step": 6380 }, { "epoch": 18.15340909090909, "grad_norm": 2.466975688934326, "learning_rate": 0.0001, "loss": 0.1071, "step": 6390 }, { "epoch": 18.181818181818183, "grad_norm": 2.1818594932556152, "learning_rate": 0.0001, "loss": 0.1095, "step": 6400 }, { "epoch": 18.210227272727273, "grad_norm": 2.3512721061706543, "learning_rate": 0.0001, "loss": 0.114, "step": 6410 }, { "epoch": 18.238636363636363, "grad_norm": 2.3737564086914062, "learning_rate": 0.0001, "loss": 0.1113, "step": 6420 }, { "epoch": 18.267045454545453, "grad_norm": 2.4189605712890625, "learning_rate": 0.0001, "loss": 0.1117, "step": 6430 }, { "epoch": 18.295454545454547, "grad_norm": 2.6895744800567627, "learning_rate": 0.0001, "loss": 0.1122, "step": 6440 }, { "epoch": 18.323863636363637, "grad_norm": 2.517216920852661, "learning_rate": 0.0001, "loss": 0.1132, "step": 6450 }, { "epoch": 18.352272727272727, "grad_norm": 2.251976251602173, "learning_rate": 0.0001, "loss": 0.1058, "step": 6460 }, { "epoch": 18.380681818181817, "grad_norm": 2.233076333999634, "learning_rate": 0.0001, "loss": 0.1123, "step": 6470 }, { "epoch": 18.40909090909091, "grad_norm": 2.5515904426574707, "learning_rate": 0.0001, "loss": 0.1107, "step": 6480 }, { "epoch": 18.4375, "grad_norm": 2.60249662399292, "learning_rate": 0.0001, "loss": 0.1089, "step": 6490 }, { "epoch": 18.46590909090909, "grad_norm": 2.640946388244629, "learning_rate": 0.0001, "loss": 0.1107, "step": 6500 }, { "epoch": 18.494318181818183, "grad_norm": 2.663269281387329, "learning_rate": 0.0001, "loss": 0.1085, "step": 6510 }, { "epoch": 18.522727272727273, "grad_norm": 2.4106152057647705, "learning_rate": 0.0001, "loss": 0.1098, "step": 6520 }, { "epoch": 18.551136363636363, "grad_norm": 2.3369884490966797, "learning_rate": 0.0001, "loss": 0.1096, "step": 6530 }, { "epoch": 18.579545454545453, "grad_norm": 2.346574544906616, "learning_rate": 0.0001, "loss": 0.104, "step": 6540 }, { "epoch": 18.607954545454547, "grad_norm": 2.2791831493377686, "learning_rate": 0.0001, "loss": 0.1105, "step": 6550 }, { "epoch": 18.636363636363637, "grad_norm": 2.364088773727417, "learning_rate": 0.0001, "loss": 0.1113, "step": 6560 }, { "epoch": 18.664772727272727, "grad_norm": 2.5654282569885254, "learning_rate": 0.0001, "loss": 0.1067, "step": 6570 }, { "epoch": 18.693181818181817, "grad_norm": 2.577658176422119, "learning_rate": 0.0001, "loss": 0.1117, "step": 6580 }, { "epoch": 18.72159090909091, "grad_norm": 2.4139275550842285, "learning_rate": 0.0001, "loss": 0.1043, "step": 6590 }, { "epoch": 18.75, "grad_norm": 2.5888614654541016, "learning_rate": 0.0001, "loss": 0.1082, "step": 6600 }, { "epoch": 18.77840909090909, "grad_norm": 2.3184974193573, "learning_rate": 0.0001, "loss": 0.1083, "step": 6610 }, { "epoch": 18.806818181818183, "grad_norm": 2.522383451461792, "learning_rate": 0.0001, "loss": 0.1083, "step": 6620 }, { "epoch": 18.835227272727273, "grad_norm": 2.2055583000183105, "learning_rate": 0.0001, "loss": 0.1076, "step": 6630 }, { "epoch": 18.863636363636363, "grad_norm": 2.58622145652771, "learning_rate": 0.0001, "loss": 0.1087, "step": 6640 }, { "epoch": 18.892045454545453, "grad_norm": 2.3860034942626953, "learning_rate": 0.0001, "loss": 0.1108, "step": 6650 }, { "epoch": 18.920454545454547, "grad_norm": 2.6108431816101074, "learning_rate": 0.0001, "loss": 0.1089, "step": 6660 }, { "epoch": 18.948863636363637, "grad_norm": 2.1429636478424072, "learning_rate": 0.0001, "loss": 0.1127, "step": 6670 }, { "epoch": 18.977272727272727, "grad_norm": 2.1483328342437744, "learning_rate": 0.0001, "loss": 0.107, "step": 6680 }, { "epoch": 19.005681818181817, "grad_norm": 2.524930477142334, "learning_rate": 0.0001, "loss": 0.1176, "step": 6690 }, { "epoch": 19.03409090909091, "grad_norm": 2.55420184135437, "learning_rate": 0.0001, "loss": 0.1134, "step": 6700 }, { "epoch": 19.0625, "grad_norm": 2.4037156105041504, "learning_rate": 0.0001, "loss": 0.1121, "step": 6710 }, { "epoch": 19.09090909090909, "grad_norm": 2.545936107635498, "learning_rate": 0.0001, "loss": 0.1038, "step": 6720 }, { "epoch": 19.119318181818183, "grad_norm": 2.2953386306762695, "learning_rate": 0.0001, "loss": 0.1045, "step": 6730 }, { "epoch": 19.147727272727273, "grad_norm": 2.4761712551116943, "learning_rate": 0.0001, "loss": 0.1022, "step": 6740 }, { "epoch": 19.176136363636363, "grad_norm": 2.2314484119415283, "learning_rate": 0.0001, "loss": 0.1048, "step": 6750 }, { "epoch": 19.204545454545453, "grad_norm": 2.306548833847046, "learning_rate": 0.0001, "loss": 0.1068, "step": 6760 }, { "epoch": 19.232954545454547, "grad_norm": 2.563133955001831, "learning_rate": 0.0001, "loss": 0.1051, "step": 6770 }, { "epoch": 19.261363636363637, "grad_norm": 2.306220531463623, "learning_rate": 0.0001, "loss": 0.1028, "step": 6780 }, { "epoch": 19.289772727272727, "grad_norm": 2.3580806255340576, "learning_rate": 0.0001, "loss": 0.1026, "step": 6790 }, { "epoch": 19.318181818181817, "grad_norm": 2.317422866821289, "learning_rate": 0.0001, "loss": 0.1045, "step": 6800 }, { "epoch": 19.34659090909091, "grad_norm": 2.4174487590789795, "learning_rate": 0.0001, "loss": 0.1047, "step": 6810 }, { "epoch": 19.375, "grad_norm": 2.417792558670044, "learning_rate": 0.0001, "loss": 0.1062, "step": 6820 }, { "epoch": 19.40340909090909, "grad_norm": 2.08555269241333, "learning_rate": 0.0001, "loss": 0.1075, "step": 6830 }, { "epoch": 19.431818181818183, "grad_norm": 2.052635431289673, "learning_rate": 0.0001, "loss": 0.1086, "step": 6840 }, { "epoch": 19.460227272727273, "grad_norm": 2.216602325439453, "learning_rate": 0.0001, "loss": 0.1077, "step": 6850 }, { "epoch": 19.488636363636363, "grad_norm": 2.393385410308838, "learning_rate": 0.0001, "loss": 0.1084, "step": 6860 }, { "epoch": 19.517045454545453, "grad_norm": 2.2516062259674072, "learning_rate": 0.0001, "loss": 0.1081, "step": 6870 }, { "epoch": 19.545454545454547, "grad_norm": 2.2450714111328125, "learning_rate": 0.0001, "loss": 0.1057, "step": 6880 }, { "epoch": 19.573863636363637, "grad_norm": 2.2236733436584473, "learning_rate": 0.0001, "loss": 0.1057, "step": 6890 }, { "epoch": 19.602272727272727, "grad_norm": 2.0041747093200684, "learning_rate": 0.0001, "loss": 0.1092, "step": 6900 }, { "epoch": 19.630681818181817, "grad_norm": 2.264723300933838, "learning_rate": 0.0001, "loss": 0.107, "step": 6910 }, { "epoch": 19.65909090909091, "grad_norm": 2.467823028564453, "learning_rate": 0.0001, "loss": 0.1042, "step": 6920 }, { "epoch": 19.6875, "grad_norm": 2.2700631618499756, "learning_rate": 0.0001, "loss": 0.1037, "step": 6930 }, { "epoch": 19.71590909090909, "grad_norm": 2.2037792205810547, "learning_rate": 0.0001, "loss": 0.1012, "step": 6940 }, { "epoch": 19.744318181818183, "grad_norm": 2.0393118858337402, "learning_rate": 0.0001, "loss": 0.1018, "step": 6950 }, { "epoch": 19.772727272727273, "grad_norm": 2.0741636753082275, "learning_rate": 0.0001, "loss": 0.1095, "step": 6960 }, { "epoch": 19.801136363636363, "grad_norm": 1.9817142486572266, "learning_rate": 0.0001, "loss": 0.102, "step": 6970 }, { "epoch": 19.829545454545453, "grad_norm": 2.222271680831909, "learning_rate": 0.0001, "loss": 0.1096, "step": 6980 }, { "epoch": 19.857954545454547, "grad_norm": 2.0376973152160645, "learning_rate": 0.0001, "loss": 0.1086, "step": 6990 }, { "epoch": 19.886363636363637, "grad_norm": 1.9794195890426636, "learning_rate": 0.0001, "loss": 0.106, "step": 7000 }, { "epoch": 19.914772727272727, "grad_norm": 2.154062032699585, "learning_rate": 0.0001, "loss": 0.1117, "step": 7010 }, { "epoch": 19.943181818181817, "grad_norm": 2.5424580574035645, "learning_rate": 0.0001, "loss": 0.1079, "step": 7020 }, { "epoch": 19.97159090909091, "grad_norm": 2.4422006607055664, "learning_rate": 0.0001, "loss": 0.1005, "step": 7030 }, { "epoch": 20.0, "grad_norm": 2.433610200881958, "learning_rate": 0.0001, "loss": 0.1051, "step": 7040 }, { "epoch": 20.02840909090909, "grad_norm": 2.1430225372314453, "learning_rate": 0.0001, "loss": 0.1003, "step": 7050 }, { "epoch": 20.056818181818183, "grad_norm": 2.1949706077575684, "learning_rate": 0.0001, "loss": 0.0975, "step": 7060 }, { "epoch": 20.085227272727273, "grad_norm": 2.3351924419403076, "learning_rate": 0.0001, "loss": 0.1076, "step": 7070 }, { "epoch": 20.113636363636363, "grad_norm": 2.2610702514648438, "learning_rate": 0.0001, "loss": 0.1052, "step": 7080 }, { "epoch": 20.142045454545453, "grad_norm": 2.0343222618103027, "learning_rate": 0.0001, "loss": 0.105, "step": 7090 }, { "epoch": 20.170454545454547, "grad_norm": 2.3669273853302, "learning_rate": 0.0001, "loss": 0.1037, "step": 7100 }, { "epoch": 20.198863636363637, "grad_norm": 2.224647283554077, "learning_rate": 0.0001, "loss": 0.1007, "step": 7110 }, { "epoch": 20.227272727272727, "grad_norm": 2.3760414123535156, "learning_rate": 0.0001, "loss": 0.1087, "step": 7120 }, { "epoch": 20.255681818181817, "grad_norm": 2.272942543029785, "learning_rate": 0.0001, "loss": 0.1051, "step": 7130 }, { "epoch": 20.28409090909091, "grad_norm": 2.159137487411499, "learning_rate": 0.0001, "loss": 0.1053, "step": 7140 }, { "epoch": 20.3125, "grad_norm": 2.2997374534606934, "learning_rate": 0.0001, "loss": 0.1046, "step": 7150 }, { "epoch": 20.34090909090909, "grad_norm": 2.431882381439209, "learning_rate": 0.0001, "loss": 0.1038, "step": 7160 }, { "epoch": 20.369318181818183, "grad_norm": 2.410435676574707, "learning_rate": 0.0001, "loss": 0.1018, "step": 7170 }, { "epoch": 20.397727272727273, "grad_norm": 2.3939435482025146, "learning_rate": 0.0001, "loss": 0.102, "step": 7180 }, { "epoch": 20.426136363636363, "grad_norm": 2.132279396057129, "learning_rate": 0.0001, "loss": 0.1034, "step": 7190 }, { "epoch": 20.454545454545453, "grad_norm": 2.2916312217712402, "learning_rate": 0.0001, "loss": 0.1052, "step": 7200 }, { "epoch": 20.482954545454547, "grad_norm": 2.5798282623291016, "learning_rate": 0.0001, "loss": 0.1026, "step": 7210 }, { "epoch": 20.511363636363637, "grad_norm": 2.2447385787963867, "learning_rate": 0.0001, "loss": 0.0993, "step": 7220 }, { "epoch": 20.539772727272727, "grad_norm": 2.2323153018951416, "learning_rate": 0.0001, "loss": 0.1009, "step": 7230 }, { "epoch": 20.568181818181817, "grad_norm": 1.8763328790664673, "learning_rate": 0.0001, "loss": 0.1053, "step": 7240 }, { "epoch": 20.59659090909091, "grad_norm": 1.501619815826416, "learning_rate": 0.0001, "loss": 0.1027, "step": 7250 }, { "epoch": 20.625, "grad_norm": 1.9412376880645752, "learning_rate": 0.0001, "loss": 0.1056, "step": 7260 }, { "epoch": 20.65340909090909, "grad_norm": 2.07375431060791, "learning_rate": 0.0001, "loss": 0.1047, "step": 7270 }, { "epoch": 20.681818181818183, "grad_norm": 2.0877957344055176, "learning_rate": 0.0001, "loss": 0.1035, "step": 7280 }, { "epoch": 20.710227272727273, "grad_norm": 2.4032297134399414, "learning_rate": 0.0001, "loss": 0.1039, "step": 7290 }, { "epoch": 20.738636363636363, "grad_norm": 2.4037721157073975, "learning_rate": 0.0001, "loss": 0.0988, "step": 7300 }, { "epoch": 20.767045454545453, "grad_norm": 1.8980406522750854, "learning_rate": 0.0001, "loss": 0.1024, "step": 7310 }, { "epoch": 20.795454545454547, "grad_norm": 2.298187732696533, "learning_rate": 0.0001, "loss": 0.1065, "step": 7320 }, { "epoch": 20.823863636363637, "grad_norm": 2.2373411655426025, "learning_rate": 0.0001, "loss": 0.1107, "step": 7330 }, { "epoch": 20.852272727272727, "grad_norm": 2.2414472103118896, "learning_rate": 0.0001, "loss": 0.1063, "step": 7340 }, { "epoch": 20.880681818181817, "grad_norm": 1.9661855697631836, "learning_rate": 0.0001, "loss": 0.1055, "step": 7350 }, { "epoch": 20.90909090909091, "grad_norm": 1.9864373207092285, "learning_rate": 0.0001, "loss": 0.1013, "step": 7360 }, { "epoch": 20.9375, "grad_norm": 2.239394187927246, "learning_rate": 0.0001, "loss": 0.1028, "step": 7370 }, { "epoch": 20.96590909090909, "grad_norm": 1.8729074001312256, "learning_rate": 0.0001, "loss": 0.1045, "step": 7380 }, { "epoch": 20.994318181818183, "grad_norm": 1.9113003015518188, "learning_rate": 0.0001, "loss": 0.1061, "step": 7390 }, { "epoch": 21.022727272727273, "grad_norm": 1.733733892440796, "learning_rate": 0.0001, "loss": 0.1029, "step": 7400 }, { "epoch": 21.051136363636363, "grad_norm": 1.908080816268921, "learning_rate": 0.0001, "loss": 0.1055, "step": 7410 }, { "epoch": 21.079545454545453, "grad_norm": 2.046468496322632, "learning_rate": 0.0001, "loss": 0.103, "step": 7420 }, { "epoch": 21.107954545454547, "grad_norm": 2.1874492168426514, "learning_rate": 0.0001, "loss": 0.1023, "step": 7430 }, { "epoch": 21.136363636363637, "grad_norm": 1.7166926860809326, "learning_rate": 0.0001, "loss": 0.1023, "step": 7440 }, { "epoch": 21.164772727272727, "grad_norm": 2.2620322704315186, "learning_rate": 0.0001, "loss": 0.1019, "step": 7450 }, { "epoch": 21.193181818181817, "grad_norm": 2.283912181854248, "learning_rate": 0.0001, "loss": 0.1044, "step": 7460 }, { "epoch": 21.22159090909091, "grad_norm": 2.0486154556274414, "learning_rate": 0.0001, "loss": 0.1014, "step": 7470 }, { "epoch": 21.25, "grad_norm": 1.8538813591003418, "learning_rate": 0.0001, "loss": 0.1014, "step": 7480 }, { "epoch": 21.27840909090909, "grad_norm": 1.9765312671661377, "learning_rate": 0.0001, "loss": 0.0973, "step": 7490 }, { "epoch": 21.306818181818183, "grad_norm": 1.8284136056900024, "learning_rate": 0.0001, "loss": 0.1008, "step": 7500 }, { "epoch": 21.335227272727273, "grad_norm": 1.8823169469833374, "learning_rate": 0.0001, "loss": 0.1045, "step": 7510 }, { "epoch": 21.363636363636363, "grad_norm": 1.8679107427597046, "learning_rate": 0.0001, "loss": 0.0994, "step": 7520 }, { "epoch": 21.392045454545453, "grad_norm": 2.5383877754211426, "learning_rate": 0.0001, "loss": 0.1018, "step": 7530 }, { "epoch": 21.420454545454547, "grad_norm": 1.8846964836120605, "learning_rate": 0.0001, "loss": 0.1014, "step": 7540 }, { "epoch": 21.448863636363637, "grad_norm": 2.0011348724365234, "learning_rate": 0.0001, "loss": 0.101, "step": 7550 }, { "epoch": 21.477272727272727, "grad_norm": 2.220806360244751, "learning_rate": 0.0001, "loss": 0.0957, "step": 7560 }, { "epoch": 21.505681818181817, "grad_norm": 2.1716644763946533, "learning_rate": 0.0001, "loss": 0.1043, "step": 7570 }, { "epoch": 21.53409090909091, "grad_norm": 2.017302989959717, "learning_rate": 0.0001, "loss": 0.101, "step": 7580 }, { "epoch": 21.5625, "grad_norm": 1.7568871974945068, "learning_rate": 0.0001, "loss": 0.101, "step": 7590 }, { "epoch": 21.59090909090909, "grad_norm": 1.4694595336914062, "learning_rate": 0.0001, "loss": 0.0992, "step": 7600 }, { "epoch": 21.619318181818183, "grad_norm": 1.6587533950805664, "learning_rate": 0.0001, "loss": 0.0947, "step": 7610 }, { "epoch": 21.647727272727273, "grad_norm": 1.5999675989151, "learning_rate": 0.0001, "loss": 0.0947, "step": 7620 }, { "epoch": 21.676136363636363, "grad_norm": 1.8176852464675903, "learning_rate": 0.0001, "loss": 0.1049, "step": 7630 }, { "epoch": 21.704545454545453, "grad_norm": 1.8811140060424805, "learning_rate": 0.0001, "loss": 0.1026, "step": 7640 }, { "epoch": 21.732954545454547, "grad_norm": 2.0048694610595703, "learning_rate": 0.0001, "loss": 0.106, "step": 7650 }, { "epoch": 21.761363636363637, "grad_norm": 2.050529956817627, "learning_rate": 0.0001, "loss": 0.102, "step": 7660 }, { "epoch": 21.789772727272727, "grad_norm": 2.1366333961486816, "learning_rate": 0.0001, "loss": 0.1052, "step": 7670 }, { "epoch": 21.818181818181817, "grad_norm": 2.113969564437866, "learning_rate": 0.0001, "loss": 0.1044, "step": 7680 }, { "epoch": 21.84659090909091, "grad_norm": 1.9199646711349487, "learning_rate": 0.0001, "loss": 0.1019, "step": 7690 }, { "epoch": 21.875, "grad_norm": 2.162484884262085, "learning_rate": 0.0001, "loss": 0.0991, "step": 7700 }, { "epoch": 21.90340909090909, "grad_norm": 2.1541666984558105, "learning_rate": 0.0001, "loss": 0.0977, "step": 7710 }, { "epoch": 21.931818181818183, "grad_norm": 2.0925753116607666, "learning_rate": 0.0001, "loss": 0.0971, "step": 7720 }, { "epoch": 21.960227272727273, "grad_norm": 2.3108863830566406, "learning_rate": 0.0001, "loss": 0.0937, "step": 7730 }, { "epoch": 21.988636363636363, "grad_norm": 2.105069398880005, "learning_rate": 0.0001, "loss": 0.0945, "step": 7740 }, { "epoch": 22.017045454545453, "grad_norm": 2.304133892059326, "learning_rate": 0.0001, "loss": 0.0999, "step": 7750 }, { "epoch": 22.045454545454547, "grad_norm": 1.9112257957458496, "learning_rate": 0.0001, "loss": 0.0944, "step": 7760 }, { "epoch": 22.073863636363637, "grad_norm": 1.9586460590362549, "learning_rate": 0.0001, "loss": 0.0978, "step": 7770 }, { "epoch": 22.102272727272727, "grad_norm": 2.0921013355255127, "learning_rate": 0.0001, "loss": 0.1018, "step": 7780 }, { "epoch": 22.130681818181817, "grad_norm": 2.0148942470550537, "learning_rate": 0.0001, "loss": 0.0995, "step": 7790 }, { "epoch": 22.15909090909091, "grad_norm": 1.9254063367843628, "learning_rate": 0.0001, "loss": 0.1026, "step": 7800 }, { "epoch": 22.1875, "grad_norm": 1.7932246923446655, "learning_rate": 0.0001, "loss": 0.1009, "step": 7810 }, { "epoch": 22.21590909090909, "grad_norm": 2.0023248195648193, "learning_rate": 0.0001, "loss": 0.1006, "step": 7820 }, { "epoch": 22.244318181818183, "grad_norm": 2.1428818702697754, "learning_rate": 0.0001, "loss": 0.0965, "step": 7830 }, { "epoch": 22.272727272727273, "grad_norm": 2.021170139312744, "learning_rate": 0.0001, "loss": 0.1, "step": 7840 }, { "epoch": 22.301136363636363, "grad_norm": 1.9330023527145386, "learning_rate": 0.0001, "loss": 0.0981, "step": 7850 }, { "epoch": 22.329545454545453, "grad_norm": 2.1269373893737793, "learning_rate": 0.0001, "loss": 0.096, "step": 7860 }, { "epoch": 22.357954545454547, "grad_norm": 2.0002894401550293, "learning_rate": 0.0001, "loss": 0.0983, "step": 7870 }, { "epoch": 22.386363636363637, "grad_norm": 1.7350810766220093, "learning_rate": 0.0001, "loss": 0.0979, "step": 7880 }, { "epoch": 22.414772727272727, "grad_norm": 2.5721471309661865, "learning_rate": 0.0001, "loss": 0.096, "step": 7890 }, { "epoch": 22.443181818181817, "grad_norm": 2.0510294437408447, "learning_rate": 0.0001, "loss": 0.0967, "step": 7900 }, { "epoch": 22.47159090909091, "grad_norm": 1.7467889785766602, "learning_rate": 0.0001, "loss": 0.1012, "step": 7910 }, { "epoch": 22.5, "grad_norm": 1.881221055984497, "learning_rate": 0.0001, "loss": 0.0977, "step": 7920 }, { "epoch": 22.52840909090909, "grad_norm": 1.6960866451263428, "learning_rate": 0.0001, "loss": 0.1013, "step": 7930 }, { "epoch": 22.556818181818183, "grad_norm": 1.9011884927749634, "learning_rate": 0.0001, "loss": 0.0998, "step": 7940 }, { "epoch": 22.585227272727273, "grad_norm": 2.289515972137451, "learning_rate": 0.0001, "loss": 0.1015, "step": 7950 }, { "epoch": 22.613636363636363, "grad_norm": 2.148452043533325, "learning_rate": 0.0001, "loss": 0.098, "step": 7960 }, { "epoch": 22.642045454545453, "grad_norm": 2.1038877964019775, "learning_rate": 0.0001, "loss": 0.0994, "step": 7970 }, { "epoch": 22.670454545454547, "grad_norm": 2.1400623321533203, "learning_rate": 0.0001, "loss": 0.0948, "step": 7980 }, { "epoch": 22.698863636363637, "grad_norm": 1.864814043045044, "learning_rate": 0.0001, "loss": 0.0917, "step": 7990 }, { "epoch": 22.727272727272727, "grad_norm": 1.9961179494857788, "learning_rate": 0.0001, "loss": 0.0962, "step": 8000 }, { "epoch": 22.755681818181817, "grad_norm": 1.9807307720184326, "learning_rate": 0.0001, "loss": 0.0954, "step": 8010 }, { "epoch": 22.78409090909091, "grad_norm": 1.8741666078567505, "learning_rate": 0.0001, "loss": 0.0944, "step": 8020 }, { "epoch": 22.8125, "grad_norm": 2.272317409515381, "learning_rate": 0.0001, "loss": 0.1037, "step": 8030 }, { "epoch": 22.84090909090909, "grad_norm": 1.9558086395263672, "learning_rate": 0.0001, "loss": 0.0963, "step": 8040 }, { "epoch": 22.869318181818183, "grad_norm": 2.009176731109619, "learning_rate": 0.0001, "loss": 0.1018, "step": 8050 }, { "epoch": 22.897727272727273, "grad_norm": 2.2107458114624023, "learning_rate": 0.0001, "loss": 0.1025, "step": 8060 }, { "epoch": 22.926136363636363, "grad_norm": 2.5531365871429443, "learning_rate": 0.0001, "loss": 0.1018, "step": 8070 }, { "epoch": 22.954545454545453, "grad_norm": 2.2472565174102783, "learning_rate": 0.0001, "loss": 0.0982, "step": 8080 }, { "epoch": 22.982954545454547, "grad_norm": 1.920031189918518, "learning_rate": 0.0001, "loss": 0.0943, "step": 8090 }, { "epoch": 23.011363636363637, "grad_norm": 1.6725058555603027, "learning_rate": 0.0001, "loss": 0.0951, "step": 8100 }, { "epoch": 23.039772727272727, "grad_norm": 2.308568239212036, "learning_rate": 0.0001, "loss": 0.092, "step": 8110 }, { "epoch": 23.068181818181817, "grad_norm": 1.8834666013717651, "learning_rate": 0.0001, "loss": 0.0927, "step": 8120 }, { "epoch": 23.09659090909091, "grad_norm": 1.8598517179489136, "learning_rate": 0.0001, "loss": 0.095, "step": 8130 }, { "epoch": 23.125, "grad_norm": 2.1915621757507324, "learning_rate": 0.0001, "loss": 0.0929, "step": 8140 }, { "epoch": 23.15340909090909, "grad_norm": 2.160149335861206, "learning_rate": 0.0001, "loss": 0.0964, "step": 8150 }, { "epoch": 23.181818181818183, "grad_norm": 1.9698961973190308, "learning_rate": 0.0001, "loss": 0.0996, "step": 8160 }, { "epoch": 23.210227272727273, "grad_norm": 1.9553509950637817, "learning_rate": 0.0001, "loss": 0.0948, "step": 8170 }, { "epoch": 23.238636363636363, "grad_norm": 1.9348289966583252, "learning_rate": 0.0001, "loss": 0.0945, "step": 8180 }, { "epoch": 23.267045454545453, "grad_norm": 2.053300142288208, "learning_rate": 0.0001, "loss": 0.0953, "step": 8190 }, { "epoch": 23.295454545454547, "grad_norm": 1.8271958827972412, "learning_rate": 0.0001, "loss": 0.092, "step": 8200 }, { "epoch": 23.323863636363637, "grad_norm": 1.5689889192581177, "learning_rate": 0.0001, "loss": 0.1003, "step": 8210 }, { "epoch": 23.352272727272727, "grad_norm": 2.1360859870910645, "learning_rate": 0.0001, "loss": 0.0956, "step": 8220 }, { "epoch": 23.380681818181817, "grad_norm": 1.819110631942749, "learning_rate": 0.0001, "loss": 0.0964, "step": 8230 }, { "epoch": 23.40909090909091, "grad_norm": 2.107375383377075, "learning_rate": 0.0001, "loss": 0.0967, "step": 8240 }, { "epoch": 23.4375, "grad_norm": 1.8408470153808594, "learning_rate": 0.0001, "loss": 0.0929, "step": 8250 }, { "epoch": 23.46590909090909, "grad_norm": 1.9134175777435303, "learning_rate": 0.0001, "loss": 0.0956, "step": 8260 }, { "epoch": 23.494318181818183, "grad_norm": 1.71891188621521, "learning_rate": 0.0001, "loss": 0.0939, "step": 8270 }, { "epoch": 23.522727272727273, "grad_norm": 1.5312421321868896, "learning_rate": 0.0001, "loss": 0.0987, "step": 8280 }, { "epoch": 23.551136363636363, "grad_norm": 1.7557247877120972, "learning_rate": 0.0001, "loss": 0.0975, "step": 8290 }, { "epoch": 23.579545454545453, "grad_norm": 1.8017261028289795, "learning_rate": 0.0001, "loss": 0.0974, "step": 8300 }, { "epoch": 23.607954545454547, "grad_norm": 1.5881474018096924, "learning_rate": 0.0001, "loss": 0.0987, "step": 8310 }, { "epoch": 23.636363636363637, "grad_norm": 1.8395788669586182, "learning_rate": 0.0001, "loss": 0.1008, "step": 8320 }, { "epoch": 23.664772727272727, "grad_norm": 1.791631817817688, "learning_rate": 0.0001, "loss": 0.0983, "step": 8330 }, { "epoch": 23.693181818181817, "grad_norm": 1.6137861013412476, "learning_rate": 0.0001, "loss": 0.0982, "step": 8340 }, { "epoch": 23.72159090909091, "grad_norm": 1.9976779222488403, "learning_rate": 0.0001, "loss": 0.1021, "step": 8350 }, { "epoch": 23.75, "grad_norm": 1.667160987854004, "learning_rate": 0.0001, "loss": 0.0982, "step": 8360 }, { "epoch": 23.77840909090909, "grad_norm": 1.5434305667877197, "learning_rate": 0.0001, "loss": 0.0967, "step": 8370 }, { "epoch": 23.806818181818183, "grad_norm": 1.8221416473388672, "learning_rate": 0.0001, "loss": 0.0971, "step": 8380 }, { "epoch": 23.835227272727273, "grad_norm": 1.9259772300720215, "learning_rate": 0.0001, "loss": 0.0969, "step": 8390 }, { "epoch": 23.863636363636363, "grad_norm": 1.9943630695343018, "learning_rate": 0.0001, "loss": 0.0993, "step": 8400 }, { "epoch": 23.892045454545453, "grad_norm": 1.5301824808120728, "learning_rate": 0.0001, "loss": 0.0944, "step": 8410 }, { "epoch": 23.920454545454547, "grad_norm": 2.062227964401245, "learning_rate": 0.0001, "loss": 0.0949, "step": 8420 }, { "epoch": 23.948863636363637, "grad_norm": 1.7410181760787964, "learning_rate": 0.0001, "loss": 0.0975, "step": 8430 }, { "epoch": 23.977272727272727, "grad_norm": 1.7448116540908813, "learning_rate": 0.0001, "loss": 0.0941, "step": 8440 }, { "epoch": 24.005681818181817, "grad_norm": 2.3489348888397217, "learning_rate": 0.0001, "loss": 0.0946, "step": 8450 }, { "epoch": 24.03409090909091, "grad_norm": 2.110835075378418, "learning_rate": 0.0001, "loss": 0.0929, "step": 8460 }, { "epoch": 24.0625, "grad_norm": 2.4186344146728516, "learning_rate": 0.0001, "loss": 0.0982, "step": 8470 }, { "epoch": 24.09090909090909, "grad_norm": 1.9502896070480347, "learning_rate": 0.0001, "loss": 0.0969, "step": 8480 }, { "epoch": 24.119318181818183, "grad_norm": 1.9351022243499756, "learning_rate": 0.0001, "loss": 0.094, "step": 8490 }, { "epoch": 24.147727272727273, "grad_norm": 1.8484196662902832, "learning_rate": 0.0001, "loss": 0.0935, "step": 8500 }, { "epoch": 24.176136363636363, "grad_norm": 1.8879474401474, "learning_rate": 0.0001, "loss": 0.0947, "step": 8510 }, { "epoch": 24.204545454545453, "grad_norm": 2.4570751190185547, "learning_rate": 0.0001, "loss": 0.0918, "step": 8520 }, { "epoch": 24.232954545454547, "grad_norm": 2.6654608249664307, "learning_rate": 0.0001, "loss": 0.0977, "step": 8530 }, { "epoch": 24.261363636363637, "grad_norm": 2.244088888168335, "learning_rate": 0.0001, "loss": 0.0935, "step": 8540 }, { "epoch": 24.289772727272727, "grad_norm": 2.7572576999664307, "learning_rate": 0.0001, "loss": 0.0956, "step": 8550 }, { "epoch": 24.318181818181817, "grad_norm": 2.1149368286132812, "learning_rate": 0.0001, "loss": 0.0929, "step": 8560 }, { "epoch": 24.34659090909091, "grad_norm": 1.9651392698287964, "learning_rate": 0.0001, "loss": 0.093, "step": 8570 }, { "epoch": 24.375, "grad_norm": 2.118886947631836, "learning_rate": 0.0001, "loss": 0.0918, "step": 8580 }, { "epoch": 24.40340909090909, "grad_norm": 1.857898235321045, "learning_rate": 0.0001, "loss": 0.0912, "step": 8590 }, { "epoch": 24.431818181818183, "grad_norm": 1.8843599557876587, "learning_rate": 0.0001, "loss": 0.0923, "step": 8600 }, { "epoch": 24.460227272727273, "grad_norm": 1.8303879499435425, "learning_rate": 0.0001, "loss": 0.0916, "step": 8610 }, { "epoch": 24.488636363636363, "grad_norm": 2.0222115516662598, "learning_rate": 0.0001, "loss": 0.0917, "step": 8620 }, { "epoch": 24.517045454545453, "grad_norm": 1.773148536682129, "learning_rate": 0.0001, "loss": 0.0939, "step": 8630 }, { "epoch": 24.545454545454547, "grad_norm": 1.9186317920684814, "learning_rate": 0.0001, "loss": 0.09, "step": 8640 }, { "epoch": 24.573863636363637, "grad_norm": 1.938623309135437, "learning_rate": 0.0001, "loss": 0.0935, "step": 8650 }, { "epoch": 24.602272727272727, "grad_norm": 2.09529972076416, "learning_rate": 0.0001, "loss": 0.0936, "step": 8660 }, { "epoch": 24.630681818181817, "grad_norm": 2.1375720500946045, "learning_rate": 0.0001, "loss": 0.0901, "step": 8670 }, { "epoch": 24.65909090909091, "grad_norm": 3.3729183673858643, "learning_rate": 0.0001, "loss": 0.0936, "step": 8680 }, { "epoch": 24.6875, "grad_norm": 2.765795946121216, "learning_rate": 0.0001, "loss": 0.0942, "step": 8690 }, { "epoch": 24.71590909090909, "grad_norm": 3.0943186283111572, "learning_rate": 0.0001, "loss": 0.0915, "step": 8700 }, { "epoch": 24.744318181818183, "grad_norm": 2.4649555683135986, "learning_rate": 0.0001, "loss": 0.0902, "step": 8710 }, { "epoch": 24.772727272727273, "grad_norm": 2.455824851989746, "learning_rate": 0.0001, "loss": 0.0953, "step": 8720 }, { "epoch": 24.801136363636363, "grad_norm": 2.0996339321136475, "learning_rate": 0.0001, "loss": 0.0949, "step": 8730 }, { "epoch": 24.829545454545453, "grad_norm": 2.2499396800994873, "learning_rate": 0.0001, "loss": 0.0915, "step": 8740 }, { "epoch": 24.857954545454547, "grad_norm": 1.920745611190796, "learning_rate": 0.0001, "loss": 0.09, "step": 8750 }, { "epoch": 24.886363636363637, "grad_norm": 1.906348705291748, "learning_rate": 0.0001, "loss": 0.0921, "step": 8760 }, { "epoch": 24.914772727272727, "grad_norm": 1.6197078227996826, "learning_rate": 0.0001, "loss": 0.0889, "step": 8770 }, { "epoch": 24.943181818181817, "grad_norm": 1.6164134740829468, "learning_rate": 0.0001, "loss": 0.0907, "step": 8780 }, { "epoch": 24.97159090909091, "grad_norm": 1.7616385221481323, "learning_rate": 0.0001, "loss": 0.0928, "step": 8790 }, { "epoch": 25.0, "grad_norm": 1.5803983211517334, "learning_rate": 0.0001, "loss": 0.0926, "step": 8800 }, { "epoch": 25.02840909090909, "grad_norm": 1.6062462329864502, "learning_rate": 0.0001, "loss": 0.0961, "step": 8810 }, { "epoch": 25.056818181818183, "grad_norm": 1.5102510452270508, "learning_rate": 0.0001, "loss": 0.0917, "step": 8820 }, { "epoch": 25.085227272727273, "grad_norm": 1.6694464683532715, "learning_rate": 0.0001, "loss": 0.0946, "step": 8830 }, { "epoch": 25.113636363636363, "grad_norm": 1.6508196592330933, "learning_rate": 0.0001, "loss": 0.0926, "step": 8840 }, { "epoch": 25.142045454545453, "grad_norm": 1.7550101280212402, "learning_rate": 0.0001, "loss": 0.0912, "step": 8850 }, { "epoch": 25.170454545454547, "grad_norm": 1.6361439228057861, "learning_rate": 0.0001, "loss": 0.0986, "step": 8860 }, { "epoch": 25.198863636363637, "grad_norm": 1.810949683189392, "learning_rate": 0.0001, "loss": 0.0952, "step": 8870 }, { "epoch": 25.227272727272727, "grad_norm": 1.7442113161087036, "learning_rate": 0.0001, "loss": 0.0936, "step": 8880 }, { "epoch": 25.255681818181817, "grad_norm": 1.9462569952011108, "learning_rate": 0.0001, "loss": 0.0978, "step": 8890 }, { "epoch": 25.28409090909091, "grad_norm": 1.865937352180481, "learning_rate": 0.0001, "loss": 0.0949, "step": 8900 }, { "epoch": 25.3125, "grad_norm": 1.5846524238586426, "learning_rate": 0.0001, "loss": 0.0912, "step": 8910 }, { "epoch": 25.34090909090909, "grad_norm": 1.6086736917495728, "learning_rate": 0.0001, "loss": 0.0945, "step": 8920 }, { "epoch": 25.369318181818183, "grad_norm": 1.665158748626709, "learning_rate": 0.0001, "loss": 0.0895, "step": 8930 }, { "epoch": 25.397727272727273, "grad_norm": 1.7580013275146484, "learning_rate": 0.0001, "loss": 0.0888, "step": 8940 }, { "epoch": 25.426136363636363, "grad_norm": 1.765702247619629, "learning_rate": 0.0001, "loss": 0.0975, "step": 8950 }, { "epoch": 25.454545454545453, "grad_norm": 1.6462661027908325, "learning_rate": 0.0001, "loss": 0.0956, "step": 8960 }, { "epoch": 25.482954545454547, "grad_norm": 2.0281505584716797, "learning_rate": 0.0001, "loss": 0.0912, "step": 8970 }, { "epoch": 25.511363636363637, "grad_norm": 1.7537845373153687, "learning_rate": 0.0001, "loss": 0.0932, "step": 8980 }, { "epoch": 25.539772727272727, "grad_norm": 1.776159644126892, "learning_rate": 0.0001, "loss": 0.0967, "step": 8990 }, { "epoch": 25.568181818181817, "grad_norm": 1.6971244812011719, "learning_rate": 0.0001, "loss": 0.0924, "step": 9000 }, { "epoch": 25.59659090909091, "grad_norm": 1.4512749910354614, "learning_rate": 0.0001, "loss": 0.0897, "step": 9010 }, { "epoch": 25.625, "grad_norm": 1.7332180738449097, "learning_rate": 0.0001, "loss": 0.0923, "step": 9020 }, { "epoch": 25.65340909090909, "grad_norm": 1.9260343313217163, "learning_rate": 0.0001, "loss": 0.0945, "step": 9030 }, { "epoch": 25.681818181818183, "grad_norm": 1.56917405128479, "learning_rate": 0.0001, "loss": 0.0959, "step": 9040 }, { "epoch": 25.710227272727273, "grad_norm": 1.4904402494430542, "learning_rate": 0.0001, "loss": 0.0961, "step": 9050 }, { "epoch": 25.738636363636363, "grad_norm": 1.7849690914154053, "learning_rate": 0.0001, "loss": 0.0987, "step": 9060 }, { "epoch": 25.767045454545453, "grad_norm": 1.63370943069458, "learning_rate": 0.0001, "loss": 0.0994, "step": 9070 }, { "epoch": 25.795454545454547, "grad_norm": 2.0049540996551514, "learning_rate": 0.0001, "loss": 0.0929, "step": 9080 }, { "epoch": 25.823863636363637, "grad_norm": 1.876146912574768, "learning_rate": 0.0001, "loss": 0.0967, "step": 9090 }, { "epoch": 25.852272727272727, "grad_norm": 1.7067279815673828, "learning_rate": 0.0001, "loss": 0.0968, "step": 9100 }, { "epoch": 25.880681818181817, "grad_norm": 1.7685781717300415, "learning_rate": 0.0001, "loss": 0.093, "step": 9110 }, { "epoch": 25.90909090909091, "grad_norm": 1.493255376815796, "learning_rate": 0.0001, "loss": 0.0905, "step": 9120 }, { "epoch": 25.9375, "grad_norm": 1.7333801984786987, "learning_rate": 0.0001, "loss": 0.0947, "step": 9130 }, { "epoch": 25.96590909090909, "grad_norm": 1.5893990993499756, "learning_rate": 0.0001, "loss": 0.092, "step": 9140 }, { "epoch": 25.994318181818183, "grad_norm": 1.9104375839233398, "learning_rate": 0.0001, "loss": 0.0963, "step": 9150 }, { "epoch": 26.022727272727273, "grad_norm": 1.5926457643508911, "learning_rate": 0.0001, "loss": 0.0939, "step": 9160 }, { "epoch": 26.051136363636363, "grad_norm": 1.5772978067398071, "learning_rate": 0.0001, "loss": 0.0872, "step": 9170 }, { "epoch": 26.079545454545453, "grad_norm": 1.5457425117492676, "learning_rate": 0.0001, "loss": 0.0927, "step": 9180 }, { "epoch": 26.107954545454547, "grad_norm": 1.6755262613296509, "learning_rate": 0.0001, "loss": 0.0954, "step": 9190 }, { "epoch": 26.136363636363637, "grad_norm": 1.461090326309204, "learning_rate": 0.0001, "loss": 0.0936, "step": 9200 }, { "epoch": 26.164772727272727, "grad_norm": 1.4528868198394775, "learning_rate": 0.0001, "loss": 0.0937, "step": 9210 }, { "epoch": 26.193181818181817, "grad_norm": 1.5315214395523071, "learning_rate": 0.0001, "loss": 0.1011, "step": 9220 }, { "epoch": 26.22159090909091, "grad_norm": 1.560640573501587, "learning_rate": 0.0001, "loss": 0.092, "step": 9230 }, { "epoch": 26.25, "grad_norm": 1.4106330871582031, "learning_rate": 0.0001, "loss": 0.0971, "step": 9240 }, { "epoch": 26.27840909090909, "grad_norm": 1.4047380685806274, "learning_rate": 0.0001, "loss": 0.0924, "step": 9250 }, { "epoch": 26.306818181818183, "grad_norm": 1.4862667322158813, "learning_rate": 0.0001, "loss": 0.0979, "step": 9260 }, { "epoch": 26.335227272727273, "grad_norm": 1.5880588293075562, "learning_rate": 0.0001, "loss": 0.0934, "step": 9270 }, { "epoch": 26.363636363636363, "grad_norm": 1.428328514099121, "learning_rate": 0.0001, "loss": 0.0971, "step": 9280 }, { "epoch": 26.392045454545453, "grad_norm": 1.5301146507263184, "learning_rate": 0.0001, "loss": 0.095, "step": 9290 }, { "epoch": 26.420454545454547, "grad_norm": 1.6134599447250366, "learning_rate": 0.0001, "loss": 0.0975, "step": 9300 }, { "epoch": 26.448863636363637, "grad_norm": 1.497191071510315, "learning_rate": 0.0001, "loss": 0.0948, "step": 9310 }, { "epoch": 26.477272727272727, "grad_norm": 1.7432132959365845, "learning_rate": 0.0001, "loss": 0.0956, "step": 9320 }, { "epoch": 26.505681818181817, "grad_norm": 1.3511826992034912, "learning_rate": 0.0001, "loss": 0.0919, "step": 9330 }, { "epoch": 26.53409090909091, "grad_norm": 1.5720796585083008, "learning_rate": 0.0001, "loss": 0.0936, "step": 9340 }, { "epoch": 26.5625, "grad_norm": 1.5396133661270142, "learning_rate": 0.0001, "loss": 0.0959, "step": 9350 }, { "epoch": 26.59090909090909, "grad_norm": 1.6743911504745483, "learning_rate": 0.0001, "loss": 0.0936, "step": 9360 }, { "epoch": 26.619318181818183, "grad_norm": 1.85866117477417, "learning_rate": 0.0001, "loss": 0.0995, "step": 9370 }, { "epoch": 26.647727272727273, "grad_norm": 1.370617151260376, "learning_rate": 0.0001, "loss": 0.0962, "step": 9380 }, { "epoch": 26.676136363636363, "grad_norm": 1.953228235244751, "learning_rate": 0.0001, "loss": 0.0981, "step": 9390 }, { "epoch": 26.704545454545453, "grad_norm": 2.1395270824432373, "learning_rate": 0.0001, "loss": 0.094, "step": 9400 }, { "epoch": 26.732954545454547, "grad_norm": 2.1154062747955322, "learning_rate": 0.0001, "loss": 0.0969, "step": 9410 }, { "epoch": 26.761363636363637, "grad_norm": 1.7266603708267212, "learning_rate": 0.0001, "loss": 0.094, "step": 9420 }, { "epoch": 26.789772727272727, "grad_norm": 1.8902325630187988, "learning_rate": 0.0001, "loss": 0.0912, "step": 9430 }, { "epoch": 26.818181818181817, "grad_norm": 1.7739678621292114, "learning_rate": 0.0001, "loss": 0.0894, "step": 9440 }, { "epoch": 26.84659090909091, "grad_norm": 1.9259507656097412, "learning_rate": 0.0001, "loss": 0.0915, "step": 9450 }, { "epoch": 26.875, "grad_norm": 1.898050308227539, "learning_rate": 0.0001, "loss": 0.0892, "step": 9460 }, { "epoch": 26.90340909090909, "grad_norm": 1.8099193572998047, "learning_rate": 0.0001, "loss": 0.09, "step": 9470 }, { "epoch": 26.931818181818183, "grad_norm": 1.7650624513626099, "learning_rate": 0.0001, "loss": 0.0917, "step": 9480 }, { "epoch": 26.960227272727273, "grad_norm": 1.813428521156311, "learning_rate": 0.0001, "loss": 0.085, "step": 9490 }, { "epoch": 26.988636363636363, "grad_norm": 1.67322838306427, "learning_rate": 0.0001, "loss": 0.0919, "step": 9500 }, { "epoch": 27.017045454545453, "grad_norm": 1.668229103088379, "learning_rate": 0.0001, "loss": 0.0889, "step": 9510 }, { "epoch": 27.045454545454547, "grad_norm": 1.6641284227371216, "learning_rate": 0.0001, "loss": 0.0871, "step": 9520 }, { "epoch": 27.073863636363637, "grad_norm": 1.5563348531723022, "learning_rate": 0.0001, "loss": 0.0856, "step": 9530 }, { "epoch": 27.102272727272727, "grad_norm": 1.72633957862854, "learning_rate": 0.0001, "loss": 0.0898, "step": 9540 }, { "epoch": 27.130681818181817, "grad_norm": 1.5098172426223755, "learning_rate": 0.0001, "loss": 0.0893, "step": 9550 }, { "epoch": 27.15909090909091, "grad_norm": 1.7383455038070679, "learning_rate": 0.0001, "loss": 0.0859, "step": 9560 }, { "epoch": 27.1875, "grad_norm": 1.732844352722168, "learning_rate": 0.0001, "loss": 0.0954, "step": 9570 }, { "epoch": 27.21590909090909, "grad_norm": 1.5359463691711426, "learning_rate": 0.0001, "loss": 0.09, "step": 9580 }, { "epoch": 27.244318181818183, "grad_norm": 1.5415890216827393, "learning_rate": 0.0001, "loss": 0.0947, "step": 9590 }, { "epoch": 27.272727272727273, "grad_norm": 1.702905297279358, "learning_rate": 0.0001, "loss": 0.0868, "step": 9600 }, { "epoch": 27.301136363636363, "grad_norm": 1.5514419078826904, "learning_rate": 0.0001, "loss": 0.0931, "step": 9610 }, { "epoch": 27.329545454545453, "grad_norm": 1.6951237916946411, "learning_rate": 0.0001, "loss": 0.0946, "step": 9620 }, { "epoch": 27.357954545454547, "grad_norm": 1.726776123046875, "learning_rate": 0.0001, "loss": 0.0914, "step": 9630 }, { "epoch": 27.386363636363637, "grad_norm": 1.5069643259048462, "learning_rate": 0.0001, "loss": 0.0955, "step": 9640 }, { "epoch": 27.414772727272727, "grad_norm": 1.468774676322937, "learning_rate": 0.0001, "loss": 0.0895, "step": 9650 }, { "epoch": 27.443181818181817, "grad_norm": 1.7724437713623047, "learning_rate": 0.0001, "loss": 0.0914, "step": 9660 }, { "epoch": 27.47159090909091, "grad_norm": 1.3602412939071655, "learning_rate": 0.0001, "loss": 0.0851, "step": 9670 }, { "epoch": 27.5, "grad_norm": 1.5905205011367798, "learning_rate": 0.0001, "loss": 0.0892, "step": 9680 }, { "epoch": 27.52840909090909, "grad_norm": 1.389020562171936, "learning_rate": 0.0001, "loss": 0.0882, "step": 9690 }, { "epoch": 27.556818181818183, "grad_norm": 1.7958135604858398, "learning_rate": 0.0001, "loss": 0.0896, "step": 9700 }, { "epoch": 27.585227272727273, "grad_norm": 1.629370093345642, "learning_rate": 0.0001, "loss": 0.0945, "step": 9710 }, { "epoch": 27.613636363636363, "grad_norm": 1.8372656106948853, "learning_rate": 0.0001, "loss": 0.0917, "step": 9720 }, { "epoch": 27.642045454545453, "grad_norm": 2.0454485416412354, "learning_rate": 0.0001, "loss": 0.0911, "step": 9730 }, { "epoch": 27.670454545454547, "grad_norm": 1.712260365486145, "learning_rate": 0.0001, "loss": 0.0974, "step": 9740 }, { "epoch": 27.698863636363637, "grad_norm": 1.8884317874908447, "learning_rate": 0.0001, "loss": 0.0891, "step": 9750 }, { "epoch": 27.727272727272727, "grad_norm": 1.5852235555648804, "learning_rate": 0.0001, "loss": 0.0898, "step": 9760 }, { "epoch": 27.755681818181817, "grad_norm": 1.4623692035675049, "learning_rate": 0.0001, "loss": 0.0866, "step": 9770 }, { "epoch": 27.78409090909091, "grad_norm": 1.6662063598632812, "learning_rate": 0.0001, "loss": 0.0875, "step": 9780 }, { "epoch": 27.8125, "grad_norm": 1.5903691053390503, "learning_rate": 0.0001, "loss": 0.0898, "step": 9790 }, { "epoch": 27.84090909090909, "grad_norm": 1.575780987739563, "learning_rate": 0.0001, "loss": 0.0918, "step": 9800 }, { "epoch": 27.869318181818183, "grad_norm": 1.4641938209533691, "learning_rate": 0.0001, "loss": 0.0865, "step": 9810 }, { "epoch": 27.897727272727273, "grad_norm": 1.3061418533325195, "learning_rate": 0.0001, "loss": 0.0837, "step": 9820 }, { "epoch": 27.926136363636363, "grad_norm": 1.5644803047180176, "learning_rate": 0.0001, "loss": 0.0894, "step": 9830 }, { "epoch": 27.954545454545453, "grad_norm": 1.9139891862869263, "learning_rate": 0.0001, "loss": 0.0925, "step": 9840 }, { "epoch": 27.982954545454547, "grad_norm": 1.4878668785095215, "learning_rate": 0.0001, "loss": 0.0892, "step": 9850 }, { "epoch": 28.011363636363637, "grad_norm": 1.41493821144104, "learning_rate": 0.0001, "loss": 0.0914, "step": 9860 }, { "epoch": 28.039772727272727, "grad_norm": 1.5278867483139038, "learning_rate": 0.0001, "loss": 0.0875, "step": 9870 }, { "epoch": 28.068181818181817, "grad_norm": 1.3877768516540527, "learning_rate": 0.0001, "loss": 0.0914, "step": 9880 }, { "epoch": 28.09659090909091, "grad_norm": 1.3210440874099731, "learning_rate": 0.0001, "loss": 0.0854, "step": 9890 }, { "epoch": 28.125, "grad_norm": 1.3031365871429443, "learning_rate": 0.0001, "loss": 0.0854, "step": 9900 }, { "epoch": 28.15340909090909, "grad_norm": 1.334957242012024, "learning_rate": 0.0001, "loss": 0.0926, "step": 9910 }, { "epoch": 28.181818181818183, "grad_norm": 1.3106921911239624, "learning_rate": 0.0001, "loss": 0.0938, "step": 9920 }, { "epoch": 28.210227272727273, "grad_norm": 1.9076578617095947, "learning_rate": 0.0001, "loss": 0.0869, "step": 9930 }, { "epoch": 28.238636363636363, "grad_norm": 1.8114360570907593, "learning_rate": 0.0001, "loss": 0.0923, "step": 9940 }, { "epoch": 28.267045454545453, "grad_norm": 1.475202202796936, "learning_rate": 0.0001, "loss": 0.0942, "step": 9950 }, { "epoch": 28.295454545454547, "grad_norm": 1.6145161390304565, "learning_rate": 0.0001, "loss": 0.0842, "step": 9960 }, { "epoch": 28.323863636363637, "grad_norm": 1.5492805242538452, "learning_rate": 0.0001, "loss": 0.0866, "step": 9970 }, { "epoch": 28.352272727272727, "grad_norm": 1.6605106592178345, "learning_rate": 0.0001, "loss": 0.0917, "step": 9980 }, { "epoch": 28.380681818181817, "grad_norm": 1.6075584888458252, "learning_rate": 0.0001, "loss": 0.0888, "step": 9990 }, { "epoch": 28.40909090909091, "grad_norm": 1.3253341913223267, "learning_rate": 0.0001, "loss": 0.0883, "step": 10000 }, { "epoch": 28.4375, "grad_norm": 1.5245485305786133, "learning_rate": 0.0001, "loss": 0.0899, "step": 10010 }, { "epoch": 28.46590909090909, "grad_norm": 1.7123736143112183, "learning_rate": 0.0001, "loss": 0.0868, "step": 10020 }, { "epoch": 28.494318181818183, "grad_norm": 1.572593092918396, "learning_rate": 0.0001, "loss": 0.0865, "step": 10030 }, { "epoch": 28.522727272727273, "grad_norm": 1.693306565284729, "learning_rate": 0.0001, "loss": 0.0904, "step": 10040 }, { "epoch": 28.551136363636363, "grad_norm": 1.8397178649902344, "learning_rate": 0.0001, "loss": 0.0898, "step": 10050 }, { "epoch": 28.579545454545453, "grad_norm": 1.6443665027618408, "learning_rate": 0.0001, "loss": 0.0911, "step": 10060 }, { "epoch": 28.607954545454547, "grad_norm": 1.5029046535491943, "learning_rate": 0.0001, "loss": 0.0918, "step": 10070 }, { "epoch": 28.636363636363637, "grad_norm": 1.4545835256576538, "learning_rate": 0.0001, "loss": 0.0851, "step": 10080 }, { "epoch": 28.664772727272727, "grad_norm": 1.6282371282577515, "learning_rate": 0.0001, "loss": 0.0879, "step": 10090 }, { "epoch": 28.693181818181817, "grad_norm": 1.659529209136963, "learning_rate": 0.0001, "loss": 0.0881, "step": 10100 }, { "epoch": 28.72159090909091, "grad_norm": 1.797834873199463, "learning_rate": 0.0001, "loss": 0.0919, "step": 10110 }, { "epoch": 28.75, "grad_norm": 1.6592271327972412, "learning_rate": 0.0001, "loss": 0.0833, "step": 10120 }, { "epoch": 28.77840909090909, "grad_norm": 1.4718973636627197, "learning_rate": 0.0001, "loss": 0.0918, "step": 10130 }, { "epoch": 28.806818181818183, "grad_norm": 1.2769532203674316, "learning_rate": 0.0001, "loss": 0.0915, "step": 10140 }, { "epoch": 28.835227272727273, "grad_norm": 1.3063241243362427, "learning_rate": 0.0001, "loss": 0.0856, "step": 10150 }, { "epoch": 28.863636363636363, "grad_norm": 1.497151494026184, "learning_rate": 0.0001, "loss": 0.0927, "step": 10160 }, { "epoch": 28.892045454545453, "grad_norm": 1.538161277770996, "learning_rate": 0.0001, "loss": 0.0849, "step": 10170 }, { "epoch": 28.920454545454547, "grad_norm": 1.5118201971054077, "learning_rate": 0.0001, "loss": 0.0891, "step": 10180 }, { "epoch": 28.948863636363637, "grad_norm": 1.5277782678604126, "learning_rate": 0.0001, "loss": 0.0872, "step": 10190 }, { "epoch": 28.977272727272727, "grad_norm": 1.4347714185714722, "learning_rate": 0.0001, "loss": 0.0868, "step": 10200 }, { "epoch": 29.005681818181817, "grad_norm": 1.3337539434432983, "learning_rate": 0.0001, "loss": 0.0872, "step": 10210 }, { "epoch": 29.03409090909091, "grad_norm": 1.6862537860870361, "learning_rate": 0.0001, "loss": 0.0841, "step": 10220 }, { "epoch": 29.0625, "grad_norm": 1.4856092929840088, "learning_rate": 0.0001, "loss": 0.0844, "step": 10230 }, { "epoch": 29.09090909090909, "grad_norm": 1.6301956176757812, "learning_rate": 0.0001, "loss": 0.0855, "step": 10240 }, { "epoch": 29.119318181818183, "grad_norm": 1.4501256942749023, "learning_rate": 0.0001, "loss": 0.0866, "step": 10250 }, { "epoch": 29.147727272727273, "grad_norm": 1.4976732730865479, "learning_rate": 0.0001, "loss": 0.0871, "step": 10260 }, { "epoch": 29.176136363636363, "grad_norm": 1.5367155075073242, "learning_rate": 0.0001, "loss": 0.083, "step": 10270 }, { "epoch": 29.204545454545453, "grad_norm": 1.4822138547897339, "learning_rate": 0.0001, "loss": 0.0877, "step": 10280 }, { "epoch": 29.232954545454547, "grad_norm": 1.3127697706222534, "learning_rate": 0.0001, "loss": 0.0813, "step": 10290 }, { "epoch": 29.261363636363637, "grad_norm": 1.4777271747589111, "learning_rate": 0.0001, "loss": 0.0886, "step": 10300 }, { "epoch": 29.289772727272727, "grad_norm": 1.4947670698165894, "learning_rate": 0.0001, "loss": 0.0887, "step": 10310 }, { "epoch": 29.318181818181817, "grad_norm": 1.7451188564300537, "learning_rate": 0.0001, "loss": 0.0892, "step": 10320 }, { "epoch": 29.34659090909091, "grad_norm": 1.5838991403579712, "learning_rate": 0.0001, "loss": 0.0867, "step": 10330 }, { "epoch": 29.375, "grad_norm": 1.4703574180603027, "learning_rate": 0.0001, "loss": 0.0879, "step": 10340 }, { "epoch": 29.40340909090909, "grad_norm": 1.335748553276062, "learning_rate": 0.0001, "loss": 0.0838, "step": 10350 }, { "epoch": 29.431818181818183, "grad_norm": 1.5957832336425781, "learning_rate": 0.0001, "loss": 0.0849, "step": 10360 }, { "epoch": 29.460227272727273, "grad_norm": 1.7073551416397095, "learning_rate": 0.0001, "loss": 0.0866, "step": 10370 }, { "epoch": 29.488636363636363, "grad_norm": 1.4526339769363403, "learning_rate": 0.0001, "loss": 0.0864, "step": 10380 }, { "epoch": 29.517045454545453, "grad_norm": 1.439193606376648, "learning_rate": 0.0001, "loss": 0.0876, "step": 10390 }, { "epoch": 29.545454545454547, "grad_norm": 1.460564136505127, "learning_rate": 0.0001, "loss": 0.0871, "step": 10400 }, { "epoch": 29.573863636363637, "grad_norm": 1.3847678899765015, "learning_rate": 0.0001, "loss": 0.0813, "step": 10410 }, { "epoch": 29.602272727272727, "grad_norm": 1.4355100393295288, "learning_rate": 0.0001, "loss": 0.084, "step": 10420 }, { "epoch": 29.630681818181817, "grad_norm": 1.5265635251998901, "learning_rate": 0.0001, "loss": 0.085, "step": 10430 }, { "epoch": 29.65909090909091, "grad_norm": 1.5522079467773438, "learning_rate": 0.0001, "loss": 0.0898, "step": 10440 }, { "epoch": 29.6875, "grad_norm": 1.5721166133880615, "learning_rate": 0.0001, "loss": 0.0882, "step": 10450 }, { "epoch": 29.71590909090909, "grad_norm": 1.7917033433914185, "learning_rate": 0.0001, "loss": 0.0908, "step": 10460 }, { "epoch": 29.744318181818183, "grad_norm": 1.5711758136749268, "learning_rate": 0.0001, "loss": 0.085, "step": 10470 }, { "epoch": 29.772727272727273, "grad_norm": 1.7206867933273315, "learning_rate": 0.0001, "loss": 0.0861, "step": 10480 }, { "epoch": 29.801136363636363, "grad_norm": 2.0375149250030518, "learning_rate": 0.0001, "loss": 0.0908, "step": 10490 }, { "epoch": 29.829545454545453, "grad_norm": 1.7295266389846802, "learning_rate": 0.0001, "loss": 0.0853, "step": 10500 }, { "epoch": 29.857954545454547, "grad_norm": 1.5999189615249634, "learning_rate": 0.0001, "loss": 0.0831, "step": 10510 }, { "epoch": 29.886363636363637, "grad_norm": 1.6992350816726685, "learning_rate": 0.0001, "loss": 0.0891, "step": 10520 }, { "epoch": 29.914772727272727, "grad_norm": 1.819216012954712, "learning_rate": 0.0001, "loss": 0.0801, "step": 10530 }, { "epoch": 29.943181818181817, "grad_norm": 1.4884485006332397, "learning_rate": 0.0001, "loss": 0.0881, "step": 10540 }, { "epoch": 29.97159090909091, "grad_norm": 1.6694735288619995, "learning_rate": 0.0001, "loss": 0.0832, "step": 10550 }, { "epoch": 30.0, "grad_norm": 1.6155649423599243, "learning_rate": 0.0001, "loss": 0.0838, "step": 10560 }, { "epoch": 30.02840909090909, "grad_norm": 1.3682494163513184, "learning_rate": 0.0001, "loss": 0.081, "step": 10570 }, { "epoch": 30.056818181818183, "grad_norm": 1.2710999250411987, "learning_rate": 0.0001, "loss": 0.0797, "step": 10580 }, { "epoch": 30.085227272727273, "grad_norm": 1.2718095779418945, "learning_rate": 0.0001, "loss": 0.0838, "step": 10590 }, { "epoch": 30.113636363636363, "grad_norm": 1.4790953397750854, "learning_rate": 0.0001, "loss": 0.0832, "step": 10600 }, { "epoch": 30.142045454545453, "grad_norm": 4.220450401306152, "learning_rate": 0.0001, "loss": 0.0899, "step": 10610 }, { "epoch": 30.170454545454547, "grad_norm": 3.7375528812408447, "learning_rate": 0.0001, "loss": 0.0921, "step": 10620 }, { "epoch": 30.198863636363637, "grad_norm": 2.229771614074707, "learning_rate": 0.0001, "loss": 0.0843, "step": 10630 }, { "epoch": 30.227272727272727, "grad_norm": 1.9127345085144043, "learning_rate": 0.0001, "loss": 0.0843, "step": 10640 }, { "epoch": 30.255681818181817, "grad_norm": 1.7945572137832642, "learning_rate": 0.0001, "loss": 0.0855, "step": 10650 }, { "epoch": 30.28409090909091, "grad_norm": 2.2238516807556152, "learning_rate": 0.0001, "loss": 0.08, "step": 10660 }, { "epoch": 30.3125, "grad_norm": 2.0408074855804443, "learning_rate": 0.0001, "loss": 0.0812, "step": 10670 }, { "epoch": 30.34090909090909, "grad_norm": 1.6794369220733643, "learning_rate": 0.0001, "loss": 0.0808, "step": 10680 }, { "epoch": 30.369318181818183, "grad_norm": 1.8030424118041992, "learning_rate": 0.0001, "loss": 0.0819, "step": 10690 }, { "epoch": 30.397727272727273, "grad_norm": 1.8233095407485962, "learning_rate": 0.0001, "loss": 0.0819, "step": 10700 }, { "epoch": 30.426136363636363, "grad_norm": 1.6274789571762085, "learning_rate": 0.0001, "loss": 0.0806, "step": 10710 }, { "epoch": 30.454545454545453, "grad_norm": 1.6231640577316284, "learning_rate": 0.0001, "loss": 0.0777, "step": 10720 }, { "epoch": 30.482954545454547, "grad_norm": 1.6162152290344238, "learning_rate": 0.0001, "loss": 0.0864, "step": 10730 }, { "epoch": 30.511363636363637, "grad_norm": 1.9388537406921387, "learning_rate": 0.0001, "loss": 0.0833, "step": 10740 }, { "epoch": 30.539772727272727, "grad_norm": 1.3377724885940552, "learning_rate": 0.0001, "loss": 0.0836, "step": 10750 }, { "epoch": 30.568181818181817, "grad_norm": 1.4415621757507324, "learning_rate": 0.0001, "loss": 0.0805, "step": 10760 }, { "epoch": 30.59659090909091, "grad_norm": 1.4234329462051392, "learning_rate": 0.0001, "loss": 0.088, "step": 10770 }, { "epoch": 30.625, "grad_norm": 1.5712944269180298, "learning_rate": 0.0001, "loss": 0.0866, "step": 10780 }, { "epoch": 30.65340909090909, "grad_norm": 1.3370726108551025, "learning_rate": 0.0001, "loss": 0.086, "step": 10790 }, { "epoch": 30.681818181818183, "grad_norm": 1.976441502571106, "learning_rate": 0.0001, "loss": 0.0894, "step": 10800 }, { "epoch": 30.710227272727273, "grad_norm": 1.6814191341400146, "learning_rate": 0.0001, "loss": 0.083, "step": 10810 }, { "epoch": 30.738636363636363, "grad_norm": 1.406453013420105, "learning_rate": 0.0001, "loss": 0.087, "step": 10820 }, { "epoch": 30.767045454545453, "grad_norm": 1.406148910522461, "learning_rate": 0.0001, "loss": 0.0879, "step": 10830 }, { "epoch": 30.795454545454547, "grad_norm": 1.2990154027938843, "learning_rate": 0.0001, "loss": 0.0826, "step": 10840 }, { "epoch": 30.823863636363637, "grad_norm": 1.3759732246398926, "learning_rate": 0.0001, "loss": 0.0826, "step": 10850 }, { "epoch": 30.852272727272727, "grad_norm": 1.6295632123947144, "learning_rate": 0.0001, "loss": 0.086, "step": 10860 }, { "epoch": 30.880681818181817, "grad_norm": 1.4913337230682373, "learning_rate": 0.0001, "loss": 0.0879, "step": 10870 }, { "epoch": 30.90909090909091, "grad_norm": 1.6488226652145386, "learning_rate": 0.0001, "loss": 0.0865, "step": 10880 }, { "epoch": 30.9375, "grad_norm": 1.4675461053848267, "learning_rate": 0.0001, "loss": 0.0819, "step": 10890 }, { "epoch": 30.96590909090909, "grad_norm": 1.4237877130508423, "learning_rate": 0.0001, "loss": 0.0878, "step": 10900 }, { "epoch": 30.994318181818183, "grad_norm": 1.673284888267517, "learning_rate": 0.0001, "loss": 0.0876, "step": 10910 }, { "epoch": 31.022727272727273, "grad_norm": 1.6682919263839722, "learning_rate": 0.0001, "loss": 0.0815, "step": 10920 }, { "epoch": 31.051136363636363, "grad_norm": 1.567307949066162, "learning_rate": 0.0001, "loss": 0.0914, "step": 10930 }, { "epoch": 31.079545454545453, "grad_norm": 1.646187424659729, "learning_rate": 0.0001, "loss": 0.0891, "step": 10940 }, { "epoch": 31.107954545454547, "grad_norm": 1.3649544715881348, "learning_rate": 0.0001, "loss": 0.0822, "step": 10950 }, { "epoch": 31.136363636363637, "grad_norm": 1.5282686948776245, "learning_rate": 0.0001, "loss": 0.0854, "step": 10960 }, { "epoch": 31.164772727272727, "grad_norm": 1.5806162357330322, "learning_rate": 0.0001, "loss": 0.0835, "step": 10970 }, { "epoch": 31.193181818181817, "grad_norm": 1.44295334815979, "learning_rate": 0.0001, "loss": 0.0872, "step": 10980 }, { "epoch": 31.22159090909091, "grad_norm": 1.354772925376892, "learning_rate": 0.0001, "loss": 0.0843, "step": 10990 }, { "epoch": 31.25, "grad_norm": 1.0891101360321045, "learning_rate": 0.0001, "loss": 0.0873, "step": 11000 }, { "epoch": 31.27840909090909, "grad_norm": 1.6048698425292969, "learning_rate": 0.0001, "loss": 0.0872, "step": 11010 }, { "epoch": 31.306818181818183, "grad_norm": 1.6361032724380493, "learning_rate": 0.0001, "loss": 0.084, "step": 11020 }, { "epoch": 31.335227272727273, "grad_norm": 1.4586684703826904, "learning_rate": 0.0001, "loss": 0.0855, "step": 11030 }, { "epoch": 31.363636363636363, "grad_norm": 1.440597653388977, "learning_rate": 0.0001, "loss": 0.0847, "step": 11040 }, { "epoch": 31.392045454545453, "grad_norm": 1.579797387123108, "learning_rate": 0.0001, "loss": 0.085, "step": 11050 }, { "epoch": 31.420454545454547, "grad_norm": 1.5352915525436401, "learning_rate": 0.0001, "loss": 0.0847, "step": 11060 }, { "epoch": 31.448863636363637, "grad_norm": 1.5840104818344116, "learning_rate": 0.0001, "loss": 0.0853, "step": 11070 }, { "epoch": 31.477272727272727, "grad_norm": 1.4220658540725708, "learning_rate": 0.0001, "loss": 0.0834, "step": 11080 }, { "epoch": 31.505681818181817, "grad_norm": 1.3997197151184082, "learning_rate": 0.0001, "loss": 0.0874, "step": 11090 }, { "epoch": 31.53409090909091, "grad_norm": 1.4210138320922852, "learning_rate": 0.0001, "loss": 0.0884, "step": 11100 }, { "epoch": 31.5625, "grad_norm": 1.6847612857818604, "learning_rate": 0.0001, "loss": 0.0881, "step": 11110 }, { "epoch": 31.59090909090909, "grad_norm": 1.4961141347885132, "learning_rate": 0.0001, "loss": 0.0891, "step": 11120 }, { "epoch": 31.619318181818183, "grad_norm": 1.3087717294692993, "learning_rate": 0.0001, "loss": 0.0857, "step": 11130 }, { "epoch": 31.647727272727273, "grad_norm": 1.3386777639389038, "learning_rate": 0.0001, "loss": 0.0828, "step": 11140 }, { "epoch": 31.676136363636363, "grad_norm": 1.3143402338027954, "learning_rate": 0.0001, "loss": 0.0859, "step": 11150 }, { "epoch": 31.704545454545453, "grad_norm": 2.002627372741699, "learning_rate": 0.0001, "loss": 0.0895, "step": 11160 }, { "epoch": 31.732954545454547, "grad_norm": 1.8589037656784058, "learning_rate": 0.0001, "loss": 0.0922, "step": 11170 }, { "epoch": 31.761363636363637, "grad_norm": 1.910962700843811, "learning_rate": 0.0001, "loss": 0.0927, "step": 11180 }, { "epoch": 31.789772727272727, "grad_norm": 1.631377100944519, "learning_rate": 0.0001, "loss": 0.091, "step": 11190 }, { "epoch": 31.818181818181817, "grad_norm": 1.6555157899856567, "learning_rate": 0.0001, "loss": 0.0869, "step": 11200 }, { "epoch": 31.84659090909091, "grad_norm": 1.6346405744552612, "learning_rate": 0.0001, "loss": 0.0883, "step": 11210 }, { "epoch": 31.875, "grad_norm": 1.2686203718185425, "learning_rate": 0.0001, "loss": 0.0856, "step": 11220 }, { "epoch": 31.90340909090909, "grad_norm": 1.062166690826416, "learning_rate": 0.0001, "loss": 0.0823, "step": 11230 }, { "epoch": 31.931818181818183, "grad_norm": 1.0907399654388428, "learning_rate": 0.0001, "loss": 0.0825, "step": 11240 }, { "epoch": 31.960227272727273, "grad_norm": 1.4261188507080078, "learning_rate": 0.0001, "loss": 0.0857, "step": 11250 }, { "epoch": 31.988636363636363, "grad_norm": 1.3329896926879883, "learning_rate": 0.0001, "loss": 0.083, "step": 11260 }, { "epoch": 32.01704545454545, "grad_norm": 1.3007625341415405, "learning_rate": 0.0001, "loss": 0.0843, "step": 11270 }, { "epoch": 32.04545454545455, "grad_norm": 1.3520420789718628, "learning_rate": 0.0001, "loss": 0.0834, "step": 11280 }, { "epoch": 32.07386363636363, "grad_norm": 1.2438174486160278, "learning_rate": 0.0001, "loss": 0.0852, "step": 11290 }, { "epoch": 32.10227272727273, "grad_norm": 1.422757863998413, "learning_rate": 0.0001, "loss": 0.0834, "step": 11300 }, { "epoch": 32.13068181818182, "grad_norm": 1.2228397130966187, "learning_rate": 0.0001, "loss": 0.0852, "step": 11310 }, { "epoch": 32.15909090909091, "grad_norm": 1.2606850862503052, "learning_rate": 0.0001, "loss": 0.0853, "step": 11320 }, { "epoch": 32.1875, "grad_norm": 1.2866079807281494, "learning_rate": 0.0001, "loss": 0.0834, "step": 11330 }, { "epoch": 32.21590909090909, "grad_norm": 1.2392500638961792, "learning_rate": 0.0001, "loss": 0.0808, "step": 11340 }, { "epoch": 32.24431818181818, "grad_norm": 1.3232767581939697, "learning_rate": 0.0001, "loss": 0.0842, "step": 11350 }, { "epoch": 32.27272727272727, "grad_norm": 1.2873938083648682, "learning_rate": 0.0001, "loss": 0.086, "step": 11360 }, { "epoch": 32.30113636363637, "grad_norm": 1.2557556629180908, "learning_rate": 0.0001, "loss": 0.0828, "step": 11370 }, { "epoch": 32.32954545454545, "grad_norm": 1.2549748420715332, "learning_rate": 0.0001, "loss": 0.0877, "step": 11380 }, { "epoch": 32.35795454545455, "grad_norm": 1.199981689453125, "learning_rate": 0.0001, "loss": 0.0876, "step": 11390 }, { "epoch": 32.38636363636363, "grad_norm": 1.204467535018921, "learning_rate": 0.0001, "loss": 0.0804, "step": 11400 }, { "epoch": 32.41477272727273, "grad_norm": 1.5204373598098755, "learning_rate": 0.0001, "loss": 0.087, "step": 11410 }, { "epoch": 32.44318181818182, "grad_norm": 1.773655652999878, "learning_rate": 0.0001, "loss": 0.0824, "step": 11420 }, { "epoch": 32.47159090909091, "grad_norm": 2.06709885597229, "learning_rate": 0.0001, "loss": 0.0848, "step": 11430 }, { "epoch": 32.5, "grad_norm": 1.8769580125808716, "learning_rate": 0.0001, "loss": 0.0848, "step": 11440 }, { "epoch": 32.52840909090909, "grad_norm": 1.8511193990707397, "learning_rate": 0.0001, "loss": 0.0834, "step": 11450 }, { "epoch": 32.55681818181818, "grad_norm": 1.5107983350753784, "learning_rate": 0.0001, "loss": 0.0855, "step": 11460 }, { "epoch": 32.58522727272727, "grad_norm": 1.7288358211517334, "learning_rate": 0.0001, "loss": 0.0806, "step": 11470 }, { "epoch": 32.61363636363637, "grad_norm": 1.7605435848236084, "learning_rate": 0.0001, "loss": 0.0795, "step": 11480 }, { "epoch": 32.64204545454545, "grad_norm": 1.3793346881866455, "learning_rate": 0.0001, "loss": 0.0768, "step": 11490 }, { "epoch": 32.67045454545455, "grad_norm": 1.468401551246643, "learning_rate": 0.0001, "loss": 0.0773, "step": 11500 }, { "epoch": 32.69886363636363, "grad_norm": 1.4680513143539429, "learning_rate": 0.0001, "loss": 0.0832, "step": 11510 }, { "epoch": 32.72727272727273, "grad_norm": 1.3982374668121338, "learning_rate": 0.0001, "loss": 0.081, "step": 11520 }, { "epoch": 32.75568181818182, "grad_norm": 1.5383310317993164, "learning_rate": 0.0001, "loss": 0.0809, "step": 11530 }, { "epoch": 32.78409090909091, "grad_norm": 1.2121058702468872, "learning_rate": 0.0001, "loss": 0.078, "step": 11540 }, { "epoch": 32.8125, "grad_norm": 1.4773236513137817, "learning_rate": 0.0001, "loss": 0.0792, "step": 11550 }, { "epoch": 32.84090909090909, "grad_norm": 1.153468370437622, "learning_rate": 0.0001, "loss": 0.0786, "step": 11560 }, { "epoch": 32.86931818181818, "grad_norm": 1.1868444681167603, "learning_rate": 0.0001, "loss": 0.0839, "step": 11570 }, { "epoch": 32.89772727272727, "grad_norm": 1.2777554988861084, "learning_rate": 0.0001, "loss": 0.0803, "step": 11580 }, { "epoch": 32.92613636363637, "grad_norm": 1.643078088760376, "learning_rate": 0.0001, "loss": 0.0798, "step": 11590 }, { "epoch": 32.95454545454545, "grad_norm": 1.6660960912704468, "learning_rate": 0.0001, "loss": 0.0804, "step": 11600 }, { "epoch": 32.98295454545455, "grad_norm": 1.773538589477539, "learning_rate": 0.0001, "loss": 0.0779, "step": 11610 }, { "epoch": 33.01136363636363, "grad_norm": 1.5937694311141968, "learning_rate": 0.0001, "loss": 0.0801, "step": 11620 }, { "epoch": 33.03977272727273, "grad_norm": 1.380580186843872, "learning_rate": 0.0001, "loss": 0.0763, "step": 11630 }, { "epoch": 33.06818181818182, "grad_norm": 1.3506697416305542, "learning_rate": 0.0001, "loss": 0.0765, "step": 11640 }, { "epoch": 33.09659090909091, "grad_norm": 1.3326584100723267, "learning_rate": 0.0001, "loss": 0.0845, "step": 11650 }, { "epoch": 33.125, "grad_norm": 1.3809016942977905, "learning_rate": 0.0001, "loss": 0.0786, "step": 11660 }, { "epoch": 33.15340909090909, "grad_norm": 1.5046674013137817, "learning_rate": 0.0001, "loss": 0.0781, "step": 11670 }, { "epoch": 33.18181818181818, "grad_norm": 1.373900294303894, "learning_rate": 0.0001, "loss": 0.0794, "step": 11680 }, { "epoch": 33.21022727272727, "grad_norm": 1.6719146966934204, "learning_rate": 0.0001, "loss": 0.0828, "step": 11690 }, { "epoch": 33.23863636363637, "grad_norm": 1.2766826152801514, "learning_rate": 0.0001, "loss": 0.0841, "step": 11700 }, { "epoch": 33.26704545454545, "grad_norm": 1.2881532907485962, "learning_rate": 0.0001, "loss": 0.0781, "step": 11710 }, { "epoch": 33.29545454545455, "grad_norm": 1.5469038486480713, "learning_rate": 0.0001, "loss": 0.0814, "step": 11720 }, { "epoch": 33.32386363636363, "grad_norm": 1.404578685760498, "learning_rate": 0.0001, "loss": 0.0818, "step": 11730 }, { "epoch": 33.35227272727273, "grad_norm": 1.504791021347046, "learning_rate": 0.0001, "loss": 0.0773, "step": 11740 }, { "epoch": 33.38068181818182, "grad_norm": 1.8719531297683716, "learning_rate": 0.0001, "loss": 0.0797, "step": 11750 }, { "epoch": 33.40909090909091, "grad_norm": 1.418943166732788, "learning_rate": 0.0001, "loss": 0.0768, "step": 11760 }, { "epoch": 33.4375, "grad_norm": 1.6536691188812256, "learning_rate": 0.0001, "loss": 0.0772, "step": 11770 }, { "epoch": 33.46590909090909, "grad_norm": 1.4639066457748413, "learning_rate": 0.0001, "loss": 0.076, "step": 11780 }, { "epoch": 33.49431818181818, "grad_norm": 1.4900221824645996, "learning_rate": 0.0001, "loss": 0.0816, "step": 11790 }, { "epoch": 33.52272727272727, "grad_norm": 1.6789690256118774, "learning_rate": 0.0001, "loss": 0.0807, "step": 11800 }, { "epoch": 33.55113636363637, "grad_norm": 1.3068170547485352, "learning_rate": 0.0001, "loss": 0.083, "step": 11810 }, { "epoch": 33.57954545454545, "grad_norm": 1.1812000274658203, "learning_rate": 0.0001, "loss": 0.0844, "step": 11820 }, { "epoch": 33.60795454545455, "grad_norm": 1.303970456123352, "learning_rate": 0.0001, "loss": 0.077, "step": 11830 }, { "epoch": 33.63636363636363, "grad_norm": 1.3309065103530884, "learning_rate": 0.0001, "loss": 0.08, "step": 11840 }, { "epoch": 33.66477272727273, "grad_norm": 1.4080289602279663, "learning_rate": 0.0001, "loss": 0.0801, "step": 11850 }, { "epoch": 33.69318181818182, "grad_norm": 1.5647964477539062, "learning_rate": 0.0001, "loss": 0.0822, "step": 11860 }, { "epoch": 33.72159090909091, "grad_norm": 1.4176783561706543, "learning_rate": 0.0001, "loss": 0.0827, "step": 11870 }, { "epoch": 33.75, "grad_norm": 1.288572072982788, "learning_rate": 0.0001, "loss": 0.079, "step": 11880 }, { "epoch": 33.77840909090909, "grad_norm": 1.22144615650177, "learning_rate": 0.0001, "loss": 0.0835, "step": 11890 }, { "epoch": 33.80681818181818, "grad_norm": 1.6118239164352417, "learning_rate": 0.0001, "loss": 0.0823, "step": 11900 }, { "epoch": 33.83522727272727, "grad_norm": 1.3921666145324707, "learning_rate": 0.0001, "loss": 0.0801, "step": 11910 }, { "epoch": 33.86363636363637, "grad_norm": 1.2439217567443848, "learning_rate": 0.0001, "loss": 0.081, "step": 11920 }, { "epoch": 33.89204545454545, "grad_norm": 1.4157015085220337, "learning_rate": 0.0001, "loss": 0.0792, "step": 11930 }, { "epoch": 33.92045454545455, "grad_norm": 1.2521430253982544, "learning_rate": 0.0001, "loss": 0.0815, "step": 11940 }, { "epoch": 33.94886363636363, "grad_norm": 1.3754600286483765, "learning_rate": 0.0001, "loss": 0.0816, "step": 11950 }, { "epoch": 33.97727272727273, "grad_norm": 1.2032493352890015, "learning_rate": 0.0001, "loss": 0.0788, "step": 11960 }, { "epoch": 34.00568181818182, "grad_norm": 1.2101504802703857, "learning_rate": 0.0001, "loss": 0.0782, "step": 11970 }, { "epoch": 34.03409090909091, "grad_norm": 1.4837169647216797, "learning_rate": 0.0001, "loss": 0.0785, "step": 11980 }, { "epoch": 34.0625, "grad_norm": 1.3385684490203857, "learning_rate": 0.0001, "loss": 0.079, "step": 11990 }, { "epoch": 34.09090909090909, "grad_norm": 1.264683723449707, "learning_rate": 0.0001, "loss": 0.0773, "step": 12000 }, { "epoch": 34.11931818181818, "grad_norm": 1.3990920782089233, "learning_rate": 0.0001, "loss": 0.076, "step": 12010 }, { "epoch": 34.14772727272727, "grad_norm": 1.4375253915786743, "learning_rate": 0.0001, "loss": 0.0832, "step": 12020 }, { "epoch": 34.17613636363637, "grad_norm": 1.555679202079773, "learning_rate": 0.0001, "loss": 0.0784, "step": 12030 }, { "epoch": 34.20454545454545, "grad_norm": 1.1317201852798462, "learning_rate": 0.0001, "loss": 0.0825, "step": 12040 }, { "epoch": 34.23295454545455, "grad_norm": 1.1169489622116089, "learning_rate": 0.0001, "loss": 0.0768, "step": 12050 }, { "epoch": 34.26136363636363, "grad_norm": 1.3788570165634155, "learning_rate": 0.0001, "loss": 0.0773, "step": 12060 }, { "epoch": 34.28977272727273, "grad_norm": 1.4683411121368408, "learning_rate": 0.0001, "loss": 0.0808, "step": 12070 }, { "epoch": 34.31818181818182, "grad_norm": 1.2734488248825073, "learning_rate": 0.0001, "loss": 0.0799, "step": 12080 }, { "epoch": 34.34659090909091, "grad_norm": 1.175338625907898, "learning_rate": 0.0001, "loss": 0.086, "step": 12090 }, { "epoch": 34.375, "grad_norm": 1.2136415243148804, "learning_rate": 0.0001, "loss": 0.0827, "step": 12100 }, { "epoch": 34.40340909090909, "grad_norm": 1.1225967407226562, "learning_rate": 0.0001, "loss": 0.0801, "step": 12110 }, { "epoch": 34.43181818181818, "grad_norm": 1.2012512683868408, "learning_rate": 0.0001, "loss": 0.0796, "step": 12120 }, { "epoch": 34.46022727272727, "grad_norm": 1.3884018659591675, "learning_rate": 0.0001, "loss": 0.0818, "step": 12130 }, { "epoch": 34.48863636363637, "grad_norm": 1.3851127624511719, "learning_rate": 0.0001, "loss": 0.0788, "step": 12140 }, { "epoch": 34.51704545454545, "grad_norm": 1.442125678062439, "learning_rate": 0.0001, "loss": 0.078, "step": 12150 }, { "epoch": 34.54545454545455, "grad_norm": 1.3751837015151978, "learning_rate": 0.0001, "loss": 0.0795, "step": 12160 }, { "epoch": 34.57386363636363, "grad_norm": 1.5753248929977417, "learning_rate": 0.0001, "loss": 0.078, "step": 12170 }, { "epoch": 34.60227272727273, "grad_norm": 1.5038411617279053, "learning_rate": 0.0001, "loss": 0.0804, "step": 12180 }, { "epoch": 34.63068181818182, "grad_norm": 1.1918764114379883, "learning_rate": 0.0001, "loss": 0.0789, "step": 12190 }, { "epoch": 34.65909090909091, "grad_norm": 1.28202223777771, "learning_rate": 0.0001, "loss": 0.0805, "step": 12200 }, { "epoch": 34.6875, "grad_norm": 1.1731418371200562, "learning_rate": 0.0001, "loss": 0.0844, "step": 12210 }, { "epoch": 34.71590909090909, "grad_norm": 1.4400376081466675, "learning_rate": 0.0001, "loss": 0.0793, "step": 12220 }, { "epoch": 34.74431818181818, "grad_norm": 1.279740810394287, "learning_rate": 0.0001, "loss": 0.0837, "step": 12230 }, { "epoch": 34.77272727272727, "grad_norm": 1.084643840789795, "learning_rate": 0.0001, "loss": 0.0785, "step": 12240 }, { "epoch": 34.80113636363637, "grad_norm": 0.9900233745574951, "learning_rate": 0.0001, "loss": 0.0761, "step": 12250 }, { "epoch": 34.82954545454545, "grad_norm": 1.505629539489746, "learning_rate": 0.0001, "loss": 0.0811, "step": 12260 }, { "epoch": 34.85795454545455, "grad_norm": 1.2708748579025269, "learning_rate": 0.0001, "loss": 0.0805, "step": 12270 }, { "epoch": 34.88636363636363, "grad_norm": 1.3224505186080933, "learning_rate": 0.0001, "loss": 0.0797, "step": 12280 }, { "epoch": 34.91477272727273, "grad_norm": 1.1570143699645996, "learning_rate": 0.0001, "loss": 0.08, "step": 12290 }, { "epoch": 34.94318181818182, "grad_norm": 1.260339379310608, "learning_rate": 0.0001, "loss": 0.0788, "step": 12300 }, { "epoch": 34.97159090909091, "grad_norm": 1.1624943017959595, "learning_rate": 0.0001, "loss": 0.0794, "step": 12310 }, { "epoch": 35.0, "grad_norm": 1.2895053625106812, "learning_rate": 0.0001, "loss": 0.0772, "step": 12320 }, { "epoch": 35.02840909090909, "grad_norm": 1.2539187669754028, "learning_rate": 0.0001, "loss": 0.0751, "step": 12330 }, { "epoch": 35.05681818181818, "grad_norm": 1.303320288658142, "learning_rate": 0.0001, "loss": 0.0853, "step": 12340 }, { "epoch": 35.08522727272727, "grad_norm": 1.1098580360412598, "learning_rate": 0.0001, "loss": 0.082, "step": 12350 }, { "epoch": 35.11363636363637, "grad_norm": 1.5412824153900146, "learning_rate": 0.0001, "loss": 0.0832, "step": 12360 }, { "epoch": 35.14204545454545, "grad_norm": 1.3514188528060913, "learning_rate": 0.0001, "loss": 0.0761, "step": 12370 }, { "epoch": 35.17045454545455, "grad_norm": 1.380109190940857, "learning_rate": 0.0001, "loss": 0.0747, "step": 12380 }, { "epoch": 35.19886363636363, "grad_norm": 1.1679573059082031, "learning_rate": 0.0001, "loss": 0.0797, "step": 12390 }, { "epoch": 35.22727272727273, "grad_norm": 1.2729599475860596, "learning_rate": 0.0001, "loss": 0.0795, "step": 12400 }, { "epoch": 35.25568181818182, "grad_norm": 1.273162841796875, "learning_rate": 0.0001, "loss": 0.0801, "step": 12410 }, { "epoch": 35.28409090909091, "grad_norm": 1.4882563352584839, "learning_rate": 0.0001, "loss": 0.0781, "step": 12420 }, { "epoch": 35.3125, "grad_norm": 1.2322689294815063, "learning_rate": 0.0001, "loss": 0.082, "step": 12430 }, { "epoch": 35.34090909090909, "grad_norm": 1.324062705039978, "learning_rate": 0.0001, "loss": 0.0762, "step": 12440 }, { "epoch": 35.36931818181818, "grad_norm": 1.236574411392212, "learning_rate": 0.0001, "loss": 0.0766, "step": 12450 }, { "epoch": 35.39772727272727, "grad_norm": 1.17245614528656, "learning_rate": 0.0001, "loss": 0.0785, "step": 12460 }, { "epoch": 35.42613636363637, "grad_norm": 1.337213397026062, "learning_rate": 0.0001, "loss": 0.0811, "step": 12470 }, { "epoch": 35.45454545454545, "grad_norm": 1.299586534500122, "learning_rate": 0.0001, "loss": 0.0821, "step": 12480 }, { "epoch": 35.48295454545455, "grad_norm": 1.2936147451400757, "learning_rate": 0.0001, "loss": 0.0799, "step": 12490 }, { "epoch": 35.51136363636363, "grad_norm": 1.203636646270752, "learning_rate": 0.0001, "loss": 0.0826, "step": 12500 }, { "epoch": 35.53977272727273, "grad_norm": 1.260736346244812, "learning_rate": 0.0001, "loss": 0.0754, "step": 12510 }, { "epoch": 35.56818181818182, "grad_norm": 1.2215540409088135, "learning_rate": 0.0001, "loss": 0.0806, "step": 12520 }, { "epoch": 35.59659090909091, "grad_norm": 1.176795244216919, "learning_rate": 0.0001, "loss": 0.0741, "step": 12530 }, { "epoch": 35.625, "grad_norm": 1.3341177701950073, "learning_rate": 0.0001, "loss": 0.0804, "step": 12540 }, { "epoch": 35.65340909090909, "grad_norm": 1.2871227264404297, "learning_rate": 0.0001, "loss": 0.0839, "step": 12550 }, { "epoch": 35.68181818181818, "grad_norm": 1.2276510000228882, "learning_rate": 0.0001, "loss": 0.0824, "step": 12560 }, { "epoch": 35.71022727272727, "grad_norm": 1.1885565519332886, "learning_rate": 0.0001, "loss": 0.0794, "step": 12570 }, { "epoch": 35.73863636363637, "grad_norm": 1.3643691539764404, "learning_rate": 0.0001, "loss": 0.0829, "step": 12580 }, { "epoch": 35.76704545454545, "grad_norm": 1.1791919469833374, "learning_rate": 0.0001, "loss": 0.0798, "step": 12590 }, { "epoch": 35.79545454545455, "grad_norm": 1.14838707447052, "learning_rate": 0.0001, "loss": 0.077, "step": 12600 }, { "epoch": 35.82386363636363, "grad_norm": 1.418837308883667, "learning_rate": 0.0001, "loss": 0.0773, "step": 12610 }, { "epoch": 35.85227272727273, "grad_norm": 1.3161529302597046, "learning_rate": 0.0001, "loss": 0.0827, "step": 12620 }, { "epoch": 35.88068181818182, "grad_norm": 1.7161197662353516, "learning_rate": 0.0001, "loss": 0.0823, "step": 12630 }, { "epoch": 35.90909090909091, "grad_norm": 1.4447532892227173, "learning_rate": 0.0001, "loss": 0.0795, "step": 12640 }, { "epoch": 35.9375, "grad_norm": 1.743769884109497, "learning_rate": 0.0001, "loss": 0.0742, "step": 12650 }, { "epoch": 35.96590909090909, "grad_norm": 1.4263979196548462, "learning_rate": 0.0001, "loss": 0.0773, "step": 12660 }, { "epoch": 35.99431818181818, "grad_norm": 1.431891679763794, "learning_rate": 0.0001, "loss": 0.0765, "step": 12670 }, { "epoch": 36.02272727272727, "grad_norm": 1.2072255611419678, "learning_rate": 0.0001, "loss": 0.0764, "step": 12680 }, { "epoch": 36.05113636363637, "grad_norm": 1.2662785053253174, "learning_rate": 0.0001, "loss": 0.0763, "step": 12690 }, { "epoch": 36.07954545454545, "grad_norm": 1.184961199760437, "learning_rate": 0.0001, "loss": 0.0696, "step": 12700 }, { "epoch": 36.10795454545455, "grad_norm": 1.1873055696487427, "learning_rate": 0.0001, "loss": 0.0758, "step": 12710 }, { "epoch": 36.13636363636363, "grad_norm": 1.1388927698135376, "learning_rate": 0.0001, "loss": 0.0731, "step": 12720 }, { "epoch": 36.16477272727273, "grad_norm": 1.415749430656433, "learning_rate": 0.0001, "loss": 0.0774, "step": 12730 }, { "epoch": 36.19318181818182, "grad_norm": 1.6537916660308838, "learning_rate": 0.0001, "loss": 0.0806, "step": 12740 }, { "epoch": 36.22159090909091, "grad_norm": 1.479551076889038, "learning_rate": 0.0001, "loss": 0.0787, "step": 12750 }, { "epoch": 36.25, "grad_norm": 1.4956623315811157, "learning_rate": 0.0001, "loss": 0.0746, "step": 12760 }, { "epoch": 36.27840909090909, "grad_norm": 1.4210376739501953, "learning_rate": 0.0001, "loss": 0.0791, "step": 12770 }, { "epoch": 36.30681818181818, "grad_norm": 1.4137691259384155, "learning_rate": 0.0001, "loss": 0.0746, "step": 12780 }, { "epoch": 36.33522727272727, "grad_norm": 1.5666520595550537, "learning_rate": 0.0001, "loss": 0.0795, "step": 12790 }, { "epoch": 36.36363636363637, "grad_norm": 2.1862380504608154, "learning_rate": 0.0001, "loss": 0.0799, "step": 12800 }, { "epoch": 36.39204545454545, "grad_norm": 2.0382354259490967, "learning_rate": 0.0001, "loss": 0.076, "step": 12810 }, { "epoch": 36.42045454545455, "grad_norm": 1.9271392822265625, "learning_rate": 0.0001, "loss": 0.0719, "step": 12820 }, { "epoch": 36.44886363636363, "grad_norm": 1.600040078163147, "learning_rate": 0.0001, "loss": 0.0778, "step": 12830 }, { "epoch": 36.47727272727273, "grad_norm": 1.6347086429595947, "learning_rate": 0.0001, "loss": 0.071, "step": 12840 }, { "epoch": 36.50568181818182, "grad_norm": 1.4493471384048462, "learning_rate": 0.0001, "loss": 0.0753, "step": 12850 }, { "epoch": 36.53409090909091, "grad_norm": 1.5854036808013916, "learning_rate": 0.0001, "loss": 0.0737, "step": 12860 }, { "epoch": 36.5625, "grad_norm": 1.3838077783584595, "learning_rate": 0.0001, "loss": 0.0744, "step": 12870 }, { "epoch": 36.59090909090909, "grad_norm": 1.5723717212677002, "learning_rate": 0.0001, "loss": 0.0716, "step": 12880 }, { "epoch": 36.61931818181818, "grad_norm": 1.6842889785766602, "learning_rate": 0.0001, "loss": 0.0732, "step": 12890 }, { "epoch": 36.64772727272727, "grad_norm": 1.209652066230774, "learning_rate": 0.0001, "loss": 0.0741, "step": 12900 }, { "epoch": 36.67613636363637, "grad_norm": 1.4279730319976807, "learning_rate": 0.0001, "loss": 0.0762, "step": 12910 }, { "epoch": 36.70454545454545, "grad_norm": 1.6031663417816162, "learning_rate": 0.0001, "loss": 0.0701, "step": 12920 }, { "epoch": 36.73295454545455, "grad_norm": 1.3902431726455688, "learning_rate": 0.0001, "loss": 0.0785, "step": 12930 }, { "epoch": 36.76136363636363, "grad_norm": 1.3266302347183228, "learning_rate": 0.0001, "loss": 0.0758, "step": 12940 }, { "epoch": 36.78977272727273, "grad_norm": 1.4028958082199097, "learning_rate": 0.0001, "loss": 0.0741, "step": 12950 }, { "epoch": 36.81818181818182, "grad_norm": 1.494147539138794, "learning_rate": 0.0001, "loss": 0.0716, "step": 12960 }, { "epoch": 36.84659090909091, "grad_norm": 1.4022266864776611, "learning_rate": 0.0001, "loss": 0.0751, "step": 12970 }, { "epoch": 36.875, "grad_norm": 1.1766167879104614, "learning_rate": 0.0001, "loss": 0.0718, "step": 12980 }, { "epoch": 36.90340909090909, "grad_norm": 1.346309781074524, "learning_rate": 0.0001, "loss": 0.0773, "step": 12990 }, { "epoch": 36.93181818181818, "grad_norm": 1.2325224876403809, "learning_rate": 0.0001, "loss": 0.0771, "step": 13000 }, { "epoch": 36.96022727272727, "grad_norm": 1.4607832431793213, "learning_rate": 0.0001, "loss": 0.0731, "step": 13010 }, { "epoch": 36.98863636363637, "grad_norm": 1.426300287246704, "learning_rate": 0.0001, "loss": 0.0744, "step": 13020 }, { "epoch": 37.01704545454545, "grad_norm": 1.3669778108596802, "learning_rate": 0.0001, "loss": 0.0717, "step": 13030 }, { "epoch": 37.04545454545455, "grad_norm": 1.3466182947158813, "learning_rate": 0.0001, "loss": 0.0708, "step": 13040 }, { "epoch": 37.07386363636363, "grad_norm": 1.3738116025924683, "learning_rate": 0.0001, "loss": 0.0792, "step": 13050 }, { "epoch": 37.10227272727273, "grad_norm": 1.414943814277649, "learning_rate": 0.0001, "loss": 0.0772, "step": 13060 }, { "epoch": 37.13068181818182, "grad_norm": 1.4304829835891724, "learning_rate": 0.0001, "loss": 0.0728, "step": 13070 }, { "epoch": 37.15909090909091, "grad_norm": 1.280889630317688, "learning_rate": 0.0001, "loss": 0.076, "step": 13080 }, { "epoch": 37.1875, "grad_norm": 1.4901535511016846, "learning_rate": 0.0001, "loss": 0.0743, "step": 13090 }, { "epoch": 37.21590909090909, "grad_norm": 1.2257989645004272, "learning_rate": 0.0001, "loss": 0.0739, "step": 13100 }, { "epoch": 37.24431818181818, "grad_norm": 1.345968246459961, "learning_rate": 0.0001, "loss": 0.0746, "step": 13110 }, { "epoch": 37.27272727272727, "grad_norm": 1.5769586563110352, "learning_rate": 0.0001, "loss": 0.0776, "step": 13120 }, { "epoch": 37.30113636363637, "grad_norm": 1.3803303241729736, "learning_rate": 0.0001, "loss": 0.0747, "step": 13130 }, { "epoch": 37.32954545454545, "grad_norm": 1.2624835968017578, "learning_rate": 0.0001, "loss": 0.0758, "step": 13140 }, { "epoch": 37.35795454545455, "grad_norm": 1.2229900360107422, "learning_rate": 0.0001, "loss": 0.0739, "step": 13150 }, { "epoch": 37.38636363636363, "grad_norm": 1.0312384366989136, "learning_rate": 0.0001, "loss": 0.0748, "step": 13160 }, { "epoch": 37.41477272727273, "grad_norm": 1.174302339553833, "learning_rate": 0.0001, "loss": 0.0771, "step": 13170 }, { "epoch": 37.44318181818182, "grad_norm": 1.4073001146316528, "learning_rate": 0.0001, "loss": 0.0804, "step": 13180 }, { "epoch": 37.47159090909091, "grad_norm": 1.2546253204345703, "learning_rate": 0.0001, "loss": 0.0738, "step": 13190 }, { "epoch": 37.5, "grad_norm": 1.0951519012451172, "learning_rate": 0.0001, "loss": 0.0778, "step": 13200 }, { "epoch": 37.52840909090909, "grad_norm": 1.2326979637145996, "learning_rate": 0.0001, "loss": 0.0758, "step": 13210 }, { "epoch": 37.55681818181818, "grad_norm": 1.4816584587097168, "learning_rate": 0.0001, "loss": 0.0742, "step": 13220 }, { "epoch": 37.58522727272727, "grad_norm": 1.2732244729995728, "learning_rate": 0.0001, "loss": 0.0792, "step": 13230 }, { "epoch": 37.61363636363637, "grad_norm": 1.2015769481658936, "learning_rate": 0.0001, "loss": 0.0774, "step": 13240 }, { "epoch": 37.64204545454545, "grad_norm": 1.4944400787353516, "learning_rate": 0.0001, "loss": 0.0764, "step": 13250 }, { "epoch": 37.67045454545455, "grad_norm": 1.596579670906067, "learning_rate": 0.0001, "loss": 0.0765, "step": 13260 }, { "epoch": 37.69886363636363, "grad_norm": 1.1385072469711304, "learning_rate": 0.0001, "loss": 0.0729, "step": 13270 }, { "epoch": 37.72727272727273, "grad_norm": 1.2156968116760254, "learning_rate": 0.0001, "loss": 0.0782, "step": 13280 }, { "epoch": 37.75568181818182, "grad_norm": 1.4938963651657104, "learning_rate": 0.0001, "loss": 0.0718, "step": 13290 }, { "epoch": 37.78409090909091, "grad_norm": 1.3732664585113525, "learning_rate": 0.0001, "loss": 0.0763, "step": 13300 }, { "epoch": 37.8125, "grad_norm": 1.195371389389038, "learning_rate": 0.0001, "loss": 0.0767, "step": 13310 }, { "epoch": 37.84090909090909, "grad_norm": 1.272255301475525, "learning_rate": 0.0001, "loss": 0.074, "step": 13320 }, { "epoch": 37.86931818181818, "grad_norm": 1.451314926147461, "learning_rate": 0.0001, "loss": 0.0764, "step": 13330 }, { "epoch": 37.89772727272727, "grad_norm": 1.2322098016738892, "learning_rate": 0.0001, "loss": 0.0804, "step": 13340 }, { "epoch": 37.92613636363637, "grad_norm": 1.3791121244430542, "learning_rate": 0.0001, "loss": 0.07, "step": 13350 }, { "epoch": 37.95454545454545, "grad_norm": 1.0808604955673218, "learning_rate": 0.0001, "loss": 0.073, "step": 13360 }, { "epoch": 37.98295454545455, "grad_norm": 1.199265480041504, "learning_rate": 0.0001, "loss": 0.0705, "step": 13370 }, { "epoch": 38.01136363636363, "grad_norm": 1.1241381168365479, "learning_rate": 0.0001, "loss": 0.0733, "step": 13380 }, { "epoch": 38.03977272727273, "grad_norm": 1.279880404472351, "learning_rate": 0.0001, "loss": 0.0782, "step": 13390 }, { "epoch": 38.06818181818182, "grad_norm": 1.2983338832855225, "learning_rate": 0.0001, "loss": 0.073, "step": 13400 }, { "epoch": 38.09659090909091, "grad_norm": 1.2902085781097412, "learning_rate": 0.0001, "loss": 0.0778, "step": 13410 }, { "epoch": 38.125, "grad_norm": 1.2779277563095093, "learning_rate": 0.0001, "loss": 0.0726, "step": 13420 }, { "epoch": 38.15340909090909, "grad_norm": 1.4885528087615967, "learning_rate": 0.0001, "loss": 0.0762, "step": 13430 }, { "epoch": 38.18181818181818, "grad_norm": 1.3437947034835815, "learning_rate": 0.0001, "loss": 0.0772, "step": 13440 }, { "epoch": 38.21022727272727, "grad_norm": 1.2803330421447754, "learning_rate": 0.0001, "loss": 0.0773, "step": 13450 }, { "epoch": 38.23863636363637, "grad_norm": 1.2626174688339233, "learning_rate": 0.0001, "loss": 0.0757, "step": 13460 }, { "epoch": 38.26704545454545, "grad_norm": 1.3316041231155396, "learning_rate": 0.0001, "loss": 0.0755, "step": 13470 }, { "epoch": 38.29545454545455, "grad_norm": 1.2017104625701904, "learning_rate": 0.0001, "loss": 0.0737, "step": 13480 }, { "epoch": 38.32386363636363, "grad_norm": 1.158381462097168, "learning_rate": 0.0001, "loss": 0.0785, "step": 13490 }, { "epoch": 38.35227272727273, "grad_norm": 1.5186028480529785, "learning_rate": 0.0001, "loss": 0.0765, "step": 13500 }, { "epoch": 38.38068181818182, "grad_norm": 1.3031560182571411, "learning_rate": 0.0001, "loss": 0.0772, "step": 13510 }, { "epoch": 38.40909090909091, "grad_norm": 1.0976595878601074, "learning_rate": 0.0001, "loss": 0.077, "step": 13520 }, { "epoch": 38.4375, "grad_norm": 1.3816660642623901, "learning_rate": 0.0001, "loss": 0.0767, "step": 13530 }, { "epoch": 38.46590909090909, "grad_norm": 1.3313119411468506, "learning_rate": 0.0001, "loss": 0.0759, "step": 13540 }, { "epoch": 38.49431818181818, "grad_norm": 1.124053716659546, "learning_rate": 0.0001, "loss": 0.0761, "step": 13550 }, { "epoch": 38.52272727272727, "grad_norm": 1.1821684837341309, "learning_rate": 0.0001, "loss": 0.0777, "step": 13560 }, { "epoch": 38.55113636363637, "grad_norm": 1.1764349937438965, "learning_rate": 0.0001, "loss": 0.0755, "step": 13570 }, { "epoch": 38.57954545454545, "grad_norm": 1.2892837524414062, "learning_rate": 0.0001, "loss": 0.0766, "step": 13580 }, { "epoch": 38.60795454545455, "grad_norm": 1.2852972745895386, "learning_rate": 0.0001, "loss": 0.0749, "step": 13590 }, { "epoch": 38.63636363636363, "grad_norm": 1.0687012672424316, "learning_rate": 0.0001, "loss": 0.0715, "step": 13600 }, { "epoch": 38.66477272727273, "grad_norm": 1.088240385055542, "learning_rate": 0.0001, "loss": 0.0722, "step": 13610 }, { "epoch": 38.69318181818182, "grad_norm": 1.0786799192428589, "learning_rate": 0.0001, "loss": 0.0729, "step": 13620 }, { "epoch": 38.72159090909091, "grad_norm": 1.2037075757980347, "learning_rate": 0.0001, "loss": 0.0734, "step": 13630 }, { "epoch": 38.75, "grad_norm": 1.2549363374710083, "learning_rate": 0.0001, "loss": 0.0737, "step": 13640 }, { "epoch": 38.77840909090909, "grad_norm": 1.194278597831726, "learning_rate": 0.0001, "loss": 0.0733, "step": 13650 }, { "epoch": 38.80681818181818, "grad_norm": 1.5739684104919434, "learning_rate": 0.0001, "loss": 0.0714, "step": 13660 }, { "epoch": 38.83522727272727, "grad_norm": 1.3770755529403687, "learning_rate": 0.0001, "loss": 0.0728, "step": 13670 }, { "epoch": 38.86363636363637, "grad_norm": 1.294053316116333, "learning_rate": 0.0001, "loss": 0.0678, "step": 13680 }, { "epoch": 38.89204545454545, "grad_norm": 1.2223044633865356, "learning_rate": 0.0001, "loss": 0.0733, "step": 13690 }, { "epoch": 38.92045454545455, "grad_norm": 1.3834986686706543, "learning_rate": 0.0001, "loss": 0.0716, "step": 13700 }, { "epoch": 38.94886363636363, "grad_norm": 1.2326815128326416, "learning_rate": 0.0001, "loss": 0.0719, "step": 13710 }, { "epoch": 38.97727272727273, "grad_norm": 1.0974726676940918, "learning_rate": 0.0001, "loss": 0.0731, "step": 13720 }, { "epoch": 39.00568181818182, "grad_norm": 1.2831435203552246, "learning_rate": 0.0001, "loss": 0.0749, "step": 13730 }, { "epoch": 39.03409090909091, "grad_norm": 1.2879631519317627, "learning_rate": 0.0001, "loss": 0.0756, "step": 13740 }, { "epoch": 39.0625, "grad_norm": 1.3753108978271484, "learning_rate": 0.0001, "loss": 0.0723, "step": 13750 }, { "epoch": 39.09090909090909, "grad_norm": 1.29164719581604, "learning_rate": 0.0001, "loss": 0.0722, "step": 13760 }, { "epoch": 39.11931818181818, "grad_norm": 1.0802414417266846, "learning_rate": 0.0001, "loss": 0.0749, "step": 13770 }, { "epoch": 39.14772727272727, "grad_norm": 1.1304676532745361, "learning_rate": 0.0001, "loss": 0.0735, "step": 13780 }, { "epoch": 39.17613636363637, "grad_norm": 1.0044450759887695, "learning_rate": 0.0001, "loss": 0.0731, "step": 13790 }, { "epoch": 39.20454545454545, "grad_norm": 0.9919958114624023, "learning_rate": 0.0001, "loss": 0.0731, "step": 13800 }, { "epoch": 39.23295454545455, "grad_norm": 1.1686891317367554, "learning_rate": 0.0001, "loss": 0.0733, "step": 13810 }, { "epoch": 39.26136363636363, "grad_norm": 1.145544409751892, "learning_rate": 0.0001, "loss": 0.0703, "step": 13820 }, { "epoch": 39.28977272727273, "grad_norm": 1.1628680229187012, "learning_rate": 0.0001, "loss": 0.0739, "step": 13830 }, { "epoch": 39.31818181818182, "grad_norm": 1.199570655822754, "learning_rate": 0.0001, "loss": 0.0734, "step": 13840 }, { "epoch": 39.34659090909091, "grad_norm": 1.2358002662658691, "learning_rate": 0.0001, "loss": 0.0733, "step": 13850 }, { "epoch": 39.375, "grad_norm": 1.3280445337295532, "learning_rate": 0.0001, "loss": 0.0711, "step": 13860 }, { "epoch": 39.40340909090909, "grad_norm": 1.103401780128479, "learning_rate": 0.0001, "loss": 0.0698, "step": 13870 }, { "epoch": 39.43181818181818, "grad_norm": 1.4654299020767212, "learning_rate": 0.0001, "loss": 0.0724, "step": 13880 }, { "epoch": 39.46022727272727, "grad_norm": 1.3928581476211548, "learning_rate": 0.0001, "loss": 0.0728, "step": 13890 }, { "epoch": 39.48863636363637, "grad_norm": 1.2623183727264404, "learning_rate": 0.0001, "loss": 0.0761, "step": 13900 }, { "epoch": 39.51704545454545, "grad_norm": 1.0778528451919556, "learning_rate": 0.0001, "loss": 0.0699, "step": 13910 }, { "epoch": 39.54545454545455, "grad_norm": 1.1330665349960327, "learning_rate": 0.0001, "loss": 0.0777, "step": 13920 }, { "epoch": 39.57386363636363, "grad_norm": 1.1069144010543823, "learning_rate": 0.0001, "loss": 0.0757, "step": 13930 }, { "epoch": 39.60227272727273, "grad_norm": 1.1391063928604126, "learning_rate": 0.0001, "loss": 0.0715, "step": 13940 }, { "epoch": 39.63068181818182, "grad_norm": 1.3045668601989746, "learning_rate": 0.0001, "loss": 0.0744, "step": 13950 }, { "epoch": 39.65909090909091, "grad_norm": 1.3357367515563965, "learning_rate": 0.0001, "loss": 0.0769, "step": 13960 }, { "epoch": 39.6875, "grad_norm": 1.3155001401901245, "learning_rate": 0.0001, "loss": 0.0743, "step": 13970 }, { "epoch": 39.71590909090909, "grad_norm": 1.215240716934204, "learning_rate": 0.0001, "loss": 0.0778, "step": 13980 }, { "epoch": 39.74431818181818, "grad_norm": 1.0790923833847046, "learning_rate": 0.0001, "loss": 0.0748, "step": 13990 }, { "epoch": 39.77272727272727, "grad_norm": 1.0089366436004639, "learning_rate": 0.0001, "loss": 0.0738, "step": 14000 }, { "epoch": 39.80113636363637, "grad_norm": 1.0045474767684937, "learning_rate": 0.0001, "loss": 0.0748, "step": 14010 }, { "epoch": 39.82954545454545, "grad_norm": 1.0148450136184692, "learning_rate": 0.0001, "loss": 0.0752, "step": 14020 }, { "epoch": 39.85795454545455, "grad_norm": 1.1197142601013184, "learning_rate": 0.0001, "loss": 0.0762, "step": 14030 }, { "epoch": 39.88636363636363, "grad_norm": 1.324987530708313, "learning_rate": 0.0001, "loss": 0.075, "step": 14040 }, { "epoch": 39.91477272727273, "grad_norm": 1.3428856134414673, "learning_rate": 0.0001, "loss": 0.0791, "step": 14050 }, { "epoch": 39.94318181818182, "grad_norm": 1.025665521621704, "learning_rate": 0.0001, "loss": 0.0761, "step": 14060 }, { "epoch": 39.97159090909091, "grad_norm": 1.2127065658569336, "learning_rate": 0.0001, "loss": 0.0739, "step": 14070 }, { "epoch": 40.0, "grad_norm": 1.304028868675232, "learning_rate": 0.0001, "loss": 0.0708, "step": 14080 }, { "epoch": 40.02840909090909, "grad_norm": 1.194968819618225, "learning_rate": 0.0001, "loss": 0.0732, "step": 14090 }, { "epoch": 40.05681818181818, "grad_norm": 1.2401859760284424, "learning_rate": 0.0001, "loss": 0.0718, "step": 14100 }, { "epoch": 40.08522727272727, "grad_norm": 1.2328866720199585, "learning_rate": 0.0001, "loss": 0.0711, "step": 14110 }, { "epoch": 40.11363636363637, "grad_norm": 1.178868055343628, "learning_rate": 0.0001, "loss": 0.0729, "step": 14120 }, { "epoch": 40.14204545454545, "grad_norm": 1.2818002700805664, "learning_rate": 0.0001, "loss": 0.0656, "step": 14130 }, { "epoch": 40.17045454545455, "grad_norm": 1.167518138885498, "learning_rate": 0.0001, "loss": 0.0708, "step": 14140 }, { "epoch": 40.19886363636363, "grad_norm": 1.28043794631958, "learning_rate": 0.0001, "loss": 0.0727, "step": 14150 }, { "epoch": 40.22727272727273, "grad_norm": 0.9946736097335815, "learning_rate": 0.0001, "loss": 0.0725, "step": 14160 }, { "epoch": 40.25568181818182, "grad_norm": 1.186169981956482, "learning_rate": 0.0001, "loss": 0.0716, "step": 14170 }, { "epoch": 40.28409090909091, "grad_norm": 1.138846755027771, "learning_rate": 0.0001, "loss": 0.0744, "step": 14180 }, { "epoch": 40.3125, "grad_norm": 1.1993027925491333, "learning_rate": 0.0001, "loss": 0.0741, "step": 14190 }, { "epoch": 40.34090909090909, "grad_norm": 1.4275126457214355, "learning_rate": 0.0001, "loss": 0.0724, "step": 14200 }, { "epoch": 40.36931818181818, "grad_norm": 1.4426121711730957, "learning_rate": 0.0001, "loss": 0.0728, "step": 14210 }, { "epoch": 40.39772727272727, "grad_norm": 1.7519258260726929, "learning_rate": 0.0001, "loss": 0.0723, "step": 14220 }, { "epoch": 40.42613636363637, "grad_norm": 1.821484923362732, "learning_rate": 0.0001, "loss": 0.0727, "step": 14230 }, { "epoch": 40.45454545454545, "grad_norm": 1.4972963333129883, "learning_rate": 0.0001, "loss": 0.0699, "step": 14240 }, { "epoch": 40.48295454545455, "grad_norm": 1.267853021621704, "learning_rate": 0.0001, "loss": 0.0722, "step": 14250 }, { "epoch": 40.51136363636363, "grad_norm": 1.0927573442459106, "learning_rate": 0.0001, "loss": 0.069, "step": 14260 }, { "epoch": 40.53977272727273, "grad_norm": 1.4752814769744873, "learning_rate": 0.0001, "loss": 0.0672, "step": 14270 }, { "epoch": 40.56818181818182, "grad_norm": 1.8464324474334717, "learning_rate": 0.0001, "loss": 0.0738, "step": 14280 }, { "epoch": 40.59659090909091, "grad_norm": 1.4506888389587402, "learning_rate": 0.0001, "loss": 0.0693, "step": 14290 }, { "epoch": 40.625, "grad_norm": 1.7628158330917358, "learning_rate": 0.0001, "loss": 0.068, "step": 14300 }, { "epoch": 40.65340909090909, "grad_norm": 1.4462136030197144, "learning_rate": 0.0001, "loss": 0.0713, "step": 14310 }, { "epoch": 40.68181818181818, "grad_norm": 1.3682395219802856, "learning_rate": 0.0001, "loss": 0.0688, "step": 14320 }, { "epoch": 40.71022727272727, "grad_norm": 1.5475159883499146, "learning_rate": 0.0001, "loss": 0.0689, "step": 14330 }, { "epoch": 40.73863636363637, "grad_norm": 1.2970659732818604, "learning_rate": 0.0001, "loss": 0.071, "step": 14340 }, { "epoch": 40.76704545454545, "grad_norm": 1.2972766160964966, "learning_rate": 0.0001, "loss": 0.0691, "step": 14350 }, { "epoch": 40.79545454545455, "grad_norm": 1.5972353219985962, "learning_rate": 0.0001, "loss": 0.0668, "step": 14360 }, { "epoch": 40.82386363636363, "grad_norm": 1.2204009294509888, "learning_rate": 0.0001, "loss": 0.0688, "step": 14370 }, { "epoch": 40.85227272727273, "grad_norm": 1.4080297946929932, "learning_rate": 0.0001, "loss": 0.0724, "step": 14380 }, { "epoch": 40.88068181818182, "grad_norm": 1.5412365198135376, "learning_rate": 0.0001, "loss": 0.0688, "step": 14390 }, { "epoch": 40.90909090909091, "grad_norm": 1.3308732509613037, "learning_rate": 0.0001, "loss": 0.0703, "step": 14400 }, { "epoch": 40.9375, "grad_norm": 1.2614033222198486, "learning_rate": 0.0001, "loss": 0.0676, "step": 14410 }, { "epoch": 40.96590909090909, "grad_norm": 1.3623310327529907, "learning_rate": 0.0001, "loss": 0.0691, "step": 14420 }, { "epoch": 40.99431818181818, "grad_norm": 1.3249821662902832, "learning_rate": 0.0001, "loss": 0.0709, "step": 14430 }, { "epoch": 41.02272727272727, "grad_norm": 1.224489450454712, "learning_rate": 0.0001, "loss": 0.0677, "step": 14440 }, { "epoch": 41.05113636363637, "grad_norm": 1.2260276079177856, "learning_rate": 0.0001, "loss": 0.0687, "step": 14450 }, { "epoch": 41.07954545454545, "grad_norm": 1.0387647151947021, "learning_rate": 0.0001, "loss": 0.0671, "step": 14460 }, { "epoch": 41.10795454545455, "grad_norm": 1.3038042783737183, "learning_rate": 0.0001, "loss": 0.0709, "step": 14470 }, { "epoch": 41.13636363636363, "grad_norm": 1.170926570892334, "learning_rate": 0.0001, "loss": 0.078, "step": 14480 }, { "epoch": 41.16477272727273, "grad_norm": 1.118465781211853, "learning_rate": 0.0001, "loss": 0.072, "step": 14490 }, { "epoch": 41.19318181818182, "grad_norm": 1.1143956184387207, "learning_rate": 0.0001, "loss": 0.0716, "step": 14500 }, { "epoch": 41.22159090909091, "grad_norm": 1.1609805822372437, "learning_rate": 0.0001, "loss": 0.0724, "step": 14510 }, { "epoch": 41.25, "grad_norm": 1.0995590686798096, "learning_rate": 0.0001, "loss": 0.0727, "step": 14520 }, { "epoch": 41.27840909090909, "grad_norm": 1.0863865613937378, "learning_rate": 0.0001, "loss": 0.0706, "step": 14530 }, { "epoch": 41.30681818181818, "grad_norm": 1.0013092756271362, "learning_rate": 0.0001, "loss": 0.0698, "step": 14540 }, { "epoch": 41.33522727272727, "grad_norm": 1.1944328546524048, "learning_rate": 0.0001, "loss": 0.0739, "step": 14550 }, { "epoch": 41.36363636363637, "grad_norm": 1.015170931816101, "learning_rate": 0.0001, "loss": 0.0699, "step": 14560 }, { "epoch": 41.39204545454545, "grad_norm": 1.0812183618545532, "learning_rate": 0.0001, "loss": 0.0672, "step": 14570 }, { "epoch": 41.42045454545455, "grad_norm": 1.2042665481567383, "learning_rate": 0.0001, "loss": 0.0695, "step": 14580 }, { "epoch": 41.44886363636363, "grad_norm": 1.3115397691726685, "learning_rate": 0.0001, "loss": 0.0704, "step": 14590 }, { "epoch": 41.47727272727273, "grad_norm": 1.325830101966858, "learning_rate": 0.0001, "loss": 0.0737, "step": 14600 }, { "epoch": 41.50568181818182, "grad_norm": 1.1970694065093994, "learning_rate": 0.0001, "loss": 0.0721, "step": 14610 }, { "epoch": 41.53409090909091, "grad_norm": 1.0794974565505981, "learning_rate": 0.0001, "loss": 0.0714, "step": 14620 }, { "epoch": 41.5625, "grad_norm": 0.8236928582191467, "learning_rate": 0.0001, "loss": 0.0741, "step": 14630 }, { "epoch": 41.59090909090909, "grad_norm": 0.8976597785949707, "learning_rate": 0.0001, "loss": 0.0713, "step": 14640 }, { "epoch": 41.61931818181818, "grad_norm": 0.84346604347229, "learning_rate": 0.0001, "loss": 0.071, "step": 14650 }, { "epoch": 41.64772727272727, "grad_norm": 0.9527440667152405, "learning_rate": 0.0001, "loss": 0.0691, "step": 14660 }, { "epoch": 41.67613636363637, "grad_norm": 1.0900448560714722, "learning_rate": 0.0001, "loss": 0.0708, "step": 14670 }, { "epoch": 41.70454545454545, "grad_norm": 1.057309627532959, "learning_rate": 0.0001, "loss": 0.0743, "step": 14680 }, { "epoch": 41.73295454545455, "grad_norm": 1.161765456199646, "learning_rate": 0.0001, "loss": 0.0693, "step": 14690 }, { "epoch": 41.76136363636363, "grad_norm": 1.0732311010360718, "learning_rate": 0.0001, "loss": 0.0744, "step": 14700 }, { "epoch": 41.78977272727273, "grad_norm": 0.9801560044288635, "learning_rate": 0.0001, "loss": 0.0713, "step": 14710 }, { "epoch": 41.81818181818182, "grad_norm": 0.952893853187561, "learning_rate": 0.0001, "loss": 0.0705, "step": 14720 }, { "epoch": 41.84659090909091, "grad_norm": 1.247277021408081, "learning_rate": 0.0001, "loss": 0.0722, "step": 14730 }, { "epoch": 41.875, "grad_norm": 1.1207205057144165, "learning_rate": 0.0001, "loss": 0.0691, "step": 14740 }, { "epoch": 41.90340909090909, "grad_norm": 1.397698163986206, "learning_rate": 0.0001, "loss": 0.0718, "step": 14750 }, { "epoch": 41.93181818181818, "grad_norm": 1.3861267566680908, "learning_rate": 0.0001, "loss": 0.0726, "step": 14760 }, { "epoch": 41.96022727272727, "grad_norm": 1.3560404777526855, "learning_rate": 0.0001, "loss": 0.0725, "step": 14770 }, { "epoch": 41.98863636363637, "grad_norm": 1.32721745967865, "learning_rate": 0.0001, "loss": 0.071, "step": 14780 }, { "epoch": 42.01704545454545, "grad_norm": 1.1854987144470215, "learning_rate": 0.0001, "loss": 0.0712, "step": 14790 }, { "epoch": 42.04545454545455, "grad_norm": 1.1440140008926392, "learning_rate": 0.0001, "loss": 0.0718, "step": 14800 }, { "epoch": 42.07386363636363, "grad_norm": 1.064915657043457, "learning_rate": 0.0001, "loss": 0.0687, "step": 14810 }, { "epoch": 42.10227272727273, "grad_norm": 1.279728889465332, "learning_rate": 0.0001, "loss": 0.0681, "step": 14820 }, { "epoch": 42.13068181818182, "grad_norm": 1.2349141836166382, "learning_rate": 0.0001, "loss": 0.0651, "step": 14830 }, { "epoch": 42.15909090909091, "grad_norm": 1.2165275812149048, "learning_rate": 0.0001, "loss": 0.0667, "step": 14840 }, { "epoch": 42.1875, "grad_norm": 1.3267191648483276, "learning_rate": 0.0001, "loss": 0.071, "step": 14850 }, { "epoch": 42.21590909090909, "grad_norm": 0.9966840147972107, "learning_rate": 0.0001, "loss": 0.0703, "step": 14860 }, { "epoch": 42.24431818181818, "grad_norm": 1.1279515027999878, "learning_rate": 0.0001, "loss": 0.068, "step": 14870 }, { "epoch": 42.27272727272727, "grad_norm": 1.3340145349502563, "learning_rate": 0.0001, "loss": 0.0705, "step": 14880 }, { "epoch": 42.30113636363637, "grad_norm": 1.193154215812683, "learning_rate": 0.0001, "loss": 0.0686, "step": 14890 }, { "epoch": 42.32954545454545, "grad_norm": 1.1050076484680176, "learning_rate": 0.0001, "loss": 0.0664, "step": 14900 }, { "epoch": 42.35795454545455, "grad_norm": 1.1995725631713867, "learning_rate": 0.0001, "loss": 0.0677, "step": 14910 }, { "epoch": 42.38636363636363, "grad_norm": 1.0086230039596558, "learning_rate": 0.0001, "loss": 0.0665, "step": 14920 }, { "epoch": 42.41477272727273, "grad_norm": 0.8045913577079773, "learning_rate": 0.0001, "loss": 0.0673, "step": 14930 }, { "epoch": 42.44318181818182, "grad_norm": 0.9728057980537415, "learning_rate": 0.0001, "loss": 0.0712, "step": 14940 }, { "epoch": 42.47159090909091, "grad_norm": 1.2215827703475952, "learning_rate": 0.0001, "loss": 0.0702, "step": 14950 }, { "epoch": 42.5, "grad_norm": 1.3105379343032837, "learning_rate": 0.0001, "loss": 0.0745, "step": 14960 }, { "epoch": 42.52840909090909, "grad_norm": 1.5222160816192627, "learning_rate": 0.0001, "loss": 0.0719, "step": 14970 }, { "epoch": 42.55681818181818, "grad_norm": 1.5078243017196655, "learning_rate": 0.0001, "loss": 0.0722, "step": 14980 }, { "epoch": 42.58522727272727, "grad_norm": 1.5456411838531494, "learning_rate": 0.0001, "loss": 0.0701, "step": 14990 }, { "epoch": 42.61363636363637, "grad_norm": 1.329354166984558, "learning_rate": 0.0001, "loss": 0.0727, "step": 15000 }, { "epoch": 42.64204545454545, "grad_norm": 1.4910674095153809, "learning_rate": 0.0001, "loss": 0.0684, "step": 15010 }, { "epoch": 42.67045454545455, "grad_norm": 1.3281280994415283, "learning_rate": 0.0001, "loss": 0.0707, "step": 15020 }, { "epoch": 42.69886363636363, "grad_norm": 1.312171220779419, "learning_rate": 0.0001, "loss": 0.0683, "step": 15030 }, { "epoch": 42.72727272727273, "grad_norm": 1.8369287252426147, "learning_rate": 0.0001, "loss": 0.0695, "step": 15040 }, { "epoch": 42.75568181818182, "grad_norm": 1.3710136413574219, "learning_rate": 0.0001, "loss": 0.0684, "step": 15050 }, { "epoch": 42.78409090909091, "grad_norm": 1.343110203742981, "learning_rate": 0.0001, "loss": 0.0685, "step": 15060 }, { "epoch": 42.8125, "grad_norm": 1.2409188747406006, "learning_rate": 0.0001, "loss": 0.0666, "step": 15070 }, { "epoch": 42.84090909090909, "grad_norm": 1.375400185585022, "learning_rate": 0.0001, "loss": 0.0678, "step": 15080 }, { "epoch": 42.86931818181818, "grad_norm": 1.4058459997177124, "learning_rate": 0.0001, "loss": 0.0686, "step": 15090 }, { "epoch": 42.89772727272727, "grad_norm": 1.4581125974655151, "learning_rate": 0.0001, "loss": 0.0726, "step": 15100 }, { "epoch": 42.92613636363637, "grad_norm": 1.129081130027771, "learning_rate": 0.0001, "loss": 0.0705, "step": 15110 }, { "epoch": 42.95454545454545, "grad_norm": 1.0302661657333374, "learning_rate": 0.0001, "loss": 0.0653, "step": 15120 }, { "epoch": 42.98295454545455, "grad_norm": 0.974215030670166, "learning_rate": 0.0001, "loss": 0.0671, "step": 15130 }, { "epoch": 43.01136363636363, "grad_norm": 1.0873677730560303, "learning_rate": 0.0001, "loss": 0.0674, "step": 15140 }, { "epoch": 43.03977272727273, "grad_norm": 1.3052396774291992, "learning_rate": 0.0001, "loss": 0.069, "step": 15150 }, { "epoch": 43.06818181818182, "grad_norm": 1.1269701719284058, "learning_rate": 0.0001, "loss": 0.0685, "step": 15160 }, { "epoch": 43.09659090909091, "grad_norm": 1.0936193466186523, "learning_rate": 0.0001, "loss": 0.0674, "step": 15170 }, { "epoch": 43.125, "grad_norm": 1.168410301208496, "learning_rate": 0.0001, "loss": 0.0705, "step": 15180 }, { "epoch": 43.15340909090909, "grad_norm": 1.151530385017395, "learning_rate": 0.0001, "loss": 0.0662, "step": 15190 }, { "epoch": 43.18181818181818, "grad_norm": 1.0683995485305786, "learning_rate": 0.0001, "loss": 0.07, "step": 15200 }, { "epoch": 43.21022727272727, "grad_norm": 1.1150685548782349, "learning_rate": 0.0001, "loss": 0.07, "step": 15210 }, { "epoch": 43.23863636363637, "grad_norm": 1.1715824604034424, "learning_rate": 0.0001, "loss": 0.0693, "step": 15220 }, { "epoch": 43.26704545454545, "grad_norm": 1.2279553413391113, "learning_rate": 0.0001, "loss": 0.0663, "step": 15230 }, { "epoch": 43.29545454545455, "grad_norm": 1.020858883857727, "learning_rate": 0.0001, "loss": 0.0716, "step": 15240 }, { "epoch": 43.32386363636363, "grad_norm": 1.4411025047302246, "learning_rate": 0.0001, "loss": 0.0724, "step": 15250 }, { "epoch": 43.35227272727273, "grad_norm": 1.0164995193481445, "learning_rate": 0.0001, "loss": 0.0726, "step": 15260 }, { "epoch": 43.38068181818182, "grad_norm": 1.092726469039917, "learning_rate": 0.0001, "loss": 0.0701, "step": 15270 }, { "epoch": 43.40909090909091, "grad_norm": 0.9105169773101807, "learning_rate": 0.0001, "loss": 0.0713, "step": 15280 }, { "epoch": 43.4375, "grad_norm": 1.0798728466033936, "learning_rate": 0.0001, "loss": 0.0711, "step": 15290 }, { "epoch": 43.46590909090909, "grad_norm": 1.08077871799469, "learning_rate": 0.0001, "loss": 0.0703, "step": 15300 }, { "epoch": 43.49431818181818, "grad_norm": 1.2410343885421753, "learning_rate": 0.0001, "loss": 0.0712, "step": 15310 }, { "epoch": 43.52272727272727, "grad_norm": 1.0531221628189087, "learning_rate": 0.0001, "loss": 0.0677, "step": 15320 }, { "epoch": 43.55113636363637, "grad_norm": 1.0954484939575195, "learning_rate": 0.0001, "loss": 0.0706, "step": 15330 }, { "epoch": 43.57954545454545, "grad_norm": 1.155243158340454, "learning_rate": 0.0001, "loss": 0.0712, "step": 15340 }, { "epoch": 43.60795454545455, "grad_norm": 0.9618707299232483, "learning_rate": 0.0001, "loss": 0.0708, "step": 15350 }, { "epoch": 43.63636363636363, "grad_norm": 1.0885404348373413, "learning_rate": 0.0001, "loss": 0.0645, "step": 15360 }, { "epoch": 43.66477272727273, "grad_norm": 1.2146936655044556, "learning_rate": 0.0001, "loss": 0.0696, "step": 15370 }, { "epoch": 43.69318181818182, "grad_norm": 1.3160111904144287, "learning_rate": 0.0001, "loss": 0.0703, "step": 15380 }, { "epoch": 43.72159090909091, "grad_norm": 1.0108137130737305, "learning_rate": 0.0001, "loss": 0.0734, "step": 15390 }, { "epoch": 43.75, "grad_norm": 1.0981762409210205, "learning_rate": 0.0001, "loss": 0.0683, "step": 15400 }, { "epoch": 43.77840909090909, "grad_norm": 1.1093978881835938, "learning_rate": 0.0001, "loss": 0.0691, "step": 15410 }, { "epoch": 43.80681818181818, "grad_norm": 1.1320979595184326, "learning_rate": 0.0001, "loss": 0.0704, "step": 15420 }, { "epoch": 43.83522727272727, "grad_norm": 1.089289665222168, "learning_rate": 0.0001, "loss": 0.069, "step": 15430 }, { "epoch": 43.86363636363637, "grad_norm": 1.138124704360962, "learning_rate": 0.0001, "loss": 0.0681, "step": 15440 }, { "epoch": 43.89204545454545, "grad_norm": 1.080330491065979, "learning_rate": 0.0001, "loss": 0.0678, "step": 15450 }, { "epoch": 43.92045454545455, "grad_norm": 0.9879652857780457, "learning_rate": 0.0001, "loss": 0.0728, "step": 15460 }, { "epoch": 43.94886363636363, "grad_norm": 1.2704243659973145, "learning_rate": 0.0001, "loss": 0.0671, "step": 15470 }, { "epoch": 43.97727272727273, "grad_norm": 1.0896133184432983, "learning_rate": 0.0001, "loss": 0.0709, "step": 15480 }, { "epoch": 44.00568181818182, "grad_norm": 1.3182461261749268, "learning_rate": 0.0001, "loss": 0.0697, "step": 15490 }, { "epoch": 44.03409090909091, "grad_norm": 1.1766080856323242, "learning_rate": 0.0001, "loss": 0.0683, "step": 15500 }, { "epoch": 44.0625, "grad_norm": 1.1963214874267578, "learning_rate": 0.0001, "loss": 0.0729, "step": 15510 }, { "epoch": 44.09090909090909, "grad_norm": 1.32454252243042, "learning_rate": 0.0001, "loss": 0.0702, "step": 15520 }, { "epoch": 44.11931818181818, "grad_norm": 0.9734973907470703, "learning_rate": 0.0001, "loss": 0.0722, "step": 15530 }, { "epoch": 44.14772727272727, "grad_norm": 0.9227120280265808, "learning_rate": 0.0001, "loss": 0.0679, "step": 15540 }, { "epoch": 44.17613636363637, "grad_norm": 1.0899708271026611, "learning_rate": 0.0001, "loss": 0.0698, "step": 15550 }, { "epoch": 44.20454545454545, "grad_norm": 0.9477924704551697, "learning_rate": 0.0001, "loss": 0.0739, "step": 15560 }, { "epoch": 44.23295454545455, "grad_norm": 0.9881532788276672, "learning_rate": 0.0001, "loss": 0.0682, "step": 15570 }, { "epoch": 44.26136363636363, "grad_norm": 1.1461341381072998, "learning_rate": 0.0001, "loss": 0.0682, "step": 15580 }, { "epoch": 44.28977272727273, "grad_norm": 1.1203874349594116, "learning_rate": 0.0001, "loss": 0.0668, "step": 15590 }, { "epoch": 44.31818181818182, "grad_norm": 1.113659381866455, "learning_rate": 0.0001, "loss": 0.0714, "step": 15600 }, { "epoch": 44.34659090909091, "grad_norm": 0.9644593596458435, "learning_rate": 0.0001, "loss": 0.0665, "step": 15610 }, { "epoch": 44.375, "grad_norm": 1.232541561126709, "learning_rate": 0.0001, "loss": 0.0693, "step": 15620 }, { "epoch": 44.40340909090909, "grad_norm": 1.1204942464828491, "learning_rate": 0.0001, "loss": 0.0671, "step": 15630 }, { "epoch": 44.43181818181818, "grad_norm": 1.2313846349716187, "learning_rate": 0.0001, "loss": 0.0711, "step": 15640 }, { "epoch": 44.46022727272727, "grad_norm": 1.1166387796401978, "learning_rate": 0.0001, "loss": 0.0648, "step": 15650 }, { "epoch": 44.48863636363637, "grad_norm": 1.1295506954193115, "learning_rate": 0.0001, "loss": 0.0706, "step": 15660 }, { "epoch": 44.51704545454545, "grad_norm": 1.103320598602295, "learning_rate": 0.0001, "loss": 0.0692, "step": 15670 }, { "epoch": 44.54545454545455, "grad_norm": 0.8729053139686584, "learning_rate": 0.0001, "loss": 0.0701, "step": 15680 }, { "epoch": 44.57386363636363, "grad_norm": 0.8826537728309631, "learning_rate": 0.0001, "loss": 0.0729, "step": 15690 }, { "epoch": 44.60227272727273, "grad_norm": 1.1283091306686401, "learning_rate": 0.0001, "loss": 0.0691, "step": 15700 }, { "epoch": 44.63068181818182, "grad_norm": 1.0607900619506836, "learning_rate": 0.0001, "loss": 0.0719, "step": 15710 }, { "epoch": 44.65909090909091, "grad_norm": 0.8510501384735107, "learning_rate": 0.0001, "loss": 0.0731, "step": 15720 }, { "epoch": 44.6875, "grad_norm": 0.9762911796569824, "learning_rate": 0.0001, "loss": 0.0685, "step": 15730 }, { "epoch": 44.71590909090909, "grad_norm": 0.9683955907821655, "learning_rate": 0.0001, "loss": 0.0732, "step": 15740 }, { "epoch": 44.74431818181818, "grad_norm": 1.0100533962249756, "learning_rate": 0.0001, "loss": 0.0707, "step": 15750 }, { "epoch": 44.77272727272727, "grad_norm": 0.860872745513916, "learning_rate": 0.0001, "loss": 0.0674, "step": 15760 }, { "epoch": 44.80113636363637, "grad_norm": 1.065424919128418, "learning_rate": 0.0001, "loss": 0.0706, "step": 15770 }, { "epoch": 44.82954545454545, "grad_norm": 0.9998086094856262, "learning_rate": 0.0001, "loss": 0.0727, "step": 15780 }, { "epoch": 44.85795454545455, "grad_norm": 1.1579582691192627, "learning_rate": 0.0001, "loss": 0.0688, "step": 15790 }, { "epoch": 44.88636363636363, "grad_norm": 1.0913549661636353, "learning_rate": 0.0001, "loss": 0.0713, "step": 15800 }, { "epoch": 44.91477272727273, "grad_norm": 1.1674264669418335, "learning_rate": 0.0001, "loss": 0.0695, "step": 15810 }, { "epoch": 44.94318181818182, "grad_norm": 1.1136603355407715, "learning_rate": 0.0001, "loss": 0.0698, "step": 15820 }, { "epoch": 44.97159090909091, "grad_norm": 1.3215253353118896, "learning_rate": 0.0001, "loss": 0.0709, "step": 15830 }, { "epoch": 45.0, "grad_norm": 1.308078646659851, "learning_rate": 0.0001, "loss": 0.0685, "step": 15840 }, { "epoch": 45.02840909090909, "grad_norm": 1.174768090248108, "learning_rate": 0.0001, "loss": 0.072, "step": 15850 }, { "epoch": 45.05681818181818, "grad_norm": 1.1076934337615967, "learning_rate": 0.0001, "loss": 0.067, "step": 15860 }, { "epoch": 45.08522727272727, "grad_norm": 1.202553629875183, "learning_rate": 0.0001, "loss": 0.0698, "step": 15870 }, { "epoch": 45.11363636363637, "grad_norm": 1.2212430238723755, "learning_rate": 0.0001, "loss": 0.0671, "step": 15880 }, { "epoch": 45.14204545454545, "grad_norm": 1.785838007926941, "learning_rate": 0.0001, "loss": 0.0698, "step": 15890 }, { "epoch": 45.17045454545455, "grad_norm": 1.2640763521194458, "learning_rate": 0.0001, "loss": 0.0717, "step": 15900 }, { "epoch": 45.19886363636363, "grad_norm": 1.1825248003005981, "learning_rate": 0.0001, "loss": 0.0646, "step": 15910 }, { "epoch": 45.22727272727273, "grad_norm": 1.1265792846679688, "learning_rate": 0.0001, "loss": 0.0666, "step": 15920 }, { "epoch": 45.25568181818182, "grad_norm": 1.1798592805862427, "learning_rate": 0.0001, "loss": 0.0645, "step": 15930 }, { "epoch": 45.28409090909091, "grad_norm": 0.996825635433197, "learning_rate": 0.0001, "loss": 0.0659, "step": 15940 }, { "epoch": 45.3125, "grad_norm": 1.1232649087905884, "learning_rate": 0.0001, "loss": 0.0699, "step": 15950 }, { "epoch": 45.34090909090909, "grad_norm": 1.0283252000808716, "learning_rate": 0.0001, "loss": 0.0697, "step": 15960 }, { "epoch": 45.36931818181818, "grad_norm": 1.0725383758544922, "learning_rate": 0.0001, "loss": 0.0655, "step": 15970 }, { "epoch": 45.39772727272727, "grad_norm": 0.88676917552948, "learning_rate": 0.0001, "loss": 0.068, "step": 15980 }, { "epoch": 45.42613636363637, "grad_norm": 0.8896072506904602, "learning_rate": 0.0001, "loss": 0.0689, "step": 15990 }, { "epoch": 45.45454545454545, "grad_norm": 1.008349061012268, "learning_rate": 0.0001, "loss": 0.0664, "step": 16000 }, { "epoch": 45.48295454545455, "grad_norm": 0.9974130988121033, "learning_rate": 0.0001, "loss": 0.0672, "step": 16010 }, { "epoch": 45.51136363636363, "grad_norm": 1.0345064401626587, "learning_rate": 0.0001, "loss": 0.0678, "step": 16020 }, { "epoch": 45.53977272727273, "grad_norm": 1.2052901983261108, "learning_rate": 0.0001, "loss": 0.0655, "step": 16030 }, { "epoch": 45.56818181818182, "grad_norm": 1.0479304790496826, "learning_rate": 0.0001, "loss": 0.0677, "step": 16040 }, { "epoch": 45.59659090909091, "grad_norm": 1.1516709327697754, "learning_rate": 0.0001, "loss": 0.0724, "step": 16050 }, { "epoch": 45.625, "grad_norm": 1.3962410688400269, "learning_rate": 0.0001, "loss": 0.0685, "step": 16060 }, { "epoch": 45.65340909090909, "grad_norm": 1.3850163221359253, "learning_rate": 0.0001, "loss": 0.0698, "step": 16070 }, { "epoch": 45.68181818181818, "grad_norm": 1.6799479722976685, "learning_rate": 0.0001, "loss": 0.069, "step": 16080 }, { "epoch": 45.71022727272727, "grad_norm": 0.9734259247779846, "learning_rate": 0.0001, "loss": 0.0663, "step": 16090 }, { "epoch": 45.73863636363637, "grad_norm": 1.6348193883895874, "learning_rate": 0.0001, "loss": 0.0656, "step": 16100 }, { "epoch": 45.76704545454545, "grad_norm": 1.480636715888977, "learning_rate": 0.0001, "loss": 0.0638, "step": 16110 }, { "epoch": 45.79545454545455, "grad_norm": 1.3442875146865845, "learning_rate": 0.0001, "loss": 0.0707, "step": 16120 }, { "epoch": 45.82386363636363, "grad_norm": 1.4665964841842651, "learning_rate": 0.0001, "loss": 0.065, "step": 16130 }, { "epoch": 45.85227272727273, "grad_norm": 1.2870675325393677, "learning_rate": 0.0001, "loss": 0.0657, "step": 16140 }, { "epoch": 45.88068181818182, "grad_norm": 1.2831790447235107, "learning_rate": 0.0001, "loss": 0.0655, "step": 16150 }, { "epoch": 45.90909090909091, "grad_norm": 1.2384798526763916, "learning_rate": 0.0001, "loss": 0.0691, "step": 16160 }, { "epoch": 45.9375, "grad_norm": 1.0735738277435303, "learning_rate": 0.0001, "loss": 0.0671, "step": 16170 }, { "epoch": 45.96590909090909, "grad_norm": 1.3313257694244385, "learning_rate": 0.0001, "loss": 0.0643, "step": 16180 }, { "epoch": 45.99431818181818, "grad_norm": 1.170076608657837, "learning_rate": 0.0001, "loss": 0.068, "step": 16190 }, { "epoch": 46.02272727272727, "grad_norm": 1.383847713470459, "learning_rate": 0.0001, "loss": 0.0648, "step": 16200 }, { "epoch": 46.05113636363637, "grad_norm": 1.4958339929580688, "learning_rate": 0.0001, "loss": 0.0684, "step": 16210 }, { "epoch": 46.07954545454545, "grad_norm": 0.9990864992141724, "learning_rate": 0.0001, "loss": 0.0663, "step": 16220 }, { "epoch": 46.10795454545455, "grad_norm": 1.1924002170562744, "learning_rate": 0.0001, "loss": 0.0653, "step": 16230 }, { "epoch": 46.13636363636363, "grad_norm": 1.2387312650680542, "learning_rate": 0.0001, "loss": 0.0637, "step": 16240 }, { "epoch": 46.16477272727273, "grad_norm": 1.2327535152435303, "learning_rate": 0.0001, "loss": 0.0668, "step": 16250 }, { "epoch": 46.19318181818182, "grad_norm": 1.0648465156555176, "learning_rate": 0.0001, "loss": 0.0661, "step": 16260 }, { "epoch": 46.22159090909091, "grad_norm": 0.9485817551612854, "learning_rate": 0.0001, "loss": 0.0655, "step": 16270 }, { "epoch": 46.25, "grad_norm": 1.2529845237731934, "learning_rate": 0.0001, "loss": 0.0652, "step": 16280 }, { "epoch": 46.27840909090909, "grad_norm": 1.0558775663375854, "learning_rate": 0.0001, "loss": 0.0631, "step": 16290 }, { "epoch": 46.30681818181818, "grad_norm": 1.1467417478561401, "learning_rate": 0.0001, "loss": 0.0659, "step": 16300 }, { "epoch": 46.33522727272727, "grad_norm": 1.4327044486999512, "learning_rate": 0.0001, "loss": 0.0664, "step": 16310 }, { "epoch": 46.36363636363637, "grad_norm": 1.25315260887146, "learning_rate": 0.0001, "loss": 0.0721, "step": 16320 }, { "epoch": 46.39204545454545, "grad_norm": 1.2470471858978271, "learning_rate": 0.0001, "loss": 0.066, "step": 16330 }, { "epoch": 46.42045454545455, "grad_norm": 1.2044808864593506, "learning_rate": 0.0001, "loss": 0.0679, "step": 16340 }, { "epoch": 46.44886363636363, "grad_norm": 1.3889199495315552, "learning_rate": 0.0001, "loss": 0.0621, "step": 16350 }, { "epoch": 46.47727272727273, "grad_norm": 1.1649527549743652, "learning_rate": 0.0001, "loss": 0.0651, "step": 16360 }, { "epoch": 46.50568181818182, "grad_norm": 1.0425108671188354, "learning_rate": 0.0001, "loss": 0.0621, "step": 16370 }, { "epoch": 46.53409090909091, "grad_norm": 1.1113712787628174, "learning_rate": 0.0001, "loss": 0.0634, "step": 16380 }, { "epoch": 46.5625, "grad_norm": 1.1670324802398682, "learning_rate": 0.0001, "loss": 0.0642, "step": 16390 }, { "epoch": 46.59090909090909, "grad_norm": 1.195335030555725, "learning_rate": 0.0001, "loss": 0.0672, "step": 16400 }, { "epoch": 46.61931818181818, "grad_norm": 1.1802396774291992, "learning_rate": 0.0001, "loss": 0.0678, "step": 16410 }, { "epoch": 46.64772727272727, "grad_norm": 1.2869985103607178, "learning_rate": 0.0001, "loss": 0.0655, "step": 16420 }, { "epoch": 46.67613636363637, "grad_norm": 1.2032572031021118, "learning_rate": 0.0001, "loss": 0.0651, "step": 16430 }, { "epoch": 46.70454545454545, "grad_norm": 1.116356611251831, "learning_rate": 0.0001, "loss": 0.0647, "step": 16440 }, { "epoch": 46.73295454545455, "grad_norm": 1.3527079820632935, "learning_rate": 0.0001, "loss": 0.0686, "step": 16450 }, { "epoch": 46.76136363636363, "grad_norm": 1.105823040008545, "learning_rate": 0.0001, "loss": 0.0654, "step": 16460 }, { "epoch": 46.78977272727273, "grad_norm": 1.1165571212768555, "learning_rate": 0.0001, "loss": 0.0661, "step": 16470 }, { "epoch": 46.81818181818182, "grad_norm": 1.0524123907089233, "learning_rate": 0.0001, "loss": 0.0662, "step": 16480 }, { "epoch": 46.84659090909091, "grad_norm": 1.0740056037902832, "learning_rate": 0.0001, "loss": 0.0673, "step": 16490 }, { "epoch": 46.875, "grad_norm": 1.0902903079986572, "learning_rate": 0.0001, "loss": 0.0713, "step": 16500 }, { "epoch": 46.90340909090909, "grad_norm": 1.1585133075714111, "learning_rate": 0.0001, "loss": 0.0668, "step": 16510 }, { "epoch": 46.93181818181818, "grad_norm": 0.8834289312362671, "learning_rate": 0.0001, "loss": 0.0683, "step": 16520 }, { "epoch": 46.96022727272727, "grad_norm": 1.0335935354232788, "learning_rate": 0.0001, "loss": 0.0684, "step": 16530 }, { "epoch": 46.98863636363637, "grad_norm": 1.1123617887496948, "learning_rate": 0.0001, "loss": 0.0665, "step": 16540 }, { "epoch": 47.01704545454545, "grad_norm": 1.012212872505188, "learning_rate": 0.0001, "loss": 0.0663, "step": 16550 }, { "epoch": 47.04545454545455, "grad_norm": 0.8919275999069214, "learning_rate": 0.0001, "loss": 0.0684, "step": 16560 }, { "epoch": 47.07386363636363, "grad_norm": 1.0549639463424683, "learning_rate": 0.0001, "loss": 0.0699, "step": 16570 }, { "epoch": 47.10227272727273, "grad_norm": 1.048153281211853, "learning_rate": 0.0001, "loss": 0.0657, "step": 16580 }, { "epoch": 47.13068181818182, "grad_norm": 0.9343200325965881, "learning_rate": 0.0001, "loss": 0.0698, "step": 16590 }, { "epoch": 47.15909090909091, "grad_norm": 0.970174252986908, "learning_rate": 0.0001, "loss": 0.0691, "step": 16600 }, { "epoch": 47.1875, "grad_norm": 0.8680684566497803, "learning_rate": 0.0001, "loss": 0.0669, "step": 16610 }, { "epoch": 47.21590909090909, "grad_norm": 1.1044062376022339, "learning_rate": 0.0001, "loss": 0.0678, "step": 16620 }, { "epoch": 47.24431818181818, "grad_norm": 1.067230224609375, "learning_rate": 0.0001, "loss": 0.0694, "step": 16630 }, { "epoch": 47.27272727272727, "grad_norm": 1.0942895412445068, "learning_rate": 0.0001, "loss": 0.0677, "step": 16640 }, { "epoch": 47.30113636363637, "grad_norm": 0.955245852470398, "learning_rate": 0.0001, "loss": 0.0649, "step": 16650 }, { "epoch": 47.32954545454545, "grad_norm": 0.9594484567642212, "learning_rate": 0.0001, "loss": 0.0675, "step": 16660 }, { "epoch": 47.35795454545455, "grad_norm": 1.0073819160461426, "learning_rate": 0.0001, "loss": 0.0635, "step": 16670 }, { "epoch": 47.38636363636363, "grad_norm": 1.0799858570098877, "learning_rate": 0.0001, "loss": 0.0673, "step": 16680 }, { "epoch": 47.41477272727273, "grad_norm": 1.113233208656311, "learning_rate": 0.0001, "loss": 0.0686, "step": 16690 }, { "epoch": 47.44318181818182, "grad_norm": 0.9608368277549744, "learning_rate": 0.0001, "loss": 0.0644, "step": 16700 }, { "epoch": 47.47159090909091, "grad_norm": 0.9679139256477356, "learning_rate": 0.0001, "loss": 0.0648, "step": 16710 }, { "epoch": 47.5, "grad_norm": 0.9586361050605774, "learning_rate": 0.0001, "loss": 0.0636, "step": 16720 }, { "epoch": 47.52840909090909, "grad_norm": 0.7961944341659546, "learning_rate": 0.0001, "loss": 0.0649, "step": 16730 }, { "epoch": 47.55681818181818, "grad_norm": 0.891774594783783, "learning_rate": 0.0001, "loss": 0.0647, "step": 16740 }, { "epoch": 47.58522727272727, "grad_norm": 1.0160497426986694, "learning_rate": 0.0001, "loss": 0.062, "step": 16750 }, { "epoch": 47.61363636363637, "grad_norm": 1.0420070886611938, "learning_rate": 0.0001, "loss": 0.0662, "step": 16760 }, { "epoch": 47.64204545454545, "grad_norm": 1.0268901586532593, "learning_rate": 0.0001, "loss": 0.0671, "step": 16770 }, { "epoch": 47.67045454545455, "grad_norm": 0.8848260641098022, "learning_rate": 0.0001, "loss": 0.0675, "step": 16780 }, { "epoch": 47.69886363636363, "grad_norm": 0.9918054938316345, "learning_rate": 0.0001, "loss": 0.0667, "step": 16790 }, { "epoch": 47.72727272727273, "grad_norm": 0.9512577056884766, "learning_rate": 0.0001, "loss": 0.0699, "step": 16800 }, { "epoch": 47.75568181818182, "grad_norm": 1.1260731220245361, "learning_rate": 0.0001, "loss": 0.0679, "step": 16810 }, { "epoch": 47.78409090909091, "grad_norm": 1.003982424736023, "learning_rate": 0.0001, "loss": 0.0644, "step": 16820 }, { "epoch": 47.8125, "grad_norm": 0.9938884973526001, "learning_rate": 0.0001, "loss": 0.0677, "step": 16830 }, { "epoch": 47.84090909090909, "grad_norm": 1.222053050994873, "learning_rate": 0.0001, "loss": 0.0631, "step": 16840 }, { "epoch": 47.86931818181818, "grad_norm": 0.9429041147232056, "learning_rate": 0.0001, "loss": 0.0656, "step": 16850 }, { "epoch": 47.89772727272727, "grad_norm": 1.0304569005966187, "learning_rate": 0.0001, "loss": 0.0632, "step": 16860 }, { "epoch": 47.92613636363637, "grad_norm": 0.8651162981987, "learning_rate": 0.0001, "loss": 0.0638, "step": 16870 }, { "epoch": 47.95454545454545, "grad_norm": 1.1727617979049683, "learning_rate": 0.0001, "loss": 0.0641, "step": 16880 }, { "epoch": 47.98295454545455, "grad_norm": 0.9716474413871765, "learning_rate": 0.0001, "loss": 0.0672, "step": 16890 }, { "epoch": 48.01136363636363, "grad_norm": 0.9646078944206238, "learning_rate": 0.0001, "loss": 0.0642, "step": 16900 }, { "epoch": 48.03977272727273, "grad_norm": 0.7903724312782288, "learning_rate": 0.0001, "loss": 0.0652, "step": 16910 }, { "epoch": 48.06818181818182, "grad_norm": 0.7010796070098877, "learning_rate": 0.0001, "loss": 0.065, "step": 16920 }, { "epoch": 48.09659090909091, "grad_norm": 0.9780798554420471, "learning_rate": 0.0001, "loss": 0.064, "step": 16930 }, { "epoch": 48.125, "grad_norm": 0.8578932285308838, "learning_rate": 0.0001, "loss": 0.065, "step": 16940 }, { "epoch": 48.15340909090909, "grad_norm": 1.0814779996871948, "learning_rate": 0.0001, "loss": 0.0629, "step": 16950 }, { "epoch": 48.18181818181818, "grad_norm": 1.1047223806381226, "learning_rate": 0.0001, "loss": 0.0659, "step": 16960 }, { "epoch": 48.21022727272727, "grad_norm": 1.0128185749053955, "learning_rate": 0.0001, "loss": 0.07, "step": 16970 }, { "epoch": 48.23863636363637, "grad_norm": 1.191439151763916, "learning_rate": 0.0001, "loss": 0.0683, "step": 16980 }, { "epoch": 48.26704545454545, "grad_norm": 1.02851140499115, "learning_rate": 0.0001, "loss": 0.0646, "step": 16990 }, { "epoch": 48.29545454545455, "grad_norm": 0.9317130446434021, "learning_rate": 0.0001, "loss": 0.0666, "step": 17000 }, { "epoch": 48.32386363636363, "grad_norm": 0.8646169900894165, "learning_rate": 0.0001, "loss": 0.0645, "step": 17010 }, { "epoch": 48.35227272727273, "grad_norm": 0.8524001240730286, "learning_rate": 0.0001, "loss": 0.0654, "step": 17020 }, { "epoch": 48.38068181818182, "grad_norm": 0.7672250270843506, "learning_rate": 0.0001, "loss": 0.0676, "step": 17030 }, { "epoch": 48.40909090909091, "grad_norm": 0.9044290781021118, "learning_rate": 0.0001, "loss": 0.0657, "step": 17040 }, { "epoch": 48.4375, "grad_norm": 0.8433730602264404, "learning_rate": 0.0001, "loss": 0.0653, "step": 17050 }, { "epoch": 48.46590909090909, "grad_norm": 0.7054641842842102, "learning_rate": 0.0001, "loss": 0.0642, "step": 17060 }, { "epoch": 48.49431818181818, "grad_norm": 0.7396852374076843, "learning_rate": 0.0001, "loss": 0.0654, "step": 17070 }, { "epoch": 48.52272727272727, "grad_norm": 0.8507287502288818, "learning_rate": 0.0001, "loss": 0.063, "step": 17080 }, { "epoch": 48.55113636363637, "grad_norm": 0.7744329571723938, "learning_rate": 0.0001, "loss": 0.0652, "step": 17090 }, { "epoch": 48.57954545454545, "grad_norm": 0.8510982990264893, "learning_rate": 0.0001, "loss": 0.0652, "step": 17100 }, { "epoch": 48.60795454545455, "grad_norm": 0.9002107977867126, "learning_rate": 0.0001, "loss": 0.0652, "step": 17110 }, { "epoch": 48.63636363636363, "grad_norm": 1.3234931230545044, "learning_rate": 0.0001, "loss": 0.0644, "step": 17120 }, { "epoch": 48.66477272727273, "grad_norm": 1.2231804132461548, "learning_rate": 0.0001, "loss": 0.0651, "step": 17130 }, { "epoch": 48.69318181818182, "grad_norm": 1.0435932874679565, "learning_rate": 0.0001, "loss": 0.063, "step": 17140 }, { "epoch": 48.72159090909091, "grad_norm": 1.1819216012954712, "learning_rate": 0.0001, "loss": 0.0639, "step": 17150 }, { "epoch": 48.75, "grad_norm": 1.3706990480422974, "learning_rate": 0.0001, "loss": 0.0649, "step": 17160 }, { "epoch": 48.77840909090909, "grad_norm": 1.0659633874893188, "learning_rate": 0.0001, "loss": 0.0643, "step": 17170 }, { "epoch": 48.80681818181818, "grad_norm": 1.1273925304412842, "learning_rate": 0.0001, "loss": 0.0647, "step": 17180 }, { "epoch": 48.83522727272727, "grad_norm": 1.0904277563095093, "learning_rate": 0.0001, "loss": 0.0629, "step": 17190 }, { "epoch": 48.86363636363637, "grad_norm": 1.4346485137939453, "learning_rate": 0.0001, "loss": 0.0658, "step": 17200 }, { "epoch": 48.89204545454545, "grad_norm": 1.2752107381820679, "learning_rate": 0.0001, "loss": 0.0666, "step": 17210 }, { "epoch": 48.92045454545455, "grad_norm": 1.179890751838684, "learning_rate": 0.0001, "loss": 0.0634, "step": 17220 }, { "epoch": 48.94886363636363, "grad_norm": 1.0924259424209595, "learning_rate": 0.0001, "loss": 0.063, "step": 17230 }, { "epoch": 48.97727272727273, "grad_norm": 1.2451024055480957, "learning_rate": 0.0001, "loss": 0.063, "step": 17240 }, { "epoch": 49.00568181818182, "grad_norm": 1.29100501537323, "learning_rate": 0.0001, "loss": 0.0625, "step": 17250 }, { "epoch": 49.03409090909091, "grad_norm": 1.175595998764038, "learning_rate": 0.0001, "loss": 0.065, "step": 17260 }, { "epoch": 49.0625, "grad_norm": 1.084436297416687, "learning_rate": 0.0001, "loss": 0.0617, "step": 17270 }, { "epoch": 49.09090909090909, "grad_norm": 1.1387672424316406, "learning_rate": 0.0001, "loss": 0.0597, "step": 17280 }, { "epoch": 49.11931818181818, "grad_norm": 1.0709339380264282, "learning_rate": 0.0001, "loss": 0.0642, "step": 17290 }, { "epoch": 49.14772727272727, "grad_norm": 1.1278772354125977, "learning_rate": 0.0001, "loss": 0.0592, "step": 17300 }, { "epoch": 49.17613636363637, "grad_norm": 1.2357215881347656, "learning_rate": 0.0001, "loss": 0.0645, "step": 17310 }, { "epoch": 49.20454545454545, "grad_norm": 1.3850127458572388, "learning_rate": 0.0001, "loss": 0.0643, "step": 17320 }, { "epoch": 49.23295454545455, "grad_norm": 1.5667473077774048, "learning_rate": 0.0001, "loss": 0.064, "step": 17330 }, { "epoch": 49.26136363636363, "grad_norm": 2.0329859256744385, "learning_rate": 0.0001, "loss": 0.0638, "step": 17340 }, { "epoch": 49.28977272727273, "grad_norm": 1.9868841171264648, "learning_rate": 0.0001, "loss": 0.0619, "step": 17350 }, { "epoch": 49.31818181818182, "grad_norm": 1.650707721710205, "learning_rate": 0.0001, "loss": 0.0622, "step": 17360 }, { "epoch": 49.34659090909091, "grad_norm": 1.6646281480789185, "learning_rate": 0.0001, "loss": 0.0603, "step": 17370 }, { "epoch": 49.375, "grad_norm": 1.3677852153778076, "learning_rate": 0.0001, "loss": 0.061, "step": 17380 }, { "epoch": 49.40340909090909, "grad_norm": 1.33896005153656, "learning_rate": 0.0001, "loss": 0.0588, "step": 17390 }, { "epoch": 49.43181818181818, "grad_norm": 1.3581352233886719, "learning_rate": 0.0001, "loss": 0.0617, "step": 17400 }, { "epoch": 49.46022727272727, "grad_norm": 1.3389812707901, "learning_rate": 0.0001, "loss": 0.0574, "step": 17410 }, { "epoch": 49.48863636363637, "grad_norm": 1.304565191268921, "learning_rate": 0.0001, "loss": 0.0628, "step": 17420 }, { "epoch": 49.51704545454545, "grad_norm": 1.9376806020736694, "learning_rate": 0.0001, "loss": 0.0611, "step": 17430 }, { "epoch": 49.54545454545455, "grad_norm": 1.8597931861877441, "learning_rate": 0.0001, "loss": 0.0617, "step": 17440 }, { "epoch": 49.57386363636363, "grad_norm": 1.4572594165802002, "learning_rate": 0.0001, "loss": 0.0615, "step": 17450 }, { "epoch": 49.60227272727273, "grad_norm": 1.7948307991027832, "learning_rate": 0.0001, "loss": 0.0604, "step": 17460 }, { "epoch": 49.63068181818182, "grad_norm": 1.3871301412582397, "learning_rate": 0.0001, "loss": 0.0616, "step": 17470 }, { "epoch": 49.65909090909091, "grad_norm": 1.322991132736206, "learning_rate": 0.0001, "loss": 0.0601, "step": 17480 }, { "epoch": 49.6875, "grad_norm": 1.2961491346359253, "learning_rate": 0.0001, "loss": 0.0604, "step": 17490 }, { "epoch": 49.71590909090909, "grad_norm": 1.283707618713379, "learning_rate": 0.0001, "loss": 0.0609, "step": 17500 }, { "epoch": 49.74431818181818, "grad_norm": 1.142791986465454, "learning_rate": 0.0001, "loss": 0.0609, "step": 17510 }, { "epoch": 49.77272727272727, "grad_norm": 0.8002589344978333, "learning_rate": 0.0001, "loss": 0.0618, "step": 17520 }, { "epoch": 49.80113636363637, "grad_norm": 0.9746940732002258, "learning_rate": 0.0001, "loss": 0.0597, "step": 17530 }, { "epoch": 49.82954545454545, "grad_norm": 0.965501606464386, "learning_rate": 0.0001, "loss": 0.0646, "step": 17540 }, { "epoch": 49.85795454545455, "grad_norm": 1.053093671798706, "learning_rate": 0.0001, "loss": 0.0643, "step": 17550 }, { "epoch": 49.88636363636363, "grad_norm": 1.2206720113754272, "learning_rate": 0.0001, "loss": 0.0618, "step": 17560 }, { "epoch": 49.91477272727273, "grad_norm": 1.094285488128662, "learning_rate": 0.0001, "loss": 0.0606, "step": 17570 }, { "epoch": 49.94318181818182, "grad_norm": 1.9030991792678833, "learning_rate": 0.0001, "loss": 0.0679, "step": 17580 }, { "epoch": 49.97159090909091, "grad_norm": 1.452059030532837, "learning_rate": 0.0001, "loss": 0.0687, "step": 17590 }, { "epoch": 50.0, "grad_norm": 1.4255893230438232, "learning_rate": 0.0001, "loss": 0.0641, "step": 17600 }, { "epoch": 50.02840909090909, "grad_norm": 1.547782063484192, "learning_rate": 0.0001, "loss": 0.0607, "step": 17610 }, { "epoch": 50.05681818181818, "grad_norm": 1.533228874206543, "learning_rate": 0.0001, "loss": 0.0623, "step": 17620 }, { "epoch": 50.08522727272727, "grad_norm": 2.1276297569274902, "learning_rate": 0.0001, "loss": 0.062, "step": 17630 }, { "epoch": 50.11363636363637, "grad_norm": 1.5320310592651367, "learning_rate": 0.0001, "loss": 0.059, "step": 17640 }, { "epoch": 50.14204545454545, "grad_norm": 1.4868521690368652, "learning_rate": 0.0001, "loss": 0.0583, "step": 17650 }, { "epoch": 50.17045454545455, "grad_norm": 1.3425029516220093, "learning_rate": 0.0001, "loss": 0.0625, "step": 17660 }, { "epoch": 50.19886363636363, "grad_norm": 1.683226466178894, "learning_rate": 0.0001, "loss": 0.0624, "step": 17670 }, { "epoch": 50.22727272727273, "grad_norm": 1.6359002590179443, "learning_rate": 0.0001, "loss": 0.0584, "step": 17680 }, { "epoch": 50.25568181818182, "grad_norm": 1.1186552047729492, "learning_rate": 0.0001, "loss": 0.0624, "step": 17690 }, { "epoch": 50.28409090909091, "grad_norm": 0.9917884469032288, "learning_rate": 0.0001, "loss": 0.0568, "step": 17700 }, { "epoch": 50.3125, "grad_norm": 1.3089747428894043, "learning_rate": 0.0001, "loss": 0.0608, "step": 17710 }, { "epoch": 50.34090909090909, "grad_norm": 1.094618320465088, "learning_rate": 0.0001, "loss": 0.0628, "step": 17720 }, { "epoch": 50.36931818181818, "grad_norm": 1.0189076662063599, "learning_rate": 0.0001, "loss": 0.061, "step": 17730 }, { "epoch": 50.39772727272727, "grad_norm": 1.0388256311416626, "learning_rate": 0.0001, "loss": 0.0642, "step": 17740 }, { "epoch": 50.42613636363637, "grad_norm": 1.2349307537078857, "learning_rate": 0.0001, "loss": 0.0614, "step": 17750 }, { "epoch": 50.45454545454545, "grad_norm": 1.1165496110916138, "learning_rate": 0.0001, "loss": 0.0615, "step": 17760 }, { "epoch": 50.48295454545455, "grad_norm": 1.3187179565429688, "learning_rate": 0.0001, "loss": 0.064, "step": 17770 }, { "epoch": 50.51136363636363, "grad_norm": 1.1244080066680908, "learning_rate": 0.0001, "loss": 0.0631, "step": 17780 }, { "epoch": 50.53977272727273, "grad_norm": 1.1014071702957153, "learning_rate": 0.0001, "loss": 0.0612, "step": 17790 }, { "epoch": 50.56818181818182, "grad_norm": 1.0804203748703003, "learning_rate": 0.0001, "loss": 0.0615, "step": 17800 }, { "epoch": 50.59659090909091, "grad_norm": 1.1172409057617188, "learning_rate": 0.0001, "loss": 0.0583, "step": 17810 }, { "epoch": 50.625, "grad_norm": 1.2156904935836792, "learning_rate": 0.0001, "loss": 0.0635, "step": 17820 }, { "epoch": 50.65340909090909, "grad_norm": 1.0518922805786133, "learning_rate": 0.0001, "loss": 0.0649, "step": 17830 }, { "epoch": 50.68181818181818, "grad_norm": 1.0572881698608398, "learning_rate": 0.0001, "loss": 0.0653, "step": 17840 }, { "epoch": 50.71022727272727, "grad_norm": 0.9975048899650574, "learning_rate": 0.0001, "loss": 0.0637, "step": 17850 }, { "epoch": 50.73863636363637, "grad_norm": 1.0606894493103027, "learning_rate": 0.0001, "loss": 0.0629, "step": 17860 }, { "epoch": 50.76704545454545, "grad_norm": 1.1085385084152222, "learning_rate": 0.0001, "loss": 0.0638, "step": 17870 }, { "epoch": 50.79545454545455, "grad_norm": 1.0079888105392456, "learning_rate": 0.0001, "loss": 0.0645, "step": 17880 }, { "epoch": 50.82386363636363, "grad_norm": 0.9119659662246704, "learning_rate": 0.0001, "loss": 0.0657, "step": 17890 }, { "epoch": 50.85227272727273, "grad_norm": 1.0529975891113281, "learning_rate": 0.0001, "loss": 0.0642, "step": 17900 }, { "epoch": 50.88068181818182, "grad_norm": 1.101491928100586, "learning_rate": 0.0001, "loss": 0.0642, "step": 17910 }, { "epoch": 50.90909090909091, "grad_norm": 1.049623966217041, "learning_rate": 0.0001, "loss": 0.0659, "step": 17920 }, { "epoch": 50.9375, "grad_norm": 0.855363130569458, "learning_rate": 0.0001, "loss": 0.0624, "step": 17930 }, { "epoch": 50.96590909090909, "grad_norm": 0.8307511210441589, "learning_rate": 0.0001, "loss": 0.0639, "step": 17940 }, { "epoch": 50.99431818181818, "grad_norm": 0.9840141534805298, "learning_rate": 0.0001, "loss": 0.0638, "step": 17950 }, { "epoch": 51.02272727272727, "grad_norm": 0.7411724328994751, "learning_rate": 0.0001, "loss": 0.0653, "step": 17960 }, { "epoch": 51.05113636363637, "grad_norm": 1.1654945611953735, "learning_rate": 0.0001, "loss": 0.0655, "step": 17970 }, { "epoch": 51.07954545454545, "grad_norm": 1.0566688776016235, "learning_rate": 0.0001, "loss": 0.0717, "step": 17980 }, { "epoch": 51.10795454545455, "grad_norm": 1.1673790216445923, "learning_rate": 0.0001, "loss": 0.0642, "step": 17990 }, { "epoch": 51.13636363636363, "grad_norm": 1.0396006107330322, "learning_rate": 0.0001, "loss": 0.0672, "step": 18000 }, { "epoch": 51.16477272727273, "grad_norm": 0.9316548705101013, "learning_rate": 0.0001, "loss": 0.0688, "step": 18010 }, { "epoch": 51.19318181818182, "grad_norm": 1.0011225938796997, "learning_rate": 0.0001, "loss": 0.0623, "step": 18020 }, { "epoch": 51.22159090909091, "grad_norm": 1.2169394493103027, "learning_rate": 0.0001, "loss": 0.0668, "step": 18030 }, { "epoch": 51.25, "grad_norm": 1.0183131694793701, "learning_rate": 0.0001, "loss": 0.0637, "step": 18040 }, { "epoch": 51.27840909090909, "grad_norm": 1.3156449794769287, "learning_rate": 0.0001, "loss": 0.0674, "step": 18050 }, { "epoch": 51.30681818181818, "grad_norm": 1.2456082105636597, "learning_rate": 0.0001, "loss": 0.0677, "step": 18060 }, { "epoch": 51.33522727272727, "grad_norm": 1.2302768230438232, "learning_rate": 0.0001, "loss": 0.064, "step": 18070 }, { "epoch": 51.36363636363637, "grad_norm": 1.3110893964767456, "learning_rate": 0.0001, "loss": 0.0643, "step": 18080 }, { "epoch": 51.39204545454545, "grad_norm": 1.0676565170288086, "learning_rate": 0.0001, "loss": 0.0631, "step": 18090 }, { "epoch": 51.42045454545455, "grad_norm": 0.8240692019462585, "learning_rate": 0.0001, "loss": 0.0613, "step": 18100 }, { "epoch": 51.44886363636363, "grad_norm": 1.0472469329833984, "learning_rate": 0.0001, "loss": 0.0635, "step": 18110 }, { "epoch": 51.47727272727273, "grad_norm": 1.0723049640655518, "learning_rate": 0.0001, "loss": 0.0626, "step": 18120 }, { "epoch": 51.50568181818182, "grad_norm": 0.8317203521728516, "learning_rate": 0.0001, "loss": 0.0631, "step": 18130 }, { "epoch": 51.53409090909091, "grad_norm": 0.8893545269966125, "learning_rate": 0.0001, "loss": 0.0642, "step": 18140 }, { "epoch": 51.5625, "grad_norm": 1.143960952758789, "learning_rate": 0.0001, "loss": 0.0645, "step": 18150 }, { "epoch": 51.59090909090909, "grad_norm": 1.1038347482681274, "learning_rate": 0.0001, "loss": 0.0654, "step": 18160 }, { "epoch": 51.61931818181818, "grad_norm": 0.9441390037536621, "learning_rate": 0.0001, "loss": 0.0614, "step": 18170 }, { "epoch": 51.64772727272727, "grad_norm": 1.1258492469787598, "learning_rate": 0.0001, "loss": 0.0609, "step": 18180 }, { "epoch": 51.67613636363637, "grad_norm": 1.1269819736480713, "learning_rate": 0.0001, "loss": 0.0637, "step": 18190 }, { "epoch": 51.70454545454545, "grad_norm": 0.8500455021858215, "learning_rate": 0.0001, "loss": 0.0653, "step": 18200 }, { "epoch": 51.73295454545455, "grad_norm": 0.8912470936775208, "learning_rate": 0.0001, "loss": 0.0639, "step": 18210 }, { "epoch": 51.76136363636363, "grad_norm": 1.0278549194335938, "learning_rate": 0.0001, "loss": 0.0603, "step": 18220 }, { "epoch": 51.78977272727273, "grad_norm": 0.9313192367553711, "learning_rate": 0.0001, "loss": 0.0656, "step": 18230 }, { "epoch": 51.81818181818182, "grad_norm": 0.8399008512496948, "learning_rate": 0.0001, "loss": 0.066, "step": 18240 }, { "epoch": 51.84659090909091, "grad_norm": 0.8020362257957458, "learning_rate": 0.0001, "loss": 0.0647, "step": 18250 }, { "epoch": 51.875, "grad_norm": 0.9596895575523376, "learning_rate": 0.0001, "loss": 0.0631, "step": 18260 }, { "epoch": 51.90340909090909, "grad_norm": 0.8371610045433044, "learning_rate": 0.0001, "loss": 0.0669, "step": 18270 }, { "epoch": 51.93181818181818, "grad_norm": 0.774612307548523, "learning_rate": 0.0001, "loss": 0.0637, "step": 18280 }, { "epoch": 51.96022727272727, "grad_norm": 0.9038988351821899, "learning_rate": 0.0001, "loss": 0.0662, "step": 18290 }, { "epoch": 51.98863636363637, "grad_norm": 1.150199055671692, "learning_rate": 0.0001, "loss": 0.0631, "step": 18300 }, { "epoch": 52.01704545454545, "grad_norm": 1.3316466808319092, "learning_rate": 0.0001, "loss": 0.0621, "step": 18310 }, { "epoch": 52.04545454545455, "grad_norm": 1.0422097444534302, "learning_rate": 0.0001, "loss": 0.0657, "step": 18320 }, { "epoch": 52.07386363636363, "grad_norm": 1.0511544942855835, "learning_rate": 0.0001, "loss": 0.064, "step": 18330 }, { "epoch": 52.10227272727273, "grad_norm": 0.9210748672485352, "learning_rate": 0.0001, "loss": 0.0624, "step": 18340 }, { "epoch": 52.13068181818182, "grad_norm": 1.0048185586929321, "learning_rate": 0.0001, "loss": 0.0614, "step": 18350 }, { "epoch": 52.15909090909091, "grad_norm": 1.0767287015914917, "learning_rate": 0.0001, "loss": 0.0613, "step": 18360 }, { "epoch": 52.1875, "grad_norm": 1.0105133056640625, "learning_rate": 0.0001, "loss": 0.0615, "step": 18370 }, { "epoch": 52.21590909090909, "grad_norm": 0.7021766304969788, "learning_rate": 0.0001, "loss": 0.068, "step": 18380 }, { "epoch": 52.24431818181818, "grad_norm": 0.8372295498847961, "learning_rate": 0.0001, "loss": 0.0624, "step": 18390 }, { "epoch": 52.27272727272727, "grad_norm": 0.7811925411224365, "learning_rate": 0.0001, "loss": 0.0649, "step": 18400 }, { "epoch": 52.30113636363637, "grad_norm": 0.821349024772644, "learning_rate": 0.0001, "loss": 0.0654, "step": 18410 }, { "epoch": 52.32954545454545, "grad_norm": 0.7928653359413147, "learning_rate": 0.0001, "loss": 0.0626, "step": 18420 }, { "epoch": 52.35795454545455, "grad_norm": 0.8362237215042114, "learning_rate": 0.0001, "loss": 0.0629, "step": 18430 }, { "epoch": 52.38636363636363, "grad_norm": 0.9375684261322021, "learning_rate": 0.0001, "loss": 0.0659, "step": 18440 }, { "epoch": 52.41477272727273, "grad_norm": 0.9780309796333313, "learning_rate": 0.0001, "loss": 0.0631, "step": 18450 }, { "epoch": 52.44318181818182, "grad_norm": 1.062294840812683, "learning_rate": 0.0001, "loss": 0.0665, "step": 18460 }, { "epoch": 52.47159090909091, "grad_norm": 1.0660057067871094, "learning_rate": 0.0001, "loss": 0.065, "step": 18470 }, { "epoch": 52.5, "grad_norm": 1.05930495262146, "learning_rate": 0.0001, "loss": 0.0626, "step": 18480 }, { "epoch": 52.52840909090909, "grad_norm": 0.8140740394592285, "learning_rate": 0.0001, "loss": 0.0615, "step": 18490 }, { "epoch": 52.55681818181818, "grad_norm": 1.0095051527023315, "learning_rate": 0.0001, "loss": 0.0605, "step": 18500 }, { "epoch": 52.58522727272727, "grad_norm": 0.9089073538780212, "learning_rate": 0.0001, "loss": 0.0616, "step": 18510 }, { "epoch": 52.61363636363637, "grad_norm": 0.8695672154426575, "learning_rate": 0.0001, "loss": 0.0623, "step": 18520 }, { "epoch": 52.64204545454545, "grad_norm": 1.0662381649017334, "learning_rate": 0.0001, "loss": 0.0639, "step": 18530 }, { "epoch": 52.67045454545455, "grad_norm": 1.0227075815200806, "learning_rate": 0.0001, "loss": 0.0632, "step": 18540 }, { "epoch": 52.69886363636363, "grad_norm": 1.003777265548706, "learning_rate": 0.0001, "loss": 0.0629, "step": 18550 }, { "epoch": 52.72727272727273, "grad_norm": 0.9030758738517761, "learning_rate": 0.0001, "loss": 0.0591, "step": 18560 }, { "epoch": 52.75568181818182, "grad_norm": 1.0003148317337036, "learning_rate": 0.0001, "loss": 0.0632, "step": 18570 }, { "epoch": 52.78409090909091, "grad_norm": 0.8785012364387512, "learning_rate": 0.0001, "loss": 0.0622, "step": 18580 }, { "epoch": 52.8125, "grad_norm": 1.1217682361602783, "learning_rate": 0.0001, "loss": 0.0636, "step": 18590 }, { "epoch": 52.84090909090909, "grad_norm": 1.0676538944244385, "learning_rate": 0.0001, "loss": 0.0617, "step": 18600 }, { "epoch": 52.86931818181818, "grad_norm": 1.2667808532714844, "learning_rate": 0.0001, "loss": 0.0634, "step": 18610 }, { "epoch": 52.89772727272727, "grad_norm": 1.255224347114563, "learning_rate": 0.0001, "loss": 0.0632, "step": 18620 }, { "epoch": 52.92613636363637, "grad_norm": 1.0495514869689941, "learning_rate": 0.0001, "loss": 0.0635, "step": 18630 }, { "epoch": 52.95454545454545, "grad_norm": 1.2018243074417114, "learning_rate": 0.0001, "loss": 0.066, "step": 18640 }, { "epoch": 52.98295454545455, "grad_norm": 1.1866649389266968, "learning_rate": 0.0001, "loss": 0.0592, "step": 18650 }, { "epoch": 53.01136363636363, "grad_norm": 1.3282562494277954, "learning_rate": 0.0001, "loss": 0.0644, "step": 18660 }, { "epoch": 53.03977272727273, "grad_norm": 1.1297610998153687, "learning_rate": 0.0001, "loss": 0.0636, "step": 18670 }, { "epoch": 53.06818181818182, "grad_norm": 1.1281236410140991, "learning_rate": 0.0001, "loss": 0.0598, "step": 18680 }, { "epoch": 53.09659090909091, "grad_norm": 1.2281813621520996, "learning_rate": 0.0001, "loss": 0.0605, "step": 18690 }, { "epoch": 53.125, "grad_norm": 1.1154251098632812, "learning_rate": 0.0001, "loss": 0.0613, "step": 18700 }, { "epoch": 53.15340909090909, "grad_norm": 0.9964898824691772, "learning_rate": 0.0001, "loss": 0.0616, "step": 18710 }, { "epoch": 53.18181818181818, "grad_norm": 1.0880987644195557, "learning_rate": 0.0001, "loss": 0.0582, "step": 18720 }, { "epoch": 53.21022727272727, "grad_norm": 1.1195552349090576, "learning_rate": 0.0001, "loss": 0.0612, "step": 18730 }, { "epoch": 53.23863636363637, "grad_norm": 1.1419169902801514, "learning_rate": 0.0001, "loss": 0.0616, "step": 18740 }, { "epoch": 53.26704545454545, "grad_norm": 0.9695098400115967, "learning_rate": 0.0001, "loss": 0.0617, "step": 18750 }, { "epoch": 53.29545454545455, "grad_norm": 1.0621428489685059, "learning_rate": 0.0001, "loss": 0.0632, "step": 18760 }, { "epoch": 53.32386363636363, "grad_norm": 1.047326922416687, "learning_rate": 0.0001, "loss": 0.0603, "step": 18770 }, { "epoch": 53.35227272727273, "grad_norm": 1.062296748161316, "learning_rate": 0.0001, "loss": 0.0639, "step": 18780 }, { "epoch": 53.38068181818182, "grad_norm": 1.134915828704834, "learning_rate": 0.0001, "loss": 0.0619, "step": 18790 }, { "epoch": 53.40909090909091, "grad_norm": 1.040932536125183, "learning_rate": 0.0001, "loss": 0.0629, "step": 18800 }, { "epoch": 53.4375, "grad_norm": 1.0221163034439087, "learning_rate": 0.0001, "loss": 0.0603, "step": 18810 }, { "epoch": 53.46590909090909, "grad_norm": 0.9637789726257324, "learning_rate": 0.0001, "loss": 0.0614, "step": 18820 }, { "epoch": 53.49431818181818, "grad_norm": 1.0450648069381714, "learning_rate": 0.0001, "loss": 0.06, "step": 18830 }, { "epoch": 53.52272727272727, "grad_norm": 0.9436495900154114, "learning_rate": 0.0001, "loss": 0.0622, "step": 18840 }, { "epoch": 53.55113636363637, "grad_norm": 1.0419658422470093, "learning_rate": 0.0001, "loss": 0.0617, "step": 18850 }, { "epoch": 53.57954545454545, "grad_norm": 0.8948044180870056, "learning_rate": 0.0001, "loss": 0.0604, "step": 18860 }, { "epoch": 53.60795454545455, "grad_norm": 1.0394561290740967, "learning_rate": 0.0001, "loss": 0.0623, "step": 18870 }, { "epoch": 53.63636363636363, "grad_norm": 1.00216543674469, "learning_rate": 0.0001, "loss": 0.0597, "step": 18880 }, { "epoch": 53.66477272727273, "grad_norm": 0.7522396445274353, "learning_rate": 0.0001, "loss": 0.0618, "step": 18890 }, { "epoch": 53.69318181818182, "grad_norm": 1.162223219871521, "learning_rate": 0.0001, "loss": 0.063, "step": 18900 }, { "epoch": 53.72159090909091, "grad_norm": 1.0378843545913696, "learning_rate": 0.0001, "loss": 0.0645, "step": 18910 }, { "epoch": 53.75, "grad_norm": 0.890440821647644, "learning_rate": 0.0001, "loss": 0.0615, "step": 18920 }, { "epoch": 53.77840909090909, "grad_norm": 0.825039803981781, "learning_rate": 0.0001, "loss": 0.0611, "step": 18930 }, { "epoch": 53.80681818181818, "grad_norm": 0.8410844802856445, "learning_rate": 0.0001, "loss": 0.0608, "step": 18940 }, { "epoch": 53.83522727272727, "grad_norm": 1.1039787530899048, "learning_rate": 0.0001, "loss": 0.0653, "step": 18950 }, { "epoch": 53.86363636363637, "grad_norm": 0.9757326245307922, "learning_rate": 0.0001, "loss": 0.0616, "step": 18960 }, { "epoch": 53.89204545454545, "grad_norm": 0.7968357801437378, "learning_rate": 0.0001, "loss": 0.0639, "step": 18970 }, { "epoch": 53.92045454545455, "grad_norm": 0.9554797410964966, "learning_rate": 0.0001, "loss": 0.0605, "step": 18980 }, { "epoch": 53.94886363636363, "grad_norm": 0.8579493165016174, "learning_rate": 0.0001, "loss": 0.0584, "step": 18990 }, { "epoch": 53.97727272727273, "grad_norm": 0.841977059841156, "learning_rate": 0.0001, "loss": 0.0638, "step": 19000 }, { "epoch": 54.00568181818182, "grad_norm": 0.7986201643943787, "learning_rate": 0.0001, "loss": 0.0593, "step": 19010 }, { "epoch": 54.03409090909091, "grad_norm": 1.3590857982635498, "learning_rate": 0.0001, "loss": 0.0612, "step": 19020 }, { "epoch": 54.0625, "grad_norm": 0.7293931841850281, "learning_rate": 0.0001, "loss": 0.0606, "step": 19030 }, { "epoch": 54.09090909090909, "grad_norm": 0.8448790907859802, "learning_rate": 0.0001, "loss": 0.0649, "step": 19040 }, { "epoch": 54.11931818181818, "grad_norm": 0.9543153047561646, "learning_rate": 0.0001, "loss": 0.0627, "step": 19050 }, { "epoch": 54.14772727272727, "grad_norm": 0.859277606010437, "learning_rate": 0.0001, "loss": 0.0657, "step": 19060 }, { "epoch": 54.17613636363637, "grad_norm": 0.9446835517883301, "learning_rate": 0.0001, "loss": 0.0598, "step": 19070 }, { "epoch": 54.20454545454545, "grad_norm": 0.9382405281066895, "learning_rate": 0.0001, "loss": 0.0624, "step": 19080 }, { "epoch": 54.23295454545455, "grad_norm": 1.0334746837615967, "learning_rate": 0.0001, "loss": 0.0619, "step": 19090 }, { "epoch": 54.26136363636363, "grad_norm": 1.0028332471847534, "learning_rate": 0.0001, "loss": 0.0611, "step": 19100 }, { "epoch": 54.28977272727273, "grad_norm": 1.056213140487671, "learning_rate": 0.0001, "loss": 0.0626, "step": 19110 }, { "epoch": 54.31818181818182, "grad_norm": 0.9702300429344177, "learning_rate": 0.0001, "loss": 0.0677, "step": 19120 }, { "epoch": 54.34659090909091, "grad_norm": 1.210434079170227, "learning_rate": 0.0001, "loss": 0.0629, "step": 19130 }, { "epoch": 54.375, "grad_norm": 1.048459529876709, "learning_rate": 0.0001, "loss": 0.0628, "step": 19140 }, { "epoch": 54.40340909090909, "grad_norm": 0.9593278765678406, "learning_rate": 0.0001, "loss": 0.0605, "step": 19150 }, { "epoch": 54.43181818181818, "grad_norm": 0.7503321170806885, "learning_rate": 0.0001, "loss": 0.0589, "step": 19160 }, { "epoch": 54.46022727272727, "grad_norm": 0.8162446618080139, "learning_rate": 0.0001, "loss": 0.0623, "step": 19170 }, { "epoch": 54.48863636363637, "grad_norm": 0.619260311126709, "learning_rate": 0.0001, "loss": 0.0598, "step": 19180 }, { "epoch": 54.51704545454545, "grad_norm": 0.6872047781944275, "learning_rate": 0.0001, "loss": 0.0646, "step": 19190 }, { "epoch": 54.54545454545455, "grad_norm": 0.9017942547798157, "learning_rate": 0.0001, "loss": 0.0642, "step": 19200 }, { "epoch": 54.57386363636363, "grad_norm": 0.9055486917495728, "learning_rate": 0.0001, "loss": 0.0615, "step": 19210 }, { "epoch": 54.60227272727273, "grad_norm": 0.9028745889663696, "learning_rate": 0.0001, "loss": 0.0607, "step": 19220 }, { "epoch": 54.63068181818182, "grad_norm": 0.937928318977356, "learning_rate": 0.0001, "loss": 0.0618, "step": 19230 }, { "epoch": 54.65909090909091, "grad_norm": 0.9727193713188171, "learning_rate": 0.0001, "loss": 0.0616, "step": 19240 }, { "epoch": 54.6875, "grad_norm": 0.8907310366630554, "learning_rate": 0.0001, "loss": 0.0606, "step": 19250 }, { "epoch": 54.71590909090909, "grad_norm": 0.8963820934295654, "learning_rate": 0.0001, "loss": 0.0588, "step": 19260 }, { "epoch": 54.74431818181818, "grad_norm": 0.966294527053833, "learning_rate": 0.0001, "loss": 0.0604, "step": 19270 }, { "epoch": 54.77272727272727, "grad_norm": 0.8197779059410095, "learning_rate": 0.0001, "loss": 0.0632, "step": 19280 }, { "epoch": 54.80113636363637, "grad_norm": 0.9076131582260132, "learning_rate": 0.0001, "loss": 0.063, "step": 19290 }, { "epoch": 54.82954545454545, "grad_norm": 1.0960373878479004, "learning_rate": 0.0001, "loss": 0.0629, "step": 19300 }, { "epoch": 54.85795454545455, "grad_norm": 1.0276180505752563, "learning_rate": 0.0001, "loss": 0.0614, "step": 19310 }, { "epoch": 54.88636363636363, "grad_norm": 0.8789876699447632, "learning_rate": 0.0001, "loss": 0.0603, "step": 19320 }, { "epoch": 54.91477272727273, "grad_norm": 0.9199723601341248, "learning_rate": 0.0001, "loss": 0.0596, "step": 19330 }, { "epoch": 54.94318181818182, "grad_norm": 0.8767564296722412, "learning_rate": 0.0001, "loss": 0.061, "step": 19340 }, { "epoch": 54.97159090909091, "grad_norm": 0.8243430256843567, "learning_rate": 0.0001, "loss": 0.0598, "step": 19350 }, { "epoch": 55.0, "grad_norm": 0.8050703406333923, "learning_rate": 0.0001, "loss": 0.0614, "step": 19360 }, { "epoch": 55.02840909090909, "grad_norm": 0.7457884550094604, "learning_rate": 0.0001, "loss": 0.0591, "step": 19370 }, { "epoch": 55.05681818181818, "grad_norm": 0.7916040420532227, "learning_rate": 0.0001, "loss": 0.0608, "step": 19380 }, { "epoch": 55.08522727272727, "grad_norm": 1.00780189037323, "learning_rate": 0.0001, "loss": 0.0611, "step": 19390 }, { "epoch": 55.11363636363637, "grad_norm": 1.1122857332229614, "learning_rate": 0.0001, "loss": 0.0638, "step": 19400 }, { "epoch": 55.14204545454545, "grad_norm": 1.1893644332885742, "learning_rate": 0.0001, "loss": 0.0606, "step": 19410 }, { "epoch": 55.17045454545455, "grad_norm": 1.3157379627227783, "learning_rate": 0.0001, "loss": 0.0601, "step": 19420 }, { "epoch": 55.19886363636363, "grad_norm": 1.305027961730957, "learning_rate": 0.0001, "loss": 0.063, "step": 19430 }, { "epoch": 55.22727272727273, "grad_norm": 1.2850711345672607, "learning_rate": 0.0001, "loss": 0.0601, "step": 19440 }, { "epoch": 55.25568181818182, "grad_norm": 1.2181235551834106, "learning_rate": 0.0001, "loss": 0.0606, "step": 19450 }, { "epoch": 55.28409090909091, "grad_norm": 1.2655651569366455, "learning_rate": 0.0001, "loss": 0.0586, "step": 19460 }, { "epoch": 55.3125, "grad_norm": 1.0834294557571411, "learning_rate": 0.0001, "loss": 0.0592, "step": 19470 }, { "epoch": 55.34090909090909, "grad_norm": 0.8892400860786438, "learning_rate": 0.0001, "loss": 0.0594, "step": 19480 }, { "epoch": 55.36931818181818, "grad_norm": 1.0693202018737793, "learning_rate": 0.0001, "loss": 0.0631, "step": 19490 }, { "epoch": 55.39772727272727, "grad_norm": 0.9103065729141235, "learning_rate": 0.0001, "loss": 0.0599, "step": 19500 }, { "epoch": 55.42613636363637, "grad_norm": 0.9410889744758606, "learning_rate": 0.0001, "loss": 0.057, "step": 19510 }, { "epoch": 55.45454545454545, "grad_norm": 0.9349491000175476, "learning_rate": 0.0001, "loss": 0.0617, "step": 19520 }, { "epoch": 55.48295454545455, "grad_norm": 0.7211048007011414, "learning_rate": 0.0001, "loss": 0.0623, "step": 19530 }, { "epoch": 55.51136363636363, "grad_norm": 1.076320767402649, "learning_rate": 0.0001, "loss": 0.063, "step": 19540 }, { "epoch": 55.53977272727273, "grad_norm": 1.0220001935958862, "learning_rate": 0.0001, "loss": 0.0641, "step": 19550 }, { "epoch": 55.56818181818182, "grad_norm": 1.0938565731048584, "learning_rate": 0.0001, "loss": 0.0648, "step": 19560 }, { "epoch": 55.59659090909091, "grad_norm": 1.207349181175232, "learning_rate": 0.0001, "loss": 0.0618, "step": 19570 }, { "epoch": 55.625, "grad_norm": 0.8213934302330017, "learning_rate": 0.0001, "loss": 0.0618, "step": 19580 }, { "epoch": 55.65340909090909, "grad_norm": 0.9144793748855591, "learning_rate": 0.0001, "loss": 0.0611, "step": 19590 }, { "epoch": 55.68181818181818, "grad_norm": 0.8391266465187073, "learning_rate": 0.0001, "loss": 0.0593, "step": 19600 }, { "epoch": 55.71022727272727, "grad_norm": 0.8670185804367065, "learning_rate": 0.0001, "loss": 0.0612, "step": 19610 }, { "epoch": 55.73863636363637, "grad_norm": 0.909506618976593, "learning_rate": 0.0001, "loss": 0.0622, "step": 19620 }, { "epoch": 55.76704545454545, "grad_norm": 0.746269166469574, "learning_rate": 0.0001, "loss": 0.0613, "step": 19630 }, { "epoch": 55.79545454545455, "grad_norm": 0.6903102993965149, "learning_rate": 0.0001, "loss": 0.0604, "step": 19640 }, { "epoch": 55.82386363636363, "grad_norm": 0.7787826061248779, "learning_rate": 0.0001, "loss": 0.0617, "step": 19650 }, { "epoch": 55.85227272727273, "grad_norm": 0.8575695753097534, "learning_rate": 0.0001, "loss": 0.061, "step": 19660 }, { "epoch": 55.88068181818182, "grad_norm": 1.386139154434204, "learning_rate": 0.0001, "loss": 0.0617, "step": 19670 }, { "epoch": 55.90909090909091, "grad_norm": 1.4138883352279663, "learning_rate": 0.0001, "loss": 0.0616, "step": 19680 }, { "epoch": 55.9375, "grad_norm": 1.166518211364746, "learning_rate": 0.0001, "loss": 0.0607, "step": 19690 }, { "epoch": 55.96590909090909, "grad_norm": 1.1394963264465332, "learning_rate": 0.0001, "loss": 0.0603, "step": 19700 }, { "epoch": 55.99431818181818, "grad_norm": 1.0092238187789917, "learning_rate": 0.0001, "loss": 0.0604, "step": 19710 }, { "epoch": 56.02272727272727, "grad_norm": 1.2652308940887451, "learning_rate": 0.0001, "loss": 0.0618, "step": 19720 }, { "epoch": 56.05113636363637, "grad_norm": 1.0221445560455322, "learning_rate": 0.0001, "loss": 0.0605, "step": 19730 }, { "epoch": 56.07954545454545, "grad_norm": 1.1227253675460815, "learning_rate": 0.0001, "loss": 0.0593, "step": 19740 }, { "epoch": 56.10795454545455, "grad_norm": 1.105058193206787, "learning_rate": 0.0001, "loss": 0.0584, "step": 19750 }, { "epoch": 56.13636363636363, "grad_norm": 0.9531204104423523, "learning_rate": 0.0001, "loss": 0.0614, "step": 19760 }, { "epoch": 56.16477272727273, "grad_norm": 0.7669575214385986, "learning_rate": 0.0001, "loss": 0.0583, "step": 19770 }, { "epoch": 56.19318181818182, "grad_norm": 1.0532715320587158, "learning_rate": 0.0001, "loss": 0.0572, "step": 19780 }, { "epoch": 56.22159090909091, "grad_norm": 1.0322656631469727, "learning_rate": 0.0001, "loss": 0.0596, "step": 19790 }, { "epoch": 56.25, "grad_norm": 1.2287739515304565, "learning_rate": 0.0001, "loss": 0.0567, "step": 19800 }, { "epoch": 56.27840909090909, "grad_norm": 1.2664307355880737, "learning_rate": 0.0001, "loss": 0.0575, "step": 19810 }, { "epoch": 56.30681818181818, "grad_norm": 1.059367060661316, "learning_rate": 0.0001, "loss": 0.0595, "step": 19820 }, { "epoch": 56.33522727272727, "grad_norm": 0.7184119820594788, "learning_rate": 0.0001, "loss": 0.0607, "step": 19830 }, { "epoch": 56.36363636363637, "grad_norm": 0.996780276298523, "learning_rate": 0.0001, "loss": 0.0594, "step": 19840 }, { "epoch": 56.39204545454545, "grad_norm": 0.8815504312515259, "learning_rate": 0.0001, "loss": 0.0583, "step": 19850 }, { "epoch": 56.42045454545455, "grad_norm": 1.0592563152313232, "learning_rate": 0.0001, "loss": 0.0595, "step": 19860 }, { "epoch": 56.44886363636363, "grad_norm": 0.7666848301887512, "learning_rate": 0.0001, "loss": 0.0588, "step": 19870 }, { "epoch": 56.47727272727273, "grad_norm": 0.7527984976768494, "learning_rate": 0.0001, "loss": 0.061, "step": 19880 }, { "epoch": 56.50568181818182, "grad_norm": 1.3445789813995361, "learning_rate": 0.0001, "loss": 0.0625, "step": 19890 }, { "epoch": 56.53409090909091, "grad_norm": 1.4454388618469238, "learning_rate": 0.0001, "loss": 0.0613, "step": 19900 }, { "epoch": 56.5625, "grad_norm": 1.112709641456604, "learning_rate": 0.0001, "loss": 0.0611, "step": 19910 }, { "epoch": 56.59090909090909, "grad_norm": 1.1458951234817505, "learning_rate": 0.0001, "loss": 0.0586, "step": 19920 }, { "epoch": 56.61931818181818, "grad_norm": 1.526114821434021, "learning_rate": 0.0001, "loss": 0.059, "step": 19930 }, { "epoch": 56.64772727272727, "grad_norm": 1.657968282699585, "learning_rate": 0.0001, "loss": 0.0615, "step": 19940 }, { "epoch": 56.67613636363637, "grad_norm": 1.5407483577728271, "learning_rate": 0.0001, "loss": 0.0566, "step": 19950 }, { "epoch": 56.70454545454545, "grad_norm": 1.186934232711792, "learning_rate": 0.0001, "loss": 0.0583, "step": 19960 }, { "epoch": 56.73295454545455, "grad_norm": 0.9510246515274048, "learning_rate": 0.0001, "loss": 0.0554, "step": 19970 }, { "epoch": 56.76136363636363, "grad_norm": 0.8539232611656189, "learning_rate": 0.0001, "loss": 0.0566, "step": 19980 }, { "epoch": 56.78977272727273, "grad_norm": 1.1108481884002686, "learning_rate": 0.0001, "loss": 0.0539, "step": 19990 }, { "epoch": 56.81818181818182, "grad_norm": 1.3073898553848267, "learning_rate": 0.0001, "loss": 0.0571, "step": 20000 }, { "epoch": 56.84659090909091, "grad_norm": 1.0064797401428223, "learning_rate": 0.0001, "loss": 0.0569, "step": 20010 }, { "epoch": 56.875, "grad_norm": 1.1430519819259644, "learning_rate": 0.0001, "loss": 0.058, "step": 20020 }, { "epoch": 56.90340909090909, "grad_norm": 1.2745853662490845, "learning_rate": 0.0001, "loss": 0.0581, "step": 20030 }, { "epoch": 56.93181818181818, "grad_norm": 1.700903296470642, "learning_rate": 0.0001, "loss": 0.0655, "step": 20040 }, { "epoch": 56.96022727272727, "grad_norm": 1.9346566200256348, "learning_rate": 0.0001, "loss": 0.0595, "step": 20050 }, { "epoch": 56.98863636363637, "grad_norm": 1.8350406885147095, "learning_rate": 0.0001, "loss": 0.058, "step": 20060 }, { "epoch": 57.01704545454545, "grad_norm": 1.5323282480239868, "learning_rate": 0.0001, "loss": 0.0602, "step": 20070 }, { "epoch": 57.04545454545455, "grad_norm": 1.794053077697754, "learning_rate": 0.0001, "loss": 0.0552, "step": 20080 }, { "epoch": 57.07386363636363, "grad_norm": 2.6057610511779785, "learning_rate": 0.0001, "loss": 0.0596, "step": 20090 }, { "epoch": 57.10227272727273, "grad_norm": 1.8744699954986572, "learning_rate": 0.0001, "loss": 0.0558, "step": 20100 }, { "epoch": 57.13068181818182, "grad_norm": 2.1725265979766846, "learning_rate": 0.0001, "loss": 0.0573, "step": 20110 }, { "epoch": 57.15909090909091, "grad_norm": 1.8657861948013306, "learning_rate": 0.0001, "loss": 0.0546, "step": 20120 }, { "epoch": 57.1875, "grad_norm": 4.145320415496826, "learning_rate": 0.0001, "loss": 0.0647, "step": 20130 }, { "epoch": 57.21590909090909, "grad_norm": 3.5790581703186035, "learning_rate": 0.0001, "loss": 0.0546, "step": 20140 }, { "epoch": 57.24431818181818, "grad_norm": 1.8389453887939453, "learning_rate": 0.0001, "loss": 0.0529, "step": 20150 }, { "epoch": 57.27272727272727, "grad_norm": 1.848724603652954, "learning_rate": 0.0001, "loss": 0.0652, "step": 20160 }, { "epoch": 57.30113636363637, "grad_norm": 2.9280498027801514, "learning_rate": 0.0001, "loss": 0.0571, "step": 20170 }, { "epoch": 57.32954545454545, "grad_norm": 3.1636435985565186, "learning_rate": 0.0001, "loss": 0.0539, "step": 20180 }, { "epoch": 57.35795454545455, "grad_norm": 1.8078699111938477, "learning_rate": 0.0001, "loss": 0.0524, "step": 20190 }, { "epoch": 57.38636363636363, "grad_norm": 2.467681884765625, "learning_rate": 0.0001, "loss": 0.0533, "step": 20200 }, { "epoch": 57.41477272727273, "grad_norm": 1.868058204650879, "learning_rate": 0.0001, "loss": 0.054, "step": 20210 }, { "epoch": 57.44318181818182, "grad_norm": 1.2513569593429565, "learning_rate": 0.0001, "loss": 0.0498, "step": 20220 }, { "epoch": 57.47159090909091, "grad_norm": 1.9683401584625244, "learning_rate": 0.0001, "loss": 0.0507, "step": 20230 }, { "epoch": 57.5, "grad_norm": 1.4425514936447144, "learning_rate": 0.0001, "loss": 0.0527, "step": 20240 }, { "epoch": 57.52840909090909, "grad_norm": 1.2349433898925781, "learning_rate": 0.0001, "loss": 0.058, "step": 20250 }, { "epoch": 57.55681818181818, "grad_norm": 1.2428669929504395, "learning_rate": 0.0001, "loss": 0.0541, "step": 20260 }, { "epoch": 57.58522727272727, "grad_norm": 1.4915668964385986, "learning_rate": 0.0001, "loss": 0.0558, "step": 20270 }, { "epoch": 57.61363636363637, "grad_norm": 1.5012083053588867, "learning_rate": 0.0001, "loss": 0.0564, "step": 20280 }, { "epoch": 57.64204545454545, "grad_norm": 1.287113070487976, "learning_rate": 0.0001, "loss": 0.0545, "step": 20290 }, { "epoch": 57.67045454545455, "grad_norm": 1.545423984527588, "learning_rate": 0.0001, "loss": 0.0558, "step": 20300 }, { "epoch": 57.69886363636363, "grad_norm": 0.989124596118927, "learning_rate": 0.0001, "loss": 0.0579, "step": 20310 }, { "epoch": 57.72727272727273, "grad_norm": 1.1454150676727295, "learning_rate": 0.0001, "loss": 0.0562, "step": 20320 }, { "epoch": 57.75568181818182, "grad_norm": 1.7752538919448853, "learning_rate": 0.0001, "loss": 0.0583, "step": 20330 }, { "epoch": 57.78409090909091, "grad_norm": 1.3004405498504639, "learning_rate": 0.0001, "loss": 0.0559, "step": 20340 }, { "epoch": 57.8125, "grad_norm": 1.165421962738037, "learning_rate": 0.0001, "loss": 0.0554, "step": 20350 }, { "epoch": 57.84090909090909, "grad_norm": 1.3419017791748047, "learning_rate": 0.0001, "loss": 0.0553, "step": 20360 }, { "epoch": 57.86931818181818, "grad_norm": 1.2583837509155273, "learning_rate": 0.0001, "loss": 0.0574, "step": 20370 }, { "epoch": 57.89772727272727, "grad_norm": 1.1867432594299316, "learning_rate": 0.0001, "loss": 0.0551, "step": 20380 }, { "epoch": 57.92613636363637, "grad_norm": 1.191956639289856, "learning_rate": 0.0001, "loss": 0.0549, "step": 20390 }, { "epoch": 57.95454545454545, "grad_norm": 1.0556656122207642, "learning_rate": 0.0001, "loss": 0.0575, "step": 20400 }, { "epoch": 57.98295454545455, "grad_norm": 1.2158259153366089, "learning_rate": 0.0001, "loss": 0.0547, "step": 20410 }, { "epoch": 58.01136363636363, "grad_norm": 0.9880191087722778, "learning_rate": 0.0001, "loss": 0.0576, "step": 20420 }, { "epoch": 58.03977272727273, "grad_norm": 1.150399923324585, "learning_rate": 0.0001, "loss": 0.0589, "step": 20430 }, { "epoch": 58.06818181818182, "grad_norm": 0.9008199572563171, "learning_rate": 0.0001, "loss": 0.0567, "step": 20440 }, { "epoch": 58.09659090909091, "grad_norm": 0.8457942008972168, "learning_rate": 0.0001, "loss": 0.0575, "step": 20450 }, { "epoch": 58.125, "grad_norm": 0.8913554549217224, "learning_rate": 0.0001, "loss": 0.0566, "step": 20460 }, { "epoch": 58.15340909090909, "grad_norm": 1.0136390924453735, "learning_rate": 0.0001, "loss": 0.0612, "step": 20470 }, { "epoch": 58.18181818181818, "grad_norm": 1.0893242359161377, "learning_rate": 0.0001, "loss": 0.0602, "step": 20480 }, { "epoch": 58.21022727272727, "grad_norm": 0.8883498311042786, "learning_rate": 0.0001, "loss": 0.0627, "step": 20490 }, { "epoch": 58.23863636363637, "grad_norm": 0.7249606251716614, "learning_rate": 0.0001, "loss": 0.0637, "step": 20500 }, { "epoch": 58.26704545454545, "grad_norm": 0.83709716796875, "learning_rate": 0.0001, "loss": 0.0623, "step": 20510 }, { "epoch": 58.29545454545455, "grad_norm": 0.8498445749282837, "learning_rate": 0.0001, "loss": 0.0604, "step": 20520 }, { "epoch": 58.32386363636363, "grad_norm": 0.8652094602584839, "learning_rate": 0.0001, "loss": 0.0581, "step": 20530 }, { "epoch": 58.35227272727273, "grad_norm": 0.933368444442749, "learning_rate": 0.0001, "loss": 0.0607, "step": 20540 }, { "epoch": 58.38068181818182, "grad_norm": 1.022032380104065, "learning_rate": 0.0001, "loss": 0.0581, "step": 20550 }, { "epoch": 58.40909090909091, "grad_norm": 0.884529173374176, "learning_rate": 0.0001, "loss": 0.0584, "step": 20560 }, { "epoch": 58.4375, "grad_norm": 0.7771308422088623, "learning_rate": 0.0001, "loss": 0.0612, "step": 20570 }, { "epoch": 58.46590909090909, "grad_norm": 1.055200457572937, "learning_rate": 0.0001, "loss": 0.0599, "step": 20580 }, { "epoch": 58.49431818181818, "grad_norm": 0.9750531315803528, "learning_rate": 0.0001, "loss": 0.0598, "step": 20590 }, { "epoch": 58.52272727272727, "grad_norm": 0.9744542241096497, "learning_rate": 0.0001, "loss": 0.0627, "step": 20600 }, { "epoch": 58.55113636363637, "grad_norm": 1.1760064363479614, "learning_rate": 0.0001, "loss": 0.0616, "step": 20610 }, { "epoch": 58.57954545454545, "grad_norm": 1.0370769500732422, "learning_rate": 0.0001, "loss": 0.0611, "step": 20620 }, { "epoch": 58.60795454545455, "grad_norm": 1.3704447746276855, "learning_rate": 0.0001, "loss": 0.0599, "step": 20630 }, { "epoch": 58.63636363636363, "grad_norm": 1.098480463027954, "learning_rate": 0.0001, "loss": 0.0622, "step": 20640 }, { "epoch": 58.66477272727273, "grad_norm": 1.4485833644866943, "learning_rate": 0.0001, "loss": 0.0605, "step": 20650 }, { "epoch": 58.69318181818182, "grad_norm": 1.1087062358856201, "learning_rate": 0.0001, "loss": 0.0618, "step": 20660 }, { "epoch": 58.72159090909091, "grad_norm": 1.0735747814178467, "learning_rate": 0.0001, "loss": 0.0629, "step": 20670 }, { "epoch": 58.75, "grad_norm": 1.2116317749023438, "learning_rate": 0.0001, "loss": 0.0578, "step": 20680 }, { "epoch": 58.77840909090909, "grad_norm": 0.8660669922828674, "learning_rate": 0.0001, "loss": 0.0605, "step": 20690 }, { "epoch": 58.80681818181818, "grad_norm": 0.9465160369873047, "learning_rate": 0.0001, "loss": 0.0602, "step": 20700 }, { "epoch": 58.83522727272727, "grad_norm": 1.117857575416565, "learning_rate": 0.0001, "loss": 0.0577, "step": 20710 }, { "epoch": 58.86363636363637, "grad_norm": 1.4936555624008179, "learning_rate": 0.0001, "loss": 0.064, "step": 20720 }, { "epoch": 58.89204545454545, "grad_norm": 1.6041721105575562, "learning_rate": 0.0001, "loss": 0.0621, "step": 20730 }, { "epoch": 58.92045454545455, "grad_norm": 1.4327595233917236, "learning_rate": 0.0001, "loss": 0.0575, "step": 20740 }, { "epoch": 58.94886363636363, "grad_norm": 1.3365850448608398, "learning_rate": 0.0001, "loss": 0.0582, "step": 20750 }, { "epoch": 58.97727272727273, "grad_norm": 1.3097879886627197, "learning_rate": 0.0001, "loss": 0.0568, "step": 20760 }, { "epoch": 59.00568181818182, "grad_norm": 1.252744436264038, "learning_rate": 0.0001, "loss": 0.0598, "step": 20770 }, { "epoch": 59.03409090909091, "grad_norm": 0.9207512140274048, "learning_rate": 0.0001, "loss": 0.0588, "step": 20780 }, { "epoch": 59.0625, "grad_norm": 1.2421956062316895, "learning_rate": 0.0001, "loss": 0.0568, "step": 20790 }, { "epoch": 59.09090909090909, "grad_norm": 0.8143938183784485, "learning_rate": 0.0001, "loss": 0.0578, "step": 20800 }, { "epoch": 59.11931818181818, "grad_norm": 0.9497162103652954, "learning_rate": 0.0001, "loss": 0.0563, "step": 20810 }, { "epoch": 59.14772727272727, "grad_norm": 1.0337462425231934, "learning_rate": 0.0001, "loss": 0.0572, "step": 20820 }, { "epoch": 59.17613636363637, "grad_norm": 0.9024602770805359, "learning_rate": 0.0001, "loss": 0.0586, "step": 20830 }, { "epoch": 59.20454545454545, "grad_norm": 0.9900998473167419, "learning_rate": 0.0001, "loss": 0.058, "step": 20840 }, { "epoch": 59.23295454545455, "grad_norm": 0.9825519919395447, "learning_rate": 0.0001, "loss": 0.0619, "step": 20850 }, { "epoch": 59.26136363636363, "grad_norm": 0.9931562542915344, "learning_rate": 0.0001, "loss": 0.0606, "step": 20860 }, { "epoch": 59.28977272727273, "grad_norm": 0.8159868121147156, "learning_rate": 0.0001, "loss": 0.0588, "step": 20870 }, { "epoch": 59.31818181818182, "grad_norm": 1.0349278450012207, "learning_rate": 0.0001, "loss": 0.0622, "step": 20880 }, { "epoch": 59.34659090909091, "grad_norm": 0.8769015073776245, "learning_rate": 0.0001, "loss": 0.0603, "step": 20890 }, { "epoch": 59.375, "grad_norm": 0.94813472032547, "learning_rate": 0.0001, "loss": 0.0591, "step": 20900 }, { "epoch": 59.40340909090909, "grad_norm": 1.1130449771881104, "learning_rate": 0.0001, "loss": 0.0608, "step": 20910 }, { "epoch": 59.43181818181818, "grad_norm": 0.9908705353736877, "learning_rate": 0.0001, "loss": 0.0595, "step": 20920 }, { "epoch": 59.46022727272727, "grad_norm": 1.1722239255905151, "learning_rate": 0.0001, "loss": 0.0589, "step": 20930 }, { "epoch": 59.48863636363637, "grad_norm": 1.0646426677703857, "learning_rate": 0.0001, "loss": 0.0617, "step": 20940 }, { "epoch": 59.51704545454545, "grad_norm": 0.9773575067520142, "learning_rate": 0.0001, "loss": 0.0584, "step": 20950 }, { "epoch": 59.54545454545455, "grad_norm": 0.9211640954017639, "learning_rate": 0.0001, "loss": 0.0588, "step": 20960 }, { "epoch": 59.57386363636363, "grad_norm": 0.7611501216888428, "learning_rate": 0.0001, "loss": 0.0589, "step": 20970 }, { "epoch": 59.60227272727273, "grad_norm": 0.877237856388092, "learning_rate": 0.0001, "loss": 0.0615, "step": 20980 }, { "epoch": 59.63068181818182, "grad_norm": 0.921630322933197, "learning_rate": 0.0001, "loss": 0.0604, "step": 20990 }, { "epoch": 59.65909090909091, "grad_norm": 0.9630839824676514, "learning_rate": 0.0001, "loss": 0.0589, "step": 21000 }, { "epoch": 59.6875, "grad_norm": 0.9061483144760132, "learning_rate": 0.0001, "loss": 0.0584, "step": 21010 }, { "epoch": 59.71590909090909, "grad_norm": 0.847222626209259, "learning_rate": 0.0001, "loss": 0.0651, "step": 21020 }, { "epoch": 59.74431818181818, "grad_norm": 0.8466194868087769, "learning_rate": 0.0001, "loss": 0.0589, "step": 21030 }, { "epoch": 59.77272727272727, "grad_norm": 1.054270625114441, "learning_rate": 0.0001, "loss": 0.0598, "step": 21040 }, { "epoch": 59.80113636363637, "grad_norm": 1.2087162733078003, "learning_rate": 0.0001, "loss": 0.0594, "step": 21050 }, { "epoch": 59.82954545454545, "grad_norm": 1.2614654302597046, "learning_rate": 0.0001, "loss": 0.06, "step": 21060 }, { "epoch": 59.85795454545455, "grad_norm": 1.2037792205810547, "learning_rate": 0.0001, "loss": 0.0611, "step": 21070 }, { "epoch": 59.88636363636363, "grad_norm": 1.1717537641525269, "learning_rate": 0.0001, "loss": 0.0596, "step": 21080 }, { "epoch": 59.91477272727273, "grad_norm": 1.085711121559143, "learning_rate": 0.0001, "loss": 0.0579, "step": 21090 }, { "epoch": 59.94318181818182, "grad_norm": 0.8551573753356934, "learning_rate": 0.0001, "loss": 0.0599, "step": 21100 }, { "epoch": 59.97159090909091, "grad_norm": 0.7682262063026428, "learning_rate": 0.0001, "loss": 0.0595, "step": 21110 }, { "epoch": 60.0, "grad_norm": 0.6982107162475586, "learning_rate": 0.0001, "loss": 0.0619, "step": 21120 }, { "epoch": 60.02840909090909, "grad_norm": 0.9472159147262573, "learning_rate": 0.0001, "loss": 0.0614, "step": 21130 }, { "epoch": 60.05681818181818, "grad_norm": 0.8628683686256409, "learning_rate": 0.0001, "loss": 0.0609, "step": 21140 }, { "epoch": 60.08522727272727, "grad_norm": 0.6452422142028809, "learning_rate": 0.0001, "loss": 0.0622, "step": 21150 }, { "epoch": 60.11363636363637, "grad_norm": 0.6607347726821899, "learning_rate": 0.0001, "loss": 0.0611, "step": 21160 }, { "epoch": 60.14204545454545, "grad_norm": 0.6297292709350586, "learning_rate": 0.0001, "loss": 0.06, "step": 21170 }, { "epoch": 60.17045454545455, "grad_norm": 0.705437958240509, "learning_rate": 0.0001, "loss": 0.0598, "step": 21180 }, { "epoch": 60.19886363636363, "grad_norm": 0.64570552110672, "learning_rate": 0.0001, "loss": 0.0579, "step": 21190 }, { "epoch": 60.22727272727273, "grad_norm": 0.8154585361480713, "learning_rate": 0.0001, "loss": 0.0557, "step": 21200 }, { "epoch": 60.25568181818182, "grad_norm": 0.8044834136962891, "learning_rate": 0.0001, "loss": 0.0575, "step": 21210 }, { "epoch": 60.28409090909091, "grad_norm": 0.984665036201477, "learning_rate": 0.0001, "loss": 0.0582, "step": 21220 }, { "epoch": 60.3125, "grad_norm": 0.8446553349494934, "learning_rate": 0.0001, "loss": 0.0591, "step": 21230 }, { "epoch": 60.34090909090909, "grad_norm": 0.9261309504508972, "learning_rate": 0.0001, "loss": 0.0607, "step": 21240 }, { "epoch": 60.36931818181818, "grad_norm": 1.1594133377075195, "learning_rate": 0.0001, "loss": 0.0562, "step": 21250 }, { "epoch": 60.39772727272727, "grad_norm": 1.2541656494140625, "learning_rate": 0.0001, "loss": 0.0568, "step": 21260 }, { "epoch": 60.42613636363637, "grad_norm": 0.9860923290252686, "learning_rate": 0.0001, "loss": 0.0604, "step": 21270 }, { "epoch": 60.45454545454545, "grad_norm": 1.032243251800537, "learning_rate": 0.0001, "loss": 0.0572, "step": 21280 }, { "epoch": 60.48295454545455, "grad_norm": 1.0910956859588623, "learning_rate": 0.0001, "loss": 0.0575, "step": 21290 }, { "epoch": 60.51136363636363, "grad_norm": 1.106114387512207, "learning_rate": 0.0001, "loss": 0.0581, "step": 21300 }, { "epoch": 60.53977272727273, "grad_norm": 1.733173131942749, "learning_rate": 0.0001, "loss": 0.0624, "step": 21310 }, { "epoch": 60.56818181818182, "grad_norm": 1.755391001701355, "learning_rate": 0.0001, "loss": 0.0586, "step": 21320 }, { "epoch": 60.59659090909091, "grad_norm": 1.6064823865890503, "learning_rate": 0.0001, "loss": 0.0564, "step": 21330 }, { "epoch": 60.625, "grad_norm": 1.0228577852249146, "learning_rate": 0.0001, "loss": 0.0563, "step": 21340 }, { "epoch": 60.65340909090909, "grad_norm": 1.1767072677612305, "learning_rate": 0.0001, "loss": 0.0566, "step": 21350 }, { "epoch": 60.68181818181818, "grad_norm": 0.9804391264915466, "learning_rate": 0.0001, "loss": 0.0564, "step": 21360 }, { "epoch": 60.71022727272727, "grad_norm": 1.459820032119751, "learning_rate": 0.0001, "loss": 0.0549, "step": 21370 }, { "epoch": 60.73863636363637, "grad_norm": 1.2355256080627441, "learning_rate": 0.0001, "loss": 0.0562, "step": 21380 }, { "epoch": 60.76704545454545, "grad_norm": 0.9702253341674805, "learning_rate": 0.0001, "loss": 0.0582, "step": 21390 }, { "epoch": 60.79545454545455, "grad_norm": 0.8217170834541321, "learning_rate": 0.0001, "loss": 0.0577, "step": 21400 }, { "epoch": 60.82386363636363, "grad_norm": 1.1219531297683716, "learning_rate": 0.0001, "loss": 0.0571, "step": 21410 }, { "epoch": 60.85227272727273, "grad_norm": 1.0550838708877563, "learning_rate": 0.0001, "loss": 0.0573, "step": 21420 }, { "epoch": 60.88068181818182, "grad_norm": 1.1298226118087769, "learning_rate": 0.0001, "loss": 0.0567, "step": 21430 }, { "epoch": 60.90909090909091, "grad_norm": 1.1814019680023193, "learning_rate": 0.0001, "loss": 0.0602, "step": 21440 }, { "epoch": 60.9375, "grad_norm": 1.0315929651260376, "learning_rate": 0.0001, "loss": 0.0605, "step": 21450 }, { "epoch": 60.96590909090909, "grad_norm": 1.0429394245147705, "learning_rate": 0.0001, "loss": 0.0576, "step": 21460 }, { "epoch": 60.99431818181818, "grad_norm": 1.109660029411316, "learning_rate": 0.0001, "loss": 0.0579, "step": 21470 }, { "epoch": 61.02272727272727, "grad_norm": 0.8597354292869568, "learning_rate": 0.0001, "loss": 0.0553, "step": 21480 }, { "epoch": 61.05113636363637, "grad_norm": 0.7767676711082458, "learning_rate": 0.0001, "loss": 0.0551, "step": 21490 }, { "epoch": 61.07954545454545, "grad_norm": 0.9464530944824219, "learning_rate": 0.0001, "loss": 0.0545, "step": 21500 }, { "epoch": 61.10795454545455, "grad_norm": 0.8091188073158264, "learning_rate": 0.0001, "loss": 0.0596, "step": 21510 }, { "epoch": 61.13636363636363, "grad_norm": 0.9647312760353088, "learning_rate": 0.0001, "loss": 0.0556, "step": 21520 }, { "epoch": 61.16477272727273, "grad_norm": 1.1387494802474976, "learning_rate": 0.0001, "loss": 0.0577, "step": 21530 }, { "epoch": 61.19318181818182, "grad_norm": 0.8227630853652954, "learning_rate": 0.0001, "loss": 0.0566, "step": 21540 }, { "epoch": 61.22159090909091, "grad_norm": 1.2130613327026367, "learning_rate": 0.0001, "loss": 0.0565, "step": 21550 }, { "epoch": 61.25, "grad_norm": 1.1565511226654053, "learning_rate": 0.0001, "loss": 0.0583, "step": 21560 }, { "epoch": 61.27840909090909, "grad_norm": 1.049648404121399, "learning_rate": 0.0001, "loss": 0.0556, "step": 21570 }, { "epoch": 61.30681818181818, "grad_norm": 1.279056429862976, "learning_rate": 0.0001, "loss": 0.0566, "step": 21580 }, { "epoch": 61.33522727272727, "grad_norm": 0.8837600350379944, "learning_rate": 0.0001, "loss": 0.0579, "step": 21590 }, { "epoch": 61.36363636363637, "grad_norm": 0.9417069554328918, "learning_rate": 0.0001, "loss": 0.0574, "step": 21600 }, { "epoch": 61.39204545454545, "grad_norm": 0.7844614386558533, "learning_rate": 0.0001, "loss": 0.0554, "step": 21610 }, { "epoch": 61.42045454545455, "grad_norm": 0.9132207632064819, "learning_rate": 0.0001, "loss": 0.0603, "step": 21620 }, { "epoch": 61.44886363636363, "grad_norm": 0.8967658877372742, "learning_rate": 0.0001, "loss": 0.0568, "step": 21630 }, { "epoch": 61.47727272727273, "grad_norm": 0.8098888397216797, "learning_rate": 0.0001, "loss": 0.0599, "step": 21640 }, { "epoch": 61.50568181818182, "grad_norm": 0.856517493724823, "learning_rate": 0.0001, "loss": 0.0556, "step": 21650 }, { "epoch": 61.53409090909091, "grad_norm": 0.7805210947990417, "learning_rate": 0.0001, "loss": 0.0603, "step": 21660 }, { "epoch": 61.5625, "grad_norm": 0.8382397294044495, "learning_rate": 0.0001, "loss": 0.0578, "step": 21670 }, { "epoch": 61.59090909090909, "grad_norm": 0.9148212671279907, "learning_rate": 0.0001, "loss": 0.0588, "step": 21680 }, { "epoch": 61.61931818181818, "grad_norm": 0.774455726146698, "learning_rate": 0.0001, "loss": 0.0573, "step": 21690 }, { "epoch": 61.64772727272727, "grad_norm": 0.6750848889350891, "learning_rate": 0.0001, "loss": 0.0559, "step": 21700 }, { "epoch": 61.67613636363637, "grad_norm": 0.7105973362922668, "learning_rate": 0.0001, "loss": 0.0583, "step": 21710 }, { "epoch": 61.70454545454545, "grad_norm": 1.188941240310669, "learning_rate": 0.0001, "loss": 0.0608, "step": 21720 }, { "epoch": 61.73295454545455, "grad_norm": 1.0456372499465942, "learning_rate": 0.0001, "loss": 0.0599, "step": 21730 }, { "epoch": 61.76136363636363, "grad_norm": 0.9662376642227173, "learning_rate": 0.0001, "loss": 0.0588, "step": 21740 }, { "epoch": 61.78977272727273, "grad_norm": 1.0228948593139648, "learning_rate": 0.0001, "loss": 0.0577, "step": 21750 }, { "epoch": 61.81818181818182, "grad_norm": 1.133011817932129, "learning_rate": 0.0001, "loss": 0.0564, "step": 21760 }, { "epoch": 61.84659090909091, "grad_norm": 1.138669729232788, "learning_rate": 0.0001, "loss": 0.0562, "step": 21770 }, { "epoch": 61.875, "grad_norm": 0.9990003705024719, "learning_rate": 0.0001, "loss": 0.056, "step": 21780 }, { "epoch": 61.90340909090909, "grad_norm": 0.8538486957550049, "learning_rate": 0.0001, "loss": 0.057, "step": 21790 }, { "epoch": 61.93181818181818, "grad_norm": 0.9877942204475403, "learning_rate": 0.0001, "loss": 0.0582, "step": 21800 }, { "epoch": 61.96022727272727, "grad_norm": 0.7974020838737488, "learning_rate": 0.0001, "loss": 0.0566, "step": 21810 }, { "epoch": 61.98863636363637, "grad_norm": 0.9531463980674744, "learning_rate": 0.0001, "loss": 0.0569, "step": 21820 }, { "epoch": 62.01704545454545, "grad_norm": 0.8213363289833069, "learning_rate": 0.0001, "loss": 0.0562, "step": 21830 }, { "epoch": 62.04545454545455, "grad_norm": 0.7434073686599731, "learning_rate": 0.0001, "loss": 0.0608, "step": 21840 }, { "epoch": 62.07386363636363, "grad_norm": 1.0179238319396973, "learning_rate": 0.0001, "loss": 0.057, "step": 21850 }, { "epoch": 62.10227272727273, "grad_norm": 0.8162310719490051, "learning_rate": 0.0001, "loss": 0.056, "step": 21860 }, { "epoch": 62.13068181818182, "grad_norm": 0.749879777431488, "learning_rate": 0.0001, "loss": 0.0587, "step": 21870 }, { "epoch": 62.15909090909091, "grad_norm": 0.7732171416282654, "learning_rate": 0.0001, "loss": 0.0536, "step": 21880 }, { "epoch": 62.1875, "grad_norm": 0.7089868783950806, "learning_rate": 0.0001, "loss": 0.0551, "step": 21890 }, { "epoch": 62.21590909090909, "grad_norm": 0.7101325988769531, "learning_rate": 0.0001, "loss": 0.0546, "step": 21900 }, { "epoch": 62.24431818181818, "grad_norm": 0.7819742560386658, "learning_rate": 0.0001, "loss": 0.0575, "step": 21910 }, { "epoch": 62.27272727272727, "grad_norm": 0.6902415752410889, "learning_rate": 0.0001, "loss": 0.0551, "step": 21920 }, { "epoch": 62.30113636363637, "grad_norm": 0.8869762420654297, "learning_rate": 0.0001, "loss": 0.0582, "step": 21930 }, { "epoch": 62.32954545454545, "grad_norm": 1.154178500175476, "learning_rate": 0.0001, "loss": 0.0571, "step": 21940 }, { "epoch": 62.35795454545455, "grad_norm": 0.8612966537475586, "learning_rate": 0.0001, "loss": 0.0562, "step": 21950 }, { "epoch": 62.38636363636363, "grad_norm": 0.8771665692329407, "learning_rate": 0.0001, "loss": 0.0573, "step": 21960 }, { "epoch": 62.41477272727273, "grad_norm": 0.9387429356575012, "learning_rate": 0.0001, "loss": 0.0555, "step": 21970 }, { "epoch": 62.44318181818182, "grad_norm": 0.7586554884910583, "learning_rate": 0.0001, "loss": 0.0597, "step": 21980 }, { "epoch": 62.47159090909091, "grad_norm": 0.809971809387207, "learning_rate": 0.0001, "loss": 0.0588, "step": 21990 }, { "epoch": 62.5, "grad_norm": 0.9225670099258423, "learning_rate": 0.0001, "loss": 0.0577, "step": 22000 }, { "epoch": 62.52840909090909, "grad_norm": 0.9746827483177185, "learning_rate": 0.0001, "loss": 0.0559, "step": 22010 }, { "epoch": 62.55681818181818, "grad_norm": 0.8748590350151062, "learning_rate": 0.0001, "loss": 0.0565, "step": 22020 }, { "epoch": 62.58522727272727, "grad_norm": 1.0417462587356567, "learning_rate": 0.0001, "loss": 0.0554, "step": 22030 }, { "epoch": 62.61363636363637, "grad_norm": 0.9173468351364136, "learning_rate": 0.0001, "loss": 0.0555, "step": 22040 }, { "epoch": 62.64204545454545, "grad_norm": 0.9043504595756531, "learning_rate": 0.0001, "loss": 0.0533, "step": 22050 }, { "epoch": 62.67045454545455, "grad_norm": 0.6783923506736755, "learning_rate": 0.0001, "loss": 0.0563, "step": 22060 }, { "epoch": 62.69886363636363, "grad_norm": 0.6317957043647766, "learning_rate": 0.0001, "loss": 0.0539, "step": 22070 }, { "epoch": 62.72727272727273, "grad_norm": 0.7598891258239746, "learning_rate": 0.0001, "loss": 0.056, "step": 22080 }, { "epoch": 62.75568181818182, "grad_norm": 0.880479633808136, "learning_rate": 0.0001, "loss": 0.0572, "step": 22090 }, { "epoch": 62.78409090909091, "grad_norm": 0.8354278802871704, "learning_rate": 0.0001, "loss": 0.0565, "step": 22100 }, { "epoch": 62.8125, "grad_norm": 0.9944140911102295, "learning_rate": 0.0001, "loss": 0.0524, "step": 22110 }, { "epoch": 62.84090909090909, "grad_norm": 0.9438153505325317, "learning_rate": 0.0001, "loss": 0.053, "step": 22120 }, { "epoch": 62.86931818181818, "grad_norm": 1.1514655351638794, "learning_rate": 0.0001, "loss": 0.0561, "step": 22130 }, { "epoch": 62.89772727272727, "grad_norm": 1.0064482688903809, "learning_rate": 0.0001, "loss": 0.053, "step": 22140 }, { "epoch": 62.92613636363637, "grad_norm": 0.8011857271194458, "learning_rate": 0.0001, "loss": 0.0557, "step": 22150 }, { "epoch": 62.95454545454545, "grad_norm": 0.7131310105323792, "learning_rate": 0.0001, "loss": 0.057, "step": 22160 }, { "epoch": 62.98295454545455, "grad_norm": 0.7730833292007446, "learning_rate": 0.0001, "loss": 0.0554, "step": 22170 }, { "epoch": 63.01136363636363, "grad_norm": 0.8114839792251587, "learning_rate": 0.0001, "loss": 0.0554, "step": 22180 }, { "epoch": 63.03977272727273, "grad_norm": 0.714423656463623, "learning_rate": 0.0001, "loss": 0.0564, "step": 22190 }, { "epoch": 63.06818181818182, "grad_norm": 1.106858730316162, "learning_rate": 0.0001, "loss": 0.0556, "step": 22200 }, { "epoch": 63.09659090909091, "grad_norm": 1.0788785219192505, "learning_rate": 0.0001, "loss": 0.0575, "step": 22210 }, { "epoch": 63.125, "grad_norm": 1.0309641361236572, "learning_rate": 0.0001, "loss": 0.0566, "step": 22220 }, { "epoch": 63.15340909090909, "grad_norm": 0.9370083808898926, "learning_rate": 0.0001, "loss": 0.0545, "step": 22230 }, { "epoch": 63.18181818181818, "grad_norm": 1.0266667604446411, "learning_rate": 0.0001, "loss": 0.0558, "step": 22240 }, { "epoch": 63.21022727272727, "grad_norm": 1.0693178176879883, "learning_rate": 0.0001, "loss": 0.0543, "step": 22250 }, { "epoch": 63.23863636363637, "grad_norm": 1.409576416015625, "learning_rate": 0.0001, "loss": 0.055, "step": 22260 }, { "epoch": 63.26704545454545, "grad_norm": 1.0419577360153198, "learning_rate": 0.0001, "loss": 0.0562, "step": 22270 }, { "epoch": 63.29545454545455, "grad_norm": 0.9042021632194519, "learning_rate": 0.0001, "loss": 0.0537, "step": 22280 }, { "epoch": 63.32386363636363, "grad_norm": 0.9674776196479797, "learning_rate": 0.0001, "loss": 0.0551, "step": 22290 }, { "epoch": 63.35227272727273, "grad_norm": 1.08954918384552, "learning_rate": 0.0001, "loss": 0.0549, "step": 22300 }, { "epoch": 63.38068181818182, "grad_norm": 1.0163633823394775, "learning_rate": 0.0001, "loss": 0.0553, "step": 22310 }, { "epoch": 63.40909090909091, "grad_norm": 1.0497983694076538, "learning_rate": 0.0001, "loss": 0.0553, "step": 22320 }, { "epoch": 63.4375, "grad_norm": 0.7507213354110718, "learning_rate": 0.0001, "loss": 0.0543, "step": 22330 }, { "epoch": 63.46590909090909, "grad_norm": 0.9704498648643494, "learning_rate": 0.0001, "loss": 0.0542, "step": 22340 }, { "epoch": 63.49431818181818, "grad_norm": 0.9345890283584595, "learning_rate": 0.0001, "loss": 0.0562, "step": 22350 }, { "epoch": 63.52272727272727, "grad_norm": 0.8323131799697876, "learning_rate": 0.0001, "loss": 0.0554, "step": 22360 }, { "epoch": 63.55113636363637, "grad_norm": 0.8425998687744141, "learning_rate": 0.0001, "loss": 0.0551, "step": 22370 }, { "epoch": 63.57954545454545, "grad_norm": 0.7790317535400391, "learning_rate": 0.0001, "loss": 0.055, "step": 22380 }, { "epoch": 63.60795454545455, "grad_norm": 0.679909348487854, "learning_rate": 0.0001, "loss": 0.0564, "step": 22390 }, { "epoch": 63.63636363636363, "grad_norm": 0.6957236528396606, "learning_rate": 0.0001, "loss": 0.0521, "step": 22400 }, { "epoch": 63.66477272727273, "grad_norm": 0.7631174325942993, "learning_rate": 0.0001, "loss": 0.0539, "step": 22410 }, { "epoch": 63.69318181818182, "grad_norm": 0.8418110609054565, "learning_rate": 0.0001, "loss": 0.0587, "step": 22420 }, { "epoch": 63.72159090909091, "grad_norm": 0.9006642699241638, "learning_rate": 0.0001, "loss": 0.0548, "step": 22430 }, { "epoch": 63.75, "grad_norm": 1.0244066715240479, "learning_rate": 0.0001, "loss": 0.0523, "step": 22440 }, { "epoch": 63.77840909090909, "grad_norm": 0.8364577293395996, "learning_rate": 0.0001, "loss": 0.0561, "step": 22450 }, { "epoch": 63.80681818181818, "grad_norm": 1.0565218925476074, "learning_rate": 0.0001, "loss": 0.0575, "step": 22460 }, { "epoch": 63.83522727272727, "grad_norm": 0.994922935962677, "learning_rate": 0.0001, "loss": 0.0564, "step": 22470 }, { "epoch": 63.86363636363637, "grad_norm": 1.0308321714401245, "learning_rate": 0.0001, "loss": 0.054, "step": 22480 }, { "epoch": 63.89204545454545, "grad_norm": 0.9732064604759216, "learning_rate": 0.0001, "loss": 0.0537, "step": 22490 }, { "epoch": 63.92045454545455, "grad_norm": 0.8068335056304932, "learning_rate": 0.0001, "loss": 0.0563, "step": 22500 }, { "epoch": 63.94886363636363, "grad_norm": 0.7390735745429993, "learning_rate": 0.0001, "loss": 0.0569, "step": 22510 }, { "epoch": 63.97727272727273, "grad_norm": 0.7898790240287781, "learning_rate": 0.0001, "loss": 0.0552, "step": 22520 }, { "epoch": 64.00568181818181, "grad_norm": 0.8491553068161011, "learning_rate": 0.0001, "loss": 0.0554, "step": 22530 }, { "epoch": 64.0340909090909, "grad_norm": 0.7833629846572876, "learning_rate": 0.0001, "loss": 0.0538, "step": 22540 }, { "epoch": 64.0625, "grad_norm": 0.8554551005363464, "learning_rate": 0.0001, "loss": 0.055, "step": 22550 }, { "epoch": 64.0909090909091, "grad_norm": 0.8123806715011597, "learning_rate": 0.0001, "loss": 0.0545, "step": 22560 }, { "epoch": 64.11931818181819, "grad_norm": 0.7412775754928589, "learning_rate": 0.0001, "loss": 0.0535, "step": 22570 }, { "epoch": 64.14772727272727, "grad_norm": 0.6799927949905396, "learning_rate": 0.0001, "loss": 0.054, "step": 22580 }, { "epoch": 64.17613636363636, "grad_norm": 0.8203033804893494, "learning_rate": 0.0001, "loss": 0.0539, "step": 22590 }, { "epoch": 64.20454545454545, "grad_norm": 0.7033742666244507, "learning_rate": 0.0001, "loss": 0.0522, "step": 22600 }, { "epoch": 64.23295454545455, "grad_norm": 0.7215442061424255, "learning_rate": 0.0001, "loss": 0.0542, "step": 22610 }, { "epoch": 64.26136363636364, "grad_norm": 0.5706157684326172, "learning_rate": 0.0001, "loss": 0.0542, "step": 22620 }, { "epoch": 64.28977272727273, "grad_norm": 0.6812960505485535, "learning_rate": 0.0001, "loss": 0.0551, "step": 22630 }, { "epoch": 64.31818181818181, "grad_norm": 0.6466752886772156, "learning_rate": 0.0001, "loss": 0.0539, "step": 22640 }, { "epoch": 64.3465909090909, "grad_norm": 0.7440110445022583, "learning_rate": 0.0001, "loss": 0.0548, "step": 22650 }, { "epoch": 64.375, "grad_norm": 0.7839359641075134, "learning_rate": 0.0001, "loss": 0.0562, "step": 22660 }, { "epoch": 64.4034090909091, "grad_norm": 0.8440365195274353, "learning_rate": 0.0001, "loss": 0.0545, "step": 22670 }, { "epoch": 64.43181818181819, "grad_norm": 0.774544358253479, "learning_rate": 0.0001, "loss": 0.0573, "step": 22680 }, { "epoch": 64.46022727272727, "grad_norm": 0.601563036441803, "learning_rate": 0.0001, "loss": 0.0558, "step": 22690 }, { "epoch": 64.48863636363636, "grad_norm": 0.7574673891067505, "learning_rate": 0.0001, "loss": 0.0535, "step": 22700 }, { "epoch": 64.51704545454545, "grad_norm": 0.836617648601532, "learning_rate": 0.0001, "loss": 0.0549, "step": 22710 }, { "epoch": 64.54545454545455, "grad_norm": 1.0453118085861206, "learning_rate": 0.0001, "loss": 0.0548, "step": 22720 }, { "epoch": 64.57386363636364, "grad_norm": 1.0585159063339233, "learning_rate": 0.0001, "loss": 0.056, "step": 22730 }, { "epoch": 64.60227272727273, "grad_norm": 1.0894050598144531, "learning_rate": 0.0001, "loss": 0.055, "step": 22740 }, { "epoch": 64.63068181818181, "grad_norm": 0.9796726107597351, "learning_rate": 0.0001, "loss": 0.0526, "step": 22750 }, { "epoch": 64.6590909090909, "grad_norm": 0.9019004702568054, "learning_rate": 0.0001, "loss": 0.0568, "step": 22760 }, { "epoch": 64.6875, "grad_norm": 0.7627422213554382, "learning_rate": 0.0001, "loss": 0.0508, "step": 22770 }, { "epoch": 64.7159090909091, "grad_norm": 0.7884737849235535, "learning_rate": 0.0001, "loss": 0.0553, "step": 22780 }, { "epoch": 64.74431818181819, "grad_norm": 0.7071219682693481, "learning_rate": 0.0001, "loss": 0.0547, "step": 22790 }, { "epoch": 64.77272727272727, "grad_norm": 0.7242072820663452, "learning_rate": 0.0001, "loss": 0.0531, "step": 22800 }, { "epoch": 64.80113636363636, "grad_norm": 0.9985579252243042, "learning_rate": 0.0001, "loss": 0.0542, "step": 22810 }, { "epoch": 64.82954545454545, "grad_norm": 0.7592743635177612, "learning_rate": 0.0001, "loss": 0.0557, "step": 22820 }, { "epoch": 64.85795454545455, "grad_norm": 0.6539085507392883, "learning_rate": 0.0001, "loss": 0.0531, "step": 22830 }, { "epoch": 64.88636363636364, "grad_norm": 0.699675977230072, "learning_rate": 0.0001, "loss": 0.056, "step": 22840 }, { "epoch": 64.91477272727273, "grad_norm": 0.830615222454071, "learning_rate": 0.0001, "loss": 0.053, "step": 22850 }, { "epoch": 64.94318181818181, "grad_norm": 0.760208785533905, "learning_rate": 0.0001, "loss": 0.0549, "step": 22860 }, { "epoch": 64.9715909090909, "grad_norm": 0.7713984847068787, "learning_rate": 0.0001, "loss": 0.0542, "step": 22870 }, { "epoch": 65.0, "grad_norm": 0.7789033055305481, "learning_rate": 0.0001, "loss": 0.0532, "step": 22880 }, { "epoch": 65.0284090909091, "grad_norm": 0.780392050743103, "learning_rate": 0.0001, "loss": 0.0549, "step": 22890 }, { "epoch": 65.05681818181819, "grad_norm": 0.7739676237106323, "learning_rate": 0.0001, "loss": 0.0531, "step": 22900 }, { "epoch": 65.08522727272727, "grad_norm": 0.7210514545440674, "learning_rate": 0.0001, "loss": 0.0533, "step": 22910 }, { "epoch": 65.11363636363636, "grad_norm": 1.0596988201141357, "learning_rate": 0.0001, "loss": 0.0536, "step": 22920 }, { "epoch": 65.14204545454545, "grad_norm": 1.2048275470733643, "learning_rate": 0.0001, "loss": 0.0524, "step": 22930 }, { "epoch": 65.17045454545455, "grad_norm": 0.8827832937240601, "learning_rate": 0.0001, "loss": 0.0537, "step": 22940 }, { "epoch": 65.19886363636364, "grad_norm": 0.889045238494873, "learning_rate": 0.0001, "loss": 0.052, "step": 22950 }, { "epoch": 65.22727272727273, "grad_norm": 1.0528745651245117, "learning_rate": 0.0001, "loss": 0.0545, "step": 22960 }, { "epoch": 65.25568181818181, "grad_norm": 1.0414397716522217, "learning_rate": 0.0001, "loss": 0.0526, "step": 22970 }, { "epoch": 65.2840909090909, "grad_norm": 1.096603512763977, "learning_rate": 0.0001, "loss": 0.0533, "step": 22980 }, { "epoch": 65.3125, "grad_norm": 0.8513028025627136, "learning_rate": 0.0001, "loss": 0.0521, "step": 22990 }, { "epoch": 65.3409090909091, "grad_norm": 1.2057493925094604, "learning_rate": 0.0001, "loss": 0.053, "step": 23000 }, { "epoch": 65.36931818181819, "grad_norm": 1.0297787189483643, "learning_rate": 0.0001, "loss": 0.0523, "step": 23010 }, { "epoch": 65.39772727272727, "grad_norm": 0.8249045610427856, "learning_rate": 0.0001, "loss": 0.0526, "step": 23020 }, { "epoch": 65.42613636363636, "grad_norm": 0.949597179889679, "learning_rate": 0.0001, "loss": 0.0531, "step": 23030 }, { "epoch": 65.45454545454545, "grad_norm": 0.7203119397163391, "learning_rate": 0.0001, "loss": 0.0537, "step": 23040 }, { "epoch": 65.48295454545455, "grad_norm": 0.7339312434196472, "learning_rate": 0.0001, "loss": 0.0558, "step": 23050 }, { "epoch": 65.51136363636364, "grad_norm": 0.716767430305481, "learning_rate": 0.0001, "loss": 0.0536, "step": 23060 }, { "epoch": 65.53977272727273, "grad_norm": 0.8138099312782288, "learning_rate": 0.0001, "loss": 0.0545, "step": 23070 }, { "epoch": 65.56818181818181, "grad_norm": 0.7120342254638672, "learning_rate": 0.0001, "loss": 0.053, "step": 23080 }, { "epoch": 65.5965909090909, "grad_norm": 0.7150150537490845, "learning_rate": 0.0001, "loss": 0.054, "step": 23090 }, { "epoch": 65.625, "grad_norm": 0.7718611359596252, "learning_rate": 0.0001, "loss": 0.0556, "step": 23100 }, { "epoch": 65.6534090909091, "grad_norm": 0.842397153377533, "learning_rate": 0.0001, "loss": 0.0518, "step": 23110 }, { "epoch": 65.68181818181819, "grad_norm": 0.8018172979354858, "learning_rate": 0.0001, "loss": 0.0526, "step": 23120 }, { "epoch": 65.71022727272727, "grad_norm": 0.756478488445282, "learning_rate": 0.0001, "loss": 0.054, "step": 23130 }, { "epoch": 65.73863636363636, "grad_norm": 0.8237600922584534, "learning_rate": 0.0001, "loss": 0.0556, "step": 23140 }, { "epoch": 65.76704545454545, "grad_norm": 0.6838138103485107, "learning_rate": 0.0001, "loss": 0.0539, "step": 23150 }, { "epoch": 65.79545454545455, "grad_norm": 0.7186658382415771, "learning_rate": 0.0001, "loss": 0.0543, "step": 23160 }, { "epoch": 65.82386363636364, "grad_norm": 0.8290245532989502, "learning_rate": 0.0001, "loss": 0.055, "step": 23170 }, { "epoch": 65.85227272727273, "grad_norm": 0.7229530811309814, "learning_rate": 0.0001, "loss": 0.0545, "step": 23180 }, { "epoch": 65.88068181818181, "grad_norm": 0.6716543436050415, "learning_rate": 0.0001, "loss": 0.055, "step": 23190 }, { "epoch": 65.9090909090909, "grad_norm": 0.8731271028518677, "learning_rate": 0.0001, "loss": 0.0578, "step": 23200 }, { "epoch": 65.9375, "grad_norm": 0.8098838329315186, "learning_rate": 0.0001, "loss": 0.0544, "step": 23210 }, { "epoch": 65.9659090909091, "grad_norm": 1.0541036128997803, "learning_rate": 0.0001, "loss": 0.0548, "step": 23220 }, { "epoch": 65.99431818181819, "grad_norm": 0.8643235564231873, "learning_rate": 0.0001, "loss": 0.0555, "step": 23230 }, { "epoch": 66.02272727272727, "grad_norm": 0.8315423130989075, "learning_rate": 0.0001, "loss": 0.0543, "step": 23240 }, { "epoch": 66.05113636363636, "grad_norm": 0.6959272623062134, "learning_rate": 0.0001, "loss": 0.0541, "step": 23250 }, { "epoch": 66.07954545454545, "grad_norm": 0.7065873742103577, "learning_rate": 0.0001, "loss": 0.0526, "step": 23260 }, { "epoch": 66.10795454545455, "grad_norm": 0.9436522722244263, "learning_rate": 0.0001, "loss": 0.0538, "step": 23270 }, { "epoch": 66.13636363636364, "grad_norm": 0.6383907794952393, "learning_rate": 0.0001, "loss": 0.0526, "step": 23280 }, { "epoch": 66.16477272727273, "grad_norm": 0.8061172962188721, "learning_rate": 0.0001, "loss": 0.0569, "step": 23290 }, { "epoch": 66.19318181818181, "grad_norm": 0.6677034497261047, "learning_rate": 0.0001, "loss": 0.0554, "step": 23300 }, { "epoch": 66.2215909090909, "grad_norm": 0.8233653903007507, "learning_rate": 0.0001, "loss": 0.0542, "step": 23310 }, { "epoch": 66.25, "grad_norm": 0.8707202672958374, "learning_rate": 0.0001, "loss": 0.0536, "step": 23320 }, { "epoch": 66.2784090909091, "grad_norm": 0.7756959199905396, "learning_rate": 0.0001, "loss": 0.056, "step": 23330 }, { "epoch": 66.30681818181819, "grad_norm": 0.8138574361801147, "learning_rate": 0.0001, "loss": 0.055, "step": 23340 }, { "epoch": 66.33522727272727, "grad_norm": 0.8435407876968384, "learning_rate": 0.0001, "loss": 0.0572, "step": 23350 }, { "epoch": 66.36363636363636, "grad_norm": 0.8531373143196106, "learning_rate": 0.0001, "loss": 0.0558, "step": 23360 }, { "epoch": 66.39204545454545, "grad_norm": 0.9886962175369263, "learning_rate": 0.0001, "loss": 0.0562, "step": 23370 }, { "epoch": 66.42045454545455, "grad_norm": 0.9955214262008667, "learning_rate": 0.0001, "loss": 0.0561, "step": 23380 }, { "epoch": 66.44886363636364, "grad_norm": 1.1781306266784668, "learning_rate": 0.0001, "loss": 0.0532, "step": 23390 }, { "epoch": 66.47727272727273, "grad_norm": 1.1715068817138672, "learning_rate": 0.0001, "loss": 0.0557, "step": 23400 }, { "epoch": 66.50568181818181, "grad_norm": 1.1786881685256958, "learning_rate": 0.0001, "loss": 0.0519, "step": 23410 }, { "epoch": 66.5340909090909, "grad_norm": 1.2845433950424194, "learning_rate": 0.0001, "loss": 0.056, "step": 23420 }, { "epoch": 66.5625, "grad_norm": 1.0063714981079102, "learning_rate": 0.0001, "loss": 0.0524, "step": 23430 }, { "epoch": 66.5909090909091, "grad_norm": 1.013217806816101, "learning_rate": 0.0001, "loss": 0.0532, "step": 23440 }, { "epoch": 66.61931818181819, "grad_norm": 1.0957231521606445, "learning_rate": 0.0001, "loss": 0.0509, "step": 23450 }, { "epoch": 66.64772727272727, "grad_norm": 0.9889658689498901, "learning_rate": 0.0001, "loss": 0.0519, "step": 23460 }, { "epoch": 66.67613636363636, "grad_norm": 0.9741299748420715, "learning_rate": 0.0001, "loss": 0.0512, "step": 23470 }, { "epoch": 66.70454545454545, "grad_norm": 1.234862208366394, "learning_rate": 0.0001, "loss": 0.0529, "step": 23480 }, { "epoch": 66.73295454545455, "grad_norm": 0.8468987345695496, "learning_rate": 0.0001, "loss": 0.0536, "step": 23490 }, { "epoch": 66.76136363636364, "grad_norm": 0.765661358833313, "learning_rate": 0.0001, "loss": 0.0508, "step": 23500 }, { "epoch": 66.78977272727273, "grad_norm": 1.1269277334213257, "learning_rate": 0.0001, "loss": 0.0516, "step": 23510 }, { "epoch": 66.81818181818181, "grad_norm": 0.9379168748855591, "learning_rate": 0.0001, "loss": 0.0516, "step": 23520 }, { "epoch": 66.8465909090909, "grad_norm": 1.3234306573867798, "learning_rate": 0.0001, "loss": 0.0518, "step": 23530 }, { "epoch": 66.875, "grad_norm": 0.9817354083061218, "learning_rate": 0.0001, "loss": 0.0539, "step": 23540 }, { "epoch": 66.9034090909091, "grad_norm": 1.1395480632781982, "learning_rate": 0.0001, "loss": 0.0524, "step": 23550 }, { "epoch": 66.93181818181819, "grad_norm": 0.9638949036598206, "learning_rate": 0.0001, "loss": 0.0511, "step": 23560 }, { "epoch": 66.96022727272727, "grad_norm": 0.8169605135917664, "learning_rate": 0.0001, "loss": 0.0517, "step": 23570 }, { "epoch": 66.98863636363636, "grad_norm": 0.861230731010437, "learning_rate": 0.0001, "loss": 0.0537, "step": 23580 }, { "epoch": 67.01704545454545, "grad_norm": 0.656604528427124, "learning_rate": 0.0001, "loss": 0.0541, "step": 23590 }, { "epoch": 67.04545454545455, "grad_norm": 0.7038812041282654, "learning_rate": 0.0001, "loss": 0.0545, "step": 23600 }, { "epoch": 67.07386363636364, "grad_norm": 0.627716064453125, "learning_rate": 0.0001, "loss": 0.0517, "step": 23610 }, { "epoch": 67.10227272727273, "grad_norm": 0.857126772403717, "learning_rate": 0.0001, "loss": 0.0531, "step": 23620 }, { "epoch": 67.13068181818181, "grad_norm": 0.8336479067802429, "learning_rate": 0.0001, "loss": 0.0531, "step": 23630 }, { "epoch": 67.1590909090909, "grad_norm": 0.6836590766906738, "learning_rate": 0.0001, "loss": 0.0527, "step": 23640 }, { "epoch": 67.1875, "grad_norm": 0.8017722964286804, "learning_rate": 0.0001, "loss": 0.0535, "step": 23650 }, { "epoch": 67.2159090909091, "grad_norm": 0.7780610918998718, "learning_rate": 0.0001, "loss": 0.0535, "step": 23660 }, { "epoch": 67.24431818181819, "grad_norm": 0.7484387755393982, "learning_rate": 0.0001, "loss": 0.0526, "step": 23670 }, { "epoch": 67.27272727272727, "grad_norm": 0.6655145287513733, "learning_rate": 0.0001, "loss": 0.0513, "step": 23680 }, { "epoch": 67.30113636363636, "grad_norm": 0.6643372774124146, "learning_rate": 0.0001, "loss": 0.0535, "step": 23690 }, { "epoch": 67.32954545454545, "grad_norm": 0.7950687408447266, "learning_rate": 0.0001, "loss": 0.0517, "step": 23700 }, { "epoch": 67.35795454545455, "grad_norm": 0.8306936621665955, "learning_rate": 0.0001, "loss": 0.0534, "step": 23710 }, { "epoch": 67.38636363636364, "grad_norm": 0.8002460598945618, "learning_rate": 0.0001, "loss": 0.0513, "step": 23720 }, { "epoch": 67.41477272727273, "grad_norm": 0.7444122433662415, "learning_rate": 0.0001, "loss": 0.0531, "step": 23730 }, { "epoch": 67.44318181818181, "grad_norm": 0.6906100511550903, "learning_rate": 0.0001, "loss": 0.0509, "step": 23740 }, { "epoch": 67.4715909090909, "grad_norm": 0.7418623566627502, "learning_rate": 0.0001, "loss": 0.0546, "step": 23750 }, { "epoch": 67.5, "grad_norm": 0.7137703895568848, "learning_rate": 0.0001, "loss": 0.0536, "step": 23760 }, { "epoch": 67.5284090909091, "grad_norm": 0.793405294418335, "learning_rate": 0.0001, "loss": 0.0554, "step": 23770 }, { "epoch": 67.55681818181819, "grad_norm": 0.7513002157211304, "learning_rate": 0.0001, "loss": 0.0568, "step": 23780 }, { "epoch": 67.58522727272727, "grad_norm": 0.7225285172462463, "learning_rate": 0.0001, "loss": 0.0534, "step": 23790 }, { "epoch": 67.61363636363636, "grad_norm": 0.690243661403656, "learning_rate": 0.0001, "loss": 0.0517, "step": 23800 }, { "epoch": 67.64204545454545, "grad_norm": 0.6749371290206909, "learning_rate": 0.0001, "loss": 0.054, "step": 23810 }, { "epoch": 67.67045454545455, "grad_norm": 0.7889542579650879, "learning_rate": 0.0001, "loss": 0.0535, "step": 23820 }, { "epoch": 67.69886363636364, "grad_norm": 1.2622177600860596, "learning_rate": 0.0001, "loss": 0.052, "step": 23830 }, { "epoch": 67.72727272727273, "grad_norm": 1.8038215637207031, "learning_rate": 0.0001, "loss": 0.0534, "step": 23840 }, { "epoch": 67.75568181818181, "grad_norm": 1.4471378326416016, "learning_rate": 0.0001, "loss": 0.054, "step": 23850 }, { "epoch": 67.7840909090909, "grad_norm": 1.2452491521835327, "learning_rate": 0.0001, "loss": 0.051, "step": 23860 }, { "epoch": 67.8125, "grad_norm": 1.2193328142166138, "learning_rate": 0.0001, "loss": 0.0498, "step": 23870 }, { "epoch": 67.8409090909091, "grad_norm": 1.1352566480636597, "learning_rate": 0.0001, "loss": 0.0512, "step": 23880 }, { "epoch": 67.86931818181819, "grad_norm": 1.0166912078857422, "learning_rate": 0.0001, "loss": 0.0527, "step": 23890 }, { "epoch": 67.89772727272727, "grad_norm": 0.9358308911323547, "learning_rate": 0.0001, "loss": 0.054, "step": 23900 }, { "epoch": 67.92613636363636, "grad_norm": 1.0616742372512817, "learning_rate": 0.0001, "loss": 0.0512, "step": 23910 }, { "epoch": 67.95454545454545, "grad_norm": 0.9217783808708191, "learning_rate": 0.0001, "loss": 0.0502, "step": 23920 }, { "epoch": 67.98295454545455, "grad_norm": 1.0423084497451782, "learning_rate": 0.0001, "loss": 0.0518, "step": 23930 }, { "epoch": 68.01136363636364, "grad_norm": 1.1823982000350952, "learning_rate": 0.0001, "loss": 0.0534, "step": 23940 }, { "epoch": 68.03977272727273, "grad_norm": 0.9482648968696594, "learning_rate": 0.0001, "loss": 0.0531, "step": 23950 }, { "epoch": 68.06818181818181, "grad_norm": 0.7669751644134521, "learning_rate": 0.0001, "loss": 0.0516, "step": 23960 }, { "epoch": 68.0965909090909, "grad_norm": 1.1928632259368896, "learning_rate": 0.0001, "loss": 0.0535, "step": 23970 }, { "epoch": 68.125, "grad_norm": 0.9698597192764282, "learning_rate": 0.0001, "loss": 0.0541, "step": 23980 }, { "epoch": 68.1534090909091, "grad_norm": 1.0423868894577026, "learning_rate": 0.0001, "loss": 0.0541, "step": 23990 }, { "epoch": 68.18181818181819, "grad_norm": 1.2554688453674316, "learning_rate": 0.0001, "loss": 0.0532, "step": 24000 }, { "epoch": 68.21022727272727, "grad_norm": 1.3134796619415283, "learning_rate": 0.0001, "loss": 0.0545, "step": 24010 }, { "epoch": 68.23863636363636, "grad_norm": 1.2554820775985718, "learning_rate": 0.0001, "loss": 0.0509, "step": 24020 }, { "epoch": 68.26704545454545, "grad_norm": 0.9832156896591187, "learning_rate": 0.0001, "loss": 0.0497, "step": 24030 }, { "epoch": 68.29545454545455, "grad_norm": 1.0172799825668335, "learning_rate": 0.0001, "loss": 0.0482, "step": 24040 }, { "epoch": 68.32386363636364, "grad_norm": 1.321234107017517, "learning_rate": 0.0001, "loss": 0.0529, "step": 24050 }, { "epoch": 68.35227272727273, "grad_norm": 1.5265092849731445, "learning_rate": 0.0001, "loss": 0.051, "step": 24060 }, { "epoch": 68.38068181818181, "grad_norm": 1.1719361543655396, "learning_rate": 0.0001, "loss": 0.0512, "step": 24070 }, { "epoch": 68.4090909090909, "grad_norm": 1.2583420276641846, "learning_rate": 0.0001, "loss": 0.0494, "step": 24080 }, { "epoch": 68.4375, "grad_norm": 1.0527803897857666, "learning_rate": 0.0001, "loss": 0.0516, "step": 24090 }, { "epoch": 68.4659090909091, "grad_norm": 1.011395812034607, "learning_rate": 0.0001, "loss": 0.05, "step": 24100 }, { "epoch": 68.49431818181819, "grad_norm": 1.037021517753601, "learning_rate": 0.0001, "loss": 0.0499, "step": 24110 }, { "epoch": 68.52272727272727, "grad_norm": 0.9442154765129089, "learning_rate": 0.0001, "loss": 0.051, "step": 24120 }, { "epoch": 68.55113636363636, "grad_norm": 0.8959128260612488, "learning_rate": 0.0001, "loss": 0.051, "step": 24130 }, { "epoch": 68.57954545454545, "grad_norm": 0.9579172730445862, "learning_rate": 0.0001, "loss": 0.0504, "step": 24140 }, { "epoch": 68.60795454545455, "grad_norm": 1.0895936489105225, "learning_rate": 0.0001, "loss": 0.0488, "step": 24150 }, { "epoch": 68.63636363636364, "grad_norm": 0.7573409676551819, "learning_rate": 0.0001, "loss": 0.0491, "step": 24160 }, { "epoch": 68.66477272727273, "grad_norm": 0.8774531483650208, "learning_rate": 0.0001, "loss": 0.0481, "step": 24170 }, { "epoch": 68.69318181818181, "grad_norm": 0.8967164158821106, "learning_rate": 0.0001, "loss": 0.0509, "step": 24180 }, { "epoch": 68.7215909090909, "grad_norm": 1.0587197542190552, "learning_rate": 0.0001, "loss": 0.0541, "step": 24190 }, { "epoch": 68.75, "grad_norm": 1.1898106336593628, "learning_rate": 0.0001, "loss": 0.0512, "step": 24200 }, { "epoch": 68.7784090909091, "grad_norm": 1.0361658334732056, "learning_rate": 0.0001, "loss": 0.0513, "step": 24210 }, { "epoch": 68.80681818181819, "grad_norm": 0.935555100440979, "learning_rate": 0.0001, "loss": 0.0518, "step": 24220 }, { "epoch": 68.83522727272727, "grad_norm": 1.032720923423767, "learning_rate": 0.0001, "loss": 0.0511, "step": 24230 }, { "epoch": 68.86363636363636, "grad_norm": 0.8149229884147644, "learning_rate": 0.0001, "loss": 0.052, "step": 24240 }, { "epoch": 68.89204545454545, "grad_norm": 0.8590128421783447, "learning_rate": 0.0001, "loss": 0.0515, "step": 24250 }, { "epoch": 68.92045454545455, "grad_norm": 0.9247688055038452, "learning_rate": 0.0001, "loss": 0.0518, "step": 24260 }, { "epoch": 68.94886363636364, "grad_norm": 0.9311433434486389, "learning_rate": 0.0001, "loss": 0.0522, "step": 24270 }, { "epoch": 68.97727272727273, "grad_norm": 0.7276850938796997, "learning_rate": 0.0001, "loss": 0.0533, "step": 24280 }, { "epoch": 69.00568181818181, "grad_norm": 0.6716181039810181, "learning_rate": 0.0001, "loss": 0.0537, "step": 24290 }, { "epoch": 69.0340909090909, "grad_norm": 0.9270053505897522, "learning_rate": 0.0001, "loss": 0.0527, "step": 24300 }, { "epoch": 69.0625, "grad_norm": 0.666446328163147, "learning_rate": 0.0001, "loss": 0.0511, "step": 24310 }, { "epoch": 69.0909090909091, "grad_norm": 0.8492375016212463, "learning_rate": 0.0001, "loss": 0.0514, "step": 24320 }, { "epoch": 69.11931818181819, "grad_norm": 0.8447439074516296, "learning_rate": 0.0001, "loss": 0.0487, "step": 24330 }, { "epoch": 69.14772727272727, "grad_norm": 0.7112112045288086, "learning_rate": 0.0001, "loss": 0.0521, "step": 24340 }, { "epoch": 69.17613636363636, "grad_norm": 0.8900835514068604, "learning_rate": 0.0001, "loss": 0.0554, "step": 24350 }, { "epoch": 69.20454545454545, "grad_norm": 0.7511789798736572, "learning_rate": 0.0001, "loss": 0.0511, "step": 24360 }, { "epoch": 69.23295454545455, "grad_norm": 0.6234313249588013, "learning_rate": 0.0001, "loss": 0.0516, "step": 24370 }, { "epoch": 69.26136363636364, "grad_norm": 0.8581838011741638, "learning_rate": 0.0001, "loss": 0.0503, "step": 24380 }, { "epoch": 69.28977272727273, "grad_norm": 0.6439953446388245, "learning_rate": 0.0001, "loss": 0.0522, "step": 24390 }, { "epoch": 69.31818181818181, "grad_norm": 0.805645763874054, "learning_rate": 0.0001, "loss": 0.0531, "step": 24400 }, { "epoch": 69.3465909090909, "grad_norm": 0.7699912786483765, "learning_rate": 0.0001, "loss": 0.0555, "step": 24410 }, { "epoch": 69.375, "grad_norm": 0.7186166644096375, "learning_rate": 0.0001, "loss": 0.0552, "step": 24420 }, { "epoch": 69.4034090909091, "grad_norm": 0.8284119963645935, "learning_rate": 0.0001, "loss": 0.0514, "step": 24430 }, { "epoch": 69.43181818181819, "grad_norm": 0.8688386082649231, "learning_rate": 0.0001, "loss": 0.0529, "step": 24440 }, { "epoch": 69.46022727272727, "grad_norm": 1.1181520223617554, "learning_rate": 0.0001, "loss": 0.0544, "step": 24450 }, { "epoch": 69.48863636363636, "grad_norm": 1.0079569816589355, "learning_rate": 0.0001, "loss": 0.0536, "step": 24460 }, { "epoch": 69.51704545454545, "grad_norm": 1.6781340837478638, "learning_rate": 0.0001, "loss": 0.0526, "step": 24470 }, { "epoch": 69.54545454545455, "grad_norm": 1.3601493835449219, "learning_rate": 0.0001, "loss": 0.0515, "step": 24480 }, { "epoch": 69.57386363636364, "grad_norm": 0.9561741948127747, "learning_rate": 0.0001, "loss": 0.0514, "step": 24490 }, { "epoch": 69.60227272727273, "grad_norm": 0.8902744650840759, "learning_rate": 0.0001, "loss": 0.0496, "step": 24500 }, { "epoch": 69.63068181818181, "grad_norm": 0.9449006915092468, "learning_rate": 0.0001, "loss": 0.0512, "step": 24510 }, { "epoch": 69.6590909090909, "grad_norm": 1.1786518096923828, "learning_rate": 0.0001, "loss": 0.0525, "step": 24520 }, { "epoch": 69.6875, "grad_norm": 1.233751893043518, "learning_rate": 0.0001, "loss": 0.0507, "step": 24530 }, { "epoch": 69.7159090909091, "grad_norm": 0.9357305765151978, "learning_rate": 0.0001, "loss": 0.0528, "step": 24540 }, { "epoch": 69.74431818181819, "grad_norm": 1.226325511932373, "learning_rate": 0.0001, "loss": 0.0515, "step": 24550 }, { "epoch": 69.77272727272727, "grad_norm": 0.8865256309509277, "learning_rate": 0.0001, "loss": 0.0497, "step": 24560 }, { "epoch": 69.80113636363636, "grad_norm": 1.6879335641860962, "learning_rate": 0.0001, "loss": 0.0522, "step": 24570 }, { "epoch": 69.82954545454545, "grad_norm": 1.2305909395217896, "learning_rate": 0.0001, "loss": 0.0502, "step": 24580 }, { "epoch": 69.85795454545455, "grad_norm": 1.1654038429260254, "learning_rate": 0.0001, "loss": 0.0514, "step": 24590 }, { "epoch": 69.88636363636364, "grad_norm": 0.8632926940917969, "learning_rate": 0.0001, "loss": 0.0507, "step": 24600 }, { "epoch": 69.91477272727273, "grad_norm": 0.7934690713882446, "learning_rate": 0.0001, "loss": 0.0497, "step": 24610 }, { "epoch": 69.94318181818181, "grad_norm": 0.7492729425430298, "learning_rate": 0.0001, "loss": 0.0496, "step": 24620 }, { "epoch": 69.9715909090909, "grad_norm": 0.7984905242919922, "learning_rate": 0.0001, "loss": 0.0503, "step": 24630 }, { "epoch": 70.0, "grad_norm": 0.8478935956954956, "learning_rate": 0.0001, "loss": 0.0514, "step": 24640 }, { "epoch": 70.0284090909091, "grad_norm": 0.7653668522834778, "learning_rate": 0.0001, "loss": 0.0517, "step": 24650 }, { "epoch": 70.05681818181819, "grad_norm": 0.7579995393753052, "learning_rate": 0.0001, "loss": 0.053, "step": 24660 }, { "epoch": 70.08522727272727, "grad_norm": 0.9072360992431641, "learning_rate": 0.0001, "loss": 0.0515, "step": 24670 }, { "epoch": 70.11363636363636, "grad_norm": 0.7853196859359741, "learning_rate": 0.0001, "loss": 0.0512, "step": 24680 }, { "epoch": 70.14204545454545, "grad_norm": 0.7733336091041565, "learning_rate": 0.0001, "loss": 0.0566, "step": 24690 }, { "epoch": 70.17045454545455, "grad_norm": 0.8603296279907227, "learning_rate": 0.0001, "loss": 0.0522, "step": 24700 }, { "epoch": 70.19886363636364, "grad_norm": 1.4242461919784546, "learning_rate": 0.0001, "loss": 0.0529, "step": 24710 }, { "epoch": 70.22727272727273, "grad_norm": 1.4059160947799683, "learning_rate": 0.0001, "loss": 0.0512, "step": 24720 }, { "epoch": 70.25568181818181, "grad_norm": 1.3890278339385986, "learning_rate": 0.0001, "loss": 0.0523, "step": 24730 }, { "epoch": 70.2840909090909, "grad_norm": 1.2617861032485962, "learning_rate": 0.0001, "loss": 0.0472, "step": 24740 }, { "epoch": 70.3125, "grad_norm": 1.1536449193954468, "learning_rate": 0.0001, "loss": 0.0507, "step": 24750 }, { "epoch": 70.3409090909091, "grad_norm": 1.032045602798462, "learning_rate": 0.0001, "loss": 0.0502, "step": 24760 }, { "epoch": 70.36931818181819, "grad_norm": 0.9999845623970032, "learning_rate": 0.0001, "loss": 0.0511, "step": 24770 }, { "epoch": 70.39772727272727, "grad_norm": 0.9327858090400696, "learning_rate": 0.0001, "loss": 0.0497, "step": 24780 }, { "epoch": 70.42613636363636, "grad_norm": 1.1740491390228271, "learning_rate": 0.0001, "loss": 0.0509, "step": 24790 }, { "epoch": 70.45454545454545, "grad_norm": 0.8893155455589294, "learning_rate": 0.0001, "loss": 0.0494, "step": 24800 }, { "epoch": 70.48295454545455, "grad_norm": 0.8836989998817444, "learning_rate": 0.0001, "loss": 0.0522, "step": 24810 }, { "epoch": 70.51136363636364, "grad_norm": 1.0119452476501465, "learning_rate": 0.0001, "loss": 0.0497, "step": 24820 }, { "epoch": 70.53977272727273, "grad_norm": 0.9030247330665588, "learning_rate": 0.0001, "loss": 0.0522, "step": 24830 }, { "epoch": 70.56818181818181, "grad_norm": 0.7199386358261108, "learning_rate": 0.0001, "loss": 0.0514, "step": 24840 }, { "epoch": 70.5965909090909, "grad_norm": 0.8884567022323608, "learning_rate": 0.0001, "loss": 0.0504, "step": 24850 }, { "epoch": 70.625, "grad_norm": 0.6433593034744263, "learning_rate": 0.0001, "loss": 0.0531, "step": 24860 }, { "epoch": 70.6534090909091, "grad_norm": 0.6983967423439026, "learning_rate": 0.0001, "loss": 0.0494, "step": 24870 }, { "epoch": 70.68181818181819, "grad_norm": 0.647629976272583, "learning_rate": 0.0001, "loss": 0.0499, "step": 24880 }, { "epoch": 70.71022727272727, "grad_norm": 0.71266108751297, "learning_rate": 0.0001, "loss": 0.0512, "step": 24890 }, { "epoch": 70.73863636363636, "grad_norm": 0.6877172589302063, "learning_rate": 0.0001, "loss": 0.0499, "step": 24900 }, { "epoch": 70.76704545454545, "grad_norm": 0.6937993168830872, "learning_rate": 0.0001, "loss": 0.0496, "step": 24910 }, { "epoch": 70.79545454545455, "grad_norm": 0.5959415435791016, "learning_rate": 0.0001, "loss": 0.0489, "step": 24920 }, { "epoch": 70.82386363636364, "grad_norm": 0.6399363279342651, "learning_rate": 0.0001, "loss": 0.0512, "step": 24930 }, { "epoch": 70.85227272727273, "grad_norm": 0.7871550917625427, "learning_rate": 0.0001, "loss": 0.0494, "step": 24940 }, { "epoch": 70.88068181818181, "grad_norm": 0.7523185610771179, "learning_rate": 0.0001, "loss": 0.0536, "step": 24950 }, { "epoch": 70.9090909090909, "grad_norm": 0.7533581852912903, "learning_rate": 0.0001, "loss": 0.0525, "step": 24960 }, { "epoch": 70.9375, "grad_norm": 0.7682768106460571, "learning_rate": 0.0001, "loss": 0.0512, "step": 24970 }, { "epoch": 70.9659090909091, "grad_norm": 0.8463433980941772, "learning_rate": 0.0001, "loss": 0.051, "step": 24980 }, { "epoch": 70.99431818181819, "grad_norm": 1.0878268480300903, "learning_rate": 0.0001, "loss": 0.049, "step": 24990 }, { "epoch": 71.02272727272727, "grad_norm": 0.7139332294464111, "learning_rate": 0.0001, "loss": 0.0523, "step": 25000 }, { "epoch": 71.05113636363636, "grad_norm": 0.6780238747596741, "learning_rate": 0.0001, "loss": 0.05, "step": 25010 }, { "epoch": 71.07954545454545, "grad_norm": 0.6342650055885315, "learning_rate": 0.0001, "loss": 0.0518, "step": 25020 }, { "epoch": 71.10795454545455, "grad_norm": 0.6704277992248535, "learning_rate": 0.0001, "loss": 0.0506, "step": 25030 }, { "epoch": 71.13636363636364, "grad_norm": 0.7333451509475708, "learning_rate": 0.0001, "loss": 0.0494, "step": 25040 }, { "epoch": 71.16477272727273, "grad_norm": 0.8710368275642395, "learning_rate": 0.0001, "loss": 0.0508, "step": 25050 }, { "epoch": 71.19318181818181, "grad_norm": 0.9135860204696655, "learning_rate": 0.0001, "loss": 0.053, "step": 25060 }, { "epoch": 71.2215909090909, "grad_norm": 0.7403706908226013, "learning_rate": 0.0001, "loss": 0.0505, "step": 25070 }, { "epoch": 71.25, "grad_norm": 0.6618191003799438, "learning_rate": 0.0001, "loss": 0.048, "step": 25080 }, { "epoch": 71.2784090909091, "grad_norm": 0.7856776714324951, "learning_rate": 0.0001, "loss": 0.0476, "step": 25090 }, { "epoch": 71.30681818181819, "grad_norm": 0.7596649527549744, "learning_rate": 0.0001, "loss": 0.0481, "step": 25100 }, { "epoch": 71.33522727272727, "grad_norm": 0.8146116733551025, "learning_rate": 0.0001, "loss": 0.0474, "step": 25110 }, { "epoch": 71.36363636363636, "grad_norm": 0.6791525483131409, "learning_rate": 0.0001, "loss": 0.0484, "step": 25120 }, { "epoch": 71.39204545454545, "grad_norm": 0.7217307090759277, "learning_rate": 0.0001, "loss": 0.0499, "step": 25130 }, { "epoch": 71.42045454545455, "grad_norm": 0.6544477939605713, "learning_rate": 0.0001, "loss": 0.0531, "step": 25140 }, { "epoch": 71.44886363636364, "grad_norm": 0.6746852397918701, "learning_rate": 0.0001, "loss": 0.0521, "step": 25150 }, { "epoch": 71.47727272727273, "grad_norm": 0.5204148888587952, "learning_rate": 0.0001, "loss": 0.0506, "step": 25160 }, { "epoch": 71.50568181818181, "grad_norm": 0.5109100937843323, "learning_rate": 0.0001, "loss": 0.0493, "step": 25170 }, { "epoch": 71.5340909090909, "grad_norm": 0.6211031675338745, "learning_rate": 0.0001, "loss": 0.0543, "step": 25180 }, { "epoch": 71.5625, "grad_norm": 0.677085280418396, "learning_rate": 0.0001, "loss": 0.0527, "step": 25190 }, { "epoch": 71.5909090909091, "grad_norm": 0.6960747838020325, "learning_rate": 0.0001, "loss": 0.0502, "step": 25200 }, { "epoch": 71.61931818181819, "grad_norm": 0.6580451130867004, "learning_rate": 0.0001, "loss": 0.0505, "step": 25210 }, { "epoch": 71.64772727272727, "grad_norm": 0.6775358319282532, "learning_rate": 0.0001, "loss": 0.0491, "step": 25220 }, { "epoch": 71.67613636363636, "grad_norm": 0.6583216786384583, "learning_rate": 0.0001, "loss": 0.0505, "step": 25230 }, { "epoch": 71.70454545454545, "grad_norm": 0.7078356146812439, "learning_rate": 0.0001, "loss": 0.0483, "step": 25240 }, { "epoch": 71.73295454545455, "grad_norm": 0.7176387906074524, "learning_rate": 0.0001, "loss": 0.0487, "step": 25250 }, { "epoch": 71.76136363636364, "grad_norm": 0.749264657497406, "learning_rate": 0.0001, "loss": 0.0523, "step": 25260 }, { "epoch": 71.78977272727273, "grad_norm": 0.6820817589759827, "learning_rate": 0.0001, "loss": 0.0498, "step": 25270 }, { "epoch": 71.81818181818181, "grad_norm": 0.6245760917663574, "learning_rate": 0.0001, "loss": 0.0499, "step": 25280 }, { "epoch": 71.8465909090909, "grad_norm": 0.5692148804664612, "learning_rate": 0.0001, "loss": 0.0491, "step": 25290 }, { "epoch": 71.875, "grad_norm": 0.6304931640625, "learning_rate": 0.0001, "loss": 0.0504, "step": 25300 }, { "epoch": 71.9034090909091, "grad_norm": 0.546541690826416, "learning_rate": 0.0001, "loss": 0.0492, "step": 25310 }, { "epoch": 71.93181818181819, "grad_norm": 0.5972326993942261, "learning_rate": 0.0001, "loss": 0.0505, "step": 25320 }, { "epoch": 71.96022727272727, "grad_norm": 0.6020660996437073, "learning_rate": 0.0001, "loss": 0.0505, "step": 25330 }, { "epoch": 71.98863636363636, "grad_norm": 0.48787808418273926, "learning_rate": 0.0001, "loss": 0.0523, "step": 25340 }, { "epoch": 72.01704545454545, "grad_norm": 0.7013693451881409, "learning_rate": 0.0001, "loss": 0.0508, "step": 25350 }, { "epoch": 72.04545454545455, "grad_norm": 0.5541148781776428, "learning_rate": 0.0001, "loss": 0.049, "step": 25360 }, { "epoch": 72.07386363636364, "grad_norm": 0.6003844738006592, "learning_rate": 0.0001, "loss": 0.0509, "step": 25370 }, { "epoch": 72.10227272727273, "grad_norm": 0.8124470114707947, "learning_rate": 0.0001, "loss": 0.0532, "step": 25380 }, { "epoch": 72.13068181818181, "grad_norm": 0.6087120771408081, "learning_rate": 0.0001, "loss": 0.0513, "step": 25390 }, { "epoch": 72.1590909090909, "grad_norm": 0.6835238337516785, "learning_rate": 0.0001, "loss": 0.0514, "step": 25400 }, { "epoch": 72.1875, "grad_norm": 0.758734405040741, "learning_rate": 0.0001, "loss": 0.0502, "step": 25410 }, { "epoch": 72.2159090909091, "grad_norm": 0.745496392250061, "learning_rate": 0.0001, "loss": 0.0496, "step": 25420 }, { "epoch": 72.24431818181819, "grad_norm": 0.7484995722770691, "learning_rate": 0.0001, "loss": 0.0518, "step": 25430 }, { "epoch": 72.27272727272727, "grad_norm": 0.7207466959953308, "learning_rate": 0.0001, "loss": 0.0506, "step": 25440 }, { "epoch": 72.30113636363636, "grad_norm": 0.866812527179718, "learning_rate": 0.0001, "loss": 0.0514, "step": 25450 }, { "epoch": 72.32954545454545, "grad_norm": 0.7610346674919128, "learning_rate": 0.0001, "loss": 0.0505, "step": 25460 }, { "epoch": 72.35795454545455, "grad_norm": 0.6176382899284363, "learning_rate": 0.0001, "loss": 0.0493, "step": 25470 }, { "epoch": 72.38636363636364, "grad_norm": 0.7282941937446594, "learning_rate": 0.0001, "loss": 0.0492, "step": 25480 }, { "epoch": 72.41477272727273, "grad_norm": 0.6433279514312744, "learning_rate": 0.0001, "loss": 0.051, "step": 25490 }, { "epoch": 72.44318181818181, "grad_norm": 0.6624048352241516, "learning_rate": 0.0001, "loss": 0.0507, "step": 25500 }, { "epoch": 72.4715909090909, "grad_norm": 0.6259250044822693, "learning_rate": 0.0001, "loss": 0.0508, "step": 25510 }, { "epoch": 72.5, "grad_norm": 0.5815131068229675, "learning_rate": 0.0001, "loss": 0.0509, "step": 25520 }, { "epoch": 72.5284090909091, "grad_norm": 0.4949661195278168, "learning_rate": 0.0001, "loss": 0.0502, "step": 25530 }, { "epoch": 72.55681818181819, "grad_norm": 0.6070393323898315, "learning_rate": 0.0001, "loss": 0.0536, "step": 25540 }, { "epoch": 72.58522727272727, "grad_norm": 0.9325839281082153, "learning_rate": 0.0001, "loss": 0.0512, "step": 25550 }, { "epoch": 72.61363636363636, "grad_norm": 0.6207942962646484, "learning_rate": 0.0001, "loss": 0.051, "step": 25560 }, { "epoch": 72.64204545454545, "grad_norm": 0.7251754403114319, "learning_rate": 0.0001, "loss": 0.0511, "step": 25570 }, { "epoch": 72.67045454545455, "grad_norm": 0.7657225131988525, "learning_rate": 0.0001, "loss": 0.053, "step": 25580 }, { "epoch": 72.69886363636364, "grad_norm": 0.6369885802268982, "learning_rate": 0.0001, "loss": 0.0537, "step": 25590 }, { "epoch": 72.72727272727273, "grad_norm": 0.82183837890625, "learning_rate": 0.0001, "loss": 0.0503, "step": 25600 }, { "epoch": 72.75568181818181, "grad_norm": 0.670074999332428, "learning_rate": 0.0001, "loss": 0.0508, "step": 25610 }, { "epoch": 72.7840909090909, "grad_norm": 0.7039807438850403, "learning_rate": 0.0001, "loss": 0.0487, "step": 25620 }, { "epoch": 72.8125, "grad_norm": 0.6067477464675903, "learning_rate": 0.0001, "loss": 0.0516, "step": 25630 }, { "epoch": 72.8409090909091, "grad_norm": 0.6139563918113708, "learning_rate": 0.0001, "loss": 0.0485, "step": 25640 }, { "epoch": 72.86931818181819, "grad_norm": 0.8261796832084656, "learning_rate": 0.0001, "loss": 0.0477, "step": 25650 }, { "epoch": 72.89772727272727, "grad_norm": 0.7676610350608826, "learning_rate": 0.0001, "loss": 0.0495, "step": 25660 }, { "epoch": 72.92613636363636, "grad_norm": 0.6294236779212952, "learning_rate": 0.0001, "loss": 0.0501, "step": 25670 }, { "epoch": 72.95454545454545, "grad_norm": 0.5884844660758972, "learning_rate": 0.0001, "loss": 0.047, "step": 25680 }, { "epoch": 72.98295454545455, "grad_norm": 0.6261518001556396, "learning_rate": 0.0001, "loss": 0.0494, "step": 25690 }, { "epoch": 73.01136363636364, "grad_norm": 0.5874741673469543, "learning_rate": 0.0001, "loss": 0.0492, "step": 25700 }, { "epoch": 73.03977272727273, "grad_norm": 0.558462917804718, "learning_rate": 0.0001, "loss": 0.0485, "step": 25710 }, { "epoch": 73.06818181818181, "grad_norm": 1.5096638202667236, "learning_rate": 0.0001, "loss": 0.0482, "step": 25720 }, { "epoch": 73.0965909090909, "grad_norm": 0.8970916867256165, "learning_rate": 0.0001, "loss": 0.0485, "step": 25730 }, { "epoch": 73.125, "grad_norm": 1.2470207214355469, "learning_rate": 0.0001, "loss": 0.0474, "step": 25740 }, { "epoch": 73.1534090909091, "grad_norm": 1.1245193481445312, "learning_rate": 0.0001, "loss": 0.0464, "step": 25750 }, { "epoch": 73.18181818181819, "grad_norm": 1.018660068511963, "learning_rate": 0.0001, "loss": 0.0487, "step": 25760 }, { "epoch": 73.21022727272727, "grad_norm": 1.0547358989715576, "learning_rate": 0.0001, "loss": 0.049, "step": 25770 }, { "epoch": 73.23863636363636, "grad_norm": 1.0074411630630493, "learning_rate": 0.0001, "loss": 0.0495, "step": 25780 }, { "epoch": 73.26704545454545, "grad_norm": 1.107343077659607, "learning_rate": 0.0001, "loss": 0.0495, "step": 25790 }, { "epoch": 73.29545454545455, "grad_norm": 1.0003204345703125, "learning_rate": 0.0001, "loss": 0.0461, "step": 25800 }, { "epoch": 73.32386363636364, "grad_norm": 1.152951955795288, "learning_rate": 0.0001, "loss": 0.0485, "step": 25810 }, { "epoch": 73.35227272727273, "grad_norm": 1.0957775115966797, "learning_rate": 0.0001, "loss": 0.0465, "step": 25820 }, { "epoch": 73.38068181818181, "grad_norm": 0.9931585192680359, "learning_rate": 0.0001, "loss": 0.0468, "step": 25830 }, { "epoch": 73.4090909090909, "grad_norm": 0.9358384609222412, "learning_rate": 0.0001, "loss": 0.0475, "step": 25840 }, { "epoch": 73.4375, "grad_norm": 1.6903265714645386, "learning_rate": 0.0001, "loss": 0.0561, "step": 25850 }, { "epoch": 73.4659090909091, "grad_norm": 1.554337501525879, "learning_rate": 0.0001, "loss": 0.0494, "step": 25860 }, { "epoch": 73.49431818181819, "grad_norm": 1.531087875366211, "learning_rate": 0.0001, "loss": 0.0486, "step": 25870 }, { "epoch": 73.52272727272727, "grad_norm": 1.144465684890747, "learning_rate": 0.0001, "loss": 0.0472, "step": 25880 }, { "epoch": 73.55113636363636, "grad_norm": 1.1745659112930298, "learning_rate": 0.0001, "loss": 0.0475, "step": 25890 }, { "epoch": 73.57954545454545, "grad_norm": 1.1764057874679565, "learning_rate": 0.0001, "loss": 0.0474, "step": 25900 }, { "epoch": 73.60795454545455, "grad_norm": 1.2074421644210815, "learning_rate": 0.0001, "loss": 0.0456, "step": 25910 }, { "epoch": 73.63636363636364, "grad_norm": 1.036546230316162, "learning_rate": 0.0001, "loss": 0.0474, "step": 25920 }, { "epoch": 73.66477272727273, "grad_norm": 1.5946331024169922, "learning_rate": 0.0001, "loss": 0.0471, "step": 25930 }, { "epoch": 73.69318181818181, "grad_norm": 1.7375640869140625, "learning_rate": 0.0001, "loss": 0.0482, "step": 25940 }, { "epoch": 73.7215909090909, "grad_norm": 1.494059443473816, "learning_rate": 0.0001, "loss": 0.0498, "step": 25950 }, { "epoch": 73.75, "grad_norm": 1.3823585510253906, "learning_rate": 0.0001, "loss": 0.0461, "step": 25960 }, { "epoch": 73.7784090909091, "grad_norm": 1.4736156463623047, "learning_rate": 0.0001, "loss": 0.0476, "step": 25970 }, { "epoch": 73.80681818181819, "grad_norm": 1.3302404880523682, "learning_rate": 0.0001, "loss": 0.0465, "step": 25980 }, { "epoch": 73.83522727272727, "grad_norm": 1.0495837926864624, "learning_rate": 0.0001, "loss": 0.0465, "step": 25990 }, { "epoch": 73.86363636363636, "grad_norm": 1.1226849555969238, "learning_rate": 0.0001, "loss": 0.0463, "step": 26000 }, { "epoch": 73.89204545454545, "grad_norm": 1.1718180179595947, "learning_rate": 0.0001, "loss": 0.0461, "step": 26010 }, { "epoch": 73.92045454545455, "grad_norm": 1.1437042951583862, "learning_rate": 0.0001, "loss": 0.0468, "step": 26020 }, { "epoch": 73.94886363636364, "grad_norm": 0.9460147619247437, "learning_rate": 0.0001, "loss": 0.0454, "step": 26030 }, { "epoch": 73.97727272727273, "grad_norm": 0.7734537720680237, "learning_rate": 0.0001, "loss": 0.0463, "step": 26040 }, { "epoch": 74.00568181818181, "grad_norm": 0.7331590056419373, "learning_rate": 0.0001, "loss": 0.0479, "step": 26050 }, { "epoch": 74.0340909090909, "grad_norm": 0.8983361721038818, "learning_rate": 0.0001, "loss": 0.0481, "step": 26060 }, { "epoch": 74.0625, "grad_norm": 0.7752969861030579, "learning_rate": 0.0001, "loss": 0.0457, "step": 26070 }, { "epoch": 74.0909090909091, "grad_norm": 1.147444725036621, "learning_rate": 0.0001, "loss": 0.0488, "step": 26080 }, { "epoch": 74.11931818181819, "grad_norm": 1.5672545433044434, "learning_rate": 0.0001, "loss": 0.0646, "step": 26090 }, { "epoch": 74.14772727272727, "grad_norm": 1.861153483390808, "learning_rate": 0.0001, "loss": 0.0496, "step": 26100 }, { "epoch": 74.17613636363636, "grad_norm": 2.55692720413208, "learning_rate": 0.0001, "loss": 0.0485, "step": 26110 }, { "epoch": 74.20454545454545, "grad_norm": 1.756406545639038, "learning_rate": 0.0001, "loss": 0.0462, "step": 26120 }, { "epoch": 74.23295454545455, "grad_norm": 1.4010676145553589, "learning_rate": 0.0001, "loss": 0.0459, "step": 26130 }, { "epoch": 74.26136363636364, "grad_norm": 1.0524970293045044, "learning_rate": 0.0001, "loss": 0.0452, "step": 26140 }, { "epoch": 74.28977272727273, "grad_norm": 1.089568853378296, "learning_rate": 0.0001, "loss": 0.0454, "step": 26150 }, { "epoch": 74.31818181818181, "grad_norm": 1.5746029615402222, "learning_rate": 0.0001, "loss": 0.0458, "step": 26160 }, { "epoch": 74.3465909090909, "grad_norm": 1.353350281715393, "learning_rate": 0.0001, "loss": 0.0462, "step": 26170 }, { "epoch": 74.375, "grad_norm": 0.9193561673164368, "learning_rate": 0.0001, "loss": 0.045, "step": 26180 }, { "epoch": 74.4034090909091, "grad_norm": 0.794005811214447, "learning_rate": 0.0001, "loss": 0.0444, "step": 26190 }, { "epoch": 74.43181818181819, "grad_norm": 0.7287346124649048, "learning_rate": 0.0001, "loss": 0.0478, "step": 26200 }, { "epoch": 74.46022727272727, "grad_norm": 0.9359661340713501, "learning_rate": 0.0001, "loss": 0.045, "step": 26210 }, { "epoch": 74.48863636363636, "grad_norm": 0.8077235221862793, "learning_rate": 0.0001, "loss": 0.0471, "step": 26220 }, { "epoch": 74.51704545454545, "grad_norm": 0.7505087852478027, "learning_rate": 0.0001, "loss": 0.0471, "step": 26230 }, { "epoch": 74.54545454545455, "grad_norm": 0.9545241594314575, "learning_rate": 0.0001, "loss": 0.0464, "step": 26240 }, { "epoch": 74.57386363636364, "grad_norm": 0.8987447619438171, "learning_rate": 0.0001, "loss": 0.0479, "step": 26250 }, { "epoch": 74.60227272727273, "grad_norm": 0.7390574812889099, "learning_rate": 0.0001, "loss": 0.0487, "step": 26260 }, { "epoch": 74.63068181818181, "grad_norm": 0.7332533001899719, "learning_rate": 0.0001, "loss": 0.0501, "step": 26270 }, { "epoch": 74.6590909090909, "grad_norm": 0.9167255163192749, "learning_rate": 0.0001, "loss": 0.0473, "step": 26280 }, { "epoch": 74.6875, "grad_norm": 0.6819099187850952, "learning_rate": 0.0001, "loss": 0.0487, "step": 26290 }, { "epoch": 74.7159090909091, "grad_norm": 0.8224645853042603, "learning_rate": 0.0001, "loss": 0.045, "step": 26300 }, { "epoch": 74.74431818181819, "grad_norm": 0.8777086138725281, "learning_rate": 0.0001, "loss": 0.0493, "step": 26310 }, { "epoch": 74.77272727272727, "grad_norm": 0.8701086044311523, "learning_rate": 0.0001, "loss": 0.0496, "step": 26320 }, { "epoch": 74.80113636363636, "grad_norm": 0.8876749873161316, "learning_rate": 0.0001, "loss": 0.0486, "step": 26330 }, { "epoch": 74.82954545454545, "grad_norm": 1.0940346717834473, "learning_rate": 0.0001, "loss": 0.05, "step": 26340 }, { "epoch": 74.85795454545455, "grad_norm": 0.8126282095909119, "learning_rate": 0.0001, "loss": 0.05, "step": 26350 }, { "epoch": 74.88636363636364, "grad_norm": 0.6576694250106812, "learning_rate": 0.0001, "loss": 0.0477, "step": 26360 }, { "epoch": 74.91477272727273, "grad_norm": 0.8096992373466492, "learning_rate": 0.0001, "loss": 0.0492, "step": 26370 }, { "epoch": 74.94318181818181, "grad_norm": 0.7710022926330566, "learning_rate": 0.0001, "loss": 0.0469, "step": 26380 }, { "epoch": 74.9715909090909, "grad_norm": 0.6302091479301453, "learning_rate": 0.0001, "loss": 0.0487, "step": 26390 }, { "epoch": 75.0, "grad_norm": 0.5126988291740417, "learning_rate": 0.0001, "loss": 0.0476, "step": 26400 }, { "epoch": 75.0284090909091, "grad_norm": 0.6366093754768372, "learning_rate": 0.0001, "loss": 0.0481, "step": 26410 }, { "epoch": 75.05681818181819, "grad_norm": 0.6395828127861023, "learning_rate": 0.0001, "loss": 0.0488, "step": 26420 }, { "epoch": 75.08522727272727, "grad_norm": 0.6386353969573975, "learning_rate": 0.0001, "loss": 0.0515, "step": 26430 }, { "epoch": 75.11363636363636, "grad_norm": 0.7400466799736023, "learning_rate": 0.0001, "loss": 0.0502, "step": 26440 }, { "epoch": 75.14204545454545, "grad_norm": 0.6518636345863342, "learning_rate": 0.0001, "loss": 0.0503, "step": 26450 }, { "epoch": 75.17045454545455, "grad_norm": 1.10001540184021, "learning_rate": 0.0001, "loss": 0.0503, "step": 26460 }, { "epoch": 75.19886363636364, "grad_norm": 1.4311150312423706, "learning_rate": 0.0001, "loss": 0.0517, "step": 26470 }, { "epoch": 75.22727272727273, "grad_norm": 1.1874371767044067, "learning_rate": 0.0001, "loss": 0.0502, "step": 26480 }, { "epoch": 75.25568181818181, "grad_norm": 1.4475220441818237, "learning_rate": 0.0001, "loss": 0.0475, "step": 26490 }, { "epoch": 75.2840909090909, "grad_norm": 1.1500955820083618, "learning_rate": 0.0001, "loss": 0.049, "step": 26500 }, { "epoch": 75.3125, "grad_norm": 1.1389282941818237, "learning_rate": 0.0001, "loss": 0.0468, "step": 26510 }, { "epoch": 75.3409090909091, "grad_norm": 0.9480587840080261, "learning_rate": 0.0001, "loss": 0.0447, "step": 26520 }, { "epoch": 75.36931818181819, "grad_norm": 0.8605413436889648, "learning_rate": 0.0001, "loss": 0.0445, "step": 26530 }, { "epoch": 75.39772727272727, "grad_norm": 0.8746979832649231, "learning_rate": 0.0001, "loss": 0.0459, "step": 26540 }, { "epoch": 75.42613636363636, "grad_norm": 0.9024845957756042, "learning_rate": 0.0001, "loss": 0.0472, "step": 26550 }, { "epoch": 75.45454545454545, "grad_norm": 1.183405876159668, "learning_rate": 0.0001, "loss": 0.046, "step": 26560 }, { "epoch": 75.48295454545455, "grad_norm": 0.9437256455421448, "learning_rate": 0.0001, "loss": 0.0477, "step": 26570 }, { "epoch": 75.51136363636364, "grad_norm": 0.8547454476356506, "learning_rate": 0.0001, "loss": 0.0454, "step": 26580 }, { "epoch": 75.53977272727273, "grad_norm": 0.8393595218658447, "learning_rate": 0.0001, "loss": 0.0486, "step": 26590 }, { "epoch": 75.56818181818181, "grad_norm": 0.9341287016868591, "learning_rate": 0.0001, "loss": 0.0464, "step": 26600 }, { "epoch": 75.5965909090909, "grad_norm": 0.9698324203491211, "learning_rate": 0.0001, "loss": 0.0483, "step": 26610 }, { "epoch": 75.625, "grad_norm": 0.8105787038803101, "learning_rate": 0.0001, "loss": 0.0484, "step": 26620 }, { "epoch": 75.6534090909091, "grad_norm": 1.2497771978378296, "learning_rate": 0.0001, "loss": 0.049, "step": 26630 }, { "epoch": 75.68181818181819, "grad_norm": 1.0008628368377686, "learning_rate": 0.0001, "loss": 0.0487, "step": 26640 }, { "epoch": 75.71022727272727, "grad_norm": 1.203723669052124, "learning_rate": 0.0001, "loss": 0.0483, "step": 26650 }, { "epoch": 75.73863636363636, "grad_norm": 1.1215006113052368, "learning_rate": 0.0001, "loss": 0.0483, "step": 26660 }, { "epoch": 75.76704545454545, "grad_norm": 0.9968708753585815, "learning_rate": 0.0001, "loss": 0.0468, "step": 26670 }, { "epoch": 75.79545454545455, "grad_norm": 0.8633506298065186, "learning_rate": 0.0001, "loss": 0.0472, "step": 26680 }, { "epoch": 75.82386363636364, "grad_norm": 1.0756628513336182, "learning_rate": 0.0001, "loss": 0.0486, "step": 26690 }, { "epoch": 75.85227272727273, "grad_norm": 0.7618953585624695, "learning_rate": 0.0001, "loss": 0.0493, "step": 26700 }, { "epoch": 75.88068181818181, "grad_norm": 0.9463710188865662, "learning_rate": 0.0001, "loss": 0.0487, "step": 26710 }, { "epoch": 75.9090909090909, "grad_norm": 0.7390015125274658, "learning_rate": 0.0001, "loss": 0.0489, "step": 26720 }, { "epoch": 75.9375, "grad_norm": 0.5806778073310852, "learning_rate": 0.0001, "loss": 0.0506, "step": 26730 }, { "epoch": 75.9659090909091, "grad_norm": 0.6981925368309021, "learning_rate": 0.0001, "loss": 0.0498, "step": 26740 }, { "epoch": 75.99431818181819, "grad_norm": 0.7363477349281311, "learning_rate": 0.0001, "loss": 0.0493, "step": 26750 }, { "epoch": 76.02272727272727, "grad_norm": 0.7735875248908997, "learning_rate": 0.0001, "loss": 0.0491, "step": 26760 }, { "epoch": 76.05113636363636, "grad_norm": 0.6567436456680298, "learning_rate": 0.0001, "loss": 0.0489, "step": 26770 }, { "epoch": 76.07954545454545, "grad_norm": 0.6639755368232727, "learning_rate": 0.0001, "loss": 0.0481, "step": 26780 }, { "epoch": 76.10795454545455, "grad_norm": 0.5334902405738831, "learning_rate": 0.0001, "loss": 0.0481, "step": 26790 }, { "epoch": 76.13636363636364, "grad_norm": 0.6336926221847534, "learning_rate": 0.0001, "loss": 0.0481, "step": 26800 }, { "epoch": 76.16477272727273, "grad_norm": 0.5552213191986084, "learning_rate": 0.0001, "loss": 0.0492, "step": 26810 }, { "epoch": 76.19318181818181, "grad_norm": 0.5877450108528137, "learning_rate": 0.0001, "loss": 0.0475, "step": 26820 }, { "epoch": 76.2215909090909, "grad_norm": 0.6252912878990173, "learning_rate": 0.0001, "loss": 0.0491, "step": 26830 }, { "epoch": 76.25, "grad_norm": 0.7641172409057617, "learning_rate": 0.0001, "loss": 0.0495, "step": 26840 }, { "epoch": 76.2784090909091, "grad_norm": 0.5870921611785889, "learning_rate": 0.0001, "loss": 0.0491, "step": 26850 }, { "epoch": 76.30681818181819, "grad_norm": 0.6940385699272156, "learning_rate": 0.0001, "loss": 0.0473, "step": 26860 }, { "epoch": 76.33522727272727, "grad_norm": 0.6808137893676758, "learning_rate": 0.0001, "loss": 0.0497, "step": 26870 }, { "epoch": 76.36363636363636, "grad_norm": 0.7470855712890625, "learning_rate": 0.0001, "loss": 0.0478, "step": 26880 }, { "epoch": 76.39204545454545, "grad_norm": 0.7515180110931396, "learning_rate": 0.0001, "loss": 0.0475, "step": 26890 }, { "epoch": 76.42045454545455, "grad_norm": 0.5889479517936707, "learning_rate": 0.0001, "loss": 0.0487, "step": 26900 }, { "epoch": 76.44886363636364, "grad_norm": 0.600273609161377, "learning_rate": 0.0001, "loss": 0.0479, "step": 26910 }, { "epoch": 76.47727272727273, "grad_norm": 0.7066619396209717, "learning_rate": 0.0001, "loss": 0.05, "step": 26920 }, { "epoch": 76.50568181818181, "grad_norm": 0.794434666633606, "learning_rate": 0.0001, "loss": 0.0493, "step": 26930 }, { "epoch": 76.5340909090909, "grad_norm": 0.517598569393158, "learning_rate": 0.0001, "loss": 0.0462, "step": 26940 }, { "epoch": 76.5625, "grad_norm": 0.7150055170059204, "learning_rate": 0.0001, "loss": 0.0495, "step": 26950 }, { "epoch": 76.5909090909091, "grad_norm": 0.6902920603752136, "learning_rate": 0.0001, "loss": 0.0494, "step": 26960 }, { "epoch": 76.61931818181819, "grad_norm": 0.8069965839385986, "learning_rate": 0.0001, "loss": 0.0476, "step": 26970 }, { "epoch": 76.64772727272727, "grad_norm": 0.7762559056282043, "learning_rate": 0.0001, "loss": 0.0477, "step": 26980 }, { "epoch": 76.67613636363636, "grad_norm": 0.7302852272987366, "learning_rate": 0.0001, "loss": 0.0484, "step": 26990 }, { "epoch": 76.70454545454545, "grad_norm": 0.7422618865966797, "learning_rate": 0.0001, "loss": 0.0469, "step": 27000 }, { "epoch": 76.73295454545455, "grad_norm": 0.719369649887085, "learning_rate": 0.0001, "loss": 0.0477, "step": 27010 }, { "epoch": 76.76136363636364, "grad_norm": 0.7470158338546753, "learning_rate": 0.0001, "loss": 0.0466, "step": 27020 }, { "epoch": 76.78977272727273, "grad_norm": 0.8050602078437805, "learning_rate": 0.0001, "loss": 0.0458, "step": 27030 }, { "epoch": 76.81818181818181, "grad_norm": 0.617423415184021, "learning_rate": 0.0001, "loss": 0.0486, "step": 27040 }, { "epoch": 76.8465909090909, "grad_norm": 0.505489706993103, "learning_rate": 0.0001, "loss": 0.0456, "step": 27050 }, { "epoch": 76.875, "grad_norm": 0.5494895577430725, "learning_rate": 0.0001, "loss": 0.0464, "step": 27060 }, { "epoch": 76.9034090909091, "grad_norm": 0.5236529111862183, "learning_rate": 0.0001, "loss": 0.0455, "step": 27070 }, { "epoch": 76.93181818181819, "grad_norm": 0.6138285994529724, "learning_rate": 0.0001, "loss": 0.045, "step": 27080 }, { "epoch": 76.96022727272727, "grad_norm": 0.6185349225997925, "learning_rate": 0.0001, "loss": 0.0499, "step": 27090 }, { "epoch": 76.98863636363636, "grad_norm": 0.6920495629310608, "learning_rate": 0.0001, "loss": 0.0496, "step": 27100 }, { "epoch": 77.01704545454545, "grad_norm": 0.7093076109886169, "learning_rate": 0.0001, "loss": 0.0475, "step": 27110 }, { "epoch": 77.04545454545455, "grad_norm": 0.9286143779754639, "learning_rate": 0.0001, "loss": 0.0471, "step": 27120 }, { "epoch": 77.07386363636364, "grad_norm": 1.0601509809494019, "learning_rate": 0.0001, "loss": 0.0479, "step": 27130 }, { "epoch": 77.10227272727273, "grad_norm": 0.9617673754692078, "learning_rate": 0.0001, "loss": 0.0468, "step": 27140 }, { "epoch": 77.13068181818181, "grad_norm": 1.2405085563659668, "learning_rate": 0.0001, "loss": 0.0454, "step": 27150 }, { "epoch": 77.1590909090909, "grad_norm": 0.8260979056358337, "learning_rate": 0.0001, "loss": 0.0461, "step": 27160 }, { "epoch": 77.1875, "grad_norm": 0.6605196595191956, "learning_rate": 0.0001, "loss": 0.047, "step": 27170 }, { "epoch": 77.2159090909091, "grad_norm": 0.7948494553565979, "learning_rate": 0.0001, "loss": 0.0464, "step": 27180 }, { "epoch": 77.24431818181819, "grad_norm": 0.8191278576850891, "learning_rate": 0.0001, "loss": 0.0479, "step": 27190 }, { "epoch": 77.27272727272727, "grad_norm": 0.7254536151885986, "learning_rate": 0.0001, "loss": 0.0496, "step": 27200 }, { "epoch": 77.30113636363636, "grad_norm": 0.6482180953025818, "learning_rate": 0.0001, "loss": 0.046, "step": 27210 }, { "epoch": 77.32954545454545, "grad_norm": 0.8210635781288147, "learning_rate": 0.0001, "loss": 0.0464, "step": 27220 }, { "epoch": 77.35795454545455, "grad_norm": 0.6660655736923218, "learning_rate": 0.0001, "loss": 0.0465, "step": 27230 }, { "epoch": 77.38636363636364, "grad_norm": 0.6348584294319153, "learning_rate": 0.0001, "loss": 0.0456, "step": 27240 }, { "epoch": 77.41477272727273, "grad_norm": 0.8718886375427246, "learning_rate": 0.0001, "loss": 0.0463, "step": 27250 }, { "epoch": 77.44318181818181, "grad_norm": 0.919781506061554, "learning_rate": 0.0001, "loss": 0.0453, "step": 27260 }, { "epoch": 77.4715909090909, "grad_norm": 0.9934787154197693, "learning_rate": 0.0001, "loss": 0.0471, "step": 27270 }, { "epoch": 77.5, "grad_norm": 0.9608179330825806, "learning_rate": 0.0001, "loss": 0.0463, "step": 27280 }, { "epoch": 77.5284090909091, "grad_norm": 0.7589172720909119, "learning_rate": 0.0001, "loss": 0.0491, "step": 27290 }, { "epoch": 77.55681818181819, "grad_norm": 0.9856165647506714, "learning_rate": 0.0001, "loss": 0.0459, "step": 27300 }, { "epoch": 77.58522727272727, "grad_norm": 0.8956001996994019, "learning_rate": 0.0001, "loss": 0.0454, "step": 27310 }, { "epoch": 77.61363636363636, "grad_norm": 1.1567500829696655, "learning_rate": 0.0001, "loss": 0.048, "step": 27320 }, { "epoch": 77.64204545454545, "grad_norm": 0.9312670826911926, "learning_rate": 0.0001, "loss": 0.0452, "step": 27330 }, { "epoch": 77.67045454545455, "grad_norm": 0.8068075776100159, "learning_rate": 0.0001, "loss": 0.0451, "step": 27340 }, { "epoch": 77.69886363636364, "grad_norm": 0.5929557085037231, "learning_rate": 0.0001, "loss": 0.0475, "step": 27350 }, { "epoch": 77.72727272727273, "grad_norm": 0.8782048225402832, "learning_rate": 0.0001, "loss": 0.0467, "step": 27360 }, { "epoch": 77.75568181818181, "grad_norm": 1.0722332000732422, "learning_rate": 0.0001, "loss": 0.0468, "step": 27370 }, { "epoch": 77.7840909090909, "grad_norm": 0.7801492810249329, "learning_rate": 0.0001, "loss": 0.0468, "step": 27380 }, { "epoch": 77.8125, "grad_norm": 0.605384886264801, "learning_rate": 0.0001, "loss": 0.0472, "step": 27390 }, { "epoch": 77.8409090909091, "grad_norm": 0.9070475101470947, "learning_rate": 0.0001, "loss": 0.0495, "step": 27400 }, { "epoch": 77.86931818181819, "grad_norm": 1.0343130826950073, "learning_rate": 0.0001, "loss": 0.0463, "step": 27410 }, { "epoch": 77.89772727272727, "grad_norm": 0.7611730098724365, "learning_rate": 0.0001, "loss": 0.0468, "step": 27420 }, { "epoch": 77.92613636363636, "grad_norm": 0.8008614182472229, "learning_rate": 0.0001, "loss": 0.0469, "step": 27430 }, { "epoch": 77.95454545454545, "grad_norm": 0.7293544411659241, "learning_rate": 0.0001, "loss": 0.0478, "step": 27440 }, { "epoch": 77.98295454545455, "grad_norm": 0.832565188407898, "learning_rate": 0.0001, "loss": 0.0474, "step": 27450 }, { "epoch": 78.01136363636364, "grad_norm": 0.7416606545448303, "learning_rate": 0.0001, "loss": 0.0464, "step": 27460 }, { "epoch": 78.03977272727273, "grad_norm": 0.8914027214050293, "learning_rate": 0.0001, "loss": 0.0455, "step": 27470 }, { "epoch": 78.06818181818181, "grad_norm": 0.6268876194953918, "learning_rate": 0.0001, "loss": 0.0446, "step": 27480 }, { "epoch": 78.0965909090909, "grad_norm": 0.7498577237129211, "learning_rate": 0.0001, "loss": 0.0454, "step": 27490 }, { "epoch": 78.125, "grad_norm": 0.7658631801605225, "learning_rate": 0.0001, "loss": 0.046, "step": 27500 }, { "epoch": 78.1534090909091, "grad_norm": 0.9924762845039368, "learning_rate": 0.0001, "loss": 0.0457, "step": 27510 }, { "epoch": 78.18181818181819, "grad_norm": 0.8507946133613586, "learning_rate": 0.0001, "loss": 0.0442, "step": 27520 }, { "epoch": 78.21022727272727, "grad_norm": 0.8294076323509216, "learning_rate": 0.0001, "loss": 0.0457, "step": 27530 }, { "epoch": 78.23863636363636, "grad_norm": 0.8344864249229431, "learning_rate": 0.0001, "loss": 0.0455, "step": 27540 }, { "epoch": 78.26704545454545, "grad_norm": 0.6620252132415771, "learning_rate": 0.0001, "loss": 0.0454, "step": 27550 }, { "epoch": 78.29545454545455, "grad_norm": 0.7037463784217834, "learning_rate": 0.0001, "loss": 0.0471, "step": 27560 }, { "epoch": 78.32386363636364, "grad_norm": 0.7051752209663391, "learning_rate": 0.0001, "loss": 0.0457, "step": 27570 }, { "epoch": 78.35227272727273, "grad_norm": 0.7858211398124695, "learning_rate": 0.0001, "loss": 0.0467, "step": 27580 }, { "epoch": 78.38068181818181, "grad_norm": 0.7993125915527344, "learning_rate": 0.0001, "loss": 0.0469, "step": 27590 }, { "epoch": 78.4090909090909, "grad_norm": 0.7271113395690918, "learning_rate": 0.0001, "loss": 0.0465, "step": 27600 }, { "epoch": 78.4375, "grad_norm": 0.7963870763778687, "learning_rate": 0.0001, "loss": 0.0465, "step": 27610 }, { "epoch": 78.4659090909091, "grad_norm": 0.9144273400306702, "learning_rate": 0.0001, "loss": 0.0468, "step": 27620 }, { "epoch": 78.49431818181819, "grad_norm": 0.8622909784317017, "learning_rate": 0.0001, "loss": 0.0456, "step": 27630 }, { "epoch": 78.52272727272727, "grad_norm": 0.7054391503334045, "learning_rate": 0.0001, "loss": 0.0457, "step": 27640 }, { "epoch": 78.55113636363636, "grad_norm": 0.7337654232978821, "learning_rate": 0.0001, "loss": 0.0464, "step": 27650 }, { "epoch": 78.57954545454545, "grad_norm": 0.6751934885978699, "learning_rate": 0.0001, "loss": 0.0476, "step": 27660 }, { "epoch": 78.60795454545455, "grad_norm": 0.7194545269012451, "learning_rate": 0.0001, "loss": 0.047, "step": 27670 }, { "epoch": 78.63636363636364, "grad_norm": 0.7210686802864075, "learning_rate": 0.0001, "loss": 0.0456, "step": 27680 }, { "epoch": 78.66477272727273, "grad_norm": 0.9098225235939026, "learning_rate": 0.0001, "loss": 0.0465, "step": 27690 }, { "epoch": 78.69318181818181, "grad_norm": 0.9643121361732483, "learning_rate": 0.0001, "loss": 0.0462, "step": 27700 }, { "epoch": 78.7215909090909, "grad_norm": 1.057265043258667, "learning_rate": 0.0001, "loss": 0.0463, "step": 27710 }, { "epoch": 78.75, "grad_norm": 0.8858153820037842, "learning_rate": 0.0001, "loss": 0.0477, "step": 27720 }, { "epoch": 78.7784090909091, "grad_norm": 0.7570127248764038, "learning_rate": 0.0001, "loss": 0.0448, "step": 27730 }, { "epoch": 78.80681818181819, "grad_norm": 0.8737295269966125, "learning_rate": 0.0001, "loss": 0.046, "step": 27740 }, { "epoch": 78.83522727272727, "grad_norm": 0.8919824957847595, "learning_rate": 0.0001, "loss": 0.0455, "step": 27750 }, { "epoch": 78.86363636363636, "grad_norm": 0.834999144077301, "learning_rate": 0.0001, "loss": 0.0444, "step": 27760 }, { "epoch": 78.89204545454545, "grad_norm": 0.7257569432258606, "learning_rate": 0.0001, "loss": 0.0442, "step": 27770 }, { "epoch": 78.92045454545455, "grad_norm": 0.7738620042800903, "learning_rate": 0.0001, "loss": 0.0475, "step": 27780 }, { "epoch": 78.94886363636364, "grad_norm": 0.6493619084358215, "learning_rate": 0.0001, "loss": 0.0442, "step": 27790 }, { "epoch": 78.97727272727273, "grad_norm": 0.7581443190574646, "learning_rate": 0.0001, "loss": 0.0465, "step": 27800 }, { "epoch": 79.00568181818181, "grad_norm": 0.8084586262702942, "learning_rate": 0.0001, "loss": 0.0456, "step": 27810 }, { "epoch": 79.0340909090909, "grad_norm": 0.6828577518463135, "learning_rate": 0.0001, "loss": 0.046, "step": 27820 }, { "epoch": 79.0625, "grad_norm": 0.6780984401702881, "learning_rate": 0.0001, "loss": 0.044, "step": 27830 }, { "epoch": 79.0909090909091, "grad_norm": 0.7520745396614075, "learning_rate": 0.0001, "loss": 0.0452, "step": 27840 }, { "epoch": 79.11931818181819, "grad_norm": 0.6034306883811951, "learning_rate": 0.0001, "loss": 0.0452, "step": 27850 }, { "epoch": 79.14772727272727, "grad_norm": 0.8241128921508789, "learning_rate": 0.0001, "loss": 0.0439, "step": 27860 }, { "epoch": 79.17613636363636, "grad_norm": 0.7252616286277771, "learning_rate": 0.0001, "loss": 0.0445, "step": 27870 }, { "epoch": 79.20454545454545, "grad_norm": 0.8703776597976685, "learning_rate": 0.0001, "loss": 0.046, "step": 27880 }, { "epoch": 79.23295454545455, "grad_norm": 0.6853988766670227, "learning_rate": 0.0001, "loss": 0.0457, "step": 27890 }, { "epoch": 79.26136363636364, "grad_norm": 0.5899876356124878, "learning_rate": 0.0001, "loss": 0.0455, "step": 27900 }, { "epoch": 79.28977272727273, "grad_norm": 0.5329005122184753, "learning_rate": 0.0001, "loss": 0.0442, "step": 27910 }, { "epoch": 79.31818181818181, "grad_norm": 0.5844208002090454, "learning_rate": 0.0001, "loss": 0.0449, "step": 27920 }, { "epoch": 79.3465909090909, "grad_norm": 0.5217543840408325, "learning_rate": 0.0001, "loss": 0.0441, "step": 27930 }, { "epoch": 79.375, "grad_norm": 0.631199300289154, "learning_rate": 0.0001, "loss": 0.0452, "step": 27940 }, { "epoch": 79.4034090909091, "grad_norm": 0.5435271859169006, "learning_rate": 0.0001, "loss": 0.0452, "step": 27950 }, { "epoch": 79.43181818181819, "grad_norm": 0.5514788627624512, "learning_rate": 0.0001, "loss": 0.0463, "step": 27960 }, { "epoch": 79.46022727272727, "grad_norm": 0.6063737273216248, "learning_rate": 0.0001, "loss": 0.0462, "step": 27970 }, { "epoch": 79.48863636363636, "grad_norm": 0.6440352201461792, "learning_rate": 0.0001, "loss": 0.0459, "step": 27980 }, { "epoch": 79.51704545454545, "grad_norm": 0.6347674131393433, "learning_rate": 0.0001, "loss": 0.045, "step": 27990 }, { "epoch": 79.54545454545455, "grad_norm": 0.5119302272796631, "learning_rate": 0.0001, "loss": 0.0443, "step": 28000 }, { "epoch": 79.57386363636364, "grad_norm": 0.665009617805481, "learning_rate": 0.0001, "loss": 0.0443, "step": 28010 }, { "epoch": 79.60227272727273, "grad_norm": 1.1492528915405273, "learning_rate": 0.0001, "loss": 0.0471, "step": 28020 }, { "epoch": 79.63068181818181, "grad_norm": 0.6289621591567993, "learning_rate": 0.0001, "loss": 0.0471, "step": 28030 }, { "epoch": 79.6590909090909, "grad_norm": 0.6949747204780579, "learning_rate": 0.0001, "loss": 0.0459, "step": 28040 }, { "epoch": 79.6875, "grad_norm": 0.7128562331199646, "learning_rate": 0.0001, "loss": 0.0487, "step": 28050 }, { "epoch": 79.7159090909091, "grad_norm": 0.7679532766342163, "learning_rate": 0.0001, "loss": 0.0463, "step": 28060 }, { "epoch": 79.74431818181819, "grad_norm": 1.129748821258545, "learning_rate": 0.0001, "loss": 0.0485, "step": 28070 }, { "epoch": 79.77272727272727, "grad_norm": 1.1302276849746704, "learning_rate": 0.0001, "loss": 0.0447, "step": 28080 }, { "epoch": 79.80113636363636, "grad_norm": 1.242452621459961, "learning_rate": 0.0001, "loss": 0.0453, "step": 28090 }, { "epoch": 79.82954545454545, "grad_norm": 1.3404399156570435, "learning_rate": 0.0001, "loss": 0.0474, "step": 28100 }, { "epoch": 79.85795454545455, "grad_norm": 1.3003270626068115, "learning_rate": 0.0001, "loss": 0.0461, "step": 28110 }, { "epoch": 79.88636363636364, "grad_norm": 1.1596304178237915, "learning_rate": 0.0001, "loss": 0.045, "step": 28120 }, { "epoch": 79.91477272727273, "grad_norm": 1.0483144521713257, "learning_rate": 0.0001, "loss": 0.0467, "step": 28130 }, { "epoch": 79.94318181818181, "grad_norm": 0.9983393549919128, "learning_rate": 0.0001, "loss": 0.045, "step": 28140 }, { "epoch": 79.9715909090909, "grad_norm": 0.8313050270080566, "learning_rate": 0.0001, "loss": 0.0428, "step": 28150 }, { "epoch": 80.0, "grad_norm": 0.9205082654953003, "learning_rate": 0.0001, "loss": 0.0468, "step": 28160 }, { "epoch": 80.0284090909091, "grad_norm": 0.8589319586753845, "learning_rate": 0.0001, "loss": 0.0464, "step": 28170 }, { "epoch": 80.05681818181819, "grad_norm": 0.9702844023704529, "learning_rate": 0.0001, "loss": 0.0424, "step": 28180 }, { "epoch": 80.08522727272727, "grad_norm": 0.8930377960205078, "learning_rate": 0.0001, "loss": 0.0448, "step": 28190 }, { "epoch": 80.11363636363636, "grad_norm": 0.721045970916748, "learning_rate": 0.0001, "loss": 0.044, "step": 28200 }, { "epoch": 80.14204545454545, "grad_norm": 0.7323723435401917, "learning_rate": 0.0001, "loss": 0.0448, "step": 28210 }, { "epoch": 80.17045454545455, "grad_norm": 1.3309814929962158, "learning_rate": 0.0001, "loss": 0.0447, "step": 28220 }, { "epoch": 80.19886363636364, "grad_norm": 1.14047110080719, "learning_rate": 0.0001, "loss": 0.045, "step": 28230 }, { "epoch": 80.22727272727273, "grad_norm": 1.6657135486602783, "learning_rate": 0.0001, "loss": 0.0454, "step": 28240 }, { "epoch": 80.25568181818181, "grad_norm": 1.1823036670684814, "learning_rate": 0.0001, "loss": 0.0442, "step": 28250 }, { "epoch": 80.2840909090909, "grad_norm": 0.9169923663139343, "learning_rate": 0.0001, "loss": 0.044, "step": 28260 }, { "epoch": 80.3125, "grad_norm": 0.8880297541618347, "learning_rate": 0.0001, "loss": 0.0447, "step": 28270 }, { "epoch": 80.3409090909091, "grad_norm": 0.778057336807251, "learning_rate": 0.0001, "loss": 0.0438, "step": 28280 }, { "epoch": 80.36931818181819, "grad_norm": 0.837870180606842, "learning_rate": 0.0001, "loss": 0.0439, "step": 28290 }, { "epoch": 80.39772727272727, "grad_norm": 0.8132756352424622, "learning_rate": 0.0001, "loss": 0.0457, "step": 28300 }, { "epoch": 80.42613636363636, "grad_norm": 0.9789218306541443, "learning_rate": 0.0001, "loss": 0.0453, "step": 28310 }, { "epoch": 80.45454545454545, "grad_norm": 1.0025867223739624, "learning_rate": 0.0001, "loss": 0.0424, "step": 28320 }, { "epoch": 80.48295454545455, "grad_norm": 0.8962274789810181, "learning_rate": 0.0001, "loss": 0.0434, "step": 28330 }, { "epoch": 80.51136363636364, "grad_norm": 0.9207645058631897, "learning_rate": 0.0001, "loss": 0.0441, "step": 28340 }, { "epoch": 80.53977272727273, "grad_norm": 0.7955727577209473, "learning_rate": 0.0001, "loss": 0.0439, "step": 28350 }, { "epoch": 80.56818181818181, "grad_norm": 0.7597567439079285, "learning_rate": 0.0001, "loss": 0.0442, "step": 28360 }, { "epoch": 80.5965909090909, "grad_norm": 0.9111728072166443, "learning_rate": 0.0001, "loss": 0.0432, "step": 28370 }, { "epoch": 80.625, "grad_norm": 0.7924389243125916, "learning_rate": 0.0001, "loss": 0.0443, "step": 28380 }, { "epoch": 80.6534090909091, "grad_norm": 0.7888645529747009, "learning_rate": 0.0001, "loss": 0.047, "step": 28390 }, { "epoch": 80.68181818181819, "grad_norm": 0.7492277026176453, "learning_rate": 0.0001, "loss": 0.0451, "step": 28400 }, { "epoch": 80.71022727272727, "grad_norm": 0.5666723251342773, "learning_rate": 0.0001, "loss": 0.0441, "step": 28410 }, { "epoch": 80.73863636363636, "grad_norm": 0.5718184113502502, "learning_rate": 0.0001, "loss": 0.044, "step": 28420 }, { "epoch": 80.76704545454545, "grad_norm": 0.5507611632347107, "learning_rate": 0.0001, "loss": 0.045, "step": 28430 }, { "epoch": 80.79545454545455, "grad_norm": 0.6398160457611084, "learning_rate": 0.0001, "loss": 0.045, "step": 28440 }, { "epoch": 80.82386363636364, "grad_norm": 0.6520999073982239, "learning_rate": 0.0001, "loss": 0.0446, "step": 28450 }, { "epoch": 80.85227272727273, "grad_norm": 0.6655693054199219, "learning_rate": 0.0001, "loss": 0.0443, "step": 28460 }, { "epoch": 80.88068181818181, "grad_norm": 0.6784167885780334, "learning_rate": 0.0001, "loss": 0.044, "step": 28470 }, { "epoch": 80.9090909090909, "grad_norm": 0.6713101267814636, "learning_rate": 0.0001, "loss": 0.0462, "step": 28480 }, { "epoch": 80.9375, "grad_norm": 0.6968391537666321, "learning_rate": 0.0001, "loss": 0.0455, "step": 28490 }, { "epoch": 80.9659090909091, "grad_norm": 0.5912553668022156, "learning_rate": 0.0001, "loss": 0.0458, "step": 28500 }, { "epoch": 80.99431818181819, "grad_norm": 0.6505694389343262, "learning_rate": 0.0001, "loss": 0.0455, "step": 28510 }, { "epoch": 81.02272727272727, "grad_norm": 0.6993499994277954, "learning_rate": 0.0001, "loss": 0.0438, "step": 28520 }, { "epoch": 81.05113636363636, "grad_norm": 0.8444068431854248, "learning_rate": 0.0001, "loss": 0.0448, "step": 28530 }, { "epoch": 81.07954545454545, "grad_norm": 0.642077624797821, "learning_rate": 0.0001, "loss": 0.0446, "step": 28540 }, { "epoch": 81.10795454545455, "grad_norm": 0.6671980023384094, "learning_rate": 0.0001, "loss": 0.0442, "step": 28550 }, { "epoch": 81.13636363636364, "grad_norm": 0.7422840595245361, "learning_rate": 0.0001, "loss": 0.043, "step": 28560 }, { "epoch": 81.16477272727273, "grad_norm": 0.7244125604629517, "learning_rate": 0.0001, "loss": 0.043, "step": 28570 }, { "epoch": 81.19318181818181, "grad_norm": 0.5740301609039307, "learning_rate": 0.0001, "loss": 0.043, "step": 28580 }, { "epoch": 81.2215909090909, "grad_norm": 0.5553819537162781, "learning_rate": 0.0001, "loss": 0.0454, "step": 28590 }, { "epoch": 81.25, "grad_norm": 0.4433813989162445, "learning_rate": 0.0001, "loss": 0.0443, "step": 28600 }, { "epoch": 81.2784090909091, "grad_norm": 0.5169538855552673, "learning_rate": 0.0001, "loss": 0.0438, "step": 28610 }, { "epoch": 81.30681818181819, "grad_norm": 0.5083233118057251, "learning_rate": 0.0001, "loss": 0.0452, "step": 28620 }, { "epoch": 81.33522727272727, "grad_norm": 0.5519469976425171, "learning_rate": 0.0001, "loss": 0.0425, "step": 28630 }, { "epoch": 81.36363636363636, "grad_norm": 0.731152355670929, "learning_rate": 0.0001, "loss": 0.0425, "step": 28640 }, { "epoch": 81.39204545454545, "grad_norm": 0.4356805086135864, "learning_rate": 0.0001, "loss": 0.0447, "step": 28650 }, { "epoch": 81.42045454545455, "grad_norm": 0.812091052532196, "learning_rate": 0.0001, "loss": 0.0415, "step": 28660 }, { "epoch": 81.44886363636364, "grad_norm": 0.5545047521591187, "learning_rate": 0.0001, "loss": 0.0411, "step": 28670 }, { "epoch": 81.47727272727273, "grad_norm": 0.8585087656974792, "learning_rate": 0.0001, "loss": 0.0439, "step": 28680 }, { "epoch": 81.50568181818181, "grad_norm": 0.7256617546081543, "learning_rate": 0.0001, "loss": 0.0445, "step": 28690 }, { "epoch": 81.5340909090909, "grad_norm": 0.7761886119842529, "learning_rate": 0.0001, "loss": 0.044, "step": 28700 }, { "epoch": 81.5625, "grad_norm": 0.7050015330314636, "learning_rate": 0.0001, "loss": 0.0449, "step": 28710 }, { "epoch": 81.5909090909091, "grad_norm": 1.3305480480194092, "learning_rate": 0.0001, "loss": 0.047, "step": 28720 }, { "epoch": 81.61931818181819, "grad_norm": 0.9800511598587036, "learning_rate": 0.0001, "loss": 0.0465, "step": 28730 }, { "epoch": 81.64772727272727, "grad_norm": 1.166397213935852, "learning_rate": 0.0001, "loss": 0.0447, "step": 28740 }, { "epoch": 81.67613636363636, "grad_norm": 1.0785977840423584, "learning_rate": 0.0001, "loss": 0.0446, "step": 28750 }, { "epoch": 81.70454545454545, "grad_norm": 0.912084698677063, "learning_rate": 0.0001, "loss": 0.0441, "step": 28760 }, { "epoch": 81.73295454545455, "grad_norm": 0.9032609462738037, "learning_rate": 0.0001, "loss": 0.0461, "step": 28770 }, { "epoch": 81.76136363636364, "grad_norm": 1.6895674467086792, "learning_rate": 0.0001, "loss": 0.0436, "step": 28780 }, { "epoch": 81.78977272727273, "grad_norm": 1.4334778785705566, "learning_rate": 0.0001, "loss": 0.0475, "step": 28790 }, { "epoch": 81.81818181818181, "grad_norm": 1.5415606498718262, "learning_rate": 0.0001, "loss": 0.0449, "step": 28800 }, { "epoch": 81.8465909090909, "grad_norm": 1.1889894008636475, "learning_rate": 0.0001, "loss": 0.0453, "step": 28810 }, { "epoch": 81.875, "grad_norm": 1.0240460634231567, "learning_rate": 0.0001, "loss": 0.0428, "step": 28820 }, { "epoch": 81.9034090909091, "grad_norm": 0.9590867757797241, "learning_rate": 0.0001, "loss": 0.0419, "step": 28830 }, { "epoch": 81.93181818181819, "grad_norm": 0.776984453201294, "learning_rate": 0.0001, "loss": 0.0418, "step": 28840 }, { "epoch": 81.96022727272727, "grad_norm": 1.2868221998214722, "learning_rate": 0.0001, "loss": 0.0462, "step": 28850 }, { "epoch": 81.98863636363636, "grad_norm": 1.035592794418335, "learning_rate": 0.0001, "loss": 0.0447, "step": 28860 }, { "epoch": 82.01704545454545, "grad_norm": 1.3734735250473022, "learning_rate": 0.0001, "loss": 0.0432, "step": 28870 }, { "epoch": 82.04545454545455, "grad_norm": 1.5599828958511353, "learning_rate": 0.0001, "loss": 0.0423, "step": 28880 }, { "epoch": 82.07386363636364, "grad_norm": 1.185050368309021, "learning_rate": 0.0001, "loss": 0.043, "step": 28890 }, { "epoch": 82.10227272727273, "grad_norm": 1.5994446277618408, "learning_rate": 0.0001, "loss": 0.0405, "step": 28900 }, { "epoch": 82.13068181818181, "grad_norm": 1.3795400857925415, "learning_rate": 0.0001, "loss": 0.0421, "step": 28910 }, { "epoch": 82.1590909090909, "grad_norm": 1.2823818922042847, "learning_rate": 0.0001, "loss": 0.0409, "step": 28920 }, { "epoch": 82.1875, "grad_norm": 1.39549720287323, "learning_rate": 0.0001, "loss": 0.041, "step": 28930 }, { "epoch": 82.2159090909091, "grad_norm": 1.1723809242248535, "learning_rate": 0.0001, "loss": 0.0403, "step": 28940 }, { "epoch": 82.24431818181819, "grad_norm": 0.7764673829078674, "learning_rate": 0.0001, "loss": 0.0405, "step": 28950 }, { "epoch": 82.27272727272727, "grad_norm": 0.7350073456764221, "learning_rate": 0.0001, "loss": 0.0426, "step": 28960 }, { "epoch": 82.30113636363636, "grad_norm": 0.8376882672309875, "learning_rate": 0.0001, "loss": 0.0417, "step": 28970 }, { "epoch": 82.32954545454545, "grad_norm": 0.7426922917366028, "learning_rate": 0.0001, "loss": 0.0426, "step": 28980 }, { "epoch": 82.35795454545455, "grad_norm": 1.0266258716583252, "learning_rate": 0.0001, "loss": 0.0433, "step": 28990 }, { "epoch": 82.38636363636364, "grad_norm": 0.6878785490989685, "learning_rate": 0.0001, "loss": 0.0414, "step": 29000 }, { "epoch": 82.41477272727273, "grad_norm": 0.8112396597862244, "learning_rate": 0.0001, "loss": 0.0436, "step": 29010 }, { "epoch": 82.44318181818181, "grad_norm": 0.7438217997550964, "learning_rate": 0.0001, "loss": 0.0428, "step": 29020 }, { "epoch": 82.4715909090909, "grad_norm": 0.5908733010292053, "learning_rate": 0.0001, "loss": 0.0428, "step": 29030 }, { "epoch": 82.5, "grad_norm": 0.7258604168891907, "learning_rate": 0.0001, "loss": 0.0439, "step": 29040 }, { "epoch": 82.5284090909091, "grad_norm": 0.5995661616325378, "learning_rate": 0.0001, "loss": 0.044, "step": 29050 }, { "epoch": 82.55681818181819, "grad_norm": 0.7221328616142273, "learning_rate": 0.0001, "loss": 0.0426, "step": 29060 }, { "epoch": 82.58522727272727, "grad_norm": 0.596004068851471, "learning_rate": 0.0001, "loss": 0.0454, "step": 29070 }, { "epoch": 82.61363636363636, "grad_norm": 0.5565772652626038, "learning_rate": 0.0001, "loss": 0.045, "step": 29080 }, { "epoch": 82.64204545454545, "grad_norm": 0.697248637676239, "learning_rate": 0.0001, "loss": 0.0458, "step": 29090 }, { "epoch": 82.67045454545455, "grad_norm": 0.6714693903923035, "learning_rate": 0.0001, "loss": 0.0453, "step": 29100 }, { "epoch": 82.69886363636364, "grad_norm": 0.7975269556045532, "learning_rate": 0.0001, "loss": 0.0467, "step": 29110 }, { "epoch": 82.72727272727273, "grad_norm": 0.870853841304779, "learning_rate": 0.0001, "loss": 0.0434, "step": 29120 }, { "epoch": 82.75568181818181, "grad_norm": 0.7218012809753418, "learning_rate": 0.0001, "loss": 0.0428, "step": 29130 }, { "epoch": 82.7840909090909, "grad_norm": 0.7033442258834839, "learning_rate": 0.0001, "loss": 0.0452, "step": 29140 }, { "epoch": 82.8125, "grad_norm": 0.7944255471229553, "learning_rate": 0.0001, "loss": 0.0437, "step": 29150 }, { "epoch": 82.8409090909091, "grad_norm": 0.712996244430542, "learning_rate": 0.0001, "loss": 0.0448, "step": 29160 }, { "epoch": 82.86931818181819, "grad_norm": 0.7248801589012146, "learning_rate": 0.0001, "loss": 0.0458, "step": 29170 }, { "epoch": 82.89772727272727, "grad_norm": 0.8134949803352356, "learning_rate": 0.0001, "loss": 0.0461, "step": 29180 }, { "epoch": 82.92613636363636, "grad_norm": 0.5927881002426147, "learning_rate": 0.0001, "loss": 0.0433, "step": 29190 }, { "epoch": 82.95454545454545, "grad_norm": 0.5970407128334045, "learning_rate": 0.0001, "loss": 0.0453, "step": 29200 }, { "epoch": 82.98295454545455, "grad_norm": 0.5957374572753906, "learning_rate": 0.0001, "loss": 0.0443, "step": 29210 }, { "epoch": 83.01136363636364, "grad_norm": 1.196097493171692, "learning_rate": 0.0001, "loss": 0.0513, "step": 29220 }, { "epoch": 83.03977272727273, "grad_norm": 1.275525450706482, "learning_rate": 0.0001, "loss": 0.0465, "step": 29230 }, { "epoch": 83.06818181818181, "grad_norm": 1.131506085395813, "learning_rate": 0.0001, "loss": 0.0453, "step": 29240 }, { "epoch": 83.0965909090909, "grad_norm": 1.2795711755752563, "learning_rate": 0.0001, "loss": 0.0448, "step": 29250 }, { "epoch": 83.125, "grad_norm": 1.0409256219863892, "learning_rate": 0.0001, "loss": 0.0438, "step": 29260 }, { "epoch": 83.1534090909091, "grad_norm": 1.1085119247436523, "learning_rate": 0.0001, "loss": 0.0439, "step": 29270 }, { "epoch": 83.18181818181819, "grad_norm": 1.0388301610946655, "learning_rate": 0.0001, "loss": 0.0463, "step": 29280 }, { "epoch": 83.21022727272727, "grad_norm": 0.8974637985229492, "learning_rate": 0.0001, "loss": 0.0437, "step": 29290 }, { "epoch": 83.23863636363636, "grad_norm": 0.8485453724861145, "learning_rate": 0.0001, "loss": 0.0437, "step": 29300 }, { "epoch": 83.26704545454545, "grad_norm": 0.7687119841575623, "learning_rate": 0.0001, "loss": 0.0411, "step": 29310 }, { "epoch": 83.29545454545455, "grad_norm": 0.844149649143219, "learning_rate": 0.0001, "loss": 0.0442, "step": 29320 }, { "epoch": 83.32386363636364, "grad_norm": 0.8516110181808472, "learning_rate": 0.0001, "loss": 0.0439, "step": 29330 }, { "epoch": 83.35227272727273, "grad_norm": 0.6241595149040222, "learning_rate": 0.0001, "loss": 0.0426, "step": 29340 }, { "epoch": 83.38068181818181, "grad_norm": 0.7771625518798828, "learning_rate": 0.0001, "loss": 0.0426, "step": 29350 }, { "epoch": 83.4090909090909, "grad_norm": 0.6839377284049988, "learning_rate": 0.0001, "loss": 0.0459, "step": 29360 }, { "epoch": 83.4375, "grad_norm": 0.7105209231376648, "learning_rate": 0.0001, "loss": 0.0428, "step": 29370 }, { "epoch": 83.4659090909091, "grad_norm": 1.2164829969406128, "learning_rate": 0.0001, "loss": 0.0415, "step": 29380 }, { "epoch": 83.49431818181819, "grad_norm": 0.969587504863739, "learning_rate": 0.0001, "loss": 0.046, "step": 29390 }, { "epoch": 83.52272727272727, "grad_norm": 0.8211323618888855, "learning_rate": 0.0001, "loss": 0.0429, "step": 29400 }, { "epoch": 83.55113636363636, "grad_norm": 0.6561827659606934, "learning_rate": 0.0001, "loss": 0.0429, "step": 29410 }, { "epoch": 83.57954545454545, "grad_norm": 0.694988489151001, "learning_rate": 0.0001, "loss": 0.0446, "step": 29420 }, { "epoch": 83.60795454545455, "grad_norm": 0.8020559549331665, "learning_rate": 0.0001, "loss": 0.0431, "step": 29430 }, { "epoch": 83.63636363636364, "grad_norm": 0.9569689631462097, "learning_rate": 0.0001, "loss": 0.0449, "step": 29440 }, { "epoch": 83.66477272727273, "grad_norm": 0.874990701675415, "learning_rate": 0.0001, "loss": 0.0452, "step": 29450 }, { "epoch": 83.69318181818181, "grad_norm": 0.6901691555976868, "learning_rate": 0.0001, "loss": 0.045, "step": 29460 }, { "epoch": 83.7215909090909, "grad_norm": 0.7540920376777649, "learning_rate": 0.0001, "loss": 0.0448, "step": 29470 }, { "epoch": 83.75, "grad_norm": 0.8184428811073303, "learning_rate": 0.0001, "loss": 0.0443, "step": 29480 }, { "epoch": 83.7784090909091, "grad_norm": 0.9438989162445068, "learning_rate": 0.0001, "loss": 0.0456, "step": 29490 }, { "epoch": 83.80681818181819, "grad_norm": 0.8632564544677734, "learning_rate": 0.0001, "loss": 0.0432, "step": 29500 }, { "epoch": 83.83522727272727, "grad_norm": 0.7080706357955933, "learning_rate": 0.0001, "loss": 0.0444, "step": 29510 }, { "epoch": 83.86363636363636, "grad_norm": 1.5293716192245483, "learning_rate": 0.0001, "loss": 0.0451, "step": 29520 }, { "epoch": 83.89204545454545, "grad_norm": 1.1970537900924683, "learning_rate": 0.0001, "loss": 0.0435, "step": 29530 }, { "epoch": 83.92045454545455, "grad_norm": 1.4087823629379272, "learning_rate": 0.0001, "loss": 0.0448, "step": 29540 }, { "epoch": 83.94886363636364, "grad_norm": 0.9186455011367798, "learning_rate": 0.0001, "loss": 0.0442, "step": 29550 }, { "epoch": 83.97727272727273, "grad_norm": 0.89197838306427, "learning_rate": 0.0001, "loss": 0.0417, "step": 29560 }, { "epoch": 84.00568181818181, "grad_norm": 0.8925532698631287, "learning_rate": 0.0001, "loss": 0.0433, "step": 29570 }, { "epoch": 84.0340909090909, "grad_norm": 0.9491750597953796, "learning_rate": 0.0001, "loss": 0.0458, "step": 29580 }, { "epoch": 84.0625, "grad_norm": 0.7977159023284912, "learning_rate": 0.0001, "loss": 0.0455, "step": 29590 }, { "epoch": 84.0909090909091, "grad_norm": 0.7931260466575623, "learning_rate": 0.0001, "loss": 0.0451, "step": 29600 }, { "epoch": 84.11931818181819, "grad_norm": 0.691261887550354, "learning_rate": 0.0001, "loss": 0.0434, "step": 29610 }, { "epoch": 84.14772727272727, "grad_norm": 0.9392285346984863, "learning_rate": 0.0001, "loss": 0.0456, "step": 29620 }, { "epoch": 84.17613636363636, "grad_norm": 1.9056662321090698, "learning_rate": 0.0001, "loss": 0.0456, "step": 29630 }, { "epoch": 84.20454545454545, "grad_norm": 1.3401696681976318, "learning_rate": 0.0001, "loss": 0.0439, "step": 29640 }, { "epoch": 84.23295454545455, "grad_norm": 1.1489585638046265, "learning_rate": 0.0001, "loss": 0.0456, "step": 29650 }, { "epoch": 84.26136363636364, "grad_norm": 1.2388312816619873, "learning_rate": 0.0001, "loss": 0.0452, "step": 29660 }, { "epoch": 84.28977272727273, "grad_norm": 1.1380677223205566, "learning_rate": 0.0001, "loss": 0.0417, "step": 29670 }, { "epoch": 84.31818181818181, "grad_norm": 0.9601713418960571, "learning_rate": 0.0001, "loss": 0.0433, "step": 29680 }, { "epoch": 84.3465909090909, "grad_norm": 1.2920958995819092, "learning_rate": 0.0001, "loss": 0.0425, "step": 29690 }, { "epoch": 84.375, "grad_norm": 1.1546963453292847, "learning_rate": 0.0001, "loss": 0.0429, "step": 29700 }, { "epoch": 84.4034090909091, "grad_norm": 1.6051952838897705, "learning_rate": 0.0001, "loss": 0.0439, "step": 29710 }, { "epoch": 84.43181818181819, "grad_norm": 1.979384422302246, "learning_rate": 0.0001, "loss": 0.042, "step": 29720 }, { "epoch": 84.46022727272727, "grad_norm": 2.053717613220215, "learning_rate": 0.0001, "loss": 0.043, "step": 29730 }, { "epoch": 84.48863636363636, "grad_norm": 1.1979448795318604, "learning_rate": 0.0001, "loss": 0.0426, "step": 29740 }, { "epoch": 84.51704545454545, "grad_norm": 1.6801135540008545, "learning_rate": 0.0001, "loss": 0.04, "step": 29750 }, { "epoch": 84.54545454545455, "grad_norm": 0.7504470348358154, "learning_rate": 0.0001, "loss": 0.0406, "step": 29760 }, { "epoch": 84.57386363636364, "grad_norm": 1.1721948385238647, "learning_rate": 0.0001, "loss": 0.0405, "step": 29770 }, { "epoch": 84.60227272727273, "grad_norm": 0.9734560251235962, "learning_rate": 0.0001, "loss": 0.0406, "step": 29780 }, { "epoch": 84.63068181818181, "grad_norm": 1.0226346254348755, "learning_rate": 0.0001, "loss": 0.0403, "step": 29790 }, { "epoch": 84.6590909090909, "grad_norm": 0.7675327658653259, "learning_rate": 0.0001, "loss": 0.0405, "step": 29800 }, { "epoch": 84.6875, "grad_norm": 0.9482449889183044, "learning_rate": 0.0001, "loss": 0.0419, "step": 29810 }, { "epoch": 84.7159090909091, "grad_norm": 0.7545673251152039, "learning_rate": 0.0001, "loss": 0.0422, "step": 29820 }, { "epoch": 84.74431818181819, "grad_norm": 0.7988566160202026, "learning_rate": 0.0001, "loss": 0.0433, "step": 29830 }, { "epoch": 84.77272727272727, "grad_norm": 0.9122010469436646, "learning_rate": 0.0001, "loss": 0.0433, "step": 29840 }, { "epoch": 84.80113636363636, "grad_norm": 0.8561978936195374, "learning_rate": 0.0001, "loss": 0.0443, "step": 29850 }, { "epoch": 84.82954545454545, "grad_norm": 0.6838624477386475, "learning_rate": 0.0001, "loss": 0.0426, "step": 29860 }, { "epoch": 84.85795454545455, "grad_norm": 1.1277817487716675, "learning_rate": 0.0001, "loss": 0.0441, "step": 29870 }, { "epoch": 84.88636363636364, "grad_norm": 1.5995237827301025, "learning_rate": 0.0001, "loss": 0.0462, "step": 29880 }, { "epoch": 84.91477272727273, "grad_norm": 1.4446282386779785, "learning_rate": 0.0001, "loss": 0.0451, "step": 29890 }, { "epoch": 84.94318181818181, "grad_norm": 1.1589558124542236, "learning_rate": 0.0001, "loss": 0.0436, "step": 29900 }, { "epoch": 84.9715909090909, "grad_norm": 1.063513159751892, "learning_rate": 0.0001, "loss": 0.0427, "step": 29910 }, { "epoch": 85.0, "grad_norm": 0.8769459128379822, "learning_rate": 0.0001, "loss": 0.0433, "step": 29920 }, { "epoch": 85.0284090909091, "grad_norm": 0.9075056910514832, "learning_rate": 0.0001, "loss": 0.0441, "step": 29930 }, { "epoch": 85.05681818181819, "grad_norm": 0.6981393098831177, "learning_rate": 0.0001, "loss": 0.0441, "step": 29940 }, { "epoch": 85.08522727272727, "grad_norm": 0.757666826248169, "learning_rate": 0.0001, "loss": 0.0436, "step": 29950 }, { "epoch": 85.11363636363636, "grad_norm": 0.7865346074104309, "learning_rate": 0.0001, "loss": 0.0431, "step": 29960 }, { "epoch": 85.14204545454545, "grad_norm": 0.8140910267829895, "learning_rate": 0.0001, "loss": 0.0452, "step": 29970 }, { "epoch": 85.17045454545455, "grad_norm": 0.6491031646728516, "learning_rate": 0.0001, "loss": 0.0434, "step": 29980 }, { "epoch": 85.19886363636364, "grad_norm": 0.8176889419555664, "learning_rate": 0.0001, "loss": 0.0448, "step": 29990 }, { "epoch": 85.22727272727273, "grad_norm": 0.8975611925125122, "learning_rate": 0.0001, "loss": 0.0434, "step": 30000 }, { "epoch": 85.25568181818181, "grad_norm": 0.6582433581352234, "learning_rate": 0.0001, "loss": 0.0431, "step": 30010 }, { "epoch": 85.2840909090909, "grad_norm": 0.7108902931213379, "learning_rate": 0.0001, "loss": 0.0439, "step": 30020 }, { "epoch": 85.3125, "grad_norm": 0.8365264534950256, "learning_rate": 0.0001, "loss": 0.043, "step": 30030 }, { "epoch": 85.3409090909091, "grad_norm": 1.003644347190857, "learning_rate": 0.0001, "loss": 0.0436, "step": 30040 }, { "epoch": 85.36931818181819, "grad_norm": 0.8627144694328308, "learning_rate": 0.0001, "loss": 0.043, "step": 30050 }, { "epoch": 85.39772727272727, "grad_norm": 0.8255655765533447, "learning_rate": 0.0001, "loss": 0.044, "step": 30060 }, { "epoch": 85.42613636363636, "grad_norm": 0.7838973999023438, "learning_rate": 0.0001, "loss": 0.043, "step": 30070 }, { "epoch": 85.45454545454545, "grad_norm": 0.7875524163246155, "learning_rate": 0.0001, "loss": 0.0432, "step": 30080 }, { "epoch": 85.48295454545455, "grad_norm": 0.9446814656257629, "learning_rate": 0.0001, "loss": 0.044, "step": 30090 }, { "epoch": 85.51136363636364, "grad_norm": 0.6380667686462402, "learning_rate": 0.0001, "loss": 0.0431, "step": 30100 }, { "epoch": 85.53977272727273, "grad_norm": 0.8755031228065491, "learning_rate": 0.0001, "loss": 0.0434, "step": 30110 }, { "epoch": 85.56818181818181, "grad_norm": 0.6563933491706848, "learning_rate": 0.0001, "loss": 0.0432, "step": 30120 }, { "epoch": 85.5965909090909, "grad_norm": 0.7544918060302734, "learning_rate": 0.0001, "loss": 0.0435, "step": 30130 }, { "epoch": 85.625, "grad_norm": 1.112111210823059, "learning_rate": 0.0001, "loss": 0.0463, "step": 30140 }, { "epoch": 85.6534090909091, "grad_norm": 0.9833115935325623, "learning_rate": 0.0001, "loss": 0.0433, "step": 30150 }, { "epoch": 85.68181818181819, "grad_norm": 0.914084255695343, "learning_rate": 0.0001, "loss": 0.0436, "step": 30160 }, { "epoch": 85.71022727272727, "grad_norm": 1.0748567581176758, "learning_rate": 0.0001, "loss": 0.0428, "step": 30170 }, { "epoch": 85.73863636363636, "grad_norm": 0.6668802499771118, "learning_rate": 0.0001, "loss": 0.0418, "step": 30180 }, { "epoch": 85.76704545454545, "grad_norm": 0.8170040845870972, "learning_rate": 0.0001, "loss": 0.0435, "step": 30190 }, { "epoch": 85.79545454545455, "grad_norm": 0.7252139449119568, "learning_rate": 0.0001, "loss": 0.0413, "step": 30200 }, { "epoch": 85.82386363636364, "grad_norm": 0.621457576751709, "learning_rate": 0.0001, "loss": 0.0436, "step": 30210 }, { "epoch": 85.85227272727273, "grad_norm": 0.7499610185623169, "learning_rate": 0.0001, "loss": 0.0448, "step": 30220 }, { "epoch": 85.88068181818181, "grad_norm": 0.6016923785209656, "learning_rate": 0.0001, "loss": 0.0419, "step": 30230 }, { "epoch": 85.9090909090909, "grad_norm": 0.9071959853172302, "learning_rate": 0.0001, "loss": 0.0429, "step": 30240 }, { "epoch": 85.9375, "grad_norm": 0.6232897043228149, "learning_rate": 0.0001, "loss": 0.0428, "step": 30250 }, { "epoch": 85.9659090909091, "grad_norm": 0.7610146999359131, "learning_rate": 0.0001, "loss": 0.0442, "step": 30260 }, { "epoch": 85.99431818181819, "grad_norm": 0.6071455478668213, "learning_rate": 0.0001, "loss": 0.0437, "step": 30270 }, { "epoch": 86.02272727272727, "grad_norm": 0.6175063848495483, "learning_rate": 0.0001, "loss": 0.04, "step": 30280 }, { "epoch": 86.05113636363636, "grad_norm": 0.7343127727508545, "learning_rate": 0.0001, "loss": 0.0434, "step": 30290 }, { "epoch": 86.07954545454545, "grad_norm": 0.7600955367088318, "learning_rate": 0.0001, "loss": 0.0421, "step": 30300 }, { "epoch": 86.10795454545455, "grad_norm": 0.5361849665641785, "learning_rate": 0.0001, "loss": 0.0418, "step": 30310 }, { "epoch": 86.13636363636364, "grad_norm": 0.6238926649093628, "learning_rate": 0.0001, "loss": 0.0419, "step": 30320 }, { "epoch": 86.16477272727273, "grad_norm": 0.530532717704773, "learning_rate": 0.0001, "loss": 0.0416, "step": 30330 }, { "epoch": 86.19318181818181, "grad_norm": 0.6140005588531494, "learning_rate": 0.0001, "loss": 0.0418, "step": 30340 }, { "epoch": 86.2215909090909, "grad_norm": 0.5914357900619507, "learning_rate": 0.0001, "loss": 0.0416, "step": 30350 }, { "epoch": 86.25, "grad_norm": 0.5596780776977539, "learning_rate": 0.0001, "loss": 0.0427, "step": 30360 }, { "epoch": 86.2784090909091, "grad_norm": 0.5929566025733948, "learning_rate": 0.0001, "loss": 0.0425, "step": 30370 }, { "epoch": 86.30681818181819, "grad_norm": 0.6372137069702148, "learning_rate": 0.0001, "loss": 0.043, "step": 30380 }, { "epoch": 86.33522727272727, "grad_norm": 0.6501078009605408, "learning_rate": 0.0001, "loss": 0.0437, "step": 30390 }, { "epoch": 86.36363636363636, "grad_norm": 0.57587730884552, "learning_rate": 0.0001, "loss": 0.0417, "step": 30400 }, { "epoch": 86.39204545454545, "grad_norm": 0.6445661187171936, "learning_rate": 0.0001, "loss": 0.0419, "step": 30410 }, { "epoch": 86.42045454545455, "grad_norm": 0.7396770715713501, "learning_rate": 0.0001, "loss": 0.0457, "step": 30420 }, { "epoch": 86.44886363636364, "grad_norm": 0.673376202583313, "learning_rate": 0.0001, "loss": 0.0432, "step": 30430 }, { "epoch": 86.47727272727273, "grad_norm": 0.6946069598197937, "learning_rate": 0.0001, "loss": 0.0438, "step": 30440 }, { "epoch": 86.50568181818181, "grad_norm": 0.6759048104286194, "learning_rate": 0.0001, "loss": 0.0432, "step": 30450 }, { "epoch": 86.5340909090909, "grad_norm": 0.5420788526535034, "learning_rate": 0.0001, "loss": 0.0421, "step": 30460 }, { "epoch": 86.5625, "grad_norm": 0.6719872355461121, "learning_rate": 0.0001, "loss": 0.0418, "step": 30470 }, { "epoch": 86.5909090909091, "grad_norm": 0.6998466849327087, "learning_rate": 0.0001, "loss": 0.0426, "step": 30480 }, { "epoch": 86.61931818181819, "grad_norm": 0.6579269766807556, "learning_rate": 0.0001, "loss": 0.0429, "step": 30490 }, { "epoch": 86.64772727272727, "grad_norm": 0.7772161364555359, "learning_rate": 0.0001, "loss": 0.043, "step": 30500 }, { "epoch": 86.67613636363636, "grad_norm": 0.5968173742294312, "learning_rate": 0.0001, "loss": 0.0433, "step": 30510 }, { "epoch": 86.70454545454545, "grad_norm": 0.6808553338050842, "learning_rate": 0.0001, "loss": 0.0435, "step": 30520 }, { "epoch": 86.73295454545455, "grad_norm": 0.7286439538002014, "learning_rate": 0.0001, "loss": 0.044, "step": 30530 }, { "epoch": 86.76136363636364, "grad_norm": 0.6226254105567932, "learning_rate": 0.0001, "loss": 0.0437, "step": 30540 }, { "epoch": 86.78977272727273, "grad_norm": 0.61110919713974, "learning_rate": 0.0001, "loss": 0.0447, "step": 30550 }, { "epoch": 86.81818181818181, "grad_norm": 0.6020199656486511, "learning_rate": 0.0001, "loss": 0.0432, "step": 30560 }, { "epoch": 86.8465909090909, "grad_norm": 0.630531907081604, "learning_rate": 0.0001, "loss": 0.0432, "step": 30570 }, { "epoch": 86.875, "grad_norm": 0.5699210166931152, "learning_rate": 0.0001, "loss": 0.0429, "step": 30580 }, { "epoch": 86.9034090909091, "grad_norm": 0.5879133343696594, "learning_rate": 0.0001, "loss": 0.0447, "step": 30590 }, { "epoch": 86.93181818181819, "grad_norm": 0.6827641129493713, "learning_rate": 0.0001, "loss": 0.0446, "step": 30600 }, { "epoch": 86.96022727272727, "grad_norm": 0.581780731678009, "learning_rate": 0.0001, "loss": 0.0456, "step": 30610 }, { "epoch": 86.98863636363636, "grad_norm": 0.5517546534538269, "learning_rate": 0.0001, "loss": 0.0442, "step": 30620 }, { "epoch": 87.01704545454545, "grad_norm": 0.6034563779830933, "learning_rate": 0.0001, "loss": 0.0423, "step": 30630 }, { "epoch": 87.04545454545455, "grad_norm": 0.5161349177360535, "learning_rate": 0.0001, "loss": 0.0433, "step": 30640 }, { "epoch": 87.07386363636364, "grad_norm": 0.6517345309257507, "learning_rate": 0.0001, "loss": 0.0438, "step": 30650 }, { "epoch": 87.10227272727273, "grad_norm": 0.7236988544464111, "learning_rate": 0.0001, "loss": 0.0436, "step": 30660 }, { "epoch": 87.13068181818181, "grad_norm": 0.5659297704696655, "learning_rate": 0.0001, "loss": 0.043, "step": 30670 }, { "epoch": 87.1590909090909, "grad_norm": 0.5743705630302429, "learning_rate": 0.0001, "loss": 0.0442, "step": 30680 }, { "epoch": 87.1875, "grad_norm": 0.4794277846813202, "learning_rate": 0.0001, "loss": 0.0442, "step": 30690 }, { "epoch": 87.2159090909091, "grad_norm": 0.6366981267929077, "learning_rate": 0.0001, "loss": 0.0425, "step": 30700 }, { "epoch": 87.24431818181819, "grad_norm": 0.6959528923034668, "learning_rate": 0.0001, "loss": 0.0433, "step": 30710 }, { "epoch": 87.27272727272727, "grad_norm": 0.5670670866966248, "learning_rate": 0.0001, "loss": 0.0451, "step": 30720 }, { "epoch": 87.30113636363636, "grad_norm": 0.5087947845458984, "learning_rate": 0.0001, "loss": 0.0429, "step": 30730 }, { "epoch": 87.32954545454545, "grad_norm": 0.638629138469696, "learning_rate": 0.0001, "loss": 0.0408, "step": 30740 }, { "epoch": 87.35795454545455, "grad_norm": 0.5405479073524475, "learning_rate": 0.0001, "loss": 0.0424, "step": 30750 }, { "epoch": 87.38636363636364, "grad_norm": 0.5827491283416748, "learning_rate": 0.0001, "loss": 0.044, "step": 30760 }, { "epoch": 87.41477272727273, "grad_norm": 0.8291541337966919, "learning_rate": 0.0001, "loss": 0.0423, "step": 30770 }, { "epoch": 87.44318181818181, "grad_norm": 0.5378076434135437, "learning_rate": 0.0001, "loss": 0.0447, "step": 30780 }, { "epoch": 87.4715909090909, "grad_norm": 0.5673062801361084, "learning_rate": 0.0001, "loss": 0.0426, "step": 30790 }, { "epoch": 87.5, "grad_norm": 0.6628111004829407, "learning_rate": 0.0001, "loss": 0.0454, "step": 30800 }, { "epoch": 87.5284090909091, "grad_norm": 0.5737188458442688, "learning_rate": 0.0001, "loss": 0.0447, "step": 30810 }, { "epoch": 87.55681818181819, "grad_norm": 0.49959149956703186, "learning_rate": 0.0001, "loss": 0.0438, "step": 30820 }, { "epoch": 87.58522727272727, "grad_norm": 0.579260528087616, "learning_rate": 0.0001, "loss": 0.0461, "step": 30830 }, { "epoch": 87.61363636363636, "grad_norm": 0.6071043014526367, "learning_rate": 0.0001, "loss": 0.0453, "step": 30840 }, { "epoch": 87.64204545454545, "grad_norm": 0.8908697366714478, "learning_rate": 0.0001, "loss": 0.0453, "step": 30850 }, { "epoch": 87.67045454545455, "grad_norm": 0.7816733717918396, "learning_rate": 0.0001, "loss": 0.0456, "step": 30860 }, { "epoch": 87.69886363636364, "grad_norm": 0.837706446647644, "learning_rate": 0.0001, "loss": 0.0439, "step": 30870 }, { "epoch": 87.72727272727273, "grad_norm": 0.6439931988716125, "learning_rate": 0.0001, "loss": 0.0448, "step": 30880 }, { "epoch": 87.75568181818181, "grad_norm": 0.5524504780769348, "learning_rate": 0.0001, "loss": 0.0429, "step": 30890 }, { "epoch": 87.7840909090909, "grad_norm": 0.5548602342605591, "learning_rate": 0.0001, "loss": 0.0436, "step": 30900 }, { "epoch": 87.8125, "grad_norm": 0.5937913656234741, "learning_rate": 0.0001, "loss": 0.043, "step": 30910 }, { "epoch": 87.8409090909091, "grad_norm": 0.49119383096694946, "learning_rate": 0.0001, "loss": 0.0418, "step": 30920 }, { "epoch": 87.86931818181819, "grad_norm": 0.5523015260696411, "learning_rate": 0.0001, "loss": 0.0426, "step": 30930 }, { "epoch": 87.89772727272727, "grad_norm": 0.6836235523223877, "learning_rate": 0.0001, "loss": 0.0436, "step": 30940 }, { "epoch": 87.92613636363636, "grad_norm": 0.728946328163147, "learning_rate": 0.0001, "loss": 0.0435, "step": 30950 }, { "epoch": 87.95454545454545, "grad_norm": 0.7053698301315308, "learning_rate": 0.0001, "loss": 0.0433, "step": 30960 }, { "epoch": 87.98295454545455, "grad_norm": 0.7172105312347412, "learning_rate": 0.0001, "loss": 0.0434, "step": 30970 }, { "epoch": 88.01136363636364, "grad_norm": 0.8821631073951721, "learning_rate": 0.0001, "loss": 0.0425, "step": 30980 }, { "epoch": 88.03977272727273, "grad_norm": 0.837773323059082, "learning_rate": 0.0001, "loss": 0.045, "step": 30990 }, { "epoch": 88.06818181818181, "grad_norm": 1.007065773010254, "learning_rate": 0.0001, "loss": 0.0422, "step": 31000 }, { "epoch": 88.0965909090909, "grad_norm": 0.8076823353767395, "learning_rate": 0.0001, "loss": 0.0433, "step": 31010 }, { "epoch": 88.125, "grad_norm": 1.02508544921875, "learning_rate": 0.0001, "loss": 0.0439, "step": 31020 }, { "epoch": 88.1534090909091, "grad_norm": 0.726391077041626, "learning_rate": 0.0001, "loss": 0.0424, "step": 31030 }, { "epoch": 88.18181818181819, "grad_norm": 0.78676837682724, "learning_rate": 0.0001, "loss": 0.0427, "step": 31040 }, { "epoch": 88.21022727272727, "grad_norm": 0.7329301834106445, "learning_rate": 0.0001, "loss": 0.0427, "step": 31050 }, { "epoch": 88.23863636363636, "grad_norm": 0.6445389986038208, "learning_rate": 0.0001, "loss": 0.0421, "step": 31060 }, { "epoch": 88.26704545454545, "grad_norm": 0.6451675295829773, "learning_rate": 0.0001, "loss": 0.0426, "step": 31070 }, { "epoch": 88.29545454545455, "grad_norm": 0.7502676248550415, "learning_rate": 0.0001, "loss": 0.0437, "step": 31080 }, { "epoch": 88.32386363636364, "grad_norm": 0.8322815299034119, "learning_rate": 0.0001, "loss": 0.0426, "step": 31090 }, { "epoch": 88.35227272727273, "grad_norm": 0.8641359210014343, "learning_rate": 0.0001, "loss": 0.0423, "step": 31100 }, { "epoch": 88.38068181818181, "grad_norm": 0.8692095279693604, "learning_rate": 0.0001, "loss": 0.0438, "step": 31110 }, { "epoch": 88.4090909090909, "grad_norm": 0.6443942785263062, "learning_rate": 0.0001, "loss": 0.0427, "step": 31120 }, { "epoch": 88.4375, "grad_norm": 0.7591652274131775, "learning_rate": 0.0001, "loss": 0.0411, "step": 31130 }, { "epoch": 88.4659090909091, "grad_norm": 0.8515008687973022, "learning_rate": 0.0001, "loss": 0.0432, "step": 31140 }, { "epoch": 88.49431818181819, "grad_norm": 0.7373746633529663, "learning_rate": 0.0001, "loss": 0.0452, "step": 31150 }, { "epoch": 88.52272727272727, "grad_norm": 2.620330810546875, "learning_rate": 0.0001, "loss": 0.0438, "step": 31160 }, { "epoch": 88.55113636363636, "grad_norm": 0.7088080048561096, "learning_rate": 0.0001, "loss": 0.0426, "step": 31170 }, { "epoch": 88.57954545454545, "grad_norm": 0.8215885758399963, "learning_rate": 0.0001, "loss": 0.0429, "step": 31180 }, { "epoch": 88.60795454545455, "grad_norm": 0.8965012431144714, "learning_rate": 0.0001, "loss": 0.0411, "step": 31190 }, { "epoch": 88.63636363636364, "grad_norm": 1.090306282043457, "learning_rate": 0.0001, "loss": 0.0417, "step": 31200 }, { "epoch": 88.66477272727273, "grad_norm": 0.9115955829620361, "learning_rate": 0.0001, "loss": 0.0415, "step": 31210 }, { "epoch": 88.69318181818181, "grad_norm": 1.4948323965072632, "learning_rate": 0.0001, "loss": 0.0408, "step": 31220 }, { "epoch": 88.7215909090909, "grad_norm": 1.177667498588562, "learning_rate": 0.0001, "loss": 0.0427, "step": 31230 }, { "epoch": 88.75, "grad_norm": 1.1721715927124023, "learning_rate": 0.0001, "loss": 0.04, "step": 31240 }, { "epoch": 88.7784090909091, "grad_norm": 1.04111647605896, "learning_rate": 0.0001, "loss": 0.0408, "step": 31250 }, { "epoch": 88.80681818181819, "grad_norm": 1.1286450624465942, "learning_rate": 0.0001, "loss": 0.0407, "step": 31260 }, { "epoch": 88.83522727272727, "grad_norm": 0.8720760941505432, "learning_rate": 0.0001, "loss": 0.0419, "step": 31270 }, { "epoch": 88.86363636363636, "grad_norm": 0.7654427289962769, "learning_rate": 0.0001, "loss": 0.04, "step": 31280 }, { "epoch": 88.89204545454545, "grad_norm": 0.6938006281852722, "learning_rate": 0.0001, "loss": 0.0414, "step": 31290 }, { "epoch": 88.92045454545455, "grad_norm": 0.6255007386207581, "learning_rate": 0.0001, "loss": 0.0417, "step": 31300 }, { "epoch": 88.94886363636364, "grad_norm": 0.881568193435669, "learning_rate": 0.0001, "loss": 0.0406, "step": 31310 }, { "epoch": 88.97727272727273, "grad_norm": 0.7803657650947571, "learning_rate": 0.0001, "loss": 0.0426, "step": 31320 }, { "epoch": 89.00568181818181, "grad_norm": 0.5337231159210205, "learning_rate": 0.0001, "loss": 0.0409, "step": 31330 }, { "epoch": 89.0340909090909, "grad_norm": 0.6003879904747009, "learning_rate": 0.0001, "loss": 0.0422, "step": 31340 }, { "epoch": 89.0625, "grad_norm": 0.694319486618042, "learning_rate": 0.0001, "loss": 0.0406, "step": 31350 }, { "epoch": 89.0909090909091, "grad_norm": 0.6574826836585999, "learning_rate": 0.0001, "loss": 0.0405, "step": 31360 }, { "epoch": 89.11931818181819, "grad_norm": 0.6312698125839233, "learning_rate": 0.0001, "loss": 0.0405, "step": 31370 }, { "epoch": 89.14772727272727, "grad_norm": 0.5977025032043457, "learning_rate": 0.0001, "loss": 0.0392, "step": 31380 }, { "epoch": 89.17613636363636, "grad_norm": 0.6093351244926453, "learning_rate": 0.0001, "loss": 0.0409, "step": 31390 }, { "epoch": 89.20454545454545, "grad_norm": 0.563823938369751, "learning_rate": 0.0001, "loss": 0.042, "step": 31400 }, { "epoch": 89.23295454545455, "grad_norm": 0.6710460782051086, "learning_rate": 0.0001, "loss": 0.0397, "step": 31410 }, { "epoch": 89.26136363636364, "grad_norm": 0.6708618998527527, "learning_rate": 0.0001, "loss": 0.0409, "step": 31420 }, { "epoch": 89.28977272727273, "grad_norm": 0.7974550127983093, "learning_rate": 0.0001, "loss": 0.0421, "step": 31430 }, { "epoch": 89.31818181818181, "grad_norm": 0.7905569672584534, "learning_rate": 0.0001, "loss": 0.0415, "step": 31440 }, { "epoch": 89.3465909090909, "grad_norm": 0.6526768207550049, "learning_rate": 0.0001, "loss": 0.0419, "step": 31450 }, { "epoch": 89.375, "grad_norm": 0.6570956707000732, "learning_rate": 0.0001, "loss": 0.0413, "step": 31460 }, { "epoch": 89.4034090909091, "grad_norm": 0.5645592212677002, "learning_rate": 0.0001, "loss": 0.0421, "step": 31470 }, { "epoch": 89.43181818181819, "grad_norm": 0.7741744518280029, "learning_rate": 0.0001, "loss": 0.043, "step": 31480 }, { "epoch": 89.46022727272727, "grad_norm": 0.8912363052368164, "learning_rate": 0.0001, "loss": 0.0423, "step": 31490 }, { "epoch": 89.48863636363636, "grad_norm": 1.4925516843795776, "learning_rate": 0.0001, "loss": 0.0419, "step": 31500 }, { "epoch": 89.51704545454545, "grad_norm": 1.1449397802352905, "learning_rate": 0.0001, "loss": 0.0406, "step": 31510 }, { "epoch": 89.54545454545455, "grad_norm": 0.7447580099105835, "learning_rate": 0.0001, "loss": 0.0414, "step": 31520 }, { "epoch": 89.57386363636364, "grad_norm": 0.8966802358627319, "learning_rate": 0.0001, "loss": 0.0403, "step": 31530 }, { "epoch": 89.60227272727273, "grad_norm": 0.8590907454490662, "learning_rate": 0.0001, "loss": 0.0403, "step": 31540 }, { "epoch": 89.63068181818181, "grad_norm": 1.0364528894424438, "learning_rate": 0.0001, "loss": 0.0428, "step": 31550 }, { "epoch": 89.6590909090909, "grad_norm": 1.0565521717071533, "learning_rate": 0.0001, "loss": 0.0419, "step": 31560 }, { "epoch": 89.6875, "grad_norm": 0.8635243773460388, "learning_rate": 0.0001, "loss": 0.0435, "step": 31570 }, { "epoch": 89.7159090909091, "grad_norm": 0.8231905698776245, "learning_rate": 0.0001, "loss": 0.0415, "step": 31580 }, { "epoch": 89.74431818181819, "grad_norm": 0.683319628238678, "learning_rate": 0.0001, "loss": 0.0411, "step": 31590 }, { "epoch": 89.77272727272727, "grad_norm": 0.696625292301178, "learning_rate": 0.0001, "loss": 0.0413, "step": 31600 }, { "epoch": 89.80113636363636, "grad_norm": 0.6028037667274475, "learning_rate": 0.0001, "loss": 0.0405, "step": 31610 }, { "epoch": 89.82954545454545, "grad_norm": 1.206365704536438, "learning_rate": 0.0001, "loss": 0.044, "step": 31620 }, { "epoch": 89.85795454545455, "grad_norm": 1.4085060358047485, "learning_rate": 0.0001, "loss": 0.0415, "step": 31630 }, { "epoch": 89.88636363636364, "grad_norm": 1.3718105554580688, "learning_rate": 0.0001, "loss": 0.0439, "step": 31640 }, { "epoch": 89.91477272727273, "grad_norm": 0.872251570224762, "learning_rate": 0.0001, "loss": 0.0399, "step": 31650 }, { "epoch": 89.94318181818181, "grad_norm": 0.8787030577659607, "learning_rate": 0.0001, "loss": 0.0417, "step": 31660 }, { "epoch": 89.9715909090909, "grad_norm": 0.9750413298606873, "learning_rate": 0.0001, "loss": 0.0412, "step": 31670 }, { "epoch": 90.0, "grad_norm": 1.8003432750701904, "learning_rate": 0.0001, "loss": 0.0428, "step": 31680 }, { "epoch": 90.0284090909091, "grad_norm": 1.7299727201461792, "learning_rate": 0.0001, "loss": 0.0432, "step": 31690 }, { "epoch": 90.05681818181819, "grad_norm": 2.0806267261505127, "learning_rate": 0.0001, "loss": 0.0413, "step": 31700 }, { "epoch": 90.08522727272727, "grad_norm": 1.1748923063278198, "learning_rate": 0.0001, "loss": 0.0415, "step": 31710 }, { "epoch": 90.11363636363636, "grad_norm": 1.2577301263809204, "learning_rate": 0.0001, "loss": 0.0392, "step": 31720 }, { "epoch": 90.14204545454545, "grad_norm": 1.1263160705566406, "learning_rate": 0.0001, "loss": 0.0394, "step": 31730 }, { "epoch": 90.17045454545455, "grad_norm": 0.8332096338272095, "learning_rate": 0.0001, "loss": 0.0396, "step": 31740 }, { "epoch": 90.19886363636364, "grad_norm": 0.9236270189285278, "learning_rate": 0.0001, "loss": 0.0397, "step": 31750 }, { "epoch": 90.22727272727273, "grad_norm": 0.826349675655365, "learning_rate": 0.0001, "loss": 0.0388, "step": 31760 }, { "epoch": 90.25568181818181, "grad_norm": 0.7999365329742432, "learning_rate": 0.0001, "loss": 0.0414, "step": 31770 }, { "epoch": 90.2840909090909, "grad_norm": 0.8490392565727234, "learning_rate": 0.0001, "loss": 0.0411, "step": 31780 }, { "epoch": 90.3125, "grad_norm": 0.8082181811332703, "learning_rate": 0.0001, "loss": 0.0401, "step": 31790 }, { "epoch": 90.3409090909091, "grad_norm": 0.9047965407371521, "learning_rate": 0.0001, "loss": 0.0415, "step": 31800 }, { "epoch": 90.36931818181819, "grad_norm": 0.7220473885536194, "learning_rate": 0.0001, "loss": 0.0401, "step": 31810 }, { "epoch": 90.39772727272727, "grad_norm": 1.0218350887298584, "learning_rate": 0.0001, "loss": 0.0409, "step": 31820 }, { "epoch": 90.42613636363636, "grad_norm": 0.6703020930290222, "learning_rate": 0.0001, "loss": 0.0399, "step": 31830 }, { "epoch": 90.45454545454545, "grad_norm": 0.6317295432090759, "learning_rate": 0.0001, "loss": 0.0416, "step": 31840 }, { "epoch": 90.48295454545455, "grad_norm": 0.5803297758102417, "learning_rate": 0.0001, "loss": 0.0428, "step": 31850 }, { "epoch": 90.51136363636364, "grad_norm": 0.7607895135879517, "learning_rate": 0.0001, "loss": 0.0409, "step": 31860 }, { "epoch": 90.53977272727273, "grad_norm": 0.6137414574623108, "learning_rate": 0.0001, "loss": 0.0409, "step": 31870 }, { "epoch": 90.56818181818181, "grad_norm": 0.5825350284576416, "learning_rate": 0.0001, "loss": 0.0409, "step": 31880 }, { "epoch": 90.5965909090909, "grad_norm": 0.564761757850647, "learning_rate": 0.0001, "loss": 0.0413, "step": 31890 }, { "epoch": 90.625, "grad_norm": 1.0057430267333984, "learning_rate": 0.0001, "loss": 0.041, "step": 31900 }, { "epoch": 90.6534090909091, "grad_norm": 0.7206169962882996, "learning_rate": 0.0001, "loss": 0.0414, "step": 31910 }, { "epoch": 90.68181818181819, "grad_norm": 0.8694084286689758, "learning_rate": 0.0001, "loss": 0.0415, "step": 31920 }, { "epoch": 90.71022727272727, "grad_norm": 0.9109015464782715, "learning_rate": 0.0001, "loss": 0.0397, "step": 31930 }, { "epoch": 90.73863636363636, "grad_norm": 1.422331690788269, "learning_rate": 0.0001, "loss": 0.0426, "step": 31940 }, { "epoch": 90.76704545454545, "grad_norm": 0.6990547180175781, "learning_rate": 0.0001, "loss": 0.0408, "step": 31950 }, { "epoch": 90.79545454545455, "grad_norm": 0.8138213753700256, "learning_rate": 0.0001, "loss": 0.041, "step": 31960 }, { "epoch": 90.82386363636364, "grad_norm": 0.9061129093170166, "learning_rate": 0.0001, "loss": 0.0415, "step": 31970 }, { "epoch": 90.85227272727273, "grad_norm": 0.5697906613349915, "learning_rate": 0.0001, "loss": 0.0426, "step": 31980 }, { "epoch": 90.88068181818181, "grad_norm": 0.6935226917266846, "learning_rate": 0.0001, "loss": 0.0415, "step": 31990 }, { "epoch": 90.9090909090909, "grad_norm": 0.6459117531776428, "learning_rate": 0.0001, "loss": 0.0418, "step": 32000 }, { "epoch": 90.9375, "grad_norm": 0.8231947422027588, "learning_rate": 0.0001, "loss": 0.0417, "step": 32010 }, { "epoch": 90.9659090909091, "grad_norm": 1.047978401184082, "learning_rate": 0.0001, "loss": 0.0409, "step": 32020 }, { "epoch": 90.99431818181819, "grad_norm": 0.5649544596672058, "learning_rate": 0.0001, "loss": 0.0416, "step": 32030 }, { "epoch": 91.02272727272727, "grad_norm": 0.6621559262275696, "learning_rate": 0.0001, "loss": 0.041, "step": 32040 }, { "epoch": 91.05113636363636, "grad_norm": 0.8017844557762146, "learning_rate": 0.0001, "loss": 0.0409, "step": 32050 }, { "epoch": 91.07954545454545, "grad_norm": 0.7462131977081299, "learning_rate": 0.0001, "loss": 0.0421, "step": 32060 }, { "epoch": 91.10795454545455, "grad_norm": 0.7743386030197144, "learning_rate": 0.0001, "loss": 0.0419, "step": 32070 }, { "epoch": 91.13636363636364, "grad_norm": 0.6021215915679932, "learning_rate": 0.0001, "loss": 0.0424, "step": 32080 }, { "epoch": 91.16477272727273, "grad_norm": 0.6302787065505981, "learning_rate": 0.0001, "loss": 0.0401, "step": 32090 }, { "epoch": 91.19318181818181, "grad_norm": 0.5769550800323486, "learning_rate": 0.0001, "loss": 0.0419, "step": 32100 }, { "epoch": 91.2215909090909, "grad_norm": 0.6698492169380188, "learning_rate": 0.0001, "loss": 0.0407, "step": 32110 }, { "epoch": 91.25, "grad_norm": 0.6460458040237427, "learning_rate": 0.0001, "loss": 0.0406, "step": 32120 }, { "epoch": 91.2784090909091, "grad_norm": 0.693789541721344, "learning_rate": 0.0001, "loss": 0.0415, "step": 32130 }, { "epoch": 91.30681818181819, "grad_norm": 0.7158985733985901, "learning_rate": 0.0001, "loss": 0.0428, "step": 32140 }, { "epoch": 91.33522727272727, "grad_norm": 0.618126630783081, "learning_rate": 0.0001, "loss": 0.0433, "step": 32150 }, { "epoch": 91.36363636363636, "grad_norm": 0.6397842168807983, "learning_rate": 0.0001, "loss": 0.0434, "step": 32160 }, { "epoch": 91.39204545454545, "grad_norm": 0.7809488773345947, "learning_rate": 0.0001, "loss": 0.042, "step": 32170 }, { "epoch": 91.42045454545455, "grad_norm": 0.5916877388954163, "learning_rate": 0.0001, "loss": 0.0438, "step": 32180 }, { "epoch": 91.44886363636364, "grad_norm": 0.6435518860816956, "learning_rate": 0.0001, "loss": 0.0428, "step": 32190 }, { "epoch": 91.47727272727273, "grad_norm": 0.7912642955780029, "learning_rate": 0.0001, "loss": 0.0406, "step": 32200 }, { "epoch": 91.50568181818181, "grad_norm": 0.692492663860321, "learning_rate": 0.0001, "loss": 0.0419, "step": 32210 }, { "epoch": 91.5340909090909, "grad_norm": 0.6788442730903625, "learning_rate": 0.0001, "loss": 0.0416, "step": 32220 }, { "epoch": 91.5625, "grad_norm": 0.7223365902900696, "learning_rate": 0.0001, "loss": 0.0405, "step": 32230 }, { "epoch": 91.5909090909091, "grad_norm": 0.7962009310722351, "learning_rate": 0.0001, "loss": 0.0418, "step": 32240 }, { "epoch": 91.61931818181819, "grad_norm": 0.8391971588134766, "learning_rate": 0.0001, "loss": 0.0415, "step": 32250 }, { "epoch": 91.64772727272727, "grad_norm": 0.963829517364502, "learning_rate": 0.0001, "loss": 0.0393, "step": 32260 }, { "epoch": 91.67613636363636, "grad_norm": 0.7977566719055176, "learning_rate": 0.0001, "loss": 0.041, "step": 32270 }, { "epoch": 91.70454545454545, "grad_norm": 0.9379525780677795, "learning_rate": 0.0001, "loss": 0.0409, "step": 32280 }, { "epoch": 91.73295454545455, "grad_norm": 0.8582242727279663, "learning_rate": 0.0001, "loss": 0.0421, "step": 32290 }, { "epoch": 91.76136363636364, "grad_norm": 0.8872998952865601, "learning_rate": 0.0001, "loss": 0.0407, "step": 32300 }, { "epoch": 91.78977272727273, "grad_norm": 0.6711391806602478, "learning_rate": 0.0001, "loss": 0.0409, "step": 32310 }, { "epoch": 91.81818181818181, "grad_norm": 0.890733540058136, "learning_rate": 0.0001, "loss": 0.0419, "step": 32320 }, { "epoch": 91.8465909090909, "grad_norm": 0.9312843680381775, "learning_rate": 0.0001, "loss": 0.0399, "step": 32330 }, { "epoch": 91.875, "grad_norm": 0.6852923035621643, "learning_rate": 0.0001, "loss": 0.0425, "step": 32340 }, { "epoch": 91.9034090909091, "grad_norm": 0.7489289045333862, "learning_rate": 0.0001, "loss": 0.0418, "step": 32350 }, { "epoch": 91.93181818181819, "grad_norm": 0.7574262022972107, "learning_rate": 0.0001, "loss": 0.0431, "step": 32360 }, { "epoch": 91.96022727272727, "grad_norm": 0.7518380284309387, "learning_rate": 0.0001, "loss": 0.0415, "step": 32370 }, { "epoch": 91.98863636363636, "grad_norm": 0.7089730501174927, "learning_rate": 0.0001, "loss": 0.0427, "step": 32380 }, { "epoch": 92.01704545454545, "grad_norm": 0.7275684475898743, "learning_rate": 0.0001, "loss": 0.0415, "step": 32390 }, { "epoch": 92.04545454545455, "grad_norm": 0.6037976741790771, "learning_rate": 0.0001, "loss": 0.0395, "step": 32400 }, { "epoch": 92.07386363636364, "grad_norm": 0.5874007940292358, "learning_rate": 0.0001, "loss": 0.0404, "step": 32410 }, { "epoch": 92.10227272727273, "grad_norm": 0.5332598686218262, "learning_rate": 0.0001, "loss": 0.0412, "step": 32420 }, { "epoch": 92.13068181818181, "grad_norm": 0.7063932418823242, "learning_rate": 0.0001, "loss": 0.0417, "step": 32430 }, { "epoch": 92.1590909090909, "grad_norm": 0.7319120168685913, "learning_rate": 0.0001, "loss": 0.0393, "step": 32440 }, { "epoch": 92.1875, "grad_norm": 0.6438412070274353, "learning_rate": 0.0001, "loss": 0.04, "step": 32450 }, { "epoch": 92.2159090909091, "grad_norm": 0.5752390027046204, "learning_rate": 0.0001, "loss": 0.0409, "step": 32460 }, { "epoch": 92.24431818181819, "grad_norm": 0.7674922943115234, "learning_rate": 0.0001, "loss": 0.0419, "step": 32470 }, { "epoch": 92.27272727272727, "grad_norm": 0.5381972789764404, "learning_rate": 0.0001, "loss": 0.0404, "step": 32480 }, { "epoch": 92.30113636363636, "grad_norm": 0.766947329044342, "learning_rate": 0.0001, "loss": 0.0431, "step": 32490 }, { "epoch": 92.32954545454545, "grad_norm": 0.5970397591590881, "learning_rate": 0.0001, "loss": 0.0418, "step": 32500 }, { "epoch": 92.35795454545455, "grad_norm": 0.5418734550476074, "learning_rate": 0.0001, "loss": 0.0411, "step": 32510 }, { "epoch": 92.38636363636364, "grad_norm": 0.5782895684242249, "learning_rate": 0.0001, "loss": 0.0418, "step": 32520 }, { "epoch": 92.41477272727273, "grad_norm": 0.5378401875495911, "learning_rate": 0.0001, "loss": 0.0414, "step": 32530 }, { "epoch": 92.44318181818181, "grad_norm": 0.5437403321266174, "learning_rate": 0.0001, "loss": 0.0414, "step": 32540 }, { "epoch": 92.4715909090909, "grad_norm": 0.574937641620636, "learning_rate": 0.0001, "loss": 0.0404, "step": 32550 }, { "epoch": 92.5, "grad_norm": 0.4711826741695404, "learning_rate": 0.0001, "loss": 0.042, "step": 32560 }, { "epoch": 92.5284090909091, "grad_norm": 0.5091038346290588, "learning_rate": 0.0001, "loss": 0.0408, "step": 32570 }, { "epoch": 92.55681818181819, "grad_norm": 0.5985186696052551, "learning_rate": 0.0001, "loss": 0.0407, "step": 32580 }, { "epoch": 92.58522727272727, "grad_norm": 0.6167530417442322, "learning_rate": 0.0001, "loss": 0.0402, "step": 32590 }, { "epoch": 92.61363636363636, "grad_norm": 0.7481162548065186, "learning_rate": 0.0001, "loss": 0.0402, "step": 32600 }, { "epoch": 92.64204545454545, "grad_norm": 0.6328353881835938, "learning_rate": 0.0001, "loss": 0.0403, "step": 32610 }, { "epoch": 92.67045454545455, "grad_norm": 0.4634016752243042, "learning_rate": 0.0001, "loss": 0.0423, "step": 32620 }, { "epoch": 92.69886363636364, "grad_norm": 0.5572225451469421, "learning_rate": 0.0001, "loss": 0.0394, "step": 32630 }, { "epoch": 92.72727272727273, "grad_norm": 0.5547319650650024, "learning_rate": 0.0001, "loss": 0.0403, "step": 32640 }, { "epoch": 92.75568181818181, "grad_norm": 0.5432265996932983, "learning_rate": 0.0001, "loss": 0.0398, "step": 32650 }, { "epoch": 92.7840909090909, "grad_norm": 0.7217846512794495, "learning_rate": 0.0001, "loss": 0.0417, "step": 32660 }, { "epoch": 92.8125, "grad_norm": 0.7317110896110535, "learning_rate": 0.0001, "loss": 0.0417, "step": 32670 }, { "epoch": 92.8409090909091, "grad_norm": 0.76151442527771, "learning_rate": 0.0001, "loss": 0.0383, "step": 32680 }, { "epoch": 92.86931818181819, "grad_norm": 0.6238445043563843, "learning_rate": 0.0001, "loss": 0.0405, "step": 32690 }, { "epoch": 92.89772727272727, "grad_norm": 0.5886904001235962, "learning_rate": 0.0001, "loss": 0.0413, "step": 32700 }, { "epoch": 92.92613636363636, "grad_norm": 0.46394994854927063, "learning_rate": 0.0001, "loss": 0.0404, "step": 32710 }, { "epoch": 92.95454545454545, "grad_norm": 0.7471411824226379, "learning_rate": 0.0001, "loss": 0.0403, "step": 32720 }, { "epoch": 92.98295454545455, "grad_norm": 0.6481496095657349, "learning_rate": 0.0001, "loss": 0.0403, "step": 32730 }, { "epoch": 93.01136363636364, "grad_norm": 0.6740915775299072, "learning_rate": 0.0001, "loss": 0.0414, "step": 32740 }, { "epoch": 93.03977272727273, "grad_norm": 0.4988252520561218, "learning_rate": 0.0001, "loss": 0.0406, "step": 32750 }, { "epoch": 93.06818181818181, "grad_norm": 0.5601629614830017, "learning_rate": 0.0001, "loss": 0.0417, "step": 32760 }, { "epoch": 93.0965909090909, "grad_norm": 0.5621158480644226, "learning_rate": 0.0001, "loss": 0.0411, "step": 32770 }, { "epoch": 93.125, "grad_norm": 0.5953294038772583, "learning_rate": 0.0001, "loss": 0.0404, "step": 32780 }, { "epoch": 93.1534090909091, "grad_norm": 0.6276609897613525, "learning_rate": 0.0001, "loss": 0.0402, "step": 32790 }, { "epoch": 93.18181818181819, "grad_norm": 0.6082143783569336, "learning_rate": 0.0001, "loss": 0.0397, "step": 32800 }, { "epoch": 93.21022727272727, "grad_norm": 0.7014224529266357, "learning_rate": 0.0001, "loss": 0.0408, "step": 32810 }, { "epoch": 93.23863636363636, "grad_norm": 0.6491138339042664, "learning_rate": 0.0001, "loss": 0.0403, "step": 32820 }, { "epoch": 93.26704545454545, "grad_norm": 0.7243189215660095, "learning_rate": 0.0001, "loss": 0.0414, "step": 32830 }, { "epoch": 93.29545454545455, "grad_norm": 0.5766690969467163, "learning_rate": 0.0001, "loss": 0.0399, "step": 32840 }, { "epoch": 93.32386363636364, "grad_norm": 0.7540706992149353, "learning_rate": 0.0001, "loss": 0.0385, "step": 32850 }, { "epoch": 93.35227272727273, "grad_norm": 0.7168294787406921, "learning_rate": 0.0001, "loss": 0.0402, "step": 32860 }, { "epoch": 93.38068181818181, "grad_norm": 0.8406569361686707, "learning_rate": 0.0001, "loss": 0.04, "step": 32870 }, { "epoch": 93.4090909090909, "grad_norm": 0.8490883708000183, "learning_rate": 0.0001, "loss": 0.0403, "step": 32880 }, { "epoch": 93.4375, "grad_norm": 0.5935730338096619, "learning_rate": 0.0001, "loss": 0.0408, "step": 32890 }, { "epoch": 93.4659090909091, "grad_norm": 0.5843430161476135, "learning_rate": 0.0001, "loss": 0.0418, "step": 32900 }, { "epoch": 93.49431818181819, "grad_norm": 0.8435496091842651, "learning_rate": 0.0001, "loss": 0.04, "step": 32910 }, { "epoch": 93.52272727272727, "grad_norm": 0.599806547164917, "learning_rate": 0.0001, "loss": 0.0404, "step": 32920 }, { "epoch": 93.55113636363636, "grad_norm": 0.6157388687133789, "learning_rate": 0.0001, "loss": 0.0401, "step": 32930 }, { "epoch": 93.57954545454545, "grad_norm": 0.6935839056968689, "learning_rate": 0.0001, "loss": 0.0402, "step": 32940 }, { "epoch": 93.60795454545455, "grad_norm": 0.6349316835403442, "learning_rate": 0.0001, "loss": 0.0406, "step": 32950 }, { "epoch": 93.63636363636364, "grad_norm": 0.5609252452850342, "learning_rate": 0.0001, "loss": 0.0386, "step": 32960 }, { "epoch": 93.66477272727273, "grad_norm": 0.6227506399154663, "learning_rate": 0.0001, "loss": 0.0407, "step": 32970 }, { "epoch": 93.69318181818181, "grad_norm": 0.671882688999176, "learning_rate": 0.0001, "loss": 0.0419, "step": 32980 }, { "epoch": 93.7215909090909, "grad_norm": 0.8065037727355957, "learning_rate": 0.0001, "loss": 0.0409, "step": 32990 }, { "epoch": 93.75, "grad_norm": 0.7018570303916931, "learning_rate": 0.0001, "loss": 0.0389, "step": 33000 }, { "epoch": 93.7784090909091, "grad_norm": 0.7340584993362427, "learning_rate": 0.0001, "loss": 0.0408, "step": 33010 }, { "epoch": 93.80681818181819, "grad_norm": 0.6570980548858643, "learning_rate": 0.0001, "loss": 0.0406, "step": 33020 }, { "epoch": 93.83522727272727, "grad_norm": 0.6870690584182739, "learning_rate": 0.0001, "loss": 0.039, "step": 33030 }, { "epoch": 93.86363636363636, "grad_norm": 0.6674038171768188, "learning_rate": 0.0001, "loss": 0.0415, "step": 33040 }, { "epoch": 93.89204545454545, "grad_norm": 0.5651020407676697, "learning_rate": 0.0001, "loss": 0.0405, "step": 33050 }, { "epoch": 93.92045454545455, "grad_norm": 0.7089354991912842, "learning_rate": 0.0001, "loss": 0.041, "step": 33060 }, { "epoch": 93.94886363636364, "grad_norm": 0.5789372324943542, "learning_rate": 0.0001, "loss": 0.0413, "step": 33070 }, { "epoch": 93.97727272727273, "grad_norm": 0.8415607213973999, "learning_rate": 0.0001, "loss": 0.043, "step": 33080 }, { "epoch": 94.00568181818181, "grad_norm": 0.7195010185241699, "learning_rate": 0.0001, "loss": 0.0399, "step": 33090 }, { "epoch": 94.0340909090909, "grad_norm": 0.6305752992630005, "learning_rate": 0.0001, "loss": 0.04, "step": 33100 }, { "epoch": 94.0625, "grad_norm": 0.6140927076339722, "learning_rate": 0.0001, "loss": 0.0406, "step": 33110 }, { "epoch": 94.0909090909091, "grad_norm": 0.761303186416626, "learning_rate": 0.0001, "loss": 0.0392, "step": 33120 }, { "epoch": 94.11931818181819, "grad_norm": 0.7136927247047424, "learning_rate": 0.0001, "loss": 0.04, "step": 33130 }, { "epoch": 94.14772727272727, "grad_norm": 0.5686725378036499, "learning_rate": 0.0001, "loss": 0.0403, "step": 33140 }, { "epoch": 94.17613636363636, "grad_norm": 0.7701740860939026, "learning_rate": 0.0001, "loss": 0.0395, "step": 33150 }, { "epoch": 94.20454545454545, "grad_norm": 1.336498737335205, "learning_rate": 0.0001, "loss": 0.0386, "step": 33160 }, { "epoch": 94.23295454545455, "grad_norm": 1.33478581905365, "learning_rate": 0.0001, "loss": 0.0394, "step": 33170 }, { "epoch": 94.26136363636364, "grad_norm": 1.4353957176208496, "learning_rate": 0.0001, "loss": 0.039, "step": 33180 }, { "epoch": 94.28977272727273, "grad_norm": 1.2320867776870728, "learning_rate": 0.0001, "loss": 0.039, "step": 33190 }, { "epoch": 94.31818181818181, "grad_norm": 1.4484091997146606, "learning_rate": 0.0001, "loss": 0.0419, "step": 33200 }, { "epoch": 94.3465909090909, "grad_norm": 1.3309117555618286, "learning_rate": 0.0001, "loss": 0.0398, "step": 33210 }, { "epoch": 94.375, "grad_norm": 1.4127764701843262, "learning_rate": 0.0001, "loss": 0.0385, "step": 33220 }, { "epoch": 94.4034090909091, "grad_norm": 1.5341440439224243, "learning_rate": 0.0001, "loss": 0.0371, "step": 33230 }, { "epoch": 94.43181818181819, "grad_norm": 1.1439406871795654, "learning_rate": 0.0001, "loss": 0.0366, "step": 33240 }, { "epoch": 94.46022727272727, "grad_norm": 1.584952473640442, "learning_rate": 0.0001, "loss": 0.039, "step": 33250 }, { "epoch": 94.48863636363636, "grad_norm": 1.6550278663635254, "learning_rate": 0.0001, "loss": 0.0383, "step": 33260 }, { "epoch": 94.51704545454545, "grad_norm": 1.1844305992126465, "learning_rate": 0.0001, "loss": 0.0382, "step": 33270 }, { "epoch": 94.54545454545455, "grad_norm": 1.3612699508666992, "learning_rate": 0.0001, "loss": 0.037, "step": 33280 }, { "epoch": 94.57386363636364, "grad_norm": 1.3791062831878662, "learning_rate": 0.0001, "loss": 0.0378, "step": 33290 }, { "epoch": 94.60227272727273, "grad_norm": 0.9146853089332581, "learning_rate": 0.0001, "loss": 0.0386, "step": 33300 }, { "epoch": 94.63068181818181, "grad_norm": 0.9118925929069519, "learning_rate": 0.0001, "loss": 0.0379, "step": 33310 }, { "epoch": 94.6590909090909, "grad_norm": 0.9594295024871826, "learning_rate": 0.0001, "loss": 0.0393, "step": 33320 }, { "epoch": 94.6875, "grad_norm": 0.9613752365112305, "learning_rate": 0.0001, "loss": 0.0405, "step": 33330 }, { "epoch": 94.7159090909091, "grad_norm": 0.8596879839897156, "learning_rate": 0.0001, "loss": 0.0403, "step": 33340 }, { "epoch": 94.74431818181819, "grad_norm": 0.8098438382148743, "learning_rate": 0.0001, "loss": 0.041, "step": 33350 }, { "epoch": 94.77272727272727, "grad_norm": 1.0036510229110718, "learning_rate": 0.0001, "loss": 0.0398, "step": 33360 }, { "epoch": 94.80113636363636, "grad_norm": 0.8176660537719727, "learning_rate": 0.0001, "loss": 0.041, "step": 33370 }, { "epoch": 94.82954545454545, "grad_norm": 1.1036738157272339, "learning_rate": 0.0001, "loss": 0.0407, "step": 33380 }, { "epoch": 94.85795454545455, "grad_norm": 0.7849661111831665, "learning_rate": 0.0001, "loss": 0.0401, "step": 33390 }, { "epoch": 94.88636363636364, "grad_norm": 0.7073894739151001, "learning_rate": 0.0001, "loss": 0.0405, "step": 33400 }, { "epoch": 94.91477272727273, "grad_norm": 0.8646548986434937, "learning_rate": 0.0001, "loss": 0.0405, "step": 33410 }, { "epoch": 94.94318181818181, "grad_norm": 0.7283008694648743, "learning_rate": 0.0001, "loss": 0.0406, "step": 33420 }, { "epoch": 94.9715909090909, "grad_norm": 0.8491483926773071, "learning_rate": 0.0001, "loss": 0.0406, "step": 33430 }, { "epoch": 95.0, "grad_norm": 0.985192060470581, "learning_rate": 0.0001, "loss": 0.0409, "step": 33440 }, { "epoch": 95.0284090909091, "grad_norm": 0.7137681841850281, "learning_rate": 0.0001, "loss": 0.0384, "step": 33450 }, { "epoch": 95.05681818181819, "grad_norm": 0.6383925676345825, "learning_rate": 0.0001, "loss": 0.04, "step": 33460 }, { "epoch": 95.08522727272727, "grad_norm": 0.6039495468139648, "learning_rate": 0.0001, "loss": 0.0397, "step": 33470 }, { "epoch": 95.11363636363636, "grad_norm": 0.7503993511199951, "learning_rate": 0.0001, "loss": 0.0389, "step": 33480 }, { "epoch": 95.14204545454545, "grad_norm": 0.8905356526374817, "learning_rate": 0.0001, "loss": 0.0413, "step": 33490 }, { "epoch": 95.17045454545455, "grad_norm": 0.8237714171409607, "learning_rate": 0.0001, "loss": 0.0406, "step": 33500 }, { "epoch": 95.19886363636364, "grad_norm": 0.9115204811096191, "learning_rate": 0.0001, "loss": 0.0411, "step": 33510 }, { "epoch": 95.22727272727273, "grad_norm": 0.7420920729637146, "learning_rate": 0.0001, "loss": 0.0405, "step": 33520 }, { "epoch": 95.25568181818181, "grad_norm": 0.6619880199432373, "learning_rate": 0.0001, "loss": 0.0426, "step": 33530 }, { "epoch": 95.2840909090909, "grad_norm": 0.7541500926017761, "learning_rate": 0.0001, "loss": 0.0411, "step": 33540 }, { "epoch": 95.3125, "grad_norm": 0.6301935911178589, "learning_rate": 0.0001, "loss": 0.0402, "step": 33550 }, { "epoch": 95.3409090909091, "grad_norm": 0.677110493183136, "learning_rate": 0.0001, "loss": 0.0415, "step": 33560 }, { "epoch": 95.36931818181819, "grad_norm": 0.6058023571968079, "learning_rate": 0.0001, "loss": 0.0402, "step": 33570 }, { "epoch": 95.39772727272727, "grad_norm": 0.6188281774520874, "learning_rate": 0.0001, "loss": 0.0403, "step": 33580 }, { "epoch": 95.42613636363636, "grad_norm": 0.6876928210258484, "learning_rate": 0.0001, "loss": 0.0393, "step": 33590 }, { "epoch": 95.45454545454545, "grad_norm": 0.7519726753234863, "learning_rate": 0.0001, "loss": 0.0396, "step": 33600 }, { "epoch": 95.48295454545455, "grad_norm": 0.9113184809684753, "learning_rate": 0.0001, "loss": 0.0379, "step": 33610 }, { "epoch": 95.51136363636364, "grad_norm": 0.6155601739883423, "learning_rate": 0.0001, "loss": 0.0407, "step": 33620 }, { "epoch": 95.53977272727273, "grad_norm": 0.5960917472839355, "learning_rate": 0.0001, "loss": 0.0391, "step": 33630 }, { "epoch": 95.56818181818181, "grad_norm": 0.849075198173523, "learning_rate": 0.0001, "loss": 0.0389, "step": 33640 }, { "epoch": 95.5965909090909, "grad_norm": 0.6028590798377991, "learning_rate": 0.0001, "loss": 0.0405, "step": 33650 }, { "epoch": 95.625, "grad_norm": 0.7840140461921692, "learning_rate": 0.0001, "loss": 0.0392, "step": 33660 }, { "epoch": 95.6534090909091, "grad_norm": 1.1838630437850952, "learning_rate": 0.0001, "loss": 0.0399, "step": 33670 }, { "epoch": 95.68181818181819, "grad_norm": 1.1603631973266602, "learning_rate": 0.0001, "loss": 0.0392, "step": 33680 }, { "epoch": 95.71022727272727, "grad_norm": 1.3737505674362183, "learning_rate": 0.0001, "loss": 0.0423, "step": 33690 }, { "epoch": 95.73863636363636, "grad_norm": 1.0241883993148804, "learning_rate": 0.0001, "loss": 0.0396, "step": 33700 }, { "epoch": 95.76704545454545, "grad_norm": 1.056270718574524, "learning_rate": 0.0001, "loss": 0.0385, "step": 33710 }, { "epoch": 95.79545454545455, "grad_norm": 0.6747000217437744, "learning_rate": 0.0001, "loss": 0.0395, "step": 33720 }, { "epoch": 95.82386363636364, "grad_norm": 0.7106748819351196, "learning_rate": 0.0001, "loss": 0.0401, "step": 33730 }, { "epoch": 95.85227272727273, "grad_norm": 0.7365654706954956, "learning_rate": 0.0001, "loss": 0.0404, "step": 33740 }, { "epoch": 95.88068181818181, "grad_norm": 0.7549445033073425, "learning_rate": 0.0001, "loss": 0.0407, "step": 33750 }, { "epoch": 95.9090909090909, "grad_norm": 0.8147190809249878, "learning_rate": 0.0001, "loss": 0.0405, "step": 33760 }, { "epoch": 95.9375, "grad_norm": 0.7287954092025757, "learning_rate": 0.0001, "loss": 0.0393, "step": 33770 }, { "epoch": 95.9659090909091, "grad_norm": 0.771390974521637, "learning_rate": 0.0001, "loss": 0.0389, "step": 33780 }, { "epoch": 95.99431818181819, "grad_norm": 1.2401736974716187, "learning_rate": 0.0001, "loss": 0.0397, "step": 33790 }, { "epoch": 96.02272727272727, "grad_norm": 0.8751268982887268, "learning_rate": 0.0001, "loss": 0.04, "step": 33800 }, { "epoch": 96.05113636363636, "grad_norm": 0.7138180732727051, "learning_rate": 0.0001, "loss": 0.0402, "step": 33810 }, { "epoch": 96.07954545454545, "grad_norm": 0.7193799614906311, "learning_rate": 0.0001, "loss": 0.0382, "step": 33820 }, { "epoch": 96.10795454545455, "grad_norm": 0.8099432587623596, "learning_rate": 0.0001, "loss": 0.0381, "step": 33830 }, { "epoch": 96.13636363636364, "grad_norm": 0.76226407289505, "learning_rate": 0.0001, "loss": 0.04, "step": 33840 }, { "epoch": 96.16477272727273, "grad_norm": 0.6789332628250122, "learning_rate": 0.0001, "loss": 0.0392, "step": 33850 }, { "epoch": 96.19318181818181, "grad_norm": 0.6385184526443481, "learning_rate": 0.0001, "loss": 0.039, "step": 33860 }, { "epoch": 96.2215909090909, "grad_norm": 0.6390976309776306, "learning_rate": 0.0001, "loss": 0.0384, "step": 33870 }, { "epoch": 96.25, "grad_norm": 0.597517192363739, "learning_rate": 0.0001, "loss": 0.0379, "step": 33880 }, { "epoch": 96.2784090909091, "grad_norm": 0.7059200406074524, "learning_rate": 0.0001, "loss": 0.0391, "step": 33890 }, { "epoch": 96.30681818181819, "grad_norm": 0.7767623066902161, "learning_rate": 0.0001, "loss": 0.0399, "step": 33900 }, { "epoch": 96.33522727272727, "grad_norm": 0.5218889117240906, "learning_rate": 0.0001, "loss": 0.0405, "step": 33910 }, { "epoch": 96.36363636363636, "grad_norm": 0.6701022386550903, "learning_rate": 0.0001, "loss": 0.0399, "step": 33920 }, { "epoch": 96.39204545454545, "grad_norm": 0.7549053430557251, "learning_rate": 0.0001, "loss": 0.0402, "step": 33930 }, { "epoch": 96.42045454545455, "grad_norm": 0.7081325650215149, "learning_rate": 0.0001, "loss": 0.0394, "step": 33940 }, { "epoch": 96.44886363636364, "grad_norm": 0.7790707349777222, "learning_rate": 0.0001, "loss": 0.0415, "step": 33950 }, { "epoch": 96.47727272727273, "grad_norm": 0.6598635315895081, "learning_rate": 0.0001, "loss": 0.0391, "step": 33960 }, { "epoch": 96.50568181818181, "grad_norm": 0.6724303364753723, "learning_rate": 0.0001, "loss": 0.0399, "step": 33970 }, { "epoch": 96.5340909090909, "grad_norm": 0.5733104944229126, "learning_rate": 0.0001, "loss": 0.0419, "step": 33980 }, { "epoch": 96.5625, "grad_norm": 0.5401538610458374, "learning_rate": 0.0001, "loss": 0.0388, "step": 33990 }, { "epoch": 96.5909090909091, "grad_norm": 0.614717423915863, "learning_rate": 0.0001, "loss": 0.0391, "step": 34000 }, { "epoch": 96.61931818181819, "grad_norm": 0.5657342672348022, "learning_rate": 0.0001, "loss": 0.0386, "step": 34010 }, { "epoch": 96.64772727272727, "grad_norm": 1.142454981803894, "learning_rate": 0.0001, "loss": 0.0382, "step": 34020 }, { "epoch": 96.67613636363636, "grad_norm": 1.0728636980056763, "learning_rate": 0.0001, "loss": 0.0411, "step": 34030 }, { "epoch": 96.70454545454545, "grad_norm": 1.2795010805130005, "learning_rate": 0.0001, "loss": 0.0398, "step": 34040 }, { "epoch": 96.73295454545455, "grad_norm": 1.0543229579925537, "learning_rate": 0.0001, "loss": 0.0387, "step": 34050 }, { "epoch": 96.76136363636364, "grad_norm": 1.0960166454315186, "learning_rate": 0.0001, "loss": 0.04, "step": 34060 }, { "epoch": 96.78977272727273, "grad_norm": 1.0084160566329956, "learning_rate": 0.0001, "loss": 0.0391, "step": 34070 }, { "epoch": 96.81818181818181, "grad_norm": 0.9923943281173706, "learning_rate": 0.0001, "loss": 0.0379, "step": 34080 }, { "epoch": 96.8465909090909, "grad_norm": 0.8087642788887024, "learning_rate": 0.0001, "loss": 0.0391, "step": 34090 }, { "epoch": 96.875, "grad_norm": 1.208733081817627, "learning_rate": 0.0001, "loss": 0.0391, "step": 34100 }, { "epoch": 96.9034090909091, "grad_norm": 0.9292431473731995, "learning_rate": 0.0001, "loss": 0.0403, "step": 34110 }, { "epoch": 96.93181818181819, "grad_norm": 0.8942785263061523, "learning_rate": 0.0001, "loss": 0.0397, "step": 34120 }, { "epoch": 96.96022727272727, "grad_norm": 0.863674521446228, "learning_rate": 0.0001, "loss": 0.0388, "step": 34130 }, { "epoch": 96.98863636363636, "grad_norm": 1.0691416263580322, "learning_rate": 0.0001, "loss": 0.0379, "step": 34140 }, { "epoch": 97.01704545454545, "grad_norm": 0.5712941884994507, "learning_rate": 0.0001, "loss": 0.0384, "step": 34150 }, { "epoch": 97.04545454545455, "grad_norm": 0.6319429278373718, "learning_rate": 0.0001, "loss": 0.0382, "step": 34160 }, { "epoch": 97.07386363636364, "grad_norm": 0.9614266157150269, "learning_rate": 0.0001, "loss": 0.0378, "step": 34170 }, { "epoch": 97.10227272727273, "grad_norm": 0.7226883769035339, "learning_rate": 0.0001, "loss": 0.0393, "step": 34180 }, { "epoch": 97.13068181818181, "grad_norm": 0.7708411812782288, "learning_rate": 0.0001, "loss": 0.0396, "step": 34190 }, { "epoch": 97.1590909090909, "grad_norm": 0.7161945104598999, "learning_rate": 0.0001, "loss": 0.0392, "step": 34200 }, { "epoch": 97.1875, "grad_norm": 0.650200366973877, "learning_rate": 0.0001, "loss": 0.0406, "step": 34210 }, { "epoch": 97.2159090909091, "grad_norm": 0.6397100687026978, "learning_rate": 0.0001, "loss": 0.039, "step": 34220 }, { "epoch": 97.24431818181819, "grad_norm": 0.7142146825790405, "learning_rate": 0.0001, "loss": 0.0387, "step": 34230 }, { "epoch": 97.27272727272727, "grad_norm": 0.6408595442771912, "learning_rate": 0.0001, "loss": 0.0388, "step": 34240 }, { "epoch": 97.30113636363636, "grad_norm": 0.6152955889701843, "learning_rate": 0.0001, "loss": 0.0387, "step": 34250 }, { "epoch": 97.32954545454545, "grad_norm": 0.7230101227760315, "learning_rate": 0.0001, "loss": 0.0381, "step": 34260 }, { "epoch": 97.35795454545455, "grad_norm": 0.945869505405426, "learning_rate": 0.0001, "loss": 0.0387, "step": 34270 }, { "epoch": 97.38636363636364, "grad_norm": 0.9501050114631653, "learning_rate": 0.0001, "loss": 0.0399, "step": 34280 }, { "epoch": 97.41477272727273, "grad_norm": 0.9380021691322327, "learning_rate": 0.0001, "loss": 0.0402, "step": 34290 }, { "epoch": 97.44318181818181, "grad_norm": 1.3023756742477417, "learning_rate": 0.0001, "loss": 0.0396, "step": 34300 }, { "epoch": 97.4715909090909, "grad_norm": 1.0887079238891602, "learning_rate": 0.0001, "loss": 0.0402, "step": 34310 }, { "epoch": 97.5, "grad_norm": 1.2671388387680054, "learning_rate": 0.0001, "loss": 0.0406, "step": 34320 }, { "epoch": 97.5284090909091, "grad_norm": 0.7356063723564148, "learning_rate": 0.0001, "loss": 0.0402, "step": 34330 }, { "epoch": 97.55681818181819, "grad_norm": 1.0338929891586304, "learning_rate": 0.0001, "loss": 0.0394, "step": 34340 }, { "epoch": 97.58522727272727, "grad_norm": 1.080224633216858, "learning_rate": 0.0001, "loss": 0.0393, "step": 34350 }, { "epoch": 97.61363636363636, "grad_norm": 1.2610585689544678, "learning_rate": 0.0001, "loss": 0.0413, "step": 34360 }, { "epoch": 97.64204545454545, "grad_norm": 1.3167310953140259, "learning_rate": 0.0001, "loss": 0.0409, "step": 34370 }, { "epoch": 97.67045454545455, "grad_norm": 1.0706456899642944, "learning_rate": 0.0001, "loss": 0.0402, "step": 34380 }, { "epoch": 97.69886363636364, "grad_norm": 0.8218298554420471, "learning_rate": 0.0001, "loss": 0.0387, "step": 34390 }, { "epoch": 97.72727272727273, "grad_norm": 1.1640470027923584, "learning_rate": 0.0001, "loss": 0.0386, "step": 34400 }, { "epoch": 97.75568181818181, "grad_norm": 0.8588812947273254, "learning_rate": 0.0001, "loss": 0.0394, "step": 34410 }, { "epoch": 97.7840909090909, "grad_norm": 0.892999529838562, "learning_rate": 0.0001, "loss": 0.0379, "step": 34420 }, { "epoch": 97.8125, "grad_norm": 0.7717360258102417, "learning_rate": 0.0001, "loss": 0.0397, "step": 34430 }, { "epoch": 97.8409090909091, "grad_norm": 0.8617984652519226, "learning_rate": 0.0001, "loss": 0.0404, "step": 34440 }, { "epoch": 97.86931818181819, "grad_norm": 0.8200704455375671, "learning_rate": 0.0001, "loss": 0.0394, "step": 34450 }, { "epoch": 97.89772727272727, "grad_norm": 0.6376478672027588, "learning_rate": 0.0001, "loss": 0.0391, "step": 34460 }, { "epoch": 97.92613636363636, "grad_norm": 0.9599566459655762, "learning_rate": 0.0001, "loss": 0.0386, "step": 34470 }, { "epoch": 97.95454545454545, "grad_norm": 0.7544838786125183, "learning_rate": 0.0001, "loss": 0.0401, "step": 34480 }, { "epoch": 97.98295454545455, "grad_norm": 0.6836613416671753, "learning_rate": 0.0001, "loss": 0.0393, "step": 34490 }, { "epoch": 98.01136363636364, "grad_norm": 0.806623101234436, "learning_rate": 0.0001, "loss": 0.0403, "step": 34500 }, { "epoch": 98.03977272727273, "grad_norm": 0.9929761290550232, "learning_rate": 0.0001, "loss": 0.0404, "step": 34510 }, { "epoch": 98.06818181818181, "grad_norm": 0.7506119012832642, "learning_rate": 0.0001, "loss": 0.0397, "step": 34520 }, { "epoch": 98.0965909090909, "grad_norm": 0.7990569472312927, "learning_rate": 0.0001, "loss": 0.0409, "step": 34530 }, { "epoch": 98.125, "grad_norm": 0.8026459813117981, "learning_rate": 0.0001, "loss": 0.0403, "step": 34540 }, { "epoch": 98.1534090909091, "grad_norm": 0.7655097842216492, "learning_rate": 0.0001, "loss": 0.0403, "step": 34550 }, { "epoch": 98.18181818181819, "grad_norm": 0.626440703868866, "learning_rate": 0.0001, "loss": 0.0398, "step": 34560 }, { "epoch": 98.21022727272727, "grad_norm": 0.5965021252632141, "learning_rate": 0.0001, "loss": 0.04, "step": 34570 }, { "epoch": 98.23863636363636, "grad_norm": 0.6253120303153992, "learning_rate": 0.0001, "loss": 0.0405, "step": 34580 }, { "epoch": 98.26704545454545, "grad_norm": 0.728787362575531, "learning_rate": 0.0001, "loss": 0.0396, "step": 34590 }, { "epoch": 98.29545454545455, "grad_norm": 0.6482828259468079, "learning_rate": 0.0001, "loss": 0.0403, "step": 34600 }, { "epoch": 98.32386363636364, "grad_norm": 0.6943103671073914, "learning_rate": 0.0001, "loss": 0.0405, "step": 34610 }, { "epoch": 98.35227272727273, "grad_norm": 0.7795711159706116, "learning_rate": 0.0001, "loss": 0.0406, "step": 34620 }, { "epoch": 98.38068181818181, "grad_norm": 0.7584307193756104, "learning_rate": 0.0001, "loss": 0.0401, "step": 34630 }, { "epoch": 98.4090909090909, "grad_norm": 0.583733320236206, "learning_rate": 0.0001, "loss": 0.0392, "step": 34640 }, { "epoch": 98.4375, "grad_norm": 1.0233768224716187, "learning_rate": 0.0001, "loss": 0.0403, "step": 34650 }, { "epoch": 98.4659090909091, "grad_norm": 0.7381771206855774, "learning_rate": 0.0001, "loss": 0.0401, "step": 34660 }, { "epoch": 98.49431818181819, "grad_norm": 1.0933961868286133, "learning_rate": 0.0001, "loss": 0.0406, "step": 34670 }, { "epoch": 98.52272727272727, "grad_norm": 0.8754884600639343, "learning_rate": 0.0001, "loss": 0.0392, "step": 34680 }, { "epoch": 98.55113636363636, "grad_norm": 0.658818781375885, "learning_rate": 0.0001, "loss": 0.0384, "step": 34690 }, { "epoch": 98.57954545454545, "grad_norm": 0.8832113146781921, "learning_rate": 0.0001, "loss": 0.0387, "step": 34700 }, { "epoch": 98.60795454545455, "grad_norm": 0.6543091535568237, "learning_rate": 0.0001, "loss": 0.04, "step": 34710 }, { "epoch": 98.63636363636364, "grad_norm": 0.5386546850204468, "learning_rate": 0.0001, "loss": 0.0378, "step": 34720 }, { "epoch": 98.66477272727273, "grad_norm": 0.4984776973724365, "learning_rate": 0.0001, "loss": 0.0381, "step": 34730 }, { "epoch": 98.69318181818181, "grad_norm": 0.5178702473640442, "learning_rate": 0.0001, "loss": 0.04, "step": 34740 }, { "epoch": 98.7215909090909, "grad_norm": 0.7301501035690308, "learning_rate": 0.0001, "loss": 0.0389, "step": 34750 }, { "epoch": 98.75, "grad_norm": 0.5040386319160461, "learning_rate": 0.0001, "loss": 0.0389, "step": 34760 }, { "epoch": 98.7784090909091, "grad_norm": 0.4964589774608612, "learning_rate": 0.0001, "loss": 0.0391, "step": 34770 }, { "epoch": 98.80681818181819, "grad_norm": 0.6709886193275452, "learning_rate": 0.0001, "loss": 0.038, "step": 34780 }, { "epoch": 98.83522727272727, "grad_norm": 0.6795845031738281, "learning_rate": 0.0001, "loss": 0.0392, "step": 34790 }, { "epoch": 98.86363636363636, "grad_norm": 0.6201198101043701, "learning_rate": 0.0001, "loss": 0.0386, "step": 34800 }, { "epoch": 98.89204545454545, "grad_norm": 0.5602060556411743, "learning_rate": 0.0001, "loss": 0.0374, "step": 34810 }, { "epoch": 98.92045454545455, "grad_norm": 0.6460253000259399, "learning_rate": 0.0001, "loss": 0.0399, "step": 34820 }, { "epoch": 98.94886363636364, "grad_norm": 0.6049633026123047, "learning_rate": 0.0001, "loss": 0.0407, "step": 34830 }, { "epoch": 98.97727272727273, "grad_norm": 0.6341941356658936, "learning_rate": 0.0001, "loss": 0.0388, "step": 34840 }, { "epoch": 99.00568181818181, "grad_norm": 0.6305556893348694, "learning_rate": 0.0001, "loss": 0.0414, "step": 34850 }, { "epoch": 99.0340909090909, "grad_norm": 0.5669991970062256, "learning_rate": 0.0001, "loss": 0.0399, "step": 34860 }, { "epoch": 99.0625, "grad_norm": 0.6804741024971008, "learning_rate": 0.0001, "loss": 0.0387, "step": 34870 }, { "epoch": 99.0909090909091, "grad_norm": 0.6002970933914185, "learning_rate": 0.0001, "loss": 0.0391, "step": 34880 }, { "epoch": 99.11931818181819, "grad_norm": 0.6289021968841553, "learning_rate": 0.0001, "loss": 0.0395, "step": 34890 }, { "epoch": 99.14772727272727, "grad_norm": 0.5393396019935608, "learning_rate": 0.0001, "loss": 0.0395, "step": 34900 }, { "epoch": 99.17613636363636, "grad_norm": 0.6519724130630493, "learning_rate": 0.0001, "loss": 0.0402, "step": 34910 }, { "epoch": 99.20454545454545, "grad_norm": 0.6385529637336731, "learning_rate": 0.0001, "loss": 0.0392, "step": 34920 }, { "epoch": 99.23295454545455, "grad_norm": 0.7283846139907837, "learning_rate": 0.0001, "loss": 0.0415, "step": 34930 }, { "epoch": 99.26136363636364, "grad_norm": 0.5138035416603088, "learning_rate": 0.0001, "loss": 0.0379, "step": 34940 }, { "epoch": 99.28977272727273, "grad_norm": 0.6235365867614746, "learning_rate": 0.0001, "loss": 0.0383, "step": 34950 }, { "epoch": 99.31818181818181, "grad_norm": 0.6972271800041199, "learning_rate": 0.0001, "loss": 0.0392, "step": 34960 }, { "epoch": 99.3465909090909, "grad_norm": 0.7025635242462158, "learning_rate": 0.0001, "loss": 0.0403, "step": 34970 }, { "epoch": 99.375, "grad_norm": 0.5961763858795166, "learning_rate": 0.0001, "loss": 0.0396, "step": 34980 }, { "epoch": 99.4034090909091, "grad_norm": 0.6492330431938171, "learning_rate": 0.0001, "loss": 0.0394, "step": 34990 }, { "epoch": 99.43181818181819, "grad_norm": 0.6340757012367249, "learning_rate": 0.0001, "loss": 0.039, "step": 35000 }, { "epoch": 99.46022727272727, "grad_norm": 0.6484765410423279, "learning_rate": 0.0001, "loss": 0.039, "step": 35010 }, { "epoch": 99.48863636363636, "grad_norm": 0.5331886410713196, "learning_rate": 0.0001, "loss": 0.038, "step": 35020 }, { "epoch": 99.51704545454545, "grad_norm": 0.4786685109138489, "learning_rate": 0.0001, "loss": 0.0379, "step": 35030 }, { "epoch": 99.54545454545455, "grad_norm": 0.5610359311103821, "learning_rate": 0.0001, "loss": 0.0388, "step": 35040 }, { "epoch": 99.57386363636364, "grad_norm": 0.5391741394996643, "learning_rate": 0.0001, "loss": 0.039, "step": 35050 }, { "epoch": 99.60227272727273, "grad_norm": 0.610522985458374, "learning_rate": 0.0001, "loss": 0.0389, "step": 35060 }, { "epoch": 99.63068181818181, "grad_norm": 0.6739444732666016, "learning_rate": 0.0001, "loss": 0.0393, "step": 35070 }, { "epoch": 99.6590909090909, "grad_norm": 0.5937843918800354, "learning_rate": 0.0001, "loss": 0.0398, "step": 35080 }, { "epoch": 99.6875, "grad_norm": 0.9213070869445801, "learning_rate": 0.0001, "loss": 0.0389, "step": 35090 }, { "epoch": 99.7159090909091, "grad_norm": 1.3140711784362793, "learning_rate": 0.0001, "loss": 0.0376, "step": 35100 }, { "epoch": 99.74431818181819, "grad_norm": 1.2353553771972656, "learning_rate": 0.0001, "loss": 0.0391, "step": 35110 }, { "epoch": 99.77272727272727, "grad_norm": 0.7020501494407654, "learning_rate": 0.0001, "loss": 0.0375, "step": 35120 }, { "epoch": 99.80113636363636, "grad_norm": 0.8266453742980957, "learning_rate": 0.0001, "loss": 0.039, "step": 35130 }, { "epoch": 99.82954545454545, "grad_norm": 0.7972448468208313, "learning_rate": 0.0001, "loss": 0.0382, "step": 35140 }, { "epoch": 99.85795454545455, "grad_norm": 0.7136993408203125, "learning_rate": 0.0001, "loss": 0.0379, "step": 35150 }, { "epoch": 99.88636363636364, "grad_norm": 0.794268012046814, "learning_rate": 0.0001, "loss": 0.0386, "step": 35160 }, { "epoch": 99.91477272727273, "grad_norm": 0.7449502348899841, "learning_rate": 0.0001, "loss": 0.0381, "step": 35170 }, { "epoch": 99.94318181818181, "grad_norm": 0.5898183584213257, "learning_rate": 0.0001, "loss": 0.0377, "step": 35180 }, { "epoch": 99.9715909090909, "grad_norm": 0.5253759622573853, "learning_rate": 0.0001, "loss": 0.0382, "step": 35190 }, { "epoch": 100.0, "grad_norm": 0.5536308288574219, "learning_rate": 0.0001, "loss": 0.0381, "step": 35200 }, { "epoch": 100.0284090909091, "grad_norm": 0.712748110294342, "learning_rate": 0.0001, "loss": 0.0381, "step": 35210 }, { "epoch": 100.05681818181819, "grad_norm": 0.6205607056617737, "learning_rate": 0.0001, "loss": 0.0382, "step": 35220 }, { "epoch": 100.08522727272727, "grad_norm": 0.8103065490722656, "learning_rate": 0.0001, "loss": 0.0375, "step": 35230 }, { "epoch": 100.11363636363636, "grad_norm": 0.8175324201583862, "learning_rate": 0.0001, "loss": 0.0373, "step": 35240 }, { "epoch": 100.14204545454545, "grad_norm": 0.6265504956245422, "learning_rate": 0.0001, "loss": 0.0373, "step": 35250 }, { "epoch": 100.17045454545455, "grad_norm": 0.7531624436378479, "learning_rate": 0.0001, "loss": 0.0383, "step": 35260 }, { "epoch": 100.19886363636364, "grad_norm": 0.7789162993431091, "learning_rate": 0.0001, "loss": 0.0375, "step": 35270 }, { "epoch": 100.22727272727273, "grad_norm": 0.7431286573410034, "learning_rate": 0.0001, "loss": 0.0374, "step": 35280 }, { "epoch": 100.25568181818181, "grad_norm": 0.6338279247283936, "learning_rate": 0.0001, "loss": 0.0378, "step": 35290 }, { "epoch": 100.2840909090909, "grad_norm": 0.6069151759147644, "learning_rate": 0.0001, "loss": 0.039, "step": 35300 }, { "epoch": 100.3125, "grad_norm": 0.6290576457977295, "learning_rate": 0.0001, "loss": 0.0387, "step": 35310 }, { "epoch": 100.3409090909091, "grad_norm": 0.5072388052940369, "learning_rate": 0.0001, "loss": 0.0386, "step": 35320 }, { "epoch": 100.36931818181819, "grad_norm": 0.9051946401596069, "learning_rate": 0.0001, "loss": 0.0398, "step": 35330 }, { "epoch": 100.39772727272727, "grad_norm": 0.9270437955856323, "learning_rate": 0.0001, "loss": 0.0384, "step": 35340 }, { "epoch": 100.42613636363636, "grad_norm": 1.1097337007522583, "learning_rate": 0.0001, "loss": 0.039, "step": 35350 }, { "epoch": 100.45454545454545, "grad_norm": 0.753572404384613, "learning_rate": 0.0001, "loss": 0.0379, "step": 35360 }, { "epoch": 100.48295454545455, "grad_norm": 0.8269745111465454, "learning_rate": 0.0001, "loss": 0.0374, "step": 35370 }, { "epoch": 100.51136363636364, "grad_norm": 0.9077835083007812, "learning_rate": 0.0001, "loss": 0.0385, "step": 35380 }, { "epoch": 100.53977272727273, "grad_norm": 0.863038182258606, "learning_rate": 0.0001, "loss": 0.0376, "step": 35390 }, { "epoch": 100.56818181818181, "grad_norm": 0.7924647927284241, "learning_rate": 0.0001, "loss": 0.0387, "step": 35400 }, { "epoch": 100.5965909090909, "grad_norm": 0.8401536345481873, "learning_rate": 0.0001, "loss": 0.0376, "step": 35410 }, { "epoch": 100.625, "grad_norm": 0.8418139219284058, "learning_rate": 0.0001, "loss": 0.0378, "step": 35420 }, { "epoch": 100.6534090909091, "grad_norm": 0.8648441433906555, "learning_rate": 0.0001, "loss": 0.0393, "step": 35430 }, { "epoch": 100.68181818181819, "grad_norm": 0.7955145239830017, "learning_rate": 0.0001, "loss": 0.0395, "step": 35440 }, { "epoch": 100.71022727272727, "grad_norm": 0.759017825126648, "learning_rate": 0.0001, "loss": 0.0384, "step": 35450 }, { "epoch": 100.73863636363636, "grad_norm": 0.7723873853683472, "learning_rate": 0.0001, "loss": 0.0379, "step": 35460 }, { "epoch": 100.76704545454545, "grad_norm": 0.5847647190093994, "learning_rate": 0.0001, "loss": 0.039, "step": 35470 }, { "epoch": 100.79545454545455, "grad_norm": 0.7090848684310913, "learning_rate": 0.0001, "loss": 0.0377, "step": 35480 }, { "epoch": 100.82386363636364, "grad_norm": 0.5850080847740173, "learning_rate": 0.0001, "loss": 0.0378, "step": 35490 }, { "epoch": 100.85227272727273, "grad_norm": 0.6634331345558167, "learning_rate": 0.0001, "loss": 0.0375, "step": 35500 }, { "epoch": 100.88068181818181, "grad_norm": 0.5659244060516357, "learning_rate": 0.0001, "loss": 0.0377, "step": 35510 }, { "epoch": 100.9090909090909, "grad_norm": 0.6139445304870605, "learning_rate": 0.0001, "loss": 0.0386, "step": 35520 }, { "epoch": 100.9375, "grad_norm": 0.5992445945739746, "learning_rate": 0.0001, "loss": 0.0369, "step": 35530 }, { "epoch": 100.9659090909091, "grad_norm": 0.705426037311554, "learning_rate": 0.0001, "loss": 0.0385, "step": 35540 }, { "epoch": 100.99431818181819, "grad_norm": 0.8458812832832336, "learning_rate": 0.0001, "loss": 0.0381, "step": 35550 }, { "epoch": 101.02272727272727, "grad_norm": 0.8477011919021606, "learning_rate": 0.0001, "loss": 0.0383, "step": 35560 }, { "epoch": 101.05113636363636, "grad_norm": 0.7048535346984863, "learning_rate": 0.0001, "loss": 0.038, "step": 35570 }, { "epoch": 101.07954545454545, "grad_norm": 0.9626438617706299, "learning_rate": 0.0001, "loss": 0.0372, "step": 35580 }, { "epoch": 101.10795454545455, "grad_norm": 0.8232958912849426, "learning_rate": 0.0001, "loss": 0.0377, "step": 35590 }, { "epoch": 101.13636363636364, "grad_norm": 1.2208539247512817, "learning_rate": 0.0001, "loss": 0.0388, "step": 35600 }, { "epoch": 101.16477272727273, "grad_norm": 0.8574521541595459, "learning_rate": 0.0001, "loss": 0.038, "step": 35610 }, { "epoch": 101.19318181818181, "grad_norm": 0.536631166934967, "learning_rate": 0.0001, "loss": 0.0388, "step": 35620 }, { "epoch": 101.2215909090909, "grad_norm": 0.6252682209014893, "learning_rate": 0.0001, "loss": 0.0382, "step": 35630 }, { "epoch": 101.25, "grad_norm": 0.6429247260093689, "learning_rate": 0.0001, "loss": 0.0384, "step": 35640 }, { "epoch": 101.2784090909091, "grad_norm": 0.6166262626647949, "learning_rate": 0.0001, "loss": 0.0382, "step": 35650 }, { "epoch": 101.30681818181819, "grad_norm": 0.6511677503585815, "learning_rate": 0.0001, "loss": 0.0377, "step": 35660 }, { "epoch": 101.33522727272727, "grad_norm": 0.6906519532203674, "learning_rate": 0.0001, "loss": 0.038, "step": 35670 }, { "epoch": 101.36363636363636, "grad_norm": 0.6717009544372559, "learning_rate": 0.0001, "loss": 0.0389, "step": 35680 }, { "epoch": 101.39204545454545, "grad_norm": 0.6030964255332947, "learning_rate": 0.0001, "loss": 0.0407, "step": 35690 }, { "epoch": 101.42045454545455, "grad_norm": 0.6418792009353638, "learning_rate": 0.0001, "loss": 0.0394, "step": 35700 }, { "epoch": 101.44886363636364, "grad_norm": 0.7006118297576904, "learning_rate": 0.0001, "loss": 0.0394, "step": 35710 }, { "epoch": 101.47727272727273, "grad_norm": 0.7065404653549194, "learning_rate": 0.0001, "loss": 0.0379, "step": 35720 }, { "epoch": 101.50568181818181, "grad_norm": 0.7001347541809082, "learning_rate": 0.0001, "loss": 0.0382, "step": 35730 }, { "epoch": 101.5340909090909, "grad_norm": 0.7850422859191895, "learning_rate": 0.0001, "loss": 0.039, "step": 35740 }, { "epoch": 101.5625, "grad_norm": 0.6983082294464111, "learning_rate": 0.0001, "loss": 0.039, "step": 35750 }, { "epoch": 101.5909090909091, "grad_norm": 0.6168388724327087, "learning_rate": 0.0001, "loss": 0.0376, "step": 35760 }, { "epoch": 101.61931818181819, "grad_norm": 0.6485419869422913, "learning_rate": 0.0001, "loss": 0.0373, "step": 35770 }, { "epoch": 101.64772727272727, "grad_norm": 0.694078803062439, "learning_rate": 0.0001, "loss": 0.0384, "step": 35780 }, { "epoch": 101.67613636363636, "grad_norm": 0.6054084300994873, "learning_rate": 0.0001, "loss": 0.0374, "step": 35790 }, { "epoch": 101.70454545454545, "grad_norm": 0.7031105756759644, "learning_rate": 0.0001, "loss": 0.0387, "step": 35800 }, { "epoch": 101.73295454545455, "grad_norm": 0.6772159337997437, "learning_rate": 0.0001, "loss": 0.0374, "step": 35810 }, { "epoch": 101.76136363636364, "grad_norm": 0.742162823677063, "learning_rate": 0.0001, "loss": 0.0363, "step": 35820 }, { "epoch": 101.78977272727273, "grad_norm": 0.6221441626548767, "learning_rate": 0.0001, "loss": 0.0385, "step": 35830 }, { "epoch": 101.81818181818181, "grad_norm": 0.5692674517631531, "learning_rate": 0.0001, "loss": 0.0386, "step": 35840 }, { "epoch": 101.8465909090909, "grad_norm": 0.5200670957565308, "learning_rate": 0.0001, "loss": 0.0383, "step": 35850 }, { "epoch": 101.875, "grad_norm": 0.44014811515808105, "learning_rate": 0.0001, "loss": 0.0384, "step": 35860 }, { "epoch": 101.9034090909091, "grad_norm": 0.5131399035453796, "learning_rate": 0.0001, "loss": 0.0378, "step": 35870 }, { "epoch": 101.93181818181819, "grad_norm": 0.7803551554679871, "learning_rate": 0.0001, "loss": 0.0386, "step": 35880 }, { "epoch": 101.96022727272727, "grad_norm": 1.0714529752731323, "learning_rate": 0.0001, "loss": 0.0381, "step": 35890 }, { "epoch": 101.98863636363636, "grad_norm": 0.8870387077331543, "learning_rate": 0.0001, "loss": 0.0366, "step": 35900 }, { "epoch": 102.01704545454545, "grad_norm": 0.814504861831665, "learning_rate": 0.0001, "loss": 0.0372, "step": 35910 }, { "epoch": 102.04545454545455, "grad_norm": 1.2375514507293701, "learning_rate": 0.0001, "loss": 0.0383, "step": 35920 }, { "epoch": 102.07386363636364, "grad_norm": 1.0883283615112305, "learning_rate": 0.0001, "loss": 0.0385, "step": 35930 }, { "epoch": 102.10227272727273, "grad_norm": 0.7217296361923218, "learning_rate": 0.0001, "loss": 0.0378, "step": 35940 }, { "epoch": 102.13068181818181, "grad_norm": 0.9220253229141235, "learning_rate": 0.0001, "loss": 0.036, "step": 35950 }, { "epoch": 102.1590909090909, "grad_norm": 0.9065287113189697, "learning_rate": 0.0001, "loss": 0.0387, "step": 35960 }, { "epoch": 102.1875, "grad_norm": 1.2304972410202026, "learning_rate": 0.0001, "loss": 0.0372, "step": 35970 }, { "epoch": 102.2159090909091, "grad_norm": 0.9404845833778381, "learning_rate": 0.0001, "loss": 0.0372, "step": 35980 }, { "epoch": 102.24431818181819, "grad_norm": 0.9212031960487366, "learning_rate": 0.0001, "loss": 0.0362, "step": 35990 }, { "epoch": 102.27272727272727, "grad_norm": 0.7656883597373962, "learning_rate": 0.0001, "loss": 0.038, "step": 36000 }, { "epoch": 102.30113636363636, "grad_norm": 0.7965297102928162, "learning_rate": 0.0001, "loss": 0.0382, "step": 36010 }, { "epoch": 102.32954545454545, "grad_norm": 0.6216439604759216, "learning_rate": 0.0001, "loss": 0.0379, "step": 36020 }, { "epoch": 102.35795454545455, "grad_norm": 0.6738339066505432, "learning_rate": 0.0001, "loss": 0.0389, "step": 36030 }, { "epoch": 102.38636363636364, "grad_norm": 0.6152580380439758, "learning_rate": 0.0001, "loss": 0.0383, "step": 36040 }, { "epoch": 102.41477272727273, "grad_norm": 0.6139420866966248, "learning_rate": 0.0001, "loss": 0.0379, "step": 36050 }, { "epoch": 102.44318181818181, "grad_norm": 0.5595870614051819, "learning_rate": 0.0001, "loss": 0.0378, "step": 36060 }, { "epoch": 102.4715909090909, "grad_norm": 0.721856951713562, "learning_rate": 0.0001, "loss": 0.0369, "step": 36070 }, { "epoch": 102.5, "grad_norm": 0.7998674511909485, "learning_rate": 0.0001, "loss": 0.0372, "step": 36080 }, { "epoch": 102.5284090909091, "grad_norm": 0.8735951781272888, "learning_rate": 0.0001, "loss": 0.0384, "step": 36090 }, { "epoch": 102.55681818181819, "grad_norm": 0.7308524250984192, "learning_rate": 0.0001, "loss": 0.0388, "step": 36100 }, { "epoch": 102.58522727272727, "grad_norm": 0.7623921036720276, "learning_rate": 0.0001, "loss": 0.0373, "step": 36110 }, { "epoch": 102.61363636363636, "grad_norm": 0.8306724429130554, "learning_rate": 0.0001, "loss": 0.0382, "step": 36120 }, { "epoch": 102.64204545454545, "grad_norm": 0.8680564165115356, "learning_rate": 0.0001, "loss": 0.0374, "step": 36130 }, { "epoch": 102.67045454545455, "grad_norm": 0.8162680864334106, "learning_rate": 0.0001, "loss": 0.0382, "step": 36140 }, { "epoch": 102.69886363636364, "grad_norm": 0.6912002563476562, "learning_rate": 0.0001, "loss": 0.0371, "step": 36150 }, { "epoch": 102.72727272727273, "grad_norm": 0.7910269498825073, "learning_rate": 0.0001, "loss": 0.0375, "step": 36160 }, { "epoch": 102.75568181818181, "grad_norm": 0.6541531682014465, "learning_rate": 0.0001, "loss": 0.039, "step": 36170 }, { "epoch": 102.7840909090909, "grad_norm": 0.5540030002593994, "learning_rate": 0.0001, "loss": 0.0384, "step": 36180 }, { "epoch": 102.8125, "grad_norm": 0.6944588422775269, "learning_rate": 0.0001, "loss": 0.0387, "step": 36190 }, { "epoch": 102.8409090909091, "grad_norm": 0.6281765699386597, "learning_rate": 0.0001, "loss": 0.0377, "step": 36200 }, { "epoch": 102.86931818181819, "grad_norm": 0.5334039330482483, "learning_rate": 0.0001, "loss": 0.0374, "step": 36210 }, { "epoch": 102.89772727272727, "grad_norm": 0.6771912574768066, "learning_rate": 0.0001, "loss": 0.038, "step": 36220 }, { "epoch": 102.92613636363636, "grad_norm": 0.5143633484840393, "learning_rate": 0.0001, "loss": 0.0367, "step": 36230 }, { "epoch": 102.95454545454545, "grad_norm": 0.5875553488731384, "learning_rate": 0.0001, "loss": 0.0385, "step": 36240 }, { "epoch": 102.98295454545455, "grad_norm": 0.5696831941604614, "learning_rate": 0.0001, "loss": 0.038, "step": 36250 }, { "epoch": 103.01136363636364, "grad_norm": 0.6309694647789001, "learning_rate": 0.0001, "loss": 0.0377, "step": 36260 }, { "epoch": 103.03977272727273, "grad_norm": 0.8848923444747925, "learning_rate": 0.0001, "loss": 0.0391, "step": 36270 }, { "epoch": 103.06818181818181, "grad_norm": 0.578700840473175, "learning_rate": 0.0001, "loss": 0.0375, "step": 36280 }, { "epoch": 103.0965909090909, "grad_norm": 0.6599058508872986, "learning_rate": 0.0001, "loss": 0.0388, "step": 36290 }, { "epoch": 103.125, "grad_norm": 0.7933474779129028, "learning_rate": 0.0001, "loss": 0.0375, "step": 36300 }, { "epoch": 103.1534090909091, "grad_norm": 0.8254300355911255, "learning_rate": 0.0001, "loss": 0.0378, "step": 36310 }, { "epoch": 103.18181818181819, "grad_norm": 0.6838820576667786, "learning_rate": 0.0001, "loss": 0.0369, "step": 36320 }, { "epoch": 103.21022727272727, "grad_norm": 0.686337947845459, "learning_rate": 0.0001, "loss": 0.0361, "step": 36330 }, { "epoch": 103.23863636363636, "grad_norm": 0.768295168876648, "learning_rate": 0.0001, "loss": 0.0374, "step": 36340 }, { "epoch": 103.26704545454545, "grad_norm": 0.7727903127670288, "learning_rate": 0.0001, "loss": 0.0384, "step": 36350 }, { "epoch": 103.29545454545455, "grad_norm": 0.4438778758049011, "learning_rate": 0.0001, "loss": 0.0363, "step": 36360 }, { "epoch": 103.32386363636364, "grad_norm": 0.5905418395996094, "learning_rate": 0.0001, "loss": 0.0383, "step": 36370 }, { "epoch": 103.35227272727273, "grad_norm": 0.6414166688919067, "learning_rate": 0.0001, "loss": 0.0388, "step": 36380 }, { "epoch": 103.38068181818181, "grad_norm": 0.6797493100166321, "learning_rate": 0.0001, "loss": 0.0386, "step": 36390 }, { "epoch": 103.4090909090909, "grad_norm": 0.653895914554596, "learning_rate": 0.0001, "loss": 0.0398, "step": 36400 }, { "epoch": 103.4375, "grad_norm": 0.6173151731491089, "learning_rate": 0.0001, "loss": 0.0392, "step": 36410 }, { "epoch": 103.4659090909091, "grad_norm": 0.47727423906326294, "learning_rate": 0.0001, "loss": 0.0369, "step": 36420 }, { "epoch": 103.49431818181819, "grad_norm": 0.6933102011680603, "learning_rate": 0.0001, "loss": 0.0385, "step": 36430 }, { "epoch": 103.52272727272727, "grad_norm": 0.6333000063896179, "learning_rate": 0.0001, "loss": 0.0382, "step": 36440 }, { "epoch": 103.55113636363636, "grad_norm": 0.5522160530090332, "learning_rate": 0.0001, "loss": 0.0376, "step": 36450 }, { "epoch": 103.57954545454545, "grad_norm": 0.6237143278121948, "learning_rate": 0.0001, "loss": 0.039, "step": 36460 }, { "epoch": 103.60795454545455, "grad_norm": 0.7558906674385071, "learning_rate": 0.0001, "loss": 0.0395, "step": 36470 }, { "epoch": 103.63636363636364, "grad_norm": 0.5823392868041992, "learning_rate": 0.0001, "loss": 0.0392, "step": 36480 }, { "epoch": 103.66477272727273, "grad_norm": 0.6411099433898926, "learning_rate": 0.0001, "loss": 0.0384, "step": 36490 }, { "epoch": 103.69318181818181, "grad_norm": 0.5531007051467896, "learning_rate": 0.0001, "loss": 0.0371, "step": 36500 }, { "epoch": 103.7215909090909, "grad_norm": 0.5332525968551636, "learning_rate": 0.0001, "loss": 0.0382, "step": 36510 }, { "epoch": 103.75, "grad_norm": 0.6390560269355774, "learning_rate": 0.0001, "loss": 0.0376, "step": 36520 }, { "epoch": 103.7784090909091, "grad_norm": 0.6145923137664795, "learning_rate": 0.0001, "loss": 0.0368, "step": 36530 }, { "epoch": 103.80681818181819, "grad_norm": 0.663539707660675, "learning_rate": 0.0001, "loss": 0.0373, "step": 36540 }, { "epoch": 103.83522727272727, "grad_norm": 0.6167774796485901, "learning_rate": 0.0001, "loss": 0.0391, "step": 36550 }, { "epoch": 103.86363636363636, "grad_norm": 0.7627246975898743, "learning_rate": 0.0001, "loss": 0.0388, "step": 36560 }, { "epoch": 103.89204545454545, "grad_norm": 0.5639641880989075, "learning_rate": 0.0001, "loss": 0.0394, "step": 36570 }, { "epoch": 103.92045454545455, "grad_norm": 0.6381582617759705, "learning_rate": 0.0001, "loss": 0.0398, "step": 36580 }, { "epoch": 103.94886363636364, "grad_norm": 0.6323047876358032, "learning_rate": 0.0001, "loss": 0.0391, "step": 36590 }, { "epoch": 103.97727272727273, "grad_norm": 0.5740509629249573, "learning_rate": 0.0001, "loss": 0.0383, "step": 36600 }, { "epoch": 104.00568181818181, "grad_norm": 0.6786510348320007, "learning_rate": 0.0001, "loss": 0.0375, "step": 36610 }, { "epoch": 104.0340909090909, "grad_norm": 0.6328359246253967, "learning_rate": 0.0001, "loss": 0.0377, "step": 36620 }, { "epoch": 104.0625, "grad_norm": 0.5744404792785645, "learning_rate": 0.0001, "loss": 0.0391, "step": 36630 }, { "epoch": 104.0909090909091, "grad_norm": 0.6046051979064941, "learning_rate": 0.0001, "loss": 0.0381, "step": 36640 }, { "epoch": 104.11931818181819, "grad_norm": 0.6407805681228638, "learning_rate": 0.0001, "loss": 0.0387, "step": 36650 }, { "epoch": 104.14772727272727, "grad_norm": 0.8871857523918152, "learning_rate": 0.0001, "loss": 0.038, "step": 36660 }, { "epoch": 104.17613636363636, "grad_norm": 0.8872131705284119, "learning_rate": 0.0001, "loss": 0.0376, "step": 36670 }, { "epoch": 104.20454545454545, "grad_norm": 0.7679946422576904, "learning_rate": 0.0001, "loss": 0.0386, "step": 36680 }, { "epoch": 104.23295454545455, "grad_norm": 0.6552764177322388, "learning_rate": 0.0001, "loss": 0.0381, "step": 36690 }, { "epoch": 104.26136363636364, "grad_norm": 0.8447787761688232, "learning_rate": 0.0001, "loss": 0.039, "step": 36700 }, { "epoch": 104.28977272727273, "grad_norm": 0.6279290318489075, "learning_rate": 0.0001, "loss": 0.0407, "step": 36710 }, { "epoch": 104.31818181818181, "grad_norm": 0.5877067446708679, "learning_rate": 0.0001, "loss": 0.0415, "step": 36720 }, { "epoch": 104.3465909090909, "grad_norm": 0.8654998540878296, "learning_rate": 0.0001, "loss": 0.0408, "step": 36730 }, { "epoch": 104.375, "grad_norm": 0.6628789901733398, "learning_rate": 0.0001, "loss": 0.0415, "step": 36740 }, { "epoch": 104.4034090909091, "grad_norm": 1.0221258401870728, "learning_rate": 0.0001, "loss": 0.0401, "step": 36750 }, { "epoch": 104.43181818181819, "grad_norm": 1.2567273378372192, "learning_rate": 0.0001, "loss": 0.0392, "step": 36760 }, { "epoch": 104.46022727272727, "grad_norm": 1.0356628894805908, "learning_rate": 0.0001, "loss": 0.0404, "step": 36770 }, { "epoch": 104.48863636363636, "grad_norm": 1.1455295085906982, "learning_rate": 0.0001, "loss": 0.0397, "step": 36780 }, { "epoch": 104.51704545454545, "grad_norm": 1.0565097332000732, "learning_rate": 0.0001, "loss": 0.0392, "step": 36790 }, { "epoch": 104.54545454545455, "grad_norm": 0.8333950638771057, "learning_rate": 0.0001, "loss": 0.0389, "step": 36800 }, { "epoch": 104.57386363636364, "grad_norm": 0.8816116452217102, "learning_rate": 0.0001, "loss": 0.0382, "step": 36810 }, { "epoch": 104.60227272727273, "grad_norm": 0.8084827661514282, "learning_rate": 0.0001, "loss": 0.038, "step": 36820 }, { "epoch": 104.63068181818181, "grad_norm": 0.7814130187034607, "learning_rate": 0.0001, "loss": 0.0384, "step": 36830 }, { "epoch": 104.6590909090909, "grad_norm": 0.6202470064163208, "learning_rate": 0.0001, "loss": 0.0383, "step": 36840 }, { "epoch": 104.6875, "grad_norm": 0.6300608515739441, "learning_rate": 0.0001, "loss": 0.038, "step": 36850 }, { "epoch": 104.7159090909091, "grad_norm": 0.6419079303741455, "learning_rate": 0.0001, "loss": 0.0383, "step": 36860 }, { "epoch": 104.74431818181819, "grad_norm": 0.6812740564346313, "learning_rate": 0.0001, "loss": 0.0389, "step": 36870 }, { "epoch": 104.77272727272727, "grad_norm": 0.8751927614212036, "learning_rate": 0.0001, "loss": 0.0398, "step": 36880 }, { "epoch": 104.80113636363636, "grad_norm": 0.694141149520874, "learning_rate": 0.0001, "loss": 0.0394, "step": 36890 }, { "epoch": 104.82954545454545, "grad_norm": 0.6310260891914368, "learning_rate": 0.0001, "loss": 0.0388, "step": 36900 }, { "epoch": 104.85795454545455, "grad_norm": 0.45405858755111694, "learning_rate": 0.0001, "loss": 0.037, "step": 36910 }, { "epoch": 104.88636363636364, "grad_norm": 0.7782346606254578, "learning_rate": 0.0001, "loss": 0.0381, "step": 36920 }, { "epoch": 104.91477272727273, "grad_norm": 0.8152375221252441, "learning_rate": 0.0001, "loss": 0.0376, "step": 36930 }, { "epoch": 104.94318181818181, "grad_norm": 0.6959502696990967, "learning_rate": 0.0001, "loss": 0.0376, "step": 36940 }, { "epoch": 104.9715909090909, "grad_norm": 0.805332362651825, "learning_rate": 0.0001, "loss": 0.037, "step": 36950 }, { "epoch": 105.0, "grad_norm": 0.8097184896469116, "learning_rate": 0.0001, "loss": 0.0384, "step": 36960 }, { "epoch": 105.0284090909091, "grad_norm": 0.611344039440155, "learning_rate": 0.0001, "loss": 0.0371, "step": 36970 }, { "epoch": 105.05681818181819, "grad_norm": 0.8121931552886963, "learning_rate": 0.0001, "loss": 0.0383, "step": 36980 }, { "epoch": 105.08522727272727, "grad_norm": 0.965552568435669, "learning_rate": 0.0001, "loss": 0.0375, "step": 36990 }, { "epoch": 105.11363636363636, "grad_norm": 0.772780179977417, "learning_rate": 0.0001, "loss": 0.0384, "step": 37000 }, { "epoch": 105.14204545454545, "grad_norm": 0.8075849413871765, "learning_rate": 0.0001, "loss": 0.0381, "step": 37010 }, { "epoch": 105.17045454545455, "grad_norm": 0.8520001173019409, "learning_rate": 0.0001, "loss": 0.0389, "step": 37020 }, { "epoch": 105.19886363636364, "grad_norm": 0.9963288903236389, "learning_rate": 0.0001, "loss": 0.0381, "step": 37030 }, { "epoch": 105.22727272727273, "grad_norm": 1.0980024337768555, "learning_rate": 0.0001, "loss": 0.038, "step": 37040 }, { "epoch": 105.25568181818181, "grad_norm": 0.8528324961662292, "learning_rate": 0.0001, "loss": 0.0374, "step": 37050 }, { "epoch": 105.2840909090909, "grad_norm": 1.0183886289596558, "learning_rate": 0.0001, "loss": 0.0364, "step": 37060 }, { "epoch": 105.3125, "grad_norm": 0.9727984666824341, "learning_rate": 0.0001, "loss": 0.0386, "step": 37070 }, { "epoch": 105.3409090909091, "grad_norm": 1.1144496202468872, "learning_rate": 0.0001, "loss": 0.0376, "step": 37080 }, { "epoch": 105.36931818181819, "grad_norm": 1.2052735090255737, "learning_rate": 0.0001, "loss": 0.0377, "step": 37090 }, { "epoch": 105.39772727272727, "grad_norm": 0.855655312538147, "learning_rate": 0.0001, "loss": 0.0367, "step": 37100 }, { "epoch": 105.42613636363636, "grad_norm": 0.714371383190155, "learning_rate": 0.0001, "loss": 0.0373, "step": 37110 }, { "epoch": 105.45454545454545, "grad_norm": 0.7759353518486023, "learning_rate": 0.0001, "loss": 0.0385, "step": 37120 }, { "epoch": 105.48295454545455, "grad_norm": 0.8793251514434814, "learning_rate": 0.0001, "loss": 0.0386, "step": 37130 }, { "epoch": 105.51136363636364, "grad_norm": 0.8084626793861389, "learning_rate": 0.0001, "loss": 0.0383, "step": 37140 }, { "epoch": 105.53977272727273, "grad_norm": 0.8977124094963074, "learning_rate": 0.0001, "loss": 0.0382, "step": 37150 }, { "epoch": 105.56818181818181, "grad_norm": 0.6372682452201843, "learning_rate": 0.0001, "loss": 0.0384, "step": 37160 }, { "epoch": 105.5965909090909, "grad_norm": 0.6709948182106018, "learning_rate": 0.0001, "loss": 0.0386, "step": 37170 }, { "epoch": 105.625, "grad_norm": 0.8853369951248169, "learning_rate": 0.0001, "loss": 0.0385, "step": 37180 }, { "epoch": 105.6534090909091, "grad_norm": 0.6816346645355225, "learning_rate": 0.0001, "loss": 0.0377, "step": 37190 }, { "epoch": 105.68181818181819, "grad_norm": 0.8214898705482483, "learning_rate": 0.0001, "loss": 0.0376, "step": 37200 }, { "epoch": 105.71022727272727, "grad_norm": 0.7285953164100647, "learning_rate": 0.0001, "loss": 0.0407, "step": 37210 }, { "epoch": 105.73863636363636, "grad_norm": 0.7041904926300049, "learning_rate": 0.0001, "loss": 0.0389, "step": 37220 }, { "epoch": 105.76704545454545, "grad_norm": 0.7178189754486084, "learning_rate": 0.0001, "loss": 0.0373, "step": 37230 }, { "epoch": 105.79545454545455, "grad_norm": 1.0095086097717285, "learning_rate": 0.0001, "loss": 0.0383, "step": 37240 }, { "epoch": 105.82386363636364, "grad_norm": 0.9176076054573059, "learning_rate": 0.0001, "loss": 0.0378, "step": 37250 }, { "epoch": 105.85227272727273, "grad_norm": 0.8059961795806885, "learning_rate": 0.0001, "loss": 0.0376, "step": 37260 }, { "epoch": 105.88068181818181, "grad_norm": 0.8492456078529358, "learning_rate": 0.0001, "loss": 0.0386, "step": 37270 }, { "epoch": 105.9090909090909, "grad_norm": 0.8832942247390747, "learning_rate": 0.0001, "loss": 0.0384, "step": 37280 }, { "epoch": 105.9375, "grad_norm": 0.7502552270889282, "learning_rate": 0.0001, "loss": 0.0379, "step": 37290 }, { "epoch": 105.9659090909091, "grad_norm": 0.6862600445747375, "learning_rate": 0.0001, "loss": 0.0379, "step": 37300 }, { "epoch": 105.99431818181819, "grad_norm": 0.6588712334632874, "learning_rate": 0.0001, "loss": 0.0363, "step": 37310 }, { "epoch": 106.02272727272727, "grad_norm": 0.5885828137397766, "learning_rate": 0.0001, "loss": 0.0372, "step": 37320 }, { "epoch": 106.05113636363636, "grad_norm": 0.5425651669502258, "learning_rate": 0.0001, "loss": 0.0375, "step": 37330 }, { "epoch": 106.07954545454545, "grad_norm": 0.5327818393707275, "learning_rate": 0.0001, "loss": 0.0369, "step": 37340 }, { "epoch": 106.10795454545455, "grad_norm": 0.6177202463150024, "learning_rate": 0.0001, "loss": 0.0374, "step": 37350 }, { "epoch": 106.13636363636364, "grad_norm": 0.473233699798584, "learning_rate": 0.0001, "loss": 0.0382, "step": 37360 }, { "epoch": 106.16477272727273, "grad_norm": 0.48036202788352966, "learning_rate": 0.0001, "loss": 0.0378, "step": 37370 }, { "epoch": 106.19318181818181, "grad_norm": 0.699809193611145, "learning_rate": 0.0001, "loss": 0.0371, "step": 37380 }, { "epoch": 106.2215909090909, "grad_norm": 0.865032434463501, "learning_rate": 0.0001, "loss": 0.0367, "step": 37390 }, { "epoch": 106.25, "grad_norm": 0.707669198513031, "learning_rate": 0.0001, "loss": 0.0369, "step": 37400 }, { "epoch": 106.2784090909091, "grad_norm": 0.6514762043952942, "learning_rate": 0.0001, "loss": 0.0353, "step": 37410 }, { "epoch": 106.30681818181819, "grad_norm": 0.7002271413803101, "learning_rate": 0.0001, "loss": 0.0362, "step": 37420 }, { "epoch": 106.33522727272727, "grad_norm": 0.6672790050506592, "learning_rate": 0.0001, "loss": 0.0352, "step": 37430 }, { "epoch": 106.36363636363636, "grad_norm": 0.7932654619216919, "learning_rate": 0.0001, "loss": 0.0376, "step": 37440 }, { "epoch": 106.39204545454545, "grad_norm": 0.6026307940483093, "learning_rate": 0.0001, "loss": 0.0372, "step": 37450 }, { "epoch": 106.42045454545455, "grad_norm": 0.6472262144088745, "learning_rate": 0.0001, "loss": 0.0362, "step": 37460 }, { "epoch": 106.44886363636364, "grad_norm": 0.6252856254577637, "learning_rate": 0.0001, "loss": 0.0379, "step": 37470 }, { "epoch": 106.47727272727273, "grad_norm": 0.5449088215827942, "learning_rate": 0.0001, "loss": 0.038, "step": 37480 }, { "epoch": 106.50568181818181, "grad_norm": 0.7351489663124084, "learning_rate": 0.0001, "loss": 0.0365, "step": 37490 }, { "epoch": 106.5340909090909, "grad_norm": 0.684830367565155, "learning_rate": 0.0001, "loss": 0.0364, "step": 37500 }, { "epoch": 106.5625, "grad_norm": 0.522684633731842, "learning_rate": 0.0001, "loss": 0.0367, "step": 37510 }, { "epoch": 106.5909090909091, "grad_norm": 0.5345920920372009, "learning_rate": 0.0001, "loss": 0.0356, "step": 37520 }, { "epoch": 106.61931818181819, "grad_norm": 0.5353555083274841, "learning_rate": 0.0001, "loss": 0.037, "step": 37530 }, { "epoch": 106.64772727272727, "grad_norm": 0.631646454334259, "learning_rate": 0.0001, "loss": 0.0365, "step": 37540 }, { "epoch": 106.67613636363636, "grad_norm": 0.6440027952194214, "learning_rate": 0.0001, "loss": 0.0378, "step": 37550 }, { "epoch": 106.70454545454545, "grad_norm": 0.5962269306182861, "learning_rate": 0.0001, "loss": 0.0359, "step": 37560 }, { "epoch": 106.73295454545455, "grad_norm": 0.6566577553749084, "learning_rate": 0.0001, "loss": 0.0369, "step": 37570 }, { "epoch": 106.76136363636364, "grad_norm": 0.6731533408164978, "learning_rate": 0.0001, "loss": 0.0372, "step": 37580 }, { "epoch": 106.78977272727273, "grad_norm": 0.8614497780799866, "learning_rate": 0.0001, "loss": 0.0365, "step": 37590 }, { "epoch": 106.81818181818181, "grad_norm": 0.7013105154037476, "learning_rate": 0.0001, "loss": 0.0354, "step": 37600 }, { "epoch": 106.8465909090909, "grad_norm": 0.7781490087509155, "learning_rate": 0.0001, "loss": 0.0366, "step": 37610 }, { "epoch": 106.875, "grad_norm": 0.6093642711639404, "learning_rate": 0.0001, "loss": 0.0377, "step": 37620 }, { "epoch": 106.9034090909091, "grad_norm": 0.5649243593215942, "learning_rate": 0.0001, "loss": 0.0364, "step": 37630 }, { "epoch": 106.93181818181819, "grad_norm": 0.6890942454338074, "learning_rate": 0.0001, "loss": 0.0367, "step": 37640 }, { "epoch": 106.96022727272727, "grad_norm": 0.7046976089477539, "learning_rate": 0.0001, "loss": 0.0369, "step": 37650 }, { "epoch": 106.98863636363636, "grad_norm": 0.6145200729370117, "learning_rate": 0.0001, "loss": 0.0363, "step": 37660 }, { "epoch": 107.01704545454545, "grad_norm": 0.4534114897251129, "learning_rate": 0.0001, "loss": 0.0368, "step": 37670 }, { "epoch": 107.04545454545455, "grad_norm": 0.519280195236206, "learning_rate": 0.0001, "loss": 0.038, "step": 37680 }, { "epoch": 107.07386363636364, "grad_norm": 0.6525757312774658, "learning_rate": 0.0001, "loss": 0.0368, "step": 37690 }, { "epoch": 107.10227272727273, "grad_norm": 0.6470338106155396, "learning_rate": 0.0001, "loss": 0.0373, "step": 37700 }, { "epoch": 107.13068181818181, "grad_norm": 0.5215091705322266, "learning_rate": 0.0001, "loss": 0.0371, "step": 37710 }, { "epoch": 107.1590909090909, "grad_norm": 0.48153358697891235, "learning_rate": 0.0001, "loss": 0.0375, "step": 37720 }, { "epoch": 107.1875, "grad_norm": 0.6757826209068298, "learning_rate": 0.0001, "loss": 0.0388, "step": 37730 }, { "epoch": 107.2159090909091, "grad_norm": 0.7484776377677917, "learning_rate": 0.0001, "loss": 0.0371, "step": 37740 }, { "epoch": 107.24431818181819, "grad_norm": 0.6073766946792603, "learning_rate": 0.0001, "loss": 0.035, "step": 37750 }, { "epoch": 107.27272727272727, "grad_norm": 0.45944106578826904, "learning_rate": 0.0001, "loss": 0.0366, "step": 37760 }, { "epoch": 107.30113636363636, "grad_norm": 0.481180876493454, "learning_rate": 0.0001, "loss": 0.0367, "step": 37770 }, { "epoch": 107.32954545454545, "grad_norm": 0.5007697343826294, "learning_rate": 0.0001, "loss": 0.0364, "step": 37780 }, { "epoch": 107.35795454545455, "grad_norm": 0.6877015233039856, "learning_rate": 0.0001, "loss": 0.038, "step": 37790 }, { "epoch": 107.38636363636364, "grad_norm": 1.0906400680541992, "learning_rate": 0.0001, "loss": 0.0383, "step": 37800 }, { "epoch": 107.41477272727273, "grad_norm": 0.9992173910140991, "learning_rate": 0.0001, "loss": 0.0367, "step": 37810 }, { "epoch": 107.44318181818181, "grad_norm": 1.065973162651062, "learning_rate": 0.0001, "loss": 0.0367, "step": 37820 }, { "epoch": 107.4715909090909, "grad_norm": 0.8227842450141907, "learning_rate": 0.0001, "loss": 0.0377, "step": 37830 }, { "epoch": 107.5, "grad_norm": 0.830697238445282, "learning_rate": 0.0001, "loss": 0.0376, "step": 37840 }, { "epoch": 107.5284090909091, "grad_norm": 0.652164101600647, "learning_rate": 0.0001, "loss": 0.0376, "step": 37850 }, { "epoch": 107.55681818181819, "grad_norm": 0.6938498020172119, "learning_rate": 0.0001, "loss": 0.0389, "step": 37860 }, { "epoch": 107.58522727272727, "grad_norm": 0.680587887763977, "learning_rate": 0.0001, "loss": 0.0366, "step": 37870 }, { "epoch": 107.61363636363636, "grad_norm": 0.6912381649017334, "learning_rate": 0.0001, "loss": 0.0377, "step": 37880 }, { "epoch": 107.64204545454545, "grad_norm": 0.6087629795074463, "learning_rate": 0.0001, "loss": 0.037, "step": 37890 }, { "epoch": 107.67045454545455, "grad_norm": 0.8172582983970642, "learning_rate": 0.0001, "loss": 0.0385, "step": 37900 }, { "epoch": 107.69886363636364, "grad_norm": 0.7405523657798767, "learning_rate": 0.0001, "loss": 0.0376, "step": 37910 }, { "epoch": 107.72727272727273, "grad_norm": 0.6305014491081238, "learning_rate": 0.0001, "loss": 0.0384, "step": 37920 }, { "epoch": 107.75568181818181, "grad_norm": 0.6484267711639404, "learning_rate": 0.0001, "loss": 0.0388, "step": 37930 }, { "epoch": 107.7840909090909, "grad_norm": 0.6926651000976562, "learning_rate": 0.0001, "loss": 0.0382, "step": 37940 }, { "epoch": 107.8125, "grad_norm": 0.7457519769668579, "learning_rate": 0.0001, "loss": 0.04, "step": 37950 }, { "epoch": 107.8409090909091, "grad_norm": 0.6257100701332092, "learning_rate": 0.0001, "loss": 0.0372, "step": 37960 }, { "epoch": 107.86931818181819, "grad_norm": 0.6201730966567993, "learning_rate": 0.0001, "loss": 0.0368, "step": 37970 }, { "epoch": 107.89772727272727, "grad_norm": 0.6591887474060059, "learning_rate": 0.0001, "loss": 0.0376, "step": 37980 }, { "epoch": 107.92613636363636, "grad_norm": 0.827599287033081, "learning_rate": 0.0001, "loss": 0.0375, "step": 37990 }, { "epoch": 107.95454545454545, "grad_norm": 1.455817699432373, "learning_rate": 0.0001, "loss": 0.0374, "step": 38000 }, { "epoch": 107.98295454545455, "grad_norm": 0.6563143730163574, "learning_rate": 0.0001, "loss": 0.0386, "step": 38010 }, { "epoch": 108.01136363636364, "grad_norm": 0.7723172307014465, "learning_rate": 0.0001, "loss": 0.0359, "step": 38020 }, { "epoch": 108.03977272727273, "grad_norm": 1.2289658784866333, "learning_rate": 0.0001, "loss": 0.0374, "step": 38030 }, { "epoch": 108.06818181818181, "grad_norm": 0.977079451084137, "learning_rate": 0.0001, "loss": 0.0351, "step": 38040 }, { "epoch": 108.0965909090909, "grad_norm": 0.7695228457450867, "learning_rate": 0.0001, "loss": 0.036, "step": 38050 }, { "epoch": 108.125, "grad_norm": 0.8520819544792175, "learning_rate": 0.0001, "loss": 0.0358, "step": 38060 }, { "epoch": 108.1534090909091, "grad_norm": 0.6254966855049133, "learning_rate": 0.0001, "loss": 0.0373, "step": 38070 }, { "epoch": 108.18181818181819, "grad_norm": 0.7900024056434631, "learning_rate": 0.0001, "loss": 0.0363, "step": 38080 }, { "epoch": 108.21022727272727, "grad_norm": 0.7880393266677856, "learning_rate": 0.0001, "loss": 0.037, "step": 38090 }, { "epoch": 108.23863636363636, "grad_norm": 0.7509173154830933, "learning_rate": 0.0001, "loss": 0.0362, "step": 38100 }, { "epoch": 108.26704545454545, "grad_norm": 0.5946815013885498, "learning_rate": 0.0001, "loss": 0.0375, "step": 38110 }, { "epoch": 108.29545454545455, "grad_norm": 0.703536331653595, "learning_rate": 0.0001, "loss": 0.0364, "step": 38120 }, { "epoch": 108.32386363636364, "grad_norm": 0.6964169144630432, "learning_rate": 0.0001, "loss": 0.0365, "step": 38130 }, { "epoch": 108.35227272727273, "grad_norm": 0.778841495513916, "learning_rate": 0.0001, "loss": 0.0367, "step": 38140 }, { "epoch": 108.38068181818181, "grad_norm": 0.8367446064949036, "learning_rate": 0.0001, "loss": 0.0371, "step": 38150 }, { "epoch": 108.4090909090909, "grad_norm": 0.4970041811466217, "learning_rate": 0.0001, "loss": 0.0351, "step": 38160 }, { "epoch": 108.4375, "grad_norm": 0.584697961807251, "learning_rate": 0.0001, "loss": 0.0376, "step": 38170 }, { "epoch": 108.4659090909091, "grad_norm": 0.5434656143188477, "learning_rate": 0.0001, "loss": 0.0374, "step": 38180 }, { "epoch": 108.49431818181819, "grad_norm": 0.8836389183998108, "learning_rate": 0.0001, "loss": 0.0364, "step": 38190 }, { "epoch": 108.52272727272727, "grad_norm": 0.7299078702926636, "learning_rate": 0.0001, "loss": 0.0371, "step": 38200 }, { "epoch": 108.55113636363636, "grad_norm": 0.6886661052703857, "learning_rate": 0.0001, "loss": 0.0367, "step": 38210 }, { "epoch": 108.57954545454545, "grad_norm": 0.6602329611778259, "learning_rate": 0.0001, "loss": 0.0377, "step": 38220 }, { "epoch": 108.60795454545455, "grad_norm": 0.5685455799102783, "learning_rate": 0.0001, "loss": 0.0366, "step": 38230 }, { "epoch": 108.63636363636364, "grad_norm": 0.5193608403205872, "learning_rate": 0.0001, "loss": 0.0366, "step": 38240 }, { "epoch": 108.66477272727273, "grad_norm": 0.4227427840232849, "learning_rate": 0.0001, "loss": 0.036, "step": 38250 }, { "epoch": 108.69318181818181, "grad_norm": 0.5614402890205383, "learning_rate": 0.0001, "loss": 0.037, "step": 38260 }, { "epoch": 108.7215909090909, "grad_norm": 0.4981783330440521, "learning_rate": 0.0001, "loss": 0.0363, "step": 38270 }, { "epoch": 108.75, "grad_norm": 0.5758175849914551, "learning_rate": 0.0001, "loss": 0.0353, "step": 38280 }, { "epoch": 108.7784090909091, "grad_norm": 0.5388387441635132, "learning_rate": 0.0001, "loss": 0.0366, "step": 38290 }, { "epoch": 108.80681818181819, "grad_norm": 0.6330029964447021, "learning_rate": 0.0001, "loss": 0.0353, "step": 38300 }, { "epoch": 108.83522727272727, "grad_norm": 0.6576165556907654, "learning_rate": 0.0001, "loss": 0.0366, "step": 38310 }, { "epoch": 108.86363636363636, "grad_norm": 0.6450666189193726, "learning_rate": 0.0001, "loss": 0.0377, "step": 38320 }, { "epoch": 108.89204545454545, "grad_norm": 0.5270466208457947, "learning_rate": 0.0001, "loss": 0.037, "step": 38330 }, { "epoch": 108.92045454545455, "grad_norm": 0.6218364834785461, "learning_rate": 0.0001, "loss": 0.036, "step": 38340 }, { "epoch": 108.94886363636364, "grad_norm": 0.7764090895652771, "learning_rate": 0.0001, "loss": 0.0372, "step": 38350 }, { "epoch": 108.97727272727273, "grad_norm": 0.7030161619186401, "learning_rate": 0.0001, "loss": 0.0374, "step": 38360 }, { "epoch": 109.00568181818181, "grad_norm": 0.9898335933685303, "learning_rate": 0.0001, "loss": 0.0359, "step": 38370 }, { "epoch": 109.0340909090909, "grad_norm": 1.0687246322631836, "learning_rate": 0.0001, "loss": 0.0379, "step": 38380 }, { "epoch": 109.0625, "grad_norm": 0.8941261768341064, "learning_rate": 0.0001, "loss": 0.037, "step": 38390 }, { "epoch": 109.0909090909091, "grad_norm": 0.9911275506019592, "learning_rate": 0.0001, "loss": 0.0373, "step": 38400 }, { "epoch": 109.11931818181819, "grad_norm": 0.7970879077911377, "learning_rate": 0.0001, "loss": 0.0371, "step": 38410 }, { "epoch": 109.14772727272727, "grad_norm": 0.5866600871086121, "learning_rate": 0.0001, "loss": 0.0355, "step": 38420 }, { "epoch": 109.17613636363636, "grad_norm": 0.7543792128562927, "learning_rate": 0.0001, "loss": 0.0374, "step": 38430 }, { "epoch": 109.20454545454545, "grad_norm": 0.9520198702812195, "learning_rate": 0.0001, "loss": 0.0376, "step": 38440 }, { "epoch": 109.23295454545455, "grad_norm": 0.898628830909729, "learning_rate": 0.0001, "loss": 0.0365, "step": 38450 }, { "epoch": 109.26136363636364, "grad_norm": 0.7924801707267761, "learning_rate": 0.0001, "loss": 0.0367, "step": 38460 }, { "epoch": 109.28977272727273, "grad_norm": 0.6956962943077087, "learning_rate": 0.0001, "loss": 0.0357, "step": 38470 }, { "epoch": 109.31818181818181, "grad_norm": 0.6093046069145203, "learning_rate": 0.0001, "loss": 0.0381, "step": 38480 }, { "epoch": 109.3465909090909, "grad_norm": 0.8115942478179932, "learning_rate": 0.0001, "loss": 0.0359, "step": 38490 }, { "epoch": 109.375, "grad_norm": 0.6574427485466003, "learning_rate": 0.0001, "loss": 0.0354, "step": 38500 }, { "epoch": 109.4034090909091, "grad_norm": 0.48985546827316284, "learning_rate": 0.0001, "loss": 0.0362, "step": 38510 }, { "epoch": 109.43181818181819, "grad_norm": 0.5606568455696106, "learning_rate": 0.0001, "loss": 0.0375, "step": 38520 }, { "epoch": 109.46022727272727, "grad_norm": 0.5550665855407715, "learning_rate": 0.0001, "loss": 0.0351, "step": 38530 }, { "epoch": 109.48863636363636, "grad_norm": 0.6146281361579895, "learning_rate": 0.0001, "loss": 0.0363, "step": 38540 }, { "epoch": 109.51704545454545, "grad_norm": 0.5746130347251892, "learning_rate": 0.0001, "loss": 0.0354, "step": 38550 }, { "epoch": 109.54545454545455, "grad_norm": 0.5612075328826904, "learning_rate": 0.0001, "loss": 0.0359, "step": 38560 }, { "epoch": 109.57386363636364, "grad_norm": 0.623532235622406, "learning_rate": 0.0001, "loss": 0.0352, "step": 38570 }, { "epoch": 109.60227272727273, "grad_norm": 0.5237706899642944, "learning_rate": 0.0001, "loss": 0.0374, "step": 38580 }, { "epoch": 109.63068181818181, "grad_norm": 0.7601568102836609, "learning_rate": 0.0001, "loss": 0.0356, "step": 38590 }, { "epoch": 109.6590909090909, "grad_norm": 0.7682206630706787, "learning_rate": 0.0001, "loss": 0.0367, "step": 38600 }, { "epoch": 109.6875, "grad_norm": 0.6917203664779663, "learning_rate": 0.0001, "loss": 0.0362, "step": 38610 }, { "epoch": 109.7159090909091, "grad_norm": 0.7227908372879028, "learning_rate": 0.0001, "loss": 0.0355, "step": 38620 }, { "epoch": 109.74431818181819, "grad_norm": 0.6287977695465088, "learning_rate": 0.0001, "loss": 0.0356, "step": 38630 }, { "epoch": 109.77272727272727, "grad_norm": 0.41634276509284973, "learning_rate": 0.0001, "loss": 0.0372, "step": 38640 }, { "epoch": 109.80113636363636, "grad_norm": 0.5135223865509033, "learning_rate": 0.0001, "loss": 0.0374, "step": 38650 }, { "epoch": 109.82954545454545, "grad_norm": 0.57159423828125, "learning_rate": 0.0001, "loss": 0.0362, "step": 38660 }, { "epoch": 109.85795454545455, "grad_norm": 0.7393842935562134, "learning_rate": 0.0001, "loss": 0.0361, "step": 38670 }, { "epoch": 109.88636363636364, "grad_norm": 0.8454589247703552, "learning_rate": 0.0001, "loss": 0.0365, "step": 38680 }, { "epoch": 109.91477272727273, "grad_norm": 0.6690040230751038, "learning_rate": 0.0001, "loss": 0.0354, "step": 38690 }, { "epoch": 109.94318181818181, "grad_norm": 1.1583824157714844, "learning_rate": 0.0001, "loss": 0.0375, "step": 38700 }, { "epoch": 109.9715909090909, "grad_norm": 0.675502598285675, "learning_rate": 0.0001, "loss": 0.0366, "step": 38710 }, { "epoch": 110.0, "grad_norm": 0.711791455745697, "learning_rate": 0.0001, "loss": 0.0379, "step": 38720 }, { "epoch": 110.0284090909091, "grad_norm": 1.1118009090423584, "learning_rate": 0.0001, "loss": 0.0376, "step": 38730 }, { "epoch": 110.05681818181819, "grad_norm": 1.0338175296783447, "learning_rate": 0.0001, "loss": 0.038, "step": 38740 }, { "epoch": 110.08522727272727, "grad_norm": 1.348634123802185, "learning_rate": 0.0001, "loss": 0.0381, "step": 38750 }, { "epoch": 110.11363636363636, "grad_norm": 1.231924295425415, "learning_rate": 0.0001, "loss": 0.0376, "step": 38760 }, { "epoch": 110.14204545454545, "grad_norm": 1.2100741863250732, "learning_rate": 0.0001, "loss": 0.039, "step": 38770 }, { "epoch": 110.17045454545455, "grad_norm": 0.7521945834159851, "learning_rate": 0.0001, "loss": 0.038, "step": 38780 }, { "epoch": 110.19886363636364, "grad_norm": 1.066770315170288, "learning_rate": 0.0001, "loss": 0.0376, "step": 38790 }, { "epoch": 110.22727272727273, "grad_norm": 0.9602776765823364, "learning_rate": 0.0001, "loss": 0.0372, "step": 38800 }, { "epoch": 110.25568181818181, "grad_norm": 1.193345069885254, "learning_rate": 0.0001, "loss": 0.0368, "step": 38810 }, { "epoch": 110.2840909090909, "grad_norm": 0.9646350741386414, "learning_rate": 0.0001, "loss": 0.0365, "step": 38820 }, { "epoch": 110.3125, "grad_norm": 1.0762289762496948, "learning_rate": 0.0001, "loss": 0.0356, "step": 38830 }, { "epoch": 110.3409090909091, "grad_norm": 1.0231586694717407, "learning_rate": 0.0001, "loss": 0.0351, "step": 38840 }, { "epoch": 110.36931818181819, "grad_norm": 0.8765637278556824, "learning_rate": 0.0001, "loss": 0.0365, "step": 38850 }, { "epoch": 110.39772727272727, "grad_norm": 0.7653793692588806, "learning_rate": 0.0001, "loss": 0.0354, "step": 38860 }, { "epoch": 110.42613636363636, "grad_norm": 0.7483477592468262, "learning_rate": 0.0001, "loss": 0.0367, "step": 38870 }, { "epoch": 110.45454545454545, "grad_norm": 0.7942159175872803, "learning_rate": 0.0001, "loss": 0.0361, "step": 38880 }, { "epoch": 110.48295454545455, "grad_norm": 0.7775100469589233, "learning_rate": 0.0001, "loss": 0.0355, "step": 38890 }, { "epoch": 110.51136363636364, "grad_norm": 0.6128475666046143, "learning_rate": 0.0001, "loss": 0.0381, "step": 38900 }, { "epoch": 110.53977272727273, "grad_norm": 0.6405518054962158, "learning_rate": 0.0001, "loss": 0.0355, "step": 38910 }, { "epoch": 110.56818181818181, "grad_norm": 0.6473897695541382, "learning_rate": 0.0001, "loss": 0.0356, "step": 38920 }, { "epoch": 110.5965909090909, "grad_norm": 0.8890331387519836, "learning_rate": 0.0001, "loss": 0.0374, "step": 38930 }, { "epoch": 110.625, "grad_norm": 0.9068458676338196, "learning_rate": 0.0001, "loss": 0.0362, "step": 38940 }, { "epoch": 110.6534090909091, "grad_norm": 0.8818080425262451, "learning_rate": 0.0001, "loss": 0.0359, "step": 38950 }, { "epoch": 110.68181818181819, "grad_norm": 1.1605606079101562, "learning_rate": 0.0001, "loss": 0.0358, "step": 38960 }, { "epoch": 110.71022727272727, "grad_norm": 0.8575087785720825, "learning_rate": 0.0001, "loss": 0.036, "step": 38970 }, { "epoch": 110.73863636363636, "grad_norm": 0.68806391954422, "learning_rate": 0.0001, "loss": 0.0359, "step": 38980 }, { "epoch": 110.76704545454545, "grad_norm": 0.7244641184806824, "learning_rate": 0.0001, "loss": 0.0356, "step": 38990 }, { "epoch": 110.79545454545455, "grad_norm": 0.7463381886482239, "learning_rate": 0.0001, "loss": 0.0369, "step": 39000 }, { "epoch": 110.82386363636364, "grad_norm": 0.6760275959968567, "learning_rate": 0.0001, "loss": 0.0373, "step": 39010 }, { "epoch": 110.85227272727273, "grad_norm": 0.6048280000686646, "learning_rate": 0.0001, "loss": 0.0351, "step": 39020 }, { "epoch": 110.88068181818181, "grad_norm": 0.6237334609031677, "learning_rate": 0.0001, "loss": 0.0361, "step": 39030 }, { "epoch": 110.9090909090909, "grad_norm": 0.5601615309715271, "learning_rate": 0.0001, "loss": 0.0356, "step": 39040 }, { "epoch": 110.9375, "grad_norm": 0.657729983329773, "learning_rate": 0.0001, "loss": 0.0364, "step": 39050 }, { "epoch": 110.9659090909091, "grad_norm": 0.7096065282821655, "learning_rate": 0.0001, "loss": 0.0366, "step": 39060 }, { "epoch": 110.99431818181819, "grad_norm": 0.6007034182548523, "learning_rate": 0.0001, "loss": 0.0361, "step": 39070 }, { "epoch": 111.02272727272727, "grad_norm": 0.675838053226471, "learning_rate": 0.0001, "loss": 0.0361, "step": 39080 }, { "epoch": 111.05113636363636, "grad_norm": 0.6892589330673218, "learning_rate": 0.0001, "loss": 0.0361, "step": 39090 }, { "epoch": 111.07954545454545, "grad_norm": 0.8822354674339294, "learning_rate": 0.0001, "loss": 0.0356, "step": 39100 }, { "epoch": 111.10795454545455, "grad_norm": 0.7115966081619263, "learning_rate": 0.0001, "loss": 0.0353, "step": 39110 }, { "epoch": 111.13636363636364, "grad_norm": 0.7949591279029846, "learning_rate": 0.0001, "loss": 0.0373, "step": 39120 }, { "epoch": 111.16477272727273, "grad_norm": 0.8462267518043518, "learning_rate": 0.0001, "loss": 0.0345, "step": 39130 }, { "epoch": 111.19318181818181, "grad_norm": 0.7834585905075073, "learning_rate": 0.0001, "loss": 0.0359, "step": 39140 }, { "epoch": 111.2215909090909, "grad_norm": 0.7656166553497314, "learning_rate": 0.0001, "loss": 0.0359, "step": 39150 }, { "epoch": 111.25, "grad_norm": 0.599009096622467, "learning_rate": 0.0001, "loss": 0.036, "step": 39160 }, { "epoch": 111.2784090909091, "grad_norm": 0.8071345090866089, "learning_rate": 0.0001, "loss": 0.0376, "step": 39170 }, { "epoch": 111.30681818181819, "grad_norm": 0.6010581254959106, "learning_rate": 0.0001, "loss": 0.037, "step": 39180 }, { "epoch": 111.33522727272727, "grad_norm": 0.6166114807128906, "learning_rate": 0.0001, "loss": 0.0361, "step": 39190 }, { "epoch": 111.36363636363636, "grad_norm": 0.5775583982467651, "learning_rate": 0.0001, "loss": 0.0368, "step": 39200 }, { "epoch": 111.39204545454545, "grad_norm": 0.5263673067092896, "learning_rate": 0.0001, "loss": 0.0368, "step": 39210 }, { "epoch": 111.42045454545455, "grad_norm": 1.2878767251968384, "learning_rate": 0.0001, "loss": 0.037, "step": 39220 }, { "epoch": 111.44886363636364, "grad_norm": 1.5461721420288086, "learning_rate": 0.0001, "loss": 0.0365, "step": 39230 }, { "epoch": 111.47727272727273, "grad_norm": 1.5255823135375977, "learning_rate": 0.0001, "loss": 0.037, "step": 39240 }, { "epoch": 111.50568181818181, "grad_norm": 1.0932375192642212, "learning_rate": 0.0001, "loss": 0.036, "step": 39250 }, { "epoch": 111.5340909090909, "grad_norm": 0.901646614074707, "learning_rate": 0.0001, "loss": 0.0359, "step": 39260 }, { "epoch": 111.5625, "grad_norm": 0.6453569531440735, "learning_rate": 0.0001, "loss": 0.035, "step": 39270 }, { "epoch": 111.5909090909091, "grad_norm": 0.5468852519989014, "learning_rate": 0.0001, "loss": 0.0355, "step": 39280 }, { "epoch": 111.61931818181819, "grad_norm": 0.7907230257987976, "learning_rate": 0.0001, "loss": 0.0364, "step": 39290 }, { "epoch": 111.64772727272727, "grad_norm": 0.8564003109931946, "learning_rate": 0.0001, "loss": 0.0354, "step": 39300 }, { "epoch": 111.67613636363636, "grad_norm": 0.9764010310173035, "learning_rate": 0.0001, "loss": 0.037, "step": 39310 }, { "epoch": 111.70454545454545, "grad_norm": 0.8942014575004578, "learning_rate": 0.0001, "loss": 0.0349, "step": 39320 }, { "epoch": 111.73295454545455, "grad_norm": 0.5502811074256897, "learning_rate": 0.0001, "loss": 0.0349, "step": 39330 }, { "epoch": 111.76136363636364, "grad_norm": 0.8766093850135803, "learning_rate": 0.0001, "loss": 0.0348, "step": 39340 }, { "epoch": 111.78977272727273, "grad_norm": 0.98386150598526, "learning_rate": 0.0001, "loss": 0.0353, "step": 39350 }, { "epoch": 111.81818181818181, "grad_norm": 0.9076448678970337, "learning_rate": 0.0001, "loss": 0.0364, "step": 39360 }, { "epoch": 111.8465909090909, "grad_norm": 0.9458385109901428, "learning_rate": 0.0001, "loss": 0.0357, "step": 39370 }, { "epoch": 111.875, "grad_norm": 0.7242578864097595, "learning_rate": 0.0001, "loss": 0.0363, "step": 39380 }, { "epoch": 111.9034090909091, "grad_norm": 0.9158487915992737, "learning_rate": 0.0001, "loss": 0.0355, "step": 39390 }, { "epoch": 111.93181818181819, "grad_norm": 0.7818379402160645, "learning_rate": 0.0001, "loss": 0.037, "step": 39400 }, { "epoch": 111.96022727272727, "grad_norm": 0.8776116967201233, "learning_rate": 0.0001, "loss": 0.0371, "step": 39410 }, { "epoch": 111.98863636363636, "grad_norm": 0.8241115808486938, "learning_rate": 0.0001, "loss": 0.0365, "step": 39420 }, { "epoch": 112.01704545454545, "grad_norm": 0.5597853660583496, "learning_rate": 0.0001, "loss": 0.0358, "step": 39430 }, { "epoch": 112.04545454545455, "grad_norm": 0.6248016953468323, "learning_rate": 0.0001, "loss": 0.0355, "step": 39440 }, { "epoch": 112.07386363636364, "grad_norm": 0.5273939967155457, "learning_rate": 0.0001, "loss": 0.0355, "step": 39450 }, { "epoch": 112.10227272727273, "grad_norm": 0.4962223172187805, "learning_rate": 0.0001, "loss": 0.0355, "step": 39460 }, { "epoch": 112.13068181818181, "grad_norm": 0.6138328313827515, "learning_rate": 0.0001, "loss": 0.0365, "step": 39470 }, { "epoch": 112.1590909090909, "grad_norm": 0.4526226222515106, "learning_rate": 0.0001, "loss": 0.0363, "step": 39480 }, { "epoch": 112.1875, "grad_norm": 0.5858993530273438, "learning_rate": 0.0001, "loss": 0.036, "step": 39490 }, { "epoch": 112.2159090909091, "grad_norm": 0.6115566492080688, "learning_rate": 0.0001, "loss": 0.0345, "step": 39500 }, { "epoch": 112.24431818181819, "grad_norm": 0.5232250094413757, "learning_rate": 0.0001, "loss": 0.0365, "step": 39510 }, { "epoch": 112.27272727272727, "grad_norm": 0.5535911321640015, "learning_rate": 0.0001, "loss": 0.0347, "step": 39520 }, { "epoch": 112.30113636363636, "grad_norm": 0.7334476113319397, "learning_rate": 0.0001, "loss": 0.0349, "step": 39530 }, { "epoch": 112.32954545454545, "grad_norm": 0.569206178188324, "learning_rate": 0.0001, "loss": 0.0344, "step": 39540 }, { "epoch": 112.35795454545455, "grad_norm": 0.9461883902549744, "learning_rate": 0.0001, "loss": 0.0361, "step": 39550 }, { "epoch": 112.38636363636364, "grad_norm": 0.8097552061080933, "learning_rate": 0.0001, "loss": 0.0363, "step": 39560 }, { "epoch": 112.41477272727273, "grad_norm": 0.6988733410835266, "learning_rate": 0.0001, "loss": 0.0366, "step": 39570 }, { "epoch": 112.44318181818181, "grad_norm": 0.7010588645935059, "learning_rate": 0.0001, "loss": 0.0358, "step": 39580 }, { "epoch": 112.4715909090909, "grad_norm": 0.6968778967857361, "learning_rate": 0.0001, "loss": 0.0368, "step": 39590 }, { "epoch": 112.5, "grad_norm": 0.7270570397377014, "learning_rate": 0.0001, "loss": 0.0357, "step": 39600 }, { "epoch": 112.5284090909091, "grad_norm": 0.5744696855545044, "learning_rate": 0.0001, "loss": 0.0367, "step": 39610 }, { "epoch": 112.55681818181819, "grad_norm": 0.6304721832275391, "learning_rate": 0.0001, "loss": 0.0357, "step": 39620 }, { "epoch": 112.58522727272727, "grad_norm": 0.6305207014083862, "learning_rate": 0.0001, "loss": 0.0357, "step": 39630 }, { "epoch": 112.61363636363636, "grad_norm": 0.7027773261070251, "learning_rate": 0.0001, "loss": 0.0365, "step": 39640 }, { "epoch": 112.64204545454545, "grad_norm": 0.6099282503128052, "learning_rate": 0.0001, "loss": 0.0361, "step": 39650 }, { "epoch": 112.67045454545455, "grad_norm": 1.234322428703308, "learning_rate": 0.0001, "loss": 0.0361, "step": 39660 }, { "epoch": 112.69886363636364, "grad_norm": 1.157665729522705, "learning_rate": 0.0001, "loss": 0.0358, "step": 39670 }, { "epoch": 112.72727272727273, "grad_norm": 1.371219277381897, "learning_rate": 0.0001, "loss": 0.0377, "step": 39680 }, { "epoch": 112.75568181818181, "grad_norm": 1.147692322731018, "learning_rate": 0.0001, "loss": 0.0364, "step": 39690 }, { "epoch": 112.7840909090909, "grad_norm": 0.7648851871490479, "learning_rate": 0.0001, "loss": 0.0342, "step": 39700 }, { "epoch": 112.8125, "grad_norm": 0.6350628733634949, "learning_rate": 0.0001, "loss": 0.036, "step": 39710 }, { "epoch": 112.8409090909091, "grad_norm": 0.6837986707687378, "learning_rate": 0.0001, "loss": 0.0343, "step": 39720 }, { "epoch": 112.86931818181819, "grad_norm": 0.6625135540962219, "learning_rate": 0.0001, "loss": 0.0345, "step": 39730 }, { "epoch": 112.89772727272727, "grad_norm": 0.8770467638969421, "learning_rate": 0.0001, "loss": 0.0354, "step": 39740 }, { "epoch": 112.92613636363636, "grad_norm": 0.8206093311309814, "learning_rate": 0.0001, "loss": 0.0367, "step": 39750 }, { "epoch": 112.95454545454545, "grad_norm": 0.8660224080085754, "learning_rate": 0.0001, "loss": 0.0362, "step": 39760 }, { "epoch": 112.98295454545455, "grad_norm": 0.6569976806640625, "learning_rate": 0.0001, "loss": 0.0371, "step": 39770 }, { "epoch": 113.01136363636364, "grad_norm": 0.9233992099761963, "learning_rate": 0.0001, "loss": 0.0352, "step": 39780 }, { "epoch": 113.03977272727273, "grad_norm": 0.8801051378250122, "learning_rate": 0.0001, "loss": 0.035, "step": 39790 }, { "epoch": 113.06818181818181, "grad_norm": 0.773917019367218, "learning_rate": 0.0001, "loss": 0.0359, "step": 39800 }, { "epoch": 113.0965909090909, "grad_norm": 0.7063732147216797, "learning_rate": 0.0001, "loss": 0.0354, "step": 39810 }, { "epoch": 113.125, "grad_norm": 0.7631141543388367, "learning_rate": 0.0001, "loss": 0.0364, "step": 39820 }, { "epoch": 113.1534090909091, "grad_norm": 0.7640942931175232, "learning_rate": 0.0001, "loss": 0.0351, "step": 39830 }, { "epoch": 113.18181818181819, "grad_norm": 0.6806530356407166, "learning_rate": 0.0001, "loss": 0.0351, "step": 39840 }, { "epoch": 113.21022727272727, "grad_norm": 0.627130389213562, "learning_rate": 0.0001, "loss": 0.0358, "step": 39850 }, { "epoch": 113.23863636363636, "grad_norm": 0.4664798378944397, "learning_rate": 0.0001, "loss": 0.0354, "step": 39860 }, { "epoch": 113.26704545454545, "grad_norm": 0.6047282814979553, "learning_rate": 0.0001, "loss": 0.0346, "step": 39870 }, { "epoch": 113.29545454545455, "grad_norm": 0.8172615170478821, "learning_rate": 0.0001, "loss": 0.0346, "step": 39880 }, { "epoch": 113.32386363636364, "grad_norm": 0.7047287821769714, "learning_rate": 0.0001, "loss": 0.0364, "step": 39890 }, { "epoch": 113.35227272727273, "grad_norm": 0.6957646012306213, "learning_rate": 0.0001, "loss": 0.0361, "step": 39900 }, { "epoch": 113.38068181818181, "grad_norm": 0.7700342535972595, "learning_rate": 0.0001, "loss": 0.0352, "step": 39910 }, { "epoch": 113.4090909090909, "grad_norm": 0.6716678738594055, "learning_rate": 0.0001, "loss": 0.0355, "step": 39920 }, { "epoch": 113.4375, "grad_norm": 0.6276105046272278, "learning_rate": 0.0001, "loss": 0.036, "step": 39930 }, { "epoch": 113.4659090909091, "grad_norm": 0.7621968388557434, "learning_rate": 0.0001, "loss": 0.0359, "step": 39940 }, { "epoch": 113.49431818181819, "grad_norm": 0.5619763135910034, "learning_rate": 0.0001, "loss": 0.0362, "step": 39950 }, { "epoch": 113.52272727272727, "grad_norm": 0.5187546014785767, "learning_rate": 0.0001, "loss": 0.0356, "step": 39960 }, { "epoch": 113.55113636363636, "grad_norm": 0.7270740866661072, "learning_rate": 0.0001, "loss": 0.0357, "step": 39970 }, { "epoch": 113.57954545454545, "grad_norm": 0.49849000573158264, "learning_rate": 0.0001, "loss": 0.0365, "step": 39980 }, { "epoch": 113.60795454545455, "grad_norm": 1.0504893064498901, "learning_rate": 0.0001, "loss": 0.0353, "step": 39990 }, { "epoch": 113.63636363636364, "grad_norm": 0.6417130827903748, "learning_rate": 0.0001, "loss": 0.0342, "step": 40000 }, { "epoch": 113.66477272727273, "grad_norm": 0.6625685691833496, "learning_rate": 0.0001, "loss": 0.0359, "step": 40010 }, { "epoch": 113.69318181818181, "grad_norm": 0.6271093487739563, "learning_rate": 0.0001, "loss": 0.0352, "step": 40020 }, { "epoch": 113.7215909090909, "grad_norm": 0.6028963923454285, "learning_rate": 0.0001, "loss": 0.0357, "step": 40030 }, { "epoch": 113.75, "grad_norm": 0.5805331468582153, "learning_rate": 0.0001, "loss": 0.0368, "step": 40040 }, { "epoch": 113.7784090909091, "grad_norm": 0.594214677810669, "learning_rate": 0.0001, "loss": 0.0367, "step": 40050 }, { "epoch": 113.80681818181819, "grad_norm": 0.6908999681472778, "learning_rate": 0.0001, "loss": 0.0353, "step": 40060 }, { "epoch": 113.83522727272727, "grad_norm": 0.53608238697052, "learning_rate": 0.0001, "loss": 0.0358, "step": 40070 }, { "epoch": 113.86363636363636, "grad_norm": 0.4885925352573395, "learning_rate": 0.0001, "loss": 0.0353, "step": 40080 }, { "epoch": 113.89204545454545, "grad_norm": 0.5205367803573608, "learning_rate": 0.0001, "loss": 0.0359, "step": 40090 }, { "epoch": 113.92045454545455, "grad_norm": 0.5592257976531982, "learning_rate": 0.0001, "loss": 0.0356, "step": 40100 }, { "epoch": 113.94886363636364, "grad_norm": 0.5714760422706604, "learning_rate": 0.0001, "loss": 0.0373, "step": 40110 }, { "epoch": 113.97727272727273, "grad_norm": 0.6751970648765564, "learning_rate": 0.0001, "loss": 0.0361, "step": 40120 }, { "epoch": 114.00568181818181, "grad_norm": 0.6371946334838867, "learning_rate": 0.0001, "loss": 0.0362, "step": 40130 }, { "epoch": 114.0340909090909, "grad_norm": 0.6978771686553955, "learning_rate": 0.0001, "loss": 0.0353, "step": 40140 }, { "epoch": 114.0625, "grad_norm": 0.598162829875946, "learning_rate": 0.0001, "loss": 0.0357, "step": 40150 }, { "epoch": 114.0909090909091, "grad_norm": 0.6158064007759094, "learning_rate": 0.0001, "loss": 0.0364, "step": 40160 }, { "epoch": 114.11931818181819, "grad_norm": 0.5208731889724731, "learning_rate": 0.0001, "loss": 0.0356, "step": 40170 }, { "epoch": 114.14772727272727, "grad_norm": 0.5302230715751648, "learning_rate": 0.0001, "loss": 0.0353, "step": 40180 }, { "epoch": 114.17613636363636, "grad_norm": 0.5512319803237915, "learning_rate": 0.0001, "loss": 0.0347, "step": 40190 }, { "epoch": 114.20454545454545, "grad_norm": 0.43418312072753906, "learning_rate": 0.0001, "loss": 0.0354, "step": 40200 }, { "epoch": 114.23295454545455, "grad_norm": 0.5973665118217468, "learning_rate": 0.0001, "loss": 0.0352, "step": 40210 }, { "epoch": 114.26136363636364, "grad_norm": 0.5855032801628113, "learning_rate": 0.0001, "loss": 0.0354, "step": 40220 }, { "epoch": 114.28977272727273, "grad_norm": 0.5427087545394897, "learning_rate": 0.0001, "loss": 0.035, "step": 40230 }, { "epoch": 114.31818181818181, "grad_norm": 0.7128482460975647, "learning_rate": 0.0001, "loss": 0.0352, "step": 40240 }, { "epoch": 114.3465909090909, "grad_norm": 0.4120272696018219, "learning_rate": 0.0001, "loss": 0.036, "step": 40250 }, { "epoch": 114.375, "grad_norm": 0.4165058135986328, "learning_rate": 0.0001, "loss": 0.035, "step": 40260 }, { "epoch": 114.4034090909091, "grad_norm": 0.4229552447795868, "learning_rate": 0.0001, "loss": 0.0353, "step": 40270 }, { "epoch": 114.43181818181819, "grad_norm": 0.526804506778717, "learning_rate": 0.0001, "loss": 0.035, "step": 40280 }, { "epoch": 114.46022727272727, "grad_norm": 0.5471256971359253, "learning_rate": 0.0001, "loss": 0.0355, "step": 40290 }, { "epoch": 114.48863636363636, "grad_norm": 0.7126163840293884, "learning_rate": 0.0001, "loss": 0.0336, "step": 40300 }, { "epoch": 114.51704545454545, "grad_norm": 0.5300715565681458, "learning_rate": 0.0001, "loss": 0.0351, "step": 40310 }, { "epoch": 114.54545454545455, "grad_norm": 0.5716768503189087, "learning_rate": 0.0001, "loss": 0.0346, "step": 40320 }, { "epoch": 114.57386363636364, "grad_norm": 0.5660290122032166, "learning_rate": 0.0001, "loss": 0.0354, "step": 40330 }, { "epoch": 114.60227272727273, "grad_norm": 0.4344553053379059, "learning_rate": 0.0001, "loss": 0.0349, "step": 40340 }, { "epoch": 114.63068181818181, "grad_norm": 0.5412406921386719, "learning_rate": 0.0001, "loss": 0.0358, "step": 40350 }, { "epoch": 114.6590909090909, "grad_norm": 0.6711289286613464, "learning_rate": 0.0001, "loss": 0.0364, "step": 40360 }, { "epoch": 114.6875, "grad_norm": 0.783184289932251, "learning_rate": 0.0001, "loss": 0.0358, "step": 40370 }, { "epoch": 114.7159090909091, "grad_norm": 0.819743812084198, "learning_rate": 0.0001, "loss": 0.0343, "step": 40380 }, { "epoch": 114.74431818181819, "grad_norm": 0.7587737441062927, "learning_rate": 0.0001, "loss": 0.0354, "step": 40390 }, { "epoch": 114.77272727272727, "grad_norm": 0.6675707101821899, "learning_rate": 0.0001, "loss": 0.0356, "step": 40400 }, { "epoch": 114.80113636363636, "grad_norm": 0.5723679065704346, "learning_rate": 0.0001, "loss": 0.0368, "step": 40410 }, { "epoch": 114.82954545454545, "grad_norm": 0.6985281109809875, "learning_rate": 0.0001, "loss": 0.0362, "step": 40420 }, { "epoch": 114.85795454545455, "grad_norm": 1.0145719051361084, "learning_rate": 0.0001, "loss": 0.0357, "step": 40430 }, { "epoch": 114.88636363636364, "grad_norm": 1.0415736436843872, "learning_rate": 0.0001, "loss": 0.0352, "step": 40440 }, { "epoch": 114.91477272727273, "grad_norm": 0.74834805727005, "learning_rate": 0.0001, "loss": 0.0362, "step": 40450 }, { "epoch": 114.94318181818181, "grad_norm": 1.022322654724121, "learning_rate": 0.0001, "loss": 0.0346, "step": 40460 }, { "epoch": 114.9715909090909, "grad_norm": 0.8849312663078308, "learning_rate": 0.0001, "loss": 0.0357, "step": 40470 }, { "epoch": 115.0, "grad_norm": 0.602628231048584, "learning_rate": 0.0001, "loss": 0.0347, "step": 40480 }, { "epoch": 115.0284090909091, "grad_norm": 0.8007441163063049, "learning_rate": 0.0001, "loss": 0.0366, "step": 40490 }, { "epoch": 115.05681818181819, "grad_norm": 0.8990688323974609, "learning_rate": 0.0001, "loss": 0.0349, "step": 40500 }, { "epoch": 115.08522727272727, "grad_norm": 0.8828384876251221, "learning_rate": 0.0001, "loss": 0.0345, "step": 40510 }, { "epoch": 115.11363636363636, "grad_norm": 1.0378295183181763, "learning_rate": 0.0001, "loss": 0.0344, "step": 40520 }, { "epoch": 115.14204545454545, "grad_norm": 1.0264496803283691, "learning_rate": 0.0001, "loss": 0.0352, "step": 40530 }, { "epoch": 115.17045454545455, "grad_norm": 1.1999590396881104, "learning_rate": 0.0001, "loss": 0.0343, "step": 40540 }, { "epoch": 115.19886363636364, "grad_norm": 0.8743994832038879, "learning_rate": 0.0001, "loss": 0.0346, "step": 40550 }, { "epoch": 115.22727272727273, "grad_norm": 0.5800889730453491, "learning_rate": 0.0001, "loss": 0.0352, "step": 40560 }, { "epoch": 115.25568181818181, "grad_norm": 0.8396671414375305, "learning_rate": 0.0001, "loss": 0.0354, "step": 40570 }, { "epoch": 115.2840909090909, "grad_norm": 0.8173549175262451, "learning_rate": 0.0001, "loss": 0.0345, "step": 40580 }, { "epoch": 115.3125, "grad_norm": 0.7395663857460022, "learning_rate": 0.0001, "loss": 0.035, "step": 40590 }, { "epoch": 115.3409090909091, "grad_norm": 0.6188920140266418, "learning_rate": 0.0001, "loss": 0.0352, "step": 40600 }, { "epoch": 115.36931818181819, "grad_norm": 0.5487187504768372, "learning_rate": 0.0001, "loss": 0.0363, "step": 40610 }, { "epoch": 115.39772727272727, "grad_norm": 0.5410171151161194, "learning_rate": 0.0001, "loss": 0.0354, "step": 40620 }, { "epoch": 115.42613636363636, "grad_norm": 0.5235713124275208, "learning_rate": 0.0001, "loss": 0.0352, "step": 40630 }, { "epoch": 115.45454545454545, "grad_norm": 0.7061126232147217, "learning_rate": 0.0001, "loss": 0.035, "step": 40640 }, { "epoch": 115.48295454545455, "grad_norm": 0.7242769598960876, "learning_rate": 0.0001, "loss": 0.0356, "step": 40650 }, { "epoch": 115.51136363636364, "grad_norm": 0.6157847046852112, "learning_rate": 0.0001, "loss": 0.0353, "step": 40660 }, { "epoch": 115.53977272727273, "grad_norm": 0.6777865886688232, "learning_rate": 0.0001, "loss": 0.0361, "step": 40670 }, { "epoch": 115.56818181818181, "grad_norm": 0.5488711595535278, "learning_rate": 0.0001, "loss": 0.0365, "step": 40680 }, { "epoch": 115.5965909090909, "grad_norm": 0.7855844497680664, "learning_rate": 0.0001, "loss": 0.036, "step": 40690 }, { "epoch": 115.625, "grad_norm": 0.640532910823822, "learning_rate": 0.0001, "loss": 0.0365, "step": 40700 }, { "epoch": 115.6534090909091, "grad_norm": 0.5868560671806335, "learning_rate": 0.0001, "loss": 0.038, "step": 40710 }, { "epoch": 115.68181818181819, "grad_norm": 0.8778387308120728, "learning_rate": 0.0001, "loss": 0.0354, "step": 40720 }, { "epoch": 115.71022727272727, "grad_norm": 0.6132923364639282, "learning_rate": 0.0001, "loss": 0.0364, "step": 40730 }, { "epoch": 115.73863636363636, "grad_norm": 0.6299883127212524, "learning_rate": 0.0001, "loss": 0.0357, "step": 40740 }, { "epoch": 115.76704545454545, "grad_norm": 0.6556488871574402, "learning_rate": 0.0001, "loss": 0.0356, "step": 40750 }, { "epoch": 115.79545454545455, "grad_norm": 0.7025827169418335, "learning_rate": 0.0001, "loss": 0.036, "step": 40760 }, { "epoch": 115.82386363636364, "grad_norm": 0.49530184268951416, "learning_rate": 0.0001, "loss": 0.037, "step": 40770 }, { "epoch": 115.85227272727273, "grad_norm": 0.6061105728149414, "learning_rate": 0.0001, "loss": 0.0356, "step": 40780 }, { "epoch": 115.88068181818181, "grad_norm": 0.5180947184562683, "learning_rate": 0.0001, "loss": 0.0366, "step": 40790 }, { "epoch": 115.9090909090909, "grad_norm": 0.584722101688385, "learning_rate": 0.0001, "loss": 0.0361, "step": 40800 }, { "epoch": 115.9375, "grad_norm": 0.5374404788017273, "learning_rate": 0.0001, "loss": 0.0353, "step": 40810 }, { "epoch": 115.9659090909091, "grad_norm": 0.5954856276512146, "learning_rate": 0.0001, "loss": 0.0357, "step": 40820 }, { "epoch": 115.99431818181819, "grad_norm": 0.5415761470794678, "learning_rate": 0.0001, "loss": 0.0372, "step": 40830 }, { "epoch": 116.02272727272727, "grad_norm": 0.6761354804039001, "learning_rate": 0.0001, "loss": 0.0372, "step": 40840 }, { "epoch": 116.05113636363636, "grad_norm": 0.5434542894363403, "learning_rate": 0.0001, "loss": 0.0355, "step": 40850 }, { "epoch": 116.07954545454545, "grad_norm": 0.67738276720047, "learning_rate": 0.0001, "loss": 0.036, "step": 40860 }, { "epoch": 116.10795454545455, "grad_norm": 0.5063143968582153, "learning_rate": 0.0001, "loss": 0.0357, "step": 40870 }, { "epoch": 116.13636363636364, "grad_norm": 0.642759382724762, "learning_rate": 0.0001, "loss": 0.0363, "step": 40880 }, { "epoch": 116.16477272727273, "grad_norm": 0.6486865878105164, "learning_rate": 0.0001, "loss": 0.0368, "step": 40890 }, { "epoch": 116.19318181818181, "grad_norm": 0.6188929080963135, "learning_rate": 0.0001, "loss": 0.0401, "step": 40900 }, { "epoch": 116.2215909090909, "grad_norm": 0.5677132606506348, "learning_rate": 0.0001, "loss": 0.0391, "step": 40910 }, { "epoch": 116.25, "grad_norm": 0.583200991153717, "learning_rate": 0.0001, "loss": 0.0379, "step": 40920 }, { "epoch": 116.2784090909091, "grad_norm": 0.6927564740180969, "learning_rate": 0.0001, "loss": 0.037, "step": 40930 }, { "epoch": 116.30681818181819, "grad_norm": 0.5694494247436523, "learning_rate": 0.0001, "loss": 0.0365, "step": 40940 }, { "epoch": 116.33522727272727, "grad_norm": 0.6229391098022461, "learning_rate": 0.0001, "loss": 0.0379, "step": 40950 }, { "epoch": 116.36363636363636, "grad_norm": 0.6101709008216858, "learning_rate": 0.0001, "loss": 0.0375, "step": 40960 }, { "epoch": 116.39204545454545, "grad_norm": 0.5544459819793701, "learning_rate": 0.0001, "loss": 0.0356, "step": 40970 }, { "epoch": 116.42045454545455, "grad_norm": 0.6003863215446472, "learning_rate": 0.0001, "loss": 0.0368, "step": 40980 }, { "epoch": 116.44886363636364, "grad_norm": 0.735579788684845, "learning_rate": 0.0001, "loss": 0.0352, "step": 40990 }, { "epoch": 116.47727272727273, "grad_norm": 0.5912768244743347, "learning_rate": 0.0001, "loss": 0.0356, "step": 41000 }, { "epoch": 116.50568181818181, "grad_norm": 0.5649675726890564, "learning_rate": 0.0001, "loss": 0.0357, "step": 41010 }, { "epoch": 116.5340909090909, "grad_norm": 0.9016214609146118, "learning_rate": 0.0001, "loss": 0.0354, "step": 41020 }, { "epoch": 116.5625, "grad_norm": 0.6981037259101868, "learning_rate": 0.0001, "loss": 0.0372, "step": 41030 }, { "epoch": 116.5909090909091, "grad_norm": 0.7360560297966003, "learning_rate": 0.0001, "loss": 0.0359, "step": 41040 }, { "epoch": 116.61931818181819, "grad_norm": 0.9419438242912292, "learning_rate": 0.0001, "loss": 0.0361, "step": 41050 }, { "epoch": 116.64772727272727, "grad_norm": 0.7652466297149658, "learning_rate": 0.0001, "loss": 0.0354, "step": 41060 }, { "epoch": 116.67613636363636, "grad_norm": 0.8323368430137634, "learning_rate": 0.0001, "loss": 0.0369, "step": 41070 }, { "epoch": 116.70454545454545, "grad_norm": 0.625653088092804, "learning_rate": 0.0001, "loss": 0.0348, "step": 41080 }, { "epoch": 116.73295454545455, "grad_norm": 0.8039312362670898, "learning_rate": 0.0001, "loss": 0.0362, "step": 41090 }, { "epoch": 116.76136363636364, "grad_norm": 0.7268533706665039, "learning_rate": 0.0001, "loss": 0.0341, "step": 41100 }, { "epoch": 116.78977272727273, "grad_norm": 0.7572269439697266, "learning_rate": 0.0001, "loss": 0.0354, "step": 41110 }, { "epoch": 116.81818181818181, "grad_norm": 0.695713460445404, "learning_rate": 0.0001, "loss": 0.0359, "step": 41120 }, { "epoch": 116.8465909090909, "grad_norm": 0.7137623429298401, "learning_rate": 0.0001, "loss": 0.0346, "step": 41130 }, { "epoch": 116.875, "grad_norm": 0.6217263340950012, "learning_rate": 0.0001, "loss": 0.0359, "step": 41140 }, { "epoch": 116.9034090909091, "grad_norm": 0.6812773942947388, "learning_rate": 0.0001, "loss": 0.0355, "step": 41150 }, { "epoch": 116.93181818181819, "grad_norm": 0.6269494891166687, "learning_rate": 0.0001, "loss": 0.0354, "step": 41160 }, { "epoch": 116.96022727272727, "grad_norm": 0.5813413262367249, "learning_rate": 0.0001, "loss": 0.0354, "step": 41170 }, { "epoch": 116.98863636363636, "grad_norm": 0.47251439094543457, "learning_rate": 0.0001, "loss": 0.0346, "step": 41180 }, { "epoch": 117.01704545454545, "grad_norm": 0.82036954164505, "learning_rate": 0.0001, "loss": 0.0352, "step": 41190 }, { "epoch": 117.04545454545455, "grad_norm": 0.8919968605041504, "learning_rate": 0.0001, "loss": 0.0374, "step": 41200 }, { "epoch": 117.07386363636364, "grad_norm": 0.5334138870239258, "learning_rate": 0.0001, "loss": 0.0356, "step": 41210 }, { "epoch": 117.10227272727273, "grad_norm": 0.4660698473453522, "learning_rate": 0.0001, "loss": 0.0353, "step": 41220 }, { "epoch": 117.13068181818181, "grad_norm": 0.6906890869140625, "learning_rate": 0.0001, "loss": 0.0358, "step": 41230 }, { "epoch": 117.1590909090909, "grad_norm": 0.8132134675979614, "learning_rate": 0.0001, "loss": 0.0352, "step": 41240 }, { "epoch": 117.1875, "grad_norm": 0.8870444297790527, "learning_rate": 0.0001, "loss": 0.0359, "step": 41250 }, { "epoch": 117.2159090909091, "grad_norm": 0.7599701285362244, "learning_rate": 0.0001, "loss": 0.0348, "step": 41260 }, { "epoch": 117.24431818181819, "grad_norm": 0.8724075555801392, "learning_rate": 0.0001, "loss": 0.0352, "step": 41270 }, { "epoch": 117.27272727272727, "grad_norm": 0.6574482917785645, "learning_rate": 0.0001, "loss": 0.0358, "step": 41280 }, { "epoch": 117.30113636363636, "grad_norm": 0.7404472231864929, "learning_rate": 0.0001, "loss": 0.0345, "step": 41290 }, { "epoch": 117.32954545454545, "grad_norm": 0.8209742903709412, "learning_rate": 0.0001, "loss": 0.0351, "step": 41300 }, { "epoch": 117.35795454545455, "grad_norm": 1.0132182836532593, "learning_rate": 0.0001, "loss": 0.0356, "step": 41310 }, { "epoch": 117.38636363636364, "grad_norm": 1.2387603521347046, "learning_rate": 0.0001, "loss": 0.0363, "step": 41320 }, { "epoch": 117.41477272727273, "grad_norm": 1.3428541421890259, "learning_rate": 0.0001, "loss": 0.0353, "step": 41330 }, { "epoch": 117.44318181818181, "grad_norm": 1.3659002780914307, "learning_rate": 0.0001, "loss": 0.0345, "step": 41340 }, { "epoch": 117.4715909090909, "grad_norm": 1.2876734733581543, "learning_rate": 0.0001, "loss": 0.0337, "step": 41350 }, { "epoch": 117.5, "grad_norm": 1.0242540836334229, "learning_rate": 0.0001, "loss": 0.0346, "step": 41360 }, { "epoch": 117.5284090909091, "grad_norm": 0.6913342475891113, "learning_rate": 0.0001, "loss": 0.0341, "step": 41370 }, { "epoch": 117.55681818181819, "grad_norm": 0.7995432019233704, "learning_rate": 0.0001, "loss": 0.0336, "step": 41380 }, { "epoch": 117.58522727272727, "grad_norm": 0.8866602182388306, "learning_rate": 0.0001, "loss": 0.0332, "step": 41390 }, { "epoch": 117.61363636363636, "grad_norm": 0.6081330180168152, "learning_rate": 0.0001, "loss": 0.0351, "step": 41400 }, { "epoch": 117.64204545454545, "grad_norm": 0.7164961099624634, "learning_rate": 0.0001, "loss": 0.0324, "step": 41410 }, { "epoch": 117.67045454545455, "grad_norm": 0.6436015963554382, "learning_rate": 0.0001, "loss": 0.0345, "step": 41420 }, { "epoch": 117.69886363636364, "grad_norm": 0.7437272667884827, "learning_rate": 0.0001, "loss": 0.0342, "step": 41430 }, { "epoch": 117.72727272727273, "grad_norm": 0.723030686378479, "learning_rate": 0.0001, "loss": 0.0347, "step": 41440 }, { "epoch": 117.75568181818181, "grad_norm": 0.7317346334457397, "learning_rate": 0.0001, "loss": 0.0344, "step": 41450 }, { "epoch": 117.7840909090909, "grad_norm": 0.7966662049293518, "learning_rate": 0.0001, "loss": 0.0349, "step": 41460 }, { "epoch": 117.8125, "grad_norm": 0.6614232659339905, "learning_rate": 0.0001, "loss": 0.0351, "step": 41470 }, { "epoch": 117.8409090909091, "grad_norm": 0.7168434858322144, "learning_rate": 0.0001, "loss": 0.0358, "step": 41480 }, { "epoch": 117.86931818181819, "grad_norm": 0.5962741374969482, "learning_rate": 0.0001, "loss": 0.0338, "step": 41490 }, { "epoch": 117.89772727272727, "grad_norm": 0.6524445414543152, "learning_rate": 0.0001, "loss": 0.035, "step": 41500 }, { "epoch": 117.92613636363636, "grad_norm": 0.4581736624240875, "learning_rate": 0.0001, "loss": 0.0345, "step": 41510 }, { "epoch": 117.95454545454545, "grad_norm": 0.6047722101211548, "learning_rate": 0.0001, "loss": 0.035, "step": 41520 }, { "epoch": 117.98295454545455, "grad_norm": 0.9901228547096252, "learning_rate": 0.0001, "loss": 0.0351, "step": 41530 }, { "epoch": 118.01136363636364, "grad_norm": 0.8791798949241638, "learning_rate": 0.0001, "loss": 0.034, "step": 41540 }, { "epoch": 118.03977272727273, "grad_norm": 0.727450966835022, "learning_rate": 0.0001, "loss": 0.0354, "step": 41550 }, { "epoch": 118.06818181818181, "grad_norm": 0.638188898563385, "learning_rate": 0.0001, "loss": 0.0343, "step": 41560 }, { "epoch": 118.0965909090909, "grad_norm": 0.7026476263999939, "learning_rate": 0.0001, "loss": 0.0331, "step": 41570 }, { "epoch": 118.125, "grad_norm": 0.7015557885169983, "learning_rate": 0.0001, "loss": 0.0356, "step": 41580 }, { "epoch": 118.1534090909091, "grad_norm": 0.7547580599784851, "learning_rate": 0.0001, "loss": 0.0348, "step": 41590 }, { "epoch": 118.18181818181819, "grad_norm": 0.6024928092956543, "learning_rate": 0.0001, "loss": 0.0342, "step": 41600 }, { "epoch": 118.21022727272727, "grad_norm": 0.7490041255950928, "learning_rate": 0.0001, "loss": 0.0363, "step": 41610 }, { "epoch": 118.23863636363636, "grad_norm": 0.540759265422821, "learning_rate": 0.0001, "loss": 0.0365, "step": 41620 }, { "epoch": 118.26704545454545, "grad_norm": 0.6325246691703796, "learning_rate": 0.0001, "loss": 0.0356, "step": 41630 }, { "epoch": 118.29545454545455, "grad_norm": 0.8828673362731934, "learning_rate": 0.0001, "loss": 0.0356, "step": 41640 }, { "epoch": 118.32386363636364, "grad_norm": 1.1287953853607178, "learning_rate": 0.0001, "loss": 0.0343, "step": 41650 }, { "epoch": 118.35227272727273, "grad_norm": 1.034847617149353, "learning_rate": 0.0001, "loss": 0.0359, "step": 41660 }, { "epoch": 118.38068181818181, "grad_norm": 0.6673224568367004, "learning_rate": 0.0001, "loss": 0.035, "step": 41670 }, { "epoch": 118.4090909090909, "grad_norm": 0.5625375509262085, "learning_rate": 0.0001, "loss": 0.0363, "step": 41680 }, { "epoch": 118.4375, "grad_norm": 0.759629487991333, "learning_rate": 0.0001, "loss": 0.0352, "step": 41690 }, { "epoch": 118.4659090909091, "grad_norm": 0.9632283449172974, "learning_rate": 0.0001, "loss": 0.0367, "step": 41700 }, { "epoch": 118.49431818181819, "grad_norm": 0.9728202819824219, "learning_rate": 0.0001, "loss": 0.0361, "step": 41710 }, { "epoch": 118.52272727272727, "grad_norm": 0.6623541712760925, "learning_rate": 0.0001, "loss": 0.0354, "step": 41720 }, { "epoch": 118.55113636363636, "grad_norm": 0.5848730206489563, "learning_rate": 0.0001, "loss": 0.0355, "step": 41730 }, { "epoch": 118.57954545454545, "grad_norm": 0.5939080715179443, "learning_rate": 0.0001, "loss": 0.0354, "step": 41740 }, { "epoch": 118.60795454545455, "grad_norm": 0.4702250063419342, "learning_rate": 0.0001, "loss": 0.0345, "step": 41750 }, { "epoch": 118.63636363636364, "grad_norm": 0.6767942905426025, "learning_rate": 0.0001, "loss": 0.0347, "step": 41760 }, { "epoch": 118.66477272727273, "grad_norm": 0.7504387497901917, "learning_rate": 0.0001, "loss": 0.0341, "step": 41770 }, { "epoch": 118.69318181818181, "grad_norm": 0.5079829096794128, "learning_rate": 0.0001, "loss": 0.0356, "step": 41780 }, { "epoch": 118.7215909090909, "grad_norm": 0.5891208648681641, "learning_rate": 0.0001, "loss": 0.0351, "step": 41790 }, { "epoch": 118.75, "grad_norm": 0.8426125049591064, "learning_rate": 0.0001, "loss": 0.0342, "step": 41800 }, { "epoch": 118.7784090909091, "grad_norm": 0.6993535757064819, "learning_rate": 0.0001, "loss": 0.0345, "step": 41810 }, { "epoch": 118.80681818181819, "grad_norm": 0.8501553535461426, "learning_rate": 0.0001, "loss": 0.0353, "step": 41820 }, { "epoch": 118.83522727272727, "grad_norm": 0.7962374091148376, "learning_rate": 0.0001, "loss": 0.0347, "step": 41830 }, { "epoch": 118.86363636363636, "grad_norm": 0.6130684018135071, "learning_rate": 0.0001, "loss": 0.0347, "step": 41840 }, { "epoch": 118.89204545454545, "grad_norm": 0.6343328952789307, "learning_rate": 0.0001, "loss": 0.0337, "step": 41850 }, { "epoch": 118.92045454545455, "grad_norm": 0.5049582123756409, "learning_rate": 0.0001, "loss": 0.0354, "step": 41860 }, { "epoch": 118.94886363636364, "grad_norm": 0.6091402769088745, "learning_rate": 0.0001, "loss": 0.0354, "step": 41870 }, { "epoch": 118.97727272727273, "grad_norm": 0.5600488781929016, "learning_rate": 0.0001, "loss": 0.034, "step": 41880 }, { "epoch": 119.00568181818181, "grad_norm": 0.6329433917999268, "learning_rate": 0.0001, "loss": 0.035, "step": 41890 }, { "epoch": 119.0340909090909, "grad_norm": 0.589053213596344, "learning_rate": 0.0001, "loss": 0.0324, "step": 41900 }, { "epoch": 119.0625, "grad_norm": 0.456606924533844, "learning_rate": 0.0001, "loss": 0.0346, "step": 41910 }, { "epoch": 119.0909090909091, "grad_norm": 0.5378241539001465, "learning_rate": 0.0001, "loss": 0.0336, "step": 41920 }, { "epoch": 119.11931818181819, "grad_norm": 0.6176024675369263, "learning_rate": 0.0001, "loss": 0.0341, "step": 41930 }, { "epoch": 119.14772727272727, "grad_norm": 0.50550776720047, "learning_rate": 0.0001, "loss": 0.0343, "step": 41940 }, { "epoch": 119.17613636363636, "grad_norm": 0.5080638527870178, "learning_rate": 0.0001, "loss": 0.0354, "step": 41950 }, { "epoch": 119.20454545454545, "grad_norm": 0.5049692392349243, "learning_rate": 0.0001, "loss": 0.0333, "step": 41960 }, { "epoch": 119.23295454545455, "grad_norm": 0.5414020419120789, "learning_rate": 0.0001, "loss": 0.0344, "step": 41970 }, { "epoch": 119.26136363636364, "grad_norm": 0.592162549495697, "learning_rate": 0.0001, "loss": 0.0354, "step": 41980 }, { "epoch": 119.28977272727273, "grad_norm": 1.7480474710464478, "learning_rate": 0.0001, "loss": 0.0342, "step": 41990 }, { "epoch": 119.31818181818181, "grad_norm": 1.2184784412384033, "learning_rate": 0.0001, "loss": 0.0353, "step": 42000 }, { "epoch": 119.3465909090909, "grad_norm": 1.2475852966308594, "learning_rate": 0.0001, "loss": 0.0342, "step": 42010 }, { "epoch": 119.375, "grad_norm": 1.83725905418396, "learning_rate": 0.0001, "loss": 0.0343, "step": 42020 }, { "epoch": 119.4034090909091, "grad_norm": 1.1545556783676147, "learning_rate": 0.0001, "loss": 0.0341, "step": 42030 }, { "epoch": 119.43181818181819, "grad_norm": 1.2570842504501343, "learning_rate": 0.0001, "loss": 0.0336, "step": 42040 }, { "epoch": 119.46022727272727, "grad_norm": 1.244055151939392, "learning_rate": 0.0001, "loss": 0.0336, "step": 42050 }, { "epoch": 119.48863636363636, "grad_norm": 0.766913652420044, "learning_rate": 0.0001, "loss": 0.0333, "step": 42060 }, { "epoch": 119.51704545454545, "grad_norm": 0.6588661074638367, "learning_rate": 0.0001, "loss": 0.0329, "step": 42070 }, { "epoch": 119.54545454545455, "grad_norm": 0.5511944890022278, "learning_rate": 0.0001, "loss": 0.0346, "step": 42080 }, { "epoch": 119.57386363636364, "grad_norm": 0.7311079502105713, "learning_rate": 0.0001, "loss": 0.0335, "step": 42090 }, { "epoch": 119.60227272727273, "grad_norm": 0.6783902049064636, "learning_rate": 0.0001, "loss": 0.0341, "step": 42100 }, { "epoch": 119.63068181818181, "grad_norm": 0.7213060855865479, "learning_rate": 0.0001, "loss": 0.035, "step": 42110 }, { "epoch": 119.6590909090909, "grad_norm": 0.6389514207839966, "learning_rate": 0.0001, "loss": 0.034, "step": 42120 }, { "epoch": 119.6875, "grad_norm": 0.6836166977882385, "learning_rate": 0.0001, "loss": 0.0337, "step": 42130 }, { "epoch": 119.7159090909091, "grad_norm": 0.6873879432678223, "learning_rate": 0.0001, "loss": 0.0351, "step": 42140 }, { "epoch": 119.74431818181819, "grad_norm": 0.6492112278938293, "learning_rate": 0.0001, "loss": 0.0356, "step": 42150 }, { "epoch": 119.77272727272727, "grad_norm": 0.6663733720779419, "learning_rate": 0.0001, "loss": 0.0348, "step": 42160 }, { "epoch": 119.80113636363636, "grad_norm": 1.1575038433074951, "learning_rate": 0.0001, "loss": 0.0351, "step": 42170 }, { "epoch": 119.82954545454545, "grad_norm": 0.878379225730896, "learning_rate": 0.0001, "loss": 0.0343, "step": 42180 }, { "epoch": 119.85795454545455, "grad_norm": 0.6646192073822021, "learning_rate": 0.0001, "loss": 0.0355, "step": 42190 }, { "epoch": 119.88636363636364, "grad_norm": 0.7406749129295349, "learning_rate": 0.0001, "loss": 0.0356, "step": 42200 }, { "epoch": 119.91477272727273, "grad_norm": 0.6425890326499939, "learning_rate": 0.0001, "loss": 0.0348, "step": 42210 }, { "epoch": 119.94318181818181, "grad_norm": 0.8351615071296692, "learning_rate": 0.0001, "loss": 0.0349, "step": 42220 }, { "epoch": 119.9715909090909, "grad_norm": 0.6505674719810486, "learning_rate": 0.0001, "loss": 0.0359, "step": 42230 }, { "epoch": 120.0, "grad_norm": 0.6099982857704163, "learning_rate": 0.0001, "loss": 0.0355, "step": 42240 }, { "epoch": 120.0284090909091, "grad_norm": 0.604333758354187, "learning_rate": 0.0001, "loss": 0.035, "step": 42250 }, { "epoch": 120.05681818181819, "grad_norm": 0.5962154269218445, "learning_rate": 0.0001, "loss": 0.0355, "step": 42260 }, { "epoch": 120.08522727272727, "grad_norm": 0.5833165645599365, "learning_rate": 0.0001, "loss": 0.0358, "step": 42270 }, { "epoch": 120.11363636363636, "grad_norm": 0.5957491397857666, "learning_rate": 0.0001, "loss": 0.0345, "step": 42280 }, { "epoch": 120.14204545454545, "grad_norm": 0.6122380495071411, "learning_rate": 0.0001, "loss": 0.0346, "step": 42290 }, { "epoch": 120.17045454545455, "grad_norm": 0.5932005643844604, "learning_rate": 0.0001, "loss": 0.0348, "step": 42300 }, { "epoch": 120.19886363636364, "grad_norm": 0.5959727168083191, "learning_rate": 0.0001, "loss": 0.0345, "step": 42310 }, { "epoch": 120.22727272727273, "grad_norm": 0.5512206554412842, "learning_rate": 0.0001, "loss": 0.0352, "step": 42320 }, { "epoch": 120.25568181818181, "grad_norm": 0.535960853099823, "learning_rate": 0.0001, "loss": 0.0349, "step": 42330 }, { "epoch": 120.2840909090909, "grad_norm": 0.5918856263160706, "learning_rate": 0.0001, "loss": 0.0353, "step": 42340 }, { "epoch": 120.3125, "grad_norm": 0.5420950055122375, "learning_rate": 0.0001, "loss": 0.0341, "step": 42350 }, { "epoch": 120.3409090909091, "grad_norm": 0.5215405225753784, "learning_rate": 0.0001, "loss": 0.0343, "step": 42360 }, { "epoch": 120.36931818181819, "grad_norm": 0.48384472727775574, "learning_rate": 0.0001, "loss": 0.0334, "step": 42370 }, { "epoch": 120.39772727272727, "grad_norm": 0.40733209252357483, "learning_rate": 0.0001, "loss": 0.0328, "step": 42380 }, { "epoch": 120.42613636363636, "grad_norm": 0.5102291703224182, "learning_rate": 0.0001, "loss": 0.0334, "step": 42390 }, { "epoch": 120.45454545454545, "grad_norm": 0.7866867184638977, "learning_rate": 0.0001, "loss": 0.0337, "step": 42400 }, { "epoch": 120.48295454545455, "grad_norm": 1.2717626094818115, "learning_rate": 0.0001, "loss": 0.0341, "step": 42410 }, { "epoch": 120.51136363636364, "grad_norm": 1.0195508003234863, "learning_rate": 0.0001, "loss": 0.0343, "step": 42420 }, { "epoch": 120.53977272727273, "grad_norm": 0.6073787212371826, "learning_rate": 0.0001, "loss": 0.0344, "step": 42430 }, { "epoch": 120.56818181818181, "grad_norm": 0.6645169854164124, "learning_rate": 0.0001, "loss": 0.0325, "step": 42440 }, { "epoch": 120.5965909090909, "grad_norm": 0.9627057313919067, "learning_rate": 0.0001, "loss": 0.0348, "step": 42450 }, { "epoch": 120.625, "grad_norm": 0.8808974623680115, "learning_rate": 0.0001, "loss": 0.0355, "step": 42460 }, { "epoch": 120.6534090909091, "grad_norm": 0.8835111856460571, "learning_rate": 0.0001, "loss": 0.0349, "step": 42470 }, { "epoch": 120.68181818181819, "grad_norm": 0.6539061665534973, "learning_rate": 0.0001, "loss": 0.0359, "step": 42480 }, { "epoch": 120.71022727272727, "grad_norm": 0.7671836018562317, "learning_rate": 0.0001, "loss": 0.0348, "step": 42490 }, { "epoch": 120.73863636363636, "grad_norm": 0.6868611574172974, "learning_rate": 0.0001, "loss": 0.0352, "step": 42500 }, { "epoch": 120.76704545454545, "grad_norm": 0.613646388053894, "learning_rate": 0.0001, "loss": 0.0343, "step": 42510 }, { "epoch": 120.79545454545455, "grad_norm": 0.6648271083831787, "learning_rate": 0.0001, "loss": 0.0351, "step": 42520 }, { "epoch": 120.82386363636364, "grad_norm": 0.5934839248657227, "learning_rate": 0.0001, "loss": 0.0344, "step": 42530 }, { "epoch": 120.85227272727273, "grad_norm": 0.6186235547065735, "learning_rate": 0.0001, "loss": 0.034, "step": 42540 }, { "epoch": 120.88068181818181, "grad_norm": 0.46539390087127686, "learning_rate": 0.0001, "loss": 0.034, "step": 42550 }, { "epoch": 120.9090909090909, "grad_norm": 0.5254285931587219, "learning_rate": 0.0001, "loss": 0.0351, "step": 42560 }, { "epoch": 120.9375, "grad_norm": 0.6108625531196594, "learning_rate": 0.0001, "loss": 0.0346, "step": 42570 }, { "epoch": 120.9659090909091, "grad_norm": 0.6213340759277344, "learning_rate": 0.0001, "loss": 0.0342, "step": 42580 }, { "epoch": 120.99431818181819, "grad_norm": 0.5728833675384521, "learning_rate": 0.0001, "loss": 0.0349, "step": 42590 }, { "epoch": 121.02272727272727, "grad_norm": 0.47747138142585754, "learning_rate": 0.0001, "loss": 0.0345, "step": 42600 }, { "epoch": 121.05113636363636, "grad_norm": 0.5983991026878357, "learning_rate": 0.0001, "loss": 0.0349, "step": 42610 }, { "epoch": 121.07954545454545, "grad_norm": 1.5157920122146606, "learning_rate": 0.0001, "loss": 0.0355, "step": 42620 }, { "epoch": 121.10795454545455, "grad_norm": 1.188115119934082, "learning_rate": 0.0001, "loss": 0.0345, "step": 42630 }, { "epoch": 121.13636363636364, "grad_norm": 0.8782476782798767, "learning_rate": 0.0001, "loss": 0.0339, "step": 42640 }, { "epoch": 121.16477272727273, "grad_norm": 1.138963222503662, "learning_rate": 0.0001, "loss": 0.0342, "step": 42650 }, { "epoch": 121.19318181818181, "grad_norm": 0.7090156674385071, "learning_rate": 0.0001, "loss": 0.035, "step": 42660 }, { "epoch": 121.2215909090909, "grad_norm": 1.0206100940704346, "learning_rate": 0.0001, "loss": 0.0341, "step": 42670 }, { "epoch": 121.25, "grad_norm": 0.8041273355484009, "learning_rate": 0.0001, "loss": 0.0349, "step": 42680 }, { "epoch": 121.2784090909091, "grad_norm": 0.7158668637275696, "learning_rate": 0.0001, "loss": 0.0355, "step": 42690 }, { "epoch": 121.30681818181819, "grad_norm": 0.6531086564064026, "learning_rate": 0.0001, "loss": 0.034, "step": 42700 }, { "epoch": 121.33522727272727, "grad_norm": 0.7232179641723633, "learning_rate": 0.0001, "loss": 0.0343, "step": 42710 }, { "epoch": 121.36363636363636, "grad_norm": 0.6331400275230408, "learning_rate": 0.0001, "loss": 0.0344, "step": 42720 }, { "epoch": 121.39204545454545, "grad_norm": 0.7535783052444458, "learning_rate": 0.0001, "loss": 0.0338, "step": 42730 }, { "epoch": 121.42045454545455, "grad_norm": 0.721626341342926, "learning_rate": 0.0001, "loss": 0.034, "step": 42740 }, { "epoch": 121.44886363636364, "grad_norm": 0.6356875896453857, "learning_rate": 0.0001, "loss": 0.0342, "step": 42750 }, { "epoch": 121.47727272727273, "grad_norm": 0.6131756901741028, "learning_rate": 0.0001, "loss": 0.034, "step": 42760 }, { "epoch": 121.50568181818181, "grad_norm": 0.577969491481781, "learning_rate": 0.0001, "loss": 0.0356, "step": 42770 }, { "epoch": 121.5340909090909, "grad_norm": 0.5389184951782227, "learning_rate": 0.0001, "loss": 0.0339, "step": 42780 }, { "epoch": 121.5625, "grad_norm": 0.6019951105117798, "learning_rate": 0.0001, "loss": 0.0351, "step": 42790 }, { "epoch": 121.5909090909091, "grad_norm": 0.5243552923202515, "learning_rate": 0.0001, "loss": 0.0338, "step": 42800 }, { "epoch": 121.61931818181819, "grad_norm": 0.5662066340446472, "learning_rate": 0.0001, "loss": 0.0337, "step": 42810 }, { "epoch": 121.64772727272727, "grad_norm": 0.673316478729248, "learning_rate": 0.0001, "loss": 0.0339, "step": 42820 }, { "epoch": 121.67613636363636, "grad_norm": 0.7163783311843872, "learning_rate": 0.0001, "loss": 0.0337, "step": 42830 }, { "epoch": 121.70454545454545, "grad_norm": 0.8090773820877075, "learning_rate": 0.0001, "loss": 0.036, "step": 42840 }, { "epoch": 121.73295454545455, "grad_norm": 0.97684246301651, "learning_rate": 0.0001, "loss": 0.0364, "step": 42850 }, { "epoch": 121.76136363636364, "grad_norm": 0.8781417012214661, "learning_rate": 0.0001, "loss": 0.035, "step": 42860 }, { "epoch": 121.78977272727273, "grad_norm": 0.6750118136405945, "learning_rate": 0.0001, "loss": 0.0351, "step": 42870 }, { "epoch": 121.81818181818181, "grad_norm": 0.7185530662536621, "learning_rate": 0.0001, "loss": 0.0345, "step": 42880 }, { "epoch": 121.8465909090909, "grad_norm": 0.7951401472091675, "learning_rate": 0.0001, "loss": 0.0361, "step": 42890 }, { "epoch": 121.875, "grad_norm": 0.6739190816879272, "learning_rate": 0.0001, "loss": 0.0345, "step": 42900 }, { "epoch": 121.9034090909091, "grad_norm": 0.5545329451560974, "learning_rate": 0.0001, "loss": 0.0358, "step": 42910 }, { "epoch": 121.93181818181819, "grad_norm": 0.7083759307861328, "learning_rate": 0.0001, "loss": 0.0347, "step": 42920 }, { "epoch": 121.96022727272727, "grad_norm": 0.5985446572303772, "learning_rate": 0.0001, "loss": 0.0362, "step": 42930 }, { "epoch": 121.98863636363636, "grad_norm": 0.6231947541236877, "learning_rate": 0.0001, "loss": 0.0347, "step": 42940 }, { "epoch": 122.01704545454545, "grad_norm": 0.6297039985656738, "learning_rate": 0.0001, "loss": 0.036, "step": 42950 }, { "epoch": 122.04545454545455, "grad_norm": 0.5027703046798706, "learning_rate": 0.0001, "loss": 0.0347, "step": 42960 }, { "epoch": 122.07386363636364, "grad_norm": 0.5628147721290588, "learning_rate": 0.0001, "loss": 0.0341, "step": 42970 }, { "epoch": 122.10227272727273, "grad_norm": 0.5758391618728638, "learning_rate": 0.0001, "loss": 0.0361, "step": 42980 }, { "epoch": 122.13068181818181, "grad_norm": 0.5796226263046265, "learning_rate": 0.0001, "loss": 0.0341, "step": 42990 }, { "epoch": 122.1590909090909, "grad_norm": 0.6459672451019287, "learning_rate": 0.0001, "loss": 0.0345, "step": 43000 }, { "epoch": 122.1875, "grad_norm": 0.7128278613090515, "learning_rate": 0.0001, "loss": 0.0348, "step": 43010 }, { "epoch": 122.2159090909091, "grad_norm": 0.522715151309967, "learning_rate": 0.0001, "loss": 0.0359, "step": 43020 }, { "epoch": 122.24431818181819, "grad_norm": 0.6805800795555115, "learning_rate": 0.0001, "loss": 0.0343, "step": 43030 }, { "epoch": 122.27272727272727, "grad_norm": 0.8094000220298767, "learning_rate": 0.0001, "loss": 0.0343, "step": 43040 }, { "epoch": 122.30113636363636, "grad_norm": 0.617563784122467, "learning_rate": 0.0001, "loss": 0.035, "step": 43050 }, { "epoch": 122.32954545454545, "grad_norm": 0.6483767628669739, "learning_rate": 0.0001, "loss": 0.0353, "step": 43060 }, { "epoch": 122.35795454545455, "grad_norm": 0.6512372493743896, "learning_rate": 0.0001, "loss": 0.0353, "step": 43070 }, { "epoch": 122.38636363636364, "grad_norm": 0.7526001334190369, "learning_rate": 0.0001, "loss": 0.0348, "step": 43080 }, { "epoch": 122.41477272727273, "grad_norm": 0.7624586820602417, "learning_rate": 0.0001, "loss": 0.0342, "step": 43090 }, { "epoch": 122.44318181818181, "grad_norm": 0.5687345862388611, "learning_rate": 0.0001, "loss": 0.034, "step": 43100 }, { "epoch": 122.4715909090909, "grad_norm": 0.60966956615448, "learning_rate": 0.0001, "loss": 0.0343, "step": 43110 }, { "epoch": 122.5, "grad_norm": 0.8027265667915344, "learning_rate": 0.0001, "loss": 0.0344, "step": 43120 }, { "epoch": 122.5284090909091, "grad_norm": 0.5919138193130493, "learning_rate": 0.0001, "loss": 0.0345, "step": 43130 }, { "epoch": 122.55681818181819, "grad_norm": 0.6522887349128723, "learning_rate": 0.0001, "loss": 0.0343, "step": 43140 }, { "epoch": 122.58522727272727, "grad_norm": 0.6283818483352661, "learning_rate": 0.0001, "loss": 0.0343, "step": 43150 }, { "epoch": 122.61363636363636, "grad_norm": 0.8638715147972107, "learning_rate": 0.0001, "loss": 0.0354, "step": 43160 }, { "epoch": 122.64204545454545, "grad_norm": 0.9759404063224792, "learning_rate": 0.0001, "loss": 0.0338, "step": 43170 }, { "epoch": 122.67045454545455, "grad_norm": 0.9701955914497375, "learning_rate": 0.0001, "loss": 0.035, "step": 43180 }, { "epoch": 122.69886363636364, "grad_norm": 0.6288473606109619, "learning_rate": 0.0001, "loss": 0.0336, "step": 43190 }, { "epoch": 122.72727272727273, "grad_norm": 0.798579216003418, "learning_rate": 0.0001, "loss": 0.0343, "step": 43200 }, { "epoch": 122.75568181818181, "grad_norm": 0.7400290369987488, "learning_rate": 0.0001, "loss": 0.0349, "step": 43210 }, { "epoch": 122.7840909090909, "grad_norm": 0.6623448133468628, "learning_rate": 0.0001, "loss": 0.0346, "step": 43220 }, { "epoch": 122.8125, "grad_norm": 0.645901083946228, "learning_rate": 0.0001, "loss": 0.0345, "step": 43230 }, { "epoch": 122.8409090909091, "grad_norm": 0.7637975811958313, "learning_rate": 0.0001, "loss": 0.034, "step": 43240 }, { "epoch": 122.86931818181819, "grad_norm": 0.763724684715271, "learning_rate": 0.0001, "loss": 0.0322, "step": 43250 }, { "epoch": 122.89772727272727, "grad_norm": 0.6974006295204163, "learning_rate": 0.0001, "loss": 0.0356, "step": 43260 }, { "epoch": 122.92613636363636, "grad_norm": 0.749366283416748, "learning_rate": 0.0001, "loss": 0.0337, "step": 43270 }, { "epoch": 122.95454545454545, "grad_norm": 0.6450308561325073, "learning_rate": 0.0001, "loss": 0.0333, "step": 43280 }, { "epoch": 122.98295454545455, "grad_norm": 0.7591984868049622, "learning_rate": 0.0001, "loss": 0.0329, "step": 43290 }, { "epoch": 123.01136363636364, "grad_norm": 0.6023324728012085, "learning_rate": 0.0001, "loss": 0.0333, "step": 43300 }, { "epoch": 123.03977272727273, "grad_norm": 0.6163926720619202, "learning_rate": 0.0001, "loss": 0.0349, "step": 43310 }, { "epoch": 123.06818181818181, "grad_norm": 0.5046906471252441, "learning_rate": 0.0001, "loss": 0.0337, "step": 43320 }, { "epoch": 123.0965909090909, "grad_norm": 0.6651236414909363, "learning_rate": 0.0001, "loss": 0.0344, "step": 43330 }, { "epoch": 123.125, "grad_norm": 0.9277121424674988, "learning_rate": 0.0001, "loss": 0.0342, "step": 43340 }, { "epoch": 123.1534090909091, "grad_norm": 0.847284734249115, "learning_rate": 0.0001, "loss": 0.0344, "step": 43350 }, { "epoch": 123.18181818181819, "grad_norm": 0.7165888547897339, "learning_rate": 0.0001, "loss": 0.033, "step": 43360 }, { "epoch": 123.21022727272727, "grad_norm": 0.5978952646255493, "learning_rate": 0.0001, "loss": 0.0344, "step": 43370 }, { "epoch": 123.23863636363636, "grad_norm": 0.8708299398422241, "learning_rate": 0.0001, "loss": 0.0346, "step": 43380 }, { "epoch": 123.26704545454545, "grad_norm": 0.6764265298843384, "learning_rate": 0.0001, "loss": 0.0342, "step": 43390 }, { "epoch": 123.29545454545455, "grad_norm": 0.9524985551834106, "learning_rate": 0.0001, "loss": 0.0346, "step": 43400 }, { "epoch": 123.32386363636364, "grad_norm": 0.856083333492279, "learning_rate": 0.0001, "loss": 0.0343, "step": 43410 }, { "epoch": 123.35227272727273, "grad_norm": 0.8081461191177368, "learning_rate": 0.0001, "loss": 0.0351, "step": 43420 }, { "epoch": 123.38068181818181, "grad_norm": 0.8981258273124695, "learning_rate": 0.0001, "loss": 0.0345, "step": 43430 }, { "epoch": 123.4090909090909, "grad_norm": 0.7242816090583801, "learning_rate": 0.0001, "loss": 0.034, "step": 43440 }, { "epoch": 123.4375, "grad_norm": 0.983867347240448, "learning_rate": 0.0001, "loss": 0.0351, "step": 43450 }, { "epoch": 123.4659090909091, "grad_norm": 0.9016323089599609, "learning_rate": 0.0001, "loss": 0.0351, "step": 43460 }, { "epoch": 123.49431818181819, "grad_norm": 0.6101610660552979, "learning_rate": 0.0001, "loss": 0.0338, "step": 43470 }, { "epoch": 123.52272727272727, "grad_norm": 0.6919850707054138, "learning_rate": 0.0001, "loss": 0.0333, "step": 43480 }, { "epoch": 123.55113636363636, "grad_norm": 0.668318510055542, "learning_rate": 0.0001, "loss": 0.0349, "step": 43490 }, { "epoch": 123.57954545454545, "grad_norm": 0.6016658544540405, "learning_rate": 0.0001, "loss": 0.0338, "step": 43500 }, { "epoch": 123.60795454545455, "grad_norm": 0.6011306643486023, "learning_rate": 0.0001, "loss": 0.0337, "step": 43510 }, { "epoch": 123.63636363636364, "grad_norm": 0.5712813138961792, "learning_rate": 0.0001, "loss": 0.0329, "step": 43520 }, { "epoch": 123.66477272727273, "grad_norm": 0.5258704423904419, "learning_rate": 0.0001, "loss": 0.0328, "step": 43530 }, { "epoch": 123.69318181818181, "grad_norm": 0.6852856278419495, "learning_rate": 0.0001, "loss": 0.0342, "step": 43540 }, { "epoch": 123.7215909090909, "grad_norm": 0.6821662187576294, "learning_rate": 0.0001, "loss": 0.0338, "step": 43550 }, { "epoch": 123.75, "grad_norm": 0.9406258463859558, "learning_rate": 0.0001, "loss": 0.0336, "step": 43560 }, { "epoch": 123.7784090909091, "grad_norm": 0.6541404128074646, "learning_rate": 0.0001, "loss": 0.0344, "step": 43570 }, { "epoch": 123.80681818181819, "grad_norm": 0.6775745153427124, "learning_rate": 0.0001, "loss": 0.0344, "step": 43580 }, { "epoch": 123.83522727272727, "grad_norm": 0.8705558180809021, "learning_rate": 0.0001, "loss": 0.0344, "step": 43590 }, { "epoch": 123.86363636363636, "grad_norm": 0.8164870142936707, "learning_rate": 0.0001, "loss": 0.034, "step": 43600 }, { "epoch": 123.89204545454545, "grad_norm": 0.6702988147735596, "learning_rate": 0.0001, "loss": 0.0345, "step": 43610 }, { "epoch": 123.92045454545455, "grad_norm": 0.8119181990623474, "learning_rate": 0.0001, "loss": 0.0339, "step": 43620 }, { "epoch": 123.94886363636364, "grad_norm": 0.5951023101806641, "learning_rate": 0.0001, "loss": 0.0341, "step": 43630 }, { "epoch": 123.97727272727273, "grad_norm": 0.7227770090103149, "learning_rate": 0.0001, "loss": 0.0349, "step": 43640 }, { "epoch": 124.00568181818181, "grad_norm": 0.6837309002876282, "learning_rate": 0.0001, "loss": 0.0344, "step": 43650 }, { "epoch": 124.0340909090909, "grad_norm": 0.5477321743965149, "learning_rate": 0.0001, "loss": 0.0332, "step": 43660 }, { "epoch": 124.0625, "grad_norm": 0.5354117751121521, "learning_rate": 0.0001, "loss": 0.0338, "step": 43670 }, { "epoch": 124.0909090909091, "grad_norm": 0.6974219083786011, "learning_rate": 0.0001, "loss": 0.0356, "step": 43680 }, { "epoch": 124.11931818181819, "grad_norm": 0.6769513487815857, "learning_rate": 0.0001, "loss": 0.0337, "step": 43690 }, { "epoch": 124.14772727272727, "grad_norm": 0.7007016539573669, "learning_rate": 0.0001, "loss": 0.0353, "step": 43700 }, { "epoch": 124.17613636363636, "grad_norm": 0.6461037993431091, "learning_rate": 0.0001, "loss": 0.0337, "step": 43710 }, { "epoch": 124.20454545454545, "grad_norm": 0.6010048389434814, "learning_rate": 0.0001, "loss": 0.034, "step": 43720 }, { "epoch": 124.23295454545455, "grad_norm": 0.5394991040229797, "learning_rate": 0.0001, "loss": 0.0335, "step": 43730 }, { "epoch": 124.26136363636364, "grad_norm": 0.48307615518569946, "learning_rate": 0.0001, "loss": 0.035, "step": 43740 }, { "epoch": 124.28977272727273, "grad_norm": 0.5091378092765808, "learning_rate": 0.0001, "loss": 0.0343, "step": 43750 }, { "epoch": 124.31818181818181, "grad_norm": 0.5722032785415649, "learning_rate": 0.0001, "loss": 0.0338, "step": 43760 }, { "epoch": 124.3465909090909, "grad_norm": 0.5467521548271179, "learning_rate": 0.0001, "loss": 0.0354, "step": 43770 }, { "epoch": 124.375, "grad_norm": 0.49909818172454834, "learning_rate": 0.0001, "loss": 0.0345, "step": 43780 }, { "epoch": 124.4034090909091, "grad_norm": 0.6383428573608398, "learning_rate": 0.0001, "loss": 0.0342, "step": 43790 }, { "epoch": 124.43181818181819, "grad_norm": 0.7279382348060608, "learning_rate": 0.0001, "loss": 0.0346, "step": 43800 }, { "epoch": 124.46022727272727, "grad_norm": 0.4871160089969635, "learning_rate": 0.0001, "loss": 0.0343, "step": 43810 }, { "epoch": 124.48863636363636, "grad_norm": 0.5173508524894714, "learning_rate": 0.0001, "loss": 0.0346, "step": 43820 }, { "epoch": 124.51704545454545, "grad_norm": 0.5603179335594177, "learning_rate": 0.0001, "loss": 0.0332, "step": 43830 }, { "epoch": 124.54545454545455, "grad_norm": 0.4561804533004761, "learning_rate": 0.0001, "loss": 0.0351, "step": 43840 }, { "epoch": 124.57386363636364, "grad_norm": 0.6842979788780212, "learning_rate": 0.0001, "loss": 0.0346, "step": 43850 }, { "epoch": 124.60227272727273, "grad_norm": 0.6346740126609802, "learning_rate": 0.0001, "loss": 0.0337, "step": 43860 }, { "epoch": 124.63068181818181, "grad_norm": 0.5946170091629028, "learning_rate": 0.0001, "loss": 0.033, "step": 43870 }, { "epoch": 124.6590909090909, "grad_norm": 0.4583919942378998, "learning_rate": 0.0001, "loss": 0.035, "step": 43880 }, { "epoch": 124.6875, "grad_norm": 0.8892697095870972, "learning_rate": 0.0001, "loss": 0.0343, "step": 43890 }, { "epoch": 124.7159090909091, "grad_norm": 0.7108684182167053, "learning_rate": 0.0001, "loss": 0.0339, "step": 43900 }, { "epoch": 124.74431818181819, "grad_norm": 0.5772215127944946, "learning_rate": 0.0001, "loss": 0.0344, "step": 43910 }, { "epoch": 124.77272727272727, "grad_norm": 0.6915180683135986, "learning_rate": 0.0001, "loss": 0.0346, "step": 43920 }, { "epoch": 124.80113636363636, "grad_norm": 0.8463635444641113, "learning_rate": 0.0001, "loss": 0.034, "step": 43930 }, { "epoch": 124.82954545454545, "grad_norm": 0.700923502445221, "learning_rate": 0.0001, "loss": 0.0341, "step": 43940 }, { "epoch": 124.85795454545455, "grad_norm": 0.6794731616973877, "learning_rate": 0.0001, "loss": 0.0343, "step": 43950 }, { "epoch": 124.88636363636364, "grad_norm": 0.6753823161125183, "learning_rate": 0.0001, "loss": 0.0349, "step": 43960 }, { "epoch": 124.91477272727273, "grad_norm": 0.7288954257965088, "learning_rate": 0.0001, "loss": 0.0343, "step": 43970 }, { "epoch": 124.94318181818181, "grad_norm": 0.8429861664772034, "learning_rate": 0.0001, "loss": 0.0339, "step": 43980 }, { "epoch": 124.9715909090909, "grad_norm": 0.8071476817131042, "learning_rate": 0.0001, "loss": 0.0337, "step": 43990 }, { "epoch": 125.0, "grad_norm": 0.535753607749939, "learning_rate": 0.0001, "loss": 0.0333, "step": 44000 }, { "epoch": 125.0284090909091, "grad_norm": 0.6149882078170776, "learning_rate": 0.0001, "loss": 0.0345, "step": 44010 }, { "epoch": 125.05681818181819, "grad_norm": 0.4777073860168457, "learning_rate": 0.0001, "loss": 0.0333, "step": 44020 }, { "epoch": 125.08522727272727, "grad_norm": 0.5367774367332458, "learning_rate": 0.0001, "loss": 0.0339, "step": 44030 }, { "epoch": 125.11363636363636, "grad_norm": 0.7416982054710388, "learning_rate": 0.0001, "loss": 0.0342, "step": 44040 }, { "epoch": 125.14204545454545, "grad_norm": 0.5535955429077148, "learning_rate": 0.0001, "loss": 0.0345, "step": 44050 }, { "epoch": 125.17045454545455, "grad_norm": 0.5561586022377014, "learning_rate": 0.0001, "loss": 0.0337, "step": 44060 }, { "epoch": 125.19886363636364, "grad_norm": 0.6382668018341064, "learning_rate": 0.0001, "loss": 0.0343, "step": 44070 }, { "epoch": 125.22727272727273, "grad_norm": 0.7232362031936646, "learning_rate": 0.0001, "loss": 0.0339, "step": 44080 }, { "epoch": 125.25568181818181, "grad_norm": 0.5948888659477234, "learning_rate": 0.0001, "loss": 0.034, "step": 44090 }, { "epoch": 125.2840909090909, "grad_norm": 0.6899495124816895, "learning_rate": 0.0001, "loss": 0.0349, "step": 44100 }, { "epoch": 125.3125, "grad_norm": 0.612220287322998, "learning_rate": 0.0001, "loss": 0.035, "step": 44110 }, { "epoch": 125.3409090909091, "grad_norm": 0.731208086013794, "learning_rate": 0.0001, "loss": 0.0345, "step": 44120 }, { "epoch": 125.36931818181819, "grad_norm": 0.6531153917312622, "learning_rate": 0.0001, "loss": 0.0331, "step": 44130 }, { "epoch": 125.39772727272727, "grad_norm": 0.662437379360199, "learning_rate": 0.0001, "loss": 0.034, "step": 44140 }, { "epoch": 125.42613636363636, "grad_norm": 0.6035165786743164, "learning_rate": 0.0001, "loss": 0.0331, "step": 44150 }, { "epoch": 125.45454545454545, "grad_norm": 0.6566755771636963, "learning_rate": 0.0001, "loss": 0.0339, "step": 44160 }, { "epoch": 125.48295454545455, "grad_norm": 0.80877286195755, "learning_rate": 0.0001, "loss": 0.0333, "step": 44170 }, { "epoch": 125.51136363636364, "grad_norm": 0.7452892065048218, "learning_rate": 0.0001, "loss": 0.0344, "step": 44180 }, { "epoch": 125.53977272727273, "grad_norm": 0.7200114727020264, "learning_rate": 0.0001, "loss": 0.0342, "step": 44190 }, { "epoch": 125.56818181818181, "grad_norm": 0.5483570098876953, "learning_rate": 0.0001, "loss": 0.0342, "step": 44200 }, { "epoch": 125.5965909090909, "grad_norm": 0.7039602398872375, "learning_rate": 0.0001, "loss": 0.0337, "step": 44210 }, { "epoch": 125.625, "grad_norm": 0.5269778966903687, "learning_rate": 0.0001, "loss": 0.0349, "step": 44220 }, { "epoch": 125.6534090909091, "grad_norm": 0.5524904131889343, "learning_rate": 0.0001, "loss": 0.035, "step": 44230 }, { "epoch": 125.68181818181819, "grad_norm": 0.6056973934173584, "learning_rate": 0.0001, "loss": 0.0334, "step": 44240 }, { "epoch": 125.71022727272727, "grad_norm": 0.7236668467521667, "learning_rate": 0.0001, "loss": 0.035, "step": 44250 }, { "epoch": 125.73863636363636, "grad_norm": 0.6657916903495789, "learning_rate": 0.0001, "loss": 0.0331, "step": 44260 }, { "epoch": 125.76704545454545, "grad_norm": 0.7973979711532593, "learning_rate": 0.0001, "loss": 0.0342, "step": 44270 }, { "epoch": 125.79545454545455, "grad_norm": 0.7450402975082397, "learning_rate": 0.0001, "loss": 0.0337, "step": 44280 }, { "epoch": 125.82386363636364, "grad_norm": 0.8265330791473389, "learning_rate": 0.0001, "loss": 0.034, "step": 44290 }, { "epoch": 125.85227272727273, "grad_norm": 0.49795201420783997, "learning_rate": 0.0001, "loss": 0.0321, "step": 44300 }, { "epoch": 125.88068181818181, "grad_norm": 0.5793216824531555, "learning_rate": 0.0001, "loss": 0.0336, "step": 44310 }, { "epoch": 125.9090909090909, "grad_norm": 0.5494784116744995, "learning_rate": 0.0001, "loss": 0.0338, "step": 44320 }, { "epoch": 125.9375, "grad_norm": 0.7741979360580444, "learning_rate": 0.0001, "loss": 0.0337, "step": 44330 }, { "epoch": 125.9659090909091, "grad_norm": 0.5874280333518982, "learning_rate": 0.0001, "loss": 0.0341, "step": 44340 }, { "epoch": 125.99431818181819, "grad_norm": 0.5626118183135986, "learning_rate": 0.0001, "loss": 0.0341, "step": 44350 }, { "epoch": 126.02272727272727, "grad_norm": 0.5672938823699951, "learning_rate": 0.0001, "loss": 0.0336, "step": 44360 }, { "epoch": 126.05113636363636, "grad_norm": 0.5379377603530884, "learning_rate": 0.0001, "loss": 0.0336, "step": 44370 }, { "epoch": 126.07954545454545, "grad_norm": 0.6178792715072632, "learning_rate": 0.0001, "loss": 0.0333, "step": 44380 }, { "epoch": 126.10795454545455, "grad_norm": 0.4595714509487152, "learning_rate": 0.0001, "loss": 0.0323, "step": 44390 }, { "epoch": 126.13636363636364, "grad_norm": 0.5440025925636292, "learning_rate": 0.0001, "loss": 0.033, "step": 44400 }, { "epoch": 126.16477272727273, "grad_norm": 0.447454035282135, "learning_rate": 0.0001, "loss": 0.0347, "step": 44410 }, { "epoch": 126.19318181818181, "grad_norm": 0.5264921188354492, "learning_rate": 0.0001, "loss": 0.0342, "step": 44420 }, { "epoch": 126.2215909090909, "grad_norm": 0.8086947202682495, "learning_rate": 0.0001, "loss": 0.0343, "step": 44430 }, { "epoch": 126.25, "grad_norm": 0.743067741394043, "learning_rate": 0.0001, "loss": 0.0332, "step": 44440 }, { "epoch": 126.2784090909091, "grad_norm": 0.5720465779304504, "learning_rate": 0.0001, "loss": 0.034, "step": 44450 }, { "epoch": 126.30681818181819, "grad_norm": 0.6255154609680176, "learning_rate": 0.0001, "loss": 0.035, "step": 44460 }, { "epoch": 126.33522727272727, "grad_norm": 0.7044152617454529, "learning_rate": 0.0001, "loss": 0.0335, "step": 44470 }, { "epoch": 126.36363636363636, "grad_norm": 0.7242276668548584, "learning_rate": 0.0001, "loss": 0.0336, "step": 44480 }, { "epoch": 126.39204545454545, "grad_norm": 0.5152789354324341, "learning_rate": 0.0001, "loss": 0.0348, "step": 44490 }, { "epoch": 126.42045454545455, "grad_norm": 0.8477872610092163, "learning_rate": 0.0001, "loss": 0.0344, "step": 44500 }, { "epoch": 126.44886363636364, "grad_norm": 1.2072283029556274, "learning_rate": 0.0001, "loss": 0.0338, "step": 44510 }, { "epoch": 126.47727272727273, "grad_norm": 1.0745333433151245, "learning_rate": 0.0001, "loss": 0.0337, "step": 44520 }, { "epoch": 126.50568181818181, "grad_norm": 0.6237938404083252, "learning_rate": 0.0001, "loss": 0.0338, "step": 44530 }, { "epoch": 126.5340909090909, "grad_norm": 1.150746464729309, "learning_rate": 0.0001, "loss": 0.0335, "step": 44540 }, { "epoch": 126.5625, "grad_norm": 0.6744441390037537, "learning_rate": 0.0001, "loss": 0.0328, "step": 44550 }, { "epoch": 126.5909090909091, "grad_norm": 0.8944215178489685, "learning_rate": 0.0001, "loss": 0.0335, "step": 44560 }, { "epoch": 126.61931818181819, "grad_norm": 0.8610298037528992, "learning_rate": 0.0001, "loss": 0.033, "step": 44570 }, { "epoch": 126.64772727272727, "grad_norm": 1.040744423866272, "learning_rate": 0.0001, "loss": 0.0337, "step": 44580 }, { "epoch": 126.67613636363636, "grad_norm": 0.8564578890800476, "learning_rate": 0.0001, "loss": 0.0334, "step": 44590 }, { "epoch": 126.70454545454545, "grad_norm": 0.9337520599365234, "learning_rate": 0.0001, "loss": 0.0337, "step": 44600 }, { "epoch": 126.73295454545455, "grad_norm": 0.8132588863372803, "learning_rate": 0.0001, "loss": 0.032, "step": 44610 }, { "epoch": 126.76136363636364, "grad_norm": 0.9135181903839111, "learning_rate": 0.0001, "loss": 0.0332, "step": 44620 }, { "epoch": 126.78977272727273, "grad_norm": 1.0999176502227783, "learning_rate": 0.0001, "loss": 0.0338, "step": 44630 }, { "epoch": 126.81818181818181, "grad_norm": 0.9005665183067322, "learning_rate": 0.0001, "loss": 0.0341, "step": 44640 }, { "epoch": 126.8465909090909, "grad_norm": 0.9482970833778381, "learning_rate": 0.0001, "loss": 0.033, "step": 44650 }, { "epoch": 126.875, "grad_norm": 0.708218514919281, "learning_rate": 0.0001, "loss": 0.0343, "step": 44660 }, { "epoch": 126.9034090909091, "grad_norm": 0.8633854389190674, "learning_rate": 0.0001, "loss": 0.0349, "step": 44670 }, { "epoch": 126.93181818181819, "grad_norm": 0.7367269992828369, "learning_rate": 0.0001, "loss": 0.0342, "step": 44680 }, { "epoch": 126.96022727272727, "grad_norm": 0.954519510269165, "learning_rate": 0.0001, "loss": 0.0327, "step": 44690 }, { "epoch": 126.98863636363636, "grad_norm": 1.0101993083953857, "learning_rate": 0.0001, "loss": 0.0339, "step": 44700 }, { "epoch": 127.01704545454545, "grad_norm": 1.0522290468215942, "learning_rate": 0.0001, "loss": 0.0326, "step": 44710 }, { "epoch": 127.04545454545455, "grad_norm": 1.0212730169296265, "learning_rate": 0.0001, "loss": 0.0328, "step": 44720 }, { "epoch": 127.07386363636364, "grad_norm": 0.8338181972503662, "learning_rate": 0.0001, "loss": 0.0328, "step": 44730 }, { "epoch": 127.10227272727273, "grad_norm": 0.8435046672821045, "learning_rate": 0.0001, "loss": 0.0326, "step": 44740 }, { "epoch": 127.13068181818181, "grad_norm": 0.6481569409370422, "learning_rate": 0.0001, "loss": 0.0323, "step": 44750 }, { "epoch": 127.1590909090909, "grad_norm": 0.525007426738739, "learning_rate": 0.0001, "loss": 0.0334, "step": 44760 }, { "epoch": 127.1875, "grad_norm": 0.7917028069496155, "learning_rate": 0.0001, "loss": 0.0336, "step": 44770 }, { "epoch": 127.2159090909091, "grad_norm": 0.6355962157249451, "learning_rate": 0.0001, "loss": 0.0327, "step": 44780 }, { "epoch": 127.24431818181819, "grad_norm": 0.4666050970554352, "learning_rate": 0.0001, "loss": 0.0331, "step": 44790 }, { "epoch": 127.27272727272727, "grad_norm": 0.6988716125488281, "learning_rate": 0.0001, "loss": 0.0333, "step": 44800 }, { "epoch": 127.30113636363636, "grad_norm": 0.599835216999054, "learning_rate": 0.0001, "loss": 0.0319, "step": 44810 }, { "epoch": 127.32954545454545, "grad_norm": 0.7451519966125488, "learning_rate": 0.0001, "loss": 0.0332, "step": 44820 }, { "epoch": 127.35795454545455, "grad_norm": 0.7405692934989929, "learning_rate": 0.0001, "loss": 0.0334, "step": 44830 }, { "epoch": 127.38636363636364, "grad_norm": 0.7457634806632996, "learning_rate": 0.0001, "loss": 0.0328, "step": 44840 }, { "epoch": 127.41477272727273, "grad_norm": 0.575497031211853, "learning_rate": 0.0001, "loss": 0.0354, "step": 44850 }, { "epoch": 127.44318181818181, "grad_norm": 0.6736639738082886, "learning_rate": 0.0001, "loss": 0.0353, "step": 44860 }, { "epoch": 127.4715909090909, "grad_norm": 0.7018943428993225, "learning_rate": 0.0001, "loss": 0.0342, "step": 44870 }, { "epoch": 127.5, "grad_norm": 0.6599423289299011, "learning_rate": 0.0001, "loss": 0.0338, "step": 44880 }, { "epoch": 127.5284090909091, "grad_norm": 0.6989256739616394, "learning_rate": 0.0001, "loss": 0.0353, "step": 44890 }, { "epoch": 127.55681818181819, "grad_norm": 0.6219071745872498, "learning_rate": 0.0001, "loss": 0.0349, "step": 44900 }, { "epoch": 127.58522727272727, "grad_norm": 0.6060497164726257, "learning_rate": 0.0001, "loss": 0.0351, "step": 44910 }, { "epoch": 127.61363636363636, "grad_norm": 0.8178271651268005, "learning_rate": 0.0001, "loss": 0.0342, "step": 44920 }, { "epoch": 127.64204545454545, "grad_norm": 0.6442059874534607, "learning_rate": 0.0001, "loss": 0.0348, "step": 44930 }, { "epoch": 127.67045454545455, "grad_norm": 0.8080576062202454, "learning_rate": 0.0001, "loss": 0.0332, "step": 44940 }, { "epoch": 127.69886363636364, "grad_norm": 0.8716291189193726, "learning_rate": 0.0001, "loss": 0.0347, "step": 44950 }, { "epoch": 127.72727272727273, "grad_norm": 0.6577211022377014, "learning_rate": 0.0001, "loss": 0.0343, "step": 44960 }, { "epoch": 127.75568181818181, "grad_norm": 0.6145163774490356, "learning_rate": 0.0001, "loss": 0.0338, "step": 44970 }, { "epoch": 127.7840909090909, "grad_norm": 0.6429737210273743, "learning_rate": 0.0001, "loss": 0.0338, "step": 44980 }, { "epoch": 127.8125, "grad_norm": 1.2113999128341675, "learning_rate": 0.0001, "loss": 0.0345, "step": 44990 }, { "epoch": 127.8409090909091, "grad_norm": 1.2781754732131958, "learning_rate": 0.0001, "loss": 0.0341, "step": 45000 }, { "epoch": 127.86931818181819, "grad_norm": 0.8394537568092346, "learning_rate": 0.0001, "loss": 0.0331, "step": 45010 }, { "epoch": 127.89772727272727, "grad_norm": 1.1930190324783325, "learning_rate": 0.0001, "loss": 0.0336, "step": 45020 }, { "epoch": 127.92613636363636, "grad_norm": 0.6906962990760803, "learning_rate": 0.0001, "loss": 0.0334, "step": 45030 }, { "epoch": 127.95454545454545, "grad_norm": 0.891907811164856, "learning_rate": 0.0001, "loss": 0.0341, "step": 45040 }, { "epoch": 127.98295454545455, "grad_norm": 1.1798697710037231, "learning_rate": 0.0001, "loss": 0.0345, "step": 45050 }, { "epoch": 128.01136363636363, "grad_norm": 1.0021605491638184, "learning_rate": 0.0001, "loss": 0.0331, "step": 45060 }, { "epoch": 128.03977272727272, "grad_norm": 1.0694570541381836, "learning_rate": 0.0001, "loss": 0.0346, "step": 45070 }, { "epoch": 128.0681818181818, "grad_norm": 0.9267218708992004, "learning_rate": 0.0001, "loss": 0.0344, "step": 45080 }, { "epoch": 128.0965909090909, "grad_norm": 0.6276006698608398, "learning_rate": 0.0001, "loss": 0.0346, "step": 45090 }, { "epoch": 128.125, "grad_norm": 0.6347649693489075, "learning_rate": 0.0001, "loss": 0.0342, "step": 45100 }, { "epoch": 128.1534090909091, "grad_norm": 0.6764646768569946, "learning_rate": 0.0001, "loss": 0.0357, "step": 45110 }, { "epoch": 128.1818181818182, "grad_norm": 0.6223496794700623, "learning_rate": 0.0001, "loss": 0.0332, "step": 45120 }, { "epoch": 128.21022727272728, "grad_norm": 0.7356517910957336, "learning_rate": 0.0001, "loss": 0.0351, "step": 45130 }, { "epoch": 128.23863636363637, "grad_norm": 0.9081064462661743, "learning_rate": 0.0001, "loss": 0.0354, "step": 45140 }, { "epoch": 128.26704545454547, "grad_norm": 0.8737921118736267, "learning_rate": 0.0001, "loss": 0.0347, "step": 45150 }, { "epoch": 128.29545454545453, "grad_norm": 0.9559502005577087, "learning_rate": 0.0001, "loss": 0.0361, "step": 45160 }, { "epoch": 128.32386363636363, "grad_norm": 0.9400319457054138, "learning_rate": 0.0001, "loss": 0.0342, "step": 45170 }, { "epoch": 128.35227272727272, "grad_norm": 0.9618202447891235, "learning_rate": 0.0001, "loss": 0.0345, "step": 45180 }, { "epoch": 128.3806818181818, "grad_norm": 0.8962883353233337, "learning_rate": 0.0001, "loss": 0.0336, "step": 45190 }, { "epoch": 128.4090909090909, "grad_norm": 0.6514441967010498, "learning_rate": 0.0001, "loss": 0.0339, "step": 45200 }, { "epoch": 128.4375, "grad_norm": 0.7900486588478088, "learning_rate": 0.0001, "loss": 0.0342, "step": 45210 }, { "epoch": 128.4659090909091, "grad_norm": 1.0713646411895752, "learning_rate": 0.0001, "loss": 0.0331, "step": 45220 }, { "epoch": 128.4943181818182, "grad_norm": 0.7977399230003357, "learning_rate": 0.0001, "loss": 0.0338, "step": 45230 }, { "epoch": 128.52272727272728, "grad_norm": 0.7408506870269775, "learning_rate": 0.0001, "loss": 0.0343, "step": 45240 }, { "epoch": 128.55113636363637, "grad_norm": 0.7208636999130249, "learning_rate": 0.0001, "loss": 0.0347, "step": 45250 }, { "epoch": 128.57954545454547, "grad_norm": 0.9026774764060974, "learning_rate": 0.0001, "loss": 0.0337, "step": 45260 }, { "epoch": 128.60795454545453, "grad_norm": 0.7447446584701538, "learning_rate": 0.0001, "loss": 0.034, "step": 45270 }, { "epoch": 128.63636363636363, "grad_norm": 0.6284778714179993, "learning_rate": 0.0001, "loss": 0.0327, "step": 45280 }, { "epoch": 128.66477272727272, "grad_norm": 0.773349404335022, "learning_rate": 0.0001, "loss": 0.0342, "step": 45290 }, { "epoch": 128.6931818181818, "grad_norm": 0.652649462223053, "learning_rate": 0.0001, "loss": 0.0326, "step": 45300 }, { "epoch": 128.7215909090909, "grad_norm": 0.664982795715332, "learning_rate": 0.0001, "loss": 0.033, "step": 45310 }, { "epoch": 128.75, "grad_norm": 0.9043540954589844, "learning_rate": 0.0001, "loss": 0.0335, "step": 45320 }, { "epoch": 128.7784090909091, "grad_norm": 0.6659265160560608, "learning_rate": 0.0001, "loss": 0.0323, "step": 45330 }, { "epoch": 128.8068181818182, "grad_norm": 0.6651855111122131, "learning_rate": 0.0001, "loss": 0.0335, "step": 45340 }, { "epoch": 128.83522727272728, "grad_norm": 0.6218985915184021, "learning_rate": 0.0001, "loss": 0.0332, "step": 45350 }, { "epoch": 128.86363636363637, "grad_norm": 0.7133499979972839, "learning_rate": 0.0001, "loss": 0.034, "step": 45360 }, { "epoch": 128.89204545454547, "grad_norm": 0.6537865996360779, "learning_rate": 0.0001, "loss": 0.0329, "step": 45370 }, { "epoch": 128.92045454545453, "grad_norm": 0.7740278840065002, "learning_rate": 0.0001, "loss": 0.0326, "step": 45380 }, { "epoch": 128.94886363636363, "grad_norm": 0.7691712379455566, "learning_rate": 0.0001, "loss": 0.0355, "step": 45390 }, { "epoch": 128.97727272727272, "grad_norm": 0.7746933698654175, "learning_rate": 0.0001, "loss": 0.0337, "step": 45400 }, { "epoch": 129.0056818181818, "grad_norm": 0.7170177102088928, "learning_rate": 0.0001, "loss": 0.0337, "step": 45410 }, { "epoch": 129.0340909090909, "grad_norm": 0.6429873108863831, "learning_rate": 0.0001, "loss": 0.0335, "step": 45420 }, { "epoch": 129.0625, "grad_norm": 0.8797420263290405, "learning_rate": 0.0001, "loss": 0.0348, "step": 45430 }, { "epoch": 129.0909090909091, "grad_norm": 0.6143895983695984, "learning_rate": 0.0001, "loss": 0.0343, "step": 45440 }, { "epoch": 129.1193181818182, "grad_norm": 0.7619710564613342, "learning_rate": 0.0001, "loss": 0.033, "step": 45450 }, { "epoch": 129.14772727272728, "grad_norm": 0.5125098824501038, "learning_rate": 0.0001, "loss": 0.0344, "step": 45460 }, { "epoch": 129.17613636363637, "grad_norm": 0.7374431490898132, "learning_rate": 0.0001, "loss": 0.0339, "step": 45470 }, { "epoch": 129.20454545454547, "grad_norm": 0.6298514604568481, "learning_rate": 0.0001, "loss": 0.0336, "step": 45480 }, { "epoch": 129.23295454545453, "grad_norm": 0.570999801158905, "learning_rate": 0.0001, "loss": 0.0342, "step": 45490 }, { "epoch": 129.26136363636363, "grad_norm": 0.5362004041671753, "learning_rate": 0.0001, "loss": 0.0342, "step": 45500 }, { "epoch": 129.28977272727272, "grad_norm": 0.6400303840637207, "learning_rate": 0.0001, "loss": 0.0352, "step": 45510 }, { "epoch": 129.3181818181818, "grad_norm": 0.772942066192627, "learning_rate": 0.0001, "loss": 0.0336, "step": 45520 }, { "epoch": 129.3465909090909, "grad_norm": 0.5503178238868713, "learning_rate": 0.0001, "loss": 0.0346, "step": 45530 }, { "epoch": 129.375, "grad_norm": 0.5466687083244324, "learning_rate": 0.0001, "loss": 0.0353, "step": 45540 }, { "epoch": 129.4034090909091, "grad_norm": 0.6772168874740601, "learning_rate": 0.0001, "loss": 0.0338, "step": 45550 }, { "epoch": 129.4318181818182, "grad_norm": 0.6691098809242249, "learning_rate": 0.0001, "loss": 0.0336, "step": 45560 }, { "epoch": 129.46022727272728, "grad_norm": 0.8726401925086975, "learning_rate": 0.0001, "loss": 0.0341, "step": 45570 }, { "epoch": 129.48863636363637, "grad_norm": 1.072825312614441, "learning_rate": 0.0001, "loss": 0.0312, "step": 45580 }, { "epoch": 129.51704545454547, "grad_norm": 1.0415953397750854, "learning_rate": 0.0001, "loss": 0.0339, "step": 45590 }, { "epoch": 129.54545454545453, "grad_norm": 0.5183255672454834, "learning_rate": 0.0001, "loss": 0.0338, "step": 45600 }, { "epoch": 129.57386363636363, "grad_norm": 0.8744351267814636, "learning_rate": 0.0001, "loss": 0.0349, "step": 45610 }, { "epoch": 129.60227272727272, "grad_norm": 0.8066695928573608, "learning_rate": 0.0001, "loss": 0.0326, "step": 45620 }, { "epoch": 129.6306818181818, "grad_norm": 0.7792837619781494, "learning_rate": 0.0001, "loss": 0.035, "step": 45630 }, { "epoch": 129.6590909090909, "grad_norm": 0.8309561610221863, "learning_rate": 0.0001, "loss": 0.0351, "step": 45640 }, { "epoch": 129.6875, "grad_norm": 0.7426981925964355, "learning_rate": 0.0001, "loss": 0.0337, "step": 45650 }, { "epoch": 129.7159090909091, "grad_norm": 0.7349977493286133, "learning_rate": 0.0001, "loss": 0.0339, "step": 45660 }, { "epoch": 129.7443181818182, "grad_norm": 0.6687122583389282, "learning_rate": 0.0001, "loss": 0.0335, "step": 45670 }, { "epoch": 129.77272727272728, "grad_norm": 0.6240840554237366, "learning_rate": 0.0001, "loss": 0.0327, "step": 45680 }, { "epoch": 129.80113636363637, "grad_norm": 0.6070581078529358, "learning_rate": 0.0001, "loss": 0.0322, "step": 45690 }, { "epoch": 129.82954545454547, "grad_norm": 0.6064611673355103, "learning_rate": 0.0001, "loss": 0.0326, "step": 45700 }, { "epoch": 129.85795454545453, "grad_norm": 0.8292164206504822, "learning_rate": 0.0001, "loss": 0.0329, "step": 45710 }, { "epoch": 129.88636363636363, "grad_norm": 0.6600549221038818, "learning_rate": 0.0001, "loss": 0.0334, "step": 45720 }, { "epoch": 129.91477272727272, "grad_norm": 0.8599357604980469, "learning_rate": 0.0001, "loss": 0.0335, "step": 45730 }, { "epoch": 129.9431818181818, "grad_norm": 0.6765527725219727, "learning_rate": 0.0001, "loss": 0.033, "step": 45740 }, { "epoch": 129.9715909090909, "grad_norm": 0.7478293776512146, "learning_rate": 0.0001, "loss": 0.0324, "step": 45750 }, { "epoch": 130.0, "grad_norm": 0.5045310258865356, "learning_rate": 0.0001, "loss": 0.0319, "step": 45760 }, { "epoch": 130.0284090909091, "grad_norm": 0.5499379634857178, "learning_rate": 0.0001, "loss": 0.033, "step": 45770 }, { "epoch": 130.0568181818182, "grad_norm": 0.5516977906227112, "learning_rate": 0.0001, "loss": 0.0327, "step": 45780 }, { "epoch": 130.08522727272728, "grad_norm": 0.4980061948299408, "learning_rate": 0.0001, "loss": 0.033, "step": 45790 }, { "epoch": 130.11363636363637, "grad_norm": 0.47144022583961487, "learning_rate": 0.0001, "loss": 0.0324, "step": 45800 }, { "epoch": 130.14204545454547, "grad_norm": 0.6652446985244751, "learning_rate": 0.0001, "loss": 0.0341, "step": 45810 }, { "epoch": 130.17045454545453, "grad_norm": 0.5668972730636597, "learning_rate": 0.0001, "loss": 0.0327, "step": 45820 }, { "epoch": 130.19886363636363, "grad_norm": 0.5128690004348755, "learning_rate": 0.0001, "loss": 0.0322, "step": 45830 }, { "epoch": 130.22727272727272, "grad_norm": 0.47686073184013367, "learning_rate": 0.0001, "loss": 0.034, "step": 45840 }, { "epoch": 130.2556818181818, "grad_norm": 0.7872583270072937, "learning_rate": 0.0001, "loss": 0.033, "step": 45850 }, { "epoch": 130.2840909090909, "grad_norm": 0.6706736087799072, "learning_rate": 0.0001, "loss": 0.0346, "step": 45860 }, { "epoch": 130.3125, "grad_norm": 0.6146107912063599, "learning_rate": 0.0001, "loss": 0.0329, "step": 45870 }, { "epoch": 130.3409090909091, "grad_norm": 0.7997154593467712, "learning_rate": 0.0001, "loss": 0.0334, "step": 45880 }, { "epoch": 130.3693181818182, "grad_norm": 0.8617053031921387, "learning_rate": 0.0001, "loss": 0.0328, "step": 45890 }, { "epoch": 130.39772727272728, "grad_norm": 0.8967577219009399, "learning_rate": 0.0001, "loss": 0.0339, "step": 45900 }, { "epoch": 130.42613636363637, "grad_norm": 0.7063471674919128, "learning_rate": 0.0001, "loss": 0.0339, "step": 45910 }, { "epoch": 130.45454545454547, "grad_norm": 0.6364906430244446, "learning_rate": 0.0001, "loss": 0.0332, "step": 45920 }, { "epoch": 130.48295454545453, "grad_norm": 0.5470952987670898, "learning_rate": 0.0001, "loss": 0.0324, "step": 45930 }, { "epoch": 130.51136363636363, "grad_norm": 0.8126204013824463, "learning_rate": 0.0001, "loss": 0.0325, "step": 45940 }, { "epoch": 130.53977272727272, "grad_norm": 0.9707944989204407, "learning_rate": 0.0001, "loss": 0.0336, "step": 45950 }, { "epoch": 130.5681818181818, "grad_norm": 0.777581512928009, "learning_rate": 0.0001, "loss": 0.0344, "step": 45960 }, { "epoch": 130.5965909090909, "grad_norm": 0.49622127413749695, "learning_rate": 0.0001, "loss": 0.0318, "step": 45970 }, { "epoch": 130.625, "grad_norm": 0.7890464067459106, "learning_rate": 0.0001, "loss": 0.0341, "step": 45980 }, { "epoch": 130.6534090909091, "grad_norm": 0.6376560926437378, "learning_rate": 0.0001, "loss": 0.0332, "step": 45990 }, { "epoch": 130.6818181818182, "grad_norm": 0.5111783742904663, "learning_rate": 0.0001, "loss": 0.0337, "step": 46000 }, { "epoch": 130.71022727272728, "grad_norm": 0.6418190002441406, "learning_rate": 0.0001, "loss": 0.0343, "step": 46010 }, { "epoch": 130.73863636363637, "grad_norm": 0.5361145734786987, "learning_rate": 0.0001, "loss": 0.0326, "step": 46020 }, { "epoch": 130.76704545454547, "grad_norm": 0.5517832040786743, "learning_rate": 0.0001, "loss": 0.033, "step": 46030 }, { "epoch": 130.79545454545453, "grad_norm": 0.6392123103141785, "learning_rate": 0.0001, "loss": 0.0326, "step": 46040 }, { "epoch": 130.82386363636363, "grad_norm": 0.4958942234516144, "learning_rate": 0.0001, "loss": 0.0327, "step": 46050 }, { "epoch": 130.85227272727272, "grad_norm": 0.5867140293121338, "learning_rate": 0.0001, "loss": 0.0328, "step": 46060 }, { "epoch": 130.8806818181818, "grad_norm": 0.666496217250824, "learning_rate": 0.0001, "loss": 0.0326, "step": 46070 }, { "epoch": 130.9090909090909, "grad_norm": 0.6093063950538635, "learning_rate": 0.0001, "loss": 0.0331, "step": 46080 }, { "epoch": 130.9375, "grad_norm": 0.6793018579483032, "learning_rate": 0.0001, "loss": 0.034, "step": 46090 }, { "epoch": 130.9659090909091, "grad_norm": 0.8787961602210999, "learning_rate": 0.0001, "loss": 0.0327, "step": 46100 }, { "epoch": 130.9943181818182, "grad_norm": 0.8491075038909912, "learning_rate": 0.0001, "loss": 0.033, "step": 46110 }, { "epoch": 131.02272727272728, "grad_norm": 0.8769429922103882, "learning_rate": 0.0001, "loss": 0.033, "step": 46120 }, { "epoch": 131.05113636363637, "grad_norm": 0.6816815137863159, "learning_rate": 0.0001, "loss": 0.0324, "step": 46130 }, { "epoch": 131.07954545454547, "grad_norm": 0.716789722442627, "learning_rate": 0.0001, "loss": 0.0335, "step": 46140 }, { "epoch": 131.10795454545453, "grad_norm": 0.5310009717941284, "learning_rate": 0.0001, "loss": 0.0334, "step": 46150 }, { "epoch": 131.13636363636363, "grad_norm": 0.5277566909790039, "learning_rate": 0.0001, "loss": 0.0335, "step": 46160 }, { "epoch": 131.16477272727272, "grad_norm": 0.48119422793388367, "learning_rate": 0.0001, "loss": 0.0338, "step": 46170 }, { "epoch": 131.1931818181818, "grad_norm": 0.510347843170166, "learning_rate": 0.0001, "loss": 0.0338, "step": 46180 }, { "epoch": 131.2215909090909, "grad_norm": 0.5207124948501587, "learning_rate": 0.0001, "loss": 0.0337, "step": 46190 }, { "epoch": 131.25, "grad_norm": 0.5676156878471375, "learning_rate": 0.0001, "loss": 0.0323, "step": 46200 }, { "epoch": 131.2784090909091, "grad_norm": 0.5713900327682495, "learning_rate": 0.0001, "loss": 0.0335, "step": 46210 }, { "epoch": 131.3068181818182, "grad_norm": 0.6059330701828003, "learning_rate": 0.0001, "loss": 0.0324, "step": 46220 }, { "epoch": 131.33522727272728, "grad_norm": 0.5800050497055054, "learning_rate": 0.0001, "loss": 0.0341, "step": 46230 }, { "epoch": 131.36363636363637, "grad_norm": 0.5849801301956177, "learning_rate": 0.0001, "loss": 0.0338, "step": 46240 }, { "epoch": 131.39204545454547, "grad_norm": 0.46115511655807495, "learning_rate": 0.0001, "loss": 0.0328, "step": 46250 }, { "epoch": 131.42045454545453, "grad_norm": 0.6311548948287964, "learning_rate": 0.0001, "loss": 0.0328, "step": 46260 }, { "epoch": 131.44886363636363, "grad_norm": 0.4590200185775757, "learning_rate": 0.0001, "loss": 0.032, "step": 46270 }, { "epoch": 131.47727272727272, "grad_norm": 0.4637441635131836, "learning_rate": 0.0001, "loss": 0.0332, "step": 46280 }, { "epoch": 131.5056818181818, "grad_norm": 0.5604123473167419, "learning_rate": 0.0001, "loss": 0.0326, "step": 46290 }, { "epoch": 131.5340909090909, "grad_norm": 0.7339373826980591, "learning_rate": 0.0001, "loss": 0.0329, "step": 46300 }, { "epoch": 131.5625, "grad_norm": 0.49434801936149597, "learning_rate": 0.0001, "loss": 0.0328, "step": 46310 }, { "epoch": 131.5909090909091, "grad_norm": 0.5729549527168274, "learning_rate": 0.0001, "loss": 0.0329, "step": 46320 }, { "epoch": 131.6193181818182, "grad_norm": 0.5489758253097534, "learning_rate": 0.0001, "loss": 0.0335, "step": 46330 }, { "epoch": 131.64772727272728, "grad_norm": 0.5991235375404358, "learning_rate": 0.0001, "loss": 0.0333, "step": 46340 }, { "epoch": 131.67613636363637, "grad_norm": 0.5655492544174194, "learning_rate": 0.0001, "loss": 0.0327, "step": 46350 }, { "epoch": 131.70454545454547, "grad_norm": 0.5596151947975159, "learning_rate": 0.0001, "loss": 0.0332, "step": 46360 }, { "epoch": 131.73295454545453, "grad_norm": 0.6359032988548279, "learning_rate": 0.0001, "loss": 0.0317, "step": 46370 }, { "epoch": 131.76136363636363, "grad_norm": 0.6330766677856445, "learning_rate": 0.0001, "loss": 0.0313, "step": 46380 }, { "epoch": 131.78977272727272, "grad_norm": 0.7287530303001404, "learning_rate": 0.0001, "loss": 0.0331, "step": 46390 }, { "epoch": 131.8181818181818, "grad_norm": 0.4960668087005615, "learning_rate": 0.0001, "loss": 0.0337, "step": 46400 }, { "epoch": 131.8465909090909, "grad_norm": 0.7417987585067749, "learning_rate": 0.0001, "loss": 0.0327, "step": 46410 }, { "epoch": 131.875, "grad_norm": 0.5909314155578613, "learning_rate": 0.0001, "loss": 0.0332, "step": 46420 }, { "epoch": 131.9034090909091, "grad_norm": 0.4722643494606018, "learning_rate": 0.0001, "loss": 0.0325, "step": 46430 }, { "epoch": 131.9318181818182, "grad_norm": 0.5753642320632935, "learning_rate": 0.0001, "loss": 0.0328, "step": 46440 }, { "epoch": 131.96022727272728, "grad_norm": 0.5663985013961792, "learning_rate": 0.0001, "loss": 0.0335, "step": 46450 }, { "epoch": 131.98863636363637, "grad_norm": 0.5400167107582092, "learning_rate": 0.0001, "loss": 0.0328, "step": 46460 }, { "epoch": 132.01704545454547, "grad_norm": 0.46016308665275574, "learning_rate": 0.0001, "loss": 0.0331, "step": 46470 }, { "epoch": 132.04545454545453, "grad_norm": 0.5892532467842102, "learning_rate": 0.0001, "loss": 0.0326, "step": 46480 }, { "epoch": 132.07386363636363, "grad_norm": 0.5685364603996277, "learning_rate": 0.0001, "loss": 0.0328, "step": 46490 }, { "epoch": 132.10227272727272, "grad_norm": 0.5203407406806946, "learning_rate": 0.0001, "loss": 0.0332, "step": 46500 }, { "epoch": 132.1306818181818, "grad_norm": 0.5543718338012695, "learning_rate": 0.0001, "loss": 0.0328, "step": 46510 }, { "epoch": 132.1590909090909, "grad_norm": 0.624247670173645, "learning_rate": 0.0001, "loss": 0.0328, "step": 46520 }, { "epoch": 132.1875, "grad_norm": 0.6007071137428284, "learning_rate": 0.0001, "loss": 0.0324, "step": 46530 }, { "epoch": 132.2159090909091, "grad_norm": 0.68288254737854, "learning_rate": 0.0001, "loss": 0.0333, "step": 46540 }, { "epoch": 132.2443181818182, "grad_norm": 1.2552138566970825, "learning_rate": 0.0001, "loss": 0.0331, "step": 46550 }, { "epoch": 132.27272727272728, "grad_norm": 0.9271278381347656, "learning_rate": 0.0001, "loss": 0.0335, "step": 46560 }, { "epoch": 132.30113636363637, "grad_norm": 0.8253664374351501, "learning_rate": 0.0001, "loss": 0.0321, "step": 46570 }, { "epoch": 132.32954545454547, "grad_norm": 0.5965460538864136, "learning_rate": 0.0001, "loss": 0.0331, "step": 46580 }, { "epoch": 132.35795454545453, "grad_norm": 0.6545840501785278, "learning_rate": 0.0001, "loss": 0.0315, "step": 46590 }, { "epoch": 132.38636363636363, "grad_norm": 0.6397396326065063, "learning_rate": 0.0001, "loss": 0.0318, "step": 46600 }, { "epoch": 132.41477272727272, "grad_norm": 0.6624685525894165, "learning_rate": 0.0001, "loss": 0.0339, "step": 46610 }, { "epoch": 132.4431818181818, "grad_norm": 0.7068834900856018, "learning_rate": 0.0001, "loss": 0.0327, "step": 46620 }, { "epoch": 132.4715909090909, "grad_norm": 0.6984627842903137, "learning_rate": 0.0001, "loss": 0.0325, "step": 46630 }, { "epoch": 132.5, "grad_norm": 0.6401987075805664, "learning_rate": 0.0001, "loss": 0.0323, "step": 46640 }, { "epoch": 132.5284090909091, "grad_norm": 0.5327432155609131, "learning_rate": 0.0001, "loss": 0.0321, "step": 46650 }, { "epoch": 132.5568181818182, "grad_norm": 0.5304933190345764, "learning_rate": 0.0001, "loss": 0.0331, "step": 46660 }, { "epoch": 132.58522727272728, "grad_norm": 0.7458568215370178, "learning_rate": 0.0001, "loss": 0.033, "step": 46670 }, { "epoch": 132.61363636363637, "grad_norm": 0.6337578892707825, "learning_rate": 0.0001, "loss": 0.0323, "step": 46680 }, { "epoch": 132.64204545454547, "grad_norm": 0.712566077709198, "learning_rate": 0.0001, "loss": 0.0333, "step": 46690 }, { "epoch": 132.67045454545453, "grad_norm": 0.6466312408447266, "learning_rate": 0.0001, "loss": 0.0332, "step": 46700 }, { "epoch": 132.69886363636363, "grad_norm": 0.6697583794593811, "learning_rate": 0.0001, "loss": 0.0329, "step": 46710 }, { "epoch": 132.72727272727272, "grad_norm": 0.5255504846572876, "learning_rate": 0.0001, "loss": 0.0329, "step": 46720 }, { "epoch": 132.7556818181818, "grad_norm": 1.0727545022964478, "learning_rate": 0.0001, "loss": 0.0336, "step": 46730 }, { "epoch": 132.7840909090909, "grad_norm": 0.7345282435417175, "learning_rate": 0.0001, "loss": 0.0322, "step": 46740 }, { "epoch": 132.8125, "grad_norm": 0.6004593968391418, "learning_rate": 0.0001, "loss": 0.0332, "step": 46750 }, { "epoch": 132.8409090909091, "grad_norm": 0.4870907664299011, "learning_rate": 0.0001, "loss": 0.0339, "step": 46760 }, { "epoch": 132.8693181818182, "grad_norm": 0.7557304501533508, "learning_rate": 0.0001, "loss": 0.033, "step": 46770 }, { "epoch": 132.89772727272728, "grad_norm": 0.9188068509101868, "learning_rate": 0.0001, "loss": 0.0347, "step": 46780 }, { "epoch": 132.92613636363637, "grad_norm": 0.5896967053413391, "learning_rate": 0.0001, "loss": 0.033, "step": 46790 }, { "epoch": 132.95454545454547, "grad_norm": 0.7609643936157227, "learning_rate": 0.0001, "loss": 0.0331, "step": 46800 }, { "epoch": 132.98295454545453, "grad_norm": 0.8466443419456482, "learning_rate": 0.0001, "loss": 0.0318, "step": 46810 }, { "epoch": 133.01136363636363, "grad_norm": 0.8880951404571533, "learning_rate": 0.0001, "loss": 0.0327, "step": 46820 }, { "epoch": 133.03977272727272, "grad_norm": 0.9683408737182617, "learning_rate": 0.0001, "loss": 0.0349, "step": 46830 }, { "epoch": 133.0681818181818, "grad_norm": 0.871724009513855, "learning_rate": 0.0001, "loss": 0.0332, "step": 46840 }, { "epoch": 133.0965909090909, "grad_norm": 0.5088608264923096, "learning_rate": 0.0001, "loss": 0.0336, "step": 46850 }, { "epoch": 133.125, "grad_norm": 0.7277428507804871, "learning_rate": 0.0001, "loss": 0.0325, "step": 46860 }, { "epoch": 133.1534090909091, "grad_norm": 0.7390486598014832, "learning_rate": 0.0001, "loss": 0.0329, "step": 46870 }, { "epoch": 133.1818181818182, "grad_norm": 0.707007884979248, "learning_rate": 0.0001, "loss": 0.0335, "step": 46880 }, { "epoch": 133.21022727272728, "grad_norm": 0.6008827090263367, "learning_rate": 0.0001, "loss": 0.0324, "step": 46890 }, { "epoch": 133.23863636363637, "grad_norm": 0.6185063123703003, "learning_rate": 0.0001, "loss": 0.0317, "step": 46900 }, { "epoch": 133.26704545454547, "grad_norm": 0.67989182472229, "learning_rate": 0.0001, "loss": 0.0324, "step": 46910 }, { "epoch": 133.29545454545453, "grad_norm": 0.548943817615509, "learning_rate": 0.0001, "loss": 0.033, "step": 46920 }, { "epoch": 133.32386363636363, "grad_norm": 0.6702325344085693, "learning_rate": 0.0001, "loss": 0.0322, "step": 46930 }, { "epoch": 133.35227272727272, "grad_norm": 0.7883853316307068, "learning_rate": 0.0001, "loss": 0.0342, "step": 46940 }, { "epoch": 133.3806818181818, "grad_norm": 0.7740568518638611, "learning_rate": 0.0001, "loss": 0.0323, "step": 46950 }, { "epoch": 133.4090909090909, "grad_norm": 0.6243300437927246, "learning_rate": 0.0001, "loss": 0.0329, "step": 46960 }, { "epoch": 133.4375, "grad_norm": 0.7169111371040344, "learning_rate": 0.0001, "loss": 0.0329, "step": 46970 }, { "epoch": 133.4659090909091, "grad_norm": 0.6098527312278748, "learning_rate": 0.0001, "loss": 0.0331, "step": 46980 }, { "epoch": 133.4943181818182, "grad_norm": 0.5657358169555664, "learning_rate": 0.0001, "loss": 0.0326, "step": 46990 }, { "epoch": 133.52272727272728, "grad_norm": 0.5001091957092285, "learning_rate": 0.0001, "loss": 0.0334, "step": 47000 }, { "epoch": 133.55113636363637, "grad_norm": 0.5768367052078247, "learning_rate": 0.0001, "loss": 0.0338, "step": 47010 }, { "epoch": 133.57954545454547, "grad_norm": 0.7666851282119751, "learning_rate": 0.0001, "loss": 0.035, "step": 47020 }, { "epoch": 133.60795454545453, "grad_norm": 0.6639266014099121, "learning_rate": 0.0001, "loss": 0.0338, "step": 47030 }, { "epoch": 133.63636363636363, "grad_norm": 1.1128325462341309, "learning_rate": 0.0001, "loss": 0.0328, "step": 47040 }, { "epoch": 133.66477272727272, "grad_norm": 0.7729830741882324, "learning_rate": 0.0001, "loss": 0.0329, "step": 47050 }, { "epoch": 133.6931818181818, "grad_norm": 0.60756915807724, "learning_rate": 0.0001, "loss": 0.0313, "step": 47060 }, { "epoch": 133.7215909090909, "grad_norm": 0.6942067742347717, "learning_rate": 0.0001, "loss": 0.033, "step": 47070 }, { "epoch": 133.75, "grad_norm": 0.9247666597366333, "learning_rate": 0.0001, "loss": 0.0334, "step": 47080 }, { "epoch": 133.7784090909091, "grad_norm": 0.9722429513931274, "learning_rate": 0.0001, "loss": 0.0332, "step": 47090 }, { "epoch": 133.8068181818182, "grad_norm": 0.6130657196044922, "learning_rate": 0.0001, "loss": 0.0334, "step": 47100 }, { "epoch": 133.83522727272728, "grad_norm": 0.8849529027938843, "learning_rate": 0.0001, "loss": 0.0323, "step": 47110 }, { "epoch": 133.86363636363637, "grad_norm": 0.865003228187561, "learning_rate": 0.0001, "loss": 0.0326, "step": 47120 }, { "epoch": 133.89204545454547, "grad_norm": 0.6581420302391052, "learning_rate": 0.0001, "loss": 0.0332, "step": 47130 }, { "epoch": 133.92045454545453, "grad_norm": 0.7639325857162476, "learning_rate": 0.0001, "loss": 0.0327, "step": 47140 }, { "epoch": 133.94886363636363, "grad_norm": 0.6666318774223328, "learning_rate": 0.0001, "loss": 0.0333, "step": 47150 }, { "epoch": 133.97727272727272, "grad_norm": 1.2542755603790283, "learning_rate": 0.0001, "loss": 0.0323, "step": 47160 }, { "epoch": 134.0056818181818, "grad_norm": 1.2946697473526, "learning_rate": 0.0001, "loss": 0.0321, "step": 47170 }, { "epoch": 134.0340909090909, "grad_norm": 1.3122329711914062, "learning_rate": 0.0001, "loss": 0.032, "step": 47180 }, { "epoch": 134.0625, "grad_norm": 1.2413455247879028, "learning_rate": 0.0001, "loss": 0.0327, "step": 47190 }, { "epoch": 134.0909090909091, "grad_norm": 0.801084578037262, "learning_rate": 0.0001, "loss": 0.0313, "step": 47200 }, { "epoch": 134.1193181818182, "grad_norm": 0.6665140986442566, "learning_rate": 0.0001, "loss": 0.032, "step": 47210 }, { "epoch": 134.14772727272728, "grad_norm": 1.088781714439392, "learning_rate": 0.0001, "loss": 0.031, "step": 47220 }, { "epoch": 134.17613636363637, "grad_norm": 0.8474469780921936, "learning_rate": 0.0001, "loss": 0.0321, "step": 47230 }, { "epoch": 134.20454545454547, "grad_norm": 0.8532624244689941, "learning_rate": 0.0001, "loss": 0.0321, "step": 47240 }, { "epoch": 134.23295454545453, "grad_norm": 0.6617245078086853, "learning_rate": 0.0001, "loss": 0.0337, "step": 47250 }, { "epoch": 134.26136363636363, "grad_norm": 0.7599644064903259, "learning_rate": 0.0001, "loss": 0.0332, "step": 47260 }, { "epoch": 134.28977272727272, "grad_norm": 0.7604702711105347, "learning_rate": 0.0001, "loss": 0.0323, "step": 47270 }, { "epoch": 134.3181818181818, "grad_norm": 0.6242595314979553, "learning_rate": 0.0001, "loss": 0.0326, "step": 47280 }, { "epoch": 134.3465909090909, "grad_norm": 0.8279832601547241, "learning_rate": 0.0001, "loss": 0.033, "step": 47290 }, { "epoch": 134.375, "grad_norm": 0.7884923815727234, "learning_rate": 0.0001, "loss": 0.032, "step": 47300 }, { "epoch": 134.4034090909091, "grad_norm": 0.6375776529312134, "learning_rate": 0.0001, "loss": 0.0333, "step": 47310 }, { "epoch": 134.4318181818182, "grad_norm": 0.5708988308906555, "learning_rate": 0.0001, "loss": 0.0333, "step": 47320 }, { "epoch": 134.46022727272728, "grad_norm": 0.570242702960968, "learning_rate": 0.0001, "loss": 0.0358, "step": 47330 }, { "epoch": 134.48863636363637, "grad_norm": 0.6287363767623901, "learning_rate": 0.0001, "loss": 0.0366, "step": 47340 }, { "epoch": 134.51704545454547, "grad_norm": 0.6968216300010681, "learning_rate": 0.0001, "loss": 0.0371, "step": 47350 }, { "epoch": 134.54545454545453, "grad_norm": 0.48705530166625977, "learning_rate": 0.0001, "loss": 0.0354, "step": 47360 }, { "epoch": 134.57386363636363, "grad_norm": 0.7090975046157837, "learning_rate": 0.0001, "loss": 0.0348, "step": 47370 }, { "epoch": 134.60227272727272, "grad_norm": 0.9196219444274902, "learning_rate": 0.0001, "loss": 0.034, "step": 47380 }, { "epoch": 134.6306818181818, "grad_norm": 0.9817768335342407, "learning_rate": 0.0001, "loss": 0.0346, "step": 47390 }, { "epoch": 134.6590909090909, "grad_norm": 0.8549873232841492, "learning_rate": 0.0001, "loss": 0.0341, "step": 47400 }, { "epoch": 134.6875, "grad_norm": 0.8343051671981812, "learning_rate": 0.0001, "loss": 0.034, "step": 47410 }, { "epoch": 134.7159090909091, "grad_norm": 0.6651965975761414, "learning_rate": 0.0001, "loss": 0.0346, "step": 47420 }, { "epoch": 134.7443181818182, "grad_norm": 0.8093419075012207, "learning_rate": 0.0001, "loss": 0.0365, "step": 47430 }, { "epoch": 134.77272727272728, "grad_norm": 0.7498792409896851, "learning_rate": 0.0001, "loss": 0.0349, "step": 47440 }, { "epoch": 134.80113636363637, "grad_norm": 1.3343470096588135, "learning_rate": 0.0001, "loss": 0.0352, "step": 47450 }, { "epoch": 134.82954545454547, "grad_norm": 1.099355936050415, "learning_rate": 0.0001, "loss": 0.0378, "step": 47460 }, { "epoch": 134.85795454545453, "grad_norm": 1.077439785003662, "learning_rate": 0.0001, "loss": 0.0355, "step": 47470 }, { "epoch": 134.88636363636363, "grad_norm": 0.9508234262466431, "learning_rate": 0.0001, "loss": 0.0356, "step": 47480 }, { "epoch": 134.91477272727272, "grad_norm": 0.8098722696304321, "learning_rate": 0.0001, "loss": 0.0346, "step": 47490 }, { "epoch": 134.9431818181818, "grad_norm": 0.6014454960823059, "learning_rate": 0.0001, "loss": 0.0367, "step": 47500 }, { "epoch": 134.9715909090909, "grad_norm": 0.7299231886863708, "learning_rate": 0.0001, "loss": 0.0332, "step": 47510 }, { "epoch": 135.0, "grad_norm": 0.6908378005027771, "learning_rate": 0.0001, "loss": 0.0352, "step": 47520 }, { "epoch": 135.0284090909091, "grad_norm": 0.7811752557754517, "learning_rate": 0.0001, "loss": 0.0348, "step": 47530 }, { "epoch": 135.0568181818182, "grad_norm": 0.8679901957511902, "learning_rate": 0.0001, "loss": 0.0344, "step": 47540 }, { "epoch": 135.08522727272728, "grad_norm": 0.9140509963035583, "learning_rate": 0.0001, "loss": 0.034, "step": 47550 }, { "epoch": 135.11363636363637, "grad_norm": 0.7968837022781372, "learning_rate": 0.0001, "loss": 0.0335, "step": 47560 }, { "epoch": 135.14204545454547, "grad_norm": 0.7876217365264893, "learning_rate": 0.0001, "loss": 0.0333, "step": 47570 }, { "epoch": 135.17045454545453, "grad_norm": 0.5965085625648499, "learning_rate": 0.0001, "loss": 0.0334, "step": 47580 }, { "epoch": 135.19886363636363, "grad_norm": 0.5109805464744568, "learning_rate": 0.0001, "loss": 0.0326, "step": 47590 }, { "epoch": 135.22727272727272, "grad_norm": 0.5189054012298584, "learning_rate": 0.0001, "loss": 0.0341, "step": 47600 }, { "epoch": 135.2556818181818, "grad_norm": 0.5483865737915039, "learning_rate": 0.0001, "loss": 0.0328, "step": 47610 }, { "epoch": 135.2840909090909, "grad_norm": 0.8424668312072754, "learning_rate": 0.0001, "loss": 0.035, "step": 47620 }, { "epoch": 135.3125, "grad_norm": 1.2137010097503662, "learning_rate": 0.0001, "loss": 0.0343, "step": 47630 }, { "epoch": 135.3409090909091, "grad_norm": 0.8658679127693176, "learning_rate": 0.0001, "loss": 0.0329, "step": 47640 }, { "epoch": 135.3693181818182, "grad_norm": 0.8400396704673767, "learning_rate": 0.0001, "loss": 0.0331, "step": 47650 }, { "epoch": 135.39772727272728, "grad_norm": 0.6896227598190308, "learning_rate": 0.0001, "loss": 0.0332, "step": 47660 }, { "epoch": 135.42613636363637, "grad_norm": 0.8099536895751953, "learning_rate": 0.0001, "loss": 0.0341, "step": 47670 }, { "epoch": 135.45454545454547, "grad_norm": 1.0839929580688477, "learning_rate": 0.0001, "loss": 0.034, "step": 47680 }, { "epoch": 135.48295454545453, "grad_norm": 0.9490888714790344, "learning_rate": 0.0001, "loss": 0.0359, "step": 47690 }, { "epoch": 135.51136363636363, "grad_norm": 0.9408382177352905, "learning_rate": 0.0001, "loss": 0.0366, "step": 47700 }, { "epoch": 135.53977272727272, "grad_norm": 0.6724865436553955, "learning_rate": 0.0001, "loss": 0.037, "step": 47710 }, { "epoch": 135.5681818181818, "grad_norm": 0.889179527759552, "learning_rate": 0.0001, "loss": 0.0378, "step": 47720 }, { "epoch": 135.5965909090909, "grad_norm": 1.22073495388031, "learning_rate": 0.0001, "loss": 0.0369, "step": 47730 }, { "epoch": 135.625, "grad_norm": 1.0256925821304321, "learning_rate": 0.0001, "loss": 0.035, "step": 47740 }, { "epoch": 135.6534090909091, "grad_norm": 0.9867517352104187, "learning_rate": 0.0001, "loss": 0.0343, "step": 47750 }, { "epoch": 135.6818181818182, "grad_norm": 0.9394050240516663, "learning_rate": 0.0001, "loss": 0.0339, "step": 47760 }, { "epoch": 135.71022727272728, "grad_norm": 0.7679027318954468, "learning_rate": 0.0001, "loss": 0.0326, "step": 47770 }, { "epoch": 135.73863636363637, "grad_norm": 0.6860771775245667, "learning_rate": 0.0001, "loss": 0.0323, "step": 47780 }, { "epoch": 135.76704545454547, "grad_norm": 0.6097438335418701, "learning_rate": 0.0001, "loss": 0.0343, "step": 47790 }, { "epoch": 135.79545454545453, "grad_norm": 0.8389930129051208, "learning_rate": 0.0001, "loss": 0.0335, "step": 47800 }, { "epoch": 135.82386363636363, "grad_norm": 0.7888689637184143, "learning_rate": 0.0001, "loss": 0.0335, "step": 47810 }, { "epoch": 135.85227272727272, "grad_norm": 0.9129384756088257, "learning_rate": 0.0001, "loss": 0.0321, "step": 47820 }, { "epoch": 135.8806818181818, "grad_norm": 1.0235633850097656, "learning_rate": 0.0001, "loss": 0.0342, "step": 47830 }, { "epoch": 135.9090909090909, "grad_norm": 0.7481328845024109, "learning_rate": 0.0001, "loss": 0.0326, "step": 47840 }, { "epoch": 135.9375, "grad_norm": 0.5055482387542725, "learning_rate": 0.0001, "loss": 0.0327, "step": 47850 }, { "epoch": 135.9659090909091, "grad_norm": 0.5696248412132263, "learning_rate": 0.0001, "loss": 0.032, "step": 47860 }, { "epoch": 135.9943181818182, "grad_norm": 0.7397261261940002, "learning_rate": 0.0001, "loss": 0.0323, "step": 47870 }, { "epoch": 136.02272727272728, "grad_norm": 0.651668906211853, "learning_rate": 0.0001, "loss": 0.0321, "step": 47880 }, { "epoch": 136.05113636363637, "grad_norm": 0.5386239886283875, "learning_rate": 0.0001, "loss": 0.0316, "step": 47890 }, { "epoch": 136.07954545454547, "grad_norm": 0.4532436430454254, "learning_rate": 0.0001, "loss": 0.0328, "step": 47900 }, { "epoch": 136.10795454545453, "grad_norm": 0.5337704420089722, "learning_rate": 0.0001, "loss": 0.032, "step": 47910 }, { "epoch": 136.13636363636363, "grad_norm": 0.9169813394546509, "learning_rate": 0.0001, "loss": 0.0334, "step": 47920 }, { "epoch": 136.16477272727272, "grad_norm": 0.7223308682441711, "learning_rate": 0.0001, "loss": 0.0319, "step": 47930 }, { "epoch": 136.1931818181818, "grad_norm": 0.8555115461349487, "learning_rate": 0.0001, "loss": 0.0333, "step": 47940 }, { "epoch": 136.2215909090909, "grad_norm": 0.537327229976654, "learning_rate": 0.0001, "loss": 0.0334, "step": 47950 }, { "epoch": 136.25, "grad_norm": 0.8894582986831665, "learning_rate": 0.0001, "loss": 0.033, "step": 47960 }, { "epoch": 136.2784090909091, "grad_norm": 0.555550754070282, "learning_rate": 0.0001, "loss": 0.0341, "step": 47970 }, { "epoch": 136.3068181818182, "grad_norm": 0.6033909916877747, "learning_rate": 0.0001, "loss": 0.0339, "step": 47980 }, { "epoch": 136.33522727272728, "grad_norm": 0.6103994250297546, "learning_rate": 0.0001, "loss": 0.0341, "step": 47990 }, { "epoch": 136.36363636363637, "grad_norm": 0.6190768480300903, "learning_rate": 0.0001, "loss": 0.0321, "step": 48000 }, { "epoch": 136.39204545454547, "grad_norm": 0.7021390795707703, "learning_rate": 0.0001, "loss": 0.034, "step": 48010 }, { "epoch": 136.42045454545453, "grad_norm": 0.5402888059616089, "learning_rate": 0.0001, "loss": 0.0336, "step": 48020 }, { "epoch": 136.44886363636363, "grad_norm": 0.7875863909721375, "learning_rate": 0.0001, "loss": 0.0329, "step": 48030 }, { "epoch": 136.47727272727272, "grad_norm": 0.5794070959091187, "learning_rate": 0.0001, "loss": 0.0333, "step": 48040 }, { "epoch": 136.5056818181818, "grad_norm": 0.7281160950660706, "learning_rate": 0.0001, "loss": 0.034, "step": 48050 }, { "epoch": 136.5340909090909, "grad_norm": 0.6087822318077087, "learning_rate": 0.0001, "loss": 0.0342, "step": 48060 }, { "epoch": 136.5625, "grad_norm": 0.7562154531478882, "learning_rate": 0.0001, "loss": 0.0325, "step": 48070 }, { "epoch": 136.5909090909091, "grad_norm": 0.7490687370300293, "learning_rate": 0.0001, "loss": 0.0336, "step": 48080 }, { "epoch": 136.6193181818182, "grad_norm": 0.6700184345245361, "learning_rate": 0.0001, "loss": 0.0341, "step": 48090 }, { "epoch": 136.64772727272728, "grad_norm": 0.7165958881378174, "learning_rate": 0.0001, "loss": 0.0329, "step": 48100 }, { "epoch": 136.67613636363637, "grad_norm": 0.5619990229606628, "learning_rate": 0.0001, "loss": 0.0335, "step": 48110 }, { "epoch": 136.70454545454547, "grad_norm": 0.6590405106544495, "learning_rate": 0.0001, "loss": 0.0331, "step": 48120 }, { "epoch": 136.73295454545453, "grad_norm": 0.8965096473693848, "learning_rate": 0.0001, "loss": 0.0328, "step": 48130 }, { "epoch": 136.76136363636363, "grad_norm": 0.932625949382782, "learning_rate": 0.0001, "loss": 0.0332, "step": 48140 }, { "epoch": 136.78977272727272, "grad_norm": 0.806952714920044, "learning_rate": 0.0001, "loss": 0.0345, "step": 48150 }, { "epoch": 136.8181818181818, "grad_norm": 0.718571126461029, "learning_rate": 0.0001, "loss": 0.0323, "step": 48160 }, { "epoch": 136.8465909090909, "grad_norm": 0.6784353256225586, "learning_rate": 0.0001, "loss": 0.0324, "step": 48170 }, { "epoch": 136.875, "grad_norm": 0.672789454460144, "learning_rate": 0.0001, "loss": 0.0319, "step": 48180 }, { "epoch": 136.9034090909091, "grad_norm": 0.7343295216560364, "learning_rate": 0.0001, "loss": 0.0316, "step": 48190 }, { "epoch": 136.9318181818182, "grad_norm": 0.6511361598968506, "learning_rate": 0.0001, "loss": 0.0331, "step": 48200 }, { "epoch": 136.96022727272728, "grad_norm": 0.7251712083816528, "learning_rate": 0.0001, "loss": 0.0322, "step": 48210 }, { "epoch": 136.98863636363637, "grad_norm": 0.8382689356803894, "learning_rate": 0.0001, "loss": 0.0327, "step": 48220 }, { "epoch": 137.01704545454547, "grad_norm": 1.2256827354431152, "learning_rate": 0.0001, "loss": 0.0337, "step": 48230 }, { "epoch": 137.04545454545453, "grad_norm": 0.815129816532135, "learning_rate": 0.0001, "loss": 0.0344, "step": 48240 }, { "epoch": 137.07386363636363, "grad_norm": 1.3422629833221436, "learning_rate": 0.0001, "loss": 0.0351, "step": 48250 }, { "epoch": 137.10227272727272, "grad_norm": 1.2612097263336182, "learning_rate": 0.0001, "loss": 0.0335, "step": 48260 }, { "epoch": 137.1306818181818, "grad_norm": 1.3112272024154663, "learning_rate": 0.0001, "loss": 0.0335, "step": 48270 }, { "epoch": 137.1590909090909, "grad_norm": 0.8872440457344055, "learning_rate": 0.0001, "loss": 0.0331, "step": 48280 }, { "epoch": 137.1875, "grad_norm": 0.9705610871315002, "learning_rate": 0.0001, "loss": 0.0338, "step": 48290 }, { "epoch": 137.2159090909091, "grad_norm": 1.0388199090957642, "learning_rate": 0.0001, "loss": 0.0336, "step": 48300 }, { "epoch": 137.2443181818182, "grad_norm": 0.8184165358543396, "learning_rate": 0.0001, "loss": 0.0324, "step": 48310 }, { "epoch": 137.27272727272728, "grad_norm": 1.2038040161132812, "learning_rate": 0.0001, "loss": 0.0325, "step": 48320 }, { "epoch": 137.30113636363637, "grad_norm": 0.8519647121429443, "learning_rate": 0.0001, "loss": 0.0322, "step": 48330 }, { "epoch": 137.32954545454547, "grad_norm": 0.7184175252914429, "learning_rate": 0.0001, "loss": 0.0336, "step": 48340 }, { "epoch": 137.35795454545453, "grad_norm": 0.9237968325614929, "learning_rate": 0.0001, "loss": 0.0338, "step": 48350 }, { "epoch": 137.38636363636363, "grad_norm": 0.5992299914360046, "learning_rate": 0.0001, "loss": 0.032, "step": 48360 }, { "epoch": 137.41477272727272, "grad_norm": 0.914725124835968, "learning_rate": 0.0001, "loss": 0.0327, "step": 48370 }, { "epoch": 137.4431818181818, "grad_norm": 0.8184134364128113, "learning_rate": 0.0001, "loss": 0.0314, "step": 48380 }, { "epoch": 137.4715909090909, "grad_norm": 0.5038926601409912, "learning_rate": 0.0001, "loss": 0.0323, "step": 48390 }, { "epoch": 137.5, "grad_norm": 0.6409310102462769, "learning_rate": 0.0001, "loss": 0.0315, "step": 48400 }, { "epoch": 137.5284090909091, "grad_norm": 0.5178235173225403, "learning_rate": 0.0001, "loss": 0.0323, "step": 48410 }, { "epoch": 137.5568181818182, "grad_norm": 0.7749777436256409, "learning_rate": 0.0001, "loss": 0.0315, "step": 48420 }, { "epoch": 137.58522727272728, "grad_norm": 0.5888954401016235, "learning_rate": 0.0001, "loss": 0.0311, "step": 48430 }, { "epoch": 137.61363636363637, "grad_norm": 0.631784200668335, "learning_rate": 0.0001, "loss": 0.0319, "step": 48440 }, { "epoch": 137.64204545454547, "grad_norm": 0.6141875386238098, "learning_rate": 0.0001, "loss": 0.032, "step": 48450 }, { "epoch": 137.67045454545453, "grad_norm": 0.7147916555404663, "learning_rate": 0.0001, "loss": 0.0314, "step": 48460 }, { "epoch": 137.69886363636363, "grad_norm": 0.6216191649436951, "learning_rate": 0.0001, "loss": 0.0314, "step": 48470 }, { "epoch": 137.72727272727272, "grad_norm": 0.9274272918701172, "learning_rate": 0.0001, "loss": 0.0341, "step": 48480 }, { "epoch": 137.7556818181818, "grad_norm": 1.197992205619812, "learning_rate": 0.0001, "loss": 0.0326, "step": 48490 }, { "epoch": 137.7840909090909, "grad_norm": 1.0352435111999512, "learning_rate": 0.0001, "loss": 0.0315, "step": 48500 }, { "epoch": 137.8125, "grad_norm": 0.9775570631027222, "learning_rate": 0.0001, "loss": 0.0318, "step": 48510 }, { "epoch": 137.8409090909091, "grad_norm": 1.0361013412475586, "learning_rate": 0.0001, "loss": 0.0315, "step": 48520 }, { "epoch": 137.8693181818182, "grad_norm": 0.9069687724113464, "learning_rate": 0.0001, "loss": 0.0317, "step": 48530 }, { "epoch": 137.89772727272728, "grad_norm": 0.6086276173591614, "learning_rate": 0.0001, "loss": 0.0315, "step": 48540 }, { "epoch": 137.92613636363637, "grad_norm": 0.7118426561355591, "learning_rate": 0.0001, "loss": 0.0313, "step": 48550 }, { "epoch": 137.95454545454547, "grad_norm": 0.9204250574111938, "learning_rate": 0.0001, "loss": 0.0325, "step": 48560 }, { "epoch": 137.98295454545453, "grad_norm": 0.7215946316719055, "learning_rate": 0.0001, "loss": 0.0311, "step": 48570 }, { "epoch": 138.01136363636363, "grad_norm": 0.7724602222442627, "learning_rate": 0.0001, "loss": 0.0316, "step": 48580 }, { "epoch": 138.03977272727272, "grad_norm": 0.6827357411384583, "learning_rate": 0.0001, "loss": 0.0312, "step": 48590 }, { "epoch": 138.0681818181818, "grad_norm": 0.46647927165031433, "learning_rate": 0.0001, "loss": 0.032, "step": 48600 }, { "epoch": 138.0965909090909, "grad_norm": 0.5820374488830566, "learning_rate": 0.0001, "loss": 0.0322, "step": 48610 }, { "epoch": 138.125, "grad_norm": 0.9222242832183838, "learning_rate": 0.0001, "loss": 0.0324, "step": 48620 }, { "epoch": 138.1534090909091, "grad_norm": 1.222086787223816, "learning_rate": 0.0001, "loss": 0.0321, "step": 48630 }, { "epoch": 138.1818181818182, "grad_norm": 0.7875241637229919, "learning_rate": 0.0001, "loss": 0.0325, "step": 48640 }, { "epoch": 138.21022727272728, "grad_norm": 0.7012037634849548, "learning_rate": 0.0001, "loss": 0.0315, "step": 48650 }, { "epoch": 138.23863636363637, "grad_norm": 1.2003134489059448, "learning_rate": 0.0001, "loss": 0.0315, "step": 48660 }, { "epoch": 138.26704545454547, "grad_norm": 0.6346859335899353, "learning_rate": 0.0001, "loss": 0.0326, "step": 48670 }, { "epoch": 138.29545454545453, "grad_norm": 0.5016667246818542, "learning_rate": 0.0001, "loss": 0.0322, "step": 48680 }, { "epoch": 138.32386363636363, "grad_norm": 0.5193179845809937, "learning_rate": 0.0001, "loss": 0.0323, "step": 48690 }, { "epoch": 138.35227272727272, "grad_norm": 0.5191895365715027, "learning_rate": 0.0001, "loss": 0.0317, "step": 48700 }, { "epoch": 138.3806818181818, "grad_norm": 0.4985046088695526, "learning_rate": 0.0001, "loss": 0.0319, "step": 48710 }, { "epoch": 138.4090909090909, "grad_norm": 0.6570383310317993, "learning_rate": 0.0001, "loss": 0.0319, "step": 48720 }, { "epoch": 138.4375, "grad_norm": 0.5687001347541809, "learning_rate": 0.0001, "loss": 0.0314, "step": 48730 }, { "epoch": 138.4659090909091, "grad_norm": 0.6005369424819946, "learning_rate": 0.0001, "loss": 0.0328, "step": 48740 }, { "epoch": 138.4943181818182, "grad_norm": 0.5225914716720581, "learning_rate": 0.0001, "loss": 0.0314, "step": 48750 }, { "epoch": 138.52272727272728, "grad_norm": 0.6342768669128418, "learning_rate": 0.0001, "loss": 0.0317, "step": 48760 }, { "epoch": 138.55113636363637, "grad_norm": 0.5711255669593811, "learning_rate": 0.0001, "loss": 0.0326, "step": 48770 }, { "epoch": 138.57954545454547, "grad_norm": 0.6120138764381409, "learning_rate": 0.0001, "loss": 0.032, "step": 48780 }, { "epoch": 138.60795454545453, "grad_norm": 0.7916771769523621, "learning_rate": 0.0001, "loss": 0.0311, "step": 48790 }, { "epoch": 138.63636363636363, "grad_norm": 0.9732442498207092, "learning_rate": 0.0001, "loss": 0.0326, "step": 48800 }, { "epoch": 138.66477272727272, "grad_norm": 1.2003506422042847, "learning_rate": 0.0001, "loss": 0.0341, "step": 48810 }, { "epoch": 138.6931818181818, "grad_norm": 1.2463781833648682, "learning_rate": 0.0001, "loss": 0.0329, "step": 48820 }, { "epoch": 138.7215909090909, "grad_norm": 0.8527126908302307, "learning_rate": 0.0001, "loss": 0.0329, "step": 48830 }, { "epoch": 138.75, "grad_norm": 0.9467070698738098, "learning_rate": 0.0001, "loss": 0.0323, "step": 48840 }, { "epoch": 138.7784090909091, "grad_norm": 1.0931123495101929, "learning_rate": 0.0001, "loss": 0.0318, "step": 48850 }, { "epoch": 138.8068181818182, "grad_norm": 1.0193461179733276, "learning_rate": 0.0001, "loss": 0.0314, "step": 48860 }, { "epoch": 138.83522727272728, "grad_norm": 0.9290983080863953, "learning_rate": 0.0001, "loss": 0.0318, "step": 48870 }, { "epoch": 138.86363636363637, "grad_norm": 0.7232368588447571, "learning_rate": 0.0001, "loss": 0.0318, "step": 48880 }, { "epoch": 138.89204545454547, "grad_norm": 0.6270406246185303, "learning_rate": 0.0001, "loss": 0.0313, "step": 48890 }, { "epoch": 138.92045454545453, "grad_norm": 0.8998165726661682, "learning_rate": 0.0001, "loss": 0.0322, "step": 48900 }, { "epoch": 138.94886363636363, "grad_norm": 0.6000069379806519, "learning_rate": 0.0001, "loss": 0.032, "step": 48910 }, { "epoch": 138.97727272727272, "grad_norm": 0.45096200704574585, "learning_rate": 0.0001, "loss": 0.0322, "step": 48920 }, { "epoch": 139.0056818181818, "grad_norm": 0.6368238925933838, "learning_rate": 0.0001, "loss": 0.0327, "step": 48930 }, { "epoch": 139.0340909090909, "grad_norm": 0.8111124038696289, "learning_rate": 0.0001, "loss": 0.0319, "step": 48940 }, { "epoch": 139.0625, "grad_norm": 0.5319854021072388, "learning_rate": 0.0001, "loss": 0.033, "step": 48950 }, { "epoch": 139.0909090909091, "grad_norm": 0.6842014193534851, "learning_rate": 0.0001, "loss": 0.0335, "step": 48960 }, { "epoch": 139.1193181818182, "grad_norm": 0.5523969531059265, "learning_rate": 0.0001, "loss": 0.0319, "step": 48970 }, { "epoch": 139.14772727272728, "grad_norm": 0.6312474608421326, "learning_rate": 0.0001, "loss": 0.0319, "step": 48980 }, { "epoch": 139.17613636363637, "grad_norm": 0.6765289902687073, "learning_rate": 0.0001, "loss": 0.032, "step": 48990 }, { "epoch": 139.20454545454547, "grad_norm": 0.7405977845191956, "learning_rate": 0.0001, "loss": 0.0319, "step": 49000 }, { "epoch": 139.23295454545453, "grad_norm": 0.9844215512275696, "learning_rate": 0.0001, "loss": 0.0331, "step": 49010 }, { "epoch": 139.26136363636363, "grad_norm": 0.824510395526886, "learning_rate": 0.0001, "loss": 0.032, "step": 49020 }, { "epoch": 139.28977272727272, "grad_norm": 0.6249207258224487, "learning_rate": 0.0001, "loss": 0.0315, "step": 49030 }, { "epoch": 139.3181818181818, "grad_norm": 0.5916060209274292, "learning_rate": 0.0001, "loss": 0.0323, "step": 49040 }, { "epoch": 139.3465909090909, "grad_norm": 0.742417573928833, "learning_rate": 0.0001, "loss": 0.0316, "step": 49050 }, { "epoch": 139.375, "grad_norm": 0.5674957036972046, "learning_rate": 0.0001, "loss": 0.0323, "step": 49060 }, { "epoch": 139.4034090909091, "grad_norm": 0.5021371841430664, "learning_rate": 0.0001, "loss": 0.0309, "step": 49070 }, { "epoch": 139.4318181818182, "grad_norm": 0.5639374256134033, "learning_rate": 0.0001, "loss": 0.0307, "step": 49080 }, { "epoch": 139.46022727272728, "grad_norm": 0.6540923118591309, "learning_rate": 0.0001, "loss": 0.032, "step": 49090 }, { "epoch": 139.48863636363637, "grad_norm": 0.7910658121109009, "learning_rate": 0.0001, "loss": 0.0311, "step": 49100 }, { "epoch": 139.51704545454547, "grad_norm": 0.9734665155410767, "learning_rate": 0.0001, "loss": 0.0315, "step": 49110 }, { "epoch": 139.54545454545453, "grad_norm": 0.6400315165519714, "learning_rate": 0.0001, "loss": 0.0307, "step": 49120 }, { "epoch": 139.57386363636363, "grad_norm": 0.7147937417030334, "learning_rate": 0.0001, "loss": 0.031, "step": 49130 }, { "epoch": 139.60227272727272, "grad_norm": 0.7652814984321594, "learning_rate": 0.0001, "loss": 0.031, "step": 49140 }, { "epoch": 139.6306818181818, "grad_norm": 0.8998138308525085, "learning_rate": 0.0001, "loss": 0.0316, "step": 49150 }, { "epoch": 139.6590909090909, "grad_norm": 0.6835416555404663, "learning_rate": 0.0001, "loss": 0.0311, "step": 49160 }, { "epoch": 139.6875, "grad_norm": 0.6805562376976013, "learning_rate": 0.0001, "loss": 0.0318, "step": 49170 }, { "epoch": 139.7159090909091, "grad_norm": 0.6778475642204285, "learning_rate": 0.0001, "loss": 0.032, "step": 49180 }, { "epoch": 139.7443181818182, "grad_norm": 0.476952463388443, "learning_rate": 0.0001, "loss": 0.0316, "step": 49190 }, { "epoch": 139.77272727272728, "grad_norm": 0.9565654397010803, "learning_rate": 0.0001, "loss": 0.0316, "step": 49200 }, { "epoch": 139.80113636363637, "grad_norm": 0.770005464553833, "learning_rate": 0.0001, "loss": 0.0317, "step": 49210 }, { "epoch": 139.82954545454547, "grad_norm": 0.8685285449028015, "learning_rate": 0.0001, "loss": 0.0319, "step": 49220 }, { "epoch": 139.85795454545453, "grad_norm": 0.6290770769119263, "learning_rate": 0.0001, "loss": 0.0325, "step": 49230 }, { "epoch": 139.88636363636363, "grad_norm": 0.7894107103347778, "learning_rate": 0.0001, "loss": 0.0319, "step": 49240 }, { "epoch": 139.91477272727272, "grad_norm": 0.73750901222229, "learning_rate": 0.0001, "loss": 0.0323, "step": 49250 }, { "epoch": 139.9431818181818, "grad_norm": 0.5105662941932678, "learning_rate": 0.0001, "loss": 0.0314, "step": 49260 }, { "epoch": 139.9715909090909, "grad_norm": 0.5434335470199585, "learning_rate": 0.0001, "loss": 0.0321, "step": 49270 }, { "epoch": 140.0, "grad_norm": 0.5327863097190857, "learning_rate": 0.0001, "loss": 0.0319, "step": 49280 }, { "epoch": 140.0284090909091, "grad_norm": 0.4427546560764313, "learning_rate": 0.0001, "loss": 0.0323, "step": 49290 }, { "epoch": 140.0568181818182, "grad_norm": 0.5727342963218689, "learning_rate": 0.0001, "loss": 0.032, "step": 49300 }, { "epoch": 140.08522727272728, "grad_norm": 0.698470950126648, "learning_rate": 0.0001, "loss": 0.032, "step": 49310 }, { "epoch": 140.11363636363637, "grad_norm": 0.5280812978744507, "learning_rate": 0.0001, "loss": 0.0323, "step": 49320 }, { "epoch": 140.14204545454547, "grad_norm": 0.5219975709915161, "learning_rate": 0.0001, "loss": 0.0323, "step": 49330 }, { "epoch": 140.17045454545453, "grad_norm": 0.603614866733551, "learning_rate": 0.0001, "loss": 0.033, "step": 49340 }, { "epoch": 140.19886363636363, "grad_norm": 0.48663216829299927, "learning_rate": 0.0001, "loss": 0.032, "step": 49350 }, { "epoch": 140.22727272727272, "grad_norm": 0.6505529284477234, "learning_rate": 0.0001, "loss": 0.0318, "step": 49360 }, { "epoch": 140.2556818181818, "grad_norm": 0.7080401182174683, "learning_rate": 0.0001, "loss": 0.0318, "step": 49370 }, { "epoch": 140.2840909090909, "grad_norm": 0.49827688932418823, "learning_rate": 0.0001, "loss": 0.0325, "step": 49380 }, { "epoch": 140.3125, "grad_norm": 0.6238806843757629, "learning_rate": 0.0001, "loss": 0.0324, "step": 49390 }, { "epoch": 140.3409090909091, "grad_norm": 0.5958791971206665, "learning_rate": 0.0001, "loss": 0.0326, "step": 49400 }, { "epoch": 140.3693181818182, "grad_norm": 0.5299167633056641, "learning_rate": 0.0001, "loss": 0.0318, "step": 49410 }, { "epoch": 140.39772727272728, "grad_norm": 0.534270703792572, "learning_rate": 0.0001, "loss": 0.0321, "step": 49420 }, { "epoch": 140.42613636363637, "grad_norm": 0.4572816789150238, "learning_rate": 0.0001, "loss": 0.0325, "step": 49430 }, { "epoch": 140.45454545454547, "grad_norm": 0.7355735301971436, "learning_rate": 0.0001, "loss": 0.033, "step": 49440 }, { "epoch": 140.48295454545453, "grad_norm": 0.8774271607398987, "learning_rate": 0.0001, "loss": 0.032, "step": 49450 }, { "epoch": 140.51136363636363, "grad_norm": 0.5380182266235352, "learning_rate": 0.0001, "loss": 0.0318, "step": 49460 }, { "epoch": 140.53977272727272, "grad_norm": 0.6516682505607605, "learning_rate": 0.0001, "loss": 0.0318, "step": 49470 }, { "epoch": 140.5681818181818, "grad_norm": 0.4943990111351013, "learning_rate": 0.0001, "loss": 0.0301, "step": 49480 }, { "epoch": 140.5965909090909, "grad_norm": 0.5519253611564636, "learning_rate": 0.0001, "loss": 0.0309, "step": 49490 }, { "epoch": 140.625, "grad_norm": 0.6039503216743469, "learning_rate": 0.0001, "loss": 0.0312, "step": 49500 }, { "epoch": 140.6534090909091, "grad_norm": 0.754988431930542, "learning_rate": 0.0001, "loss": 0.0316, "step": 49510 }, { "epoch": 140.6818181818182, "grad_norm": 0.6376588344573975, "learning_rate": 0.0001, "loss": 0.0324, "step": 49520 }, { "epoch": 140.71022727272728, "grad_norm": 0.7755230069160461, "learning_rate": 0.0001, "loss": 0.0323, "step": 49530 }, { "epoch": 140.73863636363637, "grad_norm": 0.6854098439216614, "learning_rate": 0.0001, "loss": 0.0319, "step": 49540 }, { "epoch": 140.76704545454547, "grad_norm": 0.5473119020462036, "learning_rate": 0.0001, "loss": 0.0316, "step": 49550 }, { "epoch": 140.79545454545453, "grad_norm": 0.659775972366333, "learning_rate": 0.0001, "loss": 0.0306, "step": 49560 }, { "epoch": 140.82386363636363, "grad_norm": 0.7851589918136597, "learning_rate": 0.0001, "loss": 0.0317, "step": 49570 }, { "epoch": 140.85227272727272, "grad_norm": 0.4822283983230591, "learning_rate": 0.0001, "loss": 0.0312, "step": 49580 }, { "epoch": 140.8806818181818, "grad_norm": 0.6676194667816162, "learning_rate": 0.0001, "loss": 0.0311, "step": 49590 }, { "epoch": 140.9090909090909, "grad_norm": 0.4834653437137604, "learning_rate": 0.0001, "loss": 0.0314, "step": 49600 }, { "epoch": 140.9375, "grad_norm": 0.6345698833465576, "learning_rate": 0.0001, "loss": 0.03, "step": 49610 }, { "epoch": 140.9659090909091, "grad_norm": 0.45117560029029846, "learning_rate": 0.0001, "loss": 0.0313, "step": 49620 }, { "epoch": 140.9943181818182, "grad_norm": 0.5543921589851379, "learning_rate": 0.0001, "loss": 0.0317, "step": 49630 }, { "epoch": 141.02272727272728, "grad_norm": 0.5042457580566406, "learning_rate": 0.0001, "loss": 0.0305, "step": 49640 }, { "epoch": 141.05113636363637, "grad_norm": 0.47779983282089233, "learning_rate": 0.0001, "loss": 0.0307, "step": 49650 }, { "epoch": 141.07954545454547, "grad_norm": 0.641806423664093, "learning_rate": 0.0001, "loss": 0.0327, "step": 49660 }, { "epoch": 141.10795454545453, "grad_norm": 0.7229103446006775, "learning_rate": 0.0001, "loss": 0.0315, "step": 49670 }, { "epoch": 141.13636363636363, "grad_norm": 0.5515846014022827, "learning_rate": 0.0001, "loss": 0.0318, "step": 49680 }, { "epoch": 141.16477272727272, "grad_norm": 0.6664286255836487, "learning_rate": 0.0001, "loss": 0.0313, "step": 49690 }, { "epoch": 141.1931818181818, "grad_norm": 0.5435540080070496, "learning_rate": 0.0001, "loss": 0.0321, "step": 49700 }, { "epoch": 141.2215909090909, "grad_norm": 0.5387634038925171, "learning_rate": 0.0001, "loss": 0.0327, "step": 49710 }, { "epoch": 141.25, "grad_norm": 0.5133399963378906, "learning_rate": 0.0001, "loss": 0.0331, "step": 49720 }, { "epoch": 141.2784090909091, "grad_norm": 0.7076446413993835, "learning_rate": 0.0001, "loss": 0.0319, "step": 49730 }, { "epoch": 141.3068181818182, "grad_norm": 0.5965754985809326, "learning_rate": 0.0001, "loss": 0.0316, "step": 49740 }, { "epoch": 141.33522727272728, "grad_norm": 0.5514310598373413, "learning_rate": 0.0001, "loss": 0.0314, "step": 49750 }, { "epoch": 141.36363636363637, "grad_norm": 0.6107453107833862, "learning_rate": 0.0001, "loss": 0.032, "step": 49760 }, { "epoch": 141.39204545454547, "grad_norm": 0.6507061719894409, "learning_rate": 0.0001, "loss": 0.0315, "step": 49770 }, { "epoch": 141.42045454545453, "grad_norm": 0.4469587802886963, "learning_rate": 0.0001, "loss": 0.0323, "step": 49780 }, { "epoch": 141.44886363636363, "grad_norm": 0.5173867344856262, "learning_rate": 0.0001, "loss": 0.0309, "step": 49790 }, { "epoch": 141.47727272727272, "grad_norm": 0.4976654052734375, "learning_rate": 0.0001, "loss": 0.0319, "step": 49800 }, { "epoch": 141.5056818181818, "grad_norm": 0.7025495171546936, "learning_rate": 0.0001, "loss": 0.0312, "step": 49810 }, { "epoch": 141.5340909090909, "grad_norm": 0.5611472725868225, "learning_rate": 0.0001, "loss": 0.0323, "step": 49820 }, { "epoch": 141.5625, "grad_norm": 0.5493874549865723, "learning_rate": 0.0001, "loss": 0.0316, "step": 49830 }, { "epoch": 141.5909090909091, "grad_norm": 0.649082601070404, "learning_rate": 0.0001, "loss": 0.0312, "step": 49840 }, { "epoch": 141.6193181818182, "grad_norm": 0.5634473562240601, "learning_rate": 0.0001, "loss": 0.032, "step": 49850 }, { "epoch": 141.64772727272728, "grad_norm": 0.44207829236984253, "learning_rate": 0.0001, "loss": 0.0322, "step": 49860 }, { "epoch": 141.67613636363637, "grad_norm": 1.1749712228775024, "learning_rate": 0.0001, "loss": 0.0314, "step": 49870 }, { "epoch": 141.70454545454547, "grad_norm": 0.8790702223777771, "learning_rate": 0.0001, "loss": 0.0326, "step": 49880 }, { "epoch": 141.73295454545453, "grad_norm": 1.1796921491622925, "learning_rate": 0.0001, "loss": 0.0312, "step": 49890 }, { "epoch": 141.76136363636363, "grad_norm": 0.7493149638175964, "learning_rate": 0.0001, "loss": 0.0322, "step": 49900 }, { "epoch": 141.78977272727272, "grad_norm": 0.6880519986152649, "learning_rate": 0.0001, "loss": 0.032, "step": 49910 }, { "epoch": 141.8181818181818, "grad_norm": 0.7424251437187195, "learning_rate": 0.0001, "loss": 0.0305, "step": 49920 }, { "epoch": 141.8465909090909, "grad_norm": 0.8121177554130554, "learning_rate": 0.0001, "loss": 0.0313, "step": 49930 }, { "epoch": 141.875, "grad_norm": 0.7063887715339661, "learning_rate": 0.0001, "loss": 0.0307, "step": 49940 }, { "epoch": 141.9034090909091, "grad_norm": 0.6800211071968079, "learning_rate": 0.0001, "loss": 0.0319, "step": 49950 }, { "epoch": 141.9318181818182, "grad_norm": 0.800621509552002, "learning_rate": 0.0001, "loss": 0.0312, "step": 49960 }, { "epoch": 141.96022727272728, "grad_norm": 0.7496196627616882, "learning_rate": 0.0001, "loss": 0.0314, "step": 49970 }, { "epoch": 141.98863636363637, "grad_norm": 0.8301522135734558, "learning_rate": 0.0001, "loss": 0.0321, "step": 49980 }, { "epoch": 142.01704545454547, "grad_norm": 1.3548810482025146, "learning_rate": 0.0001, "loss": 0.0333, "step": 49990 }, { "epoch": 142.04545454545453, "grad_norm": 1.1022731065750122, "learning_rate": 0.0001, "loss": 0.0321, "step": 50000 }, { "epoch": 142.04545454545453, "step": 50000, "total_flos": 0.0, "train_loss": 0.090032759501338, "train_runtime": 22271.7421, "train_samples_per_second": 287.36, "train_steps_per_second": 2.245 } ], "logging_steps": 10, "max_steps": 50000, "num_input_tokens_seen": 0, "num_train_epochs": 143, "save_steps": 5000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 128, "trial_name": null, "trial_params": null }