diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,35042 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 142.04545454545453, + "eval_steps": 500, + "global_step": 50000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.028409090909090908, + "grad_norm": 145.83482360839844, + "learning_rate": 0.0001, + "loss": 19.629, + "step": 10 + }, + { + "epoch": 0.056818181818181816, + "grad_norm": 76.42560577392578, + "learning_rate": 0.0001, + "loss": 4.9944, + "step": 20 + }, + { + "epoch": 0.08522727272727272, + "grad_norm": 75.80607604980469, + "learning_rate": 0.0001, + "loss": 4.0315, + "step": 30 + }, + { + "epoch": 0.11363636363636363, + "grad_norm": 73.45606231689453, + "learning_rate": 0.0001, + "loss": 3.6022, + "step": 40 + }, + { + "epoch": 0.14204545454545456, + "grad_norm": 63.843379974365234, + "learning_rate": 0.0001, + "loss": 3.4001, + "step": 50 + }, + { + "epoch": 0.17045454545454544, + "grad_norm": 62.33827209472656, + "learning_rate": 0.0001, + "loss": 3.1379, + "step": 60 + }, + { + "epoch": 0.19886363636363635, + "grad_norm": 48.174617767333984, + "learning_rate": 0.0001, + "loss": 2.9382, + "step": 70 + }, + { + "epoch": 0.22727272727272727, + "grad_norm": 51.949546813964844, + "learning_rate": 0.0001, + "loss": 2.7898, + "step": 80 + }, + { + "epoch": 0.2556818181818182, + "grad_norm": 55.129215240478516, + "learning_rate": 0.0001, + "loss": 2.6821, + "step": 90 + }, + { + "epoch": 0.2840909090909091, + "grad_norm": 57.89436721801758, + "learning_rate": 0.0001, + "loss": 2.6053, + "step": 100 + }, + { + "epoch": 0.3125, + "grad_norm": 60.28406524658203, + "learning_rate": 0.0001, + "loss": 2.4816, + "step": 110 + }, + { + "epoch": 0.3409090909090909, + "grad_norm": 56.817726135253906, + "learning_rate": 0.0001, + "loss": 2.3979, + "step": 120 + }, + { + "epoch": 0.3693181818181818, + "grad_norm": 50.40308380126953, + "learning_rate": 0.0001, + "loss": 2.2238, + "step": 130 + }, + { + "epoch": 0.3977272727272727, + "grad_norm": 50.979698181152344, + "learning_rate": 0.0001, + "loss": 2.1042, + "step": 140 + }, + { + "epoch": 0.42613636363636365, + "grad_norm": 49.087684631347656, + "learning_rate": 0.0001, + "loss": 1.9842, + "step": 150 + }, + { + "epoch": 0.45454545454545453, + "grad_norm": 42.376319885253906, + "learning_rate": 0.0001, + "loss": 1.9271, + "step": 160 + }, + { + "epoch": 0.48295454545454547, + "grad_norm": 52.510475158691406, + "learning_rate": 0.0001, + "loss": 1.8932, + "step": 170 + }, + { + "epoch": 0.5113636363636364, + "grad_norm": 47.899627685546875, + "learning_rate": 0.0001, + "loss": 1.835, + "step": 180 + }, + { + "epoch": 0.5397727272727273, + "grad_norm": 49.3372688293457, + "learning_rate": 0.0001, + "loss": 1.723, + "step": 190 + }, + { + "epoch": 0.5681818181818182, + "grad_norm": 53.45163345336914, + "learning_rate": 0.0001, + "loss": 1.6617, + "step": 200 + }, + { + "epoch": 0.5965909090909091, + "grad_norm": 40.74541091918945, + "learning_rate": 0.0001, + "loss": 1.6427, + "step": 210 + }, + { + "epoch": 0.625, + "grad_norm": 45.38197708129883, + "learning_rate": 0.0001, + "loss": 1.5676, + "step": 220 + }, + { + "epoch": 0.6534090909090909, + "grad_norm": 44.96558380126953, + "learning_rate": 0.0001, + "loss": 1.4756, + "step": 230 + }, + { + "epoch": 0.6818181818181818, + "grad_norm": 39.52370071411133, + "learning_rate": 0.0001, + "loss": 1.4001, + "step": 240 + }, + { + "epoch": 0.7102272727272727, + "grad_norm": 35.26641845703125, + "learning_rate": 0.0001, + "loss": 1.346, + "step": 250 + }, + { + "epoch": 0.7386363636363636, + "grad_norm": 43.04871368408203, + "learning_rate": 0.0001, + "loss": 1.2858, + "step": 260 + }, + { + "epoch": 0.7670454545454546, + "grad_norm": 41.421043395996094, + "learning_rate": 0.0001, + "loss": 1.2311, + "step": 270 + }, + { + "epoch": 0.7954545454545454, + "grad_norm": 34.36821365356445, + "learning_rate": 0.0001, + "loss": 1.1698, + "step": 280 + }, + { + "epoch": 0.8238636363636364, + "grad_norm": 28.939420700073242, + "learning_rate": 0.0001, + "loss": 1.1007, + "step": 290 + }, + { + "epoch": 0.8522727272727273, + "grad_norm": 34.10892868041992, + "learning_rate": 0.0001, + "loss": 1.0837, + "step": 300 + }, + { + "epoch": 0.8806818181818182, + "grad_norm": 32.707054138183594, + "learning_rate": 0.0001, + "loss": 1.037, + "step": 310 + }, + { + "epoch": 0.9090909090909091, + "grad_norm": 35.907508850097656, + "learning_rate": 0.0001, + "loss": 1.0093, + "step": 320 + }, + { + "epoch": 0.9375, + "grad_norm": 37.658451080322266, + "learning_rate": 0.0001, + "loss": 0.9501, + "step": 330 + }, + { + "epoch": 0.9659090909090909, + "grad_norm": 29.12462615966797, + "learning_rate": 0.0001, + "loss": 0.9148, + "step": 340 + }, + { + "epoch": 0.9943181818181818, + "grad_norm": 31.38657569885254, + "learning_rate": 0.0001, + "loss": 0.8863, + "step": 350 + }, + { + "epoch": 1.0227272727272727, + "grad_norm": 29.956890106201172, + "learning_rate": 0.0001, + "loss": 0.8712, + "step": 360 + }, + { + "epoch": 1.0511363636363635, + "grad_norm": 26.45981788635254, + "learning_rate": 0.0001, + "loss": 0.8092, + "step": 370 + }, + { + "epoch": 1.0795454545454546, + "grad_norm": 27.67877197265625, + "learning_rate": 0.0001, + "loss": 0.8083, + "step": 380 + }, + { + "epoch": 1.1079545454545454, + "grad_norm": 29.698911666870117, + "learning_rate": 0.0001, + "loss": 0.7937, + "step": 390 + }, + { + "epoch": 1.1363636363636362, + "grad_norm": 25.87833595275879, + "learning_rate": 0.0001, + "loss": 0.7842, + "step": 400 + }, + { + "epoch": 1.1647727272727273, + "grad_norm": 30.7982120513916, + "learning_rate": 0.0001, + "loss": 0.7507, + "step": 410 + }, + { + "epoch": 1.1931818181818181, + "grad_norm": 25.391246795654297, + "learning_rate": 0.0001, + "loss": 0.7208, + "step": 420 + }, + { + "epoch": 1.2215909090909092, + "grad_norm": 22.389162063598633, + "learning_rate": 0.0001, + "loss": 0.6947, + "step": 430 + }, + { + "epoch": 1.25, + "grad_norm": 21.166950225830078, + "learning_rate": 0.0001, + "loss": 0.6735, + "step": 440 + }, + { + "epoch": 1.2784090909090908, + "grad_norm": 20.702722549438477, + "learning_rate": 0.0001, + "loss": 0.6466, + "step": 450 + }, + { + "epoch": 1.3068181818181819, + "grad_norm": 25.78806495666504, + "learning_rate": 0.0001, + "loss": 0.6449, + "step": 460 + }, + { + "epoch": 1.3352272727272727, + "grad_norm": 19.319217681884766, + "learning_rate": 0.0001, + "loss": 0.6416, + "step": 470 + }, + { + "epoch": 1.3636363636363638, + "grad_norm": 22.997730255126953, + "learning_rate": 0.0001, + "loss": 0.6296, + "step": 480 + }, + { + "epoch": 1.3920454545454546, + "grad_norm": 21.263296127319336, + "learning_rate": 0.0001, + "loss": 0.586, + "step": 490 + }, + { + "epoch": 1.4204545454545454, + "grad_norm": 18.75411605834961, + "learning_rate": 0.0001, + "loss": 0.565, + "step": 500 + }, + { + "epoch": 1.4488636363636362, + "grad_norm": 21.52752113342285, + "learning_rate": 0.0001, + "loss": 0.5923, + "step": 510 + }, + { + "epoch": 1.4772727272727273, + "grad_norm": 21.464553833007812, + "learning_rate": 0.0001, + "loss": 0.5726, + "step": 520 + }, + { + "epoch": 1.5056818181818183, + "grad_norm": 19.978652954101562, + "learning_rate": 0.0001, + "loss": 0.5734, + "step": 530 + }, + { + "epoch": 1.5340909090909092, + "grad_norm": 21.40723419189453, + "learning_rate": 0.0001, + "loss": 0.549, + "step": 540 + }, + { + "epoch": 1.5625, + "grad_norm": 18.181068420410156, + "learning_rate": 0.0001, + "loss": 0.5337, + "step": 550 + }, + { + "epoch": 1.5909090909090908, + "grad_norm": 16.99245262145996, + "learning_rate": 0.0001, + "loss": 0.528, + "step": 560 + }, + { + "epoch": 1.6193181818181817, + "grad_norm": 18.75094223022461, + "learning_rate": 0.0001, + "loss": 0.5316, + "step": 570 + }, + { + "epoch": 1.6477272727272727, + "grad_norm": 23.37386703491211, + "learning_rate": 0.0001, + "loss": 0.5116, + "step": 580 + }, + { + "epoch": 1.6761363636363638, + "grad_norm": 20.599090576171875, + "learning_rate": 0.0001, + "loss": 0.5135, + "step": 590 + }, + { + "epoch": 1.7045454545454546, + "grad_norm": 19.43827247619629, + "learning_rate": 0.0001, + "loss": 0.4833, + "step": 600 + }, + { + "epoch": 1.7329545454545454, + "grad_norm": 16.123802185058594, + "learning_rate": 0.0001, + "loss": 0.486, + "step": 610 + }, + { + "epoch": 1.7613636363636362, + "grad_norm": 18.867568969726562, + "learning_rate": 0.0001, + "loss": 0.47, + "step": 620 + }, + { + "epoch": 1.7897727272727273, + "grad_norm": 16.580411911010742, + "learning_rate": 0.0001, + "loss": 0.4803, + "step": 630 + }, + { + "epoch": 1.8181818181818183, + "grad_norm": 16.470767974853516, + "learning_rate": 0.0001, + "loss": 0.4763, + "step": 640 + }, + { + "epoch": 1.8465909090909092, + "grad_norm": 15.845813751220703, + "learning_rate": 0.0001, + "loss": 0.4796, + "step": 650 + }, + { + "epoch": 1.875, + "grad_norm": 16.313016891479492, + "learning_rate": 0.0001, + "loss": 0.4726, + "step": 660 + }, + { + "epoch": 1.9034090909090908, + "grad_norm": 16.114171981811523, + "learning_rate": 0.0001, + "loss": 0.4657, + "step": 670 + }, + { + "epoch": 1.9318181818181817, + "grad_norm": 18.153635025024414, + "learning_rate": 0.0001, + "loss": 0.4656, + "step": 680 + }, + { + "epoch": 1.9602272727272727, + "grad_norm": 19.012916564941406, + "learning_rate": 0.0001, + "loss": 0.4543, + "step": 690 + }, + { + "epoch": 1.9886363636363638, + "grad_norm": 15.661040306091309, + "learning_rate": 0.0001, + "loss": 0.4451, + "step": 700 + }, + { + "epoch": 2.0170454545454546, + "grad_norm": 20.665252685546875, + "learning_rate": 0.0001, + "loss": 0.4542, + "step": 710 + }, + { + "epoch": 2.0454545454545454, + "grad_norm": 18.220745086669922, + "learning_rate": 0.0001, + "loss": 0.4615, + "step": 720 + }, + { + "epoch": 2.0738636363636362, + "grad_norm": 15.511617660522461, + "learning_rate": 0.0001, + "loss": 0.4379, + "step": 730 + }, + { + "epoch": 2.102272727272727, + "grad_norm": 16.05436897277832, + "learning_rate": 0.0001, + "loss": 0.4457, + "step": 740 + }, + { + "epoch": 2.1306818181818183, + "grad_norm": 14.067421913146973, + "learning_rate": 0.0001, + "loss": 0.4404, + "step": 750 + }, + { + "epoch": 2.159090909090909, + "grad_norm": 17.595314025878906, + "learning_rate": 0.0001, + "loss": 0.4359, + "step": 760 + }, + { + "epoch": 2.1875, + "grad_norm": 14.292813301086426, + "learning_rate": 0.0001, + "loss": 0.4265, + "step": 770 + }, + { + "epoch": 2.215909090909091, + "grad_norm": 13.254941940307617, + "learning_rate": 0.0001, + "loss": 0.4282, + "step": 780 + }, + { + "epoch": 2.2443181818181817, + "grad_norm": 14.131694793701172, + "learning_rate": 0.0001, + "loss": 0.4154, + "step": 790 + }, + { + "epoch": 2.2727272727272725, + "grad_norm": 16.197458267211914, + "learning_rate": 0.0001, + "loss": 0.4271, + "step": 800 + }, + { + "epoch": 2.3011363636363638, + "grad_norm": 13.791603088378906, + "learning_rate": 0.0001, + "loss": 0.4159, + "step": 810 + }, + { + "epoch": 2.3295454545454546, + "grad_norm": 15.197473526000977, + "learning_rate": 0.0001, + "loss": 0.4011, + "step": 820 + }, + { + "epoch": 2.3579545454545454, + "grad_norm": 16.548952102661133, + "learning_rate": 0.0001, + "loss": 0.4153, + "step": 830 + }, + { + "epoch": 2.3863636363636362, + "grad_norm": 15.021014213562012, + "learning_rate": 0.0001, + "loss": 0.4077, + "step": 840 + }, + { + "epoch": 2.4147727272727275, + "grad_norm": 11.394856452941895, + "learning_rate": 0.0001, + "loss": 0.412, + "step": 850 + }, + { + "epoch": 2.4431818181818183, + "grad_norm": 14.868868827819824, + "learning_rate": 0.0001, + "loss": 0.3898, + "step": 860 + }, + { + "epoch": 2.471590909090909, + "grad_norm": 15.362899780273438, + "learning_rate": 0.0001, + "loss": 0.3985, + "step": 870 + }, + { + "epoch": 2.5, + "grad_norm": 12.927398681640625, + "learning_rate": 0.0001, + "loss": 0.3969, + "step": 880 + }, + { + "epoch": 2.528409090909091, + "grad_norm": 12.813764572143555, + "learning_rate": 0.0001, + "loss": 0.3808, + "step": 890 + }, + { + "epoch": 2.5568181818181817, + "grad_norm": 14.54391860961914, + "learning_rate": 0.0001, + "loss": 0.3786, + "step": 900 + }, + { + "epoch": 2.5852272727272725, + "grad_norm": 12.791834831237793, + "learning_rate": 0.0001, + "loss": 0.3891, + "step": 910 + }, + { + "epoch": 2.6136363636363638, + "grad_norm": 12.401715278625488, + "learning_rate": 0.0001, + "loss": 0.3903, + "step": 920 + }, + { + "epoch": 2.6420454545454546, + "grad_norm": 12.847018241882324, + "learning_rate": 0.0001, + "loss": 0.3741, + "step": 930 + }, + { + "epoch": 2.6704545454545454, + "grad_norm": 14.203393936157227, + "learning_rate": 0.0001, + "loss": 0.3827, + "step": 940 + }, + { + "epoch": 2.6988636363636362, + "grad_norm": 14.513806343078613, + "learning_rate": 0.0001, + "loss": 0.3848, + "step": 950 + }, + { + "epoch": 2.7272727272727275, + "grad_norm": 15.528099060058594, + "learning_rate": 0.0001, + "loss": 0.3861, + "step": 960 + }, + { + "epoch": 2.7556818181818183, + "grad_norm": 16.94673728942871, + "learning_rate": 0.0001, + "loss": 0.3796, + "step": 970 + }, + { + "epoch": 2.784090909090909, + "grad_norm": 14.714554786682129, + "learning_rate": 0.0001, + "loss": 0.3683, + "step": 980 + }, + { + "epoch": 2.8125, + "grad_norm": 13.82036304473877, + "learning_rate": 0.0001, + "loss": 0.3751, + "step": 990 + }, + { + "epoch": 2.840909090909091, + "grad_norm": 13.940563201904297, + "learning_rate": 0.0001, + "loss": 0.3719, + "step": 1000 + }, + { + "epoch": 2.8693181818181817, + "grad_norm": 13.515235900878906, + "learning_rate": 0.0001, + "loss": 0.3761, + "step": 1010 + }, + { + "epoch": 2.8977272727272725, + "grad_norm": 14.955562591552734, + "learning_rate": 0.0001, + "loss": 0.3576, + "step": 1020 + }, + { + "epoch": 2.9261363636363638, + "grad_norm": 11.280851364135742, + "learning_rate": 0.0001, + "loss": 0.3614, + "step": 1030 + }, + { + "epoch": 2.9545454545454546, + "grad_norm": 12.09704303741455, + "learning_rate": 0.0001, + "loss": 0.3661, + "step": 1040 + }, + { + "epoch": 2.9829545454545454, + "grad_norm": 14.392845153808594, + "learning_rate": 0.0001, + "loss": 0.3722, + "step": 1050 + }, + { + "epoch": 3.0113636363636362, + "grad_norm": 13.309704780578613, + "learning_rate": 0.0001, + "loss": 0.3595, + "step": 1060 + }, + { + "epoch": 3.039772727272727, + "grad_norm": 15.42646312713623, + "learning_rate": 0.0001, + "loss": 0.3427, + "step": 1070 + }, + { + "epoch": 3.0681818181818183, + "grad_norm": 10.911493301391602, + "learning_rate": 0.0001, + "loss": 0.3517, + "step": 1080 + }, + { + "epoch": 3.096590909090909, + "grad_norm": 12.293902397155762, + "learning_rate": 0.0001, + "loss": 0.3398, + "step": 1090 + }, + { + "epoch": 3.125, + "grad_norm": 13.060087203979492, + "learning_rate": 0.0001, + "loss": 0.3491, + "step": 1100 + }, + { + "epoch": 3.153409090909091, + "grad_norm": 11.007771492004395, + "learning_rate": 0.0001, + "loss": 0.3566, + "step": 1110 + }, + { + "epoch": 3.1818181818181817, + "grad_norm": 13.680668830871582, + "learning_rate": 0.0001, + "loss": 0.341, + "step": 1120 + }, + { + "epoch": 3.210227272727273, + "grad_norm": 9.585054397583008, + "learning_rate": 0.0001, + "loss": 0.3582, + "step": 1130 + }, + { + "epoch": 3.2386363636363638, + "grad_norm": 12.668915748596191, + "learning_rate": 0.0001, + "loss": 0.351, + "step": 1140 + }, + { + "epoch": 3.2670454545454546, + "grad_norm": 12.355649948120117, + "learning_rate": 0.0001, + "loss": 0.3489, + "step": 1150 + }, + { + "epoch": 3.2954545454545454, + "grad_norm": 11.911877632141113, + "learning_rate": 0.0001, + "loss": 0.3348, + "step": 1160 + }, + { + "epoch": 3.3238636363636362, + "grad_norm": 11.719152450561523, + "learning_rate": 0.0001, + "loss": 0.3364, + "step": 1170 + }, + { + "epoch": 3.3522727272727275, + "grad_norm": 10.242898941040039, + "learning_rate": 0.0001, + "loss": 0.3264, + "step": 1180 + }, + { + "epoch": 3.3806818181818183, + "grad_norm": 9.679462432861328, + "learning_rate": 0.0001, + "loss": 0.3251, + "step": 1190 + }, + { + "epoch": 3.409090909090909, + "grad_norm": 10.055230140686035, + "learning_rate": 0.0001, + "loss": 0.3275, + "step": 1200 + }, + { + "epoch": 3.4375, + "grad_norm": 8.881628036499023, + "learning_rate": 0.0001, + "loss": 0.325, + "step": 1210 + }, + { + "epoch": 3.465909090909091, + "grad_norm": 11.237067222595215, + "learning_rate": 0.0001, + "loss": 0.3497, + "step": 1220 + }, + { + "epoch": 3.4943181818181817, + "grad_norm": 12.494010925292969, + "learning_rate": 0.0001, + "loss": 0.334, + "step": 1230 + }, + { + "epoch": 3.5227272727272725, + "grad_norm": 11.651463508605957, + "learning_rate": 0.0001, + "loss": 0.3291, + "step": 1240 + }, + { + "epoch": 3.5511363636363638, + "grad_norm": 11.586341857910156, + "learning_rate": 0.0001, + "loss": 0.3236, + "step": 1250 + }, + { + "epoch": 3.5795454545454546, + "grad_norm": 10.351299285888672, + "learning_rate": 0.0001, + "loss": 0.3314, + "step": 1260 + }, + { + "epoch": 3.6079545454545454, + "grad_norm": 11.262849807739258, + "learning_rate": 0.0001, + "loss": 0.3375, + "step": 1270 + }, + { + "epoch": 3.6363636363636362, + "grad_norm": 10.904534339904785, + "learning_rate": 0.0001, + "loss": 0.3281, + "step": 1280 + }, + { + "epoch": 3.6647727272727275, + "grad_norm": 8.963489532470703, + "learning_rate": 0.0001, + "loss": 0.3298, + "step": 1290 + }, + { + "epoch": 3.6931818181818183, + "grad_norm": 10.735923767089844, + "learning_rate": 0.0001, + "loss": 0.3272, + "step": 1300 + }, + { + "epoch": 3.721590909090909, + "grad_norm": 12.867420196533203, + "learning_rate": 0.0001, + "loss": 0.3234, + "step": 1310 + }, + { + "epoch": 3.75, + "grad_norm": 11.347630500793457, + "learning_rate": 0.0001, + "loss": 0.3209, + "step": 1320 + }, + { + "epoch": 3.778409090909091, + "grad_norm": 10.211435317993164, + "learning_rate": 0.0001, + "loss": 0.3173, + "step": 1330 + }, + { + "epoch": 3.8068181818181817, + "grad_norm": 8.992242813110352, + "learning_rate": 0.0001, + "loss": 0.3199, + "step": 1340 + }, + { + "epoch": 3.8352272727272725, + "grad_norm": 9.036025047302246, + "learning_rate": 0.0001, + "loss": 0.3124, + "step": 1350 + }, + { + "epoch": 3.8636363636363638, + "grad_norm": 11.357304573059082, + "learning_rate": 0.0001, + "loss": 0.3256, + "step": 1360 + }, + { + "epoch": 3.8920454545454546, + "grad_norm": 12.447697639465332, + "learning_rate": 0.0001, + "loss": 0.3234, + "step": 1370 + }, + { + "epoch": 3.9204545454545454, + "grad_norm": 10.388401985168457, + "learning_rate": 0.0001, + "loss": 0.3134, + "step": 1380 + }, + { + "epoch": 3.9488636363636362, + "grad_norm": 10.227154731750488, + "learning_rate": 0.0001, + "loss": 0.3036, + "step": 1390 + }, + { + "epoch": 3.9772727272727275, + "grad_norm": 11.007589340209961, + "learning_rate": 0.0001, + "loss": 0.3087, + "step": 1400 + }, + { + "epoch": 4.005681818181818, + "grad_norm": 9.138888359069824, + "learning_rate": 0.0001, + "loss": 0.3109, + "step": 1410 + }, + { + "epoch": 4.034090909090909, + "grad_norm": 9.967912673950195, + "learning_rate": 0.0001, + "loss": 0.307, + "step": 1420 + }, + { + "epoch": 4.0625, + "grad_norm": 10.28461742401123, + "learning_rate": 0.0001, + "loss": 0.2973, + "step": 1430 + }, + { + "epoch": 4.090909090909091, + "grad_norm": 9.785955429077148, + "learning_rate": 0.0001, + "loss": 0.3004, + "step": 1440 + }, + { + "epoch": 4.119318181818182, + "grad_norm": 10.850992202758789, + "learning_rate": 0.0001, + "loss": 0.3005, + "step": 1450 + }, + { + "epoch": 4.1477272727272725, + "grad_norm": 12.91825008392334, + "learning_rate": 0.0001, + "loss": 0.3005, + "step": 1460 + }, + { + "epoch": 4.176136363636363, + "grad_norm": 11.555893898010254, + "learning_rate": 0.0001, + "loss": 0.3107, + "step": 1470 + }, + { + "epoch": 4.204545454545454, + "grad_norm": 10.81035327911377, + "learning_rate": 0.0001, + "loss": 0.2977, + "step": 1480 + }, + { + "epoch": 4.232954545454546, + "grad_norm": 11.161906242370605, + "learning_rate": 0.0001, + "loss": 0.2962, + "step": 1490 + }, + { + "epoch": 4.261363636363637, + "grad_norm": 12.040653228759766, + "learning_rate": 0.0001, + "loss": 0.3025, + "step": 1500 + }, + { + "epoch": 4.2897727272727275, + "grad_norm": 9.015117645263672, + "learning_rate": 0.0001, + "loss": 0.292, + "step": 1510 + }, + { + "epoch": 4.318181818181818, + "grad_norm": 10.438865661621094, + "learning_rate": 0.0001, + "loss": 0.2881, + "step": 1520 + }, + { + "epoch": 4.346590909090909, + "grad_norm": 10.363481521606445, + "learning_rate": 0.0001, + "loss": 0.2919, + "step": 1530 + }, + { + "epoch": 4.375, + "grad_norm": 10.898138999938965, + "learning_rate": 0.0001, + "loss": 0.3035, + "step": 1540 + }, + { + "epoch": 4.403409090909091, + "grad_norm": 9.264910697937012, + "learning_rate": 0.0001, + "loss": 0.2866, + "step": 1550 + }, + { + "epoch": 4.431818181818182, + "grad_norm": 9.535000801086426, + "learning_rate": 0.0001, + "loss": 0.2833, + "step": 1560 + }, + { + "epoch": 4.4602272727272725, + "grad_norm": 10.703611373901367, + "learning_rate": 0.0001, + "loss": 0.2977, + "step": 1570 + }, + { + "epoch": 4.488636363636363, + "grad_norm": 8.830336570739746, + "learning_rate": 0.0001, + "loss": 0.2822, + "step": 1580 + }, + { + "epoch": 4.517045454545455, + "grad_norm": 9.80781364440918, + "learning_rate": 0.0001, + "loss": 0.2897, + "step": 1590 + }, + { + "epoch": 4.545454545454545, + "grad_norm": 9.538243293762207, + "learning_rate": 0.0001, + "loss": 0.2893, + "step": 1600 + }, + { + "epoch": 4.573863636363637, + "grad_norm": 7.803942680358887, + "learning_rate": 0.0001, + "loss": 0.2898, + "step": 1610 + }, + { + "epoch": 4.6022727272727275, + "grad_norm": 9.329748153686523, + "learning_rate": 0.0001, + "loss": 0.288, + "step": 1620 + }, + { + "epoch": 4.630681818181818, + "grad_norm": 9.706318855285645, + "learning_rate": 0.0001, + "loss": 0.2824, + "step": 1630 + }, + { + "epoch": 4.659090909090909, + "grad_norm": 8.528480529785156, + "learning_rate": 0.0001, + "loss": 0.2969, + "step": 1640 + }, + { + "epoch": 4.6875, + "grad_norm": 8.045533180236816, + "learning_rate": 0.0001, + "loss": 0.2995, + "step": 1650 + }, + { + "epoch": 4.715909090909091, + "grad_norm": 8.474618911743164, + "learning_rate": 0.0001, + "loss": 0.2906, + "step": 1660 + }, + { + "epoch": 4.744318181818182, + "grad_norm": 9.250617027282715, + "learning_rate": 0.0001, + "loss": 0.2915, + "step": 1670 + }, + { + "epoch": 4.7727272727272725, + "grad_norm": 8.805644989013672, + "learning_rate": 0.0001, + "loss": 0.2835, + "step": 1680 + }, + { + "epoch": 4.801136363636363, + "grad_norm": 8.365328788757324, + "learning_rate": 0.0001, + "loss": 0.2872, + "step": 1690 + }, + { + "epoch": 4.829545454545455, + "grad_norm": 9.336677551269531, + "learning_rate": 0.0001, + "loss": 0.2822, + "step": 1700 + }, + { + "epoch": 4.857954545454545, + "grad_norm": 8.8048095703125, + "learning_rate": 0.0001, + "loss": 0.2802, + "step": 1710 + }, + { + "epoch": 4.886363636363637, + "grad_norm": 10.265268325805664, + "learning_rate": 0.0001, + "loss": 0.2718, + "step": 1720 + }, + { + "epoch": 4.9147727272727275, + "grad_norm": 9.204639434814453, + "learning_rate": 0.0001, + "loss": 0.2874, + "step": 1730 + }, + { + "epoch": 4.943181818181818, + "grad_norm": 8.516647338867188, + "learning_rate": 0.0001, + "loss": 0.2913, + "step": 1740 + }, + { + "epoch": 4.971590909090909, + "grad_norm": 7.545566082000732, + "learning_rate": 0.0001, + "loss": 0.2799, + "step": 1750 + }, + { + "epoch": 5.0, + "grad_norm": 9.611028671264648, + "learning_rate": 0.0001, + "loss": 0.2816, + "step": 1760 + }, + { + "epoch": 5.028409090909091, + "grad_norm": 7.730203151702881, + "learning_rate": 0.0001, + "loss": 0.278, + "step": 1770 + }, + { + "epoch": 5.056818181818182, + "grad_norm": 9.771706581115723, + "learning_rate": 0.0001, + "loss": 0.2764, + "step": 1780 + }, + { + "epoch": 5.0852272727272725, + "grad_norm": 8.7466402053833, + "learning_rate": 0.0001, + "loss": 0.2823, + "step": 1790 + }, + { + "epoch": 5.113636363636363, + "grad_norm": 9.843619346618652, + "learning_rate": 0.0001, + "loss": 0.2654, + "step": 1800 + }, + { + "epoch": 5.142045454545454, + "grad_norm": 8.296882629394531, + "learning_rate": 0.0001, + "loss": 0.2691, + "step": 1810 + }, + { + "epoch": 5.170454545454546, + "grad_norm": 8.18472957611084, + "learning_rate": 0.0001, + "loss": 0.2644, + "step": 1820 + }, + { + "epoch": 5.198863636363637, + "grad_norm": 8.96210765838623, + "learning_rate": 0.0001, + "loss": 0.2688, + "step": 1830 + }, + { + "epoch": 5.2272727272727275, + "grad_norm": 9.177153587341309, + "learning_rate": 0.0001, + "loss": 0.2683, + "step": 1840 + }, + { + "epoch": 5.255681818181818, + "grad_norm": 7.267095565795898, + "learning_rate": 0.0001, + "loss": 0.2673, + "step": 1850 + }, + { + "epoch": 5.284090909090909, + "grad_norm": 8.78824520111084, + "learning_rate": 0.0001, + "loss": 0.2629, + "step": 1860 + }, + { + "epoch": 5.3125, + "grad_norm": 8.33309268951416, + "learning_rate": 0.0001, + "loss": 0.2635, + "step": 1870 + }, + { + "epoch": 5.340909090909091, + "grad_norm": 9.574383735656738, + "learning_rate": 0.0001, + "loss": 0.2563, + "step": 1880 + }, + { + "epoch": 5.369318181818182, + "grad_norm": 7.813918590545654, + "learning_rate": 0.0001, + "loss": 0.2467, + "step": 1890 + }, + { + "epoch": 5.3977272727272725, + "grad_norm": 9.375533103942871, + "learning_rate": 0.0001, + "loss": 0.262, + "step": 1900 + }, + { + "epoch": 5.426136363636363, + "grad_norm": 9.987363815307617, + "learning_rate": 0.0001, + "loss": 0.2618, + "step": 1910 + }, + { + "epoch": 5.454545454545454, + "grad_norm": 10.02425479888916, + "learning_rate": 0.0001, + "loss": 0.2635, + "step": 1920 + }, + { + "epoch": 5.482954545454546, + "grad_norm": 9.342535972595215, + "learning_rate": 0.0001, + "loss": 0.2497, + "step": 1930 + }, + { + "epoch": 5.511363636363637, + "grad_norm": 9.32978343963623, + "learning_rate": 0.0001, + "loss": 0.2592, + "step": 1940 + }, + { + "epoch": 5.5397727272727275, + "grad_norm": 7.348328113555908, + "learning_rate": 0.0001, + "loss": 0.2592, + "step": 1950 + }, + { + "epoch": 5.568181818181818, + "grad_norm": 8.86340045928955, + "learning_rate": 0.0001, + "loss": 0.2541, + "step": 1960 + }, + { + "epoch": 5.596590909090909, + "grad_norm": 8.326016426086426, + "learning_rate": 0.0001, + "loss": 0.2663, + "step": 1970 + }, + { + "epoch": 5.625, + "grad_norm": 8.392045021057129, + "learning_rate": 0.0001, + "loss": 0.2682, + "step": 1980 + }, + { + "epoch": 5.653409090909091, + "grad_norm": 8.57619571685791, + "learning_rate": 0.0001, + "loss": 0.2596, + "step": 1990 + }, + { + "epoch": 5.681818181818182, + "grad_norm": 7.7515058517456055, + "learning_rate": 0.0001, + "loss": 0.2408, + "step": 2000 + }, + { + "epoch": 5.7102272727272725, + "grad_norm": 8.581171989440918, + "learning_rate": 0.0001, + "loss": 0.2492, + "step": 2010 + }, + { + "epoch": 5.738636363636363, + "grad_norm": 8.195562362670898, + "learning_rate": 0.0001, + "loss": 0.2497, + "step": 2020 + }, + { + "epoch": 5.767045454545455, + "grad_norm": 7.793923854827881, + "learning_rate": 0.0001, + "loss": 0.2415, + "step": 2030 + }, + { + "epoch": 5.795454545454545, + "grad_norm": 7.39900016784668, + "learning_rate": 0.0001, + "loss": 0.2436, + "step": 2040 + }, + { + "epoch": 5.823863636363637, + "grad_norm": 8.420592308044434, + "learning_rate": 0.0001, + "loss": 0.2523, + "step": 2050 + }, + { + "epoch": 5.8522727272727275, + "grad_norm": 8.713873863220215, + "learning_rate": 0.0001, + "loss": 0.2623, + "step": 2060 + }, + { + "epoch": 5.880681818181818, + "grad_norm": 9.038602828979492, + "learning_rate": 0.0001, + "loss": 0.2543, + "step": 2070 + }, + { + "epoch": 5.909090909090909, + "grad_norm": 8.842888832092285, + "learning_rate": 0.0001, + "loss": 0.2539, + "step": 2080 + }, + { + "epoch": 5.9375, + "grad_norm": 9.234344482421875, + "learning_rate": 0.0001, + "loss": 0.2393, + "step": 2090 + }, + { + "epoch": 5.965909090909091, + "grad_norm": 7.840005874633789, + "learning_rate": 0.0001, + "loss": 0.2418, + "step": 2100 + }, + { + "epoch": 5.994318181818182, + "grad_norm": 8.143929481506348, + "learning_rate": 0.0001, + "loss": 0.2418, + "step": 2110 + }, + { + "epoch": 6.0227272727272725, + "grad_norm": 7.842228412628174, + "learning_rate": 0.0001, + "loss": 0.2343, + "step": 2120 + }, + { + "epoch": 6.051136363636363, + "grad_norm": 6.924618721008301, + "learning_rate": 0.0001, + "loss": 0.2437, + "step": 2130 + }, + { + "epoch": 6.079545454545454, + "grad_norm": 7.25029993057251, + "learning_rate": 0.0001, + "loss": 0.2463, + "step": 2140 + }, + { + "epoch": 6.107954545454546, + "grad_norm": 8.335989952087402, + "learning_rate": 0.0001, + "loss": 0.2473, + "step": 2150 + }, + { + "epoch": 6.136363636363637, + "grad_norm": 6.865011215209961, + "learning_rate": 0.0001, + "loss": 0.2384, + "step": 2160 + }, + { + "epoch": 6.1647727272727275, + "grad_norm": 8.29775619506836, + "learning_rate": 0.0001, + "loss": 0.2382, + "step": 2170 + }, + { + "epoch": 6.193181818181818, + "grad_norm": 7.266998767852783, + "learning_rate": 0.0001, + "loss": 0.2383, + "step": 2180 + }, + { + "epoch": 6.221590909090909, + "grad_norm": 7.584468364715576, + "learning_rate": 0.0001, + "loss": 0.2263, + "step": 2190 + }, + { + "epoch": 6.25, + "grad_norm": 6.939903259277344, + "learning_rate": 0.0001, + "loss": 0.2418, + "step": 2200 + }, + { + "epoch": 6.278409090909091, + "grad_norm": 6.492012023925781, + "learning_rate": 0.0001, + "loss": 0.2358, + "step": 2210 + }, + { + "epoch": 6.306818181818182, + "grad_norm": 7.337180137634277, + "learning_rate": 0.0001, + "loss": 0.2336, + "step": 2220 + }, + { + "epoch": 6.3352272727272725, + "grad_norm": 8.410757064819336, + "learning_rate": 0.0001, + "loss": 0.2292, + "step": 2230 + }, + { + "epoch": 6.363636363636363, + "grad_norm": 7.204639911651611, + "learning_rate": 0.0001, + "loss": 0.2321, + "step": 2240 + }, + { + "epoch": 6.392045454545454, + "grad_norm": 7.258450508117676, + "learning_rate": 0.0001, + "loss": 0.2356, + "step": 2250 + }, + { + "epoch": 6.420454545454546, + "grad_norm": 8.304643630981445, + "learning_rate": 0.0001, + "loss": 0.2347, + "step": 2260 + }, + { + "epoch": 6.448863636363637, + "grad_norm": 6.700302600860596, + "learning_rate": 0.0001, + "loss": 0.2309, + "step": 2270 + }, + { + "epoch": 6.4772727272727275, + "grad_norm": 7.752438545227051, + "learning_rate": 0.0001, + "loss": 0.2225, + "step": 2280 + }, + { + "epoch": 6.505681818181818, + "grad_norm": 7.962435245513916, + "learning_rate": 0.0001, + "loss": 0.2247, + "step": 2290 + }, + { + "epoch": 6.534090909090909, + "grad_norm": 8.655714988708496, + "learning_rate": 0.0001, + "loss": 0.2299, + "step": 2300 + }, + { + "epoch": 6.5625, + "grad_norm": 6.5540771484375, + "learning_rate": 0.0001, + "loss": 0.2243, + "step": 2310 + }, + { + "epoch": 6.590909090909091, + "grad_norm": 7.325479507446289, + "learning_rate": 0.0001, + "loss": 0.2209, + "step": 2320 + }, + { + "epoch": 6.619318181818182, + "grad_norm": 7.687260150909424, + "learning_rate": 0.0001, + "loss": 0.2219, + "step": 2330 + }, + { + "epoch": 6.6477272727272725, + "grad_norm": 8.61622428894043, + "learning_rate": 0.0001, + "loss": 0.2177, + "step": 2340 + }, + { + "epoch": 6.676136363636363, + "grad_norm": 7.550006866455078, + "learning_rate": 0.0001, + "loss": 0.2309, + "step": 2350 + }, + { + "epoch": 6.704545454545455, + "grad_norm": 7.1695685386657715, + "learning_rate": 0.0001, + "loss": 0.2278, + "step": 2360 + }, + { + "epoch": 6.732954545454545, + "grad_norm": 8.121203422546387, + "learning_rate": 0.0001, + "loss": 0.2192, + "step": 2370 + }, + { + "epoch": 6.761363636363637, + "grad_norm": 9.03805923461914, + "learning_rate": 0.0001, + "loss": 0.2298, + "step": 2380 + }, + { + "epoch": 6.7897727272727275, + "grad_norm": 7.608403205871582, + "learning_rate": 0.0001, + "loss": 0.23, + "step": 2390 + }, + { + "epoch": 6.818181818181818, + "grad_norm": 7.704319953918457, + "learning_rate": 0.0001, + "loss": 0.228, + "step": 2400 + }, + { + "epoch": 6.846590909090909, + "grad_norm": 6.52188777923584, + "learning_rate": 0.0001, + "loss": 0.2206, + "step": 2410 + }, + { + "epoch": 6.875, + "grad_norm": 6.7469635009765625, + "learning_rate": 0.0001, + "loss": 0.2283, + "step": 2420 + }, + { + "epoch": 6.903409090909091, + "grad_norm": 6.883518218994141, + "learning_rate": 0.0001, + "loss": 0.2123, + "step": 2430 + }, + { + "epoch": 6.931818181818182, + "grad_norm": 7.054996013641357, + "learning_rate": 0.0001, + "loss": 0.2237, + "step": 2440 + }, + { + "epoch": 6.9602272727272725, + "grad_norm": 7.665782451629639, + "learning_rate": 0.0001, + "loss": 0.2202, + "step": 2450 + }, + { + "epoch": 6.988636363636363, + "grad_norm": 8.317813873291016, + "learning_rate": 0.0001, + "loss": 0.2252, + "step": 2460 + }, + { + "epoch": 7.017045454545454, + "grad_norm": 7.469433307647705, + "learning_rate": 0.0001, + "loss": 0.2244, + "step": 2470 + }, + { + "epoch": 7.045454545454546, + "grad_norm": 6.86864709854126, + "learning_rate": 0.0001, + "loss": 0.2132, + "step": 2480 + }, + { + "epoch": 7.073863636363637, + "grad_norm": 7.019229412078857, + "learning_rate": 0.0001, + "loss": 0.2191, + "step": 2490 + }, + { + "epoch": 7.1022727272727275, + "grad_norm": 6.337296962738037, + "learning_rate": 0.0001, + "loss": 0.2135, + "step": 2500 + }, + { + "epoch": 7.130681818181818, + "grad_norm": 7.91449499130249, + "learning_rate": 0.0001, + "loss": 0.2132, + "step": 2510 + }, + { + "epoch": 7.159090909090909, + "grad_norm": 7.0960283279418945, + "learning_rate": 0.0001, + "loss": 0.2058, + "step": 2520 + }, + { + "epoch": 7.1875, + "grad_norm": 6.885858058929443, + "learning_rate": 0.0001, + "loss": 0.205, + "step": 2530 + }, + { + "epoch": 7.215909090909091, + "grad_norm": 7.231472015380859, + "learning_rate": 0.0001, + "loss": 0.2125, + "step": 2540 + }, + { + "epoch": 7.244318181818182, + "grad_norm": 6.965603351593018, + "learning_rate": 0.0001, + "loss": 0.209, + "step": 2550 + }, + { + "epoch": 7.2727272727272725, + "grad_norm": 7.230012893676758, + "learning_rate": 0.0001, + "loss": 0.2062, + "step": 2560 + }, + { + "epoch": 7.301136363636363, + "grad_norm": 6.389279842376709, + "learning_rate": 0.0001, + "loss": 0.2033, + "step": 2570 + }, + { + "epoch": 7.329545454545454, + "grad_norm": 6.917042255401611, + "learning_rate": 0.0001, + "loss": 0.2089, + "step": 2580 + }, + { + "epoch": 7.357954545454546, + "grad_norm": 6.476625919342041, + "learning_rate": 0.0001, + "loss": 0.203, + "step": 2590 + }, + { + "epoch": 7.386363636363637, + "grad_norm": 6.501523494720459, + "learning_rate": 0.0001, + "loss": 0.2034, + "step": 2600 + }, + { + "epoch": 7.4147727272727275, + "grad_norm": 4.998976230621338, + "learning_rate": 0.0001, + "loss": 0.2078, + "step": 2610 + }, + { + "epoch": 7.443181818181818, + "grad_norm": 5.617987632751465, + "learning_rate": 0.0001, + "loss": 0.2065, + "step": 2620 + }, + { + "epoch": 7.471590909090909, + "grad_norm": 7.509957313537598, + "learning_rate": 0.0001, + "loss": 0.2069, + "step": 2630 + }, + { + "epoch": 7.5, + "grad_norm": 6.23819637298584, + "learning_rate": 0.0001, + "loss": 0.2067, + "step": 2640 + }, + { + "epoch": 7.528409090909091, + "grad_norm": 6.4593119621276855, + "learning_rate": 0.0001, + "loss": 0.2081, + "step": 2650 + }, + { + "epoch": 7.556818181818182, + "grad_norm": 7.757627964019775, + "learning_rate": 0.0001, + "loss": 0.2045, + "step": 2660 + }, + { + "epoch": 7.5852272727272725, + "grad_norm": 7.729194641113281, + "learning_rate": 0.0001, + "loss": 0.2057, + "step": 2670 + }, + { + "epoch": 7.613636363636363, + "grad_norm": 6.746730804443359, + "learning_rate": 0.0001, + "loss": 0.2055, + "step": 2680 + }, + { + "epoch": 7.642045454545455, + "grad_norm": 6.76716947555542, + "learning_rate": 0.0001, + "loss": 0.2029, + "step": 2690 + }, + { + "epoch": 7.670454545454545, + "grad_norm": 6.230428695678711, + "learning_rate": 0.0001, + "loss": 0.2086, + "step": 2700 + }, + { + "epoch": 7.698863636363637, + "grad_norm": 6.170040607452393, + "learning_rate": 0.0001, + "loss": 0.2088, + "step": 2710 + }, + { + "epoch": 7.7272727272727275, + "grad_norm": 6.0955491065979, + "learning_rate": 0.0001, + "loss": 0.1942, + "step": 2720 + }, + { + "epoch": 7.755681818181818, + "grad_norm": 6.41675329208374, + "learning_rate": 0.0001, + "loss": 0.198, + "step": 2730 + }, + { + "epoch": 7.784090909090909, + "grad_norm": 8.517492294311523, + "learning_rate": 0.0001, + "loss": 0.2057, + "step": 2740 + }, + { + "epoch": 7.8125, + "grad_norm": 6.808162689208984, + "learning_rate": 0.0001, + "loss": 0.2017, + "step": 2750 + }, + { + "epoch": 7.840909090909091, + "grad_norm": 6.75582218170166, + "learning_rate": 0.0001, + "loss": 0.2036, + "step": 2760 + }, + { + "epoch": 7.869318181818182, + "grad_norm": 6.981121063232422, + "learning_rate": 0.0001, + "loss": 0.2075, + "step": 2770 + }, + { + "epoch": 7.8977272727272725, + "grad_norm": 5.264399528503418, + "learning_rate": 0.0001, + "loss": 0.204, + "step": 2780 + }, + { + "epoch": 7.926136363636363, + "grad_norm": 6.4845781326293945, + "learning_rate": 0.0001, + "loss": 0.2064, + "step": 2790 + }, + { + "epoch": 7.954545454545455, + "grad_norm": 7.397743225097656, + "learning_rate": 0.0001, + "loss": 0.2047, + "step": 2800 + }, + { + "epoch": 7.982954545454545, + "grad_norm": 8.000630378723145, + "learning_rate": 0.0001, + "loss": 0.2044, + "step": 2810 + }, + { + "epoch": 8.011363636363637, + "grad_norm": 6.957930088043213, + "learning_rate": 0.0001, + "loss": 0.2003, + "step": 2820 + }, + { + "epoch": 8.039772727272727, + "grad_norm": 6.850410461425781, + "learning_rate": 0.0001, + "loss": 0.1958, + "step": 2830 + }, + { + "epoch": 8.068181818181818, + "grad_norm": 6.901455402374268, + "learning_rate": 0.0001, + "loss": 0.2006, + "step": 2840 + }, + { + "epoch": 8.096590909090908, + "grad_norm": 5.5034871101379395, + "learning_rate": 0.0001, + "loss": 0.1876, + "step": 2850 + }, + { + "epoch": 8.125, + "grad_norm": 6.498006343841553, + "learning_rate": 0.0001, + "loss": 0.191, + "step": 2860 + }, + { + "epoch": 8.153409090909092, + "grad_norm": 5.930977821350098, + "learning_rate": 0.0001, + "loss": 0.1884, + "step": 2870 + }, + { + "epoch": 8.181818181818182, + "grad_norm": 6.002486705780029, + "learning_rate": 0.0001, + "loss": 0.1952, + "step": 2880 + }, + { + "epoch": 8.210227272727273, + "grad_norm": 5.113884925842285, + "learning_rate": 0.0001, + "loss": 0.1904, + "step": 2890 + }, + { + "epoch": 8.238636363636363, + "grad_norm": 6.802750587463379, + "learning_rate": 0.0001, + "loss": 0.1887, + "step": 2900 + }, + { + "epoch": 8.267045454545455, + "grad_norm": 5.978296756744385, + "learning_rate": 0.0001, + "loss": 0.1909, + "step": 2910 + }, + { + "epoch": 8.295454545454545, + "grad_norm": 7.176412105560303, + "learning_rate": 0.0001, + "loss": 0.1913, + "step": 2920 + }, + { + "epoch": 8.323863636363637, + "grad_norm": 6.965484619140625, + "learning_rate": 0.0001, + "loss": 0.1854, + "step": 2930 + }, + { + "epoch": 8.352272727272727, + "grad_norm": 5.903598785400391, + "learning_rate": 0.0001, + "loss": 0.1797, + "step": 2940 + }, + { + "epoch": 8.380681818181818, + "grad_norm": 6.34436559677124, + "learning_rate": 0.0001, + "loss": 0.1807, + "step": 2950 + }, + { + "epoch": 8.409090909090908, + "grad_norm": 5.903111934661865, + "learning_rate": 0.0001, + "loss": 0.1851, + "step": 2960 + }, + { + "epoch": 8.4375, + "grad_norm": 5.883657455444336, + "learning_rate": 0.0001, + "loss": 0.1826, + "step": 2970 + }, + { + "epoch": 8.465909090909092, + "grad_norm": 5.767624378204346, + "learning_rate": 0.0001, + "loss": 0.1862, + "step": 2980 + }, + { + "epoch": 8.494318181818182, + "grad_norm": 5.390651226043701, + "learning_rate": 0.0001, + "loss": 0.1906, + "step": 2990 + }, + { + "epoch": 8.522727272727273, + "grad_norm": 5.619853496551514, + "learning_rate": 0.0001, + "loss": 0.1804, + "step": 3000 + }, + { + "epoch": 8.551136363636363, + "grad_norm": 6.3636932373046875, + "learning_rate": 0.0001, + "loss": 0.1849, + "step": 3010 + }, + { + "epoch": 8.579545454545455, + "grad_norm": 6.031747341156006, + "learning_rate": 0.0001, + "loss": 0.1884, + "step": 3020 + }, + { + "epoch": 8.607954545454545, + "grad_norm": 5.940463066101074, + "learning_rate": 0.0001, + "loss": 0.188, + "step": 3030 + }, + { + "epoch": 8.636363636363637, + "grad_norm": 5.887471675872803, + "learning_rate": 0.0001, + "loss": 0.1798, + "step": 3040 + }, + { + "epoch": 8.664772727272727, + "grad_norm": 5.479545593261719, + "learning_rate": 0.0001, + "loss": 0.1778, + "step": 3050 + }, + { + "epoch": 8.693181818181818, + "grad_norm": 6.690113544464111, + "learning_rate": 0.0001, + "loss": 0.1786, + "step": 3060 + }, + { + "epoch": 8.721590909090908, + "grad_norm": 5.396069049835205, + "learning_rate": 0.0001, + "loss": 0.1785, + "step": 3070 + }, + { + "epoch": 8.75, + "grad_norm": 5.759469509124756, + "learning_rate": 0.0001, + "loss": 0.1718, + "step": 3080 + }, + { + "epoch": 8.778409090909092, + "grad_norm": 4.685205459594727, + "learning_rate": 0.0001, + "loss": 0.172, + "step": 3090 + }, + { + "epoch": 8.806818181818182, + "grad_norm": 5.000999927520752, + "learning_rate": 0.0001, + "loss": 0.1808, + "step": 3100 + }, + { + "epoch": 8.835227272727273, + "grad_norm": 5.158972263336182, + "learning_rate": 0.0001, + "loss": 0.18, + "step": 3110 + }, + { + "epoch": 8.863636363636363, + "grad_norm": 5.847781658172607, + "learning_rate": 0.0001, + "loss": 0.1851, + "step": 3120 + }, + { + "epoch": 8.892045454545455, + "grad_norm": 4.9706645011901855, + "learning_rate": 0.0001, + "loss": 0.1838, + "step": 3130 + }, + { + "epoch": 8.920454545454545, + "grad_norm": 5.0156660079956055, + "learning_rate": 0.0001, + "loss": 0.1825, + "step": 3140 + }, + { + "epoch": 8.948863636363637, + "grad_norm": 5.2722344398498535, + "learning_rate": 0.0001, + "loss": 0.1794, + "step": 3150 + }, + { + "epoch": 8.977272727272727, + "grad_norm": 4.946606159210205, + "learning_rate": 0.0001, + "loss": 0.174, + "step": 3160 + }, + { + "epoch": 9.005681818181818, + "grad_norm": 5.0111846923828125, + "learning_rate": 0.0001, + "loss": 0.1769, + "step": 3170 + }, + { + "epoch": 9.034090909090908, + "grad_norm": 4.587785243988037, + "learning_rate": 0.0001, + "loss": 0.1725, + "step": 3180 + }, + { + "epoch": 9.0625, + "grad_norm": 4.933738708496094, + "learning_rate": 0.0001, + "loss": 0.1812, + "step": 3190 + }, + { + "epoch": 9.090909090909092, + "grad_norm": 5.0207037925720215, + "learning_rate": 0.0001, + "loss": 0.1804, + "step": 3200 + }, + { + "epoch": 9.119318181818182, + "grad_norm": 6.469820022583008, + "learning_rate": 0.0001, + "loss": 0.1731, + "step": 3210 + }, + { + "epoch": 9.147727272727273, + "grad_norm": 5.247611999511719, + "learning_rate": 0.0001, + "loss": 0.1785, + "step": 3220 + }, + { + "epoch": 9.176136363636363, + "grad_norm": 4.957090854644775, + "learning_rate": 0.0001, + "loss": 0.1721, + "step": 3230 + }, + { + "epoch": 9.204545454545455, + "grad_norm": 4.917489051818848, + "learning_rate": 0.0001, + "loss": 0.1767, + "step": 3240 + }, + { + "epoch": 9.232954545454545, + "grad_norm": 6.98730993270874, + "learning_rate": 0.0001, + "loss": 0.1773, + "step": 3250 + }, + { + "epoch": 9.261363636363637, + "grad_norm": 5.937990665435791, + "learning_rate": 0.0001, + "loss": 0.1737, + "step": 3260 + }, + { + "epoch": 9.289772727272727, + "grad_norm": 6.112240791320801, + "learning_rate": 0.0001, + "loss": 0.1708, + "step": 3270 + }, + { + "epoch": 9.318181818181818, + "grad_norm": 5.8593878746032715, + "learning_rate": 0.0001, + "loss": 0.174, + "step": 3280 + }, + { + "epoch": 9.346590909090908, + "grad_norm": 6.075056552886963, + "learning_rate": 0.0001, + "loss": 0.1699, + "step": 3290 + }, + { + "epoch": 9.375, + "grad_norm": 5.816572666168213, + "learning_rate": 0.0001, + "loss": 0.1722, + "step": 3300 + }, + { + "epoch": 9.403409090909092, + "grad_norm": 6.339922904968262, + "learning_rate": 0.0001, + "loss": 0.1653, + "step": 3310 + }, + { + "epoch": 9.431818181818182, + "grad_norm": 5.111523628234863, + "learning_rate": 0.0001, + "loss": 0.1618, + "step": 3320 + }, + { + "epoch": 9.460227272727273, + "grad_norm": 5.104013442993164, + "learning_rate": 0.0001, + "loss": 0.1652, + "step": 3330 + }, + { + "epoch": 9.488636363636363, + "grad_norm": 4.7531280517578125, + "learning_rate": 0.0001, + "loss": 0.1639, + "step": 3340 + }, + { + "epoch": 9.517045454545455, + "grad_norm": 4.486930847167969, + "learning_rate": 0.0001, + "loss": 0.1729, + "step": 3350 + }, + { + "epoch": 9.545454545454545, + "grad_norm": 5.003032684326172, + "learning_rate": 0.0001, + "loss": 0.173, + "step": 3360 + }, + { + "epoch": 9.573863636363637, + "grad_norm": 5.644103050231934, + "learning_rate": 0.0001, + "loss": 0.1694, + "step": 3370 + }, + { + "epoch": 9.602272727272727, + "grad_norm": 5.101214408874512, + "learning_rate": 0.0001, + "loss": 0.1711, + "step": 3380 + }, + { + "epoch": 9.630681818181818, + "grad_norm": 5.529112815856934, + "learning_rate": 0.0001, + "loss": 0.1715, + "step": 3390 + }, + { + "epoch": 9.659090909090908, + "grad_norm": 5.411925792694092, + "learning_rate": 0.0001, + "loss": 0.1675, + "step": 3400 + }, + { + "epoch": 9.6875, + "grad_norm": 5.155153751373291, + "learning_rate": 0.0001, + "loss": 0.1648, + "step": 3410 + }, + { + "epoch": 9.715909090909092, + "grad_norm": 4.77042293548584, + "learning_rate": 0.0001, + "loss": 0.1622, + "step": 3420 + }, + { + "epoch": 9.744318181818182, + "grad_norm": 4.622435092926025, + "learning_rate": 0.0001, + "loss": 0.1678, + "step": 3430 + }, + { + "epoch": 9.772727272727273, + "grad_norm": 5.802976131439209, + "learning_rate": 0.0001, + "loss": 0.1712, + "step": 3440 + }, + { + "epoch": 9.801136363636363, + "grad_norm": 4.810296058654785, + "learning_rate": 0.0001, + "loss": 0.173, + "step": 3450 + }, + { + "epoch": 9.829545454545455, + "grad_norm": 5.124487400054932, + "learning_rate": 0.0001, + "loss": 0.1689, + "step": 3460 + }, + { + "epoch": 9.857954545454545, + "grad_norm": 5.081210136413574, + "learning_rate": 0.0001, + "loss": 0.1625, + "step": 3470 + }, + { + "epoch": 9.886363636363637, + "grad_norm": 5.038453578948975, + "learning_rate": 0.0001, + "loss": 0.1571, + "step": 3480 + }, + { + "epoch": 9.914772727272727, + "grad_norm": 4.524289608001709, + "learning_rate": 0.0001, + "loss": 0.1614, + "step": 3490 + }, + { + "epoch": 9.943181818181818, + "grad_norm": 5.175899505615234, + "learning_rate": 0.0001, + "loss": 0.1601, + "step": 3500 + }, + { + "epoch": 9.971590909090908, + "grad_norm": 4.064411640167236, + "learning_rate": 0.0001, + "loss": 0.1626, + "step": 3510 + }, + { + "epoch": 10.0, + "grad_norm": 4.967013835906982, + "learning_rate": 0.0001, + "loss": 0.1653, + "step": 3520 + }, + { + "epoch": 10.028409090909092, + "grad_norm": 5.4418535232543945, + "learning_rate": 0.0001, + "loss": 0.161, + "step": 3530 + }, + { + "epoch": 10.056818181818182, + "grad_norm": 5.082826614379883, + "learning_rate": 0.0001, + "loss": 0.1602, + "step": 3540 + }, + { + "epoch": 10.085227272727273, + "grad_norm": 4.592067241668701, + "learning_rate": 0.0001, + "loss": 0.1623, + "step": 3550 + }, + { + "epoch": 10.113636363636363, + "grad_norm": 5.288888931274414, + "learning_rate": 0.0001, + "loss": 0.1576, + "step": 3560 + }, + { + "epoch": 10.142045454545455, + "grad_norm": 5.104770660400391, + "learning_rate": 0.0001, + "loss": 0.1593, + "step": 3570 + }, + { + "epoch": 10.170454545454545, + "grad_norm": 4.773959159851074, + "learning_rate": 0.0001, + "loss": 0.1545, + "step": 3580 + }, + { + "epoch": 10.198863636363637, + "grad_norm": 4.410947799682617, + "learning_rate": 0.0001, + "loss": 0.161, + "step": 3590 + }, + { + "epoch": 10.227272727272727, + "grad_norm": 4.374294281005859, + "learning_rate": 0.0001, + "loss": 0.158, + "step": 3600 + }, + { + "epoch": 10.255681818181818, + "grad_norm": 4.402506351470947, + "learning_rate": 0.0001, + "loss": 0.1567, + "step": 3610 + }, + { + "epoch": 10.284090909090908, + "grad_norm": 5.090147495269775, + "learning_rate": 0.0001, + "loss": 0.1603, + "step": 3620 + }, + { + "epoch": 10.3125, + "grad_norm": 5.5478081703186035, + "learning_rate": 0.0001, + "loss": 0.1631, + "step": 3630 + }, + { + "epoch": 10.340909090909092, + "grad_norm": 5.645622730255127, + "learning_rate": 0.0001, + "loss": 0.1597, + "step": 3640 + }, + { + "epoch": 10.369318181818182, + "grad_norm": 4.826333999633789, + "learning_rate": 0.0001, + "loss": 0.1584, + "step": 3650 + }, + { + "epoch": 10.397727272727273, + "grad_norm": 5.210224628448486, + "learning_rate": 0.0001, + "loss": 0.1553, + "step": 3660 + }, + { + "epoch": 10.426136363636363, + "grad_norm": 3.516092300415039, + "learning_rate": 0.0001, + "loss": 0.1543, + "step": 3670 + }, + { + "epoch": 10.454545454545455, + "grad_norm": 4.710558891296387, + "learning_rate": 0.0001, + "loss": 0.1507, + "step": 3680 + }, + { + "epoch": 10.482954545454545, + "grad_norm": 4.940939903259277, + "learning_rate": 0.0001, + "loss": 0.1526, + "step": 3690 + }, + { + "epoch": 10.511363636363637, + "grad_norm": 4.353475093841553, + "learning_rate": 0.0001, + "loss": 0.1537, + "step": 3700 + }, + { + "epoch": 10.539772727272727, + "grad_norm": 3.7736759185791016, + "learning_rate": 0.0001, + "loss": 0.1557, + "step": 3710 + }, + { + "epoch": 10.568181818181818, + "grad_norm": 4.482377529144287, + "learning_rate": 0.0001, + "loss": 0.1636, + "step": 3720 + }, + { + "epoch": 10.596590909090908, + "grad_norm": 4.80997896194458, + "learning_rate": 0.0001, + "loss": 0.1611, + "step": 3730 + }, + { + "epoch": 10.625, + "grad_norm": 6.185352802276611, + "learning_rate": 0.0001, + "loss": 0.1555, + "step": 3740 + }, + { + "epoch": 10.653409090909092, + "grad_norm": 5.383978366851807, + "learning_rate": 0.0001, + "loss": 0.1609, + "step": 3750 + }, + { + "epoch": 10.681818181818182, + "grad_norm": 6.075902938842773, + "learning_rate": 0.0001, + "loss": 0.1612, + "step": 3760 + }, + { + "epoch": 10.710227272727273, + "grad_norm": 5.537624835968018, + "learning_rate": 0.0001, + "loss": 0.155, + "step": 3770 + }, + { + "epoch": 10.738636363636363, + "grad_norm": 4.914467811584473, + "learning_rate": 0.0001, + "loss": 0.1555, + "step": 3780 + }, + { + "epoch": 10.767045454545455, + "grad_norm": 4.567920684814453, + "learning_rate": 0.0001, + "loss": 0.1566, + "step": 3790 + }, + { + "epoch": 10.795454545454545, + "grad_norm": 4.5670390129089355, + "learning_rate": 0.0001, + "loss": 0.1566, + "step": 3800 + }, + { + "epoch": 10.823863636363637, + "grad_norm": 3.629544734954834, + "learning_rate": 0.0001, + "loss": 0.1502, + "step": 3810 + }, + { + "epoch": 10.852272727272727, + "grad_norm": 4.088180065155029, + "learning_rate": 0.0001, + "loss": 0.1594, + "step": 3820 + }, + { + "epoch": 10.880681818181818, + "grad_norm": 4.8524017333984375, + "learning_rate": 0.0001, + "loss": 0.1572, + "step": 3830 + }, + { + "epoch": 10.909090909090908, + "grad_norm": 5.3502888679504395, + "learning_rate": 0.0001, + "loss": 0.1512, + "step": 3840 + }, + { + "epoch": 10.9375, + "grad_norm": 4.959495544433594, + "learning_rate": 0.0001, + "loss": 0.1549, + "step": 3850 + }, + { + "epoch": 10.965909090909092, + "grad_norm": 4.991962432861328, + "learning_rate": 0.0001, + "loss": 0.1504, + "step": 3860 + }, + { + "epoch": 10.994318181818182, + "grad_norm": 4.054560661315918, + "learning_rate": 0.0001, + "loss": 0.1553, + "step": 3870 + }, + { + "epoch": 11.022727272727273, + "grad_norm": 3.9775209426879883, + "learning_rate": 0.0001, + "loss": 0.149, + "step": 3880 + }, + { + "epoch": 11.051136363636363, + "grad_norm": 4.538222312927246, + "learning_rate": 0.0001, + "loss": 0.1505, + "step": 3890 + }, + { + "epoch": 11.079545454545455, + "grad_norm": 5.487000465393066, + "learning_rate": 0.0001, + "loss": 0.15, + "step": 3900 + }, + { + "epoch": 11.107954545454545, + "grad_norm": 5.862754821777344, + "learning_rate": 0.0001, + "loss": 0.1493, + "step": 3910 + }, + { + "epoch": 11.136363636363637, + "grad_norm": 4.4752302169799805, + "learning_rate": 0.0001, + "loss": 0.1524, + "step": 3920 + }, + { + "epoch": 11.164772727272727, + "grad_norm": 4.51123571395874, + "learning_rate": 0.0001, + "loss": 0.1545, + "step": 3930 + }, + { + "epoch": 11.193181818181818, + "grad_norm": 4.44078254699707, + "learning_rate": 0.0001, + "loss": 0.1481, + "step": 3940 + }, + { + "epoch": 11.221590909090908, + "grad_norm": 4.542746067047119, + "learning_rate": 0.0001, + "loss": 0.1496, + "step": 3950 + }, + { + "epoch": 11.25, + "grad_norm": 4.513556003570557, + "learning_rate": 0.0001, + "loss": 0.1456, + "step": 3960 + }, + { + "epoch": 11.278409090909092, + "grad_norm": 5.227005958557129, + "learning_rate": 0.0001, + "loss": 0.146, + "step": 3970 + }, + { + "epoch": 11.306818181818182, + "grad_norm": 4.134369850158691, + "learning_rate": 0.0001, + "loss": 0.1497, + "step": 3980 + }, + { + "epoch": 11.335227272727273, + "grad_norm": 5.030073642730713, + "learning_rate": 0.0001, + "loss": 0.1496, + "step": 3990 + }, + { + "epoch": 11.363636363636363, + "grad_norm": 4.397629737854004, + "learning_rate": 0.0001, + "loss": 0.1462, + "step": 4000 + }, + { + "epoch": 11.392045454545455, + "grad_norm": 4.636000633239746, + "learning_rate": 0.0001, + "loss": 0.144, + "step": 4010 + }, + { + "epoch": 11.420454545454545, + "grad_norm": 4.899885177612305, + "learning_rate": 0.0001, + "loss": 0.1445, + "step": 4020 + }, + { + "epoch": 11.448863636363637, + "grad_norm": 4.209653377532959, + "learning_rate": 0.0001, + "loss": 0.1517, + "step": 4030 + }, + { + "epoch": 11.477272727272727, + "grad_norm": 4.315791606903076, + "learning_rate": 0.0001, + "loss": 0.1423, + "step": 4040 + }, + { + "epoch": 11.505681818181818, + "grad_norm": 4.065213203430176, + "learning_rate": 0.0001, + "loss": 0.1429, + "step": 4050 + }, + { + "epoch": 11.534090909090908, + "grad_norm": 4.354069709777832, + "learning_rate": 0.0001, + "loss": 0.1452, + "step": 4060 + }, + { + "epoch": 11.5625, + "grad_norm": 4.485837459564209, + "learning_rate": 0.0001, + "loss": 0.1429, + "step": 4070 + }, + { + "epoch": 11.590909090909092, + "grad_norm": 4.509272575378418, + "learning_rate": 0.0001, + "loss": 0.1437, + "step": 4080 + }, + { + "epoch": 11.619318181818182, + "grad_norm": 4.269772052764893, + "learning_rate": 0.0001, + "loss": 0.1468, + "step": 4090 + }, + { + "epoch": 11.647727272727273, + "grad_norm": 4.422598361968994, + "learning_rate": 0.0001, + "loss": 0.151, + "step": 4100 + }, + { + "epoch": 11.676136363636363, + "grad_norm": 4.730630874633789, + "learning_rate": 0.0001, + "loss": 0.1497, + "step": 4110 + }, + { + "epoch": 11.704545454545455, + "grad_norm": 5.042013645172119, + "learning_rate": 0.0001, + "loss": 0.1476, + "step": 4120 + }, + { + "epoch": 11.732954545454545, + "grad_norm": 4.182816982269287, + "learning_rate": 0.0001, + "loss": 0.1471, + "step": 4130 + }, + { + "epoch": 11.761363636363637, + "grad_norm": 4.254685401916504, + "learning_rate": 0.0001, + "loss": 0.1371, + "step": 4140 + }, + { + "epoch": 11.789772727272727, + "grad_norm": 4.958248138427734, + "learning_rate": 0.0001, + "loss": 0.1465, + "step": 4150 + }, + { + "epoch": 11.818181818181818, + "grad_norm": 4.743212699890137, + "learning_rate": 0.0001, + "loss": 0.1448, + "step": 4160 + }, + { + "epoch": 11.846590909090908, + "grad_norm": 4.2032084465026855, + "learning_rate": 0.0001, + "loss": 0.1389, + "step": 4170 + }, + { + "epoch": 11.875, + "grad_norm": 4.244325637817383, + "learning_rate": 0.0001, + "loss": 0.1397, + "step": 4180 + }, + { + "epoch": 11.903409090909092, + "grad_norm": 3.134256362915039, + "learning_rate": 0.0001, + "loss": 0.1443, + "step": 4190 + }, + { + "epoch": 11.931818181818182, + "grad_norm": 4.238053321838379, + "learning_rate": 0.0001, + "loss": 0.1485, + "step": 4200 + }, + { + "epoch": 11.960227272727273, + "grad_norm": 4.34376335144043, + "learning_rate": 0.0001, + "loss": 0.1421, + "step": 4210 + }, + { + "epoch": 11.988636363636363, + "grad_norm": 3.7817201614379883, + "learning_rate": 0.0001, + "loss": 0.1441, + "step": 4220 + }, + { + "epoch": 12.017045454545455, + "grad_norm": 3.5958733558654785, + "learning_rate": 0.0001, + "loss": 0.1373, + "step": 4230 + }, + { + "epoch": 12.045454545454545, + "grad_norm": 4.10888147354126, + "learning_rate": 0.0001, + "loss": 0.1405, + "step": 4240 + }, + { + "epoch": 12.073863636363637, + "grad_norm": 3.802342176437378, + "learning_rate": 0.0001, + "loss": 0.1393, + "step": 4250 + }, + { + "epoch": 12.102272727272727, + "grad_norm": 4.85184907913208, + "learning_rate": 0.0001, + "loss": 0.1359, + "step": 4260 + }, + { + "epoch": 12.130681818181818, + "grad_norm": 4.548974514007568, + "learning_rate": 0.0001, + "loss": 0.1402, + "step": 4270 + }, + { + "epoch": 12.159090909090908, + "grad_norm": 4.047370433807373, + "learning_rate": 0.0001, + "loss": 0.143, + "step": 4280 + }, + { + "epoch": 12.1875, + "grad_norm": 4.97476863861084, + "learning_rate": 0.0001, + "loss": 0.1439, + "step": 4290 + }, + { + "epoch": 12.215909090909092, + "grad_norm": 4.076110363006592, + "learning_rate": 0.0001, + "loss": 0.145, + "step": 4300 + }, + { + "epoch": 12.244318181818182, + "grad_norm": 4.098419189453125, + "learning_rate": 0.0001, + "loss": 0.1423, + "step": 4310 + }, + { + "epoch": 12.272727272727273, + "grad_norm": 3.961846351623535, + "learning_rate": 0.0001, + "loss": 0.1369, + "step": 4320 + }, + { + "epoch": 12.301136363636363, + "grad_norm": 4.079448223114014, + "learning_rate": 0.0001, + "loss": 0.141, + "step": 4330 + }, + { + "epoch": 12.329545454545455, + "grad_norm": 3.375678777694702, + "learning_rate": 0.0001, + "loss": 0.1368, + "step": 4340 + }, + { + "epoch": 12.357954545454545, + "grad_norm": 3.7309460639953613, + "learning_rate": 0.0001, + "loss": 0.1338, + "step": 4350 + }, + { + "epoch": 12.386363636363637, + "grad_norm": 4.20289421081543, + "learning_rate": 0.0001, + "loss": 0.1429, + "step": 4360 + }, + { + "epoch": 12.414772727272727, + "grad_norm": 4.175302982330322, + "learning_rate": 0.0001, + "loss": 0.139, + "step": 4370 + }, + { + "epoch": 12.443181818181818, + "grad_norm": 3.7921714782714844, + "learning_rate": 0.0001, + "loss": 0.1408, + "step": 4380 + }, + { + "epoch": 12.471590909090908, + "grad_norm": 4.009100437164307, + "learning_rate": 0.0001, + "loss": 0.1432, + "step": 4390 + }, + { + "epoch": 12.5, + "grad_norm": 3.71403431892395, + "learning_rate": 0.0001, + "loss": 0.1381, + "step": 4400 + }, + { + "epoch": 12.528409090909092, + "grad_norm": 4.153659820556641, + "learning_rate": 0.0001, + "loss": 0.1362, + "step": 4410 + }, + { + "epoch": 12.556818181818182, + "grad_norm": 3.8363094329833984, + "learning_rate": 0.0001, + "loss": 0.1434, + "step": 4420 + }, + { + "epoch": 12.585227272727273, + "grad_norm": 3.9091579914093018, + "learning_rate": 0.0001, + "loss": 0.1372, + "step": 4430 + }, + { + "epoch": 12.613636363636363, + "grad_norm": 4.5517578125, + "learning_rate": 0.0001, + "loss": 0.1388, + "step": 4440 + }, + { + "epoch": 12.642045454545455, + "grad_norm": 3.891643762588501, + "learning_rate": 0.0001, + "loss": 0.1361, + "step": 4450 + }, + { + "epoch": 12.670454545454545, + "grad_norm": 3.9435248374938965, + "learning_rate": 0.0001, + "loss": 0.1417, + "step": 4460 + }, + { + "epoch": 12.698863636363637, + "grad_norm": 3.625453472137451, + "learning_rate": 0.0001, + "loss": 0.1392, + "step": 4470 + }, + { + "epoch": 12.727272727272727, + "grad_norm": 4.054428577423096, + "learning_rate": 0.0001, + "loss": 0.1335, + "step": 4480 + }, + { + "epoch": 12.755681818181818, + "grad_norm": 4.017980098724365, + "learning_rate": 0.0001, + "loss": 0.1409, + "step": 4490 + }, + { + "epoch": 12.784090909090908, + "grad_norm": 3.3853940963745117, + "learning_rate": 0.0001, + "loss": 0.1407, + "step": 4500 + }, + { + "epoch": 12.8125, + "grad_norm": 3.403177261352539, + "learning_rate": 0.0001, + "loss": 0.1344, + "step": 4510 + }, + { + "epoch": 12.840909090909092, + "grad_norm": 3.364267349243164, + "learning_rate": 0.0001, + "loss": 0.1379, + "step": 4520 + }, + { + "epoch": 12.869318181818182, + "grad_norm": 4.48183012008667, + "learning_rate": 0.0001, + "loss": 0.1407, + "step": 4530 + }, + { + "epoch": 12.897727272727273, + "grad_norm": 3.5637905597686768, + "learning_rate": 0.0001, + "loss": 0.1404, + "step": 4540 + }, + { + "epoch": 12.926136363636363, + "grad_norm": 3.4277963638305664, + "learning_rate": 0.0001, + "loss": 0.1357, + "step": 4550 + }, + { + "epoch": 12.954545454545455, + "grad_norm": 3.4155449867248535, + "learning_rate": 0.0001, + "loss": 0.1385, + "step": 4560 + }, + { + "epoch": 12.982954545454545, + "grad_norm": 3.1836628913879395, + "learning_rate": 0.0001, + "loss": 0.1381, + "step": 4570 + }, + { + "epoch": 13.011363636363637, + "grad_norm": 4.119326591491699, + "learning_rate": 0.0001, + "loss": 0.1311, + "step": 4580 + }, + { + "epoch": 13.039772727272727, + "grad_norm": 3.324186086654663, + "learning_rate": 0.0001, + "loss": 0.1341, + "step": 4590 + }, + { + "epoch": 13.068181818181818, + "grad_norm": 3.69582200050354, + "learning_rate": 0.0001, + "loss": 0.1373, + "step": 4600 + }, + { + "epoch": 13.096590909090908, + "grad_norm": 3.6252574920654297, + "learning_rate": 0.0001, + "loss": 0.1368, + "step": 4610 + }, + { + "epoch": 13.125, + "grad_norm": 3.5859949588775635, + "learning_rate": 0.0001, + "loss": 0.1303, + "step": 4620 + }, + { + "epoch": 13.153409090909092, + "grad_norm": 4.536507606506348, + "learning_rate": 0.0001, + "loss": 0.1389, + "step": 4630 + }, + { + "epoch": 13.181818181818182, + "grad_norm": 3.7678303718566895, + "learning_rate": 0.0001, + "loss": 0.1376, + "step": 4640 + }, + { + "epoch": 13.210227272727273, + "grad_norm": 3.8305280208587646, + "learning_rate": 0.0001, + "loss": 0.1299, + "step": 4650 + }, + { + "epoch": 13.238636363636363, + "grad_norm": 4.209882736206055, + "learning_rate": 0.0001, + "loss": 0.1366, + "step": 4660 + }, + { + "epoch": 13.267045454545455, + "grad_norm": 3.751279354095459, + "learning_rate": 0.0001, + "loss": 0.1395, + "step": 4670 + }, + { + "epoch": 13.295454545454545, + "grad_norm": 3.758382558822632, + "learning_rate": 0.0001, + "loss": 0.1371, + "step": 4680 + }, + { + "epoch": 13.323863636363637, + "grad_norm": 4.068879127502441, + "learning_rate": 0.0001, + "loss": 0.1335, + "step": 4690 + }, + { + "epoch": 13.352272727272727, + "grad_norm": 4.470997333526611, + "learning_rate": 0.0001, + "loss": 0.1349, + "step": 4700 + }, + { + "epoch": 13.380681818181818, + "grad_norm": 3.5465259552001953, + "learning_rate": 0.0001, + "loss": 0.1336, + "step": 4710 + }, + { + "epoch": 13.409090909090908, + "grad_norm": 3.6585092544555664, + "learning_rate": 0.0001, + "loss": 0.1279, + "step": 4720 + }, + { + "epoch": 13.4375, + "grad_norm": 3.6728506088256836, + "learning_rate": 0.0001, + "loss": 0.1289, + "step": 4730 + }, + { + "epoch": 13.465909090909092, + "grad_norm": 3.1070103645324707, + "learning_rate": 0.0001, + "loss": 0.1293, + "step": 4740 + }, + { + "epoch": 13.494318181818182, + "grad_norm": 2.9372332096099854, + "learning_rate": 0.0001, + "loss": 0.1329, + "step": 4750 + }, + { + "epoch": 13.522727272727273, + "grad_norm": 3.2514431476593018, + "learning_rate": 0.0001, + "loss": 0.1287, + "step": 4760 + }, + { + "epoch": 13.551136363636363, + "grad_norm": 2.84192156791687, + "learning_rate": 0.0001, + "loss": 0.1372, + "step": 4770 + }, + { + "epoch": 13.579545454545455, + "grad_norm": 3.847137928009033, + "learning_rate": 0.0001, + "loss": 0.1321, + "step": 4780 + }, + { + "epoch": 13.607954545454545, + "grad_norm": 3.7606923580169678, + "learning_rate": 0.0001, + "loss": 0.1337, + "step": 4790 + }, + { + "epoch": 13.636363636363637, + "grad_norm": 3.415740966796875, + "learning_rate": 0.0001, + "loss": 0.1327, + "step": 4800 + }, + { + "epoch": 13.664772727272727, + "grad_norm": 3.71706485748291, + "learning_rate": 0.0001, + "loss": 0.1403, + "step": 4810 + }, + { + "epoch": 13.693181818181818, + "grad_norm": 3.2357699871063232, + "learning_rate": 0.0001, + "loss": 0.1308, + "step": 4820 + }, + { + "epoch": 13.721590909090908, + "grad_norm": 3.241356372833252, + "learning_rate": 0.0001, + "loss": 0.1369, + "step": 4830 + }, + { + "epoch": 13.75, + "grad_norm": 3.0397732257843018, + "learning_rate": 0.0001, + "loss": 0.1356, + "step": 4840 + }, + { + "epoch": 13.778409090909092, + "grad_norm": 3.939297914505005, + "learning_rate": 0.0001, + "loss": 0.1342, + "step": 4850 + }, + { + "epoch": 13.806818181818182, + "grad_norm": 3.530168294906616, + "learning_rate": 0.0001, + "loss": 0.1345, + "step": 4860 + }, + { + "epoch": 13.835227272727273, + "grad_norm": 3.2555956840515137, + "learning_rate": 0.0001, + "loss": 0.1304, + "step": 4870 + }, + { + "epoch": 13.863636363636363, + "grad_norm": 3.490713357925415, + "learning_rate": 0.0001, + "loss": 0.132, + "step": 4880 + }, + { + "epoch": 13.892045454545455, + "grad_norm": 3.034759521484375, + "learning_rate": 0.0001, + "loss": 0.1287, + "step": 4890 + }, + { + "epoch": 13.920454545454545, + "grad_norm": 3.2557218074798584, + "learning_rate": 0.0001, + "loss": 0.1358, + "step": 4900 + }, + { + "epoch": 13.948863636363637, + "grad_norm": 3.692721128463745, + "learning_rate": 0.0001, + "loss": 0.1305, + "step": 4910 + }, + { + "epoch": 13.977272727272727, + "grad_norm": 3.3548946380615234, + "learning_rate": 0.0001, + "loss": 0.1306, + "step": 4920 + }, + { + "epoch": 14.005681818181818, + "grad_norm": 2.9304184913635254, + "learning_rate": 0.0001, + "loss": 0.1268, + "step": 4930 + }, + { + "epoch": 14.034090909090908, + "grad_norm": 2.7205934524536133, + "learning_rate": 0.0001, + "loss": 0.1267, + "step": 4940 + }, + { + "epoch": 14.0625, + "grad_norm": 3.1881885528564453, + "learning_rate": 0.0001, + "loss": 0.1292, + "step": 4950 + }, + { + "epoch": 14.090909090909092, + "grad_norm": 2.813159465789795, + "learning_rate": 0.0001, + "loss": 0.1281, + "step": 4960 + }, + { + "epoch": 14.119318181818182, + "grad_norm": 3.351205348968506, + "learning_rate": 0.0001, + "loss": 0.1336, + "step": 4970 + }, + { + "epoch": 14.147727272727273, + "grad_norm": 3.1499414443969727, + "learning_rate": 0.0001, + "loss": 0.1327, + "step": 4980 + }, + { + "epoch": 14.176136363636363, + "grad_norm": 3.1446123123168945, + "learning_rate": 0.0001, + "loss": 0.1296, + "step": 4990 + }, + { + "epoch": 14.204545454545455, + "grad_norm": 4.177588939666748, + "learning_rate": 0.0001, + "loss": 0.1269, + "step": 5000 + }, + { + "epoch": 14.232954545454545, + "grad_norm": 3.2882914543151855, + "learning_rate": 0.0001, + "loss": 0.1294, + "step": 5010 + }, + { + "epoch": 14.261363636363637, + "grad_norm": 3.151151657104492, + "learning_rate": 0.0001, + "loss": 0.128, + "step": 5020 + }, + { + "epoch": 14.289772727272727, + "grad_norm": 3.507800340652466, + "learning_rate": 0.0001, + "loss": 0.1306, + "step": 5030 + }, + { + "epoch": 14.318181818181818, + "grad_norm": 3.266287088394165, + "learning_rate": 0.0001, + "loss": 0.13, + "step": 5040 + }, + { + "epoch": 14.346590909090908, + "grad_norm": 3.7392666339874268, + "learning_rate": 0.0001, + "loss": 0.1246, + "step": 5050 + }, + { + "epoch": 14.375, + "grad_norm": 3.385209083557129, + "learning_rate": 0.0001, + "loss": 0.1206, + "step": 5060 + }, + { + "epoch": 14.403409090909092, + "grad_norm": 3.0839014053344727, + "learning_rate": 0.0001, + "loss": 0.119, + "step": 5070 + }, + { + "epoch": 14.431818181818182, + "grad_norm": 2.9895691871643066, + "learning_rate": 0.0001, + "loss": 0.13, + "step": 5080 + }, + { + "epoch": 14.460227272727273, + "grad_norm": 3.4198343753814697, + "learning_rate": 0.0001, + "loss": 0.1325, + "step": 5090 + }, + { + "epoch": 14.488636363636363, + "grad_norm": 3.2257754802703857, + "learning_rate": 0.0001, + "loss": 0.13, + "step": 5100 + }, + { + "epoch": 14.517045454545455, + "grad_norm": 2.9251694679260254, + "learning_rate": 0.0001, + "loss": 0.1247, + "step": 5110 + }, + { + "epoch": 14.545454545454545, + "grad_norm": 3.3132123947143555, + "learning_rate": 0.0001, + "loss": 0.1246, + "step": 5120 + }, + { + "epoch": 14.573863636363637, + "grad_norm": 3.5017828941345215, + "learning_rate": 0.0001, + "loss": 0.1265, + "step": 5130 + }, + { + "epoch": 14.602272727272727, + "grad_norm": 3.087315559387207, + "learning_rate": 0.0001, + "loss": 0.1275, + "step": 5140 + }, + { + "epoch": 14.630681818181818, + "grad_norm": 2.8191609382629395, + "learning_rate": 0.0001, + "loss": 0.1278, + "step": 5150 + }, + { + "epoch": 14.659090909090908, + "grad_norm": 3.038038492202759, + "learning_rate": 0.0001, + "loss": 0.1197, + "step": 5160 + }, + { + "epoch": 14.6875, + "grad_norm": 2.9609692096710205, + "learning_rate": 0.0001, + "loss": 0.1212, + "step": 5170 + }, + { + "epoch": 14.715909090909092, + "grad_norm": 3.029618263244629, + "learning_rate": 0.0001, + "loss": 0.1235, + "step": 5180 + }, + { + "epoch": 14.744318181818182, + "grad_norm": 2.6114909648895264, + "learning_rate": 0.0001, + "loss": 0.1286, + "step": 5190 + }, + { + "epoch": 14.772727272727273, + "grad_norm": 2.887552261352539, + "learning_rate": 0.0001, + "loss": 0.126, + "step": 5200 + }, + { + "epoch": 14.801136363636363, + "grad_norm": 3.0050230026245117, + "learning_rate": 0.0001, + "loss": 0.1266, + "step": 5210 + }, + { + "epoch": 14.829545454545455, + "grad_norm": 3.215804100036621, + "learning_rate": 0.0001, + "loss": 0.1281, + "step": 5220 + }, + { + "epoch": 14.857954545454545, + "grad_norm": 3.709592819213867, + "learning_rate": 0.0001, + "loss": 0.1325, + "step": 5230 + }, + { + "epoch": 14.886363636363637, + "grad_norm": 3.143139600753784, + "learning_rate": 0.0001, + "loss": 0.1323, + "step": 5240 + }, + { + "epoch": 14.914772727272727, + "grad_norm": 3.504509925842285, + "learning_rate": 0.0001, + "loss": 0.1201, + "step": 5250 + }, + { + "epoch": 14.943181818181818, + "grad_norm": 3.8694465160369873, + "learning_rate": 0.0001, + "loss": 0.1277, + "step": 5260 + }, + { + "epoch": 14.971590909090908, + "grad_norm": 3.4749040603637695, + "learning_rate": 0.0001, + "loss": 0.1272, + "step": 5270 + }, + { + "epoch": 15.0, + "grad_norm": 3.4868409633636475, + "learning_rate": 0.0001, + "loss": 0.1205, + "step": 5280 + }, + { + "epoch": 15.028409090909092, + "grad_norm": 2.9750540256500244, + "learning_rate": 0.0001, + "loss": 0.126, + "step": 5290 + }, + { + "epoch": 15.056818181818182, + "grad_norm": 3.6922764778137207, + "learning_rate": 0.0001, + "loss": 0.1256, + "step": 5300 + }, + { + "epoch": 15.085227272727273, + "grad_norm": 2.812814712524414, + "learning_rate": 0.0001, + "loss": 0.1181, + "step": 5310 + }, + { + "epoch": 15.113636363636363, + "grad_norm": 3.3117034435272217, + "learning_rate": 0.0001, + "loss": 0.1226, + "step": 5320 + }, + { + "epoch": 15.142045454545455, + "grad_norm": 3.519850492477417, + "learning_rate": 0.0001, + "loss": 0.123, + "step": 5330 + }, + { + "epoch": 15.170454545454545, + "grad_norm": 3.4698708057403564, + "learning_rate": 0.0001, + "loss": 0.1176, + "step": 5340 + }, + { + "epoch": 15.198863636363637, + "grad_norm": 3.4124035835266113, + "learning_rate": 0.0001, + "loss": 0.12, + "step": 5350 + }, + { + "epoch": 15.227272727272727, + "grad_norm": 3.1546342372894287, + "learning_rate": 0.0001, + "loss": 0.1215, + "step": 5360 + }, + { + "epoch": 15.255681818181818, + "grad_norm": 3.2864038944244385, + "learning_rate": 0.0001, + "loss": 0.1178, + "step": 5370 + }, + { + "epoch": 15.284090909090908, + "grad_norm": 3.288776397705078, + "learning_rate": 0.0001, + "loss": 0.1235, + "step": 5380 + }, + { + "epoch": 15.3125, + "grad_norm": 3.0721805095672607, + "learning_rate": 0.0001, + "loss": 0.1166, + "step": 5390 + }, + { + "epoch": 15.340909090909092, + "grad_norm": 2.994493246078491, + "learning_rate": 0.0001, + "loss": 0.119, + "step": 5400 + }, + { + "epoch": 15.369318181818182, + "grad_norm": 3.0647480487823486, + "learning_rate": 0.0001, + "loss": 0.1199, + "step": 5410 + }, + { + "epoch": 15.397727272727273, + "grad_norm": 3.24365496635437, + "learning_rate": 0.0001, + "loss": 0.1185, + "step": 5420 + }, + { + "epoch": 15.426136363636363, + "grad_norm": 2.872796058654785, + "learning_rate": 0.0001, + "loss": 0.1239, + "step": 5430 + }, + { + "epoch": 15.454545454545455, + "grad_norm": 3.0730972290039062, + "learning_rate": 0.0001, + "loss": 0.118, + "step": 5440 + }, + { + "epoch": 15.482954545454545, + "grad_norm": 3.010819911956787, + "learning_rate": 0.0001, + "loss": 0.1282, + "step": 5450 + }, + { + "epoch": 15.511363636363637, + "grad_norm": 3.111093521118164, + "learning_rate": 0.0001, + "loss": 0.1203, + "step": 5460 + }, + { + "epoch": 15.539772727272727, + "grad_norm": 2.3254058361053467, + "learning_rate": 0.0001, + "loss": 0.1196, + "step": 5470 + }, + { + "epoch": 15.568181818181818, + "grad_norm": 2.7858808040618896, + "learning_rate": 0.0001, + "loss": 0.1157, + "step": 5480 + }, + { + "epoch": 15.596590909090908, + "grad_norm": 2.8064205646514893, + "learning_rate": 0.0001, + "loss": 0.1241, + "step": 5490 + }, + { + "epoch": 15.625, + "grad_norm": 3.140082597732544, + "learning_rate": 0.0001, + "loss": 0.123, + "step": 5500 + }, + { + "epoch": 15.653409090909092, + "grad_norm": 3.064652681350708, + "learning_rate": 0.0001, + "loss": 0.1263, + "step": 5510 + }, + { + "epoch": 15.681818181818182, + "grad_norm": 3.274289131164551, + "learning_rate": 0.0001, + "loss": 0.1239, + "step": 5520 + }, + { + "epoch": 15.710227272727273, + "grad_norm": 2.9138309955596924, + "learning_rate": 0.0001, + "loss": 0.1232, + "step": 5530 + }, + { + "epoch": 15.738636363636363, + "grad_norm": 2.9141759872436523, + "learning_rate": 0.0001, + "loss": 0.1222, + "step": 5540 + }, + { + "epoch": 15.767045454545455, + "grad_norm": 2.755699396133423, + "learning_rate": 0.0001, + "loss": 0.1209, + "step": 5550 + }, + { + "epoch": 15.795454545454545, + "grad_norm": 2.7435543537139893, + "learning_rate": 0.0001, + "loss": 0.1205, + "step": 5560 + }, + { + "epoch": 15.823863636363637, + "grad_norm": 2.868746519088745, + "learning_rate": 0.0001, + "loss": 0.1188, + "step": 5570 + }, + { + "epoch": 15.852272727272727, + "grad_norm": 2.853201389312744, + "learning_rate": 0.0001, + "loss": 0.1154, + "step": 5580 + }, + { + "epoch": 15.880681818181818, + "grad_norm": 3.2404487133026123, + "learning_rate": 0.0001, + "loss": 0.1174, + "step": 5590 + }, + { + "epoch": 15.909090909090908, + "grad_norm": 3.210789203643799, + "learning_rate": 0.0001, + "loss": 0.1204, + "step": 5600 + }, + { + "epoch": 15.9375, + "grad_norm": 2.9118998050689697, + "learning_rate": 0.0001, + "loss": 0.1183, + "step": 5610 + }, + { + "epoch": 15.965909090909092, + "grad_norm": 2.8953421115875244, + "learning_rate": 0.0001, + "loss": 0.1182, + "step": 5620 + }, + { + "epoch": 15.994318181818182, + "grad_norm": 2.942523241043091, + "learning_rate": 0.0001, + "loss": 0.1195, + "step": 5630 + }, + { + "epoch": 16.022727272727273, + "grad_norm": 3.2362887859344482, + "learning_rate": 0.0001, + "loss": 0.1157, + "step": 5640 + }, + { + "epoch": 16.051136363636363, + "grad_norm": 2.438734292984009, + "learning_rate": 0.0001, + "loss": 0.1124, + "step": 5650 + }, + { + "epoch": 16.079545454545453, + "grad_norm": 2.5169425010681152, + "learning_rate": 0.0001, + "loss": 0.1113, + "step": 5660 + }, + { + "epoch": 16.107954545454547, + "grad_norm": 2.644383668899536, + "learning_rate": 0.0001, + "loss": 0.1165, + "step": 5670 + }, + { + "epoch": 16.136363636363637, + "grad_norm": 3.3263514041900635, + "learning_rate": 0.0001, + "loss": 0.1221, + "step": 5680 + }, + { + "epoch": 16.164772727272727, + "grad_norm": 2.8352041244506836, + "learning_rate": 0.0001, + "loss": 0.1138, + "step": 5690 + }, + { + "epoch": 16.193181818181817, + "grad_norm": 3.1213154792785645, + "learning_rate": 0.0001, + "loss": 0.1164, + "step": 5700 + }, + { + "epoch": 16.22159090909091, + "grad_norm": 3.123992681503296, + "learning_rate": 0.0001, + "loss": 0.1163, + "step": 5710 + }, + { + "epoch": 16.25, + "grad_norm": 3.104673385620117, + "learning_rate": 0.0001, + "loss": 0.1154, + "step": 5720 + }, + { + "epoch": 16.27840909090909, + "grad_norm": 3.469186544418335, + "learning_rate": 0.0001, + "loss": 0.1137, + "step": 5730 + }, + { + "epoch": 16.306818181818183, + "grad_norm": 3.1163649559020996, + "learning_rate": 0.0001, + "loss": 0.1164, + "step": 5740 + }, + { + "epoch": 16.335227272727273, + "grad_norm": 2.9757080078125, + "learning_rate": 0.0001, + "loss": 0.1215, + "step": 5750 + }, + { + "epoch": 16.363636363636363, + "grad_norm": 3.346102237701416, + "learning_rate": 0.0001, + "loss": 0.114, + "step": 5760 + }, + { + "epoch": 16.392045454545453, + "grad_norm": 3.4598140716552734, + "learning_rate": 0.0001, + "loss": 0.1134, + "step": 5770 + }, + { + "epoch": 16.420454545454547, + "grad_norm": 2.8395731449127197, + "learning_rate": 0.0001, + "loss": 0.1136, + "step": 5780 + }, + { + "epoch": 16.448863636363637, + "grad_norm": 2.390820026397705, + "learning_rate": 0.0001, + "loss": 0.115, + "step": 5790 + }, + { + "epoch": 16.477272727272727, + "grad_norm": 3.3408634662628174, + "learning_rate": 0.0001, + "loss": 0.1191, + "step": 5800 + }, + { + "epoch": 16.505681818181817, + "grad_norm": 2.721245050430298, + "learning_rate": 0.0001, + "loss": 0.1187, + "step": 5810 + }, + { + "epoch": 16.53409090909091, + "grad_norm": 3.057680130004883, + "learning_rate": 0.0001, + "loss": 0.1169, + "step": 5820 + }, + { + "epoch": 16.5625, + "grad_norm": 2.8173437118530273, + "learning_rate": 0.0001, + "loss": 0.1178, + "step": 5830 + }, + { + "epoch": 16.59090909090909, + "grad_norm": 2.824174404144287, + "learning_rate": 0.0001, + "loss": 0.1137, + "step": 5840 + }, + { + "epoch": 16.619318181818183, + "grad_norm": 2.9729907512664795, + "learning_rate": 0.0001, + "loss": 0.1149, + "step": 5850 + }, + { + "epoch": 16.647727272727273, + "grad_norm": 2.893472909927368, + "learning_rate": 0.0001, + "loss": 0.1147, + "step": 5860 + }, + { + "epoch": 16.676136363636363, + "grad_norm": 2.6419155597686768, + "learning_rate": 0.0001, + "loss": 0.1166, + "step": 5870 + }, + { + "epoch": 16.704545454545453, + "grad_norm": 2.341890811920166, + "learning_rate": 0.0001, + "loss": 0.1148, + "step": 5880 + }, + { + "epoch": 16.732954545454547, + "grad_norm": 2.980921506881714, + "learning_rate": 0.0001, + "loss": 0.1134, + "step": 5890 + }, + { + "epoch": 16.761363636363637, + "grad_norm": 2.975208044052124, + "learning_rate": 0.0001, + "loss": 0.1146, + "step": 5900 + }, + { + "epoch": 16.789772727272727, + "grad_norm": 2.528339147567749, + "learning_rate": 0.0001, + "loss": 0.1155, + "step": 5910 + }, + { + "epoch": 16.818181818181817, + "grad_norm": 2.539898633956909, + "learning_rate": 0.0001, + "loss": 0.1137, + "step": 5920 + }, + { + "epoch": 16.84659090909091, + "grad_norm": 2.4367032051086426, + "learning_rate": 0.0001, + "loss": 0.1175, + "step": 5930 + }, + { + "epoch": 16.875, + "grad_norm": 2.5197834968566895, + "learning_rate": 0.0001, + "loss": 0.1158, + "step": 5940 + }, + { + "epoch": 16.90340909090909, + "grad_norm": 2.4279847145080566, + "learning_rate": 0.0001, + "loss": 0.1113, + "step": 5950 + }, + { + "epoch": 16.931818181818183, + "grad_norm": 3.2526116371154785, + "learning_rate": 0.0001, + "loss": 0.1129, + "step": 5960 + }, + { + "epoch": 16.960227272727273, + "grad_norm": 2.5634706020355225, + "learning_rate": 0.0001, + "loss": 0.1146, + "step": 5970 + }, + { + "epoch": 16.988636363636363, + "grad_norm": 2.89918851852417, + "learning_rate": 0.0001, + "loss": 0.1147, + "step": 5980 + }, + { + "epoch": 17.017045454545453, + "grad_norm": 2.8295469284057617, + "learning_rate": 0.0001, + "loss": 0.114, + "step": 5990 + }, + { + "epoch": 17.045454545454547, + "grad_norm": 2.8802335262298584, + "learning_rate": 0.0001, + "loss": 0.1125, + "step": 6000 + }, + { + "epoch": 17.073863636363637, + "grad_norm": 2.6155662536621094, + "learning_rate": 0.0001, + "loss": 0.1158, + "step": 6010 + }, + { + "epoch": 17.102272727272727, + "grad_norm": 2.791156530380249, + "learning_rate": 0.0001, + "loss": 0.1147, + "step": 6020 + }, + { + "epoch": 17.130681818181817, + "grad_norm": 2.7444076538085938, + "learning_rate": 0.0001, + "loss": 0.1172, + "step": 6030 + }, + { + "epoch": 17.15909090909091, + "grad_norm": 3.0765230655670166, + "learning_rate": 0.0001, + "loss": 0.1114, + "step": 6040 + }, + { + "epoch": 17.1875, + "grad_norm": 3.4001102447509766, + "learning_rate": 0.0001, + "loss": 0.1156, + "step": 6050 + }, + { + "epoch": 17.21590909090909, + "grad_norm": 2.574037790298462, + "learning_rate": 0.0001, + "loss": 0.114, + "step": 6060 + }, + { + "epoch": 17.244318181818183, + "grad_norm": 2.428994655609131, + "learning_rate": 0.0001, + "loss": 0.1059, + "step": 6070 + }, + { + "epoch": 17.272727272727273, + "grad_norm": 2.552593469619751, + "learning_rate": 0.0001, + "loss": 0.1132, + "step": 6080 + }, + { + "epoch": 17.301136363636363, + "grad_norm": 2.748263359069824, + "learning_rate": 0.0001, + "loss": 0.1149, + "step": 6090 + }, + { + "epoch": 17.329545454545453, + "grad_norm": 2.565458059310913, + "learning_rate": 0.0001, + "loss": 0.1105, + "step": 6100 + }, + { + "epoch": 17.357954545454547, + "grad_norm": 3.3726043701171875, + "learning_rate": 0.0001, + "loss": 0.1108, + "step": 6110 + }, + { + "epoch": 17.386363636363637, + "grad_norm": 2.640763282775879, + "learning_rate": 0.0001, + "loss": 0.1049, + "step": 6120 + }, + { + "epoch": 17.414772727272727, + "grad_norm": 2.3288469314575195, + "learning_rate": 0.0001, + "loss": 0.1073, + "step": 6130 + }, + { + "epoch": 17.443181818181817, + "grad_norm": 2.47501802444458, + "learning_rate": 0.0001, + "loss": 0.1097, + "step": 6140 + }, + { + "epoch": 17.47159090909091, + "grad_norm": 2.443122148513794, + "learning_rate": 0.0001, + "loss": 0.1167, + "step": 6150 + }, + { + "epoch": 17.5, + "grad_norm": 3.0210578441619873, + "learning_rate": 0.0001, + "loss": 0.1185, + "step": 6160 + }, + { + "epoch": 17.52840909090909, + "grad_norm": 2.7917838096618652, + "learning_rate": 0.0001, + "loss": 0.1176, + "step": 6170 + }, + { + "epoch": 17.556818181818183, + "grad_norm": 2.502795457839966, + "learning_rate": 0.0001, + "loss": 0.1101, + "step": 6180 + }, + { + "epoch": 17.585227272727273, + "grad_norm": 2.6011240482330322, + "learning_rate": 0.0001, + "loss": 0.111, + "step": 6190 + }, + { + "epoch": 17.613636363636363, + "grad_norm": 2.917656183242798, + "learning_rate": 0.0001, + "loss": 0.1093, + "step": 6200 + }, + { + "epoch": 17.642045454545453, + "grad_norm": 2.369063138961792, + "learning_rate": 0.0001, + "loss": 0.1116, + "step": 6210 + }, + { + "epoch": 17.670454545454547, + "grad_norm": 2.5128045082092285, + "learning_rate": 0.0001, + "loss": 0.1111, + "step": 6220 + }, + { + "epoch": 17.698863636363637, + "grad_norm": 3.0461254119873047, + "learning_rate": 0.0001, + "loss": 0.1147, + "step": 6230 + }, + { + "epoch": 17.727272727272727, + "grad_norm": 2.1202504634857178, + "learning_rate": 0.0001, + "loss": 0.1107, + "step": 6240 + }, + { + "epoch": 17.755681818181817, + "grad_norm": 2.3112752437591553, + "learning_rate": 0.0001, + "loss": 0.1086, + "step": 6250 + }, + { + "epoch": 17.78409090909091, + "grad_norm": 2.8188629150390625, + "learning_rate": 0.0001, + "loss": 0.121, + "step": 6260 + }, + { + "epoch": 17.8125, + "grad_norm": 2.883798599243164, + "learning_rate": 0.0001, + "loss": 0.1088, + "step": 6270 + }, + { + "epoch": 17.84090909090909, + "grad_norm": 2.619675397872925, + "learning_rate": 0.0001, + "loss": 0.1121, + "step": 6280 + }, + { + "epoch": 17.869318181818183, + "grad_norm": 2.5900354385375977, + "learning_rate": 0.0001, + "loss": 0.111, + "step": 6290 + }, + { + "epoch": 17.897727272727273, + "grad_norm": 3.2702383995056152, + "learning_rate": 0.0001, + "loss": 0.1123, + "step": 6300 + }, + { + "epoch": 17.926136363636363, + "grad_norm": 2.632286548614502, + "learning_rate": 0.0001, + "loss": 0.1096, + "step": 6310 + }, + { + "epoch": 17.954545454545453, + "grad_norm": 2.598470687866211, + "learning_rate": 0.0001, + "loss": 0.109, + "step": 6320 + }, + { + "epoch": 17.982954545454547, + "grad_norm": 2.3819477558135986, + "learning_rate": 0.0001, + "loss": 0.1118, + "step": 6330 + }, + { + "epoch": 18.011363636363637, + "grad_norm": 2.4921939373016357, + "learning_rate": 0.0001, + "loss": 0.1116, + "step": 6340 + }, + { + "epoch": 18.039772727272727, + "grad_norm": 2.820632219314575, + "learning_rate": 0.0001, + "loss": 0.1052, + "step": 6350 + }, + { + "epoch": 18.068181818181817, + "grad_norm": 2.769113063812256, + "learning_rate": 0.0001, + "loss": 0.1141, + "step": 6360 + }, + { + "epoch": 18.09659090909091, + "grad_norm": 2.58843731880188, + "learning_rate": 0.0001, + "loss": 0.1133, + "step": 6370 + }, + { + "epoch": 18.125, + "grad_norm": 1.958970069885254, + "learning_rate": 0.0001, + "loss": 0.114, + "step": 6380 + }, + { + "epoch": 18.15340909090909, + "grad_norm": 2.466975688934326, + "learning_rate": 0.0001, + "loss": 0.1071, + "step": 6390 + }, + { + "epoch": 18.181818181818183, + "grad_norm": 2.1818594932556152, + "learning_rate": 0.0001, + "loss": 0.1095, + "step": 6400 + }, + { + "epoch": 18.210227272727273, + "grad_norm": 2.3512721061706543, + "learning_rate": 0.0001, + "loss": 0.114, + "step": 6410 + }, + { + "epoch": 18.238636363636363, + "grad_norm": 2.3737564086914062, + "learning_rate": 0.0001, + "loss": 0.1113, + "step": 6420 + }, + { + "epoch": 18.267045454545453, + "grad_norm": 2.4189605712890625, + "learning_rate": 0.0001, + "loss": 0.1117, + "step": 6430 + }, + { + "epoch": 18.295454545454547, + "grad_norm": 2.6895744800567627, + "learning_rate": 0.0001, + "loss": 0.1122, + "step": 6440 + }, + { + "epoch": 18.323863636363637, + "grad_norm": 2.517216920852661, + "learning_rate": 0.0001, + "loss": 0.1132, + "step": 6450 + }, + { + "epoch": 18.352272727272727, + "grad_norm": 2.251976251602173, + "learning_rate": 0.0001, + "loss": 0.1058, + "step": 6460 + }, + { + "epoch": 18.380681818181817, + "grad_norm": 2.233076333999634, + "learning_rate": 0.0001, + "loss": 0.1123, + "step": 6470 + }, + { + "epoch": 18.40909090909091, + "grad_norm": 2.5515904426574707, + "learning_rate": 0.0001, + "loss": 0.1107, + "step": 6480 + }, + { + "epoch": 18.4375, + "grad_norm": 2.60249662399292, + "learning_rate": 0.0001, + "loss": 0.1089, + "step": 6490 + }, + { + "epoch": 18.46590909090909, + "grad_norm": 2.640946388244629, + "learning_rate": 0.0001, + "loss": 0.1107, + "step": 6500 + }, + { + "epoch": 18.494318181818183, + "grad_norm": 2.663269281387329, + "learning_rate": 0.0001, + "loss": 0.1085, + "step": 6510 + }, + { + "epoch": 18.522727272727273, + "grad_norm": 2.4106152057647705, + "learning_rate": 0.0001, + "loss": 0.1098, + "step": 6520 + }, + { + "epoch": 18.551136363636363, + "grad_norm": 2.3369884490966797, + "learning_rate": 0.0001, + "loss": 0.1096, + "step": 6530 + }, + { + "epoch": 18.579545454545453, + "grad_norm": 2.346574544906616, + "learning_rate": 0.0001, + "loss": 0.104, + "step": 6540 + }, + { + "epoch": 18.607954545454547, + "grad_norm": 2.2791831493377686, + "learning_rate": 0.0001, + "loss": 0.1105, + "step": 6550 + }, + { + "epoch": 18.636363636363637, + "grad_norm": 2.364088773727417, + "learning_rate": 0.0001, + "loss": 0.1113, + "step": 6560 + }, + { + "epoch": 18.664772727272727, + "grad_norm": 2.5654282569885254, + "learning_rate": 0.0001, + "loss": 0.1067, + "step": 6570 + }, + { + "epoch": 18.693181818181817, + "grad_norm": 2.577658176422119, + "learning_rate": 0.0001, + "loss": 0.1117, + "step": 6580 + }, + { + "epoch": 18.72159090909091, + "grad_norm": 2.4139275550842285, + "learning_rate": 0.0001, + "loss": 0.1043, + "step": 6590 + }, + { + "epoch": 18.75, + "grad_norm": 2.5888614654541016, + "learning_rate": 0.0001, + "loss": 0.1082, + "step": 6600 + }, + { + "epoch": 18.77840909090909, + "grad_norm": 2.3184974193573, + "learning_rate": 0.0001, + "loss": 0.1083, + "step": 6610 + }, + { + "epoch": 18.806818181818183, + "grad_norm": 2.522383451461792, + "learning_rate": 0.0001, + "loss": 0.1083, + "step": 6620 + }, + { + "epoch": 18.835227272727273, + "grad_norm": 2.2055583000183105, + "learning_rate": 0.0001, + "loss": 0.1076, + "step": 6630 + }, + { + "epoch": 18.863636363636363, + "grad_norm": 2.58622145652771, + "learning_rate": 0.0001, + "loss": 0.1087, + "step": 6640 + }, + { + "epoch": 18.892045454545453, + "grad_norm": 2.3860034942626953, + "learning_rate": 0.0001, + "loss": 0.1108, + "step": 6650 + }, + { + "epoch": 18.920454545454547, + "grad_norm": 2.6108431816101074, + "learning_rate": 0.0001, + "loss": 0.1089, + "step": 6660 + }, + { + "epoch": 18.948863636363637, + "grad_norm": 2.1429636478424072, + "learning_rate": 0.0001, + "loss": 0.1127, + "step": 6670 + }, + { + "epoch": 18.977272727272727, + "grad_norm": 2.1483328342437744, + "learning_rate": 0.0001, + "loss": 0.107, + "step": 6680 + }, + { + "epoch": 19.005681818181817, + "grad_norm": 2.524930477142334, + "learning_rate": 0.0001, + "loss": 0.1176, + "step": 6690 + }, + { + "epoch": 19.03409090909091, + "grad_norm": 2.55420184135437, + "learning_rate": 0.0001, + "loss": 0.1134, + "step": 6700 + }, + { + "epoch": 19.0625, + "grad_norm": 2.4037156105041504, + "learning_rate": 0.0001, + "loss": 0.1121, + "step": 6710 + }, + { + "epoch": 19.09090909090909, + "grad_norm": 2.545936107635498, + "learning_rate": 0.0001, + "loss": 0.1038, + "step": 6720 + }, + { + "epoch": 19.119318181818183, + "grad_norm": 2.2953386306762695, + "learning_rate": 0.0001, + "loss": 0.1045, + "step": 6730 + }, + { + "epoch": 19.147727272727273, + "grad_norm": 2.4761712551116943, + "learning_rate": 0.0001, + "loss": 0.1022, + "step": 6740 + }, + { + "epoch": 19.176136363636363, + "grad_norm": 2.2314484119415283, + "learning_rate": 0.0001, + "loss": 0.1048, + "step": 6750 + }, + { + "epoch": 19.204545454545453, + "grad_norm": 2.306548833847046, + "learning_rate": 0.0001, + "loss": 0.1068, + "step": 6760 + }, + { + "epoch": 19.232954545454547, + "grad_norm": 2.563133955001831, + "learning_rate": 0.0001, + "loss": 0.1051, + "step": 6770 + }, + { + "epoch": 19.261363636363637, + "grad_norm": 2.306220531463623, + "learning_rate": 0.0001, + "loss": 0.1028, + "step": 6780 + }, + { + "epoch": 19.289772727272727, + "grad_norm": 2.3580806255340576, + "learning_rate": 0.0001, + "loss": 0.1026, + "step": 6790 + }, + { + "epoch": 19.318181818181817, + "grad_norm": 2.317422866821289, + "learning_rate": 0.0001, + "loss": 0.1045, + "step": 6800 + }, + { + "epoch": 19.34659090909091, + "grad_norm": 2.4174487590789795, + "learning_rate": 0.0001, + "loss": 0.1047, + "step": 6810 + }, + { + "epoch": 19.375, + "grad_norm": 2.417792558670044, + "learning_rate": 0.0001, + "loss": 0.1062, + "step": 6820 + }, + { + "epoch": 19.40340909090909, + "grad_norm": 2.08555269241333, + "learning_rate": 0.0001, + "loss": 0.1075, + "step": 6830 + }, + { + "epoch": 19.431818181818183, + "grad_norm": 2.052635431289673, + "learning_rate": 0.0001, + "loss": 0.1086, + "step": 6840 + }, + { + "epoch": 19.460227272727273, + "grad_norm": 2.216602325439453, + "learning_rate": 0.0001, + "loss": 0.1077, + "step": 6850 + }, + { + "epoch": 19.488636363636363, + "grad_norm": 2.393385410308838, + "learning_rate": 0.0001, + "loss": 0.1084, + "step": 6860 + }, + { + "epoch": 19.517045454545453, + "grad_norm": 2.2516062259674072, + "learning_rate": 0.0001, + "loss": 0.1081, + "step": 6870 + }, + { + "epoch": 19.545454545454547, + "grad_norm": 2.2450714111328125, + "learning_rate": 0.0001, + "loss": 0.1057, + "step": 6880 + }, + { + "epoch": 19.573863636363637, + "grad_norm": 2.2236733436584473, + "learning_rate": 0.0001, + "loss": 0.1057, + "step": 6890 + }, + { + "epoch": 19.602272727272727, + "grad_norm": 2.0041747093200684, + "learning_rate": 0.0001, + "loss": 0.1092, + "step": 6900 + }, + { + "epoch": 19.630681818181817, + "grad_norm": 2.264723300933838, + "learning_rate": 0.0001, + "loss": 0.107, + "step": 6910 + }, + { + "epoch": 19.65909090909091, + "grad_norm": 2.467823028564453, + "learning_rate": 0.0001, + "loss": 0.1042, + "step": 6920 + }, + { + "epoch": 19.6875, + "grad_norm": 2.2700631618499756, + "learning_rate": 0.0001, + "loss": 0.1037, + "step": 6930 + }, + { + "epoch": 19.71590909090909, + "grad_norm": 2.2037792205810547, + "learning_rate": 0.0001, + "loss": 0.1012, + "step": 6940 + }, + { + "epoch": 19.744318181818183, + "grad_norm": 2.0393118858337402, + "learning_rate": 0.0001, + "loss": 0.1018, + "step": 6950 + }, + { + "epoch": 19.772727272727273, + "grad_norm": 2.0741636753082275, + "learning_rate": 0.0001, + "loss": 0.1095, + "step": 6960 + }, + { + "epoch": 19.801136363636363, + "grad_norm": 1.9817142486572266, + "learning_rate": 0.0001, + "loss": 0.102, + "step": 6970 + }, + { + "epoch": 19.829545454545453, + "grad_norm": 2.222271680831909, + "learning_rate": 0.0001, + "loss": 0.1096, + "step": 6980 + }, + { + "epoch": 19.857954545454547, + "grad_norm": 2.0376973152160645, + "learning_rate": 0.0001, + "loss": 0.1086, + "step": 6990 + }, + { + "epoch": 19.886363636363637, + "grad_norm": 1.9794195890426636, + "learning_rate": 0.0001, + "loss": 0.106, + "step": 7000 + }, + { + "epoch": 19.914772727272727, + "grad_norm": 2.154062032699585, + "learning_rate": 0.0001, + "loss": 0.1117, + "step": 7010 + }, + { + "epoch": 19.943181818181817, + "grad_norm": 2.5424580574035645, + "learning_rate": 0.0001, + "loss": 0.1079, + "step": 7020 + }, + { + "epoch": 19.97159090909091, + "grad_norm": 2.4422006607055664, + "learning_rate": 0.0001, + "loss": 0.1005, + "step": 7030 + }, + { + "epoch": 20.0, + "grad_norm": 2.433610200881958, + "learning_rate": 0.0001, + "loss": 0.1051, + "step": 7040 + }, + { + "epoch": 20.02840909090909, + "grad_norm": 2.1430225372314453, + "learning_rate": 0.0001, + "loss": 0.1003, + "step": 7050 + }, + { + "epoch": 20.056818181818183, + "grad_norm": 2.1949706077575684, + "learning_rate": 0.0001, + "loss": 0.0975, + "step": 7060 + }, + { + "epoch": 20.085227272727273, + "grad_norm": 2.3351924419403076, + "learning_rate": 0.0001, + "loss": 0.1076, + "step": 7070 + }, + { + "epoch": 20.113636363636363, + "grad_norm": 2.2610702514648438, + "learning_rate": 0.0001, + "loss": 0.1052, + "step": 7080 + }, + { + "epoch": 20.142045454545453, + "grad_norm": 2.0343222618103027, + "learning_rate": 0.0001, + "loss": 0.105, + "step": 7090 + }, + { + "epoch": 20.170454545454547, + "grad_norm": 2.3669273853302, + "learning_rate": 0.0001, + "loss": 0.1037, + "step": 7100 + }, + { + "epoch": 20.198863636363637, + "grad_norm": 2.224647283554077, + "learning_rate": 0.0001, + "loss": 0.1007, + "step": 7110 + }, + { + "epoch": 20.227272727272727, + "grad_norm": 2.3760414123535156, + "learning_rate": 0.0001, + "loss": 0.1087, + "step": 7120 + }, + { + "epoch": 20.255681818181817, + "grad_norm": 2.272942543029785, + "learning_rate": 0.0001, + "loss": 0.1051, + "step": 7130 + }, + { + "epoch": 20.28409090909091, + "grad_norm": 2.159137487411499, + "learning_rate": 0.0001, + "loss": 0.1053, + "step": 7140 + }, + { + "epoch": 20.3125, + "grad_norm": 2.2997374534606934, + "learning_rate": 0.0001, + "loss": 0.1046, + "step": 7150 + }, + { + "epoch": 20.34090909090909, + "grad_norm": 2.431882381439209, + "learning_rate": 0.0001, + "loss": 0.1038, + "step": 7160 + }, + { + "epoch": 20.369318181818183, + "grad_norm": 2.410435676574707, + "learning_rate": 0.0001, + "loss": 0.1018, + "step": 7170 + }, + { + "epoch": 20.397727272727273, + "grad_norm": 2.3939435482025146, + "learning_rate": 0.0001, + "loss": 0.102, + "step": 7180 + }, + { + "epoch": 20.426136363636363, + "grad_norm": 2.132279396057129, + "learning_rate": 0.0001, + "loss": 0.1034, + "step": 7190 + }, + { + "epoch": 20.454545454545453, + "grad_norm": 2.2916312217712402, + "learning_rate": 0.0001, + "loss": 0.1052, + "step": 7200 + }, + { + "epoch": 20.482954545454547, + "grad_norm": 2.5798282623291016, + "learning_rate": 0.0001, + "loss": 0.1026, + "step": 7210 + }, + { + "epoch": 20.511363636363637, + "grad_norm": 2.2447385787963867, + "learning_rate": 0.0001, + "loss": 0.0993, + "step": 7220 + }, + { + "epoch": 20.539772727272727, + "grad_norm": 2.2323153018951416, + "learning_rate": 0.0001, + "loss": 0.1009, + "step": 7230 + }, + { + "epoch": 20.568181818181817, + "grad_norm": 1.8763328790664673, + "learning_rate": 0.0001, + "loss": 0.1053, + "step": 7240 + }, + { + "epoch": 20.59659090909091, + "grad_norm": 1.501619815826416, + "learning_rate": 0.0001, + "loss": 0.1027, + "step": 7250 + }, + { + "epoch": 20.625, + "grad_norm": 1.9412376880645752, + "learning_rate": 0.0001, + "loss": 0.1056, + "step": 7260 + }, + { + "epoch": 20.65340909090909, + "grad_norm": 2.07375431060791, + "learning_rate": 0.0001, + "loss": 0.1047, + "step": 7270 + }, + { + "epoch": 20.681818181818183, + "grad_norm": 2.0877957344055176, + "learning_rate": 0.0001, + "loss": 0.1035, + "step": 7280 + }, + { + "epoch": 20.710227272727273, + "grad_norm": 2.4032297134399414, + "learning_rate": 0.0001, + "loss": 0.1039, + "step": 7290 + }, + { + "epoch": 20.738636363636363, + "grad_norm": 2.4037721157073975, + "learning_rate": 0.0001, + "loss": 0.0988, + "step": 7300 + }, + { + "epoch": 20.767045454545453, + "grad_norm": 1.8980406522750854, + "learning_rate": 0.0001, + "loss": 0.1024, + "step": 7310 + }, + { + "epoch": 20.795454545454547, + "grad_norm": 2.298187732696533, + "learning_rate": 0.0001, + "loss": 0.1065, + "step": 7320 + }, + { + "epoch": 20.823863636363637, + "grad_norm": 2.2373411655426025, + "learning_rate": 0.0001, + "loss": 0.1107, + "step": 7330 + }, + { + "epoch": 20.852272727272727, + "grad_norm": 2.2414472103118896, + "learning_rate": 0.0001, + "loss": 0.1063, + "step": 7340 + }, + { + "epoch": 20.880681818181817, + "grad_norm": 1.9661855697631836, + "learning_rate": 0.0001, + "loss": 0.1055, + "step": 7350 + }, + { + "epoch": 20.90909090909091, + "grad_norm": 1.9864373207092285, + "learning_rate": 0.0001, + "loss": 0.1013, + "step": 7360 + }, + { + "epoch": 20.9375, + "grad_norm": 2.239394187927246, + "learning_rate": 0.0001, + "loss": 0.1028, + "step": 7370 + }, + { + "epoch": 20.96590909090909, + "grad_norm": 1.8729074001312256, + "learning_rate": 0.0001, + "loss": 0.1045, + "step": 7380 + }, + { + "epoch": 20.994318181818183, + "grad_norm": 1.9113003015518188, + "learning_rate": 0.0001, + "loss": 0.1061, + "step": 7390 + }, + { + "epoch": 21.022727272727273, + "grad_norm": 1.733733892440796, + "learning_rate": 0.0001, + "loss": 0.1029, + "step": 7400 + }, + { + "epoch": 21.051136363636363, + "grad_norm": 1.908080816268921, + "learning_rate": 0.0001, + "loss": 0.1055, + "step": 7410 + }, + { + "epoch": 21.079545454545453, + "grad_norm": 2.046468496322632, + "learning_rate": 0.0001, + "loss": 0.103, + "step": 7420 + }, + { + "epoch": 21.107954545454547, + "grad_norm": 2.1874492168426514, + "learning_rate": 0.0001, + "loss": 0.1023, + "step": 7430 + }, + { + "epoch": 21.136363636363637, + "grad_norm": 1.7166926860809326, + "learning_rate": 0.0001, + "loss": 0.1023, + "step": 7440 + }, + { + "epoch": 21.164772727272727, + "grad_norm": 2.2620322704315186, + "learning_rate": 0.0001, + "loss": 0.1019, + "step": 7450 + }, + { + "epoch": 21.193181818181817, + "grad_norm": 2.283912181854248, + "learning_rate": 0.0001, + "loss": 0.1044, + "step": 7460 + }, + { + "epoch": 21.22159090909091, + "grad_norm": 2.0486154556274414, + "learning_rate": 0.0001, + "loss": 0.1014, + "step": 7470 + }, + { + "epoch": 21.25, + "grad_norm": 1.8538813591003418, + "learning_rate": 0.0001, + "loss": 0.1014, + "step": 7480 + }, + { + "epoch": 21.27840909090909, + "grad_norm": 1.9765312671661377, + "learning_rate": 0.0001, + "loss": 0.0973, + "step": 7490 + }, + { + "epoch": 21.306818181818183, + "grad_norm": 1.8284136056900024, + "learning_rate": 0.0001, + "loss": 0.1008, + "step": 7500 + }, + { + "epoch": 21.335227272727273, + "grad_norm": 1.8823169469833374, + "learning_rate": 0.0001, + "loss": 0.1045, + "step": 7510 + }, + { + "epoch": 21.363636363636363, + "grad_norm": 1.8679107427597046, + "learning_rate": 0.0001, + "loss": 0.0994, + "step": 7520 + }, + { + "epoch": 21.392045454545453, + "grad_norm": 2.5383877754211426, + "learning_rate": 0.0001, + "loss": 0.1018, + "step": 7530 + }, + { + "epoch": 21.420454545454547, + "grad_norm": 1.8846964836120605, + "learning_rate": 0.0001, + "loss": 0.1014, + "step": 7540 + }, + { + "epoch": 21.448863636363637, + "grad_norm": 2.0011348724365234, + "learning_rate": 0.0001, + "loss": 0.101, + "step": 7550 + }, + { + "epoch": 21.477272727272727, + "grad_norm": 2.220806360244751, + "learning_rate": 0.0001, + "loss": 0.0957, + "step": 7560 + }, + { + "epoch": 21.505681818181817, + "grad_norm": 2.1716644763946533, + "learning_rate": 0.0001, + "loss": 0.1043, + "step": 7570 + }, + { + "epoch": 21.53409090909091, + "grad_norm": 2.017302989959717, + "learning_rate": 0.0001, + "loss": 0.101, + "step": 7580 + }, + { + "epoch": 21.5625, + "grad_norm": 1.7568871974945068, + "learning_rate": 0.0001, + "loss": 0.101, + "step": 7590 + }, + { + "epoch": 21.59090909090909, + "grad_norm": 1.4694595336914062, + "learning_rate": 0.0001, + "loss": 0.0992, + "step": 7600 + }, + { + "epoch": 21.619318181818183, + "grad_norm": 1.6587533950805664, + "learning_rate": 0.0001, + "loss": 0.0947, + "step": 7610 + }, + { + "epoch": 21.647727272727273, + "grad_norm": 1.5999675989151, + "learning_rate": 0.0001, + "loss": 0.0947, + "step": 7620 + }, + { + "epoch": 21.676136363636363, + "grad_norm": 1.8176852464675903, + "learning_rate": 0.0001, + "loss": 0.1049, + "step": 7630 + }, + { + "epoch": 21.704545454545453, + "grad_norm": 1.8811140060424805, + "learning_rate": 0.0001, + "loss": 0.1026, + "step": 7640 + }, + { + "epoch": 21.732954545454547, + "grad_norm": 2.0048694610595703, + "learning_rate": 0.0001, + "loss": 0.106, + "step": 7650 + }, + { + "epoch": 21.761363636363637, + "grad_norm": 2.050529956817627, + "learning_rate": 0.0001, + "loss": 0.102, + "step": 7660 + }, + { + "epoch": 21.789772727272727, + "grad_norm": 2.1366333961486816, + "learning_rate": 0.0001, + "loss": 0.1052, + "step": 7670 + }, + { + "epoch": 21.818181818181817, + "grad_norm": 2.113969564437866, + "learning_rate": 0.0001, + "loss": 0.1044, + "step": 7680 + }, + { + "epoch": 21.84659090909091, + "grad_norm": 1.9199646711349487, + "learning_rate": 0.0001, + "loss": 0.1019, + "step": 7690 + }, + { + "epoch": 21.875, + "grad_norm": 2.162484884262085, + "learning_rate": 0.0001, + "loss": 0.0991, + "step": 7700 + }, + { + "epoch": 21.90340909090909, + "grad_norm": 2.1541666984558105, + "learning_rate": 0.0001, + "loss": 0.0977, + "step": 7710 + }, + { + "epoch": 21.931818181818183, + "grad_norm": 2.0925753116607666, + "learning_rate": 0.0001, + "loss": 0.0971, + "step": 7720 + }, + { + "epoch": 21.960227272727273, + "grad_norm": 2.3108863830566406, + "learning_rate": 0.0001, + "loss": 0.0937, + "step": 7730 + }, + { + "epoch": 21.988636363636363, + "grad_norm": 2.105069398880005, + "learning_rate": 0.0001, + "loss": 0.0945, + "step": 7740 + }, + { + "epoch": 22.017045454545453, + "grad_norm": 2.304133892059326, + "learning_rate": 0.0001, + "loss": 0.0999, + "step": 7750 + }, + { + "epoch": 22.045454545454547, + "grad_norm": 1.9112257957458496, + "learning_rate": 0.0001, + "loss": 0.0944, + "step": 7760 + }, + { + "epoch": 22.073863636363637, + "grad_norm": 1.9586460590362549, + "learning_rate": 0.0001, + "loss": 0.0978, + "step": 7770 + }, + { + "epoch": 22.102272727272727, + "grad_norm": 2.0921013355255127, + "learning_rate": 0.0001, + "loss": 0.1018, + "step": 7780 + }, + { + "epoch": 22.130681818181817, + "grad_norm": 2.0148942470550537, + "learning_rate": 0.0001, + "loss": 0.0995, + "step": 7790 + }, + { + "epoch": 22.15909090909091, + "grad_norm": 1.9254063367843628, + "learning_rate": 0.0001, + "loss": 0.1026, + "step": 7800 + }, + { + "epoch": 22.1875, + "grad_norm": 1.7932246923446655, + "learning_rate": 0.0001, + "loss": 0.1009, + "step": 7810 + }, + { + "epoch": 22.21590909090909, + "grad_norm": 2.0023248195648193, + "learning_rate": 0.0001, + "loss": 0.1006, + "step": 7820 + }, + { + "epoch": 22.244318181818183, + "grad_norm": 2.1428818702697754, + "learning_rate": 0.0001, + "loss": 0.0965, + "step": 7830 + }, + { + "epoch": 22.272727272727273, + "grad_norm": 2.021170139312744, + "learning_rate": 0.0001, + "loss": 0.1, + "step": 7840 + }, + { + "epoch": 22.301136363636363, + "grad_norm": 1.9330023527145386, + "learning_rate": 0.0001, + "loss": 0.0981, + "step": 7850 + }, + { + "epoch": 22.329545454545453, + "grad_norm": 2.1269373893737793, + "learning_rate": 0.0001, + "loss": 0.096, + "step": 7860 + }, + { + "epoch": 22.357954545454547, + "grad_norm": 2.0002894401550293, + "learning_rate": 0.0001, + "loss": 0.0983, + "step": 7870 + }, + { + "epoch": 22.386363636363637, + "grad_norm": 1.7350810766220093, + "learning_rate": 0.0001, + "loss": 0.0979, + "step": 7880 + }, + { + "epoch": 22.414772727272727, + "grad_norm": 2.5721471309661865, + "learning_rate": 0.0001, + "loss": 0.096, + "step": 7890 + }, + { + "epoch": 22.443181818181817, + "grad_norm": 2.0510294437408447, + "learning_rate": 0.0001, + "loss": 0.0967, + "step": 7900 + }, + { + "epoch": 22.47159090909091, + "grad_norm": 1.7467889785766602, + "learning_rate": 0.0001, + "loss": 0.1012, + "step": 7910 + }, + { + "epoch": 22.5, + "grad_norm": 1.881221055984497, + "learning_rate": 0.0001, + "loss": 0.0977, + "step": 7920 + }, + { + "epoch": 22.52840909090909, + "grad_norm": 1.6960866451263428, + "learning_rate": 0.0001, + "loss": 0.1013, + "step": 7930 + }, + { + "epoch": 22.556818181818183, + "grad_norm": 1.9011884927749634, + "learning_rate": 0.0001, + "loss": 0.0998, + "step": 7940 + }, + { + "epoch": 22.585227272727273, + "grad_norm": 2.289515972137451, + "learning_rate": 0.0001, + "loss": 0.1015, + "step": 7950 + }, + { + "epoch": 22.613636363636363, + "grad_norm": 2.148452043533325, + "learning_rate": 0.0001, + "loss": 0.098, + "step": 7960 + }, + { + "epoch": 22.642045454545453, + "grad_norm": 2.1038877964019775, + "learning_rate": 0.0001, + "loss": 0.0994, + "step": 7970 + }, + { + "epoch": 22.670454545454547, + "grad_norm": 2.1400623321533203, + "learning_rate": 0.0001, + "loss": 0.0948, + "step": 7980 + }, + { + "epoch": 22.698863636363637, + "grad_norm": 1.864814043045044, + "learning_rate": 0.0001, + "loss": 0.0917, + "step": 7990 + }, + { + "epoch": 22.727272727272727, + "grad_norm": 1.9961179494857788, + "learning_rate": 0.0001, + "loss": 0.0962, + "step": 8000 + }, + { + "epoch": 22.755681818181817, + "grad_norm": 1.9807307720184326, + "learning_rate": 0.0001, + "loss": 0.0954, + "step": 8010 + }, + { + "epoch": 22.78409090909091, + "grad_norm": 1.8741666078567505, + "learning_rate": 0.0001, + "loss": 0.0944, + "step": 8020 + }, + { + "epoch": 22.8125, + "grad_norm": 2.272317409515381, + "learning_rate": 0.0001, + "loss": 0.1037, + "step": 8030 + }, + { + "epoch": 22.84090909090909, + "grad_norm": 1.9558086395263672, + "learning_rate": 0.0001, + "loss": 0.0963, + "step": 8040 + }, + { + "epoch": 22.869318181818183, + "grad_norm": 2.009176731109619, + "learning_rate": 0.0001, + "loss": 0.1018, + "step": 8050 + }, + { + "epoch": 22.897727272727273, + "grad_norm": 2.2107458114624023, + "learning_rate": 0.0001, + "loss": 0.1025, + "step": 8060 + }, + { + "epoch": 22.926136363636363, + "grad_norm": 2.5531365871429443, + "learning_rate": 0.0001, + "loss": 0.1018, + "step": 8070 + }, + { + "epoch": 22.954545454545453, + "grad_norm": 2.2472565174102783, + "learning_rate": 0.0001, + "loss": 0.0982, + "step": 8080 + }, + { + "epoch": 22.982954545454547, + "grad_norm": 1.920031189918518, + "learning_rate": 0.0001, + "loss": 0.0943, + "step": 8090 + }, + { + "epoch": 23.011363636363637, + "grad_norm": 1.6725058555603027, + "learning_rate": 0.0001, + "loss": 0.0951, + "step": 8100 + }, + { + "epoch": 23.039772727272727, + "grad_norm": 2.308568239212036, + "learning_rate": 0.0001, + "loss": 0.092, + "step": 8110 + }, + { + "epoch": 23.068181818181817, + "grad_norm": 1.8834666013717651, + "learning_rate": 0.0001, + "loss": 0.0927, + "step": 8120 + }, + { + "epoch": 23.09659090909091, + "grad_norm": 1.8598517179489136, + "learning_rate": 0.0001, + "loss": 0.095, + "step": 8130 + }, + { + "epoch": 23.125, + "grad_norm": 2.1915621757507324, + "learning_rate": 0.0001, + "loss": 0.0929, + "step": 8140 + }, + { + "epoch": 23.15340909090909, + "grad_norm": 2.160149335861206, + "learning_rate": 0.0001, + "loss": 0.0964, + "step": 8150 + }, + { + "epoch": 23.181818181818183, + "grad_norm": 1.9698961973190308, + "learning_rate": 0.0001, + "loss": 0.0996, + "step": 8160 + }, + { + "epoch": 23.210227272727273, + "grad_norm": 1.9553509950637817, + "learning_rate": 0.0001, + "loss": 0.0948, + "step": 8170 + }, + { + "epoch": 23.238636363636363, + "grad_norm": 1.9348289966583252, + "learning_rate": 0.0001, + "loss": 0.0945, + "step": 8180 + }, + { + "epoch": 23.267045454545453, + "grad_norm": 2.053300142288208, + "learning_rate": 0.0001, + "loss": 0.0953, + "step": 8190 + }, + { + "epoch": 23.295454545454547, + "grad_norm": 1.8271958827972412, + "learning_rate": 0.0001, + "loss": 0.092, + "step": 8200 + }, + { + "epoch": 23.323863636363637, + "grad_norm": 1.5689889192581177, + "learning_rate": 0.0001, + "loss": 0.1003, + "step": 8210 + }, + { + "epoch": 23.352272727272727, + "grad_norm": 2.1360859870910645, + "learning_rate": 0.0001, + "loss": 0.0956, + "step": 8220 + }, + { + "epoch": 23.380681818181817, + "grad_norm": 1.819110631942749, + "learning_rate": 0.0001, + "loss": 0.0964, + "step": 8230 + }, + { + "epoch": 23.40909090909091, + "grad_norm": 2.107375383377075, + "learning_rate": 0.0001, + "loss": 0.0967, + "step": 8240 + }, + { + "epoch": 23.4375, + "grad_norm": 1.8408470153808594, + "learning_rate": 0.0001, + "loss": 0.0929, + "step": 8250 + }, + { + "epoch": 23.46590909090909, + "grad_norm": 1.9134175777435303, + "learning_rate": 0.0001, + "loss": 0.0956, + "step": 8260 + }, + { + "epoch": 23.494318181818183, + "grad_norm": 1.71891188621521, + "learning_rate": 0.0001, + "loss": 0.0939, + "step": 8270 + }, + { + "epoch": 23.522727272727273, + "grad_norm": 1.5312421321868896, + "learning_rate": 0.0001, + "loss": 0.0987, + "step": 8280 + }, + { + "epoch": 23.551136363636363, + "grad_norm": 1.7557247877120972, + "learning_rate": 0.0001, + "loss": 0.0975, + "step": 8290 + }, + { + "epoch": 23.579545454545453, + "grad_norm": 1.8017261028289795, + "learning_rate": 0.0001, + "loss": 0.0974, + "step": 8300 + }, + { + "epoch": 23.607954545454547, + "grad_norm": 1.5881474018096924, + "learning_rate": 0.0001, + "loss": 0.0987, + "step": 8310 + }, + { + "epoch": 23.636363636363637, + "grad_norm": 1.8395788669586182, + "learning_rate": 0.0001, + "loss": 0.1008, + "step": 8320 + }, + { + "epoch": 23.664772727272727, + "grad_norm": 1.791631817817688, + "learning_rate": 0.0001, + "loss": 0.0983, + "step": 8330 + }, + { + "epoch": 23.693181818181817, + "grad_norm": 1.6137861013412476, + "learning_rate": 0.0001, + "loss": 0.0982, + "step": 8340 + }, + { + "epoch": 23.72159090909091, + "grad_norm": 1.9976779222488403, + "learning_rate": 0.0001, + "loss": 0.1021, + "step": 8350 + }, + { + "epoch": 23.75, + "grad_norm": 1.667160987854004, + "learning_rate": 0.0001, + "loss": 0.0982, + "step": 8360 + }, + { + "epoch": 23.77840909090909, + "grad_norm": 1.5434305667877197, + "learning_rate": 0.0001, + "loss": 0.0967, + "step": 8370 + }, + { + "epoch": 23.806818181818183, + "grad_norm": 1.8221416473388672, + "learning_rate": 0.0001, + "loss": 0.0971, + "step": 8380 + }, + { + "epoch": 23.835227272727273, + "grad_norm": 1.9259772300720215, + "learning_rate": 0.0001, + "loss": 0.0969, + "step": 8390 + }, + { + "epoch": 23.863636363636363, + "grad_norm": 1.9943630695343018, + "learning_rate": 0.0001, + "loss": 0.0993, + "step": 8400 + }, + { + "epoch": 23.892045454545453, + "grad_norm": 1.5301824808120728, + "learning_rate": 0.0001, + "loss": 0.0944, + "step": 8410 + }, + { + "epoch": 23.920454545454547, + "grad_norm": 2.062227964401245, + "learning_rate": 0.0001, + "loss": 0.0949, + "step": 8420 + }, + { + "epoch": 23.948863636363637, + "grad_norm": 1.7410181760787964, + "learning_rate": 0.0001, + "loss": 0.0975, + "step": 8430 + }, + { + "epoch": 23.977272727272727, + "grad_norm": 1.7448116540908813, + "learning_rate": 0.0001, + "loss": 0.0941, + "step": 8440 + }, + { + "epoch": 24.005681818181817, + "grad_norm": 2.3489348888397217, + "learning_rate": 0.0001, + "loss": 0.0946, + "step": 8450 + }, + { + "epoch": 24.03409090909091, + "grad_norm": 2.110835075378418, + "learning_rate": 0.0001, + "loss": 0.0929, + "step": 8460 + }, + { + "epoch": 24.0625, + "grad_norm": 2.4186344146728516, + "learning_rate": 0.0001, + "loss": 0.0982, + "step": 8470 + }, + { + "epoch": 24.09090909090909, + "grad_norm": 1.9502896070480347, + "learning_rate": 0.0001, + "loss": 0.0969, + "step": 8480 + }, + { + "epoch": 24.119318181818183, + "grad_norm": 1.9351022243499756, + "learning_rate": 0.0001, + "loss": 0.094, + "step": 8490 + }, + { + "epoch": 24.147727272727273, + "grad_norm": 1.8484196662902832, + "learning_rate": 0.0001, + "loss": 0.0935, + "step": 8500 + }, + { + "epoch": 24.176136363636363, + "grad_norm": 1.8879474401474, + "learning_rate": 0.0001, + "loss": 0.0947, + "step": 8510 + }, + { + "epoch": 24.204545454545453, + "grad_norm": 2.4570751190185547, + "learning_rate": 0.0001, + "loss": 0.0918, + "step": 8520 + }, + { + "epoch": 24.232954545454547, + "grad_norm": 2.6654608249664307, + "learning_rate": 0.0001, + "loss": 0.0977, + "step": 8530 + }, + { + "epoch": 24.261363636363637, + "grad_norm": 2.244088888168335, + "learning_rate": 0.0001, + "loss": 0.0935, + "step": 8540 + }, + { + "epoch": 24.289772727272727, + "grad_norm": 2.7572576999664307, + "learning_rate": 0.0001, + "loss": 0.0956, + "step": 8550 + }, + { + "epoch": 24.318181818181817, + "grad_norm": 2.1149368286132812, + "learning_rate": 0.0001, + "loss": 0.0929, + "step": 8560 + }, + { + "epoch": 24.34659090909091, + "grad_norm": 1.9651392698287964, + "learning_rate": 0.0001, + "loss": 0.093, + "step": 8570 + }, + { + "epoch": 24.375, + "grad_norm": 2.118886947631836, + "learning_rate": 0.0001, + "loss": 0.0918, + "step": 8580 + }, + { + "epoch": 24.40340909090909, + "grad_norm": 1.857898235321045, + "learning_rate": 0.0001, + "loss": 0.0912, + "step": 8590 + }, + { + "epoch": 24.431818181818183, + "grad_norm": 1.8843599557876587, + "learning_rate": 0.0001, + "loss": 0.0923, + "step": 8600 + }, + { + "epoch": 24.460227272727273, + "grad_norm": 1.8303879499435425, + "learning_rate": 0.0001, + "loss": 0.0916, + "step": 8610 + }, + { + "epoch": 24.488636363636363, + "grad_norm": 2.0222115516662598, + "learning_rate": 0.0001, + "loss": 0.0917, + "step": 8620 + }, + { + "epoch": 24.517045454545453, + "grad_norm": 1.773148536682129, + "learning_rate": 0.0001, + "loss": 0.0939, + "step": 8630 + }, + { + "epoch": 24.545454545454547, + "grad_norm": 1.9186317920684814, + "learning_rate": 0.0001, + "loss": 0.09, + "step": 8640 + }, + { + "epoch": 24.573863636363637, + "grad_norm": 1.938623309135437, + "learning_rate": 0.0001, + "loss": 0.0935, + "step": 8650 + }, + { + "epoch": 24.602272727272727, + "grad_norm": 2.09529972076416, + "learning_rate": 0.0001, + "loss": 0.0936, + "step": 8660 + }, + { + "epoch": 24.630681818181817, + "grad_norm": 2.1375720500946045, + "learning_rate": 0.0001, + "loss": 0.0901, + "step": 8670 + }, + { + "epoch": 24.65909090909091, + "grad_norm": 3.3729183673858643, + "learning_rate": 0.0001, + "loss": 0.0936, + "step": 8680 + }, + { + "epoch": 24.6875, + "grad_norm": 2.765795946121216, + "learning_rate": 0.0001, + "loss": 0.0942, + "step": 8690 + }, + { + "epoch": 24.71590909090909, + "grad_norm": 3.0943186283111572, + "learning_rate": 0.0001, + "loss": 0.0915, + "step": 8700 + }, + { + "epoch": 24.744318181818183, + "grad_norm": 2.4649555683135986, + "learning_rate": 0.0001, + "loss": 0.0902, + "step": 8710 + }, + { + "epoch": 24.772727272727273, + "grad_norm": 2.455824851989746, + "learning_rate": 0.0001, + "loss": 0.0953, + "step": 8720 + }, + { + "epoch": 24.801136363636363, + "grad_norm": 2.0996339321136475, + "learning_rate": 0.0001, + "loss": 0.0949, + "step": 8730 + }, + { + "epoch": 24.829545454545453, + "grad_norm": 2.2499396800994873, + "learning_rate": 0.0001, + "loss": 0.0915, + "step": 8740 + }, + { + "epoch": 24.857954545454547, + "grad_norm": 1.920745611190796, + "learning_rate": 0.0001, + "loss": 0.09, + "step": 8750 + }, + { + "epoch": 24.886363636363637, + "grad_norm": 1.906348705291748, + "learning_rate": 0.0001, + "loss": 0.0921, + "step": 8760 + }, + { + "epoch": 24.914772727272727, + "grad_norm": 1.6197078227996826, + "learning_rate": 0.0001, + "loss": 0.0889, + "step": 8770 + }, + { + "epoch": 24.943181818181817, + "grad_norm": 1.6164134740829468, + "learning_rate": 0.0001, + "loss": 0.0907, + "step": 8780 + }, + { + "epoch": 24.97159090909091, + "grad_norm": 1.7616385221481323, + "learning_rate": 0.0001, + "loss": 0.0928, + "step": 8790 + }, + { + "epoch": 25.0, + "grad_norm": 1.5803983211517334, + "learning_rate": 0.0001, + "loss": 0.0926, + "step": 8800 + }, + { + "epoch": 25.02840909090909, + "grad_norm": 1.6062462329864502, + "learning_rate": 0.0001, + "loss": 0.0961, + "step": 8810 + }, + { + "epoch": 25.056818181818183, + "grad_norm": 1.5102510452270508, + "learning_rate": 0.0001, + "loss": 0.0917, + "step": 8820 + }, + { + "epoch": 25.085227272727273, + "grad_norm": 1.6694464683532715, + "learning_rate": 0.0001, + "loss": 0.0946, + "step": 8830 + }, + { + "epoch": 25.113636363636363, + "grad_norm": 1.6508196592330933, + "learning_rate": 0.0001, + "loss": 0.0926, + "step": 8840 + }, + { + "epoch": 25.142045454545453, + "grad_norm": 1.7550101280212402, + "learning_rate": 0.0001, + "loss": 0.0912, + "step": 8850 + }, + { + "epoch": 25.170454545454547, + "grad_norm": 1.6361439228057861, + "learning_rate": 0.0001, + "loss": 0.0986, + "step": 8860 + }, + { + "epoch": 25.198863636363637, + "grad_norm": 1.810949683189392, + "learning_rate": 0.0001, + "loss": 0.0952, + "step": 8870 + }, + { + "epoch": 25.227272727272727, + "grad_norm": 1.7442113161087036, + "learning_rate": 0.0001, + "loss": 0.0936, + "step": 8880 + }, + { + "epoch": 25.255681818181817, + "grad_norm": 1.9462569952011108, + "learning_rate": 0.0001, + "loss": 0.0978, + "step": 8890 + }, + { + "epoch": 25.28409090909091, + "grad_norm": 1.865937352180481, + "learning_rate": 0.0001, + "loss": 0.0949, + "step": 8900 + }, + { + "epoch": 25.3125, + "grad_norm": 1.5846524238586426, + "learning_rate": 0.0001, + "loss": 0.0912, + "step": 8910 + }, + { + "epoch": 25.34090909090909, + "grad_norm": 1.6086736917495728, + "learning_rate": 0.0001, + "loss": 0.0945, + "step": 8920 + }, + { + "epoch": 25.369318181818183, + "grad_norm": 1.665158748626709, + "learning_rate": 0.0001, + "loss": 0.0895, + "step": 8930 + }, + { + "epoch": 25.397727272727273, + "grad_norm": 1.7580013275146484, + "learning_rate": 0.0001, + "loss": 0.0888, + "step": 8940 + }, + { + "epoch": 25.426136363636363, + "grad_norm": 1.765702247619629, + "learning_rate": 0.0001, + "loss": 0.0975, + "step": 8950 + }, + { + "epoch": 25.454545454545453, + "grad_norm": 1.6462661027908325, + "learning_rate": 0.0001, + "loss": 0.0956, + "step": 8960 + }, + { + "epoch": 25.482954545454547, + "grad_norm": 2.0281505584716797, + "learning_rate": 0.0001, + "loss": 0.0912, + "step": 8970 + }, + { + "epoch": 25.511363636363637, + "grad_norm": 1.7537845373153687, + "learning_rate": 0.0001, + "loss": 0.0932, + "step": 8980 + }, + { + "epoch": 25.539772727272727, + "grad_norm": 1.776159644126892, + "learning_rate": 0.0001, + "loss": 0.0967, + "step": 8990 + }, + { + "epoch": 25.568181818181817, + "grad_norm": 1.6971244812011719, + "learning_rate": 0.0001, + "loss": 0.0924, + "step": 9000 + }, + { + "epoch": 25.59659090909091, + "grad_norm": 1.4512749910354614, + "learning_rate": 0.0001, + "loss": 0.0897, + "step": 9010 + }, + { + "epoch": 25.625, + "grad_norm": 1.7332180738449097, + "learning_rate": 0.0001, + "loss": 0.0923, + "step": 9020 + }, + { + "epoch": 25.65340909090909, + "grad_norm": 1.9260343313217163, + "learning_rate": 0.0001, + "loss": 0.0945, + "step": 9030 + }, + { + "epoch": 25.681818181818183, + "grad_norm": 1.56917405128479, + "learning_rate": 0.0001, + "loss": 0.0959, + "step": 9040 + }, + { + "epoch": 25.710227272727273, + "grad_norm": 1.4904402494430542, + "learning_rate": 0.0001, + "loss": 0.0961, + "step": 9050 + }, + { + "epoch": 25.738636363636363, + "grad_norm": 1.7849690914154053, + "learning_rate": 0.0001, + "loss": 0.0987, + "step": 9060 + }, + { + "epoch": 25.767045454545453, + "grad_norm": 1.63370943069458, + "learning_rate": 0.0001, + "loss": 0.0994, + "step": 9070 + }, + { + "epoch": 25.795454545454547, + "grad_norm": 2.0049540996551514, + "learning_rate": 0.0001, + "loss": 0.0929, + "step": 9080 + }, + { + "epoch": 25.823863636363637, + "grad_norm": 1.876146912574768, + "learning_rate": 0.0001, + "loss": 0.0967, + "step": 9090 + }, + { + "epoch": 25.852272727272727, + "grad_norm": 1.7067279815673828, + "learning_rate": 0.0001, + "loss": 0.0968, + "step": 9100 + }, + { + "epoch": 25.880681818181817, + "grad_norm": 1.7685781717300415, + "learning_rate": 0.0001, + "loss": 0.093, + "step": 9110 + }, + { + "epoch": 25.90909090909091, + "grad_norm": 1.493255376815796, + "learning_rate": 0.0001, + "loss": 0.0905, + "step": 9120 + }, + { + "epoch": 25.9375, + "grad_norm": 1.7333801984786987, + "learning_rate": 0.0001, + "loss": 0.0947, + "step": 9130 + }, + { + "epoch": 25.96590909090909, + "grad_norm": 1.5893990993499756, + "learning_rate": 0.0001, + "loss": 0.092, + "step": 9140 + }, + { + "epoch": 25.994318181818183, + "grad_norm": 1.9104375839233398, + "learning_rate": 0.0001, + "loss": 0.0963, + "step": 9150 + }, + { + "epoch": 26.022727272727273, + "grad_norm": 1.5926457643508911, + "learning_rate": 0.0001, + "loss": 0.0939, + "step": 9160 + }, + { + "epoch": 26.051136363636363, + "grad_norm": 1.5772978067398071, + "learning_rate": 0.0001, + "loss": 0.0872, + "step": 9170 + }, + { + "epoch": 26.079545454545453, + "grad_norm": 1.5457425117492676, + "learning_rate": 0.0001, + "loss": 0.0927, + "step": 9180 + }, + { + "epoch": 26.107954545454547, + "grad_norm": 1.6755262613296509, + "learning_rate": 0.0001, + "loss": 0.0954, + "step": 9190 + }, + { + "epoch": 26.136363636363637, + "grad_norm": 1.461090326309204, + "learning_rate": 0.0001, + "loss": 0.0936, + "step": 9200 + }, + { + "epoch": 26.164772727272727, + "grad_norm": 1.4528868198394775, + "learning_rate": 0.0001, + "loss": 0.0937, + "step": 9210 + }, + { + "epoch": 26.193181818181817, + "grad_norm": 1.5315214395523071, + "learning_rate": 0.0001, + "loss": 0.1011, + "step": 9220 + }, + { + "epoch": 26.22159090909091, + "grad_norm": 1.560640573501587, + "learning_rate": 0.0001, + "loss": 0.092, + "step": 9230 + }, + { + "epoch": 26.25, + "grad_norm": 1.4106330871582031, + "learning_rate": 0.0001, + "loss": 0.0971, + "step": 9240 + }, + { + "epoch": 26.27840909090909, + "grad_norm": 1.4047380685806274, + "learning_rate": 0.0001, + "loss": 0.0924, + "step": 9250 + }, + { + "epoch": 26.306818181818183, + "grad_norm": 1.4862667322158813, + "learning_rate": 0.0001, + "loss": 0.0979, + "step": 9260 + }, + { + "epoch": 26.335227272727273, + "grad_norm": 1.5880588293075562, + "learning_rate": 0.0001, + "loss": 0.0934, + "step": 9270 + }, + { + "epoch": 26.363636363636363, + "grad_norm": 1.428328514099121, + "learning_rate": 0.0001, + "loss": 0.0971, + "step": 9280 + }, + { + "epoch": 26.392045454545453, + "grad_norm": 1.5301146507263184, + "learning_rate": 0.0001, + "loss": 0.095, + "step": 9290 + }, + { + "epoch": 26.420454545454547, + "grad_norm": 1.6134599447250366, + "learning_rate": 0.0001, + "loss": 0.0975, + "step": 9300 + }, + { + "epoch": 26.448863636363637, + "grad_norm": 1.497191071510315, + "learning_rate": 0.0001, + "loss": 0.0948, + "step": 9310 + }, + { + "epoch": 26.477272727272727, + "grad_norm": 1.7432132959365845, + "learning_rate": 0.0001, + "loss": 0.0956, + "step": 9320 + }, + { + "epoch": 26.505681818181817, + "grad_norm": 1.3511826992034912, + "learning_rate": 0.0001, + "loss": 0.0919, + "step": 9330 + }, + { + "epoch": 26.53409090909091, + "grad_norm": 1.5720796585083008, + "learning_rate": 0.0001, + "loss": 0.0936, + "step": 9340 + }, + { + "epoch": 26.5625, + "grad_norm": 1.5396133661270142, + "learning_rate": 0.0001, + "loss": 0.0959, + "step": 9350 + }, + { + "epoch": 26.59090909090909, + "grad_norm": 1.6743911504745483, + "learning_rate": 0.0001, + "loss": 0.0936, + "step": 9360 + }, + { + "epoch": 26.619318181818183, + "grad_norm": 1.85866117477417, + "learning_rate": 0.0001, + "loss": 0.0995, + "step": 9370 + }, + { + "epoch": 26.647727272727273, + "grad_norm": 1.370617151260376, + "learning_rate": 0.0001, + "loss": 0.0962, + "step": 9380 + }, + { + "epoch": 26.676136363636363, + "grad_norm": 1.953228235244751, + "learning_rate": 0.0001, + "loss": 0.0981, + "step": 9390 + }, + { + "epoch": 26.704545454545453, + "grad_norm": 2.1395270824432373, + "learning_rate": 0.0001, + "loss": 0.094, + "step": 9400 + }, + { + "epoch": 26.732954545454547, + "grad_norm": 2.1154062747955322, + "learning_rate": 0.0001, + "loss": 0.0969, + "step": 9410 + }, + { + "epoch": 26.761363636363637, + "grad_norm": 1.7266603708267212, + "learning_rate": 0.0001, + "loss": 0.094, + "step": 9420 + }, + { + "epoch": 26.789772727272727, + "grad_norm": 1.8902325630187988, + "learning_rate": 0.0001, + "loss": 0.0912, + "step": 9430 + }, + { + "epoch": 26.818181818181817, + "grad_norm": 1.7739678621292114, + "learning_rate": 0.0001, + "loss": 0.0894, + "step": 9440 + }, + { + "epoch": 26.84659090909091, + "grad_norm": 1.9259507656097412, + "learning_rate": 0.0001, + "loss": 0.0915, + "step": 9450 + }, + { + "epoch": 26.875, + "grad_norm": 1.898050308227539, + "learning_rate": 0.0001, + "loss": 0.0892, + "step": 9460 + }, + { + "epoch": 26.90340909090909, + "grad_norm": 1.8099193572998047, + "learning_rate": 0.0001, + "loss": 0.09, + "step": 9470 + }, + { + "epoch": 26.931818181818183, + "grad_norm": 1.7650624513626099, + "learning_rate": 0.0001, + "loss": 0.0917, + "step": 9480 + }, + { + "epoch": 26.960227272727273, + "grad_norm": 1.813428521156311, + "learning_rate": 0.0001, + "loss": 0.085, + "step": 9490 + }, + { + "epoch": 26.988636363636363, + "grad_norm": 1.67322838306427, + "learning_rate": 0.0001, + "loss": 0.0919, + "step": 9500 + }, + { + "epoch": 27.017045454545453, + "grad_norm": 1.668229103088379, + "learning_rate": 0.0001, + "loss": 0.0889, + "step": 9510 + }, + { + "epoch": 27.045454545454547, + "grad_norm": 1.6641284227371216, + "learning_rate": 0.0001, + "loss": 0.0871, + "step": 9520 + }, + { + "epoch": 27.073863636363637, + "grad_norm": 1.5563348531723022, + "learning_rate": 0.0001, + "loss": 0.0856, + "step": 9530 + }, + { + "epoch": 27.102272727272727, + "grad_norm": 1.72633957862854, + "learning_rate": 0.0001, + "loss": 0.0898, + "step": 9540 + }, + { + "epoch": 27.130681818181817, + "grad_norm": 1.5098172426223755, + "learning_rate": 0.0001, + "loss": 0.0893, + "step": 9550 + }, + { + "epoch": 27.15909090909091, + "grad_norm": 1.7383455038070679, + "learning_rate": 0.0001, + "loss": 0.0859, + "step": 9560 + }, + { + "epoch": 27.1875, + "grad_norm": 1.732844352722168, + "learning_rate": 0.0001, + "loss": 0.0954, + "step": 9570 + }, + { + "epoch": 27.21590909090909, + "grad_norm": 1.5359463691711426, + "learning_rate": 0.0001, + "loss": 0.09, + "step": 9580 + }, + { + "epoch": 27.244318181818183, + "grad_norm": 1.5415890216827393, + "learning_rate": 0.0001, + "loss": 0.0947, + "step": 9590 + }, + { + "epoch": 27.272727272727273, + "grad_norm": 1.702905297279358, + "learning_rate": 0.0001, + "loss": 0.0868, + "step": 9600 + }, + { + "epoch": 27.301136363636363, + "grad_norm": 1.5514419078826904, + "learning_rate": 0.0001, + "loss": 0.0931, + "step": 9610 + }, + { + "epoch": 27.329545454545453, + "grad_norm": 1.6951237916946411, + "learning_rate": 0.0001, + "loss": 0.0946, + "step": 9620 + }, + { + "epoch": 27.357954545454547, + "grad_norm": 1.726776123046875, + "learning_rate": 0.0001, + "loss": 0.0914, + "step": 9630 + }, + { + "epoch": 27.386363636363637, + "grad_norm": 1.5069643259048462, + "learning_rate": 0.0001, + "loss": 0.0955, + "step": 9640 + }, + { + "epoch": 27.414772727272727, + "grad_norm": 1.468774676322937, + "learning_rate": 0.0001, + "loss": 0.0895, + "step": 9650 + }, + { + "epoch": 27.443181818181817, + "grad_norm": 1.7724437713623047, + "learning_rate": 0.0001, + "loss": 0.0914, + "step": 9660 + }, + { + "epoch": 27.47159090909091, + "grad_norm": 1.3602412939071655, + "learning_rate": 0.0001, + "loss": 0.0851, + "step": 9670 + }, + { + "epoch": 27.5, + "grad_norm": 1.5905205011367798, + "learning_rate": 0.0001, + "loss": 0.0892, + "step": 9680 + }, + { + "epoch": 27.52840909090909, + "grad_norm": 1.389020562171936, + "learning_rate": 0.0001, + "loss": 0.0882, + "step": 9690 + }, + { + "epoch": 27.556818181818183, + "grad_norm": 1.7958135604858398, + "learning_rate": 0.0001, + "loss": 0.0896, + "step": 9700 + }, + { + "epoch": 27.585227272727273, + "grad_norm": 1.629370093345642, + "learning_rate": 0.0001, + "loss": 0.0945, + "step": 9710 + }, + { + "epoch": 27.613636363636363, + "grad_norm": 1.8372656106948853, + "learning_rate": 0.0001, + "loss": 0.0917, + "step": 9720 + }, + { + "epoch": 27.642045454545453, + "grad_norm": 2.0454485416412354, + "learning_rate": 0.0001, + "loss": 0.0911, + "step": 9730 + }, + { + "epoch": 27.670454545454547, + "grad_norm": 1.712260365486145, + "learning_rate": 0.0001, + "loss": 0.0974, + "step": 9740 + }, + { + "epoch": 27.698863636363637, + "grad_norm": 1.8884317874908447, + "learning_rate": 0.0001, + "loss": 0.0891, + "step": 9750 + }, + { + "epoch": 27.727272727272727, + "grad_norm": 1.5852235555648804, + "learning_rate": 0.0001, + "loss": 0.0898, + "step": 9760 + }, + { + "epoch": 27.755681818181817, + "grad_norm": 1.4623692035675049, + "learning_rate": 0.0001, + "loss": 0.0866, + "step": 9770 + }, + { + "epoch": 27.78409090909091, + "grad_norm": 1.6662063598632812, + "learning_rate": 0.0001, + "loss": 0.0875, + "step": 9780 + }, + { + "epoch": 27.8125, + "grad_norm": 1.5903691053390503, + "learning_rate": 0.0001, + "loss": 0.0898, + "step": 9790 + }, + { + "epoch": 27.84090909090909, + "grad_norm": 1.575780987739563, + "learning_rate": 0.0001, + "loss": 0.0918, + "step": 9800 + }, + { + "epoch": 27.869318181818183, + "grad_norm": 1.4641938209533691, + "learning_rate": 0.0001, + "loss": 0.0865, + "step": 9810 + }, + { + "epoch": 27.897727272727273, + "grad_norm": 1.3061418533325195, + "learning_rate": 0.0001, + "loss": 0.0837, + "step": 9820 + }, + { + "epoch": 27.926136363636363, + "grad_norm": 1.5644803047180176, + "learning_rate": 0.0001, + "loss": 0.0894, + "step": 9830 + }, + { + "epoch": 27.954545454545453, + "grad_norm": 1.9139891862869263, + "learning_rate": 0.0001, + "loss": 0.0925, + "step": 9840 + }, + { + "epoch": 27.982954545454547, + "grad_norm": 1.4878668785095215, + "learning_rate": 0.0001, + "loss": 0.0892, + "step": 9850 + }, + { + "epoch": 28.011363636363637, + "grad_norm": 1.41493821144104, + "learning_rate": 0.0001, + "loss": 0.0914, + "step": 9860 + }, + { + "epoch": 28.039772727272727, + "grad_norm": 1.5278867483139038, + "learning_rate": 0.0001, + "loss": 0.0875, + "step": 9870 + }, + { + "epoch": 28.068181818181817, + "grad_norm": 1.3877768516540527, + "learning_rate": 0.0001, + "loss": 0.0914, + "step": 9880 + }, + { + "epoch": 28.09659090909091, + "grad_norm": 1.3210440874099731, + "learning_rate": 0.0001, + "loss": 0.0854, + "step": 9890 + }, + { + "epoch": 28.125, + "grad_norm": 1.3031365871429443, + "learning_rate": 0.0001, + "loss": 0.0854, + "step": 9900 + }, + { + "epoch": 28.15340909090909, + "grad_norm": 1.334957242012024, + "learning_rate": 0.0001, + "loss": 0.0926, + "step": 9910 + }, + { + "epoch": 28.181818181818183, + "grad_norm": 1.3106921911239624, + "learning_rate": 0.0001, + "loss": 0.0938, + "step": 9920 + }, + { + "epoch": 28.210227272727273, + "grad_norm": 1.9076578617095947, + "learning_rate": 0.0001, + "loss": 0.0869, + "step": 9930 + }, + { + "epoch": 28.238636363636363, + "grad_norm": 1.8114360570907593, + "learning_rate": 0.0001, + "loss": 0.0923, + "step": 9940 + }, + { + "epoch": 28.267045454545453, + "grad_norm": 1.475202202796936, + "learning_rate": 0.0001, + "loss": 0.0942, + "step": 9950 + }, + { + "epoch": 28.295454545454547, + "grad_norm": 1.6145161390304565, + "learning_rate": 0.0001, + "loss": 0.0842, + "step": 9960 + }, + { + "epoch": 28.323863636363637, + "grad_norm": 1.5492805242538452, + "learning_rate": 0.0001, + "loss": 0.0866, + "step": 9970 + }, + { + "epoch": 28.352272727272727, + "grad_norm": 1.6605106592178345, + "learning_rate": 0.0001, + "loss": 0.0917, + "step": 9980 + }, + { + "epoch": 28.380681818181817, + "grad_norm": 1.6075584888458252, + "learning_rate": 0.0001, + "loss": 0.0888, + "step": 9990 + }, + { + "epoch": 28.40909090909091, + "grad_norm": 1.3253341913223267, + "learning_rate": 0.0001, + "loss": 0.0883, + "step": 10000 + }, + { + "epoch": 28.4375, + "grad_norm": 1.5245485305786133, + "learning_rate": 0.0001, + "loss": 0.0899, + "step": 10010 + }, + { + "epoch": 28.46590909090909, + "grad_norm": 1.7123736143112183, + "learning_rate": 0.0001, + "loss": 0.0868, + "step": 10020 + }, + { + "epoch": 28.494318181818183, + "grad_norm": 1.572593092918396, + "learning_rate": 0.0001, + "loss": 0.0865, + "step": 10030 + }, + { + "epoch": 28.522727272727273, + "grad_norm": 1.693306565284729, + "learning_rate": 0.0001, + "loss": 0.0904, + "step": 10040 + }, + { + "epoch": 28.551136363636363, + "grad_norm": 1.8397178649902344, + "learning_rate": 0.0001, + "loss": 0.0898, + "step": 10050 + }, + { + "epoch": 28.579545454545453, + "grad_norm": 1.6443665027618408, + "learning_rate": 0.0001, + "loss": 0.0911, + "step": 10060 + }, + { + "epoch": 28.607954545454547, + "grad_norm": 1.5029046535491943, + "learning_rate": 0.0001, + "loss": 0.0918, + "step": 10070 + }, + { + "epoch": 28.636363636363637, + "grad_norm": 1.4545835256576538, + "learning_rate": 0.0001, + "loss": 0.0851, + "step": 10080 + }, + { + "epoch": 28.664772727272727, + "grad_norm": 1.6282371282577515, + "learning_rate": 0.0001, + "loss": 0.0879, + "step": 10090 + }, + { + "epoch": 28.693181818181817, + "grad_norm": 1.659529209136963, + "learning_rate": 0.0001, + "loss": 0.0881, + "step": 10100 + }, + { + "epoch": 28.72159090909091, + "grad_norm": 1.797834873199463, + "learning_rate": 0.0001, + "loss": 0.0919, + "step": 10110 + }, + { + "epoch": 28.75, + "grad_norm": 1.6592271327972412, + "learning_rate": 0.0001, + "loss": 0.0833, + "step": 10120 + }, + { + "epoch": 28.77840909090909, + "grad_norm": 1.4718973636627197, + "learning_rate": 0.0001, + "loss": 0.0918, + "step": 10130 + }, + { + "epoch": 28.806818181818183, + "grad_norm": 1.2769532203674316, + "learning_rate": 0.0001, + "loss": 0.0915, + "step": 10140 + }, + { + "epoch": 28.835227272727273, + "grad_norm": 1.3063241243362427, + "learning_rate": 0.0001, + "loss": 0.0856, + "step": 10150 + }, + { + "epoch": 28.863636363636363, + "grad_norm": 1.497151494026184, + "learning_rate": 0.0001, + "loss": 0.0927, + "step": 10160 + }, + { + "epoch": 28.892045454545453, + "grad_norm": 1.538161277770996, + "learning_rate": 0.0001, + "loss": 0.0849, + "step": 10170 + }, + { + "epoch": 28.920454545454547, + "grad_norm": 1.5118201971054077, + "learning_rate": 0.0001, + "loss": 0.0891, + "step": 10180 + }, + { + "epoch": 28.948863636363637, + "grad_norm": 1.5277782678604126, + "learning_rate": 0.0001, + "loss": 0.0872, + "step": 10190 + }, + { + "epoch": 28.977272727272727, + "grad_norm": 1.4347714185714722, + "learning_rate": 0.0001, + "loss": 0.0868, + "step": 10200 + }, + { + "epoch": 29.005681818181817, + "grad_norm": 1.3337539434432983, + "learning_rate": 0.0001, + "loss": 0.0872, + "step": 10210 + }, + { + "epoch": 29.03409090909091, + "grad_norm": 1.6862537860870361, + "learning_rate": 0.0001, + "loss": 0.0841, + "step": 10220 + }, + { + "epoch": 29.0625, + "grad_norm": 1.4856092929840088, + "learning_rate": 0.0001, + "loss": 0.0844, + "step": 10230 + }, + { + "epoch": 29.09090909090909, + "grad_norm": 1.6301956176757812, + "learning_rate": 0.0001, + "loss": 0.0855, + "step": 10240 + }, + { + "epoch": 29.119318181818183, + "grad_norm": 1.4501256942749023, + "learning_rate": 0.0001, + "loss": 0.0866, + "step": 10250 + }, + { + "epoch": 29.147727272727273, + "grad_norm": 1.4976732730865479, + "learning_rate": 0.0001, + "loss": 0.0871, + "step": 10260 + }, + { + "epoch": 29.176136363636363, + "grad_norm": 1.5367155075073242, + "learning_rate": 0.0001, + "loss": 0.083, + "step": 10270 + }, + { + "epoch": 29.204545454545453, + "grad_norm": 1.4822138547897339, + "learning_rate": 0.0001, + "loss": 0.0877, + "step": 10280 + }, + { + "epoch": 29.232954545454547, + "grad_norm": 1.3127697706222534, + "learning_rate": 0.0001, + "loss": 0.0813, + "step": 10290 + }, + { + "epoch": 29.261363636363637, + "grad_norm": 1.4777271747589111, + "learning_rate": 0.0001, + "loss": 0.0886, + "step": 10300 + }, + { + "epoch": 29.289772727272727, + "grad_norm": 1.4947670698165894, + "learning_rate": 0.0001, + "loss": 0.0887, + "step": 10310 + }, + { + "epoch": 29.318181818181817, + "grad_norm": 1.7451188564300537, + "learning_rate": 0.0001, + "loss": 0.0892, + "step": 10320 + }, + { + "epoch": 29.34659090909091, + "grad_norm": 1.5838991403579712, + "learning_rate": 0.0001, + "loss": 0.0867, + "step": 10330 + }, + { + "epoch": 29.375, + "grad_norm": 1.4703574180603027, + "learning_rate": 0.0001, + "loss": 0.0879, + "step": 10340 + }, + { + "epoch": 29.40340909090909, + "grad_norm": 1.335748553276062, + "learning_rate": 0.0001, + "loss": 0.0838, + "step": 10350 + }, + { + "epoch": 29.431818181818183, + "grad_norm": 1.5957832336425781, + "learning_rate": 0.0001, + "loss": 0.0849, + "step": 10360 + }, + { + "epoch": 29.460227272727273, + "grad_norm": 1.7073551416397095, + "learning_rate": 0.0001, + "loss": 0.0866, + "step": 10370 + }, + { + "epoch": 29.488636363636363, + "grad_norm": 1.4526339769363403, + "learning_rate": 0.0001, + "loss": 0.0864, + "step": 10380 + }, + { + "epoch": 29.517045454545453, + "grad_norm": 1.439193606376648, + "learning_rate": 0.0001, + "loss": 0.0876, + "step": 10390 + }, + { + "epoch": 29.545454545454547, + "grad_norm": 1.460564136505127, + "learning_rate": 0.0001, + "loss": 0.0871, + "step": 10400 + }, + { + "epoch": 29.573863636363637, + "grad_norm": 1.3847678899765015, + "learning_rate": 0.0001, + "loss": 0.0813, + "step": 10410 + }, + { + "epoch": 29.602272727272727, + "grad_norm": 1.4355100393295288, + "learning_rate": 0.0001, + "loss": 0.084, + "step": 10420 + }, + { + "epoch": 29.630681818181817, + "grad_norm": 1.5265635251998901, + "learning_rate": 0.0001, + "loss": 0.085, + "step": 10430 + }, + { + "epoch": 29.65909090909091, + "grad_norm": 1.5522079467773438, + "learning_rate": 0.0001, + "loss": 0.0898, + "step": 10440 + }, + { + "epoch": 29.6875, + "grad_norm": 1.5721166133880615, + "learning_rate": 0.0001, + "loss": 0.0882, + "step": 10450 + }, + { + "epoch": 29.71590909090909, + "grad_norm": 1.7917033433914185, + "learning_rate": 0.0001, + "loss": 0.0908, + "step": 10460 + }, + { + "epoch": 29.744318181818183, + "grad_norm": 1.5711758136749268, + "learning_rate": 0.0001, + "loss": 0.085, + "step": 10470 + }, + { + "epoch": 29.772727272727273, + "grad_norm": 1.7206867933273315, + "learning_rate": 0.0001, + "loss": 0.0861, + "step": 10480 + }, + { + "epoch": 29.801136363636363, + "grad_norm": 2.0375149250030518, + "learning_rate": 0.0001, + "loss": 0.0908, + "step": 10490 + }, + { + "epoch": 29.829545454545453, + "grad_norm": 1.7295266389846802, + "learning_rate": 0.0001, + "loss": 0.0853, + "step": 10500 + }, + { + "epoch": 29.857954545454547, + "grad_norm": 1.5999189615249634, + "learning_rate": 0.0001, + "loss": 0.0831, + "step": 10510 + }, + { + "epoch": 29.886363636363637, + "grad_norm": 1.6992350816726685, + "learning_rate": 0.0001, + "loss": 0.0891, + "step": 10520 + }, + { + "epoch": 29.914772727272727, + "grad_norm": 1.819216012954712, + "learning_rate": 0.0001, + "loss": 0.0801, + "step": 10530 + }, + { + "epoch": 29.943181818181817, + "grad_norm": 1.4884485006332397, + "learning_rate": 0.0001, + "loss": 0.0881, + "step": 10540 + }, + { + "epoch": 29.97159090909091, + "grad_norm": 1.6694735288619995, + "learning_rate": 0.0001, + "loss": 0.0832, + "step": 10550 + }, + { + "epoch": 30.0, + "grad_norm": 1.6155649423599243, + "learning_rate": 0.0001, + "loss": 0.0838, + "step": 10560 + }, + { + "epoch": 30.02840909090909, + "grad_norm": 1.3682494163513184, + "learning_rate": 0.0001, + "loss": 0.081, + "step": 10570 + }, + { + "epoch": 30.056818181818183, + "grad_norm": 1.2710999250411987, + "learning_rate": 0.0001, + "loss": 0.0797, + "step": 10580 + }, + { + "epoch": 30.085227272727273, + "grad_norm": 1.2718095779418945, + "learning_rate": 0.0001, + "loss": 0.0838, + "step": 10590 + }, + { + "epoch": 30.113636363636363, + "grad_norm": 1.4790953397750854, + "learning_rate": 0.0001, + "loss": 0.0832, + "step": 10600 + }, + { + "epoch": 30.142045454545453, + "grad_norm": 4.220450401306152, + "learning_rate": 0.0001, + "loss": 0.0899, + "step": 10610 + }, + { + "epoch": 30.170454545454547, + "grad_norm": 3.7375528812408447, + "learning_rate": 0.0001, + "loss": 0.0921, + "step": 10620 + }, + { + "epoch": 30.198863636363637, + "grad_norm": 2.229771614074707, + "learning_rate": 0.0001, + "loss": 0.0843, + "step": 10630 + }, + { + "epoch": 30.227272727272727, + "grad_norm": 1.9127345085144043, + "learning_rate": 0.0001, + "loss": 0.0843, + "step": 10640 + }, + { + "epoch": 30.255681818181817, + "grad_norm": 1.7945572137832642, + "learning_rate": 0.0001, + "loss": 0.0855, + "step": 10650 + }, + { + "epoch": 30.28409090909091, + "grad_norm": 2.2238516807556152, + "learning_rate": 0.0001, + "loss": 0.08, + "step": 10660 + }, + { + "epoch": 30.3125, + "grad_norm": 2.0408074855804443, + "learning_rate": 0.0001, + "loss": 0.0812, + "step": 10670 + }, + { + "epoch": 30.34090909090909, + "grad_norm": 1.6794369220733643, + "learning_rate": 0.0001, + "loss": 0.0808, + "step": 10680 + }, + { + "epoch": 30.369318181818183, + "grad_norm": 1.8030424118041992, + "learning_rate": 0.0001, + "loss": 0.0819, + "step": 10690 + }, + { + "epoch": 30.397727272727273, + "grad_norm": 1.8233095407485962, + "learning_rate": 0.0001, + "loss": 0.0819, + "step": 10700 + }, + { + "epoch": 30.426136363636363, + "grad_norm": 1.6274789571762085, + "learning_rate": 0.0001, + "loss": 0.0806, + "step": 10710 + }, + { + "epoch": 30.454545454545453, + "grad_norm": 1.6231640577316284, + "learning_rate": 0.0001, + "loss": 0.0777, + "step": 10720 + }, + { + "epoch": 30.482954545454547, + "grad_norm": 1.6162152290344238, + "learning_rate": 0.0001, + "loss": 0.0864, + "step": 10730 + }, + { + "epoch": 30.511363636363637, + "grad_norm": 1.9388537406921387, + "learning_rate": 0.0001, + "loss": 0.0833, + "step": 10740 + }, + { + "epoch": 30.539772727272727, + "grad_norm": 1.3377724885940552, + "learning_rate": 0.0001, + "loss": 0.0836, + "step": 10750 + }, + { + "epoch": 30.568181818181817, + "grad_norm": 1.4415621757507324, + "learning_rate": 0.0001, + "loss": 0.0805, + "step": 10760 + }, + { + "epoch": 30.59659090909091, + "grad_norm": 1.4234329462051392, + "learning_rate": 0.0001, + "loss": 0.088, + "step": 10770 + }, + { + "epoch": 30.625, + "grad_norm": 1.5712944269180298, + "learning_rate": 0.0001, + "loss": 0.0866, + "step": 10780 + }, + { + "epoch": 30.65340909090909, + "grad_norm": 1.3370726108551025, + "learning_rate": 0.0001, + "loss": 0.086, + "step": 10790 + }, + { + "epoch": 30.681818181818183, + "grad_norm": 1.976441502571106, + "learning_rate": 0.0001, + "loss": 0.0894, + "step": 10800 + }, + { + "epoch": 30.710227272727273, + "grad_norm": 1.6814191341400146, + "learning_rate": 0.0001, + "loss": 0.083, + "step": 10810 + }, + { + "epoch": 30.738636363636363, + "grad_norm": 1.406453013420105, + "learning_rate": 0.0001, + "loss": 0.087, + "step": 10820 + }, + { + "epoch": 30.767045454545453, + "grad_norm": 1.406148910522461, + "learning_rate": 0.0001, + "loss": 0.0879, + "step": 10830 + }, + { + "epoch": 30.795454545454547, + "grad_norm": 1.2990154027938843, + "learning_rate": 0.0001, + "loss": 0.0826, + "step": 10840 + }, + { + "epoch": 30.823863636363637, + "grad_norm": 1.3759732246398926, + "learning_rate": 0.0001, + "loss": 0.0826, + "step": 10850 + }, + { + "epoch": 30.852272727272727, + "grad_norm": 1.6295632123947144, + "learning_rate": 0.0001, + "loss": 0.086, + "step": 10860 + }, + { + "epoch": 30.880681818181817, + "grad_norm": 1.4913337230682373, + "learning_rate": 0.0001, + "loss": 0.0879, + "step": 10870 + }, + { + "epoch": 30.90909090909091, + "grad_norm": 1.6488226652145386, + "learning_rate": 0.0001, + "loss": 0.0865, + "step": 10880 + }, + { + "epoch": 30.9375, + "grad_norm": 1.4675461053848267, + "learning_rate": 0.0001, + "loss": 0.0819, + "step": 10890 + }, + { + "epoch": 30.96590909090909, + "grad_norm": 1.4237877130508423, + "learning_rate": 0.0001, + "loss": 0.0878, + "step": 10900 + }, + { + "epoch": 30.994318181818183, + "grad_norm": 1.673284888267517, + "learning_rate": 0.0001, + "loss": 0.0876, + "step": 10910 + }, + { + "epoch": 31.022727272727273, + "grad_norm": 1.6682919263839722, + "learning_rate": 0.0001, + "loss": 0.0815, + "step": 10920 + }, + { + "epoch": 31.051136363636363, + "grad_norm": 1.567307949066162, + "learning_rate": 0.0001, + "loss": 0.0914, + "step": 10930 + }, + { + "epoch": 31.079545454545453, + "grad_norm": 1.646187424659729, + "learning_rate": 0.0001, + "loss": 0.0891, + "step": 10940 + }, + { + "epoch": 31.107954545454547, + "grad_norm": 1.3649544715881348, + "learning_rate": 0.0001, + "loss": 0.0822, + "step": 10950 + }, + { + "epoch": 31.136363636363637, + "grad_norm": 1.5282686948776245, + "learning_rate": 0.0001, + "loss": 0.0854, + "step": 10960 + }, + { + "epoch": 31.164772727272727, + "grad_norm": 1.5806162357330322, + "learning_rate": 0.0001, + "loss": 0.0835, + "step": 10970 + }, + { + "epoch": 31.193181818181817, + "grad_norm": 1.44295334815979, + "learning_rate": 0.0001, + "loss": 0.0872, + "step": 10980 + }, + { + "epoch": 31.22159090909091, + "grad_norm": 1.354772925376892, + "learning_rate": 0.0001, + "loss": 0.0843, + "step": 10990 + }, + { + "epoch": 31.25, + "grad_norm": 1.0891101360321045, + "learning_rate": 0.0001, + "loss": 0.0873, + "step": 11000 + }, + { + "epoch": 31.27840909090909, + "grad_norm": 1.6048698425292969, + "learning_rate": 0.0001, + "loss": 0.0872, + "step": 11010 + }, + { + "epoch": 31.306818181818183, + "grad_norm": 1.6361032724380493, + "learning_rate": 0.0001, + "loss": 0.084, + "step": 11020 + }, + { + "epoch": 31.335227272727273, + "grad_norm": 1.4586684703826904, + "learning_rate": 0.0001, + "loss": 0.0855, + "step": 11030 + }, + { + "epoch": 31.363636363636363, + "grad_norm": 1.440597653388977, + "learning_rate": 0.0001, + "loss": 0.0847, + "step": 11040 + }, + { + "epoch": 31.392045454545453, + "grad_norm": 1.579797387123108, + "learning_rate": 0.0001, + "loss": 0.085, + "step": 11050 + }, + { + "epoch": 31.420454545454547, + "grad_norm": 1.5352915525436401, + "learning_rate": 0.0001, + "loss": 0.0847, + "step": 11060 + }, + { + "epoch": 31.448863636363637, + "grad_norm": 1.5840104818344116, + "learning_rate": 0.0001, + "loss": 0.0853, + "step": 11070 + }, + { + "epoch": 31.477272727272727, + "grad_norm": 1.4220658540725708, + "learning_rate": 0.0001, + "loss": 0.0834, + "step": 11080 + }, + { + "epoch": 31.505681818181817, + "grad_norm": 1.3997197151184082, + "learning_rate": 0.0001, + "loss": 0.0874, + "step": 11090 + }, + { + "epoch": 31.53409090909091, + "grad_norm": 1.4210138320922852, + "learning_rate": 0.0001, + "loss": 0.0884, + "step": 11100 + }, + { + "epoch": 31.5625, + "grad_norm": 1.6847612857818604, + "learning_rate": 0.0001, + "loss": 0.0881, + "step": 11110 + }, + { + "epoch": 31.59090909090909, + "grad_norm": 1.4961141347885132, + "learning_rate": 0.0001, + "loss": 0.0891, + "step": 11120 + }, + { + "epoch": 31.619318181818183, + "grad_norm": 1.3087717294692993, + "learning_rate": 0.0001, + "loss": 0.0857, + "step": 11130 + }, + { + "epoch": 31.647727272727273, + "grad_norm": 1.3386777639389038, + "learning_rate": 0.0001, + "loss": 0.0828, + "step": 11140 + }, + { + "epoch": 31.676136363636363, + "grad_norm": 1.3143402338027954, + "learning_rate": 0.0001, + "loss": 0.0859, + "step": 11150 + }, + { + "epoch": 31.704545454545453, + "grad_norm": 2.002627372741699, + "learning_rate": 0.0001, + "loss": 0.0895, + "step": 11160 + }, + { + "epoch": 31.732954545454547, + "grad_norm": 1.8589037656784058, + "learning_rate": 0.0001, + "loss": 0.0922, + "step": 11170 + }, + { + "epoch": 31.761363636363637, + "grad_norm": 1.910962700843811, + "learning_rate": 0.0001, + "loss": 0.0927, + "step": 11180 + }, + { + "epoch": 31.789772727272727, + "grad_norm": 1.631377100944519, + "learning_rate": 0.0001, + "loss": 0.091, + "step": 11190 + }, + { + "epoch": 31.818181818181817, + "grad_norm": 1.6555157899856567, + "learning_rate": 0.0001, + "loss": 0.0869, + "step": 11200 + }, + { + "epoch": 31.84659090909091, + "grad_norm": 1.6346405744552612, + "learning_rate": 0.0001, + "loss": 0.0883, + "step": 11210 + }, + { + "epoch": 31.875, + "grad_norm": 1.2686203718185425, + "learning_rate": 0.0001, + "loss": 0.0856, + "step": 11220 + }, + { + "epoch": 31.90340909090909, + "grad_norm": 1.062166690826416, + "learning_rate": 0.0001, + "loss": 0.0823, + "step": 11230 + }, + { + "epoch": 31.931818181818183, + "grad_norm": 1.0907399654388428, + "learning_rate": 0.0001, + "loss": 0.0825, + "step": 11240 + }, + { + "epoch": 31.960227272727273, + "grad_norm": 1.4261188507080078, + "learning_rate": 0.0001, + "loss": 0.0857, + "step": 11250 + }, + { + "epoch": 31.988636363636363, + "grad_norm": 1.3329896926879883, + "learning_rate": 0.0001, + "loss": 0.083, + "step": 11260 + }, + { + "epoch": 32.01704545454545, + "grad_norm": 1.3007625341415405, + "learning_rate": 0.0001, + "loss": 0.0843, + "step": 11270 + }, + { + "epoch": 32.04545454545455, + "grad_norm": 1.3520420789718628, + "learning_rate": 0.0001, + "loss": 0.0834, + "step": 11280 + }, + { + "epoch": 32.07386363636363, + "grad_norm": 1.2438174486160278, + "learning_rate": 0.0001, + "loss": 0.0852, + "step": 11290 + }, + { + "epoch": 32.10227272727273, + "grad_norm": 1.422757863998413, + "learning_rate": 0.0001, + "loss": 0.0834, + "step": 11300 + }, + { + "epoch": 32.13068181818182, + "grad_norm": 1.2228397130966187, + "learning_rate": 0.0001, + "loss": 0.0852, + "step": 11310 + }, + { + "epoch": 32.15909090909091, + "grad_norm": 1.2606850862503052, + "learning_rate": 0.0001, + "loss": 0.0853, + "step": 11320 + }, + { + "epoch": 32.1875, + "grad_norm": 1.2866079807281494, + "learning_rate": 0.0001, + "loss": 0.0834, + "step": 11330 + }, + { + "epoch": 32.21590909090909, + "grad_norm": 1.2392500638961792, + "learning_rate": 0.0001, + "loss": 0.0808, + "step": 11340 + }, + { + "epoch": 32.24431818181818, + "grad_norm": 1.3232767581939697, + "learning_rate": 0.0001, + "loss": 0.0842, + "step": 11350 + }, + { + "epoch": 32.27272727272727, + "grad_norm": 1.2873938083648682, + "learning_rate": 0.0001, + "loss": 0.086, + "step": 11360 + }, + { + "epoch": 32.30113636363637, + "grad_norm": 1.2557556629180908, + "learning_rate": 0.0001, + "loss": 0.0828, + "step": 11370 + }, + { + "epoch": 32.32954545454545, + "grad_norm": 1.2549748420715332, + "learning_rate": 0.0001, + "loss": 0.0877, + "step": 11380 + }, + { + "epoch": 32.35795454545455, + "grad_norm": 1.199981689453125, + "learning_rate": 0.0001, + "loss": 0.0876, + "step": 11390 + }, + { + "epoch": 32.38636363636363, + "grad_norm": 1.204467535018921, + "learning_rate": 0.0001, + "loss": 0.0804, + "step": 11400 + }, + { + "epoch": 32.41477272727273, + "grad_norm": 1.5204373598098755, + "learning_rate": 0.0001, + "loss": 0.087, + "step": 11410 + }, + { + "epoch": 32.44318181818182, + "grad_norm": 1.773655652999878, + "learning_rate": 0.0001, + "loss": 0.0824, + "step": 11420 + }, + { + "epoch": 32.47159090909091, + "grad_norm": 2.06709885597229, + "learning_rate": 0.0001, + "loss": 0.0848, + "step": 11430 + }, + { + "epoch": 32.5, + "grad_norm": 1.8769580125808716, + "learning_rate": 0.0001, + "loss": 0.0848, + "step": 11440 + }, + { + "epoch": 32.52840909090909, + "grad_norm": 1.8511193990707397, + "learning_rate": 0.0001, + "loss": 0.0834, + "step": 11450 + }, + { + "epoch": 32.55681818181818, + "grad_norm": 1.5107983350753784, + "learning_rate": 0.0001, + "loss": 0.0855, + "step": 11460 + }, + { + "epoch": 32.58522727272727, + "grad_norm": 1.7288358211517334, + "learning_rate": 0.0001, + "loss": 0.0806, + "step": 11470 + }, + { + "epoch": 32.61363636363637, + "grad_norm": 1.7605435848236084, + "learning_rate": 0.0001, + "loss": 0.0795, + "step": 11480 + }, + { + "epoch": 32.64204545454545, + "grad_norm": 1.3793346881866455, + "learning_rate": 0.0001, + "loss": 0.0768, + "step": 11490 + }, + { + "epoch": 32.67045454545455, + "grad_norm": 1.468401551246643, + "learning_rate": 0.0001, + "loss": 0.0773, + "step": 11500 + }, + { + "epoch": 32.69886363636363, + "grad_norm": 1.4680513143539429, + "learning_rate": 0.0001, + "loss": 0.0832, + "step": 11510 + }, + { + "epoch": 32.72727272727273, + "grad_norm": 1.3982374668121338, + "learning_rate": 0.0001, + "loss": 0.081, + "step": 11520 + }, + { + "epoch": 32.75568181818182, + "grad_norm": 1.5383310317993164, + "learning_rate": 0.0001, + "loss": 0.0809, + "step": 11530 + }, + { + "epoch": 32.78409090909091, + "grad_norm": 1.2121058702468872, + "learning_rate": 0.0001, + "loss": 0.078, + "step": 11540 + }, + { + "epoch": 32.8125, + "grad_norm": 1.4773236513137817, + "learning_rate": 0.0001, + "loss": 0.0792, + "step": 11550 + }, + { + "epoch": 32.84090909090909, + "grad_norm": 1.153468370437622, + "learning_rate": 0.0001, + "loss": 0.0786, + "step": 11560 + }, + { + "epoch": 32.86931818181818, + "grad_norm": 1.1868444681167603, + "learning_rate": 0.0001, + "loss": 0.0839, + "step": 11570 + }, + { + "epoch": 32.89772727272727, + "grad_norm": 1.2777554988861084, + "learning_rate": 0.0001, + "loss": 0.0803, + "step": 11580 + }, + { + "epoch": 32.92613636363637, + "grad_norm": 1.643078088760376, + "learning_rate": 0.0001, + "loss": 0.0798, + "step": 11590 + }, + { + "epoch": 32.95454545454545, + "grad_norm": 1.6660960912704468, + "learning_rate": 0.0001, + "loss": 0.0804, + "step": 11600 + }, + { + "epoch": 32.98295454545455, + "grad_norm": 1.773538589477539, + "learning_rate": 0.0001, + "loss": 0.0779, + "step": 11610 + }, + { + "epoch": 33.01136363636363, + "grad_norm": 1.5937694311141968, + "learning_rate": 0.0001, + "loss": 0.0801, + "step": 11620 + }, + { + "epoch": 33.03977272727273, + "grad_norm": 1.380580186843872, + "learning_rate": 0.0001, + "loss": 0.0763, + "step": 11630 + }, + { + "epoch": 33.06818181818182, + "grad_norm": 1.3506697416305542, + "learning_rate": 0.0001, + "loss": 0.0765, + "step": 11640 + }, + { + "epoch": 33.09659090909091, + "grad_norm": 1.3326584100723267, + "learning_rate": 0.0001, + "loss": 0.0845, + "step": 11650 + }, + { + "epoch": 33.125, + "grad_norm": 1.3809016942977905, + "learning_rate": 0.0001, + "loss": 0.0786, + "step": 11660 + }, + { + "epoch": 33.15340909090909, + "grad_norm": 1.5046674013137817, + "learning_rate": 0.0001, + "loss": 0.0781, + "step": 11670 + }, + { + "epoch": 33.18181818181818, + "grad_norm": 1.373900294303894, + "learning_rate": 0.0001, + "loss": 0.0794, + "step": 11680 + }, + { + "epoch": 33.21022727272727, + "grad_norm": 1.6719146966934204, + "learning_rate": 0.0001, + "loss": 0.0828, + "step": 11690 + }, + { + "epoch": 33.23863636363637, + "grad_norm": 1.2766826152801514, + "learning_rate": 0.0001, + "loss": 0.0841, + "step": 11700 + }, + { + "epoch": 33.26704545454545, + "grad_norm": 1.2881532907485962, + "learning_rate": 0.0001, + "loss": 0.0781, + "step": 11710 + }, + { + "epoch": 33.29545454545455, + "grad_norm": 1.5469038486480713, + "learning_rate": 0.0001, + "loss": 0.0814, + "step": 11720 + }, + { + "epoch": 33.32386363636363, + "grad_norm": 1.404578685760498, + "learning_rate": 0.0001, + "loss": 0.0818, + "step": 11730 + }, + { + "epoch": 33.35227272727273, + "grad_norm": 1.504791021347046, + "learning_rate": 0.0001, + "loss": 0.0773, + "step": 11740 + }, + { + "epoch": 33.38068181818182, + "grad_norm": 1.8719531297683716, + "learning_rate": 0.0001, + "loss": 0.0797, + "step": 11750 + }, + { + "epoch": 33.40909090909091, + "grad_norm": 1.418943166732788, + "learning_rate": 0.0001, + "loss": 0.0768, + "step": 11760 + }, + { + "epoch": 33.4375, + "grad_norm": 1.6536691188812256, + "learning_rate": 0.0001, + "loss": 0.0772, + "step": 11770 + }, + { + "epoch": 33.46590909090909, + "grad_norm": 1.4639066457748413, + "learning_rate": 0.0001, + "loss": 0.076, + "step": 11780 + }, + { + "epoch": 33.49431818181818, + "grad_norm": 1.4900221824645996, + "learning_rate": 0.0001, + "loss": 0.0816, + "step": 11790 + }, + { + "epoch": 33.52272727272727, + "grad_norm": 1.6789690256118774, + "learning_rate": 0.0001, + "loss": 0.0807, + "step": 11800 + }, + { + "epoch": 33.55113636363637, + "grad_norm": 1.3068170547485352, + "learning_rate": 0.0001, + "loss": 0.083, + "step": 11810 + }, + { + "epoch": 33.57954545454545, + "grad_norm": 1.1812000274658203, + "learning_rate": 0.0001, + "loss": 0.0844, + "step": 11820 + }, + { + "epoch": 33.60795454545455, + "grad_norm": 1.303970456123352, + "learning_rate": 0.0001, + "loss": 0.077, + "step": 11830 + }, + { + "epoch": 33.63636363636363, + "grad_norm": 1.3309065103530884, + "learning_rate": 0.0001, + "loss": 0.08, + "step": 11840 + }, + { + "epoch": 33.66477272727273, + "grad_norm": 1.4080289602279663, + "learning_rate": 0.0001, + "loss": 0.0801, + "step": 11850 + }, + { + "epoch": 33.69318181818182, + "grad_norm": 1.5647964477539062, + "learning_rate": 0.0001, + "loss": 0.0822, + "step": 11860 + }, + { + "epoch": 33.72159090909091, + "grad_norm": 1.4176783561706543, + "learning_rate": 0.0001, + "loss": 0.0827, + "step": 11870 + }, + { + "epoch": 33.75, + "grad_norm": 1.288572072982788, + "learning_rate": 0.0001, + "loss": 0.079, + "step": 11880 + }, + { + "epoch": 33.77840909090909, + "grad_norm": 1.22144615650177, + "learning_rate": 0.0001, + "loss": 0.0835, + "step": 11890 + }, + { + "epoch": 33.80681818181818, + "grad_norm": 1.6118239164352417, + "learning_rate": 0.0001, + "loss": 0.0823, + "step": 11900 + }, + { + "epoch": 33.83522727272727, + "grad_norm": 1.3921666145324707, + "learning_rate": 0.0001, + "loss": 0.0801, + "step": 11910 + }, + { + "epoch": 33.86363636363637, + "grad_norm": 1.2439217567443848, + "learning_rate": 0.0001, + "loss": 0.081, + "step": 11920 + }, + { + "epoch": 33.89204545454545, + "grad_norm": 1.4157015085220337, + "learning_rate": 0.0001, + "loss": 0.0792, + "step": 11930 + }, + { + "epoch": 33.92045454545455, + "grad_norm": 1.2521430253982544, + "learning_rate": 0.0001, + "loss": 0.0815, + "step": 11940 + }, + { + "epoch": 33.94886363636363, + "grad_norm": 1.3754600286483765, + "learning_rate": 0.0001, + "loss": 0.0816, + "step": 11950 + }, + { + "epoch": 33.97727272727273, + "grad_norm": 1.2032493352890015, + "learning_rate": 0.0001, + "loss": 0.0788, + "step": 11960 + }, + { + "epoch": 34.00568181818182, + "grad_norm": 1.2101504802703857, + "learning_rate": 0.0001, + "loss": 0.0782, + "step": 11970 + }, + { + "epoch": 34.03409090909091, + "grad_norm": 1.4837169647216797, + "learning_rate": 0.0001, + "loss": 0.0785, + "step": 11980 + }, + { + "epoch": 34.0625, + "grad_norm": 1.3385684490203857, + "learning_rate": 0.0001, + "loss": 0.079, + "step": 11990 + }, + { + "epoch": 34.09090909090909, + "grad_norm": 1.264683723449707, + "learning_rate": 0.0001, + "loss": 0.0773, + "step": 12000 + }, + { + "epoch": 34.11931818181818, + "grad_norm": 1.3990920782089233, + "learning_rate": 0.0001, + "loss": 0.076, + "step": 12010 + }, + { + "epoch": 34.14772727272727, + "grad_norm": 1.4375253915786743, + "learning_rate": 0.0001, + "loss": 0.0832, + "step": 12020 + }, + { + "epoch": 34.17613636363637, + "grad_norm": 1.555679202079773, + "learning_rate": 0.0001, + "loss": 0.0784, + "step": 12030 + }, + { + "epoch": 34.20454545454545, + "grad_norm": 1.1317201852798462, + "learning_rate": 0.0001, + "loss": 0.0825, + "step": 12040 + }, + { + "epoch": 34.23295454545455, + "grad_norm": 1.1169489622116089, + "learning_rate": 0.0001, + "loss": 0.0768, + "step": 12050 + }, + { + "epoch": 34.26136363636363, + "grad_norm": 1.3788570165634155, + "learning_rate": 0.0001, + "loss": 0.0773, + "step": 12060 + }, + { + "epoch": 34.28977272727273, + "grad_norm": 1.4683411121368408, + "learning_rate": 0.0001, + "loss": 0.0808, + "step": 12070 + }, + { + "epoch": 34.31818181818182, + "grad_norm": 1.2734488248825073, + "learning_rate": 0.0001, + "loss": 0.0799, + "step": 12080 + }, + { + "epoch": 34.34659090909091, + "grad_norm": 1.175338625907898, + "learning_rate": 0.0001, + "loss": 0.086, + "step": 12090 + }, + { + "epoch": 34.375, + "grad_norm": 1.2136415243148804, + "learning_rate": 0.0001, + "loss": 0.0827, + "step": 12100 + }, + { + "epoch": 34.40340909090909, + "grad_norm": 1.1225967407226562, + "learning_rate": 0.0001, + "loss": 0.0801, + "step": 12110 + }, + { + "epoch": 34.43181818181818, + "grad_norm": 1.2012512683868408, + "learning_rate": 0.0001, + "loss": 0.0796, + "step": 12120 + }, + { + "epoch": 34.46022727272727, + "grad_norm": 1.3884018659591675, + "learning_rate": 0.0001, + "loss": 0.0818, + "step": 12130 + }, + { + "epoch": 34.48863636363637, + "grad_norm": 1.3851127624511719, + "learning_rate": 0.0001, + "loss": 0.0788, + "step": 12140 + }, + { + "epoch": 34.51704545454545, + "grad_norm": 1.442125678062439, + "learning_rate": 0.0001, + "loss": 0.078, + "step": 12150 + }, + { + "epoch": 34.54545454545455, + "grad_norm": 1.3751837015151978, + "learning_rate": 0.0001, + "loss": 0.0795, + "step": 12160 + }, + { + "epoch": 34.57386363636363, + "grad_norm": 1.5753248929977417, + "learning_rate": 0.0001, + "loss": 0.078, + "step": 12170 + }, + { + "epoch": 34.60227272727273, + "grad_norm": 1.5038411617279053, + "learning_rate": 0.0001, + "loss": 0.0804, + "step": 12180 + }, + { + "epoch": 34.63068181818182, + "grad_norm": 1.1918764114379883, + "learning_rate": 0.0001, + "loss": 0.0789, + "step": 12190 + }, + { + "epoch": 34.65909090909091, + "grad_norm": 1.28202223777771, + "learning_rate": 0.0001, + "loss": 0.0805, + "step": 12200 + }, + { + "epoch": 34.6875, + "grad_norm": 1.1731418371200562, + "learning_rate": 0.0001, + "loss": 0.0844, + "step": 12210 + }, + { + "epoch": 34.71590909090909, + "grad_norm": 1.4400376081466675, + "learning_rate": 0.0001, + "loss": 0.0793, + "step": 12220 + }, + { + "epoch": 34.74431818181818, + "grad_norm": 1.279740810394287, + "learning_rate": 0.0001, + "loss": 0.0837, + "step": 12230 + }, + { + "epoch": 34.77272727272727, + "grad_norm": 1.084643840789795, + "learning_rate": 0.0001, + "loss": 0.0785, + "step": 12240 + }, + { + "epoch": 34.80113636363637, + "grad_norm": 0.9900233745574951, + "learning_rate": 0.0001, + "loss": 0.0761, + "step": 12250 + }, + { + "epoch": 34.82954545454545, + "grad_norm": 1.505629539489746, + "learning_rate": 0.0001, + "loss": 0.0811, + "step": 12260 + }, + { + "epoch": 34.85795454545455, + "grad_norm": 1.2708748579025269, + "learning_rate": 0.0001, + "loss": 0.0805, + "step": 12270 + }, + { + "epoch": 34.88636363636363, + "grad_norm": 1.3224505186080933, + "learning_rate": 0.0001, + "loss": 0.0797, + "step": 12280 + }, + { + "epoch": 34.91477272727273, + "grad_norm": 1.1570143699645996, + "learning_rate": 0.0001, + "loss": 0.08, + "step": 12290 + }, + { + "epoch": 34.94318181818182, + "grad_norm": 1.260339379310608, + "learning_rate": 0.0001, + "loss": 0.0788, + "step": 12300 + }, + { + "epoch": 34.97159090909091, + "grad_norm": 1.1624943017959595, + "learning_rate": 0.0001, + "loss": 0.0794, + "step": 12310 + }, + { + "epoch": 35.0, + "grad_norm": 1.2895053625106812, + "learning_rate": 0.0001, + "loss": 0.0772, + "step": 12320 + }, + { + "epoch": 35.02840909090909, + "grad_norm": 1.2539187669754028, + "learning_rate": 0.0001, + "loss": 0.0751, + "step": 12330 + }, + { + "epoch": 35.05681818181818, + "grad_norm": 1.303320288658142, + "learning_rate": 0.0001, + "loss": 0.0853, + "step": 12340 + }, + { + "epoch": 35.08522727272727, + "grad_norm": 1.1098580360412598, + "learning_rate": 0.0001, + "loss": 0.082, + "step": 12350 + }, + { + "epoch": 35.11363636363637, + "grad_norm": 1.5412824153900146, + "learning_rate": 0.0001, + "loss": 0.0832, + "step": 12360 + }, + { + "epoch": 35.14204545454545, + "grad_norm": 1.3514188528060913, + "learning_rate": 0.0001, + "loss": 0.0761, + "step": 12370 + }, + { + "epoch": 35.17045454545455, + "grad_norm": 1.380109190940857, + "learning_rate": 0.0001, + "loss": 0.0747, + "step": 12380 + }, + { + "epoch": 35.19886363636363, + "grad_norm": 1.1679573059082031, + "learning_rate": 0.0001, + "loss": 0.0797, + "step": 12390 + }, + { + "epoch": 35.22727272727273, + "grad_norm": 1.2729599475860596, + "learning_rate": 0.0001, + "loss": 0.0795, + "step": 12400 + }, + { + "epoch": 35.25568181818182, + "grad_norm": 1.273162841796875, + "learning_rate": 0.0001, + "loss": 0.0801, + "step": 12410 + }, + { + "epoch": 35.28409090909091, + "grad_norm": 1.4882563352584839, + "learning_rate": 0.0001, + "loss": 0.0781, + "step": 12420 + }, + { + "epoch": 35.3125, + "grad_norm": 1.2322689294815063, + "learning_rate": 0.0001, + "loss": 0.082, + "step": 12430 + }, + { + "epoch": 35.34090909090909, + "grad_norm": 1.324062705039978, + "learning_rate": 0.0001, + "loss": 0.0762, + "step": 12440 + }, + { + "epoch": 35.36931818181818, + "grad_norm": 1.236574411392212, + "learning_rate": 0.0001, + "loss": 0.0766, + "step": 12450 + }, + { + "epoch": 35.39772727272727, + "grad_norm": 1.17245614528656, + "learning_rate": 0.0001, + "loss": 0.0785, + "step": 12460 + }, + { + "epoch": 35.42613636363637, + "grad_norm": 1.337213397026062, + "learning_rate": 0.0001, + "loss": 0.0811, + "step": 12470 + }, + { + "epoch": 35.45454545454545, + "grad_norm": 1.299586534500122, + "learning_rate": 0.0001, + "loss": 0.0821, + "step": 12480 + }, + { + "epoch": 35.48295454545455, + "grad_norm": 1.2936147451400757, + "learning_rate": 0.0001, + "loss": 0.0799, + "step": 12490 + }, + { + "epoch": 35.51136363636363, + "grad_norm": 1.203636646270752, + "learning_rate": 0.0001, + "loss": 0.0826, + "step": 12500 + }, + { + "epoch": 35.53977272727273, + "grad_norm": 1.260736346244812, + "learning_rate": 0.0001, + "loss": 0.0754, + "step": 12510 + }, + { + "epoch": 35.56818181818182, + "grad_norm": 1.2215540409088135, + "learning_rate": 0.0001, + "loss": 0.0806, + "step": 12520 + }, + { + "epoch": 35.59659090909091, + "grad_norm": 1.176795244216919, + "learning_rate": 0.0001, + "loss": 0.0741, + "step": 12530 + }, + { + "epoch": 35.625, + "grad_norm": 1.3341177701950073, + "learning_rate": 0.0001, + "loss": 0.0804, + "step": 12540 + }, + { + "epoch": 35.65340909090909, + "grad_norm": 1.2871227264404297, + "learning_rate": 0.0001, + "loss": 0.0839, + "step": 12550 + }, + { + "epoch": 35.68181818181818, + "grad_norm": 1.2276510000228882, + "learning_rate": 0.0001, + "loss": 0.0824, + "step": 12560 + }, + { + "epoch": 35.71022727272727, + "grad_norm": 1.1885565519332886, + "learning_rate": 0.0001, + "loss": 0.0794, + "step": 12570 + }, + { + "epoch": 35.73863636363637, + "grad_norm": 1.3643691539764404, + "learning_rate": 0.0001, + "loss": 0.0829, + "step": 12580 + }, + { + "epoch": 35.76704545454545, + "grad_norm": 1.1791919469833374, + "learning_rate": 0.0001, + "loss": 0.0798, + "step": 12590 + }, + { + "epoch": 35.79545454545455, + "grad_norm": 1.14838707447052, + "learning_rate": 0.0001, + "loss": 0.077, + "step": 12600 + }, + { + "epoch": 35.82386363636363, + "grad_norm": 1.418837308883667, + "learning_rate": 0.0001, + "loss": 0.0773, + "step": 12610 + }, + { + "epoch": 35.85227272727273, + "grad_norm": 1.3161529302597046, + "learning_rate": 0.0001, + "loss": 0.0827, + "step": 12620 + }, + { + "epoch": 35.88068181818182, + "grad_norm": 1.7161197662353516, + "learning_rate": 0.0001, + "loss": 0.0823, + "step": 12630 + }, + { + "epoch": 35.90909090909091, + "grad_norm": 1.4447532892227173, + "learning_rate": 0.0001, + "loss": 0.0795, + "step": 12640 + }, + { + "epoch": 35.9375, + "grad_norm": 1.743769884109497, + "learning_rate": 0.0001, + "loss": 0.0742, + "step": 12650 + }, + { + "epoch": 35.96590909090909, + "grad_norm": 1.4263979196548462, + "learning_rate": 0.0001, + "loss": 0.0773, + "step": 12660 + }, + { + "epoch": 35.99431818181818, + "grad_norm": 1.431891679763794, + "learning_rate": 0.0001, + "loss": 0.0765, + "step": 12670 + }, + { + "epoch": 36.02272727272727, + "grad_norm": 1.2072255611419678, + "learning_rate": 0.0001, + "loss": 0.0764, + "step": 12680 + }, + { + "epoch": 36.05113636363637, + "grad_norm": 1.2662785053253174, + "learning_rate": 0.0001, + "loss": 0.0763, + "step": 12690 + }, + { + "epoch": 36.07954545454545, + "grad_norm": 1.184961199760437, + "learning_rate": 0.0001, + "loss": 0.0696, + "step": 12700 + }, + { + "epoch": 36.10795454545455, + "grad_norm": 1.1873055696487427, + "learning_rate": 0.0001, + "loss": 0.0758, + "step": 12710 + }, + { + "epoch": 36.13636363636363, + "grad_norm": 1.1388927698135376, + "learning_rate": 0.0001, + "loss": 0.0731, + "step": 12720 + }, + { + "epoch": 36.16477272727273, + "grad_norm": 1.415749430656433, + "learning_rate": 0.0001, + "loss": 0.0774, + "step": 12730 + }, + { + "epoch": 36.19318181818182, + "grad_norm": 1.6537916660308838, + "learning_rate": 0.0001, + "loss": 0.0806, + "step": 12740 + }, + { + "epoch": 36.22159090909091, + "grad_norm": 1.479551076889038, + "learning_rate": 0.0001, + "loss": 0.0787, + "step": 12750 + }, + { + "epoch": 36.25, + "grad_norm": 1.4956623315811157, + "learning_rate": 0.0001, + "loss": 0.0746, + "step": 12760 + }, + { + "epoch": 36.27840909090909, + "grad_norm": 1.4210376739501953, + "learning_rate": 0.0001, + "loss": 0.0791, + "step": 12770 + }, + { + "epoch": 36.30681818181818, + "grad_norm": 1.4137691259384155, + "learning_rate": 0.0001, + "loss": 0.0746, + "step": 12780 + }, + { + "epoch": 36.33522727272727, + "grad_norm": 1.5666520595550537, + "learning_rate": 0.0001, + "loss": 0.0795, + "step": 12790 + }, + { + "epoch": 36.36363636363637, + "grad_norm": 2.1862380504608154, + "learning_rate": 0.0001, + "loss": 0.0799, + "step": 12800 + }, + { + "epoch": 36.39204545454545, + "grad_norm": 2.0382354259490967, + "learning_rate": 0.0001, + "loss": 0.076, + "step": 12810 + }, + { + "epoch": 36.42045454545455, + "grad_norm": 1.9271392822265625, + "learning_rate": 0.0001, + "loss": 0.0719, + "step": 12820 + }, + { + "epoch": 36.44886363636363, + "grad_norm": 1.600040078163147, + "learning_rate": 0.0001, + "loss": 0.0778, + "step": 12830 + }, + { + "epoch": 36.47727272727273, + "grad_norm": 1.6347086429595947, + "learning_rate": 0.0001, + "loss": 0.071, + "step": 12840 + }, + { + "epoch": 36.50568181818182, + "grad_norm": 1.4493471384048462, + "learning_rate": 0.0001, + "loss": 0.0753, + "step": 12850 + }, + { + "epoch": 36.53409090909091, + "grad_norm": 1.5854036808013916, + "learning_rate": 0.0001, + "loss": 0.0737, + "step": 12860 + }, + { + "epoch": 36.5625, + "grad_norm": 1.3838077783584595, + "learning_rate": 0.0001, + "loss": 0.0744, + "step": 12870 + }, + { + "epoch": 36.59090909090909, + "grad_norm": 1.5723717212677002, + "learning_rate": 0.0001, + "loss": 0.0716, + "step": 12880 + }, + { + "epoch": 36.61931818181818, + "grad_norm": 1.6842889785766602, + "learning_rate": 0.0001, + "loss": 0.0732, + "step": 12890 + }, + { + "epoch": 36.64772727272727, + "grad_norm": 1.209652066230774, + "learning_rate": 0.0001, + "loss": 0.0741, + "step": 12900 + }, + { + "epoch": 36.67613636363637, + "grad_norm": 1.4279730319976807, + "learning_rate": 0.0001, + "loss": 0.0762, + "step": 12910 + }, + { + "epoch": 36.70454545454545, + "grad_norm": 1.6031663417816162, + "learning_rate": 0.0001, + "loss": 0.0701, + "step": 12920 + }, + { + "epoch": 36.73295454545455, + "grad_norm": 1.3902431726455688, + "learning_rate": 0.0001, + "loss": 0.0785, + "step": 12930 + }, + { + "epoch": 36.76136363636363, + "grad_norm": 1.3266302347183228, + "learning_rate": 0.0001, + "loss": 0.0758, + "step": 12940 + }, + { + "epoch": 36.78977272727273, + "grad_norm": 1.4028958082199097, + "learning_rate": 0.0001, + "loss": 0.0741, + "step": 12950 + }, + { + "epoch": 36.81818181818182, + "grad_norm": 1.494147539138794, + "learning_rate": 0.0001, + "loss": 0.0716, + "step": 12960 + }, + { + "epoch": 36.84659090909091, + "grad_norm": 1.4022266864776611, + "learning_rate": 0.0001, + "loss": 0.0751, + "step": 12970 + }, + { + "epoch": 36.875, + "grad_norm": 1.1766167879104614, + "learning_rate": 0.0001, + "loss": 0.0718, + "step": 12980 + }, + { + "epoch": 36.90340909090909, + "grad_norm": 1.346309781074524, + "learning_rate": 0.0001, + "loss": 0.0773, + "step": 12990 + }, + { + "epoch": 36.93181818181818, + "grad_norm": 1.2325224876403809, + "learning_rate": 0.0001, + "loss": 0.0771, + "step": 13000 + }, + { + "epoch": 36.96022727272727, + "grad_norm": 1.4607832431793213, + "learning_rate": 0.0001, + "loss": 0.0731, + "step": 13010 + }, + { + "epoch": 36.98863636363637, + "grad_norm": 1.426300287246704, + "learning_rate": 0.0001, + "loss": 0.0744, + "step": 13020 + }, + { + "epoch": 37.01704545454545, + "grad_norm": 1.3669778108596802, + "learning_rate": 0.0001, + "loss": 0.0717, + "step": 13030 + }, + { + "epoch": 37.04545454545455, + "grad_norm": 1.3466182947158813, + "learning_rate": 0.0001, + "loss": 0.0708, + "step": 13040 + }, + { + "epoch": 37.07386363636363, + "grad_norm": 1.3738116025924683, + "learning_rate": 0.0001, + "loss": 0.0792, + "step": 13050 + }, + { + "epoch": 37.10227272727273, + "grad_norm": 1.414943814277649, + "learning_rate": 0.0001, + "loss": 0.0772, + "step": 13060 + }, + { + "epoch": 37.13068181818182, + "grad_norm": 1.4304829835891724, + "learning_rate": 0.0001, + "loss": 0.0728, + "step": 13070 + }, + { + "epoch": 37.15909090909091, + "grad_norm": 1.280889630317688, + "learning_rate": 0.0001, + "loss": 0.076, + "step": 13080 + }, + { + "epoch": 37.1875, + "grad_norm": 1.4901535511016846, + "learning_rate": 0.0001, + "loss": 0.0743, + "step": 13090 + }, + { + "epoch": 37.21590909090909, + "grad_norm": 1.2257989645004272, + "learning_rate": 0.0001, + "loss": 0.0739, + "step": 13100 + }, + { + "epoch": 37.24431818181818, + "grad_norm": 1.345968246459961, + "learning_rate": 0.0001, + "loss": 0.0746, + "step": 13110 + }, + { + "epoch": 37.27272727272727, + "grad_norm": 1.5769586563110352, + "learning_rate": 0.0001, + "loss": 0.0776, + "step": 13120 + }, + { + "epoch": 37.30113636363637, + "grad_norm": 1.3803303241729736, + "learning_rate": 0.0001, + "loss": 0.0747, + "step": 13130 + }, + { + "epoch": 37.32954545454545, + "grad_norm": 1.2624835968017578, + "learning_rate": 0.0001, + "loss": 0.0758, + "step": 13140 + }, + { + "epoch": 37.35795454545455, + "grad_norm": 1.2229900360107422, + "learning_rate": 0.0001, + "loss": 0.0739, + "step": 13150 + }, + { + "epoch": 37.38636363636363, + "grad_norm": 1.0312384366989136, + "learning_rate": 0.0001, + "loss": 0.0748, + "step": 13160 + }, + { + "epoch": 37.41477272727273, + "grad_norm": 1.174302339553833, + "learning_rate": 0.0001, + "loss": 0.0771, + "step": 13170 + }, + { + "epoch": 37.44318181818182, + "grad_norm": 1.4073001146316528, + "learning_rate": 0.0001, + "loss": 0.0804, + "step": 13180 + }, + { + "epoch": 37.47159090909091, + "grad_norm": 1.2546253204345703, + "learning_rate": 0.0001, + "loss": 0.0738, + "step": 13190 + }, + { + "epoch": 37.5, + "grad_norm": 1.0951519012451172, + "learning_rate": 0.0001, + "loss": 0.0778, + "step": 13200 + }, + { + "epoch": 37.52840909090909, + "grad_norm": 1.2326979637145996, + "learning_rate": 0.0001, + "loss": 0.0758, + "step": 13210 + }, + { + "epoch": 37.55681818181818, + "grad_norm": 1.4816584587097168, + "learning_rate": 0.0001, + "loss": 0.0742, + "step": 13220 + }, + { + "epoch": 37.58522727272727, + "grad_norm": 1.2732244729995728, + "learning_rate": 0.0001, + "loss": 0.0792, + "step": 13230 + }, + { + "epoch": 37.61363636363637, + "grad_norm": 1.2015769481658936, + "learning_rate": 0.0001, + "loss": 0.0774, + "step": 13240 + }, + { + "epoch": 37.64204545454545, + "grad_norm": 1.4944400787353516, + "learning_rate": 0.0001, + "loss": 0.0764, + "step": 13250 + }, + { + "epoch": 37.67045454545455, + "grad_norm": 1.596579670906067, + "learning_rate": 0.0001, + "loss": 0.0765, + "step": 13260 + }, + { + "epoch": 37.69886363636363, + "grad_norm": 1.1385072469711304, + "learning_rate": 0.0001, + "loss": 0.0729, + "step": 13270 + }, + { + "epoch": 37.72727272727273, + "grad_norm": 1.2156968116760254, + "learning_rate": 0.0001, + "loss": 0.0782, + "step": 13280 + }, + { + "epoch": 37.75568181818182, + "grad_norm": 1.4938963651657104, + "learning_rate": 0.0001, + "loss": 0.0718, + "step": 13290 + }, + { + "epoch": 37.78409090909091, + "grad_norm": 1.3732664585113525, + "learning_rate": 0.0001, + "loss": 0.0763, + "step": 13300 + }, + { + "epoch": 37.8125, + "grad_norm": 1.195371389389038, + "learning_rate": 0.0001, + "loss": 0.0767, + "step": 13310 + }, + { + "epoch": 37.84090909090909, + "grad_norm": 1.272255301475525, + "learning_rate": 0.0001, + "loss": 0.074, + "step": 13320 + }, + { + "epoch": 37.86931818181818, + "grad_norm": 1.451314926147461, + "learning_rate": 0.0001, + "loss": 0.0764, + "step": 13330 + }, + { + "epoch": 37.89772727272727, + "grad_norm": 1.2322098016738892, + "learning_rate": 0.0001, + "loss": 0.0804, + "step": 13340 + }, + { + "epoch": 37.92613636363637, + "grad_norm": 1.3791121244430542, + "learning_rate": 0.0001, + "loss": 0.07, + "step": 13350 + }, + { + "epoch": 37.95454545454545, + "grad_norm": 1.0808604955673218, + "learning_rate": 0.0001, + "loss": 0.073, + "step": 13360 + }, + { + "epoch": 37.98295454545455, + "grad_norm": 1.199265480041504, + "learning_rate": 0.0001, + "loss": 0.0705, + "step": 13370 + }, + { + "epoch": 38.01136363636363, + "grad_norm": 1.1241381168365479, + "learning_rate": 0.0001, + "loss": 0.0733, + "step": 13380 + }, + { + "epoch": 38.03977272727273, + "grad_norm": 1.279880404472351, + "learning_rate": 0.0001, + "loss": 0.0782, + "step": 13390 + }, + { + "epoch": 38.06818181818182, + "grad_norm": 1.2983338832855225, + "learning_rate": 0.0001, + "loss": 0.073, + "step": 13400 + }, + { + "epoch": 38.09659090909091, + "grad_norm": 1.2902085781097412, + "learning_rate": 0.0001, + "loss": 0.0778, + "step": 13410 + }, + { + "epoch": 38.125, + "grad_norm": 1.2779277563095093, + "learning_rate": 0.0001, + "loss": 0.0726, + "step": 13420 + }, + { + "epoch": 38.15340909090909, + "grad_norm": 1.4885528087615967, + "learning_rate": 0.0001, + "loss": 0.0762, + "step": 13430 + }, + { + "epoch": 38.18181818181818, + "grad_norm": 1.3437947034835815, + "learning_rate": 0.0001, + "loss": 0.0772, + "step": 13440 + }, + { + "epoch": 38.21022727272727, + "grad_norm": 1.2803330421447754, + "learning_rate": 0.0001, + "loss": 0.0773, + "step": 13450 + }, + { + "epoch": 38.23863636363637, + "grad_norm": 1.2626174688339233, + "learning_rate": 0.0001, + "loss": 0.0757, + "step": 13460 + }, + { + "epoch": 38.26704545454545, + "grad_norm": 1.3316041231155396, + "learning_rate": 0.0001, + "loss": 0.0755, + "step": 13470 + }, + { + "epoch": 38.29545454545455, + "grad_norm": 1.2017104625701904, + "learning_rate": 0.0001, + "loss": 0.0737, + "step": 13480 + }, + { + "epoch": 38.32386363636363, + "grad_norm": 1.158381462097168, + "learning_rate": 0.0001, + "loss": 0.0785, + "step": 13490 + }, + { + "epoch": 38.35227272727273, + "grad_norm": 1.5186028480529785, + "learning_rate": 0.0001, + "loss": 0.0765, + "step": 13500 + }, + { + "epoch": 38.38068181818182, + "grad_norm": 1.3031560182571411, + "learning_rate": 0.0001, + "loss": 0.0772, + "step": 13510 + }, + { + "epoch": 38.40909090909091, + "grad_norm": 1.0976595878601074, + "learning_rate": 0.0001, + "loss": 0.077, + "step": 13520 + }, + { + "epoch": 38.4375, + "grad_norm": 1.3816660642623901, + "learning_rate": 0.0001, + "loss": 0.0767, + "step": 13530 + }, + { + "epoch": 38.46590909090909, + "grad_norm": 1.3313119411468506, + "learning_rate": 0.0001, + "loss": 0.0759, + "step": 13540 + }, + { + "epoch": 38.49431818181818, + "grad_norm": 1.124053716659546, + "learning_rate": 0.0001, + "loss": 0.0761, + "step": 13550 + }, + { + "epoch": 38.52272727272727, + "grad_norm": 1.1821684837341309, + "learning_rate": 0.0001, + "loss": 0.0777, + "step": 13560 + }, + { + "epoch": 38.55113636363637, + "grad_norm": 1.1764349937438965, + "learning_rate": 0.0001, + "loss": 0.0755, + "step": 13570 + }, + { + "epoch": 38.57954545454545, + "grad_norm": 1.2892837524414062, + "learning_rate": 0.0001, + "loss": 0.0766, + "step": 13580 + }, + { + "epoch": 38.60795454545455, + "grad_norm": 1.2852972745895386, + "learning_rate": 0.0001, + "loss": 0.0749, + "step": 13590 + }, + { + "epoch": 38.63636363636363, + "grad_norm": 1.0687012672424316, + "learning_rate": 0.0001, + "loss": 0.0715, + "step": 13600 + }, + { + "epoch": 38.66477272727273, + "grad_norm": 1.088240385055542, + "learning_rate": 0.0001, + "loss": 0.0722, + "step": 13610 + }, + { + "epoch": 38.69318181818182, + "grad_norm": 1.0786799192428589, + "learning_rate": 0.0001, + "loss": 0.0729, + "step": 13620 + }, + { + "epoch": 38.72159090909091, + "grad_norm": 1.2037075757980347, + "learning_rate": 0.0001, + "loss": 0.0734, + "step": 13630 + }, + { + "epoch": 38.75, + "grad_norm": 1.2549363374710083, + "learning_rate": 0.0001, + "loss": 0.0737, + "step": 13640 + }, + { + "epoch": 38.77840909090909, + "grad_norm": 1.194278597831726, + "learning_rate": 0.0001, + "loss": 0.0733, + "step": 13650 + }, + { + "epoch": 38.80681818181818, + "grad_norm": 1.5739684104919434, + "learning_rate": 0.0001, + "loss": 0.0714, + "step": 13660 + }, + { + "epoch": 38.83522727272727, + "grad_norm": 1.3770755529403687, + "learning_rate": 0.0001, + "loss": 0.0728, + "step": 13670 + }, + { + "epoch": 38.86363636363637, + "grad_norm": 1.294053316116333, + "learning_rate": 0.0001, + "loss": 0.0678, + "step": 13680 + }, + { + "epoch": 38.89204545454545, + "grad_norm": 1.2223044633865356, + "learning_rate": 0.0001, + "loss": 0.0733, + "step": 13690 + }, + { + "epoch": 38.92045454545455, + "grad_norm": 1.3834986686706543, + "learning_rate": 0.0001, + "loss": 0.0716, + "step": 13700 + }, + { + "epoch": 38.94886363636363, + "grad_norm": 1.2326815128326416, + "learning_rate": 0.0001, + "loss": 0.0719, + "step": 13710 + }, + { + "epoch": 38.97727272727273, + "grad_norm": 1.0974726676940918, + "learning_rate": 0.0001, + "loss": 0.0731, + "step": 13720 + }, + { + "epoch": 39.00568181818182, + "grad_norm": 1.2831435203552246, + "learning_rate": 0.0001, + "loss": 0.0749, + "step": 13730 + }, + { + "epoch": 39.03409090909091, + "grad_norm": 1.2879631519317627, + "learning_rate": 0.0001, + "loss": 0.0756, + "step": 13740 + }, + { + "epoch": 39.0625, + "grad_norm": 1.3753108978271484, + "learning_rate": 0.0001, + "loss": 0.0723, + "step": 13750 + }, + { + "epoch": 39.09090909090909, + "grad_norm": 1.29164719581604, + "learning_rate": 0.0001, + "loss": 0.0722, + "step": 13760 + }, + { + "epoch": 39.11931818181818, + "grad_norm": 1.0802414417266846, + "learning_rate": 0.0001, + "loss": 0.0749, + "step": 13770 + }, + { + "epoch": 39.14772727272727, + "grad_norm": 1.1304676532745361, + "learning_rate": 0.0001, + "loss": 0.0735, + "step": 13780 + }, + { + "epoch": 39.17613636363637, + "grad_norm": 1.0044450759887695, + "learning_rate": 0.0001, + "loss": 0.0731, + "step": 13790 + }, + { + "epoch": 39.20454545454545, + "grad_norm": 0.9919958114624023, + "learning_rate": 0.0001, + "loss": 0.0731, + "step": 13800 + }, + { + "epoch": 39.23295454545455, + "grad_norm": 1.1686891317367554, + "learning_rate": 0.0001, + "loss": 0.0733, + "step": 13810 + }, + { + "epoch": 39.26136363636363, + "grad_norm": 1.145544409751892, + "learning_rate": 0.0001, + "loss": 0.0703, + "step": 13820 + }, + { + "epoch": 39.28977272727273, + "grad_norm": 1.1628680229187012, + "learning_rate": 0.0001, + "loss": 0.0739, + "step": 13830 + }, + { + "epoch": 39.31818181818182, + "grad_norm": 1.199570655822754, + "learning_rate": 0.0001, + "loss": 0.0734, + "step": 13840 + }, + { + "epoch": 39.34659090909091, + "grad_norm": 1.2358002662658691, + "learning_rate": 0.0001, + "loss": 0.0733, + "step": 13850 + }, + { + "epoch": 39.375, + "grad_norm": 1.3280445337295532, + "learning_rate": 0.0001, + "loss": 0.0711, + "step": 13860 + }, + { + "epoch": 39.40340909090909, + "grad_norm": 1.103401780128479, + "learning_rate": 0.0001, + "loss": 0.0698, + "step": 13870 + }, + { + "epoch": 39.43181818181818, + "grad_norm": 1.4654299020767212, + "learning_rate": 0.0001, + "loss": 0.0724, + "step": 13880 + }, + { + "epoch": 39.46022727272727, + "grad_norm": 1.3928581476211548, + "learning_rate": 0.0001, + "loss": 0.0728, + "step": 13890 + }, + { + "epoch": 39.48863636363637, + "grad_norm": 1.2623183727264404, + "learning_rate": 0.0001, + "loss": 0.0761, + "step": 13900 + }, + { + "epoch": 39.51704545454545, + "grad_norm": 1.0778528451919556, + "learning_rate": 0.0001, + "loss": 0.0699, + "step": 13910 + }, + { + "epoch": 39.54545454545455, + "grad_norm": 1.1330665349960327, + "learning_rate": 0.0001, + "loss": 0.0777, + "step": 13920 + }, + { + "epoch": 39.57386363636363, + "grad_norm": 1.1069144010543823, + "learning_rate": 0.0001, + "loss": 0.0757, + "step": 13930 + }, + { + "epoch": 39.60227272727273, + "grad_norm": 1.1391063928604126, + "learning_rate": 0.0001, + "loss": 0.0715, + "step": 13940 + }, + { + "epoch": 39.63068181818182, + "grad_norm": 1.3045668601989746, + "learning_rate": 0.0001, + "loss": 0.0744, + "step": 13950 + }, + { + "epoch": 39.65909090909091, + "grad_norm": 1.3357367515563965, + "learning_rate": 0.0001, + "loss": 0.0769, + "step": 13960 + }, + { + "epoch": 39.6875, + "grad_norm": 1.3155001401901245, + "learning_rate": 0.0001, + "loss": 0.0743, + "step": 13970 + }, + { + "epoch": 39.71590909090909, + "grad_norm": 1.215240716934204, + "learning_rate": 0.0001, + "loss": 0.0778, + "step": 13980 + }, + { + "epoch": 39.74431818181818, + "grad_norm": 1.0790923833847046, + "learning_rate": 0.0001, + "loss": 0.0748, + "step": 13990 + }, + { + "epoch": 39.77272727272727, + "grad_norm": 1.0089366436004639, + "learning_rate": 0.0001, + "loss": 0.0738, + "step": 14000 + }, + { + "epoch": 39.80113636363637, + "grad_norm": 1.0045474767684937, + "learning_rate": 0.0001, + "loss": 0.0748, + "step": 14010 + }, + { + "epoch": 39.82954545454545, + "grad_norm": 1.0148450136184692, + "learning_rate": 0.0001, + "loss": 0.0752, + "step": 14020 + }, + { + "epoch": 39.85795454545455, + "grad_norm": 1.1197142601013184, + "learning_rate": 0.0001, + "loss": 0.0762, + "step": 14030 + }, + { + "epoch": 39.88636363636363, + "grad_norm": 1.324987530708313, + "learning_rate": 0.0001, + "loss": 0.075, + "step": 14040 + }, + { + "epoch": 39.91477272727273, + "grad_norm": 1.3428856134414673, + "learning_rate": 0.0001, + "loss": 0.0791, + "step": 14050 + }, + { + "epoch": 39.94318181818182, + "grad_norm": 1.025665521621704, + "learning_rate": 0.0001, + "loss": 0.0761, + "step": 14060 + }, + { + "epoch": 39.97159090909091, + "grad_norm": 1.2127065658569336, + "learning_rate": 0.0001, + "loss": 0.0739, + "step": 14070 + }, + { + "epoch": 40.0, + "grad_norm": 1.304028868675232, + "learning_rate": 0.0001, + "loss": 0.0708, + "step": 14080 + }, + { + "epoch": 40.02840909090909, + "grad_norm": 1.194968819618225, + "learning_rate": 0.0001, + "loss": 0.0732, + "step": 14090 + }, + { + "epoch": 40.05681818181818, + "grad_norm": 1.2401859760284424, + "learning_rate": 0.0001, + "loss": 0.0718, + "step": 14100 + }, + { + "epoch": 40.08522727272727, + "grad_norm": 1.2328866720199585, + "learning_rate": 0.0001, + "loss": 0.0711, + "step": 14110 + }, + { + "epoch": 40.11363636363637, + "grad_norm": 1.178868055343628, + "learning_rate": 0.0001, + "loss": 0.0729, + "step": 14120 + }, + { + "epoch": 40.14204545454545, + "grad_norm": 1.2818002700805664, + "learning_rate": 0.0001, + "loss": 0.0656, + "step": 14130 + }, + { + "epoch": 40.17045454545455, + "grad_norm": 1.167518138885498, + "learning_rate": 0.0001, + "loss": 0.0708, + "step": 14140 + }, + { + "epoch": 40.19886363636363, + "grad_norm": 1.28043794631958, + "learning_rate": 0.0001, + "loss": 0.0727, + "step": 14150 + }, + { + "epoch": 40.22727272727273, + "grad_norm": 0.9946736097335815, + "learning_rate": 0.0001, + "loss": 0.0725, + "step": 14160 + }, + { + "epoch": 40.25568181818182, + "grad_norm": 1.186169981956482, + "learning_rate": 0.0001, + "loss": 0.0716, + "step": 14170 + }, + { + "epoch": 40.28409090909091, + "grad_norm": 1.138846755027771, + "learning_rate": 0.0001, + "loss": 0.0744, + "step": 14180 + }, + { + "epoch": 40.3125, + "grad_norm": 1.1993027925491333, + "learning_rate": 0.0001, + "loss": 0.0741, + "step": 14190 + }, + { + "epoch": 40.34090909090909, + "grad_norm": 1.4275126457214355, + "learning_rate": 0.0001, + "loss": 0.0724, + "step": 14200 + }, + { + "epoch": 40.36931818181818, + "grad_norm": 1.4426121711730957, + "learning_rate": 0.0001, + "loss": 0.0728, + "step": 14210 + }, + { + "epoch": 40.39772727272727, + "grad_norm": 1.7519258260726929, + "learning_rate": 0.0001, + "loss": 0.0723, + "step": 14220 + }, + { + "epoch": 40.42613636363637, + "grad_norm": 1.821484923362732, + "learning_rate": 0.0001, + "loss": 0.0727, + "step": 14230 + }, + { + "epoch": 40.45454545454545, + "grad_norm": 1.4972963333129883, + "learning_rate": 0.0001, + "loss": 0.0699, + "step": 14240 + }, + { + "epoch": 40.48295454545455, + "grad_norm": 1.267853021621704, + "learning_rate": 0.0001, + "loss": 0.0722, + "step": 14250 + }, + { + "epoch": 40.51136363636363, + "grad_norm": 1.0927573442459106, + "learning_rate": 0.0001, + "loss": 0.069, + "step": 14260 + }, + { + "epoch": 40.53977272727273, + "grad_norm": 1.4752814769744873, + "learning_rate": 0.0001, + "loss": 0.0672, + "step": 14270 + }, + { + "epoch": 40.56818181818182, + "grad_norm": 1.8464324474334717, + "learning_rate": 0.0001, + "loss": 0.0738, + "step": 14280 + }, + { + "epoch": 40.59659090909091, + "grad_norm": 1.4506888389587402, + "learning_rate": 0.0001, + "loss": 0.0693, + "step": 14290 + }, + { + "epoch": 40.625, + "grad_norm": 1.7628158330917358, + "learning_rate": 0.0001, + "loss": 0.068, + "step": 14300 + }, + { + "epoch": 40.65340909090909, + "grad_norm": 1.4462136030197144, + "learning_rate": 0.0001, + "loss": 0.0713, + "step": 14310 + }, + { + "epoch": 40.68181818181818, + "grad_norm": 1.3682395219802856, + "learning_rate": 0.0001, + "loss": 0.0688, + "step": 14320 + }, + { + "epoch": 40.71022727272727, + "grad_norm": 1.5475159883499146, + "learning_rate": 0.0001, + "loss": 0.0689, + "step": 14330 + }, + { + "epoch": 40.73863636363637, + "grad_norm": 1.2970659732818604, + "learning_rate": 0.0001, + "loss": 0.071, + "step": 14340 + }, + { + "epoch": 40.76704545454545, + "grad_norm": 1.2972766160964966, + "learning_rate": 0.0001, + "loss": 0.0691, + "step": 14350 + }, + { + "epoch": 40.79545454545455, + "grad_norm": 1.5972353219985962, + "learning_rate": 0.0001, + "loss": 0.0668, + "step": 14360 + }, + { + "epoch": 40.82386363636363, + "grad_norm": 1.2204009294509888, + "learning_rate": 0.0001, + "loss": 0.0688, + "step": 14370 + }, + { + "epoch": 40.85227272727273, + "grad_norm": 1.4080297946929932, + "learning_rate": 0.0001, + "loss": 0.0724, + "step": 14380 + }, + { + "epoch": 40.88068181818182, + "grad_norm": 1.5412365198135376, + "learning_rate": 0.0001, + "loss": 0.0688, + "step": 14390 + }, + { + "epoch": 40.90909090909091, + "grad_norm": 1.3308732509613037, + "learning_rate": 0.0001, + "loss": 0.0703, + "step": 14400 + }, + { + "epoch": 40.9375, + "grad_norm": 1.2614033222198486, + "learning_rate": 0.0001, + "loss": 0.0676, + "step": 14410 + }, + { + "epoch": 40.96590909090909, + "grad_norm": 1.3623310327529907, + "learning_rate": 0.0001, + "loss": 0.0691, + "step": 14420 + }, + { + "epoch": 40.99431818181818, + "grad_norm": 1.3249821662902832, + "learning_rate": 0.0001, + "loss": 0.0709, + "step": 14430 + }, + { + "epoch": 41.02272727272727, + "grad_norm": 1.224489450454712, + "learning_rate": 0.0001, + "loss": 0.0677, + "step": 14440 + }, + { + "epoch": 41.05113636363637, + "grad_norm": 1.2260276079177856, + "learning_rate": 0.0001, + "loss": 0.0687, + "step": 14450 + }, + { + "epoch": 41.07954545454545, + "grad_norm": 1.0387647151947021, + "learning_rate": 0.0001, + "loss": 0.0671, + "step": 14460 + }, + { + "epoch": 41.10795454545455, + "grad_norm": 1.3038042783737183, + "learning_rate": 0.0001, + "loss": 0.0709, + "step": 14470 + }, + { + "epoch": 41.13636363636363, + "grad_norm": 1.170926570892334, + "learning_rate": 0.0001, + "loss": 0.078, + "step": 14480 + }, + { + "epoch": 41.16477272727273, + "grad_norm": 1.118465781211853, + "learning_rate": 0.0001, + "loss": 0.072, + "step": 14490 + }, + { + "epoch": 41.19318181818182, + "grad_norm": 1.1143956184387207, + "learning_rate": 0.0001, + "loss": 0.0716, + "step": 14500 + }, + { + "epoch": 41.22159090909091, + "grad_norm": 1.1609805822372437, + "learning_rate": 0.0001, + "loss": 0.0724, + "step": 14510 + }, + { + "epoch": 41.25, + "grad_norm": 1.0995590686798096, + "learning_rate": 0.0001, + "loss": 0.0727, + "step": 14520 + }, + { + "epoch": 41.27840909090909, + "grad_norm": 1.0863865613937378, + "learning_rate": 0.0001, + "loss": 0.0706, + "step": 14530 + }, + { + "epoch": 41.30681818181818, + "grad_norm": 1.0013092756271362, + "learning_rate": 0.0001, + "loss": 0.0698, + "step": 14540 + }, + { + "epoch": 41.33522727272727, + "grad_norm": 1.1944328546524048, + "learning_rate": 0.0001, + "loss": 0.0739, + "step": 14550 + }, + { + "epoch": 41.36363636363637, + "grad_norm": 1.015170931816101, + "learning_rate": 0.0001, + "loss": 0.0699, + "step": 14560 + }, + { + "epoch": 41.39204545454545, + "grad_norm": 1.0812183618545532, + "learning_rate": 0.0001, + "loss": 0.0672, + "step": 14570 + }, + { + "epoch": 41.42045454545455, + "grad_norm": 1.2042665481567383, + "learning_rate": 0.0001, + "loss": 0.0695, + "step": 14580 + }, + { + "epoch": 41.44886363636363, + "grad_norm": 1.3115397691726685, + "learning_rate": 0.0001, + "loss": 0.0704, + "step": 14590 + }, + { + "epoch": 41.47727272727273, + "grad_norm": 1.325830101966858, + "learning_rate": 0.0001, + "loss": 0.0737, + "step": 14600 + }, + { + "epoch": 41.50568181818182, + "grad_norm": 1.1970694065093994, + "learning_rate": 0.0001, + "loss": 0.0721, + "step": 14610 + }, + { + "epoch": 41.53409090909091, + "grad_norm": 1.0794974565505981, + "learning_rate": 0.0001, + "loss": 0.0714, + "step": 14620 + }, + { + "epoch": 41.5625, + "grad_norm": 0.8236928582191467, + "learning_rate": 0.0001, + "loss": 0.0741, + "step": 14630 + }, + { + "epoch": 41.59090909090909, + "grad_norm": 0.8976597785949707, + "learning_rate": 0.0001, + "loss": 0.0713, + "step": 14640 + }, + { + "epoch": 41.61931818181818, + "grad_norm": 0.84346604347229, + "learning_rate": 0.0001, + "loss": 0.071, + "step": 14650 + }, + { + "epoch": 41.64772727272727, + "grad_norm": 0.9527440667152405, + "learning_rate": 0.0001, + "loss": 0.0691, + "step": 14660 + }, + { + "epoch": 41.67613636363637, + "grad_norm": 1.0900448560714722, + "learning_rate": 0.0001, + "loss": 0.0708, + "step": 14670 + }, + { + "epoch": 41.70454545454545, + "grad_norm": 1.057309627532959, + "learning_rate": 0.0001, + "loss": 0.0743, + "step": 14680 + }, + { + "epoch": 41.73295454545455, + "grad_norm": 1.161765456199646, + "learning_rate": 0.0001, + "loss": 0.0693, + "step": 14690 + }, + { + "epoch": 41.76136363636363, + "grad_norm": 1.0732311010360718, + "learning_rate": 0.0001, + "loss": 0.0744, + "step": 14700 + }, + { + "epoch": 41.78977272727273, + "grad_norm": 0.9801560044288635, + "learning_rate": 0.0001, + "loss": 0.0713, + "step": 14710 + }, + { + "epoch": 41.81818181818182, + "grad_norm": 0.952893853187561, + "learning_rate": 0.0001, + "loss": 0.0705, + "step": 14720 + }, + { + "epoch": 41.84659090909091, + "grad_norm": 1.247277021408081, + "learning_rate": 0.0001, + "loss": 0.0722, + "step": 14730 + }, + { + "epoch": 41.875, + "grad_norm": 1.1207205057144165, + "learning_rate": 0.0001, + "loss": 0.0691, + "step": 14740 + }, + { + "epoch": 41.90340909090909, + "grad_norm": 1.397698163986206, + "learning_rate": 0.0001, + "loss": 0.0718, + "step": 14750 + }, + { + "epoch": 41.93181818181818, + "grad_norm": 1.3861267566680908, + "learning_rate": 0.0001, + "loss": 0.0726, + "step": 14760 + }, + { + "epoch": 41.96022727272727, + "grad_norm": 1.3560404777526855, + "learning_rate": 0.0001, + "loss": 0.0725, + "step": 14770 + }, + { + "epoch": 41.98863636363637, + "grad_norm": 1.32721745967865, + "learning_rate": 0.0001, + "loss": 0.071, + "step": 14780 + }, + { + "epoch": 42.01704545454545, + "grad_norm": 1.1854987144470215, + "learning_rate": 0.0001, + "loss": 0.0712, + "step": 14790 + }, + { + "epoch": 42.04545454545455, + "grad_norm": 1.1440140008926392, + "learning_rate": 0.0001, + "loss": 0.0718, + "step": 14800 + }, + { + "epoch": 42.07386363636363, + "grad_norm": 1.064915657043457, + "learning_rate": 0.0001, + "loss": 0.0687, + "step": 14810 + }, + { + "epoch": 42.10227272727273, + "grad_norm": 1.279728889465332, + "learning_rate": 0.0001, + "loss": 0.0681, + "step": 14820 + }, + { + "epoch": 42.13068181818182, + "grad_norm": 1.2349141836166382, + "learning_rate": 0.0001, + "loss": 0.0651, + "step": 14830 + }, + { + "epoch": 42.15909090909091, + "grad_norm": 1.2165275812149048, + "learning_rate": 0.0001, + "loss": 0.0667, + "step": 14840 + }, + { + "epoch": 42.1875, + "grad_norm": 1.3267191648483276, + "learning_rate": 0.0001, + "loss": 0.071, + "step": 14850 + }, + { + "epoch": 42.21590909090909, + "grad_norm": 0.9966840147972107, + "learning_rate": 0.0001, + "loss": 0.0703, + "step": 14860 + }, + { + "epoch": 42.24431818181818, + "grad_norm": 1.1279515027999878, + "learning_rate": 0.0001, + "loss": 0.068, + "step": 14870 + }, + { + "epoch": 42.27272727272727, + "grad_norm": 1.3340145349502563, + "learning_rate": 0.0001, + "loss": 0.0705, + "step": 14880 + }, + { + "epoch": 42.30113636363637, + "grad_norm": 1.193154215812683, + "learning_rate": 0.0001, + "loss": 0.0686, + "step": 14890 + }, + { + "epoch": 42.32954545454545, + "grad_norm": 1.1050076484680176, + "learning_rate": 0.0001, + "loss": 0.0664, + "step": 14900 + }, + { + "epoch": 42.35795454545455, + "grad_norm": 1.1995725631713867, + "learning_rate": 0.0001, + "loss": 0.0677, + "step": 14910 + }, + { + "epoch": 42.38636363636363, + "grad_norm": 1.0086230039596558, + "learning_rate": 0.0001, + "loss": 0.0665, + "step": 14920 + }, + { + "epoch": 42.41477272727273, + "grad_norm": 0.8045913577079773, + "learning_rate": 0.0001, + "loss": 0.0673, + "step": 14930 + }, + { + "epoch": 42.44318181818182, + "grad_norm": 0.9728057980537415, + "learning_rate": 0.0001, + "loss": 0.0712, + "step": 14940 + }, + { + "epoch": 42.47159090909091, + "grad_norm": 1.2215827703475952, + "learning_rate": 0.0001, + "loss": 0.0702, + "step": 14950 + }, + { + "epoch": 42.5, + "grad_norm": 1.3105379343032837, + "learning_rate": 0.0001, + "loss": 0.0745, + "step": 14960 + }, + { + "epoch": 42.52840909090909, + "grad_norm": 1.5222160816192627, + "learning_rate": 0.0001, + "loss": 0.0719, + "step": 14970 + }, + { + "epoch": 42.55681818181818, + "grad_norm": 1.5078243017196655, + "learning_rate": 0.0001, + "loss": 0.0722, + "step": 14980 + }, + { + "epoch": 42.58522727272727, + "grad_norm": 1.5456411838531494, + "learning_rate": 0.0001, + "loss": 0.0701, + "step": 14990 + }, + { + "epoch": 42.61363636363637, + "grad_norm": 1.329354166984558, + "learning_rate": 0.0001, + "loss": 0.0727, + "step": 15000 + }, + { + "epoch": 42.64204545454545, + "grad_norm": 1.4910674095153809, + "learning_rate": 0.0001, + "loss": 0.0684, + "step": 15010 + }, + { + "epoch": 42.67045454545455, + "grad_norm": 1.3281280994415283, + "learning_rate": 0.0001, + "loss": 0.0707, + "step": 15020 + }, + { + "epoch": 42.69886363636363, + "grad_norm": 1.312171220779419, + "learning_rate": 0.0001, + "loss": 0.0683, + "step": 15030 + }, + { + "epoch": 42.72727272727273, + "grad_norm": 1.8369287252426147, + "learning_rate": 0.0001, + "loss": 0.0695, + "step": 15040 + }, + { + "epoch": 42.75568181818182, + "grad_norm": 1.3710136413574219, + "learning_rate": 0.0001, + "loss": 0.0684, + "step": 15050 + }, + { + "epoch": 42.78409090909091, + "grad_norm": 1.343110203742981, + "learning_rate": 0.0001, + "loss": 0.0685, + "step": 15060 + }, + { + "epoch": 42.8125, + "grad_norm": 1.2409188747406006, + "learning_rate": 0.0001, + "loss": 0.0666, + "step": 15070 + }, + { + "epoch": 42.84090909090909, + "grad_norm": 1.375400185585022, + "learning_rate": 0.0001, + "loss": 0.0678, + "step": 15080 + }, + { + "epoch": 42.86931818181818, + "grad_norm": 1.4058459997177124, + "learning_rate": 0.0001, + "loss": 0.0686, + "step": 15090 + }, + { + "epoch": 42.89772727272727, + "grad_norm": 1.4581125974655151, + "learning_rate": 0.0001, + "loss": 0.0726, + "step": 15100 + }, + { + "epoch": 42.92613636363637, + "grad_norm": 1.129081130027771, + "learning_rate": 0.0001, + "loss": 0.0705, + "step": 15110 + }, + { + "epoch": 42.95454545454545, + "grad_norm": 1.0302661657333374, + "learning_rate": 0.0001, + "loss": 0.0653, + "step": 15120 + }, + { + "epoch": 42.98295454545455, + "grad_norm": 0.974215030670166, + "learning_rate": 0.0001, + "loss": 0.0671, + "step": 15130 + }, + { + "epoch": 43.01136363636363, + "grad_norm": 1.0873677730560303, + "learning_rate": 0.0001, + "loss": 0.0674, + "step": 15140 + }, + { + "epoch": 43.03977272727273, + "grad_norm": 1.3052396774291992, + "learning_rate": 0.0001, + "loss": 0.069, + "step": 15150 + }, + { + "epoch": 43.06818181818182, + "grad_norm": 1.1269701719284058, + "learning_rate": 0.0001, + "loss": 0.0685, + "step": 15160 + }, + { + "epoch": 43.09659090909091, + "grad_norm": 1.0936193466186523, + "learning_rate": 0.0001, + "loss": 0.0674, + "step": 15170 + }, + { + "epoch": 43.125, + "grad_norm": 1.168410301208496, + "learning_rate": 0.0001, + "loss": 0.0705, + "step": 15180 + }, + { + "epoch": 43.15340909090909, + "grad_norm": 1.151530385017395, + "learning_rate": 0.0001, + "loss": 0.0662, + "step": 15190 + }, + { + "epoch": 43.18181818181818, + "grad_norm": 1.0683995485305786, + "learning_rate": 0.0001, + "loss": 0.07, + "step": 15200 + }, + { + "epoch": 43.21022727272727, + "grad_norm": 1.1150685548782349, + "learning_rate": 0.0001, + "loss": 0.07, + "step": 15210 + }, + { + "epoch": 43.23863636363637, + "grad_norm": 1.1715824604034424, + "learning_rate": 0.0001, + "loss": 0.0693, + "step": 15220 + }, + { + "epoch": 43.26704545454545, + "grad_norm": 1.2279553413391113, + "learning_rate": 0.0001, + "loss": 0.0663, + "step": 15230 + }, + { + "epoch": 43.29545454545455, + "grad_norm": 1.020858883857727, + "learning_rate": 0.0001, + "loss": 0.0716, + "step": 15240 + }, + { + "epoch": 43.32386363636363, + "grad_norm": 1.4411025047302246, + "learning_rate": 0.0001, + "loss": 0.0724, + "step": 15250 + }, + { + "epoch": 43.35227272727273, + "grad_norm": 1.0164995193481445, + "learning_rate": 0.0001, + "loss": 0.0726, + "step": 15260 + }, + { + "epoch": 43.38068181818182, + "grad_norm": 1.092726469039917, + "learning_rate": 0.0001, + "loss": 0.0701, + "step": 15270 + }, + { + "epoch": 43.40909090909091, + "grad_norm": 0.9105169773101807, + "learning_rate": 0.0001, + "loss": 0.0713, + "step": 15280 + }, + { + "epoch": 43.4375, + "grad_norm": 1.0798728466033936, + "learning_rate": 0.0001, + "loss": 0.0711, + "step": 15290 + }, + { + "epoch": 43.46590909090909, + "grad_norm": 1.08077871799469, + "learning_rate": 0.0001, + "loss": 0.0703, + "step": 15300 + }, + { + "epoch": 43.49431818181818, + "grad_norm": 1.2410343885421753, + "learning_rate": 0.0001, + "loss": 0.0712, + "step": 15310 + }, + { + "epoch": 43.52272727272727, + "grad_norm": 1.0531221628189087, + "learning_rate": 0.0001, + "loss": 0.0677, + "step": 15320 + }, + { + "epoch": 43.55113636363637, + "grad_norm": 1.0954484939575195, + "learning_rate": 0.0001, + "loss": 0.0706, + "step": 15330 + }, + { + "epoch": 43.57954545454545, + "grad_norm": 1.155243158340454, + "learning_rate": 0.0001, + "loss": 0.0712, + "step": 15340 + }, + { + "epoch": 43.60795454545455, + "grad_norm": 0.9618707299232483, + "learning_rate": 0.0001, + "loss": 0.0708, + "step": 15350 + }, + { + "epoch": 43.63636363636363, + "grad_norm": 1.0885404348373413, + "learning_rate": 0.0001, + "loss": 0.0645, + "step": 15360 + }, + { + "epoch": 43.66477272727273, + "grad_norm": 1.2146936655044556, + "learning_rate": 0.0001, + "loss": 0.0696, + "step": 15370 + }, + { + "epoch": 43.69318181818182, + "grad_norm": 1.3160111904144287, + "learning_rate": 0.0001, + "loss": 0.0703, + "step": 15380 + }, + { + "epoch": 43.72159090909091, + "grad_norm": 1.0108137130737305, + "learning_rate": 0.0001, + "loss": 0.0734, + "step": 15390 + }, + { + "epoch": 43.75, + "grad_norm": 1.0981762409210205, + "learning_rate": 0.0001, + "loss": 0.0683, + "step": 15400 + }, + { + "epoch": 43.77840909090909, + "grad_norm": 1.1093978881835938, + "learning_rate": 0.0001, + "loss": 0.0691, + "step": 15410 + }, + { + "epoch": 43.80681818181818, + "grad_norm": 1.1320979595184326, + "learning_rate": 0.0001, + "loss": 0.0704, + "step": 15420 + }, + { + "epoch": 43.83522727272727, + "grad_norm": 1.089289665222168, + "learning_rate": 0.0001, + "loss": 0.069, + "step": 15430 + }, + { + "epoch": 43.86363636363637, + "grad_norm": 1.138124704360962, + "learning_rate": 0.0001, + "loss": 0.0681, + "step": 15440 + }, + { + "epoch": 43.89204545454545, + "grad_norm": 1.080330491065979, + "learning_rate": 0.0001, + "loss": 0.0678, + "step": 15450 + }, + { + "epoch": 43.92045454545455, + "grad_norm": 0.9879652857780457, + "learning_rate": 0.0001, + "loss": 0.0728, + "step": 15460 + }, + { + "epoch": 43.94886363636363, + "grad_norm": 1.2704243659973145, + "learning_rate": 0.0001, + "loss": 0.0671, + "step": 15470 + }, + { + "epoch": 43.97727272727273, + "grad_norm": 1.0896133184432983, + "learning_rate": 0.0001, + "loss": 0.0709, + "step": 15480 + }, + { + "epoch": 44.00568181818182, + "grad_norm": 1.3182461261749268, + "learning_rate": 0.0001, + "loss": 0.0697, + "step": 15490 + }, + { + "epoch": 44.03409090909091, + "grad_norm": 1.1766080856323242, + "learning_rate": 0.0001, + "loss": 0.0683, + "step": 15500 + }, + { + "epoch": 44.0625, + "grad_norm": 1.1963214874267578, + "learning_rate": 0.0001, + "loss": 0.0729, + "step": 15510 + }, + { + "epoch": 44.09090909090909, + "grad_norm": 1.32454252243042, + "learning_rate": 0.0001, + "loss": 0.0702, + "step": 15520 + }, + { + "epoch": 44.11931818181818, + "grad_norm": 0.9734973907470703, + "learning_rate": 0.0001, + "loss": 0.0722, + "step": 15530 + }, + { + "epoch": 44.14772727272727, + "grad_norm": 0.9227120280265808, + "learning_rate": 0.0001, + "loss": 0.0679, + "step": 15540 + }, + { + "epoch": 44.17613636363637, + "grad_norm": 1.0899708271026611, + "learning_rate": 0.0001, + "loss": 0.0698, + "step": 15550 + }, + { + "epoch": 44.20454545454545, + "grad_norm": 0.9477924704551697, + "learning_rate": 0.0001, + "loss": 0.0739, + "step": 15560 + }, + { + "epoch": 44.23295454545455, + "grad_norm": 0.9881532788276672, + "learning_rate": 0.0001, + "loss": 0.0682, + "step": 15570 + }, + { + "epoch": 44.26136363636363, + "grad_norm": 1.1461341381072998, + "learning_rate": 0.0001, + "loss": 0.0682, + "step": 15580 + }, + { + "epoch": 44.28977272727273, + "grad_norm": 1.1203874349594116, + "learning_rate": 0.0001, + "loss": 0.0668, + "step": 15590 + }, + { + "epoch": 44.31818181818182, + "grad_norm": 1.113659381866455, + "learning_rate": 0.0001, + "loss": 0.0714, + "step": 15600 + }, + { + "epoch": 44.34659090909091, + "grad_norm": 0.9644593596458435, + "learning_rate": 0.0001, + "loss": 0.0665, + "step": 15610 + }, + { + "epoch": 44.375, + "grad_norm": 1.232541561126709, + "learning_rate": 0.0001, + "loss": 0.0693, + "step": 15620 + }, + { + "epoch": 44.40340909090909, + "grad_norm": 1.1204942464828491, + "learning_rate": 0.0001, + "loss": 0.0671, + "step": 15630 + }, + { + "epoch": 44.43181818181818, + "grad_norm": 1.2313846349716187, + "learning_rate": 0.0001, + "loss": 0.0711, + "step": 15640 + }, + { + "epoch": 44.46022727272727, + "grad_norm": 1.1166387796401978, + "learning_rate": 0.0001, + "loss": 0.0648, + "step": 15650 + }, + { + "epoch": 44.48863636363637, + "grad_norm": 1.1295506954193115, + "learning_rate": 0.0001, + "loss": 0.0706, + "step": 15660 + }, + { + "epoch": 44.51704545454545, + "grad_norm": 1.103320598602295, + "learning_rate": 0.0001, + "loss": 0.0692, + "step": 15670 + }, + { + "epoch": 44.54545454545455, + "grad_norm": 0.8729053139686584, + "learning_rate": 0.0001, + "loss": 0.0701, + "step": 15680 + }, + { + "epoch": 44.57386363636363, + "grad_norm": 0.8826537728309631, + "learning_rate": 0.0001, + "loss": 0.0729, + "step": 15690 + }, + { + "epoch": 44.60227272727273, + "grad_norm": 1.1283091306686401, + "learning_rate": 0.0001, + "loss": 0.0691, + "step": 15700 + }, + { + "epoch": 44.63068181818182, + "grad_norm": 1.0607900619506836, + "learning_rate": 0.0001, + "loss": 0.0719, + "step": 15710 + }, + { + "epoch": 44.65909090909091, + "grad_norm": 0.8510501384735107, + "learning_rate": 0.0001, + "loss": 0.0731, + "step": 15720 + }, + { + "epoch": 44.6875, + "grad_norm": 0.9762911796569824, + "learning_rate": 0.0001, + "loss": 0.0685, + "step": 15730 + }, + { + "epoch": 44.71590909090909, + "grad_norm": 0.9683955907821655, + "learning_rate": 0.0001, + "loss": 0.0732, + "step": 15740 + }, + { + "epoch": 44.74431818181818, + "grad_norm": 1.0100533962249756, + "learning_rate": 0.0001, + "loss": 0.0707, + "step": 15750 + }, + { + "epoch": 44.77272727272727, + "grad_norm": 0.860872745513916, + "learning_rate": 0.0001, + "loss": 0.0674, + "step": 15760 + }, + { + "epoch": 44.80113636363637, + "grad_norm": 1.065424919128418, + "learning_rate": 0.0001, + "loss": 0.0706, + "step": 15770 + }, + { + "epoch": 44.82954545454545, + "grad_norm": 0.9998086094856262, + "learning_rate": 0.0001, + "loss": 0.0727, + "step": 15780 + }, + { + "epoch": 44.85795454545455, + "grad_norm": 1.1579582691192627, + "learning_rate": 0.0001, + "loss": 0.0688, + "step": 15790 + }, + { + "epoch": 44.88636363636363, + "grad_norm": 1.0913549661636353, + "learning_rate": 0.0001, + "loss": 0.0713, + "step": 15800 + }, + { + "epoch": 44.91477272727273, + "grad_norm": 1.1674264669418335, + "learning_rate": 0.0001, + "loss": 0.0695, + "step": 15810 + }, + { + "epoch": 44.94318181818182, + "grad_norm": 1.1136603355407715, + "learning_rate": 0.0001, + "loss": 0.0698, + "step": 15820 + }, + { + "epoch": 44.97159090909091, + "grad_norm": 1.3215253353118896, + "learning_rate": 0.0001, + "loss": 0.0709, + "step": 15830 + }, + { + "epoch": 45.0, + "grad_norm": 1.308078646659851, + "learning_rate": 0.0001, + "loss": 0.0685, + "step": 15840 + }, + { + "epoch": 45.02840909090909, + "grad_norm": 1.174768090248108, + "learning_rate": 0.0001, + "loss": 0.072, + "step": 15850 + }, + { + "epoch": 45.05681818181818, + "grad_norm": 1.1076934337615967, + "learning_rate": 0.0001, + "loss": 0.067, + "step": 15860 + }, + { + "epoch": 45.08522727272727, + "grad_norm": 1.202553629875183, + "learning_rate": 0.0001, + "loss": 0.0698, + "step": 15870 + }, + { + "epoch": 45.11363636363637, + "grad_norm": 1.2212430238723755, + "learning_rate": 0.0001, + "loss": 0.0671, + "step": 15880 + }, + { + "epoch": 45.14204545454545, + "grad_norm": 1.785838007926941, + "learning_rate": 0.0001, + "loss": 0.0698, + "step": 15890 + }, + { + "epoch": 45.17045454545455, + "grad_norm": 1.2640763521194458, + "learning_rate": 0.0001, + "loss": 0.0717, + "step": 15900 + }, + { + "epoch": 45.19886363636363, + "grad_norm": 1.1825248003005981, + "learning_rate": 0.0001, + "loss": 0.0646, + "step": 15910 + }, + { + "epoch": 45.22727272727273, + "grad_norm": 1.1265792846679688, + "learning_rate": 0.0001, + "loss": 0.0666, + "step": 15920 + }, + { + "epoch": 45.25568181818182, + "grad_norm": 1.1798592805862427, + "learning_rate": 0.0001, + "loss": 0.0645, + "step": 15930 + }, + { + "epoch": 45.28409090909091, + "grad_norm": 0.996825635433197, + "learning_rate": 0.0001, + "loss": 0.0659, + "step": 15940 + }, + { + "epoch": 45.3125, + "grad_norm": 1.1232649087905884, + "learning_rate": 0.0001, + "loss": 0.0699, + "step": 15950 + }, + { + "epoch": 45.34090909090909, + "grad_norm": 1.0283252000808716, + "learning_rate": 0.0001, + "loss": 0.0697, + "step": 15960 + }, + { + "epoch": 45.36931818181818, + "grad_norm": 1.0725383758544922, + "learning_rate": 0.0001, + "loss": 0.0655, + "step": 15970 + }, + { + "epoch": 45.39772727272727, + "grad_norm": 0.88676917552948, + "learning_rate": 0.0001, + "loss": 0.068, + "step": 15980 + }, + { + "epoch": 45.42613636363637, + "grad_norm": 0.8896072506904602, + "learning_rate": 0.0001, + "loss": 0.0689, + "step": 15990 + }, + { + "epoch": 45.45454545454545, + "grad_norm": 1.008349061012268, + "learning_rate": 0.0001, + "loss": 0.0664, + "step": 16000 + }, + { + "epoch": 45.48295454545455, + "grad_norm": 0.9974130988121033, + "learning_rate": 0.0001, + "loss": 0.0672, + "step": 16010 + }, + { + "epoch": 45.51136363636363, + "grad_norm": 1.0345064401626587, + "learning_rate": 0.0001, + "loss": 0.0678, + "step": 16020 + }, + { + "epoch": 45.53977272727273, + "grad_norm": 1.2052901983261108, + "learning_rate": 0.0001, + "loss": 0.0655, + "step": 16030 + }, + { + "epoch": 45.56818181818182, + "grad_norm": 1.0479304790496826, + "learning_rate": 0.0001, + "loss": 0.0677, + "step": 16040 + }, + { + "epoch": 45.59659090909091, + "grad_norm": 1.1516709327697754, + "learning_rate": 0.0001, + "loss": 0.0724, + "step": 16050 + }, + { + "epoch": 45.625, + "grad_norm": 1.3962410688400269, + "learning_rate": 0.0001, + "loss": 0.0685, + "step": 16060 + }, + { + "epoch": 45.65340909090909, + "grad_norm": 1.3850163221359253, + "learning_rate": 0.0001, + "loss": 0.0698, + "step": 16070 + }, + { + "epoch": 45.68181818181818, + "grad_norm": 1.6799479722976685, + "learning_rate": 0.0001, + "loss": 0.069, + "step": 16080 + }, + { + "epoch": 45.71022727272727, + "grad_norm": 0.9734259247779846, + "learning_rate": 0.0001, + "loss": 0.0663, + "step": 16090 + }, + { + "epoch": 45.73863636363637, + "grad_norm": 1.6348193883895874, + "learning_rate": 0.0001, + "loss": 0.0656, + "step": 16100 + }, + { + "epoch": 45.76704545454545, + "grad_norm": 1.480636715888977, + "learning_rate": 0.0001, + "loss": 0.0638, + "step": 16110 + }, + { + "epoch": 45.79545454545455, + "grad_norm": 1.3442875146865845, + "learning_rate": 0.0001, + "loss": 0.0707, + "step": 16120 + }, + { + "epoch": 45.82386363636363, + "grad_norm": 1.4665964841842651, + "learning_rate": 0.0001, + "loss": 0.065, + "step": 16130 + }, + { + "epoch": 45.85227272727273, + "grad_norm": 1.2870675325393677, + "learning_rate": 0.0001, + "loss": 0.0657, + "step": 16140 + }, + { + "epoch": 45.88068181818182, + "grad_norm": 1.2831790447235107, + "learning_rate": 0.0001, + "loss": 0.0655, + "step": 16150 + }, + { + "epoch": 45.90909090909091, + "grad_norm": 1.2384798526763916, + "learning_rate": 0.0001, + "loss": 0.0691, + "step": 16160 + }, + { + "epoch": 45.9375, + "grad_norm": 1.0735738277435303, + "learning_rate": 0.0001, + "loss": 0.0671, + "step": 16170 + }, + { + "epoch": 45.96590909090909, + "grad_norm": 1.3313257694244385, + "learning_rate": 0.0001, + "loss": 0.0643, + "step": 16180 + }, + { + "epoch": 45.99431818181818, + "grad_norm": 1.170076608657837, + "learning_rate": 0.0001, + "loss": 0.068, + "step": 16190 + }, + { + "epoch": 46.02272727272727, + "grad_norm": 1.383847713470459, + "learning_rate": 0.0001, + "loss": 0.0648, + "step": 16200 + }, + { + "epoch": 46.05113636363637, + "grad_norm": 1.4958339929580688, + "learning_rate": 0.0001, + "loss": 0.0684, + "step": 16210 + }, + { + "epoch": 46.07954545454545, + "grad_norm": 0.9990864992141724, + "learning_rate": 0.0001, + "loss": 0.0663, + "step": 16220 + }, + { + "epoch": 46.10795454545455, + "grad_norm": 1.1924002170562744, + "learning_rate": 0.0001, + "loss": 0.0653, + "step": 16230 + }, + { + "epoch": 46.13636363636363, + "grad_norm": 1.2387312650680542, + "learning_rate": 0.0001, + "loss": 0.0637, + "step": 16240 + }, + { + "epoch": 46.16477272727273, + "grad_norm": 1.2327535152435303, + "learning_rate": 0.0001, + "loss": 0.0668, + "step": 16250 + }, + { + "epoch": 46.19318181818182, + "grad_norm": 1.0648465156555176, + "learning_rate": 0.0001, + "loss": 0.0661, + "step": 16260 + }, + { + "epoch": 46.22159090909091, + "grad_norm": 0.9485817551612854, + "learning_rate": 0.0001, + "loss": 0.0655, + "step": 16270 + }, + { + "epoch": 46.25, + "grad_norm": 1.2529845237731934, + "learning_rate": 0.0001, + "loss": 0.0652, + "step": 16280 + }, + { + "epoch": 46.27840909090909, + "grad_norm": 1.0558775663375854, + "learning_rate": 0.0001, + "loss": 0.0631, + "step": 16290 + }, + { + "epoch": 46.30681818181818, + "grad_norm": 1.1467417478561401, + "learning_rate": 0.0001, + "loss": 0.0659, + "step": 16300 + }, + { + "epoch": 46.33522727272727, + "grad_norm": 1.4327044486999512, + "learning_rate": 0.0001, + "loss": 0.0664, + "step": 16310 + }, + { + "epoch": 46.36363636363637, + "grad_norm": 1.25315260887146, + "learning_rate": 0.0001, + "loss": 0.0721, + "step": 16320 + }, + { + "epoch": 46.39204545454545, + "grad_norm": 1.2470471858978271, + "learning_rate": 0.0001, + "loss": 0.066, + "step": 16330 + }, + { + "epoch": 46.42045454545455, + "grad_norm": 1.2044808864593506, + "learning_rate": 0.0001, + "loss": 0.0679, + "step": 16340 + }, + { + "epoch": 46.44886363636363, + "grad_norm": 1.3889199495315552, + "learning_rate": 0.0001, + "loss": 0.0621, + "step": 16350 + }, + { + "epoch": 46.47727272727273, + "grad_norm": 1.1649527549743652, + "learning_rate": 0.0001, + "loss": 0.0651, + "step": 16360 + }, + { + "epoch": 46.50568181818182, + "grad_norm": 1.0425108671188354, + "learning_rate": 0.0001, + "loss": 0.0621, + "step": 16370 + }, + { + "epoch": 46.53409090909091, + "grad_norm": 1.1113712787628174, + "learning_rate": 0.0001, + "loss": 0.0634, + "step": 16380 + }, + { + "epoch": 46.5625, + "grad_norm": 1.1670324802398682, + "learning_rate": 0.0001, + "loss": 0.0642, + "step": 16390 + }, + { + "epoch": 46.59090909090909, + "grad_norm": 1.195335030555725, + "learning_rate": 0.0001, + "loss": 0.0672, + "step": 16400 + }, + { + "epoch": 46.61931818181818, + "grad_norm": 1.1802396774291992, + "learning_rate": 0.0001, + "loss": 0.0678, + "step": 16410 + }, + { + "epoch": 46.64772727272727, + "grad_norm": 1.2869985103607178, + "learning_rate": 0.0001, + "loss": 0.0655, + "step": 16420 + }, + { + "epoch": 46.67613636363637, + "grad_norm": 1.2032572031021118, + "learning_rate": 0.0001, + "loss": 0.0651, + "step": 16430 + }, + { + "epoch": 46.70454545454545, + "grad_norm": 1.116356611251831, + "learning_rate": 0.0001, + "loss": 0.0647, + "step": 16440 + }, + { + "epoch": 46.73295454545455, + "grad_norm": 1.3527079820632935, + "learning_rate": 0.0001, + "loss": 0.0686, + "step": 16450 + }, + { + "epoch": 46.76136363636363, + "grad_norm": 1.105823040008545, + "learning_rate": 0.0001, + "loss": 0.0654, + "step": 16460 + }, + { + "epoch": 46.78977272727273, + "grad_norm": 1.1165571212768555, + "learning_rate": 0.0001, + "loss": 0.0661, + "step": 16470 + }, + { + "epoch": 46.81818181818182, + "grad_norm": 1.0524123907089233, + "learning_rate": 0.0001, + "loss": 0.0662, + "step": 16480 + }, + { + "epoch": 46.84659090909091, + "grad_norm": 1.0740056037902832, + "learning_rate": 0.0001, + "loss": 0.0673, + "step": 16490 + }, + { + "epoch": 46.875, + "grad_norm": 1.0902903079986572, + "learning_rate": 0.0001, + "loss": 0.0713, + "step": 16500 + }, + { + "epoch": 46.90340909090909, + "grad_norm": 1.1585133075714111, + "learning_rate": 0.0001, + "loss": 0.0668, + "step": 16510 + }, + { + "epoch": 46.93181818181818, + "grad_norm": 0.8834289312362671, + "learning_rate": 0.0001, + "loss": 0.0683, + "step": 16520 + }, + { + "epoch": 46.96022727272727, + "grad_norm": 1.0335935354232788, + "learning_rate": 0.0001, + "loss": 0.0684, + "step": 16530 + }, + { + "epoch": 46.98863636363637, + "grad_norm": 1.1123617887496948, + "learning_rate": 0.0001, + "loss": 0.0665, + "step": 16540 + }, + { + "epoch": 47.01704545454545, + "grad_norm": 1.012212872505188, + "learning_rate": 0.0001, + "loss": 0.0663, + "step": 16550 + }, + { + "epoch": 47.04545454545455, + "grad_norm": 0.8919275999069214, + "learning_rate": 0.0001, + "loss": 0.0684, + "step": 16560 + }, + { + "epoch": 47.07386363636363, + "grad_norm": 1.0549639463424683, + "learning_rate": 0.0001, + "loss": 0.0699, + "step": 16570 + }, + { + "epoch": 47.10227272727273, + "grad_norm": 1.048153281211853, + "learning_rate": 0.0001, + "loss": 0.0657, + "step": 16580 + }, + { + "epoch": 47.13068181818182, + "grad_norm": 0.9343200325965881, + "learning_rate": 0.0001, + "loss": 0.0698, + "step": 16590 + }, + { + "epoch": 47.15909090909091, + "grad_norm": 0.970174252986908, + "learning_rate": 0.0001, + "loss": 0.0691, + "step": 16600 + }, + { + "epoch": 47.1875, + "grad_norm": 0.8680684566497803, + "learning_rate": 0.0001, + "loss": 0.0669, + "step": 16610 + }, + { + "epoch": 47.21590909090909, + "grad_norm": 1.1044062376022339, + "learning_rate": 0.0001, + "loss": 0.0678, + "step": 16620 + }, + { + "epoch": 47.24431818181818, + "grad_norm": 1.067230224609375, + "learning_rate": 0.0001, + "loss": 0.0694, + "step": 16630 + }, + { + "epoch": 47.27272727272727, + "grad_norm": 1.0942895412445068, + "learning_rate": 0.0001, + "loss": 0.0677, + "step": 16640 + }, + { + "epoch": 47.30113636363637, + "grad_norm": 0.955245852470398, + "learning_rate": 0.0001, + "loss": 0.0649, + "step": 16650 + }, + { + "epoch": 47.32954545454545, + "grad_norm": 0.9594484567642212, + "learning_rate": 0.0001, + "loss": 0.0675, + "step": 16660 + }, + { + "epoch": 47.35795454545455, + "grad_norm": 1.0073819160461426, + "learning_rate": 0.0001, + "loss": 0.0635, + "step": 16670 + }, + { + "epoch": 47.38636363636363, + "grad_norm": 1.0799858570098877, + "learning_rate": 0.0001, + "loss": 0.0673, + "step": 16680 + }, + { + "epoch": 47.41477272727273, + "grad_norm": 1.113233208656311, + "learning_rate": 0.0001, + "loss": 0.0686, + "step": 16690 + }, + { + "epoch": 47.44318181818182, + "grad_norm": 0.9608368277549744, + "learning_rate": 0.0001, + "loss": 0.0644, + "step": 16700 + }, + { + "epoch": 47.47159090909091, + "grad_norm": 0.9679139256477356, + "learning_rate": 0.0001, + "loss": 0.0648, + "step": 16710 + }, + { + "epoch": 47.5, + "grad_norm": 0.9586361050605774, + "learning_rate": 0.0001, + "loss": 0.0636, + "step": 16720 + }, + { + "epoch": 47.52840909090909, + "grad_norm": 0.7961944341659546, + "learning_rate": 0.0001, + "loss": 0.0649, + "step": 16730 + }, + { + "epoch": 47.55681818181818, + "grad_norm": 0.891774594783783, + "learning_rate": 0.0001, + "loss": 0.0647, + "step": 16740 + }, + { + "epoch": 47.58522727272727, + "grad_norm": 1.0160497426986694, + "learning_rate": 0.0001, + "loss": 0.062, + "step": 16750 + }, + { + "epoch": 47.61363636363637, + "grad_norm": 1.0420070886611938, + "learning_rate": 0.0001, + "loss": 0.0662, + "step": 16760 + }, + { + "epoch": 47.64204545454545, + "grad_norm": 1.0268901586532593, + "learning_rate": 0.0001, + "loss": 0.0671, + "step": 16770 + }, + { + "epoch": 47.67045454545455, + "grad_norm": 0.8848260641098022, + "learning_rate": 0.0001, + "loss": 0.0675, + "step": 16780 + }, + { + "epoch": 47.69886363636363, + "grad_norm": 0.9918054938316345, + "learning_rate": 0.0001, + "loss": 0.0667, + "step": 16790 + }, + { + "epoch": 47.72727272727273, + "grad_norm": 0.9512577056884766, + "learning_rate": 0.0001, + "loss": 0.0699, + "step": 16800 + }, + { + "epoch": 47.75568181818182, + "grad_norm": 1.1260731220245361, + "learning_rate": 0.0001, + "loss": 0.0679, + "step": 16810 + }, + { + "epoch": 47.78409090909091, + "grad_norm": 1.003982424736023, + "learning_rate": 0.0001, + "loss": 0.0644, + "step": 16820 + }, + { + "epoch": 47.8125, + "grad_norm": 0.9938884973526001, + "learning_rate": 0.0001, + "loss": 0.0677, + "step": 16830 + }, + { + "epoch": 47.84090909090909, + "grad_norm": 1.222053050994873, + "learning_rate": 0.0001, + "loss": 0.0631, + "step": 16840 + }, + { + "epoch": 47.86931818181818, + "grad_norm": 0.9429041147232056, + "learning_rate": 0.0001, + "loss": 0.0656, + "step": 16850 + }, + { + "epoch": 47.89772727272727, + "grad_norm": 1.0304569005966187, + "learning_rate": 0.0001, + "loss": 0.0632, + "step": 16860 + }, + { + "epoch": 47.92613636363637, + "grad_norm": 0.8651162981987, + "learning_rate": 0.0001, + "loss": 0.0638, + "step": 16870 + }, + { + "epoch": 47.95454545454545, + "grad_norm": 1.1727617979049683, + "learning_rate": 0.0001, + "loss": 0.0641, + "step": 16880 + }, + { + "epoch": 47.98295454545455, + "grad_norm": 0.9716474413871765, + "learning_rate": 0.0001, + "loss": 0.0672, + "step": 16890 + }, + { + "epoch": 48.01136363636363, + "grad_norm": 0.9646078944206238, + "learning_rate": 0.0001, + "loss": 0.0642, + "step": 16900 + }, + { + "epoch": 48.03977272727273, + "grad_norm": 0.7903724312782288, + "learning_rate": 0.0001, + "loss": 0.0652, + "step": 16910 + }, + { + "epoch": 48.06818181818182, + "grad_norm": 0.7010796070098877, + "learning_rate": 0.0001, + "loss": 0.065, + "step": 16920 + }, + { + "epoch": 48.09659090909091, + "grad_norm": 0.9780798554420471, + "learning_rate": 0.0001, + "loss": 0.064, + "step": 16930 + }, + { + "epoch": 48.125, + "grad_norm": 0.8578932285308838, + "learning_rate": 0.0001, + "loss": 0.065, + "step": 16940 + }, + { + "epoch": 48.15340909090909, + "grad_norm": 1.0814779996871948, + "learning_rate": 0.0001, + "loss": 0.0629, + "step": 16950 + }, + { + "epoch": 48.18181818181818, + "grad_norm": 1.1047223806381226, + "learning_rate": 0.0001, + "loss": 0.0659, + "step": 16960 + }, + { + "epoch": 48.21022727272727, + "grad_norm": 1.0128185749053955, + "learning_rate": 0.0001, + "loss": 0.07, + "step": 16970 + }, + { + "epoch": 48.23863636363637, + "grad_norm": 1.191439151763916, + "learning_rate": 0.0001, + "loss": 0.0683, + "step": 16980 + }, + { + "epoch": 48.26704545454545, + "grad_norm": 1.02851140499115, + "learning_rate": 0.0001, + "loss": 0.0646, + "step": 16990 + }, + { + "epoch": 48.29545454545455, + "grad_norm": 0.9317130446434021, + "learning_rate": 0.0001, + "loss": 0.0666, + "step": 17000 + }, + { + "epoch": 48.32386363636363, + "grad_norm": 0.8646169900894165, + "learning_rate": 0.0001, + "loss": 0.0645, + "step": 17010 + }, + { + "epoch": 48.35227272727273, + "grad_norm": 0.8524001240730286, + "learning_rate": 0.0001, + "loss": 0.0654, + "step": 17020 + }, + { + "epoch": 48.38068181818182, + "grad_norm": 0.7672250270843506, + "learning_rate": 0.0001, + "loss": 0.0676, + "step": 17030 + }, + { + "epoch": 48.40909090909091, + "grad_norm": 0.9044290781021118, + "learning_rate": 0.0001, + "loss": 0.0657, + "step": 17040 + }, + { + "epoch": 48.4375, + "grad_norm": 0.8433730602264404, + "learning_rate": 0.0001, + "loss": 0.0653, + "step": 17050 + }, + { + "epoch": 48.46590909090909, + "grad_norm": 0.7054641842842102, + "learning_rate": 0.0001, + "loss": 0.0642, + "step": 17060 + }, + { + "epoch": 48.49431818181818, + "grad_norm": 0.7396852374076843, + "learning_rate": 0.0001, + "loss": 0.0654, + "step": 17070 + }, + { + "epoch": 48.52272727272727, + "grad_norm": 0.8507287502288818, + "learning_rate": 0.0001, + "loss": 0.063, + "step": 17080 + }, + { + "epoch": 48.55113636363637, + "grad_norm": 0.7744329571723938, + "learning_rate": 0.0001, + "loss": 0.0652, + "step": 17090 + }, + { + "epoch": 48.57954545454545, + "grad_norm": 0.8510982990264893, + "learning_rate": 0.0001, + "loss": 0.0652, + "step": 17100 + }, + { + "epoch": 48.60795454545455, + "grad_norm": 0.9002107977867126, + "learning_rate": 0.0001, + "loss": 0.0652, + "step": 17110 + }, + { + "epoch": 48.63636363636363, + "grad_norm": 1.3234931230545044, + "learning_rate": 0.0001, + "loss": 0.0644, + "step": 17120 + }, + { + "epoch": 48.66477272727273, + "grad_norm": 1.2231804132461548, + "learning_rate": 0.0001, + "loss": 0.0651, + "step": 17130 + }, + { + "epoch": 48.69318181818182, + "grad_norm": 1.0435932874679565, + "learning_rate": 0.0001, + "loss": 0.063, + "step": 17140 + }, + { + "epoch": 48.72159090909091, + "grad_norm": 1.1819216012954712, + "learning_rate": 0.0001, + "loss": 0.0639, + "step": 17150 + }, + { + "epoch": 48.75, + "grad_norm": 1.3706990480422974, + "learning_rate": 0.0001, + "loss": 0.0649, + "step": 17160 + }, + { + "epoch": 48.77840909090909, + "grad_norm": 1.0659633874893188, + "learning_rate": 0.0001, + "loss": 0.0643, + "step": 17170 + }, + { + "epoch": 48.80681818181818, + "grad_norm": 1.1273925304412842, + "learning_rate": 0.0001, + "loss": 0.0647, + "step": 17180 + }, + { + "epoch": 48.83522727272727, + "grad_norm": 1.0904277563095093, + "learning_rate": 0.0001, + "loss": 0.0629, + "step": 17190 + }, + { + "epoch": 48.86363636363637, + "grad_norm": 1.4346485137939453, + "learning_rate": 0.0001, + "loss": 0.0658, + "step": 17200 + }, + { + "epoch": 48.89204545454545, + "grad_norm": 1.2752107381820679, + "learning_rate": 0.0001, + "loss": 0.0666, + "step": 17210 + }, + { + "epoch": 48.92045454545455, + "grad_norm": 1.179890751838684, + "learning_rate": 0.0001, + "loss": 0.0634, + "step": 17220 + }, + { + "epoch": 48.94886363636363, + "grad_norm": 1.0924259424209595, + "learning_rate": 0.0001, + "loss": 0.063, + "step": 17230 + }, + { + "epoch": 48.97727272727273, + "grad_norm": 1.2451024055480957, + "learning_rate": 0.0001, + "loss": 0.063, + "step": 17240 + }, + { + "epoch": 49.00568181818182, + "grad_norm": 1.29100501537323, + "learning_rate": 0.0001, + "loss": 0.0625, + "step": 17250 + }, + { + "epoch": 49.03409090909091, + "grad_norm": 1.175595998764038, + "learning_rate": 0.0001, + "loss": 0.065, + "step": 17260 + }, + { + "epoch": 49.0625, + "grad_norm": 1.084436297416687, + "learning_rate": 0.0001, + "loss": 0.0617, + "step": 17270 + }, + { + "epoch": 49.09090909090909, + "grad_norm": 1.1387672424316406, + "learning_rate": 0.0001, + "loss": 0.0597, + "step": 17280 + }, + { + "epoch": 49.11931818181818, + "grad_norm": 1.0709339380264282, + "learning_rate": 0.0001, + "loss": 0.0642, + "step": 17290 + }, + { + "epoch": 49.14772727272727, + "grad_norm": 1.1278772354125977, + "learning_rate": 0.0001, + "loss": 0.0592, + "step": 17300 + }, + { + "epoch": 49.17613636363637, + "grad_norm": 1.2357215881347656, + "learning_rate": 0.0001, + "loss": 0.0645, + "step": 17310 + }, + { + "epoch": 49.20454545454545, + "grad_norm": 1.3850127458572388, + "learning_rate": 0.0001, + "loss": 0.0643, + "step": 17320 + }, + { + "epoch": 49.23295454545455, + "grad_norm": 1.5667473077774048, + "learning_rate": 0.0001, + "loss": 0.064, + "step": 17330 + }, + { + "epoch": 49.26136363636363, + "grad_norm": 2.0329859256744385, + "learning_rate": 0.0001, + "loss": 0.0638, + "step": 17340 + }, + { + "epoch": 49.28977272727273, + "grad_norm": 1.9868841171264648, + "learning_rate": 0.0001, + "loss": 0.0619, + "step": 17350 + }, + { + "epoch": 49.31818181818182, + "grad_norm": 1.650707721710205, + "learning_rate": 0.0001, + "loss": 0.0622, + "step": 17360 + }, + { + "epoch": 49.34659090909091, + "grad_norm": 1.6646281480789185, + "learning_rate": 0.0001, + "loss": 0.0603, + "step": 17370 + }, + { + "epoch": 49.375, + "grad_norm": 1.3677852153778076, + "learning_rate": 0.0001, + "loss": 0.061, + "step": 17380 + }, + { + "epoch": 49.40340909090909, + "grad_norm": 1.33896005153656, + "learning_rate": 0.0001, + "loss": 0.0588, + "step": 17390 + }, + { + "epoch": 49.43181818181818, + "grad_norm": 1.3581352233886719, + "learning_rate": 0.0001, + "loss": 0.0617, + "step": 17400 + }, + { + "epoch": 49.46022727272727, + "grad_norm": 1.3389812707901, + "learning_rate": 0.0001, + "loss": 0.0574, + "step": 17410 + }, + { + "epoch": 49.48863636363637, + "grad_norm": 1.304565191268921, + "learning_rate": 0.0001, + "loss": 0.0628, + "step": 17420 + }, + { + "epoch": 49.51704545454545, + "grad_norm": 1.9376806020736694, + "learning_rate": 0.0001, + "loss": 0.0611, + "step": 17430 + }, + { + "epoch": 49.54545454545455, + "grad_norm": 1.8597931861877441, + "learning_rate": 0.0001, + "loss": 0.0617, + "step": 17440 + }, + { + "epoch": 49.57386363636363, + "grad_norm": 1.4572594165802002, + "learning_rate": 0.0001, + "loss": 0.0615, + "step": 17450 + }, + { + "epoch": 49.60227272727273, + "grad_norm": 1.7948307991027832, + "learning_rate": 0.0001, + "loss": 0.0604, + "step": 17460 + }, + { + "epoch": 49.63068181818182, + "grad_norm": 1.3871301412582397, + "learning_rate": 0.0001, + "loss": 0.0616, + "step": 17470 + }, + { + "epoch": 49.65909090909091, + "grad_norm": 1.322991132736206, + "learning_rate": 0.0001, + "loss": 0.0601, + "step": 17480 + }, + { + "epoch": 49.6875, + "grad_norm": 1.2961491346359253, + "learning_rate": 0.0001, + "loss": 0.0604, + "step": 17490 + }, + { + "epoch": 49.71590909090909, + "grad_norm": 1.283707618713379, + "learning_rate": 0.0001, + "loss": 0.0609, + "step": 17500 + }, + { + "epoch": 49.74431818181818, + "grad_norm": 1.142791986465454, + "learning_rate": 0.0001, + "loss": 0.0609, + "step": 17510 + }, + { + "epoch": 49.77272727272727, + "grad_norm": 0.8002589344978333, + "learning_rate": 0.0001, + "loss": 0.0618, + "step": 17520 + }, + { + "epoch": 49.80113636363637, + "grad_norm": 0.9746940732002258, + "learning_rate": 0.0001, + "loss": 0.0597, + "step": 17530 + }, + { + "epoch": 49.82954545454545, + "grad_norm": 0.965501606464386, + "learning_rate": 0.0001, + "loss": 0.0646, + "step": 17540 + }, + { + "epoch": 49.85795454545455, + "grad_norm": 1.053093671798706, + "learning_rate": 0.0001, + "loss": 0.0643, + "step": 17550 + }, + { + "epoch": 49.88636363636363, + "grad_norm": 1.2206720113754272, + "learning_rate": 0.0001, + "loss": 0.0618, + "step": 17560 + }, + { + "epoch": 49.91477272727273, + "grad_norm": 1.094285488128662, + "learning_rate": 0.0001, + "loss": 0.0606, + "step": 17570 + }, + { + "epoch": 49.94318181818182, + "grad_norm": 1.9030991792678833, + "learning_rate": 0.0001, + "loss": 0.0679, + "step": 17580 + }, + { + "epoch": 49.97159090909091, + "grad_norm": 1.452059030532837, + "learning_rate": 0.0001, + "loss": 0.0687, + "step": 17590 + }, + { + "epoch": 50.0, + "grad_norm": 1.4255893230438232, + "learning_rate": 0.0001, + "loss": 0.0641, + "step": 17600 + }, + { + "epoch": 50.02840909090909, + "grad_norm": 1.547782063484192, + "learning_rate": 0.0001, + "loss": 0.0607, + "step": 17610 + }, + { + "epoch": 50.05681818181818, + "grad_norm": 1.533228874206543, + "learning_rate": 0.0001, + "loss": 0.0623, + "step": 17620 + }, + { + "epoch": 50.08522727272727, + "grad_norm": 2.1276297569274902, + "learning_rate": 0.0001, + "loss": 0.062, + "step": 17630 + }, + { + "epoch": 50.11363636363637, + "grad_norm": 1.5320310592651367, + "learning_rate": 0.0001, + "loss": 0.059, + "step": 17640 + }, + { + "epoch": 50.14204545454545, + "grad_norm": 1.4868521690368652, + "learning_rate": 0.0001, + "loss": 0.0583, + "step": 17650 + }, + { + "epoch": 50.17045454545455, + "grad_norm": 1.3425029516220093, + "learning_rate": 0.0001, + "loss": 0.0625, + "step": 17660 + }, + { + "epoch": 50.19886363636363, + "grad_norm": 1.683226466178894, + "learning_rate": 0.0001, + "loss": 0.0624, + "step": 17670 + }, + { + "epoch": 50.22727272727273, + "grad_norm": 1.6359002590179443, + "learning_rate": 0.0001, + "loss": 0.0584, + "step": 17680 + }, + { + "epoch": 50.25568181818182, + "grad_norm": 1.1186552047729492, + "learning_rate": 0.0001, + "loss": 0.0624, + "step": 17690 + }, + { + "epoch": 50.28409090909091, + "grad_norm": 0.9917884469032288, + "learning_rate": 0.0001, + "loss": 0.0568, + "step": 17700 + }, + { + "epoch": 50.3125, + "grad_norm": 1.3089747428894043, + "learning_rate": 0.0001, + "loss": 0.0608, + "step": 17710 + }, + { + "epoch": 50.34090909090909, + "grad_norm": 1.094618320465088, + "learning_rate": 0.0001, + "loss": 0.0628, + "step": 17720 + }, + { + "epoch": 50.36931818181818, + "grad_norm": 1.0189076662063599, + "learning_rate": 0.0001, + "loss": 0.061, + "step": 17730 + }, + { + "epoch": 50.39772727272727, + "grad_norm": 1.0388256311416626, + "learning_rate": 0.0001, + "loss": 0.0642, + "step": 17740 + }, + { + "epoch": 50.42613636363637, + "grad_norm": 1.2349307537078857, + "learning_rate": 0.0001, + "loss": 0.0614, + "step": 17750 + }, + { + "epoch": 50.45454545454545, + "grad_norm": 1.1165496110916138, + "learning_rate": 0.0001, + "loss": 0.0615, + "step": 17760 + }, + { + "epoch": 50.48295454545455, + "grad_norm": 1.3187179565429688, + "learning_rate": 0.0001, + "loss": 0.064, + "step": 17770 + }, + { + "epoch": 50.51136363636363, + "grad_norm": 1.1244080066680908, + "learning_rate": 0.0001, + "loss": 0.0631, + "step": 17780 + }, + { + "epoch": 50.53977272727273, + "grad_norm": 1.1014071702957153, + "learning_rate": 0.0001, + "loss": 0.0612, + "step": 17790 + }, + { + "epoch": 50.56818181818182, + "grad_norm": 1.0804203748703003, + "learning_rate": 0.0001, + "loss": 0.0615, + "step": 17800 + }, + { + "epoch": 50.59659090909091, + "grad_norm": 1.1172409057617188, + "learning_rate": 0.0001, + "loss": 0.0583, + "step": 17810 + }, + { + "epoch": 50.625, + "grad_norm": 1.2156904935836792, + "learning_rate": 0.0001, + "loss": 0.0635, + "step": 17820 + }, + { + "epoch": 50.65340909090909, + "grad_norm": 1.0518922805786133, + "learning_rate": 0.0001, + "loss": 0.0649, + "step": 17830 + }, + { + "epoch": 50.68181818181818, + "grad_norm": 1.0572881698608398, + "learning_rate": 0.0001, + "loss": 0.0653, + "step": 17840 + }, + { + "epoch": 50.71022727272727, + "grad_norm": 0.9975048899650574, + "learning_rate": 0.0001, + "loss": 0.0637, + "step": 17850 + }, + { + "epoch": 50.73863636363637, + "grad_norm": 1.0606894493103027, + "learning_rate": 0.0001, + "loss": 0.0629, + "step": 17860 + }, + { + "epoch": 50.76704545454545, + "grad_norm": 1.1085385084152222, + "learning_rate": 0.0001, + "loss": 0.0638, + "step": 17870 + }, + { + "epoch": 50.79545454545455, + "grad_norm": 1.0079888105392456, + "learning_rate": 0.0001, + "loss": 0.0645, + "step": 17880 + }, + { + "epoch": 50.82386363636363, + "grad_norm": 0.9119659662246704, + "learning_rate": 0.0001, + "loss": 0.0657, + "step": 17890 + }, + { + "epoch": 50.85227272727273, + "grad_norm": 1.0529975891113281, + "learning_rate": 0.0001, + "loss": 0.0642, + "step": 17900 + }, + { + "epoch": 50.88068181818182, + "grad_norm": 1.101491928100586, + "learning_rate": 0.0001, + "loss": 0.0642, + "step": 17910 + }, + { + "epoch": 50.90909090909091, + "grad_norm": 1.049623966217041, + "learning_rate": 0.0001, + "loss": 0.0659, + "step": 17920 + }, + { + "epoch": 50.9375, + "grad_norm": 0.855363130569458, + "learning_rate": 0.0001, + "loss": 0.0624, + "step": 17930 + }, + { + "epoch": 50.96590909090909, + "grad_norm": 0.8307511210441589, + "learning_rate": 0.0001, + "loss": 0.0639, + "step": 17940 + }, + { + "epoch": 50.99431818181818, + "grad_norm": 0.9840141534805298, + "learning_rate": 0.0001, + "loss": 0.0638, + "step": 17950 + }, + { + "epoch": 51.02272727272727, + "grad_norm": 0.7411724328994751, + "learning_rate": 0.0001, + "loss": 0.0653, + "step": 17960 + }, + { + "epoch": 51.05113636363637, + "grad_norm": 1.1654945611953735, + "learning_rate": 0.0001, + "loss": 0.0655, + "step": 17970 + }, + { + "epoch": 51.07954545454545, + "grad_norm": 1.0566688776016235, + "learning_rate": 0.0001, + "loss": 0.0717, + "step": 17980 + }, + { + "epoch": 51.10795454545455, + "grad_norm": 1.1673790216445923, + "learning_rate": 0.0001, + "loss": 0.0642, + "step": 17990 + }, + { + "epoch": 51.13636363636363, + "grad_norm": 1.0396006107330322, + "learning_rate": 0.0001, + "loss": 0.0672, + "step": 18000 + }, + { + "epoch": 51.16477272727273, + "grad_norm": 0.9316548705101013, + "learning_rate": 0.0001, + "loss": 0.0688, + "step": 18010 + }, + { + "epoch": 51.19318181818182, + "grad_norm": 1.0011225938796997, + "learning_rate": 0.0001, + "loss": 0.0623, + "step": 18020 + }, + { + "epoch": 51.22159090909091, + "grad_norm": 1.2169394493103027, + "learning_rate": 0.0001, + "loss": 0.0668, + "step": 18030 + }, + { + "epoch": 51.25, + "grad_norm": 1.0183131694793701, + "learning_rate": 0.0001, + "loss": 0.0637, + "step": 18040 + }, + { + "epoch": 51.27840909090909, + "grad_norm": 1.3156449794769287, + "learning_rate": 0.0001, + "loss": 0.0674, + "step": 18050 + }, + { + "epoch": 51.30681818181818, + "grad_norm": 1.2456082105636597, + "learning_rate": 0.0001, + "loss": 0.0677, + "step": 18060 + }, + { + "epoch": 51.33522727272727, + "grad_norm": 1.2302768230438232, + "learning_rate": 0.0001, + "loss": 0.064, + "step": 18070 + }, + { + "epoch": 51.36363636363637, + "grad_norm": 1.3110893964767456, + "learning_rate": 0.0001, + "loss": 0.0643, + "step": 18080 + }, + { + "epoch": 51.39204545454545, + "grad_norm": 1.0676565170288086, + "learning_rate": 0.0001, + "loss": 0.0631, + "step": 18090 + }, + { + "epoch": 51.42045454545455, + "grad_norm": 0.8240692019462585, + "learning_rate": 0.0001, + "loss": 0.0613, + "step": 18100 + }, + { + "epoch": 51.44886363636363, + "grad_norm": 1.0472469329833984, + "learning_rate": 0.0001, + "loss": 0.0635, + "step": 18110 + }, + { + "epoch": 51.47727272727273, + "grad_norm": 1.0723049640655518, + "learning_rate": 0.0001, + "loss": 0.0626, + "step": 18120 + }, + { + "epoch": 51.50568181818182, + "grad_norm": 0.8317203521728516, + "learning_rate": 0.0001, + "loss": 0.0631, + "step": 18130 + }, + { + "epoch": 51.53409090909091, + "grad_norm": 0.8893545269966125, + "learning_rate": 0.0001, + "loss": 0.0642, + "step": 18140 + }, + { + "epoch": 51.5625, + "grad_norm": 1.143960952758789, + "learning_rate": 0.0001, + "loss": 0.0645, + "step": 18150 + }, + { + "epoch": 51.59090909090909, + "grad_norm": 1.1038347482681274, + "learning_rate": 0.0001, + "loss": 0.0654, + "step": 18160 + }, + { + "epoch": 51.61931818181818, + "grad_norm": 0.9441390037536621, + "learning_rate": 0.0001, + "loss": 0.0614, + "step": 18170 + }, + { + "epoch": 51.64772727272727, + "grad_norm": 1.1258492469787598, + "learning_rate": 0.0001, + "loss": 0.0609, + "step": 18180 + }, + { + "epoch": 51.67613636363637, + "grad_norm": 1.1269819736480713, + "learning_rate": 0.0001, + "loss": 0.0637, + "step": 18190 + }, + { + "epoch": 51.70454545454545, + "grad_norm": 0.8500455021858215, + "learning_rate": 0.0001, + "loss": 0.0653, + "step": 18200 + }, + { + "epoch": 51.73295454545455, + "grad_norm": 0.8912470936775208, + "learning_rate": 0.0001, + "loss": 0.0639, + "step": 18210 + }, + { + "epoch": 51.76136363636363, + "grad_norm": 1.0278549194335938, + "learning_rate": 0.0001, + "loss": 0.0603, + "step": 18220 + }, + { + "epoch": 51.78977272727273, + "grad_norm": 0.9313192367553711, + "learning_rate": 0.0001, + "loss": 0.0656, + "step": 18230 + }, + { + "epoch": 51.81818181818182, + "grad_norm": 0.8399008512496948, + "learning_rate": 0.0001, + "loss": 0.066, + "step": 18240 + }, + { + "epoch": 51.84659090909091, + "grad_norm": 0.8020362257957458, + "learning_rate": 0.0001, + "loss": 0.0647, + "step": 18250 + }, + { + "epoch": 51.875, + "grad_norm": 0.9596895575523376, + "learning_rate": 0.0001, + "loss": 0.0631, + "step": 18260 + }, + { + "epoch": 51.90340909090909, + "grad_norm": 0.8371610045433044, + "learning_rate": 0.0001, + "loss": 0.0669, + "step": 18270 + }, + { + "epoch": 51.93181818181818, + "grad_norm": 0.774612307548523, + "learning_rate": 0.0001, + "loss": 0.0637, + "step": 18280 + }, + { + "epoch": 51.96022727272727, + "grad_norm": 0.9038988351821899, + "learning_rate": 0.0001, + "loss": 0.0662, + "step": 18290 + }, + { + "epoch": 51.98863636363637, + "grad_norm": 1.150199055671692, + "learning_rate": 0.0001, + "loss": 0.0631, + "step": 18300 + }, + { + "epoch": 52.01704545454545, + "grad_norm": 1.3316466808319092, + "learning_rate": 0.0001, + "loss": 0.0621, + "step": 18310 + }, + { + "epoch": 52.04545454545455, + "grad_norm": 1.0422097444534302, + "learning_rate": 0.0001, + "loss": 0.0657, + "step": 18320 + }, + { + "epoch": 52.07386363636363, + "grad_norm": 1.0511544942855835, + "learning_rate": 0.0001, + "loss": 0.064, + "step": 18330 + }, + { + "epoch": 52.10227272727273, + "grad_norm": 0.9210748672485352, + "learning_rate": 0.0001, + "loss": 0.0624, + "step": 18340 + }, + { + "epoch": 52.13068181818182, + "grad_norm": 1.0048185586929321, + "learning_rate": 0.0001, + "loss": 0.0614, + "step": 18350 + }, + { + "epoch": 52.15909090909091, + "grad_norm": 1.0767287015914917, + "learning_rate": 0.0001, + "loss": 0.0613, + "step": 18360 + }, + { + "epoch": 52.1875, + "grad_norm": 1.0105133056640625, + "learning_rate": 0.0001, + "loss": 0.0615, + "step": 18370 + }, + { + "epoch": 52.21590909090909, + "grad_norm": 0.7021766304969788, + "learning_rate": 0.0001, + "loss": 0.068, + "step": 18380 + }, + { + "epoch": 52.24431818181818, + "grad_norm": 0.8372295498847961, + "learning_rate": 0.0001, + "loss": 0.0624, + "step": 18390 + }, + { + "epoch": 52.27272727272727, + "grad_norm": 0.7811925411224365, + "learning_rate": 0.0001, + "loss": 0.0649, + "step": 18400 + }, + { + "epoch": 52.30113636363637, + "grad_norm": 0.821349024772644, + "learning_rate": 0.0001, + "loss": 0.0654, + "step": 18410 + }, + { + "epoch": 52.32954545454545, + "grad_norm": 0.7928653359413147, + "learning_rate": 0.0001, + "loss": 0.0626, + "step": 18420 + }, + { + "epoch": 52.35795454545455, + "grad_norm": 0.8362237215042114, + "learning_rate": 0.0001, + "loss": 0.0629, + "step": 18430 + }, + { + "epoch": 52.38636363636363, + "grad_norm": 0.9375684261322021, + "learning_rate": 0.0001, + "loss": 0.0659, + "step": 18440 + }, + { + "epoch": 52.41477272727273, + "grad_norm": 0.9780309796333313, + "learning_rate": 0.0001, + "loss": 0.0631, + "step": 18450 + }, + { + "epoch": 52.44318181818182, + "grad_norm": 1.062294840812683, + "learning_rate": 0.0001, + "loss": 0.0665, + "step": 18460 + }, + { + "epoch": 52.47159090909091, + "grad_norm": 1.0660057067871094, + "learning_rate": 0.0001, + "loss": 0.065, + "step": 18470 + }, + { + "epoch": 52.5, + "grad_norm": 1.05930495262146, + "learning_rate": 0.0001, + "loss": 0.0626, + "step": 18480 + }, + { + "epoch": 52.52840909090909, + "grad_norm": 0.8140740394592285, + "learning_rate": 0.0001, + "loss": 0.0615, + "step": 18490 + }, + { + "epoch": 52.55681818181818, + "grad_norm": 1.0095051527023315, + "learning_rate": 0.0001, + "loss": 0.0605, + "step": 18500 + }, + { + "epoch": 52.58522727272727, + "grad_norm": 0.9089073538780212, + "learning_rate": 0.0001, + "loss": 0.0616, + "step": 18510 + }, + { + "epoch": 52.61363636363637, + "grad_norm": 0.8695672154426575, + "learning_rate": 0.0001, + "loss": 0.0623, + "step": 18520 + }, + { + "epoch": 52.64204545454545, + "grad_norm": 1.0662381649017334, + "learning_rate": 0.0001, + "loss": 0.0639, + "step": 18530 + }, + { + "epoch": 52.67045454545455, + "grad_norm": 1.0227075815200806, + "learning_rate": 0.0001, + "loss": 0.0632, + "step": 18540 + }, + { + "epoch": 52.69886363636363, + "grad_norm": 1.003777265548706, + "learning_rate": 0.0001, + "loss": 0.0629, + "step": 18550 + }, + { + "epoch": 52.72727272727273, + "grad_norm": 0.9030758738517761, + "learning_rate": 0.0001, + "loss": 0.0591, + "step": 18560 + }, + { + "epoch": 52.75568181818182, + "grad_norm": 1.0003148317337036, + "learning_rate": 0.0001, + "loss": 0.0632, + "step": 18570 + }, + { + "epoch": 52.78409090909091, + "grad_norm": 0.8785012364387512, + "learning_rate": 0.0001, + "loss": 0.0622, + "step": 18580 + }, + { + "epoch": 52.8125, + "grad_norm": 1.1217682361602783, + "learning_rate": 0.0001, + "loss": 0.0636, + "step": 18590 + }, + { + "epoch": 52.84090909090909, + "grad_norm": 1.0676538944244385, + "learning_rate": 0.0001, + "loss": 0.0617, + "step": 18600 + }, + { + "epoch": 52.86931818181818, + "grad_norm": 1.2667808532714844, + "learning_rate": 0.0001, + "loss": 0.0634, + "step": 18610 + }, + { + "epoch": 52.89772727272727, + "grad_norm": 1.255224347114563, + "learning_rate": 0.0001, + "loss": 0.0632, + "step": 18620 + }, + { + "epoch": 52.92613636363637, + "grad_norm": 1.0495514869689941, + "learning_rate": 0.0001, + "loss": 0.0635, + "step": 18630 + }, + { + "epoch": 52.95454545454545, + "grad_norm": 1.2018243074417114, + "learning_rate": 0.0001, + "loss": 0.066, + "step": 18640 + }, + { + "epoch": 52.98295454545455, + "grad_norm": 1.1866649389266968, + "learning_rate": 0.0001, + "loss": 0.0592, + "step": 18650 + }, + { + "epoch": 53.01136363636363, + "grad_norm": 1.3282562494277954, + "learning_rate": 0.0001, + "loss": 0.0644, + "step": 18660 + }, + { + "epoch": 53.03977272727273, + "grad_norm": 1.1297610998153687, + "learning_rate": 0.0001, + "loss": 0.0636, + "step": 18670 + }, + { + "epoch": 53.06818181818182, + "grad_norm": 1.1281236410140991, + "learning_rate": 0.0001, + "loss": 0.0598, + "step": 18680 + }, + { + "epoch": 53.09659090909091, + "grad_norm": 1.2281813621520996, + "learning_rate": 0.0001, + "loss": 0.0605, + "step": 18690 + }, + { + "epoch": 53.125, + "grad_norm": 1.1154251098632812, + "learning_rate": 0.0001, + "loss": 0.0613, + "step": 18700 + }, + { + "epoch": 53.15340909090909, + "grad_norm": 0.9964898824691772, + "learning_rate": 0.0001, + "loss": 0.0616, + "step": 18710 + }, + { + "epoch": 53.18181818181818, + "grad_norm": 1.0880987644195557, + "learning_rate": 0.0001, + "loss": 0.0582, + "step": 18720 + }, + { + "epoch": 53.21022727272727, + "grad_norm": 1.1195552349090576, + "learning_rate": 0.0001, + "loss": 0.0612, + "step": 18730 + }, + { + "epoch": 53.23863636363637, + "grad_norm": 1.1419169902801514, + "learning_rate": 0.0001, + "loss": 0.0616, + "step": 18740 + }, + { + "epoch": 53.26704545454545, + "grad_norm": 0.9695098400115967, + "learning_rate": 0.0001, + "loss": 0.0617, + "step": 18750 + }, + { + "epoch": 53.29545454545455, + "grad_norm": 1.0621428489685059, + "learning_rate": 0.0001, + "loss": 0.0632, + "step": 18760 + }, + { + "epoch": 53.32386363636363, + "grad_norm": 1.047326922416687, + "learning_rate": 0.0001, + "loss": 0.0603, + "step": 18770 + }, + { + "epoch": 53.35227272727273, + "grad_norm": 1.062296748161316, + "learning_rate": 0.0001, + "loss": 0.0639, + "step": 18780 + }, + { + "epoch": 53.38068181818182, + "grad_norm": 1.134915828704834, + "learning_rate": 0.0001, + "loss": 0.0619, + "step": 18790 + }, + { + "epoch": 53.40909090909091, + "grad_norm": 1.040932536125183, + "learning_rate": 0.0001, + "loss": 0.0629, + "step": 18800 + }, + { + "epoch": 53.4375, + "grad_norm": 1.0221163034439087, + "learning_rate": 0.0001, + "loss": 0.0603, + "step": 18810 + }, + { + "epoch": 53.46590909090909, + "grad_norm": 0.9637789726257324, + "learning_rate": 0.0001, + "loss": 0.0614, + "step": 18820 + }, + { + "epoch": 53.49431818181818, + "grad_norm": 1.0450648069381714, + "learning_rate": 0.0001, + "loss": 0.06, + "step": 18830 + }, + { + "epoch": 53.52272727272727, + "grad_norm": 0.9436495900154114, + "learning_rate": 0.0001, + "loss": 0.0622, + "step": 18840 + }, + { + "epoch": 53.55113636363637, + "grad_norm": 1.0419658422470093, + "learning_rate": 0.0001, + "loss": 0.0617, + "step": 18850 + }, + { + "epoch": 53.57954545454545, + "grad_norm": 0.8948044180870056, + "learning_rate": 0.0001, + "loss": 0.0604, + "step": 18860 + }, + { + "epoch": 53.60795454545455, + "grad_norm": 1.0394561290740967, + "learning_rate": 0.0001, + "loss": 0.0623, + "step": 18870 + }, + { + "epoch": 53.63636363636363, + "grad_norm": 1.00216543674469, + "learning_rate": 0.0001, + "loss": 0.0597, + "step": 18880 + }, + { + "epoch": 53.66477272727273, + "grad_norm": 0.7522396445274353, + "learning_rate": 0.0001, + "loss": 0.0618, + "step": 18890 + }, + { + "epoch": 53.69318181818182, + "grad_norm": 1.162223219871521, + "learning_rate": 0.0001, + "loss": 0.063, + "step": 18900 + }, + { + "epoch": 53.72159090909091, + "grad_norm": 1.0378843545913696, + "learning_rate": 0.0001, + "loss": 0.0645, + "step": 18910 + }, + { + "epoch": 53.75, + "grad_norm": 0.890440821647644, + "learning_rate": 0.0001, + "loss": 0.0615, + "step": 18920 + }, + { + "epoch": 53.77840909090909, + "grad_norm": 0.825039803981781, + "learning_rate": 0.0001, + "loss": 0.0611, + "step": 18930 + }, + { + "epoch": 53.80681818181818, + "grad_norm": 0.8410844802856445, + "learning_rate": 0.0001, + "loss": 0.0608, + "step": 18940 + }, + { + "epoch": 53.83522727272727, + "grad_norm": 1.1039787530899048, + "learning_rate": 0.0001, + "loss": 0.0653, + "step": 18950 + }, + { + "epoch": 53.86363636363637, + "grad_norm": 0.9757326245307922, + "learning_rate": 0.0001, + "loss": 0.0616, + "step": 18960 + }, + { + "epoch": 53.89204545454545, + "grad_norm": 0.7968357801437378, + "learning_rate": 0.0001, + "loss": 0.0639, + "step": 18970 + }, + { + "epoch": 53.92045454545455, + "grad_norm": 0.9554797410964966, + "learning_rate": 0.0001, + "loss": 0.0605, + "step": 18980 + }, + { + "epoch": 53.94886363636363, + "grad_norm": 0.8579493165016174, + "learning_rate": 0.0001, + "loss": 0.0584, + "step": 18990 + }, + { + "epoch": 53.97727272727273, + "grad_norm": 0.841977059841156, + "learning_rate": 0.0001, + "loss": 0.0638, + "step": 19000 + }, + { + "epoch": 54.00568181818182, + "grad_norm": 0.7986201643943787, + "learning_rate": 0.0001, + "loss": 0.0593, + "step": 19010 + }, + { + "epoch": 54.03409090909091, + "grad_norm": 1.3590857982635498, + "learning_rate": 0.0001, + "loss": 0.0612, + "step": 19020 + }, + { + "epoch": 54.0625, + "grad_norm": 0.7293931841850281, + "learning_rate": 0.0001, + "loss": 0.0606, + "step": 19030 + }, + { + "epoch": 54.09090909090909, + "grad_norm": 0.8448790907859802, + "learning_rate": 0.0001, + "loss": 0.0649, + "step": 19040 + }, + { + "epoch": 54.11931818181818, + "grad_norm": 0.9543153047561646, + "learning_rate": 0.0001, + "loss": 0.0627, + "step": 19050 + }, + { + "epoch": 54.14772727272727, + "grad_norm": 0.859277606010437, + "learning_rate": 0.0001, + "loss": 0.0657, + "step": 19060 + }, + { + "epoch": 54.17613636363637, + "grad_norm": 0.9446835517883301, + "learning_rate": 0.0001, + "loss": 0.0598, + "step": 19070 + }, + { + "epoch": 54.20454545454545, + "grad_norm": 0.9382405281066895, + "learning_rate": 0.0001, + "loss": 0.0624, + "step": 19080 + }, + { + "epoch": 54.23295454545455, + "grad_norm": 1.0334746837615967, + "learning_rate": 0.0001, + "loss": 0.0619, + "step": 19090 + }, + { + "epoch": 54.26136363636363, + "grad_norm": 1.0028332471847534, + "learning_rate": 0.0001, + "loss": 0.0611, + "step": 19100 + }, + { + "epoch": 54.28977272727273, + "grad_norm": 1.056213140487671, + "learning_rate": 0.0001, + "loss": 0.0626, + "step": 19110 + }, + { + "epoch": 54.31818181818182, + "grad_norm": 0.9702300429344177, + "learning_rate": 0.0001, + "loss": 0.0677, + "step": 19120 + }, + { + "epoch": 54.34659090909091, + "grad_norm": 1.210434079170227, + "learning_rate": 0.0001, + "loss": 0.0629, + "step": 19130 + }, + { + "epoch": 54.375, + "grad_norm": 1.048459529876709, + "learning_rate": 0.0001, + "loss": 0.0628, + "step": 19140 + }, + { + "epoch": 54.40340909090909, + "grad_norm": 0.9593278765678406, + "learning_rate": 0.0001, + "loss": 0.0605, + "step": 19150 + }, + { + "epoch": 54.43181818181818, + "grad_norm": 0.7503321170806885, + "learning_rate": 0.0001, + "loss": 0.0589, + "step": 19160 + }, + { + "epoch": 54.46022727272727, + "grad_norm": 0.8162446618080139, + "learning_rate": 0.0001, + "loss": 0.0623, + "step": 19170 + }, + { + "epoch": 54.48863636363637, + "grad_norm": 0.619260311126709, + "learning_rate": 0.0001, + "loss": 0.0598, + "step": 19180 + }, + { + "epoch": 54.51704545454545, + "grad_norm": 0.6872047781944275, + "learning_rate": 0.0001, + "loss": 0.0646, + "step": 19190 + }, + { + "epoch": 54.54545454545455, + "grad_norm": 0.9017942547798157, + "learning_rate": 0.0001, + "loss": 0.0642, + "step": 19200 + }, + { + "epoch": 54.57386363636363, + "grad_norm": 0.9055486917495728, + "learning_rate": 0.0001, + "loss": 0.0615, + "step": 19210 + }, + { + "epoch": 54.60227272727273, + "grad_norm": 0.9028745889663696, + "learning_rate": 0.0001, + "loss": 0.0607, + "step": 19220 + }, + { + "epoch": 54.63068181818182, + "grad_norm": 0.937928318977356, + "learning_rate": 0.0001, + "loss": 0.0618, + "step": 19230 + }, + { + "epoch": 54.65909090909091, + "grad_norm": 0.9727193713188171, + "learning_rate": 0.0001, + "loss": 0.0616, + "step": 19240 + }, + { + "epoch": 54.6875, + "grad_norm": 0.8907310366630554, + "learning_rate": 0.0001, + "loss": 0.0606, + "step": 19250 + }, + { + "epoch": 54.71590909090909, + "grad_norm": 0.8963820934295654, + "learning_rate": 0.0001, + "loss": 0.0588, + "step": 19260 + }, + { + "epoch": 54.74431818181818, + "grad_norm": 0.966294527053833, + "learning_rate": 0.0001, + "loss": 0.0604, + "step": 19270 + }, + { + "epoch": 54.77272727272727, + "grad_norm": 0.8197779059410095, + "learning_rate": 0.0001, + "loss": 0.0632, + "step": 19280 + }, + { + "epoch": 54.80113636363637, + "grad_norm": 0.9076131582260132, + "learning_rate": 0.0001, + "loss": 0.063, + "step": 19290 + }, + { + "epoch": 54.82954545454545, + "grad_norm": 1.0960373878479004, + "learning_rate": 0.0001, + "loss": 0.0629, + "step": 19300 + }, + { + "epoch": 54.85795454545455, + "grad_norm": 1.0276180505752563, + "learning_rate": 0.0001, + "loss": 0.0614, + "step": 19310 + }, + { + "epoch": 54.88636363636363, + "grad_norm": 0.8789876699447632, + "learning_rate": 0.0001, + "loss": 0.0603, + "step": 19320 + }, + { + "epoch": 54.91477272727273, + "grad_norm": 0.9199723601341248, + "learning_rate": 0.0001, + "loss": 0.0596, + "step": 19330 + }, + { + "epoch": 54.94318181818182, + "grad_norm": 0.8767564296722412, + "learning_rate": 0.0001, + "loss": 0.061, + "step": 19340 + }, + { + "epoch": 54.97159090909091, + "grad_norm": 0.8243430256843567, + "learning_rate": 0.0001, + "loss": 0.0598, + "step": 19350 + }, + { + "epoch": 55.0, + "grad_norm": 0.8050703406333923, + "learning_rate": 0.0001, + "loss": 0.0614, + "step": 19360 + }, + { + "epoch": 55.02840909090909, + "grad_norm": 0.7457884550094604, + "learning_rate": 0.0001, + "loss": 0.0591, + "step": 19370 + }, + { + "epoch": 55.05681818181818, + "grad_norm": 0.7916040420532227, + "learning_rate": 0.0001, + "loss": 0.0608, + "step": 19380 + }, + { + "epoch": 55.08522727272727, + "grad_norm": 1.00780189037323, + "learning_rate": 0.0001, + "loss": 0.0611, + "step": 19390 + }, + { + "epoch": 55.11363636363637, + "grad_norm": 1.1122857332229614, + "learning_rate": 0.0001, + "loss": 0.0638, + "step": 19400 + }, + { + "epoch": 55.14204545454545, + "grad_norm": 1.1893644332885742, + "learning_rate": 0.0001, + "loss": 0.0606, + "step": 19410 + }, + { + "epoch": 55.17045454545455, + "grad_norm": 1.3157379627227783, + "learning_rate": 0.0001, + "loss": 0.0601, + "step": 19420 + }, + { + "epoch": 55.19886363636363, + "grad_norm": 1.305027961730957, + "learning_rate": 0.0001, + "loss": 0.063, + "step": 19430 + }, + { + "epoch": 55.22727272727273, + "grad_norm": 1.2850711345672607, + "learning_rate": 0.0001, + "loss": 0.0601, + "step": 19440 + }, + { + "epoch": 55.25568181818182, + "grad_norm": 1.2181235551834106, + "learning_rate": 0.0001, + "loss": 0.0606, + "step": 19450 + }, + { + "epoch": 55.28409090909091, + "grad_norm": 1.2655651569366455, + "learning_rate": 0.0001, + "loss": 0.0586, + "step": 19460 + }, + { + "epoch": 55.3125, + "grad_norm": 1.0834294557571411, + "learning_rate": 0.0001, + "loss": 0.0592, + "step": 19470 + }, + { + "epoch": 55.34090909090909, + "grad_norm": 0.8892400860786438, + "learning_rate": 0.0001, + "loss": 0.0594, + "step": 19480 + }, + { + "epoch": 55.36931818181818, + "grad_norm": 1.0693202018737793, + "learning_rate": 0.0001, + "loss": 0.0631, + "step": 19490 + }, + { + "epoch": 55.39772727272727, + "grad_norm": 0.9103065729141235, + "learning_rate": 0.0001, + "loss": 0.0599, + "step": 19500 + }, + { + "epoch": 55.42613636363637, + "grad_norm": 0.9410889744758606, + "learning_rate": 0.0001, + "loss": 0.057, + "step": 19510 + }, + { + "epoch": 55.45454545454545, + "grad_norm": 0.9349491000175476, + "learning_rate": 0.0001, + "loss": 0.0617, + "step": 19520 + }, + { + "epoch": 55.48295454545455, + "grad_norm": 0.7211048007011414, + "learning_rate": 0.0001, + "loss": 0.0623, + "step": 19530 + }, + { + "epoch": 55.51136363636363, + "grad_norm": 1.076320767402649, + "learning_rate": 0.0001, + "loss": 0.063, + "step": 19540 + }, + { + "epoch": 55.53977272727273, + "grad_norm": 1.0220001935958862, + "learning_rate": 0.0001, + "loss": 0.0641, + "step": 19550 + }, + { + "epoch": 55.56818181818182, + "grad_norm": 1.0938565731048584, + "learning_rate": 0.0001, + "loss": 0.0648, + "step": 19560 + }, + { + "epoch": 55.59659090909091, + "grad_norm": 1.207349181175232, + "learning_rate": 0.0001, + "loss": 0.0618, + "step": 19570 + }, + { + "epoch": 55.625, + "grad_norm": 0.8213934302330017, + "learning_rate": 0.0001, + "loss": 0.0618, + "step": 19580 + }, + { + "epoch": 55.65340909090909, + "grad_norm": 0.9144793748855591, + "learning_rate": 0.0001, + "loss": 0.0611, + "step": 19590 + }, + { + "epoch": 55.68181818181818, + "grad_norm": 0.8391266465187073, + "learning_rate": 0.0001, + "loss": 0.0593, + "step": 19600 + }, + { + "epoch": 55.71022727272727, + "grad_norm": 0.8670185804367065, + "learning_rate": 0.0001, + "loss": 0.0612, + "step": 19610 + }, + { + "epoch": 55.73863636363637, + "grad_norm": 0.909506618976593, + "learning_rate": 0.0001, + "loss": 0.0622, + "step": 19620 + }, + { + "epoch": 55.76704545454545, + "grad_norm": 0.746269166469574, + "learning_rate": 0.0001, + "loss": 0.0613, + "step": 19630 + }, + { + "epoch": 55.79545454545455, + "grad_norm": 0.6903102993965149, + "learning_rate": 0.0001, + "loss": 0.0604, + "step": 19640 + }, + { + "epoch": 55.82386363636363, + "grad_norm": 0.7787826061248779, + "learning_rate": 0.0001, + "loss": 0.0617, + "step": 19650 + }, + { + "epoch": 55.85227272727273, + "grad_norm": 0.8575695753097534, + "learning_rate": 0.0001, + "loss": 0.061, + "step": 19660 + }, + { + "epoch": 55.88068181818182, + "grad_norm": 1.386139154434204, + "learning_rate": 0.0001, + "loss": 0.0617, + "step": 19670 + }, + { + "epoch": 55.90909090909091, + "grad_norm": 1.4138883352279663, + "learning_rate": 0.0001, + "loss": 0.0616, + "step": 19680 + }, + { + "epoch": 55.9375, + "grad_norm": 1.166518211364746, + "learning_rate": 0.0001, + "loss": 0.0607, + "step": 19690 + }, + { + "epoch": 55.96590909090909, + "grad_norm": 1.1394963264465332, + "learning_rate": 0.0001, + "loss": 0.0603, + "step": 19700 + }, + { + "epoch": 55.99431818181818, + "grad_norm": 1.0092238187789917, + "learning_rate": 0.0001, + "loss": 0.0604, + "step": 19710 + }, + { + "epoch": 56.02272727272727, + "grad_norm": 1.2652308940887451, + "learning_rate": 0.0001, + "loss": 0.0618, + "step": 19720 + }, + { + "epoch": 56.05113636363637, + "grad_norm": 1.0221445560455322, + "learning_rate": 0.0001, + "loss": 0.0605, + "step": 19730 + }, + { + "epoch": 56.07954545454545, + "grad_norm": 1.1227253675460815, + "learning_rate": 0.0001, + "loss": 0.0593, + "step": 19740 + }, + { + "epoch": 56.10795454545455, + "grad_norm": 1.105058193206787, + "learning_rate": 0.0001, + "loss": 0.0584, + "step": 19750 + }, + { + "epoch": 56.13636363636363, + "grad_norm": 0.9531204104423523, + "learning_rate": 0.0001, + "loss": 0.0614, + "step": 19760 + }, + { + "epoch": 56.16477272727273, + "grad_norm": 0.7669575214385986, + "learning_rate": 0.0001, + "loss": 0.0583, + "step": 19770 + }, + { + "epoch": 56.19318181818182, + "grad_norm": 1.0532715320587158, + "learning_rate": 0.0001, + "loss": 0.0572, + "step": 19780 + }, + { + "epoch": 56.22159090909091, + "grad_norm": 1.0322656631469727, + "learning_rate": 0.0001, + "loss": 0.0596, + "step": 19790 + }, + { + "epoch": 56.25, + "grad_norm": 1.2287739515304565, + "learning_rate": 0.0001, + "loss": 0.0567, + "step": 19800 + }, + { + "epoch": 56.27840909090909, + "grad_norm": 1.2664307355880737, + "learning_rate": 0.0001, + "loss": 0.0575, + "step": 19810 + }, + { + "epoch": 56.30681818181818, + "grad_norm": 1.059367060661316, + "learning_rate": 0.0001, + "loss": 0.0595, + "step": 19820 + }, + { + "epoch": 56.33522727272727, + "grad_norm": 0.7184119820594788, + "learning_rate": 0.0001, + "loss": 0.0607, + "step": 19830 + }, + { + "epoch": 56.36363636363637, + "grad_norm": 0.996780276298523, + "learning_rate": 0.0001, + "loss": 0.0594, + "step": 19840 + }, + { + "epoch": 56.39204545454545, + "grad_norm": 0.8815504312515259, + "learning_rate": 0.0001, + "loss": 0.0583, + "step": 19850 + }, + { + "epoch": 56.42045454545455, + "grad_norm": 1.0592563152313232, + "learning_rate": 0.0001, + "loss": 0.0595, + "step": 19860 + }, + { + "epoch": 56.44886363636363, + "grad_norm": 0.7666848301887512, + "learning_rate": 0.0001, + "loss": 0.0588, + "step": 19870 + }, + { + "epoch": 56.47727272727273, + "grad_norm": 0.7527984976768494, + "learning_rate": 0.0001, + "loss": 0.061, + "step": 19880 + }, + { + "epoch": 56.50568181818182, + "grad_norm": 1.3445789813995361, + "learning_rate": 0.0001, + "loss": 0.0625, + "step": 19890 + }, + { + "epoch": 56.53409090909091, + "grad_norm": 1.4454388618469238, + "learning_rate": 0.0001, + "loss": 0.0613, + "step": 19900 + }, + { + "epoch": 56.5625, + "grad_norm": 1.112709641456604, + "learning_rate": 0.0001, + "loss": 0.0611, + "step": 19910 + }, + { + "epoch": 56.59090909090909, + "grad_norm": 1.1458951234817505, + "learning_rate": 0.0001, + "loss": 0.0586, + "step": 19920 + }, + { + "epoch": 56.61931818181818, + "grad_norm": 1.526114821434021, + "learning_rate": 0.0001, + "loss": 0.059, + "step": 19930 + }, + { + "epoch": 56.64772727272727, + "grad_norm": 1.657968282699585, + "learning_rate": 0.0001, + "loss": 0.0615, + "step": 19940 + }, + { + "epoch": 56.67613636363637, + "grad_norm": 1.5407483577728271, + "learning_rate": 0.0001, + "loss": 0.0566, + "step": 19950 + }, + { + "epoch": 56.70454545454545, + "grad_norm": 1.186934232711792, + "learning_rate": 0.0001, + "loss": 0.0583, + "step": 19960 + }, + { + "epoch": 56.73295454545455, + "grad_norm": 0.9510246515274048, + "learning_rate": 0.0001, + "loss": 0.0554, + "step": 19970 + }, + { + "epoch": 56.76136363636363, + "grad_norm": 0.8539232611656189, + "learning_rate": 0.0001, + "loss": 0.0566, + "step": 19980 + }, + { + "epoch": 56.78977272727273, + "grad_norm": 1.1108481884002686, + "learning_rate": 0.0001, + "loss": 0.0539, + "step": 19990 + }, + { + "epoch": 56.81818181818182, + "grad_norm": 1.3073898553848267, + "learning_rate": 0.0001, + "loss": 0.0571, + "step": 20000 + }, + { + "epoch": 56.84659090909091, + "grad_norm": 1.0064797401428223, + "learning_rate": 0.0001, + "loss": 0.0569, + "step": 20010 + }, + { + "epoch": 56.875, + "grad_norm": 1.1430519819259644, + "learning_rate": 0.0001, + "loss": 0.058, + "step": 20020 + }, + { + "epoch": 56.90340909090909, + "grad_norm": 1.2745853662490845, + "learning_rate": 0.0001, + "loss": 0.0581, + "step": 20030 + }, + { + "epoch": 56.93181818181818, + "grad_norm": 1.700903296470642, + "learning_rate": 0.0001, + "loss": 0.0655, + "step": 20040 + }, + { + "epoch": 56.96022727272727, + "grad_norm": 1.9346566200256348, + "learning_rate": 0.0001, + "loss": 0.0595, + "step": 20050 + }, + { + "epoch": 56.98863636363637, + "grad_norm": 1.8350406885147095, + "learning_rate": 0.0001, + "loss": 0.058, + "step": 20060 + }, + { + "epoch": 57.01704545454545, + "grad_norm": 1.5323282480239868, + "learning_rate": 0.0001, + "loss": 0.0602, + "step": 20070 + }, + { + "epoch": 57.04545454545455, + "grad_norm": 1.794053077697754, + "learning_rate": 0.0001, + "loss": 0.0552, + "step": 20080 + }, + { + "epoch": 57.07386363636363, + "grad_norm": 2.6057610511779785, + "learning_rate": 0.0001, + "loss": 0.0596, + "step": 20090 + }, + { + "epoch": 57.10227272727273, + "grad_norm": 1.8744699954986572, + "learning_rate": 0.0001, + "loss": 0.0558, + "step": 20100 + }, + { + "epoch": 57.13068181818182, + "grad_norm": 2.1725265979766846, + "learning_rate": 0.0001, + "loss": 0.0573, + "step": 20110 + }, + { + "epoch": 57.15909090909091, + "grad_norm": 1.8657861948013306, + "learning_rate": 0.0001, + "loss": 0.0546, + "step": 20120 + }, + { + "epoch": 57.1875, + "grad_norm": 4.145320415496826, + "learning_rate": 0.0001, + "loss": 0.0647, + "step": 20130 + }, + { + "epoch": 57.21590909090909, + "grad_norm": 3.5790581703186035, + "learning_rate": 0.0001, + "loss": 0.0546, + "step": 20140 + }, + { + "epoch": 57.24431818181818, + "grad_norm": 1.8389453887939453, + "learning_rate": 0.0001, + "loss": 0.0529, + "step": 20150 + }, + { + "epoch": 57.27272727272727, + "grad_norm": 1.848724603652954, + "learning_rate": 0.0001, + "loss": 0.0652, + "step": 20160 + }, + { + "epoch": 57.30113636363637, + "grad_norm": 2.9280498027801514, + "learning_rate": 0.0001, + "loss": 0.0571, + "step": 20170 + }, + { + "epoch": 57.32954545454545, + "grad_norm": 3.1636435985565186, + "learning_rate": 0.0001, + "loss": 0.0539, + "step": 20180 + }, + { + "epoch": 57.35795454545455, + "grad_norm": 1.8078699111938477, + "learning_rate": 0.0001, + "loss": 0.0524, + "step": 20190 + }, + { + "epoch": 57.38636363636363, + "grad_norm": 2.467681884765625, + "learning_rate": 0.0001, + "loss": 0.0533, + "step": 20200 + }, + { + "epoch": 57.41477272727273, + "grad_norm": 1.868058204650879, + "learning_rate": 0.0001, + "loss": 0.054, + "step": 20210 + }, + { + "epoch": 57.44318181818182, + "grad_norm": 1.2513569593429565, + "learning_rate": 0.0001, + "loss": 0.0498, + "step": 20220 + }, + { + "epoch": 57.47159090909091, + "grad_norm": 1.9683401584625244, + "learning_rate": 0.0001, + "loss": 0.0507, + "step": 20230 + }, + { + "epoch": 57.5, + "grad_norm": 1.4425514936447144, + "learning_rate": 0.0001, + "loss": 0.0527, + "step": 20240 + }, + { + "epoch": 57.52840909090909, + "grad_norm": 1.2349433898925781, + "learning_rate": 0.0001, + "loss": 0.058, + "step": 20250 + }, + { + "epoch": 57.55681818181818, + "grad_norm": 1.2428669929504395, + "learning_rate": 0.0001, + "loss": 0.0541, + "step": 20260 + }, + { + "epoch": 57.58522727272727, + "grad_norm": 1.4915668964385986, + "learning_rate": 0.0001, + "loss": 0.0558, + "step": 20270 + }, + { + "epoch": 57.61363636363637, + "grad_norm": 1.5012083053588867, + "learning_rate": 0.0001, + "loss": 0.0564, + "step": 20280 + }, + { + "epoch": 57.64204545454545, + "grad_norm": 1.287113070487976, + "learning_rate": 0.0001, + "loss": 0.0545, + "step": 20290 + }, + { + "epoch": 57.67045454545455, + "grad_norm": 1.545423984527588, + "learning_rate": 0.0001, + "loss": 0.0558, + "step": 20300 + }, + { + "epoch": 57.69886363636363, + "grad_norm": 0.989124596118927, + "learning_rate": 0.0001, + "loss": 0.0579, + "step": 20310 + }, + { + "epoch": 57.72727272727273, + "grad_norm": 1.1454150676727295, + "learning_rate": 0.0001, + "loss": 0.0562, + "step": 20320 + }, + { + "epoch": 57.75568181818182, + "grad_norm": 1.7752538919448853, + "learning_rate": 0.0001, + "loss": 0.0583, + "step": 20330 + }, + { + "epoch": 57.78409090909091, + "grad_norm": 1.3004405498504639, + "learning_rate": 0.0001, + "loss": 0.0559, + "step": 20340 + }, + { + "epoch": 57.8125, + "grad_norm": 1.165421962738037, + "learning_rate": 0.0001, + "loss": 0.0554, + "step": 20350 + }, + { + "epoch": 57.84090909090909, + "grad_norm": 1.3419017791748047, + "learning_rate": 0.0001, + "loss": 0.0553, + "step": 20360 + }, + { + "epoch": 57.86931818181818, + "grad_norm": 1.2583837509155273, + "learning_rate": 0.0001, + "loss": 0.0574, + "step": 20370 + }, + { + "epoch": 57.89772727272727, + "grad_norm": 1.1867432594299316, + "learning_rate": 0.0001, + "loss": 0.0551, + "step": 20380 + }, + { + "epoch": 57.92613636363637, + "grad_norm": 1.191956639289856, + "learning_rate": 0.0001, + "loss": 0.0549, + "step": 20390 + }, + { + "epoch": 57.95454545454545, + "grad_norm": 1.0556656122207642, + "learning_rate": 0.0001, + "loss": 0.0575, + "step": 20400 + }, + { + "epoch": 57.98295454545455, + "grad_norm": 1.2158259153366089, + "learning_rate": 0.0001, + "loss": 0.0547, + "step": 20410 + }, + { + "epoch": 58.01136363636363, + "grad_norm": 0.9880191087722778, + "learning_rate": 0.0001, + "loss": 0.0576, + "step": 20420 + }, + { + "epoch": 58.03977272727273, + "grad_norm": 1.150399923324585, + "learning_rate": 0.0001, + "loss": 0.0589, + "step": 20430 + }, + { + "epoch": 58.06818181818182, + "grad_norm": 0.9008199572563171, + "learning_rate": 0.0001, + "loss": 0.0567, + "step": 20440 + }, + { + "epoch": 58.09659090909091, + "grad_norm": 0.8457942008972168, + "learning_rate": 0.0001, + "loss": 0.0575, + "step": 20450 + }, + { + "epoch": 58.125, + "grad_norm": 0.8913554549217224, + "learning_rate": 0.0001, + "loss": 0.0566, + "step": 20460 + }, + { + "epoch": 58.15340909090909, + "grad_norm": 1.0136390924453735, + "learning_rate": 0.0001, + "loss": 0.0612, + "step": 20470 + }, + { + "epoch": 58.18181818181818, + "grad_norm": 1.0893242359161377, + "learning_rate": 0.0001, + "loss": 0.0602, + "step": 20480 + }, + { + "epoch": 58.21022727272727, + "grad_norm": 0.8883498311042786, + "learning_rate": 0.0001, + "loss": 0.0627, + "step": 20490 + }, + { + "epoch": 58.23863636363637, + "grad_norm": 0.7249606251716614, + "learning_rate": 0.0001, + "loss": 0.0637, + "step": 20500 + }, + { + "epoch": 58.26704545454545, + "grad_norm": 0.83709716796875, + "learning_rate": 0.0001, + "loss": 0.0623, + "step": 20510 + }, + { + "epoch": 58.29545454545455, + "grad_norm": 0.8498445749282837, + "learning_rate": 0.0001, + "loss": 0.0604, + "step": 20520 + }, + { + "epoch": 58.32386363636363, + "grad_norm": 0.8652094602584839, + "learning_rate": 0.0001, + "loss": 0.0581, + "step": 20530 + }, + { + "epoch": 58.35227272727273, + "grad_norm": 0.933368444442749, + "learning_rate": 0.0001, + "loss": 0.0607, + "step": 20540 + }, + { + "epoch": 58.38068181818182, + "grad_norm": 1.022032380104065, + "learning_rate": 0.0001, + "loss": 0.0581, + "step": 20550 + }, + { + "epoch": 58.40909090909091, + "grad_norm": 0.884529173374176, + "learning_rate": 0.0001, + "loss": 0.0584, + "step": 20560 + }, + { + "epoch": 58.4375, + "grad_norm": 0.7771308422088623, + "learning_rate": 0.0001, + "loss": 0.0612, + "step": 20570 + }, + { + "epoch": 58.46590909090909, + "grad_norm": 1.055200457572937, + "learning_rate": 0.0001, + "loss": 0.0599, + "step": 20580 + }, + { + "epoch": 58.49431818181818, + "grad_norm": 0.9750531315803528, + "learning_rate": 0.0001, + "loss": 0.0598, + "step": 20590 + }, + { + "epoch": 58.52272727272727, + "grad_norm": 0.9744542241096497, + "learning_rate": 0.0001, + "loss": 0.0627, + "step": 20600 + }, + { + "epoch": 58.55113636363637, + "grad_norm": 1.1760064363479614, + "learning_rate": 0.0001, + "loss": 0.0616, + "step": 20610 + }, + { + "epoch": 58.57954545454545, + "grad_norm": 1.0370769500732422, + "learning_rate": 0.0001, + "loss": 0.0611, + "step": 20620 + }, + { + "epoch": 58.60795454545455, + "grad_norm": 1.3704447746276855, + "learning_rate": 0.0001, + "loss": 0.0599, + "step": 20630 + }, + { + "epoch": 58.63636363636363, + "grad_norm": 1.098480463027954, + "learning_rate": 0.0001, + "loss": 0.0622, + "step": 20640 + }, + { + "epoch": 58.66477272727273, + "grad_norm": 1.4485833644866943, + "learning_rate": 0.0001, + "loss": 0.0605, + "step": 20650 + }, + { + "epoch": 58.69318181818182, + "grad_norm": 1.1087062358856201, + "learning_rate": 0.0001, + "loss": 0.0618, + "step": 20660 + }, + { + "epoch": 58.72159090909091, + "grad_norm": 1.0735747814178467, + "learning_rate": 0.0001, + "loss": 0.0629, + "step": 20670 + }, + { + "epoch": 58.75, + "grad_norm": 1.2116317749023438, + "learning_rate": 0.0001, + "loss": 0.0578, + "step": 20680 + }, + { + "epoch": 58.77840909090909, + "grad_norm": 0.8660669922828674, + "learning_rate": 0.0001, + "loss": 0.0605, + "step": 20690 + }, + { + "epoch": 58.80681818181818, + "grad_norm": 0.9465160369873047, + "learning_rate": 0.0001, + "loss": 0.0602, + "step": 20700 + }, + { + "epoch": 58.83522727272727, + "grad_norm": 1.117857575416565, + "learning_rate": 0.0001, + "loss": 0.0577, + "step": 20710 + }, + { + "epoch": 58.86363636363637, + "grad_norm": 1.4936555624008179, + "learning_rate": 0.0001, + "loss": 0.064, + "step": 20720 + }, + { + "epoch": 58.89204545454545, + "grad_norm": 1.6041721105575562, + "learning_rate": 0.0001, + "loss": 0.0621, + "step": 20730 + }, + { + "epoch": 58.92045454545455, + "grad_norm": 1.4327595233917236, + "learning_rate": 0.0001, + "loss": 0.0575, + "step": 20740 + }, + { + "epoch": 58.94886363636363, + "grad_norm": 1.3365850448608398, + "learning_rate": 0.0001, + "loss": 0.0582, + "step": 20750 + }, + { + "epoch": 58.97727272727273, + "grad_norm": 1.3097879886627197, + "learning_rate": 0.0001, + "loss": 0.0568, + "step": 20760 + }, + { + "epoch": 59.00568181818182, + "grad_norm": 1.252744436264038, + "learning_rate": 0.0001, + "loss": 0.0598, + "step": 20770 + }, + { + "epoch": 59.03409090909091, + "grad_norm": 0.9207512140274048, + "learning_rate": 0.0001, + "loss": 0.0588, + "step": 20780 + }, + { + "epoch": 59.0625, + "grad_norm": 1.2421956062316895, + "learning_rate": 0.0001, + "loss": 0.0568, + "step": 20790 + }, + { + "epoch": 59.09090909090909, + "grad_norm": 0.8143938183784485, + "learning_rate": 0.0001, + "loss": 0.0578, + "step": 20800 + }, + { + "epoch": 59.11931818181818, + "grad_norm": 0.9497162103652954, + "learning_rate": 0.0001, + "loss": 0.0563, + "step": 20810 + }, + { + "epoch": 59.14772727272727, + "grad_norm": 1.0337462425231934, + "learning_rate": 0.0001, + "loss": 0.0572, + "step": 20820 + }, + { + "epoch": 59.17613636363637, + "grad_norm": 0.9024602770805359, + "learning_rate": 0.0001, + "loss": 0.0586, + "step": 20830 + }, + { + "epoch": 59.20454545454545, + "grad_norm": 0.9900998473167419, + "learning_rate": 0.0001, + "loss": 0.058, + "step": 20840 + }, + { + "epoch": 59.23295454545455, + "grad_norm": 0.9825519919395447, + "learning_rate": 0.0001, + "loss": 0.0619, + "step": 20850 + }, + { + "epoch": 59.26136363636363, + "grad_norm": 0.9931562542915344, + "learning_rate": 0.0001, + "loss": 0.0606, + "step": 20860 + }, + { + "epoch": 59.28977272727273, + "grad_norm": 0.8159868121147156, + "learning_rate": 0.0001, + "loss": 0.0588, + "step": 20870 + }, + { + "epoch": 59.31818181818182, + "grad_norm": 1.0349278450012207, + "learning_rate": 0.0001, + "loss": 0.0622, + "step": 20880 + }, + { + "epoch": 59.34659090909091, + "grad_norm": 0.8769015073776245, + "learning_rate": 0.0001, + "loss": 0.0603, + "step": 20890 + }, + { + "epoch": 59.375, + "grad_norm": 0.94813472032547, + "learning_rate": 0.0001, + "loss": 0.0591, + "step": 20900 + }, + { + "epoch": 59.40340909090909, + "grad_norm": 1.1130449771881104, + "learning_rate": 0.0001, + "loss": 0.0608, + "step": 20910 + }, + { + "epoch": 59.43181818181818, + "grad_norm": 0.9908705353736877, + "learning_rate": 0.0001, + "loss": 0.0595, + "step": 20920 + }, + { + "epoch": 59.46022727272727, + "grad_norm": 1.1722239255905151, + "learning_rate": 0.0001, + "loss": 0.0589, + "step": 20930 + }, + { + "epoch": 59.48863636363637, + "grad_norm": 1.0646426677703857, + "learning_rate": 0.0001, + "loss": 0.0617, + "step": 20940 + }, + { + "epoch": 59.51704545454545, + "grad_norm": 0.9773575067520142, + "learning_rate": 0.0001, + "loss": 0.0584, + "step": 20950 + }, + { + "epoch": 59.54545454545455, + "grad_norm": 0.9211640954017639, + "learning_rate": 0.0001, + "loss": 0.0588, + "step": 20960 + }, + { + "epoch": 59.57386363636363, + "grad_norm": 0.7611501216888428, + "learning_rate": 0.0001, + "loss": 0.0589, + "step": 20970 + }, + { + "epoch": 59.60227272727273, + "grad_norm": 0.877237856388092, + "learning_rate": 0.0001, + "loss": 0.0615, + "step": 20980 + }, + { + "epoch": 59.63068181818182, + "grad_norm": 0.921630322933197, + "learning_rate": 0.0001, + "loss": 0.0604, + "step": 20990 + }, + { + "epoch": 59.65909090909091, + "grad_norm": 0.9630839824676514, + "learning_rate": 0.0001, + "loss": 0.0589, + "step": 21000 + }, + { + "epoch": 59.6875, + "grad_norm": 0.9061483144760132, + "learning_rate": 0.0001, + "loss": 0.0584, + "step": 21010 + }, + { + "epoch": 59.71590909090909, + "grad_norm": 0.847222626209259, + "learning_rate": 0.0001, + "loss": 0.0651, + "step": 21020 + }, + { + "epoch": 59.74431818181818, + "grad_norm": 0.8466194868087769, + "learning_rate": 0.0001, + "loss": 0.0589, + "step": 21030 + }, + { + "epoch": 59.77272727272727, + "grad_norm": 1.054270625114441, + "learning_rate": 0.0001, + "loss": 0.0598, + "step": 21040 + }, + { + "epoch": 59.80113636363637, + "grad_norm": 1.2087162733078003, + "learning_rate": 0.0001, + "loss": 0.0594, + "step": 21050 + }, + { + "epoch": 59.82954545454545, + "grad_norm": 1.2614654302597046, + "learning_rate": 0.0001, + "loss": 0.06, + "step": 21060 + }, + { + "epoch": 59.85795454545455, + "grad_norm": 1.2037792205810547, + "learning_rate": 0.0001, + "loss": 0.0611, + "step": 21070 + }, + { + "epoch": 59.88636363636363, + "grad_norm": 1.1717537641525269, + "learning_rate": 0.0001, + "loss": 0.0596, + "step": 21080 + }, + { + "epoch": 59.91477272727273, + "grad_norm": 1.085711121559143, + "learning_rate": 0.0001, + "loss": 0.0579, + "step": 21090 + }, + { + "epoch": 59.94318181818182, + "grad_norm": 0.8551573753356934, + "learning_rate": 0.0001, + "loss": 0.0599, + "step": 21100 + }, + { + "epoch": 59.97159090909091, + "grad_norm": 0.7682262063026428, + "learning_rate": 0.0001, + "loss": 0.0595, + "step": 21110 + }, + { + "epoch": 60.0, + "grad_norm": 0.6982107162475586, + "learning_rate": 0.0001, + "loss": 0.0619, + "step": 21120 + }, + { + "epoch": 60.02840909090909, + "grad_norm": 0.9472159147262573, + "learning_rate": 0.0001, + "loss": 0.0614, + "step": 21130 + }, + { + "epoch": 60.05681818181818, + "grad_norm": 0.8628683686256409, + "learning_rate": 0.0001, + "loss": 0.0609, + "step": 21140 + }, + { + "epoch": 60.08522727272727, + "grad_norm": 0.6452422142028809, + "learning_rate": 0.0001, + "loss": 0.0622, + "step": 21150 + }, + { + "epoch": 60.11363636363637, + "grad_norm": 0.6607347726821899, + "learning_rate": 0.0001, + "loss": 0.0611, + "step": 21160 + }, + { + "epoch": 60.14204545454545, + "grad_norm": 0.6297292709350586, + "learning_rate": 0.0001, + "loss": 0.06, + "step": 21170 + }, + { + "epoch": 60.17045454545455, + "grad_norm": 0.705437958240509, + "learning_rate": 0.0001, + "loss": 0.0598, + "step": 21180 + }, + { + "epoch": 60.19886363636363, + "grad_norm": 0.64570552110672, + "learning_rate": 0.0001, + "loss": 0.0579, + "step": 21190 + }, + { + "epoch": 60.22727272727273, + "grad_norm": 0.8154585361480713, + "learning_rate": 0.0001, + "loss": 0.0557, + "step": 21200 + }, + { + "epoch": 60.25568181818182, + "grad_norm": 0.8044834136962891, + "learning_rate": 0.0001, + "loss": 0.0575, + "step": 21210 + }, + { + "epoch": 60.28409090909091, + "grad_norm": 0.984665036201477, + "learning_rate": 0.0001, + "loss": 0.0582, + "step": 21220 + }, + { + "epoch": 60.3125, + "grad_norm": 0.8446553349494934, + "learning_rate": 0.0001, + "loss": 0.0591, + "step": 21230 + }, + { + "epoch": 60.34090909090909, + "grad_norm": 0.9261309504508972, + "learning_rate": 0.0001, + "loss": 0.0607, + "step": 21240 + }, + { + "epoch": 60.36931818181818, + "grad_norm": 1.1594133377075195, + "learning_rate": 0.0001, + "loss": 0.0562, + "step": 21250 + }, + { + "epoch": 60.39772727272727, + "grad_norm": 1.2541656494140625, + "learning_rate": 0.0001, + "loss": 0.0568, + "step": 21260 + }, + { + "epoch": 60.42613636363637, + "grad_norm": 0.9860923290252686, + "learning_rate": 0.0001, + "loss": 0.0604, + "step": 21270 + }, + { + "epoch": 60.45454545454545, + "grad_norm": 1.032243251800537, + "learning_rate": 0.0001, + "loss": 0.0572, + "step": 21280 + }, + { + "epoch": 60.48295454545455, + "grad_norm": 1.0910956859588623, + "learning_rate": 0.0001, + "loss": 0.0575, + "step": 21290 + }, + { + "epoch": 60.51136363636363, + "grad_norm": 1.106114387512207, + "learning_rate": 0.0001, + "loss": 0.0581, + "step": 21300 + }, + { + "epoch": 60.53977272727273, + "grad_norm": 1.733173131942749, + "learning_rate": 0.0001, + "loss": 0.0624, + "step": 21310 + }, + { + "epoch": 60.56818181818182, + "grad_norm": 1.755391001701355, + "learning_rate": 0.0001, + "loss": 0.0586, + "step": 21320 + }, + { + "epoch": 60.59659090909091, + "grad_norm": 1.6064823865890503, + "learning_rate": 0.0001, + "loss": 0.0564, + "step": 21330 + }, + { + "epoch": 60.625, + "grad_norm": 1.0228577852249146, + "learning_rate": 0.0001, + "loss": 0.0563, + "step": 21340 + }, + { + "epoch": 60.65340909090909, + "grad_norm": 1.1767072677612305, + "learning_rate": 0.0001, + "loss": 0.0566, + "step": 21350 + }, + { + "epoch": 60.68181818181818, + "grad_norm": 0.9804391264915466, + "learning_rate": 0.0001, + "loss": 0.0564, + "step": 21360 + }, + { + "epoch": 60.71022727272727, + "grad_norm": 1.459820032119751, + "learning_rate": 0.0001, + "loss": 0.0549, + "step": 21370 + }, + { + "epoch": 60.73863636363637, + "grad_norm": 1.2355256080627441, + "learning_rate": 0.0001, + "loss": 0.0562, + "step": 21380 + }, + { + "epoch": 60.76704545454545, + "grad_norm": 0.9702253341674805, + "learning_rate": 0.0001, + "loss": 0.0582, + "step": 21390 + }, + { + "epoch": 60.79545454545455, + "grad_norm": 0.8217170834541321, + "learning_rate": 0.0001, + "loss": 0.0577, + "step": 21400 + }, + { + "epoch": 60.82386363636363, + "grad_norm": 1.1219531297683716, + "learning_rate": 0.0001, + "loss": 0.0571, + "step": 21410 + }, + { + "epoch": 60.85227272727273, + "grad_norm": 1.0550838708877563, + "learning_rate": 0.0001, + "loss": 0.0573, + "step": 21420 + }, + { + "epoch": 60.88068181818182, + "grad_norm": 1.1298226118087769, + "learning_rate": 0.0001, + "loss": 0.0567, + "step": 21430 + }, + { + "epoch": 60.90909090909091, + "grad_norm": 1.1814019680023193, + "learning_rate": 0.0001, + "loss": 0.0602, + "step": 21440 + }, + { + "epoch": 60.9375, + "grad_norm": 1.0315929651260376, + "learning_rate": 0.0001, + "loss": 0.0605, + "step": 21450 + }, + { + "epoch": 60.96590909090909, + "grad_norm": 1.0429394245147705, + "learning_rate": 0.0001, + "loss": 0.0576, + "step": 21460 + }, + { + "epoch": 60.99431818181818, + "grad_norm": 1.109660029411316, + "learning_rate": 0.0001, + "loss": 0.0579, + "step": 21470 + }, + { + "epoch": 61.02272727272727, + "grad_norm": 0.8597354292869568, + "learning_rate": 0.0001, + "loss": 0.0553, + "step": 21480 + }, + { + "epoch": 61.05113636363637, + "grad_norm": 0.7767676711082458, + "learning_rate": 0.0001, + "loss": 0.0551, + "step": 21490 + }, + { + "epoch": 61.07954545454545, + "grad_norm": 0.9464530944824219, + "learning_rate": 0.0001, + "loss": 0.0545, + "step": 21500 + }, + { + "epoch": 61.10795454545455, + "grad_norm": 0.8091188073158264, + "learning_rate": 0.0001, + "loss": 0.0596, + "step": 21510 + }, + { + "epoch": 61.13636363636363, + "grad_norm": 0.9647312760353088, + "learning_rate": 0.0001, + "loss": 0.0556, + "step": 21520 + }, + { + "epoch": 61.16477272727273, + "grad_norm": 1.1387494802474976, + "learning_rate": 0.0001, + "loss": 0.0577, + "step": 21530 + }, + { + "epoch": 61.19318181818182, + "grad_norm": 0.8227630853652954, + "learning_rate": 0.0001, + "loss": 0.0566, + "step": 21540 + }, + { + "epoch": 61.22159090909091, + "grad_norm": 1.2130613327026367, + "learning_rate": 0.0001, + "loss": 0.0565, + "step": 21550 + }, + { + "epoch": 61.25, + "grad_norm": 1.1565511226654053, + "learning_rate": 0.0001, + "loss": 0.0583, + "step": 21560 + }, + { + "epoch": 61.27840909090909, + "grad_norm": 1.049648404121399, + "learning_rate": 0.0001, + "loss": 0.0556, + "step": 21570 + }, + { + "epoch": 61.30681818181818, + "grad_norm": 1.279056429862976, + "learning_rate": 0.0001, + "loss": 0.0566, + "step": 21580 + }, + { + "epoch": 61.33522727272727, + "grad_norm": 0.8837600350379944, + "learning_rate": 0.0001, + "loss": 0.0579, + "step": 21590 + }, + { + "epoch": 61.36363636363637, + "grad_norm": 0.9417069554328918, + "learning_rate": 0.0001, + "loss": 0.0574, + "step": 21600 + }, + { + "epoch": 61.39204545454545, + "grad_norm": 0.7844614386558533, + "learning_rate": 0.0001, + "loss": 0.0554, + "step": 21610 + }, + { + "epoch": 61.42045454545455, + "grad_norm": 0.9132207632064819, + "learning_rate": 0.0001, + "loss": 0.0603, + "step": 21620 + }, + { + "epoch": 61.44886363636363, + "grad_norm": 0.8967658877372742, + "learning_rate": 0.0001, + "loss": 0.0568, + "step": 21630 + }, + { + "epoch": 61.47727272727273, + "grad_norm": 0.8098888397216797, + "learning_rate": 0.0001, + "loss": 0.0599, + "step": 21640 + }, + { + "epoch": 61.50568181818182, + "grad_norm": 0.856517493724823, + "learning_rate": 0.0001, + "loss": 0.0556, + "step": 21650 + }, + { + "epoch": 61.53409090909091, + "grad_norm": 0.7805210947990417, + "learning_rate": 0.0001, + "loss": 0.0603, + "step": 21660 + }, + { + "epoch": 61.5625, + "grad_norm": 0.8382397294044495, + "learning_rate": 0.0001, + "loss": 0.0578, + "step": 21670 + }, + { + "epoch": 61.59090909090909, + "grad_norm": 0.9148212671279907, + "learning_rate": 0.0001, + "loss": 0.0588, + "step": 21680 + }, + { + "epoch": 61.61931818181818, + "grad_norm": 0.774455726146698, + "learning_rate": 0.0001, + "loss": 0.0573, + "step": 21690 + }, + { + "epoch": 61.64772727272727, + "grad_norm": 0.6750848889350891, + "learning_rate": 0.0001, + "loss": 0.0559, + "step": 21700 + }, + { + "epoch": 61.67613636363637, + "grad_norm": 0.7105973362922668, + "learning_rate": 0.0001, + "loss": 0.0583, + "step": 21710 + }, + { + "epoch": 61.70454545454545, + "grad_norm": 1.188941240310669, + "learning_rate": 0.0001, + "loss": 0.0608, + "step": 21720 + }, + { + "epoch": 61.73295454545455, + "grad_norm": 1.0456372499465942, + "learning_rate": 0.0001, + "loss": 0.0599, + "step": 21730 + }, + { + "epoch": 61.76136363636363, + "grad_norm": 0.9662376642227173, + "learning_rate": 0.0001, + "loss": 0.0588, + "step": 21740 + }, + { + "epoch": 61.78977272727273, + "grad_norm": 1.0228948593139648, + "learning_rate": 0.0001, + "loss": 0.0577, + "step": 21750 + }, + { + "epoch": 61.81818181818182, + "grad_norm": 1.133011817932129, + "learning_rate": 0.0001, + "loss": 0.0564, + "step": 21760 + }, + { + "epoch": 61.84659090909091, + "grad_norm": 1.138669729232788, + "learning_rate": 0.0001, + "loss": 0.0562, + "step": 21770 + }, + { + "epoch": 61.875, + "grad_norm": 0.9990003705024719, + "learning_rate": 0.0001, + "loss": 0.056, + "step": 21780 + }, + { + "epoch": 61.90340909090909, + "grad_norm": 0.8538486957550049, + "learning_rate": 0.0001, + "loss": 0.057, + "step": 21790 + }, + { + "epoch": 61.93181818181818, + "grad_norm": 0.9877942204475403, + "learning_rate": 0.0001, + "loss": 0.0582, + "step": 21800 + }, + { + "epoch": 61.96022727272727, + "grad_norm": 0.7974020838737488, + "learning_rate": 0.0001, + "loss": 0.0566, + "step": 21810 + }, + { + "epoch": 61.98863636363637, + "grad_norm": 0.9531463980674744, + "learning_rate": 0.0001, + "loss": 0.0569, + "step": 21820 + }, + { + "epoch": 62.01704545454545, + "grad_norm": 0.8213363289833069, + "learning_rate": 0.0001, + "loss": 0.0562, + "step": 21830 + }, + { + "epoch": 62.04545454545455, + "grad_norm": 0.7434073686599731, + "learning_rate": 0.0001, + "loss": 0.0608, + "step": 21840 + }, + { + "epoch": 62.07386363636363, + "grad_norm": 1.0179238319396973, + "learning_rate": 0.0001, + "loss": 0.057, + "step": 21850 + }, + { + "epoch": 62.10227272727273, + "grad_norm": 0.8162310719490051, + "learning_rate": 0.0001, + "loss": 0.056, + "step": 21860 + }, + { + "epoch": 62.13068181818182, + "grad_norm": 0.749879777431488, + "learning_rate": 0.0001, + "loss": 0.0587, + "step": 21870 + }, + { + "epoch": 62.15909090909091, + "grad_norm": 0.7732171416282654, + "learning_rate": 0.0001, + "loss": 0.0536, + "step": 21880 + }, + { + "epoch": 62.1875, + "grad_norm": 0.7089868783950806, + "learning_rate": 0.0001, + "loss": 0.0551, + "step": 21890 + }, + { + "epoch": 62.21590909090909, + "grad_norm": 0.7101325988769531, + "learning_rate": 0.0001, + "loss": 0.0546, + "step": 21900 + }, + { + "epoch": 62.24431818181818, + "grad_norm": 0.7819742560386658, + "learning_rate": 0.0001, + "loss": 0.0575, + "step": 21910 + }, + { + "epoch": 62.27272727272727, + "grad_norm": 0.6902415752410889, + "learning_rate": 0.0001, + "loss": 0.0551, + "step": 21920 + }, + { + "epoch": 62.30113636363637, + "grad_norm": 0.8869762420654297, + "learning_rate": 0.0001, + "loss": 0.0582, + "step": 21930 + }, + { + "epoch": 62.32954545454545, + "grad_norm": 1.154178500175476, + "learning_rate": 0.0001, + "loss": 0.0571, + "step": 21940 + }, + { + "epoch": 62.35795454545455, + "grad_norm": 0.8612966537475586, + "learning_rate": 0.0001, + "loss": 0.0562, + "step": 21950 + }, + { + "epoch": 62.38636363636363, + "grad_norm": 0.8771665692329407, + "learning_rate": 0.0001, + "loss": 0.0573, + "step": 21960 + }, + { + "epoch": 62.41477272727273, + "grad_norm": 0.9387429356575012, + "learning_rate": 0.0001, + "loss": 0.0555, + "step": 21970 + }, + { + "epoch": 62.44318181818182, + "grad_norm": 0.7586554884910583, + "learning_rate": 0.0001, + "loss": 0.0597, + "step": 21980 + }, + { + "epoch": 62.47159090909091, + "grad_norm": 0.809971809387207, + "learning_rate": 0.0001, + "loss": 0.0588, + "step": 21990 + }, + { + "epoch": 62.5, + "grad_norm": 0.9225670099258423, + "learning_rate": 0.0001, + "loss": 0.0577, + "step": 22000 + }, + { + "epoch": 62.52840909090909, + "grad_norm": 0.9746827483177185, + "learning_rate": 0.0001, + "loss": 0.0559, + "step": 22010 + }, + { + "epoch": 62.55681818181818, + "grad_norm": 0.8748590350151062, + "learning_rate": 0.0001, + "loss": 0.0565, + "step": 22020 + }, + { + "epoch": 62.58522727272727, + "grad_norm": 1.0417462587356567, + "learning_rate": 0.0001, + "loss": 0.0554, + "step": 22030 + }, + { + "epoch": 62.61363636363637, + "grad_norm": 0.9173468351364136, + "learning_rate": 0.0001, + "loss": 0.0555, + "step": 22040 + }, + { + "epoch": 62.64204545454545, + "grad_norm": 0.9043504595756531, + "learning_rate": 0.0001, + "loss": 0.0533, + "step": 22050 + }, + { + "epoch": 62.67045454545455, + "grad_norm": 0.6783923506736755, + "learning_rate": 0.0001, + "loss": 0.0563, + "step": 22060 + }, + { + "epoch": 62.69886363636363, + "grad_norm": 0.6317957043647766, + "learning_rate": 0.0001, + "loss": 0.0539, + "step": 22070 + }, + { + "epoch": 62.72727272727273, + "grad_norm": 0.7598891258239746, + "learning_rate": 0.0001, + "loss": 0.056, + "step": 22080 + }, + { + "epoch": 62.75568181818182, + "grad_norm": 0.880479633808136, + "learning_rate": 0.0001, + "loss": 0.0572, + "step": 22090 + }, + { + "epoch": 62.78409090909091, + "grad_norm": 0.8354278802871704, + "learning_rate": 0.0001, + "loss": 0.0565, + "step": 22100 + }, + { + "epoch": 62.8125, + "grad_norm": 0.9944140911102295, + "learning_rate": 0.0001, + "loss": 0.0524, + "step": 22110 + }, + { + "epoch": 62.84090909090909, + "grad_norm": 0.9438153505325317, + "learning_rate": 0.0001, + "loss": 0.053, + "step": 22120 + }, + { + "epoch": 62.86931818181818, + "grad_norm": 1.1514655351638794, + "learning_rate": 0.0001, + "loss": 0.0561, + "step": 22130 + }, + { + "epoch": 62.89772727272727, + "grad_norm": 1.0064482688903809, + "learning_rate": 0.0001, + "loss": 0.053, + "step": 22140 + }, + { + "epoch": 62.92613636363637, + "grad_norm": 0.8011857271194458, + "learning_rate": 0.0001, + "loss": 0.0557, + "step": 22150 + }, + { + "epoch": 62.95454545454545, + "grad_norm": 0.7131310105323792, + "learning_rate": 0.0001, + "loss": 0.057, + "step": 22160 + }, + { + "epoch": 62.98295454545455, + "grad_norm": 0.7730833292007446, + "learning_rate": 0.0001, + "loss": 0.0554, + "step": 22170 + }, + { + "epoch": 63.01136363636363, + "grad_norm": 0.8114839792251587, + "learning_rate": 0.0001, + "loss": 0.0554, + "step": 22180 + }, + { + "epoch": 63.03977272727273, + "grad_norm": 0.714423656463623, + "learning_rate": 0.0001, + "loss": 0.0564, + "step": 22190 + }, + { + "epoch": 63.06818181818182, + "grad_norm": 1.106858730316162, + "learning_rate": 0.0001, + "loss": 0.0556, + "step": 22200 + }, + { + "epoch": 63.09659090909091, + "grad_norm": 1.0788785219192505, + "learning_rate": 0.0001, + "loss": 0.0575, + "step": 22210 + }, + { + "epoch": 63.125, + "grad_norm": 1.0309641361236572, + "learning_rate": 0.0001, + "loss": 0.0566, + "step": 22220 + }, + { + "epoch": 63.15340909090909, + "grad_norm": 0.9370083808898926, + "learning_rate": 0.0001, + "loss": 0.0545, + "step": 22230 + }, + { + "epoch": 63.18181818181818, + "grad_norm": 1.0266667604446411, + "learning_rate": 0.0001, + "loss": 0.0558, + "step": 22240 + }, + { + "epoch": 63.21022727272727, + "grad_norm": 1.0693178176879883, + "learning_rate": 0.0001, + "loss": 0.0543, + "step": 22250 + }, + { + "epoch": 63.23863636363637, + "grad_norm": 1.409576416015625, + "learning_rate": 0.0001, + "loss": 0.055, + "step": 22260 + }, + { + "epoch": 63.26704545454545, + "grad_norm": 1.0419577360153198, + "learning_rate": 0.0001, + "loss": 0.0562, + "step": 22270 + }, + { + "epoch": 63.29545454545455, + "grad_norm": 0.9042021632194519, + "learning_rate": 0.0001, + "loss": 0.0537, + "step": 22280 + }, + { + "epoch": 63.32386363636363, + "grad_norm": 0.9674776196479797, + "learning_rate": 0.0001, + "loss": 0.0551, + "step": 22290 + }, + { + "epoch": 63.35227272727273, + "grad_norm": 1.08954918384552, + "learning_rate": 0.0001, + "loss": 0.0549, + "step": 22300 + }, + { + "epoch": 63.38068181818182, + "grad_norm": 1.0163633823394775, + "learning_rate": 0.0001, + "loss": 0.0553, + "step": 22310 + }, + { + "epoch": 63.40909090909091, + "grad_norm": 1.0497983694076538, + "learning_rate": 0.0001, + "loss": 0.0553, + "step": 22320 + }, + { + "epoch": 63.4375, + "grad_norm": 0.7507213354110718, + "learning_rate": 0.0001, + "loss": 0.0543, + "step": 22330 + }, + { + "epoch": 63.46590909090909, + "grad_norm": 0.9704498648643494, + "learning_rate": 0.0001, + "loss": 0.0542, + "step": 22340 + }, + { + "epoch": 63.49431818181818, + "grad_norm": 0.9345890283584595, + "learning_rate": 0.0001, + "loss": 0.0562, + "step": 22350 + }, + { + "epoch": 63.52272727272727, + "grad_norm": 0.8323131799697876, + "learning_rate": 0.0001, + "loss": 0.0554, + "step": 22360 + }, + { + "epoch": 63.55113636363637, + "grad_norm": 0.8425998687744141, + "learning_rate": 0.0001, + "loss": 0.0551, + "step": 22370 + }, + { + "epoch": 63.57954545454545, + "grad_norm": 0.7790317535400391, + "learning_rate": 0.0001, + "loss": 0.055, + "step": 22380 + }, + { + "epoch": 63.60795454545455, + "grad_norm": 0.679909348487854, + "learning_rate": 0.0001, + "loss": 0.0564, + "step": 22390 + }, + { + "epoch": 63.63636363636363, + "grad_norm": 0.6957236528396606, + "learning_rate": 0.0001, + "loss": 0.0521, + "step": 22400 + }, + { + "epoch": 63.66477272727273, + "grad_norm": 0.7631174325942993, + "learning_rate": 0.0001, + "loss": 0.0539, + "step": 22410 + }, + { + "epoch": 63.69318181818182, + "grad_norm": 0.8418110609054565, + "learning_rate": 0.0001, + "loss": 0.0587, + "step": 22420 + }, + { + "epoch": 63.72159090909091, + "grad_norm": 0.9006642699241638, + "learning_rate": 0.0001, + "loss": 0.0548, + "step": 22430 + }, + { + "epoch": 63.75, + "grad_norm": 1.0244066715240479, + "learning_rate": 0.0001, + "loss": 0.0523, + "step": 22440 + }, + { + "epoch": 63.77840909090909, + "grad_norm": 0.8364577293395996, + "learning_rate": 0.0001, + "loss": 0.0561, + "step": 22450 + }, + { + "epoch": 63.80681818181818, + "grad_norm": 1.0565218925476074, + "learning_rate": 0.0001, + "loss": 0.0575, + "step": 22460 + }, + { + "epoch": 63.83522727272727, + "grad_norm": 0.994922935962677, + "learning_rate": 0.0001, + "loss": 0.0564, + "step": 22470 + }, + { + "epoch": 63.86363636363637, + "grad_norm": 1.0308321714401245, + "learning_rate": 0.0001, + "loss": 0.054, + "step": 22480 + }, + { + "epoch": 63.89204545454545, + "grad_norm": 0.9732064604759216, + "learning_rate": 0.0001, + "loss": 0.0537, + "step": 22490 + }, + { + "epoch": 63.92045454545455, + "grad_norm": 0.8068335056304932, + "learning_rate": 0.0001, + "loss": 0.0563, + "step": 22500 + }, + { + "epoch": 63.94886363636363, + "grad_norm": 0.7390735745429993, + "learning_rate": 0.0001, + "loss": 0.0569, + "step": 22510 + }, + { + "epoch": 63.97727272727273, + "grad_norm": 0.7898790240287781, + "learning_rate": 0.0001, + "loss": 0.0552, + "step": 22520 + }, + { + "epoch": 64.00568181818181, + "grad_norm": 0.8491553068161011, + "learning_rate": 0.0001, + "loss": 0.0554, + "step": 22530 + }, + { + "epoch": 64.0340909090909, + "grad_norm": 0.7833629846572876, + "learning_rate": 0.0001, + "loss": 0.0538, + "step": 22540 + }, + { + "epoch": 64.0625, + "grad_norm": 0.8554551005363464, + "learning_rate": 0.0001, + "loss": 0.055, + "step": 22550 + }, + { + "epoch": 64.0909090909091, + "grad_norm": 0.8123806715011597, + "learning_rate": 0.0001, + "loss": 0.0545, + "step": 22560 + }, + { + "epoch": 64.11931818181819, + "grad_norm": 0.7412775754928589, + "learning_rate": 0.0001, + "loss": 0.0535, + "step": 22570 + }, + { + "epoch": 64.14772727272727, + "grad_norm": 0.6799927949905396, + "learning_rate": 0.0001, + "loss": 0.054, + "step": 22580 + }, + { + "epoch": 64.17613636363636, + "grad_norm": 0.8203033804893494, + "learning_rate": 0.0001, + "loss": 0.0539, + "step": 22590 + }, + { + "epoch": 64.20454545454545, + "grad_norm": 0.7033742666244507, + "learning_rate": 0.0001, + "loss": 0.0522, + "step": 22600 + }, + { + "epoch": 64.23295454545455, + "grad_norm": 0.7215442061424255, + "learning_rate": 0.0001, + "loss": 0.0542, + "step": 22610 + }, + { + "epoch": 64.26136363636364, + "grad_norm": 0.5706157684326172, + "learning_rate": 0.0001, + "loss": 0.0542, + "step": 22620 + }, + { + "epoch": 64.28977272727273, + "grad_norm": 0.6812960505485535, + "learning_rate": 0.0001, + "loss": 0.0551, + "step": 22630 + }, + { + "epoch": 64.31818181818181, + "grad_norm": 0.6466752886772156, + "learning_rate": 0.0001, + "loss": 0.0539, + "step": 22640 + }, + { + "epoch": 64.3465909090909, + "grad_norm": 0.7440110445022583, + "learning_rate": 0.0001, + "loss": 0.0548, + "step": 22650 + }, + { + "epoch": 64.375, + "grad_norm": 0.7839359641075134, + "learning_rate": 0.0001, + "loss": 0.0562, + "step": 22660 + }, + { + "epoch": 64.4034090909091, + "grad_norm": 0.8440365195274353, + "learning_rate": 0.0001, + "loss": 0.0545, + "step": 22670 + }, + { + "epoch": 64.43181818181819, + "grad_norm": 0.774544358253479, + "learning_rate": 0.0001, + "loss": 0.0573, + "step": 22680 + }, + { + "epoch": 64.46022727272727, + "grad_norm": 0.601563036441803, + "learning_rate": 0.0001, + "loss": 0.0558, + "step": 22690 + }, + { + "epoch": 64.48863636363636, + "grad_norm": 0.7574673891067505, + "learning_rate": 0.0001, + "loss": 0.0535, + "step": 22700 + }, + { + "epoch": 64.51704545454545, + "grad_norm": 0.836617648601532, + "learning_rate": 0.0001, + "loss": 0.0549, + "step": 22710 + }, + { + "epoch": 64.54545454545455, + "grad_norm": 1.0453118085861206, + "learning_rate": 0.0001, + "loss": 0.0548, + "step": 22720 + }, + { + "epoch": 64.57386363636364, + "grad_norm": 1.0585159063339233, + "learning_rate": 0.0001, + "loss": 0.056, + "step": 22730 + }, + { + "epoch": 64.60227272727273, + "grad_norm": 1.0894050598144531, + "learning_rate": 0.0001, + "loss": 0.055, + "step": 22740 + }, + { + "epoch": 64.63068181818181, + "grad_norm": 0.9796726107597351, + "learning_rate": 0.0001, + "loss": 0.0526, + "step": 22750 + }, + { + "epoch": 64.6590909090909, + "grad_norm": 0.9019004702568054, + "learning_rate": 0.0001, + "loss": 0.0568, + "step": 22760 + }, + { + "epoch": 64.6875, + "grad_norm": 0.7627422213554382, + "learning_rate": 0.0001, + "loss": 0.0508, + "step": 22770 + }, + { + "epoch": 64.7159090909091, + "grad_norm": 0.7884737849235535, + "learning_rate": 0.0001, + "loss": 0.0553, + "step": 22780 + }, + { + "epoch": 64.74431818181819, + "grad_norm": 0.7071219682693481, + "learning_rate": 0.0001, + "loss": 0.0547, + "step": 22790 + }, + { + "epoch": 64.77272727272727, + "grad_norm": 0.7242072820663452, + "learning_rate": 0.0001, + "loss": 0.0531, + "step": 22800 + }, + { + "epoch": 64.80113636363636, + "grad_norm": 0.9985579252243042, + "learning_rate": 0.0001, + "loss": 0.0542, + "step": 22810 + }, + { + "epoch": 64.82954545454545, + "grad_norm": 0.7592743635177612, + "learning_rate": 0.0001, + "loss": 0.0557, + "step": 22820 + }, + { + "epoch": 64.85795454545455, + "grad_norm": 0.6539085507392883, + "learning_rate": 0.0001, + "loss": 0.0531, + "step": 22830 + }, + { + "epoch": 64.88636363636364, + "grad_norm": 0.699675977230072, + "learning_rate": 0.0001, + "loss": 0.056, + "step": 22840 + }, + { + "epoch": 64.91477272727273, + "grad_norm": 0.830615222454071, + "learning_rate": 0.0001, + "loss": 0.053, + "step": 22850 + }, + { + "epoch": 64.94318181818181, + "grad_norm": 0.760208785533905, + "learning_rate": 0.0001, + "loss": 0.0549, + "step": 22860 + }, + { + "epoch": 64.9715909090909, + "grad_norm": 0.7713984847068787, + "learning_rate": 0.0001, + "loss": 0.0542, + "step": 22870 + }, + { + "epoch": 65.0, + "grad_norm": 0.7789033055305481, + "learning_rate": 0.0001, + "loss": 0.0532, + "step": 22880 + }, + { + "epoch": 65.0284090909091, + "grad_norm": 0.780392050743103, + "learning_rate": 0.0001, + "loss": 0.0549, + "step": 22890 + }, + { + "epoch": 65.05681818181819, + "grad_norm": 0.7739676237106323, + "learning_rate": 0.0001, + "loss": 0.0531, + "step": 22900 + }, + { + "epoch": 65.08522727272727, + "grad_norm": 0.7210514545440674, + "learning_rate": 0.0001, + "loss": 0.0533, + "step": 22910 + }, + { + "epoch": 65.11363636363636, + "grad_norm": 1.0596988201141357, + "learning_rate": 0.0001, + "loss": 0.0536, + "step": 22920 + }, + { + "epoch": 65.14204545454545, + "grad_norm": 1.2048275470733643, + "learning_rate": 0.0001, + "loss": 0.0524, + "step": 22930 + }, + { + "epoch": 65.17045454545455, + "grad_norm": 0.8827832937240601, + "learning_rate": 0.0001, + "loss": 0.0537, + "step": 22940 + }, + { + "epoch": 65.19886363636364, + "grad_norm": 0.889045238494873, + "learning_rate": 0.0001, + "loss": 0.052, + "step": 22950 + }, + { + "epoch": 65.22727272727273, + "grad_norm": 1.0528745651245117, + "learning_rate": 0.0001, + "loss": 0.0545, + "step": 22960 + }, + { + "epoch": 65.25568181818181, + "grad_norm": 1.0414397716522217, + "learning_rate": 0.0001, + "loss": 0.0526, + "step": 22970 + }, + { + "epoch": 65.2840909090909, + "grad_norm": 1.096603512763977, + "learning_rate": 0.0001, + "loss": 0.0533, + "step": 22980 + }, + { + "epoch": 65.3125, + "grad_norm": 0.8513028025627136, + "learning_rate": 0.0001, + "loss": 0.0521, + "step": 22990 + }, + { + "epoch": 65.3409090909091, + "grad_norm": 1.2057493925094604, + "learning_rate": 0.0001, + "loss": 0.053, + "step": 23000 + }, + { + "epoch": 65.36931818181819, + "grad_norm": 1.0297787189483643, + "learning_rate": 0.0001, + "loss": 0.0523, + "step": 23010 + }, + { + "epoch": 65.39772727272727, + "grad_norm": 0.8249045610427856, + "learning_rate": 0.0001, + "loss": 0.0526, + "step": 23020 + }, + { + "epoch": 65.42613636363636, + "grad_norm": 0.949597179889679, + "learning_rate": 0.0001, + "loss": 0.0531, + "step": 23030 + }, + { + "epoch": 65.45454545454545, + "grad_norm": 0.7203119397163391, + "learning_rate": 0.0001, + "loss": 0.0537, + "step": 23040 + }, + { + "epoch": 65.48295454545455, + "grad_norm": 0.7339312434196472, + "learning_rate": 0.0001, + "loss": 0.0558, + "step": 23050 + }, + { + "epoch": 65.51136363636364, + "grad_norm": 0.716767430305481, + "learning_rate": 0.0001, + "loss": 0.0536, + "step": 23060 + }, + { + "epoch": 65.53977272727273, + "grad_norm": 0.8138099312782288, + "learning_rate": 0.0001, + "loss": 0.0545, + "step": 23070 + }, + { + "epoch": 65.56818181818181, + "grad_norm": 0.7120342254638672, + "learning_rate": 0.0001, + "loss": 0.053, + "step": 23080 + }, + { + "epoch": 65.5965909090909, + "grad_norm": 0.7150150537490845, + "learning_rate": 0.0001, + "loss": 0.054, + "step": 23090 + }, + { + "epoch": 65.625, + "grad_norm": 0.7718611359596252, + "learning_rate": 0.0001, + "loss": 0.0556, + "step": 23100 + }, + { + "epoch": 65.6534090909091, + "grad_norm": 0.842397153377533, + "learning_rate": 0.0001, + "loss": 0.0518, + "step": 23110 + }, + { + "epoch": 65.68181818181819, + "grad_norm": 0.8018172979354858, + "learning_rate": 0.0001, + "loss": 0.0526, + "step": 23120 + }, + { + "epoch": 65.71022727272727, + "grad_norm": 0.756478488445282, + "learning_rate": 0.0001, + "loss": 0.054, + "step": 23130 + }, + { + "epoch": 65.73863636363636, + "grad_norm": 0.8237600922584534, + "learning_rate": 0.0001, + "loss": 0.0556, + "step": 23140 + }, + { + "epoch": 65.76704545454545, + "grad_norm": 0.6838138103485107, + "learning_rate": 0.0001, + "loss": 0.0539, + "step": 23150 + }, + { + "epoch": 65.79545454545455, + "grad_norm": 0.7186658382415771, + "learning_rate": 0.0001, + "loss": 0.0543, + "step": 23160 + }, + { + "epoch": 65.82386363636364, + "grad_norm": 0.8290245532989502, + "learning_rate": 0.0001, + "loss": 0.055, + "step": 23170 + }, + { + "epoch": 65.85227272727273, + "grad_norm": 0.7229530811309814, + "learning_rate": 0.0001, + "loss": 0.0545, + "step": 23180 + }, + { + "epoch": 65.88068181818181, + "grad_norm": 0.6716543436050415, + "learning_rate": 0.0001, + "loss": 0.055, + "step": 23190 + }, + { + "epoch": 65.9090909090909, + "grad_norm": 0.8731271028518677, + "learning_rate": 0.0001, + "loss": 0.0578, + "step": 23200 + }, + { + "epoch": 65.9375, + "grad_norm": 0.8098838329315186, + "learning_rate": 0.0001, + "loss": 0.0544, + "step": 23210 + }, + { + "epoch": 65.9659090909091, + "grad_norm": 1.0541036128997803, + "learning_rate": 0.0001, + "loss": 0.0548, + "step": 23220 + }, + { + "epoch": 65.99431818181819, + "grad_norm": 0.8643235564231873, + "learning_rate": 0.0001, + "loss": 0.0555, + "step": 23230 + }, + { + "epoch": 66.02272727272727, + "grad_norm": 0.8315423130989075, + "learning_rate": 0.0001, + "loss": 0.0543, + "step": 23240 + }, + { + "epoch": 66.05113636363636, + "grad_norm": 0.6959272623062134, + "learning_rate": 0.0001, + "loss": 0.0541, + "step": 23250 + }, + { + "epoch": 66.07954545454545, + "grad_norm": 0.7065873742103577, + "learning_rate": 0.0001, + "loss": 0.0526, + "step": 23260 + }, + { + "epoch": 66.10795454545455, + "grad_norm": 0.9436522722244263, + "learning_rate": 0.0001, + "loss": 0.0538, + "step": 23270 + }, + { + "epoch": 66.13636363636364, + "grad_norm": 0.6383907794952393, + "learning_rate": 0.0001, + "loss": 0.0526, + "step": 23280 + }, + { + "epoch": 66.16477272727273, + "grad_norm": 0.8061172962188721, + "learning_rate": 0.0001, + "loss": 0.0569, + "step": 23290 + }, + { + "epoch": 66.19318181818181, + "grad_norm": 0.6677034497261047, + "learning_rate": 0.0001, + "loss": 0.0554, + "step": 23300 + }, + { + "epoch": 66.2215909090909, + "grad_norm": 0.8233653903007507, + "learning_rate": 0.0001, + "loss": 0.0542, + "step": 23310 + }, + { + "epoch": 66.25, + "grad_norm": 0.8707202672958374, + "learning_rate": 0.0001, + "loss": 0.0536, + "step": 23320 + }, + { + "epoch": 66.2784090909091, + "grad_norm": 0.7756959199905396, + "learning_rate": 0.0001, + "loss": 0.056, + "step": 23330 + }, + { + "epoch": 66.30681818181819, + "grad_norm": 0.8138574361801147, + "learning_rate": 0.0001, + "loss": 0.055, + "step": 23340 + }, + { + "epoch": 66.33522727272727, + "grad_norm": 0.8435407876968384, + "learning_rate": 0.0001, + "loss": 0.0572, + "step": 23350 + }, + { + "epoch": 66.36363636363636, + "grad_norm": 0.8531373143196106, + "learning_rate": 0.0001, + "loss": 0.0558, + "step": 23360 + }, + { + "epoch": 66.39204545454545, + "grad_norm": 0.9886962175369263, + "learning_rate": 0.0001, + "loss": 0.0562, + "step": 23370 + }, + { + "epoch": 66.42045454545455, + "grad_norm": 0.9955214262008667, + "learning_rate": 0.0001, + "loss": 0.0561, + "step": 23380 + }, + { + "epoch": 66.44886363636364, + "grad_norm": 1.1781306266784668, + "learning_rate": 0.0001, + "loss": 0.0532, + "step": 23390 + }, + { + "epoch": 66.47727272727273, + "grad_norm": 1.1715068817138672, + "learning_rate": 0.0001, + "loss": 0.0557, + "step": 23400 + }, + { + "epoch": 66.50568181818181, + "grad_norm": 1.1786881685256958, + "learning_rate": 0.0001, + "loss": 0.0519, + "step": 23410 + }, + { + "epoch": 66.5340909090909, + "grad_norm": 1.2845433950424194, + "learning_rate": 0.0001, + "loss": 0.056, + "step": 23420 + }, + { + "epoch": 66.5625, + "grad_norm": 1.0063714981079102, + "learning_rate": 0.0001, + "loss": 0.0524, + "step": 23430 + }, + { + "epoch": 66.5909090909091, + "grad_norm": 1.013217806816101, + "learning_rate": 0.0001, + "loss": 0.0532, + "step": 23440 + }, + { + "epoch": 66.61931818181819, + "grad_norm": 1.0957231521606445, + "learning_rate": 0.0001, + "loss": 0.0509, + "step": 23450 + }, + { + "epoch": 66.64772727272727, + "grad_norm": 0.9889658689498901, + "learning_rate": 0.0001, + "loss": 0.0519, + "step": 23460 + }, + { + "epoch": 66.67613636363636, + "grad_norm": 0.9741299748420715, + "learning_rate": 0.0001, + "loss": 0.0512, + "step": 23470 + }, + { + "epoch": 66.70454545454545, + "grad_norm": 1.234862208366394, + "learning_rate": 0.0001, + "loss": 0.0529, + "step": 23480 + }, + { + "epoch": 66.73295454545455, + "grad_norm": 0.8468987345695496, + "learning_rate": 0.0001, + "loss": 0.0536, + "step": 23490 + }, + { + "epoch": 66.76136363636364, + "grad_norm": 0.765661358833313, + "learning_rate": 0.0001, + "loss": 0.0508, + "step": 23500 + }, + { + "epoch": 66.78977272727273, + "grad_norm": 1.1269277334213257, + "learning_rate": 0.0001, + "loss": 0.0516, + "step": 23510 + }, + { + "epoch": 66.81818181818181, + "grad_norm": 0.9379168748855591, + "learning_rate": 0.0001, + "loss": 0.0516, + "step": 23520 + }, + { + "epoch": 66.8465909090909, + "grad_norm": 1.3234306573867798, + "learning_rate": 0.0001, + "loss": 0.0518, + "step": 23530 + }, + { + "epoch": 66.875, + "grad_norm": 0.9817354083061218, + "learning_rate": 0.0001, + "loss": 0.0539, + "step": 23540 + }, + { + "epoch": 66.9034090909091, + "grad_norm": 1.1395480632781982, + "learning_rate": 0.0001, + "loss": 0.0524, + "step": 23550 + }, + { + "epoch": 66.93181818181819, + "grad_norm": 0.9638949036598206, + "learning_rate": 0.0001, + "loss": 0.0511, + "step": 23560 + }, + { + "epoch": 66.96022727272727, + "grad_norm": 0.8169605135917664, + "learning_rate": 0.0001, + "loss": 0.0517, + "step": 23570 + }, + { + "epoch": 66.98863636363636, + "grad_norm": 0.861230731010437, + "learning_rate": 0.0001, + "loss": 0.0537, + "step": 23580 + }, + { + "epoch": 67.01704545454545, + "grad_norm": 0.656604528427124, + "learning_rate": 0.0001, + "loss": 0.0541, + "step": 23590 + }, + { + "epoch": 67.04545454545455, + "grad_norm": 0.7038812041282654, + "learning_rate": 0.0001, + "loss": 0.0545, + "step": 23600 + }, + { + "epoch": 67.07386363636364, + "grad_norm": 0.627716064453125, + "learning_rate": 0.0001, + "loss": 0.0517, + "step": 23610 + }, + { + "epoch": 67.10227272727273, + "grad_norm": 0.857126772403717, + "learning_rate": 0.0001, + "loss": 0.0531, + "step": 23620 + }, + { + "epoch": 67.13068181818181, + "grad_norm": 0.8336479067802429, + "learning_rate": 0.0001, + "loss": 0.0531, + "step": 23630 + }, + { + "epoch": 67.1590909090909, + "grad_norm": 0.6836590766906738, + "learning_rate": 0.0001, + "loss": 0.0527, + "step": 23640 + }, + { + "epoch": 67.1875, + "grad_norm": 0.8017722964286804, + "learning_rate": 0.0001, + "loss": 0.0535, + "step": 23650 + }, + { + "epoch": 67.2159090909091, + "grad_norm": 0.7780610918998718, + "learning_rate": 0.0001, + "loss": 0.0535, + "step": 23660 + }, + { + "epoch": 67.24431818181819, + "grad_norm": 0.7484387755393982, + "learning_rate": 0.0001, + "loss": 0.0526, + "step": 23670 + }, + { + "epoch": 67.27272727272727, + "grad_norm": 0.6655145287513733, + "learning_rate": 0.0001, + "loss": 0.0513, + "step": 23680 + }, + { + "epoch": 67.30113636363636, + "grad_norm": 0.6643372774124146, + "learning_rate": 0.0001, + "loss": 0.0535, + "step": 23690 + }, + { + "epoch": 67.32954545454545, + "grad_norm": 0.7950687408447266, + "learning_rate": 0.0001, + "loss": 0.0517, + "step": 23700 + }, + { + "epoch": 67.35795454545455, + "grad_norm": 0.8306936621665955, + "learning_rate": 0.0001, + "loss": 0.0534, + "step": 23710 + }, + { + "epoch": 67.38636363636364, + "grad_norm": 0.8002460598945618, + "learning_rate": 0.0001, + "loss": 0.0513, + "step": 23720 + }, + { + "epoch": 67.41477272727273, + "grad_norm": 0.7444122433662415, + "learning_rate": 0.0001, + "loss": 0.0531, + "step": 23730 + }, + { + "epoch": 67.44318181818181, + "grad_norm": 0.6906100511550903, + "learning_rate": 0.0001, + "loss": 0.0509, + "step": 23740 + }, + { + "epoch": 67.4715909090909, + "grad_norm": 0.7418623566627502, + "learning_rate": 0.0001, + "loss": 0.0546, + "step": 23750 + }, + { + "epoch": 67.5, + "grad_norm": 0.7137703895568848, + "learning_rate": 0.0001, + "loss": 0.0536, + "step": 23760 + }, + { + "epoch": 67.5284090909091, + "grad_norm": 0.793405294418335, + "learning_rate": 0.0001, + "loss": 0.0554, + "step": 23770 + }, + { + "epoch": 67.55681818181819, + "grad_norm": 0.7513002157211304, + "learning_rate": 0.0001, + "loss": 0.0568, + "step": 23780 + }, + { + "epoch": 67.58522727272727, + "grad_norm": 0.7225285172462463, + "learning_rate": 0.0001, + "loss": 0.0534, + "step": 23790 + }, + { + "epoch": 67.61363636363636, + "grad_norm": 0.690243661403656, + "learning_rate": 0.0001, + "loss": 0.0517, + "step": 23800 + }, + { + "epoch": 67.64204545454545, + "grad_norm": 0.6749371290206909, + "learning_rate": 0.0001, + "loss": 0.054, + "step": 23810 + }, + { + "epoch": 67.67045454545455, + "grad_norm": 0.7889542579650879, + "learning_rate": 0.0001, + "loss": 0.0535, + "step": 23820 + }, + { + "epoch": 67.69886363636364, + "grad_norm": 1.2622177600860596, + "learning_rate": 0.0001, + "loss": 0.052, + "step": 23830 + }, + { + "epoch": 67.72727272727273, + "grad_norm": 1.8038215637207031, + "learning_rate": 0.0001, + "loss": 0.0534, + "step": 23840 + }, + { + "epoch": 67.75568181818181, + "grad_norm": 1.4471378326416016, + "learning_rate": 0.0001, + "loss": 0.054, + "step": 23850 + }, + { + "epoch": 67.7840909090909, + "grad_norm": 1.2452491521835327, + "learning_rate": 0.0001, + "loss": 0.051, + "step": 23860 + }, + { + "epoch": 67.8125, + "grad_norm": 1.2193328142166138, + "learning_rate": 0.0001, + "loss": 0.0498, + "step": 23870 + }, + { + "epoch": 67.8409090909091, + "grad_norm": 1.1352566480636597, + "learning_rate": 0.0001, + "loss": 0.0512, + "step": 23880 + }, + { + "epoch": 67.86931818181819, + "grad_norm": 1.0166912078857422, + "learning_rate": 0.0001, + "loss": 0.0527, + "step": 23890 + }, + { + "epoch": 67.89772727272727, + "grad_norm": 0.9358308911323547, + "learning_rate": 0.0001, + "loss": 0.054, + "step": 23900 + }, + { + "epoch": 67.92613636363636, + "grad_norm": 1.0616742372512817, + "learning_rate": 0.0001, + "loss": 0.0512, + "step": 23910 + }, + { + "epoch": 67.95454545454545, + "grad_norm": 0.9217783808708191, + "learning_rate": 0.0001, + "loss": 0.0502, + "step": 23920 + }, + { + "epoch": 67.98295454545455, + "grad_norm": 1.0423084497451782, + "learning_rate": 0.0001, + "loss": 0.0518, + "step": 23930 + }, + { + "epoch": 68.01136363636364, + "grad_norm": 1.1823982000350952, + "learning_rate": 0.0001, + "loss": 0.0534, + "step": 23940 + }, + { + "epoch": 68.03977272727273, + "grad_norm": 0.9482648968696594, + "learning_rate": 0.0001, + "loss": 0.0531, + "step": 23950 + }, + { + "epoch": 68.06818181818181, + "grad_norm": 0.7669751644134521, + "learning_rate": 0.0001, + "loss": 0.0516, + "step": 23960 + }, + { + "epoch": 68.0965909090909, + "grad_norm": 1.1928632259368896, + "learning_rate": 0.0001, + "loss": 0.0535, + "step": 23970 + }, + { + "epoch": 68.125, + "grad_norm": 0.9698597192764282, + "learning_rate": 0.0001, + "loss": 0.0541, + "step": 23980 + }, + { + "epoch": 68.1534090909091, + "grad_norm": 1.0423868894577026, + "learning_rate": 0.0001, + "loss": 0.0541, + "step": 23990 + }, + { + "epoch": 68.18181818181819, + "grad_norm": 1.2554688453674316, + "learning_rate": 0.0001, + "loss": 0.0532, + "step": 24000 + }, + { + "epoch": 68.21022727272727, + "grad_norm": 1.3134796619415283, + "learning_rate": 0.0001, + "loss": 0.0545, + "step": 24010 + }, + { + "epoch": 68.23863636363636, + "grad_norm": 1.2554820775985718, + "learning_rate": 0.0001, + "loss": 0.0509, + "step": 24020 + }, + { + "epoch": 68.26704545454545, + "grad_norm": 0.9832156896591187, + "learning_rate": 0.0001, + "loss": 0.0497, + "step": 24030 + }, + { + "epoch": 68.29545454545455, + "grad_norm": 1.0172799825668335, + "learning_rate": 0.0001, + "loss": 0.0482, + "step": 24040 + }, + { + "epoch": 68.32386363636364, + "grad_norm": 1.321234107017517, + "learning_rate": 0.0001, + "loss": 0.0529, + "step": 24050 + }, + { + "epoch": 68.35227272727273, + "grad_norm": 1.5265092849731445, + "learning_rate": 0.0001, + "loss": 0.051, + "step": 24060 + }, + { + "epoch": 68.38068181818181, + "grad_norm": 1.1719361543655396, + "learning_rate": 0.0001, + "loss": 0.0512, + "step": 24070 + }, + { + "epoch": 68.4090909090909, + "grad_norm": 1.2583420276641846, + "learning_rate": 0.0001, + "loss": 0.0494, + "step": 24080 + }, + { + "epoch": 68.4375, + "grad_norm": 1.0527803897857666, + "learning_rate": 0.0001, + "loss": 0.0516, + "step": 24090 + }, + { + "epoch": 68.4659090909091, + "grad_norm": 1.011395812034607, + "learning_rate": 0.0001, + "loss": 0.05, + "step": 24100 + }, + { + "epoch": 68.49431818181819, + "grad_norm": 1.037021517753601, + "learning_rate": 0.0001, + "loss": 0.0499, + "step": 24110 + }, + { + "epoch": 68.52272727272727, + "grad_norm": 0.9442154765129089, + "learning_rate": 0.0001, + "loss": 0.051, + "step": 24120 + }, + { + "epoch": 68.55113636363636, + "grad_norm": 0.8959128260612488, + "learning_rate": 0.0001, + "loss": 0.051, + "step": 24130 + }, + { + "epoch": 68.57954545454545, + "grad_norm": 0.9579172730445862, + "learning_rate": 0.0001, + "loss": 0.0504, + "step": 24140 + }, + { + "epoch": 68.60795454545455, + "grad_norm": 1.0895936489105225, + "learning_rate": 0.0001, + "loss": 0.0488, + "step": 24150 + }, + { + "epoch": 68.63636363636364, + "grad_norm": 0.7573409676551819, + "learning_rate": 0.0001, + "loss": 0.0491, + "step": 24160 + }, + { + "epoch": 68.66477272727273, + "grad_norm": 0.8774531483650208, + "learning_rate": 0.0001, + "loss": 0.0481, + "step": 24170 + }, + { + "epoch": 68.69318181818181, + "grad_norm": 0.8967164158821106, + "learning_rate": 0.0001, + "loss": 0.0509, + "step": 24180 + }, + { + "epoch": 68.7215909090909, + "grad_norm": 1.0587197542190552, + "learning_rate": 0.0001, + "loss": 0.0541, + "step": 24190 + }, + { + "epoch": 68.75, + "grad_norm": 1.1898106336593628, + "learning_rate": 0.0001, + "loss": 0.0512, + "step": 24200 + }, + { + "epoch": 68.7784090909091, + "grad_norm": 1.0361658334732056, + "learning_rate": 0.0001, + "loss": 0.0513, + "step": 24210 + }, + { + "epoch": 68.80681818181819, + "grad_norm": 0.935555100440979, + "learning_rate": 0.0001, + "loss": 0.0518, + "step": 24220 + }, + { + "epoch": 68.83522727272727, + "grad_norm": 1.032720923423767, + "learning_rate": 0.0001, + "loss": 0.0511, + "step": 24230 + }, + { + "epoch": 68.86363636363636, + "grad_norm": 0.8149229884147644, + "learning_rate": 0.0001, + "loss": 0.052, + "step": 24240 + }, + { + "epoch": 68.89204545454545, + "grad_norm": 0.8590128421783447, + "learning_rate": 0.0001, + "loss": 0.0515, + "step": 24250 + }, + { + "epoch": 68.92045454545455, + "grad_norm": 0.9247688055038452, + "learning_rate": 0.0001, + "loss": 0.0518, + "step": 24260 + }, + { + "epoch": 68.94886363636364, + "grad_norm": 0.9311433434486389, + "learning_rate": 0.0001, + "loss": 0.0522, + "step": 24270 + }, + { + "epoch": 68.97727272727273, + "grad_norm": 0.7276850938796997, + "learning_rate": 0.0001, + "loss": 0.0533, + "step": 24280 + }, + { + "epoch": 69.00568181818181, + "grad_norm": 0.6716181039810181, + "learning_rate": 0.0001, + "loss": 0.0537, + "step": 24290 + }, + { + "epoch": 69.0340909090909, + "grad_norm": 0.9270053505897522, + "learning_rate": 0.0001, + "loss": 0.0527, + "step": 24300 + }, + { + "epoch": 69.0625, + "grad_norm": 0.666446328163147, + "learning_rate": 0.0001, + "loss": 0.0511, + "step": 24310 + }, + { + "epoch": 69.0909090909091, + "grad_norm": 0.8492375016212463, + "learning_rate": 0.0001, + "loss": 0.0514, + "step": 24320 + }, + { + "epoch": 69.11931818181819, + "grad_norm": 0.8447439074516296, + "learning_rate": 0.0001, + "loss": 0.0487, + "step": 24330 + }, + { + "epoch": 69.14772727272727, + "grad_norm": 0.7112112045288086, + "learning_rate": 0.0001, + "loss": 0.0521, + "step": 24340 + }, + { + "epoch": 69.17613636363636, + "grad_norm": 0.8900835514068604, + "learning_rate": 0.0001, + "loss": 0.0554, + "step": 24350 + }, + { + "epoch": 69.20454545454545, + "grad_norm": 0.7511789798736572, + "learning_rate": 0.0001, + "loss": 0.0511, + "step": 24360 + }, + { + "epoch": 69.23295454545455, + "grad_norm": 0.6234313249588013, + "learning_rate": 0.0001, + "loss": 0.0516, + "step": 24370 + }, + { + "epoch": 69.26136363636364, + "grad_norm": 0.8581838011741638, + "learning_rate": 0.0001, + "loss": 0.0503, + "step": 24380 + }, + { + "epoch": 69.28977272727273, + "grad_norm": 0.6439953446388245, + "learning_rate": 0.0001, + "loss": 0.0522, + "step": 24390 + }, + { + "epoch": 69.31818181818181, + "grad_norm": 0.805645763874054, + "learning_rate": 0.0001, + "loss": 0.0531, + "step": 24400 + }, + { + "epoch": 69.3465909090909, + "grad_norm": 0.7699912786483765, + "learning_rate": 0.0001, + "loss": 0.0555, + "step": 24410 + }, + { + "epoch": 69.375, + "grad_norm": 0.7186166644096375, + "learning_rate": 0.0001, + "loss": 0.0552, + "step": 24420 + }, + { + "epoch": 69.4034090909091, + "grad_norm": 0.8284119963645935, + "learning_rate": 0.0001, + "loss": 0.0514, + "step": 24430 + }, + { + "epoch": 69.43181818181819, + "grad_norm": 0.8688386082649231, + "learning_rate": 0.0001, + "loss": 0.0529, + "step": 24440 + }, + { + "epoch": 69.46022727272727, + "grad_norm": 1.1181520223617554, + "learning_rate": 0.0001, + "loss": 0.0544, + "step": 24450 + }, + { + "epoch": 69.48863636363636, + "grad_norm": 1.0079569816589355, + "learning_rate": 0.0001, + "loss": 0.0536, + "step": 24460 + }, + { + "epoch": 69.51704545454545, + "grad_norm": 1.6781340837478638, + "learning_rate": 0.0001, + "loss": 0.0526, + "step": 24470 + }, + { + "epoch": 69.54545454545455, + "grad_norm": 1.3601493835449219, + "learning_rate": 0.0001, + "loss": 0.0515, + "step": 24480 + }, + { + "epoch": 69.57386363636364, + "grad_norm": 0.9561741948127747, + "learning_rate": 0.0001, + "loss": 0.0514, + "step": 24490 + }, + { + "epoch": 69.60227272727273, + "grad_norm": 0.8902744650840759, + "learning_rate": 0.0001, + "loss": 0.0496, + "step": 24500 + }, + { + "epoch": 69.63068181818181, + "grad_norm": 0.9449006915092468, + "learning_rate": 0.0001, + "loss": 0.0512, + "step": 24510 + }, + { + "epoch": 69.6590909090909, + "grad_norm": 1.1786518096923828, + "learning_rate": 0.0001, + "loss": 0.0525, + "step": 24520 + }, + { + "epoch": 69.6875, + "grad_norm": 1.233751893043518, + "learning_rate": 0.0001, + "loss": 0.0507, + "step": 24530 + }, + { + "epoch": 69.7159090909091, + "grad_norm": 0.9357305765151978, + "learning_rate": 0.0001, + "loss": 0.0528, + "step": 24540 + }, + { + "epoch": 69.74431818181819, + "grad_norm": 1.226325511932373, + "learning_rate": 0.0001, + "loss": 0.0515, + "step": 24550 + }, + { + "epoch": 69.77272727272727, + "grad_norm": 0.8865256309509277, + "learning_rate": 0.0001, + "loss": 0.0497, + "step": 24560 + }, + { + "epoch": 69.80113636363636, + "grad_norm": 1.6879335641860962, + "learning_rate": 0.0001, + "loss": 0.0522, + "step": 24570 + }, + { + "epoch": 69.82954545454545, + "grad_norm": 1.2305909395217896, + "learning_rate": 0.0001, + "loss": 0.0502, + "step": 24580 + }, + { + "epoch": 69.85795454545455, + "grad_norm": 1.1654038429260254, + "learning_rate": 0.0001, + "loss": 0.0514, + "step": 24590 + }, + { + "epoch": 69.88636363636364, + "grad_norm": 0.8632926940917969, + "learning_rate": 0.0001, + "loss": 0.0507, + "step": 24600 + }, + { + "epoch": 69.91477272727273, + "grad_norm": 0.7934690713882446, + "learning_rate": 0.0001, + "loss": 0.0497, + "step": 24610 + }, + { + "epoch": 69.94318181818181, + "grad_norm": 0.7492729425430298, + "learning_rate": 0.0001, + "loss": 0.0496, + "step": 24620 + }, + { + "epoch": 69.9715909090909, + "grad_norm": 0.7984905242919922, + "learning_rate": 0.0001, + "loss": 0.0503, + "step": 24630 + }, + { + "epoch": 70.0, + "grad_norm": 0.8478935956954956, + "learning_rate": 0.0001, + "loss": 0.0514, + "step": 24640 + }, + { + "epoch": 70.0284090909091, + "grad_norm": 0.7653668522834778, + "learning_rate": 0.0001, + "loss": 0.0517, + "step": 24650 + }, + { + "epoch": 70.05681818181819, + "grad_norm": 0.7579995393753052, + "learning_rate": 0.0001, + "loss": 0.053, + "step": 24660 + }, + { + "epoch": 70.08522727272727, + "grad_norm": 0.9072360992431641, + "learning_rate": 0.0001, + "loss": 0.0515, + "step": 24670 + }, + { + "epoch": 70.11363636363636, + "grad_norm": 0.7853196859359741, + "learning_rate": 0.0001, + "loss": 0.0512, + "step": 24680 + }, + { + "epoch": 70.14204545454545, + "grad_norm": 0.7733336091041565, + "learning_rate": 0.0001, + "loss": 0.0566, + "step": 24690 + }, + { + "epoch": 70.17045454545455, + "grad_norm": 0.8603296279907227, + "learning_rate": 0.0001, + "loss": 0.0522, + "step": 24700 + }, + { + "epoch": 70.19886363636364, + "grad_norm": 1.4242461919784546, + "learning_rate": 0.0001, + "loss": 0.0529, + "step": 24710 + }, + { + "epoch": 70.22727272727273, + "grad_norm": 1.4059160947799683, + "learning_rate": 0.0001, + "loss": 0.0512, + "step": 24720 + }, + { + "epoch": 70.25568181818181, + "grad_norm": 1.3890278339385986, + "learning_rate": 0.0001, + "loss": 0.0523, + "step": 24730 + }, + { + "epoch": 70.2840909090909, + "grad_norm": 1.2617861032485962, + "learning_rate": 0.0001, + "loss": 0.0472, + "step": 24740 + }, + { + "epoch": 70.3125, + "grad_norm": 1.1536449193954468, + "learning_rate": 0.0001, + "loss": 0.0507, + "step": 24750 + }, + { + "epoch": 70.3409090909091, + "grad_norm": 1.032045602798462, + "learning_rate": 0.0001, + "loss": 0.0502, + "step": 24760 + }, + { + "epoch": 70.36931818181819, + "grad_norm": 0.9999845623970032, + "learning_rate": 0.0001, + "loss": 0.0511, + "step": 24770 + }, + { + "epoch": 70.39772727272727, + "grad_norm": 0.9327858090400696, + "learning_rate": 0.0001, + "loss": 0.0497, + "step": 24780 + }, + { + "epoch": 70.42613636363636, + "grad_norm": 1.1740491390228271, + "learning_rate": 0.0001, + "loss": 0.0509, + "step": 24790 + }, + { + "epoch": 70.45454545454545, + "grad_norm": 0.8893155455589294, + "learning_rate": 0.0001, + "loss": 0.0494, + "step": 24800 + }, + { + "epoch": 70.48295454545455, + "grad_norm": 0.8836989998817444, + "learning_rate": 0.0001, + "loss": 0.0522, + "step": 24810 + }, + { + "epoch": 70.51136363636364, + "grad_norm": 1.0119452476501465, + "learning_rate": 0.0001, + "loss": 0.0497, + "step": 24820 + }, + { + "epoch": 70.53977272727273, + "grad_norm": 0.9030247330665588, + "learning_rate": 0.0001, + "loss": 0.0522, + "step": 24830 + }, + { + "epoch": 70.56818181818181, + "grad_norm": 0.7199386358261108, + "learning_rate": 0.0001, + "loss": 0.0514, + "step": 24840 + }, + { + "epoch": 70.5965909090909, + "grad_norm": 0.8884567022323608, + "learning_rate": 0.0001, + "loss": 0.0504, + "step": 24850 + }, + { + "epoch": 70.625, + "grad_norm": 0.6433593034744263, + "learning_rate": 0.0001, + "loss": 0.0531, + "step": 24860 + }, + { + "epoch": 70.6534090909091, + "grad_norm": 0.6983967423439026, + "learning_rate": 0.0001, + "loss": 0.0494, + "step": 24870 + }, + { + "epoch": 70.68181818181819, + "grad_norm": 0.647629976272583, + "learning_rate": 0.0001, + "loss": 0.0499, + "step": 24880 + }, + { + "epoch": 70.71022727272727, + "grad_norm": 0.71266108751297, + "learning_rate": 0.0001, + "loss": 0.0512, + "step": 24890 + }, + { + "epoch": 70.73863636363636, + "grad_norm": 0.6877172589302063, + "learning_rate": 0.0001, + "loss": 0.0499, + "step": 24900 + }, + { + "epoch": 70.76704545454545, + "grad_norm": 0.6937993168830872, + "learning_rate": 0.0001, + "loss": 0.0496, + "step": 24910 + }, + { + "epoch": 70.79545454545455, + "grad_norm": 0.5959415435791016, + "learning_rate": 0.0001, + "loss": 0.0489, + "step": 24920 + }, + { + "epoch": 70.82386363636364, + "grad_norm": 0.6399363279342651, + "learning_rate": 0.0001, + "loss": 0.0512, + "step": 24930 + }, + { + "epoch": 70.85227272727273, + "grad_norm": 0.7871550917625427, + "learning_rate": 0.0001, + "loss": 0.0494, + "step": 24940 + }, + { + "epoch": 70.88068181818181, + "grad_norm": 0.7523185610771179, + "learning_rate": 0.0001, + "loss": 0.0536, + "step": 24950 + }, + { + "epoch": 70.9090909090909, + "grad_norm": 0.7533581852912903, + "learning_rate": 0.0001, + "loss": 0.0525, + "step": 24960 + }, + { + "epoch": 70.9375, + "grad_norm": 0.7682768106460571, + "learning_rate": 0.0001, + "loss": 0.0512, + "step": 24970 + }, + { + "epoch": 70.9659090909091, + "grad_norm": 0.8463433980941772, + "learning_rate": 0.0001, + "loss": 0.051, + "step": 24980 + }, + { + "epoch": 70.99431818181819, + "grad_norm": 1.0878268480300903, + "learning_rate": 0.0001, + "loss": 0.049, + "step": 24990 + }, + { + "epoch": 71.02272727272727, + "grad_norm": 0.7139332294464111, + "learning_rate": 0.0001, + "loss": 0.0523, + "step": 25000 + }, + { + "epoch": 71.05113636363636, + "grad_norm": 0.6780238747596741, + "learning_rate": 0.0001, + "loss": 0.05, + "step": 25010 + }, + { + "epoch": 71.07954545454545, + "grad_norm": 0.6342650055885315, + "learning_rate": 0.0001, + "loss": 0.0518, + "step": 25020 + }, + { + "epoch": 71.10795454545455, + "grad_norm": 0.6704277992248535, + "learning_rate": 0.0001, + "loss": 0.0506, + "step": 25030 + }, + { + "epoch": 71.13636363636364, + "grad_norm": 0.7333451509475708, + "learning_rate": 0.0001, + "loss": 0.0494, + "step": 25040 + }, + { + "epoch": 71.16477272727273, + "grad_norm": 0.8710368275642395, + "learning_rate": 0.0001, + "loss": 0.0508, + "step": 25050 + }, + { + "epoch": 71.19318181818181, + "grad_norm": 0.9135860204696655, + "learning_rate": 0.0001, + "loss": 0.053, + "step": 25060 + }, + { + "epoch": 71.2215909090909, + "grad_norm": 0.7403706908226013, + "learning_rate": 0.0001, + "loss": 0.0505, + "step": 25070 + }, + { + "epoch": 71.25, + "grad_norm": 0.6618191003799438, + "learning_rate": 0.0001, + "loss": 0.048, + "step": 25080 + }, + { + "epoch": 71.2784090909091, + "grad_norm": 0.7856776714324951, + "learning_rate": 0.0001, + "loss": 0.0476, + "step": 25090 + }, + { + "epoch": 71.30681818181819, + "grad_norm": 0.7596649527549744, + "learning_rate": 0.0001, + "loss": 0.0481, + "step": 25100 + }, + { + "epoch": 71.33522727272727, + "grad_norm": 0.8146116733551025, + "learning_rate": 0.0001, + "loss": 0.0474, + "step": 25110 + }, + { + "epoch": 71.36363636363636, + "grad_norm": 0.6791525483131409, + "learning_rate": 0.0001, + "loss": 0.0484, + "step": 25120 + }, + { + "epoch": 71.39204545454545, + "grad_norm": 0.7217307090759277, + "learning_rate": 0.0001, + "loss": 0.0499, + "step": 25130 + }, + { + "epoch": 71.42045454545455, + "grad_norm": 0.6544477939605713, + "learning_rate": 0.0001, + "loss": 0.0531, + "step": 25140 + }, + { + "epoch": 71.44886363636364, + "grad_norm": 0.6746852397918701, + "learning_rate": 0.0001, + "loss": 0.0521, + "step": 25150 + }, + { + "epoch": 71.47727272727273, + "grad_norm": 0.5204148888587952, + "learning_rate": 0.0001, + "loss": 0.0506, + "step": 25160 + }, + { + "epoch": 71.50568181818181, + "grad_norm": 0.5109100937843323, + "learning_rate": 0.0001, + "loss": 0.0493, + "step": 25170 + }, + { + "epoch": 71.5340909090909, + "grad_norm": 0.6211031675338745, + "learning_rate": 0.0001, + "loss": 0.0543, + "step": 25180 + }, + { + "epoch": 71.5625, + "grad_norm": 0.677085280418396, + "learning_rate": 0.0001, + "loss": 0.0527, + "step": 25190 + }, + { + "epoch": 71.5909090909091, + "grad_norm": 0.6960747838020325, + "learning_rate": 0.0001, + "loss": 0.0502, + "step": 25200 + }, + { + "epoch": 71.61931818181819, + "grad_norm": 0.6580451130867004, + "learning_rate": 0.0001, + "loss": 0.0505, + "step": 25210 + }, + { + "epoch": 71.64772727272727, + "grad_norm": 0.6775358319282532, + "learning_rate": 0.0001, + "loss": 0.0491, + "step": 25220 + }, + { + "epoch": 71.67613636363636, + "grad_norm": 0.6583216786384583, + "learning_rate": 0.0001, + "loss": 0.0505, + "step": 25230 + }, + { + "epoch": 71.70454545454545, + "grad_norm": 0.7078356146812439, + "learning_rate": 0.0001, + "loss": 0.0483, + "step": 25240 + }, + { + "epoch": 71.73295454545455, + "grad_norm": 0.7176387906074524, + "learning_rate": 0.0001, + "loss": 0.0487, + "step": 25250 + }, + { + "epoch": 71.76136363636364, + "grad_norm": 0.749264657497406, + "learning_rate": 0.0001, + "loss": 0.0523, + "step": 25260 + }, + { + "epoch": 71.78977272727273, + "grad_norm": 0.6820817589759827, + "learning_rate": 0.0001, + "loss": 0.0498, + "step": 25270 + }, + { + "epoch": 71.81818181818181, + "grad_norm": 0.6245760917663574, + "learning_rate": 0.0001, + "loss": 0.0499, + "step": 25280 + }, + { + "epoch": 71.8465909090909, + "grad_norm": 0.5692148804664612, + "learning_rate": 0.0001, + "loss": 0.0491, + "step": 25290 + }, + { + "epoch": 71.875, + "grad_norm": 0.6304931640625, + "learning_rate": 0.0001, + "loss": 0.0504, + "step": 25300 + }, + { + "epoch": 71.9034090909091, + "grad_norm": 0.546541690826416, + "learning_rate": 0.0001, + "loss": 0.0492, + "step": 25310 + }, + { + "epoch": 71.93181818181819, + "grad_norm": 0.5972326993942261, + "learning_rate": 0.0001, + "loss": 0.0505, + "step": 25320 + }, + { + "epoch": 71.96022727272727, + "grad_norm": 0.6020660996437073, + "learning_rate": 0.0001, + "loss": 0.0505, + "step": 25330 + }, + { + "epoch": 71.98863636363636, + "grad_norm": 0.48787808418273926, + "learning_rate": 0.0001, + "loss": 0.0523, + "step": 25340 + }, + { + "epoch": 72.01704545454545, + "grad_norm": 0.7013693451881409, + "learning_rate": 0.0001, + "loss": 0.0508, + "step": 25350 + }, + { + "epoch": 72.04545454545455, + "grad_norm": 0.5541148781776428, + "learning_rate": 0.0001, + "loss": 0.049, + "step": 25360 + }, + { + "epoch": 72.07386363636364, + "grad_norm": 0.6003844738006592, + "learning_rate": 0.0001, + "loss": 0.0509, + "step": 25370 + }, + { + "epoch": 72.10227272727273, + "grad_norm": 0.8124470114707947, + "learning_rate": 0.0001, + "loss": 0.0532, + "step": 25380 + }, + { + "epoch": 72.13068181818181, + "grad_norm": 0.6087120771408081, + "learning_rate": 0.0001, + "loss": 0.0513, + "step": 25390 + }, + { + "epoch": 72.1590909090909, + "grad_norm": 0.6835238337516785, + "learning_rate": 0.0001, + "loss": 0.0514, + "step": 25400 + }, + { + "epoch": 72.1875, + "grad_norm": 0.758734405040741, + "learning_rate": 0.0001, + "loss": 0.0502, + "step": 25410 + }, + { + "epoch": 72.2159090909091, + "grad_norm": 0.745496392250061, + "learning_rate": 0.0001, + "loss": 0.0496, + "step": 25420 + }, + { + "epoch": 72.24431818181819, + "grad_norm": 0.7484995722770691, + "learning_rate": 0.0001, + "loss": 0.0518, + "step": 25430 + }, + { + "epoch": 72.27272727272727, + "grad_norm": 0.7207466959953308, + "learning_rate": 0.0001, + "loss": 0.0506, + "step": 25440 + }, + { + "epoch": 72.30113636363636, + "grad_norm": 0.866812527179718, + "learning_rate": 0.0001, + "loss": 0.0514, + "step": 25450 + }, + { + "epoch": 72.32954545454545, + "grad_norm": 0.7610346674919128, + "learning_rate": 0.0001, + "loss": 0.0505, + "step": 25460 + }, + { + "epoch": 72.35795454545455, + "grad_norm": 0.6176382899284363, + "learning_rate": 0.0001, + "loss": 0.0493, + "step": 25470 + }, + { + "epoch": 72.38636363636364, + "grad_norm": 0.7282941937446594, + "learning_rate": 0.0001, + "loss": 0.0492, + "step": 25480 + }, + { + "epoch": 72.41477272727273, + "grad_norm": 0.6433279514312744, + "learning_rate": 0.0001, + "loss": 0.051, + "step": 25490 + }, + { + "epoch": 72.44318181818181, + "grad_norm": 0.6624048352241516, + "learning_rate": 0.0001, + "loss": 0.0507, + "step": 25500 + }, + { + "epoch": 72.4715909090909, + "grad_norm": 0.6259250044822693, + "learning_rate": 0.0001, + "loss": 0.0508, + "step": 25510 + }, + { + "epoch": 72.5, + "grad_norm": 0.5815131068229675, + "learning_rate": 0.0001, + "loss": 0.0509, + "step": 25520 + }, + { + "epoch": 72.5284090909091, + "grad_norm": 0.4949661195278168, + "learning_rate": 0.0001, + "loss": 0.0502, + "step": 25530 + }, + { + "epoch": 72.55681818181819, + "grad_norm": 0.6070393323898315, + "learning_rate": 0.0001, + "loss": 0.0536, + "step": 25540 + }, + { + "epoch": 72.58522727272727, + "grad_norm": 0.9325839281082153, + "learning_rate": 0.0001, + "loss": 0.0512, + "step": 25550 + }, + { + "epoch": 72.61363636363636, + "grad_norm": 0.6207942962646484, + "learning_rate": 0.0001, + "loss": 0.051, + "step": 25560 + }, + { + "epoch": 72.64204545454545, + "grad_norm": 0.7251754403114319, + "learning_rate": 0.0001, + "loss": 0.0511, + "step": 25570 + }, + { + "epoch": 72.67045454545455, + "grad_norm": 0.7657225131988525, + "learning_rate": 0.0001, + "loss": 0.053, + "step": 25580 + }, + { + "epoch": 72.69886363636364, + "grad_norm": 0.6369885802268982, + "learning_rate": 0.0001, + "loss": 0.0537, + "step": 25590 + }, + { + "epoch": 72.72727272727273, + "grad_norm": 0.82183837890625, + "learning_rate": 0.0001, + "loss": 0.0503, + "step": 25600 + }, + { + "epoch": 72.75568181818181, + "grad_norm": 0.670074999332428, + "learning_rate": 0.0001, + "loss": 0.0508, + "step": 25610 + }, + { + "epoch": 72.7840909090909, + "grad_norm": 0.7039807438850403, + "learning_rate": 0.0001, + "loss": 0.0487, + "step": 25620 + }, + { + "epoch": 72.8125, + "grad_norm": 0.6067477464675903, + "learning_rate": 0.0001, + "loss": 0.0516, + "step": 25630 + }, + { + "epoch": 72.8409090909091, + "grad_norm": 0.6139563918113708, + "learning_rate": 0.0001, + "loss": 0.0485, + "step": 25640 + }, + { + "epoch": 72.86931818181819, + "grad_norm": 0.8261796832084656, + "learning_rate": 0.0001, + "loss": 0.0477, + "step": 25650 + }, + { + "epoch": 72.89772727272727, + "grad_norm": 0.7676610350608826, + "learning_rate": 0.0001, + "loss": 0.0495, + "step": 25660 + }, + { + "epoch": 72.92613636363636, + "grad_norm": 0.6294236779212952, + "learning_rate": 0.0001, + "loss": 0.0501, + "step": 25670 + }, + { + "epoch": 72.95454545454545, + "grad_norm": 0.5884844660758972, + "learning_rate": 0.0001, + "loss": 0.047, + "step": 25680 + }, + { + "epoch": 72.98295454545455, + "grad_norm": 0.6261518001556396, + "learning_rate": 0.0001, + "loss": 0.0494, + "step": 25690 + }, + { + "epoch": 73.01136363636364, + "grad_norm": 0.5874741673469543, + "learning_rate": 0.0001, + "loss": 0.0492, + "step": 25700 + }, + { + "epoch": 73.03977272727273, + "grad_norm": 0.558462917804718, + "learning_rate": 0.0001, + "loss": 0.0485, + "step": 25710 + }, + { + "epoch": 73.06818181818181, + "grad_norm": 1.5096638202667236, + "learning_rate": 0.0001, + "loss": 0.0482, + "step": 25720 + }, + { + "epoch": 73.0965909090909, + "grad_norm": 0.8970916867256165, + "learning_rate": 0.0001, + "loss": 0.0485, + "step": 25730 + }, + { + "epoch": 73.125, + "grad_norm": 1.2470207214355469, + "learning_rate": 0.0001, + "loss": 0.0474, + "step": 25740 + }, + { + "epoch": 73.1534090909091, + "grad_norm": 1.1245193481445312, + "learning_rate": 0.0001, + "loss": 0.0464, + "step": 25750 + }, + { + "epoch": 73.18181818181819, + "grad_norm": 1.018660068511963, + "learning_rate": 0.0001, + "loss": 0.0487, + "step": 25760 + }, + { + "epoch": 73.21022727272727, + "grad_norm": 1.0547358989715576, + "learning_rate": 0.0001, + "loss": 0.049, + "step": 25770 + }, + { + "epoch": 73.23863636363636, + "grad_norm": 1.0074411630630493, + "learning_rate": 0.0001, + "loss": 0.0495, + "step": 25780 + }, + { + "epoch": 73.26704545454545, + "grad_norm": 1.107343077659607, + "learning_rate": 0.0001, + "loss": 0.0495, + "step": 25790 + }, + { + "epoch": 73.29545454545455, + "grad_norm": 1.0003204345703125, + "learning_rate": 0.0001, + "loss": 0.0461, + "step": 25800 + }, + { + "epoch": 73.32386363636364, + "grad_norm": 1.152951955795288, + "learning_rate": 0.0001, + "loss": 0.0485, + "step": 25810 + }, + { + "epoch": 73.35227272727273, + "grad_norm": 1.0957775115966797, + "learning_rate": 0.0001, + "loss": 0.0465, + "step": 25820 + }, + { + "epoch": 73.38068181818181, + "grad_norm": 0.9931585192680359, + "learning_rate": 0.0001, + "loss": 0.0468, + "step": 25830 + }, + { + "epoch": 73.4090909090909, + "grad_norm": 0.9358384609222412, + "learning_rate": 0.0001, + "loss": 0.0475, + "step": 25840 + }, + { + "epoch": 73.4375, + "grad_norm": 1.6903265714645386, + "learning_rate": 0.0001, + "loss": 0.0561, + "step": 25850 + }, + { + "epoch": 73.4659090909091, + "grad_norm": 1.554337501525879, + "learning_rate": 0.0001, + "loss": 0.0494, + "step": 25860 + }, + { + "epoch": 73.49431818181819, + "grad_norm": 1.531087875366211, + "learning_rate": 0.0001, + "loss": 0.0486, + "step": 25870 + }, + { + "epoch": 73.52272727272727, + "grad_norm": 1.144465684890747, + "learning_rate": 0.0001, + "loss": 0.0472, + "step": 25880 + }, + { + "epoch": 73.55113636363636, + "grad_norm": 1.1745659112930298, + "learning_rate": 0.0001, + "loss": 0.0475, + "step": 25890 + }, + { + "epoch": 73.57954545454545, + "grad_norm": 1.1764057874679565, + "learning_rate": 0.0001, + "loss": 0.0474, + "step": 25900 + }, + { + "epoch": 73.60795454545455, + "grad_norm": 1.2074421644210815, + "learning_rate": 0.0001, + "loss": 0.0456, + "step": 25910 + }, + { + "epoch": 73.63636363636364, + "grad_norm": 1.036546230316162, + "learning_rate": 0.0001, + "loss": 0.0474, + "step": 25920 + }, + { + "epoch": 73.66477272727273, + "grad_norm": 1.5946331024169922, + "learning_rate": 0.0001, + "loss": 0.0471, + "step": 25930 + }, + { + "epoch": 73.69318181818181, + "grad_norm": 1.7375640869140625, + "learning_rate": 0.0001, + "loss": 0.0482, + "step": 25940 + }, + { + "epoch": 73.7215909090909, + "grad_norm": 1.494059443473816, + "learning_rate": 0.0001, + "loss": 0.0498, + "step": 25950 + }, + { + "epoch": 73.75, + "grad_norm": 1.3823585510253906, + "learning_rate": 0.0001, + "loss": 0.0461, + "step": 25960 + }, + { + "epoch": 73.7784090909091, + "grad_norm": 1.4736156463623047, + "learning_rate": 0.0001, + "loss": 0.0476, + "step": 25970 + }, + { + "epoch": 73.80681818181819, + "grad_norm": 1.3302404880523682, + "learning_rate": 0.0001, + "loss": 0.0465, + "step": 25980 + }, + { + "epoch": 73.83522727272727, + "grad_norm": 1.0495837926864624, + "learning_rate": 0.0001, + "loss": 0.0465, + "step": 25990 + }, + { + "epoch": 73.86363636363636, + "grad_norm": 1.1226849555969238, + "learning_rate": 0.0001, + "loss": 0.0463, + "step": 26000 + }, + { + "epoch": 73.89204545454545, + "grad_norm": 1.1718180179595947, + "learning_rate": 0.0001, + "loss": 0.0461, + "step": 26010 + }, + { + "epoch": 73.92045454545455, + "grad_norm": 1.1437042951583862, + "learning_rate": 0.0001, + "loss": 0.0468, + "step": 26020 + }, + { + "epoch": 73.94886363636364, + "grad_norm": 0.9460147619247437, + "learning_rate": 0.0001, + "loss": 0.0454, + "step": 26030 + }, + { + "epoch": 73.97727272727273, + "grad_norm": 0.7734537720680237, + "learning_rate": 0.0001, + "loss": 0.0463, + "step": 26040 + }, + { + "epoch": 74.00568181818181, + "grad_norm": 0.7331590056419373, + "learning_rate": 0.0001, + "loss": 0.0479, + "step": 26050 + }, + { + "epoch": 74.0340909090909, + "grad_norm": 0.8983361721038818, + "learning_rate": 0.0001, + "loss": 0.0481, + "step": 26060 + }, + { + "epoch": 74.0625, + "grad_norm": 0.7752969861030579, + "learning_rate": 0.0001, + "loss": 0.0457, + "step": 26070 + }, + { + "epoch": 74.0909090909091, + "grad_norm": 1.147444725036621, + "learning_rate": 0.0001, + "loss": 0.0488, + "step": 26080 + }, + { + "epoch": 74.11931818181819, + "grad_norm": 1.5672545433044434, + "learning_rate": 0.0001, + "loss": 0.0646, + "step": 26090 + }, + { + "epoch": 74.14772727272727, + "grad_norm": 1.861153483390808, + "learning_rate": 0.0001, + "loss": 0.0496, + "step": 26100 + }, + { + "epoch": 74.17613636363636, + "grad_norm": 2.55692720413208, + "learning_rate": 0.0001, + "loss": 0.0485, + "step": 26110 + }, + { + "epoch": 74.20454545454545, + "grad_norm": 1.756406545639038, + "learning_rate": 0.0001, + "loss": 0.0462, + "step": 26120 + }, + { + "epoch": 74.23295454545455, + "grad_norm": 1.4010676145553589, + "learning_rate": 0.0001, + "loss": 0.0459, + "step": 26130 + }, + { + "epoch": 74.26136363636364, + "grad_norm": 1.0524970293045044, + "learning_rate": 0.0001, + "loss": 0.0452, + "step": 26140 + }, + { + "epoch": 74.28977272727273, + "grad_norm": 1.089568853378296, + "learning_rate": 0.0001, + "loss": 0.0454, + "step": 26150 + }, + { + "epoch": 74.31818181818181, + "grad_norm": 1.5746029615402222, + "learning_rate": 0.0001, + "loss": 0.0458, + "step": 26160 + }, + { + "epoch": 74.3465909090909, + "grad_norm": 1.353350281715393, + "learning_rate": 0.0001, + "loss": 0.0462, + "step": 26170 + }, + { + "epoch": 74.375, + "grad_norm": 0.9193561673164368, + "learning_rate": 0.0001, + "loss": 0.045, + "step": 26180 + }, + { + "epoch": 74.4034090909091, + "grad_norm": 0.794005811214447, + "learning_rate": 0.0001, + "loss": 0.0444, + "step": 26190 + }, + { + "epoch": 74.43181818181819, + "grad_norm": 0.7287346124649048, + "learning_rate": 0.0001, + "loss": 0.0478, + "step": 26200 + }, + { + "epoch": 74.46022727272727, + "grad_norm": 0.9359661340713501, + "learning_rate": 0.0001, + "loss": 0.045, + "step": 26210 + }, + { + "epoch": 74.48863636363636, + "grad_norm": 0.8077235221862793, + "learning_rate": 0.0001, + "loss": 0.0471, + "step": 26220 + }, + { + "epoch": 74.51704545454545, + "grad_norm": 0.7505087852478027, + "learning_rate": 0.0001, + "loss": 0.0471, + "step": 26230 + }, + { + "epoch": 74.54545454545455, + "grad_norm": 0.9545241594314575, + "learning_rate": 0.0001, + "loss": 0.0464, + "step": 26240 + }, + { + "epoch": 74.57386363636364, + "grad_norm": 0.8987447619438171, + "learning_rate": 0.0001, + "loss": 0.0479, + "step": 26250 + }, + { + "epoch": 74.60227272727273, + "grad_norm": 0.7390574812889099, + "learning_rate": 0.0001, + "loss": 0.0487, + "step": 26260 + }, + { + "epoch": 74.63068181818181, + "grad_norm": 0.7332533001899719, + "learning_rate": 0.0001, + "loss": 0.0501, + "step": 26270 + }, + { + "epoch": 74.6590909090909, + "grad_norm": 0.9167255163192749, + "learning_rate": 0.0001, + "loss": 0.0473, + "step": 26280 + }, + { + "epoch": 74.6875, + "grad_norm": 0.6819099187850952, + "learning_rate": 0.0001, + "loss": 0.0487, + "step": 26290 + }, + { + "epoch": 74.7159090909091, + "grad_norm": 0.8224645853042603, + "learning_rate": 0.0001, + "loss": 0.045, + "step": 26300 + }, + { + "epoch": 74.74431818181819, + "grad_norm": 0.8777086138725281, + "learning_rate": 0.0001, + "loss": 0.0493, + "step": 26310 + }, + { + "epoch": 74.77272727272727, + "grad_norm": 0.8701086044311523, + "learning_rate": 0.0001, + "loss": 0.0496, + "step": 26320 + }, + { + "epoch": 74.80113636363636, + "grad_norm": 0.8876749873161316, + "learning_rate": 0.0001, + "loss": 0.0486, + "step": 26330 + }, + { + "epoch": 74.82954545454545, + "grad_norm": 1.0940346717834473, + "learning_rate": 0.0001, + "loss": 0.05, + "step": 26340 + }, + { + "epoch": 74.85795454545455, + "grad_norm": 0.8126282095909119, + "learning_rate": 0.0001, + "loss": 0.05, + "step": 26350 + }, + { + "epoch": 74.88636363636364, + "grad_norm": 0.6576694250106812, + "learning_rate": 0.0001, + "loss": 0.0477, + "step": 26360 + }, + { + "epoch": 74.91477272727273, + "grad_norm": 0.8096992373466492, + "learning_rate": 0.0001, + "loss": 0.0492, + "step": 26370 + }, + { + "epoch": 74.94318181818181, + "grad_norm": 0.7710022926330566, + "learning_rate": 0.0001, + "loss": 0.0469, + "step": 26380 + }, + { + "epoch": 74.9715909090909, + "grad_norm": 0.6302091479301453, + "learning_rate": 0.0001, + "loss": 0.0487, + "step": 26390 + }, + { + "epoch": 75.0, + "grad_norm": 0.5126988291740417, + "learning_rate": 0.0001, + "loss": 0.0476, + "step": 26400 + }, + { + "epoch": 75.0284090909091, + "grad_norm": 0.6366093754768372, + "learning_rate": 0.0001, + "loss": 0.0481, + "step": 26410 + }, + { + "epoch": 75.05681818181819, + "grad_norm": 0.6395828127861023, + "learning_rate": 0.0001, + "loss": 0.0488, + "step": 26420 + }, + { + "epoch": 75.08522727272727, + "grad_norm": 0.6386353969573975, + "learning_rate": 0.0001, + "loss": 0.0515, + "step": 26430 + }, + { + "epoch": 75.11363636363636, + "grad_norm": 0.7400466799736023, + "learning_rate": 0.0001, + "loss": 0.0502, + "step": 26440 + }, + { + "epoch": 75.14204545454545, + "grad_norm": 0.6518636345863342, + "learning_rate": 0.0001, + "loss": 0.0503, + "step": 26450 + }, + { + "epoch": 75.17045454545455, + "grad_norm": 1.10001540184021, + "learning_rate": 0.0001, + "loss": 0.0503, + "step": 26460 + }, + { + "epoch": 75.19886363636364, + "grad_norm": 1.4311150312423706, + "learning_rate": 0.0001, + "loss": 0.0517, + "step": 26470 + }, + { + "epoch": 75.22727272727273, + "grad_norm": 1.1874371767044067, + "learning_rate": 0.0001, + "loss": 0.0502, + "step": 26480 + }, + { + "epoch": 75.25568181818181, + "grad_norm": 1.4475220441818237, + "learning_rate": 0.0001, + "loss": 0.0475, + "step": 26490 + }, + { + "epoch": 75.2840909090909, + "grad_norm": 1.1500955820083618, + "learning_rate": 0.0001, + "loss": 0.049, + "step": 26500 + }, + { + "epoch": 75.3125, + "grad_norm": 1.1389282941818237, + "learning_rate": 0.0001, + "loss": 0.0468, + "step": 26510 + }, + { + "epoch": 75.3409090909091, + "grad_norm": 0.9480587840080261, + "learning_rate": 0.0001, + "loss": 0.0447, + "step": 26520 + }, + { + "epoch": 75.36931818181819, + "grad_norm": 0.8605413436889648, + "learning_rate": 0.0001, + "loss": 0.0445, + "step": 26530 + }, + { + "epoch": 75.39772727272727, + "grad_norm": 0.8746979832649231, + "learning_rate": 0.0001, + "loss": 0.0459, + "step": 26540 + }, + { + "epoch": 75.42613636363636, + "grad_norm": 0.9024845957756042, + "learning_rate": 0.0001, + "loss": 0.0472, + "step": 26550 + }, + { + "epoch": 75.45454545454545, + "grad_norm": 1.183405876159668, + "learning_rate": 0.0001, + "loss": 0.046, + "step": 26560 + }, + { + "epoch": 75.48295454545455, + "grad_norm": 0.9437256455421448, + "learning_rate": 0.0001, + "loss": 0.0477, + "step": 26570 + }, + { + "epoch": 75.51136363636364, + "grad_norm": 0.8547454476356506, + "learning_rate": 0.0001, + "loss": 0.0454, + "step": 26580 + }, + { + "epoch": 75.53977272727273, + "grad_norm": 0.8393595218658447, + "learning_rate": 0.0001, + "loss": 0.0486, + "step": 26590 + }, + { + "epoch": 75.56818181818181, + "grad_norm": 0.9341287016868591, + "learning_rate": 0.0001, + "loss": 0.0464, + "step": 26600 + }, + { + "epoch": 75.5965909090909, + "grad_norm": 0.9698324203491211, + "learning_rate": 0.0001, + "loss": 0.0483, + "step": 26610 + }, + { + "epoch": 75.625, + "grad_norm": 0.8105787038803101, + "learning_rate": 0.0001, + "loss": 0.0484, + "step": 26620 + }, + { + "epoch": 75.6534090909091, + "grad_norm": 1.2497771978378296, + "learning_rate": 0.0001, + "loss": 0.049, + "step": 26630 + }, + { + "epoch": 75.68181818181819, + "grad_norm": 1.0008628368377686, + "learning_rate": 0.0001, + "loss": 0.0487, + "step": 26640 + }, + { + "epoch": 75.71022727272727, + "grad_norm": 1.203723669052124, + "learning_rate": 0.0001, + "loss": 0.0483, + "step": 26650 + }, + { + "epoch": 75.73863636363636, + "grad_norm": 1.1215006113052368, + "learning_rate": 0.0001, + "loss": 0.0483, + "step": 26660 + }, + { + "epoch": 75.76704545454545, + "grad_norm": 0.9968708753585815, + "learning_rate": 0.0001, + "loss": 0.0468, + "step": 26670 + }, + { + "epoch": 75.79545454545455, + "grad_norm": 0.8633506298065186, + "learning_rate": 0.0001, + "loss": 0.0472, + "step": 26680 + }, + { + "epoch": 75.82386363636364, + "grad_norm": 1.0756628513336182, + "learning_rate": 0.0001, + "loss": 0.0486, + "step": 26690 + }, + { + "epoch": 75.85227272727273, + "grad_norm": 0.7618953585624695, + "learning_rate": 0.0001, + "loss": 0.0493, + "step": 26700 + }, + { + "epoch": 75.88068181818181, + "grad_norm": 0.9463710188865662, + "learning_rate": 0.0001, + "loss": 0.0487, + "step": 26710 + }, + { + "epoch": 75.9090909090909, + "grad_norm": 0.7390015125274658, + "learning_rate": 0.0001, + "loss": 0.0489, + "step": 26720 + }, + { + "epoch": 75.9375, + "grad_norm": 0.5806778073310852, + "learning_rate": 0.0001, + "loss": 0.0506, + "step": 26730 + }, + { + "epoch": 75.9659090909091, + "grad_norm": 0.6981925368309021, + "learning_rate": 0.0001, + "loss": 0.0498, + "step": 26740 + }, + { + "epoch": 75.99431818181819, + "grad_norm": 0.7363477349281311, + "learning_rate": 0.0001, + "loss": 0.0493, + "step": 26750 + }, + { + "epoch": 76.02272727272727, + "grad_norm": 0.7735875248908997, + "learning_rate": 0.0001, + "loss": 0.0491, + "step": 26760 + }, + { + "epoch": 76.05113636363636, + "grad_norm": 0.6567436456680298, + "learning_rate": 0.0001, + "loss": 0.0489, + "step": 26770 + }, + { + "epoch": 76.07954545454545, + "grad_norm": 0.6639755368232727, + "learning_rate": 0.0001, + "loss": 0.0481, + "step": 26780 + }, + { + "epoch": 76.10795454545455, + "grad_norm": 0.5334902405738831, + "learning_rate": 0.0001, + "loss": 0.0481, + "step": 26790 + }, + { + "epoch": 76.13636363636364, + "grad_norm": 0.6336926221847534, + "learning_rate": 0.0001, + "loss": 0.0481, + "step": 26800 + }, + { + "epoch": 76.16477272727273, + "grad_norm": 0.5552213191986084, + "learning_rate": 0.0001, + "loss": 0.0492, + "step": 26810 + }, + { + "epoch": 76.19318181818181, + "grad_norm": 0.5877450108528137, + "learning_rate": 0.0001, + "loss": 0.0475, + "step": 26820 + }, + { + "epoch": 76.2215909090909, + "grad_norm": 0.6252912878990173, + "learning_rate": 0.0001, + "loss": 0.0491, + "step": 26830 + }, + { + "epoch": 76.25, + "grad_norm": 0.7641172409057617, + "learning_rate": 0.0001, + "loss": 0.0495, + "step": 26840 + }, + { + "epoch": 76.2784090909091, + "grad_norm": 0.5870921611785889, + "learning_rate": 0.0001, + "loss": 0.0491, + "step": 26850 + }, + { + "epoch": 76.30681818181819, + "grad_norm": 0.6940385699272156, + "learning_rate": 0.0001, + "loss": 0.0473, + "step": 26860 + }, + { + "epoch": 76.33522727272727, + "grad_norm": 0.6808137893676758, + "learning_rate": 0.0001, + "loss": 0.0497, + "step": 26870 + }, + { + "epoch": 76.36363636363636, + "grad_norm": 0.7470855712890625, + "learning_rate": 0.0001, + "loss": 0.0478, + "step": 26880 + }, + { + "epoch": 76.39204545454545, + "grad_norm": 0.7515180110931396, + "learning_rate": 0.0001, + "loss": 0.0475, + "step": 26890 + }, + { + "epoch": 76.42045454545455, + "grad_norm": 0.5889479517936707, + "learning_rate": 0.0001, + "loss": 0.0487, + "step": 26900 + }, + { + "epoch": 76.44886363636364, + "grad_norm": 0.600273609161377, + "learning_rate": 0.0001, + "loss": 0.0479, + "step": 26910 + }, + { + "epoch": 76.47727272727273, + "grad_norm": 0.7066619396209717, + "learning_rate": 0.0001, + "loss": 0.05, + "step": 26920 + }, + { + "epoch": 76.50568181818181, + "grad_norm": 0.794434666633606, + "learning_rate": 0.0001, + "loss": 0.0493, + "step": 26930 + }, + { + "epoch": 76.5340909090909, + "grad_norm": 0.517598569393158, + "learning_rate": 0.0001, + "loss": 0.0462, + "step": 26940 + }, + { + "epoch": 76.5625, + "grad_norm": 0.7150055170059204, + "learning_rate": 0.0001, + "loss": 0.0495, + "step": 26950 + }, + { + "epoch": 76.5909090909091, + "grad_norm": 0.6902920603752136, + "learning_rate": 0.0001, + "loss": 0.0494, + "step": 26960 + }, + { + "epoch": 76.61931818181819, + "grad_norm": 0.8069965839385986, + "learning_rate": 0.0001, + "loss": 0.0476, + "step": 26970 + }, + { + "epoch": 76.64772727272727, + "grad_norm": 0.7762559056282043, + "learning_rate": 0.0001, + "loss": 0.0477, + "step": 26980 + }, + { + "epoch": 76.67613636363636, + "grad_norm": 0.7302852272987366, + "learning_rate": 0.0001, + "loss": 0.0484, + "step": 26990 + }, + { + "epoch": 76.70454545454545, + "grad_norm": 0.7422618865966797, + "learning_rate": 0.0001, + "loss": 0.0469, + "step": 27000 + }, + { + "epoch": 76.73295454545455, + "grad_norm": 0.719369649887085, + "learning_rate": 0.0001, + "loss": 0.0477, + "step": 27010 + }, + { + "epoch": 76.76136363636364, + "grad_norm": 0.7470158338546753, + "learning_rate": 0.0001, + "loss": 0.0466, + "step": 27020 + }, + { + "epoch": 76.78977272727273, + "grad_norm": 0.8050602078437805, + "learning_rate": 0.0001, + "loss": 0.0458, + "step": 27030 + }, + { + "epoch": 76.81818181818181, + "grad_norm": 0.617423415184021, + "learning_rate": 0.0001, + "loss": 0.0486, + "step": 27040 + }, + { + "epoch": 76.8465909090909, + "grad_norm": 0.505489706993103, + "learning_rate": 0.0001, + "loss": 0.0456, + "step": 27050 + }, + { + "epoch": 76.875, + "grad_norm": 0.5494895577430725, + "learning_rate": 0.0001, + "loss": 0.0464, + "step": 27060 + }, + { + "epoch": 76.9034090909091, + "grad_norm": 0.5236529111862183, + "learning_rate": 0.0001, + "loss": 0.0455, + "step": 27070 + }, + { + "epoch": 76.93181818181819, + "grad_norm": 0.6138285994529724, + "learning_rate": 0.0001, + "loss": 0.045, + "step": 27080 + }, + { + "epoch": 76.96022727272727, + "grad_norm": 0.6185349225997925, + "learning_rate": 0.0001, + "loss": 0.0499, + "step": 27090 + }, + { + "epoch": 76.98863636363636, + "grad_norm": 0.6920495629310608, + "learning_rate": 0.0001, + "loss": 0.0496, + "step": 27100 + }, + { + "epoch": 77.01704545454545, + "grad_norm": 0.7093076109886169, + "learning_rate": 0.0001, + "loss": 0.0475, + "step": 27110 + }, + { + "epoch": 77.04545454545455, + "grad_norm": 0.9286143779754639, + "learning_rate": 0.0001, + "loss": 0.0471, + "step": 27120 + }, + { + "epoch": 77.07386363636364, + "grad_norm": 1.0601509809494019, + "learning_rate": 0.0001, + "loss": 0.0479, + "step": 27130 + }, + { + "epoch": 77.10227272727273, + "grad_norm": 0.9617673754692078, + "learning_rate": 0.0001, + "loss": 0.0468, + "step": 27140 + }, + { + "epoch": 77.13068181818181, + "grad_norm": 1.2405085563659668, + "learning_rate": 0.0001, + "loss": 0.0454, + "step": 27150 + }, + { + "epoch": 77.1590909090909, + "grad_norm": 0.8260979056358337, + "learning_rate": 0.0001, + "loss": 0.0461, + "step": 27160 + }, + { + "epoch": 77.1875, + "grad_norm": 0.6605196595191956, + "learning_rate": 0.0001, + "loss": 0.047, + "step": 27170 + }, + { + "epoch": 77.2159090909091, + "grad_norm": 0.7948494553565979, + "learning_rate": 0.0001, + "loss": 0.0464, + "step": 27180 + }, + { + "epoch": 77.24431818181819, + "grad_norm": 0.8191278576850891, + "learning_rate": 0.0001, + "loss": 0.0479, + "step": 27190 + }, + { + "epoch": 77.27272727272727, + "grad_norm": 0.7254536151885986, + "learning_rate": 0.0001, + "loss": 0.0496, + "step": 27200 + }, + { + "epoch": 77.30113636363636, + "grad_norm": 0.6482180953025818, + "learning_rate": 0.0001, + "loss": 0.046, + "step": 27210 + }, + { + "epoch": 77.32954545454545, + "grad_norm": 0.8210635781288147, + "learning_rate": 0.0001, + "loss": 0.0464, + "step": 27220 + }, + { + "epoch": 77.35795454545455, + "grad_norm": 0.6660655736923218, + "learning_rate": 0.0001, + "loss": 0.0465, + "step": 27230 + }, + { + "epoch": 77.38636363636364, + "grad_norm": 0.6348584294319153, + "learning_rate": 0.0001, + "loss": 0.0456, + "step": 27240 + }, + { + "epoch": 77.41477272727273, + "grad_norm": 0.8718886375427246, + "learning_rate": 0.0001, + "loss": 0.0463, + "step": 27250 + }, + { + "epoch": 77.44318181818181, + "grad_norm": 0.919781506061554, + "learning_rate": 0.0001, + "loss": 0.0453, + "step": 27260 + }, + { + "epoch": 77.4715909090909, + "grad_norm": 0.9934787154197693, + "learning_rate": 0.0001, + "loss": 0.0471, + "step": 27270 + }, + { + "epoch": 77.5, + "grad_norm": 0.9608179330825806, + "learning_rate": 0.0001, + "loss": 0.0463, + "step": 27280 + }, + { + "epoch": 77.5284090909091, + "grad_norm": 0.7589172720909119, + "learning_rate": 0.0001, + "loss": 0.0491, + "step": 27290 + }, + { + "epoch": 77.55681818181819, + "grad_norm": 0.9856165647506714, + "learning_rate": 0.0001, + "loss": 0.0459, + "step": 27300 + }, + { + "epoch": 77.58522727272727, + "grad_norm": 0.8956001996994019, + "learning_rate": 0.0001, + "loss": 0.0454, + "step": 27310 + }, + { + "epoch": 77.61363636363636, + "grad_norm": 1.1567500829696655, + "learning_rate": 0.0001, + "loss": 0.048, + "step": 27320 + }, + { + "epoch": 77.64204545454545, + "grad_norm": 0.9312670826911926, + "learning_rate": 0.0001, + "loss": 0.0452, + "step": 27330 + }, + { + "epoch": 77.67045454545455, + "grad_norm": 0.8068075776100159, + "learning_rate": 0.0001, + "loss": 0.0451, + "step": 27340 + }, + { + "epoch": 77.69886363636364, + "grad_norm": 0.5929557085037231, + "learning_rate": 0.0001, + "loss": 0.0475, + "step": 27350 + }, + { + "epoch": 77.72727272727273, + "grad_norm": 0.8782048225402832, + "learning_rate": 0.0001, + "loss": 0.0467, + "step": 27360 + }, + { + "epoch": 77.75568181818181, + "grad_norm": 1.0722332000732422, + "learning_rate": 0.0001, + "loss": 0.0468, + "step": 27370 + }, + { + "epoch": 77.7840909090909, + "grad_norm": 0.7801492810249329, + "learning_rate": 0.0001, + "loss": 0.0468, + "step": 27380 + }, + { + "epoch": 77.8125, + "grad_norm": 0.605384886264801, + "learning_rate": 0.0001, + "loss": 0.0472, + "step": 27390 + }, + { + "epoch": 77.8409090909091, + "grad_norm": 0.9070475101470947, + "learning_rate": 0.0001, + "loss": 0.0495, + "step": 27400 + }, + { + "epoch": 77.86931818181819, + "grad_norm": 1.0343130826950073, + "learning_rate": 0.0001, + "loss": 0.0463, + "step": 27410 + }, + { + "epoch": 77.89772727272727, + "grad_norm": 0.7611730098724365, + "learning_rate": 0.0001, + "loss": 0.0468, + "step": 27420 + }, + { + "epoch": 77.92613636363636, + "grad_norm": 0.8008614182472229, + "learning_rate": 0.0001, + "loss": 0.0469, + "step": 27430 + }, + { + "epoch": 77.95454545454545, + "grad_norm": 0.7293544411659241, + "learning_rate": 0.0001, + "loss": 0.0478, + "step": 27440 + }, + { + "epoch": 77.98295454545455, + "grad_norm": 0.832565188407898, + "learning_rate": 0.0001, + "loss": 0.0474, + "step": 27450 + }, + { + "epoch": 78.01136363636364, + "grad_norm": 0.7416606545448303, + "learning_rate": 0.0001, + "loss": 0.0464, + "step": 27460 + }, + { + "epoch": 78.03977272727273, + "grad_norm": 0.8914027214050293, + "learning_rate": 0.0001, + "loss": 0.0455, + "step": 27470 + }, + { + "epoch": 78.06818181818181, + "grad_norm": 0.6268876194953918, + "learning_rate": 0.0001, + "loss": 0.0446, + "step": 27480 + }, + { + "epoch": 78.0965909090909, + "grad_norm": 0.7498577237129211, + "learning_rate": 0.0001, + "loss": 0.0454, + "step": 27490 + }, + { + "epoch": 78.125, + "grad_norm": 0.7658631801605225, + "learning_rate": 0.0001, + "loss": 0.046, + "step": 27500 + }, + { + "epoch": 78.1534090909091, + "grad_norm": 0.9924762845039368, + "learning_rate": 0.0001, + "loss": 0.0457, + "step": 27510 + }, + { + "epoch": 78.18181818181819, + "grad_norm": 0.8507946133613586, + "learning_rate": 0.0001, + "loss": 0.0442, + "step": 27520 + }, + { + "epoch": 78.21022727272727, + "grad_norm": 0.8294076323509216, + "learning_rate": 0.0001, + "loss": 0.0457, + "step": 27530 + }, + { + "epoch": 78.23863636363636, + "grad_norm": 0.8344864249229431, + "learning_rate": 0.0001, + "loss": 0.0455, + "step": 27540 + }, + { + "epoch": 78.26704545454545, + "grad_norm": 0.6620252132415771, + "learning_rate": 0.0001, + "loss": 0.0454, + "step": 27550 + }, + { + "epoch": 78.29545454545455, + "grad_norm": 0.7037463784217834, + "learning_rate": 0.0001, + "loss": 0.0471, + "step": 27560 + }, + { + "epoch": 78.32386363636364, + "grad_norm": 0.7051752209663391, + "learning_rate": 0.0001, + "loss": 0.0457, + "step": 27570 + }, + { + "epoch": 78.35227272727273, + "grad_norm": 0.7858211398124695, + "learning_rate": 0.0001, + "loss": 0.0467, + "step": 27580 + }, + { + "epoch": 78.38068181818181, + "grad_norm": 0.7993125915527344, + "learning_rate": 0.0001, + "loss": 0.0469, + "step": 27590 + }, + { + "epoch": 78.4090909090909, + "grad_norm": 0.7271113395690918, + "learning_rate": 0.0001, + "loss": 0.0465, + "step": 27600 + }, + { + "epoch": 78.4375, + "grad_norm": 0.7963870763778687, + "learning_rate": 0.0001, + "loss": 0.0465, + "step": 27610 + }, + { + "epoch": 78.4659090909091, + "grad_norm": 0.9144273400306702, + "learning_rate": 0.0001, + "loss": 0.0468, + "step": 27620 + }, + { + "epoch": 78.49431818181819, + "grad_norm": 0.8622909784317017, + "learning_rate": 0.0001, + "loss": 0.0456, + "step": 27630 + }, + { + "epoch": 78.52272727272727, + "grad_norm": 0.7054391503334045, + "learning_rate": 0.0001, + "loss": 0.0457, + "step": 27640 + }, + { + "epoch": 78.55113636363636, + "grad_norm": 0.7337654232978821, + "learning_rate": 0.0001, + "loss": 0.0464, + "step": 27650 + }, + { + "epoch": 78.57954545454545, + "grad_norm": 0.6751934885978699, + "learning_rate": 0.0001, + "loss": 0.0476, + "step": 27660 + }, + { + "epoch": 78.60795454545455, + "grad_norm": 0.7194545269012451, + "learning_rate": 0.0001, + "loss": 0.047, + "step": 27670 + }, + { + "epoch": 78.63636363636364, + "grad_norm": 0.7210686802864075, + "learning_rate": 0.0001, + "loss": 0.0456, + "step": 27680 + }, + { + "epoch": 78.66477272727273, + "grad_norm": 0.9098225235939026, + "learning_rate": 0.0001, + "loss": 0.0465, + "step": 27690 + }, + { + "epoch": 78.69318181818181, + "grad_norm": 0.9643121361732483, + "learning_rate": 0.0001, + "loss": 0.0462, + "step": 27700 + }, + { + "epoch": 78.7215909090909, + "grad_norm": 1.057265043258667, + "learning_rate": 0.0001, + "loss": 0.0463, + "step": 27710 + }, + { + "epoch": 78.75, + "grad_norm": 0.8858153820037842, + "learning_rate": 0.0001, + "loss": 0.0477, + "step": 27720 + }, + { + "epoch": 78.7784090909091, + "grad_norm": 0.7570127248764038, + "learning_rate": 0.0001, + "loss": 0.0448, + "step": 27730 + }, + { + "epoch": 78.80681818181819, + "grad_norm": 0.8737295269966125, + "learning_rate": 0.0001, + "loss": 0.046, + "step": 27740 + }, + { + "epoch": 78.83522727272727, + "grad_norm": 0.8919824957847595, + "learning_rate": 0.0001, + "loss": 0.0455, + "step": 27750 + }, + { + "epoch": 78.86363636363636, + "grad_norm": 0.834999144077301, + "learning_rate": 0.0001, + "loss": 0.0444, + "step": 27760 + }, + { + "epoch": 78.89204545454545, + "grad_norm": 0.7257569432258606, + "learning_rate": 0.0001, + "loss": 0.0442, + "step": 27770 + }, + { + "epoch": 78.92045454545455, + "grad_norm": 0.7738620042800903, + "learning_rate": 0.0001, + "loss": 0.0475, + "step": 27780 + }, + { + "epoch": 78.94886363636364, + "grad_norm": 0.6493619084358215, + "learning_rate": 0.0001, + "loss": 0.0442, + "step": 27790 + }, + { + "epoch": 78.97727272727273, + "grad_norm": 0.7581443190574646, + "learning_rate": 0.0001, + "loss": 0.0465, + "step": 27800 + }, + { + "epoch": 79.00568181818181, + "grad_norm": 0.8084586262702942, + "learning_rate": 0.0001, + "loss": 0.0456, + "step": 27810 + }, + { + "epoch": 79.0340909090909, + "grad_norm": 0.6828577518463135, + "learning_rate": 0.0001, + "loss": 0.046, + "step": 27820 + }, + { + "epoch": 79.0625, + "grad_norm": 0.6780984401702881, + "learning_rate": 0.0001, + "loss": 0.044, + "step": 27830 + }, + { + "epoch": 79.0909090909091, + "grad_norm": 0.7520745396614075, + "learning_rate": 0.0001, + "loss": 0.0452, + "step": 27840 + }, + { + "epoch": 79.11931818181819, + "grad_norm": 0.6034306883811951, + "learning_rate": 0.0001, + "loss": 0.0452, + "step": 27850 + }, + { + "epoch": 79.14772727272727, + "grad_norm": 0.8241128921508789, + "learning_rate": 0.0001, + "loss": 0.0439, + "step": 27860 + }, + { + "epoch": 79.17613636363636, + "grad_norm": 0.7252616286277771, + "learning_rate": 0.0001, + "loss": 0.0445, + "step": 27870 + }, + { + "epoch": 79.20454545454545, + "grad_norm": 0.8703776597976685, + "learning_rate": 0.0001, + "loss": 0.046, + "step": 27880 + }, + { + "epoch": 79.23295454545455, + "grad_norm": 0.6853988766670227, + "learning_rate": 0.0001, + "loss": 0.0457, + "step": 27890 + }, + { + "epoch": 79.26136363636364, + "grad_norm": 0.5899876356124878, + "learning_rate": 0.0001, + "loss": 0.0455, + "step": 27900 + }, + { + "epoch": 79.28977272727273, + "grad_norm": 0.5329005122184753, + "learning_rate": 0.0001, + "loss": 0.0442, + "step": 27910 + }, + { + "epoch": 79.31818181818181, + "grad_norm": 0.5844208002090454, + "learning_rate": 0.0001, + "loss": 0.0449, + "step": 27920 + }, + { + "epoch": 79.3465909090909, + "grad_norm": 0.5217543840408325, + "learning_rate": 0.0001, + "loss": 0.0441, + "step": 27930 + }, + { + "epoch": 79.375, + "grad_norm": 0.631199300289154, + "learning_rate": 0.0001, + "loss": 0.0452, + "step": 27940 + }, + { + "epoch": 79.4034090909091, + "grad_norm": 0.5435271859169006, + "learning_rate": 0.0001, + "loss": 0.0452, + "step": 27950 + }, + { + "epoch": 79.43181818181819, + "grad_norm": 0.5514788627624512, + "learning_rate": 0.0001, + "loss": 0.0463, + "step": 27960 + }, + { + "epoch": 79.46022727272727, + "grad_norm": 0.6063737273216248, + "learning_rate": 0.0001, + "loss": 0.0462, + "step": 27970 + }, + { + "epoch": 79.48863636363636, + "grad_norm": 0.6440352201461792, + "learning_rate": 0.0001, + "loss": 0.0459, + "step": 27980 + }, + { + "epoch": 79.51704545454545, + "grad_norm": 0.6347674131393433, + "learning_rate": 0.0001, + "loss": 0.045, + "step": 27990 + }, + { + "epoch": 79.54545454545455, + "grad_norm": 0.5119302272796631, + "learning_rate": 0.0001, + "loss": 0.0443, + "step": 28000 + }, + { + "epoch": 79.57386363636364, + "grad_norm": 0.665009617805481, + "learning_rate": 0.0001, + "loss": 0.0443, + "step": 28010 + }, + { + "epoch": 79.60227272727273, + "grad_norm": 1.1492528915405273, + "learning_rate": 0.0001, + "loss": 0.0471, + "step": 28020 + }, + { + "epoch": 79.63068181818181, + "grad_norm": 0.6289621591567993, + "learning_rate": 0.0001, + "loss": 0.0471, + "step": 28030 + }, + { + "epoch": 79.6590909090909, + "grad_norm": 0.6949747204780579, + "learning_rate": 0.0001, + "loss": 0.0459, + "step": 28040 + }, + { + "epoch": 79.6875, + "grad_norm": 0.7128562331199646, + "learning_rate": 0.0001, + "loss": 0.0487, + "step": 28050 + }, + { + "epoch": 79.7159090909091, + "grad_norm": 0.7679532766342163, + "learning_rate": 0.0001, + "loss": 0.0463, + "step": 28060 + }, + { + "epoch": 79.74431818181819, + "grad_norm": 1.129748821258545, + "learning_rate": 0.0001, + "loss": 0.0485, + "step": 28070 + }, + { + "epoch": 79.77272727272727, + "grad_norm": 1.1302276849746704, + "learning_rate": 0.0001, + "loss": 0.0447, + "step": 28080 + }, + { + "epoch": 79.80113636363636, + "grad_norm": 1.242452621459961, + "learning_rate": 0.0001, + "loss": 0.0453, + "step": 28090 + }, + { + "epoch": 79.82954545454545, + "grad_norm": 1.3404399156570435, + "learning_rate": 0.0001, + "loss": 0.0474, + "step": 28100 + }, + { + "epoch": 79.85795454545455, + "grad_norm": 1.3003270626068115, + "learning_rate": 0.0001, + "loss": 0.0461, + "step": 28110 + }, + { + "epoch": 79.88636363636364, + "grad_norm": 1.1596304178237915, + "learning_rate": 0.0001, + "loss": 0.045, + "step": 28120 + }, + { + "epoch": 79.91477272727273, + "grad_norm": 1.0483144521713257, + "learning_rate": 0.0001, + "loss": 0.0467, + "step": 28130 + }, + { + "epoch": 79.94318181818181, + "grad_norm": 0.9983393549919128, + "learning_rate": 0.0001, + "loss": 0.045, + "step": 28140 + }, + { + "epoch": 79.9715909090909, + "grad_norm": 0.8313050270080566, + "learning_rate": 0.0001, + "loss": 0.0428, + "step": 28150 + }, + { + "epoch": 80.0, + "grad_norm": 0.9205082654953003, + "learning_rate": 0.0001, + "loss": 0.0468, + "step": 28160 + }, + { + "epoch": 80.0284090909091, + "grad_norm": 0.8589319586753845, + "learning_rate": 0.0001, + "loss": 0.0464, + "step": 28170 + }, + { + "epoch": 80.05681818181819, + "grad_norm": 0.9702844023704529, + "learning_rate": 0.0001, + "loss": 0.0424, + "step": 28180 + }, + { + "epoch": 80.08522727272727, + "grad_norm": 0.8930377960205078, + "learning_rate": 0.0001, + "loss": 0.0448, + "step": 28190 + }, + { + "epoch": 80.11363636363636, + "grad_norm": 0.721045970916748, + "learning_rate": 0.0001, + "loss": 0.044, + "step": 28200 + }, + { + "epoch": 80.14204545454545, + "grad_norm": 0.7323723435401917, + "learning_rate": 0.0001, + "loss": 0.0448, + "step": 28210 + }, + { + "epoch": 80.17045454545455, + "grad_norm": 1.3309814929962158, + "learning_rate": 0.0001, + "loss": 0.0447, + "step": 28220 + }, + { + "epoch": 80.19886363636364, + "grad_norm": 1.14047110080719, + "learning_rate": 0.0001, + "loss": 0.045, + "step": 28230 + }, + { + "epoch": 80.22727272727273, + "grad_norm": 1.6657135486602783, + "learning_rate": 0.0001, + "loss": 0.0454, + "step": 28240 + }, + { + "epoch": 80.25568181818181, + "grad_norm": 1.1823036670684814, + "learning_rate": 0.0001, + "loss": 0.0442, + "step": 28250 + }, + { + "epoch": 80.2840909090909, + "grad_norm": 0.9169923663139343, + "learning_rate": 0.0001, + "loss": 0.044, + "step": 28260 + }, + { + "epoch": 80.3125, + "grad_norm": 0.8880297541618347, + "learning_rate": 0.0001, + "loss": 0.0447, + "step": 28270 + }, + { + "epoch": 80.3409090909091, + "grad_norm": 0.778057336807251, + "learning_rate": 0.0001, + "loss": 0.0438, + "step": 28280 + }, + { + "epoch": 80.36931818181819, + "grad_norm": 0.837870180606842, + "learning_rate": 0.0001, + "loss": 0.0439, + "step": 28290 + }, + { + "epoch": 80.39772727272727, + "grad_norm": 0.8132756352424622, + "learning_rate": 0.0001, + "loss": 0.0457, + "step": 28300 + }, + { + "epoch": 80.42613636363636, + "grad_norm": 0.9789218306541443, + "learning_rate": 0.0001, + "loss": 0.0453, + "step": 28310 + }, + { + "epoch": 80.45454545454545, + "grad_norm": 1.0025867223739624, + "learning_rate": 0.0001, + "loss": 0.0424, + "step": 28320 + }, + { + "epoch": 80.48295454545455, + "grad_norm": 0.8962274789810181, + "learning_rate": 0.0001, + "loss": 0.0434, + "step": 28330 + }, + { + "epoch": 80.51136363636364, + "grad_norm": 0.9207645058631897, + "learning_rate": 0.0001, + "loss": 0.0441, + "step": 28340 + }, + { + "epoch": 80.53977272727273, + "grad_norm": 0.7955727577209473, + "learning_rate": 0.0001, + "loss": 0.0439, + "step": 28350 + }, + { + "epoch": 80.56818181818181, + "grad_norm": 0.7597567439079285, + "learning_rate": 0.0001, + "loss": 0.0442, + "step": 28360 + }, + { + "epoch": 80.5965909090909, + "grad_norm": 0.9111728072166443, + "learning_rate": 0.0001, + "loss": 0.0432, + "step": 28370 + }, + { + "epoch": 80.625, + "grad_norm": 0.7924389243125916, + "learning_rate": 0.0001, + "loss": 0.0443, + "step": 28380 + }, + { + "epoch": 80.6534090909091, + "grad_norm": 0.7888645529747009, + "learning_rate": 0.0001, + "loss": 0.047, + "step": 28390 + }, + { + "epoch": 80.68181818181819, + "grad_norm": 0.7492277026176453, + "learning_rate": 0.0001, + "loss": 0.0451, + "step": 28400 + }, + { + "epoch": 80.71022727272727, + "grad_norm": 0.5666723251342773, + "learning_rate": 0.0001, + "loss": 0.0441, + "step": 28410 + }, + { + "epoch": 80.73863636363636, + "grad_norm": 0.5718184113502502, + "learning_rate": 0.0001, + "loss": 0.044, + "step": 28420 + }, + { + "epoch": 80.76704545454545, + "grad_norm": 0.5507611632347107, + "learning_rate": 0.0001, + "loss": 0.045, + "step": 28430 + }, + { + "epoch": 80.79545454545455, + "grad_norm": 0.6398160457611084, + "learning_rate": 0.0001, + "loss": 0.045, + "step": 28440 + }, + { + "epoch": 80.82386363636364, + "grad_norm": 0.6520999073982239, + "learning_rate": 0.0001, + "loss": 0.0446, + "step": 28450 + }, + { + "epoch": 80.85227272727273, + "grad_norm": 0.6655693054199219, + "learning_rate": 0.0001, + "loss": 0.0443, + "step": 28460 + }, + { + "epoch": 80.88068181818181, + "grad_norm": 0.6784167885780334, + "learning_rate": 0.0001, + "loss": 0.044, + "step": 28470 + }, + { + "epoch": 80.9090909090909, + "grad_norm": 0.6713101267814636, + "learning_rate": 0.0001, + "loss": 0.0462, + "step": 28480 + }, + { + "epoch": 80.9375, + "grad_norm": 0.6968391537666321, + "learning_rate": 0.0001, + "loss": 0.0455, + "step": 28490 + }, + { + "epoch": 80.9659090909091, + "grad_norm": 0.5912553668022156, + "learning_rate": 0.0001, + "loss": 0.0458, + "step": 28500 + }, + { + "epoch": 80.99431818181819, + "grad_norm": 0.6505694389343262, + "learning_rate": 0.0001, + "loss": 0.0455, + "step": 28510 + }, + { + "epoch": 81.02272727272727, + "grad_norm": 0.6993499994277954, + "learning_rate": 0.0001, + "loss": 0.0438, + "step": 28520 + }, + { + "epoch": 81.05113636363636, + "grad_norm": 0.8444068431854248, + "learning_rate": 0.0001, + "loss": 0.0448, + "step": 28530 + }, + { + "epoch": 81.07954545454545, + "grad_norm": 0.642077624797821, + "learning_rate": 0.0001, + "loss": 0.0446, + "step": 28540 + }, + { + "epoch": 81.10795454545455, + "grad_norm": 0.6671980023384094, + "learning_rate": 0.0001, + "loss": 0.0442, + "step": 28550 + }, + { + "epoch": 81.13636363636364, + "grad_norm": 0.7422840595245361, + "learning_rate": 0.0001, + "loss": 0.043, + "step": 28560 + }, + { + "epoch": 81.16477272727273, + "grad_norm": 0.7244125604629517, + "learning_rate": 0.0001, + "loss": 0.043, + "step": 28570 + }, + { + "epoch": 81.19318181818181, + "grad_norm": 0.5740301609039307, + "learning_rate": 0.0001, + "loss": 0.043, + "step": 28580 + }, + { + "epoch": 81.2215909090909, + "grad_norm": 0.5553819537162781, + "learning_rate": 0.0001, + "loss": 0.0454, + "step": 28590 + }, + { + "epoch": 81.25, + "grad_norm": 0.4433813989162445, + "learning_rate": 0.0001, + "loss": 0.0443, + "step": 28600 + }, + { + "epoch": 81.2784090909091, + "grad_norm": 0.5169538855552673, + "learning_rate": 0.0001, + "loss": 0.0438, + "step": 28610 + }, + { + "epoch": 81.30681818181819, + "grad_norm": 0.5083233118057251, + "learning_rate": 0.0001, + "loss": 0.0452, + "step": 28620 + }, + { + "epoch": 81.33522727272727, + "grad_norm": 0.5519469976425171, + "learning_rate": 0.0001, + "loss": 0.0425, + "step": 28630 + }, + { + "epoch": 81.36363636363636, + "grad_norm": 0.731152355670929, + "learning_rate": 0.0001, + "loss": 0.0425, + "step": 28640 + }, + { + "epoch": 81.39204545454545, + "grad_norm": 0.4356805086135864, + "learning_rate": 0.0001, + "loss": 0.0447, + "step": 28650 + }, + { + "epoch": 81.42045454545455, + "grad_norm": 0.812091052532196, + "learning_rate": 0.0001, + "loss": 0.0415, + "step": 28660 + }, + { + "epoch": 81.44886363636364, + "grad_norm": 0.5545047521591187, + "learning_rate": 0.0001, + "loss": 0.0411, + "step": 28670 + }, + { + "epoch": 81.47727272727273, + "grad_norm": 0.8585087656974792, + "learning_rate": 0.0001, + "loss": 0.0439, + "step": 28680 + }, + { + "epoch": 81.50568181818181, + "grad_norm": 0.7256617546081543, + "learning_rate": 0.0001, + "loss": 0.0445, + "step": 28690 + }, + { + "epoch": 81.5340909090909, + "grad_norm": 0.7761886119842529, + "learning_rate": 0.0001, + "loss": 0.044, + "step": 28700 + }, + { + "epoch": 81.5625, + "grad_norm": 0.7050015330314636, + "learning_rate": 0.0001, + "loss": 0.0449, + "step": 28710 + }, + { + "epoch": 81.5909090909091, + "grad_norm": 1.3305480480194092, + "learning_rate": 0.0001, + "loss": 0.047, + "step": 28720 + }, + { + "epoch": 81.61931818181819, + "grad_norm": 0.9800511598587036, + "learning_rate": 0.0001, + "loss": 0.0465, + "step": 28730 + }, + { + "epoch": 81.64772727272727, + "grad_norm": 1.166397213935852, + "learning_rate": 0.0001, + "loss": 0.0447, + "step": 28740 + }, + { + "epoch": 81.67613636363636, + "grad_norm": 1.0785977840423584, + "learning_rate": 0.0001, + "loss": 0.0446, + "step": 28750 + }, + { + "epoch": 81.70454545454545, + "grad_norm": 0.912084698677063, + "learning_rate": 0.0001, + "loss": 0.0441, + "step": 28760 + }, + { + "epoch": 81.73295454545455, + "grad_norm": 0.9032609462738037, + "learning_rate": 0.0001, + "loss": 0.0461, + "step": 28770 + }, + { + "epoch": 81.76136363636364, + "grad_norm": 1.6895674467086792, + "learning_rate": 0.0001, + "loss": 0.0436, + "step": 28780 + }, + { + "epoch": 81.78977272727273, + "grad_norm": 1.4334778785705566, + "learning_rate": 0.0001, + "loss": 0.0475, + "step": 28790 + }, + { + "epoch": 81.81818181818181, + "grad_norm": 1.5415606498718262, + "learning_rate": 0.0001, + "loss": 0.0449, + "step": 28800 + }, + { + "epoch": 81.8465909090909, + "grad_norm": 1.1889894008636475, + "learning_rate": 0.0001, + "loss": 0.0453, + "step": 28810 + }, + { + "epoch": 81.875, + "grad_norm": 1.0240460634231567, + "learning_rate": 0.0001, + "loss": 0.0428, + "step": 28820 + }, + { + "epoch": 81.9034090909091, + "grad_norm": 0.9590867757797241, + "learning_rate": 0.0001, + "loss": 0.0419, + "step": 28830 + }, + { + "epoch": 81.93181818181819, + "grad_norm": 0.776984453201294, + "learning_rate": 0.0001, + "loss": 0.0418, + "step": 28840 + }, + { + "epoch": 81.96022727272727, + "grad_norm": 1.2868221998214722, + "learning_rate": 0.0001, + "loss": 0.0462, + "step": 28850 + }, + { + "epoch": 81.98863636363636, + "grad_norm": 1.035592794418335, + "learning_rate": 0.0001, + "loss": 0.0447, + "step": 28860 + }, + { + "epoch": 82.01704545454545, + "grad_norm": 1.3734735250473022, + "learning_rate": 0.0001, + "loss": 0.0432, + "step": 28870 + }, + { + "epoch": 82.04545454545455, + "grad_norm": 1.5599828958511353, + "learning_rate": 0.0001, + "loss": 0.0423, + "step": 28880 + }, + { + "epoch": 82.07386363636364, + "grad_norm": 1.185050368309021, + "learning_rate": 0.0001, + "loss": 0.043, + "step": 28890 + }, + { + "epoch": 82.10227272727273, + "grad_norm": 1.5994446277618408, + "learning_rate": 0.0001, + "loss": 0.0405, + "step": 28900 + }, + { + "epoch": 82.13068181818181, + "grad_norm": 1.3795400857925415, + "learning_rate": 0.0001, + "loss": 0.0421, + "step": 28910 + }, + { + "epoch": 82.1590909090909, + "grad_norm": 1.2823818922042847, + "learning_rate": 0.0001, + "loss": 0.0409, + "step": 28920 + }, + { + "epoch": 82.1875, + "grad_norm": 1.39549720287323, + "learning_rate": 0.0001, + "loss": 0.041, + "step": 28930 + }, + { + "epoch": 82.2159090909091, + "grad_norm": 1.1723809242248535, + "learning_rate": 0.0001, + "loss": 0.0403, + "step": 28940 + }, + { + "epoch": 82.24431818181819, + "grad_norm": 0.7764673829078674, + "learning_rate": 0.0001, + "loss": 0.0405, + "step": 28950 + }, + { + "epoch": 82.27272727272727, + "grad_norm": 0.7350073456764221, + "learning_rate": 0.0001, + "loss": 0.0426, + "step": 28960 + }, + { + "epoch": 82.30113636363636, + "grad_norm": 0.8376882672309875, + "learning_rate": 0.0001, + "loss": 0.0417, + "step": 28970 + }, + { + "epoch": 82.32954545454545, + "grad_norm": 0.7426922917366028, + "learning_rate": 0.0001, + "loss": 0.0426, + "step": 28980 + }, + { + "epoch": 82.35795454545455, + "grad_norm": 1.0266258716583252, + "learning_rate": 0.0001, + "loss": 0.0433, + "step": 28990 + }, + { + "epoch": 82.38636363636364, + "grad_norm": 0.6878785490989685, + "learning_rate": 0.0001, + "loss": 0.0414, + "step": 29000 + }, + { + "epoch": 82.41477272727273, + "grad_norm": 0.8112396597862244, + "learning_rate": 0.0001, + "loss": 0.0436, + "step": 29010 + }, + { + "epoch": 82.44318181818181, + "grad_norm": 0.7438217997550964, + "learning_rate": 0.0001, + "loss": 0.0428, + "step": 29020 + }, + { + "epoch": 82.4715909090909, + "grad_norm": 0.5908733010292053, + "learning_rate": 0.0001, + "loss": 0.0428, + "step": 29030 + }, + { + "epoch": 82.5, + "grad_norm": 0.7258604168891907, + "learning_rate": 0.0001, + "loss": 0.0439, + "step": 29040 + }, + { + "epoch": 82.5284090909091, + "grad_norm": 0.5995661616325378, + "learning_rate": 0.0001, + "loss": 0.044, + "step": 29050 + }, + { + "epoch": 82.55681818181819, + "grad_norm": 0.7221328616142273, + "learning_rate": 0.0001, + "loss": 0.0426, + "step": 29060 + }, + { + "epoch": 82.58522727272727, + "grad_norm": 0.596004068851471, + "learning_rate": 0.0001, + "loss": 0.0454, + "step": 29070 + }, + { + "epoch": 82.61363636363636, + "grad_norm": 0.5565772652626038, + "learning_rate": 0.0001, + "loss": 0.045, + "step": 29080 + }, + { + "epoch": 82.64204545454545, + "grad_norm": 0.697248637676239, + "learning_rate": 0.0001, + "loss": 0.0458, + "step": 29090 + }, + { + "epoch": 82.67045454545455, + "grad_norm": 0.6714693903923035, + "learning_rate": 0.0001, + "loss": 0.0453, + "step": 29100 + }, + { + "epoch": 82.69886363636364, + "grad_norm": 0.7975269556045532, + "learning_rate": 0.0001, + "loss": 0.0467, + "step": 29110 + }, + { + "epoch": 82.72727272727273, + "grad_norm": 0.870853841304779, + "learning_rate": 0.0001, + "loss": 0.0434, + "step": 29120 + }, + { + "epoch": 82.75568181818181, + "grad_norm": 0.7218012809753418, + "learning_rate": 0.0001, + "loss": 0.0428, + "step": 29130 + }, + { + "epoch": 82.7840909090909, + "grad_norm": 0.7033442258834839, + "learning_rate": 0.0001, + "loss": 0.0452, + "step": 29140 + }, + { + "epoch": 82.8125, + "grad_norm": 0.7944255471229553, + "learning_rate": 0.0001, + "loss": 0.0437, + "step": 29150 + }, + { + "epoch": 82.8409090909091, + "grad_norm": 0.712996244430542, + "learning_rate": 0.0001, + "loss": 0.0448, + "step": 29160 + }, + { + "epoch": 82.86931818181819, + "grad_norm": 0.7248801589012146, + "learning_rate": 0.0001, + "loss": 0.0458, + "step": 29170 + }, + { + "epoch": 82.89772727272727, + "grad_norm": 0.8134949803352356, + "learning_rate": 0.0001, + "loss": 0.0461, + "step": 29180 + }, + { + "epoch": 82.92613636363636, + "grad_norm": 0.5927881002426147, + "learning_rate": 0.0001, + "loss": 0.0433, + "step": 29190 + }, + { + "epoch": 82.95454545454545, + "grad_norm": 0.5970407128334045, + "learning_rate": 0.0001, + "loss": 0.0453, + "step": 29200 + }, + { + "epoch": 82.98295454545455, + "grad_norm": 0.5957374572753906, + "learning_rate": 0.0001, + "loss": 0.0443, + "step": 29210 + }, + { + "epoch": 83.01136363636364, + "grad_norm": 1.196097493171692, + "learning_rate": 0.0001, + "loss": 0.0513, + "step": 29220 + }, + { + "epoch": 83.03977272727273, + "grad_norm": 1.275525450706482, + "learning_rate": 0.0001, + "loss": 0.0465, + "step": 29230 + }, + { + "epoch": 83.06818181818181, + "grad_norm": 1.131506085395813, + "learning_rate": 0.0001, + "loss": 0.0453, + "step": 29240 + }, + { + "epoch": 83.0965909090909, + "grad_norm": 1.2795711755752563, + "learning_rate": 0.0001, + "loss": 0.0448, + "step": 29250 + }, + { + "epoch": 83.125, + "grad_norm": 1.0409256219863892, + "learning_rate": 0.0001, + "loss": 0.0438, + "step": 29260 + }, + { + "epoch": 83.1534090909091, + "grad_norm": 1.1085119247436523, + "learning_rate": 0.0001, + "loss": 0.0439, + "step": 29270 + }, + { + "epoch": 83.18181818181819, + "grad_norm": 1.0388301610946655, + "learning_rate": 0.0001, + "loss": 0.0463, + "step": 29280 + }, + { + "epoch": 83.21022727272727, + "grad_norm": 0.8974637985229492, + "learning_rate": 0.0001, + "loss": 0.0437, + "step": 29290 + }, + { + "epoch": 83.23863636363636, + "grad_norm": 0.8485453724861145, + "learning_rate": 0.0001, + "loss": 0.0437, + "step": 29300 + }, + { + "epoch": 83.26704545454545, + "grad_norm": 0.7687119841575623, + "learning_rate": 0.0001, + "loss": 0.0411, + "step": 29310 + }, + { + "epoch": 83.29545454545455, + "grad_norm": 0.844149649143219, + "learning_rate": 0.0001, + "loss": 0.0442, + "step": 29320 + }, + { + "epoch": 83.32386363636364, + "grad_norm": 0.8516110181808472, + "learning_rate": 0.0001, + "loss": 0.0439, + "step": 29330 + }, + { + "epoch": 83.35227272727273, + "grad_norm": 0.6241595149040222, + "learning_rate": 0.0001, + "loss": 0.0426, + "step": 29340 + }, + { + "epoch": 83.38068181818181, + "grad_norm": 0.7771625518798828, + "learning_rate": 0.0001, + "loss": 0.0426, + "step": 29350 + }, + { + "epoch": 83.4090909090909, + "grad_norm": 0.6839377284049988, + "learning_rate": 0.0001, + "loss": 0.0459, + "step": 29360 + }, + { + "epoch": 83.4375, + "grad_norm": 0.7105209231376648, + "learning_rate": 0.0001, + "loss": 0.0428, + "step": 29370 + }, + { + "epoch": 83.4659090909091, + "grad_norm": 1.2164829969406128, + "learning_rate": 0.0001, + "loss": 0.0415, + "step": 29380 + }, + { + "epoch": 83.49431818181819, + "grad_norm": 0.969587504863739, + "learning_rate": 0.0001, + "loss": 0.046, + "step": 29390 + }, + { + "epoch": 83.52272727272727, + "grad_norm": 0.8211323618888855, + "learning_rate": 0.0001, + "loss": 0.0429, + "step": 29400 + }, + { + "epoch": 83.55113636363636, + "grad_norm": 0.6561827659606934, + "learning_rate": 0.0001, + "loss": 0.0429, + "step": 29410 + }, + { + "epoch": 83.57954545454545, + "grad_norm": 0.694988489151001, + "learning_rate": 0.0001, + "loss": 0.0446, + "step": 29420 + }, + { + "epoch": 83.60795454545455, + "grad_norm": 0.8020559549331665, + "learning_rate": 0.0001, + "loss": 0.0431, + "step": 29430 + }, + { + "epoch": 83.63636363636364, + "grad_norm": 0.9569689631462097, + "learning_rate": 0.0001, + "loss": 0.0449, + "step": 29440 + }, + { + "epoch": 83.66477272727273, + "grad_norm": 0.874990701675415, + "learning_rate": 0.0001, + "loss": 0.0452, + "step": 29450 + }, + { + "epoch": 83.69318181818181, + "grad_norm": 0.6901691555976868, + "learning_rate": 0.0001, + "loss": 0.045, + "step": 29460 + }, + { + "epoch": 83.7215909090909, + "grad_norm": 0.7540920376777649, + "learning_rate": 0.0001, + "loss": 0.0448, + "step": 29470 + }, + { + "epoch": 83.75, + "grad_norm": 0.8184428811073303, + "learning_rate": 0.0001, + "loss": 0.0443, + "step": 29480 + }, + { + "epoch": 83.7784090909091, + "grad_norm": 0.9438989162445068, + "learning_rate": 0.0001, + "loss": 0.0456, + "step": 29490 + }, + { + "epoch": 83.80681818181819, + "grad_norm": 0.8632564544677734, + "learning_rate": 0.0001, + "loss": 0.0432, + "step": 29500 + }, + { + "epoch": 83.83522727272727, + "grad_norm": 0.7080706357955933, + "learning_rate": 0.0001, + "loss": 0.0444, + "step": 29510 + }, + { + "epoch": 83.86363636363636, + "grad_norm": 1.5293716192245483, + "learning_rate": 0.0001, + "loss": 0.0451, + "step": 29520 + }, + { + "epoch": 83.89204545454545, + "grad_norm": 1.1970537900924683, + "learning_rate": 0.0001, + "loss": 0.0435, + "step": 29530 + }, + { + "epoch": 83.92045454545455, + "grad_norm": 1.4087823629379272, + "learning_rate": 0.0001, + "loss": 0.0448, + "step": 29540 + }, + { + "epoch": 83.94886363636364, + "grad_norm": 0.9186455011367798, + "learning_rate": 0.0001, + "loss": 0.0442, + "step": 29550 + }, + { + "epoch": 83.97727272727273, + "grad_norm": 0.89197838306427, + "learning_rate": 0.0001, + "loss": 0.0417, + "step": 29560 + }, + { + "epoch": 84.00568181818181, + "grad_norm": 0.8925532698631287, + "learning_rate": 0.0001, + "loss": 0.0433, + "step": 29570 + }, + { + "epoch": 84.0340909090909, + "grad_norm": 0.9491750597953796, + "learning_rate": 0.0001, + "loss": 0.0458, + "step": 29580 + }, + { + "epoch": 84.0625, + "grad_norm": 0.7977159023284912, + "learning_rate": 0.0001, + "loss": 0.0455, + "step": 29590 + }, + { + "epoch": 84.0909090909091, + "grad_norm": 0.7931260466575623, + "learning_rate": 0.0001, + "loss": 0.0451, + "step": 29600 + }, + { + "epoch": 84.11931818181819, + "grad_norm": 0.691261887550354, + "learning_rate": 0.0001, + "loss": 0.0434, + "step": 29610 + }, + { + "epoch": 84.14772727272727, + "grad_norm": 0.9392285346984863, + "learning_rate": 0.0001, + "loss": 0.0456, + "step": 29620 + }, + { + "epoch": 84.17613636363636, + "grad_norm": 1.9056662321090698, + "learning_rate": 0.0001, + "loss": 0.0456, + "step": 29630 + }, + { + "epoch": 84.20454545454545, + "grad_norm": 1.3401696681976318, + "learning_rate": 0.0001, + "loss": 0.0439, + "step": 29640 + }, + { + "epoch": 84.23295454545455, + "grad_norm": 1.1489585638046265, + "learning_rate": 0.0001, + "loss": 0.0456, + "step": 29650 + }, + { + "epoch": 84.26136363636364, + "grad_norm": 1.2388312816619873, + "learning_rate": 0.0001, + "loss": 0.0452, + "step": 29660 + }, + { + "epoch": 84.28977272727273, + "grad_norm": 1.1380677223205566, + "learning_rate": 0.0001, + "loss": 0.0417, + "step": 29670 + }, + { + "epoch": 84.31818181818181, + "grad_norm": 0.9601713418960571, + "learning_rate": 0.0001, + "loss": 0.0433, + "step": 29680 + }, + { + "epoch": 84.3465909090909, + "grad_norm": 1.2920958995819092, + "learning_rate": 0.0001, + "loss": 0.0425, + "step": 29690 + }, + { + "epoch": 84.375, + "grad_norm": 1.1546963453292847, + "learning_rate": 0.0001, + "loss": 0.0429, + "step": 29700 + }, + { + "epoch": 84.4034090909091, + "grad_norm": 1.6051952838897705, + "learning_rate": 0.0001, + "loss": 0.0439, + "step": 29710 + }, + { + "epoch": 84.43181818181819, + "grad_norm": 1.979384422302246, + "learning_rate": 0.0001, + "loss": 0.042, + "step": 29720 + }, + { + "epoch": 84.46022727272727, + "grad_norm": 2.053717613220215, + "learning_rate": 0.0001, + "loss": 0.043, + "step": 29730 + }, + { + "epoch": 84.48863636363636, + "grad_norm": 1.1979448795318604, + "learning_rate": 0.0001, + "loss": 0.0426, + "step": 29740 + }, + { + "epoch": 84.51704545454545, + "grad_norm": 1.6801135540008545, + "learning_rate": 0.0001, + "loss": 0.04, + "step": 29750 + }, + { + "epoch": 84.54545454545455, + "grad_norm": 0.7504470348358154, + "learning_rate": 0.0001, + "loss": 0.0406, + "step": 29760 + }, + { + "epoch": 84.57386363636364, + "grad_norm": 1.1721948385238647, + "learning_rate": 0.0001, + "loss": 0.0405, + "step": 29770 + }, + { + "epoch": 84.60227272727273, + "grad_norm": 0.9734560251235962, + "learning_rate": 0.0001, + "loss": 0.0406, + "step": 29780 + }, + { + "epoch": 84.63068181818181, + "grad_norm": 1.0226346254348755, + "learning_rate": 0.0001, + "loss": 0.0403, + "step": 29790 + }, + { + "epoch": 84.6590909090909, + "grad_norm": 0.7675327658653259, + "learning_rate": 0.0001, + "loss": 0.0405, + "step": 29800 + }, + { + "epoch": 84.6875, + "grad_norm": 0.9482449889183044, + "learning_rate": 0.0001, + "loss": 0.0419, + "step": 29810 + }, + { + "epoch": 84.7159090909091, + "grad_norm": 0.7545673251152039, + "learning_rate": 0.0001, + "loss": 0.0422, + "step": 29820 + }, + { + "epoch": 84.74431818181819, + "grad_norm": 0.7988566160202026, + "learning_rate": 0.0001, + "loss": 0.0433, + "step": 29830 + }, + { + "epoch": 84.77272727272727, + "grad_norm": 0.9122010469436646, + "learning_rate": 0.0001, + "loss": 0.0433, + "step": 29840 + }, + { + "epoch": 84.80113636363636, + "grad_norm": 0.8561978936195374, + "learning_rate": 0.0001, + "loss": 0.0443, + "step": 29850 + }, + { + "epoch": 84.82954545454545, + "grad_norm": 0.6838624477386475, + "learning_rate": 0.0001, + "loss": 0.0426, + "step": 29860 + }, + { + "epoch": 84.85795454545455, + "grad_norm": 1.1277817487716675, + "learning_rate": 0.0001, + "loss": 0.0441, + "step": 29870 + }, + { + "epoch": 84.88636363636364, + "grad_norm": 1.5995237827301025, + "learning_rate": 0.0001, + "loss": 0.0462, + "step": 29880 + }, + { + "epoch": 84.91477272727273, + "grad_norm": 1.4446282386779785, + "learning_rate": 0.0001, + "loss": 0.0451, + "step": 29890 + }, + { + "epoch": 84.94318181818181, + "grad_norm": 1.1589558124542236, + "learning_rate": 0.0001, + "loss": 0.0436, + "step": 29900 + }, + { + "epoch": 84.9715909090909, + "grad_norm": 1.063513159751892, + "learning_rate": 0.0001, + "loss": 0.0427, + "step": 29910 + }, + { + "epoch": 85.0, + "grad_norm": 0.8769459128379822, + "learning_rate": 0.0001, + "loss": 0.0433, + "step": 29920 + }, + { + "epoch": 85.0284090909091, + "grad_norm": 0.9075056910514832, + "learning_rate": 0.0001, + "loss": 0.0441, + "step": 29930 + }, + { + "epoch": 85.05681818181819, + "grad_norm": 0.6981393098831177, + "learning_rate": 0.0001, + "loss": 0.0441, + "step": 29940 + }, + { + "epoch": 85.08522727272727, + "grad_norm": 0.757666826248169, + "learning_rate": 0.0001, + "loss": 0.0436, + "step": 29950 + }, + { + "epoch": 85.11363636363636, + "grad_norm": 0.7865346074104309, + "learning_rate": 0.0001, + "loss": 0.0431, + "step": 29960 + }, + { + "epoch": 85.14204545454545, + "grad_norm": 0.8140910267829895, + "learning_rate": 0.0001, + "loss": 0.0452, + "step": 29970 + }, + { + "epoch": 85.17045454545455, + "grad_norm": 0.6491031646728516, + "learning_rate": 0.0001, + "loss": 0.0434, + "step": 29980 + }, + { + "epoch": 85.19886363636364, + "grad_norm": 0.8176889419555664, + "learning_rate": 0.0001, + "loss": 0.0448, + "step": 29990 + }, + { + "epoch": 85.22727272727273, + "grad_norm": 0.8975611925125122, + "learning_rate": 0.0001, + "loss": 0.0434, + "step": 30000 + }, + { + "epoch": 85.25568181818181, + "grad_norm": 0.6582433581352234, + "learning_rate": 0.0001, + "loss": 0.0431, + "step": 30010 + }, + { + "epoch": 85.2840909090909, + "grad_norm": 0.7108902931213379, + "learning_rate": 0.0001, + "loss": 0.0439, + "step": 30020 + }, + { + "epoch": 85.3125, + "grad_norm": 0.8365264534950256, + "learning_rate": 0.0001, + "loss": 0.043, + "step": 30030 + }, + { + "epoch": 85.3409090909091, + "grad_norm": 1.003644347190857, + "learning_rate": 0.0001, + "loss": 0.0436, + "step": 30040 + }, + { + "epoch": 85.36931818181819, + "grad_norm": 0.8627144694328308, + "learning_rate": 0.0001, + "loss": 0.043, + "step": 30050 + }, + { + "epoch": 85.39772727272727, + "grad_norm": 0.8255655765533447, + "learning_rate": 0.0001, + "loss": 0.044, + "step": 30060 + }, + { + "epoch": 85.42613636363636, + "grad_norm": 0.7838973999023438, + "learning_rate": 0.0001, + "loss": 0.043, + "step": 30070 + }, + { + "epoch": 85.45454545454545, + "grad_norm": 0.7875524163246155, + "learning_rate": 0.0001, + "loss": 0.0432, + "step": 30080 + }, + { + "epoch": 85.48295454545455, + "grad_norm": 0.9446814656257629, + "learning_rate": 0.0001, + "loss": 0.044, + "step": 30090 + }, + { + "epoch": 85.51136363636364, + "grad_norm": 0.6380667686462402, + "learning_rate": 0.0001, + "loss": 0.0431, + "step": 30100 + }, + { + "epoch": 85.53977272727273, + "grad_norm": 0.8755031228065491, + "learning_rate": 0.0001, + "loss": 0.0434, + "step": 30110 + }, + { + "epoch": 85.56818181818181, + "grad_norm": 0.6563933491706848, + "learning_rate": 0.0001, + "loss": 0.0432, + "step": 30120 + }, + { + "epoch": 85.5965909090909, + "grad_norm": 0.7544918060302734, + "learning_rate": 0.0001, + "loss": 0.0435, + "step": 30130 + }, + { + "epoch": 85.625, + "grad_norm": 1.112111210823059, + "learning_rate": 0.0001, + "loss": 0.0463, + "step": 30140 + }, + { + "epoch": 85.6534090909091, + "grad_norm": 0.9833115935325623, + "learning_rate": 0.0001, + "loss": 0.0433, + "step": 30150 + }, + { + "epoch": 85.68181818181819, + "grad_norm": 0.914084255695343, + "learning_rate": 0.0001, + "loss": 0.0436, + "step": 30160 + }, + { + "epoch": 85.71022727272727, + "grad_norm": 1.0748567581176758, + "learning_rate": 0.0001, + "loss": 0.0428, + "step": 30170 + }, + { + "epoch": 85.73863636363636, + "grad_norm": 0.6668802499771118, + "learning_rate": 0.0001, + "loss": 0.0418, + "step": 30180 + }, + { + "epoch": 85.76704545454545, + "grad_norm": 0.8170040845870972, + "learning_rate": 0.0001, + "loss": 0.0435, + "step": 30190 + }, + { + "epoch": 85.79545454545455, + "grad_norm": 0.7252139449119568, + "learning_rate": 0.0001, + "loss": 0.0413, + "step": 30200 + }, + { + "epoch": 85.82386363636364, + "grad_norm": 0.621457576751709, + "learning_rate": 0.0001, + "loss": 0.0436, + "step": 30210 + }, + { + "epoch": 85.85227272727273, + "grad_norm": 0.7499610185623169, + "learning_rate": 0.0001, + "loss": 0.0448, + "step": 30220 + }, + { + "epoch": 85.88068181818181, + "grad_norm": 0.6016923785209656, + "learning_rate": 0.0001, + "loss": 0.0419, + "step": 30230 + }, + { + "epoch": 85.9090909090909, + "grad_norm": 0.9071959853172302, + "learning_rate": 0.0001, + "loss": 0.0429, + "step": 30240 + }, + { + "epoch": 85.9375, + "grad_norm": 0.6232897043228149, + "learning_rate": 0.0001, + "loss": 0.0428, + "step": 30250 + }, + { + "epoch": 85.9659090909091, + "grad_norm": 0.7610146999359131, + "learning_rate": 0.0001, + "loss": 0.0442, + "step": 30260 + }, + { + "epoch": 85.99431818181819, + "grad_norm": 0.6071455478668213, + "learning_rate": 0.0001, + "loss": 0.0437, + "step": 30270 + }, + { + "epoch": 86.02272727272727, + "grad_norm": 0.6175063848495483, + "learning_rate": 0.0001, + "loss": 0.04, + "step": 30280 + }, + { + "epoch": 86.05113636363636, + "grad_norm": 0.7343127727508545, + "learning_rate": 0.0001, + "loss": 0.0434, + "step": 30290 + }, + { + "epoch": 86.07954545454545, + "grad_norm": 0.7600955367088318, + "learning_rate": 0.0001, + "loss": 0.0421, + "step": 30300 + }, + { + "epoch": 86.10795454545455, + "grad_norm": 0.5361849665641785, + "learning_rate": 0.0001, + "loss": 0.0418, + "step": 30310 + }, + { + "epoch": 86.13636363636364, + "grad_norm": 0.6238926649093628, + "learning_rate": 0.0001, + "loss": 0.0419, + "step": 30320 + }, + { + "epoch": 86.16477272727273, + "grad_norm": 0.530532717704773, + "learning_rate": 0.0001, + "loss": 0.0416, + "step": 30330 + }, + { + "epoch": 86.19318181818181, + "grad_norm": 0.6140005588531494, + "learning_rate": 0.0001, + "loss": 0.0418, + "step": 30340 + }, + { + "epoch": 86.2215909090909, + "grad_norm": 0.5914357900619507, + "learning_rate": 0.0001, + "loss": 0.0416, + "step": 30350 + }, + { + "epoch": 86.25, + "grad_norm": 0.5596780776977539, + "learning_rate": 0.0001, + "loss": 0.0427, + "step": 30360 + }, + { + "epoch": 86.2784090909091, + "grad_norm": 0.5929566025733948, + "learning_rate": 0.0001, + "loss": 0.0425, + "step": 30370 + }, + { + "epoch": 86.30681818181819, + "grad_norm": 0.6372137069702148, + "learning_rate": 0.0001, + "loss": 0.043, + "step": 30380 + }, + { + "epoch": 86.33522727272727, + "grad_norm": 0.6501078009605408, + "learning_rate": 0.0001, + "loss": 0.0437, + "step": 30390 + }, + { + "epoch": 86.36363636363636, + "grad_norm": 0.57587730884552, + "learning_rate": 0.0001, + "loss": 0.0417, + "step": 30400 + }, + { + "epoch": 86.39204545454545, + "grad_norm": 0.6445661187171936, + "learning_rate": 0.0001, + "loss": 0.0419, + "step": 30410 + }, + { + "epoch": 86.42045454545455, + "grad_norm": 0.7396770715713501, + "learning_rate": 0.0001, + "loss": 0.0457, + "step": 30420 + }, + { + "epoch": 86.44886363636364, + "grad_norm": 0.673376202583313, + "learning_rate": 0.0001, + "loss": 0.0432, + "step": 30430 + }, + { + "epoch": 86.47727272727273, + "grad_norm": 0.6946069598197937, + "learning_rate": 0.0001, + "loss": 0.0438, + "step": 30440 + }, + { + "epoch": 86.50568181818181, + "grad_norm": 0.6759048104286194, + "learning_rate": 0.0001, + "loss": 0.0432, + "step": 30450 + }, + { + "epoch": 86.5340909090909, + "grad_norm": 0.5420788526535034, + "learning_rate": 0.0001, + "loss": 0.0421, + "step": 30460 + }, + { + "epoch": 86.5625, + "grad_norm": 0.6719872355461121, + "learning_rate": 0.0001, + "loss": 0.0418, + "step": 30470 + }, + { + "epoch": 86.5909090909091, + "grad_norm": 0.6998466849327087, + "learning_rate": 0.0001, + "loss": 0.0426, + "step": 30480 + }, + { + "epoch": 86.61931818181819, + "grad_norm": 0.6579269766807556, + "learning_rate": 0.0001, + "loss": 0.0429, + "step": 30490 + }, + { + "epoch": 86.64772727272727, + "grad_norm": 0.7772161364555359, + "learning_rate": 0.0001, + "loss": 0.043, + "step": 30500 + }, + { + "epoch": 86.67613636363636, + "grad_norm": 0.5968173742294312, + "learning_rate": 0.0001, + "loss": 0.0433, + "step": 30510 + }, + { + "epoch": 86.70454545454545, + "grad_norm": 0.6808553338050842, + "learning_rate": 0.0001, + "loss": 0.0435, + "step": 30520 + }, + { + "epoch": 86.73295454545455, + "grad_norm": 0.7286439538002014, + "learning_rate": 0.0001, + "loss": 0.044, + "step": 30530 + }, + { + "epoch": 86.76136363636364, + "grad_norm": 0.6226254105567932, + "learning_rate": 0.0001, + "loss": 0.0437, + "step": 30540 + }, + { + "epoch": 86.78977272727273, + "grad_norm": 0.61110919713974, + "learning_rate": 0.0001, + "loss": 0.0447, + "step": 30550 + }, + { + "epoch": 86.81818181818181, + "grad_norm": 0.6020199656486511, + "learning_rate": 0.0001, + "loss": 0.0432, + "step": 30560 + }, + { + "epoch": 86.8465909090909, + "grad_norm": 0.630531907081604, + "learning_rate": 0.0001, + "loss": 0.0432, + "step": 30570 + }, + { + "epoch": 86.875, + "grad_norm": 0.5699210166931152, + "learning_rate": 0.0001, + "loss": 0.0429, + "step": 30580 + }, + { + "epoch": 86.9034090909091, + "grad_norm": 0.5879133343696594, + "learning_rate": 0.0001, + "loss": 0.0447, + "step": 30590 + }, + { + "epoch": 86.93181818181819, + "grad_norm": 0.6827641129493713, + "learning_rate": 0.0001, + "loss": 0.0446, + "step": 30600 + }, + { + "epoch": 86.96022727272727, + "grad_norm": 0.581780731678009, + "learning_rate": 0.0001, + "loss": 0.0456, + "step": 30610 + }, + { + "epoch": 86.98863636363636, + "grad_norm": 0.5517546534538269, + "learning_rate": 0.0001, + "loss": 0.0442, + "step": 30620 + }, + { + "epoch": 87.01704545454545, + "grad_norm": 0.6034563779830933, + "learning_rate": 0.0001, + "loss": 0.0423, + "step": 30630 + }, + { + "epoch": 87.04545454545455, + "grad_norm": 0.5161349177360535, + "learning_rate": 0.0001, + "loss": 0.0433, + "step": 30640 + }, + { + "epoch": 87.07386363636364, + "grad_norm": 0.6517345309257507, + "learning_rate": 0.0001, + "loss": 0.0438, + "step": 30650 + }, + { + "epoch": 87.10227272727273, + "grad_norm": 0.7236988544464111, + "learning_rate": 0.0001, + "loss": 0.0436, + "step": 30660 + }, + { + "epoch": 87.13068181818181, + "grad_norm": 0.5659297704696655, + "learning_rate": 0.0001, + "loss": 0.043, + "step": 30670 + }, + { + "epoch": 87.1590909090909, + "grad_norm": 0.5743705630302429, + "learning_rate": 0.0001, + "loss": 0.0442, + "step": 30680 + }, + { + "epoch": 87.1875, + "grad_norm": 0.4794277846813202, + "learning_rate": 0.0001, + "loss": 0.0442, + "step": 30690 + }, + { + "epoch": 87.2159090909091, + "grad_norm": 0.6366981267929077, + "learning_rate": 0.0001, + "loss": 0.0425, + "step": 30700 + }, + { + "epoch": 87.24431818181819, + "grad_norm": 0.6959528923034668, + "learning_rate": 0.0001, + "loss": 0.0433, + "step": 30710 + }, + { + "epoch": 87.27272727272727, + "grad_norm": 0.5670670866966248, + "learning_rate": 0.0001, + "loss": 0.0451, + "step": 30720 + }, + { + "epoch": 87.30113636363636, + "grad_norm": 0.5087947845458984, + "learning_rate": 0.0001, + "loss": 0.0429, + "step": 30730 + }, + { + "epoch": 87.32954545454545, + "grad_norm": 0.638629138469696, + "learning_rate": 0.0001, + "loss": 0.0408, + "step": 30740 + }, + { + "epoch": 87.35795454545455, + "grad_norm": 0.5405479073524475, + "learning_rate": 0.0001, + "loss": 0.0424, + "step": 30750 + }, + { + "epoch": 87.38636363636364, + "grad_norm": 0.5827491283416748, + "learning_rate": 0.0001, + "loss": 0.044, + "step": 30760 + }, + { + "epoch": 87.41477272727273, + "grad_norm": 0.8291541337966919, + "learning_rate": 0.0001, + "loss": 0.0423, + "step": 30770 + }, + { + "epoch": 87.44318181818181, + "grad_norm": 0.5378076434135437, + "learning_rate": 0.0001, + "loss": 0.0447, + "step": 30780 + }, + { + "epoch": 87.4715909090909, + "grad_norm": 0.5673062801361084, + "learning_rate": 0.0001, + "loss": 0.0426, + "step": 30790 + }, + { + "epoch": 87.5, + "grad_norm": 0.6628111004829407, + "learning_rate": 0.0001, + "loss": 0.0454, + "step": 30800 + }, + { + "epoch": 87.5284090909091, + "grad_norm": 0.5737188458442688, + "learning_rate": 0.0001, + "loss": 0.0447, + "step": 30810 + }, + { + "epoch": 87.55681818181819, + "grad_norm": 0.49959149956703186, + "learning_rate": 0.0001, + "loss": 0.0438, + "step": 30820 + }, + { + "epoch": 87.58522727272727, + "grad_norm": 0.579260528087616, + "learning_rate": 0.0001, + "loss": 0.0461, + "step": 30830 + }, + { + "epoch": 87.61363636363636, + "grad_norm": 0.6071043014526367, + "learning_rate": 0.0001, + "loss": 0.0453, + "step": 30840 + }, + { + "epoch": 87.64204545454545, + "grad_norm": 0.8908697366714478, + "learning_rate": 0.0001, + "loss": 0.0453, + "step": 30850 + }, + { + "epoch": 87.67045454545455, + "grad_norm": 0.7816733717918396, + "learning_rate": 0.0001, + "loss": 0.0456, + "step": 30860 + }, + { + "epoch": 87.69886363636364, + "grad_norm": 0.837706446647644, + "learning_rate": 0.0001, + "loss": 0.0439, + "step": 30870 + }, + { + "epoch": 87.72727272727273, + "grad_norm": 0.6439931988716125, + "learning_rate": 0.0001, + "loss": 0.0448, + "step": 30880 + }, + { + "epoch": 87.75568181818181, + "grad_norm": 0.5524504780769348, + "learning_rate": 0.0001, + "loss": 0.0429, + "step": 30890 + }, + { + "epoch": 87.7840909090909, + "grad_norm": 0.5548602342605591, + "learning_rate": 0.0001, + "loss": 0.0436, + "step": 30900 + }, + { + "epoch": 87.8125, + "grad_norm": 0.5937913656234741, + "learning_rate": 0.0001, + "loss": 0.043, + "step": 30910 + }, + { + "epoch": 87.8409090909091, + "grad_norm": 0.49119383096694946, + "learning_rate": 0.0001, + "loss": 0.0418, + "step": 30920 + }, + { + "epoch": 87.86931818181819, + "grad_norm": 0.5523015260696411, + "learning_rate": 0.0001, + "loss": 0.0426, + "step": 30930 + }, + { + "epoch": 87.89772727272727, + "grad_norm": 0.6836235523223877, + "learning_rate": 0.0001, + "loss": 0.0436, + "step": 30940 + }, + { + "epoch": 87.92613636363636, + "grad_norm": 0.728946328163147, + "learning_rate": 0.0001, + "loss": 0.0435, + "step": 30950 + }, + { + "epoch": 87.95454545454545, + "grad_norm": 0.7053698301315308, + "learning_rate": 0.0001, + "loss": 0.0433, + "step": 30960 + }, + { + "epoch": 87.98295454545455, + "grad_norm": 0.7172105312347412, + "learning_rate": 0.0001, + "loss": 0.0434, + "step": 30970 + }, + { + "epoch": 88.01136363636364, + "grad_norm": 0.8821631073951721, + "learning_rate": 0.0001, + "loss": 0.0425, + "step": 30980 + }, + { + "epoch": 88.03977272727273, + "grad_norm": 0.837773323059082, + "learning_rate": 0.0001, + "loss": 0.045, + "step": 30990 + }, + { + "epoch": 88.06818181818181, + "grad_norm": 1.007065773010254, + "learning_rate": 0.0001, + "loss": 0.0422, + "step": 31000 + }, + { + "epoch": 88.0965909090909, + "grad_norm": 0.8076823353767395, + "learning_rate": 0.0001, + "loss": 0.0433, + "step": 31010 + }, + { + "epoch": 88.125, + "grad_norm": 1.02508544921875, + "learning_rate": 0.0001, + "loss": 0.0439, + "step": 31020 + }, + { + "epoch": 88.1534090909091, + "grad_norm": 0.726391077041626, + "learning_rate": 0.0001, + "loss": 0.0424, + "step": 31030 + }, + { + "epoch": 88.18181818181819, + "grad_norm": 0.78676837682724, + "learning_rate": 0.0001, + "loss": 0.0427, + "step": 31040 + }, + { + "epoch": 88.21022727272727, + "grad_norm": 0.7329301834106445, + "learning_rate": 0.0001, + "loss": 0.0427, + "step": 31050 + }, + { + "epoch": 88.23863636363636, + "grad_norm": 0.6445389986038208, + "learning_rate": 0.0001, + "loss": 0.0421, + "step": 31060 + }, + { + "epoch": 88.26704545454545, + "grad_norm": 0.6451675295829773, + "learning_rate": 0.0001, + "loss": 0.0426, + "step": 31070 + }, + { + "epoch": 88.29545454545455, + "grad_norm": 0.7502676248550415, + "learning_rate": 0.0001, + "loss": 0.0437, + "step": 31080 + }, + { + "epoch": 88.32386363636364, + "grad_norm": 0.8322815299034119, + "learning_rate": 0.0001, + "loss": 0.0426, + "step": 31090 + }, + { + "epoch": 88.35227272727273, + "grad_norm": 0.8641359210014343, + "learning_rate": 0.0001, + "loss": 0.0423, + "step": 31100 + }, + { + "epoch": 88.38068181818181, + "grad_norm": 0.8692095279693604, + "learning_rate": 0.0001, + "loss": 0.0438, + "step": 31110 + }, + { + "epoch": 88.4090909090909, + "grad_norm": 0.6443942785263062, + "learning_rate": 0.0001, + "loss": 0.0427, + "step": 31120 + }, + { + "epoch": 88.4375, + "grad_norm": 0.7591652274131775, + "learning_rate": 0.0001, + "loss": 0.0411, + "step": 31130 + }, + { + "epoch": 88.4659090909091, + "grad_norm": 0.8515008687973022, + "learning_rate": 0.0001, + "loss": 0.0432, + "step": 31140 + }, + { + "epoch": 88.49431818181819, + "grad_norm": 0.7373746633529663, + "learning_rate": 0.0001, + "loss": 0.0452, + "step": 31150 + }, + { + "epoch": 88.52272727272727, + "grad_norm": 2.620330810546875, + "learning_rate": 0.0001, + "loss": 0.0438, + "step": 31160 + }, + { + "epoch": 88.55113636363636, + "grad_norm": 0.7088080048561096, + "learning_rate": 0.0001, + "loss": 0.0426, + "step": 31170 + }, + { + "epoch": 88.57954545454545, + "grad_norm": 0.8215885758399963, + "learning_rate": 0.0001, + "loss": 0.0429, + "step": 31180 + }, + { + "epoch": 88.60795454545455, + "grad_norm": 0.8965012431144714, + "learning_rate": 0.0001, + "loss": 0.0411, + "step": 31190 + }, + { + "epoch": 88.63636363636364, + "grad_norm": 1.090306282043457, + "learning_rate": 0.0001, + "loss": 0.0417, + "step": 31200 + }, + { + "epoch": 88.66477272727273, + "grad_norm": 0.9115955829620361, + "learning_rate": 0.0001, + "loss": 0.0415, + "step": 31210 + }, + { + "epoch": 88.69318181818181, + "grad_norm": 1.4948323965072632, + "learning_rate": 0.0001, + "loss": 0.0408, + "step": 31220 + }, + { + "epoch": 88.7215909090909, + "grad_norm": 1.177667498588562, + "learning_rate": 0.0001, + "loss": 0.0427, + "step": 31230 + }, + { + "epoch": 88.75, + "grad_norm": 1.1721715927124023, + "learning_rate": 0.0001, + "loss": 0.04, + "step": 31240 + }, + { + "epoch": 88.7784090909091, + "grad_norm": 1.04111647605896, + "learning_rate": 0.0001, + "loss": 0.0408, + "step": 31250 + }, + { + "epoch": 88.80681818181819, + "grad_norm": 1.1286450624465942, + "learning_rate": 0.0001, + "loss": 0.0407, + "step": 31260 + }, + { + "epoch": 88.83522727272727, + "grad_norm": 0.8720760941505432, + "learning_rate": 0.0001, + "loss": 0.0419, + "step": 31270 + }, + { + "epoch": 88.86363636363636, + "grad_norm": 0.7654427289962769, + "learning_rate": 0.0001, + "loss": 0.04, + "step": 31280 + }, + { + "epoch": 88.89204545454545, + "grad_norm": 0.6938006281852722, + "learning_rate": 0.0001, + "loss": 0.0414, + "step": 31290 + }, + { + "epoch": 88.92045454545455, + "grad_norm": 0.6255007386207581, + "learning_rate": 0.0001, + "loss": 0.0417, + "step": 31300 + }, + { + "epoch": 88.94886363636364, + "grad_norm": 0.881568193435669, + "learning_rate": 0.0001, + "loss": 0.0406, + "step": 31310 + }, + { + "epoch": 88.97727272727273, + "grad_norm": 0.7803657650947571, + "learning_rate": 0.0001, + "loss": 0.0426, + "step": 31320 + }, + { + "epoch": 89.00568181818181, + "grad_norm": 0.5337231159210205, + "learning_rate": 0.0001, + "loss": 0.0409, + "step": 31330 + }, + { + "epoch": 89.0340909090909, + "grad_norm": 0.6003879904747009, + "learning_rate": 0.0001, + "loss": 0.0422, + "step": 31340 + }, + { + "epoch": 89.0625, + "grad_norm": 0.694319486618042, + "learning_rate": 0.0001, + "loss": 0.0406, + "step": 31350 + }, + { + "epoch": 89.0909090909091, + "grad_norm": 0.6574826836585999, + "learning_rate": 0.0001, + "loss": 0.0405, + "step": 31360 + }, + { + "epoch": 89.11931818181819, + "grad_norm": 0.6312698125839233, + "learning_rate": 0.0001, + "loss": 0.0405, + "step": 31370 + }, + { + "epoch": 89.14772727272727, + "grad_norm": 0.5977025032043457, + "learning_rate": 0.0001, + "loss": 0.0392, + "step": 31380 + }, + { + "epoch": 89.17613636363636, + "grad_norm": 0.6093351244926453, + "learning_rate": 0.0001, + "loss": 0.0409, + "step": 31390 + }, + { + "epoch": 89.20454545454545, + "grad_norm": 0.563823938369751, + "learning_rate": 0.0001, + "loss": 0.042, + "step": 31400 + }, + { + "epoch": 89.23295454545455, + "grad_norm": 0.6710460782051086, + "learning_rate": 0.0001, + "loss": 0.0397, + "step": 31410 + }, + { + "epoch": 89.26136363636364, + "grad_norm": 0.6708618998527527, + "learning_rate": 0.0001, + "loss": 0.0409, + "step": 31420 + }, + { + "epoch": 89.28977272727273, + "grad_norm": 0.7974550127983093, + "learning_rate": 0.0001, + "loss": 0.0421, + "step": 31430 + }, + { + "epoch": 89.31818181818181, + "grad_norm": 0.7905569672584534, + "learning_rate": 0.0001, + "loss": 0.0415, + "step": 31440 + }, + { + "epoch": 89.3465909090909, + "grad_norm": 0.6526768207550049, + "learning_rate": 0.0001, + "loss": 0.0419, + "step": 31450 + }, + { + "epoch": 89.375, + "grad_norm": 0.6570956707000732, + "learning_rate": 0.0001, + "loss": 0.0413, + "step": 31460 + }, + { + "epoch": 89.4034090909091, + "grad_norm": 0.5645592212677002, + "learning_rate": 0.0001, + "loss": 0.0421, + "step": 31470 + }, + { + "epoch": 89.43181818181819, + "grad_norm": 0.7741744518280029, + "learning_rate": 0.0001, + "loss": 0.043, + "step": 31480 + }, + { + "epoch": 89.46022727272727, + "grad_norm": 0.8912363052368164, + "learning_rate": 0.0001, + "loss": 0.0423, + "step": 31490 + }, + { + "epoch": 89.48863636363636, + "grad_norm": 1.4925516843795776, + "learning_rate": 0.0001, + "loss": 0.0419, + "step": 31500 + }, + { + "epoch": 89.51704545454545, + "grad_norm": 1.1449397802352905, + "learning_rate": 0.0001, + "loss": 0.0406, + "step": 31510 + }, + { + "epoch": 89.54545454545455, + "grad_norm": 0.7447580099105835, + "learning_rate": 0.0001, + "loss": 0.0414, + "step": 31520 + }, + { + "epoch": 89.57386363636364, + "grad_norm": 0.8966802358627319, + "learning_rate": 0.0001, + "loss": 0.0403, + "step": 31530 + }, + { + "epoch": 89.60227272727273, + "grad_norm": 0.8590907454490662, + "learning_rate": 0.0001, + "loss": 0.0403, + "step": 31540 + }, + { + "epoch": 89.63068181818181, + "grad_norm": 1.0364528894424438, + "learning_rate": 0.0001, + "loss": 0.0428, + "step": 31550 + }, + { + "epoch": 89.6590909090909, + "grad_norm": 1.0565521717071533, + "learning_rate": 0.0001, + "loss": 0.0419, + "step": 31560 + }, + { + "epoch": 89.6875, + "grad_norm": 0.8635243773460388, + "learning_rate": 0.0001, + "loss": 0.0435, + "step": 31570 + }, + { + "epoch": 89.7159090909091, + "grad_norm": 0.8231905698776245, + "learning_rate": 0.0001, + "loss": 0.0415, + "step": 31580 + }, + { + "epoch": 89.74431818181819, + "grad_norm": 0.683319628238678, + "learning_rate": 0.0001, + "loss": 0.0411, + "step": 31590 + }, + { + "epoch": 89.77272727272727, + "grad_norm": 0.696625292301178, + "learning_rate": 0.0001, + "loss": 0.0413, + "step": 31600 + }, + { + "epoch": 89.80113636363636, + "grad_norm": 0.6028037667274475, + "learning_rate": 0.0001, + "loss": 0.0405, + "step": 31610 + }, + { + "epoch": 89.82954545454545, + "grad_norm": 1.206365704536438, + "learning_rate": 0.0001, + "loss": 0.044, + "step": 31620 + }, + { + "epoch": 89.85795454545455, + "grad_norm": 1.4085060358047485, + "learning_rate": 0.0001, + "loss": 0.0415, + "step": 31630 + }, + { + "epoch": 89.88636363636364, + "grad_norm": 1.3718105554580688, + "learning_rate": 0.0001, + "loss": 0.0439, + "step": 31640 + }, + { + "epoch": 89.91477272727273, + "grad_norm": 0.872251570224762, + "learning_rate": 0.0001, + "loss": 0.0399, + "step": 31650 + }, + { + "epoch": 89.94318181818181, + "grad_norm": 0.8787030577659607, + "learning_rate": 0.0001, + "loss": 0.0417, + "step": 31660 + }, + { + "epoch": 89.9715909090909, + "grad_norm": 0.9750413298606873, + "learning_rate": 0.0001, + "loss": 0.0412, + "step": 31670 + }, + { + "epoch": 90.0, + "grad_norm": 1.8003432750701904, + "learning_rate": 0.0001, + "loss": 0.0428, + "step": 31680 + }, + { + "epoch": 90.0284090909091, + "grad_norm": 1.7299727201461792, + "learning_rate": 0.0001, + "loss": 0.0432, + "step": 31690 + }, + { + "epoch": 90.05681818181819, + "grad_norm": 2.0806267261505127, + "learning_rate": 0.0001, + "loss": 0.0413, + "step": 31700 + }, + { + "epoch": 90.08522727272727, + "grad_norm": 1.1748923063278198, + "learning_rate": 0.0001, + "loss": 0.0415, + "step": 31710 + }, + { + "epoch": 90.11363636363636, + "grad_norm": 1.2577301263809204, + "learning_rate": 0.0001, + "loss": 0.0392, + "step": 31720 + }, + { + "epoch": 90.14204545454545, + "grad_norm": 1.1263160705566406, + "learning_rate": 0.0001, + "loss": 0.0394, + "step": 31730 + }, + { + "epoch": 90.17045454545455, + "grad_norm": 0.8332096338272095, + "learning_rate": 0.0001, + "loss": 0.0396, + "step": 31740 + }, + { + "epoch": 90.19886363636364, + "grad_norm": 0.9236270189285278, + "learning_rate": 0.0001, + "loss": 0.0397, + "step": 31750 + }, + { + "epoch": 90.22727272727273, + "grad_norm": 0.826349675655365, + "learning_rate": 0.0001, + "loss": 0.0388, + "step": 31760 + }, + { + "epoch": 90.25568181818181, + "grad_norm": 0.7999365329742432, + "learning_rate": 0.0001, + "loss": 0.0414, + "step": 31770 + }, + { + "epoch": 90.2840909090909, + "grad_norm": 0.8490392565727234, + "learning_rate": 0.0001, + "loss": 0.0411, + "step": 31780 + }, + { + "epoch": 90.3125, + "grad_norm": 0.8082181811332703, + "learning_rate": 0.0001, + "loss": 0.0401, + "step": 31790 + }, + { + "epoch": 90.3409090909091, + "grad_norm": 0.9047965407371521, + "learning_rate": 0.0001, + "loss": 0.0415, + "step": 31800 + }, + { + "epoch": 90.36931818181819, + "grad_norm": 0.7220473885536194, + "learning_rate": 0.0001, + "loss": 0.0401, + "step": 31810 + }, + { + "epoch": 90.39772727272727, + "grad_norm": 1.0218350887298584, + "learning_rate": 0.0001, + "loss": 0.0409, + "step": 31820 + }, + { + "epoch": 90.42613636363636, + "grad_norm": 0.6703020930290222, + "learning_rate": 0.0001, + "loss": 0.0399, + "step": 31830 + }, + { + "epoch": 90.45454545454545, + "grad_norm": 0.6317295432090759, + "learning_rate": 0.0001, + "loss": 0.0416, + "step": 31840 + }, + { + "epoch": 90.48295454545455, + "grad_norm": 0.5803297758102417, + "learning_rate": 0.0001, + "loss": 0.0428, + "step": 31850 + }, + { + "epoch": 90.51136363636364, + "grad_norm": 0.7607895135879517, + "learning_rate": 0.0001, + "loss": 0.0409, + "step": 31860 + }, + { + "epoch": 90.53977272727273, + "grad_norm": 0.6137414574623108, + "learning_rate": 0.0001, + "loss": 0.0409, + "step": 31870 + }, + { + "epoch": 90.56818181818181, + "grad_norm": 0.5825350284576416, + "learning_rate": 0.0001, + "loss": 0.0409, + "step": 31880 + }, + { + "epoch": 90.5965909090909, + "grad_norm": 0.564761757850647, + "learning_rate": 0.0001, + "loss": 0.0413, + "step": 31890 + }, + { + "epoch": 90.625, + "grad_norm": 1.0057430267333984, + "learning_rate": 0.0001, + "loss": 0.041, + "step": 31900 + }, + { + "epoch": 90.6534090909091, + "grad_norm": 0.7206169962882996, + "learning_rate": 0.0001, + "loss": 0.0414, + "step": 31910 + }, + { + "epoch": 90.68181818181819, + "grad_norm": 0.8694084286689758, + "learning_rate": 0.0001, + "loss": 0.0415, + "step": 31920 + }, + { + "epoch": 90.71022727272727, + "grad_norm": 0.9109015464782715, + "learning_rate": 0.0001, + "loss": 0.0397, + "step": 31930 + }, + { + "epoch": 90.73863636363636, + "grad_norm": 1.422331690788269, + "learning_rate": 0.0001, + "loss": 0.0426, + "step": 31940 + }, + { + "epoch": 90.76704545454545, + "grad_norm": 0.6990547180175781, + "learning_rate": 0.0001, + "loss": 0.0408, + "step": 31950 + }, + { + "epoch": 90.79545454545455, + "grad_norm": 0.8138213753700256, + "learning_rate": 0.0001, + "loss": 0.041, + "step": 31960 + }, + { + "epoch": 90.82386363636364, + "grad_norm": 0.9061129093170166, + "learning_rate": 0.0001, + "loss": 0.0415, + "step": 31970 + }, + { + "epoch": 90.85227272727273, + "grad_norm": 0.5697906613349915, + "learning_rate": 0.0001, + "loss": 0.0426, + "step": 31980 + }, + { + "epoch": 90.88068181818181, + "grad_norm": 0.6935226917266846, + "learning_rate": 0.0001, + "loss": 0.0415, + "step": 31990 + }, + { + "epoch": 90.9090909090909, + "grad_norm": 0.6459117531776428, + "learning_rate": 0.0001, + "loss": 0.0418, + "step": 32000 + }, + { + "epoch": 90.9375, + "grad_norm": 0.8231947422027588, + "learning_rate": 0.0001, + "loss": 0.0417, + "step": 32010 + }, + { + "epoch": 90.9659090909091, + "grad_norm": 1.047978401184082, + "learning_rate": 0.0001, + "loss": 0.0409, + "step": 32020 + }, + { + "epoch": 90.99431818181819, + "grad_norm": 0.5649544596672058, + "learning_rate": 0.0001, + "loss": 0.0416, + "step": 32030 + }, + { + "epoch": 91.02272727272727, + "grad_norm": 0.6621559262275696, + "learning_rate": 0.0001, + "loss": 0.041, + "step": 32040 + }, + { + "epoch": 91.05113636363636, + "grad_norm": 0.8017844557762146, + "learning_rate": 0.0001, + "loss": 0.0409, + "step": 32050 + }, + { + "epoch": 91.07954545454545, + "grad_norm": 0.7462131977081299, + "learning_rate": 0.0001, + "loss": 0.0421, + "step": 32060 + }, + { + "epoch": 91.10795454545455, + "grad_norm": 0.7743386030197144, + "learning_rate": 0.0001, + "loss": 0.0419, + "step": 32070 + }, + { + "epoch": 91.13636363636364, + "grad_norm": 0.6021215915679932, + "learning_rate": 0.0001, + "loss": 0.0424, + "step": 32080 + }, + { + "epoch": 91.16477272727273, + "grad_norm": 0.6302787065505981, + "learning_rate": 0.0001, + "loss": 0.0401, + "step": 32090 + }, + { + "epoch": 91.19318181818181, + "grad_norm": 0.5769550800323486, + "learning_rate": 0.0001, + "loss": 0.0419, + "step": 32100 + }, + { + "epoch": 91.2215909090909, + "grad_norm": 0.6698492169380188, + "learning_rate": 0.0001, + "loss": 0.0407, + "step": 32110 + }, + { + "epoch": 91.25, + "grad_norm": 0.6460458040237427, + "learning_rate": 0.0001, + "loss": 0.0406, + "step": 32120 + }, + { + "epoch": 91.2784090909091, + "grad_norm": 0.693789541721344, + "learning_rate": 0.0001, + "loss": 0.0415, + "step": 32130 + }, + { + "epoch": 91.30681818181819, + "grad_norm": 0.7158985733985901, + "learning_rate": 0.0001, + "loss": 0.0428, + "step": 32140 + }, + { + "epoch": 91.33522727272727, + "grad_norm": 0.618126630783081, + "learning_rate": 0.0001, + "loss": 0.0433, + "step": 32150 + }, + { + "epoch": 91.36363636363636, + "grad_norm": 0.6397842168807983, + "learning_rate": 0.0001, + "loss": 0.0434, + "step": 32160 + }, + { + "epoch": 91.39204545454545, + "grad_norm": 0.7809488773345947, + "learning_rate": 0.0001, + "loss": 0.042, + "step": 32170 + }, + { + "epoch": 91.42045454545455, + "grad_norm": 0.5916877388954163, + "learning_rate": 0.0001, + "loss": 0.0438, + "step": 32180 + }, + { + "epoch": 91.44886363636364, + "grad_norm": 0.6435518860816956, + "learning_rate": 0.0001, + "loss": 0.0428, + "step": 32190 + }, + { + "epoch": 91.47727272727273, + "grad_norm": 0.7912642955780029, + "learning_rate": 0.0001, + "loss": 0.0406, + "step": 32200 + }, + { + "epoch": 91.50568181818181, + "grad_norm": 0.692492663860321, + "learning_rate": 0.0001, + "loss": 0.0419, + "step": 32210 + }, + { + "epoch": 91.5340909090909, + "grad_norm": 0.6788442730903625, + "learning_rate": 0.0001, + "loss": 0.0416, + "step": 32220 + }, + { + "epoch": 91.5625, + "grad_norm": 0.7223365902900696, + "learning_rate": 0.0001, + "loss": 0.0405, + "step": 32230 + }, + { + "epoch": 91.5909090909091, + "grad_norm": 0.7962009310722351, + "learning_rate": 0.0001, + "loss": 0.0418, + "step": 32240 + }, + { + "epoch": 91.61931818181819, + "grad_norm": 0.8391971588134766, + "learning_rate": 0.0001, + "loss": 0.0415, + "step": 32250 + }, + { + "epoch": 91.64772727272727, + "grad_norm": 0.963829517364502, + "learning_rate": 0.0001, + "loss": 0.0393, + "step": 32260 + }, + { + "epoch": 91.67613636363636, + "grad_norm": 0.7977566719055176, + "learning_rate": 0.0001, + "loss": 0.041, + "step": 32270 + }, + { + "epoch": 91.70454545454545, + "grad_norm": 0.9379525780677795, + "learning_rate": 0.0001, + "loss": 0.0409, + "step": 32280 + }, + { + "epoch": 91.73295454545455, + "grad_norm": 0.8582242727279663, + "learning_rate": 0.0001, + "loss": 0.0421, + "step": 32290 + }, + { + "epoch": 91.76136363636364, + "grad_norm": 0.8872998952865601, + "learning_rate": 0.0001, + "loss": 0.0407, + "step": 32300 + }, + { + "epoch": 91.78977272727273, + "grad_norm": 0.6711391806602478, + "learning_rate": 0.0001, + "loss": 0.0409, + "step": 32310 + }, + { + "epoch": 91.81818181818181, + "grad_norm": 0.890733540058136, + "learning_rate": 0.0001, + "loss": 0.0419, + "step": 32320 + }, + { + "epoch": 91.8465909090909, + "grad_norm": 0.9312843680381775, + "learning_rate": 0.0001, + "loss": 0.0399, + "step": 32330 + }, + { + "epoch": 91.875, + "grad_norm": 0.6852923035621643, + "learning_rate": 0.0001, + "loss": 0.0425, + "step": 32340 + }, + { + "epoch": 91.9034090909091, + "grad_norm": 0.7489289045333862, + "learning_rate": 0.0001, + "loss": 0.0418, + "step": 32350 + }, + { + "epoch": 91.93181818181819, + "grad_norm": 0.7574262022972107, + "learning_rate": 0.0001, + "loss": 0.0431, + "step": 32360 + }, + { + "epoch": 91.96022727272727, + "grad_norm": 0.7518380284309387, + "learning_rate": 0.0001, + "loss": 0.0415, + "step": 32370 + }, + { + "epoch": 91.98863636363636, + "grad_norm": 0.7089730501174927, + "learning_rate": 0.0001, + "loss": 0.0427, + "step": 32380 + }, + { + "epoch": 92.01704545454545, + "grad_norm": 0.7275684475898743, + "learning_rate": 0.0001, + "loss": 0.0415, + "step": 32390 + }, + { + "epoch": 92.04545454545455, + "grad_norm": 0.6037976741790771, + "learning_rate": 0.0001, + "loss": 0.0395, + "step": 32400 + }, + { + "epoch": 92.07386363636364, + "grad_norm": 0.5874007940292358, + "learning_rate": 0.0001, + "loss": 0.0404, + "step": 32410 + }, + { + "epoch": 92.10227272727273, + "grad_norm": 0.5332598686218262, + "learning_rate": 0.0001, + "loss": 0.0412, + "step": 32420 + }, + { + "epoch": 92.13068181818181, + "grad_norm": 0.7063932418823242, + "learning_rate": 0.0001, + "loss": 0.0417, + "step": 32430 + }, + { + "epoch": 92.1590909090909, + "grad_norm": 0.7319120168685913, + "learning_rate": 0.0001, + "loss": 0.0393, + "step": 32440 + }, + { + "epoch": 92.1875, + "grad_norm": 0.6438412070274353, + "learning_rate": 0.0001, + "loss": 0.04, + "step": 32450 + }, + { + "epoch": 92.2159090909091, + "grad_norm": 0.5752390027046204, + "learning_rate": 0.0001, + "loss": 0.0409, + "step": 32460 + }, + { + "epoch": 92.24431818181819, + "grad_norm": 0.7674922943115234, + "learning_rate": 0.0001, + "loss": 0.0419, + "step": 32470 + }, + { + "epoch": 92.27272727272727, + "grad_norm": 0.5381972789764404, + "learning_rate": 0.0001, + "loss": 0.0404, + "step": 32480 + }, + { + "epoch": 92.30113636363636, + "grad_norm": 0.766947329044342, + "learning_rate": 0.0001, + "loss": 0.0431, + "step": 32490 + }, + { + "epoch": 92.32954545454545, + "grad_norm": 0.5970397591590881, + "learning_rate": 0.0001, + "loss": 0.0418, + "step": 32500 + }, + { + "epoch": 92.35795454545455, + "grad_norm": 0.5418734550476074, + "learning_rate": 0.0001, + "loss": 0.0411, + "step": 32510 + }, + { + "epoch": 92.38636363636364, + "grad_norm": 0.5782895684242249, + "learning_rate": 0.0001, + "loss": 0.0418, + "step": 32520 + }, + { + "epoch": 92.41477272727273, + "grad_norm": 0.5378401875495911, + "learning_rate": 0.0001, + "loss": 0.0414, + "step": 32530 + }, + { + "epoch": 92.44318181818181, + "grad_norm": 0.5437403321266174, + "learning_rate": 0.0001, + "loss": 0.0414, + "step": 32540 + }, + { + "epoch": 92.4715909090909, + "grad_norm": 0.574937641620636, + "learning_rate": 0.0001, + "loss": 0.0404, + "step": 32550 + }, + { + "epoch": 92.5, + "grad_norm": 0.4711826741695404, + "learning_rate": 0.0001, + "loss": 0.042, + "step": 32560 + }, + { + "epoch": 92.5284090909091, + "grad_norm": 0.5091038346290588, + "learning_rate": 0.0001, + "loss": 0.0408, + "step": 32570 + }, + { + "epoch": 92.55681818181819, + "grad_norm": 0.5985186696052551, + "learning_rate": 0.0001, + "loss": 0.0407, + "step": 32580 + }, + { + "epoch": 92.58522727272727, + "grad_norm": 0.6167530417442322, + "learning_rate": 0.0001, + "loss": 0.0402, + "step": 32590 + }, + { + "epoch": 92.61363636363636, + "grad_norm": 0.7481162548065186, + "learning_rate": 0.0001, + "loss": 0.0402, + "step": 32600 + }, + { + "epoch": 92.64204545454545, + "grad_norm": 0.6328353881835938, + "learning_rate": 0.0001, + "loss": 0.0403, + "step": 32610 + }, + { + "epoch": 92.67045454545455, + "grad_norm": 0.4634016752243042, + "learning_rate": 0.0001, + "loss": 0.0423, + "step": 32620 + }, + { + "epoch": 92.69886363636364, + "grad_norm": 0.5572225451469421, + "learning_rate": 0.0001, + "loss": 0.0394, + "step": 32630 + }, + { + "epoch": 92.72727272727273, + "grad_norm": 0.5547319650650024, + "learning_rate": 0.0001, + "loss": 0.0403, + "step": 32640 + }, + { + "epoch": 92.75568181818181, + "grad_norm": 0.5432265996932983, + "learning_rate": 0.0001, + "loss": 0.0398, + "step": 32650 + }, + { + "epoch": 92.7840909090909, + "grad_norm": 0.7217846512794495, + "learning_rate": 0.0001, + "loss": 0.0417, + "step": 32660 + }, + { + "epoch": 92.8125, + "grad_norm": 0.7317110896110535, + "learning_rate": 0.0001, + "loss": 0.0417, + "step": 32670 + }, + { + "epoch": 92.8409090909091, + "grad_norm": 0.76151442527771, + "learning_rate": 0.0001, + "loss": 0.0383, + "step": 32680 + }, + { + "epoch": 92.86931818181819, + "grad_norm": 0.6238445043563843, + "learning_rate": 0.0001, + "loss": 0.0405, + "step": 32690 + }, + { + "epoch": 92.89772727272727, + "grad_norm": 0.5886904001235962, + "learning_rate": 0.0001, + "loss": 0.0413, + "step": 32700 + }, + { + "epoch": 92.92613636363636, + "grad_norm": 0.46394994854927063, + "learning_rate": 0.0001, + "loss": 0.0404, + "step": 32710 + }, + { + "epoch": 92.95454545454545, + "grad_norm": 0.7471411824226379, + "learning_rate": 0.0001, + "loss": 0.0403, + "step": 32720 + }, + { + "epoch": 92.98295454545455, + "grad_norm": 0.6481496095657349, + "learning_rate": 0.0001, + "loss": 0.0403, + "step": 32730 + }, + { + "epoch": 93.01136363636364, + "grad_norm": 0.6740915775299072, + "learning_rate": 0.0001, + "loss": 0.0414, + "step": 32740 + }, + { + "epoch": 93.03977272727273, + "grad_norm": 0.4988252520561218, + "learning_rate": 0.0001, + "loss": 0.0406, + "step": 32750 + }, + { + "epoch": 93.06818181818181, + "grad_norm": 0.5601629614830017, + "learning_rate": 0.0001, + "loss": 0.0417, + "step": 32760 + }, + { + "epoch": 93.0965909090909, + "grad_norm": 0.5621158480644226, + "learning_rate": 0.0001, + "loss": 0.0411, + "step": 32770 + }, + { + "epoch": 93.125, + "grad_norm": 0.5953294038772583, + "learning_rate": 0.0001, + "loss": 0.0404, + "step": 32780 + }, + { + "epoch": 93.1534090909091, + "grad_norm": 0.6276609897613525, + "learning_rate": 0.0001, + "loss": 0.0402, + "step": 32790 + }, + { + "epoch": 93.18181818181819, + "grad_norm": 0.6082143783569336, + "learning_rate": 0.0001, + "loss": 0.0397, + "step": 32800 + }, + { + "epoch": 93.21022727272727, + "grad_norm": 0.7014224529266357, + "learning_rate": 0.0001, + "loss": 0.0408, + "step": 32810 + }, + { + "epoch": 93.23863636363636, + "grad_norm": 0.6491138339042664, + "learning_rate": 0.0001, + "loss": 0.0403, + "step": 32820 + }, + { + "epoch": 93.26704545454545, + "grad_norm": 0.7243189215660095, + "learning_rate": 0.0001, + "loss": 0.0414, + "step": 32830 + }, + { + "epoch": 93.29545454545455, + "grad_norm": 0.5766690969467163, + "learning_rate": 0.0001, + "loss": 0.0399, + "step": 32840 + }, + { + "epoch": 93.32386363636364, + "grad_norm": 0.7540706992149353, + "learning_rate": 0.0001, + "loss": 0.0385, + "step": 32850 + }, + { + "epoch": 93.35227272727273, + "grad_norm": 0.7168294787406921, + "learning_rate": 0.0001, + "loss": 0.0402, + "step": 32860 + }, + { + "epoch": 93.38068181818181, + "grad_norm": 0.8406569361686707, + "learning_rate": 0.0001, + "loss": 0.04, + "step": 32870 + }, + { + "epoch": 93.4090909090909, + "grad_norm": 0.8490883708000183, + "learning_rate": 0.0001, + "loss": 0.0403, + "step": 32880 + }, + { + "epoch": 93.4375, + "grad_norm": 0.5935730338096619, + "learning_rate": 0.0001, + "loss": 0.0408, + "step": 32890 + }, + { + "epoch": 93.4659090909091, + "grad_norm": 0.5843430161476135, + "learning_rate": 0.0001, + "loss": 0.0418, + "step": 32900 + }, + { + "epoch": 93.49431818181819, + "grad_norm": 0.8435496091842651, + "learning_rate": 0.0001, + "loss": 0.04, + "step": 32910 + }, + { + "epoch": 93.52272727272727, + "grad_norm": 0.599806547164917, + "learning_rate": 0.0001, + "loss": 0.0404, + "step": 32920 + }, + { + "epoch": 93.55113636363636, + "grad_norm": 0.6157388687133789, + "learning_rate": 0.0001, + "loss": 0.0401, + "step": 32930 + }, + { + "epoch": 93.57954545454545, + "grad_norm": 0.6935839056968689, + "learning_rate": 0.0001, + "loss": 0.0402, + "step": 32940 + }, + { + "epoch": 93.60795454545455, + "grad_norm": 0.6349316835403442, + "learning_rate": 0.0001, + "loss": 0.0406, + "step": 32950 + }, + { + "epoch": 93.63636363636364, + "grad_norm": 0.5609252452850342, + "learning_rate": 0.0001, + "loss": 0.0386, + "step": 32960 + }, + { + "epoch": 93.66477272727273, + "grad_norm": 0.6227506399154663, + "learning_rate": 0.0001, + "loss": 0.0407, + "step": 32970 + }, + { + "epoch": 93.69318181818181, + "grad_norm": 0.671882688999176, + "learning_rate": 0.0001, + "loss": 0.0419, + "step": 32980 + }, + { + "epoch": 93.7215909090909, + "grad_norm": 0.8065037727355957, + "learning_rate": 0.0001, + "loss": 0.0409, + "step": 32990 + }, + { + "epoch": 93.75, + "grad_norm": 0.7018570303916931, + "learning_rate": 0.0001, + "loss": 0.0389, + "step": 33000 + }, + { + "epoch": 93.7784090909091, + "grad_norm": 0.7340584993362427, + "learning_rate": 0.0001, + "loss": 0.0408, + "step": 33010 + }, + { + "epoch": 93.80681818181819, + "grad_norm": 0.6570980548858643, + "learning_rate": 0.0001, + "loss": 0.0406, + "step": 33020 + }, + { + "epoch": 93.83522727272727, + "grad_norm": 0.6870690584182739, + "learning_rate": 0.0001, + "loss": 0.039, + "step": 33030 + }, + { + "epoch": 93.86363636363636, + "grad_norm": 0.6674038171768188, + "learning_rate": 0.0001, + "loss": 0.0415, + "step": 33040 + }, + { + "epoch": 93.89204545454545, + "grad_norm": 0.5651020407676697, + "learning_rate": 0.0001, + "loss": 0.0405, + "step": 33050 + }, + { + "epoch": 93.92045454545455, + "grad_norm": 0.7089354991912842, + "learning_rate": 0.0001, + "loss": 0.041, + "step": 33060 + }, + { + "epoch": 93.94886363636364, + "grad_norm": 0.5789372324943542, + "learning_rate": 0.0001, + "loss": 0.0413, + "step": 33070 + }, + { + "epoch": 93.97727272727273, + "grad_norm": 0.8415607213973999, + "learning_rate": 0.0001, + "loss": 0.043, + "step": 33080 + }, + { + "epoch": 94.00568181818181, + "grad_norm": 0.7195010185241699, + "learning_rate": 0.0001, + "loss": 0.0399, + "step": 33090 + }, + { + "epoch": 94.0340909090909, + "grad_norm": 0.6305752992630005, + "learning_rate": 0.0001, + "loss": 0.04, + "step": 33100 + }, + { + "epoch": 94.0625, + "grad_norm": 0.6140927076339722, + "learning_rate": 0.0001, + "loss": 0.0406, + "step": 33110 + }, + { + "epoch": 94.0909090909091, + "grad_norm": 0.761303186416626, + "learning_rate": 0.0001, + "loss": 0.0392, + "step": 33120 + }, + { + "epoch": 94.11931818181819, + "grad_norm": 0.7136927247047424, + "learning_rate": 0.0001, + "loss": 0.04, + "step": 33130 + }, + { + "epoch": 94.14772727272727, + "grad_norm": 0.5686725378036499, + "learning_rate": 0.0001, + "loss": 0.0403, + "step": 33140 + }, + { + "epoch": 94.17613636363636, + "grad_norm": 0.7701740860939026, + "learning_rate": 0.0001, + "loss": 0.0395, + "step": 33150 + }, + { + "epoch": 94.20454545454545, + "grad_norm": 1.336498737335205, + "learning_rate": 0.0001, + "loss": 0.0386, + "step": 33160 + }, + { + "epoch": 94.23295454545455, + "grad_norm": 1.33478581905365, + "learning_rate": 0.0001, + "loss": 0.0394, + "step": 33170 + }, + { + "epoch": 94.26136363636364, + "grad_norm": 1.4353957176208496, + "learning_rate": 0.0001, + "loss": 0.039, + "step": 33180 + }, + { + "epoch": 94.28977272727273, + "grad_norm": 1.2320867776870728, + "learning_rate": 0.0001, + "loss": 0.039, + "step": 33190 + }, + { + "epoch": 94.31818181818181, + "grad_norm": 1.4484091997146606, + "learning_rate": 0.0001, + "loss": 0.0419, + "step": 33200 + }, + { + "epoch": 94.3465909090909, + "grad_norm": 1.3309117555618286, + "learning_rate": 0.0001, + "loss": 0.0398, + "step": 33210 + }, + { + "epoch": 94.375, + "grad_norm": 1.4127764701843262, + "learning_rate": 0.0001, + "loss": 0.0385, + "step": 33220 + }, + { + "epoch": 94.4034090909091, + "grad_norm": 1.5341440439224243, + "learning_rate": 0.0001, + "loss": 0.0371, + "step": 33230 + }, + { + "epoch": 94.43181818181819, + "grad_norm": 1.1439406871795654, + "learning_rate": 0.0001, + "loss": 0.0366, + "step": 33240 + }, + { + "epoch": 94.46022727272727, + "grad_norm": 1.584952473640442, + "learning_rate": 0.0001, + "loss": 0.039, + "step": 33250 + }, + { + "epoch": 94.48863636363636, + "grad_norm": 1.6550278663635254, + "learning_rate": 0.0001, + "loss": 0.0383, + "step": 33260 + }, + { + "epoch": 94.51704545454545, + "grad_norm": 1.1844305992126465, + "learning_rate": 0.0001, + "loss": 0.0382, + "step": 33270 + }, + { + "epoch": 94.54545454545455, + "grad_norm": 1.3612699508666992, + "learning_rate": 0.0001, + "loss": 0.037, + "step": 33280 + }, + { + "epoch": 94.57386363636364, + "grad_norm": 1.3791062831878662, + "learning_rate": 0.0001, + "loss": 0.0378, + "step": 33290 + }, + { + "epoch": 94.60227272727273, + "grad_norm": 0.9146853089332581, + "learning_rate": 0.0001, + "loss": 0.0386, + "step": 33300 + }, + { + "epoch": 94.63068181818181, + "grad_norm": 0.9118925929069519, + "learning_rate": 0.0001, + "loss": 0.0379, + "step": 33310 + }, + { + "epoch": 94.6590909090909, + "grad_norm": 0.9594295024871826, + "learning_rate": 0.0001, + "loss": 0.0393, + "step": 33320 + }, + { + "epoch": 94.6875, + "grad_norm": 0.9613752365112305, + "learning_rate": 0.0001, + "loss": 0.0405, + "step": 33330 + }, + { + "epoch": 94.7159090909091, + "grad_norm": 0.8596879839897156, + "learning_rate": 0.0001, + "loss": 0.0403, + "step": 33340 + }, + { + "epoch": 94.74431818181819, + "grad_norm": 0.8098438382148743, + "learning_rate": 0.0001, + "loss": 0.041, + "step": 33350 + }, + { + "epoch": 94.77272727272727, + "grad_norm": 1.0036510229110718, + "learning_rate": 0.0001, + "loss": 0.0398, + "step": 33360 + }, + { + "epoch": 94.80113636363636, + "grad_norm": 0.8176660537719727, + "learning_rate": 0.0001, + "loss": 0.041, + "step": 33370 + }, + { + "epoch": 94.82954545454545, + "grad_norm": 1.1036738157272339, + "learning_rate": 0.0001, + "loss": 0.0407, + "step": 33380 + }, + { + "epoch": 94.85795454545455, + "grad_norm": 0.7849661111831665, + "learning_rate": 0.0001, + "loss": 0.0401, + "step": 33390 + }, + { + "epoch": 94.88636363636364, + "grad_norm": 0.7073894739151001, + "learning_rate": 0.0001, + "loss": 0.0405, + "step": 33400 + }, + { + "epoch": 94.91477272727273, + "grad_norm": 0.8646548986434937, + "learning_rate": 0.0001, + "loss": 0.0405, + "step": 33410 + }, + { + "epoch": 94.94318181818181, + "grad_norm": 0.7283008694648743, + "learning_rate": 0.0001, + "loss": 0.0406, + "step": 33420 + }, + { + "epoch": 94.9715909090909, + "grad_norm": 0.8491483926773071, + "learning_rate": 0.0001, + "loss": 0.0406, + "step": 33430 + }, + { + "epoch": 95.0, + "grad_norm": 0.985192060470581, + "learning_rate": 0.0001, + "loss": 0.0409, + "step": 33440 + }, + { + "epoch": 95.0284090909091, + "grad_norm": 0.7137681841850281, + "learning_rate": 0.0001, + "loss": 0.0384, + "step": 33450 + }, + { + "epoch": 95.05681818181819, + "grad_norm": 0.6383925676345825, + "learning_rate": 0.0001, + "loss": 0.04, + "step": 33460 + }, + { + "epoch": 95.08522727272727, + "grad_norm": 0.6039495468139648, + "learning_rate": 0.0001, + "loss": 0.0397, + "step": 33470 + }, + { + "epoch": 95.11363636363636, + "grad_norm": 0.7503993511199951, + "learning_rate": 0.0001, + "loss": 0.0389, + "step": 33480 + }, + { + "epoch": 95.14204545454545, + "grad_norm": 0.8905356526374817, + "learning_rate": 0.0001, + "loss": 0.0413, + "step": 33490 + }, + { + "epoch": 95.17045454545455, + "grad_norm": 0.8237714171409607, + "learning_rate": 0.0001, + "loss": 0.0406, + "step": 33500 + }, + { + "epoch": 95.19886363636364, + "grad_norm": 0.9115204811096191, + "learning_rate": 0.0001, + "loss": 0.0411, + "step": 33510 + }, + { + "epoch": 95.22727272727273, + "grad_norm": 0.7420920729637146, + "learning_rate": 0.0001, + "loss": 0.0405, + "step": 33520 + }, + { + "epoch": 95.25568181818181, + "grad_norm": 0.6619880199432373, + "learning_rate": 0.0001, + "loss": 0.0426, + "step": 33530 + }, + { + "epoch": 95.2840909090909, + "grad_norm": 0.7541500926017761, + "learning_rate": 0.0001, + "loss": 0.0411, + "step": 33540 + }, + { + "epoch": 95.3125, + "grad_norm": 0.6301935911178589, + "learning_rate": 0.0001, + "loss": 0.0402, + "step": 33550 + }, + { + "epoch": 95.3409090909091, + "grad_norm": 0.677110493183136, + "learning_rate": 0.0001, + "loss": 0.0415, + "step": 33560 + }, + { + "epoch": 95.36931818181819, + "grad_norm": 0.6058023571968079, + "learning_rate": 0.0001, + "loss": 0.0402, + "step": 33570 + }, + { + "epoch": 95.39772727272727, + "grad_norm": 0.6188281774520874, + "learning_rate": 0.0001, + "loss": 0.0403, + "step": 33580 + }, + { + "epoch": 95.42613636363636, + "grad_norm": 0.6876928210258484, + "learning_rate": 0.0001, + "loss": 0.0393, + "step": 33590 + }, + { + "epoch": 95.45454545454545, + "grad_norm": 0.7519726753234863, + "learning_rate": 0.0001, + "loss": 0.0396, + "step": 33600 + }, + { + "epoch": 95.48295454545455, + "grad_norm": 0.9113184809684753, + "learning_rate": 0.0001, + "loss": 0.0379, + "step": 33610 + }, + { + "epoch": 95.51136363636364, + "grad_norm": 0.6155601739883423, + "learning_rate": 0.0001, + "loss": 0.0407, + "step": 33620 + }, + { + "epoch": 95.53977272727273, + "grad_norm": 0.5960917472839355, + "learning_rate": 0.0001, + "loss": 0.0391, + "step": 33630 + }, + { + "epoch": 95.56818181818181, + "grad_norm": 0.849075198173523, + "learning_rate": 0.0001, + "loss": 0.0389, + "step": 33640 + }, + { + "epoch": 95.5965909090909, + "grad_norm": 0.6028590798377991, + "learning_rate": 0.0001, + "loss": 0.0405, + "step": 33650 + }, + { + "epoch": 95.625, + "grad_norm": 0.7840140461921692, + "learning_rate": 0.0001, + "loss": 0.0392, + "step": 33660 + }, + { + "epoch": 95.6534090909091, + "grad_norm": 1.1838630437850952, + "learning_rate": 0.0001, + "loss": 0.0399, + "step": 33670 + }, + { + "epoch": 95.68181818181819, + "grad_norm": 1.1603631973266602, + "learning_rate": 0.0001, + "loss": 0.0392, + "step": 33680 + }, + { + "epoch": 95.71022727272727, + "grad_norm": 1.3737505674362183, + "learning_rate": 0.0001, + "loss": 0.0423, + "step": 33690 + }, + { + "epoch": 95.73863636363636, + "grad_norm": 1.0241883993148804, + "learning_rate": 0.0001, + "loss": 0.0396, + "step": 33700 + }, + { + "epoch": 95.76704545454545, + "grad_norm": 1.056270718574524, + "learning_rate": 0.0001, + "loss": 0.0385, + "step": 33710 + }, + { + "epoch": 95.79545454545455, + "grad_norm": 0.6747000217437744, + "learning_rate": 0.0001, + "loss": 0.0395, + "step": 33720 + }, + { + "epoch": 95.82386363636364, + "grad_norm": 0.7106748819351196, + "learning_rate": 0.0001, + "loss": 0.0401, + "step": 33730 + }, + { + "epoch": 95.85227272727273, + "grad_norm": 0.7365654706954956, + "learning_rate": 0.0001, + "loss": 0.0404, + "step": 33740 + }, + { + "epoch": 95.88068181818181, + "grad_norm": 0.7549445033073425, + "learning_rate": 0.0001, + "loss": 0.0407, + "step": 33750 + }, + { + "epoch": 95.9090909090909, + "grad_norm": 0.8147190809249878, + "learning_rate": 0.0001, + "loss": 0.0405, + "step": 33760 + }, + { + "epoch": 95.9375, + "grad_norm": 0.7287954092025757, + "learning_rate": 0.0001, + "loss": 0.0393, + "step": 33770 + }, + { + "epoch": 95.9659090909091, + "grad_norm": 0.771390974521637, + "learning_rate": 0.0001, + "loss": 0.0389, + "step": 33780 + }, + { + "epoch": 95.99431818181819, + "grad_norm": 1.2401736974716187, + "learning_rate": 0.0001, + "loss": 0.0397, + "step": 33790 + }, + { + "epoch": 96.02272727272727, + "grad_norm": 0.8751268982887268, + "learning_rate": 0.0001, + "loss": 0.04, + "step": 33800 + }, + { + "epoch": 96.05113636363636, + "grad_norm": 0.7138180732727051, + "learning_rate": 0.0001, + "loss": 0.0402, + "step": 33810 + }, + { + "epoch": 96.07954545454545, + "grad_norm": 0.7193799614906311, + "learning_rate": 0.0001, + "loss": 0.0382, + "step": 33820 + }, + { + "epoch": 96.10795454545455, + "grad_norm": 0.8099432587623596, + "learning_rate": 0.0001, + "loss": 0.0381, + "step": 33830 + }, + { + "epoch": 96.13636363636364, + "grad_norm": 0.76226407289505, + "learning_rate": 0.0001, + "loss": 0.04, + "step": 33840 + }, + { + "epoch": 96.16477272727273, + "grad_norm": 0.6789332628250122, + "learning_rate": 0.0001, + "loss": 0.0392, + "step": 33850 + }, + { + "epoch": 96.19318181818181, + "grad_norm": 0.6385184526443481, + "learning_rate": 0.0001, + "loss": 0.039, + "step": 33860 + }, + { + "epoch": 96.2215909090909, + "grad_norm": 0.6390976309776306, + "learning_rate": 0.0001, + "loss": 0.0384, + "step": 33870 + }, + { + "epoch": 96.25, + "grad_norm": 0.597517192363739, + "learning_rate": 0.0001, + "loss": 0.0379, + "step": 33880 + }, + { + "epoch": 96.2784090909091, + "grad_norm": 0.7059200406074524, + "learning_rate": 0.0001, + "loss": 0.0391, + "step": 33890 + }, + { + "epoch": 96.30681818181819, + "grad_norm": 0.7767623066902161, + "learning_rate": 0.0001, + "loss": 0.0399, + "step": 33900 + }, + { + "epoch": 96.33522727272727, + "grad_norm": 0.5218889117240906, + "learning_rate": 0.0001, + "loss": 0.0405, + "step": 33910 + }, + { + "epoch": 96.36363636363636, + "grad_norm": 0.6701022386550903, + "learning_rate": 0.0001, + "loss": 0.0399, + "step": 33920 + }, + { + "epoch": 96.39204545454545, + "grad_norm": 0.7549053430557251, + "learning_rate": 0.0001, + "loss": 0.0402, + "step": 33930 + }, + { + "epoch": 96.42045454545455, + "grad_norm": 0.7081325650215149, + "learning_rate": 0.0001, + "loss": 0.0394, + "step": 33940 + }, + { + "epoch": 96.44886363636364, + "grad_norm": 0.7790707349777222, + "learning_rate": 0.0001, + "loss": 0.0415, + "step": 33950 + }, + { + "epoch": 96.47727272727273, + "grad_norm": 0.6598635315895081, + "learning_rate": 0.0001, + "loss": 0.0391, + "step": 33960 + }, + { + "epoch": 96.50568181818181, + "grad_norm": 0.6724303364753723, + "learning_rate": 0.0001, + "loss": 0.0399, + "step": 33970 + }, + { + "epoch": 96.5340909090909, + "grad_norm": 0.5733104944229126, + "learning_rate": 0.0001, + "loss": 0.0419, + "step": 33980 + }, + { + "epoch": 96.5625, + "grad_norm": 0.5401538610458374, + "learning_rate": 0.0001, + "loss": 0.0388, + "step": 33990 + }, + { + "epoch": 96.5909090909091, + "grad_norm": 0.614717423915863, + "learning_rate": 0.0001, + "loss": 0.0391, + "step": 34000 + }, + { + "epoch": 96.61931818181819, + "grad_norm": 0.5657342672348022, + "learning_rate": 0.0001, + "loss": 0.0386, + "step": 34010 + }, + { + "epoch": 96.64772727272727, + "grad_norm": 1.142454981803894, + "learning_rate": 0.0001, + "loss": 0.0382, + "step": 34020 + }, + { + "epoch": 96.67613636363636, + "grad_norm": 1.0728636980056763, + "learning_rate": 0.0001, + "loss": 0.0411, + "step": 34030 + }, + { + "epoch": 96.70454545454545, + "grad_norm": 1.2795010805130005, + "learning_rate": 0.0001, + "loss": 0.0398, + "step": 34040 + }, + { + "epoch": 96.73295454545455, + "grad_norm": 1.0543229579925537, + "learning_rate": 0.0001, + "loss": 0.0387, + "step": 34050 + }, + { + "epoch": 96.76136363636364, + "grad_norm": 1.0960166454315186, + "learning_rate": 0.0001, + "loss": 0.04, + "step": 34060 + }, + { + "epoch": 96.78977272727273, + "grad_norm": 1.0084160566329956, + "learning_rate": 0.0001, + "loss": 0.0391, + "step": 34070 + }, + { + "epoch": 96.81818181818181, + "grad_norm": 0.9923943281173706, + "learning_rate": 0.0001, + "loss": 0.0379, + "step": 34080 + }, + { + "epoch": 96.8465909090909, + "grad_norm": 0.8087642788887024, + "learning_rate": 0.0001, + "loss": 0.0391, + "step": 34090 + }, + { + "epoch": 96.875, + "grad_norm": 1.208733081817627, + "learning_rate": 0.0001, + "loss": 0.0391, + "step": 34100 + }, + { + "epoch": 96.9034090909091, + "grad_norm": 0.9292431473731995, + "learning_rate": 0.0001, + "loss": 0.0403, + "step": 34110 + }, + { + "epoch": 96.93181818181819, + "grad_norm": 0.8942785263061523, + "learning_rate": 0.0001, + "loss": 0.0397, + "step": 34120 + }, + { + "epoch": 96.96022727272727, + "grad_norm": 0.863674521446228, + "learning_rate": 0.0001, + "loss": 0.0388, + "step": 34130 + }, + { + "epoch": 96.98863636363636, + "grad_norm": 1.0691416263580322, + "learning_rate": 0.0001, + "loss": 0.0379, + "step": 34140 + }, + { + "epoch": 97.01704545454545, + "grad_norm": 0.5712941884994507, + "learning_rate": 0.0001, + "loss": 0.0384, + "step": 34150 + }, + { + "epoch": 97.04545454545455, + "grad_norm": 0.6319429278373718, + "learning_rate": 0.0001, + "loss": 0.0382, + "step": 34160 + }, + { + "epoch": 97.07386363636364, + "grad_norm": 0.9614266157150269, + "learning_rate": 0.0001, + "loss": 0.0378, + "step": 34170 + }, + { + "epoch": 97.10227272727273, + "grad_norm": 0.7226883769035339, + "learning_rate": 0.0001, + "loss": 0.0393, + "step": 34180 + }, + { + "epoch": 97.13068181818181, + "grad_norm": 0.7708411812782288, + "learning_rate": 0.0001, + "loss": 0.0396, + "step": 34190 + }, + { + "epoch": 97.1590909090909, + "grad_norm": 0.7161945104598999, + "learning_rate": 0.0001, + "loss": 0.0392, + "step": 34200 + }, + { + "epoch": 97.1875, + "grad_norm": 0.650200366973877, + "learning_rate": 0.0001, + "loss": 0.0406, + "step": 34210 + }, + { + "epoch": 97.2159090909091, + "grad_norm": 0.6397100687026978, + "learning_rate": 0.0001, + "loss": 0.039, + "step": 34220 + }, + { + "epoch": 97.24431818181819, + "grad_norm": 0.7142146825790405, + "learning_rate": 0.0001, + "loss": 0.0387, + "step": 34230 + }, + { + "epoch": 97.27272727272727, + "grad_norm": 0.6408595442771912, + "learning_rate": 0.0001, + "loss": 0.0388, + "step": 34240 + }, + { + "epoch": 97.30113636363636, + "grad_norm": 0.6152955889701843, + "learning_rate": 0.0001, + "loss": 0.0387, + "step": 34250 + }, + { + "epoch": 97.32954545454545, + "grad_norm": 0.7230101227760315, + "learning_rate": 0.0001, + "loss": 0.0381, + "step": 34260 + }, + { + "epoch": 97.35795454545455, + "grad_norm": 0.945869505405426, + "learning_rate": 0.0001, + "loss": 0.0387, + "step": 34270 + }, + { + "epoch": 97.38636363636364, + "grad_norm": 0.9501050114631653, + "learning_rate": 0.0001, + "loss": 0.0399, + "step": 34280 + }, + { + "epoch": 97.41477272727273, + "grad_norm": 0.9380021691322327, + "learning_rate": 0.0001, + "loss": 0.0402, + "step": 34290 + }, + { + "epoch": 97.44318181818181, + "grad_norm": 1.3023756742477417, + "learning_rate": 0.0001, + "loss": 0.0396, + "step": 34300 + }, + { + "epoch": 97.4715909090909, + "grad_norm": 1.0887079238891602, + "learning_rate": 0.0001, + "loss": 0.0402, + "step": 34310 + }, + { + "epoch": 97.5, + "grad_norm": 1.2671388387680054, + "learning_rate": 0.0001, + "loss": 0.0406, + "step": 34320 + }, + { + "epoch": 97.5284090909091, + "grad_norm": 0.7356063723564148, + "learning_rate": 0.0001, + "loss": 0.0402, + "step": 34330 + }, + { + "epoch": 97.55681818181819, + "grad_norm": 1.0338929891586304, + "learning_rate": 0.0001, + "loss": 0.0394, + "step": 34340 + }, + { + "epoch": 97.58522727272727, + "grad_norm": 1.080224633216858, + "learning_rate": 0.0001, + "loss": 0.0393, + "step": 34350 + }, + { + "epoch": 97.61363636363636, + "grad_norm": 1.2610585689544678, + "learning_rate": 0.0001, + "loss": 0.0413, + "step": 34360 + }, + { + "epoch": 97.64204545454545, + "grad_norm": 1.3167310953140259, + "learning_rate": 0.0001, + "loss": 0.0409, + "step": 34370 + }, + { + "epoch": 97.67045454545455, + "grad_norm": 1.0706456899642944, + "learning_rate": 0.0001, + "loss": 0.0402, + "step": 34380 + }, + { + "epoch": 97.69886363636364, + "grad_norm": 0.8218298554420471, + "learning_rate": 0.0001, + "loss": 0.0387, + "step": 34390 + }, + { + "epoch": 97.72727272727273, + "grad_norm": 1.1640470027923584, + "learning_rate": 0.0001, + "loss": 0.0386, + "step": 34400 + }, + { + "epoch": 97.75568181818181, + "grad_norm": 0.8588812947273254, + "learning_rate": 0.0001, + "loss": 0.0394, + "step": 34410 + }, + { + "epoch": 97.7840909090909, + "grad_norm": 0.892999529838562, + "learning_rate": 0.0001, + "loss": 0.0379, + "step": 34420 + }, + { + "epoch": 97.8125, + "grad_norm": 0.7717360258102417, + "learning_rate": 0.0001, + "loss": 0.0397, + "step": 34430 + }, + { + "epoch": 97.8409090909091, + "grad_norm": 0.8617984652519226, + "learning_rate": 0.0001, + "loss": 0.0404, + "step": 34440 + }, + { + "epoch": 97.86931818181819, + "grad_norm": 0.8200704455375671, + "learning_rate": 0.0001, + "loss": 0.0394, + "step": 34450 + }, + { + "epoch": 97.89772727272727, + "grad_norm": 0.6376478672027588, + "learning_rate": 0.0001, + "loss": 0.0391, + "step": 34460 + }, + { + "epoch": 97.92613636363636, + "grad_norm": 0.9599566459655762, + "learning_rate": 0.0001, + "loss": 0.0386, + "step": 34470 + }, + { + "epoch": 97.95454545454545, + "grad_norm": 0.7544838786125183, + "learning_rate": 0.0001, + "loss": 0.0401, + "step": 34480 + }, + { + "epoch": 97.98295454545455, + "grad_norm": 0.6836613416671753, + "learning_rate": 0.0001, + "loss": 0.0393, + "step": 34490 + }, + { + "epoch": 98.01136363636364, + "grad_norm": 0.806623101234436, + "learning_rate": 0.0001, + "loss": 0.0403, + "step": 34500 + }, + { + "epoch": 98.03977272727273, + "grad_norm": 0.9929761290550232, + "learning_rate": 0.0001, + "loss": 0.0404, + "step": 34510 + }, + { + "epoch": 98.06818181818181, + "grad_norm": 0.7506119012832642, + "learning_rate": 0.0001, + "loss": 0.0397, + "step": 34520 + }, + { + "epoch": 98.0965909090909, + "grad_norm": 0.7990569472312927, + "learning_rate": 0.0001, + "loss": 0.0409, + "step": 34530 + }, + { + "epoch": 98.125, + "grad_norm": 0.8026459813117981, + "learning_rate": 0.0001, + "loss": 0.0403, + "step": 34540 + }, + { + "epoch": 98.1534090909091, + "grad_norm": 0.7655097842216492, + "learning_rate": 0.0001, + "loss": 0.0403, + "step": 34550 + }, + { + "epoch": 98.18181818181819, + "grad_norm": 0.626440703868866, + "learning_rate": 0.0001, + "loss": 0.0398, + "step": 34560 + }, + { + "epoch": 98.21022727272727, + "grad_norm": 0.5965021252632141, + "learning_rate": 0.0001, + "loss": 0.04, + "step": 34570 + }, + { + "epoch": 98.23863636363636, + "grad_norm": 0.6253120303153992, + "learning_rate": 0.0001, + "loss": 0.0405, + "step": 34580 + }, + { + "epoch": 98.26704545454545, + "grad_norm": 0.728787362575531, + "learning_rate": 0.0001, + "loss": 0.0396, + "step": 34590 + }, + { + "epoch": 98.29545454545455, + "grad_norm": 0.6482828259468079, + "learning_rate": 0.0001, + "loss": 0.0403, + "step": 34600 + }, + { + "epoch": 98.32386363636364, + "grad_norm": 0.6943103671073914, + "learning_rate": 0.0001, + "loss": 0.0405, + "step": 34610 + }, + { + "epoch": 98.35227272727273, + "grad_norm": 0.7795711159706116, + "learning_rate": 0.0001, + "loss": 0.0406, + "step": 34620 + }, + { + "epoch": 98.38068181818181, + "grad_norm": 0.7584307193756104, + "learning_rate": 0.0001, + "loss": 0.0401, + "step": 34630 + }, + { + "epoch": 98.4090909090909, + "grad_norm": 0.583733320236206, + "learning_rate": 0.0001, + "loss": 0.0392, + "step": 34640 + }, + { + "epoch": 98.4375, + "grad_norm": 1.0233768224716187, + "learning_rate": 0.0001, + "loss": 0.0403, + "step": 34650 + }, + { + "epoch": 98.4659090909091, + "grad_norm": 0.7381771206855774, + "learning_rate": 0.0001, + "loss": 0.0401, + "step": 34660 + }, + { + "epoch": 98.49431818181819, + "grad_norm": 1.0933961868286133, + "learning_rate": 0.0001, + "loss": 0.0406, + "step": 34670 + }, + { + "epoch": 98.52272727272727, + "grad_norm": 0.8754884600639343, + "learning_rate": 0.0001, + "loss": 0.0392, + "step": 34680 + }, + { + "epoch": 98.55113636363636, + "grad_norm": 0.658818781375885, + "learning_rate": 0.0001, + "loss": 0.0384, + "step": 34690 + }, + { + "epoch": 98.57954545454545, + "grad_norm": 0.8832113146781921, + "learning_rate": 0.0001, + "loss": 0.0387, + "step": 34700 + }, + { + "epoch": 98.60795454545455, + "grad_norm": 0.6543091535568237, + "learning_rate": 0.0001, + "loss": 0.04, + "step": 34710 + }, + { + "epoch": 98.63636363636364, + "grad_norm": 0.5386546850204468, + "learning_rate": 0.0001, + "loss": 0.0378, + "step": 34720 + }, + { + "epoch": 98.66477272727273, + "grad_norm": 0.4984776973724365, + "learning_rate": 0.0001, + "loss": 0.0381, + "step": 34730 + }, + { + "epoch": 98.69318181818181, + "grad_norm": 0.5178702473640442, + "learning_rate": 0.0001, + "loss": 0.04, + "step": 34740 + }, + { + "epoch": 98.7215909090909, + "grad_norm": 0.7301501035690308, + "learning_rate": 0.0001, + "loss": 0.0389, + "step": 34750 + }, + { + "epoch": 98.75, + "grad_norm": 0.5040386319160461, + "learning_rate": 0.0001, + "loss": 0.0389, + "step": 34760 + }, + { + "epoch": 98.7784090909091, + "grad_norm": 0.4964589774608612, + "learning_rate": 0.0001, + "loss": 0.0391, + "step": 34770 + }, + { + "epoch": 98.80681818181819, + "grad_norm": 0.6709886193275452, + "learning_rate": 0.0001, + "loss": 0.038, + "step": 34780 + }, + { + "epoch": 98.83522727272727, + "grad_norm": 0.6795845031738281, + "learning_rate": 0.0001, + "loss": 0.0392, + "step": 34790 + }, + { + "epoch": 98.86363636363636, + "grad_norm": 0.6201198101043701, + "learning_rate": 0.0001, + "loss": 0.0386, + "step": 34800 + }, + { + "epoch": 98.89204545454545, + "grad_norm": 0.5602060556411743, + "learning_rate": 0.0001, + "loss": 0.0374, + "step": 34810 + }, + { + "epoch": 98.92045454545455, + "grad_norm": 0.6460253000259399, + "learning_rate": 0.0001, + "loss": 0.0399, + "step": 34820 + }, + { + "epoch": 98.94886363636364, + "grad_norm": 0.6049633026123047, + "learning_rate": 0.0001, + "loss": 0.0407, + "step": 34830 + }, + { + "epoch": 98.97727272727273, + "grad_norm": 0.6341941356658936, + "learning_rate": 0.0001, + "loss": 0.0388, + "step": 34840 + }, + { + "epoch": 99.00568181818181, + "grad_norm": 0.6305556893348694, + "learning_rate": 0.0001, + "loss": 0.0414, + "step": 34850 + }, + { + "epoch": 99.0340909090909, + "grad_norm": 0.5669991970062256, + "learning_rate": 0.0001, + "loss": 0.0399, + "step": 34860 + }, + { + "epoch": 99.0625, + "grad_norm": 0.6804741024971008, + "learning_rate": 0.0001, + "loss": 0.0387, + "step": 34870 + }, + { + "epoch": 99.0909090909091, + "grad_norm": 0.6002970933914185, + "learning_rate": 0.0001, + "loss": 0.0391, + "step": 34880 + }, + { + "epoch": 99.11931818181819, + "grad_norm": 0.6289021968841553, + "learning_rate": 0.0001, + "loss": 0.0395, + "step": 34890 + }, + { + "epoch": 99.14772727272727, + "grad_norm": 0.5393396019935608, + "learning_rate": 0.0001, + "loss": 0.0395, + "step": 34900 + }, + { + "epoch": 99.17613636363636, + "grad_norm": 0.6519724130630493, + "learning_rate": 0.0001, + "loss": 0.0402, + "step": 34910 + }, + { + "epoch": 99.20454545454545, + "grad_norm": 0.6385529637336731, + "learning_rate": 0.0001, + "loss": 0.0392, + "step": 34920 + }, + { + "epoch": 99.23295454545455, + "grad_norm": 0.7283846139907837, + "learning_rate": 0.0001, + "loss": 0.0415, + "step": 34930 + }, + { + "epoch": 99.26136363636364, + "grad_norm": 0.5138035416603088, + "learning_rate": 0.0001, + "loss": 0.0379, + "step": 34940 + }, + { + "epoch": 99.28977272727273, + "grad_norm": 0.6235365867614746, + "learning_rate": 0.0001, + "loss": 0.0383, + "step": 34950 + }, + { + "epoch": 99.31818181818181, + "grad_norm": 0.6972271800041199, + "learning_rate": 0.0001, + "loss": 0.0392, + "step": 34960 + }, + { + "epoch": 99.3465909090909, + "grad_norm": 0.7025635242462158, + "learning_rate": 0.0001, + "loss": 0.0403, + "step": 34970 + }, + { + "epoch": 99.375, + "grad_norm": 0.5961763858795166, + "learning_rate": 0.0001, + "loss": 0.0396, + "step": 34980 + }, + { + "epoch": 99.4034090909091, + "grad_norm": 0.6492330431938171, + "learning_rate": 0.0001, + "loss": 0.0394, + "step": 34990 + }, + { + "epoch": 99.43181818181819, + "grad_norm": 0.6340757012367249, + "learning_rate": 0.0001, + "loss": 0.039, + "step": 35000 + }, + { + "epoch": 99.46022727272727, + "grad_norm": 0.6484765410423279, + "learning_rate": 0.0001, + "loss": 0.039, + "step": 35010 + }, + { + "epoch": 99.48863636363636, + "grad_norm": 0.5331886410713196, + "learning_rate": 0.0001, + "loss": 0.038, + "step": 35020 + }, + { + "epoch": 99.51704545454545, + "grad_norm": 0.4786685109138489, + "learning_rate": 0.0001, + "loss": 0.0379, + "step": 35030 + }, + { + "epoch": 99.54545454545455, + "grad_norm": 0.5610359311103821, + "learning_rate": 0.0001, + "loss": 0.0388, + "step": 35040 + }, + { + "epoch": 99.57386363636364, + "grad_norm": 0.5391741394996643, + "learning_rate": 0.0001, + "loss": 0.039, + "step": 35050 + }, + { + "epoch": 99.60227272727273, + "grad_norm": 0.610522985458374, + "learning_rate": 0.0001, + "loss": 0.0389, + "step": 35060 + }, + { + "epoch": 99.63068181818181, + "grad_norm": 0.6739444732666016, + "learning_rate": 0.0001, + "loss": 0.0393, + "step": 35070 + }, + { + "epoch": 99.6590909090909, + "grad_norm": 0.5937843918800354, + "learning_rate": 0.0001, + "loss": 0.0398, + "step": 35080 + }, + { + "epoch": 99.6875, + "grad_norm": 0.9213070869445801, + "learning_rate": 0.0001, + "loss": 0.0389, + "step": 35090 + }, + { + "epoch": 99.7159090909091, + "grad_norm": 1.3140711784362793, + "learning_rate": 0.0001, + "loss": 0.0376, + "step": 35100 + }, + { + "epoch": 99.74431818181819, + "grad_norm": 1.2353553771972656, + "learning_rate": 0.0001, + "loss": 0.0391, + "step": 35110 + }, + { + "epoch": 99.77272727272727, + "grad_norm": 0.7020501494407654, + "learning_rate": 0.0001, + "loss": 0.0375, + "step": 35120 + }, + { + "epoch": 99.80113636363636, + "grad_norm": 0.8266453742980957, + "learning_rate": 0.0001, + "loss": 0.039, + "step": 35130 + }, + { + "epoch": 99.82954545454545, + "grad_norm": 0.7972448468208313, + "learning_rate": 0.0001, + "loss": 0.0382, + "step": 35140 + }, + { + "epoch": 99.85795454545455, + "grad_norm": 0.7136993408203125, + "learning_rate": 0.0001, + "loss": 0.0379, + "step": 35150 + }, + { + "epoch": 99.88636363636364, + "grad_norm": 0.794268012046814, + "learning_rate": 0.0001, + "loss": 0.0386, + "step": 35160 + }, + { + "epoch": 99.91477272727273, + "grad_norm": 0.7449502348899841, + "learning_rate": 0.0001, + "loss": 0.0381, + "step": 35170 + }, + { + "epoch": 99.94318181818181, + "grad_norm": 0.5898183584213257, + "learning_rate": 0.0001, + "loss": 0.0377, + "step": 35180 + }, + { + "epoch": 99.9715909090909, + "grad_norm": 0.5253759622573853, + "learning_rate": 0.0001, + "loss": 0.0382, + "step": 35190 + }, + { + "epoch": 100.0, + "grad_norm": 0.5536308288574219, + "learning_rate": 0.0001, + "loss": 0.0381, + "step": 35200 + }, + { + "epoch": 100.0284090909091, + "grad_norm": 0.712748110294342, + "learning_rate": 0.0001, + "loss": 0.0381, + "step": 35210 + }, + { + "epoch": 100.05681818181819, + "grad_norm": 0.6205607056617737, + "learning_rate": 0.0001, + "loss": 0.0382, + "step": 35220 + }, + { + "epoch": 100.08522727272727, + "grad_norm": 0.8103065490722656, + "learning_rate": 0.0001, + "loss": 0.0375, + "step": 35230 + }, + { + "epoch": 100.11363636363636, + "grad_norm": 0.8175324201583862, + "learning_rate": 0.0001, + "loss": 0.0373, + "step": 35240 + }, + { + "epoch": 100.14204545454545, + "grad_norm": 0.6265504956245422, + "learning_rate": 0.0001, + "loss": 0.0373, + "step": 35250 + }, + { + "epoch": 100.17045454545455, + "grad_norm": 0.7531624436378479, + "learning_rate": 0.0001, + "loss": 0.0383, + "step": 35260 + }, + { + "epoch": 100.19886363636364, + "grad_norm": 0.7789162993431091, + "learning_rate": 0.0001, + "loss": 0.0375, + "step": 35270 + }, + { + "epoch": 100.22727272727273, + "grad_norm": 0.7431286573410034, + "learning_rate": 0.0001, + "loss": 0.0374, + "step": 35280 + }, + { + "epoch": 100.25568181818181, + "grad_norm": 0.6338279247283936, + "learning_rate": 0.0001, + "loss": 0.0378, + "step": 35290 + }, + { + "epoch": 100.2840909090909, + "grad_norm": 0.6069151759147644, + "learning_rate": 0.0001, + "loss": 0.039, + "step": 35300 + }, + { + "epoch": 100.3125, + "grad_norm": 0.6290576457977295, + "learning_rate": 0.0001, + "loss": 0.0387, + "step": 35310 + }, + { + "epoch": 100.3409090909091, + "grad_norm": 0.5072388052940369, + "learning_rate": 0.0001, + "loss": 0.0386, + "step": 35320 + }, + { + "epoch": 100.36931818181819, + "grad_norm": 0.9051946401596069, + "learning_rate": 0.0001, + "loss": 0.0398, + "step": 35330 + }, + { + "epoch": 100.39772727272727, + "grad_norm": 0.9270437955856323, + "learning_rate": 0.0001, + "loss": 0.0384, + "step": 35340 + }, + { + "epoch": 100.42613636363636, + "grad_norm": 1.1097337007522583, + "learning_rate": 0.0001, + "loss": 0.039, + "step": 35350 + }, + { + "epoch": 100.45454545454545, + "grad_norm": 0.753572404384613, + "learning_rate": 0.0001, + "loss": 0.0379, + "step": 35360 + }, + { + "epoch": 100.48295454545455, + "grad_norm": 0.8269745111465454, + "learning_rate": 0.0001, + "loss": 0.0374, + "step": 35370 + }, + { + "epoch": 100.51136363636364, + "grad_norm": 0.9077835083007812, + "learning_rate": 0.0001, + "loss": 0.0385, + "step": 35380 + }, + { + "epoch": 100.53977272727273, + "grad_norm": 0.863038182258606, + "learning_rate": 0.0001, + "loss": 0.0376, + "step": 35390 + }, + { + "epoch": 100.56818181818181, + "grad_norm": 0.7924647927284241, + "learning_rate": 0.0001, + "loss": 0.0387, + "step": 35400 + }, + { + "epoch": 100.5965909090909, + "grad_norm": 0.8401536345481873, + "learning_rate": 0.0001, + "loss": 0.0376, + "step": 35410 + }, + { + "epoch": 100.625, + "grad_norm": 0.8418139219284058, + "learning_rate": 0.0001, + "loss": 0.0378, + "step": 35420 + }, + { + "epoch": 100.6534090909091, + "grad_norm": 0.8648441433906555, + "learning_rate": 0.0001, + "loss": 0.0393, + "step": 35430 + }, + { + "epoch": 100.68181818181819, + "grad_norm": 0.7955145239830017, + "learning_rate": 0.0001, + "loss": 0.0395, + "step": 35440 + }, + { + "epoch": 100.71022727272727, + "grad_norm": 0.759017825126648, + "learning_rate": 0.0001, + "loss": 0.0384, + "step": 35450 + }, + { + "epoch": 100.73863636363636, + "grad_norm": 0.7723873853683472, + "learning_rate": 0.0001, + "loss": 0.0379, + "step": 35460 + }, + { + "epoch": 100.76704545454545, + "grad_norm": 0.5847647190093994, + "learning_rate": 0.0001, + "loss": 0.039, + "step": 35470 + }, + { + "epoch": 100.79545454545455, + "grad_norm": 0.7090848684310913, + "learning_rate": 0.0001, + "loss": 0.0377, + "step": 35480 + }, + { + "epoch": 100.82386363636364, + "grad_norm": 0.5850080847740173, + "learning_rate": 0.0001, + "loss": 0.0378, + "step": 35490 + }, + { + "epoch": 100.85227272727273, + "grad_norm": 0.6634331345558167, + "learning_rate": 0.0001, + "loss": 0.0375, + "step": 35500 + }, + { + "epoch": 100.88068181818181, + "grad_norm": 0.5659244060516357, + "learning_rate": 0.0001, + "loss": 0.0377, + "step": 35510 + }, + { + "epoch": 100.9090909090909, + "grad_norm": 0.6139445304870605, + "learning_rate": 0.0001, + "loss": 0.0386, + "step": 35520 + }, + { + "epoch": 100.9375, + "grad_norm": 0.5992445945739746, + "learning_rate": 0.0001, + "loss": 0.0369, + "step": 35530 + }, + { + "epoch": 100.9659090909091, + "grad_norm": 0.705426037311554, + "learning_rate": 0.0001, + "loss": 0.0385, + "step": 35540 + }, + { + "epoch": 100.99431818181819, + "grad_norm": 0.8458812832832336, + "learning_rate": 0.0001, + "loss": 0.0381, + "step": 35550 + }, + { + "epoch": 101.02272727272727, + "grad_norm": 0.8477011919021606, + "learning_rate": 0.0001, + "loss": 0.0383, + "step": 35560 + }, + { + "epoch": 101.05113636363636, + "grad_norm": 0.7048535346984863, + "learning_rate": 0.0001, + "loss": 0.038, + "step": 35570 + }, + { + "epoch": 101.07954545454545, + "grad_norm": 0.9626438617706299, + "learning_rate": 0.0001, + "loss": 0.0372, + "step": 35580 + }, + { + "epoch": 101.10795454545455, + "grad_norm": 0.8232958912849426, + "learning_rate": 0.0001, + "loss": 0.0377, + "step": 35590 + }, + { + "epoch": 101.13636363636364, + "grad_norm": 1.2208539247512817, + "learning_rate": 0.0001, + "loss": 0.0388, + "step": 35600 + }, + { + "epoch": 101.16477272727273, + "grad_norm": 0.8574521541595459, + "learning_rate": 0.0001, + "loss": 0.038, + "step": 35610 + }, + { + "epoch": 101.19318181818181, + "grad_norm": 0.536631166934967, + "learning_rate": 0.0001, + "loss": 0.0388, + "step": 35620 + }, + { + "epoch": 101.2215909090909, + "grad_norm": 0.6252682209014893, + "learning_rate": 0.0001, + "loss": 0.0382, + "step": 35630 + }, + { + "epoch": 101.25, + "grad_norm": 0.6429247260093689, + "learning_rate": 0.0001, + "loss": 0.0384, + "step": 35640 + }, + { + "epoch": 101.2784090909091, + "grad_norm": 0.6166262626647949, + "learning_rate": 0.0001, + "loss": 0.0382, + "step": 35650 + }, + { + "epoch": 101.30681818181819, + "grad_norm": 0.6511677503585815, + "learning_rate": 0.0001, + "loss": 0.0377, + "step": 35660 + }, + { + "epoch": 101.33522727272727, + "grad_norm": 0.6906519532203674, + "learning_rate": 0.0001, + "loss": 0.038, + "step": 35670 + }, + { + "epoch": 101.36363636363636, + "grad_norm": 0.6717009544372559, + "learning_rate": 0.0001, + "loss": 0.0389, + "step": 35680 + }, + { + "epoch": 101.39204545454545, + "grad_norm": 0.6030964255332947, + "learning_rate": 0.0001, + "loss": 0.0407, + "step": 35690 + }, + { + "epoch": 101.42045454545455, + "grad_norm": 0.6418792009353638, + "learning_rate": 0.0001, + "loss": 0.0394, + "step": 35700 + }, + { + "epoch": 101.44886363636364, + "grad_norm": 0.7006118297576904, + "learning_rate": 0.0001, + "loss": 0.0394, + "step": 35710 + }, + { + "epoch": 101.47727272727273, + "grad_norm": 0.7065404653549194, + "learning_rate": 0.0001, + "loss": 0.0379, + "step": 35720 + }, + { + "epoch": 101.50568181818181, + "grad_norm": 0.7001347541809082, + "learning_rate": 0.0001, + "loss": 0.0382, + "step": 35730 + }, + { + "epoch": 101.5340909090909, + "grad_norm": 0.7850422859191895, + "learning_rate": 0.0001, + "loss": 0.039, + "step": 35740 + }, + { + "epoch": 101.5625, + "grad_norm": 0.6983082294464111, + "learning_rate": 0.0001, + "loss": 0.039, + "step": 35750 + }, + { + "epoch": 101.5909090909091, + "grad_norm": 0.6168388724327087, + "learning_rate": 0.0001, + "loss": 0.0376, + "step": 35760 + }, + { + "epoch": 101.61931818181819, + "grad_norm": 0.6485419869422913, + "learning_rate": 0.0001, + "loss": 0.0373, + "step": 35770 + }, + { + "epoch": 101.64772727272727, + "grad_norm": 0.694078803062439, + "learning_rate": 0.0001, + "loss": 0.0384, + "step": 35780 + }, + { + "epoch": 101.67613636363636, + "grad_norm": 0.6054084300994873, + "learning_rate": 0.0001, + "loss": 0.0374, + "step": 35790 + }, + { + "epoch": 101.70454545454545, + "grad_norm": 0.7031105756759644, + "learning_rate": 0.0001, + "loss": 0.0387, + "step": 35800 + }, + { + "epoch": 101.73295454545455, + "grad_norm": 0.6772159337997437, + "learning_rate": 0.0001, + "loss": 0.0374, + "step": 35810 + }, + { + "epoch": 101.76136363636364, + "grad_norm": 0.742162823677063, + "learning_rate": 0.0001, + "loss": 0.0363, + "step": 35820 + }, + { + "epoch": 101.78977272727273, + "grad_norm": 0.6221441626548767, + "learning_rate": 0.0001, + "loss": 0.0385, + "step": 35830 + }, + { + "epoch": 101.81818181818181, + "grad_norm": 0.5692674517631531, + "learning_rate": 0.0001, + "loss": 0.0386, + "step": 35840 + }, + { + "epoch": 101.8465909090909, + "grad_norm": 0.5200670957565308, + "learning_rate": 0.0001, + "loss": 0.0383, + "step": 35850 + }, + { + "epoch": 101.875, + "grad_norm": 0.44014811515808105, + "learning_rate": 0.0001, + "loss": 0.0384, + "step": 35860 + }, + { + "epoch": 101.9034090909091, + "grad_norm": 0.5131399035453796, + "learning_rate": 0.0001, + "loss": 0.0378, + "step": 35870 + }, + { + "epoch": 101.93181818181819, + "grad_norm": 0.7803551554679871, + "learning_rate": 0.0001, + "loss": 0.0386, + "step": 35880 + }, + { + "epoch": 101.96022727272727, + "grad_norm": 1.0714529752731323, + "learning_rate": 0.0001, + "loss": 0.0381, + "step": 35890 + }, + { + "epoch": 101.98863636363636, + "grad_norm": 0.8870387077331543, + "learning_rate": 0.0001, + "loss": 0.0366, + "step": 35900 + }, + { + "epoch": 102.01704545454545, + "grad_norm": 0.814504861831665, + "learning_rate": 0.0001, + "loss": 0.0372, + "step": 35910 + }, + { + "epoch": 102.04545454545455, + "grad_norm": 1.2375514507293701, + "learning_rate": 0.0001, + "loss": 0.0383, + "step": 35920 + }, + { + "epoch": 102.07386363636364, + "grad_norm": 1.0883283615112305, + "learning_rate": 0.0001, + "loss": 0.0385, + "step": 35930 + }, + { + "epoch": 102.10227272727273, + "grad_norm": 0.7217296361923218, + "learning_rate": 0.0001, + "loss": 0.0378, + "step": 35940 + }, + { + "epoch": 102.13068181818181, + "grad_norm": 0.9220253229141235, + "learning_rate": 0.0001, + "loss": 0.036, + "step": 35950 + }, + { + "epoch": 102.1590909090909, + "grad_norm": 0.9065287113189697, + "learning_rate": 0.0001, + "loss": 0.0387, + "step": 35960 + }, + { + "epoch": 102.1875, + "grad_norm": 1.2304972410202026, + "learning_rate": 0.0001, + "loss": 0.0372, + "step": 35970 + }, + { + "epoch": 102.2159090909091, + "grad_norm": 0.9404845833778381, + "learning_rate": 0.0001, + "loss": 0.0372, + "step": 35980 + }, + { + "epoch": 102.24431818181819, + "grad_norm": 0.9212031960487366, + "learning_rate": 0.0001, + "loss": 0.0362, + "step": 35990 + }, + { + "epoch": 102.27272727272727, + "grad_norm": 0.7656883597373962, + "learning_rate": 0.0001, + "loss": 0.038, + "step": 36000 + }, + { + "epoch": 102.30113636363636, + "grad_norm": 0.7965297102928162, + "learning_rate": 0.0001, + "loss": 0.0382, + "step": 36010 + }, + { + "epoch": 102.32954545454545, + "grad_norm": 0.6216439604759216, + "learning_rate": 0.0001, + "loss": 0.0379, + "step": 36020 + }, + { + "epoch": 102.35795454545455, + "grad_norm": 0.6738339066505432, + "learning_rate": 0.0001, + "loss": 0.0389, + "step": 36030 + }, + { + "epoch": 102.38636363636364, + "grad_norm": 0.6152580380439758, + "learning_rate": 0.0001, + "loss": 0.0383, + "step": 36040 + }, + { + "epoch": 102.41477272727273, + "grad_norm": 0.6139420866966248, + "learning_rate": 0.0001, + "loss": 0.0379, + "step": 36050 + }, + { + "epoch": 102.44318181818181, + "grad_norm": 0.5595870614051819, + "learning_rate": 0.0001, + "loss": 0.0378, + "step": 36060 + }, + { + "epoch": 102.4715909090909, + "grad_norm": 0.721856951713562, + "learning_rate": 0.0001, + "loss": 0.0369, + "step": 36070 + }, + { + "epoch": 102.5, + "grad_norm": 0.7998674511909485, + "learning_rate": 0.0001, + "loss": 0.0372, + "step": 36080 + }, + { + "epoch": 102.5284090909091, + "grad_norm": 0.8735951781272888, + "learning_rate": 0.0001, + "loss": 0.0384, + "step": 36090 + }, + { + "epoch": 102.55681818181819, + "grad_norm": 0.7308524250984192, + "learning_rate": 0.0001, + "loss": 0.0388, + "step": 36100 + }, + { + "epoch": 102.58522727272727, + "grad_norm": 0.7623921036720276, + "learning_rate": 0.0001, + "loss": 0.0373, + "step": 36110 + }, + { + "epoch": 102.61363636363636, + "grad_norm": 0.8306724429130554, + "learning_rate": 0.0001, + "loss": 0.0382, + "step": 36120 + }, + { + "epoch": 102.64204545454545, + "grad_norm": 0.8680564165115356, + "learning_rate": 0.0001, + "loss": 0.0374, + "step": 36130 + }, + { + "epoch": 102.67045454545455, + "grad_norm": 0.8162680864334106, + "learning_rate": 0.0001, + "loss": 0.0382, + "step": 36140 + }, + { + "epoch": 102.69886363636364, + "grad_norm": 0.6912002563476562, + "learning_rate": 0.0001, + "loss": 0.0371, + "step": 36150 + }, + { + "epoch": 102.72727272727273, + "grad_norm": 0.7910269498825073, + "learning_rate": 0.0001, + "loss": 0.0375, + "step": 36160 + }, + { + "epoch": 102.75568181818181, + "grad_norm": 0.6541531682014465, + "learning_rate": 0.0001, + "loss": 0.039, + "step": 36170 + }, + { + "epoch": 102.7840909090909, + "grad_norm": 0.5540030002593994, + "learning_rate": 0.0001, + "loss": 0.0384, + "step": 36180 + }, + { + "epoch": 102.8125, + "grad_norm": 0.6944588422775269, + "learning_rate": 0.0001, + "loss": 0.0387, + "step": 36190 + }, + { + "epoch": 102.8409090909091, + "grad_norm": 0.6281765699386597, + "learning_rate": 0.0001, + "loss": 0.0377, + "step": 36200 + }, + { + "epoch": 102.86931818181819, + "grad_norm": 0.5334039330482483, + "learning_rate": 0.0001, + "loss": 0.0374, + "step": 36210 + }, + { + "epoch": 102.89772727272727, + "grad_norm": 0.6771912574768066, + "learning_rate": 0.0001, + "loss": 0.038, + "step": 36220 + }, + { + "epoch": 102.92613636363636, + "grad_norm": 0.5143633484840393, + "learning_rate": 0.0001, + "loss": 0.0367, + "step": 36230 + }, + { + "epoch": 102.95454545454545, + "grad_norm": 0.5875553488731384, + "learning_rate": 0.0001, + "loss": 0.0385, + "step": 36240 + }, + { + "epoch": 102.98295454545455, + "grad_norm": 0.5696831941604614, + "learning_rate": 0.0001, + "loss": 0.038, + "step": 36250 + }, + { + "epoch": 103.01136363636364, + "grad_norm": 0.6309694647789001, + "learning_rate": 0.0001, + "loss": 0.0377, + "step": 36260 + }, + { + "epoch": 103.03977272727273, + "grad_norm": 0.8848923444747925, + "learning_rate": 0.0001, + "loss": 0.0391, + "step": 36270 + }, + { + "epoch": 103.06818181818181, + "grad_norm": 0.578700840473175, + "learning_rate": 0.0001, + "loss": 0.0375, + "step": 36280 + }, + { + "epoch": 103.0965909090909, + "grad_norm": 0.6599058508872986, + "learning_rate": 0.0001, + "loss": 0.0388, + "step": 36290 + }, + { + "epoch": 103.125, + "grad_norm": 0.7933474779129028, + "learning_rate": 0.0001, + "loss": 0.0375, + "step": 36300 + }, + { + "epoch": 103.1534090909091, + "grad_norm": 0.8254300355911255, + "learning_rate": 0.0001, + "loss": 0.0378, + "step": 36310 + }, + { + "epoch": 103.18181818181819, + "grad_norm": 0.6838820576667786, + "learning_rate": 0.0001, + "loss": 0.0369, + "step": 36320 + }, + { + "epoch": 103.21022727272727, + "grad_norm": 0.686337947845459, + "learning_rate": 0.0001, + "loss": 0.0361, + "step": 36330 + }, + { + "epoch": 103.23863636363636, + "grad_norm": 0.768295168876648, + "learning_rate": 0.0001, + "loss": 0.0374, + "step": 36340 + }, + { + "epoch": 103.26704545454545, + "grad_norm": 0.7727903127670288, + "learning_rate": 0.0001, + "loss": 0.0384, + "step": 36350 + }, + { + "epoch": 103.29545454545455, + "grad_norm": 0.4438778758049011, + "learning_rate": 0.0001, + "loss": 0.0363, + "step": 36360 + }, + { + "epoch": 103.32386363636364, + "grad_norm": 0.5905418395996094, + "learning_rate": 0.0001, + "loss": 0.0383, + "step": 36370 + }, + { + "epoch": 103.35227272727273, + "grad_norm": 0.6414166688919067, + "learning_rate": 0.0001, + "loss": 0.0388, + "step": 36380 + }, + { + "epoch": 103.38068181818181, + "grad_norm": 0.6797493100166321, + "learning_rate": 0.0001, + "loss": 0.0386, + "step": 36390 + }, + { + "epoch": 103.4090909090909, + "grad_norm": 0.653895914554596, + "learning_rate": 0.0001, + "loss": 0.0398, + "step": 36400 + }, + { + "epoch": 103.4375, + "grad_norm": 0.6173151731491089, + "learning_rate": 0.0001, + "loss": 0.0392, + "step": 36410 + }, + { + "epoch": 103.4659090909091, + "grad_norm": 0.47727423906326294, + "learning_rate": 0.0001, + "loss": 0.0369, + "step": 36420 + }, + { + "epoch": 103.49431818181819, + "grad_norm": 0.6933102011680603, + "learning_rate": 0.0001, + "loss": 0.0385, + "step": 36430 + }, + { + "epoch": 103.52272727272727, + "grad_norm": 0.6333000063896179, + "learning_rate": 0.0001, + "loss": 0.0382, + "step": 36440 + }, + { + "epoch": 103.55113636363636, + "grad_norm": 0.5522160530090332, + "learning_rate": 0.0001, + "loss": 0.0376, + "step": 36450 + }, + { + "epoch": 103.57954545454545, + "grad_norm": 0.6237143278121948, + "learning_rate": 0.0001, + "loss": 0.039, + "step": 36460 + }, + { + "epoch": 103.60795454545455, + "grad_norm": 0.7558906674385071, + "learning_rate": 0.0001, + "loss": 0.0395, + "step": 36470 + }, + { + "epoch": 103.63636363636364, + "grad_norm": 0.5823392868041992, + "learning_rate": 0.0001, + "loss": 0.0392, + "step": 36480 + }, + { + "epoch": 103.66477272727273, + "grad_norm": 0.6411099433898926, + "learning_rate": 0.0001, + "loss": 0.0384, + "step": 36490 + }, + { + "epoch": 103.69318181818181, + "grad_norm": 0.5531007051467896, + "learning_rate": 0.0001, + "loss": 0.0371, + "step": 36500 + }, + { + "epoch": 103.7215909090909, + "grad_norm": 0.5332525968551636, + "learning_rate": 0.0001, + "loss": 0.0382, + "step": 36510 + }, + { + "epoch": 103.75, + "grad_norm": 0.6390560269355774, + "learning_rate": 0.0001, + "loss": 0.0376, + "step": 36520 + }, + { + "epoch": 103.7784090909091, + "grad_norm": 0.6145923137664795, + "learning_rate": 0.0001, + "loss": 0.0368, + "step": 36530 + }, + { + "epoch": 103.80681818181819, + "grad_norm": 0.663539707660675, + "learning_rate": 0.0001, + "loss": 0.0373, + "step": 36540 + }, + { + "epoch": 103.83522727272727, + "grad_norm": 0.6167774796485901, + "learning_rate": 0.0001, + "loss": 0.0391, + "step": 36550 + }, + { + "epoch": 103.86363636363636, + "grad_norm": 0.7627246975898743, + "learning_rate": 0.0001, + "loss": 0.0388, + "step": 36560 + }, + { + "epoch": 103.89204545454545, + "grad_norm": 0.5639641880989075, + "learning_rate": 0.0001, + "loss": 0.0394, + "step": 36570 + }, + { + "epoch": 103.92045454545455, + "grad_norm": 0.6381582617759705, + "learning_rate": 0.0001, + "loss": 0.0398, + "step": 36580 + }, + { + "epoch": 103.94886363636364, + "grad_norm": 0.6323047876358032, + "learning_rate": 0.0001, + "loss": 0.0391, + "step": 36590 + }, + { + "epoch": 103.97727272727273, + "grad_norm": 0.5740509629249573, + "learning_rate": 0.0001, + "loss": 0.0383, + "step": 36600 + }, + { + "epoch": 104.00568181818181, + "grad_norm": 0.6786510348320007, + "learning_rate": 0.0001, + "loss": 0.0375, + "step": 36610 + }, + { + "epoch": 104.0340909090909, + "grad_norm": 0.6328359246253967, + "learning_rate": 0.0001, + "loss": 0.0377, + "step": 36620 + }, + { + "epoch": 104.0625, + "grad_norm": 0.5744404792785645, + "learning_rate": 0.0001, + "loss": 0.0391, + "step": 36630 + }, + { + "epoch": 104.0909090909091, + "grad_norm": 0.6046051979064941, + "learning_rate": 0.0001, + "loss": 0.0381, + "step": 36640 + }, + { + "epoch": 104.11931818181819, + "grad_norm": 0.6407805681228638, + "learning_rate": 0.0001, + "loss": 0.0387, + "step": 36650 + }, + { + "epoch": 104.14772727272727, + "grad_norm": 0.8871857523918152, + "learning_rate": 0.0001, + "loss": 0.038, + "step": 36660 + }, + { + "epoch": 104.17613636363636, + "grad_norm": 0.8872131705284119, + "learning_rate": 0.0001, + "loss": 0.0376, + "step": 36670 + }, + { + "epoch": 104.20454545454545, + "grad_norm": 0.7679946422576904, + "learning_rate": 0.0001, + "loss": 0.0386, + "step": 36680 + }, + { + "epoch": 104.23295454545455, + "grad_norm": 0.6552764177322388, + "learning_rate": 0.0001, + "loss": 0.0381, + "step": 36690 + }, + { + "epoch": 104.26136363636364, + "grad_norm": 0.8447787761688232, + "learning_rate": 0.0001, + "loss": 0.039, + "step": 36700 + }, + { + "epoch": 104.28977272727273, + "grad_norm": 0.6279290318489075, + "learning_rate": 0.0001, + "loss": 0.0407, + "step": 36710 + }, + { + "epoch": 104.31818181818181, + "grad_norm": 0.5877067446708679, + "learning_rate": 0.0001, + "loss": 0.0415, + "step": 36720 + }, + { + "epoch": 104.3465909090909, + "grad_norm": 0.8654998540878296, + "learning_rate": 0.0001, + "loss": 0.0408, + "step": 36730 + }, + { + "epoch": 104.375, + "grad_norm": 0.6628789901733398, + "learning_rate": 0.0001, + "loss": 0.0415, + "step": 36740 + }, + { + "epoch": 104.4034090909091, + "grad_norm": 1.0221258401870728, + "learning_rate": 0.0001, + "loss": 0.0401, + "step": 36750 + }, + { + "epoch": 104.43181818181819, + "grad_norm": 1.2567273378372192, + "learning_rate": 0.0001, + "loss": 0.0392, + "step": 36760 + }, + { + "epoch": 104.46022727272727, + "grad_norm": 1.0356628894805908, + "learning_rate": 0.0001, + "loss": 0.0404, + "step": 36770 + }, + { + "epoch": 104.48863636363636, + "grad_norm": 1.1455295085906982, + "learning_rate": 0.0001, + "loss": 0.0397, + "step": 36780 + }, + { + "epoch": 104.51704545454545, + "grad_norm": 1.0565097332000732, + "learning_rate": 0.0001, + "loss": 0.0392, + "step": 36790 + }, + { + "epoch": 104.54545454545455, + "grad_norm": 0.8333950638771057, + "learning_rate": 0.0001, + "loss": 0.0389, + "step": 36800 + }, + { + "epoch": 104.57386363636364, + "grad_norm": 0.8816116452217102, + "learning_rate": 0.0001, + "loss": 0.0382, + "step": 36810 + }, + { + "epoch": 104.60227272727273, + "grad_norm": 0.8084827661514282, + "learning_rate": 0.0001, + "loss": 0.038, + "step": 36820 + }, + { + "epoch": 104.63068181818181, + "grad_norm": 0.7814130187034607, + "learning_rate": 0.0001, + "loss": 0.0384, + "step": 36830 + }, + { + "epoch": 104.6590909090909, + "grad_norm": 0.6202470064163208, + "learning_rate": 0.0001, + "loss": 0.0383, + "step": 36840 + }, + { + "epoch": 104.6875, + "grad_norm": 0.6300608515739441, + "learning_rate": 0.0001, + "loss": 0.038, + "step": 36850 + }, + { + "epoch": 104.7159090909091, + "grad_norm": 0.6419079303741455, + "learning_rate": 0.0001, + "loss": 0.0383, + "step": 36860 + }, + { + "epoch": 104.74431818181819, + "grad_norm": 0.6812740564346313, + "learning_rate": 0.0001, + "loss": 0.0389, + "step": 36870 + }, + { + "epoch": 104.77272727272727, + "grad_norm": 0.8751927614212036, + "learning_rate": 0.0001, + "loss": 0.0398, + "step": 36880 + }, + { + "epoch": 104.80113636363636, + "grad_norm": 0.694141149520874, + "learning_rate": 0.0001, + "loss": 0.0394, + "step": 36890 + }, + { + "epoch": 104.82954545454545, + "grad_norm": 0.6310260891914368, + "learning_rate": 0.0001, + "loss": 0.0388, + "step": 36900 + }, + { + "epoch": 104.85795454545455, + "grad_norm": 0.45405858755111694, + "learning_rate": 0.0001, + "loss": 0.037, + "step": 36910 + }, + { + "epoch": 104.88636363636364, + "grad_norm": 0.7782346606254578, + "learning_rate": 0.0001, + "loss": 0.0381, + "step": 36920 + }, + { + "epoch": 104.91477272727273, + "grad_norm": 0.8152375221252441, + "learning_rate": 0.0001, + "loss": 0.0376, + "step": 36930 + }, + { + "epoch": 104.94318181818181, + "grad_norm": 0.6959502696990967, + "learning_rate": 0.0001, + "loss": 0.0376, + "step": 36940 + }, + { + "epoch": 104.9715909090909, + "grad_norm": 0.805332362651825, + "learning_rate": 0.0001, + "loss": 0.037, + "step": 36950 + }, + { + "epoch": 105.0, + "grad_norm": 0.8097184896469116, + "learning_rate": 0.0001, + "loss": 0.0384, + "step": 36960 + }, + { + "epoch": 105.0284090909091, + "grad_norm": 0.611344039440155, + "learning_rate": 0.0001, + "loss": 0.0371, + "step": 36970 + }, + { + "epoch": 105.05681818181819, + "grad_norm": 0.8121931552886963, + "learning_rate": 0.0001, + "loss": 0.0383, + "step": 36980 + }, + { + "epoch": 105.08522727272727, + "grad_norm": 0.965552568435669, + "learning_rate": 0.0001, + "loss": 0.0375, + "step": 36990 + }, + { + "epoch": 105.11363636363636, + "grad_norm": 0.772780179977417, + "learning_rate": 0.0001, + "loss": 0.0384, + "step": 37000 + }, + { + "epoch": 105.14204545454545, + "grad_norm": 0.8075849413871765, + "learning_rate": 0.0001, + "loss": 0.0381, + "step": 37010 + }, + { + "epoch": 105.17045454545455, + "grad_norm": 0.8520001173019409, + "learning_rate": 0.0001, + "loss": 0.0389, + "step": 37020 + }, + { + "epoch": 105.19886363636364, + "grad_norm": 0.9963288903236389, + "learning_rate": 0.0001, + "loss": 0.0381, + "step": 37030 + }, + { + "epoch": 105.22727272727273, + "grad_norm": 1.0980024337768555, + "learning_rate": 0.0001, + "loss": 0.038, + "step": 37040 + }, + { + "epoch": 105.25568181818181, + "grad_norm": 0.8528324961662292, + "learning_rate": 0.0001, + "loss": 0.0374, + "step": 37050 + }, + { + "epoch": 105.2840909090909, + "grad_norm": 1.0183886289596558, + "learning_rate": 0.0001, + "loss": 0.0364, + "step": 37060 + }, + { + "epoch": 105.3125, + "grad_norm": 0.9727984666824341, + "learning_rate": 0.0001, + "loss": 0.0386, + "step": 37070 + }, + { + "epoch": 105.3409090909091, + "grad_norm": 1.1144496202468872, + "learning_rate": 0.0001, + "loss": 0.0376, + "step": 37080 + }, + { + "epoch": 105.36931818181819, + "grad_norm": 1.2052735090255737, + "learning_rate": 0.0001, + "loss": 0.0377, + "step": 37090 + }, + { + "epoch": 105.39772727272727, + "grad_norm": 0.855655312538147, + "learning_rate": 0.0001, + "loss": 0.0367, + "step": 37100 + }, + { + "epoch": 105.42613636363636, + "grad_norm": 0.714371383190155, + "learning_rate": 0.0001, + "loss": 0.0373, + "step": 37110 + }, + { + "epoch": 105.45454545454545, + "grad_norm": 0.7759353518486023, + "learning_rate": 0.0001, + "loss": 0.0385, + "step": 37120 + }, + { + "epoch": 105.48295454545455, + "grad_norm": 0.8793251514434814, + "learning_rate": 0.0001, + "loss": 0.0386, + "step": 37130 + }, + { + "epoch": 105.51136363636364, + "grad_norm": 0.8084626793861389, + "learning_rate": 0.0001, + "loss": 0.0383, + "step": 37140 + }, + { + "epoch": 105.53977272727273, + "grad_norm": 0.8977124094963074, + "learning_rate": 0.0001, + "loss": 0.0382, + "step": 37150 + }, + { + "epoch": 105.56818181818181, + "grad_norm": 0.6372682452201843, + "learning_rate": 0.0001, + "loss": 0.0384, + "step": 37160 + }, + { + "epoch": 105.5965909090909, + "grad_norm": 0.6709948182106018, + "learning_rate": 0.0001, + "loss": 0.0386, + "step": 37170 + }, + { + "epoch": 105.625, + "grad_norm": 0.8853369951248169, + "learning_rate": 0.0001, + "loss": 0.0385, + "step": 37180 + }, + { + "epoch": 105.6534090909091, + "grad_norm": 0.6816346645355225, + "learning_rate": 0.0001, + "loss": 0.0377, + "step": 37190 + }, + { + "epoch": 105.68181818181819, + "grad_norm": 0.8214898705482483, + "learning_rate": 0.0001, + "loss": 0.0376, + "step": 37200 + }, + { + "epoch": 105.71022727272727, + "grad_norm": 0.7285953164100647, + "learning_rate": 0.0001, + "loss": 0.0407, + "step": 37210 + }, + { + "epoch": 105.73863636363636, + "grad_norm": 0.7041904926300049, + "learning_rate": 0.0001, + "loss": 0.0389, + "step": 37220 + }, + { + "epoch": 105.76704545454545, + "grad_norm": 0.7178189754486084, + "learning_rate": 0.0001, + "loss": 0.0373, + "step": 37230 + }, + { + "epoch": 105.79545454545455, + "grad_norm": 1.0095086097717285, + "learning_rate": 0.0001, + "loss": 0.0383, + "step": 37240 + }, + { + "epoch": 105.82386363636364, + "grad_norm": 0.9176076054573059, + "learning_rate": 0.0001, + "loss": 0.0378, + "step": 37250 + }, + { + "epoch": 105.85227272727273, + "grad_norm": 0.8059961795806885, + "learning_rate": 0.0001, + "loss": 0.0376, + "step": 37260 + }, + { + "epoch": 105.88068181818181, + "grad_norm": 0.8492456078529358, + "learning_rate": 0.0001, + "loss": 0.0386, + "step": 37270 + }, + { + "epoch": 105.9090909090909, + "grad_norm": 0.8832942247390747, + "learning_rate": 0.0001, + "loss": 0.0384, + "step": 37280 + }, + { + "epoch": 105.9375, + "grad_norm": 0.7502552270889282, + "learning_rate": 0.0001, + "loss": 0.0379, + "step": 37290 + }, + { + "epoch": 105.9659090909091, + "grad_norm": 0.6862600445747375, + "learning_rate": 0.0001, + "loss": 0.0379, + "step": 37300 + }, + { + "epoch": 105.99431818181819, + "grad_norm": 0.6588712334632874, + "learning_rate": 0.0001, + "loss": 0.0363, + "step": 37310 + }, + { + "epoch": 106.02272727272727, + "grad_norm": 0.5885828137397766, + "learning_rate": 0.0001, + "loss": 0.0372, + "step": 37320 + }, + { + "epoch": 106.05113636363636, + "grad_norm": 0.5425651669502258, + "learning_rate": 0.0001, + "loss": 0.0375, + "step": 37330 + }, + { + "epoch": 106.07954545454545, + "grad_norm": 0.5327818393707275, + "learning_rate": 0.0001, + "loss": 0.0369, + "step": 37340 + }, + { + "epoch": 106.10795454545455, + "grad_norm": 0.6177202463150024, + "learning_rate": 0.0001, + "loss": 0.0374, + "step": 37350 + }, + { + "epoch": 106.13636363636364, + "grad_norm": 0.473233699798584, + "learning_rate": 0.0001, + "loss": 0.0382, + "step": 37360 + }, + { + "epoch": 106.16477272727273, + "grad_norm": 0.48036202788352966, + "learning_rate": 0.0001, + "loss": 0.0378, + "step": 37370 + }, + { + "epoch": 106.19318181818181, + "grad_norm": 0.699809193611145, + "learning_rate": 0.0001, + "loss": 0.0371, + "step": 37380 + }, + { + "epoch": 106.2215909090909, + "grad_norm": 0.865032434463501, + "learning_rate": 0.0001, + "loss": 0.0367, + "step": 37390 + }, + { + "epoch": 106.25, + "grad_norm": 0.707669198513031, + "learning_rate": 0.0001, + "loss": 0.0369, + "step": 37400 + }, + { + "epoch": 106.2784090909091, + "grad_norm": 0.6514762043952942, + "learning_rate": 0.0001, + "loss": 0.0353, + "step": 37410 + }, + { + "epoch": 106.30681818181819, + "grad_norm": 0.7002271413803101, + "learning_rate": 0.0001, + "loss": 0.0362, + "step": 37420 + }, + { + "epoch": 106.33522727272727, + "grad_norm": 0.6672790050506592, + "learning_rate": 0.0001, + "loss": 0.0352, + "step": 37430 + }, + { + "epoch": 106.36363636363636, + "grad_norm": 0.7932654619216919, + "learning_rate": 0.0001, + "loss": 0.0376, + "step": 37440 + }, + { + "epoch": 106.39204545454545, + "grad_norm": 0.6026307940483093, + "learning_rate": 0.0001, + "loss": 0.0372, + "step": 37450 + }, + { + "epoch": 106.42045454545455, + "grad_norm": 0.6472262144088745, + "learning_rate": 0.0001, + "loss": 0.0362, + "step": 37460 + }, + { + "epoch": 106.44886363636364, + "grad_norm": 0.6252856254577637, + "learning_rate": 0.0001, + "loss": 0.0379, + "step": 37470 + }, + { + "epoch": 106.47727272727273, + "grad_norm": 0.5449088215827942, + "learning_rate": 0.0001, + "loss": 0.038, + "step": 37480 + }, + { + "epoch": 106.50568181818181, + "grad_norm": 0.7351489663124084, + "learning_rate": 0.0001, + "loss": 0.0365, + "step": 37490 + }, + { + "epoch": 106.5340909090909, + "grad_norm": 0.684830367565155, + "learning_rate": 0.0001, + "loss": 0.0364, + "step": 37500 + }, + { + "epoch": 106.5625, + "grad_norm": 0.522684633731842, + "learning_rate": 0.0001, + "loss": 0.0367, + "step": 37510 + }, + { + "epoch": 106.5909090909091, + "grad_norm": 0.5345920920372009, + "learning_rate": 0.0001, + "loss": 0.0356, + "step": 37520 + }, + { + "epoch": 106.61931818181819, + "grad_norm": 0.5353555083274841, + "learning_rate": 0.0001, + "loss": 0.037, + "step": 37530 + }, + { + "epoch": 106.64772727272727, + "grad_norm": 0.631646454334259, + "learning_rate": 0.0001, + "loss": 0.0365, + "step": 37540 + }, + { + "epoch": 106.67613636363636, + "grad_norm": 0.6440027952194214, + "learning_rate": 0.0001, + "loss": 0.0378, + "step": 37550 + }, + { + "epoch": 106.70454545454545, + "grad_norm": 0.5962269306182861, + "learning_rate": 0.0001, + "loss": 0.0359, + "step": 37560 + }, + { + "epoch": 106.73295454545455, + "grad_norm": 0.6566577553749084, + "learning_rate": 0.0001, + "loss": 0.0369, + "step": 37570 + }, + { + "epoch": 106.76136363636364, + "grad_norm": 0.6731533408164978, + "learning_rate": 0.0001, + "loss": 0.0372, + "step": 37580 + }, + { + "epoch": 106.78977272727273, + "grad_norm": 0.8614497780799866, + "learning_rate": 0.0001, + "loss": 0.0365, + "step": 37590 + }, + { + "epoch": 106.81818181818181, + "grad_norm": 0.7013105154037476, + "learning_rate": 0.0001, + "loss": 0.0354, + "step": 37600 + }, + { + "epoch": 106.8465909090909, + "grad_norm": 0.7781490087509155, + "learning_rate": 0.0001, + "loss": 0.0366, + "step": 37610 + }, + { + "epoch": 106.875, + "grad_norm": 0.6093642711639404, + "learning_rate": 0.0001, + "loss": 0.0377, + "step": 37620 + }, + { + "epoch": 106.9034090909091, + "grad_norm": 0.5649243593215942, + "learning_rate": 0.0001, + "loss": 0.0364, + "step": 37630 + }, + { + "epoch": 106.93181818181819, + "grad_norm": 0.6890942454338074, + "learning_rate": 0.0001, + "loss": 0.0367, + "step": 37640 + }, + { + "epoch": 106.96022727272727, + "grad_norm": 0.7046976089477539, + "learning_rate": 0.0001, + "loss": 0.0369, + "step": 37650 + }, + { + "epoch": 106.98863636363636, + "grad_norm": 0.6145200729370117, + "learning_rate": 0.0001, + "loss": 0.0363, + "step": 37660 + }, + { + "epoch": 107.01704545454545, + "grad_norm": 0.4534114897251129, + "learning_rate": 0.0001, + "loss": 0.0368, + "step": 37670 + }, + { + "epoch": 107.04545454545455, + "grad_norm": 0.519280195236206, + "learning_rate": 0.0001, + "loss": 0.038, + "step": 37680 + }, + { + "epoch": 107.07386363636364, + "grad_norm": 0.6525757312774658, + "learning_rate": 0.0001, + "loss": 0.0368, + "step": 37690 + }, + { + "epoch": 107.10227272727273, + "grad_norm": 0.6470338106155396, + "learning_rate": 0.0001, + "loss": 0.0373, + "step": 37700 + }, + { + "epoch": 107.13068181818181, + "grad_norm": 0.5215091705322266, + "learning_rate": 0.0001, + "loss": 0.0371, + "step": 37710 + }, + { + "epoch": 107.1590909090909, + "grad_norm": 0.48153358697891235, + "learning_rate": 0.0001, + "loss": 0.0375, + "step": 37720 + }, + { + "epoch": 107.1875, + "grad_norm": 0.6757826209068298, + "learning_rate": 0.0001, + "loss": 0.0388, + "step": 37730 + }, + { + "epoch": 107.2159090909091, + "grad_norm": 0.7484776377677917, + "learning_rate": 0.0001, + "loss": 0.0371, + "step": 37740 + }, + { + "epoch": 107.24431818181819, + "grad_norm": 0.6073766946792603, + "learning_rate": 0.0001, + "loss": 0.035, + "step": 37750 + }, + { + "epoch": 107.27272727272727, + "grad_norm": 0.45944106578826904, + "learning_rate": 0.0001, + "loss": 0.0366, + "step": 37760 + }, + { + "epoch": 107.30113636363636, + "grad_norm": 0.481180876493454, + "learning_rate": 0.0001, + "loss": 0.0367, + "step": 37770 + }, + { + "epoch": 107.32954545454545, + "grad_norm": 0.5007697343826294, + "learning_rate": 0.0001, + "loss": 0.0364, + "step": 37780 + }, + { + "epoch": 107.35795454545455, + "grad_norm": 0.6877015233039856, + "learning_rate": 0.0001, + "loss": 0.038, + "step": 37790 + }, + { + "epoch": 107.38636363636364, + "grad_norm": 1.0906400680541992, + "learning_rate": 0.0001, + "loss": 0.0383, + "step": 37800 + }, + { + "epoch": 107.41477272727273, + "grad_norm": 0.9992173910140991, + "learning_rate": 0.0001, + "loss": 0.0367, + "step": 37810 + }, + { + "epoch": 107.44318181818181, + "grad_norm": 1.065973162651062, + "learning_rate": 0.0001, + "loss": 0.0367, + "step": 37820 + }, + { + "epoch": 107.4715909090909, + "grad_norm": 0.8227842450141907, + "learning_rate": 0.0001, + "loss": 0.0377, + "step": 37830 + }, + { + "epoch": 107.5, + "grad_norm": 0.830697238445282, + "learning_rate": 0.0001, + "loss": 0.0376, + "step": 37840 + }, + { + "epoch": 107.5284090909091, + "grad_norm": 0.652164101600647, + "learning_rate": 0.0001, + "loss": 0.0376, + "step": 37850 + }, + { + "epoch": 107.55681818181819, + "grad_norm": 0.6938498020172119, + "learning_rate": 0.0001, + "loss": 0.0389, + "step": 37860 + }, + { + "epoch": 107.58522727272727, + "grad_norm": 0.680587887763977, + "learning_rate": 0.0001, + "loss": 0.0366, + "step": 37870 + }, + { + "epoch": 107.61363636363636, + "grad_norm": 0.6912381649017334, + "learning_rate": 0.0001, + "loss": 0.0377, + "step": 37880 + }, + { + "epoch": 107.64204545454545, + "grad_norm": 0.6087629795074463, + "learning_rate": 0.0001, + "loss": 0.037, + "step": 37890 + }, + { + "epoch": 107.67045454545455, + "grad_norm": 0.8172582983970642, + "learning_rate": 0.0001, + "loss": 0.0385, + "step": 37900 + }, + { + "epoch": 107.69886363636364, + "grad_norm": 0.7405523657798767, + "learning_rate": 0.0001, + "loss": 0.0376, + "step": 37910 + }, + { + "epoch": 107.72727272727273, + "grad_norm": 0.6305014491081238, + "learning_rate": 0.0001, + "loss": 0.0384, + "step": 37920 + }, + { + "epoch": 107.75568181818181, + "grad_norm": 0.6484267711639404, + "learning_rate": 0.0001, + "loss": 0.0388, + "step": 37930 + }, + { + "epoch": 107.7840909090909, + "grad_norm": 0.6926651000976562, + "learning_rate": 0.0001, + "loss": 0.0382, + "step": 37940 + }, + { + "epoch": 107.8125, + "grad_norm": 0.7457519769668579, + "learning_rate": 0.0001, + "loss": 0.04, + "step": 37950 + }, + { + "epoch": 107.8409090909091, + "grad_norm": 0.6257100701332092, + "learning_rate": 0.0001, + "loss": 0.0372, + "step": 37960 + }, + { + "epoch": 107.86931818181819, + "grad_norm": 0.6201730966567993, + "learning_rate": 0.0001, + "loss": 0.0368, + "step": 37970 + }, + { + "epoch": 107.89772727272727, + "grad_norm": 0.6591887474060059, + "learning_rate": 0.0001, + "loss": 0.0376, + "step": 37980 + }, + { + "epoch": 107.92613636363636, + "grad_norm": 0.827599287033081, + "learning_rate": 0.0001, + "loss": 0.0375, + "step": 37990 + }, + { + "epoch": 107.95454545454545, + "grad_norm": 1.455817699432373, + "learning_rate": 0.0001, + "loss": 0.0374, + "step": 38000 + }, + { + "epoch": 107.98295454545455, + "grad_norm": 0.6563143730163574, + "learning_rate": 0.0001, + "loss": 0.0386, + "step": 38010 + }, + { + "epoch": 108.01136363636364, + "grad_norm": 0.7723172307014465, + "learning_rate": 0.0001, + "loss": 0.0359, + "step": 38020 + }, + { + "epoch": 108.03977272727273, + "grad_norm": 1.2289658784866333, + "learning_rate": 0.0001, + "loss": 0.0374, + "step": 38030 + }, + { + "epoch": 108.06818181818181, + "grad_norm": 0.977079451084137, + "learning_rate": 0.0001, + "loss": 0.0351, + "step": 38040 + }, + { + "epoch": 108.0965909090909, + "grad_norm": 0.7695228457450867, + "learning_rate": 0.0001, + "loss": 0.036, + "step": 38050 + }, + { + "epoch": 108.125, + "grad_norm": 0.8520819544792175, + "learning_rate": 0.0001, + "loss": 0.0358, + "step": 38060 + }, + { + "epoch": 108.1534090909091, + "grad_norm": 0.6254966855049133, + "learning_rate": 0.0001, + "loss": 0.0373, + "step": 38070 + }, + { + "epoch": 108.18181818181819, + "grad_norm": 0.7900024056434631, + "learning_rate": 0.0001, + "loss": 0.0363, + "step": 38080 + }, + { + "epoch": 108.21022727272727, + "grad_norm": 0.7880393266677856, + "learning_rate": 0.0001, + "loss": 0.037, + "step": 38090 + }, + { + "epoch": 108.23863636363636, + "grad_norm": 0.7509173154830933, + "learning_rate": 0.0001, + "loss": 0.0362, + "step": 38100 + }, + { + "epoch": 108.26704545454545, + "grad_norm": 0.5946815013885498, + "learning_rate": 0.0001, + "loss": 0.0375, + "step": 38110 + }, + { + "epoch": 108.29545454545455, + "grad_norm": 0.703536331653595, + "learning_rate": 0.0001, + "loss": 0.0364, + "step": 38120 + }, + { + "epoch": 108.32386363636364, + "grad_norm": 0.6964169144630432, + "learning_rate": 0.0001, + "loss": 0.0365, + "step": 38130 + }, + { + "epoch": 108.35227272727273, + "grad_norm": 0.778841495513916, + "learning_rate": 0.0001, + "loss": 0.0367, + "step": 38140 + }, + { + "epoch": 108.38068181818181, + "grad_norm": 0.8367446064949036, + "learning_rate": 0.0001, + "loss": 0.0371, + "step": 38150 + }, + { + "epoch": 108.4090909090909, + "grad_norm": 0.4970041811466217, + "learning_rate": 0.0001, + "loss": 0.0351, + "step": 38160 + }, + { + "epoch": 108.4375, + "grad_norm": 0.584697961807251, + "learning_rate": 0.0001, + "loss": 0.0376, + "step": 38170 + }, + { + "epoch": 108.4659090909091, + "grad_norm": 0.5434656143188477, + "learning_rate": 0.0001, + "loss": 0.0374, + "step": 38180 + }, + { + "epoch": 108.49431818181819, + "grad_norm": 0.8836389183998108, + "learning_rate": 0.0001, + "loss": 0.0364, + "step": 38190 + }, + { + "epoch": 108.52272727272727, + "grad_norm": 0.7299078702926636, + "learning_rate": 0.0001, + "loss": 0.0371, + "step": 38200 + }, + { + "epoch": 108.55113636363636, + "grad_norm": 0.6886661052703857, + "learning_rate": 0.0001, + "loss": 0.0367, + "step": 38210 + }, + { + "epoch": 108.57954545454545, + "grad_norm": 0.6602329611778259, + "learning_rate": 0.0001, + "loss": 0.0377, + "step": 38220 + }, + { + "epoch": 108.60795454545455, + "grad_norm": 0.5685455799102783, + "learning_rate": 0.0001, + "loss": 0.0366, + "step": 38230 + }, + { + "epoch": 108.63636363636364, + "grad_norm": 0.5193608403205872, + "learning_rate": 0.0001, + "loss": 0.0366, + "step": 38240 + }, + { + "epoch": 108.66477272727273, + "grad_norm": 0.4227427840232849, + "learning_rate": 0.0001, + "loss": 0.036, + "step": 38250 + }, + { + "epoch": 108.69318181818181, + "grad_norm": 0.5614402890205383, + "learning_rate": 0.0001, + "loss": 0.037, + "step": 38260 + }, + { + "epoch": 108.7215909090909, + "grad_norm": 0.4981783330440521, + "learning_rate": 0.0001, + "loss": 0.0363, + "step": 38270 + }, + { + "epoch": 108.75, + "grad_norm": 0.5758175849914551, + "learning_rate": 0.0001, + "loss": 0.0353, + "step": 38280 + }, + { + "epoch": 108.7784090909091, + "grad_norm": 0.5388387441635132, + "learning_rate": 0.0001, + "loss": 0.0366, + "step": 38290 + }, + { + "epoch": 108.80681818181819, + "grad_norm": 0.6330029964447021, + "learning_rate": 0.0001, + "loss": 0.0353, + "step": 38300 + }, + { + "epoch": 108.83522727272727, + "grad_norm": 0.6576165556907654, + "learning_rate": 0.0001, + "loss": 0.0366, + "step": 38310 + }, + { + "epoch": 108.86363636363636, + "grad_norm": 0.6450666189193726, + "learning_rate": 0.0001, + "loss": 0.0377, + "step": 38320 + }, + { + "epoch": 108.89204545454545, + "grad_norm": 0.5270466208457947, + "learning_rate": 0.0001, + "loss": 0.037, + "step": 38330 + }, + { + "epoch": 108.92045454545455, + "grad_norm": 0.6218364834785461, + "learning_rate": 0.0001, + "loss": 0.036, + "step": 38340 + }, + { + "epoch": 108.94886363636364, + "grad_norm": 0.7764090895652771, + "learning_rate": 0.0001, + "loss": 0.0372, + "step": 38350 + }, + { + "epoch": 108.97727272727273, + "grad_norm": 0.7030161619186401, + "learning_rate": 0.0001, + "loss": 0.0374, + "step": 38360 + }, + { + "epoch": 109.00568181818181, + "grad_norm": 0.9898335933685303, + "learning_rate": 0.0001, + "loss": 0.0359, + "step": 38370 + }, + { + "epoch": 109.0340909090909, + "grad_norm": 1.0687246322631836, + "learning_rate": 0.0001, + "loss": 0.0379, + "step": 38380 + }, + { + "epoch": 109.0625, + "grad_norm": 0.8941261768341064, + "learning_rate": 0.0001, + "loss": 0.037, + "step": 38390 + }, + { + "epoch": 109.0909090909091, + "grad_norm": 0.9911275506019592, + "learning_rate": 0.0001, + "loss": 0.0373, + "step": 38400 + }, + { + "epoch": 109.11931818181819, + "grad_norm": 0.7970879077911377, + "learning_rate": 0.0001, + "loss": 0.0371, + "step": 38410 + }, + { + "epoch": 109.14772727272727, + "grad_norm": 0.5866600871086121, + "learning_rate": 0.0001, + "loss": 0.0355, + "step": 38420 + }, + { + "epoch": 109.17613636363636, + "grad_norm": 0.7543792128562927, + "learning_rate": 0.0001, + "loss": 0.0374, + "step": 38430 + }, + { + "epoch": 109.20454545454545, + "grad_norm": 0.9520198702812195, + "learning_rate": 0.0001, + "loss": 0.0376, + "step": 38440 + }, + { + "epoch": 109.23295454545455, + "grad_norm": 0.898628830909729, + "learning_rate": 0.0001, + "loss": 0.0365, + "step": 38450 + }, + { + "epoch": 109.26136363636364, + "grad_norm": 0.7924801707267761, + "learning_rate": 0.0001, + "loss": 0.0367, + "step": 38460 + }, + { + "epoch": 109.28977272727273, + "grad_norm": 0.6956962943077087, + "learning_rate": 0.0001, + "loss": 0.0357, + "step": 38470 + }, + { + "epoch": 109.31818181818181, + "grad_norm": 0.6093046069145203, + "learning_rate": 0.0001, + "loss": 0.0381, + "step": 38480 + }, + { + "epoch": 109.3465909090909, + "grad_norm": 0.8115942478179932, + "learning_rate": 0.0001, + "loss": 0.0359, + "step": 38490 + }, + { + "epoch": 109.375, + "grad_norm": 0.6574427485466003, + "learning_rate": 0.0001, + "loss": 0.0354, + "step": 38500 + }, + { + "epoch": 109.4034090909091, + "grad_norm": 0.48985546827316284, + "learning_rate": 0.0001, + "loss": 0.0362, + "step": 38510 + }, + { + "epoch": 109.43181818181819, + "grad_norm": 0.5606568455696106, + "learning_rate": 0.0001, + "loss": 0.0375, + "step": 38520 + }, + { + "epoch": 109.46022727272727, + "grad_norm": 0.5550665855407715, + "learning_rate": 0.0001, + "loss": 0.0351, + "step": 38530 + }, + { + "epoch": 109.48863636363636, + "grad_norm": 0.6146281361579895, + "learning_rate": 0.0001, + "loss": 0.0363, + "step": 38540 + }, + { + "epoch": 109.51704545454545, + "grad_norm": 0.5746130347251892, + "learning_rate": 0.0001, + "loss": 0.0354, + "step": 38550 + }, + { + "epoch": 109.54545454545455, + "grad_norm": 0.5612075328826904, + "learning_rate": 0.0001, + "loss": 0.0359, + "step": 38560 + }, + { + "epoch": 109.57386363636364, + "grad_norm": 0.623532235622406, + "learning_rate": 0.0001, + "loss": 0.0352, + "step": 38570 + }, + { + "epoch": 109.60227272727273, + "grad_norm": 0.5237706899642944, + "learning_rate": 0.0001, + "loss": 0.0374, + "step": 38580 + }, + { + "epoch": 109.63068181818181, + "grad_norm": 0.7601568102836609, + "learning_rate": 0.0001, + "loss": 0.0356, + "step": 38590 + }, + { + "epoch": 109.6590909090909, + "grad_norm": 0.7682206630706787, + "learning_rate": 0.0001, + "loss": 0.0367, + "step": 38600 + }, + { + "epoch": 109.6875, + "grad_norm": 0.6917203664779663, + "learning_rate": 0.0001, + "loss": 0.0362, + "step": 38610 + }, + { + "epoch": 109.7159090909091, + "grad_norm": 0.7227908372879028, + "learning_rate": 0.0001, + "loss": 0.0355, + "step": 38620 + }, + { + "epoch": 109.74431818181819, + "grad_norm": 0.6287977695465088, + "learning_rate": 0.0001, + "loss": 0.0356, + "step": 38630 + }, + { + "epoch": 109.77272727272727, + "grad_norm": 0.41634276509284973, + "learning_rate": 0.0001, + "loss": 0.0372, + "step": 38640 + }, + { + "epoch": 109.80113636363636, + "grad_norm": 0.5135223865509033, + "learning_rate": 0.0001, + "loss": 0.0374, + "step": 38650 + }, + { + "epoch": 109.82954545454545, + "grad_norm": 0.57159423828125, + "learning_rate": 0.0001, + "loss": 0.0362, + "step": 38660 + }, + { + "epoch": 109.85795454545455, + "grad_norm": 0.7393842935562134, + "learning_rate": 0.0001, + "loss": 0.0361, + "step": 38670 + }, + { + "epoch": 109.88636363636364, + "grad_norm": 0.8454589247703552, + "learning_rate": 0.0001, + "loss": 0.0365, + "step": 38680 + }, + { + "epoch": 109.91477272727273, + "grad_norm": 0.6690040230751038, + "learning_rate": 0.0001, + "loss": 0.0354, + "step": 38690 + }, + { + "epoch": 109.94318181818181, + "grad_norm": 1.1583824157714844, + "learning_rate": 0.0001, + "loss": 0.0375, + "step": 38700 + }, + { + "epoch": 109.9715909090909, + "grad_norm": 0.675502598285675, + "learning_rate": 0.0001, + "loss": 0.0366, + "step": 38710 + }, + { + "epoch": 110.0, + "grad_norm": 0.711791455745697, + "learning_rate": 0.0001, + "loss": 0.0379, + "step": 38720 + }, + { + "epoch": 110.0284090909091, + "grad_norm": 1.1118009090423584, + "learning_rate": 0.0001, + "loss": 0.0376, + "step": 38730 + }, + { + "epoch": 110.05681818181819, + "grad_norm": 1.0338175296783447, + "learning_rate": 0.0001, + "loss": 0.038, + "step": 38740 + }, + { + "epoch": 110.08522727272727, + "grad_norm": 1.348634123802185, + "learning_rate": 0.0001, + "loss": 0.0381, + "step": 38750 + }, + { + "epoch": 110.11363636363636, + "grad_norm": 1.231924295425415, + "learning_rate": 0.0001, + "loss": 0.0376, + "step": 38760 + }, + { + "epoch": 110.14204545454545, + "grad_norm": 1.2100741863250732, + "learning_rate": 0.0001, + "loss": 0.039, + "step": 38770 + }, + { + "epoch": 110.17045454545455, + "grad_norm": 0.7521945834159851, + "learning_rate": 0.0001, + "loss": 0.038, + "step": 38780 + }, + { + "epoch": 110.19886363636364, + "grad_norm": 1.066770315170288, + "learning_rate": 0.0001, + "loss": 0.0376, + "step": 38790 + }, + { + "epoch": 110.22727272727273, + "grad_norm": 0.9602776765823364, + "learning_rate": 0.0001, + "loss": 0.0372, + "step": 38800 + }, + { + "epoch": 110.25568181818181, + "grad_norm": 1.193345069885254, + "learning_rate": 0.0001, + "loss": 0.0368, + "step": 38810 + }, + { + "epoch": 110.2840909090909, + "grad_norm": 0.9646350741386414, + "learning_rate": 0.0001, + "loss": 0.0365, + "step": 38820 + }, + { + "epoch": 110.3125, + "grad_norm": 1.0762289762496948, + "learning_rate": 0.0001, + "loss": 0.0356, + "step": 38830 + }, + { + "epoch": 110.3409090909091, + "grad_norm": 1.0231586694717407, + "learning_rate": 0.0001, + "loss": 0.0351, + "step": 38840 + }, + { + "epoch": 110.36931818181819, + "grad_norm": 0.8765637278556824, + "learning_rate": 0.0001, + "loss": 0.0365, + "step": 38850 + }, + { + "epoch": 110.39772727272727, + "grad_norm": 0.7653793692588806, + "learning_rate": 0.0001, + "loss": 0.0354, + "step": 38860 + }, + { + "epoch": 110.42613636363636, + "grad_norm": 0.7483477592468262, + "learning_rate": 0.0001, + "loss": 0.0367, + "step": 38870 + }, + { + "epoch": 110.45454545454545, + "grad_norm": 0.7942159175872803, + "learning_rate": 0.0001, + "loss": 0.0361, + "step": 38880 + }, + { + "epoch": 110.48295454545455, + "grad_norm": 0.7775100469589233, + "learning_rate": 0.0001, + "loss": 0.0355, + "step": 38890 + }, + { + "epoch": 110.51136363636364, + "grad_norm": 0.6128475666046143, + "learning_rate": 0.0001, + "loss": 0.0381, + "step": 38900 + }, + { + "epoch": 110.53977272727273, + "grad_norm": 0.6405518054962158, + "learning_rate": 0.0001, + "loss": 0.0355, + "step": 38910 + }, + { + "epoch": 110.56818181818181, + "grad_norm": 0.6473897695541382, + "learning_rate": 0.0001, + "loss": 0.0356, + "step": 38920 + }, + { + "epoch": 110.5965909090909, + "grad_norm": 0.8890331387519836, + "learning_rate": 0.0001, + "loss": 0.0374, + "step": 38930 + }, + { + "epoch": 110.625, + "grad_norm": 0.9068458676338196, + "learning_rate": 0.0001, + "loss": 0.0362, + "step": 38940 + }, + { + "epoch": 110.6534090909091, + "grad_norm": 0.8818080425262451, + "learning_rate": 0.0001, + "loss": 0.0359, + "step": 38950 + }, + { + "epoch": 110.68181818181819, + "grad_norm": 1.1605606079101562, + "learning_rate": 0.0001, + "loss": 0.0358, + "step": 38960 + }, + { + "epoch": 110.71022727272727, + "grad_norm": 0.8575087785720825, + "learning_rate": 0.0001, + "loss": 0.036, + "step": 38970 + }, + { + "epoch": 110.73863636363636, + "grad_norm": 0.68806391954422, + "learning_rate": 0.0001, + "loss": 0.0359, + "step": 38980 + }, + { + "epoch": 110.76704545454545, + "grad_norm": 0.7244641184806824, + "learning_rate": 0.0001, + "loss": 0.0356, + "step": 38990 + }, + { + "epoch": 110.79545454545455, + "grad_norm": 0.7463381886482239, + "learning_rate": 0.0001, + "loss": 0.0369, + "step": 39000 + }, + { + "epoch": 110.82386363636364, + "grad_norm": 0.6760275959968567, + "learning_rate": 0.0001, + "loss": 0.0373, + "step": 39010 + }, + { + "epoch": 110.85227272727273, + "grad_norm": 0.6048280000686646, + "learning_rate": 0.0001, + "loss": 0.0351, + "step": 39020 + }, + { + "epoch": 110.88068181818181, + "grad_norm": 0.6237334609031677, + "learning_rate": 0.0001, + "loss": 0.0361, + "step": 39030 + }, + { + "epoch": 110.9090909090909, + "grad_norm": 0.5601615309715271, + "learning_rate": 0.0001, + "loss": 0.0356, + "step": 39040 + }, + { + "epoch": 110.9375, + "grad_norm": 0.657729983329773, + "learning_rate": 0.0001, + "loss": 0.0364, + "step": 39050 + }, + { + "epoch": 110.9659090909091, + "grad_norm": 0.7096065282821655, + "learning_rate": 0.0001, + "loss": 0.0366, + "step": 39060 + }, + { + "epoch": 110.99431818181819, + "grad_norm": 0.6007034182548523, + "learning_rate": 0.0001, + "loss": 0.0361, + "step": 39070 + }, + { + "epoch": 111.02272727272727, + "grad_norm": 0.675838053226471, + "learning_rate": 0.0001, + "loss": 0.0361, + "step": 39080 + }, + { + "epoch": 111.05113636363636, + "grad_norm": 0.6892589330673218, + "learning_rate": 0.0001, + "loss": 0.0361, + "step": 39090 + }, + { + "epoch": 111.07954545454545, + "grad_norm": 0.8822354674339294, + "learning_rate": 0.0001, + "loss": 0.0356, + "step": 39100 + }, + { + "epoch": 111.10795454545455, + "grad_norm": 0.7115966081619263, + "learning_rate": 0.0001, + "loss": 0.0353, + "step": 39110 + }, + { + "epoch": 111.13636363636364, + "grad_norm": 0.7949591279029846, + "learning_rate": 0.0001, + "loss": 0.0373, + "step": 39120 + }, + { + "epoch": 111.16477272727273, + "grad_norm": 0.8462267518043518, + "learning_rate": 0.0001, + "loss": 0.0345, + "step": 39130 + }, + { + "epoch": 111.19318181818181, + "grad_norm": 0.7834585905075073, + "learning_rate": 0.0001, + "loss": 0.0359, + "step": 39140 + }, + { + "epoch": 111.2215909090909, + "grad_norm": 0.7656166553497314, + "learning_rate": 0.0001, + "loss": 0.0359, + "step": 39150 + }, + { + "epoch": 111.25, + "grad_norm": 0.599009096622467, + "learning_rate": 0.0001, + "loss": 0.036, + "step": 39160 + }, + { + "epoch": 111.2784090909091, + "grad_norm": 0.8071345090866089, + "learning_rate": 0.0001, + "loss": 0.0376, + "step": 39170 + }, + { + "epoch": 111.30681818181819, + "grad_norm": 0.6010581254959106, + "learning_rate": 0.0001, + "loss": 0.037, + "step": 39180 + }, + { + "epoch": 111.33522727272727, + "grad_norm": 0.6166114807128906, + "learning_rate": 0.0001, + "loss": 0.0361, + "step": 39190 + }, + { + "epoch": 111.36363636363636, + "grad_norm": 0.5775583982467651, + "learning_rate": 0.0001, + "loss": 0.0368, + "step": 39200 + }, + { + "epoch": 111.39204545454545, + "grad_norm": 0.5263673067092896, + "learning_rate": 0.0001, + "loss": 0.0368, + "step": 39210 + }, + { + "epoch": 111.42045454545455, + "grad_norm": 1.2878767251968384, + "learning_rate": 0.0001, + "loss": 0.037, + "step": 39220 + }, + { + "epoch": 111.44886363636364, + "grad_norm": 1.5461721420288086, + "learning_rate": 0.0001, + "loss": 0.0365, + "step": 39230 + }, + { + "epoch": 111.47727272727273, + "grad_norm": 1.5255823135375977, + "learning_rate": 0.0001, + "loss": 0.037, + "step": 39240 + }, + { + "epoch": 111.50568181818181, + "grad_norm": 1.0932375192642212, + "learning_rate": 0.0001, + "loss": 0.036, + "step": 39250 + }, + { + "epoch": 111.5340909090909, + "grad_norm": 0.901646614074707, + "learning_rate": 0.0001, + "loss": 0.0359, + "step": 39260 + }, + { + "epoch": 111.5625, + "grad_norm": 0.6453569531440735, + "learning_rate": 0.0001, + "loss": 0.035, + "step": 39270 + }, + { + "epoch": 111.5909090909091, + "grad_norm": 0.5468852519989014, + "learning_rate": 0.0001, + "loss": 0.0355, + "step": 39280 + }, + { + "epoch": 111.61931818181819, + "grad_norm": 0.7907230257987976, + "learning_rate": 0.0001, + "loss": 0.0364, + "step": 39290 + }, + { + "epoch": 111.64772727272727, + "grad_norm": 0.8564003109931946, + "learning_rate": 0.0001, + "loss": 0.0354, + "step": 39300 + }, + { + "epoch": 111.67613636363636, + "grad_norm": 0.9764010310173035, + "learning_rate": 0.0001, + "loss": 0.037, + "step": 39310 + }, + { + "epoch": 111.70454545454545, + "grad_norm": 0.8942014575004578, + "learning_rate": 0.0001, + "loss": 0.0349, + "step": 39320 + }, + { + "epoch": 111.73295454545455, + "grad_norm": 0.5502811074256897, + "learning_rate": 0.0001, + "loss": 0.0349, + "step": 39330 + }, + { + "epoch": 111.76136363636364, + "grad_norm": 0.8766093850135803, + "learning_rate": 0.0001, + "loss": 0.0348, + "step": 39340 + }, + { + "epoch": 111.78977272727273, + "grad_norm": 0.98386150598526, + "learning_rate": 0.0001, + "loss": 0.0353, + "step": 39350 + }, + { + "epoch": 111.81818181818181, + "grad_norm": 0.9076448678970337, + "learning_rate": 0.0001, + "loss": 0.0364, + "step": 39360 + }, + { + "epoch": 111.8465909090909, + "grad_norm": 0.9458385109901428, + "learning_rate": 0.0001, + "loss": 0.0357, + "step": 39370 + }, + { + "epoch": 111.875, + "grad_norm": 0.7242578864097595, + "learning_rate": 0.0001, + "loss": 0.0363, + "step": 39380 + }, + { + "epoch": 111.9034090909091, + "grad_norm": 0.9158487915992737, + "learning_rate": 0.0001, + "loss": 0.0355, + "step": 39390 + }, + { + "epoch": 111.93181818181819, + "grad_norm": 0.7818379402160645, + "learning_rate": 0.0001, + "loss": 0.037, + "step": 39400 + }, + { + "epoch": 111.96022727272727, + "grad_norm": 0.8776116967201233, + "learning_rate": 0.0001, + "loss": 0.0371, + "step": 39410 + }, + { + "epoch": 111.98863636363636, + "grad_norm": 0.8241115808486938, + "learning_rate": 0.0001, + "loss": 0.0365, + "step": 39420 + }, + { + "epoch": 112.01704545454545, + "grad_norm": 0.5597853660583496, + "learning_rate": 0.0001, + "loss": 0.0358, + "step": 39430 + }, + { + "epoch": 112.04545454545455, + "grad_norm": 0.6248016953468323, + "learning_rate": 0.0001, + "loss": 0.0355, + "step": 39440 + }, + { + "epoch": 112.07386363636364, + "grad_norm": 0.5273939967155457, + "learning_rate": 0.0001, + "loss": 0.0355, + "step": 39450 + }, + { + "epoch": 112.10227272727273, + "grad_norm": 0.4962223172187805, + "learning_rate": 0.0001, + "loss": 0.0355, + "step": 39460 + }, + { + "epoch": 112.13068181818181, + "grad_norm": 0.6138328313827515, + "learning_rate": 0.0001, + "loss": 0.0365, + "step": 39470 + }, + { + "epoch": 112.1590909090909, + "grad_norm": 0.4526226222515106, + "learning_rate": 0.0001, + "loss": 0.0363, + "step": 39480 + }, + { + "epoch": 112.1875, + "grad_norm": 0.5858993530273438, + "learning_rate": 0.0001, + "loss": 0.036, + "step": 39490 + }, + { + "epoch": 112.2159090909091, + "grad_norm": 0.6115566492080688, + "learning_rate": 0.0001, + "loss": 0.0345, + "step": 39500 + }, + { + "epoch": 112.24431818181819, + "grad_norm": 0.5232250094413757, + "learning_rate": 0.0001, + "loss": 0.0365, + "step": 39510 + }, + { + "epoch": 112.27272727272727, + "grad_norm": 0.5535911321640015, + "learning_rate": 0.0001, + "loss": 0.0347, + "step": 39520 + }, + { + "epoch": 112.30113636363636, + "grad_norm": 0.7334476113319397, + "learning_rate": 0.0001, + "loss": 0.0349, + "step": 39530 + }, + { + "epoch": 112.32954545454545, + "grad_norm": 0.569206178188324, + "learning_rate": 0.0001, + "loss": 0.0344, + "step": 39540 + }, + { + "epoch": 112.35795454545455, + "grad_norm": 0.9461883902549744, + "learning_rate": 0.0001, + "loss": 0.0361, + "step": 39550 + }, + { + "epoch": 112.38636363636364, + "grad_norm": 0.8097552061080933, + "learning_rate": 0.0001, + "loss": 0.0363, + "step": 39560 + }, + { + "epoch": 112.41477272727273, + "grad_norm": 0.6988733410835266, + "learning_rate": 0.0001, + "loss": 0.0366, + "step": 39570 + }, + { + "epoch": 112.44318181818181, + "grad_norm": 0.7010588645935059, + "learning_rate": 0.0001, + "loss": 0.0358, + "step": 39580 + }, + { + "epoch": 112.4715909090909, + "grad_norm": 0.6968778967857361, + "learning_rate": 0.0001, + "loss": 0.0368, + "step": 39590 + }, + { + "epoch": 112.5, + "grad_norm": 0.7270570397377014, + "learning_rate": 0.0001, + "loss": 0.0357, + "step": 39600 + }, + { + "epoch": 112.5284090909091, + "grad_norm": 0.5744696855545044, + "learning_rate": 0.0001, + "loss": 0.0367, + "step": 39610 + }, + { + "epoch": 112.55681818181819, + "grad_norm": 0.6304721832275391, + "learning_rate": 0.0001, + "loss": 0.0357, + "step": 39620 + }, + { + "epoch": 112.58522727272727, + "grad_norm": 0.6305207014083862, + "learning_rate": 0.0001, + "loss": 0.0357, + "step": 39630 + }, + { + "epoch": 112.61363636363636, + "grad_norm": 0.7027773261070251, + "learning_rate": 0.0001, + "loss": 0.0365, + "step": 39640 + }, + { + "epoch": 112.64204545454545, + "grad_norm": 0.6099282503128052, + "learning_rate": 0.0001, + "loss": 0.0361, + "step": 39650 + }, + { + "epoch": 112.67045454545455, + "grad_norm": 1.234322428703308, + "learning_rate": 0.0001, + "loss": 0.0361, + "step": 39660 + }, + { + "epoch": 112.69886363636364, + "grad_norm": 1.157665729522705, + "learning_rate": 0.0001, + "loss": 0.0358, + "step": 39670 + }, + { + "epoch": 112.72727272727273, + "grad_norm": 1.371219277381897, + "learning_rate": 0.0001, + "loss": 0.0377, + "step": 39680 + }, + { + "epoch": 112.75568181818181, + "grad_norm": 1.147692322731018, + "learning_rate": 0.0001, + "loss": 0.0364, + "step": 39690 + }, + { + "epoch": 112.7840909090909, + "grad_norm": 0.7648851871490479, + "learning_rate": 0.0001, + "loss": 0.0342, + "step": 39700 + }, + { + "epoch": 112.8125, + "grad_norm": 0.6350628733634949, + "learning_rate": 0.0001, + "loss": 0.036, + "step": 39710 + }, + { + "epoch": 112.8409090909091, + "grad_norm": 0.6837986707687378, + "learning_rate": 0.0001, + "loss": 0.0343, + "step": 39720 + }, + { + "epoch": 112.86931818181819, + "grad_norm": 0.6625135540962219, + "learning_rate": 0.0001, + "loss": 0.0345, + "step": 39730 + }, + { + "epoch": 112.89772727272727, + "grad_norm": 0.8770467638969421, + "learning_rate": 0.0001, + "loss": 0.0354, + "step": 39740 + }, + { + "epoch": 112.92613636363636, + "grad_norm": 0.8206093311309814, + "learning_rate": 0.0001, + "loss": 0.0367, + "step": 39750 + }, + { + "epoch": 112.95454545454545, + "grad_norm": 0.8660224080085754, + "learning_rate": 0.0001, + "loss": 0.0362, + "step": 39760 + }, + { + "epoch": 112.98295454545455, + "grad_norm": 0.6569976806640625, + "learning_rate": 0.0001, + "loss": 0.0371, + "step": 39770 + }, + { + "epoch": 113.01136363636364, + "grad_norm": 0.9233992099761963, + "learning_rate": 0.0001, + "loss": 0.0352, + "step": 39780 + }, + { + "epoch": 113.03977272727273, + "grad_norm": 0.8801051378250122, + "learning_rate": 0.0001, + "loss": 0.035, + "step": 39790 + }, + { + "epoch": 113.06818181818181, + "grad_norm": 0.773917019367218, + "learning_rate": 0.0001, + "loss": 0.0359, + "step": 39800 + }, + { + "epoch": 113.0965909090909, + "grad_norm": 0.7063732147216797, + "learning_rate": 0.0001, + "loss": 0.0354, + "step": 39810 + }, + { + "epoch": 113.125, + "grad_norm": 0.7631141543388367, + "learning_rate": 0.0001, + "loss": 0.0364, + "step": 39820 + }, + { + "epoch": 113.1534090909091, + "grad_norm": 0.7640942931175232, + "learning_rate": 0.0001, + "loss": 0.0351, + "step": 39830 + }, + { + "epoch": 113.18181818181819, + "grad_norm": 0.6806530356407166, + "learning_rate": 0.0001, + "loss": 0.0351, + "step": 39840 + }, + { + "epoch": 113.21022727272727, + "grad_norm": 0.627130389213562, + "learning_rate": 0.0001, + "loss": 0.0358, + "step": 39850 + }, + { + "epoch": 113.23863636363636, + "grad_norm": 0.4664798378944397, + "learning_rate": 0.0001, + "loss": 0.0354, + "step": 39860 + }, + { + "epoch": 113.26704545454545, + "grad_norm": 0.6047282814979553, + "learning_rate": 0.0001, + "loss": 0.0346, + "step": 39870 + }, + { + "epoch": 113.29545454545455, + "grad_norm": 0.8172615170478821, + "learning_rate": 0.0001, + "loss": 0.0346, + "step": 39880 + }, + { + "epoch": 113.32386363636364, + "grad_norm": 0.7047287821769714, + "learning_rate": 0.0001, + "loss": 0.0364, + "step": 39890 + }, + { + "epoch": 113.35227272727273, + "grad_norm": 0.6957646012306213, + "learning_rate": 0.0001, + "loss": 0.0361, + "step": 39900 + }, + { + "epoch": 113.38068181818181, + "grad_norm": 0.7700342535972595, + "learning_rate": 0.0001, + "loss": 0.0352, + "step": 39910 + }, + { + "epoch": 113.4090909090909, + "grad_norm": 0.6716678738594055, + "learning_rate": 0.0001, + "loss": 0.0355, + "step": 39920 + }, + { + "epoch": 113.4375, + "grad_norm": 0.6276105046272278, + "learning_rate": 0.0001, + "loss": 0.036, + "step": 39930 + }, + { + "epoch": 113.4659090909091, + "grad_norm": 0.7621968388557434, + "learning_rate": 0.0001, + "loss": 0.0359, + "step": 39940 + }, + { + "epoch": 113.49431818181819, + "grad_norm": 0.5619763135910034, + "learning_rate": 0.0001, + "loss": 0.0362, + "step": 39950 + }, + { + "epoch": 113.52272727272727, + "grad_norm": 0.5187546014785767, + "learning_rate": 0.0001, + "loss": 0.0356, + "step": 39960 + }, + { + "epoch": 113.55113636363636, + "grad_norm": 0.7270740866661072, + "learning_rate": 0.0001, + "loss": 0.0357, + "step": 39970 + }, + { + "epoch": 113.57954545454545, + "grad_norm": 0.49849000573158264, + "learning_rate": 0.0001, + "loss": 0.0365, + "step": 39980 + }, + { + "epoch": 113.60795454545455, + "grad_norm": 1.0504893064498901, + "learning_rate": 0.0001, + "loss": 0.0353, + "step": 39990 + }, + { + "epoch": 113.63636363636364, + "grad_norm": 0.6417130827903748, + "learning_rate": 0.0001, + "loss": 0.0342, + "step": 40000 + }, + { + "epoch": 113.66477272727273, + "grad_norm": 0.6625685691833496, + "learning_rate": 0.0001, + "loss": 0.0359, + "step": 40010 + }, + { + "epoch": 113.69318181818181, + "grad_norm": 0.6271093487739563, + "learning_rate": 0.0001, + "loss": 0.0352, + "step": 40020 + }, + { + "epoch": 113.7215909090909, + "grad_norm": 0.6028963923454285, + "learning_rate": 0.0001, + "loss": 0.0357, + "step": 40030 + }, + { + "epoch": 113.75, + "grad_norm": 0.5805331468582153, + "learning_rate": 0.0001, + "loss": 0.0368, + "step": 40040 + }, + { + "epoch": 113.7784090909091, + "grad_norm": 0.594214677810669, + "learning_rate": 0.0001, + "loss": 0.0367, + "step": 40050 + }, + { + "epoch": 113.80681818181819, + "grad_norm": 0.6908999681472778, + "learning_rate": 0.0001, + "loss": 0.0353, + "step": 40060 + }, + { + "epoch": 113.83522727272727, + "grad_norm": 0.53608238697052, + "learning_rate": 0.0001, + "loss": 0.0358, + "step": 40070 + }, + { + "epoch": 113.86363636363636, + "grad_norm": 0.4885925352573395, + "learning_rate": 0.0001, + "loss": 0.0353, + "step": 40080 + }, + { + "epoch": 113.89204545454545, + "grad_norm": 0.5205367803573608, + "learning_rate": 0.0001, + "loss": 0.0359, + "step": 40090 + }, + { + "epoch": 113.92045454545455, + "grad_norm": 0.5592257976531982, + "learning_rate": 0.0001, + "loss": 0.0356, + "step": 40100 + }, + { + "epoch": 113.94886363636364, + "grad_norm": 0.5714760422706604, + "learning_rate": 0.0001, + "loss": 0.0373, + "step": 40110 + }, + { + "epoch": 113.97727272727273, + "grad_norm": 0.6751970648765564, + "learning_rate": 0.0001, + "loss": 0.0361, + "step": 40120 + }, + { + "epoch": 114.00568181818181, + "grad_norm": 0.6371946334838867, + "learning_rate": 0.0001, + "loss": 0.0362, + "step": 40130 + }, + { + "epoch": 114.0340909090909, + "grad_norm": 0.6978771686553955, + "learning_rate": 0.0001, + "loss": 0.0353, + "step": 40140 + }, + { + "epoch": 114.0625, + "grad_norm": 0.598162829875946, + "learning_rate": 0.0001, + "loss": 0.0357, + "step": 40150 + }, + { + "epoch": 114.0909090909091, + "grad_norm": 0.6158064007759094, + "learning_rate": 0.0001, + "loss": 0.0364, + "step": 40160 + }, + { + "epoch": 114.11931818181819, + "grad_norm": 0.5208731889724731, + "learning_rate": 0.0001, + "loss": 0.0356, + "step": 40170 + }, + { + "epoch": 114.14772727272727, + "grad_norm": 0.5302230715751648, + "learning_rate": 0.0001, + "loss": 0.0353, + "step": 40180 + }, + { + "epoch": 114.17613636363636, + "grad_norm": 0.5512319803237915, + "learning_rate": 0.0001, + "loss": 0.0347, + "step": 40190 + }, + { + "epoch": 114.20454545454545, + "grad_norm": 0.43418312072753906, + "learning_rate": 0.0001, + "loss": 0.0354, + "step": 40200 + }, + { + "epoch": 114.23295454545455, + "grad_norm": 0.5973665118217468, + "learning_rate": 0.0001, + "loss": 0.0352, + "step": 40210 + }, + { + "epoch": 114.26136363636364, + "grad_norm": 0.5855032801628113, + "learning_rate": 0.0001, + "loss": 0.0354, + "step": 40220 + }, + { + "epoch": 114.28977272727273, + "grad_norm": 0.5427087545394897, + "learning_rate": 0.0001, + "loss": 0.035, + "step": 40230 + }, + { + "epoch": 114.31818181818181, + "grad_norm": 0.7128482460975647, + "learning_rate": 0.0001, + "loss": 0.0352, + "step": 40240 + }, + { + "epoch": 114.3465909090909, + "grad_norm": 0.4120272696018219, + "learning_rate": 0.0001, + "loss": 0.036, + "step": 40250 + }, + { + "epoch": 114.375, + "grad_norm": 0.4165058135986328, + "learning_rate": 0.0001, + "loss": 0.035, + "step": 40260 + }, + { + "epoch": 114.4034090909091, + "grad_norm": 0.4229552447795868, + "learning_rate": 0.0001, + "loss": 0.0353, + "step": 40270 + }, + { + "epoch": 114.43181818181819, + "grad_norm": 0.526804506778717, + "learning_rate": 0.0001, + "loss": 0.035, + "step": 40280 + }, + { + "epoch": 114.46022727272727, + "grad_norm": 0.5471256971359253, + "learning_rate": 0.0001, + "loss": 0.0355, + "step": 40290 + }, + { + "epoch": 114.48863636363636, + "grad_norm": 0.7126163840293884, + "learning_rate": 0.0001, + "loss": 0.0336, + "step": 40300 + }, + { + "epoch": 114.51704545454545, + "grad_norm": 0.5300715565681458, + "learning_rate": 0.0001, + "loss": 0.0351, + "step": 40310 + }, + { + "epoch": 114.54545454545455, + "grad_norm": 0.5716768503189087, + "learning_rate": 0.0001, + "loss": 0.0346, + "step": 40320 + }, + { + "epoch": 114.57386363636364, + "grad_norm": 0.5660290122032166, + "learning_rate": 0.0001, + "loss": 0.0354, + "step": 40330 + }, + { + "epoch": 114.60227272727273, + "grad_norm": 0.4344553053379059, + "learning_rate": 0.0001, + "loss": 0.0349, + "step": 40340 + }, + { + "epoch": 114.63068181818181, + "grad_norm": 0.5412406921386719, + "learning_rate": 0.0001, + "loss": 0.0358, + "step": 40350 + }, + { + "epoch": 114.6590909090909, + "grad_norm": 0.6711289286613464, + "learning_rate": 0.0001, + "loss": 0.0364, + "step": 40360 + }, + { + "epoch": 114.6875, + "grad_norm": 0.783184289932251, + "learning_rate": 0.0001, + "loss": 0.0358, + "step": 40370 + }, + { + "epoch": 114.7159090909091, + "grad_norm": 0.819743812084198, + "learning_rate": 0.0001, + "loss": 0.0343, + "step": 40380 + }, + { + "epoch": 114.74431818181819, + "grad_norm": 0.7587737441062927, + "learning_rate": 0.0001, + "loss": 0.0354, + "step": 40390 + }, + { + "epoch": 114.77272727272727, + "grad_norm": 0.6675707101821899, + "learning_rate": 0.0001, + "loss": 0.0356, + "step": 40400 + }, + { + "epoch": 114.80113636363636, + "grad_norm": 0.5723679065704346, + "learning_rate": 0.0001, + "loss": 0.0368, + "step": 40410 + }, + { + "epoch": 114.82954545454545, + "grad_norm": 0.6985281109809875, + "learning_rate": 0.0001, + "loss": 0.0362, + "step": 40420 + }, + { + "epoch": 114.85795454545455, + "grad_norm": 1.0145719051361084, + "learning_rate": 0.0001, + "loss": 0.0357, + "step": 40430 + }, + { + "epoch": 114.88636363636364, + "grad_norm": 1.0415736436843872, + "learning_rate": 0.0001, + "loss": 0.0352, + "step": 40440 + }, + { + "epoch": 114.91477272727273, + "grad_norm": 0.74834805727005, + "learning_rate": 0.0001, + "loss": 0.0362, + "step": 40450 + }, + { + "epoch": 114.94318181818181, + "grad_norm": 1.022322654724121, + "learning_rate": 0.0001, + "loss": 0.0346, + "step": 40460 + }, + { + "epoch": 114.9715909090909, + "grad_norm": 0.8849312663078308, + "learning_rate": 0.0001, + "loss": 0.0357, + "step": 40470 + }, + { + "epoch": 115.0, + "grad_norm": 0.602628231048584, + "learning_rate": 0.0001, + "loss": 0.0347, + "step": 40480 + }, + { + "epoch": 115.0284090909091, + "grad_norm": 0.8007441163063049, + "learning_rate": 0.0001, + "loss": 0.0366, + "step": 40490 + }, + { + "epoch": 115.05681818181819, + "grad_norm": 0.8990688323974609, + "learning_rate": 0.0001, + "loss": 0.0349, + "step": 40500 + }, + { + "epoch": 115.08522727272727, + "grad_norm": 0.8828384876251221, + "learning_rate": 0.0001, + "loss": 0.0345, + "step": 40510 + }, + { + "epoch": 115.11363636363636, + "grad_norm": 1.0378295183181763, + "learning_rate": 0.0001, + "loss": 0.0344, + "step": 40520 + }, + { + "epoch": 115.14204545454545, + "grad_norm": 1.0264496803283691, + "learning_rate": 0.0001, + "loss": 0.0352, + "step": 40530 + }, + { + "epoch": 115.17045454545455, + "grad_norm": 1.1999590396881104, + "learning_rate": 0.0001, + "loss": 0.0343, + "step": 40540 + }, + { + "epoch": 115.19886363636364, + "grad_norm": 0.8743994832038879, + "learning_rate": 0.0001, + "loss": 0.0346, + "step": 40550 + }, + { + "epoch": 115.22727272727273, + "grad_norm": 0.5800889730453491, + "learning_rate": 0.0001, + "loss": 0.0352, + "step": 40560 + }, + { + "epoch": 115.25568181818181, + "grad_norm": 0.8396671414375305, + "learning_rate": 0.0001, + "loss": 0.0354, + "step": 40570 + }, + { + "epoch": 115.2840909090909, + "grad_norm": 0.8173549175262451, + "learning_rate": 0.0001, + "loss": 0.0345, + "step": 40580 + }, + { + "epoch": 115.3125, + "grad_norm": 0.7395663857460022, + "learning_rate": 0.0001, + "loss": 0.035, + "step": 40590 + }, + { + "epoch": 115.3409090909091, + "grad_norm": 0.6188920140266418, + "learning_rate": 0.0001, + "loss": 0.0352, + "step": 40600 + }, + { + "epoch": 115.36931818181819, + "grad_norm": 0.5487187504768372, + "learning_rate": 0.0001, + "loss": 0.0363, + "step": 40610 + }, + { + "epoch": 115.39772727272727, + "grad_norm": 0.5410171151161194, + "learning_rate": 0.0001, + "loss": 0.0354, + "step": 40620 + }, + { + "epoch": 115.42613636363636, + "grad_norm": 0.5235713124275208, + "learning_rate": 0.0001, + "loss": 0.0352, + "step": 40630 + }, + { + "epoch": 115.45454545454545, + "grad_norm": 0.7061126232147217, + "learning_rate": 0.0001, + "loss": 0.035, + "step": 40640 + }, + { + "epoch": 115.48295454545455, + "grad_norm": 0.7242769598960876, + "learning_rate": 0.0001, + "loss": 0.0356, + "step": 40650 + }, + { + "epoch": 115.51136363636364, + "grad_norm": 0.6157847046852112, + "learning_rate": 0.0001, + "loss": 0.0353, + "step": 40660 + }, + { + "epoch": 115.53977272727273, + "grad_norm": 0.6777865886688232, + "learning_rate": 0.0001, + "loss": 0.0361, + "step": 40670 + }, + { + "epoch": 115.56818181818181, + "grad_norm": 0.5488711595535278, + "learning_rate": 0.0001, + "loss": 0.0365, + "step": 40680 + }, + { + "epoch": 115.5965909090909, + "grad_norm": 0.7855844497680664, + "learning_rate": 0.0001, + "loss": 0.036, + "step": 40690 + }, + { + "epoch": 115.625, + "grad_norm": 0.640532910823822, + "learning_rate": 0.0001, + "loss": 0.0365, + "step": 40700 + }, + { + "epoch": 115.6534090909091, + "grad_norm": 0.5868560671806335, + "learning_rate": 0.0001, + "loss": 0.038, + "step": 40710 + }, + { + "epoch": 115.68181818181819, + "grad_norm": 0.8778387308120728, + "learning_rate": 0.0001, + "loss": 0.0354, + "step": 40720 + }, + { + "epoch": 115.71022727272727, + "grad_norm": 0.6132923364639282, + "learning_rate": 0.0001, + "loss": 0.0364, + "step": 40730 + }, + { + "epoch": 115.73863636363636, + "grad_norm": 0.6299883127212524, + "learning_rate": 0.0001, + "loss": 0.0357, + "step": 40740 + }, + { + "epoch": 115.76704545454545, + "grad_norm": 0.6556488871574402, + "learning_rate": 0.0001, + "loss": 0.0356, + "step": 40750 + }, + { + "epoch": 115.79545454545455, + "grad_norm": 0.7025827169418335, + "learning_rate": 0.0001, + "loss": 0.036, + "step": 40760 + }, + { + "epoch": 115.82386363636364, + "grad_norm": 0.49530184268951416, + "learning_rate": 0.0001, + "loss": 0.037, + "step": 40770 + }, + { + "epoch": 115.85227272727273, + "grad_norm": 0.6061105728149414, + "learning_rate": 0.0001, + "loss": 0.0356, + "step": 40780 + }, + { + "epoch": 115.88068181818181, + "grad_norm": 0.5180947184562683, + "learning_rate": 0.0001, + "loss": 0.0366, + "step": 40790 + }, + { + "epoch": 115.9090909090909, + "grad_norm": 0.584722101688385, + "learning_rate": 0.0001, + "loss": 0.0361, + "step": 40800 + }, + { + "epoch": 115.9375, + "grad_norm": 0.5374404788017273, + "learning_rate": 0.0001, + "loss": 0.0353, + "step": 40810 + }, + { + "epoch": 115.9659090909091, + "grad_norm": 0.5954856276512146, + "learning_rate": 0.0001, + "loss": 0.0357, + "step": 40820 + }, + { + "epoch": 115.99431818181819, + "grad_norm": 0.5415761470794678, + "learning_rate": 0.0001, + "loss": 0.0372, + "step": 40830 + }, + { + "epoch": 116.02272727272727, + "grad_norm": 0.6761354804039001, + "learning_rate": 0.0001, + "loss": 0.0372, + "step": 40840 + }, + { + "epoch": 116.05113636363636, + "grad_norm": 0.5434542894363403, + "learning_rate": 0.0001, + "loss": 0.0355, + "step": 40850 + }, + { + "epoch": 116.07954545454545, + "grad_norm": 0.67738276720047, + "learning_rate": 0.0001, + "loss": 0.036, + "step": 40860 + }, + { + "epoch": 116.10795454545455, + "grad_norm": 0.5063143968582153, + "learning_rate": 0.0001, + "loss": 0.0357, + "step": 40870 + }, + { + "epoch": 116.13636363636364, + "grad_norm": 0.642759382724762, + "learning_rate": 0.0001, + "loss": 0.0363, + "step": 40880 + }, + { + "epoch": 116.16477272727273, + "grad_norm": 0.6486865878105164, + "learning_rate": 0.0001, + "loss": 0.0368, + "step": 40890 + }, + { + "epoch": 116.19318181818181, + "grad_norm": 0.6188929080963135, + "learning_rate": 0.0001, + "loss": 0.0401, + "step": 40900 + }, + { + "epoch": 116.2215909090909, + "grad_norm": 0.5677132606506348, + "learning_rate": 0.0001, + "loss": 0.0391, + "step": 40910 + }, + { + "epoch": 116.25, + "grad_norm": 0.583200991153717, + "learning_rate": 0.0001, + "loss": 0.0379, + "step": 40920 + }, + { + "epoch": 116.2784090909091, + "grad_norm": 0.6927564740180969, + "learning_rate": 0.0001, + "loss": 0.037, + "step": 40930 + }, + { + "epoch": 116.30681818181819, + "grad_norm": 0.5694494247436523, + "learning_rate": 0.0001, + "loss": 0.0365, + "step": 40940 + }, + { + "epoch": 116.33522727272727, + "grad_norm": 0.6229391098022461, + "learning_rate": 0.0001, + "loss": 0.0379, + "step": 40950 + }, + { + "epoch": 116.36363636363636, + "grad_norm": 0.6101709008216858, + "learning_rate": 0.0001, + "loss": 0.0375, + "step": 40960 + }, + { + "epoch": 116.39204545454545, + "grad_norm": 0.5544459819793701, + "learning_rate": 0.0001, + "loss": 0.0356, + "step": 40970 + }, + { + "epoch": 116.42045454545455, + "grad_norm": 0.6003863215446472, + "learning_rate": 0.0001, + "loss": 0.0368, + "step": 40980 + }, + { + "epoch": 116.44886363636364, + "grad_norm": 0.735579788684845, + "learning_rate": 0.0001, + "loss": 0.0352, + "step": 40990 + }, + { + "epoch": 116.47727272727273, + "grad_norm": 0.5912768244743347, + "learning_rate": 0.0001, + "loss": 0.0356, + "step": 41000 + }, + { + "epoch": 116.50568181818181, + "grad_norm": 0.5649675726890564, + "learning_rate": 0.0001, + "loss": 0.0357, + "step": 41010 + }, + { + "epoch": 116.5340909090909, + "grad_norm": 0.9016214609146118, + "learning_rate": 0.0001, + "loss": 0.0354, + "step": 41020 + }, + { + "epoch": 116.5625, + "grad_norm": 0.6981037259101868, + "learning_rate": 0.0001, + "loss": 0.0372, + "step": 41030 + }, + { + "epoch": 116.5909090909091, + "grad_norm": 0.7360560297966003, + "learning_rate": 0.0001, + "loss": 0.0359, + "step": 41040 + }, + { + "epoch": 116.61931818181819, + "grad_norm": 0.9419438242912292, + "learning_rate": 0.0001, + "loss": 0.0361, + "step": 41050 + }, + { + "epoch": 116.64772727272727, + "grad_norm": 0.7652466297149658, + "learning_rate": 0.0001, + "loss": 0.0354, + "step": 41060 + }, + { + "epoch": 116.67613636363636, + "grad_norm": 0.8323368430137634, + "learning_rate": 0.0001, + "loss": 0.0369, + "step": 41070 + }, + { + "epoch": 116.70454545454545, + "grad_norm": 0.625653088092804, + "learning_rate": 0.0001, + "loss": 0.0348, + "step": 41080 + }, + { + "epoch": 116.73295454545455, + "grad_norm": 0.8039312362670898, + "learning_rate": 0.0001, + "loss": 0.0362, + "step": 41090 + }, + { + "epoch": 116.76136363636364, + "grad_norm": 0.7268533706665039, + "learning_rate": 0.0001, + "loss": 0.0341, + "step": 41100 + }, + { + "epoch": 116.78977272727273, + "grad_norm": 0.7572269439697266, + "learning_rate": 0.0001, + "loss": 0.0354, + "step": 41110 + }, + { + "epoch": 116.81818181818181, + "grad_norm": 0.695713460445404, + "learning_rate": 0.0001, + "loss": 0.0359, + "step": 41120 + }, + { + "epoch": 116.8465909090909, + "grad_norm": 0.7137623429298401, + "learning_rate": 0.0001, + "loss": 0.0346, + "step": 41130 + }, + { + "epoch": 116.875, + "grad_norm": 0.6217263340950012, + "learning_rate": 0.0001, + "loss": 0.0359, + "step": 41140 + }, + { + "epoch": 116.9034090909091, + "grad_norm": 0.6812773942947388, + "learning_rate": 0.0001, + "loss": 0.0355, + "step": 41150 + }, + { + "epoch": 116.93181818181819, + "grad_norm": 0.6269494891166687, + "learning_rate": 0.0001, + "loss": 0.0354, + "step": 41160 + }, + { + "epoch": 116.96022727272727, + "grad_norm": 0.5813413262367249, + "learning_rate": 0.0001, + "loss": 0.0354, + "step": 41170 + }, + { + "epoch": 116.98863636363636, + "grad_norm": 0.47251439094543457, + "learning_rate": 0.0001, + "loss": 0.0346, + "step": 41180 + }, + { + "epoch": 117.01704545454545, + "grad_norm": 0.82036954164505, + "learning_rate": 0.0001, + "loss": 0.0352, + "step": 41190 + }, + { + "epoch": 117.04545454545455, + "grad_norm": 0.8919968605041504, + "learning_rate": 0.0001, + "loss": 0.0374, + "step": 41200 + }, + { + "epoch": 117.07386363636364, + "grad_norm": 0.5334138870239258, + "learning_rate": 0.0001, + "loss": 0.0356, + "step": 41210 + }, + { + "epoch": 117.10227272727273, + "grad_norm": 0.4660698473453522, + "learning_rate": 0.0001, + "loss": 0.0353, + "step": 41220 + }, + { + "epoch": 117.13068181818181, + "grad_norm": 0.6906890869140625, + "learning_rate": 0.0001, + "loss": 0.0358, + "step": 41230 + }, + { + "epoch": 117.1590909090909, + "grad_norm": 0.8132134675979614, + "learning_rate": 0.0001, + "loss": 0.0352, + "step": 41240 + }, + { + "epoch": 117.1875, + "grad_norm": 0.8870444297790527, + "learning_rate": 0.0001, + "loss": 0.0359, + "step": 41250 + }, + { + "epoch": 117.2159090909091, + "grad_norm": 0.7599701285362244, + "learning_rate": 0.0001, + "loss": 0.0348, + "step": 41260 + }, + { + "epoch": 117.24431818181819, + "grad_norm": 0.8724075555801392, + "learning_rate": 0.0001, + "loss": 0.0352, + "step": 41270 + }, + { + "epoch": 117.27272727272727, + "grad_norm": 0.6574482917785645, + "learning_rate": 0.0001, + "loss": 0.0358, + "step": 41280 + }, + { + "epoch": 117.30113636363636, + "grad_norm": 0.7404472231864929, + "learning_rate": 0.0001, + "loss": 0.0345, + "step": 41290 + }, + { + "epoch": 117.32954545454545, + "grad_norm": 0.8209742903709412, + "learning_rate": 0.0001, + "loss": 0.0351, + "step": 41300 + }, + { + "epoch": 117.35795454545455, + "grad_norm": 1.0132182836532593, + "learning_rate": 0.0001, + "loss": 0.0356, + "step": 41310 + }, + { + "epoch": 117.38636363636364, + "grad_norm": 1.2387603521347046, + "learning_rate": 0.0001, + "loss": 0.0363, + "step": 41320 + }, + { + "epoch": 117.41477272727273, + "grad_norm": 1.3428541421890259, + "learning_rate": 0.0001, + "loss": 0.0353, + "step": 41330 + }, + { + "epoch": 117.44318181818181, + "grad_norm": 1.3659002780914307, + "learning_rate": 0.0001, + "loss": 0.0345, + "step": 41340 + }, + { + "epoch": 117.4715909090909, + "grad_norm": 1.2876734733581543, + "learning_rate": 0.0001, + "loss": 0.0337, + "step": 41350 + }, + { + "epoch": 117.5, + "grad_norm": 1.0242540836334229, + "learning_rate": 0.0001, + "loss": 0.0346, + "step": 41360 + }, + { + "epoch": 117.5284090909091, + "grad_norm": 0.6913342475891113, + "learning_rate": 0.0001, + "loss": 0.0341, + "step": 41370 + }, + { + "epoch": 117.55681818181819, + "grad_norm": 0.7995432019233704, + "learning_rate": 0.0001, + "loss": 0.0336, + "step": 41380 + }, + { + "epoch": 117.58522727272727, + "grad_norm": 0.8866602182388306, + "learning_rate": 0.0001, + "loss": 0.0332, + "step": 41390 + }, + { + "epoch": 117.61363636363636, + "grad_norm": 0.6081330180168152, + "learning_rate": 0.0001, + "loss": 0.0351, + "step": 41400 + }, + { + "epoch": 117.64204545454545, + "grad_norm": 0.7164961099624634, + "learning_rate": 0.0001, + "loss": 0.0324, + "step": 41410 + }, + { + "epoch": 117.67045454545455, + "grad_norm": 0.6436015963554382, + "learning_rate": 0.0001, + "loss": 0.0345, + "step": 41420 + }, + { + "epoch": 117.69886363636364, + "grad_norm": 0.7437272667884827, + "learning_rate": 0.0001, + "loss": 0.0342, + "step": 41430 + }, + { + "epoch": 117.72727272727273, + "grad_norm": 0.723030686378479, + "learning_rate": 0.0001, + "loss": 0.0347, + "step": 41440 + }, + { + "epoch": 117.75568181818181, + "grad_norm": 0.7317346334457397, + "learning_rate": 0.0001, + "loss": 0.0344, + "step": 41450 + }, + { + "epoch": 117.7840909090909, + "grad_norm": 0.7966662049293518, + "learning_rate": 0.0001, + "loss": 0.0349, + "step": 41460 + }, + { + "epoch": 117.8125, + "grad_norm": 0.6614232659339905, + "learning_rate": 0.0001, + "loss": 0.0351, + "step": 41470 + }, + { + "epoch": 117.8409090909091, + "grad_norm": 0.7168434858322144, + "learning_rate": 0.0001, + "loss": 0.0358, + "step": 41480 + }, + { + "epoch": 117.86931818181819, + "grad_norm": 0.5962741374969482, + "learning_rate": 0.0001, + "loss": 0.0338, + "step": 41490 + }, + { + "epoch": 117.89772727272727, + "grad_norm": 0.6524445414543152, + "learning_rate": 0.0001, + "loss": 0.035, + "step": 41500 + }, + { + "epoch": 117.92613636363636, + "grad_norm": 0.4581736624240875, + "learning_rate": 0.0001, + "loss": 0.0345, + "step": 41510 + }, + { + "epoch": 117.95454545454545, + "grad_norm": 0.6047722101211548, + "learning_rate": 0.0001, + "loss": 0.035, + "step": 41520 + }, + { + "epoch": 117.98295454545455, + "grad_norm": 0.9901228547096252, + "learning_rate": 0.0001, + "loss": 0.0351, + "step": 41530 + }, + { + "epoch": 118.01136363636364, + "grad_norm": 0.8791798949241638, + "learning_rate": 0.0001, + "loss": 0.034, + "step": 41540 + }, + { + "epoch": 118.03977272727273, + "grad_norm": 0.727450966835022, + "learning_rate": 0.0001, + "loss": 0.0354, + "step": 41550 + }, + { + "epoch": 118.06818181818181, + "grad_norm": 0.638188898563385, + "learning_rate": 0.0001, + "loss": 0.0343, + "step": 41560 + }, + { + "epoch": 118.0965909090909, + "grad_norm": 0.7026476263999939, + "learning_rate": 0.0001, + "loss": 0.0331, + "step": 41570 + }, + { + "epoch": 118.125, + "grad_norm": 0.7015557885169983, + "learning_rate": 0.0001, + "loss": 0.0356, + "step": 41580 + }, + { + "epoch": 118.1534090909091, + "grad_norm": 0.7547580599784851, + "learning_rate": 0.0001, + "loss": 0.0348, + "step": 41590 + }, + { + "epoch": 118.18181818181819, + "grad_norm": 0.6024928092956543, + "learning_rate": 0.0001, + "loss": 0.0342, + "step": 41600 + }, + { + "epoch": 118.21022727272727, + "grad_norm": 0.7490041255950928, + "learning_rate": 0.0001, + "loss": 0.0363, + "step": 41610 + }, + { + "epoch": 118.23863636363636, + "grad_norm": 0.540759265422821, + "learning_rate": 0.0001, + "loss": 0.0365, + "step": 41620 + }, + { + "epoch": 118.26704545454545, + "grad_norm": 0.6325246691703796, + "learning_rate": 0.0001, + "loss": 0.0356, + "step": 41630 + }, + { + "epoch": 118.29545454545455, + "grad_norm": 0.8828673362731934, + "learning_rate": 0.0001, + "loss": 0.0356, + "step": 41640 + }, + { + "epoch": 118.32386363636364, + "grad_norm": 1.1287953853607178, + "learning_rate": 0.0001, + "loss": 0.0343, + "step": 41650 + }, + { + "epoch": 118.35227272727273, + "grad_norm": 1.034847617149353, + "learning_rate": 0.0001, + "loss": 0.0359, + "step": 41660 + }, + { + "epoch": 118.38068181818181, + "grad_norm": 0.6673224568367004, + "learning_rate": 0.0001, + "loss": 0.035, + "step": 41670 + }, + { + "epoch": 118.4090909090909, + "grad_norm": 0.5625375509262085, + "learning_rate": 0.0001, + "loss": 0.0363, + "step": 41680 + }, + { + "epoch": 118.4375, + "grad_norm": 0.759629487991333, + "learning_rate": 0.0001, + "loss": 0.0352, + "step": 41690 + }, + { + "epoch": 118.4659090909091, + "grad_norm": 0.9632283449172974, + "learning_rate": 0.0001, + "loss": 0.0367, + "step": 41700 + }, + { + "epoch": 118.49431818181819, + "grad_norm": 0.9728202819824219, + "learning_rate": 0.0001, + "loss": 0.0361, + "step": 41710 + }, + { + "epoch": 118.52272727272727, + "grad_norm": 0.6623541712760925, + "learning_rate": 0.0001, + "loss": 0.0354, + "step": 41720 + }, + { + "epoch": 118.55113636363636, + "grad_norm": 0.5848730206489563, + "learning_rate": 0.0001, + "loss": 0.0355, + "step": 41730 + }, + { + "epoch": 118.57954545454545, + "grad_norm": 0.5939080715179443, + "learning_rate": 0.0001, + "loss": 0.0354, + "step": 41740 + }, + { + "epoch": 118.60795454545455, + "grad_norm": 0.4702250063419342, + "learning_rate": 0.0001, + "loss": 0.0345, + "step": 41750 + }, + { + "epoch": 118.63636363636364, + "grad_norm": 0.6767942905426025, + "learning_rate": 0.0001, + "loss": 0.0347, + "step": 41760 + }, + { + "epoch": 118.66477272727273, + "grad_norm": 0.7504387497901917, + "learning_rate": 0.0001, + "loss": 0.0341, + "step": 41770 + }, + { + "epoch": 118.69318181818181, + "grad_norm": 0.5079829096794128, + "learning_rate": 0.0001, + "loss": 0.0356, + "step": 41780 + }, + { + "epoch": 118.7215909090909, + "grad_norm": 0.5891208648681641, + "learning_rate": 0.0001, + "loss": 0.0351, + "step": 41790 + }, + { + "epoch": 118.75, + "grad_norm": 0.8426125049591064, + "learning_rate": 0.0001, + "loss": 0.0342, + "step": 41800 + }, + { + "epoch": 118.7784090909091, + "grad_norm": 0.6993535757064819, + "learning_rate": 0.0001, + "loss": 0.0345, + "step": 41810 + }, + { + "epoch": 118.80681818181819, + "grad_norm": 0.8501553535461426, + "learning_rate": 0.0001, + "loss": 0.0353, + "step": 41820 + }, + { + "epoch": 118.83522727272727, + "grad_norm": 0.7962374091148376, + "learning_rate": 0.0001, + "loss": 0.0347, + "step": 41830 + }, + { + "epoch": 118.86363636363636, + "grad_norm": 0.6130684018135071, + "learning_rate": 0.0001, + "loss": 0.0347, + "step": 41840 + }, + { + "epoch": 118.89204545454545, + "grad_norm": 0.6343328952789307, + "learning_rate": 0.0001, + "loss": 0.0337, + "step": 41850 + }, + { + "epoch": 118.92045454545455, + "grad_norm": 0.5049582123756409, + "learning_rate": 0.0001, + "loss": 0.0354, + "step": 41860 + }, + { + "epoch": 118.94886363636364, + "grad_norm": 0.6091402769088745, + "learning_rate": 0.0001, + "loss": 0.0354, + "step": 41870 + }, + { + "epoch": 118.97727272727273, + "grad_norm": 0.5600488781929016, + "learning_rate": 0.0001, + "loss": 0.034, + "step": 41880 + }, + { + "epoch": 119.00568181818181, + "grad_norm": 0.6329433917999268, + "learning_rate": 0.0001, + "loss": 0.035, + "step": 41890 + }, + { + "epoch": 119.0340909090909, + "grad_norm": 0.589053213596344, + "learning_rate": 0.0001, + "loss": 0.0324, + "step": 41900 + }, + { + "epoch": 119.0625, + "grad_norm": 0.456606924533844, + "learning_rate": 0.0001, + "loss": 0.0346, + "step": 41910 + }, + { + "epoch": 119.0909090909091, + "grad_norm": 0.5378241539001465, + "learning_rate": 0.0001, + "loss": 0.0336, + "step": 41920 + }, + { + "epoch": 119.11931818181819, + "grad_norm": 0.6176024675369263, + "learning_rate": 0.0001, + "loss": 0.0341, + "step": 41930 + }, + { + "epoch": 119.14772727272727, + "grad_norm": 0.50550776720047, + "learning_rate": 0.0001, + "loss": 0.0343, + "step": 41940 + }, + { + "epoch": 119.17613636363636, + "grad_norm": 0.5080638527870178, + "learning_rate": 0.0001, + "loss": 0.0354, + "step": 41950 + }, + { + "epoch": 119.20454545454545, + "grad_norm": 0.5049692392349243, + "learning_rate": 0.0001, + "loss": 0.0333, + "step": 41960 + }, + { + "epoch": 119.23295454545455, + "grad_norm": 0.5414020419120789, + "learning_rate": 0.0001, + "loss": 0.0344, + "step": 41970 + }, + { + "epoch": 119.26136363636364, + "grad_norm": 0.592162549495697, + "learning_rate": 0.0001, + "loss": 0.0354, + "step": 41980 + }, + { + "epoch": 119.28977272727273, + "grad_norm": 1.7480474710464478, + "learning_rate": 0.0001, + "loss": 0.0342, + "step": 41990 + }, + { + "epoch": 119.31818181818181, + "grad_norm": 1.2184784412384033, + "learning_rate": 0.0001, + "loss": 0.0353, + "step": 42000 + }, + { + "epoch": 119.3465909090909, + "grad_norm": 1.2475852966308594, + "learning_rate": 0.0001, + "loss": 0.0342, + "step": 42010 + }, + { + "epoch": 119.375, + "grad_norm": 1.83725905418396, + "learning_rate": 0.0001, + "loss": 0.0343, + "step": 42020 + }, + { + "epoch": 119.4034090909091, + "grad_norm": 1.1545556783676147, + "learning_rate": 0.0001, + "loss": 0.0341, + "step": 42030 + }, + { + "epoch": 119.43181818181819, + "grad_norm": 1.2570842504501343, + "learning_rate": 0.0001, + "loss": 0.0336, + "step": 42040 + }, + { + "epoch": 119.46022727272727, + "grad_norm": 1.244055151939392, + "learning_rate": 0.0001, + "loss": 0.0336, + "step": 42050 + }, + { + "epoch": 119.48863636363636, + "grad_norm": 0.766913652420044, + "learning_rate": 0.0001, + "loss": 0.0333, + "step": 42060 + }, + { + "epoch": 119.51704545454545, + "grad_norm": 0.6588661074638367, + "learning_rate": 0.0001, + "loss": 0.0329, + "step": 42070 + }, + { + "epoch": 119.54545454545455, + "grad_norm": 0.5511944890022278, + "learning_rate": 0.0001, + "loss": 0.0346, + "step": 42080 + }, + { + "epoch": 119.57386363636364, + "grad_norm": 0.7311079502105713, + "learning_rate": 0.0001, + "loss": 0.0335, + "step": 42090 + }, + { + "epoch": 119.60227272727273, + "grad_norm": 0.6783902049064636, + "learning_rate": 0.0001, + "loss": 0.0341, + "step": 42100 + }, + { + "epoch": 119.63068181818181, + "grad_norm": 0.7213060855865479, + "learning_rate": 0.0001, + "loss": 0.035, + "step": 42110 + }, + { + "epoch": 119.6590909090909, + "grad_norm": 0.6389514207839966, + "learning_rate": 0.0001, + "loss": 0.034, + "step": 42120 + }, + { + "epoch": 119.6875, + "grad_norm": 0.6836166977882385, + "learning_rate": 0.0001, + "loss": 0.0337, + "step": 42130 + }, + { + "epoch": 119.7159090909091, + "grad_norm": 0.6873879432678223, + "learning_rate": 0.0001, + "loss": 0.0351, + "step": 42140 + }, + { + "epoch": 119.74431818181819, + "grad_norm": 0.6492112278938293, + "learning_rate": 0.0001, + "loss": 0.0356, + "step": 42150 + }, + { + "epoch": 119.77272727272727, + "grad_norm": 0.6663733720779419, + "learning_rate": 0.0001, + "loss": 0.0348, + "step": 42160 + }, + { + "epoch": 119.80113636363636, + "grad_norm": 1.1575038433074951, + "learning_rate": 0.0001, + "loss": 0.0351, + "step": 42170 + }, + { + "epoch": 119.82954545454545, + "grad_norm": 0.878379225730896, + "learning_rate": 0.0001, + "loss": 0.0343, + "step": 42180 + }, + { + "epoch": 119.85795454545455, + "grad_norm": 0.6646192073822021, + "learning_rate": 0.0001, + "loss": 0.0355, + "step": 42190 + }, + { + "epoch": 119.88636363636364, + "grad_norm": 0.7406749129295349, + "learning_rate": 0.0001, + "loss": 0.0356, + "step": 42200 + }, + { + "epoch": 119.91477272727273, + "grad_norm": 0.6425890326499939, + "learning_rate": 0.0001, + "loss": 0.0348, + "step": 42210 + }, + { + "epoch": 119.94318181818181, + "grad_norm": 0.8351615071296692, + "learning_rate": 0.0001, + "loss": 0.0349, + "step": 42220 + }, + { + "epoch": 119.9715909090909, + "grad_norm": 0.6505674719810486, + "learning_rate": 0.0001, + "loss": 0.0359, + "step": 42230 + }, + { + "epoch": 120.0, + "grad_norm": 0.6099982857704163, + "learning_rate": 0.0001, + "loss": 0.0355, + "step": 42240 + }, + { + "epoch": 120.0284090909091, + "grad_norm": 0.604333758354187, + "learning_rate": 0.0001, + "loss": 0.035, + "step": 42250 + }, + { + "epoch": 120.05681818181819, + "grad_norm": 0.5962154269218445, + "learning_rate": 0.0001, + "loss": 0.0355, + "step": 42260 + }, + { + "epoch": 120.08522727272727, + "grad_norm": 0.5833165645599365, + "learning_rate": 0.0001, + "loss": 0.0358, + "step": 42270 + }, + { + "epoch": 120.11363636363636, + "grad_norm": 0.5957491397857666, + "learning_rate": 0.0001, + "loss": 0.0345, + "step": 42280 + }, + { + "epoch": 120.14204545454545, + "grad_norm": 0.6122380495071411, + "learning_rate": 0.0001, + "loss": 0.0346, + "step": 42290 + }, + { + "epoch": 120.17045454545455, + "grad_norm": 0.5932005643844604, + "learning_rate": 0.0001, + "loss": 0.0348, + "step": 42300 + }, + { + "epoch": 120.19886363636364, + "grad_norm": 0.5959727168083191, + "learning_rate": 0.0001, + "loss": 0.0345, + "step": 42310 + }, + { + "epoch": 120.22727272727273, + "grad_norm": 0.5512206554412842, + "learning_rate": 0.0001, + "loss": 0.0352, + "step": 42320 + }, + { + "epoch": 120.25568181818181, + "grad_norm": 0.535960853099823, + "learning_rate": 0.0001, + "loss": 0.0349, + "step": 42330 + }, + { + "epoch": 120.2840909090909, + "grad_norm": 0.5918856263160706, + "learning_rate": 0.0001, + "loss": 0.0353, + "step": 42340 + }, + { + "epoch": 120.3125, + "grad_norm": 0.5420950055122375, + "learning_rate": 0.0001, + "loss": 0.0341, + "step": 42350 + }, + { + "epoch": 120.3409090909091, + "grad_norm": 0.5215405225753784, + "learning_rate": 0.0001, + "loss": 0.0343, + "step": 42360 + }, + { + "epoch": 120.36931818181819, + "grad_norm": 0.48384472727775574, + "learning_rate": 0.0001, + "loss": 0.0334, + "step": 42370 + }, + { + "epoch": 120.39772727272727, + "grad_norm": 0.40733209252357483, + "learning_rate": 0.0001, + "loss": 0.0328, + "step": 42380 + }, + { + "epoch": 120.42613636363636, + "grad_norm": 0.5102291703224182, + "learning_rate": 0.0001, + "loss": 0.0334, + "step": 42390 + }, + { + "epoch": 120.45454545454545, + "grad_norm": 0.7866867184638977, + "learning_rate": 0.0001, + "loss": 0.0337, + "step": 42400 + }, + { + "epoch": 120.48295454545455, + "grad_norm": 1.2717626094818115, + "learning_rate": 0.0001, + "loss": 0.0341, + "step": 42410 + }, + { + "epoch": 120.51136363636364, + "grad_norm": 1.0195508003234863, + "learning_rate": 0.0001, + "loss": 0.0343, + "step": 42420 + }, + { + "epoch": 120.53977272727273, + "grad_norm": 0.6073787212371826, + "learning_rate": 0.0001, + "loss": 0.0344, + "step": 42430 + }, + { + "epoch": 120.56818181818181, + "grad_norm": 0.6645169854164124, + "learning_rate": 0.0001, + "loss": 0.0325, + "step": 42440 + }, + { + "epoch": 120.5965909090909, + "grad_norm": 0.9627057313919067, + "learning_rate": 0.0001, + "loss": 0.0348, + "step": 42450 + }, + { + "epoch": 120.625, + "grad_norm": 0.8808974623680115, + "learning_rate": 0.0001, + "loss": 0.0355, + "step": 42460 + }, + { + "epoch": 120.6534090909091, + "grad_norm": 0.8835111856460571, + "learning_rate": 0.0001, + "loss": 0.0349, + "step": 42470 + }, + { + "epoch": 120.68181818181819, + "grad_norm": 0.6539061665534973, + "learning_rate": 0.0001, + "loss": 0.0359, + "step": 42480 + }, + { + "epoch": 120.71022727272727, + "grad_norm": 0.7671836018562317, + "learning_rate": 0.0001, + "loss": 0.0348, + "step": 42490 + }, + { + "epoch": 120.73863636363636, + "grad_norm": 0.6868611574172974, + "learning_rate": 0.0001, + "loss": 0.0352, + "step": 42500 + }, + { + "epoch": 120.76704545454545, + "grad_norm": 0.613646388053894, + "learning_rate": 0.0001, + "loss": 0.0343, + "step": 42510 + }, + { + "epoch": 120.79545454545455, + "grad_norm": 0.6648271083831787, + "learning_rate": 0.0001, + "loss": 0.0351, + "step": 42520 + }, + { + "epoch": 120.82386363636364, + "grad_norm": 0.5934839248657227, + "learning_rate": 0.0001, + "loss": 0.0344, + "step": 42530 + }, + { + "epoch": 120.85227272727273, + "grad_norm": 0.6186235547065735, + "learning_rate": 0.0001, + "loss": 0.034, + "step": 42540 + }, + { + "epoch": 120.88068181818181, + "grad_norm": 0.46539390087127686, + "learning_rate": 0.0001, + "loss": 0.034, + "step": 42550 + }, + { + "epoch": 120.9090909090909, + "grad_norm": 0.5254285931587219, + "learning_rate": 0.0001, + "loss": 0.0351, + "step": 42560 + }, + { + "epoch": 120.9375, + "grad_norm": 0.6108625531196594, + "learning_rate": 0.0001, + "loss": 0.0346, + "step": 42570 + }, + { + "epoch": 120.9659090909091, + "grad_norm": 0.6213340759277344, + "learning_rate": 0.0001, + "loss": 0.0342, + "step": 42580 + }, + { + "epoch": 120.99431818181819, + "grad_norm": 0.5728833675384521, + "learning_rate": 0.0001, + "loss": 0.0349, + "step": 42590 + }, + { + "epoch": 121.02272727272727, + "grad_norm": 0.47747138142585754, + "learning_rate": 0.0001, + "loss": 0.0345, + "step": 42600 + }, + { + "epoch": 121.05113636363636, + "grad_norm": 0.5983991026878357, + "learning_rate": 0.0001, + "loss": 0.0349, + "step": 42610 + }, + { + "epoch": 121.07954545454545, + "grad_norm": 1.5157920122146606, + "learning_rate": 0.0001, + "loss": 0.0355, + "step": 42620 + }, + { + "epoch": 121.10795454545455, + "grad_norm": 1.188115119934082, + "learning_rate": 0.0001, + "loss": 0.0345, + "step": 42630 + }, + { + "epoch": 121.13636363636364, + "grad_norm": 0.8782476782798767, + "learning_rate": 0.0001, + "loss": 0.0339, + "step": 42640 + }, + { + "epoch": 121.16477272727273, + "grad_norm": 1.138963222503662, + "learning_rate": 0.0001, + "loss": 0.0342, + "step": 42650 + }, + { + "epoch": 121.19318181818181, + "grad_norm": 0.7090156674385071, + "learning_rate": 0.0001, + "loss": 0.035, + "step": 42660 + }, + { + "epoch": 121.2215909090909, + "grad_norm": 1.0206100940704346, + "learning_rate": 0.0001, + "loss": 0.0341, + "step": 42670 + }, + { + "epoch": 121.25, + "grad_norm": 0.8041273355484009, + "learning_rate": 0.0001, + "loss": 0.0349, + "step": 42680 + }, + { + "epoch": 121.2784090909091, + "grad_norm": 0.7158668637275696, + "learning_rate": 0.0001, + "loss": 0.0355, + "step": 42690 + }, + { + "epoch": 121.30681818181819, + "grad_norm": 0.6531086564064026, + "learning_rate": 0.0001, + "loss": 0.034, + "step": 42700 + }, + { + "epoch": 121.33522727272727, + "grad_norm": 0.7232179641723633, + "learning_rate": 0.0001, + "loss": 0.0343, + "step": 42710 + }, + { + "epoch": 121.36363636363636, + "grad_norm": 0.6331400275230408, + "learning_rate": 0.0001, + "loss": 0.0344, + "step": 42720 + }, + { + "epoch": 121.39204545454545, + "grad_norm": 0.7535783052444458, + "learning_rate": 0.0001, + "loss": 0.0338, + "step": 42730 + }, + { + "epoch": 121.42045454545455, + "grad_norm": 0.721626341342926, + "learning_rate": 0.0001, + "loss": 0.034, + "step": 42740 + }, + { + "epoch": 121.44886363636364, + "grad_norm": 0.6356875896453857, + "learning_rate": 0.0001, + "loss": 0.0342, + "step": 42750 + }, + { + "epoch": 121.47727272727273, + "grad_norm": 0.6131756901741028, + "learning_rate": 0.0001, + "loss": 0.034, + "step": 42760 + }, + { + "epoch": 121.50568181818181, + "grad_norm": 0.577969491481781, + "learning_rate": 0.0001, + "loss": 0.0356, + "step": 42770 + }, + { + "epoch": 121.5340909090909, + "grad_norm": 0.5389184951782227, + "learning_rate": 0.0001, + "loss": 0.0339, + "step": 42780 + }, + { + "epoch": 121.5625, + "grad_norm": 0.6019951105117798, + "learning_rate": 0.0001, + "loss": 0.0351, + "step": 42790 + }, + { + "epoch": 121.5909090909091, + "grad_norm": 0.5243552923202515, + "learning_rate": 0.0001, + "loss": 0.0338, + "step": 42800 + }, + { + "epoch": 121.61931818181819, + "grad_norm": 0.5662066340446472, + "learning_rate": 0.0001, + "loss": 0.0337, + "step": 42810 + }, + { + "epoch": 121.64772727272727, + "grad_norm": 0.673316478729248, + "learning_rate": 0.0001, + "loss": 0.0339, + "step": 42820 + }, + { + "epoch": 121.67613636363636, + "grad_norm": 0.7163783311843872, + "learning_rate": 0.0001, + "loss": 0.0337, + "step": 42830 + }, + { + "epoch": 121.70454545454545, + "grad_norm": 0.8090773820877075, + "learning_rate": 0.0001, + "loss": 0.036, + "step": 42840 + }, + { + "epoch": 121.73295454545455, + "grad_norm": 0.97684246301651, + "learning_rate": 0.0001, + "loss": 0.0364, + "step": 42850 + }, + { + "epoch": 121.76136363636364, + "grad_norm": 0.8781417012214661, + "learning_rate": 0.0001, + "loss": 0.035, + "step": 42860 + }, + { + "epoch": 121.78977272727273, + "grad_norm": 0.6750118136405945, + "learning_rate": 0.0001, + "loss": 0.0351, + "step": 42870 + }, + { + "epoch": 121.81818181818181, + "grad_norm": 0.7185530662536621, + "learning_rate": 0.0001, + "loss": 0.0345, + "step": 42880 + }, + { + "epoch": 121.8465909090909, + "grad_norm": 0.7951401472091675, + "learning_rate": 0.0001, + "loss": 0.0361, + "step": 42890 + }, + { + "epoch": 121.875, + "grad_norm": 0.6739190816879272, + "learning_rate": 0.0001, + "loss": 0.0345, + "step": 42900 + }, + { + "epoch": 121.9034090909091, + "grad_norm": 0.5545329451560974, + "learning_rate": 0.0001, + "loss": 0.0358, + "step": 42910 + }, + { + "epoch": 121.93181818181819, + "grad_norm": 0.7083759307861328, + "learning_rate": 0.0001, + "loss": 0.0347, + "step": 42920 + }, + { + "epoch": 121.96022727272727, + "grad_norm": 0.5985446572303772, + "learning_rate": 0.0001, + "loss": 0.0362, + "step": 42930 + }, + { + "epoch": 121.98863636363636, + "grad_norm": 0.6231947541236877, + "learning_rate": 0.0001, + "loss": 0.0347, + "step": 42940 + }, + { + "epoch": 122.01704545454545, + "grad_norm": 0.6297039985656738, + "learning_rate": 0.0001, + "loss": 0.036, + "step": 42950 + }, + { + "epoch": 122.04545454545455, + "grad_norm": 0.5027703046798706, + "learning_rate": 0.0001, + "loss": 0.0347, + "step": 42960 + }, + { + "epoch": 122.07386363636364, + "grad_norm": 0.5628147721290588, + "learning_rate": 0.0001, + "loss": 0.0341, + "step": 42970 + }, + { + "epoch": 122.10227272727273, + "grad_norm": 0.5758391618728638, + "learning_rate": 0.0001, + "loss": 0.0361, + "step": 42980 + }, + { + "epoch": 122.13068181818181, + "grad_norm": 0.5796226263046265, + "learning_rate": 0.0001, + "loss": 0.0341, + "step": 42990 + }, + { + "epoch": 122.1590909090909, + "grad_norm": 0.6459672451019287, + "learning_rate": 0.0001, + "loss": 0.0345, + "step": 43000 + }, + { + "epoch": 122.1875, + "grad_norm": 0.7128278613090515, + "learning_rate": 0.0001, + "loss": 0.0348, + "step": 43010 + }, + { + "epoch": 122.2159090909091, + "grad_norm": 0.522715151309967, + "learning_rate": 0.0001, + "loss": 0.0359, + "step": 43020 + }, + { + "epoch": 122.24431818181819, + "grad_norm": 0.6805800795555115, + "learning_rate": 0.0001, + "loss": 0.0343, + "step": 43030 + }, + { + "epoch": 122.27272727272727, + "grad_norm": 0.8094000220298767, + "learning_rate": 0.0001, + "loss": 0.0343, + "step": 43040 + }, + { + "epoch": 122.30113636363636, + "grad_norm": 0.617563784122467, + "learning_rate": 0.0001, + "loss": 0.035, + "step": 43050 + }, + { + "epoch": 122.32954545454545, + "grad_norm": 0.6483767628669739, + "learning_rate": 0.0001, + "loss": 0.0353, + "step": 43060 + }, + { + "epoch": 122.35795454545455, + "grad_norm": 0.6512372493743896, + "learning_rate": 0.0001, + "loss": 0.0353, + "step": 43070 + }, + { + "epoch": 122.38636363636364, + "grad_norm": 0.7526001334190369, + "learning_rate": 0.0001, + "loss": 0.0348, + "step": 43080 + }, + { + "epoch": 122.41477272727273, + "grad_norm": 0.7624586820602417, + "learning_rate": 0.0001, + "loss": 0.0342, + "step": 43090 + }, + { + "epoch": 122.44318181818181, + "grad_norm": 0.5687345862388611, + "learning_rate": 0.0001, + "loss": 0.034, + "step": 43100 + }, + { + "epoch": 122.4715909090909, + "grad_norm": 0.60966956615448, + "learning_rate": 0.0001, + "loss": 0.0343, + "step": 43110 + }, + { + "epoch": 122.5, + "grad_norm": 0.8027265667915344, + "learning_rate": 0.0001, + "loss": 0.0344, + "step": 43120 + }, + { + "epoch": 122.5284090909091, + "grad_norm": 0.5919138193130493, + "learning_rate": 0.0001, + "loss": 0.0345, + "step": 43130 + }, + { + "epoch": 122.55681818181819, + "grad_norm": 0.6522887349128723, + "learning_rate": 0.0001, + "loss": 0.0343, + "step": 43140 + }, + { + "epoch": 122.58522727272727, + "grad_norm": 0.6283818483352661, + "learning_rate": 0.0001, + "loss": 0.0343, + "step": 43150 + }, + { + "epoch": 122.61363636363636, + "grad_norm": 0.8638715147972107, + "learning_rate": 0.0001, + "loss": 0.0354, + "step": 43160 + }, + { + "epoch": 122.64204545454545, + "grad_norm": 0.9759404063224792, + "learning_rate": 0.0001, + "loss": 0.0338, + "step": 43170 + }, + { + "epoch": 122.67045454545455, + "grad_norm": 0.9701955914497375, + "learning_rate": 0.0001, + "loss": 0.035, + "step": 43180 + }, + { + "epoch": 122.69886363636364, + "grad_norm": 0.6288473606109619, + "learning_rate": 0.0001, + "loss": 0.0336, + "step": 43190 + }, + { + "epoch": 122.72727272727273, + "grad_norm": 0.798579216003418, + "learning_rate": 0.0001, + "loss": 0.0343, + "step": 43200 + }, + { + "epoch": 122.75568181818181, + "grad_norm": 0.7400290369987488, + "learning_rate": 0.0001, + "loss": 0.0349, + "step": 43210 + }, + { + "epoch": 122.7840909090909, + "grad_norm": 0.6623448133468628, + "learning_rate": 0.0001, + "loss": 0.0346, + "step": 43220 + }, + { + "epoch": 122.8125, + "grad_norm": 0.645901083946228, + "learning_rate": 0.0001, + "loss": 0.0345, + "step": 43230 + }, + { + "epoch": 122.8409090909091, + "grad_norm": 0.7637975811958313, + "learning_rate": 0.0001, + "loss": 0.034, + "step": 43240 + }, + { + "epoch": 122.86931818181819, + "grad_norm": 0.763724684715271, + "learning_rate": 0.0001, + "loss": 0.0322, + "step": 43250 + }, + { + "epoch": 122.89772727272727, + "grad_norm": 0.6974006295204163, + "learning_rate": 0.0001, + "loss": 0.0356, + "step": 43260 + }, + { + "epoch": 122.92613636363636, + "grad_norm": 0.749366283416748, + "learning_rate": 0.0001, + "loss": 0.0337, + "step": 43270 + }, + { + "epoch": 122.95454545454545, + "grad_norm": 0.6450308561325073, + "learning_rate": 0.0001, + "loss": 0.0333, + "step": 43280 + }, + { + "epoch": 122.98295454545455, + "grad_norm": 0.7591984868049622, + "learning_rate": 0.0001, + "loss": 0.0329, + "step": 43290 + }, + { + "epoch": 123.01136363636364, + "grad_norm": 0.6023324728012085, + "learning_rate": 0.0001, + "loss": 0.0333, + "step": 43300 + }, + { + "epoch": 123.03977272727273, + "grad_norm": 0.6163926720619202, + "learning_rate": 0.0001, + "loss": 0.0349, + "step": 43310 + }, + { + "epoch": 123.06818181818181, + "grad_norm": 0.5046906471252441, + "learning_rate": 0.0001, + "loss": 0.0337, + "step": 43320 + }, + { + "epoch": 123.0965909090909, + "grad_norm": 0.6651236414909363, + "learning_rate": 0.0001, + "loss": 0.0344, + "step": 43330 + }, + { + "epoch": 123.125, + "grad_norm": 0.9277121424674988, + "learning_rate": 0.0001, + "loss": 0.0342, + "step": 43340 + }, + { + "epoch": 123.1534090909091, + "grad_norm": 0.847284734249115, + "learning_rate": 0.0001, + "loss": 0.0344, + "step": 43350 + }, + { + "epoch": 123.18181818181819, + "grad_norm": 0.7165888547897339, + "learning_rate": 0.0001, + "loss": 0.033, + "step": 43360 + }, + { + "epoch": 123.21022727272727, + "grad_norm": 0.5978952646255493, + "learning_rate": 0.0001, + "loss": 0.0344, + "step": 43370 + }, + { + "epoch": 123.23863636363636, + "grad_norm": 0.8708299398422241, + "learning_rate": 0.0001, + "loss": 0.0346, + "step": 43380 + }, + { + "epoch": 123.26704545454545, + "grad_norm": 0.6764265298843384, + "learning_rate": 0.0001, + "loss": 0.0342, + "step": 43390 + }, + { + "epoch": 123.29545454545455, + "grad_norm": 0.9524985551834106, + "learning_rate": 0.0001, + "loss": 0.0346, + "step": 43400 + }, + { + "epoch": 123.32386363636364, + "grad_norm": 0.856083333492279, + "learning_rate": 0.0001, + "loss": 0.0343, + "step": 43410 + }, + { + "epoch": 123.35227272727273, + "grad_norm": 0.8081461191177368, + "learning_rate": 0.0001, + "loss": 0.0351, + "step": 43420 + }, + { + "epoch": 123.38068181818181, + "grad_norm": 0.8981258273124695, + "learning_rate": 0.0001, + "loss": 0.0345, + "step": 43430 + }, + { + "epoch": 123.4090909090909, + "grad_norm": 0.7242816090583801, + "learning_rate": 0.0001, + "loss": 0.034, + "step": 43440 + }, + { + "epoch": 123.4375, + "grad_norm": 0.983867347240448, + "learning_rate": 0.0001, + "loss": 0.0351, + "step": 43450 + }, + { + "epoch": 123.4659090909091, + "grad_norm": 0.9016323089599609, + "learning_rate": 0.0001, + "loss": 0.0351, + "step": 43460 + }, + { + "epoch": 123.49431818181819, + "grad_norm": 0.6101610660552979, + "learning_rate": 0.0001, + "loss": 0.0338, + "step": 43470 + }, + { + "epoch": 123.52272727272727, + "grad_norm": 0.6919850707054138, + "learning_rate": 0.0001, + "loss": 0.0333, + "step": 43480 + }, + { + "epoch": 123.55113636363636, + "grad_norm": 0.668318510055542, + "learning_rate": 0.0001, + "loss": 0.0349, + "step": 43490 + }, + { + "epoch": 123.57954545454545, + "grad_norm": 0.6016658544540405, + "learning_rate": 0.0001, + "loss": 0.0338, + "step": 43500 + }, + { + "epoch": 123.60795454545455, + "grad_norm": 0.6011306643486023, + "learning_rate": 0.0001, + "loss": 0.0337, + "step": 43510 + }, + { + "epoch": 123.63636363636364, + "grad_norm": 0.5712813138961792, + "learning_rate": 0.0001, + "loss": 0.0329, + "step": 43520 + }, + { + "epoch": 123.66477272727273, + "grad_norm": 0.5258704423904419, + "learning_rate": 0.0001, + "loss": 0.0328, + "step": 43530 + }, + { + "epoch": 123.69318181818181, + "grad_norm": 0.6852856278419495, + "learning_rate": 0.0001, + "loss": 0.0342, + "step": 43540 + }, + { + "epoch": 123.7215909090909, + "grad_norm": 0.6821662187576294, + "learning_rate": 0.0001, + "loss": 0.0338, + "step": 43550 + }, + { + "epoch": 123.75, + "grad_norm": 0.9406258463859558, + "learning_rate": 0.0001, + "loss": 0.0336, + "step": 43560 + }, + { + "epoch": 123.7784090909091, + "grad_norm": 0.6541404128074646, + "learning_rate": 0.0001, + "loss": 0.0344, + "step": 43570 + }, + { + "epoch": 123.80681818181819, + "grad_norm": 0.6775745153427124, + "learning_rate": 0.0001, + "loss": 0.0344, + "step": 43580 + }, + { + "epoch": 123.83522727272727, + "grad_norm": 0.8705558180809021, + "learning_rate": 0.0001, + "loss": 0.0344, + "step": 43590 + }, + { + "epoch": 123.86363636363636, + "grad_norm": 0.8164870142936707, + "learning_rate": 0.0001, + "loss": 0.034, + "step": 43600 + }, + { + "epoch": 123.89204545454545, + "grad_norm": 0.6702988147735596, + "learning_rate": 0.0001, + "loss": 0.0345, + "step": 43610 + }, + { + "epoch": 123.92045454545455, + "grad_norm": 0.8119181990623474, + "learning_rate": 0.0001, + "loss": 0.0339, + "step": 43620 + }, + { + "epoch": 123.94886363636364, + "grad_norm": 0.5951023101806641, + "learning_rate": 0.0001, + "loss": 0.0341, + "step": 43630 + }, + { + "epoch": 123.97727272727273, + "grad_norm": 0.7227770090103149, + "learning_rate": 0.0001, + "loss": 0.0349, + "step": 43640 + }, + { + "epoch": 124.00568181818181, + "grad_norm": 0.6837309002876282, + "learning_rate": 0.0001, + "loss": 0.0344, + "step": 43650 + }, + { + "epoch": 124.0340909090909, + "grad_norm": 0.5477321743965149, + "learning_rate": 0.0001, + "loss": 0.0332, + "step": 43660 + }, + { + "epoch": 124.0625, + "grad_norm": 0.5354117751121521, + "learning_rate": 0.0001, + "loss": 0.0338, + "step": 43670 + }, + { + "epoch": 124.0909090909091, + "grad_norm": 0.6974219083786011, + "learning_rate": 0.0001, + "loss": 0.0356, + "step": 43680 + }, + { + "epoch": 124.11931818181819, + "grad_norm": 0.6769513487815857, + "learning_rate": 0.0001, + "loss": 0.0337, + "step": 43690 + }, + { + "epoch": 124.14772727272727, + "grad_norm": 0.7007016539573669, + "learning_rate": 0.0001, + "loss": 0.0353, + "step": 43700 + }, + { + "epoch": 124.17613636363636, + "grad_norm": 0.6461037993431091, + "learning_rate": 0.0001, + "loss": 0.0337, + "step": 43710 + }, + { + "epoch": 124.20454545454545, + "grad_norm": 0.6010048389434814, + "learning_rate": 0.0001, + "loss": 0.034, + "step": 43720 + }, + { + "epoch": 124.23295454545455, + "grad_norm": 0.5394991040229797, + "learning_rate": 0.0001, + "loss": 0.0335, + "step": 43730 + }, + { + "epoch": 124.26136363636364, + "grad_norm": 0.48307615518569946, + "learning_rate": 0.0001, + "loss": 0.035, + "step": 43740 + }, + { + "epoch": 124.28977272727273, + "grad_norm": 0.5091378092765808, + "learning_rate": 0.0001, + "loss": 0.0343, + "step": 43750 + }, + { + "epoch": 124.31818181818181, + "grad_norm": 0.5722032785415649, + "learning_rate": 0.0001, + "loss": 0.0338, + "step": 43760 + }, + { + "epoch": 124.3465909090909, + "grad_norm": 0.5467521548271179, + "learning_rate": 0.0001, + "loss": 0.0354, + "step": 43770 + }, + { + "epoch": 124.375, + "grad_norm": 0.49909818172454834, + "learning_rate": 0.0001, + "loss": 0.0345, + "step": 43780 + }, + { + "epoch": 124.4034090909091, + "grad_norm": 0.6383428573608398, + "learning_rate": 0.0001, + "loss": 0.0342, + "step": 43790 + }, + { + "epoch": 124.43181818181819, + "grad_norm": 0.7279382348060608, + "learning_rate": 0.0001, + "loss": 0.0346, + "step": 43800 + }, + { + "epoch": 124.46022727272727, + "grad_norm": 0.4871160089969635, + "learning_rate": 0.0001, + "loss": 0.0343, + "step": 43810 + }, + { + "epoch": 124.48863636363636, + "grad_norm": 0.5173508524894714, + "learning_rate": 0.0001, + "loss": 0.0346, + "step": 43820 + }, + { + "epoch": 124.51704545454545, + "grad_norm": 0.5603179335594177, + "learning_rate": 0.0001, + "loss": 0.0332, + "step": 43830 + }, + { + "epoch": 124.54545454545455, + "grad_norm": 0.4561804533004761, + "learning_rate": 0.0001, + "loss": 0.0351, + "step": 43840 + }, + { + "epoch": 124.57386363636364, + "grad_norm": 0.6842979788780212, + "learning_rate": 0.0001, + "loss": 0.0346, + "step": 43850 + }, + { + "epoch": 124.60227272727273, + "grad_norm": 0.6346740126609802, + "learning_rate": 0.0001, + "loss": 0.0337, + "step": 43860 + }, + { + "epoch": 124.63068181818181, + "grad_norm": 0.5946170091629028, + "learning_rate": 0.0001, + "loss": 0.033, + "step": 43870 + }, + { + "epoch": 124.6590909090909, + "grad_norm": 0.4583919942378998, + "learning_rate": 0.0001, + "loss": 0.035, + "step": 43880 + }, + { + "epoch": 124.6875, + "grad_norm": 0.8892697095870972, + "learning_rate": 0.0001, + "loss": 0.0343, + "step": 43890 + }, + { + "epoch": 124.7159090909091, + "grad_norm": 0.7108684182167053, + "learning_rate": 0.0001, + "loss": 0.0339, + "step": 43900 + }, + { + "epoch": 124.74431818181819, + "grad_norm": 0.5772215127944946, + "learning_rate": 0.0001, + "loss": 0.0344, + "step": 43910 + }, + { + "epoch": 124.77272727272727, + "grad_norm": 0.6915180683135986, + "learning_rate": 0.0001, + "loss": 0.0346, + "step": 43920 + }, + { + "epoch": 124.80113636363636, + "grad_norm": 0.8463635444641113, + "learning_rate": 0.0001, + "loss": 0.034, + "step": 43930 + }, + { + "epoch": 124.82954545454545, + "grad_norm": 0.700923502445221, + "learning_rate": 0.0001, + "loss": 0.0341, + "step": 43940 + }, + { + "epoch": 124.85795454545455, + "grad_norm": 0.6794731616973877, + "learning_rate": 0.0001, + "loss": 0.0343, + "step": 43950 + }, + { + "epoch": 124.88636363636364, + "grad_norm": 0.6753823161125183, + "learning_rate": 0.0001, + "loss": 0.0349, + "step": 43960 + }, + { + "epoch": 124.91477272727273, + "grad_norm": 0.7288954257965088, + "learning_rate": 0.0001, + "loss": 0.0343, + "step": 43970 + }, + { + "epoch": 124.94318181818181, + "grad_norm": 0.8429861664772034, + "learning_rate": 0.0001, + "loss": 0.0339, + "step": 43980 + }, + { + "epoch": 124.9715909090909, + "grad_norm": 0.8071476817131042, + "learning_rate": 0.0001, + "loss": 0.0337, + "step": 43990 + }, + { + "epoch": 125.0, + "grad_norm": 0.535753607749939, + "learning_rate": 0.0001, + "loss": 0.0333, + "step": 44000 + }, + { + "epoch": 125.0284090909091, + "grad_norm": 0.6149882078170776, + "learning_rate": 0.0001, + "loss": 0.0345, + "step": 44010 + }, + { + "epoch": 125.05681818181819, + "grad_norm": 0.4777073860168457, + "learning_rate": 0.0001, + "loss": 0.0333, + "step": 44020 + }, + { + "epoch": 125.08522727272727, + "grad_norm": 0.5367774367332458, + "learning_rate": 0.0001, + "loss": 0.0339, + "step": 44030 + }, + { + "epoch": 125.11363636363636, + "grad_norm": 0.7416982054710388, + "learning_rate": 0.0001, + "loss": 0.0342, + "step": 44040 + }, + { + "epoch": 125.14204545454545, + "grad_norm": 0.5535955429077148, + "learning_rate": 0.0001, + "loss": 0.0345, + "step": 44050 + }, + { + "epoch": 125.17045454545455, + "grad_norm": 0.5561586022377014, + "learning_rate": 0.0001, + "loss": 0.0337, + "step": 44060 + }, + { + "epoch": 125.19886363636364, + "grad_norm": 0.6382668018341064, + "learning_rate": 0.0001, + "loss": 0.0343, + "step": 44070 + }, + { + "epoch": 125.22727272727273, + "grad_norm": 0.7232362031936646, + "learning_rate": 0.0001, + "loss": 0.0339, + "step": 44080 + }, + { + "epoch": 125.25568181818181, + "grad_norm": 0.5948888659477234, + "learning_rate": 0.0001, + "loss": 0.034, + "step": 44090 + }, + { + "epoch": 125.2840909090909, + "grad_norm": 0.6899495124816895, + "learning_rate": 0.0001, + "loss": 0.0349, + "step": 44100 + }, + { + "epoch": 125.3125, + "grad_norm": 0.612220287322998, + "learning_rate": 0.0001, + "loss": 0.035, + "step": 44110 + }, + { + "epoch": 125.3409090909091, + "grad_norm": 0.731208086013794, + "learning_rate": 0.0001, + "loss": 0.0345, + "step": 44120 + }, + { + "epoch": 125.36931818181819, + "grad_norm": 0.6531153917312622, + "learning_rate": 0.0001, + "loss": 0.0331, + "step": 44130 + }, + { + "epoch": 125.39772727272727, + "grad_norm": 0.662437379360199, + "learning_rate": 0.0001, + "loss": 0.034, + "step": 44140 + }, + { + "epoch": 125.42613636363636, + "grad_norm": 0.6035165786743164, + "learning_rate": 0.0001, + "loss": 0.0331, + "step": 44150 + }, + { + "epoch": 125.45454545454545, + "grad_norm": 0.6566755771636963, + "learning_rate": 0.0001, + "loss": 0.0339, + "step": 44160 + }, + { + "epoch": 125.48295454545455, + "grad_norm": 0.80877286195755, + "learning_rate": 0.0001, + "loss": 0.0333, + "step": 44170 + }, + { + "epoch": 125.51136363636364, + "grad_norm": 0.7452892065048218, + "learning_rate": 0.0001, + "loss": 0.0344, + "step": 44180 + }, + { + "epoch": 125.53977272727273, + "grad_norm": 0.7200114727020264, + "learning_rate": 0.0001, + "loss": 0.0342, + "step": 44190 + }, + { + "epoch": 125.56818181818181, + "grad_norm": 0.5483570098876953, + "learning_rate": 0.0001, + "loss": 0.0342, + "step": 44200 + }, + { + "epoch": 125.5965909090909, + "grad_norm": 0.7039602398872375, + "learning_rate": 0.0001, + "loss": 0.0337, + "step": 44210 + }, + { + "epoch": 125.625, + "grad_norm": 0.5269778966903687, + "learning_rate": 0.0001, + "loss": 0.0349, + "step": 44220 + }, + { + "epoch": 125.6534090909091, + "grad_norm": 0.5524904131889343, + "learning_rate": 0.0001, + "loss": 0.035, + "step": 44230 + }, + { + "epoch": 125.68181818181819, + "grad_norm": 0.6056973934173584, + "learning_rate": 0.0001, + "loss": 0.0334, + "step": 44240 + }, + { + "epoch": 125.71022727272727, + "grad_norm": 0.7236668467521667, + "learning_rate": 0.0001, + "loss": 0.035, + "step": 44250 + }, + { + "epoch": 125.73863636363636, + "grad_norm": 0.6657916903495789, + "learning_rate": 0.0001, + "loss": 0.0331, + "step": 44260 + }, + { + "epoch": 125.76704545454545, + "grad_norm": 0.7973979711532593, + "learning_rate": 0.0001, + "loss": 0.0342, + "step": 44270 + }, + { + "epoch": 125.79545454545455, + "grad_norm": 0.7450402975082397, + "learning_rate": 0.0001, + "loss": 0.0337, + "step": 44280 + }, + { + "epoch": 125.82386363636364, + "grad_norm": 0.8265330791473389, + "learning_rate": 0.0001, + "loss": 0.034, + "step": 44290 + }, + { + "epoch": 125.85227272727273, + "grad_norm": 0.49795201420783997, + "learning_rate": 0.0001, + "loss": 0.0321, + "step": 44300 + }, + { + "epoch": 125.88068181818181, + "grad_norm": 0.5793216824531555, + "learning_rate": 0.0001, + "loss": 0.0336, + "step": 44310 + }, + { + "epoch": 125.9090909090909, + "grad_norm": 0.5494784116744995, + "learning_rate": 0.0001, + "loss": 0.0338, + "step": 44320 + }, + { + "epoch": 125.9375, + "grad_norm": 0.7741979360580444, + "learning_rate": 0.0001, + "loss": 0.0337, + "step": 44330 + }, + { + "epoch": 125.9659090909091, + "grad_norm": 0.5874280333518982, + "learning_rate": 0.0001, + "loss": 0.0341, + "step": 44340 + }, + { + "epoch": 125.99431818181819, + "grad_norm": 0.5626118183135986, + "learning_rate": 0.0001, + "loss": 0.0341, + "step": 44350 + }, + { + "epoch": 126.02272727272727, + "grad_norm": 0.5672938823699951, + "learning_rate": 0.0001, + "loss": 0.0336, + "step": 44360 + }, + { + "epoch": 126.05113636363636, + "grad_norm": 0.5379377603530884, + "learning_rate": 0.0001, + "loss": 0.0336, + "step": 44370 + }, + { + "epoch": 126.07954545454545, + "grad_norm": 0.6178792715072632, + "learning_rate": 0.0001, + "loss": 0.0333, + "step": 44380 + }, + { + "epoch": 126.10795454545455, + "grad_norm": 0.4595714509487152, + "learning_rate": 0.0001, + "loss": 0.0323, + "step": 44390 + }, + { + "epoch": 126.13636363636364, + "grad_norm": 0.5440025925636292, + "learning_rate": 0.0001, + "loss": 0.033, + "step": 44400 + }, + { + "epoch": 126.16477272727273, + "grad_norm": 0.447454035282135, + "learning_rate": 0.0001, + "loss": 0.0347, + "step": 44410 + }, + { + "epoch": 126.19318181818181, + "grad_norm": 0.5264921188354492, + "learning_rate": 0.0001, + "loss": 0.0342, + "step": 44420 + }, + { + "epoch": 126.2215909090909, + "grad_norm": 0.8086947202682495, + "learning_rate": 0.0001, + "loss": 0.0343, + "step": 44430 + }, + { + "epoch": 126.25, + "grad_norm": 0.743067741394043, + "learning_rate": 0.0001, + "loss": 0.0332, + "step": 44440 + }, + { + "epoch": 126.2784090909091, + "grad_norm": 0.5720465779304504, + "learning_rate": 0.0001, + "loss": 0.034, + "step": 44450 + }, + { + "epoch": 126.30681818181819, + "grad_norm": 0.6255154609680176, + "learning_rate": 0.0001, + "loss": 0.035, + "step": 44460 + }, + { + "epoch": 126.33522727272727, + "grad_norm": 0.7044152617454529, + "learning_rate": 0.0001, + "loss": 0.0335, + "step": 44470 + }, + { + "epoch": 126.36363636363636, + "grad_norm": 0.7242276668548584, + "learning_rate": 0.0001, + "loss": 0.0336, + "step": 44480 + }, + { + "epoch": 126.39204545454545, + "grad_norm": 0.5152789354324341, + "learning_rate": 0.0001, + "loss": 0.0348, + "step": 44490 + }, + { + "epoch": 126.42045454545455, + "grad_norm": 0.8477872610092163, + "learning_rate": 0.0001, + "loss": 0.0344, + "step": 44500 + }, + { + "epoch": 126.44886363636364, + "grad_norm": 1.2072283029556274, + "learning_rate": 0.0001, + "loss": 0.0338, + "step": 44510 + }, + { + "epoch": 126.47727272727273, + "grad_norm": 1.0745333433151245, + "learning_rate": 0.0001, + "loss": 0.0337, + "step": 44520 + }, + { + "epoch": 126.50568181818181, + "grad_norm": 0.6237938404083252, + "learning_rate": 0.0001, + "loss": 0.0338, + "step": 44530 + }, + { + "epoch": 126.5340909090909, + "grad_norm": 1.150746464729309, + "learning_rate": 0.0001, + "loss": 0.0335, + "step": 44540 + }, + { + "epoch": 126.5625, + "grad_norm": 0.6744441390037537, + "learning_rate": 0.0001, + "loss": 0.0328, + "step": 44550 + }, + { + "epoch": 126.5909090909091, + "grad_norm": 0.8944215178489685, + "learning_rate": 0.0001, + "loss": 0.0335, + "step": 44560 + }, + { + "epoch": 126.61931818181819, + "grad_norm": 0.8610298037528992, + "learning_rate": 0.0001, + "loss": 0.033, + "step": 44570 + }, + { + "epoch": 126.64772727272727, + "grad_norm": 1.040744423866272, + "learning_rate": 0.0001, + "loss": 0.0337, + "step": 44580 + }, + { + "epoch": 126.67613636363636, + "grad_norm": 0.8564578890800476, + "learning_rate": 0.0001, + "loss": 0.0334, + "step": 44590 + }, + { + "epoch": 126.70454545454545, + "grad_norm": 0.9337520599365234, + "learning_rate": 0.0001, + "loss": 0.0337, + "step": 44600 + }, + { + "epoch": 126.73295454545455, + "grad_norm": 0.8132588863372803, + "learning_rate": 0.0001, + "loss": 0.032, + "step": 44610 + }, + { + "epoch": 126.76136363636364, + "grad_norm": 0.9135181903839111, + "learning_rate": 0.0001, + "loss": 0.0332, + "step": 44620 + }, + { + "epoch": 126.78977272727273, + "grad_norm": 1.0999176502227783, + "learning_rate": 0.0001, + "loss": 0.0338, + "step": 44630 + }, + { + "epoch": 126.81818181818181, + "grad_norm": 0.9005665183067322, + "learning_rate": 0.0001, + "loss": 0.0341, + "step": 44640 + }, + { + "epoch": 126.8465909090909, + "grad_norm": 0.9482970833778381, + "learning_rate": 0.0001, + "loss": 0.033, + "step": 44650 + }, + { + "epoch": 126.875, + "grad_norm": 0.708218514919281, + "learning_rate": 0.0001, + "loss": 0.0343, + "step": 44660 + }, + { + "epoch": 126.9034090909091, + "grad_norm": 0.8633854389190674, + "learning_rate": 0.0001, + "loss": 0.0349, + "step": 44670 + }, + { + "epoch": 126.93181818181819, + "grad_norm": 0.7367269992828369, + "learning_rate": 0.0001, + "loss": 0.0342, + "step": 44680 + }, + { + "epoch": 126.96022727272727, + "grad_norm": 0.954519510269165, + "learning_rate": 0.0001, + "loss": 0.0327, + "step": 44690 + }, + { + "epoch": 126.98863636363636, + "grad_norm": 1.0101993083953857, + "learning_rate": 0.0001, + "loss": 0.0339, + "step": 44700 + }, + { + "epoch": 127.01704545454545, + "grad_norm": 1.0522290468215942, + "learning_rate": 0.0001, + "loss": 0.0326, + "step": 44710 + }, + { + "epoch": 127.04545454545455, + "grad_norm": 1.0212730169296265, + "learning_rate": 0.0001, + "loss": 0.0328, + "step": 44720 + }, + { + "epoch": 127.07386363636364, + "grad_norm": 0.8338181972503662, + "learning_rate": 0.0001, + "loss": 0.0328, + "step": 44730 + }, + { + "epoch": 127.10227272727273, + "grad_norm": 0.8435046672821045, + "learning_rate": 0.0001, + "loss": 0.0326, + "step": 44740 + }, + { + "epoch": 127.13068181818181, + "grad_norm": 0.6481569409370422, + "learning_rate": 0.0001, + "loss": 0.0323, + "step": 44750 + }, + { + "epoch": 127.1590909090909, + "grad_norm": 0.525007426738739, + "learning_rate": 0.0001, + "loss": 0.0334, + "step": 44760 + }, + { + "epoch": 127.1875, + "grad_norm": 0.7917028069496155, + "learning_rate": 0.0001, + "loss": 0.0336, + "step": 44770 + }, + { + "epoch": 127.2159090909091, + "grad_norm": 0.6355962157249451, + "learning_rate": 0.0001, + "loss": 0.0327, + "step": 44780 + }, + { + "epoch": 127.24431818181819, + "grad_norm": 0.4666050970554352, + "learning_rate": 0.0001, + "loss": 0.0331, + "step": 44790 + }, + { + "epoch": 127.27272727272727, + "grad_norm": 0.6988716125488281, + "learning_rate": 0.0001, + "loss": 0.0333, + "step": 44800 + }, + { + "epoch": 127.30113636363636, + "grad_norm": 0.599835216999054, + "learning_rate": 0.0001, + "loss": 0.0319, + "step": 44810 + }, + { + "epoch": 127.32954545454545, + "grad_norm": 0.7451519966125488, + "learning_rate": 0.0001, + "loss": 0.0332, + "step": 44820 + }, + { + "epoch": 127.35795454545455, + "grad_norm": 0.7405692934989929, + "learning_rate": 0.0001, + "loss": 0.0334, + "step": 44830 + }, + { + "epoch": 127.38636363636364, + "grad_norm": 0.7457634806632996, + "learning_rate": 0.0001, + "loss": 0.0328, + "step": 44840 + }, + { + "epoch": 127.41477272727273, + "grad_norm": 0.575497031211853, + "learning_rate": 0.0001, + "loss": 0.0354, + "step": 44850 + }, + { + "epoch": 127.44318181818181, + "grad_norm": 0.6736639738082886, + "learning_rate": 0.0001, + "loss": 0.0353, + "step": 44860 + }, + { + "epoch": 127.4715909090909, + "grad_norm": 0.7018943428993225, + "learning_rate": 0.0001, + "loss": 0.0342, + "step": 44870 + }, + { + "epoch": 127.5, + "grad_norm": 0.6599423289299011, + "learning_rate": 0.0001, + "loss": 0.0338, + "step": 44880 + }, + { + "epoch": 127.5284090909091, + "grad_norm": 0.6989256739616394, + "learning_rate": 0.0001, + "loss": 0.0353, + "step": 44890 + }, + { + "epoch": 127.55681818181819, + "grad_norm": 0.6219071745872498, + "learning_rate": 0.0001, + "loss": 0.0349, + "step": 44900 + }, + { + "epoch": 127.58522727272727, + "grad_norm": 0.6060497164726257, + "learning_rate": 0.0001, + "loss": 0.0351, + "step": 44910 + }, + { + "epoch": 127.61363636363636, + "grad_norm": 0.8178271651268005, + "learning_rate": 0.0001, + "loss": 0.0342, + "step": 44920 + }, + { + "epoch": 127.64204545454545, + "grad_norm": 0.6442059874534607, + "learning_rate": 0.0001, + "loss": 0.0348, + "step": 44930 + }, + { + "epoch": 127.67045454545455, + "grad_norm": 0.8080576062202454, + "learning_rate": 0.0001, + "loss": 0.0332, + "step": 44940 + }, + { + "epoch": 127.69886363636364, + "grad_norm": 0.8716291189193726, + "learning_rate": 0.0001, + "loss": 0.0347, + "step": 44950 + }, + { + "epoch": 127.72727272727273, + "grad_norm": 0.6577211022377014, + "learning_rate": 0.0001, + "loss": 0.0343, + "step": 44960 + }, + { + "epoch": 127.75568181818181, + "grad_norm": 0.6145163774490356, + "learning_rate": 0.0001, + "loss": 0.0338, + "step": 44970 + }, + { + "epoch": 127.7840909090909, + "grad_norm": 0.6429737210273743, + "learning_rate": 0.0001, + "loss": 0.0338, + "step": 44980 + }, + { + "epoch": 127.8125, + "grad_norm": 1.2113999128341675, + "learning_rate": 0.0001, + "loss": 0.0345, + "step": 44990 + }, + { + "epoch": 127.8409090909091, + "grad_norm": 1.2781754732131958, + "learning_rate": 0.0001, + "loss": 0.0341, + "step": 45000 + }, + { + "epoch": 127.86931818181819, + "grad_norm": 0.8394537568092346, + "learning_rate": 0.0001, + "loss": 0.0331, + "step": 45010 + }, + { + "epoch": 127.89772727272727, + "grad_norm": 1.1930190324783325, + "learning_rate": 0.0001, + "loss": 0.0336, + "step": 45020 + }, + { + "epoch": 127.92613636363636, + "grad_norm": 0.6906962990760803, + "learning_rate": 0.0001, + "loss": 0.0334, + "step": 45030 + }, + { + "epoch": 127.95454545454545, + "grad_norm": 0.891907811164856, + "learning_rate": 0.0001, + "loss": 0.0341, + "step": 45040 + }, + { + "epoch": 127.98295454545455, + "grad_norm": 1.1798697710037231, + "learning_rate": 0.0001, + "loss": 0.0345, + "step": 45050 + }, + { + "epoch": 128.01136363636363, + "grad_norm": 1.0021605491638184, + "learning_rate": 0.0001, + "loss": 0.0331, + "step": 45060 + }, + { + "epoch": 128.03977272727272, + "grad_norm": 1.0694570541381836, + "learning_rate": 0.0001, + "loss": 0.0346, + "step": 45070 + }, + { + "epoch": 128.0681818181818, + "grad_norm": 0.9267218708992004, + "learning_rate": 0.0001, + "loss": 0.0344, + "step": 45080 + }, + { + "epoch": 128.0965909090909, + "grad_norm": 0.6276006698608398, + "learning_rate": 0.0001, + "loss": 0.0346, + "step": 45090 + }, + { + "epoch": 128.125, + "grad_norm": 0.6347649693489075, + "learning_rate": 0.0001, + "loss": 0.0342, + "step": 45100 + }, + { + "epoch": 128.1534090909091, + "grad_norm": 0.6764646768569946, + "learning_rate": 0.0001, + "loss": 0.0357, + "step": 45110 + }, + { + "epoch": 128.1818181818182, + "grad_norm": 0.6223496794700623, + "learning_rate": 0.0001, + "loss": 0.0332, + "step": 45120 + }, + { + "epoch": 128.21022727272728, + "grad_norm": 0.7356517910957336, + "learning_rate": 0.0001, + "loss": 0.0351, + "step": 45130 + }, + { + "epoch": 128.23863636363637, + "grad_norm": 0.9081064462661743, + "learning_rate": 0.0001, + "loss": 0.0354, + "step": 45140 + }, + { + "epoch": 128.26704545454547, + "grad_norm": 0.8737921118736267, + "learning_rate": 0.0001, + "loss": 0.0347, + "step": 45150 + }, + { + "epoch": 128.29545454545453, + "grad_norm": 0.9559502005577087, + "learning_rate": 0.0001, + "loss": 0.0361, + "step": 45160 + }, + { + "epoch": 128.32386363636363, + "grad_norm": 0.9400319457054138, + "learning_rate": 0.0001, + "loss": 0.0342, + "step": 45170 + }, + { + "epoch": 128.35227272727272, + "grad_norm": 0.9618202447891235, + "learning_rate": 0.0001, + "loss": 0.0345, + "step": 45180 + }, + { + "epoch": 128.3806818181818, + "grad_norm": 0.8962883353233337, + "learning_rate": 0.0001, + "loss": 0.0336, + "step": 45190 + }, + { + "epoch": 128.4090909090909, + "grad_norm": 0.6514441967010498, + "learning_rate": 0.0001, + "loss": 0.0339, + "step": 45200 + }, + { + "epoch": 128.4375, + "grad_norm": 0.7900486588478088, + "learning_rate": 0.0001, + "loss": 0.0342, + "step": 45210 + }, + { + "epoch": 128.4659090909091, + "grad_norm": 1.0713646411895752, + "learning_rate": 0.0001, + "loss": 0.0331, + "step": 45220 + }, + { + "epoch": 128.4943181818182, + "grad_norm": 0.7977399230003357, + "learning_rate": 0.0001, + "loss": 0.0338, + "step": 45230 + }, + { + "epoch": 128.52272727272728, + "grad_norm": 0.7408506870269775, + "learning_rate": 0.0001, + "loss": 0.0343, + "step": 45240 + }, + { + "epoch": 128.55113636363637, + "grad_norm": 0.7208636999130249, + "learning_rate": 0.0001, + "loss": 0.0347, + "step": 45250 + }, + { + "epoch": 128.57954545454547, + "grad_norm": 0.9026774764060974, + "learning_rate": 0.0001, + "loss": 0.0337, + "step": 45260 + }, + { + "epoch": 128.60795454545453, + "grad_norm": 0.7447446584701538, + "learning_rate": 0.0001, + "loss": 0.034, + "step": 45270 + }, + { + "epoch": 128.63636363636363, + "grad_norm": 0.6284778714179993, + "learning_rate": 0.0001, + "loss": 0.0327, + "step": 45280 + }, + { + "epoch": 128.66477272727272, + "grad_norm": 0.773349404335022, + "learning_rate": 0.0001, + "loss": 0.0342, + "step": 45290 + }, + { + "epoch": 128.6931818181818, + "grad_norm": 0.652649462223053, + "learning_rate": 0.0001, + "loss": 0.0326, + "step": 45300 + }, + { + "epoch": 128.7215909090909, + "grad_norm": 0.664982795715332, + "learning_rate": 0.0001, + "loss": 0.033, + "step": 45310 + }, + { + "epoch": 128.75, + "grad_norm": 0.9043540954589844, + "learning_rate": 0.0001, + "loss": 0.0335, + "step": 45320 + }, + { + "epoch": 128.7784090909091, + "grad_norm": 0.6659265160560608, + "learning_rate": 0.0001, + "loss": 0.0323, + "step": 45330 + }, + { + "epoch": 128.8068181818182, + "grad_norm": 0.6651855111122131, + "learning_rate": 0.0001, + "loss": 0.0335, + "step": 45340 + }, + { + "epoch": 128.83522727272728, + "grad_norm": 0.6218985915184021, + "learning_rate": 0.0001, + "loss": 0.0332, + "step": 45350 + }, + { + "epoch": 128.86363636363637, + "grad_norm": 0.7133499979972839, + "learning_rate": 0.0001, + "loss": 0.034, + "step": 45360 + }, + { + "epoch": 128.89204545454547, + "grad_norm": 0.6537865996360779, + "learning_rate": 0.0001, + "loss": 0.0329, + "step": 45370 + }, + { + "epoch": 128.92045454545453, + "grad_norm": 0.7740278840065002, + "learning_rate": 0.0001, + "loss": 0.0326, + "step": 45380 + }, + { + "epoch": 128.94886363636363, + "grad_norm": 0.7691712379455566, + "learning_rate": 0.0001, + "loss": 0.0355, + "step": 45390 + }, + { + "epoch": 128.97727272727272, + "grad_norm": 0.7746933698654175, + "learning_rate": 0.0001, + "loss": 0.0337, + "step": 45400 + }, + { + "epoch": 129.0056818181818, + "grad_norm": 0.7170177102088928, + "learning_rate": 0.0001, + "loss": 0.0337, + "step": 45410 + }, + { + "epoch": 129.0340909090909, + "grad_norm": 0.6429873108863831, + "learning_rate": 0.0001, + "loss": 0.0335, + "step": 45420 + }, + { + "epoch": 129.0625, + "grad_norm": 0.8797420263290405, + "learning_rate": 0.0001, + "loss": 0.0348, + "step": 45430 + }, + { + "epoch": 129.0909090909091, + "grad_norm": 0.6143895983695984, + "learning_rate": 0.0001, + "loss": 0.0343, + "step": 45440 + }, + { + "epoch": 129.1193181818182, + "grad_norm": 0.7619710564613342, + "learning_rate": 0.0001, + "loss": 0.033, + "step": 45450 + }, + { + "epoch": 129.14772727272728, + "grad_norm": 0.5125098824501038, + "learning_rate": 0.0001, + "loss": 0.0344, + "step": 45460 + }, + { + "epoch": 129.17613636363637, + "grad_norm": 0.7374431490898132, + "learning_rate": 0.0001, + "loss": 0.0339, + "step": 45470 + }, + { + "epoch": 129.20454545454547, + "grad_norm": 0.6298514604568481, + "learning_rate": 0.0001, + "loss": 0.0336, + "step": 45480 + }, + { + "epoch": 129.23295454545453, + "grad_norm": 0.570999801158905, + "learning_rate": 0.0001, + "loss": 0.0342, + "step": 45490 + }, + { + "epoch": 129.26136363636363, + "grad_norm": 0.5362004041671753, + "learning_rate": 0.0001, + "loss": 0.0342, + "step": 45500 + }, + { + "epoch": 129.28977272727272, + "grad_norm": 0.6400303840637207, + "learning_rate": 0.0001, + "loss": 0.0352, + "step": 45510 + }, + { + "epoch": 129.3181818181818, + "grad_norm": 0.772942066192627, + "learning_rate": 0.0001, + "loss": 0.0336, + "step": 45520 + }, + { + "epoch": 129.3465909090909, + "grad_norm": 0.5503178238868713, + "learning_rate": 0.0001, + "loss": 0.0346, + "step": 45530 + }, + { + "epoch": 129.375, + "grad_norm": 0.5466687083244324, + "learning_rate": 0.0001, + "loss": 0.0353, + "step": 45540 + }, + { + "epoch": 129.4034090909091, + "grad_norm": 0.6772168874740601, + "learning_rate": 0.0001, + "loss": 0.0338, + "step": 45550 + }, + { + "epoch": 129.4318181818182, + "grad_norm": 0.6691098809242249, + "learning_rate": 0.0001, + "loss": 0.0336, + "step": 45560 + }, + { + "epoch": 129.46022727272728, + "grad_norm": 0.8726401925086975, + "learning_rate": 0.0001, + "loss": 0.0341, + "step": 45570 + }, + { + "epoch": 129.48863636363637, + "grad_norm": 1.072825312614441, + "learning_rate": 0.0001, + "loss": 0.0312, + "step": 45580 + }, + { + "epoch": 129.51704545454547, + "grad_norm": 1.0415953397750854, + "learning_rate": 0.0001, + "loss": 0.0339, + "step": 45590 + }, + { + "epoch": 129.54545454545453, + "grad_norm": 0.5183255672454834, + "learning_rate": 0.0001, + "loss": 0.0338, + "step": 45600 + }, + { + "epoch": 129.57386363636363, + "grad_norm": 0.8744351267814636, + "learning_rate": 0.0001, + "loss": 0.0349, + "step": 45610 + }, + { + "epoch": 129.60227272727272, + "grad_norm": 0.8066695928573608, + "learning_rate": 0.0001, + "loss": 0.0326, + "step": 45620 + }, + { + "epoch": 129.6306818181818, + "grad_norm": 0.7792837619781494, + "learning_rate": 0.0001, + "loss": 0.035, + "step": 45630 + }, + { + "epoch": 129.6590909090909, + "grad_norm": 0.8309561610221863, + "learning_rate": 0.0001, + "loss": 0.0351, + "step": 45640 + }, + { + "epoch": 129.6875, + "grad_norm": 0.7426981925964355, + "learning_rate": 0.0001, + "loss": 0.0337, + "step": 45650 + }, + { + "epoch": 129.7159090909091, + "grad_norm": 0.7349977493286133, + "learning_rate": 0.0001, + "loss": 0.0339, + "step": 45660 + }, + { + "epoch": 129.7443181818182, + "grad_norm": 0.6687122583389282, + "learning_rate": 0.0001, + "loss": 0.0335, + "step": 45670 + }, + { + "epoch": 129.77272727272728, + "grad_norm": 0.6240840554237366, + "learning_rate": 0.0001, + "loss": 0.0327, + "step": 45680 + }, + { + "epoch": 129.80113636363637, + "grad_norm": 0.6070581078529358, + "learning_rate": 0.0001, + "loss": 0.0322, + "step": 45690 + }, + { + "epoch": 129.82954545454547, + "grad_norm": 0.6064611673355103, + "learning_rate": 0.0001, + "loss": 0.0326, + "step": 45700 + }, + { + "epoch": 129.85795454545453, + "grad_norm": 0.8292164206504822, + "learning_rate": 0.0001, + "loss": 0.0329, + "step": 45710 + }, + { + "epoch": 129.88636363636363, + "grad_norm": 0.6600549221038818, + "learning_rate": 0.0001, + "loss": 0.0334, + "step": 45720 + }, + { + "epoch": 129.91477272727272, + "grad_norm": 0.8599357604980469, + "learning_rate": 0.0001, + "loss": 0.0335, + "step": 45730 + }, + { + "epoch": 129.9431818181818, + "grad_norm": 0.6765527725219727, + "learning_rate": 0.0001, + "loss": 0.033, + "step": 45740 + }, + { + "epoch": 129.9715909090909, + "grad_norm": 0.7478293776512146, + "learning_rate": 0.0001, + "loss": 0.0324, + "step": 45750 + }, + { + "epoch": 130.0, + "grad_norm": 0.5045310258865356, + "learning_rate": 0.0001, + "loss": 0.0319, + "step": 45760 + }, + { + "epoch": 130.0284090909091, + "grad_norm": 0.5499379634857178, + "learning_rate": 0.0001, + "loss": 0.033, + "step": 45770 + }, + { + "epoch": 130.0568181818182, + "grad_norm": 0.5516977906227112, + "learning_rate": 0.0001, + "loss": 0.0327, + "step": 45780 + }, + { + "epoch": 130.08522727272728, + "grad_norm": 0.4980061948299408, + "learning_rate": 0.0001, + "loss": 0.033, + "step": 45790 + }, + { + "epoch": 130.11363636363637, + "grad_norm": 0.47144022583961487, + "learning_rate": 0.0001, + "loss": 0.0324, + "step": 45800 + }, + { + "epoch": 130.14204545454547, + "grad_norm": 0.6652446985244751, + "learning_rate": 0.0001, + "loss": 0.0341, + "step": 45810 + }, + { + "epoch": 130.17045454545453, + "grad_norm": 0.5668972730636597, + "learning_rate": 0.0001, + "loss": 0.0327, + "step": 45820 + }, + { + "epoch": 130.19886363636363, + "grad_norm": 0.5128690004348755, + "learning_rate": 0.0001, + "loss": 0.0322, + "step": 45830 + }, + { + "epoch": 130.22727272727272, + "grad_norm": 0.47686073184013367, + "learning_rate": 0.0001, + "loss": 0.034, + "step": 45840 + }, + { + "epoch": 130.2556818181818, + "grad_norm": 0.7872583270072937, + "learning_rate": 0.0001, + "loss": 0.033, + "step": 45850 + }, + { + "epoch": 130.2840909090909, + "grad_norm": 0.6706736087799072, + "learning_rate": 0.0001, + "loss": 0.0346, + "step": 45860 + }, + { + "epoch": 130.3125, + "grad_norm": 0.6146107912063599, + "learning_rate": 0.0001, + "loss": 0.0329, + "step": 45870 + }, + { + "epoch": 130.3409090909091, + "grad_norm": 0.7997154593467712, + "learning_rate": 0.0001, + "loss": 0.0334, + "step": 45880 + }, + { + "epoch": 130.3693181818182, + "grad_norm": 0.8617053031921387, + "learning_rate": 0.0001, + "loss": 0.0328, + "step": 45890 + }, + { + "epoch": 130.39772727272728, + "grad_norm": 0.8967577219009399, + "learning_rate": 0.0001, + "loss": 0.0339, + "step": 45900 + }, + { + "epoch": 130.42613636363637, + "grad_norm": 0.7063471674919128, + "learning_rate": 0.0001, + "loss": 0.0339, + "step": 45910 + }, + { + "epoch": 130.45454545454547, + "grad_norm": 0.6364906430244446, + "learning_rate": 0.0001, + "loss": 0.0332, + "step": 45920 + }, + { + "epoch": 130.48295454545453, + "grad_norm": 0.5470952987670898, + "learning_rate": 0.0001, + "loss": 0.0324, + "step": 45930 + }, + { + "epoch": 130.51136363636363, + "grad_norm": 0.8126204013824463, + "learning_rate": 0.0001, + "loss": 0.0325, + "step": 45940 + }, + { + "epoch": 130.53977272727272, + "grad_norm": 0.9707944989204407, + "learning_rate": 0.0001, + "loss": 0.0336, + "step": 45950 + }, + { + "epoch": 130.5681818181818, + "grad_norm": 0.777581512928009, + "learning_rate": 0.0001, + "loss": 0.0344, + "step": 45960 + }, + { + "epoch": 130.5965909090909, + "grad_norm": 0.49622127413749695, + "learning_rate": 0.0001, + "loss": 0.0318, + "step": 45970 + }, + { + "epoch": 130.625, + "grad_norm": 0.7890464067459106, + "learning_rate": 0.0001, + "loss": 0.0341, + "step": 45980 + }, + { + "epoch": 130.6534090909091, + "grad_norm": 0.6376560926437378, + "learning_rate": 0.0001, + "loss": 0.0332, + "step": 45990 + }, + { + "epoch": 130.6818181818182, + "grad_norm": 0.5111783742904663, + "learning_rate": 0.0001, + "loss": 0.0337, + "step": 46000 + }, + { + "epoch": 130.71022727272728, + "grad_norm": 0.6418190002441406, + "learning_rate": 0.0001, + "loss": 0.0343, + "step": 46010 + }, + { + "epoch": 130.73863636363637, + "grad_norm": 0.5361145734786987, + "learning_rate": 0.0001, + "loss": 0.0326, + "step": 46020 + }, + { + "epoch": 130.76704545454547, + "grad_norm": 0.5517832040786743, + "learning_rate": 0.0001, + "loss": 0.033, + "step": 46030 + }, + { + "epoch": 130.79545454545453, + "grad_norm": 0.6392123103141785, + "learning_rate": 0.0001, + "loss": 0.0326, + "step": 46040 + }, + { + "epoch": 130.82386363636363, + "grad_norm": 0.4958942234516144, + "learning_rate": 0.0001, + "loss": 0.0327, + "step": 46050 + }, + { + "epoch": 130.85227272727272, + "grad_norm": 0.5867140293121338, + "learning_rate": 0.0001, + "loss": 0.0328, + "step": 46060 + }, + { + "epoch": 130.8806818181818, + "grad_norm": 0.666496217250824, + "learning_rate": 0.0001, + "loss": 0.0326, + "step": 46070 + }, + { + "epoch": 130.9090909090909, + "grad_norm": 0.6093063950538635, + "learning_rate": 0.0001, + "loss": 0.0331, + "step": 46080 + }, + { + "epoch": 130.9375, + "grad_norm": 0.6793018579483032, + "learning_rate": 0.0001, + "loss": 0.034, + "step": 46090 + }, + { + "epoch": 130.9659090909091, + "grad_norm": 0.8787961602210999, + "learning_rate": 0.0001, + "loss": 0.0327, + "step": 46100 + }, + { + "epoch": 130.9943181818182, + "grad_norm": 0.8491075038909912, + "learning_rate": 0.0001, + "loss": 0.033, + "step": 46110 + }, + { + "epoch": 131.02272727272728, + "grad_norm": 0.8769429922103882, + "learning_rate": 0.0001, + "loss": 0.033, + "step": 46120 + }, + { + "epoch": 131.05113636363637, + "grad_norm": 0.6816815137863159, + "learning_rate": 0.0001, + "loss": 0.0324, + "step": 46130 + }, + { + "epoch": 131.07954545454547, + "grad_norm": 0.716789722442627, + "learning_rate": 0.0001, + "loss": 0.0335, + "step": 46140 + }, + { + "epoch": 131.10795454545453, + "grad_norm": 0.5310009717941284, + "learning_rate": 0.0001, + "loss": 0.0334, + "step": 46150 + }, + { + "epoch": 131.13636363636363, + "grad_norm": 0.5277566909790039, + "learning_rate": 0.0001, + "loss": 0.0335, + "step": 46160 + }, + { + "epoch": 131.16477272727272, + "grad_norm": 0.48119422793388367, + "learning_rate": 0.0001, + "loss": 0.0338, + "step": 46170 + }, + { + "epoch": 131.1931818181818, + "grad_norm": 0.510347843170166, + "learning_rate": 0.0001, + "loss": 0.0338, + "step": 46180 + }, + { + "epoch": 131.2215909090909, + "grad_norm": 0.5207124948501587, + "learning_rate": 0.0001, + "loss": 0.0337, + "step": 46190 + }, + { + "epoch": 131.25, + "grad_norm": 0.5676156878471375, + "learning_rate": 0.0001, + "loss": 0.0323, + "step": 46200 + }, + { + "epoch": 131.2784090909091, + "grad_norm": 0.5713900327682495, + "learning_rate": 0.0001, + "loss": 0.0335, + "step": 46210 + }, + { + "epoch": 131.3068181818182, + "grad_norm": 0.6059330701828003, + "learning_rate": 0.0001, + "loss": 0.0324, + "step": 46220 + }, + { + "epoch": 131.33522727272728, + "grad_norm": 0.5800050497055054, + "learning_rate": 0.0001, + "loss": 0.0341, + "step": 46230 + }, + { + "epoch": 131.36363636363637, + "grad_norm": 0.5849801301956177, + "learning_rate": 0.0001, + "loss": 0.0338, + "step": 46240 + }, + { + "epoch": 131.39204545454547, + "grad_norm": 0.46115511655807495, + "learning_rate": 0.0001, + "loss": 0.0328, + "step": 46250 + }, + { + "epoch": 131.42045454545453, + "grad_norm": 0.6311548948287964, + "learning_rate": 0.0001, + "loss": 0.0328, + "step": 46260 + }, + { + "epoch": 131.44886363636363, + "grad_norm": 0.4590200185775757, + "learning_rate": 0.0001, + "loss": 0.032, + "step": 46270 + }, + { + "epoch": 131.47727272727272, + "grad_norm": 0.4637441635131836, + "learning_rate": 0.0001, + "loss": 0.0332, + "step": 46280 + }, + { + "epoch": 131.5056818181818, + "grad_norm": 0.5604123473167419, + "learning_rate": 0.0001, + "loss": 0.0326, + "step": 46290 + }, + { + "epoch": 131.5340909090909, + "grad_norm": 0.7339373826980591, + "learning_rate": 0.0001, + "loss": 0.0329, + "step": 46300 + }, + { + "epoch": 131.5625, + "grad_norm": 0.49434801936149597, + "learning_rate": 0.0001, + "loss": 0.0328, + "step": 46310 + }, + { + "epoch": 131.5909090909091, + "grad_norm": 0.5729549527168274, + "learning_rate": 0.0001, + "loss": 0.0329, + "step": 46320 + }, + { + "epoch": 131.6193181818182, + "grad_norm": 0.5489758253097534, + "learning_rate": 0.0001, + "loss": 0.0335, + "step": 46330 + }, + { + "epoch": 131.64772727272728, + "grad_norm": 0.5991235375404358, + "learning_rate": 0.0001, + "loss": 0.0333, + "step": 46340 + }, + { + "epoch": 131.67613636363637, + "grad_norm": 0.5655492544174194, + "learning_rate": 0.0001, + "loss": 0.0327, + "step": 46350 + }, + { + "epoch": 131.70454545454547, + "grad_norm": 0.5596151947975159, + "learning_rate": 0.0001, + "loss": 0.0332, + "step": 46360 + }, + { + "epoch": 131.73295454545453, + "grad_norm": 0.6359032988548279, + "learning_rate": 0.0001, + "loss": 0.0317, + "step": 46370 + }, + { + "epoch": 131.76136363636363, + "grad_norm": 0.6330766677856445, + "learning_rate": 0.0001, + "loss": 0.0313, + "step": 46380 + }, + { + "epoch": 131.78977272727272, + "grad_norm": 0.7287530303001404, + "learning_rate": 0.0001, + "loss": 0.0331, + "step": 46390 + }, + { + "epoch": 131.8181818181818, + "grad_norm": 0.4960668087005615, + "learning_rate": 0.0001, + "loss": 0.0337, + "step": 46400 + }, + { + "epoch": 131.8465909090909, + "grad_norm": 0.7417987585067749, + "learning_rate": 0.0001, + "loss": 0.0327, + "step": 46410 + }, + { + "epoch": 131.875, + "grad_norm": 0.5909314155578613, + "learning_rate": 0.0001, + "loss": 0.0332, + "step": 46420 + }, + { + "epoch": 131.9034090909091, + "grad_norm": 0.4722643494606018, + "learning_rate": 0.0001, + "loss": 0.0325, + "step": 46430 + }, + { + "epoch": 131.9318181818182, + "grad_norm": 0.5753642320632935, + "learning_rate": 0.0001, + "loss": 0.0328, + "step": 46440 + }, + { + "epoch": 131.96022727272728, + "grad_norm": 0.5663985013961792, + "learning_rate": 0.0001, + "loss": 0.0335, + "step": 46450 + }, + { + "epoch": 131.98863636363637, + "grad_norm": 0.5400167107582092, + "learning_rate": 0.0001, + "loss": 0.0328, + "step": 46460 + }, + { + "epoch": 132.01704545454547, + "grad_norm": 0.46016308665275574, + "learning_rate": 0.0001, + "loss": 0.0331, + "step": 46470 + }, + { + "epoch": 132.04545454545453, + "grad_norm": 0.5892532467842102, + "learning_rate": 0.0001, + "loss": 0.0326, + "step": 46480 + }, + { + "epoch": 132.07386363636363, + "grad_norm": 0.5685364603996277, + "learning_rate": 0.0001, + "loss": 0.0328, + "step": 46490 + }, + { + "epoch": 132.10227272727272, + "grad_norm": 0.5203407406806946, + "learning_rate": 0.0001, + "loss": 0.0332, + "step": 46500 + }, + { + "epoch": 132.1306818181818, + "grad_norm": 0.5543718338012695, + "learning_rate": 0.0001, + "loss": 0.0328, + "step": 46510 + }, + { + "epoch": 132.1590909090909, + "grad_norm": 0.624247670173645, + "learning_rate": 0.0001, + "loss": 0.0328, + "step": 46520 + }, + { + "epoch": 132.1875, + "grad_norm": 0.6007071137428284, + "learning_rate": 0.0001, + "loss": 0.0324, + "step": 46530 + }, + { + "epoch": 132.2159090909091, + "grad_norm": 0.68288254737854, + "learning_rate": 0.0001, + "loss": 0.0333, + "step": 46540 + }, + { + "epoch": 132.2443181818182, + "grad_norm": 1.2552138566970825, + "learning_rate": 0.0001, + "loss": 0.0331, + "step": 46550 + }, + { + "epoch": 132.27272727272728, + "grad_norm": 0.9271278381347656, + "learning_rate": 0.0001, + "loss": 0.0335, + "step": 46560 + }, + { + "epoch": 132.30113636363637, + "grad_norm": 0.8253664374351501, + "learning_rate": 0.0001, + "loss": 0.0321, + "step": 46570 + }, + { + "epoch": 132.32954545454547, + "grad_norm": 0.5965460538864136, + "learning_rate": 0.0001, + "loss": 0.0331, + "step": 46580 + }, + { + "epoch": 132.35795454545453, + "grad_norm": 0.6545840501785278, + "learning_rate": 0.0001, + "loss": 0.0315, + "step": 46590 + }, + { + "epoch": 132.38636363636363, + "grad_norm": 0.6397396326065063, + "learning_rate": 0.0001, + "loss": 0.0318, + "step": 46600 + }, + { + "epoch": 132.41477272727272, + "grad_norm": 0.6624685525894165, + "learning_rate": 0.0001, + "loss": 0.0339, + "step": 46610 + }, + { + "epoch": 132.4431818181818, + "grad_norm": 0.7068834900856018, + "learning_rate": 0.0001, + "loss": 0.0327, + "step": 46620 + }, + { + "epoch": 132.4715909090909, + "grad_norm": 0.6984627842903137, + "learning_rate": 0.0001, + "loss": 0.0325, + "step": 46630 + }, + { + "epoch": 132.5, + "grad_norm": 0.6401987075805664, + "learning_rate": 0.0001, + "loss": 0.0323, + "step": 46640 + }, + { + "epoch": 132.5284090909091, + "grad_norm": 0.5327432155609131, + "learning_rate": 0.0001, + "loss": 0.0321, + "step": 46650 + }, + { + "epoch": 132.5568181818182, + "grad_norm": 0.5304933190345764, + "learning_rate": 0.0001, + "loss": 0.0331, + "step": 46660 + }, + { + "epoch": 132.58522727272728, + "grad_norm": 0.7458568215370178, + "learning_rate": 0.0001, + "loss": 0.033, + "step": 46670 + }, + { + "epoch": 132.61363636363637, + "grad_norm": 0.6337578892707825, + "learning_rate": 0.0001, + "loss": 0.0323, + "step": 46680 + }, + { + "epoch": 132.64204545454547, + "grad_norm": 0.712566077709198, + "learning_rate": 0.0001, + "loss": 0.0333, + "step": 46690 + }, + { + "epoch": 132.67045454545453, + "grad_norm": 0.6466312408447266, + "learning_rate": 0.0001, + "loss": 0.0332, + "step": 46700 + }, + { + "epoch": 132.69886363636363, + "grad_norm": 0.6697583794593811, + "learning_rate": 0.0001, + "loss": 0.0329, + "step": 46710 + }, + { + "epoch": 132.72727272727272, + "grad_norm": 0.5255504846572876, + "learning_rate": 0.0001, + "loss": 0.0329, + "step": 46720 + }, + { + "epoch": 132.7556818181818, + "grad_norm": 1.0727545022964478, + "learning_rate": 0.0001, + "loss": 0.0336, + "step": 46730 + }, + { + "epoch": 132.7840909090909, + "grad_norm": 0.7345282435417175, + "learning_rate": 0.0001, + "loss": 0.0322, + "step": 46740 + }, + { + "epoch": 132.8125, + "grad_norm": 0.6004593968391418, + "learning_rate": 0.0001, + "loss": 0.0332, + "step": 46750 + }, + { + "epoch": 132.8409090909091, + "grad_norm": 0.4870907664299011, + "learning_rate": 0.0001, + "loss": 0.0339, + "step": 46760 + }, + { + "epoch": 132.8693181818182, + "grad_norm": 0.7557304501533508, + "learning_rate": 0.0001, + "loss": 0.033, + "step": 46770 + }, + { + "epoch": 132.89772727272728, + "grad_norm": 0.9188068509101868, + "learning_rate": 0.0001, + "loss": 0.0347, + "step": 46780 + }, + { + "epoch": 132.92613636363637, + "grad_norm": 0.5896967053413391, + "learning_rate": 0.0001, + "loss": 0.033, + "step": 46790 + }, + { + "epoch": 132.95454545454547, + "grad_norm": 0.7609643936157227, + "learning_rate": 0.0001, + "loss": 0.0331, + "step": 46800 + }, + { + "epoch": 132.98295454545453, + "grad_norm": 0.8466443419456482, + "learning_rate": 0.0001, + "loss": 0.0318, + "step": 46810 + }, + { + "epoch": 133.01136363636363, + "grad_norm": 0.8880951404571533, + "learning_rate": 0.0001, + "loss": 0.0327, + "step": 46820 + }, + { + "epoch": 133.03977272727272, + "grad_norm": 0.9683408737182617, + "learning_rate": 0.0001, + "loss": 0.0349, + "step": 46830 + }, + { + "epoch": 133.0681818181818, + "grad_norm": 0.871724009513855, + "learning_rate": 0.0001, + "loss": 0.0332, + "step": 46840 + }, + { + "epoch": 133.0965909090909, + "grad_norm": 0.5088608264923096, + "learning_rate": 0.0001, + "loss": 0.0336, + "step": 46850 + }, + { + "epoch": 133.125, + "grad_norm": 0.7277428507804871, + "learning_rate": 0.0001, + "loss": 0.0325, + "step": 46860 + }, + { + "epoch": 133.1534090909091, + "grad_norm": 0.7390486598014832, + "learning_rate": 0.0001, + "loss": 0.0329, + "step": 46870 + }, + { + "epoch": 133.1818181818182, + "grad_norm": 0.707007884979248, + "learning_rate": 0.0001, + "loss": 0.0335, + "step": 46880 + }, + { + "epoch": 133.21022727272728, + "grad_norm": 0.6008827090263367, + "learning_rate": 0.0001, + "loss": 0.0324, + "step": 46890 + }, + { + "epoch": 133.23863636363637, + "grad_norm": 0.6185063123703003, + "learning_rate": 0.0001, + "loss": 0.0317, + "step": 46900 + }, + { + "epoch": 133.26704545454547, + "grad_norm": 0.67989182472229, + "learning_rate": 0.0001, + "loss": 0.0324, + "step": 46910 + }, + { + "epoch": 133.29545454545453, + "grad_norm": 0.548943817615509, + "learning_rate": 0.0001, + "loss": 0.033, + "step": 46920 + }, + { + "epoch": 133.32386363636363, + "grad_norm": 0.6702325344085693, + "learning_rate": 0.0001, + "loss": 0.0322, + "step": 46930 + }, + { + "epoch": 133.35227272727272, + "grad_norm": 0.7883853316307068, + "learning_rate": 0.0001, + "loss": 0.0342, + "step": 46940 + }, + { + "epoch": 133.3806818181818, + "grad_norm": 0.7740568518638611, + "learning_rate": 0.0001, + "loss": 0.0323, + "step": 46950 + }, + { + "epoch": 133.4090909090909, + "grad_norm": 0.6243300437927246, + "learning_rate": 0.0001, + "loss": 0.0329, + "step": 46960 + }, + { + "epoch": 133.4375, + "grad_norm": 0.7169111371040344, + "learning_rate": 0.0001, + "loss": 0.0329, + "step": 46970 + }, + { + "epoch": 133.4659090909091, + "grad_norm": 0.6098527312278748, + "learning_rate": 0.0001, + "loss": 0.0331, + "step": 46980 + }, + { + "epoch": 133.4943181818182, + "grad_norm": 0.5657358169555664, + "learning_rate": 0.0001, + "loss": 0.0326, + "step": 46990 + }, + { + "epoch": 133.52272727272728, + "grad_norm": 0.5001091957092285, + "learning_rate": 0.0001, + "loss": 0.0334, + "step": 47000 + }, + { + "epoch": 133.55113636363637, + "grad_norm": 0.5768367052078247, + "learning_rate": 0.0001, + "loss": 0.0338, + "step": 47010 + }, + { + "epoch": 133.57954545454547, + "grad_norm": 0.7666851282119751, + "learning_rate": 0.0001, + "loss": 0.035, + "step": 47020 + }, + { + "epoch": 133.60795454545453, + "grad_norm": 0.6639266014099121, + "learning_rate": 0.0001, + "loss": 0.0338, + "step": 47030 + }, + { + "epoch": 133.63636363636363, + "grad_norm": 1.1128325462341309, + "learning_rate": 0.0001, + "loss": 0.0328, + "step": 47040 + }, + { + "epoch": 133.66477272727272, + "grad_norm": 0.7729830741882324, + "learning_rate": 0.0001, + "loss": 0.0329, + "step": 47050 + }, + { + "epoch": 133.6931818181818, + "grad_norm": 0.60756915807724, + "learning_rate": 0.0001, + "loss": 0.0313, + "step": 47060 + }, + { + "epoch": 133.7215909090909, + "grad_norm": 0.6942067742347717, + "learning_rate": 0.0001, + "loss": 0.033, + "step": 47070 + }, + { + "epoch": 133.75, + "grad_norm": 0.9247666597366333, + "learning_rate": 0.0001, + "loss": 0.0334, + "step": 47080 + }, + { + "epoch": 133.7784090909091, + "grad_norm": 0.9722429513931274, + "learning_rate": 0.0001, + "loss": 0.0332, + "step": 47090 + }, + { + "epoch": 133.8068181818182, + "grad_norm": 0.6130657196044922, + "learning_rate": 0.0001, + "loss": 0.0334, + "step": 47100 + }, + { + "epoch": 133.83522727272728, + "grad_norm": 0.8849529027938843, + "learning_rate": 0.0001, + "loss": 0.0323, + "step": 47110 + }, + { + "epoch": 133.86363636363637, + "grad_norm": 0.865003228187561, + "learning_rate": 0.0001, + "loss": 0.0326, + "step": 47120 + }, + { + "epoch": 133.89204545454547, + "grad_norm": 0.6581420302391052, + "learning_rate": 0.0001, + "loss": 0.0332, + "step": 47130 + }, + { + "epoch": 133.92045454545453, + "grad_norm": 0.7639325857162476, + "learning_rate": 0.0001, + "loss": 0.0327, + "step": 47140 + }, + { + "epoch": 133.94886363636363, + "grad_norm": 0.6666318774223328, + "learning_rate": 0.0001, + "loss": 0.0333, + "step": 47150 + }, + { + "epoch": 133.97727272727272, + "grad_norm": 1.2542755603790283, + "learning_rate": 0.0001, + "loss": 0.0323, + "step": 47160 + }, + { + "epoch": 134.0056818181818, + "grad_norm": 1.2946697473526, + "learning_rate": 0.0001, + "loss": 0.0321, + "step": 47170 + }, + { + "epoch": 134.0340909090909, + "grad_norm": 1.3122329711914062, + "learning_rate": 0.0001, + "loss": 0.032, + "step": 47180 + }, + { + "epoch": 134.0625, + "grad_norm": 1.2413455247879028, + "learning_rate": 0.0001, + "loss": 0.0327, + "step": 47190 + }, + { + "epoch": 134.0909090909091, + "grad_norm": 0.801084578037262, + "learning_rate": 0.0001, + "loss": 0.0313, + "step": 47200 + }, + { + "epoch": 134.1193181818182, + "grad_norm": 0.6665140986442566, + "learning_rate": 0.0001, + "loss": 0.032, + "step": 47210 + }, + { + "epoch": 134.14772727272728, + "grad_norm": 1.088781714439392, + "learning_rate": 0.0001, + "loss": 0.031, + "step": 47220 + }, + { + "epoch": 134.17613636363637, + "grad_norm": 0.8474469780921936, + "learning_rate": 0.0001, + "loss": 0.0321, + "step": 47230 + }, + { + "epoch": 134.20454545454547, + "grad_norm": 0.8532624244689941, + "learning_rate": 0.0001, + "loss": 0.0321, + "step": 47240 + }, + { + "epoch": 134.23295454545453, + "grad_norm": 0.6617245078086853, + "learning_rate": 0.0001, + "loss": 0.0337, + "step": 47250 + }, + { + "epoch": 134.26136363636363, + "grad_norm": 0.7599644064903259, + "learning_rate": 0.0001, + "loss": 0.0332, + "step": 47260 + }, + { + "epoch": 134.28977272727272, + "grad_norm": 0.7604702711105347, + "learning_rate": 0.0001, + "loss": 0.0323, + "step": 47270 + }, + { + "epoch": 134.3181818181818, + "grad_norm": 0.6242595314979553, + "learning_rate": 0.0001, + "loss": 0.0326, + "step": 47280 + }, + { + "epoch": 134.3465909090909, + "grad_norm": 0.8279832601547241, + "learning_rate": 0.0001, + "loss": 0.033, + "step": 47290 + }, + { + "epoch": 134.375, + "grad_norm": 0.7884923815727234, + "learning_rate": 0.0001, + "loss": 0.032, + "step": 47300 + }, + { + "epoch": 134.4034090909091, + "grad_norm": 0.6375776529312134, + "learning_rate": 0.0001, + "loss": 0.0333, + "step": 47310 + }, + { + "epoch": 134.4318181818182, + "grad_norm": 0.5708988308906555, + "learning_rate": 0.0001, + "loss": 0.0333, + "step": 47320 + }, + { + "epoch": 134.46022727272728, + "grad_norm": 0.570242702960968, + "learning_rate": 0.0001, + "loss": 0.0358, + "step": 47330 + }, + { + "epoch": 134.48863636363637, + "grad_norm": 0.6287363767623901, + "learning_rate": 0.0001, + "loss": 0.0366, + "step": 47340 + }, + { + "epoch": 134.51704545454547, + "grad_norm": 0.6968216300010681, + "learning_rate": 0.0001, + "loss": 0.0371, + "step": 47350 + }, + { + "epoch": 134.54545454545453, + "grad_norm": 0.48705530166625977, + "learning_rate": 0.0001, + "loss": 0.0354, + "step": 47360 + }, + { + "epoch": 134.57386363636363, + "grad_norm": 0.7090975046157837, + "learning_rate": 0.0001, + "loss": 0.0348, + "step": 47370 + }, + { + "epoch": 134.60227272727272, + "grad_norm": 0.9196219444274902, + "learning_rate": 0.0001, + "loss": 0.034, + "step": 47380 + }, + { + "epoch": 134.6306818181818, + "grad_norm": 0.9817768335342407, + "learning_rate": 0.0001, + "loss": 0.0346, + "step": 47390 + }, + { + "epoch": 134.6590909090909, + "grad_norm": 0.8549873232841492, + "learning_rate": 0.0001, + "loss": 0.0341, + "step": 47400 + }, + { + "epoch": 134.6875, + "grad_norm": 0.8343051671981812, + "learning_rate": 0.0001, + "loss": 0.034, + "step": 47410 + }, + { + "epoch": 134.7159090909091, + "grad_norm": 0.6651965975761414, + "learning_rate": 0.0001, + "loss": 0.0346, + "step": 47420 + }, + { + "epoch": 134.7443181818182, + "grad_norm": 0.8093419075012207, + "learning_rate": 0.0001, + "loss": 0.0365, + "step": 47430 + }, + { + "epoch": 134.77272727272728, + "grad_norm": 0.7498792409896851, + "learning_rate": 0.0001, + "loss": 0.0349, + "step": 47440 + }, + { + "epoch": 134.80113636363637, + "grad_norm": 1.3343470096588135, + "learning_rate": 0.0001, + "loss": 0.0352, + "step": 47450 + }, + { + "epoch": 134.82954545454547, + "grad_norm": 1.099355936050415, + "learning_rate": 0.0001, + "loss": 0.0378, + "step": 47460 + }, + { + "epoch": 134.85795454545453, + "grad_norm": 1.077439785003662, + "learning_rate": 0.0001, + "loss": 0.0355, + "step": 47470 + }, + { + "epoch": 134.88636363636363, + "grad_norm": 0.9508234262466431, + "learning_rate": 0.0001, + "loss": 0.0356, + "step": 47480 + }, + { + "epoch": 134.91477272727272, + "grad_norm": 0.8098722696304321, + "learning_rate": 0.0001, + "loss": 0.0346, + "step": 47490 + }, + { + "epoch": 134.9431818181818, + "grad_norm": 0.6014454960823059, + "learning_rate": 0.0001, + "loss": 0.0367, + "step": 47500 + }, + { + "epoch": 134.9715909090909, + "grad_norm": 0.7299231886863708, + "learning_rate": 0.0001, + "loss": 0.0332, + "step": 47510 + }, + { + "epoch": 135.0, + "grad_norm": 0.6908378005027771, + "learning_rate": 0.0001, + "loss": 0.0352, + "step": 47520 + }, + { + "epoch": 135.0284090909091, + "grad_norm": 0.7811752557754517, + "learning_rate": 0.0001, + "loss": 0.0348, + "step": 47530 + }, + { + "epoch": 135.0568181818182, + "grad_norm": 0.8679901957511902, + "learning_rate": 0.0001, + "loss": 0.0344, + "step": 47540 + }, + { + "epoch": 135.08522727272728, + "grad_norm": 0.9140509963035583, + "learning_rate": 0.0001, + "loss": 0.034, + "step": 47550 + }, + { + "epoch": 135.11363636363637, + "grad_norm": 0.7968837022781372, + "learning_rate": 0.0001, + "loss": 0.0335, + "step": 47560 + }, + { + "epoch": 135.14204545454547, + "grad_norm": 0.7876217365264893, + "learning_rate": 0.0001, + "loss": 0.0333, + "step": 47570 + }, + { + "epoch": 135.17045454545453, + "grad_norm": 0.5965085625648499, + "learning_rate": 0.0001, + "loss": 0.0334, + "step": 47580 + }, + { + "epoch": 135.19886363636363, + "grad_norm": 0.5109805464744568, + "learning_rate": 0.0001, + "loss": 0.0326, + "step": 47590 + }, + { + "epoch": 135.22727272727272, + "grad_norm": 0.5189054012298584, + "learning_rate": 0.0001, + "loss": 0.0341, + "step": 47600 + }, + { + "epoch": 135.2556818181818, + "grad_norm": 0.5483865737915039, + "learning_rate": 0.0001, + "loss": 0.0328, + "step": 47610 + }, + { + "epoch": 135.2840909090909, + "grad_norm": 0.8424668312072754, + "learning_rate": 0.0001, + "loss": 0.035, + "step": 47620 + }, + { + "epoch": 135.3125, + "grad_norm": 1.2137010097503662, + "learning_rate": 0.0001, + "loss": 0.0343, + "step": 47630 + }, + { + "epoch": 135.3409090909091, + "grad_norm": 0.8658679127693176, + "learning_rate": 0.0001, + "loss": 0.0329, + "step": 47640 + }, + { + "epoch": 135.3693181818182, + "grad_norm": 0.8400396704673767, + "learning_rate": 0.0001, + "loss": 0.0331, + "step": 47650 + }, + { + "epoch": 135.39772727272728, + "grad_norm": 0.6896227598190308, + "learning_rate": 0.0001, + "loss": 0.0332, + "step": 47660 + }, + { + "epoch": 135.42613636363637, + "grad_norm": 0.8099536895751953, + "learning_rate": 0.0001, + "loss": 0.0341, + "step": 47670 + }, + { + "epoch": 135.45454545454547, + "grad_norm": 1.0839929580688477, + "learning_rate": 0.0001, + "loss": 0.034, + "step": 47680 + }, + { + "epoch": 135.48295454545453, + "grad_norm": 0.9490888714790344, + "learning_rate": 0.0001, + "loss": 0.0359, + "step": 47690 + }, + { + "epoch": 135.51136363636363, + "grad_norm": 0.9408382177352905, + "learning_rate": 0.0001, + "loss": 0.0366, + "step": 47700 + }, + { + "epoch": 135.53977272727272, + "grad_norm": 0.6724865436553955, + "learning_rate": 0.0001, + "loss": 0.037, + "step": 47710 + }, + { + "epoch": 135.5681818181818, + "grad_norm": 0.889179527759552, + "learning_rate": 0.0001, + "loss": 0.0378, + "step": 47720 + }, + { + "epoch": 135.5965909090909, + "grad_norm": 1.22073495388031, + "learning_rate": 0.0001, + "loss": 0.0369, + "step": 47730 + }, + { + "epoch": 135.625, + "grad_norm": 1.0256925821304321, + "learning_rate": 0.0001, + "loss": 0.035, + "step": 47740 + }, + { + "epoch": 135.6534090909091, + "grad_norm": 0.9867517352104187, + "learning_rate": 0.0001, + "loss": 0.0343, + "step": 47750 + }, + { + "epoch": 135.6818181818182, + "grad_norm": 0.9394050240516663, + "learning_rate": 0.0001, + "loss": 0.0339, + "step": 47760 + }, + { + "epoch": 135.71022727272728, + "grad_norm": 0.7679027318954468, + "learning_rate": 0.0001, + "loss": 0.0326, + "step": 47770 + }, + { + "epoch": 135.73863636363637, + "grad_norm": 0.6860771775245667, + "learning_rate": 0.0001, + "loss": 0.0323, + "step": 47780 + }, + { + "epoch": 135.76704545454547, + "grad_norm": 0.6097438335418701, + "learning_rate": 0.0001, + "loss": 0.0343, + "step": 47790 + }, + { + "epoch": 135.79545454545453, + "grad_norm": 0.8389930129051208, + "learning_rate": 0.0001, + "loss": 0.0335, + "step": 47800 + }, + { + "epoch": 135.82386363636363, + "grad_norm": 0.7888689637184143, + "learning_rate": 0.0001, + "loss": 0.0335, + "step": 47810 + }, + { + "epoch": 135.85227272727272, + "grad_norm": 0.9129384756088257, + "learning_rate": 0.0001, + "loss": 0.0321, + "step": 47820 + }, + { + "epoch": 135.8806818181818, + "grad_norm": 1.0235633850097656, + "learning_rate": 0.0001, + "loss": 0.0342, + "step": 47830 + }, + { + "epoch": 135.9090909090909, + "grad_norm": 0.7481328845024109, + "learning_rate": 0.0001, + "loss": 0.0326, + "step": 47840 + }, + { + "epoch": 135.9375, + "grad_norm": 0.5055482387542725, + "learning_rate": 0.0001, + "loss": 0.0327, + "step": 47850 + }, + { + "epoch": 135.9659090909091, + "grad_norm": 0.5696248412132263, + "learning_rate": 0.0001, + "loss": 0.032, + "step": 47860 + }, + { + "epoch": 135.9943181818182, + "grad_norm": 0.7397261261940002, + "learning_rate": 0.0001, + "loss": 0.0323, + "step": 47870 + }, + { + "epoch": 136.02272727272728, + "grad_norm": 0.651668906211853, + "learning_rate": 0.0001, + "loss": 0.0321, + "step": 47880 + }, + { + "epoch": 136.05113636363637, + "grad_norm": 0.5386239886283875, + "learning_rate": 0.0001, + "loss": 0.0316, + "step": 47890 + }, + { + "epoch": 136.07954545454547, + "grad_norm": 0.4532436430454254, + "learning_rate": 0.0001, + "loss": 0.0328, + "step": 47900 + }, + { + "epoch": 136.10795454545453, + "grad_norm": 0.5337704420089722, + "learning_rate": 0.0001, + "loss": 0.032, + "step": 47910 + }, + { + "epoch": 136.13636363636363, + "grad_norm": 0.9169813394546509, + "learning_rate": 0.0001, + "loss": 0.0334, + "step": 47920 + }, + { + "epoch": 136.16477272727272, + "grad_norm": 0.7223308682441711, + "learning_rate": 0.0001, + "loss": 0.0319, + "step": 47930 + }, + { + "epoch": 136.1931818181818, + "grad_norm": 0.8555115461349487, + "learning_rate": 0.0001, + "loss": 0.0333, + "step": 47940 + }, + { + "epoch": 136.2215909090909, + "grad_norm": 0.537327229976654, + "learning_rate": 0.0001, + "loss": 0.0334, + "step": 47950 + }, + { + "epoch": 136.25, + "grad_norm": 0.8894582986831665, + "learning_rate": 0.0001, + "loss": 0.033, + "step": 47960 + }, + { + "epoch": 136.2784090909091, + "grad_norm": 0.555550754070282, + "learning_rate": 0.0001, + "loss": 0.0341, + "step": 47970 + }, + { + "epoch": 136.3068181818182, + "grad_norm": 0.6033909916877747, + "learning_rate": 0.0001, + "loss": 0.0339, + "step": 47980 + }, + { + "epoch": 136.33522727272728, + "grad_norm": 0.6103994250297546, + "learning_rate": 0.0001, + "loss": 0.0341, + "step": 47990 + }, + { + "epoch": 136.36363636363637, + "grad_norm": 0.6190768480300903, + "learning_rate": 0.0001, + "loss": 0.0321, + "step": 48000 + }, + { + "epoch": 136.39204545454547, + "grad_norm": 0.7021390795707703, + "learning_rate": 0.0001, + "loss": 0.034, + "step": 48010 + }, + { + "epoch": 136.42045454545453, + "grad_norm": 0.5402888059616089, + "learning_rate": 0.0001, + "loss": 0.0336, + "step": 48020 + }, + { + "epoch": 136.44886363636363, + "grad_norm": 0.7875863909721375, + "learning_rate": 0.0001, + "loss": 0.0329, + "step": 48030 + }, + { + "epoch": 136.47727272727272, + "grad_norm": 0.5794070959091187, + "learning_rate": 0.0001, + "loss": 0.0333, + "step": 48040 + }, + { + "epoch": 136.5056818181818, + "grad_norm": 0.7281160950660706, + "learning_rate": 0.0001, + "loss": 0.034, + "step": 48050 + }, + { + "epoch": 136.5340909090909, + "grad_norm": 0.6087822318077087, + "learning_rate": 0.0001, + "loss": 0.0342, + "step": 48060 + }, + { + "epoch": 136.5625, + "grad_norm": 0.7562154531478882, + "learning_rate": 0.0001, + "loss": 0.0325, + "step": 48070 + }, + { + "epoch": 136.5909090909091, + "grad_norm": 0.7490687370300293, + "learning_rate": 0.0001, + "loss": 0.0336, + "step": 48080 + }, + { + "epoch": 136.6193181818182, + "grad_norm": 0.6700184345245361, + "learning_rate": 0.0001, + "loss": 0.0341, + "step": 48090 + }, + { + "epoch": 136.64772727272728, + "grad_norm": 0.7165958881378174, + "learning_rate": 0.0001, + "loss": 0.0329, + "step": 48100 + }, + { + "epoch": 136.67613636363637, + "grad_norm": 0.5619990229606628, + "learning_rate": 0.0001, + "loss": 0.0335, + "step": 48110 + }, + { + "epoch": 136.70454545454547, + "grad_norm": 0.6590405106544495, + "learning_rate": 0.0001, + "loss": 0.0331, + "step": 48120 + }, + { + "epoch": 136.73295454545453, + "grad_norm": 0.8965096473693848, + "learning_rate": 0.0001, + "loss": 0.0328, + "step": 48130 + }, + { + "epoch": 136.76136363636363, + "grad_norm": 0.932625949382782, + "learning_rate": 0.0001, + "loss": 0.0332, + "step": 48140 + }, + { + "epoch": 136.78977272727272, + "grad_norm": 0.806952714920044, + "learning_rate": 0.0001, + "loss": 0.0345, + "step": 48150 + }, + { + "epoch": 136.8181818181818, + "grad_norm": 0.718571126461029, + "learning_rate": 0.0001, + "loss": 0.0323, + "step": 48160 + }, + { + "epoch": 136.8465909090909, + "grad_norm": 0.6784353256225586, + "learning_rate": 0.0001, + "loss": 0.0324, + "step": 48170 + }, + { + "epoch": 136.875, + "grad_norm": 0.672789454460144, + "learning_rate": 0.0001, + "loss": 0.0319, + "step": 48180 + }, + { + "epoch": 136.9034090909091, + "grad_norm": 0.7343295216560364, + "learning_rate": 0.0001, + "loss": 0.0316, + "step": 48190 + }, + { + "epoch": 136.9318181818182, + "grad_norm": 0.6511361598968506, + "learning_rate": 0.0001, + "loss": 0.0331, + "step": 48200 + }, + { + "epoch": 136.96022727272728, + "grad_norm": 0.7251712083816528, + "learning_rate": 0.0001, + "loss": 0.0322, + "step": 48210 + }, + { + "epoch": 136.98863636363637, + "grad_norm": 0.8382689356803894, + "learning_rate": 0.0001, + "loss": 0.0327, + "step": 48220 + }, + { + "epoch": 137.01704545454547, + "grad_norm": 1.2256827354431152, + "learning_rate": 0.0001, + "loss": 0.0337, + "step": 48230 + }, + { + "epoch": 137.04545454545453, + "grad_norm": 0.815129816532135, + "learning_rate": 0.0001, + "loss": 0.0344, + "step": 48240 + }, + { + "epoch": 137.07386363636363, + "grad_norm": 1.3422629833221436, + "learning_rate": 0.0001, + "loss": 0.0351, + "step": 48250 + }, + { + "epoch": 137.10227272727272, + "grad_norm": 1.2612097263336182, + "learning_rate": 0.0001, + "loss": 0.0335, + "step": 48260 + }, + { + "epoch": 137.1306818181818, + "grad_norm": 1.3112272024154663, + "learning_rate": 0.0001, + "loss": 0.0335, + "step": 48270 + }, + { + "epoch": 137.1590909090909, + "grad_norm": 0.8872440457344055, + "learning_rate": 0.0001, + "loss": 0.0331, + "step": 48280 + }, + { + "epoch": 137.1875, + "grad_norm": 0.9705610871315002, + "learning_rate": 0.0001, + "loss": 0.0338, + "step": 48290 + }, + { + "epoch": 137.2159090909091, + "grad_norm": 1.0388199090957642, + "learning_rate": 0.0001, + "loss": 0.0336, + "step": 48300 + }, + { + "epoch": 137.2443181818182, + "grad_norm": 0.8184165358543396, + "learning_rate": 0.0001, + "loss": 0.0324, + "step": 48310 + }, + { + "epoch": 137.27272727272728, + "grad_norm": 1.2038040161132812, + "learning_rate": 0.0001, + "loss": 0.0325, + "step": 48320 + }, + { + "epoch": 137.30113636363637, + "grad_norm": 0.8519647121429443, + "learning_rate": 0.0001, + "loss": 0.0322, + "step": 48330 + }, + { + "epoch": 137.32954545454547, + "grad_norm": 0.7184175252914429, + "learning_rate": 0.0001, + "loss": 0.0336, + "step": 48340 + }, + { + "epoch": 137.35795454545453, + "grad_norm": 0.9237968325614929, + "learning_rate": 0.0001, + "loss": 0.0338, + "step": 48350 + }, + { + "epoch": 137.38636363636363, + "grad_norm": 0.5992299914360046, + "learning_rate": 0.0001, + "loss": 0.032, + "step": 48360 + }, + { + "epoch": 137.41477272727272, + "grad_norm": 0.914725124835968, + "learning_rate": 0.0001, + "loss": 0.0327, + "step": 48370 + }, + { + "epoch": 137.4431818181818, + "grad_norm": 0.8184134364128113, + "learning_rate": 0.0001, + "loss": 0.0314, + "step": 48380 + }, + { + "epoch": 137.4715909090909, + "grad_norm": 0.5038926601409912, + "learning_rate": 0.0001, + "loss": 0.0323, + "step": 48390 + }, + { + "epoch": 137.5, + "grad_norm": 0.6409310102462769, + "learning_rate": 0.0001, + "loss": 0.0315, + "step": 48400 + }, + { + "epoch": 137.5284090909091, + "grad_norm": 0.5178235173225403, + "learning_rate": 0.0001, + "loss": 0.0323, + "step": 48410 + }, + { + "epoch": 137.5568181818182, + "grad_norm": 0.7749777436256409, + "learning_rate": 0.0001, + "loss": 0.0315, + "step": 48420 + }, + { + "epoch": 137.58522727272728, + "grad_norm": 0.5888954401016235, + "learning_rate": 0.0001, + "loss": 0.0311, + "step": 48430 + }, + { + "epoch": 137.61363636363637, + "grad_norm": 0.631784200668335, + "learning_rate": 0.0001, + "loss": 0.0319, + "step": 48440 + }, + { + "epoch": 137.64204545454547, + "grad_norm": 0.6141875386238098, + "learning_rate": 0.0001, + "loss": 0.032, + "step": 48450 + }, + { + "epoch": 137.67045454545453, + "grad_norm": 0.7147916555404663, + "learning_rate": 0.0001, + "loss": 0.0314, + "step": 48460 + }, + { + "epoch": 137.69886363636363, + "grad_norm": 0.6216191649436951, + "learning_rate": 0.0001, + "loss": 0.0314, + "step": 48470 + }, + { + "epoch": 137.72727272727272, + "grad_norm": 0.9274272918701172, + "learning_rate": 0.0001, + "loss": 0.0341, + "step": 48480 + }, + { + "epoch": 137.7556818181818, + "grad_norm": 1.197992205619812, + "learning_rate": 0.0001, + "loss": 0.0326, + "step": 48490 + }, + { + "epoch": 137.7840909090909, + "grad_norm": 1.0352435111999512, + "learning_rate": 0.0001, + "loss": 0.0315, + "step": 48500 + }, + { + "epoch": 137.8125, + "grad_norm": 0.9775570631027222, + "learning_rate": 0.0001, + "loss": 0.0318, + "step": 48510 + }, + { + "epoch": 137.8409090909091, + "grad_norm": 1.0361013412475586, + "learning_rate": 0.0001, + "loss": 0.0315, + "step": 48520 + }, + { + "epoch": 137.8693181818182, + "grad_norm": 0.9069687724113464, + "learning_rate": 0.0001, + "loss": 0.0317, + "step": 48530 + }, + { + "epoch": 137.89772727272728, + "grad_norm": 0.6086276173591614, + "learning_rate": 0.0001, + "loss": 0.0315, + "step": 48540 + }, + { + "epoch": 137.92613636363637, + "grad_norm": 0.7118426561355591, + "learning_rate": 0.0001, + "loss": 0.0313, + "step": 48550 + }, + { + "epoch": 137.95454545454547, + "grad_norm": 0.9204250574111938, + "learning_rate": 0.0001, + "loss": 0.0325, + "step": 48560 + }, + { + "epoch": 137.98295454545453, + "grad_norm": 0.7215946316719055, + "learning_rate": 0.0001, + "loss": 0.0311, + "step": 48570 + }, + { + "epoch": 138.01136363636363, + "grad_norm": 0.7724602222442627, + "learning_rate": 0.0001, + "loss": 0.0316, + "step": 48580 + }, + { + "epoch": 138.03977272727272, + "grad_norm": 0.6827357411384583, + "learning_rate": 0.0001, + "loss": 0.0312, + "step": 48590 + }, + { + "epoch": 138.0681818181818, + "grad_norm": 0.46647927165031433, + "learning_rate": 0.0001, + "loss": 0.032, + "step": 48600 + }, + { + "epoch": 138.0965909090909, + "grad_norm": 0.5820374488830566, + "learning_rate": 0.0001, + "loss": 0.0322, + "step": 48610 + }, + { + "epoch": 138.125, + "grad_norm": 0.9222242832183838, + "learning_rate": 0.0001, + "loss": 0.0324, + "step": 48620 + }, + { + "epoch": 138.1534090909091, + "grad_norm": 1.222086787223816, + "learning_rate": 0.0001, + "loss": 0.0321, + "step": 48630 + }, + { + "epoch": 138.1818181818182, + "grad_norm": 0.7875241637229919, + "learning_rate": 0.0001, + "loss": 0.0325, + "step": 48640 + }, + { + "epoch": 138.21022727272728, + "grad_norm": 0.7012037634849548, + "learning_rate": 0.0001, + "loss": 0.0315, + "step": 48650 + }, + { + "epoch": 138.23863636363637, + "grad_norm": 1.2003134489059448, + "learning_rate": 0.0001, + "loss": 0.0315, + "step": 48660 + }, + { + "epoch": 138.26704545454547, + "grad_norm": 0.6346859335899353, + "learning_rate": 0.0001, + "loss": 0.0326, + "step": 48670 + }, + { + "epoch": 138.29545454545453, + "grad_norm": 0.5016667246818542, + "learning_rate": 0.0001, + "loss": 0.0322, + "step": 48680 + }, + { + "epoch": 138.32386363636363, + "grad_norm": 0.5193179845809937, + "learning_rate": 0.0001, + "loss": 0.0323, + "step": 48690 + }, + { + "epoch": 138.35227272727272, + "grad_norm": 0.5191895365715027, + "learning_rate": 0.0001, + "loss": 0.0317, + "step": 48700 + }, + { + "epoch": 138.3806818181818, + "grad_norm": 0.4985046088695526, + "learning_rate": 0.0001, + "loss": 0.0319, + "step": 48710 + }, + { + "epoch": 138.4090909090909, + "grad_norm": 0.6570383310317993, + "learning_rate": 0.0001, + "loss": 0.0319, + "step": 48720 + }, + { + "epoch": 138.4375, + "grad_norm": 0.5687001347541809, + "learning_rate": 0.0001, + "loss": 0.0314, + "step": 48730 + }, + { + "epoch": 138.4659090909091, + "grad_norm": 0.6005369424819946, + "learning_rate": 0.0001, + "loss": 0.0328, + "step": 48740 + }, + { + "epoch": 138.4943181818182, + "grad_norm": 0.5225914716720581, + "learning_rate": 0.0001, + "loss": 0.0314, + "step": 48750 + }, + { + "epoch": 138.52272727272728, + "grad_norm": 0.6342768669128418, + "learning_rate": 0.0001, + "loss": 0.0317, + "step": 48760 + }, + { + "epoch": 138.55113636363637, + "grad_norm": 0.5711255669593811, + "learning_rate": 0.0001, + "loss": 0.0326, + "step": 48770 + }, + { + "epoch": 138.57954545454547, + "grad_norm": 0.6120138764381409, + "learning_rate": 0.0001, + "loss": 0.032, + "step": 48780 + }, + { + "epoch": 138.60795454545453, + "grad_norm": 0.7916771769523621, + "learning_rate": 0.0001, + "loss": 0.0311, + "step": 48790 + }, + { + "epoch": 138.63636363636363, + "grad_norm": 0.9732442498207092, + "learning_rate": 0.0001, + "loss": 0.0326, + "step": 48800 + }, + { + "epoch": 138.66477272727272, + "grad_norm": 1.2003506422042847, + "learning_rate": 0.0001, + "loss": 0.0341, + "step": 48810 + }, + { + "epoch": 138.6931818181818, + "grad_norm": 1.2463781833648682, + "learning_rate": 0.0001, + "loss": 0.0329, + "step": 48820 + }, + { + "epoch": 138.7215909090909, + "grad_norm": 0.8527126908302307, + "learning_rate": 0.0001, + "loss": 0.0329, + "step": 48830 + }, + { + "epoch": 138.75, + "grad_norm": 0.9467070698738098, + "learning_rate": 0.0001, + "loss": 0.0323, + "step": 48840 + }, + { + "epoch": 138.7784090909091, + "grad_norm": 1.0931123495101929, + "learning_rate": 0.0001, + "loss": 0.0318, + "step": 48850 + }, + { + "epoch": 138.8068181818182, + "grad_norm": 1.0193461179733276, + "learning_rate": 0.0001, + "loss": 0.0314, + "step": 48860 + }, + { + "epoch": 138.83522727272728, + "grad_norm": 0.9290983080863953, + "learning_rate": 0.0001, + "loss": 0.0318, + "step": 48870 + }, + { + "epoch": 138.86363636363637, + "grad_norm": 0.7232368588447571, + "learning_rate": 0.0001, + "loss": 0.0318, + "step": 48880 + }, + { + "epoch": 138.89204545454547, + "grad_norm": 0.6270406246185303, + "learning_rate": 0.0001, + "loss": 0.0313, + "step": 48890 + }, + { + "epoch": 138.92045454545453, + "grad_norm": 0.8998165726661682, + "learning_rate": 0.0001, + "loss": 0.0322, + "step": 48900 + }, + { + "epoch": 138.94886363636363, + "grad_norm": 0.6000069379806519, + "learning_rate": 0.0001, + "loss": 0.032, + "step": 48910 + }, + { + "epoch": 138.97727272727272, + "grad_norm": 0.45096200704574585, + "learning_rate": 0.0001, + "loss": 0.0322, + "step": 48920 + }, + { + "epoch": 139.0056818181818, + "grad_norm": 0.6368238925933838, + "learning_rate": 0.0001, + "loss": 0.0327, + "step": 48930 + }, + { + "epoch": 139.0340909090909, + "grad_norm": 0.8111124038696289, + "learning_rate": 0.0001, + "loss": 0.0319, + "step": 48940 + }, + { + "epoch": 139.0625, + "grad_norm": 0.5319854021072388, + "learning_rate": 0.0001, + "loss": 0.033, + "step": 48950 + }, + { + "epoch": 139.0909090909091, + "grad_norm": 0.6842014193534851, + "learning_rate": 0.0001, + "loss": 0.0335, + "step": 48960 + }, + { + "epoch": 139.1193181818182, + "grad_norm": 0.5523969531059265, + "learning_rate": 0.0001, + "loss": 0.0319, + "step": 48970 + }, + { + "epoch": 139.14772727272728, + "grad_norm": 0.6312474608421326, + "learning_rate": 0.0001, + "loss": 0.0319, + "step": 48980 + }, + { + "epoch": 139.17613636363637, + "grad_norm": 0.6765289902687073, + "learning_rate": 0.0001, + "loss": 0.032, + "step": 48990 + }, + { + "epoch": 139.20454545454547, + "grad_norm": 0.7405977845191956, + "learning_rate": 0.0001, + "loss": 0.0319, + "step": 49000 + }, + { + "epoch": 139.23295454545453, + "grad_norm": 0.9844215512275696, + "learning_rate": 0.0001, + "loss": 0.0331, + "step": 49010 + }, + { + "epoch": 139.26136363636363, + "grad_norm": 0.824510395526886, + "learning_rate": 0.0001, + "loss": 0.032, + "step": 49020 + }, + { + "epoch": 139.28977272727272, + "grad_norm": 0.6249207258224487, + "learning_rate": 0.0001, + "loss": 0.0315, + "step": 49030 + }, + { + "epoch": 139.3181818181818, + "grad_norm": 0.5916060209274292, + "learning_rate": 0.0001, + "loss": 0.0323, + "step": 49040 + }, + { + "epoch": 139.3465909090909, + "grad_norm": 0.742417573928833, + "learning_rate": 0.0001, + "loss": 0.0316, + "step": 49050 + }, + { + "epoch": 139.375, + "grad_norm": 0.5674957036972046, + "learning_rate": 0.0001, + "loss": 0.0323, + "step": 49060 + }, + { + "epoch": 139.4034090909091, + "grad_norm": 0.5021371841430664, + "learning_rate": 0.0001, + "loss": 0.0309, + "step": 49070 + }, + { + "epoch": 139.4318181818182, + "grad_norm": 0.5639374256134033, + "learning_rate": 0.0001, + "loss": 0.0307, + "step": 49080 + }, + { + "epoch": 139.46022727272728, + "grad_norm": 0.6540923118591309, + "learning_rate": 0.0001, + "loss": 0.032, + "step": 49090 + }, + { + "epoch": 139.48863636363637, + "grad_norm": 0.7910658121109009, + "learning_rate": 0.0001, + "loss": 0.0311, + "step": 49100 + }, + { + "epoch": 139.51704545454547, + "grad_norm": 0.9734665155410767, + "learning_rate": 0.0001, + "loss": 0.0315, + "step": 49110 + }, + { + "epoch": 139.54545454545453, + "grad_norm": 0.6400315165519714, + "learning_rate": 0.0001, + "loss": 0.0307, + "step": 49120 + }, + { + "epoch": 139.57386363636363, + "grad_norm": 0.7147937417030334, + "learning_rate": 0.0001, + "loss": 0.031, + "step": 49130 + }, + { + "epoch": 139.60227272727272, + "grad_norm": 0.7652814984321594, + "learning_rate": 0.0001, + "loss": 0.031, + "step": 49140 + }, + { + "epoch": 139.6306818181818, + "grad_norm": 0.8998138308525085, + "learning_rate": 0.0001, + "loss": 0.0316, + "step": 49150 + }, + { + "epoch": 139.6590909090909, + "grad_norm": 0.6835416555404663, + "learning_rate": 0.0001, + "loss": 0.0311, + "step": 49160 + }, + { + "epoch": 139.6875, + "grad_norm": 0.6805562376976013, + "learning_rate": 0.0001, + "loss": 0.0318, + "step": 49170 + }, + { + "epoch": 139.7159090909091, + "grad_norm": 0.6778475642204285, + "learning_rate": 0.0001, + "loss": 0.032, + "step": 49180 + }, + { + "epoch": 139.7443181818182, + "grad_norm": 0.476952463388443, + "learning_rate": 0.0001, + "loss": 0.0316, + "step": 49190 + }, + { + "epoch": 139.77272727272728, + "grad_norm": 0.9565654397010803, + "learning_rate": 0.0001, + "loss": 0.0316, + "step": 49200 + }, + { + "epoch": 139.80113636363637, + "grad_norm": 0.770005464553833, + "learning_rate": 0.0001, + "loss": 0.0317, + "step": 49210 + }, + { + "epoch": 139.82954545454547, + "grad_norm": 0.8685285449028015, + "learning_rate": 0.0001, + "loss": 0.0319, + "step": 49220 + }, + { + "epoch": 139.85795454545453, + "grad_norm": 0.6290770769119263, + "learning_rate": 0.0001, + "loss": 0.0325, + "step": 49230 + }, + { + "epoch": 139.88636363636363, + "grad_norm": 0.7894107103347778, + "learning_rate": 0.0001, + "loss": 0.0319, + "step": 49240 + }, + { + "epoch": 139.91477272727272, + "grad_norm": 0.73750901222229, + "learning_rate": 0.0001, + "loss": 0.0323, + "step": 49250 + }, + { + "epoch": 139.9431818181818, + "grad_norm": 0.5105662941932678, + "learning_rate": 0.0001, + "loss": 0.0314, + "step": 49260 + }, + { + "epoch": 139.9715909090909, + "grad_norm": 0.5434335470199585, + "learning_rate": 0.0001, + "loss": 0.0321, + "step": 49270 + }, + { + "epoch": 140.0, + "grad_norm": 0.5327863097190857, + "learning_rate": 0.0001, + "loss": 0.0319, + "step": 49280 + }, + { + "epoch": 140.0284090909091, + "grad_norm": 0.4427546560764313, + "learning_rate": 0.0001, + "loss": 0.0323, + "step": 49290 + }, + { + "epoch": 140.0568181818182, + "grad_norm": 0.5727342963218689, + "learning_rate": 0.0001, + "loss": 0.032, + "step": 49300 + }, + { + "epoch": 140.08522727272728, + "grad_norm": 0.698470950126648, + "learning_rate": 0.0001, + "loss": 0.032, + "step": 49310 + }, + { + "epoch": 140.11363636363637, + "grad_norm": 0.5280812978744507, + "learning_rate": 0.0001, + "loss": 0.0323, + "step": 49320 + }, + { + "epoch": 140.14204545454547, + "grad_norm": 0.5219975709915161, + "learning_rate": 0.0001, + "loss": 0.0323, + "step": 49330 + }, + { + "epoch": 140.17045454545453, + "grad_norm": 0.603614866733551, + "learning_rate": 0.0001, + "loss": 0.033, + "step": 49340 + }, + { + "epoch": 140.19886363636363, + "grad_norm": 0.48663216829299927, + "learning_rate": 0.0001, + "loss": 0.032, + "step": 49350 + }, + { + "epoch": 140.22727272727272, + "grad_norm": 0.6505529284477234, + "learning_rate": 0.0001, + "loss": 0.0318, + "step": 49360 + }, + { + "epoch": 140.2556818181818, + "grad_norm": 0.7080401182174683, + "learning_rate": 0.0001, + "loss": 0.0318, + "step": 49370 + }, + { + "epoch": 140.2840909090909, + "grad_norm": 0.49827688932418823, + "learning_rate": 0.0001, + "loss": 0.0325, + "step": 49380 + }, + { + "epoch": 140.3125, + "grad_norm": 0.6238806843757629, + "learning_rate": 0.0001, + "loss": 0.0324, + "step": 49390 + }, + { + "epoch": 140.3409090909091, + "grad_norm": 0.5958791971206665, + "learning_rate": 0.0001, + "loss": 0.0326, + "step": 49400 + }, + { + "epoch": 140.3693181818182, + "grad_norm": 0.5299167633056641, + "learning_rate": 0.0001, + "loss": 0.0318, + "step": 49410 + }, + { + "epoch": 140.39772727272728, + "grad_norm": 0.534270703792572, + "learning_rate": 0.0001, + "loss": 0.0321, + "step": 49420 + }, + { + "epoch": 140.42613636363637, + "grad_norm": 0.4572816789150238, + "learning_rate": 0.0001, + "loss": 0.0325, + "step": 49430 + }, + { + "epoch": 140.45454545454547, + "grad_norm": 0.7355735301971436, + "learning_rate": 0.0001, + "loss": 0.033, + "step": 49440 + }, + { + "epoch": 140.48295454545453, + "grad_norm": 0.8774271607398987, + "learning_rate": 0.0001, + "loss": 0.032, + "step": 49450 + }, + { + "epoch": 140.51136363636363, + "grad_norm": 0.5380182266235352, + "learning_rate": 0.0001, + "loss": 0.0318, + "step": 49460 + }, + { + "epoch": 140.53977272727272, + "grad_norm": 0.6516682505607605, + "learning_rate": 0.0001, + "loss": 0.0318, + "step": 49470 + }, + { + "epoch": 140.5681818181818, + "grad_norm": 0.4943990111351013, + "learning_rate": 0.0001, + "loss": 0.0301, + "step": 49480 + }, + { + "epoch": 140.5965909090909, + "grad_norm": 0.5519253611564636, + "learning_rate": 0.0001, + "loss": 0.0309, + "step": 49490 + }, + { + "epoch": 140.625, + "grad_norm": 0.6039503216743469, + "learning_rate": 0.0001, + "loss": 0.0312, + "step": 49500 + }, + { + "epoch": 140.6534090909091, + "grad_norm": 0.754988431930542, + "learning_rate": 0.0001, + "loss": 0.0316, + "step": 49510 + }, + { + "epoch": 140.6818181818182, + "grad_norm": 0.6376588344573975, + "learning_rate": 0.0001, + "loss": 0.0324, + "step": 49520 + }, + { + "epoch": 140.71022727272728, + "grad_norm": 0.7755230069160461, + "learning_rate": 0.0001, + "loss": 0.0323, + "step": 49530 + }, + { + "epoch": 140.73863636363637, + "grad_norm": 0.6854098439216614, + "learning_rate": 0.0001, + "loss": 0.0319, + "step": 49540 + }, + { + "epoch": 140.76704545454547, + "grad_norm": 0.5473119020462036, + "learning_rate": 0.0001, + "loss": 0.0316, + "step": 49550 + }, + { + "epoch": 140.79545454545453, + "grad_norm": 0.659775972366333, + "learning_rate": 0.0001, + "loss": 0.0306, + "step": 49560 + }, + { + "epoch": 140.82386363636363, + "grad_norm": 0.7851589918136597, + "learning_rate": 0.0001, + "loss": 0.0317, + "step": 49570 + }, + { + "epoch": 140.85227272727272, + "grad_norm": 0.4822283983230591, + "learning_rate": 0.0001, + "loss": 0.0312, + "step": 49580 + }, + { + "epoch": 140.8806818181818, + "grad_norm": 0.6676194667816162, + "learning_rate": 0.0001, + "loss": 0.0311, + "step": 49590 + }, + { + "epoch": 140.9090909090909, + "grad_norm": 0.4834653437137604, + "learning_rate": 0.0001, + "loss": 0.0314, + "step": 49600 + }, + { + "epoch": 140.9375, + "grad_norm": 0.6345698833465576, + "learning_rate": 0.0001, + "loss": 0.03, + "step": 49610 + }, + { + "epoch": 140.9659090909091, + "grad_norm": 0.45117560029029846, + "learning_rate": 0.0001, + "loss": 0.0313, + "step": 49620 + }, + { + "epoch": 140.9943181818182, + "grad_norm": 0.5543921589851379, + "learning_rate": 0.0001, + "loss": 0.0317, + "step": 49630 + }, + { + "epoch": 141.02272727272728, + "grad_norm": 0.5042457580566406, + "learning_rate": 0.0001, + "loss": 0.0305, + "step": 49640 + }, + { + "epoch": 141.05113636363637, + "grad_norm": 0.47779983282089233, + "learning_rate": 0.0001, + "loss": 0.0307, + "step": 49650 + }, + { + "epoch": 141.07954545454547, + "grad_norm": 0.641806423664093, + "learning_rate": 0.0001, + "loss": 0.0327, + "step": 49660 + }, + { + "epoch": 141.10795454545453, + "grad_norm": 0.7229103446006775, + "learning_rate": 0.0001, + "loss": 0.0315, + "step": 49670 + }, + { + "epoch": 141.13636363636363, + "grad_norm": 0.5515846014022827, + "learning_rate": 0.0001, + "loss": 0.0318, + "step": 49680 + }, + { + "epoch": 141.16477272727272, + "grad_norm": 0.6664286255836487, + "learning_rate": 0.0001, + "loss": 0.0313, + "step": 49690 + }, + { + "epoch": 141.1931818181818, + "grad_norm": 0.5435540080070496, + "learning_rate": 0.0001, + "loss": 0.0321, + "step": 49700 + }, + { + "epoch": 141.2215909090909, + "grad_norm": 0.5387634038925171, + "learning_rate": 0.0001, + "loss": 0.0327, + "step": 49710 + }, + { + "epoch": 141.25, + "grad_norm": 0.5133399963378906, + "learning_rate": 0.0001, + "loss": 0.0331, + "step": 49720 + }, + { + "epoch": 141.2784090909091, + "grad_norm": 0.7076446413993835, + "learning_rate": 0.0001, + "loss": 0.0319, + "step": 49730 + }, + { + "epoch": 141.3068181818182, + "grad_norm": 0.5965754985809326, + "learning_rate": 0.0001, + "loss": 0.0316, + "step": 49740 + }, + { + "epoch": 141.33522727272728, + "grad_norm": 0.5514310598373413, + "learning_rate": 0.0001, + "loss": 0.0314, + "step": 49750 + }, + { + "epoch": 141.36363636363637, + "grad_norm": 0.6107453107833862, + "learning_rate": 0.0001, + "loss": 0.032, + "step": 49760 + }, + { + "epoch": 141.39204545454547, + "grad_norm": 0.6507061719894409, + "learning_rate": 0.0001, + "loss": 0.0315, + "step": 49770 + }, + { + "epoch": 141.42045454545453, + "grad_norm": 0.4469587802886963, + "learning_rate": 0.0001, + "loss": 0.0323, + "step": 49780 + }, + { + "epoch": 141.44886363636363, + "grad_norm": 0.5173867344856262, + "learning_rate": 0.0001, + "loss": 0.0309, + "step": 49790 + }, + { + "epoch": 141.47727272727272, + "grad_norm": 0.4976654052734375, + "learning_rate": 0.0001, + "loss": 0.0319, + "step": 49800 + }, + { + "epoch": 141.5056818181818, + "grad_norm": 0.7025495171546936, + "learning_rate": 0.0001, + "loss": 0.0312, + "step": 49810 + }, + { + "epoch": 141.5340909090909, + "grad_norm": 0.5611472725868225, + "learning_rate": 0.0001, + "loss": 0.0323, + "step": 49820 + }, + { + "epoch": 141.5625, + "grad_norm": 0.5493874549865723, + "learning_rate": 0.0001, + "loss": 0.0316, + "step": 49830 + }, + { + "epoch": 141.5909090909091, + "grad_norm": 0.649082601070404, + "learning_rate": 0.0001, + "loss": 0.0312, + "step": 49840 + }, + { + "epoch": 141.6193181818182, + "grad_norm": 0.5634473562240601, + "learning_rate": 0.0001, + "loss": 0.032, + "step": 49850 + }, + { + "epoch": 141.64772727272728, + "grad_norm": 0.44207829236984253, + "learning_rate": 0.0001, + "loss": 0.0322, + "step": 49860 + }, + { + "epoch": 141.67613636363637, + "grad_norm": 1.1749712228775024, + "learning_rate": 0.0001, + "loss": 0.0314, + "step": 49870 + }, + { + "epoch": 141.70454545454547, + "grad_norm": 0.8790702223777771, + "learning_rate": 0.0001, + "loss": 0.0326, + "step": 49880 + }, + { + "epoch": 141.73295454545453, + "grad_norm": 1.1796921491622925, + "learning_rate": 0.0001, + "loss": 0.0312, + "step": 49890 + }, + { + "epoch": 141.76136363636363, + "grad_norm": 0.7493149638175964, + "learning_rate": 0.0001, + "loss": 0.0322, + "step": 49900 + }, + { + "epoch": 141.78977272727272, + "grad_norm": 0.6880519986152649, + "learning_rate": 0.0001, + "loss": 0.032, + "step": 49910 + }, + { + "epoch": 141.8181818181818, + "grad_norm": 0.7424251437187195, + "learning_rate": 0.0001, + "loss": 0.0305, + "step": 49920 + }, + { + "epoch": 141.8465909090909, + "grad_norm": 0.8121177554130554, + "learning_rate": 0.0001, + "loss": 0.0313, + "step": 49930 + }, + { + "epoch": 141.875, + "grad_norm": 0.7063887715339661, + "learning_rate": 0.0001, + "loss": 0.0307, + "step": 49940 + }, + { + "epoch": 141.9034090909091, + "grad_norm": 0.6800211071968079, + "learning_rate": 0.0001, + "loss": 0.0319, + "step": 49950 + }, + { + "epoch": 141.9318181818182, + "grad_norm": 0.800621509552002, + "learning_rate": 0.0001, + "loss": 0.0312, + "step": 49960 + }, + { + "epoch": 141.96022727272728, + "grad_norm": 0.7496196627616882, + "learning_rate": 0.0001, + "loss": 0.0314, + "step": 49970 + }, + { + "epoch": 141.98863636363637, + "grad_norm": 0.8301522135734558, + "learning_rate": 0.0001, + "loss": 0.0321, + "step": 49980 + }, + { + "epoch": 142.01704545454547, + "grad_norm": 1.3548810482025146, + "learning_rate": 0.0001, + "loss": 0.0333, + "step": 49990 + }, + { + "epoch": 142.04545454545453, + "grad_norm": 1.1022731065750122, + "learning_rate": 0.0001, + "loss": 0.0321, + "step": 50000 + }, + { + "epoch": 142.04545454545453, + "step": 50000, + "total_flos": 0.0, + "train_loss": 0.090032759501338, + "train_runtime": 22271.7421, + "train_samples_per_second": 287.36, + "train_steps_per_second": 2.245 + } + ], + "logging_steps": 10, + "max_steps": 50000, + "num_input_tokens_seen": 0, + "num_train_epochs": 143, + "save_steps": 5000, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 0.0, + "train_batch_size": 128, + "trial_name": null, + "trial_params": null +}