| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 3.0, | |
| "eval_steps": 500, | |
| "global_step": 1356, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0022123893805309734, | |
| "grad_norm": 2.9161036014556885, | |
| "learning_rate": 7.352941176470589e-08, | |
| "loss": 0.7653, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 0.004424778761061947, | |
| "grad_norm": 2.9344425201416016, | |
| "learning_rate": 1.4705882352941178e-07, | |
| "loss": 0.7756, | |
| "step": 2 | |
| }, | |
| { | |
| "epoch": 0.00663716814159292, | |
| "grad_norm": 2.8798561096191406, | |
| "learning_rate": 2.2058823529411768e-07, | |
| "loss": 0.7784, | |
| "step": 3 | |
| }, | |
| { | |
| "epoch": 0.008849557522123894, | |
| "grad_norm": 2.898063898086548, | |
| "learning_rate": 2.9411764705882356e-07, | |
| "loss": 0.7798, | |
| "step": 4 | |
| }, | |
| { | |
| "epoch": 0.011061946902654867, | |
| "grad_norm": 2.915313959121704, | |
| "learning_rate": 3.6764705882352943e-07, | |
| "loss": 0.7844, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 0.01327433628318584, | |
| "grad_norm": 2.7517311573028564, | |
| "learning_rate": 4.4117647058823536e-07, | |
| "loss": 0.7519, | |
| "step": 6 | |
| }, | |
| { | |
| "epoch": 0.015486725663716814, | |
| "grad_norm": 2.8670687675476074, | |
| "learning_rate": 5.147058823529412e-07, | |
| "loss": 0.7639, | |
| "step": 7 | |
| }, | |
| { | |
| "epoch": 0.017699115044247787, | |
| "grad_norm": 2.838557243347168, | |
| "learning_rate": 5.882352941176471e-07, | |
| "loss": 0.7794, | |
| "step": 8 | |
| }, | |
| { | |
| "epoch": 0.01991150442477876, | |
| "grad_norm": 2.6696882247924805, | |
| "learning_rate": 6.61764705882353e-07, | |
| "loss": 0.7539, | |
| "step": 9 | |
| }, | |
| { | |
| "epoch": 0.022123893805309734, | |
| "grad_norm": 2.657489061355591, | |
| "learning_rate": 7.352941176470589e-07, | |
| "loss": 0.7437, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.024336283185840708, | |
| "grad_norm": 2.6917357444763184, | |
| "learning_rate": 8.088235294117648e-07, | |
| "loss": 0.763, | |
| "step": 11 | |
| }, | |
| { | |
| "epoch": 0.02654867256637168, | |
| "grad_norm": 2.2191965579986572, | |
| "learning_rate": 8.823529411764707e-07, | |
| "loss": 0.7424, | |
| "step": 12 | |
| }, | |
| { | |
| "epoch": 0.028761061946902654, | |
| "grad_norm": 2.2132158279418945, | |
| "learning_rate": 9.558823529411764e-07, | |
| "loss": 0.7238, | |
| "step": 13 | |
| }, | |
| { | |
| "epoch": 0.030973451327433628, | |
| "grad_norm": 2.1964025497436523, | |
| "learning_rate": 1.0294117647058825e-06, | |
| "loss": 0.7384, | |
| "step": 14 | |
| }, | |
| { | |
| "epoch": 0.033185840707964605, | |
| "grad_norm": 2.0652434825897217, | |
| "learning_rate": 1.1029411764705884e-06, | |
| "loss": 0.7366, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.035398230088495575, | |
| "grad_norm": 1.6799640655517578, | |
| "learning_rate": 1.1764705882352942e-06, | |
| "loss": 0.7022, | |
| "step": 16 | |
| }, | |
| { | |
| "epoch": 0.03761061946902655, | |
| "grad_norm": 1.4400370121002197, | |
| "learning_rate": 1.25e-06, | |
| "loss": 0.6861, | |
| "step": 17 | |
| }, | |
| { | |
| "epoch": 0.03982300884955752, | |
| "grad_norm": 1.4010155200958252, | |
| "learning_rate": 1.323529411764706e-06, | |
| "loss": 0.6904, | |
| "step": 18 | |
| }, | |
| { | |
| "epoch": 0.0420353982300885, | |
| "grad_norm": 1.3582905530929565, | |
| "learning_rate": 1.3970588235294119e-06, | |
| "loss": 0.6876, | |
| "step": 19 | |
| }, | |
| { | |
| "epoch": 0.04424778761061947, | |
| "grad_norm": 1.3372609615325928, | |
| "learning_rate": 1.4705882352941177e-06, | |
| "loss": 0.6809, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.046460176991150445, | |
| "grad_norm": 1.3210690021514893, | |
| "learning_rate": 1.5441176470588238e-06, | |
| "loss": 0.687, | |
| "step": 21 | |
| }, | |
| { | |
| "epoch": 0.048672566371681415, | |
| "grad_norm": 1.0419282913208008, | |
| "learning_rate": 1.6176470588235297e-06, | |
| "loss": 0.6743, | |
| "step": 22 | |
| }, | |
| { | |
| "epoch": 0.05088495575221239, | |
| "grad_norm": 0.9881656765937805, | |
| "learning_rate": 1.6911764705882356e-06, | |
| "loss": 0.6483, | |
| "step": 23 | |
| }, | |
| { | |
| "epoch": 0.05309734513274336, | |
| "grad_norm": 0.9939265251159668, | |
| "learning_rate": 1.7647058823529414e-06, | |
| "loss": 0.6527, | |
| "step": 24 | |
| }, | |
| { | |
| "epoch": 0.05530973451327434, | |
| "grad_norm": 0.9658608436584473, | |
| "learning_rate": 1.8382352941176473e-06, | |
| "loss": 0.6544, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.05752212389380531, | |
| "grad_norm": 0.897885262966156, | |
| "learning_rate": 1.9117647058823528e-06, | |
| "loss": 0.6358, | |
| "step": 26 | |
| }, | |
| { | |
| "epoch": 0.059734513274336286, | |
| "grad_norm": 0.8456088900566101, | |
| "learning_rate": 1.985294117647059e-06, | |
| "loss": 0.6403, | |
| "step": 27 | |
| }, | |
| { | |
| "epoch": 0.061946902654867256, | |
| "grad_norm": 0.8407159447669983, | |
| "learning_rate": 2.058823529411765e-06, | |
| "loss": 0.6366, | |
| "step": 28 | |
| }, | |
| { | |
| "epoch": 0.06415929203539823, | |
| "grad_norm": 0.7394031286239624, | |
| "learning_rate": 2.132352941176471e-06, | |
| "loss": 0.6277, | |
| "step": 29 | |
| }, | |
| { | |
| "epoch": 0.06637168141592921, | |
| "grad_norm": 0.6359221339225769, | |
| "learning_rate": 2.2058823529411767e-06, | |
| "loss": 0.6088, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.06858407079646017, | |
| "grad_norm": 0.6923512816429138, | |
| "learning_rate": 2.2794117647058826e-06, | |
| "loss": 0.6072, | |
| "step": 31 | |
| }, | |
| { | |
| "epoch": 0.07079646017699115, | |
| "grad_norm": 0.6872782707214355, | |
| "learning_rate": 2.3529411764705885e-06, | |
| "loss": 0.5939, | |
| "step": 32 | |
| }, | |
| { | |
| "epoch": 0.07300884955752213, | |
| "grad_norm": 0.6121441721916199, | |
| "learning_rate": 2.4264705882352943e-06, | |
| "loss": 0.5936, | |
| "step": 33 | |
| }, | |
| { | |
| "epoch": 0.0752212389380531, | |
| "grad_norm": 0.5648632645606995, | |
| "learning_rate": 2.5e-06, | |
| "loss": 0.5925, | |
| "step": 34 | |
| }, | |
| { | |
| "epoch": 0.07743362831858407, | |
| "grad_norm": 0.527875542640686, | |
| "learning_rate": 2.5735294117647057e-06, | |
| "loss": 0.5788, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 0.07964601769911504, | |
| "grad_norm": 0.48248472809791565, | |
| "learning_rate": 2.647058823529412e-06, | |
| "loss": 0.584, | |
| "step": 36 | |
| }, | |
| { | |
| "epoch": 0.08185840707964602, | |
| "grad_norm": 0.5289769172668457, | |
| "learning_rate": 2.720588235294118e-06, | |
| "loss": 0.5811, | |
| "step": 37 | |
| }, | |
| { | |
| "epoch": 0.084070796460177, | |
| "grad_norm": 0.5214428901672363, | |
| "learning_rate": 2.7941176470588237e-06, | |
| "loss": 0.5592, | |
| "step": 38 | |
| }, | |
| { | |
| "epoch": 0.08628318584070796, | |
| "grad_norm": 0.4773428440093994, | |
| "learning_rate": 2.8676470588235296e-06, | |
| "loss": 0.558, | |
| "step": 39 | |
| }, | |
| { | |
| "epoch": 0.08849557522123894, | |
| "grad_norm": 0.4300067722797394, | |
| "learning_rate": 2.9411764705882355e-06, | |
| "loss": 0.5592, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.09070796460176991, | |
| "grad_norm": 0.4010540246963501, | |
| "learning_rate": 3.0147058823529413e-06, | |
| "loss": 0.559, | |
| "step": 41 | |
| }, | |
| { | |
| "epoch": 0.09292035398230089, | |
| "grad_norm": 0.3986721336841583, | |
| "learning_rate": 3.0882352941176476e-06, | |
| "loss": 0.5653, | |
| "step": 42 | |
| }, | |
| { | |
| "epoch": 0.09513274336283185, | |
| "grad_norm": 0.37803158164024353, | |
| "learning_rate": 3.161764705882353e-06, | |
| "loss": 0.5421, | |
| "step": 43 | |
| }, | |
| { | |
| "epoch": 0.09734513274336283, | |
| "grad_norm": 0.3909413516521454, | |
| "learning_rate": 3.2352941176470594e-06, | |
| "loss": 0.5347, | |
| "step": 44 | |
| }, | |
| { | |
| "epoch": 0.09955752212389381, | |
| "grad_norm": 0.373319536447525, | |
| "learning_rate": 3.308823529411765e-06, | |
| "loss": 0.5262, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 0.10176991150442478, | |
| "grad_norm": 0.38008248805999756, | |
| "learning_rate": 3.382352941176471e-06, | |
| "loss": 0.5285, | |
| "step": 46 | |
| }, | |
| { | |
| "epoch": 0.10398230088495575, | |
| "grad_norm": 0.37302571535110474, | |
| "learning_rate": 3.4558823529411766e-06, | |
| "loss": 0.5132, | |
| "step": 47 | |
| }, | |
| { | |
| "epoch": 0.10619469026548672, | |
| "grad_norm": 0.3702549636363983, | |
| "learning_rate": 3.529411764705883e-06, | |
| "loss": 0.5228, | |
| "step": 48 | |
| }, | |
| { | |
| "epoch": 0.1084070796460177, | |
| "grad_norm": 0.3243614733219147, | |
| "learning_rate": 3.6029411764705883e-06, | |
| "loss": 0.5126, | |
| "step": 49 | |
| }, | |
| { | |
| "epoch": 0.11061946902654868, | |
| "grad_norm": 0.2833242118358612, | |
| "learning_rate": 3.6764705882352946e-06, | |
| "loss": 0.5061, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.11283185840707964, | |
| "grad_norm": 0.27726179361343384, | |
| "learning_rate": 3.7500000000000005e-06, | |
| "loss": 0.5054, | |
| "step": 51 | |
| }, | |
| { | |
| "epoch": 0.11504424778761062, | |
| "grad_norm": 0.2611929774284363, | |
| "learning_rate": 3.8235294117647055e-06, | |
| "loss": 0.5136, | |
| "step": 52 | |
| }, | |
| { | |
| "epoch": 0.1172566371681416, | |
| "grad_norm": 0.2536194324493408, | |
| "learning_rate": 3.897058823529412e-06, | |
| "loss": 0.5016, | |
| "step": 53 | |
| }, | |
| { | |
| "epoch": 0.11946902654867257, | |
| "grad_norm": 0.24586400389671326, | |
| "learning_rate": 3.970588235294118e-06, | |
| "loss": 0.5017, | |
| "step": 54 | |
| }, | |
| { | |
| "epoch": 0.12168141592920353, | |
| "grad_norm": 0.22390125691890717, | |
| "learning_rate": 4.044117647058824e-06, | |
| "loss": 0.5015, | |
| "step": 55 | |
| }, | |
| { | |
| "epoch": 0.12389380530973451, | |
| "grad_norm": 0.2428012639284134, | |
| "learning_rate": 4.11764705882353e-06, | |
| "loss": 0.5125, | |
| "step": 56 | |
| }, | |
| { | |
| "epoch": 0.1261061946902655, | |
| "grad_norm": 0.2092347890138626, | |
| "learning_rate": 4.191176470588236e-06, | |
| "loss": 0.4978, | |
| "step": 57 | |
| }, | |
| { | |
| "epoch": 0.12831858407079647, | |
| "grad_norm": 0.2112482786178589, | |
| "learning_rate": 4.264705882352942e-06, | |
| "loss": 0.5117, | |
| "step": 58 | |
| }, | |
| { | |
| "epoch": 0.13053097345132744, | |
| "grad_norm": 0.2061765491962433, | |
| "learning_rate": 4.3382352941176475e-06, | |
| "loss": 0.4917, | |
| "step": 59 | |
| }, | |
| { | |
| "epoch": 0.13274336283185842, | |
| "grad_norm": 0.20188196003437042, | |
| "learning_rate": 4.411764705882353e-06, | |
| "loss": 0.4958, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.13495575221238937, | |
| "grad_norm": 0.21565476059913635, | |
| "learning_rate": 4.485294117647059e-06, | |
| "loss": 0.4839, | |
| "step": 61 | |
| }, | |
| { | |
| "epoch": 0.13716814159292035, | |
| "grad_norm": 0.22550469636917114, | |
| "learning_rate": 4.558823529411765e-06, | |
| "loss": 0.4919, | |
| "step": 62 | |
| }, | |
| { | |
| "epoch": 0.13938053097345132, | |
| "grad_norm": 0.20743007957935333, | |
| "learning_rate": 4.632352941176471e-06, | |
| "loss": 0.4937, | |
| "step": 63 | |
| }, | |
| { | |
| "epoch": 0.1415929203539823, | |
| "grad_norm": 0.2033708542585373, | |
| "learning_rate": 4.705882352941177e-06, | |
| "loss": 0.4858, | |
| "step": 64 | |
| }, | |
| { | |
| "epoch": 0.14380530973451328, | |
| "grad_norm": 0.21126723289489746, | |
| "learning_rate": 4.779411764705883e-06, | |
| "loss": 0.487, | |
| "step": 65 | |
| }, | |
| { | |
| "epoch": 0.14601769911504425, | |
| "grad_norm": 0.2027079313993454, | |
| "learning_rate": 4.852941176470589e-06, | |
| "loss": 0.4975, | |
| "step": 66 | |
| }, | |
| { | |
| "epoch": 0.14823008849557523, | |
| "grad_norm": 0.18106848001480103, | |
| "learning_rate": 4.9264705882352945e-06, | |
| "loss": 0.4894, | |
| "step": 67 | |
| }, | |
| { | |
| "epoch": 0.1504424778761062, | |
| "grad_norm": 0.17369899153709412, | |
| "learning_rate": 5e-06, | |
| "loss": 0.4825, | |
| "step": 68 | |
| }, | |
| { | |
| "epoch": 0.15265486725663716, | |
| "grad_norm": 0.1895466446876526, | |
| "learning_rate": 5.073529411764706e-06, | |
| "loss": 0.4792, | |
| "step": 69 | |
| }, | |
| { | |
| "epoch": 0.15486725663716813, | |
| "grad_norm": 0.17305336892604828, | |
| "learning_rate": 5.147058823529411e-06, | |
| "loss": 0.4715, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.1570796460176991, | |
| "grad_norm": 0.1629684716463089, | |
| "learning_rate": 5.220588235294118e-06, | |
| "loss": 0.4696, | |
| "step": 71 | |
| }, | |
| { | |
| "epoch": 0.1592920353982301, | |
| "grad_norm": 0.1580497771501541, | |
| "learning_rate": 5.294117647058824e-06, | |
| "loss": 0.4647, | |
| "step": 72 | |
| }, | |
| { | |
| "epoch": 0.16150442477876106, | |
| "grad_norm": 0.16177481412887573, | |
| "learning_rate": 5.36764705882353e-06, | |
| "loss": 0.4667, | |
| "step": 73 | |
| }, | |
| { | |
| "epoch": 0.16371681415929204, | |
| "grad_norm": 0.14660590887069702, | |
| "learning_rate": 5.441176470588236e-06, | |
| "loss": 0.4702, | |
| "step": 74 | |
| }, | |
| { | |
| "epoch": 0.16592920353982302, | |
| "grad_norm": 0.16958165168762207, | |
| "learning_rate": 5.514705882352942e-06, | |
| "loss": 0.4664, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 0.168141592920354, | |
| "grad_norm": 0.18446360528469086, | |
| "learning_rate": 5.588235294117647e-06, | |
| "loss": 0.4634, | |
| "step": 76 | |
| }, | |
| { | |
| "epoch": 0.17035398230088494, | |
| "grad_norm": 0.18088003993034363, | |
| "learning_rate": 5.661764705882353e-06, | |
| "loss": 0.4782, | |
| "step": 77 | |
| }, | |
| { | |
| "epoch": 0.17256637168141592, | |
| "grad_norm": 0.16989891231060028, | |
| "learning_rate": 5.735294117647059e-06, | |
| "loss": 0.4751, | |
| "step": 78 | |
| }, | |
| { | |
| "epoch": 0.1747787610619469, | |
| "grad_norm": 0.15071384608745575, | |
| "learning_rate": 5.808823529411766e-06, | |
| "loss": 0.4679, | |
| "step": 79 | |
| }, | |
| { | |
| "epoch": 0.17699115044247787, | |
| "grad_norm": 0.16606192290782928, | |
| "learning_rate": 5.882352941176471e-06, | |
| "loss": 0.4613, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.17920353982300885, | |
| "grad_norm": 0.16193555295467377, | |
| "learning_rate": 5.955882352941177e-06, | |
| "loss": 0.4656, | |
| "step": 81 | |
| }, | |
| { | |
| "epoch": 0.18141592920353983, | |
| "grad_norm": 0.14852063357830048, | |
| "learning_rate": 6.029411764705883e-06, | |
| "loss": 0.4508, | |
| "step": 82 | |
| }, | |
| { | |
| "epoch": 0.1836283185840708, | |
| "grad_norm": 0.16259585320949554, | |
| "learning_rate": 6.102941176470589e-06, | |
| "loss": 0.453, | |
| "step": 83 | |
| }, | |
| { | |
| "epoch": 0.18584070796460178, | |
| "grad_norm": 0.17288607358932495, | |
| "learning_rate": 6.176470588235295e-06, | |
| "loss": 0.4597, | |
| "step": 84 | |
| }, | |
| { | |
| "epoch": 0.18805309734513273, | |
| "grad_norm": 0.15836317837238312, | |
| "learning_rate": 6.25e-06, | |
| "loss": 0.4548, | |
| "step": 85 | |
| }, | |
| { | |
| "epoch": 0.1902654867256637, | |
| "grad_norm": 0.17005616426467896, | |
| "learning_rate": 6.323529411764706e-06, | |
| "loss": 0.4608, | |
| "step": 86 | |
| }, | |
| { | |
| "epoch": 0.19247787610619468, | |
| "grad_norm": 0.148678258061409, | |
| "learning_rate": 6.397058823529412e-06, | |
| "loss": 0.4599, | |
| "step": 87 | |
| }, | |
| { | |
| "epoch": 0.19469026548672566, | |
| "grad_norm": 0.16987866163253784, | |
| "learning_rate": 6.470588235294119e-06, | |
| "loss": 0.4512, | |
| "step": 88 | |
| }, | |
| { | |
| "epoch": 0.19690265486725664, | |
| "grad_norm": 0.15269418060779572, | |
| "learning_rate": 6.544117647058824e-06, | |
| "loss": 0.4579, | |
| "step": 89 | |
| }, | |
| { | |
| "epoch": 0.19911504424778761, | |
| "grad_norm": 0.1564721018075943, | |
| "learning_rate": 6.61764705882353e-06, | |
| "loss": 0.4577, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.2013274336283186, | |
| "grad_norm": 0.15829169750213623, | |
| "learning_rate": 6.6911764705882356e-06, | |
| "loss": 0.4628, | |
| "step": 91 | |
| }, | |
| { | |
| "epoch": 0.20353982300884957, | |
| "grad_norm": 0.16252678632736206, | |
| "learning_rate": 6.764705882352942e-06, | |
| "loss": 0.4508, | |
| "step": 92 | |
| }, | |
| { | |
| "epoch": 0.20575221238938052, | |
| "grad_norm": 0.16816888749599457, | |
| "learning_rate": 6.838235294117648e-06, | |
| "loss": 0.4569, | |
| "step": 93 | |
| }, | |
| { | |
| "epoch": 0.2079646017699115, | |
| "grad_norm": 0.16243158280849457, | |
| "learning_rate": 6.911764705882353e-06, | |
| "loss": 0.4492, | |
| "step": 94 | |
| }, | |
| { | |
| "epoch": 0.21017699115044247, | |
| "grad_norm": 0.1554814875125885, | |
| "learning_rate": 6.985294117647059e-06, | |
| "loss": 0.4501, | |
| "step": 95 | |
| }, | |
| { | |
| "epoch": 0.21238938053097345, | |
| "grad_norm": 0.15875332057476044, | |
| "learning_rate": 7.058823529411766e-06, | |
| "loss": 0.4581, | |
| "step": 96 | |
| }, | |
| { | |
| "epoch": 0.21460176991150443, | |
| "grad_norm": 0.16014939546585083, | |
| "learning_rate": 7.132352941176472e-06, | |
| "loss": 0.448, | |
| "step": 97 | |
| }, | |
| { | |
| "epoch": 0.2168141592920354, | |
| "grad_norm": 0.18041987717151642, | |
| "learning_rate": 7.205882352941177e-06, | |
| "loss": 0.4553, | |
| "step": 98 | |
| }, | |
| { | |
| "epoch": 0.21902654867256638, | |
| "grad_norm": 0.16102494299411774, | |
| "learning_rate": 7.2794117647058826e-06, | |
| "loss": 0.4553, | |
| "step": 99 | |
| }, | |
| { | |
| "epoch": 0.22123893805309736, | |
| "grad_norm": 0.1710263043642044, | |
| "learning_rate": 7.352941176470589e-06, | |
| "loss": 0.4426, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.2234513274336283, | |
| "grad_norm": 0.17998401820659637, | |
| "learning_rate": 7.426470588235295e-06, | |
| "loss": 0.439, | |
| "step": 101 | |
| }, | |
| { | |
| "epoch": 0.22566371681415928, | |
| "grad_norm": 0.17915406823158264, | |
| "learning_rate": 7.500000000000001e-06, | |
| "loss": 0.4467, | |
| "step": 102 | |
| }, | |
| { | |
| "epoch": 0.22787610619469026, | |
| "grad_norm": 0.16851037740707397, | |
| "learning_rate": 7.573529411764706e-06, | |
| "loss": 0.4363, | |
| "step": 103 | |
| }, | |
| { | |
| "epoch": 0.23008849557522124, | |
| "grad_norm": 0.17610934376716614, | |
| "learning_rate": 7.647058823529411e-06, | |
| "loss": 0.4493, | |
| "step": 104 | |
| }, | |
| { | |
| "epoch": 0.2323008849557522, | |
| "grad_norm": 0.18994346261024475, | |
| "learning_rate": 7.720588235294119e-06, | |
| "loss": 0.4485, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 0.2345132743362832, | |
| "grad_norm": 0.1732485294342041, | |
| "learning_rate": 7.794117647058825e-06, | |
| "loss": 0.4396, | |
| "step": 106 | |
| }, | |
| { | |
| "epoch": 0.23672566371681417, | |
| "grad_norm": 0.16453640162944794, | |
| "learning_rate": 7.86764705882353e-06, | |
| "loss": 0.4442, | |
| "step": 107 | |
| }, | |
| { | |
| "epoch": 0.23893805309734514, | |
| "grad_norm": 0.1798321008682251, | |
| "learning_rate": 7.941176470588236e-06, | |
| "loss": 0.44, | |
| "step": 108 | |
| }, | |
| { | |
| "epoch": 0.2411504424778761, | |
| "grad_norm": 0.17025865614414215, | |
| "learning_rate": 8.014705882352942e-06, | |
| "loss": 0.436, | |
| "step": 109 | |
| }, | |
| { | |
| "epoch": 0.24336283185840707, | |
| "grad_norm": 0.17872163653373718, | |
| "learning_rate": 8.088235294117648e-06, | |
| "loss": 0.4393, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.24557522123893805, | |
| "grad_norm": 0.17654740810394287, | |
| "learning_rate": 8.161764705882354e-06, | |
| "loss": 0.4453, | |
| "step": 111 | |
| }, | |
| { | |
| "epoch": 0.24778761061946902, | |
| "grad_norm": 0.1653042584657669, | |
| "learning_rate": 8.23529411764706e-06, | |
| "loss": 0.429, | |
| "step": 112 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "grad_norm": 0.16623114049434662, | |
| "learning_rate": 8.308823529411766e-06, | |
| "loss": 0.4378, | |
| "step": 113 | |
| }, | |
| { | |
| "epoch": 0.252212389380531, | |
| "grad_norm": 0.17289873957633972, | |
| "learning_rate": 8.382352941176472e-06, | |
| "loss": 0.4386, | |
| "step": 114 | |
| }, | |
| { | |
| "epoch": 0.25442477876106195, | |
| "grad_norm": 0.17378394305706024, | |
| "learning_rate": 8.455882352941177e-06, | |
| "loss": 0.4337, | |
| "step": 115 | |
| }, | |
| { | |
| "epoch": 0.25663716814159293, | |
| "grad_norm": 0.15947799384593964, | |
| "learning_rate": 8.529411764705883e-06, | |
| "loss": 0.4426, | |
| "step": 116 | |
| }, | |
| { | |
| "epoch": 0.2588495575221239, | |
| "grad_norm": 0.20766223967075348, | |
| "learning_rate": 8.60294117647059e-06, | |
| "loss": 0.4345, | |
| "step": 117 | |
| }, | |
| { | |
| "epoch": 0.2610619469026549, | |
| "grad_norm": 0.19195608794689178, | |
| "learning_rate": 8.676470588235295e-06, | |
| "loss": 0.4288, | |
| "step": 118 | |
| }, | |
| { | |
| "epoch": 0.26327433628318586, | |
| "grad_norm": 0.1608441323041916, | |
| "learning_rate": 8.750000000000001e-06, | |
| "loss": 0.4431, | |
| "step": 119 | |
| }, | |
| { | |
| "epoch": 0.26548672566371684, | |
| "grad_norm": 0.21266116201877594, | |
| "learning_rate": 8.823529411764707e-06, | |
| "loss": 0.4312, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.2676991150442478, | |
| "grad_norm": 0.21518948674201965, | |
| "learning_rate": 8.897058823529413e-06, | |
| "loss": 0.4296, | |
| "step": 121 | |
| }, | |
| { | |
| "epoch": 0.26991150442477874, | |
| "grad_norm": 0.1727646440267563, | |
| "learning_rate": 8.970588235294119e-06, | |
| "loss": 0.432, | |
| "step": 122 | |
| }, | |
| { | |
| "epoch": 0.2721238938053097, | |
| "grad_norm": 0.25698140263557434, | |
| "learning_rate": 9.044117647058824e-06, | |
| "loss": 0.4245, | |
| "step": 123 | |
| }, | |
| { | |
| "epoch": 0.2743362831858407, | |
| "grad_norm": 0.20740661025047302, | |
| "learning_rate": 9.11764705882353e-06, | |
| "loss": 0.4407, | |
| "step": 124 | |
| }, | |
| { | |
| "epoch": 0.27654867256637167, | |
| "grad_norm": 0.19939358532428741, | |
| "learning_rate": 9.191176470588236e-06, | |
| "loss": 0.4478, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 0.27876106194690264, | |
| "grad_norm": 0.24062572419643402, | |
| "learning_rate": 9.264705882352942e-06, | |
| "loss": 0.4259, | |
| "step": 126 | |
| }, | |
| { | |
| "epoch": 0.2809734513274336, | |
| "grad_norm": 0.2283308357000351, | |
| "learning_rate": 9.338235294117648e-06, | |
| "loss": 0.4377, | |
| "step": 127 | |
| }, | |
| { | |
| "epoch": 0.2831858407079646, | |
| "grad_norm": 0.20251670479774475, | |
| "learning_rate": 9.411764705882354e-06, | |
| "loss": 0.4399, | |
| "step": 128 | |
| }, | |
| { | |
| "epoch": 0.2853982300884956, | |
| "grad_norm": 0.23135575652122498, | |
| "learning_rate": 9.48529411764706e-06, | |
| "loss": 0.4374, | |
| "step": 129 | |
| }, | |
| { | |
| "epoch": 0.28761061946902655, | |
| "grad_norm": 0.1677524596452713, | |
| "learning_rate": 9.558823529411766e-06, | |
| "loss": 0.4382, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.28982300884955753, | |
| "grad_norm": 0.19608592987060547, | |
| "learning_rate": 9.632352941176471e-06, | |
| "loss": 0.4319, | |
| "step": 131 | |
| }, | |
| { | |
| "epoch": 0.2920353982300885, | |
| "grad_norm": 0.1991558074951172, | |
| "learning_rate": 9.705882352941177e-06, | |
| "loss": 0.4294, | |
| "step": 132 | |
| }, | |
| { | |
| "epoch": 0.2942477876106195, | |
| "grad_norm": 0.17177461087703705, | |
| "learning_rate": 9.779411764705883e-06, | |
| "loss": 0.4349, | |
| "step": 133 | |
| }, | |
| { | |
| "epoch": 0.29646017699115046, | |
| "grad_norm": 0.1729421466588974, | |
| "learning_rate": 9.852941176470589e-06, | |
| "loss": 0.4283, | |
| "step": 134 | |
| }, | |
| { | |
| "epoch": 0.29867256637168144, | |
| "grad_norm": 0.2021639496088028, | |
| "learning_rate": 9.926470588235295e-06, | |
| "loss": 0.4377, | |
| "step": 135 | |
| }, | |
| { | |
| "epoch": 0.3008849557522124, | |
| "grad_norm": 0.18062114715576172, | |
| "learning_rate": 1e-05, | |
| "loss": 0.4427, | |
| "step": 136 | |
| }, | |
| { | |
| "epoch": 0.3030973451327434, | |
| "grad_norm": 0.16362692415714264, | |
| "learning_rate": 9.999983422468849e-06, | |
| "loss": 0.4395, | |
| "step": 137 | |
| }, | |
| { | |
| "epoch": 0.3053097345132743, | |
| "grad_norm": 0.20908989012241364, | |
| "learning_rate": 9.999933689985315e-06, | |
| "loss": 0.4363, | |
| "step": 138 | |
| }, | |
| { | |
| "epoch": 0.3075221238938053, | |
| "grad_norm": 0.16169682145118713, | |
| "learning_rate": 9.999850802879177e-06, | |
| "loss": 0.4303, | |
| "step": 139 | |
| }, | |
| { | |
| "epoch": 0.30973451327433627, | |
| "grad_norm": 0.17690162360668182, | |
| "learning_rate": 9.999734761700061e-06, | |
| "loss": 0.4266, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.31194690265486724, | |
| "grad_norm": 0.17501935362815857, | |
| "learning_rate": 9.99958556721744e-06, | |
| "loss": 0.4336, | |
| "step": 141 | |
| }, | |
| { | |
| "epoch": 0.3141592920353982, | |
| "grad_norm": 0.16887253522872925, | |
| "learning_rate": 9.999403220420619e-06, | |
| "loss": 0.4351, | |
| "step": 142 | |
| }, | |
| { | |
| "epoch": 0.3163716814159292, | |
| "grad_norm": 0.1647050976753235, | |
| "learning_rate": 9.999187722518747e-06, | |
| "loss": 0.426, | |
| "step": 143 | |
| }, | |
| { | |
| "epoch": 0.3185840707964602, | |
| "grad_norm": 0.18983641266822815, | |
| "learning_rate": 9.998939074940788e-06, | |
| "loss": 0.4219, | |
| "step": 144 | |
| }, | |
| { | |
| "epoch": 0.32079646017699115, | |
| "grad_norm": 0.19264842569828033, | |
| "learning_rate": 9.99865727933553e-06, | |
| "loss": 0.4237, | |
| "step": 145 | |
| }, | |
| { | |
| "epoch": 0.3230088495575221, | |
| "grad_norm": 0.18118713796138763, | |
| "learning_rate": 9.998342337571566e-06, | |
| "loss": 0.4174, | |
| "step": 146 | |
| }, | |
| { | |
| "epoch": 0.3252212389380531, | |
| "grad_norm": 0.22882233560085297, | |
| "learning_rate": 9.997994251737276e-06, | |
| "loss": 0.4277, | |
| "step": 147 | |
| }, | |
| { | |
| "epoch": 0.3274336283185841, | |
| "grad_norm": 0.17980162799358368, | |
| "learning_rate": 9.99761302414082e-06, | |
| "loss": 0.4257, | |
| "step": 148 | |
| }, | |
| { | |
| "epoch": 0.32964601769911506, | |
| "grad_norm": 0.2404302954673767, | |
| "learning_rate": 9.997198657310126e-06, | |
| "loss": 0.4305, | |
| "step": 149 | |
| }, | |
| { | |
| "epoch": 0.33185840707964603, | |
| "grad_norm": 0.19852545857429504, | |
| "learning_rate": 9.996751153992861e-06, | |
| "loss": 0.4212, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.334070796460177, | |
| "grad_norm": 0.22487014532089233, | |
| "learning_rate": 9.996270517156431e-06, | |
| "loss": 0.4257, | |
| "step": 151 | |
| }, | |
| { | |
| "epoch": 0.336283185840708, | |
| "grad_norm": 0.22307410836219788, | |
| "learning_rate": 9.995756749987942e-06, | |
| "loss": 0.4208, | |
| "step": 152 | |
| }, | |
| { | |
| "epoch": 0.33849557522123896, | |
| "grad_norm": 0.2162560373544693, | |
| "learning_rate": 9.995209855894191e-06, | |
| "loss": 0.4206, | |
| "step": 153 | |
| }, | |
| { | |
| "epoch": 0.3407079646017699, | |
| "grad_norm": 0.20444706082344055, | |
| "learning_rate": 9.994629838501637e-06, | |
| "loss": 0.4295, | |
| "step": 154 | |
| }, | |
| { | |
| "epoch": 0.34292035398230086, | |
| "grad_norm": 0.1971275806427002, | |
| "learning_rate": 9.994016701656384e-06, | |
| "loss": 0.4299, | |
| "step": 155 | |
| }, | |
| { | |
| "epoch": 0.34513274336283184, | |
| "grad_norm": 0.19772182404994965, | |
| "learning_rate": 9.993370449424153e-06, | |
| "loss": 0.4201, | |
| "step": 156 | |
| }, | |
| { | |
| "epoch": 0.3473451327433628, | |
| "grad_norm": 0.1802293360233307, | |
| "learning_rate": 9.992691086090249e-06, | |
| "loss": 0.4254, | |
| "step": 157 | |
| }, | |
| { | |
| "epoch": 0.3495575221238938, | |
| "grad_norm": 0.1828855574131012, | |
| "learning_rate": 9.991978616159535e-06, | |
| "loss": 0.4279, | |
| "step": 158 | |
| }, | |
| { | |
| "epoch": 0.35176991150442477, | |
| "grad_norm": 0.16842177510261536, | |
| "learning_rate": 9.991233044356414e-06, | |
| "loss": 0.4176, | |
| "step": 159 | |
| }, | |
| { | |
| "epoch": 0.35398230088495575, | |
| "grad_norm": 0.1759248673915863, | |
| "learning_rate": 9.990454375624778e-06, | |
| "loss": 0.4266, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.3561946902654867, | |
| "grad_norm": 0.18696172535419464, | |
| "learning_rate": 9.98964261512799e-06, | |
| "loss": 0.4195, | |
| "step": 161 | |
| }, | |
| { | |
| "epoch": 0.3584070796460177, | |
| "grad_norm": 0.16631002724170685, | |
| "learning_rate": 9.988797768248844e-06, | |
| "loss": 0.4272, | |
| "step": 162 | |
| }, | |
| { | |
| "epoch": 0.3606194690265487, | |
| "grad_norm": 0.16732241213321686, | |
| "learning_rate": 9.987919840589529e-06, | |
| "loss": 0.4195, | |
| "step": 163 | |
| }, | |
| { | |
| "epoch": 0.36283185840707965, | |
| "grad_norm": 0.18925903737545013, | |
| "learning_rate": 9.987008837971595e-06, | |
| "loss": 0.4202, | |
| "step": 164 | |
| }, | |
| { | |
| "epoch": 0.36504424778761063, | |
| "grad_norm": 0.17854173481464386, | |
| "learning_rate": 9.986064766435915e-06, | |
| "loss": 0.4247, | |
| "step": 165 | |
| }, | |
| { | |
| "epoch": 0.3672566371681416, | |
| "grad_norm": 0.20543624460697174, | |
| "learning_rate": 9.985087632242634e-06, | |
| "loss": 0.4163, | |
| "step": 166 | |
| }, | |
| { | |
| "epoch": 0.3694690265486726, | |
| "grad_norm": 0.17206773161888123, | |
| "learning_rate": 9.984077441871144e-06, | |
| "loss": 0.4245, | |
| "step": 167 | |
| }, | |
| { | |
| "epoch": 0.37168141592920356, | |
| "grad_norm": 0.19772297143936157, | |
| "learning_rate": 9.98303420202003e-06, | |
| "loss": 0.406, | |
| "step": 168 | |
| }, | |
| { | |
| "epoch": 0.37389380530973454, | |
| "grad_norm": 0.21683964133262634, | |
| "learning_rate": 9.981957919607026e-06, | |
| "loss": 0.4302, | |
| "step": 169 | |
| }, | |
| { | |
| "epoch": 0.37610619469026546, | |
| "grad_norm": 0.17397964000701904, | |
| "learning_rate": 9.980848601768976e-06, | |
| "loss": 0.4155, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.37831858407079644, | |
| "grad_norm": 0.19775860011577606, | |
| "learning_rate": 9.97970625586178e-06, | |
| "loss": 0.4223, | |
| "step": 171 | |
| }, | |
| { | |
| "epoch": 0.3805309734513274, | |
| "grad_norm": 0.18166320025920868, | |
| "learning_rate": 9.978530889460351e-06, | |
| "loss": 0.4205, | |
| "step": 172 | |
| }, | |
| { | |
| "epoch": 0.3827433628318584, | |
| "grad_norm": 0.20108245313167572, | |
| "learning_rate": 9.977322510358552e-06, | |
| "loss": 0.4262, | |
| "step": 173 | |
| }, | |
| { | |
| "epoch": 0.38495575221238937, | |
| "grad_norm": 0.17232555150985718, | |
| "learning_rate": 9.976081126569164e-06, | |
| "loss": 0.4257, | |
| "step": 174 | |
| }, | |
| { | |
| "epoch": 0.38716814159292035, | |
| "grad_norm": 0.17979106307029724, | |
| "learning_rate": 9.97480674632382e-06, | |
| "loss": 0.4164, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 0.3893805309734513, | |
| "grad_norm": 0.21929936110973358, | |
| "learning_rate": 9.973499378072947e-06, | |
| "loss": 0.4156, | |
| "step": 176 | |
| }, | |
| { | |
| "epoch": 0.3915929203539823, | |
| "grad_norm": 0.19628576934337616, | |
| "learning_rate": 9.972159030485722e-06, | |
| "loss": 0.4176, | |
| "step": 177 | |
| }, | |
| { | |
| "epoch": 0.3938053097345133, | |
| "grad_norm": 0.20714552700519562, | |
| "learning_rate": 9.970785712450007e-06, | |
| "loss": 0.429, | |
| "step": 178 | |
| }, | |
| { | |
| "epoch": 0.39601769911504425, | |
| "grad_norm": 0.2554994821548462, | |
| "learning_rate": 9.96937943307229e-06, | |
| "loss": 0.4183, | |
| "step": 179 | |
| }, | |
| { | |
| "epoch": 0.39823008849557523, | |
| "grad_norm": 0.21966539323329926, | |
| "learning_rate": 9.967940201677628e-06, | |
| "loss": 0.4148, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.4004424778761062, | |
| "grad_norm": 0.19780422747135162, | |
| "learning_rate": 9.966468027809582e-06, | |
| "loss": 0.4086, | |
| "step": 181 | |
| }, | |
| { | |
| "epoch": 0.4026548672566372, | |
| "grad_norm": 0.19103385508060455, | |
| "learning_rate": 9.964962921230155e-06, | |
| "loss": 0.4123, | |
| "step": 182 | |
| }, | |
| { | |
| "epoch": 0.40486725663716816, | |
| "grad_norm": 0.20896346867084503, | |
| "learning_rate": 9.963424891919728e-06, | |
| "loss": 0.4138, | |
| "step": 183 | |
| }, | |
| { | |
| "epoch": 0.40707964601769914, | |
| "grad_norm": 0.1816549003124237, | |
| "learning_rate": 9.961853950076992e-06, | |
| "loss": 0.4175, | |
| "step": 184 | |
| }, | |
| { | |
| "epoch": 0.4092920353982301, | |
| "grad_norm": 0.1906682550907135, | |
| "learning_rate": 9.960250106118883e-06, | |
| "loss": 0.4146, | |
| "step": 185 | |
| }, | |
| { | |
| "epoch": 0.41150442477876104, | |
| "grad_norm": 0.19309958815574646, | |
| "learning_rate": 9.958613370680507e-06, | |
| "loss": 0.422, | |
| "step": 186 | |
| }, | |
| { | |
| "epoch": 0.413716814159292, | |
| "grad_norm": 0.19457145035266876, | |
| "learning_rate": 9.956943754615082e-06, | |
| "loss": 0.4138, | |
| "step": 187 | |
| }, | |
| { | |
| "epoch": 0.415929203539823, | |
| "grad_norm": 0.1757512390613556, | |
| "learning_rate": 9.955241268993852e-06, | |
| "loss": 0.414, | |
| "step": 188 | |
| }, | |
| { | |
| "epoch": 0.41814159292035397, | |
| "grad_norm": 0.16454994678497314, | |
| "learning_rate": 9.953505925106016e-06, | |
| "loss": 0.4199, | |
| "step": 189 | |
| }, | |
| { | |
| "epoch": 0.42035398230088494, | |
| "grad_norm": 0.1722830832004547, | |
| "learning_rate": 9.951737734458665e-06, | |
| "loss": 0.4219, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.4225663716814159, | |
| "grad_norm": 0.1671864539384842, | |
| "learning_rate": 9.949936708776692e-06, | |
| "loss": 0.4226, | |
| "step": 191 | |
| }, | |
| { | |
| "epoch": 0.4247787610619469, | |
| "grad_norm": 0.16959279775619507, | |
| "learning_rate": 9.94810286000272e-06, | |
| "loss": 0.4188, | |
| "step": 192 | |
| }, | |
| { | |
| "epoch": 0.4269911504424779, | |
| "grad_norm": 0.18185065686702728, | |
| "learning_rate": 9.946236200297025e-06, | |
| "loss": 0.4225, | |
| "step": 193 | |
| }, | |
| { | |
| "epoch": 0.42920353982300885, | |
| "grad_norm": 0.1568220853805542, | |
| "learning_rate": 9.944336742037451e-06, | |
| "loss": 0.4064, | |
| "step": 194 | |
| }, | |
| { | |
| "epoch": 0.4314159292035398, | |
| "grad_norm": 0.19281834363937378, | |
| "learning_rate": 9.942404497819324e-06, | |
| "loss": 0.4135, | |
| "step": 195 | |
| }, | |
| { | |
| "epoch": 0.4336283185840708, | |
| "grad_norm": 0.16964174807071686, | |
| "learning_rate": 9.940439480455386e-06, | |
| "loss": 0.4196, | |
| "step": 196 | |
| }, | |
| { | |
| "epoch": 0.4358407079646018, | |
| "grad_norm": 0.17745113372802734, | |
| "learning_rate": 9.938441702975689e-06, | |
| "loss": 0.4167, | |
| "step": 197 | |
| }, | |
| { | |
| "epoch": 0.43805309734513276, | |
| "grad_norm": 0.1645510345697403, | |
| "learning_rate": 9.936411178627521e-06, | |
| "loss": 0.4093, | |
| "step": 198 | |
| }, | |
| { | |
| "epoch": 0.44026548672566373, | |
| "grad_norm": 0.16442488133907318, | |
| "learning_rate": 9.934347920875314e-06, | |
| "loss": 0.4248, | |
| "step": 199 | |
| }, | |
| { | |
| "epoch": 0.4424778761061947, | |
| "grad_norm": 0.17704367637634277, | |
| "learning_rate": 9.932251943400554e-06, | |
| "loss": 0.4075, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.4446902654867257, | |
| "grad_norm": 0.1849382370710373, | |
| "learning_rate": 9.930123260101697e-06, | |
| "loss": 0.4182, | |
| "step": 201 | |
| }, | |
| { | |
| "epoch": 0.4469026548672566, | |
| "grad_norm": 0.1769019365310669, | |
| "learning_rate": 9.927961885094065e-06, | |
| "loss": 0.4044, | |
| "step": 202 | |
| }, | |
| { | |
| "epoch": 0.4491150442477876, | |
| "grad_norm": 0.17679619789123535, | |
| "learning_rate": 9.925767832709765e-06, | |
| "loss": 0.4161, | |
| "step": 203 | |
| }, | |
| { | |
| "epoch": 0.45132743362831856, | |
| "grad_norm": 0.19563840329647064, | |
| "learning_rate": 9.923541117497586e-06, | |
| "loss": 0.4014, | |
| "step": 204 | |
| }, | |
| { | |
| "epoch": 0.45353982300884954, | |
| "grad_norm": 0.20635181665420532, | |
| "learning_rate": 9.921281754222903e-06, | |
| "loss": 0.434, | |
| "step": 205 | |
| }, | |
| { | |
| "epoch": 0.4557522123893805, | |
| "grad_norm": 0.17097140848636627, | |
| "learning_rate": 9.918989757867584e-06, | |
| "loss": 0.4147, | |
| "step": 206 | |
| }, | |
| { | |
| "epoch": 0.4579646017699115, | |
| "grad_norm": 0.17044438421726227, | |
| "learning_rate": 9.916665143629881e-06, | |
| "loss": 0.4138, | |
| "step": 207 | |
| }, | |
| { | |
| "epoch": 0.46017699115044247, | |
| "grad_norm": 0.18953591585159302, | |
| "learning_rate": 9.914307926924344e-06, | |
| "loss": 0.4185, | |
| "step": 208 | |
| }, | |
| { | |
| "epoch": 0.46238938053097345, | |
| "grad_norm": 0.17236648499965668, | |
| "learning_rate": 9.911918123381706e-06, | |
| "loss": 0.4079, | |
| "step": 209 | |
| }, | |
| { | |
| "epoch": 0.4646017699115044, | |
| "grad_norm": 0.15649288892745972, | |
| "learning_rate": 9.909495748848783e-06, | |
| "loss": 0.4114, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.4668141592920354, | |
| "grad_norm": 0.18650922179222107, | |
| "learning_rate": 9.907040819388372e-06, | |
| "loss": 0.4231, | |
| "step": 211 | |
| }, | |
| { | |
| "epoch": 0.4690265486725664, | |
| "grad_norm": 0.16785378754138947, | |
| "learning_rate": 9.904553351279139e-06, | |
| "loss": 0.409, | |
| "step": 212 | |
| }, | |
| { | |
| "epoch": 0.47123893805309736, | |
| "grad_norm": 0.17052291333675385, | |
| "learning_rate": 9.902033361015515e-06, | |
| "loss": 0.415, | |
| "step": 213 | |
| }, | |
| { | |
| "epoch": 0.47345132743362833, | |
| "grad_norm": 0.1584978699684143, | |
| "learning_rate": 9.899480865307591e-06, | |
| "loss": 0.4014, | |
| "step": 214 | |
| }, | |
| { | |
| "epoch": 0.4756637168141593, | |
| "grad_norm": 0.16302478313446045, | |
| "learning_rate": 9.896895881080995e-06, | |
| "loss": 0.4103, | |
| "step": 215 | |
| }, | |
| { | |
| "epoch": 0.4778761061946903, | |
| "grad_norm": 0.16019950807094574, | |
| "learning_rate": 9.89427842547679e-06, | |
| "loss": 0.4149, | |
| "step": 216 | |
| }, | |
| { | |
| "epoch": 0.48008849557522126, | |
| "grad_norm": 0.17266441881656647, | |
| "learning_rate": 9.891628515851358e-06, | |
| "loss": 0.4079, | |
| "step": 217 | |
| }, | |
| { | |
| "epoch": 0.4823008849557522, | |
| "grad_norm": 0.17257748544216156, | |
| "learning_rate": 9.88894616977628e-06, | |
| "loss": 0.4055, | |
| "step": 218 | |
| }, | |
| { | |
| "epoch": 0.48451327433628316, | |
| "grad_norm": 0.17442859709262848, | |
| "learning_rate": 9.88623140503823e-06, | |
| "loss": 0.4194, | |
| "step": 219 | |
| }, | |
| { | |
| "epoch": 0.48672566371681414, | |
| "grad_norm": 0.1760149896144867, | |
| "learning_rate": 9.883484239638842e-06, | |
| "loss": 0.4103, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.4889380530973451, | |
| "grad_norm": 0.16283486783504486, | |
| "learning_rate": 9.880704691794608e-06, | |
| "loss": 0.4081, | |
| "step": 221 | |
| }, | |
| { | |
| "epoch": 0.4911504424778761, | |
| "grad_norm": 0.17161579430103302, | |
| "learning_rate": 9.877892779936744e-06, | |
| "loss": 0.4105, | |
| "step": 222 | |
| }, | |
| { | |
| "epoch": 0.49336283185840707, | |
| "grad_norm": 0.17453111708164215, | |
| "learning_rate": 9.87504852271107e-06, | |
| "loss": 0.4186, | |
| "step": 223 | |
| }, | |
| { | |
| "epoch": 0.49557522123893805, | |
| "grad_norm": 0.17772270739078522, | |
| "learning_rate": 9.872171938977895e-06, | |
| "loss": 0.4196, | |
| "step": 224 | |
| }, | |
| { | |
| "epoch": 0.497787610619469, | |
| "grad_norm": 0.1604829877614975, | |
| "learning_rate": 9.869263047811877e-06, | |
| "loss": 0.4138, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "grad_norm": 0.1991002857685089, | |
| "learning_rate": 9.866321868501914e-06, | |
| "loss": 0.4094, | |
| "step": 226 | |
| }, | |
| { | |
| "epoch": 0.5022123893805309, | |
| "grad_norm": 0.1912929266691208, | |
| "learning_rate": 9.863348420550998e-06, | |
| "loss": 0.4073, | |
| "step": 227 | |
| }, | |
| { | |
| "epoch": 0.504424778761062, | |
| "grad_norm": 0.1809907853603363, | |
| "learning_rate": 9.860342723676105e-06, | |
| "loss": 0.417, | |
| "step": 228 | |
| }, | |
| { | |
| "epoch": 0.5066371681415929, | |
| "grad_norm": 0.20652230083942413, | |
| "learning_rate": 9.857304797808043e-06, | |
| "loss": 0.4094, | |
| "step": 229 | |
| }, | |
| { | |
| "epoch": 0.5088495575221239, | |
| "grad_norm": 0.16395345330238342, | |
| "learning_rate": 9.85423466309134e-06, | |
| "loss": 0.4024, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.5110619469026548, | |
| "grad_norm": 0.20369473099708557, | |
| "learning_rate": 9.851132339884097e-06, | |
| "loss": 0.4139, | |
| "step": 231 | |
| }, | |
| { | |
| "epoch": 0.5132743362831859, | |
| "grad_norm": 0.169134721159935, | |
| "learning_rate": 9.847997848757855e-06, | |
| "loss": 0.4113, | |
| "step": 232 | |
| }, | |
| { | |
| "epoch": 0.5154867256637168, | |
| "grad_norm": 0.19118903577327728, | |
| "learning_rate": 9.844831210497468e-06, | |
| "loss": 0.4089, | |
| "step": 233 | |
| }, | |
| { | |
| "epoch": 0.5176991150442478, | |
| "grad_norm": 0.19465133547782898, | |
| "learning_rate": 9.84163244610095e-06, | |
| "loss": 0.4092, | |
| "step": 234 | |
| }, | |
| { | |
| "epoch": 0.5199115044247787, | |
| "grad_norm": 0.18870508670806885, | |
| "learning_rate": 9.83840157677935e-06, | |
| "loss": 0.4108, | |
| "step": 235 | |
| }, | |
| { | |
| "epoch": 0.5221238938053098, | |
| "grad_norm": 0.18022289872169495, | |
| "learning_rate": 9.835138623956603e-06, | |
| "loss": 0.4244, | |
| "step": 236 | |
| }, | |
| { | |
| "epoch": 0.5243362831858407, | |
| "grad_norm": 0.18440207839012146, | |
| "learning_rate": 9.831843609269387e-06, | |
| "loss": 0.4164, | |
| "step": 237 | |
| }, | |
| { | |
| "epoch": 0.5265486725663717, | |
| "grad_norm": 0.17794957756996155, | |
| "learning_rate": 9.828516554566988e-06, | |
| "loss": 0.4069, | |
| "step": 238 | |
| }, | |
| { | |
| "epoch": 0.5287610619469026, | |
| "grad_norm": 0.1812417358160019, | |
| "learning_rate": 9.825157481911146e-06, | |
| "loss": 0.4149, | |
| "step": 239 | |
| }, | |
| { | |
| "epoch": 0.5309734513274337, | |
| "grad_norm": 0.18725480139255524, | |
| "learning_rate": 9.821766413575915e-06, | |
| "loss": 0.4303, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.5331858407079646, | |
| "grad_norm": 0.17442503571510315, | |
| "learning_rate": 9.818343372047509e-06, | |
| "loss": 0.4036, | |
| "step": 241 | |
| }, | |
| { | |
| "epoch": 0.5353982300884956, | |
| "grad_norm": 0.21173766255378723, | |
| "learning_rate": 9.814888380024161e-06, | |
| "loss": 0.4179, | |
| "step": 242 | |
| }, | |
| { | |
| "epoch": 0.5376106194690266, | |
| "grad_norm": 0.18231187760829926, | |
| "learning_rate": 9.811401460415966e-06, | |
| "loss": 0.4173, | |
| "step": 243 | |
| }, | |
| { | |
| "epoch": 0.5398230088495575, | |
| "grad_norm": 0.1679583489894867, | |
| "learning_rate": 9.80788263634473e-06, | |
| "loss": 0.4115, | |
| "step": 244 | |
| }, | |
| { | |
| "epoch": 0.5420353982300885, | |
| "grad_norm": 0.16705110669136047, | |
| "learning_rate": 9.804331931143822e-06, | |
| "loss": 0.4025, | |
| "step": 245 | |
| }, | |
| { | |
| "epoch": 0.5442477876106194, | |
| "grad_norm": 0.17078952491283417, | |
| "learning_rate": 9.80074936835801e-06, | |
| "loss": 0.4208, | |
| "step": 246 | |
| }, | |
| { | |
| "epoch": 0.5464601769911505, | |
| "grad_norm": 0.16935208439826965, | |
| "learning_rate": 9.797134971743312e-06, | |
| "loss": 0.4036, | |
| "step": 247 | |
| }, | |
| { | |
| "epoch": 0.5486725663716814, | |
| "grad_norm": 0.1650349199771881, | |
| "learning_rate": 9.793488765266838e-06, | |
| "loss": 0.4172, | |
| "step": 248 | |
| }, | |
| { | |
| "epoch": 0.5508849557522124, | |
| "grad_norm": 0.16523662209510803, | |
| "learning_rate": 9.789810773106632e-06, | |
| "loss": 0.4077, | |
| "step": 249 | |
| }, | |
| { | |
| "epoch": 0.5530973451327433, | |
| "grad_norm": 0.20408877730369568, | |
| "learning_rate": 9.786101019651499e-06, | |
| "loss": 0.4043, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.5553097345132744, | |
| "grad_norm": 0.1925540715456009, | |
| "learning_rate": 9.782359529500867e-06, | |
| "loss": 0.4166, | |
| "step": 251 | |
| }, | |
| { | |
| "epoch": 0.5575221238938053, | |
| "grad_norm": 0.18965932726860046, | |
| "learning_rate": 9.7785863274646e-06, | |
| "loss": 0.4016, | |
| "step": 252 | |
| }, | |
| { | |
| "epoch": 0.5597345132743363, | |
| "grad_norm": 0.18967291712760925, | |
| "learning_rate": 9.774781438562846e-06, | |
| "loss": 0.407, | |
| "step": 253 | |
| }, | |
| { | |
| "epoch": 0.5619469026548672, | |
| "grad_norm": 0.17891788482666016, | |
| "learning_rate": 9.770944888025874e-06, | |
| "loss": 0.4078, | |
| "step": 254 | |
| }, | |
| { | |
| "epoch": 0.5641592920353983, | |
| "grad_norm": 0.1903616040945053, | |
| "learning_rate": 9.767076701293898e-06, | |
| "loss": 0.4012, | |
| "step": 255 | |
| }, | |
| { | |
| "epoch": 0.5663716814159292, | |
| "grad_norm": 0.1644955724477768, | |
| "learning_rate": 9.763176904016914e-06, | |
| "loss": 0.4009, | |
| "step": 256 | |
| }, | |
| { | |
| "epoch": 0.5685840707964602, | |
| "grad_norm": 0.1887083798646927, | |
| "learning_rate": 9.759245522054523e-06, | |
| "loss": 0.3965, | |
| "step": 257 | |
| }, | |
| { | |
| "epoch": 0.5707964601769911, | |
| "grad_norm": 0.16408571600914001, | |
| "learning_rate": 9.755282581475769e-06, | |
| "loss": 0.4008, | |
| "step": 258 | |
| }, | |
| { | |
| "epoch": 0.5730088495575221, | |
| "grad_norm": 0.2137821614742279, | |
| "learning_rate": 9.751288108558961e-06, | |
| "loss": 0.4029, | |
| "step": 259 | |
| }, | |
| { | |
| "epoch": 0.5752212389380531, | |
| "grad_norm": 0.17689137160778046, | |
| "learning_rate": 9.747262129791497e-06, | |
| "loss": 0.4045, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.577433628318584, | |
| "grad_norm": 0.1932871788740158, | |
| "learning_rate": 9.743204671869694e-06, | |
| "loss": 0.4082, | |
| "step": 261 | |
| }, | |
| { | |
| "epoch": 0.5796460176991151, | |
| "grad_norm": 0.2326955646276474, | |
| "learning_rate": 9.739115761698607e-06, | |
| "loss": 0.4043, | |
| "step": 262 | |
| }, | |
| { | |
| "epoch": 0.581858407079646, | |
| "grad_norm": 0.22756756842136383, | |
| "learning_rate": 9.73499542639185e-06, | |
| "loss": 0.4132, | |
| "step": 263 | |
| }, | |
| { | |
| "epoch": 0.584070796460177, | |
| "grad_norm": 0.18425188958644867, | |
| "learning_rate": 9.730843693271413e-06, | |
| "loss": 0.4026, | |
| "step": 264 | |
| }, | |
| { | |
| "epoch": 0.5862831858407079, | |
| "grad_norm": 0.24644114077091217, | |
| "learning_rate": 9.726660589867495e-06, | |
| "loss": 0.3966, | |
| "step": 265 | |
| }, | |
| { | |
| "epoch": 0.588495575221239, | |
| "grad_norm": 0.20994241535663605, | |
| "learning_rate": 9.722446143918307e-06, | |
| "loss": 0.4068, | |
| "step": 266 | |
| }, | |
| { | |
| "epoch": 0.5907079646017699, | |
| "grad_norm": 0.17565971612930298, | |
| "learning_rate": 9.718200383369891e-06, | |
| "loss": 0.402, | |
| "step": 267 | |
| }, | |
| { | |
| "epoch": 0.5929203539823009, | |
| "grad_norm": 0.1879590004682541, | |
| "learning_rate": 9.713923336375936e-06, | |
| "loss": 0.4109, | |
| "step": 268 | |
| }, | |
| { | |
| "epoch": 0.5951327433628318, | |
| "grad_norm": 0.16611909866333008, | |
| "learning_rate": 9.709615031297598e-06, | |
| "loss": 0.3957, | |
| "step": 269 | |
| }, | |
| { | |
| "epoch": 0.5973451327433629, | |
| "grad_norm": 0.19158808887004852, | |
| "learning_rate": 9.705275496703302e-06, | |
| "loss": 0.4125, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.5995575221238938, | |
| "grad_norm": 0.19456025958061218, | |
| "learning_rate": 9.70090476136855e-06, | |
| "loss": 0.4079, | |
| "step": 271 | |
| }, | |
| { | |
| "epoch": 0.6017699115044248, | |
| "grad_norm": 0.17634005844593048, | |
| "learning_rate": 9.69650285427575e-06, | |
| "loss": 0.4025, | |
| "step": 272 | |
| }, | |
| { | |
| "epoch": 0.6039823008849557, | |
| "grad_norm": 0.20455583930015564, | |
| "learning_rate": 9.692069804613995e-06, | |
| "loss": 0.4101, | |
| "step": 273 | |
| }, | |
| { | |
| "epoch": 0.6061946902654868, | |
| "grad_norm": 0.18841254711151123, | |
| "learning_rate": 9.687605641778899e-06, | |
| "loss": 0.4026, | |
| "step": 274 | |
| }, | |
| { | |
| "epoch": 0.6084070796460177, | |
| "grad_norm": 0.19969072937965393, | |
| "learning_rate": 9.683110395372379e-06, | |
| "loss": 0.4004, | |
| "step": 275 | |
| }, | |
| { | |
| "epoch": 0.6106194690265486, | |
| "grad_norm": 0.2100944221019745, | |
| "learning_rate": 9.678584095202468e-06, | |
| "loss": 0.4103, | |
| "step": 276 | |
| }, | |
| { | |
| "epoch": 0.6128318584070797, | |
| "grad_norm": 0.16795648634433746, | |
| "learning_rate": 9.674026771283123e-06, | |
| "loss": 0.4046, | |
| "step": 277 | |
| }, | |
| { | |
| "epoch": 0.6150442477876106, | |
| "grad_norm": 0.19852401316165924, | |
| "learning_rate": 9.669438453834014e-06, | |
| "loss": 0.4053, | |
| "step": 278 | |
| }, | |
| { | |
| "epoch": 0.6172566371681416, | |
| "grad_norm": 0.18384751677513123, | |
| "learning_rate": 9.664819173280328e-06, | |
| "loss": 0.4106, | |
| "step": 279 | |
| }, | |
| { | |
| "epoch": 0.6194690265486725, | |
| "grad_norm": 0.16662909090518951, | |
| "learning_rate": 9.660168960252575e-06, | |
| "loss": 0.4158, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.6216814159292036, | |
| "grad_norm": 0.22212032973766327, | |
| "learning_rate": 9.655487845586378e-06, | |
| "loss": 0.4156, | |
| "step": 281 | |
| }, | |
| { | |
| "epoch": 0.6238938053097345, | |
| "grad_norm": 0.17248587310314178, | |
| "learning_rate": 9.650775860322263e-06, | |
| "loss": 0.3941, | |
| "step": 282 | |
| }, | |
| { | |
| "epoch": 0.6261061946902655, | |
| "grad_norm": 0.2107280045747757, | |
| "learning_rate": 9.646033035705462e-06, | |
| "loss": 0.4202, | |
| "step": 283 | |
| }, | |
| { | |
| "epoch": 0.6283185840707964, | |
| "grad_norm": 0.16596029698848724, | |
| "learning_rate": 9.641259403185706e-06, | |
| "loss": 0.4061, | |
| "step": 284 | |
| }, | |
| { | |
| "epoch": 0.6305309734513275, | |
| "grad_norm": 0.18452270328998566, | |
| "learning_rate": 9.636454994417013e-06, | |
| "loss": 0.4023, | |
| "step": 285 | |
| }, | |
| { | |
| "epoch": 0.6327433628318584, | |
| "grad_norm": 0.17428122460842133, | |
| "learning_rate": 9.631619841257477e-06, | |
| "loss": 0.3974, | |
| "step": 286 | |
| }, | |
| { | |
| "epoch": 0.6349557522123894, | |
| "grad_norm": 0.18349966406822205, | |
| "learning_rate": 9.626753975769054e-06, | |
| "loss": 0.4051, | |
| "step": 287 | |
| }, | |
| { | |
| "epoch": 0.6371681415929203, | |
| "grad_norm": 0.18694283068180084, | |
| "learning_rate": 9.621857430217366e-06, | |
| "loss": 0.4141, | |
| "step": 288 | |
| }, | |
| { | |
| "epoch": 0.6393805309734514, | |
| "grad_norm": 0.196182981133461, | |
| "learning_rate": 9.616930237071464e-06, | |
| "loss": 0.4037, | |
| "step": 289 | |
| }, | |
| { | |
| "epoch": 0.6415929203539823, | |
| "grad_norm": 0.1949911266565323, | |
| "learning_rate": 9.611972429003626e-06, | |
| "loss": 0.411, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.6438053097345132, | |
| "grad_norm": 0.21375317871570587, | |
| "learning_rate": 9.606984038889142e-06, | |
| "loss": 0.4025, | |
| "step": 291 | |
| }, | |
| { | |
| "epoch": 0.6460176991150443, | |
| "grad_norm": 0.2112773060798645, | |
| "learning_rate": 9.601965099806085e-06, | |
| "loss": 0.3962, | |
| "step": 292 | |
| }, | |
| { | |
| "epoch": 0.6482300884955752, | |
| "grad_norm": 0.1982385218143463, | |
| "learning_rate": 9.596915645035107e-06, | |
| "loss": 0.4116, | |
| "step": 293 | |
| }, | |
| { | |
| "epoch": 0.6504424778761062, | |
| "grad_norm": 0.18961909413337708, | |
| "learning_rate": 9.591835708059202e-06, | |
| "loss": 0.4016, | |
| "step": 294 | |
| }, | |
| { | |
| "epoch": 0.6526548672566371, | |
| "grad_norm": 0.205304354429245, | |
| "learning_rate": 9.5867253225635e-06, | |
| "loss": 0.4107, | |
| "step": 295 | |
| }, | |
| { | |
| "epoch": 0.6548672566371682, | |
| "grad_norm": 0.1793058067560196, | |
| "learning_rate": 9.581584522435025e-06, | |
| "loss": 0.4091, | |
| "step": 296 | |
| }, | |
| { | |
| "epoch": 0.6570796460176991, | |
| "grad_norm": 0.2273833453655243, | |
| "learning_rate": 9.57641334176249e-06, | |
| "loss": 0.4079, | |
| "step": 297 | |
| }, | |
| { | |
| "epoch": 0.6592920353982301, | |
| "grad_norm": 0.19448456168174744, | |
| "learning_rate": 9.571211814836059e-06, | |
| "loss": 0.4126, | |
| "step": 298 | |
| }, | |
| { | |
| "epoch": 0.661504424778761, | |
| "grad_norm": 0.18413153290748596, | |
| "learning_rate": 9.56597997614712e-06, | |
| "loss": 0.4023, | |
| "step": 299 | |
| }, | |
| { | |
| "epoch": 0.6637168141592921, | |
| "grad_norm": 0.20027752220630646, | |
| "learning_rate": 9.560717860388061e-06, | |
| "loss": 0.4111, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.665929203539823, | |
| "grad_norm": 0.1603672206401825, | |
| "learning_rate": 9.555425502452038e-06, | |
| "loss": 0.4036, | |
| "step": 301 | |
| }, | |
| { | |
| "epoch": 0.668141592920354, | |
| "grad_norm": 0.18299739062786102, | |
| "learning_rate": 9.550102937432743e-06, | |
| "loss": 0.3942, | |
| "step": 302 | |
| }, | |
| { | |
| "epoch": 0.6703539823008849, | |
| "grad_norm": 0.1947639435529709, | |
| "learning_rate": 9.544750200624169e-06, | |
| "loss": 0.4054, | |
| "step": 303 | |
| }, | |
| { | |
| "epoch": 0.672566371681416, | |
| "grad_norm": 0.16402120888233185, | |
| "learning_rate": 9.539367327520382e-06, | |
| "loss": 0.4048, | |
| "step": 304 | |
| }, | |
| { | |
| "epoch": 0.6747787610619469, | |
| "grad_norm": 0.17655643820762634, | |
| "learning_rate": 9.533954353815279e-06, | |
| "loss": 0.4068, | |
| "step": 305 | |
| }, | |
| { | |
| "epoch": 0.6769911504424779, | |
| "grad_norm": 0.1705683320760727, | |
| "learning_rate": 9.528511315402358e-06, | |
| "loss": 0.3953, | |
| "step": 306 | |
| }, | |
| { | |
| "epoch": 0.6792035398230089, | |
| "grad_norm": 0.16182245314121246, | |
| "learning_rate": 9.523038248374474e-06, | |
| "loss": 0.408, | |
| "step": 307 | |
| }, | |
| { | |
| "epoch": 0.6814159292035398, | |
| "grad_norm": 0.20871901512145996, | |
| "learning_rate": 9.517535189023602e-06, | |
| "loss": 0.4031, | |
| "step": 308 | |
| }, | |
| { | |
| "epoch": 0.6836283185840708, | |
| "grad_norm": 0.13933664560317993, | |
| "learning_rate": 9.512002173840597e-06, | |
| "loss": 0.405, | |
| "step": 309 | |
| }, | |
| { | |
| "epoch": 0.6858407079646017, | |
| "grad_norm": 0.20702913403511047, | |
| "learning_rate": 9.506439239514954e-06, | |
| "loss": 0.4012, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.6880530973451328, | |
| "grad_norm": 0.1594098061323166, | |
| "learning_rate": 9.500846422934557e-06, | |
| "loss": 0.3926, | |
| "step": 311 | |
| }, | |
| { | |
| "epoch": 0.6902654867256637, | |
| "grad_norm": 0.19512666761875153, | |
| "learning_rate": 9.495223761185443e-06, | |
| "loss": 0.4031, | |
| "step": 312 | |
| }, | |
| { | |
| "epoch": 0.6924778761061947, | |
| "grad_norm": 0.14987067878246307, | |
| "learning_rate": 9.489571291551553e-06, | |
| "loss": 0.4138, | |
| "step": 313 | |
| }, | |
| { | |
| "epoch": 0.6946902654867256, | |
| "grad_norm": 0.18819770216941833, | |
| "learning_rate": 9.483889051514483e-06, | |
| "loss": 0.4022, | |
| "step": 314 | |
| }, | |
| { | |
| "epoch": 0.6969026548672567, | |
| "grad_norm": 0.16024132072925568, | |
| "learning_rate": 9.478177078753236e-06, | |
| "loss": 0.4044, | |
| "step": 315 | |
| }, | |
| { | |
| "epoch": 0.6991150442477876, | |
| "grad_norm": 0.18151608109474182, | |
| "learning_rate": 9.472435411143979e-06, | |
| "loss": 0.4081, | |
| "step": 316 | |
| }, | |
| { | |
| "epoch": 0.7013274336283186, | |
| "grad_norm": 0.17617082595825195, | |
| "learning_rate": 9.466664086759777e-06, | |
| "loss": 0.4134, | |
| "step": 317 | |
| }, | |
| { | |
| "epoch": 0.7035398230088495, | |
| "grad_norm": 0.17612871527671814, | |
| "learning_rate": 9.460863143870355e-06, | |
| "loss": 0.4089, | |
| "step": 318 | |
| }, | |
| { | |
| "epoch": 0.7057522123893806, | |
| "grad_norm": 0.17769289016723633, | |
| "learning_rate": 9.45503262094184e-06, | |
| "loss": 0.4047, | |
| "step": 319 | |
| }, | |
| { | |
| "epoch": 0.7079646017699115, | |
| "grad_norm": 0.1573716551065445, | |
| "learning_rate": 9.4491725566365e-06, | |
| "loss": 0.4029, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.7101769911504425, | |
| "grad_norm": 0.17037460207939148, | |
| "learning_rate": 9.443282989812494e-06, | |
| "loss": 0.4059, | |
| "step": 321 | |
| }, | |
| { | |
| "epoch": 0.7123893805309734, | |
| "grad_norm": 0.1654326617717743, | |
| "learning_rate": 9.437363959523613e-06, | |
| "loss": 0.4099, | |
| "step": 322 | |
| }, | |
| { | |
| "epoch": 0.7146017699115044, | |
| "grad_norm": 0.1739991158246994, | |
| "learning_rate": 9.431415505019024e-06, | |
| "loss": 0.3916, | |
| "step": 323 | |
| }, | |
| { | |
| "epoch": 0.7168141592920354, | |
| "grad_norm": 0.156654492020607, | |
| "learning_rate": 9.425437665742998e-06, | |
| "loss": 0.3912, | |
| "step": 324 | |
| }, | |
| { | |
| "epoch": 0.7190265486725663, | |
| "grad_norm": 0.16226203739643097, | |
| "learning_rate": 9.419430481334663e-06, | |
| "loss": 0.3995, | |
| "step": 325 | |
| }, | |
| { | |
| "epoch": 0.7212389380530974, | |
| "grad_norm": 0.14988061785697937, | |
| "learning_rate": 9.413393991627737e-06, | |
| "loss": 0.3959, | |
| "step": 326 | |
| }, | |
| { | |
| "epoch": 0.7234513274336283, | |
| "grad_norm": 0.1781560331583023, | |
| "learning_rate": 9.407328236650257e-06, | |
| "loss": 0.3957, | |
| "step": 327 | |
| }, | |
| { | |
| "epoch": 0.7256637168141593, | |
| "grad_norm": 0.17818742990493774, | |
| "learning_rate": 9.401233256624318e-06, | |
| "loss": 0.3949, | |
| "step": 328 | |
| }, | |
| { | |
| "epoch": 0.7278761061946902, | |
| "grad_norm": 0.1621919423341751, | |
| "learning_rate": 9.395109091965808e-06, | |
| "loss": 0.3978, | |
| "step": 329 | |
| }, | |
| { | |
| "epoch": 0.7300884955752213, | |
| "grad_norm": 0.1838427484035492, | |
| "learning_rate": 9.388955783284141e-06, | |
| "loss": 0.3998, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.7323008849557522, | |
| "grad_norm": 0.17227084934711456, | |
| "learning_rate": 9.382773371381986e-06, | |
| "loss": 0.3989, | |
| "step": 331 | |
| }, | |
| { | |
| "epoch": 0.7345132743362832, | |
| "grad_norm": 0.18801696598529816, | |
| "learning_rate": 9.376561897254987e-06, | |
| "loss": 0.3956, | |
| "step": 332 | |
| }, | |
| { | |
| "epoch": 0.7367256637168141, | |
| "grad_norm": 0.15474942326545715, | |
| "learning_rate": 9.370321402091514e-06, | |
| "loss": 0.4035, | |
| "step": 333 | |
| }, | |
| { | |
| "epoch": 0.7389380530973452, | |
| "grad_norm": 0.16465848684310913, | |
| "learning_rate": 9.36405192727236e-06, | |
| "loss": 0.4016, | |
| "step": 334 | |
| }, | |
| { | |
| "epoch": 0.7411504424778761, | |
| "grad_norm": 0.19732873141765594, | |
| "learning_rate": 9.357753514370497e-06, | |
| "loss": 0.4038, | |
| "step": 335 | |
| }, | |
| { | |
| "epoch": 0.7433628318584071, | |
| "grad_norm": 0.16492311656475067, | |
| "learning_rate": 9.351426205150778e-06, | |
| "loss": 0.4015, | |
| "step": 336 | |
| }, | |
| { | |
| "epoch": 0.745575221238938, | |
| "grad_norm": 0.18818548321723938, | |
| "learning_rate": 9.345070041569666e-06, | |
| "loss": 0.399, | |
| "step": 337 | |
| }, | |
| { | |
| "epoch": 0.7477876106194691, | |
| "grad_norm": 0.17031817138195038, | |
| "learning_rate": 9.338685065774964e-06, | |
| "loss": 0.3937, | |
| "step": 338 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "grad_norm": 0.24733419716358185, | |
| "learning_rate": 9.332271320105527e-06, | |
| "loss": 0.4081, | |
| "step": 339 | |
| }, | |
| { | |
| "epoch": 0.7522123893805309, | |
| "grad_norm": 0.18530051410198212, | |
| "learning_rate": 9.32582884709098e-06, | |
| "loss": 0.4007, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.754424778761062, | |
| "grad_norm": 0.18777970969676971, | |
| "learning_rate": 9.319357689451444e-06, | |
| "loss": 0.3989, | |
| "step": 341 | |
| }, | |
| { | |
| "epoch": 0.7566371681415929, | |
| "grad_norm": 0.18476152420043945, | |
| "learning_rate": 9.312857890097243e-06, | |
| "loss": 0.399, | |
| "step": 342 | |
| }, | |
| { | |
| "epoch": 0.7588495575221239, | |
| "grad_norm": 0.17613354325294495, | |
| "learning_rate": 9.30632949212863e-06, | |
| "loss": 0.4026, | |
| "step": 343 | |
| }, | |
| { | |
| "epoch": 0.7610619469026548, | |
| "grad_norm": 0.17226579785346985, | |
| "learning_rate": 9.299772538835492e-06, | |
| "loss": 0.4051, | |
| "step": 344 | |
| }, | |
| { | |
| "epoch": 0.7632743362831859, | |
| "grad_norm": 0.18831267952919006, | |
| "learning_rate": 9.29318707369707e-06, | |
| "loss": 0.3927, | |
| "step": 345 | |
| }, | |
| { | |
| "epoch": 0.7654867256637168, | |
| "grad_norm": 0.15509851276874542, | |
| "learning_rate": 9.286573140381663e-06, | |
| "loss": 0.3956, | |
| "step": 346 | |
| }, | |
| { | |
| "epoch": 0.7676991150442478, | |
| "grad_norm": 0.18900151550769806, | |
| "learning_rate": 9.279930782746346e-06, | |
| "loss": 0.3975, | |
| "step": 347 | |
| }, | |
| { | |
| "epoch": 0.7699115044247787, | |
| "grad_norm": 0.16996918618679047, | |
| "learning_rate": 9.273260044836675e-06, | |
| "loss": 0.3972, | |
| "step": 348 | |
| }, | |
| { | |
| "epoch": 0.7721238938053098, | |
| "grad_norm": 0.17968420684337616, | |
| "learning_rate": 9.266560970886397e-06, | |
| "loss": 0.4026, | |
| "step": 349 | |
| }, | |
| { | |
| "epoch": 0.7743362831858407, | |
| "grad_norm": 0.1665457934141159, | |
| "learning_rate": 9.259833605317155e-06, | |
| "loss": 0.3946, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.7765486725663717, | |
| "grad_norm": 0.18998298048973083, | |
| "learning_rate": 9.253077992738193e-06, | |
| "loss": 0.3964, | |
| "step": 351 | |
| }, | |
| { | |
| "epoch": 0.7787610619469026, | |
| "grad_norm": 0.17083673179149628, | |
| "learning_rate": 9.246294177946062e-06, | |
| "loss": 0.4026, | |
| "step": 352 | |
| }, | |
| { | |
| "epoch": 0.7809734513274337, | |
| "grad_norm": 0.1637433022260666, | |
| "learning_rate": 9.239482205924322e-06, | |
| "loss": 0.3922, | |
| "step": 353 | |
| }, | |
| { | |
| "epoch": 0.7831858407079646, | |
| "grad_norm": 0.1800343543291092, | |
| "learning_rate": 9.232642121843247e-06, | |
| "loss": 0.407, | |
| "step": 354 | |
| }, | |
| { | |
| "epoch": 0.7853982300884956, | |
| "grad_norm": 0.16393564641475677, | |
| "learning_rate": 9.225773971059518e-06, | |
| "loss": 0.4057, | |
| "step": 355 | |
| }, | |
| { | |
| "epoch": 0.7876106194690266, | |
| "grad_norm": 0.18692757189273834, | |
| "learning_rate": 9.218877799115929e-06, | |
| "loss": 0.3979, | |
| "step": 356 | |
| }, | |
| { | |
| "epoch": 0.7898230088495575, | |
| "grad_norm": 0.17437385022640228, | |
| "learning_rate": 9.21195365174108e-06, | |
| "loss": 0.4057, | |
| "step": 357 | |
| }, | |
| { | |
| "epoch": 0.7920353982300885, | |
| "grad_norm": 0.1898239552974701, | |
| "learning_rate": 9.205001574849081e-06, | |
| "loss": 0.4039, | |
| "step": 358 | |
| }, | |
| { | |
| "epoch": 0.7942477876106194, | |
| "grad_norm": 0.1568395495414734, | |
| "learning_rate": 9.19802161453924e-06, | |
| "loss": 0.4039, | |
| "step": 359 | |
| }, | |
| { | |
| "epoch": 0.7964601769911505, | |
| "grad_norm": 0.19088448584079742, | |
| "learning_rate": 9.191013817095762e-06, | |
| "loss": 0.3953, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.7986725663716814, | |
| "grad_norm": 0.17273734509944916, | |
| "learning_rate": 9.183978228987436e-06, | |
| "loss": 0.4104, | |
| "step": 361 | |
| }, | |
| { | |
| "epoch": 0.8008849557522124, | |
| "grad_norm": 0.17058604955673218, | |
| "learning_rate": 9.176914896867335e-06, | |
| "loss": 0.398, | |
| "step": 362 | |
| }, | |
| { | |
| "epoch": 0.8030973451327433, | |
| "grad_norm": 0.18058565258979797, | |
| "learning_rate": 9.169823867572505e-06, | |
| "loss": 0.4076, | |
| "step": 363 | |
| }, | |
| { | |
| "epoch": 0.8053097345132744, | |
| "grad_norm": 0.1585603505373001, | |
| "learning_rate": 9.162705188123647e-06, | |
| "loss": 0.3988, | |
| "step": 364 | |
| }, | |
| { | |
| "epoch": 0.8075221238938053, | |
| "grad_norm": 0.1937795877456665, | |
| "learning_rate": 9.155558905724815e-06, | |
| "loss": 0.4081, | |
| "step": 365 | |
| }, | |
| { | |
| "epoch": 0.8097345132743363, | |
| "grad_norm": 0.1497035026550293, | |
| "learning_rate": 9.148385067763094e-06, | |
| "loss": 0.3921, | |
| "step": 366 | |
| }, | |
| { | |
| "epoch": 0.8119469026548672, | |
| "grad_norm": 0.184015691280365, | |
| "learning_rate": 9.141183721808298e-06, | |
| "loss": 0.3959, | |
| "step": 367 | |
| }, | |
| { | |
| "epoch": 0.8141592920353983, | |
| "grad_norm": 0.16560928523540497, | |
| "learning_rate": 9.133954915612635e-06, | |
| "loss": 0.4016, | |
| "step": 368 | |
| }, | |
| { | |
| "epoch": 0.8163716814159292, | |
| "grad_norm": 0.17013601958751678, | |
| "learning_rate": 9.126698697110414e-06, | |
| "loss": 0.4052, | |
| "step": 369 | |
| }, | |
| { | |
| "epoch": 0.8185840707964602, | |
| "grad_norm": 0.16712944209575653, | |
| "learning_rate": 9.119415114417709e-06, | |
| "loss": 0.3995, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.8207964601769911, | |
| "grad_norm": 0.16210488975048065, | |
| "learning_rate": 9.112104215832047e-06, | |
| "loss": 0.4094, | |
| "step": 371 | |
| }, | |
| { | |
| "epoch": 0.8230088495575221, | |
| "grad_norm": 0.1718660295009613, | |
| "learning_rate": 9.104766049832088e-06, | |
| "loss": 0.3852, | |
| "step": 372 | |
| }, | |
| { | |
| "epoch": 0.8252212389380531, | |
| "grad_norm": 0.1845821589231491, | |
| "learning_rate": 9.0974006650773e-06, | |
| "loss": 0.4091, | |
| "step": 373 | |
| }, | |
| { | |
| "epoch": 0.827433628318584, | |
| "grad_norm": 0.15908625721931458, | |
| "learning_rate": 9.090008110407646e-06, | |
| "loss": 0.4134, | |
| "step": 374 | |
| }, | |
| { | |
| "epoch": 0.8296460176991151, | |
| "grad_norm": 0.159079447388649, | |
| "learning_rate": 9.082588434843244e-06, | |
| "loss": 0.4003, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 0.831858407079646, | |
| "grad_norm": 0.1775507628917694, | |
| "learning_rate": 9.075141687584056e-06, | |
| "loss": 0.4059, | |
| "step": 376 | |
| }, | |
| { | |
| "epoch": 0.834070796460177, | |
| "grad_norm": 0.1623350828886032, | |
| "learning_rate": 9.067667918009559e-06, | |
| "loss": 0.3968, | |
| "step": 377 | |
| }, | |
| { | |
| "epoch": 0.8362831858407079, | |
| "grad_norm": 0.17421099543571472, | |
| "learning_rate": 9.060167175678407e-06, | |
| "loss": 0.3928, | |
| "step": 378 | |
| }, | |
| { | |
| "epoch": 0.838495575221239, | |
| "grad_norm": 0.16807816922664642, | |
| "learning_rate": 9.05263951032812e-06, | |
| "loss": 0.4035, | |
| "step": 379 | |
| }, | |
| { | |
| "epoch": 0.8407079646017699, | |
| "grad_norm": 0.16292251646518707, | |
| "learning_rate": 9.045084971874738e-06, | |
| "loss": 0.4017, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.8429203539823009, | |
| "grad_norm": 0.16168609261512756, | |
| "learning_rate": 9.037503610412502e-06, | |
| "loss": 0.3929, | |
| "step": 381 | |
| }, | |
| { | |
| "epoch": 0.8451327433628318, | |
| "grad_norm": 0.1535169780254364, | |
| "learning_rate": 9.02989547621351e-06, | |
| "loss": 0.3944, | |
| "step": 382 | |
| }, | |
| { | |
| "epoch": 0.8473451327433629, | |
| "grad_norm": 0.16936345398426056, | |
| "learning_rate": 9.022260619727401e-06, | |
| "loss": 0.3989, | |
| "step": 383 | |
| }, | |
| { | |
| "epoch": 0.8495575221238938, | |
| "grad_norm": 0.1527484804391861, | |
| "learning_rate": 9.014599091581e-06, | |
| "loss": 0.3952, | |
| "step": 384 | |
| }, | |
| { | |
| "epoch": 0.8517699115044248, | |
| "grad_norm": 0.17519870400428772, | |
| "learning_rate": 9.006910942577995e-06, | |
| "loss": 0.401, | |
| "step": 385 | |
| }, | |
| { | |
| "epoch": 0.8539823008849557, | |
| "grad_norm": 0.1557687520980835, | |
| "learning_rate": 8.999196223698599e-06, | |
| "loss": 0.4076, | |
| "step": 386 | |
| }, | |
| { | |
| "epoch": 0.8561946902654868, | |
| "grad_norm": 0.16832038760185242, | |
| "learning_rate": 8.991454986099207e-06, | |
| "loss": 0.3916, | |
| "step": 387 | |
| }, | |
| { | |
| "epoch": 0.8584070796460177, | |
| "grad_norm": 0.16987960040569305, | |
| "learning_rate": 8.983687281112066e-06, | |
| "loss": 0.3921, | |
| "step": 388 | |
| }, | |
| { | |
| "epoch": 0.8606194690265486, | |
| "grad_norm": 0.1476796269416809, | |
| "learning_rate": 8.975893160244921e-06, | |
| "loss": 0.3886, | |
| "step": 389 | |
| }, | |
| { | |
| "epoch": 0.8628318584070797, | |
| "grad_norm": 0.16488701105117798, | |
| "learning_rate": 8.968072675180686e-06, | |
| "loss": 0.3944, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.8650442477876106, | |
| "grad_norm": 0.1628437340259552, | |
| "learning_rate": 8.960225877777095e-06, | |
| "loss": 0.3867, | |
| "step": 391 | |
| }, | |
| { | |
| "epoch": 0.8672566371681416, | |
| "grad_norm": 0.1530245542526245, | |
| "learning_rate": 8.952352820066359e-06, | |
| "loss": 0.4099, | |
| "step": 392 | |
| }, | |
| { | |
| "epoch": 0.8694690265486725, | |
| "grad_norm": 0.16439391672611237, | |
| "learning_rate": 8.944453554254823e-06, | |
| "loss": 0.3991, | |
| "step": 393 | |
| }, | |
| { | |
| "epoch": 0.8716814159292036, | |
| "grad_norm": 0.17328006029129028, | |
| "learning_rate": 8.936528132722616e-06, | |
| "loss": 0.3924, | |
| "step": 394 | |
| }, | |
| { | |
| "epoch": 0.8738938053097345, | |
| "grad_norm": 0.16984131932258606, | |
| "learning_rate": 8.928576608023305e-06, | |
| "loss": 0.4048, | |
| "step": 395 | |
| }, | |
| { | |
| "epoch": 0.8761061946902655, | |
| "grad_norm": 0.14963462948799133, | |
| "learning_rate": 8.920599032883553e-06, | |
| "loss": 0.3918, | |
| "step": 396 | |
| }, | |
| { | |
| "epoch": 0.8783185840707964, | |
| "grad_norm": 0.18854303658008575, | |
| "learning_rate": 8.912595460202758e-06, | |
| "loss": 0.3981, | |
| "step": 397 | |
| }, | |
| { | |
| "epoch": 0.8805309734513275, | |
| "grad_norm": 0.19335384666919708, | |
| "learning_rate": 8.90456594305271e-06, | |
| "loss": 0.3984, | |
| "step": 398 | |
| }, | |
| { | |
| "epoch": 0.8827433628318584, | |
| "grad_norm": 0.17484664916992188, | |
| "learning_rate": 8.896510534677238e-06, | |
| "loss": 0.392, | |
| "step": 399 | |
| }, | |
| { | |
| "epoch": 0.8849557522123894, | |
| "grad_norm": 0.19245024025440216, | |
| "learning_rate": 8.888429288491857e-06, | |
| "loss": 0.3949, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.8871681415929203, | |
| "grad_norm": 0.15718188881874084, | |
| "learning_rate": 8.880322258083408e-06, | |
| "loss": 0.3973, | |
| "step": 401 | |
| }, | |
| { | |
| "epoch": 0.8893805309734514, | |
| "grad_norm": 0.17654351890087128, | |
| "learning_rate": 8.872189497209712e-06, | |
| "loss": 0.3996, | |
| "step": 402 | |
| }, | |
| { | |
| "epoch": 0.8915929203539823, | |
| "grad_norm": 0.16744081676006317, | |
| "learning_rate": 8.864031059799208e-06, | |
| "loss": 0.3879, | |
| "step": 403 | |
| }, | |
| { | |
| "epoch": 0.8938053097345132, | |
| "grad_norm": 0.16980023682117462, | |
| "learning_rate": 8.855846999950595e-06, | |
| "loss": 0.4029, | |
| "step": 404 | |
| }, | |
| { | |
| "epoch": 0.8960176991150443, | |
| "grad_norm": 0.17948906123638153, | |
| "learning_rate": 8.847637371932478e-06, | |
| "loss": 0.3938, | |
| "step": 405 | |
| }, | |
| { | |
| "epoch": 0.8982300884955752, | |
| "grad_norm": 0.1663169413805008, | |
| "learning_rate": 8.839402230183e-06, | |
| "loss": 0.404, | |
| "step": 406 | |
| }, | |
| { | |
| "epoch": 0.9004424778761062, | |
| "grad_norm": 0.1661122441291809, | |
| "learning_rate": 8.831141629309492e-06, | |
| "loss": 0.3923, | |
| "step": 407 | |
| }, | |
| { | |
| "epoch": 0.9026548672566371, | |
| "grad_norm": 0.17008569836616516, | |
| "learning_rate": 8.822855624088099e-06, | |
| "loss": 0.3911, | |
| "step": 408 | |
| }, | |
| { | |
| "epoch": 0.9048672566371682, | |
| "grad_norm": 0.16437990963459015, | |
| "learning_rate": 8.814544269463422e-06, | |
| "loss": 0.3952, | |
| "step": 409 | |
| }, | |
| { | |
| "epoch": 0.9070796460176991, | |
| "grad_norm": 0.18122254312038422, | |
| "learning_rate": 8.806207620548165e-06, | |
| "loss": 0.3912, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.9092920353982301, | |
| "grad_norm": 0.1973719596862793, | |
| "learning_rate": 8.797845732622742e-06, | |
| "loss": 0.3978, | |
| "step": 411 | |
| }, | |
| { | |
| "epoch": 0.911504424778761, | |
| "grad_norm": 0.1716318130493164, | |
| "learning_rate": 8.789458661134943e-06, | |
| "loss": 0.3966, | |
| "step": 412 | |
| }, | |
| { | |
| "epoch": 0.9137168141592921, | |
| "grad_norm": 0.1809050291776657, | |
| "learning_rate": 8.781046461699538e-06, | |
| "loss": 0.3933, | |
| "step": 413 | |
| }, | |
| { | |
| "epoch": 0.915929203539823, | |
| "grad_norm": 0.1711595505475998, | |
| "learning_rate": 8.772609190097932e-06, | |
| "loss": 0.3912, | |
| "step": 414 | |
| }, | |
| { | |
| "epoch": 0.918141592920354, | |
| "grad_norm": 0.16632473468780518, | |
| "learning_rate": 8.764146902277773e-06, | |
| "loss": 0.3851, | |
| "step": 415 | |
| }, | |
| { | |
| "epoch": 0.9203539823008849, | |
| "grad_norm": 0.15847158432006836, | |
| "learning_rate": 8.755659654352599e-06, | |
| "loss": 0.3958, | |
| "step": 416 | |
| }, | |
| { | |
| "epoch": 0.922566371681416, | |
| "grad_norm": 0.16290074586868286, | |
| "learning_rate": 8.747147502601458e-06, | |
| "loss": 0.3965, | |
| "step": 417 | |
| }, | |
| { | |
| "epoch": 0.9247787610619469, | |
| "grad_norm": 0.15800520777702332, | |
| "learning_rate": 8.738610503468534e-06, | |
| "loss": 0.3952, | |
| "step": 418 | |
| }, | |
| { | |
| "epoch": 0.9269911504424779, | |
| "grad_norm": 0.13968175649642944, | |
| "learning_rate": 8.730048713562771e-06, | |
| "loss": 0.3935, | |
| "step": 419 | |
| }, | |
| { | |
| "epoch": 0.9292035398230089, | |
| "grad_norm": 0.15484796464443207, | |
| "learning_rate": 8.72146218965751e-06, | |
| "loss": 0.396, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.9314159292035398, | |
| "grad_norm": 0.14914363622665405, | |
| "learning_rate": 8.712850988690094e-06, | |
| "loss": 0.4041, | |
| "step": 421 | |
| }, | |
| { | |
| "epoch": 0.9336283185840708, | |
| "grad_norm": 0.16192984580993652, | |
| "learning_rate": 8.704215167761506e-06, | |
| "loss": 0.3897, | |
| "step": 422 | |
| }, | |
| { | |
| "epoch": 0.9358407079646017, | |
| "grad_norm": 0.17002323269844055, | |
| "learning_rate": 8.695554784135982e-06, | |
| "loss": 0.3902, | |
| "step": 423 | |
| }, | |
| { | |
| "epoch": 0.9380530973451328, | |
| "grad_norm": 0.15230736136436462, | |
| "learning_rate": 8.686869895240631e-06, | |
| "loss": 0.398, | |
| "step": 424 | |
| }, | |
| { | |
| "epoch": 0.9402654867256637, | |
| "grad_norm": 0.2072356790304184, | |
| "learning_rate": 8.678160558665063e-06, | |
| "loss": 0.4063, | |
| "step": 425 | |
| }, | |
| { | |
| "epoch": 0.9424778761061947, | |
| "grad_norm": 0.16368243098258972, | |
| "learning_rate": 8.669426832160997e-06, | |
| "loss": 0.3988, | |
| "step": 426 | |
| }, | |
| { | |
| "epoch": 0.9446902654867256, | |
| "grad_norm": 0.16881467401981354, | |
| "learning_rate": 8.66066877364188e-06, | |
| "loss": 0.3986, | |
| "step": 427 | |
| }, | |
| { | |
| "epoch": 0.9469026548672567, | |
| "grad_norm": 0.17835210263729095, | |
| "learning_rate": 8.651886441182509e-06, | |
| "loss": 0.3969, | |
| "step": 428 | |
| }, | |
| { | |
| "epoch": 0.9491150442477876, | |
| "grad_norm": 0.15917813777923584, | |
| "learning_rate": 8.64307989301864e-06, | |
| "loss": 0.3793, | |
| "step": 429 | |
| }, | |
| { | |
| "epoch": 0.9513274336283186, | |
| "grad_norm": 0.1801290661096573, | |
| "learning_rate": 8.634249187546601e-06, | |
| "loss": 0.3959, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.9535398230088495, | |
| "grad_norm": 0.16084758937358856, | |
| "learning_rate": 8.625394383322914e-06, | |
| "loss": 0.4085, | |
| "step": 431 | |
| }, | |
| { | |
| "epoch": 0.9557522123893806, | |
| "grad_norm": 0.17122162878513336, | |
| "learning_rate": 8.616515539063894e-06, | |
| "loss": 0.3996, | |
| "step": 432 | |
| }, | |
| { | |
| "epoch": 0.9579646017699115, | |
| "grad_norm": 0.16651393473148346, | |
| "learning_rate": 8.60761271364527e-06, | |
| "loss": 0.4013, | |
| "step": 433 | |
| }, | |
| { | |
| "epoch": 0.9601769911504425, | |
| "grad_norm": 0.15684399008750916, | |
| "learning_rate": 8.598685966101783e-06, | |
| "loss": 0.3875, | |
| "step": 434 | |
| }, | |
| { | |
| "epoch": 0.9623893805309734, | |
| "grad_norm": 0.15404202044010162, | |
| "learning_rate": 8.589735355626814e-06, | |
| "loss": 0.3929, | |
| "step": 435 | |
| }, | |
| { | |
| "epoch": 0.9646017699115044, | |
| "grad_norm": 0.1662074774503708, | |
| "learning_rate": 8.580760941571968e-06, | |
| "loss": 0.3957, | |
| "step": 436 | |
| }, | |
| { | |
| "epoch": 0.9668141592920354, | |
| "grad_norm": 0.15725825726985931, | |
| "learning_rate": 8.571762783446696e-06, | |
| "loss": 0.3914, | |
| "step": 437 | |
| }, | |
| { | |
| "epoch": 0.9690265486725663, | |
| "grad_norm": 0.1751137673854828, | |
| "learning_rate": 8.562740940917901e-06, | |
| "loss": 0.4001, | |
| "step": 438 | |
| }, | |
| { | |
| "epoch": 0.9712389380530974, | |
| "grad_norm": 0.1850391924381256, | |
| "learning_rate": 8.55369547380953e-06, | |
| "loss": 0.39, | |
| "step": 439 | |
| }, | |
| { | |
| "epoch": 0.9734513274336283, | |
| "grad_norm": 0.16561464965343475, | |
| "learning_rate": 8.544626442102188e-06, | |
| "loss": 0.3903, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.9756637168141593, | |
| "grad_norm": 0.16364803910255432, | |
| "learning_rate": 8.535533905932739e-06, | |
| "loss": 0.3862, | |
| "step": 441 | |
| }, | |
| { | |
| "epoch": 0.9778761061946902, | |
| "grad_norm": 0.1534828543663025, | |
| "learning_rate": 8.526417925593901e-06, | |
| "loss": 0.3986, | |
| "step": 442 | |
| }, | |
| { | |
| "epoch": 0.9800884955752213, | |
| "grad_norm": 0.17045795917510986, | |
| "learning_rate": 8.517278561533857e-06, | |
| "loss": 0.393, | |
| "step": 443 | |
| }, | |
| { | |
| "epoch": 0.9823008849557522, | |
| "grad_norm": 0.16603973507881165, | |
| "learning_rate": 8.50811587435584e-06, | |
| "loss": 0.3915, | |
| "step": 444 | |
| }, | |
| { | |
| "epoch": 0.9845132743362832, | |
| "grad_norm": 0.17057141661643982, | |
| "learning_rate": 8.498929924817745e-06, | |
| "loss": 0.3839, | |
| "step": 445 | |
| }, | |
| { | |
| "epoch": 0.9867256637168141, | |
| "grad_norm": 0.16480037569999695, | |
| "learning_rate": 8.489720773831717e-06, | |
| "loss": 0.3931, | |
| "step": 446 | |
| }, | |
| { | |
| "epoch": 0.9889380530973452, | |
| "grad_norm": 0.15113525092601776, | |
| "learning_rate": 8.480488482463753e-06, | |
| "loss": 0.4149, | |
| "step": 447 | |
| }, | |
| { | |
| "epoch": 0.9911504424778761, | |
| "grad_norm": 0.17928218841552734, | |
| "learning_rate": 8.471233111933291e-06, | |
| "loss": 0.3984, | |
| "step": 448 | |
| }, | |
| { | |
| "epoch": 0.9933628318584071, | |
| "grad_norm": 0.1759202629327774, | |
| "learning_rate": 8.461954723612807e-06, | |
| "loss": 0.3989, | |
| "step": 449 | |
| }, | |
| { | |
| "epoch": 0.995575221238938, | |
| "grad_norm": 0.15595725178718567, | |
| "learning_rate": 8.45265337902741e-06, | |
| "loss": 0.3943, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.9977876106194691, | |
| "grad_norm": 0.17772260308265686, | |
| "learning_rate": 8.443329139854434e-06, | |
| "loss": 0.3925, | |
| "step": 451 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "grad_norm": 0.17362035810947418, | |
| "learning_rate": 8.433982067923021e-06, | |
| "loss": 0.3919, | |
| "step": 452 | |
| }, | |
| { | |
| "epoch": 1.002212389380531, | |
| "grad_norm": 0.1918303370475769, | |
| "learning_rate": 8.424612225213726e-06, | |
| "loss": 0.3747, | |
| "step": 453 | |
| }, | |
| { | |
| "epoch": 1.0044247787610618, | |
| "grad_norm": 0.1567537784576416, | |
| "learning_rate": 8.41521967385809e-06, | |
| "loss": 0.3705, | |
| "step": 454 | |
| }, | |
| { | |
| "epoch": 1.0066371681415929, | |
| "grad_norm": 0.20494352281093597, | |
| "learning_rate": 8.405804476138239e-06, | |
| "loss": 0.3764, | |
| "step": 455 | |
| }, | |
| { | |
| "epoch": 1.008849557522124, | |
| "grad_norm": 0.1830655187368393, | |
| "learning_rate": 8.396366694486466e-06, | |
| "loss": 0.3826, | |
| "step": 456 | |
| }, | |
| { | |
| "epoch": 1.011061946902655, | |
| "grad_norm": 0.17687253654003143, | |
| "learning_rate": 8.386906391484819e-06, | |
| "loss": 0.3913, | |
| "step": 457 | |
| }, | |
| { | |
| "epoch": 1.0132743362831858, | |
| "grad_norm": 0.21283870935440063, | |
| "learning_rate": 8.377423629864686e-06, | |
| "loss": 0.3771, | |
| "step": 458 | |
| }, | |
| { | |
| "epoch": 1.0154867256637168, | |
| "grad_norm": 0.1914159059524536, | |
| "learning_rate": 8.367918472506375e-06, | |
| "loss": 0.3738, | |
| "step": 459 | |
| }, | |
| { | |
| "epoch": 1.0176991150442478, | |
| "grad_norm": 0.17665503919124603, | |
| "learning_rate": 8.358390982438706e-06, | |
| "loss": 0.3784, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 1.0199115044247788, | |
| "grad_norm": 0.1868385225534439, | |
| "learning_rate": 8.348841222838579e-06, | |
| "loss": 0.3876, | |
| "step": 461 | |
| }, | |
| { | |
| "epoch": 1.0221238938053097, | |
| "grad_norm": 0.17088955640792847, | |
| "learning_rate": 8.339269257030576e-06, | |
| "loss": 0.3828, | |
| "step": 462 | |
| }, | |
| { | |
| "epoch": 1.0243362831858407, | |
| "grad_norm": 0.174570694565773, | |
| "learning_rate": 8.329675148486518e-06, | |
| "loss": 0.3789, | |
| "step": 463 | |
| }, | |
| { | |
| "epoch": 1.0265486725663717, | |
| "grad_norm": 0.16205543279647827, | |
| "learning_rate": 8.32005896082506e-06, | |
| "loss": 0.3784, | |
| "step": 464 | |
| }, | |
| { | |
| "epoch": 1.0287610619469028, | |
| "grad_norm": 0.18252155184745789, | |
| "learning_rate": 8.310420757811258e-06, | |
| "loss": 0.3797, | |
| "step": 465 | |
| }, | |
| { | |
| "epoch": 1.0309734513274336, | |
| "grad_norm": 0.2198188453912735, | |
| "learning_rate": 8.30076060335616e-06, | |
| "loss": 0.3797, | |
| "step": 466 | |
| }, | |
| { | |
| "epoch": 1.0331858407079646, | |
| "grad_norm": 0.15900884568691254, | |
| "learning_rate": 8.291078561516368e-06, | |
| "loss": 0.3737, | |
| "step": 467 | |
| }, | |
| { | |
| "epoch": 1.0353982300884956, | |
| "grad_norm": 0.21332651376724243, | |
| "learning_rate": 8.281374696493628e-06, | |
| "loss": 0.3728, | |
| "step": 468 | |
| }, | |
| { | |
| "epoch": 1.0376106194690267, | |
| "grad_norm": 0.18220320343971252, | |
| "learning_rate": 8.271649072634381e-06, | |
| "loss": 0.3669, | |
| "step": 469 | |
| }, | |
| { | |
| "epoch": 1.0398230088495575, | |
| "grad_norm": 0.16027416288852692, | |
| "learning_rate": 8.261901754429367e-06, | |
| "loss": 0.3791, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 1.0420353982300885, | |
| "grad_norm": 0.19321398437023163, | |
| "learning_rate": 8.25213280651317e-06, | |
| "loss": 0.3803, | |
| "step": 471 | |
| }, | |
| { | |
| "epoch": 1.0442477876106195, | |
| "grad_norm": 0.14267107844352722, | |
| "learning_rate": 8.24234229366381e-06, | |
| "loss": 0.3769, | |
| "step": 472 | |
| }, | |
| { | |
| "epoch": 1.0464601769911503, | |
| "grad_norm": 0.16832539439201355, | |
| "learning_rate": 8.232530280802296e-06, | |
| "loss": 0.3792, | |
| "step": 473 | |
| }, | |
| { | |
| "epoch": 1.0486725663716814, | |
| "grad_norm": 0.15776118636131287, | |
| "learning_rate": 8.222696832992208e-06, | |
| "loss": 0.3741, | |
| "step": 474 | |
| }, | |
| { | |
| "epoch": 1.0508849557522124, | |
| "grad_norm": 0.16222868859767914, | |
| "learning_rate": 8.212842015439263e-06, | |
| "loss": 0.3758, | |
| "step": 475 | |
| }, | |
| { | |
| "epoch": 1.0530973451327434, | |
| "grad_norm": 0.188007190823555, | |
| "learning_rate": 8.202965893490877e-06, | |
| "loss": 0.3845, | |
| "step": 476 | |
| }, | |
| { | |
| "epoch": 1.0553097345132743, | |
| "grad_norm": 0.17090556025505066, | |
| "learning_rate": 8.193068532635737e-06, | |
| "loss": 0.3944, | |
| "step": 477 | |
| }, | |
| { | |
| "epoch": 1.0575221238938053, | |
| "grad_norm": 0.17572391033172607, | |
| "learning_rate": 8.18314999850337e-06, | |
| "loss": 0.3736, | |
| "step": 478 | |
| }, | |
| { | |
| "epoch": 1.0597345132743363, | |
| "grad_norm": 0.1866428256034851, | |
| "learning_rate": 8.173210356863696e-06, | |
| "loss": 0.3803, | |
| "step": 479 | |
| }, | |
| { | |
| "epoch": 1.0619469026548674, | |
| "grad_norm": 0.15502867102622986, | |
| "learning_rate": 8.163249673626603e-06, | |
| "loss": 0.375, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 1.0641592920353982, | |
| "grad_norm": 0.1912487894296646, | |
| "learning_rate": 8.153268014841507e-06, | |
| "loss": 0.3697, | |
| "step": 481 | |
| }, | |
| { | |
| "epoch": 1.0663716814159292, | |
| "grad_norm": 0.179610013961792, | |
| "learning_rate": 8.143265446696909e-06, | |
| "loss": 0.3882, | |
| "step": 482 | |
| }, | |
| { | |
| "epoch": 1.0685840707964602, | |
| "grad_norm": 0.1993560791015625, | |
| "learning_rate": 8.133242035519968e-06, | |
| "loss": 0.3825, | |
| "step": 483 | |
| }, | |
| { | |
| "epoch": 1.0707964601769913, | |
| "grad_norm": 0.153557687997818, | |
| "learning_rate": 8.123197847776043e-06, | |
| "loss": 0.3835, | |
| "step": 484 | |
| }, | |
| { | |
| "epoch": 1.073008849557522, | |
| "grad_norm": 0.1919250637292862, | |
| "learning_rate": 8.113132950068272e-06, | |
| "loss": 0.3769, | |
| "step": 485 | |
| }, | |
| { | |
| "epoch": 1.075221238938053, | |
| "grad_norm": 0.19700811803340912, | |
| "learning_rate": 8.103047409137114e-06, | |
| "loss": 0.384, | |
| "step": 486 | |
| }, | |
| { | |
| "epoch": 1.0774336283185841, | |
| "grad_norm": 0.16748955845832825, | |
| "learning_rate": 8.09294129185992e-06, | |
| "loss": 0.3746, | |
| "step": 487 | |
| }, | |
| { | |
| "epoch": 1.079646017699115, | |
| "grad_norm": 0.19604726135730743, | |
| "learning_rate": 8.082814665250476e-06, | |
| "loss": 0.38, | |
| "step": 488 | |
| }, | |
| { | |
| "epoch": 1.081858407079646, | |
| "grad_norm": 0.16203738749027252, | |
| "learning_rate": 8.072667596458573e-06, | |
| "loss": 0.3702, | |
| "step": 489 | |
| }, | |
| { | |
| "epoch": 1.084070796460177, | |
| "grad_norm": 0.1730918437242508, | |
| "learning_rate": 8.062500152769547e-06, | |
| "loss": 0.3838, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 1.086283185840708, | |
| "grad_norm": 0.1520654857158661, | |
| "learning_rate": 8.052312401603848e-06, | |
| "loss": 0.3749, | |
| "step": 491 | |
| }, | |
| { | |
| "epoch": 1.0884955752212389, | |
| "grad_norm": 0.1743493527173996, | |
| "learning_rate": 8.042104410516576e-06, | |
| "loss": 0.3961, | |
| "step": 492 | |
| }, | |
| { | |
| "epoch": 1.0907079646017699, | |
| "grad_norm": 0.1602836549282074, | |
| "learning_rate": 8.03187624719705e-06, | |
| "loss": 0.39, | |
| "step": 493 | |
| }, | |
| { | |
| "epoch": 1.092920353982301, | |
| "grad_norm": 0.15004900097846985, | |
| "learning_rate": 8.021627979468348e-06, | |
| "loss": 0.3783, | |
| "step": 494 | |
| }, | |
| { | |
| "epoch": 1.095132743362832, | |
| "grad_norm": 0.17503239214420319, | |
| "learning_rate": 8.01135967528686e-06, | |
| "loss": 0.3767, | |
| "step": 495 | |
| }, | |
| { | |
| "epoch": 1.0973451327433628, | |
| "grad_norm": 0.15622718632221222, | |
| "learning_rate": 8.001071402741843e-06, | |
| "loss": 0.3758, | |
| "step": 496 | |
| }, | |
| { | |
| "epoch": 1.0995575221238938, | |
| "grad_norm": 0.16066192090511322, | |
| "learning_rate": 7.990763230054953e-06, | |
| "loss": 0.3781, | |
| "step": 497 | |
| }, | |
| { | |
| "epoch": 1.1017699115044248, | |
| "grad_norm": 0.1617273986339569, | |
| "learning_rate": 7.980435225579819e-06, | |
| "loss": 0.3826, | |
| "step": 498 | |
| }, | |
| { | |
| "epoch": 1.1039823008849559, | |
| "grad_norm": 0.16359879076480865, | |
| "learning_rate": 7.970087457801563e-06, | |
| "loss": 0.3831, | |
| "step": 499 | |
| }, | |
| { | |
| "epoch": 1.1061946902654867, | |
| "grad_norm": 0.1616351306438446, | |
| "learning_rate": 7.959719995336364e-06, | |
| "loss": 0.3809, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 1.1084070796460177, | |
| "grad_norm": 0.14531832933425903, | |
| "learning_rate": 7.949332906930995e-06, | |
| "loss": 0.3766, | |
| "step": 501 | |
| }, | |
| { | |
| "epoch": 1.1106194690265487, | |
| "grad_norm": 0.17033183574676514, | |
| "learning_rate": 7.938926261462366e-06, | |
| "loss": 0.3779, | |
| "step": 502 | |
| }, | |
| { | |
| "epoch": 1.1128318584070795, | |
| "grad_norm": 0.16740630567073822, | |
| "learning_rate": 7.928500127937075e-06, | |
| "loss": 0.3788, | |
| "step": 503 | |
| }, | |
| { | |
| "epoch": 1.1150442477876106, | |
| "grad_norm": 0.15742437541484833, | |
| "learning_rate": 7.918054575490943e-06, | |
| "loss": 0.3778, | |
| "step": 504 | |
| }, | |
| { | |
| "epoch": 1.1172566371681416, | |
| "grad_norm": 0.17109255492687225, | |
| "learning_rate": 7.90758967338856e-06, | |
| "loss": 0.3772, | |
| "step": 505 | |
| }, | |
| { | |
| "epoch": 1.1194690265486726, | |
| "grad_norm": 0.15209320187568665, | |
| "learning_rate": 7.897105491022819e-06, | |
| "loss": 0.3812, | |
| "step": 506 | |
| }, | |
| { | |
| "epoch": 1.1216814159292035, | |
| "grad_norm": 0.16745400428771973, | |
| "learning_rate": 7.886602097914466e-06, | |
| "loss": 0.3747, | |
| "step": 507 | |
| }, | |
| { | |
| "epoch": 1.1238938053097345, | |
| "grad_norm": 0.1631808876991272, | |
| "learning_rate": 7.876079563711631e-06, | |
| "loss": 0.3791, | |
| "step": 508 | |
| }, | |
| { | |
| "epoch": 1.1261061946902655, | |
| "grad_norm": 0.15150891244411469, | |
| "learning_rate": 7.86553795818937e-06, | |
| "loss": 0.382, | |
| "step": 509 | |
| }, | |
| { | |
| "epoch": 1.1283185840707965, | |
| "grad_norm": 0.17548954486846924, | |
| "learning_rate": 7.854977351249199e-06, | |
| "loss": 0.3692, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 1.1305309734513274, | |
| "grad_norm": 0.15667495131492615, | |
| "learning_rate": 7.844397812918637e-06, | |
| "loss": 0.3848, | |
| "step": 511 | |
| }, | |
| { | |
| "epoch": 1.1327433628318584, | |
| "grad_norm": 0.1640884280204773, | |
| "learning_rate": 7.833799413350732e-06, | |
| "loss": 0.3812, | |
| "step": 512 | |
| }, | |
| { | |
| "epoch": 1.1349557522123894, | |
| "grad_norm": 0.1685960441827774, | |
| "learning_rate": 7.823182222823603e-06, | |
| "loss": 0.3851, | |
| "step": 513 | |
| }, | |
| { | |
| "epoch": 1.1371681415929205, | |
| "grad_norm": 0.16258198022842407, | |
| "learning_rate": 7.812546311739976e-06, | |
| "loss": 0.376, | |
| "step": 514 | |
| }, | |
| { | |
| "epoch": 1.1393805309734513, | |
| "grad_norm": 0.18257202208042145, | |
| "learning_rate": 7.801891750626706e-06, | |
| "loss": 0.3805, | |
| "step": 515 | |
| }, | |
| { | |
| "epoch": 1.1415929203539823, | |
| "grad_norm": 0.18744589388370514, | |
| "learning_rate": 7.791218610134324e-06, | |
| "loss": 0.38, | |
| "step": 516 | |
| }, | |
| { | |
| "epoch": 1.1438053097345133, | |
| "grad_norm": 0.17237654328346252, | |
| "learning_rate": 7.780526961036556e-06, | |
| "loss": 0.377, | |
| "step": 517 | |
| }, | |
| { | |
| "epoch": 1.1460176991150441, | |
| "grad_norm": 0.18574164807796478, | |
| "learning_rate": 7.769816874229862e-06, | |
| "loss": 0.3794, | |
| "step": 518 | |
| }, | |
| { | |
| "epoch": 1.1482300884955752, | |
| "grad_norm": 0.1659688502550125, | |
| "learning_rate": 7.759088420732958e-06, | |
| "loss": 0.3836, | |
| "step": 519 | |
| }, | |
| { | |
| "epoch": 1.1504424778761062, | |
| "grad_norm": 0.18307393789291382, | |
| "learning_rate": 7.748341671686355e-06, | |
| "loss": 0.3849, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 1.1526548672566372, | |
| "grad_norm": 0.17125365138053894, | |
| "learning_rate": 7.737576698351878e-06, | |
| "loss": 0.3718, | |
| "step": 521 | |
| }, | |
| { | |
| "epoch": 1.154867256637168, | |
| "grad_norm": 0.1512458175420761, | |
| "learning_rate": 7.726793572112203e-06, | |
| "loss": 0.363, | |
| "step": 522 | |
| }, | |
| { | |
| "epoch": 1.157079646017699, | |
| "grad_norm": 0.16603504121303558, | |
| "learning_rate": 7.715992364470371e-06, | |
| "loss": 0.3753, | |
| "step": 523 | |
| }, | |
| { | |
| "epoch": 1.1592920353982301, | |
| "grad_norm": 0.15968966484069824, | |
| "learning_rate": 7.705173147049326e-06, | |
| "loss": 0.3811, | |
| "step": 524 | |
| }, | |
| { | |
| "epoch": 1.1615044247787611, | |
| "grad_norm": 0.1410856544971466, | |
| "learning_rate": 7.694335991591431e-06, | |
| "loss": 0.3915, | |
| "step": 525 | |
| }, | |
| { | |
| "epoch": 1.163716814159292, | |
| "grad_norm": 0.14456801116466522, | |
| "learning_rate": 7.683480969958005e-06, | |
| "loss": 0.3813, | |
| "step": 526 | |
| }, | |
| { | |
| "epoch": 1.165929203539823, | |
| "grad_norm": 0.15530700981616974, | |
| "learning_rate": 7.672608154128824e-06, | |
| "loss": 0.3871, | |
| "step": 527 | |
| }, | |
| { | |
| "epoch": 1.168141592920354, | |
| "grad_norm": 0.14297249913215637, | |
| "learning_rate": 7.66171761620167e-06, | |
| "loss": 0.3807, | |
| "step": 528 | |
| }, | |
| { | |
| "epoch": 1.170353982300885, | |
| "grad_norm": 0.13215981423854828, | |
| "learning_rate": 7.650809428391834e-06, | |
| "loss": 0.379, | |
| "step": 529 | |
| }, | |
| { | |
| "epoch": 1.1725663716814159, | |
| "grad_norm": 0.15017202496528625, | |
| "learning_rate": 7.63988366303165e-06, | |
| "loss": 0.3731, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 1.174778761061947, | |
| "grad_norm": 0.15315097570419312, | |
| "learning_rate": 7.628940392569995e-06, | |
| "loss": 0.376, | |
| "step": 531 | |
| }, | |
| { | |
| "epoch": 1.176991150442478, | |
| "grad_norm": 0.15044665336608887, | |
| "learning_rate": 7.61797968957184e-06, | |
| "loss": 0.3816, | |
| "step": 532 | |
| }, | |
| { | |
| "epoch": 1.1792035398230087, | |
| "grad_norm": 0.15934717655181885, | |
| "learning_rate": 7.607001626717738e-06, | |
| "loss": 0.3741, | |
| "step": 533 | |
| }, | |
| { | |
| "epoch": 1.1814159292035398, | |
| "grad_norm": 0.14937704801559448, | |
| "learning_rate": 7.596006276803365e-06, | |
| "loss": 0.3849, | |
| "step": 534 | |
| }, | |
| { | |
| "epoch": 1.1836283185840708, | |
| "grad_norm": 0.14274275302886963, | |
| "learning_rate": 7.58499371273902e-06, | |
| "loss": 0.3697, | |
| "step": 535 | |
| }, | |
| { | |
| "epoch": 1.1858407079646018, | |
| "grad_norm": 0.14679598808288574, | |
| "learning_rate": 7.5739640075491546e-06, | |
| "loss": 0.3769, | |
| "step": 536 | |
| }, | |
| { | |
| "epoch": 1.1880530973451326, | |
| "grad_norm": 0.1407579779624939, | |
| "learning_rate": 7.562917234371879e-06, | |
| "loss": 0.3774, | |
| "step": 537 | |
| }, | |
| { | |
| "epoch": 1.1902654867256637, | |
| "grad_norm": 0.14973343908786774, | |
| "learning_rate": 7.551853466458486e-06, | |
| "loss": 0.3842, | |
| "step": 538 | |
| }, | |
| { | |
| "epoch": 1.1924778761061947, | |
| "grad_norm": 0.17621995508670807, | |
| "learning_rate": 7.540772777172958e-06, | |
| "loss": 0.3706, | |
| "step": 539 | |
| }, | |
| { | |
| "epoch": 1.1946902654867257, | |
| "grad_norm": 0.17068910598754883, | |
| "learning_rate": 7.529675239991483e-06, | |
| "loss": 0.3837, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 1.1969026548672566, | |
| "grad_norm": 0.1598779559135437, | |
| "learning_rate": 7.518560928501969e-06, | |
| "loss": 0.365, | |
| "step": 541 | |
| }, | |
| { | |
| "epoch": 1.1991150442477876, | |
| "grad_norm": 0.1503140926361084, | |
| "learning_rate": 7.507429916403553e-06, | |
| "loss": 0.3714, | |
| "step": 542 | |
| }, | |
| { | |
| "epoch": 1.2013274336283186, | |
| "grad_norm": 0.16745896637439728, | |
| "learning_rate": 7.496282277506115e-06, | |
| "loss": 0.3772, | |
| "step": 543 | |
| }, | |
| { | |
| "epoch": 1.2035398230088497, | |
| "grad_norm": 0.1463339626789093, | |
| "learning_rate": 7.48511808572979e-06, | |
| "loss": 0.3675, | |
| "step": 544 | |
| }, | |
| { | |
| "epoch": 1.2057522123893805, | |
| "grad_norm": 0.16788248717784882, | |
| "learning_rate": 7.473937415104471e-06, | |
| "loss": 0.3768, | |
| "step": 545 | |
| }, | |
| { | |
| "epoch": 1.2079646017699115, | |
| "grad_norm": 0.1352882832288742, | |
| "learning_rate": 7.462740339769323e-06, | |
| "loss": 0.3761, | |
| "step": 546 | |
| }, | |
| { | |
| "epoch": 1.2101769911504425, | |
| "grad_norm": 0.15344858169555664, | |
| "learning_rate": 7.451526933972294e-06, | |
| "loss": 0.377, | |
| "step": 547 | |
| }, | |
| { | |
| "epoch": 1.2123893805309733, | |
| "grad_norm": 0.16328397393226624, | |
| "learning_rate": 7.440297272069615e-06, | |
| "loss": 0.3877, | |
| "step": 548 | |
| }, | |
| { | |
| "epoch": 1.2146017699115044, | |
| "grad_norm": 0.16266027092933655, | |
| "learning_rate": 7.429051428525318e-06, | |
| "loss": 0.378, | |
| "step": 549 | |
| }, | |
| { | |
| "epoch": 1.2168141592920354, | |
| "grad_norm": 0.15220795571804047, | |
| "learning_rate": 7.417789477910728e-06, | |
| "loss": 0.3779, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 1.2190265486725664, | |
| "grad_norm": 0.15698575973510742, | |
| "learning_rate": 7.406511494903982e-06, | |
| "loss": 0.3748, | |
| "step": 551 | |
| }, | |
| { | |
| "epoch": 1.2212389380530975, | |
| "grad_norm": 0.17737598717212677, | |
| "learning_rate": 7.395217554289524e-06, | |
| "loss": 0.3764, | |
| "step": 552 | |
| }, | |
| { | |
| "epoch": 1.2234513274336283, | |
| "grad_norm": 0.1444133073091507, | |
| "learning_rate": 7.383907730957618e-06, | |
| "loss": 0.3822, | |
| "step": 553 | |
| }, | |
| { | |
| "epoch": 1.2256637168141593, | |
| "grad_norm": 0.16357633471488953, | |
| "learning_rate": 7.372582099903841e-06, | |
| "loss": 0.3647, | |
| "step": 554 | |
| }, | |
| { | |
| "epoch": 1.2278761061946903, | |
| "grad_norm": 0.14997902512550354, | |
| "learning_rate": 7.361240736228594e-06, | |
| "loss": 0.3735, | |
| "step": 555 | |
| }, | |
| { | |
| "epoch": 1.2300884955752212, | |
| "grad_norm": 0.12847259640693665, | |
| "learning_rate": 7.349883715136601e-06, | |
| "loss": 0.368, | |
| "step": 556 | |
| }, | |
| { | |
| "epoch": 1.2323008849557522, | |
| "grad_norm": 0.15516282618045807, | |
| "learning_rate": 7.3385111119364105e-06, | |
| "loss": 0.3819, | |
| "step": 557 | |
| }, | |
| { | |
| "epoch": 1.2345132743362832, | |
| "grad_norm": 0.16119252145290375, | |
| "learning_rate": 7.327123002039897e-06, | |
| "loss": 0.3759, | |
| "step": 558 | |
| }, | |
| { | |
| "epoch": 1.2367256637168142, | |
| "grad_norm": 0.15966367721557617, | |
| "learning_rate": 7.315719460961757e-06, | |
| "loss": 0.3806, | |
| "step": 559 | |
| }, | |
| { | |
| "epoch": 1.238938053097345, | |
| "grad_norm": 0.16308777034282684, | |
| "learning_rate": 7.304300564319013e-06, | |
| "loss": 0.3883, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 1.241150442477876, | |
| "grad_norm": 0.14293457567691803, | |
| "learning_rate": 7.292866387830515e-06, | |
| "loss": 0.3748, | |
| "step": 561 | |
| }, | |
| { | |
| "epoch": 1.2433628318584071, | |
| "grad_norm": 0.17340628802776337, | |
| "learning_rate": 7.281417007316427e-06, | |
| "loss": 0.3832, | |
| "step": 562 | |
| }, | |
| { | |
| "epoch": 1.245575221238938, | |
| "grad_norm": 0.15052464604377747, | |
| "learning_rate": 7.269952498697734e-06, | |
| "loss": 0.3714, | |
| "step": 563 | |
| }, | |
| { | |
| "epoch": 1.247787610619469, | |
| "grad_norm": 0.18350891768932343, | |
| "learning_rate": 7.258472937995736e-06, | |
| "loss": 0.3789, | |
| "step": 564 | |
| }, | |
| { | |
| "epoch": 1.25, | |
| "grad_norm": 0.16274268925189972, | |
| "learning_rate": 7.246978401331543e-06, | |
| "loss": 0.3766, | |
| "step": 565 | |
| }, | |
| { | |
| "epoch": 1.252212389380531, | |
| "grad_norm": 0.1731940507888794, | |
| "learning_rate": 7.235468964925571e-06, | |
| "loss": 0.3794, | |
| "step": 566 | |
| }, | |
| { | |
| "epoch": 1.254424778761062, | |
| "grad_norm": 0.145197331905365, | |
| "learning_rate": 7.223944705097035e-06, | |
| "loss": 0.3771, | |
| "step": 567 | |
| }, | |
| { | |
| "epoch": 1.2566371681415929, | |
| "grad_norm": 0.14675655961036682, | |
| "learning_rate": 7.212405698263446e-06, | |
| "loss": 0.3639, | |
| "step": 568 | |
| }, | |
| { | |
| "epoch": 1.258849557522124, | |
| "grad_norm": 0.14033639430999756, | |
| "learning_rate": 7.200852020940102e-06, | |
| "loss": 0.3658, | |
| "step": 569 | |
| }, | |
| { | |
| "epoch": 1.261061946902655, | |
| "grad_norm": 0.15826043486595154, | |
| "learning_rate": 7.189283749739584e-06, | |
| "loss": 0.3772, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 1.2632743362831858, | |
| "grad_norm": 0.14263145625591278, | |
| "learning_rate": 7.177700961371239e-06, | |
| "loss": 0.3824, | |
| "step": 571 | |
| }, | |
| { | |
| "epoch": 1.2654867256637168, | |
| "grad_norm": 0.16336825489997864, | |
| "learning_rate": 7.1661037326406825e-06, | |
| "loss": 0.3687, | |
| "step": 572 | |
| }, | |
| { | |
| "epoch": 1.2676991150442478, | |
| "grad_norm": 0.148985356092453, | |
| "learning_rate": 7.154492140449283e-06, | |
| "loss": 0.3687, | |
| "step": 573 | |
| }, | |
| { | |
| "epoch": 1.2699115044247788, | |
| "grad_norm": 0.14789626002311707, | |
| "learning_rate": 7.142866261793651e-06, | |
| "loss": 0.3791, | |
| "step": 574 | |
| }, | |
| { | |
| "epoch": 1.2721238938053097, | |
| "grad_norm": 0.13839952647686005, | |
| "learning_rate": 7.1312261737651354e-06, | |
| "loss": 0.3785, | |
| "step": 575 | |
| }, | |
| { | |
| "epoch": 1.2743362831858407, | |
| "grad_norm": 0.15205146372318268, | |
| "learning_rate": 7.119571953549305e-06, | |
| "loss": 0.3842, | |
| "step": 576 | |
| }, | |
| { | |
| "epoch": 1.2765486725663717, | |
| "grad_norm": 0.14949627220630646, | |
| "learning_rate": 7.107903678425436e-06, | |
| "loss": 0.3729, | |
| "step": 577 | |
| }, | |
| { | |
| "epoch": 1.2787610619469025, | |
| "grad_norm": 0.14976395666599274, | |
| "learning_rate": 7.09622142576601e-06, | |
| "loss": 0.3747, | |
| "step": 578 | |
| }, | |
| { | |
| "epoch": 1.2809734513274336, | |
| "grad_norm": 0.15081347525119781, | |
| "learning_rate": 7.084525273036187e-06, | |
| "loss": 0.3937, | |
| "step": 579 | |
| }, | |
| { | |
| "epoch": 1.2831858407079646, | |
| "grad_norm": 0.16921454668045044, | |
| "learning_rate": 7.072815297793303e-06, | |
| "loss": 0.3827, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 1.2853982300884956, | |
| "grad_norm": 0.16270343959331512, | |
| "learning_rate": 7.061091577686349e-06, | |
| "loss": 0.3797, | |
| "step": 581 | |
| }, | |
| { | |
| "epoch": 1.2876106194690267, | |
| "grad_norm": 0.15536241233348846, | |
| "learning_rate": 7.0493541904554605e-06, | |
| "loss": 0.3727, | |
| "step": 582 | |
| }, | |
| { | |
| "epoch": 1.2898230088495575, | |
| "grad_norm": 0.1545332819223404, | |
| "learning_rate": 7.037603213931397e-06, | |
| "loss": 0.3826, | |
| "step": 583 | |
| }, | |
| { | |
| "epoch": 1.2920353982300885, | |
| "grad_norm": 0.1420442909002304, | |
| "learning_rate": 7.025838726035032e-06, | |
| "loss": 0.3786, | |
| "step": 584 | |
| }, | |
| { | |
| "epoch": 1.2942477876106195, | |
| "grad_norm": 0.15322910249233246, | |
| "learning_rate": 7.014060804776831e-06, | |
| "loss": 0.3814, | |
| "step": 585 | |
| }, | |
| { | |
| "epoch": 1.2964601769911503, | |
| "grad_norm": 0.1383352428674698, | |
| "learning_rate": 7.002269528256334e-06, | |
| "loss": 0.3838, | |
| "step": 586 | |
| }, | |
| { | |
| "epoch": 1.2986725663716814, | |
| "grad_norm": 0.15499426424503326, | |
| "learning_rate": 6.990464974661644e-06, | |
| "loss": 0.3803, | |
| "step": 587 | |
| }, | |
| { | |
| "epoch": 1.3008849557522124, | |
| "grad_norm": 0.13908816874027252, | |
| "learning_rate": 6.978647222268904e-06, | |
| "loss": 0.3804, | |
| "step": 588 | |
| }, | |
| { | |
| "epoch": 1.3030973451327434, | |
| "grad_norm": 0.15479522943496704, | |
| "learning_rate": 6.9668163494417775e-06, | |
| "loss": 0.3747, | |
| "step": 589 | |
| }, | |
| { | |
| "epoch": 1.3053097345132743, | |
| "grad_norm": 0.139928936958313, | |
| "learning_rate": 6.954972434630928e-06, | |
| "loss": 0.3735, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 1.3075221238938053, | |
| "grad_norm": 0.1611955463886261, | |
| "learning_rate": 6.943115556373503e-06, | |
| "loss": 0.3719, | |
| "step": 591 | |
| }, | |
| { | |
| "epoch": 1.3097345132743363, | |
| "grad_norm": 0.16825099289417267, | |
| "learning_rate": 6.93124579329261e-06, | |
| "loss": 0.3768, | |
| "step": 592 | |
| }, | |
| { | |
| "epoch": 1.3119469026548671, | |
| "grad_norm": 0.15945830941200256, | |
| "learning_rate": 6.919363224096797e-06, | |
| "loss": 0.3772, | |
| "step": 593 | |
| }, | |
| { | |
| "epoch": 1.3141592920353982, | |
| "grad_norm": 0.17050500214099884, | |
| "learning_rate": 6.907467927579528e-06, | |
| "loss": 0.3768, | |
| "step": 594 | |
| }, | |
| { | |
| "epoch": 1.3163716814159292, | |
| "grad_norm": 0.15621913969516754, | |
| "learning_rate": 6.8955599826186606e-06, | |
| "loss": 0.3966, | |
| "step": 595 | |
| }, | |
| { | |
| "epoch": 1.3185840707964602, | |
| "grad_norm": 0.15994904935359955, | |
| "learning_rate": 6.883639468175926e-06, | |
| "loss": 0.3798, | |
| "step": 596 | |
| }, | |
| { | |
| "epoch": 1.3207964601769913, | |
| "grad_norm": 0.14877241849899292, | |
| "learning_rate": 6.871706463296407e-06, | |
| "loss": 0.3649, | |
| "step": 597 | |
| }, | |
| { | |
| "epoch": 1.323008849557522, | |
| "grad_norm": 0.1415458768606186, | |
| "learning_rate": 6.859761047108007e-06, | |
| "loss": 0.3721, | |
| "step": 598 | |
| }, | |
| { | |
| "epoch": 1.325221238938053, | |
| "grad_norm": 0.15687789022922516, | |
| "learning_rate": 6.847803298820927e-06, | |
| "loss": 0.3731, | |
| "step": 599 | |
| }, | |
| { | |
| "epoch": 1.3274336283185841, | |
| "grad_norm": 0.14746886491775513, | |
| "learning_rate": 6.835833297727148e-06, | |
| "loss": 0.3756, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 1.329646017699115, | |
| "grad_norm": 0.16469167172908783, | |
| "learning_rate": 6.823851123199894e-06, | |
| "loss": 0.3807, | |
| "step": 601 | |
| }, | |
| { | |
| "epoch": 1.331858407079646, | |
| "grad_norm": 0.14631612598896027, | |
| "learning_rate": 6.811856854693114e-06, | |
| "loss": 0.3753, | |
| "step": 602 | |
| }, | |
| { | |
| "epoch": 1.334070796460177, | |
| "grad_norm": 0.13799001276493073, | |
| "learning_rate": 6.799850571740955e-06, | |
| "loss": 0.3635, | |
| "step": 603 | |
| }, | |
| { | |
| "epoch": 1.336283185840708, | |
| "grad_norm": 0.14614099264144897, | |
| "learning_rate": 6.787832353957225e-06, | |
| "loss": 0.3819, | |
| "step": 604 | |
| }, | |
| { | |
| "epoch": 1.338495575221239, | |
| "grad_norm": 0.14514301717281342, | |
| "learning_rate": 6.775802281034876e-06, | |
| "loss": 0.3881, | |
| "step": 605 | |
| }, | |
| { | |
| "epoch": 1.3407079646017699, | |
| "grad_norm": 0.149121955037117, | |
| "learning_rate": 6.763760432745475e-06, | |
| "loss": 0.3737, | |
| "step": 606 | |
| }, | |
| { | |
| "epoch": 1.342920353982301, | |
| "grad_norm": 0.14145825803279877, | |
| "learning_rate": 6.751706888938665e-06, | |
| "loss": 0.3786, | |
| "step": 607 | |
| }, | |
| { | |
| "epoch": 1.3451327433628317, | |
| "grad_norm": 0.15232518315315247, | |
| "learning_rate": 6.739641729541645e-06, | |
| "loss": 0.3735, | |
| "step": 608 | |
| }, | |
| { | |
| "epoch": 1.3473451327433628, | |
| "grad_norm": 0.15066224336624146, | |
| "learning_rate": 6.72756503455864e-06, | |
| "loss": 0.3678, | |
| "step": 609 | |
| }, | |
| { | |
| "epoch": 1.3495575221238938, | |
| "grad_norm": 0.14223569631576538, | |
| "learning_rate": 6.715476884070362e-06, | |
| "loss": 0.3834, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 1.3517699115044248, | |
| "grad_norm": 0.13839849829673767, | |
| "learning_rate": 6.703377358233489e-06, | |
| "loss": 0.3744, | |
| "step": 611 | |
| }, | |
| { | |
| "epoch": 1.3539823008849559, | |
| "grad_norm": 0.14612992107868195, | |
| "learning_rate": 6.691266537280128e-06, | |
| "loss": 0.3845, | |
| "step": 612 | |
| }, | |
| { | |
| "epoch": 1.3561946902654867, | |
| "grad_norm": 0.14740517735481262, | |
| "learning_rate": 6.679144501517283e-06, | |
| "loss": 0.3871, | |
| "step": 613 | |
| }, | |
| { | |
| "epoch": 1.3584070796460177, | |
| "grad_norm": 0.13791871070861816, | |
| "learning_rate": 6.667011331326324e-06, | |
| "loss": 0.3644, | |
| "step": 614 | |
| }, | |
| { | |
| "epoch": 1.3606194690265487, | |
| "grad_norm": 0.15612854063510895, | |
| "learning_rate": 6.654867107162454e-06, | |
| "loss": 0.3771, | |
| "step": 615 | |
| }, | |
| { | |
| "epoch": 1.3628318584070795, | |
| "grad_norm": 0.14010357856750488, | |
| "learning_rate": 6.6427119095541745e-06, | |
| "loss": 0.3658, | |
| "step": 616 | |
| }, | |
| { | |
| "epoch": 1.3650442477876106, | |
| "grad_norm": 0.14571578800678253, | |
| "learning_rate": 6.6305458191027525e-06, | |
| "loss": 0.3761, | |
| "step": 617 | |
| }, | |
| { | |
| "epoch": 1.3672566371681416, | |
| "grad_norm": 0.1459769606590271, | |
| "learning_rate": 6.618368916481686e-06, | |
| "loss": 0.3761, | |
| "step": 618 | |
| }, | |
| { | |
| "epoch": 1.3694690265486726, | |
| "grad_norm": 0.154235377907753, | |
| "learning_rate": 6.606181282436166e-06, | |
| "loss": 0.3721, | |
| "step": 619 | |
| }, | |
| { | |
| "epoch": 1.3716814159292037, | |
| "grad_norm": 0.1487996131181717, | |
| "learning_rate": 6.593982997782549e-06, | |
| "loss": 0.3735, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 1.3738938053097345, | |
| "grad_norm": 0.1267055720090866, | |
| "learning_rate": 6.58177414340781e-06, | |
| "loss": 0.3798, | |
| "step": 621 | |
| }, | |
| { | |
| "epoch": 1.3761061946902655, | |
| "grad_norm": 0.18154090642929077, | |
| "learning_rate": 6.569554800269014e-06, | |
| "loss": 0.3767, | |
| "step": 622 | |
| }, | |
| { | |
| "epoch": 1.3783185840707963, | |
| "grad_norm": 0.14018799364566803, | |
| "learning_rate": 6.557325049392781e-06, | |
| "loss": 0.3635, | |
| "step": 623 | |
| }, | |
| { | |
| "epoch": 1.3805309734513274, | |
| "grad_norm": 0.18179184198379517, | |
| "learning_rate": 6.545084971874738e-06, | |
| "loss": 0.3787, | |
| "step": 624 | |
| }, | |
| { | |
| "epoch": 1.3827433628318584, | |
| "grad_norm": 0.13475830852985382, | |
| "learning_rate": 6.532834648878993e-06, | |
| "loss": 0.3721, | |
| "step": 625 | |
| }, | |
| { | |
| "epoch": 1.3849557522123894, | |
| "grad_norm": 0.16122294962406158, | |
| "learning_rate": 6.520574161637591e-06, | |
| "loss": 0.3839, | |
| "step": 626 | |
| }, | |
| { | |
| "epoch": 1.3871681415929205, | |
| "grad_norm": 0.13646018505096436, | |
| "learning_rate": 6.5083035914499736e-06, | |
| "loss": 0.3766, | |
| "step": 627 | |
| }, | |
| { | |
| "epoch": 1.3893805309734513, | |
| "grad_norm": 0.17333966493606567, | |
| "learning_rate": 6.496023019682447e-06, | |
| "loss": 0.3701, | |
| "step": 628 | |
| }, | |
| { | |
| "epoch": 1.3915929203539823, | |
| "grad_norm": 0.14083491265773773, | |
| "learning_rate": 6.483732527767633e-06, | |
| "loss": 0.3803, | |
| "step": 629 | |
| }, | |
| { | |
| "epoch": 1.3938053097345133, | |
| "grad_norm": 0.16099579632282257, | |
| "learning_rate": 6.4714321972039395e-06, | |
| "loss": 0.3743, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 1.3960176991150441, | |
| "grad_norm": 0.1497550904750824, | |
| "learning_rate": 6.459122109555011e-06, | |
| "loss": 0.3649, | |
| "step": 631 | |
| }, | |
| { | |
| "epoch": 1.3982300884955752, | |
| "grad_norm": 0.16616369783878326, | |
| "learning_rate": 6.4468023464491906e-06, | |
| "loss": 0.3748, | |
| "step": 632 | |
| }, | |
| { | |
| "epoch": 1.4004424778761062, | |
| "grad_norm": 0.15206293761730194, | |
| "learning_rate": 6.434472989578983e-06, | |
| "loss": 0.3732, | |
| "step": 633 | |
| }, | |
| { | |
| "epoch": 1.4026548672566372, | |
| "grad_norm": 0.15567412972450256, | |
| "learning_rate": 6.422134120700506e-06, | |
| "loss": 0.3752, | |
| "step": 634 | |
| }, | |
| { | |
| "epoch": 1.4048672566371683, | |
| "grad_norm": 0.1447472721338272, | |
| "learning_rate": 6.409785821632952e-06, | |
| "loss": 0.3775, | |
| "step": 635 | |
| }, | |
| { | |
| "epoch": 1.407079646017699, | |
| "grad_norm": 0.1411687433719635, | |
| "learning_rate": 6.397428174258048e-06, | |
| "loss": 0.3827, | |
| "step": 636 | |
| }, | |
| { | |
| "epoch": 1.4092920353982301, | |
| "grad_norm": 0.17297664284706116, | |
| "learning_rate": 6.385061260519507e-06, | |
| "loss": 0.3826, | |
| "step": 637 | |
| }, | |
| { | |
| "epoch": 1.411504424778761, | |
| "grad_norm": 0.1365060657262802, | |
| "learning_rate": 6.3726851624224875e-06, | |
| "loss": 0.3818, | |
| "step": 638 | |
| }, | |
| { | |
| "epoch": 1.413716814159292, | |
| "grad_norm": 0.15566328167915344, | |
| "learning_rate": 6.360299962033051e-06, | |
| "loss": 0.3718, | |
| "step": 639 | |
| }, | |
| { | |
| "epoch": 1.415929203539823, | |
| "grad_norm": 0.13370470702648163, | |
| "learning_rate": 6.347905741477613e-06, | |
| "loss": 0.3708, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 1.418141592920354, | |
| "grad_norm": 0.15631158649921417, | |
| "learning_rate": 6.335502582942409e-06, | |
| "loss": 0.381, | |
| "step": 641 | |
| }, | |
| { | |
| "epoch": 1.420353982300885, | |
| "grad_norm": 0.15640929341316223, | |
| "learning_rate": 6.323090568672935e-06, | |
| "loss": 0.3673, | |
| "step": 642 | |
| }, | |
| { | |
| "epoch": 1.4225663716814159, | |
| "grad_norm": 0.13334697484970093, | |
| "learning_rate": 6.310669780973414e-06, | |
| "loss": 0.3827, | |
| "step": 643 | |
| }, | |
| { | |
| "epoch": 1.424778761061947, | |
| "grad_norm": 0.1540810763835907, | |
| "learning_rate": 6.298240302206242e-06, | |
| "loss": 0.3776, | |
| "step": 644 | |
| }, | |
| { | |
| "epoch": 1.426991150442478, | |
| "grad_norm": 0.1316145956516266, | |
| "learning_rate": 6.285802214791448e-06, | |
| "loss": 0.3844, | |
| "step": 645 | |
| }, | |
| { | |
| "epoch": 1.4292035398230087, | |
| "grad_norm": 0.14876599609851837, | |
| "learning_rate": 6.273355601206143e-06, | |
| "loss": 0.3691, | |
| "step": 646 | |
| }, | |
| { | |
| "epoch": 1.4314159292035398, | |
| "grad_norm": 0.1518520712852478, | |
| "learning_rate": 6.260900543983982e-06, | |
| "loss": 0.378, | |
| "step": 647 | |
| }, | |
| { | |
| "epoch": 1.4336283185840708, | |
| "grad_norm": 0.1387074887752533, | |
| "learning_rate": 6.2484371257146e-06, | |
| "loss": 0.3791, | |
| "step": 648 | |
| }, | |
| { | |
| "epoch": 1.4358407079646018, | |
| "grad_norm": 0.16339018940925598, | |
| "learning_rate": 6.235965429043082e-06, | |
| "loss": 0.3655, | |
| "step": 649 | |
| }, | |
| { | |
| "epoch": 1.4380530973451329, | |
| "grad_norm": 0.15789134800434113, | |
| "learning_rate": 6.223485536669401e-06, | |
| "loss": 0.3773, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 1.4402654867256637, | |
| "grad_norm": 0.145791694521904, | |
| "learning_rate": 6.210997531347879e-06, | |
| "loss": 0.3766, | |
| "step": 651 | |
| }, | |
| { | |
| "epoch": 1.4424778761061947, | |
| "grad_norm": 0.16261202096939087, | |
| "learning_rate": 6.1985014958866386e-06, | |
| "loss": 0.3708, | |
| "step": 652 | |
| }, | |
| { | |
| "epoch": 1.4446902654867257, | |
| "grad_norm": 0.14985325932502747, | |
| "learning_rate": 6.185997513147043e-06, | |
| "loss": 0.3868, | |
| "step": 653 | |
| }, | |
| { | |
| "epoch": 1.4469026548672566, | |
| "grad_norm": 0.1505439281463623, | |
| "learning_rate": 6.173485666043159e-06, | |
| "loss": 0.3803, | |
| "step": 654 | |
| }, | |
| { | |
| "epoch": 1.4491150442477876, | |
| "grad_norm": 0.13515354692935944, | |
| "learning_rate": 6.160966037541201e-06, | |
| "loss": 0.3704, | |
| "step": 655 | |
| }, | |
| { | |
| "epoch": 1.4513274336283186, | |
| "grad_norm": 0.14188633859157562, | |
| "learning_rate": 6.148438710658979e-06, | |
| "loss": 0.3761, | |
| "step": 656 | |
| }, | |
| { | |
| "epoch": 1.4535398230088497, | |
| "grad_norm": 0.15125387907028198, | |
| "learning_rate": 6.135903768465356e-06, | |
| "loss": 0.3812, | |
| "step": 657 | |
| }, | |
| { | |
| "epoch": 1.4557522123893805, | |
| "grad_norm": 0.13983094692230225, | |
| "learning_rate": 6.123361294079691e-06, | |
| "loss": 0.3757, | |
| "step": 658 | |
| }, | |
| { | |
| "epoch": 1.4579646017699115, | |
| "grad_norm": 0.13178189098834991, | |
| "learning_rate": 6.110811370671286e-06, | |
| "loss": 0.3757, | |
| "step": 659 | |
| }, | |
| { | |
| "epoch": 1.4601769911504425, | |
| "grad_norm": 0.13429959118366241, | |
| "learning_rate": 6.098254081458839e-06, | |
| "loss": 0.378, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 1.4623893805309733, | |
| "grad_norm": 0.14196458458900452, | |
| "learning_rate": 6.085689509709893e-06, | |
| "loss": 0.3817, | |
| "step": 661 | |
| }, | |
| { | |
| "epoch": 1.4646017699115044, | |
| "grad_norm": 0.13938362896442413, | |
| "learning_rate": 6.07311773874028e-06, | |
| "loss": 0.366, | |
| "step": 662 | |
| }, | |
| { | |
| "epoch": 1.4668141592920354, | |
| "grad_norm": 0.13635776937007904, | |
| "learning_rate": 6.060538851913568e-06, | |
| "loss": 0.3853, | |
| "step": 663 | |
| }, | |
| { | |
| "epoch": 1.4690265486725664, | |
| "grad_norm": 0.15285412967205048, | |
| "learning_rate": 6.047952932640513e-06, | |
| "loss": 0.3793, | |
| "step": 664 | |
| }, | |
| { | |
| "epoch": 1.4712389380530975, | |
| "grad_norm": 0.14070792496204376, | |
| "learning_rate": 6.035360064378504e-06, | |
| "loss": 0.3772, | |
| "step": 665 | |
| }, | |
| { | |
| "epoch": 1.4734513274336283, | |
| "grad_norm": 0.14458097517490387, | |
| "learning_rate": 6.022760330631006e-06, | |
| "loss": 0.3866, | |
| "step": 666 | |
| }, | |
| { | |
| "epoch": 1.4756637168141593, | |
| "grad_norm": 0.15231792628765106, | |
| "learning_rate": 6.01015381494701e-06, | |
| "loss": 0.3714, | |
| "step": 667 | |
| }, | |
| { | |
| "epoch": 1.4778761061946903, | |
| "grad_norm": 0.16281698644161224, | |
| "learning_rate": 5.997540600920479e-06, | |
| "loss": 0.3855, | |
| "step": 668 | |
| }, | |
| { | |
| "epoch": 1.4800884955752212, | |
| "grad_norm": 0.1445644348859787, | |
| "learning_rate": 5.984920772189793e-06, | |
| "loss": 0.3861, | |
| "step": 669 | |
| }, | |
| { | |
| "epoch": 1.4823008849557522, | |
| "grad_norm": 0.15494373440742493, | |
| "learning_rate": 5.972294412437194e-06, | |
| "loss": 0.3752, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 1.4845132743362832, | |
| "grad_norm": 0.1489405632019043, | |
| "learning_rate": 5.959661605388229e-06, | |
| "loss": 0.3674, | |
| "step": 671 | |
| }, | |
| { | |
| "epoch": 1.4867256637168142, | |
| "grad_norm": 0.13303735852241516, | |
| "learning_rate": 5.947022434811202e-06, | |
| "loss": 0.3668, | |
| "step": 672 | |
| }, | |
| { | |
| "epoch": 1.488938053097345, | |
| "grad_norm": 0.16998596489429474, | |
| "learning_rate": 5.934376984516608e-06, | |
| "loss": 0.3856, | |
| "step": 673 | |
| }, | |
| { | |
| "epoch": 1.491150442477876, | |
| "grad_norm": 0.1437830924987793, | |
| "learning_rate": 5.921725338356585e-06, | |
| "loss": 0.3686, | |
| "step": 674 | |
| }, | |
| { | |
| "epoch": 1.4933628318584071, | |
| "grad_norm": 0.15047383308410645, | |
| "learning_rate": 5.909067580224359e-06, | |
| "loss": 0.3714, | |
| "step": 675 | |
| }, | |
| { | |
| "epoch": 1.495575221238938, | |
| "grad_norm": 0.15299569070339203, | |
| "learning_rate": 5.896403794053679e-06, | |
| "loss": 0.368, | |
| "step": 676 | |
| }, | |
| { | |
| "epoch": 1.497787610619469, | |
| "grad_norm": 0.14748515188694, | |
| "learning_rate": 5.883734063818272e-06, | |
| "loss": 0.3693, | |
| "step": 677 | |
| }, | |
| { | |
| "epoch": 1.5, | |
| "grad_norm": 0.1613292545080185, | |
| "learning_rate": 5.871058473531273e-06, | |
| "loss": 0.3722, | |
| "step": 678 | |
| }, | |
| { | |
| "epoch": 1.502212389380531, | |
| "grad_norm": 0.14528776705265045, | |
| "learning_rate": 5.858377107244683e-06, | |
| "loss": 0.372, | |
| "step": 679 | |
| }, | |
| { | |
| "epoch": 1.504424778761062, | |
| "grad_norm": 0.15699909627437592, | |
| "learning_rate": 5.845690049048799e-06, | |
| "loss": 0.3737, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 1.5066371681415929, | |
| "grad_norm": 0.15869326889514923, | |
| "learning_rate": 5.83299738307166e-06, | |
| "loss": 0.3801, | |
| "step": 681 | |
| }, | |
| { | |
| "epoch": 1.508849557522124, | |
| "grad_norm": 0.1448500007390976, | |
| "learning_rate": 5.820299193478496e-06, | |
| "loss": 0.3838, | |
| "step": 682 | |
| }, | |
| { | |
| "epoch": 1.5110619469026547, | |
| "grad_norm": 0.16063882410526276, | |
| "learning_rate": 5.807595564471157e-06, | |
| "loss": 0.3716, | |
| "step": 683 | |
| }, | |
| { | |
| "epoch": 1.5132743362831858, | |
| "grad_norm": 0.14111746847629547, | |
| "learning_rate": 5.794886580287565e-06, | |
| "loss": 0.3769, | |
| "step": 684 | |
| }, | |
| { | |
| "epoch": 1.5154867256637168, | |
| "grad_norm": 0.16199442744255066, | |
| "learning_rate": 5.782172325201155e-06, | |
| "loss": 0.374, | |
| "step": 685 | |
| }, | |
| { | |
| "epoch": 1.5176991150442478, | |
| "grad_norm": 0.14267931878566742, | |
| "learning_rate": 5.76945288352031e-06, | |
| "loss": 0.369, | |
| "step": 686 | |
| }, | |
| { | |
| "epoch": 1.5199115044247788, | |
| "grad_norm": 0.14546652138233185, | |
| "learning_rate": 5.756728339587806e-06, | |
| "loss": 0.3732, | |
| "step": 687 | |
| }, | |
| { | |
| "epoch": 1.5221238938053099, | |
| "grad_norm": 0.17248912155628204, | |
| "learning_rate": 5.743998777780252e-06, | |
| "loss": 0.3788, | |
| "step": 688 | |
| }, | |
| { | |
| "epoch": 1.5243362831858407, | |
| "grad_norm": 0.16043299436569214, | |
| "learning_rate": 5.731264282507531e-06, | |
| "loss": 0.3834, | |
| "step": 689 | |
| }, | |
| { | |
| "epoch": 1.5265486725663717, | |
| "grad_norm": 0.1646987795829773, | |
| "learning_rate": 5.71852493821224e-06, | |
| "loss": 0.3778, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 1.5287610619469025, | |
| "grad_norm": 0.1607351303100586, | |
| "learning_rate": 5.7057808293691305e-06, | |
| "loss": 0.3764, | |
| "step": 691 | |
| }, | |
| { | |
| "epoch": 1.5309734513274336, | |
| "grad_norm": 0.16537661850452423, | |
| "learning_rate": 5.6930320404845475e-06, | |
| "loss": 0.3757, | |
| "step": 692 | |
| }, | |
| { | |
| "epoch": 1.5331858407079646, | |
| "grad_norm": 0.16083936393260956, | |
| "learning_rate": 5.680278656095868e-06, | |
| "loss": 0.3768, | |
| "step": 693 | |
| }, | |
| { | |
| "epoch": 1.5353982300884956, | |
| "grad_norm": 0.1676769256591797, | |
| "learning_rate": 5.6675207607709426e-06, | |
| "loss": 0.3799, | |
| "step": 694 | |
| }, | |
| { | |
| "epoch": 1.5376106194690267, | |
| "grad_norm": 0.1764930635690689, | |
| "learning_rate": 5.654758439107533e-06, | |
| "loss": 0.3736, | |
| "step": 695 | |
| }, | |
| { | |
| "epoch": 1.5398230088495575, | |
| "grad_norm": 0.14429877698421478, | |
| "learning_rate": 5.641991775732756e-06, | |
| "loss": 0.3689, | |
| "step": 696 | |
| }, | |
| { | |
| "epoch": 1.5420353982300885, | |
| "grad_norm": 0.17751586437225342, | |
| "learning_rate": 5.629220855302513e-06, | |
| "loss": 0.3798, | |
| "step": 697 | |
| }, | |
| { | |
| "epoch": 1.5442477876106193, | |
| "grad_norm": 0.15950946509838104, | |
| "learning_rate": 5.6164457625009386e-06, | |
| "loss": 0.3776, | |
| "step": 698 | |
| }, | |
| { | |
| "epoch": 1.5464601769911503, | |
| "grad_norm": 0.1793259084224701, | |
| "learning_rate": 5.60366658203983e-06, | |
| "loss": 0.3851, | |
| "step": 699 | |
| }, | |
| { | |
| "epoch": 1.5486725663716814, | |
| "grad_norm": 0.1719927191734314, | |
| "learning_rate": 5.590883398658095e-06, | |
| "loss": 0.3795, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 1.5508849557522124, | |
| "grad_norm": 0.15601767599582672, | |
| "learning_rate": 5.5780962971211795e-06, | |
| "loss": 0.3781, | |
| "step": 701 | |
| }, | |
| { | |
| "epoch": 1.5530973451327434, | |
| "grad_norm": 0.15840476751327515, | |
| "learning_rate": 5.565305362220515e-06, | |
| "loss": 0.3767, | |
| "step": 702 | |
| }, | |
| { | |
| "epoch": 1.5553097345132745, | |
| "grad_norm": 0.16765564680099487, | |
| "learning_rate": 5.552510678772949e-06, | |
| "loss": 0.3825, | |
| "step": 703 | |
| }, | |
| { | |
| "epoch": 1.5575221238938053, | |
| "grad_norm": 0.15824854373931885, | |
| "learning_rate": 5.539712331620186e-06, | |
| "loss": 0.3646, | |
| "step": 704 | |
| }, | |
| { | |
| "epoch": 1.5597345132743363, | |
| "grad_norm": 0.18552853167057037, | |
| "learning_rate": 5.526910405628227e-06, | |
| "loss": 0.3633, | |
| "step": 705 | |
| }, | |
| { | |
| "epoch": 1.5619469026548671, | |
| "grad_norm": 0.14328765869140625, | |
| "learning_rate": 5.514104985686802e-06, | |
| "loss": 0.3725, | |
| "step": 706 | |
| }, | |
| { | |
| "epoch": 1.5641592920353982, | |
| "grad_norm": 0.18817439675331116, | |
| "learning_rate": 5.501296156708812e-06, | |
| "loss": 0.3777, | |
| "step": 707 | |
| }, | |
| { | |
| "epoch": 1.5663716814159292, | |
| "grad_norm": 0.13829733431339264, | |
| "learning_rate": 5.488484003629759e-06, | |
| "loss": 0.3648, | |
| "step": 708 | |
| }, | |
| { | |
| "epoch": 1.5685840707964602, | |
| "grad_norm": 0.1701672077178955, | |
| "learning_rate": 5.475668611407191e-06, | |
| "loss": 0.3723, | |
| "step": 709 | |
| }, | |
| { | |
| "epoch": 1.5707964601769913, | |
| "grad_norm": 0.14328286051750183, | |
| "learning_rate": 5.462850065020133e-06, | |
| "loss": 0.3709, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 1.573008849557522, | |
| "grad_norm": 0.1651555746793747, | |
| "learning_rate": 5.4500284494685275e-06, | |
| "loss": 0.3755, | |
| "step": 711 | |
| }, | |
| { | |
| "epoch": 1.575221238938053, | |
| "grad_norm": 0.16392961144447327, | |
| "learning_rate": 5.437203849772664e-06, | |
| "loss": 0.3764, | |
| "step": 712 | |
| }, | |
| { | |
| "epoch": 1.577433628318584, | |
| "grad_norm": 0.14715544879436493, | |
| "learning_rate": 5.424376350972625e-06, | |
| "loss": 0.3737, | |
| "step": 713 | |
| }, | |
| { | |
| "epoch": 1.579646017699115, | |
| "grad_norm": 0.16308023035526276, | |
| "learning_rate": 5.411546038127715e-06, | |
| "loss": 0.3772, | |
| "step": 714 | |
| }, | |
| { | |
| "epoch": 1.581858407079646, | |
| "grad_norm": 0.18041308224201202, | |
| "learning_rate": 5.398712996315898e-06, | |
| "loss": 0.3732, | |
| "step": 715 | |
| }, | |
| { | |
| "epoch": 1.584070796460177, | |
| "grad_norm": 0.1642719805240631, | |
| "learning_rate": 5.385877310633233e-06, | |
| "loss": 0.3868, | |
| "step": 716 | |
| }, | |
| { | |
| "epoch": 1.586283185840708, | |
| "grad_norm": 0.17112895846366882, | |
| "learning_rate": 5.373039066193312e-06, | |
| "loss": 0.3741, | |
| "step": 717 | |
| }, | |
| { | |
| "epoch": 1.588495575221239, | |
| "grad_norm": 0.1910698413848877, | |
| "learning_rate": 5.360198348126696e-06, | |
| "loss": 0.3843, | |
| "step": 718 | |
| }, | |
| { | |
| "epoch": 1.5907079646017699, | |
| "grad_norm": 0.16182135045528412, | |
| "learning_rate": 5.347355241580344e-06, | |
| "loss": 0.3707, | |
| "step": 719 | |
| }, | |
| { | |
| "epoch": 1.592920353982301, | |
| "grad_norm": 0.15010400116443634, | |
| "learning_rate": 5.334509831717058e-06, | |
| "loss": 0.3743, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 1.5951327433628317, | |
| "grad_norm": 0.1402837634086609, | |
| "learning_rate": 5.321662203714909e-06, | |
| "loss": 0.3779, | |
| "step": 721 | |
| }, | |
| { | |
| "epoch": 1.5973451327433628, | |
| "grad_norm": 0.14762374758720398, | |
| "learning_rate": 5.308812442766679e-06, | |
| "loss": 0.3666, | |
| "step": 722 | |
| }, | |
| { | |
| "epoch": 1.5995575221238938, | |
| "grad_norm": 0.1661965399980545, | |
| "learning_rate": 5.295960634079292e-06, | |
| "loss": 0.3711, | |
| "step": 723 | |
| }, | |
| { | |
| "epoch": 1.6017699115044248, | |
| "grad_norm": 0.1451561152935028, | |
| "learning_rate": 5.283106862873253e-06, | |
| "loss": 0.3804, | |
| "step": 724 | |
| }, | |
| { | |
| "epoch": 1.6039823008849559, | |
| "grad_norm": 0.16984255611896515, | |
| "learning_rate": 5.270251214382078e-06, | |
| "loss": 0.3844, | |
| "step": 725 | |
| }, | |
| { | |
| "epoch": 1.606194690265487, | |
| "grad_norm": 0.14532634615898132, | |
| "learning_rate": 5.257393773851733e-06, | |
| "loss": 0.3637, | |
| "step": 726 | |
| }, | |
| { | |
| "epoch": 1.6084070796460177, | |
| "grad_norm": 0.149654358625412, | |
| "learning_rate": 5.244534626540067e-06, | |
| "loss": 0.3722, | |
| "step": 727 | |
| }, | |
| { | |
| "epoch": 1.6106194690265485, | |
| "grad_norm": 0.14878515899181366, | |
| "learning_rate": 5.231673857716244e-06, | |
| "loss": 0.3775, | |
| "step": 728 | |
| }, | |
| { | |
| "epoch": 1.6128318584070795, | |
| "grad_norm": 0.1508331298828125, | |
| "learning_rate": 5.218811552660184e-06, | |
| "loss": 0.372, | |
| "step": 729 | |
| }, | |
| { | |
| "epoch": 1.6150442477876106, | |
| "grad_norm": 0.14183960855007172, | |
| "learning_rate": 5.205947796661991e-06, | |
| "loss": 0.3736, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 1.6172566371681416, | |
| "grad_norm": 0.14773423969745636, | |
| "learning_rate": 5.193082675021393e-06, | |
| "loss": 0.3699, | |
| "step": 731 | |
| }, | |
| { | |
| "epoch": 1.6194690265486726, | |
| "grad_norm": 0.1459917426109314, | |
| "learning_rate": 5.1802162730471704e-06, | |
| "loss": 0.3742, | |
| "step": 732 | |
| }, | |
| { | |
| "epoch": 1.6216814159292037, | |
| "grad_norm": 0.14351701736450195, | |
| "learning_rate": 5.167348676056596e-06, | |
| "loss": 0.375, | |
| "step": 733 | |
| }, | |
| { | |
| "epoch": 1.6238938053097345, | |
| "grad_norm": 0.14323677122592926, | |
| "learning_rate": 5.154479969374865e-06, | |
| "loss": 0.381, | |
| "step": 734 | |
| }, | |
| { | |
| "epoch": 1.6261061946902655, | |
| "grad_norm": 0.14793291687965393, | |
| "learning_rate": 5.1416102383345315e-06, | |
| "loss": 0.3756, | |
| "step": 735 | |
| }, | |
| { | |
| "epoch": 1.6283185840707963, | |
| "grad_norm": 0.14177453517913818, | |
| "learning_rate": 5.1287395682749444e-06, | |
| "loss": 0.3766, | |
| "step": 736 | |
| }, | |
| { | |
| "epoch": 1.6305309734513274, | |
| "grad_norm": 0.1743677854537964, | |
| "learning_rate": 5.115868044541674e-06, | |
| "loss": 0.3707, | |
| "step": 737 | |
| }, | |
| { | |
| "epoch": 1.6327433628318584, | |
| "grad_norm": 0.14689166843891144, | |
| "learning_rate": 5.102995752485956e-06, | |
| "loss": 0.3672, | |
| "step": 738 | |
| }, | |
| { | |
| "epoch": 1.6349557522123894, | |
| "grad_norm": 0.14640650153160095, | |
| "learning_rate": 5.090122777464121e-06, | |
| "loss": 0.3774, | |
| "step": 739 | |
| }, | |
| { | |
| "epoch": 1.6371681415929205, | |
| "grad_norm": 0.1713770627975464, | |
| "learning_rate": 5.077249204837026e-06, | |
| "loss": 0.3658, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 1.6393805309734515, | |
| "grad_norm": 0.1488184779882431, | |
| "learning_rate": 5.064375119969491e-06, | |
| "loss": 0.3702, | |
| "step": 741 | |
| }, | |
| { | |
| "epoch": 1.6415929203539823, | |
| "grad_norm": 0.14465579390525818, | |
| "learning_rate": 5.051500608229734e-06, | |
| "loss": 0.3652, | |
| "step": 742 | |
| }, | |
| { | |
| "epoch": 1.643805309734513, | |
| "grad_norm": 0.1728200763463974, | |
| "learning_rate": 5.038625754988802e-06, | |
| "loss": 0.3762, | |
| "step": 743 | |
| }, | |
| { | |
| "epoch": 1.6460176991150441, | |
| "grad_norm": 0.14261655509471893, | |
| "learning_rate": 5.025750645620004e-06, | |
| "loss": 0.3812, | |
| "step": 744 | |
| }, | |
| { | |
| "epoch": 1.6482300884955752, | |
| "grad_norm": 0.14125920832157135, | |
| "learning_rate": 5.012875365498357e-06, | |
| "loss": 0.3794, | |
| "step": 745 | |
| }, | |
| { | |
| "epoch": 1.6504424778761062, | |
| "grad_norm": 0.15553857386112213, | |
| "learning_rate": 5e-06, | |
| "loss": 0.3711, | |
| "step": 746 | |
| }, | |
| { | |
| "epoch": 1.6526548672566372, | |
| "grad_norm": 0.14529219269752502, | |
| "learning_rate": 4.9871246345016445e-06, | |
| "loss": 0.3743, | |
| "step": 747 | |
| }, | |
| { | |
| "epoch": 1.6548672566371683, | |
| "grad_norm": 0.13652414083480835, | |
| "learning_rate": 4.974249354379997e-06, | |
| "loss": 0.3773, | |
| "step": 748 | |
| }, | |
| { | |
| "epoch": 1.657079646017699, | |
| "grad_norm": 0.1763504445552826, | |
| "learning_rate": 4.961374245011201e-06, | |
| "loss": 0.3791, | |
| "step": 749 | |
| }, | |
| { | |
| "epoch": 1.6592920353982301, | |
| "grad_norm": 0.14034205675125122, | |
| "learning_rate": 4.948499391770268e-06, | |
| "loss": 0.3848, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 1.661504424778761, | |
| "grad_norm": 0.14464423060417175, | |
| "learning_rate": 4.93562488003051e-06, | |
| "loss": 0.3716, | |
| "step": 751 | |
| }, | |
| { | |
| "epoch": 1.663716814159292, | |
| "grad_norm": 0.17310793697834015, | |
| "learning_rate": 4.922750795162974e-06, | |
| "loss": 0.3722, | |
| "step": 752 | |
| }, | |
| { | |
| "epoch": 1.665929203539823, | |
| "grad_norm": 0.14925870299339294, | |
| "learning_rate": 4.909877222535879e-06, | |
| "loss": 0.3732, | |
| "step": 753 | |
| }, | |
| { | |
| "epoch": 1.668141592920354, | |
| "grad_norm": 0.14656174182891846, | |
| "learning_rate": 4.897004247514044e-06, | |
| "loss": 0.3779, | |
| "step": 754 | |
| }, | |
| { | |
| "epoch": 1.670353982300885, | |
| "grad_norm": 0.17445078492164612, | |
| "learning_rate": 4.884131955458327e-06, | |
| "loss": 0.375, | |
| "step": 755 | |
| }, | |
| { | |
| "epoch": 1.672566371681416, | |
| "grad_norm": 0.17037415504455566, | |
| "learning_rate": 4.871260431725058e-06, | |
| "loss": 0.3852, | |
| "step": 756 | |
| }, | |
| { | |
| "epoch": 1.674778761061947, | |
| "grad_norm": 0.15467847883701324, | |
| "learning_rate": 4.858389761665469e-06, | |
| "loss": 0.3716, | |
| "step": 757 | |
| }, | |
| { | |
| "epoch": 1.676991150442478, | |
| "grad_norm": 0.1854114532470703, | |
| "learning_rate": 4.845520030625136e-06, | |
| "loss": 0.379, | |
| "step": 758 | |
| }, | |
| { | |
| "epoch": 1.6792035398230087, | |
| "grad_norm": 0.14063851535320282, | |
| "learning_rate": 4.832651323943406e-06, | |
| "loss": 0.3794, | |
| "step": 759 | |
| }, | |
| { | |
| "epoch": 1.6814159292035398, | |
| "grad_norm": 0.15527218580245972, | |
| "learning_rate": 4.819783726952831e-06, | |
| "loss": 0.3817, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 1.6836283185840708, | |
| "grad_norm": 0.14957115054130554, | |
| "learning_rate": 4.806917324978608e-06, | |
| "loss": 0.3724, | |
| "step": 761 | |
| }, | |
| { | |
| "epoch": 1.6858407079646018, | |
| "grad_norm": 0.13799116015434265, | |
| "learning_rate": 4.79405220333801e-06, | |
| "loss": 0.3667, | |
| "step": 762 | |
| }, | |
| { | |
| "epoch": 1.6880530973451329, | |
| "grad_norm": 0.13975948095321655, | |
| "learning_rate": 4.781188447339817e-06, | |
| "loss": 0.3765, | |
| "step": 763 | |
| }, | |
| { | |
| "epoch": 1.6902654867256637, | |
| "grad_norm": 0.1460770219564438, | |
| "learning_rate": 4.768326142283757e-06, | |
| "loss": 0.3709, | |
| "step": 764 | |
| }, | |
| { | |
| "epoch": 1.6924778761061947, | |
| "grad_norm": 0.1463456004858017, | |
| "learning_rate": 4.755465373459934e-06, | |
| "loss": 0.383, | |
| "step": 765 | |
| }, | |
| { | |
| "epoch": 1.6946902654867255, | |
| "grad_norm": 0.14482566714286804, | |
| "learning_rate": 4.742606226148268e-06, | |
| "loss": 0.3766, | |
| "step": 766 | |
| }, | |
| { | |
| "epoch": 1.6969026548672566, | |
| "grad_norm": 0.15000230073928833, | |
| "learning_rate": 4.7297487856179224e-06, | |
| "loss": 0.3657, | |
| "step": 767 | |
| }, | |
| { | |
| "epoch": 1.6991150442477876, | |
| "grad_norm": 0.15064841508865356, | |
| "learning_rate": 4.716893137126748e-06, | |
| "loss": 0.369, | |
| "step": 768 | |
| }, | |
| { | |
| "epoch": 1.7013274336283186, | |
| "grad_norm": 0.14898055791854858, | |
| "learning_rate": 4.704039365920709e-06, | |
| "loss": 0.3826, | |
| "step": 769 | |
| }, | |
| { | |
| "epoch": 1.7035398230088497, | |
| "grad_norm": 0.1572868973016739, | |
| "learning_rate": 4.691187557233323e-06, | |
| "loss": 0.3764, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 1.7057522123893807, | |
| "grad_norm": 0.12325187027454376, | |
| "learning_rate": 4.678337796285093e-06, | |
| "loss": 0.3803, | |
| "step": 771 | |
| }, | |
| { | |
| "epoch": 1.7079646017699115, | |
| "grad_norm": 0.1512259542942047, | |
| "learning_rate": 4.665490168282943e-06, | |
| "loss": 0.377, | |
| "step": 772 | |
| }, | |
| { | |
| "epoch": 1.7101769911504425, | |
| "grad_norm": 0.1448717713356018, | |
| "learning_rate": 4.6526447584196575e-06, | |
| "loss": 0.3773, | |
| "step": 773 | |
| }, | |
| { | |
| "epoch": 1.7123893805309733, | |
| "grad_norm": 0.16558457911014557, | |
| "learning_rate": 4.639801651873305e-06, | |
| "loss": 0.3772, | |
| "step": 774 | |
| }, | |
| { | |
| "epoch": 1.7146017699115044, | |
| "grad_norm": 0.14137940108776093, | |
| "learning_rate": 4.6269609338066875e-06, | |
| "loss": 0.3747, | |
| "step": 775 | |
| }, | |
| { | |
| "epoch": 1.7168141592920354, | |
| "grad_norm": 0.14484670758247375, | |
| "learning_rate": 4.614122689366769e-06, | |
| "loss": 0.3832, | |
| "step": 776 | |
| }, | |
| { | |
| "epoch": 1.7190265486725664, | |
| "grad_norm": 0.1538202315568924, | |
| "learning_rate": 4.601287003684104e-06, | |
| "loss": 0.377, | |
| "step": 777 | |
| }, | |
| { | |
| "epoch": 1.7212389380530975, | |
| "grad_norm": 0.15519271790981293, | |
| "learning_rate": 4.588453961872286e-06, | |
| "loss": 0.3653, | |
| "step": 778 | |
| }, | |
| { | |
| "epoch": 1.7234513274336283, | |
| "grad_norm": 0.1556425839662552, | |
| "learning_rate": 4.575623649027376e-06, | |
| "loss": 0.3729, | |
| "step": 779 | |
| }, | |
| { | |
| "epoch": 1.7256637168141593, | |
| "grad_norm": 0.15341098606586456, | |
| "learning_rate": 4.562796150227337e-06, | |
| "loss": 0.3722, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 1.7278761061946901, | |
| "grad_norm": 0.1618175208568573, | |
| "learning_rate": 4.549971550531474e-06, | |
| "loss": 0.3715, | |
| "step": 781 | |
| }, | |
| { | |
| "epoch": 1.7300884955752212, | |
| "grad_norm": 0.1501229852437973, | |
| "learning_rate": 4.537149934979869e-06, | |
| "loss": 0.3617, | |
| "step": 782 | |
| }, | |
| { | |
| "epoch": 1.7323008849557522, | |
| "grad_norm": 0.1526263803243637, | |
| "learning_rate": 4.524331388592812e-06, | |
| "loss": 0.3816, | |
| "step": 783 | |
| }, | |
| { | |
| "epoch": 1.7345132743362832, | |
| "grad_norm": 0.14755792915821075, | |
| "learning_rate": 4.511515996370244e-06, | |
| "loss": 0.3719, | |
| "step": 784 | |
| }, | |
| { | |
| "epoch": 1.7367256637168142, | |
| "grad_norm": 0.16713263094425201, | |
| "learning_rate": 4.498703843291189e-06, | |
| "loss": 0.3833, | |
| "step": 785 | |
| }, | |
| { | |
| "epoch": 1.7389380530973453, | |
| "grad_norm": 0.14121297001838684, | |
| "learning_rate": 4.485895014313198e-06, | |
| "loss": 0.3729, | |
| "step": 786 | |
| }, | |
| { | |
| "epoch": 1.741150442477876, | |
| "grad_norm": 0.1821858286857605, | |
| "learning_rate": 4.4730895943717735e-06, | |
| "loss": 0.3866, | |
| "step": 787 | |
| }, | |
| { | |
| "epoch": 1.7433628318584071, | |
| "grad_norm": 0.150221586227417, | |
| "learning_rate": 4.460287668379815e-06, | |
| "loss": 0.3715, | |
| "step": 788 | |
| }, | |
| { | |
| "epoch": 1.745575221238938, | |
| "grad_norm": 0.16274835169315338, | |
| "learning_rate": 4.447489321227052e-06, | |
| "loss": 0.3703, | |
| "step": 789 | |
| }, | |
| { | |
| "epoch": 1.747787610619469, | |
| "grad_norm": 0.14972063899040222, | |
| "learning_rate": 4.434694637779486e-06, | |
| "loss": 0.3592, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 1.75, | |
| "grad_norm": 0.15914012491703033, | |
| "learning_rate": 4.421903702878822e-06, | |
| "loss": 0.3754, | |
| "step": 791 | |
| }, | |
| { | |
| "epoch": 1.752212389380531, | |
| "grad_norm": 0.1527598351240158, | |
| "learning_rate": 4.409116601341908e-06, | |
| "loss": 0.3754, | |
| "step": 792 | |
| }, | |
| { | |
| "epoch": 1.754424778761062, | |
| "grad_norm": 0.17452898621559143, | |
| "learning_rate": 4.396333417960172e-06, | |
| "loss": 0.3781, | |
| "step": 793 | |
| }, | |
| { | |
| "epoch": 1.7566371681415929, | |
| "grad_norm": 0.1623452603816986, | |
| "learning_rate": 4.383554237499064e-06, | |
| "loss": 0.3787, | |
| "step": 794 | |
| }, | |
| { | |
| "epoch": 1.758849557522124, | |
| "grad_norm": 0.14795322716236115, | |
| "learning_rate": 4.37077914469749e-06, | |
| "loss": 0.3723, | |
| "step": 795 | |
| }, | |
| { | |
| "epoch": 1.7610619469026547, | |
| "grad_norm": 0.18765774369239807, | |
| "learning_rate": 4.358008224267245e-06, | |
| "loss": 0.3643, | |
| "step": 796 | |
| }, | |
| { | |
| "epoch": 1.7632743362831858, | |
| "grad_norm": 0.1436903327703476, | |
| "learning_rate": 4.345241560892467e-06, | |
| "loss": 0.3757, | |
| "step": 797 | |
| }, | |
| { | |
| "epoch": 1.7654867256637168, | |
| "grad_norm": 0.17049679160118103, | |
| "learning_rate": 4.332479239229059e-06, | |
| "loss": 0.3775, | |
| "step": 798 | |
| }, | |
| { | |
| "epoch": 1.7676991150442478, | |
| "grad_norm": 0.15550793707370758, | |
| "learning_rate": 4.319721343904133e-06, | |
| "loss": 0.3825, | |
| "step": 799 | |
| }, | |
| { | |
| "epoch": 1.7699115044247788, | |
| "grad_norm": 0.1573866903781891, | |
| "learning_rate": 4.306967959515454e-06, | |
| "loss": 0.3696, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 1.7721238938053099, | |
| "grad_norm": 0.16475972533226013, | |
| "learning_rate": 4.29421917063087e-06, | |
| "loss": 0.3789, | |
| "step": 801 | |
| }, | |
| { | |
| "epoch": 1.7743362831858407, | |
| "grad_norm": 0.13793496787548065, | |
| "learning_rate": 4.2814750617877615e-06, | |
| "loss": 0.3718, | |
| "step": 802 | |
| }, | |
| { | |
| "epoch": 1.7765486725663717, | |
| "grad_norm": 0.1852518618106842, | |
| "learning_rate": 4.268735717492472e-06, | |
| "loss": 0.3672, | |
| "step": 803 | |
| }, | |
| { | |
| "epoch": 1.7787610619469025, | |
| "grad_norm": 0.14557519555091858, | |
| "learning_rate": 4.256001222219751e-06, | |
| "loss": 0.3802, | |
| "step": 804 | |
| }, | |
| { | |
| "epoch": 1.7809734513274336, | |
| "grad_norm": 0.16771559417247772, | |
| "learning_rate": 4.243271660412197e-06, | |
| "loss": 0.365, | |
| "step": 805 | |
| }, | |
| { | |
| "epoch": 1.7831858407079646, | |
| "grad_norm": 0.15564660727977753, | |
| "learning_rate": 4.230547116479691e-06, | |
| "loss": 0.3758, | |
| "step": 806 | |
| }, | |
| { | |
| "epoch": 1.7853982300884956, | |
| "grad_norm": 0.18923735618591309, | |
| "learning_rate": 4.217827674798845e-06, | |
| "loss": 0.3865, | |
| "step": 807 | |
| }, | |
| { | |
| "epoch": 1.7876106194690267, | |
| "grad_norm": 0.14485545456409454, | |
| "learning_rate": 4.2051134197124354e-06, | |
| "loss": 0.3771, | |
| "step": 808 | |
| }, | |
| { | |
| "epoch": 1.7898230088495575, | |
| "grad_norm": 0.17624424397945404, | |
| "learning_rate": 4.192404435528844e-06, | |
| "loss": 0.3785, | |
| "step": 809 | |
| }, | |
| { | |
| "epoch": 1.7920353982300885, | |
| "grad_norm": 0.16202057898044586, | |
| "learning_rate": 4.179700806521506e-06, | |
| "loss": 0.3815, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 1.7942477876106193, | |
| "grad_norm": 0.13739414513111115, | |
| "learning_rate": 4.167002616928341e-06, | |
| "loss": 0.369, | |
| "step": 811 | |
| }, | |
| { | |
| "epoch": 1.7964601769911503, | |
| "grad_norm": 0.1619684100151062, | |
| "learning_rate": 4.154309950951203e-06, | |
| "loss": 0.3843, | |
| "step": 812 | |
| }, | |
| { | |
| "epoch": 1.7986725663716814, | |
| "grad_norm": 0.14789964258670807, | |
| "learning_rate": 4.141622892755318e-06, | |
| "loss": 0.3842, | |
| "step": 813 | |
| }, | |
| { | |
| "epoch": 1.8008849557522124, | |
| "grad_norm": 0.16962085664272308, | |
| "learning_rate": 4.128941526468728e-06, | |
| "loss": 0.3713, | |
| "step": 814 | |
| }, | |
| { | |
| "epoch": 1.8030973451327434, | |
| "grad_norm": 0.16202415525913239, | |
| "learning_rate": 4.116265936181731e-06, | |
| "loss": 0.3629, | |
| "step": 815 | |
| }, | |
| { | |
| "epoch": 1.8053097345132745, | |
| "grad_norm": 0.1684337556362152, | |
| "learning_rate": 4.103596205946323e-06, | |
| "loss": 0.383, | |
| "step": 816 | |
| }, | |
| { | |
| "epoch": 1.8075221238938053, | |
| "grad_norm": 0.14394833147525787, | |
| "learning_rate": 4.090932419775642e-06, | |
| "loss": 0.3619, | |
| "step": 817 | |
| }, | |
| { | |
| "epoch": 1.8097345132743363, | |
| "grad_norm": 0.15720577538013458, | |
| "learning_rate": 4.078274661643415e-06, | |
| "loss": 0.3705, | |
| "step": 818 | |
| }, | |
| { | |
| "epoch": 1.8119469026548671, | |
| "grad_norm": 0.1556907743215561, | |
| "learning_rate": 4.065623015483394e-06, | |
| "loss": 0.3794, | |
| "step": 819 | |
| }, | |
| { | |
| "epoch": 1.8141592920353982, | |
| "grad_norm": 0.16392920911312103, | |
| "learning_rate": 4.0529775651888e-06, | |
| "loss": 0.3671, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 1.8163716814159292, | |
| "grad_norm": 0.13912513852119446, | |
| "learning_rate": 4.040338394611772e-06, | |
| "loss": 0.3662, | |
| "step": 821 | |
| }, | |
| { | |
| "epoch": 1.8185840707964602, | |
| "grad_norm": 0.16647394001483917, | |
| "learning_rate": 4.027705587562808e-06, | |
| "loss": 0.3716, | |
| "step": 822 | |
| }, | |
| { | |
| "epoch": 1.8207964601769913, | |
| "grad_norm": 0.15679362416267395, | |
| "learning_rate": 4.015079227810208e-06, | |
| "loss": 0.3732, | |
| "step": 823 | |
| }, | |
| { | |
| "epoch": 1.823008849557522, | |
| "grad_norm": 0.16492421925067902, | |
| "learning_rate": 4.002459399079523e-06, | |
| "loss": 0.3749, | |
| "step": 824 | |
| }, | |
| { | |
| "epoch": 1.825221238938053, | |
| "grad_norm": 0.1708700805902481, | |
| "learning_rate": 3.9898461850529925e-06, | |
| "loss": 0.3775, | |
| "step": 825 | |
| }, | |
| { | |
| "epoch": 1.827433628318584, | |
| "grad_norm": 0.14519424736499786, | |
| "learning_rate": 3.977239669368998e-06, | |
| "loss": 0.3708, | |
| "step": 826 | |
| }, | |
| { | |
| "epoch": 1.829646017699115, | |
| "grad_norm": 0.17130383849143982, | |
| "learning_rate": 3.964639935621498e-06, | |
| "loss": 0.361, | |
| "step": 827 | |
| }, | |
| { | |
| "epoch": 1.831858407079646, | |
| "grad_norm": 0.1383930891752243, | |
| "learning_rate": 3.952047067359488e-06, | |
| "loss": 0.3771, | |
| "step": 828 | |
| }, | |
| { | |
| "epoch": 1.834070796460177, | |
| "grad_norm": 0.16033463180065155, | |
| "learning_rate": 3.939461148086434e-06, | |
| "loss": 0.378, | |
| "step": 829 | |
| }, | |
| { | |
| "epoch": 1.836283185840708, | |
| "grad_norm": 0.1625954806804657, | |
| "learning_rate": 3.926882261259723e-06, | |
| "loss": 0.3696, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 1.838495575221239, | |
| "grad_norm": 0.14928433299064636, | |
| "learning_rate": 3.9143104902901085e-06, | |
| "loss": 0.3771, | |
| "step": 831 | |
| }, | |
| { | |
| "epoch": 1.8407079646017699, | |
| "grad_norm": 0.14306089282035828, | |
| "learning_rate": 3.901745918541162e-06, | |
| "loss": 0.3773, | |
| "step": 832 | |
| }, | |
| { | |
| "epoch": 1.842920353982301, | |
| "grad_norm": 0.1791837066411972, | |
| "learning_rate": 3.889188629328716e-06, | |
| "loss": 0.3767, | |
| "step": 833 | |
| }, | |
| { | |
| "epoch": 1.8451327433628317, | |
| "grad_norm": 0.14630135893821716, | |
| "learning_rate": 3.876638705920312e-06, | |
| "loss": 0.3726, | |
| "step": 834 | |
| }, | |
| { | |
| "epoch": 1.8473451327433628, | |
| "grad_norm": 0.15009824931621552, | |
| "learning_rate": 3.864096231534645e-06, | |
| "loss": 0.3656, | |
| "step": 835 | |
| }, | |
| { | |
| "epoch": 1.8495575221238938, | |
| "grad_norm": 0.14151619374752045, | |
| "learning_rate": 3.851561289341023e-06, | |
| "loss": 0.3777, | |
| "step": 836 | |
| }, | |
| { | |
| "epoch": 1.8517699115044248, | |
| "grad_norm": 0.15849147737026215, | |
| "learning_rate": 3.839033962458802e-06, | |
| "loss": 0.3706, | |
| "step": 837 | |
| }, | |
| { | |
| "epoch": 1.8539823008849559, | |
| "grad_norm": 0.14633609354496002, | |
| "learning_rate": 3.826514333956843e-06, | |
| "loss": 0.3643, | |
| "step": 838 | |
| }, | |
| { | |
| "epoch": 1.856194690265487, | |
| "grad_norm": 0.1464887112379074, | |
| "learning_rate": 3.8140024868529585e-06, | |
| "loss": 0.3688, | |
| "step": 839 | |
| }, | |
| { | |
| "epoch": 1.8584070796460177, | |
| "grad_norm": 0.16905274987220764, | |
| "learning_rate": 3.8014985041133627e-06, | |
| "loss": 0.3862, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 1.8606194690265485, | |
| "grad_norm": 0.1511407345533371, | |
| "learning_rate": 3.789002468652121e-06, | |
| "loss": 0.3659, | |
| "step": 841 | |
| }, | |
| { | |
| "epoch": 1.8628318584070795, | |
| "grad_norm": 0.13423565030097961, | |
| "learning_rate": 3.7765144633306006e-06, | |
| "loss": 0.3716, | |
| "step": 842 | |
| }, | |
| { | |
| "epoch": 1.8650442477876106, | |
| "grad_norm": 0.1653309166431427, | |
| "learning_rate": 3.76403457095692e-06, | |
| "loss": 0.3645, | |
| "step": 843 | |
| }, | |
| { | |
| "epoch": 1.8672566371681416, | |
| "grad_norm": 0.1364339143037796, | |
| "learning_rate": 3.7515628742854006e-06, | |
| "loss": 0.3665, | |
| "step": 844 | |
| }, | |
| { | |
| "epoch": 1.8694690265486726, | |
| "grad_norm": 0.16295693814754486, | |
| "learning_rate": 3.7390994560160187e-06, | |
| "loss": 0.3732, | |
| "step": 845 | |
| }, | |
| { | |
| "epoch": 1.8716814159292037, | |
| "grad_norm": 0.13839292526245117, | |
| "learning_rate": 3.726644398793857e-06, | |
| "loss": 0.3687, | |
| "step": 846 | |
| }, | |
| { | |
| "epoch": 1.8738938053097345, | |
| "grad_norm": 0.15138795971870422, | |
| "learning_rate": 3.714197785208554e-06, | |
| "loss": 0.3795, | |
| "step": 847 | |
| }, | |
| { | |
| "epoch": 1.8761061946902655, | |
| "grad_norm": 0.14297796785831451, | |
| "learning_rate": 3.701759697793761e-06, | |
| "loss": 0.3879, | |
| "step": 848 | |
| }, | |
| { | |
| "epoch": 1.8783185840707963, | |
| "grad_norm": 0.15279804170131683, | |
| "learning_rate": 3.689330219026588e-06, | |
| "loss": 0.3725, | |
| "step": 849 | |
| }, | |
| { | |
| "epoch": 1.8805309734513274, | |
| "grad_norm": 0.1471881866455078, | |
| "learning_rate": 3.6769094313270647e-06, | |
| "loss": 0.3709, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 1.8827433628318584, | |
| "grad_norm": 0.153276726603508, | |
| "learning_rate": 3.6644974170575907e-06, | |
| "loss": 0.3666, | |
| "step": 851 | |
| }, | |
| { | |
| "epoch": 1.8849557522123894, | |
| "grad_norm": 0.13241925835609436, | |
| "learning_rate": 3.652094258522387e-06, | |
| "loss": 0.3726, | |
| "step": 852 | |
| }, | |
| { | |
| "epoch": 1.8871681415929205, | |
| "grad_norm": 0.14609576761722565, | |
| "learning_rate": 3.6397000379669513e-06, | |
| "loss": 0.3795, | |
| "step": 853 | |
| }, | |
| { | |
| "epoch": 1.8893805309734515, | |
| "grad_norm": 0.13605985045433044, | |
| "learning_rate": 3.627314837577514e-06, | |
| "loss": 0.3688, | |
| "step": 854 | |
| }, | |
| { | |
| "epoch": 1.8915929203539823, | |
| "grad_norm": 0.14954599738121033, | |
| "learning_rate": 3.6149387394804946e-06, | |
| "loss": 0.3654, | |
| "step": 855 | |
| }, | |
| { | |
| "epoch": 1.893805309734513, | |
| "grad_norm": 0.1630832552909851, | |
| "learning_rate": 3.6025718257419532e-06, | |
| "loss": 0.3837, | |
| "step": 856 | |
| }, | |
| { | |
| "epoch": 1.8960176991150441, | |
| "grad_norm": 0.15406356751918793, | |
| "learning_rate": 3.590214178367049e-06, | |
| "loss": 0.3705, | |
| "step": 857 | |
| }, | |
| { | |
| "epoch": 1.8982300884955752, | |
| "grad_norm": 0.14090083539485931, | |
| "learning_rate": 3.5778658792994957e-06, | |
| "loss": 0.3754, | |
| "step": 858 | |
| }, | |
| { | |
| "epoch": 1.9004424778761062, | |
| "grad_norm": 0.1341380476951599, | |
| "learning_rate": 3.565527010421019e-06, | |
| "loss": 0.3688, | |
| "step": 859 | |
| }, | |
| { | |
| "epoch": 1.9026548672566372, | |
| "grad_norm": 0.17473238706588745, | |
| "learning_rate": 3.5531976535508107e-06, | |
| "loss": 0.3743, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 1.9048672566371683, | |
| "grad_norm": 0.14425218105316162, | |
| "learning_rate": 3.540877890444989e-06, | |
| "loss": 0.3699, | |
| "step": 861 | |
| }, | |
| { | |
| "epoch": 1.907079646017699, | |
| "grad_norm": 0.1527353972196579, | |
| "learning_rate": 3.528567802796061e-06, | |
| "loss": 0.3648, | |
| "step": 862 | |
| }, | |
| { | |
| "epoch": 1.9092920353982301, | |
| "grad_norm": 0.142634317278862, | |
| "learning_rate": 3.5162674722323677e-06, | |
| "loss": 0.3673, | |
| "step": 863 | |
| }, | |
| { | |
| "epoch": 1.911504424778761, | |
| "grad_norm": 0.19563040137290955, | |
| "learning_rate": 3.5039769803175545e-06, | |
| "loss": 0.3703, | |
| "step": 864 | |
| }, | |
| { | |
| "epoch": 1.913716814159292, | |
| "grad_norm": 0.16823236644268036, | |
| "learning_rate": 3.4916964085500277e-06, | |
| "loss": 0.3762, | |
| "step": 865 | |
| }, | |
| { | |
| "epoch": 1.915929203539823, | |
| "grad_norm": 0.15324640274047852, | |
| "learning_rate": 3.4794258383624115e-06, | |
| "loss": 0.3818, | |
| "step": 866 | |
| }, | |
| { | |
| "epoch": 1.918141592920354, | |
| "grad_norm": 0.15772469341754913, | |
| "learning_rate": 3.4671653511210086e-06, | |
| "loss": 0.3761, | |
| "step": 867 | |
| }, | |
| { | |
| "epoch": 1.920353982300885, | |
| "grad_norm": 0.15935544669628143, | |
| "learning_rate": 3.4549150281252635e-06, | |
| "loss": 0.3738, | |
| "step": 868 | |
| }, | |
| { | |
| "epoch": 1.922566371681416, | |
| "grad_norm": 0.15252330899238586, | |
| "learning_rate": 3.442674950607221e-06, | |
| "loss": 0.3828, | |
| "step": 869 | |
| }, | |
| { | |
| "epoch": 1.924778761061947, | |
| "grad_norm": 0.1612301915884018, | |
| "learning_rate": 3.430445199730987e-06, | |
| "loss": 0.3762, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 1.926991150442478, | |
| "grad_norm": 0.16883090138435364, | |
| "learning_rate": 3.4182258565921933e-06, | |
| "loss": 0.371, | |
| "step": 871 | |
| }, | |
| { | |
| "epoch": 1.9292035398230087, | |
| "grad_norm": 0.14610210061073303, | |
| "learning_rate": 3.406017002217452e-06, | |
| "loss": 0.3766, | |
| "step": 872 | |
| }, | |
| { | |
| "epoch": 1.9314159292035398, | |
| "grad_norm": 0.15019983053207397, | |
| "learning_rate": 3.393818717563834e-06, | |
| "loss": 0.3646, | |
| "step": 873 | |
| }, | |
| { | |
| "epoch": 1.9336283185840708, | |
| "grad_norm": 0.1793092042207718, | |
| "learning_rate": 3.3816310835183153e-06, | |
| "loss": 0.3751, | |
| "step": 874 | |
| }, | |
| { | |
| "epoch": 1.9358407079646018, | |
| "grad_norm": 0.1681625097990036, | |
| "learning_rate": 3.369454180897248e-06, | |
| "loss": 0.3729, | |
| "step": 875 | |
| }, | |
| { | |
| "epoch": 1.9380530973451329, | |
| "grad_norm": 0.13819968700408936, | |
| "learning_rate": 3.3572880904458267e-06, | |
| "loss": 0.3644, | |
| "step": 876 | |
| }, | |
| { | |
| "epoch": 1.9402654867256637, | |
| "grad_norm": 0.14391469955444336, | |
| "learning_rate": 3.345132892837547e-06, | |
| "loss": 0.374, | |
| "step": 877 | |
| }, | |
| { | |
| "epoch": 1.9424778761061947, | |
| "grad_norm": 0.1573283076286316, | |
| "learning_rate": 3.332988668673677e-06, | |
| "loss": 0.3648, | |
| "step": 878 | |
| }, | |
| { | |
| "epoch": 1.9446902654867255, | |
| "grad_norm": 0.1618097424507141, | |
| "learning_rate": 3.320855498482718e-06, | |
| "loss": 0.37, | |
| "step": 879 | |
| }, | |
| { | |
| "epoch": 1.9469026548672566, | |
| "grad_norm": 0.14835193753242493, | |
| "learning_rate": 3.308733462719873e-06, | |
| "loss": 0.3658, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 1.9491150442477876, | |
| "grad_norm": 0.16873405873775482, | |
| "learning_rate": 3.2966226417665125e-06, | |
| "loss": 0.3727, | |
| "step": 881 | |
| }, | |
| { | |
| "epoch": 1.9513274336283186, | |
| "grad_norm": 0.17028756439685822, | |
| "learning_rate": 3.2845231159296404e-06, | |
| "loss": 0.3718, | |
| "step": 882 | |
| }, | |
| { | |
| "epoch": 1.9535398230088497, | |
| "grad_norm": 0.1492159515619278, | |
| "learning_rate": 3.2724349654413612e-06, | |
| "loss": 0.3769, | |
| "step": 883 | |
| }, | |
| { | |
| "epoch": 1.9557522123893807, | |
| "grad_norm": 0.1774129718542099, | |
| "learning_rate": 3.2603582704583547e-06, | |
| "loss": 0.3734, | |
| "step": 884 | |
| }, | |
| { | |
| "epoch": 1.9579646017699115, | |
| "grad_norm": 0.16402071714401245, | |
| "learning_rate": 3.2482931110613358e-06, | |
| "loss": 0.3791, | |
| "step": 885 | |
| }, | |
| { | |
| "epoch": 1.9601769911504425, | |
| "grad_norm": 0.16409912705421448, | |
| "learning_rate": 3.236239567254526e-06, | |
| "loss": 0.3689, | |
| "step": 886 | |
| }, | |
| { | |
| "epoch": 1.9623893805309733, | |
| "grad_norm": 0.15910659730434418, | |
| "learning_rate": 3.224197718965124e-06, | |
| "loss": 0.367, | |
| "step": 887 | |
| }, | |
| { | |
| "epoch": 1.9646017699115044, | |
| "grad_norm": 0.16524411737918854, | |
| "learning_rate": 3.2121676460427765e-06, | |
| "loss": 0.3767, | |
| "step": 888 | |
| }, | |
| { | |
| "epoch": 1.9668141592920354, | |
| "grad_norm": 0.1652010977268219, | |
| "learning_rate": 3.2001494282590466e-06, | |
| "loss": 0.3797, | |
| "step": 889 | |
| }, | |
| { | |
| "epoch": 1.9690265486725664, | |
| "grad_norm": 0.17844292521476746, | |
| "learning_rate": 3.188143145306888e-06, | |
| "loss": 0.3864, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 1.9712389380530975, | |
| "grad_norm": 0.1592842936515808, | |
| "learning_rate": 3.176148876800109e-06, | |
| "loss": 0.3792, | |
| "step": 891 | |
| }, | |
| { | |
| "epoch": 1.9734513274336283, | |
| "grad_norm": 0.1572018712759018, | |
| "learning_rate": 3.164166702272855e-06, | |
| "loss": 0.3733, | |
| "step": 892 | |
| }, | |
| { | |
| "epoch": 1.9756637168141593, | |
| "grad_norm": 0.15086941421031952, | |
| "learning_rate": 3.1521967011790753e-06, | |
| "loss": 0.3731, | |
| "step": 893 | |
| }, | |
| { | |
| "epoch": 1.9778761061946901, | |
| "grad_norm": 0.15987545251846313, | |
| "learning_rate": 3.140238952891994e-06, | |
| "loss": 0.3718, | |
| "step": 894 | |
| }, | |
| { | |
| "epoch": 1.9800884955752212, | |
| "grad_norm": 0.19604964554309845, | |
| "learning_rate": 3.1282935367035935e-06, | |
| "loss": 0.3693, | |
| "step": 895 | |
| }, | |
| { | |
| "epoch": 1.9823008849557522, | |
| "grad_norm": 0.14786747097969055, | |
| "learning_rate": 3.116360531824074e-06, | |
| "loss": 0.3707, | |
| "step": 896 | |
| }, | |
| { | |
| "epoch": 1.9845132743362832, | |
| "grad_norm": 0.15440818667411804, | |
| "learning_rate": 3.1044400173813415e-06, | |
| "loss": 0.3782, | |
| "step": 897 | |
| }, | |
| { | |
| "epoch": 1.9867256637168142, | |
| "grad_norm": 0.1537942886352539, | |
| "learning_rate": 3.0925320724204743e-06, | |
| "loss": 0.3859, | |
| "step": 898 | |
| }, | |
| { | |
| "epoch": 1.9889380530973453, | |
| "grad_norm": 0.14648617804050446, | |
| "learning_rate": 3.080636775903205e-06, | |
| "loss": 0.3701, | |
| "step": 899 | |
| }, | |
| { | |
| "epoch": 1.991150442477876, | |
| "grad_norm": 0.14667072892189026, | |
| "learning_rate": 3.068754206707392e-06, | |
| "loss": 0.37, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 1.9933628318584071, | |
| "grad_norm": 0.1474722921848297, | |
| "learning_rate": 3.056884443626499e-06, | |
| "loss": 0.3815, | |
| "step": 901 | |
| }, | |
| { | |
| "epoch": 1.995575221238938, | |
| "grad_norm": 0.14442405104637146, | |
| "learning_rate": 3.0450275653690743e-06, | |
| "loss": 0.376, | |
| "step": 902 | |
| }, | |
| { | |
| "epoch": 1.997787610619469, | |
| "grad_norm": 0.1478486955165863, | |
| "learning_rate": 3.033183650558225e-06, | |
| "loss": 0.3692, | |
| "step": 903 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "grad_norm": 0.15566988289356232, | |
| "learning_rate": 3.021352777731096e-06, | |
| "loss": 0.3595, | |
| "step": 904 | |
| }, | |
| { | |
| "epoch": 2.002212389380531, | |
| "grad_norm": 0.1503528654575348, | |
| "learning_rate": 3.0095350253383567e-06, | |
| "loss": 0.3539, | |
| "step": 905 | |
| }, | |
| { | |
| "epoch": 2.004424778761062, | |
| "grad_norm": 0.14453503489494324, | |
| "learning_rate": 2.997730471743667e-06, | |
| "loss": 0.3616, | |
| "step": 906 | |
| }, | |
| { | |
| "epoch": 2.006637168141593, | |
| "grad_norm": 0.14768028259277344, | |
| "learning_rate": 2.985939195223171e-06, | |
| "loss": 0.3451, | |
| "step": 907 | |
| }, | |
| { | |
| "epoch": 2.0088495575221237, | |
| "grad_norm": 0.17169658839702606, | |
| "learning_rate": 2.9741612739649694e-06, | |
| "loss": 0.3583, | |
| "step": 908 | |
| }, | |
| { | |
| "epoch": 2.0110619469026547, | |
| "grad_norm": 0.1610146015882492, | |
| "learning_rate": 2.9623967860686035e-06, | |
| "loss": 0.3563, | |
| "step": 909 | |
| }, | |
| { | |
| "epoch": 2.0132743362831858, | |
| "grad_norm": 0.1743859350681305, | |
| "learning_rate": 2.9506458095445408e-06, | |
| "loss": 0.3538, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 2.015486725663717, | |
| "grad_norm": 0.16596271097660065, | |
| "learning_rate": 2.9389084223136523e-06, | |
| "loss": 0.3465, | |
| "step": 911 | |
| }, | |
| { | |
| "epoch": 2.017699115044248, | |
| "grad_norm": 0.16805611550807953, | |
| "learning_rate": 2.9271847022066992e-06, | |
| "loss": 0.3568, | |
| "step": 912 | |
| }, | |
| { | |
| "epoch": 2.019911504424779, | |
| "grad_norm": 0.2030407339334488, | |
| "learning_rate": 2.915474726963815e-06, | |
| "loss": 0.3584, | |
| "step": 913 | |
| }, | |
| { | |
| "epoch": 2.02212389380531, | |
| "grad_norm": 0.1419757902622223, | |
| "learning_rate": 2.903778574233992e-06, | |
| "loss": 0.3556, | |
| "step": 914 | |
| }, | |
| { | |
| "epoch": 2.024336283185841, | |
| "grad_norm": 0.19129696488380432, | |
| "learning_rate": 2.892096321574564e-06, | |
| "loss": 0.3629, | |
| "step": 915 | |
| }, | |
| { | |
| "epoch": 2.0265486725663715, | |
| "grad_norm": 0.14490734040737152, | |
| "learning_rate": 2.880428046450697e-06, | |
| "loss": 0.3446, | |
| "step": 916 | |
| }, | |
| { | |
| "epoch": 2.0287610619469025, | |
| "grad_norm": 0.19220589101314545, | |
| "learning_rate": 2.8687738262348645e-06, | |
| "loss": 0.3534, | |
| "step": 917 | |
| }, | |
| { | |
| "epoch": 2.0309734513274336, | |
| "grad_norm": 0.1297258734703064, | |
| "learning_rate": 2.8571337382063503e-06, | |
| "loss": 0.3588, | |
| "step": 918 | |
| }, | |
| { | |
| "epoch": 2.0331858407079646, | |
| "grad_norm": 0.18526272475719452, | |
| "learning_rate": 2.845507859550718e-06, | |
| "loss": 0.3543, | |
| "step": 919 | |
| }, | |
| { | |
| "epoch": 2.0353982300884956, | |
| "grad_norm": 0.13665147125720978, | |
| "learning_rate": 2.8338962673593196e-06, | |
| "loss": 0.3411, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 2.0376106194690267, | |
| "grad_norm": 0.15971770882606506, | |
| "learning_rate": 2.822299038628762e-06, | |
| "loss": 0.3489, | |
| "step": 921 | |
| }, | |
| { | |
| "epoch": 2.0398230088495577, | |
| "grad_norm": 0.14711672067642212, | |
| "learning_rate": 2.810716250260418e-06, | |
| "loss": 0.355, | |
| "step": 922 | |
| }, | |
| { | |
| "epoch": 2.0420353982300883, | |
| "grad_norm": 0.15557117760181427, | |
| "learning_rate": 2.799147979059898e-06, | |
| "loss": 0.3549, | |
| "step": 923 | |
| }, | |
| { | |
| "epoch": 2.0442477876106193, | |
| "grad_norm": 0.1388644129037857, | |
| "learning_rate": 2.787594301736556e-06, | |
| "loss": 0.3549, | |
| "step": 924 | |
| }, | |
| { | |
| "epoch": 2.0464601769911503, | |
| "grad_norm": 0.1850692480802536, | |
| "learning_rate": 2.7760552949029683e-06, | |
| "loss": 0.3578, | |
| "step": 925 | |
| }, | |
| { | |
| "epoch": 2.0486725663716814, | |
| "grad_norm": 0.15951766073703766, | |
| "learning_rate": 2.7645310350744296e-06, | |
| "loss": 0.3405, | |
| "step": 926 | |
| }, | |
| { | |
| "epoch": 2.0508849557522124, | |
| "grad_norm": 0.16866743564605713, | |
| "learning_rate": 2.7530215986684584e-06, | |
| "loss": 0.3567, | |
| "step": 927 | |
| }, | |
| { | |
| "epoch": 2.0530973451327434, | |
| "grad_norm": 0.15932750701904297, | |
| "learning_rate": 2.741527062004264e-06, | |
| "loss": 0.3509, | |
| "step": 928 | |
| }, | |
| { | |
| "epoch": 2.0553097345132745, | |
| "grad_norm": 0.15171128511428833, | |
| "learning_rate": 2.7300475013022666e-06, | |
| "loss": 0.362, | |
| "step": 929 | |
| }, | |
| { | |
| "epoch": 2.0575221238938055, | |
| "grad_norm": 0.15653663873672485, | |
| "learning_rate": 2.7185829926835728e-06, | |
| "loss": 0.362, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 2.059734513274336, | |
| "grad_norm": 0.14654697477817535, | |
| "learning_rate": 2.7071336121694856e-06, | |
| "loss": 0.3612, | |
| "step": 931 | |
| }, | |
| { | |
| "epoch": 2.061946902654867, | |
| "grad_norm": 0.14019131660461426, | |
| "learning_rate": 2.695699435680986e-06, | |
| "loss": 0.3618, | |
| "step": 932 | |
| }, | |
| { | |
| "epoch": 2.064159292035398, | |
| "grad_norm": 0.16346430778503418, | |
| "learning_rate": 2.6842805390382453e-06, | |
| "loss": 0.3476, | |
| "step": 933 | |
| }, | |
| { | |
| "epoch": 2.066371681415929, | |
| "grad_norm": 0.12734098732471466, | |
| "learning_rate": 2.6728769979601044e-06, | |
| "loss": 0.3576, | |
| "step": 934 | |
| }, | |
| { | |
| "epoch": 2.0685840707964602, | |
| "grad_norm": 0.15421155095100403, | |
| "learning_rate": 2.661488888063591e-06, | |
| "loss": 0.3605, | |
| "step": 935 | |
| }, | |
| { | |
| "epoch": 2.0707964601769913, | |
| "grad_norm": 0.14653554558753967, | |
| "learning_rate": 2.6501162848634023e-06, | |
| "loss": 0.3525, | |
| "step": 936 | |
| }, | |
| { | |
| "epoch": 2.0730088495575223, | |
| "grad_norm": 0.15689359605312347, | |
| "learning_rate": 2.6387592637714062e-06, | |
| "loss": 0.3531, | |
| "step": 937 | |
| }, | |
| { | |
| "epoch": 2.0752212389380533, | |
| "grad_norm": 0.1751505434513092, | |
| "learning_rate": 2.6274179000961604e-06, | |
| "loss": 0.3489, | |
| "step": 938 | |
| }, | |
| { | |
| "epoch": 2.077433628318584, | |
| "grad_norm": 0.1466556340456009, | |
| "learning_rate": 2.6160922690423818e-06, | |
| "loss": 0.3594, | |
| "step": 939 | |
| }, | |
| { | |
| "epoch": 2.079646017699115, | |
| "grad_norm": 0.17935603857040405, | |
| "learning_rate": 2.6047824457104766e-06, | |
| "loss": 0.3536, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 2.081858407079646, | |
| "grad_norm": 0.13906565308570862, | |
| "learning_rate": 2.5934885050960183e-06, | |
| "loss": 0.366, | |
| "step": 941 | |
| }, | |
| { | |
| "epoch": 2.084070796460177, | |
| "grad_norm": 0.16516318917274475, | |
| "learning_rate": 2.5822105220892733e-06, | |
| "loss": 0.3612, | |
| "step": 942 | |
| }, | |
| { | |
| "epoch": 2.086283185840708, | |
| "grad_norm": 0.1419246345758438, | |
| "learning_rate": 2.5709485714746827e-06, | |
| "loss": 0.3484, | |
| "step": 943 | |
| }, | |
| { | |
| "epoch": 2.088495575221239, | |
| "grad_norm": 0.1455831378698349, | |
| "learning_rate": 2.559702727930386e-06, | |
| "loss": 0.3614, | |
| "step": 944 | |
| }, | |
| { | |
| "epoch": 2.09070796460177, | |
| "grad_norm": 0.15348093211650848, | |
| "learning_rate": 2.548473066027709e-06, | |
| "loss": 0.3595, | |
| "step": 945 | |
| }, | |
| { | |
| "epoch": 2.0929203539823007, | |
| "grad_norm": 0.14580413699150085, | |
| "learning_rate": 2.537259660230679e-06, | |
| "loss": 0.3576, | |
| "step": 946 | |
| }, | |
| { | |
| "epoch": 2.0951327433628317, | |
| "grad_norm": 0.16142421960830688, | |
| "learning_rate": 2.5260625848955324e-06, | |
| "loss": 0.3595, | |
| "step": 947 | |
| }, | |
| { | |
| "epoch": 2.0973451327433628, | |
| "grad_norm": 0.1431436985731125, | |
| "learning_rate": 2.5148819142702095e-06, | |
| "loss": 0.361, | |
| "step": 948 | |
| }, | |
| { | |
| "epoch": 2.099557522123894, | |
| "grad_norm": 0.18138659000396729, | |
| "learning_rate": 2.5037177224938846e-06, | |
| "loss": 0.3591, | |
| "step": 949 | |
| }, | |
| { | |
| "epoch": 2.101769911504425, | |
| "grad_norm": 0.1631578505039215, | |
| "learning_rate": 2.492570083596447e-06, | |
| "loss": 0.3576, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 2.103982300884956, | |
| "grad_norm": 0.1554289162158966, | |
| "learning_rate": 2.4814390714980325e-06, | |
| "loss": 0.347, | |
| "step": 951 | |
| }, | |
| { | |
| "epoch": 2.106194690265487, | |
| "grad_norm": 0.15791471302509308, | |
| "learning_rate": 2.4703247600085173e-06, | |
| "loss": 0.3632, | |
| "step": 952 | |
| }, | |
| { | |
| "epoch": 2.1084070796460175, | |
| "grad_norm": 0.1508740782737732, | |
| "learning_rate": 2.459227222827043e-06, | |
| "loss": 0.3548, | |
| "step": 953 | |
| }, | |
| { | |
| "epoch": 2.1106194690265485, | |
| "grad_norm": 0.1677217036485672, | |
| "learning_rate": 2.4481465335415162e-06, | |
| "loss": 0.3544, | |
| "step": 954 | |
| }, | |
| { | |
| "epoch": 2.1128318584070795, | |
| "grad_norm": 0.13547132909297943, | |
| "learning_rate": 2.437082765628122e-06, | |
| "loss": 0.3438, | |
| "step": 955 | |
| }, | |
| { | |
| "epoch": 2.1150442477876106, | |
| "grad_norm": 0.15041135251522064, | |
| "learning_rate": 2.426035992450848e-06, | |
| "loss": 0.3618, | |
| "step": 956 | |
| }, | |
| { | |
| "epoch": 2.1172566371681416, | |
| "grad_norm": 0.16150957345962524, | |
| "learning_rate": 2.4150062872609812e-06, | |
| "loss": 0.3565, | |
| "step": 957 | |
| }, | |
| { | |
| "epoch": 2.1194690265486726, | |
| "grad_norm": 0.16388878226280212, | |
| "learning_rate": 2.4039937231966374e-06, | |
| "loss": 0.3576, | |
| "step": 958 | |
| }, | |
| { | |
| "epoch": 2.1216814159292037, | |
| "grad_norm": 0.1527947336435318, | |
| "learning_rate": 2.3929983732822607e-06, | |
| "loss": 0.3599, | |
| "step": 959 | |
| }, | |
| { | |
| "epoch": 2.1238938053097347, | |
| "grad_norm": 0.1690537929534912, | |
| "learning_rate": 2.3820203104281616e-06, | |
| "loss": 0.3512, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 2.1261061946902653, | |
| "grad_norm": 0.14079472422599792, | |
| "learning_rate": 2.3710596074300045e-06, | |
| "loss": 0.3641, | |
| "step": 961 | |
| }, | |
| { | |
| "epoch": 2.1283185840707963, | |
| "grad_norm": 0.14791461825370789, | |
| "learning_rate": 2.360116336968353e-06, | |
| "loss": 0.3528, | |
| "step": 962 | |
| }, | |
| { | |
| "epoch": 2.1305309734513274, | |
| "grad_norm": 0.1540174037218094, | |
| "learning_rate": 2.3491905716081668e-06, | |
| "loss": 0.3546, | |
| "step": 963 | |
| }, | |
| { | |
| "epoch": 2.1327433628318584, | |
| "grad_norm": 0.14626696705818176, | |
| "learning_rate": 2.3382823837983314e-06, | |
| "loss": 0.3511, | |
| "step": 964 | |
| }, | |
| { | |
| "epoch": 2.1349557522123894, | |
| "grad_norm": 0.15072183310985565, | |
| "learning_rate": 2.327391845871179e-06, | |
| "loss": 0.3621, | |
| "step": 965 | |
| }, | |
| { | |
| "epoch": 2.1371681415929205, | |
| "grad_norm": 0.14171431958675385, | |
| "learning_rate": 2.316519030041998e-06, | |
| "loss": 0.3499, | |
| "step": 966 | |
| }, | |
| { | |
| "epoch": 2.1393805309734515, | |
| "grad_norm": 0.134790301322937, | |
| "learning_rate": 2.3056640084085707e-06, | |
| "loss": 0.3564, | |
| "step": 967 | |
| }, | |
| { | |
| "epoch": 2.1415929203539825, | |
| "grad_norm": 0.1500406712293625, | |
| "learning_rate": 2.2948268529506768e-06, | |
| "loss": 0.3546, | |
| "step": 968 | |
| }, | |
| { | |
| "epoch": 2.143805309734513, | |
| "grad_norm": 0.1394817978143692, | |
| "learning_rate": 2.284007635529632e-06, | |
| "loss": 0.3521, | |
| "step": 969 | |
| }, | |
| { | |
| "epoch": 2.146017699115044, | |
| "grad_norm": 0.16197897493839264, | |
| "learning_rate": 2.2732064278877975e-06, | |
| "loss": 0.3573, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 2.148230088495575, | |
| "grad_norm": 0.14796555042266846, | |
| "learning_rate": 2.2624233016481224e-06, | |
| "loss": 0.3676, | |
| "step": 971 | |
| }, | |
| { | |
| "epoch": 2.150442477876106, | |
| "grad_norm": 0.13100607693195343, | |
| "learning_rate": 2.2516583283136474e-06, | |
| "loss": 0.3575, | |
| "step": 972 | |
| }, | |
| { | |
| "epoch": 2.1526548672566372, | |
| "grad_norm": 0.1775142401456833, | |
| "learning_rate": 2.2409115792670434e-06, | |
| "loss": 0.3584, | |
| "step": 973 | |
| }, | |
| { | |
| "epoch": 2.1548672566371683, | |
| "grad_norm": 0.17847150564193726, | |
| "learning_rate": 2.2301831257701405e-06, | |
| "loss": 0.3592, | |
| "step": 974 | |
| }, | |
| { | |
| "epoch": 2.1570796460176993, | |
| "grad_norm": 0.14856776595115662, | |
| "learning_rate": 2.2194730389634444e-06, | |
| "loss": 0.3571, | |
| "step": 975 | |
| }, | |
| { | |
| "epoch": 2.15929203539823, | |
| "grad_norm": 0.17716825008392334, | |
| "learning_rate": 2.2087813898656775e-06, | |
| "loss": 0.3626, | |
| "step": 976 | |
| }, | |
| { | |
| "epoch": 2.161504424778761, | |
| "grad_norm": 0.13400061428546906, | |
| "learning_rate": 2.1981082493732945e-06, | |
| "loss": 0.3591, | |
| "step": 977 | |
| }, | |
| { | |
| "epoch": 2.163716814159292, | |
| "grad_norm": 0.14946198463439941, | |
| "learning_rate": 2.187453688260027e-06, | |
| "loss": 0.3581, | |
| "step": 978 | |
| }, | |
| { | |
| "epoch": 2.165929203539823, | |
| "grad_norm": 0.15970373153686523, | |
| "learning_rate": 2.176817777176398e-06, | |
| "loss": 0.357, | |
| "step": 979 | |
| }, | |
| { | |
| "epoch": 2.168141592920354, | |
| "grad_norm": 0.1397649049758911, | |
| "learning_rate": 2.166200586649272e-06, | |
| "loss": 0.3541, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 2.170353982300885, | |
| "grad_norm": 0.1510685831308365, | |
| "learning_rate": 2.1556021870813653e-06, | |
| "loss": 0.357, | |
| "step": 981 | |
| }, | |
| { | |
| "epoch": 2.172566371681416, | |
| "grad_norm": 0.15586382150650024, | |
| "learning_rate": 2.1450226487508017e-06, | |
| "loss": 0.3425, | |
| "step": 982 | |
| }, | |
| { | |
| "epoch": 2.1747787610619467, | |
| "grad_norm": 0.14433304965496063, | |
| "learning_rate": 2.134462041810632e-06, | |
| "loss": 0.3635, | |
| "step": 983 | |
| }, | |
| { | |
| "epoch": 2.1769911504424777, | |
| "grad_norm": 0.16394202411174774, | |
| "learning_rate": 2.12392043628837e-06, | |
| "loss": 0.3509, | |
| "step": 984 | |
| }, | |
| { | |
| "epoch": 2.1792035398230087, | |
| "grad_norm": 0.1602417677640915, | |
| "learning_rate": 2.1133979020855357e-06, | |
| "loss": 0.3595, | |
| "step": 985 | |
| }, | |
| { | |
| "epoch": 2.1814159292035398, | |
| "grad_norm": 0.14802055060863495, | |
| "learning_rate": 2.102894508977182e-06, | |
| "loss": 0.3518, | |
| "step": 986 | |
| }, | |
| { | |
| "epoch": 2.183628318584071, | |
| "grad_norm": 0.13788118958473206, | |
| "learning_rate": 2.0924103266114422e-06, | |
| "loss": 0.3537, | |
| "step": 987 | |
| }, | |
| { | |
| "epoch": 2.185840707964602, | |
| "grad_norm": 0.16340523958206177, | |
| "learning_rate": 2.081945424509057e-06, | |
| "loss": 0.3588, | |
| "step": 988 | |
| }, | |
| { | |
| "epoch": 2.188053097345133, | |
| "grad_norm": 0.14665019512176514, | |
| "learning_rate": 2.0714998720629264e-06, | |
| "loss": 0.3485, | |
| "step": 989 | |
| }, | |
| { | |
| "epoch": 2.190265486725664, | |
| "grad_norm": 0.14981283247470856, | |
| "learning_rate": 2.061073738537635e-06, | |
| "loss": 0.3615, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 2.1924778761061945, | |
| "grad_norm": 0.13499760627746582, | |
| "learning_rate": 2.0506670930690074e-06, | |
| "loss": 0.3551, | |
| "step": 991 | |
| }, | |
| { | |
| "epoch": 2.1946902654867255, | |
| "grad_norm": 0.15379314124584198, | |
| "learning_rate": 2.040280004663637e-06, | |
| "loss": 0.3471, | |
| "step": 992 | |
| }, | |
| { | |
| "epoch": 2.1969026548672566, | |
| "grad_norm": 0.14276005327701569, | |
| "learning_rate": 2.0299125421984367e-06, | |
| "loss": 0.3565, | |
| "step": 993 | |
| }, | |
| { | |
| "epoch": 2.1991150442477876, | |
| "grad_norm": 0.15052393078804016, | |
| "learning_rate": 2.0195647744201826e-06, | |
| "loss": 0.3552, | |
| "step": 994 | |
| }, | |
| { | |
| "epoch": 2.2013274336283186, | |
| "grad_norm": 0.16427332162857056, | |
| "learning_rate": 2.0092367699450466e-06, | |
| "loss": 0.3588, | |
| "step": 995 | |
| }, | |
| { | |
| "epoch": 2.2035398230088497, | |
| "grad_norm": 0.14771218597888947, | |
| "learning_rate": 1.9989285972581595e-06, | |
| "loss": 0.3583, | |
| "step": 996 | |
| }, | |
| { | |
| "epoch": 2.2057522123893807, | |
| "grad_norm": 0.13689512014389038, | |
| "learning_rate": 1.9886403247131395e-06, | |
| "loss": 0.3525, | |
| "step": 997 | |
| }, | |
| { | |
| "epoch": 2.2079646017699117, | |
| "grad_norm": 0.15630333125591278, | |
| "learning_rate": 1.9783720205316535e-06, | |
| "loss": 0.3492, | |
| "step": 998 | |
| }, | |
| { | |
| "epoch": 2.2101769911504423, | |
| "grad_norm": 0.13555830717086792, | |
| "learning_rate": 1.9681237528029513e-06, | |
| "loss": 0.3368, | |
| "step": 999 | |
| }, | |
| { | |
| "epoch": 2.2123893805309733, | |
| "grad_norm": 0.16636361181735992, | |
| "learning_rate": 1.957895589483426e-06, | |
| "loss": 0.3607, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 2.2146017699115044, | |
| "grad_norm": 0.1358216553926468, | |
| "learning_rate": 1.947687598396154e-06, | |
| "loss": 0.3587, | |
| "step": 1001 | |
| }, | |
| { | |
| "epoch": 2.2168141592920354, | |
| "grad_norm": 0.15513230860233307, | |
| "learning_rate": 1.9374998472304523e-06, | |
| "loss": 0.3537, | |
| "step": 1002 | |
| }, | |
| { | |
| "epoch": 2.2190265486725664, | |
| "grad_norm": 0.16106431186199188, | |
| "learning_rate": 1.927332403541428e-06, | |
| "loss": 0.3582, | |
| "step": 1003 | |
| }, | |
| { | |
| "epoch": 2.2212389380530975, | |
| "grad_norm": 0.16195151209831238, | |
| "learning_rate": 1.9171853347495234e-06, | |
| "loss": 0.356, | |
| "step": 1004 | |
| }, | |
| { | |
| "epoch": 2.2234513274336285, | |
| "grad_norm": 0.1492871195077896, | |
| "learning_rate": 1.9070587081400815e-06, | |
| "loss": 0.3611, | |
| "step": 1005 | |
| }, | |
| { | |
| "epoch": 2.225663716814159, | |
| "grad_norm": 0.15143686532974243, | |
| "learning_rate": 1.896952590862886e-06, | |
| "loss": 0.3542, | |
| "step": 1006 | |
| }, | |
| { | |
| "epoch": 2.22787610619469, | |
| "grad_norm": 0.1420651376247406, | |
| "learning_rate": 1.8868670499317298e-06, | |
| "loss": 0.3589, | |
| "step": 1007 | |
| }, | |
| { | |
| "epoch": 2.230088495575221, | |
| "grad_norm": 0.15692518651485443, | |
| "learning_rate": 1.8768021522239576e-06, | |
| "loss": 0.3606, | |
| "step": 1008 | |
| }, | |
| { | |
| "epoch": 2.232300884955752, | |
| "grad_norm": 0.14444686472415924, | |
| "learning_rate": 1.8667579644800344e-06, | |
| "loss": 0.3584, | |
| "step": 1009 | |
| }, | |
| { | |
| "epoch": 2.234513274336283, | |
| "grad_norm": 0.13391245901584625, | |
| "learning_rate": 1.856734553303091e-06, | |
| "loss": 0.3522, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 2.2367256637168142, | |
| "grad_norm": 0.15556755661964417, | |
| "learning_rate": 1.8467319851584952e-06, | |
| "loss": 0.3591, | |
| "step": 1011 | |
| }, | |
| { | |
| "epoch": 2.2389380530973453, | |
| "grad_norm": 0.14630340039730072, | |
| "learning_rate": 1.8367503263733983e-06, | |
| "loss": 0.3556, | |
| "step": 1012 | |
| }, | |
| { | |
| "epoch": 2.241150442477876, | |
| "grad_norm": 0.13597136735916138, | |
| "learning_rate": 1.8267896431363048e-06, | |
| "loss": 0.3519, | |
| "step": 1013 | |
| }, | |
| { | |
| "epoch": 2.243362831858407, | |
| "grad_norm": 0.14077752828598022, | |
| "learning_rate": 1.8168500014966316e-06, | |
| "loss": 0.349, | |
| "step": 1014 | |
| }, | |
| { | |
| "epoch": 2.245575221238938, | |
| "grad_norm": 0.15375056862831116, | |
| "learning_rate": 1.8069314673642624e-06, | |
| "loss": 0.3575, | |
| "step": 1015 | |
| }, | |
| { | |
| "epoch": 2.247787610619469, | |
| "grad_norm": 0.1405712366104126, | |
| "learning_rate": 1.7970341065091246e-06, | |
| "loss": 0.3543, | |
| "step": 1016 | |
| }, | |
| { | |
| "epoch": 2.25, | |
| "grad_norm": 0.14993660151958466, | |
| "learning_rate": 1.7871579845607378e-06, | |
| "loss": 0.3564, | |
| "step": 1017 | |
| }, | |
| { | |
| "epoch": 2.252212389380531, | |
| "grad_norm": 0.14628523588180542, | |
| "learning_rate": 1.7773031670077934e-06, | |
| "loss": 0.3488, | |
| "step": 1018 | |
| }, | |
| { | |
| "epoch": 2.254424778761062, | |
| "grad_norm": 0.1479613482952118, | |
| "learning_rate": 1.7674697191977053e-06, | |
| "loss": 0.3619, | |
| "step": 1019 | |
| }, | |
| { | |
| "epoch": 2.256637168141593, | |
| "grad_norm": 0.14518770575523376, | |
| "learning_rate": 1.757657706336192e-06, | |
| "loss": 0.3695, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 2.258849557522124, | |
| "grad_norm": 0.14480751752853394, | |
| "learning_rate": 1.7478671934868302e-06, | |
| "loss": 0.3556, | |
| "step": 1021 | |
| }, | |
| { | |
| "epoch": 2.2610619469026547, | |
| "grad_norm": 0.15581965446472168, | |
| "learning_rate": 1.7380982455706353e-06, | |
| "loss": 0.3559, | |
| "step": 1022 | |
| }, | |
| { | |
| "epoch": 2.2632743362831858, | |
| "grad_norm": 0.14438295364379883, | |
| "learning_rate": 1.72835092736562e-06, | |
| "loss": 0.3697, | |
| "step": 1023 | |
| }, | |
| { | |
| "epoch": 2.265486725663717, | |
| "grad_norm": 0.1663781851530075, | |
| "learning_rate": 1.7186253035063738e-06, | |
| "loss": 0.3517, | |
| "step": 1024 | |
| }, | |
| { | |
| "epoch": 2.267699115044248, | |
| "grad_norm": 0.14601710438728333, | |
| "learning_rate": 1.7089214384836322e-06, | |
| "loss": 0.3467, | |
| "step": 1025 | |
| }, | |
| { | |
| "epoch": 2.269911504424779, | |
| "grad_norm": 0.15470395982265472, | |
| "learning_rate": 1.699239396643841e-06, | |
| "loss": 0.3529, | |
| "step": 1026 | |
| }, | |
| { | |
| "epoch": 2.27212389380531, | |
| "grad_norm": 0.15689963102340698, | |
| "learning_rate": 1.6895792421887437e-06, | |
| "loss": 0.3575, | |
| "step": 1027 | |
| }, | |
| { | |
| "epoch": 2.274336283185841, | |
| "grad_norm": 0.1612994223833084, | |
| "learning_rate": 1.6799410391749416e-06, | |
| "loss": 0.356, | |
| "step": 1028 | |
| }, | |
| { | |
| "epoch": 2.2765486725663715, | |
| "grad_norm": 0.15020525455474854, | |
| "learning_rate": 1.670324851513483e-06, | |
| "loss": 0.3403, | |
| "step": 1029 | |
| }, | |
| { | |
| "epoch": 2.2787610619469025, | |
| "grad_norm": 0.16059263050556183, | |
| "learning_rate": 1.6607307429694237e-06, | |
| "loss": 0.3604, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 2.2809734513274336, | |
| "grad_norm": 0.1457761973142624, | |
| "learning_rate": 1.6511587771614208e-06, | |
| "loss": 0.3573, | |
| "step": 1031 | |
| }, | |
| { | |
| "epoch": 2.2831858407079646, | |
| "grad_norm": 0.14527295529842377, | |
| "learning_rate": 1.6416090175612958e-06, | |
| "loss": 0.3563, | |
| "step": 1032 | |
| }, | |
| { | |
| "epoch": 2.2853982300884956, | |
| "grad_norm": 0.145360067486763, | |
| "learning_rate": 1.6320815274936269e-06, | |
| "loss": 0.356, | |
| "step": 1033 | |
| }, | |
| { | |
| "epoch": 2.2876106194690267, | |
| "grad_norm": 0.14848755300045013, | |
| "learning_rate": 1.622576370135317e-06, | |
| "loss": 0.3522, | |
| "step": 1034 | |
| }, | |
| { | |
| "epoch": 2.2898230088495577, | |
| "grad_norm": 0.1375618577003479, | |
| "learning_rate": 1.613093608515181e-06, | |
| "loss": 0.3513, | |
| "step": 1035 | |
| }, | |
| { | |
| "epoch": 2.2920353982300883, | |
| "grad_norm": 0.13908326625823975, | |
| "learning_rate": 1.6036333055135345e-06, | |
| "loss": 0.3604, | |
| "step": 1036 | |
| }, | |
| { | |
| "epoch": 2.2942477876106193, | |
| "grad_norm": 0.15508785843849182, | |
| "learning_rate": 1.5941955238617612e-06, | |
| "loss": 0.3604, | |
| "step": 1037 | |
| }, | |
| { | |
| "epoch": 2.2964601769911503, | |
| "grad_norm": 0.14073608815670013, | |
| "learning_rate": 1.5847803261419109e-06, | |
| "loss": 0.3624, | |
| "step": 1038 | |
| }, | |
| { | |
| "epoch": 2.2986725663716814, | |
| "grad_norm": 0.1389189213514328, | |
| "learning_rate": 1.575387774786274e-06, | |
| "loss": 0.3512, | |
| "step": 1039 | |
| }, | |
| { | |
| "epoch": 2.3008849557522124, | |
| "grad_norm": 0.1345841884613037, | |
| "learning_rate": 1.5660179320769792e-06, | |
| "loss": 0.3625, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 2.3030973451327434, | |
| "grad_norm": 0.14056994020938873, | |
| "learning_rate": 1.556670860145567e-06, | |
| "loss": 0.3559, | |
| "step": 1041 | |
| }, | |
| { | |
| "epoch": 2.3053097345132745, | |
| "grad_norm": 0.13505183160305023, | |
| "learning_rate": 1.5473466209725907e-06, | |
| "loss": 0.3525, | |
| "step": 1042 | |
| }, | |
| { | |
| "epoch": 2.307522123893805, | |
| "grad_norm": 0.13815952837467194, | |
| "learning_rate": 1.5380452763871951e-06, | |
| "loss": 0.3681, | |
| "step": 1043 | |
| }, | |
| { | |
| "epoch": 2.309734513274336, | |
| "grad_norm": 0.1454608291387558, | |
| "learning_rate": 1.5287668880667107e-06, | |
| "loss": 0.3542, | |
| "step": 1044 | |
| }, | |
| { | |
| "epoch": 2.311946902654867, | |
| "grad_norm": 0.1534843146800995, | |
| "learning_rate": 1.5195115175362485e-06, | |
| "loss": 0.3575, | |
| "step": 1045 | |
| }, | |
| { | |
| "epoch": 2.314159292035398, | |
| "grad_norm": 0.16305556893348694, | |
| "learning_rate": 1.5102792261682813e-06, | |
| "loss": 0.3586, | |
| "step": 1046 | |
| }, | |
| { | |
| "epoch": 2.316371681415929, | |
| "grad_norm": 0.16194356977939606, | |
| "learning_rate": 1.5010700751822555e-06, | |
| "loss": 0.351, | |
| "step": 1047 | |
| }, | |
| { | |
| "epoch": 2.3185840707964602, | |
| "grad_norm": 0.17614226043224335, | |
| "learning_rate": 1.4918841256441603e-06, | |
| "loss": 0.3603, | |
| "step": 1048 | |
| }, | |
| { | |
| "epoch": 2.3207964601769913, | |
| "grad_norm": 0.1571769267320633, | |
| "learning_rate": 1.4827214384661447e-06, | |
| "loss": 0.3493, | |
| "step": 1049 | |
| }, | |
| { | |
| "epoch": 2.3230088495575223, | |
| "grad_norm": 0.1548829972743988, | |
| "learning_rate": 1.473582074406099e-06, | |
| "loss": 0.363, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 2.3252212389380533, | |
| "grad_norm": 0.1630624383687973, | |
| "learning_rate": 1.4644660940672628e-06, | |
| "loss": 0.348, | |
| "step": 1051 | |
| }, | |
| { | |
| "epoch": 2.327433628318584, | |
| "grad_norm": 0.18259887397289276, | |
| "learning_rate": 1.455373557897814e-06, | |
| "loss": 0.3582, | |
| "step": 1052 | |
| }, | |
| { | |
| "epoch": 2.329646017699115, | |
| "grad_norm": 0.1488334983587265, | |
| "learning_rate": 1.4463045261904718e-06, | |
| "loss": 0.3559, | |
| "step": 1053 | |
| }, | |
| { | |
| "epoch": 2.331858407079646, | |
| "grad_norm": 0.16422435641288757, | |
| "learning_rate": 1.4372590590821012e-06, | |
| "loss": 0.3586, | |
| "step": 1054 | |
| }, | |
| { | |
| "epoch": 2.334070796460177, | |
| "grad_norm": 0.13306331634521484, | |
| "learning_rate": 1.4282372165533042e-06, | |
| "loss": 0.3465, | |
| "step": 1055 | |
| }, | |
| { | |
| "epoch": 2.336283185840708, | |
| "grad_norm": 0.1893102079629898, | |
| "learning_rate": 1.4192390584280347e-06, | |
| "loss": 0.3616, | |
| "step": 1056 | |
| }, | |
| { | |
| "epoch": 2.338495575221239, | |
| "grad_norm": 0.15167655050754547, | |
| "learning_rate": 1.4102646443731866e-06, | |
| "loss": 0.3515, | |
| "step": 1057 | |
| }, | |
| { | |
| "epoch": 2.34070796460177, | |
| "grad_norm": 0.17918401956558228, | |
| "learning_rate": 1.4013140338982168e-06, | |
| "loss": 0.3634, | |
| "step": 1058 | |
| }, | |
| { | |
| "epoch": 2.3429203539823007, | |
| "grad_norm": 0.152436301112175, | |
| "learning_rate": 1.392387286354731e-06, | |
| "loss": 0.3541, | |
| "step": 1059 | |
| }, | |
| { | |
| "epoch": 2.3451327433628317, | |
| "grad_norm": 0.20484967529773712, | |
| "learning_rate": 1.3834844609361064e-06, | |
| "loss": 0.3529, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 2.3473451327433628, | |
| "grad_norm": 0.15942007303237915, | |
| "learning_rate": 1.3746056166770872e-06, | |
| "loss": 0.3576, | |
| "step": 1061 | |
| }, | |
| { | |
| "epoch": 2.349557522123894, | |
| "grad_norm": 0.17121823132038116, | |
| "learning_rate": 1.3657508124533992e-06, | |
| "loss": 0.3464, | |
| "step": 1062 | |
| }, | |
| { | |
| "epoch": 2.351769911504425, | |
| "grad_norm": 0.16647647321224213, | |
| "learning_rate": 1.3569201069813626e-06, | |
| "loss": 0.36, | |
| "step": 1063 | |
| }, | |
| { | |
| "epoch": 2.353982300884956, | |
| "grad_norm": 0.14579357206821442, | |
| "learning_rate": 1.3481135588174926e-06, | |
| "loss": 0.3429, | |
| "step": 1064 | |
| }, | |
| { | |
| "epoch": 2.356194690265487, | |
| "grad_norm": 0.15810784697532654, | |
| "learning_rate": 1.3393312263581222e-06, | |
| "loss": 0.3518, | |
| "step": 1065 | |
| }, | |
| { | |
| "epoch": 2.3584070796460175, | |
| "grad_norm": 0.15230241417884827, | |
| "learning_rate": 1.330573167839005e-06, | |
| "loss": 0.3682, | |
| "step": 1066 | |
| }, | |
| { | |
| "epoch": 2.3606194690265485, | |
| "grad_norm": 0.15005375444889069, | |
| "learning_rate": 1.3218394413349389e-06, | |
| "loss": 0.3484, | |
| "step": 1067 | |
| }, | |
| { | |
| "epoch": 2.3628318584070795, | |
| "grad_norm": 0.13220618665218353, | |
| "learning_rate": 1.3131301047593685e-06, | |
| "loss": 0.3527, | |
| "step": 1068 | |
| }, | |
| { | |
| "epoch": 2.3650442477876106, | |
| "grad_norm": 0.15161649882793427, | |
| "learning_rate": 1.3044452158640197e-06, | |
| "loss": 0.3679, | |
| "step": 1069 | |
| }, | |
| { | |
| "epoch": 2.3672566371681416, | |
| "grad_norm": 0.14612559974193573, | |
| "learning_rate": 1.2957848322384959e-06, | |
| "loss": 0.3595, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 2.3694690265486726, | |
| "grad_norm": 0.14127154648303986, | |
| "learning_rate": 1.2871490113099066e-06, | |
| "loss": 0.3536, | |
| "step": 1071 | |
| }, | |
| { | |
| "epoch": 2.3716814159292037, | |
| "grad_norm": 0.13997747004032135, | |
| "learning_rate": 1.2785378103424917e-06, | |
| "loss": 0.3549, | |
| "step": 1072 | |
| }, | |
| { | |
| "epoch": 2.3738938053097347, | |
| "grad_norm": 0.13644693791866302, | |
| "learning_rate": 1.2699512864372287e-06, | |
| "loss": 0.3596, | |
| "step": 1073 | |
| }, | |
| { | |
| "epoch": 2.3761061946902653, | |
| "grad_norm": 0.14478059113025665, | |
| "learning_rate": 1.2613894965314682e-06, | |
| "loss": 0.3588, | |
| "step": 1074 | |
| }, | |
| { | |
| "epoch": 2.3783185840707963, | |
| "grad_norm": 0.15023688971996307, | |
| "learning_rate": 1.2528524973985424e-06, | |
| "loss": 0.3606, | |
| "step": 1075 | |
| }, | |
| { | |
| "epoch": 2.3805309734513274, | |
| "grad_norm": 0.14657047390937805, | |
| "learning_rate": 1.2443403456474017e-06, | |
| "loss": 0.3647, | |
| "step": 1076 | |
| }, | |
| { | |
| "epoch": 2.3827433628318584, | |
| "grad_norm": 0.1500697135925293, | |
| "learning_rate": 1.2358530977222276e-06, | |
| "loss": 0.3551, | |
| "step": 1077 | |
| }, | |
| { | |
| "epoch": 2.3849557522123894, | |
| "grad_norm": 0.14721229672431946, | |
| "learning_rate": 1.2273908099020703e-06, | |
| "loss": 0.3595, | |
| "step": 1078 | |
| }, | |
| { | |
| "epoch": 2.3871681415929205, | |
| "grad_norm": 0.15616820752620697, | |
| "learning_rate": 1.218953538300462e-06, | |
| "loss": 0.3498, | |
| "step": 1079 | |
| }, | |
| { | |
| "epoch": 2.3893805309734515, | |
| "grad_norm": 0.15405981242656708, | |
| "learning_rate": 1.2105413388650577e-06, | |
| "loss": 0.3482, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 2.3915929203539825, | |
| "grad_norm": 0.14757871627807617, | |
| "learning_rate": 1.2021542673772584e-06, | |
| "loss": 0.3592, | |
| "step": 1081 | |
| }, | |
| { | |
| "epoch": 2.393805309734513, | |
| "grad_norm": 0.16278573870658875, | |
| "learning_rate": 1.193792379451837e-06, | |
| "loss": 0.3515, | |
| "step": 1082 | |
| }, | |
| { | |
| "epoch": 2.396017699115044, | |
| "grad_norm": 0.15779441595077515, | |
| "learning_rate": 1.1854557305365783e-06, | |
| "loss": 0.351, | |
| "step": 1083 | |
| }, | |
| { | |
| "epoch": 2.398230088495575, | |
| "grad_norm": 0.16500075161457062, | |
| "learning_rate": 1.1771443759119028e-06, | |
| "loss": 0.3538, | |
| "step": 1084 | |
| }, | |
| { | |
| "epoch": 2.400442477876106, | |
| "grad_norm": 0.15716195106506348, | |
| "learning_rate": 1.1688583706905099e-06, | |
| "loss": 0.3522, | |
| "step": 1085 | |
| }, | |
| { | |
| "epoch": 2.4026548672566372, | |
| "grad_norm": 0.1632101982831955, | |
| "learning_rate": 1.1605977698170001e-06, | |
| "loss": 0.3612, | |
| "step": 1086 | |
| }, | |
| { | |
| "epoch": 2.4048672566371683, | |
| "grad_norm": 0.14858639240264893, | |
| "learning_rate": 1.1523626280675237e-06, | |
| "loss": 0.3575, | |
| "step": 1087 | |
| }, | |
| { | |
| "epoch": 2.4070796460176993, | |
| "grad_norm": 0.16127295792102814, | |
| "learning_rate": 1.1441530000494055e-06, | |
| "loss": 0.3557, | |
| "step": 1088 | |
| }, | |
| { | |
| "epoch": 2.40929203539823, | |
| "grad_norm": 0.15704701840877533, | |
| "learning_rate": 1.135968940200794e-06, | |
| "loss": 0.3547, | |
| "step": 1089 | |
| }, | |
| { | |
| "epoch": 2.411504424778761, | |
| "grad_norm": 0.14434731006622314, | |
| "learning_rate": 1.1278105027902898e-06, | |
| "loss": 0.3561, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 2.413716814159292, | |
| "grad_norm": 0.17251616716384888, | |
| "learning_rate": 1.1196777419165927e-06, | |
| "loss": 0.3646, | |
| "step": 1091 | |
| }, | |
| { | |
| "epoch": 2.415929203539823, | |
| "grad_norm": 0.17024458944797516, | |
| "learning_rate": 1.1115707115081448e-06, | |
| "loss": 0.3536, | |
| "step": 1092 | |
| }, | |
| { | |
| "epoch": 2.418141592920354, | |
| "grad_norm": 0.14864687621593475, | |
| "learning_rate": 1.1034894653227618e-06, | |
| "loss": 0.3526, | |
| "step": 1093 | |
| }, | |
| { | |
| "epoch": 2.420353982300885, | |
| "grad_norm": 0.18286971747875214, | |
| "learning_rate": 1.095434056947291e-06, | |
| "loss": 0.356, | |
| "step": 1094 | |
| }, | |
| { | |
| "epoch": 2.422566371681416, | |
| "grad_norm": 0.1528891921043396, | |
| "learning_rate": 1.0874045397972433e-06, | |
| "loss": 0.352, | |
| "step": 1095 | |
| }, | |
| { | |
| "epoch": 2.4247787610619467, | |
| "grad_norm": 0.15347851812839508, | |
| "learning_rate": 1.0794009671164484e-06, | |
| "loss": 0.3591, | |
| "step": 1096 | |
| }, | |
| { | |
| "epoch": 2.4269911504424777, | |
| "grad_norm": 0.16304871439933777, | |
| "learning_rate": 1.0714233919766953e-06, | |
| "loss": 0.3557, | |
| "step": 1097 | |
| }, | |
| { | |
| "epoch": 2.4292035398230087, | |
| "grad_norm": 0.13769623637199402, | |
| "learning_rate": 1.0634718672773863e-06, | |
| "loss": 0.3534, | |
| "step": 1098 | |
| }, | |
| { | |
| "epoch": 2.4314159292035398, | |
| "grad_norm": 0.1583521068096161, | |
| "learning_rate": 1.055546445745178e-06, | |
| "loss": 0.3591, | |
| "step": 1099 | |
| }, | |
| { | |
| "epoch": 2.433628318584071, | |
| "grad_norm": 0.17005863785743713, | |
| "learning_rate": 1.0476471799336424e-06, | |
| "loss": 0.3532, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 2.435840707964602, | |
| "grad_norm": 0.1480191946029663, | |
| "learning_rate": 1.0397741222229057e-06, | |
| "loss": 0.3556, | |
| "step": 1101 | |
| }, | |
| { | |
| "epoch": 2.438053097345133, | |
| "grad_norm": 0.16332688927650452, | |
| "learning_rate": 1.0319273248193145e-06, | |
| "loss": 0.3595, | |
| "step": 1102 | |
| }, | |
| { | |
| "epoch": 2.440265486725664, | |
| "grad_norm": 0.17268969118595123, | |
| "learning_rate": 1.0241068397550807e-06, | |
| "loss": 0.3522, | |
| "step": 1103 | |
| }, | |
| { | |
| "epoch": 2.442477876106195, | |
| "grad_norm": 0.15803387761116028, | |
| "learning_rate": 1.0163127188879352e-06, | |
| "loss": 0.3526, | |
| "step": 1104 | |
| }, | |
| { | |
| "epoch": 2.4446902654867255, | |
| "grad_norm": 0.15174445509910583, | |
| "learning_rate": 1.008545013900794e-06, | |
| "loss": 0.3521, | |
| "step": 1105 | |
| }, | |
| { | |
| "epoch": 2.4469026548672566, | |
| "grad_norm": 0.1590649038553238, | |
| "learning_rate": 1.0008037763014033e-06, | |
| "loss": 0.3529, | |
| "step": 1106 | |
| }, | |
| { | |
| "epoch": 2.4491150442477876, | |
| "grad_norm": 0.15736490488052368, | |
| "learning_rate": 9.930890574220076e-07, | |
| "loss": 0.3549, | |
| "step": 1107 | |
| }, | |
| { | |
| "epoch": 2.4513274336283186, | |
| "grad_norm": 0.16248001158237457, | |
| "learning_rate": 9.85400908419002e-07, | |
| "loss": 0.3503, | |
| "step": 1108 | |
| }, | |
| { | |
| "epoch": 2.4535398230088497, | |
| "grad_norm": 0.16862428188323975, | |
| "learning_rate": 9.77739380272601e-07, | |
| "loss": 0.3513, | |
| "step": 1109 | |
| }, | |
| { | |
| "epoch": 2.4557522123893807, | |
| "grad_norm": 0.16417399048805237, | |
| "learning_rate": 9.701045237864898e-07, | |
| "loss": 0.3505, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 2.4579646017699117, | |
| "grad_norm": 0.14535874128341675, | |
| "learning_rate": 9.624963895874995e-07, | |
| "loss": 0.3623, | |
| "step": 1111 | |
| }, | |
| { | |
| "epoch": 2.4601769911504423, | |
| "grad_norm": 0.1544649451971054, | |
| "learning_rate": 9.549150281252633e-07, | |
| "loss": 0.3524, | |
| "step": 1112 | |
| }, | |
| { | |
| "epoch": 2.4623893805309733, | |
| "grad_norm": 0.157157301902771, | |
| "learning_rate": 9.473604896718808e-07, | |
| "loss": 0.3542, | |
| "step": 1113 | |
| }, | |
| { | |
| "epoch": 2.4646017699115044, | |
| "grad_norm": 0.16022609174251556, | |
| "learning_rate": 9.398328243215937e-07, | |
| "loss": 0.3486, | |
| "step": 1114 | |
| }, | |
| { | |
| "epoch": 2.4668141592920354, | |
| "grad_norm": 0.14643922448158264, | |
| "learning_rate": 9.323320819904419e-07, | |
| "loss": 0.3588, | |
| "step": 1115 | |
| }, | |
| { | |
| "epoch": 2.4690265486725664, | |
| "grad_norm": 0.15757547318935394, | |
| "learning_rate": 9.248583124159438e-07, | |
| "loss": 0.3619, | |
| "step": 1116 | |
| }, | |
| { | |
| "epoch": 2.4712389380530975, | |
| "grad_norm": 0.14214399456977844, | |
| "learning_rate": 9.174115651567561e-07, | |
| "loss": 0.3559, | |
| "step": 1117 | |
| }, | |
| { | |
| "epoch": 2.4734513274336285, | |
| "grad_norm": 0.1586948037147522, | |
| "learning_rate": 9.099918895923554e-07, | |
| "loss": 0.3504, | |
| "step": 1118 | |
| }, | |
| { | |
| "epoch": 2.475663716814159, | |
| "grad_norm": 0.14957429468631744, | |
| "learning_rate": 9.025993349226997e-07, | |
| "loss": 0.3714, | |
| "step": 1119 | |
| }, | |
| { | |
| "epoch": 2.47787610619469, | |
| "grad_norm": 0.15308675169944763, | |
| "learning_rate": 8.952339501679142e-07, | |
| "loss": 0.3639, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 2.480088495575221, | |
| "grad_norm": 0.1536092609167099, | |
| "learning_rate": 8.878957841679542e-07, | |
| "loss": 0.3543, | |
| "step": 1121 | |
| }, | |
| { | |
| "epoch": 2.482300884955752, | |
| "grad_norm": 0.14460043609142303, | |
| "learning_rate": 8.805848855822918e-07, | |
| "loss": 0.3573, | |
| "step": 1122 | |
| }, | |
| { | |
| "epoch": 2.484513274336283, | |
| "grad_norm": 0.1362440139055252, | |
| "learning_rate": 8.733013028895864e-07, | |
| "loss": 0.3557, | |
| "step": 1123 | |
| }, | |
| { | |
| "epoch": 2.4867256637168142, | |
| "grad_norm": 0.15375365316867828, | |
| "learning_rate": 8.660450843873647e-07, | |
| "loss": 0.3555, | |
| "step": 1124 | |
| }, | |
| { | |
| "epoch": 2.4889380530973453, | |
| "grad_norm": 0.14579614996910095, | |
| "learning_rate": 8.588162781917042e-07, | |
| "loss": 0.358, | |
| "step": 1125 | |
| }, | |
| { | |
| "epoch": 2.491150442477876, | |
| "grad_norm": 0.18101723492145538, | |
| "learning_rate": 8.516149322369055e-07, | |
| "loss": 0.3595, | |
| "step": 1126 | |
| }, | |
| { | |
| "epoch": 2.493362831858407, | |
| "grad_norm": 0.16425468027591705, | |
| "learning_rate": 8.444410942751863e-07, | |
| "loss": 0.3459, | |
| "step": 1127 | |
| }, | |
| { | |
| "epoch": 2.495575221238938, | |
| "grad_norm": 0.17149604856967926, | |
| "learning_rate": 8.372948118763536e-07, | |
| "loss": 0.3601, | |
| "step": 1128 | |
| }, | |
| { | |
| "epoch": 2.497787610619469, | |
| "grad_norm": 0.1419076919555664, | |
| "learning_rate": 8.301761324274965e-07, | |
| "loss": 0.3441, | |
| "step": 1129 | |
| }, | |
| { | |
| "epoch": 2.5, | |
| "grad_norm": 0.14477266371250153, | |
| "learning_rate": 8.230851031326653e-07, | |
| "loss": 0.3539, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 2.502212389380531, | |
| "grad_norm": 0.1530475616455078, | |
| "learning_rate": 8.160217710125661e-07, | |
| "loss": 0.3502, | |
| "step": 1131 | |
| }, | |
| { | |
| "epoch": 2.504424778761062, | |
| "grad_norm": 0.14788125455379486, | |
| "learning_rate": 8.089861829042406e-07, | |
| "loss": 0.3591, | |
| "step": 1132 | |
| }, | |
| { | |
| "epoch": 2.5066371681415927, | |
| "grad_norm": 0.15901798009872437, | |
| "learning_rate": 8.019783854607593e-07, | |
| "loss": 0.3551, | |
| "step": 1133 | |
| }, | |
| { | |
| "epoch": 2.508849557522124, | |
| "grad_norm": 0.1555824875831604, | |
| "learning_rate": 7.949984251509185e-07, | |
| "loss": 0.3448, | |
| "step": 1134 | |
| }, | |
| { | |
| "epoch": 2.5110619469026547, | |
| "grad_norm": 0.15857146680355072, | |
| "learning_rate": 7.880463482589196e-07, | |
| "loss": 0.3433, | |
| "step": 1135 | |
| }, | |
| { | |
| "epoch": 2.5132743362831858, | |
| "grad_norm": 0.16051232814788818, | |
| "learning_rate": 7.811222008840719e-07, | |
| "loss": 0.3563, | |
| "step": 1136 | |
| }, | |
| { | |
| "epoch": 2.515486725663717, | |
| "grad_norm": 0.13366073369979858, | |
| "learning_rate": 7.742260289404819e-07, | |
| "loss": 0.3552, | |
| "step": 1137 | |
| }, | |
| { | |
| "epoch": 2.517699115044248, | |
| "grad_norm": 0.16636747121810913, | |
| "learning_rate": 7.673578781567537e-07, | |
| "loss": 0.3549, | |
| "step": 1138 | |
| }, | |
| { | |
| "epoch": 2.519911504424779, | |
| "grad_norm": 0.16012783348560333, | |
| "learning_rate": 7.605177940756774e-07, | |
| "loss": 0.3505, | |
| "step": 1139 | |
| }, | |
| { | |
| "epoch": 2.52212389380531, | |
| "grad_norm": 0.1498229205608368, | |
| "learning_rate": 7.537058220539395e-07, | |
| "loss": 0.3579, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 2.524336283185841, | |
| "grad_norm": 0.17187750339508057, | |
| "learning_rate": 7.469220072618094e-07, | |
| "loss": 0.3548, | |
| "step": 1141 | |
| }, | |
| { | |
| "epoch": 2.5265486725663715, | |
| "grad_norm": 0.15075713396072388, | |
| "learning_rate": 7.401663946828469e-07, | |
| "loss": 0.3568, | |
| "step": 1142 | |
| }, | |
| { | |
| "epoch": 2.5287610619469025, | |
| "grad_norm": 0.18497484922409058, | |
| "learning_rate": 7.334390291136051e-07, | |
| "loss": 0.3579, | |
| "step": 1143 | |
| }, | |
| { | |
| "epoch": 2.5309734513274336, | |
| "grad_norm": 0.1425379067659378, | |
| "learning_rate": 7.267399551633253e-07, | |
| "loss": 0.3569, | |
| "step": 1144 | |
| }, | |
| { | |
| "epoch": 2.5331858407079646, | |
| "grad_norm": 0.17833374440670013, | |
| "learning_rate": 7.200692172536555e-07, | |
| "loss": 0.3476, | |
| "step": 1145 | |
| }, | |
| { | |
| "epoch": 2.5353982300884956, | |
| "grad_norm": 0.15675534307956696, | |
| "learning_rate": 7.13426859618338e-07, | |
| "loss": 0.3683, | |
| "step": 1146 | |
| }, | |
| { | |
| "epoch": 2.5376106194690267, | |
| "grad_norm": 0.16931159794330597, | |
| "learning_rate": 7.068129263029316e-07, | |
| "loss": 0.3558, | |
| "step": 1147 | |
| }, | |
| { | |
| "epoch": 2.5398230088495577, | |
| "grad_norm": 0.1581314355134964, | |
| "learning_rate": 7.002274611645083e-07, | |
| "loss": 0.36, | |
| "step": 1148 | |
| }, | |
| { | |
| "epoch": 2.5420353982300883, | |
| "grad_norm": 0.1542506217956543, | |
| "learning_rate": 6.936705078713713e-07, | |
| "loss": 0.3541, | |
| "step": 1149 | |
| }, | |
| { | |
| "epoch": 2.5442477876106193, | |
| "grad_norm": 0.16385367512702942, | |
| "learning_rate": 6.871421099027586e-07, | |
| "loss": 0.3587, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 2.5464601769911503, | |
| "grad_norm": 0.14884746074676514, | |
| "learning_rate": 6.806423105485576e-07, | |
| "loss": 0.3542, | |
| "step": 1151 | |
| }, | |
| { | |
| "epoch": 2.5486725663716814, | |
| "grad_norm": 0.15693628787994385, | |
| "learning_rate": 6.741711529090212e-07, | |
| "loss": 0.3573, | |
| "step": 1152 | |
| }, | |
| { | |
| "epoch": 2.5508849557522124, | |
| "grad_norm": 0.13773605227470398, | |
| "learning_rate": 6.677286798944743e-07, | |
| "loss": 0.3537, | |
| "step": 1153 | |
| }, | |
| { | |
| "epoch": 2.5530973451327434, | |
| "grad_norm": 0.15046106278896332, | |
| "learning_rate": 6.61314934225037e-07, | |
| "loss": 0.3533, | |
| "step": 1154 | |
| }, | |
| { | |
| "epoch": 2.5553097345132745, | |
| "grad_norm": 0.15596453845500946, | |
| "learning_rate": 6.549299584303343e-07, | |
| "loss": 0.3541, | |
| "step": 1155 | |
| }, | |
| { | |
| "epoch": 2.557522123893805, | |
| "grad_norm": 0.1392877697944641, | |
| "learning_rate": 6.485737948492237e-07, | |
| "loss": 0.3671, | |
| "step": 1156 | |
| }, | |
| { | |
| "epoch": 2.5597345132743365, | |
| "grad_norm": 0.16301456093788147, | |
| "learning_rate": 6.422464856295035e-07, | |
| "loss": 0.3551, | |
| "step": 1157 | |
| }, | |
| { | |
| "epoch": 2.561946902654867, | |
| "grad_norm": 0.14623309671878815, | |
| "learning_rate": 6.359480727276407e-07, | |
| "loss": 0.3566, | |
| "step": 1158 | |
| }, | |
| { | |
| "epoch": 2.564159292035398, | |
| "grad_norm": 0.15128614008426666, | |
| "learning_rate": 6.296785979084891e-07, | |
| "loss": 0.3573, | |
| "step": 1159 | |
| }, | |
| { | |
| "epoch": 2.566371681415929, | |
| "grad_norm": 0.1874575912952423, | |
| "learning_rate": 6.234381027450132e-07, | |
| "loss": 0.3497, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 2.5685840707964602, | |
| "grad_norm": 0.15378333628177643, | |
| "learning_rate": 6.172266286180162e-07, | |
| "loss": 0.3462, | |
| "step": 1161 | |
| }, | |
| { | |
| "epoch": 2.5707964601769913, | |
| "grad_norm": 0.1589403748512268, | |
| "learning_rate": 6.110442167158592e-07, | |
| "loss": 0.3496, | |
| "step": 1162 | |
| }, | |
| { | |
| "epoch": 2.573008849557522, | |
| "grad_norm": 0.1497460901737213, | |
| "learning_rate": 6.048909080341936e-07, | |
| "loss": 0.3509, | |
| "step": 1163 | |
| }, | |
| { | |
| "epoch": 2.5752212389380533, | |
| "grad_norm": 0.15763600170612335, | |
| "learning_rate": 5.987667433756844e-07, | |
| "loss": 0.3511, | |
| "step": 1164 | |
| }, | |
| { | |
| "epoch": 2.577433628318584, | |
| "grad_norm": 0.16659215092658997, | |
| "learning_rate": 5.926717633497453e-07, | |
| "loss": 0.364, | |
| "step": 1165 | |
| }, | |
| { | |
| "epoch": 2.579646017699115, | |
| "grad_norm": 0.15350916981697083, | |
| "learning_rate": 5.866060083722624e-07, | |
| "loss": 0.3604, | |
| "step": 1166 | |
| }, | |
| { | |
| "epoch": 2.581858407079646, | |
| "grad_norm": 0.1593090444803238, | |
| "learning_rate": 5.805695186653365e-07, | |
| "loss": 0.3554, | |
| "step": 1167 | |
| }, | |
| { | |
| "epoch": 2.584070796460177, | |
| "grad_norm": 0.13886038959026337, | |
| "learning_rate": 5.745623342570039e-07, | |
| "loss": 0.3567, | |
| "step": 1168 | |
| }, | |
| { | |
| "epoch": 2.586283185840708, | |
| "grad_norm": 0.17093704640865326, | |
| "learning_rate": 5.685844949809777e-07, | |
| "loss": 0.3569, | |
| "step": 1169 | |
| }, | |
| { | |
| "epoch": 2.588495575221239, | |
| "grad_norm": 0.11451248824596405, | |
| "learning_rate": 5.626360404763875e-07, | |
| "loss": 0.3468, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 2.59070796460177, | |
| "grad_norm": 0.15386511385440826, | |
| "learning_rate": 5.567170101875074e-07, | |
| "loss": 0.3639, | |
| "step": 1171 | |
| }, | |
| { | |
| "epoch": 2.5929203539823007, | |
| "grad_norm": 0.14580263197422028, | |
| "learning_rate": 5.508274433635019e-07, | |
| "loss": 0.3555, | |
| "step": 1172 | |
| }, | |
| { | |
| "epoch": 2.5951327433628317, | |
| "grad_norm": 0.14180666208267212, | |
| "learning_rate": 5.449673790581611e-07, | |
| "loss": 0.3593, | |
| "step": 1173 | |
| }, | |
| { | |
| "epoch": 2.5973451327433628, | |
| "grad_norm": 0.13817204535007477, | |
| "learning_rate": 5.391368561296456e-07, | |
| "loss": 0.3412, | |
| "step": 1174 | |
| }, | |
| { | |
| "epoch": 2.599557522123894, | |
| "grad_norm": 0.14034853875637054, | |
| "learning_rate": 5.333359132402238e-07, | |
| "loss": 0.3539, | |
| "step": 1175 | |
| }, | |
| { | |
| "epoch": 2.601769911504425, | |
| "grad_norm": 0.14221332967281342, | |
| "learning_rate": 5.275645888560233e-07, | |
| "loss": 0.3541, | |
| "step": 1176 | |
| }, | |
| { | |
| "epoch": 2.603982300884956, | |
| "grad_norm": 0.15998932719230652, | |
| "learning_rate": 5.218229212467635e-07, | |
| "loss": 0.3696, | |
| "step": 1177 | |
| }, | |
| { | |
| "epoch": 2.606194690265487, | |
| "grad_norm": 0.15716540813446045, | |
| "learning_rate": 5.161109484855182e-07, | |
| "loss": 0.3518, | |
| "step": 1178 | |
| }, | |
| { | |
| "epoch": 2.6084070796460175, | |
| "grad_norm": 0.13059420883655548, | |
| "learning_rate": 5.104287084484489e-07, | |
| "loss": 0.3468, | |
| "step": 1179 | |
| }, | |
| { | |
| "epoch": 2.6106194690265485, | |
| "grad_norm": 0.15905797481536865, | |
| "learning_rate": 5.047762388145582e-07, | |
| "loss": 0.3515, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 2.6128318584070795, | |
| "grad_norm": 0.1637641042470932, | |
| "learning_rate": 4.991535770654449e-07, | |
| "loss": 0.3589, | |
| "step": 1181 | |
| }, | |
| { | |
| "epoch": 2.6150442477876106, | |
| "grad_norm": 0.14523540437221527, | |
| "learning_rate": 4.935607604850473e-07, | |
| "loss": 0.3532, | |
| "step": 1182 | |
| }, | |
| { | |
| "epoch": 2.6172566371681416, | |
| "grad_norm": 0.15000000596046448, | |
| "learning_rate": 4.879978261594037e-07, | |
| "loss": 0.3557, | |
| "step": 1183 | |
| }, | |
| { | |
| "epoch": 2.6194690265486726, | |
| "grad_norm": 0.155274897813797, | |
| "learning_rate": 4.824648109763991e-07, | |
| "loss": 0.3578, | |
| "step": 1184 | |
| }, | |
| { | |
| "epoch": 2.6216814159292037, | |
| "grad_norm": 0.1417737454175949, | |
| "learning_rate": 4.769617516255276e-07, | |
| "loss": 0.3631, | |
| "step": 1185 | |
| }, | |
| { | |
| "epoch": 2.6238938053097343, | |
| "grad_norm": 0.1490320861339569, | |
| "learning_rate": 4.71488684597643e-07, | |
| "loss": 0.3526, | |
| "step": 1186 | |
| }, | |
| { | |
| "epoch": 2.6261061946902657, | |
| "grad_norm": 0.1615634262561798, | |
| "learning_rate": 4.660456461847224e-07, | |
| "loss": 0.3572, | |
| "step": 1187 | |
| }, | |
| { | |
| "epoch": 2.6283185840707963, | |
| "grad_norm": 0.14165712893009186, | |
| "learning_rate": 4.606326724796195e-07, | |
| "loss": 0.3653, | |
| "step": 1188 | |
| }, | |
| { | |
| "epoch": 2.6305309734513274, | |
| "grad_norm": 0.14493530988693237, | |
| "learning_rate": 4.5524979937583203e-07, | |
| "loss": 0.3527, | |
| "step": 1189 | |
| }, | |
| { | |
| "epoch": 2.6327433628318584, | |
| "grad_norm": 0.13274651765823364, | |
| "learning_rate": 4.498970625672588e-07, | |
| "loss": 0.3544, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 2.6349557522123894, | |
| "grad_norm": 0.16449962556362152, | |
| "learning_rate": 4.445744975479627e-07, | |
| "loss": 0.3589, | |
| "step": 1191 | |
| }, | |
| { | |
| "epoch": 2.6371681415929205, | |
| "grad_norm": 0.1559862345457077, | |
| "learning_rate": 4.392821396119407e-07, | |
| "loss": 0.3507, | |
| "step": 1192 | |
| }, | |
| { | |
| "epoch": 2.6393805309734515, | |
| "grad_norm": 0.15669386088848114, | |
| "learning_rate": 4.3402002385288133e-07, | |
| "loss": 0.3505, | |
| "step": 1193 | |
| }, | |
| { | |
| "epoch": 2.6415929203539825, | |
| "grad_norm": 0.17834536731243134, | |
| "learning_rate": 4.2878818516394307e-07, | |
| "loss": 0.3508, | |
| "step": 1194 | |
| }, | |
| { | |
| "epoch": 2.643805309734513, | |
| "grad_norm": 0.1579374521970749, | |
| "learning_rate": 4.235866582375114e-07, | |
| "loss": 0.3627, | |
| "step": 1195 | |
| }, | |
| { | |
| "epoch": 2.646017699115044, | |
| "grad_norm": 0.16911913454532623, | |
| "learning_rate": 4.184154775649768e-07, | |
| "loss": 0.36, | |
| "step": 1196 | |
| }, | |
| { | |
| "epoch": 2.648230088495575, | |
| "grad_norm": 0.17118600010871887, | |
| "learning_rate": 4.132746774365021e-07, | |
| "loss": 0.3571, | |
| "step": 1197 | |
| }, | |
| { | |
| "epoch": 2.650442477876106, | |
| "grad_norm": 0.1715717762708664, | |
| "learning_rate": 4.0816429194079857e-07, | |
| "loss": 0.361, | |
| "step": 1198 | |
| }, | |
| { | |
| "epoch": 2.6526548672566372, | |
| "grad_norm": 0.17212989926338196, | |
| "learning_rate": 4.030843549648944e-07, | |
| "loss": 0.3517, | |
| "step": 1199 | |
| }, | |
| { | |
| "epoch": 2.6548672566371683, | |
| "grad_norm": 0.17287541925907135, | |
| "learning_rate": 3.9803490019391545e-07, | |
| "loss": 0.354, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 2.6570796460176993, | |
| "grad_norm": 0.18660354614257812, | |
| "learning_rate": 3.930159611108603e-07, | |
| "loss": 0.3559, | |
| "step": 1201 | |
| }, | |
| { | |
| "epoch": 2.65929203539823, | |
| "grad_norm": 0.13577118515968323, | |
| "learning_rate": 3.880275709963749e-07, | |
| "loss": 0.3612, | |
| "step": 1202 | |
| }, | |
| { | |
| "epoch": 2.661504424778761, | |
| "grad_norm": 0.17823448777198792, | |
| "learning_rate": 3.8306976292853794e-07, | |
| "loss": 0.3542, | |
| "step": 1203 | |
| }, | |
| { | |
| "epoch": 2.663716814159292, | |
| "grad_norm": 0.14912089705467224, | |
| "learning_rate": 3.7814256978263465e-07, | |
| "loss": 0.3525, | |
| "step": 1204 | |
| }, | |
| { | |
| "epoch": 2.665929203539823, | |
| "grad_norm": 0.16290871798992157, | |
| "learning_rate": 3.7324602423094603e-07, | |
| "loss": 0.3601, | |
| "step": 1205 | |
| }, | |
| { | |
| "epoch": 2.668141592920354, | |
| "grad_norm": 0.15941615402698517, | |
| "learning_rate": 3.683801587425251e-07, | |
| "loss": 0.3627, | |
| "step": 1206 | |
| }, | |
| { | |
| "epoch": 2.670353982300885, | |
| "grad_norm": 0.14429469406604767, | |
| "learning_rate": 3.635450055829881e-07, | |
| "loss": 0.3604, | |
| "step": 1207 | |
| }, | |
| { | |
| "epoch": 2.672566371681416, | |
| "grad_norm": 0.15333802998065948, | |
| "learning_rate": 3.587405968142943e-07, | |
| "loss": 0.3507, | |
| "step": 1208 | |
| }, | |
| { | |
| "epoch": 2.6747787610619467, | |
| "grad_norm": 0.1445317417383194, | |
| "learning_rate": 3.539669642945387e-07, | |
| "loss": 0.3567, | |
| "step": 1209 | |
| }, | |
| { | |
| "epoch": 2.676991150442478, | |
| "grad_norm": 0.16064175963401794, | |
| "learning_rate": 3.4922413967773905e-07, | |
| "loss": 0.3623, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 2.6792035398230087, | |
| "grad_norm": 0.1633787751197815, | |
| "learning_rate": 3.4451215441362264e-07, | |
| "loss": 0.3509, | |
| "step": 1211 | |
| }, | |
| { | |
| "epoch": 2.6814159292035398, | |
| "grad_norm": 0.14114798605442047, | |
| "learning_rate": 3.3983103974742415e-07, | |
| "loss": 0.3658, | |
| "step": 1212 | |
| }, | |
| { | |
| "epoch": 2.683628318584071, | |
| "grad_norm": 0.15633976459503174, | |
| "learning_rate": 3.351808267196721e-07, | |
| "loss": 0.3668, | |
| "step": 1213 | |
| }, | |
| { | |
| "epoch": 2.685840707964602, | |
| "grad_norm": 0.14381296932697296, | |
| "learning_rate": 3.3056154616598777e-07, | |
| "loss": 0.3581, | |
| "step": 1214 | |
| }, | |
| { | |
| "epoch": 2.688053097345133, | |
| "grad_norm": 0.14406733214855194, | |
| "learning_rate": 3.2597322871687766e-07, | |
| "loss": 0.3543, | |
| "step": 1215 | |
| }, | |
| { | |
| "epoch": 2.6902654867256635, | |
| "grad_norm": 0.156460240483284, | |
| "learning_rate": 3.214159047975324e-07, | |
| "loss": 0.352, | |
| "step": 1216 | |
| }, | |
| { | |
| "epoch": 2.692477876106195, | |
| "grad_norm": 0.1374759078025818, | |
| "learning_rate": 3.1688960462762263e-07, | |
| "loss": 0.3601, | |
| "step": 1217 | |
| }, | |
| { | |
| "epoch": 2.6946902654867255, | |
| "grad_norm": 0.16220209002494812, | |
| "learning_rate": 3.1239435822110253e-07, | |
| "loss": 0.3614, | |
| "step": 1218 | |
| }, | |
| { | |
| "epoch": 2.6969026548672566, | |
| "grad_norm": 0.15912321209907532, | |
| "learning_rate": 3.079301953860059e-07, | |
| "loss": 0.3573, | |
| "step": 1219 | |
| }, | |
| { | |
| "epoch": 2.6991150442477876, | |
| "grad_norm": 0.16917406022548676, | |
| "learning_rate": 3.034971457242525e-07, | |
| "loss": 0.3601, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 2.7013274336283186, | |
| "grad_norm": 0.16305102407932281, | |
| "learning_rate": 2.990952386314505e-07, | |
| "loss": 0.3594, | |
| "step": 1221 | |
| }, | |
| { | |
| "epoch": 2.7035398230088497, | |
| "grad_norm": 0.1461646407842636, | |
| "learning_rate": 2.947245032967e-07, | |
| "loss": 0.3517, | |
| "step": 1222 | |
| }, | |
| { | |
| "epoch": 2.7057522123893807, | |
| "grad_norm": 0.16588905453681946, | |
| "learning_rate": 2.9038496870240187e-07, | |
| "loss": 0.3564, | |
| "step": 1223 | |
| }, | |
| { | |
| "epoch": 2.7079646017699117, | |
| "grad_norm": 0.14253613352775574, | |
| "learning_rate": 2.860766636240636e-07, | |
| "loss": 0.3633, | |
| "step": 1224 | |
| }, | |
| { | |
| "epoch": 2.7101769911504423, | |
| "grad_norm": 0.15777863562107086, | |
| "learning_rate": 2.817996166301107e-07, | |
| "loss": 0.3465, | |
| "step": 1225 | |
| }, | |
| { | |
| "epoch": 2.7123893805309733, | |
| "grad_norm": 0.1424233764410019, | |
| "learning_rate": 2.7755385608169374e-07, | |
| "loss": 0.3523, | |
| "step": 1226 | |
| }, | |
| { | |
| "epoch": 2.7146017699115044, | |
| "grad_norm": 0.1586133986711502, | |
| "learning_rate": 2.733394101325054e-07, | |
| "loss": 0.3579, | |
| "step": 1227 | |
| }, | |
| { | |
| "epoch": 2.7168141592920354, | |
| "grad_norm": 0.14557451009750366, | |
| "learning_rate": 2.691563067285874e-07, | |
| "loss": 0.3539, | |
| "step": 1228 | |
| }, | |
| { | |
| "epoch": 2.7190265486725664, | |
| "grad_norm": 0.15484808385372162, | |
| "learning_rate": 2.65004573608153e-07, | |
| "loss": 0.3577, | |
| "step": 1229 | |
| }, | |
| { | |
| "epoch": 2.7212389380530975, | |
| "grad_norm": 0.14703123271465302, | |
| "learning_rate": 2.608842383013943e-07, | |
| "loss": 0.3586, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 2.7234513274336285, | |
| "grad_norm": 0.1443336308002472, | |
| "learning_rate": 2.567953281303059e-07, | |
| "loss": 0.3571, | |
| "step": 1231 | |
| }, | |
| { | |
| "epoch": 2.725663716814159, | |
| "grad_norm": 0.14558246731758118, | |
| "learning_rate": 2.527378702085037e-07, | |
| "loss": 0.3584, | |
| "step": 1232 | |
| }, | |
| { | |
| "epoch": 2.72787610619469, | |
| "grad_norm": 0.14799098670482635, | |
| "learning_rate": 2.4871189144104025e-07, | |
| "loss": 0.3612, | |
| "step": 1233 | |
| }, | |
| { | |
| "epoch": 2.730088495575221, | |
| "grad_norm": 0.14273111522197723, | |
| "learning_rate": 2.447174185242324e-07, | |
| "loss": 0.3557, | |
| "step": 1234 | |
| }, | |
| { | |
| "epoch": 2.732300884955752, | |
| "grad_norm": 0.15998686850070953, | |
| "learning_rate": 2.407544779454779e-07, | |
| "loss": 0.3445, | |
| "step": 1235 | |
| }, | |
| { | |
| "epoch": 2.734513274336283, | |
| "grad_norm": 0.16166724264621735, | |
| "learning_rate": 2.368230959830875e-07, | |
| "loss": 0.3381, | |
| "step": 1236 | |
| }, | |
| { | |
| "epoch": 2.7367256637168142, | |
| "grad_norm": 0.15504410862922668, | |
| "learning_rate": 2.32923298706102e-07, | |
| "loss": 0.3548, | |
| "step": 1237 | |
| }, | |
| { | |
| "epoch": 2.7389380530973453, | |
| "grad_norm": 0.15463854372501373, | |
| "learning_rate": 2.2905511197412634e-07, | |
| "loss": 0.354, | |
| "step": 1238 | |
| }, | |
| { | |
| "epoch": 2.741150442477876, | |
| "grad_norm": 0.15159274637699127, | |
| "learning_rate": 2.2521856143715492e-07, | |
| "loss": 0.353, | |
| "step": 1239 | |
| }, | |
| { | |
| "epoch": 2.7433628318584073, | |
| "grad_norm": 0.16764990985393524, | |
| "learning_rate": 2.2141367253540325e-07, | |
| "loss": 0.3575, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 2.745575221238938, | |
| "grad_norm": 0.15683040022850037, | |
| "learning_rate": 2.1764047049913528e-07, | |
| "loss": 0.3505, | |
| "step": 1241 | |
| }, | |
| { | |
| "epoch": 2.747787610619469, | |
| "grad_norm": 0.15731599926948547, | |
| "learning_rate": 2.1389898034850086e-07, | |
| "loss": 0.3592, | |
| "step": 1242 | |
| }, | |
| { | |
| "epoch": 2.75, | |
| "grad_norm": 0.15384253859519958, | |
| "learning_rate": 2.1018922689336973e-07, | |
| "loss": 0.3599, | |
| "step": 1243 | |
| }, | |
| { | |
| "epoch": 2.752212389380531, | |
| "grad_norm": 0.1472090780735016, | |
| "learning_rate": 2.0651123473316103e-07, | |
| "loss": 0.358, | |
| "step": 1244 | |
| }, | |
| { | |
| "epoch": 2.754424778761062, | |
| "grad_norm": 0.144203320145607, | |
| "learning_rate": 2.0286502825668852e-07, | |
| "loss": 0.3519, | |
| "step": 1245 | |
| }, | |
| { | |
| "epoch": 2.7566371681415927, | |
| "grad_norm": 0.1510661542415619, | |
| "learning_rate": 1.992506316419912e-07, | |
| "loss": 0.3585, | |
| "step": 1246 | |
| }, | |
| { | |
| "epoch": 2.758849557522124, | |
| "grad_norm": 0.1556992530822754, | |
| "learning_rate": 1.9566806885617906e-07, | |
| "loss": 0.3556, | |
| "step": 1247 | |
| }, | |
| { | |
| "epoch": 2.7610619469026547, | |
| "grad_norm": 0.14370451867580414, | |
| "learning_rate": 1.921173636552698e-07, | |
| "loss": 0.3595, | |
| "step": 1248 | |
| }, | |
| { | |
| "epoch": 2.7632743362831858, | |
| "grad_norm": 0.14656773209571838, | |
| "learning_rate": 1.8859853958403507e-07, | |
| "loss": 0.3483, | |
| "step": 1249 | |
| }, | |
| { | |
| "epoch": 2.765486725663717, | |
| "grad_norm": 0.13567841053009033, | |
| "learning_rate": 1.8511161997584015e-07, | |
| "loss": 0.354, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 2.767699115044248, | |
| "grad_norm": 0.1580413579940796, | |
| "learning_rate": 1.8165662795249172e-07, | |
| "loss": 0.3644, | |
| "step": 1251 | |
| }, | |
| { | |
| "epoch": 2.769911504424779, | |
| "grad_norm": 0.14433561265468597, | |
| "learning_rate": 1.7823358642408694e-07, | |
| "loss": 0.3485, | |
| "step": 1252 | |
| }, | |
| { | |
| "epoch": 2.77212389380531, | |
| "grad_norm": 0.1432889699935913, | |
| "learning_rate": 1.748425180888541e-07, | |
| "loss": 0.3645, | |
| "step": 1253 | |
| }, | |
| { | |
| "epoch": 2.774336283185841, | |
| "grad_norm": 0.14327970147132874, | |
| "learning_rate": 1.7148344543301277e-07, | |
| "loss": 0.3495, | |
| "step": 1254 | |
| }, | |
| { | |
| "epoch": 2.7765486725663715, | |
| "grad_norm": 0.1495339274406433, | |
| "learning_rate": 1.681563907306133e-07, | |
| "loss": 0.3439, | |
| "step": 1255 | |
| }, | |
| { | |
| "epoch": 2.7787610619469025, | |
| "grad_norm": 0.14024530351161957, | |
| "learning_rate": 1.6486137604339813e-07, | |
| "loss": 0.3537, | |
| "step": 1256 | |
| }, | |
| { | |
| "epoch": 2.7809734513274336, | |
| "grad_norm": 0.16141167283058167, | |
| "learning_rate": 1.6159842322065022e-07, | |
| "loss": 0.3502, | |
| "step": 1257 | |
| }, | |
| { | |
| "epoch": 2.7831858407079646, | |
| "grad_norm": 0.14094969630241394, | |
| "learning_rate": 1.5836755389905035e-07, | |
| "loss": 0.3506, | |
| "step": 1258 | |
| }, | |
| { | |
| "epoch": 2.7853982300884956, | |
| "grad_norm": 0.15461289882659912, | |
| "learning_rate": 1.5516878950253333e-07, | |
| "loss": 0.3616, | |
| "step": 1259 | |
| }, | |
| { | |
| "epoch": 2.7876106194690267, | |
| "grad_norm": 0.14529578387737274, | |
| "learning_rate": 1.5200215124214491e-07, | |
| "loss": 0.3474, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 2.7898230088495577, | |
| "grad_norm": 0.13706517219543457, | |
| "learning_rate": 1.4886766011590449e-07, | |
| "loss": 0.3509, | |
| "step": 1261 | |
| }, | |
| { | |
| "epoch": 2.7920353982300883, | |
| "grad_norm": 0.15313966572284698, | |
| "learning_rate": 1.4576533690866035e-07, | |
| "loss": 0.3551, | |
| "step": 1262 | |
| }, | |
| { | |
| "epoch": 2.7942477876106193, | |
| "grad_norm": 0.1422608494758606, | |
| "learning_rate": 1.4269520219195753e-07, | |
| "loss": 0.3437, | |
| "step": 1263 | |
| }, | |
| { | |
| "epoch": 2.7964601769911503, | |
| "grad_norm": 0.13179507851600647, | |
| "learning_rate": 1.396572763238957e-07, | |
| "loss": 0.3562, | |
| "step": 1264 | |
| }, | |
| { | |
| "epoch": 2.7986725663716814, | |
| "grad_norm": 0.14244070649147034, | |
| "learning_rate": 1.3665157944900198e-07, | |
| "loss": 0.3641, | |
| "step": 1265 | |
| }, | |
| { | |
| "epoch": 2.8008849557522124, | |
| "grad_norm": 0.14658492803573608, | |
| "learning_rate": 1.3367813149808728e-07, | |
| "loss": 0.3567, | |
| "step": 1266 | |
| }, | |
| { | |
| "epoch": 2.8030973451327434, | |
| "grad_norm": 0.1432352066040039, | |
| "learning_rate": 1.3073695218812356e-07, | |
| "loss": 0.3483, | |
| "step": 1267 | |
| }, | |
| { | |
| "epoch": 2.8053097345132745, | |
| "grad_norm": 0.1332673579454422, | |
| "learning_rate": 1.278280610221072e-07, | |
| "loss": 0.3594, | |
| "step": 1268 | |
| }, | |
| { | |
| "epoch": 2.807522123893805, | |
| "grad_norm": 0.15009014308452606, | |
| "learning_rate": 1.2495147728893043e-07, | |
| "loss": 0.3592, | |
| "step": 1269 | |
| }, | |
| { | |
| "epoch": 2.8097345132743365, | |
| "grad_norm": 0.13184407353401184, | |
| "learning_rate": 1.2210722006325782e-07, | |
| "loss": 0.3542, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 2.811946902654867, | |
| "grad_norm": 0.13673901557922363, | |
| "learning_rate": 1.192953082053927e-07, | |
| "loss": 0.3622, | |
| "step": 1271 | |
| }, | |
| { | |
| "epoch": 2.814159292035398, | |
| "grad_norm": 0.1729208379983902, | |
| "learning_rate": 1.1651576036115942e-07, | |
| "loss": 0.3544, | |
| "step": 1272 | |
| }, | |
| { | |
| "epoch": 2.816371681415929, | |
| "grad_norm": 0.139160618185997, | |
| "learning_rate": 1.1376859496177228e-07, | |
| "loss": 0.3499, | |
| "step": 1273 | |
| }, | |
| { | |
| "epoch": 2.8185840707964602, | |
| "grad_norm": 0.15454445779323578, | |
| "learning_rate": 1.110538302237213e-07, | |
| "loss": 0.3572, | |
| "step": 1274 | |
| }, | |
| { | |
| "epoch": 2.8207964601769913, | |
| "grad_norm": 0.1475721150636673, | |
| "learning_rate": 1.0837148414864329e-07, | |
| "loss": 0.3636, | |
| "step": 1275 | |
| }, | |
| { | |
| "epoch": 2.823008849557522, | |
| "grad_norm": 0.1556055098772049, | |
| "learning_rate": 1.0572157452321097e-07, | |
| "loss": 0.348, | |
| "step": 1276 | |
| }, | |
| { | |
| "epoch": 2.8252212389380533, | |
| "grad_norm": 0.14078259468078613, | |
| "learning_rate": 1.0310411891900629e-07, | |
| "loss": 0.3546, | |
| "step": 1277 | |
| }, | |
| { | |
| "epoch": 2.827433628318584, | |
| "grad_norm": 0.15832266211509705, | |
| "learning_rate": 1.0051913469241003e-07, | |
| "loss": 0.3639, | |
| "step": 1278 | |
| }, | |
| { | |
| "epoch": 2.829646017699115, | |
| "grad_norm": 0.16918309032917023, | |
| "learning_rate": 9.796663898448578e-08, | |
| "loss": 0.3579, | |
| "step": 1279 | |
| }, | |
| { | |
| "epoch": 2.831858407079646, | |
| "grad_norm": 0.13976359367370605, | |
| "learning_rate": 9.544664872086329e-08, | |
| "loss": 0.3632, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 2.834070796460177, | |
| "grad_norm": 0.14802858233451843, | |
| "learning_rate": 9.295918061163034e-08, | |
| "loss": 0.3548, | |
| "step": 1281 | |
| }, | |
| { | |
| "epoch": 2.836283185840708, | |
| "grad_norm": 0.15616321563720703, | |
| "learning_rate": 9.050425115121775e-08, | |
| "loss": 0.3521, | |
| "step": 1282 | |
| }, | |
| { | |
| "epoch": 2.838495575221239, | |
| "grad_norm": 0.13561472296714783, | |
| "learning_rate": 8.808187661829504e-08, | |
| "loss": 0.3601, | |
| "step": 1283 | |
| }, | |
| { | |
| "epoch": 2.84070796460177, | |
| "grad_norm": 0.16056296229362488, | |
| "learning_rate": 8.569207307565664e-08, | |
| "loss": 0.3596, | |
| "step": 1284 | |
| }, | |
| { | |
| "epoch": 2.8429203539823007, | |
| "grad_norm": 0.1590995341539383, | |
| "learning_rate": 8.333485637012029e-08, | |
| "loss": 0.3573, | |
| "step": 1285 | |
| }, | |
| { | |
| "epoch": 2.8451327433628317, | |
| "grad_norm": 0.15869586169719696, | |
| "learning_rate": 8.101024213241826e-08, | |
| "loss": 0.3526, | |
| "step": 1286 | |
| }, | |
| { | |
| "epoch": 2.8473451327433628, | |
| "grad_norm": 0.15596012771129608, | |
| "learning_rate": 7.871824577709797e-08, | |
| "loss": 0.3504, | |
| "step": 1287 | |
| }, | |
| { | |
| "epoch": 2.849557522123894, | |
| "grad_norm": 0.1455894559621811, | |
| "learning_rate": 7.645888250241485e-08, | |
| "loss": 0.3554, | |
| "step": 1288 | |
| }, | |
| { | |
| "epoch": 2.851769911504425, | |
| "grad_norm": 0.1496601700782776, | |
| "learning_rate": 7.423216729023574e-08, | |
| "loss": 0.3666, | |
| "step": 1289 | |
| }, | |
| { | |
| "epoch": 2.853982300884956, | |
| "grad_norm": 0.13408933579921722, | |
| "learning_rate": 7.203811490593626e-08, | |
| "loss": 0.3636, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 2.856194690265487, | |
| "grad_norm": 0.1369679570198059, | |
| "learning_rate": 6.987673989830523e-08, | |
| "loss": 0.3587, | |
| "step": 1291 | |
| }, | |
| { | |
| "epoch": 2.8584070796460175, | |
| "grad_norm": 0.15344880521297455, | |
| "learning_rate": 6.774805659944761e-08, | |
| "loss": 0.3522, | |
| "step": 1292 | |
| }, | |
| { | |
| "epoch": 2.8606194690265485, | |
| "grad_norm": 0.1609773337841034, | |
| "learning_rate": 6.565207912468785e-08, | |
| "loss": 0.3535, | |
| "step": 1293 | |
| }, | |
| { | |
| "epoch": 2.8628318584070795, | |
| "grad_norm": 0.14126433432102203, | |
| "learning_rate": 6.358882137248001e-08, | |
| "loss": 0.3452, | |
| "step": 1294 | |
| }, | |
| { | |
| "epoch": 2.8650442477876106, | |
| "grad_norm": 0.13827714323997498, | |
| "learning_rate": 6.15582970243117e-08, | |
| "loss": 0.3488, | |
| "step": 1295 | |
| }, | |
| { | |
| "epoch": 2.8672566371681416, | |
| "grad_norm": 0.1625031977891922, | |
| "learning_rate": 5.9560519544614725e-08, | |
| "loss": 0.3541, | |
| "step": 1296 | |
| }, | |
| { | |
| "epoch": 2.8694690265486726, | |
| "grad_norm": 0.14330527186393738, | |
| "learning_rate": 5.759550218067622e-08, | |
| "loss": 0.3535, | |
| "step": 1297 | |
| }, | |
| { | |
| "epoch": 2.8716814159292037, | |
| "grad_norm": 0.16162008047103882, | |
| "learning_rate": 5.566325796255101e-08, | |
| "loss": 0.3562, | |
| "step": 1298 | |
| }, | |
| { | |
| "epoch": 2.8738938053097343, | |
| "grad_norm": 0.1462399661540985, | |
| "learning_rate": 5.3763799702975516e-08, | |
| "loss": 0.3531, | |
| "step": 1299 | |
| }, | |
| { | |
| "epoch": 2.8761061946902657, | |
| "grad_norm": 0.14997854828834534, | |
| "learning_rate": 5.1897139997280074e-08, | |
| "loss": 0.3561, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 2.8783185840707963, | |
| "grad_norm": 0.1586892157793045, | |
| "learning_rate": 5.0063291223308993e-08, | |
| "loss": 0.3636, | |
| "step": 1301 | |
| }, | |
| { | |
| "epoch": 2.8805309734513274, | |
| "grad_norm": 0.1484476923942566, | |
| "learning_rate": 4.826226554133562e-08, | |
| "loss": 0.3637, | |
| "step": 1302 | |
| }, | |
| { | |
| "epoch": 2.8827433628318584, | |
| "grad_norm": 0.1511233150959015, | |
| "learning_rate": 4.649407489398461e-08, | |
| "loss": 0.3618, | |
| "step": 1303 | |
| }, | |
| { | |
| "epoch": 2.8849557522123894, | |
| "grad_norm": 0.14198242127895355, | |
| "learning_rate": 4.4758731006149804e-08, | |
| "loss": 0.3595, | |
| "step": 1304 | |
| }, | |
| { | |
| "epoch": 2.8871681415929205, | |
| "grad_norm": 0.15620240569114685, | |
| "learning_rate": 4.305624538491815e-08, | |
| "loss": 0.3599, | |
| "step": 1305 | |
| }, | |
| { | |
| "epoch": 2.8893805309734515, | |
| "grad_norm": 0.15184152126312256, | |
| "learning_rate": 4.138662931949255e-08, | |
| "loss": 0.3552, | |
| "step": 1306 | |
| }, | |
| { | |
| "epoch": 2.8915929203539825, | |
| "grad_norm": 0.14311784505844116, | |
| "learning_rate": 3.974989388111861e-08, | |
| "loss": 0.3568, | |
| "step": 1307 | |
| }, | |
| { | |
| "epoch": 2.893805309734513, | |
| "grad_norm": 0.14970344305038452, | |
| "learning_rate": 3.814604992300908e-08, | |
| "loss": 0.3564, | |
| "step": 1308 | |
| }, | |
| { | |
| "epoch": 2.896017699115044, | |
| "grad_norm": 0.13278964161872864, | |
| "learning_rate": 3.657510808027343e-08, | |
| "loss": 0.362, | |
| "step": 1309 | |
| }, | |
| { | |
| "epoch": 2.898230088495575, | |
| "grad_norm": 0.15549008548259735, | |
| "learning_rate": 3.503707876984674e-08, | |
| "loss": 0.353, | |
| "step": 1310 | |
| }, | |
| { | |
| "epoch": 2.900442477876106, | |
| "grad_norm": 0.15713003277778625, | |
| "learning_rate": 3.353197219041981e-08, | |
| "loss": 0.3599, | |
| "step": 1311 | |
| }, | |
| { | |
| "epoch": 2.9026548672566372, | |
| "grad_norm": 0.17100383341312408, | |
| "learning_rate": 3.205979832237416e-08, | |
| "loss": 0.36, | |
| "step": 1312 | |
| }, | |
| { | |
| "epoch": 2.9048672566371683, | |
| "grad_norm": 0.15952634811401367, | |
| "learning_rate": 3.062056692771154e-08, | |
| "loss": 0.3562, | |
| "step": 1313 | |
| }, | |
| { | |
| "epoch": 2.9070796460176993, | |
| "grad_norm": 0.13878712058067322, | |
| "learning_rate": 2.9214287549995114e-08, | |
| "loss": 0.3486, | |
| "step": 1314 | |
| }, | |
| { | |
| "epoch": 2.90929203539823, | |
| "grad_norm": 0.14025112986564636, | |
| "learning_rate": 2.7840969514279503e-08, | |
| "loss": 0.3565, | |
| "step": 1315 | |
| }, | |
| { | |
| "epoch": 2.911504424778761, | |
| "grad_norm": 0.15756577253341675, | |
| "learning_rate": 2.6500621927054716e-08, | |
| "loss": 0.3475, | |
| "step": 1316 | |
| }, | |
| { | |
| "epoch": 2.913716814159292, | |
| "grad_norm": 0.14622363448143005, | |
| "learning_rate": 2.519325367618175e-08, | |
| "loss": 0.3631, | |
| "step": 1317 | |
| }, | |
| { | |
| "epoch": 2.915929203539823, | |
| "grad_norm": 0.15228000283241272, | |
| "learning_rate": 2.3918873430835986e-08, | |
| "loss": 0.3489, | |
| "step": 1318 | |
| }, | |
| { | |
| "epoch": 2.918141592920354, | |
| "grad_norm": 0.1473086029291153, | |
| "learning_rate": 2.2677489641448335e-08, | |
| "loss": 0.3459, | |
| "step": 1319 | |
| }, | |
| { | |
| "epoch": 2.920353982300885, | |
| "grad_norm": 0.13773617148399353, | |
| "learning_rate": 2.1469110539650283e-08, | |
| "loss": 0.3674, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 2.922566371681416, | |
| "grad_norm": 0.15642863512039185, | |
| "learning_rate": 2.0293744138219495e-08, | |
| "loss": 0.3493, | |
| "step": 1321 | |
| }, | |
| { | |
| "epoch": 2.9247787610619467, | |
| "grad_norm": 0.1433519721031189, | |
| "learning_rate": 1.9151398231024297e-08, | |
| "loss": 0.3517, | |
| "step": 1322 | |
| }, | |
| { | |
| "epoch": 2.926991150442478, | |
| "grad_norm": 0.14097705483436584, | |
| "learning_rate": 1.8042080392974837e-08, | |
| "loss": 0.3654, | |
| "step": 1323 | |
| }, | |
| { | |
| "epoch": 2.9292035398230087, | |
| "grad_norm": 0.15193448960781097, | |
| "learning_rate": 1.6965797979971442e-08, | |
| "loss": 0.3556, | |
| "step": 1324 | |
| }, | |
| { | |
| "epoch": 2.9314159292035398, | |
| "grad_norm": 0.14747443795204163, | |
| "learning_rate": 1.5922558128856903e-08, | |
| "loss": 0.3521, | |
| "step": 1325 | |
| }, | |
| { | |
| "epoch": 2.933628318584071, | |
| "grad_norm": 0.16591234505176544, | |
| "learning_rate": 1.4912367757366485e-08, | |
| "loss": 0.3628, | |
| "step": 1326 | |
| }, | |
| { | |
| "epoch": 2.935840707964602, | |
| "grad_norm": 0.15506120026111603, | |
| "learning_rate": 1.3935233564086326e-08, | |
| "loss": 0.3529, | |
| "step": 1327 | |
| }, | |
| { | |
| "epoch": 2.938053097345133, | |
| "grad_norm": 0.16547641158103943, | |
| "learning_rate": 1.2991162028405113e-08, | |
| "loss": 0.3603, | |
| "step": 1328 | |
| }, | |
| { | |
| "epoch": 2.9402654867256635, | |
| "grad_norm": 0.17579355835914612, | |
| "learning_rate": 1.2080159410471914e-08, | |
| "loss": 0.3652, | |
| "step": 1329 | |
| }, | |
| { | |
| "epoch": 2.942477876106195, | |
| "grad_norm": 0.13282181322574615, | |
| "learning_rate": 1.1202231751157866e-08, | |
| "loss": 0.3529, | |
| "step": 1330 | |
| }, | |
| { | |
| "epoch": 2.9446902654867255, | |
| "grad_norm": 0.14515455067157745, | |
| "learning_rate": 1.0357384872011767e-08, | |
| "loss": 0.3637, | |
| "step": 1331 | |
| }, | |
| { | |
| "epoch": 2.9469026548672566, | |
| "grad_norm": 0.1463669091463089, | |
| "learning_rate": 9.545624375223439e-09, | |
| "loss": 0.3551, | |
| "step": 1332 | |
| }, | |
| { | |
| "epoch": 2.9491150442477876, | |
| "grad_norm": 0.1455363929271698, | |
| "learning_rate": 8.766955643587094e-09, | |
| "loss": 0.3561, | |
| "step": 1333 | |
| }, | |
| { | |
| "epoch": 2.9513274336283186, | |
| "grad_norm": 0.15577441453933716, | |
| "learning_rate": 8.021383840465247e-09, | |
| "loss": 0.3589, | |
| "step": 1334 | |
| }, | |
| { | |
| "epoch": 2.9535398230088497, | |
| "grad_norm": 0.14496655762195587, | |
| "learning_rate": 7.308913909752635e-09, | |
| "loss": 0.3571, | |
| "step": 1335 | |
| }, | |
| { | |
| "epoch": 2.9557522123893807, | |
| "grad_norm": 0.15788689255714417, | |
| "learning_rate": 6.629550575847355e-09, | |
| "loss": 0.3575, | |
| "step": 1336 | |
| }, | |
| { | |
| "epoch": 2.9579646017699117, | |
| "grad_norm": 0.1579669564962387, | |
| "learning_rate": 5.983298343615884e-09, | |
| "loss": 0.3494, | |
| "step": 1337 | |
| }, | |
| { | |
| "epoch": 2.9601769911504423, | |
| "grad_norm": 0.15941593050956726, | |
| "learning_rate": 5.3701614983647786e-09, | |
| "loss": 0.3584, | |
| "step": 1338 | |
| }, | |
| { | |
| "epoch": 2.9623893805309733, | |
| "grad_norm": 0.16959086060523987, | |
| "learning_rate": 4.7901441058118006e-09, | |
| "loss": 0.3517, | |
| "step": 1339 | |
| }, | |
| { | |
| "epoch": 2.9646017699115044, | |
| "grad_norm": 0.16718046367168427, | |
| "learning_rate": 4.243250012059275e-09, | |
| "loss": 0.3584, | |
| "step": 1340 | |
| }, | |
| { | |
| "epoch": 2.9668141592920354, | |
| "grad_norm": 0.1667267084121704, | |
| "learning_rate": 3.729482843569665e-09, | |
| "loss": 0.3652, | |
| "step": 1341 | |
| }, | |
| { | |
| "epoch": 2.9690265486725664, | |
| "grad_norm": 0.15754228830337524, | |
| "learning_rate": 3.2488460071389285e-09, | |
| "loss": 0.3578, | |
| "step": 1342 | |
| }, | |
| { | |
| "epoch": 2.9712389380530975, | |
| "grad_norm": 0.15564315021038055, | |
| "learning_rate": 2.801342689875974e-09, | |
| "loss": 0.3509, | |
| "step": 1343 | |
| }, | |
| { | |
| "epoch": 2.9734513274336285, | |
| "grad_norm": 0.16392084956169128, | |
| "learning_rate": 2.3869758591810177e-09, | |
| "loss": 0.3574, | |
| "step": 1344 | |
| }, | |
| { | |
| "epoch": 2.975663716814159, | |
| "grad_norm": 0.147927388548851, | |
| "learning_rate": 2.005748262725593e-09, | |
| "loss": 0.3557, | |
| "step": 1345 | |
| }, | |
| { | |
| "epoch": 2.97787610619469, | |
| "grad_norm": 0.14830511808395386, | |
| "learning_rate": 1.657662428434792e-09, | |
| "loss": 0.3524, | |
| "step": 1346 | |
| }, | |
| { | |
| "epoch": 2.980088495575221, | |
| "grad_norm": 0.15716874599456787, | |
| "learning_rate": 1.342720664469499e-09, | |
| "loss": 0.3549, | |
| "step": 1347 | |
| }, | |
| { | |
| "epoch": 2.982300884955752, | |
| "grad_norm": 0.17521563172340393, | |
| "learning_rate": 1.0609250592130693e-09, | |
| "loss": 0.3629, | |
| "step": 1348 | |
| }, | |
| { | |
| "epoch": 2.984513274336283, | |
| "grad_norm": 0.15542881190776825, | |
| "learning_rate": 8.122774812552303e-10, | |
| "loss": 0.3501, | |
| "step": 1349 | |
| }, | |
| { | |
| "epoch": 2.9867256637168142, | |
| "grad_norm": 0.20576362311840057, | |
| "learning_rate": 5.967795793820896e-10, | |
| "loss": 0.3627, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 2.9889380530973453, | |
| "grad_norm": 0.16061358153820038, | |
| "learning_rate": 4.1443278256170227e-10, | |
| "loss": 0.3524, | |
| "step": 1351 | |
| }, | |
| { | |
| "epoch": 2.991150442477876, | |
| "grad_norm": 0.1754293441772461, | |
| "learning_rate": 2.6523829993907456e-10, | |
| "loss": 0.3595, | |
| "step": 1352 | |
| }, | |
| { | |
| "epoch": 2.9933628318584073, | |
| "grad_norm": 0.17124155163764954, | |
| "learning_rate": 1.4919712082339665e-10, | |
| "loss": 0.3549, | |
| "step": 1353 | |
| }, | |
| { | |
| "epoch": 2.995575221238938, | |
| "grad_norm": 0.14956091344356537, | |
| "learning_rate": 6.63100146863771e-11, | |
| "loss": 0.353, | |
| "step": 1354 | |
| }, | |
| { | |
| "epoch": 2.997787610619469, | |
| "grad_norm": 0.1708720326423645, | |
| "learning_rate": 1.6577531152806027e-11, | |
| "loss": 0.3521, | |
| "step": 1355 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "grad_norm": 0.17063909769058228, | |
| "learning_rate": 0.0, | |
| "loss": 0.3461, | |
| "step": 1356 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "step": 1356, | |
| "total_flos": 2.751475469831373e+16, | |
| "train_loss": 0.20257837947315172, | |
| "train_runtime": 61703.9023, | |
| "train_samples_per_second": 8.428, | |
| "train_steps_per_second": 0.022 | |
| } | |
| ], | |
| "logging_steps": 1, | |
| "max_steps": 1356, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 3, | |
| "save_steps": 100, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 2.751475469831373e+16, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |