{ "best_global_step": 1000, "best_metric": 0.7549859375827388, "best_model_checkpoint": "./output_checkpoints/graphcodebert-robust/checkpoint-1000", "epoch": 0.1024, "eval_steps": 1000, "global_step": 1600, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00064, "grad_norm": 1.6144306659698486, "learning_rate": 1.1520000000000002e-08, "loss": 0.729, "step": 10 }, { "epoch": 0.00128, "grad_norm": 2.0952296257019043, "learning_rate": 2.4320000000000002e-08, "loss": 0.7295, "step": 20 }, { "epoch": 0.00192, "grad_norm": 1.3587689399719238, "learning_rate": 3.7120000000000004e-08, "loss": 0.73, "step": 30 }, { "epoch": 0.00256, "grad_norm": 1.2531732320785522, "learning_rate": 4.9920000000000006e-08, "loss": 0.7221, "step": 40 }, { "epoch": 0.0032, "grad_norm": 1.437932014465332, "learning_rate": 6.272000000000001e-08, "loss": 0.7209, "step": 50 }, { "epoch": 0.00384, "grad_norm": 1.418426752090454, "learning_rate": 7.552e-08, "loss": 0.729, "step": 60 }, { "epoch": 0.00448, "grad_norm": 1.9476298093795776, "learning_rate": 8.832e-08, "loss": 0.7242, "step": 70 }, { "epoch": 0.00512, "grad_norm": 1.7948051691055298, "learning_rate": 1.0112000000000001e-07, "loss": 0.7227, "step": 80 }, { "epoch": 0.00576, "grad_norm": 1.6534360647201538, "learning_rate": 1.1392e-07, "loss": 0.7234, "step": 90 }, { "epoch": 0.0064, "grad_norm": 1.0920158624649048, "learning_rate": 1.2672e-07, "loss": 0.7328, "step": 100 }, { "epoch": 0.00704, "grad_norm": 1.977837085723877, "learning_rate": 1.3952000000000002e-07, "loss": 0.7263, "step": 110 }, { "epoch": 0.00768, "grad_norm": 1.388983130455017, "learning_rate": 1.5232000000000003e-07, "loss": 0.7286, "step": 120 }, { "epoch": 0.00832, "grad_norm": 1.2956682443618774, "learning_rate": 1.6512e-07, "loss": 0.7251, "step": 130 }, { "epoch": 0.00896, "grad_norm": 1.8125052452087402, "learning_rate": 1.7792e-07, "loss": 0.7251, "step": 140 }, { "epoch": 0.0096, "grad_norm": 1.626846194267273, "learning_rate": 1.9072e-07, "loss": 0.727, "step": 150 }, { "epoch": 0.01024, "grad_norm": 2.3243086338043213, "learning_rate": 2.0352e-07, "loss": 0.726, "step": 160 }, { "epoch": 0.01088, "grad_norm": 1.4734737873077393, "learning_rate": 2.1632e-07, "loss": 0.7252, "step": 170 }, { "epoch": 0.01152, "grad_norm": 2.090498685836792, "learning_rate": 2.2912e-07, "loss": 0.7273, "step": 180 }, { "epoch": 0.01216, "grad_norm": 1.7563093900680542, "learning_rate": 2.4192000000000004e-07, "loss": 0.719, "step": 190 }, { "epoch": 0.0128, "grad_norm": 1.449843168258667, "learning_rate": 2.5472000000000005e-07, "loss": 0.7237, "step": 200 }, { "epoch": 0.01344, "grad_norm": 141396.296875, "learning_rate": 5.350742447516642e-07, "loss": 0.7217, "step": 210 }, { "epoch": 0.01408, "grad_norm": 102339.1640625, "learning_rate": 5.606758832565284e-07, "loss": 0.7215, "step": 220 }, { "epoch": 0.01472, "grad_norm": 134052.9375, "learning_rate": 5.862775217613928e-07, "loss": 0.7115, "step": 230 }, { "epoch": 0.01536, "grad_norm": 87181.984375, "learning_rate": 6.118791602662571e-07, "loss": 0.7241, "step": 240 }, { "epoch": 0.016, "grad_norm": 100231.328125, "learning_rate": 6.374807987711214e-07, "loss": 0.71, "step": 250 }, { "epoch": 0.01664, "grad_norm": 136721.484375, "learning_rate": 6.630824372759858e-07, "loss": 0.7188, "step": 260 }, { "epoch": 0.01728, "grad_norm": 115868.8125, "learning_rate": 6.8868407578085e-07, "loss": 0.7199, "step": 270 }, { "epoch": 0.01792, "grad_norm": 70205.1484375, "learning_rate": 7.142857142857143e-07, "loss": 0.7299, "step": 280 }, { "epoch": 0.01856, "grad_norm": 98926.4453125, "learning_rate": 7.398873527905787e-07, "loss": 0.7159, "step": 290 }, { "epoch": 0.0192, "grad_norm": 134108.140625, "learning_rate": 7.65488991295443e-07, "loss": 0.7122, "step": 300 }, { "epoch": 0.01984, "grad_norm": 103719.140625, "learning_rate": 7.910906298003073e-07, "loss": 0.7185, "step": 310 }, { "epoch": 0.02048, "grad_norm": 85624.953125, "learning_rate": 8.166922683051716e-07, "loss": 0.718, "step": 320 }, { "epoch": 0.02112, "grad_norm": 138824.15625, "learning_rate": 8.422939068100359e-07, "loss": 0.713, "step": 330 }, { "epoch": 0.02176, "grad_norm": 73629.0859375, "learning_rate": 8.678955453149002e-07, "loss": 0.7186, "step": 340 }, { "epoch": 0.0224, "grad_norm": 132493.0, "learning_rate": 8.934971838197646e-07, "loss": 0.7133, "step": 350 }, { "epoch": 0.02304, "grad_norm": 85223.625, "learning_rate": 9.190988223246289e-07, "loss": 0.7124, "step": 360 }, { "epoch": 0.02368, "grad_norm": 77868.78125, "learning_rate": 9.447004608294931e-07, "loss": 0.7058, "step": 370 }, { "epoch": 0.02432, "grad_norm": 75874.3046875, "learning_rate": 9.703020993343575e-07, "loss": 0.7139, "step": 380 }, { "epoch": 0.02496, "grad_norm": 151937.703125, "learning_rate": 9.959037378392218e-07, "loss": 0.713, "step": 390 }, { "epoch": 0.0256, "grad_norm": 161711.671875, "learning_rate": 1.021505376344086e-06, "loss": 0.7137, "step": 400 }, { "epoch": 0.02624, "grad_norm": 90800.234375, "learning_rate": 1.0471070148489503e-06, "loss": 0.7091, "step": 410 }, { "epoch": 0.02688, "grad_norm": 82131.34375, "learning_rate": 1.0727086533538148e-06, "loss": 0.7098, "step": 420 }, { "epoch": 0.02752, "grad_norm": 92818.9140625, "learning_rate": 1.0983102918586791e-06, "loss": 0.7099, "step": 430 }, { "epoch": 0.02816, "grad_norm": 88555.5078125, "learning_rate": 1.1239119303635434e-06, "loss": 0.7086, "step": 440 }, { "epoch": 0.0288, "grad_norm": 73428.6015625, "learning_rate": 1.1495135688684077e-06, "loss": 0.7117, "step": 450 }, { "epoch": 0.02944, "grad_norm": 128938.7421875, "learning_rate": 1.175115207373272e-06, "loss": 0.7182, "step": 460 }, { "epoch": 0.03008, "grad_norm": 102742.3359375, "learning_rate": 1.2007168458781362e-06, "loss": 0.7108, "step": 470 }, { "epoch": 0.03072, "grad_norm": 73825.8125, "learning_rate": 1.2263184843830007e-06, "loss": 0.7087, "step": 480 }, { "epoch": 0.03136, "grad_norm": 110930.75, "learning_rate": 1.251920122887865e-06, "loss": 0.7232, "step": 490 }, { "epoch": 0.032, "grad_norm": 95068.84375, "learning_rate": 1.2775217613927293e-06, "loss": 0.703, "step": 500 }, { "epoch": 0.03264, "grad_norm": 118731.9296875, "learning_rate": 1.3031233998975938e-06, "loss": 0.7063, "step": 510 }, { "epoch": 0.03328, "grad_norm": 80511.828125, "learning_rate": 1.3287250384024578e-06, "loss": 0.7143, "step": 520 }, { "epoch": 0.03392, "grad_norm": 84864.484375, "learning_rate": 1.354326676907322e-06, "loss": 0.7055, "step": 530 }, { "epoch": 0.03456, "grad_norm": 107800.109375, "learning_rate": 1.3799283154121864e-06, "loss": 0.7119, "step": 540 }, { "epoch": 0.0352, "grad_norm": 83667.671875, "learning_rate": 1.4055299539170509e-06, "loss": 0.7082, "step": 550 }, { "epoch": 0.03584, "grad_norm": 75656.4140625, "learning_rate": 1.4311315924219151e-06, "loss": 0.7062, "step": 560 }, { "epoch": 0.03648, "grad_norm": 79985.875, "learning_rate": 1.4567332309267796e-06, "loss": 0.7155, "step": 570 }, { "epoch": 0.03712, "grad_norm": 76334.078125, "learning_rate": 1.4823348694316437e-06, "loss": 0.7075, "step": 580 }, { "epoch": 0.03776, "grad_norm": 140764.03125, "learning_rate": 1.507936507936508e-06, "loss": 0.7065, "step": 590 }, { "epoch": 0.0384, "grad_norm": 100877.296875, "learning_rate": 1.5335381464413722e-06, "loss": 0.7096, "step": 600 }, { "epoch": 0.03904, "grad_norm": 104088.1171875, "learning_rate": 1.5591397849462367e-06, "loss": 0.6987, "step": 610 }, { "epoch": 0.03968, "grad_norm": 80806.2265625, "learning_rate": 1.584741423451101e-06, "loss": 0.707, "step": 620 }, { "epoch": 0.04032, "grad_norm": 109884.765625, "learning_rate": 1.6103430619559655e-06, "loss": 0.6991, "step": 630 }, { "epoch": 0.04096, "grad_norm": 79944.890625, "learning_rate": 1.6359447004608298e-06, "loss": 0.7047, "step": 640 }, { "epoch": 0.0416, "grad_norm": 93673.3828125, "learning_rate": 1.6615463389656938e-06, "loss": 0.6971, "step": 650 }, { "epoch": 0.04224, "grad_norm": 76641.265625, "learning_rate": 1.6871479774705581e-06, "loss": 0.6957, "step": 660 }, { "epoch": 0.04288, "grad_norm": 73583.5546875, "learning_rate": 1.7127496159754226e-06, "loss": 0.7028, "step": 670 }, { "epoch": 0.04352, "grad_norm": 75177.9609375, "learning_rate": 1.7383512544802869e-06, "loss": 0.7012, "step": 680 }, { "epoch": 0.04416, "grad_norm": 78340.8515625, "learning_rate": 1.7639528929851512e-06, "loss": 0.6987, "step": 690 }, { "epoch": 0.0448, "grad_norm": 86004.1171875, "learning_rate": 1.7895545314900157e-06, "loss": 0.7061, "step": 700 }, { "epoch": 0.04544, "grad_norm": 94212.0390625, "learning_rate": 1.8151561699948797e-06, "loss": 0.6993, "step": 710 }, { "epoch": 0.04608, "grad_norm": 83918.2421875, "learning_rate": 1.840757808499744e-06, "loss": 0.7009, "step": 720 }, { "epoch": 0.04672, "grad_norm": 68374.3125, "learning_rate": 1.8663594470046085e-06, "loss": 0.6964, "step": 730 }, { "epoch": 0.04736, "grad_norm": 90348.78125, "learning_rate": 1.8919610855094728e-06, "loss": 0.7011, "step": 740 }, { "epoch": 0.048, "grad_norm": 146658.0, "learning_rate": 1.9175627240143373e-06, "loss": 0.7003, "step": 750 }, { "epoch": 0.04864, "grad_norm": 112037.1640625, "learning_rate": 1.9431643625192015e-06, "loss": 0.7051, "step": 760 }, { "epoch": 0.04928, "grad_norm": 70628.625, "learning_rate": 1.9687660010240654e-06, "loss": 0.6923, "step": 770 }, { "epoch": 0.04992, "grad_norm": 109922.125, "learning_rate": 1.99436763952893e-06, "loss": 0.6893, "step": 780 }, { "epoch": 0.05056, "grad_norm": 135306.375, "learning_rate": 2.0199692780337944e-06, "loss": 0.7008, "step": 790 }, { "epoch": 0.0512, "grad_norm": 82354.8046875, "learning_rate": 2.0455709165386586e-06, "loss": 0.705, "step": 800 }, { "epoch": 0.05184, "grad_norm": 95951.671875, "learning_rate": 2.071172555043523e-06, "loss": 0.6912, "step": 810 }, { "epoch": 0.05248, "grad_norm": 96797.4609375, "learning_rate": 2.096774193548387e-06, "loss": 0.6922, "step": 820 }, { "epoch": 0.05312, "grad_norm": 87190.625, "learning_rate": 2.122375832053252e-06, "loss": 0.6946, "step": 830 }, { "epoch": 0.05376, "grad_norm": 87958.5625, "learning_rate": 2.1479774705581158e-06, "loss": 0.6949, "step": 840 }, { "epoch": 0.0544, "grad_norm": 77217.1796875, "learning_rate": 2.17357910906298e-06, "loss": 0.6928, "step": 850 }, { "epoch": 0.05504, "grad_norm": 117156.5546875, "learning_rate": 2.1991807475678443e-06, "loss": 0.692, "step": 860 }, { "epoch": 0.05568, "grad_norm": 94618.6875, "learning_rate": 2.224782386072709e-06, "loss": 0.6976, "step": 870 }, { "epoch": 0.05632, "grad_norm": 71444.6484375, "learning_rate": 2.2503840245775733e-06, "loss": 0.6989, "step": 880 }, { "epoch": 0.05696, "grad_norm": 159991.609375, "learning_rate": 2.2759856630824376e-06, "loss": 0.6928, "step": 890 }, { "epoch": 0.0576, "grad_norm": 81899.6875, "learning_rate": 2.301587301587302e-06, "loss": 0.691, "step": 900 }, { "epoch": 0.05824, "grad_norm": 110817.3671875, "learning_rate": 2.327188940092166e-06, "loss": 0.6858, "step": 910 }, { "epoch": 0.05888, "grad_norm": 105698.109375, "learning_rate": 2.3527905785970304e-06, "loss": 0.6965, "step": 920 }, { "epoch": 0.05952, "grad_norm": 76475.0, "learning_rate": 2.3783922171018947e-06, "loss": 0.6901, "step": 930 }, { "epoch": 0.06016, "grad_norm": 96672.6796875, "learning_rate": 2.403993855606759e-06, "loss": 0.6908, "step": 940 }, { "epoch": 0.0608, "grad_norm": 114510.8125, "learning_rate": 2.4295954941116232e-06, "loss": 0.6904, "step": 950 }, { "epoch": 0.06144, "grad_norm": 62412.4375, "learning_rate": 2.455197132616488e-06, "loss": 0.6855, "step": 960 }, { "epoch": 0.06208, "grad_norm": 92860.7109375, "learning_rate": 2.4807987711213518e-06, "loss": 0.6752, "step": 970 }, { "epoch": 0.06272, "grad_norm": 75184.359375, "learning_rate": 2.506400409626216e-06, "loss": 0.6868, "step": 980 }, { "epoch": 0.06336, "grad_norm": 77771.1640625, "learning_rate": 2.5320020481310808e-06, "loss": 0.6941, "step": 990 }, { "epoch": 0.064, "grad_norm": 65366.796875, "learning_rate": 2.557603686635945e-06, "loss": 0.6808, "step": 1000 }, { "epoch": 0.064, "eval_accuracy": 0.75744, "eval_loss": 0.6539617776870728, "eval_macro_f1": 0.7549859375827388, "eval_runtime": 1576.6702, "eval_samples_per_second": 63.425, "eval_steps_per_second": 0.496, "step": 1000 }, { "epoch": 0.06464, "grad_norm": 73310.6171875, "learning_rate": 2.583205325140809e-06, "loss": 0.6866, "step": 1010 }, { "epoch": 0.06528, "grad_norm": 80602.859375, "learning_rate": 2.6088069636456736e-06, "loss": 0.6873, "step": 1020 }, { "epoch": 0.06592, "grad_norm": 121537.0234375, "learning_rate": 2.634408602150538e-06, "loss": 0.6806, "step": 1030 }, { "epoch": 0.06656, "grad_norm": 105537.46875, "learning_rate": 2.6600102406554026e-06, "loss": 0.6835, "step": 1040 }, { "epoch": 0.0672, "grad_norm": 188847.71875, "learning_rate": 2.6856118791602664e-06, "loss": 0.687, "step": 1050 }, { "epoch": 0.06784, "grad_norm": 73677.8359375, "learning_rate": 2.7112135176651307e-06, "loss": 0.6848, "step": 1060 }, { "epoch": 0.06848, "grad_norm": 72158.984375, "learning_rate": 2.736815156169995e-06, "loss": 0.6833, "step": 1070 }, { "epoch": 0.06912, "grad_norm": 73585.1015625, "learning_rate": 2.7624167946748593e-06, "loss": 0.6766, "step": 1080 }, { "epoch": 0.06976, "grad_norm": 78721.0390625, "learning_rate": 2.788018433179724e-06, "loss": 0.6796, "step": 1090 }, { "epoch": 0.0704, "grad_norm": 72044.3515625, "learning_rate": 2.813620071684588e-06, "loss": 0.673, "step": 1100 }, { "epoch": 0.07104, "grad_norm": 100059.984375, "learning_rate": 2.8392217101894525e-06, "loss": 0.6783, "step": 1110 }, { "epoch": 0.07168, "grad_norm": 92175.921875, "learning_rate": 2.864823348694317e-06, "loss": 0.6901, "step": 1120 }, { "epoch": 0.07232, "grad_norm": 86143.453125, "learning_rate": 2.8904249871991806e-06, "loss": 0.6769, "step": 1130 }, { "epoch": 0.07296, "grad_norm": 101410.171875, "learning_rate": 2.9160266257040453e-06, "loss": 0.6781, "step": 1140 }, { "epoch": 0.0736, "grad_norm": 67173.296875, "learning_rate": 2.9416282642089096e-06, "loss": 0.6737, "step": 1150 }, { "epoch": 0.07424, "grad_norm": 100701.8203125, "learning_rate": 2.967229902713774e-06, "loss": 0.6741, "step": 1160 }, { "epoch": 0.07488, "grad_norm": 75457.328125, "learning_rate": 2.992831541218638e-06, "loss": 0.6742, "step": 1170 }, { "epoch": 0.07552, "grad_norm": 97755.9921875, "learning_rate": 3.018433179723503e-06, "loss": 0.6845, "step": 1180 }, { "epoch": 0.07616, "grad_norm": 53426.1171875, "learning_rate": 3.0440348182283667e-06, "loss": 0.6718, "step": 1190 }, { "epoch": 0.0768, "grad_norm": 71654.625, "learning_rate": 3.069636456733231e-06, "loss": 0.6798, "step": 1200 }, { "epoch": 0.07744, "grad_norm": 74562.71875, "learning_rate": 3.0952380952380957e-06, "loss": 0.6771, "step": 1210 }, { "epoch": 0.07808, "grad_norm": 102821.5, "learning_rate": 3.1208397337429596e-06, "loss": 0.6682, "step": 1220 }, { "epoch": 0.07872, "grad_norm": 102060.71875, "learning_rate": 3.1464413722478243e-06, "loss": 0.6734, "step": 1230 }, { "epoch": 0.07936, "grad_norm": 106793.0546875, "learning_rate": 3.1720430107526885e-06, "loss": 0.6775, "step": 1240 }, { "epoch": 0.08, "grad_norm": 118106.40625, "learning_rate": 3.1976446492575524e-06, "loss": 0.6789, "step": 1250 }, { "epoch": 0.08064, "grad_norm": 80626.078125, "learning_rate": 3.223246287762417e-06, "loss": 0.6675, "step": 1260 }, { "epoch": 0.08128, "grad_norm": 78956.4375, "learning_rate": 3.2488479262672814e-06, "loss": 0.6658, "step": 1270 }, { "epoch": 0.08192, "grad_norm": 98567.125, "learning_rate": 3.2744495647721457e-06, "loss": 0.6726, "step": 1280 }, { "epoch": 0.08256, "grad_norm": 84071.5546875, "learning_rate": 3.30005120327701e-06, "loss": 0.6793, "step": 1290 }, { "epoch": 0.0832, "grad_norm": 92090.375, "learning_rate": 3.3256528417818746e-06, "loss": 0.6758, "step": 1300 }, { "epoch": 0.08384, "grad_norm": 82021.3671875, "learning_rate": 3.3512544802867385e-06, "loss": 0.6731, "step": 1310 }, { "epoch": 0.08448, "grad_norm": 156372.765625, "learning_rate": 3.3768561187916028e-06, "loss": 0.6657, "step": 1320 }, { "epoch": 0.08512, "grad_norm": 71925.234375, "learning_rate": 3.4024577572964675e-06, "loss": 0.6838, "step": 1330 }, { "epoch": 0.08576, "grad_norm": 103299.3828125, "learning_rate": 3.4280593958013313e-06, "loss": 0.663, "step": 1340 }, { "epoch": 0.0864, "grad_norm": 71233.90625, "learning_rate": 3.453661034306196e-06, "loss": 0.6754, "step": 1350 }, { "epoch": 0.08704, "grad_norm": 66573.046875, "learning_rate": 3.4792626728110603e-06, "loss": 0.667, "step": 1360 }, { "epoch": 0.08768, "grad_norm": 128433.109375, "learning_rate": 3.5048643113159246e-06, "loss": 0.6744, "step": 1370 }, { "epoch": 0.08832, "grad_norm": 158480.765625, "learning_rate": 3.530465949820789e-06, "loss": 0.6636, "step": 1380 }, { "epoch": 0.08896, "grad_norm": 62473.26953125, "learning_rate": 3.5560675883256527e-06, "loss": 0.6648, "step": 1390 }, { "epoch": 0.0896, "grad_norm": 74170.6953125, "learning_rate": 3.5816692268305174e-06, "loss": 0.6775, "step": 1400 }, { "epoch": 0.09024, "grad_norm": 138458.296875, "learning_rate": 3.6072708653353817e-06, "loss": 0.6712, "step": 1410 }, { "epoch": 0.09088, "grad_norm": 90254.9921875, "learning_rate": 3.6328725038402464e-06, "loss": 0.6798, "step": 1420 }, { "epoch": 0.09152, "grad_norm": 68962.5390625, "learning_rate": 3.6584741423451102e-06, "loss": 0.6671, "step": 1430 }, { "epoch": 0.09216, "grad_norm": 96779.4609375, "learning_rate": 3.684075780849975e-06, "loss": 0.6521, "step": 1440 }, { "epoch": 0.0928, "grad_norm": 105383.8203125, "learning_rate": 3.7096774193548392e-06, "loss": 0.6648, "step": 1450 }, { "epoch": 0.09344, "grad_norm": 78728.4609375, "learning_rate": 3.735279057859703e-06, "loss": 0.6637, "step": 1460 }, { "epoch": 0.09408, "grad_norm": 121998.46875, "learning_rate": 3.7608806963645678e-06, "loss": 0.6642, "step": 1470 }, { "epoch": 0.09472, "grad_norm": 87487.171875, "learning_rate": 3.786482334869432e-06, "loss": 0.6584, "step": 1480 }, { "epoch": 0.09536, "grad_norm": 81816.6640625, "learning_rate": 3.8120839733742963e-06, "loss": 0.6699, "step": 1490 }, { "epoch": 0.096, "grad_norm": 63027.9453125, "learning_rate": 3.83768561187916e-06, "loss": 0.6642, "step": 1500 }, { "epoch": 0.09664, "grad_norm": 69659.5, "learning_rate": 3.8632872503840245e-06, "loss": 0.6671, "step": 1510 }, { "epoch": 0.09728, "grad_norm": 136521.953125, "learning_rate": 3.88888888888889e-06, "loss": 0.6578, "step": 1520 }, { "epoch": 0.09792, "grad_norm": 75749.6640625, "learning_rate": 3.914490527393753e-06, "loss": 0.6654, "step": 1530 }, { "epoch": 0.09856, "grad_norm": 69284.6640625, "learning_rate": 3.940092165898618e-06, "loss": 0.6734, "step": 1540 }, { "epoch": 0.0992, "grad_norm": 78889.4921875, "learning_rate": 3.965693804403482e-06, "loss": 0.663, "step": 1550 }, { "epoch": 0.09984, "grad_norm": 65618.0546875, "learning_rate": 3.991295442908347e-06, "loss": 0.6606, "step": 1560 }, { "epoch": 0.10048, "grad_norm": 138387.71875, "learning_rate": 4.016897081413211e-06, "loss": 0.6609, "step": 1570 }, { "epoch": 0.10112, "grad_norm": 92268.203125, "learning_rate": 4.042498719918075e-06, "loss": 0.6649, "step": 1580 }, { "epoch": 0.10176, "grad_norm": 93633.984375, "learning_rate": 4.0681003584229395e-06, "loss": 0.6726, "step": 1590 }, { "epoch": 0.1024, "grad_norm": 70783.703125, "learning_rate": 4.093701996927804e-06, "loss": 0.6551, "step": 1600 } ], "logging_steps": 10, "max_steps": 78125, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 200, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 3, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 1.346696114645952e+16, "train_batch_size": 32, "trial_name": null, "trial_params": null }