{ "best_global_step": 2500, "best_metric": 0.8020345252909776, "best_model_checkpoint": "voice_emotion_classification/checkpoint-2500", "epoch": 2.0, "eval_steps": 500, "global_step": 2500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0008, "grad_norm": 1.4005043506622314, "learning_rate": 6.000000000000001e-07, "loss": 2.0903, "step": 1 }, { "epoch": 0.008, "grad_norm": 1.3180536031723022, "learning_rate": 6e-06, "loss": 2.0826, "step": 10 }, { "epoch": 0.016, "grad_norm": 1.0437175035476685, "learning_rate": 1.2e-05, "loss": 2.0693, "step": 20 }, { "epoch": 0.024, "grad_norm": 1.371071457862854, "learning_rate": 1.8e-05, "loss": 2.0463, "step": 30 }, { "epoch": 0.032, "grad_norm": 1.51685631275177, "learning_rate": 2.4e-05, "loss": 2.0423, "step": 40 }, { "epoch": 0.04, "grad_norm": 1.4282890558242798, "learning_rate": 3e-05, "loss": 1.996, "step": 50 }, { "epoch": 0.048, "grad_norm": 1.860026478767395, "learning_rate": 2.991891891891892e-05, "loss": 1.9639, "step": 60 }, { "epoch": 0.056, "grad_norm": 2.121481418609619, "learning_rate": 2.983783783783784e-05, "loss": 1.9517, "step": 70 }, { "epoch": 0.064, "grad_norm": 2.056445837020874, "learning_rate": 2.9756756756756758e-05, "loss": 1.8765, "step": 80 }, { "epoch": 0.072, "grad_norm": 2.3928184509277344, "learning_rate": 2.9675675675675678e-05, "loss": 1.8703, "step": 90 }, { "epoch": 0.08, "grad_norm": 3.0733420848846436, "learning_rate": 2.9594594594594598e-05, "loss": 1.853, "step": 100 }, { "epoch": 0.088, "grad_norm": 2.88864803314209, "learning_rate": 2.9513513513513514e-05, "loss": 1.7652, "step": 110 }, { "epoch": 0.096, "grad_norm": 2.9048268795013428, "learning_rate": 2.943243243243243e-05, "loss": 1.7984, "step": 120 }, { "epoch": 0.104, "grad_norm": 2.6991426944732666, "learning_rate": 2.935135135135135e-05, "loss": 1.6869, "step": 130 }, { "epoch": 0.112, "grad_norm": 2.4570231437683105, "learning_rate": 2.927027027027027e-05, "loss": 1.7812, "step": 140 }, { "epoch": 0.12, "grad_norm": 4.502678871154785, "learning_rate": 2.918918918918919e-05, "loss": 1.8444, "step": 150 }, { "epoch": 0.128, "grad_norm": 3.117838144302368, "learning_rate": 2.9108108108108108e-05, "loss": 1.7544, "step": 160 }, { "epoch": 0.136, "grad_norm": 2.812086343765259, "learning_rate": 2.9027027027027028e-05, "loss": 1.638, "step": 170 }, { "epoch": 0.144, "grad_norm": 3.23271107673645, "learning_rate": 2.8954054054054057e-05, "loss": 1.7026, "step": 180 }, { "epoch": 0.152, "grad_norm": 3.975172996520996, "learning_rate": 2.8872972972972977e-05, "loss": 1.6715, "step": 190 }, { "epoch": 0.16, "grad_norm": 3.8965938091278076, "learning_rate": 2.879189189189189e-05, "loss": 1.6804, "step": 200 }, { "epoch": 0.168, "grad_norm": 4.267274856567383, "learning_rate": 2.871081081081081e-05, "loss": 1.6587, "step": 210 }, { "epoch": 0.176, "grad_norm": 3.524360179901123, "learning_rate": 2.862972972972973e-05, "loss": 1.4811, "step": 220 }, { "epoch": 0.184, "grad_norm": 3.266697883605957, "learning_rate": 2.854864864864865e-05, "loss": 1.672, "step": 230 }, { "epoch": 0.192, "grad_norm": 5.3684186935424805, "learning_rate": 2.8467567567567567e-05, "loss": 1.5284, "step": 240 }, { "epoch": 0.2, "grad_norm": 3.898176431655884, "learning_rate": 2.8386486486486487e-05, "loss": 1.5774, "step": 250 }, { "epoch": 0.208, "grad_norm": 3.189732074737549, "learning_rate": 2.8305405405405407e-05, "loss": 1.4874, "step": 260 }, { "epoch": 0.216, "grad_norm": 3.274244785308838, "learning_rate": 2.8224324324324327e-05, "loss": 1.5098, "step": 270 }, { "epoch": 0.224, "grad_norm": 5.691224098205566, "learning_rate": 2.8143243243243244e-05, "loss": 1.509, "step": 280 }, { "epoch": 0.232, "grad_norm": 6.856773376464844, "learning_rate": 2.8062162162162164e-05, "loss": 1.4558, "step": 290 }, { "epoch": 0.24, "grad_norm": 7.078716278076172, "learning_rate": 2.7981081081081084e-05, "loss": 1.5298, "step": 300 }, { "epoch": 0.248, "grad_norm": 4.4305100440979, "learning_rate": 2.79e-05, "loss": 1.3387, "step": 310 }, { "epoch": 0.256, "grad_norm": 10.400449752807617, "learning_rate": 2.7818918918918917e-05, "loss": 1.4501, "step": 320 }, { "epoch": 0.264, "grad_norm": 5.316948890686035, "learning_rate": 2.7737837837837837e-05, "loss": 1.367, "step": 330 }, { "epoch": 0.272, "grad_norm": 9.753177642822266, "learning_rate": 2.7656756756756757e-05, "loss": 1.4684, "step": 340 }, { "epoch": 0.28, "grad_norm": 8.100529670715332, "learning_rate": 2.7575675675675677e-05, "loss": 1.4175, "step": 350 }, { "epoch": 0.288, "grad_norm": 9.878854751586914, "learning_rate": 2.7494594594594594e-05, "loss": 1.308, "step": 360 }, { "epoch": 0.296, "grad_norm": 5.865877151489258, "learning_rate": 2.7413513513513514e-05, "loss": 1.3035, "step": 370 }, { "epoch": 0.304, "grad_norm": 7.870754241943359, "learning_rate": 2.7332432432432434e-05, "loss": 1.2915, "step": 380 }, { "epoch": 0.312, "grad_norm": 8.517908096313477, "learning_rate": 2.7251351351351354e-05, "loss": 1.4318, "step": 390 }, { "epoch": 0.32, "grad_norm": 4.7960309982299805, "learning_rate": 2.717027027027027e-05, "loss": 1.3154, "step": 400 }, { "epoch": 0.328, "grad_norm": 5.629390716552734, "learning_rate": 2.708918918918919e-05, "loss": 1.3433, "step": 410 }, { "epoch": 0.336, "grad_norm": 8.473249435424805, "learning_rate": 2.700810810810811e-05, "loss": 1.1474, "step": 420 }, { "epoch": 0.344, "grad_norm": 3.652617931365967, "learning_rate": 2.6927027027027028e-05, "loss": 1.3247, "step": 430 }, { "epoch": 0.352, "grad_norm": 4.9890055656433105, "learning_rate": 2.6845945945945944e-05, "loss": 1.3347, "step": 440 }, { "epoch": 0.36, "grad_norm": 5.2355055809021, "learning_rate": 2.6764864864864864e-05, "loss": 1.0932, "step": 450 }, { "epoch": 0.368, "grad_norm": 6.325026512145996, "learning_rate": 2.6683783783783785e-05, "loss": 1.4873, "step": 460 }, { "epoch": 0.376, "grad_norm": 6.78115701675415, "learning_rate": 2.6602702702702705e-05, "loss": 1.2311, "step": 470 }, { "epoch": 0.384, "grad_norm": 4.194353103637695, "learning_rate": 2.652162162162162e-05, "loss": 1.2493, "step": 480 }, { "epoch": 0.392, "grad_norm": 3.8817057609558105, "learning_rate": 2.644054054054054e-05, "loss": 1.1237, "step": 490 }, { "epoch": 0.4, "grad_norm": 6.7539520263671875, "learning_rate": 2.635945945945946e-05, "loss": 1.1135, "step": 500 }, { "epoch": 0.408, "grad_norm": 9.044737815856934, "learning_rate": 2.627837837837838e-05, "loss": 1.2459, "step": 510 }, { "epoch": 0.416, "grad_norm": 15.829017639160156, "learning_rate": 2.6197297297297298e-05, "loss": 1.2803, "step": 520 }, { "epoch": 0.424, "grad_norm": 10.789520263671875, "learning_rate": 2.6116216216216218e-05, "loss": 1.1912, "step": 530 }, { "epoch": 0.432, "grad_norm": 5.011368274688721, "learning_rate": 2.6035135135135135e-05, "loss": 1.0143, "step": 540 }, { "epoch": 0.44, "grad_norm": 8.985868453979492, "learning_rate": 2.5954054054054055e-05, "loss": 1.118, "step": 550 }, { "epoch": 0.448, "grad_norm": 6.862995147705078, "learning_rate": 2.587297297297297e-05, "loss": 1.1269, "step": 560 }, { "epoch": 0.456, "grad_norm": 10.972336769104004, "learning_rate": 2.579189189189189e-05, "loss": 1.1591, "step": 570 }, { "epoch": 0.464, "grad_norm": 8.179327011108398, "learning_rate": 2.5710810810810812e-05, "loss": 1.154, "step": 580 }, { "epoch": 0.472, "grad_norm": 11.713990211486816, "learning_rate": 2.5629729729729732e-05, "loss": 1.0995, "step": 590 }, { "epoch": 0.48, "grad_norm": 10.86710262298584, "learning_rate": 2.554864864864865e-05, "loss": 1.1544, "step": 600 }, { "epoch": 0.488, "grad_norm": 6.228063106536865, "learning_rate": 2.546756756756757e-05, "loss": 1.2395, "step": 610 }, { "epoch": 0.496, "grad_norm": 12.631518363952637, "learning_rate": 2.538648648648649e-05, "loss": 1.0992, "step": 620 }, { "epoch": 0.504, "grad_norm": 7.058006763458252, "learning_rate": 2.530540540540541e-05, "loss": 1.194, "step": 630 }, { "epoch": 0.512, "grad_norm": 5.026750087738037, "learning_rate": 2.5224324324324325e-05, "loss": 1.103, "step": 640 }, { "epoch": 0.52, "grad_norm": 7.1134843826293945, "learning_rate": 2.5143243243243242e-05, "loss": 0.9427, "step": 650 }, { "epoch": 0.528, "grad_norm": 7.147433280944824, "learning_rate": 2.5062162162162162e-05, "loss": 0.9881, "step": 660 }, { "epoch": 0.536, "grad_norm": 6.535639762878418, "learning_rate": 2.4981081081081082e-05, "loss": 1.1143, "step": 670 }, { "epoch": 0.544, "grad_norm": 10.878937721252441, "learning_rate": 2.49e-05, "loss": 0.8909, "step": 680 }, { "epoch": 0.552, "grad_norm": 5.79094934463501, "learning_rate": 2.481891891891892e-05, "loss": 0.9728, "step": 690 }, { "epoch": 0.56, "grad_norm": 6.935592174530029, "learning_rate": 2.473783783783784e-05, "loss": 1.0735, "step": 700 }, { "epoch": 0.568, "grad_norm": 5.661824703216553, "learning_rate": 2.465675675675676e-05, "loss": 1.0012, "step": 710 }, { "epoch": 0.576, "grad_norm": 13.233421325683594, "learning_rate": 2.4575675675675676e-05, "loss": 1.0315, "step": 720 }, { "epoch": 0.584, "grad_norm": 9.292459487915039, "learning_rate": 2.4494594594594596e-05, "loss": 0.9547, "step": 730 }, { "epoch": 0.592, "grad_norm": 13.138367652893066, "learning_rate": 2.442162162162162e-05, "loss": 0.9379, "step": 740 }, { "epoch": 0.6, "grad_norm": 13.352531433105469, "learning_rate": 2.434054054054054e-05, "loss": 0.9484, "step": 750 }, { "epoch": 0.608, "grad_norm": 11.993139266967773, "learning_rate": 2.4259459459459458e-05, "loss": 1.1064, "step": 760 }, { "epoch": 0.616, "grad_norm": 12.132452011108398, "learning_rate": 2.4178378378378378e-05, "loss": 1.1363, "step": 770 }, { "epoch": 0.624, "grad_norm": 13.944737434387207, "learning_rate": 2.4097297297297298e-05, "loss": 0.9835, "step": 780 }, { "epoch": 0.632, "grad_norm": 6.077609062194824, "learning_rate": 2.4016216216216218e-05, "loss": 0.8391, "step": 790 }, { "epoch": 0.64, "grad_norm": 7.873855113983154, "learning_rate": 2.3935135135135135e-05, "loss": 0.7772, "step": 800 }, { "epoch": 0.648, "grad_norm": 13.312115669250488, "learning_rate": 2.3854054054054055e-05, "loss": 1.0117, "step": 810 }, { "epoch": 0.656, "grad_norm": 9.016510963439941, "learning_rate": 2.3772972972972975e-05, "loss": 0.9353, "step": 820 }, { "epoch": 0.664, "grad_norm": 8.618375778198242, "learning_rate": 2.3691891891891895e-05, "loss": 0.9598, "step": 830 }, { "epoch": 0.672, "grad_norm": 10.867205619812012, "learning_rate": 2.361081081081081e-05, "loss": 0.8726, "step": 840 }, { "epoch": 0.68, "grad_norm": 13.182415962219238, "learning_rate": 2.3529729729729728e-05, "loss": 0.9202, "step": 850 }, { "epoch": 0.688, "grad_norm": 12.405129432678223, "learning_rate": 2.3448648648648648e-05, "loss": 0.8795, "step": 860 }, { "epoch": 0.696, "grad_norm": 8.207524299621582, "learning_rate": 2.3367567567567568e-05, "loss": 0.8015, "step": 870 }, { "epoch": 0.704, "grad_norm": 15.442817687988281, "learning_rate": 2.3286486486486485e-05, "loss": 0.9932, "step": 880 }, { "epoch": 0.712, "grad_norm": 13.388226509094238, "learning_rate": 2.3205405405405405e-05, "loss": 0.87, "step": 890 }, { "epoch": 0.72, "grad_norm": 8.635920524597168, "learning_rate": 2.3124324324324325e-05, "loss": 0.7842, "step": 900 }, { "epoch": 0.728, "grad_norm": 11.66073989868164, "learning_rate": 2.3043243243243245e-05, "loss": 0.9023, "step": 910 }, { "epoch": 0.736, "grad_norm": 12.954612731933594, "learning_rate": 2.2962162162162162e-05, "loss": 1.0076, "step": 920 }, { "epoch": 0.744, "grad_norm": 11.18680191040039, "learning_rate": 2.2881081081081082e-05, "loss": 1.1349, "step": 930 }, { "epoch": 0.752, "grad_norm": 8.514711380004883, "learning_rate": 2.2800000000000002e-05, "loss": 0.9604, "step": 940 }, { "epoch": 0.76, "grad_norm": 4.436418056488037, "learning_rate": 2.2718918918918922e-05, "loss": 0.9632, "step": 950 }, { "epoch": 0.768, "grad_norm": 10.213781356811523, "learning_rate": 2.263783783783784e-05, "loss": 0.8139, "step": 960 }, { "epoch": 0.776, "grad_norm": 9.987252235412598, "learning_rate": 2.2556756756756755e-05, "loss": 0.8276, "step": 970 }, { "epoch": 0.784, "grad_norm": 12.511467933654785, "learning_rate": 2.2475675675675675e-05, "loss": 0.8709, "step": 980 }, { "epoch": 0.792, "grad_norm": 8.908098220825195, "learning_rate": 2.2394594594594595e-05, "loss": 0.8812, "step": 990 }, { "epoch": 0.8, "grad_norm": 10.62246322631836, "learning_rate": 2.2313513513513512e-05, "loss": 0.9733, "step": 1000 }, { "epoch": 0.808, "grad_norm": 14.651544570922852, "learning_rate": 2.2232432432432432e-05, "loss": 1.0309, "step": 1010 }, { "epoch": 0.816, "grad_norm": 19.1525936126709, "learning_rate": 2.2151351351351352e-05, "loss": 0.8808, "step": 1020 }, { "epoch": 0.824, "grad_norm": 7.289106369018555, "learning_rate": 2.2070270270270272e-05, "loss": 0.9126, "step": 1030 }, { "epoch": 0.832, "grad_norm": 5.375001907348633, "learning_rate": 2.198918918918919e-05, "loss": 0.847, "step": 1040 }, { "epoch": 0.84, "grad_norm": 8.623431205749512, "learning_rate": 2.190810810810811e-05, "loss": 0.9139, "step": 1050 }, { "epoch": 0.848, "grad_norm": 6.639071941375732, "learning_rate": 2.182702702702703e-05, "loss": 0.9345, "step": 1060 }, { "epoch": 0.856, "grad_norm": 7.635943412780762, "learning_rate": 2.174594594594595e-05, "loss": 0.8134, "step": 1070 }, { "epoch": 0.864, "grad_norm": 12.048315048217773, "learning_rate": 2.1664864864864862e-05, "loss": 0.6728, "step": 1080 }, { "epoch": 0.872, "grad_norm": 13.869949340820312, "learning_rate": 2.1583783783783783e-05, "loss": 0.8256, "step": 1090 }, { "epoch": 0.88, "grad_norm": 26.233325958251953, "learning_rate": 2.1502702702702703e-05, "loss": 0.7044, "step": 1100 }, { "epoch": 0.888, "grad_norm": 7.98716926574707, "learning_rate": 2.1421621621621623e-05, "loss": 0.7398, "step": 1110 }, { "epoch": 0.896, "grad_norm": 13.682205200195312, "learning_rate": 2.134054054054054e-05, "loss": 0.7522, "step": 1120 }, { "epoch": 0.904, "grad_norm": 9.086796760559082, "learning_rate": 2.125945945945946e-05, "loss": 0.8574, "step": 1130 }, { "epoch": 0.912, "grad_norm": 10.3043851852417, "learning_rate": 2.117837837837838e-05, "loss": 0.8005, "step": 1140 }, { "epoch": 0.92, "grad_norm": 12.477950096130371, "learning_rate": 2.10972972972973e-05, "loss": 0.8436, "step": 1150 }, { "epoch": 0.928, "grad_norm": 16.634178161621094, "learning_rate": 2.1016216216216216e-05, "loss": 0.6515, "step": 1160 }, { "epoch": 0.936, "grad_norm": 11.066425323486328, "learning_rate": 2.0935135135135136e-05, "loss": 0.8689, "step": 1170 }, { "epoch": 0.944, "grad_norm": 19.47179412841797, "learning_rate": 2.0854054054054056e-05, "loss": 0.6605, "step": 1180 }, { "epoch": 0.952, "grad_norm": 3.917236804962158, "learning_rate": 2.0772972972972973e-05, "loss": 0.6826, "step": 1190 }, { "epoch": 0.96, "grad_norm": 16.43979263305664, "learning_rate": 2.069189189189189e-05, "loss": 0.7731, "step": 1200 }, { "epoch": 0.968, "grad_norm": 6.7848711013793945, "learning_rate": 2.061081081081081e-05, "loss": 0.705, "step": 1210 }, { "epoch": 0.976, "grad_norm": 7.472936153411865, "learning_rate": 2.052972972972973e-05, "loss": 0.7663, "step": 1220 }, { "epoch": 0.984, "grad_norm": 5.729743957519531, "learning_rate": 2.044864864864865e-05, "loss": 0.9337, "step": 1230 }, { "epoch": 0.992, "grad_norm": 6.306894302368164, "learning_rate": 2.0367567567567567e-05, "loss": 0.7655, "step": 1240 }, { "epoch": 1.0, "grad_norm": 17.98261260986328, "learning_rate": 2.0286486486486487e-05, "loss": 0.8202, "step": 1250 }, { "epoch": 1.0, "eval_accuracy": 0.7668834417208604, "eval_f1": 0.7589222033223469, "eval_loss": 0.7338727712631226, "eval_model_preparation_time": 0.0029, "eval_runtime": 59.9403, "eval_samples_per_second": 33.35, "eval_steps_per_second": 4.171, "step": 1250 }, { "epoch": 1.008, "grad_norm": 3.764857292175293, "learning_rate": 2.0205405405405407e-05, "loss": 0.6991, "step": 1260 }, { "epoch": 1.016, "grad_norm": 12.638517379760742, "learning_rate": 2.0124324324324327e-05, "loss": 0.7653, "step": 1270 }, { "epoch": 1.024, "grad_norm": 17.76312255859375, "learning_rate": 2.0043243243243243e-05, "loss": 0.8033, "step": 1280 }, { "epoch": 1.032, "grad_norm": 11.941933631896973, "learning_rate": 1.9962162162162163e-05, "loss": 0.699, "step": 1290 }, { "epoch": 1.04, "grad_norm": 17.107053756713867, "learning_rate": 1.988108108108108e-05, "loss": 0.706, "step": 1300 }, { "epoch": 1.048, "grad_norm": 6.369427680969238, "learning_rate": 1.98e-05, "loss": 0.6494, "step": 1310 }, { "epoch": 1.056, "grad_norm": 7.1872477531433105, "learning_rate": 1.9718918918918917e-05, "loss": 0.5989, "step": 1320 }, { "epoch": 1.064, "grad_norm": 3.7603728771209717, "learning_rate": 1.9637837837837837e-05, "loss": 0.7582, "step": 1330 }, { "epoch": 1.072, "grad_norm": 10.045304298400879, "learning_rate": 1.9556756756756757e-05, "loss": 0.7192, "step": 1340 }, { "epoch": 1.08, "grad_norm": 14.639888763427734, "learning_rate": 1.9475675675675677e-05, "loss": 0.7959, "step": 1350 }, { "epoch": 1.088, "grad_norm": 8.216081619262695, "learning_rate": 1.9394594594594594e-05, "loss": 0.6062, "step": 1360 }, { "epoch": 1.096, "grad_norm": 5.784476280212402, "learning_rate": 1.9313513513513514e-05, "loss": 0.541, "step": 1370 }, { "epoch": 1.104, "grad_norm": 5.356358051300049, "learning_rate": 1.9232432432432434e-05, "loss": 0.6993, "step": 1380 }, { "epoch": 1.112, "grad_norm": 17.11981773376465, "learning_rate": 1.9151351351351354e-05, "loss": 0.7217, "step": 1390 }, { "epoch": 1.12, "grad_norm": 15.52505111694336, "learning_rate": 1.907027027027027e-05, "loss": 0.548, "step": 1400 }, { "epoch": 1.1280000000000001, "grad_norm": 12.326894760131836, "learning_rate": 1.8989189189189187e-05, "loss": 0.7058, "step": 1410 }, { "epoch": 1.1360000000000001, "grad_norm": 12.889031410217285, "learning_rate": 1.8908108108108107e-05, "loss": 0.7748, "step": 1420 }, { "epoch": 1.144, "grad_norm": 12.953654289245605, "learning_rate": 1.8827027027027027e-05, "loss": 0.7251, "step": 1430 }, { "epoch": 1.152, "grad_norm": 2.564222812652588, "learning_rate": 1.8745945945945944e-05, "loss": 0.551, "step": 1440 }, { "epoch": 1.16, "grad_norm": 9.111184120178223, "learning_rate": 1.8664864864864864e-05, "loss": 0.5548, "step": 1450 }, { "epoch": 1.168, "grad_norm": 7.713393211364746, "learning_rate": 1.8583783783783784e-05, "loss": 0.564, "step": 1460 }, { "epoch": 1.176, "grad_norm": 8.282889366149902, "learning_rate": 1.8502702702702704e-05, "loss": 0.7312, "step": 1470 }, { "epoch": 1.184, "grad_norm": 15.445865631103516, "learning_rate": 1.842162162162162e-05, "loss": 0.6974, "step": 1480 }, { "epoch": 1.192, "grad_norm": 6.040890693664551, "learning_rate": 1.834054054054054e-05, "loss": 0.6808, "step": 1490 }, { "epoch": 1.2, "grad_norm": 17.368532180786133, "learning_rate": 1.825945945945946e-05, "loss": 0.5695, "step": 1500 }, { "epoch": 1.208, "grad_norm": 11.174856185913086, "learning_rate": 1.817837837837838e-05, "loss": 0.8114, "step": 1510 }, { "epoch": 1.216, "grad_norm": 18.861087799072266, "learning_rate": 1.8097297297297298e-05, "loss": 0.601, "step": 1520 }, { "epoch": 1.224, "grad_norm": 4.157520771026611, "learning_rate": 1.8016216216216214e-05, "loss": 0.6195, "step": 1530 }, { "epoch": 1.232, "grad_norm": 40.063621520996094, "learning_rate": 1.7935135135135134e-05, "loss": 0.6502, "step": 1540 }, { "epoch": 1.24, "grad_norm": 12.99301528930664, "learning_rate": 1.7854054054054055e-05, "loss": 0.5463, "step": 1550 }, { "epoch": 1.248, "grad_norm": 3.057586908340454, "learning_rate": 1.7772972972972975e-05, "loss": 0.5025, "step": 1560 }, { "epoch": 1.256, "grad_norm": 7.806783199310303, "learning_rate": 1.769189189189189e-05, "loss": 0.5544, "step": 1570 }, { "epoch": 1.264, "grad_norm": 6.477509498596191, "learning_rate": 1.761081081081081e-05, "loss": 0.5447, "step": 1580 }, { "epoch": 1.272, "grad_norm": 28.918643951416016, "learning_rate": 1.752972972972973e-05, "loss": 0.7803, "step": 1590 }, { "epoch": 1.28, "grad_norm": 23.26552391052246, "learning_rate": 1.744864864864865e-05, "loss": 0.7442, "step": 1600 }, { "epoch": 1.288, "grad_norm": 24.359582901000977, "learning_rate": 1.7367567567567568e-05, "loss": 0.705, "step": 1610 }, { "epoch": 1.296, "grad_norm": 20.904409408569336, "learning_rate": 1.7286486486486488e-05, "loss": 0.7149, "step": 1620 }, { "epoch": 1.304, "grad_norm": 11.675884246826172, "learning_rate": 1.7205405405405408e-05, "loss": 0.5916, "step": 1630 }, { "epoch": 1.312, "grad_norm": 19.712337493896484, "learning_rate": 1.7124324324324325e-05, "loss": 0.59, "step": 1640 }, { "epoch": 1.32, "grad_norm": 1.4685373306274414, "learning_rate": 1.704324324324324e-05, "loss": 0.6127, "step": 1650 }, { "epoch": 1.328, "grad_norm": 13.145341873168945, "learning_rate": 1.696216216216216e-05, "loss": 0.5311, "step": 1660 }, { "epoch": 1.336, "grad_norm": 33.24889373779297, "learning_rate": 1.6881081081081082e-05, "loss": 0.539, "step": 1670 }, { "epoch": 1.3439999999999999, "grad_norm": 2.730905771255493, "learning_rate": 1.6800000000000002e-05, "loss": 0.6207, "step": 1680 }, { "epoch": 1.3519999999999999, "grad_norm": 18.928560256958008, "learning_rate": 1.671891891891892e-05, "loss": 0.5226, "step": 1690 }, { "epoch": 1.3599999999999999, "grad_norm": 6.420986175537109, "learning_rate": 1.663783783783784e-05, "loss": 0.5054, "step": 1700 }, { "epoch": 1.3679999999999999, "grad_norm": 10.68362045288086, "learning_rate": 1.655675675675676e-05, "loss": 0.553, "step": 1710 }, { "epoch": 1.376, "grad_norm": 23.89041519165039, "learning_rate": 1.647567567567568e-05, "loss": 0.585, "step": 1720 }, { "epoch": 1.384, "grad_norm": 0.6503021717071533, "learning_rate": 1.6394594594594595e-05, "loss": 0.5443, "step": 1730 }, { "epoch": 1.392, "grad_norm": 16.69384002685547, "learning_rate": 1.6313513513513515e-05, "loss": 0.777, "step": 1740 }, { "epoch": 1.4, "grad_norm": 11.571426391601562, "learning_rate": 1.6232432432432432e-05, "loss": 0.7578, "step": 1750 }, { "epoch": 1.408, "grad_norm": 19.505590438842773, "learning_rate": 1.6151351351351352e-05, "loss": 0.755, "step": 1760 }, { "epoch": 1.416, "grad_norm": 12.909994125366211, "learning_rate": 1.607027027027027e-05, "loss": 0.5617, "step": 1770 }, { "epoch": 1.424, "grad_norm": 10.301375389099121, "learning_rate": 1.598918918918919e-05, "loss": 0.7985, "step": 1780 }, { "epoch": 1.432, "grad_norm": 20.1243839263916, "learning_rate": 1.590810810810811e-05, "loss": 0.6966, "step": 1790 }, { "epoch": 1.44, "grad_norm": 5.007569789886475, "learning_rate": 1.582702702702703e-05, "loss": 0.5544, "step": 1800 }, { "epoch": 1.448, "grad_norm": 2.8352081775665283, "learning_rate": 1.5745945945945946e-05, "loss": 0.8716, "step": 1810 }, { "epoch": 1.456, "grad_norm": 8.246051788330078, "learning_rate": 1.5664864864864866e-05, "loss": 0.449, "step": 1820 }, { "epoch": 1.464, "grad_norm": 7.072529315948486, "learning_rate": 1.5583783783783786e-05, "loss": 0.6345, "step": 1830 }, { "epoch": 1.472, "grad_norm": 11.075968742370605, "learning_rate": 1.5502702702702706e-05, "loss": 0.5029, "step": 1840 }, { "epoch": 1.48, "grad_norm": 10.411526679992676, "learning_rate": 1.5421621621621622e-05, "loss": 0.4708, "step": 1850 }, { "epoch": 1.488, "grad_norm": 13.274471282958984, "learning_rate": 1.534054054054054e-05, "loss": 0.4823, "step": 1860 }, { "epoch": 1.496, "grad_norm": 8.039985656738281, "learning_rate": 1.525945945945946e-05, "loss": 0.5719, "step": 1870 }, { "epoch": 1.504, "grad_norm": 19.872621536254883, "learning_rate": 1.5178378378378381e-05, "loss": 0.7933, "step": 1880 }, { "epoch": 1.512, "grad_norm": 12.538918495178223, "learning_rate": 1.5097297297297296e-05, "loss": 0.4885, "step": 1890 }, { "epoch": 1.52, "grad_norm": 26.685623168945312, "learning_rate": 1.5016216216216216e-05, "loss": 0.6399, "step": 1900 }, { "epoch": 1.528, "grad_norm": 10.571418762207031, "learning_rate": 1.4935135135135136e-05, "loss": 0.6462, "step": 1910 }, { "epoch": 1.536, "grad_norm": 3.1144027709960938, "learning_rate": 1.4854054054054054e-05, "loss": 0.6041, "step": 1920 }, { "epoch": 1.544, "grad_norm": 5.647855758666992, "learning_rate": 1.4772972972972975e-05, "loss": 0.584, "step": 1930 }, { "epoch": 1.552, "grad_norm": 9.756006240844727, "learning_rate": 1.4691891891891893e-05, "loss": 0.3929, "step": 1940 }, { "epoch": 1.56, "grad_norm": 7.031187534332275, "learning_rate": 1.4610810810810811e-05, "loss": 0.5963, "step": 1950 }, { "epoch": 1.568, "grad_norm": 19.187641143798828, "learning_rate": 1.452972972972973e-05, "loss": 0.4967, "step": 1960 }, { "epoch": 1.576, "grad_norm": 16.660043716430664, "learning_rate": 1.444864864864865e-05, "loss": 0.632, "step": 1970 }, { "epoch": 1.584, "grad_norm": 15.292383193969727, "learning_rate": 1.4367567567567568e-05, "loss": 0.6671, "step": 1980 }, { "epoch": 1.592, "grad_norm": 15.64156436920166, "learning_rate": 1.4286486486486488e-05, "loss": 0.5182, "step": 1990 }, { "epoch": 1.6, "grad_norm": 12.81575870513916, "learning_rate": 1.4205405405405405e-05, "loss": 0.7377, "step": 2000 }, { "epoch": 1.608, "grad_norm": 28.722570419311523, "learning_rate": 1.4124324324324325e-05, "loss": 0.6432, "step": 2010 }, { "epoch": 1.616, "grad_norm": 9.101573944091797, "learning_rate": 1.4043243243243243e-05, "loss": 0.7597, "step": 2020 }, { "epoch": 1.624, "grad_norm": 18.51584815979004, "learning_rate": 1.3962162162162163e-05, "loss": 0.5169, "step": 2030 }, { "epoch": 1.6320000000000001, "grad_norm": 19.951353073120117, "learning_rate": 1.3881081081081082e-05, "loss": 0.6938, "step": 2040 }, { "epoch": 1.6400000000000001, "grad_norm": 0.7395208477973938, "learning_rate": 1.3800000000000002e-05, "loss": 0.5438, "step": 2050 }, { "epoch": 1.6480000000000001, "grad_norm": 23.373943328857422, "learning_rate": 1.3718918918918918e-05, "loss": 0.543, "step": 2060 }, { "epoch": 1.6560000000000001, "grad_norm": 13.313843727111816, "learning_rate": 1.3637837837837838e-05, "loss": 0.752, "step": 2070 }, { "epoch": 1.6640000000000001, "grad_norm": 19.967775344848633, "learning_rate": 1.3556756756756757e-05, "loss": 0.5858, "step": 2080 }, { "epoch": 1.6720000000000002, "grad_norm": 6.15806770324707, "learning_rate": 1.3475675675675677e-05, "loss": 0.6735, "step": 2090 }, { "epoch": 1.6800000000000002, "grad_norm": 20.810691833496094, "learning_rate": 1.3394594594594595e-05, "loss": 0.7207, "step": 2100 }, { "epoch": 1.688, "grad_norm": 21.559804916381836, "learning_rate": 1.3313513513513514e-05, "loss": 0.6681, "step": 2110 }, { "epoch": 1.696, "grad_norm": 5.827245235443115, "learning_rate": 1.3232432432432432e-05, "loss": 0.5774, "step": 2120 }, { "epoch": 1.704, "grad_norm": 4.7927069664001465, "learning_rate": 1.3151351351351352e-05, "loss": 0.6439, "step": 2130 }, { "epoch": 1.712, "grad_norm": 14.177338600158691, "learning_rate": 1.307027027027027e-05, "loss": 0.7137, "step": 2140 }, { "epoch": 1.72, "grad_norm": 14.718915939331055, "learning_rate": 1.298918918918919e-05, "loss": 0.5463, "step": 2150 }, { "epoch": 1.728, "grad_norm": 18.23885726928711, "learning_rate": 1.2908108108108109e-05, "loss": 0.6557, "step": 2160 }, { "epoch": 1.736, "grad_norm": 9.514300346374512, "learning_rate": 1.2827027027027027e-05, "loss": 0.5304, "step": 2170 }, { "epoch": 1.744, "grad_norm": 15.950238227844238, "learning_rate": 1.2745945945945946e-05, "loss": 0.721, "step": 2180 }, { "epoch": 1.752, "grad_norm": 5.722634315490723, "learning_rate": 1.2664864864864866e-05, "loss": 0.3436, "step": 2190 }, { "epoch": 1.76, "grad_norm": 11.308035850524902, "learning_rate": 1.2583783783783784e-05, "loss": 0.5269, "step": 2200 }, { "epoch": 1.768, "grad_norm": 5.413994789123535, "learning_rate": 1.2502702702702704e-05, "loss": 0.4461, "step": 2210 }, { "epoch": 1.776, "grad_norm": 29.982696533203125, "learning_rate": 1.2421621621621622e-05, "loss": 0.5942, "step": 2220 }, { "epoch": 1.784, "grad_norm": 25.45384979248047, "learning_rate": 1.234054054054054e-05, "loss": 0.5469, "step": 2230 }, { "epoch": 1.792, "grad_norm": 10.957773208618164, "learning_rate": 1.225945945945946e-05, "loss": 0.4809, "step": 2240 }, { "epoch": 1.8, "grad_norm": 11.609101295471191, "learning_rate": 1.217837837837838e-05, "loss": 0.73, "step": 2250 }, { "epoch": 1.808, "grad_norm": 14.325447082519531, "learning_rate": 1.2097297297297298e-05, "loss": 0.3926, "step": 2260 }, { "epoch": 1.8159999999999998, "grad_norm": 4.8160719871521, "learning_rate": 1.2016216216216218e-05, "loss": 0.5617, "step": 2270 }, { "epoch": 1.8239999999999998, "grad_norm": 6.287977695465088, "learning_rate": 1.1935135135135134e-05, "loss": 0.3482, "step": 2280 }, { "epoch": 1.8319999999999999, "grad_norm": 9.819110870361328, "learning_rate": 1.1854054054054054e-05, "loss": 0.5353, "step": 2290 }, { "epoch": 1.8399999999999999, "grad_norm": 7.572418689727783, "learning_rate": 1.1772972972972973e-05, "loss": 0.7756, "step": 2300 }, { "epoch": 1.8479999999999999, "grad_norm": 16.719934463500977, "learning_rate": 1.1691891891891893e-05, "loss": 0.6298, "step": 2310 }, { "epoch": 1.8559999999999999, "grad_norm": 18.21957778930664, "learning_rate": 1.1610810810810811e-05, "loss": 0.4695, "step": 2320 }, { "epoch": 1.8639999999999999, "grad_norm": 5.47652530670166, "learning_rate": 1.1529729729729731e-05, "loss": 0.2983, "step": 2330 }, { "epoch": 1.8719999999999999, "grad_norm": 22.044818878173828, "learning_rate": 1.1448648648648648e-05, "loss": 0.5972, "step": 2340 }, { "epoch": 1.88, "grad_norm": 26.34394645690918, "learning_rate": 1.1367567567567568e-05, "loss": 0.4404, "step": 2350 }, { "epoch": 1.888, "grad_norm": 21.979583740234375, "learning_rate": 1.1286486486486486e-05, "loss": 0.6474, "step": 2360 }, { "epoch": 1.896, "grad_norm": 15.81022834777832, "learning_rate": 1.1205405405405406e-05, "loss": 0.552, "step": 2370 }, { "epoch": 1.904, "grad_norm": 13.853069305419922, "learning_rate": 1.1124324324324325e-05, "loss": 0.5908, "step": 2380 }, { "epoch": 1.912, "grad_norm": 4.924503326416016, "learning_rate": 1.1043243243243243e-05, "loss": 0.3883, "step": 2390 }, { "epoch": 1.92, "grad_norm": 15.801043510437012, "learning_rate": 1.0962162162162162e-05, "loss": 0.4635, "step": 2400 }, { "epoch": 1.928, "grad_norm": 17.398475646972656, "learning_rate": 1.0881081081081082e-05, "loss": 0.5509, "step": 2410 }, { "epoch": 1.936, "grad_norm": 12.026921272277832, "learning_rate": 1.08e-05, "loss": 0.533, "step": 2420 }, { "epoch": 1.944, "grad_norm": 23.21822738647461, "learning_rate": 1.071891891891892e-05, "loss": 0.5655, "step": 2430 }, { "epoch": 1.952, "grad_norm": 9.777156829833984, "learning_rate": 1.0637837837837838e-05, "loss": 0.7364, "step": 2440 }, { "epoch": 1.96, "grad_norm": 17.892311096191406, "learning_rate": 1.0556756756756757e-05, "loss": 0.5123, "step": 2450 }, { "epoch": 1.968, "grad_norm": 10.79381275177002, "learning_rate": 1.0475675675675675e-05, "loss": 0.5601, "step": 2460 }, { "epoch": 1.976, "grad_norm": 16.45550537109375, "learning_rate": 1.0394594594594595e-05, "loss": 0.3604, "step": 2470 }, { "epoch": 1.984, "grad_norm": 4.945703506469727, "learning_rate": 1.0313513513513514e-05, "loss": 0.5801, "step": 2480 }, { "epoch": 1.992, "grad_norm": 7.19441556930542, "learning_rate": 1.0232432432432434e-05, "loss": 0.5552, "step": 2490 }, { "epoch": 2.0, "grad_norm": 16.103708267211914, "learning_rate": 1.0151351351351352e-05, "loss": 0.5873, "step": 2500 }, { "epoch": 2.0, "eval_accuracy": 0.7983991995997999, "eval_f1": 0.8020345252909776, "eval_loss": 0.6678956747055054, "eval_model_preparation_time": 0.0029, "eval_runtime": 59.5494, "eval_samples_per_second": 33.569, "eval_steps_per_second": 4.198, "step": 2500 } ], "logging_steps": 10, "max_steps": 3750, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 8.83389772226827e+17, "train_batch_size": 8, "trial_name": null, "trial_params": null }