{ "best_metric": 0.9014084507042254, "best_model_checkpoint": "beit-base-patch16-224-65-fold2/checkpoint-100", "epoch": 92.3076923076923, "eval_steps": 500, "global_step": 300, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.9230769230769231, "eval_accuracy": 0.5492957746478874, "eval_loss": 0.6829895973205566, "eval_runtime": 1.1518, "eval_samples_per_second": 61.641, "eval_steps_per_second": 2.605, "step": 3 }, { "epoch": 1.8461538461538463, "eval_accuracy": 0.647887323943662, "eval_loss": 0.6407253742218018, "eval_runtime": 1.1245, "eval_samples_per_second": 63.139, "eval_steps_per_second": 2.668, "step": 6 }, { "epoch": 2.769230769230769, "eval_accuracy": 0.5352112676056338, "eval_loss": 0.6611723303794861, "eval_runtime": 1.0983, "eval_samples_per_second": 64.644, "eval_steps_per_second": 2.731, "step": 9 }, { "epoch": 3.076923076923077, "grad_norm": 13.373871803283691, "learning_rate": 1.6666666666666667e-05, "loss": 0.7094, "step": 10 }, { "epoch": 4.0, "eval_accuracy": 0.6901408450704225, "eval_loss": 0.6174536943435669, "eval_runtime": 1.0831, "eval_samples_per_second": 65.553, "eval_steps_per_second": 2.77, "step": 13 }, { "epoch": 4.923076923076923, "eval_accuracy": 0.676056338028169, "eval_loss": 0.5914616584777832, "eval_runtime": 1.1133, "eval_samples_per_second": 63.775, "eval_steps_per_second": 2.695, "step": 16 }, { "epoch": 5.846153846153846, "eval_accuracy": 0.704225352112676, "eval_loss": 0.5677279233932495, "eval_runtime": 1.1168, "eval_samples_per_second": 63.574, "eval_steps_per_second": 2.686, "step": 19 }, { "epoch": 6.153846153846154, "grad_norm": 10.936724662780762, "learning_rate": 3.3333333333333335e-05, "loss": 0.6444, "step": 20 }, { "epoch": 6.769230769230769, "eval_accuracy": 0.7746478873239436, "eval_loss": 0.5177403688430786, "eval_runtime": 1.0706, "eval_samples_per_second": 66.315, "eval_steps_per_second": 2.802, "step": 22 }, { "epoch": 8.0, "eval_accuracy": 0.7887323943661971, "eval_loss": 0.4927891492843628, "eval_runtime": 1.0755, "eval_samples_per_second": 66.014, "eval_steps_per_second": 2.789, "step": 26 }, { "epoch": 8.923076923076923, "eval_accuracy": 0.6901408450704225, "eval_loss": 0.5641009211540222, "eval_runtime": 1.0927, "eval_samples_per_second": 64.977, "eval_steps_per_second": 2.745, "step": 29 }, { "epoch": 9.23076923076923, "grad_norm": 10.08945083618164, "learning_rate": 5e-05, "loss": 0.5574, "step": 30 }, { "epoch": 9.846153846153847, "eval_accuracy": 0.8169014084507042, "eval_loss": 0.43716326355934143, "eval_runtime": 1.0928, "eval_samples_per_second": 64.973, "eval_steps_per_second": 2.745, "step": 32 }, { "epoch": 10.76923076923077, "eval_accuracy": 0.7605633802816901, "eval_loss": 0.5677071809768677, "eval_runtime": 1.15, "eval_samples_per_second": 61.738, "eval_steps_per_second": 2.609, "step": 35 }, { "epoch": 12.0, "eval_accuracy": 0.7605633802816901, "eval_loss": 0.5032159090042114, "eval_runtime": 1.1005, "eval_samples_per_second": 64.518, "eval_steps_per_second": 2.726, "step": 39 }, { "epoch": 12.307692307692308, "grad_norm": 4.973866939544678, "learning_rate": 4.814814814814815e-05, "loss": 0.543, "step": 40 }, { "epoch": 12.923076923076923, "eval_accuracy": 0.8028169014084507, "eval_loss": 0.47447821497917175, "eval_runtime": 1.0767, "eval_samples_per_second": 65.942, "eval_steps_per_second": 2.786, "step": 42 }, { "epoch": 13.846153846153847, "eval_accuracy": 0.8309859154929577, "eval_loss": 0.4056505262851715, "eval_runtime": 1.1102, "eval_samples_per_second": 63.953, "eval_steps_per_second": 2.702, "step": 45 }, { "epoch": 14.76923076923077, "eval_accuracy": 0.7746478873239436, "eval_loss": 0.4012921154499054, "eval_runtime": 1.0771, "eval_samples_per_second": 65.919, "eval_steps_per_second": 2.785, "step": 48 }, { "epoch": 15.384615384615385, "grad_norm": 5.340551376342773, "learning_rate": 4.62962962962963e-05, "loss": 0.4499, "step": 50 }, { "epoch": 16.0, "eval_accuracy": 0.8309859154929577, "eval_loss": 0.36697399616241455, "eval_runtime": 1.1391, "eval_samples_per_second": 62.328, "eval_steps_per_second": 2.634, "step": 52 }, { "epoch": 16.923076923076923, "eval_accuracy": 0.8591549295774648, "eval_loss": 0.4215153157711029, "eval_runtime": 1.1192, "eval_samples_per_second": 63.435, "eval_steps_per_second": 2.68, "step": 55 }, { "epoch": 17.846153846153847, "eval_accuracy": 0.7746478873239436, "eval_loss": 0.4862484037876129, "eval_runtime": 1.0754, "eval_samples_per_second": 66.023, "eval_steps_per_second": 2.79, "step": 58 }, { "epoch": 18.46153846153846, "grad_norm": 9.19057846069336, "learning_rate": 4.4444444444444447e-05, "loss": 0.3902, "step": 60 }, { "epoch": 18.76923076923077, "eval_accuracy": 0.7323943661971831, "eval_loss": 0.5781329274177551, "eval_runtime": 1.1009, "eval_samples_per_second": 64.491, "eval_steps_per_second": 2.725, "step": 61 }, { "epoch": 20.0, "eval_accuracy": 0.8028169014084507, "eval_loss": 0.5248808860778809, "eval_runtime": 1.1057, "eval_samples_per_second": 64.211, "eval_steps_per_second": 2.713, "step": 65 }, { "epoch": 20.923076923076923, "eval_accuracy": 0.8732394366197183, "eval_loss": 0.3936827778816223, "eval_runtime": 1.1333, "eval_samples_per_second": 62.651, "eval_steps_per_second": 2.647, "step": 68 }, { "epoch": 21.53846153846154, "grad_norm": 3.5906007289886475, "learning_rate": 4.259259259259259e-05, "loss": 0.4029, "step": 70 }, { "epoch": 21.846153846153847, "eval_accuracy": 0.8450704225352113, "eval_loss": 0.4131520092487335, "eval_runtime": 1.0935, "eval_samples_per_second": 64.93, "eval_steps_per_second": 2.744, "step": 71 }, { "epoch": 22.76923076923077, "eval_accuracy": 0.8450704225352113, "eval_loss": 0.41784021258354187, "eval_runtime": 1.1092, "eval_samples_per_second": 64.011, "eval_steps_per_second": 2.705, "step": 74 }, { "epoch": 24.0, "eval_accuracy": 0.7183098591549296, "eval_loss": 0.7273370027542114, "eval_runtime": 1.106, "eval_samples_per_second": 64.194, "eval_steps_per_second": 2.712, "step": 78 }, { "epoch": 24.615384615384617, "grad_norm": 6.034952640533447, "learning_rate": 4.074074074074074e-05, "loss": 0.3163, "step": 80 }, { "epoch": 24.923076923076923, "eval_accuracy": 0.8450704225352113, "eval_loss": 0.4221486747264862, "eval_runtime": 1.0943, "eval_samples_per_second": 64.883, "eval_steps_per_second": 2.742, "step": 81 }, { "epoch": 25.846153846153847, "eval_accuracy": 0.8732394366197183, "eval_loss": 0.4086194336414337, "eval_runtime": 1.1379, "eval_samples_per_second": 62.397, "eval_steps_per_second": 2.637, "step": 84 }, { "epoch": 26.76923076923077, "eval_accuracy": 0.8732394366197183, "eval_loss": 0.39464449882507324, "eval_runtime": 1.1089, "eval_samples_per_second": 64.025, "eval_steps_per_second": 2.705, "step": 87 }, { "epoch": 27.692307692307693, "grad_norm": 6.2680439949035645, "learning_rate": 3.888888888888889e-05, "loss": 0.2786, "step": 90 }, { "epoch": 28.0, "eval_accuracy": 0.8028169014084507, "eval_loss": 0.5320041179656982, "eval_runtime": 1.1427, "eval_samples_per_second": 62.134, "eval_steps_per_second": 2.625, "step": 91 }, { "epoch": 28.923076923076923, "eval_accuracy": 0.8450704225352113, "eval_loss": 0.41318005323410034, "eval_runtime": 1.1147, "eval_samples_per_second": 63.695, "eval_steps_per_second": 2.691, "step": 94 }, { "epoch": 29.846153846153847, "eval_accuracy": 0.7746478873239436, "eval_loss": 0.5542149543762207, "eval_runtime": 1.0924, "eval_samples_per_second": 64.994, "eval_steps_per_second": 2.746, "step": 97 }, { "epoch": 30.76923076923077, "grad_norm": 5.675084114074707, "learning_rate": 3.7037037037037037e-05, "loss": 0.2763, "step": 100 }, { "epoch": 30.76923076923077, "eval_accuracy": 0.9014084507042254, "eval_loss": 0.37337467074394226, "eval_runtime": 1.1285, "eval_samples_per_second": 62.913, "eval_steps_per_second": 2.658, "step": 100 }, { "epoch": 32.0, "eval_accuracy": 0.8309859154929577, "eval_loss": 0.44785401225090027, "eval_runtime": 1.0865, "eval_samples_per_second": 65.347, "eval_steps_per_second": 2.761, "step": 104 }, { "epoch": 32.92307692307692, "eval_accuracy": 0.8591549295774648, "eval_loss": 0.3481605350971222, "eval_runtime": 1.0909, "eval_samples_per_second": 65.081, "eval_steps_per_second": 2.75, "step": 107 }, { "epoch": 33.84615384615385, "grad_norm": 9.396614074707031, "learning_rate": 3.518518518518519e-05, "loss": 0.25, "step": 110 }, { "epoch": 33.84615384615385, "eval_accuracy": 0.7887323943661971, "eval_loss": 0.5442134737968445, "eval_runtime": 1.0623, "eval_samples_per_second": 66.836, "eval_steps_per_second": 2.824, "step": 110 }, { "epoch": 34.76923076923077, "eval_accuracy": 0.8732394366197183, "eval_loss": 0.4211238920688629, "eval_runtime": 1.1024, "eval_samples_per_second": 64.406, "eval_steps_per_second": 2.721, "step": 113 }, { "epoch": 36.0, "eval_accuracy": 0.8591549295774648, "eval_loss": 0.4860071539878845, "eval_runtime": 1.1377, "eval_samples_per_second": 62.405, "eval_steps_per_second": 2.637, "step": 117 }, { "epoch": 36.92307692307692, "grad_norm": 3.626502513885498, "learning_rate": 3.3333333333333335e-05, "loss": 0.2125, "step": 120 }, { "epoch": 36.92307692307692, "eval_accuracy": 0.8450704225352113, "eval_loss": 0.46536797285079956, "eval_runtime": 1.1169, "eval_samples_per_second": 63.568, "eval_steps_per_second": 2.686, "step": 120 }, { "epoch": 37.84615384615385, "eval_accuracy": 0.8591549295774648, "eval_loss": 0.477935254573822, "eval_runtime": 1.1116, "eval_samples_per_second": 63.871, "eval_steps_per_second": 2.699, "step": 123 }, { "epoch": 38.76923076923077, "eval_accuracy": 0.8309859154929577, "eval_loss": 0.5691684484481812, "eval_runtime": 1.1081, "eval_samples_per_second": 64.071, "eval_steps_per_second": 2.707, "step": 126 }, { "epoch": 40.0, "grad_norm": 6.315129280090332, "learning_rate": 3.148148148148148e-05, "loss": 0.2225, "step": 130 }, { "epoch": 40.0, "eval_accuracy": 0.8450704225352113, "eval_loss": 0.4911651611328125, "eval_runtime": 1.097, "eval_samples_per_second": 64.719, "eval_steps_per_second": 2.735, "step": 130 }, { "epoch": 40.92307692307692, "eval_accuracy": 0.8309859154929577, "eval_loss": 0.4528195559978485, "eval_runtime": 1.115, "eval_samples_per_second": 63.678, "eval_steps_per_second": 2.691, "step": 133 }, { "epoch": 41.84615384615385, "eval_accuracy": 0.7887323943661971, "eval_loss": 0.4470233619213104, "eval_runtime": 1.1058, "eval_samples_per_second": 64.204, "eval_steps_per_second": 2.713, "step": 136 }, { "epoch": 42.76923076923077, "eval_accuracy": 0.8028169014084507, "eval_loss": 0.4251406788825989, "eval_runtime": 1.0717, "eval_samples_per_second": 66.249, "eval_steps_per_second": 2.799, "step": 139 }, { "epoch": 43.07692307692308, "grad_norm": 6.046936988830566, "learning_rate": 2.962962962962963e-05, "loss": 0.1991, "step": 140 }, { "epoch": 44.0, "eval_accuracy": 0.8028169014084507, "eval_loss": 0.4864138662815094, "eval_runtime": 1.068, "eval_samples_per_second": 66.48, "eval_steps_per_second": 2.809, "step": 143 }, { "epoch": 44.92307692307692, "eval_accuracy": 0.8591549295774648, "eval_loss": 0.4651569426059723, "eval_runtime": 1.0698, "eval_samples_per_second": 66.368, "eval_steps_per_second": 2.804, "step": 146 }, { "epoch": 45.84615384615385, "eval_accuracy": 0.8309859154929577, "eval_loss": 0.5949256420135498, "eval_runtime": 1.1229, "eval_samples_per_second": 63.23, "eval_steps_per_second": 2.672, "step": 149 }, { "epoch": 46.15384615384615, "grad_norm": 4.972904682159424, "learning_rate": 2.777777777777778e-05, "loss": 0.164, "step": 150 }, { "epoch": 46.76923076923077, "eval_accuracy": 0.7464788732394366, "eval_loss": 1.0035008192062378, "eval_runtime": 1.091, "eval_samples_per_second": 65.076, "eval_steps_per_second": 2.75, "step": 152 }, { "epoch": 48.0, "eval_accuracy": 0.8309859154929577, "eval_loss": 0.6393434405326843, "eval_runtime": 1.1065, "eval_samples_per_second": 64.167, "eval_steps_per_second": 2.711, "step": 156 }, { "epoch": 48.92307692307692, "eval_accuracy": 0.676056338028169, "eval_loss": 0.9221746921539307, "eval_runtime": 1.1189, "eval_samples_per_second": 63.454, "eval_steps_per_second": 2.681, "step": 159 }, { "epoch": 49.23076923076923, "grad_norm": 31.394847869873047, "learning_rate": 2.5925925925925925e-05, "loss": 0.1974, "step": 160 }, { "epoch": 49.84615384615385, "eval_accuracy": 0.6901408450704225, "eval_loss": 1.0632798671722412, "eval_runtime": 1.0728, "eval_samples_per_second": 66.183, "eval_steps_per_second": 2.796, "step": 162 }, { "epoch": 50.76923076923077, "eval_accuracy": 0.8309859154929577, "eval_loss": 0.6050125360488892, "eval_runtime": 1.1003, "eval_samples_per_second": 64.529, "eval_steps_per_second": 2.727, "step": 165 }, { "epoch": 52.0, "eval_accuracy": 0.8309859154929577, "eval_loss": 0.7133720517158508, "eval_runtime": 1.1005, "eval_samples_per_second": 64.514, "eval_steps_per_second": 2.726, "step": 169 }, { "epoch": 52.30769230769231, "grad_norm": 4.075202465057373, "learning_rate": 2.4074074074074074e-05, "loss": 0.213, "step": 170 }, { "epoch": 52.92307692307692, "eval_accuracy": 0.8309859154929577, "eval_loss": 0.6648813486099243, "eval_runtime": 1.1101, "eval_samples_per_second": 63.956, "eval_steps_per_second": 2.702, "step": 172 }, { "epoch": 53.84615384615385, "eval_accuracy": 0.8309859154929577, "eval_loss": 0.7125517129898071, "eval_runtime": 1.097, "eval_samples_per_second": 64.721, "eval_steps_per_second": 2.735, "step": 175 }, { "epoch": 54.76923076923077, "eval_accuracy": 0.8169014084507042, "eval_loss": 0.6906399130821228, "eval_runtime": 1.0988, "eval_samples_per_second": 64.617, "eval_steps_per_second": 2.73, "step": 178 }, { "epoch": 55.38461538461539, "grad_norm": 4.072634220123291, "learning_rate": 2.2222222222222223e-05, "loss": 0.1642, "step": 180 }, { "epoch": 56.0, "eval_accuracy": 0.8309859154929577, "eval_loss": 0.6955938339233398, "eval_runtime": 1.0988, "eval_samples_per_second": 64.617, "eval_steps_per_second": 2.73, "step": 182 }, { "epoch": 56.92307692307692, "eval_accuracy": 0.8028169014084507, "eval_loss": 0.5828216075897217, "eval_runtime": 1.1089, "eval_samples_per_second": 64.03, "eval_steps_per_second": 2.705, "step": 185 }, { "epoch": 57.84615384615385, "eval_accuracy": 0.8028169014084507, "eval_loss": 0.5865649580955505, "eval_runtime": 1.1049, "eval_samples_per_second": 64.258, "eval_steps_per_second": 2.715, "step": 188 }, { "epoch": 58.46153846153846, "grad_norm": 9.487194061279297, "learning_rate": 2.037037037037037e-05, "loss": 0.1657, "step": 190 }, { "epoch": 58.76923076923077, "eval_accuracy": 0.8169014084507042, "eval_loss": 0.6171615123748779, "eval_runtime": 1.1009, "eval_samples_per_second": 64.495, "eval_steps_per_second": 2.725, "step": 191 }, { "epoch": 60.0, "eval_accuracy": 0.8309859154929577, "eval_loss": 0.7427834868431091, "eval_runtime": 1.0942, "eval_samples_per_second": 64.889, "eval_steps_per_second": 2.742, "step": 195 }, { "epoch": 60.92307692307692, "eval_accuracy": 0.8169014084507042, "eval_loss": 0.8981254696846008, "eval_runtime": 1.0972, "eval_samples_per_second": 64.708, "eval_steps_per_second": 2.734, "step": 198 }, { "epoch": 61.53846153846154, "grad_norm": 6.037697792053223, "learning_rate": 1.8518518518518518e-05, "loss": 0.1347, "step": 200 }, { "epoch": 61.84615384615385, "eval_accuracy": 0.8169014084507042, "eval_loss": 0.7168216705322266, "eval_runtime": 1.1259, "eval_samples_per_second": 63.063, "eval_steps_per_second": 2.665, "step": 201 }, { "epoch": 62.76923076923077, "eval_accuracy": 0.8450704225352113, "eval_loss": 0.8026114106178284, "eval_runtime": 1.113, "eval_samples_per_second": 63.789, "eval_steps_per_second": 2.695, "step": 204 }, { "epoch": 64.0, "eval_accuracy": 0.8450704225352113, "eval_loss": 0.8639427423477173, "eval_runtime": 1.1002, "eval_samples_per_second": 64.535, "eval_steps_per_second": 2.727, "step": 208 }, { "epoch": 64.61538461538461, "grad_norm": 7.2083282470703125, "learning_rate": 1.6666666666666667e-05, "loss": 0.1335, "step": 210 }, { "epoch": 64.92307692307692, "eval_accuracy": 0.8169014084507042, "eval_loss": 0.7604219913482666, "eval_runtime": 1.1013, "eval_samples_per_second": 64.472, "eval_steps_per_second": 2.724, "step": 211 }, { "epoch": 65.84615384615384, "eval_accuracy": 0.8169014084507042, "eval_loss": 0.7992713451385498, "eval_runtime": 1.0706, "eval_samples_per_second": 66.321, "eval_steps_per_second": 2.802, "step": 214 }, { "epoch": 66.76923076923077, "eval_accuracy": 0.8591549295774648, "eval_loss": 0.833002507686615, "eval_runtime": 1.1411, "eval_samples_per_second": 62.218, "eval_steps_per_second": 2.629, "step": 217 }, { "epoch": 67.6923076923077, "grad_norm": 4.454225540161133, "learning_rate": 1.4814814814814815e-05, "loss": 0.145, "step": 220 }, { "epoch": 68.0, "eval_accuracy": 0.8591549295774648, "eval_loss": 0.8143167495727539, "eval_runtime": 1.0982, "eval_samples_per_second": 64.653, "eval_steps_per_second": 2.732, "step": 221 }, { "epoch": 68.92307692307692, "eval_accuracy": 0.8591549295774648, "eval_loss": 0.751991868019104, "eval_runtime": 1.1019, "eval_samples_per_second": 64.432, "eval_steps_per_second": 2.722, "step": 224 }, { "epoch": 69.84615384615384, "eval_accuracy": 0.8450704225352113, "eval_loss": 0.721619725227356, "eval_runtime": 1.0719, "eval_samples_per_second": 66.236, "eval_steps_per_second": 2.799, "step": 227 }, { "epoch": 70.76923076923077, "grad_norm": 5.271942138671875, "learning_rate": 1.2962962962962962e-05, "loss": 0.1658, "step": 230 }, { "epoch": 70.76923076923077, "eval_accuracy": 0.8591549295774648, "eval_loss": 0.7968146204948425, "eval_runtime": 1.1157, "eval_samples_per_second": 63.638, "eval_steps_per_second": 2.689, "step": 230 }, { "epoch": 72.0, "eval_accuracy": 0.8732394366197183, "eval_loss": 0.7730194330215454, "eval_runtime": 1.1178, "eval_samples_per_second": 63.517, "eval_steps_per_second": 2.684, "step": 234 }, { "epoch": 72.92307692307692, "eval_accuracy": 0.8732394366197183, "eval_loss": 0.7449730634689331, "eval_runtime": 1.1284, "eval_samples_per_second": 62.921, "eval_steps_per_second": 2.659, "step": 237 }, { "epoch": 73.84615384615384, "grad_norm": 4.669654846191406, "learning_rate": 1.1111111111111112e-05, "loss": 0.1381, "step": 240 }, { "epoch": 73.84615384615384, "eval_accuracy": 0.8732394366197183, "eval_loss": 0.7855215072631836, "eval_runtime": 1.0837, "eval_samples_per_second": 65.515, "eval_steps_per_second": 2.768, "step": 240 }, { "epoch": 74.76923076923077, "eval_accuracy": 0.8591549295774648, "eval_loss": 0.8253357410430908, "eval_runtime": 1.1299, "eval_samples_per_second": 62.836, "eval_steps_per_second": 2.655, "step": 243 }, { "epoch": 76.0, "eval_accuracy": 0.8591549295774648, "eval_loss": 0.8064681887626648, "eval_runtime": 1.0974, "eval_samples_per_second": 64.697, "eval_steps_per_second": 2.734, "step": 247 }, { "epoch": 76.92307692307692, "grad_norm": 4.332691669464111, "learning_rate": 9.259259259259259e-06, "loss": 0.1306, "step": 250 }, { "epoch": 76.92307692307692, "eval_accuracy": 0.8591549295774648, "eval_loss": 0.7778077721595764, "eval_runtime": 1.1004, "eval_samples_per_second": 64.523, "eval_steps_per_second": 2.726, "step": 250 }, { "epoch": 77.84615384615384, "eval_accuracy": 0.8591549295774648, "eval_loss": 0.7814451456069946, "eval_runtime": 1.1215, "eval_samples_per_second": 63.309, "eval_steps_per_second": 2.675, "step": 253 }, { "epoch": 78.76923076923077, "eval_accuracy": 0.8309859154929577, "eval_loss": 0.733501672744751, "eval_runtime": 1.1106, "eval_samples_per_second": 63.928, "eval_steps_per_second": 2.701, "step": 256 }, { "epoch": 80.0, "grad_norm": 3.209725856781006, "learning_rate": 7.4074074074074075e-06, "loss": 0.1027, "step": 260 }, { "epoch": 80.0, "eval_accuracy": 0.8450704225352113, "eval_loss": 0.7371585369110107, "eval_runtime": 1.0987, "eval_samples_per_second": 64.619, "eval_steps_per_second": 2.73, "step": 260 }, { "epoch": 80.92307692307692, "eval_accuracy": 0.8450704225352113, "eval_loss": 0.7617682814598083, "eval_runtime": 1.0988, "eval_samples_per_second": 64.618, "eval_steps_per_second": 2.73, "step": 263 }, { "epoch": 81.84615384615384, "eval_accuracy": 0.8450704225352113, "eval_loss": 0.7891401648521423, "eval_runtime": 1.0792, "eval_samples_per_second": 65.789, "eval_steps_per_second": 2.78, "step": 266 }, { "epoch": 82.76923076923077, "eval_accuracy": 0.8591549295774648, "eval_loss": 0.828731119632721, "eval_runtime": 1.1039, "eval_samples_per_second": 64.319, "eval_steps_per_second": 2.718, "step": 269 }, { "epoch": 83.07692307692308, "grad_norm": 9.988916397094727, "learning_rate": 5.555555555555556e-06, "loss": 0.1296, "step": 270 }, { "epoch": 84.0, "eval_accuracy": 0.8450704225352113, "eval_loss": 0.8412397503852844, "eval_runtime": 1.0672, "eval_samples_per_second": 66.527, "eval_steps_per_second": 2.811, "step": 273 }, { "epoch": 84.92307692307692, "eval_accuracy": 0.8450704225352113, "eval_loss": 0.8014044761657715, "eval_runtime": 1.1292, "eval_samples_per_second": 62.877, "eval_steps_per_second": 2.657, "step": 276 }, { "epoch": 85.84615384615384, "eval_accuracy": 0.8450704225352113, "eval_loss": 0.7529923915863037, "eval_runtime": 1.1018, "eval_samples_per_second": 64.442, "eval_steps_per_second": 2.723, "step": 279 }, { "epoch": 86.15384615384616, "grad_norm": 5.536340236663818, "learning_rate": 3.7037037037037037e-06, "loss": 0.1162, "step": 280 }, { "epoch": 86.76923076923077, "eval_accuracy": 0.8450704225352113, "eval_loss": 0.7242565155029297, "eval_runtime": 1.096, "eval_samples_per_second": 64.781, "eval_steps_per_second": 2.737, "step": 282 }, { "epoch": 88.0, "eval_accuracy": 0.8450704225352113, "eval_loss": 0.7246566414833069, "eval_runtime": 1.0962, "eval_samples_per_second": 64.771, "eval_steps_per_second": 2.737, "step": 286 }, { "epoch": 88.92307692307692, "eval_accuracy": 0.8450704225352113, "eval_loss": 0.735383927822113, "eval_runtime": 1.0904, "eval_samples_per_second": 65.113, "eval_steps_per_second": 2.751, "step": 289 }, { "epoch": 89.23076923076923, "grad_norm": 3.339935064315796, "learning_rate": 1.8518518518518519e-06, "loss": 0.1166, "step": 290 }, { "epoch": 89.84615384615384, "eval_accuracy": 0.8591549295774648, "eval_loss": 0.7390521764755249, "eval_runtime": 1.0977, "eval_samples_per_second": 64.682, "eval_steps_per_second": 2.733, "step": 292 }, { "epoch": 90.76923076923077, "eval_accuracy": 0.8591549295774648, "eval_loss": 0.7390065789222717, "eval_runtime": 1.0658, "eval_samples_per_second": 66.614, "eval_steps_per_second": 2.815, "step": 295 }, { "epoch": 92.0, "eval_accuracy": 0.8591549295774648, "eval_loss": 0.7374176979064941, "eval_runtime": 1.0983, "eval_samples_per_second": 64.643, "eval_steps_per_second": 2.731, "step": 299 }, { "epoch": 92.3076923076923, "grad_norm": 3.673130512237549, "learning_rate": 0.0, "loss": 0.1031, "step": 300 }, { "epoch": 92.3076923076923, "eval_accuracy": 0.8591549295774648, "eval_loss": 0.7372613549232483, "eval_runtime": 1.1158, "eval_samples_per_second": 63.634, "eval_steps_per_second": 2.689, "step": 300 }, { "epoch": 92.3076923076923, "step": 300, "total_flos": 2.839022453308834e+18, "train_loss": 0.25909996310869854, "train_runtime": 1733.8719, "train_samples_per_second": 22.897, "train_steps_per_second": 0.173 }, { "epoch": 92.3076923076923, "eval_accuracy": 0.9014084507042254, "eval_loss": 0.37337467074394226, "eval_runtime": 1.0981, "eval_samples_per_second": 64.659, "eval_steps_per_second": 2.732, "step": 300 } ], "logging_steps": 10, "max_steps": 300, "num_input_tokens_seen": 0, "num_train_epochs": 100, "save_steps": 500, "total_flos": 2.839022453308834e+18, "train_batch_size": 32, "trial_name": null, "trial_params": null }