| { |
| "best_metric": 0.7840909090909091, |
| "best_model_checkpoint": "swinv2-tiny-patch4-window8-256-dmae-humeda-DAV45/checkpoint-140", |
| "epoch": 39.935064935064936, |
| "eval_steps": 500, |
| "global_step": 798, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.6233766233766234, |
| "grad_norm": 18.71070671081543, |
| "learning_rate": 7.5e-06, |
| "loss": 6.1945, |
| "step": 12 |
| }, |
| { |
| "epoch": 1.0, |
| "eval_accuracy": 0.45454545454545453, |
| "eval_loss": 1.2588036060333252, |
| "eval_runtime": 4.3451, |
| "eval_samples_per_second": 20.253, |
| "eval_steps_per_second": 0.69, |
| "step": 20 |
| }, |
| { |
| "epoch": 1.2077922077922079, |
| "grad_norm": 14.680241584777832, |
| "learning_rate": 1.5e-05, |
| "loss": 4.9168, |
| "step": 24 |
| }, |
| { |
| "epoch": 1.8311688311688312, |
| "grad_norm": 19.30341148376465, |
| "learning_rate": 2.25e-05, |
| "loss": 4.5836, |
| "step": 36 |
| }, |
| { |
| "epoch": 2.0, |
| "eval_accuracy": 0.7159090909090909, |
| "eval_loss": 0.9657976031303406, |
| "eval_runtime": 2.9181, |
| "eval_samples_per_second": 30.157, |
| "eval_steps_per_second": 1.028, |
| "step": 40 |
| }, |
| { |
| "epoch": 2.4155844155844157, |
| "grad_norm": 25.428447723388672, |
| "learning_rate": 3e-05, |
| "loss": 3.527, |
| "step": 48 |
| }, |
| { |
| "epoch": 3.0, |
| "grad_norm": 19.649690628051758, |
| "learning_rate": 3.7500000000000003e-05, |
| "loss": 2.9056, |
| "step": 60 |
| }, |
| { |
| "epoch": 3.0, |
| "eval_accuracy": 0.7272727272727273, |
| "eval_loss": 0.7736997008323669, |
| "eval_runtime": 1.6707, |
| "eval_samples_per_second": 52.672, |
| "eval_steps_per_second": 1.796, |
| "step": 60 |
| }, |
| { |
| "epoch": 3.6233766233766236, |
| "grad_norm": 30.83364486694336, |
| "learning_rate": 4.5e-05, |
| "loss": 2.8061, |
| "step": 72 |
| }, |
| { |
| "epoch": 4.0, |
| "eval_accuracy": 0.7727272727272727, |
| "eval_loss": 0.673829972743988, |
| "eval_runtime": 1.6507, |
| "eval_samples_per_second": 53.31, |
| "eval_steps_per_second": 1.817, |
| "step": 80 |
| }, |
| { |
| "epoch": 4.207792207792208, |
| "grad_norm": 31.61625099182129, |
| "learning_rate": 4.999617113753456e-05, |
| "loss": 2.0484, |
| "step": 84 |
| }, |
| { |
| "epoch": 4.8311688311688314, |
| "grad_norm": 37.459957122802734, |
| "learning_rate": 4.99387616539795e-05, |
| "loss": 1.9405, |
| "step": 96 |
| }, |
| { |
| "epoch": 5.0, |
| "eval_accuracy": 0.7613636363636364, |
| "eval_loss": 0.626100480556488, |
| "eval_runtime": 1.6495, |
| "eval_samples_per_second": 53.351, |
| "eval_steps_per_second": 1.819, |
| "step": 100 |
| }, |
| { |
| "epoch": 5.415584415584416, |
| "grad_norm": 29.627758026123047, |
| "learning_rate": 4.981261550534304e-05, |
| "loss": 1.6502, |
| "step": 108 |
| }, |
| { |
| "epoch": 6.0, |
| "grad_norm": 15.884918212890625, |
| "learning_rate": 4.9618080377917326e-05, |
| "loss": 1.4425, |
| "step": 120 |
| }, |
| { |
| "epoch": 6.0, |
| "eval_accuracy": 0.75, |
| "eval_loss": 0.8126522302627563, |
| "eval_runtime": 1.6403, |
| "eval_samples_per_second": 53.65, |
| "eval_steps_per_second": 1.829, |
| "step": 120 |
| }, |
| { |
| "epoch": 6.623376623376624, |
| "grad_norm": 30.541797637939453, |
| "learning_rate": 4.935569245293244e-05, |
| "loss": 1.3554, |
| "step": 132 |
| }, |
| { |
| "epoch": 7.0, |
| "eval_accuracy": 0.7840909090909091, |
| "eval_loss": 0.7812299728393555, |
| "eval_runtime": 1.6388, |
| "eval_samples_per_second": 53.696, |
| "eval_steps_per_second": 1.831, |
| "step": 140 |
| }, |
| { |
| "epoch": 7.207792207792208, |
| "grad_norm": 37.37839126586914, |
| "learning_rate": 4.902617492872402e-05, |
| "loss": 1.1744, |
| "step": 144 |
| }, |
| { |
| "epoch": 7.8311688311688314, |
| "grad_norm": 54.830101013183594, |
| "learning_rate": 4.863043602744095e-05, |
| "loss": 1.2975, |
| "step": 156 |
| }, |
| { |
| "epoch": 8.0, |
| "eval_accuracy": 0.75, |
| "eval_loss": 0.8404767513275146, |
| "eval_runtime": 2.1899, |
| "eval_samples_per_second": 40.185, |
| "eval_steps_per_second": 1.37, |
| "step": 160 |
| }, |
| { |
| "epoch": 8.415584415584416, |
| "grad_norm": 39.1025390625, |
| "learning_rate": 4.81695664917871e-05, |
| "loss": 1.0383, |
| "step": 168 |
| }, |
| { |
| "epoch": 9.0, |
| "grad_norm": 33.006813049316406, |
| "learning_rate": 4.764483657869654e-05, |
| "loss": 0.812, |
| "step": 180 |
| }, |
| { |
| "epoch": 9.0, |
| "eval_accuracy": 0.7159090909090909, |
| "eval_loss": 1.0776864290237427, |
| "eval_runtime": 1.6116, |
| "eval_samples_per_second": 54.605, |
| "eval_steps_per_second": 1.862, |
| "step": 180 |
| }, |
| { |
| "epoch": 9.623376623376624, |
| "grad_norm": 22.014991760253906, |
| "learning_rate": 4.705769255822849e-05, |
| "loss": 0.7984, |
| "step": 192 |
| }, |
| { |
| "epoch": 10.0, |
| "eval_accuracy": 0.7159090909090909, |
| "eval_loss": 0.9403623342514038, |
| "eval_runtime": 1.6439, |
| "eval_samples_per_second": 53.532, |
| "eval_steps_per_second": 1.825, |
| "step": 200 |
| }, |
| { |
| "epoch": 10.207792207792208, |
| "grad_norm": 37.21240234375, |
| "learning_rate": 4.640975272733168e-05, |
| "loss": 0.7945, |
| "step": 204 |
| }, |
| { |
| "epoch": 10.831168831168831, |
| "grad_norm": 25.584922790527344, |
| "learning_rate": 4.570280294946506e-05, |
| "loss": 0.7895, |
| "step": 216 |
| }, |
| { |
| "epoch": 11.0, |
| "eval_accuracy": 0.7045454545454546, |
| "eval_loss": 1.0901598930358887, |
| "eval_runtime": 1.6182, |
| "eval_samples_per_second": 54.381, |
| "eval_steps_per_second": 1.854, |
| "step": 220 |
| }, |
| { |
| "epoch": 11.415584415584416, |
| "grad_norm": 42.140926361083984, |
| "learning_rate": 4.493879173236869e-05, |
| "loss": 0.6275, |
| "step": 228 |
| }, |
| { |
| "epoch": 12.0, |
| "grad_norm": 0.09648128598928452, |
| "learning_rate": 4.411982485755156e-05, |
| "loss": 0.7333, |
| "step": 240 |
| }, |
| { |
| "epoch": 12.0, |
| "eval_accuracy": 0.75, |
| "eval_loss": 1.099798321723938, |
| "eval_runtime": 1.619, |
| "eval_samples_per_second": 54.354, |
| "eval_steps_per_second": 1.853, |
| "step": 240 |
| }, |
| { |
| "epoch": 12.623376623376624, |
| "grad_norm": 25.823577880859375, |
| "learning_rate": 4.3248159576298576e-05, |
| "loss": 0.6073, |
| "step": 252 |
| }, |
| { |
| "epoch": 13.0, |
| "eval_accuracy": 0.7386363636363636, |
| "eval_loss": 1.2733994722366333, |
| "eval_runtime": 1.6351, |
| "eval_samples_per_second": 53.82, |
| "eval_steps_per_second": 1.835, |
| "step": 260 |
| }, |
| { |
| "epoch": 13.207792207792208, |
| "grad_norm": 19.644241333007812, |
| "learning_rate": 4.232619838819377e-05, |
| "loss": 0.5408, |
| "step": 264 |
| }, |
| { |
| "epoch": 13.831168831168831, |
| "grad_norm": 36.02125549316406, |
| "learning_rate": 4.135648241930766e-05, |
| "loss": 0.6548, |
| "step": 276 |
| }, |
| { |
| "epoch": 14.0, |
| "eval_accuracy": 0.7159090909090909, |
| "eval_loss": 1.303389549255371, |
| "eval_runtime": 2.1993, |
| "eval_samples_per_second": 40.012, |
| "eval_steps_per_second": 1.364, |
| "step": 280 |
| }, |
| { |
| "epoch": 14.415584415584416, |
| "grad_norm": 27.597558975219727, |
| "learning_rate": 4.034168441829963e-05, |
| "loss": 0.5159, |
| "step": 288 |
| }, |
| { |
| "epoch": 15.0, |
| "grad_norm": 5.691001892089844, |
| "learning_rate": 3.928460138973984e-05, |
| "loss": 0.5538, |
| "step": 300 |
| }, |
| { |
| "epoch": 15.0, |
| "eval_accuracy": 0.75, |
| "eval_loss": 1.189049243927002, |
| "eval_runtime": 1.6811, |
| "eval_samples_per_second": 52.348, |
| "eval_steps_per_second": 1.785, |
| "step": 300 |
| }, |
| { |
| "epoch": 15.623376623376624, |
| "grad_norm": 21.679243087768555, |
| "learning_rate": 3.818814688495475e-05, |
| "loss": 0.556, |
| "step": 312 |
| }, |
| { |
| "epoch": 16.0, |
| "eval_accuracy": 0.75, |
| "eval_loss": 1.3661515712738037, |
| "eval_runtime": 2.2932, |
| "eval_samples_per_second": 38.374, |
| "eval_steps_per_second": 1.308, |
| "step": 320 |
| }, |
| { |
| "epoch": 16.207792207792206, |
| "grad_norm": 29.273984909057617, |
| "learning_rate": 3.705534297164438e-05, |
| "loss": 0.4476, |
| "step": 324 |
| }, |
| { |
| "epoch": 16.83116883116883, |
| "grad_norm": 23.221296310424805, |
| "learning_rate": 3.5889311904404676e-05, |
| "loss": 0.5273, |
| "step": 336 |
| }, |
| { |
| "epoch": 17.0, |
| "eval_accuracy": 0.7272727272727273, |
| "eval_loss": 1.2832838296890259, |
| "eval_runtime": 2.0423, |
| "eval_samples_per_second": 43.089, |
| "eval_steps_per_second": 1.469, |
| "step": 340 |
| }, |
| { |
| "epoch": 17.415584415584416, |
| "grad_norm": 23.35724449157715, |
| "learning_rate": 3.469326751911314e-05, |
| "loss": 0.4096, |
| "step": 348 |
| }, |
| { |
| "epoch": 18.0, |
| "grad_norm": 0.7785102128982544, |
| "learning_rate": 3.347050637489627e-05, |
| "loss": 0.3863, |
| "step": 360 |
| }, |
| { |
| "epoch": 18.0, |
| "eval_accuracy": 0.7159090909090909, |
| "eval_loss": 1.2975611686706543, |
| "eval_runtime": 2.2746, |
| "eval_samples_per_second": 38.689, |
| "eval_steps_per_second": 1.319, |
| "step": 360 |
| }, |
| { |
| "epoch": 18.623376623376622, |
| "grad_norm": 17.82123565673828, |
| "learning_rate": 3.222439866809383e-05, |
| "loss": 0.5185, |
| "step": 372 |
| }, |
| { |
| "epoch": 19.0, |
| "eval_accuracy": 0.7386363636363636, |
| "eval_loss": 1.2460757493972778, |
| "eval_runtime": 1.6484, |
| "eval_samples_per_second": 53.384, |
| "eval_steps_per_second": 1.82, |
| "step": 380 |
| }, |
| { |
| "epoch": 19.207792207792206, |
| "grad_norm": 20.32648468017578, |
| "learning_rate": 3.095837894326287e-05, |
| "loss": 0.4961, |
| "step": 384 |
| }, |
| { |
| "epoch": 19.83116883116883, |
| "grad_norm": 23.85995101928711, |
| "learning_rate": 2.967593662682395e-05, |
| "loss": 0.475, |
| "step": 396 |
| }, |
| { |
| "epoch": 20.0, |
| "eval_accuracy": 0.7386363636363636, |
| "eval_loss": 1.2543120384216309, |
| "eval_runtime": 1.6377, |
| "eval_samples_per_second": 53.735, |
| "eval_steps_per_second": 1.832, |
| "step": 400 |
| }, |
| { |
| "epoch": 20.415584415584416, |
| "grad_norm": 7.2037811279296875, |
| "learning_rate": 2.838060640944115e-05, |
| "loss": 0.32, |
| "step": 408 |
| }, |
| { |
| "epoch": 21.0, |
| "grad_norm": 0.6710865497589111, |
| "learning_rate": 2.7075958503643745e-05, |
| "loss": 0.3021, |
| "step": 420 |
| }, |
| { |
| "epoch": 21.0, |
| "eval_accuracy": 0.7727272727272727, |
| "eval_loss": 1.31428062915802, |
| "eval_runtime": 2.2517, |
| "eval_samples_per_second": 39.081, |
| "eval_steps_per_second": 1.332, |
| "step": 420 |
| }, |
| { |
| "epoch": 21.623376623376622, |
| "grad_norm": 12.076529502868652, |
| "learning_rate": 2.576558880354205e-05, |
| "loss": 0.3334, |
| "step": 432 |
| }, |
| { |
| "epoch": 22.0, |
| "eval_accuracy": 0.75, |
| "eval_loss": 1.287263035774231, |
| "eval_runtime": 1.6412, |
| "eval_samples_per_second": 53.62, |
| "eval_steps_per_second": 1.828, |
| "step": 440 |
| }, |
| { |
| "epoch": 22.207792207792206, |
| "grad_norm": 21.53441047668457, |
| "learning_rate": 2.4453108973759122e-05, |
| "loss": 0.3226, |
| "step": 444 |
| }, |
| { |
| "epoch": 22.83116883116883, |
| "grad_norm": 17.34014892578125, |
| "learning_rate": 2.3142136494895552e-05, |
| "loss": 0.3773, |
| "step": 456 |
| }, |
| { |
| "epoch": 23.0, |
| "eval_accuracy": 0.7386363636363636, |
| "eval_loss": 1.3992358446121216, |
| "eval_runtime": 3.8978, |
| "eval_samples_per_second": 22.577, |
| "eval_steps_per_second": 0.77, |
| "step": 460 |
| }, |
| { |
| "epoch": 23.415584415584416, |
| "grad_norm": 28.858182907104492, |
| "learning_rate": 2.183628469296411e-05, |
| "loss": 0.3067, |
| "step": 468 |
| }, |
| { |
| "epoch": 24.0, |
| "grad_norm": 10.506282806396484, |
| "learning_rate": 2.0539152780275357e-05, |
| "loss": 0.2606, |
| "step": 480 |
| }, |
| { |
| "epoch": 24.0, |
| "eval_accuracy": 0.7159090909090909, |
| "eval_loss": 1.5181022882461548, |
| "eval_runtime": 1.6351, |
| "eval_samples_per_second": 53.819, |
| "eval_steps_per_second": 1.835, |
| "step": 480 |
| }, |
| { |
| "epoch": 24.623376623376622, |
| "grad_norm": 14.785501480102539, |
| "learning_rate": 1.9254315935223725e-05, |
| "loss": 0.3344, |
| "step": 492 |
| }, |
| { |
| "epoch": 25.0, |
| "eval_accuracy": 0.7613636363636364, |
| "eval_loss": 1.432998538017273, |
| "eval_runtime": 1.6323, |
| "eval_samples_per_second": 53.911, |
| "eval_steps_per_second": 1.838, |
| "step": 500 |
| }, |
| { |
| "epoch": 25.207792207792206, |
| "grad_norm": 25.188377380371094, |
| "learning_rate": 1.7985315448316243e-05, |
| "loss": 0.3907, |
| "step": 504 |
| }, |
| { |
| "epoch": 25.83116883116883, |
| "grad_norm": 25.56688117980957, |
| "learning_rate": 1.673564896160374e-05, |
| "loss": 0.3349, |
| "step": 516 |
| }, |
| { |
| "epoch": 26.0, |
| "eval_accuracy": 0.7840909090909091, |
| "eval_loss": 1.416541576385498, |
| "eval_runtime": 1.6809, |
| "eval_samples_per_second": 52.353, |
| "eval_steps_per_second": 1.785, |
| "step": 520 |
| }, |
| { |
| "epoch": 26.415584415584416, |
| "grad_norm": 15.779555320739746, |
| "learning_rate": 1.550876082841669e-05, |
| "loss": 0.3379, |
| "step": 528 |
| }, |
| { |
| "epoch": 27.0, |
| "grad_norm": 1.8928312063217163, |
| "learning_rate": 1.430803261997638e-05, |
| "loss": 0.3246, |
| "step": 540 |
| }, |
| { |
| "epoch": 27.0, |
| "eval_accuracy": 0.7613636363636364, |
| "eval_loss": 1.3633875846862793, |
| "eval_runtime": 1.6308, |
| "eval_samples_per_second": 53.962, |
| "eval_steps_per_second": 1.84, |
| "step": 540 |
| }, |
| { |
| "epoch": 27.623376623376622, |
| "grad_norm": 20.64362335205078, |
| "learning_rate": 1.3136773805047203e-05, |
| "loss": 0.3395, |
| "step": 552 |
| }, |
| { |
| "epoch": 28.0, |
| "eval_accuracy": 0.7613636363636364, |
| "eval_loss": 1.3985376358032227, |
| "eval_runtime": 1.6495, |
| "eval_samples_per_second": 53.349, |
| "eval_steps_per_second": 1.819, |
| "step": 560 |
| }, |
| { |
| "epoch": 28.207792207792206, |
| "grad_norm": 12.24038314819336, |
| "learning_rate": 1.1998212628319214e-05, |
| "loss": 0.2321, |
| "step": 564 |
| }, |
| { |
| "epoch": 28.83116883116883, |
| "grad_norm": 21.42084312438965, |
| "learning_rate": 1.0895487212661759e-05, |
| "loss": 0.2606, |
| "step": 576 |
| }, |
| { |
| "epoch": 29.0, |
| "eval_accuracy": 0.7613636363636364, |
| "eval_loss": 1.386614203453064, |
| "eval_runtime": 1.661, |
| "eval_samples_per_second": 52.981, |
| "eval_steps_per_second": 1.806, |
| "step": 580 |
| }, |
| { |
| "epoch": 29.415584415584416, |
| "grad_norm": 17.57806396484375, |
| "learning_rate": 9.831636909772579e-06, |
| "loss": 0.254, |
| "step": 588 |
| }, |
| { |
| "epoch": 30.0, |
| "grad_norm": 15.217145919799805, |
| "learning_rate": 8.809593923061812e-06, |
| "loss": 0.2212, |
| "step": 600 |
| }, |
| { |
| "epoch": 30.0, |
| "eval_accuracy": 0.75, |
| "eval_loss": 1.484897494316101, |
| "eval_runtime": 1.6678, |
| "eval_samples_per_second": 52.763, |
| "eval_steps_per_second": 1.799, |
| "step": 600 |
| }, |
| { |
| "epoch": 30.623376623376622, |
| "grad_norm": 19.78946304321289, |
| "learning_rate": 7.832175225860012e-06, |
| "loss": 0.2266, |
| "step": 612 |
| }, |
| { |
| "epoch": 31.0, |
| "eval_accuracy": 0.7727272727272727, |
| "eval_loss": 1.4229885339736938, |
| "eval_runtime": 1.6298, |
| "eval_samples_per_second": 53.994, |
| "eval_steps_per_second": 1.841, |
| "step": 620 |
| }, |
| { |
| "epoch": 31.207792207792206, |
| "grad_norm": 18.53005027770996, |
| "learning_rate": 6.902074797225408e-06, |
| "loss": 0.1926, |
| "step": 624 |
| }, |
| { |
| "epoch": 31.83116883116883, |
| "grad_norm": 36.56352615356445, |
| "learning_rate": 6.021856196750178e-06, |
| "loss": 0.2525, |
| "step": 636 |
| }, |
| { |
| "epoch": 32.0, |
| "eval_accuracy": 0.7727272727272727, |
| "eval_loss": 1.4287593364715576, |
| "eval_runtime": 1.6702, |
| "eval_samples_per_second": 52.688, |
| "eval_steps_per_second": 1.796, |
| "step": 640 |
| }, |
| { |
| "epoch": 32.41558441558441, |
| "grad_norm": 22.782241821289062, |
| "learning_rate": 5.1939454988312206e-06, |
| "loss": 0.2129, |
| "step": 648 |
| }, |
| { |
| "epoch": 33.0, |
| "grad_norm": 3.140561580657959, |
| "learning_rate": 4.420624605880136e-06, |
| "loss": 0.2241, |
| "step": 660 |
| }, |
| { |
| "epoch": 33.0, |
| "eval_accuracy": 0.7613636363636364, |
| "eval_loss": 1.4496763944625854, |
| "eval_runtime": 1.6312, |
| "eval_samples_per_second": 53.947, |
| "eval_steps_per_second": 1.839, |
| "step": 660 |
| }, |
| { |
| "epoch": 33.62337662337662, |
| "grad_norm": 10.7069730758667, |
| "learning_rate": 3.7040249589025523e-06, |
| "loss": 0.1816, |
| "step": 672 |
| }, |
| { |
| "epoch": 34.0, |
| "eval_accuracy": 0.7613636363636364, |
| "eval_loss": 1.4346766471862793, |
| "eval_runtime": 1.6494, |
| "eval_samples_per_second": 53.352, |
| "eval_steps_per_second": 1.819, |
| "step": 680 |
| }, |
| { |
| "epoch": 34.20779220779221, |
| "grad_norm": 6.652499198913574, |
| "learning_rate": 3.0461216627820032e-06, |
| "loss": 0.1774, |
| "step": 684 |
| }, |
| { |
| "epoch": 34.83116883116883, |
| "grad_norm": 15.814424514770508, |
| "learning_rate": 2.448728042460141e-06, |
| "loss": 0.2529, |
| "step": 696 |
| }, |
| { |
| "epoch": 35.0, |
| "eval_accuracy": 0.75, |
| "eval_loss": 1.427807331085205, |
| "eval_runtime": 2.282, |
| "eval_samples_per_second": 38.562, |
| "eval_steps_per_second": 1.315, |
| "step": 700 |
| }, |
| { |
| "epoch": 35.41558441558441, |
| "grad_norm": 27.73215103149414, |
| "learning_rate": 1.913490645017846e-06, |
| "loss": 0.2284, |
| "step": 708 |
| }, |
| { |
| "epoch": 36.0, |
| "grad_norm": 1.1545226573944092, |
| "learning_rate": 1.4418847014323944e-06, |
| "loss": 0.189, |
| "step": 720 |
| }, |
| { |
| "epoch": 36.0, |
| "eval_accuracy": 0.75, |
| "eval_loss": 1.428978681564331, |
| "eval_runtime": 1.6065, |
| "eval_samples_per_second": 54.776, |
| "eval_steps_per_second": 1.867, |
| "step": 720 |
| }, |
| { |
| "epoch": 36.62337662337662, |
| "grad_norm": 11.000960350036621, |
| "learning_rate": 1.0352100605192315e-06, |
| "loss": 0.2491, |
| "step": 732 |
| }, |
| { |
| "epoch": 37.0, |
| "eval_accuracy": 0.7613636363636364, |
| "eval_loss": 1.4449158906936646, |
| "eval_runtime": 1.636, |
| "eval_samples_per_second": 53.788, |
| "eval_steps_per_second": 1.834, |
| "step": 740 |
| }, |
| { |
| "epoch": 37.20779220779221, |
| "grad_norm": 10.077706336975098, |
| "learning_rate": 6.945876062651985e-07, |
| "loss": 0.2124, |
| "step": 744 |
| }, |
| { |
| "epoch": 37.83116883116883, |
| "grad_norm": 12.775538444519043, |
| "learning_rate": 4.209561684278496e-07, |
| "loss": 0.2562, |
| "step": 756 |
| }, |
| { |
| "epoch": 38.0, |
| "eval_accuracy": 0.75, |
| "eval_loss": 1.4514336585998535, |
| "eval_runtime": 1.6491, |
| "eval_samples_per_second": 53.364, |
| "eval_steps_per_second": 1.819, |
| "step": 760 |
| }, |
| { |
| "epoch": 38.41558441558441, |
| "grad_norm": 16.541933059692383, |
| "learning_rate": 2.1506993491592354e-07, |
| "loss": 0.2026, |
| "step": 768 |
| }, |
| { |
| "epoch": 39.0, |
| "grad_norm": 10.78030776977539, |
| "learning_rate": 7.749637308301361e-08, |
| "loss": 0.1872, |
| "step": 780 |
| }, |
| { |
| "epoch": 39.0, |
| "eval_accuracy": 0.75, |
| "eval_loss": 1.4522464275360107, |
| "eval_runtime": 1.6558, |
| "eval_samples_per_second": 53.147, |
| "eval_steps_per_second": 1.812, |
| "step": 780 |
| }, |
| { |
| "epoch": 39.62337662337662, |
| "grad_norm": 10.336092948913574, |
| "learning_rate": 8.614665663816968e-09, |
| "loss": 0.223, |
| "step": 792 |
| }, |
| { |
| "epoch": 39.935064935064936, |
| "eval_accuracy": 0.75, |
| "eval_loss": 1.45267653465271, |
| "eval_runtime": 1.9683, |
| "eval_samples_per_second": 44.708, |
| "eval_steps_per_second": 1.524, |
| "step": 798 |
| }, |
| { |
| "epoch": 39.935064935064936, |
| "step": 798, |
| "total_flos": 3.181342596532273e+18, |
| "train_loss": 0.8569460838360894, |
| "train_runtime": 2907.2025, |
| "train_samples_per_second": 35.366, |
| "train_steps_per_second": 0.274 |
| } |
| ], |
| "logging_steps": 12, |
| "max_steps": 798, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 42, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 3.181342596532273e+18, |
| "train_batch_size": 32, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|