| { |
| "best_metric": 0.9314285714285714, |
| "best_model_checkpoint": "swinv2-tiny-patch4-window8-256-dmae-humeda-DAV68/checkpoint-720", |
| "epoch": 42.52173913043478, |
| "eval_steps": 500, |
| "global_step": 765, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.5797101449275363, |
| "grad_norm": 3.7087650299072266, |
| "learning_rate": 3.896103896103896e-06, |
| "loss": 1.0651, |
| "step": 10 |
| }, |
| { |
| "epoch": 1.0, |
| "eval_accuracy": 0.4685714285714286, |
| "eval_loss": 1.0769113302230835, |
| "eval_runtime": 1.9723, |
| "eval_samples_per_second": 88.729, |
| "eval_steps_per_second": 5.577, |
| "step": 18 |
| }, |
| { |
| "epoch": 1.1159420289855073, |
| "grad_norm": 3.6832127571105957, |
| "learning_rate": 7.792207792207792e-06, |
| "loss": 0.9566, |
| "step": 20 |
| }, |
| { |
| "epoch": 1.6956521739130435, |
| "grad_norm": 3.8521084785461426, |
| "learning_rate": 1.168831168831169e-05, |
| "loss": 0.9503, |
| "step": 30 |
| }, |
| { |
| "epoch": 2.0, |
| "eval_accuracy": 0.7257142857142858, |
| "eval_loss": 0.8111104965209961, |
| "eval_runtime": 1.9367, |
| "eval_samples_per_second": 90.359, |
| "eval_steps_per_second": 5.68, |
| "step": 36 |
| }, |
| { |
| "epoch": 2.2318840579710146, |
| "grad_norm": 5.586236476898193, |
| "learning_rate": 1.5584415584415583e-05, |
| "loss": 0.7219, |
| "step": 40 |
| }, |
| { |
| "epoch": 2.8115942028985508, |
| "grad_norm": 8.486871719360352, |
| "learning_rate": 1.948051948051948e-05, |
| "loss": 0.5745, |
| "step": 50 |
| }, |
| { |
| "epoch": 3.0, |
| "eval_accuracy": 0.7314285714285714, |
| "eval_loss": 0.4972275197505951, |
| "eval_runtime": 1.904, |
| "eval_samples_per_second": 91.91, |
| "eval_steps_per_second": 5.777, |
| "step": 54 |
| }, |
| { |
| "epoch": 3.3478260869565215, |
| "grad_norm": 10.05865478515625, |
| "learning_rate": 2.337662337662338e-05, |
| "loss": 0.4438, |
| "step": 60 |
| }, |
| { |
| "epoch": 3.927536231884058, |
| "grad_norm": 12.097763061523438, |
| "learning_rate": 2.6883116883116883e-05, |
| "loss": 0.4746, |
| "step": 70 |
| }, |
| { |
| "epoch": 4.0, |
| "eval_accuracy": 0.7485714285714286, |
| "eval_loss": 0.478755921125412, |
| "eval_runtime": 1.9296, |
| "eval_samples_per_second": 90.693, |
| "eval_steps_per_second": 5.701, |
| "step": 72 |
| }, |
| { |
| "epoch": 4.463768115942029, |
| "grad_norm": 5.598774433135986, |
| "learning_rate": 2.991279069767442e-05, |
| "loss": 0.3793, |
| "step": 80 |
| }, |
| { |
| "epoch": 5.0, |
| "grad_norm": 3.355344295501709, |
| "learning_rate": 2.947674418604651e-05, |
| "loss": 0.4363, |
| "step": 90 |
| }, |
| { |
| "epoch": 5.0, |
| "eval_accuracy": 0.7314285714285714, |
| "eval_loss": 0.5427026748657227, |
| "eval_runtime": 1.9262, |
| "eval_samples_per_second": 90.853, |
| "eval_steps_per_second": 5.711, |
| "step": 90 |
| }, |
| { |
| "epoch": 5.579710144927536, |
| "grad_norm": 10.868864059448242, |
| "learning_rate": 2.9040697674418607e-05, |
| "loss": 0.4362, |
| "step": 100 |
| }, |
| { |
| "epoch": 6.0, |
| "eval_accuracy": 0.8685714285714285, |
| "eval_loss": 0.3581157624721527, |
| "eval_runtime": 1.8969, |
| "eval_samples_per_second": 92.257, |
| "eval_steps_per_second": 5.799, |
| "step": 108 |
| }, |
| { |
| "epoch": 6.115942028985507, |
| "grad_norm": 18.57970428466797, |
| "learning_rate": 2.86046511627907e-05, |
| "loss": 0.3911, |
| "step": 110 |
| }, |
| { |
| "epoch": 6.695652173913043, |
| "grad_norm": 11.689884185791016, |
| "learning_rate": 2.8168604651162793e-05, |
| "loss": 0.3476, |
| "step": 120 |
| }, |
| { |
| "epoch": 7.0, |
| "eval_accuracy": 0.8685714285714285, |
| "eval_loss": 0.3572000563144684, |
| "eval_runtime": 1.9174, |
| "eval_samples_per_second": 91.27, |
| "eval_steps_per_second": 5.737, |
| "step": 126 |
| }, |
| { |
| "epoch": 7.231884057971015, |
| "grad_norm": 7.682909965515137, |
| "learning_rate": 2.7776162790697673e-05, |
| "loss": 0.3861, |
| "step": 130 |
| }, |
| { |
| "epoch": 7.811594202898551, |
| "grad_norm": 9.239214897155762, |
| "learning_rate": 2.7340116279069766e-05, |
| "loss": 0.3113, |
| "step": 140 |
| }, |
| { |
| "epoch": 8.0, |
| "eval_accuracy": 0.7885714285714286, |
| "eval_loss": 0.4334910809993744, |
| "eval_runtime": 2.4972, |
| "eval_samples_per_second": 70.078, |
| "eval_steps_per_second": 4.405, |
| "step": 144 |
| }, |
| { |
| "epoch": 8.347826086956522, |
| "grad_norm": 10.808152198791504, |
| "learning_rate": 2.6904069767441863e-05, |
| "loss": 0.2685, |
| "step": 150 |
| }, |
| { |
| "epoch": 8.927536231884059, |
| "grad_norm": 11.47179889678955, |
| "learning_rate": 2.6468023255813956e-05, |
| "loss": 0.3943, |
| "step": 160 |
| }, |
| { |
| "epoch": 9.0, |
| "eval_accuracy": 0.8685714285714285, |
| "eval_loss": 0.27819445729255676, |
| "eval_runtime": 2.4012, |
| "eval_samples_per_second": 72.88, |
| "eval_steps_per_second": 4.581, |
| "step": 162 |
| }, |
| { |
| "epoch": 9.46376811594203, |
| "grad_norm": 6.136239051818848, |
| "learning_rate": 2.6031976744186046e-05, |
| "loss": 0.2876, |
| "step": 170 |
| }, |
| { |
| "epoch": 10.0, |
| "grad_norm": 1.0991929769515991, |
| "learning_rate": 2.559593023255814e-05, |
| "loss": 0.2574, |
| "step": 180 |
| }, |
| { |
| "epoch": 10.0, |
| "eval_accuracy": 0.8685714285714285, |
| "eval_loss": 0.3320414125919342, |
| "eval_runtime": 1.9305, |
| "eval_samples_per_second": 90.652, |
| "eval_steps_per_second": 5.698, |
| "step": 180 |
| }, |
| { |
| "epoch": 10.579710144927537, |
| "grad_norm": 9.614068984985352, |
| "learning_rate": 2.5159883720930236e-05, |
| "loss": 0.2345, |
| "step": 190 |
| }, |
| { |
| "epoch": 11.0, |
| "eval_accuracy": 0.8342857142857143, |
| "eval_loss": 0.4383019506931305, |
| "eval_runtime": 1.9283, |
| "eval_samples_per_second": 90.753, |
| "eval_steps_per_second": 5.704, |
| "step": 198 |
| }, |
| { |
| "epoch": 11.115942028985508, |
| "grad_norm": 10.854966163635254, |
| "learning_rate": 2.4723837209302326e-05, |
| "loss": 0.2437, |
| "step": 200 |
| }, |
| { |
| "epoch": 11.695652173913043, |
| "grad_norm": 8.31966495513916, |
| "learning_rate": 2.428779069767442e-05, |
| "loss": 0.3002, |
| "step": 210 |
| }, |
| { |
| "epoch": 12.0, |
| "eval_accuracy": 0.8685714285714285, |
| "eval_loss": 0.3052798807621002, |
| "eval_runtime": 1.9211, |
| "eval_samples_per_second": 91.096, |
| "eval_steps_per_second": 5.726, |
| "step": 216 |
| }, |
| { |
| "epoch": 12.231884057971014, |
| "grad_norm": 12.32070541381836, |
| "learning_rate": 2.3851744186046512e-05, |
| "loss": 0.2268, |
| "step": 220 |
| }, |
| { |
| "epoch": 12.81159420289855, |
| "grad_norm": 5.423739433288574, |
| "learning_rate": 2.3415697674418605e-05, |
| "loss": 0.2038, |
| "step": 230 |
| }, |
| { |
| "epoch": 13.0, |
| "eval_accuracy": 0.8742857142857143, |
| "eval_loss": 0.3189180791378021, |
| "eval_runtime": 1.9021, |
| "eval_samples_per_second": 92.005, |
| "eval_steps_per_second": 5.783, |
| "step": 234 |
| }, |
| { |
| "epoch": 13.347826086956522, |
| "grad_norm": 6.77400016784668, |
| "learning_rate": 2.29796511627907e-05, |
| "loss": 0.2148, |
| "step": 240 |
| }, |
| { |
| "epoch": 13.927536231884059, |
| "grad_norm": 7.098783016204834, |
| "learning_rate": 2.2543604651162792e-05, |
| "loss": 0.2244, |
| "step": 250 |
| }, |
| { |
| "epoch": 14.0, |
| "eval_accuracy": 0.8742857142857143, |
| "eval_loss": 0.27661752700805664, |
| "eval_runtime": 1.9403, |
| "eval_samples_per_second": 90.191, |
| "eval_steps_per_second": 5.669, |
| "step": 252 |
| }, |
| { |
| "epoch": 14.46376811594203, |
| "grad_norm": 8.359834671020508, |
| "learning_rate": 2.2107558139534885e-05, |
| "loss": 0.1859, |
| "step": 260 |
| }, |
| { |
| "epoch": 15.0, |
| "grad_norm": 25.05173683166504, |
| "learning_rate": 2.1671511627906975e-05, |
| "loss": 0.2277, |
| "step": 270 |
| }, |
| { |
| "epoch": 15.0, |
| "eval_accuracy": 0.8857142857142857, |
| "eval_loss": 0.2637348771095276, |
| "eval_runtime": 2.2383, |
| "eval_samples_per_second": 78.185, |
| "eval_steps_per_second": 4.914, |
| "step": 270 |
| }, |
| { |
| "epoch": 15.579710144927537, |
| "grad_norm": 6.100148677825928, |
| "learning_rate": 2.123546511627907e-05, |
| "loss": 0.2318, |
| "step": 280 |
| }, |
| { |
| "epoch": 16.0, |
| "eval_accuracy": 0.8114285714285714, |
| "eval_loss": 0.46118730306625366, |
| "eval_runtime": 2.6144, |
| "eval_samples_per_second": 66.937, |
| "eval_steps_per_second": 4.207, |
| "step": 288 |
| }, |
| { |
| "epoch": 16.115942028985508, |
| "grad_norm": 6.950288772583008, |
| "learning_rate": 2.0799418604651165e-05, |
| "loss": 0.1786, |
| "step": 290 |
| }, |
| { |
| "epoch": 16.695652173913043, |
| "grad_norm": 14.06019401550293, |
| "learning_rate": 2.0363372093023254e-05, |
| "loss": 0.1908, |
| "step": 300 |
| }, |
| { |
| "epoch": 17.0, |
| "eval_accuracy": 0.8857142857142857, |
| "eval_loss": 0.31668975949287415, |
| "eval_runtime": 4.7899, |
| "eval_samples_per_second": 36.535, |
| "eval_steps_per_second": 2.297, |
| "step": 306 |
| }, |
| { |
| "epoch": 17.231884057971016, |
| "grad_norm": 14.938794136047363, |
| "learning_rate": 1.9927325581395348e-05, |
| "loss": 0.2174, |
| "step": 310 |
| }, |
| { |
| "epoch": 17.81159420289855, |
| "grad_norm": 10.554563522338867, |
| "learning_rate": 1.9491279069767444e-05, |
| "loss": 0.1932, |
| "step": 320 |
| }, |
| { |
| "epoch": 18.0, |
| "eval_accuracy": 0.9028571428571428, |
| "eval_loss": 0.29490649700164795, |
| "eval_runtime": 1.9334, |
| "eval_samples_per_second": 90.514, |
| "eval_steps_per_second": 5.689, |
| "step": 324 |
| }, |
| { |
| "epoch": 18.347826086956523, |
| "grad_norm": 11.751145362854004, |
| "learning_rate": 1.9055232558139538e-05, |
| "loss": 0.1432, |
| "step": 330 |
| }, |
| { |
| "epoch": 18.92753623188406, |
| "grad_norm": 9.6201171875, |
| "learning_rate": 1.8619186046511627e-05, |
| "loss": 0.1676, |
| "step": 340 |
| }, |
| { |
| "epoch": 19.0, |
| "eval_accuracy": 0.9085714285714286, |
| "eval_loss": 0.26273345947265625, |
| "eval_runtime": 2.1044, |
| "eval_samples_per_second": 83.159, |
| "eval_steps_per_second": 5.227, |
| "step": 342 |
| }, |
| { |
| "epoch": 19.463768115942027, |
| "grad_norm": 5.4566731452941895, |
| "learning_rate": 1.822674418604651e-05, |
| "loss": 0.1472, |
| "step": 350 |
| }, |
| { |
| "epoch": 20.0, |
| "grad_norm": 0.2691696584224701, |
| "learning_rate": 1.7790697674418608e-05, |
| "loss": 0.1442, |
| "step": 360 |
| }, |
| { |
| "epoch": 20.0, |
| "eval_accuracy": 0.9142857142857143, |
| "eval_loss": 0.2584407329559326, |
| "eval_runtime": 4.6506, |
| "eval_samples_per_second": 37.629, |
| "eval_steps_per_second": 2.365, |
| "step": 360 |
| }, |
| { |
| "epoch": 20.579710144927535, |
| "grad_norm": 14.349730491638184, |
| "learning_rate": 1.7354651162790697e-05, |
| "loss": 0.1606, |
| "step": 370 |
| }, |
| { |
| "epoch": 21.0, |
| "eval_accuracy": 0.9142857142857143, |
| "eval_loss": 0.2625867426395416, |
| "eval_runtime": 2.0381, |
| "eval_samples_per_second": 85.864, |
| "eval_steps_per_second": 5.397, |
| "step": 378 |
| }, |
| { |
| "epoch": 21.115942028985508, |
| "grad_norm": 6.261329650878906, |
| "learning_rate": 1.691860465116279e-05, |
| "loss": 0.1291, |
| "step": 380 |
| }, |
| { |
| "epoch": 21.695652173913043, |
| "grad_norm": 11.669342994689941, |
| "learning_rate": 1.6482558139534884e-05, |
| "loss": 0.1624, |
| "step": 390 |
| }, |
| { |
| "epoch": 22.0, |
| "eval_accuracy": 0.9257142857142857, |
| "eval_loss": 0.2351078987121582, |
| "eval_runtime": 2.7162, |
| "eval_samples_per_second": 64.427, |
| "eval_steps_per_second": 4.05, |
| "step": 396 |
| }, |
| { |
| "epoch": 22.231884057971016, |
| "grad_norm": 9.660877227783203, |
| "learning_rate": 1.6046511627906977e-05, |
| "loss": 0.1447, |
| "step": 400 |
| }, |
| { |
| "epoch": 22.81159420289855, |
| "grad_norm": 15.922120094299316, |
| "learning_rate": 1.561046511627907e-05, |
| "loss": 0.1735, |
| "step": 410 |
| }, |
| { |
| "epoch": 23.0, |
| "eval_accuracy": 0.9257142857142857, |
| "eval_loss": 0.27460697293281555, |
| "eval_runtime": 2.0043, |
| "eval_samples_per_second": 87.311, |
| "eval_steps_per_second": 5.488, |
| "step": 414 |
| }, |
| { |
| "epoch": 23.347826086956523, |
| "grad_norm": 7.466541767120361, |
| "learning_rate": 1.5174418604651163e-05, |
| "loss": 0.0943, |
| "step": 420 |
| }, |
| { |
| "epoch": 23.92753623188406, |
| "grad_norm": 7.257338047027588, |
| "learning_rate": 1.4738372093023255e-05, |
| "loss": 0.1604, |
| "step": 430 |
| }, |
| { |
| "epoch": 24.0, |
| "eval_accuracy": 0.8914285714285715, |
| "eval_loss": 0.3236704170703888, |
| "eval_runtime": 1.9055, |
| "eval_samples_per_second": 91.842, |
| "eval_steps_per_second": 5.773, |
| "step": 432 |
| }, |
| { |
| "epoch": 24.463768115942027, |
| "grad_norm": 8.80977725982666, |
| "learning_rate": 1.430232558139535e-05, |
| "loss": 0.1495, |
| "step": 440 |
| }, |
| { |
| "epoch": 25.0, |
| "grad_norm": 1.1711186170578003, |
| "learning_rate": 1.3866279069767441e-05, |
| "loss": 0.122, |
| "step": 450 |
| }, |
| { |
| "epoch": 25.0, |
| "eval_accuracy": 0.8914285714285715, |
| "eval_loss": 0.28520700335502625, |
| "eval_runtime": 1.9027, |
| "eval_samples_per_second": 91.974, |
| "eval_steps_per_second": 5.781, |
| "step": 450 |
| }, |
| { |
| "epoch": 25.579710144927535, |
| "grad_norm": 10.047913551330566, |
| "learning_rate": 1.3430232558139536e-05, |
| "loss": 0.1447, |
| "step": 460 |
| }, |
| { |
| "epoch": 26.0, |
| "eval_accuracy": 0.92, |
| "eval_loss": 0.2593800127506256, |
| "eval_runtime": 1.9095, |
| "eval_samples_per_second": 91.646, |
| "eval_steps_per_second": 5.761, |
| "step": 468 |
| }, |
| { |
| "epoch": 26.115942028985508, |
| "grad_norm": 4.829050064086914, |
| "learning_rate": 1.2994186046511628e-05, |
| "loss": 0.1318, |
| "step": 470 |
| }, |
| { |
| "epoch": 26.695652173913043, |
| "grad_norm": 3.806666851043701, |
| "learning_rate": 1.2558139534883723e-05, |
| "loss": 0.1265, |
| "step": 480 |
| }, |
| { |
| "epoch": 27.0, |
| "eval_accuracy": 0.9028571428571428, |
| "eval_loss": 0.28569361567497253, |
| "eval_runtime": 1.9576, |
| "eval_samples_per_second": 89.395, |
| "eval_steps_per_second": 5.619, |
| "step": 486 |
| }, |
| { |
| "epoch": 27.231884057971016, |
| "grad_norm": 13.416851043701172, |
| "learning_rate": 1.2122093023255814e-05, |
| "loss": 0.1198, |
| "step": 490 |
| }, |
| { |
| "epoch": 27.81159420289855, |
| "grad_norm": 16.207693099975586, |
| "learning_rate": 1.1686046511627907e-05, |
| "loss": 0.1265, |
| "step": 500 |
| }, |
| { |
| "epoch": 28.0, |
| "eval_accuracy": 0.8742857142857143, |
| "eval_loss": 0.32380279898643494, |
| "eval_runtime": 2.636, |
| "eval_samples_per_second": 66.388, |
| "eval_steps_per_second": 4.173, |
| "step": 504 |
| }, |
| { |
| "epoch": 28.347826086956523, |
| "grad_norm": 6.766117572784424, |
| "learning_rate": 1.125e-05, |
| "loss": 0.0857, |
| "step": 510 |
| }, |
| { |
| "epoch": 28.92753623188406, |
| "grad_norm": 2.6189181804656982, |
| "learning_rate": 1.0813953488372092e-05, |
| "loss": 0.122, |
| "step": 520 |
| }, |
| { |
| "epoch": 29.0, |
| "eval_accuracy": 0.8857142857142857, |
| "eval_loss": 0.30293118953704834, |
| "eval_runtime": 2.1444, |
| "eval_samples_per_second": 81.609, |
| "eval_steps_per_second": 5.13, |
| "step": 522 |
| }, |
| { |
| "epoch": 29.463768115942027, |
| "grad_norm": 7.471499443054199, |
| "learning_rate": 1.0377906976744187e-05, |
| "loss": 0.1182, |
| "step": 530 |
| }, |
| { |
| "epoch": 30.0, |
| "grad_norm": 0.14097607135772705, |
| "learning_rate": 9.941860465116279e-06, |
| "loss": 0.0929, |
| "step": 540 |
| }, |
| { |
| "epoch": 30.0, |
| "eval_accuracy": 0.9028571428571428, |
| "eval_loss": 0.2936091423034668, |
| "eval_runtime": 1.9297, |
| "eval_samples_per_second": 90.69, |
| "eval_steps_per_second": 5.7, |
| "step": 540 |
| }, |
| { |
| "epoch": 30.579710144927535, |
| "grad_norm": 9.875027656555176, |
| "learning_rate": 9.505813953488372e-06, |
| "loss": 0.1276, |
| "step": 550 |
| }, |
| { |
| "epoch": 31.0, |
| "eval_accuracy": 0.9142857142857143, |
| "eval_loss": 0.2777470052242279, |
| "eval_runtime": 1.9462, |
| "eval_samples_per_second": 89.918, |
| "eval_steps_per_second": 5.652, |
| "step": 558 |
| }, |
| { |
| "epoch": 31.115942028985508, |
| "grad_norm": 10.335461616516113, |
| "learning_rate": 9.069767441860465e-06, |
| "loss": 0.1278, |
| "step": 560 |
| }, |
| { |
| "epoch": 31.695652173913043, |
| "grad_norm": 5.649544715881348, |
| "learning_rate": 8.633720930232558e-06, |
| "loss": 0.1118, |
| "step": 570 |
| }, |
| { |
| "epoch": 32.0, |
| "eval_accuracy": 0.9142857142857143, |
| "eval_loss": 0.2812146544456482, |
| "eval_runtime": 1.9255, |
| "eval_samples_per_second": 90.885, |
| "eval_steps_per_second": 5.713, |
| "step": 576 |
| }, |
| { |
| "epoch": 32.231884057971016, |
| "grad_norm": 3.817037582397461, |
| "learning_rate": 8.197674418604652e-06, |
| "loss": 0.1021, |
| "step": 580 |
| }, |
| { |
| "epoch": 32.81159420289855, |
| "grad_norm": 8.079113960266113, |
| "learning_rate": 7.761627906976745e-06, |
| "loss": 0.1058, |
| "step": 590 |
| }, |
| { |
| "epoch": 33.0, |
| "eval_accuracy": 0.92, |
| "eval_loss": 0.2924804389476776, |
| "eval_runtime": 1.9242, |
| "eval_samples_per_second": 90.947, |
| "eval_steps_per_second": 5.717, |
| "step": 594 |
| }, |
| { |
| "epoch": 33.34782608695652, |
| "grad_norm": 7.013218402862549, |
| "learning_rate": 7.325581395348837e-06, |
| "loss": 0.0822, |
| "step": 600 |
| }, |
| { |
| "epoch": 33.927536231884055, |
| "grad_norm": 5.116663455963135, |
| "learning_rate": 6.88953488372093e-06, |
| "loss": 0.0824, |
| "step": 610 |
| }, |
| { |
| "epoch": 34.0, |
| "eval_accuracy": 0.8914285714285715, |
| "eval_loss": 0.35194164514541626, |
| "eval_runtime": 2.0353, |
| "eval_samples_per_second": 85.982, |
| "eval_steps_per_second": 5.405, |
| "step": 612 |
| }, |
| { |
| "epoch": 34.46376811594203, |
| "grad_norm": 7.844626426696777, |
| "learning_rate": 6.453488372093024e-06, |
| "loss": 0.0959, |
| "step": 620 |
| }, |
| { |
| "epoch": 35.0, |
| "grad_norm": 0.001561504672281444, |
| "learning_rate": 6.017441860465116e-06, |
| "loss": 0.1084, |
| "step": 630 |
| }, |
| { |
| "epoch": 35.0, |
| "eval_accuracy": 0.92, |
| "eval_loss": 0.28469112515449524, |
| "eval_runtime": 2.7076, |
| "eval_samples_per_second": 64.634, |
| "eval_steps_per_second": 4.063, |
| "step": 630 |
| }, |
| { |
| "epoch": 35.57971014492754, |
| "grad_norm": 6.455016136169434, |
| "learning_rate": 5.581395348837209e-06, |
| "loss": 0.1074, |
| "step": 640 |
| }, |
| { |
| "epoch": 36.0, |
| "eval_accuracy": 0.9142857142857143, |
| "eval_loss": 0.27351856231689453, |
| "eval_runtime": 1.9489, |
| "eval_samples_per_second": 89.796, |
| "eval_steps_per_second": 5.644, |
| "step": 648 |
| }, |
| { |
| "epoch": 36.11594202898551, |
| "grad_norm": 4.332115650177002, |
| "learning_rate": 5.145348837209302e-06, |
| "loss": 0.0896, |
| "step": 650 |
| }, |
| { |
| "epoch": 36.69565217391305, |
| "grad_norm": 17.33997917175293, |
| "learning_rate": 4.709302325581396e-06, |
| "loss": 0.1415, |
| "step": 660 |
| }, |
| { |
| "epoch": 37.0, |
| "eval_accuracy": 0.9257142857142857, |
| "eval_loss": 0.27236348390579224, |
| "eval_runtime": 1.9276, |
| "eval_samples_per_second": 90.787, |
| "eval_steps_per_second": 5.707, |
| "step": 666 |
| }, |
| { |
| "epoch": 37.231884057971016, |
| "grad_norm": 5.769250392913818, |
| "learning_rate": 4.273255813953489e-06, |
| "loss": 0.0783, |
| "step": 670 |
| }, |
| { |
| "epoch": 37.81159420289855, |
| "grad_norm": 8.946736335754395, |
| "learning_rate": 3.837209302325582e-06, |
| "loss": 0.0702, |
| "step": 680 |
| }, |
| { |
| "epoch": 38.0, |
| "eval_accuracy": 0.92, |
| "eval_loss": 0.28733909130096436, |
| "eval_runtime": 1.9313, |
| "eval_samples_per_second": 90.614, |
| "eval_steps_per_second": 5.696, |
| "step": 684 |
| }, |
| { |
| "epoch": 38.34782608695652, |
| "grad_norm": 3.792422294616699, |
| "learning_rate": 3.4011627906976744e-06, |
| "loss": 0.0708, |
| "step": 690 |
| }, |
| { |
| "epoch": 38.927536231884055, |
| "grad_norm": 10.4917573928833, |
| "learning_rate": 2.965116279069767e-06, |
| "loss": 0.0987, |
| "step": 700 |
| }, |
| { |
| "epoch": 39.0, |
| "eval_accuracy": 0.92, |
| "eval_loss": 0.292362242937088, |
| "eval_runtime": 1.9062, |
| "eval_samples_per_second": 91.808, |
| "eval_steps_per_second": 5.771, |
| "step": 702 |
| }, |
| { |
| "epoch": 39.46376811594203, |
| "grad_norm": 5.309926509857178, |
| "learning_rate": 2.5290697674418604e-06, |
| "loss": 0.0749, |
| "step": 710 |
| }, |
| { |
| "epoch": 40.0, |
| "grad_norm": 0.004375269636511803, |
| "learning_rate": 2.0930232558139536e-06, |
| "loss": 0.0637, |
| "step": 720 |
| }, |
| { |
| "epoch": 40.0, |
| "eval_accuracy": 0.9314285714285714, |
| "eval_loss": 0.2867558002471924, |
| "eval_runtime": 1.9415, |
| "eval_samples_per_second": 90.138, |
| "eval_steps_per_second": 5.666, |
| "step": 720 |
| }, |
| { |
| "epoch": 40.57971014492754, |
| "grad_norm": 8.937047004699707, |
| "learning_rate": 1.6569767441860467e-06, |
| "loss": 0.1183, |
| "step": 730 |
| }, |
| { |
| "epoch": 41.0, |
| "eval_accuracy": 0.92, |
| "eval_loss": 0.28917399048805237, |
| "eval_runtime": 1.9784, |
| "eval_samples_per_second": 88.457, |
| "eval_steps_per_second": 5.56, |
| "step": 738 |
| }, |
| { |
| "epoch": 41.11594202898551, |
| "grad_norm": 2.839174747467041, |
| "learning_rate": 1.2209302325581397e-06, |
| "loss": 0.0732, |
| "step": 740 |
| }, |
| { |
| "epoch": 41.69565217391305, |
| "grad_norm": 9.90665054321289, |
| "learning_rate": 7.848837209302327e-07, |
| "loss": 0.096, |
| "step": 750 |
| }, |
| { |
| "epoch": 42.0, |
| "eval_accuracy": 0.9142857142857143, |
| "eval_loss": 0.29103556275367737, |
| "eval_runtime": 2.0391, |
| "eval_samples_per_second": 85.824, |
| "eval_steps_per_second": 5.395, |
| "step": 756 |
| }, |
| { |
| "epoch": 42.231884057971016, |
| "grad_norm": 10.19823932647705, |
| "learning_rate": 3.4883720930232557e-07, |
| "loss": 0.0719, |
| "step": 760 |
| }, |
| { |
| "epoch": 42.52173913043478, |
| "eval_accuracy": 0.9142857142857143, |
| "eval_loss": 0.28974097967147827, |
| "eval_runtime": 2.2909, |
| "eval_samples_per_second": 76.39, |
| "eval_steps_per_second": 4.802, |
| "step": 765 |
| }, |
| { |
| "epoch": 42.52173913043478, |
| "step": 765, |
| "total_flos": 1.5068369042520146e+18, |
| "train_loss": 0.22573306322876924, |
| "train_runtime": 1056.0428, |
| "train_samples_per_second": 46.404, |
| "train_steps_per_second": 0.724 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 765, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 45, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 1.5068369042520146e+18, |
| "train_batch_size": 16, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|