| { | |
| "best_metric": 0.9779411764705882, | |
| "best_model_checkpoint": "swin-tiny-patch4-window7-224-finetuned-papsmear\\checkpoint-2448", | |
| "epoch": 99.34640522875817, | |
| "eval_steps": 500, | |
| "global_step": 3800, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.26143790849673204, | |
| "grad_norm": 19.404264450073242, | |
| "learning_rate": 1.3157894736842106e-06, | |
| "loss": 1.8243, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.5228758169934641, | |
| "grad_norm": 9.874568939208984, | |
| "learning_rate": 2.631578947368421e-06, | |
| "loss": 1.7542, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.7843137254901961, | |
| "grad_norm": 13.61699390411377, | |
| "learning_rate": 3.9473684210526315e-06, | |
| "loss": 1.7081, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.9934640522875817, | |
| "eval_accuracy": 0.2867647058823529, | |
| "eval_loss": 1.6642274856567383, | |
| "eval_runtime": 19.1091, | |
| "eval_samples_per_second": 7.117, | |
| "eval_steps_per_second": 0.89, | |
| "step": 38 | |
| }, | |
| { | |
| "epoch": 1.0457516339869282, | |
| "grad_norm": 17.95810317993164, | |
| "learning_rate": 5.263157894736842e-06, | |
| "loss": 1.6316, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 1.3071895424836601, | |
| "grad_norm": 11.760519027709961, | |
| "learning_rate": 6.578947368421053e-06, | |
| "loss": 1.6191, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 1.5686274509803921, | |
| "grad_norm": 12.139671325683594, | |
| "learning_rate": 7.894736842105263e-06, | |
| "loss": 1.514, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 1.8300653594771243, | |
| "grad_norm": 11.897443771362305, | |
| "learning_rate": 9.210526315789474e-06, | |
| "loss": 1.4025, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 1.9869281045751634, | |
| "eval_accuracy": 0.4632352941176471, | |
| "eval_loss": 1.3760590553283691, | |
| "eval_runtime": 16.8545, | |
| "eval_samples_per_second": 8.069, | |
| "eval_steps_per_second": 1.009, | |
| "step": 76 | |
| }, | |
| { | |
| "epoch": 2.0915032679738563, | |
| "grad_norm": 14.211647987365723, | |
| "learning_rate": 1.0526315789473684e-05, | |
| "loss": 1.341, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 2.3529411764705883, | |
| "grad_norm": 21.328588485717773, | |
| "learning_rate": 1.1842105263157895e-05, | |
| "loss": 1.2617, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 2.6143790849673203, | |
| "grad_norm": 24.131996154785156, | |
| "learning_rate": 1.3157894736842106e-05, | |
| "loss": 1.1608, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 2.8758169934640523, | |
| "grad_norm": 23.461227416992188, | |
| "learning_rate": 1.4473684210526317e-05, | |
| "loss": 1.0918, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 2.980392156862745, | |
| "eval_accuracy": 0.5514705882352942, | |
| "eval_loss": 1.0276451110839844, | |
| "eval_runtime": 17.5433, | |
| "eval_samples_per_second": 7.752, | |
| "eval_steps_per_second": 0.969, | |
| "step": 114 | |
| }, | |
| { | |
| "epoch": 3.1372549019607843, | |
| "grad_norm": 44.0300407409668, | |
| "learning_rate": 1.5789473684210526e-05, | |
| "loss": 0.9044, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 3.3986928104575163, | |
| "grad_norm": 23.61319923400879, | |
| "learning_rate": 1.7105263157894737e-05, | |
| "loss": 0.9409, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 3.6601307189542482, | |
| "grad_norm": 27.572128295898438, | |
| "learning_rate": 1.8421052631578947e-05, | |
| "loss": 0.9152, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 3.9215686274509802, | |
| "grad_norm": 20.785051345825195, | |
| "learning_rate": 1.9736842105263158e-05, | |
| "loss": 0.8051, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "eval_accuracy": 0.6691176470588235, | |
| "eval_loss": 0.7678546905517578, | |
| "eval_runtime": 17.2269, | |
| "eval_samples_per_second": 7.895, | |
| "eval_steps_per_second": 0.987, | |
| "step": 153 | |
| }, | |
| { | |
| "epoch": 4.183006535947713, | |
| "grad_norm": 32.00216293334961, | |
| "learning_rate": 2.105263157894737e-05, | |
| "loss": 0.7821, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 4.444444444444445, | |
| "grad_norm": 23.564285278320312, | |
| "learning_rate": 2.236842105263158e-05, | |
| "loss": 0.8036, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 4.705882352941177, | |
| "grad_norm": 21.403562545776367, | |
| "learning_rate": 2.368421052631579e-05, | |
| "loss": 0.7355, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 4.967320261437909, | |
| "grad_norm": 31.243640899658203, | |
| "learning_rate": 2.5e-05, | |
| "loss": 0.635, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 4.993464052287582, | |
| "eval_accuracy": 0.7867647058823529, | |
| "eval_loss": 0.5927847623825073, | |
| "eval_runtime": 17.4003, | |
| "eval_samples_per_second": 7.816, | |
| "eval_steps_per_second": 0.977, | |
| "step": 191 | |
| }, | |
| { | |
| "epoch": 5.228758169934641, | |
| "grad_norm": 23.90205192565918, | |
| "learning_rate": 2.6315789473684212e-05, | |
| "loss": 0.6363, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 5.490196078431373, | |
| "grad_norm": 23.38309669494629, | |
| "learning_rate": 2.7631578947368426e-05, | |
| "loss": 0.6285, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 5.751633986928105, | |
| "grad_norm": 41.387149810791016, | |
| "learning_rate": 2.8947368421052634e-05, | |
| "loss": 0.6051, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 5.9869281045751634, | |
| "eval_accuracy": 0.75, | |
| "eval_loss": 0.695731520652771, | |
| "eval_runtime": 17.5363, | |
| "eval_samples_per_second": 7.755, | |
| "eval_steps_per_second": 0.969, | |
| "step": 229 | |
| }, | |
| { | |
| "epoch": 6.0130718954248366, | |
| "grad_norm": 33.84821319580078, | |
| "learning_rate": 3.0263157894736844e-05, | |
| "loss": 0.6503, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 6.2745098039215685, | |
| "grad_norm": 18.2890682220459, | |
| "learning_rate": 3.157894736842105e-05, | |
| "loss": 0.4905, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 6.5359477124183005, | |
| "grad_norm": 25.626060485839844, | |
| "learning_rate": 3.289473684210527e-05, | |
| "loss": 0.5262, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 6.7973856209150325, | |
| "grad_norm": 28.431270599365234, | |
| "learning_rate": 3.421052631578947e-05, | |
| "loss": 0.5539, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 6.980392156862745, | |
| "eval_accuracy": 0.7941176470588235, | |
| "eval_loss": 0.5016477108001709, | |
| "eval_runtime": 17.3512, | |
| "eval_samples_per_second": 7.838, | |
| "eval_steps_per_second": 0.98, | |
| "step": 267 | |
| }, | |
| { | |
| "epoch": 7.0588235294117645, | |
| "grad_norm": 21.074764251708984, | |
| "learning_rate": 3.5526315789473684e-05, | |
| "loss": 0.4807, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 7.3202614379084965, | |
| "grad_norm": 21.632251739501953, | |
| "learning_rate": 3.6842105263157895e-05, | |
| "loss": 0.4704, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 7.5816993464052285, | |
| "grad_norm": 41.86575698852539, | |
| "learning_rate": 3.815789473684211e-05, | |
| "loss": 0.5141, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 7.8431372549019605, | |
| "grad_norm": 20.23293685913086, | |
| "learning_rate": 3.9473684210526316e-05, | |
| "loss": 0.4683, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "eval_accuracy": 0.8235294117647058, | |
| "eval_loss": 0.4732811748981476, | |
| "eval_runtime": 17.0473, | |
| "eval_samples_per_second": 7.978, | |
| "eval_steps_per_second": 0.997, | |
| "step": 306 | |
| }, | |
| { | |
| "epoch": 8.104575163398692, | |
| "grad_norm": 67.42210388183594, | |
| "learning_rate": 4.078947368421053e-05, | |
| "loss": 0.451, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 8.366013071895425, | |
| "grad_norm": 22.807098388671875, | |
| "learning_rate": 4.210526315789474e-05, | |
| "loss": 0.4019, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 8.627450980392156, | |
| "grad_norm": 31.961091995239258, | |
| "learning_rate": 4.342105263157895e-05, | |
| "loss": 0.4663, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 8.88888888888889, | |
| "grad_norm": 26.965513229370117, | |
| "learning_rate": 4.473684210526316e-05, | |
| "loss": 0.4153, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 8.993464052287582, | |
| "eval_accuracy": 0.8529411764705882, | |
| "eval_loss": 0.4834950268268585, | |
| "eval_runtime": 16.944, | |
| "eval_samples_per_second": 8.026, | |
| "eval_steps_per_second": 1.003, | |
| "step": 344 | |
| }, | |
| { | |
| "epoch": 9.15032679738562, | |
| "grad_norm": 21.733226776123047, | |
| "learning_rate": 4.605263157894737e-05, | |
| "loss": 0.473, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 9.411764705882353, | |
| "grad_norm": 17.1552734375, | |
| "learning_rate": 4.736842105263158e-05, | |
| "loss": 0.3912, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 9.673202614379084, | |
| "grad_norm": 39.66945266723633, | |
| "learning_rate": 4.868421052631579e-05, | |
| "loss": 0.465, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 9.934640522875817, | |
| "grad_norm": 24.060779571533203, | |
| "learning_rate": 5e-05, | |
| "loss": 0.3954, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 9.986928104575163, | |
| "eval_accuracy": 0.8308823529411765, | |
| "eval_loss": 0.5431119203567505, | |
| "eval_runtime": 16.9702, | |
| "eval_samples_per_second": 8.014, | |
| "eval_steps_per_second": 1.002, | |
| "step": 382 | |
| }, | |
| { | |
| "epoch": 10.196078431372548, | |
| "grad_norm": 22.754186630249023, | |
| "learning_rate": 4.985380116959065e-05, | |
| "loss": 0.309, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 10.457516339869281, | |
| "grad_norm": 25.09243392944336, | |
| "learning_rate": 4.970760233918128e-05, | |
| "loss": 0.2985, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 10.718954248366012, | |
| "grad_norm": 32.95780563354492, | |
| "learning_rate": 4.956140350877193e-05, | |
| "loss": 0.3551, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 10.980392156862745, | |
| "grad_norm": 24.594146728515625, | |
| "learning_rate": 4.941520467836258e-05, | |
| "loss": 0.3524, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 10.980392156862745, | |
| "eval_accuracy": 0.8235294117647058, | |
| "eval_loss": 0.4060741364955902, | |
| "eval_runtime": 16.9787, | |
| "eval_samples_per_second": 8.01, | |
| "eval_steps_per_second": 1.001, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 11.241830065359476, | |
| "grad_norm": 34.58118438720703, | |
| "learning_rate": 4.926900584795322e-05, | |
| "loss": 0.3015, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 11.50326797385621, | |
| "grad_norm": 17.467493057250977, | |
| "learning_rate": 4.912280701754386e-05, | |
| "loss": 0.332, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 11.764705882352942, | |
| "grad_norm": 11.450825691223145, | |
| "learning_rate": 4.8976608187134504e-05, | |
| "loss": 0.3546, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 12.0, | |
| "eval_accuracy": 0.8382352941176471, | |
| "eval_loss": 0.4924784302711487, | |
| "eval_runtime": 17.0509, | |
| "eval_samples_per_second": 7.976, | |
| "eval_steps_per_second": 0.997, | |
| "step": 459 | |
| }, | |
| { | |
| "epoch": 12.026143790849673, | |
| "grad_norm": 22.95159912109375, | |
| "learning_rate": 4.883040935672515e-05, | |
| "loss": 0.3362, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 12.287581699346406, | |
| "grad_norm": 15.78369140625, | |
| "learning_rate": 4.868421052631579e-05, | |
| "loss": 0.2589, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 12.549019607843137, | |
| "grad_norm": 18.571977615356445, | |
| "learning_rate": 4.853801169590643e-05, | |
| "loss": 0.2588, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 12.81045751633987, | |
| "grad_norm": 10.237850189208984, | |
| "learning_rate": 4.839181286549708e-05, | |
| "loss": 0.2922, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 12.993464052287582, | |
| "eval_accuracy": 0.875, | |
| "eval_loss": 0.36371880769729614, | |
| "eval_runtime": 16.7827, | |
| "eval_samples_per_second": 8.104, | |
| "eval_steps_per_second": 1.013, | |
| "step": 497 | |
| }, | |
| { | |
| "epoch": 13.071895424836601, | |
| "grad_norm": 14.183631896972656, | |
| "learning_rate": 4.824561403508772e-05, | |
| "loss": 0.2683, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 13.333333333333334, | |
| "grad_norm": 15.362314224243164, | |
| "learning_rate": 4.8099415204678366e-05, | |
| "loss": 0.2178, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 13.594771241830065, | |
| "grad_norm": 31.49340057373047, | |
| "learning_rate": 4.7953216374269006e-05, | |
| "loss": 0.2095, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 13.856209150326798, | |
| "grad_norm": 39.85598373413086, | |
| "learning_rate": 4.780701754385965e-05, | |
| "loss": 0.2342, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 13.986928104575163, | |
| "eval_accuracy": 0.8970588235294118, | |
| "eval_loss": 0.32859814167022705, | |
| "eval_runtime": 16.8467, | |
| "eval_samples_per_second": 8.073, | |
| "eval_steps_per_second": 1.009, | |
| "step": 535 | |
| }, | |
| { | |
| "epoch": 14.117647058823529, | |
| "grad_norm": 22.395517349243164, | |
| "learning_rate": 4.7660818713450294e-05, | |
| "loss": 0.2927, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 14.379084967320262, | |
| "grad_norm": 15.716471672058105, | |
| "learning_rate": 4.751461988304094e-05, | |
| "loss": 0.2419, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 14.640522875816993, | |
| "grad_norm": 13.827138900756836, | |
| "learning_rate": 4.736842105263158e-05, | |
| "loss": 0.2215, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 14.901960784313726, | |
| "grad_norm": 8.343385696411133, | |
| "learning_rate": 4.722222222222222e-05, | |
| "loss": 0.2083, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 14.980392156862745, | |
| "eval_accuracy": 0.8823529411764706, | |
| "eval_loss": 0.327125608921051, | |
| "eval_runtime": 17.1905, | |
| "eval_samples_per_second": 7.911, | |
| "eval_steps_per_second": 0.989, | |
| "step": 573 | |
| }, | |
| { | |
| "epoch": 15.163398692810457, | |
| "grad_norm": 27.369592666625977, | |
| "learning_rate": 4.707602339181287e-05, | |
| "loss": 0.1837, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 15.42483660130719, | |
| "grad_norm": 4.707042217254639, | |
| "learning_rate": 4.6929824561403515e-05, | |
| "loss": 0.1872, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 15.686274509803921, | |
| "grad_norm": 19.026412963867188, | |
| "learning_rate": 4.678362573099415e-05, | |
| "loss": 0.2063, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 15.947712418300654, | |
| "grad_norm": 39.22539138793945, | |
| "learning_rate": 4.6637426900584796e-05, | |
| "loss": 0.2704, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 16.0, | |
| "eval_accuracy": 0.8823529411764706, | |
| "eval_loss": 0.3700261414051056, | |
| "eval_runtime": 17.2498, | |
| "eval_samples_per_second": 7.884, | |
| "eval_steps_per_second": 0.986, | |
| "step": 612 | |
| }, | |
| { | |
| "epoch": 16.209150326797385, | |
| "grad_norm": 4.610194683074951, | |
| "learning_rate": 4.649122807017544e-05, | |
| "loss": 0.1895, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 16.470588235294116, | |
| "grad_norm": 27.570838928222656, | |
| "learning_rate": 4.634502923976608e-05, | |
| "loss": 0.1492, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 16.73202614379085, | |
| "grad_norm": 13.742429733276367, | |
| "learning_rate": 4.619883040935672e-05, | |
| "loss": 0.1698, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 16.99346405228758, | |
| "grad_norm": 16.786169052124023, | |
| "learning_rate": 4.605263157894737e-05, | |
| "loss": 0.1871, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 16.99346405228758, | |
| "eval_accuracy": 0.8970588235294118, | |
| "eval_loss": 0.34471678733825684, | |
| "eval_runtime": 16.7473, | |
| "eval_samples_per_second": 8.121, | |
| "eval_steps_per_second": 1.015, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 17.254901960784313, | |
| "grad_norm": 15.884855270385742, | |
| "learning_rate": 4.590643274853802e-05, | |
| "loss": 0.1335, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 17.516339869281047, | |
| "grad_norm": 17.3248348236084, | |
| "learning_rate": 4.576023391812866e-05, | |
| "loss": 0.1399, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 17.77777777777778, | |
| "grad_norm": 16.090543746948242, | |
| "learning_rate": 4.56140350877193e-05, | |
| "loss": 0.226, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 17.986928104575163, | |
| "eval_accuracy": 0.8602941176470589, | |
| "eval_loss": 0.4279506206512451, | |
| "eval_runtime": 16.8179, | |
| "eval_samples_per_second": 8.087, | |
| "eval_steps_per_second": 1.011, | |
| "step": 688 | |
| }, | |
| { | |
| "epoch": 18.03921568627451, | |
| "grad_norm": 17.314950942993164, | |
| "learning_rate": 4.5467836257309945e-05, | |
| "loss": 0.2657, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 18.30065359477124, | |
| "grad_norm": 26.111413955688477, | |
| "learning_rate": 4.5321637426900585e-05, | |
| "loss": 0.1238, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 18.562091503267975, | |
| "grad_norm": 34.5568962097168, | |
| "learning_rate": 4.517543859649123e-05, | |
| "loss": 0.3426, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 18.823529411764707, | |
| "grad_norm": 27.506118774414062, | |
| "learning_rate": 4.502923976608187e-05, | |
| "loss": 0.245, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 18.980392156862745, | |
| "eval_accuracy": 0.8088235294117647, | |
| "eval_loss": 0.6445416212081909, | |
| "eval_runtime": 16.6042, | |
| "eval_samples_per_second": 8.191, | |
| "eval_steps_per_second": 1.024, | |
| "step": 726 | |
| }, | |
| { | |
| "epoch": 19.084967320261438, | |
| "grad_norm": 8.742308616638184, | |
| "learning_rate": 4.488304093567251e-05, | |
| "loss": 0.1876, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 19.34640522875817, | |
| "grad_norm": 37.74170684814453, | |
| "learning_rate": 4.473684210526316e-05, | |
| "loss": 0.1044, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 19.607843137254903, | |
| "grad_norm": 17.85502815246582, | |
| "learning_rate": 4.4590643274853806e-05, | |
| "loss": 0.1637, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 19.869281045751634, | |
| "grad_norm": 13.413275718688965, | |
| "learning_rate": 4.4444444444444447e-05, | |
| "loss": 0.1545, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 20.0, | |
| "eval_accuracy": 0.8602941176470589, | |
| "eval_loss": 0.41802164912223816, | |
| "eval_runtime": 16.9375, | |
| "eval_samples_per_second": 8.03, | |
| "eval_steps_per_second": 1.004, | |
| "step": 765 | |
| }, | |
| { | |
| "epoch": 20.130718954248366, | |
| "grad_norm": 24.223968505859375, | |
| "learning_rate": 4.429824561403509e-05, | |
| "loss": 0.1333, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 20.392156862745097, | |
| "grad_norm": 22.863794326782227, | |
| "learning_rate": 4.4152046783625734e-05, | |
| "loss": 0.1223, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 20.65359477124183, | |
| "grad_norm": 20.22460174560547, | |
| "learning_rate": 4.400584795321638e-05, | |
| "loss": 0.1906, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 20.915032679738562, | |
| "grad_norm": 6.557627201080322, | |
| "learning_rate": 4.3859649122807014e-05, | |
| "loss": 0.0981, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 20.99346405228758, | |
| "eval_accuracy": 0.9044117647058824, | |
| "eval_loss": 0.32080766558647156, | |
| "eval_runtime": 17.4044, | |
| "eval_samples_per_second": 7.814, | |
| "eval_steps_per_second": 0.977, | |
| "step": 803 | |
| }, | |
| { | |
| "epoch": 21.176470588235293, | |
| "grad_norm": 11.885444641113281, | |
| "learning_rate": 4.371345029239766e-05, | |
| "loss": 0.1654, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 21.437908496732025, | |
| "grad_norm": 16.748071670532227, | |
| "learning_rate": 4.356725146198831e-05, | |
| "loss": 0.1706, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 21.69934640522876, | |
| "grad_norm": 25.410442352294922, | |
| "learning_rate": 4.342105263157895e-05, | |
| "loss": 0.1121, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 21.96078431372549, | |
| "grad_norm": 24.631742477416992, | |
| "learning_rate": 4.327485380116959e-05, | |
| "loss": 0.1455, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 21.986928104575163, | |
| "eval_accuracy": 0.8602941176470589, | |
| "eval_loss": 0.425643652677536, | |
| "eval_runtime": 20.0595, | |
| "eval_samples_per_second": 6.78, | |
| "eval_steps_per_second": 0.847, | |
| "step": 841 | |
| }, | |
| { | |
| "epoch": 22.22222222222222, | |
| "grad_norm": 9.926827430725098, | |
| "learning_rate": 4.3128654970760236e-05, | |
| "loss": 0.144, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 22.483660130718953, | |
| "grad_norm": 32.22057342529297, | |
| "learning_rate": 4.298245614035088e-05, | |
| "loss": 0.1328, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 22.745098039215687, | |
| "grad_norm": 6.770218849182129, | |
| "learning_rate": 4.283625730994152e-05, | |
| "loss": 0.2405, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 22.980392156862745, | |
| "eval_accuracy": 0.8970588235294118, | |
| "eval_loss": 0.34735360741615295, | |
| "eval_runtime": 36.4621, | |
| "eval_samples_per_second": 3.73, | |
| "eval_steps_per_second": 0.466, | |
| "step": 879 | |
| }, | |
| { | |
| "epoch": 23.00653594771242, | |
| "grad_norm": 18.301342010498047, | |
| "learning_rate": 4.269005847953216e-05, | |
| "loss": 0.1407, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 23.26797385620915, | |
| "grad_norm": 25.70302963256836, | |
| "learning_rate": 4.254385964912281e-05, | |
| "loss": 0.1403, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 23.529411764705884, | |
| "grad_norm": 6.829775333404541, | |
| "learning_rate": 4.239766081871345e-05, | |
| "loss": 0.1278, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 23.790849673202615, | |
| "grad_norm": 15.183685302734375, | |
| "learning_rate": 4.22514619883041e-05, | |
| "loss": 0.1549, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 24.0, | |
| "eval_accuracy": 0.9044117647058824, | |
| "eval_loss": 0.39403286576271057, | |
| "eval_runtime": 30.2513, | |
| "eval_samples_per_second": 4.496, | |
| "eval_steps_per_second": 0.562, | |
| "step": 918 | |
| }, | |
| { | |
| "epoch": 24.052287581699346, | |
| "grad_norm": 76.56197357177734, | |
| "learning_rate": 4.210526315789474e-05, | |
| "loss": 0.2019, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 24.313725490196077, | |
| "grad_norm": 10.338065147399902, | |
| "learning_rate": 4.195906432748538e-05, | |
| "loss": 0.1341, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 24.575163398692812, | |
| "grad_norm": 10.710972785949707, | |
| "learning_rate": 4.1812865497076025e-05, | |
| "loss": 0.1207, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 24.836601307189543, | |
| "grad_norm": 19.086135864257812, | |
| "learning_rate": 4.166666666666667e-05, | |
| "loss": 0.1721, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 24.99346405228758, | |
| "eval_accuracy": 0.8823529411764706, | |
| "eval_loss": 0.4279385805130005, | |
| "eval_runtime": 29.9969, | |
| "eval_samples_per_second": 4.534, | |
| "eval_steps_per_second": 0.567, | |
| "step": 956 | |
| }, | |
| { | |
| "epoch": 25.098039215686274, | |
| "grad_norm": 6.991425514221191, | |
| "learning_rate": 4.152046783625731e-05, | |
| "loss": 0.0729, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 25.359477124183005, | |
| "grad_norm": 8.979483604431152, | |
| "learning_rate": 4.137426900584795e-05, | |
| "loss": 0.1826, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 25.62091503267974, | |
| "grad_norm": 11.570904731750488, | |
| "learning_rate": 4.12280701754386e-05, | |
| "loss": 0.1492, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 25.88235294117647, | |
| "grad_norm": 14.8778076171875, | |
| "learning_rate": 4.1081871345029247e-05, | |
| "loss": 0.1378, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 25.986928104575163, | |
| "eval_accuracy": 0.9044117647058824, | |
| "eval_loss": 0.387086421251297, | |
| "eval_runtime": 29.0075, | |
| "eval_samples_per_second": 4.688, | |
| "eval_steps_per_second": 0.586, | |
| "step": 994 | |
| }, | |
| { | |
| "epoch": 26.143790849673202, | |
| "grad_norm": 11.985469818115234, | |
| "learning_rate": 4.093567251461988e-05, | |
| "loss": 0.1122, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 26.405228758169933, | |
| "grad_norm": 22.02225685119629, | |
| "learning_rate": 4.078947368421053e-05, | |
| "loss": 0.1172, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 26.666666666666668, | |
| "grad_norm": 1.2671743631362915, | |
| "learning_rate": 4.0643274853801174e-05, | |
| "loss": 0.0891, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 26.9281045751634, | |
| "grad_norm": 10.896835327148438, | |
| "learning_rate": 4.0497076023391814e-05, | |
| "loss": 0.0924, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 26.980392156862745, | |
| "eval_accuracy": 0.8455882352941176, | |
| "eval_loss": 0.7301138639450073, | |
| "eval_runtime": 28.9067, | |
| "eval_samples_per_second": 4.705, | |
| "eval_steps_per_second": 0.588, | |
| "step": 1032 | |
| }, | |
| { | |
| "epoch": 27.18954248366013, | |
| "grad_norm": 7.8527960777282715, | |
| "learning_rate": 4.0350877192982455e-05, | |
| "loss": 0.1348, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 27.45098039215686, | |
| "grad_norm": 2.1555140018463135, | |
| "learning_rate": 4.02046783625731e-05, | |
| "loss": 0.0675, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 27.712418300653596, | |
| "grad_norm": 7.751283645629883, | |
| "learning_rate": 4.005847953216375e-05, | |
| "loss": 0.0916, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 27.973856209150327, | |
| "grad_norm": 33.804786682128906, | |
| "learning_rate": 3.991228070175439e-05, | |
| "loss": 0.1325, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 28.0, | |
| "eval_accuracy": 0.9044117647058824, | |
| "eval_loss": 0.3712061643600464, | |
| "eval_runtime": 28.0451, | |
| "eval_samples_per_second": 4.849, | |
| "eval_steps_per_second": 0.606, | |
| "step": 1071 | |
| }, | |
| { | |
| "epoch": 28.235294117647058, | |
| "grad_norm": 7.706085205078125, | |
| "learning_rate": 3.976608187134503e-05, | |
| "loss": 0.0879, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 28.49673202614379, | |
| "grad_norm": 4.338534355163574, | |
| "learning_rate": 3.9619883040935676e-05, | |
| "loss": 0.1017, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 28.758169934640524, | |
| "grad_norm": 9.544697761535645, | |
| "learning_rate": 3.9473684210526316e-05, | |
| "loss": 0.1426, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 28.99346405228758, | |
| "eval_accuracy": 0.8602941176470589, | |
| "eval_loss": 0.440034419298172, | |
| "eval_runtime": 30.1321, | |
| "eval_samples_per_second": 4.513, | |
| "eval_steps_per_second": 0.564, | |
| "step": 1109 | |
| }, | |
| { | |
| "epoch": 29.019607843137255, | |
| "grad_norm": 0.3841346502304077, | |
| "learning_rate": 3.932748538011696e-05, | |
| "loss": 0.0981, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 29.281045751633986, | |
| "grad_norm": 9.533553123474121, | |
| "learning_rate": 3.9181286549707604e-05, | |
| "loss": 0.0926, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 29.54248366013072, | |
| "grad_norm": 26.160850524902344, | |
| "learning_rate": 3.9035087719298244e-05, | |
| "loss": 0.083, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 29.80392156862745, | |
| "grad_norm": 18.309621810913086, | |
| "learning_rate": 3.888888888888889e-05, | |
| "loss": 0.0866, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 29.986928104575163, | |
| "eval_accuracy": 0.9411764705882353, | |
| "eval_loss": 0.27793076634407043, | |
| "eval_runtime": 29.3246, | |
| "eval_samples_per_second": 4.638, | |
| "eval_steps_per_second": 0.58, | |
| "step": 1147 | |
| }, | |
| { | |
| "epoch": 30.065359477124183, | |
| "grad_norm": 24.974849700927734, | |
| "learning_rate": 3.874269005847954e-05, | |
| "loss": 0.11, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 30.326797385620914, | |
| "grad_norm": 3.7421281337738037, | |
| "learning_rate": 3.859649122807018e-05, | |
| "loss": 0.0712, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 30.58823529411765, | |
| "grad_norm": 10.041555404663086, | |
| "learning_rate": 3.845029239766082e-05, | |
| "loss": 0.0702, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 30.84967320261438, | |
| "grad_norm": 37.238948822021484, | |
| "learning_rate": 3.8304093567251465e-05, | |
| "loss": 0.0659, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 30.980392156862745, | |
| "eval_accuracy": 0.9411764705882353, | |
| "eval_loss": 0.3207360804080963, | |
| "eval_runtime": 34.3274, | |
| "eval_samples_per_second": 3.962, | |
| "eval_steps_per_second": 0.495, | |
| "step": 1185 | |
| }, | |
| { | |
| "epoch": 31.11111111111111, | |
| "grad_norm": 13.073234558105469, | |
| "learning_rate": 3.815789473684211e-05, | |
| "loss": 0.0547, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 31.372549019607842, | |
| "grad_norm": 3.1763381958007812, | |
| "learning_rate": 3.8011695906432746e-05, | |
| "loss": 0.0727, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 31.633986928104576, | |
| "grad_norm": 1.5747133493423462, | |
| "learning_rate": 3.786549707602339e-05, | |
| "loss": 0.1023, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 31.895424836601308, | |
| "grad_norm": 12.335155487060547, | |
| "learning_rate": 3.771929824561404e-05, | |
| "loss": 0.1175, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 32.0, | |
| "eval_accuracy": 0.9044117647058824, | |
| "eval_loss": 0.43389689922332764, | |
| "eval_runtime": 32.183, | |
| "eval_samples_per_second": 4.226, | |
| "eval_steps_per_second": 0.528, | |
| "step": 1224 | |
| }, | |
| { | |
| "epoch": 32.15686274509804, | |
| "grad_norm": 2.676323413848877, | |
| "learning_rate": 3.757309941520468e-05, | |
| "loss": 0.129, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 32.41830065359477, | |
| "grad_norm": 0.5916957259178162, | |
| "learning_rate": 3.742690058479532e-05, | |
| "loss": 0.0585, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 32.6797385620915, | |
| "grad_norm": 11.02872085571289, | |
| "learning_rate": 3.728070175438597e-05, | |
| "loss": 0.045, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 32.94117647058823, | |
| "grad_norm": 44.40802001953125, | |
| "learning_rate": 3.713450292397661e-05, | |
| "loss": 0.0455, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 32.99346405228758, | |
| "eval_accuracy": 0.9264705882352942, | |
| "eval_loss": 0.4536753296852112, | |
| "eval_runtime": 32.0477, | |
| "eval_samples_per_second": 4.244, | |
| "eval_steps_per_second": 0.53, | |
| "step": 1262 | |
| }, | |
| { | |
| "epoch": 33.20261437908497, | |
| "grad_norm": 0.4168817400932312, | |
| "learning_rate": 3.6988304093567254e-05, | |
| "loss": 0.0625, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 33.4640522875817, | |
| "grad_norm": 7.689728260040283, | |
| "learning_rate": 3.6842105263157895e-05, | |
| "loss": 0.1613, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 33.72549019607843, | |
| "grad_norm": 9.364749908447266, | |
| "learning_rate": 3.669590643274854e-05, | |
| "loss": 0.1001, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 33.98692810457516, | |
| "grad_norm": 14.09304428100586, | |
| "learning_rate": 3.654970760233918e-05, | |
| "loss": 0.1006, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 33.98692810457516, | |
| "eval_accuracy": 0.875, | |
| "eval_loss": 0.6521199345588684, | |
| "eval_runtime": 33.7228, | |
| "eval_samples_per_second": 4.033, | |
| "eval_steps_per_second": 0.504, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 34.248366013071895, | |
| "grad_norm": 14.115684509277344, | |
| "learning_rate": 3.640350877192983e-05, | |
| "loss": 0.1592, | |
| "step": 1310 | |
| }, | |
| { | |
| "epoch": 34.509803921568626, | |
| "grad_norm": 2.2361948490142822, | |
| "learning_rate": 3.625730994152047e-05, | |
| "loss": 0.0785, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 34.77124183006536, | |
| "grad_norm": 15.101175308227539, | |
| "learning_rate": 3.611111111111111e-05, | |
| "loss": 0.033, | |
| "step": 1330 | |
| }, | |
| { | |
| "epoch": 34.98039215686274, | |
| "eval_accuracy": 0.9044117647058824, | |
| "eval_loss": 0.5615760087966919, | |
| "eval_runtime": 20.5904, | |
| "eval_samples_per_second": 6.605, | |
| "eval_steps_per_second": 0.826, | |
| "step": 1338 | |
| }, | |
| { | |
| "epoch": 35.032679738562095, | |
| "grad_norm": 74.07561492919922, | |
| "learning_rate": 3.5964912280701756e-05, | |
| "loss": 0.1336, | |
| "step": 1340 | |
| }, | |
| { | |
| "epoch": 35.294117647058826, | |
| "grad_norm": 40.868961334228516, | |
| "learning_rate": 3.5818713450292403e-05, | |
| "loss": 0.1209, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 35.55555555555556, | |
| "grad_norm": 11.251754760742188, | |
| "learning_rate": 3.5672514619883044e-05, | |
| "loss": 0.0658, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 35.81699346405229, | |
| "grad_norm": 20.791095733642578, | |
| "learning_rate": 3.5526315789473684e-05, | |
| "loss": 0.0979, | |
| "step": 1370 | |
| }, | |
| { | |
| "epoch": 36.0, | |
| "eval_accuracy": 0.9191176470588235, | |
| "eval_loss": 0.3717995882034302, | |
| "eval_runtime": 21.531, | |
| "eval_samples_per_second": 6.316, | |
| "eval_steps_per_second": 0.79, | |
| "step": 1377 | |
| }, | |
| { | |
| "epoch": 36.07843137254902, | |
| "grad_norm": 13.336127281188965, | |
| "learning_rate": 3.538011695906433e-05, | |
| "loss": 0.0712, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 36.33986928104575, | |
| "grad_norm": 7.379011154174805, | |
| "learning_rate": 3.523391812865498e-05, | |
| "loss": 0.0826, | |
| "step": 1390 | |
| }, | |
| { | |
| "epoch": 36.60130718954248, | |
| "grad_norm": 1.9048967361450195, | |
| "learning_rate": 3.508771929824561e-05, | |
| "loss": 0.0791, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 36.86274509803921, | |
| "grad_norm": 32.38518142700195, | |
| "learning_rate": 3.494152046783626e-05, | |
| "loss": 0.1045, | |
| "step": 1410 | |
| }, | |
| { | |
| "epoch": 36.99346405228758, | |
| "eval_accuracy": 0.9632352941176471, | |
| "eval_loss": 0.25290319323539734, | |
| "eval_runtime": 22.9294, | |
| "eval_samples_per_second": 5.931, | |
| "eval_steps_per_second": 0.741, | |
| "step": 1415 | |
| }, | |
| { | |
| "epoch": 37.12418300653595, | |
| "grad_norm": 14.719789505004883, | |
| "learning_rate": 3.4795321637426905e-05, | |
| "loss": 0.0977, | |
| "step": 1420 | |
| }, | |
| { | |
| "epoch": 37.38562091503268, | |
| "grad_norm": 21.388763427734375, | |
| "learning_rate": 3.4649122807017546e-05, | |
| "loss": 0.0374, | |
| "step": 1430 | |
| }, | |
| { | |
| "epoch": 37.64705882352941, | |
| "grad_norm": 7.066629886627197, | |
| "learning_rate": 3.4502923976608186e-05, | |
| "loss": 0.0819, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 37.908496732026144, | |
| "grad_norm": 4.583933353424072, | |
| "learning_rate": 3.435672514619883e-05, | |
| "loss": 0.0815, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 37.98692810457516, | |
| "eval_accuracy": 0.9338235294117647, | |
| "eval_loss": 0.3510648012161255, | |
| "eval_runtime": 21.3875, | |
| "eval_samples_per_second": 6.359, | |
| "eval_steps_per_second": 0.795, | |
| "step": 1453 | |
| }, | |
| { | |
| "epoch": 38.169934640522875, | |
| "grad_norm": 14.378546714782715, | |
| "learning_rate": 3.421052631578947e-05, | |
| "loss": 0.1109, | |
| "step": 1460 | |
| }, | |
| { | |
| "epoch": 38.431372549019606, | |
| "grad_norm": 4.1210408210754395, | |
| "learning_rate": 3.406432748538012e-05, | |
| "loss": 0.052, | |
| "step": 1470 | |
| }, | |
| { | |
| "epoch": 38.69281045751634, | |
| "grad_norm": 18.48431396484375, | |
| "learning_rate": 3.391812865497076e-05, | |
| "loss": 0.0932, | |
| "step": 1480 | |
| }, | |
| { | |
| "epoch": 38.95424836601307, | |
| "grad_norm": 30.51089859008789, | |
| "learning_rate": 3.377192982456141e-05, | |
| "loss": 0.0761, | |
| "step": 1490 | |
| }, | |
| { | |
| "epoch": 38.98039215686274, | |
| "eval_accuracy": 0.9338235294117647, | |
| "eval_loss": 0.31144019961357117, | |
| "eval_runtime": 32.6124, | |
| "eval_samples_per_second": 4.17, | |
| "eval_steps_per_second": 0.521, | |
| "step": 1491 | |
| }, | |
| { | |
| "epoch": 39.21568627450981, | |
| "grad_norm": 29.487356185913086, | |
| "learning_rate": 3.362573099415205e-05, | |
| "loss": 0.0995, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 39.47712418300654, | |
| "grad_norm": 4.752898216247559, | |
| "learning_rate": 3.3479532163742695e-05, | |
| "loss": 0.0986, | |
| "step": 1510 | |
| }, | |
| { | |
| "epoch": 39.73856209150327, | |
| "grad_norm": 23.433902740478516, | |
| "learning_rate": 3.3333333333333335e-05, | |
| "loss": 0.0908, | |
| "step": 1520 | |
| }, | |
| { | |
| "epoch": 40.0, | |
| "grad_norm": 8.154867172241211, | |
| "learning_rate": 3.3187134502923975e-05, | |
| "loss": 0.0747, | |
| "step": 1530 | |
| }, | |
| { | |
| "epoch": 40.0, | |
| "eval_accuracy": 0.9338235294117647, | |
| "eval_loss": 0.2836870849132538, | |
| "eval_runtime": 33.717, | |
| "eval_samples_per_second": 4.034, | |
| "eval_steps_per_second": 0.504, | |
| "step": 1530 | |
| }, | |
| { | |
| "epoch": 40.26143790849673, | |
| "grad_norm": 66.09915924072266, | |
| "learning_rate": 3.304093567251462e-05, | |
| "loss": 0.0746, | |
| "step": 1540 | |
| }, | |
| { | |
| "epoch": 40.52287581699346, | |
| "grad_norm": 8.447415351867676, | |
| "learning_rate": 3.289473684210527e-05, | |
| "loss": 0.0809, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 40.78431372549019, | |
| "grad_norm": 11.7717866897583, | |
| "learning_rate": 3.274853801169591e-05, | |
| "loss": 0.0545, | |
| "step": 1560 | |
| }, | |
| { | |
| "epoch": 40.99346405228758, | |
| "eval_accuracy": 0.9411764705882353, | |
| "eval_loss": 0.42687493562698364, | |
| "eval_runtime": 30.8285, | |
| "eval_samples_per_second": 4.412, | |
| "eval_steps_per_second": 0.551, | |
| "step": 1568 | |
| }, | |
| { | |
| "epoch": 41.04575163398693, | |
| "grad_norm": 2.3586502075195312, | |
| "learning_rate": 3.260233918128655e-05, | |
| "loss": 0.058, | |
| "step": 1570 | |
| }, | |
| { | |
| "epoch": 41.30718954248366, | |
| "grad_norm": 31.519433975219727, | |
| "learning_rate": 3.24561403508772e-05, | |
| "loss": 0.0838, | |
| "step": 1580 | |
| }, | |
| { | |
| "epoch": 41.568627450980394, | |
| "grad_norm": 0.15550392866134644, | |
| "learning_rate": 3.230994152046784e-05, | |
| "loss": 0.0853, | |
| "step": 1590 | |
| }, | |
| { | |
| "epoch": 41.830065359477125, | |
| "grad_norm": 6.823671340942383, | |
| "learning_rate": 3.216374269005848e-05, | |
| "loss": 0.0796, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 41.98692810457516, | |
| "eval_accuracy": 0.9411764705882353, | |
| "eval_loss": 0.23307542502880096, | |
| "eval_runtime": 33.1415, | |
| "eval_samples_per_second": 4.104, | |
| "eval_steps_per_second": 0.513, | |
| "step": 1606 | |
| }, | |
| { | |
| "epoch": 42.091503267973856, | |
| "grad_norm": 11.52629566192627, | |
| "learning_rate": 3.2017543859649124e-05, | |
| "loss": 0.0903, | |
| "step": 1610 | |
| }, | |
| { | |
| "epoch": 42.35294117647059, | |
| "grad_norm": 11.996484756469727, | |
| "learning_rate": 3.187134502923977e-05, | |
| "loss": 0.0595, | |
| "step": 1620 | |
| }, | |
| { | |
| "epoch": 42.61437908496732, | |
| "grad_norm": 1.5475754737854004, | |
| "learning_rate": 3.172514619883041e-05, | |
| "loss": 0.0993, | |
| "step": 1630 | |
| }, | |
| { | |
| "epoch": 42.87581699346405, | |
| "grad_norm": 18.27874755859375, | |
| "learning_rate": 3.157894736842105e-05, | |
| "loss": 0.055, | |
| "step": 1640 | |
| }, | |
| { | |
| "epoch": 42.98039215686274, | |
| "eval_accuracy": 0.9485294117647058, | |
| "eval_loss": 0.28995171189308167, | |
| "eval_runtime": 31.1656, | |
| "eval_samples_per_second": 4.364, | |
| "eval_steps_per_second": 0.545, | |
| "step": 1644 | |
| }, | |
| { | |
| "epoch": 43.13725490196079, | |
| "grad_norm": 1.7079222202301025, | |
| "learning_rate": 3.14327485380117e-05, | |
| "loss": 0.0851, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 43.39869281045752, | |
| "grad_norm": 0.0829237625002861, | |
| "learning_rate": 3.128654970760234e-05, | |
| "loss": 0.061, | |
| "step": 1660 | |
| }, | |
| { | |
| "epoch": 43.66013071895425, | |
| "grad_norm": 2.6961874961853027, | |
| "learning_rate": 3.1140350877192986e-05, | |
| "loss": 0.0205, | |
| "step": 1670 | |
| }, | |
| { | |
| "epoch": 43.92156862745098, | |
| "grad_norm": 3.1870129108428955, | |
| "learning_rate": 3.0994152046783626e-05, | |
| "loss": 0.0706, | |
| "step": 1680 | |
| }, | |
| { | |
| "epoch": 44.0, | |
| "eval_accuracy": 0.9632352941176471, | |
| "eval_loss": 0.3367806077003479, | |
| "eval_runtime": 25.249, | |
| "eval_samples_per_second": 5.386, | |
| "eval_steps_per_second": 0.673, | |
| "step": 1683 | |
| }, | |
| { | |
| "epoch": 44.18300653594771, | |
| "grad_norm": 10.678839683532715, | |
| "learning_rate": 3.084795321637427e-05, | |
| "loss": 0.0555, | |
| "step": 1690 | |
| }, | |
| { | |
| "epoch": 44.44444444444444, | |
| "grad_norm": 0.1511285901069641, | |
| "learning_rate": 3.0701754385964913e-05, | |
| "loss": 0.0463, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 44.705882352941174, | |
| "grad_norm": 19.222854614257812, | |
| "learning_rate": 3.055555555555556e-05, | |
| "loss": 0.0783, | |
| "step": 1710 | |
| }, | |
| { | |
| "epoch": 44.967320261437905, | |
| "grad_norm": 12.824193954467773, | |
| "learning_rate": 3.0409356725146197e-05, | |
| "loss": 0.0505, | |
| "step": 1720 | |
| }, | |
| { | |
| "epoch": 44.99346405228758, | |
| "eval_accuracy": 0.9485294117647058, | |
| "eval_loss": 0.3779818117618561, | |
| "eval_runtime": 19.0793, | |
| "eval_samples_per_second": 7.128, | |
| "eval_steps_per_second": 0.891, | |
| "step": 1721 | |
| }, | |
| { | |
| "epoch": 45.22875816993464, | |
| "grad_norm": 18.495044708251953, | |
| "learning_rate": 3.0263157894736844e-05, | |
| "loss": 0.0679, | |
| "step": 1730 | |
| }, | |
| { | |
| "epoch": 45.490196078431374, | |
| "grad_norm": 22.039566040039062, | |
| "learning_rate": 3.0116959064327488e-05, | |
| "loss": 0.0618, | |
| "step": 1740 | |
| }, | |
| { | |
| "epoch": 45.751633986928105, | |
| "grad_norm": 0.6790270209312439, | |
| "learning_rate": 2.997076023391813e-05, | |
| "loss": 0.0698, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 45.98692810457516, | |
| "eval_accuracy": 0.9191176470588235, | |
| "eval_loss": 0.48222464323043823, | |
| "eval_runtime": 33.9657, | |
| "eval_samples_per_second": 4.004, | |
| "eval_steps_per_second": 0.501, | |
| "step": 1759 | |
| }, | |
| { | |
| "epoch": 46.01307189542484, | |
| "grad_norm": 48.15066909790039, | |
| "learning_rate": 2.9824561403508772e-05, | |
| "loss": 0.0745, | |
| "step": 1760 | |
| }, | |
| { | |
| "epoch": 46.27450980392157, | |
| "grad_norm": 48.96921920776367, | |
| "learning_rate": 2.9678362573099415e-05, | |
| "loss": 0.11, | |
| "step": 1770 | |
| }, | |
| { | |
| "epoch": 46.5359477124183, | |
| "grad_norm": 16.973966598510742, | |
| "learning_rate": 2.9532163742690062e-05, | |
| "loss": 0.0183, | |
| "step": 1780 | |
| }, | |
| { | |
| "epoch": 46.79738562091503, | |
| "grad_norm": 11.563841819763184, | |
| "learning_rate": 2.9385964912280706e-05, | |
| "loss": 0.0275, | |
| "step": 1790 | |
| }, | |
| { | |
| "epoch": 46.98039215686274, | |
| "eval_accuracy": 0.9632352941176471, | |
| "eval_loss": 0.34339553117752075, | |
| "eval_runtime": 33.4784, | |
| "eval_samples_per_second": 4.062, | |
| "eval_steps_per_second": 0.508, | |
| "step": 1797 | |
| }, | |
| { | |
| "epoch": 47.05882352941177, | |
| "grad_norm": 18.660812377929688, | |
| "learning_rate": 2.9239766081871346e-05, | |
| "loss": 0.0307, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 47.3202614379085, | |
| "grad_norm": 19.048458099365234, | |
| "learning_rate": 2.909356725146199e-05, | |
| "loss": 0.036, | |
| "step": 1810 | |
| }, | |
| { | |
| "epoch": 47.58169934640523, | |
| "grad_norm": 0.8519901037216187, | |
| "learning_rate": 2.8947368421052634e-05, | |
| "loss": 0.0491, | |
| "step": 1820 | |
| }, | |
| { | |
| "epoch": 47.84313725490196, | |
| "grad_norm": 0.9929773211479187, | |
| "learning_rate": 2.8801169590643277e-05, | |
| "loss": 0.0641, | |
| "step": 1830 | |
| }, | |
| { | |
| "epoch": 48.0, | |
| "eval_accuracy": 0.9705882352941176, | |
| "eval_loss": 0.3386637568473816, | |
| "eval_runtime": 33.9575, | |
| "eval_samples_per_second": 4.005, | |
| "eval_steps_per_second": 0.501, | |
| "step": 1836 | |
| }, | |
| { | |
| "epoch": 48.10457516339869, | |
| "grad_norm": 27.548429489135742, | |
| "learning_rate": 2.8654970760233917e-05, | |
| "loss": 0.0634, | |
| "step": 1840 | |
| }, | |
| { | |
| "epoch": 48.36601307189542, | |
| "grad_norm": 0.4367322027683258, | |
| "learning_rate": 2.850877192982456e-05, | |
| "loss": 0.0756, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 48.627450980392155, | |
| "grad_norm": 18.30873680114746, | |
| "learning_rate": 2.8362573099415208e-05, | |
| "loss": 0.0134, | |
| "step": 1860 | |
| }, | |
| { | |
| "epoch": 48.888888888888886, | |
| "grad_norm": 0.011559017933905125, | |
| "learning_rate": 2.821637426900585e-05, | |
| "loss": 0.0484, | |
| "step": 1870 | |
| }, | |
| { | |
| "epoch": 48.99346405228758, | |
| "eval_accuracy": 0.9191176470588235, | |
| "eval_loss": 0.5349822640419006, | |
| "eval_runtime": 38.4788, | |
| "eval_samples_per_second": 3.534, | |
| "eval_steps_per_second": 0.442, | |
| "step": 1874 | |
| }, | |
| { | |
| "epoch": 49.150326797385624, | |
| "grad_norm": 2.1214957237243652, | |
| "learning_rate": 2.8070175438596492e-05, | |
| "loss": 0.088, | |
| "step": 1880 | |
| }, | |
| { | |
| "epoch": 49.411764705882355, | |
| "grad_norm": 27.645193099975586, | |
| "learning_rate": 2.7923976608187135e-05, | |
| "loss": 0.0621, | |
| "step": 1890 | |
| }, | |
| { | |
| "epoch": 49.673202614379086, | |
| "grad_norm": 1.3699434995651245, | |
| "learning_rate": 2.777777777777778e-05, | |
| "loss": 0.0528, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 49.93464052287582, | |
| "grad_norm": 8.130342483520508, | |
| "learning_rate": 2.7631578947368426e-05, | |
| "loss": 0.0388, | |
| "step": 1910 | |
| }, | |
| { | |
| "epoch": 49.98692810457516, | |
| "eval_accuracy": 0.9117647058823529, | |
| "eval_loss": 0.382554292678833, | |
| "eval_runtime": 33.8716, | |
| "eval_samples_per_second": 4.015, | |
| "eval_steps_per_second": 0.502, | |
| "step": 1912 | |
| }, | |
| { | |
| "epoch": 50.19607843137255, | |
| "grad_norm": 47.961002349853516, | |
| "learning_rate": 2.7485380116959063e-05, | |
| "loss": 0.0941, | |
| "step": 1920 | |
| }, | |
| { | |
| "epoch": 50.45751633986928, | |
| "grad_norm": 36.82217025756836, | |
| "learning_rate": 2.733918128654971e-05, | |
| "loss": 0.0863, | |
| "step": 1930 | |
| }, | |
| { | |
| "epoch": 50.71895424836601, | |
| "grad_norm": 5.911373615264893, | |
| "learning_rate": 2.7192982456140354e-05, | |
| "loss": 0.0324, | |
| "step": 1940 | |
| }, | |
| { | |
| "epoch": 50.98039215686274, | |
| "grad_norm": 24.99283790588379, | |
| "learning_rate": 2.7046783625730997e-05, | |
| "loss": 0.0347, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 50.98039215686274, | |
| "eval_accuracy": 0.9558823529411765, | |
| "eval_loss": 0.3738501965999603, | |
| "eval_runtime": 30.759, | |
| "eval_samples_per_second": 4.421, | |
| "eval_steps_per_second": 0.553, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 51.24183006535948, | |
| "grad_norm": 70.3333969116211, | |
| "learning_rate": 2.6900584795321637e-05, | |
| "loss": 0.0428, | |
| "step": 1960 | |
| }, | |
| { | |
| "epoch": 51.50326797385621, | |
| "grad_norm": 13.072953224182129, | |
| "learning_rate": 2.675438596491228e-05, | |
| "loss": 0.0505, | |
| "step": 1970 | |
| }, | |
| { | |
| "epoch": 51.76470588235294, | |
| "grad_norm": 39.30720520019531, | |
| "learning_rate": 2.6608187134502928e-05, | |
| "loss": 0.1046, | |
| "step": 1980 | |
| }, | |
| { | |
| "epoch": 52.0, | |
| "eval_accuracy": 0.9117647058823529, | |
| "eval_loss": 0.3074805736541748, | |
| "eval_runtime": 33.894, | |
| "eval_samples_per_second": 4.013, | |
| "eval_steps_per_second": 0.502, | |
| "step": 1989 | |
| }, | |
| { | |
| "epoch": 52.02614379084967, | |
| "grad_norm": 23.061525344848633, | |
| "learning_rate": 2.6461988304093572e-05, | |
| "loss": 0.0566, | |
| "step": 1990 | |
| }, | |
| { | |
| "epoch": 52.287581699346404, | |
| "grad_norm": 2.5243396759033203, | |
| "learning_rate": 2.6315789473684212e-05, | |
| "loss": 0.0605, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 52.549019607843135, | |
| "grad_norm": 11.470220565795898, | |
| "learning_rate": 2.6169590643274856e-05, | |
| "loss": 0.0767, | |
| "step": 2010 | |
| }, | |
| { | |
| "epoch": 52.810457516339866, | |
| "grad_norm": 0.23322105407714844, | |
| "learning_rate": 2.60233918128655e-05, | |
| "loss": 0.0298, | |
| "step": 2020 | |
| }, | |
| { | |
| "epoch": 52.99346405228758, | |
| "eval_accuracy": 0.9558823529411765, | |
| "eval_loss": 0.3557595908641815, | |
| "eval_runtime": 25.1218, | |
| "eval_samples_per_second": 5.414, | |
| "eval_steps_per_second": 0.677, | |
| "step": 2027 | |
| }, | |
| { | |
| "epoch": 53.071895424836605, | |
| "grad_norm": 4.624847412109375, | |
| "learning_rate": 2.5877192982456143e-05, | |
| "loss": 0.0563, | |
| "step": 2030 | |
| }, | |
| { | |
| "epoch": 53.333333333333336, | |
| "grad_norm": 0.25727781653404236, | |
| "learning_rate": 2.5730994152046783e-05, | |
| "loss": 0.0977, | |
| "step": 2040 | |
| }, | |
| { | |
| "epoch": 53.59477124183007, | |
| "grad_norm": 0.22140049934387207, | |
| "learning_rate": 2.5584795321637427e-05, | |
| "loss": 0.0199, | |
| "step": 2050 | |
| }, | |
| { | |
| "epoch": 53.8562091503268, | |
| "grad_norm": 0.9178116321563721, | |
| "learning_rate": 2.5438596491228074e-05, | |
| "loss": 0.0478, | |
| "step": 2060 | |
| }, | |
| { | |
| "epoch": 53.98692810457516, | |
| "eval_accuracy": 0.9705882352941176, | |
| "eval_loss": 0.30555427074432373, | |
| "eval_runtime": 37.1043, | |
| "eval_samples_per_second": 3.665, | |
| "eval_steps_per_second": 0.458, | |
| "step": 2065 | |
| }, | |
| { | |
| "epoch": 54.11764705882353, | |
| "grad_norm": 19.221540451049805, | |
| "learning_rate": 2.5292397660818717e-05, | |
| "loss": 0.0289, | |
| "step": 2070 | |
| }, | |
| { | |
| "epoch": 54.37908496732026, | |
| "grad_norm": 1.848120093345642, | |
| "learning_rate": 2.5146198830409358e-05, | |
| "loss": 0.095, | |
| "step": 2080 | |
| }, | |
| { | |
| "epoch": 54.64052287581699, | |
| "grad_norm": 10.04775619506836, | |
| "learning_rate": 2.5e-05, | |
| "loss": 0.0218, | |
| "step": 2090 | |
| }, | |
| { | |
| "epoch": 54.90196078431372, | |
| "grad_norm": 0.047169651836156845, | |
| "learning_rate": 2.485380116959064e-05, | |
| "loss": 0.0285, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 54.98039215686274, | |
| "eval_accuracy": 0.9632352941176471, | |
| "eval_loss": 0.28512153029441833, | |
| "eval_runtime": 32.4012, | |
| "eval_samples_per_second": 4.197, | |
| "eval_steps_per_second": 0.525, | |
| "step": 2103 | |
| }, | |
| { | |
| "epoch": 55.16339869281046, | |
| "grad_norm": 2.4437642097473145, | |
| "learning_rate": 2.470760233918129e-05, | |
| "loss": 0.0029, | |
| "step": 2110 | |
| }, | |
| { | |
| "epoch": 55.42483660130719, | |
| "grad_norm": 14.518400192260742, | |
| "learning_rate": 2.456140350877193e-05, | |
| "loss": 0.0621, | |
| "step": 2120 | |
| }, | |
| { | |
| "epoch": 55.68627450980392, | |
| "grad_norm": 2.9272749423980713, | |
| "learning_rate": 2.4415204678362576e-05, | |
| "loss": 0.0129, | |
| "step": 2130 | |
| }, | |
| { | |
| "epoch": 55.947712418300654, | |
| "grad_norm": 19.935407638549805, | |
| "learning_rate": 2.4269005847953216e-05, | |
| "loss": 0.0407, | |
| "step": 2140 | |
| }, | |
| { | |
| "epoch": 56.0, | |
| "eval_accuracy": 0.9558823529411765, | |
| "eval_loss": 0.32225164771080017, | |
| "eval_runtime": 33.148, | |
| "eval_samples_per_second": 4.103, | |
| "eval_steps_per_second": 0.513, | |
| "step": 2142 | |
| }, | |
| { | |
| "epoch": 56.209150326797385, | |
| "grad_norm": 32.69438934326172, | |
| "learning_rate": 2.412280701754386e-05, | |
| "loss": 0.0161, | |
| "step": 2150 | |
| }, | |
| { | |
| "epoch": 56.470588235294116, | |
| "grad_norm": 0.04998353496193886, | |
| "learning_rate": 2.3976608187134503e-05, | |
| "loss": 0.0446, | |
| "step": 2160 | |
| }, | |
| { | |
| "epoch": 56.73202614379085, | |
| "grad_norm": 0.830470085144043, | |
| "learning_rate": 2.3830409356725147e-05, | |
| "loss": 0.1066, | |
| "step": 2170 | |
| }, | |
| { | |
| "epoch": 56.99346405228758, | |
| "grad_norm": 21.04816436767578, | |
| "learning_rate": 2.368421052631579e-05, | |
| "loss": 0.0459, | |
| "step": 2180 | |
| }, | |
| { | |
| "epoch": 56.99346405228758, | |
| "eval_accuracy": 0.9485294117647058, | |
| "eval_loss": 0.45745787024497986, | |
| "eval_runtime": 31.4986, | |
| "eval_samples_per_second": 4.318, | |
| "eval_steps_per_second": 0.54, | |
| "step": 2180 | |
| }, | |
| { | |
| "epoch": 57.254901960784316, | |
| "grad_norm": 6.693302631378174, | |
| "learning_rate": 2.3538011695906434e-05, | |
| "loss": 0.0569, | |
| "step": 2190 | |
| }, | |
| { | |
| "epoch": 57.51633986928105, | |
| "grad_norm": 12.218875885009766, | |
| "learning_rate": 2.3391812865497074e-05, | |
| "loss": 0.0455, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 57.77777777777778, | |
| "grad_norm": 56.21259689331055, | |
| "learning_rate": 2.324561403508772e-05, | |
| "loss": 0.0409, | |
| "step": 2210 | |
| }, | |
| { | |
| "epoch": 57.98692810457516, | |
| "eval_accuracy": 0.9632352941176471, | |
| "eval_loss": 0.29300644993782043, | |
| "eval_runtime": 31.4287, | |
| "eval_samples_per_second": 4.327, | |
| "eval_steps_per_second": 0.541, | |
| "step": 2218 | |
| }, | |
| { | |
| "epoch": 58.03921568627451, | |
| "grad_norm": 0.48025286197662354, | |
| "learning_rate": 2.309941520467836e-05, | |
| "loss": 0.0526, | |
| "step": 2220 | |
| }, | |
| { | |
| "epoch": 58.30065359477124, | |
| "grad_norm": 6.530683994293213, | |
| "learning_rate": 2.295321637426901e-05, | |
| "loss": 0.0791, | |
| "step": 2230 | |
| }, | |
| { | |
| "epoch": 58.56209150326797, | |
| "grad_norm": 35.76517105102539, | |
| "learning_rate": 2.280701754385965e-05, | |
| "loss": 0.033, | |
| "step": 2240 | |
| }, | |
| { | |
| "epoch": 58.8235294117647, | |
| "grad_norm": 4.9538679122924805, | |
| "learning_rate": 2.2660818713450292e-05, | |
| "loss": 0.0743, | |
| "step": 2250 | |
| }, | |
| { | |
| "epoch": 58.98039215686274, | |
| "eval_accuracy": 0.9485294117647058, | |
| "eval_loss": 0.4032076299190521, | |
| "eval_runtime": 34.2283, | |
| "eval_samples_per_second": 3.973, | |
| "eval_steps_per_second": 0.497, | |
| "step": 2256 | |
| }, | |
| { | |
| "epoch": 59.08496732026144, | |
| "grad_norm": 8.96496868133545, | |
| "learning_rate": 2.2514619883040936e-05, | |
| "loss": 0.0358, | |
| "step": 2260 | |
| }, | |
| { | |
| "epoch": 59.34640522875817, | |
| "grad_norm": 10.487314224243164, | |
| "learning_rate": 2.236842105263158e-05, | |
| "loss": 0.0805, | |
| "step": 2270 | |
| }, | |
| { | |
| "epoch": 59.6078431372549, | |
| "grad_norm": 3.922236442565918, | |
| "learning_rate": 2.2222222222222223e-05, | |
| "loss": 0.0096, | |
| "step": 2280 | |
| }, | |
| { | |
| "epoch": 59.869281045751634, | |
| "grad_norm": 5.181495666503906, | |
| "learning_rate": 2.2076023391812867e-05, | |
| "loss": 0.0346, | |
| "step": 2290 | |
| }, | |
| { | |
| "epoch": 60.0, | |
| "eval_accuracy": 0.9411764705882353, | |
| "eval_loss": 0.37382781505584717, | |
| "eval_runtime": 37.1282, | |
| "eval_samples_per_second": 3.663, | |
| "eval_steps_per_second": 0.458, | |
| "step": 2295 | |
| }, | |
| { | |
| "epoch": 60.130718954248366, | |
| "grad_norm": 0.059666648507118225, | |
| "learning_rate": 2.1929824561403507e-05, | |
| "loss": 0.0551, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 60.3921568627451, | |
| "grad_norm": 0.5856298804283142, | |
| "learning_rate": 2.1783625730994154e-05, | |
| "loss": 0.0331, | |
| "step": 2310 | |
| }, | |
| { | |
| "epoch": 60.65359477124183, | |
| "grad_norm": 5.777927875518799, | |
| "learning_rate": 2.1637426900584794e-05, | |
| "loss": 0.0112, | |
| "step": 2320 | |
| }, | |
| { | |
| "epoch": 60.91503267973856, | |
| "grad_norm": 13.134035110473633, | |
| "learning_rate": 2.149122807017544e-05, | |
| "loss": 0.0302, | |
| "step": 2330 | |
| }, | |
| { | |
| "epoch": 60.99346405228758, | |
| "eval_accuracy": 0.9485294117647058, | |
| "eval_loss": 0.3597317337989807, | |
| "eval_runtime": 31.126, | |
| "eval_samples_per_second": 4.369, | |
| "eval_steps_per_second": 0.546, | |
| "step": 2333 | |
| }, | |
| { | |
| "epoch": 61.1764705882353, | |
| "grad_norm": 28.286643981933594, | |
| "learning_rate": 2.134502923976608e-05, | |
| "loss": 0.0311, | |
| "step": 2340 | |
| }, | |
| { | |
| "epoch": 61.43790849673203, | |
| "grad_norm": 6.936996936798096, | |
| "learning_rate": 2.1198830409356725e-05, | |
| "loss": 0.139, | |
| "step": 2350 | |
| }, | |
| { | |
| "epoch": 61.69934640522876, | |
| "grad_norm": 1.0503500699996948, | |
| "learning_rate": 2.105263157894737e-05, | |
| "loss": 0.0666, | |
| "step": 2360 | |
| }, | |
| { | |
| "epoch": 61.96078431372549, | |
| "grad_norm": 5.756121635437012, | |
| "learning_rate": 2.0906432748538013e-05, | |
| "loss": 0.0488, | |
| "step": 2370 | |
| }, | |
| { | |
| "epoch": 61.98692810457516, | |
| "eval_accuracy": 0.9558823529411765, | |
| "eval_loss": 0.2594568133354187, | |
| "eval_runtime": 34.9133, | |
| "eval_samples_per_second": 3.895, | |
| "eval_steps_per_second": 0.487, | |
| "step": 2371 | |
| }, | |
| { | |
| "epoch": 62.22222222222222, | |
| "grad_norm": 17.791810989379883, | |
| "learning_rate": 2.0760233918128656e-05, | |
| "loss": 0.0294, | |
| "step": 2380 | |
| }, | |
| { | |
| "epoch": 62.48366013071895, | |
| "grad_norm": 0.014880876056849957, | |
| "learning_rate": 2.06140350877193e-05, | |
| "loss": 0.0516, | |
| "step": 2390 | |
| }, | |
| { | |
| "epoch": 62.745098039215684, | |
| "grad_norm": 33.730533599853516, | |
| "learning_rate": 2.046783625730994e-05, | |
| "loss": 0.0562, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 62.98039215686274, | |
| "eval_accuracy": 0.9411764705882353, | |
| "eval_loss": 0.3763536512851715, | |
| "eval_runtime": 35.0422, | |
| "eval_samples_per_second": 3.881, | |
| "eval_steps_per_second": 0.485, | |
| "step": 2409 | |
| }, | |
| { | |
| "epoch": 63.00653594771242, | |
| "grad_norm": 58.39078903198242, | |
| "learning_rate": 2.0321637426900587e-05, | |
| "loss": 0.0751, | |
| "step": 2410 | |
| }, | |
| { | |
| "epoch": 63.26797385620915, | |
| "grad_norm": 0.0864597037434578, | |
| "learning_rate": 2.0175438596491227e-05, | |
| "loss": 0.0393, | |
| "step": 2420 | |
| }, | |
| { | |
| "epoch": 63.529411764705884, | |
| "grad_norm": 18.966829299926758, | |
| "learning_rate": 2.0029239766081874e-05, | |
| "loss": 0.0251, | |
| "step": 2430 | |
| }, | |
| { | |
| "epoch": 63.790849673202615, | |
| "grad_norm": 25.66364288330078, | |
| "learning_rate": 1.9883040935672515e-05, | |
| "loss": 0.0216, | |
| "step": 2440 | |
| }, | |
| { | |
| "epoch": 64.0, | |
| "eval_accuracy": 0.9779411764705882, | |
| "eval_loss": 0.2643776834011078, | |
| "eval_runtime": 17.3782, | |
| "eval_samples_per_second": 7.826, | |
| "eval_steps_per_second": 0.978, | |
| "step": 2448 | |
| }, | |
| { | |
| "epoch": 64.05228758169935, | |
| "grad_norm": 1.6527997255325317, | |
| "learning_rate": 1.9736842105263158e-05, | |
| "loss": 0.054, | |
| "step": 2450 | |
| }, | |
| { | |
| "epoch": 64.31372549019608, | |
| "grad_norm": 0.06280579417943954, | |
| "learning_rate": 1.9590643274853802e-05, | |
| "loss": 0.0287, | |
| "step": 2460 | |
| }, | |
| { | |
| "epoch": 64.57516339869281, | |
| "grad_norm": 1.6318433284759521, | |
| "learning_rate": 1.9444444444444445e-05, | |
| "loss": 0.0399, | |
| "step": 2470 | |
| }, | |
| { | |
| "epoch": 64.83660130718954, | |
| "grad_norm": 1.7933380603790283, | |
| "learning_rate": 1.929824561403509e-05, | |
| "loss": 0.0219, | |
| "step": 2480 | |
| }, | |
| { | |
| "epoch": 64.99346405228758, | |
| "eval_accuracy": 0.9632352941176471, | |
| "eval_loss": 0.30917930603027344, | |
| "eval_runtime": 17.1251, | |
| "eval_samples_per_second": 7.942, | |
| "eval_steps_per_second": 0.993, | |
| "step": 2486 | |
| }, | |
| { | |
| "epoch": 65.09803921568627, | |
| "grad_norm": 10.366903305053711, | |
| "learning_rate": 1.9152046783625733e-05, | |
| "loss": 0.0539, | |
| "step": 2490 | |
| }, | |
| { | |
| "epoch": 65.359477124183, | |
| "grad_norm": 0.2696276307106018, | |
| "learning_rate": 1.9005847953216373e-05, | |
| "loss": 0.0123, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 65.62091503267973, | |
| "grad_norm": 2.0707309246063232, | |
| "learning_rate": 1.885964912280702e-05, | |
| "loss": 0.0209, | |
| "step": 2510 | |
| }, | |
| { | |
| "epoch": 65.88235294117646, | |
| "grad_norm": 0.026714438572525978, | |
| "learning_rate": 1.871345029239766e-05, | |
| "loss": 0.0272, | |
| "step": 2520 | |
| }, | |
| { | |
| "epoch": 65.98692810457516, | |
| "eval_accuracy": 0.9632352941176471, | |
| "eval_loss": 0.2898404896259308, | |
| "eval_runtime": 17.5281, | |
| "eval_samples_per_second": 7.759, | |
| "eval_steps_per_second": 0.97, | |
| "step": 2524 | |
| }, | |
| { | |
| "epoch": 66.14379084967321, | |
| "grad_norm": 0.15798357129096985, | |
| "learning_rate": 1.8567251461988304e-05, | |
| "loss": 0.0091, | |
| "step": 2530 | |
| }, | |
| { | |
| "epoch": 66.40522875816994, | |
| "grad_norm": 85.56695556640625, | |
| "learning_rate": 1.8421052631578947e-05, | |
| "loss": 0.0221, | |
| "step": 2540 | |
| }, | |
| { | |
| "epoch": 66.66666666666667, | |
| "grad_norm": 25.615230560302734, | |
| "learning_rate": 1.827485380116959e-05, | |
| "loss": 0.0645, | |
| "step": 2550 | |
| }, | |
| { | |
| "epoch": 66.9281045751634, | |
| "grad_norm": 22.72310447692871, | |
| "learning_rate": 1.8128654970760235e-05, | |
| "loss": 0.027, | |
| "step": 2560 | |
| }, | |
| { | |
| "epoch": 66.98039215686275, | |
| "eval_accuracy": 0.9632352941176471, | |
| "eval_loss": 0.2693423628807068, | |
| "eval_runtime": 23.0579, | |
| "eval_samples_per_second": 5.898, | |
| "eval_steps_per_second": 0.737, | |
| "step": 2562 | |
| }, | |
| { | |
| "epoch": 67.18954248366013, | |
| "grad_norm": 24.883161544799805, | |
| "learning_rate": 1.7982456140350878e-05, | |
| "loss": 0.0293, | |
| "step": 2570 | |
| }, | |
| { | |
| "epoch": 67.45098039215686, | |
| "grad_norm": 6.90622615814209, | |
| "learning_rate": 1.7836257309941522e-05, | |
| "loss": 0.022, | |
| "step": 2580 | |
| }, | |
| { | |
| "epoch": 67.7124183006536, | |
| "grad_norm": 48.23540115356445, | |
| "learning_rate": 1.7690058479532165e-05, | |
| "loss": 0.0509, | |
| "step": 2590 | |
| }, | |
| { | |
| "epoch": 67.97385620915033, | |
| "grad_norm": 0.07863592356443405, | |
| "learning_rate": 1.7543859649122806e-05, | |
| "loss": 0.0397, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 68.0, | |
| "eval_accuracy": 0.9411764705882353, | |
| "eval_loss": 0.38426852226257324, | |
| "eval_runtime": 23.971, | |
| "eval_samples_per_second": 5.674, | |
| "eval_steps_per_second": 0.709, | |
| "step": 2601 | |
| }, | |
| { | |
| "epoch": 68.23529411764706, | |
| "grad_norm": 4.26972770690918, | |
| "learning_rate": 1.7397660818713453e-05, | |
| "loss": 0.0409, | |
| "step": 2610 | |
| }, | |
| { | |
| "epoch": 68.49673202614379, | |
| "grad_norm": 1.8150982856750488, | |
| "learning_rate": 1.7251461988304093e-05, | |
| "loss": 0.0315, | |
| "step": 2620 | |
| }, | |
| { | |
| "epoch": 68.75816993464052, | |
| "grad_norm": 13.07569694519043, | |
| "learning_rate": 1.7105263157894737e-05, | |
| "loss": 0.0154, | |
| "step": 2630 | |
| }, | |
| { | |
| "epoch": 68.99346405228758, | |
| "eval_accuracy": 0.9485294117647058, | |
| "eval_loss": 0.30511775612831116, | |
| "eval_runtime": 23.3134, | |
| "eval_samples_per_second": 5.834, | |
| "eval_steps_per_second": 0.729, | |
| "step": 2639 | |
| }, | |
| { | |
| "epoch": 69.01960784313725, | |
| "grad_norm": 0.576351523399353, | |
| "learning_rate": 1.695906432748538e-05, | |
| "loss": 0.0387, | |
| "step": 2640 | |
| }, | |
| { | |
| "epoch": 69.28104575163398, | |
| "grad_norm": 0.867915153503418, | |
| "learning_rate": 1.6812865497076024e-05, | |
| "loss": 0.0178, | |
| "step": 2650 | |
| }, | |
| { | |
| "epoch": 69.54248366013071, | |
| "grad_norm": 20.2279052734375, | |
| "learning_rate": 1.6666666666666667e-05, | |
| "loss": 0.0392, | |
| "step": 2660 | |
| }, | |
| { | |
| "epoch": 69.80392156862744, | |
| "grad_norm": 0.04353189095854759, | |
| "learning_rate": 1.652046783625731e-05, | |
| "loss": 0.0004, | |
| "step": 2670 | |
| }, | |
| { | |
| "epoch": 69.98692810457516, | |
| "eval_accuracy": 0.9411764705882353, | |
| "eval_loss": 0.39089399576187134, | |
| "eval_runtime": 23.3469, | |
| "eval_samples_per_second": 5.825, | |
| "eval_steps_per_second": 0.728, | |
| "step": 2677 | |
| }, | |
| { | |
| "epoch": 70.06535947712419, | |
| "grad_norm": 77.49730682373047, | |
| "learning_rate": 1.6374269005847955e-05, | |
| "loss": 0.0467, | |
| "step": 2680 | |
| }, | |
| { | |
| "epoch": 70.32679738562092, | |
| "grad_norm": 49.50137710571289, | |
| "learning_rate": 1.62280701754386e-05, | |
| "loss": 0.0228, | |
| "step": 2690 | |
| }, | |
| { | |
| "epoch": 70.58823529411765, | |
| "grad_norm": 0.5024857521057129, | |
| "learning_rate": 1.608187134502924e-05, | |
| "loss": 0.0045, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 70.84967320261438, | |
| "grad_norm": 3.8934128284454346, | |
| "learning_rate": 1.5935672514619886e-05, | |
| "loss": 0.0651, | |
| "step": 2710 | |
| }, | |
| { | |
| "epoch": 70.98039215686275, | |
| "eval_accuracy": 0.9485294117647058, | |
| "eval_loss": 0.29772186279296875, | |
| "eval_runtime": 25.8712, | |
| "eval_samples_per_second": 5.257, | |
| "eval_steps_per_second": 0.657, | |
| "step": 2715 | |
| }, | |
| { | |
| "epoch": 71.11111111111111, | |
| "grad_norm": 7.867006778717041, | |
| "learning_rate": 1.5789473684210526e-05, | |
| "loss": 0.008, | |
| "step": 2720 | |
| }, | |
| { | |
| "epoch": 71.37254901960785, | |
| "grad_norm": 13.64209270477295, | |
| "learning_rate": 1.564327485380117e-05, | |
| "loss": 0.0757, | |
| "step": 2730 | |
| }, | |
| { | |
| "epoch": 71.63398692810458, | |
| "grad_norm": 6.453034400939941, | |
| "learning_rate": 1.5497076023391813e-05, | |
| "loss": 0.0214, | |
| "step": 2740 | |
| }, | |
| { | |
| "epoch": 71.89542483660131, | |
| "grad_norm": 0.1501288115978241, | |
| "learning_rate": 1.5350877192982457e-05, | |
| "loss": 0.016, | |
| "step": 2750 | |
| }, | |
| { | |
| "epoch": 72.0, | |
| "eval_accuracy": 0.9632352941176471, | |
| "eval_loss": 0.2694728374481201, | |
| "eval_runtime": 20.9056, | |
| "eval_samples_per_second": 6.505, | |
| "eval_steps_per_second": 0.813, | |
| "step": 2754 | |
| }, | |
| { | |
| "epoch": 72.15686274509804, | |
| "grad_norm": 0.034015778452157974, | |
| "learning_rate": 1.5204678362573099e-05, | |
| "loss": 0.012, | |
| "step": 2760 | |
| }, | |
| { | |
| "epoch": 72.41830065359477, | |
| "grad_norm": 11.159213066101074, | |
| "learning_rate": 1.5058479532163744e-05, | |
| "loss": 0.0444, | |
| "step": 2770 | |
| }, | |
| { | |
| "epoch": 72.6797385620915, | |
| "grad_norm": 2.5402066707611084, | |
| "learning_rate": 1.4912280701754386e-05, | |
| "loss": 0.0359, | |
| "step": 2780 | |
| }, | |
| { | |
| "epoch": 72.94117647058823, | |
| "grad_norm": 0.016565600410103798, | |
| "learning_rate": 1.4766081871345031e-05, | |
| "loss": 0.0351, | |
| "step": 2790 | |
| }, | |
| { | |
| "epoch": 72.99346405228758, | |
| "eval_accuracy": 0.9705882352941176, | |
| "eval_loss": 0.2720423936843872, | |
| "eval_runtime": 22.3116, | |
| "eval_samples_per_second": 6.095, | |
| "eval_steps_per_second": 0.762, | |
| "step": 2792 | |
| }, | |
| { | |
| "epoch": 73.20261437908496, | |
| "grad_norm": 79.11601257324219, | |
| "learning_rate": 1.4619883040935673e-05, | |
| "loss": 0.044, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 73.4640522875817, | |
| "grad_norm": 5.53911018371582, | |
| "learning_rate": 1.4473684210526317e-05, | |
| "loss": 0.0298, | |
| "step": 2810 | |
| }, | |
| { | |
| "epoch": 73.72549019607843, | |
| "grad_norm": 0.40750911831855774, | |
| "learning_rate": 1.4327485380116959e-05, | |
| "loss": 0.011, | |
| "step": 2820 | |
| }, | |
| { | |
| "epoch": 73.98692810457516, | |
| "grad_norm": 0.9360626339912415, | |
| "learning_rate": 1.4181286549707604e-05, | |
| "loss": 0.0206, | |
| "step": 2830 | |
| }, | |
| { | |
| "epoch": 73.98692810457516, | |
| "eval_accuracy": 0.9705882352941176, | |
| "eval_loss": 0.25490206480026245, | |
| "eval_runtime": 22.7726, | |
| "eval_samples_per_second": 5.972, | |
| "eval_steps_per_second": 0.747, | |
| "step": 2830 | |
| }, | |
| { | |
| "epoch": 74.2483660130719, | |
| "grad_norm": 6.835451602935791, | |
| "learning_rate": 1.4035087719298246e-05, | |
| "loss": 0.0109, | |
| "step": 2840 | |
| }, | |
| { | |
| "epoch": 74.50980392156863, | |
| "grad_norm": 0.1265513300895691, | |
| "learning_rate": 1.388888888888889e-05, | |
| "loss": 0.0436, | |
| "step": 2850 | |
| }, | |
| { | |
| "epoch": 74.77124183006536, | |
| "grad_norm": 0.20871244370937347, | |
| "learning_rate": 1.3742690058479531e-05, | |
| "loss": 0.0109, | |
| "step": 2860 | |
| }, | |
| { | |
| "epoch": 74.98039215686275, | |
| "eval_accuracy": 0.9705882352941176, | |
| "eval_loss": 0.24122387170791626, | |
| "eval_runtime": 19.4498, | |
| "eval_samples_per_second": 6.992, | |
| "eval_steps_per_second": 0.874, | |
| "step": 2868 | |
| }, | |
| { | |
| "epoch": 75.0326797385621, | |
| "grad_norm": 24.267925262451172, | |
| "learning_rate": 1.3596491228070177e-05, | |
| "loss": 0.0207, | |
| "step": 2870 | |
| }, | |
| { | |
| "epoch": 75.29411764705883, | |
| "grad_norm": 9.061148643493652, | |
| "learning_rate": 1.3450292397660819e-05, | |
| "loss": 0.0105, | |
| "step": 2880 | |
| }, | |
| { | |
| "epoch": 75.55555555555556, | |
| "grad_norm": 1.2824314832687378, | |
| "learning_rate": 1.3304093567251464e-05, | |
| "loss": 0.0182, | |
| "step": 2890 | |
| }, | |
| { | |
| "epoch": 75.81699346405229, | |
| "grad_norm": 0.003347081132233143, | |
| "learning_rate": 1.3157894736842106e-05, | |
| "loss": 0.0012, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 76.0, | |
| "eval_accuracy": 0.9779411764705882, | |
| "eval_loss": 0.34939995408058167, | |
| "eval_runtime": 20.8219, | |
| "eval_samples_per_second": 6.532, | |
| "eval_steps_per_second": 0.816, | |
| "step": 2907 | |
| }, | |
| { | |
| "epoch": 76.07843137254902, | |
| "grad_norm": 5.410060882568359, | |
| "learning_rate": 1.301169590643275e-05, | |
| "loss": 0.0214, | |
| "step": 2910 | |
| }, | |
| { | |
| "epoch": 76.33986928104575, | |
| "grad_norm": 0.6613653898239136, | |
| "learning_rate": 1.2865497076023392e-05, | |
| "loss": 0.0261, | |
| "step": 2920 | |
| }, | |
| { | |
| "epoch": 76.60130718954248, | |
| "grad_norm": 1.0403037071228027, | |
| "learning_rate": 1.2719298245614037e-05, | |
| "loss": 0.0555, | |
| "step": 2930 | |
| }, | |
| { | |
| "epoch": 76.86274509803921, | |
| "grad_norm": 15.238615036010742, | |
| "learning_rate": 1.2573099415204679e-05, | |
| "loss": 0.0418, | |
| "step": 2940 | |
| }, | |
| { | |
| "epoch": 76.99346405228758, | |
| "eval_accuracy": 0.9632352941176471, | |
| "eval_loss": 0.37292152643203735, | |
| "eval_runtime": 20.8077, | |
| "eval_samples_per_second": 6.536, | |
| "eval_steps_per_second": 0.817, | |
| "step": 2945 | |
| }, | |
| { | |
| "epoch": 77.12418300653594, | |
| "grad_norm": 31.79336166381836, | |
| "learning_rate": 1.242690058479532e-05, | |
| "loss": 0.0302, | |
| "step": 2950 | |
| }, | |
| { | |
| "epoch": 77.38562091503267, | |
| "grad_norm": 0.0776483416557312, | |
| "learning_rate": 1.2280701754385964e-05, | |
| "loss": 0.0094, | |
| "step": 2960 | |
| }, | |
| { | |
| "epoch": 77.6470588235294, | |
| "grad_norm": 63.487571716308594, | |
| "learning_rate": 1.2134502923976608e-05, | |
| "loss": 0.0473, | |
| "step": 2970 | |
| }, | |
| { | |
| "epoch": 77.90849673202614, | |
| "grad_norm": 0.09107412397861481, | |
| "learning_rate": 1.1988304093567252e-05, | |
| "loss": 0.0165, | |
| "step": 2980 | |
| }, | |
| { | |
| "epoch": 77.98692810457516, | |
| "eval_accuracy": 0.9632352941176471, | |
| "eval_loss": 0.347072571516037, | |
| "eval_runtime": 17.8737, | |
| "eval_samples_per_second": 7.609, | |
| "eval_steps_per_second": 0.951, | |
| "step": 2983 | |
| }, | |
| { | |
| "epoch": 78.16993464052288, | |
| "grad_norm": 36.47078323364258, | |
| "learning_rate": 1.1842105263157895e-05, | |
| "loss": 0.0176, | |
| "step": 2990 | |
| }, | |
| { | |
| "epoch": 78.43137254901961, | |
| "grad_norm": 0.0024324676487594843, | |
| "learning_rate": 1.1695906432748537e-05, | |
| "loss": 0.0317, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 78.69281045751634, | |
| "grad_norm": 26.059871673583984, | |
| "learning_rate": 1.154970760233918e-05, | |
| "loss": 0.0699, | |
| "step": 3010 | |
| }, | |
| { | |
| "epoch": 78.95424836601308, | |
| "grad_norm": 38.14042282104492, | |
| "learning_rate": 1.1403508771929824e-05, | |
| "loss": 0.0163, | |
| "step": 3020 | |
| }, | |
| { | |
| "epoch": 78.98039215686275, | |
| "eval_accuracy": 0.9705882352941176, | |
| "eval_loss": 0.29730716347694397, | |
| "eval_runtime": 18.5858, | |
| "eval_samples_per_second": 7.317, | |
| "eval_steps_per_second": 0.915, | |
| "step": 3021 | |
| }, | |
| { | |
| "epoch": 79.2156862745098, | |
| "grad_norm": 87.14070129394531, | |
| "learning_rate": 1.1257309941520468e-05, | |
| "loss": 0.0556, | |
| "step": 3030 | |
| }, | |
| { | |
| "epoch": 79.47712418300654, | |
| "grad_norm": 3.418160915374756, | |
| "learning_rate": 1.1111111111111112e-05, | |
| "loss": 0.0073, | |
| "step": 3040 | |
| }, | |
| { | |
| "epoch": 79.73856209150327, | |
| "grad_norm": 22.285499572753906, | |
| "learning_rate": 1.0964912280701754e-05, | |
| "loss": 0.0249, | |
| "step": 3050 | |
| }, | |
| { | |
| "epoch": 80.0, | |
| "grad_norm": 35.9242057800293, | |
| "learning_rate": 1.0818713450292397e-05, | |
| "loss": 0.0202, | |
| "step": 3060 | |
| }, | |
| { | |
| "epoch": 80.0, | |
| "eval_accuracy": 0.9558823529411765, | |
| "eval_loss": 0.3729775846004486, | |
| "eval_runtime": 19.8789, | |
| "eval_samples_per_second": 6.841, | |
| "eval_steps_per_second": 0.855, | |
| "step": 3060 | |
| }, | |
| { | |
| "epoch": 80.26143790849673, | |
| "grad_norm": 15.128210067749023, | |
| "learning_rate": 1.067251461988304e-05, | |
| "loss": 0.0628, | |
| "step": 3070 | |
| }, | |
| { | |
| "epoch": 80.52287581699346, | |
| "grad_norm": 29.2634220123291, | |
| "learning_rate": 1.0526315789473684e-05, | |
| "loss": 0.0244, | |
| "step": 3080 | |
| }, | |
| { | |
| "epoch": 80.7843137254902, | |
| "grad_norm": 79.84837341308594, | |
| "learning_rate": 1.0380116959064328e-05, | |
| "loss": 0.0368, | |
| "step": 3090 | |
| }, | |
| { | |
| "epoch": 80.99346405228758, | |
| "eval_accuracy": 0.9705882352941176, | |
| "eval_loss": 0.2876713275909424, | |
| "eval_runtime": 19.4821, | |
| "eval_samples_per_second": 6.981, | |
| "eval_steps_per_second": 0.873, | |
| "step": 3098 | |
| }, | |
| { | |
| "epoch": 81.04575163398692, | |
| "grad_norm": 2.7281501293182373, | |
| "learning_rate": 1.023391812865497e-05, | |
| "loss": 0.0238, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 81.30718954248366, | |
| "grad_norm": 0.0004346697241999209, | |
| "learning_rate": 1.0087719298245614e-05, | |
| "loss": 0.0305, | |
| "step": 3110 | |
| }, | |
| { | |
| "epoch": 81.56862745098039, | |
| "grad_norm": 0.03860533982515335, | |
| "learning_rate": 9.941520467836257e-06, | |
| "loss": 0.0136, | |
| "step": 3120 | |
| }, | |
| { | |
| "epoch": 81.83006535947712, | |
| "grad_norm": 0.4280990958213806, | |
| "learning_rate": 9.795321637426901e-06, | |
| "loss": 0.0374, | |
| "step": 3130 | |
| }, | |
| { | |
| "epoch": 81.98692810457516, | |
| "eval_accuracy": 0.9632352941176471, | |
| "eval_loss": 0.41433659195899963, | |
| "eval_runtime": 19.9936, | |
| "eval_samples_per_second": 6.802, | |
| "eval_steps_per_second": 0.85, | |
| "step": 3136 | |
| }, | |
| { | |
| "epoch": 82.09150326797386, | |
| "grad_norm": 31.7745418548584, | |
| "learning_rate": 9.649122807017545e-06, | |
| "loss": 0.0105, | |
| "step": 3140 | |
| }, | |
| { | |
| "epoch": 82.3529411764706, | |
| "grad_norm": 2.9742166996002197, | |
| "learning_rate": 9.502923976608186e-06, | |
| "loss": 0.0361, | |
| "step": 3150 | |
| }, | |
| { | |
| "epoch": 82.61437908496733, | |
| "grad_norm": 3.588392734527588, | |
| "learning_rate": 9.35672514619883e-06, | |
| "loss": 0.0648, | |
| "step": 3160 | |
| }, | |
| { | |
| "epoch": 82.87581699346406, | |
| "grad_norm": 0.4829164147377014, | |
| "learning_rate": 9.210526315789474e-06, | |
| "loss": 0.0296, | |
| "step": 3170 | |
| }, | |
| { | |
| "epoch": 82.98039215686275, | |
| "eval_accuracy": 0.9779411764705882, | |
| "eval_loss": 0.2895439565181732, | |
| "eval_runtime": 17.9847, | |
| "eval_samples_per_second": 7.562, | |
| "eval_steps_per_second": 0.945, | |
| "step": 3174 | |
| }, | |
| { | |
| "epoch": 83.13725490196079, | |
| "grad_norm": 22.893632888793945, | |
| "learning_rate": 9.064327485380117e-06, | |
| "loss": 0.0115, | |
| "step": 3180 | |
| }, | |
| { | |
| "epoch": 83.39869281045752, | |
| "grad_norm": 0.021368976682424545, | |
| "learning_rate": 8.918128654970761e-06, | |
| "loss": 0.0269, | |
| "step": 3190 | |
| }, | |
| { | |
| "epoch": 83.66013071895425, | |
| "grad_norm": 0.06225317716598511, | |
| "learning_rate": 8.771929824561403e-06, | |
| "loss": 0.0024, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 83.92156862745098, | |
| "grad_norm": 0.05705859139561653, | |
| "learning_rate": 8.625730994152046e-06, | |
| "loss": 0.0405, | |
| "step": 3210 | |
| }, | |
| { | |
| "epoch": 84.0, | |
| "eval_accuracy": 0.9558823529411765, | |
| "eval_loss": 0.29270094633102417, | |
| "eval_runtime": 19.1133, | |
| "eval_samples_per_second": 7.115, | |
| "eval_steps_per_second": 0.889, | |
| "step": 3213 | |
| }, | |
| { | |
| "epoch": 84.18300653594771, | |
| "grad_norm": 24.514904022216797, | |
| "learning_rate": 8.47953216374269e-06, | |
| "loss": 0.0098, | |
| "step": 3220 | |
| }, | |
| { | |
| "epoch": 84.44444444444444, | |
| "grad_norm": 0.596236526966095, | |
| "learning_rate": 8.333333333333334e-06, | |
| "loss": 0.0035, | |
| "step": 3230 | |
| }, | |
| { | |
| "epoch": 84.70588235294117, | |
| "grad_norm": 0.050445396453142166, | |
| "learning_rate": 8.187134502923977e-06, | |
| "loss": 0.005, | |
| "step": 3240 | |
| }, | |
| { | |
| "epoch": 84.9673202614379, | |
| "grad_norm": 0.07400578260421753, | |
| "learning_rate": 8.04093567251462e-06, | |
| "loss": 0.0097, | |
| "step": 3250 | |
| }, | |
| { | |
| "epoch": 84.99346405228758, | |
| "eval_accuracy": 0.9632352941176471, | |
| "eval_loss": 0.317930668592453, | |
| "eval_runtime": 18.575, | |
| "eval_samples_per_second": 7.322, | |
| "eval_steps_per_second": 0.915, | |
| "step": 3251 | |
| }, | |
| { | |
| "epoch": 85.22875816993464, | |
| "grad_norm": 12.950275421142578, | |
| "learning_rate": 7.894736842105263e-06, | |
| "loss": 0.0026, | |
| "step": 3260 | |
| }, | |
| { | |
| "epoch": 85.49019607843137, | |
| "grad_norm": 16.546571731567383, | |
| "learning_rate": 7.748538011695907e-06, | |
| "loss": 0.0257, | |
| "step": 3270 | |
| }, | |
| { | |
| "epoch": 85.7516339869281, | |
| "grad_norm": 0.6142169237136841, | |
| "learning_rate": 7.602339181286549e-06, | |
| "loss": 0.0182, | |
| "step": 3280 | |
| }, | |
| { | |
| "epoch": 85.98692810457516, | |
| "eval_accuracy": 0.9705882352941176, | |
| "eval_loss": 0.30465030670166016, | |
| "eval_runtime": 18.7827, | |
| "eval_samples_per_second": 7.241, | |
| "eval_steps_per_second": 0.905, | |
| "step": 3289 | |
| }, | |
| { | |
| "epoch": 86.01307189542484, | |
| "grad_norm": 0.09201680123806, | |
| "learning_rate": 7.456140350877193e-06, | |
| "loss": 0.0086, | |
| "step": 3290 | |
| }, | |
| { | |
| "epoch": 86.27450980392157, | |
| "grad_norm": 0.6810176372528076, | |
| "learning_rate": 7.3099415204678366e-06, | |
| "loss": 0.0033, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 86.5359477124183, | |
| "grad_norm": 7.0328474044799805, | |
| "learning_rate": 7.163742690058479e-06, | |
| "loss": 0.023, | |
| "step": 3310 | |
| }, | |
| { | |
| "epoch": 86.79738562091504, | |
| "grad_norm": 0.5138120055198669, | |
| "learning_rate": 7.017543859649123e-06, | |
| "loss": 0.0207, | |
| "step": 3320 | |
| }, | |
| { | |
| "epoch": 86.98039215686275, | |
| "eval_accuracy": 0.9779411764705882, | |
| "eval_loss": 0.3018016815185547, | |
| "eval_runtime": 17.5979, | |
| "eval_samples_per_second": 7.728, | |
| "eval_steps_per_second": 0.966, | |
| "step": 3327 | |
| }, | |
| { | |
| "epoch": 87.05882352941177, | |
| "grad_norm": 0.11021004617214203, | |
| "learning_rate": 6.871345029239766e-06, | |
| "loss": 0.0711, | |
| "step": 3330 | |
| }, | |
| { | |
| "epoch": 87.3202614379085, | |
| "grad_norm": 0.03013734146952629, | |
| "learning_rate": 6.725146198830409e-06, | |
| "loss": 0.0424, | |
| "step": 3340 | |
| }, | |
| { | |
| "epoch": 87.58169934640523, | |
| "grad_norm": 69.32197570800781, | |
| "learning_rate": 6.578947368421053e-06, | |
| "loss": 0.0269, | |
| "step": 3350 | |
| }, | |
| { | |
| "epoch": 87.84313725490196, | |
| "grad_norm": 0.45887792110443115, | |
| "learning_rate": 6.432748538011696e-06, | |
| "loss": 0.0207, | |
| "step": 3360 | |
| }, | |
| { | |
| "epoch": 88.0, | |
| "eval_accuracy": 0.9632352941176471, | |
| "eval_loss": 0.332051545381546, | |
| "eval_runtime": 17.8575, | |
| "eval_samples_per_second": 7.616, | |
| "eval_steps_per_second": 0.952, | |
| "step": 3366 | |
| }, | |
| { | |
| "epoch": 88.10457516339869, | |
| "grad_norm": 0.007120809052139521, | |
| "learning_rate": 6.286549707602339e-06, | |
| "loss": 0.0047, | |
| "step": 3370 | |
| }, | |
| { | |
| "epoch": 88.36601307189542, | |
| "grad_norm": 0.051657985895872116, | |
| "learning_rate": 6.140350877192982e-06, | |
| "loss": 0.0224, | |
| "step": 3380 | |
| }, | |
| { | |
| "epoch": 88.62745098039215, | |
| "grad_norm": 0.6093434691429138, | |
| "learning_rate": 5.994152046783626e-06, | |
| "loss": 0.0052, | |
| "step": 3390 | |
| }, | |
| { | |
| "epoch": 88.88888888888889, | |
| "grad_norm": 25.99680519104004, | |
| "learning_rate": 5.8479532163742686e-06, | |
| "loss": 0.003, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 88.99346405228758, | |
| "eval_accuracy": 0.9705882352941176, | |
| "eval_loss": 0.30860844254493713, | |
| "eval_runtime": 18.245, | |
| "eval_samples_per_second": 7.454, | |
| "eval_steps_per_second": 0.932, | |
| "step": 3404 | |
| }, | |
| { | |
| "epoch": 89.15032679738562, | |
| "grad_norm": 31.555145263671875, | |
| "learning_rate": 5.701754385964912e-06, | |
| "loss": 0.0329, | |
| "step": 3410 | |
| }, | |
| { | |
| "epoch": 89.41176470588235, | |
| "grad_norm": 18.486536026000977, | |
| "learning_rate": 5.555555555555556e-06, | |
| "loss": 0.029, | |
| "step": 3420 | |
| }, | |
| { | |
| "epoch": 89.67320261437908, | |
| "grad_norm": 0.33306655287742615, | |
| "learning_rate": 5.409356725146199e-06, | |
| "loss": 0.0098, | |
| "step": 3430 | |
| }, | |
| { | |
| "epoch": 89.93464052287581, | |
| "grad_norm": 2.643474578857422, | |
| "learning_rate": 5.263157894736842e-06, | |
| "loss": 0.0157, | |
| "step": 3440 | |
| }, | |
| { | |
| "epoch": 89.98692810457516, | |
| "eval_accuracy": 0.9705882352941176, | |
| "eval_loss": 0.2947893440723419, | |
| "eval_runtime": 18.1316, | |
| "eval_samples_per_second": 7.501, | |
| "eval_steps_per_second": 0.938, | |
| "step": 3442 | |
| }, | |
| { | |
| "epoch": 90.19607843137256, | |
| "grad_norm": 6.317154407501221, | |
| "learning_rate": 5.116959064327485e-06, | |
| "loss": 0.008, | |
| "step": 3450 | |
| }, | |
| { | |
| "epoch": 90.45751633986929, | |
| "grad_norm": 1.63987398147583, | |
| "learning_rate": 4.970760233918129e-06, | |
| "loss": 0.0219, | |
| "step": 3460 | |
| }, | |
| { | |
| "epoch": 90.71895424836602, | |
| "grad_norm": 8.074739456176758, | |
| "learning_rate": 4.824561403508772e-06, | |
| "loss": 0.0188, | |
| "step": 3470 | |
| }, | |
| { | |
| "epoch": 90.98039215686275, | |
| "grad_norm": 0.2915269136428833, | |
| "learning_rate": 4.678362573099415e-06, | |
| "loss": 0.0428, | |
| "step": 3480 | |
| }, | |
| { | |
| "epoch": 90.98039215686275, | |
| "eval_accuracy": 0.9705882352941176, | |
| "eval_loss": 0.3174949586391449, | |
| "eval_runtime": 17.8483, | |
| "eval_samples_per_second": 7.62, | |
| "eval_steps_per_second": 0.952, | |
| "step": 3480 | |
| }, | |
| { | |
| "epoch": 91.24183006535948, | |
| "grad_norm": 0.3356679677963257, | |
| "learning_rate": 4.532163742690059e-06, | |
| "loss": 0.0161, | |
| "step": 3490 | |
| }, | |
| { | |
| "epoch": 91.50326797385621, | |
| "grad_norm": 1.1951477527618408, | |
| "learning_rate": 4.3859649122807014e-06, | |
| "loss": 0.0205, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 91.76470588235294, | |
| "grad_norm": 0.05076509341597557, | |
| "learning_rate": 4.239766081871345e-06, | |
| "loss": 0.0189, | |
| "step": 3510 | |
| }, | |
| { | |
| "epoch": 92.0, | |
| "eval_accuracy": 0.9632352941176471, | |
| "eval_loss": 0.3239772915840149, | |
| "eval_runtime": 17.301, | |
| "eval_samples_per_second": 7.861, | |
| "eval_steps_per_second": 0.983, | |
| "step": 3519 | |
| }, | |
| { | |
| "epoch": 92.02614379084967, | |
| "grad_norm": 1.3812580108642578, | |
| "learning_rate": 4.093567251461989e-06, | |
| "loss": 0.0212, | |
| "step": 3520 | |
| }, | |
| { | |
| "epoch": 92.2875816993464, | |
| "grad_norm": 0.3320296108722687, | |
| "learning_rate": 3.9473684210526315e-06, | |
| "loss": 0.0073, | |
| "step": 3530 | |
| }, | |
| { | |
| "epoch": 92.54901960784314, | |
| "grad_norm": 0.009532331489026546, | |
| "learning_rate": 3.8011695906432747e-06, | |
| "loss": 0.0053, | |
| "step": 3540 | |
| }, | |
| { | |
| "epoch": 92.81045751633987, | |
| "grad_norm": 0.5157586932182312, | |
| "learning_rate": 3.6549707602339183e-06, | |
| "loss": 0.0046, | |
| "step": 3550 | |
| }, | |
| { | |
| "epoch": 92.99346405228758, | |
| "eval_accuracy": 0.9632352941176471, | |
| "eval_loss": 0.341442346572876, | |
| "eval_runtime": 18.8672, | |
| "eval_samples_per_second": 7.208, | |
| "eval_steps_per_second": 0.901, | |
| "step": 3557 | |
| }, | |
| { | |
| "epoch": 93.0718954248366, | |
| "grad_norm": 61.38653564453125, | |
| "learning_rate": 3.5087719298245615e-06, | |
| "loss": 0.0246, | |
| "step": 3560 | |
| }, | |
| { | |
| "epoch": 93.33333333333333, | |
| "grad_norm": 0.477070152759552, | |
| "learning_rate": 3.3625730994152047e-06, | |
| "loss": 0.0639, | |
| "step": 3570 | |
| }, | |
| { | |
| "epoch": 93.59477124183006, | |
| "grad_norm": 68.3900375366211, | |
| "learning_rate": 3.216374269005848e-06, | |
| "loss": 0.0255, | |
| "step": 3580 | |
| }, | |
| { | |
| "epoch": 93.85620915032679, | |
| "grad_norm": 0.3444403111934662, | |
| "learning_rate": 3.070175438596491e-06, | |
| "loss": 0.0057, | |
| "step": 3590 | |
| }, | |
| { | |
| "epoch": 93.98692810457516, | |
| "eval_accuracy": 0.9632352941176471, | |
| "eval_loss": 0.33292174339294434, | |
| "eval_runtime": 17.7377, | |
| "eval_samples_per_second": 7.667, | |
| "eval_steps_per_second": 0.958, | |
| "step": 3595 | |
| }, | |
| { | |
| "epoch": 94.11764705882354, | |
| "grad_norm": 0.04389649257063866, | |
| "learning_rate": 2.9239766081871343e-06, | |
| "loss": 0.0058, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 94.37908496732027, | |
| "grad_norm": 0.5849317908287048, | |
| "learning_rate": 2.777777777777778e-06, | |
| "loss": 0.0586, | |
| "step": 3610 | |
| }, | |
| { | |
| "epoch": 94.640522875817, | |
| "grad_norm": 0.019542796537280083, | |
| "learning_rate": 2.631578947368421e-06, | |
| "loss": 0.001, | |
| "step": 3620 | |
| }, | |
| { | |
| "epoch": 94.90196078431373, | |
| "grad_norm": 0.002426290884613991, | |
| "learning_rate": 2.4853801169590643e-06, | |
| "loss": 0.0165, | |
| "step": 3630 | |
| }, | |
| { | |
| "epoch": 94.98039215686275, | |
| "eval_accuracy": 0.9632352941176471, | |
| "eval_loss": 0.32402223348617554, | |
| "eval_runtime": 17.5747, | |
| "eval_samples_per_second": 7.738, | |
| "eval_steps_per_second": 0.967, | |
| "step": 3633 | |
| }, | |
| { | |
| "epoch": 95.16339869281046, | |
| "grad_norm": 2.353595495223999, | |
| "learning_rate": 2.3391812865497075e-06, | |
| "loss": 0.0009, | |
| "step": 3640 | |
| }, | |
| { | |
| "epoch": 95.42483660130719, | |
| "grad_norm": 0.7732095718383789, | |
| "learning_rate": 2.1929824561403507e-06, | |
| "loss": 0.0273, | |
| "step": 3650 | |
| }, | |
| { | |
| "epoch": 95.68627450980392, | |
| "grad_norm": 0.006318532861769199, | |
| "learning_rate": 2.0467836257309943e-06, | |
| "loss": 0.0219, | |
| "step": 3660 | |
| }, | |
| { | |
| "epoch": 95.94771241830065, | |
| "grad_norm": 0.12237526476383209, | |
| "learning_rate": 1.9005847953216373e-06, | |
| "loss": 0.006, | |
| "step": 3670 | |
| }, | |
| { | |
| "epoch": 96.0, | |
| "eval_accuracy": 0.9705882352941176, | |
| "eval_loss": 0.3180083632469177, | |
| "eval_runtime": 18.1825, | |
| "eval_samples_per_second": 7.48, | |
| "eval_steps_per_second": 0.935, | |
| "step": 3672 | |
| }, | |
| { | |
| "epoch": 96.20915032679738, | |
| "grad_norm": 4.133842468261719, | |
| "learning_rate": 1.7543859649122807e-06, | |
| "loss": 0.0876, | |
| "step": 3680 | |
| }, | |
| { | |
| "epoch": 96.47058823529412, | |
| "grad_norm": 14.3917236328125, | |
| "learning_rate": 1.608187134502924e-06, | |
| "loss": 0.0033, | |
| "step": 3690 | |
| }, | |
| { | |
| "epoch": 96.73202614379085, | |
| "grad_norm": 0.6327334642410278, | |
| "learning_rate": 1.4619883040935671e-06, | |
| "loss": 0.0045, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 96.99346405228758, | |
| "grad_norm": 0.47620221972465515, | |
| "learning_rate": 1.3157894736842106e-06, | |
| "loss": 0.0172, | |
| "step": 3710 | |
| }, | |
| { | |
| "epoch": 96.99346405228758, | |
| "eval_accuracy": 0.9779411764705882, | |
| "eval_loss": 0.3103199303150177, | |
| "eval_runtime": 17.4264, | |
| "eval_samples_per_second": 7.804, | |
| "eval_steps_per_second": 0.976, | |
| "step": 3710 | |
| }, | |
| { | |
| "epoch": 97.25490196078431, | |
| "grad_norm": 43.838233947753906, | |
| "learning_rate": 1.1695906432748538e-06, | |
| "loss": 0.0047, | |
| "step": 3720 | |
| }, | |
| { | |
| "epoch": 97.51633986928104, | |
| "grad_norm": 0.001560373231768608, | |
| "learning_rate": 1.0233918128654972e-06, | |
| "loss": 0.0032, | |
| "step": 3730 | |
| }, | |
| { | |
| "epoch": 97.77777777777777, | |
| "grad_norm": 0.00045679722097702324, | |
| "learning_rate": 8.771929824561404e-07, | |
| "loss": 0.0109, | |
| "step": 3740 | |
| }, | |
| { | |
| "epoch": 97.98692810457516, | |
| "eval_accuracy": 0.9779411764705882, | |
| "eval_loss": 0.3034810721874237, | |
| "eval_runtime": 18.06, | |
| "eval_samples_per_second": 7.53, | |
| "eval_steps_per_second": 0.941, | |
| "step": 3748 | |
| }, | |
| { | |
| "epoch": 98.03921568627452, | |
| "grad_norm": 0.0029410182032734156, | |
| "learning_rate": 7.309941520467836e-07, | |
| "loss": 0.0093, | |
| "step": 3750 | |
| }, | |
| { | |
| "epoch": 98.30065359477125, | |
| "grad_norm": 0.060371335595846176, | |
| "learning_rate": 5.847953216374269e-07, | |
| "loss": 0.0147, | |
| "step": 3760 | |
| }, | |
| { | |
| "epoch": 98.56209150326798, | |
| "grad_norm": 0.0018022909061983228, | |
| "learning_rate": 4.385964912280702e-07, | |
| "loss": 0.0325, | |
| "step": 3770 | |
| }, | |
| { | |
| "epoch": 98.82352941176471, | |
| "grad_norm": 0.866423487663269, | |
| "learning_rate": 2.9239766081871344e-07, | |
| "loss": 0.0172, | |
| "step": 3780 | |
| }, | |
| { | |
| "epoch": 98.98039215686275, | |
| "eval_accuracy": 0.9779411764705882, | |
| "eval_loss": 0.3034467101097107, | |
| "eval_runtime": 20.5056, | |
| "eval_samples_per_second": 6.632, | |
| "eval_steps_per_second": 0.829, | |
| "step": 3786 | |
| }, | |
| { | |
| "epoch": 99.08496732026144, | |
| "grad_norm": 0.015289215371012688, | |
| "learning_rate": 1.4619883040935672e-07, | |
| "loss": 0.0003, | |
| "step": 3790 | |
| }, | |
| { | |
| "epoch": 99.34640522875817, | |
| "grad_norm": 0.3536844849586487, | |
| "learning_rate": 0.0, | |
| "loss": 0.0219, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 99.34640522875817, | |
| "eval_accuracy": 0.9779411764705882, | |
| "eval_loss": 0.3036399185657501, | |
| "eval_runtime": 18.1299, | |
| "eval_samples_per_second": 7.501, | |
| "eval_steps_per_second": 0.938, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 99.34640522875817, | |
| "step": 3800, | |
| "total_flos": 3.0228260830838784e+18, | |
| "train_loss": 0.1524556069365874, | |
| "train_runtime": 23400.6351, | |
| "train_samples_per_second": 5.231, | |
| "train_steps_per_second": 0.162 | |
| } | |
| ], | |
| "logging_steps": 10, | |
| "max_steps": 3800, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 100, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 3.0228260830838784e+18, | |
| "train_batch_size": 8, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |