{
  "best_metric": 0.8090909090909091,
  "best_model_checkpoint": "swinv2-tiny-patch4-window8-256-Diabetic-Retinopathy-DA\\checkpoint-506",
  "epoch": 40.0,
  "eval_steps": 500,
  "global_step": 920,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.43,
      "learning_rate": 5.4347826086956525e-06,
      "loss": 1.6086,
      "step": 10
    },
    {
      "epoch": 0.87,
      "learning_rate": 1.0869565217391305e-05,
      "loss": 1.5987,
      "step": 20
    },
    {
      "epoch": 1.0,
      "eval_accuracy": 0.4909090909090909,
      "eval_loss": 1.5683298110961914,
      "eval_runtime": 1.0793,
      "eval_samples_per_second": 101.92,
      "eval_steps_per_second": 3.706,
      "step": 23
    },
    {
      "epoch": 1.3,
      "learning_rate": 1.630434782608696e-05,
      "loss": 1.5503,
      "step": 30
    },
    {
      "epoch": 1.74,
      "learning_rate": 2.173913043478261e-05,
      "loss": 1.4137,
      "step": 40
    },
    {
      "epoch": 2.0,
      "eval_accuracy": 0.4909090909090909,
      "eval_loss": 1.263899564743042,
      "eval_runtime": 0.6653,
      "eval_samples_per_second": 165.342,
      "eval_steps_per_second": 6.012,
      "step": 46
    },
    {
      "epoch": 2.17,
      "learning_rate": 2.7173913043478262e-05,
      "loss": 1.3316,
      "step": 50
    },
    {
      "epoch": 2.61,
      "learning_rate": 3.260869565217392e-05,
      "loss": 1.1988,
      "step": 60
    },
    {
      "epoch": 3.0,
      "eval_accuracy": 0.7636363636363637,
      "eval_loss": 0.8725916743278503,
      "eval_runtime": 0.6051,
      "eval_samples_per_second": 181.777,
      "eval_steps_per_second": 6.61,
      "step": 69
    },
    {
      "epoch": 3.04,
      "learning_rate": 3.804347826086957e-05,
      "loss": 1.058,
      "step": 70
    },
    {
      "epoch": 3.48,
      "learning_rate": 4.347826086956522e-05,
      "loss": 0.9368,
      "step": 80
    },
    {
      "epoch": 3.91,
      "learning_rate": 4.891304347826087e-05,
      "loss": 0.8533,
      "step": 90
    },
    {
      "epoch": 4.0,
      "eval_accuracy": 0.7545454545454545,
      "eval_loss": 0.6361170411109924,
      "eval_runtime": 0.6021,
      "eval_samples_per_second": 182.679,
      "eval_steps_per_second": 6.643,
      "step": 92
    },
    {
      "epoch": 4.35,
      "learning_rate": 4.9516908212560386e-05,
      "loss": 0.852,
      "step": 100
    },
    {
      "epoch": 4.78,
      "learning_rate": 4.891304347826087e-05,
      "loss": 0.8042,
      "step": 110
    },
    {
      "epoch": 5.0,
      "eval_accuracy": 0.7545454545454545,
      "eval_loss": 0.5984649658203125,
      "eval_runtime": 0.6091,
      "eval_samples_per_second": 180.58,
      "eval_steps_per_second": 6.567,
      "step": 115
    },
    {
      "epoch": 5.22,
      "learning_rate": 4.830917874396135e-05,
      "loss": 0.7697,
      "step": 120
    },
    {
      "epoch": 5.65,
      "learning_rate": 4.770531400966184e-05,
      "loss": 0.7349,
      "step": 130
    },
    {
      "epoch": 6.0,
      "eval_accuracy": 0.7545454545454545,
      "eval_loss": 0.5943260788917542,
      "eval_runtime": 0.6065,
      "eval_samples_per_second": 181.376,
      "eval_steps_per_second": 6.595,
      "step": 138
    },
    {
      "epoch": 6.09,
      "learning_rate": 4.710144927536232e-05,
      "loss": 0.7623,
      "step": 140
    },
    {
      "epoch": 6.52,
      "learning_rate": 4.64975845410628e-05,
      "loss": 0.7249,
      "step": 150
    },
    {
      "epoch": 6.96,
      "learning_rate": 4.589371980676328e-05,
      "loss": 0.7003,
      "step": 160
    },
    {
      "epoch": 7.0,
      "eval_accuracy": 0.7636363636363637,
      "eval_loss": 0.5177913904190063,
      "eval_runtime": 0.6592,
      "eval_samples_per_second": 166.881,
      "eval_steps_per_second": 6.068,
      "step": 161
    },
    {
      "epoch": 7.39,
      "learning_rate": 4.528985507246377e-05,
      "loss": 0.6755,
      "step": 170
    },
    {
      "epoch": 7.83,
      "learning_rate": 4.4685990338164255e-05,
      "loss": 0.6641,
      "step": 180
    },
    {
      "epoch": 8.0,
      "eval_accuracy": 0.7545454545454545,
      "eval_loss": 0.5058346390724182,
      "eval_runtime": 0.6561,
      "eval_samples_per_second": 167.65,
      "eval_steps_per_second": 6.096,
      "step": 184
    },
    {
      "epoch": 8.26,
      "learning_rate": 4.408212560386474e-05,
      "loss": 0.6263,
      "step": 190
    },
    {
      "epoch": 8.7,
      "learning_rate": 4.347826086956522e-05,
      "loss": 0.641,
      "step": 200
    },
    {
      "epoch": 9.0,
      "eval_accuracy": 0.7909090909090909,
      "eval_loss": 0.5091794729232788,
      "eval_runtime": 0.6128,
      "eval_samples_per_second": 179.502,
      "eval_steps_per_second": 6.527,
      "step": 207
    },
    {
      "epoch": 9.13,
      "learning_rate": 4.2874396135265707e-05,
      "loss": 0.6213,
      "step": 210
    },
    {
      "epoch": 9.57,
      "learning_rate": 4.2270531400966186e-05,
      "loss": 0.599,
      "step": 220
    },
    {
      "epoch": 10.0,
      "learning_rate": 4.166666666666667e-05,
      "loss": 0.6571,
      "step": 230
    },
    {
      "epoch": 10.0,
      "eval_accuracy": 0.7636363636363637,
      "eval_loss": 0.5319333076477051,
      "eval_runtime": 0.6289,
      "eval_samples_per_second": 174.897,
      "eval_steps_per_second": 6.36,
      "step": 230
    },
    {
      "epoch": 10.43,
      "learning_rate": 4.106280193236715e-05,
      "loss": 0.626,
      "step": 240
    },
    {
      "epoch": 10.87,
      "learning_rate": 4.045893719806764e-05,
      "loss": 0.6522,
      "step": 250
    },
    {
      "epoch": 11.0,
      "eval_accuracy": 0.7909090909090909,
      "eval_loss": 0.5725868344306946,
      "eval_runtime": 0.7147,
      "eval_samples_per_second": 153.916,
      "eval_steps_per_second": 5.597,
      "step": 253
    },
    {
      "epoch": 11.3,
      "learning_rate": 3.985507246376812e-05,
      "loss": 0.5859,
      "step": 260
    },
    {
      "epoch": 11.74,
      "learning_rate": 3.92512077294686e-05,
      "loss": 0.5659,
      "step": 270
    },
    {
      "epoch": 12.0,
      "eval_accuracy": 0.7727272727272727,
      "eval_loss": 0.5489825010299683,
      "eval_runtime": 0.6332,
      "eval_samples_per_second": 173.73,
      "eval_steps_per_second": 6.317,
      "step": 276
    },
    {
      "epoch": 12.17,
      "learning_rate": 3.864734299516908e-05,
      "loss": 0.556,
      "step": 280
    },
    {
      "epoch": 12.61,
      "learning_rate": 3.804347826086957e-05,
      "loss": 0.5511,
      "step": 290
    },
    {
      "epoch": 13.0,
      "eval_accuracy": 0.8,
      "eval_loss": 0.546451210975647,
      "eval_runtime": 0.6251,
      "eval_samples_per_second": 175.96,
      "eval_steps_per_second": 6.399,
      "step": 299
    },
    {
      "epoch": 13.04,
      "learning_rate": 3.743961352657005e-05,
      "loss": 0.5614,
      "step": 300
    },
    {
      "epoch": 13.48,
      "learning_rate": 3.6835748792270534e-05,
      "loss": 0.5552,
      "step": 310
    },
    {
      "epoch": 13.91,
      "learning_rate": 3.6231884057971014e-05,
      "loss": 0.5435,
      "step": 320
    },
    {
      "epoch": 14.0,
      "eval_accuracy": 0.7909090909090909,
      "eval_loss": 0.5727524757385254,
      "eval_runtime": 0.6272,
      "eval_samples_per_second": 175.393,
      "eval_steps_per_second": 6.378,
      "step": 322
    },
    {
      "epoch": 14.35,
      "learning_rate": 3.56280193236715e-05,
      "loss": 0.5447,
      "step": 330
    },
    {
      "epoch": 14.78,
      "learning_rate": 3.502415458937198e-05,
      "loss": 0.5259,
      "step": 340
    },
    {
      "epoch": 15.0,
      "eval_accuracy": 0.7636363636363637,
      "eval_loss": 0.6047121286392212,
      "eval_runtime": 0.6091,
      "eval_samples_per_second": 180.583,
      "eval_steps_per_second": 6.567,
      "step": 345
    },
    {
      "epoch": 15.22,
      "learning_rate": 3.4420289855072465e-05,
      "loss": 0.507,
      "step": 350
    },
    {
      "epoch": 15.65,
      "learning_rate": 3.381642512077295e-05,
      "loss": 0.5496,
      "step": 360
    },
    {
      "epoch": 16.0,
      "eval_accuracy": 0.7818181818181819,
      "eval_loss": 0.6479418873786926,
      "eval_runtime": 0.6141,
      "eval_samples_per_second": 179.113,
      "eval_steps_per_second": 6.513,
      "step": 368
    },
    {
      "epoch": 16.09,
      "learning_rate": 3.321256038647343e-05,
      "loss": 0.5197,
      "step": 370
    },
    {
      "epoch": 16.52,
      "learning_rate": 3.260869565217392e-05,
      "loss": 0.4831,
      "step": 380
    },
    {
      "epoch": 16.96,
      "learning_rate": 3.2004830917874396e-05,
      "loss": 0.543,
      "step": 390
    },
    {
      "epoch": 17.0,
      "eval_accuracy": 0.7727272727272727,
      "eval_loss": 0.6039574146270752,
      "eval_runtime": 0.6076,
      "eval_samples_per_second": 181.028,
      "eval_steps_per_second": 6.583,
      "step": 391
    },
    {
      "epoch": 17.39,
      "learning_rate": 3.140096618357488e-05,
      "loss": 0.4882,
      "step": 400
    },
    {
      "epoch": 17.83,
      "learning_rate": 3.079710144927536e-05,
      "loss": 0.4646,
      "step": 410
    },
    {
      "epoch": 18.0,
      "eval_accuracy": 0.7818181818181819,
      "eval_loss": 0.6269252896308899,
      "eval_runtime": 0.6351,
      "eval_samples_per_second": 173.193,
      "eval_steps_per_second": 6.298,
      "step": 414
    },
    {
      "epoch": 18.26,
      "learning_rate": 3.0193236714975848e-05,
      "loss": 0.4597,
      "step": 420
    },
    {
      "epoch": 18.7,
      "learning_rate": 2.9589371980676327e-05,
      "loss": 0.4867,
      "step": 430
    },
    {
      "epoch": 19.0,
      "eval_accuracy": 0.7909090909090909,
      "eval_loss": 0.6535181403160095,
      "eval_runtime": 0.6591,
      "eval_samples_per_second": 166.882,
      "eval_steps_per_second": 6.068,
      "step": 437
    },
    {
      "epoch": 19.13,
      "learning_rate": 2.8985507246376814e-05,
      "loss": 0.4751,
      "step": 440
    },
    {
      "epoch": 19.57,
      "learning_rate": 2.8381642512077293e-05,
      "loss": 0.4354,
      "step": 450
    },
    {
      "epoch": 20.0,
      "learning_rate": 2.777777777777778e-05,
      "loss": 0.4357,
      "step": 460
    },
    {
      "epoch": 20.0,
      "eval_accuracy": 0.7727272727272727,
      "eval_loss": 0.6990672945976257,
      "eval_runtime": 0.6056,
      "eval_samples_per_second": 181.624,
      "eval_steps_per_second": 6.605,
      "step": 460
    },
    {
      "epoch": 20.43,
      "learning_rate": 2.7173913043478262e-05,
      "loss": 0.4275,
      "step": 470
    },
    {
      "epoch": 20.87,
      "learning_rate": 2.6570048309178748e-05,
      "loss": 0.4392,
      "step": 480
    },
    {
      "epoch": 21.0,
      "eval_accuracy": 0.7636363636363637,
      "eval_loss": 0.7126674056053162,
      "eval_runtime": 0.6271,
      "eval_samples_per_second": 175.399,
      "eval_steps_per_second": 6.378,
      "step": 483
    },
    {
      "epoch": 21.3,
      "learning_rate": 2.5966183574879227e-05,
      "loss": 0.4595,
      "step": 490
    },
    {
      "epoch": 21.74,
      "learning_rate": 2.5362318840579714e-05,
      "loss": 0.4403,
      "step": 500
    },
    {
      "epoch": 22.0,
      "eval_accuracy": 0.8090909090909091,
      "eval_loss": 0.6974316239356995,
      "eval_runtime": 0.6812,
      "eval_samples_per_second": 161.491,
      "eval_steps_per_second": 5.872,
      "step": 506
    },
    {
      "epoch": 22.17,
      "learning_rate": 2.4758454106280193e-05,
      "loss": 0.4305,
      "step": 510
    },
    {
      "epoch": 22.61,
      "learning_rate": 2.4154589371980676e-05,
      "loss": 0.4358,
      "step": 520
    },
    {
      "epoch": 23.0,
      "eval_accuracy": 0.7818181818181819,
      "eval_loss": 0.688274085521698,
      "eval_runtime": 0.6268,
      "eval_samples_per_second": 175.487,
      "eval_steps_per_second": 6.381,
      "step": 529
    },
    {
      "epoch": 23.04,
      "learning_rate": 2.355072463768116e-05,
      "loss": 0.4054,
      "step": 530
    },
    {
      "epoch": 23.48,
      "learning_rate": 2.294685990338164e-05,
      "loss": 0.4162,
      "step": 540
    },
    {
      "epoch": 23.91,
      "learning_rate": 2.2342995169082127e-05,
      "loss": 0.4094,
      "step": 550
    },
    {
      "epoch": 24.0,
      "eval_accuracy": 0.8,
      "eval_loss": 0.6768017411231995,
      "eval_runtime": 0.6762,
      "eval_samples_per_second": 162.682,
      "eval_steps_per_second": 5.916,
      "step": 552
    },
    {
      "epoch": 24.35,
      "learning_rate": 2.173913043478261e-05,
      "loss": 0.3892,
      "step": 560
    },
    {
      "epoch": 24.78,
      "learning_rate": 2.1135265700483093e-05,
      "loss": 0.3913,
      "step": 570
    },
    {
      "epoch": 25.0,
      "eval_accuracy": 0.7636363636363637,
      "eval_loss": 0.7269611954689026,
      "eval_runtime": 0.6222,
      "eval_samples_per_second": 176.803,
      "eval_steps_per_second": 6.429,
      "step": 575
    },
    {
      "epoch": 25.22,
      "learning_rate": 2.0531400966183576e-05,
      "loss": 0.3916,
      "step": 580
    },
    {
      "epoch": 25.65,
      "learning_rate": 1.992753623188406e-05,
      "loss": 0.3686,
      "step": 590
    },
    {
      "epoch": 26.0,
      "eval_accuracy": 0.7727272727272727,
      "eval_loss": 0.7104293704032898,
      "eval_runtime": 0.6272,
      "eval_samples_per_second": 175.395,
      "eval_steps_per_second": 6.378,
      "step": 598
    },
    {
      "epoch": 26.09,
      "learning_rate": 1.932367149758454e-05,
      "loss": 0.4003,
      "step": 600
    },
    {
      "epoch": 26.52,
      "learning_rate": 1.8719806763285024e-05,
      "loss": 0.3857,
      "step": 610
    },
    {
      "epoch": 26.96,
      "learning_rate": 1.8115942028985507e-05,
      "loss": 0.3679,
      "step": 620
    },
    {
      "epoch": 27.0,
      "eval_accuracy": 0.7818181818181819,
      "eval_loss": 0.7115088701248169,
      "eval_runtime": 0.6281,
      "eval_samples_per_second": 175.12,
      "eval_steps_per_second": 6.368,
      "step": 621
    },
    {
      "epoch": 27.39,
      "learning_rate": 1.751207729468599e-05,
      "loss": 0.3723,
      "step": 630
    },
    {
      "epoch": 27.83,
      "learning_rate": 1.6908212560386476e-05,
      "loss": 0.378,
      "step": 640
    },
    {
      "epoch": 28.0,
      "eval_accuracy": 0.8090909090909091,
      "eval_loss": 0.8020210862159729,
      "eval_runtime": 0.6672,
      "eval_samples_per_second": 164.88,
      "eval_steps_per_second": 5.996,
      "step": 644
    },
    {
      "epoch": 28.26,
      "learning_rate": 1.630434782608696e-05,
      "loss": 0.3979,
      "step": 650
    },
    {
      "epoch": 28.7,
      "learning_rate": 1.570048309178744e-05,
      "loss": 0.3583,
      "step": 660
    },
    {
      "epoch": 29.0,
      "eval_accuracy": 0.7909090909090909,
      "eval_loss": 0.7524499893188477,
      "eval_runtime": 0.6692,
      "eval_samples_per_second": 164.387,
      "eval_steps_per_second": 5.978,
      "step": 667
    },
    {
      "epoch": 29.13,
      "learning_rate": 1.5096618357487924e-05,
      "loss": 0.3708,
      "step": 670
    },
    {
      "epoch": 29.57,
      "learning_rate": 1.4492753623188407e-05,
      "loss": 0.3351,
      "step": 680
    },
    {
      "epoch": 30.0,
      "learning_rate": 1.388888888888889e-05,
      "loss": 0.3299,
      "step": 690
    },
    {
      "epoch": 30.0,
      "eval_accuracy": 0.7909090909090909,
      "eval_loss": 0.7783340215682983,
      "eval_runtime": 0.6563,
      "eval_samples_per_second": 167.609,
      "eval_steps_per_second": 6.095,
      "step": 690
    },
    {
      "epoch": 30.43,
      "learning_rate": 1.3285024154589374e-05,
      "loss": 0.3476,
      "step": 700
    },
    {
      "epoch": 30.87,
      "learning_rate": 1.2681159420289857e-05,
      "loss": 0.3672,
      "step": 710
    },
    {
      "epoch": 31.0,
      "eval_accuracy": 0.7909090909090909,
      "eval_loss": 0.8193163871765137,
      "eval_runtime": 0.6541,
      "eval_samples_per_second": 168.158,
      "eval_steps_per_second": 6.115,
      "step": 713
    },
    {
      "epoch": 31.3,
      "learning_rate": 1.2077294685990338e-05,
      "loss": 0.3257,
      "step": 720
    },
    {
      "epoch": 31.74,
      "learning_rate": 1.147342995169082e-05,
      "loss": 0.3567,
      "step": 730
    },
    {
      "epoch": 32.0,
      "eval_accuracy": 0.7909090909090909,
      "eval_loss": 0.809545636177063,
      "eval_runtime": 0.6397,
      "eval_samples_per_second": 171.969,
      "eval_steps_per_second": 6.253,
      "step": 736
    },
    {
      "epoch": 32.17,
      "learning_rate": 1.0869565217391305e-05,
      "loss": 0.32,
      "step": 740
    },
    {
      "epoch": 32.61,
      "learning_rate": 1.0265700483091788e-05,
      "loss": 0.3585,
      "step": 750
    },
    {
      "epoch": 33.0,
      "eval_accuracy": 0.7909090909090909,
      "eval_loss": 0.8323901295661926,
      "eval_runtime": 0.6281,
      "eval_samples_per_second": 175.12,
      "eval_steps_per_second": 6.368,
      "step": 759
    },
    {
      "epoch": 33.04,
      "learning_rate": 9.66183574879227e-06,
      "loss": 0.3212,
      "step": 760
    },
    {
      "epoch": 33.48,
      "learning_rate": 9.057971014492753e-06,
      "loss": 0.3189,
      "step": 770
    },
    {
      "epoch": 33.91,
      "learning_rate": 8.454106280193238e-06,
      "loss": 0.3191,
      "step": 780
    },
    {
      "epoch": 34.0,
      "eval_accuracy": 0.7909090909090909,
      "eval_loss": 0.8041682243347168,
      "eval_runtime": 0.6299,
      "eval_samples_per_second": 174.635,
      "eval_steps_per_second": 6.35,
      "step": 782
    },
    {
      "epoch": 34.35,
      "learning_rate": 7.85024154589372e-06,
      "loss": 0.3019,
      "step": 790
    },
    {
      "epoch": 34.78,
      "learning_rate": 7.246376811594203e-06,
      "loss": 0.3144,
      "step": 800
    },
    {
      "epoch": 35.0,
      "eval_accuracy": 0.7909090909090909,
      "eval_loss": 0.8189137578010559,
      "eval_runtime": 0.6467,
      "eval_samples_per_second": 170.106,
      "eval_steps_per_second": 6.186,
      "step": 805
    },
    {
      "epoch": 35.22,
      "learning_rate": 6.642512077294687e-06,
      "loss": 0.333,
      "step": 810
    },
    {
      "epoch": 35.65,
      "learning_rate": 6.038647342995169e-06,
      "loss": 0.3452,
      "step": 820
    },
    {
      "epoch": 36.0,
      "eval_accuracy": 0.7909090909090909,
      "eval_loss": 0.8377164006233215,
      "eval_runtime": 0.6036,
      "eval_samples_per_second": 182.227,
      "eval_steps_per_second": 6.626,
      "step": 828
    },
    {
      "epoch": 36.09,
      "learning_rate": 5.4347826086956525e-06,
      "loss": 0.2989,
      "step": 830
    },
    {
      "epoch": 36.52,
      "learning_rate": 4.830917874396135e-06,
      "loss": 0.2819,
      "step": 840
    },
    {
      "epoch": 36.96,
      "learning_rate": 4.227053140096619e-06,
      "loss": 0.3263,
      "step": 850
    },
    {
      "epoch": 37.0,
      "eval_accuracy": 0.7909090909090909,
      "eval_loss": 0.8204471468925476,
      "eval_runtime": 0.6006,
      "eval_samples_per_second": 183.137,
      "eval_steps_per_second": 6.66,
      "step": 851
    },
    {
      "epoch": 37.39,
      "learning_rate": 3.6231884057971017e-06,
      "loss": 0.3016,
      "step": 860
    },
    {
      "epoch": 37.83,
      "learning_rate": 3.0193236714975845e-06,
      "loss": 0.2939,
      "step": 870
    },
    {
      "epoch": 38.0,
      "eval_accuracy": 0.7909090909090909,
      "eval_loss": 0.810295581817627,
      "eval_runtime": 0.6091,
      "eval_samples_per_second": 180.583,
      "eval_steps_per_second": 6.567,
      "step": 874
    },
    {
      "epoch": 38.26,
      "learning_rate": 2.4154589371980677e-06,
      "loss": 0.2872,
      "step": 880
    },
    {
      "epoch": 38.7,
      "learning_rate": 1.8115942028985508e-06,
      "loss": 0.3152,
      "step": 890
    },
    {
      "epoch": 39.0,
      "eval_accuracy": 0.7818181818181819,
      "eval_loss": 0.8183740973472595,
      "eval_runtime": 0.6091,
      "eval_samples_per_second": 180.583,
      "eval_steps_per_second": 6.567,
      "step": 897
    },
    {
      "epoch": 39.13,
      "learning_rate": 1.2077294685990338e-06,
      "loss": 0.3059,
      "step": 900
    },
    {
      "epoch": 39.57,
      "learning_rate": 6.038647342995169e-07,
      "loss": 0.3041,
      "step": 910
    },
    {
      "epoch": 40.0,
      "learning_rate": 0.0,
      "loss": 0.2787,
      "step": 920
    },
    {
      "epoch": 40.0,
      "eval_accuracy": 0.7818181818181819,
      "eval_loss": 0.8240975141525269,
      "eval_runtime": 0.6032,
      "eval_samples_per_second": 182.376,
      "eval_steps_per_second": 6.632,
      "step": 920
    },
    {
      "epoch": 40.0,
      "step": 920,
      "total_flos": 3.825055592868741e+18,
      "train_loss": 0.5426292188789533,
      "train_runtime": 1410.8495,
      "train_samples_per_second": 83.326,
      "train_steps_per_second": 0.652
    }
  ],
  "logging_steps": 10,
  "max_steps": 920,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 40,
  "save_steps": 500,
  "total_flos": 3.825055592868741e+18,
  "train_batch_size": 32,
  "trial_name": null,
  "trial_params": null
}