| { |
| "best_metric": 0.4581826627254486, |
| "best_model_checkpoint": "./vit-base-HAM-10000-sharpened-large-patch-32/checkpoint-1300", |
| "epoch": 4.0, |
| "global_step": 2004, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.02, |
| "learning_rate": 0.00019900199600798404, |
| "loss": 1.2883, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.04, |
| "learning_rate": 0.00019800399201596808, |
| "loss": 1.0071, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.06, |
| "learning_rate": 0.0001970059880239521, |
| "loss": 1.0821, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.08, |
| "learning_rate": 0.00019600798403193614, |
| "loss": 1.1389, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.1, |
| "learning_rate": 0.00019500998003992018, |
| "loss": 0.932, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.12, |
| "learning_rate": 0.00019401197604790419, |
| "loss": 0.7723, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.14, |
| "learning_rate": 0.00019301397205588822, |
| "loss": 0.8736, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.16, |
| "learning_rate": 0.00019201596806387228, |
| "loss": 0.7789, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.18, |
| "learning_rate": 0.0001910179640718563, |
| "loss": 0.6811, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.2, |
| "learning_rate": 0.00019001996007984032, |
| "loss": 0.6739, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.2, |
| "eval_accuracy": 0.7256857855361596, |
| "eval_f1": 0.7256857855361597, |
| "eval_loss": 0.7774831652641296, |
| "eval_precision": 0.7256857855361596, |
| "eval_recall": 0.7256857855361596, |
| "eval_runtime": 38.3528, |
| "eval_samples_per_second": 52.278, |
| "eval_steps_per_second": 6.545, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.22, |
| "learning_rate": 0.00018902195608782435, |
| "loss": 0.8604, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.24, |
| "learning_rate": 0.0001880239520958084, |
| "loss": 0.7292, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.26, |
| "learning_rate": 0.00018702594810379242, |
| "loss": 0.7045, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.28, |
| "learning_rate": 0.00018602794411177646, |
| "loss": 0.838, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.3, |
| "learning_rate": 0.0001850299401197605, |
| "loss": 0.7668, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.32, |
| "learning_rate": 0.00018403193612774452, |
| "loss": 0.745, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.34, |
| "learning_rate": 0.00018303393213572856, |
| "loss": 0.8483, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.36, |
| "learning_rate": 0.00018203592814371256, |
| "loss": 0.6595, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.38, |
| "learning_rate": 0.00018103792415169662, |
| "loss": 0.7186, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.4, |
| "learning_rate": 0.00018003992015968066, |
| "loss": 0.6922, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.4, |
| "eval_accuracy": 0.771072319201995, |
| "eval_f1": 0.771072319201995, |
| "eval_loss": 0.645542562007904, |
| "eval_precision": 0.771072319201995, |
| "eval_recall": 0.771072319201995, |
| "eval_runtime": 38.7638, |
| "eval_samples_per_second": 51.724, |
| "eval_steps_per_second": 6.475, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.42, |
| "learning_rate": 0.0001790419161676647, |
| "loss": 0.665, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.44, |
| "learning_rate": 0.0001780439121756487, |
| "loss": 0.9191, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.46, |
| "learning_rate": 0.00017704590818363273, |
| "loss": 0.7978, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.48, |
| "learning_rate": 0.0001760479041916168, |
| "loss": 0.6338, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.5, |
| "learning_rate": 0.00017504990019960083, |
| "loss": 0.8688, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.52, |
| "learning_rate": 0.00017405189620758483, |
| "loss": 0.8124, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.54, |
| "learning_rate": 0.00017305389221556887, |
| "loss": 0.5754, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.56, |
| "learning_rate": 0.0001720558882235529, |
| "loss": 0.5264, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.58, |
| "learning_rate": 0.00017105788423153693, |
| "loss": 0.6262, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.6, |
| "learning_rate": 0.00017005988023952097, |
| "loss": 0.8219, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.6, |
| "eval_accuracy": 0.742643391521197, |
| "eval_f1": 0.742643391521197, |
| "eval_loss": 0.7582290768623352, |
| "eval_precision": 0.742643391521197, |
| "eval_recall": 0.742643391521197, |
| "eval_runtime": 38.0282, |
| "eval_samples_per_second": 52.724, |
| "eval_steps_per_second": 6.6, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.62, |
| "learning_rate": 0.000169061876247505, |
| "loss": 0.5713, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.64, |
| "learning_rate": 0.00016806387225548904, |
| "loss": 0.7999, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.66, |
| "learning_rate": 0.00016706586826347307, |
| "loss": 0.7414, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.68, |
| "learning_rate": 0.00016606786427145708, |
| "loss": 0.6376, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.7, |
| "learning_rate": 0.00016506986027944114, |
| "loss": 0.539, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.72, |
| "learning_rate": 0.00016407185628742517, |
| "loss": 0.637, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.74, |
| "learning_rate": 0.00016307385229540918, |
| "loss": 0.6501, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.76, |
| "learning_rate": 0.0001620758483033932, |
| "loss": 0.444, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.78, |
| "learning_rate": 0.00016107784431137724, |
| "loss": 0.6798, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.8, |
| "learning_rate": 0.0001600798403193613, |
| "loss": 0.6801, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.8, |
| "eval_accuracy": 0.7650872817955112, |
| "eval_f1": 0.7650872817955112, |
| "eval_loss": 0.6363049149513245, |
| "eval_precision": 0.7650872817955112, |
| "eval_recall": 0.7650872817955112, |
| "eval_runtime": 38.6305, |
| "eval_samples_per_second": 51.902, |
| "eval_steps_per_second": 6.497, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.82, |
| "learning_rate": 0.0001590818363273453, |
| "loss": 0.7903, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.84, |
| "learning_rate": 0.00015808383233532935, |
| "loss": 0.6096, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.86, |
| "learning_rate": 0.00015708582834331338, |
| "loss": 0.6847, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.88, |
| "learning_rate": 0.0001560878243512974, |
| "loss": 0.7761, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.9, |
| "learning_rate": 0.00015508982035928145, |
| "loss": 0.6405, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.92, |
| "learning_rate": 0.00015409181636726548, |
| "loss": 0.6396, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.94, |
| "learning_rate": 0.00015309381237524951, |
| "loss": 0.7242, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.96, |
| "learning_rate": 0.00015209580838323355, |
| "loss": 0.6011, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.98, |
| "learning_rate": 0.00015109780439121756, |
| "loss": 0.6377, |
| "step": 490 |
| }, |
| { |
| "epoch": 1.0, |
| "learning_rate": 0.0001500998003992016, |
| "loss": 0.5499, |
| "step": 500 |
| }, |
| { |
| "epoch": 1.0, |
| "eval_accuracy": 0.7750623441396509, |
| "eval_f1": 0.7750623441396509, |
| "eval_loss": 0.6230655312538147, |
| "eval_precision": 0.7750623441396509, |
| "eval_recall": 0.7750623441396509, |
| "eval_runtime": 38.229, |
| "eval_samples_per_second": 52.447, |
| "eval_steps_per_second": 6.566, |
| "step": 500 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 0.00014910179640718565, |
| "loss": 0.5537, |
| "step": 510 |
| }, |
| { |
| "epoch": 1.04, |
| "learning_rate": 0.00014810379241516968, |
| "loss": 0.5257, |
| "step": 520 |
| }, |
| { |
| "epoch": 1.06, |
| "learning_rate": 0.0001471057884231537, |
| "loss": 0.5025, |
| "step": 530 |
| }, |
| { |
| "epoch": 1.08, |
| "learning_rate": 0.00014610778443113772, |
| "loss": 0.375, |
| "step": 540 |
| }, |
| { |
| "epoch": 1.1, |
| "learning_rate": 0.00014510978043912176, |
| "loss": 0.6301, |
| "step": 550 |
| }, |
| { |
| "epoch": 1.12, |
| "learning_rate": 0.00014411177644710582, |
| "loss": 0.549, |
| "step": 560 |
| }, |
| { |
| "epoch": 1.14, |
| "learning_rate": 0.00014311377245508983, |
| "loss": 0.5298, |
| "step": 570 |
| }, |
| { |
| "epoch": 1.16, |
| "learning_rate": 0.00014211576846307386, |
| "loss": 0.5017, |
| "step": 580 |
| }, |
| { |
| "epoch": 1.18, |
| "learning_rate": 0.0001411177644710579, |
| "loss": 0.344, |
| "step": 590 |
| }, |
| { |
| "epoch": 1.2, |
| "learning_rate": 0.00014011976047904193, |
| "loss": 0.5156, |
| "step": 600 |
| }, |
| { |
| "epoch": 1.2, |
| "eval_accuracy": 0.7760598503740649, |
| "eval_f1": 0.7760598503740649, |
| "eval_loss": 0.6398969292640686, |
| "eval_precision": 0.7760598503740649, |
| "eval_recall": 0.7760598503740649, |
| "eval_runtime": 38.1403, |
| "eval_samples_per_second": 52.569, |
| "eval_steps_per_second": 6.581, |
| "step": 600 |
| }, |
| { |
| "epoch": 1.22, |
| "learning_rate": 0.00013912175648702593, |
| "loss": 0.4876, |
| "step": 610 |
| }, |
| { |
| "epoch": 1.24, |
| "learning_rate": 0.00013812375249501, |
| "loss": 0.4899, |
| "step": 620 |
| }, |
| { |
| "epoch": 1.26, |
| "learning_rate": 0.00013712574850299403, |
| "loss": 0.4315, |
| "step": 630 |
| }, |
| { |
| "epoch": 1.28, |
| "learning_rate": 0.00013612774451097806, |
| "loss": 0.5865, |
| "step": 640 |
| }, |
| { |
| "epoch": 1.3, |
| "learning_rate": 0.00013512974051896207, |
| "loss": 0.4416, |
| "step": 650 |
| }, |
| { |
| "epoch": 1.32, |
| "learning_rate": 0.0001341317365269461, |
| "loss": 0.5614, |
| "step": 660 |
| }, |
| { |
| "epoch": 1.34, |
| "learning_rate": 0.00013313373253493016, |
| "loss": 0.5993, |
| "step": 670 |
| }, |
| { |
| "epoch": 1.36, |
| "learning_rate": 0.0001321357285429142, |
| "loss": 0.4292, |
| "step": 680 |
| }, |
| { |
| "epoch": 1.38, |
| "learning_rate": 0.0001311377245508982, |
| "loss": 0.5711, |
| "step": 690 |
| }, |
| { |
| "epoch": 1.4, |
| "learning_rate": 0.00013013972055888224, |
| "loss": 0.4478, |
| "step": 700 |
| }, |
| { |
| "epoch": 1.4, |
| "eval_accuracy": 0.801995012468828, |
| "eval_f1": 0.801995012468828, |
| "eval_loss": 0.5324347615242004, |
| "eval_precision": 0.801995012468828, |
| "eval_recall": 0.801995012468828, |
| "eval_runtime": 37.9285, |
| "eval_samples_per_second": 52.863, |
| "eval_steps_per_second": 6.618, |
| "step": 700 |
| }, |
| { |
| "epoch": 1.42, |
| "learning_rate": 0.00012914171656686627, |
| "loss": 0.4795, |
| "step": 710 |
| }, |
| { |
| "epoch": 1.44, |
| "learning_rate": 0.0001281437125748503, |
| "loss": 0.5054, |
| "step": 720 |
| }, |
| { |
| "epoch": 1.46, |
| "learning_rate": 0.00012714570858283434, |
| "loss": 0.4268, |
| "step": 730 |
| }, |
| { |
| "epoch": 1.48, |
| "learning_rate": 0.00012614770459081837, |
| "loss": 0.4477, |
| "step": 740 |
| }, |
| { |
| "epoch": 1.5, |
| "learning_rate": 0.0001251497005988024, |
| "loss": 0.5392, |
| "step": 750 |
| }, |
| { |
| "epoch": 1.52, |
| "learning_rate": 0.00012415169660678644, |
| "loss": 0.4299, |
| "step": 760 |
| }, |
| { |
| "epoch": 1.54, |
| "learning_rate": 0.00012315369261477045, |
| "loss": 0.3193, |
| "step": 770 |
| }, |
| { |
| "epoch": 1.56, |
| "learning_rate": 0.0001221556886227545, |
| "loss": 0.5999, |
| "step": 780 |
| }, |
| { |
| "epoch": 1.58, |
| "learning_rate": 0.00012115768463073853, |
| "loss": 0.5319, |
| "step": 790 |
| }, |
| { |
| "epoch": 1.6, |
| "learning_rate": 0.00012015968063872256, |
| "loss": 0.4364, |
| "step": 800 |
| }, |
| { |
| "epoch": 1.6, |
| "eval_accuracy": 0.7970074812967581, |
| "eval_f1": 0.797007481296758, |
| "eval_loss": 0.5596903562545776, |
| "eval_precision": 0.7970074812967581, |
| "eval_recall": 0.7970074812967581, |
| "eval_runtime": 38.1531, |
| "eval_samples_per_second": 52.551, |
| "eval_steps_per_second": 6.579, |
| "step": 800 |
| }, |
| { |
| "epoch": 1.62, |
| "learning_rate": 0.0001191616766467066, |
| "loss": 0.453, |
| "step": 810 |
| }, |
| { |
| "epoch": 1.64, |
| "learning_rate": 0.00011816367265469061, |
| "loss": 0.3049, |
| "step": 820 |
| }, |
| { |
| "epoch": 1.66, |
| "learning_rate": 0.00011716566866267466, |
| "loss": 0.5135, |
| "step": 830 |
| }, |
| { |
| "epoch": 1.68, |
| "learning_rate": 0.0001161676646706587, |
| "loss": 0.54, |
| "step": 840 |
| }, |
| { |
| "epoch": 1.7, |
| "learning_rate": 0.00011516966067864272, |
| "loss": 0.425, |
| "step": 850 |
| }, |
| { |
| "epoch": 1.72, |
| "learning_rate": 0.00011417165668662675, |
| "loss": 0.378, |
| "step": 860 |
| }, |
| { |
| "epoch": 1.74, |
| "learning_rate": 0.00011317365269461077, |
| "loss": 0.5278, |
| "step": 870 |
| }, |
| { |
| "epoch": 1.76, |
| "learning_rate": 0.00011217564870259483, |
| "loss": 0.4983, |
| "step": 880 |
| }, |
| { |
| "epoch": 1.78, |
| "learning_rate": 0.00011117764471057885, |
| "loss": 0.489, |
| "step": 890 |
| }, |
| { |
| "epoch": 1.8, |
| "learning_rate": 0.00011017964071856288, |
| "loss": 0.4545, |
| "step": 900 |
| }, |
| { |
| "epoch": 1.8, |
| "eval_accuracy": 0.8114713216957606, |
| "eval_f1": 0.8114713216957605, |
| "eval_loss": 0.5212180018424988, |
| "eval_precision": 0.8114713216957606, |
| "eval_recall": 0.8114713216957606, |
| "eval_runtime": 37.2951, |
| "eval_samples_per_second": 53.76, |
| "eval_steps_per_second": 6.73, |
| "step": 900 |
| }, |
| { |
| "epoch": 1.82, |
| "learning_rate": 0.0001091816367265469, |
| "loss": 0.4246, |
| "step": 910 |
| }, |
| { |
| "epoch": 1.84, |
| "learning_rate": 0.00010818363273453094, |
| "loss": 0.4826, |
| "step": 920 |
| }, |
| { |
| "epoch": 1.86, |
| "learning_rate": 0.00010718562874251496, |
| "loss": 0.3276, |
| "step": 930 |
| }, |
| { |
| "epoch": 1.88, |
| "learning_rate": 0.00010618762475049902, |
| "loss": 0.4456, |
| "step": 940 |
| }, |
| { |
| "epoch": 1.9, |
| "learning_rate": 0.00010518962075848304, |
| "loss": 0.3797, |
| "step": 950 |
| }, |
| { |
| "epoch": 1.92, |
| "learning_rate": 0.00010419161676646707, |
| "loss": 0.4399, |
| "step": 960 |
| }, |
| { |
| "epoch": 1.94, |
| "learning_rate": 0.0001031936127744511, |
| "loss": 0.5069, |
| "step": 970 |
| }, |
| { |
| "epoch": 1.96, |
| "learning_rate": 0.00010219560878243513, |
| "loss": 0.4422, |
| "step": 980 |
| }, |
| { |
| "epoch": 1.98, |
| "learning_rate": 0.00010119760479041918, |
| "loss": 0.4464, |
| "step": 990 |
| }, |
| { |
| "epoch": 2.0, |
| "learning_rate": 0.00010019960079840321, |
| "loss": 0.4294, |
| "step": 1000 |
| }, |
| { |
| "epoch": 2.0, |
| "eval_accuracy": 0.8264339152119701, |
| "eval_f1": 0.8264339152119702, |
| "eval_loss": 0.49257662892341614, |
| "eval_precision": 0.8264339152119701, |
| "eval_recall": 0.8264339152119701, |
| "eval_runtime": 37.6717, |
| "eval_samples_per_second": 53.223, |
| "eval_steps_per_second": 6.663, |
| "step": 1000 |
| }, |
| { |
| "epoch": 2.02, |
| "learning_rate": 9.920159680638723e-05, |
| "loss": 0.3227, |
| "step": 1010 |
| }, |
| { |
| "epoch": 2.04, |
| "learning_rate": 9.820359281437126e-05, |
| "loss": 0.1965, |
| "step": 1020 |
| }, |
| { |
| "epoch": 2.06, |
| "learning_rate": 9.72055888223553e-05, |
| "loss": 0.2966, |
| "step": 1030 |
| }, |
| { |
| "epoch": 2.08, |
| "learning_rate": 9.620758483033933e-05, |
| "loss": 0.2862, |
| "step": 1040 |
| }, |
| { |
| "epoch": 2.1, |
| "learning_rate": 9.520958083832335e-05, |
| "loss": 0.1773, |
| "step": 1050 |
| }, |
| { |
| "epoch": 2.12, |
| "learning_rate": 9.42115768463074e-05, |
| "loss": 0.2001, |
| "step": 1060 |
| }, |
| { |
| "epoch": 2.14, |
| "learning_rate": 9.321357285429142e-05, |
| "loss": 0.2323, |
| "step": 1070 |
| }, |
| { |
| "epoch": 2.16, |
| "learning_rate": 9.221556886227547e-05, |
| "loss": 0.2046, |
| "step": 1080 |
| }, |
| { |
| "epoch": 2.18, |
| "learning_rate": 9.121756487025949e-05, |
| "loss": 0.2135, |
| "step": 1090 |
| }, |
| { |
| "epoch": 2.2, |
| "learning_rate": 9.021956087824352e-05, |
| "loss": 0.135, |
| "step": 1100 |
| }, |
| { |
| "epoch": 2.2, |
| "eval_accuracy": 0.8204488778054863, |
| "eval_f1": 0.8204488778054863, |
| "eval_loss": 0.5448276400566101, |
| "eval_precision": 0.8204488778054863, |
| "eval_recall": 0.8204488778054863, |
| "eval_runtime": 37.5323, |
| "eval_samples_per_second": 53.421, |
| "eval_steps_per_second": 6.688, |
| "step": 1100 |
| }, |
| { |
| "epoch": 2.22, |
| "learning_rate": 8.922155688622755e-05, |
| "loss": 0.2832, |
| "step": 1110 |
| }, |
| { |
| "epoch": 2.24, |
| "learning_rate": 8.822355289421159e-05, |
| "loss": 0.2335, |
| "step": 1120 |
| }, |
| { |
| "epoch": 2.26, |
| "learning_rate": 8.722554890219561e-05, |
| "loss": 0.2144, |
| "step": 1130 |
| }, |
| { |
| "epoch": 2.28, |
| "learning_rate": 8.622754491017964e-05, |
| "loss": 0.226, |
| "step": 1140 |
| }, |
| { |
| "epoch": 2.3, |
| "learning_rate": 8.522954091816367e-05, |
| "loss": 0.3022, |
| "step": 1150 |
| }, |
| { |
| "epoch": 2.32, |
| "learning_rate": 8.423153692614771e-05, |
| "loss": 0.3257, |
| "step": 1160 |
| }, |
| { |
| "epoch": 2.34, |
| "learning_rate": 8.323353293413174e-05, |
| "loss": 0.226, |
| "step": 1170 |
| }, |
| { |
| "epoch": 2.36, |
| "learning_rate": 8.223552894211578e-05, |
| "loss": 0.2909, |
| "step": 1180 |
| }, |
| { |
| "epoch": 2.38, |
| "learning_rate": 8.123752495009981e-05, |
| "loss": 0.3035, |
| "step": 1190 |
| }, |
| { |
| "epoch": 2.4, |
| "learning_rate": 8.023952095808383e-05, |
| "loss": 0.2628, |
| "step": 1200 |
| }, |
| { |
| "epoch": 2.4, |
| "eval_accuracy": 0.830423940149626, |
| "eval_f1": 0.830423940149626, |
| "eval_loss": 0.4915546178817749, |
| "eval_precision": 0.830423940149626, |
| "eval_recall": 0.830423940149626, |
| "eval_runtime": 37.915, |
| "eval_samples_per_second": 52.881, |
| "eval_steps_per_second": 6.62, |
| "step": 1200 |
| }, |
| { |
| "epoch": 2.42, |
| "learning_rate": 7.924151696606786e-05, |
| "loss": 0.2807, |
| "step": 1210 |
| }, |
| { |
| "epoch": 2.44, |
| "learning_rate": 7.82435129740519e-05, |
| "loss": 0.2288, |
| "step": 1220 |
| }, |
| { |
| "epoch": 2.46, |
| "learning_rate": 7.724550898203593e-05, |
| "loss": 0.2201, |
| "step": 1230 |
| }, |
| { |
| "epoch": 2.48, |
| "learning_rate": 7.624750499001996e-05, |
| "loss": 0.2661, |
| "step": 1240 |
| }, |
| { |
| "epoch": 2.5, |
| "learning_rate": 7.5249500998004e-05, |
| "loss": 0.1678, |
| "step": 1250 |
| }, |
| { |
| "epoch": 2.51, |
| "learning_rate": 7.425149700598802e-05, |
| "loss": 0.22, |
| "step": 1260 |
| }, |
| { |
| "epoch": 2.53, |
| "learning_rate": 7.325349301397207e-05, |
| "loss": 0.2717, |
| "step": 1270 |
| }, |
| { |
| "epoch": 2.55, |
| "learning_rate": 7.225548902195609e-05, |
| "loss": 0.2301, |
| "step": 1280 |
| }, |
| { |
| "epoch": 2.57, |
| "learning_rate": 7.125748502994012e-05, |
| "loss": 0.1739, |
| "step": 1290 |
| }, |
| { |
| "epoch": 2.59, |
| "learning_rate": 7.025948103792415e-05, |
| "loss": 0.2577, |
| "step": 1300 |
| }, |
| { |
| "epoch": 2.59, |
| "eval_accuracy": 0.8403990024937655, |
| "eval_f1": 0.8403990024937654, |
| "eval_loss": 0.4581826627254486, |
| "eval_precision": 0.8403990024937655, |
| "eval_recall": 0.8403990024937655, |
| "eval_runtime": 37.5405, |
| "eval_samples_per_second": 53.409, |
| "eval_steps_per_second": 6.686, |
| "step": 1300 |
| }, |
| { |
| "epoch": 2.61, |
| "learning_rate": 6.926147704590819e-05, |
| "loss": 0.2621, |
| "step": 1310 |
| }, |
| { |
| "epoch": 2.63, |
| "learning_rate": 6.826347305389222e-05, |
| "loss": 0.1563, |
| "step": 1320 |
| }, |
| { |
| "epoch": 2.65, |
| "learning_rate": 6.726546906187626e-05, |
| "loss": 0.2372, |
| "step": 1330 |
| }, |
| { |
| "epoch": 2.67, |
| "learning_rate": 6.626746506986028e-05, |
| "loss": 0.1489, |
| "step": 1340 |
| }, |
| { |
| "epoch": 2.69, |
| "learning_rate": 6.526946107784432e-05, |
| "loss": 0.2604, |
| "step": 1350 |
| }, |
| { |
| "epoch": 2.71, |
| "learning_rate": 6.427145708582834e-05, |
| "loss": 0.2094, |
| "step": 1360 |
| }, |
| { |
| "epoch": 2.73, |
| "learning_rate": 6.327345309381238e-05, |
| "loss": 0.2793, |
| "step": 1370 |
| }, |
| { |
| "epoch": 2.75, |
| "learning_rate": 6.227544910179641e-05, |
| "loss": 0.1381, |
| "step": 1380 |
| }, |
| { |
| "epoch": 2.77, |
| "learning_rate": 6.127744510978044e-05, |
| "loss": 0.244, |
| "step": 1390 |
| }, |
| { |
| "epoch": 2.79, |
| "learning_rate": 6.027944111776448e-05, |
| "loss": 0.2093, |
| "step": 1400 |
| }, |
| { |
| "epoch": 2.79, |
| "eval_accuracy": 0.8344139650872818, |
| "eval_f1": 0.8344139650872819, |
| "eval_loss": 0.5079377889633179, |
| "eval_precision": 0.8344139650872818, |
| "eval_recall": 0.8344139650872818, |
| "eval_runtime": 37.8246, |
| "eval_samples_per_second": 53.008, |
| "eval_steps_per_second": 6.636, |
| "step": 1400 |
| }, |
| { |
| "epoch": 2.81, |
| "learning_rate": 5.9281437125748505e-05, |
| "loss": 0.1457, |
| "step": 1410 |
| }, |
| { |
| "epoch": 2.83, |
| "learning_rate": 5.828343313373253e-05, |
| "loss": 0.1364, |
| "step": 1420 |
| }, |
| { |
| "epoch": 2.85, |
| "learning_rate": 5.728542914171657e-05, |
| "loss": 0.3349, |
| "step": 1430 |
| }, |
| { |
| "epoch": 2.87, |
| "learning_rate": 5.62874251497006e-05, |
| "loss": 0.2107, |
| "step": 1440 |
| }, |
| { |
| "epoch": 2.89, |
| "learning_rate": 5.5289421157684626e-05, |
| "loss": 0.276, |
| "step": 1450 |
| }, |
| { |
| "epoch": 2.91, |
| "learning_rate": 5.429141716566867e-05, |
| "loss": 0.2177, |
| "step": 1460 |
| }, |
| { |
| "epoch": 2.93, |
| "learning_rate": 5.3293413173652694e-05, |
| "loss": 0.1916, |
| "step": 1470 |
| }, |
| { |
| "epoch": 2.95, |
| "learning_rate": 5.2295409181636734e-05, |
| "loss": 0.2323, |
| "step": 1480 |
| }, |
| { |
| "epoch": 2.97, |
| "learning_rate": 5.129740518962076e-05, |
| "loss": 0.2431, |
| "step": 1490 |
| }, |
| { |
| "epoch": 2.99, |
| "learning_rate": 5.029940119760479e-05, |
| "loss": 0.1415, |
| "step": 1500 |
| }, |
| { |
| "epoch": 2.99, |
| "eval_accuracy": 0.8438902743142145, |
| "eval_f1": 0.8438902743142145, |
| "eval_loss": 0.47602903842926025, |
| "eval_precision": 0.8438902743142145, |
| "eval_recall": 0.8438902743142145, |
| "eval_runtime": 37.8786, |
| "eval_samples_per_second": 52.932, |
| "eval_steps_per_second": 6.626, |
| "step": 1500 |
| }, |
| { |
| "epoch": 3.01, |
| "learning_rate": 4.930139720558882e-05, |
| "loss": 0.0882, |
| "step": 1510 |
| }, |
| { |
| "epoch": 3.03, |
| "learning_rate": 4.8303393213572856e-05, |
| "loss": 0.0716, |
| "step": 1520 |
| }, |
| { |
| "epoch": 3.05, |
| "learning_rate": 4.730538922155689e-05, |
| "loss": 0.0771, |
| "step": 1530 |
| }, |
| { |
| "epoch": 3.07, |
| "learning_rate": 4.630738522954092e-05, |
| "loss": 0.0552, |
| "step": 1540 |
| }, |
| { |
| "epoch": 3.09, |
| "learning_rate": 4.530938123752495e-05, |
| "loss": 0.0835, |
| "step": 1550 |
| }, |
| { |
| "epoch": 3.11, |
| "learning_rate": 4.4311377245508984e-05, |
| "loss": 0.0618, |
| "step": 1560 |
| }, |
| { |
| "epoch": 3.13, |
| "learning_rate": 4.331337325349302e-05, |
| "loss": 0.0968, |
| "step": 1570 |
| }, |
| { |
| "epoch": 3.15, |
| "learning_rate": 4.231536926147705e-05, |
| "loss": 0.069, |
| "step": 1580 |
| }, |
| { |
| "epoch": 3.17, |
| "learning_rate": 4.131736526946108e-05, |
| "loss": 0.0743, |
| "step": 1590 |
| }, |
| { |
| "epoch": 3.19, |
| "learning_rate": 4.031936127744511e-05, |
| "loss": 0.0686, |
| "step": 1600 |
| }, |
| { |
| "epoch": 3.19, |
| "eval_accuracy": 0.8443890274314214, |
| "eval_f1": 0.8443890274314214, |
| "eval_loss": 0.5378904938697815, |
| "eval_precision": 0.8443890274314214, |
| "eval_recall": 0.8443890274314214, |
| "eval_runtime": 38.0031, |
| "eval_samples_per_second": 52.759, |
| "eval_steps_per_second": 6.605, |
| "step": 1600 |
| }, |
| { |
| "epoch": 3.21, |
| "learning_rate": 3.9321357285429146e-05, |
| "loss": 0.0419, |
| "step": 1610 |
| }, |
| { |
| "epoch": 3.23, |
| "learning_rate": 3.832335329341318e-05, |
| "loss": 0.1127, |
| "step": 1620 |
| }, |
| { |
| "epoch": 3.25, |
| "learning_rate": 3.732534930139721e-05, |
| "loss": 0.0298, |
| "step": 1630 |
| }, |
| { |
| "epoch": 3.27, |
| "learning_rate": 3.632734530938124e-05, |
| "loss": 0.0502, |
| "step": 1640 |
| }, |
| { |
| "epoch": 3.29, |
| "learning_rate": 3.5329341317365274e-05, |
| "loss": 0.0558, |
| "step": 1650 |
| }, |
| { |
| "epoch": 3.31, |
| "learning_rate": 3.433133732534931e-05, |
| "loss": 0.0676, |
| "step": 1660 |
| }, |
| { |
| "epoch": 3.33, |
| "learning_rate": 3.3333333333333335e-05, |
| "loss": 0.0565, |
| "step": 1670 |
| }, |
| { |
| "epoch": 3.35, |
| "learning_rate": 3.233532934131737e-05, |
| "loss": 0.0362, |
| "step": 1680 |
| }, |
| { |
| "epoch": 3.37, |
| "learning_rate": 3.13373253493014e-05, |
| "loss": 0.0209, |
| "step": 1690 |
| }, |
| { |
| "epoch": 3.39, |
| "learning_rate": 3.0339321357285433e-05, |
| "loss": 0.1031, |
| "step": 1700 |
| }, |
| { |
| "epoch": 3.39, |
| "eval_accuracy": 0.8384039900249377, |
| "eval_f1": 0.8384039900249377, |
| "eval_loss": 0.557210385799408, |
| "eval_precision": 0.8384039900249377, |
| "eval_recall": 0.8384039900249377, |
| "eval_runtime": 37.4452, |
| "eval_samples_per_second": 53.545, |
| "eval_steps_per_second": 6.703, |
| "step": 1700 |
| }, |
| { |
| "epoch": 3.41, |
| "learning_rate": 2.934131736526946e-05, |
| "loss": 0.0791, |
| "step": 1710 |
| }, |
| { |
| "epoch": 3.43, |
| "learning_rate": 2.8343313373253494e-05, |
| "loss": 0.0941, |
| "step": 1720 |
| }, |
| { |
| "epoch": 3.45, |
| "learning_rate": 2.7345309381237527e-05, |
| "loss": 0.0674, |
| "step": 1730 |
| }, |
| { |
| "epoch": 3.47, |
| "learning_rate": 2.634730538922156e-05, |
| "loss": 0.0417, |
| "step": 1740 |
| }, |
| { |
| "epoch": 3.49, |
| "learning_rate": 2.5349301397205588e-05, |
| "loss": 0.0647, |
| "step": 1750 |
| }, |
| { |
| "epoch": 3.51, |
| "learning_rate": 2.4351297405189622e-05, |
| "loss": 0.1045, |
| "step": 1760 |
| }, |
| { |
| "epoch": 3.53, |
| "learning_rate": 2.3353293413173656e-05, |
| "loss": 0.1059, |
| "step": 1770 |
| }, |
| { |
| "epoch": 3.55, |
| "learning_rate": 2.2355289421157686e-05, |
| "loss": 0.0254, |
| "step": 1780 |
| }, |
| { |
| "epoch": 3.57, |
| "learning_rate": 2.135728542914172e-05, |
| "loss": 0.0636, |
| "step": 1790 |
| }, |
| { |
| "epoch": 3.59, |
| "learning_rate": 2.035928143712575e-05, |
| "loss": 0.102, |
| "step": 1800 |
| }, |
| { |
| "epoch": 3.59, |
| "eval_accuracy": 0.8463840399002494, |
| "eval_f1": 0.8463840399002494, |
| "eval_loss": 0.5342676639556885, |
| "eval_precision": 0.8463840399002494, |
| "eval_recall": 0.8463840399002494, |
| "eval_runtime": 37.4586, |
| "eval_samples_per_second": 53.526, |
| "eval_steps_per_second": 6.701, |
| "step": 1800 |
| }, |
| { |
| "epoch": 3.61, |
| "learning_rate": 1.9361277445109784e-05, |
| "loss": 0.0378, |
| "step": 1810 |
| }, |
| { |
| "epoch": 3.63, |
| "learning_rate": 1.8363273453093814e-05, |
| "loss": 0.0388, |
| "step": 1820 |
| }, |
| { |
| "epoch": 3.65, |
| "learning_rate": 1.7365269461077845e-05, |
| "loss": 0.0193, |
| "step": 1830 |
| }, |
| { |
| "epoch": 3.67, |
| "learning_rate": 1.6367265469061875e-05, |
| "loss": 0.0341, |
| "step": 1840 |
| }, |
| { |
| "epoch": 3.69, |
| "learning_rate": 1.536926147704591e-05, |
| "loss": 0.0349, |
| "step": 1850 |
| }, |
| { |
| "epoch": 3.71, |
| "learning_rate": 1.437125748502994e-05, |
| "loss": 0.0203, |
| "step": 1860 |
| }, |
| { |
| "epoch": 3.73, |
| "learning_rate": 1.3373253493013973e-05, |
| "loss": 0.0903, |
| "step": 1870 |
| }, |
| { |
| "epoch": 3.75, |
| "learning_rate": 1.2375249500998005e-05, |
| "loss": 0.0418, |
| "step": 1880 |
| }, |
| { |
| "epoch": 3.77, |
| "learning_rate": 1.1377245508982035e-05, |
| "loss": 0.0682, |
| "step": 1890 |
| }, |
| { |
| "epoch": 3.79, |
| "learning_rate": 1.0379241516966067e-05, |
| "loss": 0.0531, |
| "step": 1900 |
| }, |
| { |
| "epoch": 3.79, |
| "eval_accuracy": 0.8478802992518704, |
| "eval_f1": 0.8478802992518704, |
| "eval_loss": 0.5481980443000793, |
| "eval_precision": 0.8478802992518704, |
| "eval_recall": 0.8478802992518704, |
| "eval_runtime": 38.2352, |
| "eval_samples_per_second": 52.439, |
| "eval_steps_per_second": 6.565, |
| "step": 1900 |
| }, |
| { |
| "epoch": 3.81, |
| "learning_rate": 9.3812375249501e-06, |
| "loss": 0.0868, |
| "step": 1910 |
| }, |
| { |
| "epoch": 3.83, |
| "learning_rate": 8.383233532934131e-06, |
| "loss": 0.0404, |
| "step": 1920 |
| }, |
| { |
| "epoch": 3.85, |
| "learning_rate": 7.3852295409181634e-06, |
| "loss": 0.0331, |
| "step": 1930 |
| }, |
| { |
| "epoch": 3.87, |
| "learning_rate": 6.3872255489021955e-06, |
| "loss": 0.0773, |
| "step": 1940 |
| }, |
| { |
| "epoch": 3.89, |
| "learning_rate": 5.3892215568862275e-06, |
| "loss": 0.0235, |
| "step": 1950 |
| }, |
| { |
| "epoch": 3.91, |
| "learning_rate": 4.39121756487026e-06, |
| "loss": 0.022, |
| "step": 1960 |
| }, |
| { |
| "epoch": 3.93, |
| "learning_rate": 3.3932135728542917e-06, |
| "loss": 0.0996, |
| "step": 1970 |
| }, |
| { |
| "epoch": 3.95, |
| "learning_rate": 2.3952095808383237e-06, |
| "loss": 0.0377, |
| "step": 1980 |
| }, |
| { |
| "epoch": 3.97, |
| "learning_rate": 1.3972055888223554e-06, |
| "loss": 0.0766, |
| "step": 1990 |
| }, |
| { |
| "epoch": 3.99, |
| "learning_rate": 3.992015968063872e-07, |
| "loss": 0.0756, |
| "step": 2000 |
| }, |
| { |
| "epoch": 3.99, |
| "eval_accuracy": 0.8453865336658354, |
| "eval_f1": 0.8453865336658354, |
| "eval_loss": 0.5454253554344177, |
| "eval_precision": 0.8453865336658354, |
| "eval_recall": 0.8453865336658354, |
| "eval_runtime": 37.927, |
| "eval_samples_per_second": 52.865, |
| "eval_steps_per_second": 6.618, |
| "step": 2000 |
| }, |
| { |
| "epoch": 4.0, |
| "step": 2004, |
| "total_flos": 8.840915657570304e+18, |
| "train_loss": 0.376142632985811, |
| "train_runtime": 2351.3529, |
| "train_samples_per_second": 13.626, |
| "train_steps_per_second": 0.852 |
| } |
| ], |
| "max_steps": 2004, |
| "num_train_epochs": 4, |
| "total_flos": 8.840915657570304e+18, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|