{
  "best_metric": 0.9508196721311475,
  "best_model_checkpoint": "./vit-LungCancer1/checkpoint-1644",
  "epoch": 12.0,
  "global_step": 1644,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.07,
      "learning_rate": 5.839416058394161e-06,
      "loss": 1.5263,
      "step": 10
    },
    {
      "epoch": 0.15,
      "learning_rate": 1.3138686131386862e-05,
      "loss": 1.2535,
      "step": 20
    },
    {
      "epoch": 0.22,
      "learning_rate": 1.9708029197080295e-05,
      "loss": 1.0549,
      "step": 30
    },
    {
      "epoch": 0.29,
      "learning_rate": 2.7007299270072995e-05,
      "loss": 1.0019,
      "step": 40
    },
    {
      "epoch": 0.36,
      "learning_rate": 3.43065693430657e-05,
      "loss": 0.834,
      "step": 50
    },
    {
      "epoch": 0.44,
      "learning_rate": 4.16058394160584e-05,
      "loss": 0.996,
      "step": 60
    },
    {
      "epoch": 0.51,
      "learning_rate": 4.89051094890511e-05,
      "loss": 0.8396,
      "step": 70
    },
    {
      "epoch": 0.58,
      "learning_rate": 5.6204379562043795e-05,
      "loss": 0.9554,
      "step": 80
    },
    {
      "epoch": 0.66,
      "learning_rate": 6.35036496350365e-05,
      "loss": 0.8082,
      "step": 90
    },
    {
      "epoch": 0.73,
      "learning_rate": 7.080291970802921e-05,
      "loss": 0.9226,
      "step": 100
    },
    {
      "epoch": 0.8,
      "learning_rate": 7.81021897810219e-05,
      "loss": 0.7346,
      "step": 110
    },
    {
      "epoch": 0.88,
      "learning_rate": 8.54014598540146e-05,
      "loss": 1.1845,
      "step": 120
    },
    {
      "epoch": 0.95,
      "learning_rate": 9.27007299270073e-05,
      "loss": 0.8577,
      "step": 130
    },
    {
      "epoch": 1.0,
      "eval_accuracy": 0.7131147540983607,
      "eval_loss": 0.6326848268508911,
      "eval_runtime": 38.5798,
      "eval_samples_per_second": 3.162,
      "eval_steps_per_second": 0.415,
      "step": 137
    },
    {
      "epoch": 1.02,
      "learning_rate": 0.0001,
      "loss": 0.7385,
      "step": 140
    },
    {
      "epoch": 1.09,
      "learning_rate": 0.0001072992700729927,
      "loss": 0.6147,
      "step": 150
    },
    {
      "epoch": 1.17,
      "learning_rate": 0.00011459854014598541,
      "loss": 0.7883,
      "step": 160
    },
    {
      "epoch": 1.24,
      "learning_rate": 0.0001218978102189781,
      "loss": 0.5992,
      "step": 170
    },
    {
      "epoch": 1.31,
      "learning_rate": 0.00012919708029197083,
      "loss": 0.6838,
      "step": 180
    },
    {
      "epoch": 1.39,
      "learning_rate": 0.0001364963503649635,
      "loss": 0.602,
      "step": 190
    },
    {
      "epoch": 1.46,
      "learning_rate": 0.00014306569343065694,
      "loss": 0.7759,
      "step": 200
    },
    {
      "epoch": 1.53,
      "learning_rate": 0.00015036496350364964,
      "loss": 0.8934,
      "step": 210
    },
    {
      "epoch": 1.61,
      "learning_rate": 0.00015766423357664236,
      "loss": 0.7176,
      "step": 220
    },
    {
      "epoch": 1.68,
      "learning_rate": 0.00016496350364963503,
      "loss": 0.5465,
      "step": 230
    },
    {
      "epoch": 1.75,
      "learning_rate": 0.00017226277372262773,
      "loss": 0.456,
      "step": 240
    },
    {
      "epoch": 1.82,
      "learning_rate": 0.00017956204379562045,
      "loss": 1.0494,
      "step": 250
    },
    {
      "epoch": 1.9,
      "learning_rate": 0.0001861313868613139,
      "loss": 0.6576,
      "step": 260
    },
    {
      "epoch": 1.97,
      "learning_rate": 0.00019343065693430656,
      "loss": 0.8688,
      "step": 270
    },
    {
      "epoch": 2.0,
      "eval_accuracy": 0.7377049180327869,
      "eval_loss": 0.6352217197418213,
      "eval_runtime": 2.5417,
      "eval_samples_per_second": 48.0,
      "eval_steps_per_second": 6.295,
      "step": 274
    },
    {
      "epoch": 2.04,
      "learning_rate": 0.00019991889699918898,
      "loss": 0.7188,
      "step": 280
    },
    {
      "epoch": 2.12,
      "learning_rate": 0.0001991078669910787,
      "loss": 0.6241,
      "step": 290
    },
    {
      "epoch": 2.19,
      "learning_rate": 0.00019829683698296838,
      "loss": 0.7577,
      "step": 300
    },
    {
      "epoch": 2.26,
      "learning_rate": 0.00019748580697485807,
      "loss": 0.6734,
      "step": 310
    },
    {
      "epoch": 2.34,
      "learning_rate": 0.00019667477696674778,
      "loss": 0.7604,
      "step": 320
    },
    {
      "epoch": 2.41,
      "learning_rate": 0.00019586374695863747,
      "loss": 0.8586,
      "step": 330
    },
    {
      "epoch": 2.48,
      "learning_rate": 0.0001950527169505272,
      "loss": 0.6902,
      "step": 340
    },
    {
      "epoch": 2.55,
      "learning_rate": 0.0001942416869424169,
      "loss": 0.6251,
      "step": 350
    },
    {
      "epoch": 2.63,
      "learning_rate": 0.00019343065693430656,
      "loss": 0.6236,
      "step": 360
    },
    {
      "epoch": 2.7,
      "learning_rate": 0.00019261962692619628,
      "loss": 0.9306,
      "step": 370
    },
    {
      "epoch": 2.77,
      "learning_rate": 0.00019180859691808597,
      "loss": 0.8566,
      "step": 380
    },
    {
      "epoch": 2.85,
      "learning_rate": 0.00019099756690997568,
      "loss": 0.5177,
      "step": 390
    },
    {
      "epoch": 2.92,
      "learning_rate": 0.0001901865369018654,
      "loss": 0.5699,
      "step": 400
    },
    {
      "epoch": 2.99,
      "learning_rate": 0.00018937550689375506,
      "loss": 0.6084,
      "step": 410
    },
    {
      "epoch": 3.0,
      "eval_accuracy": 0.680327868852459,
      "eval_loss": 1.126508355140686,
      "eval_runtime": 2.4124,
      "eval_samples_per_second": 50.573,
      "eval_steps_per_second": 6.632,
      "step": 411
    },
    {
      "epoch": 3.07,
      "learning_rate": 0.00018856447688564478,
      "loss": 0.8547,
      "step": 420
    },
    {
      "epoch": 3.14,
      "learning_rate": 0.00018775344687753446,
      "loss": 0.9728,
      "step": 430
    },
    {
      "epoch": 3.21,
      "learning_rate": 0.00018694241686942418,
      "loss": 0.4814,
      "step": 440
    },
    {
      "epoch": 3.28,
      "learning_rate": 0.0001861313868613139,
      "loss": 0.6865,
      "step": 450
    },
    {
      "epoch": 3.36,
      "learning_rate": 0.00018532035685320356,
      "loss": 0.2393,
      "step": 460
    },
    {
      "epoch": 3.43,
      "learning_rate": 0.00018450932684509327,
      "loss": 0.6487,
      "step": 470
    },
    {
      "epoch": 3.5,
      "learning_rate": 0.000183698296836983,
      "loss": 0.9157,
      "step": 480
    },
    {
      "epoch": 3.58,
      "learning_rate": 0.00018288726682887268,
      "loss": 0.8132,
      "step": 490
    },
    {
      "epoch": 3.65,
      "learning_rate": 0.0001820762368207624,
      "loss": 0.4784,
      "step": 500
    },
    {
      "epoch": 3.72,
      "learning_rate": 0.00018126520681265205,
      "loss": 0.4648,
      "step": 510
    },
    {
      "epoch": 3.8,
      "learning_rate": 0.00018045417680454177,
      "loss": 0.3903,
      "step": 520
    },
    {
      "epoch": 3.87,
      "learning_rate": 0.00017964314679643148,
      "loss": 0.5677,
      "step": 530
    },
    {
      "epoch": 3.94,
      "learning_rate": 0.00017883211678832117,
      "loss": 0.3922,
      "step": 540
    },
    {
      "epoch": 4.0,
      "eval_accuracy": 0.7622950819672131,
      "eval_loss": 0.7107774019241333,
      "eval_runtime": 2.435,
      "eval_samples_per_second": 50.102,
      "eval_steps_per_second": 6.571,
      "step": 548
    },
    {
      "epoch": 4.01,
      "learning_rate": 0.0001780210867802109,
      "loss": 0.8354,
      "step": 550
    },
    {
      "epoch": 4.09,
      "learning_rate": 0.00017721005677210058,
      "loss": 0.1381,
      "step": 560
    },
    {
      "epoch": 4.16,
      "learning_rate": 0.00017639902676399026,
      "loss": 0.5879,
      "step": 570
    },
    {
      "epoch": 4.23,
      "learning_rate": 0.00017558799675587998,
      "loss": 0.7096,
      "step": 580
    },
    {
      "epoch": 4.31,
      "learning_rate": 0.00017477696674776967,
      "loss": 0.5362,
      "step": 590
    },
    {
      "epoch": 4.38,
      "learning_rate": 0.00017396593673965938,
      "loss": 0.4455,
      "step": 600
    },
    {
      "epoch": 4.45,
      "learning_rate": 0.00017315490673154907,
      "loss": 0.475,
      "step": 610
    },
    {
      "epoch": 4.53,
      "learning_rate": 0.00017234387672343876,
      "loss": 0.4872,
      "step": 620
    },
    {
      "epoch": 4.6,
      "learning_rate": 0.00017153284671532848,
      "loss": 0.2913,
      "step": 630
    },
    {
      "epoch": 4.67,
      "learning_rate": 0.0001707218167072182,
      "loss": 0.1585,
      "step": 640
    },
    {
      "epoch": 4.74,
      "learning_rate": 0.00016991078669910788,
      "loss": 0.304,
      "step": 650
    },
    {
      "epoch": 4.82,
      "learning_rate": 0.00016909975669099757,
      "loss": 0.2869,
      "step": 660
    },
    {
      "epoch": 4.89,
      "learning_rate": 0.00016828872668288726,
      "loss": 0.1741,
      "step": 670
    },
    {
      "epoch": 4.96,
      "learning_rate": 0.00016747769667477697,
      "loss": 0.2565,
      "step": 680
    },
    {
      "epoch": 5.0,
      "eval_accuracy": 0.7213114754098361,
      "eval_loss": 1.4021536111831665,
      "eval_runtime": 2.5436,
      "eval_samples_per_second": 47.963,
      "eval_steps_per_second": 6.29,
      "step": 685
    },
    {
      "epoch": 5.04,
      "learning_rate": 0.0001666666666666667,
      "loss": 0.3661,
      "step": 690
    },
    {
      "epoch": 5.11,
      "learning_rate": 0.00016585563665855638,
      "loss": 0.457,
      "step": 700
    },
    {
      "epoch": 5.18,
      "learning_rate": 0.00016504460665044607,
      "loss": 0.2173,
      "step": 710
    },
    {
      "epoch": 5.26,
      "learning_rate": 0.00016423357664233578,
      "loss": 0.226,
      "step": 720
    },
    {
      "epoch": 5.33,
      "learning_rate": 0.00016342254663422547,
      "loss": 0.3724,
      "step": 730
    },
    {
      "epoch": 5.4,
      "learning_rate": 0.00016261151662611518,
      "loss": 0.1765,
      "step": 740
    },
    {
      "epoch": 5.47,
      "learning_rate": 0.00016180048661800487,
      "loss": 0.4033,
      "step": 750
    },
    {
      "epoch": 5.55,
      "learning_rate": 0.00016098945660989456,
      "loss": 0.6861,
      "step": 760
    },
    {
      "epoch": 5.62,
      "learning_rate": 0.00016017842660178428,
      "loss": 0.4331,
      "step": 770
    },
    {
      "epoch": 5.69,
      "learning_rate": 0.00015936739659367397,
      "loss": 0.2177,
      "step": 780
    },
    {
      "epoch": 5.77,
      "learning_rate": 0.00015855636658556368,
      "loss": 0.4115,
      "step": 790
    },
    {
      "epoch": 5.84,
      "learning_rate": 0.0001577453365774534,
      "loss": 0.4569,
      "step": 800
    },
    {
      "epoch": 5.91,
      "learning_rate": 0.00015693430656934306,
      "loss": 0.3108,
      "step": 810
    },
    {
      "epoch": 5.99,
      "learning_rate": 0.00015612327656123277,
      "loss": 0.4898,
      "step": 820
    },
    {
      "epoch": 6.0,
      "eval_accuracy": 0.7213114754098361,
      "eval_loss": 1.002202033996582,
      "eval_runtime": 2.5947,
      "eval_samples_per_second": 47.019,
      "eval_steps_per_second": 6.166,
      "step": 822
    },
    {
      "epoch": 6.06,
      "learning_rate": 0.00015531224655312246,
      "loss": 0.2544,
      "step": 830
    },
    {
      "epoch": 6.13,
      "learning_rate": 0.00015450121654501218,
      "loss": 0.1688,
      "step": 840
    },
    {
      "epoch": 6.2,
      "learning_rate": 0.0001536901865369019,
      "loss": 0.2628,
      "step": 850
    },
    {
      "epoch": 6.28,
      "learning_rate": 0.00015287915652879155,
      "loss": 0.0097,
      "step": 860
    },
    {
      "epoch": 6.35,
      "learning_rate": 0.00015206812652068127,
      "loss": 0.5975,
      "step": 870
    },
    {
      "epoch": 6.42,
      "learning_rate": 0.00015125709651257096,
      "loss": 0.1515,
      "step": 880
    },
    {
      "epoch": 6.5,
      "learning_rate": 0.00015044606650446067,
      "loss": 0.0841,
      "step": 890
    },
    {
      "epoch": 6.57,
      "learning_rate": 0.0001496350364963504,
      "loss": 0.12,
      "step": 900
    },
    {
      "epoch": 6.64,
      "learning_rate": 0.00014882400648824008,
      "loss": 0.0841,
      "step": 910
    },
    {
      "epoch": 6.72,
      "learning_rate": 0.00014801297648012977,
      "loss": 0.327,
      "step": 920
    },
    {
      "epoch": 6.79,
      "learning_rate": 0.00014720194647201948,
      "loss": 0.1671,
      "step": 930
    },
    {
      "epoch": 6.86,
      "learning_rate": 0.00014639091646390917,
      "loss": 0.1677,
      "step": 940
    },
    {
      "epoch": 6.93,
      "learning_rate": 0.00014557988645579888,
      "loss": 0.325,
      "step": 950
    },
    {
      "epoch": 7.0,
      "eval_accuracy": 0.819672131147541,
      "eval_loss": 1.1020625829696655,
      "eval_runtime": 2.5409,
      "eval_samples_per_second": 48.014,
      "eval_steps_per_second": 6.297,
      "step": 959
    },
    {
      "epoch": 7.01,
      "learning_rate": 0.00014476885644768857,
      "loss": 0.0793,
      "step": 960
    },
    {
      "epoch": 7.08,
      "learning_rate": 0.00014395782643957826,
      "loss": 0.3219,
      "step": 970
    },
    {
      "epoch": 7.15,
      "learning_rate": 0.00014314679643146798,
      "loss": 0.1076,
      "step": 980
    },
    {
      "epoch": 7.23,
      "learning_rate": 0.00014233576642335767,
      "loss": 0.0561,
      "step": 990
    },
    {
      "epoch": 7.3,
      "learning_rate": 0.00014152473641524738,
      "loss": 0.034,
      "step": 1000
    },
    {
      "epoch": 7.37,
      "learning_rate": 0.00014071370640713707,
      "loss": 0.0886,
      "step": 1010
    },
    {
      "epoch": 7.45,
      "learning_rate": 0.00013990267639902676,
      "loss": 0.1077,
      "step": 1020
    },
    {
      "epoch": 7.52,
      "learning_rate": 0.00013909164639091647,
      "loss": 0.0872,
      "step": 1030
    },
    {
      "epoch": 7.59,
      "learning_rate": 0.00013828061638280616,
      "loss": 0.0343,
      "step": 1040
    },
    {
      "epoch": 7.66,
      "learning_rate": 0.00013746958637469588,
      "loss": 0.0941,
      "step": 1050
    },
    {
      "epoch": 7.74,
      "learning_rate": 0.00013665855636658557,
      "loss": 0.0696,
      "step": 1060
    },
    {
      "epoch": 7.81,
      "learning_rate": 0.00013584752635847525,
      "loss": 0.0313,
      "step": 1070
    },
    {
      "epoch": 7.88,
      "learning_rate": 0.00013503649635036497,
      "loss": 0.0473,
      "step": 1080
    },
    {
      "epoch": 7.96,
      "learning_rate": 0.00013422546634225469,
      "loss": 0.0217,
      "step": 1090
    },
    {
      "epoch": 8.0,
      "eval_accuracy": 0.860655737704918,
      "eval_loss": 1.2505751848220825,
      "eval_runtime": 2.4946,
      "eval_samples_per_second": 48.906,
      "eval_steps_per_second": 6.414,
      "step": 1096
    },
    {
      "epoch": 8.03,
      "learning_rate": 0.00013341443633414437,
      "loss": 0.1985,
      "step": 1100
    },
    {
      "epoch": 8.1,
      "learning_rate": 0.00013260340632603406,
      "loss": 0.1631,
      "step": 1110
    },
    {
      "epoch": 8.18,
      "learning_rate": 0.00013179237631792375,
      "loss": 0.11,
      "step": 1120
    },
    {
      "epoch": 8.25,
      "learning_rate": 0.00013098134630981347,
      "loss": 0.2332,
      "step": 1130
    },
    {
      "epoch": 8.32,
      "learning_rate": 0.00013017031630170318,
      "loss": 0.1398,
      "step": 1140
    },
    {
      "epoch": 8.39,
      "learning_rate": 0.00012935928629359287,
      "loss": 0.0189,
      "step": 1150
    },
    {
      "epoch": 8.47,
      "learning_rate": 0.00012854825628548256,
      "loss": 0.1305,
      "step": 1160
    },
    {
      "epoch": 8.54,
      "learning_rate": 0.00012773722627737227,
      "loss": 0.0954,
      "step": 1170
    },
    {
      "epoch": 8.61,
      "learning_rate": 0.00012692619626926196,
      "loss": 0.0631,
      "step": 1180
    },
    {
      "epoch": 8.69,
      "learning_rate": 0.00012611516626115168,
      "loss": 0.0184,
      "step": 1190
    },
    {
      "epoch": 8.76,
      "learning_rate": 0.00012530413625304137,
      "loss": 0.1647,
      "step": 1200
    },
    {
      "epoch": 8.83,
      "learning_rate": 0.00012449310624493108,
      "loss": 0.0028,
      "step": 1210
    },
    {
      "epoch": 8.91,
      "learning_rate": 0.00012368207623682077,
      "loss": 0.0015,
      "step": 1220
    },
    {
      "epoch": 8.98,
      "learning_rate": 0.00012287104622871046,
      "loss": 0.1405,
      "step": 1230
    },
    {
      "epoch": 9.0,
      "eval_accuracy": 0.8770491803278688,
      "eval_loss": 0.6274831295013428,
      "eval_runtime": 2.431,
      "eval_samples_per_second": 50.186,
      "eval_steps_per_second": 6.582,
      "step": 1233
    },
    {
      "epoch": 9.05,
      "learning_rate": 0.00012206001622060017,
      "loss": 0.1252,
      "step": 1240
    },
    {
      "epoch": 9.12,
      "learning_rate": 0.00012124898621248988,
      "loss": 0.0605,
      "step": 1250
    },
    {
      "epoch": 9.2,
      "learning_rate": 0.00012043795620437956,
      "loss": 0.1781,
      "step": 1260
    },
    {
      "epoch": 9.27,
      "learning_rate": 0.00011962692619626928,
      "loss": 0.001,
      "step": 1270
    },
    {
      "epoch": 9.34,
      "learning_rate": 0.00011881589618815895,
      "loss": 0.1232,
      "step": 1280
    },
    {
      "epoch": 9.42,
      "learning_rate": 0.00011800486618004867,
      "loss": 0.0477,
      "step": 1290
    },
    {
      "epoch": 9.49,
      "learning_rate": 0.00011719383617193837,
      "loss": 0.0041,
      "step": 1300
    },
    {
      "epoch": 9.56,
      "learning_rate": 0.00011638280616382806,
      "loss": 0.0205,
      "step": 1310
    },
    {
      "epoch": 9.64,
      "learning_rate": 0.00011557177615571778,
      "loss": 0.1789,
      "step": 1320
    },
    {
      "epoch": 9.71,
      "learning_rate": 0.00011476074614760745,
      "loss": 0.0133,
      "step": 1330
    },
    {
      "epoch": 9.78,
      "learning_rate": 0.00011394971613949717,
      "loss": 0.0806,
      "step": 1340
    },
    {
      "epoch": 9.85,
      "learning_rate": 0.00011313868613138687,
      "loss": 0.1051,
      "step": 1350
    },
    {
      "epoch": 9.93,
      "learning_rate": 0.00011232765612327656,
      "loss": 0.0284,
      "step": 1360
    },
    {
      "epoch": 10.0,
      "learning_rate": 0.00011151662611516627,
      "loss": 0.0433,
      "step": 1370
    },
    {
      "epoch": 10.0,
      "eval_accuracy": 0.9426229508196722,
      "eval_loss": 0.33996063470840454,
      "eval_runtime": 2.4463,
      "eval_samples_per_second": 49.871,
      "eval_steps_per_second": 6.54,
      "step": 1370
    },
    {
      "epoch": 10.07,
      "learning_rate": 0.00011070559610705597,
      "loss": 0.0784,
      "step": 1380
    },
    {
      "epoch": 10.15,
      "learning_rate": 0.00010989456609894566,
      "loss": 0.0347,
      "step": 1390
    },
    {
      "epoch": 10.22,
      "learning_rate": 0.00010908353609083536,
      "loss": 0.019,
      "step": 1400
    },
    {
      "epoch": 10.29,
      "learning_rate": 0.00010827250608272505,
      "loss": 0.0011,
      "step": 1410
    },
    {
      "epoch": 10.36,
      "learning_rate": 0.00010746147607461477,
      "loss": 0.0007,
      "step": 1420
    },
    {
      "epoch": 10.44,
      "learning_rate": 0.00010665044606650447,
      "loss": 0.0015,
      "step": 1430
    },
    {
      "epoch": 10.51,
      "learning_rate": 0.00010583941605839416,
      "loss": 0.0733,
      "step": 1440
    },
    {
      "epoch": 10.58,
      "learning_rate": 0.00010502838605028386,
      "loss": 0.4108,
      "step": 1450
    },
    {
      "epoch": 10.66,
      "learning_rate": 0.00010421735604217358,
      "loss": 0.0772,
      "step": 1460
    },
    {
      "epoch": 10.73,
      "learning_rate": 0.00010340632603406326,
      "loss": 0.016,
      "step": 1470
    },
    {
      "epoch": 10.8,
      "learning_rate": 0.00010259529602595297,
      "loss": 0.004,
      "step": 1480
    },
    {
      "epoch": 10.88,
      "learning_rate": 0.00010178426601784266,
      "loss": 0.1367,
      "step": 1490
    },
    {
      "epoch": 10.95,
      "learning_rate": 0.00010097323600973236,
      "loss": 0.168,
      "step": 1500
    },
    {
      "epoch": 11.0,
      "eval_accuracy": 0.8934426229508197,
      "eval_loss": 0.5016093850135803,
      "eval_runtime": 2.4265,
      "eval_samples_per_second": 50.278,
      "eval_steps_per_second": 6.594,
      "step": 1507
    },
    {
      "epoch": 11.02,
      "learning_rate": 0.00010016220600162207,
      "loss": 0.1004,
      "step": 1510
    },
    {
      "epoch": 11.09,
      "learning_rate": 9.935117599351177e-05,
      "loss": 0.0006,
      "step": 1520
    },
    {
      "epoch": 11.17,
      "learning_rate": 9.854014598540146e-05,
      "loss": 0.0111,
      "step": 1530
    },
    {
      "epoch": 11.24,
      "learning_rate": 9.772911597729116e-05,
      "loss": 0.0278,
      "step": 1540
    },
    {
      "epoch": 11.31,
      "learning_rate": 9.691808596918087e-05,
      "loss": 0.0631,
      "step": 1550
    },
    {
      "epoch": 11.39,
      "learning_rate": 9.610705596107057e-05,
      "loss": 0.0002,
      "step": 1560
    },
    {
      "epoch": 11.46,
      "learning_rate": 9.529602595296027e-05,
      "loss": 0.0013,
      "step": 1570
    },
    {
      "epoch": 11.53,
      "learning_rate": 9.448499594484996e-05,
      "loss": 0.0008,
      "step": 1580
    },
    {
      "epoch": 11.61,
      "learning_rate": 9.367396593673966e-05,
      "loss": 0.0001,
      "step": 1590
    },
    {
      "epoch": 11.68,
      "learning_rate": 9.286293592862936e-05,
      "loss": 0.0001,
      "step": 1600
    },
    {
      "epoch": 11.75,
      "learning_rate": 9.205190592051907e-05,
      "loss": 0.0002,
      "step": 1610
    },
    {
      "epoch": 11.82,
      "learning_rate": 9.124087591240877e-05,
      "loss": 0.0002,
      "step": 1620
    },
    {
      "epoch": 11.9,
      "learning_rate": 9.042984590429846e-05,
      "loss": 0.0001,
      "step": 1630
    },
    {
      "epoch": 11.97,
      "learning_rate": 8.961881589618817e-05,
      "loss": 0.0001,
      "step": 1640
    },
    {
      "epoch": 12.0,
      "eval_accuracy": 0.9508196721311475,
      "eval_loss": 0.2764035761356354,
      "eval_runtime": 2.435,
      "eval_samples_per_second": 50.104,
      "eval_steps_per_second": 6.571,
      "step": 1644
    }
  ],
  "max_steps": 2740,
  "num_train_epochs": 20,
  "total_flos": 3.5908878492818473e+18,
  "trial_name": null,
  "trial_params": null
}