| { |
| "best_metric": 0.7957639098167419, |
| "best_model_checkpoint": "./vit-base-beans/checkpoint-3000", |
| "epoch": 15.0, |
| "global_step": 11700, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.01, |
| "learning_rate": 0.00019982905982905984, |
| "loss": 1.8581, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.03, |
| "learning_rate": 0.00019965811965811967, |
| "loss": 1.7758, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.04, |
| "learning_rate": 0.0001994871794871795, |
| "loss": 1.7018, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.05, |
| "learning_rate": 0.00019931623931623932, |
| "loss": 1.5662, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.06, |
| "learning_rate": 0.00019914529914529915, |
| "loss": 1.6721, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.08, |
| "learning_rate": 0.00019897435897435898, |
| "loss": 1.5795, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.09, |
| "learning_rate": 0.0001988034188034188, |
| "loss": 1.5471, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.1, |
| "learning_rate": 0.00019863247863247864, |
| "loss": 1.5363, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.12, |
| "learning_rate": 0.00019846153846153847, |
| "loss": 1.4139, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.13, |
| "learning_rate": 0.0001982905982905983, |
| "loss": 1.4193, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.14, |
| "learning_rate": 0.00019811965811965813, |
| "loss": 1.3853, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.15, |
| "learning_rate": 0.00019794871794871796, |
| "loss": 1.3645, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.17, |
| "learning_rate": 0.00019777777777777778, |
| "loss": 1.4095, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.18, |
| "learning_rate": 0.0001976068376068376, |
| "loss": 1.4114, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.19, |
| "learning_rate": 0.00019743589743589744, |
| "loss": 1.433, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.21, |
| "learning_rate": 0.00019726495726495727, |
| "loss": 1.5094, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.22, |
| "learning_rate": 0.00019709401709401713, |
| "loss": 1.2352, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.23, |
| "learning_rate": 0.00019692307692307696, |
| "loss": 1.2916, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.24, |
| "learning_rate": 0.00019675213675213676, |
| "loss": 1.2965, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.26, |
| "learning_rate": 0.00019658119658119659, |
| "loss": 1.3053, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.27, |
| "learning_rate": 0.00019641025641025642, |
| "loss": 1.3689, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.28, |
| "learning_rate": 0.00019623931623931624, |
| "loss": 1.3519, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.29, |
| "learning_rate": 0.00019606837606837607, |
| "loss": 1.4205, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.31, |
| "learning_rate": 0.0001958974358974359, |
| "loss": 1.2702, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.32, |
| "learning_rate": 0.00019572649572649573, |
| "loss": 1.2631, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.33, |
| "learning_rate": 0.00019555555555555556, |
| "loss": 1.2163, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.35, |
| "learning_rate": 0.0001953846153846154, |
| "loss": 1.1323, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.36, |
| "learning_rate": 0.00019521367521367522, |
| "loss": 1.3013, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.37, |
| "learning_rate": 0.00019504273504273505, |
| "loss": 1.1129, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.38, |
| "learning_rate": 0.00019487179487179487, |
| "loss": 1.4559, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.4, |
| "learning_rate": 0.0001947008547008547, |
| "loss": 1.2784, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.41, |
| "learning_rate": 0.00019452991452991453, |
| "loss": 1.3402, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.42, |
| "learning_rate": 0.00019435897435897436, |
| "loss": 1.2687, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.44, |
| "learning_rate": 0.0001941880341880342, |
| "loss": 1.1328, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.45, |
| "learning_rate": 0.00019401709401709402, |
| "loss": 1.1216, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.46, |
| "learning_rate": 0.00019384615384615385, |
| "loss": 1.0489, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.47, |
| "learning_rate": 0.00019367521367521368, |
| "loss": 1.0993, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.49, |
| "learning_rate": 0.0001935042735042735, |
| "loss": 1.3569, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.5, |
| "learning_rate": 0.00019333333333333333, |
| "loss": 0.9691, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.51, |
| "learning_rate": 0.00019316239316239316, |
| "loss": 1.1364, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.53, |
| "learning_rate": 0.000192991452991453, |
| "loss": 1.104, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.54, |
| "learning_rate": 0.00019282051282051282, |
| "loss": 1.1839, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.55, |
| "learning_rate": 0.00019264957264957265, |
| "loss": 1.1365, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.56, |
| "learning_rate": 0.0001924786324786325, |
| "loss": 0.9201, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.58, |
| "learning_rate": 0.00019230769230769233, |
| "loss": 1.1012, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.59, |
| "learning_rate": 0.00019213675213675216, |
| "loss": 1.042, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.6, |
| "learning_rate": 0.000191965811965812, |
| "loss": 0.9697, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.62, |
| "learning_rate": 0.00019179487179487182, |
| "loss": 1.4043, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.63, |
| "learning_rate": 0.00019162393162393165, |
| "loss": 1.2395, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.64, |
| "learning_rate": 0.00019145299145299148, |
| "loss": 1.193, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.65, |
| "learning_rate": 0.0001912820512820513, |
| "loss": 1.2216, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.67, |
| "learning_rate": 0.00019111111111111114, |
| "loss": 1.0667, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.68, |
| "learning_rate": 0.00019094017094017097, |
| "loss": 1.0327, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.69, |
| "learning_rate": 0.0001907692307692308, |
| "loss": 1.0287, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.71, |
| "learning_rate": 0.0001905982905982906, |
| "loss": 1.0963, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.72, |
| "learning_rate": 0.00019042735042735042, |
| "loss": 1.0812, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.73, |
| "learning_rate": 0.00019025641025641025, |
| "loss": 1.0625, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.74, |
| "learning_rate": 0.00019008547008547008, |
| "loss": 0.8884, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.76, |
| "learning_rate": 0.0001899145299145299, |
| "loss": 0.9661, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.77, |
| "learning_rate": 0.00018974358974358974, |
| "loss": 0.951, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.78, |
| "learning_rate": 0.00018957264957264957, |
| "loss": 0.93, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.79, |
| "learning_rate": 0.0001894017094017094, |
| "loss": 0.8576, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.81, |
| "learning_rate": 0.00018923076923076923, |
| "loss": 1.2505, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.82, |
| "learning_rate": 0.00018905982905982906, |
| "loss": 0.9469, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.83, |
| "learning_rate": 0.00018888888888888888, |
| "loss": 0.9869, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.85, |
| "learning_rate": 0.0001887179487179487, |
| "loss": 1.1052, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.86, |
| "learning_rate": 0.00018854700854700854, |
| "loss": 1.0779, |
| "step": 670 |
| }, |
| { |
| "epoch": 0.87, |
| "learning_rate": 0.00018837606837606837, |
| "loss": 1.0328, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.88, |
| "learning_rate": 0.0001882051282051282, |
| "loss": 1.0377, |
| "step": 690 |
| }, |
| { |
| "epoch": 0.9, |
| "learning_rate": 0.00018803418803418803, |
| "loss": 0.9489, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.91, |
| "learning_rate": 0.00018786324786324786, |
| "loss": 0.8715, |
| "step": 710 |
| }, |
| { |
| "epoch": 0.92, |
| "learning_rate": 0.0001876923076923077, |
| "loss": 0.9167, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.94, |
| "learning_rate": 0.00018752136752136754, |
| "loss": 0.9519, |
| "step": 730 |
| }, |
| { |
| "epoch": 0.95, |
| "learning_rate": 0.00018735042735042737, |
| "loss": 0.8548, |
| "step": 740 |
| }, |
| { |
| "epoch": 0.96, |
| "learning_rate": 0.0001871794871794872, |
| "loss": 0.827, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.97, |
| "learning_rate": 0.00018700854700854703, |
| "loss": 0.9922, |
| "step": 760 |
| }, |
| { |
| "epoch": 0.99, |
| "learning_rate": 0.00018683760683760686, |
| "loss": 0.902, |
| "step": 770 |
| }, |
| { |
| "epoch": 1.0, |
| "learning_rate": 0.0001866666666666667, |
| "loss": 0.8385, |
| "step": 780 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 0.00018649572649572652, |
| "loss": 0.5939, |
| "step": 790 |
| }, |
| { |
| "epoch": 1.03, |
| "learning_rate": 0.00018632478632478634, |
| "loss": 0.8229, |
| "step": 800 |
| }, |
| { |
| "epoch": 1.04, |
| "learning_rate": 0.00018615384615384617, |
| "loss": 0.6577, |
| "step": 810 |
| }, |
| { |
| "epoch": 1.05, |
| "learning_rate": 0.000185982905982906, |
| "loss": 0.7075, |
| "step": 820 |
| }, |
| { |
| "epoch": 1.06, |
| "learning_rate": 0.00018581196581196583, |
| "loss": 0.6764, |
| "step": 830 |
| }, |
| { |
| "epoch": 1.08, |
| "learning_rate": 0.00018564102564102566, |
| "loss": 0.5858, |
| "step": 840 |
| }, |
| { |
| "epoch": 1.09, |
| "learning_rate": 0.0001854700854700855, |
| "loss": 0.8485, |
| "step": 850 |
| }, |
| { |
| "epoch": 1.1, |
| "learning_rate": 0.00018529914529914532, |
| "loss": 0.6396, |
| "step": 860 |
| }, |
| { |
| "epoch": 1.12, |
| "learning_rate": 0.00018512820512820515, |
| "loss": 0.6054, |
| "step": 870 |
| }, |
| { |
| "epoch": 1.13, |
| "learning_rate": 0.00018495726495726497, |
| "loss": 0.664, |
| "step": 880 |
| }, |
| { |
| "epoch": 1.14, |
| "learning_rate": 0.0001847863247863248, |
| "loss": 0.5736, |
| "step": 890 |
| }, |
| { |
| "epoch": 1.15, |
| "learning_rate": 0.00018461538461538463, |
| "loss": 0.6546, |
| "step": 900 |
| }, |
| { |
| "epoch": 1.17, |
| "learning_rate": 0.00018444444444444446, |
| "loss": 0.6295, |
| "step": 910 |
| }, |
| { |
| "epoch": 1.18, |
| "learning_rate": 0.00018427350427350426, |
| "loss": 0.7309, |
| "step": 920 |
| }, |
| { |
| "epoch": 1.19, |
| "learning_rate": 0.0001841025641025641, |
| "loss": 0.8071, |
| "step": 930 |
| }, |
| { |
| "epoch": 1.21, |
| "learning_rate": 0.00018393162393162392, |
| "loss": 0.667, |
| "step": 940 |
| }, |
| { |
| "epoch": 1.22, |
| "learning_rate": 0.00018376068376068375, |
| "loss": 0.7265, |
| "step": 950 |
| }, |
| { |
| "epoch": 1.23, |
| "learning_rate": 0.00018358974358974358, |
| "loss": 0.6872, |
| "step": 960 |
| }, |
| { |
| "epoch": 1.24, |
| "learning_rate": 0.0001834188034188034, |
| "loss": 0.8474, |
| "step": 970 |
| }, |
| { |
| "epoch": 1.26, |
| "learning_rate": 0.00018324786324786324, |
| "loss": 0.6226, |
| "step": 980 |
| }, |
| { |
| "epoch": 1.27, |
| "learning_rate": 0.0001830769230769231, |
| "loss": 0.5453, |
| "step": 990 |
| }, |
| { |
| "epoch": 1.28, |
| "learning_rate": 0.00018290598290598292, |
| "loss": 0.6832, |
| "step": 1000 |
| }, |
| { |
| "epoch": 1.28, |
| "eval_accuracy": 0.6526374859708193, |
| "eval_loss": 0.9223159551620483, |
| "eval_runtime": 34.8394, |
| "eval_samples_per_second": 51.206, |
| "eval_steps_per_second": 6.401, |
| "step": 1000 |
| }, |
| { |
| "epoch": 1.29, |
| "learning_rate": 0.00018273504273504275, |
| "loss": 0.7721, |
| "step": 1010 |
| }, |
| { |
| "epoch": 1.31, |
| "learning_rate": 0.00018256410256410258, |
| "loss": 0.6434, |
| "step": 1020 |
| }, |
| { |
| "epoch": 1.32, |
| "learning_rate": 0.0001823931623931624, |
| "loss": 0.7244, |
| "step": 1030 |
| }, |
| { |
| "epoch": 1.33, |
| "learning_rate": 0.00018222222222222224, |
| "loss": 0.6953, |
| "step": 1040 |
| }, |
| { |
| "epoch": 1.35, |
| "learning_rate": 0.00018205128205128207, |
| "loss": 0.6965, |
| "step": 1050 |
| }, |
| { |
| "epoch": 1.36, |
| "learning_rate": 0.0001818803418803419, |
| "loss": 0.7892, |
| "step": 1060 |
| }, |
| { |
| "epoch": 1.37, |
| "learning_rate": 0.00018170940170940172, |
| "loss": 0.6513, |
| "step": 1070 |
| }, |
| { |
| "epoch": 1.38, |
| "learning_rate": 0.00018153846153846155, |
| "loss": 0.6469, |
| "step": 1080 |
| }, |
| { |
| "epoch": 1.4, |
| "learning_rate": 0.00018136752136752138, |
| "loss": 0.6468, |
| "step": 1090 |
| }, |
| { |
| "epoch": 1.41, |
| "learning_rate": 0.0001811965811965812, |
| "loss": 0.7679, |
| "step": 1100 |
| }, |
| { |
| "epoch": 1.42, |
| "learning_rate": 0.00018102564102564104, |
| "loss": 0.4376, |
| "step": 1110 |
| }, |
| { |
| "epoch": 1.44, |
| "learning_rate": 0.00018085470085470087, |
| "loss": 0.844, |
| "step": 1120 |
| }, |
| { |
| "epoch": 1.45, |
| "learning_rate": 0.0001806837606837607, |
| "loss": 0.7508, |
| "step": 1130 |
| }, |
| { |
| "epoch": 1.46, |
| "learning_rate": 0.00018051282051282052, |
| "loss": 0.7154, |
| "step": 1140 |
| }, |
| { |
| "epoch": 1.47, |
| "learning_rate": 0.00018034188034188035, |
| "loss": 0.6632, |
| "step": 1150 |
| }, |
| { |
| "epoch": 1.49, |
| "learning_rate": 0.00018017094017094018, |
| "loss": 0.7109, |
| "step": 1160 |
| }, |
| { |
| "epoch": 1.5, |
| "learning_rate": 0.00018, |
| "loss": 0.6545, |
| "step": 1170 |
| }, |
| { |
| "epoch": 1.51, |
| "learning_rate": 0.00017982905982905984, |
| "loss": 0.7316, |
| "step": 1180 |
| }, |
| { |
| "epoch": 1.53, |
| "learning_rate": 0.00017965811965811967, |
| "loss": 0.7549, |
| "step": 1190 |
| }, |
| { |
| "epoch": 1.54, |
| "learning_rate": 0.0001794871794871795, |
| "loss": 0.5296, |
| "step": 1200 |
| }, |
| { |
| "epoch": 1.55, |
| "learning_rate": 0.00017931623931623933, |
| "loss": 1.0734, |
| "step": 1210 |
| }, |
| { |
| "epoch": 1.56, |
| "learning_rate": 0.00017914529914529916, |
| "loss": 0.6724, |
| "step": 1220 |
| }, |
| { |
| "epoch": 1.58, |
| "learning_rate": 0.00017897435897435898, |
| "loss": 0.5692, |
| "step": 1230 |
| }, |
| { |
| "epoch": 1.59, |
| "learning_rate": 0.0001788034188034188, |
| "loss": 0.6338, |
| "step": 1240 |
| }, |
| { |
| "epoch": 1.6, |
| "learning_rate": 0.00017863247863247864, |
| "loss": 0.5972, |
| "step": 1250 |
| }, |
| { |
| "epoch": 1.62, |
| "learning_rate": 0.00017846153846153847, |
| "loss": 0.7404, |
| "step": 1260 |
| }, |
| { |
| "epoch": 1.63, |
| "learning_rate": 0.0001782905982905983, |
| "loss": 0.5386, |
| "step": 1270 |
| }, |
| { |
| "epoch": 1.64, |
| "learning_rate": 0.00017811965811965813, |
| "loss": 0.775, |
| "step": 1280 |
| }, |
| { |
| "epoch": 1.65, |
| "learning_rate": 0.00017794871794871796, |
| "loss": 0.8524, |
| "step": 1290 |
| }, |
| { |
| "epoch": 1.67, |
| "learning_rate": 0.00017777777777777779, |
| "loss": 0.6457, |
| "step": 1300 |
| }, |
| { |
| "epoch": 1.68, |
| "learning_rate": 0.00017760683760683762, |
| "loss": 0.5095, |
| "step": 1310 |
| }, |
| { |
| "epoch": 1.69, |
| "learning_rate": 0.00017743589743589744, |
| "loss": 0.7402, |
| "step": 1320 |
| }, |
| { |
| "epoch": 1.71, |
| "learning_rate": 0.00017726495726495727, |
| "loss": 0.4563, |
| "step": 1330 |
| }, |
| { |
| "epoch": 1.72, |
| "learning_rate": 0.0001770940170940171, |
| "loss": 0.5477, |
| "step": 1340 |
| }, |
| { |
| "epoch": 1.73, |
| "learning_rate": 0.00017692307692307693, |
| "loss": 0.7047, |
| "step": 1350 |
| }, |
| { |
| "epoch": 1.74, |
| "learning_rate": 0.00017675213675213676, |
| "loss": 0.9488, |
| "step": 1360 |
| }, |
| { |
| "epoch": 1.76, |
| "learning_rate": 0.0001765811965811966, |
| "loss": 0.6216, |
| "step": 1370 |
| }, |
| { |
| "epoch": 1.77, |
| "learning_rate": 0.00017641025641025642, |
| "loss": 0.6692, |
| "step": 1380 |
| }, |
| { |
| "epoch": 1.78, |
| "learning_rate": 0.00017623931623931625, |
| "loss": 0.5363, |
| "step": 1390 |
| }, |
| { |
| "epoch": 1.79, |
| "learning_rate": 0.00017606837606837607, |
| "loss": 0.512, |
| "step": 1400 |
| }, |
| { |
| "epoch": 1.81, |
| "learning_rate": 0.0001758974358974359, |
| "loss": 0.7907, |
| "step": 1410 |
| }, |
| { |
| "epoch": 1.82, |
| "learning_rate": 0.00017572649572649573, |
| "loss": 0.674, |
| "step": 1420 |
| }, |
| { |
| "epoch": 1.83, |
| "learning_rate": 0.00017555555555555556, |
| "loss": 0.6912, |
| "step": 1430 |
| }, |
| { |
| "epoch": 1.85, |
| "learning_rate": 0.0001753846153846154, |
| "loss": 0.875, |
| "step": 1440 |
| }, |
| { |
| "epoch": 1.86, |
| "learning_rate": 0.00017521367521367522, |
| "loss": 0.9192, |
| "step": 1450 |
| }, |
| { |
| "epoch": 1.87, |
| "learning_rate": 0.00017504273504273505, |
| "loss": 0.6845, |
| "step": 1460 |
| }, |
| { |
| "epoch": 1.88, |
| "learning_rate": 0.00017487179487179488, |
| "loss": 0.9668, |
| "step": 1470 |
| }, |
| { |
| "epoch": 1.9, |
| "learning_rate": 0.0001747008547008547, |
| "loss": 0.6267, |
| "step": 1480 |
| }, |
| { |
| "epoch": 1.91, |
| "learning_rate": 0.00017452991452991453, |
| "loss": 0.7207, |
| "step": 1490 |
| }, |
| { |
| "epoch": 1.92, |
| "learning_rate": 0.00017435897435897436, |
| "loss": 0.4895, |
| "step": 1500 |
| }, |
| { |
| "epoch": 1.94, |
| "learning_rate": 0.0001741880341880342, |
| "loss": 0.7237, |
| "step": 1510 |
| }, |
| { |
| "epoch": 1.95, |
| "learning_rate": 0.00017401709401709402, |
| "loss": 0.702, |
| "step": 1520 |
| }, |
| { |
| "epoch": 1.96, |
| "learning_rate": 0.00017384615384615385, |
| "loss": 0.6706, |
| "step": 1530 |
| }, |
| { |
| "epoch": 1.97, |
| "learning_rate": 0.00017367521367521368, |
| "loss": 0.6043, |
| "step": 1540 |
| }, |
| { |
| "epoch": 1.99, |
| "learning_rate": 0.0001735042735042735, |
| "loss": 0.7136, |
| "step": 1550 |
| }, |
| { |
| "epoch": 2.0, |
| "learning_rate": 0.00017333333333333334, |
| "loss": 0.5816, |
| "step": 1560 |
| }, |
| { |
| "epoch": 2.01, |
| "learning_rate": 0.00017316239316239317, |
| "loss": 0.4576, |
| "step": 1570 |
| }, |
| { |
| "epoch": 2.03, |
| "learning_rate": 0.000172991452991453, |
| "loss": 0.3339, |
| "step": 1580 |
| }, |
| { |
| "epoch": 2.04, |
| "learning_rate": 0.00017282051282051285, |
| "loss": 0.4679, |
| "step": 1590 |
| }, |
| { |
| "epoch": 2.05, |
| "learning_rate": 0.00017264957264957268, |
| "loss": 0.5122, |
| "step": 1600 |
| }, |
| { |
| "epoch": 2.06, |
| "learning_rate": 0.0001724786324786325, |
| "loss": 0.2515, |
| "step": 1610 |
| }, |
| { |
| "epoch": 2.08, |
| "learning_rate": 0.00017230769230769234, |
| "loss": 0.3069, |
| "step": 1620 |
| }, |
| { |
| "epoch": 2.09, |
| "learning_rate": 0.00017213675213675217, |
| "loss": 0.3612, |
| "step": 1630 |
| }, |
| { |
| "epoch": 2.1, |
| "learning_rate": 0.000171965811965812, |
| "loss": 0.4896, |
| "step": 1640 |
| }, |
| { |
| "epoch": 2.12, |
| "learning_rate": 0.0001717948717948718, |
| "loss": 0.2227, |
| "step": 1650 |
| }, |
| { |
| "epoch": 2.13, |
| "learning_rate": 0.00017162393162393162, |
| "loss": 0.3367, |
| "step": 1660 |
| }, |
| { |
| "epoch": 2.14, |
| "learning_rate": 0.00017145299145299145, |
| "loss": 0.4207, |
| "step": 1670 |
| }, |
| { |
| "epoch": 2.15, |
| "learning_rate": 0.00017128205128205128, |
| "loss": 0.3126, |
| "step": 1680 |
| }, |
| { |
| "epoch": 2.17, |
| "learning_rate": 0.0001711111111111111, |
| "loss": 0.2902, |
| "step": 1690 |
| }, |
| { |
| "epoch": 2.18, |
| "learning_rate": 0.00017094017094017094, |
| "loss": 0.6133, |
| "step": 1700 |
| }, |
| { |
| "epoch": 2.19, |
| "learning_rate": 0.00017076923076923077, |
| "loss": 0.4473, |
| "step": 1710 |
| }, |
| { |
| "epoch": 2.21, |
| "learning_rate": 0.0001705982905982906, |
| "loss": 0.6531, |
| "step": 1720 |
| }, |
| { |
| "epoch": 2.22, |
| "learning_rate": 0.00017042735042735043, |
| "loss": 0.5508, |
| "step": 1730 |
| }, |
| { |
| "epoch": 2.23, |
| "learning_rate": 0.00017025641025641026, |
| "loss": 0.4197, |
| "step": 1740 |
| }, |
| { |
| "epoch": 2.24, |
| "learning_rate": 0.00017008547008547008, |
| "loss": 0.4295, |
| "step": 1750 |
| }, |
| { |
| "epoch": 2.26, |
| "learning_rate": 0.0001699145299145299, |
| "loss": 0.3714, |
| "step": 1760 |
| }, |
| { |
| "epoch": 2.27, |
| "learning_rate": 0.00016974358974358974, |
| "loss": 0.4605, |
| "step": 1770 |
| }, |
| { |
| "epoch": 2.28, |
| "learning_rate": 0.00016957264957264957, |
| "loss": 0.3979, |
| "step": 1780 |
| }, |
| { |
| "epoch": 2.29, |
| "learning_rate": 0.0001694017094017094, |
| "loss": 0.3915, |
| "step": 1790 |
| }, |
| { |
| "epoch": 2.31, |
| "learning_rate": 0.00016923076923076923, |
| "loss": 0.3594, |
| "step": 1800 |
| }, |
| { |
| "epoch": 2.32, |
| "learning_rate": 0.00016905982905982906, |
| "loss": 0.332, |
| "step": 1810 |
| }, |
| { |
| "epoch": 2.33, |
| "learning_rate": 0.00016888888888888889, |
| "loss": 0.6412, |
| "step": 1820 |
| }, |
| { |
| "epoch": 2.35, |
| "learning_rate": 0.00016871794871794871, |
| "loss": 0.5271, |
| "step": 1830 |
| }, |
| { |
| "epoch": 2.36, |
| "learning_rate": 0.00016854700854700854, |
| "loss": 0.6231, |
| "step": 1840 |
| }, |
| { |
| "epoch": 2.37, |
| "learning_rate": 0.00016837606837606837, |
| "loss": 0.4133, |
| "step": 1850 |
| }, |
| { |
| "epoch": 2.38, |
| "learning_rate": 0.00016820512820512823, |
| "loss": 0.3524, |
| "step": 1860 |
| }, |
| { |
| "epoch": 2.4, |
| "learning_rate": 0.00016803418803418806, |
| "loss": 0.4834, |
| "step": 1870 |
| }, |
| { |
| "epoch": 2.41, |
| "learning_rate": 0.00016786324786324789, |
| "loss": 0.5658, |
| "step": 1880 |
| }, |
| { |
| "epoch": 2.42, |
| "learning_rate": 0.00016769230769230772, |
| "loss": 0.479, |
| "step": 1890 |
| }, |
| { |
| "epoch": 2.44, |
| "learning_rate": 0.00016752136752136754, |
| "loss": 0.4138, |
| "step": 1900 |
| }, |
| { |
| "epoch": 2.45, |
| "learning_rate": 0.00016735042735042737, |
| "loss": 0.4342, |
| "step": 1910 |
| }, |
| { |
| "epoch": 2.46, |
| "learning_rate": 0.0001671794871794872, |
| "loss": 0.4909, |
| "step": 1920 |
| }, |
| { |
| "epoch": 2.47, |
| "learning_rate": 0.00016700854700854703, |
| "loss": 0.4699, |
| "step": 1930 |
| }, |
| { |
| "epoch": 2.49, |
| "learning_rate": 0.00016683760683760686, |
| "loss": 0.1936, |
| "step": 1940 |
| }, |
| { |
| "epoch": 2.5, |
| "learning_rate": 0.0001666666666666667, |
| "loss": 0.4344, |
| "step": 1950 |
| }, |
| { |
| "epoch": 2.51, |
| "learning_rate": 0.00016649572649572652, |
| "loss": 0.2826, |
| "step": 1960 |
| }, |
| { |
| "epoch": 2.53, |
| "learning_rate": 0.00016632478632478635, |
| "loss": 0.3518, |
| "step": 1970 |
| }, |
| { |
| "epoch": 2.54, |
| "learning_rate": 0.00016615384615384617, |
| "loss": 0.4507, |
| "step": 1980 |
| }, |
| { |
| "epoch": 2.55, |
| "learning_rate": 0.000165982905982906, |
| "loss": 0.4549, |
| "step": 1990 |
| }, |
| { |
| "epoch": 2.56, |
| "learning_rate": 0.00016581196581196583, |
| "loss": 0.348, |
| "step": 2000 |
| }, |
| { |
| "epoch": 2.56, |
| "eval_accuracy": 0.7008978675645342, |
| "eval_loss": 1.0017077922821045, |
| "eval_runtime": 39.0117, |
| "eval_samples_per_second": 45.73, |
| "eval_steps_per_second": 5.716, |
| "step": 2000 |
| }, |
| { |
| "epoch": 2.58, |
| "learning_rate": 0.00016564102564102566, |
| "loss": 0.7027, |
| "step": 2010 |
| }, |
| { |
| "epoch": 2.59, |
| "learning_rate": 0.00016547008547008546, |
| "loss": 0.5353, |
| "step": 2020 |
| }, |
| { |
| "epoch": 2.6, |
| "learning_rate": 0.0001652991452991453, |
| "loss": 0.5958, |
| "step": 2030 |
| }, |
| { |
| "epoch": 2.62, |
| "learning_rate": 0.00016512820512820512, |
| "loss": 0.4524, |
| "step": 2040 |
| }, |
| { |
| "epoch": 2.63, |
| "learning_rate": 0.00016495726495726495, |
| "loss": 0.4626, |
| "step": 2050 |
| }, |
| { |
| "epoch": 2.64, |
| "learning_rate": 0.00016478632478632478, |
| "loss": 0.4503, |
| "step": 2060 |
| }, |
| { |
| "epoch": 2.65, |
| "learning_rate": 0.0001646153846153846, |
| "loss": 0.5792, |
| "step": 2070 |
| }, |
| { |
| "epoch": 2.67, |
| "learning_rate": 0.00016444444444444444, |
| "loss": 0.3757, |
| "step": 2080 |
| }, |
| { |
| "epoch": 2.68, |
| "learning_rate": 0.00016427350427350426, |
| "loss": 0.4774, |
| "step": 2090 |
| }, |
| { |
| "epoch": 2.69, |
| "learning_rate": 0.0001641025641025641, |
| "loss": 0.6365, |
| "step": 2100 |
| }, |
| { |
| "epoch": 2.71, |
| "learning_rate": 0.00016393162393162392, |
| "loss": 0.2231, |
| "step": 2110 |
| }, |
| { |
| "epoch": 2.72, |
| "learning_rate": 0.00016376068376068375, |
| "loss": 0.5641, |
| "step": 2120 |
| }, |
| { |
| "epoch": 2.73, |
| "learning_rate": 0.00016358974358974358, |
| "loss": 0.3998, |
| "step": 2130 |
| }, |
| { |
| "epoch": 2.74, |
| "learning_rate": 0.00016341880341880344, |
| "loss": 0.4461, |
| "step": 2140 |
| }, |
| { |
| "epoch": 2.76, |
| "learning_rate": 0.00016324786324786327, |
| "loss": 0.3812, |
| "step": 2150 |
| }, |
| { |
| "epoch": 2.77, |
| "learning_rate": 0.0001630769230769231, |
| "loss": 0.407, |
| "step": 2160 |
| }, |
| { |
| "epoch": 2.78, |
| "learning_rate": 0.00016290598290598292, |
| "loss": 0.5142, |
| "step": 2170 |
| }, |
| { |
| "epoch": 2.79, |
| "learning_rate": 0.00016273504273504275, |
| "loss": 0.3531, |
| "step": 2180 |
| }, |
| { |
| "epoch": 2.81, |
| "learning_rate": 0.00016256410256410258, |
| "loss": 0.4287, |
| "step": 2190 |
| }, |
| { |
| "epoch": 2.82, |
| "learning_rate": 0.0001623931623931624, |
| "loss": 0.4242, |
| "step": 2200 |
| }, |
| { |
| "epoch": 2.83, |
| "learning_rate": 0.00016222222222222224, |
| "loss": 0.282, |
| "step": 2210 |
| }, |
| { |
| "epoch": 2.85, |
| "learning_rate": 0.00016205128205128207, |
| "loss": 0.5312, |
| "step": 2220 |
| }, |
| { |
| "epoch": 2.86, |
| "learning_rate": 0.0001618803418803419, |
| "loss": 0.5925, |
| "step": 2230 |
| }, |
| { |
| "epoch": 2.87, |
| "learning_rate": 0.00016170940170940172, |
| "loss": 0.3919, |
| "step": 2240 |
| }, |
| { |
| "epoch": 2.88, |
| "learning_rate": 0.00016153846153846155, |
| "loss": 0.6241, |
| "step": 2250 |
| }, |
| { |
| "epoch": 2.9, |
| "learning_rate": 0.00016136752136752138, |
| "loss": 0.3143, |
| "step": 2260 |
| }, |
| { |
| "epoch": 2.91, |
| "learning_rate": 0.0001611965811965812, |
| "loss": 0.5144, |
| "step": 2270 |
| }, |
| { |
| "epoch": 2.92, |
| "learning_rate": 0.00016102564102564104, |
| "loss": 0.1882, |
| "step": 2280 |
| }, |
| { |
| "epoch": 2.94, |
| "learning_rate": 0.00016085470085470087, |
| "loss": 0.3812, |
| "step": 2290 |
| }, |
| { |
| "epoch": 2.95, |
| "learning_rate": 0.0001606837606837607, |
| "loss": 0.2284, |
| "step": 2300 |
| }, |
| { |
| "epoch": 2.96, |
| "learning_rate": 0.00016051282051282053, |
| "loss": 0.3319, |
| "step": 2310 |
| }, |
| { |
| "epoch": 2.97, |
| "learning_rate": 0.00016034188034188036, |
| "loss": 0.7971, |
| "step": 2320 |
| }, |
| { |
| "epoch": 2.99, |
| "learning_rate": 0.00016017094017094018, |
| "loss": 0.3977, |
| "step": 2330 |
| }, |
| { |
| "epoch": 3.0, |
| "learning_rate": 0.00016, |
| "loss": 0.5652, |
| "step": 2340 |
| }, |
| { |
| "epoch": 3.01, |
| "learning_rate": 0.00015982905982905984, |
| "loss": 0.2831, |
| "step": 2350 |
| }, |
| { |
| "epoch": 3.03, |
| "learning_rate": 0.00015965811965811967, |
| "loss": 0.1566, |
| "step": 2360 |
| }, |
| { |
| "epoch": 3.04, |
| "learning_rate": 0.0001594871794871795, |
| "loss": 0.1572, |
| "step": 2370 |
| }, |
| { |
| "epoch": 3.05, |
| "learning_rate": 0.0001593162393162393, |
| "loss": 0.2303, |
| "step": 2380 |
| }, |
| { |
| "epoch": 3.06, |
| "learning_rate": 0.00015914529914529913, |
| "loss": 0.3149, |
| "step": 2390 |
| }, |
| { |
| "epoch": 3.08, |
| "learning_rate": 0.00015897435897435896, |
| "loss": 0.3658, |
| "step": 2400 |
| }, |
| { |
| "epoch": 3.09, |
| "learning_rate": 0.00015880341880341881, |
| "loss": 0.1089, |
| "step": 2410 |
| }, |
| { |
| "epoch": 3.1, |
| "learning_rate": 0.00015863247863247864, |
| "loss": 0.3972, |
| "step": 2420 |
| }, |
| { |
| "epoch": 3.12, |
| "learning_rate": 0.00015846153846153847, |
| "loss": 0.2249, |
| "step": 2430 |
| }, |
| { |
| "epoch": 3.13, |
| "learning_rate": 0.0001582905982905983, |
| "loss": 0.1594, |
| "step": 2440 |
| }, |
| { |
| "epoch": 3.14, |
| "learning_rate": 0.00015811965811965813, |
| "loss": 0.2473, |
| "step": 2450 |
| }, |
| { |
| "epoch": 3.15, |
| "learning_rate": 0.00015794871794871796, |
| "loss": 0.3014, |
| "step": 2460 |
| }, |
| { |
| "epoch": 3.17, |
| "learning_rate": 0.0001577777777777778, |
| "loss": 0.2009, |
| "step": 2470 |
| }, |
| { |
| "epoch": 3.18, |
| "learning_rate": 0.00015760683760683762, |
| "loss": 0.2739, |
| "step": 2480 |
| }, |
| { |
| "epoch": 3.19, |
| "learning_rate": 0.00015743589743589745, |
| "loss": 0.3456, |
| "step": 2490 |
| }, |
| { |
| "epoch": 3.21, |
| "learning_rate": 0.00015726495726495727, |
| "loss": 0.201, |
| "step": 2500 |
| }, |
| { |
| "epoch": 3.22, |
| "learning_rate": 0.0001570940170940171, |
| "loss": 0.4472, |
| "step": 2510 |
| }, |
| { |
| "epoch": 3.23, |
| "learning_rate": 0.00015692307692307693, |
| "loss": 0.3452, |
| "step": 2520 |
| }, |
| { |
| "epoch": 3.24, |
| "learning_rate": 0.00015675213675213676, |
| "loss": 0.1802, |
| "step": 2530 |
| }, |
| { |
| "epoch": 3.26, |
| "learning_rate": 0.0001565811965811966, |
| "loss": 0.3302, |
| "step": 2540 |
| }, |
| { |
| "epoch": 3.27, |
| "learning_rate": 0.00015641025641025642, |
| "loss": 0.2736, |
| "step": 2550 |
| }, |
| { |
| "epoch": 3.28, |
| "learning_rate": 0.00015623931623931625, |
| "loss": 0.3162, |
| "step": 2560 |
| }, |
| { |
| "epoch": 3.29, |
| "learning_rate": 0.00015606837606837608, |
| "loss": 0.1153, |
| "step": 2570 |
| }, |
| { |
| "epoch": 3.31, |
| "learning_rate": 0.0001558974358974359, |
| "loss": 0.1698, |
| "step": 2580 |
| }, |
| { |
| "epoch": 3.32, |
| "learning_rate": 0.00015572649572649573, |
| "loss": 0.4336, |
| "step": 2590 |
| }, |
| { |
| "epoch": 3.33, |
| "learning_rate": 0.00015555555555555556, |
| "loss": 0.1784, |
| "step": 2600 |
| }, |
| { |
| "epoch": 3.35, |
| "learning_rate": 0.0001553846153846154, |
| "loss": 0.2714, |
| "step": 2610 |
| }, |
| { |
| "epoch": 3.36, |
| "learning_rate": 0.00015521367521367522, |
| "loss": 0.2657, |
| "step": 2620 |
| }, |
| { |
| "epoch": 3.37, |
| "learning_rate": 0.00015504273504273505, |
| "loss": 0.3528, |
| "step": 2630 |
| }, |
| { |
| "epoch": 3.38, |
| "learning_rate": 0.00015487179487179488, |
| "loss": 0.2416, |
| "step": 2640 |
| }, |
| { |
| "epoch": 3.4, |
| "learning_rate": 0.0001547008547008547, |
| "loss": 0.1516, |
| "step": 2650 |
| }, |
| { |
| "epoch": 3.41, |
| "learning_rate": 0.00015452991452991454, |
| "loss": 0.3435, |
| "step": 2660 |
| }, |
| { |
| "epoch": 3.42, |
| "learning_rate": 0.00015435897435897436, |
| "loss": 0.2755, |
| "step": 2670 |
| }, |
| { |
| "epoch": 3.44, |
| "learning_rate": 0.0001541880341880342, |
| "loss": 0.3667, |
| "step": 2680 |
| }, |
| { |
| "epoch": 3.45, |
| "learning_rate": 0.00015401709401709402, |
| "loss": 0.2273, |
| "step": 2690 |
| }, |
| { |
| "epoch": 3.46, |
| "learning_rate": 0.00015384615384615385, |
| "loss": 0.3083, |
| "step": 2700 |
| }, |
| { |
| "epoch": 3.47, |
| "learning_rate": 0.00015367521367521368, |
| "loss": 0.1352, |
| "step": 2710 |
| }, |
| { |
| "epoch": 3.49, |
| "learning_rate": 0.0001535042735042735, |
| "loss": 0.2642, |
| "step": 2720 |
| }, |
| { |
| "epoch": 3.5, |
| "learning_rate": 0.00015333333333333334, |
| "loss": 0.2947, |
| "step": 2730 |
| }, |
| { |
| "epoch": 3.51, |
| "learning_rate": 0.0001531623931623932, |
| "loss": 0.2894, |
| "step": 2740 |
| }, |
| { |
| "epoch": 3.53, |
| "learning_rate": 0.000152991452991453, |
| "loss": 0.3171, |
| "step": 2750 |
| }, |
| { |
| "epoch": 3.54, |
| "learning_rate": 0.00015282051282051282, |
| "loss": 0.1399, |
| "step": 2760 |
| }, |
| { |
| "epoch": 3.55, |
| "learning_rate": 0.00015264957264957265, |
| "loss": 0.3285, |
| "step": 2770 |
| }, |
| { |
| "epoch": 3.56, |
| "learning_rate": 0.00015247863247863248, |
| "loss": 0.3482, |
| "step": 2780 |
| }, |
| { |
| "epoch": 3.58, |
| "learning_rate": 0.0001523076923076923, |
| "loss": 0.2303, |
| "step": 2790 |
| }, |
| { |
| "epoch": 3.59, |
| "learning_rate": 0.00015213675213675214, |
| "loss": 0.1022, |
| "step": 2800 |
| }, |
| { |
| "epoch": 3.6, |
| "learning_rate": 0.00015196581196581197, |
| "loss": 0.5218, |
| "step": 2810 |
| }, |
| { |
| "epoch": 3.62, |
| "learning_rate": 0.0001517948717948718, |
| "loss": 0.1958, |
| "step": 2820 |
| }, |
| { |
| "epoch": 3.63, |
| "learning_rate": 0.00015162393162393163, |
| "loss": 0.2601, |
| "step": 2830 |
| }, |
| { |
| "epoch": 3.64, |
| "learning_rate": 0.00015145299145299146, |
| "loss": 0.2364, |
| "step": 2840 |
| }, |
| { |
| "epoch": 3.65, |
| "learning_rate": 0.00015128205128205128, |
| "loss": 0.2623, |
| "step": 2850 |
| }, |
| { |
| "epoch": 3.67, |
| "learning_rate": 0.0001511111111111111, |
| "loss": 0.3103, |
| "step": 2860 |
| }, |
| { |
| "epoch": 3.68, |
| "learning_rate": 0.00015094017094017094, |
| "loss": 0.5716, |
| "step": 2870 |
| }, |
| { |
| "epoch": 3.69, |
| "learning_rate": 0.00015076923076923077, |
| "loss": 0.1348, |
| "step": 2880 |
| }, |
| { |
| "epoch": 3.71, |
| "learning_rate": 0.0001505982905982906, |
| "loss": 0.1907, |
| "step": 2890 |
| }, |
| { |
| "epoch": 3.72, |
| "learning_rate": 0.00015042735042735043, |
| "loss": 0.1671, |
| "step": 2900 |
| }, |
| { |
| "epoch": 3.73, |
| "learning_rate": 0.00015025641025641026, |
| "loss": 0.4659, |
| "step": 2910 |
| }, |
| { |
| "epoch": 3.74, |
| "learning_rate": 0.00015008547008547009, |
| "loss": 0.1819, |
| "step": 2920 |
| }, |
| { |
| "epoch": 3.76, |
| "learning_rate": 0.00014991452991452991, |
| "loss": 0.57, |
| "step": 2930 |
| }, |
| { |
| "epoch": 3.77, |
| "learning_rate": 0.00014974358974358974, |
| "loss": 0.4177, |
| "step": 2940 |
| }, |
| { |
| "epoch": 3.78, |
| "learning_rate": 0.00014957264957264957, |
| "loss": 0.2709, |
| "step": 2950 |
| }, |
| { |
| "epoch": 3.79, |
| "learning_rate": 0.0001494017094017094, |
| "loss": 0.3616, |
| "step": 2960 |
| }, |
| { |
| "epoch": 3.81, |
| "learning_rate": 0.00014923076923076923, |
| "loss": 0.2519, |
| "step": 2970 |
| }, |
| { |
| "epoch": 3.82, |
| "learning_rate": 0.00014905982905982906, |
| "loss": 0.3444, |
| "step": 2980 |
| }, |
| { |
| "epoch": 3.83, |
| "learning_rate": 0.0001488888888888889, |
| "loss": 0.4528, |
| "step": 2990 |
| }, |
| { |
| "epoch": 3.85, |
| "learning_rate": 0.00014871794871794872, |
| "loss": 0.3906, |
| "step": 3000 |
| }, |
| { |
| "epoch": 3.85, |
| "eval_accuracy": 0.7536475869809203, |
| "eval_loss": 0.7957639098167419, |
| "eval_runtime": 34.4592, |
| "eval_samples_per_second": 51.771, |
| "eval_steps_per_second": 6.471, |
| "step": 3000 |
| }, |
| { |
| "epoch": 3.86, |
| "learning_rate": 0.00014854700854700857, |
| "loss": 0.3384, |
| "step": 3010 |
| }, |
| { |
| "epoch": 3.87, |
| "learning_rate": 0.0001483760683760684, |
| "loss": 0.1817, |
| "step": 3020 |
| }, |
| { |
| "epoch": 3.88, |
| "learning_rate": 0.00014820512820512823, |
| "loss": 0.4493, |
| "step": 3030 |
| }, |
| { |
| "epoch": 3.9, |
| "learning_rate": 0.00014803418803418806, |
| "loss": 0.2129, |
| "step": 3040 |
| }, |
| { |
| "epoch": 3.91, |
| "learning_rate": 0.0001478632478632479, |
| "loss": 0.3385, |
| "step": 3050 |
| }, |
| { |
| "epoch": 3.92, |
| "learning_rate": 0.00014769230769230772, |
| "loss": 0.2875, |
| "step": 3060 |
| }, |
| { |
| "epoch": 3.94, |
| "learning_rate": 0.00014752136752136755, |
| "loss": 0.3154, |
| "step": 3070 |
| }, |
| { |
| "epoch": 3.95, |
| "learning_rate": 0.00014735042735042737, |
| "loss": 0.2525, |
| "step": 3080 |
| }, |
| { |
| "epoch": 3.96, |
| "learning_rate": 0.0001471794871794872, |
| "loss": 0.3115, |
| "step": 3090 |
| }, |
| { |
| "epoch": 3.97, |
| "learning_rate": 0.00014700854700854703, |
| "loss": 0.2375, |
| "step": 3100 |
| }, |
| { |
| "epoch": 3.99, |
| "learning_rate": 0.00014683760683760683, |
| "loss": 0.121, |
| "step": 3110 |
| }, |
| { |
| "epoch": 4.0, |
| "learning_rate": 0.00014666666666666666, |
| "loss": 0.2368, |
| "step": 3120 |
| }, |
| { |
| "epoch": 4.01, |
| "learning_rate": 0.0001464957264957265, |
| "loss": 0.1913, |
| "step": 3130 |
| }, |
| { |
| "epoch": 4.03, |
| "learning_rate": 0.00014632478632478632, |
| "loss": 0.2868, |
| "step": 3140 |
| }, |
| { |
| "epoch": 4.04, |
| "learning_rate": 0.00014615384615384615, |
| "loss": 0.1729, |
| "step": 3150 |
| }, |
| { |
| "epoch": 4.05, |
| "learning_rate": 0.00014598290598290598, |
| "loss": 0.1153, |
| "step": 3160 |
| }, |
| { |
| "epoch": 4.06, |
| "learning_rate": 0.0001458119658119658, |
| "loss": 0.1036, |
| "step": 3170 |
| }, |
| { |
| "epoch": 4.08, |
| "learning_rate": 0.00014564102564102564, |
| "loss": 0.0974, |
| "step": 3180 |
| }, |
| { |
| "epoch": 4.09, |
| "learning_rate": 0.00014547008547008546, |
| "loss": 0.1176, |
| "step": 3190 |
| }, |
| { |
| "epoch": 4.1, |
| "learning_rate": 0.0001452991452991453, |
| "loss": 0.1305, |
| "step": 3200 |
| }, |
| { |
| "epoch": 4.12, |
| "learning_rate": 0.00014512820512820512, |
| "loss": 0.1496, |
| "step": 3210 |
| }, |
| { |
| "epoch": 4.13, |
| "learning_rate": 0.00014495726495726495, |
| "loss": 0.143, |
| "step": 3220 |
| }, |
| { |
| "epoch": 4.14, |
| "learning_rate": 0.00014478632478632478, |
| "loss": 0.0647, |
| "step": 3230 |
| }, |
| { |
| "epoch": 4.15, |
| "learning_rate": 0.0001446153846153846, |
| "loss": 0.0355, |
| "step": 3240 |
| }, |
| { |
| "epoch": 4.17, |
| "learning_rate": 0.00014444444444444444, |
| "loss": 0.0766, |
| "step": 3250 |
| }, |
| { |
| "epoch": 4.18, |
| "learning_rate": 0.00014427350427350427, |
| "loss": 0.1553, |
| "step": 3260 |
| }, |
| { |
| "epoch": 4.19, |
| "learning_rate": 0.0001441025641025641, |
| "loss": 0.1441, |
| "step": 3270 |
| }, |
| { |
| "epoch": 4.21, |
| "learning_rate": 0.00014393162393162392, |
| "loss": 0.0466, |
| "step": 3280 |
| }, |
| { |
| "epoch": 4.22, |
| "learning_rate": 0.00014376068376068378, |
| "loss": 0.0734, |
| "step": 3290 |
| }, |
| { |
| "epoch": 4.23, |
| "learning_rate": 0.0001435897435897436, |
| "loss": 0.0801, |
| "step": 3300 |
| }, |
| { |
| "epoch": 4.24, |
| "learning_rate": 0.00014341880341880344, |
| "loss": 0.1471, |
| "step": 3310 |
| }, |
| { |
| "epoch": 4.26, |
| "learning_rate": 0.00014324786324786327, |
| "loss": 0.1419, |
| "step": 3320 |
| }, |
| { |
| "epoch": 4.27, |
| "learning_rate": 0.0001430769230769231, |
| "loss": 0.1019, |
| "step": 3330 |
| }, |
| { |
| "epoch": 4.28, |
| "learning_rate": 0.00014290598290598292, |
| "loss": 0.2108, |
| "step": 3340 |
| }, |
| { |
| "epoch": 4.29, |
| "learning_rate": 0.00014273504273504275, |
| "loss": 0.0273, |
| "step": 3350 |
| }, |
| { |
| "epoch": 4.31, |
| "learning_rate": 0.00014256410256410258, |
| "loss": 0.2777, |
| "step": 3360 |
| }, |
| { |
| "epoch": 4.32, |
| "learning_rate": 0.0001423931623931624, |
| "loss": 0.085, |
| "step": 3370 |
| }, |
| { |
| "epoch": 4.33, |
| "learning_rate": 0.00014222222222222224, |
| "loss": 0.136, |
| "step": 3380 |
| }, |
| { |
| "epoch": 4.35, |
| "learning_rate": 0.00014205128205128207, |
| "loss": 0.3193, |
| "step": 3390 |
| }, |
| { |
| "epoch": 4.36, |
| "learning_rate": 0.0001418803418803419, |
| "loss": 0.1982, |
| "step": 3400 |
| }, |
| { |
| "epoch": 4.37, |
| "learning_rate": 0.00014170940170940173, |
| "loss": 0.1471, |
| "step": 3410 |
| }, |
| { |
| "epoch": 4.38, |
| "learning_rate": 0.00014153846153846156, |
| "loss": 0.1064, |
| "step": 3420 |
| }, |
| { |
| "epoch": 4.4, |
| "learning_rate": 0.00014136752136752138, |
| "loss": 0.2093, |
| "step": 3430 |
| }, |
| { |
| "epoch": 4.41, |
| "learning_rate": 0.0001411965811965812, |
| "loss": 0.3454, |
| "step": 3440 |
| }, |
| { |
| "epoch": 4.42, |
| "learning_rate": 0.00014102564102564104, |
| "loss": 0.1812, |
| "step": 3450 |
| }, |
| { |
| "epoch": 4.44, |
| "learning_rate": 0.00014085470085470087, |
| "loss": 0.146, |
| "step": 3460 |
| }, |
| { |
| "epoch": 4.45, |
| "learning_rate": 0.0001406837606837607, |
| "loss": 0.1797, |
| "step": 3470 |
| }, |
| { |
| "epoch": 4.46, |
| "learning_rate": 0.0001405128205128205, |
| "loss": 0.1049, |
| "step": 3480 |
| }, |
| { |
| "epoch": 4.47, |
| "learning_rate": 0.00014034188034188033, |
| "loss": 0.1417, |
| "step": 3490 |
| }, |
| { |
| "epoch": 4.49, |
| "learning_rate": 0.00014017094017094016, |
| "loss": 0.1369, |
| "step": 3500 |
| }, |
| { |
| "epoch": 4.5, |
| "learning_rate": 0.00014, |
| "loss": 0.0872, |
| "step": 3510 |
| }, |
| { |
| "epoch": 4.51, |
| "learning_rate": 0.00013982905982905982, |
| "loss": 0.0559, |
| "step": 3520 |
| }, |
| { |
| "epoch": 4.53, |
| "learning_rate": 0.00013965811965811965, |
| "loss": 0.2372, |
| "step": 3530 |
| }, |
| { |
| "epoch": 4.54, |
| "learning_rate": 0.00013948717948717947, |
| "loss": 0.4092, |
| "step": 3540 |
| }, |
| { |
| "epoch": 4.55, |
| "learning_rate": 0.0001393162393162393, |
| "loss": 0.2881, |
| "step": 3550 |
| }, |
| { |
| "epoch": 4.56, |
| "learning_rate": 0.00013914529914529916, |
| "loss": 0.3194, |
| "step": 3560 |
| }, |
| { |
| "epoch": 4.58, |
| "learning_rate": 0.000138974358974359, |
| "loss": 0.4236, |
| "step": 3570 |
| }, |
| { |
| "epoch": 4.59, |
| "learning_rate": 0.00013880341880341882, |
| "loss": 0.0805, |
| "step": 3580 |
| }, |
| { |
| "epoch": 4.6, |
| "learning_rate": 0.00013863247863247865, |
| "loss": 0.1488, |
| "step": 3590 |
| }, |
| { |
| "epoch": 4.62, |
| "learning_rate": 0.00013846153846153847, |
| "loss": 0.1332, |
| "step": 3600 |
| }, |
| { |
| "epoch": 4.63, |
| "learning_rate": 0.0001382905982905983, |
| "loss": 0.2034, |
| "step": 3610 |
| }, |
| { |
| "epoch": 4.64, |
| "learning_rate": 0.00013811965811965813, |
| "loss": 0.1837, |
| "step": 3620 |
| }, |
| { |
| "epoch": 4.65, |
| "learning_rate": 0.00013794871794871796, |
| "loss": 0.156, |
| "step": 3630 |
| }, |
| { |
| "epoch": 4.67, |
| "learning_rate": 0.0001377777777777778, |
| "loss": 0.1857, |
| "step": 3640 |
| }, |
| { |
| "epoch": 4.68, |
| "learning_rate": 0.00013760683760683762, |
| "loss": 0.1481, |
| "step": 3650 |
| }, |
| { |
| "epoch": 4.69, |
| "learning_rate": 0.00013743589743589745, |
| "loss": 0.3842, |
| "step": 3660 |
| }, |
| { |
| "epoch": 4.71, |
| "learning_rate": 0.00013726495726495728, |
| "loss": 0.1999, |
| "step": 3670 |
| }, |
| { |
| "epoch": 4.72, |
| "learning_rate": 0.0001370940170940171, |
| "loss": 0.3166, |
| "step": 3680 |
| }, |
| { |
| "epoch": 4.73, |
| "learning_rate": 0.00013692307692307693, |
| "loss": 0.3671, |
| "step": 3690 |
| }, |
| { |
| "epoch": 4.74, |
| "learning_rate": 0.00013675213675213676, |
| "loss": 0.32, |
| "step": 3700 |
| }, |
| { |
| "epoch": 4.76, |
| "learning_rate": 0.0001365811965811966, |
| "loss": 0.1638, |
| "step": 3710 |
| }, |
| { |
| "epoch": 4.77, |
| "learning_rate": 0.00013641025641025642, |
| "loss": 0.0734, |
| "step": 3720 |
| }, |
| { |
| "epoch": 4.78, |
| "learning_rate": 0.00013623931623931625, |
| "loss": 0.115, |
| "step": 3730 |
| }, |
| { |
| "epoch": 4.79, |
| "learning_rate": 0.00013606837606837608, |
| "loss": 0.1742, |
| "step": 3740 |
| }, |
| { |
| "epoch": 4.81, |
| "learning_rate": 0.0001358974358974359, |
| "loss": 0.0934, |
| "step": 3750 |
| }, |
| { |
| "epoch": 4.82, |
| "learning_rate": 0.00013572649572649574, |
| "loss": 0.2705, |
| "step": 3760 |
| }, |
| { |
| "epoch": 4.83, |
| "learning_rate": 0.00013555555555555556, |
| "loss": 0.0109, |
| "step": 3770 |
| }, |
| { |
| "epoch": 4.85, |
| "learning_rate": 0.0001353846153846154, |
| "loss": 0.2112, |
| "step": 3780 |
| }, |
| { |
| "epoch": 4.86, |
| "learning_rate": 0.00013521367521367522, |
| "loss": 0.0508, |
| "step": 3790 |
| }, |
| { |
| "epoch": 4.87, |
| "learning_rate": 0.00013504273504273505, |
| "loss": 0.1096, |
| "step": 3800 |
| }, |
| { |
| "epoch": 4.88, |
| "learning_rate": 0.00013487179487179488, |
| "loss": 0.2732, |
| "step": 3810 |
| }, |
| { |
| "epoch": 4.9, |
| "learning_rate": 0.0001347008547008547, |
| "loss": 0.186, |
| "step": 3820 |
| }, |
| { |
| "epoch": 4.91, |
| "learning_rate": 0.00013452991452991454, |
| "loss": 0.1218, |
| "step": 3830 |
| }, |
| { |
| "epoch": 4.92, |
| "learning_rate": 0.00013435897435897437, |
| "loss": 0.2359, |
| "step": 3840 |
| }, |
| { |
| "epoch": 4.94, |
| "learning_rate": 0.0001341880341880342, |
| "loss": 0.2884, |
| "step": 3850 |
| }, |
| { |
| "epoch": 4.95, |
| "learning_rate": 0.00013401709401709402, |
| "loss": 0.286, |
| "step": 3860 |
| }, |
| { |
| "epoch": 4.96, |
| "learning_rate": 0.00013384615384615385, |
| "loss": 0.0854, |
| "step": 3870 |
| }, |
| { |
| "epoch": 4.97, |
| "learning_rate": 0.00013367521367521368, |
| "loss": 0.0308, |
| "step": 3880 |
| }, |
| { |
| "epoch": 4.99, |
| "learning_rate": 0.0001335042735042735, |
| "loss": 0.1327, |
| "step": 3890 |
| }, |
| { |
| "epoch": 5.0, |
| "learning_rate": 0.00013333333333333334, |
| "loss": 0.1869, |
| "step": 3900 |
| }, |
| { |
| "epoch": 5.01, |
| "learning_rate": 0.00013316239316239317, |
| "loss": 0.1636, |
| "step": 3910 |
| }, |
| { |
| "epoch": 5.03, |
| "learning_rate": 0.000132991452991453, |
| "loss": 0.079, |
| "step": 3920 |
| }, |
| { |
| "epoch": 5.04, |
| "learning_rate": 0.00013282051282051283, |
| "loss": 0.1349, |
| "step": 3930 |
| }, |
| { |
| "epoch": 5.05, |
| "learning_rate": 0.00013264957264957266, |
| "loss": 0.0564, |
| "step": 3940 |
| }, |
| { |
| "epoch": 5.06, |
| "learning_rate": 0.00013247863247863248, |
| "loss": 0.1325, |
| "step": 3950 |
| }, |
| { |
| "epoch": 5.08, |
| "learning_rate": 0.0001323076923076923, |
| "loss": 0.1156, |
| "step": 3960 |
| }, |
| { |
| "epoch": 5.09, |
| "learning_rate": 0.00013213675213675214, |
| "loss": 0.1017, |
| "step": 3970 |
| }, |
| { |
| "epoch": 5.1, |
| "learning_rate": 0.00013196581196581197, |
| "loss": 0.1198, |
| "step": 3980 |
| }, |
| { |
| "epoch": 5.12, |
| "learning_rate": 0.0001317948717948718, |
| "loss": 0.1086, |
| "step": 3990 |
| }, |
| { |
| "epoch": 5.13, |
| "learning_rate": 0.00013162393162393163, |
| "loss": 0.1966, |
| "step": 4000 |
| }, |
| { |
| "epoch": 5.13, |
| "eval_accuracy": 0.7727272727272727, |
| "eval_loss": 1.0012712478637695, |
| "eval_runtime": 36.9299, |
| "eval_samples_per_second": 48.308, |
| "eval_steps_per_second": 6.038, |
| "step": 4000 |
| }, |
| { |
| "epoch": 5.14, |
| "learning_rate": 0.00013145299145299146, |
| "loss": 0.0625, |
| "step": 4010 |
| }, |
| { |
| "epoch": 5.15, |
| "learning_rate": 0.00013128205128205129, |
| "loss": 0.0244, |
| "step": 4020 |
| }, |
| { |
| "epoch": 5.17, |
| "learning_rate": 0.00013111111111111111, |
| "loss": 0.0416, |
| "step": 4030 |
| }, |
| { |
| "epoch": 5.18, |
| "learning_rate": 0.00013094017094017094, |
| "loss": 0.0371, |
| "step": 4040 |
| }, |
| { |
| "epoch": 5.19, |
| "learning_rate": 0.00013076923076923077, |
| "loss": 0.075, |
| "step": 4050 |
| }, |
| { |
| "epoch": 5.21, |
| "learning_rate": 0.0001305982905982906, |
| "loss": 0.0265, |
| "step": 4060 |
| }, |
| { |
| "epoch": 5.22, |
| "learning_rate": 0.00013042735042735043, |
| "loss": 0.0334, |
| "step": 4070 |
| }, |
| { |
| "epoch": 5.23, |
| "learning_rate": 0.00013025641025641026, |
| "loss": 0.1455, |
| "step": 4080 |
| }, |
| { |
| "epoch": 5.24, |
| "learning_rate": 0.0001300854700854701, |
| "loss": 0.1084, |
| "step": 4090 |
| }, |
| { |
| "epoch": 5.26, |
| "learning_rate": 0.00012991452991452992, |
| "loss": 0.0283, |
| "step": 4100 |
| }, |
| { |
| "epoch": 5.27, |
| "learning_rate": 0.00012974358974358975, |
| "loss": 0.1102, |
| "step": 4110 |
| }, |
| { |
| "epoch": 5.28, |
| "learning_rate": 0.00012957264957264957, |
| "loss": 0.0225, |
| "step": 4120 |
| }, |
| { |
| "epoch": 5.29, |
| "learning_rate": 0.0001294017094017094, |
| "loss": 0.04, |
| "step": 4130 |
| }, |
| { |
| "epoch": 5.31, |
| "learning_rate": 0.00012923076923076923, |
| "loss": 0.1175, |
| "step": 4140 |
| }, |
| { |
| "epoch": 5.32, |
| "learning_rate": 0.00012905982905982906, |
| "loss": 0.0439, |
| "step": 4150 |
| }, |
| { |
| "epoch": 5.33, |
| "learning_rate": 0.00012888888888888892, |
| "loss": 0.101, |
| "step": 4160 |
| }, |
| { |
| "epoch": 5.35, |
| "learning_rate": 0.00012871794871794875, |
| "loss": 0.0631, |
| "step": 4170 |
| }, |
| { |
| "epoch": 5.36, |
| "learning_rate": 0.00012854700854700857, |
| "loss": 0.0275, |
| "step": 4180 |
| }, |
| { |
| "epoch": 5.37, |
| "learning_rate": 0.0001283760683760684, |
| "loss": 0.1865, |
| "step": 4190 |
| }, |
| { |
| "epoch": 5.38, |
| "learning_rate": 0.00012820512820512823, |
| "loss": 0.1747, |
| "step": 4200 |
| }, |
| { |
| "epoch": 5.4, |
| "learning_rate": 0.00012803418803418803, |
| "loss": 0.0581, |
| "step": 4210 |
| }, |
| { |
| "epoch": 5.41, |
| "learning_rate": 0.00012786324786324786, |
| "loss": 0.5311, |
| "step": 4220 |
| }, |
| { |
| "epoch": 5.42, |
| "learning_rate": 0.0001276923076923077, |
| "loss": 0.019, |
| "step": 4230 |
| }, |
| { |
| "epoch": 5.44, |
| "learning_rate": 0.00012752136752136752, |
| "loss": 0.1085, |
| "step": 4240 |
| }, |
| { |
| "epoch": 5.45, |
| "learning_rate": 0.00012735042735042735, |
| "loss": 0.2244, |
| "step": 4250 |
| }, |
| { |
| "epoch": 5.46, |
| "learning_rate": 0.00012717948717948718, |
| "loss": 0.2485, |
| "step": 4260 |
| }, |
| { |
| "epoch": 5.47, |
| "learning_rate": 0.000127008547008547, |
| "loss": 0.1385, |
| "step": 4270 |
| }, |
| { |
| "epoch": 5.49, |
| "learning_rate": 0.00012683760683760684, |
| "loss": 0.1264, |
| "step": 4280 |
| }, |
| { |
| "epoch": 5.5, |
| "learning_rate": 0.00012666666666666666, |
| "loss": 0.1278, |
| "step": 4290 |
| }, |
| { |
| "epoch": 5.51, |
| "learning_rate": 0.0001264957264957265, |
| "loss": 0.0181, |
| "step": 4300 |
| }, |
| { |
| "epoch": 5.53, |
| "learning_rate": 0.00012632478632478632, |
| "loss": 0.1171, |
| "step": 4310 |
| }, |
| { |
| "epoch": 5.54, |
| "learning_rate": 0.00012615384615384615, |
| "loss": 0.1718, |
| "step": 4320 |
| }, |
| { |
| "epoch": 5.55, |
| "learning_rate": 0.00012598290598290598, |
| "loss": 0.0421, |
| "step": 4330 |
| }, |
| { |
| "epoch": 5.56, |
| "learning_rate": 0.0001258119658119658, |
| "loss": 0.0212, |
| "step": 4340 |
| }, |
| { |
| "epoch": 5.58, |
| "learning_rate": 0.00012564102564102564, |
| "loss": 0.0426, |
| "step": 4350 |
| }, |
| { |
| "epoch": 5.59, |
| "learning_rate": 0.00012547008547008547, |
| "loss": 0.0028, |
| "step": 4360 |
| }, |
| { |
| "epoch": 5.6, |
| "learning_rate": 0.0001252991452991453, |
| "loss": 0.0967, |
| "step": 4370 |
| }, |
| { |
| "epoch": 5.62, |
| "learning_rate": 0.00012512820512820512, |
| "loss": 0.0351, |
| "step": 4380 |
| }, |
| { |
| "epoch": 5.63, |
| "learning_rate": 0.00012495726495726495, |
| "loss": 0.0242, |
| "step": 4390 |
| }, |
| { |
| "epoch": 5.64, |
| "learning_rate": 0.00012478632478632478, |
| "loss": 0.328, |
| "step": 4400 |
| }, |
| { |
| "epoch": 5.65, |
| "learning_rate": 0.0001246153846153846, |
| "loss": 0.1125, |
| "step": 4410 |
| }, |
| { |
| "epoch": 5.67, |
| "learning_rate": 0.00012444444444444444, |
| "loss": 0.0525, |
| "step": 4420 |
| }, |
| { |
| "epoch": 5.68, |
| "learning_rate": 0.0001242735042735043, |
| "loss": 0.1387, |
| "step": 4430 |
| }, |
| { |
| "epoch": 5.69, |
| "learning_rate": 0.00012410256410256412, |
| "loss": 0.22, |
| "step": 4440 |
| }, |
| { |
| "epoch": 5.71, |
| "learning_rate": 0.00012393162393162395, |
| "loss": 0.1024, |
| "step": 4450 |
| }, |
| { |
| "epoch": 5.72, |
| "learning_rate": 0.00012376068376068378, |
| "loss": 0.1019, |
| "step": 4460 |
| }, |
| { |
| "epoch": 5.73, |
| "learning_rate": 0.0001235897435897436, |
| "loss": 0.0114, |
| "step": 4470 |
| }, |
| { |
| "epoch": 5.74, |
| "learning_rate": 0.00012341880341880344, |
| "loss": 0.0043, |
| "step": 4480 |
| }, |
| { |
| "epoch": 5.76, |
| "learning_rate": 0.00012324786324786327, |
| "loss": 0.0503, |
| "step": 4490 |
| }, |
| { |
| "epoch": 5.77, |
| "learning_rate": 0.0001230769230769231, |
| "loss": 0.1472, |
| "step": 4500 |
| }, |
| { |
| "epoch": 5.78, |
| "learning_rate": 0.00012290598290598293, |
| "loss": 0.3344, |
| "step": 4510 |
| }, |
| { |
| "epoch": 5.79, |
| "learning_rate": 0.00012273504273504276, |
| "loss": 0.1512, |
| "step": 4520 |
| }, |
| { |
| "epoch": 5.81, |
| "learning_rate": 0.00012256410256410258, |
| "loss": 0.1359, |
| "step": 4530 |
| }, |
| { |
| "epoch": 5.82, |
| "learning_rate": 0.0001223931623931624, |
| "loss": 0.0761, |
| "step": 4540 |
| }, |
| { |
| "epoch": 5.83, |
| "learning_rate": 0.00012222222222222224, |
| "loss": 0.2204, |
| "step": 4550 |
| }, |
| { |
| "epoch": 5.85, |
| "learning_rate": 0.00012205128205128207, |
| "loss": 0.121, |
| "step": 4560 |
| }, |
| { |
| "epoch": 5.86, |
| "learning_rate": 0.0001218803418803419, |
| "loss": 0.0244, |
| "step": 4570 |
| }, |
| { |
| "epoch": 5.87, |
| "learning_rate": 0.0001217094017094017, |
| "loss": 0.0192, |
| "step": 4580 |
| }, |
| { |
| "epoch": 5.88, |
| "learning_rate": 0.00012153846153846153, |
| "loss": 0.017, |
| "step": 4590 |
| }, |
| { |
| "epoch": 5.9, |
| "learning_rate": 0.00012136752136752136, |
| "loss": 0.1092, |
| "step": 4600 |
| }, |
| { |
| "epoch": 5.91, |
| "learning_rate": 0.00012119658119658119, |
| "loss": 0.1026, |
| "step": 4610 |
| }, |
| { |
| "epoch": 5.92, |
| "learning_rate": 0.00012102564102564103, |
| "loss": 0.089, |
| "step": 4620 |
| }, |
| { |
| "epoch": 5.94, |
| "learning_rate": 0.00012085470085470086, |
| "loss": 0.0469, |
| "step": 4630 |
| }, |
| { |
| "epoch": 5.95, |
| "learning_rate": 0.00012068376068376069, |
| "loss": 0.1308, |
| "step": 4640 |
| }, |
| { |
| "epoch": 5.96, |
| "learning_rate": 0.00012051282051282052, |
| "loss": 0.1473, |
| "step": 4650 |
| }, |
| { |
| "epoch": 5.97, |
| "learning_rate": 0.00012034188034188035, |
| "loss": 0.1271, |
| "step": 4660 |
| }, |
| { |
| "epoch": 5.99, |
| "learning_rate": 0.00012017094017094017, |
| "loss": 0.2707, |
| "step": 4670 |
| }, |
| { |
| "epoch": 6.0, |
| "learning_rate": 0.00012, |
| "loss": 0.1334, |
| "step": 4680 |
| }, |
| { |
| "epoch": 6.01, |
| "learning_rate": 0.00011982905982905983, |
| "loss": 0.0675, |
| "step": 4690 |
| }, |
| { |
| "epoch": 6.03, |
| "learning_rate": 0.00011965811965811966, |
| "loss": 0.025, |
| "step": 4700 |
| }, |
| { |
| "epoch": 6.04, |
| "learning_rate": 0.00011948717948717949, |
| "loss": 0.0571, |
| "step": 4710 |
| }, |
| { |
| "epoch": 6.05, |
| "learning_rate": 0.00011931623931623932, |
| "loss": 0.131, |
| "step": 4720 |
| }, |
| { |
| "epoch": 6.06, |
| "learning_rate": 0.00011914529914529915, |
| "loss": 0.0201, |
| "step": 4730 |
| }, |
| { |
| "epoch": 6.08, |
| "learning_rate": 0.00011897435897435898, |
| "loss": 0.1559, |
| "step": 4740 |
| }, |
| { |
| "epoch": 6.09, |
| "learning_rate": 0.0001188034188034188, |
| "loss": 0.0599, |
| "step": 4750 |
| }, |
| { |
| "epoch": 6.1, |
| "learning_rate": 0.00011863247863247863, |
| "loss": 0.0502, |
| "step": 4760 |
| }, |
| { |
| "epoch": 6.12, |
| "learning_rate": 0.00011846153846153846, |
| "loss": 0.0041, |
| "step": 4770 |
| }, |
| { |
| "epoch": 6.13, |
| "learning_rate": 0.00011829059829059829, |
| "loss": 0.0315, |
| "step": 4780 |
| }, |
| { |
| "epoch": 6.14, |
| "learning_rate": 0.00011811965811965813, |
| "loss": 0.0132, |
| "step": 4790 |
| }, |
| { |
| "epoch": 6.15, |
| "learning_rate": 0.00011794871794871796, |
| "loss": 0.0755, |
| "step": 4800 |
| }, |
| { |
| "epoch": 6.17, |
| "learning_rate": 0.00011777777777777779, |
| "loss": 0.1349, |
| "step": 4810 |
| }, |
| { |
| "epoch": 6.18, |
| "learning_rate": 0.00011760683760683762, |
| "loss": 0.0186, |
| "step": 4820 |
| }, |
| { |
| "epoch": 6.19, |
| "learning_rate": 0.00011743589743589745, |
| "loss": 0.0224, |
| "step": 4830 |
| }, |
| { |
| "epoch": 6.21, |
| "learning_rate": 0.00011726495726495728, |
| "loss": 0.0728, |
| "step": 4840 |
| }, |
| { |
| "epoch": 6.22, |
| "learning_rate": 0.00011709401709401711, |
| "loss": 0.0094, |
| "step": 4850 |
| }, |
| { |
| "epoch": 6.23, |
| "learning_rate": 0.00011692307692307694, |
| "loss": 0.1172, |
| "step": 4860 |
| }, |
| { |
| "epoch": 6.24, |
| "learning_rate": 0.00011675213675213676, |
| "loss": 0.0078, |
| "step": 4870 |
| }, |
| { |
| "epoch": 6.26, |
| "learning_rate": 0.0001165811965811966, |
| "loss": 0.1227, |
| "step": 4880 |
| }, |
| { |
| "epoch": 6.27, |
| "learning_rate": 0.00011641025641025642, |
| "loss": 0.0078, |
| "step": 4890 |
| }, |
| { |
| "epoch": 6.28, |
| "learning_rate": 0.00011623931623931625, |
| "loss": 0.0053, |
| "step": 4900 |
| }, |
| { |
| "epoch": 6.29, |
| "learning_rate": 0.00011606837606837608, |
| "loss": 0.0513, |
| "step": 4910 |
| }, |
| { |
| "epoch": 6.31, |
| "learning_rate": 0.00011589743589743591, |
| "loss": 0.243, |
| "step": 4920 |
| }, |
| { |
| "epoch": 6.32, |
| "learning_rate": 0.00011572649572649574, |
| "loss": 0.0159, |
| "step": 4930 |
| }, |
| { |
| "epoch": 6.33, |
| "learning_rate": 0.00011555555555555555, |
| "loss": 0.0781, |
| "step": 4940 |
| }, |
| { |
| "epoch": 6.35, |
| "learning_rate": 0.00011538461538461538, |
| "loss": 0.0939, |
| "step": 4950 |
| }, |
| { |
| "epoch": 6.36, |
| "learning_rate": 0.00011521367521367521, |
| "loss": 0.0423, |
| "step": 4960 |
| }, |
| { |
| "epoch": 6.37, |
| "learning_rate": 0.00011504273504273504, |
| "loss": 0.0027, |
| "step": 4970 |
| }, |
| { |
| "epoch": 6.38, |
| "learning_rate": 0.00011487179487179487, |
| "loss": 0.0013, |
| "step": 4980 |
| }, |
| { |
| "epoch": 6.4, |
| "learning_rate": 0.0001147008547008547, |
| "loss": 0.0022, |
| "step": 4990 |
| }, |
| { |
| "epoch": 6.41, |
| "learning_rate": 0.00011452991452991453, |
| "loss": 0.0226, |
| "step": 5000 |
| }, |
| { |
| "epoch": 6.41, |
| "eval_accuracy": 0.7716049382716049, |
| "eval_loss": 1.2063742876052856, |
| "eval_runtime": 38.8135, |
| "eval_samples_per_second": 45.963, |
| "eval_steps_per_second": 5.745, |
| "step": 5000 |
| }, |
| { |
| "epoch": 6.42, |
| "learning_rate": 0.00011435897435897435, |
| "loss": 0.0954, |
| "step": 5010 |
| }, |
| { |
| "epoch": 6.44, |
| "learning_rate": 0.00011418803418803418, |
| "loss": 0.175, |
| "step": 5020 |
| }, |
| { |
| "epoch": 6.45, |
| "learning_rate": 0.00011401709401709401, |
| "loss": 0.1029, |
| "step": 5030 |
| }, |
| { |
| "epoch": 6.46, |
| "learning_rate": 0.00011384615384615384, |
| "loss": 0.0504, |
| "step": 5040 |
| }, |
| { |
| "epoch": 6.47, |
| "learning_rate": 0.00011367521367521367, |
| "loss": 0.0413, |
| "step": 5050 |
| }, |
| { |
| "epoch": 6.49, |
| "learning_rate": 0.00011350427350427351, |
| "loss": 0.0692, |
| "step": 5060 |
| }, |
| { |
| "epoch": 6.5, |
| "learning_rate": 0.00011333333333333334, |
| "loss": 0.0874, |
| "step": 5070 |
| }, |
| { |
| "epoch": 6.51, |
| "learning_rate": 0.00011316239316239317, |
| "loss": 0.0858, |
| "step": 5080 |
| }, |
| { |
| "epoch": 6.53, |
| "learning_rate": 0.000112991452991453, |
| "loss": 0.0158, |
| "step": 5090 |
| }, |
| { |
| "epoch": 6.54, |
| "learning_rate": 0.00011282051282051283, |
| "loss": 0.0103, |
| "step": 5100 |
| }, |
| { |
| "epoch": 6.55, |
| "learning_rate": 0.00011264957264957266, |
| "loss": 0.088, |
| "step": 5110 |
| }, |
| { |
| "epoch": 6.56, |
| "learning_rate": 0.00011247863247863249, |
| "loss": 0.0014, |
| "step": 5120 |
| }, |
| { |
| "epoch": 6.58, |
| "learning_rate": 0.00011230769230769231, |
| "loss": 0.0473, |
| "step": 5130 |
| }, |
| { |
| "epoch": 6.59, |
| "learning_rate": 0.00011213675213675214, |
| "loss": 0.103, |
| "step": 5140 |
| }, |
| { |
| "epoch": 6.6, |
| "learning_rate": 0.00011196581196581197, |
| "loss": 0.0271, |
| "step": 5150 |
| }, |
| { |
| "epoch": 6.62, |
| "learning_rate": 0.0001117948717948718, |
| "loss": 0.1189, |
| "step": 5160 |
| }, |
| { |
| "epoch": 6.63, |
| "learning_rate": 0.00011162393162393163, |
| "loss": 0.2419, |
| "step": 5170 |
| }, |
| { |
| "epoch": 6.64, |
| "learning_rate": 0.00011145299145299146, |
| "loss": 0.0575, |
| "step": 5180 |
| }, |
| { |
| "epoch": 6.65, |
| "learning_rate": 0.00011128205128205129, |
| "loss": 0.0331, |
| "step": 5190 |
| }, |
| { |
| "epoch": 6.67, |
| "learning_rate": 0.00011111111111111112, |
| "loss": 0.1995, |
| "step": 5200 |
| }, |
| { |
| "epoch": 6.68, |
| "learning_rate": 0.00011094017094017095, |
| "loss": 0.0383, |
| "step": 5210 |
| }, |
| { |
| "epoch": 6.69, |
| "learning_rate": 0.00011076923076923077, |
| "loss": 0.0024, |
| "step": 5220 |
| }, |
| { |
| "epoch": 6.71, |
| "learning_rate": 0.00011059829059829062, |
| "loss": 0.2491, |
| "step": 5230 |
| }, |
| { |
| "epoch": 6.72, |
| "learning_rate": 0.00011042735042735045, |
| "loss": 0.0031, |
| "step": 5240 |
| }, |
| { |
| "epoch": 6.73, |
| "learning_rate": 0.00011025641025641027, |
| "loss": 0.0082, |
| "step": 5250 |
| }, |
| { |
| "epoch": 6.74, |
| "learning_rate": 0.0001100854700854701, |
| "loss": 0.0264, |
| "step": 5260 |
| }, |
| { |
| "epoch": 6.76, |
| "learning_rate": 0.00010991452991452993, |
| "loss": 0.0792, |
| "step": 5270 |
| }, |
| { |
| "epoch": 6.77, |
| "learning_rate": 0.00010974358974358976, |
| "loss": 0.1843, |
| "step": 5280 |
| }, |
| { |
| "epoch": 6.78, |
| "learning_rate": 0.00010957264957264959, |
| "loss": 0.1755, |
| "step": 5290 |
| }, |
| { |
| "epoch": 6.79, |
| "learning_rate": 0.00010940170940170942, |
| "loss": 0.2766, |
| "step": 5300 |
| }, |
| { |
| "epoch": 6.81, |
| "learning_rate": 0.00010923076923076922, |
| "loss": 0.1326, |
| "step": 5310 |
| }, |
| { |
| "epoch": 6.82, |
| "learning_rate": 0.00010905982905982905, |
| "loss": 0.005, |
| "step": 5320 |
| }, |
| { |
| "epoch": 6.83, |
| "learning_rate": 0.00010888888888888889, |
| "loss": 0.0611, |
| "step": 5330 |
| }, |
| { |
| "epoch": 6.85, |
| "learning_rate": 0.00010871794871794872, |
| "loss": 0.2345, |
| "step": 5340 |
| }, |
| { |
| "epoch": 6.86, |
| "learning_rate": 0.00010854700854700855, |
| "loss": 0.3693, |
| "step": 5350 |
| }, |
| { |
| "epoch": 6.87, |
| "learning_rate": 0.00010837606837606838, |
| "loss": 0.0654, |
| "step": 5360 |
| }, |
| { |
| "epoch": 6.88, |
| "learning_rate": 0.0001082051282051282, |
| "loss": 0.0929, |
| "step": 5370 |
| }, |
| { |
| "epoch": 6.9, |
| "learning_rate": 0.00010803418803418804, |
| "loss": 0.087, |
| "step": 5380 |
| }, |
| { |
| "epoch": 6.91, |
| "learning_rate": 0.00010786324786324786, |
| "loss": 0.0359, |
| "step": 5390 |
| }, |
| { |
| "epoch": 6.92, |
| "learning_rate": 0.0001076923076923077, |
| "loss": 0.0754, |
| "step": 5400 |
| }, |
| { |
| "epoch": 6.94, |
| "learning_rate": 0.00010752136752136752, |
| "loss": 0.0039, |
| "step": 5410 |
| }, |
| { |
| "epoch": 6.95, |
| "learning_rate": 0.00010735042735042735, |
| "loss": 0.0733, |
| "step": 5420 |
| }, |
| { |
| "epoch": 6.96, |
| "learning_rate": 0.00010717948717948718, |
| "loss": 0.0426, |
| "step": 5430 |
| }, |
| { |
| "epoch": 6.97, |
| "learning_rate": 0.00010700854700854701, |
| "loss": 0.0943, |
| "step": 5440 |
| }, |
| { |
| "epoch": 6.99, |
| "learning_rate": 0.00010683760683760684, |
| "loss": 0.163, |
| "step": 5450 |
| }, |
| { |
| "epoch": 7.0, |
| "learning_rate": 0.00010666666666666667, |
| "loss": 0.0653, |
| "step": 5460 |
| }, |
| { |
| "epoch": 7.01, |
| "learning_rate": 0.0001064957264957265, |
| "loss": 0.0054, |
| "step": 5470 |
| }, |
| { |
| "epoch": 7.03, |
| "learning_rate": 0.00010632478632478632, |
| "loss": 0.0179, |
| "step": 5480 |
| }, |
| { |
| "epoch": 7.04, |
| "learning_rate": 0.00010615384615384615, |
| "loss": 0.0124, |
| "step": 5490 |
| }, |
| { |
| "epoch": 7.05, |
| "learning_rate": 0.000105982905982906, |
| "loss": 0.0439, |
| "step": 5500 |
| }, |
| { |
| "epoch": 7.06, |
| "learning_rate": 0.00010581196581196582, |
| "loss": 0.0015, |
| "step": 5510 |
| }, |
| { |
| "epoch": 7.08, |
| "learning_rate": 0.00010564102564102565, |
| "loss": 0.0434, |
| "step": 5520 |
| }, |
| { |
| "epoch": 7.09, |
| "learning_rate": 0.00010547008547008548, |
| "loss": 0.0041, |
| "step": 5530 |
| }, |
| { |
| "epoch": 7.1, |
| "learning_rate": 0.00010529914529914531, |
| "loss": 0.0189, |
| "step": 5540 |
| }, |
| { |
| "epoch": 7.12, |
| "learning_rate": 0.00010512820512820514, |
| "loss": 0.0511, |
| "step": 5550 |
| }, |
| { |
| "epoch": 7.13, |
| "learning_rate": 0.00010495726495726497, |
| "loss": 0.0013, |
| "step": 5560 |
| }, |
| { |
| "epoch": 7.14, |
| "learning_rate": 0.0001047863247863248, |
| "loss": 0.0756, |
| "step": 5570 |
| }, |
| { |
| "epoch": 7.15, |
| "learning_rate": 0.00010461538461538463, |
| "loss": 0.018, |
| "step": 5580 |
| }, |
| { |
| "epoch": 7.17, |
| "learning_rate": 0.00010444444444444445, |
| "loss": 0.0416, |
| "step": 5590 |
| }, |
| { |
| "epoch": 7.18, |
| "learning_rate": 0.00010427350427350428, |
| "loss": 0.0718, |
| "step": 5600 |
| }, |
| { |
| "epoch": 7.19, |
| "learning_rate": 0.00010410256410256411, |
| "loss": 0.0846, |
| "step": 5610 |
| }, |
| { |
| "epoch": 7.21, |
| "learning_rate": 0.00010393162393162394, |
| "loss": 0.0753, |
| "step": 5620 |
| }, |
| { |
| "epoch": 7.22, |
| "learning_rate": 0.00010376068376068377, |
| "loss": 0.0051, |
| "step": 5630 |
| }, |
| { |
| "epoch": 7.23, |
| "learning_rate": 0.0001035897435897436, |
| "loss": 0.0789, |
| "step": 5640 |
| }, |
| { |
| "epoch": 7.24, |
| "learning_rate": 0.00010341880341880343, |
| "loss": 0.089, |
| "step": 5650 |
| }, |
| { |
| "epoch": 7.26, |
| "learning_rate": 0.00010324786324786326, |
| "loss": 0.0561, |
| "step": 5660 |
| }, |
| { |
| "epoch": 7.27, |
| "learning_rate": 0.00010307692307692307, |
| "loss": 0.0184, |
| "step": 5670 |
| }, |
| { |
| "epoch": 7.28, |
| "learning_rate": 0.0001029059829059829, |
| "loss": 0.0023, |
| "step": 5680 |
| }, |
| { |
| "epoch": 7.29, |
| "learning_rate": 0.00010273504273504273, |
| "loss": 0.016, |
| "step": 5690 |
| }, |
| { |
| "epoch": 7.31, |
| "learning_rate": 0.00010256410256410256, |
| "loss": 0.0604, |
| "step": 5700 |
| }, |
| { |
| "epoch": 7.32, |
| "learning_rate": 0.00010239316239316239, |
| "loss": 0.138, |
| "step": 5710 |
| }, |
| { |
| "epoch": 7.33, |
| "learning_rate": 0.00010222222222222222, |
| "loss": 0.0214, |
| "step": 5720 |
| }, |
| { |
| "epoch": 7.35, |
| "learning_rate": 0.00010205128205128205, |
| "loss": 0.1264, |
| "step": 5730 |
| }, |
| { |
| "epoch": 7.36, |
| "learning_rate": 0.00010188034188034187, |
| "loss": 0.0757, |
| "step": 5740 |
| }, |
| { |
| "epoch": 7.37, |
| "learning_rate": 0.0001017094017094017, |
| "loss": 0.0039, |
| "step": 5750 |
| }, |
| { |
| "epoch": 7.38, |
| "learning_rate": 0.00010153846153846153, |
| "loss": 0.0066, |
| "step": 5760 |
| }, |
| { |
| "epoch": 7.4, |
| "learning_rate": 0.00010136752136752137, |
| "loss": 0.005, |
| "step": 5770 |
| }, |
| { |
| "epoch": 7.41, |
| "learning_rate": 0.0001011965811965812, |
| "loss": 0.0012, |
| "step": 5780 |
| }, |
| { |
| "epoch": 7.42, |
| "learning_rate": 0.00010102564102564103, |
| "loss": 0.0018, |
| "step": 5790 |
| }, |
| { |
| "epoch": 7.44, |
| "learning_rate": 0.00010085470085470086, |
| "loss": 0.0032, |
| "step": 5800 |
| }, |
| { |
| "epoch": 7.45, |
| "learning_rate": 0.00010068376068376069, |
| "loss": 0.0578, |
| "step": 5810 |
| }, |
| { |
| "epoch": 7.46, |
| "learning_rate": 0.00010051282051282052, |
| "loss": 0.0013, |
| "step": 5820 |
| }, |
| { |
| "epoch": 7.47, |
| "learning_rate": 0.00010034188034188035, |
| "loss": 0.0016, |
| "step": 5830 |
| }, |
| { |
| "epoch": 7.49, |
| "learning_rate": 0.00010017094017094018, |
| "loss": 0.0051, |
| "step": 5840 |
| }, |
| { |
| "epoch": 7.5, |
| "learning_rate": 0.0001, |
| "loss": 0.147, |
| "step": 5850 |
| }, |
| { |
| "epoch": 7.51, |
| "learning_rate": 9.982905982905983e-05, |
| "loss": 0.0103, |
| "step": 5860 |
| }, |
| { |
| "epoch": 7.53, |
| "learning_rate": 9.965811965811966e-05, |
| "loss": 0.0011, |
| "step": 5870 |
| }, |
| { |
| "epoch": 7.54, |
| "learning_rate": 9.948717948717949e-05, |
| "loss": 0.0829, |
| "step": 5880 |
| }, |
| { |
| "epoch": 7.55, |
| "learning_rate": 9.931623931623932e-05, |
| "loss": 0.0065, |
| "step": 5890 |
| }, |
| { |
| "epoch": 7.56, |
| "learning_rate": 9.914529914529915e-05, |
| "loss": 0.0598, |
| "step": 5900 |
| }, |
| { |
| "epoch": 7.58, |
| "learning_rate": 9.897435897435898e-05, |
| "loss": 0.0473, |
| "step": 5910 |
| }, |
| { |
| "epoch": 7.59, |
| "learning_rate": 9.88034188034188e-05, |
| "loss": 0.0007, |
| "step": 5920 |
| }, |
| { |
| "epoch": 7.6, |
| "learning_rate": 9.863247863247864e-05, |
| "loss": 0.012, |
| "step": 5930 |
| }, |
| { |
| "epoch": 7.62, |
| "learning_rate": 9.846153846153848e-05, |
| "loss": 0.093, |
| "step": 5940 |
| }, |
| { |
| "epoch": 7.63, |
| "learning_rate": 9.829059829059829e-05, |
| "loss": 0.0074, |
| "step": 5950 |
| }, |
| { |
| "epoch": 7.64, |
| "learning_rate": 9.811965811965812e-05, |
| "loss": 0.0175, |
| "step": 5960 |
| }, |
| { |
| "epoch": 7.65, |
| "learning_rate": 9.794871794871795e-05, |
| "loss": 0.0363, |
| "step": 5970 |
| }, |
| { |
| "epoch": 7.67, |
| "learning_rate": 9.777777777777778e-05, |
| "loss": 0.0488, |
| "step": 5980 |
| }, |
| { |
| "epoch": 7.68, |
| "learning_rate": 9.760683760683761e-05, |
| "loss": 0.0007, |
| "step": 5990 |
| }, |
| { |
| "epoch": 7.69, |
| "learning_rate": 9.743589743589744e-05, |
| "loss": 0.0844, |
| "step": 6000 |
| }, |
| { |
| "epoch": 7.69, |
| "eval_accuracy": 0.7946127946127947, |
| "eval_loss": 1.2210915088653564, |
| "eval_runtime": 34.2874, |
| "eval_samples_per_second": 52.031, |
| "eval_steps_per_second": 6.504, |
| "step": 6000 |
| }, |
| { |
| "epoch": 7.71, |
| "learning_rate": 9.726495726495727e-05, |
| "loss": 0.1933, |
| "step": 6010 |
| }, |
| { |
| "epoch": 7.72, |
| "learning_rate": 9.70940170940171e-05, |
| "loss": 0.0448, |
| "step": 6020 |
| }, |
| { |
| "epoch": 7.73, |
| "learning_rate": 9.692307692307692e-05, |
| "loss": 0.2629, |
| "step": 6030 |
| }, |
| { |
| "epoch": 7.74, |
| "learning_rate": 9.675213675213675e-05, |
| "loss": 0.0007, |
| "step": 6040 |
| }, |
| { |
| "epoch": 7.76, |
| "learning_rate": 9.658119658119658e-05, |
| "loss": 0.0125, |
| "step": 6050 |
| }, |
| { |
| "epoch": 7.77, |
| "learning_rate": 9.641025641025641e-05, |
| "loss": 0.0129, |
| "step": 6060 |
| }, |
| { |
| "epoch": 7.78, |
| "learning_rate": 9.623931623931625e-05, |
| "loss": 0.0008, |
| "step": 6070 |
| }, |
| { |
| "epoch": 7.79, |
| "learning_rate": 9.606837606837608e-05, |
| "loss": 0.0009, |
| "step": 6080 |
| }, |
| { |
| "epoch": 7.81, |
| "learning_rate": 9.589743589743591e-05, |
| "loss": 0.0448, |
| "step": 6090 |
| }, |
| { |
| "epoch": 7.82, |
| "learning_rate": 9.572649572649574e-05, |
| "loss": 0.002, |
| "step": 6100 |
| }, |
| { |
| "epoch": 7.83, |
| "learning_rate": 9.555555555555557e-05, |
| "loss": 0.0514, |
| "step": 6110 |
| }, |
| { |
| "epoch": 7.85, |
| "learning_rate": 9.53846153846154e-05, |
| "loss": 0.0057, |
| "step": 6120 |
| }, |
| { |
| "epoch": 7.86, |
| "learning_rate": 9.521367521367521e-05, |
| "loss": 0.0145, |
| "step": 6130 |
| }, |
| { |
| "epoch": 7.87, |
| "learning_rate": 9.504273504273504e-05, |
| "loss": 0.001, |
| "step": 6140 |
| }, |
| { |
| "epoch": 7.88, |
| "learning_rate": 9.487179487179487e-05, |
| "loss": 0.0009, |
| "step": 6150 |
| }, |
| { |
| "epoch": 7.9, |
| "learning_rate": 9.47008547008547e-05, |
| "loss": 0.0953, |
| "step": 6160 |
| }, |
| { |
| "epoch": 7.91, |
| "learning_rate": 9.452991452991453e-05, |
| "loss": 0.0011, |
| "step": 6170 |
| }, |
| { |
| "epoch": 7.92, |
| "learning_rate": 9.435897435897436e-05, |
| "loss": 0.0007, |
| "step": 6180 |
| }, |
| { |
| "epoch": 7.94, |
| "learning_rate": 9.418803418803419e-05, |
| "loss": 0.0034, |
| "step": 6190 |
| }, |
| { |
| "epoch": 7.95, |
| "learning_rate": 9.401709401709401e-05, |
| "loss": 0.0738, |
| "step": 6200 |
| }, |
| { |
| "epoch": 7.96, |
| "learning_rate": 9.384615384615386e-05, |
| "loss": 0.0482, |
| "step": 6210 |
| }, |
| { |
| "epoch": 7.97, |
| "learning_rate": 9.367521367521369e-05, |
| "loss": 0.0014, |
| "step": 6220 |
| }, |
| { |
| "epoch": 7.99, |
| "learning_rate": 9.350427350427351e-05, |
| "loss": 0.0605, |
| "step": 6230 |
| }, |
| { |
| "epoch": 8.0, |
| "learning_rate": 9.333333333333334e-05, |
| "loss": 0.0622, |
| "step": 6240 |
| }, |
| { |
| "epoch": 8.01, |
| "learning_rate": 9.316239316239317e-05, |
| "loss": 0.0623, |
| "step": 6250 |
| }, |
| { |
| "epoch": 8.03, |
| "learning_rate": 9.2991452991453e-05, |
| "loss": 0.0084, |
| "step": 6260 |
| }, |
| { |
| "epoch": 8.04, |
| "learning_rate": 9.282051282051283e-05, |
| "loss": 0.0061, |
| "step": 6270 |
| }, |
| { |
| "epoch": 8.05, |
| "learning_rate": 9.264957264957266e-05, |
| "loss": 0.0011, |
| "step": 6280 |
| }, |
| { |
| "epoch": 8.06, |
| "learning_rate": 9.247863247863249e-05, |
| "loss": 0.0051, |
| "step": 6290 |
| }, |
| { |
| "epoch": 8.08, |
| "learning_rate": 9.230769230769232e-05, |
| "loss": 0.0658, |
| "step": 6300 |
| }, |
| { |
| "epoch": 8.09, |
| "learning_rate": 9.213675213675213e-05, |
| "loss": 0.0007, |
| "step": 6310 |
| }, |
| { |
| "epoch": 8.1, |
| "learning_rate": 9.196581196581196e-05, |
| "loss": 0.0954, |
| "step": 6320 |
| }, |
| { |
| "epoch": 8.12, |
| "learning_rate": 9.179487179487179e-05, |
| "loss": 0.0012, |
| "step": 6330 |
| }, |
| { |
| "epoch": 8.13, |
| "learning_rate": 9.162393162393162e-05, |
| "loss": 0.1042, |
| "step": 6340 |
| }, |
| { |
| "epoch": 8.14, |
| "learning_rate": 9.145299145299146e-05, |
| "loss": 0.0006, |
| "step": 6350 |
| }, |
| { |
| "epoch": 8.15, |
| "learning_rate": 9.128205128205129e-05, |
| "loss": 0.0329, |
| "step": 6360 |
| }, |
| { |
| "epoch": 8.17, |
| "learning_rate": 9.111111111111112e-05, |
| "loss": 0.0551, |
| "step": 6370 |
| }, |
| { |
| "epoch": 8.18, |
| "learning_rate": 9.094017094017095e-05, |
| "loss": 0.0011, |
| "step": 6380 |
| }, |
| { |
| "epoch": 8.19, |
| "learning_rate": 9.076923076923078e-05, |
| "loss": 0.0012, |
| "step": 6390 |
| }, |
| { |
| "epoch": 8.21, |
| "learning_rate": 9.05982905982906e-05, |
| "loss": 0.0817, |
| "step": 6400 |
| }, |
| { |
| "epoch": 8.22, |
| "learning_rate": 9.042735042735043e-05, |
| "loss": 0.0014, |
| "step": 6410 |
| }, |
| { |
| "epoch": 8.23, |
| "learning_rate": 9.025641025641026e-05, |
| "loss": 0.0292, |
| "step": 6420 |
| }, |
| { |
| "epoch": 8.24, |
| "learning_rate": 9.008547008547009e-05, |
| "loss": 0.0098, |
| "step": 6430 |
| }, |
| { |
| "epoch": 8.26, |
| "learning_rate": 8.991452991452992e-05, |
| "loss": 0.0011, |
| "step": 6440 |
| }, |
| { |
| "epoch": 8.27, |
| "learning_rate": 8.974358974358975e-05, |
| "loss": 0.004, |
| "step": 6450 |
| }, |
| { |
| "epoch": 8.28, |
| "learning_rate": 8.957264957264958e-05, |
| "loss": 0.0988, |
| "step": 6460 |
| }, |
| { |
| "epoch": 8.29, |
| "learning_rate": 8.94017094017094e-05, |
| "loss": 0.0373, |
| "step": 6470 |
| }, |
| { |
| "epoch": 8.31, |
| "learning_rate": 8.923076923076924e-05, |
| "loss": 0.0431, |
| "step": 6480 |
| }, |
| { |
| "epoch": 8.32, |
| "learning_rate": 8.905982905982906e-05, |
| "loss": 0.0084, |
| "step": 6490 |
| }, |
| { |
| "epoch": 8.33, |
| "learning_rate": 8.888888888888889e-05, |
| "loss": 0.0023, |
| "step": 6500 |
| }, |
| { |
| "epoch": 8.35, |
| "learning_rate": 8.871794871794872e-05, |
| "loss": 0.086, |
| "step": 6510 |
| }, |
| { |
| "epoch": 8.36, |
| "learning_rate": 8.854700854700855e-05, |
| "loss": 0.0997, |
| "step": 6520 |
| }, |
| { |
| "epoch": 8.37, |
| "learning_rate": 8.837606837606838e-05, |
| "loss": 0.0008, |
| "step": 6530 |
| }, |
| { |
| "epoch": 8.38, |
| "learning_rate": 8.820512820512821e-05, |
| "loss": 0.0011, |
| "step": 6540 |
| }, |
| { |
| "epoch": 8.4, |
| "learning_rate": 8.803418803418804e-05, |
| "loss": 0.0637, |
| "step": 6550 |
| }, |
| { |
| "epoch": 8.41, |
| "learning_rate": 8.786324786324787e-05, |
| "loss": 0.0503, |
| "step": 6560 |
| }, |
| { |
| "epoch": 8.42, |
| "learning_rate": 8.76923076923077e-05, |
| "loss": 0.0587, |
| "step": 6570 |
| }, |
| { |
| "epoch": 8.44, |
| "learning_rate": 8.752136752136752e-05, |
| "loss": 0.0858, |
| "step": 6580 |
| }, |
| { |
| "epoch": 8.45, |
| "learning_rate": 8.735042735042735e-05, |
| "loss": 0.0006, |
| "step": 6590 |
| }, |
| { |
| "epoch": 8.46, |
| "learning_rate": 8.717948717948718e-05, |
| "loss": 0.0007, |
| "step": 6600 |
| }, |
| { |
| "epoch": 8.47, |
| "learning_rate": 8.700854700854701e-05, |
| "loss": 0.0172, |
| "step": 6610 |
| }, |
| { |
| "epoch": 8.49, |
| "learning_rate": 8.683760683760684e-05, |
| "loss": 0.0009, |
| "step": 6620 |
| }, |
| { |
| "epoch": 8.5, |
| "learning_rate": 8.666666666666667e-05, |
| "loss": 0.0006, |
| "step": 6630 |
| }, |
| { |
| "epoch": 8.51, |
| "learning_rate": 8.64957264957265e-05, |
| "loss": 0.184, |
| "step": 6640 |
| }, |
| { |
| "epoch": 8.53, |
| "learning_rate": 8.632478632478634e-05, |
| "loss": 0.167, |
| "step": 6650 |
| }, |
| { |
| "epoch": 8.54, |
| "learning_rate": 8.615384615384617e-05, |
| "loss": 0.0084, |
| "step": 6660 |
| }, |
| { |
| "epoch": 8.55, |
| "learning_rate": 8.5982905982906e-05, |
| "loss": 0.0274, |
| "step": 6670 |
| }, |
| { |
| "epoch": 8.56, |
| "learning_rate": 8.581196581196581e-05, |
| "loss": 0.0008, |
| "step": 6680 |
| }, |
| { |
| "epoch": 8.58, |
| "learning_rate": 8.564102564102564e-05, |
| "loss": 0.0596, |
| "step": 6690 |
| }, |
| { |
| "epoch": 8.59, |
| "learning_rate": 8.547008547008547e-05, |
| "loss": 0.0083, |
| "step": 6700 |
| }, |
| { |
| "epoch": 8.6, |
| "learning_rate": 8.52991452991453e-05, |
| "loss": 0.0212, |
| "step": 6710 |
| }, |
| { |
| "epoch": 8.62, |
| "learning_rate": 8.512820512820513e-05, |
| "loss": 0.0172, |
| "step": 6720 |
| }, |
| { |
| "epoch": 8.63, |
| "learning_rate": 8.495726495726496e-05, |
| "loss": 0.0042, |
| "step": 6730 |
| }, |
| { |
| "epoch": 8.64, |
| "learning_rate": 8.478632478632479e-05, |
| "loss": 0.1231, |
| "step": 6740 |
| }, |
| { |
| "epoch": 8.65, |
| "learning_rate": 8.461538461538461e-05, |
| "loss": 0.0012, |
| "step": 6750 |
| }, |
| { |
| "epoch": 8.67, |
| "learning_rate": 8.444444444444444e-05, |
| "loss": 0.0206, |
| "step": 6760 |
| }, |
| { |
| "epoch": 8.68, |
| "learning_rate": 8.427350427350427e-05, |
| "loss": 0.0012, |
| "step": 6770 |
| }, |
| { |
| "epoch": 8.69, |
| "learning_rate": 8.410256410256411e-05, |
| "loss": 0.0061, |
| "step": 6780 |
| }, |
| { |
| "epoch": 8.71, |
| "learning_rate": 8.393162393162394e-05, |
| "loss": 0.1411, |
| "step": 6790 |
| }, |
| { |
| "epoch": 8.72, |
| "learning_rate": 8.376068376068377e-05, |
| "loss": 0.0489, |
| "step": 6800 |
| }, |
| { |
| "epoch": 8.73, |
| "learning_rate": 8.35897435897436e-05, |
| "loss": 0.0044, |
| "step": 6810 |
| }, |
| { |
| "epoch": 8.74, |
| "learning_rate": 8.341880341880343e-05, |
| "loss": 0.0262, |
| "step": 6820 |
| }, |
| { |
| "epoch": 8.76, |
| "learning_rate": 8.324786324786326e-05, |
| "loss": 0.0793, |
| "step": 6830 |
| }, |
| { |
| "epoch": 8.77, |
| "learning_rate": 8.307692307692309e-05, |
| "loss": 0.0054, |
| "step": 6840 |
| }, |
| { |
| "epoch": 8.78, |
| "learning_rate": 8.290598290598292e-05, |
| "loss": 0.1281, |
| "step": 6850 |
| }, |
| { |
| "epoch": 8.79, |
| "learning_rate": 8.273504273504273e-05, |
| "loss": 0.0657, |
| "step": 6860 |
| }, |
| { |
| "epoch": 8.81, |
| "learning_rate": 8.256410256410256e-05, |
| "loss": 0.016, |
| "step": 6870 |
| }, |
| { |
| "epoch": 8.82, |
| "learning_rate": 8.239316239316239e-05, |
| "loss": 0.0133, |
| "step": 6880 |
| }, |
| { |
| "epoch": 8.83, |
| "learning_rate": 8.222222222222222e-05, |
| "loss": 0.0391, |
| "step": 6890 |
| }, |
| { |
| "epoch": 8.85, |
| "learning_rate": 8.205128205128205e-05, |
| "loss": 0.1083, |
| "step": 6900 |
| }, |
| { |
| "epoch": 8.86, |
| "learning_rate": 8.188034188034188e-05, |
| "loss": 0.1051, |
| "step": 6910 |
| }, |
| { |
| "epoch": 8.87, |
| "learning_rate": 8.170940170940172e-05, |
| "loss": 0.0368, |
| "step": 6920 |
| }, |
| { |
| "epoch": 8.88, |
| "learning_rate": 8.153846153846155e-05, |
| "loss": 0.0038, |
| "step": 6930 |
| }, |
| { |
| "epoch": 8.9, |
| "learning_rate": 8.136752136752138e-05, |
| "loss": 0.0129, |
| "step": 6940 |
| }, |
| { |
| "epoch": 8.91, |
| "learning_rate": 8.11965811965812e-05, |
| "loss": 0.1158, |
| "step": 6950 |
| }, |
| { |
| "epoch": 8.92, |
| "learning_rate": 8.102564102564103e-05, |
| "loss": 0.1172, |
| "step": 6960 |
| }, |
| { |
| "epoch": 8.94, |
| "learning_rate": 8.085470085470086e-05, |
| "loss": 0.0009, |
| "step": 6970 |
| }, |
| { |
| "epoch": 8.95, |
| "learning_rate": 8.068376068376069e-05, |
| "loss": 0.1745, |
| "step": 6980 |
| }, |
| { |
| "epoch": 8.96, |
| "learning_rate": 8.051282051282052e-05, |
| "loss": 0.0434, |
| "step": 6990 |
| }, |
| { |
| "epoch": 8.97, |
| "learning_rate": 8.034188034188035e-05, |
| "loss": 0.0006, |
| "step": 7000 |
| }, |
| { |
| "epoch": 8.97, |
| "eval_accuracy": 0.7884399551066218, |
| "eval_loss": 1.2867748737335205, |
| "eval_runtime": 36.1409, |
| "eval_samples_per_second": 49.362, |
| "eval_steps_per_second": 6.17, |
| "step": 7000 |
| }, |
| { |
| "epoch": 8.99, |
| "learning_rate": 8.017094017094018e-05, |
| "loss": 0.0005, |
| "step": 7010 |
| }, |
| { |
| "epoch": 9.0, |
| "learning_rate": 8e-05, |
| "loss": 0.0017, |
| "step": 7020 |
| }, |
| { |
| "epoch": 9.01, |
| "learning_rate": 7.982905982905984e-05, |
| "loss": 0.0526, |
| "step": 7030 |
| }, |
| { |
| "epoch": 9.03, |
| "learning_rate": 7.965811965811965e-05, |
| "loss": 0.0812, |
| "step": 7040 |
| }, |
| { |
| "epoch": 9.04, |
| "learning_rate": 7.948717948717948e-05, |
| "loss": 0.0004, |
| "step": 7050 |
| }, |
| { |
| "epoch": 9.05, |
| "learning_rate": 7.931623931623932e-05, |
| "loss": 0.205, |
| "step": 7060 |
| }, |
| { |
| "epoch": 9.06, |
| "learning_rate": 7.914529914529915e-05, |
| "loss": 0.1026, |
| "step": 7070 |
| }, |
| { |
| "epoch": 9.08, |
| "learning_rate": 7.897435897435898e-05, |
| "loss": 0.0089, |
| "step": 7080 |
| }, |
| { |
| "epoch": 9.09, |
| "learning_rate": 7.880341880341881e-05, |
| "loss": 0.135, |
| "step": 7090 |
| }, |
| { |
| "epoch": 9.1, |
| "learning_rate": 7.863247863247864e-05, |
| "loss": 0.0046, |
| "step": 7100 |
| }, |
| { |
| "epoch": 9.12, |
| "learning_rate": 7.846153846153847e-05, |
| "loss": 0.1972, |
| "step": 7110 |
| }, |
| { |
| "epoch": 9.13, |
| "learning_rate": 7.82905982905983e-05, |
| "loss": 0.0195, |
| "step": 7120 |
| }, |
| { |
| "epoch": 9.14, |
| "learning_rate": 7.811965811965812e-05, |
| "loss": 0.0022, |
| "step": 7130 |
| }, |
| { |
| "epoch": 9.15, |
| "learning_rate": 7.794871794871795e-05, |
| "loss": 0.0015, |
| "step": 7140 |
| }, |
| { |
| "epoch": 9.17, |
| "learning_rate": 7.777777777777778e-05, |
| "loss": 0.0006, |
| "step": 7150 |
| }, |
| { |
| "epoch": 9.18, |
| "learning_rate": 7.760683760683761e-05, |
| "loss": 0.116, |
| "step": 7160 |
| }, |
| { |
| "epoch": 9.19, |
| "learning_rate": 7.743589743589744e-05, |
| "loss": 0.0126, |
| "step": 7170 |
| }, |
| { |
| "epoch": 9.21, |
| "learning_rate": 7.726495726495727e-05, |
| "loss": 0.0509, |
| "step": 7180 |
| }, |
| { |
| "epoch": 9.22, |
| "learning_rate": 7.70940170940171e-05, |
| "loss": 0.0017, |
| "step": 7190 |
| }, |
| { |
| "epoch": 9.23, |
| "learning_rate": 7.692307692307693e-05, |
| "loss": 0.1155, |
| "step": 7200 |
| }, |
| { |
| "epoch": 9.24, |
| "learning_rate": 7.675213675213675e-05, |
| "loss": 0.0006, |
| "step": 7210 |
| }, |
| { |
| "epoch": 9.26, |
| "learning_rate": 7.65811965811966e-05, |
| "loss": 0.0807, |
| "step": 7220 |
| }, |
| { |
| "epoch": 9.27, |
| "learning_rate": 7.641025641025641e-05, |
| "loss": 0.0763, |
| "step": 7230 |
| }, |
| { |
| "epoch": 9.28, |
| "learning_rate": 7.623931623931624e-05, |
| "loss": 0.0361, |
| "step": 7240 |
| }, |
| { |
| "epoch": 9.29, |
| "learning_rate": 7.606837606837607e-05, |
| "loss": 0.0531, |
| "step": 7250 |
| }, |
| { |
| "epoch": 9.31, |
| "learning_rate": 7.58974358974359e-05, |
| "loss": 0.1016, |
| "step": 7260 |
| }, |
| { |
| "epoch": 9.32, |
| "learning_rate": 7.572649572649573e-05, |
| "loss": 0.0008, |
| "step": 7270 |
| }, |
| { |
| "epoch": 9.33, |
| "learning_rate": 7.555555555555556e-05, |
| "loss": 0.0055, |
| "step": 7280 |
| }, |
| { |
| "epoch": 9.35, |
| "learning_rate": 7.538461538461539e-05, |
| "loss": 0.1142, |
| "step": 7290 |
| }, |
| { |
| "epoch": 9.36, |
| "learning_rate": 7.521367521367521e-05, |
| "loss": 0.0007, |
| "step": 7300 |
| }, |
| { |
| "epoch": 9.37, |
| "learning_rate": 7.504273504273504e-05, |
| "loss": 0.0008, |
| "step": 7310 |
| }, |
| { |
| "epoch": 9.38, |
| "learning_rate": 7.487179487179487e-05, |
| "loss": 0.0006, |
| "step": 7320 |
| }, |
| { |
| "epoch": 9.4, |
| "learning_rate": 7.47008547008547e-05, |
| "loss": 0.0006, |
| "step": 7330 |
| }, |
| { |
| "epoch": 9.41, |
| "learning_rate": 7.452991452991453e-05, |
| "loss": 0.0048, |
| "step": 7340 |
| }, |
| { |
| "epoch": 9.42, |
| "learning_rate": 7.435897435897436e-05, |
| "loss": 0.0716, |
| "step": 7350 |
| }, |
| { |
| "epoch": 9.44, |
| "learning_rate": 7.41880341880342e-05, |
| "loss": 0.0031, |
| "step": 7360 |
| }, |
| { |
| "epoch": 9.45, |
| "learning_rate": 7.401709401709403e-05, |
| "loss": 0.1143, |
| "step": 7370 |
| }, |
| { |
| "epoch": 9.46, |
| "learning_rate": 7.384615384615386e-05, |
| "loss": 0.0634, |
| "step": 7380 |
| }, |
| { |
| "epoch": 9.47, |
| "learning_rate": 7.367521367521369e-05, |
| "loss": 0.0005, |
| "step": 7390 |
| }, |
| { |
| "epoch": 9.49, |
| "learning_rate": 7.350427350427352e-05, |
| "loss": 0.1023, |
| "step": 7400 |
| }, |
| { |
| "epoch": 9.5, |
| "learning_rate": 7.333333333333333e-05, |
| "loss": 0.0011, |
| "step": 7410 |
| }, |
| { |
| "epoch": 9.51, |
| "learning_rate": 7.316239316239316e-05, |
| "loss": 0.0422, |
| "step": 7420 |
| }, |
| { |
| "epoch": 9.53, |
| "learning_rate": 7.299145299145299e-05, |
| "loss": 0.0004, |
| "step": 7430 |
| }, |
| { |
| "epoch": 9.54, |
| "learning_rate": 7.282051282051282e-05, |
| "loss": 0.1052, |
| "step": 7440 |
| }, |
| { |
| "epoch": 9.55, |
| "learning_rate": 7.264957264957265e-05, |
| "loss": 0.0005, |
| "step": 7450 |
| }, |
| { |
| "epoch": 9.56, |
| "learning_rate": 7.247863247863248e-05, |
| "loss": 0.0243, |
| "step": 7460 |
| }, |
| { |
| "epoch": 9.58, |
| "learning_rate": 7.23076923076923e-05, |
| "loss": 0.0006, |
| "step": 7470 |
| }, |
| { |
| "epoch": 9.59, |
| "learning_rate": 7.213675213675213e-05, |
| "loss": 0.1699, |
| "step": 7480 |
| }, |
| { |
| "epoch": 9.6, |
| "learning_rate": 7.196581196581196e-05, |
| "loss": 0.0004, |
| "step": 7490 |
| }, |
| { |
| "epoch": 9.62, |
| "learning_rate": 7.17948717948718e-05, |
| "loss": 0.0036, |
| "step": 7500 |
| }, |
| { |
| "epoch": 9.63, |
| "learning_rate": 7.162393162393163e-05, |
| "loss": 0.0157, |
| "step": 7510 |
| }, |
| { |
| "epoch": 9.64, |
| "learning_rate": 7.145299145299146e-05, |
| "loss": 0.0008, |
| "step": 7520 |
| }, |
| { |
| "epoch": 9.65, |
| "learning_rate": 7.128205128205129e-05, |
| "loss": 0.0644, |
| "step": 7530 |
| }, |
| { |
| "epoch": 9.67, |
| "learning_rate": 7.111111111111112e-05, |
| "loss": 0.0851, |
| "step": 7540 |
| }, |
| { |
| "epoch": 9.68, |
| "learning_rate": 7.094017094017095e-05, |
| "loss": 0.139, |
| "step": 7550 |
| }, |
| { |
| "epoch": 9.69, |
| "learning_rate": 7.076923076923078e-05, |
| "loss": 0.0009, |
| "step": 7560 |
| }, |
| { |
| "epoch": 9.71, |
| "learning_rate": 7.05982905982906e-05, |
| "loss": 0.063, |
| "step": 7570 |
| }, |
| { |
| "epoch": 9.72, |
| "learning_rate": 7.042735042735044e-05, |
| "loss": 0.0027, |
| "step": 7580 |
| }, |
| { |
| "epoch": 9.73, |
| "learning_rate": 7.025641025641025e-05, |
| "loss": 0.227, |
| "step": 7590 |
| }, |
| { |
| "epoch": 9.74, |
| "learning_rate": 7.008547008547008e-05, |
| "loss": 0.0005, |
| "step": 7600 |
| }, |
| { |
| "epoch": 9.76, |
| "learning_rate": 6.991452991452991e-05, |
| "loss": 0.1772, |
| "step": 7610 |
| }, |
| { |
| "epoch": 9.77, |
| "learning_rate": 6.974358974358974e-05, |
| "loss": 0.0007, |
| "step": 7620 |
| }, |
| { |
| "epoch": 9.78, |
| "learning_rate": 6.957264957264958e-05, |
| "loss": 0.0034, |
| "step": 7630 |
| }, |
| { |
| "epoch": 9.79, |
| "learning_rate": 6.940170940170941e-05, |
| "loss": 0.027, |
| "step": 7640 |
| }, |
| { |
| "epoch": 9.81, |
| "learning_rate": 6.923076923076924e-05, |
| "loss": 0.0009, |
| "step": 7650 |
| }, |
| { |
| "epoch": 9.82, |
| "learning_rate": 6.905982905982907e-05, |
| "loss": 0.0663, |
| "step": 7660 |
| }, |
| { |
| "epoch": 9.83, |
| "learning_rate": 6.88888888888889e-05, |
| "loss": 0.095, |
| "step": 7670 |
| }, |
| { |
| "epoch": 9.85, |
| "learning_rate": 6.871794871794872e-05, |
| "loss": 0.0704, |
| "step": 7680 |
| }, |
| { |
| "epoch": 9.86, |
| "learning_rate": 6.854700854700855e-05, |
| "loss": 0.001, |
| "step": 7690 |
| }, |
| { |
| "epoch": 9.87, |
| "learning_rate": 6.837606837606838e-05, |
| "loss": 0.0015, |
| "step": 7700 |
| }, |
| { |
| "epoch": 9.88, |
| "learning_rate": 6.820512820512821e-05, |
| "loss": 0.0022, |
| "step": 7710 |
| }, |
| { |
| "epoch": 9.9, |
| "learning_rate": 6.803418803418804e-05, |
| "loss": 0.0419, |
| "step": 7720 |
| }, |
| { |
| "epoch": 9.91, |
| "learning_rate": 6.786324786324787e-05, |
| "loss": 0.0022, |
| "step": 7730 |
| }, |
| { |
| "epoch": 9.92, |
| "learning_rate": 6.76923076923077e-05, |
| "loss": 0.0007, |
| "step": 7740 |
| }, |
| { |
| "epoch": 9.94, |
| "learning_rate": 6.752136752136753e-05, |
| "loss": 0.215, |
| "step": 7750 |
| }, |
| { |
| "epoch": 9.95, |
| "learning_rate": 6.735042735042735e-05, |
| "loss": 0.0006, |
| "step": 7760 |
| }, |
| { |
| "epoch": 9.96, |
| "learning_rate": 6.717948717948718e-05, |
| "loss": 0.0018, |
| "step": 7770 |
| }, |
| { |
| "epoch": 9.97, |
| "learning_rate": 6.700854700854701e-05, |
| "loss": 0.0006, |
| "step": 7780 |
| }, |
| { |
| "epoch": 9.99, |
| "learning_rate": 6.683760683760684e-05, |
| "loss": 0.0007, |
| "step": 7790 |
| }, |
| { |
| "epoch": 10.0, |
| "learning_rate": 6.666666666666667e-05, |
| "loss": 0.0023, |
| "step": 7800 |
| }, |
| { |
| "epoch": 10.01, |
| "learning_rate": 6.64957264957265e-05, |
| "loss": 0.0004, |
| "step": 7810 |
| }, |
| { |
| "epoch": 10.03, |
| "learning_rate": 6.632478632478633e-05, |
| "loss": 0.0606, |
| "step": 7820 |
| }, |
| { |
| "epoch": 10.04, |
| "learning_rate": 6.615384615384616e-05, |
| "loss": 0.0005, |
| "step": 7830 |
| }, |
| { |
| "epoch": 10.05, |
| "learning_rate": 6.598290598290599e-05, |
| "loss": 0.0006, |
| "step": 7840 |
| }, |
| { |
| "epoch": 10.06, |
| "learning_rate": 6.581196581196581e-05, |
| "loss": 0.0335, |
| "step": 7850 |
| }, |
| { |
| "epoch": 10.08, |
| "learning_rate": 6.564102564102564e-05, |
| "loss": 0.0004, |
| "step": 7860 |
| }, |
| { |
| "epoch": 10.09, |
| "learning_rate": 6.547008547008547e-05, |
| "loss": 0.0004, |
| "step": 7870 |
| }, |
| { |
| "epoch": 10.1, |
| "learning_rate": 6.52991452991453e-05, |
| "loss": 0.0037, |
| "step": 7880 |
| }, |
| { |
| "epoch": 10.12, |
| "learning_rate": 6.512820512820513e-05, |
| "loss": 0.1206, |
| "step": 7890 |
| }, |
| { |
| "epoch": 10.13, |
| "learning_rate": 6.495726495726496e-05, |
| "loss": 0.0153, |
| "step": 7900 |
| }, |
| { |
| "epoch": 10.14, |
| "learning_rate": 6.478632478632479e-05, |
| "loss": 0.0006, |
| "step": 7910 |
| }, |
| { |
| "epoch": 10.15, |
| "learning_rate": 6.461538461538462e-05, |
| "loss": 0.0083, |
| "step": 7920 |
| }, |
| { |
| "epoch": 10.17, |
| "learning_rate": 6.444444444444446e-05, |
| "loss": 0.0032, |
| "step": 7930 |
| }, |
| { |
| "epoch": 10.18, |
| "learning_rate": 6.427350427350429e-05, |
| "loss": 0.0004, |
| "step": 7940 |
| }, |
| { |
| "epoch": 10.19, |
| "learning_rate": 6.410256410256412e-05, |
| "loss": 0.0084, |
| "step": 7950 |
| }, |
| { |
| "epoch": 10.21, |
| "learning_rate": 6.393162393162393e-05, |
| "loss": 0.0353, |
| "step": 7960 |
| }, |
| { |
| "epoch": 10.22, |
| "learning_rate": 6.376068376068376e-05, |
| "loss": 0.0019, |
| "step": 7970 |
| }, |
| { |
| "epoch": 10.23, |
| "learning_rate": 6.358974358974359e-05, |
| "loss": 0.0004, |
| "step": 7980 |
| }, |
| { |
| "epoch": 10.24, |
| "learning_rate": 6.341880341880342e-05, |
| "loss": 0.0117, |
| "step": 7990 |
| }, |
| { |
| "epoch": 10.26, |
| "learning_rate": 6.324786324786325e-05, |
| "loss": 0.0914, |
| "step": 8000 |
| }, |
| { |
| "epoch": 10.26, |
| "eval_accuracy": 0.7940516273849607, |
| "eval_loss": 1.275172233581543, |
| "eval_runtime": 35.1577, |
| "eval_samples_per_second": 50.743, |
| "eval_steps_per_second": 6.343, |
| "step": 8000 |
| }, |
| { |
| "epoch": 10.27, |
| "learning_rate": 6.307692307692308e-05, |
| "loss": 0.0618, |
| "step": 8010 |
| }, |
| { |
| "epoch": 10.28, |
| "learning_rate": 6.29059829059829e-05, |
| "loss": 0.0707, |
| "step": 8020 |
| }, |
| { |
| "epoch": 10.29, |
| "learning_rate": 6.273504273504273e-05, |
| "loss": 0.0011, |
| "step": 8030 |
| }, |
| { |
| "epoch": 10.31, |
| "learning_rate": 6.256410256410256e-05, |
| "loss": 0.0968, |
| "step": 8040 |
| }, |
| { |
| "epoch": 10.32, |
| "learning_rate": 6.239316239316239e-05, |
| "loss": 0.0003, |
| "step": 8050 |
| }, |
| { |
| "epoch": 10.33, |
| "learning_rate": 6.222222222222222e-05, |
| "loss": 0.0003, |
| "step": 8060 |
| }, |
| { |
| "epoch": 10.35, |
| "learning_rate": 6.205128205128206e-05, |
| "loss": 0.0041, |
| "step": 8070 |
| }, |
| { |
| "epoch": 10.36, |
| "learning_rate": 6.188034188034189e-05, |
| "loss": 0.0003, |
| "step": 8080 |
| }, |
| { |
| "epoch": 10.37, |
| "learning_rate": 6.170940170940172e-05, |
| "loss": 0.0024, |
| "step": 8090 |
| }, |
| { |
| "epoch": 10.38, |
| "learning_rate": 6.153846153846155e-05, |
| "loss": 0.0006, |
| "step": 8100 |
| }, |
| { |
| "epoch": 10.4, |
| "learning_rate": 6.136752136752138e-05, |
| "loss": 0.0006, |
| "step": 8110 |
| }, |
| { |
| "epoch": 10.41, |
| "learning_rate": 6.11965811965812e-05, |
| "loss": 0.0005, |
| "step": 8120 |
| }, |
| { |
| "epoch": 10.42, |
| "learning_rate": 6.1025641025641035e-05, |
| "loss": 0.0006, |
| "step": 8130 |
| }, |
| { |
| "epoch": 10.44, |
| "learning_rate": 6.085470085470085e-05, |
| "loss": 0.091, |
| "step": 8140 |
| }, |
| { |
| "epoch": 10.45, |
| "learning_rate": 6.068376068376068e-05, |
| "loss": 0.0003, |
| "step": 8150 |
| }, |
| { |
| "epoch": 10.46, |
| "learning_rate": 6.0512820512820515e-05, |
| "loss": 0.0004, |
| "step": 8160 |
| }, |
| { |
| "epoch": 10.47, |
| "learning_rate": 6.0341880341880344e-05, |
| "loss": 0.0175, |
| "step": 8170 |
| }, |
| { |
| "epoch": 10.49, |
| "learning_rate": 6.017094017094017e-05, |
| "loss": 0.0003, |
| "step": 8180 |
| }, |
| { |
| "epoch": 10.5, |
| "learning_rate": 6e-05, |
| "loss": 0.0003, |
| "step": 8190 |
| }, |
| { |
| "epoch": 10.51, |
| "learning_rate": 5.982905982905983e-05, |
| "loss": 0.0003, |
| "step": 8200 |
| }, |
| { |
| "epoch": 10.53, |
| "learning_rate": 5.965811965811966e-05, |
| "loss": 0.0005, |
| "step": 8210 |
| }, |
| { |
| "epoch": 10.54, |
| "learning_rate": 5.948717948717949e-05, |
| "loss": 0.0008, |
| "step": 8220 |
| }, |
| { |
| "epoch": 10.55, |
| "learning_rate": 5.931623931623932e-05, |
| "loss": 0.0005, |
| "step": 8230 |
| }, |
| { |
| "epoch": 10.56, |
| "learning_rate": 5.9145299145299146e-05, |
| "loss": 0.0006, |
| "step": 8240 |
| }, |
| { |
| "epoch": 10.58, |
| "learning_rate": 5.897435897435898e-05, |
| "loss": 0.0004, |
| "step": 8250 |
| }, |
| { |
| "epoch": 10.59, |
| "learning_rate": 5.880341880341881e-05, |
| "loss": 0.0003, |
| "step": 8260 |
| }, |
| { |
| "epoch": 10.6, |
| "learning_rate": 5.863247863247864e-05, |
| "loss": 0.174, |
| "step": 8270 |
| }, |
| { |
| "epoch": 10.62, |
| "learning_rate": 5.846153846153847e-05, |
| "loss": 0.0006, |
| "step": 8280 |
| }, |
| { |
| "epoch": 10.63, |
| "learning_rate": 5.82905982905983e-05, |
| "loss": 0.0003, |
| "step": 8290 |
| }, |
| { |
| "epoch": 10.64, |
| "learning_rate": 5.8119658119658126e-05, |
| "loss": 0.0004, |
| "step": 8300 |
| }, |
| { |
| "epoch": 10.65, |
| "learning_rate": 5.7948717948717954e-05, |
| "loss": 0.0003, |
| "step": 8310 |
| }, |
| { |
| "epoch": 10.67, |
| "learning_rate": 5.7777777777777776e-05, |
| "loss": 0.0003, |
| "step": 8320 |
| }, |
| { |
| "epoch": 10.68, |
| "learning_rate": 5.7606837606837605e-05, |
| "loss": 0.0004, |
| "step": 8330 |
| }, |
| { |
| "epoch": 10.69, |
| "learning_rate": 5.7435897435897434e-05, |
| "loss": 0.0281, |
| "step": 8340 |
| }, |
| { |
| "epoch": 10.71, |
| "learning_rate": 5.726495726495726e-05, |
| "loss": 0.0003, |
| "step": 8350 |
| }, |
| { |
| "epoch": 10.72, |
| "learning_rate": 5.709401709401709e-05, |
| "loss": 0.0004, |
| "step": 8360 |
| }, |
| { |
| "epoch": 10.73, |
| "learning_rate": 5.692307692307692e-05, |
| "loss": 0.0321, |
| "step": 8370 |
| }, |
| { |
| "epoch": 10.74, |
| "learning_rate": 5.6752136752136756e-05, |
| "loss": 0.0013, |
| "step": 8380 |
| }, |
| { |
| "epoch": 10.76, |
| "learning_rate": 5.6581196581196585e-05, |
| "loss": 0.0004, |
| "step": 8390 |
| }, |
| { |
| "epoch": 10.77, |
| "learning_rate": 5.6410256410256414e-05, |
| "loss": 0.1839, |
| "step": 8400 |
| }, |
| { |
| "epoch": 10.78, |
| "learning_rate": 5.623931623931624e-05, |
| "loss": 0.0004, |
| "step": 8410 |
| }, |
| { |
| "epoch": 10.79, |
| "learning_rate": 5.606837606837607e-05, |
| "loss": 0.0019, |
| "step": 8420 |
| }, |
| { |
| "epoch": 10.81, |
| "learning_rate": 5.58974358974359e-05, |
| "loss": 0.0005, |
| "step": 8430 |
| }, |
| { |
| "epoch": 10.82, |
| "learning_rate": 5.572649572649573e-05, |
| "loss": 0.0444, |
| "step": 8440 |
| }, |
| { |
| "epoch": 10.83, |
| "learning_rate": 5.555555555555556e-05, |
| "loss": 0.0005, |
| "step": 8450 |
| }, |
| { |
| "epoch": 10.85, |
| "learning_rate": 5.538461538461539e-05, |
| "loss": 0.0009, |
| "step": 8460 |
| }, |
| { |
| "epoch": 10.86, |
| "learning_rate": 5.521367521367522e-05, |
| "loss": 0.0004, |
| "step": 8470 |
| }, |
| { |
| "epoch": 10.87, |
| "learning_rate": 5.504273504273505e-05, |
| "loss": 0.0004, |
| "step": 8480 |
| }, |
| { |
| "epoch": 10.88, |
| "learning_rate": 5.487179487179488e-05, |
| "loss": 0.0005, |
| "step": 8490 |
| }, |
| { |
| "epoch": 10.9, |
| "learning_rate": 5.470085470085471e-05, |
| "loss": 0.0003, |
| "step": 8500 |
| }, |
| { |
| "epoch": 10.91, |
| "learning_rate": 5.4529914529914525e-05, |
| "loss": 0.0003, |
| "step": 8510 |
| }, |
| { |
| "epoch": 10.92, |
| "learning_rate": 5.435897435897436e-05, |
| "loss": 0.0004, |
| "step": 8520 |
| }, |
| { |
| "epoch": 10.94, |
| "learning_rate": 5.418803418803419e-05, |
| "loss": 0.0003, |
| "step": 8530 |
| }, |
| { |
| "epoch": 10.95, |
| "learning_rate": 5.401709401709402e-05, |
| "loss": 0.0004, |
| "step": 8540 |
| }, |
| { |
| "epoch": 10.96, |
| "learning_rate": 5.384615384615385e-05, |
| "loss": 0.0346, |
| "step": 8550 |
| }, |
| { |
| "epoch": 10.97, |
| "learning_rate": 5.3675213675213675e-05, |
| "loss": 0.0035, |
| "step": 8560 |
| }, |
| { |
| "epoch": 10.99, |
| "learning_rate": 5.3504273504273504e-05, |
| "loss": 0.0415, |
| "step": 8570 |
| }, |
| { |
| "epoch": 11.0, |
| "learning_rate": 5.333333333333333e-05, |
| "loss": 0.0003, |
| "step": 8580 |
| }, |
| { |
| "epoch": 11.01, |
| "learning_rate": 5.316239316239316e-05, |
| "loss": 0.0003, |
| "step": 8590 |
| }, |
| { |
| "epoch": 11.03, |
| "learning_rate": 5.2991452991453e-05, |
| "loss": 0.0013, |
| "step": 8600 |
| }, |
| { |
| "epoch": 11.04, |
| "learning_rate": 5.2820512820512826e-05, |
| "loss": 0.0126, |
| "step": 8610 |
| }, |
| { |
| "epoch": 11.05, |
| "learning_rate": 5.2649572649572655e-05, |
| "loss": 0.0003, |
| "step": 8620 |
| }, |
| { |
| "epoch": 11.06, |
| "learning_rate": 5.2478632478632484e-05, |
| "loss": 0.0003, |
| "step": 8630 |
| }, |
| { |
| "epoch": 11.08, |
| "learning_rate": 5.230769230769231e-05, |
| "loss": 0.0003, |
| "step": 8640 |
| }, |
| { |
| "epoch": 11.09, |
| "learning_rate": 5.213675213675214e-05, |
| "loss": 0.0003, |
| "step": 8650 |
| }, |
| { |
| "epoch": 11.1, |
| "learning_rate": 5.196581196581197e-05, |
| "loss": 0.0003, |
| "step": 8660 |
| }, |
| { |
| "epoch": 11.12, |
| "learning_rate": 5.17948717948718e-05, |
| "loss": 0.0004, |
| "step": 8670 |
| }, |
| { |
| "epoch": 11.13, |
| "learning_rate": 5.162393162393163e-05, |
| "loss": 0.0002, |
| "step": 8680 |
| }, |
| { |
| "epoch": 11.14, |
| "learning_rate": 5.145299145299145e-05, |
| "loss": 0.0002, |
| "step": 8690 |
| }, |
| { |
| "epoch": 11.15, |
| "learning_rate": 5.128205128205128e-05, |
| "loss": 0.0002, |
| "step": 8700 |
| }, |
| { |
| "epoch": 11.17, |
| "learning_rate": 5.111111111111111e-05, |
| "loss": 0.0003, |
| "step": 8710 |
| }, |
| { |
| "epoch": 11.18, |
| "learning_rate": 5.094017094017094e-05, |
| "loss": 0.0003, |
| "step": 8720 |
| }, |
| { |
| "epoch": 11.19, |
| "learning_rate": 5.0769230769230766e-05, |
| "loss": 0.0004, |
| "step": 8730 |
| }, |
| { |
| "epoch": 11.21, |
| "learning_rate": 5.05982905982906e-05, |
| "loss": 0.028, |
| "step": 8740 |
| }, |
| { |
| "epoch": 11.22, |
| "learning_rate": 5.042735042735043e-05, |
| "loss": 0.0002, |
| "step": 8750 |
| }, |
| { |
| "epoch": 11.23, |
| "learning_rate": 5.025641025641026e-05, |
| "loss": 0.0003, |
| "step": 8760 |
| }, |
| { |
| "epoch": 11.24, |
| "learning_rate": 5.008547008547009e-05, |
| "loss": 0.0025, |
| "step": 8770 |
| }, |
| { |
| "epoch": 11.26, |
| "learning_rate": 4.991452991452992e-05, |
| "loss": 0.0003, |
| "step": 8780 |
| }, |
| { |
| "epoch": 11.27, |
| "learning_rate": 4.9743589743589746e-05, |
| "loss": 0.0002, |
| "step": 8790 |
| }, |
| { |
| "epoch": 11.28, |
| "learning_rate": 4.9572649572649575e-05, |
| "loss": 0.0003, |
| "step": 8800 |
| }, |
| { |
| "epoch": 11.29, |
| "learning_rate": 4.94017094017094e-05, |
| "loss": 0.0002, |
| "step": 8810 |
| }, |
| { |
| "epoch": 11.31, |
| "learning_rate": 4.923076923076924e-05, |
| "loss": 0.0736, |
| "step": 8820 |
| }, |
| { |
| "epoch": 11.32, |
| "learning_rate": 4.905982905982906e-05, |
| "loss": 0.0002, |
| "step": 8830 |
| }, |
| { |
| "epoch": 11.33, |
| "learning_rate": 4.888888888888889e-05, |
| "loss": 0.0007, |
| "step": 8840 |
| }, |
| { |
| "epoch": 11.35, |
| "learning_rate": 4.871794871794872e-05, |
| "loss": 0.015, |
| "step": 8850 |
| }, |
| { |
| "epoch": 11.36, |
| "learning_rate": 4.854700854700855e-05, |
| "loss": 0.0067, |
| "step": 8860 |
| }, |
| { |
| "epoch": 11.37, |
| "learning_rate": 4.8376068376068376e-05, |
| "loss": 0.0003, |
| "step": 8870 |
| }, |
| { |
| "epoch": 11.38, |
| "learning_rate": 4.8205128205128205e-05, |
| "loss": 0.0002, |
| "step": 8880 |
| }, |
| { |
| "epoch": 11.4, |
| "learning_rate": 4.803418803418804e-05, |
| "loss": 0.0002, |
| "step": 8890 |
| }, |
| { |
| "epoch": 11.41, |
| "learning_rate": 4.786324786324787e-05, |
| "loss": 0.0002, |
| "step": 8900 |
| }, |
| { |
| "epoch": 11.42, |
| "learning_rate": 4.76923076923077e-05, |
| "loss": 0.0002, |
| "step": 8910 |
| }, |
| { |
| "epoch": 11.44, |
| "learning_rate": 4.752136752136752e-05, |
| "loss": 0.0004, |
| "step": 8920 |
| }, |
| { |
| "epoch": 11.45, |
| "learning_rate": 4.735042735042735e-05, |
| "loss": 0.0002, |
| "step": 8930 |
| }, |
| { |
| "epoch": 11.46, |
| "learning_rate": 4.717948717948718e-05, |
| "loss": 0.0003, |
| "step": 8940 |
| }, |
| { |
| "epoch": 11.47, |
| "learning_rate": 4.700854700854701e-05, |
| "loss": 0.0002, |
| "step": 8950 |
| }, |
| { |
| "epoch": 11.49, |
| "learning_rate": 4.683760683760684e-05, |
| "loss": 0.0002, |
| "step": 8960 |
| }, |
| { |
| "epoch": 11.5, |
| "learning_rate": 4.666666666666667e-05, |
| "loss": 0.0002, |
| "step": 8970 |
| }, |
| { |
| "epoch": 11.51, |
| "learning_rate": 4.64957264957265e-05, |
| "loss": 0.0002, |
| "step": 8980 |
| }, |
| { |
| "epoch": 11.53, |
| "learning_rate": 4.632478632478633e-05, |
| "loss": 0.0002, |
| "step": 8990 |
| }, |
| { |
| "epoch": 11.54, |
| "learning_rate": 4.615384615384616e-05, |
| "loss": 0.0003, |
| "step": 9000 |
| }, |
| { |
| "epoch": 11.54, |
| "eval_accuracy": 0.7951739618406285, |
| "eval_loss": 1.3951935768127441, |
| "eval_runtime": 35.2372, |
| "eval_samples_per_second": 50.628, |
| "eval_steps_per_second": 6.329, |
| "step": 9000 |
| }, |
| { |
| "epoch": 11.55, |
| "learning_rate": 4.598290598290598e-05, |
| "loss": 0.0002, |
| "step": 9010 |
| }, |
| { |
| "epoch": 11.56, |
| "learning_rate": 4.581196581196581e-05, |
| "loss": 0.0003, |
| "step": 9020 |
| }, |
| { |
| "epoch": 11.58, |
| "learning_rate": 4.5641025641025645e-05, |
| "loss": 0.0002, |
| "step": 9030 |
| }, |
| { |
| "epoch": 11.59, |
| "learning_rate": 4.5470085470085474e-05, |
| "loss": 0.0002, |
| "step": 9040 |
| }, |
| { |
| "epoch": 11.6, |
| "learning_rate": 4.52991452991453e-05, |
| "loss": 0.0002, |
| "step": 9050 |
| }, |
| { |
| "epoch": 11.62, |
| "learning_rate": 4.512820512820513e-05, |
| "loss": 0.0002, |
| "step": 9060 |
| }, |
| { |
| "epoch": 11.63, |
| "learning_rate": 4.495726495726496e-05, |
| "loss": 0.0002, |
| "step": 9070 |
| }, |
| { |
| "epoch": 11.64, |
| "learning_rate": 4.478632478632479e-05, |
| "loss": 0.0002, |
| "step": 9080 |
| }, |
| { |
| "epoch": 11.65, |
| "learning_rate": 4.461538461538462e-05, |
| "loss": 0.0002, |
| "step": 9090 |
| }, |
| { |
| "epoch": 11.67, |
| "learning_rate": 4.4444444444444447e-05, |
| "loss": 0.0002, |
| "step": 9100 |
| }, |
| { |
| "epoch": 11.68, |
| "learning_rate": 4.4273504273504275e-05, |
| "loss": 0.0014, |
| "step": 9110 |
| }, |
| { |
| "epoch": 11.69, |
| "learning_rate": 4.4102564102564104e-05, |
| "loss": 0.0002, |
| "step": 9120 |
| }, |
| { |
| "epoch": 11.71, |
| "learning_rate": 4.393162393162393e-05, |
| "loss": 0.0005, |
| "step": 9130 |
| }, |
| { |
| "epoch": 11.72, |
| "learning_rate": 4.376068376068376e-05, |
| "loss": 0.0002, |
| "step": 9140 |
| }, |
| { |
| "epoch": 11.73, |
| "learning_rate": 4.358974358974359e-05, |
| "loss": 0.0002, |
| "step": 9150 |
| }, |
| { |
| "epoch": 11.74, |
| "learning_rate": 4.341880341880342e-05, |
| "loss": 0.0102, |
| "step": 9160 |
| }, |
| { |
| "epoch": 11.76, |
| "learning_rate": 4.324786324786325e-05, |
| "loss": 0.0002, |
| "step": 9170 |
| }, |
| { |
| "epoch": 11.77, |
| "learning_rate": 4.3076923076923084e-05, |
| "loss": 0.0002, |
| "step": 9180 |
| }, |
| { |
| "epoch": 11.78, |
| "learning_rate": 4.2905982905982906e-05, |
| "loss": 0.0002, |
| "step": 9190 |
| }, |
| { |
| "epoch": 11.79, |
| "learning_rate": 4.2735042735042735e-05, |
| "loss": 0.0006, |
| "step": 9200 |
| }, |
| { |
| "epoch": 11.81, |
| "learning_rate": 4.2564102564102564e-05, |
| "loss": 0.0002, |
| "step": 9210 |
| }, |
| { |
| "epoch": 11.82, |
| "learning_rate": 4.239316239316239e-05, |
| "loss": 0.0002, |
| "step": 9220 |
| }, |
| { |
| "epoch": 11.83, |
| "learning_rate": 4.222222222222222e-05, |
| "loss": 0.0002, |
| "step": 9230 |
| }, |
| { |
| "epoch": 11.85, |
| "learning_rate": 4.205128205128206e-05, |
| "loss": 0.0281, |
| "step": 9240 |
| }, |
| { |
| "epoch": 11.86, |
| "learning_rate": 4.1880341880341886e-05, |
| "loss": 0.0002, |
| "step": 9250 |
| }, |
| { |
| "epoch": 11.87, |
| "learning_rate": 4.1709401709401715e-05, |
| "loss": 0.0002, |
| "step": 9260 |
| }, |
| { |
| "epoch": 11.88, |
| "learning_rate": 4.1538461538461544e-05, |
| "loss": 0.0035, |
| "step": 9270 |
| }, |
| { |
| "epoch": 11.9, |
| "learning_rate": 4.1367521367521366e-05, |
| "loss": 0.0002, |
| "step": 9280 |
| }, |
| { |
| "epoch": 11.91, |
| "learning_rate": 4.1196581196581195e-05, |
| "loss": 0.0003, |
| "step": 9290 |
| }, |
| { |
| "epoch": 11.92, |
| "learning_rate": 4.1025641025641023e-05, |
| "loss": 0.0002, |
| "step": 9300 |
| }, |
| { |
| "epoch": 11.94, |
| "learning_rate": 4.085470085470086e-05, |
| "loss": 0.0008, |
| "step": 9310 |
| }, |
| { |
| "epoch": 11.95, |
| "learning_rate": 4.068376068376069e-05, |
| "loss": 0.0002, |
| "step": 9320 |
| }, |
| { |
| "epoch": 11.96, |
| "learning_rate": 4.051282051282052e-05, |
| "loss": 0.0002, |
| "step": 9330 |
| }, |
| { |
| "epoch": 11.97, |
| "learning_rate": 4.0341880341880346e-05, |
| "loss": 0.0002, |
| "step": 9340 |
| }, |
| { |
| "epoch": 11.99, |
| "learning_rate": 4.0170940170940174e-05, |
| "loss": 0.0002, |
| "step": 9350 |
| }, |
| { |
| "epoch": 12.0, |
| "learning_rate": 4e-05, |
| "loss": 0.0003, |
| "step": 9360 |
| }, |
| { |
| "epoch": 12.01, |
| "learning_rate": 3.9829059829059825e-05, |
| "loss": 0.0002, |
| "step": 9370 |
| }, |
| { |
| "epoch": 12.03, |
| "learning_rate": 3.965811965811966e-05, |
| "loss": 0.0002, |
| "step": 9380 |
| }, |
| { |
| "epoch": 12.04, |
| "learning_rate": 3.948717948717949e-05, |
| "loss": 0.0002, |
| "step": 9390 |
| }, |
| { |
| "epoch": 12.05, |
| "learning_rate": 3.931623931623932e-05, |
| "loss": 0.0002, |
| "step": 9400 |
| }, |
| { |
| "epoch": 12.06, |
| "learning_rate": 3.914529914529915e-05, |
| "loss": 0.0002, |
| "step": 9410 |
| }, |
| { |
| "epoch": 12.08, |
| "learning_rate": 3.8974358974358976e-05, |
| "loss": 0.0002, |
| "step": 9420 |
| }, |
| { |
| "epoch": 12.09, |
| "learning_rate": 3.8803418803418805e-05, |
| "loss": 0.0002, |
| "step": 9430 |
| }, |
| { |
| "epoch": 12.1, |
| "learning_rate": 3.8632478632478634e-05, |
| "loss": 0.0002, |
| "step": 9440 |
| }, |
| { |
| "epoch": 12.12, |
| "learning_rate": 3.846153846153846e-05, |
| "loss": 0.0002, |
| "step": 9450 |
| }, |
| { |
| "epoch": 12.13, |
| "learning_rate": 3.82905982905983e-05, |
| "loss": 0.0002, |
| "step": 9460 |
| }, |
| { |
| "epoch": 12.14, |
| "learning_rate": 3.811965811965812e-05, |
| "loss": 0.0002, |
| "step": 9470 |
| }, |
| { |
| "epoch": 12.15, |
| "learning_rate": 3.794871794871795e-05, |
| "loss": 0.0002, |
| "step": 9480 |
| }, |
| { |
| "epoch": 12.17, |
| "learning_rate": 3.777777777777778e-05, |
| "loss": 0.0002, |
| "step": 9490 |
| }, |
| { |
| "epoch": 12.18, |
| "learning_rate": 3.760683760683761e-05, |
| "loss": 0.0002, |
| "step": 9500 |
| }, |
| { |
| "epoch": 12.19, |
| "learning_rate": 3.7435897435897436e-05, |
| "loss": 0.0002, |
| "step": 9510 |
| }, |
| { |
| "epoch": 12.21, |
| "learning_rate": 3.7264957264957265e-05, |
| "loss": 0.0003, |
| "step": 9520 |
| }, |
| { |
| "epoch": 12.22, |
| "learning_rate": 3.70940170940171e-05, |
| "loss": 0.0033, |
| "step": 9530 |
| }, |
| { |
| "epoch": 12.23, |
| "learning_rate": 3.692307692307693e-05, |
| "loss": 0.0002, |
| "step": 9540 |
| }, |
| { |
| "epoch": 12.24, |
| "learning_rate": 3.675213675213676e-05, |
| "loss": 0.0002, |
| "step": 9550 |
| }, |
| { |
| "epoch": 12.26, |
| "learning_rate": 3.658119658119658e-05, |
| "loss": 0.0002, |
| "step": 9560 |
| }, |
| { |
| "epoch": 12.27, |
| "learning_rate": 3.641025641025641e-05, |
| "loss": 0.0002, |
| "step": 9570 |
| }, |
| { |
| "epoch": 12.28, |
| "learning_rate": 3.623931623931624e-05, |
| "loss": 0.0002, |
| "step": 9580 |
| }, |
| { |
| "epoch": 12.29, |
| "learning_rate": 3.606837606837607e-05, |
| "loss": 0.0002, |
| "step": 9590 |
| }, |
| { |
| "epoch": 12.31, |
| "learning_rate": 3.58974358974359e-05, |
| "loss": 0.0002, |
| "step": 9600 |
| }, |
| { |
| "epoch": 12.32, |
| "learning_rate": 3.572649572649573e-05, |
| "loss": 0.0002, |
| "step": 9610 |
| }, |
| { |
| "epoch": 12.33, |
| "learning_rate": 3.555555555555556e-05, |
| "loss": 0.0169, |
| "step": 9620 |
| }, |
| { |
| "epoch": 12.35, |
| "learning_rate": 3.538461538461539e-05, |
| "loss": 0.0001, |
| "step": 9630 |
| }, |
| { |
| "epoch": 12.36, |
| "learning_rate": 3.521367521367522e-05, |
| "loss": 0.0002, |
| "step": 9640 |
| }, |
| { |
| "epoch": 12.37, |
| "learning_rate": 3.504273504273504e-05, |
| "loss": 0.0002, |
| "step": 9650 |
| }, |
| { |
| "epoch": 12.38, |
| "learning_rate": 3.487179487179487e-05, |
| "loss": 0.0002, |
| "step": 9660 |
| }, |
| { |
| "epoch": 12.4, |
| "learning_rate": 3.4700854700854704e-05, |
| "loss": 0.0002, |
| "step": 9670 |
| }, |
| { |
| "epoch": 12.41, |
| "learning_rate": 3.452991452991453e-05, |
| "loss": 0.0002, |
| "step": 9680 |
| }, |
| { |
| "epoch": 12.42, |
| "learning_rate": 3.435897435897436e-05, |
| "loss": 0.0002, |
| "step": 9690 |
| }, |
| { |
| "epoch": 12.44, |
| "learning_rate": 3.418803418803419e-05, |
| "loss": 0.0002, |
| "step": 9700 |
| }, |
| { |
| "epoch": 12.45, |
| "learning_rate": 3.401709401709402e-05, |
| "loss": 0.0004, |
| "step": 9710 |
| }, |
| { |
| "epoch": 12.46, |
| "learning_rate": 3.384615384615385e-05, |
| "loss": 0.0002, |
| "step": 9720 |
| }, |
| { |
| "epoch": 12.47, |
| "learning_rate": 3.367521367521368e-05, |
| "loss": 0.0002, |
| "step": 9730 |
| }, |
| { |
| "epoch": 12.49, |
| "learning_rate": 3.3504273504273506e-05, |
| "loss": 0.0002, |
| "step": 9740 |
| }, |
| { |
| "epoch": 12.5, |
| "learning_rate": 3.3333333333333335e-05, |
| "loss": 0.0002, |
| "step": 9750 |
| }, |
| { |
| "epoch": 12.51, |
| "learning_rate": 3.3162393162393164e-05, |
| "loss": 0.0001, |
| "step": 9760 |
| }, |
| { |
| "epoch": 12.53, |
| "learning_rate": 3.299145299145299e-05, |
| "loss": 0.0002, |
| "step": 9770 |
| }, |
| { |
| "epoch": 12.54, |
| "learning_rate": 3.282051282051282e-05, |
| "loss": 0.0001, |
| "step": 9780 |
| }, |
| { |
| "epoch": 12.55, |
| "learning_rate": 3.264957264957265e-05, |
| "loss": 0.0002, |
| "step": 9790 |
| }, |
| { |
| "epoch": 12.56, |
| "learning_rate": 3.247863247863248e-05, |
| "loss": 0.0001, |
| "step": 9800 |
| }, |
| { |
| "epoch": 12.58, |
| "learning_rate": 3.230769230769231e-05, |
| "loss": 0.0001, |
| "step": 9810 |
| }, |
| { |
| "epoch": 12.59, |
| "learning_rate": 3.2136752136752144e-05, |
| "loss": 0.0002, |
| "step": 9820 |
| }, |
| { |
| "epoch": 12.6, |
| "learning_rate": 3.1965811965811966e-05, |
| "loss": 0.0001, |
| "step": 9830 |
| }, |
| { |
| "epoch": 12.62, |
| "learning_rate": 3.1794871794871795e-05, |
| "loss": 0.0002, |
| "step": 9840 |
| }, |
| { |
| "epoch": 12.63, |
| "learning_rate": 3.162393162393162e-05, |
| "loss": 0.0002, |
| "step": 9850 |
| }, |
| { |
| "epoch": 12.64, |
| "learning_rate": 3.145299145299145e-05, |
| "loss": 0.0002, |
| "step": 9860 |
| }, |
| { |
| "epoch": 12.65, |
| "learning_rate": 3.128205128205128e-05, |
| "loss": 0.0001, |
| "step": 9870 |
| }, |
| { |
| "epoch": 12.67, |
| "learning_rate": 3.111111111111111e-05, |
| "loss": 0.0001, |
| "step": 9880 |
| }, |
| { |
| "epoch": 12.68, |
| "learning_rate": 3.0940170940170946e-05, |
| "loss": 0.0001, |
| "step": 9890 |
| }, |
| { |
| "epoch": 12.69, |
| "learning_rate": 3.0769230769230774e-05, |
| "loss": 0.0001, |
| "step": 9900 |
| }, |
| { |
| "epoch": 12.71, |
| "learning_rate": 3.05982905982906e-05, |
| "loss": 0.0001, |
| "step": 9910 |
| }, |
| { |
| "epoch": 12.72, |
| "learning_rate": 3.0427350427350425e-05, |
| "loss": 0.0001, |
| "step": 9920 |
| }, |
| { |
| "epoch": 12.73, |
| "learning_rate": 3.0256410256410257e-05, |
| "loss": 0.0001, |
| "step": 9930 |
| }, |
| { |
| "epoch": 12.74, |
| "learning_rate": 3.0085470085470086e-05, |
| "loss": 0.0002, |
| "step": 9940 |
| }, |
| { |
| "epoch": 12.76, |
| "learning_rate": 2.9914529914529915e-05, |
| "loss": 0.0001, |
| "step": 9950 |
| }, |
| { |
| "epoch": 12.77, |
| "learning_rate": 2.9743589743589744e-05, |
| "loss": 0.0001, |
| "step": 9960 |
| }, |
| { |
| "epoch": 12.78, |
| "learning_rate": 2.9572649572649573e-05, |
| "loss": 0.0001, |
| "step": 9970 |
| }, |
| { |
| "epoch": 12.79, |
| "learning_rate": 2.9401709401709405e-05, |
| "loss": 0.0001, |
| "step": 9980 |
| }, |
| { |
| "epoch": 12.81, |
| "learning_rate": 2.9230769230769234e-05, |
| "loss": 0.0001, |
| "step": 9990 |
| }, |
| { |
| "epoch": 12.82, |
| "learning_rate": 2.9059829059829063e-05, |
| "loss": 0.0001, |
| "step": 10000 |
| }, |
| { |
| "epoch": 12.82, |
| "eval_accuracy": 0.7912457912457912, |
| "eval_loss": 1.479121208190918, |
| "eval_runtime": 38.2692, |
| "eval_samples_per_second": 46.617, |
| "eval_steps_per_second": 5.827, |
| "step": 10000 |
| }, |
| { |
| "epoch": 12.83, |
| "learning_rate": 2.8888888888888888e-05, |
| "loss": 0.0001, |
| "step": 10010 |
| }, |
| { |
| "epoch": 12.85, |
| "learning_rate": 2.8717948717948717e-05, |
| "loss": 0.0001, |
| "step": 10020 |
| }, |
| { |
| "epoch": 12.86, |
| "learning_rate": 2.8547008547008546e-05, |
| "loss": 0.0001, |
| "step": 10030 |
| }, |
| { |
| "epoch": 12.87, |
| "learning_rate": 2.8376068376068378e-05, |
| "loss": 0.0001, |
| "step": 10040 |
| }, |
| { |
| "epoch": 12.88, |
| "learning_rate": 2.8205128205128207e-05, |
| "loss": 0.0002, |
| "step": 10050 |
| }, |
| { |
| "epoch": 12.9, |
| "learning_rate": 2.8034188034188036e-05, |
| "loss": 0.0002, |
| "step": 10060 |
| }, |
| { |
| "epoch": 12.91, |
| "learning_rate": 2.7863247863247865e-05, |
| "loss": 0.0001, |
| "step": 10070 |
| }, |
| { |
| "epoch": 12.92, |
| "learning_rate": 2.7692307692307694e-05, |
| "loss": 0.0001, |
| "step": 10080 |
| }, |
| { |
| "epoch": 12.94, |
| "learning_rate": 2.7521367521367526e-05, |
| "loss": 0.0001, |
| "step": 10090 |
| }, |
| { |
| "epoch": 12.95, |
| "learning_rate": 2.7350427350427355e-05, |
| "loss": 0.0001, |
| "step": 10100 |
| }, |
| { |
| "epoch": 12.96, |
| "learning_rate": 2.717948717948718e-05, |
| "loss": 0.0001, |
| "step": 10110 |
| }, |
| { |
| "epoch": 12.97, |
| "learning_rate": 2.700854700854701e-05, |
| "loss": 0.0002, |
| "step": 10120 |
| }, |
| { |
| "epoch": 12.99, |
| "learning_rate": 2.6837606837606838e-05, |
| "loss": 0.0001, |
| "step": 10130 |
| }, |
| { |
| "epoch": 13.0, |
| "learning_rate": 2.6666666666666667e-05, |
| "loss": 0.0001, |
| "step": 10140 |
| }, |
| { |
| "epoch": 13.01, |
| "learning_rate": 2.64957264957265e-05, |
| "loss": 0.0001, |
| "step": 10150 |
| }, |
| { |
| "epoch": 13.03, |
| "learning_rate": 2.6324786324786328e-05, |
| "loss": 0.0001, |
| "step": 10160 |
| }, |
| { |
| "epoch": 13.04, |
| "learning_rate": 2.6153846153846157e-05, |
| "loss": 0.0001, |
| "step": 10170 |
| }, |
| { |
| "epoch": 13.05, |
| "learning_rate": 2.5982905982905985e-05, |
| "loss": 0.0001, |
| "step": 10180 |
| }, |
| { |
| "epoch": 13.06, |
| "learning_rate": 2.5811965811965814e-05, |
| "loss": 0.0001, |
| "step": 10190 |
| }, |
| { |
| "epoch": 13.08, |
| "learning_rate": 2.564102564102564e-05, |
| "loss": 0.0001, |
| "step": 10200 |
| }, |
| { |
| "epoch": 13.09, |
| "learning_rate": 2.547008547008547e-05, |
| "loss": 0.0001, |
| "step": 10210 |
| }, |
| { |
| "epoch": 13.1, |
| "learning_rate": 2.52991452991453e-05, |
| "loss": 0.0001, |
| "step": 10220 |
| }, |
| { |
| "epoch": 13.12, |
| "learning_rate": 2.512820512820513e-05, |
| "loss": 0.0001, |
| "step": 10230 |
| }, |
| { |
| "epoch": 13.13, |
| "learning_rate": 2.495726495726496e-05, |
| "loss": 0.0001, |
| "step": 10240 |
| }, |
| { |
| "epoch": 13.14, |
| "learning_rate": 2.4786324786324787e-05, |
| "loss": 0.0001, |
| "step": 10250 |
| }, |
| { |
| "epoch": 13.15, |
| "learning_rate": 2.461538461538462e-05, |
| "loss": 0.0001, |
| "step": 10260 |
| }, |
| { |
| "epoch": 13.17, |
| "learning_rate": 2.4444444444444445e-05, |
| "loss": 0.0001, |
| "step": 10270 |
| }, |
| { |
| "epoch": 13.18, |
| "learning_rate": 2.4273504273504274e-05, |
| "loss": 0.0001, |
| "step": 10280 |
| }, |
| { |
| "epoch": 13.19, |
| "learning_rate": 2.4102564102564103e-05, |
| "loss": 0.0001, |
| "step": 10290 |
| }, |
| { |
| "epoch": 13.21, |
| "learning_rate": 2.3931623931623935e-05, |
| "loss": 0.0001, |
| "step": 10300 |
| }, |
| { |
| "epoch": 13.22, |
| "learning_rate": 2.376068376068376e-05, |
| "loss": 0.0001, |
| "step": 10310 |
| }, |
| { |
| "epoch": 13.23, |
| "learning_rate": 2.358974358974359e-05, |
| "loss": 0.0001, |
| "step": 10320 |
| }, |
| { |
| "epoch": 13.24, |
| "learning_rate": 2.341880341880342e-05, |
| "loss": 0.0001, |
| "step": 10330 |
| }, |
| { |
| "epoch": 13.26, |
| "learning_rate": 2.324786324786325e-05, |
| "loss": 0.0001, |
| "step": 10340 |
| }, |
| { |
| "epoch": 13.27, |
| "learning_rate": 2.307692307692308e-05, |
| "loss": 0.0001, |
| "step": 10350 |
| }, |
| { |
| "epoch": 13.28, |
| "learning_rate": 2.2905982905982905e-05, |
| "loss": 0.0001, |
| "step": 10360 |
| }, |
| { |
| "epoch": 13.29, |
| "learning_rate": 2.2735042735042737e-05, |
| "loss": 0.0001, |
| "step": 10370 |
| }, |
| { |
| "epoch": 13.31, |
| "learning_rate": 2.2564102564102566e-05, |
| "loss": 0.0001, |
| "step": 10380 |
| }, |
| { |
| "epoch": 13.32, |
| "learning_rate": 2.2393162393162394e-05, |
| "loss": 0.0001, |
| "step": 10390 |
| }, |
| { |
| "epoch": 13.33, |
| "learning_rate": 2.2222222222222223e-05, |
| "loss": 0.0001, |
| "step": 10400 |
| }, |
| { |
| "epoch": 13.35, |
| "learning_rate": 2.2051282051282052e-05, |
| "loss": 0.0001, |
| "step": 10410 |
| }, |
| { |
| "epoch": 13.36, |
| "learning_rate": 2.188034188034188e-05, |
| "loss": 0.0001, |
| "step": 10420 |
| }, |
| { |
| "epoch": 13.37, |
| "learning_rate": 2.170940170940171e-05, |
| "loss": 0.0001, |
| "step": 10430 |
| }, |
| { |
| "epoch": 13.38, |
| "learning_rate": 2.1538461538461542e-05, |
| "loss": 0.0001, |
| "step": 10440 |
| }, |
| { |
| "epoch": 13.4, |
| "learning_rate": 2.1367521367521368e-05, |
| "loss": 0.0001, |
| "step": 10450 |
| }, |
| { |
| "epoch": 13.41, |
| "learning_rate": 2.1196581196581196e-05, |
| "loss": 0.0001, |
| "step": 10460 |
| }, |
| { |
| "epoch": 13.42, |
| "learning_rate": 2.102564102564103e-05, |
| "loss": 0.0001, |
| "step": 10470 |
| }, |
| { |
| "epoch": 13.44, |
| "learning_rate": 2.0854700854700857e-05, |
| "loss": 0.0001, |
| "step": 10480 |
| }, |
| { |
| "epoch": 13.45, |
| "learning_rate": 2.0683760683760683e-05, |
| "loss": 0.0001, |
| "step": 10490 |
| }, |
| { |
| "epoch": 13.46, |
| "learning_rate": 2.0512820512820512e-05, |
| "loss": 0.0001, |
| "step": 10500 |
| }, |
| { |
| "epoch": 13.47, |
| "learning_rate": 2.0341880341880344e-05, |
| "loss": 0.0001, |
| "step": 10510 |
| }, |
| { |
| "epoch": 13.49, |
| "learning_rate": 2.0170940170940173e-05, |
| "loss": 0.0001, |
| "step": 10520 |
| }, |
| { |
| "epoch": 13.5, |
| "learning_rate": 2e-05, |
| "loss": 0.0001, |
| "step": 10530 |
| }, |
| { |
| "epoch": 13.51, |
| "learning_rate": 1.982905982905983e-05, |
| "loss": 0.0001, |
| "step": 10540 |
| }, |
| { |
| "epoch": 13.53, |
| "learning_rate": 1.965811965811966e-05, |
| "loss": 0.0001, |
| "step": 10550 |
| }, |
| { |
| "epoch": 13.54, |
| "learning_rate": 1.9487179487179488e-05, |
| "loss": 0.0001, |
| "step": 10560 |
| }, |
| { |
| "epoch": 13.55, |
| "learning_rate": 1.9316239316239317e-05, |
| "loss": 0.0001, |
| "step": 10570 |
| }, |
| { |
| "epoch": 13.56, |
| "learning_rate": 1.914529914529915e-05, |
| "loss": 0.0001, |
| "step": 10580 |
| }, |
| { |
| "epoch": 13.58, |
| "learning_rate": 1.8974358974358975e-05, |
| "loss": 0.0001, |
| "step": 10590 |
| }, |
| { |
| "epoch": 13.59, |
| "learning_rate": 1.8803418803418804e-05, |
| "loss": 0.0001, |
| "step": 10600 |
| }, |
| { |
| "epoch": 13.6, |
| "learning_rate": 1.8632478632478632e-05, |
| "loss": 0.0001, |
| "step": 10610 |
| }, |
| { |
| "epoch": 13.62, |
| "learning_rate": 1.8461538461538465e-05, |
| "loss": 0.0001, |
| "step": 10620 |
| }, |
| { |
| "epoch": 13.63, |
| "learning_rate": 1.829059829059829e-05, |
| "loss": 0.0001, |
| "step": 10630 |
| }, |
| { |
| "epoch": 13.64, |
| "learning_rate": 1.811965811965812e-05, |
| "loss": 0.0001, |
| "step": 10640 |
| }, |
| { |
| "epoch": 13.65, |
| "learning_rate": 1.794871794871795e-05, |
| "loss": 0.0001, |
| "step": 10650 |
| }, |
| { |
| "epoch": 13.67, |
| "learning_rate": 1.777777777777778e-05, |
| "loss": 0.0001, |
| "step": 10660 |
| }, |
| { |
| "epoch": 13.68, |
| "learning_rate": 1.760683760683761e-05, |
| "loss": 0.0043, |
| "step": 10670 |
| }, |
| { |
| "epoch": 13.69, |
| "learning_rate": 1.7435897435897434e-05, |
| "loss": 0.0001, |
| "step": 10680 |
| }, |
| { |
| "epoch": 13.71, |
| "learning_rate": 1.7264957264957267e-05, |
| "loss": 0.0001, |
| "step": 10690 |
| }, |
| { |
| "epoch": 13.72, |
| "learning_rate": 1.7094017094017095e-05, |
| "loss": 0.0001, |
| "step": 10700 |
| }, |
| { |
| "epoch": 13.73, |
| "learning_rate": 1.6923076923076924e-05, |
| "loss": 0.0003, |
| "step": 10710 |
| }, |
| { |
| "epoch": 13.74, |
| "learning_rate": 1.6752136752136753e-05, |
| "loss": 0.0001, |
| "step": 10720 |
| }, |
| { |
| "epoch": 13.76, |
| "learning_rate": 1.6581196581196582e-05, |
| "loss": 0.0001, |
| "step": 10730 |
| }, |
| { |
| "epoch": 13.77, |
| "learning_rate": 1.641025641025641e-05, |
| "loss": 0.0002, |
| "step": 10740 |
| }, |
| { |
| "epoch": 13.78, |
| "learning_rate": 1.623931623931624e-05, |
| "loss": 0.0001, |
| "step": 10750 |
| }, |
| { |
| "epoch": 13.79, |
| "learning_rate": 1.6068376068376072e-05, |
| "loss": 0.0001, |
| "step": 10760 |
| }, |
| { |
| "epoch": 13.81, |
| "learning_rate": 1.5897435897435897e-05, |
| "loss": 0.0001, |
| "step": 10770 |
| }, |
| { |
| "epoch": 13.82, |
| "learning_rate": 1.5726495726495726e-05, |
| "loss": 0.0001, |
| "step": 10780 |
| }, |
| { |
| "epoch": 13.83, |
| "learning_rate": 1.5555555555555555e-05, |
| "loss": 0.0001, |
| "step": 10790 |
| }, |
| { |
| "epoch": 13.85, |
| "learning_rate": 1.5384615384615387e-05, |
| "loss": 0.0001, |
| "step": 10800 |
| }, |
| { |
| "epoch": 13.86, |
| "learning_rate": 1.5213675213675213e-05, |
| "loss": 0.0001, |
| "step": 10810 |
| }, |
| { |
| "epoch": 13.87, |
| "learning_rate": 1.5042735042735043e-05, |
| "loss": 0.0001, |
| "step": 10820 |
| }, |
| { |
| "epoch": 13.88, |
| "learning_rate": 1.4871794871794872e-05, |
| "loss": 0.0001, |
| "step": 10830 |
| }, |
| { |
| "epoch": 13.9, |
| "learning_rate": 1.4700854700854703e-05, |
| "loss": 0.0001, |
| "step": 10840 |
| }, |
| { |
| "epoch": 13.91, |
| "learning_rate": 1.4529914529914531e-05, |
| "loss": 0.0001, |
| "step": 10850 |
| }, |
| { |
| "epoch": 13.92, |
| "learning_rate": 1.4358974358974359e-05, |
| "loss": 0.0001, |
| "step": 10860 |
| }, |
| { |
| "epoch": 13.94, |
| "learning_rate": 1.4188034188034189e-05, |
| "loss": 0.0001, |
| "step": 10870 |
| }, |
| { |
| "epoch": 13.95, |
| "learning_rate": 1.4017094017094018e-05, |
| "loss": 0.0001, |
| "step": 10880 |
| }, |
| { |
| "epoch": 13.96, |
| "learning_rate": 1.3846153846153847e-05, |
| "loss": 0.0001, |
| "step": 10890 |
| }, |
| { |
| "epoch": 13.97, |
| "learning_rate": 1.3675213675213677e-05, |
| "loss": 0.0001, |
| "step": 10900 |
| }, |
| { |
| "epoch": 13.99, |
| "learning_rate": 1.3504273504273504e-05, |
| "loss": 0.0001, |
| "step": 10910 |
| }, |
| { |
| "epoch": 14.0, |
| "learning_rate": 1.3333333333333333e-05, |
| "loss": 0.0001, |
| "step": 10920 |
| }, |
| { |
| "epoch": 14.01, |
| "learning_rate": 1.3162393162393164e-05, |
| "loss": 0.0001, |
| "step": 10930 |
| }, |
| { |
| "epoch": 14.03, |
| "learning_rate": 1.2991452991452993e-05, |
| "loss": 0.0001, |
| "step": 10940 |
| }, |
| { |
| "epoch": 14.04, |
| "learning_rate": 1.282051282051282e-05, |
| "loss": 0.0001, |
| "step": 10950 |
| }, |
| { |
| "epoch": 14.05, |
| "learning_rate": 1.264957264957265e-05, |
| "loss": 0.0001, |
| "step": 10960 |
| }, |
| { |
| "epoch": 14.06, |
| "learning_rate": 1.247863247863248e-05, |
| "loss": 0.0001, |
| "step": 10970 |
| }, |
| { |
| "epoch": 14.08, |
| "learning_rate": 1.230769230769231e-05, |
| "loss": 0.0001, |
| "step": 10980 |
| }, |
| { |
| "epoch": 14.09, |
| "learning_rate": 1.2136752136752137e-05, |
| "loss": 0.0001, |
| "step": 10990 |
| }, |
| { |
| "epoch": 14.1, |
| "learning_rate": 1.1965811965811967e-05, |
| "loss": 0.0001, |
| "step": 11000 |
| }, |
| { |
| "epoch": 14.1, |
| "eval_accuracy": 0.7962962962962963, |
| "eval_loss": 1.4692177772521973, |
| "eval_runtime": 34.3664, |
| "eval_samples_per_second": 51.911, |
| "eval_steps_per_second": 6.489, |
| "step": 11000 |
| }, |
| { |
| "epoch": 14.12, |
| "learning_rate": 1.1794871794871795e-05, |
| "loss": 0.0001, |
| "step": 11010 |
| }, |
| { |
| "epoch": 14.13, |
| "learning_rate": 1.1623931623931625e-05, |
| "loss": 0.0001, |
| "step": 11020 |
| }, |
| { |
| "epoch": 14.14, |
| "learning_rate": 1.1452991452991452e-05, |
| "loss": 0.0001, |
| "step": 11030 |
| }, |
| { |
| "epoch": 14.15, |
| "learning_rate": 1.1282051282051283e-05, |
| "loss": 0.0001, |
| "step": 11040 |
| }, |
| { |
| "epoch": 14.17, |
| "learning_rate": 1.1111111111111112e-05, |
| "loss": 0.0001, |
| "step": 11050 |
| }, |
| { |
| "epoch": 14.18, |
| "learning_rate": 1.094017094017094e-05, |
| "loss": 0.0001, |
| "step": 11060 |
| }, |
| { |
| "epoch": 14.19, |
| "learning_rate": 1.0769230769230771e-05, |
| "loss": 0.0001, |
| "step": 11070 |
| }, |
| { |
| "epoch": 14.21, |
| "learning_rate": 1.0598290598290598e-05, |
| "loss": 0.0001, |
| "step": 11080 |
| }, |
| { |
| "epoch": 14.22, |
| "learning_rate": 1.0427350427350429e-05, |
| "loss": 0.0001, |
| "step": 11090 |
| }, |
| { |
| "epoch": 14.23, |
| "learning_rate": 1.0256410256410256e-05, |
| "loss": 0.0001, |
| "step": 11100 |
| }, |
| { |
| "epoch": 14.24, |
| "learning_rate": 1.0085470085470086e-05, |
| "loss": 0.0001, |
| "step": 11110 |
| }, |
| { |
| "epoch": 14.26, |
| "learning_rate": 9.914529914529915e-06, |
| "loss": 0.0001, |
| "step": 11120 |
| }, |
| { |
| "epoch": 14.27, |
| "learning_rate": 9.743589743589744e-06, |
| "loss": 0.0001, |
| "step": 11130 |
| }, |
| { |
| "epoch": 14.28, |
| "learning_rate": 9.572649572649575e-06, |
| "loss": 0.0001, |
| "step": 11140 |
| }, |
| { |
| "epoch": 14.29, |
| "learning_rate": 9.401709401709402e-06, |
| "loss": 0.0001, |
| "step": 11150 |
| }, |
| { |
| "epoch": 14.31, |
| "learning_rate": 9.230769230769232e-06, |
| "loss": 0.0001, |
| "step": 11160 |
| }, |
| { |
| "epoch": 14.32, |
| "learning_rate": 9.05982905982906e-06, |
| "loss": 0.0001, |
| "step": 11170 |
| }, |
| { |
| "epoch": 14.33, |
| "learning_rate": 8.88888888888889e-06, |
| "loss": 0.0001, |
| "step": 11180 |
| }, |
| { |
| "epoch": 14.35, |
| "learning_rate": 8.717948717948717e-06, |
| "loss": 0.0001, |
| "step": 11190 |
| }, |
| { |
| "epoch": 14.36, |
| "learning_rate": 8.547008547008548e-06, |
| "loss": 0.0001, |
| "step": 11200 |
| }, |
| { |
| "epoch": 14.37, |
| "learning_rate": 8.376068376068377e-06, |
| "loss": 0.0001, |
| "step": 11210 |
| }, |
| { |
| "epoch": 14.38, |
| "learning_rate": 8.205128205128205e-06, |
| "loss": 0.0001, |
| "step": 11220 |
| }, |
| { |
| "epoch": 14.4, |
| "learning_rate": 8.034188034188036e-06, |
| "loss": 0.0001, |
| "step": 11230 |
| }, |
| { |
| "epoch": 14.41, |
| "learning_rate": 7.863247863247863e-06, |
| "loss": 0.0001, |
| "step": 11240 |
| }, |
| { |
| "epoch": 14.42, |
| "learning_rate": 7.692307692307694e-06, |
| "loss": 0.0001, |
| "step": 11250 |
| }, |
| { |
| "epoch": 14.44, |
| "learning_rate": 7.521367521367522e-06, |
| "loss": 0.0001, |
| "step": 11260 |
| }, |
| { |
| "epoch": 14.45, |
| "learning_rate": 7.350427350427351e-06, |
| "loss": 0.0001, |
| "step": 11270 |
| }, |
| { |
| "epoch": 14.46, |
| "learning_rate": 7.179487179487179e-06, |
| "loss": 0.0001, |
| "step": 11280 |
| }, |
| { |
| "epoch": 14.47, |
| "learning_rate": 7.008547008547009e-06, |
| "loss": 0.0001, |
| "step": 11290 |
| }, |
| { |
| "epoch": 14.49, |
| "learning_rate": 6.837606837606839e-06, |
| "loss": 0.0001, |
| "step": 11300 |
| }, |
| { |
| "epoch": 14.5, |
| "learning_rate": 6.666666666666667e-06, |
| "loss": 0.0001, |
| "step": 11310 |
| }, |
| { |
| "epoch": 14.51, |
| "learning_rate": 6.495726495726496e-06, |
| "loss": 0.0001, |
| "step": 11320 |
| }, |
| { |
| "epoch": 14.53, |
| "learning_rate": 6.324786324786325e-06, |
| "loss": 0.0001, |
| "step": 11330 |
| }, |
| { |
| "epoch": 14.54, |
| "learning_rate": 6.153846153846155e-06, |
| "loss": 0.0001, |
| "step": 11340 |
| }, |
| { |
| "epoch": 14.55, |
| "learning_rate": 5.982905982905984e-06, |
| "loss": 0.0001, |
| "step": 11350 |
| }, |
| { |
| "epoch": 14.56, |
| "learning_rate": 5.8119658119658126e-06, |
| "loss": 0.0001, |
| "step": 11360 |
| }, |
| { |
| "epoch": 14.58, |
| "learning_rate": 5.641025641025641e-06, |
| "loss": 0.0001, |
| "step": 11370 |
| }, |
| { |
| "epoch": 14.59, |
| "learning_rate": 5.47008547008547e-06, |
| "loss": 0.0001, |
| "step": 11380 |
| }, |
| { |
| "epoch": 14.6, |
| "learning_rate": 5.299145299145299e-06, |
| "loss": 0.0001, |
| "step": 11390 |
| }, |
| { |
| "epoch": 14.62, |
| "learning_rate": 5.128205128205128e-06, |
| "loss": 0.0001, |
| "step": 11400 |
| }, |
| { |
| "epoch": 14.63, |
| "learning_rate": 4.957264957264958e-06, |
| "loss": 0.0001, |
| "step": 11410 |
| }, |
| { |
| "epoch": 14.64, |
| "learning_rate": 4.786324786324787e-06, |
| "loss": 0.0001, |
| "step": 11420 |
| }, |
| { |
| "epoch": 14.65, |
| "learning_rate": 4.615384615384616e-06, |
| "loss": 0.0001, |
| "step": 11430 |
| }, |
| { |
| "epoch": 14.67, |
| "learning_rate": 4.444444444444445e-06, |
| "loss": 0.0001, |
| "step": 11440 |
| }, |
| { |
| "epoch": 14.68, |
| "learning_rate": 4.273504273504274e-06, |
| "loss": 0.0001, |
| "step": 11450 |
| }, |
| { |
| "epoch": 14.69, |
| "learning_rate": 4.102564102564103e-06, |
| "loss": 0.0001, |
| "step": 11460 |
| }, |
| { |
| "epoch": 14.71, |
| "learning_rate": 3.9316239316239315e-06, |
| "loss": 0.0001, |
| "step": 11470 |
| }, |
| { |
| "epoch": 14.72, |
| "learning_rate": 3.760683760683761e-06, |
| "loss": 0.0001, |
| "step": 11480 |
| }, |
| { |
| "epoch": 14.73, |
| "learning_rate": 3.5897435897435896e-06, |
| "loss": 0.0001, |
| "step": 11490 |
| }, |
| { |
| "epoch": 14.74, |
| "learning_rate": 3.4188034188034193e-06, |
| "loss": 0.0001, |
| "step": 11500 |
| }, |
| { |
| "epoch": 14.76, |
| "learning_rate": 3.247863247863248e-06, |
| "loss": 0.0001, |
| "step": 11510 |
| }, |
| { |
| "epoch": 14.77, |
| "learning_rate": 3.0769230769230774e-06, |
| "loss": 0.0001, |
| "step": 11520 |
| }, |
| { |
| "epoch": 14.78, |
| "learning_rate": 2.9059829059829063e-06, |
| "loss": 0.0001, |
| "step": 11530 |
| }, |
| { |
| "epoch": 14.79, |
| "learning_rate": 2.735042735042735e-06, |
| "loss": 0.0001, |
| "step": 11540 |
| }, |
| { |
| "epoch": 14.81, |
| "learning_rate": 2.564102564102564e-06, |
| "loss": 0.0001, |
| "step": 11550 |
| }, |
| { |
| "epoch": 14.82, |
| "learning_rate": 2.3931623931623937e-06, |
| "loss": 0.0001, |
| "step": 11560 |
| }, |
| { |
| "epoch": 14.83, |
| "learning_rate": 2.2222222222222225e-06, |
| "loss": 0.0001, |
| "step": 11570 |
| }, |
| { |
| "epoch": 14.85, |
| "learning_rate": 2.0512820512820513e-06, |
| "loss": 0.0001, |
| "step": 11580 |
| }, |
| { |
| "epoch": 14.86, |
| "learning_rate": 1.8803418803418804e-06, |
| "loss": 0.0001, |
| "step": 11590 |
| }, |
| { |
| "epoch": 14.87, |
| "learning_rate": 1.7094017094017097e-06, |
| "loss": 0.0001, |
| "step": 11600 |
| }, |
| { |
| "epoch": 14.88, |
| "learning_rate": 1.5384615384615387e-06, |
| "loss": 0.0001, |
| "step": 11610 |
| }, |
| { |
| "epoch": 14.9, |
| "learning_rate": 1.3675213675213676e-06, |
| "loss": 0.0001, |
| "step": 11620 |
| }, |
| { |
| "epoch": 14.91, |
| "learning_rate": 1.1965811965811968e-06, |
| "loss": 0.0001, |
| "step": 11630 |
| }, |
| { |
| "epoch": 14.92, |
| "learning_rate": 1.0256410256410257e-06, |
| "loss": 0.0001, |
| "step": 11640 |
| }, |
| { |
| "epoch": 14.94, |
| "learning_rate": 8.547008547008548e-07, |
| "loss": 0.0001, |
| "step": 11650 |
| }, |
| { |
| "epoch": 14.95, |
| "learning_rate": 6.837606837606838e-07, |
| "loss": 0.0001, |
| "step": 11660 |
| }, |
| { |
| "epoch": 14.96, |
| "learning_rate": 5.128205128205128e-07, |
| "loss": 0.0001, |
| "step": 11670 |
| }, |
| { |
| "epoch": 14.97, |
| "learning_rate": 3.418803418803419e-07, |
| "loss": 0.0001, |
| "step": 11680 |
| }, |
| { |
| "epoch": 14.99, |
| "learning_rate": 1.7094017094017095e-07, |
| "loss": 0.0001, |
| "step": 11690 |
| }, |
| { |
| "epoch": 15.0, |
| "learning_rate": 0.0, |
| "loss": 0.0001, |
| "step": 11700 |
| }, |
| { |
| "epoch": 15.0, |
| "step": 11700, |
| "total_flos": 7.248925543125934e+18, |
| "train_loss": 0.20681309804148698, |
| "train_runtime": 4138.6696, |
| "train_samples_per_second": 22.616, |
| "train_steps_per_second": 2.827 |
| } |
| ], |
| "max_steps": 11700, |
| "num_train_epochs": 15, |
| "total_flos": 7.248925543125934e+18, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|