{ "best_metric": 0.9318181818181818, "best_model_checkpoint": "beit-base-patch16-224-85-fold5/checkpoint-92", "epoch": 100.0, "eval_steps": 500, "global_step": 200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "eval_accuracy": 0.29545454545454547, "eval_loss": 1.089578628540039, "eval_runtime": 0.7021, "eval_samples_per_second": 62.667, "eval_steps_per_second": 2.849, "step": 2 }, { "epoch": 2.0, "eval_accuracy": 0.7272727272727273, "eval_loss": 0.6455699801445007, "eval_runtime": 0.6856, "eval_samples_per_second": 64.174, "eval_steps_per_second": 2.917, "step": 4 }, { "epoch": 3.0, "eval_accuracy": 0.7045454545454546, "eval_loss": 1.0355168581008911, "eval_runtime": 0.6996, "eval_samples_per_second": 62.897, "eval_steps_per_second": 2.859, "step": 6 }, { "epoch": 4.0, "eval_accuracy": 0.7045454545454546, "eval_loss": 0.9123974442481995, "eval_runtime": 0.6813, "eval_samples_per_second": 64.582, "eval_steps_per_second": 2.936, "step": 8 }, { "epoch": 5.0, "grad_norm": 9.428820610046387, "learning_rate": 2.5e-05, "loss": 0.7607, "step": 10 }, { "epoch": 5.0, "eval_accuracy": 0.7954545454545454, "eval_loss": 0.5808687210083008, "eval_runtime": 0.672, "eval_samples_per_second": 65.48, "eval_steps_per_second": 2.976, "step": 10 }, { "epoch": 6.0, "eval_accuracy": 0.75, "eval_loss": 0.6812124848365784, "eval_runtime": 0.6683, "eval_samples_per_second": 65.834, "eval_steps_per_second": 2.992, "step": 12 }, { "epoch": 7.0, "eval_accuracy": 0.75, "eval_loss": 0.6529408097267151, "eval_runtime": 0.6753, "eval_samples_per_second": 65.16, "eval_steps_per_second": 2.962, "step": 14 }, { "epoch": 8.0, "eval_accuracy": 0.7272727272727273, "eval_loss": 0.717404305934906, "eval_runtime": 0.7009, "eval_samples_per_second": 62.778, "eval_steps_per_second": 2.854, "step": 16 }, { "epoch": 9.0, "eval_accuracy": 0.6136363636363636, "eval_loss": 0.6618563532829285, "eval_runtime": 0.6832, "eval_samples_per_second": 64.403, "eval_steps_per_second": 2.927, "step": 18 }, { "epoch": 10.0, "grad_norm": 6.730888843536377, "learning_rate": 5e-05, "loss": 0.4221, "step": 20 }, { "epoch": 10.0, "eval_accuracy": 0.75, "eval_loss": 0.80633944272995, "eval_runtime": 0.6867, "eval_samples_per_second": 64.075, "eval_steps_per_second": 2.912, "step": 20 }, { "epoch": 11.0, "eval_accuracy": 0.75, "eval_loss": 0.6371808648109436, "eval_runtime": 0.6802, "eval_samples_per_second": 64.689, "eval_steps_per_second": 2.94, "step": 22 }, { "epoch": 12.0, "eval_accuracy": 0.75, "eval_loss": 0.588646411895752, "eval_runtime": 0.6871, "eval_samples_per_second": 64.039, "eval_steps_per_second": 2.911, "step": 24 }, { "epoch": 13.0, "eval_accuracy": 0.6363636363636364, "eval_loss": 0.6359339356422424, "eval_runtime": 0.6771, "eval_samples_per_second": 64.986, "eval_steps_per_second": 2.954, "step": 26 }, { "epoch": 14.0, "eval_accuracy": 0.75, "eval_loss": 0.5584580898284912, "eval_runtime": 0.6833, "eval_samples_per_second": 64.391, "eval_steps_per_second": 2.927, "step": 28 }, { "epoch": 15.0, "grad_norm": 9.09820556640625, "learning_rate": 4.722222222222222e-05, "loss": 0.3287, "step": 30 }, { "epoch": 15.0, "eval_accuracy": 0.7272727272727273, "eval_loss": 0.45406511425971985, "eval_runtime": 0.6778, "eval_samples_per_second": 64.917, "eval_steps_per_second": 2.951, "step": 30 }, { "epoch": 16.0, "eval_accuracy": 0.5681818181818182, "eval_loss": 0.7624431252479553, "eval_runtime": 0.6791, "eval_samples_per_second": 64.788, "eval_steps_per_second": 2.945, "step": 32 }, { "epoch": 17.0, "eval_accuracy": 0.75, "eval_loss": 0.6805771589279175, "eval_runtime": 0.6896, "eval_samples_per_second": 63.807, "eval_steps_per_second": 2.9, "step": 34 }, { "epoch": 18.0, "eval_accuracy": 0.7272727272727273, "eval_loss": 0.7707971930503845, "eval_runtime": 0.6822, "eval_samples_per_second": 64.496, "eval_steps_per_second": 2.932, "step": 36 }, { "epoch": 19.0, "eval_accuracy": 0.7272727272727273, "eval_loss": 0.416960209608078, "eval_runtime": 0.6839, "eval_samples_per_second": 64.337, "eval_steps_per_second": 2.924, "step": 38 }, { "epoch": 20.0, "grad_norm": 12.929576873779297, "learning_rate": 4.4444444444444447e-05, "loss": 0.3663, "step": 40 }, { "epoch": 20.0, "eval_accuracy": 0.7727272727272727, "eval_loss": 0.42817583680152893, "eval_runtime": 0.684, "eval_samples_per_second": 64.327, "eval_steps_per_second": 2.924, "step": 40 }, { "epoch": 21.0, "eval_accuracy": 0.75, "eval_loss": 0.5613061785697937, "eval_runtime": 0.6869, "eval_samples_per_second": 64.059, "eval_steps_per_second": 2.912, "step": 42 }, { "epoch": 22.0, "eval_accuracy": 0.8409090909090909, "eval_loss": 0.4025155007839203, "eval_runtime": 0.7081, "eval_samples_per_second": 62.136, "eval_steps_per_second": 2.824, "step": 44 }, { "epoch": 23.0, "eval_accuracy": 0.7954545454545454, "eval_loss": 0.41085565090179443, "eval_runtime": 0.7009, "eval_samples_per_second": 62.774, "eval_steps_per_second": 2.853, "step": 46 }, { "epoch": 24.0, "eval_accuracy": 0.8409090909090909, "eval_loss": 0.4372767508029938, "eval_runtime": 0.6888, "eval_samples_per_second": 63.878, "eval_steps_per_second": 2.904, "step": 48 }, { "epoch": 25.0, "grad_norm": 6.6011061668396, "learning_rate": 4.166666666666667e-05, "loss": 0.2344, "step": 50 }, { "epoch": 25.0, "eval_accuracy": 0.8863636363636364, "eval_loss": 0.3210701644420624, "eval_runtime": 0.6782, "eval_samples_per_second": 64.873, "eval_steps_per_second": 2.949, "step": 50 }, { "epoch": 26.0, "eval_accuracy": 0.75, "eval_loss": 0.5560834407806396, "eval_runtime": 0.6873, "eval_samples_per_second": 64.014, "eval_steps_per_second": 2.91, "step": 52 }, { "epoch": 27.0, "eval_accuracy": 0.8636363636363636, "eval_loss": 0.31488853693008423, "eval_runtime": 0.6879, "eval_samples_per_second": 63.96, "eval_steps_per_second": 2.907, "step": 54 }, { "epoch": 28.0, "eval_accuracy": 0.7954545454545454, "eval_loss": 0.31661316752433777, "eval_runtime": 0.679, "eval_samples_per_second": 64.805, "eval_steps_per_second": 2.946, "step": 56 }, { "epoch": 29.0, "eval_accuracy": 0.8636363636363636, "eval_loss": 0.4163520932197571, "eval_runtime": 0.6778, "eval_samples_per_second": 64.92, "eval_steps_per_second": 2.951, "step": 58 }, { "epoch": 30.0, "grad_norm": 5.3891143798828125, "learning_rate": 3.888888888888889e-05, "loss": 0.2051, "step": 60 }, { "epoch": 30.0, "eval_accuracy": 0.8636363636363636, "eval_loss": 0.4345444440841675, "eval_runtime": 0.6869, "eval_samples_per_second": 64.054, "eval_steps_per_second": 2.912, "step": 60 }, { "epoch": 31.0, "eval_accuracy": 0.8636363636363636, "eval_loss": 0.31803134083747864, "eval_runtime": 0.6887, "eval_samples_per_second": 63.886, "eval_steps_per_second": 2.904, "step": 62 }, { "epoch": 32.0, "eval_accuracy": 0.8409090909090909, "eval_loss": 0.3673260509967804, "eval_runtime": 0.6835, "eval_samples_per_second": 64.377, "eval_steps_per_second": 2.926, "step": 64 }, { "epoch": 33.0, "eval_accuracy": 0.8409090909090909, "eval_loss": 0.43128955364227295, "eval_runtime": 0.682, "eval_samples_per_second": 64.517, "eval_steps_per_second": 2.933, "step": 66 }, { "epoch": 34.0, "eval_accuracy": 0.8409090909090909, "eval_loss": 0.43590399622917175, "eval_runtime": 0.6788, "eval_samples_per_second": 64.824, "eval_steps_per_second": 2.947, "step": 68 }, { "epoch": 35.0, "grad_norm": 3.639585256576538, "learning_rate": 3.611111111111111e-05, "loss": 0.1694, "step": 70 }, { "epoch": 35.0, "eval_accuracy": 0.8181818181818182, "eval_loss": 0.36998674273490906, "eval_runtime": 0.6829, "eval_samples_per_second": 64.428, "eval_steps_per_second": 2.929, "step": 70 }, { "epoch": 36.0, "eval_accuracy": 0.7954545454545454, "eval_loss": 0.584349513053894, "eval_runtime": 0.6937, "eval_samples_per_second": 63.427, "eval_steps_per_second": 2.883, "step": 72 }, { "epoch": 37.0, "eval_accuracy": 0.8636363636363636, "eval_loss": 0.4064345061779022, "eval_runtime": 0.7057, "eval_samples_per_second": 62.346, "eval_steps_per_second": 2.834, "step": 74 }, { "epoch": 38.0, "eval_accuracy": 0.8181818181818182, "eval_loss": 0.39915817975997925, "eval_runtime": 0.682, "eval_samples_per_second": 64.516, "eval_steps_per_second": 2.933, "step": 76 }, { "epoch": 39.0, "eval_accuracy": 0.8636363636363636, "eval_loss": 0.3152655363082886, "eval_runtime": 0.6899, "eval_samples_per_second": 63.778, "eval_steps_per_second": 2.899, "step": 78 }, { "epoch": 40.0, "grad_norm": 5.693894863128662, "learning_rate": 3.3333333333333335e-05, "loss": 0.1566, "step": 80 }, { "epoch": 40.0, "eval_accuracy": 0.8181818181818182, "eval_loss": 0.5581021904945374, "eval_runtime": 0.6812, "eval_samples_per_second": 64.596, "eval_steps_per_second": 2.936, "step": 80 }, { "epoch": 41.0, "eval_accuracy": 0.8636363636363636, "eval_loss": 0.2921257019042969, "eval_runtime": 0.6885, "eval_samples_per_second": 63.908, "eval_steps_per_second": 2.905, "step": 82 }, { "epoch": 42.0, "eval_accuracy": 0.8863636363636364, "eval_loss": 0.32171034812927246, "eval_runtime": 0.684, "eval_samples_per_second": 64.326, "eval_steps_per_second": 2.924, "step": 84 }, { "epoch": 43.0, "eval_accuracy": 0.8863636363636364, "eval_loss": 0.32552286982536316, "eval_runtime": 0.6845, "eval_samples_per_second": 64.277, "eval_steps_per_second": 2.922, "step": 86 }, { "epoch": 44.0, "eval_accuracy": 0.75, "eval_loss": 0.723772406578064, "eval_runtime": 0.7032, "eval_samples_per_second": 62.569, "eval_steps_per_second": 2.844, "step": 88 }, { "epoch": 45.0, "grad_norm": 9.99726676940918, "learning_rate": 3.055555555555556e-05, "loss": 0.1389, "step": 90 }, { "epoch": 45.0, "eval_accuracy": 0.8863636363636364, "eval_loss": 0.40532952547073364, "eval_runtime": 0.6753, "eval_samples_per_second": 65.154, "eval_steps_per_second": 2.962, "step": 90 }, { "epoch": 46.0, "eval_accuracy": 0.9318181818181818, "eval_loss": 0.24986252188682556, "eval_runtime": 0.6918, "eval_samples_per_second": 63.603, "eval_steps_per_second": 2.891, "step": 92 }, { "epoch": 47.0, "eval_accuracy": 0.8863636363636364, "eval_loss": 0.2584282159805298, "eval_runtime": 0.6836, "eval_samples_per_second": 64.362, "eval_steps_per_second": 2.926, "step": 94 }, { "epoch": 48.0, "eval_accuracy": 0.8409090909090909, "eval_loss": 0.44320380687713623, "eval_runtime": 0.6851, "eval_samples_per_second": 64.225, "eval_steps_per_second": 2.919, "step": 96 }, { "epoch": 49.0, "eval_accuracy": 0.7954545454545454, "eval_loss": 0.6965329647064209, "eval_runtime": 0.6828, "eval_samples_per_second": 64.443, "eval_steps_per_second": 2.929, "step": 98 }, { "epoch": 50.0, "grad_norm": 5.456106662750244, "learning_rate": 2.777777777777778e-05, "loss": 0.1311, "step": 100 }, { "epoch": 50.0, "eval_accuracy": 0.8409090909090909, "eval_loss": 0.39098644256591797, "eval_runtime": 0.6828, "eval_samples_per_second": 64.437, "eval_steps_per_second": 2.929, "step": 100 }, { "epoch": 51.0, "eval_accuracy": 0.8636363636363636, "eval_loss": 0.30165335536003113, "eval_runtime": 0.6957, "eval_samples_per_second": 63.242, "eval_steps_per_second": 2.875, "step": 102 }, { "epoch": 52.0, "eval_accuracy": 0.8636363636363636, "eval_loss": 0.30502772331237793, "eval_runtime": 0.6903, "eval_samples_per_second": 63.743, "eval_steps_per_second": 2.897, "step": 104 }, { "epoch": 53.0, "eval_accuracy": 0.8636363636363636, "eval_loss": 0.21932415664196014, "eval_runtime": 0.6995, "eval_samples_per_second": 62.902, "eval_steps_per_second": 2.859, "step": 106 }, { "epoch": 54.0, "eval_accuracy": 0.8409090909090909, "eval_loss": 0.23692327737808228, "eval_runtime": 0.6924, "eval_samples_per_second": 63.544, "eval_steps_per_second": 2.888, "step": 108 }, { "epoch": 55.0, "grad_norm": 2.8878471851348877, "learning_rate": 2.5e-05, "loss": 0.1386, "step": 110 }, { "epoch": 55.0, "eval_accuracy": 0.8863636363636364, "eval_loss": 0.31426045298576355, "eval_runtime": 0.6968, "eval_samples_per_second": 63.146, "eval_steps_per_second": 2.87, "step": 110 }, { "epoch": 56.0, "eval_accuracy": 0.8863636363636364, "eval_loss": 0.29323482513427734, "eval_runtime": 0.6884, "eval_samples_per_second": 63.916, "eval_steps_per_second": 2.905, "step": 112 }, { "epoch": 57.0, "eval_accuracy": 0.9090909090909091, "eval_loss": 0.27247121930122375, "eval_runtime": 0.6889, "eval_samples_per_second": 63.868, "eval_steps_per_second": 2.903, "step": 114 }, { "epoch": 58.0, "eval_accuracy": 0.8409090909090909, "eval_loss": 0.566352128982544, "eval_runtime": 0.6829, "eval_samples_per_second": 64.432, "eval_steps_per_second": 2.929, "step": 116 }, { "epoch": 59.0, "eval_accuracy": 0.8181818181818182, "eval_loss": 0.5874523520469666, "eval_runtime": 0.6823, "eval_samples_per_second": 64.484, "eval_steps_per_second": 2.931, "step": 118 }, { "epoch": 60.0, "grad_norm": 5.098616123199463, "learning_rate": 2.2222222222222223e-05, "loss": 0.1194, "step": 120 }, { "epoch": 60.0, "eval_accuracy": 0.8636363636363636, "eval_loss": 0.46228501200675964, "eval_runtime": 0.6928, "eval_samples_per_second": 63.507, "eval_steps_per_second": 2.887, "step": 120 }, { "epoch": 61.0, "eval_accuracy": 0.8181818181818182, "eval_loss": 0.4716045558452606, "eval_runtime": 0.6856, "eval_samples_per_second": 64.174, "eval_steps_per_second": 2.917, "step": 122 }, { "epoch": 62.0, "eval_accuracy": 0.8181818181818182, "eval_loss": 0.5028097033500671, "eval_runtime": 0.6887, "eval_samples_per_second": 63.887, "eval_steps_per_second": 2.904, "step": 124 }, { "epoch": 63.0, "eval_accuracy": 0.8181818181818182, "eval_loss": 0.4557519257068634, "eval_runtime": 0.6871, "eval_samples_per_second": 64.04, "eval_steps_per_second": 2.911, "step": 126 }, { "epoch": 64.0, "eval_accuracy": 0.8181818181818182, "eval_loss": 0.47977954149246216, "eval_runtime": 0.6832, "eval_samples_per_second": 64.4, "eval_steps_per_second": 2.927, "step": 128 }, { "epoch": 65.0, "grad_norm": 4.329418659210205, "learning_rate": 1.9444444444444445e-05, "loss": 0.1122, "step": 130 }, { "epoch": 65.0, "eval_accuracy": 0.8409090909090909, "eval_loss": 0.3826620876789093, "eval_runtime": 0.6842, "eval_samples_per_second": 64.31, "eval_steps_per_second": 2.923, "step": 130 }, { "epoch": 66.0, "eval_accuracy": 0.8409090909090909, "eval_loss": 0.3652952313423157, "eval_runtime": 0.6915, "eval_samples_per_second": 63.63, "eval_steps_per_second": 2.892, "step": 132 }, { "epoch": 67.0, "eval_accuracy": 0.8409090909090909, "eval_loss": 0.3972075581550598, "eval_runtime": 0.6803, "eval_samples_per_second": 64.678, "eval_steps_per_second": 2.94, "step": 134 }, { "epoch": 68.0, "eval_accuracy": 0.7727272727272727, "eval_loss": 0.5704622864723206, "eval_runtime": 0.6889, "eval_samples_per_second": 63.868, "eval_steps_per_second": 2.903, "step": 136 }, { "epoch": 69.0, "eval_accuracy": 0.7727272727272727, "eval_loss": 0.5934601426124573, "eval_runtime": 0.6795, "eval_samples_per_second": 64.752, "eval_steps_per_second": 2.943, "step": 138 }, { "epoch": 70.0, "grad_norm": 3.6549811363220215, "learning_rate": 1.6666666666666667e-05, "loss": 0.1041, "step": 140 }, { "epoch": 70.0, "eval_accuracy": 0.8636363636363636, "eval_loss": 0.3905385136604309, "eval_runtime": 0.6809, "eval_samples_per_second": 64.623, "eval_steps_per_second": 2.937, "step": 140 }, { "epoch": 71.0, "eval_accuracy": 0.8409090909090909, "eval_loss": 0.27905356884002686, "eval_runtime": 0.6869, "eval_samples_per_second": 64.051, "eval_steps_per_second": 2.911, "step": 142 }, { "epoch": 72.0, "eval_accuracy": 0.9090909090909091, "eval_loss": 0.28452473878860474, "eval_runtime": 0.6826, "eval_samples_per_second": 64.463, "eval_steps_per_second": 2.93, "step": 144 }, { "epoch": 73.0, "eval_accuracy": 0.8636363636363636, "eval_loss": 0.24005986750125885, "eval_runtime": 0.6824, "eval_samples_per_second": 64.477, "eval_steps_per_second": 2.931, "step": 146 }, { "epoch": 74.0, "eval_accuracy": 0.8863636363636364, "eval_loss": 0.2259605973958969, "eval_runtime": 0.6828, "eval_samples_per_second": 64.437, "eval_steps_per_second": 2.929, "step": 148 }, { "epoch": 75.0, "grad_norm": 3.5433380603790283, "learning_rate": 1.388888888888889e-05, "loss": 0.0982, "step": 150 }, { "epoch": 75.0, "eval_accuracy": 0.8863636363636364, "eval_loss": 0.2454221248626709, "eval_runtime": 0.6812, "eval_samples_per_second": 64.589, "eval_steps_per_second": 2.936, "step": 150 }, { "epoch": 76.0, "eval_accuracy": 0.8863636363636364, "eval_loss": 0.3772827386856079, "eval_runtime": 0.7099, "eval_samples_per_second": 61.983, "eval_steps_per_second": 2.817, "step": 152 }, { "epoch": 77.0, "eval_accuracy": 0.8181818181818182, "eval_loss": 0.618475615978241, "eval_runtime": 0.6861, "eval_samples_per_second": 64.132, "eval_steps_per_second": 2.915, "step": 154 }, { "epoch": 78.0, "eval_accuracy": 0.7727272727272727, "eval_loss": 0.7238039374351501, "eval_runtime": 0.6796, "eval_samples_per_second": 64.746, "eval_steps_per_second": 2.943, "step": 156 }, { "epoch": 79.0, "eval_accuracy": 0.8409090909090909, "eval_loss": 0.5469216704368591, "eval_runtime": 0.687, "eval_samples_per_second": 64.051, "eval_steps_per_second": 2.911, "step": 158 }, { "epoch": 80.0, "grad_norm": 3.139439344406128, "learning_rate": 1.1111111111111112e-05, "loss": 0.1065, "step": 160 }, { "epoch": 80.0, "eval_accuracy": 0.8636363636363636, "eval_loss": 0.4317708909511566, "eval_runtime": 0.6729, "eval_samples_per_second": 65.391, "eval_steps_per_second": 2.972, "step": 160 }, { "epoch": 81.0, "eval_accuracy": 0.8863636363636364, "eval_loss": 0.33475610613822937, "eval_runtime": 0.685, "eval_samples_per_second": 64.237, "eval_steps_per_second": 2.92, "step": 162 }, { "epoch": 82.0, "eval_accuracy": 0.8636363636363636, "eval_loss": 0.3040803372859955, "eval_runtime": 0.7242, "eval_samples_per_second": 60.758, "eval_steps_per_second": 2.762, "step": 164 }, { "epoch": 83.0, "eval_accuracy": 0.8863636363636364, "eval_loss": 0.33496472239494324, "eval_runtime": 0.6781, "eval_samples_per_second": 64.888, "eval_steps_per_second": 2.949, "step": 166 }, { "epoch": 84.0, "eval_accuracy": 0.8863636363636364, "eval_loss": 0.34641969203948975, "eval_runtime": 0.6832, "eval_samples_per_second": 64.4, "eval_steps_per_second": 2.927, "step": 168 }, { "epoch": 85.0, "grad_norm": 3.164541482925415, "learning_rate": 8.333333333333334e-06, "loss": 0.0829, "step": 170 }, { "epoch": 85.0, "eval_accuracy": 0.8863636363636364, "eval_loss": 0.33745819330215454, "eval_runtime": 0.6769, "eval_samples_per_second": 64.998, "eval_steps_per_second": 2.954, "step": 170 }, { "epoch": 86.0, "eval_accuracy": 0.8863636363636364, "eval_loss": 0.33091866970062256, "eval_runtime": 0.6868, "eval_samples_per_second": 64.069, "eval_steps_per_second": 2.912, "step": 172 }, { "epoch": 87.0, "eval_accuracy": 0.8863636363636364, "eval_loss": 0.33253809809684753, "eval_runtime": 0.6828, "eval_samples_per_second": 64.44, "eval_steps_per_second": 2.929, "step": 174 }, { "epoch": 88.0, "eval_accuracy": 0.8863636363636364, "eval_loss": 0.3441101312637329, "eval_runtime": 0.6847, "eval_samples_per_second": 64.259, "eval_steps_per_second": 2.921, "step": 176 }, { "epoch": 89.0, "eval_accuracy": 0.8636363636363636, "eval_loss": 0.34555572271347046, "eval_runtime": 0.7006, "eval_samples_per_second": 62.805, "eval_steps_per_second": 2.855, "step": 178 }, { "epoch": 90.0, "grad_norm": 2.9145150184631348, "learning_rate": 5.555555555555556e-06, "loss": 0.0902, "step": 180 }, { "epoch": 90.0, "eval_accuracy": 0.8636363636363636, "eval_loss": 0.32443130016326904, "eval_runtime": 0.6801, "eval_samples_per_second": 64.698, "eval_steps_per_second": 2.941, "step": 180 }, { "epoch": 91.0, "eval_accuracy": 0.8636363636363636, "eval_loss": 0.3126292824745178, "eval_runtime": 0.6994, "eval_samples_per_second": 62.913, "eval_steps_per_second": 2.86, "step": 182 }, { "epoch": 92.0, "eval_accuracy": 0.8636363636363636, "eval_loss": 0.3116739094257355, "eval_runtime": 0.6939, "eval_samples_per_second": 63.409, "eval_steps_per_second": 2.882, "step": 184 }, { "epoch": 93.0, "eval_accuracy": 0.8636363636363636, "eval_loss": 0.28766298294067383, "eval_runtime": 0.6892, "eval_samples_per_second": 63.845, "eval_steps_per_second": 2.902, "step": 186 }, { "epoch": 94.0, "eval_accuracy": 0.8636363636363636, "eval_loss": 0.26428502798080444, "eval_runtime": 0.7034, "eval_samples_per_second": 62.55, "eval_steps_per_second": 2.843, "step": 188 }, { "epoch": 95.0, "grad_norm": 2.80922269821167, "learning_rate": 2.777777777777778e-06, "loss": 0.0838, "step": 190 }, { "epoch": 95.0, "eval_accuracy": 0.8863636363636364, "eval_loss": 0.25247523188591003, "eval_runtime": 0.6832, "eval_samples_per_second": 64.4, "eval_steps_per_second": 2.927, "step": 190 }, { "epoch": 96.0, "eval_accuracy": 0.9090909090909091, "eval_loss": 0.24620772898197174, "eval_runtime": 0.6948, "eval_samples_per_second": 63.325, "eval_steps_per_second": 2.878, "step": 192 }, { "epoch": 97.0, "eval_accuracy": 0.9090909090909091, "eval_loss": 0.24165372550487518, "eval_runtime": 0.6884, "eval_samples_per_second": 63.918, "eval_steps_per_second": 2.905, "step": 194 }, { "epoch": 98.0, "eval_accuracy": 0.9090909090909091, "eval_loss": 0.24015091359615326, "eval_runtime": 0.6834, "eval_samples_per_second": 64.382, "eval_steps_per_second": 2.926, "step": 196 }, { "epoch": 99.0, "eval_accuracy": 0.9090909090909091, "eval_loss": 0.24094568192958832, "eval_runtime": 0.6957, "eval_samples_per_second": 63.249, "eval_steps_per_second": 2.875, "step": 198 }, { "epoch": 100.0, "grad_norm": 2.4933438301086426, "learning_rate": 0.0, "loss": 0.0747, "step": 200 }, { "epoch": 100.0, "eval_accuracy": 0.9090909090909091, "eval_loss": 0.24256624281406403, "eval_runtime": 0.6904, "eval_samples_per_second": 63.734, "eval_steps_per_second": 2.897, "step": 200 }, { "epoch": 100.0, "step": 200, "total_flos": 1.8822505078960128e+18, "train_loss": 0.19619473487138747, "train_runtime": 1209.7214, "train_samples_per_second": 20.087, "train_steps_per_second": 0.165 }, { "epoch": 100.0, "eval_accuracy": 0.9318181818181818, "eval_loss": 0.24986252188682556, "eval_runtime": 0.7488, "eval_samples_per_second": 58.758, "eval_steps_per_second": 2.671, "step": 200 } ], "logging_steps": 10, "max_steps": 200, "num_input_tokens_seen": 0, "num_train_epochs": 100, "save_steps": 500, "total_flos": 1.8822505078960128e+18, "train_batch_size": 32, "trial_name": null, "trial_params": null }