| { | |
| "best_metric": 0.03415210172533989, | |
| "best_model_checkpoint": "AlexWang99/byt5_3k_4d/checkpoint-376", | |
| "epoch": 94.0, | |
| "eval_steps": 500, | |
| "global_step": 376, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 1.0, | |
| "eval_loss": 0.47024813294410706, | |
| "eval_runtime": 10.75, | |
| "eval_samples_per_second": 930.235, | |
| "eval_steps_per_second": 1.209, | |
| "step": 4 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_loss": 0.4366174340248108, | |
| "eval_runtime": 10.8447, | |
| "eval_samples_per_second": 922.113, | |
| "eval_steps_per_second": 1.199, | |
| "step": 8 | |
| }, | |
| { | |
| "epoch": 2.5, | |
| "learning_rate": 4.875e-05, | |
| "loss": 0.8301, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "eval_loss": 0.4202331006526947, | |
| "eval_runtime": 10.7807, | |
| "eval_samples_per_second": 927.582, | |
| "eval_steps_per_second": 1.206, | |
| "step": 12 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "eval_loss": 0.38434356451034546, | |
| "eval_runtime": 10.8022, | |
| "eval_samples_per_second": 925.735, | |
| "eval_steps_per_second": 1.203, | |
| "step": 16 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "learning_rate": 4.75e-05, | |
| "loss": 0.7703, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "eval_loss": 0.38975098729133606, | |
| "eval_runtime": 10.7363, | |
| "eval_samples_per_second": 931.418, | |
| "eval_steps_per_second": 1.211, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "eval_loss": 0.35734379291534424, | |
| "eval_runtime": 10.725, | |
| "eval_samples_per_second": 932.399, | |
| "eval_steps_per_second": 1.212, | |
| "step": 24 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "eval_loss": 0.34219321608543396, | |
| "eval_runtime": 10.9668, | |
| "eval_samples_per_second": 911.84, | |
| "eval_steps_per_second": 1.185, | |
| "step": 28 | |
| }, | |
| { | |
| "epoch": 7.5, | |
| "learning_rate": 4.6250000000000006e-05, | |
| "loss": 0.7169, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "eval_loss": 0.330382376909256, | |
| "eval_runtime": 10.8312, | |
| "eval_samples_per_second": 923.257, | |
| "eval_steps_per_second": 1.2, | |
| "step": 32 | |
| }, | |
| { | |
| "epoch": 9.0, | |
| "eval_loss": 0.3048989772796631, | |
| "eval_runtime": 10.726, | |
| "eval_samples_per_second": 932.316, | |
| "eval_steps_per_second": 1.212, | |
| "step": 36 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "learning_rate": 4.5e-05, | |
| "loss": 0.6727, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "eval_loss": 0.30665355920791626, | |
| "eval_runtime": 10.9181, | |
| "eval_samples_per_second": 915.912, | |
| "eval_steps_per_second": 1.191, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 11.0, | |
| "eval_loss": 0.29648929834365845, | |
| "eval_runtime": 10.7354, | |
| "eval_samples_per_second": 931.494, | |
| "eval_steps_per_second": 1.211, | |
| "step": 44 | |
| }, | |
| { | |
| "epoch": 12.0, | |
| "eval_loss": 0.26932698488235474, | |
| "eval_runtime": 10.8379, | |
| "eval_samples_per_second": 922.691, | |
| "eval_steps_per_second": 1.199, | |
| "step": 48 | |
| }, | |
| { | |
| "epoch": 12.5, | |
| "learning_rate": 4.375e-05, | |
| "loss": 0.6394, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 13.0, | |
| "eval_loss": 0.271121084690094, | |
| "eval_runtime": 10.8381, | |
| "eval_samples_per_second": 922.67, | |
| "eval_steps_per_second": 1.199, | |
| "step": 52 | |
| }, | |
| { | |
| "epoch": 14.0, | |
| "eval_loss": 0.25609534978866577, | |
| "eval_runtime": 10.7523, | |
| "eval_samples_per_second": 930.029, | |
| "eval_steps_per_second": 1.209, | |
| "step": 56 | |
| }, | |
| { | |
| "epoch": 15.0, | |
| "learning_rate": 4.25e-05, | |
| "loss": 0.6047, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 15.0, | |
| "eval_loss": 0.2453787624835968, | |
| "eval_runtime": 10.9268, | |
| "eval_samples_per_second": 915.184, | |
| "eval_steps_per_second": 1.19, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 16.0, | |
| "eval_loss": 0.23679418861865997, | |
| "eval_runtime": 10.7591, | |
| "eval_samples_per_second": 929.449, | |
| "eval_steps_per_second": 1.208, | |
| "step": 64 | |
| }, | |
| { | |
| "epoch": 17.0, | |
| "eval_loss": 0.22503486275672913, | |
| "eval_runtime": 10.8371, | |
| "eval_samples_per_second": 922.76, | |
| "eval_steps_per_second": 1.2, | |
| "step": 68 | |
| }, | |
| { | |
| "epoch": 17.5, | |
| "learning_rate": 4.125e-05, | |
| "loss": 0.565, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 18.0, | |
| "eval_loss": 0.21100571751594543, | |
| "eval_runtime": 10.8584, | |
| "eval_samples_per_second": 920.947, | |
| "eval_steps_per_second": 1.197, | |
| "step": 72 | |
| }, | |
| { | |
| "epoch": 19.0, | |
| "eval_loss": 0.21086934208869934, | |
| "eval_runtime": 10.746, | |
| "eval_samples_per_second": 930.581, | |
| "eval_steps_per_second": 1.21, | |
| "step": 76 | |
| }, | |
| { | |
| "epoch": 20.0, | |
| "learning_rate": 4e-05, | |
| "loss": 0.5368, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 20.0, | |
| "eval_loss": 0.1950305849313736, | |
| "eval_runtime": 10.7833, | |
| "eval_samples_per_second": 927.356, | |
| "eval_steps_per_second": 1.206, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 21.0, | |
| "eval_loss": 0.19742541015148163, | |
| "eval_runtime": 10.7496, | |
| "eval_samples_per_second": 930.268, | |
| "eval_steps_per_second": 1.209, | |
| "step": 84 | |
| }, | |
| { | |
| "epoch": 22.0, | |
| "eval_loss": 0.18190620839595795, | |
| "eval_runtime": 11.0063, | |
| "eval_samples_per_second": 908.569, | |
| "eval_steps_per_second": 1.181, | |
| "step": 88 | |
| }, | |
| { | |
| "epoch": 22.5, | |
| "learning_rate": 3.875e-05, | |
| "loss": 0.518, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 23.0, | |
| "eval_loss": 0.1795072704553604, | |
| "eval_runtime": 10.855, | |
| "eval_samples_per_second": 921.235, | |
| "eval_steps_per_second": 1.198, | |
| "step": 92 | |
| }, | |
| { | |
| "epoch": 24.0, | |
| "eval_loss": 0.16476453840732574, | |
| "eval_runtime": 10.7846, | |
| "eval_samples_per_second": 927.252, | |
| "eval_steps_per_second": 1.205, | |
| "step": 96 | |
| }, | |
| { | |
| "epoch": 25.0, | |
| "learning_rate": 3.7500000000000003e-05, | |
| "loss": 0.4862, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 25.0, | |
| "eval_loss": 0.16748683154582977, | |
| "eval_runtime": 10.9227, | |
| "eval_samples_per_second": 915.525, | |
| "eval_steps_per_second": 1.19, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 26.0, | |
| "eval_loss": 0.15504491329193115, | |
| "eval_runtime": 10.7793, | |
| "eval_samples_per_second": 927.702, | |
| "eval_steps_per_second": 1.206, | |
| "step": 104 | |
| }, | |
| { | |
| "epoch": 27.0, | |
| "eval_loss": 0.15300293266773224, | |
| "eval_runtime": 10.8547, | |
| "eval_samples_per_second": 921.263, | |
| "eval_steps_per_second": 1.198, | |
| "step": 108 | |
| }, | |
| { | |
| "epoch": 27.5, | |
| "learning_rate": 3.625e-05, | |
| "loss": 0.4628, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 28.0, | |
| "eval_loss": 0.14466118812561035, | |
| "eval_runtime": 10.8549, | |
| "eval_samples_per_second": 921.246, | |
| "eval_steps_per_second": 1.198, | |
| "step": 112 | |
| }, | |
| { | |
| "epoch": 29.0, | |
| "eval_loss": 0.1442325860261917, | |
| "eval_runtime": 10.764, | |
| "eval_samples_per_second": 929.023, | |
| "eval_steps_per_second": 1.208, | |
| "step": 116 | |
| }, | |
| { | |
| "epoch": 30.0, | |
| "learning_rate": 3.5e-05, | |
| "loss": 0.4408, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 30.0, | |
| "eval_loss": 0.13103845715522766, | |
| "eval_runtime": 10.9404, | |
| "eval_samples_per_second": 914.043, | |
| "eval_steps_per_second": 1.188, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 31.0, | |
| "eval_loss": 0.13363589346408844, | |
| "eval_runtime": 10.7472, | |
| "eval_samples_per_second": 930.475, | |
| "eval_steps_per_second": 1.21, | |
| "step": 124 | |
| }, | |
| { | |
| "epoch": 32.0, | |
| "eval_loss": 0.12347704917192459, | |
| "eval_runtime": 10.8696, | |
| "eval_samples_per_second": 920.0, | |
| "eval_steps_per_second": 1.196, | |
| "step": 128 | |
| }, | |
| { | |
| "epoch": 32.5, | |
| "learning_rate": 3.375000000000001e-05, | |
| "loss": 0.4192, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 33.0, | |
| "eval_loss": 0.1190723404288292, | |
| "eval_runtime": 10.8564, | |
| "eval_samples_per_second": 921.113, | |
| "eval_steps_per_second": 1.197, | |
| "step": 132 | |
| }, | |
| { | |
| "epoch": 34.0, | |
| "eval_loss": 0.11926523596048355, | |
| "eval_runtime": 10.7486, | |
| "eval_samples_per_second": 930.351, | |
| "eval_steps_per_second": 1.209, | |
| "step": 136 | |
| }, | |
| { | |
| "epoch": 35.0, | |
| "learning_rate": 3.2500000000000004e-05, | |
| "loss": 0.4133, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 35.0, | |
| "eval_loss": 0.1123439222574234, | |
| "eval_runtime": 10.9392, | |
| "eval_samples_per_second": 914.14, | |
| "eval_steps_per_second": 1.188, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 36.0, | |
| "eval_loss": 0.11556507647037506, | |
| "eval_runtime": 10.7494, | |
| "eval_samples_per_second": 930.287, | |
| "eval_steps_per_second": 1.209, | |
| "step": 144 | |
| }, | |
| { | |
| "epoch": 37.0, | |
| "eval_loss": 0.10512395203113556, | |
| "eval_runtime": 10.8606, | |
| "eval_samples_per_second": 920.755, | |
| "eval_steps_per_second": 1.197, | |
| "step": 148 | |
| }, | |
| { | |
| "epoch": 37.5, | |
| "learning_rate": 3.125e-05, | |
| "loss": 0.3922, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 38.0, | |
| "eval_loss": 0.09992647171020508, | |
| "eval_runtime": 10.8473, | |
| "eval_samples_per_second": 921.889, | |
| "eval_steps_per_second": 1.198, | |
| "step": 152 | |
| }, | |
| { | |
| "epoch": 39.0, | |
| "eval_loss": 0.09912022948265076, | |
| "eval_runtime": 10.751, | |
| "eval_samples_per_second": 930.148, | |
| "eval_steps_per_second": 1.209, | |
| "step": 156 | |
| }, | |
| { | |
| "epoch": 40.0, | |
| "learning_rate": 3e-05, | |
| "loss": 0.3778, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 40.0, | |
| "eval_loss": 0.09952548891305923, | |
| "eval_runtime": 10.9239, | |
| "eval_samples_per_second": 915.427, | |
| "eval_steps_per_second": 1.19, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 41.0, | |
| "eval_loss": 0.09118303656578064, | |
| "eval_runtime": 10.7525, | |
| "eval_samples_per_second": 930.014, | |
| "eval_steps_per_second": 1.209, | |
| "step": 164 | |
| }, | |
| { | |
| "epoch": 42.0, | |
| "eval_loss": 0.09032606333494186, | |
| "eval_runtime": 10.8423, | |
| "eval_samples_per_second": 922.31, | |
| "eval_steps_per_second": 1.199, | |
| "step": 168 | |
| }, | |
| { | |
| "epoch": 42.5, | |
| "learning_rate": 2.8749999999999997e-05, | |
| "loss": 0.3655, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 43.0, | |
| "eval_loss": 0.08409227430820465, | |
| "eval_runtime": 10.8572, | |
| "eval_samples_per_second": 921.048, | |
| "eval_steps_per_second": 1.197, | |
| "step": 172 | |
| }, | |
| { | |
| "epoch": 44.0, | |
| "eval_loss": 0.0789598897099495, | |
| "eval_runtime": 10.7437, | |
| "eval_samples_per_second": 930.776, | |
| "eval_steps_per_second": 1.21, | |
| "step": 176 | |
| }, | |
| { | |
| "epoch": 45.0, | |
| "learning_rate": 2.7500000000000004e-05, | |
| "loss": 0.3526, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 45.0, | |
| "eval_loss": 0.08267370611429214, | |
| "eval_runtime": 10.9337, | |
| "eval_samples_per_second": 914.6, | |
| "eval_steps_per_second": 1.189, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 46.0, | |
| "eval_loss": 0.07559242099523544, | |
| "eval_runtime": 10.7359, | |
| "eval_samples_per_second": 931.456, | |
| "eval_steps_per_second": 1.211, | |
| "step": 184 | |
| }, | |
| { | |
| "epoch": 47.0, | |
| "eval_loss": 0.07468590885400772, | |
| "eval_runtime": 10.8561, | |
| "eval_samples_per_second": 921.142, | |
| "eval_steps_per_second": 1.197, | |
| "step": 188 | |
| }, | |
| { | |
| "epoch": 47.5, | |
| "learning_rate": 2.625e-05, | |
| "loss": 0.3378, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 48.0, | |
| "eval_loss": 0.0737282931804657, | |
| "eval_runtime": 10.8372, | |
| "eval_samples_per_second": 922.747, | |
| "eval_steps_per_second": 1.2, | |
| "step": 192 | |
| }, | |
| { | |
| "epoch": 49.0, | |
| "eval_loss": 0.07465548813343048, | |
| "eval_runtime": 10.7608, | |
| "eval_samples_per_second": 929.3, | |
| "eval_steps_per_second": 1.208, | |
| "step": 196 | |
| }, | |
| { | |
| "epoch": 50.0, | |
| "learning_rate": 2.5e-05, | |
| "loss": 0.3308, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 50.0, | |
| "eval_loss": 0.07294411957263947, | |
| "eval_runtime": 10.9347, | |
| "eval_samples_per_second": 914.518, | |
| "eval_steps_per_second": 1.189, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 51.0, | |
| "eval_loss": 0.0665128082036972, | |
| "eval_runtime": 10.7481, | |
| "eval_samples_per_second": 930.401, | |
| "eval_steps_per_second": 1.21, | |
| "step": 204 | |
| }, | |
| { | |
| "epoch": 52.0, | |
| "eval_loss": 0.06627500057220459, | |
| "eval_runtime": 10.8409, | |
| "eval_samples_per_second": 922.432, | |
| "eval_steps_per_second": 1.199, | |
| "step": 208 | |
| }, | |
| { | |
| "epoch": 52.5, | |
| "learning_rate": 2.375e-05, | |
| "loss": 0.321, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 53.0, | |
| "eval_loss": 0.06422976404428482, | |
| "eval_runtime": 10.8561, | |
| "eval_samples_per_second": 921.142, | |
| "eval_steps_per_second": 1.197, | |
| "step": 212 | |
| }, | |
| { | |
| "epoch": 54.0, | |
| "eval_loss": 0.06401015818119049, | |
| "eval_runtime": 10.7568, | |
| "eval_samples_per_second": 929.646, | |
| "eval_steps_per_second": 1.209, | |
| "step": 216 | |
| }, | |
| { | |
| "epoch": 55.0, | |
| "learning_rate": 2.25e-05, | |
| "loss": 0.3084, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 55.0, | |
| "eval_loss": 0.06319215148687363, | |
| "eval_runtime": 10.9357, | |
| "eval_samples_per_second": 914.439, | |
| "eval_steps_per_second": 1.189, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 56.0, | |
| "eval_loss": 0.059920959174633026, | |
| "eval_runtime": 10.7511, | |
| "eval_samples_per_second": 930.141, | |
| "eval_steps_per_second": 1.209, | |
| "step": 224 | |
| }, | |
| { | |
| "epoch": 57.0, | |
| "eval_loss": 0.057977043092250824, | |
| "eval_runtime": 10.8555, | |
| "eval_samples_per_second": 921.188, | |
| "eval_steps_per_second": 1.198, | |
| "step": 228 | |
| }, | |
| { | |
| "epoch": 57.5, | |
| "learning_rate": 2.125e-05, | |
| "loss": 0.2967, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 58.0, | |
| "eval_loss": 0.05669580027461052, | |
| "eval_runtime": 10.8434, | |
| "eval_samples_per_second": 922.219, | |
| "eval_steps_per_second": 1.199, | |
| "step": 232 | |
| }, | |
| { | |
| "epoch": 59.0, | |
| "eval_loss": 0.0525004044175148, | |
| "eval_runtime": 10.7506, | |
| "eval_samples_per_second": 930.18, | |
| "eval_steps_per_second": 1.209, | |
| "step": 236 | |
| }, | |
| { | |
| "epoch": 60.0, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2928, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 60.0, | |
| "eval_loss": 0.05224265158176422, | |
| "eval_runtime": 10.9382, | |
| "eval_samples_per_second": 914.231, | |
| "eval_steps_per_second": 1.188, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 61.0, | |
| "eval_loss": 0.05358195677399635, | |
| "eval_runtime": 10.7598, | |
| "eval_samples_per_second": 929.387, | |
| "eval_steps_per_second": 1.208, | |
| "step": 244 | |
| }, | |
| { | |
| "epoch": 62.0, | |
| "eval_loss": 0.052435729652643204, | |
| "eval_runtime": 10.8537, | |
| "eval_samples_per_second": 921.341, | |
| "eval_steps_per_second": 1.198, | |
| "step": 248 | |
| }, | |
| { | |
| "epoch": 62.5, | |
| "learning_rate": 1.8750000000000002e-05, | |
| "loss": 0.2929, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 63.0, | |
| "eval_loss": 0.056764379143714905, | |
| "eval_runtime": 10.8657, | |
| "eval_samples_per_second": 920.329, | |
| "eval_steps_per_second": 1.196, | |
| "step": 252 | |
| }, | |
| { | |
| "epoch": 64.0, | |
| "eval_loss": 0.0530422069132328, | |
| "eval_runtime": 10.7635, | |
| "eval_samples_per_second": 929.065, | |
| "eval_steps_per_second": 1.208, | |
| "step": 256 | |
| }, | |
| { | |
| "epoch": 65.0, | |
| "learning_rate": 1.75e-05, | |
| "loss": 0.283, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 65.0, | |
| "eval_loss": 0.04763900488615036, | |
| "eval_runtime": 10.9405, | |
| "eval_samples_per_second": 914.038, | |
| "eval_steps_per_second": 1.188, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 66.0, | |
| "eval_loss": 0.04787232354283333, | |
| "eval_runtime": 10.7497, | |
| "eval_samples_per_second": 930.255, | |
| "eval_steps_per_second": 1.209, | |
| "step": 264 | |
| }, | |
| { | |
| "epoch": 67.0, | |
| "eval_loss": 0.05069798231124878, | |
| "eval_runtime": 10.8615, | |
| "eval_samples_per_second": 920.685, | |
| "eval_steps_per_second": 1.197, | |
| "step": 268 | |
| }, | |
| { | |
| "epoch": 67.5, | |
| "learning_rate": 1.6250000000000002e-05, | |
| "loss": 0.2766, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 68.0, | |
| "eval_loss": 0.04605843871831894, | |
| "eval_runtime": 10.8475, | |
| "eval_samples_per_second": 921.871, | |
| "eval_steps_per_second": 1.198, | |
| "step": 272 | |
| }, | |
| { | |
| "epoch": 69.0, | |
| "eval_loss": 0.04439115151762962, | |
| "eval_runtime": 10.7586, | |
| "eval_samples_per_second": 929.488, | |
| "eval_steps_per_second": 1.208, | |
| "step": 276 | |
| }, | |
| { | |
| "epoch": 70.0, | |
| "learning_rate": 1.5e-05, | |
| "loss": 0.2677, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 70.0, | |
| "eval_loss": 0.0455540269613266, | |
| "eval_runtime": 10.9449, | |
| "eval_samples_per_second": 913.667, | |
| "eval_steps_per_second": 1.188, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 71.0, | |
| "eval_loss": 0.04371872544288635, | |
| "eval_runtime": 10.758, | |
| "eval_samples_per_second": 929.543, | |
| "eval_steps_per_second": 1.208, | |
| "step": 284 | |
| }, | |
| { | |
| "epoch": 72.0, | |
| "eval_loss": 0.04281056672334671, | |
| "eval_runtime": 10.8458, | |
| "eval_samples_per_second": 922.019, | |
| "eval_steps_per_second": 1.199, | |
| "step": 288 | |
| }, | |
| { | |
| "epoch": 72.5, | |
| "learning_rate": 1.3750000000000002e-05, | |
| "loss": 0.2614, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 73.0, | |
| "eval_loss": 0.04188177362084389, | |
| "eval_runtime": 10.8487, | |
| "eval_samples_per_second": 921.77, | |
| "eval_steps_per_second": 1.198, | |
| "step": 292 | |
| }, | |
| { | |
| "epoch": 74.0, | |
| "eval_loss": 0.04140578955411911, | |
| "eval_runtime": 10.7507, | |
| "eval_samples_per_second": 930.172, | |
| "eval_steps_per_second": 1.209, | |
| "step": 296 | |
| }, | |
| { | |
| "epoch": 75.0, | |
| "learning_rate": 1.25e-05, | |
| "loss": 0.2595, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 75.0, | |
| "eval_loss": 0.04175864905118942, | |
| "eval_runtime": 10.9254, | |
| "eval_samples_per_second": 915.297, | |
| "eval_steps_per_second": 1.19, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 76.0, | |
| "eval_loss": 0.04123968258500099, | |
| "eval_runtime": 10.7366, | |
| "eval_samples_per_second": 931.392, | |
| "eval_steps_per_second": 1.211, | |
| "step": 304 | |
| }, | |
| { | |
| "epoch": 77.0, | |
| "eval_loss": 0.03961934149265289, | |
| "eval_runtime": 10.8404, | |
| "eval_samples_per_second": 922.477, | |
| "eval_steps_per_second": 1.199, | |
| "step": 308 | |
| }, | |
| { | |
| "epoch": 77.5, | |
| "learning_rate": 1.125e-05, | |
| "loss": 0.2582, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 78.0, | |
| "eval_loss": 0.03820143640041351, | |
| "eval_runtime": 10.8282, | |
| "eval_samples_per_second": 923.515, | |
| "eval_steps_per_second": 1.201, | |
| "step": 312 | |
| }, | |
| { | |
| "epoch": 79.0, | |
| "eval_loss": 0.038101743906736374, | |
| "eval_runtime": 10.7523, | |
| "eval_samples_per_second": 930.033, | |
| "eval_steps_per_second": 1.209, | |
| "step": 316 | |
| }, | |
| { | |
| "epoch": 80.0, | |
| "learning_rate": 1e-05, | |
| "loss": 0.2511, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 80.0, | |
| "eval_loss": 0.03866468369960785, | |
| "eval_runtime": 10.9151, | |
| "eval_samples_per_second": 916.163, | |
| "eval_steps_per_second": 1.191, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 81.0, | |
| "eval_loss": 0.0387905091047287, | |
| "eval_runtime": 10.7459, | |
| "eval_samples_per_second": 930.586, | |
| "eval_steps_per_second": 1.21, | |
| "step": 324 | |
| }, | |
| { | |
| "epoch": 82.0, | |
| "eval_loss": 0.03723302111029625, | |
| "eval_runtime": 10.8332, | |
| "eval_samples_per_second": 923.092, | |
| "eval_steps_per_second": 1.2, | |
| "step": 328 | |
| }, | |
| { | |
| "epoch": 82.5, | |
| "learning_rate": 8.75e-06, | |
| "loss": 0.2481, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 83.0, | |
| "eval_loss": 0.03600754216313362, | |
| "eval_runtime": 10.8588, | |
| "eval_samples_per_second": 920.91, | |
| "eval_steps_per_second": 1.197, | |
| "step": 332 | |
| }, | |
| { | |
| "epoch": 84.0, | |
| "eval_loss": 0.036581408232450485, | |
| "eval_runtime": 10.7405, | |
| "eval_samples_per_second": 931.052, | |
| "eval_steps_per_second": 1.21, | |
| "step": 336 | |
| }, | |
| { | |
| "epoch": 85.0, | |
| "learning_rate": 7.5e-06, | |
| "loss": 0.2474, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 85.0, | |
| "eval_loss": 0.03646053001284599, | |
| "eval_runtime": 10.9336, | |
| "eval_samples_per_second": 914.608, | |
| "eval_steps_per_second": 1.189, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 86.0, | |
| "eval_loss": 0.03565111756324768, | |
| "eval_runtime": 10.7346, | |
| "eval_samples_per_second": 931.569, | |
| "eval_steps_per_second": 1.211, | |
| "step": 344 | |
| }, | |
| { | |
| "epoch": 87.0, | |
| "eval_loss": 0.035478148609399796, | |
| "eval_runtime": 10.8414, | |
| "eval_samples_per_second": 922.386, | |
| "eval_steps_per_second": 1.199, | |
| "step": 348 | |
| }, | |
| { | |
| "epoch": 87.5, | |
| "learning_rate": 6.25e-06, | |
| "loss": 0.2537, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 88.0, | |
| "eval_loss": 0.03596709668636322, | |
| "eval_runtime": 10.8347, | |
| "eval_samples_per_second": 922.957, | |
| "eval_steps_per_second": 1.2, | |
| "step": 352 | |
| }, | |
| { | |
| "epoch": 89.0, | |
| "eval_loss": 0.03587077185511589, | |
| "eval_runtime": 10.7439, | |
| "eval_samples_per_second": 930.763, | |
| "eval_steps_per_second": 1.21, | |
| "step": 356 | |
| }, | |
| { | |
| "epoch": 90.0, | |
| "learning_rate": 5e-06, | |
| "loss": 0.2438, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 90.0, | |
| "eval_loss": 0.03546379879117012, | |
| "eval_runtime": 10.9027, | |
| "eval_samples_per_second": 917.201, | |
| "eval_steps_per_second": 1.192, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 91.0, | |
| "eval_loss": 0.03530960530042648, | |
| "eval_runtime": 10.7401, | |
| "eval_samples_per_second": 931.093, | |
| "eval_steps_per_second": 1.21, | |
| "step": 364 | |
| }, | |
| { | |
| "epoch": 92.0, | |
| "eval_loss": 0.034898195415735245, | |
| "eval_runtime": 10.838, | |
| "eval_samples_per_second": 922.682, | |
| "eval_steps_per_second": 1.199, | |
| "step": 368 | |
| }, | |
| { | |
| "epoch": 92.5, | |
| "learning_rate": 3.75e-06, | |
| "loss": 0.2461, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 93.0, | |
| "eval_loss": 0.03429694473743439, | |
| "eval_runtime": 10.8462, | |
| "eval_samples_per_second": 921.982, | |
| "eval_steps_per_second": 1.199, | |
| "step": 372 | |
| }, | |
| { | |
| "epoch": 94.0, | |
| "eval_loss": 0.03415210172533989, | |
| "eval_runtime": 10.73, | |
| "eval_samples_per_second": 931.966, | |
| "eval_steps_per_second": 1.212, | |
| "step": 376 | |
| } | |
| ], | |
| "logging_steps": 10, | |
| "max_steps": 400, | |
| "num_train_epochs": 100, | |
| "save_steps": 500, | |
| "total_flos": 8096480649216000.0, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |