| { | |
| "best_metric": 0.5188751220703125, | |
| "best_model_checkpoint": "AlexWang99/byt5_add_2k/checkpoint-450", | |
| "epoch": 150.0, | |
| "eval_steps": 500, | |
| "global_step": 450, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 1.0, | |
| "eval_loss": 7.222967147827148, | |
| "eval_runtime": 10.6822, | |
| "eval_samples_per_second": 936.137, | |
| "eval_steps_per_second": 1.217, | |
| "step": 3 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_loss": 5.985061168670654, | |
| "eval_runtime": 10.9203, | |
| "eval_samples_per_second": 915.722, | |
| "eval_steps_per_second": 1.19, | |
| "step": 6 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "eval_loss": 5.1600236892700195, | |
| "eval_runtime": 10.7228, | |
| "eval_samples_per_second": 932.591, | |
| "eval_steps_per_second": 1.212, | |
| "step": 9 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "eval_loss": 4.493813514709473, | |
| "eval_runtime": 11.0064, | |
| "eval_samples_per_second": 908.565, | |
| "eval_steps_per_second": 1.181, | |
| "step": 12 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "eval_loss": 3.8893628120422363, | |
| "eval_runtime": 10.8904, | |
| "eval_samples_per_second": 918.238, | |
| "eval_steps_per_second": 1.194, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "eval_loss": 3.341691493988037, | |
| "eval_runtime": 10.8018, | |
| "eval_samples_per_second": 925.772, | |
| "eval_steps_per_second": 1.204, | |
| "step": 18 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "eval_loss": 2.9026920795440674, | |
| "eval_runtime": 10.8887, | |
| "eval_samples_per_second": 918.384, | |
| "eval_steps_per_second": 1.194, | |
| "step": 21 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "eval_loss": 2.606121063232422, | |
| "eval_runtime": 10.8106, | |
| "eval_samples_per_second": 925.018, | |
| "eval_steps_per_second": 1.203, | |
| "step": 24 | |
| }, | |
| { | |
| "epoch": 9.0, | |
| "eval_loss": 2.402636766433716, | |
| "eval_runtime": 11.0832, | |
| "eval_samples_per_second": 902.265, | |
| "eval_steps_per_second": 1.173, | |
| "step": 27 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "eval_loss": 2.1915464401245117, | |
| "eval_runtime": 10.8272, | |
| "eval_samples_per_second": 923.604, | |
| "eval_steps_per_second": 1.201, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 11.0, | |
| "eval_loss": 2.1069843769073486, | |
| "eval_runtime": 10.8305, | |
| "eval_samples_per_second": 923.32, | |
| "eval_steps_per_second": 1.2, | |
| "step": 33 | |
| }, | |
| { | |
| "epoch": 12.0, | |
| "eval_loss": 2.0630228519439697, | |
| "eval_runtime": 10.9163, | |
| "eval_samples_per_second": 916.063, | |
| "eval_steps_per_second": 1.191, | |
| "step": 36 | |
| }, | |
| { | |
| "epoch": 13.0, | |
| "eval_loss": 1.9474797248840332, | |
| "eval_runtime": 10.8302, | |
| "eval_samples_per_second": 923.345, | |
| "eval_steps_per_second": 1.2, | |
| "step": 39 | |
| }, | |
| { | |
| "epoch": 14.0, | |
| "eval_loss": 1.9065855741500854, | |
| "eval_runtime": 11.0817, | |
| "eval_samples_per_second": 902.388, | |
| "eval_steps_per_second": 1.173, | |
| "step": 42 | |
| }, | |
| { | |
| "epoch": 15.0, | |
| "eval_loss": 1.8849406242370605, | |
| "eval_runtime": 10.8362, | |
| "eval_samples_per_second": 922.835, | |
| "eval_steps_per_second": 1.2, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 16.0, | |
| "eval_loss": 1.7817472219467163, | |
| "eval_runtime": 10.8435, | |
| "eval_samples_per_second": 922.207, | |
| "eval_steps_per_second": 1.199, | |
| "step": 48 | |
| }, | |
| { | |
| "epoch": 17.0, | |
| "eval_loss": 1.7877730131149292, | |
| "eval_runtime": 10.9315, | |
| "eval_samples_per_second": 914.788, | |
| "eval_steps_per_second": 1.189, | |
| "step": 51 | |
| }, | |
| { | |
| "epoch": 18.0, | |
| "eval_loss": 1.7331796884536743, | |
| "eval_runtime": 10.8485, | |
| "eval_samples_per_second": 921.788, | |
| "eval_steps_per_second": 1.198, | |
| "step": 54 | |
| }, | |
| { | |
| "epoch": 19.0, | |
| "eval_loss": 1.692647099494934, | |
| "eval_runtime": 11.0859, | |
| "eval_samples_per_second": 902.043, | |
| "eval_steps_per_second": 1.173, | |
| "step": 57 | |
| }, | |
| { | |
| "epoch": 20.0, | |
| "eval_loss": 1.6727197170257568, | |
| "eval_runtime": 10.8467, | |
| "eval_samples_per_second": 921.936, | |
| "eval_steps_per_second": 1.199, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 21.0, | |
| "eval_loss": 1.66093909740448, | |
| "eval_runtime": 10.8432, | |
| "eval_samples_per_second": 922.235, | |
| "eval_steps_per_second": 1.199, | |
| "step": 63 | |
| }, | |
| { | |
| "epoch": 22.0, | |
| "eval_loss": 1.6616569757461548, | |
| "eval_runtime": 10.9265, | |
| "eval_samples_per_second": 915.204, | |
| "eval_steps_per_second": 1.19, | |
| "step": 66 | |
| }, | |
| { | |
| "epoch": 23.0, | |
| "eval_loss": 1.6537153720855713, | |
| "eval_runtime": 10.8532, | |
| "eval_samples_per_second": 921.386, | |
| "eval_steps_per_second": 1.198, | |
| "step": 69 | |
| }, | |
| { | |
| "epoch": 24.0, | |
| "eval_loss": 1.645085334777832, | |
| "eval_runtime": 11.0411, | |
| "eval_samples_per_second": 905.709, | |
| "eval_steps_per_second": 1.177, | |
| "step": 72 | |
| }, | |
| { | |
| "epoch": 25.0, | |
| "eval_loss": 1.6413605213165283, | |
| "eval_runtime": 10.7889, | |
| "eval_samples_per_second": 926.88, | |
| "eval_steps_per_second": 1.205, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 26.0, | |
| "eval_loss": 1.635939598083496, | |
| "eval_runtime": 10.8026, | |
| "eval_samples_per_second": 925.7, | |
| "eval_steps_per_second": 1.203, | |
| "step": 78 | |
| }, | |
| { | |
| "epoch": 27.0, | |
| "eval_loss": 1.6321375370025635, | |
| "eval_runtime": 10.9127, | |
| "eval_samples_per_second": 916.367, | |
| "eval_steps_per_second": 1.191, | |
| "step": 81 | |
| }, | |
| { | |
| "epoch": 28.0, | |
| "eval_loss": 1.6276355981826782, | |
| "eval_runtime": 10.8251, | |
| "eval_samples_per_second": 923.783, | |
| "eval_steps_per_second": 1.201, | |
| "step": 84 | |
| }, | |
| { | |
| "epoch": 29.0, | |
| "eval_loss": 1.6232202053070068, | |
| "eval_runtime": 11.0643, | |
| "eval_samples_per_second": 903.805, | |
| "eval_steps_per_second": 1.175, | |
| "step": 87 | |
| }, | |
| { | |
| "epoch": 30.0, | |
| "eval_loss": 1.618307113647461, | |
| "eval_runtime": 10.8447, | |
| "eval_samples_per_second": 922.106, | |
| "eval_steps_per_second": 1.199, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 31.0, | |
| "eval_loss": 1.612666368484497, | |
| "eval_runtime": 10.8371, | |
| "eval_samples_per_second": 922.759, | |
| "eval_steps_per_second": 1.2, | |
| "step": 93 | |
| }, | |
| { | |
| "epoch": 32.0, | |
| "eval_loss": 1.6067509651184082, | |
| "eval_runtime": 10.9144, | |
| "eval_samples_per_second": 916.219, | |
| "eval_steps_per_second": 1.191, | |
| "step": 96 | |
| }, | |
| { | |
| "epoch": 33.0, | |
| "eval_loss": 1.5994765758514404, | |
| "eval_runtime": 10.8587, | |
| "eval_samples_per_second": 920.924, | |
| "eval_steps_per_second": 1.197, | |
| "step": 99 | |
| }, | |
| { | |
| "epoch": 34.0, | |
| "eval_loss": 1.590742588043213, | |
| "eval_runtime": 11.0839, | |
| "eval_samples_per_second": 902.211, | |
| "eval_steps_per_second": 1.173, | |
| "step": 102 | |
| }, | |
| { | |
| "epoch": 35.0, | |
| "eval_loss": 1.5813907384872437, | |
| "eval_runtime": 10.8372, | |
| "eval_samples_per_second": 922.744, | |
| "eval_steps_per_second": 1.2, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 36.0, | |
| "eval_loss": 1.570920467376709, | |
| "eval_runtime": 10.8433, | |
| "eval_samples_per_second": 922.23, | |
| "eval_steps_per_second": 1.199, | |
| "step": 108 | |
| }, | |
| { | |
| "epoch": 37.0, | |
| "eval_loss": 1.5595602989196777, | |
| "eval_runtime": 10.9304, | |
| "eval_samples_per_second": 914.879, | |
| "eval_steps_per_second": 1.189, | |
| "step": 111 | |
| }, | |
| { | |
| "epoch": 38.0, | |
| "eval_loss": 1.5501924753189087, | |
| "eval_runtime": 10.8429, | |
| "eval_samples_per_second": 922.259, | |
| "eval_steps_per_second": 1.199, | |
| "step": 114 | |
| }, | |
| { | |
| "epoch": 39.0, | |
| "eval_loss": 1.5431841611862183, | |
| "eval_runtime": 11.1003, | |
| "eval_samples_per_second": 900.873, | |
| "eval_steps_per_second": 1.171, | |
| "step": 117 | |
| }, | |
| { | |
| "epoch": 40.0, | |
| "eval_loss": 1.5323028564453125, | |
| "eval_runtime": 10.8468, | |
| "eval_samples_per_second": 921.928, | |
| "eval_steps_per_second": 1.199, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 41.0, | |
| "eval_loss": 1.5221501588821411, | |
| "eval_runtime": 10.8395, | |
| "eval_samples_per_second": 922.551, | |
| "eval_steps_per_second": 1.199, | |
| "step": 123 | |
| }, | |
| { | |
| "epoch": 42.0, | |
| "eval_loss": 1.5130882263183594, | |
| "eval_runtime": 10.9396, | |
| "eval_samples_per_second": 914.108, | |
| "eval_steps_per_second": 1.188, | |
| "step": 126 | |
| }, | |
| { | |
| "epoch": 43.0, | |
| "eval_loss": 1.5008268356323242, | |
| "eval_runtime": 10.8617, | |
| "eval_samples_per_second": 920.668, | |
| "eval_steps_per_second": 1.197, | |
| "step": 129 | |
| }, | |
| { | |
| "epoch": 44.0, | |
| "eval_loss": 1.487107515335083, | |
| "eval_runtime": 11.0741, | |
| "eval_samples_per_second": 903.011, | |
| "eval_steps_per_second": 1.174, | |
| "step": 132 | |
| }, | |
| { | |
| "epoch": 45.0, | |
| "eval_loss": 1.4803069829940796, | |
| "eval_runtime": 10.8489, | |
| "eval_samples_per_second": 921.753, | |
| "eval_steps_per_second": 1.198, | |
| "step": 135 | |
| }, | |
| { | |
| "epoch": 46.0, | |
| "eval_loss": 1.4744497537612915, | |
| "eval_runtime": 10.8329, | |
| "eval_samples_per_second": 923.112, | |
| "eval_steps_per_second": 1.2, | |
| "step": 138 | |
| }, | |
| { | |
| "epoch": 47.0, | |
| "eval_loss": 1.4682186841964722, | |
| "eval_runtime": 11.0865, | |
| "eval_samples_per_second": 902.0, | |
| "eval_steps_per_second": 1.173, | |
| "step": 141 | |
| }, | |
| { | |
| "epoch": 48.0, | |
| "eval_loss": 1.4509010314941406, | |
| "eval_runtime": 10.862, | |
| "eval_samples_per_second": 920.64, | |
| "eval_steps_per_second": 1.197, | |
| "step": 144 | |
| }, | |
| { | |
| "epoch": 49.0, | |
| "eval_loss": 1.4542888402938843, | |
| "eval_runtime": 11.0884, | |
| "eval_samples_per_second": 901.845, | |
| "eval_steps_per_second": 1.172, | |
| "step": 147 | |
| }, | |
| { | |
| "epoch": 50.0, | |
| "eval_loss": 1.455941081047058, | |
| "eval_runtime": 10.8598, | |
| "eval_samples_per_second": 920.824, | |
| "eval_steps_per_second": 1.197, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 51.0, | |
| "eval_loss": 1.4334802627563477, | |
| "eval_runtime": 10.8586, | |
| "eval_samples_per_second": 920.928, | |
| "eval_steps_per_second": 1.197, | |
| "step": 153 | |
| }, | |
| { | |
| "epoch": 52.0, | |
| "eval_loss": 1.4138292074203491, | |
| "eval_runtime": 10.87, | |
| "eval_samples_per_second": 919.964, | |
| "eval_steps_per_second": 1.196, | |
| "step": 156 | |
| }, | |
| { | |
| "epoch": 53.0, | |
| "eval_loss": 1.4225151538848877, | |
| "eval_runtime": 10.8028, | |
| "eval_samples_per_second": 925.687, | |
| "eval_steps_per_second": 1.203, | |
| "step": 159 | |
| }, | |
| { | |
| "epoch": 54.0, | |
| "eval_loss": 1.4350632429122925, | |
| "eval_runtime": 11.0684, | |
| "eval_samples_per_second": 903.472, | |
| "eval_steps_per_second": 1.175, | |
| "step": 162 | |
| }, | |
| { | |
| "epoch": 55.0, | |
| "eval_loss": 1.3809276819229126, | |
| "eval_runtime": 10.7633, | |
| "eval_samples_per_second": 929.082, | |
| "eval_steps_per_second": 1.208, | |
| "step": 165 | |
| }, | |
| { | |
| "epoch": 56.0, | |
| "eval_loss": 1.370450735092163, | |
| "eval_runtime": 10.785, | |
| "eval_samples_per_second": 927.215, | |
| "eval_steps_per_second": 1.205, | |
| "step": 168 | |
| }, | |
| { | |
| "epoch": 57.0, | |
| "eval_loss": 1.3955777883529663, | |
| "eval_runtime": 10.895, | |
| "eval_samples_per_second": 917.848, | |
| "eval_steps_per_second": 1.193, | |
| "step": 171 | |
| }, | |
| { | |
| "epoch": 58.0, | |
| "eval_loss": 1.368485927581787, | |
| "eval_runtime": 10.819, | |
| "eval_samples_per_second": 924.297, | |
| "eval_steps_per_second": 1.202, | |
| "step": 174 | |
| }, | |
| { | |
| "epoch": 59.0, | |
| "eval_loss": 1.33143949508667, | |
| "eval_runtime": 10.8396, | |
| "eval_samples_per_second": 922.543, | |
| "eval_steps_per_second": 1.199, | |
| "step": 177 | |
| }, | |
| { | |
| "epoch": 60.0, | |
| "eval_loss": 1.3212394714355469, | |
| "eval_runtime": 10.8092, | |
| "eval_samples_per_second": 925.139, | |
| "eval_steps_per_second": 1.203, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 61.0, | |
| "eval_loss": 1.3334152698516846, | |
| "eval_runtime": 10.9031, | |
| "eval_samples_per_second": 917.174, | |
| "eval_steps_per_second": 1.192, | |
| "step": 183 | |
| }, | |
| { | |
| "epoch": 62.0, | |
| "eval_loss": 1.3178024291992188, | |
| "eval_runtime": 10.8916, | |
| "eval_samples_per_second": 918.135, | |
| "eval_steps_per_second": 1.194, | |
| "step": 186 | |
| }, | |
| { | |
| "epoch": 63.0, | |
| "eval_loss": 1.2850910425186157, | |
| "eval_runtime": 10.7826, | |
| "eval_samples_per_second": 927.417, | |
| "eval_steps_per_second": 1.206, | |
| "step": 189 | |
| }, | |
| { | |
| "epoch": 64.0, | |
| "eval_loss": 1.2826738357543945, | |
| "eval_runtime": 11.0431, | |
| "eval_samples_per_second": 905.544, | |
| "eval_steps_per_second": 1.177, | |
| "step": 192 | |
| }, | |
| { | |
| "epoch": 65.0, | |
| "eval_loss": 1.276419758796692, | |
| "eval_runtime": 10.8459, | |
| "eval_samples_per_second": 922.009, | |
| "eval_steps_per_second": 1.199, | |
| "step": 195 | |
| }, | |
| { | |
| "epoch": 66.0, | |
| "eval_loss": 1.2498911619186401, | |
| "eval_runtime": 10.8147, | |
| "eval_samples_per_second": 924.671, | |
| "eval_steps_per_second": 1.202, | |
| "step": 198 | |
| }, | |
| { | |
| "epoch": 67.0, | |
| "eval_loss": 1.2304234504699707, | |
| "eval_runtime": 10.9437, | |
| "eval_samples_per_second": 913.768, | |
| "eval_steps_per_second": 1.188, | |
| "step": 201 | |
| }, | |
| { | |
| "epoch": 68.0, | |
| "eval_loss": 1.2450603246688843, | |
| "eval_runtime": 10.8378, | |
| "eval_samples_per_second": 922.698, | |
| "eval_steps_per_second": 1.2, | |
| "step": 204 | |
| }, | |
| { | |
| "epoch": 69.0, | |
| "eval_loss": 1.2278028726577759, | |
| "eval_runtime": 11.123, | |
| "eval_samples_per_second": 899.041, | |
| "eval_steps_per_second": 1.169, | |
| "step": 207 | |
| }, | |
| { | |
| "epoch": 70.0, | |
| "eval_loss": 1.2043875455856323, | |
| "eval_runtime": 10.7963, | |
| "eval_samples_per_second": 926.245, | |
| "eval_steps_per_second": 1.204, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 71.0, | |
| "eval_loss": 1.199507236480713, | |
| "eval_runtime": 10.8038, | |
| "eval_samples_per_second": 925.601, | |
| "eval_steps_per_second": 1.203, | |
| "step": 213 | |
| }, | |
| { | |
| "epoch": 72.0, | |
| "eval_loss": 1.1909499168395996, | |
| "eval_runtime": 10.9182, | |
| "eval_samples_per_second": 915.902, | |
| "eval_steps_per_second": 1.191, | |
| "step": 216 | |
| }, | |
| { | |
| "epoch": 73.0, | |
| "eval_loss": 1.172565221786499, | |
| "eval_runtime": 10.8472, | |
| "eval_samples_per_second": 921.898, | |
| "eval_steps_per_second": 1.198, | |
| "step": 219 | |
| }, | |
| { | |
| "epoch": 74.0, | |
| "eval_loss": 1.1473827362060547, | |
| "eval_runtime": 11.0541, | |
| "eval_samples_per_second": 904.638, | |
| "eval_steps_per_second": 1.176, | |
| "step": 222 | |
| }, | |
| { | |
| "epoch": 75.0, | |
| "eval_loss": 1.1195847988128662, | |
| "eval_runtime": 10.924, | |
| "eval_samples_per_second": 915.419, | |
| "eval_steps_per_second": 1.19, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 76.0, | |
| "eval_loss": 1.1185705661773682, | |
| "eval_runtime": 10.8085, | |
| "eval_samples_per_second": 925.199, | |
| "eval_steps_per_second": 1.203, | |
| "step": 228 | |
| }, | |
| { | |
| "epoch": 77.0, | |
| "eval_loss": 1.1256279945373535, | |
| "eval_runtime": 11.0649, | |
| "eval_samples_per_second": 903.755, | |
| "eval_steps_per_second": 1.175, | |
| "step": 231 | |
| }, | |
| { | |
| "epoch": 78.0, | |
| "eval_loss": 1.0834866762161255, | |
| "eval_runtime": 10.8281, | |
| "eval_samples_per_second": 923.523, | |
| "eval_steps_per_second": 1.201, | |
| "step": 234 | |
| }, | |
| { | |
| "epoch": 79.0, | |
| "eval_loss": 1.0588449239730835, | |
| "eval_runtime": 10.7832, | |
| "eval_samples_per_second": 927.372, | |
| "eval_steps_per_second": 1.206, | |
| "step": 237 | |
| }, | |
| { | |
| "epoch": 80.0, | |
| "eval_loss": 1.0453879833221436, | |
| "eval_runtime": 10.8835, | |
| "eval_samples_per_second": 918.819, | |
| "eval_steps_per_second": 1.194, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 81.0, | |
| "eval_loss": 1.0334476232528687, | |
| "eval_runtime": 10.8064, | |
| "eval_samples_per_second": 925.376, | |
| "eval_steps_per_second": 1.203, | |
| "step": 243 | |
| }, | |
| { | |
| "epoch": 82.0, | |
| "eval_loss": 1.028348684310913, | |
| "eval_runtime": 10.9037, | |
| "eval_samples_per_second": 917.121, | |
| "eval_steps_per_second": 1.192, | |
| "step": 246 | |
| }, | |
| { | |
| "epoch": 83.0, | |
| "eval_loss": 1.0091253519058228, | |
| "eval_runtime": 10.8016, | |
| "eval_samples_per_second": 925.788, | |
| "eval_steps_per_second": 1.204, | |
| "step": 249 | |
| }, | |
| { | |
| "epoch": 84.0, | |
| "eval_loss": 0.9820342659950256, | |
| "eval_runtime": 11.0485, | |
| "eval_samples_per_second": 905.101, | |
| "eval_steps_per_second": 1.177, | |
| "step": 252 | |
| }, | |
| { | |
| "epoch": 85.0, | |
| "eval_loss": 0.9488591551780701, | |
| "eval_runtime": 10.8216, | |
| "eval_samples_per_second": 924.074, | |
| "eval_steps_per_second": 1.201, | |
| "step": 255 | |
| }, | |
| { | |
| "epoch": 86.0, | |
| "eval_loss": 0.9406836032867432, | |
| "eval_runtime": 10.8253, | |
| "eval_samples_per_second": 923.765, | |
| "eval_steps_per_second": 1.201, | |
| "step": 258 | |
| }, | |
| { | |
| "epoch": 87.0, | |
| "eval_loss": 0.9392226338386536, | |
| "eval_runtime": 10.9531, | |
| "eval_samples_per_second": 912.984, | |
| "eval_steps_per_second": 1.187, | |
| "step": 261 | |
| }, | |
| { | |
| "epoch": 88.0, | |
| "eval_loss": 0.9232249855995178, | |
| "eval_runtime": 10.835, | |
| "eval_samples_per_second": 922.938, | |
| "eval_steps_per_second": 1.2, | |
| "step": 264 | |
| }, | |
| { | |
| "epoch": 89.0, | |
| "eval_loss": 0.902049720287323, | |
| "eval_runtime": 10.915, | |
| "eval_samples_per_second": 916.173, | |
| "eval_steps_per_second": 1.191, | |
| "step": 267 | |
| }, | |
| { | |
| "epoch": 90.0, | |
| "eval_loss": 0.8934366106987, | |
| "eval_runtime": 10.833, | |
| "eval_samples_per_second": 923.105, | |
| "eval_steps_per_second": 1.2, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 91.0, | |
| "eval_loss": 0.8797369003295898, | |
| "eval_runtime": 10.8275, | |
| "eval_samples_per_second": 923.576, | |
| "eval_steps_per_second": 1.201, | |
| "step": 273 | |
| }, | |
| { | |
| "epoch": 92.0, | |
| "eval_loss": 0.8834591507911682, | |
| "eval_runtime": 10.8227, | |
| "eval_samples_per_second": 923.98, | |
| "eval_steps_per_second": 1.201, | |
| "step": 276 | |
| }, | |
| { | |
| "epoch": 93.0, | |
| "eval_loss": 0.8583576083183289, | |
| "eval_runtime": 10.8262, | |
| "eval_samples_per_second": 923.681, | |
| "eval_steps_per_second": 1.201, | |
| "step": 279 | |
| }, | |
| { | |
| "epoch": 94.0, | |
| "eval_loss": 0.8160658478736877, | |
| "eval_runtime": 10.8597, | |
| "eval_samples_per_second": 920.834, | |
| "eval_steps_per_second": 1.197, | |
| "step": 282 | |
| }, | |
| { | |
| "epoch": 95.0, | |
| "eval_loss": 0.7998712658882141, | |
| "eval_runtime": 10.7874, | |
| "eval_samples_per_second": 927.007, | |
| "eval_steps_per_second": 1.205, | |
| "step": 285 | |
| }, | |
| { | |
| "epoch": 96.0, | |
| "eval_loss": 0.8268041014671326, | |
| "eval_runtime": 11.0529, | |
| "eval_samples_per_second": 904.74, | |
| "eval_steps_per_second": 1.176, | |
| "step": 288 | |
| }, | |
| { | |
| "epoch": 97.0, | |
| "eval_loss": 0.8250266313552856, | |
| "eval_runtime": 10.8021, | |
| "eval_samples_per_second": 925.746, | |
| "eval_steps_per_second": 1.203, | |
| "step": 291 | |
| }, | |
| { | |
| "epoch": 98.0, | |
| "eval_loss": 0.7785258293151855, | |
| "eval_runtime": 10.7519, | |
| "eval_samples_per_second": 930.071, | |
| "eval_steps_per_second": 1.209, | |
| "step": 294 | |
| }, | |
| { | |
| "epoch": 99.0, | |
| "eval_loss": 0.7797490358352661, | |
| "eval_runtime": 10.8842, | |
| "eval_samples_per_second": 918.761, | |
| "eval_steps_per_second": 1.194, | |
| "step": 297 | |
| }, | |
| { | |
| "epoch": 100.0, | |
| "eval_loss": 0.7988857626914978, | |
| "eval_runtime": 10.8165, | |
| "eval_samples_per_second": 924.512, | |
| "eval_steps_per_second": 1.202, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 101.0, | |
| "eval_loss": 0.7674239277839661, | |
| "eval_runtime": 10.8984, | |
| "eval_samples_per_second": 917.569, | |
| "eval_steps_per_second": 1.193, | |
| "step": 303 | |
| }, | |
| { | |
| "epoch": 102.0, | |
| "eval_loss": 0.7283704280853271, | |
| "eval_runtime": 10.7882, | |
| "eval_samples_per_second": 926.941, | |
| "eval_steps_per_second": 1.205, | |
| "step": 306 | |
| }, | |
| { | |
| "epoch": 103.0, | |
| "eval_loss": 0.7265847325325012, | |
| "eval_runtime": 10.7846, | |
| "eval_samples_per_second": 927.248, | |
| "eval_steps_per_second": 1.205, | |
| "step": 309 | |
| }, | |
| { | |
| "epoch": 104.0, | |
| "eval_loss": 0.7218329906463623, | |
| "eval_runtime": 10.8432, | |
| "eval_samples_per_second": 922.237, | |
| "eval_steps_per_second": 1.199, | |
| "step": 312 | |
| }, | |
| { | |
| "epoch": 105.0, | |
| "eval_loss": 0.7195408344268799, | |
| "eval_runtime": 10.775, | |
| "eval_samples_per_second": 928.073, | |
| "eval_steps_per_second": 1.206, | |
| "step": 315 | |
| }, | |
| { | |
| "epoch": 106.0, | |
| "eval_loss": 0.720020592212677, | |
| "eval_runtime": 11.033, | |
| "eval_samples_per_second": 906.372, | |
| "eval_steps_per_second": 1.178, | |
| "step": 318 | |
| }, | |
| { | |
| "epoch": 107.0, | |
| "eval_loss": 0.7001694440841675, | |
| "eval_runtime": 10.7937, | |
| "eval_samples_per_second": 926.47, | |
| "eval_steps_per_second": 1.204, | |
| "step": 321 | |
| }, | |
| { | |
| "epoch": 108.0, | |
| "eval_loss": 0.6704011559486389, | |
| "eval_runtime": 10.9903, | |
| "eval_samples_per_second": 909.893, | |
| "eval_steps_per_second": 1.183, | |
| "step": 324 | |
| }, | |
| { | |
| "epoch": 109.0, | |
| "eval_loss": 0.6617050766944885, | |
| "eval_runtime": 11.0129, | |
| "eval_samples_per_second": 908.026, | |
| "eval_steps_per_second": 1.18, | |
| "step": 327 | |
| }, | |
| { | |
| "epoch": 110.0, | |
| "eval_loss": 0.6687906384468079, | |
| "eval_runtime": 10.7417, | |
| "eval_samples_per_second": 930.948, | |
| "eval_steps_per_second": 1.21, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 111.0, | |
| "eval_loss": 0.6637664437294006, | |
| "eval_runtime": 10.8682, | |
| "eval_samples_per_second": 920.118, | |
| "eval_steps_per_second": 1.196, | |
| "step": 333 | |
| }, | |
| { | |
| "epoch": 112.0, | |
| "eval_loss": 0.6454914212226868, | |
| "eval_runtime": 10.7981, | |
| "eval_samples_per_second": 926.087, | |
| "eval_steps_per_second": 1.204, | |
| "step": 336 | |
| }, | |
| { | |
| "epoch": 113.0, | |
| "eval_loss": 0.6308099627494812, | |
| "eval_runtime": 10.8495, | |
| "eval_samples_per_second": 921.699, | |
| "eval_steps_per_second": 1.198, | |
| "step": 339 | |
| }, | |
| { | |
| "epoch": 114.0, | |
| "eval_loss": 0.6411617398262024, | |
| "eval_runtime": 10.9089, | |
| "eval_samples_per_second": 916.679, | |
| "eval_steps_per_second": 1.192, | |
| "step": 342 | |
| }, | |
| { | |
| "epoch": 115.0, | |
| "eval_loss": 0.6422205567359924, | |
| "eval_runtime": 10.7825, | |
| "eval_samples_per_second": 927.428, | |
| "eval_steps_per_second": 1.206, | |
| "step": 345 | |
| }, | |
| { | |
| "epoch": 116.0, | |
| "eval_loss": 0.6265988349914551, | |
| "eval_runtime": 10.865, | |
| "eval_samples_per_second": 920.387, | |
| "eval_steps_per_second": 1.197, | |
| "step": 348 | |
| }, | |
| { | |
| "epoch": 117.0, | |
| "eval_loss": 0.615440845489502, | |
| "eval_runtime": 10.8031, | |
| "eval_samples_per_second": 925.657, | |
| "eval_steps_per_second": 1.203, | |
| "step": 351 | |
| }, | |
| { | |
| "epoch": 118.0, | |
| "eval_loss": 0.6053263545036316, | |
| "eval_runtime": 10.8755, | |
| "eval_samples_per_second": 919.496, | |
| "eval_steps_per_second": 1.195, | |
| "step": 354 | |
| }, | |
| { | |
| "epoch": 119.0, | |
| "eval_loss": 0.6083167791366577, | |
| "eval_runtime": 10.861, | |
| "eval_samples_per_second": 920.722, | |
| "eval_steps_per_second": 1.197, | |
| "step": 357 | |
| }, | |
| { | |
| "epoch": 120.0, | |
| "eval_loss": 0.607414960861206, | |
| "eval_runtime": 10.7885, | |
| "eval_samples_per_second": 926.909, | |
| "eval_steps_per_second": 1.205, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 121.0, | |
| "eval_loss": 0.6041896343231201, | |
| "eval_runtime": 10.8677, | |
| "eval_samples_per_second": 920.161, | |
| "eval_steps_per_second": 1.196, | |
| "step": 363 | |
| }, | |
| { | |
| "epoch": 122.0, | |
| "eval_loss": 0.5942515134811401, | |
| "eval_runtime": 10.786, | |
| "eval_samples_per_second": 927.126, | |
| "eval_steps_per_second": 1.205, | |
| "step": 366 | |
| }, | |
| { | |
| "epoch": 123.0, | |
| "eval_loss": 0.5849249958992004, | |
| "eval_runtime": 10.8776, | |
| "eval_samples_per_second": 919.318, | |
| "eval_steps_per_second": 1.195, | |
| "step": 369 | |
| }, | |
| { | |
| "epoch": 124.0, | |
| "eval_loss": 0.5770248770713806, | |
| "eval_runtime": 10.8665, | |
| "eval_samples_per_second": 920.261, | |
| "eval_steps_per_second": 1.196, | |
| "step": 372 | |
| }, | |
| { | |
| "epoch": 125.0, | |
| "eval_loss": 0.5670948028564453, | |
| "eval_runtime": 10.7839, | |
| "eval_samples_per_second": 927.305, | |
| "eval_steps_per_second": 1.205, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 126.0, | |
| "eval_loss": 0.5637474656105042, | |
| "eval_runtime": 10.8663, | |
| "eval_samples_per_second": 920.276, | |
| "eval_steps_per_second": 1.196, | |
| "step": 378 | |
| }, | |
| { | |
| "epoch": 127.0, | |
| "eval_loss": 0.5640723705291748, | |
| "eval_runtime": 10.7903, | |
| "eval_samples_per_second": 926.754, | |
| "eval_steps_per_second": 1.205, | |
| "step": 381 | |
| }, | |
| { | |
| "epoch": 128.0, | |
| "eval_loss": 0.5665525197982788, | |
| "eval_runtime": 10.8832, | |
| "eval_samples_per_second": 918.844, | |
| "eval_steps_per_second": 1.194, | |
| "step": 384 | |
| }, | |
| { | |
| "epoch": 129.0, | |
| "eval_loss": 0.5674743056297302, | |
| "eval_runtime": 10.8693, | |
| "eval_samples_per_second": 920.019, | |
| "eval_steps_per_second": 1.196, | |
| "step": 387 | |
| }, | |
| { | |
| "epoch": 130.0, | |
| "eval_loss": 0.558768630027771, | |
| "eval_runtime": 10.7538, | |
| "eval_samples_per_second": 929.908, | |
| "eval_steps_per_second": 1.209, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 131.0, | |
| "eval_loss": 0.5466377139091492, | |
| "eval_runtime": 10.8966, | |
| "eval_samples_per_second": 917.721, | |
| "eval_steps_per_second": 1.193, | |
| "step": 393 | |
| }, | |
| { | |
| "epoch": 132.0, | |
| "eval_loss": 0.5402641296386719, | |
| "eval_runtime": 10.8007, | |
| "eval_samples_per_second": 925.864, | |
| "eval_steps_per_second": 1.204, | |
| "step": 396 | |
| }, | |
| { | |
| "epoch": 133.0, | |
| "eval_loss": 0.540850043296814, | |
| "eval_runtime": 10.8814, | |
| "eval_samples_per_second": 918.996, | |
| "eval_steps_per_second": 1.195, | |
| "step": 399 | |
| }, | |
| { | |
| "epoch": 134.0, | |
| "eval_loss": 0.5402743220329285, | |
| "eval_runtime": 10.8778, | |
| "eval_samples_per_second": 919.303, | |
| "eval_steps_per_second": 1.195, | |
| "step": 402 | |
| }, | |
| { | |
| "epoch": 135.0, | |
| "eval_loss": 0.5400083065032959, | |
| "eval_runtime": 10.7631, | |
| "eval_samples_per_second": 929.098, | |
| "eval_steps_per_second": 1.208, | |
| "step": 405 | |
| }, | |
| { | |
| "epoch": 136.0, | |
| "eval_loss": 0.5397944450378418, | |
| "eval_runtime": 10.8575, | |
| "eval_samples_per_second": 921.018, | |
| "eval_steps_per_second": 1.197, | |
| "step": 408 | |
| }, | |
| { | |
| "epoch": 137.0, | |
| "eval_loss": 0.5391473770141602, | |
| "eval_runtime": 10.8073, | |
| "eval_samples_per_second": 925.299, | |
| "eval_steps_per_second": 1.203, | |
| "step": 411 | |
| }, | |
| { | |
| "epoch": 138.0, | |
| "eval_loss": 0.5366548895835876, | |
| "eval_runtime": 10.8743, | |
| "eval_samples_per_second": 919.601, | |
| "eval_steps_per_second": 1.195, | |
| "step": 414 | |
| }, | |
| { | |
| "epoch": 139.0, | |
| "eval_loss": 0.5325959920883179, | |
| "eval_runtime": 10.8603, | |
| "eval_samples_per_second": 920.781, | |
| "eval_steps_per_second": 1.197, | |
| "step": 417 | |
| }, | |
| { | |
| "epoch": 140.0, | |
| "eval_loss": 0.5299940705299377, | |
| "eval_runtime": 10.7821, | |
| "eval_samples_per_second": 927.467, | |
| "eval_steps_per_second": 1.206, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 141.0, | |
| "eval_loss": 0.5305985808372498, | |
| "eval_runtime": 10.8496, | |
| "eval_samples_per_second": 921.697, | |
| "eval_steps_per_second": 1.198, | |
| "step": 423 | |
| }, | |
| { | |
| "epoch": 142.0, | |
| "eval_loss": 0.5292918682098389, | |
| "eval_runtime": 10.7928, | |
| "eval_samples_per_second": 926.542, | |
| "eval_steps_per_second": 1.205, | |
| "step": 426 | |
| }, | |
| { | |
| "epoch": 143.0, | |
| "eval_loss": 0.5267909169197083, | |
| "eval_runtime": 10.8928, | |
| "eval_samples_per_second": 918.04, | |
| "eval_steps_per_second": 1.193, | |
| "step": 429 | |
| }, | |
| { | |
| "epoch": 144.0, | |
| "eval_loss": 0.523544430732727, | |
| "eval_runtime": 10.8792, | |
| "eval_samples_per_second": 919.184, | |
| "eval_steps_per_second": 1.195, | |
| "step": 432 | |
| }, | |
| { | |
| "epoch": 145.0, | |
| "eval_loss": 0.5205994248390198, | |
| "eval_runtime": 10.7887, | |
| "eval_samples_per_second": 926.892, | |
| "eval_steps_per_second": 1.205, | |
| "step": 435 | |
| }, | |
| { | |
| "epoch": 146.0, | |
| "eval_loss": 0.519413411617279, | |
| "eval_runtime": 10.8656, | |
| "eval_samples_per_second": 920.336, | |
| "eval_steps_per_second": 1.196, | |
| "step": 438 | |
| }, | |
| { | |
| "epoch": 147.0, | |
| "eval_loss": 0.519058108329773, | |
| "eval_runtime": 10.7991, | |
| "eval_samples_per_second": 926.005, | |
| "eval_steps_per_second": 1.204, | |
| "step": 441 | |
| }, | |
| { | |
| "epoch": 148.0, | |
| "eval_loss": 0.5188890099525452, | |
| "eval_runtime": 10.8757, | |
| "eval_samples_per_second": 919.48, | |
| "eval_steps_per_second": 1.195, | |
| "step": 444 | |
| }, | |
| { | |
| "epoch": 149.0, | |
| "eval_loss": 0.5188961625099182, | |
| "eval_runtime": 10.8633, | |
| "eval_samples_per_second": 920.533, | |
| "eval_steps_per_second": 1.197, | |
| "step": 447 | |
| }, | |
| { | |
| "epoch": 150.0, | |
| "eval_loss": 0.5188751220703125, | |
| "eval_runtime": 10.8003, | |
| "eval_samples_per_second": 925.904, | |
| "eval_steps_per_second": 1.204, | |
| "step": 450 | |
| } | |
| ], | |
| "logging_steps": 500, | |
| "max_steps": 450, | |
| "num_train_epochs": 150, | |
| "save_steps": 500, | |
| "total_flos": 8613277286400000.0, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |