hp_ablations_mistral_epoch2 / trainer_log.jsonl
sedrickkeh's picture
Training in progress, epoch 1
a05867f verified
{"current_steps": 10, "total_steps": 986, "loss": 0.7568, "lr": 5e-06, "epoch": 0.020253164556962026, "percentage": 1.01, "elapsed_time": "0:09:22", "remaining_time": "15:15:25"}
{"current_steps": 20, "total_steps": 986, "loss": 0.6504, "lr": 5e-06, "epoch": 0.04050632911392405, "percentage": 2.03, "elapsed_time": "0:18:42", "remaining_time": "15:03:32"}
{"current_steps": 30, "total_steps": 986, "loss": 0.6288, "lr": 5e-06, "epoch": 0.060759493670886074, "percentage": 3.04, "elapsed_time": "0:28:01", "remaining_time": "14:53:00"}
{"current_steps": 40, "total_steps": 986, "loss": 0.6134, "lr": 5e-06, "epoch": 0.0810126582278481, "percentage": 4.06, "elapsed_time": "0:37:19", "remaining_time": "14:42:48"}
{"current_steps": 50, "total_steps": 986, "loss": 0.6037, "lr": 5e-06, "epoch": 0.10126582278481013, "percentage": 5.07, "elapsed_time": "0:46:36", "remaining_time": "14:32:39"}
{"current_steps": 60, "total_steps": 986, "loss": 0.5983, "lr": 5e-06, "epoch": 0.12151898734177215, "percentage": 6.09, "elapsed_time": "0:55:55", "remaining_time": "14:23:05"}
{"current_steps": 70, "total_steps": 986, "loss": 0.5938, "lr": 5e-06, "epoch": 0.14177215189873418, "percentage": 7.1, "elapsed_time": "1:05:14", "remaining_time": "14:13:46"}
{"current_steps": 80, "total_steps": 986, "loss": 0.5882, "lr": 5e-06, "epoch": 0.1620253164556962, "percentage": 8.11, "elapsed_time": "1:14:32", "remaining_time": "14:04:12"}
{"current_steps": 90, "total_steps": 986, "loss": 0.5863, "lr": 5e-06, "epoch": 0.18227848101265823, "percentage": 9.13, "elapsed_time": "1:23:51", "remaining_time": "13:54:56"}
{"current_steps": 100, "total_steps": 986, "loss": 0.5888, "lr": 5e-06, "epoch": 0.20253164556962025, "percentage": 10.14, "elapsed_time": "1:33:10", "remaining_time": "13:45:35"}
{"current_steps": 110, "total_steps": 986, "loss": 0.5809, "lr": 5e-06, "epoch": 0.22278481012658227, "percentage": 11.16, "elapsed_time": "1:42:29", "remaining_time": "13:36:14"}
{"current_steps": 120, "total_steps": 986, "loss": 0.5788, "lr": 5e-06, "epoch": 0.2430379746835443, "percentage": 12.17, "elapsed_time": "1:51:48", "remaining_time": "13:26:51"}
{"current_steps": 130, "total_steps": 986, "loss": 0.5777, "lr": 5e-06, "epoch": 0.26329113924050634, "percentage": 13.18, "elapsed_time": "2:01:07", "remaining_time": "13:17:34"}
{"current_steps": 140, "total_steps": 986, "loss": 0.5672, "lr": 5e-06, "epoch": 0.28354430379746837, "percentage": 14.2, "elapsed_time": "2:10:26", "remaining_time": "13:08:16"}
{"current_steps": 150, "total_steps": 986, "loss": 0.5793, "lr": 5e-06, "epoch": 0.3037974683544304, "percentage": 15.21, "elapsed_time": "2:19:44", "remaining_time": "12:58:50"}
{"current_steps": 160, "total_steps": 986, "loss": 0.5788, "lr": 5e-06, "epoch": 0.3240506329113924, "percentage": 16.23, "elapsed_time": "2:29:02", "remaining_time": "12:49:26"}
{"current_steps": 170, "total_steps": 986, "loss": 0.5794, "lr": 5e-06, "epoch": 0.34430379746835443, "percentage": 17.24, "elapsed_time": "2:38:20", "remaining_time": "12:40:03"}
{"current_steps": 180, "total_steps": 986, "loss": 0.5742, "lr": 5e-06, "epoch": 0.36455696202531646, "percentage": 18.26, "elapsed_time": "2:47:39", "remaining_time": "12:30:45"}
{"current_steps": 190, "total_steps": 986, "loss": 0.5742, "lr": 5e-06, "epoch": 0.3848101265822785, "percentage": 19.27, "elapsed_time": "2:56:58", "remaining_time": "12:21:26"}
{"current_steps": 200, "total_steps": 986, "loss": 0.5695, "lr": 5e-06, "epoch": 0.4050632911392405, "percentage": 20.28, "elapsed_time": "3:06:17", "remaining_time": "12:12:06"}
{"current_steps": 210, "total_steps": 986, "loss": 0.5649, "lr": 5e-06, "epoch": 0.4253164556962025, "percentage": 21.3, "elapsed_time": "3:15:34", "remaining_time": "12:02:43"}
{"current_steps": 220, "total_steps": 986, "loss": 0.5692, "lr": 5e-06, "epoch": 0.44556962025316454, "percentage": 22.31, "elapsed_time": "3:24:53", "remaining_time": "11:53:24"}
{"current_steps": 230, "total_steps": 986, "loss": 0.5682, "lr": 5e-06, "epoch": 0.46582278481012657, "percentage": 23.33, "elapsed_time": "3:34:11", "remaining_time": "11:44:03"}
{"current_steps": 240, "total_steps": 986, "loss": 0.5643, "lr": 5e-06, "epoch": 0.4860759493670886, "percentage": 24.34, "elapsed_time": "3:43:29", "remaining_time": "11:34:40"}
{"current_steps": 250, "total_steps": 986, "loss": 0.5678, "lr": 5e-06, "epoch": 0.5063291139240507, "percentage": 25.35, "elapsed_time": "3:52:47", "remaining_time": "11:25:19"}
{"current_steps": 260, "total_steps": 986, "loss": 0.5548, "lr": 5e-06, "epoch": 0.5265822784810127, "percentage": 26.37, "elapsed_time": "4:02:04", "remaining_time": "11:15:57"}
{"current_steps": 270, "total_steps": 986, "loss": 0.5644, "lr": 5e-06, "epoch": 0.5468354430379747, "percentage": 27.38, "elapsed_time": "4:11:23", "remaining_time": "11:06:38"}
{"current_steps": 280, "total_steps": 986, "loss": 0.5593, "lr": 5e-06, "epoch": 0.5670886075949367, "percentage": 28.4, "elapsed_time": "4:20:41", "remaining_time": "10:57:17"}
{"current_steps": 290, "total_steps": 986, "loss": 0.5586, "lr": 5e-06, "epoch": 0.5873417721518988, "percentage": 29.41, "elapsed_time": "4:29:58", "remaining_time": "10:47:56"}
{"current_steps": 300, "total_steps": 986, "loss": 0.5582, "lr": 5e-06, "epoch": 0.6075949367088608, "percentage": 30.43, "elapsed_time": "4:39:16", "remaining_time": "10:38:35"}
{"current_steps": 310, "total_steps": 986, "loss": 0.5637, "lr": 5e-06, "epoch": 0.6278481012658228, "percentage": 31.44, "elapsed_time": "4:48:33", "remaining_time": "10:29:14"}
{"current_steps": 320, "total_steps": 986, "loss": 0.5599, "lr": 5e-06, "epoch": 0.6481012658227848, "percentage": 32.45, "elapsed_time": "4:57:50", "remaining_time": "10:19:52"}
{"current_steps": 330, "total_steps": 986, "loss": 0.5602, "lr": 5e-06, "epoch": 0.6683544303797468, "percentage": 33.47, "elapsed_time": "5:07:07", "remaining_time": "10:10:32"}
{"current_steps": 340, "total_steps": 986, "loss": 0.5604, "lr": 5e-06, "epoch": 0.6886075949367089, "percentage": 34.48, "elapsed_time": "5:16:25", "remaining_time": "10:01:12"}
{"current_steps": 350, "total_steps": 986, "loss": 0.5616, "lr": 5e-06, "epoch": 0.7088607594936709, "percentage": 35.5, "elapsed_time": "5:25:44", "remaining_time": "9:51:54"}
{"current_steps": 360, "total_steps": 986, "loss": 0.5636, "lr": 5e-06, "epoch": 0.7291139240506329, "percentage": 36.51, "elapsed_time": "5:35:02", "remaining_time": "9:42:36"}
{"current_steps": 370, "total_steps": 986, "loss": 0.5562, "lr": 5e-06, "epoch": 0.7493670886075949, "percentage": 37.53, "elapsed_time": "5:44:21", "remaining_time": "9:33:17"}
{"current_steps": 380, "total_steps": 986, "loss": 0.5527, "lr": 5e-06, "epoch": 0.769620253164557, "percentage": 38.54, "elapsed_time": "5:53:38", "remaining_time": "9:23:57"}
{"current_steps": 390, "total_steps": 986, "loss": 0.5555, "lr": 5e-06, "epoch": 0.789873417721519, "percentage": 39.55, "elapsed_time": "6:02:56", "remaining_time": "9:14:38"}
{"current_steps": 400, "total_steps": 986, "loss": 0.556, "lr": 5e-06, "epoch": 0.810126582278481, "percentage": 40.57, "elapsed_time": "6:12:14", "remaining_time": "9:05:20"}
{"current_steps": 410, "total_steps": 986, "loss": 0.5502, "lr": 5e-06, "epoch": 0.830379746835443, "percentage": 41.58, "elapsed_time": "6:21:34", "remaining_time": "8:56:03"}
{"current_steps": 420, "total_steps": 986, "loss": 0.5581, "lr": 5e-06, "epoch": 0.850632911392405, "percentage": 42.6, "elapsed_time": "6:30:52", "remaining_time": "8:46:45"}
{"current_steps": 430, "total_steps": 986, "loss": 0.5517, "lr": 5e-06, "epoch": 0.8708860759493671, "percentage": 43.61, "elapsed_time": "6:40:10", "remaining_time": "8:37:26"}
{"current_steps": 440, "total_steps": 986, "loss": 0.5573, "lr": 5e-06, "epoch": 0.8911392405063291, "percentage": 44.62, "elapsed_time": "6:49:28", "remaining_time": "8:28:07"}
{"current_steps": 450, "total_steps": 986, "loss": 0.5485, "lr": 5e-06, "epoch": 0.9113924050632911, "percentage": 45.64, "elapsed_time": "6:58:47", "remaining_time": "8:18:50"}
{"current_steps": 460, "total_steps": 986, "loss": 0.5527, "lr": 5e-06, "epoch": 0.9316455696202531, "percentage": 46.65, "elapsed_time": "7:08:06", "remaining_time": "8:09:31"}
{"current_steps": 470, "total_steps": 986, "loss": 0.5536, "lr": 5e-06, "epoch": 0.9518987341772152, "percentage": 47.67, "elapsed_time": "7:17:25", "remaining_time": "8:00:13"}
{"current_steps": 480, "total_steps": 986, "loss": 0.5506, "lr": 5e-06, "epoch": 0.9721518987341772, "percentage": 48.68, "elapsed_time": "7:26:43", "remaining_time": "7:50:55"}
{"current_steps": 490, "total_steps": 986, "loss": 0.5472, "lr": 5e-06, "epoch": 0.9924050632911392, "percentage": 49.7, "elapsed_time": "7:36:01", "remaining_time": "7:41:36"}
{"current_steps": 493, "total_steps": 986, "eval_loss": 0.06877367943525314, "epoch": 0.9984810126582279, "percentage": 50.0, "elapsed_time": "7:48:00", "remaining_time": "7:48:00"}
{"current_steps": 500, "total_steps": 986, "loss": 0.5062, "lr": 5e-06, "epoch": 1.0126582278481013, "percentage": 50.71, "elapsed_time": "7:54:55", "remaining_time": "7:41:38"}
{"current_steps": 510, "total_steps": 986, "loss": 0.4713, "lr": 5e-06, "epoch": 1.0329113924050632, "percentage": 51.72, "elapsed_time": "8:04:13", "remaining_time": "7:31:56"}
{"current_steps": 520, "total_steps": 986, "loss": 0.4665, "lr": 5e-06, "epoch": 1.0531645569620254, "percentage": 52.74, "elapsed_time": "8:13:31", "remaining_time": "7:22:16"}
{"current_steps": 530, "total_steps": 986, "loss": 0.4683, "lr": 5e-06, "epoch": 1.0734177215189873, "percentage": 53.75, "elapsed_time": "8:22:50", "remaining_time": "7:12:38"}
{"current_steps": 540, "total_steps": 986, "loss": 0.4641, "lr": 5e-06, "epoch": 1.0936708860759494, "percentage": 54.77, "elapsed_time": "8:32:10", "remaining_time": "7:03:01"}
{"current_steps": 550, "total_steps": 986, "loss": 0.4664, "lr": 5e-06, "epoch": 1.1139240506329113, "percentage": 55.78, "elapsed_time": "8:41:29", "remaining_time": "6:53:23"}
{"current_steps": 560, "total_steps": 986, "loss": 0.4633, "lr": 5e-06, "epoch": 1.1341772151898735, "percentage": 56.8, "elapsed_time": "8:50:48", "remaining_time": "6:43:47"}
{"current_steps": 570, "total_steps": 986, "loss": 0.4693, "lr": 5e-06, "epoch": 1.1544303797468354, "percentage": 57.81, "elapsed_time": "9:00:07", "remaining_time": "6:34:11"}
{"current_steps": 580, "total_steps": 986, "loss": 0.4677, "lr": 5e-06, "epoch": 1.1746835443037975, "percentage": 58.82, "elapsed_time": "9:09:26", "remaining_time": "6:24:36"}
{"current_steps": 590, "total_steps": 986, "loss": 0.4611, "lr": 5e-06, "epoch": 1.1949367088607594, "percentage": 59.84, "elapsed_time": "9:18:45", "remaining_time": "6:15:01"}
{"current_steps": 600, "total_steps": 986, "loss": 0.4683, "lr": 5e-06, "epoch": 1.2151898734177216, "percentage": 60.85, "elapsed_time": "9:28:04", "remaining_time": "6:05:27"}
{"current_steps": 610, "total_steps": 986, "loss": 0.4648, "lr": 5e-06, "epoch": 1.2354430379746835, "percentage": 61.87, "elapsed_time": "9:37:24", "remaining_time": "5:55:54"}
{"current_steps": 620, "total_steps": 986, "loss": 0.4691, "lr": 5e-06, "epoch": 1.2556962025316456, "percentage": 62.88, "elapsed_time": "9:46:43", "remaining_time": "5:46:21"}
{"current_steps": 630, "total_steps": 986, "loss": 0.4699, "lr": 5e-06, "epoch": 1.2759493670886077, "percentage": 63.89, "elapsed_time": "9:56:04", "remaining_time": "5:36:49"}
{"current_steps": 640, "total_steps": 986, "loss": 0.47, "lr": 5e-06, "epoch": 1.2962025316455696, "percentage": 64.91, "elapsed_time": "10:05:24", "remaining_time": "5:27:17"}
{"current_steps": 650, "total_steps": 986, "loss": 0.4747, "lr": 5e-06, "epoch": 1.3164556962025316, "percentage": 65.92, "elapsed_time": "10:14:44", "remaining_time": "5:17:46"}
{"current_steps": 660, "total_steps": 986, "loss": 0.4637, "lr": 5e-06, "epoch": 1.3367088607594937, "percentage": 66.94, "elapsed_time": "10:24:02", "remaining_time": "5:08:14"}
{"current_steps": 670, "total_steps": 986, "loss": 0.4768, "lr": 5e-06, "epoch": 1.3569620253164558, "percentage": 67.95, "elapsed_time": "10:33:20", "remaining_time": "4:58:42"}
{"current_steps": 680, "total_steps": 986, "loss": 0.4733, "lr": 5e-06, "epoch": 1.3772151898734177, "percentage": 68.97, "elapsed_time": "10:42:39", "remaining_time": "4:49:11"}
{"current_steps": 690, "total_steps": 986, "loss": 0.473, "lr": 5e-06, "epoch": 1.3974683544303796, "percentage": 69.98, "elapsed_time": "10:51:59", "remaining_time": "4:39:41"}
{"current_steps": 700, "total_steps": 986, "loss": 0.4712, "lr": 5e-06, "epoch": 1.4177215189873418, "percentage": 70.99, "elapsed_time": "11:01:17", "remaining_time": "4:30:10"}
{"current_steps": 710, "total_steps": 986, "loss": 0.477, "lr": 5e-06, "epoch": 1.437974683544304, "percentage": 72.01, "elapsed_time": "11:10:35", "remaining_time": "4:20:40"}
{"current_steps": 720, "total_steps": 986, "loss": 0.4772, "lr": 5e-06, "epoch": 1.4582278481012658, "percentage": 73.02, "elapsed_time": "11:19:54", "remaining_time": "4:11:11"}
{"current_steps": 730, "total_steps": 986, "loss": 0.4799, "lr": 5e-06, "epoch": 1.4784810126582277, "percentage": 74.04, "elapsed_time": "11:29:12", "remaining_time": "4:01:41"}
{"current_steps": 740, "total_steps": 986, "loss": 0.4728, "lr": 5e-06, "epoch": 1.4987341772151899, "percentage": 75.05, "elapsed_time": "11:38:31", "remaining_time": "3:52:12"}
{"current_steps": 750, "total_steps": 986, "loss": 0.4757, "lr": 5e-06, "epoch": 1.518987341772152, "percentage": 76.06, "elapsed_time": "11:47:50", "remaining_time": "3:42:44"}
{"current_steps": 760, "total_steps": 986, "loss": 0.4768, "lr": 5e-06, "epoch": 1.539240506329114, "percentage": 77.08, "elapsed_time": "11:57:09", "remaining_time": "3:33:15"}
{"current_steps": 770, "total_steps": 986, "loss": 0.4759, "lr": 5e-06, "epoch": 1.5594936708860758, "percentage": 78.09, "elapsed_time": "12:06:27", "remaining_time": "3:23:47"}
{"current_steps": 780, "total_steps": 986, "loss": 0.4766, "lr": 5e-06, "epoch": 1.579746835443038, "percentage": 79.11, "elapsed_time": "12:15:45", "remaining_time": "3:14:19"}
{"current_steps": 790, "total_steps": 986, "loss": 0.4789, "lr": 5e-06, "epoch": 1.6, "percentage": 80.12, "elapsed_time": "12:25:05", "remaining_time": "3:04:51"}
{"current_steps": 800, "total_steps": 986, "loss": 0.4736, "lr": 5e-06, "epoch": 1.620253164556962, "percentage": 81.14, "elapsed_time": "12:34:24", "remaining_time": "2:55:24"}
{"current_steps": 810, "total_steps": 986, "loss": 0.466, "lr": 5e-06, "epoch": 1.640506329113924, "percentage": 82.15, "elapsed_time": "12:43:43", "remaining_time": "2:45:56"}
{"current_steps": 820, "total_steps": 986, "loss": 0.4743, "lr": 5e-06, "epoch": 1.660759493670886, "percentage": 83.16, "elapsed_time": "12:53:02", "remaining_time": "2:36:29"}
{"current_steps": 830, "total_steps": 986, "loss": 0.4703, "lr": 5e-06, "epoch": 1.6810126582278482, "percentage": 84.18, "elapsed_time": "13:02:22", "remaining_time": "2:27:02"}
{"current_steps": 840, "total_steps": 986, "loss": 0.4711, "lr": 5e-06, "epoch": 1.70126582278481, "percentage": 85.19, "elapsed_time": "13:11:40", "remaining_time": "2:17:35"}
{"current_steps": 850, "total_steps": 986, "loss": 0.4734, "lr": 5e-06, "epoch": 1.721518987341772, "percentage": 86.21, "elapsed_time": "13:20:58", "remaining_time": "2:08:09"}
{"current_steps": 860, "total_steps": 986, "loss": 0.4733, "lr": 5e-06, "epoch": 1.7417721518987341, "percentage": 87.22, "elapsed_time": "13:30:18", "remaining_time": "1:58:43"}
{"current_steps": 870, "total_steps": 986, "loss": 0.4764, "lr": 5e-06, "epoch": 1.7620253164556963, "percentage": 88.24, "elapsed_time": "13:39:37", "remaining_time": "1:49:17"}
{"current_steps": 880, "total_steps": 986, "loss": 0.477, "lr": 5e-06, "epoch": 1.7822784810126582, "percentage": 89.25, "elapsed_time": "13:48:56", "remaining_time": "1:39:51"}
{"current_steps": 890, "total_steps": 986, "loss": 0.4741, "lr": 5e-06, "epoch": 1.80253164556962, "percentage": 90.26, "elapsed_time": "13:58:16", "remaining_time": "1:30:25"}
{"current_steps": 900, "total_steps": 986, "loss": 0.4744, "lr": 5e-06, "epoch": 1.8227848101265822, "percentage": 91.28, "elapsed_time": "14:07:35", "remaining_time": "1:20:59"}
{"current_steps": 910, "total_steps": 986, "loss": 0.477, "lr": 5e-06, "epoch": 1.8430379746835444, "percentage": 92.29, "elapsed_time": "14:16:54", "remaining_time": "1:11:33"}
{"current_steps": 920, "total_steps": 986, "loss": 0.4764, "lr": 5e-06, "epoch": 1.8632911392405065, "percentage": 93.31, "elapsed_time": "14:26:13", "remaining_time": "1:02:08"}
{"current_steps": 930, "total_steps": 986, "loss": 0.4734, "lr": 5e-06, "epoch": 1.8835443037974684, "percentage": 94.32, "elapsed_time": "14:35:32", "remaining_time": "0:52:43"}
{"current_steps": 940, "total_steps": 986, "loss": 0.4775, "lr": 5e-06, "epoch": 1.9037974683544303, "percentage": 95.33, "elapsed_time": "14:44:50", "remaining_time": "0:43:18"}
{"current_steps": 950, "total_steps": 986, "loss": 0.4785, "lr": 5e-06, "epoch": 1.9240506329113924, "percentage": 96.35, "elapsed_time": "14:54:10", "remaining_time": "0:33:53"}
{"current_steps": 960, "total_steps": 986, "loss": 0.4827, "lr": 5e-06, "epoch": 1.9443037974683546, "percentage": 97.36, "elapsed_time": "15:03:29", "remaining_time": "0:24:28"}
{"current_steps": 970, "total_steps": 986, "loss": 0.4807, "lr": 5e-06, "epoch": 1.9645569620253165, "percentage": 98.38, "elapsed_time": "15:12:48", "remaining_time": "0:15:03"}
{"current_steps": 980, "total_steps": 986, "loss": 0.4813, "lr": 5e-06, "epoch": 1.9848101265822784, "percentage": 99.39, "elapsed_time": "15:22:07", "remaining_time": "0:05:38"}
{"current_steps": 986, "total_steps": 986, "eval_loss": 0.06898781657218933, "epoch": 1.9969620253164557, "percentage": 100.0, "elapsed_time": "15:37:20", "remaining_time": "0:00:00"}
{"current_steps": 986, "total_steps": 986, "epoch": 1.9969620253164557, "percentage": 100.0, "elapsed_time": "15:38:38", "remaining_time": "0:00:00"}