hp_ablations_llama3_epoch4 / trainer_log.jsonl
sedrickkeh's picture
Training in progress, epoch 3
25b87c6 verified
{"current_steps": 10, "total_steps": 1688, "loss": 0.888, "lr": 5e-06, "epoch": 0.02365464222353637, "percentage": 0.59, "elapsed_time": "0:04:54", "remaining_time": "13:43:50"}
{"current_steps": 20, "total_steps": 1688, "loss": 0.7936, "lr": 5e-06, "epoch": 0.04730928444707274, "percentage": 1.18, "elapsed_time": "0:09:45", "remaining_time": "13:33:19"}
{"current_steps": 30, "total_steps": 1688, "loss": 0.7721, "lr": 5e-06, "epoch": 0.0709639266706091, "percentage": 1.78, "elapsed_time": "0:14:35", "remaining_time": "13:26:25"}
{"current_steps": 40, "total_steps": 1688, "loss": 0.7537, "lr": 5e-06, "epoch": 0.09461856889414548, "percentage": 2.37, "elapsed_time": "0:19:25", "remaining_time": "13:20:26"}
{"current_steps": 50, "total_steps": 1688, "loss": 0.7355, "lr": 5e-06, "epoch": 0.11827321111768184, "percentage": 2.96, "elapsed_time": "0:24:16", "remaining_time": "13:15:05"}
{"current_steps": 60, "total_steps": 1688, "loss": 0.7205, "lr": 5e-06, "epoch": 0.1419278533412182, "percentage": 3.55, "elapsed_time": "0:29:06", "remaining_time": "13:09:52"}
{"current_steps": 70, "total_steps": 1688, "loss": 0.7141, "lr": 5e-06, "epoch": 0.16558249556475457, "percentage": 4.15, "elapsed_time": "0:33:57", "remaining_time": "13:04:57"}
{"current_steps": 80, "total_steps": 1688, "loss": 0.703, "lr": 5e-06, "epoch": 0.18923713778829096, "percentage": 4.74, "elapsed_time": "0:38:48", "remaining_time": "12:59:53"}
{"current_steps": 90, "total_steps": 1688, "loss": 0.687, "lr": 5e-06, "epoch": 0.21289178001182732, "percentage": 5.33, "elapsed_time": "0:43:38", "remaining_time": "12:54:50"}
{"current_steps": 100, "total_steps": 1688, "loss": 0.6856, "lr": 5e-06, "epoch": 0.23654642223536368, "percentage": 5.92, "elapsed_time": "0:48:29", "remaining_time": "12:49:55"}
{"current_steps": 110, "total_steps": 1688, "loss": 0.6906, "lr": 5e-06, "epoch": 0.26020106445890007, "percentage": 6.52, "elapsed_time": "0:53:19", "remaining_time": "12:45:03"}
{"current_steps": 120, "total_steps": 1688, "loss": 0.6776, "lr": 5e-06, "epoch": 0.2838557066824364, "percentage": 7.11, "elapsed_time": "0:58:10", "remaining_time": "12:40:10"}
{"current_steps": 130, "total_steps": 1688, "loss": 0.6737, "lr": 5e-06, "epoch": 0.3075103489059728, "percentage": 7.7, "elapsed_time": "1:03:01", "remaining_time": "12:35:17"}
{"current_steps": 140, "total_steps": 1688, "loss": 0.682, "lr": 5e-06, "epoch": 0.33116499112950915, "percentage": 8.29, "elapsed_time": "1:07:52", "remaining_time": "12:30:26"}
{"current_steps": 150, "total_steps": 1688, "loss": 0.6647, "lr": 5e-06, "epoch": 0.35481963335304556, "percentage": 8.89, "elapsed_time": "1:12:42", "remaining_time": "12:25:32"}
{"current_steps": 160, "total_steps": 1688, "loss": 0.6693, "lr": 5e-06, "epoch": 0.3784742755765819, "percentage": 9.48, "elapsed_time": "1:17:33", "remaining_time": "12:20:40"}
{"current_steps": 170, "total_steps": 1688, "loss": 0.6668, "lr": 5e-06, "epoch": 0.4021289178001183, "percentage": 10.07, "elapsed_time": "1:22:24", "remaining_time": "12:15:49"}
{"current_steps": 180, "total_steps": 1688, "loss": 0.6739, "lr": 5e-06, "epoch": 0.42578356002365464, "percentage": 10.66, "elapsed_time": "1:27:14", "remaining_time": "12:10:55"}
{"current_steps": 190, "total_steps": 1688, "loss": 0.67, "lr": 5e-06, "epoch": 0.449438202247191, "percentage": 11.26, "elapsed_time": "1:32:05", "remaining_time": "12:06:04"}
{"current_steps": 200, "total_steps": 1688, "loss": 0.6689, "lr": 5e-06, "epoch": 0.47309284447072736, "percentage": 11.85, "elapsed_time": "1:36:56", "remaining_time": "12:01:13"}
{"current_steps": 210, "total_steps": 1688, "loss": 0.6693, "lr": 5e-06, "epoch": 0.4967474866942638, "percentage": 12.44, "elapsed_time": "1:41:46", "remaining_time": "11:56:19"}
{"current_steps": 220, "total_steps": 1688, "loss": 0.6684, "lr": 5e-06, "epoch": 0.5204021289178001, "percentage": 13.03, "elapsed_time": "1:46:37", "remaining_time": "11:51:31"}
{"current_steps": 230, "total_steps": 1688, "loss": 0.6601, "lr": 5e-06, "epoch": 0.5440567711413364, "percentage": 13.63, "elapsed_time": "1:51:28", "remaining_time": "11:46:41"}
{"current_steps": 240, "total_steps": 1688, "loss": 0.6645, "lr": 5e-06, "epoch": 0.5677114133648729, "percentage": 14.22, "elapsed_time": "1:56:19", "remaining_time": "11:41:48"}
{"current_steps": 250, "total_steps": 1688, "loss": 0.6615, "lr": 5e-06, "epoch": 0.5913660555884093, "percentage": 14.81, "elapsed_time": "2:01:09", "remaining_time": "11:36:56"}
{"current_steps": 260, "total_steps": 1688, "loss": 0.6521, "lr": 5e-06, "epoch": 0.6150206978119456, "percentage": 15.4, "elapsed_time": "2:06:00", "remaining_time": "11:32:06"}
{"current_steps": 270, "total_steps": 1688, "loss": 0.6636, "lr": 5e-06, "epoch": 0.638675340035482, "percentage": 16.0, "elapsed_time": "2:10:51", "remaining_time": "11:27:15"}
{"current_steps": 280, "total_steps": 1688, "loss": 0.6653, "lr": 5e-06, "epoch": 0.6623299822590183, "percentage": 16.59, "elapsed_time": "2:15:42", "remaining_time": "11:22:25"}
{"current_steps": 290, "total_steps": 1688, "loss": 0.6594, "lr": 5e-06, "epoch": 0.6859846244825547, "percentage": 17.18, "elapsed_time": "2:20:33", "remaining_time": "11:17:34"}
{"current_steps": 300, "total_steps": 1688, "loss": 0.6532, "lr": 5e-06, "epoch": 0.7096392667060911, "percentage": 17.77, "elapsed_time": "2:25:24", "remaining_time": "11:12:44"}
{"current_steps": 310, "total_steps": 1688, "loss": 0.6615, "lr": 5e-06, "epoch": 0.7332939089296274, "percentage": 18.36, "elapsed_time": "2:30:14", "remaining_time": "11:07:52"}
{"current_steps": 320, "total_steps": 1688, "loss": 0.6555, "lr": 5e-06, "epoch": 0.7569485511531638, "percentage": 18.96, "elapsed_time": "2:35:05", "remaining_time": "11:03:02"}
{"current_steps": 330, "total_steps": 1688, "loss": 0.6537, "lr": 5e-06, "epoch": 0.7806031933767001, "percentage": 19.55, "elapsed_time": "2:39:56", "remaining_time": "10:58:12"}
{"current_steps": 340, "total_steps": 1688, "loss": 0.6535, "lr": 5e-06, "epoch": 0.8042578356002366, "percentage": 20.14, "elapsed_time": "2:44:47", "remaining_time": "10:53:23"}
{"current_steps": 350, "total_steps": 1688, "loss": 0.6598, "lr": 5e-06, "epoch": 0.8279124778237729, "percentage": 20.73, "elapsed_time": "2:49:38", "remaining_time": "10:48:32"}
{"current_steps": 360, "total_steps": 1688, "loss": 0.6533, "lr": 5e-06, "epoch": 0.8515671200473093, "percentage": 21.33, "elapsed_time": "2:54:29", "remaining_time": "10:43:39"}
{"current_steps": 370, "total_steps": 1688, "loss": 0.6567, "lr": 5e-06, "epoch": 0.8752217622708457, "percentage": 21.92, "elapsed_time": "2:59:20", "remaining_time": "10:38:49"}
{"current_steps": 380, "total_steps": 1688, "loss": 0.6475, "lr": 5e-06, "epoch": 0.898876404494382, "percentage": 22.51, "elapsed_time": "3:04:10", "remaining_time": "10:33:57"}
{"current_steps": 390, "total_steps": 1688, "loss": 0.6548, "lr": 5e-06, "epoch": 0.9225310467179184, "percentage": 23.1, "elapsed_time": "3:09:01", "remaining_time": "10:29:07"}
{"current_steps": 400, "total_steps": 1688, "loss": 0.6464, "lr": 5e-06, "epoch": 0.9461856889414547, "percentage": 23.7, "elapsed_time": "3:13:52", "remaining_time": "10:24:16"}
{"current_steps": 410, "total_steps": 1688, "loss": 0.6489, "lr": 5e-06, "epoch": 0.9698403311649911, "percentage": 24.29, "elapsed_time": "3:18:43", "remaining_time": "10:19:24"}
{"current_steps": 420, "total_steps": 1688, "loss": 0.6479, "lr": 5e-06, "epoch": 0.9934949733885275, "percentage": 24.88, "elapsed_time": "3:23:33", "remaining_time": "10:14:33"}
{"current_steps": 422, "total_steps": 1688, "eval_loss": 0.6500382423400879, "epoch": 0.9982259018332348, "percentage": 25.0, "elapsed_time": "3:28:38", "remaining_time": "10:25:56"}
{"current_steps": 430, "total_steps": 1688, "loss": 0.6364, "lr": 5e-06, "epoch": 1.0171496156120639, "percentage": 25.47, "elapsed_time": "3:33:07", "remaining_time": "10:23:29"}
{"current_steps": 440, "total_steps": 1688, "loss": 0.6046, "lr": 5e-06, "epoch": 1.0408042578356003, "percentage": 26.07, "elapsed_time": "3:37:58", "remaining_time": "10:18:14"}
{"current_steps": 450, "total_steps": 1688, "loss": 0.6028, "lr": 5e-06, "epoch": 1.0644589000591367, "percentage": 26.66, "elapsed_time": "3:42:48", "remaining_time": "10:12:59"}
{"current_steps": 460, "total_steps": 1688, "loss": 0.6114, "lr": 5e-06, "epoch": 1.0881135422826729, "percentage": 27.25, "elapsed_time": "3:47:39", "remaining_time": "10:07:46"}
{"current_steps": 470, "total_steps": 1688, "loss": 0.6104, "lr": 5e-06, "epoch": 1.1117681845062093, "percentage": 27.84, "elapsed_time": "3:52:30", "remaining_time": "10:02:33"}
{"current_steps": 480, "total_steps": 1688, "loss": 0.6105, "lr": 5e-06, "epoch": 1.1354228267297457, "percentage": 28.44, "elapsed_time": "3:57:22", "remaining_time": "9:57:22"}
{"current_steps": 490, "total_steps": 1688, "loss": 0.6076, "lr": 5e-06, "epoch": 1.1590774689532821, "percentage": 29.03, "elapsed_time": "4:02:13", "remaining_time": "9:52:12"}
{"current_steps": 500, "total_steps": 1688, "loss": 0.6038, "lr": 5e-06, "epoch": 1.1827321111768185, "percentage": 29.62, "elapsed_time": "4:07:04", "remaining_time": "9:47:03"}
{"current_steps": 510, "total_steps": 1688, "loss": 0.6036, "lr": 5e-06, "epoch": 1.2063867534003547, "percentage": 30.21, "elapsed_time": "4:11:55", "remaining_time": "9:41:53"}
{"current_steps": 520, "total_steps": 1688, "loss": 0.6095, "lr": 5e-06, "epoch": 1.2300413956238911, "percentage": 30.81, "elapsed_time": "4:16:46", "remaining_time": "9:36:46"}
{"current_steps": 530, "total_steps": 1688, "loss": 0.6106, "lr": 5e-06, "epoch": 1.2536960378474276, "percentage": 31.4, "elapsed_time": "4:21:38", "remaining_time": "9:31:38"}
{"current_steps": 540, "total_steps": 1688, "loss": 0.611, "lr": 5e-06, "epoch": 1.277350680070964, "percentage": 31.99, "elapsed_time": "4:26:29", "remaining_time": "9:26:31"}
{"current_steps": 550, "total_steps": 1688, "loss": 0.6091, "lr": 5e-06, "epoch": 1.3010053222945004, "percentage": 32.58, "elapsed_time": "4:31:20", "remaining_time": "9:21:25"}
{"current_steps": 560, "total_steps": 1688, "loss": 0.6073, "lr": 5e-06, "epoch": 1.3246599645180366, "percentage": 33.18, "elapsed_time": "4:36:11", "remaining_time": "9:16:19"}
{"current_steps": 570, "total_steps": 1688, "loss": 0.6074, "lr": 5e-06, "epoch": 1.348314606741573, "percentage": 33.77, "elapsed_time": "4:41:02", "remaining_time": "9:11:15"}
{"current_steps": 580, "total_steps": 1688, "loss": 0.6109, "lr": 5e-06, "epoch": 1.3719692489651094, "percentage": 34.36, "elapsed_time": "4:45:54", "remaining_time": "9:06:10"}
{"current_steps": 590, "total_steps": 1688, "loss": 0.6115, "lr": 5e-06, "epoch": 1.3956238911886458, "percentage": 34.95, "elapsed_time": "4:50:45", "remaining_time": "9:01:05"}
{"current_steps": 600, "total_steps": 1688, "loss": 0.5986, "lr": 5e-06, "epoch": 1.4192785334121822, "percentage": 35.55, "elapsed_time": "4:55:36", "remaining_time": "8:56:02"}
{"current_steps": 610, "total_steps": 1688, "loss": 0.6004, "lr": 5e-06, "epoch": 1.4429331756357184, "percentage": 36.14, "elapsed_time": "5:00:27", "remaining_time": "8:50:58"}
{"current_steps": 620, "total_steps": 1688, "loss": 0.603, "lr": 5e-06, "epoch": 1.4665878178592548, "percentage": 36.73, "elapsed_time": "5:05:18", "remaining_time": "8:45:55"}
{"current_steps": 630, "total_steps": 1688, "loss": 0.6136, "lr": 5e-06, "epoch": 1.4902424600827913, "percentage": 37.32, "elapsed_time": "5:10:10", "remaining_time": "8:40:52"}
{"current_steps": 640, "total_steps": 1688, "loss": 0.6119, "lr": 5e-06, "epoch": 1.5138971023063275, "percentage": 37.91, "elapsed_time": "5:15:01", "remaining_time": "8:35:51"}
{"current_steps": 650, "total_steps": 1688, "loss": 0.6029, "lr": 5e-06, "epoch": 1.537551744529864, "percentage": 38.51, "elapsed_time": "5:19:52", "remaining_time": "8:30:49"}
{"current_steps": 660, "total_steps": 1688, "loss": 0.6071, "lr": 5e-06, "epoch": 1.5612063867534003, "percentage": 39.1, "elapsed_time": "5:24:43", "remaining_time": "8:25:47"}
{"current_steps": 670, "total_steps": 1688, "loss": 0.6039, "lr": 5e-06, "epoch": 1.5848610289769367, "percentage": 39.69, "elapsed_time": "5:29:35", "remaining_time": "8:20:46"}
{"current_steps": 680, "total_steps": 1688, "loss": 0.6082, "lr": 5e-06, "epoch": 1.6085156712004731, "percentage": 40.28, "elapsed_time": "5:34:26", "remaining_time": "8:15:45"}
{"current_steps": 690, "total_steps": 1688, "loss": 0.6015, "lr": 5e-06, "epoch": 1.6321703134240093, "percentage": 40.88, "elapsed_time": "5:39:17", "remaining_time": "8:10:45"}
{"current_steps": 700, "total_steps": 1688, "loss": 0.6088, "lr": 5e-06, "epoch": 1.655824955647546, "percentage": 41.47, "elapsed_time": "5:44:09", "remaining_time": "8:05:44"}
{"current_steps": 710, "total_steps": 1688, "loss": 0.6074, "lr": 5e-06, "epoch": 1.6794795978710821, "percentage": 42.06, "elapsed_time": "5:49:00", "remaining_time": "8:00:44"}
{"current_steps": 720, "total_steps": 1688, "loss": 0.6137, "lr": 5e-06, "epoch": 1.7031342400946186, "percentage": 42.65, "elapsed_time": "5:53:51", "remaining_time": "7:55:45"}
{"current_steps": 730, "total_steps": 1688, "loss": 0.6091, "lr": 5e-06, "epoch": 1.726788882318155, "percentage": 43.25, "elapsed_time": "5:58:43", "remaining_time": "7:50:45"}
{"current_steps": 740, "total_steps": 1688, "loss": 0.6038, "lr": 5e-06, "epoch": 1.7504435245416912, "percentage": 43.84, "elapsed_time": "6:03:34", "remaining_time": "7:45:46"}
{"current_steps": 750, "total_steps": 1688, "loss": 0.6095, "lr": 5e-06, "epoch": 1.7740981667652278, "percentage": 44.43, "elapsed_time": "6:08:26", "remaining_time": "7:40:47"}
{"current_steps": 760, "total_steps": 1688, "loss": 0.6142, "lr": 5e-06, "epoch": 1.797752808988764, "percentage": 45.02, "elapsed_time": "6:13:17", "remaining_time": "7:35:48"}
{"current_steps": 770, "total_steps": 1688, "loss": 0.615, "lr": 5e-06, "epoch": 1.8214074512123004, "percentage": 45.62, "elapsed_time": "6:18:08", "remaining_time": "7:30:49"}
{"current_steps": 780, "total_steps": 1688, "loss": 0.6068, "lr": 5e-06, "epoch": 1.8450620934358368, "percentage": 46.21, "elapsed_time": "6:23:00", "remaining_time": "7:25:51"}
{"current_steps": 790, "total_steps": 1688, "loss": 0.6056, "lr": 5e-06, "epoch": 1.868716735659373, "percentage": 46.8, "elapsed_time": "6:27:51", "remaining_time": "7:20:52"}
{"current_steps": 800, "total_steps": 1688, "loss": 0.612, "lr": 5e-06, "epoch": 1.8923713778829097, "percentage": 47.39, "elapsed_time": "6:32:42", "remaining_time": "7:15:54"}
{"current_steps": 810, "total_steps": 1688, "loss": 0.6118, "lr": 5e-06, "epoch": 1.9160260201064458, "percentage": 47.99, "elapsed_time": "6:37:34", "remaining_time": "7:10:56"}
{"current_steps": 820, "total_steps": 1688, "loss": 0.5986, "lr": 5e-06, "epoch": 1.9396806623299823, "percentage": 48.58, "elapsed_time": "6:42:25", "remaining_time": "7:05:58"}
{"current_steps": 830, "total_steps": 1688, "loss": 0.6111, "lr": 5e-06, "epoch": 1.9633353045535187, "percentage": 49.17, "elapsed_time": "6:47:16", "remaining_time": "7:01:01"}
{"current_steps": 840, "total_steps": 1688, "loss": 0.596, "lr": 5e-06, "epoch": 1.9869899467770549, "percentage": 49.76, "elapsed_time": "6:52:07", "remaining_time": "6:56:03"}
{"current_steps": 845, "total_steps": 1688, "eval_loss": 0.6398171186447144, "epoch": 1.9988172678888232, "percentage": 50.06, "elapsed_time": "6:58:30", "remaining_time": "6:57:31"}
{"current_steps": 850, "total_steps": 1688, "loss": 0.5986, "lr": 5e-06, "epoch": 2.0106445890005915, "percentage": 50.36, "elapsed_time": "7:01:41", "remaining_time": "6:55:44"}
{"current_steps": 860, "total_steps": 1688, "loss": 0.5657, "lr": 5e-06, "epoch": 2.0342992312241277, "percentage": 50.95, "elapsed_time": "7:06:33", "remaining_time": "6:50:40"}
{"current_steps": 870, "total_steps": 1688, "loss": 0.5657, "lr": 5e-06, "epoch": 2.057953873447664, "percentage": 51.54, "elapsed_time": "7:11:24", "remaining_time": "6:45:37"}
{"current_steps": 880, "total_steps": 1688, "loss": 0.5579, "lr": 5e-06, "epoch": 2.0816085156712005, "percentage": 52.13, "elapsed_time": "7:16:16", "remaining_time": "6:40:34"}
{"current_steps": 890, "total_steps": 1688, "loss": 0.5602, "lr": 5e-06, "epoch": 2.1052631578947367, "percentage": 52.73, "elapsed_time": "7:21:07", "remaining_time": "6:35:31"}
{"current_steps": 900, "total_steps": 1688, "loss": 0.5657, "lr": 5e-06, "epoch": 2.1289178001182734, "percentage": 53.32, "elapsed_time": "7:25:59", "remaining_time": "6:30:29"}
{"current_steps": 910, "total_steps": 1688, "loss": 0.5603, "lr": 5e-06, "epoch": 2.1525724423418096, "percentage": 53.91, "elapsed_time": "7:30:50", "remaining_time": "6:25:26"}
{"current_steps": 920, "total_steps": 1688, "loss": 0.5622, "lr": 5e-06, "epoch": 2.1762270845653457, "percentage": 54.5, "elapsed_time": "7:35:41", "remaining_time": "6:20:24"}
{"current_steps": 930, "total_steps": 1688, "loss": 0.5615, "lr": 5e-06, "epoch": 2.1998817267888824, "percentage": 55.09, "elapsed_time": "7:40:33", "remaining_time": "6:15:22"}
{"current_steps": 940, "total_steps": 1688, "loss": 0.5631, "lr": 5e-06, "epoch": 2.2235363690124186, "percentage": 55.69, "elapsed_time": "7:45:24", "remaining_time": "6:10:21"}
{"current_steps": 950, "total_steps": 1688, "loss": 0.5691, "lr": 5e-06, "epoch": 2.247191011235955, "percentage": 56.28, "elapsed_time": "7:50:15", "remaining_time": "6:05:19"}
{"current_steps": 960, "total_steps": 1688, "loss": 0.5628, "lr": 5e-06, "epoch": 2.2708456534594914, "percentage": 56.87, "elapsed_time": "7:55:06", "remaining_time": "6:00:17"}
{"current_steps": 970, "total_steps": 1688, "loss": 0.5646, "lr": 5e-06, "epoch": 2.2945002956830276, "percentage": 57.46, "elapsed_time": "7:59:57", "remaining_time": "5:55:16"}
{"current_steps": 980, "total_steps": 1688, "loss": 0.5685, "lr": 5e-06, "epoch": 2.3181549379065642, "percentage": 58.06, "elapsed_time": "8:04:48", "remaining_time": "5:50:15"}
{"current_steps": 990, "total_steps": 1688, "loss": 0.5662, "lr": 5e-06, "epoch": 2.3418095801301004, "percentage": 58.65, "elapsed_time": "8:09:40", "remaining_time": "5:45:14"}
{"current_steps": 1000, "total_steps": 1688, "loss": 0.5665, "lr": 5e-06, "epoch": 2.365464222353637, "percentage": 59.24, "elapsed_time": "8:14:31", "remaining_time": "5:40:14"}
{"current_steps": 1010, "total_steps": 1688, "loss": 0.5646, "lr": 5e-06, "epoch": 2.3891188645771733, "percentage": 59.83, "elapsed_time": "8:19:22", "remaining_time": "5:35:13"}
{"current_steps": 1020, "total_steps": 1688, "loss": 0.5621, "lr": 5e-06, "epoch": 2.4127735068007095, "percentage": 60.43, "elapsed_time": "8:24:13", "remaining_time": "5:30:13"}
{"current_steps": 1030, "total_steps": 1688, "loss": 0.5651, "lr": 5e-06, "epoch": 2.436428149024246, "percentage": 61.02, "elapsed_time": "8:29:05", "remaining_time": "5:25:13"}
{"current_steps": 1040, "total_steps": 1688, "loss": 0.5658, "lr": 5e-06, "epoch": 2.4600827912477823, "percentage": 61.61, "elapsed_time": "8:33:56", "remaining_time": "5:20:13"}
{"current_steps": 1050, "total_steps": 1688, "loss": 0.5683, "lr": 5e-06, "epoch": 2.483737433471319, "percentage": 62.2, "elapsed_time": "8:38:47", "remaining_time": "5:15:13"}
{"current_steps": 1060, "total_steps": 1688, "loss": 0.5676, "lr": 5e-06, "epoch": 2.507392075694855, "percentage": 62.8, "elapsed_time": "8:43:38", "remaining_time": "5:10:13"}
{"current_steps": 1070, "total_steps": 1688, "loss": 0.5699, "lr": 5e-06, "epoch": 2.5310467179183913, "percentage": 63.39, "elapsed_time": "8:48:29", "remaining_time": "5:05:14"}
{"current_steps": 1080, "total_steps": 1688, "loss": 0.5707, "lr": 5e-06, "epoch": 2.554701360141928, "percentage": 63.98, "elapsed_time": "8:53:20", "remaining_time": "5:00:14"}
{"current_steps": 1090, "total_steps": 1688, "loss": 0.565, "lr": 5e-06, "epoch": 2.578356002365464, "percentage": 64.57, "elapsed_time": "8:58:11", "remaining_time": "4:55:15"}
{"current_steps": 1100, "total_steps": 1688, "loss": 0.5671, "lr": 5e-06, "epoch": 2.6020106445890008, "percentage": 65.17, "elapsed_time": "9:03:02", "remaining_time": "4:50:16"}
{"current_steps": 1110, "total_steps": 1688, "loss": 0.5649, "lr": 5e-06, "epoch": 2.625665286812537, "percentage": 65.76, "elapsed_time": "9:07:53", "remaining_time": "4:45:17"}
{"current_steps": 1120, "total_steps": 1688, "loss": 0.5762, "lr": 5e-06, "epoch": 2.649319929036073, "percentage": 66.35, "elapsed_time": "9:12:44", "remaining_time": "4:40:19"}
{"current_steps": 1130, "total_steps": 1688, "loss": 0.5635, "lr": 5e-06, "epoch": 2.67297457125961, "percentage": 66.94, "elapsed_time": "9:17:36", "remaining_time": "4:35:20"}
{"current_steps": 1140, "total_steps": 1688, "loss": 0.5642, "lr": 5e-06, "epoch": 2.696629213483146, "percentage": 67.54, "elapsed_time": "9:22:27", "remaining_time": "4:30:22"}
{"current_steps": 1150, "total_steps": 1688, "loss": 0.5679, "lr": 5e-06, "epoch": 2.7202838557066826, "percentage": 68.13, "elapsed_time": "9:27:18", "remaining_time": "4:25:24"}
{"current_steps": 1160, "total_steps": 1688, "loss": 0.574, "lr": 5e-06, "epoch": 2.743938497930219, "percentage": 68.72, "elapsed_time": "9:32:10", "remaining_time": "4:20:26"}
{"current_steps": 1170, "total_steps": 1688, "loss": 0.5647, "lr": 5e-06, "epoch": 2.767593140153755, "percentage": 69.31, "elapsed_time": "9:37:01", "remaining_time": "4:15:27"}
{"current_steps": 1180, "total_steps": 1688, "loss": 0.5687, "lr": 5e-06, "epoch": 2.7912477823772917, "percentage": 69.91, "elapsed_time": "9:41:51", "remaining_time": "4:10:29"}
{"current_steps": 1190, "total_steps": 1688, "loss": 0.5681, "lr": 5e-06, "epoch": 2.814902424600828, "percentage": 70.5, "elapsed_time": "9:46:43", "remaining_time": "4:05:32"}
{"current_steps": 1200, "total_steps": 1688, "loss": 0.5666, "lr": 5e-06, "epoch": 2.8385570668243645, "percentage": 71.09, "elapsed_time": "9:51:33", "remaining_time": "4:00:34"}
{"current_steps": 1210, "total_steps": 1688, "loss": 0.568, "lr": 5e-06, "epoch": 2.8622117090479007, "percentage": 71.68, "elapsed_time": "9:56:25", "remaining_time": "3:55:36"}
{"current_steps": 1220, "total_steps": 1688, "loss": 0.562, "lr": 5e-06, "epoch": 2.885866351271437, "percentage": 72.27, "elapsed_time": "10:01:16", "remaining_time": "3:50:39"}
{"current_steps": 1230, "total_steps": 1688, "loss": 0.5685, "lr": 5e-06, "epoch": 2.9095209934949735, "percentage": 72.87, "elapsed_time": "10:06:07", "remaining_time": "3:45:41"}
{"current_steps": 1240, "total_steps": 1688, "loss": 0.5695, "lr": 5e-06, "epoch": 2.9331756357185097, "percentage": 73.46, "elapsed_time": "10:10:58", "remaining_time": "3:40:44"}
{"current_steps": 1250, "total_steps": 1688, "loss": 0.5776, "lr": 5e-06, "epoch": 2.9568302779420463, "percentage": 74.05, "elapsed_time": "10:15:50", "remaining_time": "3:35:47"}
{"current_steps": 1260, "total_steps": 1688, "loss": 0.5709, "lr": 5e-06, "epoch": 2.9804849201655825, "percentage": 74.64, "elapsed_time": "10:20:41", "remaining_time": "3:30:50"}
{"current_steps": 1268, "total_steps": 1688, "eval_loss": 0.6412045359611511, "epoch": 2.9994086339444115, "percentage": 75.12, "elapsed_time": "10:28:26", "remaining_time": "3:28:09"}
{"current_steps": 1270, "total_steps": 1688, "loss": 0.5734, "lr": 5e-06, "epoch": 3.0041395623891187, "percentage": 75.24, "elapsed_time": "10:30:16", "remaining_time": "3:27:26"}
{"current_steps": 1280, "total_steps": 1688, "loss": 0.5192, "lr": 5e-06, "epoch": 3.0277942046126554, "percentage": 75.83, "elapsed_time": "10:35:07", "remaining_time": "3:22:26"}
{"current_steps": 1290, "total_steps": 1688, "loss": 0.5252, "lr": 5e-06, "epoch": 3.0514488468361916, "percentage": 76.42, "elapsed_time": "10:39:59", "remaining_time": "3:17:27"}
{"current_steps": 1300, "total_steps": 1688, "loss": 0.518, "lr": 5e-06, "epoch": 3.075103489059728, "percentage": 77.01, "elapsed_time": "10:44:50", "remaining_time": "3:12:27"}
{"current_steps": 1310, "total_steps": 1688, "loss": 0.523, "lr": 5e-06, "epoch": 3.0987581312832644, "percentage": 77.61, "elapsed_time": "10:49:42", "remaining_time": "3:07:28"}
{"current_steps": 1320, "total_steps": 1688, "loss": 0.5206, "lr": 5e-06, "epoch": 3.1224127735068006, "percentage": 78.2, "elapsed_time": "10:54:33", "remaining_time": "3:02:29"}
{"current_steps": 1330, "total_steps": 1688, "loss": 0.5243, "lr": 5e-06, "epoch": 3.146067415730337, "percentage": 78.79, "elapsed_time": "10:59:24", "remaining_time": "2:57:29"}
{"current_steps": 1340, "total_steps": 1688, "loss": 0.5252, "lr": 5e-06, "epoch": 3.1697220579538734, "percentage": 79.38, "elapsed_time": "11:04:16", "remaining_time": "2:52:30"}
{"current_steps": 1350, "total_steps": 1688, "loss": 0.5239, "lr": 5e-06, "epoch": 3.19337670017741, "percentage": 79.98, "elapsed_time": "11:09:07", "remaining_time": "2:47:31"}
{"current_steps": 1360, "total_steps": 1688, "loss": 0.5284, "lr": 5e-06, "epoch": 3.2170313424009462, "percentage": 80.57, "elapsed_time": "11:13:59", "remaining_time": "2:42:32"}
{"current_steps": 1370, "total_steps": 1688, "loss": 0.5247, "lr": 5e-06, "epoch": 3.2406859846244824, "percentage": 81.16, "elapsed_time": "11:18:50", "remaining_time": "2:37:34"}
{"current_steps": 1380, "total_steps": 1688, "loss": 0.5256, "lr": 5e-06, "epoch": 3.264340626848019, "percentage": 81.75, "elapsed_time": "11:23:42", "remaining_time": "2:32:35"}
{"current_steps": 1390, "total_steps": 1688, "loss": 0.5251, "lr": 5e-06, "epoch": 3.2879952690715553, "percentage": 82.35, "elapsed_time": "11:28:33", "remaining_time": "2:27:37"}
{"current_steps": 1400, "total_steps": 1688, "loss": 0.5264, "lr": 5e-06, "epoch": 3.311649911295092, "percentage": 82.94, "elapsed_time": "11:33:25", "remaining_time": "2:22:38"}
{"current_steps": 1410, "total_steps": 1688, "loss": 0.5246, "lr": 5e-06, "epoch": 3.335304553518628, "percentage": 83.53, "elapsed_time": "11:38:17", "remaining_time": "2:17:40"}
{"current_steps": 1420, "total_steps": 1688, "loss": 0.5337, "lr": 5e-06, "epoch": 3.3589591957421643, "percentage": 84.12, "elapsed_time": "11:43:09", "remaining_time": "2:12:42"}
{"current_steps": 1430, "total_steps": 1688, "loss": 0.5293, "lr": 5e-06, "epoch": 3.382613837965701, "percentage": 84.72, "elapsed_time": "11:48:01", "remaining_time": "2:07:44"}
{"current_steps": 1440, "total_steps": 1688, "loss": 0.5237, "lr": 5e-06, "epoch": 3.406268480189237, "percentage": 85.31, "elapsed_time": "11:52:52", "remaining_time": "2:02:46"}
{"current_steps": 1450, "total_steps": 1688, "loss": 0.5271, "lr": 5e-06, "epoch": 3.4299231224127738, "percentage": 85.9, "elapsed_time": "11:57:44", "remaining_time": "1:57:48"}
{"current_steps": 1460, "total_steps": 1688, "loss": 0.5257, "lr": 5e-06, "epoch": 3.45357776463631, "percentage": 86.49, "elapsed_time": "12:02:36", "remaining_time": "1:52:50"}
{"current_steps": 1470, "total_steps": 1688, "loss": 0.5282, "lr": 5e-06, "epoch": 3.477232406859846, "percentage": 87.09, "elapsed_time": "12:07:27", "remaining_time": "1:47:52"}
{"current_steps": 1480, "total_steps": 1688, "loss": 0.5307, "lr": 5e-06, "epoch": 3.5008870490833828, "percentage": 87.68, "elapsed_time": "12:12:19", "remaining_time": "1:42:55"}
{"current_steps": 1490, "total_steps": 1688, "loss": 0.529, "lr": 5e-06, "epoch": 3.524541691306919, "percentage": 88.27, "elapsed_time": "12:17:10", "remaining_time": "1:37:57"}
{"current_steps": 1500, "total_steps": 1688, "loss": 0.5339, "lr": 5e-06, "epoch": 3.5481963335304556, "percentage": 88.86, "elapsed_time": "12:22:02", "remaining_time": "1:33:00"}
{"current_steps": 1510, "total_steps": 1688, "loss": 0.5304, "lr": 5e-06, "epoch": 3.571850975753992, "percentage": 89.45, "elapsed_time": "12:26:53", "remaining_time": "1:28:02"}
{"current_steps": 1520, "total_steps": 1688, "loss": 0.5284, "lr": 5e-06, "epoch": 3.595505617977528, "percentage": 90.05, "elapsed_time": "12:31:45", "remaining_time": "1:23:05"}
{"current_steps": 1530, "total_steps": 1688, "loss": 0.5227, "lr": 5e-06, "epoch": 3.619160260201064, "percentage": 90.64, "elapsed_time": "12:36:36", "remaining_time": "1:18:07"}
{"current_steps": 1540, "total_steps": 1688, "loss": 0.5322, "lr": 5e-06, "epoch": 3.642814902424601, "percentage": 91.23, "elapsed_time": "12:41:27", "remaining_time": "1:13:10"}
{"current_steps": 1550, "total_steps": 1688, "loss": 0.5277, "lr": 5e-06, "epoch": 3.6664695446481375, "percentage": 91.82, "elapsed_time": "12:46:19", "remaining_time": "1:08:13"}
{"current_steps": 1560, "total_steps": 1688, "loss": 0.5334, "lr": 5e-06, "epoch": 3.6901241868716737, "percentage": 92.42, "elapsed_time": "12:51:10", "remaining_time": "1:03:16"}
{"current_steps": 1570, "total_steps": 1688, "loss": 0.536, "lr": 5e-06, "epoch": 3.71377882909521, "percentage": 93.01, "elapsed_time": "12:56:01", "remaining_time": "0:58:19"}
{"current_steps": 1580, "total_steps": 1688, "loss": 0.534, "lr": 5e-06, "epoch": 3.737433471318746, "percentage": 93.6, "elapsed_time": "13:00:53", "remaining_time": "0:53:22"}
{"current_steps": 1590, "total_steps": 1688, "loss": 0.5281, "lr": 5e-06, "epoch": 3.7610881135422827, "percentage": 94.19, "elapsed_time": "13:05:44", "remaining_time": "0:48:25"}
{"current_steps": 1600, "total_steps": 1688, "loss": 0.5316, "lr": 5e-06, "epoch": 3.7847427557658193, "percentage": 94.79, "elapsed_time": "13:10:35", "remaining_time": "0:43:28"}
{"current_steps": 1610, "total_steps": 1688, "loss": 0.5353, "lr": 5e-06, "epoch": 3.8083973979893555, "percentage": 95.38, "elapsed_time": "13:15:25", "remaining_time": "0:38:32"}
{"current_steps": 1620, "total_steps": 1688, "loss": 0.5281, "lr": 5e-06, "epoch": 3.8320520402128917, "percentage": 95.97, "elapsed_time": "13:20:17", "remaining_time": "0:33:35"}
{"current_steps": 1630, "total_steps": 1688, "loss": 0.5304, "lr": 5e-06, "epoch": 3.855706682436428, "percentage": 96.56, "elapsed_time": "13:25:08", "remaining_time": "0:28:38"}
{"current_steps": 1640, "total_steps": 1688, "loss": 0.5349, "lr": 5e-06, "epoch": 3.8793613246599645, "percentage": 97.16, "elapsed_time": "13:29:58", "remaining_time": "0:23:42"}
{"current_steps": 1650, "total_steps": 1688, "loss": 0.5306, "lr": 5e-06, "epoch": 3.903015966883501, "percentage": 97.75, "elapsed_time": "13:34:50", "remaining_time": "0:18:45"}
{"current_steps": 1660, "total_steps": 1688, "loss": 0.5288, "lr": 5e-06, "epoch": 3.9266706091070374, "percentage": 98.34, "elapsed_time": "13:39:41", "remaining_time": "0:13:49"}
{"current_steps": 1670, "total_steps": 1688, "loss": 0.5289, "lr": 5e-06, "epoch": 3.9503252513305736, "percentage": 98.93, "elapsed_time": "13:44:32", "remaining_time": "0:08:53"}
{"current_steps": 1680, "total_steps": 1688, "loss": 0.5291, "lr": 5e-06, "epoch": 3.9739798935541097, "percentage": 99.53, "elapsed_time": "13:49:23", "remaining_time": "0:03:56"}
{"current_steps": 1688, "total_steps": 1688, "eval_loss": 0.6530380845069885, "epoch": 3.992903607332939, "percentage": 100.0, "elapsed_time": "13:58:02", "remaining_time": "0:00:00"}
{"current_steps": 1688, "total_steps": 1688, "epoch": 3.992903607332939, "percentage": 100.0, "elapsed_time": "13:59:09", "remaining_time": "0:00:00"}