hp_ablations_gemma_epoch4 / trainer_log.jsonl
sedrickkeh's picture
Training in progress, epoch 3
e3ce362 verified
{"current_steps": 10, "total_steps": 1772, "loss": 0.744, "lr": 5e-06, "epoch": 0.022566995768688293, "percentage": 0.56, "elapsed_time": "0:07:39", "remaining_time": "22:30:49"}
{"current_steps": 20, "total_steps": 1772, "loss": 0.6804, "lr": 5e-06, "epoch": 0.045133991537376586, "percentage": 1.13, "elapsed_time": "0:15:16", "remaining_time": "22:17:48"}
{"current_steps": 30, "total_steps": 1772, "loss": 0.6629, "lr": 5e-06, "epoch": 0.06770098730606489, "percentage": 1.69, "elapsed_time": "0:22:52", "remaining_time": "22:08:17"}
{"current_steps": 40, "total_steps": 1772, "loss": 0.6403, "lr": 5e-06, "epoch": 0.09026798307475317, "percentage": 2.26, "elapsed_time": "0:30:28", "remaining_time": "21:59:49"}
{"current_steps": 50, "total_steps": 1772, "loss": 0.6334, "lr": 5e-06, "epoch": 0.11283497884344147, "percentage": 2.82, "elapsed_time": "0:38:05", "remaining_time": "21:51:38"}
{"current_steps": 60, "total_steps": 1772, "loss": 0.6331, "lr": 5e-06, "epoch": 0.13540197461212977, "percentage": 3.39, "elapsed_time": "0:45:41", "remaining_time": "21:43:39"}
{"current_steps": 70, "total_steps": 1772, "loss": 0.6285, "lr": 5e-06, "epoch": 0.15796897038081806, "percentage": 3.95, "elapsed_time": "0:53:17", "remaining_time": "21:35:48"}
{"current_steps": 80, "total_steps": 1772, "loss": 0.6309, "lr": 5e-06, "epoch": 0.18053596614950634, "percentage": 4.51, "elapsed_time": "1:00:54", "remaining_time": "21:28:05"}
{"current_steps": 90, "total_steps": 1772, "loss": 0.6285, "lr": 5e-06, "epoch": 0.20310296191819463, "percentage": 5.08, "elapsed_time": "1:08:30", "remaining_time": "21:20:17"}
{"current_steps": 100, "total_steps": 1772, "loss": 0.6273, "lr": 5e-06, "epoch": 0.22566995768688294, "percentage": 5.64, "elapsed_time": "1:16:06", "remaining_time": "21:12:34"}
{"current_steps": 110, "total_steps": 1772, "loss": 0.6183, "lr": 5e-06, "epoch": 0.24823695345557123, "percentage": 6.21, "elapsed_time": "1:23:43", "remaining_time": "21:04:53"}
{"current_steps": 120, "total_steps": 1772, "loss": 0.6169, "lr": 5e-06, "epoch": 0.27080394922425954, "percentage": 6.77, "elapsed_time": "1:31:19", "remaining_time": "20:57:15"}
{"current_steps": 130, "total_steps": 1772, "loss": 0.6184, "lr": 5e-06, "epoch": 0.2933709449929478, "percentage": 7.34, "elapsed_time": "1:38:56", "remaining_time": "20:49:37"}
{"current_steps": 140, "total_steps": 1772, "loss": 0.6134, "lr": 5e-06, "epoch": 0.3159379407616361, "percentage": 7.9, "elapsed_time": "1:46:32", "remaining_time": "20:42:00"}
{"current_steps": 150, "total_steps": 1772, "loss": 0.6135, "lr": 5e-06, "epoch": 0.3385049365303244, "percentage": 8.47, "elapsed_time": "1:54:09", "remaining_time": "20:34:24"}
{"current_steps": 160, "total_steps": 1772, "loss": 0.6163, "lr": 5e-06, "epoch": 0.3610719322990127, "percentage": 9.03, "elapsed_time": "2:01:45", "remaining_time": "20:26:46"}
{"current_steps": 170, "total_steps": 1772, "loss": 0.6124, "lr": 5e-06, "epoch": 0.383638928067701, "percentage": 9.59, "elapsed_time": "2:09:22", "remaining_time": "20:19:09"}
{"current_steps": 180, "total_steps": 1772, "loss": 0.6086, "lr": 5e-06, "epoch": 0.40620592383638926, "percentage": 10.16, "elapsed_time": "2:16:59", "remaining_time": "20:11:33"}
{"current_steps": 190, "total_steps": 1772, "loss": 0.6034, "lr": 5e-06, "epoch": 0.4287729196050776, "percentage": 10.72, "elapsed_time": "2:24:35", "remaining_time": "20:03:56"}
{"current_steps": 200, "total_steps": 1772, "loss": 0.6094, "lr": 5e-06, "epoch": 0.4513399153737659, "percentage": 11.29, "elapsed_time": "2:32:12", "remaining_time": "19:56:20"}
{"current_steps": 210, "total_steps": 1772, "loss": 0.606, "lr": 5e-06, "epoch": 0.47390691114245415, "percentage": 11.85, "elapsed_time": "2:39:48", "remaining_time": "19:48:42"}
{"current_steps": 220, "total_steps": 1772, "loss": 0.5985, "lr": 5e-06, "epoch": 0.49647390691114246, "percentage": 12.42, "elapsed_time": "2:47:25", "remaining_time": "19:41:03"}
{"current_steps": 230, "total_steps": 1772, "loss": 0.6074, "lr": 5e-06, "epoch": 0.5190409026798307, "percentage": 12.98, "elapsed_time": "2:55:01", "remaining_time": "19:33:26"}
{"current_steps": 240, "total_steps": 1772, "loss": 0.6056, "lr": 5e-06, "epoch": 0.5416078984485191, "percentage": 13.54, "elapsed_time": "3:02:38", "remaining_time": "19:25:49"}
{"current_steps": 250, "total_steps": 1772, "loss": 0.5998, "lr": 5e-06, "epoch": 0.5641748942172073, "percentage": 14.11, "elapsed_time": "3:10:14", "remaining_time": "19:18:11"}
{"current_steps": 260, "total_steps": 1772, "loss": 0.6029, "lr": 5e-06, "epoch": 0.5867418899858956, "percentage": 14.67, "elapsed_time": "3:17:50", "remaining_time": "19:10:34"}
{"current_steps": 270, "total_steps": 1772, "loss": 0.5982, "lr": 5e-06, "epoch": 0.609308885754584, "percentage": 15.24, "elapsed_time": "3:25:27", "remaining_time": "19:02:57"}
{"current_steps": 280, "total_steps": 1772, "loss": 0.6043, "lr": 5e-06, "epoch": 0.6318758815232722, "percentage": 15.8, "elapsed_time": "3:33:04", "remaining_time": "18:55:20"}
{"current_steps": 290, "total_steps": 1772, "loss": 0.5979, "lr": 5e-06, "epoch": 0.6544428772919605, "percentage": 16.37, "elapsed_time": "3:40:40", "remaining_time": "18:47:43"}
{"current_steps": 300, "total_steps": 1772, "loss": 0.6042, "lr": 5e-06, "epoch": 0.6770098730606487, "percentage": 16.93, "elapsed_time": "3:48:16", "remaining_time": "18:40:05"}
{"current_steps": 310, "total_steps": 1772, "loss": 0.5972, "lr": 5e-06, "epoch": 0.6995768688293371, "percentage": 17.49, "elapsed_time": "3:55:53", "remaining_time": "18:32:28"}
{"current_steps": 320, "total_steps": 1772, "loss": 0.6006, "lr": 5e-06, "epoch": 0.7221438645980254, "percentage": 18.06, "elapsed_time": "4:03:29", "remaining_time": "18:24:50"}
{"current_steps": 330, "total_steps": 1772, "loss": 0.5962, "lr": 5e-06, "epoch": 0.7447108603667136, "percentage": 18.62, "elapsed_time": "4:11:06", "remaining_time": "18:17:14"}
{"current_steps": 340, "total_steps": 1772, "loss": 0.5956, "lr": 5e-06, "epoch": 0.767277856135402, "percentage": 19.19, "elapsed_time": "4:18:42", "remaining_time": "18:09:37"}
{"current_steps": 350, "total_steps": 1772, "loss": 0.5968, "lr": 5e-06, "epoch": 0.7898448519040903, "percentage": 19.75, "elapsed_time": "4:26:19", "remaining_time": "18:02:00"}
{"current_steps": 360, "total_steps": 1772, "loss": 0.596, "lr": 5e-06, "epoch": 0.8124118476727785, "percentage": 20.32, "elapsed_time": "4:33:55", "remaining_time": "17:54:24"}
{"current_steps": 370, "total_steps": 1772, "loss": 0.6027, "lr": 5e-06, "epoch": 0.8349788434414669, "percentage": 20.88, "elapsed_time": "4:41:32", "remaining_time": "17:46:48"}
{"current_steps": 380, "total_steps": 1772, "loss": 0.5999, "lr": 5e-06, "epoch": 0.8575458392101551, "percentage": 21.44, "elapsed_time": "4:49:08", "remaining_time": "17:39:11"}
{"current_steps": 390, "total_steps": 1772, "loss": 0.5941, "lr": 5e-06, "epoch": 0.8801128349788434, "percentage": 22.01, "elapsed_time": "4:56:45", "remaining_time": "17:31:34"}
{"current_steps": 400, "total_steps": 1772, "loss": 0.5962, "lr": 5e-06, "epoch": 0.9026798307475318, "percentage": 22.57, "elapsed_time": "5:04:21", "remaining_time": "17:23:57"}
{"current_steps": 410, "total_steps": 1772, "loss": 0.5939, "lr": 5e-06, "epoch": 0.92524682651622, "percentage": 23.14, "elapsed_time": "5:11:58", "remaining_time": "17:16:21"}
{"current_steps": 420, "total_steps": 1772, "loss": 0.5944, "lr": 5e-06, "epoch": 0.9478138222849083, "percentage": 23.7, "elapsed_time": "5:19:34", "remaining_time": "17:08:44"}
{"current_steps": 430, "total_steps": 1772, "loss": 0.5887, "lr": 5e-06, "epoch": 0.9703808180535967, "percentage": 24.27, "elapsed_time": "5:27:11", "remaining_time": "17:01:08"}
{"current_steps": 440, "total_steps": 1772, "loss": 0.5935, "lr": 5e-06, "epoch": 0.9929478138222849, "percentage": 24.83, "elapsed_time": "5:34:47", "remaining_time": "16:53:31"}
{"current_steps": 443, "total_steps": 1772, "eval_loss": 0.5913009643554688, "epoch": 0.9997179125528914, "percentage": 25.0, "elapsed_time": "5:43:10", "remaining_time": "17:09:30"}
{"current_steps": 450, "total_steps": 1772, "loss": 0.6021, "lr": 5e-06, "epoch": 1.0155148095909732, "percentage": 25.4, "elapsed_time": "5:49:27", "remaining_time": "17:06:38"}
{"current_steps": 460, "total_steps": 1772, "loss": 0.528, "lr": 5e-06, "epoch": 1.0380818053596614, "percentage": 25.96, "elapsed_time": "5:57:04", "remaining_time": "16:58:26"}
{"current_steps": 470, "total_steps": 1772, "loss": 0.543, "lr": 5e-06, "epoch": 1.0606488011283497, "percentage": 26.52, "elapsed_time": "6:04:40", "remaining_time": "16:50:14"}
{"current_steps": 480, "total_steps": 1772, "loss": 0.5306, "lr": 5e-06, "epoch": 1.0832157968970382, "percentage": 27.09, "elapsed_time": "6:12:17", "remaining_time": "16:42:05"}
{"current_steps": 490, "total_steps": 1772, "loss": 0.541, "lr": 5e-06, "epoch": 1.1057827926657264, "percentage": 27.65, "elapsed_time": "6:19:54", "remaining_time": "16:33:57"}
{"current_steps": 500, "total_steps": 1772, "loss": 0.5393, "lr": 5e-06, "epoch": 1.1283497884344147, "percentage": 28.22, "elapsed_time": "6:27:31", "remaining_time": "16:25:51"}
{"current_steps": 510, "total_steps": 1772, "loss": 0.5409, "lr": 5e-06, "epoch": 1.150916784203103, "percentage": 28.78, "elapsed_time": "6:35:08", "remaining_time": "16:17:46"}
{"current_steps": 520, "total_steps": 1772, "loss": 0.5336, "lr": 5e-06, "epoch": 1.1734837799717912, "percentage": 29.35, "elapsed_time": "6:42:45", "remaining_time": "16:09:42"}
{"current_steps": 530, "total_steps": 1772, "loss": 0.5393, "lr": 5e-06, "epoch": 1.1960507757404795, "percentage": 29.91, "elapsed_time": "6:50:21", "remaining_time": "16:01:39"}
{"current_steps": 540, "total_steps": 1772, "loss": 0.5371, "lr": 5e-06, "epoch": 1.2186177715091677, "percentage": 30.47, "elapsed_time": "6:57:58", "remaining_time": "15:53:36"}
{"current_steps": 550, "total_steps": 1772, "loss": 0.5422, "lr": 5e-06, "epoch": 1.2411847672778562, "percentage": 31.04, "elapsed_time": "7:05:34", "remaining_time": "15:45:34"}
{"current_steps": 560, "total_steps": 1772, "loss": 0.5438, "lr": 5e-06, "epoch": 1.2637517630465445, "percentage": 31.6, "elapsed_time": "7:13:11", "remaining_time": "15:37:32"}
{"current_steps": 570, "total_steps": 1772, "loss": 0.5415, "lr": 5e-06, "epoch": 1.2863187588152327, "percentage": 32.17, "elapsed_time": "7:20:48", "remaining_time": "15:29:32"}
{"current_steps": 580, "total_steps": 1772, "loss": 0.5409, "lr": 5e-06, "epoch": 1.308885754583921, "percentage": 32.73, "elapsed_time": "7:28:24", "remaining_time": "15:21:33"}
{"current_steps": 590, "total_steps": 1772, "loss": 0.5409, "lr": 5e-06, "epoch": 1.3314527503526092, "percentage": 33.3, "elapsed_time": "7:36:01", "remaining_time": "15:13:35"}
{"current_steps": 600, "total_steps": 1772, "loss": 0.5463, "lr": 5e-06, "epoch": 1.3540197461212977, "percentage": 33.86, "elapsed_time": "7:43:37", "remaining_time": "15:05:37"}
{"current_steps": 610, "total_steps": 1772, "loss": 0.539, "lr": 5e-06, "epoch": 1.376586741889986, "percentage": 34.42, "elapsed_time": "7:51:14", "remaining_time": "14:57:40"}
{"current_steps": 620, "total_steps": 1772, "loss": 0.543, "lr": 5e-06, "epoch": 1.3991537376586742, "percentage": 34.99, "elapsed_time": "7:58:51", "remaining_time": "14:49:44"}
{"current_steps": 630, "total_steps": 1772, "loss": 0.5411, "lr": 5e-06, "epoch": 1.4217207334273625, "percentage": 35.55, "elapsed_time": "8:06:27", "remaining_time": "14:41:48"}
{"current_steps": 640, "total_steps": 1772, "loss": 0.5407, "lr": 5e-06, "epoch": 1.4442877291960508, "percentage": 36.12, "elapsed_time": "8:14:04", "remaining_time": "14:33:53"}
{"current_steps": 650, "total_steps": 1772, "loss": 0.5418, "lr": 5e-06, "epoch": 1.466854724964739, "percentage": 36.68, "elapsed_time": "8:21:41", "remaining_time": "14:25:59"}
{"current_steps": 660, "total_steps": 1772, "loss": 0.5456, "lr": 5e-06, "epoch": 1.4894217207334273, "percentage": 37.25, "elapsed_time": "8:29:17", "remaining_time": "14:18:05"}
{"current_steps": 670, "total_steps": 1772, "loss": 0.5405, "lr": 5e-06, "epoch": 1.5119887165021155, "percentage": 37.81, "elapsed_time": "8:36:54", "remaining_time": "14:10:11"}
{"current_steps": 680, "total_steps": 1772, "loss": 0.5345, "lr": 5e-06, "epoch": 1.5345557122708038, "percentage": 38.37, "elapsed_time": "8:44:30", "remaining_time": "14:02:18"}
{"current_steps": 690, "total_steps": 1772, "loss": 0.537, "lr": 5e-06, "epoch": 1.5571227080394923, "percentage": 38.94, "elapsed_time": "8:52:07", "remaining_time": "13:54:25"}
{"current_steps": 700, "total_steps": 1772, "loss": 0.5407, "lr": 5e-06, "epoch": 1.5796897038081805, "percentage": 39.5, "elapsed_time": "8:59:44", "remaining_time": "13:46:33"}
{"current_steps": 710, "total_steps": 1772, "loss": 0.5437, "lr": 5e-06, "epoch": 1.6022566995768688, "percentage": 40.07, "elapsed_time": "9:07:20", "remaining_time": "13:38:42"}
{"current_steps": 720, "total_steps": 1772, "loss": 0.5516, "lr": 5e-06, "epoch": 1.6248236953455573, "percentage": 40.63, "elapsed_time": "9:14:57", "remaining_time": "13:30:51"}
{"current_steps": 730, "total_steps": 1772, "loss": 0.5347, "lr": 5e-06, "epoch": 1.6473906911142455, "percentage": 41.2, "elapsed_time": "9:22:33", "remaining_time": "13:23:00"}
{"current_steps": 740, "total_steps": 1772, "loss": 0.5506, "lr": 5e-06, "epoch": 1.6699576868829338, "percentage": 41.76, "elapsed_time": "9:30:10", "remaining_time": "13:15:09"}
{"current_steps": 750, "total_steps": 1772, "loss": 0.5424, "lr": 5e-06, "epoch": 1.692524682651622, "percentage": 42.33, "elapsed_time": "9:37:46", "remaining_time": "13:07:19"}
{"current_steps": 760, "total_steps": 1772, "loss": 0.5412, "lr": 5e-06, "epoch": 1.7150916784203103, "percentage": 42.89, "elapsed_time": "9:45:23", "remaining_time": "12:59:29"}
{"current_steps": 770, "total_steps": 1772, "loss": 0.5414, "lr": 5e-06, "epoch": 1.7376586741889986, "percentage": 43.45, "elapsed_time": "9:52:59", "remaining_time": "12:51:39"}
{"current_steps": 780, "total_steps": 1772, "loss": 0.5372, "lr": 5e-06, "epoch": 1.7602256699576868, "percentage": 44.02, "elapsed_time": "10:00:36", "remaining_time": "12:43:50"}
{"current_steps": 790, "total_steps": 1772, "loss": 0.5411, "lr": 5e-06, "epoch": 1.782792665726375, "percentage": 44.58, "elapsed_time": "10:08:13", "remaining_time": "12:36:02"}
{"current_steps": 800, "total_steps": 1772, "loss": 0.5364, "lr": 5e-06, "epoch": 1.8053596614950633, "percentage": 45.15, "elapsed_time": "10:15:49", "remaining_time": "12:28:13"}
{"current_steps": 810, "total_steps": 1772, "loss": 0.5471, "lr": 5e-06, "epoch": 1.8279266572637518, "percentage": 45.71, "elapsed_time": "10:23:26", "remaining_time": "12:20:26"}
{"current_steps": 820, "total_steps": 1772, "loss": 0.5343, "lr": 5e-06, "epoch": 1.85049365303244, "percentage": 46.28, "elapsed_time": "10:31:03", "remaining_time": "12:12:38"}
{"current_steps": 830, "total_steps": 1772, "loss": 0.546, "lr": 5e-06, "epoch": 1.8730606488011283, "percentage": 46.84, "elapsed_time": "10:38:39", "remaining_time": "12:04:50"}
{"current_steps": 840, "total_steps": 1772, "loss": 0.5461, "lr": 5e-06, "epoch": 1.8956276445698168, "percentage": 47.4, "elapsed_time": "10:46:16", "remaining_time": "11:57:03"}
{"current_steps": 850, "total_steps": 1772, "loss": 0.5342, "lr": 5e-06, "epoch": 1.918194640338505, "percentage": 47.97, "elapsed_time": "10:53:53", "remaining_time": "11:49:16"}
{"current_steps": 860, "total_steps": 1772, "loss": 0.5391, "lr": 5e-06, "epoch": 1.9407616361071933, "percentage": 48.53, "elapsed_time": "11:01:29", "remaining_time": "11:41:29"}
{"current_steps": 870, "total_steps": 1772, "loss": 0.5457, "lr": 5e-06, "epoch": 1.9633286318758816, "percentage": 49.1, "elapsed_time": "11:09:06", "remaining_time": "11:33:42"}
{"current_steps": 880, "total_steps": 1772, "loss": 0.5435, "lr": 5e-06, "epoch": 1.9858956276445698, "percentage": 49.66, "elapsed_time": "11:16:42", "remaining_time": "11:25:56"}
{"current_steps": 886, "total_steps": 1772, "eval_loss": 0.5871431231498718, "epoch": 1.9994358251057829, "percentage": 50.0, "elapsed_time": "11:27:27", "remaining_time": "11:27:27"}
{"current_steps": 890, "total_steps": 1772, "loss": 0.5672, "lr": 5e-06, "epoch": 2.008462623413258, "percentage": 50.23, "elapsed_time": "11:31:28", "remaining_time": "11:25:15"}
{"current_steps": 900, "total_steps": 1772, "loss": 0.4779, "lr": 5e-06, "epoch": 2.0310296191819464, "percentage": 50.79, "elapsed_time": "11:39:04", "remaining_time": "11:17:19"}
{"current_steps": 910, "total_steps": 1772, "loss": 0.4796, "lr": 5e-06, "epoch": 2.0535966149506346, "percentage": 51.35, "elapsed_time": "11:46:41", "remaining_time": "11:09:24"}
{"current_steps": 920, "total_steps": 1772, "loss": 0.472, "lr": 5e-06, "epoch": 2.076163610719323, "percentage": 51.92, "elapsed_time": "11:54:18", "remaining_time": "11:01:30"}
{"current_steps": 930, "total_steps": 1772, "loss": 0.4792, "lr": 5e-06, "epoch": 2.098730606488011, "percentage": 52.48, "elapsed_time": "12:01:55", "remaining_time": "10:53:36"}
{"current_steps": 940, "total_steps": 1772, "loss": 0.4838, "lr": 5e-06, "epoch": 2.1212976022566994, "percentage": 53.05, "elapsed_time": "12:09:31", "remaining_time": "10:45:42"}
{"current_steps": 950, "total_steps": 1772, "loss": 0.4855, "lr": 5e-06, "epoch": 2.143864598025388, "percentage": 53.61, "elapsed_time": "12:17:08", "remaining_time": "10:37:49"}
{"current_steps": 960, "total_steps": 1772, "loss": 0.482, "lr": 5e-06, "epoch": 2.1664315937940763, "percentage": 54.18, "elapsed_time": "12:24:44", "remaining_time": "10:29:56"}
{"current_steps": 970, "total_steps": 1772, "loss": 0.48, "lr": 5e-06, "epoch": 2.1889985895627646, "percentage": 54.74, "elapsed_time": "12:32:21", "remaining_time": "10:22:03"}
{"current_steps": 980, "total_steps": 1772, "loss": 0.475, "lr": 5e-06, "epoch": 2.211565585331453, "percentage": 55.3, "elapsed_time": "12:39:57", "remaining_time": "10:14:10"}
{"current_steps": 990, "total_steps": 1772, "loss": 0.4748, "lr": 5e-06, "epoch": 2.234132581100141, "percentage": 55.87, "elapsed_time": "12:47:34", "remaining_time": "10:06:18"}
{"current_steps": 1000, "total_steps": 1772, "loss": 0.4839, "lr": 5e-06, "epoch": 2.2566995768688294, "percentage": 56.43, "elapsed_time": "12:55:11", "remaining_time": "9:58:26"}
{"current_steps": 1010, "total_steps": 1772, "loss": 0.4907, "lr": 5e-06, "epoch": 2.2792665726375176, "percentage": 57.0, "elapsed_time": "13:02:47", "remaining_time": "9:50:35"}
{"current_steps": 1020, "total_steps": 1772, "loss": 0.485, "lr": 5e-06, "epoch": 2.301833568406206, "percentage": 57.56, "elapsed_time": "13:10:24", "remaining_time": "9:42:44"}
{"current_steps": 1030, "total_steps": 1772, "loss": 0.489, "lr": 5e-06, "epoch": 2.324400564174894, "percentage": 58.13, "elapsed_time": "13:18:01", "remaining_time": "9:34:53"}
{"current_steps": 1040, "total_steps": 1772, "loss": 0.487, "lr": 5e-06, "epoch": 2.3469675599435824, "percentage": 58.69, "elapsed_time": "13:25:38", "remaining_time": "9:27:03"}
{"current_steps": 1050, "total_steps": 1772, "loss": 0.4849, "lr": 5e-06, "epoch": 2.3695345557122707, "percentage": 59.26, "elapsed_time": "13:33:15", "remaining_time": "9:19:12"}
{"current_steps": 1060, "total_steps": 1772, "loss": 0.4844, "lr": 5e-06, "epoch": 2.392101551480959, "percentage": 59.82, "elapsed_time": "13:40:52", "remaining_time": "9:11:22"}
{"current_steps": 1070, "total_steps": 1772, "loss": 0.4848, "lr": 5e-06, "epoch": 2.414668547249647, "percentage": 60.38, "elapsed_time": "13:48:29", "remaining_time": "9:03:33"}
{"current_steps": 1080, "total_steps": 1772, "loss": 0.4883, "lr": 5e-06, "epoch": 2.4372355430183354, "percentage": 60.95, "elapsed_time": "13:56:05", "remaining_time": "8:55:43"}
{"current_steps": 1090, "total_steps": 1772, "loss": 0.4863, "lr": 5e-06, "epoch": 2.459802538787024, "percentage": 61.51, "elapsed_time": "14:03:42", "remaining_time": "8:47:54"}
{"current_steps": 1100, "total_steps": 1772, "loss": 0.4896, "lr": 5e-06, "epoch": 2.4823695345557124, "percentage": 62.08, "elapsed_time": "14:11:19", "remaining_time": "8:40:04"}
{"current_steps": 1110, "total_steps": 1772, "loss": 0.4922, "lr": 5e-06, "epoch": 2.5049365303244007, "percentage": 62.64, "elapsed_time": "14:18:56", "remaining_time": "8:32:15"}
{"current_steps": 1120, "total_steps": 1772, "loss": 0.4838, "lr": 5e-06, "epoch": 2.527503526093089, "percentage": 63.21, "elapsed_time": "14:26:32", "remaining_time": "8:24:27"}
{"current_steps": 1130, "total_steps": 1772, "loss": 0.4883, "lr": 5e-06, "epoch": 2.550070521861777, "percentage": 63.77, "elapsed_time": "14:34:09", "remaining_time": "8:16:38"}
{"current_steps": 1140, "total_steps": 1772, "loss": 0.4871, "lr": 5e-06, "epoch": 2.5726375176304654, "percentage": 64.33, "elapsed_time": "14:41:46", "remaining_time": "8:08:50"}
{"current_steps": 1150, "total_steps": 1772, "loss": 0.4877, "lr": 5e-06, "epoch": 2.5952045133991537, "percentage": 64.9, "elapsed_time": "14:49:22", "remaining_time": "8:01:02"}
{"current_steps": 1160, "total_steps": 1772, "loss": 0.4915, "lr": 5e-06, "epoch": 2.617771509167842, "percentage": 65.46, "elapsed_time": "14:56:59", "remaining_time": "7:53:14"}
{"current_steps": 1170, "total_steps": 1772, "loss": 0.4809, "lr": 5e-06, "epoch": 2.64033850493653, "percentage": 66.03, "elapsed_time": "15:04:36", "remaining_time": "7:45:26"}
{"current_steps": 1180, "total_steps": 1772, "loss": 0.4913, "lr": 5e-06, "epoch": 2.6629055007052185, "percentage": 66.59, "elapsed_time": "15:12:12", "remaining_time": "7:37:39"}
{"current_steps": 1190, "total_steps": 1772, "loss": 0.4919, "lr": 5e-06, "epoch": 2.685472496473907, "percentage": 67.16, "elapsed_time": "15:19:49", "remaining_time": "7:29:51"}
{"current_steps": 1200, "total_steps": 1772, "loss": 0.4901, "lr": 5e-06, "epoch": 2.7080394922425954, "percentage": 67.72, "elapsed_time": "15:27:26", "remaining_time": "7:22:04"}
{"current_steps": 1210, "total_steps": 1772, "loss": 0.4913, "lr": 5e-06, "epoch": 2.7306064880112837, "percentage": 68.28, "elapsed_time": "15:35:02", "remaining_time": "7:14:17"}
{"current_steps": 1220, "total_steps": 1772, "loss": 0.4884, "lr": 5e-06, "epoch": 2.753173483779972, "percentage": 68.85, "elapsed_time": "15:42:39", "remaining_time": "7:06:30"}
{"current_steps": 1230, "total_steps": 1772, "loss": 0.4945, "lr": 5e-06, "epoch": 2.77574047954866, "percentage": 69.41, "elapsed_time": "15:50:16", "remaining_time": "6:58:44"}
{"current_steps": 1240, "total_steps": 1772, "loss": 0.4934, "lr": 5e-06, "epoch": 2.7983074753173485, "percentage": 69.98, "elapsed_time": "15:57:52", "remaining_time": "6:50:57"}
{"current_steps": 1250, "total_steps": 1772, "loss": 0.4933, "lr": 5e-06, "epoch": 2.8208744710860367, "percentage": 70.54, "elapsed_time": "16:05:29", "remaining_time": "6:43:11"}
{"current_steps": 1260, "total_steps": 1772, "loss": 0.495, "lr": 5e-06, "epoch": 2.843441466854725, "percentage": 71.11, "elapsed_time": "16:13:06", "remaining_time": "6:35:25"}
{"current_steps": 1270, "total_steps": 1772, "loss": 0.4919, "lr": 5e-06, "epoch": 2.8660084626234132, "percentage": 71.67, "elapsed_time": "16:20:43", "remaining_time": "6:27:39"}
{"current_steps": 1280, "total_steps": 1772, "loss": 0.4901, "lr": 5e-06, "epoch": 2.8885754583921015, "percentage": 72.23, "elapsed_time": "16:28:19", "remaining_time": "6:19:53"}
{"current_steps": 1290, "total_steps": 1772, "loss": 0.4938, "lr": 5e-06, "epoch": 2.9111424541607898, "percentage": 72.8, "elapsed_time": "16:35:56", "remaining_time": "6:12:07"}
{"current_steps": 1300, "total_steps": 1772, "loss": 0.4889, "lr": 5e-06, "epoch": 2.933709449929478, "percentage": 73.36, "elapsed_time": "16:43:33", "remaining_time": "6:04:22"}
{"current_steps": 1310, "total_steps": 1772, "loss": 0.493, "lr": 5e-06, "epoch": 2.9562764456981663, "percentage": 73.93, "elapsed_time": "16:51:10", "remaining_time": "5:56:36"}
{"current_steps": 1320, "total_steps": 1772, "loss": 0.4899, "lr": 5e-06, "epoch": 2.9788434414668545, "percentage": 74.49, "elapsed_time": "16:58:47", "remaining_time": "5:48:51"}
{"current_steps": 1329, "total_steps": 1772, "eval_loss": 0.6006776094436646, "epoch": 2.9991537376586743, "percentage": 75.0, "elapsed_time": "17:11:53", "remaining_time": "5:43:57"}
{"current_steps": 1330, "total_steps": 1772, "loss": 0.5325, "lr": 5e-06, "epoch": 3.0014104372355432, "percentage": 75.06, "elapsed_time": "17:13:32", "remaining_time": "5:43:28"}
{"current_steps": 1340, "total_steps": 1772, "loss": 0.4154, "lr": 5e-06, "epoch": 3.0239774330042315, "percentage": 75.62, "elapsed_time": "17:21:10", "remaining_time": "5:35:39"}
{"current_steps": 1350, "total_steps": 1772, "loss": 0.4178, "lr": 5e-06, "epoch": 3.0465444287729198, "percentage": 76.19, "elapsed_time": "17:28:47", "remaining_time": "5:27:50"}
{"current_steps": 1360, "total_steps": 1772, "loss": 0.4179, "lr": 5e-06, "epoch": 3.069111424541608, "percentage": 76.75, "elapsed_time": "17:36:24", "remaining_time": "5:20:01"}
{"current_steps": 1370, "total_steps": 1772, "loss": 0.4199, "lr": 5e-06, "epoch": 3.0916784203102963, "percentage": 77.31, "elapsed_time": "17:44:01", "remaining_time": "5:12:12"}
{"current_steps": 1380, "total_steps": 1772, "loss": 0.4172, "lr": 5e-06, "epoch": 3.1142454160789845, "percentage": 77.88, "elapsed_time": "17:51:38", "remaining_time": "5:04:24"}
{"current_steps": 1390, "total_steps": 1772, "loss": 0.4196, "lr": 5e-06, "epoch": 3.136812411847673, "percentage": 78.44, "elapsed_time": "17:59:15", "remaining_time": "4:56:36"}
{"current_steps": 1400, "total_steps": 1772, "loss": 0.4217, "lr": 5e-06, "epoch": 3.159379407616361, "percentage": 79.01, "elapsed_time": "18:06:52", "remaining_time": "4:48:47"}
{"current_steps": 1410, "total_steps": 1772, "loss": 0.4228, "lr": 5e-06, "epoch": 3.1819464033850493, "percentage": 79.57, "elapsed_time": "18:14:29", "remaining_time": "4:40:59"}
{"current_steps": 1420, "total_steps": 1772, "loss": 0.4264, "lr": 5e-06, "epoch": 3.2045133991537376, "percentage": 80.14, "elapsed_time": "18:22:06", "remaining_time": "4:33:12"}
{"current_steps": 1430, "total_steps": 1772, "loss": 0.4236, "lr": 5e-06, "epoch": 3.227080394922426, "percentage": 80.7, "elapsed_time": "18:29:43", "remaining_time": "4:25:24"}
{"current_steps": 1440, "total_steps": 1772, "loss": 0.429, "lr": 5e-06, "epoch": 3.249647390691114, "percentage": 81.26, "elapsed_time": "18:37:20", "remaining_time": "4:17:36"}
{"current_steps": 1450, "total_steps": 1772, "loss": 0.4315, "lr": 5e-06, "epoch": 3.272214386459803, "percentage": 81.83, "elapsed_time": "18:44:57", "remaining_time": "4:09:49"}
{"current_steps": 1460, "total_steps": 1772, "loss": 0.4311, "lr": 5e-06, "epoch": 3.294781382228491, "percentage": 82.39, "elapsed_time": "18:52:34", "remaining_time": "4:02:01"}
{"current_steps": 1470, "total_steps": 1772, "loss": 0.429, "lr": 5e-06, "epoch": 3.3173483779971793, "percentage": 82.96, "elapsed_time": "19:00:11", "remaining_time": "3:54:14"}
{"current_steps": 1480, "total_steps": 1772, "loss": 0.4268, "lr": 5e-06, "epoch": 3.3399153737658676, "percentage": 83.52, "elapsed_time": "19:07:49", "remaining_time": "3:46:27"}
{"current_steps": 1490, "total_steps": 1772, "loss": 0.4291, "lr": 5e-06, "epoch": 3.362482369534556, "percentage": 84.09, "elapsed_time": "19:15:26", "remaining_time": "3:38:40"}
{"current_steps": 1500, "total_steps": 1772, "loss": 0.4369, "lr": 5e-06, "epoch": 3.385049365303244, "percentage": 84.65, "elapsed_time": "19:23:03", "remaining_time": "3:30:54"}
{"current_steps": 1510, "total_steps": 1772, "loss": 0.4289, "lr": 5e-06, "epoch": 3.4076163610719323, "percentage": 85.21, "elapsed_time": "19:30:40", "remaining_time": "3:23:07"}
{"current_steps": 1520, "total_steps": 1772, "loss": 0.4285, "lr": 5e-06, "epoch": 3.4301833568406206, "percentage": 85.78, "elapsed_time": "19:38:16", "remaining_time": "3:15:20"}
{"current_steps": 1530, "total_steps": 1772, "loss": 0.4339, "lr": 5e-06, "epoch": 3.452750352609309, "percentage": 86.34, "elapsed_time": "19:45:54", "remaining_time": "3:07:34"}
{"current_steps": 1540, "total_steps": 1772, "loss": 0.4298, "lr": 5e-06, "epoch": 3.475317348377997, "percentage": 86.91, "elapsed_time": "19:53:31", "remaining_time": "2:59:48"}
{"current_steps": 1550, "total_steps": 1772, "loss": 0.4367, "lr": 5e-06, "epoch": 3.4978843441466854, "percentage": 87.47, "elapsed_time": "20:01:08", "remaining_time": "2:52:02"}
{"current_steps": 1560, "total_steps": 1772, "loss": 0.4333, "lr": 5e-06, "epoch": 3.5204513399153736, "percentage": 88.04, "elapsed_time": "20:08:45", "remaining_time": "2:44:16"}
{"current_steps": 1570, "total_steps": 1772, "loss": 0.4381, "lr": 5e-06, "epoch": 3.543018335684062, "percentage": 88.6, "elapsed_time": "20:16:22", "remaining_time": "2:36:30"}
{"current_steps": 1580, "total_steps": 1772, "loss": 0.4287, "lr": 5e-06, "epoch": 3.56558533145275, "percentage": 89.16, "elapsed_time": "20:23:59", "remaining_time": "2:28:44"}
{"current_steps": 1590, "total_steps": 1772, "loss": 0.4288, "lr": 5e-06, "epoch": 3.5881523272214384, "percentage": 89.73, "elapsed_time": "20:31:36", "remaining_time": "2:20:58"}
{"current_steps": 1600, "total_steps": 1772, "loss": 0.4322, "lr": 5e-06, "epoch": 3.610719322990127, "percentage": 90.29, "elapsed_time": "20:39:13", "remaining_time": "2:13:12"}
{"current_steps": 1610, "total_steps": 1772, "loss": 0.4353, "lr": 5e-06, "epoch": 3.6332863187588154, "percentage": 90.86, "elapsed_time": "20:46:50", "remaining_time": "2:05:27"}
{"current_steps": 1620, "total_steps": 1772, "loss": 0.4321, "lr": 5e-06, "epoch": 3.6558533145275036, "percentage": 91.42, "elapsed_time": "20:54:27", "remaining_time": "1:57:42"}
{"current_steps": 1630, "total_steps": 1772, "loss": 0.4352, "lr": 5e-06, "epoch": 3.678420310296192, "percentage": 91.99, "elapsed_time": "21:02:04", "remaining_time": "1:49:56"}
{"current_steps": 1640, "total_steps": 1772, "loss": 0.436, "lr": 5e-06, "epoch": 3.70098730606488, "percentage": 92.55, "elapsed_time": "21:09:41", "remaining_time": "1:42:11"}
{"current_steps": 1650, "total_steps": 1772, "loss": 0.4321, "lr": 5e-06, "epoch": 3.7235543018335684, "percentage": 93.12, "elapsed_time": "21:17:18", "remaining_time": "1:34:26"}
{"current_steps": 1660, "total_steps": 1772, "loss": 0.4308, "lr": 5e-06, "epoch": 3.7461212976022567, "percentage": 93.68, "elapsed_time": "21:24:55", "remaining_time": "1:26:41"}
{"current_steps": 1670, "total_steps": 1772, "loss": 0.4363, "lr": 5e-06, "epoch": 3.768688293370945, "percentage": 94.24, "elapsed_time": "21:32:32", "remaining_time": "1:18:56"}
{"current_steps": 1680, "total_steps": 1772, "loss": 0.4421, "lr": 5e-06, "epoch": 3.791255289139633, "percentage": 94.81, "elapsed_time": "21:40:09", "remaining_time": "1:11:11"}
{"current_steps": 1690, "total_steps": 1772, "loss": 0.4332, "lr": 5e-06, "epoch": 3.8138222849083214, "percentage": 95.37, "elapsed_time": "21:47:46", "remaining_time": "1:03:27"}
{"current_steps": 1700, "total_steps": 1772, "loss": 0.431, "lr": 5e-06, "epoch": 3.83638928067701, "percentage": 95.94, "elapsed_time": "21:55:23", "remaining_time": "0:55:42"}
{"current_steps": 1710, "total_steps": 1772, "loss": 0.4398, "lr": 5e-06, "epoch": 3.8589562764456984, "percentage": 96.5, "elapsed_time": "22:03:00", "remaining_time": "0:47:58"}
{"current_steps": 1720, "total_steps": 1772, "loss": 0.4387, "lr": 5e-06, "epoch": 3.8815232722143866, "percentage": 97.07, "elapsed_time": "22:10:37", "remaining_time": "0:40:13"}
{"current_steps": 1730, "total_steps": 1772, "loss": 0.4365, "lr": 5e-06, "epoch": 3.904090267983075, "percentage": 97.63, "elapsed_time": "22:18:14", "remaining_time": "0:32:29"}
{"current_steps": 1740, "total_steps": 1772, "loss": 0.4373, "lr": 5e-06, "epoch": 3.926657263751763, "percentage": 98.19, "elapsed_time": "22:25:51", "remaining_time": "0:24:45"}
{"current_steps": 1750, "total_steps": 1772, "loss": 0.4366, "lr": 5e-06, "epoch": 3.9492242595204514, "percentage": 98.76, "elapsed_time": "22:33:28", "remaining_time": "0:17:00"}
{"current_steps": 1760, "total_steps": 1772, "loss": 0.4362, "lr": 5e-06, "epoch": 3.9717912552891397, "percentage": 99.32, "elapsed_time": "22:41:06", "remaining_time": "0:09:16"}
{"current_steps": 1770, "total_steps": 1772, "loss": 0.4359, "lr": 5e-06, "epoch": 3.994358251057828, "percentage": 99.89, "elapsed_time": "22:48:43", "remaining_time": "0:01:32"}
{"current_steps": 1772, "total_steps": 1772, "eval_loss": 0.6362190246582031, "epoch": 3.9988716502115658, "percentage": 100.0, "elapsed_time": "22:57:22", "remaining_time": "0:00:00"}
{"current_steps": 1772, "total_steps": 1772, "epoch": 3.9988716502115658, "percentage": 100.0, "elapsed_time": "22:58:41", "remaining_time": "0:00:00"}