hp_ablations_mistral_bsz1024 / trainer_log.jsonl
sedrickkeh's picture
Training in progress, epoch 2
4c2830f verified
{"current_steps": 10, "total_steps": 738, "loss": 0.7469, "learning_rate": 5e-06, "epoch": 0.04050632911392405, "percentage": 1.36, "elapsed_time": "0:09:28", "remaining_time": "11:29:58"}
{"current_steps": 20, "total_steps": 738, "loss": 0.6482, "learning_rate": 5e-06, "epoch": 0.0810126582278481, "percentage": 2.71, "elapsed_time": "0:18:53", "remaining_time": "11:18:03"}
{"current_steps": 30, "total_steps": 738, "loss": 0.6153, "learning_rate": 5e-06, "epoch": 0.12151898734177215, "percentage": 4.07, "elapsed_time": "0:28:17", "remaining_time": "11:07:43"}
{"current_steps": 40, "total_steps": 738, "loss": 0.5992, "learning_rate": 5e-06, "epoch": 0.1620253164556962, "percentage": 5.42, "elapsed_time": "0:37:41", "remaining_time": "10:57:50"}
{"current_steps": 50, "total_steps": 738, "loss": 0.5961, "learning_rate": 5e-06, "epoch": 0.20253164556962025, "percentage": 6.78, "elapsed_time": "0:47:05", "remaining_time": "10:48:02"}
{"current_steps": 60, "total_steps": 738, "loss": 0.5883, "learning_rate": 5e-06, "epoch": 0.2430379746835443, "percentage": 8.13, "elapsed_time": "0:56:29", "remaining_time": "10:38:21"}
{"current_steps": 70, "total_steps": 738, "loss": 0.578, "learning_rate": 5e-06, "epoch": 0.28354430379746837, "percentage": 9.49, "elapsed_time": "1:05:53", "remaining_time": "10:28:45"}
{"current_steps": 80, "total_steps": 738, "loss": 0.581, "learning_rate": 5e-06, "epoch": 0.3240506329113924, "percentage": 10.84, "elapsed_time": "1:15:17", "remaining_time": "10:19:19"}
{"current_steps": 90, "total_steps": 738, "loss": 0.5782, "learning_rate": 5e-06, "epoch": 0.36455696202531646, "percentage": 12.2, "elapsed_time": "1:24:41", "remaining_time": "10:09:49"}
{"current_steps": 100, "total_steps": 738, "loss": 0.5733, "learning_rate": 5e-06, "epoch": 0.4050632911392405, "percentage": 13.55, "elapsed_time": "1:34:05", "remaining_time": "10:00:19"}
{"current_steps": 110, "total_steps": 738, "loss": 0.5698, "learning_rate": 5e-06, "epoch": 0.44556962025316454, "percentage": 14.91, "elapsed_time": "1:43:29", "remaining_time": "9:50:52"}
{"current_steps": 120, "total_steps": 738, "loss": 0.5694, "learning_rate": 5e-06, "epoch": 0.4860759493670886, "percentage": 16.26, "elapsed_time": "1:52:53", "remaining_time": "9:41:25"}
{"current_steps": 130, "total_steps": 738, "loss": 0.5631, "learning_rate": 5e-06, "epoch": 0.5265822784810127, "percentage": 17.62, "elapsed_time": "2:02:17", "remaining_time": "9:31:56"}
{"current_steps": 140, "total_steps": 738, "loss": 0.5631, "learning_rate": 5e-06, "epoch": 0.5670886075949367, "percentage": 18.97, "elapsed_time": "2:11:42", "remaining_time": "9:22:33"}
{"current_steps": 150, "total_steps": 738, "loss": 0.5606, "learning_rate": 5e-06, "epoch": 0.6075949367088608, "percentage": 20.33, "elapsed_time": "2:21:05", "remaining_time": "9:13:05"}
{"current_steps": 160, "total_steps": 738, "loss": 0.5628, "learning_rate": 5e-06, "epoch": 0.6481012658227848, "percentage": 21.68, "elapsed_time": "2:30:29", "remaining_time": "9:03:38"}
{"current_steps": 170, "total_steps": 738, "loss": 0.5594, "learning_rate": 5e-06, "epoch": 0.6886075949367089, "percentage": 23.04, "elapsed_time": "2:39:53", "remaining_time": "8:54:12"}
{"current_steps": 180, "total_steps": 738, "loss": 0.5612, "learning_rate": 5e-06, "epoch": 0.7291139240506329, "percentage": 24.39, "elapsed_time": "2:49:16", "remaining_time": "8:44:45"}
{"current_steps": 190, "total_steps": 738, "loss": 0.5534, "learning_rate": 5e-06, "epoch": 0.769620253164557, "percentage": 25.75, "elapsed_time": "2:58:41", "remaining_time": "8:35:21"}
{"current_steps": 200, "total_steps": 738, "loss": 0.5553, "learning_rate": 5e-06, "epoch": 0.810126582278481, "percentage": 27.1, "elapsed_time": "3:08:05", "remaining_time": "8:25:58"}
{"current_steps": 210, "total_steps": 738, "loss": 0.5542, "learning_rate": 5e-06, "epoch": 0.850632911392405, "percentage": 28.46, "elapsed_time": "3:17:30", "remaining_time": "8:16:35"}
{"current_steps": 220, "total_steps": 738, "loss": 0.5542, "learning_rate": 5e-06, "epoch": 0.8911392405063291, "percentage": 29.81, "elapsed_time": "3:26:54", "remaining_time": "8:07:10"}
{"current_steps": 230, "total_steps": 738, "loss": 0.5502, "learning_rate": 5e-06, "epoch": 0.9316455696202531, "percentage": 31.17, "elapsed_time": "3:36:18", "remaining_time": "7:57:45"}
{"current_steps": 240, "total_steps": 738, "loss": 0.5522, "learning_rate": 5e-06, "epoch": 0.9721518987341772, "percentage": 32.52, "elapsed_time": "3:45:42", "remaining_time": "7:48:19"}
{"current_steps": 246, "total_steps": 738, "eval_loss": 0.0690777450799942, "epoch": 0.9964556962025316, "percentage": 33.33, "elapsed_time": "3:56:16", "remaining_time": "7:52:32"}
{"current_steps": 250, "total_steps": 738, "loss": 0.5352, "learning_rate": 5e-06, "epoch": 1.0131645569620253, "percentage": 33.88, "elapsed_time": "4:00:14", "remaining_time": "7:48:57"}
{"current_steps": 260, "total_steps": 738, "loss": 0.4945, "learning_rate": 5e-06, "epoch": 1.0536708860759494, "percentage": 35.23, "elapsed_time": "4:09:38", "remaining_time": "7:38:56"}
{"current_steps": 270, "total_steps": 738, "loss": 0.4929, "learning_rate": 5e-06, "epoch": 1.0941772151898734, "percentage": 36.59, "elapsed_time": "4:19:02", "remaining_time": "7:29:00"}
{"current_steps": 280, "total_steps": 738, "loss": 0.4906, "learning_rate": 5e-06, "epoch": 1.1346835443037975, "percentage": 37.94, "elapsed_time": "4:28:27", "remaining_time": "7:19:07"}
{"current_steps": 290, "total_steps": 738, "loss": 0.4965, "learning_rate": 5e-06, "epoch": 1.1751898734177215, "percentage": 39.3, "elapsed_time": "4:37:52", "remaining_time": "7:09:15"}
{"current_steps": 300, "total_steps": 738, "loss": 0.4935, "learning_rate": 5e-06, "epoch": 1.2156962025316456, "percentage": 40.65, "elapsed_time": "4:47:17", "remaining_time": "6:59:26"}
{"current_steps": 310, "total_steps": 738, "loss": 0.4946, "learning_rate": 5e-06, "epoch": 1.2562025316455696, "percentage": 42.01, "elapsed_time": "4:56:41", "remaining_time": "6:49:37"}
{"current_steps": 320, "total_steps": 738, "loss": 0.4952, "learning_rate": 5e-06, "epoch": 1.2967088607594937, "percentage": 43.36, "elapsed_time": "5:06:06", "remaining_time": "6:39:51"}
{"current_steps": 330, "total_steps": 738, "loss": 0.4939, "learning_rate": 5e-06, "epoch": 1.3372151898734177, "percentage": 44.72, "elapsed_time": "5:15:31", "remaining_time": "6:30:06"}
{"current_steps": 340, "total_steps": 738, "loss": 0.4997, "learning_rate": 5e-06, "epoch": 1.3777215189873417, "percentage": 46.07, "elapsed_time": "5:24:55", "remaining_time": "6:20:21"}
{"current_steps": 350, "total_steps": 738, "loss": 0.4956, "learning_rate": 5e-06, "epoch": 1.4182278481012658, "percentage": 47.43, "elapsed_time": "5:34:20", "remaining_time": "6:10:39"}
{"current_steps": 360, "total_steps": 738, "loss": 0.4997, "learning_rate": 5e-06, "epoch": 1.4587341772151898, "percentage": 48.78, "elapsed_time": "5:43:46", "remaining_time": "6:00:57"}
{"current_steps": 370, "total_steps": 738, "loss": 0.4981, "learning_rate": 5e-06, "epoch": 1.4992405063291139, "percentage": 50.14, "elapsed_time": "5:53:11", "remaining_time": "5:51:16"}
{"current_steps": 380, "total_steps": 738, "loss": 0.4981, "learning_rate": 5e-06, "epoch": 1.539746835443038, "percentage": 51.49, "elapsed_time": "6:02:36", "remaining_time": "5:41:36"}
{"current_steps": 390, "total_steps": 738, "loss": 0.4968, "learning_rate": 5e-06, "epoch": 1.5802531645569622, "percentage": 52.85, "elapsed_time": "6:12:00", "remaining_time": "5:31:57"}
{"current_steps": 400, "total_steps": 738, "loss": 0.4977, "learning_rate": 5e-06, "epoch": 1.620759493670886, "percentage": 54.2, "elapsed_time": "6:21:25", "remaining_time": "5:22:18"}
{"current_steps": 410, "total_steps": 738, "loss": 0.4931, "learning_rate": 5e-06, "epoch": 1.6612658227848103, "percentage": 55.56, "elapsed_time": "6:30:50", "remaining_time": "5:12:40"}
{"current_steps": 420, "total_steps": 738, "loss": 0.4937, "learning_rate": 5e-06, "epoch": 1.701772151898734, "percentage": 56.91, "elapsed_time": "6:40:14", "remaining_time": "5:03:02"}
{"current_steps": 430, "total_steps": 738, "loss": 0.496, "learning_rate": 5e-06, "epoch": 1.7422784810126584, "percentage": 58.27, "elapsed_time": "6:49:39", "remaining_time": "4:53:25"}
{"current_steps": 440, "total_steps": 738, "loss": 0.4984, "learning_rate": 5e-06, "epoch": 1.7827848101265822, "percentage": 59.62, "elapsed_time": "6:59:03", "remaining_time": "4:43:49"}
{"current_steps": 450, "total_steps": 738, "loss": 0.4962, "learning_rate": 5e-06, "epoch": 1.8232911392405065, "percentage": 60.98, "elapsed_time": "7:08:28", "remaining_time": "4:34:13"}
{"current_steps": 460, "total_steps": 738, "loss": 0.4963, "learning_rate": 5e-06, "epoch": 1.8637974683544303, "percentage": 62.33, "elapsed_time": "7:17:54", "remaining_time": "4:24:38"}
{"current_steps": 470, "total_steps": 738, "loss": 0.493, "learning_rate": 5e-06, "epoch": 1.9043037974683545, "percentage": 63.69, "elapsed_time": "7:27:19", "remaining_time": "4:15:04"}
{"current_steps": 480, "total_steps": 738, "loss": 0.4994, "learning_rate": 5e-06, "epoch": 1.9448101265822784, "percentage": 65.04, "elapsed_time": "7:36:44", "remaining_time": "4:05:29"}
{"current_steps": 490, "total_steps": 738, "loss": 0.4996, "learning_rate": 5e-06, "epoch": 1.9853164556962026, "percentage": 66.4, "elapsed_time": "7:46:09", "remaining_time": "3:55:55"}
{"current_steps": 493, "total_steps": 738, "eval_loss": 0.06833568215370178, "epoch": 1.9974683544303797, "percentage": 66.8, "elapsed_time": "7:53:36", "remaining_time": "3:55:21"}
{"current_steps": 500, "total_steps": 738, "loss": 0.4488, "learning_rate": 5e-06, "epoch": 2.0263291139240507, "percentage": 67.75, "elapsed_time": "8:00:38", "remaining_time": "3:48:47"}
{"current_steps": 510, "total_steps": 738, "loss": 0.4174, "learning_rate": 5e-06, "epoch": 2.0668354430379745, "percentage": 69.11, "elapsed_time": "8:10:02", "remaining_time": "3:39:04"}
{"current_steps": 520, "total_steps": 738, "loss": 0.4122, "learning_rate": 5e-06, "epoch": 2.1073417721518988, "percentage": 70.46, "elapsed_time": "8:19:26", "remaining_time": "3:29:23"}
{"current_steps": 530, "total_steps": 738, "loss": 0.4176, "learning_rate": 5e-06, "epoch": 2.1478481012658226, "percentage": 71.82, "elapsed_time": "8:28:52", "remaining_time": "3:19:42"}
{"current_steps": 540, "total_steps": 738, "loss": 0.4155, "learning_rate": 5e-06, "epoch": 2.188354430379747, "percentage": 73.17, "elapsed_time": "8:38:16", "remaining_time": "3:10:01"}
{"current_steps": 550, "total_steps": 738, "loss": 0.4159, "learning_rate": 5e-06, "epoch": 2.2288607594936707, "percentage": 74.53, "elapsed_time": "8:47:41", "remaining_time": "3:00:22"}
{"current_steps": 560, "total_steps": 738, "loss": 0.4168, "learning_rate": 5e-06, "epoch": 2.269367088607595, "percentage": 75.88, "elapsed_time": "8:57:06", "remaining_time": "2:50:43"}
{"current_steps": 570, "total_steps": 738, "loss": 0.4171, "learning_rate": 5e-06, "epoch": 2.309873417721519, "percentage": 77.24, "elapsed_time": "9:06:31", "remaining_time": "2:41:04"}
{"current_steps": 580, "total_steps": 738, "loss": 0.4213, "learning_rate": 5e-06, "epoch": 2.350379746835443, "percentage": 78.59, "elapsed_time": "9:15:56", "remaining_time": "2:31:26"}
{"current_steps": 590, "total_steps": 738, "loss": 0.4217, "learning_rate": 5e-06, "epoch": 2.390886075949367, "percentage": 79.95, "elapsed_time": "9:25:21", "remaining_time": "2:21:49"}
{"current_steps": 600, "total_steps": 738, "loss": 0.4203, "learning_rate": 5e-06, "epoch": 2.431392405063291, "percentage": 81.3, "elapsed_time": "9:34:46", "remaining_time": "2:12:11"}
{"current_steps": 610, "total_steps": 738, "loss": 0.4202, "learning_rate": 5e-06, "epoch": 2.4718987341772154, "percentage": 82.66, "elapsed_time": "9:44:10", "remaining_time": "2:02:34"}
{"current_steps": 620, "total_steps": 738, "loss": 0.4238, "learning_rate": 5e-06, "epoch": 2.512405063291139, "percentage": 84.01, "elapsed_time": "9:53:35", "remaining_time": "1:52:58"}
{"current_steps": 630, "total_steps": 738, "loss": 0.4173, "learning_rate": 5e-06, "epoch": 2.552911392405063, "percentage": 85.37, "elapsed_time": "10:03:00", "remaining_time": "1:43:22"}
{"current_steps": 640, "total_steps": 738, "loss": 0.4234, "learning_rate": 5e-06, "epoch": 2.5934177215189873, "percentage": 86.72, "elapsed_time": "10:12:24", "remaining_time": "1:33:46"}
{"current_steps": 650, "total_steps": 738, "loss": 0.4259, "learning_rate": 5e-06, "epoch": 2.6339240506329116, "percentage": 88.08, "elapsed_time": "10:21:49", "remaining_time": "1:24:11"}
{"current_steps": 660, "total_steps": 738, "loss": 0.4249, "learning_rate": 5e-06, "epoch": 2.6744303797468354, "percentage": 89.43, "elapsed_time": "10:31:15", "remaining_time": "1:14:36"}
{"current_steps": 670, "total_steps": 738, "loss": 0.4285, "learning_rate": 5e-06, "epoch": 2.714936708860759, "percentage": 90.79, "elapsed_time": "10:40:39", "remaining_time": "1:05:01"}
{"current_steps": 680, "total_steps": 738, "loss": 0.4234, "learning_rate": 5e-06, "epoch": 2.7554430379746835, "percentage": 92.14, "elapsed_time": "10:50:04", "remaining_time": "0:55:26"}
{"current_steps": 690, "total_steps": 738, "loss": 0.4271, "learning_rate": 5e-06, "epoch": 2.7959493670886078, "percentage": 93.5, "elapsed_time": "10:59:30", "remaining_time": "0:45:52"}
{"current_steps": 700, "total_steps": 738, "loss": 0.4252, "learning_rate": 5e-06, "epoch": 2.8364556962025316, "percentage": 94.85, "elapsed_time": "11:08:55", "remaining_time": "0:36:18"}
{"current_steps": 710, "total_steps": 738, "loss": 0.428, "learning_rate": 5e-06, "epoch": 2.876962025316456, "percentage": 96.21, "elapsed_time": "11:18:19", "remaining_time": "0:26:45"}
{"current_steps": 720, "total_steps": 738, "loss": 0.4271, "learning_rate": 5e-06, "epoch": 2.9174683544303797, "percentage": 97.56, "elapsed_time": "11:27:44", "remaining_time": "0:17:11"}
{"current_steps": 730, "total_steps": 738, "loss": 0.4325, "learning_rate": 5e-06, "epoch": 2.957974683544304, "percentage": 98.92, "elapsed_time": "11:37:08", "remaining_time": "0:07:38"}
{"current_steps": 738, "total_steps": 738, "eval_loss": 0.07060651481151581, "epoch": 2.990379746835443, "percentage": 100.0, "elapsed_time": "11:49:47", "remaining_time": "0:00:00"}
{"current_steps": 738, "total_steps": 738, "epoch": 2.990379746835443, "percentage": 100.0, "elapsed_time": "11:50:48", "remaining_time": "0:00:00"}