oh_v1.3_camel_math_x8 / trainer_log.jsonl
sedrickkeh's picture
Training in progress, epoch 3
1c9db42 verified
{"current_steps": 10, "total_steps": 975, "loss": 0.9796, "lr": 5e-06, "epoch": 0.03076923076923077, "percentage": 1.03, "elapsed_time": "0:09:34", "remaining_time": "15:24:22"}
{"current_steps": 20, "total_steps": 975, "loss": 0.8629, "lr": 5e-06, "epoch": 0.06153846153846154, "percentage": 2.05, "elapsed_time": "0:19:05", "remaining_time": "15:11:26"}
{"current_steps": 30, "total_steps": 975, "loss": 0.8232, "lr": 5e-06, "epoch": 0.09230769230769231, "percentage": 3.08, "elapsed_time": "0:28:36", "remaining_time": "15:00:55"}
{"current_steps": 40, "total_steps": 975, "loss": 0.8034, "lr": 5e-06, "epoch": 0.12307692307692308, "percentage": 4.1, "elapsed_time": "0:38:07", "remaining_time": "14:51:00"}
{"current_steps": 50, "total_steps": 975, "loss": 0.7826, "lr": 5e-06, "epoch": 0.15384615384615385, "percentage": 5.13, "elapsed_time": "0:47:39", "remaining_time": "14:41:40"}
{"current_steps": 60, "total_steps": 975, "loss": 0.7723, "lr": 5e-06, "epoch": 0.18461538461538463, "percentage": 6.15, "elapsed_time": "0:57:10", "remaining_time": "14:32:00"}
{"current_steps": 70, "total_steps": 975, "loss": 0.767, "lr": 5e-06, "epoch": 0.2153846153846154, "percentage": 7.18, "elapsed_time": "1:06:40", "remaining_time": "14:21:54"}
{"current_steps": 80, "total_steps": 975, "loss": 0.7559, "lr": 5e-06, "epoch": 0.24615384615384617, "percentage": 8.21, "elapsed_time": "1:16:11", "remaining_time": "14:12:28"}
{"current_steps": 90, "total_steps": 975, "loss": 0.7469, "lr": 5e-06, "epoch": 0.27692307692307694, "percentage": 9.23, "elapsed_time": "1:25:42", "remaining_time": "14:02:51"}
{"current_steps": 100, "total_steps": 975, "loss": 0.7414, "lr": 5e-06, "epoch": 0.3076923076923077, "percentage": 10.26, "elapsed_time": "1:35:13", "remaining_time": "13:53:09"}
{"current_steps": 110, "total_steps": 975, "loss": 0.7402, "lr": 5e-06, "epoch": 0.3384615384615385, "percentage": 11.28, "elapsed_time": "1:44:42", "remaining_time": "13:43:23"}
{"current_steps": 120, "total_steps": 975, "loss": 0.7409, "lr": 5e-06, "epoch": 0.36923076923076925, "percentage": 12.31, "elapsed_time": "1:54:13", "remaining_time": "13:33:47"}
{"current_steps": 130, "total_steps": 975, "loss": 0.7376, "lr": 5e-06, "epoch": 0.4, "percentage": 13.33, "elapsed_time": "2:03:44", "remaining_time": "13:24:17"}
{"current_steps": 140, "total_steps": 975, "loss": 0.7298, "lr": 5e-06, "epoch": 0.4307692307692308, "percentage": 14.36, "elapsed_time": "2:13:14", "remaining_time": "13:14:39"}
{"current_steps": 150, "total_steps": 975, "loss": 0.734, "lr": 5e-06, "epoch": 0.46153846153846156, "percentage": 15.38, "elapsed_time": "2:22:44", "remaining_time": "13:05:07"}
{"current_steps": 160, "total_steps": 975, "loss": 0.7301, "lr": 5e-06, "epoch": 0.49230769230769234, "percentage": 16.41, "elapsed_time": "2:32:15", "remaining_time": "12:55:34"}
{"current_steps": 170, "total_steps": 975, "loss": 0.7316, "lr": 5e-06, "epoch": 0.5230769230769231, "percentage": 17.44, "elapsed_time": "2:41:45", "remaining_time": "12:46:00"}
{"current_steps": 180, "total_steps": 975, "loss": 0.7253, "lr": 5e-06, "epoch": 0.5538461538461539, "percentage": 18.46, "elapsed_time": "2:51:17", "remaining_time": "12:36:32"}
{"current_steps": 190, "total_steps": 975, "loss": 0.7241, "lr": 5e-06, "epoch": 0.5846153846153846, "percentage": 19.49, "elapsed_time": "3:00:47", "remaining_time": "12:26:57"}
{"current_steps": 200, "total_steps": 975, "loss": 0.7228, "lr": 5e-06, "epoch": 0.6153846153846154, "percentage": 20.51, "elapsed_time": "3:10:19", "remaining_time": "12:17:29"}
{"current_steps": 210, "total_steps": 975, "loss": 0.7204, "lr": 5e-06, "epoch": 0.6461538461538462, "percentage": 21.54, "elapsed_time": "3:19:50", "remaining_time": "12:07:59"}
{"current_steps": 220, "total_steps": 975, "loss": 0.717, "lr": 5e-06, "epoch": 0.676923076923077, "percentage": 22.56, "elapsed_time": "3:29:22", "remaining_time": "11:58:31"}
{"current_steps": 230, "total_steps": 975, "loss": 0.7134, "lr": 5e-06, "epoch": 0.7076923076923077, "percentage": 23.59, "elapsed_time": "3:38:54", "remaining_time": "11:49:05"}
{"current_steps": 240, "total_steps": 975, "loss": 0.7159, "lr": 5e-06, "epoch": 0.7384615384615385, "percentage": 24.62, "elapsed_time": "3:48:26", "remaining_time": "11:39:36"}
{"current_steps": 250, "total_steps": 975, "loss": 0.711, "lr": 5e-06, "epoch": 0.7692307692307693, "percentage": 25.64, "elapsed_time": "3:57:58", "remaining_time": "11:30:07"}
{"current_steps": 260, "total_steps": 975, "loss": 0.7165, "lr": 5e-06, "epoch": 0.8, "percentage": 26.67, "elapsed_time": "4:07:29", "remaining_time": "11:20:37"}
{"current_steps": 270, "total_steps": 975, "loss": 0.7055, "lr": 5e-06, "epoch": 0.8307692307692308, "percentage": 27.69, "elapsed_time": "4:17:00", "remaining_time": "11:11:04"}
{"current_steps": 280, "total_steps": 975, "loss": 0.711, "lr": 5e-06, "epoch": 0.8615384615384616, "percentage": 28.72, "elapsed_time": "4:26:32", "remaining_time": "11:01:34"}
{"current_steps": 290, "total_steps": 975, "loss": 0.7084, "lr": 5e-06, "epoch": 0.8923076923076924, "percentage": 29.74, "elapsed_time": "4:36:04", "remaining_time": "10:52:05"}
{"current_steps": 300, "total_steps": 975, "loss": 0.7128, "lr": 5e-06, "epoch": 0.9230769230769231, "percentage": 30.77, "elapsed_time": "4:45:36", "remaining_time": "10:42:37"}
{"current_steps": 310, "total_steps": 975, "loss": 0.7086, "lr": 5e-06, "epoch": 0.9538461538461539, "percentage": 31.79, "elapsed_time": "4:55:06", "remaining_time": "10:33:04"}
{"current_steps": 320, "total_steps": 975, "loss": 0.7104, "lr": 5e-06, "epoch": 0.9846153846153847, "percentage": 32.82, "elapsed_time": "5:04:38", "remaining_time": "10:23:33"}
{"current_steps": 325, "total_steps": 975, "eval_loss": 0.7103046774864197, "epoch": 1.0, "percentage": 33.33, "elapsed_time": "5:15:03", "remaining_time": "10:30:07"}
{"current_steps": 330, "total_steps": 975, "loss": 0.6897, "lr": 5e-06, "epoch": 1.0153846153846153, "percentage": 33.85, "elapsed_time": "5:20:58", "remaining_time": "10:27:22"}
{"current_steps": 340, "total_steps": 975, "loss": 0.6636, "lr": 5e-06, "epoch": 1.0461538461538462, "percentage": 34.87, "elapsed_time": "5:30:31", "remaining_time": "10:17:19"}
{"current_steps": 350, "total_steps": 975, "loss": 0.665, "lr": 5e-06, "epoch": 1.0769230769230769, "percentage": 35.9, "elapsed_time": "5:40:03", "remaining_time": "10:07:15"}
{"current_steps": 360, "total_steps": 975, "loss": 0.664, "lr": 5e-06, "epoch": 1.1076923076923078, "percentage": 36.92, "elapsed_time": "5:49:34", "remaining_time": "9:57:12"}
{"current_steps": 370, "total_steps": 975, "loss": 0.6634, "lr": 5e-06, "epoch": 1.1384615384615384, "percentage": 37.95, "elapsed_time": "5:59:08", "remaining_time": "9:47:13"}
{"current_steps": 380, "total_steps": 975, "loss": 0.6653, "lr": 5e-06, "epoch": 1.1692307692307693, "percentage": 38.97, "elapsed_time": "6:08:39", "remaining_time": "9:37:14"}
{"current_steps": 390, "total_steps": 975, "loss": 0.6605, "lr": 5e-06, "epoch": 1.2, "percentage": 40.0, "elapsed_time": "6:18:12", "remaining_time": "9:27:18"}
{"current_steps": 400, "total_steps": 975, "loss": 0.6649, "lr": 5e-06, "epoch": 1.2307692307692308, "percentage": 41.03, "elapsed_time": "6:27:44", "remaining_time": "9:17:22"}
{"current_steps": 410, "total_steps": 975, "loss": 0.6662, "lr": 5e-06, "epoch": 1.2615384615384615, "percentage": 42.05, "elapsed_time": "6:37:17", "remaining_time": "9:07:29"}
{"current_steps": 420, "total_steps": 975, "loss": 0.6631, "lr": 5e-06, "epoch": 1.2923076923076924, "percentage": 43.08, "elapsed_time": "6:46:50", "remaining_time": "8:57:37"}
{"current_steps": 430, "total_steps": 975, "loss": 0.6596, "lr": 5e-06, "epoch": 1.323076923076923, "percentage": 44.1, "elapsed_time": "6:56:22", "remaining_time": "8:47:44"}
{"current_steps": 440, "total_steps": 975, "loss": 0.6659, "lr": 5e-06, "epoch": 1.353846153846154, "percentage": 45.13, "elapsed_time": "7:05:54", "remaining_time": "8:37:52"}
{"current_steps": 450, "total_steps": 975, "loss": 0.6654, "lr": 5e-06, "epoch": 1.3846153846153846, "percentage": 46.15, "elapsed_time": "7:15:28", "remaining_time": "8:28:02"}
{"current_steps": 460, "total_steps": 975, "loss": 0.6602, "lr": 5e-06, "epoch": 1.4153846153846155, "percentage": 47.18, "elapsed_time": "7:24:58", "remaining_time": "8:18:11"}
{"current_steps": 470, "total_steps": 975, "loss": 0.6633, "lr": 5e-06, "epoch": 1.4461538461538461, "percentage": 48.21, "elapsed_time": "7:34:31", "remaining_time": "8:08:22"}
{"current_steps": 480, "total_steps": 975, "loss": 0.6606, "lr": 5e-06, "epoch": 1.476923076923077, "percentage": 49.23, "elapsed_time": "7:44:05", "remaining_time": "7:58:35"}
{"current_steps": 490, "total_steps": 975, "loss": 0.6619, "lr": 5e-06, "epoch": 1.5076923076923077, "percentage": 50.26, "elapsed_time": "7:53:38", "remaining_time": "7:48:48"}
{"current_steps": 500, "total_steps": 975, "loss": 0.6613, "lr": 5e-06, "epoch": 1.5384615384615383, "percentage": 51.28, "elapsed_time": "8:03:10", "remaining_time": "7:39:01"}
{"current_steps": 510, "total_steps": 975, "loss": 0.6666, "lr": 5e-06, "epoch": 1.5692307692307692, "percentage": 52.31, "elapsed_time": "8:12:44", "remaining_time": "7:29:15"}
{"current_steps": 520, "total_steps": 975, "loss": 0.6657, "lr": 5e-06, "epoch": 1.6, "percentage": 53.33, "elapsed_time": "8:22:15", "remaining_time": "7:19:28"}
{"current_steps": 530, "total_steps": 975, "loss": 0.6678, "lr": 5e-06, "epoch": 1.6307692307692307, "percentage": 54.36, "elapsed_time": "8:31:46", "remaining_time": "7:09:41"}
{"current_steps": 540, "total_steps": 975, "loss": 0.6593, "lr": 5e-06, "epoch": 1.6615384615384614, "percentage": 55.38, "elapsed_time": "8:41:17", "remaining_time": "6:59:55"}
{"current_steps": 550, "total_steps": 975, "loss": 0.6575, "lr": 5e-06, "epoch": 1.6923076923076923, "percentage": 56.41, "elapsed_time": "8:50:50", "remaining_time": "6:50:11"}
{"current_steps": 560, "total_steps": 975, "loss": 0.6631, "lr": 5e-06, "epoch": 1.7230769230769232, "percentage": 57.44, "elapsed_time": "9:00:21", "remaining_time": "6:40:26"}
{"current_steps": 570, "total_steps": 975, "loss": 0.6592, "lr": 5e-06, "epoch": 1.7538461538461538, "percentage": 58.46, "elapsed_time": "9:09:54", "remaining_time": "6:30:43"}
{"current_steps": 580, "total_steps": 975, "loss": 0.6594, "lr": 5e-06, "epoch": 1.7846153846153845, "percentage": 59.49, "elapsed_time": "9:19:27", "remaining_time": "6:21:00"}
{"current_steps": 590, "total_steps": 975, "loss": 0.6568, "lr": 5e-06, "epoch": 1.8153846153846154, "percentage": 60.51, "elapsed_time": "9:28:59", "remaining_time": "6:11:17"}
{"current_steps": 600, "total_steps": 975, "loss": 0.6599, "lr": 5e-06, "epoch": 1.8461538461538463, "percentage": 61.54, "elapsed_time": "9:38:30", "remaining_time": "6:01:34"}
{"current_steps": 610, "total_steps": 975, "loss": 0.6591, "lr": 5e-06, "epoch": 1.876923076923077, "percentage": 62.56, "elapsed_time": "9:48:01", "remaining_time": "5:51:51"}
{"current_steps": 620, "total_steps": 975, "loss": 0.6629, "lr": 5e-06, "epoch": 1.9076923076923076, "percentage": 63.59, "elapsed_time": "9:57:32", "remaining_time": "5:42:08"}
{"current_steps": 630, "total_steps": 975, "loss": 0.6586, "lr": 5e-06, "epoch": 1.9384615384615385, "percentage": 64.62, "elapsed_time": "10:07:05", "remaining_time": "5:32:27"}
{"current_steps": 640, "total_steps": 975, "loss": 0.6603, "lr": 5e-06, "epoch": 1.9692307692307693, "percentage": 65.64, "elapsed_time": "10:16:36", "remaining_time": "5:22:45"}
{"current_steps": 650, "total_steps": 975, "loss": 0.658, "lr": 5e-06, "epoch": 2.0, "percentage": 66.67, "elapsed_time": "10:26:07", "remaining_time": "5:13:03"}
{"current_steps": 650, "total_steps": 975, "eval_loss": 0.6978012323379517, "epoch": 2.0, "percentage": 66.67, "elapsed_time": "10:31:50", "remaining_time": "5:15:55"}
{"current_steps": 660, "total_steps": 975, "loss": 0.6088, "lr": 5e-06, "epoch": 2.0307692307692307, "percentage": 67.69, "elapsed_time": "10:42:33", "remaining_time": "5:06:40"}
{"current_steps": 670, "total_steps": 975, "loss": 0.6083, "lr": 5e-06, "epoch": 2.0615384615384613, "percentage": 68.72, "elapsed_time": "10:52:07", "remaining_time": "4:56:51"}
{"current_steps": 680, "total_steps": 975, "loss": 0.6149, "lr": 5e-06, "epoch": 2.0923076923076924, "percentage": 69.74, "elapsed_time": "11:01:40", "remaining_time": "4:47:02"}
{"current_steps": 690, "total_steps": 975, "loss": 0.6164, "lr": 5e-06, "epoch": 2.123076923076923, "percentage": 70.77, "elapsed_time": "11:11:10", "remaining_time": "4:37:13"}
{"current_steps": 700, "total_steps": 975, "loss": 0.6115, "lr": 5e-06, "epoch": 2.1538461538461537, "percentage": 71.79, "elapsed_time": "11:20:42", "remaining_time": "4:27:25"}
{"current_steps": 710, "total_steps": 975, "loss": 0.6153, "lr": 5e-06, "epoch": 2.184615384615385, "percentage": 72.82, "elapsed_time": "11:30:16", "remaining_time": "4:17:38"}
{"current_steps": 720, "total_steps": 975, "loss": 0.6133, "lr": 5e-06, "epoch": 2.2153846153846155, "percentage": 73.85, "elapsed_time": "11:39:48", "remaining_time": "4:07:50"}
{"current_steps": 730, "total_steps": 975, "loss": 0.6131, "lr": 5e-06, "epoch": 2.246153846153846, "percentage": 74.87, "elapsed_time": "11:49:20", "remaining_time": "3:58:03"}
{"current_steps": 740, "total_steps": 975, "loss": 0.6159, "lr": 5e-06, "epoch": 2.276923076923077, "percentage": 75.9, "elapsed_time": "11:58:53", "remaining_time": "3:48:17"}
{"current_steps": 750, "total_steps": 975, "loss": 0.6149, "lr": 5e-06, "epoch": 2.3076923076923075, "percentage": 76.92, "elapsed_time": "12:08:26", "remaining_time": "3:38:31"}
{"current_steps": 760, "total_steps": 975, "loss": 0.6132, "lr": 5e-06, "epoch": 2.3384615384615386, "percentage": 77.95, "elapsed_time": "12:17:57", "remaining_time": "3:28:45"}
{"current_steps": 770, "total_steps": 975, "loss": 0.6128, "lr": 5e-06, "epoch": 2.3692307692307693, "percentage": 78.97, "elapsed_time": "12:27:30", "remaining_time": "3:19:00"}
{"current_steps": 780, "total_steps": 975, "loss": 0.6164, "lr": 5e-06, "epoch": 2.4, "percentage": 80.0, "elapsed_time": "12:37:04", "remaining_time": "3:09:16"}
{"current_steps": 790, "total_steps": 975, "loss": 0.623, "lr": 5e-06, "epoch": 2.430769230769231, "percentage": 81.03, "elapsed_time": "12:46:36", "remaining_time": "2:59:31"}
{"current_steps": 800, "total_steps": 975, "loss": 0.6166, "lr": 5e-06, "epoch": 2.4615384615384617, "percentage": 82.05, "elapsed_time": "12:56:09", "remaining_time": "2:49:46"}
{"current_steps": 810, "total_steps": 975, "loss": 0.615, "lr": 5e-06, "epoch": 2.4923076923076923, "percentage": 83.08, "elapsed_time": "13:05:41", "remaining_time": "2:40:02"}
{"current_steps": 820, "total_steps": 975, "loss": 0.614, "lr": 5e-06, "epoch": 2.523076923076923, "percentage": 84.1, "elapsed_time": "13:15:14", "remaining_time": "2:30:19"}
{"current_steps": 830, "total_steps": 975, "loss": 0.6222, "lr": 5e-06, "epoch": 2.5538461538461537, "percentage": 85.13, "elapsed_time": "13:24:48", "remaining_time": "2:20:35"}
{"current_steps": 840, "total_steps": 975, "loss": 0.617, "lr": 5e-06, "epoch": 2.5846153846153848, "percentage": 86.15, "elapsed_time": "13:34:21", "remaining_time": "2:10:52"}
{"current_steps": 850, "total_steps": 975, "loss": 0.62, "lr": 5e-06, "epoch": 2.6153846153846154, "percentage": 87.18, "elapsed_time": "13:43:52", "remaining_time": "2:01:09"}
{"current_steps": 860, "total_steps": 975, "loss": 0.619, "lr": 5e-06, "epoch": 2.646153846153846, "percentage": 88.21, "elapsed_time": "13:53:23", "remaining_time": "1:51:26"}
{"current_steps": 870, "total_steps": 975, "loss": 0.6164, "lr": 5e-06, "epoch": 2.676923076923077, "percentage": 89.23, "elapsed_time": "14:02:55", "remaining_time": "1:41:43"}
{"current_steps": 880, "total_steps": 975, "loss": 0.6218, "lr": 5e-06, "epoch": 2.707692307692308, "percentage": 90.26, "elapsed_time": "14:12:29", "remaining_time": "1:32:01"}
{"current_steps": 890, "total_steps": 975, "loss": 0.6176, "lr": 5e-06, "epoch": 2.7384615384615385, "percentage": 91.28, "elapsed_time": "14:22:01", "remaining_time": "1:22:19"}
{"current_steps": 900, "total_steps": 975, "loss": 0.6217, "lr": 5e-06, "epoch": 2.769230769230769, "percentage": 92.31, "elapsed_time": "14:31:34", "remaining_time": "1:12:37"}
{"current_steps": 910, "total_steps": 975, "loss": 0.6154, "lr": 5e-06, "epoch": 2.8, "percentage": 93.33, "elapsed_time": "14:41:06", "remaining_time": "1:02:56"}
{"current_steps": 920, "total_steps": 975, "loss": 0.6189, "lr": 5e-06, "epoch": 2.830769230769231, "percentage": 94.36, "elapsed_time": "14:50:39", "remaining_time": "0:53:14"}
{"current_steps": 930, "total_steps": 975, "loss": 0.6211, "lr": 5e-06, "epoch": 2.8615384615384616, "percentage": 95.38, "elapsed_time": "15:00:13", "remaining_time": "0:43:33"}
{"current_steps": 940, "total_steps": 975, "loss": 0.6175, "lr": 5e-06, "epoch": 2.8923076923076922, "percentage": 96.41, "elapsed_time": "15:09:46", "remaining_time": "0:33:52"}
{"current_steps": 950, "total_steps": 975, "loss": 0.6222, "lr": 5e-06, "epoch": 2.9230769230769234, "percentage": 97.44, "elapsed_time": "15:19:20", "remaining_time": "0:24:11"}
{"current_steps": 960, "total_steps": 975, "loss": 0.6174, "lr": 5e-06, "epoch": 2.953846153846154, "percentage": 98.46, "elapsed_time": "15:28:53", "remaining_time": "0:14:30"}
{"current_steps": 970, "total_steps": 975, "loss": 0.6176, "lr": 5e-06, "epoch": 2.9846153846153847, "percentage": 99.49, "elapsed_time": "15:38:27", "remaining_time": "0:04:50"}
{"current_steps": 975, "total_steps": 975, "eval_loss": 0.6986876726150513, "epoch": 3.0, "percentage": 100.0, "elapsed_time": "15:50:09", "remaining_time": "0:00:00"}
{"current_steps": 975, "total_steps": 975, "epoch": 3.0, "percentage": 100.0, "elapsed_time": "15:51:38", "remaining_time": "0:00:00"}