e1_science_longest_r1_10k / trainer_log.jsonl
ryanmarten's picture
Training in progress, epoch 0
c18692c verified
{"current_steps": 1, "total_steps": 390, "loss": 1.0748, "lr": 1.0256410256410257e-06, "epoch": 0.0128, "percentage": 0.26, "elapsed_time": "0:02:02", "remaining_time": "13:15:48"}
{"current_steps": 2, "total_steps": 390, "loss": 1.0488, "lr": 2.0512820512820513e-06, "epoch": 0.0256, "percentage": 0.51, "elapsed_time": "0:04:10", "remaining_time": "13:30:33"}
{"current_steps": 3, "total_steps": 390, "loss": 1.0473, "lr": 3.0769230769230774e-06, "epoch": 0.0384, "percentage": 0.77, "elapsed_time": "0:06:13", "remaining_time": "13:23:23"}
{"current_steps": 4, "total_steps": 390, "loss": 1.0341, "lr": 4.102564102564103e-06, "epoch": 0.0512, "percentage": 1.03, "elapsed_time": "0:08:31", "remaining_time": "13:41:52"}
{"current_steps": 5, "total_steps": 390, "loss": 0.9985, "lr": 5.128205128205128e-06, "epoch": 0.064, "percentage": 1.28, "elapsed_time": "0:10:23", "remaining_time": "13:20:31"}
{"current_steps": 6, "total_steps": 390, "loss": 0.9441, "lr": 6.153846153846155e-06, "epoch": 0.0768, "percentage": 1.54, "elapsed_time": "0:12:27", "remaining_time": "13:17:25"}
{"current_steps": 7, "total_steps": 390, "loss": 0.9526, "lr": 7.17948717948718e-06, "epoch": 0.0896, "percentage": 1.79, "elapsed_time": "0:14:18", "remaining_time": "13:03:19"}
{"current_steps": 8, "total_steps": 390, "loss": 0.9775, "lr": 8.205128205128205e-06, "epoch": 0.1024, "percentage": 2.05, "elapsed_time": "0:16:20", "remaining_time": "13:00:07"}
{"current_steps": 9, "total_steps": 390, "loss": 0.9199, "lr": 9.230769230769232e-06, "epoch": 0.1152, "percentage": 2.31, "elapsed_time": "0:18:05", "remaining_time": "12:45:40"}
{"current_steps": 10, "total_steps": 390, "loss": 0.9202, "lr": 1.0256410256410256e-05, "epoch": 0.128, "percentage": 2.56, "elapsed_time": "0:19:55", "remaining_time": "12:37:01"}
{"current_steps": 11, "total_steps": 390, "loss": 0.9106, "lr": 1.1282051282051283e-05, "epoch": 0.1408, "percentage": 2.82, "elapsed_time": "0:21:51", "remaining_time": "12:33:07"}
{"current_steps": 12, "total_steps": 390, "loss": 0.9024, "lr": 1.230769230769231e-05, "epoch": 0.1536, "percentage": 3.08, "elapsed_time": "0:23:52", "remaining_time": "12:32:05"}
{"current_steps": 13, "total_steps": 390, "loss": 0.8304, "lr": 1.3333333333333333e-05, "epoch": 0.1664, "percentage": 3.33, "elapsed_time": "0:25:58", "remaining_time": "12:33:15"}
{"current_steps": 14, "total_steps": 390, "loss": 0.8731, "lr": 1.435897435897436e-05, "epoch": 0.1792, "percentage": 3.59, "elapsed_time": "0:27:56", "remaining_time": "12:30:35"}
{"current_steps": 15, "total_steps": 390, "loss": 0.843, "lr": 1.5384615384615387e-05, "epoch": 0.192, "percentage": 3.85, "elapsed_time": "0:29:59", "remaining_time": "12:29:47"}
{"current_steps": 16, "total_steps": 390, "loss": 0.8271, "lr": 1.641025641025641e-05, "epoch": 0.2048, "percentage": 4.1, "elapsed_time": "0:31:39", "remaining_time": "12:19:49"}
{"current_steps": 17, "total_steps": 390, "loss": 0.7774, "lr": 1.7435897435897438e-05, "epoch": 0.2176, "percentage": 4.36, "elapsed_time": "0:33:46", "remaining_time": "12:21:08"}
{"current_steps": 18, "total_steps": 390, "loss": 0.796, "lr": 1.8461538461538465e-05, "epoch": 0.2304, "percentage": 4.62, "elapsed_time": "0:35:48", "remaining_time": "12:20:04"}
{"current_steps": 19, "total_steps": 390, "loss": 0.807, "lr": 1.9487179487179488e-05, "epoch": 0.2432, "percentage": 4.87, "elapsed_time": "0:37:29", "remaining_time": "12:12:14"}
{"current_steps": 20, "total_steps": 390, "loss": 0.7896, "lr": 2.0512820512820512e-05, "epoch": 0.256, "percentage": 5.13, "elapsed_time": "0:39:31", "remaining_time": "12:11:07"}
{"current_steps": 21, "total_steps": 390, "loss": 0.7534, "lr": 2.153846153846154e-05, "epoch": 0.2688, "percentage": 5.38, "elapsed_time": "0:41:30", "remaining_time": "12:09:28"}
{"current_steps": 22, "total_steps": 390, "loss": 0.7644, "lr": 2.2564102564102566e-05, "epoch": 0.2816, "percentage": 5.64, "elapsed_time": "0:43:18", "remaining_time": "12:04:32"}
{"current_steps": 23, "total_steps": 390, "loss": 0.7489, "lr": 2.3589743589743593e-05, "epoch": 0.2944, "percentage": 5.9, "elapsed_time": "0:45:50", "remaining_time": "12:11:21"}
{"current_steps": 24, "total_steps": 390, "loss": 0.7633, "lr": 2.461538461538462e-05, "epoch": 0.3072, "percentage": 6.15, "elapsed_time": "0:47:32", "remaining_time": "12:04:56"}
{"current_steps": 25, "total_steps": 390, "loss": 0.7254, "lr": 2.5641025641025646e-05, "epoch": 0.32, "percentage": 6.41, "elapsed_time": "0:49:29", "remaining_time": "12:02:32"}
{"current_steps": 26, "total_steps": 390, "loss": 0.7623, "lr": 2.6666666666666667e-05, "epoch": 0.3328, "percentage": 6.67, "elapsed_time": "0:51:35", "remaining_time": "12:02:11"}
{"current_steps": 27, "total_steps": 390, "loss": 0.7648, "lr": 2.7692307692307694e-05, "epoch": 0.3456, "percentage": 6.92, "elapsed_time": "0:53:41", "remaining_time": "12:01:45"}
{"current_steps": 28, "total_steps": 390, "loss": 0.7259, "lr": 2.871794871794872e-05, "epoch": 0.3584, "percentage": 7.18, "elapsed_time": "0:55:50", "remaining_time": "12:01:55"}
{"current_steps": 29, "total_steps": 390, "loss": 0.7391, "lr": 2.9743589743589747e-05, "epoch": 0.3712, "percentage": 7.44, "elapsed_time": "0:57:44", "remaining_time": "11:58:46"}
{"current_steps": 30, "total_steps": 390, "loss": 0.75, "lr": 3.0769230769230774e-05, "epoch": 0.384, "percentage": 7.69, "elapsed_time": "0:59:58", "remaining_time": "11:59:47"}
{"current_steps": 31, "total_steps": 390, "loss": 2.2165, "lr": 3.1794871794871795e-05, "epoch": 0.3968, "percentage": 7.95, "elapsed_time": "1:01:58", "remaining_time": "11:57:47"}
{"current_steps": 32, "total_steps": 390, "loss": 0.7595, "lr": 3.282051282051282e-05, "epoch": 0.4096, "percentage": 8.21, "elapsed_time": "1:03:42", "remaining_time": "11:52:49"}
{"current_steps": 33, "total_steps": 390, "loss": 0.7404, "lr": 3.384615384615385e-05, "epoch": 0.4224, "percentage": 8.46, "elapsed_time": "1:06:08", "remaining_time": "11:55:28"}
{"current_steps": 34, "total_steps": 390, "loss": 0.7322, "lr": 3.4871794871794875e-05, "epoch": 0.4352, "percentage": 8.72, "elapsed_time": "1:08:07", "remaining_time": "11:53:18"}
{"current_steps": 35, "total_steps": 390, "loss": 0.7467, "lr": 3.58974358974359e-05, "epoch": 0.448, "percentage": 8.97, "elapsed_time": "1:10:09", "remaining_time": "11:51:38"}
{"current_steps": 36, "total_steps": 390, "loss": 0.6769, "lr": 3.692307692307693e-05, "epoch": 0.4608, "percentage": 9.23, "elapsed_time": "1:12:23", "remaining_time": "11:51:55"}
{"current_steps": 37, "total_steps": 390, "loss": 0.7393, "lr": 3.794871794871795e-05, "epoch": 0.4736, "percentage": 9.49, "elapsed_time": "1:14:24", "remaining_time": "11:49:55"}
{"current_steps": 38, "total_steps": 390, "loss": 0.6925, "lr": 3.8974358974358976e-05, "epoch": 0.4864, "percentage": 9.74, "elapsed_time": "1:16:26", "remaining_time": "11:48:04"}
{"current_steps": 39, "total_steps": 390, "loss": 0.7538, "lr": 4e-05, "epoch": 0.4992, "percentage": 10.0, "elapsed_time": "1:18:20", "remaining_time": "11:45:06"}
{"current_steps": 40, "total_steps": 390, "loss": 0.7127, "lr": 3.9999198907597046e-05, "epoch": 0.512, "percentage": 10.26, "elapsed_time": "1:20:18", "remaining_time": "11:42:43"}
{"current_steps": 41, "total_steps": 390, "loss": 0.7174, "lr": 3.9996795694563096e-05, "epoch": 0.5248, "percentage": 10.51, "elapsed_time": "1:22:31", "remaining_time": "11:42:26"}
{"current_steps": 42, "total_steps": 390, "loss": 0.7338, "lr": 3.999279055341771e-05, "epoch": 0.5376, "percentage": 10.77, "elapsed_time": "1:24:28", "remaining_time": "11:39:57"}
{"current_steps": 43, "total_steps": 390, "loss": 0.7148, "lr": 3.998718380500971e-05, "epoch": 0.5504, "percentage": 11.03, "elapsed_time": "1:26:31", "remaining_time": "11:38:17"}
{"current_steps": 44, "total_steps": 390, "loss": 0.718, "lr": 3.997997589849145e-05, "epoch": 0.5632, "percentage": 11.28, "elapsed_time": "1:28:22", "remaining_time": "11:34:53"}
{"current_steps": 45, "total_steps": 390, "loss": 0.6918, "lr": 3.9971167411282835e-05, "epoch": 0.576, "percentage": 11.54, "elapsed_time": "1:30:18", "remaining_time": "11:32:25"}
{"current_steps": 46, "total_steps": 390, "loss": 0.7187, "lr": 3.99607590490251e-05, "epoch": 0.5888, "percentage": 11.79, "elapsed_time": "1:32:20", "remaining_time": "11:30:32"}
{"current_steps": 47, "total_steps": 390, "loss": 0.6977, "lr": 3.9948751645524235e-05, "epoch": 0.6016, "percentage": 12.05, "elapsed_time": "1:34:30", "remaining_time": "11:29:42"}
{"current_steps": 48, "total_steps": 390, "loss": 0.7301, "lr": 3.9935146162684206e-05, "epoch": 0.6144, "percentage": 12.31, "elapsed_time": "1:36:21", "remaining_time": "11:26:31"}
{"current_steps": 49, "total_steps": 390, "loss": 0.7173, "lr": 3.9919943690429906e-05, "epoch": 0.6272, "percentage": 12.56, "elapsed_time": "1:38:17", "remaining_time": "11:24:01"}
{"current_steps": 50, "total_steps": 390, "loss": 0.7195, "lr": 3.9903145446619837e-05, "epoch": 0.64, "percentage": 12.82, "elapsed_time": "1:40:17", "remaining_time": "11:21:58"}
{"current_steps": 51, "total_steps": 390, "loss": 0.7018, "lr": 3.9884752776948564e-05, "epoch": 0.6528, "percentage": 13.08, "elapsed_time": "1:42:28", "remaining_time": "11:21:09"}
{"current_steps": 52, "total_steps": 390, "loss": 0.6788, "lr": 3.9864767154838864e-05, "epoch": 0.6656, "percentage": 13.33, "elapsed_time": "1:44:32", "remaining_time": "11:19:30"}
{"current_steps": 53, "total_steps": 390, "loss": 0.6935, "lr": 3.9843190181323744e-05, "epoch": 0.6784, "percentage": 13.59, "elapsed_time": "1:46:30", "remaining_time": "11:17:15"}
{"current_steps": 54, "total_steps": 390, "loss": 0.7084, "lr": 3.982002358491817e-05, "epoch": 0.6912, "percentage": 13.85, "elapsed_time": "1:48:31", "remaining_time": "11:15:17"}
{"current_steps": 55, "total_steps": 390, "loss": 0.688, "lr": 3.979526922148058e-05, "epoch": 0.704, "percentage": 14.1, "elapsed_time": "1:50:29", "remaining_time": "11:13:00"}
{"current_steps": 56, "total_steps": 390, "loss": 0.6694, "lr": 3.9768929074064206e-05, "epoch": 0.7168, "percentage": 14.36, "elapsed_time": "1:52:26", "remaining_time": "11:10:39"}
{"current_steps": 57, "total_steps": 390, "loss": 0.7231, "lr": 3.9741005252758255e-05, "epoch": 0.7296, "percentage": 14.62, "elapsed_time": "1:54:08", "remaining_time": "11:06:50"}
{"current_steps": 58, "total_steps": 390, "loss": 0.6888, "lr": 3.971149999451886e-05, "epoch": 0.7424, "percentage": 14.87, "elapsed_time": "1:56:10", "remaining_time": "11:04:58"}
{"current_steps": 59, "total_steps": 390, "loss": 0.7031, "lr": 3.9680415662989806e-05, "epoch": 0.7552, "percentage": 15.13, "elapsed_time": "1:58:13", "remaining_time": "11:03:12"}
{"current_steps": 60, "total_steps": 390, "loss": 0.6963, "lr": 3.9647754748313294e-05, "epoch": 0.768, "percentage": 15.38, "elapsed_time": "2:00:17", "remaining_time": "11:01:34"}
{"current_steps": 61, "total_steps": 390, "loss": 0.6669, "lr": 3.96135198669304e-05, "epoch": 0.7808, "percentage": 15.64, "elapsed_time": "2:02:09", "remaining_time": "10:58:50"}
{"current_steps": 62, "total_steps": 390, "loss": 0.6948, "lr": 3.957771376137144e-05, "epoch": 0.7936, "percentage": 15.9, "elapsed_time": "2:04:09", "remaining_time": "10:56:48"}
{"current_steps": 63, "total_steps": 390, "loss": 0.6791, "lr": 3.954033930003634e-05, "epoch": 0.8064, "percentage": 16.15, "elapsed_time": "2:06:13", "remaining_time": "10:55:11"}
{"current_steps": 64, "total_steps": 390, "loss": 0.7102, "lr": 3.9501399476964806e-05, "epoch": 0.8192, "percentage": 16.41, "elapsed_time": "2:08:19", "remaining_time": "10:53:40"}
{"current_steps": 65, "total_steps": 390, "loss": 0.6859, "lr": 3.946089741159648e-05, "epoch": 0.832, "percentage": 16.67, "elapsed_time": "2:10:17", "remaining_time": "10:51:27"}
{"current_steps": 66, "total_steps": 390, "loss": 0.6926, "lr": 3.9418836348521045e-05, "epoch": 0.8448, "percentage": 16.92, "elapsed_time": "2:12:31", "remaining_time": "10:50:35"}
{"current_steps": 67, "total_steps": 390, "loss": 0.6814, "lr": 3.937521965721831e-05, "epoch": 0.8576, "percentage": 17.18, "elapsed_time": "2:14:52", "remaining_time": "10:50:12"}
{"current_steps": 68, "total_steps": 390, "loss": 0.6537, "lr": 3.933005083178828e-05, "epoch": 0.8704, "percentage": 17.44, "elapsed_time": "2:16:52", "remaining_time": "10:48:09"}
{"current_steps": 69, "total_steps": 390, "loss": 0.6903, "lr": 3.928333349067125e-05, "epoch": 0.8832, "percentage": 17.69, "elapsed_time": "2:18:46", "remaining_time": "10:45:34"}
{"current_steps": 70, "total_steps": 390, "loss": 0.6546, "lr": 3.923507137635792e-05, "epoch": 0.896, "percentage": 17.95, "elapsed_time": "2:20:48", "remaining_time": "10:43:40"}
{"current_steps": 71, "total_steps": 390, "loss": 0.6989, "lr": 3.9185268355089606e-05, "epoch": 0.9088, "percentage": 18.21, "elapsed_time": "2:22:48", "remaining_time": "10:41:38"}
{"current_steps": 72, "total_steps": 390, "loss": 0.6649, "lr": 3.913392841654851e-05, "epoch": 0.9216, "percentage": 18.46, "elapsed_time": "2:24:27", "remaining_time": "10:38:00"}
{"current_steps": 73, "total_steps": 390, "loss": 0.6491, "lr": 3.9081055673538093e-05, "epoch": 0.9344, "percentage": 18.72, "elapsed_time": "2:26:33", "remaining_time": "10:36:23"}
{"current_steps": 74, "total_steps": 390, "loss": 0.6613, "lr": 3.902665436165364e-05, "epoch": 0.9472, "percentage": 18.97, "elapsed_time": "2:28:40", "remaining_time": "10:34:52"}
{"current_steps": 75, "total_steps": 390, "loss": 0.656, "lr": 3.897072883894291e-05, "epoch": 0.96, "percentage": 19.23, "elapsed_time": "2:30:49", "remaining_time": "10:33:26"}
{"current_steps": 76, "total_steps": 390, "loss": 0.6658, "lr": 3.8913283585557054e-05, "epoch": 0.9728, "percentage": 19.49, "elapsed_time": "2:32:43", "remaining_time": "10:30:58"}
{"current_steps": 77, "total_steps": 390, "loss": 0.6404, "lr": 3.885432320339167e-05, "epoch": 0.9856, "percentage": 19.74, "elapsed_time": "2:34:41", "remaining_time": "10:28:50"}
{"current_steps": 78, "total_steps": 390, "loss": 0.6723, "lr": 3.879385241571817e-05, "epoch": 0.9984, "percentage": 20.0, "elapsed_time": "2:36:30", "remaining_time": "10:26:00"}
{"current_steps": 79, "total_steps": 390, "loss": 0.5684, "lr": 3.873187606680543e-05, "epoch": 1.0112, "percentage": 20.26, "elapsed_time": "2:39:21", "remaining_time": "10:27:19"}