m1-32b / trainer_log.jsonl
Can111's picture
Initial upload of qwen2.5-32b-instruct_deepseek-reasoner_2004_03-10-21_lr1e-5_wd1e-4_epo5_len32768_tbs1
d55c213 verified
raw
history blame
6.12 kB
{"current_steps": 10, "total_steps": 310, "loss": 0.8005, "lr": 6.25e-06, "epoch": 0.1593625498007968, "percentage": 3.23, "elapsed_time": "0:25:32", "remaining_time": "12:46:26"}
{"current_steps": 20, "total_steps": 310, "loss": 0.6192, "lr": 9.995433337085492e-06, "epoch": 0.3187250996015936, "percentage": 6.45, "elapsed_time": "0:51:14", "remaining_time": "12:22:54"}
{"current_steps": 30, "total_steps": 310, "loss": 0.5349, "lr": 9.944154131125643e-06, "epoch": 0.47808764940239046, "percentage": 9.68, "elapsed_time": "1:16:27", "remaining_time": "11:53:34"}
{"current_steps": 40, "total_steps": 310, "loss": 0.5105, "lr": 9.836474315195148e-06, "epoch": 0.6374501992031872, "percentage": 12.9, "elapsed_time": "1:42:06", "remaining_time": "11:29:13"}
{"current_steps": 50, "total_steps": 310, "loss": 0.4992, "lr": 9.673622250534155e-06, "epoch": 0.796812749003984, "percentage": 16.13, "elapsed_time": "2:07:30", "remaining_time": "11:03:00"}
{"current_steps": 60, "total_steps": 310, "loss": 0.4943, "lr": 9.457455677726447e-06, "epoch": 0.9561752988047809, "percentage": 19.35, "elapsed_time": "2:33:04", "remaining_time": "10:37:50"}
{"current_steps": 70, "total_steps": 310, "loss": 0.4301, "lr": 9.190440524459203e-06, "epoch": 1.1115537848605577, "percentage": 22.58, "elapsed_time": "2:57:23", "remaining_time": "10:08:13"}
{"current_steps": 80, "total_steps": 310, "loss": 0.3869, "lr": 8.87562277536726e-06, "epoch": 1.2709163346613546, "percentage": 25.81, "elapsed_time": "3:23:13", "remaining_time": "9:44:15"}
{"current_steps": 90, "total_steps": 310, "loss": 0.3895, "lr": 8.516593724857598e-06, "epoch": 1.4302788844621515, "percentage": 29.03, "elapsed_time": "3:48:43", "remaining_time": "9:19:05"}
{"current_steps": 100, "total_steps": 310, "loss": 0.3809, "lr": 8.117449009293668e-06, "epoch": 1.5896414342629481, "percentage": 32.26, "elapsed_time": "4:14:31", "remaining_time": "8:54:29"}
{"current_steps": 110, "total_steps": 310, "loss": 0.3707, "lr": 7.682741885881314e-06, "epoch": 1.749003984063745, "percentage": 35.48, "elapsed_time": "4:39:34", "remaining_time": "8:28:18"}
{"current_steps": 120, "total_steps": 310, "loss": 0.3831, "lr": 7.217431291229068e-06, "epoch": 1.908366533864542, "percentage": 38.71, "elapsed_time": "5:05:19", "remaining_time": "8:03:26"}
{"current_steps": 130, "total_steps": 310, "loss": 0.3343, "lr": 6.726825272106539e-06, "epoch": 2.0637450199203187, "percentage": 41.94, "elapsed_time": "5:29:52", "remaining_time": "7:36:44"}
{"current_steps": 140, "total_steps": 310, "loss": 0.2776, "lr": 6.216520433716544e-06, "epoch": 2.2231075697211153, "percentage": 45.16, "elapsed_time": "5:55:06", "remaining_time": "7:11:12"}
{"current_steps": 150, "total_steps": 310, "loss": 0.2751, "lr": 5.69233809622687e-06, "epoch": 2.3824701195219125, "percentage": 48.39, "elapsed_time": "6:20:32", "remaining_time": "6:45:54"}
{"current_steps": 160, "total_steps": 310, "loss": 0.2758, "lr": 5.160257887858278e-06, "epoch": 2.541832669322709, "percentage": 51.61, "elapsed_time": "6:45:33", "remaining_time": "6:20:12"}
{"current_steps": 170, "total_steps": 310, "loss": 0.2711, "lr": 4.626349532067879e-06, "epoch": 2.7011952191235062, "percentage": 54.84, "elapsed_time": "7:10:32", "remaining_time": "5:54:34"}
{"current_steps": 180, "total_steps": 310, "loss": 0.2685, "lr": 4.096703606968007e-06, "epoch": 2.860557768924303, "percentage": 58.06, "elapsed_time": "7:36:17", "remaining_time": "5:29:32"}
{"current_steps": 190, "total_steps": 310, "loss": 0.2626, "lr": 3.5773620668448384e-06, "epoch": 3.0159362549800797, "percentage": 61.29, "elapsed_time": "8:01:27", "remaining_time": "5:04:04"}
{"current_steps": 200, "total_steps": 310, "loss": 0.1978, "lr": 3.074249318355046e-06, "epoch": 3.1752988047808763, "percentage": 64.52, "elapsed_time": "8:26:39", "remaining_time": "4:38:39"}
{"current_steps": 210, "total_steps": 310, "loss": 0.1886, "lr": 2.5931046376510875e-06, "epoch": 3.3346613545816735, "percentage": 67.74, "elapsed_time": "8:52:17", "remaining_time": "4:13:28"}
{"current_steps": 220, "total_steps": 310, "loss": 0.1918, "lr": 2.139416699389153e-06, "epoch": 3.49402390438247, "percentage": 70.97, "elapsed_time": "9:17:50", "remaining_time": "3:48:12"}
{"current_steps": 230, "total_steps": 310, "loss": 0.1908, "lr": 1.7183609644824096e-06, "epoch": 3.653386454183267, "percentage": 74.19, "elapsed_time": "9:43:31", "remaining_time": "3:22:57"}
{"current_steps": 240, "total_steps": 310, "loss": 0.1758, "lr": 1.3347406408508695e-06, "epoch": 3.812749003984064, "percentage": 77.42, "elapsed_time": "10:08:40", "remaining_time": "2:57:31"}
{"current_steps": 250, "total_steps": 310, "loss": 0.1944, "lr": 9.929318906602176e-07, "epoch": 3.9721115537848606, "percentage": 80.65, "elapsed_time": "10:34:36", "remaining_time": "2:32:18"}
{"current_steps": 260, "total_steps": 310, "loss": 0.1561, "lr": 6.968339090999188e-07, "epoch": 4.127490039840637, "percentage": 83.87, "elapsed_time": "10:59:27", "remaining_time": "2:06:49"}
{"current_steps": 270, "total_steps": 310, "loss": 0.1381, "lr": 4.4982444417866753e-07, "epoch": 4.286852589641434, "percentage": 87.1, "elapsed_time": "11:24:31", "remaining_time": "1:41:24"}
{"current_steps": 280, "total_steps": 310, "loss": 0.1532, "lr": 2.547212649466568e-07, "epoch": 4.446215139442231, "percentage": 90.32, "elapsed_time": "11:50:04", "remaining_time": "1:16:04"}
{"current_steps": 290, "total_steps": 310, "loss": 0.153, "lr": 1.1375001769728e-07, "epoch": 4.605577689243028, "percentage": 93.55, "elapsed_time": "12:15:38", "remaining_time": "0:50:44"}
{"current_steps": 300, "total_steps": 310, "loss": 0.1556, "lr": 2.8518836829732332e-08, "epoch": 4.764940239043825, "percentage": 96.77, "elapsed_time": "12:41:26", "remaining_time": "0:25:22"}
{"current_steps": 310, "total_steps": 310, "loss": 0.1487, "lr": 0.0, "epoch": 4.924302788844622, "percentage": 100.0, "elapsed_time": "13:06:42", "remaining_time": "0:00:00"}
{"current_steps": 310, "total_steps": 310, "epoch": 4.924302788844622, "percentage": 100.0, "elapsed_time": "13:06:42", "remaining_time": "0:00:00"}