hp_ablations_mistral_bsz2048 / trainer_log.jsonl
sedrickkeh's picture
Training in progress, epoch 2
1293d6e verified
{"current_steps": 10, "total_steps": 369, "loss": 0.7483, "learning_rate": 5e-06, "epoch": 0.08097165991902834, "percentage": 2.71, "elapsed_time": "0:09:36", "remaining_time": "5:44:43"}
{"current_steps": 20, "total_steps": 369, "loss": 0.6389, "learning_rate": 5e-06, "epoch": 0.16194331983805668, "percentage": 5.42, "elapsed_time": "0:19:07", "remaining_time": "5:33:41"}
{"current_steps": 30, "total_steps": 369, "loss": 0.609, "learning_rate": 5e-06, "epoch": 0.242914979757085, "percentage": 8.13, "elapsed_time": "0:28:38", "remaining_time": "5:23:40"}
{"current_steps": 40, "total_steps": 369, "loss": 0.5935, "learning_rate": 5e-06, "epoch": 0.32388663967611336, "percentage": 10.84, "elapsed_time": "0:38:10", "remaining_time": "5:14:02"}
{"current_steps": 50, "total_steps": 369, "loss": 0.5878, "learning_rate": 5e-06, "epoch": 0.4048582995951417, "percentage": 13.55, "elapsed_time": "0:47:41", "remaining_time": "5:04:17"}
{"current_steps": 60, "total_steps": 369, "loss": 0.5781, "learning_rate": 5e-06, "epoch": 0.48582995951417, "percentage": 16.26, "elapsed_time": "0:57:11", "remaining_time": "4:54:33"}
{"current_steps": 70, "total_steps": 369, "loss": 0.5704, "learning_rate": 5e-06, "epoch": 0.5668016194331984, "percentage": 18.97, "elapsed_time": "1:06:42", "remaining_time": "4:44:56"}
{"current_steps": 80, "total_steps": 369, "loss": 0.5698, "learning_rate": 5e-06, "epoch": 0.6477732793522267, "percentage": 21.68, "elapsed_time": "1:16:13", "remaining_time": "4:35:22"}
{"current_steps": 90, "total_steps": 369, "loss": 0.5695, "learning_rate": 5e-06, "epoch": 0.728744939271255, "percentage": 24.39, "elapsed_time": "1:25:44", "remaining_time": "4:25:47"}
{"current_steps": 100, "total_steps": 369, "loss": 0.563, "learning_rate": 5e-06, "epoch": 0.8097165991902834, "percentage": 27.1, "elapsed_time": "1:35:15", "remaining_time": "4:16:14"}
{"current_steps": 110, "total_steps": 369, "loss": 0.5615, "learning_rate": 5e-06, "epoch": 0.8906882591093117, "percentage": 29.81, "elapsed_time": "1:44:46", "remaining_time": "4:06:42"}
{"current_steps": 120, "total_steps": 369, "loss": 0.5579, "learning_rate": 5e-06, "epoch": 0.97165991902834, "percentage": 32.52, "elapsed_time": "1:54:16", "remaining_time": "3:57:06"}
{"current_steps": 123, "total_steps": 369, "eval_loss": 0.06959892809391022, "epoch": 0.9959514170040485, "percentage": 33.33, "elapsed_time": "1:59:42", "remaining_time": "3:59:24"}
{"current_steps": 130, "total_steps": 369, "loss": 0.5311, "learning_rate": 5e-06, "epoch": 1.0526315789473684, "percentage": 35.23, "elapsed_time": "2:06:40", "remaining_time": "3:52:52"}
{"current_steps": 140, "total_steps": 369, "loss": 0.514, "learning_rate": 5e-06, "epoch": 1.1336032388663968, "percentage": 37.94, "elapsed_time": "2:16:11", "remaining_time": "3:42:46"}
{"current_steps": 150, "total_steps": 369, "loss": 0.5142, "learning_rate": 5e-06, "epoch": 1.214574898785425, "percentage": 40.65, "elapsed_time": "2:25:42", "remaining_time": "3:32:43"}
{"current_steps": 160, "total_steps": 369, "loss": 0.5122, "learning_rate": 5e-06, "epoch": 1.2955465587044535, "percentage": 43.36, "elapsed_time": "2:35:13", "remaining_time": "3:22:46"}
{"current_steps": 170, "total_steps": 369, "loss": 0.5122, "learning_rate": 5e-06, "epoch": 1.376518218623482, "percentage": 46.07, "elapsed_time": "2:44:46", "remaining_time": "3:12:52"}
{"current_steps": 180, "total_steps": 369, "loss": 0.5125, "learning_rate": 5e-06, "epoch": 1.45748987854251, "percentage": 48.78, "elapsed_time": "2:54:18", "remaining_time": "3:03:01"}
{"current_steps": 190, "total_steps": 369, "loss": 0.512, "learning_rate": 5e-06, "epoch": 1.5384615384615383, "percentage": 51.49, "elapsed_time": "3:03:49", "remaining_time": "2:53:10"}
{"current_steps": 200, "total_steps": 369, "loss": 0.5101, "learning_rate": 5e-06, "epoch": 1.6194331983805668, "percentage": 54.2, "elapsed_time": "3:13:21", "remaining_time": "2:43:23"}
{"current_steps": 210, "total_steps": 369, "loss": 0.5065, "learning_rate": 5e-06, "epoch": 1.7004048582995952, "percentage": 56.91, "elapsed_time": "3:22:53", "remaining_time": "2:33:36"}
{"current_steps": 220, "total_steps": 369, "loss": 0.511, "learning_rate": 5e-06, "epoch": 1.7813765182186234, "percentage": 59.62, "elapsed_time": "3:32:25", "remaining_time": "2:23:51"}
{"current_steps": 230, "total_steps": 369, "loss": 0.5106, "learning_rate": 5e-06, "epoch": 1.8623481781376519, "percentage": 62.33, "elapsed_time": "3:41:57", "remaining_time": "2:14:08"}
{"current_steps": 240, "total_steps": 369, "loss": 0.5102, "learning_rate": 5e-06, "epoch": 1.9433198380566803, "percentage": 65.04, "elapsed_time": "3:51:28", "remaining_time": "2:04:25"}
{"current_steps": 247, "total_steps": 369, "eval_loss": 0.06831000745296478, "epoch": 2.0, "percentage": 66.94, "elapsed_time": "4:00:11", "remaining_time": "1:58:38"}
{"current_steps": 250, "total_steps": 369, "loss": 0.4952, "learning_rate": 5e-06, "epoch": 2.0242914979757085, "percentage": 67.75, "elapsed_time": "4:03:49", "remaining_time": "1:56:03"}
{"current_steps": 260, "total_steps": 369, "loss": 0.4582, "learning_rate": 5e-06, "epoch": 2.1052631578947367, "percentage": 70.46, "elapsed_time": "4:13:20", "remaining_time": "1:46:12"}
{"current_steps": 270, "total_steps": 369, "loss": 0.4596, "learning_rate": 5e-06, "epoch": 2.1862348178137654, "percentage": 73.17, "elapsed_time": "4:22:51", "remaining_time": "1:36:23"}
{"current_steps": 280, "total_steps": 369, "loss": 0.4599, "learning_rate": 5e-06, "epoch": 2.2672064777327936, "percentage": 75.88, "elapsed_time": "4:32:23", "remaining_time": "1:26:34"}
{"current_steps": 290, "total_steps": 369, "loss": 0.4623, "learning_rate": 5e-06, "epoch": 2.348178137651822, "percentage": 78.59, "elapsed_time": "4:41:53", "remaining_time": "1:16:47"}
{"current_steps": 300, "total_steps": 369, "loss": 0.4637, "learning_rate": 5e-06, "epoch": 2.42914979757085, "percentage": 81.3, "elapsed_time": "4:51:24", "remaining_time": "1:07:01"}
{"current_steps": 310, "total_steps": 369, "loss": 0.4649, "learning_rate": 5e-06, "epoch": 2.5101214574898787, "percentage": 84.01, "elapsed_time": "5:00:56", "remaining_time": "0:57:16"}
{"current_steps": 320, "total_steps": 369, "loss": 0.4618, "learning_rate": 5e-06, "epoch": 2.591093117408907, "percentage": 86.72, "elapsed_time": "5:10:27", "remaining_time": "0:47:32"}
{"current_steps": 330, "total_steps": 369, "loss": 0.4669, "learning_rate": 5e-06, "epoch": 2.672064777327935, "percentage": 89.43, "elapsed_time": "5:19:59", "remaining_time": "0:37:49"}
{"current_steps": 340, "total_steps": 369, "loss": 0.4674, "learning_rate": 5e-06, "epoch": 2.753036437246964, "percentage": 92.14, "elapsed_time": "5:29:31", "remaining_time": "0:28:06"}
{"current_steps": 350, "total_steps": 369, "loss": 0.4647, "learning_rate": 5e-06, "epoch": 2.834008097165992, "percentage": 94.85, "elapsed_time": "5:39:02", "remaining_time": "0:18:24"}
{"current_steps": 360, "total_steps": 369, "loss": 0.4643, "learning_rate": 5e-06, "epoch": 2.91497975708502, "percentage": 97.56, "elapsed_time": "5:48:33", "remaining_time": "0:08:42"}
{"current_steps": 369, "total_steps": 369, "eval_loss": 0.06875835359096527, "epoch": 2.9878542510121457, "percentage": 100.0, "elapsed_time": "6:00:01", "remaining_time": "0:00:00"}
{"current_steps": 369, "total_steps": 369, "epoch": 2.9878542510121457, "percentage": 100.0, "elapsed_time": "6:00:54", "remaining_time": "0:00:00"}