oh_v1.3_opengpt_x.5 / trainer_log.jsonl
gsmyrnis's picture
Training in progress, epoch 3
6fa821b verified
{"current_steps": 10, "total_steps": 819, "loss": 1.0405, "learning_rate": 5e-06, "epoch": 0.03663003663003663, "percentage": 1.22, "elapsed_time": "0:00:59", "remaining_time": "1:19:42"}
{"current_steps": 20, "total_steps": 819, "loss": 0.9101, "learning_rate": 5e-06, "epoch": 0.07326007326007326, "percentage": 2.44, "elapsed_time": "0:01:57", "remaining_time": "1:17:58"}
{"current_steps": 30, "total_steps": 819, "loss": 0.8722, "learning_rate": 5e-06, "epoch": 0.10989010989010989, "percentage": 3.66, "elapsed_time": "0:02:55", "remaining_time": "1:16:44"}
{"current_steps": 40, "total_steps": 819, "loss": 0.8529, "learning_rate": 5e-06, "epoch": 0.14652014652014653, "percentage": 4.88, "elapsed_time": "0:03:53", "remaining_time": "1:15:38"}
{"current_steps": 50, "total_steps": 819, "loss": 0.8259, "learning_rate": 5e-06, "epoch": 0.18315018315018314, "percentage": 6.11, "elapsed_time": "0:04:51", "remaining_time": "1:14:35"}
{"current_steps": 60, "total_steps": 819, "loss": 0.8144, "learning_rate": 5e-06, "epoch": 0.21978021978021978, "percentage": 7.33, "elapsed_time": "0:05:48", "remaining_time": "1:13:34"}
{"current_steps": 70, "total_steps": 819, "loss": 0.7986, "learning_rate": 5e-06, "epoch": 0.2564102564102564, "percentage": 8.55, "elapsed_time": "0:06:46", "remaining_time": "1:12:34"}
{"current_steps": 80, "total_steps": 819, "loss": 0.7911, "learning_rate": 5e-06, "epoch": 0.29304029304029305, "percentage": 9.77, "elapsed_time": "0:07:44", "remaining_time": "1:11:35"}
{"current_steps": 90, "total_steps": 819, "loss": 0.7807, "learning_rate": 5e-06, "epoch": 0.32967032967032966, "percentage": 10.99, "elapsed_time": "0:08:42", "remaining_time": "1:10:36"}
{"current_steps": 100, "total_steps": 819, "loss": 0.7819, "learning_rate": 5e-06, "epoch": 0.3663003663003663, "percentage": 12.21, "elapsed_time": "0:09:40", "remaining_time": "1:09:36"}
{"current_steps": 110, "total_steps": 819, "loss": 0.7791, "learning_rate": 5e-06, "epoch": 0.40293040293040294, "percentage": 13.43, "elapsed_time": "0:10:38", "remaining_time": "1:08:38"}
{"current_steps": 120, "total_steps": 819, "loss": 0.7688, "learning_rate": 5e-06, "epoch": 0.43956043956043955, "percentage": 14.65, "elapsed_time": "0:11:36", "remaining_time": "1:07:39"}
{"current_steps": 130, "total_steps": 819, "loss": 0.7669, "learning_rate": 5e-06, "epoch": 0.47619047619047616, "percentage": 15.87, "elapsed_time": "0:12:34", "remaining_time": "1:06:41"}
{"current_steps": 140, "total_steps": 819, "loss": 0.764, "learning_rate": 5e-06, "epoch": 0.5128205128205128, "percentage": 17.09, "elapsed_time": "0:13:32", "remaining_time": "1:05:42"}
{"current_steps": 150, "total_steps": 819, "loss": 0.7586, "learning_rate": 5e-06, "epoch": 0.5494505494505495, "percentage": 18.32, "elapsed_time": "0:14:30", "remaining_time": "1:04:44"}
{"current_steps": 160, "total_steps": 819, "loss": 0.7572, "learning_rate": 5e-06, "epoch": 0.5860805860805861, "percentage": 19.54, "elapsed_time": "0:15:28", "remaining_time": "1:03:45"}
{"current_steps": 170, "total_steps": 819, "loss": 0.7574, "learning_rate": 5e-06, "epoch": 0.6227106227106227, "percentage": 20.76, "elapsed_time": "0:16:26", "remaining_time": "1:02:46"}
{"current_steps": 180, "total_steps": 819, "loss": 0.7568, "learning_rate": 5e-06, "epoch": 0.6593406593406593, "percentage": 21.98, "elapsed_time": "0:17:24", "remaining_time": "1:01:48"}
{"current_steps": 190, "total_steps": 819, "loss": 0.7472, "learning_rate": 5e-06, "epoch": 0.6959706959706959, "percentage": 23.2, "elapsed_time": "0:18:22", "remaining_time": "1:00:50"}
{"current_steps": 200, "total_steps": 819, "loss": 0.751, "learning_rate": 5e-06, "epoch": 0.7326007326007326, "percentage": 24.42, "elapsed_time": "0:19:20", "remaining_time": "0:59:52"}
{"current_steps": 210, "total_steps": 819, "loss": 0.7477, "learning_rate": 5e-06, "epoch": 0.7692307692307693, "percentage": 25.64, "elapsed_time": "0:20:18", "remaining_time": "0:58:53"}
{"current_steps": 220, "total_steps": 819, "loss": 0.7473, "learning_rate": 5e-06, "epoch": 0.8058608058608059, "percentage": 26.86, "elapsed_time": "0:21:16", "remaining_time": "0:57:55"}
{"current_steps": 230, "total_steps": 819, "loss": 0.7422, "learning_rate": 5e-06, "epoch": 0.8424908424908425, "percentage": 28.08, "elapsed_time": "0:22:14", "remaining_time": "0:56:57"}
{"current_steps": 240, "total_steps": 819, "loss": 0.7417, "learning_rate": 5e-06, "epoch": 0.8791208791208791, "percentage": 29.3, "elapsed_time": "0:23:12", "remaining_time": "0:55:59"}
{"current_steps": 250, "total_steps": 819, "loss": 0.7409, "learning_rate": 5e-06, "epoch": 0.9157509157509157, "percentage": 30.53, "elapsed_time": "0:24:10", "remaining_time": "0:55:01"}
{"current_steps": 260, "total_steps": 819, "loss": 0.7431, "learning_rate": 5e-06, "epoch": 0.9523809523809523, "percentage": 31.75, "elapsed_time": "0:25:08", "remaining_time": "0:54:03"}
{"current_steps": 270, "total_steps": 819, "loss": 0.742, "learning_rate": 5e-06, "epoch": 0.989010989010989, "percentage": 32.97, "elapsed_time": "0:26:06", "remaining_time": "0:53:04"}
{"current_steps": 273, "total_steps": 819, "eval_loss": 0.7415268421173096, "epoch": 1.0, "percentage": 33.33, "elapsed_time": "0:26:49", "remaining_time": "0:53:39"}
{"current_steps": 280, "total_steps": 819, "loss": 0.7069, "learning_rate": 5e-06, "epoch": 1.0256410256410255, "percentage": 34.19, "elapsed_time": "0:28:59", "remaining_time": "0:55:48"}
{"current_steps": 290, "total_steps": 819, "loss": 0.6897, "learning_rate": 5e-06, "epoch": 1.0622710622710623, "percentage": 35.41, "elapsed_time": "0:29:57", "remaining_time": "0:54:38"}
{"current_steps": 300, "total_steps": 819, "loss": 0.683, "learning_rate": 5e-06, "epoch": 1.098901098901099, "percentage": 36.63, "elapsed_time": "0:30:55", "remaining_time": "0:53:30"}
{"current_steps": 310, "total_steps": 819, "loss": 0.6868, "learning_rate": 5e-06, "epoch": 1.1355311355311355, "percentage": 37.85, "elapsed_time": "0:31:53", "remaining_time": "0:52:22"}
{"current_steps": 320, "total_steps": 819, "loss": 0.6831, "learning_rate": 5e-06, "epoch": 1.1721611721611722, "percentage": 39.07, "elapsed_time": "0:32:51", "remaining_time": "0:51:15"}
{"current_steps": 330, "total_steps": 819, "loss": 0.6869, "learning_rate": 5e-06, "epoch": 1.2087912087912087, "percentage": 40.29, "elapsed_time": "0:33:50", "remaining_time": "0:50:08"}
{"current_steps": 340, "total_steps": 819, "loss": 0.6878, "learning_rate": 5e-06, "epoch": 1.2454212454212454, "percentage": 41.51, "elapsed_time": "0:34:48", "remaining_time": "0:49:02"}
{"current_steps": 350, "total_steps": 819, "loss": 0.6925, "learning_rate": 5e-06, "epoch": 1.282051282051282, "percentage": 42.74, "elapsed_time": "0:35:46", "remaining_time": "0:47:56"}
{"current_steps": 360, "total_steps": 819, "loss": 0.6876, "learning_rate": 5e-06, "epoch": 1.3186813186813187, "percentage": 43.96, "elapsed_time": "0:36:44", "remaining_time": "0:46:51"}
{"current_steps": 370, "total_steps": 819, "loss": 0.6931, "learning_rate": 5e-06, "epoch": 1.3553113553113554, "percentage": 45.18, "elapsed_time": "0:37:42", "remaining_time": "0:45:46"}
{"current_steps": 380, "total_steps": 819, "loss": 0.6866, "learning_rate": 5e-06, "epoch": 1.3919413919413919, "percentage": 46.4, "elapsed_time": "0:38:41", "remaining_time": "0:44:41"}
{"current_steps": 390, "total_steps": 819, "loss": 0.6906, "learning_rate": 5e-06, "epoch": 1.4285714285714286, "percentage": 47.62, "elapsed_time": "0:39:39", "remaining_time": "0:43:37"}
{"current_steps": 400, "total_steps": 819, "loss": 0.6887, "learning_rate": 5e-06, "epoch": 1.4652014652014653, "percentage": 48.84, "elapsed_time": "0:40:37", "remaining_time": "0:42:33"}
{"current_steps": 410, "total_steps": 819, "loss": 0.6873, "learning_rate": 5e-06, "epoch": 1.5018315018315018, "percentage": 50.06, "elapsed_time": "0:41:35", "remaining_time": "0:41:29"}
{"current_steps": 420, "total_steps": 819, "loss": 0.6843, "learning_rate": 5e-06, "epoch": 1.5384615384615383, "percentage": 51.28, "elapsed_time": "0:42:34", "remaining_time": "0:40:26"}
{"current_steps": 430, "total_steps": 819, "loss": 0.6864, "learning_rate": 5e-06, "epoch": 1.575091575091575, "percentage": 52.5, "elapsed_time": "0:43:32", "remaining_time": "0:39:23"}
{"current_steps": 440, "total_steps": 819, "loss": 0.6839, "learning_rate": 5e-06, "epoch": 1.6117216117216118, "percentage": 53.72, "elapsed_time": "0:44:30", "remaining_time": "0:38:19"}
{"current_steps": 450, "total_steps": 819, "loss": 0.6928, "learning_rate": 5e-06, "epoch": 1.6483516483516483, "percentage": 54.95, "elapsed_time": "0:45:27", "remaining_time": "0:37:16"}
{"current_steps": 460, "total_steps": 819, "loss": 0.6834, "learning_rate": 5e-06, "epoch": 1.684981684981685, "percentage": 56.17, "elapsed_time": "0:46:25", "remaining_time": "0:36:14"}
{"current_steps": 470, "total_steps": 819, "loss": 0.6871, "learning_rate": 5e-06, "epoch": 1.7216117216117217, "percentage": 57.39, "elapsed_time": "0:47:23", "remaining_time": "0:35:11"}
{"current_steps": 480, "total_steps": 819, "loss": 0.6871, "learning_rate": 5e-06, "epoch": 1.7582417582417582, "percentage": 58.61, "elapsed_time": "0:48:21", "remaining_time": "0:34:09"}
{"current_steps": 490, "total_steps": 819, "loss": 0.6853, "learning_rate": 5e-06, "epoch": 1.7948717948717947, "percentage": 59.83, "elapsed_time": "0:49:19", "remaining_time": "0:33:07"}
{"current_steps": 500, "total_steps": 819, "loss": 0.6894, "learning_rate": 5e-06, "epoch": 1.8315018315018317, "percentage": 61.05, "elapsed_time": "0:50:17", "remaining_time": "0:32:05"}
{"current_steps": 510, "total_steps": 819, "loss": 0.6857, "learning_rate": 5e-06, "epoch": 1.8681318681318682, "percentage": 62.27, "elapsed_time": "0:51:15", "remaining_time": "0:31:03"}
{"current_steps": 520, "total_steps": 819, "loss": 0.6821, "learning_rate": 5e-06, "epoch": 1.9047619047619047, "percentage": 63.49, "elapsed_time": "0:52:13", "remaining_time": "0:30:01"}
{"current_steps": 530, "total_steps": 819, "loss": 0.6867, "learning_rate": 5e-06, "epoch": 1.9413919413919414, "percentage": 64.71, "elapsed_time": "0:53:11", "remaining_time": "0:29:00"}
{"current_steps": 540, "total_steps": 819, "loss": 0.6821, "learning_rate": 5e-06, "epoch": 1.978021978021978, "percentage": 65.93, "elapsed_time": "0:54:09", "remaining_time": "0:27:58"}
{"current_steps": 546, "total_steps": 819, "eval_loss": 0.7290822267532349, "epoch": 2.0, "percentage": 66.67, "elapsed_time": "0:55:09", "remaining_time": "0:27:34"}
{"current_steps": 550, "total_steps": 819, "loss": 0.6593, "learning_rate": 5e-06, "epoch": 2.0146520146520146, "percentage": 67.16, "elapsed_time": "0:57:13", "remaining_time": "0:27:59"}
{"current_steps": 560, "total_steps": 819, "loss": 0.6276, "learning_rate": 5e-06, "epoch": 2.051282051282051, "percentage": 68.38, "elapsed_time": "0:58:11", "remaining_time": "0:26:55"}
{"current_steps": 570, "total_steps": 819, "loss": 0.6319, "learning_rate": 5e-06, "epoch": 2.087912087912088, "percentage": 69.6, "elapsed_time": "0:59:10", "remaining_time": "0:25:50"}
{"current_steps": 580, "total_steps": 819, "loss": 0.6317, "learning_rate": 5e-06, "epoch": 2.1245421245421245, "percentage": 70.82, "elapsed_time": "1:00:08", "remaining_time": "0:24:46"}
{"current_steps": 590, "total_steps": 819, "loss": 0.6303, "learning_rate": 5e-06, "epoch": 2.161172161172161, "percentage": 72.04, "elapsed_time": "1:01:06", "remaining_time": "0:23:43"}
{"current_steps": 600, "total_steps": 819, "loss": 0.6336, "learning_rate": 5e-06, "epoch": 2.197802197802198, "percentage": 73.26, "elapsed_time": "1:02:04", "remaining_time": "0:22:39"}
{"current_steps": 610, "total_steps": 819, "loss": 0.6324, "learning_rate": 5e-06, "epoch": 2.2344322344322345, "percentage": 74.48, "elapsed_time": "1:03:03", "remaining_time": "0:21:36"}
{"current_steps": 620, "total_steps": 819, "loss": 0.6354, "learning_rate": 5e-06, "epoch": 2.271062271062271, "percentage": 75.7, "elapsed_time": "1:04:01", "remaining_time": "0:20:32"}
{"current_steps": 630, "total_steps": 819, "loss": 0.6355, "learning_rate": 5e-06, "epoch": 2.3076923076923075, "percentage": 76.92, "elapsed_time": "1:04:59", "remaining_time": "0:19:29"}
{"current_steps": 640, "total_steps": 819, "loss": 0.6323, "learning_rate": 5e-06, "epoch": 2.3443223443223444, "percentage": 78.14, "elapsed_time": "1:05:57", "remaining_time": "0:18:26"}
{"current_steps": 650, "total_steps": 819, "loss": 0.6351, "learning_rate": 5e-06, "epoch": 2.380952380952381, "percentage": 79.37, "elapsed_time": "1:06:56", "remaining_time": "0:17:24"}
{"current_steps": 660, "total_steps": 819, "loss": 0.6309, "learning_rate": 5e-06, "epoch": 2.4175824175824174, "percentage": 80.59, "elapsed_time": "1:07:54", "remaining_time": "0:16:21"}
{"current_steps": 670, "total_steps": 819, "loss": 0.6335, "learning_rate": 5e-06, "epoch": 2.4542124542124544, "percentage": 81.81, "elapsed_time": "1:08:52", "remaining_time": "0:15:19"}
{"current_steps": 680, "total_steps": 819, "loss": 0.6383, "learning_rate": 5e-06, "epoch": 2.490842490842491, "percentage": 83.03, "elapsed_time": "1:09:50", "remaining_time": "0:14:16"}
{"current_steps": 690, "total_steps": 819, "loss": 0.6342, "learning_rate": 5e-06, "epoch": 2.5274725274725274, "percentage": 84.25, "elapsed_time": "1:10:49", "remaining_time": "0:13:14"}
{"current_steps": 700, "total_steps": 819, "loss": 0.6354, "learning_rate": 5e-06, "epoch": 2.564102564102564, "percentage": 85.47, "elapsed_time": "1:11:47", "remaining_time": "0:12:12"}
{"current_steps": 710, "total_steps": 819, "loss": 0.6332, "learning_rate": 5e-06, "epoch": 2.600732600732601, "percentage": 86.69, "elapsed_time": "1:12:45", "remaining_time": "0:11:10"}
{"current_steps": 720, "total_steps": 819, "loss": 0.6382, "learning_rate": 5e-06, "epoch": 2.6373626373626373, "percentage": 87.91, "elapsed_time": "1:13:43", "remaining_time": "0:10:08"}
{"current_steps": 730, "total_steps": 819, "loss": 0.637, "learning_rate": 5e-06, "epoch": 2.6739926739926743, "percentage": 89.13, "elapsed_time": "1:14:41", "remaining_time": "0:09:06"}
{"current_steps": 740, "total_steps": 819, "loss": 0.6336, "learning_rate": 5e-06, "epoch": 2.7106227106227108, "percentage": 90.35, "elapsed_time": "1:15:40", "remaining_time": "0:08:04"}
{"current_steps": 750, "total_steps": 819, "loss": 0.6371, "learning_rate": 5e-06, "epoch": 2.7472527472527473, "percentage": 91.58, "elapsed_time": "1:16:38", "remaining_time": "0:07:03"}
{"current_steps": 760, "total_steps": 819, "loss": 0.6333, "learning_rate": 5e-06, "epoch": 2.7838827838827838, "percentage": 92.8, "elapsed_time": "1:17:36", "remaining_time": "0:06:01"}
{"current_steps": 770, "total_steps": 819, "loss": 0.6356, "learning_rate": 5e-06, "epoch": 2.8205128205128203, "percentage": 94.02, "elapsed_time": "1:18:34", "remaining_time": "0:05:00"}
{"current_steps": 780, "total_steps": 819, "loss": 0.6336, "learning_rate": 5e-06, "epoch": 2.857142857142857, "percentage": 95.24, "elapsed_time": "1:19:32", "remaining_time": "0:03:58"}
{"current_steps": 790, "total_steps": 819, "loss": 0.6356, "learning_rate": 5e-06, "epoch": 2.8937728937728937, "percentage": 96.46, "elapsed_time": "1:20:31", "remaining_time": "0:02:57"}
{"current_steps": 800, "total_steps": 819, "loss": 0.6352, "learning_rate": 5e-06, "epoch": 2.9304029304029307, "percentage": 97.68, "elapsed_time": "1:21:29", "remaining_time": "0:01:56"}
{"current_steps": 810, "total_steps": 819, "loss": 0.6339, "learning_rate": 5e-06, "epoch": 2.967032967032967, "percentage": 98.9, "elapsed_time": "1:22:27", "remaining_time": "0:00:54"}
{"current_steps": 819, "total_steps": 819, "eval_loss": 0.7330417037010193, "epoch": 3.0, "percentage": 100.0, "elapsed_time": "1:25:17", "remaining_time": "0:00:00"}
{"current_steps": 819, "total_steps": 819, "epoch": 3.0, "percentage": 100.0, "elapsed_time": "1:26:58", "remaining_time": "0:00:00"}