oh_v1.3_opengpt_x2 / trainer_log.jsonl
gsmyrnis's picture
Training in progress, epoch 3
7ab0ec6 verified
{"current_steps": 10, "total_steps": 822, "loss": 1.0327, "learning_rate": 5e-06, "epoch": 0.0364963503649635, "percentage": 1.22, "elapsed_time": "0:01:01", "remaining_time": "1:22:35"}
{"current_steps": 20, "total_steps": 822, "loss": 0.9089, "learning_rate": 5e-06, "epoch": 0.072992700729927, "percentage": 2.43, "elapsed_time": "0:01:58", "remaining_time": "1:19:26"}
{"current_steps": 30, "total_steps": 822, "loss": 0.8647, "learning_rate": 5e-06, "epoch": 0.10948905109489052, "percentage": 3.65, "elapsed_time": "0:02:56", "remaining_time": "1:17:44"}
{"current_steps": 40, "total_steps": 822, "loss": 0.8455, "learning_rate": 5e-06, "epoch": 0.145985401459854, "percentage": 4.87, "elapsed_time": "0:03:54", "remaining_time": "1:16:25"}
{"current_steps": 50, "total_steps": 822, "loss": 0.8261, "learning_rate": 5e-06, "epoch": 0.18248175182481752, "percentage": 6.08, "elapsed_time": "0:04:52", "remaining_time": "1:15:14"}
{"current_steps": 60, "total_steps": 822, "loss": 0.8215, "learning_rate": 5e-06, "epoch": 0.21897810218978103, "percentage": 7.3, "elapsed_time": "0:05:50", "remaining_time": "1:14:07"}
{"current_steps": 70, "total_steps": 822, "loss": 0.806, "learning_rate": 5e-06, "epoch": 0.25547445255474455, "percentage": 8.52, "elapsed_time": "0:06:48", "remaining_time": "1:13:04"}
{"current_steps": 80, "total_steps": 822, "loss": 0.7944, "learning_rate": 5e-06, "epoch": 0.291970802919708, "percentage": 9.73, "elapsed_time": "0:07:45", "remaining_time": "1:12:01"}
{"current_steps": 90, "total_steps": 822, "loss": 0.7843, "learning_rate": 5e-06, "epoch": 0.3284671532846715, "percentage": 10.95, "elapsed_time": "0:08:43", "remaining_time": "1:10:59"}
{"current_steps": 100, "total_steps": 822, "loss": 0.7782, "learning_rate": 5e-06, "epoch": 0.36496350364963503, "percentage": 12.17, "elapsed_time": "0:09:41", "remaining_time": "1:09:59"}
{"current_steps": 110, "total_steps": 822, "loss": 0.7704, "learning_rate": 5e-06, "epoch": 0.40145985401459855, "percentage": 13.38, "elapsed_time": "0:10:39", "remaining_time": "1:08:58"}
{"current_steps": 120, "total_steps": 822, "loss": 0.7721, "learning_rate": 5e-06, "epoch": 0.43795620437956206, "percentage": 14.6, "elapsed_time": "0:11:37", "remaining_time": "1:07:58"}
{"current_steps": 130, "total_steps": 822, "loss": 0.7694, "learning_rate": 5e-06, "epoch": 0.4744525547445255, "percentage": 15.82, "elapsed_time": "0:12:35", "remaining_time": "1:06:59"}
{"current_steps": 140, "total_steps": 822, "loss": 0.7607, "learning_rate": 5e-06, "epoch": 0.5109489051094891, "percentage": 17.03, "elapsed_time": "0:13:32", "remaining_time": "1:05:59"}
{"current_steps": 150, "total_steps": 822, "loss": 0.7654, "learning_rate": 5e-06, "epoch": 0.5474452554744526, "percentage": 18.25, "elapsed_time": "0:14:30", "remaining_time": "1:05:00"}
{"current_steps": 160, "total_steps": 822, "loss": 0.7569, "learning_rate": 5e-06, "epoch": 0.583941605839416, "percentage": 19.46, "elapsed_time": "0:15:28", "remaining_time": "1:04:01"}
{"current_steps": 170, "total_steps": 822, "loss": 0.7558, "learning_rate": 5e-06, "epoch": 0.6204379562043796, "percentage": 20.68, "elapsed_time": "0:16:26", "remaining_time": "1:03:03"}
{"current_steps": 180, "total_steps": 822, "loss": 0.7555, "learning_rate": 5e-06, "epoch": 0.656934306569343, "percentage": 21.9, "elapsed_time": "0:17:24", "remaining_time": "1:02:04"}
{"current_steps": 190, "total_steps": 822, "loss": 0.7514, "learning_rate": 5e-06, "epoch": 0.6934306569343066, "percentage": 23.11, "elapsed_time": "0:18:22", "remaining_time": "1:01:05"}
{"current_steps": 200, "total_steps": 822, "loss": 0.7498, "learning_rate": 5e-06, "epoch": 0.7299270072992701, "percentage": 24.33, "elapsed_time": "0:19:19", "remaining_time": "1:00:07"}
{"current_steps": 210, "total_steps": 822, "loss": 0.747, "learning_rate": 5e-06, "epoch": 0.7664233576642335, "percentage": 25.55, "elapsed_time": "0:20:17", "remaining_time": "0:59:08"}
{"current_steps": 220, "total_steps": 822, "loss": 0.7455, "learning_rate": 5e-06, "epoch": 0.8029197080291971, "percentage": 26.76, "elapsed_time": "0:21:15", "remaining_time": "0:58:10"}
{"current_steps": 230, "total_steps": 822, "loss": 0.7437, "learning_rate": 5e-06, "epoch": 0.8394160583941606, "percentage": 27.98, "elapsed_time": "0:22:13", "remaining_time": "0:57:12"}
{"current_steps": 240, "total_steps": 822, "loss": 0.7421, "learning_rate": 5e-06, "epoch": 0.8759124087591241, "percentage": 29.2, "elapsed_time": "0:23:11", "remaining_time": "0:56:13"}
{"current_steps": 250, "total_steps": 822, "loss": 0.7423, "learning_rate": 5e-06, "epoch": 0.9124087591240876, "percentage": 30.41, "elapsed_time": "0:24:09", "remaining_time": "0:55:15"}
{"current_steps": 260, "total_steps": 822, "loss": 0.7427, "learning_rate": 5e-06, "epoch": 0.948905109489051, "percentage": 31.63, "elapsed_time": "0:25:06", "remaining_time": "0:54:17"}
{"current_steps": 270, "total_steps": 822, "loss": 0.7416, "learning_rate": 5e-06, "epoch": 0.9854014598540146, "percentage": 32.85, "elapsed_time": "0:26:04", "remaining_time": "0:53:19"}
{"current_steps": 274, "total_steps": 822, "eval_loss": 0.7413060665130615, "epoch": 1.0, "percentage": 33.33, "elapsed_time": "0:26:53", "remaining_time": "0:53:47"}
{"current_steps": 280, "total_steps": 822, "loss": 0.7114, "learning_rate": 5e-06, "epoch": 1.0218978102189782, "percentage": 34.06, "elapsed_time": "0:28:59", "remaining_time": "0:56:07"}
{"current_steps": 290, "total_steps": 822, "loss": 0.6861, "learning_rate": 5e-06, "epoch": 1.0583941605839415, "percentage": 35.28, "elapsed_time": "0:29:59", "remaining_time": "0:55:01"}
{"current_steps": 300, "total_steps": 822, "loss": 0.6847, "learning_rate": 5e-06, "epoch": 1.094890510948905, "percentage": 36.5, "elapsed_time": "0:30:59", "remaining_time": "0:53:56"}
{"current_steps": 310, "total_steps": 822, "loss": 0.6888, "learning_rate": 5e-06, "epoch": 1.1313868613138687, "percentage": 37.71, "elapsed_time": "0:31:59", "remaining_time": "0:52:51"}
{"current_steps": 320, "total_steps": 822, "loss": 0.6864, "learning_rate": 5e-06, "epoch": 1.167883211678832, "percentage": 38.93, "elapsed_time": "0:33:00", "remaining_time": "0:51:46"}
{"current_steps": 330, "total_steps": 822, "loss": 0.6858, "learning_rate": 5e-06, "epoch": 1.2043795620437956, "percentage": 40.15, "elapsed_time": "0:34:00", "remaining_time": "0:50:41"}
{"current_steps": 340, "total_steps": 822, "loss": 0.6888, "learning_rate": 5e-06, "epoch": 1.2408759124087592, "percentage": 41.36, "elapsed_time": "0:35:00", "remaining_time": "0:49:37"}
{"current_steps": 350, "total_steps": 822, "loss": 0.6895, "learning_rate": 5e-06, "epoch": 1.2773722627737225, "percentage": 42.58, "elapsed_time": "0:36:00", "remaining_time": "0:48:33"}
{"current_steps": 360, "total_steps": 822, "loss": 0.6894, "learning_rate": 5e-06, "epoch": 1.313868613138686, "percentage": 43.8, "elapsed_time": "0:37:00", "remaining_time": "0:47:29"}
{"current_steps": 370, "total_steps": 822, "loss": 0.6842, "learning_rate": 5e-06, "epoch": 1.3503649635036497, "percentage": 45.01, "elapsed_time": "0:38:00", "remaining_time": "0:46:25"}
{"current_steps": 380, "total_steps": 822, "loss": 0.6865, "learning_rate": 5e-06, "epoch": 1.3868613138686132, "percentage": 46.23, "elapsed_time": "0:39:00", "remaining_time": "0:45:22"}
{"current_steps": 390, "total_steps": 822, "loss": 0.6866, "learning_rate": 5e-06, "epoch": 1.4233576642335766, "percentage": 47.45, "elapsed_time": "0:40:00", "remaining_time": "0:44:19"}
{"current_steps": 400, "total_steps": 822, "loss": 0.6865, "learning_rate": 5e-06, "epoch": 1.4598540145985401, "percentage": 48.66, "elapsed_time": "0:41:00", "remaining_time": "0:43:15"}
{"current_steps": 410, "total_steps": 822, "loss": 0.6846, "learning_rate": 5e-06, "epoch": 1.4963503649635037, "percentage": 49.88, "elapsed_time": "0:42:00", "remaining_time": "0:42:12"}
{"current_steps": 420, "total_steps": 822, "loss": 0.6921, "learning_rate": 5e-06, "epoch": 1.5328467153284673, "percentage": 51.09, "elapsed_time": "0:43:00", "remaining_time": "0:41:10"}
{"current_steps": 430, "total_steps": 822, "loss": 0.6875, "learning_rate": 5e-06, "epoch": 1.5693430656934306, "percentage": 52.31, "elapsed_time": "0:44:00", "remaining_time": "0:40:07"}
{"current_steps": 440, "total_steps": 822, "loss": 0.6863, "learning_rate": 5e-06, "epoch": 1.6058394160583942, "percentage": 53.53, "elapsed_time": "0:45:01", "remaining_time": "0:39:05"}
{"current_steps": 450, "total_steps": 822, "loss": 0.6846, "learning_rate": 5e-06, "epoch": 1.6423357664233578, "percentage": 54.74, "elapsed_time": "0:46:01", "remaining_time": "0:38:02"}
{"current_steps": 460, "total_steps": 822, "loss": 0.6852, "learning_rate": 5e-06, "epoch": 1.6788321167883211, "percentage": 55.96, "elapsed_time": "0:47:01", "remaining_time": "0:37:00"}
{"current_steps": 470, "total_steps": 822, "loss": 0.6903, "learning_rate": 5e-06, "epoch": 1.7153284671532847, "percentage": 57.18, "elapsed_time": "0:48:01", "remaining_time": "0:35:58"}
{"current_steps": 480, "total_steps": 822, "loss": 0.6857, "learning_rate": 5e-06, "epoch": 1.7518248175182483, "percentage": 58.39, "elapsed_time": "0:49:02", "remaining_time": "0:34:56"}
{"current_steps": 490, "total_steps": 822, "loss": 0.6875, "learning_rate": 5e-06, "epoch": 1.7883211678832116, "percentage": 59.61, "elapsed_time": "0:50:02", "remaining_time": "0:33:54"}
{"current_steps": 500, "total_steps": 822, "loss": 0.6868, "learning_rate": 5e-06, "epoch": 1.8248175182481752, "percentage": 60.83, "elapsed_time": "0:51:02", "remaining_time": "0:32:52"}
{"current_steps": 510, "total_steps": 822, "loss": 0.6823, "learning_rate": 5e-06, "epoch": 1.8613138686131387, "percentage": 62.04, "elapsed_time": "0:52:02", "remaining_time": "0:31:50"}
{"current_steps": 520, "total_steps": 822, "loss": 0.681, "learning_rate": 5e-06, "epoch": 1.897810218978102, "percentage": 63.26, "elapsed_time": "0:53:03", "remaining_time": "0:30:48"}
{"current_steps": 530, "total_steps": 822, "loss": 0.6801, "learning_rate": 5e-06, "epoch": 1.9343065693430657, "percentage": 64.48, "elapsed_time": "0:54:03", "remaining_time": "0:29:46"}
{"current_steps": 540, "total_steps": 822, "loss": 0.6856, "learning_rate": 5e-06, "epoch": 1.9708029197080292, "percentage": 65.69, "elapsed_time": "0:55:03", "remaining_time": "0:28:45"}
{"current_steps": 548, "total_steps": 822, "eval_loss": 0.7306408882141113, "epoch": 2.0, "percentage": 66.67, "elapsed_time": "0:56:20", "remaining_time": "0:28:10"}
{"current_steps": 550, "total_steps": 822, "loss": 0.6708, "learning_rate": 5e-06, "epoch": 2.0072992700729926, "percentage": 66.91, "elapsed_time": "0:58:10", "remaining_time": "0:28:46"}
{"current_steps": 560, "total_steps": 822, "loss": 0.6358, "learning_rate": 5e-06, "epoch": 2.0437956204379564, "percentage": 68.13, "elapsed_time": "0:59:08", "remaining_time": "0:27:40"}
{"current_steps": 570, "total_steps": 822, "loss": 0.6306, "learning_rate": 5e-06, "epoch": 2.0802919708029197, "percentage": 69.34, "elapsed_time": "1:00:06", "remaining_time": "0:26:34"}
{"current_steps": 580, "total_steps": 822, "loss": 0.6289, "learning_rate": 5e-06, "epoch": 2.116788321167883, "percentage": 70.56, "elapsed_time": "1:01:04", "remaining_time": "0:25:29"}
{"current_steps": 590, "total_steps": 822, "loss": 0.6303, "learning_rate": 5e-06, "epoch": 2.153284671532847, "percentage": 71.78, "elapsed_time": "1:02:02", "remaining_time": "0:24:23"}
{"current_steps": 600, "total_steps": 822, "loss": 0.6291, "learning_rate": 5e-06, "epoch": 2.18978102189781, "percentage": 72.99, "elapsed_time": "1:03:01", "remaining_time": "0:23:19"}
{"current_steps": 610, "total_steps": 822, "loss": 0.6266, "learning_rate": 5e-06, "epoch": 2.2262773722627736, "percentage": 74.21, "elapsed_time": "1:03:59", "remaining_time": "0:22:14"}
{"current_steps": 620, "total_steps": 822, "loss": 0.6316, "learning_rate": 5e-06, "epoch": 2.2627737226277373, "percentage": 75.43, "elapsed_time": "1:04:57", "remaining_time": "0:21:09"}
{"current_steps": 630, "total_steps": 822, "loss": 0.6324, "learning_rate": 5e-06, "epoch": 2.2992700729927007, "percentage": 76.64, "elapsed_time": "1:05:55", "remaining_time": "0:20:05"}
{"current_steps": 640, "total_steps": 822, "loss": 0.6276, "learning_rate": 5e-06, "epoch": 2.335766423357664, "percentage": 77.86, "elapsed_time": "1:06:54", "remaining_time": "0:19:01"}
{"current_steps": 650, "total_steps": 822, "loss": 0.6376, "learning_rate": 5e-06, "epoch": 2.372262773722628, "percentage": 79.08, "elapsed_time": "1:07:52", "remaining_time": "0:17:57"}
{"current_steps": 660, "total_steps": 822, "loss": 0.6287, "learning_rate": 5e-06, "epoch": 2.408759124087591, "percentage": 80.29, "elapsed_time": "1:08:50", "remaining_time": "0:16:53"}
{"current_steps": 670, "total_steps": 822, "loss": 0.6355, "learning_rate": 5e-06, "epoch": 2.445255474452555, "percentage": 81.51, "elapsed_time": "1:09:48", "remaining_time": "0:15:50"}
{"current_steps": 680, "total_steps": 822, "loss": 0.6361, "learning_rate": 5e-06, "epoch": 2.4817518248175183, "percentage": 82.73, "elapsed_time": "1:10:46", "remaining_time": "0:14:46"}
{"current_steps": 690, "total_steps": 822, "loss": 0.6377, "learning_rate": 5e-06, "epoch": 2.5182481751824817, "percentage": 83.94, "elapsed_time": "1:11:45", "remaining_time": "0:13:43"}
{"current_steps": 700, "total_steps": 822, "loss": 0.6348, "learning_rate": 5e-06, "epoch": 2.554744525547445, "percentage": 85.16, "elapsed_time": "1:12:43", "remaining_time": "0:12:40"}
{"current_steps": 710, "total_steps": 822, "loss": 0.6409, "learning_rate": 5e-06, "epoch": 2.591240875912409, "percentage": 86.37, "elapsed_time": "1:13:41", "remaining_time": "0:11:37"}
{"current_steps": 720, "total_steps": 822, "loss": 0.6295, "learning_rate": 5e-06, "epoch": 2.627737226277372, "percentage": 87.59, "elapsed_time": "1:14:39", "remaining_time": "0:10:34"}
{"current_steps": 730, "total_steps": 822, "loss": 0.6353, "learning_rate": 5e-06, "epoch": 2.664233576642336, "percentage": 88.81, "elapsed_time": "1:15:37", "remaining_time": "0:09:31"}
{"current_steps": 740, "total_steps": 822, "loss": 0.6363, "learning_rate": 5e-06, "epoch": 2.7007299270072993, "percentage": 90.02, "elapsed_time": "1:16:36", "remaining_time": "0:08:29"}
{"current_steps": 750, "total_steps": 822, "loss": 0.6349, "learning_rate": 5e-06, "epoch": 2.7372262773722627, "percentage": 91.24, "elapsed_time": "1:17:34", "remaining_time": "0:07:26"}
{"current_steps": 760, "total_steps": 822, "loss": 0.6407, "learning_rate": 5e-06, "epoch": 2.7737226277372264, "percentage": 92.46, "elapsed_time": "1:18:32", "remaining_time": "0:06:24"}
{"current_steps": 770, "total_steps": 822, "loss": 0.631, "learning_rate": 5e-06, "epoch": 2.81021897810219, "percentage": 93.67, "elapsed_time": "1:19:30", "remaining_time": "0:05:22"}
{"current_steps": 780, "total_steps": 822, "loss": 0.6298, "learning_rate": 5e-06, "epoch": 2.846715328467153, "percentage": 94.89, "elapsed_time": "1:20:28", "remaining_time": "0:04:20"}
{"current_steps": 790, "total_steps": 822, "loss": 0.6325, "learning_rate": 5e-06, "epoch": 2.883211678832117, "percentage": 96.11, "elapsed_time": "1:21:27", "remaining_time": "0:03:17"}
{"current_steps": 800, "total_steps": 822, "loss": 0.6425, "learning_rate": 5e-06, "epoch": 2.9197080291970803, "percentage": 97.32, "elapsed_time": "1:22:25", "remaining_time": "0:02:15"}
{"current_steps": 810, "total_steps": 822, "loss": 0.6323, "learning_rate": 5e-06, "epoch": 2.9562043795620436, "percentage": 98.54, "elapsed_time": "1:23:23", "remaining_time": "0:01:14"}
{"current_steps": 820, "total_steps": 822, "loss": 0.6383, "learning_rate": 5e-06, "epoch": 2.9927007299270074, "percentage": 99.76, "elapsed_time": "1:24:21", "remaining_time": "0:00:12"}
{"current_steps": 822, "total_steps": 822, "eval_loss": 0.7370563745498657, "epoch": 3.0, "percentage": 100.0, "elapsed_time": "1:26:35", "remaining_time": "0:00:00"}
{"current_steps": 822, "total_steps": 822, "epoch": 3.0, "percentage": 100.0, "elapsed_time": "1:28:03", "remaining_time": "0:00:00"}