sdar4b-random-mask-esft-intent / trainer_log.jsonl
autoprogrammer's picture
SDAR-4B random_mask SFT on ESFT-intent (final)
191c270 verified
{"current_steps": 5, "total_steps": 342, "loss": 3.8867, "lr": 3.6363636363636366e-06, "epoch": 0.043859649122807015, "percentage": 1.46, "elapsed_time": "0:00:43", "remaining_time": "0:48:43"}
{"current_steps": 10, "total_steps": 342, "loss": 1.8678, "lr": 8.181818181818183e-06, "epoch": 0.08771929824561403, "percentage": 2.92, "elapsed_time": "0:01:07", "remaining_time": "0:37:15"}
{"current_steps": 15, "total_steps": 342, "loss": 0.5404, "lr": 9.997973265157192e-06, "epoch": 0.13157894736842105, "percentage": 4.39, "elapsed_time": "0:01:31", "remaining_time": "0:33:08"}
{"current_steps": 20, "total_steps": 342, "loss": 0.3354, "lr": 9.985593613021873e-06, "epoch": 0.17543859649122806, "percentage": 5.85, "elapsed_time": "0:01:55", "remaining_time": "0:30:52"}
{"current_steps": 25, "total_steps": 342, "loss": 0.2407, "lr": 9.961988113473708e-06, "epoch": 0.21929824561403508, "percentage": 7.31, "elapsed_time": "0:02:18", "remaining_time": "0:29:21"}
{"current_steps": 30, "total_steps": 342, "loss": 0.1945, "lr": 9.927209918004095e-06, "epoch": 0.2631578947368421, "percentage": 8.77, "elapsed_time": "0:02:42", "remaining_time": "0:28:13"}
{"current_steps": 35, "total_steps": 342, "loss": 0.1668, "lr": 9.881337335184879e-06, "epoch": 0.30701754385964913, "percentage": 10.23, "elapsed_time": "0:03:06", "remaining_time": "0:27:17"}
{"current_steps": 40, "total_steps": 342, "loss": 0.1493, "lr": 9.824473654344297e-06, "epoch": 0.3508771929824561, "percentage": 11.7, "elapsed_time": "0:03:30", "remaining_time": "0:26:29"}
{"current_steps": 45, "total_steps": 342, "loss": 0.1837, "lr": 9.756746912994832e-06, "epoch": 0.39473684210526316, "percentage": 13.16, "elapsed_time": "0:03:54", "remaining_time": "0:25:47"}
{"current_steps": 50, "total_steps": 342, "loss": 0.1241, "lr": 9.678309608536626e-06, "epoch": 0.43859649122807015, "percentage": 14.62, "elapsed_time": "0:04:18", "remaining_time": "0:25:08"}
{"current_steps": 55, "total_steps": 342, "loss": 0.1548, "lr": 9.58933835488563e-06, "epoch": 0.4824561403508772, "percentage": 16.08, "elapsed_time": "0:04:42", "remaining_time": "0:24:32"}
{"current_steps": 60, "total_steps": 342, "loss": 0.1292, "lr": 9.490033484799608e-06, "epoch": 0.5263157894736842, "percentage": 17.54, "elapsed_time": "0:05:06", "remaining_time": "0:23:58"}
{"current_steps": 65, "total_steps": 342, "loss": 0.138, "lr": 9.380618598797473e-06, "epoch": 0.5701754385964912, "percentage": 19.01, "elapsed_time": "0:05:29", "remaining_time": "0:23:25"}
{"current_steps": 70, "total_steps": 342, "loss": 0.1113, "lr": 9.26134006168757e-06, "epoch": 0.6140350877192983, "percentage": 20.47, "elapsed_time": "0:05:53", "remaining_time": "0:22:54"}
{"current_steps": 75, "total_steps": 342, "loss": 0.1128, "lr": 9.132466447838598e-06, "epoch": 0.6578947368421053, "percentage": 21.93, "elapsed_time": "0:06:17", "remaining_time": "0:22:24"}
{"current_steps": 80, "total_steps": 342, "loss": 0.1093, "lr": 8.994287936442226e-06, "epoch": 0.7017543859649122, "percentage": 23.39, "elapsed_time": "0:06:41", "remaining_time": "0:21:54"}
{"current_steps": 85, "total_steps": 342, "loss": 0.131, "lr": 8.84711565812904e-06, "epoch": 0.7456140350877193, "percentage": 24.85, "elapsed_time": "0:07:05", "remaining_time": "0:21:25"}
{"current_steps": 90, "total_steps": 342, "loss": 0.119, "lr": 8.691280994409044e-06, "epoch": 0.7894736842105263, "percentage": 26.32, "elapsed_time": "0:07:29", "remaining_time": "0:20:57"}
{"current_steps": 95, "total_steps": 342, "loss": 0.118, "lr": 8.527134831514116e-06, "epoch": 0.8333333333333334, "percentage": 27.78, "elapsed_time": "0:07:53", "remaining_time": "0:20:29"}
{"current_steps": 100, "total_steps": 342, "loss": 0.0927, "lr": 8.355046770322528e-06, "epoch": 0.8771929824561403, "percentage": 29.24, "elapsed_time": "0:08:16", "remaining_time": "0:20:02"}
{"current_steps": 105, "total_steps": 342, "loss": 0.1152, "lr": 8.175404294144482e-06, "epoch": 0.9210526315789473, "percentage": 30.7, "elapsed_time": "0:08:40", "remaining_time": "0:19:35"}
{"current_steps": 110, "total_steps": 342, "loss": 0.1091, "lr": 7.98861189624256e-06, "epoch": 0.9649122807017544, "percentage": 32.16, "elapsed_time": "0:09:04", "remaining_time": "0:19:08"}
{"current_steps": 115, "total_steps": 342, "loss": 0.1107, "lr": 7.79509016905158e-06, "epoch": 1.0087719298245614, "percentage": 33.63, "elapsed_time": "0:09:46", "remaining_time": "0:19:16"}
{"current_steps": 120, "total_steps": 342, "loss": 0.0573, "lr": 7.595274857148651e-06, "epoch": 1.0526315789473684, "percentage": 35.09, "elapsed_time": "0:10:09", "remaining_time": "0:18:48"}
{"current_steps": 125, "total_steps": 342, "loss": 0.0453, "lr": 7.389615876105773e-06, "epoch": 1.0964912280701755, "percentage": 36.55, "elapsed_time": "0:10:33", "remaining_time": "0:18:20"}
{"current_steps": 130, "total_steps": 342, "loss": 0.0634, "lr": 7.178576299434239e-06, "epoch": 1.1403508771929824, "percentage": 38.01, "elapsed_time": "0:10:57", "remaining_time": "0:17:52"}
{"current_steps": 135, "total_steps": 342, "loss": 0.0643, "lr": 6.962631315901861e-06, "epoch": 1.1842105263157894, "percentage": 39.47, "elapsed_time": "0:11:21", "remaining_time": "0:17:25"}
{"current_steps": 140, "total_steps": 342, "loss": 0.0573, "lr": 6.742267159570796e-06, "epoch": 1.2280701754385965, "percentage": 40.94, "elapsed_time": "0:11:45", "remaining_time": "0:16:57"}
{"current_steps": 145, "total_steps": 342, "loss": 0.0591, "lr": 6.51798001496514e-06, "epoch": 1.2719298245614035, "percentage": 42.4, "elapsed_time": "0:12:09", "remaining_time": "0:16:30"}
{"current_steps": 150, "total_steps": 342, "loss": 0.0493, "lr": 6.290274899833517e-06, "epoch": 1.3157894736842106, "percentage": 43.86, "elapsed_time": "0:12:33", "remaining_time": "0:16:04"}
{"current_steps": 155, "total_steps": 342, "loss": 0.0514, "lr": 6.059664528022267e-06, "epoch": 1.3596491228070176, "percentage": 45.32, "elapsed_time": "0:12:57", "remaining_time": "0:15:37"}
{"current_steps": 160, "total_steps": 342, "loss": 0.0552, "lr": 5.82666815501964e-06, "epoch": 1.4035087719298245, "percentage": 46.78, "elapsed_time": "0:13:20", "remaining_time": "0:15:11"}
{"current_steps": 165, "total_steps": 342, "loss": 0.0481, "lr": 5.5918104087704925e-06, "epoch": 1.4473684210526316, "percentage": 48.25, "elapsed_time": "0:13:44", "remaining_time": "0:14:44"}
{"current_steps": 170, "total_steps": 342, "loss": 0.0331, "lr": 5.355620108394018e-06, "epoch": 1.4912280701754386, "percentage": 49.71, "elapsed_time": "0:14:08", "remaining_time": "0:14:18"}
{"current_steps": 175, "total_steps": 342, "loss": 0.0596, "lr": 5.118629073464424e-06, "epoch": 1.5350877192982457, "percentage": 51.17, "elapsed_time": "0:14:32", "remaining_time": "0:13:52"}
{"current_steps": 180, "total_steps": 342, "loss": 0.0738, "lr": 4.8813709265355766e-06, "epoch": 1.5789473684210527, "percentage": 52.63, "elapsed_time": "0:14:56", "remaining_time": "0:13:26"}
{"current_steps": 185, "total_steps": 342, "loss": 0.0527, "lr": 4.644379891605983e-06, "epoch": 1.6228070175438596, "percentage": 54.09, "elapsed_time": "0:15:20", "remaining_time": "0:13:00"}
{"current_steps": 190, "total_steps": 342, "loss": 0.0774, "lr": 4.40818959122951e-06, "epoch": 1.6666666666666665, "percentage": 55.56, "elapsed_time": "0:15:44", "remaining_time": "0:12:35"}
{"current_steps": 195, "total_steps": 342, "loss": 0.0473, "lr": 4.173331844980362e-06, "epoch": 1.7105263157894737, "percentage": 57.02, "elapsed_time": "0:16:07", "remaining_time": "0:12:09"}
{"current_steps": 200, "total_steps": 342, "loss": 0.0408, "lr": 3.940335471977733e-06, "epoch": 1.7543859649122808, "percentage": 58.48, "elapsed_time": "0:16:31", "remaining_time": "0:11:44"}
{"current_steps": 205, "total_steps": 342, "loss": 0.055, "lr": 3.7097251001664824e-06, "epoch": 1.7982456140350878, "percentage": 59.94, "elapsed_time": "0:16:55", "remaining_time": "0:11:18"}
{"current_steps": 210, "total_steps": 342, "loss": 0.048, "lr": 3.482019985034861e-06, "epoch": 1.8421052631578947, "percentage": 61.4, "elapsed_time": "0:17:19", "remaining_time": "0:10:53"}
{"current_steps": 215, "total_steps": 342, "loss": 0.0462, "lr": 3.257732840429206e-06, "epoch": 1.8859649122807016, "percentage": 62.87, "elapsed_time": "0:17:43", "remaining_time": "0:10:28"}
{"current_steps": 220, "total_steps": 342, "loss": 0.044, "lr": 3.0373686840981396e-06, "epoch": 1.9298245614035088, "percentage": 64.33, "elapsed_time": "0:18:07", "remaining_time": "0:10:02"}
{"current_steps": 225, "total_steps": 342, "loss": 0.0362, "lr": 2.821423700565763e-06, "epoch": 1.973684210526316, "percentage": 65.79, "elapsed_time": "0:18:31", "remaining_time": "0:09:37"}
{"current_steps": 230, "total_steps": 342, "loss": 0.0255, "lr": 2.610384123894229e-06, "epoch": 2.017543859649123, "percentage": 67.25, "elapsed_time": "0:19:11", "remaining_time": "0:09:20"}
{"current_steps": 235, "total_steps": 342, "loss": 0.0206, "lr": 2.4047251428513485e-06, "epoch": 2.06140350877193, "percentage": 68.71, "elapsed_time": "0:19:35", "remaining_time": "0:08:55"}
{"current_steps": 240, "total_steps": 342, "loss": 0.0148, "lr": 2.2049098309484195e-06, "epoch": 2.1052631578947367, "percentage": 70.18, "elapsed_time": "0:19:59", "remaining_time": "0:08:29"}
{"current_steps": 245, "total_steps": 342, "loss": 0.01, "lr": 2.0113881037574423e-06, "epoch": 2.1491228070175437, "percentage": 71.64, "elapsed_time": "0:20:23", "remaining_time": "0:08:04"}
{"current_steps": 250, "total_steps": 342, "loss": 0.0158, "lr": 1.8245957058555203e-06, "epoch": 2.192982456140351, "percentage": 73.1, "elapsed_time": "0:20:47", "remaining_time": "0:07:39"}
{"current_steps": 255, "total_steps": 342, "loss": 0.0166, "lr": 1.6449532296774739e-06, "epoch": 2.236842105263158, "percentage": 74.56, "elapsed_time": "0:21:11", "remaining_time": "0:07:13"}
{"current_steps": 260, "total_steps": 342, "loss": 0.0098, "lr": 1.4728651684858835e-06, "epoch": 2.280701754385965, "percentage": 76.02, "elapsed_time": "0:21:35", "remaining_time": "0:06:48"}
{"current_steps": 265, "total_steps": 342, "loss": 0.008, "lr": 1.3087190055909572e-06, "epoch": 2.324561403508772, "percentage": 77.49, "elapsed_time": "0:21:59", "remaining_time": "0:06:23"}
{"current_steps": 270, "total_steps": 342, "loss": 0.0117, "lr": 1.1528843418709623e-06, "epoch": 2.3684210526315788, "percentage": 78.95, "elapsed_time": "0:22:22", "remaining_time": "0:05:58"}
{"current_steps": 275, "total_steps": 342, "loss": 0.0093, "lr": 1.005712063557776e-06, "epoch": 2.412280701754386, "percentage": 80.41, "elapsed_time": "0:22:46", "remaining_time": "0:05:32"}
{"current_steps": 280, "total_steps": 342, "loss": 0.0103, "lr": 8.675335521614036e-07, "epoch": 2.456140350877193, "percentage": 81.87, "elapsed_time": "0:23:10", "remaining_time": "0:05:07"}
{"current_steps": 285, "total_steps": 342, "loss": 0.0064, "lr": 7.386599383124321e-07, "epoch": 2.5, "percentage": 83.33, "elapsed_time": "0:23:34", "remaining_time": "0:04:42"}
{"current_steps": 290, "total_steps": 342, "loss": 0.0146, "lr": 6.193814012025278e-07, "epoch": 2.543859649122807, "percentage": 84.8, "elapsed_time": "0:23:58", "remaining_time": "0:04:17"}
{"current_steps": 295, "total_steps": 342, "loss": 0.0156, "lr": 5.099665152003929e-07, "epoch": 2.587719298245614, "percentage": 86.26, "elapsed_time": "0:24:22", "remaining_time": "0:03:52"}
{"current_steps": 300, "total_steps": 342, "loss": 0.0125, "lr": 4.106616451143719e-07, "epoch": 2.6315789473684212, "percentage": 87.72, "elapsed_time": "0:24:46", "remaining_time": "0:03:28"}
{"current_steps": 305, "total_steps": 342, "loss": 0.0133, "lr": 3.2169039146337457e-07, "epoch": 2.675438596491228, "percentage": 89.18, "elapsed_time": "0:25:10", "remaining_time": "0:03:03"}
{"current_steps": 310, "total_steps": 342, "loss": 0.0091, "lr": 2.4325308700516805e-07, "epoch": 2.719298245614035, "percentage": 90.64, "elapsed_time": "0:25:34", "remaining_time": "0:02:38"}
{"current_steps": 315, "total_steps": 342, "loss": 0.0116, "lr": 1.7552634565570325e-07, "epoch": 2.763157894736842, "percentage": 92.11, "elapsed_time": "0:25:58", "remaining_time": "0:02:13"}
{"current_steps": 320, "total_steps": 342, "loss": 0.0113, "lr": 1.1866266481512234e-07, "epoch": 2.807017543859649, "percentage": 93.57, "elapsed_time": "0:26:22", "remaining_time": "0:01:48"}
{"current_steps": 325, "total_steps": 342, "loss": 0.0167, "lr": 7.279008199590543e-08, "epoch": 2.8508771929824563, "percentage": 95.03, "elapsed_time": "0:26:46", "remaining_time": "0:01:24"}
{"current_steps": 330, "total_steps": 342, "loss": 0.0135, "lr": 3.8011886526292394e-08, "epoch": 2.8947368421052633, "percentage": 96.49, "elapsed_time": "0:27:10", "remaining_time": "0:00:59"}
{"current_steps": 335, "total_steps": 342, "loss": 0.0084, "lr": 1.4406386978128017e-08, "epoch": 2.93859649122807, "percentage": 97.95, "elapsed_time": "0:27:34", "remaining_time": "0:00:34"}
{"current_steps": 340, "total_steps": 342, "loss": 0.0093, "lr": 2.0267348428087974e-09, "epoch": 2.982456140350877, "percentage": 99.42, "elapsed_time": "0:27:58", "remaining_time": "0:00:09"}
{"current_steps": 342, "total_steps": 342, "epoch": 3.0, "percentage": 100.0, "elapsed_time": "0:28:24", "remaining_time": "0:00:00"}