sdar4b-rm-K1-esft-summary / trainer_log.jsonl
autoprogrammer's picture
SDAR-4B random_mask K=1 ESFT-summary (final)
0df7cce verified
{"current_steps": 5, "total_steps": 462, "loss": 3.0598, "lr": 9.998150522680437e-06, "epoch": 0.032467532467532464, "percentage": 1.08, "elapsed_time": "0:00:46", "remaining_time": "1:10:23"}
{"current_steps": 10, "total_steps": 462, "loss": 2.376, "lr": 9.990639365966835e-06, "epoch": 0.06493506493506493, "percentage": 2.16, "elapsed_time": "0:01:16", "remaining_time": "0:57:52"}
{"current_steps": 15, "total_steps": 462, "loss": 2.0184, "lr": 9.977359612865424e-06, "epoch": 0.09740259740259741, "percentage": 3.25, "elapsed_time": "0:01:47", "remaining_time": "0:53:11"}
{"current_steps": 20, "total_steps": 462, "loss": 1.9316, "lr": 9.95832661322021e-06, "epoch": 0.12987012987012986, "percentage": 4.33, "elapsed_time": "0:02:16", "remaining_time": "0:50:27"}
{"current_steps": 25, "total_steps": 462, "loss": 1.9113, "lr": 9.933562366956445e-06, "epoch": 0.16233766233766234, "percentage": 5.41, "elapsed_time": "0:02:47", "remaining_time": "0:48:49"}
{"current_steps": 30, "total_steps": 462, "loss": 1.7844, "lr": 9.903095498651276e-06, "epoch": 0.19480519480519481, "percentage": 6.49, "elapsed_time": "0:03:17", "remaining_time": "0:47:26"}
{"current_steps": 35, "total_steps": 462, "loss": 1.7329, "lr": 9.866961224447076e-06, "epoch": 0.22727272727272727, "percentage": 7.58, "elapsed_time": "0:03:47", "remaining_time": "0:46:20"}
{"current_steps": 40, "total_steps": 462, "loss": 1.7016, "lr": 9.8252013113457e-06, "epoch": 0.2597402597402597, "percentage": 8.66, "elapsed_time": "0:04:18", "remaining_time": "0:45:22"}
{"current_steps": 45, "total_steps": 462, "loss": 1.6416, "lr": 9.777864028930705e-06, "epoch": 0.2922077922077922, "percentage": 9.74, "elapsed_time": "0:04:48", "remaining_time": "0:44:30"}
{"current_steps": 50, "total_steps": 462, "loss": 1.659, "lr": 9.725004093573343e-06, "epoch": 0.3246753246753247, "percentage": 10.82, "elapsed_time": "0:05:18", "remaining_time": "0:43:44"}
{"current_steps": 55, "total_steps": 462, "loss": 1.663, "lr": 9.666682605186834e-06, "epoch": 0.35714285714285715, "percentage": 11.9, "elapsed_time": "0:05:48", "remaining_time": "0:42:59"}
{"current_steps": 60, "total_steps": 462, "loss": 1.6689, "lr": 9.602966976601995e-06, "epoch": 0.38961038961038963, "percentage": 12.99, "elapsed_time": "0:06:19", "remaining_time": "0:42:22"}
{"current_steps": 65, "total_steps": 462, "loss": 1.5457, "lr": 9.533930855645872e-06, "epoch": 0.42207792207792205, "percentage": 14.07, "elapsed_time": "0:06:49", "remaining_time": "0:41:42"}
{"current_steps": 70, "total_steps": 462, "loss": 1.5875, "lr": 9.45965404001347e-06, "epoch": 0.45454545454545453, "percentage": 15.15, "elapsed_time": "0:07:19", "remaining_time": "0:41:03"}
{"current_steps": 75, "total_steps": 462, "loss": 1.5141, "lr": 9.380222385030916e-06, "epoch": 0.487012987012987, "percentage": 16.23, "elapsed_time": "0:07:50", "remaining_time": "0:40:25"}
{"current_steps": 80, "total_steps": 462, "loss": 1.4924, "lr": 9.295727704416731e-06, "epoch": 0.5194805194805194, "percentage": 17.32, "elapsed_time": "0:08:20", "remaining_time": "0:39:48"}
{"current_steps": 85, "total_steps": 462, "loss": 1.6115, "lr": 9.206267664155906e-06, "epoch": 0.551948051948052, "percentage": 18.4, "elapsed_time": "0:08:50", "remaining_time": "0:39:13"}
{"current_steps": 90, "total_steps": 462, "loss": 1.5129, "lr": 9.111945669609408e-06, "epoch": 0.5844155844155844, "percentage": 19.48, "elapsed_time": "0:09:20", "remaining_time": "0:38:37"}
{"current_steps": 95, "total_steps": 462, "loss": 1.5082, "lr": 9.012870745989663e-06, "epoch": 0.6168831168831169, "percentage": 20.56, "elapsed_time": "0:09:51", "remaining_time": "0:38:04"}
{"current_steps": 100, "total_steps": 462, "loss": 1.4448, "lr": 8.90915741234015e-06, "epoch": 0.6493506493506493, "percentage": 21.65, "elapsed_time": "0:10:21", "remaining_time": "0:37:29"}
{"current_steps": 105, "total_steps": 462, "loss": 1.477, "lr": 8.800925549164742e-06, "epoch": 0.6818181818181818, "percentage": 22.73, "elapsed_time": "0:10:51", "remaining_time": "0:36:55"}
{"current_steps": 110, "total_steps": 462, "loss": 1.5399, "lr": 8.688300259859855e-06, "epoch": 0.7142857142857143, "percentage": 23.81, "elapsed_time": "0:11:21", "remaining_time": "0:36:21"}
{"current_steps": 115, "total_steps": 462, "loss": 1.4825, "lr": 8.571411726109518e-06, "epoch": 0.7467532467532467, "percentage": 24.89, "elapsed_time": "0:11:51", "remaining_time": "0:35:48"}
{"current_steps": 120, "total_steps": 462, "loss": 1.4567, "lr": 8.450395057410561e-06, "epoch": 0.7792207792207793, "percentage": 25.97, "elapsed_time": "0:12:22", "remaining_time": "0:35:15"}
{"current_steps": 125, "total_steps": 462, "loss": 1.5021, "lr": 8.325390134901794e-06, "epoch": 0.8116883116883117, "percentage": 27.06, "elapsed_time": "0:12:53", "remaining_time": "0:34:44"}
{"current_steps": 130, "total_steps": 462, "loss": 1.4908, "lr": 8.196541449677758e-06, "epoch": 0.8441558441558441, "percentage": 28.14, "elapsed_time": "0:13:23", "remaining_time": "0:34:12"}
{"current_steps": 135, "total_steps": 462, "loss": 1.4953, "lr": 8.063997935773885e-06, "epoch": 0.8766233766233766, "percentage": 29.22, "elapsed_time": "0:13:53", "remaining_time": "0:33:39"}
{"current_steps": 140, "total_steps": 462, "loss": 1.4379, "lr": 7.927912798016144e-06, "epoch": 0.9090909090909091, "percentage": 30.3, "elapsed_time": "0:14:23", "remaining_time": "0:33:07"}
{"current_steps": 145, "total_steps": 462, "loss": 1.409, "lr": 7.788443334934148e-06, "epoch": 0.9415584415584416, "percentage": 31.39, "elapsed_time": "0:14:54", "remaining_time": "0:32:34"}
{"current_steps": 150, "total_steps": 462, "loss": 1.508, "lr": 7.645750756942425e-06, "epoch": 0.974025974025974, "percentage": 32.47, "elapsed_time": "0:15:24", "remaining_time": "0:32:02"}
{"current_steps": 155, "total_steps": 462, "loss": 1.35, "lr": 7.500000000000001e-06, "epoch": 1.0064935064935066, "percentage": 33.55, "elapsed_time": "0:16:12", "remaining_time": "0:32:06"}
{"current_steps": 160, "total_steps": 462, "loss": 1.1552, "lr": 7.351359534963684e-06, "epoch": 1.0389610389610389, "percentage": 34.63, "elapsed_time": "0:16:42", "remaining_time": "0:31:33"}
{"current_steps": 165, "total_steps": 462, "loss": 1.1749, "lr": 7.200001172855436e-06, "epoch": 1.0714285714285714, "percentage": 35.71, "elapsed_time": "0:17:13", "remaining_time": "0:30:59"}
{"current_steps": 170, "total_steps": 462, "loss": 1.1209, "lr": 7.046099866268878e-06, "epoch": 1.103896103896104, "percentage": 36.8, "elapsed_time": "0:17:43", "remaining_time": "0:30:26"}
{"current_steps": 175, "total_steps": 462, "loss": 1.1318, "lr": 6.889833507144534e-06, "epoch": 1.1363636363636362, "percentage": 37.88, "elapsed_time": "0:18:13", "remaining_time": "0:29:53"}
{"current_steps": 180, "total_steps": 462, "loss": 1.1476, "lr": 6.731382721147509e-06, "epoch": 1.1688311688311688, "percentage": 38.96, "elapsed_time": "0:18:44", "remaining_time": "0:29:21"}
{"current_steps": 185, "total_steps": 462, "loss": 1.115, "lr": 6.570930658885314e-06, "epoch": 1.2012987012987013, "percentage": 40.04, "elapsed_time": "0:19:14", "remaining_time": "0:28:48"}
{"current_steps": 190, "total_steps": 462, "loss": 1.1295, "lr": 6.408662784207149e-06, "epoch": 1.2337662337662338, "percentage": 41.13, "elapsed_time": "0:19:44", "remaining_time": "0:28:16"}
{"current_steps": 195, "total_steps": 462, "loss": 1.1346, "lr": 6.244766659829351e-06, "epoch": 1.2662337662337662, "percentage": 42.21, "elapsed_time": "0:20:15", "remaining_time": "0:27:43"}
{"current_steps": 200, "total_steps": 462, "loss": 1.1116, "lr": 6.079431730534786e-06, "epoch": 1.2987012987012987, "percentage": 43.29, "elapsed_time": "0:20:45", "remaining_time": "0:27:11"}
{"current_steps": 205, "total_steps": 462, "loss": 1.109, "lr": 5.91284910419681e-06, "epoch": 1.3311688311688312, "percentage": 44.37, "elapsed_time": "0:21:15", "remaining_time": "0:26:38"}
{"current_steps": 210, "total_steps": 462, "loss": 1.1271, "lr": 5.745211330880872e-06, "epoch": 1.3636363636363638, "percentage": 45.45, "elapsed_time": "0:21:45", "remaining_time": "0:26:06"}
{"current_steps": 215, "total_steps": 462, "loss": 1.1049, "lr": 5.576712180279134e-06, "epoch": 1.396103896103896, "percentage": 46.54, "elapsed_time": "0:22:15", "remaining_time": "0:25:34"}
{"current_steps": 220, "total_steps": 462, "loss": 1.1382, "lr": 5.4075464177353165e-06, "epoch": 1.4285714285714286, "percentage": 47.62, "elapsed_time": "0:22:46", "remaining_time": "0:25:02"}
{"current_steps": 225, "total_steps": 462, "loss": 1.1146, "lr": 5.237909579118713e-06, "epoch": 1.4610389610389611, "percentage": 48.7, "elapsed_time": "0:23:16", "remaining_time": "0:24:31"}
{"current_steps": 230, "total_steps": 462, "loss": 1.0891, "lr": 5.06799774480755e-06, "epoch": 1.4935064935064934, "percentage": 49.78, "elapsed_time": "0:23:46", "remaining_time": "0:23:59"}
{"current_steps": 235, "total_steps": 462, "loss": 1.1009, "lr": 4.898007313042975e-06, "epoch": 1.525974025974026, "percentage": 50.87, "elapsed_time": "0:24:17", "remaining_time": "0:23:27"}
{"current_steps": 240, "total_steps": 462, "loss": 1.0945, "lr": 4.728134772915605e-06, "epoch": 1.5584415584415585, "percentage": 51.95, "elapsed_time": "0:24:47", "remaining_time": "0:22:55"}
{"current_steps": 245, "total_steps": 462, "loss": 1.1315, "lr": 4.558576477247097e-06, "epoch": 1.5909090909090908, "percentage": 53.03, "elapsed_time": "0:25:17", "remaining_time": "0:22:24"}
{"current_steps": 250, "total_steps": 462, "loss": 1.1564, "lr": 4.389528415629201e-06, "epoch": 1.6233766233766234, "percentage": 54.11, "elapsed_time": "0:25:48", "remaining_time": "0:21:52"}
{"current_steps": 255, "total_steps": 462, "loss": 1.1058, "lr": 4.221185987882684e-06, "epoch": 1.655844155844156, "percentage": 55.19, "elapsed_time": "0:26:18", "remaining_time": "0:21:21"}
{"current_steps": 260, "total_steps": 462, "loss": 1.1409, "lr": 4.053743778197951e-06, "epoch": 1.6883116883116882, "percentage": 56.28, "elapsed_time": "0:26:48", "remaining_time": "0:20:49"}
{"current_steps": 265, "total_steps": 462, "loss": 1.0984, "lr": 3.887395330218429e-06, "epoch": 1.7207792207792207, "percentage": 57.36, "elapsed_time": "0:27:18", "remaining_time": "0:20:18"}
{"current_steps": 270, "total_steps": 462, "loss": 1.0963, "lr": 3.7223329233267354e-06, "epoch": 1.7532467532467533, "percentage": 58.44, "elapsed_time": "0:27:49", "remaining_time": "0:19:46"}
{"current_steps": 275, "total_steps": 462, "loss": 1.0856, "lr": 3.558747350392146e-06, "epoch": 1.7857142857142856, "percentage": 59.52, "elapsed_time": "0:28:19", "remaining_time": "0:19:15"}
{"current_steps": 280, "total_steps": 462, "loss": 1.1198, "lr": 3.3968276972363224e-06, "epoch": 1.8181818181818183, "percentage": 60.61, "elapsed_time": "0:28:49", "remaining_time": "0:18:44"}
{"current_steps": 285, "total_steps": 462, "loss": 1.0981, "lr": 3.2367611240721796e-06, "epoch": 1.8506493506493507, "percentage": 61.69, "elapsed_time": "0:29:19", "remaining_time": "0:18:13"}
{"current_steps": 290, "total_steps": 462, "loss": 1.1316, "lr": 3.0787326491685287e-06, "epoch": 1.883116883116883, "percentage": 62.77, "elapsed_time": "0:29:50", "remaining_time": "0:17:41"}
{"current_steps": 295, "total_steps": 462, "loss": 1.1043, "lr": 2.9229249349905686e-06, "epoch": 1.9155844155844157, "percentage": 63.85, "elapsed_time": "0:30:20", "remaining_time": "0:17:10"}
{"current_steps": 300, "total_steps": 462, "loss": 1.1235, "lr": 2.7695180770633993e-06, "epoch": 1.948051948051948, "percentage": 64.94, "elapsed_time": "0:30:50", "remaining_time": "0:16:39"}
{"current_steps": 305, "total_steps": 462, "loss": 1.0871, "lr": 2.6186893958026245e-06, "epoch": 1.9805194805194806, "percentage": 66.02, "elapsed_time": "0:31:20", "remaining_time": "0:16:08"}
{"current_steps": 310, "total_steps": 462, "loss": 0.9514, "lr": 2.470613231552661e-06, "epoch": 2.012987012987013, "percentage": 67.1, "elapsed_time": "0:32:10", "remaining_time": "0:15:46"}
{"current_steps": 315, "total_steps": 462, "loss": 0.8202, "lr": 2.3254607430696393e-06, "epoch": 2.0454545454545454, "percentage": 68.18, "elapsed_time": "0:32:40", "remaining_time": "0:15:14"}
{"current_steps": 320, "total_steps": 462, "loss": 0.806, "lr": 2.1833997096818897e-06, "epoch": 2.0779220779220777, "percentage": 69.26, "elapsed_time": "0:33:10", "remaining_time": "0:14:43"}
{"current_steps": 325, "total_steps": 462, "loss": 0.8074, "lr": 2.0445943373566178e-06, "epoch": 2.1103896103896105, "percentage": 70.35, "elapsed_time": "0:33:41", "remaining_time": "0:14:11"}
{"current_steps": 330, "total_steps": 462, "loss": 0.8066, "lr": 1.9092050688969736e-06, "epoch": 2.142857142857143, "percentage": 71.43, "elapsed_time": "0:34:11", "remaining_time": "0:13:40"}
{"current_steps": 335, "total_steps": 462, "loss": 0.8523, "lr": 1.7773883984889178e-06, "epoch": 2.175324675324675, "percentage": 72.51, "elapsed_time": "0:34:41", "remaining_time": "0:13:09"}
{"current_steps": 340, "total_steps": 462, "loss": 0.77, "lr": 1.6492966908122033e-06, "epoch": 2.207792207792208, "percentage": 73.59, "elapsed_time": "0:35:12", "remaining_time": "0:12:37"}
{"current_steps": 345, "total_steps": 462, "loss": 0.8482, "lr": 1.5250780049246028e-06, "epoch": 2.24025974025974, "percentage": 74.68, "elapsed_time": "0:35:42", "remaining_time": "0:12:06"}
{"current_steps": 350, "total_steps": 462, "loss": 0.8015, "lr": 1.404875923122928e-06, "epoch": 2.2727272727272725, "percentage": 75.76, "elapsed_time": "0:36:13", "remaining_time": "0:11:35"}
{"current_steps": 355, "total_steps": 462, "loss": 0.8329, "lr": 1.2888293849786503e-06, "epoch": 2.3051948051948052, "percentage": 76.84, "elapsed_time": "0:36:43", "remaining_time": "0:11:04"}
{"current_steps": 360, "total_steps": 462, "loss": 0.8106, "lr": 1.1770725267399892e-06, "epoch": 2.3376623376623376, "percentage": 77.92, "elapsed_time": "0:37:13", "remaining_time": "0:10:32"}
{"current_steps": 365, "total_steps": 462, "loss": 0.7896, "lr": 1.0697345262860638e-06, "epoch": 2.3701298701298703, "percentage": 79.0, "elapsed_time": "0:37:44", "remaining_time": "0:10:01"}
{"current_steps": 370, "total_steps": 462, "loss": 0.7826, "lr": 9.6693945381235e-07, "epoch": 2.4025974025974026, "percentage": 80.09, "elapsed_time": "0:38:14", "remaining_time": "0:09:30"}
{"current_steps": 375, "total_steps": 462, "loss": 0.7492, "lr": 8.688061284200266e-07, "epoch": 2.435064935064935, "percentage": 81.17, "elapsed_time": "0:38:44", "remaining_time": "0:08:59"}
{"current_steps": 380, "total_steps": 462, "loss": 0.7946, "lr": 7.754479807749571e-07, "epoch": 2.4675324675324677, "percentage": 82.25, "elapsed_time": "0:39:14", "remaining_time": "0:08:28"}
{"current_steps": 385, "total_steps": 462, "loss": 0.7898, "lr": 6.86972921995096e-07, "epoch": 2.5, "percentage": 83.33, "elapsed_time": "0:39:45", "remaining_time": "0:07:57"}
{"current_steps": 390, "total_steps": 462, "loss": 0.8299, "lr": 6.034832189178302e-07, "epoch": 2.5324675324675323, "percentage": 84.42, "elapsed_time": "0:40:15", "remaining_time": "0:07:25"}
{"current_steps": 395, "total_steps": 462, "loss": 0.7928, "lr": 5.250753758914506e-07, "epoch": 2.564935064935065, "percentage": 85.5, "elapsed_time": "0:40:45", "remaining_time": "0:06:54"}
{"current_steps": 400, "total_steps": 462, "loss": 0.7993, "lr": 4.5184002322740784e-07, "epoch": 2.5974025974025974, "percentage": 86.58, "elapsed_time": "0:41:15", "remaining_time": "0:06:23"}
{"current_steps": 405, "total_steps": 462, "loss": 0.7989, "lr": 3.8386181244224274e-07, "epoch": 2.62987012987013, "percentage": 87.66, "elapsed_time": "0:41:46", "remaining_time": "0:05:52"}
{"current_steps": 410, "total_steps": 462, "loss": 0.8242, "lr": 3.212193184103196e-07, "epoch": 2.6623376623376624, "percentage": 88.74, "elapsed_time": "0:42:16", "remaining_time": "0:05:21"}
{"current_steps": 415, "total_steps": 462, "loss": 0.7922, "lr": 2.6398494854045055e-07, "epoch": 2.6948051948051948, "percentage": 89.83, "elapsed_time": "0:42:46", "remaining_time": "0:04:50"}
{"current_steps": 420, "total_steps": 462, "loss": 0.8015, "lr": 2.1222485908137747e-07, "epoch": 2.7272727272727275, "percentage": 90.91, "elapsed_time": "0:43:16", "remaining_time": "0:04:19"}
{"current_steps": 425, "total_steps": 462, "loss": 0.7726, "lr": 1.659988786528821e-07, "epoch": 2.75974025974026, "percentage": 91.99, "elapsed_time": "0:43:47", "remaining_time": "0:03:48"}
{"current_steps": 430, "total_steps": 462, "loss": 0.7763, "lr": 1.253604390908819e-07, "epoch": 2.792207792207792, "percentage": 93.07, "elapsed_time": "0:44:17", "remaining_time": "0:03:17"}
{"current_steps": 435, "total_steps": 462, "loss": 0.8013, "lr": 9.035651368646647e-08, "epoch": 2.824675324675325, "percentage": 94.16, "elapsed_time": "0:44:47", "remaining_time": "0:02:46"}
{"current_steps": 440, "total_steps": 462, "loss": 0.7353, "lr": 6.102756289025957e-08, "epoch": 2.857142857142857, "percentage": 95.24, "elapsed_time": "0:45:17", "remaining_time": "0:02:15"}
{"current_steps": 445, "total_steps": 462, "loss": 0.8166, "lr": 3.7407487544861565e-08, "epoch": 2.8896103896103895, "percentage": 96.32, "elapsed_time": "0:45:47", "remaining_time": "0:01:44"}
{"current_steps": 450, "total_steps": 462, "loss": 0.8066, "lr": 1.9523589699433355e-08, "epoch": 2.9220779220779223, "percentage": 97.4, "elapsed_time": "0:46:18", "remaining_time": "0:01:14"}
{"current_steps": 455, "total_steps": 462, "loss": 0.7619, "lr": 7.3965410517179426e-09, "epoch": 2.9545454545454546, "percentage": 98.48, "elapsed_time": "0:46:48", "remaining_time": "0:00:43"}
{"current_steps": 460, "total_steps": 462, "loss": 0.8023, "lr": 1.040359053967599e-09, "epoch": 2.987012987012987, "percentage": 99.57, "elapsed_time": "0:47:18", "remaining_time": "0:00:12"}
{"current_steps": 462, "total_steps": 462, "epoch": 3.0, "percentage": 100.0, "elapsed_time": "0:47:50", "remaining_time": "0:00:00"}