QwenLeanSFT_0326 / trainer_log.jsonl
WhiteGiverPlus's picture
Upload folder using huggingface_hub
49de29c verified
{"current_steps": 10, "total_steps": 960, "loss": 0.3901, "learning_rate": 6.25e-06, "epoch": 0.02080083203328133, "percentage": 1.04, "elapsed_time": "0:01:02", "remaining_time": "1:39:43"}
{"current_steps": 20, "total_steps": 960, "loss": 0.2205, "learning_rate": 1.25e-05, "epoch": 0.04160166406656266, "percentage": 2.08, "elapsed_time": "0:02:04", "remaining_time": "1:37:35"}
{"current_steps": 30, "total_steps": 960, "loss": 0.1896, "learning_rate": 1.8750000000000002e-05, "epoch": 0.062402496099843996, "percentage": 3.12, "elapsed_time": "0:03:10", "remaining_time": "1:38:28"}
{"current_steps": 40, "total_steps": 960, "loss": 0.1908, "learning_rate": 2.5e-05, "epoch": 0.08320332813312532, "percentage": 4.17, "elapsed_time": "0:04:15", "remaining_time": "1:38:00"}
{"current_steps": 50, "total_steps": 960, "loss": 0.1623, "learning_rate": 2.9996796251818968e-05, "epoch": 0.10400416016640665, "percentage": 5.21, "elapsed_time": "0:05:14", "remaining_time": "1:35:22"}
{"current_steps": 60, "total_steps": 960, "loss": 0.164, "learning_rate": 2.9884808696055675e-05, "epoch": 0.12480499219968799, "percentage": 6.25, "elapsed_time": "0:06:10", "remaining_time": "1:32:30"}
{"current_steps": 70, "total_steps": 960, "loss": 0.1737, "learning_rate": 2.9613999639484314e-05, "epoch": 0.1456058242329693, "percentage": 7.29, "elapsed_time": "0:07:09", "remaining_time": "1:30:55"}
{"current_steps": 80, "total_steps": 960, "loss": 0.1637, "learning_rate": 2.9187258625509518e-05, "epoch": 0.16640665626625065, "percentage": 8.33, "elapsed_time": "0:08:11", "remaining_time": "1:30:11"}
{"current_steps": 90, "total_steps": 960, "loss": 0.1647, "learning_rate": 2.86091389977234e-05, "epoch": 0.187207488299532, "percentage": 9.38, "elapsed_time": "0:09:09", "remaining_time": "1:28:34"}
{"current_steps": 100, "total_steps": 960, "loss": 0.1653, "learning_rate": 2.788580931554828e-05, "epoch": 0.2080083203328133, "percentage": 10.42, "elapsed_time": "0:10:09", "remaining_time": "1:27:20"}
{"current_steps": 110, "total_steps": 960, "loss": 0.165, "learning_rate": 2.7024987535462327e-05, "epoch": 0.22880915236609464, "percentage": 11.46, "elapsed_time": "0:11:16", "remaining_time": "1:27:05"}
{"current_steps": 120, "total_steps": 960, "loss": 0.1638, "learning_rate": 2.6035858660096975e-05, "epoch": 0.24960998439937598, "percentage": 12.5, "elapsed_time": "0:12:23", "remaining_time": "1:26:46"}
{"current_steps": 130, "total_steps": 960, "loss": 0.1355, "learning_rate": 2.49289767338935e-05, "epoch": 0.2704108164326573, "percentage": 13.54, "elapsed_time": "0:13:18", "remaining_time": "1:25:00"}
{"current_steps": 140, "total_steps": 960, "loss": 0.1621, "learning_rate": 2.3716152231029077e-05, "epoch": 0.2912116484659386, "percentage": 14.58, "elapsed_time": "0:14:16", "remaining_time": "1:23:34"}
{"current_steps": 150, "total_steps": 960, "loss": 0.1518, "learning_rate": 2.2410326037187558e-05, "epoch": 0.31201248049922, "percentage": 15.62, "elapsed_time": "0:15:18", "remaining_time": "1:22:37"}
{"current_steps": 160, "total_steps": 960, "loss": 0.1346, "learning_rate": 2.1025431369794546e-05, "epoch": 0.3328133125325013, "percentage": 16.67, "elapsed_time": "0:16:11", "remaining_time": "1:20:59"}
{"current_steps": 170, "total_steps": 960, "loss": 0.1534, "learning_rate": 1.9576245110033233e-05, "epoch": 0.3536141445657826, "percentage": 17.71, "elapsed_time": "0:17:19", "remaining_time": "1:20:31"}
{"current_steps": 180, "total_steps": 960, "loss": 0.1443, "learning_rate": 1.8078230132934514e-05, "epoch": 0.374414976599064, "percentage": 18.75, "elapsed_time": "0:18:19", "remaining_time": "1:19:23"}
{"current_steps": 190, "total_steps": 960, "loss": 0.148, "learning_rate": 1.6547370317885354e-05, "epoch": 0.3952158086323453, "percentage": 19.79, "elapsed_time": "0:19:25", "remaining_time": "1:18:44"}
{"current_steps": 200, "total_steps": 960, "loss": 0.1412, "learning_rate": 1.5e-05, "epoch": 0.4160166406656266, "percentage": 20.83, "elapsed_time": "0:20:28", "remaining_time": "1:17:46"}
{"current_steps": 210, "total_steps": 960, "loss": 0.1331, "learning_rate": 1.3452629682114646e-05, "epoch": 0.43681747269890797, "percentage": 21.88, "elapsed_time": "0:21:24", "remaining_time": "1:16:26"}
{"current_steps": 220, "total_steps": 960, "loss": 0.1317, "learning_rate": 1.1921769867065483e-05, "epoch": 0.4576183047321893, "percentage": 22.92, "elapsed_time": "0:22:25", "remaining_time": "1:15:26"}
{"current_steps": 230, "total_steps": 960, "loss": 0.1332, "learning_rate": 1.042375488996677e-05, "epoch": 0.4784191367654706, "percentage": 23.96, "elapsed_time": "0:23:23", "remaining_time": "1:14:15"}
{"current_steps": 240, "total_steps": 960, "loss": 0.1402, "learning_rate": 8.974568630205462e-06, "epoch": 0.49921996879875197, "percentage": 25.0, "elapsed_time": "0:24:23", "remaining_time": "1:13:09"}
{"current_steps": 250, "total_steps": 960, "loss": 0.1433, "learning_rate": 7.589673962812442e-06, "epoch": 0.5200208008320333, "percentage": 26.04, "elapsed_time": "0:25:28", "remaining_time": "1:12:20"}
{"current_steps": 260, "total_steps": 960, "loss": 0.1416, "learning_rate": 6.283847768970927e-06, "epoch": 0.5408216328653146, "percentage": 27.08, "elapsed_time": "0:26:37", "remaining_time": "1:11:41"}
{"current_steps": 270, "total_steps": 960, "loss": 0.1262, "learning_rate": 5.071023266106502e-06, "epoch": 0.5616224648985959, "percentage": 28.12, "elapsed_time": "0:27:43", "remaining_time": "1:10:51"}
{"current_steps": 280, "total_steps": 960, "loss": 0.1153, "learning_rate": 3.964141339903026e-06, "epoch": 0.5824232969318772, "percentage": 29.17, "elapsed_time": "0:28:36", "remaining_time": "1:09:29"}
{"current_steps": 290, "total_steps": 960, "loss": 0.1293, "learning_rate": 2.975012464537676e-06, "epoch": 0.6032241289651586, "percentage": 30.21, "elapsed_time": "0:29:40", "remaining_time": "1:08:33"}
{"current_steps": 300, "total_steps": 960, "loss": 0.1308, "learning_rate": 2.1141906844517207e-06, "epoch": 0.62402496099844, "percentage": 31.25, "elapsed_time": "0:30:46", "remaining_time": "1:07:41"}
{"current_steps": 310, "total_steps": 960, "loss": 0.122, "learning_rate": 1.390861002276602e-06, "epoch": 0.6448257930317213, "percentage": 32.29, "elapsed_time": "0:31:45", "remaining_time": "1:06:34"}
{"current_steps": 320, "total_steps": 960, "loss": 0.1165, "learning_rate": 8.127413744904805e-07, "epoch": 0.6656266250650026, "percentage": 33.33, "elapsed_time": "0:32:41", "remaining_time": "1:05:22"}
{"current_steps": 330, "total_steps": 960, "loss": 0.1344, "learning_rate": 3.860003605156881e-07, "epoch": 0.6864274570982839, "percentage": 34.38, "elapsed_time": "0:33:55", "remaining_time": "1:04:45"}
{"current_steps": 340, "total_steps": 960, "loss": 0.1144, "learning_rate": 1.1519130394432476e-07, "epoch": 0.7072282891315652, "percentage": 35.42, "elapsed_time": "0:34:51", "remaining_time": "1:03:34"}
{"current_steps": 350, "total_steps": 960, "loss": 0.1156, "learning_rate": 3.20374818103486e-09, "epoch": 0.7280291211648466, "percentage": 36.46, "elapsed_time": "0:35:50", "remaining_time": "1:02:27"}
{"current_steps": 360, "total_steps": 960, "loss": 0.1182, "learning_rate": 5.1232604899952296e-08, "epoch": 0.748829953198128, "percentage": 37.5, "elapsed_time": "0:36:48", "remaining_time": "1:01:20"}
{"current_steps": 370, "total_steps": 960, "loss": 0.1222, "learning_rate": 2.587654042896087e-07, "epoch": 0.7696307852314093, "percentage": 38.54, "elapsed_time": "0:37:50", "remaining_time": "1:00:20"}
{"current_steps": 380, "total_steps": 960, "loss": 0.1155, "learning_rate": 6.23587763126211e-07, "epoch": 0.7904316172646906, "percentage": 39.58, "elapsed_time": "0:38:56", "remaining_time": "0:59:25"}
{"current_steps": 390, "total_steps": 960, "loss": 0.1258, "learning_rate": 1.1418070123306974e-06, "epoch": 0.8112324492979719, "percentage": 40.62, "elapsed_time": "0:39:57", "remaining_time": "0:58:24"}
{"current_steps": 400, "total_steps": 960, "loss": 0.1278, "learning_rate": 1.8078937319026607e-06, "epoch": 0.8320332813312532, "percentage": 41.67, "elapsed_time": "0:40:58", "remaining_time": "0:57:22"}
{"current_steps": 410, "total_steps": 960, "loss": 0.1271, "learning_rate": 2.614740750051663e-06, "epoch": 0.8528341133645346, "percentage": 42.71, "elapsed_time": "0:42:03", "remaining_time": "0:56:25"}
{"current_steps": 420, "total_steps": 960, "loss": 0.1471, "learning_rate": 3.5537389770028983e-06, "epoch": 0.8736349453978159, "percentage": 43.75, "elapsed_time": "0:43:16", "remaining_time": "0:55:38"}
{"current_steps": 430, "total_steps": 960, "loss": 0.1139, "learning_rate": 4.614869264327555e-06, "epoch": 0.8944357774310973, "percentage": 44.79, "elapsed_time": "0:44:10", "remaining_time": "0:54:26"}
{"current_steps": 440, "total_steps": 960, "loss": 0.1101, "learning_rate": 5.786809309654979e-06, "epoch": 0.9152366094643786, "percentage": 45.83, "elapsed_time": "0:45:05", "remaining_time": "0:53:17"}
{"current_steps": 450, "total_steps": 960, "loss": 0.1315, "learning_rate": 7.0570544660893674e-06, "epoch": 0.9360374414976599, "percentage": 46.88, "elapsed_time": "0:46:09", "remaining_time": "0:52:19"}
{"current_steps": 460, "total_steps": 960, "loss": 0.1179, "learning_rate": 8.412051167289444e-06, "epoch": 0.9568382735309412, "percentage": 47.92, "elapsed_time": "0:47:12", "remaining_time": "0:51:19"}
{"current_steps": 470, "total_steps": 960, "loss": 0.1252, "learning_rate": 9.837341544560421e-06, "epoch": 0.9776391055642226, "percentage": 48.96, "elapsed_time": "0:48:13", "remaining_time": "0:50:16"}
{"current_steps": 480, "total_steps": 960, "loss": 0.1205, "learning_rate": 1.1317717692888014e-05, "epoch": 0.9984399375975039, "percentage": 50.0, "elapsed_time": "0:49:07", "remaining_time": "0:49:07"}
{"current_steps": 490, "total_steps": 960, "loss": 0.0949, "learning_rate": 1.28373839398898e-05, "epoch": 1.0192407696307852, "percentage": 51.04, "elapsed_time": "0:50:04", "remaining_time": "0:48:01"}
{"current_steps": 500, "total_steps": 960, "loss": 0.0831, "learning_rate": 1.4380125386267791e-05, "epoch": 1.0400416016640666, "percentage": 52.08, "elapsed_time": "0:50:54", "remaining_time": "0:46:50"}
{"current_steps": 510, "total_steps": 960, "loss": 0.1127, "learning_rate": 1.592948091942891e-05, "epoch": 1.0608424336973479, "percentage": 53.12, "elapsed_time": "0:52:01", "remaining_time": "0:45:54"}
{"current_steps": 520, "total_steps": 960, "loss": 0.1104, "learning_rate": 1.7468918854211013e-05, "epoch": 1.0816432657306292, "percentage": 54.17, "elapsed_time": "0:53:10", "remaining_time": "0:44:59"}
{"current_steps": 530, "total_steps": 960, "loss": 0.1132, "learning_rate": 1.898201332662109e-05, "epoch": 1.1024440977639105, "percentage": 55.21, "elapsed_time": "0:54:17", "remaining_time": "0:44:02"}
{"current_steps": 540, "total_steps": 960, "loss": 0.1006, "learning_rate": 2.045261955845744e-05, "epoch": 1.1232449297971918, "percentage": 56.25, "elapsed_time": "0:55:16", "remaining_time": "0:42:59"}
{"current_steps": 550, "total_steps": 960, "loss": 0.1012, "learning_rate": 2.186504612273522e-05, "epoch": 1.1440457618304731, "percentage": 57.29, "elapsed_time": "0:56:12", "remaining_time": "0:41:54"}
{"current_steps": 560, "total_steps": 960, "loss": 0.1132, "learning_rate": 2.3204222371836402e-05, "epoch": 1.1648465938637544, "percentage": 58.33, "elapsed_time": "0:57:14", "remaining_time": "0:40:53"}
{"current_steps": 570, "total_steps": 960, "loss": 0.0986, "learning_rate": 2.4455859241919324e-05, "epoch": 1.185647425897036, "percentage": 59.38, "elapsed_time": "0:58:13", "remaining_time": "0:39:50"}
{"current_steps": 580, "total_steps": 960, "loss": 0.1272, "learning_rate": 2.5606601717798212e-05, "epoch": 1.2064482579303173, "percentage": 60.42, "elapsed_time": "0:59:20", "remaining_time": "0:38:52"}
{"current_steps": 590, "total_steps": 960, "loss": 0.1255, "learning_rate": 2.6644171331486363e-05, "epoch": 1.2272490899635986, "percentage": 61.46, "elapsed_time": "1:00:21", "remaining_time": "0:37:51"}
{"current_steps": 600, "total_steps": 960, "loss": 0.1217, "learning_rate": 2.7557497173937928e-05, "epoch": 1.24804992199688, "percentage": 62.5, "elapsed_time": "1:01:27", "remaining_time": "0:36:52"}
{"current_steps": 610, "total_steps": 960, "loss": 0.1198, "learning_rate": 2.8336834022087772e-05, "epoch": 1.2688507540301612, "percentage": 63.54, "elapsed_time": "1:02:34", "remaining_time": "0:35:54"}
{"current_steps": 620, "total_steps": 960, "loss": 0.1207, "learning_rate": 2.8973866320769182e-05, "epoch": 1.2896515860634425, "percentage": 64.58, "elapsed_time": "1:03:38", "remaining_time": "0:34:53"}
{"current_steps": 630, "total_steps": 960, "loss": 0.1186, "learning_rate": 2.9461796910018204e-05, "epoch": 1.3104524180967239, "percentage": 65.62, "elapsed_time": "1:04:35", "remaining_time": "0:33:49"}
{"current_steps": 640, "total_steps": 960, "loss": 0.1254, "learning_rate": 2.979541955104084e-05, "epoch": 1.3312532501300052, "percentage": 66.67, "elapsed_time": "1:05:32", "remaining_time": "0:32:46"}
{"current_steps": 650, "total_steps": 960, "loss": 0.1285, "learning_rate": 2.997117447698802e-05, "epoch": 1.3520540821632865, "percentage": 67.71, "elapsed_time": "1:06:34", "remaining_time": "0:31:45"}
{"current_steps": 660, "total_steps": 960, "loss": 0.1148, "learning_rate": 2.9987186375809513e-05, "epoch": 1.3728549141965678, "percentage": 68.75, "elapsed_time": "1:07:37", "remaining_time": "0:30:44"}
{"current_steps": 670, "total_steps": 960, "loss": 0.1137, "learning_rate": 2.984328439990804e-05, "epoch": 1.3936557462298491, "percentage": 69.79, "elapsed_time": "1:08:40", "remaining_time": "0:29:43"}
{"current_steps": 680, "total_steps": 960, "loss": 0.1197, "learning_rate": 2.9541003989089956e-05, "epoch": 1.4144565782631306, "percentage": 70.83, "elapsed_time": "1:09:31", "remaining_time": "0:28:37"}
{"current_steps": 690, "total_steps": 960, "loss": 0.1332, "learning_rate": 2.9083570487361445e-05, "epoch": 1.435257410296412, "percentage": 71.88, "elapsed_time": "1:10:41", "remaining_time": "0:27:39"}
{"current_steps": 700, "total_steps": 960, "loss": 0.1246, "learning_rate": 2.8475864728379682e-05, "epoch": 1.4560582423296933, "percentage": 72.92, "elapsed_time": "1:11:45", "remaining_time": "0:26:39"}
{"current_steps": 710, "total_steps": 960, "loss": 0.1384, "learning_rate": 2.772437095676361e-05, "epoch": 1.4768590743629746, "percentage": 73.96, "elapsed_time": "1:12:55", "remaining_time": "0:25:40"}
{"current_steps": 720, "total_steps": 960, "loss": 0.1151, "learning_rate": 2.683710764094591e-05, "epoch": 1.497659906396256, "percentage": 75.0, "elapsed_time": "1:14:02", "remaining_time": "0:24:40"}
{"current_steps": 730, "total_steps": 960, "loss": 0.1196, "learning_rate": 2.5823541915795936e-05, "epoch": 1.5184607384295372, "percentage": 76.04, "elapsed_time": "1:14:59", "remaining_time": "0:23:37"}
{"current_steps": 740, "total_steps": 960, "loss": 0.107, "learning_rate": 2.4694488567914113e-05, "epoch": 1.5392615704628185, "percentage": 77.08, "elapsed_time": "1:15:59", "remaining_time": "0:22:35"}
{"current_steps": 750, "total_steps": 960, "loss": 0.1324, "learning_rate": 2.3461994641428768e-05, "epoch": 1.5600624024960998, "percentage": 78.12, "elapsed_time": "1:17:02", "remaining_time": "0:21:34"}
{"current_steps": 760, "total_steps": 960, "loss": 0.1197, "learning_rate": 2.213921089555611e-05, "epoch": 1.5808632345293812, "percentage": 79.17, "elapsed_time": "1:18:01", "remaining_time": "0:20:32"}
{"current_steps": 770, "total_steps": 960, "loss": 0.1209, "learning_rate": 2.074025148547636e-05, "epoch": 1.6016640665626625, "percentage": 80.21, "elapsed_time": "1:19:08", "remaining_time": "0:19:31"}
{"current_steps": 780, "total_steps": 960, "loss": 0.113, "learning_rate": 1.9280043363736577e-05, "epoch": 1.6224648985959438, "percentage": 81.25, "elapsed_time": "1:20:12", "remaining_time": "0:18:30"}
{"current_steps": 790, "total_steps": 960, "loss": 0.1149, "learning_rate": 1.7774167009073373e-05, "epoch": 1.643265730629225, "percentage": 82.29, "elapsed_time": "1:21:12", "remaining_time": "0:17:28"}
{"current_steps": 800, "total_steps": 960, "loss": 0.1083, "learning_rate": 1.6238690182084996e-05, "epoch": 1.6640665626625064, "percentage": 83.33, "elapsed_time": "1:22:11", "remaining_time": "0:16:26"}
{"current_steps": 810, "total_steps": 960, "loss": 0.1013, "learning_rate": 1.4689996481586692e-05, "epoch": 1.6848673946957877, "percentage": 84.38, "elapsed_time": "1:23:07", "remaining_time": "0:15:23"}
{"current_steps": 820, "total_steps": 960, "loss": 0.1004, "learning_rate": 1.3144610530959793e-05, "epoch": 1.705668226729069, "percentage": 85.42, "elapsed_time": "1:24:01", "remaining_time": "0:14:20"}
{"current_steps": 830, "total_steps": 960, "loss": 0.1045, "learning_rate": 1.1619021659762912e-05, "epoch": 1.7264690587623504, "percentage": 86.46, "elapsed_time": "1:25:05", "remaining_time": "0:13:19"}
{"current_steps": 840, "total_steps": 960, "loss": 0.0952, "learning_rate": 1.0129507961929739e-05, "epoch": 1.7472698907956317, "percentage": 87.5, "elapsed_time": "1:26:02", "remaining_time": "0:12:17"}
{"current_steps": 850, "total_steps": 960, "loss": 0.0919, "learning_rate": 8.69196260785939e-06, "epoch": 1.7680707228289132, "percentage": 88.54, "elapsed_time": "1:26:54", "remaining_time": "0:11:14"}
{"current_steps": 860, "total_steps": 960, "loss": 0.1008, "learning_rate": 7.321724263655999e-06, "epoch": 1.7888715548621945, "percentage": 89.58, "elapsed_time": "1:27:52", "remaining_time": "0:10:13"}
{"current_steps": 870, "total_steps": 960, "loss": 0.0986, "learning_rate": 6.033413426951388e-06, "epoch": 1.8096723868954758, "percentage": 90.62, "elapsed_time": "1:28:50", "remaining_time": "0:09:11"}
{"current_steps": 880, "total_steps": 960, "loss": 0.1144, "learning_rate": 4.840776425613894e-06, "epoch": 1.8304732189287571, "percentage": 91.67, "elapsed_time": "1:29:59", "remaining_time": "0:08:10"}
{"current_steps": 890, "total_steps": 960, "loss": 0.105, "learning_rate": 3.7565387438831093e-06, "epoch": 1.8512740509620385, "percentage": 92.71, "elapsed_time": "1:31:00", "remaining_time": "0:07:09"}
{"current_steps": 900, "total_steps": 960, "loss": 0.1027, "learning_rate": 2.792269240947083e-06, "epoch": 1.8720748829953198, "percentage": 93.75, "elapsed_time": "1:32:01", "remaining_time": "0:06:08"}
{"current_steps": 910, "total_steps": 960, "loss": 0.1049, "learning_rate": 1.958256710754496e-06, "epoch": 1.8928757150286013, "percentage": 94.79, "elapsed_time": "1:33:01", "remaining_time": "0:05:06"}
{"current_steps": 920, "total_steps": 960, "loss": 0.0987, "learning_rate": 1.2634001001741423e-06, "epoch": 1.9136765470618826, "percentage": 95.83, "elapsed_time": "1:34:01", "remaining_time": "0:04:05"}
{"current_steps": 930, "total_steps": 960, "loss": 0.1019, "learning_rate": 7.151135568777889e-07, "epoch": 1.934477379095164, "percentage": 96.88, "elapsed_time": "1:35:02", "remaining_time": "0:03:03"}
{"current_steps": 940, "total_steps": 960, "loss": 0.1044, "learning_rate": 3.192473200896845e-07, "epoch": 1.9552782111284452, "percentage": 97.92, "elapsed_time": "1:36:08", "remaining_time": "0:02:02"}
{"current_steps": 950, "total_steps": 960, "loss": 0.1004, "learning_rate": 8.002529830135996e-08, "epoch": 1.9760790431617266, "percentage": 98.96, "elapsed_time": "1:37:09", "remaining_time": "0:01:01"}
{"current_steps": 960, "total_steps": 960, "loss": 0.0966, "learning_rate": 0.0, "epoch": 1.9968798751950079, "percentage": 100.0, "elapsed_time": "1:38:06", "remaining_time": "0:00:00"}
{"current_steps": 960, "total_steps": 960, "epoch": 1.9968798751950079, "percentage": 100.0, "elapsed_time": "1:38:54", "remaining_time": "0:00:00"}