Mistral-EN-Part-2 / trainer_log.jsonl
SamChen888's picture
Upload folder using huggingface_hub
2308a88 verified
{"current_steps": 10, "total_steps": 696, "loss": 0.9267, "lr": 2.9984721919587606e-05, "epoch": 0.043010752688172046, "percentage": 1.44, "elapsed_time": "0:00:11", "remaining_time": "0:13:01", "throughput": 2301.21, "total_tokens": 26208}
{"current_steps": 20, "total_steps": 696, "loss": 0.1561, "lr": 2.9938918800982563e-05, "epoch": 0.08602150537634409, "percentage": 2.87, "elapsed_time": "0:00:20", "remaining_time": "0:11:29", "throughput": 2605.3, "total_tokens": 53152}
{"current_steps": 30, "total_steps": 696, "loss": 0.1764, "lr": 2.9862683948682103e-05, "epoch": 0.12903225806451613, "percentage": 4.31, "elapsed_time": "0:00:29", "remaining_time": "0:10:51", "throughput": 2718.87, "total_tokens": 79776}
{"current_steps": 40, "total_steps": 696, "loss": 0.1505, "lr": 2.975617265898004e-05, "epoch": 0.17204301075268819, "percentage": 5.75, "elapsed_time": "0:00:38", "remaining_time": "0:10:29", "throughput": 2776.68, "total_tokens": 106496}
{"current_steps": 50, "total_steps": 696, "loss": 0.1615, "lr": 2.961960190361624e-05, "epoch": 0.21505376344086022, "percentage": 7.18, "elapsed_time": "0:00:47", "remaining_time": "0:10:14", "throughput": 2813.4, "total_tokens": 133728}
{"current_steps": 60, "total_steps": 696, "loss": 0.1369, "lr": 2.9453249887788343e-05, "epoch": 0.25806451612903225, "percentage": 8.62, "elapsed_time": "0:00:56", "remaining_time": "0:09:58", "throughput": 2832.51, "total_tokens": 159936}
{"current_steps": 70, "total_steps": 696, "loss": 0.1253, "lr": 2.925745548342631e-05, "epoch": 0.3010752688172043, "percentage": 10.06, "elapsed_time": "0:01:05", "remaining_time": "0:09:44", "throughput": 2849.27, "total_tokens": 186240}
{"current_steps": 80, "total_steps": 696, "loss": 0.1479, "lr": 2.9032617538884018e-05, "epoch": 0.34408602150537637, "percentage": 11.49, "elapsed_time": "0:01:14", "remaining_time": "0:09:32", "throughput": 2862.47, "total_tokens": 212832}
{"current_steps": 90, "total_steps": 696, "loss": 0.1102, "lr": 2.877919406645433e-05, "epoch": 0.3870967741935484, "percentage": 12.93, "elapsed_time": "0:01:23", "remaining_time": "0:09:21", "throughput": 2874.09, "total_tokens": 239648}
{"current_steps": 100, "total_steps": 696, "loss": 0.099, "lr": 2.84977013093626e-05, "epoch": 0.43010752688172044, "percentage": 14.37, "elapsed_time": "0:01:32", "remaining_time": "0:09:10", "throughput": 2880.65, "total_tokens": 265888}
{"current_steps": 110, "total_steps": 696, "loss": 0.107, "lr": 2.818871269013928e-05, "epoch": 0.4731182795698925, "percentage": 15.8, "elapsed_time": "0:01:41", "remaining_time": "0:08:59", "throughput": 2886.12, "total_tokens": 292480}
{"current_steps": 120, "total_steps": 696, "loss": 0.1183, "lr": 2.7852857642513838e-05, "epoch": 0.5161290322580645, "percentage": 17.24, "elapsed_time": "0:01:50", "remaining_time": "0:08:49", "throughput": 2891.42, "total_tokens": 318784}
{"current_steps": 130, "total_steps": 696, "loss": 0.1097, "lr": 2.7490820329209546e-05, "epoch": 0.5591397849462365, "percentage": 18.68, "elapsed_time": "0:01:59", "remaining_time": "0:08:40", "throughput": 2896.48, "total_tokens": 346016}
{"current_steps": 140, "total_steps": 696, "loss": 0.0946, "lr": 2.7103338248251055e-05, "epoch": 0.6021505376344086, "percentage": 20.11, "elapsed_time": "0:02:08", "remaining_time": "0:08:30", "throughput": 2899.2, "total_tokens": 372384}
{"current_steps": 150, "total_steps": 696, "loss": 0.1251, "lr": 2.6691200730623874e-05, "epoch": 0.6451612903225806, "percentage": 21.55, "elapsed_time": "0:02:17", "remaining_time": "0:08:20", "throughput": 2902.17, "total_tokens": 399328}
{"current_steps": 160, "total_steps": 696, "loss": 0.1069, "lr": 2.6255247332346036e-05, "epoch": 0.6881720430107527, "percentage": 22.99, "elapsed_time": "0:02:26", "remaining_time": "0:08:11", "throughput": 2904.29, "total_tokens": 426048}
{"current_steps": 170, "total_steps": 696, "loss": 0.0904, "lr": 2.5796366124227532e-05, "epoch": 0.7311827956989247, "percentage": 24.43, "elapsed_time": "0:02:35", "remaining_time": "0:08:01", "throughput": 2906.29, "total_tokens": 452640}
{"current_steps": 180, "total_steps": 696, "loss": 0.1273, "lr": 2.531549188280135e-05, "epoch": 0.7741935483870968, "percentage": 25.86, "elapsed_time": "0:02:44", "remaining_time": "0:07:52", "throughput": 2908.27, "total_tokens": 479808}
{"current_steps": 190, "total_steps": 696, "loss": 0.1206, "lr": 2.481360418611132e-05, "epoch": 0.8172043010752689, "percentage": 27.3, "elapsed_time": "0:02:54", "remaining_time": "0:07:43", "throughput": 2908.28, "total_tokens": 506176}
{"current_steps": 200, "total_steps": 696, "loss": 0.103, "lr": 2.4291725418235848e-05, "epoch": 0.8602150537634409, "percentage": 28.74, "elapsed_time": "0:03:03", "remaining_time": "0:07:34", "throughput": 2908.42, "total_tokens": 533216}
{"current_steps": 210, "total_steps": 696, "loss": 0.0696, "lr": 2.3750918686612414e-05, "epoch": 0.9032258064516129, "percentage": 30.17, "elapsed_time": "0:03:12", "remaining_time": "0:07:26", "throughput": 2909.86, "total_tokens": 561056}
{"current_steps": 220, "total_steps": 696, "loss": 0.0822, "lr": 2.3192285656405456e-05, "epoch": 0.946236559139785, "percentage": 31.61, "elapsed_time": "0:03:22", "remaining_time": "0:07:17", "throughput": 2909.89, "total_tokens": 588160}
{"current_steps": 230, "total_steps": 696, "loss": 0.0913, "lr": 2.2616964306329183e-05, "epoch": 0.989247311827957, "percentage": 33.05, "elapsed_time": "0:03:31", "remaining_time": "0:07:08", "throughput": 2909.56, "total_tokens": 615168}
{"current_steps": 240, "total_steps": 696, "loss": 0.0735, "lr": 2.2026126610496852e-05, "epoch": 1.0301075268817204, "percentage": 34.48, "elapsed_time": "0:03:40", "remaining_time": "0:06:58", "throughput": 2906.22, "total_tokens": 639864}
{"current_steps": 250, "total_steps": 696, "loss": 0.0752, "lr": 2.1420976151018813e-05, "epoch": 1.0731182795698926, "percentage": 35.92, "elapsed_time": "0:03:49", "remaining_time": "0:06:49", "throughput": 2906.03, "total_tokens": 667224}
{"current_steps": 260, "total_steps": 696, "loss": 0.0588, "lr": 2.0802745666212592e-05, "epoch": 1.1161290322580646, "percentage": 37.36, "elapsed_time": "0:03:58", "remaining_time": "0:06:40", "throughput": 2905.16, "total_tokens": 693848}
{"current_steps": 270, "total_steps": 696, "loss": 0.0924, "lr": 2.0172694539419557e-05, "epoch": 1.1591397849462366, "percentage": 38.79, "elapsed_time": "0:04:08", "remaining_time": "0:06:31", "throughput": 2904.97, "total_tokens": 720568}
{"current_steps": 280, "total_steps": 696, "loss": 0.062, "lr": 1.953210623354359e-05, "epoch": 1.2021505376344086, "percentage": 40.23, "elapsed_time": "0:04:17", "remaining_time": "0:06:21", "throughput": 2905.07, "total_tokens": 746872}
{"current_steps": 290, "total_steps": 696, "loss": 0.0621, "lr": 1.888228567653781e-05, "epoch": 1.2451612903225806, "percentage": 41.67, "elapsed_time": "0:04:26", "remaining_time": "0:06:12", "throughput": 2905.29, "total_tokens": 773720}
{"current_steps": 300, "total_steps": 696, "loss": 0.075, "lr": 1.8224556603165363e-05, "epoch": 1.2881720430107526, "percentage": 43.1, "elapsed_time": "0:04:35", "remaining_time": "0:06:04", "throughput": 2905.84, "total_tokens": 801464}
{"current_steps": 310, "total_steps": 696, "loss": 0.0858, "lr": 1.7560258858449248e-05, "epoch": 1.3311827956989246, "percentage": 44.54, "elapsed_time": "0:04:45", "remaining_time": "0:05:55", "throughput": 2906.05, "total_tokens": 829144}
{"current_steps": 320, "total_steps": 696, "loss": 0.0697, "lr": 1.689074566830434e-05, "epoch": 1.3741935483870968, "percentage": 45.98, "elapsed_time": "0:04:54", "remaining_time": "0:05:46", "throughput": 2905.02, "total_tokens": 855672}
{"current_steps": 330, "total_steps": 696, "loss": 0.0827, "lr": 1.621738088291147e-05, "epoch": 1.4172043010752688, "percentage": 47.41, "elapsed_time": "0:05:03", "remaining_time": "0:05:37", "throughput": 2903.8, "total_tokens": 882424}
{"current_steps": 340, "total_steps": 696, "loss": 0.0651, "lr": 1.5541536198449044e-05, "epoch": 1.4602150537634409, "percentage": 48.85, "elapsed_time": "0:05:13", "remaining_time": "0:05:27", "throughput": 2902.99, "total_tokens": 908792}
{"current_steps": 350, "total_steps": 696, "loss": 0.0607, "lr": 1.4864588362841808e-05, "epoch": 1.5032258064516129, "percentage": 50.29, "elapsed_time": "0:05:22", "remaining_time": "0:05:18", "throughput": 2903.09, "total_tokens": 935672}
{"current_steps": 360, "total_steps": 696, "loss": 0.056, "lr": 1.4187916371218739e-05, "epoch": 1.546236559139785, "percentage": 51.72, "elapsed_time": "0:05:31", "remaining_time": "0:05:09", "throughput": 2902.53, "total_tokens": 961848}
{"current_steps": 370, "total_steps": 696, "loss": 0.0823, "lr": 1.3512898656793283e-05, "epoch": 1.589247311827957, "percentage": 53.16, "elapsed_time": "0:05:40", "remaining_time": "0:05:00", "throughput": 2902.81, "total_tokens": 988600}
{"current_steps": 380, "total_steps": 696, "loss": 0.058, "lr": 1.2840910282888211e-05, "epoch": 1.632258064516129, "percentage": 54.6, "elapsed_time": "0:05:49", "remaining_time": "0:04:50", "throughput": 2902.64, "total_tokens": 1014840}
{"current_steps": 390, "total_steps": 696, "loss": 0.0705, "lr": 1.2173320141825232e-05, "epoch": 1.675268817204301, "percentage": 56.03, "elapsed_time": "0:05:58", "remaining_time": "0:04:41", "throughput": 2902.41, "total_tokens": 1040856}
{"current_steps": 400, "total_steps": 696, "loss": 0.0514, "lr": 1.1511488166385349e-05, "epoch": 1.718279569892473, "percentage": 57.47, "elapsed_time": "0:06:07", "remaining_time": "0:04:32", "throughput": 2902.76, "total_tokens": 1067544}
{"current_steps": 410, "total_steps": 696, "loss": 0.0834, "lr": 1.0856762559520605e-05, "epoch": 1.761290322580645, "percentage": 58.91, "elapsed_time": "0:06:17", "remaining_time": "0:04:22", "throughput": 2903.25, "total_tokens": 1094584}
{"current_steps": 420, "total_steps": 696, "loss": 0.0583, "lr": 1.0210477047960303e-05, "epoch": 1.8043010752688171, "percentage": 60.34, "elapsed_time": "0:06:26", "remaining_time": "0:04:13", "throughput": 2903.01, "total_tokens": 1120760}
{"current_steps": 430, "total_steps": 696, "loss": 0.0922, "lr": 9.573948165306438e-06, "epoch": 1.8473118279569891, "percentage": 61.78, "elapsed_time": "0:06:35", "remaining_time": "0:04:04", "throughput": 2902.51, "total_tokens": 1146776}
{"current_steps": 440, "total_steps": 696, "loss": 0.0633, "lr": 8.948472570152874e-06, "epoch": 1.8903225806451613, "percentage": 63.22, "elapsed_time": "0:06:44", "remaining_time": "0:03:55", "throughput": 2903.29, "total_tokens": 1174424}
{"current_steps": 450, "total_steps": 696, "loss": 0.0597, "lr": 8.33532440469145e-06, "epoch": 1.9333333333333333, "percentage": 64.66, "elapsed_time": "0:06:53", "remaining_time": "0:03:46", "throughput": 2903.81, "total_tokens": 1201048}
{"current_steps": 460, "total_steps": 696, "loss": 0.0491, "lr": 7.735752699185711e-06, "epoch": 1.9763440860215054, "percentage": 66.09, "elapsed_time": "0:07:02", "remaining_time": "0:03:36", "throughput": 2903.71, "total_tokens": 1227576}
{"current_steps": 470, "total_steps": 696, "loss": 0.0341, "lr": 7.150978827599619e-06, "epoch": 2.0172043010752687, "percentage": 67.53, "elapsed_time": "0:07:11", "remaining_time": "0:03:27", "throughput": 2902.63, "total_tokens": 1252160}
{"current_steps": 480, "total_steps": 696, "loss": 0.0373, "lr": 6.582194019564266e-06, "epoch": 2.0602150537634407, "percentage": 68.97, "elapsed_time": "0:07:20", "remaining_time": "0:03:18", "throughput": 2903.07, "total_tokens": 1279328}
{"current_steps": 490, "total_steps": 696, "loss": 0.0281, "lr": 6.0305569337509225e-06, "epoch": 2.1032258064516127, "percentage": 70.4, "elapsed_time": "0:07:29", "remaining_time": "0:03:09", "throughput": 2903.01, "total_tokens": 1306304}
{"current_steps": 500, "total_steps": 696, "loss": 0.0183, "lr": 5.497191297593647e-06, "epoch": 2.146236559139785, "percentage": 71.84, "elapsed_time": "0:07:39", "remaining_time": "0:03:00", "throughput": 2902.59, "total_tokens": 1333184}
{"current_steps": 510, "total_steps": 696, "loss": 0.0334, "lr": 4.98318361816957e-06, "epoch": 2.189247311827957, "percentage": 73.28, "elapsed_time": "0:07:48", "remaining_time": "0:02:50", "throughput": 2901.7, "total_tokens": 1359872}
{"current_steps": 520, "total_steps": 696, "loss": 0.0282, "lr": 4.4895809688998655e-06, "epoch": 2.232258064516129, "percentage": 74.71, "elapsed_time": "0:07:58", "remaining_time": "0:02:41", "throughput": 2901.24, "total_tokens": 1387328}
{"current_steps": 530, "total_steps": 696, "loss": 0.0562, "lr": 4.017388856580178e-06, "epoch": 2.275268817204301, "percentage": 76.15, "elapsed_time": "0:08:07", "remaining_time": "0:02:32", "throughput": 2901.79, "total_tokens": 1414816}
{"current_steps": 540, "total_steps": 696, "loss": 0.0243, "lr": 3.567569173085455e-06, "epoch": 2.318279569892473, "percentage": 77.59, "elapsed_time": "0:08:16", "remaining_time": "0:02:23", "throughput": 2902.13, "total_tokens": 1441504}
{"current_steps": 550, "total_steps": 696, "loss": 0.044, "lr": 3.1410382359217645e-06, "epoch": 2.361290322580645, "percentage": 79.02, "elapsed_time": "0:08:25", "remaining_time": "0:02:14", "throughput": 2902.37, "total_tokens": 1467680}
{"current_steps": 560, "total_steps": 696, "loss": 0.0551, "lr": 2.7386649216166233e-06, "epoch": 2.404301075268817, "percentage": 80.46, "elapsed_time": "0:08:34", "remaining_time": "0:02:05", "throughput": 2902.6, "total_tokens": 1494176}
{"current_steps": 570, "total_steps": 696, "loss": 0.0258, "lr": 2.361268895750264e-06, "epoch": 2.447311827956989, "percentage": 81.9, "elapsed_time": "0:08:43", "remaining_time": "0:01:55", "throughput": 2902.83, "total_tokens": 1520544}
{"current_steps": 580, "total_steps": 696, "loss": 0.0415, "lr": 2.0096189432334194e-06, "epoch": 2.490322580645161, "percentage": 83.33, "elapsed_time": "0:08:52", "remaining_time": "0:01:46", "throughput": 2903.34, "total_tokens": 1547264}
{"current_steps": 590, "total_steps": 696, "loss": 0.0375, "lr": 1.6844314022329676e-06, "epoch": 2.533333333333333, "percentage": 84.77, "elapsed_time": "0:09:02", "remaining_time": "0:01:37", "throughput": 2903.89, "total_tokens": 1573920}
{"current_steps": 600, "total_steps": 696, "loss": 0.0235, "lr": 1.3863687049356465e-06, "epoch": 2.576344086021505, "percentage": 86.21, "elapsed_time": "0:09:11", "remaining_time": "0:01:28", "throughput": 2904.68, "total_tokens": 1600640}
{"current_steps": 610, "total_steps": 696, "loss": 0.0354, "lr": 1.116038028122413e-06, "epoch": 2.6193548387096772, "percentage": 87.64, "elapsed_time": "0:09:19", "remaining_time": "0:01:18", "throughput": 2905.09, "total_tokens": 1626848}
{"current_steps": 620, "total_steps": 696, "loss": 0.0357, "lr": 8.7399005630238e-07, "epoch": 2.6623655913978492, "percentage": 89.08, "elapsed_time": "0:09:29", "remaining_time": "0:01:09", "throughput": 2905.48, "total_tokens": 1653408}
{"current_steps": 630, "total_steps": 696, "loss": 0.0512, "lr": 6.607178599258268e-07, "epoch": 2.7053763440860212, "percentage": 90.52, "elapsed_time": "0:09:38", "remaining_time": "0:01:00", "throughput": 2905.88, "total_tokens": 1679968}
{"current_steps": 640, "total_steps": 696, "loss": 0.0243, "lr": 4.766558909615504e-07, "epoch": 2.7483870967741937, "percentage": 91.95, "elapsed_time": "0:09:47", "remaining_time": "0:00:51", "throughput": 2906.53, "total_tokens": 1706944}
{"current_steps": 650, "total_steps": 696, "loss": 0.0367, "lr": 3.22179097884579e-07, "epoch": 2.7913978494623657, "percentage": 93.39, "elapsed_time": "0:09:56", "remaining_time": "0:00:42", "throughput": 2907.18, "total_tokens": 1733888}
{"current_steps": 660, "total_steps": 696, "loss": 0.0377, "lr": 1.9760216187710788e-07, "epoch": 2.8344086021505377, "percentage": 94.83, "elapsed_time": "0:10:05", "remaining_time": "0:00:33", "throughput": 2907.66, "total_tokens": 1760448}
{"current_steps": 670, "total_steps": 696, "loss": 0.0471, "lr": 1.0317885579858522e-07, "epoch": 2.8774193548387097, "percentage": 96.26, "elapsed_time": "0:10:14", "remaining_time": "0:00:23", "throughput": 2907.89, "total_tokens": 1787072}
{"current_steps": 680, "total_steps": 696, "loss": 0.0222, "lr": 3.910152723075322e-08, "epoch": 2.9204301075268817, "percentage": 97.7, "elapsed_time": "0:10:23", "remaining_time": "0:00:14", "throughput": 2908.16, "total_tokens": 1813632}
{"current_steps": 690, "total_steps": 696, "loss": 0.0454, "lr": 5.50070665074065e-09, "epoch": 2.9634408602150537, "percentage": 99.14, "elapsed_time": "0:10:32", "remaining_time": "0:00:05", "throughput": 2908.39, "total_tokens": 1840384}
{"current_steps": 696, "total_steps": 696, "epoch": 2.989247311827957, "percentage": 100.0, "elapsed_time": "0:10:38", "remaining_time": "0:00:00", "throughput": 2905.07, "total_tokens": 1855776}