Mistral-EN-Part-1 / trainer_log.jsonl
SamChen888's picture
Upload folder using huggingface_hub
6fdcfa7 verified
{"current_steps": 10, "total_steps": 348, "loss": 0.8269, "lr": 2.9938918800982563e-05, "epoch": 0.08583690987124463, "percentage": 2.87, "elapsed_time": "0:00:21", "remaining_time": "0:12:06", "throughput": 1205.0, "total_tokens": 25888}
{"current_steps": 20, "total_steps": 348, "loss": 0.1558, "lr": 2.975617265898004e-05, "epoch": 0.17167381974248927, "percentage": 5.75, "elapsed_time": "0:00:30", "remaining_time": "0:08:21", "throughput": 1741.54, "total_tokens": 53280}
{"current_steps": 30, "total_steps": 348, "loss": 0.1655, "lr": 2.9453249887788343e-05, "epoch": 0.2575107296137339, "percentage": 8.62, "elapsed_time": "0:00:39", "remaining_time": "0:06:59", "throughput": 2021.0, "total_tokens": 79904}
{"current_steps": 40, "total_steps": 348, "loss": 0.1686, "lr": 2.9032617538884018e-05, "epoch": 0.34334763948497854, "percentage": 11.49, "elapsed_time": "0:00:48", "remaining_time": "0:06:13", "throughput": 2196.79, "total_tokens": 106464}
{"current_steps": 50, "total_steps": 348, "loss": 0.128, "lr": 2.84977013093626e-05, "epoch": 0.4291845493562232, "percentage": 14.37, "elapsed_time": "0:00:57", "remaining_time": "0:05:41", "throughput": 2312.55, "total_tokens": 132672}
{"current_steps": 60, "total_steps": 348, "loss": 0.1581, "lr": 2.7852857642513838e-05, "epoch": 0.5150214592274678, "percentage": 17.24, "elapsed_time": "0:01:06", "remaining_time": "0:05:20", "throughput": 2412.29, "total_tokens": 161312}
{"current_steps": 70, "total_steps": 348, "loss": 0.1106, "lr": 2.7103338248251055e-05, "epoch": 0.6008583690987125, "percentage": 20.11, "elapsed_time": "0:01:15", "remaining_time": "0:05:01", "throughput": 2474.02, "total_tokens": 187680}
{"current_steps": 80, "total_steps": 348, "loss": 0.1401, "lr": 2.6255247332346036e-05, "epoch": 0.6866952789699571, "percentage": 22.99, "elapsed_time": "0:01:25", "remaining_time": "0:04:44", "throughput": 2522.25, "total_tokens": 214400}
{"current_steps": 90, "total_steps": 348, "loss": 0.1282, "lr": 2.531549188280135e-05, "epoch": 0.7725321888412017, "percentage": 25.86, "elapsed_time": "0:01:33", "remaining_time": "0:04:29", "throughput": 2559.46, "total_tokens": 240480}
{"current_steps": 100, "total_steps": 348, "loss": 0.0936, "lr": 2.4291725418235848e-05, "epoch": 0.8583690987124464, "percentage": 28.74, "elapsed_time": "0:01:43", "remaining_time": "0:04:15", "throughput": 2591.37, "total_tokens": 267360}
{"current_steps": 110, "total_steps": 348, "loss": 0.1569, "lr": 2.3192285656405456e-05, "epoch": 0.944206008583691, "percentage": 31.61, "elapsed_time": "0:01:52", "remaining_time": "0:04:03", "throughput": 2617.95, "total_tokens": 294720}
{"current_steps": 120, "total_steps": 348, "loss": 0.0904, "lr": 2.2026126610496852e-05, "epoch": 1.0257510729613735, "percentage": 34.48, "elapsed_time": "0:02:01", "remaining_time": "0:03:51", "throughput": 2622.68, "total_tokens": 319184}
{"current_steps": 130, "total_steps": 348, "loss": 0.073, "lr": 2.0802745666212592e-05, "epoch": 1.111587982832618, "percentage": 37.36, "elapsed_time": "0:02:10", "remaining_time": "0:03:39", "throughput": 2640.79, "total_tokens": 345680}
{"current_steps": 140, "total_steps": 348, "loss": 0.058, "lr": 1.953210623354359e-05, "epoch": 1.1974248927038627, "percentage": 40.23, "elapsed_time": "0:02:19", "remaining_time": "0:03:27", "throughput": 2656.87, "total_tokens": 371824}
{"current_steps": 150, "total_steps": 348, "loss": 0.0818, "lr": 1.8224556603165363e-05, "epoch": 1.2832618025751072, "percentage": 43.1, "elapsed_time": "0:02:29", "remaining_time": "0:03:16", "throughput": 2673.66, "total_tokens": 398480}
{"current_steps": 160, "total_steps": 348, "loss": 0.0715, "lr": 1.689074566830434e-05, "epoch": 1.3690987124463518, "percentage": 45.98, "elapsed_time": "0:02:38", "remaining_time": "0:03:05", "throughput": 2687.4, "total_tokens": 424656}
{"current_steps": 170, "total_steps": 348, "loss": 0.0691, "lr": 1.5541536198449044e-05, "epoch": 1.4549356223175964, "percentage": 48.85, "elapsed_time": "0:02:47", "remaining_time": "0:02:54", "throughput": 2700.42, "total_tokens": 451088}
{"current_steps": 180, "total_steps": 348, "loss": 0.0713, "lr": 1.4187916371218739e-05, "epoch": 1.5407725321888412, "percentage": 51.72, "elapsed_time": "0:02:56", "remaining_time": "0:02:44", "throughput": 2713.99, "total_tokens": 478320}
{"current_steps": 190, "total_steps": 348, "loss": 0.0985, "lr": 1.2840910282888211e-05, "epoch": 1.6266094420600858, "percentage": 54.6, "elapsed_time": "0:03:05", "remaining_time": "0:02:34", "throughput": 2726.48, "total_tokens": 505328}
{"current_steps": 200, "total_steps": 348, "loss": 0.0728, "lr": 1.1511488166385349e-05, "epoch": 1.7124463519313304, "percentage": 57.47, "elapsed_time": "0:03:14", "remaining_time": "0:02:23", "throughput": 2736.15, "total_tokens": 531920}
{"current_steps": 210, "total_steps": 348, "loss": 0.1002, "lr": 1.0210477047960303e-05, "epoch": 1.7982832618025753, "percentage": 60.34, "elapsed_time": "0:03:23", "remaining_time": "0:02:13", "throughput": 2748.97, "total_tokens": 560496}
{"current_steps": 220, "total_steps": 348, "loss": 0.0976, "lr": 8.948472570152874e-06, "epoch": 1.8841201716738198, "percentage": 63.22, "elapsed_time": "0:03:32", "remaining_time": "0:02:03", "throughput": 2756.85, "total_tokens": 586384}
{"current_steps": 230, "total_steps": 348, "loss": 0.0801, "lr": 7.735752699185711e-06, "epoch": 1.9699570815450644, "percentage": 66.09, "elapsed_time": "0:03:41", "remaining_time": "0:01:53", "throughput": 2764.62, "total_tokens": 612496}
{"current_steps": 240, "total_steps": 348, "loss": 0.0533, "lr": 6.582194019564266e-06, "epoch": 2.051502145922747, "percentage": 68.97, "elapsed_time": "0:03:50", "remaining_time": "0:01:43", "throughput": 2769.81, "total_tokens": 637448}
{"current_steps": 250, "total_steps": 348, "loss": 0.059, "lr": 5.497191297593647e-06, "epoch": 2.1373390557939915, "percentage": 71.84, "elapsed_time": "0:03:59", "remaining_time": "0:01:33", "throughput": 2775.77, "total_tokens": 663528}
{"current_steps": 260, "total_steps": 348, "loss": 0.0505, "lr": 4.4895809688998655e-06, "epoch": 2.223175965665236, "percentage": 74.71, "elapsed_time": "0:04:07", "remaining_time": "0:01:23", "throughput": 2781.09, "total_tokens": 689384}
{"current_steps": 270, "total_steps": 348, "loss": 0.0404, "lr": 3.567569173085455e-06, "epoch": 2.3090128755364807, "percentage": 77.59, "elapsed_time": "0:04:16", "remaining_time": "0:01:14", "throughput": 2786.55, "total_tokens": 715560}
{"current_steps": 280, "total_steps": 348, "loss": 0.059, "lr": 2.7386649216166233e-06, "epoch": 2.3948497854077253, "percentage": 80.46, "elapsed_time": "0:04:25", "remaining_time": "0:01:04", "throughput": 2792.12, "total_tokens": 741960}
{"current_steps": 290, "total_steps": 348, "loss": 0.0429, "lr": 2.0096189432334194e-06, "epoch": 2.48068669527897, "percentage": 83.33, "elapsed_time": "0:04:34", "remaining_time": "0:00:54", "throughput": 2797.48, "total_tokens": 768808}
{"current_steps": 300, "total_steps": 348, "loss": 0.0341, "lr": 1.3863687049356465e-06, "epoch": 2.5665236051502145, "percentage": 86.21, "elapsed_time": "0:04:43", "remaining_time": "0:00:45", "throughput": 2802.16, "total_tokens": 795176}
{"current_steps": 310, "total_steps": 348, "loss": 0.0349, "lr": 8.7399005630238e-07, "epoch": 2.652360515021459, "percentage": 89.08, "elapsed_time": "0:04:52", "remaining_time": "0:00:35", "throughput": 2806.55, "total_tokens": 821864}
{"current_steps": 320, "total_steps": 348, "loss": 0.0375, "lr": 4.766558909615504e-07, "epoch": 2.7381974248927037, "percentage": 91.95, "elapsed_time": "0:05:02", "remaining_time": "0:00:26", "throughput": 2812.12, "total_tokens": 849800}
{"current_steps": 330, "total_steps": 348, "loss": 0.0184, "lr": 1.9760216187710788e-07, "epoch": 2.8240343347639483, "percentage": 94.83, "elapsed_time": "0:05:11", "remaining_time": "0:00:16", "throughput": 2816.04, "total_tokens": 876616}
{"current_steps": 340, "total_steps": 348, "loss": 0.0583, "lr": 3.910152723075322e-08, "epoch": 2.909871244635193, "percentage": 97.7, "elapsed_time": "0:05:20", "remaining_time": "0:00:07", "throughput": 2819.51, "total_tokens": 903048}
{"current_steps": 348, "total_steps": 348, "epoch": 2.978540772532189, "percentage": 100.0, "elapsed_time": "0:05:29", "remaining_time": "0:00:00", "throughput": 2806.23, "total_tokens": 924488}