mistral-mathqa-lora / trainer_log.jsonl
Kashmary's picture
Upload folder using huggingface_hub
35ef2bc verified
{"current_steps": 5, "total_steps": 189, "loss": 1.683, "lr": 0.0001997790438338385, "epoch": 0.08, "percentage": 2.65, "elapsed_time": "0:01:58", "remaining_time": "1:12:37", "throughput": 41.34, "total_tokens": 4896}
{"current_steps": 10, "total_steps": 189, "loss": 0.9406, "lr": 0.00019888308262251285, "epoch": 0.16, "percentage": 5.29, "elapsed_time": "0:04:07", "remaining_time": "1:13:45", "throughput": 41.03, "total_tokens": 10144}
{"current_steps": 15, "total_steps": 189, "loss": 1.1972, "lr": 0.00019730448705798239, "epoch": 0.24, "percentage": 7.94, "elapsed_time": "0:06:07", "remaining_time": "1:10:57", "throughput": 40.24, "total_tokens": 14768}
{"current_steps": 20, "total_steps": 189, "loss": 0.8463, "lr": 0.0001950541548947829, "epoch": 0.32, "percentage": 10.58, "elapsed_time": "0:08:08", "remaining_time": "1:08:45", "throughput": 39.85, "total_tokens": 19456}
{"current_steps": 25, "total_steps": 189, "loss": 0.9397, "lr": 0.00019214762118704076, "epoch": 0.4, "percentage": 13.23, "elapsed_time": "0:10:11", "remaining_time": "1:06:49", "throughput": 39.66, "total_tokens": 24240}
{"current_steps": 30, "total_steps": 189, "loss": 0.8065, "lr": 0.00018860495104301345, "epoch": 0.48, "percentage": 15.87, "elapsed_time": "0:12:12", "remaining_time": "1:04:41", "throughput": 39.5, "total_tokens": 28928}
{"current_steps": 35, "total_steps": 189, "loss": 1.0692, "lr": 0.0001844506011066308, "epoch": 0.56, "percentage": 18.52, "elapsed_time": "0:14:14", "remaining_time": "1:02:39", "throughput": 39.45, "total_tokens": 33712}
{"current_steps": 40, "total_steps": 189, "loss": 1.1197, "lr": 0.00017971325072229226, "epoch": 0.64, "percentage": 21.16, "elapsed_time": "0:16:15", "remaining_time": "1:00:33", "throughput": 39.35, "total_tokens": 38384}
{"current_steps": 45, "total_steps": 189, "loss": 0.9444, "lr": 0.00017442560394846516, "epoch": 0.72, "percentage": 23.81, "elapsed_time": "0:18:18", "remaining_time": "0:58:35", "throughput": 39.35, "total_tokens": 43232}
{"current_steps": 50, "total_steps": 189, "loss": 1.1552, "lr": 0.0001686241637868734, "epoch": 0.8, "percentage": 26.46, "elapsed_time": "0:20:16", "remaining_time": "0:56:22", "throughput": 39.12, "total_tokens": 47600}
{"current_steps": 55, "total_steps": 189, "loss": 0.7799, "lr": 0.00016234898018587337, "epoch": 0.88, "percentage": 29.1, "elapsed_time": "0:22:19", "remaining_time": "0:54:22", "throughput": 39.14, "total_tokens": 52416}
{"current_steps": 60, "total_steps": 189, "loss": 1.0976, "lr": 0.00015564337355766412, "epoch": 0.96, "percentage": 31.75, "elapsed_time": "0:24:20", "remaining_time": "0:52:20", "throughput": 39.04, "total_tokens": 57024}
{"current_steps": 65, "total_steps": 189, "loss": 0.6118, "lr": 0.00014855363571801523, "epoch": 1.032, "percentage": 34.39, "elapsed_time": "0:26:11", "remaining_time": "0:49:58", "throughput": 39.04, "total_tokens": 61376}
{"current_steps": 70, "total_steps": 189, "loss": 0.4582, "lr": 0.00014112871031306119, "epoch": 1.112, "percentage": 37.04, "elapsed_time": "0:28:08", "remaining_time": "0:47:50", "throughput": 38.98, "total_tokens": 65808}
{"current_steps": 75, "total_steps": 189, "loss": 0.4555, "lr": 0.00013341985493931877, "epoch": 1.192, "percentage": 39.68, "elapsed_time": "0:30:16", "remaining_time": "0:46:00", "throughput": 38.99, "total_tokens": 70800}
{"current_steps": 80, "total_steps": 189, "loss": 0.5214, "lr": 0.0001254802872894655, "epoch": 1.272, "percentage": 42.33, "elapsed_time": "0:32:21", "remaining_time": "0:44:05", "throughput": 39.01, "total_tokens": 75744}
{"current_steps": 85, "total_steps": 189, "loss": 0.396, "lr": 0.00011736481776669306, "epoch": 1.3519999999999999, "percentage": 44.97, "elapsed_time": "0:34:23", "remaining_time": "0:42:04", "throughput": 38.97, "total_tokens": 80400}
{"current_steps": 90, "total_steps": 189, "loss": 0.494, "lr": 0.00010912947110386484, "epoch": 1.432, "percentage": 47.62, "elapsed_time": "0:36:23", "remaining_time": "0:40:01", "throughput": 38.94, "total_tokens": 85040}
{"current_steps": 95, "total_steps": 189, "loss": 0.4208, "lr": 0.00010083109959960973, "epoch": 1.512, "percentage": 50.26, "elapsed_time": "0:38:21", "remaining_time": "0:37:57", "throughput": 38.87, "total_tokens": 89472}
{"current_steps": 100, "total_steps": 189, "loss": 0.3598, "lr": 9.252699064135758e-05, "epoch": 1.592, "percentage": 52.91, "elapsed_time": "0:40:23", "remaining_time": "0:35:56", "throughput": 38.84, "total_tokens": 94128}
{"current_steps": 105, "total_steps": 189, "loss": 0.3386, "lr": 8.427447122476148e-05, "epoch": 1.6720000000000002, "percentage": 55.56, "elapsed_time": "0:42:36", "remaining_time": "0:34:05", "throughput": 38.77, "total_tokens": 99120}
{"current_steps": 110, "total_steps": 189, "loss": 0.363, "lr": 7.613051219968623e-05, "epoch": 1.752, "percentage": 58.2, "elapsed_time": "0:44:47", "remaining_time": "0:32:10", "throughput": 38.78, "total_tokens": 104224}
{"current_steps": 115, "total_steps": 189, "loss": 0.4988, "lr": 6.815133497483157e-05, "epoch": 1.8319999999999999, "percentage": 60.85, "elapsed_time": "0:46:37", "remaining_time": "0:30:00", "throughput": 38.73, "total_tokens": 108352}
{"current_steps": 120, "total_steps": 189, "loss": 0.492, "lr": 6.039202339608432e-05, "epoch": 1.912, "percentage": 63.49, "elapsed_time": "0:48:41", "remaining_time": "0:28:00", "throughput": 38.75, "total_tokens": 113216}
{"current_steps": 125, "total_steps": 189, "loss": 0.3521, "lr": 5.290614347797802e-05, "epoch": 1.992, "percentage": 66.14, "elapsed_time": "0:50:48", "remaining_time": "0:26:00", "throughput": 38.75, "total_tokens": 118128}
{"current_steps": 130, "total_steps": 189, "loss": 0.2833, "lr": 4.574537361342407e-05, "epoch": 2.064, "percentage": 68.78, "elapsed_time": "0:52:30", "remaining_time": "0:23:49", "throughput": 38.72, "total_tokens": 121984}
{"current_steps": 135, "total_steps": 189, "loss": 0.1752, "lr": 3.89591478145437e-05, "epoch": 2.144, "percentage": 71.43, "elapsed_time": "0:54:23", "remaining_time": "0:21:45", "throughput": 38.69, "total_tokens": 126272}
{"current_steps": 140, "total_steps": 189, "loss": 0.1497, "lr": 3.259431444746846e-05, "epoch": 2.224, "percentage": 74.07, "elapsed_time": "0:56:28", "remaining_time": "0:19:45", "throughput": 38.72, "total_tokens": 131184}
{"current_steps": 145, "total_steps": 189, "loss": 0.123, "lr": 2.669481281701739e-05, "epoch": 2.304, "percentage": 76.72, "elapsed_time": "0:58:37", "remaining_time": "0:17:47", "throughput": 38.74, "total_tokens": 136256}
{"current_steps": 150, "total_steps": 189, "loss": 0.1846, "lr": 2.1301369833931117e-05, "epoch": 2.384, "percentage": 79.37, "elapsed_time": "1:00:35", "remaining_time": "0:15:45", "throughput": 38.73, "total_tokens": 140816}
{"current_steps": 155, "total_steps": 189, "loss": 0.1589, "lr": 1.6451218858706374e-05, "epoch": 2.464, "percentage": 82.01, "elapsed_time": "1:02:41", "remaining_time": "0:13:45", "throughput": 38.77, "total_tokens": 145824}
{"current_steps": 160, "total_steps": 189, "loss": 0.2244, "lr": 1.2177842662977135e-05, "epoch": 2.544, "percentage": 84.66, "elapsed_time": "1:04:46", "remaining_time": "0:11:44", "throughput": 38.78, "total_tokens": 150720}
{"current_steps": 165, "total_steps": 189, "loss": 0.243, "lr": 8.510742282896544e-06, "epoch": 2.624, "percentage": 87.3, "elapsed_time": "1:06:50", "remaining_time": "0:09:43", "throughput": 38.75, "total_tokens": 155408}
{"current_steps": 170, "total_steps": 189, "loss": 0.1998, "lr": 5.475233360227516e-06, "epoch": 2.7039999999999997, "percentage": 89.95, "elapsed_time": "1:08:44", "remaining_time": "0:07:41", "throughput": 38.7, "total_tokens": 159616}
{"current_steps": 175, "total_steps": 189, "loss": 0.1511, "lr": 3.092271377092215e-06, "epoch": 2.784, "percentage": 92.59, "elapsed_time": "1:10:49", "remaining_time": "0:05:39", "throughput": 38.72, "total_tokens": 164544}
{"current_steps": 180, "total_steps": 189, "loss": 0.1868, "lr": 1.378306990862177e-06, "epoch": 2.864, "percentage": 95.24, "elapsed_time": "1:13:02", "remaining_time": "0:03:39", "throughput": 38.73, "total_tokens": 169728}
{"current_steps": 185, "total_steps": 189, "loss": 0.2023, "lr": 3.451724678784518e-07, "epoch": 2.944, "percentage": 97.88, "elapsed_time": "1:15:01", "remaining_time": "0:01:37", "throughput": 38.72, "total_tokens": 174320}
{"current_steps": 189, "total_steps": 189, "epoch": 3.0, "percentage": 100.0, "elapsed_time": "1:16:38", "remaining_time": "0:00:00", "throughput": 38.66, "total_tokens": 177776}