ADPrLlama / trainer_log.jsonl
jbenbudd's picture
train_1_epoch_test
61d2404
{"current_steps": 5, "total_steps": 220, "loss": 12.6644, "lr": 2.9999999999999997e-05, "epoch": 0.02280501710376283, "percentage": 2.27, "elapsed_time": "0:00:24", "remaining_time": "0:17:28", "throughput": 2531.08, "total_tokens": 61696}
{"current_steps": 10, "total_steps": 220, "loss": 5.0641, "lr": 6.75e-05, "epoch": 0.04561003420752566, "percentage": 4.55, "elapsed_time": "0:00:47", "remaining_time": "0:16:31", "throughput": 2607.85, "total_tokens": 123136}
{"current_steps": 15, "total_steps": 220, "loss": 1.5053, "lr": 0.00010499999999999999, "epoch": 0.06841505131128849, "percentage": 6.82, "elapsed_time": "0:01:10", "remaining_time": "0:15:59", "throughput": 2632.04, "total_tokens": 184832}
{"current_steps": 20, "total_steps": 220, "loss": 0.6917, "lr": 0.0001425, "epoch": 0.09122006841505131, "percentage": 9.09, "elapsed_time": "0:01:33", "remaining_time": "0:15:34", "throughput": 2647.7, "total_tokens": 247552}
{"current_steps": 25, "total_steps": 220, "loss": 0.5721, "lr": 0.00017999999999999998, "epoch": 0.11402508551881414, "percentage": 11.36, "elapsed_time": "0:01:56", "remaining_time": "0:15:07", "throughput": 2656.08, "total_tokens": 308992}
{"current_steps": 30, "total_steps": 220, "loss": 0.6254, "lr": 0.00021749999999999997, "epoch": 0.13683010262257697, "percentage": 13.64, "elapsed_time": "0:02:19", "remaining_time": "0:14:41", "throughput": 2659.03, "total_tokens": 370176}
{"current_steps": 35, "total_steps": 220, "loss": 0.6389, "lr": 0.00025499999999999996, "epoch": 0.15963511972633979, "percentage": 15.91, "elapsed_time": "0:02:41", "remaining_time": "0:14:15", "throughput": 2660.78, "total_tokens": 430720}
{"current_steps": 40, "total_steps": 220, "loss": 0.5258, "lr": 0.00029249999999999995, "epoch": 0.18244013683010263, "percentage": 18.18, "elapsed_time": "0:03:05", "remaining_time": "0:13:53", "throughput": 2659.66, "total_tokens": 492800}
{"current_steps": 45, "total_steps": 220, "loss": 0.5041, "lr": 0.0002996346075389736, "epoch": 0.20524515393386544, "percentage": 20.45, "elapsed_time": "0:03:28", "remaining_time": "0:13:29", "throughput": 2660.7, "total_tokens": 553984}
{"current_steps": 50, "total_steps": 220, "loss": 0.4831, "lr": 0.00029815325108927063, "epoch": 0.22805017103762829, "percentage": 22.73, "elapsed_time": "0:03:51", "remaining_time": "0:13:05", "throughput": 2661.54, "total_tokens": 615040}
{"current_steps": 55, "total_steps": 220, "loss": 0.4957, "lr": 0.0002955443589413994, "epoch": 0.2508551881413911, "percentage": 25.0, "elapsed_time": "0:04:14", "remaining_time": "0:12:42", "throughput": 2663.54, "total_tokens": 676736}
{"current_steps": 60, "total_steps": 220, "loss": 0.4764, "lr": 0.00029182778633989753, "epoch": 0.27366020524515394, "percentage": 27.27, "elapsed_time": "0:04:37", "remaining_time": "0:12:18", "throughput": 2664.2, "total_tokens": 738176}
{"current_steps": 65, "total_steps": 220, "loss": 0.4829, "lr": 0.0002870318186463901, "epoch": 0.29646522234891676, "percentage": 29.55, "elapsed_time": "0:05:00", "remaining_time": "0:11:55", "throughput": 2664.82, "total_tokens": 799488}
{"current_steps": 70, "total_steps": 220, "loss": 0.478, "lr": 0.00028119295607090933, "epoch": 0.31927023945267957, "percentage": 31.82, "elapsed_time": "0:05:23", "remaining_time": "0:11:32", "throughput": 2666.34, "total_tokens": 861568}
{"current_steps": 75, "total_steps": 220, "loss": 0.4771, "lr": 0.0002743556358832562, "epoch": 0.34207525655644244, "percentage": 34.09, "elapsed_time": "0:05:46", "remaining_time": "0:11:10", "throughput": 2666.42, "total_tokens": 924544}
{"current_steps": 80, "total_steps": 220, "loss": 0.4657, "lr": 0.0002665718942185456, "epoch": 0.36488027366020526, "percentage": 36.36, "elapsed_time": "0:06:09", "remaining_time": "0:10:46", "throughput": 2666.94, "total_tokens": 985472}
{"current_steps": 85, "total_steps": 220, "loss": 0.4831, "lr": 0.00025790097005079764, "epoch": 0.38768529076396807, "percentage": 38.64, "elapsed_time": "0:06:32", "remaining_time": "0:10:23", "throughput": 2667.09, "total_tokens": 1046912}
{"current_steps": 90, "total_steps": 220, "loss": 0.4778, "lr": 0.0002484088543485761, "epoch": 0.4104903078677309, "percentage": 40.91, "elapsed_time": "0:06:55", "remaining_time": "0:10:00", "throughput": 2668.18, "total_tokens": 1108992}
{"current_steps": 95, "total_steps": 220, "loss": 0.455, "lr": 0.00023816778784387094, "epoch": 0.43329532497149376, "percentage": 43.18, "elapsed_time": "0:07:18", "remaining_time": "0:09:36", "throughput": 2668.23, "total_tokens": 1170048}
{"current_steps": 100, "total_steps": 220, "loss": 0.4571, "lr": 0.00022725571123650813, "epoch": 0.45610034207525657, "percentage": 45.45, "elapsed_time": "0:07:41", "remaining_time": "0:09:13", "throughput": 2668.44, "total_tokens": 1230464}
{"current_steps": 100, "total_steps": 220, "eval_loss": 0.4646710157394409, "epoch": 0.45610034207525657, "percentage": 45.45, "elapsed_time": "0:08:13", "remaining_time": "0:09:52", "throughput": 2491.96, "total_tokens": 1230464}
{"current_steps": 105, "total_steps": 220, "loss": 0.4667, "lr": 0.0002157556720183616, "epoch": 0.4789053591790194, "percentage": 47.73, "elapsed_time": "0:08:36", "remaining_time": "0:09:26", "throughput": 2500.4, "total_tokens": 1292288}
{"current_steps": 110, "total_steps": 220, "loss": 0.4758, "lr": 0.000203755192431795, "epoch": 0.5017103762827823, "percentage": 50.0, "elapsed_time": "0:08:59", "remaining_time": "0:08:59", "throughput": 2507.87, "total_tokens": 1353344}
{"current_steps": 115, "total_steps": 220, "loss": 0.4783, "lr": 0.00019134560337254986, "epoch": 0.5245153933865451, "percentage": 52.27, "elapsed_time": "0:09:22", "remaining_time": "0:08:33", "throughput": 2515.39, "total_tokens": 1415040}
{"current_steps": 120, "total_steps": 220, "loss": 0.4643, "lr": 0.0001786213493064817, "epoch": 0.5473204104903079, "percentage": 54.55, "elapsed_time": "0:09:45", "remaining_time": "0:08:07", "throughput": 2521.98, "total_tokens": 1476480}
{"current_steps": 125, "total_steps": 220, "loss": 0.4542, "lr": 0.000165679269490148, "epoch": 0.5701254275940707, "percentage": 56.82, "elapsed_time": "0:10:08", "remaining_time": "0:07:42", "throughput": 2527.64, "total_tokens": 1537664}
{"current_steps": 130, "total_steps": 220, "loss": 0.4539, "lr": 0.00015261786096559254, "epoch": 0.5929304446978335, "percentage": 59.09, "elapsed_time": "0:10:31", "remaining_time": "0:07:16", "throughput": 2533.11, "total_tokens": 1598848}
{"current_steps": 135, "total_steps": 220, "loss": 0.4563, "lr": 0.00013953652893838119, "epoch": 0.6157354618015963, "percentage": 61.36, "elapsed_time": "0:10:54", "remaining_time": "0:06:51", "throughput": 2538.83, "total_tokens": 1660800}
{"current_steps": 140, "total_steps": 220, "loss": 0.4434, "lr": 0.00012653483024396533, "epoch": 0.6385404789053591, "percentage": 63.64, "elapsed_time": "0:11:16", "remaining_time": "0:06:26", "throughput": 2543.39, "total_tokens": 1721600}
{"current_steps": 145, "total_steps": 220, "loss": 0.4484, "lr": 0.00011371171566004985, "epoch": 0.661345496009122, "percentage": 65.91, "elapsed_time": "0:11:39", "remaining_time": "0:06:01", "throughput": 2547.95, "total_tokens": 1783168}
{"current_steps": 150, "total_steps": 220, "loss": 0.4314, "lr": 0.00010116477683142652, "epoch": 0.6841505131128849, "percentage": 68.18, "elapsed_time": "0:12:02", "remaining_time": "0:05:37", "throughput": 2552.51, "total_tokens": 1844992}
{"current_steps": 155, "total_steps": 220, "loss": 0.4211, "lr": 8.898950353862998e-05, "epoch": 0.7069555302166477, "percentage": 70.45, "elapsed_time": "0:12:25", "remaining_time": "0:05:12", "throughput": 2556.34, "total_tokens": 1906048}
{"current_steps": 160, "total_steps": 220, "loss": 0.4324, "lr": 7.727855696304944e-05, "epoch": 0.7297605473204105, "percentage": 72.73, "elapsed_time": "0:12:48", "remaining_time": "0:04:48", "throughput": 2560.29, "total_tokens": 1967744}
{"current_steps": 165, "total_steps": 220, "loss": 0.4093, "lr": 6.612106447938799e-05, "epoch": 0.7525655644241733, "percentage": 75.0, "elapsed_time": "0:13:11", "remaining_time": "0:04:23", "throughput": 2563.4, "total_tokens": 2028032}
{"current_steps": 170, "total_steps": 220, "loss": 0.4113, "lr": 5.56019413425244e-05, "epoch": 0.7753705815279361, "percentage": 77.27, "elapsed_time": "0:13:33", "remaining_time": "0:03:59", "throughput": 2566.5, "total_tokens": 2088448}
{"current_steps": 175, "total_steps": 220, "loss": 0.4142, "lr": 4.5801244431150394e-05, "epoch": 0.798175598631699, "percentage": 79.55, "elapsed_time": "0:13:56", "remaining_time": "0:03:35", "throughput": 2569.89, "total_tokens": 2150144}
{"current_steps": 180, "total_steps": 220, "loss": 0.407, "lr": 3.6793562966584196e-05, "epoch": 0.8209806157354618, "percentage": 81.82, "elapsed_time": "0:14:19", "remaining_time": "0:03:11", "throughput": 2572.74, "total_tokens": 2211584}
{"current_steps": 185, "total_steps": 220, "loss": 0.3836, "lr": 2.8647450843757897e-05, "epoch": 0.8437856328392246, "percentage": 84.09, "elapsed_time": "0:14:42", "remaining_time": "0:02:46", "throughput": 2575.48, "total_tokens": 2272256}
{"current_steps": 190, "total_steps": 220, "loss": 0.3904, "lr": 2.1424904894683165e-05, "epoch": 0.8665906499429875, "percentage": 86.36, "elapsed_time": "0:15:05", "remaining_time": "0:02:22", "throughput": 2578.09, "total_tokens": 2333696}
{"current_steps": 195, "total_steps": 220, "loss": 0.4011, "lr": 1.5180893055124977e-05, "epoch": 0.8893956670467503, "percentage": 88.64, "elapsed_time": "0:15:28", "remaining_time": "0:01:58", "throughput": 2580.67, "total_tokens": 2394880}
{"current_steps": 200, "total_steps": 220, "loss": 0.3809, "lr": 9.962936025419754e-06, "epoch": 0.9122006841505131, "percentage": 90.91, "elapsed_time": "0:15:50", "remaining_time": "0:01:35", "throughput": 2582.96, "total_tokens": 2455680}
{"current_steps": 200, "total_steps": 220, "eval_loss": 0.3808976411819458, "epoch": 0.9122006841505131, "percentage": 90.91, "elapsed_time": "0:16:23", "remaining_time": "0:01:38", "throughput": 2497.07, "total_tokens": 2455680}
{"current_steps": 205, "total_steps": 220, "loss": 0.3799, "lr": 5.810745609252165e-06, "epoch": 0.935005701254276, "percentage": 93.18, "elapsed_time": "0:16:46", "remaining_time": "0:01:13", "throughput": 2501.27, "total_tokens": 2517376}
{"current_steps": 210, "total_steps": 220, "loss": 0.3944, "lr": 2.7559224828504035e-06, "epoch": 0.9578107183580388, "percentage": 95.45, "elapsed_time": "0:17:09", "remaining_time": "0:00:49", "throughput": 2505.48, "total_tokens": 2578816}
{"current_steps": 215, "total_steps": 220, "loss": 0.3721, "lr": 8.217156947590064e-07, "epoch": 0.9806157354618016, "percentage": 97.73, "elapsed_time": "0:17:32", "remaining_time": "0:00:24", "throughput": 2509.24, "total_tokens": 2640128}
{"current_steps": 220, "total_steps": 220, "loss": 0.3749, "lr": 2.284572654130956e-08, "epoch": 1.0, "percentage": 100.0, "elapsed_time": "0:17:51", "remaining_time": "0:00:00", "throughput": 2512.25, "total_tokens": 2691984}
{"current_steps": 220, "total_steps": 220, "epoch": 1.0, "percentage": 100.0, "elapsed_time": "0:17:54", "remaining_time": "0:00:00", "throughput": 2504.7, "total_tokens": 2691984}