Training in progress, step 19080
Browse files- adapter_model.safetensors +1 -1
- trainer_log.jsonl +192 -0
adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1074144
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9b5c53a176bd4fd802f5f4c729ca8e345e92d1d2d4514d56755d4be2cced7570
|
| 3 |
size 1074144
|
trainer_log.jsonl
CHANGED
|
@@ -3643,3 +3643,195 @@
|
|
| 3643 |
{"current_steps": 18125, "total_steps": 19080, "loss": 0.3894, "lr": 3.8139625693680847e-07, "epoch": 9.499475890985325, "percentage": 94.99, "elapsed_time": "1:11:09", "remaining_time": "0:03:44", "throughput": 2774.37, "total_tokens": 11844936}
|
| 3644 |
{"current_steps": 18126, "total_steps": 19080, "eval_loss": 0.4834924042224884, "epoch": 9.5, "percentage": 95.0, "elapsed_time": "1:11:24", "remaining_time": "0:03:45", "throughput": 2764.97, "total_tokens": 11845704}
|
| 3645 |
{"current_steps": 18130, "total_steps": 19080, "loss": 0.47, "lr": 3.774272317146277e-07, "epoch": 9.5020964360587, "percentage": 95.02, "elapsed_time": "1:11:25", "remaining_time": "0:03:44", "throughput": 2764.47, "total_tokens": 11847912}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3643 |
{"current_steps": 18125, "total_steps": 19080, "loss": 0.3894, "lr": 3.8139625693680847e-07, "epoch": 9.499475890985325, "percentage": 94.99, "elapsed_time": "1:11:09", "remaining_time": "0:03:44", "throughput": 2774.37, "total_tokens": 11844936}
|
| 3644 |
{"current_steps": 18126, "total_steps": 19080, "eval_loss": 0.4834924042224884, "epoch": 9.5, "percentage": 95.0, "elapsed_time": "1:11:24", "remaining_time": "0:03:45", "throughput": 2764.97, "total_tokens": 11845704}
|
| 3645 |
{"current_steps": 18130, "total_steps": 19080, "loss": 0.47, "lr": 3.774272317146277e-07, "epoch": 9.5020964360587, "percentage": 95.02, "elapsed_time": "1:11:25", "remaining_time": "0:03:44", "throughput": 2764.47, "total_tokens": 11847912}
|
| 3646 |
+
{"current_steps": 18135, "total_steps": 19080, "loss": 0.364, "lr": 3.7347880953662597e-07, "epoch": 9.504716981132075, "percentage": 95.05, "elapsed_time": "1:11:27", "remaining_time": "0:03:43", "throughput": 2764.63, "total_tokens": 11853192}
|
| 3647 |
+
{"current_steps": 18140, "total_steps": 19080, "loss": 0.5445, "lr": 3.6955099370666045e-07, "epoch": 9.50733752620545, "percentage": 95.07, "elapsed_time": "1:11:28", "remaining_time": "0:03:42", "throughput": 2764.7, "total_tokens": 11856808}
|
| 3648 |
+
{"current_steps": 18145, "total_steps": 19080, "loss": 0.7121, "lr": 3.656437875113522e-07, "epoch": 9.509958071278826, "percentage": 95.1, "elapsed_time": "1:11:29", "remaining_time": "0:03:41", "throughput": 2764.77, "total_tokens": 11860360}
|
| 3649 |
+
{"current_steps": 18150, "total_steps": 19080, "loss": 0.3561, "lr": 3.617571942200693e-07, "epoch": 9.5125786163522, "percentage": 95.13, "elapsed_time": "1:11:30", "remaining_time": "0:03:39", "throughput": 2764.8, "total_tokens": 11863048}
|
| 3650 |
+
{"current_steps": 18155, "total_steps": 19080, "loss": 0.442, "lr": 3.5789121708493523e-07, "epoch": 9.515199161425576, "percentage": 95.15, "elapsed_time": "1:11:32", "remaining_time": "0:03:38", "throughput": 2764.89, "total_tokens": 11867016}
|
| 3651 |
+
{"current_steps": 18160, "total_steps": 19080, "loss": 0.4434, "lr": 3.5404585934082635e-07, "epoch": 9.517819706498951, "percentage": 95.18, "elapsed_time": "1:11:33", "remaining_time": "0:03:37", "throughput": 2764.98, "total_tokens": 11870728}
|
| 3652 |
+
{"current_steps": 18165, "total_steps": 19080, "loss": 0.4806, "lr": 3.502211242053577e-07, "epoch": 9.520440251572326, "percentage": 95.2, "elapsed_time": "1:11:34", "remaining_time": "0:03:36", "throughput": 2765.03, "total_tokens": 11873928}
|
| 3653 |
+
{"current_steps": 18170, "total_steps": 19080, "loss": 0.4182, "lr": 3.4641701487889697e-07, "epoch": 9.523060796645701, "percentage": 95.23, "elapsed_time": "1:11:35", "remaining_time": "0:03:35", "throughput": 2765.06, "total_tokens": 11876968}
|
| 3654 |
+
{"current_steps": 18175, "total_steps": 19080, "loss": 0.4497, "lr": 3.4263353454454806e-07, "epoch": 9.525681341719078, "percentage": 95.26, "elapsed_time": "1:11:36", "remaining_time": "0:03:33", "throughput": 2765.13, "total_tokens": 11880648}
|
| 3655 |
+
{"current_steps": 18180, "total_steps": 19080, "loss": 0.4647, "lr": 3.3887068636815346e-07, "epoch": 9.528301886792454, "percentage": 95.28, "elapsed_time": "1:11:37", "remaining_time": "0:03:32", "throughput": 2765.16, "total_tokens": 11883560}
|
| 3656 |
+
{"current_steps": 18185, "total_steps": 19080, "loss": 0.5099, "lr": 3.351284734982918e-07, "epoch": 9.530922431865829, "percentage": 95.31, "elapsed_time": "1:11:39", "remaining_time": "0:03:31", "throughput": 2765.31, "total_tokens": 11888296}
|
| 3657 |
+
{"current_steps": 18190, "total_steps": 19080, "loss": 0.4306, "lr": 3.3140689906628054e-07, "epoch": 9.533542976939204, "percentage": 95.34, "elapsed_time": "1:11:40", "remaining_time": "0:03:30", "throughput": 2765.44, "total_tokens": 11892872}
|
| 3658 |
+
{"current_steps": 18195, "total_steps": 19080, "loss": 0.4139, "lr": 3.2770596618615645e-07, "epoch": 9.536163522012579, "percentage": 95.36, "elapsed_time": "1:11:41", "remaining_time": "0:03:29", "throughput": 2765.47, "total_tokens": 11895944}
|
| 3659 |
+
{"current_steps": 18200, "total_steps": 19080, "loss": 0.4909, "lr": 3.240256779546952e-07, "epoch": 9.538784067085954, "percentage": 95.39, "elapsed_time": "1:11:42", "remaining_time": "0:03:28", "throughput": 2765.51, "total_tokens": 11898696}
|
| 3660 |
+
{"current_steps": 18205, "total_steps": 19080, "loss": 0.3618, "lr": 3.2036603745139447e-07, "epoch": 9.54140461215933, "percentage": 95.41, "elapsed_time": "1:11:44", "remaining_time": "0:03:26", "throughput": 2765.71, "total_tokens": 11904744}
|
| 3661 |
+
{"current_steps": 18210, "total_steps": 19080, "loss": 0.4295, "lr": 3.167270477384743e-07, "epoch": 9.544025157232705, "percentage": 95.44, "elapsed_time": "1:11:45", "remaining_time": "0:03:25", "throughput": 2765.76, "total_tokens": 11907656}
|
| 3662 |
+
{"current_steps": 18215, "total_steps": 19080, "loss": 0.538, "lr": 3.1310871186086834e-07, "epoch": 9.54664570230608, "percentage": 95.47, "elapsed_time": "1:11:46", "remaining_time": "0:03:24", "throughput": 2765.77, "total_tokens": 11910504}
|
| 3663 |
+
{"current_steps": 18220, "total_steps": 19080, "loss": 0.5356, "lr": 3.095110328462464e-07, "epoch": 9.549266247379455, "percentage": 95.49, "elapsed_time": "1:11:47", "remaining_time": "0:03:23", "throughput": 2765.79, "total_tokens": 11913192}
|
| 3664 |
+
{"current_steps": 18225, "total_steps": 19080, "loss": 0.6039, "lr": 3.0593401370497264e-07, "epoch": 9.55188679245283, "percentage": 95.52, "elapsed_time": "1:11:48", "remaining_time": "0:03:22", "throughput": 2765.86, "total_tokens": 11916680}
|
| 3665 |
+
{"current_steps": 18230, "total_steps": 19080, "loss": 0.4052, "lr": 3.0237765743013626e-07, "epoch": 9.554507337526205, "percentage": 95.55, "elapsed_time": "1:11:49", "remaining_time": "0:03:20", "throughput": 2765.89, "total_tokens": 11919656}
|
| 3666 |
+
{"current_steps": 18235, "total_steps": 19080, "loss": 0.4324, "lr": 2.9884196699753453e-07, "epoch": 9.55712788259958, "percentage": 95.57, "elapsed_time": "1:11:50", "remaining_time": "0:03:19", "throughput": 2765.91, "total_tokens": 11922632}
|
| 3667 |
+
{"current_steps": 18240, "total_steps": 19080, "loss": 0.4843, "lr": 2.953269453656704e-07, "epoch": 9.559748427672956, "percentage": 95.6, "elapsed_time": "1:11:51", "remaining_time": "0:03:18", "throughput": 2765.97, "total_tokens": 11926280}
|
| 3668 |
+
{"current_steps": 18245, "total_steps": 19080, "loss": 0.3137, "lr": 2.9183259547575504e-07, "epoch": 9.56236897274633, "percentage": 95.62, "elapsed_time": "1:11:52", "remaining_time": "0:03:17", "throughput": 2766.02, "total_tokens": 11929768}
|
| 3669 |
+
{"current_steps": 18250, "total_steps": 19080, "loss": 0.4426, "lr": 2.883589202517023e-07, "epoch": 9.564989517819706, "percentage": 95.65, "elapsed_time": "1:11:54", "remaining_time": "0:03:16", "throughput": 2766.09, "total_tokens": 11933480}
|
| 3670 |
+
{"current_steps": 18255, "total_steps": 19080, "loss": 0.3635, "lr": 2.849059226001177e-07, "epoch": 9.567610062893081, "percentage": 95.68, "elapsed_time": "1:11:55", "remaining_time": "0:03:15", "throughput": 2766.11, "total_tokens": 11936200}
|
| 3671 |
+
{"current_steps": 18260, "total_steps": 19080, "loss": 0.5611, "lr": 2.8147360541032065e-07, "epoch": 9.570230607966456, "percentage": 95.7, "elapsed_time": "1:11:56", "remaining_time": "0:03:13", "throughput": 2766.09, "total_tokens": 11938472}
|
| 3672 |
+
{"current_steps": 18265, "total_steps": 19080, "loss": 0.5361, "lr": 2.780619715543109e-07, "epoch": 9.572851153039831, "percentage": 95.73, "elapsed_time": "1:11:57", "remaining_time": "0:03:12", "throughput": 2766.18, "total_tokens": 11942280}
|
| 3673 |
+
{"current_steps": 18270, "total_steps": 19080, "loss": 0.4159, "lr": 2.746710238867911e-07, "epoch": 9.575471698113208, "percentage": 95.75, "elapsed_time": "1:11:58", "remaining_time": "0:03:11", "throughput": 2766.26, "total_tokens": 11945800}
|
| 3674 |
+
{"current_steps": 18275, "total_steps": 19080, "loss": 0.4314, "lr": 2.713007652451499e-07, "epoch": 9.578092243186584, "percentage": 95.78, "elapsed_time": "1:11:59", "remaining_time": "0:03:10", "throughput": 2766.27, "total_tokens": 11948200}
|
| 3675 |
+
{"current_steps": 18280, "total_steps": 19080, "loss": 0.499, "lr": 2.6795119844946757e-07, "epoch": 9.580712788259959, "percentage": 95.81, "elapsed_time": "1:12:00", "remaining_time": "0:03:09", "throughput": 2766.35, "total_tokens": 11951656}
|
| 3676 |
+
{"current_steps": 18285, "total_steps": 19080, "loss": 0.4606, "lr": 2.646223263025077e-07, "epoch": 9.583333333333334, "percentage": 95.83, "elapsed_time": "1:12:01", "remaining_time": "0:03:07", "throughput": 2766.42, "total_tokens": 11955208}
|
| 3677 |
+
{"current_steps": 18290, "total_steps": 19080, "loss": 0.3742, "lr": 2.6131415158971993e-07, "epoch": 9.585953878406709, "percentage": 95.86, "elapsed_time": "1:12:02", "remaining_time": "0:03:06", "throughput": 2766.43, "total_tokens": 11957768}
|
| 3678 |
+
{"current_steps": 18295, "total_steps": 19080, "loss": 0.4479, "lr": 2.5802667707922887e-07, "epoch": 9.588574423480084, "percentage": 95.89, "elapsed_time": "1:12:03", "remaining_time": "0:03:05", "throughput": 2766.47, "total_tokens": 11960552}
|
| 3679 |
+
{"current_steps": 18300, "total_steps": 19080, "loss": 0.5256, "lr": 2.54759905521848e-07, "epoch": 9.59119496855346, "percentage": 95.91, "elapsed_time": "1:12:04", "remaining_time": "0:03:04", "throughput": 2766.56, "total_tokens": 11964552}
|
| 3680 |
+
{"current_steps": 18305, "total_steps": 19080, "loss": 0.4309, "lr": 2.51513839651063e-07, "epoch": 9.593815513626835, "percentage": 95.94, "elapsed_time": "1:12:05", "remaining_time": "0:03:03", "throughput": 2766.58, "total_tokens": 11966824}
|
| 3681 |
+
{"current_steps": 18310, "total_steps": 19080, "loss": 0.4487, "lr": 2.4828848218302615e-07, "epoch": 9.59643605870021, "percentage": 95.96, "elapsed_time": "1:12:06", "remaining_time": "0:03:01", "throughput": 2766.61, "total_tokens": 11969832}
|
| 3682 |
+
{"current_steps": 18315, "total_steps": 19080, "loss": 0.405, "lr": 2.450838358165786e-07, "epoch": 9.599056603773585, "percentage": 95.99, "elapsed_time": "1:12:07", "remaining_time": "0:03:00", "throughput": 2766.62, "total_tokens": 11972424}
|
| 3683 |
+
{"current_steps": 18320, "total_steps": 19080, "loss": 0.4735, "lr": 2.41899903233217e-07, "epoch": 9.60167714884696, "percentage": 96.02, "elapsed_time": "1:12:08", "remaining_time": "0:02:59", "throughput": 2766.73, "total_tokens": 11976552}
|
| 3684 |
+
{"current_steps": 18325, "total_steps": 19080, "loss": 0.4398, "lr": 2.387366870971103e-07, "epoch": 9.604297693920335, "percentage": 96.04, "elapsed_time": "1:12:09", "remaining_time": "0:02:58", "throughput": 2766.78, "total_tokens": 11979720}
|
| 3685 |
+
{"current_steps": 18330, "total_steps": 19080, "loss": 0.5279, "lr": 2.3559419005509675e-07, "epoch": 9.60691823899371, "percentage": 96.07, "elapsed_time": "1:12:10", "remaining_time": "0:02:57", "throughput": 2766.82, "total_tokens": 11982536}
|
| 3686 |
+
{"current_steps": 18335, "total_steps": 19080, "loss": 0.3406, "lr": 2.3247241473667026e-07, "epoch": 9.609538784067086, "percentage": 96.1, "elapsed_time": "1:12:11", "remaining_time": "0:02:56", "throughput": 2766.85, "total_tokens": 11985384}
|
| 3687 |
+
{"current_steps": 18340, "total_steps": 19080, "loss": 0.4203, "lr": 2.2937136375399126e-07, "epoch": 9.61215932914046, "percentage": 96.12, "elapsed_time": "1:12:12", "remaining_time": "0:02:54", "throughput": 2766.9, "total_tokens": 11988712}
|
| 3688 |
+
{"current_steps": 18345, "total_steps": 19080, "loss": 0.4329, "lr": 2.2629103970188137e-07, "epoch": 9.614779874213836, "percentage": 96.15, "elapsed_time": "1:12:14", "remaining_time": "0:02:53", "throughput": 2767.03, "total_tokens": 11993352}
|
| 3689 |
+
{"current_steps": 18350, "total_steps": 19080, "loss": 0.471, "lr": 2.2323144515780935e-07, "epoch": 9.617400419287211, "percentage": 96.17, "elapsed_time": "1:12:15", "remaining_time": "0:02:52", "throughput": 2767.03, "total_tokens": 11995848}
|
| 3690 |
+
{"current_steps": 18355, "total_steps": 19080, "loss": 0.5246, "lr": 2.201925826819079e-07, "epoch": 9.620020964360586, "percentage": 96.2, "elapsed_time": "1:12:16", "remaining_time": "0:02:51", "throughput": 2767.1, "total_tokens": 11999336}
|
| 3691 |
+
{"current_steps": 18360, "total_steps": 19080, "loss": 0.4717, "lr": 2.1717445481695408e-07, "epoch": 9.622641509433961, "percentage": 96.23, "elapsed_time": "1:12:17", "remaining_time": "0:02:50", "throughput": 2767.18, "total_tokens": 12003080}
|
| 3692 |
+
{"current_steps": 18365, "total_steps": 19080, "loss": 0.2833, "lr": 2.1417706408838333e-07, "epoch": 9.625262054507338, "percentage": 96.25, "elapsed_time": "1:12:18", "remaining_time": "0:02:48", "throughput": 2767.29, "total_tokens": 12007240}
|
| 3693 |
+
{"current_steps": 18370, "total_steps": 19080, "loss": 0.5024, "lr": 2.112004130042755e-07, "epoch": 9.627882599580714, "percentage": 96.28, "elapsed_time": "1:12:19", "remaining_time": "0:02:47", "throughput": 2767.32, "total_tokens": 12009928}
|
| 3694 |
+
{"current_steps": 18375, "total_steps": 19080, "loss": 0.4274, "lr": 2.082445040553549e-07, "epoch": 9.630503144654089, "percentage": 96.31, "elapsed_time": "1:12:21", "remaining_time": "0:02:46", "throughput": 2767.37, "total_tokens": 12013384}
|
| 3695 |
+
{"current_steps": 18380, "total_steps": 19080, "loss": 0.4971, "lr": 2.053093397149902e-07, "epoch": 9.633123689727464, "percentage": 96.33, "elapsed_time": "1:12:22", "remaining_time": "0:02:45", "throughput": 2767.4, "total_tokens": 12016520}
|
| 3696 |
+
{"current_steps": 18385, "total_steps": 19080, "loss": 0.4228, "lr": 2.0239492243919467e-07, "epoch": 9.635744234800839, "percentage": 96.36, "elapsed_time": "1:12:23", "remaining_time": "0:02:44", "throughput": 2767.46, "total_tokens": 12020040}
|
| 3697 |
+
{"current_steps": 18390, "total_steps": 19080, "loss": 0.4486, "lr": 1.9950125466662028e-07, "epoch": 9.638364779874214, "percentage": 96.38, "elapsed_time": "1:12:24", "remaining_time": "0:02:43", "throughput": 2767.51, "total_tokens": 12023464}
|
| 3698 |
+
{"current_steps": 18395, "total_steps": 19080, "loss": 0.3351, "lr": 1.9662833881855248e-07, "epoch": 9.64098532494759, "percentage": 96.41, "elapsed_time": "1:12:25", "remaining_time": "0:02:41", "throughput": 2767.57, "total_tokens": 12026664}
|
| 3699 |
+
{"current_steps": 18400, "total_steps": 19080, "loss": 0.3942, "lr": 1.9377617729891828e-07, "epoch": 9.643605870020965, "percentage": 96.44, "elapsed_time": "1:12:26", "remaining_time": "0:02:40", "throughput": 2767.67, "total_tokens": 12030440}
|
| 3700 |
+
{"current_steps": 18405, "total_steps": 19080, "loss": 0.5038, "lr": 1.9094477249427534e-07, "epoch": 9.64622641509434, "percentage": 96.46, "elapsed_time": "1:12:27", "remaining_time": "0:02:39", "throughput": 2767.67, "total_tokens": 12032872}
|
| 3701 |
+
{"current_steps": 18410, "total_steps": 19080, "loss": 0.3857, "lr": 1.8813412677381737e-07, "epoch": 9.648846960167715, "percentage": 96.49, "elapsed_time": "1:12:29", "remaining_time": "0:02:38", "throughput": 2767.82, "total_tokens": 12037864}
|
| 3702 |
+
{"current_steps": 18415, "total_steps": 19080, "loss": 0.312, "lr": 1.8534424248935756e-07, "epoch": 9.65146750524109, "percentage": 96.51, "elapsed_time": "1:12:30", "remaining_time": "0:02:37", "throughput": 2767.85, "total_tokens": 12040904}
|
| 3703 |
+
{"current_steps": 18420, "total_steps": 19080, "loss": 0.4536, "lr": 1.8257512197535076e-07, "epoch": 9.654088050314465, "percentage": 96.54, "elapsed_time": "1:12:31", "remaining_time": "0:02:35", "throughput": 2767.9, "total_tokens": 12044296}
|
| 3704 |
+
{"current_steps": 18425, "total_steps": 19080, "loss": 0.3555, "lr": 1.7982676754886574e-07, "epoch": 9.65670859538784, "percentage": 96.57, "elapsed_time": "1:12:32", "remaining_time": "0:02:34", "throughput": 2767.92, "total_tokens": 12047208}
|
| 3705 |
+
{"current_steps": 18430, "total_steps": 19080, "loss": 0.5172, "lr": 1.7709918150959904e-07, "epoch": 9.659329140461216, "percentage": 96.59, "elapsed_time": "1:12:33", "remaining_time": "0:02:33", "throughput": 2768.0, "total_tokens": 12050696}
|
| 3706 |
+
{"current_steps": 18435, "total_steps": 19080, "loss": 0.5077, "lr": 1.7439236613987775e-07, "epoch": 9.66194968553459, "percentage": 96.62, "elapsed_time": "1:12:34", "remaining_time": "0:02:32", "throughput": 2768.07, "total_tokens": 12054536}
|
| 3707 |
+
{"current_steps": 18440, "total_steps": 19080, "loss": 0.463, "lr": 1.717063237046318e-07, "epoch": 9.664570230607966, "percentage": 96.65, "elapsed_time": "1:12:35", "remaining_time": "0:02:31", "throughput": 2768.06, "total_tokens": 12056776}
|
| 3708 |
+
{"current_steps": 18445, "total_steps": 19080, "loss": 0.516, "lr": 1.6904105645142444e-07, "epoch": 9.667190775681341, "percentage": 96.67, "elapsed_time": "1:12:36", "remaining_time": "0:02:29", "throughput": 2768.09, "total_tokens": 12059720}
|
| 3709 |
+
{"current_steps": 18450, "total_steps": 19080, "loss": 0.4543, "lr": 1.6639656661043e-07, "epoch": 9.669811320754716, "percentage": 96.7, "elapsed_time": "1:12:37", "remaining_time": "0:02:28", "throughput": 2768.16, "total_tokens": 12063304}
|
| 3710 |
+
{"current_steps": 18455, "total_steps": 19080, "loss": 0.5142, "lr": 1.6377285639443407e-07, "epoch": 9.672431865828091, "percentage": 96.72, "elapsed_time": "1:12:39", "remaining_time": "0:02:27", "throughput": 2768.25, "total_tokens": 12067592}
|
| 3711 |
+
{"current_steps": 18460, "total_steps": 19080, "loss": 0.4691, "lr": 1.61169927998836e-07, "epoch": 9.675052410901468, "percentage": 96.75, "elapsed_time": "1:12:40", "remaining_time": "0:02:26", "throughput": 2768.3, "total_tokens": 12070856}
|
| 3712 |
+
{"current_steps": 18465, "total_steps": 19080, "loss": 0.474, "lr": 1.5858778360165195e-07, "epoch": 9.677672955974844, "percentage": 96.78, "elapsed_time": "1:12:41", "remaining_time": "0:02:25", "throughput": 2768.35, "total_tokens": 12074280}
|
| 3713 |
+
{"current_steps": 18470, "total_steps": 19080, "loss": 0.4082, "lr": 1.5602642536350075e-07, "epoch": 9.680293501048219, "percentage": 96.8, "elapsed_time": "1:12:42", "remaining_time": "0:02:24", "throughput": 2768.37, "total_tokens": 12077288}
|
| 3714 |
+
{"current_steps": 18475, "total_steps": 19080, "loss": 0.3857, "lr": 1.5348585542760974e-07, "epoch": 9.682914046121594, "percentage": 96.83, "elapsed_time": "1:12:44", "remaining_time": "0:02:22", "throughput": 2768.5, "total_tokens": 12082056}
|
| 3715 |
+
{"current_steps": 18480, "total_steps": 19080, "loss": 0.5345, "lr": 1.5096607591980894e-07, "epoch": 9.685534591194969, "percentage": 96.86, "elapsed_time": "1:12:45", "remaining_time": "0:02:21", "throughput": 2768.56, "total_tokens": 12085128}
|
| 3716 |
+
{"current_steps": 18485, "total_steps": 19080, "loss": 0.5619, "lr": 1.4846708894853955e-07, "epoch": 9.688155136268344, "percentage": 96.88, "elapsed_time": "1:12:46", "remaining_time": "0:02:20", "throughput": 2768.65, "total_tokens": 12089032}
|
| 3717 |
+
{"current_steps": 18490, "total_steps": 19080, "loss": 0.5405, "lr": 1.459888966048373e-07, "epoch": 9.69077568134172, "percentage": 96.91, "elapsed_time": "1:12:47", "remaining_time": "0:02:19", "throughput": 2768.66, "total_tokens": 12091496}
|
| 3718 |
+
{"current_steps": 18495, "total_steps": 19080, "loss": 0.5699, "lr": 1.4353150096234058e-07, "epoch": 9.693396226415095, "percentage": 96.93, "elapsed_time": "1:12:48", "remaining_time": "0:02:18", "throughput": 2768.75, "total_tokens": 12095208}
|
| 3719 |
+
{"current_steps": 18500, "total_steps": 19080, "loss": 0.519, "lr": 1.410949040772852e-07, "epoch": 9.69601677148847, "percentage": 96.96, "elapsed_time": "1:12:49", "remaining_time": "0:02:16", "throughput": 2768.84, "total_tokens": 12099176}
|
| 3720 |
+
{"current_steps": 18505, "total_steps": 19080, "loss": 0.5599, "lr": 1.3867910798850692e-07, "epoch": 9.698637316561845, "percentage": 96.99, "elapsed_time": "1:12:50", "remaining_time": "0:02:15", "throughput": 2768.89, "total_tokens": 12102408}
|
| 3721 |
+
{"current_steps": 18510, "total_steps": 19080, "loss": 0.552, "lr": 1.3628411471742764e-07, "epoch": 9.70125786163522, "percentage": 97.01, "elapsed_time": "1:12:51", "remaining_time": "0:02:14", "throughput": 2768.93, "total_tokens": 12105704}
|
| 3722 |
+
{"current_steps": 18515, "total_steps": 19080, "loss": 0.5464, "lr": 1.3390992626807485e-07, "epoch": 9.703878406708595, "percentage": 97.04, "elapsed_time": "1:12:52", "remaining_time": "0:02:13", "throughput": 2768.95, "total_tokens": 12108520}
|
| 3723 |
+
{"current_steps": 18520, "total_steps": 19080, "loss": 0.4415, "lr": 1.315565446270567e-07, "epoch": 9.70649895178197, "percentage": 97.06, "elapsed_time": "1:12:54", "remaining_time": "0:02:12", "throughput": 2769.01, "total_tokens": 12111912}
|
| 3724 |
+
{"current_steps": 18525, "total_steps": 19080, "loss": 0.435, "lr": 1.292239717635785e-07, "epoch": 9.709119496855346, "percentage": 97.09, "elapsed_time": "1:12:55", "remaining_time": "0:02:11", "throughput": 2769.08, "total_tokens": 12116040}
|
| 3725 |
+
{"current_steps": 18530, "total_steps": 19080, "loss": 0.4188, "lr": 1.269122096294262e-07, "epoch": 9.71174004192872, "percentage": 97.12, "elapsed_time": "1:12:56", "remaining_time": "0:02:09", "throughput": 2769.12, "total_tokens": 12118632}
|
| 3726 |
+
{"current_steps": 18535, "total_steps": 19080, "loss": 0.3341, "lr": 1.24621260158983e-07, "epoch": 9.714360587002096, "percentage": 97.14, "elapsed_time": "1:12:57", "remaining_time": "0:02:08", "throughput": 2769.13, "total_tokens": 12121192}
|
| 3727 |
+
{"current_steps": 18540, "total_steps": 19080, "loss": 0.4317, "lr": 1.2235112526920723e-07, "epoch": 9.716981132075471, "percentage": 97.17, "elapsed_time": "1:12:58", "remaining_time": "0:02:07", "throughput": 2769.17, "total_tokens": 12123976}
|
| 3728 |
+
{"current_steps": 18545, "total_steps": 19080, "loss": 0.5838, "lr": 1.2010180685964324e-07, "epoch": 9.719601677148846, "percentage": 97.2, "elapsed_time": "1:12:59", "remaining_time": "0:02:06", "throughput": 2769.24, "total_tokens": 12127816}
|
| 3729 |
+
{"current_steps": 18550, "total_steps": 19080, "loss": 0.5292, "lr": 1.1787330681241881e-07, "epoch": 9.722222222222221, "percentage": 97.22, "elapsed_time": "1:13:00", "remaining_time": "0:02:05", "throughput": 2769.34, "total_tokens": 12131848}
|
| 3730 |
+
{"current_steps": 18555, "total_steps": 19080, "loss": 0.4411, "lr": 1.156656269922396e-07, "epoch": 9.724842767295598, "percentage": 97.25, "elapsed_time": "1:13:01", "remaining_time": "0:02:03", "throughput": 2769.4, "total_tokens": 12135432}
|
| 3731 |
+
{"current_steps": 18560, "total_steps": 19080, "loss": 0.5024, "lr": 1.1347876924639455e-07, "epoch": 9.727463312368974, "percentage": 97.27, "elapsed_time": "1:13:02", "remaining_time": "0:02:02", "throughput": 2769.45, "total_tokens": 12138376}
|
| 3732 |
+
{"current_steps": 18565, "total_steps": 19080, "loss": 0.566, "lr": 1.1131273540474496e-07, "epoch": 9.730083857442349, "percentage": 97.3, "elapsed_time": "1:13:03", "remaining_time": "0:02:01", "throughput": 2769.51, "total_tokens": 12141480}
|
| 3733 |
+
{"current_steps": 18570, "total_steps": 19080, "loss": 0.417, "lr": 1.091675272797299e-07, "epoch": 9.732704402515724, "percentage": 97.33, "elapsed_time": "1:13:04", "remaining_time": "0:02:00", "throughput": 2769.55, "total_tokens": 12144168}
|
| 3734 |
+
{"current_steps": 18575, "total_steps": 19080, "loss": 0.5753, "lr": 1.0704314666635795e-07, "epoch": 9.735324947589099, "percentage": 97.35, "elapsed_time": "1:13:06", "remaining_time": "0:01:59", "throughput": 2769.64, "total_tokens": 12148168}
|
| 3735 |
+
{"current_steps": 18580, "total_steps": 19080, "loss": 0.3291, "lr": 1.0493959534221832e-07, "epoch": 9.737945492662474, "percentage": 97.38, "elapsed_time": "1:13:07", "remaining_time": "0:01:58", "throughput": 2769.65, "total_tokens": 12150696}
|
| 3736 |
+
{"current_steps": 18585, "total_steps": 19080, "loss": 0.3677, "lr": 1.0285687506746133e-07, "epoch": 9.74056603773585, "percentage": 97.41, "elapsed_time": "1:13:08", "remaining_time": "0:01:56", "throughput": 2769.69, "total_tokens": 12153928}
|
| 3737 |
+
{"current_steps": 18590, "total_steps": 19080, "loss": 0.4279, "lr": 1.0079498758481798e-07, "epoch": 9.743186582809225, "percentage": 97.43, "elapsed_time": "1:13:09", "remaining_time": "0:01:55", "throughput": 2769.74, "total_tokens": 12157384}
|
| 3738 |
+
{"current_steps": 18595, "total_steps": 19080, "loss": 0.4519, "lr": 9.87539346195776e-08, "epoch": 9.7458071278826, "percentage": 97.46, "elapsed_time": "1:13:10", "remaining_time": "0:01:54", "throughput": 2769.77, "total_tokens": 12160200}
|
| 3739 |
+
{"current_steps": 18600, "total_steps": 19080, "loss": 0.6373, "lr": 9.673371787960183e-08, "epoch": 9.748427672955975, "percentage": 97.48, "elapsed_time": "1:13:11", "remaining_time": "0:01:53", "throughput": 2769.77, "total_tokens": 12162504}
|
| 3740 |
+
{"current_steps": 18605, "total_steps": 19080, "loss": 0.4657, "lr": 9.473433905531626e-08, "epoch": 9.75104821802935, "percentage": 97.51, "elapsed_time": "1:13:12", "remaining_time": "0:01:52", "throughput": 2769.8, "total_tokens": 12165288}
|
| 3741 |
+
{"current_steps": 18610, "total_steps": 19080, "loss": 0.4409, "lr": 9.275579981970483e-08, "epoch": 9.753668763102725, "percentage": 97.54, "elapsed_time": "1:13:13", "remaining_time": "0:01:50", "throughput": 2769.82, "total_tokens": 12167912}
|
| 3742 |
+
{"current_steps": 18615, "total_steps": 19080, "loss": 0.5368, "lr": 9.07981018283266e-08, "epoch": 9.7562893081761, "percentage": 97.56, "elapsed_time": "1:13:14", "remaining_time": "0:01:49", "throughput": 2769.92, "total_tokens": 12171624}
|
| 3743 |
+
{"current_steps": 18620, "total_steps": 19080, "loss": 0.4864, "lr": 8.886124671928786e-08, "epoch": 9.758909853249476, "percentage": 97.59, "elapsed_time": "1:13:15", "remaining_time": "0:01:48", "throughput": 2769.97, "total_tokens": 12174632}
|
| 3744 |
+
{"current_steps": 18625, "total_steps": 19080, "loss": 0.5812, "lr": 8.694523611326444e-08, "epoch": 9.76153039832285, "percentage": 97.62, "elapsed_time": "1:13:16", "remaining_time": "0:01:47", "throughput": 2770.01, "total_tokens": 12177896}
|
| 3745 |
+
{"current_steps": 18630, "total_steps": 19080, "loss": 0.4935, "lr": 8.505007161348222e-08, "epoch": 9.764150943396226, "percentage": 97.64, "elapsed_time": "1:13:17", "remaining_time": "0:01:46", "throughput": 2770.08, "total_tokens": 12181256}
|
| 3746 |
+
{"current_steps": 18635, "total_steps": 19080, "loss": 0.5143, "lr": 8.31757548057338e-08, "epoch": 9.766771488469601, "percentage": 97.67, "elapsed_time": "1:13:18", "remaining_time": "0:01:45", "throughput": 2770.09, "total_tokens": 12183848}
|
| 3747 |
+
{"current_steps": 18640, "total_steps": 19080, "loss": 0.4401, "lr": 8.132228725835634e-08, "epoch": 9.769392033542976, "percentage": 97.69, "elapsed_time": "1:13:19", "remaining_time": "0:01:43", "throughput": 2770.15, "total_tokens": 12187208}
|
| 3748 |
+
{"current_steps": 18645, "total_steps": 19080, "loss": 0.4805, "lr": 7.948967052225087e-08, "epoch": 9.772012578616351, "percentage": 97.72, "elapsed_time": "1:13:20", "remaining_time": "0:01:42", "throughput": 2770.19, "total_tokens": 12190568}
|
| 3749 |
+
{"current_steps": 18650, "total_steps": 19080, "loss": 0.3346, "lr": 7.767790613086301e-08, "epoch": 9.774633123689728, "percentage": 97.75, "elapsed_time": "1:13:21", "remaining_time": "0:01:41", "throughput": 2770.22, "total_tokens": 12193224}
|
| 3750 |
+
{"current_steps": 18655, "total_steps": 19080, "loss": 0.4305, "lr": 7.588699560019952e-08, "epoch": 9.777253668763104, "percentage": 97.77, "elapsed_time": "1:13:22", "remaining_time": "0:01:40", "throughput": 2770.23, "total_tokens": 12195816}
|
| 3751 |
+
{"current_steps": 18660, "total_steps": 19080, "loss": 0.4623, "lr": 7.411694042881168e-08, "epoch": 9.779874213836479, "percentage": 97.8, "elapsed_time": "1:13:23", "remaining_time": "0:01:39", "throughput": 2770.28, "total_tokens": 12198856}
|
| 3752 |
+
{"current_steps": 18665, "total_steps": 19080, "loss": 0.546, "lr": 7.23677420977953e-08, "epoch": 9.782494758909854, "percentage": 97.82, "elapsed_time": "1:13:24", "remaining_time": "0:01:37", "throughput": 2770.33, "total_tokens": 12201992}
|
| 3753 |
+
{"current_steps": 18670, "total_steps": 19080, "loss": 0.4817, "lr": 7.063940207080733e-08, "epoch": 9.785115303983229, "percentage": 97.85, "elapsed_time": "1:13:25", "remaining_time": "0:01:36", "throughput": 2770.42, "total_tokens": 12205608}
|
| 3754 |
+
{"current_steps": 18675, "total_steps": 19080, "loss": 0.5414, "lr": 6.893192179403817e-08, "epoch": 9.787735849056604, "percentage": 97.88, "elapsed_time": "1:13:26", "remaining_time": "0:01:35", "throughput": 2770.5, "total_tokens": 12209352}
|
| 3755 |
+
{"current_steps": 18680, "total_steps": 19080, "loss": 0.4559, "lr": 6.724530269623108e-08, "epoch": 9.79035639412998, "percentage": 97.9, "elapsed_time": "1:13:28", "remaining_time": "0:01:34", "throughput": 2770.6, "total_tokens": 12213768}
|
| 3756 |
+
{"current_steps": 18685, "total_steps": 19080, "loss": 0.4483, "lr": 6.557954618867102e-08, "epoch": 9.792976939203355, "percentage": 97.93, "elapsed_time": "1:13:29", "remaining_time": "0:01:33", "throughput": 2770.64, "total_tokens": 12216776}
|
| 3757 |
+
{"current_steps": 18690, "total_steps": 19080, "loss": 0.3922, "lr": 6.393465366519024e-08, "epoch": 9.79559748427673, "percentage": 97.96, "elapsed_time": "1:13:30", "remaining_time": "0:01:32", "throughput": 2770.72, "total_tokens": 12221000}
|
| 3758 |
+
{"current_steps": 18695, "total_steps": 19080, "loss": 0.4646, "lr": 6.231062650215724e-08, "epoch": 9.798218029350105, "percentage": 97.98, "elapsed_time": "1:13:31", "remaining_time": "0:01:30", "throughput": 2770.77, "total_tokens": 12224200}
|
| 3759 |
+
{"current_steps": 18700, "total_steps": 19080, "loss": 0.4701, "lr": 6.070746605848221e-08, "epoch": 9.80083857442348, "percentage": 98.01, "elapsed_time": "1:13:32", "remaining_time": "0:01:29", "throughput": 2770.79, "total_tokens": 12226920}
|
| 3760 |
+
{"current_steps": 18705, "total_steps": 19080, "loss": 0.5069, "lr": 5.912517367561987e-08, "epoch": 9.803459119496855, "percentage": 98.03, "elapsed_time": "1:13:33", "remaining_time": "0:01:28", "throughput": 2770.84, "total_tokens": 12229960}
|
| 3761 |
+
{"current_steps": 18710, "total_steps": 19080, "loss": 0.4213, "lr": 5.756375067755837e-08, "epoch": 9.80607966457023, "percentage": 98.06, "elapsed_time": "1:13:34", "remaining_time": "0:01:27", "throughput": 2770.81, "total_tokens": 12232040}
|
| 3762 |
+
{"current_steps": 18715, "total_steps": 19080, "loss": 0.397, "lr": 5.602319837082481e-08, "epoch": 9.808700209643606, "percentage": 98.09, "elapsed_time": "1:13:35", "remaining_time": "0:01:26", "throughput": 2770.8, "total_tokens": 12234472}
|
| 3763 |
+
{"current_steps": 18720, "total_steps": 19080, "loss": 0.351, "lr": 5.450351804448528e-08, "epoch": 9.81132075471698, "percentage": 98.11, "elapsed_time": "1:13:36", "remaining_time": "0:01:24", "throughput": 2770.82, "total_tokens": 12237448}
|
| 3764 |
+
{"current_steps": 18725, "total_steps": 19080, "loss": 0.4553, "lr": 5.3004710970133705e-08, "epoch": 9.813941299790356, "percentage": 98.14, "elapsed_time": "1:13:37", "remaining_time": "0:01:23", "throughput": 2770.87, "total_tokens": 12240264}
|
| 3765 |
+
{"current_steps": 18730, "total_steps": 19080, "loss": 0.3996, "lr": 5.1526778401911334e-08, "epoch": 9.816561844863731, "percentage": 98.17, "elapsed_time": "1:13:38", "remaining_time": "0:01:22", "throughput": 2770.89, "total_tokens": 12243176}
|
| 3766 |
+
{"current_steps": 18735, "total_steps": 19080, "loss": 0.4716, "lr": 5.0069721576476156e-08, "epoch": 9.819182389937106, "percentage": 98.19, "elapsed_time": "1:13:39", "remaining_time": "0:01:21", "throughput": 2770.89, "total_tokens": 12245864}
|
| 3767 |
+
{"current_steps": 18740, "total_steps": 19080, "loss": 0.4376, "lr": 4.863354171303347e-08, "epoch": 9.821802935010481, "percentage": 98.22, "elapsed_time": "1:13:40", "remaining_time": "0:01:20", "throughput": 2770.92, "total_tokens": 12248712}
|
| 3768 |
+
{"current_steps": 18745, "total_steps": 19080, "loss": 0.5239, "lr": 4.72182400133081e-08, "epoch": 9.824423480083858, "percentage": 98.24, "elapsed_time": "1:13:41", "remaining_time": "0:01:19", "throughput": 2770.93, "total_tokens": 12251144}
|
| 3769 |
+
{"current_steps": 18750, "total_steps": 19080, "loss": 0.4595, "lr": 4.582381766156385e-08, "epoch": 9.827044025157234, "percentage": 98.27, "elapsed_time": "1:13:42", "remaining_time": "0:01:17", "throughput": 2771.01, "total_tokens": 12255336}
|
| 3770 |
+
{"current_steps": 18755, "total_steps": 19080, "loss": 0.5102, "lr": 4.445027582458683e-08, "epoch": 9.829664570230609, "percentage": 98.3, "elapsed_time": "1:13:43", "remaining_time": "0:01:16", "throughput": 2771.03, "total_tokens": 12257672}
|
| 3771 |
+
{"current_steps": 18760, "total_steps": 19080, "loss": 0.4621, "lr": 4.309761565169379e-08, "epoch": 9.832285115303984, "percentage": 98.32, "elapsed_time": "1:13:44", "remaining_time": "0:01:15", "throughput": 2771.11, "total_tokens": 12261032}
|
| 3772 |
+
{"current_steps": 18765, "total_steps": 19080, "loss": 0.4415, "lr": 4.1765838274732125e-08, "epoch": 9.834905660377359, "percentage": 98.35, "elapsed_time": "1:13:45", "remaining_time": "0:01:14", "throughput": 2771.19, "total_tokens": 12264488}
|
| 3773 |
+
{"current_steps": 18770, "total_steps": 19080, "loss": 0.4718, "lr": 4.045494480807155e-08, "epoch": 9.837526205450734, "percentage": 98.38, "elapsed_time": "1:13:46", "remaining_time": "0:01:13", "throughput": 2771.24, "total_tokens": 12267432}
|
| 3774 |
+
{"current_steps": 18775, "total_steps": 19080, "loss": 0.3475, "lr": 3.916493634860407e-08, "epoch": 9.84014675052411, "percentage": 98.4, "elapsed_time": "1:13:47", "remaining_time": "0:01:11", "throughput": 2771.3, "total_tokens": 12270888}
|
| 3775 |
+
{"current_steps": 18780, "total_steps": 19080, "loss": 0.4467, "lr": 3.789581397575515e-08, "epoch": 9.842767295597485, "percentage": 98.43, "elapsed_time": "1:13:48", "remaining_time": "0:01:10", "throughput": 2771.32, "total_tokens": 12273896}
|
| 3776 |
+
{"current_steps": 18785, "total_steps": 19080, "loss": 0.5189, "lr": 3.664757875146418e-08, "epoch": 9.84538784067086, "percentage": 98.45, "elapsed_time": "1:13:49", "remaining_time": "0:01:09", "throughput": 2771.33, "total_tokens": 12276328}
|
| 3777 |
+
{"current_steps": 18790, "total_steps": 19080, "loss": 0.408, "lr": 3.5420231720198485e-08, "epoch": 9.848008385744235, "percentage": 98.48, "elapsed_time": "1:13:50", "remaining_time": "0:01:08", "throughput": 2771.2, "total_tokens": 12279016}
|
| 3778 |
+
{"current_steps": 18795, "total_steps": 19080, "loss": 0.3635, "lr": 3.421377390894764e-08, "epoch": 9.85062893081761, "percentage": 98.51, "elapsed_time": "1:13:51", "remaining_time": "0:01:07", "throughput": 2771.2, "total_tokens": 12281512}
|
| 3779 |
+
{"current_steps": 18800, "total_steps": 19080, "loss": 0.5342, "lr": 3.3028206327218035e-08, "epoch": 9.853249475890985, "percentage": 98.53, "elapsed_time": "1:13:53", "remaining_time": "0:01:06", "throughput": 2771.26, "total_tokens": 12285160}
|
| 3780 |
+
{"current_steps": 18805, "total_steps": 19080, "loss": 0.4698, "lr": 3.1863529967041117e-08, "epoch": 9.85587002096436, "percentage": 98.56, "elapsed_time": "1:13:54", "remaining_time": "0:01:04", "throughput": 2771.33, "total_tokens": 12288616}
|
| 3781 |
+
{"current_steps": 18810, "total_steps": 19080, "loss": 0.5412, "lr": 3.071974580296233e-08, "epoch": 9.858490566037736, "percentage": 98.58, "elapsed_time": "1:13:55", "remaining_time": "0:01:03", "throughput": 2771.4, "total_tokens": 12292680}
|
| 3782 |
+
{"current_steps": 18815, "total_steps": 19080, "loss": 0.412, "lr": 2.9596854792052207e-08, "epoch": 9.86111111111111, "percentage": 98.61, "elapsed_time": "1:13:56", "remaining_time": "0:01:02", "throughput": 2771.53, "total_tokens": 12297160}
|
| 3783 |
+
{"current_steps": 18820, "total_steps": 19080, "loss": 0.6077, "lr": 2.8494857873889724e-08, "epoch": 9.863731656184486, "percentage": 98.64, "elapsed_time": "1:13:58", "remaining_time": "0:01:01", "throughput": 2771.57, "total_tokens": 12300520}
|
| 3784 |
+
{"current_steps": 18825, "total_steps": 19080, "loss": 0.4706, "lr": 2.741375597057616e-08, "epoch": 9.866352201257861, "percentage": 98.66, "elapsed_time": "1:13:59", "remaining_time": "0:01:00", "throughput": 2771.62, "total_tokens": 12303496}
|
| 3785 |
+
{"current_steps": 18830, "total_steps": 19080, "loss": 0.5319, "lr": 2.6353549986729566e-08, "epoch": 9.868972746331236, "percentage": 98.69, "elapsed_time": "1:14:00", "remaining_time": "0:00:58", "throughput": 2771.67, "total_tokens": 12306568}
|
| 3786 |
+
{"current_steps": 18835, "total_steps": 19080, "loss": 0.5532, "lr": 2.531424080948197e-08, "epoch": 9.871593291404611, "percentage": 98.72, "elapsed_time": "1:14:01", "remaining_time": "0:00:57", "throughput": 2771.67, "total_tokens": 12309160}
|
| 3787 |
+
{"current_steps": 18840, "total_steps": 19080, "loss": 0.4161, "lr": 2.4295829308482176e-08, "epoch": 9.874213836477988, "percentage": 98.74, "elapsed_time": "1:14:02", "remaining_time": "0:00:56", "throughput": 2771.75, "total_tokens": 12312776}
|
| 3788 |
+
{"current_steps": 18845, "total_steps": 19080, "loss": 0.3986, "lr": 2.329831633588464e-08, "epoch": 9.876834381551364, "percentage": 98.77, "elapsed_time": "1:14:03", "remaining_time": "0:00:55", "throughput": 2771.8, "total_tokens": 12316104}
|
| 3789 |
+
{"current_steps": 18850, "total_steps": 19080, "loss": 0.5599, "lr": 2.232170272636891e-08, "epoch": 9.879454926624739, "percentage": 98.79, "elapsed_time": "1:14:04", "remaining_time": "0:00:54", "throughput": 2771.8, "total_tokens": 12318568}
|
| 3790 |
+
{"current_steps": 18855, "total_steps": 19080, "loss": 0.5172, "lr": 2.136598929711464e-08, "epoch": 9.882075471698114, "percentage": 98.82, "elapsed_time": "1:14:05", "remaining_time": "0:00:53", "throughput": 2771.85, "total_tokens": 12321736}
|
| 3791 |
+
{"current_steps": 18860, "total_steps": 19080, "loss": 0.492, "lr": 2.0431176847823807e-08, "epoch": 9.884696016771489, "percentage": 98.85, "elapsed_time": "1:14:06", "remaining_time": "0:00:51", "throughput": 2771.91, "total_tokens": 12325032}
|
| 3792 |
+
{"current_steps": 18865, "total_steps": 19080, "loss": 0.5057, "lr": 1.9517266160704038e-08, "epoch": 9.887316561844864, "percentage": 98.87, "elapsed_time": "1:14:07", "remaining_time": "0:00:50", "throughput": 2772.0, "total_tokens": 12329160}
|
| 3793 |
+
{"current_steps": 18870, "total_steps": 19080, "loss": 0.6807, "lr": 1.8624258000471405e-08, "epoch": 9.88993710691824, "percentage": 98.9, "elapsed_time": "1:14:08", "remaining_time": "0:00:49", "throughput": 2772.05, "total_tokens": 12332392}
|
| 3794 |
+
{"current_steps": 18875, "total_steps": 19080, "loss": 0.4312, "lr": 1.7752153114358737e-08, "epoch": 9.892557651991615, "percentage": 98.93, "elapsed_time": "1:14:10", "remaining_time": "0:00:48", "throughput": 2772.13, "total_tokens": 12336456}
|
| 3795 |
+
{"current_steps": 18880, "total_steps": 19080, "loss": 0.4153, "lr": 1.6900952232098977e-08, "epoch": 9.89517819706499, "percentage": 98.95, "elapsed_time": "1:14:11", "remaining_time": "0:00:47", "throughput": 2772.14, "total_tokens": 12339080}
|
| 3796 |
+
{"current_steps": 18885, "total_steps": 19080, "loss": 0.3972, "lr": 1.6070656065939048e-08, "epoch": 9.897798742138365, "percentage": 98.98, "elapsed_time": "1:14:12", "remaining_time": "0:00:45", "throughput": 2772.15, "total_tokens": 12341768}
|
| 3797 |
+
{"current_steps": 18890, "total_steps": 19080, "loss": 0.4125, "lr": 1.526126531063432e-08, "epoch": 9.90041928721174, "percentage": 99.0, "elapsed_time": "1:14:13", "remaining_time": "0:00:44", "throughput": 2772.19, "total_tokens": 12344936}
|
| 3798 |
+
{"current_steps": 18895, "total_steps": 19080, "loss": 0.4603, "lr": 1.4472780643445817e-08, "epoch": 9.903039832285115, "percentage": 99.03, "elapsed_time": "1:14:14", "remaining_time": "0:00:43", "throughput": 2772.32, "total_tokens": 12349416}
|
| 3799 |
+
{"current_steps": 18900, "total_steps": 19080, "loss": 0.5117, "lr": 1.3705202724142996e-08, "epoch": 9.90566037735849, "percentage": 99.06, "elapsed_time": "1:14:15", "remaining_time": "0:00:42", "throughput": 2772.35, "total_tokens": 12352360}
|
| 3800 |
+
{"current_steps": 18905, "total_steps": 19080, "loss": 0.5271, "lr": 1.2958532194995432e-08, "epoch": 9.908280922431866, "percentage": 99.08, "elapsed_time": "1:14:16", "remaining_time": "0:00:41", "throughput": 2772.41, "total_tokens": 12355688}
|
| 3801 |
+
{"current_steps": 18910, "total_steps": 19080, "loss": 0.4518, "lr": 1.2232769680789457e-08, "epoch": 9.91090146750524, "percentage": 99.11, "elapsed_time": "1:14:17", "remaining_time": "0:00:40", "throughput": 2772.49, "total_tokens": 12359560}
|
| 3802 |
+
{"current_steps": 18915, "total_steps": 19080, "loss": 0.5226, "lr": 1.152791578880319e-08, "epoch": 9.913522012578616, "percentage": 99.14, "elapsed_time": "1:14:19", "remaining_time": "0:00:38", "throughput": 2772.58, "total_tokens": 12363656}
|
| 3803 |
+
{"current_steps": 18920, "total_steps": 19080, "loss": 0.3914, "lr": 1.0843971108828732e-08, "epoch": 9.916142557651991, "percentage": 99.16, "elapsed_time": "1:14:20", "remaining_time": "0:00:37", "throughput": 2772.68, "total_tokens": 12367688}
|
| 3804 |
+
{"current_steps": 18925, "total_steps": 19080, "loss": 0.469, "lr": 1.018093621316385e-08, "epoch": 9.918763102725366, "percentage": 99.19, "elapsed_time": "1:14:21", "remaining_time": "0:00:36", "throughput": 2772.76, "total_tokens": 12371400}
|
| 3805 |
+
{"current_steps": 18930, "total_steps": 19080, "loss": 0.4517, "lr": 9.53881165659809e-09, "epoch": 9.921383647798741, "percentage": 99.21, "elapsed_time": "1:14:22", "remaining_time": "0:00:35", "throughput": 2772.79, "total_tokens": 12374152}
|
| 3806 |
+
{"current_steps": 18935, "total_steps": 19080, "loss": 0.3564, "lr": 8.91759797644054e-09, "epoch": 9.924004192872118, "percentage": 99.24, "elapsed_time": "1:14:24", "remaining_time": "0:00:34", "throughput": 2772.9, "total_tokens": 12378472}
|
| 3807 |
+
{"current_steps": 18940, "total_steps": 19080, "loss": 0.5057, "lr": 8.317295692486516e-09, "epoch": 9.926624737945493, "percentage": 99.27, "elapsed_time": "1:14:25", "remaining_time": "0:00:33", "throughput": 2772.92, "total_tokens": 12381480}
|
| 3808 |
+
{"current_steps": 18945, "total_steps": 19080, "loss": 0.4257, "lr": 7.737905307045323e-09, "epoch": 9.929245283018869, "percentage": 99.29, "elapsed_time": "1:14:26", "remaining_time": "0:00:31", "throughput": 2772.97, "total_tokens": 12384648}
|
| 3809 |
+
{"current_steps": 18950, "total_steps": 19080, "loss": 0.3688, "lr": 7.179427304926378e-09, "epoch": 9.931865828092244, "percentage": 99.32, "elapsed_time": "1:14:27", "remaining_time": "0:00:30", "throughput": 2772.99, "total_tokens": 12387432}
|
| 3810 |
+
{"current_steps": 18955, "total_steps": 19080, "loss": 0.4423, "lr": 6.641862153433653e-09, "epoch": 9.934486373165619, "percentage": 99.34, "elapsed_time": "1:14:28", "remaining_time": "0:00:29", "throughput": 2773.03, "total_tokens": 12390984}
|
| 3811 |
+
{"current_steps": 18960, "total_steps": 19080, "loss": 0.3978, "lr": 6.125210302382333e-09, "epoch": 9.937106918238994, "percentage": 99.37, "elapsed_time": "1:14:29", "remaining_time": "0:00:28", "throughput": 2773.09, "total_tokens": 12394760}
|
| 3812 |
+
{"current_steps": 18965, "total_steps": 19080, "loss": 0.5307, "lr": 5.629472184079387e-09, "epoch": 9.93972746331237, "percentage": 99.4, "elapsed_time": "1:14:30", "remaining_time": "0:00:27", "throughput": 2773.16, "total_tokens": 12397768}
|
| 3813 |
+
{"current_steps": 18970, "total_steps": 19080, "loss": 0.3971, "lr": 5.154648213334668e-09, "epoch": 9.942348008385745, "percentage": 99.42, "elapsed_time": "1:14:31", "remaining_time": "0:00:25", "throughput": 2773.21, "total_tokens": 12400968}
|
| 3814 |
+
{"current_steps": 18975, "total_steps": 19080, "loss": 0.5256, "lr": 4.700738787466463e-09, "epoch": 9.94496855345912, "percentage": 99.45, "elapsed_time": "1:14:33", "remaining_time": "0:00:24", "throughput": 2773.32, "total_tokens": 12406664}
|
| 3815 |
+
{"current_steps": 18980, "total_steps": 19080, "loss": 0.446, "lr": 4.26774428627652e-09, "epoch": 9.947589098532495, "percentage": 99.48, "elapsed_time": "1:14:34", "remaining_time": "0:00:23", "throughput": 2773.34, "total_tokens": 12409448}
|
| 3816 |
+
{"current_steps": 18985, "total_steps": 19080, "loss": 0.3501, "lr": 3.855665072080572e-09, "epoch": 9.95020964360587, "percentage": 99.5, "elapsed_time": "1:14:35", "remaining_time": "0:00:22", "throughput": 2773.39, "total_tokens": 12412744}
|
| 3817 |
+
{"current_steps": 18990, "total_steps": 19080, "loss": 0.6239, "lr": 3.464501489683358e-09, "epoch": 9.952830188679245, "percentage": 99.53, "elapsed_time": "1:14:36", "remaining_time": "0:00:21", "throughput": 2773.42, "total_tokens": 12415656}
|
| 3818 |
+
{"current_steps": 18995, "total_steps": 19080, "loss": 0.5632, "lr": 3.094253866398056e-09, "epoch": 9.95545073375262, "percentage": 99.55, "elapsed_time": "1:14:37", "remaining_time": "0:00:20", "throughput": 2773.44, "total_tokens": 12418472}
|
| 3819 |
+
{"current_steps": 19000, "total_steps": 19080, "loss": 0.4081, "lr": 2.7449225120268484e-09, "epoch": 9.958071278825996, "percentage": 99.58, "elapsed_time": "1:14:38", "remaining_time": "0:00:18", "throughput": 2773.49, "total_tokens": 12421768}
|
| 3820 |
+
{"current_steps": 19005, "total_steps": 19080, "loss": 0.445, "lr": 2.416507718877581e-09, "epoch": 9.96069182389937, "percentage": 99.61, "elapsed_time": "1:14:40", "remaining_time": "0:00:17", "throughput": 2773.59, "total_tokens": 12425800}
|
| 3821 |
+
{"current_steps": 19010, "total_steps": 19080, "loss": 0.5609, "lr": 2.109009761747105e-09, "epoch": 9.963312368972746, "percentage": 99.63, "elapsed_time": "1:14:41", "remaining_time": "0:00:16", "throughput": 2773.65, "total_tokens": 12429288}
|
| 3822 |
+
{"current_steps": 19015, "total_steps": 19080, "loss": 0.3956, "lr": 1.8224288979434844e-09, "epoch": 9.965932914046121, "percentage": 99.66, "elapsed_time": "1:14:42", "remaining_time": "0:00:15", "throughput": 2773.74, "total_tokens": 12433160}
|
| 3823 |
+
{"current_steps": 19020, "total_steps": 19080, "loss": 0.4545, "lr": 1.5567653672554638e-09, "epoch": 9.968553459119496, "percentage": 99.69, "elapsed_time": "1:14:43", "remaining_time": "0:00:14", "throughput": 2773.75, "total_tokens": 12435944}
|
| 3824 |
+
{"current_steps": 19025, "total_steps": 19080, "loss": 0.4001, "lr": 1.3120193919857748e-09, "epoch": 9.971174004192871, "percentage": 99.71, "elapsed_time": "1:14:44", "remaining_time": "0:00:12", "throughput": 2773.74, "total_tokens": 12438216}
|
| 3825 |
+
{"current_steps": 19030, "total_steps": 19080, "loss": 0.4123, "lr": 1.0881911769261565e-09, "epoch": 9.973794549266248, "percentage": 99.74, "elapsed_time": "1:14:45", "remaining_time": "0:00:11", "throughput": 2773.77, "total_tokens": 12440904}
|
| 3826 |
+
{"current_steps": 19035, "total_steps": 19080, "loss": 0.5261, "lr": 8.852809093601311e-10, "epoch": 9.976415094339622, "percentage": 99.76, "elapsed_time": "1:14:46", "remaining_time": "0:00:10", "throughput": 2773.8, "total_tokens": 12443752}
|
| 3827 |
+
{"current_steps": 19040, "total_steps": 19080, "loss": 0.6735, "lr": 7.03288759076881e-10, "epoch": 9.979035639412999, "percentage": 99.79, "elapsed_time": "1:14:47", "remaining_time": "0:00:09", "throughput": 2773.8, "total_tokens": 12446152}
|
| 3828 |
+
{"current_steps": 19045, "total_steps": 19080, "loss": 0.461, "lr": 5.422148783629233e-10, "epoch": 9.981656184486374, "percentage": 99.82, "elapsed_time": "1:14:48", "remaining_time": "0:00:08", "throughput": 2773.86, "total_tokens": 12449160}
|
| 3829 |
+
{"current_steps": 19050, "total_steps": 19080, "loss": 0.4993, "lr": 4.0205940199100623e-10, "epoch": 9.984276729559749, "percentage": 99.84, "elapsed_time": "1:14:49", "remaining_time": "0:00:07", "throughput": 2773.96, "total_tokens": 12453064}
|
| 3830 |
+
{"current_steps": 19055, "total_steps": 19080, "loss": 0.458, "lr": 2.828224472395391e-10, "epoch": 9.986897274633124, "percentage": 99.87, "elapsed_time": "1:14:50", "remaining_time": "0:00:05", "throughput": 2773.98, "total_tokens": 12455944}
|
| 3831 |
+
{"current_steps": 19060, "total_steps": 19080, "loss": 0.5059, "lr": 1.8450411388426515e-10, "epoch": 9.9895178197065, "percentage": 99.9, "elapsed_time": "1:14:51", "remaining_time": "0:00:04", "throughput": 2774.04, "total_tokens": 12459528}
|
| 3832 |
+
{"current_steps": 19065, "total_steps": 19080, "loss": 0.4861, "lr": 1.0710448418715935e-10, "epoch": 9.992138364779874, "percentage": 99.92, "elapsed_time": "1:14:52", "remaining_time": "0:00:03", "throughput": 2774.11, "total_tokens": 12463048}
|
| 3833 |
+
{"current_steps": 19070, "total_steps": 19080, "loss": 0.4444, "lr": 5.062362291585743e-11, "epoch": 9.99475890985325, "percentage": 99.95, "elapsed_time": "1:14:53", "remaining_time": "0:00:02", "throughput": 2774.22, "total_tokens": 12467240}
|
| 3834 |
+
{"current_steps": 19075, "total_steps": 19080, "loss": 0.6473, "lr": 1.5061577329777976e-11, "epoch": 9.997379454926625, "percentage": 99.97, "elapsed_time": "1:14:54", "remaining_time": "0:00:01", "throughput": 2774.26, "total_tokens": 12470216}
|
| 3835 |
+
{"current_steps": 19080, "total_steps": 19080, "loss": 0.6659, "lr": 4.183771884491705e-13, "epoch": 10.0, "percentage": 100.0, "elapsed_time": "1:14:56", "remaining_time": "0:00:00", "throughput": 2774.2, "total_tokens": 12472912}
|
| 3836 |
+
{"current_steps": 19080, "total_steps": 19080, "eval_loss": 0.48326343297958374, "epoch": 10.0, "percentage": 100.0, "elapsed_time": "1:15:10", "remaining_time": "0:00:00", "throughput": 2765.27, "total_tokens": 12472912}
|
| 3837 |
+
{"current_steps": 19080, "total_steps": 19080, "epoch": 10.0, "percentage": 100.0, "elapsed_time": "1:15:11", "remaining_time": "0:00:00", "throughput": 2764.75, "total_tokens": 12472912}
|