Training in progress, step 25870
Browse files- adapter_model.safetensors +1 -1
- trainer_log.jsonl +255 -0
adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 8388736
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0a3b069c66e9bd0218521feb3a4332cec309499b4d1ff7ef3b5b75685715f0a1
|
| 3 |
size 8388736
|
trainer_log.jsonl
CHANGED
|
@@ -4937,3 +4937,258 @@
|
|
| 4937 |
{"current_steps": 24590, "total_steps": 25870, "loss": 0.1489, "lr": 3.725191724845922e-07, "epoch": 9.505218399690762, "percentage": 95.05, "elapsed_time": "0:37:57", "remaining_time": "0:01:58", "throughput": 1637.93, "total_tokens": 3730640}
|
| 4938 |
{"current_steps": 24595, "total_steps": 25870, "loss": 0.1625, "lr": 3.6962398686913315e-07, "epoch": 9.507151140316969, "percentage": 95.07, "elapsed_time": "0:37:58", "remaining_time": "0:01:58", "throughput": 1637.98, "total_tokens": 3731456}
|
| 4939 |
{"current_steps": 24600, "total_steps": 25870, "loss": 0.1085, "lr": 3.667400119581221e-07, "epoch": 9.509083880943177, "percentage": 95.09, "elapsed_time": "0:37:58", "remaining_time": "0:01:57", "throughput": 1637.98, "total_tokens": 3732160}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4937 |
{"current_steps": 24590, "total_steps": 25870, "loss": 0.1489, "lr": 3.725191724845922e-07, "epoch": 9.505218399690762, "percentage": 95.05, "elapsed_time": "0:37:57", "remaining_time": "0:01:58", "throughput": 1637.93, "total_tokens": 3730640}
|
| 4938 |
{"current_steps": 24595, "total_steps": 25870, "loss": 0.1625, "lr": 3.6962398686913315e-07, "epoch": 9.507151140316969, "percentage": 95.07, "elapsed_time": "0:37:58", "remaining_time": "0:01:58", "throughput": 1637.98, "total_tokens": 3731456}
|
| 4939 |
{"current_steps": 24600, "total_steps": 25870, "loss": 0.1085, "lr": 3.667400119581221e-07, "epoch": 9.509083880943177, "percentage": 95.09, "elapsed_time": "0:37:58", "remaining_time": "0:01:57", "throughput": 1637.98, "total_tokens": 3732160}
|
| 4940 |
+
{"current_steps": 24605, "total_steps": 25870, "loss": 0.076, "lr": 3.638672490642203e-07, "epoch": 9.511016621569386, "percentage": 95.11, "elapsed_time": "0:37:58", "remaining_time": "0:01:57", "throughput": 1637.97, "total_tokens": 3732848}
|
| 4941 |
+
{"current_steps": 24610, "total_steps": 25870, "loss": 0.1067, "lr": 3.6100569949499006e-07, "epoch": 9.512949362195593, "percentage": 95.13, "elapsed_time": "0:37:59", "remaining_time": "0:01:56", "throughput": 1638.0, "total_tokens": 3733600}
|
| 4942 |
+
{"current_steps": 24615, "total_steps": 25870, "loss": 0.0465, "lr": 3.58155364552884e-07, "epoch": 9.514882102821801, "percentage": 95.15, "elapsed_time": "0:37:59", "remaining_time": "0:01:56", "throughput": 1638.04, "total_tokens": 3734368}
|
| 4943 |
+
{"current_steps": 24620, "total_steps": 25870, "loss": 0.446, "lr": 3.5531624553525877e-07, "epoch": 9.51681484344801, "percentage": 95.17, "elapsed_time": "0:38:00", "remaining_time": "0:01:55", "throughput": 1638.06, "total_tokens": 3735088}
|
| 4944 |
+
{"current_steps": 24625, "total_steps": 25870, "loss": 0.1867, "lr": 3.524883437343557e-07, "epoch": 9.518747584074218, "percentage": 95.19, "elapsed_time": "0:38:00", "remaining_time": "0:01:55", "throughput": 1638.11, "total_tokens": 3735888}
|
| 4945 |
+
{"current_steps": 24630, "total_steps": 25870, "loss": 0.1392, "lr": 3.496716604373201e-07, "epoch": 9.520680324700425, "percentage": 95.21, "elapsed_time": "0:38:01", "remaining_time": "0:01:54", "throughput": 1638.18, "total_tokens": 3736720}
|
| 4946 |
+
{"current_steps": 24635, "total_steps": 25870, "loss": 0.0616, "lr": 3.4686619692618495e-07, "epoch": 9.522613065326633, "percentage": 95.23, "elapsed_time": "0:38:01", "remaining_time": "0:01:54", "throughput": 1638.19, "total_tokens": 3737440}
|
| 4947 |
+
{"current_steps": 24640, "total_steps": 25870, "loss": 0.2283, "lr": 3.440719544778814e-07, "epoch": 9.52454580595284, "percentage": 95.25, "elapsed_time": "0:38:01", "remaining_time": "0:01:53", "throughput": 1638.26, "total_tokens": 3738272}
|
| 4948 |
+
{"current_steps": 24645, "total_steps": 25870, "loss": 0.0939, "lr": 3.412889343642256e-07, "epoch": 9.52647854657905, "percentage": 95.26, "elapsed_time": "0:38:02", "remaining_time": "0:01:53", "throughput": 1638.27, "total_tokens": 3738976}
|
| 4949 |
+
{"current_steps": 24650, "total_steps": 25870, "loss": 0.0434, "lr": 3.385171378519347e-07, "epoch": 9.528411287205257, "percentage": 95.28, "elapsed_time": "0:38:02", "remaining_time": "0:01:52", "throughput": 1638.32, "total_tokens": 3739760}
|
| 4950 |
+
{"current_steps": 24655, "total_steps": 25870, "loss": 0.2067, "lr": 3.357565662026108e-07, "epoch": 9.530344027831465, "percentage": 95.3, "elapsed_time": "0:38:03", "remaining_time": "0:01:52", "throughput": 1638.36, "total_tokens": 3740544}
|
| 4951 |
+
{"current_steps": 24660, "total_steps": 25870, "loss": 0.0489, "lr": 3.330072206727514e-07, "epoch": 9.532276768457674, "percentage": 95.32, "elapsed_time": "0:38:03", "remaining_time": "0:01:52", "throughput": 1638.42, "total_tokens": 3741344}
|
| 4952 |
+
{"current_steps": 24665, "total_steps": 25870, "loss": 0.0889, "lr": 3.302691025137361e-07, "epoch": 9.534209509083881, "percentage": 95.34, "elapsed_time": "0:38:03", "remaining_time": "0:01:51", "throughput": 1638.45, "total_tokens": 3742096}
|
| 4953 |
+
{"current_steps": 24670, "total_steps": 25870, "loss": 0.161, "lr": 3.275422129718486e-07, "epoch": 9.536142249710089, "percentage": 95.36, "elapsed_time": "0:38:04", "remaining_time": "0:01:51", "throughput": 1638.47, "total_tokens": 3742848}
|
| 4954 |
+
{"current_steps": 24675, "total_steps": 25870, "loss": 0.1842, "lr": 3.2482655328825163e-07, "epoch": 9.538074990336296, "percentage": 95.38, "elapsed_time": "0:38:04", "remaining_time": "0:01:50", "throughput": 1638.46, "total_tokens": 3743504}
|
| 4955 |
+
{"current_steps": 24680, "total_steps": 25870, "loss": 0.2067, "lr": 3.221221246989925e-07, "epoch": 9.540007730962504, "percentage": 95.4, "elapsed_time": "0:38:05", "remaining_time": "0:01:50", "throughput": 1638.45, "total_tokens": 3744160}
|
| 4956 |
+
{"current_steps": 24685, "total_steps": 25870, "loss": 0.2803, "lr": 3.1942892843501995e-07, "epoch": 9.541940471588713, "percentage": 95.42, "elapsed_time": "0:38:05", "remaining_time": "0:01:49", "throughput": 1638.47, "total_tokens": 3744880}
|
| 4957 |
+
{"current_steps": 24690, "total_steps": 25870, "loss": 0.1036, "lr": 3.167469657221589e-07, "epoch": 9.54387321221492, "percentage": 95.44, "elapsed_time": "0:38:06", "remaining_time": "0:01:49", "throughput": 1638.48, "total_tokens": 3745584}
|
| 4958 |
+
{"current_steps": 24695, "total_steps": 25870, "loss": 0.1186, "lr": 3.140762377811246e-07, "epoch": 9.545805952841128, "percentage": 95.46, "elapsed_time": "0:38:06", "remaining_time": "0:01:48", "throughput": 1638.57, "total_tokens": 3746480}
|
| 4959 |
+
{"current_steps": 24700, "total_steps": 25870, "loss": 0.2084, "lr": 3.1141674582752246e-07, "epoch": 9.547738693467338, "percentage": 95.48, "elapsed_time": "0:38:06", "remaining_time": "0:01:48", "throughput": 1638.62, "total_tokens": 3747264}
|
| 4960 |
+
{"current_steps": 24705, "total_steps": 25870, "loss": 0.1497, "lr": 3.087684910718397e-07, "epoch": 9.549671434093545, "percentage": 95.5, "elapsed_time": "0:38:07", "remaining_time": "0:01:47", "throughput": 1638.63, "total_tokens": 3747984}
|
| 4961 |
+
{"current_steps": 24710, "total_steps": 25870, "loss": 0.3382, "lr": 3.0613147471944824e-07, "epoch": 9.551604174719753, "percentage": 95.52, "elapsed_time": "0:38:07", "remaining_time": "0:01:47", "throughput": 1638.64, "total_tokens": 3748688}
|
| 4962 |
+
{"current_steps": 24715, "total_steps": 25870, "loss": 0.0371, "lr": 3.0350569797060744e-07, "epoch": 9.55353691534596, "percentage": 95.54, "elapsed_time": "0:38:08", "remaining_time": "0:01:46", "throughput": 1638.62, "total_tokens": 3749328}
|
| 4963 |
+
{"current_steps": 24720, "total_steps": 25870, "loss": 0.0999, "lr": 3.008911620204613e-07, "epoch": 9.555469655972168, "percentage": 95.55, "elapsed_time": "0:38:08", "remaining_time": "0:01:46", "throughput": 1638.6, "total_tokens": 3749968}
|
| 4964 |
+
{"current_steps": 24725, "total_steps": 25870, "loss": 0.0379, "lr": 2.982878680590301e-07, "epoch": 9.557402396598377, "percentage": 95.57, "elapsed_time": "0:38:08", "remaining_time": "0:01:45", "throughput": 1638.61, "total_tokens": 3750656}
|
| 4965 |
+
{"current_steps": 24730, "total_steps": 25870, "loss": 0.101, "lr": 2.9569581727122985e-07, "epoch": 9.559335137224584, "percentage": 95.59, "elapsed_time": "0:38:09", "remaining_time": "0:01:45", "throughput": 1638.66, "total_tokens": 3751456}
|
| 4966 |
+
{"current_steps": 24735, "total_steps": 25870, "loss": 0.1639, "lr": 2.931150108368502e-07, "epoch": 9.561267877850792, "percentage": 95.61, "elapsed_time": "0:38:09", "remaining_time": "0:01:45", "throughput": 1638.68, "total_tokens": 3752192}
|
| 4967 |
+
{"current_steps": 24740, "total_steps": 25870, "loss": 0.1603, "lr": 2.9054544993056265e-07, "epoch": 9.563200618477001, "percentage": 95.63, "elapsed_time": "0:38:10", "remaining_time": "0:01:44", "throughput": 1638.69, "total_tokens": 3752896}
|
| 4968 |
+
{"current_steps": 24745, "total_steps": 25870, "loss": 0.0486, "lr": 2.879871357219233e-07, "epoch": 9.565133359103209, "percentage": 95.65, "elapsed_time": "0:38:10", "remaining_time": "0:01:44", "throughput": 1638.72, "total_tokens": 3753648}
|
| 4969 |
+
{"current_steps": 24750, "total_steps": 25870, "loss": 0.1618, "lr": 2.854400693753728e-07, "epoch": 9.567066099729416, "percentage": 95.67, "elapsed_time": "0:38:11", "remaining_time": "0:01:43", "throughput": 1638.84, "total_tokens": 3754608}
|
| 4970 |
+
{"current_steps": 24755, "total_steps": 25870, "loss": 0.2732, "lr": 2.8290425205022285e-07, "epoch": 9.568998840355624, "percentage": 95.69, "elapsed_time": "0:38:11", "remaining_time": "0:01:43", "throughput": 1638.85, "total_tokens": 3755296}
|
| 4971 |
+
{"current_steps": 24760, "total_steps": 25870, "loss": 0.1419, "lr": 2.8037968490067236e-07, "epoch": 9.570931580981831, "percentage": 95.71, "elapsed_time": "0:38:11", "remaining_time": "0:01:42", "throughput": 1638.87, "total_tokens": 3756032}
|
| 4972 |
+
{"current_steps": 24765, "total_steps": 25870, "loss": 0.2494, "lr": 2.7786636907579653e-07, "epoch": 9.57286432160804, "percentage": 95.73, "elapsed_time": "0:38:12", "remaining_time": "0:01:42", "throughput": 1638.92, "total_tokens": 3756816}
|
| 4973 |
+
{"current_steps": 24770, "total_steps": 25870, "loss": 0.2025, "lr": 2.753643057195471e-07, "epoch": 9.574797062234248, "percentage": 95.75, "elapsed_time": "0:38:12", "remaining_time": "0:01:41", "throughput": 1638.95, "total_tokens": 3757568}
|
| 4974 |
+
{"current_steps": 24775, "total_steps": 25870, "loss": 0.1722, "lr": 2.7287349597076573e-07, "epoch": 9.576729802860456, "percentage": 95.77, "elapsed_time": "0:38:13", "remaining_time": "0:01:41", "throughput": 1639.04, "total_tokens": 3758448}
|
| 4975 |
+
{"current_steps": 24780, "total_steps": 25870, "loss": 0.16, "lr": 2.7039394096315674e-07, "epoch": 9.578662543486665, "percentage": 95.79, "elapsed_time": "0:38:13", "remaining_time": "0:01:40", "throughput": 1639.04, "total_tokens": 3759152}
|
| 4976 |
+
{"current_steps": 24785, "total_steps": 25870, "loss": 0.0726, "lr": 2.6792564182531175e-07, "epoch": 9.580595284112873, "percentage": 95.81, "elapsed_time": "0:38:13", "remaining_time": "0:01:40", "throughput": 1639.09, "total_tokens": 3759936}
|
| 4977 |
+
{"current_steps": 24790, "total_steps": 25870, "loss": 0.1228, "lr": 2.6546859968069326e-07, "epoch": 9.58252802473908, "percentage": 95.83, "elapsed_time": "0:38:14", "remaining_time": "0:01:39", "throughput": 1639.12, "total_tokens": 3760704}
|
| 4978 |
+
{"current_steps": 24795, "total_steps": 25870, "loss": 0.2239, "lr": 2.6302281564764555e-07, "epoch": 9.584460765365288, "percentage": 95.84, "elapsed_time": "0:38:14", "remaining_time": "0:01:39", "throughput": 1639.14, "total_tokens": 3761408}
|
| 4979 |
+
{"current_steps": 24800, "total_steps": 25870, "loss": 0.3728, "lr": 2.605882908393864e-07, "epoch": 9.586393505991495, "percentage": 95.86, "elapsed_time": "0:38:15", "remaining_time": "0:01:39", "throughput": 1639.16, "total_tokens": 3762144}
|
| 4980 |
+
{"current_steps": 24805, "total_steps": 25870, "loss": 0.1993, "lr": 2.581650263640045e-07, "epoch": 9.588326246617704, "percentage": 95.88, "elapsed_time": "0:38:15", "remaining_time": "0:01:38", "throughput": 1639.2, "total_tokens": 3762912}
|
| 4981 |
+
{"current_steps": 24810, "total_steps": 25870, "loss": 0.2928, "lr": 2.557530233244704e-07, "epoch": 9.590258987243912, "percentage": 95.9, "elapsed_time": "0:38:15", "remaining_time": "0:01:38", "throughput": 1639.23, "total_tokens": 3763664}
|
| 4982 |
+
{"current_steps": 24815, "total_steps": 25870, "loss": 0.2683, "lr": 2.533522828186252e-07, "epoch": 9.59219172787012, "percentage": 95.92, "elapsed_time": "0:38:16", "remaining_time": "0:01:37", "throughput": 1639.27, "total_tokens": 3764432}
|
| 4983 |
+
{"current_steps": 24820, "total_steps": 25870, "loss": 0.3456, "lr": 2.5096280593918386e-07, "epoch": 9.594124468496329, "percentage": 95.94, "elapsed_time": "0:38:16", "remaining_time": "0:01:37", "throughput": 1639.27, "total_tokens": 3765104}
|
| 4984 |
+
{"current_steps": 24825, "total_steps": 25870, "loss": 0.0931, "lr": 2.4858459377373744e-07, "epoch": 9.596057209122536, "percentage": 95.96, "elapsed_time": "0:38:17", "remaining_time": "0:01:36", "throughput": 1639.29, "total_tokens": 3765824}
|
| 4985 |
+
{"current_steps": 24830, "total_steps": 25870, "loss": 0.1185, "lr": 2.462176474047423e-07, "epoch": 9.597989949748744, "percentage": 95.98, "elapsed_time": "0:38:17", "remaining_time": "0:01:36", "throughput": 1639.3, "total_tokens": 3766528}
|
| 4986 |
+
{"current_steps": 24835, "total_steps": 25870, "loss": 0.1361, "lr": 2.438619679095366e-07, "epoch": 9.599922690374951, "percentage": 96.0, "elapsed_time": "0:38:18", "remaining_time": "0:01:35", "throughput": 1639.29, "total_tokens": 3767184}
|
| 4987 |
+
{"current_steps": 24840, "total_steps": 25870, "loss": 0.0706, "lr": 2.415175563603267e-07, "epoch": 9.601855431001159, "percentage": 96.02, "elapsed_time": "0:38:18", "remaining_time": "0:01:35", "throughput": 1639.3, "total_tokens": 3767888}
|
| 4988 |
+
{"current_steps": 24845, "total_steps": 25870, "loss": 0.1869, "lr": 2.391844138241839e-07, "epoch": 9.603788171627368, "percentage": 96.04, "elapsed_time": "0:38:18", "remaining_time": "0:01:34", "throughput": 1639.34, "total_tokens": 3768656}
|
| 4989 |
+
{"current_steps": 24850, "total_steps": 25870, "loss": 0.1021, "lr": 2.3686254136306429e-07, "epoch": 9.605720912253576, "percentage": 96.06, "elapsed_time": "0:38:19", "remaining_time": "0:01:34", "throughput": 1639.47, "total_tokens": 3769632}
|
| 4990 |
+
{"current_steps": 24855, "total_steps": 25870, "loss": 0.1267, "lr": 2.3455194003377813e-07, "epoch": 9.607653652879783, "percentage": 96.08, "elapsed_time": "0:38:19", "remaining_time": "0:01:33", "throughput": 1639.52, "total_tokens": 3770432}
|
| 4991 |
+
{"current_steps": 24860, "total_steps": 25870, "loss": 0.0452, "lr": 2.3225261088801465e-07, "epoch": 9.609586393505992, "percentage": 96.1, "elapsed_time": "0:38:20", "remaining_time": "0:01:33", "throughput": 1639.52, "total_tokens": 3771104}
|
| 4992 |
+
{"current_steps": 24865, "total_steps": 25870, "loss": 0.1827, "lr": 2.299645549723367e-07, "epoch": 9.6115191341322, "percentage": 96.12, "elapsed_time": "0:38:20", "remaining_time": "0:01:32", "throughput": 1639.6, "total_tokens": 3771968}
|
| 4993 |
+
{"current_steps": 24870, "total_steps": 25870, "loss": 0.0887, "lr": 2.2768777332816403e-07, "epoch": 9.613451874758407, "percentage": 96.13, "elapsed_time": "0:38:20", "remaining_time": "0:01:32", "throughput": 1639.63, "total_tokens": 3772720}
|
| 4994 |
+
{"current_steps": 24875, "total_steps": 25870, "loss": 0.2496, "lr": 2.254222669917927e-07, "epoch": 9.615384615384615, "percentage": 96.15, "elapsed_time": "0:38:21", "remaining_time": "0:01:32", "throughput": 1639.66, "total_tokens": 3773472}
|
| 4995 |
+
{"current_steps": 24880, "total_steps": 25870, "loss": 0.176, "lr": 2.231680369943895e-07, "epoch": 9.617317356010822, "percentage": 96.17, "elapsed_time": "0:38:21", "remaining_time": "0:01:31", "throughput": 1639.73, "total_tokens": 3774304}
|
| 4996 |
+
{"current_steps": 24885, "total_steps": 25870, "loss": 0.1823, "lr": 2.209250843619809e-07, "epoch": 9.619250096637032, "percentage": 96.19, "elapsed_time": "0:38:22", "remaining_time": "0:01:31", "throughput": 1639.78, "total_tokens": 3775104}
|
| 4997 |
+
{"current_steps": 24890, "total_steps": 25870, "loss": 0.0781, "lr": 2.186934101154614e-07, "epoch": 9.62118283726324, "percentage": 96.21, "elapsed_time": "0:38:22", "remaining_time": "0:01:30", "throughput": 1639.83, "total_tokens": 3775904}
|
| 4998 |
+
{"current_steps": 24895, "total_steps": 25870, "loss": 0.2331, "lr": 2.164730152706018e-07, "epoch": 9.623115577889447, "percentage": 96.23, "elapsed_time": "0:38:23", "remaining_time": "0:01:30", "throughput": 1639.86, "total_tokens": 3776640}
|
| 4999 |
+
{"current_steps": 24900, "total_steps": 25870, "loss": 0.1741, "lr": 2.1426390083802705e-07, "epoch": 9.625048318515656, "percentage": 96.25, "elapsed_time": "0:38:23", "remaining_time": "0:01:29", "throughput": 1639.93, "total_tokens": 3777472}
|
| 5000 |
+
{"current_steps": 24905, "total_steps": 25870, "loss": 0.1268, "lr": 2.1206606782323002e-07, "epoch": 9.626981059141864, "percentage": 96.27, "elapsed_time": "0:38:23", "remaining_time": "0:01:29", "throughput": 1640.0, "total_tokens": 3778320}
|
| 5001 |
+
{"current_steps": 24910, "total_steps": 25870, "loss": 0.366, "lr": 2.0987951722657718e-07, "epoch": 9.628913799768071, "percentage": 96.29, "elapsed_time": "0:38:24", "remaining_time": "0:01:28", "throughput": 1640.03, "total_tokens": 3779072}
|
| 5002 |
+
{"current_steps": 24915, "total_steps": 25870, "loss": 0.4968, "lr": 2.0770425004329185e-07, "epoch": 9.630846540394279, "percentage": 96.31, "elapsed_time": "0:38:24", "remaining_time": "0:01:28", "throughput": 1640.03, "total_tokens": 3779760}
|
| 5003 |
+
{"current_steps": 24920, "total_steps": 25870, "loss": 0.1131, "lr": 2.0554026726345987e-07, "epoch": 9.632779281020486, "percentage": 96.33, "elapsed_time": "0:38:25", "remaining_time": "0:01:27", "throughput": 1640.1, "total_tokens": 3780592}
|
| 5004 |
+
{"current_steps": 24925, "total_steps": 25870, "loss": 0.0448, "lr": 2.0338756987203778e-07, "epoch": 9.634712021646695, "percentage": 96.35, "elapsed_time": "0:38:25", "remaining_time": "0:01:27", "throughput": 1640.13, "total_tokens": 3781328}
|
| 5005 |
+
{"current_steps": 24930, "total_steps": 25870, "loss": 0.0468, "lr": 2.0124615884884456e-07, "epoch": 9.636644762272903, "percentage": 96.37, "elapsed_time": "0:38:25", "remaining_time": "0:01:26", "throughput": 1640.15, "total_tokens": 3782064}
|
| 5006 |
+
{"current_steps": 24935, "total_steps": 25870, "loss": 0.1043, "lr": 1.9911603516855338e-07, "epoch": 9.63857750289911, "percentage": 96.39, "elapsed_time": "0:38:26", "remaining_time": "0:01:26", "throughput": 1640.15, "total_tokens": 3782752}
|
| 5007 |
+
{"current_steps": 24940, "total_steps": 25870, "loss": 0.1549, "lr": 1.9699719980071362e-07, "epoch": 9.64051024352532, "percentage": 96.41, "elapsed_time": "0:38:26", "remaining_time": "0:01:26", "throughput": 1640.21, "total_tokens": 3783568}
|
| 5008 |
+
{"current_steps": 24945, "total_steps": 25870, "loss": 0.2789, "lr": 1.948896537097261e-07, "epoch": 9.642442984151527, "percentage": 96.42, "elapsed_time": "0:38:27", "remaining_time": "0:01:25", "throughput": 1640.19, "total_tokens": 3784208}
|
| 5009 |
+
{"current_steps": 24950, "total_steps": 25870, "loss": 0.1487, "lr": 1.9279339785485684e-07, "epoch": 9.644375724777735, "percentage": 96.44, "elapsed_time": "0:38:27", "remaining_time": "0:01:25", "throughput": 1640.25, "total_tokens": 3785024}
|
| 5010 |
+
{"current_steps": 24955, "total_steps": 25870, "loss": 0.2323, "lr": 1.9070843319023145e-07, "epoch": 9.646308465403942, "percentage": 96.46, "elapsed_time": "0:38:28", "remaining_time": "0:01:24", "throughput": 1640.25, "total_tokens": 3785712}
|
| 5011 |
+
{"current_steps": 24960, "total_steps": 25870, "loss": 0.2192, "lr": 1.886347606648381e-07, "epoch": 9.64824120603015, "percentage": 96.48, "elapsed_time": "0:38:28", "remaining_time": "0:01:24", "throughput": 1640.32, "total_tokens": 3786544}
|
| 5012 |
+
{"current_steps": 24965, "total_steps": 25870, "loss": 0.1269, "lr": 1.8657238122252452e-07, "epoch": 9.65017394665636, "percentage": 96.5, "elapsed_time": "0:38:28", "remaining_time": "0:01:23", "throughput": 1640.38, "total_tokens": 3787376}
|
| 5013 |
+
{"current_steps": 24970, "total_steps": 25870, "loss": 0.294, "lr": 1.8452129580200094e-07, "epoch": 9.652106687282567, "percentage": 96.52, "elapsed_time": "0:38:29", "remaining_time": "0:01:23", "throughput": 1640.39, "total_tokens": 3788064}
|
| 5014 |
+
{"current_steps": 24975, "total_steps": 25870, "loss": 0.2437, "lr": 1.824815053368345e-07, "epoch": 9.654039427908774, "percentage": 96.54, "elapsed_time": "0:38:29", "remaining_time": "0:01:22", "throughput": 1640.37, "total_tokens": 3788720}
|
| 5015 |
+
{"current_steps": 24980, "total_steps": 25870, "loss": 0.2956, "lr": 1.8045301075544642e-07, "epoch": 9.655972168534984, "percentage": 96.56, "elapsed_time": "0:38:30", "remaining_time": "0:01:22", "throughput": 1640.36, "total_tokens": 3789376}
|
| 5016 |
+
{"current_steps": 24985, "total_steps": 25870, "loss": 0.0315, "lr": 1.7843581298112867e-07, "epoch": 9.657904909161191, "percentage": 96.58, "elapsed_time": "0:38:30", "remaining_time": "0:01:21", "throughput": 1640.4, "total_tokens": 3790144}
|
| 5017 |
+
{"current_steps": 24990, "total_steps": 25870, "loss": 0.1192, "lr": 1.7642991293201904e-07, "epoch": 9.659837649787399, "percentage": 96.6, "elapsed_time": "0:38:30", "remaining_time": "0:01:21", "throughput": 1640.42, "total_tokens": 3790864}
|
| 5018 |
+
{"current_steps": 24995, "total_steps": 25870, "loss": 0.2152, "lr": 1.744353115211178e-07, "epoch": 9.661770390413606, "percentage": 96.62, "elapsed_time": "0:38:31", "remaining_time": "0:01:20", "throughput": 1640.45, "total_tokens": 3791616}
|
| 5019 |
+
{"current_steps": 25000, "total_steps": 25870, "loss": 0.2552, "lr": 1.724520096562876e-07, "epoch": 9.663703131039814, "percentage": 96.64, "elapsed_time": "0:38:31", "remaining_time": "0:01:20", "throughput": 1640.45, "total_tokens": 3792288}
|
| 5020 |
+
{"current_steps": 25005, "total_steps": 25870, "loss": 0.1794, "lr": 1.7048000824024245e-07, "epoch": 9.665635871666023, "percentage": 96.66, "elapsed_time": "0:38:32", "remaining_time": "0:01:19", "throughput": 1640.44, "total_tokens": 3792944}
|
| 5021 |
+
{"current_steps": 25010, "total_steps": 25870, "loss": 0.245, "lr": 1.6851930817054774e-07, "epoch": 9.66756861229223, "percentage": 96.68, "elapsed_time": "0:38:32", "remaining_time": "0:01:19", "throughput": 1640.49, "total_tokens": 3793744}
|
| 5022 |
+
{"current_steps": 25015, "total_steps": 25870, "loss": 0.1618, "lr": 1.6656991033963955e-07, "epoch": 9.669501352918438, "percentage": 96.7, "elapsed_time": "0:38:32", "remaining_time": "0:01:19", "throughput": 1640.54, "total_tokens": 3794544}
|
| 5023 |
+
{"current_steps": 25020, "total_steps": 25870, "loss": 0.1641, "lr": 1.6463181563479702e-07, "epoch": 9.671434093544647, "percentage": 96.71, "elapsed_time": "0:38:33", "remaining_time": "0:01:18", "throughput": 1640.56, "total_tokens": 3795280}
|
| 5024 |
+
{"current_steps": 25025, "total_steps": 25870, "loss": 0.1236, "lr": 1.6270502493815898e-07, "epoch": 9.673366834170855, "percentage": 96.73, "elapsed_time": "0:38:33", "remaining_time": "0:01:18", "throughput": 1640.59, "total_tokens": 3796032}
|
| 5025 |
+
{"current_steps": 25030, "total_steps": 25870, "loss": 0.2125, "lr": 1.6078953912672113e-07, "epoch": 9.675299574797062, "percentage": 96.75, "elapsed_time": "0:38:34", "remaining_time": "0:01:17", "throughput": 1640.65, "total_tokens": 3796848}
|
| 5026 |
+
{"current_steps": 25035, "total_steps": 25870, "loss": 0.1853, "lr": 1.588853590723277e-07, "epoch": 9.67723231542327, "percentage": 96.77, "elapsed_time": "0:38:34", "remaining_time": "0:01:17", "throughput": 1640.71, "total_tokens": 3797664}
|
| 5027 |
+
{"current_steps": 25040, "total_steps": 25870, "loss": 0.2468, "lr": 1.5699248564168545e-07, "epoch": 9.679165056049477, "percentage": 96.79, "elapsed_time": "0:38:35", "remaining_time": "0:01:16", "throughput": 1640.77, "total_tokens": 3798480}
|
| 5028 |
+
{"current_steps": 25045, "total_steps": 25870, "loss": 0.0761, "lr": 1.5511091969634683e-07, "epoch": 9.681097796675687, "percentage": 96.81, "elapsed_time": "0:38:35", "remaining_time": "0:01:16", "throughput": 1640.81, "total_tokens": 3799248}
|
| 5029 |
+
{"current_steps": 25050, "total_steps": 25870, "loss": 0.2343, "lr": 1.5324066209272126e-07, "epoch": 9.683030537301894, "percentage": 96.83, "elapsed_time": "0:38:35", "remaining_time": "0:01:15", "throughput": 1640.87, "total_tokens": 3800064}
|
| 5030 |
+
{"current_steps": 25055, "total_steps": 25870, "loss": 0.1682, "lr": 1.5138171368206943e-07, "epoch": 9.684963277928102, "percentage": 96.85, "elapsed_time": "0:38:36", "remaining_time": "0:01:15", "throughput": 1640.94, "total_tokens": 3800912}
|
| 5031 |
+
{"current_steps": 25060, "total_steps": 25870, "loss": 0.0824, "lr": 1.4953407531051177e-07, "epoch": 9.686896018554311, "percentage": 96.87, "elapsed_time": "0:38:36", "remaining_time": "0:01:14", "throughput": 1640.94, "total_tokens": 3801600}
|
| 5032 |
+
{"current_steps": 25065, "total_steps": 25870, "loss": 0.0784, "lr": 1.476977478190089e-07, "epoch": 9.688828759180518, "percentage": 96.89, "elapsed_time": "0:38:37", "remaining_time": "0:01:14", "throughput": 1640.95, "total_tokens": 3802288}
|
| 5033 |
+
{"current_steps": 25070, "total_steps": 25870, "loss": 0.1194, "lr": 1.4587273204338114e-07, "epoch": 9.690761499806726, "percentage": 96.91, "elapsed_time": "0:38:37", "remaining_time": "0:01:13", "throughput": 1640.96, "total_tokens": 3803008}
|
| 5034 |
+
{"current_steps": 25075, "total_steps": 25870, "loss": 0.2088, "lr": 1.4405902881430288e-07, "epoch": 9.692694240432933, "percentage": 96.93, "elapsed_time": "0:38:37", "remaining_time": "0:01:13", "throughput": 1641.07, "total_tokens": 3803936}
|
| 5035 |
+
{"current_steps": 25080, "total_steps": 25870, "loss": 0.6107, "lr": 1.422566389572888e-07, "epoch": 9.694626981059141, "percentage": 96.95, "elapsed_time": "0:38:38", "remaining_time": "0:01:13", "throughput": 1641.13, "total_tokens": 3804768}
|
| 5036 |
+
{"current_steps": 25085, "total_steps": 25870, "loss": 0.1854, "lr": 1.4046556329271043e-07, "epoch": 9.69655972168535, "percentage": 96.97, "elapsed_time": "0:38:38", "remaining_time": "0:01:12", "throughput": 1641.16, "total_tokens": 3805520}
|
| 5037 |
+
{"current_steps": 25090, "total_steps": 25870, "loss": 0.1288, "lr": 1.3868580263579622e-07, "epoch": 9.698492462311558, "percentage": 96.98, "elapsed_time": "0:38:39", "remaining_time": "0:01:12", "throughput": 1641.2, "total_tokens": 3806272}
|
| 5038 |
+
{"current_steps": 25095, "total_steps": 25870, "loss": 0.1999, "lr": 1.3691735779661207e-07, "epoch": 9.700425202937765, "percentage": 97.0, "elapsed_time": "0:38:39", "remaining_time": "0:01:11", "throughput": 1641.26, "total_tokens": 3807104}
|
| 5039 |
+
{"current_steps": 25100, "total_steps": 25870, "loss": 0.1544, "lr": 1.3516022958008078e-07, "epoch": 9.702357943563975, "percentage": 97.02, "elapsed_time": "0:38:40", "remaining_time": "0:01:11", "throughput": 1641.31, "total_tokens": 3807904}
|
| 5040 |
+
{"current_steps": 25105, "total_steps": 25870, "loss": 0.022, "lr": 1.3341441878597094e-07, "epoch": 9.704290684190182, "percentage": 97.04, "elapsed_time": "0:38:40", "remaining_time": "0:01:10", "throughput": 1641.33, "total_tokens": 3808624}
|
| 5041 |
+
{"current_steps": 25110, "total_steps": 25870, "loss": 0.0525, "lr": 1.3167992620890524e-07, "epoch": 9.70622342481639, "percentage": 97.06, "elapsed_time": "0:38:40", "remaining_time": "0:01:10", "throughput": 1641.35, "total_tokens": 3809360}
|
| 5042 |
+
{"current_steps": 25115, "total_steps": 25870, "loss": 0.3158, "lr": 1.299567526383494e-07, "epoch": 9.708156165442597, "percentage": 97.08, "elapsed_time": "0:38:41", "remaining_time": "0:01:09", "throughput": 1641.33, "total_tokens": 3810000}
|
| 5043 |
+
{"current_steps": 25120, "total_steps": 25870, "loss": 0.0965, "lr": 1.2824489885862046e-07, "epoch": 9.710088906068805, "percentage": 97.1, "elapsed_time": "0:38:41", "remaining_time": "0:01:09", "throughput": 1641.34, "total_tokens": 3810704}
|
| 5044 |
+
{"current_steps": 25125, "total_steps": 25870, "loss": 0.0774, "lr": 1.265443656488785e-07, "epoch": 9.712021646695014, "percentage": 97.12, "elapsed_time": "0:38:42", "remaining_time": "0:01:08", "throughput": 1641.35, "total_tokens": 3811408}
|
| 5045 |
+
{"current_steps": 25130, "total_steps": 25870, "loss": 0.2393, "lr": 1.2485515378313772e-07, "epoch": 9.713954387321222, "percentage": 97.14, "elapsed_time": "0:38:42", "remaining_time": "0:01:08", "throughput": 1641.37, "total_tokens": 3812144}
|
| 5046 |
+
{"current_steps": 25135, "total_steps": 25870, "loss": 0.1959, "lr": 1.2317726403025533e-07, "epoch": 9.715887127947429, "percentage": 97.16, "elapsed_time": "0:38:42", "remaining_time": "0:01:07", "throughput": 1641.48, "total_tokens": 3813072}
|
| 5047 |
+
{"current_steps": 25140, "total_steps": 25870, "loss": 0.3139, "lr": 1.2151069715393148e-07, "epoch": 9.717819868573638, "percentage": 97.18, "elapsed_time": "0:38:43", "remaining_time": "0:01:07", "throughput": 1641.54, "total_tokens": 3813904}
|
| 5048 |
+
{"current_steps": 25145, "total_steps": 25870, "loss": 0.147, "lr": 1.1985545391272336e-07, "epoch": 9.719752609199846, "percentage": 97.2, "elapsed_time": "0:38:43", "remaining_time": "0:01:07", "throughput": 1641.57, "total_tokens": 3814656}
|
| 5049 |
+
{"current_steps": 25150, "total_steps": 25870, "loss": 0.104, "lr": 1.182115350600227e-07, "epoch": 9.721685349826053, "percentage": 97.22, "elapsed_time": "0:38:44", "remaining_time": "0:01:06", "throughput": 1641.61, "total_tokens": 3815440}
|
| 5050 |
+
{"current_steps": 25155, "total_steps": 25870, "loss": 0.1528, "lr": 1.1657894134407544e-07, "epoch": 9.72361809045226, "percentage": 97.24, "elapsed_time": "0:38:44", "remaining_time": "0:01:06", "throughput": 1641.63, "total_tokens": 3816160}
|
| 5051 |
+
{"current_steps": 25160, "total_steps": 25870, "loss": 0.1569, "lr": 1.1495767350796494e-07, "epoch": 9.725550831078468, "percentage": 97.26, "elapsed_time": "0:38:45", "remaining_time": "0:01:05", "throughput": 1641.64, "total_tokens": 3816864}
|
| 5052 |
+
{"current_steps": 25165, "total_steps": 25870, "loss": 0.1351, "lr": 1.1334773228962592e-07, "epoch": 9.727483571704678, "percentage": 97.27, "elapsed_time": "0:38:45", "remaining_time": "0:01:05", "throughput": 1641.62, "total_tokens": 3817504}
|
| 5053 |
+
{"current_steps": 25170, "total_steps": 25870, "loss": 0.3159, "lr": 1.1174911842183888e-07, "epoch": 9.729416312330885, "percentage": 97.29, "elapsed_time": "0:38:45", "remaining_time": "0:01:04", "throughput": 1641.61, "total_tokens": 3818160}
|
| 5054 |
+
{"current_steps": 25175, "total_steps": 25870, "loss": 0.2625, "lr": 1.1016183263221902e-07, "epoch": 9.731349052957093, "percentage": 97.31, "elapsed_time": "0:38:46", "remaining_time": "0:01:04", "throughput": 1641.63, "total_tokens": 3818896}
|
| 5055 |
+
{"current_steps": 25180, "total_steps": 25870, "loss": 0.1059, "lr": 1.0858587564323563e-07, "epoch": 9.733281793583302, "percentage": 97.33, "elapsed_time": "0:38:46", "remaining_time": "0:01:03", "throughput": 1641.71, "total_tokens": 3819744}
|
| 5056 |
+
{"current_steps": 25185, "total_steps": 25870, "loss": 0.2865, "lr": 1.070212481721955e-07, "epoch": 9.73521453420951, "percentage": 97.35, "elapsed_time": "0:38:47", "remaining_time": "0:01:03", "throughput": 1641.71, "total_tokens": 3820432}
|
| 5057 |
+
{"current_steps": 25190, "total_steps": 25870, "loss": 0.1714, "lr": 1.0546795093125118e-07, "epoch": 9.737147274835717, "percentage": 97.37, "elapsed_time": "0:38:47", "remaining_time": "0:01:02", "throughput": 1641.75, "total_tokens": 3821200}
|
| 5058 |
+
{"current_steps": 25195, "total_steps": 25870, "loss": 0.1353, "lr": 1.0392598462739823e-07, "epoch": 9.739080015461925, "percentage": 97.39, "elapsed_time": "0:38:47", "remaining_time": "0:01:02", "throughput": 1641.76, "total_tokens": 3821904}
|
| 5059 |
+
{"current_steps": 25200, "total_steps": 25870, "loss": 0.2265, "lr": 1.0239534996247525e-07, "epoch": 9.741012756088132, "percentage": 97.41, "elapsed_time": "0:38:48", "remaining_time": "0:01:01", "throughput": 1641.77, "total_tokens": 3822608}
|
| 5060 |
+
{"current_steps": 25205, "total_steps": 25870, "loss": 0.064, "lr": 1.0087604763316383e-07, "epoch": 9.742945496714341, "percentage": 97.43, "elapsed_time": "0:38:48", "remaining_time": "0:01:01", "throughput": 1641.87, "total_tokens": 3823520}
|
| 5061 |
+
{"current_steps": 25210, "total_steps": 25870, "loss": 0.2233, "lr": 9.936807833098028e-08, "epoch": 9.744878237340549, "percentage": 97.45, "elapsed_time": "0:38:49", "remaining_time": "0:01:00", "throughput": 1641.98, "total_tokens": 3824464}
|
| 5062 |
+
{"current_steps": 25215, "total_steps": 25870, "loss": 0.116, "lr": 9.787144274229499e-08, "epoch": 9.746810977966756, "percentage": 97.47, "elapsed_time": "0:38:49", "remaining_time": "0:01:00", "throughput": 1641.98, "total_tokens": 3825152}
|
| 5063 |
+
{"current_steps": 25220, "total_steps": 25870, "loss": 0.2243, "lr": 9.638614154830749e-08, "epoch": 9.748743718592966, "percentage": 97.49, "elapsed_time": "0:38:50", "remaining_time": "0:01:00", "throughput": 1641.99, "total_tokens": 3825840}
|
| 5064 |
+
{"current_steps": 25225, "total_steps": 25870, "loss": 0.1454, "lr": 9.49121754250687e-08, "epoch": 9.750676459219173, "percentage": 97.51, "elapsed_time": "0:38:50", "remaining_time": "0:00:59", "throughput": 1642.01, "total_tokens": 3826592}
|
| 5065 |
+
{"current_steps": 25230, "total_steps": 25870, "loss": 0.1698, "lr": 9.344954504346138e-08, "epoch": 9.75260919984538, "percentage": 97.53, "elapsed_time": "0:38:50", "remaining_time": "0:00:59", "throughput": 1642.04, "total_tokens": 3827328}
|
| 5066 |
+
{"current_steps": 25235, "total_steps": 25870, "loss": 0.073, "lr": 9.199825106921688e-08, "epoch": 9.754541940471588, "percentage": 97.55, "elapsed_time": "0:38:51", "remaining_time": "0:00:58", "throughput": 1642.12, "total_tokens": 3828208}
|
| 5067 |
+
{"current_steps": 25240, "total_steps": 25870, "loss": 0.0173, "lr": 9.055829416290129e-08, "epoch": 9.756474681097796, "percentage": 97.56, "elapsed_time": "0:38:51", "remaining_time": "0:00:58", "throughput": 1642.14, "total_tokens": 3828928}
|
| 5068 |
+
{"current_steps": 25245, "total_steps": 25870, "loss": 0.0347, "lr": 8.912967497992086e-08, "epoch": 9.758407421724005, "percentage": 97.58, "elapsed_time": "0:38:52", "remaining_time": "0:00:57", "throughput": 1642.17, "total_tokens": 3829680}
|
| 5069 |
+
{"current_steps": 25250, "total_steps": 25870, "loss": 0.0553, "lr": 8.771239417052491e-08, "epoch": 9.760340162350213, "percentage": 97.6, "elapsed_time": "0:38:52", "remaining_time": "0:00:57", "throughput": 1642.19, "total_tokens": 3830416}
|
| 5070 |
+
{"current_steps": 25255, "total_steps": 25870, "loss": 0.0612, "lr": 8.63064523797974e-08, "epoch": 9.76227290297642, "percentage": 97.62, "elapsed_time": "0:38:52", "remaining_time": "0:00:56", "throughput": 1642.23, "total_tokens": 3831184}
|
| 5071 |
+
{"current_steps": 25260, "total_steps": 25870, "loss": 0.0693, "lr": 8.491185024766257e-08, "epoch": 9.76420564360263, "percentage": 97.64, "elapsed_time": "0:38:53", "remaining_time": "0:00:56", "throughput": 1642.3, "total_tokens": 3832016}
|
| 5072 |
+
{"current_steps": 25265, "total_steps": 25870, "loss": 0.3391, "lr": 8.352858840888767e-08, "epoch": 9.766138384228837, "percentage": 97.66, "elapsed_time": "0:38:53", "remaining_time": "0:00:55", "throughput": 1642.34, "total_tokens": 3832800}
|
| 5073 |
+
{"current_steps": 25270, "total_steps": 25870, "loss": 0.2305, "lr": 8.21566674930746e-08, "epoch": 9.768071124855044, "percentage": 97.68, "elapsed_time": "0:38:54", "remaining_time": "0:00:55", "throughput": 1642.39, "total_tokens": 3833584}
|
| 5074 |
+
{"current_steps": 25275, "total_steps": 25870, "loss": 0.071, "lr": 8.079608812466e-08, "epoch": 9.770003865481252, "percentage": 97.7, "elapsed_time": "0:38:54", "remaining_time": "0:00:54", "throughput": 1642.41, "total_tokens": 3834320}
|
| 5075 |
+
{"current_steps": 25280, "total_steps": 25870, "loss": 0.2922, "lr": 7.944685092292625e-08, "epoch": 9.77193660610746, "percentage": 97.72, "elapsed_time": "0:38:54", "remaining_time": "0:00:54", "throughput": 1642.45, "total_tokens": 3835088}
|
| 5076 |
+
{"current_steps": 25285, "total_steps": 25870, "loss": 0.4593, "lr": 7.81089565019849e-08, "epoch": 9.773869346733669, "percentage": 97.74, "elapsed_time": "0:38:55", "remaining_time": "0:00:54", "throughput": 1642.46, "total_tokens": 3835792}
|
| 5077 |
+
{"current_steps": 25290, "total_steps": 25870, "loss": 0.0977, "lr": 7.678240547079329e-08, "epoch": 9.775802087359876, "percentage": 97.76, "elapsed_time": "0:38:55", "remaining_time": "0:00:53", "throughput": 1642.46, "total_tokens": 3836480}
|
| 5078 |
+
{"current_steps": 25295, "total_steps": 25870, "loss": 0.4057, "lr": 7.546719843313788e-08, "epoch": 9.777734827986084, "percentage": 97.78, "elapsed_time": "0:38:56", "remaining_time": "0:00:53", "throughput": 1642.5, "total_tokens": 3837264}
|
| 5079 |
+
{"current_steps": 25300, "total_steps": 25870, "loss": 0.1941, "lr": 7.416333598764535e-08, "epoch": 9.779667568612293, "percentage": 97.8, "elapsed_time": "0:38:56", "remaining_time": "0:00:52", "throughput": 1642.51, "total_tokens": 3837952}
|
| 5080 |
+
{"current_steps": 25305, "total_steps": 25870, "loss": 0.1263, "lr": 7.28708187277799e-08, "epoch": 9.7816003092385, "percentage": 97.82, "elapsed_time": "0:38:57", "remaining_time": "0:00:52", "throughput": 1642.52, "total_tokens": 3838672}
|
| 5081 |
+
{"current_steps": 25310, "total_steps": 25870, "loss": 0.1283, "lr": 7.158964724184314e-08, "epoch": 9.783533049864708, "percentage": 97.84, "elapsed_time": "0:38:57", "remaining_time": "0:00:51", "throughput": 1642.55, "total_tokens": 3839424}
|
| 5082 |
+
{"current_steps": 25315, "total_steps": 25870, "loss": 0.2434, "lr": 7.031982211296583e-08, "epoch": 9.785465790490916, "percentage": 97.85, "elapsed_time": "0:38:57", "remaining_time": "0:00:51", "throughput": 1642.66, "total_tokens": 3840368}
|
| 5083 |
+
{"current_steps": 25320, "total_steps": 25870, "loss": 0.2664, "lr": 6.906134391912178e-08, "epoch": 9.787398531117123, "percentage": 97.87, "elapsed_time": "0:38:58", "remaining_time": "0:00:50", "throughput": 1642.7, "total_tokens": 3841136}
|
| 5084 |
+
{"current_steps": 25325, "total_steps": 25870, "loss": 0.0734, "lr": 6.781421323311388e-08, "epoch": 9.789331271743333, "percentage": 97.89, "elapsed_time": "0:38:58", "remaining_time": "0:00:50", "throughput": 1642.78, "total_tokens": 3842000}
|
| 5085 |
+
{"current_steps": 25330, "total_steps": 25870, "loss": 0.1171, "lr": 6.657843062258528e-08, "epoch": 9.79126401236954, "percentage": 97.91, "elapsed_time": "0:38:59", "remaining_time": "0:00:49", "throughput": 1642.79, "total_tokens": 3842704}
|
| 5086 |
+
{"current_steps": 25335, "total_steps": 25870, "loss": 0.1166, "lr": 6.535399665001107e-08, "epoch": 9.793196752995748, "percentage": 97.93, "elapsed_time": "0:38:59", "remaining_time": "0:00:49", "throughput": 1642.79, "total_tokens": 3843392}
|
| 5087 |
+
{"current_steps": 25340, "total_steps": 25870, "loss": 0.167, "lr": 6.4140911872701e-08, "epoch": 9.795129493621957, "percentage": 97.95, "elapsed_time": "0:38:59", "remaining_time": "0:00:48", "throughput": 1642.88, "total_tokens": 3844304}
|
| 5088 |
+
{"current_steps": 25345, "total_steps": 25870, "loss": 0.1722, "lr": 6.293917684280503e-08, "epoch": 9.797062234248164, "percentage": 97.97, "elapsed_time": "0:39:00", "remaining_time": "0:00:48", "throughput": 1642.93, "total_tokens": 3845088}
|
| 5089 |
+
{"current_steps": 25350, "total_steps": 25870, "loss": 0.2063, "lr": 6.174879210729401e-08, "epoch": 9.798994974874372, "percentage": 97.99, "elapsed_time": "0:39:00", "remaining_time": "0:00:48", "throughput": 1642.93, "total_tokens": 3845776}
|
| 5090 |
+
{"current_steps": 25355, "total_steps": 25870, "loss": 0.2188, "lr": 6.05697582079845e-08, "epoch": 9.80092771550058, "percentage": 98.01, "elapsed_time": "0:39:01", "remaining_time": "0:00:47", "throughput": 1642.91, "total_tokens": 3846416}
|
| 5091 |
+
{"current_steps": 25360, "total_steps": 25870, "loss": 0.0397, "lr": 5.940207568152778e-08, "epoch": 9.802860456126787, "percentage": 98.03, "elapsed_time": "0:39:01", "remaining_time": "0:00:47", "throughput": 1642.9, "total_tokens": 3847072}
|
| 5092 |
+
{"current_steps": 25365, "total_steps": 25870, "loss": 0.2794, "lr": 5.824574505939595e-08, "epoch": 9.804793196752996, "percentage": 98.05, "elapsed_time": "0:39:02", "remaining_time": "0:00:46", "throughput": 1642.94, "total_tokens": 3847856}
|
| 5093 |
+
{"current_steps": 25370, "total_steps": 25870, "loss": 0.2019, "lr": 5.710076686790411e-08, "epoch": 9.806725937379204, "percentage": 98.07, "elapsed_time": "0:39:02", "remaining_time": "0:00:46", "throughput": 1642.97, "total_tokens": 3848608}
|
| 5094 |
+
{"current_steps": 25375, "total_steps": 25870, "loss": 0.1445, "lr": 5.596714162819927e-08, "epoch": 9.808658678005411, "percentage": 98.09, "elapsed_time": "0:39:02", "remaining_time": "0:00:45", "throughput": 1643.02, "total_tokens": 3849392}
|
| 5095 |
+
{"current_steps": 25380, "total_steps": 25870, "loss": 0.0473, "lr": 5.4844869856257586e-08, "epoch": 9.81059141863162, "percentage": 98.11, "elapsed_time": "0:39:03", "remaining_time": "0:00:45", "throughput": 1643.05, "total_tokens": 3850160}
|
| 5096 |
+
{"current_steps": 25385, "total_steps": 25870, "loss": 0.3002, "lr": 5.3733952062889895e-08, "epoch": 9.812524159257828, "percentage": 98.13, "elapsed_time": "0:39:03", "remaining_time": "0:00:44", "throughput": 1643.06, "total_tokens": 3850864}
|
| 5097 |
+
{"current_steps": 25390, "total_steps": 25870, "loss": 0.2676, "lr": 5.2634388753741716e-08, "epoch": 9.814456899884036, "percentage": 98.14, "elapsed_time": "0:39:04", "remaining_time": "0:00:44", "throughput": 1643.1, "total_tokens": 3851648}
|
| 5098 |
+
{"current_steps": 25395, "total_steps": 25870, "loss": 0.1463, "lr": 5.154618042928494e-08, "epoch": 9.816389640510243, "percentage": 98.16, "elapsed_time": "0:39:04", "remaining_time": "0:00:43", "throughput": 1643.08, "total_tokens": 3852272}
|
| 5099 |
+
{"current_steps": 25400, "total_steps": 25870, "loss": 0.3442, "lr": 5.046932758482892e-08, "epoch": 9.81832238113645, "percentage": 98.18, "elapsed_time": "0:39:04", "remaining_time": "0:00:43", "throughput": 1643.07, "total_tokens": 3852944}
|
| 5100 |
+
{"current_steps": 25405, "total_steps": 25870, "loss": 0.0178, "lr": 4.9403830710506584e-08, "epoch": 9.82025512176266, "percentage": 98.2, "elapsed_time": "0:39:05", "remaining_time": "0:00:42", "throughput": 1643.08, "total_tokens": 3853648}
|
| 5101 |
+
{"current_steps": 25410, "total_steps": 25870, "loss": 0.1573, "lr": 4.834969029129388e-08, "epoch": 9.822187862388867, "percentage": 98.22, "elapsed_time": "0:39:05", "remaining_time": "0:00:42", "throughput": 1643.13, "total_tokens": 3854432}
|
| 5102 |
+
{"current_steps": 25415, "total_steps": 25870, "loss": 0.1263, "lr": 4.730690680698202e-08, "epoch": 9.824120603015075, "percentage": 98.24, "elapsed_time": "0:39:06", "remaining_time": "0:00:42", "throughput": 1643.23, "total_tokens": 3855360}
|
| 5103 |
+
{"current_steps": 25420, "total_steps": 25870, "loss": 0.2335, "lr": 4.627548073221077e-08, "epoch": 9.826053343641284, "percentage": 98.26, "elapsed_time": "0:39:06", "remaining_time": "0:00:41", "throughput": 1643.28, "total_tokens": 3856144}
|
| 5104 |
+
{"current_steps": 25425, "total_steps": 25870, "loss": 0.1048, "lr": 4.525541253643517e-08, "epoch": 9.827986084267492, "percentage": 98.28, "elapsed_time": "0:39:07", "remaining_time": "0:00:41", "throughput": 1643.39, "total_tokens": 3857088}
|
| 5105 |
+
{"current_steps": 25430, "total_steps": 25870, "loss": 0.1832, "lr": 4.42467026839477e-08, "epoch": 9.8299188248937, "percentage": 98.3, "elapsed_time": "0:39:07", "remaining_time": "0:00:40", "throughput": 1643.4, "total_tokens": 3857792}
|
| 5106 |
+
{"current_steps": 25435, "total_steps": 25870, "loss": 0.096, "lr": 4.324935163387556e-08, "epoch": 9.831851565519907, "percentage": 98.32, "elapsed_time": "0:39:07", "remaining_time": "0:00:40", "throughput": 1643.44, "total_tokens": 3858560}
|
| 5107 |
+
{"current_steps": 25440, "total_steps": 25870, "loss": 0.2175, "lr": 4.226335984016672e-08, "epoch": 9.833784306146114, "percentage": 98.34, "elapsed_time": "0:39:08", "remaining_time": "0:00:39", "throughput": 1643.48, "total_tokens": 3859344}
|
| 5108 |
+
{"current_steps": 25445, "total_steps": 25870, "loss": 0.2429, "lr": 4.128872775160386e-08, "epoch": 9.835717046772324, "percentage": 98.36, "elapsed_time": "0:39:08", "remaining_time": "0:00:39", "throughput": 1643.51, "total_tokens": 3860096}
|
| 5109 |
+
{"current_steps": 25450, "total_steps": 25870, "loss": 0.2135, "lr": 4.0325455811796034e-08, "epoch": 9.837649787398531, "percentage": 98.38, "elapsed_time": "0:39:09", "remaining_time": "0:00:38", "throughput": 1643.55, "total_tokens": 3860864}
|
| 5110 |
+
{"current_steps": 25455, "total_steps": 25870, "loss": 0.1391, "lr": 3.937354445918695e-08, "epoch": 9.839582528024739, "percentage": 98.4, "elapsed_time": "0:39:09", "remaining_time": "0:00:38", "throughput": 1643.59, "total_tokens": 3861648}
|
| 5111 |
+
{"current_steps": 25460, "total_steps": 25870, "loss": 0.1153, "lr": 3.843299412704393e-08, "epoch": 9.841515268650948, "percentage": 98.42, "elapsed_time": "0:39:09", "remaining_time": "0:00:37", "throughput": 1643.62, "total_tokens": 3862400}
|
| 5112 |
+
{"current_steps": 25465, "total_steps": 25870, "loss": 0.3846, "lr": 3.750380524346897e-08, "epoch": 9.843448009277155, "percentage": 98.43, "elapsed_time": "0:39:10", "remaining_time": "0:00:37", "throughput": 1643.65, "total_tokens": 3863152}
|
| 5113 |
+
{"current_steps": 25470, "total_steps": 25870, "loss": 0.1766, "lr": 3.658597823138488e-08, "epoch": 9.845380749903363, "percentage": 98.45, "elapsed_time": "0:39:10", "remaining_time": "0:00:36", "throughput": 1643.69, "total_tokens": 3863920}
|
| 5114 |
+
{"current_steps": 25475, "total_steps": 25870, "loss": 0.1637, "lr": 3.567951350855192e-08, "epoch": 9.84731349052957, "percentage": 98.47, "elapsed_time": "0:39:11", "remaining_time": "0:00:36", "throughput": 1643.8, "total_tokens": 3864864}
|
| 5115 |
+
{"current_steps": 25480, "total_steps": 25870, "loss": 0.0538, "lr": 3.4784411487553955e-08, "epoch": 9.849246231155778, "percentage": 98.49, "elapsed_time": "0:39:11", "remaining_time": "0:00:35", "throughput": 1643.84, "total_tokens": 3865648}
|
| 5116 |
+
{"current_steps": 25485, "total_steps": 25870, "loss": 0.4145, "lr": 3.390067257580121e-08, "epoch": 9.851178971781987, "percentage": 98.51, "elapsed_time": "0:39:12", "remaining_time": "0:00:35", "throughput": 1643.94, "total_tokens": 3866560}
|
| 5117 |
+
{"current_steps": 25490, "total_steps": 25870, "loss": 0.0714, "lr": 3.302829717553302e-08, "epoch": 9.853111712408195, "percentage": 98.53, "elapsed_time": "0:39:12", "remaining_time": "0:00:35", "throughput": 1643.96, "total_tokens": 3867280}
|
| 5118 |
+
{"current_steps": 25495, "total_steps": 25870, "loss": 0.1192, "lr": 3.216728568382066e-08, "epoch": 9.855044453034402, "percentage": 98.55, "elapsed_time": "0:39:12", "remaining_time": "0:00:34", "throughput": 1643.96, "total_tokens": 3867968}
|
| 5119 |
+
{"current_steps": 25500, "total_steps": 25870, "loss": 0.2033, "lr": 3.131763849256175e-08, "epoch": 9.856977193660612, "percentage": 98.57, "elapsed_time": "0:39:13", "remaining_time": "0:00:34", "throughput": 1643.98, "total_tokens": 3868704}
|
| 5120 |
+
{"current_steps": 25505, "total_steps": 25870, "loss": 0.1969, "lr": 3.047935598847473e-08, "epoch": 9.85890993428682, "percentage": 98.59, "elapsed_time": "0:39:13", "remaining_time": "0:00:33", "throughput": 1644.02, "total_tokens": 3869472}
|
| 5121 |
+
{"current_steps": 25510, "total_steps": 25870, "loss": 0.1262, "lr": 2.9652438553115503e-08, "epoch": 9.860842674913027, "percentage": 98.61, "elapsed_time": "0:39:14", "remaining_time": "0:00:33", "throughput": 1644.03, "total_tokens": 3870192}
|
| 5122 |
+
{"current_steps": 25515, "total_steps": 25870, "loss": 0.1537, "lr": 2.883688656285799e-08, "epoch": 9.862775415539234, "percentage": 98.63, "elapsed_time": "0:39:14", "remaining_time": "0:00:32", "throughput": 1644.07, "total_tokens": 3870976}
|
| 5123 |
+
{"current_steps": 25520, "total_steps": 25870, "loss": 0.0601, "lr": 2.8032700388910814e-08, "epoch": 9.864708156165442, "percentage": 98.65, "elapsed_time": "0:39:14", "remaining_time": "0:00:32", "throughput": 1644.06, "total_tokens": 3871632}
|
| 5124 |
+
{"current_steps": 25525, "total_steps": 25870, "loss": 0.119, "lr": 2.723988039730063e-08, "epoch": 9.866640896791651, "percentage": 98.67, "elapsed_time": "0:39:15", "remaining_time": "0:00:31", "throughput": 1644.07, "total_tokens": 3872336}
|
| 5125 |
+
{"current_steps": 25530, "total_steps": 25870, "loss": 0.1446, "lr": 2.6458426948888783e-08, "epoch": 9.868573637417859, "percentage": 98.69, "elapsed_time": "0:39:15", "remaining_time": "0:00:31", "throughput": 1644.12, "total_tokens": 3873136}
|
| 5126 |
+
{"current_steps": 25535, "total_steps": 25870, "loss": 0.404, "lr": 2.5688340399357414e-08, "epoch": 9.870506378044066, "percentage": 98.71, "elapsed_time": "0:39:16", "remaining_time": "0:00:30", "throughput": 1644.21, "total_tokens": 3874048}
|
| 5127 |
+
{"current_steps": 25540, "total_steps": 25870, "loss": 0.4245, "lr": 2.492962109922059e-08, "epoch": 9.872439118670275, "percentage": 98.72, "elapsed_time": "0:39:16", "remaining_time": "0:00:30", "throughput": 1644.25, "total_tokens": 3874816}
|
| 5128 |
+
{"current_steps": 25545, "total_steps": 25870, "loss": 0.2655, "lr": 2.4182269393813183e-08, "epoch": 9.874371859296483, "percentage": 98.74, "elapsed_time": "0:39:17", "remaining_time": "0:00:29", "throughput": 1644.22, "total_tokens": 3875440}
|
| 5129 |
+
{"current_steps": 25550, "total_steps": 25870, "loss": 0.0847, "lr": 2.3446285623296426e-08, "epoch": 9.87630459992269, "percentage": 98.76, "elapsed_time": "0:39:17", "remaining_time": "0:00:29", "throughput": 1644.2, "total_tokens": 3876080}
|
| 5130 |
+
{"current_steps": 25555, "total_steps": 25870, "loss": 0.1286, "lr": 2.2721670122663464e-08, "epoch": 9.878237340548898, "percentage": 98.78, "elapsed_time": "0:39:17", "remaining_time": "0:00:29", "throughput": 1644.23, "total_tokens": 3876816}
|
| 5131 |
+
{"current_steps": 25560, "total_steps": 25870, "loss": 0.3101, "lr": 2.2008423221722696e-08, "epoch": 9.880170081175105, "percentage": 98.8, "elapsed_time": "0:39:18", "remaining_time": "0:00:28", "throughput": 1644.29, "total_tokens": 3877648}
|
| 5132 |
+
{"current_steps": 25565, "total_steps": 25870, "loss": 0.0813, "lr": 2.130654524511999e-08, "epoch": 9.882102821801315, "percentage": 98.82, "elapsed_time": "0:39:18", "remaining_time": "0:00:28", "throughput": 1644.29, "total_tokens": 3878336}
|
| 5133 |
+
{"current_steps": 25570, "total_steps": 25870, "loss": 0.2011, "lr": 2.0616036512316472e-08, "epoch": 9.884035562427522, "percentage": 98.84, "elapsed_time": "0:39:19", "remaining_time": "0:00:27", "throughput": 1644.32, "total_tokens": 3879088}
|
| 5134 |
+
{"current_steps": 25575, "total_steps": 25870, "loss": 0.2652, "lr": 1.9936897337605176e-08, "epoch": 9.88596830305373, "percentage": 98.86, "elapsed_time": "0:39:19", "remaining_time": "0:00:27", "throughput": 1644.37, "total_tokens": 3879872}
|
| 5135 |
+
{"current_steps": 25580, "total_steps": 25870, "loss": 0.2441, "lr": 1.9269128030099948e-08, "epoch": 9.887901043679939, "percentage": 98.88, "elapsed_time": "0:39:19", "remaining_time": "0:00:26", "throughput": 1644.4, "total_tokens": 3880624}
|
| 5136 |
+
{"current_steps": 25585, "total_steps": 25870, "loss": 0.1643, "lr": 1.8612728893740993e-08, "epoch": 9.889833784306147, "percentage": 98.9, "elapsed_time": "0:39:20", "remaining_time": "0:00:26", "throughput": 1644.45, "total_tokens": 3881440}
|
| 5137 |
+
{"current_steps": 25590, "total_steps": 25870, "loss": 0.0512, "lr": 1.7967700227292106e-08, "epoch": 9.891766524932354, "percentage": 98.92, "elapsed_time": "0:39:20", "remaining_time": "0:00:25", "throughput": 1644.5, "total_tokens": 3882224}
|
| 5138 |
+
{"current_steps": 25595, "total_steps": 25870, "loss": 0.3936, "lr": 1.7334042324346212e-08, "epoch": 9.893699265558562, "percentage": 98.94, "elapsed_time": "0:39:21", "remaining_time": "0:00:25", "throughput": 1644.53, "total_tokens": 3882992}
|
| 5139 |
+
{"current_steps": 25600, "total_steps": 25870, "loss": 0.097, "lr": 1.671175547331427e-08, "epoch": 9.89563200618477, "percentage": 98.96, "elapsed_time": "0:39:21", "remaining_time": "0:00:24", "throughput": 1644.56, "total_tokens": 3883728}
|
| 5140 |
+
{"current_steps": 25605, "total_steps": 25870, "loss": 0.059, "lr": 1.6100839957439164e-08, "epoch": 9.897564746810978, "percentage": 98.98, "elapsed_time": "0:39:21", "remaining_time": "0:00:24", "throughput": 1644.57, "total_tokens": 3884448}
|
| 5141 |
+
{"current_steps": 25610, "total_steps": 25870, "loss": 0.1348, "lr": 1.5501296054779013e-08, "epoch": 9.899497487437186, "percentage": 98.99, "elapsed_time": "0:39:22", "remaining_time": "0:00:23", "throughput": 1644.6, "total_tokens": 3885184}
|
| 5142 |
+
{"current_steps": 25615, "total_steps": 25870, "loss": 0.0385, "lr": 1.491312403822387e-08, "epoch": 9.901430228063393, "percentage": 99.01, "elapsed_time": "0:39:22", "remaining_time": "0:00:23", "throughput": 1644.66, "total_tokens": 3886016}
|
| 5143 |
+
{"current_steps": 25620, "total_steps": 25870, "loss": 0.3957, "lr": 1.4336324175481808e-08, "epoch": 9.903362968689603, "percentage": 99.03, "elapsed_time": "0:39:23", "remaining_time": "0:00:23", "throughput": 1644.76, "total_tokens": 3886944}
|
| 5144 |
+
{"current_steps": 25625, "total_steps": 25870, "loss": 0.1345, "lr": 1.3770896729092819e-08, "epoch": 9.90529570931581, "percentage": 99.05, "elapsed_time": "0:39:23", "remaining_time": "0:00:22", "throughput": 1644.79, "total_tokens": 3887680}
|
| 5145 |
+
{"current_steps": 25630, "total_steps": 25870, "loss": 0.186, "lr": 1.3216841956409376e-08, "epoch": 9.907228449942018, "percentage": 99.07, "elapsed_time": "0:39:24", "remaining_time": "0:00:22", "throughput": 1644.8, "total_tokens": 3888400}
|
| 5146 |
+
{"current_steps": 25635, "total_steps": 25870, "loss": 0.2066, "lr": 1.2674160109618638e-08, "epoch": 9.909161190568225, "percentage": 99.09, "elapsed_time": "0:39:24", "remaining_time": "0:00:21", "throughput": 1644.78, "total_tokens": 3889024}
|
| 5147 |
+
{"current_steps": 25640, "total_steps": 25870, "loss": 0.1313, "lr": 1.2142851435725799e-08, "epoch": 9.911093931194433, "percentage": 99.11, "elapsed_time": "0:39:24", "remaining_time": "0:00:21", "throughput": 1644.79, "total_tokens": 3889728}
|
| 5148 |
+
{"current_steps": 25645, "total_steps": 25870, "loss": 0.1123, "lr": 1.1622916176556863e-08, "epoch": 9.913026671820642, "percentage": 99.13, "elapsed_time": "0:39:25", "remaining_time": "0:00:20", "throughput": 1644.84, "total_tokens": 3890528}
|
| 5149 |
+
{"current_steps": 25650, "total_steps": 25870, "loss": 0.2266, "lr": 1.1114354568766972e-08, "epoch": 9.91495941244685, "percentage": 99.15, "elapsed_time": "0:39:25", "remaining_time": "0:00:20", "throughput": 1644.85, "total_tokens": 3891248}
|
| 5150 |
+
{"current_steps": 25655, "total_steps": 25870, "loss": 0.1387, "lr": 1.0617166843832071e-08, "epoch": 9.916892153073057, "percentage": 99.17, "elapsed_time": "0:39:26", "remaining_time": "0:00:19", "throughput": 1644.85, "total_tokens": 3891936}
|
| 5151 |
+
{"current_steps": 25660, "total_steps": 25870, "loss": 0.1062, "lr": 1.0131353228048924e-08, "epoch": 9.918824893699266, "percentage": 99.19, "elapsed_time": "0:39:26", "remaining_time": "0:00:19", "throughput": 1644.87, "total_tokens": 3892656}
|
| 5152 |
+
{"current_steps": 25665, "total_steps": 25870, "loss": 0.1177, "lr": 9.656913942540646e-09, "epoch": 9.920757634325474, "percentage": 99.21, "elapsed_time": "0:39:26", "remaining_time": "0:00:18", "throughput": 1644.89, "total_tokens": 3893376}
|
| 5153 |
+
{"current_steps": 25670, "total_steps": 25870, "loss": 0.0738, "lr": 9.19384920325117e-09, "epoch": 9.922690374951681, "percentage": 99.23, "elapsed_time": "0:39:27", "remaining_time": "0:00:18", "throughput": 1645.03, "total_tokens": 3894400}
|
| 5154 |
+
{"current_steps": 25675, "total_steps": 25870, "loss": 0.0757, "lr": 8.742159220950785e-09, "epoch": 9.924623115577889, "percentage": 99.25, "elapsed_time": "0:39:27", "remaining_time": "0:00:17", "throughput": 1645.08, "total_tokens": 3895200}
|
| 5155 |
+
{"current_steps": 25680, "total_steps": 25870, "loss": 0.0853, "lr": 8.30184420122504e-09, "epoch": 9.926555856204097, "percentage": 99.27, "elapsed_time": "0:39:28", "remaining_time": "0:00:17", "throughput": 1645.13, "total_tokens": 3896000}
|
| 5156 |
+
{"current_steps": 25685, "total_steps": 25870, "loss": 0.2231, "lr": 7.872904344491395e-09, "epoch": 9.928488596830306, "percentage": 99.28, "elapsed_time": "0:39:28", "remaining_time": "0:00:17", "throughput": 1645.2, "total_tokens": 3896832}
|
| 5157 |
+
{"current_steps": 25690, "total_steps": 25870, "loss": 0.0735, "lr": 7.455339845982567e-09, "epoch": 9.930421337456513, "percentage": 99.3, "elapsed_time": "0:39:29", "remaining_time": "0:00:16", "throughput": 1645.19, "total_tokens": 3897504}
|
| 5158 |
+
{"current_steps": 25695, "total_steps": 25870, "loss": 0.099, "lr": 7.049150895754864e-09, "epoch": 9.93235407808272, "percentage": 99.32, "elapsed_time": "0:39:29", "remaining_time": "0:00:16", "throughput": 1645.23, "total_tokens": 3898272}
|
| 5159 |
+
{"current_steps": 25700, "total_steps": 25870, "loss": 0.0833, "lr": 6.654337678690947e-09, "epoch": 9.93428681870893, "percentage": 99.34, "elapsed_time": "0:39:29", "remaining_time": "0:00:15", "throughput": 1645.24, "total_tokens": 3898992}
|
| 5160 |
+
{"current_steps": 25705, "total_steps": 25870, "loss": 0.2237, "lr": 6.270900374491518e-09, "epoch": 9.936219559335138, "percentage": 99.36, "elapsed_time": "0:39:30", "remaining_time": "0:00:15", "throughput": 1645.29, "total_tokens": 3899792}
|
| 5161 |
+
{"current_steps": 25710, "total_steps": 25870, "loss": 0.0704, "lr": 5.898839157680858e-09, "epoch": 9.938152299961345, "percentage": 99.38, "elapsed_time": "0:39:30", "remaining_time": "0:00:14", "throughput": 1645.28, "total_tokens": 3900448}
|
| 5162 |
+
{"current_steps": 25715, "total_steps": 25870, "loss": 0.2757, "lr": 5.5381541976068375e-09, "epoch": 9.940085040587553, "percentage": 99.4, "elapsed_time": "0:39:31", "remaining_time": "0:00:14", "throughput": 1645.33, "total_tokens": 3901248}
|
| 5163 |
+
{"current_steps": 25720, "total_steps": 25870, "loss": 0.2418, "lr": 5.188845658438135e-09, "epoch": 9.94201778121376, "percentage": 99.42, "elapsed_time": "0:39:31", "remaining_time": "0:00:13", "throughput": 1645.35, "total_tokens": 3901984}
|
| 5164 |
+
{"current_steps": 25725, "total_steps": 25870, "loss": 0.1437, "lr": 4.850913699164239e-09, "epoch": 9.94395052183997, "percentage": 99.44, "elapsed_time": "0:39:31", "remaining_time": "0:00:13", "throughput": 1645.38, "total_tokens": 3902736}
|
| 5165 |
+
{"current_steps": 25730, "total_steps": 25870, "loss": 0.1241, "lr": 4.524358473598223e-09, "epoch": 9.945883262466177, "percentage": 99.46, "elapsed_time": "0:39:32", "remaining_time": "0:00:12", "throughput": 1645.46, "total_tokens": 3903600}
|
| 5166 |
+
{"current_steps": 25735, "total_steps": 25870, "loss": 0.2341, "lr": 4.209180130371193e-09, "epoch": 9.947816003092385, "percentage": 99.48, "elapsed_time": "0:39:32", "remaining_time": "0:00:12", "throughput": 1645.47, "total_tokens": 3904304}
|
| 5167 |
+
{"current_steps": 25740, "total_steps": 25870, "loss": 0.3132, "lr": 3.905378812943395e-09, "epoch": 9.949748743718594, "percentage": 99.5, "elapsed_time": "0:39:33", "remaining_time": "0:00:11", "throughput": 1645.53, "total_tokens": 3905136}
|
| 5168 |
+
{"current_steps": 25745, "total_steps": 25870, "loss": 0.2163, "lr": 3.6129546595903286e-09, "epoch": 9.951681484344801, "percentage": 99.52, "elapsed_time": "0:39:33", "remaining_time": "0:00:11", "throughput": 1645.54, "total_tokens": 3905840}
|
| 5169 |
+
{"current_steps": 25750, "total_steps": 25870, "loss": 0.1102, "lr": 3.3319078034110828e-09, "epoch": 9.953614224971009, "percentage": 99.54, "elapsed_time": "0:39:34", "remaining_time": "0:00:11", "throughput": 1645.64, "total_tokens": 3906752}
|
| 5170 |
+
{"current_steps": 25755, "total_steps": 25870, "loss": 0.0837, "lr": 3.062238372325554e-09, "epoch": 9.955546965597216, "percentage": 99.56, "elapsed_time": "0:39:34", "remaining_time": "0:00:10", "throughput": 1645.64, "total_tokens": 3907456}
|
| 5171 |
+
{"current_steps": 25760, "total_steps": 25870, "loss": 0.0817, "lr": 2.8039464890744493e-09, "epoch": 9.957479706223424, "percentage": 99.57, "elapsed_time": "0:39:34", "remaining_time": "0:00:10", "throughput": 1645.65, "total_tokens": 3908160}
|
| 5172 |
+
{"current_steps": 25765, "total_steps": 25870, "loss": 0.3036, "lr": 2.5570322712276107e-09, "epoch": 9.959412446849633, "percentage": 99.59, "elapsed_time": "0:39:35", "remaining_time": "0:00:09", "throughput": 1645.74, "total_tokens": 3909056}
|
| 5173 |
+
{"current_steps": 25770, "total_steps": 25870, "loss": 0.1298, "lr": 2.3214958311645885e-09, "epoch": 9.96134518747584, "percentage": 99.61, "elapsed_time": "0:39:35", "remaining_time": "0:00:09", "throughput": 1645.81, "total_tokens": 3909888}
|
| 5174 |
+
{"current_steps": 25775, "total_steps": 25870, "loss": 0.0709, "lr": 2.0973372760912937e-09, "epoch": 9.963277928102048, "percentage": 99.63, "elapsed_time": "0:39:36", "remaining_time": "0:00:08", "throughput": 1645.88, "total_tokens": 3910752}
|
| 5175 |
+
{"current_steps": 25780, "total_steps": 25870, "loss": 0.2867, "lr": 1.8845567080372216e-09, "epoch": 9.965210668728258, "percentage": 99.65, "elapsed_time": "0:39:36", "remaining_time": "0:00:08", "throughput": 1645.91, "total_tokens": 3911488}
|
| 5176 |
+
{"current_steps": 25785, "total_steps": 25870, "loss": 0.0891, "lr": 1.6831542238499032e-09, "epoch": 9.967143409354465, "percentage": 99.67, "elapsed_time": "0:39:36", "remaining_time": "0:00:07", "throughput": 1645.95, "total_tokens": 3912272}
|
| 5177 |
+
{"current_steps": 25790, "total_steps": 25870, "loss": 0.2585, "lr": 1.4931299152004529e-09, "epoch": 9.969076149980673, "percentage": 99.69, "elapsed_time": "0:39:37", "remaining_time": "0:00:07", "throughput": 1645.93, "total_tokens": 3912896}
|
| 5178 |
+
{"current_steps": 25795, "total_steps": 25870, "loss": 0.2991, "lr": 1.314483868580796e-09, "epoch": 9.97100889060688, "percentage": 99.71, "elapsed_time": "0:39:37", "remaining_time": "0:00:06", "throughput": 1645.93, "total_tokens": 3913584}
|
| 5179 |
+
{"current_steps": 25800, "total_steps": 25870, "loss": 0.0834, "lr": 1.1472161653008905e-09, "epoch": 9.972941631233088, "percentage": 99.73, "elapsed_time": "0:39:38", "remaining_time": "0:00:06", "throughput": 1645.94, "total_tokens": 3914288}
|
| 5180 |
+
{"current_steps": 25805, "total_steps": 25870, "loss": 0.1454, "lr": 9.913268814942812e-10, "epoch": 9.974874371859297, "percentage": 99.75, "elapsed_time": "0:39:38", "remaining_time": "0:00:05", "throughput": 1645.98, "total_tokens": 3915072}
|
| 5181 |
+
{"current_steps": 25810, "total_steps": 25870, "loss": 0.2329, "lr": 8.468160881153209e-10, "epoch": 9.976807112485504, "percentage": 99.77, "elapsed_time": "0:39:38", "remaining_time": "0:00:05", "throughput": 1645.98, "total_tokens": 3915760}
|
| 5182 |
+
{"current_steps": 25815, "total_steps": 25870, "loss": 0.1856, "lr": 7.13683850939173e-10, "epoch": 9.978739853111712, "percentage": 99.79, "elapsed_time": "0:39:39", "remaining_time": "0:00:05", "throughput": 1645.97, "total_tokens": 3916432}
|
| 5183 |
+
{"current_steps": 25820, "total_steps": 25870, "loss": 0.0941, "lr": 5.919302305618101e-10, "epoch": 9.980672593737921, "percentage": 99.81, "elapsed_time": "0:39:39", "remaining_time": "0:00:04", "throughput": 1646.0, "total_tokens": 3917184}
|
| 5184 |
+
{"current_steps": 25825, "total_steps": 25870, "loss": 0.2357, "lr": 4.815552824000147e-10, "epoch": 9.982605334364129, "percentage": 99.83, "elapsed_time": "0:39:40", "remaining_time": "0:00:04", "throughput": 1646.04, "total_tokens": 3917952}
|
| 5185 |
+
{"current_steps": 25830, "total_steps": 25870, "loss": 0.1008, "lr": 3.8255905669415394e-10, "epoch": 9.984538074990336, "percentage": 99.85, "elapsed_time": "0:39:40", "remaining_time": "0:00:03", "throughput": 1646.09, "total_tokens": 3918736}
|
| 5186 |
+
{"current_steps": 25835, "total_steps": 25870, "loss": 0.0511, "lr": 2.949415984998538e-10, "epoch": 9.986470815616544, "percentage": 99.86, "elapsed_time": "0:39:41", "remaining_time": "0:00:03", "throughput": 1646.08, "total_tokens": 3919408}
|
| 5187 |
+
{"current_steps": 25840, "total_steps": 25870, "loss": 0.2496, "lr": 2.1870294770187648e-10, "epoch": 9.988403556242751, "percentage": 99.88, "elapsed_time": "0:39:41", "remaining_time": "0:00:02", "throughput": 1646.12, "total_tokens": 3920176}
|
| 5188 |
+
{"current_steps": 25845, "total_steps": 25870, "loss": 0.1108, "lr": 1.5384313899469148e-10, "epoch": 9.99033629686896, "percentage": 99.9, "elapsed_time": "0:39:41", "remaining_time": "0:00:02", "throughput": 1646.1, "total_tokens": 3920832}
|
| 5189 |
+
{"current_steps": 25850, "total_steps": 25870, "loss": 0.1871, "lr": 1.003622019019046e-10, "epoch": 9.992269037495168, "percentage": 99.92, "elapsed_time": "0:39:42", "remaining_time": "0:00:01", "throughput": 1646.16, "total_tokens": 3921648}
|
| 5190 |
+
{"current_steps": 25855, "total_steps": 25870, "loss": 0.1578, "lr": 5.826016076793117e-11, "epoch": 9.994201778121376, "percentage": 99.94, "elapsed_time": "0:39:42", "remaining_time": "0:00:01", "throughput": 1646.18, "total_tokens": 3922368}
|
| 5191 |
+
{"current_steps": 25860, "total_steps": 25870, "loss": 0.2741, "lr": 2.7537034752445067e-11, "epoch": 9.996134518747585, "percentage": 99.96, "elapsed_time": "0:39:43", "remaining_time": "0:00:00", "throughput": 1646.16, "total_tokens": 3923008}
|
| 5192 |
+
{"current_steps": 25865, "total_steps": 25870, "loss": 0.2628, "lr": 8.192837841480837e-12, "epoch": 9.998067259373792, "percentage": 99.98, "elapsed_time": "0:39:43", "remaining_time": "0:00:00", "throughput": 1646.24, "total_tokens": 3923888}
|
| 5193 |
+
{"current_steps": 25870, "total_steps": 25870, "loss": 0.1342, "lr": 2.2757884188262524e-13, "epoch": 10.0, "percentage": 100.0, "elapsed_time": "0:39:44", "remaining_time": "0:00:00", "throughput": 1646.22, "total_tokens": 3924688}
|
| 5194 |
+
{"current_steps": 25870, "total_steps": 25870, "epoch": 10.0, "percentage": 100.0, "elapsed_time": "0:39:45", "remaining_time": "0:00:00", "throughput": 1645.21, "total_tokens": 3924688}
|