Training in progress, step 15694
Browse files- adapter_model.safetensors +1 -1
- trainer_log.jsonl +166 -0
adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1638528
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6cdf053bce4a09b556887d86be29e6556aba5cb876d1fde71b6ca47a2d74d3df
|
| 3 |
size 1638528
|
trainer_log.jsonl
CHANGED
|
@@ -2992,3 +2992,169 @@
|
|
| 2992 |
{"current_steps": 14870, "total_steps": 16520, "loss": 0.0017, "lr": 3.0117565848956863e-05, "epoch": 18.002421307506054, "percentage": 90.01, "elapsed_time": "0:49:04", "remaining_time": "0:05:26", "throughput": 2068.6, "total_tokens": 6091120}
|
| 2993 |
{"current_steps": 14875, "total_steps": 16520, "loss": 0.0029, "lr": 2.9937261775058676e-05, "epoch": 18.008474576271187, "percentage": 90.04, "elapsed_time": "0:49:05", "remaining_time": "0:05:25", "throughput": 2068.63, "total_tokens": 6093232}
|
| 2994 |
{"current_steps": 14880, "total_steps": 16520, "loss": 0.0004, "lr": 2.9757482376237488e-05, "epoch": 18.01452784503632, "percentage": 90.07, "elapsed_time": "0:49:06", "remaining_time": "0:05:24", "throughput": 2068.72, "total_tokens": 6095472}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2992 |
{"current_steps": 14870, "total_steps": 16520, "loss": 0.0017, "lr": 3.0117565848956863e-05, "epoch": 18.002421307506054, "percentage": 90.01, "elapsed_time": "0:49:04", "remaining_time": "0:05:26", "throughput": 2068.6, "total_tokens": 6091120}
|
| 2993 |
{"current_steps": 14875, "total_steps": 16520, "loss": 0.0029, "lr": 2.9937261775058676e-05, "epoch": 18.008474576271187, "percentage": 90.04, "elapsed_time": "0:49:05", "remaining_time": "0:05:25", "throughput": 2068.63, "total_tokens": 6093232}
|
| 2994 |
{"current_steps": 14880, "total_steps": 16520, "loss": 0.0004, "lr": 2.9757482376237488e-05, "epoch": 18.01452784503632, "percentage": 90.07, "elapsed_time": "0:49:06", "remaining_time": "0:05:24", "throughput": 2068.72, "total_tokens": 6095472}
|
| 2995 |
+
{"current_steps": 14885, "total_steps": 16520, "loss": 0.0003, "lr": 2.957822785315978e-05, "epoch": 18.020581113801452, "percentage": 90.1, "elapsed_time": "0:49:07", "remaining_time": "0:05:23", "throughput": 2068.75, "total_tokens": 6097456}
|
| 2996 |
+
{"current_steps": 14890, "total_steps": 16520, "loss": 0.0014, "lr": 2.9399498405905778e-05, "epoch": 18.026634382566584, "percentage": 90.13, "elapsed_time": "0:49:08", "remaining_time": "0:05:22", "throughput": 2068.76, "total_tokens": 6099472}
|
| 2997 |
+
{"current_steps": 14895, "total_steps": 16520, "loss": 0.0006, "lr": 2.9221294233970074e-05, "epoch": 18.03268765133172, "percentage": 90.16, "elapsed_time": "0:49:09", "remaining_time": "0:05:21", "throughput": 2068.79, "total_tokens": 6101456}
|
| 2998 |
+
{"current_steps": 14900, "total_steps": 16520, "loss": 0.0009, "lr": 2.9043615536260725e-05, "epoch": 18.038740920096853, "percentage": 90.19, "elapsed_time": "0:49:10", "remaining_time": "0:05:20", "throughput": 2068.81, "total_tokens": 6103472}
|
| 2999 |
+
{"current_steps": 14905, "total_steps": 16520, "loss": 0.0016, "lr": 2.8866462511099313e-05, "epoch": 18.044794188861985, "percentage": 90.22, "elapsed_time": "0:49:11", "remaining_time": "0:05:19", "throughput": 2068.77, "total_tokens": 6105648}
|
| 3000 |
+
{"current_steps": 14910, "total_steps": 16520, "loss": 0.0012, "lr": 2.8689835356220607e-05, "epoch": 18.050847457627118, "percentage": 90.25, "elapsed_time": "0:49:12", "remaining_time": "0:05:18", "throughput": 2068.79, "total_tokens": 6107664}
|
| 3001 |
+
{"current_steps": 14915, "total_steps": 16520, "loss": 0.0023, "lr": 2.8513734268772574e-05, "epoch": 18.05690072639225, "percentage": 90.28, "elapsed_time": "0:49:13", "remaining_time": "0:05:17", "throughput": 2068.8, "total_tokens": 6109648}
|
| 3002 |
+
{"current_steps": 14920, "total_steps": 16520, "loss": 0.0016, "lr": 2.8338159445315758e-05, "epoch": 18.062953995157386, "percentage": 90.31, "elapsed_time": "0:49:14", "remaining_time": "0:05:16", "throughput": 2068.85, "total_tokens": 6111824}
|
| 3003 |
+
{"current_steps": 14925, "total_steps": 16520, "loss": 0.0025, "lr": 2.816311108182368e-05, "epoch": 18.06900726392252, "percentage": 90.35, "elapsed_time": "0:49:15", "remaining_time": "0:05:15", "throughput": 2068.87, "total_tokens": 6113776}
|
| 3004 |
+
{"current_steps": 14930, "total_steps": 16520, "loss": 0.0006, "lr": 2.7988589373681705e-05, "epoch": 18.07506053268765, "percentage": 90.38, "elapsed_time": "0:49:16", "remaining_time": "0:05:14", "throughput": 2068.9, "total_tokens": 6115856}
|
| 3005 |
+
{"current_steps": 14935, "total_steps": 16520, "loss": 0.0029, "lr": 2.7814594515687797e-05, "epoch": 18.081113801452783, "percentage": 90.41, "elapsed_time": "0:49:17", "remaining_time": "0:05:13", "throughput": 2068.94, "total_tokens": 6117968}
|
| 3006 |
+
{"current_steps": 14940, "total_steps": 16520, "loss": 0.0005, "lr": 2.7641126702051766e-05, "epoch": 18.087167070217916, "percentage": 90.44, "elapsed_time": "0:49:17", "remaining_time": "0:05:12", "throughput": 2068.95, "total_tokens": 6119952}
|
| 3007 |
+
{"current_steps": 14945, "total_steps": 16520, "loss": 0.0014, "lr": 2.7468186126394847e-05, "epoch": 18.093220338983052, "percentage": 90.47, "elapsed_time": "0:49:18", "remaining_time": "0:05:11", "throughput": 2068.98, "total_tokens": 6122000}
|
| 3008 |
+
{"current_steps": 14950, "total_steps": 16520, "loss": 0.0091, "lr": 2.729577298175029e-05, "epoch": 18.099273607748184, "percentage": 90.5, "elapsed_time": "0:49:19", "remaining_time": "0:05:10", "throughput": 2069.01, "total_tokens": 6124048}
|
| 3009 |
+
{"current_steps": 14955, "total_steps": 16520, "loss": 0.003, "lr": 2.71238874605621e-05, "epoch": 18.105326876513317, "percentage": 90.53, "elapsed_time": "0:49:20", "remaining_time": "0:05:09", "throughput": 2069.03, "total_tokens": 6125968}
|
| 3010 |
+
{"current_steps": 14960, "total_steps": 16520, "loss": 0.0006, "lr": 2.6952529754685816e-05, "epoch": 18.11138014527845, "percentage": 90.56, "elapsed_time": "0:49:21", "remaining_time": "0:05:08", "throughput": 2069.04, "total_tokens": 6127952}
|
| 3011 |
+
{"current_steps": 14965, "total_steps": 16520, "loss": 0.0019, "lr": 2.6781700055387604e-05, "epoch": 18.11743341404358, "percentage": 90.59, "elapsed_time": "0:49:22", "remaining_time": "0:05:07", "throughput": 2069.06, "total_tokens": 6129968}
|
| 3012 |
+
{"current_steps": 14970, "total_steps": 16520, "loss": 0.0005, "lr": 2.6611398553344268e-05, "epoch": 18.123486682808718, "percentage": 90.62, "elapsed_time": "0:49:23", "remaining_time": "0:05:06", "throughput": 2069.11, "total_tokens": 6132048}
|
| 3013 |
+
{"current_steps": 14975, "total_steps": 16520, "loss": 0.001, "lr": 2.64416254386432e-05, "epoch": 18.12953995157385, "percentage": 90.65, "elapsed_time": "0:49:24", "remaining_time": "0:05:05", "throughput": 2069.14, "total_tokens": 6134096}
|
| 3014 |
+
{"current_steps": 14980, "total_steps": 16520, "loss": 0.0007, "lr": 2.6272380900781824e-05, "epoch": 18.135593220338983, "percentage": 90.68, "elapsed_time": "0:49:25", "remaining_time": "0:05:04", "throughput": 2069.18, "total_tokens": 6136208}
|
| 3015 |
+
{"current_steps": 14985, "total_steps": 16520, "loss": 0.0004, "lr": 2.610366512866763e-05, "epoch": 18.141646489104115, "percentage": 90.71, "elapsed_time": "0:49:26", "remaining_time": "0:05:03", "throughput": 2069.2, "total_tokens": 6138224}
|
| 3016 |
+
{"current_steps": 14990, "total_steps": 16520, "loss": 0.0018, "lr": 2.5935478310618155e-05, "epoch": 18.147699757869248, "percentage": 90.74, "elapsed_time": "0:49:27", "remaining_time": "0:05:02", "throughput": 2069.22, "total_tokens": 6140272}
|
| 3017 |
+
{"current_steps": 14995, "total_steps": 16520, "loss": 0.0015, "lr": 2.5767820634360005e-05, "epoch": 18.153753026634384, "percentage": 90.77, "elapsed_time": "0:49:28", "remaining_time": "0:05:01", "throughput": 2069.3, "total_tokens": 6142512}
|
| 3018 |
+
{"current_steps": 15000, "total_steps": 16520, "loss": 0.0005, "lr": 2.5600692287029825e-05, "epoch": 18.159806295399516, "percentage": 90.8, "elapsed_time": "0:49:29", "remaining_time": "0:05:00", "throughput": 2069.31, "total_tokens": 6144432}
|
| 3019 |
+
{"current_steps": 15005, "total_steps": 16520, "loss": 0.004, "lr": 2.5434093455172892e-05, "epoch": 18.16585956416465, "percentage": 90.83, "elapsed_time": "0:49:30", "remaining_time": "0:04:59", "throughput": 2069.32, "total_tokens": 6146384}
|
| 3020 |
+
{"current_steps": 15010, "total_steps": 16520, "loss": 0.0017, "lr": 2.5268024324743675e-05, "epoch": 18.17191283292978, "percentage": 90.86, "elapsed_time": "0:49:31", "remaining_time": "0:04:58", "throughput": 2069.34, "total_tokens": 6148400}
|
| 3021 |
+
{"current_steps": 15015, "total_steps": 16520, "loss": 0.0004, "lr": 2.5102485081105463e-05, "epoch": 18.177966101694917, "percentage": 90.89, "elapsed_time": "0:49:32", "remaining_time": "0:04:57", "throughput": 2069.36, "total_tokens": 6150352}
|
| 3022 |
+
{"current_steps": 15020, "total_steps": 16520, "loss": 0.0008, "lr": 2.4937475909029895e-05, "epoch": 18.18401937046005, "percentage": 90.92, "elapsed_time": "0:49:33", "remaining_time": "0:04:56", "throughput": 2069.43, "total_tokens": 6152560}
|
| 3023 |
+
{"current_steps": 15025, "total_steps": 16520, "loss": 0.0031, "lr": 2.4772996992697082e-05, "epoch": 18.190072639225182, "percentage": 90.95, "elapsed_time": "0:49:34", "remaining_time": "0:04:55", "throughput": 2069.49, "total_tokens": 6154672}
|
| 3024 |
+
{"current_steps": 15030, "total_steps": 16520, "loss": 0.0005, "lr": 2.460904851569534e-05, "epoch": 18.196125907990314, "percentage": 90.98, "elapsed_time": "0:49:34", "remaining_time": "0:04:54", "throughput": 2069.53, "total_tokens": 6156784}
|
| 3025 |
+
{"current_steps": 15035, "total_steps": 16520, "loss": 0.0012, "lr": 2.4445630661020735e-05, "epoch": 18.202179176755447, "percentage": 91.01, "elapsed_time": "0:49:35", "remaining_time": "0:04:53", "throughput": 2069.6, "total_tokens": 6158992}
|
| 3026 |
+
{"current_steps": 15040, "total_steps": 16520, "loss": 0.0005, "lr": 2.4282743611077186e-05, "epoch": 18.208232445520583, "percentage": 91.04, "elapsed_time": "0:49:36", "remaining_time": "0:04:52", "throughput": 2069.65, "total_tokens": 6160976}
|
| 3027 |
+
{"current_steps": 15045, "total_steps": 16520, "loss": 0.0005, "lr": 2.412038754767626e-05, "epoch": 18.214285714285715, "percentage": 91.07, "elapsed_time": "0:49:37", "remaining_time": "0:04:51", "throughput": 2069.7, "total_tokens": 6163120}
|
| 3028 |
+
{"current_steps": 15050, "total_steps": 16520, "loss": 0.0011, "lr": 2.3958562652036497e-05, "epoch": 18.220338983050848, "percentage": 91.1, "elapsed_time": "0:49:38", "remaining_time": "0:04:50", "throughput": 2069.74, "total_tokens": 6165168}
|
| 3029 |
+
{"current_steps": 15055, "total_steps": 16520, "loss": 0.0033, "lr": 2.379726910478386e-05, "epoch": 18.22639225181598, "percentage": 91.13, "elapsed_time": "0:49:39", "remaining_time": "0:04:49", "throughput": 2069.77, "total_tokens": 6167152}
|
| 3030 |
+
{"current_steps": 15060, "total_steps": 16520, "loss": 0.0006, "lr": 2.3636507085951163e-05, "epoch": 18.232445520581113, "percentage": 91.16, "elapsed_time": "0:49:40", "remaining_time": "0:04:48", "throughput": 2069.8, "total_tokens": 6169136}
|
| 3031 |
+
{"current_steps": 15065, "total_steps": 16520, "loss": 0.001, "lr": 2.347627677497788e-05, "epoch": 18.23849878934625, "percentage": 91.19, "elapsed_time": "0:49:41", "remaining_time": "0:04:47", "throughput": 2069.82, "total_tokens": 6171184}
|
| 3032 |
+
{"current_steps": 15070, "total_steps": 16520, "loss": 0.0012, "lr": 2.3316578350709993e-05, "epoch": 18.24455205811138, "percentage": 91.22, "elapsed_time": "0:49:42", "remaining_time": "0:04:46", "throughput": 2069.85, "total_tokens": 6173168}
|
| 3033 |
+
{"current_steps": 15075, "total_steps": 16520, "loss": 0.0016, "lr": 2.3157411991399924e-05, "epoch": 18.250605326876514, "percentage": 91.25, "elapsed_time": "0:49:43", "remaining_time": "0:04:45", "throughput": 2069.88, "total_tokens": 6175216}
|
| 3034 |
+
{"current_steps": 15080, "total_steps": 16520, "loss": 0.0004, "lr": 2.2998777874705945e-05, "epoch": 18.256658595641646, "percentage": 91.28, "elapsed_time": "0:49:44", "remaining_time": "0:04:44", "throughput": 2069.94, "total_tokens": 6177392}
|
| 3035 |
+
{"current_steps": 15085, "total_steps": 16520, "loss": 0.0103, "lr": 2.284067617769281e-05, "epoch": 18.26271186440678, "percentage": 91.31, "elapsed_time": "0:49:45", "remaining_time": "0:04:43", "throughput": 2069.97, "total_tokens": 6179408}
|
| 3036 |
+
{"current_steps": 15090, "total_steps": 16520, "loss": 0.0014, "lr": 2.268310707683019e-05, "epoch": 18.268765133171915, "percentage": 91.34, "elapsed_time": "0:49:46", "remaining_time": "0:04:42", "throughput": 2070.01, "total_tokens": 6181520}
|
| 3037 |
+
{"current_steps": 15095, "total_steps": 16520, "loss": 0.0009, "lr": 2.2526070747994066e-05, "epoch": 18.274818401937047, "percentage": 91.37, "elapsed_time": "0:49:47", "remaining_time": "0:04:41", "throughput": 2070.05, "total_tokens": 6183536}
|
| 3038 |
+
{"current_steps": 15100, "total_steps": 16520, "loss": 0.0004, "lr": 2.2369567366465172e-05, "epoch": 18.28087167070218, "percentage": 91.4, "elapsed_time": "0:49:48", "remaining_time": "0:04:41", "throughput": 2070.06, "total_tokens": 6185552}
|
| 3039 |
+
{"current_steps": 15105, "total_steps": 16520, "loss": 0.0013, "lr": 2.2213597106929607e-05, "epoch": 18.286924939467312, "percentage": 91.43, "elapsed_time": "0:49:49", "remaining_time": "0:04:40", "throughput": 2070.07, "total_tokens": 6187472}
|
| 3040 |
+
{"current_steps": 15110, "total_steps": 16520, "loss": 0.0012, "lr": 2.2058160143478666e-05, "epoch": 18.292978208232444, "percentage": 91.46, "elapsed_time": "0:49:49", "remaining_time": "0:04:39", "throughput": 2070.11, "total_tokens": 6189520}
|
| 3041 |
+
{"current_steps": 15115, "total_steps": 16520, "loss": 0.0003, "lr": 2.1903256649607738e-05, "epoch": 18.29903147699758, "percentage": 91.5, "elapsed_time": "0:49:50", "remaining_time": "0:04:38", "throughput": 2070.15, "total_tokens": 6191600}
|
| 3042 |
+
{"current_steps": 15120, "total_steps": 16520, "loss": 0.0007, "lr": 2.1748886798217405e-05, "epoch": 18.305084745762713, "percentage": 91.53, "elapsed_time": "0:49:51", "remaining_time": "0:04:37", "throughput": 2070.18, "total_tokens": 6193680}
|
| 3043 |
+
{"current_steps": 15125, "total_steps": 16520, "loss": 0.0009, "lr": 2.1595050761612333e-05, "epoch": 18.311138014527845, "percentage": 91.56, "elapsed_time": "0:49:52", "remaining_time": "0:04:36", "throughput": 2070.25, "total_tokens": 6195888}
|
| 3044 |
+
{"current_steps": 15130, "total_steps": 16520, "loss": 0.0015, "lr": 2.144174871150134e-05, "epoch": 18.317191283292978, "percentage": 91.59, "elapsed_time": "0:49:53", "remaining_time": "0:04:35", "throughput": 2070.28, "total_tokens": 6197968}
|
| 3045 |
+
{"current_steps": 15135, "total_steps": 16520, "loss": 0.0006, "lr": 2.1288980818997272e-05, "epoch": 18.32324455205811, "percentage": 91.62, "elapsed_time": "0:49:54", "remaining_time": "0:04:34", "throughput": 2070.3, "total_tokens": 6199984}
|
| 3046 |
+
{"current_steps": 15140, "total_steps": 16520, "loss": 0.0013, "lr": 2.1136747254616727e-05, "epoch": 18.329297820823246, "percentage": 91.65, "elapsed_time": "0:49:55", "remaining_time": "0:04:33", "throughput": 2070.33, "total_tokens": 6201936}
|
| 3047 |
+
{"current_steps": 15145, "total_steps": 16520, "loss": 0.0017, "lr": 2.0985048188279843e-05, "epoch": 18.33535108958838, "percentage": 91.68, "elapsed_time": "0:49:56", "remaining_time": "0:04:32", "throughput": 2070.36, "total_tokens": 6203952}
|
| 3048 |
+
{"current_steps": 15150, "total_steps": 16520, "loss": 0.0003, "lr": 2.0833883789310448e-05, "epoch": 18.34140435835351, "percentage": 91.71, "elapsed_time": "0:49:57", "remaining_time": "0:04:31", "throughput": 2070.4, "total_tokens": 6205968}
|
| 3049 |
+
{"current_steps": 15155, "total_steps": 16520, "loss": 0.0021, "lr": 2.0683254226435078e-05, "epoch": 18.347457627118644, "percentage": 91.74, "elapsed_time": "0:49:58", "remaining_time": "0:04:30", "throughput": 2070.44, "total_tokens": 6208080}
|
| 3050 |
+
{"current_steps": 15160, "total_steps": 16520, "loss": 0.0031, "lr": 2.053315966778374e-05, "epoch": 18.353510895883776, "percentage": 91.77, "elapsed_time": "0:49:59", "remaining_time": "0:04:29", "throughput": 2070.5, "total_tokens": 6210224}
|
| 3051 |
+
{"current_steps": 15165, "total_steps": 16520, "loss": 0.0007, "lr": 2.038360028088915e-05, "epoch": 18.359564164648912, "percentage": 91.8, "elapsed_time": "0:50:00", "remaining_time": "0:04:28", "throughput": 2070.51, "total_tokens": 6212208}
|
| 3052 |
+
{"current_steps": 15170, "total_steps": 16520, "loss": 0.0008, "lr": 2.0234576232686597e-05, "epoch": 18.365617433414045, "percentage": 91.83, "elapsed_time": "0:50:01", "remaining_time": "0:04:27", "throughput": 2070.53, "total_tokens": 6214192}
|
| 3053 |
+
{"current_steps": 15175, "total_steps": 16520, "loss": 0.0012, "lr": 2.0086087689513864e-05, "epoch": 18.371670702179177, "percentage": 91.86, "elapsed_time": "0:50:02", "remaining_time": "0:04:26", "throughput": 2070.57, "total_tokens": 6216144}
|
| 3054 |
+
{"current_steps": 15180, "total_steps": 16520, "loss": 0.0029, "lr": 1.9938134817111096e-05, "epoch": 18.37772397094431, "percentage": 91.89, "elapsed_time": "0:50:03", "remaining_time": "0:04:25", "throughput": 2070.63, "total_tokens": 6218256}
|
| 3055 |
+
{"current_steps": 15185, "total_steps": 16520, "loss": 0.0005, "lr": 1.9790717780620414e-05, "epoch": 18.383777239709442, "percentage": 91.92, "elapsed_time": "0:50:04", "remaining_time": "0:04:24", "throughput": 2070.66, "total_tokens": 6220304}
|
| 3056 |
+
{"current_steps": 15190, "total_steps": 16520, "loss": 0.0024, "lr": 1.964383674458592e-05, "epoch": 18.389830508474578, "percentage": 91.95, "elapsed_time": "0:50:04", "remaining_time": "0:04:23", "throughput": 2070.71, "total_tokens": 6222384}
|
| 3057 |
+
{"current_steps": 15195, "total_steps": 16520, "loss": 0.004, "lr": 1.9497491872953466e-05, "epoch": 18.39588377723971, "percentage": 91.98, "elapsed_time": "0:50:05", "remaining_time": "0:04:22", "throughput": 2070.73, "total_tokens": 6224208}
|
| 3058 |
+
{"current_steps": 15200, "total_steps": 16520, "loss": 0.0012, "lr": 1.9351683329070335e-05, "epoch": 18.401937046004843, "percentage": 92.01, "elapsed_time": "0:50:06", "remaining_time": "0:04:21", "throughput": 2070.75, "total_tokens": 6226256}
|
| 3059 |
+
{"current_steps": 15205, "total_steps": 16520, "loss": 0.0008, "lr": 1.92064112756854e-05, "epoch": 18.407990314769975, "percentage": 92.04, "elapsed_time": "0:50:07", "remaining_time": "0:04:20", "throughput": 2070.8, "total_tokens": 6228400}
|
| 3060 |
+
{"current_steps": 15210, "total_steps": 16520, "loss": 0.0019, "lr": 1.9061675874948283e-05, "epoch": 18.414043583535108, "percentage": 92.07, "elapsed_time": "0:50:08", "remaining_time": "0:04:19", "throughput": 2070.87, "total_tokens": 6230576}
|
| 3061 |
+
{"current_steps": 15215, "total_steps": 16520, "loss": 0.0013, "lr": 1.8917477288410257e-05, "epoch": 18.420096852300244, "percentage": 92.1, "elapsed_time": "0:50:09", "remaining_time": "0:04:18", "throughput": 2070.91, "total_tokens": 6232688}
|
| 3062 |
+
{"current_steps": 15220, "total_steps": 16520, "loss": 0.0005, "lr": 1.877381567702269e-05, "epoch": 18.426150121065376, "percentage": 92.13, "elapsed_time": "0:50:10", "remaining_time": "0:04:17", "throughput": 2070.93, "total_tokens": 6234704}
|
| 3063 |
+
{"current_steps": 15225, "total_steps": 16520, "loss": 0.0004, "lr": 1.8630691201138137e-05, "epoch": 18.43220338983051, "percentage": 92.16, "elapsed_time": "0:50:11", "remaining_time": "0:04:16", "throughput": 2070.97, "total_tokens": 6236752}
|
| 3064 |
+
{"current_steps": 15230, "total_steps": 16520, "loss": 0.001, "lr": 1.848810402050932e-05, "epoch": 18.43825665859564, "percentage": 92.19, "elapsed_time": "0:50:12", "remaining_time": "0:04:15", "throughput": 2071.01, "total_tokens": 6238832}
|
| 3065 |
+
{"current_steps": 15235, "total_steps": 16520, "loss": 0.0008, "lr": 1.834605429428943e-05, "epoch": 18.444309927360774, "percentage": 92.22, "elapsed_time": "0:50:13", "remaining_time": "0:04:14", "throughput": 2071.06, "total_tokens": 6240976}
|
| 3066 |
+
{"current_steps": 15240, "total_steps": 16520, "loss": 0.0006, "lr": 1.8204542181031568e-05, "epoch": 18.45036319612591, "percentage": 92.25, "elapsed_time": "0:50:14", "remaining_time": "0:04:13", "throughput": 2071.08, "total_tokens": 6243024}
|
| 3067 |
+
{"current_steps": 15245, "total_steps": 16520, "loss": 0.0006, "lr": 1.8063567838688955e-05, "epoch": 18.456416464891042, "percentage": 92.28, "elapsed_time": "0:50:15", "remaining_time": "0:04:12", "throughput": 2071.15, "total_tokens": 6245232}
|
| 3068 |
+
{"current_steps": 15250, "total_steps": 16520, "loss": 0.0004, "lr": 1.7923131424614326e-05, "epoch": 18.462469733656174, "percentage": 92.31, "elapsed_time": "0:50:16", "remaining_time": "0:04:11", "throughput": 2071.2, "total_tokens": 6247280}
|
| 3069 |
+
{"current_steps": 15255, "total_steps": 16520, "loss": 0.0007, "lr": 1.7783233095560346e-05, "epoch": 18.468523002421307, "percentage": 92.34, "elapsed_time": "0:50:17", "remaining_time": "0:04:10", "throughput": 2071.21, "total_tokens": 6249264}
|
| 3070 |
+
{"current_steps": 15260, "total_steps": 16520, "loss": 0.0015, "lr": 1.7643873007678667e-05, "epoch": 18.47457627118644, "percentage": 92.37, "elapsed_time": "0:50:18", "remaining_time": "0:04:09", "throughput": 2071.28, "total_tokens": 6251504}
|
| 3071 |
+
{"current_steps": 15265, "total_steps": 16520, "loss": 0.0016, "lr": 1.7505051316520358e-05, "epoch": 18.480629539951575, "percentage": 92.4, "elapsed_time": "0:50:19", "remaining_time": "0:04:08", "throughput": 2071.32, "total_tokens": 6253616}
|
| 3072 |
+
{"current_steps": 15270, "total_steps": 16520, "loss": 0.0006, "lr": 1.736676817703581e-05, "epoch": 18.486682808716708, "percentage": 92.43, "elapsed_time": "0:50:20", "remaining_time": "0:04:07", "throughput": 2071.37, "total_tokens": 6255728}
|
| 3073 |
+
{"current_steps": 15275, "total_steps": 16520, "loss": 0.0015, "lr": 1.7229023743573656e-05, "epoch": 18.49273607748184, "percentage": 92.46, "elapsed_time": "0:50:21", "remaining_time": "0:04:06", "throughput": 2071.41, "total_tokens": 6257840}
|
| 3074 |
+
{"current_steps": 15280, "total_steps": 16520, "loss": 0.0016, "lr": 1.7091818169881924e-05, "epoch": 18.498789346246973, "percentage": 92.49, "elapsed_time": "0:50:22", "remaining_time": "0:04:05", "throughput": 2071.46, "total_tokens": 6259952}
|
| 3075 |
+
{"current_steps": 15285, "total_steps": 16520, "loss": 0.0007, "lr": 1.695515160910671e-05, "epoch": 18.504842615012105, "percentage": 92.52, "elapsed_time": "0:50:22", "remaining_time": "0:04:04", "throughput": 2071.52, "total_tokens": 6262160}
|
| 3076 |
+
{"current_steps": 15290, "total_steps": 16520, "loss": 0.0005, "lr": 1.6819024213792666e-05, "epoch": 18.51089588377724, "percentage": 92.55, "elapsed_time": "0:50:23", "remaining_time": "0:04:03", "throughput": 2071.6, "total_tokens": 6264368}
|
| 3077 |
+
{"current_steps": 15295, "total_steps": 16520, "loss": 0.0014, "lr": 1.668343613588258e-05, "epoch": 18.516949152542374, "percentage": 92.58, "elapsed_time": "0:50:24", "remaining_time": "0:04:02", "throughput": 2071.61, "total_tokens": 6266384}
|
| 3078 |
+
{"current_steps": 15300, "total_steps": 16520, "loss": 0.0005, "lr": 1.6548387526717277e-05, "epoch": 18.523002421307506, "percentage": 92.62, "elapsed_time": "0:50:25", "remaining_time": "0:04:01", "throughput": 2071.63, "total_tokens": 6268336}
|
| 3079 |
+
{"current_steps": 15305, "total_steps": 16520, "loss": 0.0016, "lr": 1.6413878537035408e-05, "epoch": 18.52905569007264, "percentage": 92.65, "elapsed_time": "0:50:26", "remaining_time": "0:04:00", "throughput": 2071.66, "total_tokens": 6270384}
|
| 3080 |
+
{"current_steps": 15310, "total_steps": 16520, "loss": 0.0065, "lr": 1.627990931697354e-05, "epoch": 18.53510895883777, "percentage": 92.68, "elapsed_time": "0:50:27", "remaining_time": "0:03:59", "throughput": 2071.66, "total_tokens": 6272240}
|
| 3081 |
+
{"current_steps": 15315, "total_steps": 16520, "loss": 0.003, "lr": 1.6146480016065278e-05, "epoch": 18.541162227602907, "percentage": 92.71, "elapsed_time": "0:50:28", "remaining_time": "0:03:58", "throughput": 2071.69, "total_tokens": 6274224}
|
| 3082 |
+
{"current_steps": 15320, "total_steps": 16520, "loss": 0.0018, "lr": 1.6013590783242104e-05, "epoch": 18.54721549636804, "percentage": 92.74, "elapsed_time": "0:50:29", "remaining_time": "0:03:57", "throughput": 2071.7, "total_tokens": 6276240}
|
| 3083 |
+
{"current_steps": 15325, "total_steps": 16520, "loss": 0.0019, "lr": 1.5881241766832312e-05, "epoch": 18.553268765133172, "percentage": 92.77, "elapsed_time": "0:50:30", "remaining_time": "0:03:56", "throughput": 2071.72, "total_tokens": 6278256}
|
| 3084 |
+
{"current_steps": 15330, "total_steps": 16520, "loss": 0.0023, "lr": 1.5749433114561395e-05, "epoch": 18.559322033898304, "percentage": 92.8, "elapsed_time": "0:50:31", "remaining_time": "0:03:55", "throughput": 2071.75, "total_tokens": 6280336}
|
| 3085 |
+
{"current_steps": 15335, "total_steps": 16520, "loss": 0.0032, "lr": 1.5618164973551674e-05, "epoch": 18.565375302663437, "percentage": 92.83, "elapsed_time": "0:50:32", "remaining_time": "0:03:54", "throughput": 2071.79, "total_tokens": 6282288}
|
| 3086 |
+
{"current_steps": 15340, "total_steps": 16520, "loss": 0.0011, "lr": 1.5487437490322163e-05, "epoch": 18.571428571428573, "percentage": 92.86, "elapsed_time": "0:50:33", "remaining_time": "0:03:53", "throughput": 2071.85, "total_tokens": 6284464}
|
| 3087 |
+
{"current_steps": 15345, "total_steps": 16520, "loss": 0.0205, "lr": 1.5357250810788315e-05, "epoch": 18.577481840193705, "percentage": 92.89, "elapsed_time": "0:50:34", "remaining_time": "0:03:52", "throughput": 2071.9, "total_tokens": 6286544}
|
| 3088 |
+
{"current_steps": 15350, "total_steps": 16520, "loss": 0.0012, "lr": 1.5227605080262108e-05, "epoch": 18.583535108958838, "percentage": 92.92, "elapsed_time": "0:50:35", "remaining_time": "0:03:51", "throughput": 2071.92, "total_tokens": 6288432}
|
| 3089 |
+
{"current_steps": 15355, "total_steps": 16520, "loss": 0.0007, "lr": 1.5098500443451568e-05, "epoch": 18.58958837772397, "percentage": 92.95, "elapsed_time": "0:50:36", "remaining_time": "0:03:50", "throughput": 2071.99, "total_tokens": 6290672}
|
| 3090 |
+
{"current_steps": 15360, "total_steps": 16520, "loss": 0.0003, "lr": 1.4969937044460868e-05, "epoch": 18.595641646489103, "percentage": 92.98, "elapsed_time": "0:50:36", "remaining_time": "0:03:49", "throughput": 2072.01, "total_tokens": 6292688}
|
| 3091 |
+
{"current_steps": 15365, "total_steps": 16520, "loss": 0.0007, "lr": 1.4841915026790053e-05, "epoch": 18.60169491525424, "percentage": 93.01, "elapsed_time": "0:50:37", "remaining_time": "0:03:48", "throughput": 2072.02, "total_tokens": 6294576}
|
| 3092 |
+
{"current_steps": 15370, "total_steps": 16520, "loss": 0.0003, "lr": 1.471443453333471e-05, "epoch": 18.60774818401937, "percentage": 93.04, "elapsed_time": "0:50:38", "remaining_time": "0:03:47", "throughput": 2072.06, "total_tokens": 6296592}
|
| 3093 |
+
{"current_steps": 15375, "total_steps": 16520, "loss": 0.002, "lr": 1.45874957063864e-05, "epoch": 18.613801452784504, "percentage": 93.07, "elapsed_time": "0:50:39", "remaining_time": "0:03:46", "throughput": 2072.07, "total_tokens": 6298512}
|
| 3094 |
+
{"current_steps": 15380, "total_steps": 16520, "loss": 0.0424, "lr": 1.4461098687631623e-05, "epoch": 18.619854721549636, "percentage": 93.1, "elapsed_time": "0:50:40", "remaining_time": "0:03:45", "throughput": 2072.1, "total_tokens": 6300528}
|
| 3095 |
+
{"current_steps": 15385, "total_steps": 16520, "loss": 0.001, "lr": 1.433524361815236e-05, "epoch": 18.62590799031477, "percentage": 93.13, "elapsed_time": "0:50:41", "remaining_time": "0:03:44", "throughput": 2072.13, "total_tokens": 6302480}
|
| 3096 |
+
{"current_steps": 15390, "total_steps": 16520, "loss": 0.0016, "lr": 1.4209930638425739e-05, "epoch": 18.631961259079905, "percentage": 93.16, "elapsed_time": "0:50:42", "remaining_time": "0:03:43", "throughput": 2072.16, "total_tokens": 6304400}
|
| 3097 |
+
{"current_steps": 15395, "total_steps": 16520, "loss": 0.0255, "lr": 1.4085159888323596e-05, "epoch": 18.638014527845037, "percentage": 93.19, "elapsed_time": "0:50:43", "remaining_time": "0:03:42", "throughput": 2072.2, "total_tokens": 6306448}
|
| 3098 |
+
{"current_steps": 15400, "total_steps": 16520, "loss": 0.0007, "lr": 1.396093150711275e-05, "epoch": 18.64406779661017, "percentage": 93.22, "elapsed_time": "0:50:44", "remaining_time": "0:03:41", "throughput": 2072.22, "total_tokens": 6308464}
|
| 3099 |
+
{"current_steps": 15405, "total_steps": 16520, "loss": 0.0006, "lr": 1.383724563345451e-05, "epoch": 18.650121065375302, "percentage": 93.25, "elapsed_time": "0:50:45", "remaining_time": "0:03:40", "throughput": 2072.27, "total_tokens": 6310544}
|
| 3100 |
+
{"current_steps": 15410, "total_steps": 16520, "loss": 0.0007, "lr": 1.371410240540466e-05, "epoch": 18.656174334140434, "percentage": 93.28, "elapsed_time": "0:50:46", "remaining_time": "0:03:39", "throughput": 2072.31, "total_tokens": 6312592}
|
| 3101 |
+
{"current_steps": 15415, "total_steps": 16520, "loss": 0.0004, "lr": 1.3591501960413421e-05, "epoch": 18.66222760290557, "percentage": 93.31, "elapsed_time": "0:50:47", "remaining_time": "0:03:38", "throughput": 2072.32, "total_tokens": 6314576}
|
| 3102 |
+
{"current_steps": 15420, "total_steps": 16520, "loss": 0.0005, "lr": 1.3469444435325052e-05, "epoch": 18.668280871670703, "percentage": 93.34, "elapsed_time": "0:50:48", "remaining_time": "0:03:37", "throughput": 2072.37, "total_tokens": 6316688}
|
| 3103 |
+
{"current_steps": 15425, "total_steps": 16520, "loss": 0.0009, "lr": 1.3347929966377737e-05, "epoch": 18.674334140435835, "percentage": 93.37, "elapsed_time": "0:50:48", "remaining_time": "0:03:36", "throughput": 2072.38, "total_tokens": 6318544}
|
| 3104 |
+
{"current_steps": 15430, "total_steps": 16520, "loss": 0.0014, "lr": 1.322695868920376e-05, "epoch": 18.680387409200968, "percentage": 93.4, "elapsed_time": "0:50:49", "remaining_time": "0:03:35", "throughput": 2072.46, "total_tokens": 6320816}
|
| 3105 |
+
{"current_steps": 15435, "total_steps": 16520, "loss": 0.0003, "lr": 1.3106530738828836e-05, "epoch": 18.6864406779661, "percentage": 93.43, "elapsed_time": "0:50:50", "remaining_time": "0:03:34", "throughput": 2072.49, "total_tokens": 6322864}
|
| 3106 |
+
{"current_steps": 15440, "total_steps": 16520, "loss": 0.0004, "lr": 1.2986646249672385e-05, "epoch": 18.692493946731236, "percentage": 93.46, "elapsed_time": "0:50:51", "remaining_time": "0:03:33", "throughput": 2072.53, "total_tokens": 6324944}
|
| 3107 |
+
{"current_steps": 15445, "total_steps": 16520, "loss": 0.0009, "lr": 1.2867305355547255e-05, "epoch": 18.69854721549637, "percentage": 93.49, "elapsed_time": "0:50:52", "remaining_time": "0:03:32", "throughput": 2072.57, "total_tokens": 6327056}
|
| 3108 |
+
{"current_steps": 15450, "total_steps": 16520, "loss": 0.0013, "lr": 1.2748508189659447e-05, "epoch": 18.7046004842615, "percentage": 93.52, "elapsed_time": "0:50:53", "remaining_time": "0:03:31", "throughput": 2072.61, "total_tokens": 6329136}
|
| 3109 |
+
{"current_steps": 15455, "total_steps": 16520, "loss": 0.0007, "lr": 1.2630254884608006e-05, "epoch": 18.710653753026634, "percentage": 93.55, "elapsed_time": "0:50:54", "remaining_time": "0:03:30", "throughput": 2072.6, "total_tokens": 6331088}
|
| 3110 |
+
{"current_steps": 15460, "total_steps": 16520, "loss": 0.0019, "lr": 1.2512545572385125e-05, "epoch": 18.716707021791766, "percentage": 93.58, "elapsed_time": "0:50:55", "remaining_time": "0:03:29", "throughput": 2072.62, "total_tokens": 6333040}
|
| 3111 |
+
{"current_steps": 15465, "total_steps": 16520, "loss": 0.0005, "lr": 1.2395380384375654e-05, "epoch": 18.722760290556902, "percentage": 93.61, "elapsed_time": "0:50:56", "remaining_time": "0:03:28", "throughput": 2072.65, "total_tokens": 6335088}
|
| 3112 |
+
{"current_steps": 15470, "total_steps": 16520, "loss": 0.0021, "lr": 1.2278759451357203e-05, "epoch": 18.728813559322035, "percentage": 93.64, "elapsed_time": "0:50:57", "remaining_time": "0:03:27", "throughput": 2072.71, "total_tokens": 6337200}
|
| 3113 |
+
{"current_steps": 15475, "total_steps": 16520, "loss": 0.0017, "lr": 1.21626829034997e-05, "epoch": 18.734866828087167, "percentage": 93.67, "elapsed_time": "0:50:58", "remaining_time": "0:03:26", "throughput": 2072.73, "total_tokens": 6339248}
|
| 3114 |
+
{"current_steps": 15480, "total_steps": 16520, "loss": 0.0008, "lr": 1.204715087036573e-05, "epoch": 18.7409200968523, "percentage": 93.7, "elapsed_time": "0:50:59", "remaining_time": "0:03:25", "throughput": 2072.79, "total_tokens": 6341424}
|
| 3115 |
+
{"current_steps": 15485, "total_steps": 16520, "loss": 0.0016, "lr": 1.1932163480909864e-05, "epoch": 18.746973365617432, "percentage": 93.73, "elapsed_time": "0:51:00", "remaining_time": "0:03:24", "throughput": 2072.82, "total_tokens": 6343408}
|
| 3116 |
+
{"current_steps": 15490, "total_steps": 16520, "loss": 0.0007, "lr": 1.181772086347882e-05, "epoch": 18.753026634382568, "percentage": 93.77, "elapsed_time": "0:51:01", "remaining_time": "0:03:23", "throughput": 2072.84, "total_tokens": 6345456}
|
| 3117 |
+
{"current_steps": 15495, "total_steps": 16520, "loss": 0.001, "lr": 1.170382314581142e-05, "epoch": 18.7590799031477, "percentage": 93.8, "elapsed_time": "0:51:02", "remaining_time": "0:03:22", "throughput": 2072.86, "total_tokens": 6347408}
|
| 3118 |
+
{"current_steps": 15500, "total_steps": 16520, "loss": 0.0006, "lr": 1.1590470455037861e-05, "epoch": 18.765133171912833, "percentage": 93.83, "elapsed_time": "0:51:03", "remaining_time": "0:03:21", "throughput": 2072.87, "total_tokens": 6349392}
|
| 3119 |
+
{"current_steps": 15505, "total_steps": 16520, "loss": 0.0011, "lr": 1.1477662917680487e-05, "epoch": 18.771186440677965, "percentage": 93.86, "elapsed_time": "0:51:04", "remaining_time": "0:03:20", "throughput": 2072.88, "total_tokens": 6351408}
|
| 3120 |
+
{"current_steps": 15510, "total_steps": 16520, "loss": 0.0005, "lr": 1.1365400659652803e-05, "epoch": 18.777239709443098, "percentage": 93.89, "elapsed_time": "0:51:04", "remaining_time": "0:03:19", "throughput": 2072.91, "total_tokens": 6353456}
|
| 3121 |
+
{"current_steps": 15515, "total_steps": 16520, "loss": 0.0008, "lr": 1.1253683806259907e-05, "epoch": 18.783292978208234, "percentage": 93.92, "elapsed_time": "0:51:05", "remaining_time": "0:03:18", "throughput": 2072.93, "total_tokens": 6355440}
|
| 3122 |
+
{"current_steps": 15520, "total_steps": 16520, "loss": 0.0005, "lr": 1.1142512482197886e-05, "epoch": 18.789346246973366, "percentage": 93.95, "elapsed_time": "0:51:06", "remaining_time": "0:03:17", "throughput": 2072.96, "total_tokens": 6357488}
|
| 3123 |
+
{"current_steps": 15525, "total_steps": 16520, "loss": 0.001, "lr": 1.1031886811554203e-05, "epoch": 18.7953995157385, "percentage": 93.98, "elapsed_time": "0:51:07", "remaining_time": "0:03:16", "throughput": 2072.98, "total_tokens": 6359472}
|
| 3124 |
+
{"current_steps": 15530, "total_steps": 16520, "loss": 0.0009, "lr": 1.0921806917806976e-05, "epoch": 18.80145278450363, "percentage": 94.01, "elapsed_time": "0:51:08", "remaining_time": "0:03:15", "throughput": 2073.02, "total_tokens": 6361520}
|
| 3125 |
+
{"current_steps": 15535, "total_steps": 16520, "loss": 0.0006, "lr": 1.0812272923825417e-05, "epoch": 18.807506053268764, "percentage": 94.04, "elapsed_time": "0:51:09", "remaining_time": "0:03:14", "throughput": 2073.08, "total_tokens": 6363728}
|
| 3126 |
+
{"current_steps": 15540, "total_steps": 16520, "loss": 0.0014, "lr": 1.0703284951869174e-05, "epoch": 18.8135593220339, "percentage": 94.07, "elapsed_time": "0:51:10", "remaining_time": "0:03:13", "throughput": 2073.11, "total_tokens": 6365776}
|
| 3127 |
+
{"current_steps": 15545, "total_steps": 16520, "loss": 0.0009, "lr": 1.059484312358866e-05, "epoch": 18.819612590799032, "percentage": 94.1, "elapsed_time": "0:51:11", "remaining_time": "0:03:12", "throughput": 2073.11, "total_tokens": 6367568}
|
| 3128 |
+
{"current_steps": 15550, "total_steps": 16520, "loss": 0.0011, "lr": 1.0486947560024496e-05, "epoch": 18.825665859564165, "percentage": 94.13, "elapsed_time": "0:51:12", "remaining_time": "0:03:11", "throughput": 2073.16, "total_tokens": 6369680}
|
| 3129 |
+
{"current_steps": 15555, "total_steps": 16520, "loss": 0.0037, "lr": 1.037959838160768e-05, "epoch": 18.831719128329297, "percentage": 94.16, "elapsed_time": "0:51:13", "remaining_time": "0:03:10", "throughput": 2073.18, "total_tokens": 6371728}
|
| 3130 |
+
{"current_steps": 15560, "total_steps": 16520, "loss": 0.0006, "lr": 1.0272795708159311e-05, "epoch": 18.83777239709443, "percentage": 94.19, "elapsed_time": "0:51:14", "remaining_time": "0:03:09", "throughput": 2073.23, "total_tokens": 6373840}
|
| 3131 |
+
{"current_steps": 15565, "total_steps": 16520, "loss": 0.002, "lr": 1.0166539658890472e-05, "epoch": 18.843825665859566, "percentage": 94.22, "elapsed_time": "0:51:15", "remaining_time": "0:03:08", "throughput": 2073.25, "total_tokens": 6375888}
|
| 3132 |
+
{"current_steps": 15570, "total_steps": 16520, "loss": 0.0008, "lr": 1.0060830352402183e-05, "epoch": 18.849878934624698, "percentage": 94.25, "elapsed_time": "0:51:16", "remaining_time": "0:03:07", "throughput": 2073.3, "total_tokens": 6378064}
|
| 3133 |
+
{"current_steps": 15575, "total_steps": 16520, "loss": 0.0005, "lr": 9.955667906685229e-06, "epoch": 18.85593220338983, "percentage": 94.28, "elapsed_time": "0:51:17", "remaining_time": "0:03:06", "throughput": 2073.32, "total_tokens": 6380080}
|
| 3134 |
+
{"current_steps": 15580, "total_steps": 16520, "loss": 0.0004, "lr": 9.851052439119823e-06, "epoch": 18.861985472154963, "percentage": 94.31, "elapsed_time": "0:51:18", "remaining_time": "0:03:05", "throughput": 2073.34, "total_tokens": 6382096}
|
| 3135 |
+
{"current_steps": 15585, "total_steps": 16520, "loss": 0.0008, "lr": 9.746984066475728e-06, "epoch": 18.868038740920095, "percentage": 94.34, "elapsed_time": "0:51:19", "remaining_time": "0:03:04", "throughput": 2073.37, "total_tokens": 6384048}
|
| 3136 |
+
{"current_steps": 15590, "total_steps": 16520, "loss": 0.0008, "lr": 9.643462904912248e-06, "epoch": 18.87409200968523, "percentage": 94.37, "elapsed_time": "0:51:20", "remaining_time": "0:03:03", "throughput": 2073.41, "total_tokens": 6386160}
|
| 3137 |
+
{"current_steps": 15595, "total_steps": 16520, "loss": 0.0006, "lr": 9.54048906997751e-06, "epoch": 18.880145278450364, "percentage": 94.4, "elapsed_time": "0:51:21", "remaining_time": "0:03:02", "throughput": 2073.47, "total_tokens": 6388368}
|
| 3138 |
+
{"current_steps": 15600, "total_steps": 16520, "loss": 0.0006, "lr": 9.438062676609127e-06, "epoch": 18.886198547215496, "percentage": 94.43, "elapsed_time": "0:51:21", "remaining_time": "0:03:01", "throughput": 2073.5, "total_tokens": 6390352}
|
| 3139 |
+
{"current_steps": 15605, "total_steps": 16520, "loss": 0.0008, "lr": 9.336183839133427e-06, "epoch": 18.89225181598063, "percentage": 94.46, "elapsed_time": "0:51:22", "remaining_time": "0:03:00", "throughput": 2073.52, "total_tokens": 6392400}
|
| 3140 |
+
{"current_steps": 15610, "total_steps": 16520, "loss": 0.0006, "lr": 9.234852671265614e-06, "epoch": 18.89830508474576, "percentage": 94.49, "elapsed_time": "0:51:23", "remaining_time": "0:02:59", "throughput": 2073.59, "total_tokens": 6394608}
|
| 3141 |
+
{"current_steps": 15615, "total_steps": 16520, "loss": 0.0017, "lr": 9.134069286109603e-06, "epoch": 18.904358353510897, "percentage": 94.52, "elapsed_time": "0:51:24", "remaining_time": "0:02:58", "throughput": 2073.62, "total_tokens": 6396624}
|
| 3142 |
+
{"current_steps": 15620, "total_steps": 16520, "loss": 0.0008, "lr": 9.033833796157909e-06, "epoch": 18.91041162227603, "percentage": 94.55, "elapsed_time": "0:51:25", "remaining_time": "0:02:57", "throughput": 2073.69, "total_tokens": 6398768}
|
| 3143 |
+
{"current_steps": 15625, "total_steps": 16520, "loss": 0.0013, "lr": 8.934146313291425e-06, "epoch": 18.916464891041162, "percentage": 94.58, "elapsed_time": "0:51:26", "remaining_time": "0:02:56", "throughput": 2073.7, "total_tokens": 6400688}
|
| 3144 |
+
{"current_steps": 15630, "total_steps": 16520, "loss": 0.0004, "lr": 8.835006948779589e-06, "epoch": 18.922518159806295, "percentage": 94.61, "elapsed_time": "0:51:27", "remaining_time": "0:02:55", "throughput": 2073.71, "total_tokens": 6402672}
|
| 3145 |
+
{"current_steps": 15635, "total_steps": 16520, "loss": 0.0005, "lr": 8.736415813279609e-06, "epoch": 18.928571428571427, "percentage": 94.64, "elapsed_time": "0:51:28", "remaining_time": "0:02:54", "throughput": 2073.76, "total_tokens": 6404752}
|
| 3146 |
+
{"current_steps": 15640, "total_steps": 16520, "loss": 0.001, "lr": 8.638373016837343e-06, "epoch": 18.934624697336563, "percentage": 94.67, "elapsed_time": "0:51:29", "remaining_time": "0:02:53", "throughput": 2073.78, "total_tokens": 6406800}
|
| 3147 |
+
{"current_steps": 15645, "total_steps": 16520, "loss": 0.0014, "lr": 8.540878668885977e-06, "epoch": 18.940677966101696, "percentage": 94.7, "elapsed_time": "0:51:30", "remaining_time": "0:02:52", "throughput": 2073.81, "total_tokens": 6408880}
|
| 3148 |
+
{"current_steps": 15650, "total_steps": 16520, "loss": 0.0006, "lr": 8.443932878247017e-06, "epoch": 18.946731234866828, "percentage": 94.73, "elapsed_time": "0:51:31", "remaining_time": "0:02:51", "throughput": 2073.85, "total_tokens": 6410992}
|
| 3149 |
+
{"current_steps": 15655, "total_steps": 16520, "loss": 0.0006, "lr": 8.347535753129575e-06, "epoch": 18.95278450363196, "percentage": 94.76, "elapsed_time": "0:51:32", "remaining_time": "0:02:50", "throughput": 2073.9, "total_tokens": 6413072}
|
| 3150 |
+
{"current_steps": 15660, "total_steps": 16520, "loss": 0.0018, "lr": 8.251687401130137e-06, "epoch": 18.958837772397093, "percentage": 94.79, "elapsed_time": "0:51:33", "remaining_time": "0:02:49", "throughput": 2073.98, "total_tokens": 6415344}
|
| 3151 |
+
{"current_steps": 15665, "total_steps": 16520, "loss": 0.0004, "lr": 8.156387929232844e-06, "epoch": 18.96489104116223, "percentage": 94.82, "elapsed_time": "0:51:34", "remaining_time": "0:02:48", "throughput": 2074.0, "total_tokens": 6417392}
|
| 3152 |
+
{"current_steps": 15670, "total_steps": 16520, "loss": 0.0004, "lr": 8.061637443809222e-06, "epoch": 18.97094430992736, "percentage": 94.85, "elapsed_time": "0:51:35", "remaining_time": "0:02:47", "throughput": 2074.05, "total_tokens": 6419536}
|
| 3153 |
+
{"current_steps": 15675, "total_steps": 16520, "loss": 0.002, "lr": 7.967436050617894e-06, "epoch": 18.976997578692494, "percentage": 94.88, "elapsed_time": "0:51:36", "remaining_time": "0:02:46", "throughput": 2074.1, "total_tokens": 6421616}
|
| 3154 |
+
{"current_steps": 15680, "total_steps": 16520, "loss": 0.0021, "lr": 7.873783854804694e-06, "epoch": 18.983050847457626, "percentage": 94.92, "elapsed_time": "0:51:37", "remaining_time": "0:02:45", "throughput": 2074.18, "total_tokens": 6423792}
|
| 3155 |
+
{"current_steps": 15685, "total_steps": 16520, "loss": 0.001, "lr": 7.780680960902397e-06, "epoch": 18.98910411622276, "percentage": 94.95, "elapsed_time": "0:51:37", "remaining_time": "0:02:44", "throughput": 2074.23, "total_tokens": 6425936}
|
| 3156 |
+
{"current_steps": 15690, "total_steps": 16520, "loss": 0.0013, "lr": 7.688127472830653e-06, "epoch": 18.995157384987895, "percentage": 94.98, "elapsed_time": "0:51:38", "remaining_time": "0:02:43", "throughput": 2074.3, "total_tokens": 6428080}
|
| 3157 |
+
{"current_steps": 15694, "total_steps": 16520, "eval_loss": 0.37710338830947876, "epoch": 19.0, "percentage": 95.0, "elapsed_time": "0:51:47", "remaining_time": "0:02:43", "throughput": 2069.06, "total_tokens": 6429416}
|
| 3158 |
+
{"current_steps": 15695, "total_steps": 16520, "loss": 0.0011, "lr": 7.59612349389599e-06, "epoch": 19.001210653753027, "percentage": 95.01, "elapsed_time": "0:51:48", "remaining_time": "0:02:43", "throughput": 2068.41, "total_tokens": 6429832}
|
| 3159 |
+
{"current_steps": 15700, "total_steps": 16520, "loss": 0.0008, "lr": 7.50466912679143e-06, "epoch": 19.00726392251816, "percentage": 95.04, "elapsed_time": "0:51:49", "remaining_time": "0:02:42", "throughput": 2068.41, "total_tokens": 6431848}
|
| 3160 |
+
{"current_steps": 15705, "total_steps": 16520, "loss": 0.0019, "lr": 7.413764473596596e-06, "epoch": 19.013317191283292, "percentage": 95.07, "elapsed_time": "0:51:50", "remaining_time": "0:02:41", "throughput": 2068.45, "total_tokens": 6433960}
|