| {"current_steps": 1, "total_steps": 130, "loss": 1.9108, "lr": 1.9997080140801932e-05, "epoch": 0.07272727272727272, "percentage": 0.77, "elapsed_time": "0:00:25", "remaining_time": "0:55:44", "throughput": 10111.46, "total_tokens": 262144} |
| {"current_steps": 2, "total_steps": 130, "loss": 2.0082, "lr": 1.998832226832327e-05, "epoch": 0.14545454545454545, "percentage": 1.54, "elapsed_time": "0:00:50", "remaining_time": "0:53:26", "throughput": 10465.94, "total_tokens": 524288} |
| {"current_steps": 3, "total_steps": 130, "loss": 1.6925, "lr": 1.9973731496914914e-05, "epoch": 0.21818181818181817, "percentage": 2.31, "elapsed_time": "0:01:14", "remaining_time": "0:52:20", "throughput": 10601.54, "total_tokens": 786432} |
| {"current_steps": 4, "total_steps": 130, "loss": 1.5721, "lr": 1.995331634717649e-05, "epoch": 0.2909090909090909, "percentage": 3.08, "elapsed_time": "0:01:38", "remaining_time": "0:51:37", "throughput": 10662.52, "total_tokens": 1048576} |
| {"current_steps": 5, "total_steps": 130, "loss": 1.4889, "lr": 1.992708874098054e-05, "epoch": 0.36363636363636365, "percentage": 3.85, "elapsed_time": "0:02:02", "remaining_time": "0:51:01", "throughput": 10701.56, "total_tokens": 1310720} |
| {"current_steps": 6, "total_steps": 130, "loss": 1.4526, "lr": 1.9895063994510512e-05, "epoch": 0.43636363636363634, "percentage": 4.62, "elapsed_time": "0:02:26", "remaining_time": "0:50:30", "throughput": 10727.42, "total_tokens": 1572864} |
| {"current_steps": 7, "total_steps": 130, "loss": 1.3744, "lr": 1.985726080931651e-05, "epoch": 0.509090909090909, "percentage": 5.38, "elapsed_time": "0:02:50", "remaining_time": "0:50:00", "throughput": 10746.34, "total_tokens": 1835008} |
| {"current_steps": 8, "total_steps": 130, "loss": 1.2968, "lr": 1.9813701261394136e-05, "epoch": 0.5818181818181818, "percentage": 6.15, "elapsed_time": "0:03:14", "remaining_time": "0:49:33", "throughput": 10756.82, "total_tokens": 2097152} |
| {"current_steps": 9, "total_steps": 130, "loss": 1.262, "lr": 1.9764410788292724e-05, "epoch": 0.6545454545454545, "percentage": 6.92, "elapsed_time": "0:03:39", "remaining_time": "0:49:07", "throughput": 10760.48, "total_tokens": 2359296} |
| {"current_steps": 10, "total_steps": 130, "loss": 1.195, "lr": 1.9709418174260523e-05, "epoch": 0.7272727272727273, "percentage": 7.69, "elapsed_time": "0:04:03", "remaining_time": "0:48:41", "throughput": 10768.99, "total_tokens": 2621440} |
| {"current_steps": 11, "total_steps": 130, "loss": 1.1555, "lr": 1.9648755533435517e-05, "epoch": 0.8, "percentage": 8.46, "elapsed_time": "0:04:27", "remaining_time": "0:48:14", "throughput": 10775.65, "total_tokens": 2883584} |
| {"current_steps": 12, "total_steps": 130, "loss": 1.1154, "lr": 1.9582458291091664e-05, "epoch": 0.8727272727272727, "percentage": 9.23, "elapsed_time": "0:04:51", "remaining_time": "0:47:49", "throughput": 10780.33, "total_tokens": 3145728} |
| {"current_steps": 13, "total_steps": 130, "loss": 1.0681, "lr": 1.9510565162951538e-05, "epoch": 0.9454545454545454, "percentage": 10.0, "elapsed_time": "0:05:15", "remaining_time": "0:47:23", "throughput": 10785.49, "total_tokens": 3407872} |
| {"current_steps": 14, "total_steps": 130, "loss": 1.954, "lr": 1.9433118132577432e-05, "epoch": 1.0727272727272728, "percentage": 10.77, "elapsed_time": "0:05:46", "remaining_time": "0:47:48", "throughput": 10790.94, "total_tokens": 3735552} |
| {"current_steps": 15, "total_steps": 130, "loss": 0.9358, "lr": 1.9350162426854152e-05, "epoch": 1.1454545454545455, "percentage": 11.54, "elapsed_time": "0:06:10", "remaining_time": "0:47:20", "throughput": 10791.57, "total_tokens": 3997696} |
| {"current_steps": 16, "total_steps": 130, "loss": 0.894, "lr": 1.9261746489577767e-05, "epoch": 1.2181818181818183, "percentage": 12.31, "elapsed_time": "0:06:34", "remaining_time": "0:46:52", "throughput": 10792.86, "total_tokens": 4259840} |
| {"current_steps": 17, "total_steps": 130, "loss": 0.8595, "lr": 1.9167921953165827e-05, "epoch": 1.290909090909091, "percentage": 13.08, "elapsed_time": "0:06:58", "remaining_time": "0:46:24", "throughput": 10795.29, "total_tokens": 4521984} |
| {"current_steps": 18, "total_steps": 130, "loss": 0.8532, "lr": 1.9068743608505454e-05, "epoch": 1.3636363636363638, "percentage": 13.85, "elapsed_time": "0:07:23", "remaining_time": "0:45:57", "throughput": 10797.16, "total_tokens": 4784128} |
| {"current_steps": 19, "total_steps": 130, "loss": 0.8409, "lr": 1.896426937295704e-05, "epoch": 1.4363636363636363, "percentage": 14.62, "elapsed_time": "0:07:47", "remaining_time": "0:45:30", "throughput": 10798.83, "total_tokens": 5046272} |
| {"current_steps": 20, "total_steps": 130, "loss": 0.8412, "lr": 1.8854560256532098e-05, "epoch": 1.509090909090909, "percentage": 15.38, "elapsed_time": "0:08:11", "remaining_time": "0:45:03", "throughput": 10799.79, "total_tokens": 5308416} |
| {"current_steps": 21, "total_steps": 130, "loss": 0.805, "lr": 1.873968032626518e-05, "epoch": 1.5818181818181818, "percentage": 16.15, "elapsed_time": "0:08:35", "remaining_time": "0:44:36", "throughput": 10802.63, "total_tokens": 5570560} |
| {"current_steps": 22, "total_steps": 130, "loss": 0.7872, "lr": 1.8619696668800494e-05, "epoch": 1.6545454545454545, "percentage": 16.92, "elapsed_time": "0:08:59", "remaining_time": "0:44:10", "throughput": 10804.89, "total_tokens": 5832704} |
| {"current_steps": 23, "total_steps": 130, "loss": 0.7957, "lr": 1.8494679351215212e-05, "epoch": 1.7272727272727273, "percentage": 17.69, "elapsed_time": "0:09:24", "remaining_time": "0:43:43", "throughput": 10806.43, "total_tokens": 6094848} |
| {"current_steps": 24, "total_steps": 130, "loss": 0.7657, "lr": 1.8364701380102267e-05, "epoch": 1.8, "percentage": 18.46, "elapsed_time": "0:09:48", "remaining_time": "0:43:17", "throughput": 10807.09, "total_tokens": 6356992} |
| {"current_steps": 25, "total_steps": 130, "loss": 0.7456, "lr": 1.8229838658936566e-05, "epoch": 1.8727272727272726, "percentage": 19.23, "elapsed_time": "0:10:12", "remaining_time": "0:42:52", "throughput": 10807.13, "total_tokens": 6619136} |
| {"current_steps": 26, "total_steps": 130, "loss": 0.7682, "lr": 1.8090169943749477e-05, "epoch": 1.9454545454545453, "percentage": 20.0, "elapsed_time": "0:10:36", "remaining_time": "0:42:27", "throughput": 10806.77, "total_tokens": 6881280} |
| {"current_steps": 27, "total_steps": 130, "loss": 1.3684, "lr": 1.7945776797137544e-05, "epoch": 2.0727272727272728, "percentage": 20.77, "elapsed_time": "0:11:06", "remaining_time": "0:42:24", "throughput": 10808.94, "total_tokens": 7208960} |
| {"current_steps": 28, "total_steps": 130, "loss": 0.5876, "lr": 1.7796743540632226e-05, "epoch": 2.1454545454545455, "percentage": 21.54, "elapsed_time": "0:11:31", "remaining_time": "0:41:57", "throughput": 10809.36, "total_tokens": 7471104} |
| {"current_steps": 29, "total_steps": 130, "loss": 0.5906, "lr": 1.7643157205458483e-05, "epoch": 2.2181818181818183, "percentage": 22.31, "elapsed_time": "0:11:55", "remaining_time": "0:41:31", "throughput": 10810.41, "total_tokens": 7733248} |
| {"current_steps": 30, "total_steps": 130, "loss": 0.5904, "lr": 1.7485107481711014e-05, "epoch": 2.290909090909091, "percentage": 23.08, "elapsed_time": "0:12:19", "remaining_time": "0:41:05", "throughput": 10810.93, "total_tokens": 7995392} |
| {"current_steps": 31, "total_steps": 130, "loss": 0.5566, "lr": 1.7322686665977738e-05, "epoch": 2.3636363636363638, "percentage": 23.85, "elapsed_time": "0:12:43", "remaining_time": "0:40:39", "throughput": 10811.3, "total_tokens": 8257536} |
| {"current_steps": 32, "total_steps": 130, "loss": 0.55, "lr": 1.715598960744121e-05, "epoch": 2.4363636363636365, "percentage": 24.62, "elapsed_time": "0:13:08", "remaining_time": "0:40:13", "throughput": 10811.3, "total_tokens": 8519680} |
| {"current_steps": 33, "total_steps": 130, "loss": 0.5437, "lr": 1.6985113652489374e-05, "epoch": 2.509090909090909, "percentage": 25.38, "elapsed_time": "0:13:32", "remaining_time": "0:39:47", "throughput": 10811.51, "total_tokens": 8781824} |
| {"current_steps": 34, "total_steps": 130, "loss": 0.5362, "lr": 1.6810158587867973e-05, "epoch": 2.581818181818182, "percentage": 26.15, "elapsed_time": "0:13:56", "remaining_time": "0:39:21", "throughput": 10812.27, "total_tokens": 9043968} |
| {"current_steps": 35, "total_steps": 130, "loss": 0.533, "lr": 1.6631226582407954e-05, "epoch": 2.6545454545454543, "percentage": 26.92, "elapsed_time": "0:14:20", "remaining_time": "0:38:56", "throughput": 10812.79, "total_tokens": 9306112} |
| {"current_steps": 36, "total_steps": 130, "loss": 0.5285, "lr": 1.6448422127361707e-05, "epoch": 2.7272727272727275, "percentage": 27.69, "elapsed_time": "0:14:44", "remaining_time": "0:38:30", "throughput": 10812.86, "total_tokens": 9568256} |
| {"current_steps": 37, "total_steps": 130, "loss": 0.523, "lr": 1.626185197538314e-05, "epoch": 2.8, "percentage": 28.46, "elapsed_time": "0:15:09", "remaining_time": "0:38:05", "throughput": 10812.88, "total_tokens": 9830400} |
| {"current_steps": 38, "total_steps": 130, "loss": 0.5036, "lr": 1.6071625078187113e-05, "epoch": 2.8727272727272726, "percentage": 29.23, "elapsed_time": "0:15:33", "remaining_time": "0:37:39", "throughput": 10814.6, "total_tokens": 10092544} |
| {"current_steps": 39, "total_steps": 130, "loss": 0.5291, "lr": 1.5877852522924733e-05, "epoch": 2.9454545454545453, "percentage": 30.0, "elapsed_time": "0:15:57", "remaining_time": "0:37:13", "throughput": 10815.27, "total_tokens": 10354688} |
| {"current_steps": 40, "total_steps": 130, "loss": 0.9222, "lr": 1.568064746731156e-05, "epoch": 3.0727272727272728, "percentage": 30.77, "elapsed_time": "0:16:27", "remaining_time": "0:37:02", "throughput": 10816.03, "total_tokens": 10682368} |
| {"current_steps": 41, "total_steps": 130, "loss": 0.3799, "lr": 1.5480125073546705e-05, "epoch": 3.1454545454545455, "percentage": 31.54, "elapsed_time": "0:16:51", "remaining_time": "0:36:36", "throughput": 10816.95, "total_tokens": 10944512} |
| {"current_steps": 42, "total_steps": 130, "loss": 0.3707, "lr": 1.527640244106133e-05, "epoch": 3.2181818181818183, "percentage": 32.31, "elapsed_time": "0:17:15", "remaining_time": "0:36:10", "throughput": 10817.35, "total_tokens": 11206656} |
| {"current_steps": 43, "total_steps": 130, "loss": 0.3597, "lr": 1.5069598538135905e-05, "epoch": 3.290909090909091, "percentage": 33.08, "elapsed_time": "0:17:40", "remaining_time": "0:35:45", "throughput": 10817.53, "total_tokens": 11468800} |
| {"current_steps": 44, "total_steps": 130, "loss": 0.371, "lr": 1.485983413242606e-05, "epoch": 3.3636363636363638, "percentage": 33.85, "elapsed_time": "0:18:04", "remaining_time": "0:35:19", "throughput": 10817.51, "total_tokens": 11730944} |
| {"current_steps": 45, "total_steps": 130, "loss": 0.3371, "lr": 1.4647231720437687e-05, "epoch": 3.4363636363636365, "percentage": 34.62, "elapsed_time": "0:18:28", "remaining_time": "0:34:54", "throughput": 10818.27, "total_tokens": 11993088} |
| {"current_steps": 46, "total_steps": 130, "loss": 0.3451, "lr": 1.4431915455992416e-05, "epoch": 3.509090909090909, "percentage": 35.38, "elapsed_time": "0:18:52", "remaining_time": "0:34:28", "throughput": 10818.51, "total_tokens": 12255232} |
| {"current_steps": 47, "total_steps": 130, "loss": 0.3496, "lr": 1.4214011077725293e-05, "epoch": 3.581818181818182, "percentage": 36.15, "elapsed_time": "0:19:16", "remaining_time": "0:34:03", "throughput": 10819.32, "total_tokens": 12517376} |
| {"current_steps": 48, "total_steps": 130, "loss": 0.3289, "lr": 1.3993645835656955e-05, "epoch": 3.6545454545454543, "percentage": 36.92, "elapsed_time": "0:19:41", "remaining_time": "0:33:37", "throughput": 10819.3, "total_tokens": 12779520} |
| {"current_steps": 49, "total_steps": 130, "loss": 0.3367, "lr": 1.3770948416883205e-05, "epoch": 3.7272727272727275, "percentage": 37.69, "elapsed_time": "0:20:05", "remaining_time": "0:33:12", "throughput": 10819.64, "total_tokens": 13041664} |
| {"current_steps": 50, "total_steps": 130, "loss": 0.3251, "lr": 1.3546048870425356e-05, "epoch": 3.8, "percentage": 38.46, "elapsed_time": "0:20:29", "remaining_time": "0:32:47", "throughput": 10820.1, "total_tokens": 13303808} |
| {"current_steps": 51, "total_steps": 130, "loss": 0.3273, "lr": 1.3319078531285286e-05, "epoch": 3.8727272727272726, "percentage": 39.23, "elapsed_time": "0:20:53", "remaining_time": "0:32:22", "throughput": 10820.34, "total_tokens": 13565952} |
| {"current_steps": 52, "total_steps": 130, "loss": 0.3283, "lr": 1.3090169943749475e-05, "epoch": 3.9454545454545453, "percentage": 40.0, "elapsed_time": "0:21:17", "remaining_time": "0:31:56", "throughput": 10820.53, "total_tokens": 13828096} |
| {"current_steps": 53, "total_steps": 130, "loss": 0.5664, "lr": 1.2859456783986892e-05, "epoch": 4.072727272727272, "percentage": 40.77, "elapsed_time": "0:21:48", "remaining_time": "0:31:40", "throughput": 10820.83, "total_tokens": 14155776} |
| {"current_steps": 54, "total_steps": 130, "loss": 0.2062, "lr": 1.262707378198587e-05, "epoch": 4.1454545454545455, "percentage": 41.54, "elapsed_time": "0:22:12", "remaining_time": "0:31:15", "throughput": 10821.3, "total_tokens": 14417920} |
| {"current_steps": 55, "total_steps": 130, "loss": 0.1994, "lr": 1.2393156642875579e-05, "epoch": 4.218181818181818, "percentage": 42.31, "elapsed_time": "0:22:36", "remaining_time": "0:30:49", "throughput": 10821.69, "total_tokens": 14680064} |
| {"current_steps": 56, "total_steps": 130, "loss": 0.1985, "lr": 1.2157841967678064e-05, "epoch": 4.290909090909091, "percentage": 43.08, "elapsed_time": "0:23:00", "remaining_time": "0:30:24", "throughput": 10822.14, "total_tokens": 14942208} |
| {"current_steps": 57, "total_steps": 130, "loss": 0.1943, "lr": 1.1921267173537085e-05, "epoch": 4.363636363636363, "percentage": 43.85, "elapsed_time": "0:23:24", "remaining_time": "0:29:59", "throughput": 10821.84, "total_tokens": 15204352} |
| {"current_steps": 58, "total_steps": 130, "loss": 0.1783, "lr": 1.1683570413470384e-05, "epoch": 4.4363636363636365, "percentage": 44.62, "elapsed_time": "0:23:49", "remaining_time": "0:29:34", "throughput": 10821.67, "total_tokens": 15466496} |
| {"current_steps": 59, "total_steps": 130, "loss": 0.1862, "lr": 1.1444890495692214e-05, "epoch": 4.509090909090909, "percentage": 45.38, "elapsed_time": "0:24:13", "remaining_time": "0:29:08", "throughput": 10822.4, "total_tokens": 15728640} |
| {"current_steps": 60, "total_steps": 130, "loss": 0.1765, "lr": 1.1205366802553231e-05, "epoch": 4.581818181818182, "percentage": 46.15, "elapsed_time": "0:24:37", "remaining_time": "0:28:43", "throughput": 10822.78, "total_tokens": 15990784} |
| {"current_steps": 61, "total_steps": 130, "loss": 0.1744, "lr": 1.0965139209145153e-05, "epoch": 4.654545454545454, "percentage": 46.92, "elapsed_time": "0:25:01", "remaining_time": "0:28:18", "throughput": 10823.57, "total_tokens": 16252928} |
| {"current_steps": 62, "total_steps": 130, "loss": 0.1726, "lr": 1.0724348001617626e-05, "epoch": 4.7272727272727275, "percentage": 47.69, "elapsed_time": "0:25:25", "remaining_time": "0:27:53", "throughput": 10824.04, "total_tokens": 16515072} |
| {"current_steps": 63, "total_steps": 130, "loss": 0.1842, "lr": 1.0483133795255072e-05, "epoch": 4.8, "percentage": 48.46, "elapsed_time": "0:25:49", "remaining_time": "0:27:28", "throughput": 10824.67, "total_tokens": 16777216} |
| {"current_steps": 64, "total_steps": 130, "loss": 0.1739, "lr": 1.0241637452361323e-05, "epoch": 4.872727272727273, "percentage": 49.23, "elapsed_time": "0:26:14", "remaining_time": "0:27:03", "throughput": 10824.95, "total_tokens": 17039360} |
| {"current_steps": 65, "total_steps": 130, "loss": 0.1746, "lr": 1e-05, "epoch": 4.945454545454545, "percentage": 50.0, "elapsed_time": "0:26:38", "remaining_time": "0:26:38", "throughput": 10825.38, "total_tokens": 17301504} |
| {"current_steps": 66, "total_steps": 130, "loss": 0.2826, "lr": 9.75836254763868e-06, "epoch": 5.072727272727272, "percentage": 50.77, "elapsed_time": "0:27:08", "remaining_time": "0:26:19", "throughput": 10825.88, "total_tokens": 17629184} |
| {"current_steps": 67, "total_steps": 130, "loss": 0.1051, "lr": 9.516866204744932e-06, "epoch": 5.1454545454545455, "percentage": 51.54, "elapsed_time": "0:27:32", "remaining_time": "0:25:53", "throughput": 10825.86, "total_tokens": 17891328} |
| {"current_steps": 68, "total_steps": 130, "loss": 0.0981, "lr": 9.275651998382377e-06, "epoch": 5.218181818181818, "percentage": 52.31, "elapsed_time": "0:27:56", "remaining_time": "0:25:28", "throughput": 10825.71, "total_tokens": 18153472} |
| {"current_steps": 69, "total_steps": 130, "loss": 0.0913, "lr": 9.034860790854848e-06, "epoch": 5.290909090909091, "percentage": 53.08, "elapsed_time": "0:28:21", "remaining_time": "0:25:03", "throughput": 10825.96, "total_tokens": 18415616} |
| {"current_steps": 70, "total_steps": 130, "loss": 0.0951, "lr": 8.79463319744677e-06, "epoch": 5.363636363636363, "percentage": 53.85, "elapsed_time": "0:28:45", "remaining_time": "0:24:38", "throughput": 10826.02, "total_tokens": 18677760} |
| {"current_steps": 71, "total_steps": 130, "loss": 0.091, "lr": 8.55510950430779e-06, "epoch": 5.4363636363636365, "percentage": 54.62, "elapsed_time": "0:29:09", "remaining_time": "0:24:13", "throughput": 10826.38, "total_tokens": 18939904} |
| {"current_steps": 72, "total_steps": 130, "loss": 0.0892, "lr": 8.316429586529616e-06, "epoch": 5.509090909090909, "percentage": 55.38, "elapsed_time": "0:29:33", "remaining_time": "0:23:48", "throughput": 10825.89, "total_tokens": 19202048} |
| {"current_steps": 73, "total_steps": 130, "loss": 0.0905, "lr": 8.078732826462917e-06, "epoch": 5.581818181818182, "percentage": 56.15, "elapsed_time": "0:29:57", "remaining_time": "0:23:23", "throughput": 10826.08, "total_tokens": 19464192} |
| {"current_steps": 74, "total_steps": 130, "loss": 0.0879, "lr": 7.84215803232194e-06, "epoch": 5.654545454545454, "percentage": 56.92, "elapsed_time": "0:30:22", "remaining_time": "0:22:58", "throughput": 10826.11, "total_tokens": 19726336} |
| {"current_steps": 75, "total_steps": 130, "loss": 0.081, "lr": 7.606843357124426e-06, "epoch": 5.7272727272727275, "percentage": 57.69, "elapsed_time": "0:30:46", "remaining_time": "0:22:33", "throughput": 10826.33, "total_tokens": 19988480} |
| {"current_steps": 76, "total_steps": 130, "loss": 0.0825, "lr": 7.372926218014131e-06, "epoch": 5.8, "percentage": 58.46, "elapsed_time": "0:31:10", "remaining_time": "0:22:08", "throughput": 10826.76, "total_tokens": 20250624} |
| {"current_steps": 77, "total_steps": 130, "loss": 0.0807, "lr": 7.140543216013109e-06, "epoch": 5.872727272727273, "percentage": 59.23, "elapsed_time": "0:31:34", "remaining_time": "0:21:44", "throughput": 10826.92, "total_tokens": 20512768} |
| {"current_steps": 78, "total_steps": 130, "loss": 0.0801, "lr": 6.909830056250527e-06, "epoch": 5.945454545454545, "percentage": 60.0, "elapsed_time": "0:31:58", "remaining_time": "0:21:19", "throughput": 10827.07, "total_tokens": 20774912} |
| {"current_steps": 79, "total_steps": 130, "loss": 0.1262, "lr": 6.680921468714718e-06, "epoch": 6.072727272727272, "percentage": 60.77, "elapsed_time": "0:32:28", "remaining_time": "0:20:58", "throughput": 10827.48, "total_tokens": 21102592} |
| {"current_steps": 80, "total_steps": 130, "loss": 0.044, "lr": 6.453951129574644e-06, "epoch": 6.1454545454545455, "percentage": 61.54, "elapsed_time": "0:32:53", "remaining_time": "0:20:33", "throughput": 10827.57, "total_tokens": 21364736} |
| {"current_steps": 81, "total_steps": 130, "loss": 0.0394, "lr": 6.229051583116796e-06, "epoch": 6.218181818181818, "percentage": 62.31, "elapsed_time": "0:33:17", "remaining_time": "0:20:08", "throughput": 10827.7, "total_tokens": 21626880} |
| {"current_steps": 82, "total_steps": 130, "loss": 0.0426, "lr": 6.006354164343047e-06, "epoch": 6.290909090909091, "percentage": 63.08, "elapsed_time": "0:33:41", "remaining_time": "0:19:43", "throughput": 10827.63, "total_tokens": 21889024} |
| {"current_steps": 83, "total_steps": 130, "loss": 0.0403, "lr": 5.785988922274711e-06, "epoch": 6.363636363636363, "percentage": 63.85, "elapsed_time": "0:34:05", "remaining_time": "0:19:18", "throughput": 10827.61, "total_tokens": 22151168} |
| {"current_steps": 84, "total_steps": 130, "loss": 0.0402, "lr": 5.5680845440075885e-06, "epoch": 6.4363636363636365, "percentage": 64.62, "elapsed_time": "0:34:29", "remaining_time": "0:18:53", "throughput": 10827.72, "total_tokens": 22413312} |
| {"current_steps": 85, "total_steps": 130, "loss": 0.0413, "lr": 5.352768279562315e-06, "epoch": 6.509090909090909, "percentage": 65.38, "elapsed_time": "0:34:54", "remaining_time": "0:18:28", "throughput": 10827.84, "total_tokens": 22675456} |
| {"current_steps": 86, "total_steps": 130, "loss": 0.0386, "lr": 5.14016586757394e-06, "epoch": 6.581818181818182, "percentage": 66.15, "elapsed_time": "0:35:18", "remaining_time": "0:18:03", "throughput": 10828.1, "total_tokens": 22937600} |
| {"current_steps": 87, "total_steps": 130, "loss": 0.042, "lr": 4.930401461864099e-06, "epoch": 6.654545454545454, "percentage": 66.92, "elapsed_time": "0:35:42", "remaining_time": "0:17:38", "throughput": 10828.17, "total_tokens": 23199744} |
| {"current_steps": 88, "total_steps": 130, "loss": 0.044, "lr": 4.7235975589386715e-06, "epoch": 6.7272727272727275, "percentage": 67.69, "elapsed_time": "0:36:06", "remaining_time": "0:17:14", "throughput": 10828.38, "total_tokens": 23461888} |
| {"current_steps": 89, "total_steps": 130, "loss": 0.0385, "lr": 4.519874926453303e-06, "epoch": 6.8, "percentage": 68.46, "elapsed_time": "0:36:30", "remaining_time": "0:16:49", "throughput": 10828.52, "total_tokens": 23724032} |
| {"current_steps": 90, "total_steps": 130, "loss": 0.0421, "lr": 4.319352532688444e-06, "epoch": 6.872727272727273, "percentage": 69.23, "elapsed_time": "0:36:55", "remaining_time": "0:16:24", "throughput": 10828.47, "total_tokens": 23986176} |
| {"current_steps": 91, "total_steps": 130, "loss": 0.0403, "lr": 4.12214747707527e-06, "epoch": 6.945454545454545, "percentage": 70.0, "elapsed_time": "0:37:19", "remaining_time": "0:15:59", "throughput": 10828.43, "total_tokens": 24248320} |
| {"current_steps": 92, "total_steps": 130, "loss": 0.0622, "lr": 3.9283749218128885e-06, "epoch": 7.072727272727272, "percentage": 70.77, "elapsed_time": "0:37:49", "remaining_time": "0:15:37", "throughput": 10828.36, "total_tokens": 24576000} |
| {"current_steps": 93, "total_steps": 130, "loss": 0.0202, "lr": 3.738148024616863e-06, "epoch": 7.1454545454545455, "percentage": 71.54, "elapsed_time": "0:38:13", "remaining_time": "0:15:12", "throughput": 10828.32, "total_tokens": 24838144} |
| {"current_steps": 94, "total_steps": 130, "loss": 0.0206, "lr": 3.5515778726382967e-06, "epoch": 7.218181818181818, "percentage": 72.31, "elapsed_time": "0:38:38", "remaining_time": "0:14:47", "throughput": 10828.37, "total_tokens": 25100288} |
| {"current_steps": 95, "total_steps": 130, "loss": 0.0197, "lr": 3.3687734175920505e-06, "epoch": 7.290909090909091, "percentage": 73.08, "elapsed_time": "0:39:02", "remaining_time": "0:14:22", "throughput": 10828.61, "total_tokens": 25362432} |
| {"current_steps": 96, "total_steps": 130, "loss": 0.0184, "lr": 3.1898414121320277e-06, "epoch": 7.363636363636363, "percentage": 73.85, "elapsed_time": "0:39:26", "remaining_time": "0:13:58", "throughput": 10828.64, "total_tokens": 25624576} |
| {"current_steps": 97, "total_steps": 130, "loss": 0.0191, "lr": 3.0148863475106315e-06, "epoch": 7.4363636363636365, "percentage": 74.62, "elapsed_time": "0:39:50", "remaining_time": "0:13:33", "throughput": 10828.83, "total_tokens": 25886720} |
| {"current_steps": 98, "total_steps": 130, "loss": 0.0186, "lr": 2.8440103925587904e-06, "epoch": 7.509090909090909, "percentage": 75.38, "elapsed_time": "0:40:14", "remaining_time": "0:13:08", "throughput": 10828.9, "total_tokens": 26148864} |
| {"current_steps": 99, "total_steps": 130, "loss": 0.0181, "lr": 2.6773133340222677e-06, "epoch": 7.581818181818182, "percentage": 76.15, "elapsed_time": "0:40:38", "remaining_time": "0:12:43", "throughput": 10829.06, "total_tokens": 26411008} |
| {"current_steps": 100, "total_steps": 130, "loss": 0.0196, "lr": 2.514892518288988e-06, "epoch": 7.654545454545454, "percentage": 76.92, "elapsed_time": "0:41:03", "remaining_time": "0:12:18", "throughput": 10829.22, "total_tokens": 26673152} |
| {"current_steps": 101, "total_steps": 130, "loss": 0.0179, "lr": 2.3568427945415163e-06, "epoch": 7.7272727272727275, "percentage": 77.69, "elapsed_time": "0:41:27", "remaining_time": "0:11:54", "throughput": 10829.49, "total_tokens": 26935296} |
| {"current_steps": 102, "total_steps": 130, "loss": 0.0187, "lr": 2.2032564593677773e-06, "epoch": 7.8, "percentage": 78.46, "elapsed_time": "0:41:51", "remaining_time": "0:11:29", "throughput": 10829.69, "total_tokens": 27197440} |
| {"current_steps": 103, "total_steps": 130, "loss": 0.0178, "lr": 2.0542232028624585e-06, "epoch": 7.872727272727273, "percentage": 79.23, "elapsed_time": "0:42:15", "remaining_time": "0:11:04", "throughput": 10829.96, "total_tokens": 27459584} |
| {"current_steps": 104, "total_steps": 130, "loss": 0.0187, "lr": 1.9098300562505266e-06, "epoch": 7.945454545454545, "percentage": 80.0, "elapsed_time": "0:42:39", "remaining_time": "0:10:39", "throughput": 10830.31, "total_tokens": 27721728} |
| {"current_steps": 105, "total_steps": 130, "loss": 0.0294, "lr": 1.7701613410634367e-06, "epoch": 8.072727272727272, "percentage": 80.77, "elapsed_time": "0:43:09", "remaining_time": "0:10:16", "throughput": 10830.67, "total_tokens": 28049408} |
| {"current_steps": 106, "total_steps": 130, "loss": 0.0091, "lr": 1.6352986198977327e-06, "epoch": 8.145454545454545, "percentage": 81.54, "elapsed_time": "0:43:33", "remaining_time": "0:09:51", "throughput": 10830.89, "total_tokens": 28311552} |
| {"current_steps": 107, "total_steps": 130, "loss": 0.0089, "lr": 1.5053206487847916e-06, "epoch": 8.218181818181819, "percentage": 82.31, "elapsed_time": "0:43:58", "remaining_time": "0:09:27", "throughput": 10830.84, "total_tokens": 28573696} |
| {"current_steps": 108, "total_steps": 130, "loss": 0.0098, "lr": 1.3803033311995072e-06, "epoch": 8.290909090909091, "percentage": 83.08, "elapsed_time": "0:44:22", "remaining_time": "0:09:02", "throughput": 10830.92, "total_tokens": 28835840} |
| {"current_steps": 109, "total_steps": 130, "loss": 0.0096, "lr": 1.2603196737348211e-06, "epoch": 8.363636363636363, "percentage": 83.85, "elapsed_time": "0:44:46", "remaining_time": "0:08:37", "throughput": 10830.92, "total_tokens": 29097984} |
| {"current_steps": 110, "total_steps": 130, "loss": 0.0088, "lr": 1.1454397434679022e-06, "epoch": 8.436363636363636, "percentage": 84.62, "elapsed_time": "0:45:10", "remaining_time": "0:08:12", "throughput": 10831.05, "total_tokens": 29360128} |
| {"current_steps": 111, "total_steps": 130, "loss": 0.0093, "lr": 1.0357306270429623e-06, "epoch": 8.50909090909091, "percentage": 85.38, "elapsed_time": "0:45:34", "remaining_time": "0:07:48", "throughput": 10831.08, "total_tokens": 29622272} |
| {"current_steps": 112, "total_steps": 130, "loss": 0.0091, "lr": 9.312563914945461e-07, "epoch": 8.581818181818182, "percentage": 86.15, "elapsed_time": "0:45:59", "remaining_time": "0:07:23", "throughput": 10831.07, "total_tokens": 29884416} |
| {"current_steps": 113, "total_steps": 130, "loss": 0.0082, "lr": 8.320780468341761e-07, "epoch": 8.654545454545454, "percentage": 86.92, "elapsed_time": "0:46:23", "remaining_time": "0:06:58", "throughput": 10831.03, "total_tokens": 30146560} |
| {"current_steps": 114, "total_steps": 130, "loss": 0.0088, "lr": 7.382535104222366e-07, "epoch": 8.727272727272727, "percentage": 87.69, "elapsed_time": "0:46:47", "remaining_time": "0:06:34", "throughput": 10831.05, "total_tokens": 30408704} |
| {"current_steps": 115, "total_steps": 130, "loss": 0.0092, "lr": 6.498375731458529e-07, "epoch": 8.8, "percentage": 88.46, "elapsed_time": "0:47:11", "remaining_time": "0:06:09", "throughput": 10831.16, "total_tokens": 30670848} |
| {"current_steps": 116, "total_steps": 130, "loss": 0.008, "lr": 5.668818674225684e-07, "epoch": 8.872727272727273, "percentage": 89.23, "elapsed_time": "0:47:35", "remaining_time": "0:05:44", "throughput": 10831.29, "total_tokens": 30932992} |
| {"current_steps": 117, "total_steps": 130, "loss": 0.0076, "lr": 4.894348370484648e-07, "epoch": 8.945454545454545, "percentage": 90.0, "elapsed_time": "0:48:00", "remaining_time": "0:05:20", "throughput": 10831.11, "total_tokens": 31195136} |
| {"current_steps": 118, "total_steps": 130, "loss": 0.0148, "lr": 4.1754170890833777e-07, "epoch": 9.072727272727272, "percentage": 90.77, "elapsed_time": "0:48:30", "remaining_time": "0:04:55", "throughput": 10831.33, "total_tokens": 31522816} |
| {"current_steps": 119, "total_steps": 130, "loss": 0.0061, "lr": 3.5124446656448654e-07, "epoch": 9.145454545454545, "percentage": 91.54, "elapsed_time": "0:48:54", "remaining_time": "0:04:31", "throughput": 10831.5, "total_tokens": 31784960} |
| {"current_steps": 120, "total_steps": 130, "loss": 0.0066, "lr": 2.905818257394799e-07, "epoch": 9.218181818181819, "percentage": 92.31, "elapsed_time": "0:49:18", "remaining_time": "0:04:06", "throughput": 10831.5, "total_tokens": 32047104} |
| {"current_steps": 121, "total_steps": 130, "loss": 0.0062, "lr": 2.355892117072789e-07, "epoch": 9.290909090909091, "percentage": 93.08, "elapsed_time": "0:49:42", "remaining_time": "0:03:41", "throughput": 10831.52, "total_tokens": 32309248} |
| {"current_steps": 122, "total_steps": 130, "loss": 0.0062, "lr": 1.8629873860586567e-07, "epoch": 9.363636363636363, "percentage": 93.85, "elapsed_time": "0:50:07", "remaining_time": "0:03:17", "throughput": 10831.64, "total_tokens": 32571392} |
| {"current_steps": 123, "total_steps": 130, "loss": 0.006, "lr": 1.4273919068349184e-07, "epoch": 9.436363636363636, "percentage": 94.62, "elapsed_time": "0:50:31", "remaining_time": "0:02:52", "throughput": 10831.79, "total_tokens": 32833536} |
| {"current_steps": 124, "total_steps": 130, "loss": 0.0061, "lr": 1.0493600548948879e-07, "epoch": 9.50909090909091, "percentage": 95.38, "elapsed_time": "0:50:55", "remaining_time": "0:02:27", "throughput": 10831.95, "total_tokens": 33095680} |
| {"current_steps": 125, "total_steps": 130, "loss": 0.0062, "lr": 7.291125901946027e-08, "epoch": 9.581818181818182, "percentage": 96.15, "elapsed_time": "0:51:19", "remaining_time": "0:02:03", "throughput": 10832.2, "total_tokens": 33357824} |
| {"current_steps": 126, "total_steps": 130, "loss": 0.0059, "lr": 4.6683652823513725e-08, "epoch": 9.654545454545454, "percentage": 96.92, "elapsed_time": "0:51:43", "remaining_time": "0:01:38", "throughput": 10832.3, "total_tokens": 33619968} |
| {"current_steps": 127, "total_steps": 130, "loss": 0.0064, "lr": 2.6268503085089547e-08, "epoch": 9.727272727272727, "percentage": 97.69, "elapsed_time": "0:52:07", "remaining_time": "0:01:13", "throughput": 10832.36, "total_tokens": 33882112} |
| {"current_steps": 128, "total_steps": 130, "loss": 0.0062, "lr": 1.1677731676733584e-08, "epoch": 9.8, "percentage": 98.46, "elapsed_time": "0:52:32", "remaining_time": "0:00:49", "throughput": 10832.47, "total_tokens": 34144256} |
| {"current_steps": 129, "total_steps": 130, "loss": 0.0061, "lr": 2.9198591980705847e-09, "epoch": 9.872727272727273, "percentage": 99.23, "elapsed_time": "0:52:56", "remaining_time": "0:00:24", "throughput": 10832.49, "total_tokens": 34406400} |
| {"current_steps": 130, "total_steps": 130, "loss": 0.0064, "lr": 0.0, "epoch": 9.945454545454545, "percentage": 100.0, "elapsed_time": "0:53:20", "remaining_time": "0:00:00", "throughput": 10832.53, "total_tokens": 34668544} |
|
|