| {"loss": 4.24950361, "token_acc": 0.40126382, "grad_norm": 96.5, "learning_rate": 1.9e-07, "memory(GiB)": 112.92, "train_speed(iter/s)": 0.130363, "epoch": 9.709e-05, "global_step/max_steps": "1/10301", "percentage": "0.01%", "elapsed_time": "7s", "remaining_time": "21h 26m 2s"} |
| {"loss": 2.01999081, "token_acc": 0.59580568, "grad_norm": 6.84375, "learning_rate": 4.961e-05, "memory(GiB)": 138.16, "train_speed(iter/s)": 0.135217, "epoch": 0.02485377, "global_step/max_steps": "256/10301", "percentage": "2.49%", "elapsed_time": "31m 33s", "remaining_time": "20h 38m 1s"} |
| {"loss": 1.45775986, "token_acc": 0.67850231, "grad_norm": 2.6875, "learning_rate": 9.922e-05, "memory(GiB)": 138.17, "train_speed(iter/s)": 0.135385, "epoch": 0.04970753, "global_step/max_steps": "512/10301", "percentage": "4.97%", "elapsed_time": "1h 3m 1s", "remaining_time": "20h 5m 1s"} |
| {"loss": 1.42178023, "token_acc": 0.68494967, "grad_norm": 2.296875, "learning_rate": 9.984e-05, "memory(GiB)": 138.17, "train_speed(iter/s)": 0.134581, "epoch": 0.0745613, "global_step/max_steps": "768/10301", "percentage": "7.46%", "elapsed_time": "1h 35m 6s", "remaining_time": "19h 40m 32s"} |
| {"loss": 1.34263551, "token_acc": 0.69967373, "grad_norm": 1.8984375, "learning_rate": 9.934e-05, "memory(GiB)": 138.17, "train_speed(iter/s)": 0.132515, "epoch": 0.09941506, "global_step/max_steps": "1024/10301", "percentage": "9.94%", "elapsed_time": "2h 8m 47s", "remaining_time": "19h 26m 45s"} |
| {"loss": 1.24449825, "token_acc": 0.7187272, "grad_norm": 1.84375, "learning_rate": 9.85e-05, "memory(GiB)": 138.17, "train_speed(iter/s)": 0.133056, "epoch": 0.12426883, "global_step/max_steps": "1280/10301", "percentage": "12.43%", "elapsed_time": "2h 40m 19s", "remaining_time": "18h 49m 57s"} |
| {"loss": 1.16240036, "token_acc": 0.73398361, "grad_norm": 1.859375, "learning_rate": 9.734e-05, "memory(GiB)": 138.17, "train_speed(iter/s)": 0.133627, "epoch": 0.14912259, "global_step/max_steps": "1536/10301", "percentage": "14.91%", "elapsed_time": "3h 11m 34s", "remaining_time": "18h 13m 12s"} |
| {"loss": 1.09534538, "token_acc": 0.74774444, "grad_norm": 1.7421875, "learning_rate": 9.586e-05, "memory(GiB)": 138.17, "train_speed(iter/s)": 0.133066, "epoch": 0.17397636, "global_step/max_steps": "1792/10301", "percentage": "17.40%", "elapsed_time": "3h 44m 26s", "remaining_time": "17h 45m 45s"} |
| {"loss": 1.0396682, "token_acc": 0.75772938, "grad_norm": 1.796875, "learning_rate": 9.407e-05, "memory(GiB)": 138.17, "train_speed(iter/s)": 0.132805, "epoch": 0.19883013, "global_step/max_steps": "2048/10301", "percentage": "19.88%", "elapsed_time": "4h 17m 0s", "remaining_time": "17h 15m 42s"} |
| {"loss": 0.97651255, "token_acc": 0.77122279, "grad_norm": 2.078125, "learning_rate": 9.199e-05, "memory(GiB)": 138.17, "train_speed(iter/s)": 0.133242, "epoch": 0.22368389, "global_step/max_steps": "2304/10301", "percentage": "22.37%", "elapsed_time": "4h 48m 11s", "remaining_time": "16h 40m 18s"} |
| {"loss": 0.92033613, "token_acc": 0.7830756, "grad_norm": 1.5703125, "learning_rate": 8.961e-05, "memory(GiB)": 138.17, "train_speed(iter/s)": 0.133383, "epoch": 0.24853766, "global_step/max_steps": "2560/10301", "percentage": "24.85%", "elapsed_time": "5h 19m 52s", "remaining_time": "16h 7m 15s"} |
| {"loss": 0.88013655, "token_acc": 0.79133499, "grad_norm": 1.59375, "learning_rate": 8.698e-05, "memory(GiB)": 138.17, "train_speed(iter/s)": 0.133433, "epoch": 0.27339142, "global_step/max_steps": "2816/10301", "percentage": "27.34%", "elapsed_time": "5h 51m 44s", "remaining_time": "15h 34m 55s"} |
| {"loss": 0.83347797, "token_acc": 0.80061759, "grad_norm": 1.1640625, "learning_rate": 8.409e-05, "memory(GiB)": 138.17, "train_speed(iter/s)": 0.132889, "epoch": 0.29824519, "global_step/max_steps": "3072/10301", "percentage": "29.82%", "elapsed_time": "6h 25m 16s", "remaining_time": "15h 6m 38s"} |
| {"loss": 0.78842294, "token_acc": 0.81023221, "grad_norm": 1.6875, "learning_rate": 8.097e-05, "memory(GiB)": 138.17, "train_speed(iter/s)": 0.133073, "epoch": 0.32309895, "global_step/max_steps": "3328/10301", "percentage": "32.31%", "elapsed_time": "6h 56m 48s", "remaining_time": "14h 33m 19s"} |
| {"loss": 0.7397579, "token_acc": 0.82125286, "grad_norm": 1.4921875, "learning_rate": 7.764e-05, "memory(GiB)": 138.17, "train_speed(iter/s)": 0.133335, "epoch": 0.34795272, "global_step/max_steps": "3584/10301", "percentage": "34.79%", "elapsed_time": "7h 27m 59s", "remaining_time": "13h 59m 36s"} |
| {"loss": 0.7087571, "token_acc": 0.82865897, "grad_norm": 2.1875, "learning_rate": 7.413e-05, "memory(GiB)": 138.17, "train_speed(iter/s)": 0.133468, "epoch": 0.37280649, "global_step/max_steps": "3840/10301", "percentage": "37.28%", "elapsed_time": "7h 59m 30s", "remaining_time": "13h 26m 48s"} |
| {"loss": 0.66005343, "token_acc": 0.8389954, "grad_norm": 1.1953125, "learning_rate": 7.045e-05, "memory(GiB)": 138.17, "train_speed(iter/s)": 0.133269, "epoch": 0.39766025, "global_step/max_steps": "4096/10301", "percentage": "39.76%", "elapsed_time": "8h 32m 14s", "remaining_time": "12h 55m 59s"} |
| {"loss": 0.61967212, "token_acc": 0.84847695, "grad_norm": 1.984375, "learning_rate": 6.664e-05, "memory(GiB)": 138.17, "train_speed(iter/s)": 0.133418, "epoch": 0.42251402, "global_step/max_steps": "4352/10301", "percentage": "42.25%", "elapsed_time": "9h 3m 39s", "remaining_time": "12h 23m 8s"} |
| {"loss": 0.58649731, "token_acc": 0.85602927, "grad_norm": 1.203125, "learning_rate": 6.271e-05, "memory(GiB)": 138.17, "train_speed(iter/s)": 0.133475, "epoch": 0.44736778, "global_step/max_steps": "4608/10301", "percentage": "44.73%", "elapsed_time": "9h 35m 23s", "remaining_time": "11h 50m 52s"} |
| {"loss": 0.53853643, "token_acc": 0.86720535, "grad_norm": 1.203125, "learning_rate": 5.87e-05, "memory(GiB)": 138.17, "train_speed(iter/s)": 0.133422, "epoch": 0.47222155, "global_step/max_steps": "4864/10301", "percentage": "47.22%", "elapsed_time": "10h 7m 35s", "remaining_time": "11h 19m 10s"} |
| {"loss": 0.49772173, "token_acc": 0.87674778, "grad_norm": 1.359375, "learning_rate": 5.462e-05, "memory(GiB)": 138.17, "train_speed(iter/s)": 0.133344, "epoch": 0.49707531, "global_step/max_steps": "5120/10301", "percentage": "49.70%", "elapsed_time": "10h 39m 56s", "remaining_time": "10h 47m 34s"} |
| {"loss": 0.46582431, "token_acc": 0.88507241, "grad_norm": 1.40625, "learning_rate": 5.052e-05, "memory(GiB)": 138.17, "train_speed(iter/s)": 0.13342, "epoch": 0.52192908, "global_step/max_steps": "5376/10301", "percentage": "52.19%", "elapsed_time": "11h 11m 33s", "remaining_time": "10h 15m 13s"} |
| {"loss": 0.44412497, "token_acc": 0.88964865, "grad_norm": 1.4140625, "learning_rate": 4.642e-05, "memory(GiB)": 138.17, "train_speed(iter/s)": 0.133319, "epoch": 0.54678285, "global_step/max_steps": "5632/10301", "percentage": "54.67%", "elapsed_time": "11h 44m 4s", "remaining_time": "9h 43m 41s"} |
| {"loss": 0.40981537, "token_acc": 0.89903667, "grad_norm": 1.8203125, "learning_rate": 4.233e-05, "memory(GiB)": 138.17, "train_speed(iter/s)": 0.133434, "epoch": 0.57163661, "global_step/max_steps": "5888/10301", "percentage": "57.16%", "elapsed_time": "12h 15m 26s", "remaining_time": "9h 11m 12s"} |
| {"loss": 0.38657734, "token_acc": 0.90348159, "grad_norm": 1.5234375, "learning_rate": 3.83e-05, "memory(GiB)": 138.17, "train_speed(iter/s)": 0.133189, "epoch": 0.59649038, "global_step/max_steps": "6144/10301", "percentage": "59.64%", "elapsed_time": "12h 48m 49s", "remaining_time": "8h 40m 11s"} |
| {"loss": 0.36623093, "token_acc": 0.90824468, "grad_norm": 1.03125, "learning_rate": 3.435e-05, "memory(GiB)": 138.17, "train_speed(iter/s)": 0.133233, "epoch": 0.62134414, "global_step/max_steps": "6400/10301", "percentage": "62.13%", "elapsed_time": "13h 20m 35s", "remaining_time": "8h 7m 59s"} |
| {"loss": 0.34115398, "token_acc": 0.91443513, "grad_norm": 2.203125, "learning_rate": 3.05e-05, "memory(GiB)": 138.17, "train_speed(iter/s)": 0.133334, "epoch": 0.64619791, "global_step/max_steps": "6656/10301", "percentage": "64.62%", "elapsed_time": "13h 51m 59s", "remaining_time": "7h 35m 37s"} |
| {"loss": 0.33424041, "token_acc": 0.91686025, "grad_norm": 1.6015625, "learning_rate": 2.679e-05, "memory(GiB)": 138.17, "train_speed(iter/s)": 0.133453, "epoch": 0.67105167, "global_step/max_steps": "6912/10301", "percentage": "67.10%", "elapsed_time": "14h 23m 13s", "remaining_time": "7h 3m 14s"} |
| {"loss": 0.31423533, "token_acc": 0.92212138, "grad_norm": 1.3515625, "learning_rate": 2.323e-05, "memory(GiB)": 138.17, "train_speed(iter/s)": 0.133271, "epoch": 0.69590544, "global_step/max_steps": "7168/10301", "percentage": "69.59%", "elapsed_time": "14h 56m 25s", "remaining_time": "6h 31m 48s"} |
| {"loss": 0.30621493, "token_acc": 0.92497889, "grad_norm": 2.03125, "learning_rate": 1.986e-05, "memory(GiB)": 138.17, "train_speed(iter/s)": 0.133231, "epoch": 0.7207592, "global_step/max_steps": "7424/10301", "percentage": "72.07%", "elapsed_time": "15h 28m 42s", "remaining_time": "5h 59m 54s"} |
| {"loss": 0.29216826, "token_acc": 0.92759896, "grad_norm": 1.5078125, "learning_rate": 1.668e-05, "memory(GiB)": 138.17, "train_speed(iter/s)": 0.133307, "epoch": 0.74561297, "global_step/max_steps": "7680/10301", "percentage": "74.56%", "elapsed_time": "16h 0m 11s", "remaining_time": "5h 27m 41s"} |
| {"loss": 0.28541595, "token_acc": 0.92859027, "grad_norm": 1.3671875, "learning_rate": 1.373e-05, "memory(GiB)": 138.17, "train_speed(iter/s)": 0.133282, "epoch": 0.77046674, "global_step/max_steps": "7936/10301", "percentage": "77.04%", "elapsed_time": "16h 32m 22s", "remaining_time": "4h 55m 44s"} |
| {"loss": 0.28136611, "token_acc": 0.93033218, "grad_norm": 2.0625, "learning_rate": 1.103e-05, "memory(GiB)": 138.17, "train_speed(iter/s)": 0.132989, "epoch": 0.7953205, "global_step/max_steps": "8192/10301", "percentage": "79.53%", "elapsed_time": "17h 6m 39s", "remaining_time": "4h 24m 18s"} |
| {"loss": 0.27359205, "token_acc": 0.93117378, "grad_norm": 1.28125, "learning_rate": 8.59e-06, "memory(GiB)": 138.17, "train_speed(iter/s)": 0.13298, "epoch": 0.82017427, "global_step/max_steps": "8448/10301", "percentage": "82.01%", "elapsed_time": "17h 38m 48s", "remaining_time": "3h 52m 14s"} |
| {"loss": 0.26918975, "token_acc": 0.93274282, "grad_norm": 1.3671875, "learning_rate": 6.43e-06, "memory(GiB)": 138.17, "train_speed(iter/s)": 0.132967, "epoch": 0.84502803, "global_step/max_steps": "8704/10301", "percentage": "84.50%", "elapsed_time": "18h 10m 59s", "remaining_time": "3h 20m 10s"} |
| {"loss": 0.26750854, "token_acc": 0.93308045, "grad_norm": 1.5234375, "learning_rate": 4.56e-06, "memory(GiB)": 138.17, "train_speed(iter/s)": 0.132959, "epoch": 0.8698818, "global_step/max_steps": "8960/10301", "percentage": "86.98%", "elapsed_time": "18h 43m 9s", "remaining_time": "2h 48m 5s"} |
| {"loss": 0.26333496, "token_acc": 0.93391206, "grad_norm": 1.9375, "learning_rate": 3e-06, "memory(GiB)": 138.17, "train_speed(iter/s)": 0.132818, "epoch": 0.89473556, "global_step/max_steps": "9216/10301", "percentage": "89.47%", "elapsed_time": "19h 16m 28s", "remaining_time": "2h 16m 9s"} |
| {"loss": 0.26137111, "token_acc": 0.93451909, "grad_norm": 0.9765625, "learning_rate": 1.76e-06, "memory(GiB)": 138.17, "train_speed(iter/s)": 0.132924, "epoch": 0.91958933, "global_step/max_steps": "9472/10301", "percentage": "91.95%", "elapsed_time": "19h 47m 38s", "remaining_time": "1h 43m 56s"} |
| {"loss": 0.26724005, "token_acc": 0.93295594, "grad_norm": 1.1484375, "learning_rate": 8.4e-07, "memory(GiB)": 138.17, "train_speed(iter/s)": 0.133007, "epoch": 0.9444431, "global_step/max_steps": "9728/10301", "percentage": "94.44%", "elapsed_time": "20h 18m 58s", "remaining_time": "1h 11m 48s"} |
| {"loss": 0.26430196, "token_acc": 0.93423544, "grad_norm": 1.7109375, "learning_rate": 2.6e-07, "memory(GiB)": 138.56, "train_speed(iter/s)": 0.133046, "epoch": 0.96929686, "global_step/max_steps": "9984/10301", "percentage": "96.92%", "elapsed_time": "20h 50m 41s", "remaining_time": "39m 42s"} |
| {"loss": 0.27376783, "token_acc": 0.9314503, "grad_norm": 1.1015625, "learning_rate": 1e-08, "memory(GiB)": 138.56, "train_speed(iter/s)": 0.132919, "epoch": 0.99415063, "global_step/max_steps": "10240/10301", "percentage": "99.41%", "elapsed_time": "21h 23m 59s", "remaining_time": "7m 38s"} |
| {"train_runtime": 77604.8237, "train_samples_per_second": 4.247, "train_steps_per_second": 0.133, "total_flos": 1.2615193287838972e+19, "train_loss": 0.64555652, "epoch": 1.0, "global_step/max_steps": "10301/10301", "percentage": "100.00%", "elapsed_time": "21h 33m 24s", "remaining_time": "0s"} |
|
|