| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.0998851320980872, |
| "eval_steps": 500, |
| "global_step": 1000, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 9.98851320980872e-05, |
| "grad_norm": 9.875, |
| "learning_rate": 0.0, |
| "loss": 1.4779, |
| "memory/device_reserved (GiB)": 86.98, |
| "memory/max_active (GiB)": 76.6, |
| "memory/max_allocated (GiB)": 76.6, |
| "step": 1, |
| "tokens_per_second_per_gpu": 8243.37, |
| "total_tokens": 40962 |
| }, |
| { |
| "epoch": 0.0001997702641961744, |
| "grad_norm": 8.25, |
| "learning_rate": 2.0000000000000002e-07, |
| "loss": 1.3415, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 2, |
| "tokens_per_second_per_gpu": 19760.57, |
| "total_tokens": 88833 |
| }, |
| { |
| "epoch": 0.0002996553962942616, |
| "grad_norm": 8.8125, |
| "learning_rate": 4.0000000000000003e-07, |
| "loss": 1.3753, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 3, |
| "tokens_per_second_per_gpu": 17636.24, |
| "total_tokens": 131779 |
| }, |
| { |
| "epoch": 0.0003995405283923488, |
| "grad_norm": 8.4375, |
| "learning_rate": 6.000000000000001e-07, |
| "loss": 1.4345, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 4, |
| "tokens_per_second_per_gpu": 20544.74, |
| "total_tokens": 180993 |
| }, |
| { |
| "epoch": 0.000499425660490436, |
| "grad_norm": 9.9375, |
| "learning_rate": 8.000000000000001e-07, |
| "loss": 1.486, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 5, |
| "tokens_per_second_per_gpu": 16871.06, |
| "total_tokens": 221420 |
| }, |
| { |
| "epoch": 0.0005993107925885232, |
| "grad_norm": 8.8125, |
| "learning_rate": 1.0000000000000002e-06, |
| "loss": 1.3823, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 6, |
| "tokens_per_second_per_gpu": 19640.03, |
| "total_tokens": 268993 |
| }, |
| { |
| "epoch": 0.0006991959246866104, |
| "grad_norm": 8.0625, |
| "learning_rate": 1.2000000000000002e-06, |
| "loss": 1.3434, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 7, |
| "tokens_per_second_per_gpu": 17908.21, |
| "total_tokens": 313714 |
| }, |
| { |
| "epoch": 0.0007990810567846976, |
| "grad_norm": 7.875, |
| "learning_rate": 1.4000000000000001e-06, |
| "loss": 1.3487, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 8, |
| "tokens_per_second_per_gpu": 18417.25, |
| "total_tokens": 359437 |
| }, |
| { |
| "epoch": 0.0008989661888827848, |
| "grad_norm": 8.75, |
| "learning_rate": 1.6000000000000001e-06, |
| "loss": 1.4139, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 9, |
| "tokens_per_second_per_gpu": 19609.76, |
| "total_tokens": 406269 |
| }, |
| { |
| "epoch": 0.000998851320980872, |
| "grad_norm": 8.0, |
| "learning_rate": 1.8000000000000001e-06, |
| "loss": 1.4235, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 10, |
| "tokens_per_second_per_gpu": 20842.7, |
| "total_tokens": 456114 |
| }, |
| { |
| "epoch": 0.0010987364530789592, |
| "grad_norm": 8.25, |
| "learning_rate": 2.0000000000000003e-06, |
| "loss": 1.3933, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 11, |
| "tokens_per_second_per_gpu": 18011.03, |
| "total_tokens": 499704 |
| }, |
| { |
| "epoch": 0.0011986215851770463, |
| "grad_norm": 8.1875, |
| "learning_rate": 2.2e-06, |
| "loss": 1.4046, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 12, |
| "tokens_per_second_per_gpu": 18778.61, |
| "total_tokens": 545509 |
| }, |
| { |
| "epoch": 0.0012985067172751337, |
| "grad_norm": 6.96875, |
| "learning_rate": 2.4000000000000003e-06, |
| "loss": 1.2975, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 13, |
| "tokens_per_second_per_gpu": 18930.63, |
| "total_tokens": 593148 |
| }, |
| { |
| "epoch": 0.0013983918493732208, |
| "grad_norm": 7.09375, |
| "learning_rate": 2.6e-06, |
| "loss": 1.3302, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 14, |
| "tokens_per_second_per_gpu": 18373.51, |
| "total_tokens": 639166 |
| }, |
| { |
| "epoch": 0.001498276981471308, |
| "grad_norm": 7.0625, |
| "learning_rate": 2.8000000000000003e-06, |
| "loss": 1.3545, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 15, |
| "tokens_per_second_per_gpu": 19694.91, |
| "total_tokens": 687960 |
| }, |
| { |
| "epoch": 0.0015981621135693952, |
| "grad_norm": 7.6875, |
| "learning_rate": 3e-06, |
| "loss": 1.4662, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 16, |
| "tokens_per_second_per_gpu": 19953.67, |
| "total_tokens": 734740 |
| }, |
| { |
| "epoch": 0.0016980472456674823, |
| "grad_norm": 7.0, |
| "learning_rate": 3.2000000000000003e-06, |
| "loss": 1.3218, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 17, |
| "tokens_per_second_per_gpu": 18332.53, |
| "total_tokens": 778737 |
| }, |
| { |
| "epoch": 0.0017979323777655696, |
| "grad_norm": 6.71875, |
| "learning_rate": 3.4000000000000005e-06, |
| "loss": 1.3578, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 18, |
| "tokens_per_second_per_gpu": 18145.79, |
| "total_tokens": 822714 |
| }, |
| { |
| "epoch": 0.0018978175098636567, |
| "grad_norm": 6.3125, |
| "learning_rate": 3.6000000000000003e-06, |
| "loss": 1.3095, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 19, |
| "tokens_per_second_per_gpu": 16064.69, |
| "total_tokens": 862422 |
| }, |
| { |
| "epoch": 0.001997702641961744, |
| "grad_norm": 8.0, |
| "learning_rate": 3.8000000000000005e-06, |
| "loss": 1.3111, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 20, |
| "tokens_per_second_per_gpu": 19133.11, |
| "total_tokens": 908386 |
| }, |
| { |
| "epoch": 0.002097587774059831, |
| "grad_norm": 5.8125, |
| "learning_rate": 4.000000000000001e-06, |
| "loss": 1.3143, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 21, |
| "tokens_per_second_per_gpu": 21112.41, |
| "total_tokens": 958799 |
| }, |
| { |
| "epoch": 0.0021974729061579185, |
| "grad_norm": 5.15625, |
| "learning_rate": 4.2000000000000004e-06, |
| "loss": 1.1851, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 22, |
| "tokens_per_second_per_gpu": 16875.94, |
| "total_tokens": 1001224 |
| }, |
| { |
| "epoch": 0.0022973580382560058, |
| "grad_norm": 5.0, |
| "learning_rate": 4.4e-06, |
| "loss": 1.1666, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 23, |
| "tokens_per_second_per_gpu": 16288.84, |
| "total_tokens": 1041090 |
| }, |
| { |
| "epoch": 0.0023972431703540927, |
| "grad_norm": 5.3125, |
| "learning_rate": 4.600000000000001e-06, |
| "loss": 1.2269, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 24, |
| "tokens_per_second_per_gpu": 15587.26, |
| "total_tokens": 1078798 |
| }, |
| { |
| "epoch": 0.00249712830245218, |
| "grad_norm": 4.21875, |
| "learning_rate": 4.800000000000001e-06, |
| "loss": 1.1578, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 25, |
| "tokens_per_second_per_gpu": 17930.32, |
| "total_tokens": 1123045 |
| }, |
| { |
| "epoch": 0.0025970134345502673, |
| "grad_norm": 4.28125, |
| "learning_rate": 5e-06, |
| "loss": 1.2388, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 26, |
| "tokens_per_second_per_gpu": 20577.02, |
| "total_tokens": 1172554 |
| }, |
| { |
| "epoch": 0.002696898566648354, |
| "grad_norm": 3.453125, |
| "learning_rate": 5.2e-06, |
| "loss": 1.1585, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 27, |
| "tokens_per_second_per_gpu": 19405.27, |
| "total_tokens": 1219906 |
| }, |
| { |
| "epoch": 0.0027967836987464415, |
| "grad_norm": 3.375, |
| "learning_rate": 5.400000000000001e-06, |
| "loss": 1.1144, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 28, |
| "tokens_per_second_per_gpu": 18917.28, |
| "total_tokens": 1266344 |
| }, |
| { |
| "epoch": 0.002896668830844529, |
| "grad_norm": 3.203125, |
| "learning_rate": 5.600000000000001e-06, |
| "loss": 1.0446, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 29, |
| "tokens_per_second_per_gpu": 16874.54, |
| "total_tokens": 1308444 |
| }, |
| { |
| "epoch": 0.002996553962942616, |
| "grad_norm": 3.34375, |
| "learning_rate": 5.8e-06, |
| "loss": 1.1263, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 30, |
| "tokens_per_second_per_gpu": 18492.77, |
| "total_tokens": 1353101 |
| }, |
| { |
| "epoch": 0.003096439095040703, |
| "grad_norm": 2.71875, |
| "learning_rate": 6e-06, |
| "loss": 1.0484, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 31, |
| "tokens_per_second_per_gpu": 17416.39, |
| "total_tokens": 1396719 |
| }, |
| { |
| "epoch": 0.0031963242271387904, |
| "grad_norm": 2.671875, |
| "learning_rate": 6.200000000000001e-06, |
| "loss": 1.0232, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 32, |
| "tokens_per_second_per_gpu": 17462.08, |
| "total_tokens": 1439123 |
| }, |
| { |
| "epoch": 0.0032962093592368777, |
| "grad_norm": 2.28125, |
| "learning_rate": 6.4000000000000006e-06, |
| "loss": 1.0868, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 33, |
| "tokens_per_second_per_gpu": 19708.23, |
| "total_tokens": 1486970 |
| }, |
| { |
| "epoch": 0.0033960944913349646, |
| "grad_norm": 2.140625, |
| "learning_rate": 6.600000000000001e-06, |
| "loss": 1.0017, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 34, |
| "tokens_per_second_per_gpu": 18253.1, |
| "total_tokens": 1531013 |
| }, |
| { |
| "epoch": 0.003495979623433052, |
| "grad_norm": 2.078125, |
| "learning_rate": 6.800000000000001e-06, |
| "loss": 0.9939, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 35, |
| "tokens_per_second_per_gpu": 18028.16, |
| "total_tokens": 1574525 |
| }, |
| { |
| "epoch": 0.0035958647555311392, |
| "grad_norm": 1.90625, |
| "learning_rate": 7e-06, |
| "loss": 1.0111, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 36, |
| "tokens_per_second_per_gpu": 16813.31, |
| "total_tokens": 1615560 |
| }, |
| { |
| "epoch": 0.0036957498876292265, |
| "grad_norm": 1.7109375, |
| "learning_rate": 7.2000000000000005e-06, |
| "loss": 1.0056, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 37, |
| "tokens_per_second_per_gpu": 18608.55, |
| "total_tokens": 1660541 |
| }, |
| { |
| "epoch": 0.0037956350197273134, |
| "grad_norm": 1.65625, |
| "learning_rate": 7.4e-06, |
| "loss": 0.9551, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 38, |
| "tokens_per_second_per_gpu": 16008.72, |
| "total_tokens": 1700965 |
| }, |
| { |
| "epoch": 0.0038955201518254007, |
| "grad_norm": 1.5625, |
| "learning_rate": 7.600000000000001e-06, |
| "loss": 0.9944, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 39, |
| "tokens_per_second_per_gpu": 18581.27, |
| "total_tokens": 1746774 |
| }, |
| { |
| "epoch": 0.003995405283923488, |
| "grad_norm": 1.484375, |
| "learning_rate": 7.800000000000002e-06, |
| "loss": 0.9869, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 40, |
| "tokens_per_second_per_gpu": 20093.69, |
| "total_tokens": 1794883 |
| }, |
| { |
| "epoch": 0.004095290416021575, |
| "grad_norm": 1.359375, |
| "learning_rate": 8.000000000000001e-06, |
| "loss": 0.9399, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 41, |
| "tokens_per_second_per_gpu": 18810.89, |
| "total_tokens": 1841261 |
| }, |
| { |
| "epoch": 0.004195175548119662, |
| "grad_norm": 1.40625, |
| "learning_rate": 8.2e-06, |
| "loss": 0.9138, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 42, |
| "tokens_per_second_per_gpu": 17770.49, |
| "total_tokens": 1884703 |
| }, |
| { |
| "epoch": 0.004295060680217749, |
| "grad_norm": 1.40625, |
| "learning_rate": 8.400000000000001e-06, |
| "loss": 0.8892, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 43, |
| "tokens_per_second_per_gpu": 16237.8, |
| "total_tokens": 1924914 |
| }, |
| { |
| "epoch": 0.004394945812315837, |
| "grad_norm": 1.2734375, |
| "learning_rate": 8.6e-06, |
| "loss": 0.8984, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 44, |
| "tokens_per_second_per_gpu": 19662.27, |
| "total_tokens": 1973886 |
| }, |
| { |
| "epoch": 0.004494830944413924, |
| "grad_norm": 1.3046875, |
| "learning_rate": 8.8e-06, |
| "loss": 0.9596, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 45, |
| "tokens_per_second_per_gpu": 18586.65, |
| "total_tokens": 2018483 |
| }, |
| { |
| "epoch": 0.0045947160765120116, |
| "grad_norm": 1.2421875, |
| "learning_rate": 9e-06, |
| "loss": 0.913, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 46, |
| "tokens_per_second_per_gpu": 18524.22, |
| "total_tokens": 2064097 |
| }, |
| { |
| "epoch": 0.0046946012086100984, |
| "grad_norm": 1.2265625, |
| "learning_rate": 9.200000000000002e-06, |
| "loss": 0.9494, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 47, |
| "tokens_per_second_per_gpu": 18999.6, |
| "total_tokens": 2109868 |
| }, |
| { |
| "epoch": 0.004794486340708185, |
| "grad_norm": 1.203125, |
| "learning_rate": 9.4e-06, |
| "loss": 0.8813, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 48, |
| "tokens_per_second_per_gpu": 18857.28, |
| "total_tokens": 2156067 |
| }, |
| { |
| "epoch": 0.004894371472806273, |
| "grad_norm": 1.171875, |
| "learning_rate": 9.600000000000001e-06, |
| "loss": 0.8018, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 49, |
| "tokens_per_second_per_gpu": 16664.35, |
| "total_tokens": 2196879 |
| }, |
| { |
| "epoch": 0.00499425660490436, |
| "grad_norm": 1.1796875, |
| "learning_rate": 9.800000000000001e-06, |
| "loss": 0.9073, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 50, |
| "tokens_per_second_per_gpu": 20783.23, |
| "total_tokens": 2246949 |
| }, |
| { |
| "epoch": 0.005094141737002447, |
| "grad_norm": 1.1640625, |
| "learning_rate": 1e-05, |
| "loss": 0.8948, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 51, |
| "tokens_per_second_per_gpu": 19415.04, |
| "total_tokens": 2294931 |
| }, |
| { |
| "epoch": 0.005194026869100535, |
| "grad_norm": 1.1171875, |
| "learning_rate": 1.02e-05, |
| "loss": 0.9246, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 52, |
| "tokens_per_second_per_gpu": 20802.16, |
| "total_tokens": 2343942 |
| }, |
| { |
| "epoch": 0.0052939120011986215, |
| "grad_norm": 1.5078125, |
| "learning_rate": 1.04e-05, |
| "loss": 0.864, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 53, |
| "tokens_per_second_per_gpu": 18225.11, |
| "total_tokens": 2388139 |
| }, |
| { |
| "epoch": 0.005393797133296708, |
| "grad_norm": 1.0703125, |
| "learning_rate": 1.0600000000000002e-05, |
| "loss": 0.8455, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 54, |
| "tokens_per_second_per_gpu": 19175.62, |
| "total_tokens": 2435894 |
| }, |
| { |
| "epoch": 0.005493682265394796, |
| "grad_norm": 1.1171875, |
| "learning_rate": 1.0800000000000002e-05, |
| "loss": 0.7576, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 55, |
| "tokens_per_second_per_gpu": 16339.01, |
| "total_tokens": 2476460 |
| }, |
| { |
| "epoch": 0.005593567397492883, |
| "grad_norm": 1.125, |
| "learning_rate": 1.1000000000000001e-05, |
| "loss": 0.7952, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 56, |
| "tokens_per_second_per_gpu": 18003.43, |
| "total_tokens": 2521341 |
| }, |
| { |
| "epoch": 0.005693452529590971, |
| "grad_norm": 1.1796875, |
| "learning_rate": 1.1200000000000001e-05, |
| "loss": 0.7279, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 57, |
| "tokens_per_second_per_gpu": 16071.69, |
| "total_tokens": 2561139 |
| }, |
| { |
| "epoch": 0.005793337661689058, |
| "grad_norm": 1.171875, |
| "learning_rate": 1.14e-05, |
| "loss": 0.841, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 58, |
| "tokens_per_second_per_gpu": 17495.95, |
| "total_tokens": 2603702 |
| }, |
| { |
| "epoch": 0.0058932227937871446, |
| "grad_norm": 1.0546875, |
| "learning_rate": 1.16e-05, |
| "loss": 0.8575, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 59, |
| "tokens_per_second_per_gpu": 19617.38, |
| "total_tokens": 2651665 |
| }, |
| { |
| "epoch": 0.005993107925885232, |
| "grad_norm": 1.046875, |
| "learning_rate": 1.18e-05, |
| "loss": 0.8329, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 60, |
| "tokens_per_second_per_gpu": 18947.4, |
| "total_tokens": 2698698 |
| }, |
| { |
| "epoch": 0.006092993057983319, |
| "grad_norm": 1.0390625, |
| "learning_rate": 1.2e-05, |
| "loss": 0.7797, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 61, |
| "tokens_per_second_per_gpu": 18057.29, |
| "total_tokens": 2743457 |
| }, |
| { |
| "epoch": 0.006192878190081406, |
| "grad_norm": 1.0390625, |
| "learning_rate": 1.22e-05, |
| "loss": 0.7424, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 62, |
| "tokens_per_second_per_gpu": 17142.61, |
| "total_tokens": 2786649 |
| }, |
| { |
| "epoch": 0.006292763322179494, |
| "grad_norm": 1.1171875, |
| "learning_rate": 1.2400000000000002e-05, |
| "loss": 0.8005, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 63, |
| "tokens_per_second_per_gpu": 17120.79, |
| "total_tokens": 2828570 |
| }, |
| { |
| "epoch": 0.006392648454277581, |
| "grad_norm": 2.046875, |
| "learning_rate": 1.2600000000000001e-05, |
| "loss": 0.7657, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 64, |
| "tokens_per_second_per_gpu": 18101.39, |
| "total_tokens": 2872421 |
| }, |
| { |
| "epoch": 0.006492533586375668, |
| "grad_norm": 1.0859375, |
| "learning_rate": 1.2800000000000001e-05, |
| "loss": 0.7696, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 65, |
| "tokens_per_second_per_gpu": 17904.68, |
| "total_tokens": 2916417 |
| }, |
| { |
| "epoch": 0.006592418718473755, |
| "grad_norm": 1.0390625, |
| "learning_rate": 1.3000000000000001e-05, |
| "loss": 0.7335, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 66, |
| "tokens_per_second_per_gpu": 16890.55, |
| "total_tokens": 2958573 |
| }, |
| { |
| "epoch": 0.006692303850571842, |
| "grad_norm": 1.0625, |
| "learning_rate": 1.3200000000000002e-05, |
| "loss": 0.758, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 67, |
| "tokens_per_second_per_gpu": 19215.29, |
| "total_tokens": 3006135 |
| }, |
| { |
| "epoch": 0.006792188982669929, |
| "grad_norm": 1.09375, |
| "learning_rate": 1.3400000000000002e-05, |
| "loss": 0.7379, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 68, |
| "tokens_per_second_per_gpu": 16960.42, |
| "total_tokens": 3048323 |
| }, |
| { |
| "epoch": 0.006892074114768017, |
| "grad_norm": 1.015625, |
| "learning_rate": 1.3600000000000002e-05, |
| "loss": 0.7149, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 69, |
| "tokens_per_second_per_gpu": 18080.8, |
| "total_tokens": 3092850 |
| }, |
| { |
| "epoch": 0.006991959246866104, |
| "grad_norm": 1.0234375, |
| "learning_rate": 1.38e-05, |
| "loss": 0.7738, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 70, |
| "tokens_per_second_per_gpu": 20170.16, |
| "total_tokens": 3141049 |
| }, |
| { |
| "epoch": 0.0070918443789641916, |
| "grad_norm": 1.0859375, |
| "learning_rate": 1.4e-05, |
| "loss": 0.7906, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 71, |
| "tokens_per_second_per_gpu": 18126.92, |
| "total_tokens": 3184495 |
| }, |
| { |
| "epoch": 0.0071917295110622784, |
| "grad_norm": 1.0234375, |
| "learning_rate": 1.4200000000000001e-05, |
| "loss": 0.8002, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 72, |
| "tokens_per_second_per_gpu": 20161.62, |
| "total_tokens": 3232249 |
| }, |
| { |
| "epoch": 0.007291614643160365, |
| "grad_norm": 1.0390625, |
| "learning_rate": 1.4400000000000001e-05, |
| "loss": 0.817, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 73, |
| "tokens_per_second_per_gpu": 20645.27, |
| "total_tokens": 3281435 |
| }, |
| { |
| "epoch": 0.007391499775258453, |
| "grad_norm": 1.1171875, |
| "learning_rate": 1.46e-05, |
| "loss": 0.7147, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 74, |
| "tokens_per_second_per_gpu": 16148.65, |
| "total_tokens": 3321456 |
| }, |
| { |
| "epoch": 0.00749138490735654, |
| "grad_norm": 1.03125, |
| "learning_rate": 1.48e-05, |
| "loss": 0.7153, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 75, |
| "tokens_per_second_per_gpu": 17587.93, |
| "total_tokens": 3364463 |
| }, |
| { |
| "epoch": 0.007591270039454627, |
| "grad_norm": 1.046875, |
| "learning_rate": 1.5000000000000002e-05, |
| "loss": 0.7806, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 76, |
| "tokens_per_second_per_gpu": 19119.21, |
| "total_tokens": 3411507 |
| }, |
| { |
| "epoch": 0.007691155171552715, |
| "grad_norm": 1.015625, |
| "learning_rate": 1.5200000000000002e-05, |
| "loss": 0.7713, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 77, |
| "tokens_per_second_per_gpu": 19492.79, |
| "total_tokens": 3458778 |
| }, |
| { |
| "epoch": 0.0077910403036508015, |
| "grad_norm": 1.03125, |
| "learning_rate": 1.54e-05, |
| "loss": 0.7522, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 78, |
| "tokens_per_second_per_gpu": 20537.8, |
| "total_tokens": 3507274 |
| }, |
| { |
| "epoch": 0.007890925435748888, |
| "grad_norm": 1.015625, |
| "learning_rate": 1.5600000000000003e-05, |
| "loss": 0.6706, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 79, |
| "tokens_per_second_per_gpu": 16985.35, |
| "total_tokens": 3549783 |
| }, |
| { |
| "epoch": 0.007990810567846975, |
| "grad_norm": 1.0390625, |
| "learning_rate": 1.58e-05, |
| "loss": 0.7132, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 80, |
| "tokens_per_second_per_gpu": 18124.3, |
| "total_tokens": 3594794 |
| }, |
| { |
| "epoch": 0.008090695699945064, |
| "grad_norm": 1.015625, |
| "learning_rate": 1.6000000000000003e-05, |
| "loss": 0.7931, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 81, |
| "tokens_per_second_per_gpu": 21164.76, |
| "total_tokens": 3644056 |
| }, |
| { |
| "epoch": 0.00819058083204315, |
| "grad_norm": 1.046875, |
| "learning_rate": 1.62e-05, |
| "loss": 0.7121, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 82, |
| "tokens_per_second_per_gpu": 17748.32, |
| "total_tokens": 3687994 |
| }, |
| { |
| "epoch": 0.008290465964141238, |
| "grad_norm": 0.98046875, |
| "learning_rate": 1.64e-05, |
| "loss": 0.6742, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 83, |
| "tokens_per_second_per_gpu": 18630.57, |
| "total_tokens": 3734033 |
| }, |
| { |
| "epoch": 0.008390351096239325, |
| "grad_norm": 0.99609375, |
| "learning_rate": 1.66e-05, |
| "loss": 0.6896, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 84, |
| "tokens_per_second_per_gpu": 17730.4, |
| "total_tokens": 3777101 |
| }, |
| { |
| "epoch": 0.008490236228337411, |
| "grad_norm": 1.0703125, |
| "learning_rate": 1.6800000000000002e-05, |
| "loss": 0.6859, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 85, |
| "tokens_per_second_per_gpu": 18543.13, |
| "total_tokens": 3822199 |
| }, |
| { |
| "epoch": 0.008590121360435498, |
| "grad_norm": 1.015625, |
| "learning_rate": 1.7e-05, |
| "loss": 0.6824, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 86, |
| "tokens_per_second_per_gpu": 18246.7, |
| "total_tokens": 3866806 |
| }, |
| { |
| "epoch": 0.008690006492533587, |
| "grad_norm": 1.0234375, |
| "learning_rate": 1.72e-05, |
| "loss": 0.7252, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 87, |
| "tokens_per_second_per_gpu": 19550.06, |
| "total_tokens": 3914452 |
| }, |
| { |
| "epoch": 0.008789891624631674, |
| "grad_norm": 1.0390625, |
| "learning_rate": 1.7400000000000003e-05, |
| "loss": 0.642, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 88, |
| "tokens_per_second_per_gpu": 17625.61, |
| "total_tokens": 3957260 |
| }, |
| { |
| "epoch": 0.00888977675672976, |
| "grad_norm": 0.97265625, |
| "learning_rate": 1.76e-05, |
| "loss": 0.7135, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 89, |
| "tokens_per_second_per_gpu": 20639.71, |
| "total_tokens": 4007282 |
| }, |
| { |
| "epoch": 0.008989661888827848, |
| "grad_norm": 0.98828125, |
| "learning_rate": 1.7800000000000002e-05, |
| "loss": 0.7338, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 90, |
| "tokens_per_second_per_gpu": 20737.91, |
| "total_tokens": 4056792 |
| }, |
| { |
| "epoch": 0.009089547020925935, |
| "grad_norm": 0.9609375, |
| "learning_rate": 1.8e-05, |
| "loss": 0.6705, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 91, |
| "tokens_per_second_per_gpu": 18263.18, |
| "total_tokens": 4101429 |
| }, |
| { |
| "epoch": 0.009189432153024023, |
| "grad_norm": 1.125, |
| "learning_rate": 1.8200000000000002e-05, |
| "loss": 0.6818, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 92, |
| "tokens_per_second_per_gpu": 16507.6, |
| "total_tokens": 4142165 |
| }, |
| { |
| "epoch": 0.00928931728512211, |
| "grad_norm": 1.046875, |
| "learning_rate": 1.8400000000000003e-05, |
| "loss": 0.7067, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 93, |
| "tokens_per_second_per_gpu": 19972.82, |
| "total_tokens": 4189518 |
| }, |
| { |
| "epoch": 0.009389202417220197, |
| "grad_norm": 1.046875, |
| "learning_rate": 1.86e-05, |
| "loss": 0.7137, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 94, |
| "tokens_per_second_per_gpu": 18967.35, |
| "total_tokens": 4236108 |
| }, |
| { |
| "epoch": 0.009489087549318284, |
| "grad_norm": 1.0546875, |
| "learning_rate": 1.88e-05, |
| "loss": 0.6867, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 95, |
| "tokens_per_second_per_gpu": 18271.78, |
| "total_tokens": 4280134 |
| }, |
| { |
| "epoch": 0.00958897268141637, |
| "grad_norm": 1.03125, |
| "learning_rate": 1.9e-05, |
| "loss": 0.6651, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 96, |
| "tokens_per_second_per_gpu": 17804.55, |
| "total_tokens": 4323994 |
| }, |
| { |
| "epoch": 0.009688857813514458, |
| "grad_norm": 1.0390625, |
| "learning_rate": 1.9200000000000003e-05, |
| "loss": 0.6832, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 97, |
| "tokens_per_second_per_gpu": 17164.59, |
| "total_tokens": 4365898 |
| }, |
| { |
| "epoch": 0.009788742945612546, |
| "grad_norm": 1.0546875, |
| "learning_rate": 1.94e-05, |
| "loss": 0.7163, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 98, |
| "tokens_per_second_per_gpu": 19376.04, |
| "total_tokens": 4412792 |
| }, |
| { |
| "epoch": 0.009888628077710633, |
| "grad_norm": 1.0546875, |
| "learning_rate": 1.9600000000000002e-05, |
| "loss": 0.6589, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 99, |
| "tokens_per_second_per_gpu": 16998.89, |
| "total_tokens": 4454365 |
| }, |
| { |
| "epoch": 0.00998851320980872, |
| "grad_norm": 0.98828125, |
| "learning_rate": 1.98e-05, |
| "loss": 0.6456, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 100, |
| "tokens_per_second_per_gpu": 19673.35, |
| "total_tokens": 4501083 |
| }, |
| { |
| "epoch": 0.010088398341906807, |
| "grad_norm": 0.99609375, |
| "learning_rate": 2e-05, |
| "loss": 0.648, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 101, |
| "tokens_per_second_per_gpu": 18584.02, |
| "total_tokens": 4547371 |
| }, |
| { |
| "epoch": 0.010188283474004894, |
| "grad_norm": 1.03125, |
| "learning_rate": 1.9999939076577906e-05, |
| "loss": 0.6653, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 102, |
| "tokens_per_second_per_gpu": 18727.67, |
| "total_tokens": 4593306 |
| }, |
| { |
| "epoch": 0.010288168606102982, |
| "grad_norm": 0.98046875, |
| "learning_rate": 1.9999756307053947e-05, |
| "loss": 0.6515, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 103, |
| "tokens_per_second_per_gpu": 18131.83, |
| "total_tokens": 4639433 |
| }, |
| { |
| "epoch": 0.01038805373820107, |
| "grad_norm": 1.0625, |
| "learning_rate": 1.9999451693655125e-05, |
| "loss": 0.6943, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 104, |
| "tokens_per_second_per_gpu": 19484.13, |
| "total_tokens": 4687235 |
| }, |
| { |
| "epoch": 0.010487938870299156, |
| "grad_norm": 1.1015625, |
| "learning_rate": 1.9999025240093045e-05, |
| "loss": 0.6511, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 105, |
| "tokens_per_second_per_gpu": 17318.02, |
| "total_tokens": 4729861 |
| }, |
| { |
| "epoch": 0.010587824002397243, |
| "grad_norm": 1.0234375, |
| "learning_rate": 1.9998476951563914e-05, |
| "loss": 0.691, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 106, |
| "tokens_per_second_per_gpu": 18638.54, |
| "total_tokens": 4775601 |
| }, |
| { |
| "epoch": 0.01068770913449533, |
| "grad_norm": 0.9921875, |
| "learning_rate": 1.9997806834748455e-05, |
| "loss": 0.6842, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 107, |
| "tokens_per_second_per_gpu": 20067.07, |
| "total_tokens": 4824337 |
| }, |
| { |
| "epoch": 0.010787594266593417, |
| "grad_norm": 1.0, |
| "learning_rate": 1.9997014897811834e-05, |
| "loss": 0.6205, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 108, |
| "tokens_per_second_per_gpu": 18881.75, |
| "total_tokens": 4869394 |
| }, |
| { |
| "epoch": 0.010887479398691505, |
| "grad_norm": 1.015625, |
| "learning_rate": 1.9996101150403543e-05, |
| "loss": 0.6933, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 109, |
| "tokens_per_second_per_gpu": 20694.39, |
| "total_tokens": 4920754 |
| }, |
| { |
| "epoch": 0.010987364530789592, |
| "grad_norm": 1.0234375, |
| "learning_rate": 1.9995065603657317e-05, |
| "loss": 0.6624, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 110, |
| "tokens_per_second_per_gpu": 19216.67, |
| "total_tokens": 4967591 |
| }, |
| { |
| "epoch": 0.01108724966288768, |
| "grad_norm": 0.99609375, |
| "learning_rate": 1.999390827019096e-05, |
| "loss": 0.666, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 111, |
| "tokens_per_second_per_gpu": 19515.06, |
| "total_tokens": 5015301 |
| }, |
| { |
| "epoch": 0.011187134794985766, |
| "grad_norm": 0.99609375, |
| "learning_rate": 1.999262916410621e-05, |
| "loss": 0.604, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 112, |
| "tokens_per_second_per_gpu": 18859.71, |
| "total_tokens": 5061747 |
| }, |
| { |
| "epoch": 0.011287019927083853, |
| "grad_norm": 1.078125, |
| "learning_rate": 1.9991228300988586e-05, |
| "loss": 0.6128, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 113, |
| "tokens_per_second_per_gpu": 16901.47, |
| "total_tokens": 5102619 |
| }, |
| { |
| "epoch": 0.011386905059181942, |
| "grad_norm": 1.046875, |
| "learning_rate": 1.998970569790715e-05, |
| "loss": 0.6028, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 114, |
| "tokens_per_second_per_gpu": 18441.42, |
| "total_tokens": 5149611 |
| }, |
| { |
| "epoch": 0.011486790191280028, |
| "grad_norm": 1.0859375, |
| "learning_rate": 1.9988061373414342e-05, |
| "loss": 0.6268, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 115, |
| "tokens_per_second_per_gpu": 18813.74, |
| "total_tokens": 5195136 |
| }, |
| { |
| "epoch": 0.011586675323378115, |
| "grad_norm": 1.1328125, |
| "learning_rate": 1.9986295347545738e-05, |
| "loss": 0.5847, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 116, |
| "tokens_per_second_per_gpu": 17771.59, |
| "total_tokens": 5238688 |
| }, |
| { |
| "epoch": 0.011686560455476202, |
| "grad_norm": 1.0, |
| "learning_rate": 1.9984407641819812e-05, |
| "loss": 0.6286, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 117, |
| "tokens_per_second_per_gpu": 18788.16, |
| "total_tokens": 5284773 |
| }, |
| { |
| "epoch": 0.011786445587574289, |
| "grad_norm": 1.046875, |
| "learning_rate": 1.9982398279237657e-05, |
| "loss": 0.6314, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 118, |
| "tokens_per_second_per_gpu": 18806.58, |
| "total_tokens": 5330605 |
| }, |
| { |
| "epoch": 0.011886330719672376, |
| "grad_norm": 0.9921875, |
| "learning_rate": 1.9980267284282718e-05, |
| "loss": 0.6555, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 119, |
| "tokens_per_second_per_gpu": 20889.02, |
| "total_tokens": 5381886 |
| }, |
| { |
| "epoch": 0.011986215851770465, |
| "grad_norm": 1.0625, |
| "learning_rate": 1.9978014682920503e-05, |
| "loss": 0.6192, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 120, |
| "tokens_per_second_per_gpu": 18547.29, |
| "total_tokens": 5427929 |
| }, |
| { |
| "epoch": 0.012086100983868552, |
| "grad_norm": 0.99609375, |
| "learning_rate": 1.9975640502598243e-05, |
| "loss": 0.6199, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 121, |
| "tokens_per_second_per_gpu": 19617.73, |
| "total_tokens": 5477120 |
| }, |
| { |
| "epoch": 0.012185986115966638, |
| "grad_norm": 1.0, |
| "learning_rate": 1.997314477224458e-05, |
| "loss": 0.5961, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 122, |
| "tokens_per_second_per_gpu": 18775.7, |
| "total_tokens": 5523093 |
| }, |
| { |
| "epoch": 0.012285871248064725, |
| "grad_norm": 1.0703125, |
| "learning_rate": 1.9970527522269204e-05, |
| "loss": 0.6216, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 123, |
| "tokens_per_second_per_gpu": 16705.54, |
| "total_tokens": 5564130 |
| }, |
| { |
| "epoch": 0.012385756380162812, |
| "grad_norm": 1.0078125, |
| "learning_rate": 1.9967788784562474e-05, |
| "loss": 0.5953, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 124, |
| "tokens_per_second_per_gpu": 17016.96, |
| "total_tokens": 5606507 |
| }, |
| { |
| "epoch": 0.0124856415122609, |
| "grad_norm": 0.99609375, |
| "learning_rate": 1.9964928592495046e-05, |
| "loss": 0.6204, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 125, |
| "tokens_per_second_per_gpu": 18766.62, |
| "total_tokens": 5652803 |
| }, |
| { |
| "epoch": 0.012585526644358988, |
| "grad_norm": 0.99609375, |
| "learning_rate": 1.9961946980917457e-05, |
| "loss": 0.6115, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 126, |
| "tokens_per_second_per_gpu": 19247.22, |
| "total_tokens": 5699726 |
| }, |
| { |
| "epoch": 0.012685411776457075, |
| "grad_norm": 1.078125, |
| "learning_rate": 1.9958843986159705e-05, |
| "loss": 0.565, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 127, |
| "tokens_per_second_per_gpu": 15244.68, |
| "total_tokens": 5737177 |
| }, |
| { |
| "epoch": 0.012785296908555161, |
| "grad_norm": 1.046875, |
| "learning_rate": 1.99556196460308e-05, |
| "loss": 0.5976, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 128, |
| "tokens_per_second_per_gpu": 18070.5, |
| "total_tokens": 5780971 |
| }, |
| { |
| "epoch": 0.012885182040653248, |
| "grad_norm": 1.3046875, |
| "learning_rate": 1.9952273999818312e-05, |
| "loss": 0.6006, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 129, |
| "tokens_per_second_per_gpu": 16004.02, |
| "total_tokens": 5820791 |
| }, |
| { |
| "epoch": 0.012985067172751335, |
| "grad_norm": 1.0078125, |
| "learning_rate": 1.9948807088287884e-05, |
| "loss": 0.5546, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 130, |
| "tokens_per_second_per_gpu": 17616.14, |
| "total_tokens": 5864343 |
| }, |
| { |
| "epoch": 0.013084952304849424, |
| "grad_norm": 1.015625, |
| "learning_rate": 1.9945218953682736e-05, |
| "loss": 0.5823, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 131, |
| "tokens_per_second_per_gpu": 16860.07, |
| "total_tokens": 5906747 |
| }, |
| { |
| "epoch": 0.01318483743694751, |
| "grad_norm": 0.99609375, |
| "learning_rate": 1.9941509639723155e-05, |
| "loss": 0.5803, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 132, |
| "tokens_per_second_per_gpu": 17772.91, |
| "total_tokens": 5950850 |
| }, |
| { |
| "epoch": 0.013284722569045598, |
| "grad_norm": 1.0234375, |
| "learning_rate": 1.9937679191605964e-05, |
| "loss": 0.6474, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 133, |
| "tokens_per_second_per_gpu": 20154.63, |
| "total_tokens": 6000386 |
| }, |
| { |
| "epoch": 0.013384607701143685, |
| "grad_norm": 1.1015625, |
| "learning_rate": 1.9933727656003964e-05, |
| "loss": 0.5555, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 134, |
| "tokens_per_second_per_gpu": 14310.4, |
| "total_tokens": 6036301 |
| }, |
| { |
| "epoch": 0.013484492833241771, |
| "grad_norm": 0.96875, |
| "learning_rate": 1.992965508106537e-05, |
| "loss": 0.5518, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 135, |
| "tokens_per_second_per_gpu": 18142.79, |
| "total_tokens": 6080833 |
| }, |
| { |
| "epoch": 0.013584377965339858, |
| "grad_norm": 1.03125, |
| "learning_rate": 1.9925461516413224e-05, |
| "loss": 0.6084, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 136, |
| "tokens_per_second_per_gpu": 18133.64, |
| "total_tokens": 6126732 |
| }, |
| { |
| "epoch": 0.013684263097437947, |
| "grad_norm": 1.015625, |
| "learning_rate": 1.9921147013144782e-05, |
| "loss": 0.5763, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 137, |
| "tokens_per_second_per_gpu": 18002.0, |
| "total_tokens": 6170450 |
| }, |
| { |
| "epoch": 0.013784148229536034, |
| "grad_norm": 1.0078125, |
| "learning_rate": 1.9916711623830904e-05, |
| "loss": 0.5355, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 138, |
| "tokens_per_second_per_gpu": 16095.63, |
| "total_tokens": 6210299 |
| }, |
| { |
| "epoch": 0.01388403336163412, |
| "grad_norm": 1.0625, |
| "learning_rate": 1.991215540251542e-05, |
| "loss": 0.5808, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 139, |
| "tokens_per_second_per_gpu": 18650.9, |
| "total_tokens": 6254948 |
| }, |
| { |
| "epoch": 0.013983918493732208, |
| "grad_norm": 0.98046875, |
| "learning_rate": 1.9907478404714438e-05, |
| "loss": 0.5979, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 140, |
| "tokens_per_second_per_gpu": 20297.6, |
| "total_tokens": 6305011 |
| }, |
| { |
| "epoch": 0.014083803625830294, |
| "grad_norm": 1.015625, |
| "learning_rate": 1.9902680687415704e-05, |
| "loss": 0.582, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 141, |
| "tokens_per_second_per_gpu": 17056.17, |
| "total_tokens": 6347632 |
| }, |
| { |
| "epoch": 0.014183688757928383, |
| "grad_norm": 1.09375, |
| "learning_rate": 1.989776230907789e-05, |
| "loss": 0.6453, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 142, |
| "tokens_per_second_per_gpu": 18054.21, |
| "total_tokens": 6392193 |
| }, |
| { |
| "epoch": 0.01428357389002647, |
| "grad_norm": 1.0703125, |
| "learning_rate": 1.9892723329629885e-05, |
| "loss": 0.5983, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 143, |
| "tokens_per_second_per_gpu": 17722.52, |
| "total_tokens": 6436920 |
| }, |
| { |
| "epoch": 0.014383459022124557, |
| "grad_norm": 1.078125, |
| "learning_rate": 1.988756381047006e-05, |
| "loss": 0.5753, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 144, |
| "tokens_per_second_per_gpu": 16254.16, |
| "total_tokens": 6478117 |
| }, |
| { |
| "epoch": 0.014483344154222644, |
| "grad_norm": 0.9765625, |
| "learning_rate": 1.988228381446553e-05, |
| "loss": 0.5995, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 145, |
| "tokens_per_second_per_gpu": 19667.83, |
| "total_tokens": 6525699 |
| }, |
| { |
| "epoch": 0.01458322928632073, |
| "grad_norm": 1.0, |
| "learning_rate": 1.9876883405951378e-05, |
| "loss": 0.5977, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 146, |
| "tokens_per_second_per_gpu": 19651.42, |
| "total_tokens": 6573453 |
| }, |
| { |
| "epoch": 0.014683114418418818, |
| "grad_norm": 1.0546875, |
| "learning_rate": 1.987136265072988e-05, |
| "loss": 0.634, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 147, |
| "tokens_per_second_per_gpu": 21150.33, |
| "total_tokens": 6624754 |
| }, |
| { |
| "epoch": 0.014782999550516906, |
| "grad_norm": 1.0078125, |
| "learning_rate": 1.9865721616069695e-05, |
| "loss": 0.582, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 148, |
| "tokens_per_second_per_gpu": 17994.67, |
| "total_tokens": 6669304 |
| }, |
| { |
| "epoch": 0.014882884682614993, |
| "grad_norm": 1.0390625, |
| "learning_rate": 1.985996037070505e-05, |
| "loss": 0.574, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 149, |
| "tokens_per_second_per_gpu": 17365.56, |
| "total_tokens": 6711661 |
| }, |
| { |
| "epoch": 0.01498276981471308, |
| "grad_norm": 1.03125, |
| "learning_rate": 1.9854078984834904e-05, |
| "loss": 0.591, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 150, |
| "tokens_per_second_per_gpu": 19119.45, |
| "total_tokens": 6758301 |
| }, |
| { |
| "epoch": 0.015082654946811167, |
| "grad_norm": 0.98046875, |
| "learning_rate": 1.9848077530122083e-05, |
| "loss": 0.541, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 151, |
| "tokens_per_second_per_gpu": 17707.92, |
| "total_tokens": 6801621 |
| }, |
| { |
| "epoch": 0.015182540078909254, |
| "grad_norm": 1.0390625, |
| "learning_rate": 1.984195607969242e-05, |
| "loss": 0.6135, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 152, |
| "tokens_per_second_per_gpu": 17502.16, |
| "total_tokens": 6845361 |
| }, |
| { |
| "epoch": 0.015282425211007342, |
| "grad_norm": 1.0078125, |
| "learning_rate": 1.983571470813386e-05, |
| "loss": 0.6196, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 153, |
| "tokens_per_second_per_gpu": 19755.5, |
| "total_tokens": 6894342 |
| }, |
| { |
| "epoch": 0.01538231034310543, |
| "grad_norm": 1.078125, |
| "learning_rate": 1.9829353491495545e-05, |
| "loss": 0.544, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 154, |
| "tokens_per_second_per_gpu": 18517.83, |
| "total_tokens": 6939316 |
| }, |
| { |
| "epoch": 0.015482195475203516, |
| "grad_norm": 1.0546875, |
| "learning_rate": 1.982287250728689e-05, |
| "loss": 0.6107, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 155, |
| "tokens_per_second_per_gpu": 19691.98, |
| "total_tokens": 6987772 |
| }, |
| { |
| "epoch": 0.015582080607301603, |
| "grad_norm": 1.03125, |
| "learning_rate": 1.9816271834476642e-05, |
| "loss": 0.6226, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 156, |
| "tokens_per_second_per_gpu": 21383.14, |
| "total_tokens": 7039667 |
| }, |
| { |
| "epoch": 0.01568196573939969, |
| "grad_norm": 1.0703125, |
| "learning_rate": 1.9809551553491918e-05, |
| "loss": 0.5607, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 157, |
| "tokens_per_second_per_gpu": 17060.37, |
| "total_tokens": 7080343 |
| }, |
| { |
| "epoch": 0.015781850871497777, |
| "grad_norm": 1.0390625, |
| "learning_rate": 1.9802711746217222e-05, |
| "loss": 0.6185, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 158, |
| "tokens_per_second_per_gpu": 20126.33, |
| "total_tokens": 7130105 |
| }, |
| { |
| "epoch": 0.015881736003595864, |
| "grad_norm": 1.0859375, |
| "learning_rate": 1.979575249599344e-05, |
| "loss": 0.6005, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 159, |
| "tokens_per_second_per_gpu": 16630.21, |
| "total_tokens": 7171657 |
| }, |
| { |
| "epoch": 0.01598162113569395, |
| "grad_norm": 1.0390625, |
| "learning_rate": 1.9788673887616852e-05, |
| "loss": 0.5958, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 160, |
| "tokens_per_second_per_gpu": 21247.67, |
| "total_tokens": 7221634 |
| }, |
| { |
| "epoch": 0.016081506267792037, |
| "grad_norm": 0.98046875, |
| "learning_rate": 1.9781476007338058e-05, |
| "loss": 0.5536, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 161, |
| "tokens_per_second_per_gpu": 18179.95, |
| "total_tokens": 7266949 |
| }, |
| { |
| "epoch": 0.016181391399890128, |
| "grad_norm": 1.0234375, |
| "learning_rate": 1.9774158942860962e-05, |
| "loss": 0.5735, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 162, |
| "tokens_per_second_per_gpu": 19294.01, |
| "total_tokens": 7314475 |
| }, |
| { |
| "epoch": 0.016281276531988215, |
| "grad_norm": 1.109375, |
| "learning_rate": 1.9766722783341682e-05, |
| "loss": 0.5661, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 163, |
| "tokens_per_second_per_gpu": 17160.88, |
| "total_tokens": 7356122 |
| }, |
| { |
| "epoch": 0.0163811616640863, |
| "grad_norm": 0.94921875, |
| "learning_rate": 1.9759167619387474e-05, |
| "loss": 0.5724, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 164, |
| "tokens_per_second_per_gpu": 20031.67, |
| "total_tokens": 7404915 |
| }, |
| { |
| "epoch": 0.01648104679618439, |
| "grad_norm": 0.9921875, |
| "learning_rate": 1.9751493543055634e-05, |
| "loss": 0.5188, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 165, |
| "tokens_per_second_per_gpu": 18645.96, |
| "total_tokens": 7450909 |
| }, |
| { |
| "epoch": 0.016580931928282475, |
| "grad_norm": 0.96875, |
| "learning_rate": 1.9743700647852356e-05, |
| "loss": 0.5581, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 166, |
| "tokens_per_second_per_gpu": 20450.52, |
| "total_tokens": 7500716 |
| }, |
| { |
| "epoch": 0.016680817060380562, |
| "grad_norm": 1.0078125, |
| "learning_rate": 1.9735789028731603e-05, |
| "loss": 0.6003, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 167, |
| "tokens_per_second_per_gpu": 18029.38, |
| "total_tokens": 7545625 |
| }, |
| { |
| "epoch": 0.01678070219247865, |
| "grad_norm": 0.96484375, |
| "learning_rate": 1.972775878209397e-05, |
| "loss": 0.5534, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 168, |
| "tokens_per_second_per_gpu": 18306.53, |
| "total_tokens": 7591257 |
| }, |
| { |
| "epoch": 0.016880587324576736, |
| "grad_norm": 1.0625, |
| "learning_rate": 1.9719610005785466e-05, |
| "loss": 0.5268, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 169, |
| "tokens_per_second_per_gpu": 16971.41, |
| "total_tokens": 7633328 |
| }, |
| { |
| "epoch": 0.016980472456674823, |
| "grad_norm": 1.15625, |
| "learning_rate": 1.971134279909636e-05, |
| "loss": 0.5476, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 170, |
| "tokens_per_second_per_gpu": 15585.99, |
| "total_tokens": 7671631 |
| }, |
| { |
| "epoch": 0.01708035758877291, |
| "grad_norm": 1.0234375, |
| "learning_rate": 1.9702957262759964e-05, |
| "loss": 0.6494, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 171, |
| "tokens_per_second_per_gpu": 20532.2, |
| "total_tokens": 7721385 |
| }, |
| { |
| "epoch": 0.017180242720870997, |
| "grad_norm": 1.0078125, |
| "learning_rate": 1.9694453498951392e-05, |
| "loss": 0.5652, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 172, |
| "tokens_per_second_per_gpu": 18492.11, |
| "total_tokens": 7766388 |
| }, |
| { |
| "epoch": 0.017280127852969087, |
| "grad_norm": 1.0234375, |
| "learning_rate": 1.9685831611286312e-05, |
| "loss": 0.5508, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 173, |
| "tokens_per_second_per_gpu": 19519.67, |
| "total_tokens": 7814109 |
| }, |
| { |
| "epoch": 0.017380012985067174, |
| "grad_norm": 1.0234375, |
| "learning_rate": 1.9677091704819714e-05, |
| "loss": 0.5424, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 174, |
| "tokens_per_second_per_gpu": 18019.17, |
| "total_tokens": 7858610 |
| }, |
| { |
| "epoch": 0.01747989811716526, |
| "grad_norm": 1.015625, |
| "learning_rate": 1.9668233886044597e-05, |
| "loss": 0.6046, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 175, |
| "tokens_per_second_per_gpu": 20685.55, |
| "total_tokens": 7907100 |
| }, |
| { |
| "epoch": 0.017579783249263348, |
| "grad_norm": 0.98046875, |
| "learning_rate": 1.9659258262890683e-05, |
| "loss": 0.5475, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 176, |
| "tokens_per_second_per_gpu": 19322.29, |
| "total_tokens": 7953990 |
| }, |
| { |
| "epoch": 0.017679668381361435, |
| "grad_norm": 1.046875, |
| "learning_rate": 1.9650164944723116e-05, |
| "loss": 0.5525, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 177, |
| "tokens_per_second_per_gpu": 17699.63, |
| "total_tokens": 7998810 |
| }, |
| { |
| "epoch": 0.01777955351345952, |
| "grad_norm": 1.0703125, |
| "learning_rate": 1.96409540423411e-05, |
| "loss": 0.5338, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 178, |
| "tokens_per_second_per_gpu": 17720.92, |
| "total_tokens": 8041953 |
| }, |
| { |
| "epoch": 0.01787943864555761, |
| "grad_norm": 1.046875, |
| "learning_rate": 1.9631625667976584e-05, |
| "loss": 0.6071, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 179, |
| "tokens_per_second_per_gpu": 19699.73, |
| "total_tokens": 8089553 |
| }, |
| { |
| "epoch": 0.017979323777655695, |
| "grad_norm": 1.125, |
| "learning_rate": 1.9622179935292855e-05, |
| "loss": 0.5819, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 180, |
| "tokens_per_second_per_gpu": 17729.02, |
| "total_tokens": 8133623 |
| }, |
| { |
| "epoch": 0.018079208909753782, |
| "grad_norm": 1.0234375, |
| "learning_rate": 1.961261695938319e-05, |
| "loss": 0.573, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 181, |
| "tokens_per_second_per_gpu": 20525.09, |
| "total_tokens": 8182577 |
| }, |
| { |
| "epoch": 0.01817909404185187, |
| "grad_norm": 1.0078125, |
| "learning_rate": 1.9602936856769432e-05, |
| "loss": 0.5255, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 182, |
| "tokens_per_second_per_gpu": 17597.52, |
| "total_tokens": 8225616 |
| }, |
| { |
| "epoch": 0.018278979173949956, |
| "grad_norm": 1.0390625, |
| "learning_rate": 1.9593139745400575e-05, |
| "loss": 0.587, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 183, |
| "tokens_per_second_per_gpu": 19864.17, |
| "total_tokens": 8272813 |
| }, |
| { |
| "epoch": 0.018378864306048046, |
| "grad_norm": 1.0, |
| "learning_rate": 1.9583225744651334e-05, |
| "loss": 0.5602, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 184, |
| "tokens_per_second_per_gpu": 18250.92, |
| "total_tokens": 8317907 |
| }, |
| { |
| "epoch": 0.018478749438146133, |
| "grad_norm": 1.0703125, |
| "learning_rate": 1.9573194975320672e-05, |
| "loss": 0.4711, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 185, |
| "tokens_per_second_per_gpu": 14209.64, |
| "total_tokens": 8353779 |
| }, |
| { |
| "epoch": 0.01857863457024422, |
| "grad_norm": 1.046875, |
| "learning_rate": 1.9563047559630356e-05, |
| "loss": 0.6015, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 186, |
| "tokens_per_second_per_gpu": 19199.47, |
| "total_tokens": 8400755 |
| }, |
| { |
| "epoch": 0.018678519702342307, |
| "grad_norm": 1.046875, |
| "learning_rate": 1.9552783621223437e-05, |
| "loss": 0.5105, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 187, |
| "tokens_per_second_per_gpu": 19506.67, |
| "total_tokens": 8447073 |
| }, |
| { |
| "epoch": 0.018778404834440394, |
| "grad_norm": 0.98046875, |
| "learning_rate": 1.954240328516277e-05, |
| "loss": 0.5857, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 188, |
| "tokens_per_second_per_gpu": 21690.49, |
| "total_tokens": 8498995 |
| }, |
| { |
| "epoch": 0.01887828996653848, |
| "grad_norm": 1.09375, |
| "learning_rate": 1.9531906677929472e-05, |
| "loss": 0.5433, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 189, |
| "tokens_per_second_per_gpu": 18889.76, |
| "total_tokens": 8544811 |
| }, |
| { |
| "epoch": 0.018978175098636568, |
| "grad_norm": 1.015625, |
| "learning_rate": 1.9521293927421388e-05, |
| "loss": 0.5275, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 190, |
| "tokens_per_second_per_gpu": 17928.4, |
| "total_tokens": 8588703 |
| }, |
| { |
| "epoch": 0.019078060230734654, |
| "grad_norm": 1.046875, |
| "learning_rate": 1.9510565162951538e-05, |
| "loss": 0.5463, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 191, |
| "tokens_per_second_per_gpu": 17265.13, |
| "total_tokens": 8630893 |
| }, |
| { |
| "epoch": 0.01917794536283274, |
| "grad_norm": 1.0625, |
| "learning_rate": 1.9499720515246524e-05, |
| "loss": 0.5368, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 192, |
| "tokens_per_second_per_gpu": 16986.38, |
| "total_tokens": 8672973 |
| }, |
| { |
| "epoch": 0.019277830494930828, |
| "grad_norm": 1.078125, |
| "learning_rate": 1.9488760116444966e-05, |
| "loss": 0.5524, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 193, |
| "tokens_per_second_per_gpu": 17589.87, |
| "total_tokens": 8715648 |
| }, |
| { |
| "epoch": 0.019377715627028915, |
| "grad_norm": 1.046875, |
| "learning_rate": 1.947768410009586e-05, |
| "loss": 0.5248, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 194, |
| "tokens_per_second_per_gpu": 18909.14, |
| "total_tokens": 8760894 |
| }, |
| { |
| "epoch": 0.019477600759127005, |
| "grad_norm": 1.015625, |
| "learning_rate": 1.9466492601156964e-05, |
| "loss": 0.5234, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 195, |
| "tokens_per_second_per_gpu": 18256.57, |
| "total_tokens": 8806917 |
| }, |
| { |
| "epoch": 0.019577485891225092, |
| "grad_norm": 1.015625, |
| "learning_rate": 1.945518575599317e-05, |
| "loss": 0.4815, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 196, |
| "tokens_per_second_per_gpu": 16892.03, |
| "total_tokens": 8848209 |
| }, |
| { |
| "epoch": 0.01967737102332318, |
| "grad_norm": 1.046875, |
| "learning_rate": 1.944376370237481e-05, |
| "loss": 0.5487, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 197, |
| "tokens_per_second_per_gpu": 18435.99, |
| "total_tokens": 8893596 |
| }, |
| { |
| "epoch": 0.019777256155421266, |
| "grad_norm": 1.1640625, |
| "learning_rate": 1.943222657947601e-05, |
| "loss": 0.5513, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 198, |
| "tokens_per_second_per_gpu": 17293.09, |
| "total_tokens": 8935716 |
| }, |
| { |
| "epoch": 0.019877141287519353, |
| "grad_norm": 1.03125, |
| "learning_rate": 1.942057452787297e-05, |
| "loss": 0.5301, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 199, |
| "tokens_per_second_per_gpu": 17071.93, |
| "total_tokens": 8978524 |
| }, |
| { |
| "epoch": 0.01997702641961744, |
| "grad_norm": 1.203125, |
| "learning_rate": 1.9408807689542257e-05, |
| "loss": 0.6015, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 200, |
| "tokens_per_second_per_gpu": 17983.04, |
| "total_tokens": 9022769 |
| }, |
| { |
| "epoch": 0.020076911551715527, |
| "grad_norm": 0.99609375, |
| "learning_rate": 1.9396926207859085e-05, |
| "loss": 0.5086, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 201, |
| "tokens_per_second_per_gpu": 18544.64, |
| "total_tokens": 9068586 |
| }, |
| { |
| "epoch": 0.020176796683813614, |
| "grad_norm": 1.140625, |
| "learning_rate": 1.938493022759556e-05, |
| "loss": 0.5595, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 202, |
| "tokens_per_second_per_gpu": 19571.41, |
| "total_tokens": 9117162 |
| }, |
| { |
| "epoch": 0.0202766818159117, |
| "grad_norm": 1.0703125, |
| "learning_rate": 1.937281989491892e-05, |
| "loss": 0.5898, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 203, |
| "tokens_per_second_per_gpu": 19050.82, |
| "total_tokens": 9164422 |
| }, |
| { |
| "epoch": 0.020376566948009787, |
| "grad_norm": 1.0625, |
| "learning_rate": 1.9360595357389735e-05, |
| "loss": 0.5467, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 204, |
| "tokens_per_second_per_gpu": 18601.68, |
| "total_tokens": 9209783 |
| }, |
| { |
| "epoch": 0.020476452080107874, |
| "grad_norm": 1.0546875, |
| "learning_rate": 1.9348256763960146e-05, |
| "loss": 0.551, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 205, |
| "tokens_per_second_per_gpu": 18742.55, |
| "total_tokens": 9254835 |
| }, |
| { |
| "epoch": 0.020576337212205965, |
| "grad_norm": 1.0703125, |
| "learning_rate": 1.9335804264972018e-05, |
| "loss": 0.4828, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 206, |
| "tokens_per_second_per_gpu": 16024.4, |
| "total_tokens": 9294875 |
| }, |
| { |
| "epoch": 0.02067622234430405, |
| "grad_norm": 1.0234375, |
| "learning_rate": 1.9323238012155125e-05, |
| "loss": 0.5877, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 207, |
| "tokens_per_second_per_gpu": 19774.55, |
| "total_tokens": 9343127 |
| }, |
| { |
| "epoch": 0.02077610747640214, |
| "grad_norm": 1.0859375, |
| "learning_rate": 1.9310558158625286e-05, |
| "loss": 0.593, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 208, |
| "tokens_per_second_per_gpu": 18799.85, |
| "total_tokens": 9388693 |
| }, |
| { |
| "epoch": 0.020875992608500225, |
| "grad_norm": 1.0703125, |
| "learning_rate": 1.9297764858882516e-05, |
| "loss": 0.4974, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 209, |
| "tokens_per_second_per_gpu": 19057.68, |
| "total_tokens": 9435170 |
| }, |
| { |
| "epoch": 0.020975877740598312, |
| "grad_norm": 4.03125, |
| "learning_rate": 1.9284858268809135e-05, |
| "loss": 0.5196, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 210, |
| "tokens_per_second_per_gpu": 18224.29, |
| "total_tokens": 9479728 |
| }, |
| { |
| "epoch": 0.0210757628726964, |
| "grad_norm": 0.99609375, |
| "learning_rate": 1.9271838545667876e-05, |
| "loss": 0.5296, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 211, |
| "tokens_per_second_per_gpu": 21928.55, |
| "total_tokens": 9532152 |
| }, |
| { |
| "epoch": 0.021175648004794486, |
| "grad_norm": 1.03125, |
| "learning_rate": 1.925870584809995e-05, |
| "loss": 0.5262, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 212, |
| "tokens_per_second_per_gpu": 17696.69, |
| "total_tokens": 9575808 |
| }, |
| { |
| "epoch": 0.021275533136892573, |
| "grad_norm": 1.1484375, |
| "learning_rate": 1.9245460336123136e-05, |
| "loss": 0.5571, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 213, |
| "tokens_per_second_per_gpu": 16806.72, |
| "total_tokens": 9618295 |
| }, |
| { |
| "epoch": 0.02137541826899066, |
| "grad_norm": 1.046875, |
| "learning_rate": 1.923210217112981e-05, |
| "loss": 0.5041, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 214, |
| "tokens_per_second_per_gpu": 18831.48, |
| "total_tokens": 9662301 |
| }, |
| { |
| "epoch": 0.021475303401088747, |
| "grad_norm": 1.046875, |
| "learning_rate": 1.9218631515885007e-05, |
| "loss": 0.5231, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 215, |
| "tokens_per_second_per_gpu": 16461.58, |
| "total_tokens": 9703190 |
| }, |
| { |
| "epoch": 0.021575188533186834, |
| "grad_norm": 1.015625, |
| "learning_rate": 1.9205048534524405e-05, |
| "loss": 0.6079, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 216, |
| "tokens_per_second_per_gpu": 21287.11, |
| "total_tokens": 9754531 |
| }, |
| { |
| "epoch": 0.021675073665284924, |
| "grad_norm": 1.046875, |
| "learning_rate": 1.9191353392552346e-05, |
| "loss": 0.5376, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 217, |
| "tokens_per_second_per_gpu": 18028.11, |
| "total_tokens": 9797416 |
| }, |
| { |
| "epoch": 0.02177495879738301, |
| "grad_norm": 1.015625, |
| "learning_rate": 1.9177546256839814e-05, |
| "loss": 0.4622, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 218, |
| "tokens_per_second_per_gpu": 15611.94, |
| "total_tokens": 9836019 |
| }, |
| { |
| "epoch": 0.021874843929481098, |
| "grad_norm": 1.0390625, |
| "learning_rate": 1.9163627295622397e-05, |
| "loss": 0.5094, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 219, |
| "tokens_per_second_per_gpu": 17357.06, |
| "total_tokens": 9878932 |
| }, |
| { |
| "epoch": 0.021974729061579185, |
| "grad_norm": 1.078125, |
| "learning_rate": 1.914959667849825e-05, |
| "loss": 0.4949, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 220, |
| "tokens_per_second_per_gpu": 17750.15, |
| "total_tokens": 9922022 |
| }, |
| { |
| "epoch": 0.02207461419367727, |
| "grad_norm": 1.0703125, |
| "learning_rate": 1.913545457642601e-05, |
| "loss": 0.5137, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 221, |
| "tokens_per_second_per_gpu": 16924.18, |
| "total_tokens": 9964115 |
| }, |
| { |
| "epoch": 0.02217449932577536, |
| "grad_norm": 1.0625, |
| "learning_rate": 1.9121201161722732e-05, |
| "loss": 0.5185, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 222, |
| "tokens_per_second_per_gpu": 17421.71, |
| "total_tokens": 10006500 |
| }, |
| { |
| "epoch": 0.022274384457873445, |
| "grad_norm": 1.0546875, |
| "learning_rate": 1.910683660806177e-05, |
| "loss": 0.5358, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 223, |
| "tokens_per_second_per_gpu": 18662.16, |
| "total_tokens": 10051372 |
| }, |
| { |
| "epoch": 0.022374269589971532, |
| "grad_norm": 1.0, |
| "learning_rate": 1.9092361090470688e-05, |
| "loss": 0.5609, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 224, |
| "tokens_per_second_per_gpu": 20715.95, |
| "total_tokens": 10101597 |
| }, |
| { |
| "epoch": 0.02247415472206962, |
| "grad_norm": 1.0625, |
| "learning_rate": 1.907777478532909e-05, |
| "loss": 0.4757, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 225, |
| "tokens_per_second_per_gpu": 16981.74, |
| "total_tokens": 10142918 |
| }, |
| { |
| "epoch": 0.022574039854167706, |
| "grad_norm": 1.015625, |
| "learning_rate": 1.9063077870366504e-05, |
| "loss": 0.5302, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 226, |
| "tokens_per_second_per_gpu": 19896.85, |
| "total_tokens": 10191595 |
| }, |
| { |
| "epoch": 0.022673924986265793, |
| "grad_norm": 5.0625, |
| "learning_rate": 1.9048270524660197e-05, |
| "loss": 0.502, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 227, |
| "tokens_per_second_per_gpu": 19528.4, |
| "total_tokens": 10239443 |
| }, |
| { |
| "epoch": 0.022773810118363883, |
| "grad_norm": 1.0625, |
| "learning_rate": 1.903335292863301e-05, |
| "loss": 0.5454, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 228, |
| "tokens_per_second_per_gpu": 18761.49, |
| "total_tokens": 10286847 |
| }, |
| { |
| "epoch": 0.02287369525046197, |
| "grad_norm": 1.03125, |
| "learning_rate": 1.901832526405114e-05, |
| "loss": 0.5269, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 229, |
| "tokens_per_second_per_gpu": 22014.25, |
| "total_tokens": 10338804 |
| }, |
| { |
| "epoch": 0.022973580382560057, |
| "grad_norm": 1.0390625, |
| "learning_rate": 1.9003187714021936e-05, |
| "loss": 0.5163, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 230, |
| "tokens_per_second_per_gpu": 19617.38, |
| "total_tokens": 10385941 |
| }, |
| { |
| "epoch": 0.023073465514658144, |
| "grad_norm": 1.0, |
| "learning_rate": 1.8987940462991673e-05, |
| "loss": 0.4907, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 231, |
| "tokens_per_second_per_gpu": 16952.42, |
| "total_tokens": 10428295 |
| }, |
| { |
| "epoch": 0.02317335064675623, |
| "grad_norm": 0.9921875, |
| "learning_rate": 1.8972583696743284e-05, |
| "loss": 0.519, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 232, |
| "tokens_per_second_per_gpu": 18391.38, |
| "total_tokens": 10475782 |
| }, |
| { |
| "epoch": 0.023273235778854318, |
| "grad_norm": 1.0703125, |
| "learning_rate": 1.895711760239413e-05, |
| "loss": 0.5142, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 233, |
| "tokens_per_second_per_gpu": 18456.78, |
| "total_tokens": 10520937 |
| }, |
| { |
| "epoch": 0.023373120910952404, |
| "grad_norm": 1.1640625, |
| "learning_rate": 1.8941542368393683e-05, |
| "loss": 0.5679, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 234, |
| "tokens_per_second_per_gpu": 18947.6, |
| "total_tokens": 10567203 |
| }, |
| { |
| "epoch": 0.02347300604305049, |
| "grad_norm": 0.94140625, |
| "learning_rate": 1.892585818452126e-05, |
| "loss": 0.5676, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 235, |
| "tokens_per_second_per_gpu": 21453.85, |
| "total_tokens": 10620507 |
| }, |
| { |
| "epoch": 0.023572891175148578, |
| "grad_norm": 1.09375, |
| "learning_rate": 1.891006524188368e-05, |
| "loss": 0.5218, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 236, |
| "tokens_per_second_per_gpu": 17346.43, |
| "total_tokens": 10662969 |
| }, |
| { |
| "epoch": 0.023672776307246665, |
| "grad_norm": 1.0390625, |
| "learning_rate": 1.889416373291298e-05, |
| "loss": 0.5512, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 237, |
| "tokens_per_second_per_gpu": 17718.31, |
| "total_tokens": 10707126 |
| }, |
| { |
| "epoch": 0.023772661439344752, |
| "grad_norm": 1.0234375, |
| "learning_rate": 1.8878153851364013e-05, |
| "loss": 0.5454, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 238, |
| "tokens_per_second_per_gpu": 19729.28, |
| "total_tokens": 10756150 |
| }, |
| { |
| "epoch": 0.023872546571442842, |
| "grad_norm": 1.0, |
| "learning_rate": 1.8862035792312148e-05, |
| "loss": 0.5673, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 239, |
| "tokens_per_second_per_gpu": 19230.59, |
| "total_tokens": 10803451 |
| }, |
| { |
| "epoch": 0.02397243170354093, |
| "grad_norm": 1.0625, |
| "learning_rate": 1.884580975215084e-05, |
| "loss": 0.4678, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 240, |
| "tokens_per_second_per_gpu": 16075.38, |
| "total_tokens": 10844152 |
| }, |
| { |
| "epoch": 0.024072316835639016, |
| "grad_norm": 1.0703125, |
| "learning_rate": 1.8829475928589272e-05, |
| "loss": 0.5216, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 241, |
| "tokens_per_second_per_gpu": 16947.62, |
| "total_tokens": 10886504 |
| }, |
| { |
| "epoch": 0.024172201967737103, |
| "grad_norm": 1.0390625, |
| "learning_rate": 1.8813034520649923e-05, |
| "loss": 0.495, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 242, |
| "tokens_per_second_per_gpu": 16490.15, |
| "total_tokens": 10927577 |
| }, |
| { |
| "epoch": 0.02427208709983519, |
| "grad_norm": 1.046875, |
| "learning_rate": 1.879648572866617e-05, |
| "loss": 0.521, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 243, |
| "tokens_per_second_per_gpu": 19398.3, |
| "total_tokens": 10975538 |
| }, |
| { |
| "epoch": 0.024371972231933277, |
| "grad_norm": 1.046875, |
| "learning_rate": 1.8779829754279806e-05, |
| "loss": 0.5261, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 244, |
| "tokens_per_second_per_gpu": 17232.62, |
| "total_tokens": 11017683 |
| }, |
| { |
| "epoch": 0.024471857364031364, |
| "grad_norm": 1.046875, |
| "learning_rate": 1.8763066800438638e-05, |
| "loss": 0.5228, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 245, |
| "tokens_per_second_per_gpu": 17577.85, |
| "total_tokens": 11061522 |
| }, |
| { |
| "epoch": 0.02457174249612945, |
| "grad_norm": 0.9765625, |
| "learning_rate": 1.874619707139396e-05, |
| "loss": 0.4951, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 246, |
| "tokens_per_second_per_gpu": 17959.41, |
| "total_tokens": 11106561 |
| }, |
| { |
| "epoch": 0.024671627628227537, |
| "grad_norm": 0.98828125, |
| "learning_rate": 1.8729220772698096e-05, |
| "loss": 0.5425, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 247, |
| "tokens_per_second_per_gpu": 18537.52, |
| "total_tokens": 11153308 |
| }, |
| { |
| "epoch": 0.024771512760325624, |
| "grad_norm": 1.03125, |
| "learning_rate": 1.8712138111201898e-05, |
| "loss": 0.5187, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 248, |
| "tokens_per_second_per_gpu": 16956.83, |
| "total_tokens": 11194195 |
| }, |
| { |
| "epoch": 0.02487139789242371, |
| "grad_norm": 1.03125, |
| "learning_rate": 1.869494929505219e-05, |
| "loss": 0.5101, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 249, |
| "tokens_per_second_per_gpu": 18105.97, |
| "total_tokens": 11238072 |
| }, |
| { |
| "epoch": 0.0249712830245218, |
| "grad_norm": 1.015625, |
| "learning_rate": 1.8677654533689287e-05, |
| "loss": 0.4723, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 250, |
| "tokens_per_second_per_gpu": 18915.71, |
| "total_tokens": 11283289 |
| }, |
| { |
| "epoch": 0.02507116815661989, |
| "grad_norm": 1.0234375, |
| "learning_rate": 1.866025403784439e-05, |
| "loss": 0.5126, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 251, |
| "tokens_per_second_per_gpu": 18381.16, |
| "total_tokens": 11327416 |
| }, |
| { |
| "epoch": 0.025171053288717975, |
| "grad_norm": 1.0859375, |
| "learning_rate": 1.864274801953705e-05, |
| "loss": 0.5325, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 252, |
| "tokens_per_second_per_gpu": 15937.65, |
| "total_tokens": 11367142 |
| }, |
| { |
| "epoch": 0.025270938420816062, |
| "grad_norm": 1.34375, |
| "learning_rate": 1.8625136692072577e-05, |
| "loss": 0.518, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 253, |
| "tokens_per_second_per_gpu": 18123.03, |
| "total_tokens": 11411442 |
| }, |
| { |
| "epoch": 0.02537082355291415, |
| "grad_norm": 1.0234375, |
| "learning_rate": 1.860742027003944e-05, |
| "loss": 0.5035, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 254, |
| "tokens_per_second_per_gpu": 17837.27, |
| "total_tokens": 11455512 |
| }, |
| { |
| "epoch": 0.025470708685012236, |
| "grad_norm": 1.0078125, |
| "learning_rate": 1.8589598969306646e-05, |
| "loss": 0.4812, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 255, |
| "tokens_per_second_per_gpu": 18677.68, |
| "total_tokens": 11501242 |
| }, |
| { |
| "epoch": 0.025570593817110323, |
| "grad_norm": 0.95703125, |
| "learning_rate": 1.8571673007021124e-05, |
| "loss": 0.4369, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 256, |
| "tokens_per_second_per_gpu": 16378.74, |
| "total_tokens": 11541449 |
| }, |
| { |
| "epoch": 0.02567047894920841, |
| "grad_norm": 1.0, |
| "learning_rate": 1.855364260160507e-05, |
| "loss": 0.4969, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 257, |
| "tokens_per_second_per_gpu": 18536.15, |
| "total_tokens": 11586387 |
| }, |
| { |
| "epoch": 0.025770364081306497, |
| "grad_norm": 1.0546875, |
| "learning_rate": 1.8535507972753275e-05, |
| "loss": 0.5515, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 258, |
| "tokens_per_second_per_gpu": 19604.96, |
| "total_tokens": 11635059 |
| }, |
| { |
| "epoch": 0.025870249213404584, |
| "grad_norm": 1.4609375, |
| "learning_rate": 1.851726934143048e-05, |
| "loss": 0.4852, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 259, |
| "tokens_per_second_per_gpu": 17284.37, |
| "total_tokens": 11677499 |
| }, |
| { |
| "epoch": 0.02597013434550267, |
| "grad_norm": 1.0859375, |
| "learning_rate": 1.849892692986864e-05, |
| "loss": 0.5206, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 260, |
| "tokens_per_second_per_gpu": 17155.87, |
| "total_tokens": 11719610 |
| }, |
| { |
| "epoch": 0.02607001947760076, |
| "grad_norm": 1.0078125, |
| "learning_rate": 1.848048096156426e-05, |
| "loss": 0.4847, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 261, |
| "tokens_per_second_per_gpu": 19597.03, |
| "total_tokens": 11766478 |
| }, |
| { |
| "epoch": 0.026169904609698848, |
| "grad_norm": 1.03125, |
| "learning_rate": 1.8461931661275642e-05, |
| "loss": 0.5008, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 262, |
| "tokens_per_second_per_gpu": 19182.2, |
| "total_tokens": 11814476 |
| }, |
| { |
| "epoch": 0.026269789741796935, |
| "grad_norm": 0.9375, |
| "learning_rate": 1.8443279255020153e-05, |
| "loss": 0.5125, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 263, |
| "tokens_per_second_per_gpu": 21126.2, |
| "total_tokens": 11865959 |
| }, |
| { |
| "epoch": 0.02636967487389502, |
| "grad_norm": 0.98828125, |
| "learning_rate": 1.842452397007148e-05, |
| "loss": 0.5473, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 264, |
| "tokens_per_second_per_gpu": 20107.78, |
| "total_tokens": 11915303 |
| }, |
| { |
| "epoch": 0.02646956000599311, |
| "grad_norm": 1.0625, |
| "learning_rate": 1.8405666034956842e-05, |
| "loss": 0.5118, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 265, |
| "tokens_per_second_per_gpu": 16581.5, |
| "total_tokens": 11956888 |
| }, |
| { |
| "epoch": 0.026569445138091195, |
| "grad_norm": 0.97265625, |
| "learning_rate": 1.8386705679454243e-05, |
| "loss": 0.4893, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 266, |
| "tokens_per_second_per_gpu": 18772.48, |
| "total_tokens": 12001866 |
| }, |
| { |
| "epoch": 0.026669330270189282, |
| "grad_norm": 1.0625, |
| "learning_rate": 1.836764313458962e-05, |
| "loss": 0.5045, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 267, |
| "tokens_per_second_per_gpu": 16038.98, |
| "total_tokens": 12041689 |
| }, |
| { |
| "epoch": 0.02676921540228737, |
| "grad_norm": 1.046875, |
| "learning_rate": 1.8348478632634067e-05, |
| "loss": 0.4969, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 268, |
| "tokens_per_second_per_gpu": 17039.71, |
| "total_tokens": 12084140 |
| }, |
| { |
| "epoch": 0.026869100534385456, |
| "grad_norm": 1.015625, |
| "learning_rate": 1.8329212407100996e-05, |
| "loss": 0.5459, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 269, |
| "tokens_per_second_per_gpu": 20357.58, |
| "total_tokens": 12133252 |
| }, |
| { |
| "epoch": 0.026968985666483543, |
| "grad_norm": 0.9765625, |
| "learning_rate": 1.8309844692743283e-05, |
| "loss": 0.5052, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 270, |
| "tokens_per_second_per_gpu": 19910.54, |
| "total_tokens": 12181918 |
| }, |
| { |
| "epoch": 0.02706887079858163, |
| "grad_norm": 1.0390625, |
| "learning_rate": 1.8290375725550417e-05, |
| "loss": 0.5141, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 271, |
| "tokens_per_second_per_gpu": 18294.76, |
| "total_tokens": 12226107 |
| }, |
| { |
| "epoch": 0.027168755930679717, |
| "grad_norm": 0.984375, |
| "learning_rate": 1.827080574274562e-05, |
| "loss": 0.5605, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 272, |
| "tokens_per_second_per_gpu": 19639.78, |
| "total_tokens": 12274774 |
| }, |
| { |
| "epoch": 0.027268641062777807, |
| "grad_norm": 0.97265625, |
| "learning_rate": 1.8251134982782952e-05, |
| "loss": 0.5569, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 273, |
| "tokens_per_second_per_gpu": 20715.49, |
| "total_tokens": 12325027 |
| }, |
| { |
| "epoch": 0.027368526194875894, |
| "grad_norm": 1.0390625, |
| "learning_rate": 1.8231363685344422e-05, |
| "loss": 0.5389, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 274, |
| "tokens_per_second_per_gpu": 18047.21, |
| "total_tokens": 12369086 |
| }, |
| { |
| "epoch": 0.02746841132697398, |
| "grad_norm": 1.015625, |
| "learning_rate": 1.821149209133704e-05, |
| "loss": 0.4896, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 275, |
| "tokens_per_second_per_gpu": 18340.18, |
| "total_tokens": 12414271 |
| }, |
| { |
| "epoch": 0.027568296459072068, |
| "grad_norm": 1.046875, |
| "learning_rate": 1.819152044288992e-05, |
| "loss": 0.4762, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 276, |
| "tokens_per_second_per_gpu": 19089.79, |
| "total_tokens": 12459681 |
| }, |
| { |
| "epoch": 0.027668181591170155, |
| "grad_norm": 0.984375, |
| "learning_rate": 1.8171448983351284e-05, |
| "loss": 0.5327, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 277, |
| "tokens_per_second_per_gpu": 18793.23, |
| "total_tokens": 12506046 |
| }, |
| { |
| "epoch": 0.02776806672326824, |
| "grad_norm": 1.03125, |
| "learning_rate": 1.815127795728554e-05, |
| "loss": 0.4785, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 278, |
| "tokens_per_second_per_gpu": 18361.93, |
| "total_tokens": 12550215 |
| }, |
| { |
| "epoch": 0.02786795185536633, |
| "grad_norm": 0.9609375, |
| "learning_rate": 1.8131007610470278e-05, |
| "loss": 0.4658, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 279, |
| "tokens_per_second_per_gpu": 20140.12, |
| "total_tokens": 12599109 |
| }, |
| { |
| "epoch": 0.027967836987464415, |
| "grad_norm": 0.984375, |
| "learning_rate": 1.8110638189893267e-05, |
| "loss": 0.5672, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 280, |
| "tokens_per_second_per_gpu": 21231.66, |
| "total_tokens": 12650418 |
| }, |
| { |
| "epoch": 0.028067722119562502, |
| "grad_norm": 1.0078125, |
| "learning_rate": 1.8090169943749477e-05, |
| "loss": 0.5241, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 281, |
| "tokens_per_second_per_gpu": 18823.79, |
| "total_tokens": 12696996 |
| }, |
| { |
| "epoch": 0.02816760725166059, |
| "grad_norm": 0.9765625, |
| "learning_rate": 1.806960312143802e-05, |
| "loss": 0.5084, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 282, |
| "tokens_per_second_per_gpu": 19507.52, |
| "total_tokens": 12744486 |
| }, |
| { |
| "epoch": 0.028267492383758676, |
| "grad_norm": 1.015625, |
| "learning_rate": 1.804893797355914e-05, |
| "loss": 0.537, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 283, |
| "tokens_per_second_per_gpu": 20278.56, |
| "total_tokens": 12794381 |
| }, |
| { |
| "epoch": 0.028367377515856766, |
| "grad_norm": 1.0390625, |
| "learning_rate": 1.8028174751911147e-05, |
| "loss": 0.5501, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 284, |
| "tokens_per_second_per_gpu": 19920.09, |
| "total_tokens": 12842353 |
| }, |
| { |
| "epoch": 0.028467262647954853, |
| "grad_norm": 1.03125, |
| "learning_rate": 1.8007313709487334e-05, |
| "loss": 0.487, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 285, |
| "tokens_per_second_per_gpu": 18472.14, |
| "total_tokens": 12887368 |
| }, |
| { |
| "epoch": 0.02856714778005294, |
| "grad_norm": 1.03125, |
| "learning_rate": 1.798635510047293e-05, |
| "loss": 0.4733, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 286, |
| "tokens_per_second_per_gpu": 17903.3, |
| "total_tokens": 12930372 |
| }, |
| { |
| "epoch": 0.028667032912151027, |
| "grad_norm": 1.015625, |
| "learning_rate": 1.7965299180241963e-05, |
| "loss": 0.478, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 287, |
| "tokens_per_second_per_gpu": 17593.31, |
| "total_tokens": 12974566 |
| }, |
| { |
| "epoch": 0.028766918044249114, |
| "grad_norm": 1.0546875, |
| "learning_rate": 1.7944146205354182e-05, |
| "loss": 0.5189, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 288, |
| "tokens_per_second_per_gpu": 18942.14, |
| "total_tokens": 13021241 |
| }, |
| { |
| "epoch": 0.0288668031763472, |
| "grad_norm": 1.0390625, |
| "learning_rate": 1.792289643355191e-05, |
| "loss": 0.5124, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 289, |
| "tokens_per_second_per_gpu": 18756.68, |
| "total_tokens": 13067207 |
| }, |
| { |
| "epoch": 0.028966688308445288, |
| "grad_norm": 1.09375, |
| "learning_rate": 1.7901550123756906e-05, |
| "loss": 0.4909, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 290, |
| "tokens_per_second_per_gpu": 17000.27, |
| "total_tokens": 13109482 |
| }, |
| { |
| "epoch": 0.029066573440543374, |
| "grad_norm": 1.0234375, |
| "learning_rate": 1.788010753606722e-05, |
| "loss": 0.5352, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 291, |
| "tokens_per_second_per_gpu": 19516.61, |
| "total_tokens": 13157090 |
| }, |
| { |
| "epoch": 0.02916645857264146, |
| "grad_norm": 1.0390625, |
| "learning_rate": 1.785856893175402e-05, |
| "loss": 0.5216, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 292, |
| "tokens_per_second_per_gpu": 17696.17, |
| "total_tokens": 13201391 |
| }, |
| { |
| "epoch": 0.029266343704739548, |
| "grad_norm": 1.1953125, |
| "learning_rate": 1.78369345732584e-05, |
| "loss": 0.5069, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 293, |
| "tokens_per_second_per_gpu": 17687.66, |
| "total_tokens": 13244732 |
| }, |
| { |
| "epoch": 0.029366228836837635, |
| "grad_norm": 1.015625, |
| "learning_rate": 1.781520472418819e-05, |
| "loss": 0.5491, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 294, |
| "tokens_per_second_per_gpu": 19981.45, |
| "total_tokens": 13293892 |
| }, |
| { |
| "epoch": 0.029466113968935725, |
| "grad_norm": 0.984375, |
| "learning_rate": 1.7793379649314743e-05, |
| "loss": 0.5243, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 295, |
| "tokens_per_second_per_gpu": 21192.86, |
| "total_tokens": 13345141 |
| }, |
| { |
| "epoch": 0.029565999101033812, |
| "grad_norm": 0.95703125, |
| "learning_rate": 1.777145961456971e-05, |
| "loss": 0.5119, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 296, |
| "tokens_per_second_per_gpu": 19742.73, |
| "total_tokens": 13393443 |
| }, |
| { |
| "epoch": 0.0296658842331319, |
| "grad_norm": 0.9921875, |
| "learning_rate": 1.7749444887041797e-05, |
| "loss": 0.521, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 297, |
| "tokens_per_second_per_gpu": 18918.37, |
| "total_tokens": 13440225 |
| }, |
| { |
| "epoch": 0.029765769365229986, |
| "grad_norm": 1.0234375, |
| "learning_rate": 1.7727335734973512e-05, |
| "loss": 0.5153, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 298, |
| "tokens_per_second_per_gpu": 19484.4, |
| "total_tokens": 13486462 |
| }, |
| { |
| "epoch": 0.029865654497328073, |
| "grad_norm": 1.0859375, |
| "learning_rate": 1.7705132427757895e-05, |
| "loss": 0.4908, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 299, |
| "tokens_per_second_per_gpu": 18590.01, |
| "total_tokens": 13532632 |
| }, |
| { |
| "epoch": 0.02996553962942616, |
| "grad_norm": 1.09375, |
| "learning_rate": 1.7682835235935236e-05, |
| "loss": 0.5278, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 300, |
| "tokens_per_second_per_gpu": 19135.09, |
| "total_tokens": 13579070 |
| }, |
| { |
| "epoch": 0.030065424761524247, |
| "grad_norm": 1.0703125, |
| "learning_rate": 1.766044443118978e-05, |
| "loss": 0.4759, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 301, |
| "tokens_per_second_per_gpu": 18283.95, |
| "total_tokens": 13623918 |
| }, |
| { |
| "epoch": 0.030165309893622334, |
| "grad_norm": 1.0234375, |
| "learning_rate": 1.7637960286346423e-05, |
| "loss": 0.5244, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 302, |
| "tokens_per_second_per_gpu": 19133.09, |
| "total_tokens": 13670237 |
| }, |
| { |
| "epoch": 0.03026519502572042, |
| "grad_norm": 1.0078125, |
| "learning_rate": 1.761538307536737e-05, |
| "loss": 0.5141, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 303, |
| "tokens_per_second_per_gpu": 19248.59, |
| "total_tokens": 13717299 |
| }, |
| { |
| "epoch": 0.030365080157818507, |
| "grad_norm": 1.03125, |
| "learning_rate": 1.759271307334881e-05, |
| "loss": 0.4981, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 304, |
| "tokens_per_second_per_gpu": 18204.05, |
| "total_tokens": 13762208 |
| }, |
| { |
| "epoch": 0.030464965289916594, |
| "grad_norm": 1.5390625, |
| "learning_rate": 1.7569950556517566e-05, |
| "loss": 0.487, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 305, |
| "tokens_per_second_per_gpu": 18663.58, |
| "total_tokens": 13807816 |
| }, |
| { |
| "epoch": 0.030564850422014685, |
| "grad_norm": 1.0390625, |
| "learning_rate": 1.7547095802227723e-05, |
| "loss": 0.5029, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 306, |
| "tokens_per_second_per_gpu": 18010.27, |
| "total_tokens": 13852296 |
| }, |
| { |
| "epoch": 0.03066473555411277, |
| "grad_norm": 0.953125, |
| "learning_rate": 1.7524149088957244e-05, |
| "loss": 0.4872, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 307, |
| "tokens_per_second_per_gpu": 19421.97, |
| "total_tokens": 13900538 |
| }, |
| { |
| "epoch": 0.03076462068621086, |
| "grad_norm": 1.03125, |
| "learning_rate": 1.7501110696304598e-05, |
| "loss": 0.4879, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 308, |
| "tokens_per_second_per_gpu": 17969.56, |
| "total_tokens": 13944542 |
| }, |
| { |
| "epoch": 0.030864505818308945, |
| "grad_norm": 0.98828125, |
| "learning_rate": 1.747798090498532e-05, |
| "loss": 0.4495, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 309, |
| "tokens_per_second_per_gpu": 18007.97, |
| "total_tokens": 13988178 |
| }, |
| { |
| "epoch": 0.030964390950407032, |
| "grad_norm": 0.94140625, |
| "learning_rate": 1.7454759996828622e-05, |
| "loss": 0.4651, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 310, |
| "tokens_per_second_per_gpu": 19010.71, |
| "total_tokens": 14036025 |
| }, |
| { |
| "epoch": 0.03106427608250512, |
| "grad_norm": 1.0390625, |
| "learning_rate": 1.7431448254773943e-05, |
| "loss": 0.532, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 311, |
| "tokens_per_second_per_gpu": 18966.75, |
| "total_tokens": 14081781 |
| }, |
| { |
| "epoch": 0.031164161214603206, |
| "grad_norm": 0.99609375, |
| "learning_rate": 1.74080459628675e-05, |
| "loss": 0.5316, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 312, |
| "tokens_per_second_per_gpu": 19665.56, |
| "total_tokens": 14129766 |
| }, |
| { |
| "epoch": 0.031264046346701296, |
| "grad_norm": 0.9609375, |
| "learning_rate": 1.7384553406258842e-05, |
| "loss": 0.4403, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 313, |
| "tokens_per_second_per_gpu": 17855.82, |
| "total_tokens": 14174008 |
| }, |
| { |
| "epoch": 0.03136393147879938, |
| "grad_norm": 0.95703125, |
| "learning_rate": 1.7360970871197347e-05, |
| "loss": 0.4727, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 314, |
| "tokens_per_second_per_gpu": 17841.13, |
| "total_tokens": 14218809 |
| }, |
| { |
| "epoch": 0.03146381661089747, |
| "grad_norm": 1.046875, |
| "learning_rate": 1.7337298645028764e-05, |
| "loss": 0.4913, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 315, |
| "tokens_per_second_per_gpu": 18165.77, |
| "total_tokens": 14263022 |
| }, |
| { |
| "epoch": 0.031563701742995554, |
| "grad_norm": 1.015625, |
| "learning_rate": 1.7313537016191706e-05, |
| "loss": 0.5182, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 316, |
| "tokens_per_second_per_gpu": 18308.57, |
| "total_tokens": 14308512 |
| }, |
| { |
| "epoch": 0.031663586875093644, |
| "grad_norm": 2.15625, |
| "learning_rate": 1.7289686274214116e-05, |
| "loss": 0.4994, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 317, |
| "tokens_per_second_per_gpu": 18705.09, |
| "total_tokens": 14354147 |
| }, |
| { |
| "epoch": 0.03176347200719173, |
| "grad_norm": 0.9921875, |
| "learning_rate": 1.7265746709709762e-05, |
| "loss": 0.5177, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 318, |
| "tokens_per_second_per_gpu": 19544.81, |
| "total_tokens": 14402261 |
| }, |
| { |
| "epoch": 0.03186335713928982, |
| "grad_norm": 0.99609375, |
| "learning_rate": 1.7241718614374678e-05, |
| "loss": 0.5126, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 319, |
| "tokens_per_second_per_gpu": 19536.76, |
| "total_tokens": 14449838 |
| }, |
| { |
| "epoch": 0.0319632422713879, |
| "grad_norm": 1.015625, |
| "learning_rate": 1.7217602280983622e-05, |
| "loss": 0.4438, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 320, |
| "tokens_per_second_per_gpu": 15149.65, |
| "total_tokens": 14488127 |
| }, |
| { |
| "epoch": 0.03206312740348599, |
| "grad_norm": 1.0625, |
| "learning_rate": 1.7193398003386514e-05, |
| "loss": 0.538, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 321, |
| "tokens_per_second_per_gpu": 19976.01, |
| "total_tokens": 14536446 |
| }, |
| { |
| "epoch": 0.032163012535584075, |
| "grad_norm": 1.0234375, |
| "learning_rate": 1.716910607650483e-05, |
| "loss": 0.4728, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 322, |
| "tokens_per_second_per_gpu": 17459.16, |
| "total_tokens": 14579841 |
| }, |
| { |
| "epoch": 0.032262897667682165, |
| "grad_norm": 1.0546875, |
| "learning_rate": 1.7144726796328034e-05, |
| "loss": 0.5182, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 323, |
| "tokens_per_second_per_gpu": 17770.82, |
| "total_tokens": 14624238 |
| }, |
| { |
| "epoch": 0.032362782799780256, |
| "grad_norm": 1.0, |
| "learning_rate": 1.712026045990997e-05, |
| "loss": 0.4697, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 324, |
| "tokens_per_second_per_gpu": 17157.62, |
| "total_tokens": 14665490 |
| }, |
| { |
| "epoch": 0.03246266793187834, |
| "grad_norm": 1.03125, |
| "learning_rate": 1.709570736536521e-05, |
| "loss": 0.5361, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 325, |
| "tokens_per_second_per_gpu": 16988.15, |
| "total_tokens": 14708874 |
| }, |
| { |
| "epoch": 0.03256255306397643, |
| "grad_norm": 1.0703125, |
| "learning_rate": 1.7071067811865477e-05, |
| "loss": 0.4871, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 326, |
| "tokens_per_second_per_gpu": 16881.81, |
| "total_tokens": 14750657 |
| }, |
| { |
| "epoch": 0.03266243819607451, |
| "grad_norm": 0.98828125, |
| "learning_rate": 1.7046342099635948e-05, |
| "loss": 0.5409, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 327, |
| "tokens_per_second_per_gpu": 21657.15, |
| "total_tokens": 14800847 |
| }, |
| { |
| "epoch": 0.0327623233281726, |
| "grad_norm": 1.0078125, |
| "learning_rate": 1.7021530529951627e-05, |
| "loss": 0.4758, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 328, |
| "tokens_per_second_per_gpu": 17620.93, |
| "total_tokens": 14843795 |
| }, |
| { |
| "epoch": 0.03286220846027069, |
| "grad_norm": 0.97265625, |
| "learning_rate": 1.6996633405133656e-05, |
| "loss": 0.4971, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 329, |
| "tokens_per_second_per_gpu": 19737.26, |
| "total_tokens": 14891678 |
| }, |
| { |
| "epoch": 0.03296209359236878, |
| "grad_norm": 1.0625, |
| "learning_rate": 1.697165102854565e-05, |
| "loss": 0.474, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 330, |
| "tokens_per_second_per_gpu": 17700.75, |
| "total_tokens": 14935021 |
| }, |
| { |
| "epoch": 0.03306197872446686, |
| "grad_norm": 1.078125, |
| "learning_rate": 1.6946583704589973e-05, |
| "loss": 0.5282, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 331, |
| "tokens_per_second_per_gpu": 19790.9, |
| "total_tokens": 14983388 |
| }, |
| { |
| "epoch": 0.03316186385656495, |
| "grad_norm": 1.015625, |
| "learning_rate": 1.692143173870407e-05, |
| "loss": 0.5258, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 332, |
| "tokens_per_second_per_gpu": 19972.01, |
| "total_tokens": 15031594 |
| }, |
| { |
| "epoch": 0.033261748988663034, |
| "grad_norm": 1.0078125, |
| "learning_rate": 1.68961954373567e-05, |
| "loss": 0.4575, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 333, |
| "tokens_per_second_per_gpu": 18681.11, |
| "total_tokens": 15076740 |
| }, |
| { |
| "epoch": 0.033361634120761124, |
| "grad_norm": 1.015625, |
| "learning_rate": 1.6870875108044233e-05, |
| "loss": 0.4903, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 334, |
| "tokens_per_second_per_gpu": 17157.67, |
| "total_tokens": 15120321 |
| }, |
| { |
| "epoch": 0.033461519252859215, |
| "grad_norm": 1.03125, |
| "learning_rate": 1.684547105928689e-05, |
| "loss": 0.4515, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 335, |
| "tokens_per_second_per_gpu": 15719.21, |
| "total_tokens": 15158224 |
| }, |
| { |
| "epoch": 0.0335614043849573, |
| "grad_norm": 1.015625, |
| "learning_rate": 1.6819983600624986e-05, |
| "loss": 0.4877, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 336, |
| "tokens_per_second_per_gpu": 18868.7, |
| "total_tokens": 15204132 |
| }, |
| { |
| "epoch": 0.03366128951705539, |
| "grad_norm": 1.234375, |
| "learning_rate": 1.6794413042615168e-05, |
| "loss": 0.4875, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 337, |
| "tokens_per_second_per_gpu": 16502.82, |
| "total_tokens": 15246281 |
| }, |
| { |
| "epoch": 0.03376117464915347, |
| "grad_norm": 2.015625, |
| "learning_rate": 1.6768759696826608e-05, |
| "loss": 0.5036, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 338, |
| "tokens_per_second_per_gpu": 18350.8, |
| "total_tokens": 15290200 |
| }, |
| { |
| "epoch": 0.03386105978125156, |
| "grad_norm": 0.96484375, |
| "learning_rate": 1.6743023875837233e-05, |
| "loss": 0.4749, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 339, |
| "tokens_per_second_per_gpu": 18822.04, |
| "total_tokens": 15337650 |
| }, |
| { |
| "epoch": 0.033960944913349646, |
| "grad_norm": 0.99609375, |
| "learning_rate": 1.6717205893229904e-05, |
| "loss": 0.4843, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 340, |
| "tokens_per_second_per_gpu": 18742.82, |
| "total_tokens": 15383316 |
| }, |
| { |
| "epoch": 0.034060830045447736, |
| "grad_norm": 1.0390625, |
| "learning_rate": 1.6691306063588583e-05, |
| "loss": 0.4261, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 341, |
| "tokens_per_second_per_gpu": 16470.5, |
| "total_tokens": 15424698 |
| }, |
| { |
| "epoch": 0.03416071517754582, |
| "grad_norm": 0.9921875, |
| "learning_rate": 1.6665324702494524e-05, |
| "loss": 0.4358, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 342, |
| "tokens_per_second_per_gpu": 17679.61, |
| "total_tokens": 15468208 |
| }, |
| { |
| "epoch": 0.03426060030964391, |
| "grad_norm": 0.98828125, |
| "learning_rate": 1.6639262126522417e-05, |
| "loss": 0.4723, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 343, |
| "tokens_per_second_per_gpu": 17795.33, |
| "total_tokens": 15512033 |
| }, |
| { |
| "epoch": 0.03436048544174199, |
| "grad_norm": 0.96875, |
| "learning_rate": 1.661311865323652e-05, |
| "loss": 0.5252, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 344, |
| "tokens_per_second_per_gpu": 21287.65, |
| "total_tokens": 15564114 |
| }, |
| { |
| "epoch": 0.034460370573840084, |
| "grad_norm": 1.0390625, |
| "learning_rate": 1.6586894601186804e-05, |
| "loss": 0.4677, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 345, |
| "tokens_per_second_per_gpu": 18457.17, |
| "total_tokens": 15609874 |
| }, |
| { |
| "epoch": 0.034560255705938174, |
| "grad_norm": 1.0, |
| "learning_rate": 1.6560590289905074e-05, |
| "loss": 0.5226, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 346, |
| "tokens_per_second_per_gpu": 19767.3, |
| "total_tokens": 15657417 |
| }, |
| { |
| "epoch": 0.03466014083803626, |
| "grad_norm": 0.98828125, |
| "learning_rate": 1.6534206039901057e-05, |
| "loss": 0.4562, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 347, |
| "tokens_per_second_per_gpu": 19409.52, |
| "total_tokens": 15704110 |
| }, |
| { |
| "epoch": 0.03476002597013435, |
| "grad_norm": 1.0703125, |
| "learning_rate": 1.650774217265851e-05, |
| "loss": 0.4736, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 348, |
| "tokens_per_second_per_gpu": 16562.36, |
| "total_tokens": 15745533 |
| }, |
| { |
| "epoch": 0.03485991110223243, |
| "grad_norm": 0.96484375, |
| "learning_rate": 1.6481199010631312e-05, |
| "loss": 0.4421, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 349, |
| "tokens_per_second_per_gpu": 18111.37, |
| "total_tokens": 15790497 |
| }, |
| { |
| "epoch": 0.03495979623433052, |
| "grad_norm": 1.0546875, |
| "learning_rate": 1.645457687723951e-05, |
| "loss": 0.5272, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 350, |
| "tokens_per_second_per_gpu": 18123.01, |
| "total_tokens": 15835918 |
| }, |
| { |
| "epoch": 0.035059681366428605, |
| "grad_norm": 0.98046875, |
| "learning_rate": 1.6427876096865394e-05, |
| "loss": 0.4796, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 351, |
| "tokens_per_second_per_gpu": 19074.82, |
| "total_tokens": 15882568 |
| }, |
| { |
| "epoch": 0.035159566498526695, |
| "grad_norm": 1.0390625, |
| "learning_rate": 1.6401096994849558e-05, |
| "loss": 0.4702, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 352, |
| "tokens_per_second_per_gpu": 18347.85, |
| "total_tokens": 15927874 |
| }, |
| { |
| "epoch": 0.03525945163062478, |
| "grad_norm": 0.9765625, |
| "learning_rate": 1.63742398974869e-05, |
| "loss": 0.4702, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 353, |
| "tokens_per_second_per_gpu": 18320.93, |
| "total_tokens": 15974025 |
| }, |
| { |
| "epoch": 0.03535933676272287, |
| "grad_norm": 0.98828125, |
| "learning_rate": 1.6347305132022677e-05, |
| "loss": 0.5038, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 354, |
| "tokens_per_second_per_gpu": 20266.97, |
| "total_tokens": 16023301 |
| }, |
| { |
| "epoch": 0.03545922189482095, |
| "grad_norm": 1.09375, |
| "learning_rate": 1.632029302664851e-05, |
| "loss": 0.4681, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 355, |
| "tokens_per_second_per_gpu": 14593.18, |
| "total_tokens": 16059415 |
| }, |
| { |
| "epoch": 0.03555910702691904, |
| "grad_norm": 1.046875, |
| "learning_rate": 1.6293203910498375e-05, |
| "loss": 0.5048, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 356, |
| "tokens_per_second_per_gpu": 17647.42, |
| "total_tokens": 16102167 |
| }, |
| { |
| "epoch": 0.03565899215901713, |
| "grad_norm": 1.015625, |
| "learning_rate": 1.6266038113644605e-05, |
| "loss": 0.4722, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 357, |
| "tokens_per_second_per_gpu": 19000.51, |
| "total_tokens": 16148416 |
| }, |
| { |
| "epoch": 0.03575887729111522, |
| "grad_norm": 1.0078125, |
| "learning_rate": 1.6238795967093865e-05, |
| "loss": 0.4886, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 358, |
| "tokens_per_second_per_gpu": 19107.42, |
| "total_tokens": 16194773 |
| }, |
| { |
| "epoch": 0.03585876242321331, |
| "grad_norm": 0.96875, |
| "learning_rate": 1.6211477802783105e-05, |
| "loss": 0.4529, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 359, |
| "tokens_per_second_per_gpu": 18559.33, |
| "total_tokens": 16240465 |
| }, |
| { |
| "epoch": 0.03595864755531139, |
| "grad_norm": 1.0, |
| "learning_rate": 1.6184083953575543e-05, |
| "loss": 0.4638, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 360, |
| "tokens_per_second_per_gpu": 18688.95, |
| "total_tokens": 16286759 |
| }, |
| { |
| "epoch": 0.03605853268740948, |
| "grad_norm": 1.0078125, |
| "learning_rate": 1.6156614753256583e-05, |
| "loss": 0.4533, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 361, |
| "tokens_per_second_per_gpu": 18258.2, |
| "total_tokens": 16331195 |
| }, |
| { |
| "epoch": 0.036158417819507564, |
| "grad_norm": 1.0078125, |
| "learning_rate": 1.6129070536529767e-05, |
| "loss": 0.5191, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 362, |
| "tokens_per_second_per_gpu": 18698.11, |
| "total_tokens": 16378176 |
| }, |
| { |
| "epoch": 0.036258302951605655, |
| "grad_norm": 1.0078125, |
| "learning_rate": 1.610145163901268e-05, |
| "loss": 0.4656, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 363, |
| "tokens_per_second_per_gpu": 18091.65, |
| "total_tokens": 16423031 |
| }, |
| { |
| "epoch": 0.03635818808370374, |
| "grad_norm": 1.0390625, |
| "learning_rate": 1.607375839723287e-05, |
| "loss": 0.5201, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 364, |
| "tokens_per_second_per_gpu": 20080.33, |
| "total_tokens": 16472899 |
| }, |
| { |
| "epoch": 0.03645807321580183, |
| "grad_norm": 1.0390625, |
| "learning_rate": 1.6045991148623752e-05, |
| "loss": 0.4326, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 365, |
| "tokens_per_second_per_gpu": 15284.04, |
| "total_tokens": 16510300 |
| }, |
| { |
| "epoch": 0.03655795834789991, |
| "grad_norm": 0.98046875, |
| "learning_rate": 1.6018150231520486e-05, |
| "loss": 0.5057, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 366, |
| "tokens_per_second_per_gpu": 18603.95, |
| "total_tokens": 16556259 |
| }, |
| { |
| "epoch": 0.036657843479998, |
| "grad_norm": 0.98046875, |
| "learning_rate": 1.599023598515586e-05, |
| "loss": 0.4852, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 367, |
| "tokens_per_second_per_gpu": 18972.85, |
| "total_tokens": 16602771 |
| }, |
| { |
| "epoch": 0.03675772861209609, |
| "grad_norm": 0.96484375, |
| "learning_rate": 1.5962248749656158e-05, |
| "loss": 0.4484, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 368, |
| "tokens_per_second_per_gpu": 18752.14, |
| "total_tokens": 16649863 |
| }, |
| { |
| "epoch": 0.036857613744194176, |
| "grad_norm": 1.0, |
| "learning_rate": 1.5934188866037017e-05, |
| "loss": 0.4871, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 369, |
| "tokens_per_second_per_gpu": 18747.7, |
| "total_tokens": 16695468 |
| }, |
| { |
| "epoch": 0.036957498876292266, |
| "grad_norm": 1.0078125, |
| "learning_rate": 1.5906056676199256e-05, |
| "loss": 0.4475, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 370, |
| "tokens_per_second_per_gpu": 16922.36, |
| "total_tokens": 16738176 |
| }, |
| { |
| "epoch": 0.03705738400839035, |
| "grad_norm": 0.9921875, |
| "learning_rate": 1.5877852522924733e-05, |
| "loss": 0.4742, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 371, |
| "tokens_per_second_per_gpu": 18042.51, |
| "total_tokens": 16782261 |
| }, |
| { |
| "epoch": 0.03715726914048844, |
| "grad_norm": 0.96484375, |
| "learning_rate": 1.584957674987216e-05, |
| "loss": 0.4745, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 372, |
| "tokens_per_second_per_gpu": 19842.54, |
| "total_tokens": 16830975 |
| }, |
| { |
| "epoch": 0.03725715427258652, |
| "grad_norm": 1.0390625, |
| "learning_rate": 1.5821229701572897e-05, |
| "loss": 0.4499, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 373, |
| "tokens_per_second_per_gpu": 16786.67, |
| "total_tokens": 16872527 |
| }, |
| { |
| "epoch": 0.037357039404684614, |
| "grad_norm": 0.984375, |
| "learning_rate": 1.5792811723426787e-05, |
| "loss": 0.4368, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 374, |
| "tokens_per_second_per_gpu": 16910.29, |
| "total_tokens": 16915897 |
| }, |
| { |
| "epoch": 0.0374569245367827, |
| "grad_norm": 1.0078125, |
| "learning_rate": 1.5764323161697933e-05, |
| "loss": 0.4274, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 375, |
| "tokens_per_second_per_gpu": 16770.9, |
| "total_tokens": 16956784 |
| }, |
| { |
| "epoch": 0.03755680966888079, |
| "grad_norm": 1.09375, |
| "learning_rate": 1.573576436351046e-05, |
| "loss": 0.4516, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 376, |
| "tokens_per_second_per_gpu": 15393.62, |
| "total_tokens": 16995210 |
| }, |
| { |
| "epoch": 0.03765669480097887, |
| "grad_norm": 1.0625, |
| "learning_rate": 1.570713567684432e-05, |
| "loss": 0.4663, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 377, |
| "tokens_per_second_per_gpu": 16725.72, |
| "total_tokens": 17036548 |
| }, |
| { |
| "epoch": 0.03775657993307696, |
| "grad_norm": 0.98046875, |
| "learning_rate": 1.5678437450531014e-05, |
| "loss": 0.529, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 378, |
| "tokens_per_second_per_gpu": 22398.24, |
| "total_tokens": 17089299 |
| }, |
| { |
| "epoch": 0.03785646506517505, |
| "grad_norm": 0.93359375, |
| "learning_rate": 1.564967003424938e-05, |
| "loss": 0.4789, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 379, |
| "tokens_per_second_per_gpu": 19240.37, |
| "total_tokens": 17135363 |
| }, |
| { |
| "epoch": 0.037956350197273135, |
| "grad_norm": 0.98828125, |
| "learning_rate": 1.5620833778521306e-05, |
| "loss": 0.4886, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 380, |
| "tokens_per_second_per_gpu": 19903.65, |
| "total_tokens": 17183281 |
| }, |
| { |
| "epoch": 0.038056235329371225, |
| "grad_norm": 1.0, |
| "learning_rate": 1.5591929034707468e-05, |
| "loss": 0.4449, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 381, |
| "tokens_per_second_per_gpu": 17219.67, |
| "total_tokens": 17225460 |
| }, |
| { |
| "epoch": 0.03815612046146931, |
| "grad_norm": 0.921875, |
| "learning_rate": 1.556295615500305e-05, |
| "loss": 0.488, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 382, |
| "tokens_per_second_per_gpu": 19687.26, |
| "total_tokens": 17273403 |
| }, |
| { |
| "epoch": 0.0382560055935674, |
| "grad_norm": 1.03125, |
| "learning_rate": 1.553391549243344e-05, |
| "loss": 0.4886, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 383, |
| "tokens_per_second_per_gpu": 16172.03, |
| "total_tokens": 17313336 |
| }, |
| { |
| "epoch": 0.03835589072566548, |
| "grad_norm": 1.046875, |
| "learning_rate": 1.5504807400849957e-05, |
| "loss": 0.4709, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 384, |
| "tokens_per_second_per_gpu": 16817.61, |
| "total_tokens": 17353898 |
| }, |
| { |
| "epoch": 0.03845577585776357, |
| "grad_norm": 0.96484375, |
| "learning_rate": 1.5475632234925505e-05, |
| "loss": 0.4926, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 385, |
| "tokens_per_second_per_gpu": 21137.73, |
| "total_tokens": 17404787 |
| }, |
| { |
| "epoch": 0.038555660989861656, |
| "grad_norm": 0.98046875, |
| "learning_rate": 1.5446390350150272e-05, |
| "loss": 0.4896, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 386, |
| "tokens_per_second_per_gpu": 20261.92, |
| "total_tokens": 17454159 |
| }, |
| { |
| "epoch": 0.03865554612195975, |
| "grad_norm": 1.0625, |
| "learning_rate": 1.54170821028274e-05, |
| "loss": 0.4817, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 387, |
| "tokens_per_second_per_gpu": 17798.83, |
| "total_tokens": 17498095 |
| }, |
| { |
| "epoch": 0.03875543125405783, |
| "grad_norm": 1.0, |
| "learning_rate": 1.5387707850068633e-05, |
| "loss": 0.4538, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 388, |
| "tokens_per_second_per_gpu": 18309.45, |
| "total_tokens": 17542740 |
| }, |
| { |
| "epoch": 0.03885531638615592, |
| "grad_norm": 1.03125, |
| "learning_rate": 1.5358267949789968e-05, |
| "loss": 0.4877, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 389, |
| "tokens_per_second_per_gpu": 18106.24, |
| "total_tokens": 17586608 |
| }, |
| { |
| "epoch": 0.03895520151825401, |
| "grad_norm": 1.109375, |
| "learning_rate": 1.53287627607073e-05, |
| "loss": 0.4918, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 390, |
| "tokens_per_second_per_gpu": 16000.74, |
| "total_tokens": 17626349 |
| }, |
| { |
| "epoch": 0.039055086650352094, |
| "grad_norm": 0.98828125, |
| "learning_rate": 1.529919264233205e-05, |
| "loss": 0.4838, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 391, |
| "tokens_per_second_per_gpu": 17778.35, |
| "total_tokens": 17670133 |
| }, |
| { |
| "epoch": 0.039154971782450185, |
| "grad_norm": 1.0078125, |
| "learning_rate": 1.5269557954966777e-05, |
| "loss": 0.4923, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 392, |
| "tokens_per_second_per_gpu": 18786.88, |
| "total_tokens": 17715930 |
| }, |
| { |
| "epoch": 0.03925485691454827, |
| "grad_norm": 1.046875, |
| "learning_rate": 1.5239859059700794e-05, |
| "loss": 0.5369, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 393, |
| "tokens_per_second_per_gpu": 19758.95, |
| "total_tokens": 17763863 |
| }, |
| { |
| "epoch": 0.03935474204664636, |
| "grad_norm": 1.0234375, |
| "learning_rate": 1.5210096318405768e-05, |
| "loss": 0.4702, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 394, |
| "tokens_per_second_per_gpu": 17953.26, |
| "total_tokens": 17807788 |
| }, |
| { |
| "epoch": 0.03945462717874444, |
| "grad_norm": 0.96875, |
| "learning_rate": 1.5180270093731305e-05, |
| "loss": 0.4827, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 395, |
| "tokens_per_second_per_gpu": 18853.02, |
| "total_tokens": 17852672 |
| }, |
| { |
| "epoch": 0.03955451231084253, |
| "grad_norm": 1.0390625, |
| "learning_rate": 1.5150380749100545e-05, |
| "loss": 0.4722, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 396, |
| "tokens_per_second_per_gpu": 15988.81, |
| "total_tokens": 17892790 |
| }, |
| { |
| "epoch": 0.039654397442940616, |
| "grad_norm": 1.0390625, |
| "learning_rate": 1.5120428648705716e-05, |
| "loss": 0.471, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 397, |
| "tokens_per_second_per_gpu": 15682.78, |
| "total_tokens": 17931167 |
| }, |
| { |
| "epoch": 0.039754282575038706, |
| "grad_norm": 0.984375, |
| "learning_rate": 1.5090414157503715e-05, |
| "loss": 0.4666, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 398, |
| "tokens_per_second_per_gpu": 19233.06, |
| "total_tokens": 17977266 |
| }, |
| { |
| "epoch": 0.03985416770713679, |
| "grad_norm": 1.03125, |
| "learning_rate": 1.5060337641211637e-05, |
| "loss": 0.4853, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 399, |
| "tokens_per_second_per_gpu": 17453.17, |
| "total_tokens": 18019796 |
| }, |
| { |
| "epoch": 0.03995405283923488, |
| "grad_norm": 1.015625, |
| "learning_rate": 1.5030199466302354e-05, |
| "loss": 0.4996, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 400, |
| "tokens_per_second_per_gpu": 20378.63, |
| "total_tokens": 18068913 |
| }, |
| { |
| "epoch": 0.04005393797133297, |
| "grad_norm": 0.99609375, |
| "learning_rate": 1.5000000000000002e-05, |
| "loss": 0.4709, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 401, |
| "tokens_per_second_per_gpu": 16785.21, |
| "total_tokens": 18110434 |
| }, |
| { |
| "epoch": 0.040153823103431054, |
| "grad_norm": 1.015625, |
| "learning_rate": 1.4969739610275556e-05, |
| "loss": 0.4835, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 402, |
| "tokens_per_second_per_gpu": 18895.39, |
| "total_tokens": 18155666 |
| }, |
| { |
| "epoch": 0.040253708235529144, |
| "grad_norm": 0.93359375, |
| "learning_rate": 1.493941866584231e-05, |
| "loss": 0.4325, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 403, |
| "tokens_per_second_per_gpu": 17748.23, |
| "total_tokens": 18199979 |
| }, |
| { |
| "epoch": 0.04035359336762723, |
| "grad_norm": 0.984375, |
| "learning_rate": 1.490903753615141e-05, |
| "loss": 0.457, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 404, |
| "tokens_per_second_per_gpu": 18064.08, |
| "total_tokens": 18244991 |
| }, |
| { |
| "epoch": 0.04045347849972532, |
| "grad_norm": 0.96875, |
| "learning_rate": 1.4878596591387329e-05, |
| "loss": 0.4955, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 405, |
| "tokens_per_second_per_gpu": 20242.52, |
| "total_tokens": 18294163 |
| }, |
| { |
| "epoch": 0.0405533636318234, |
| "grad_norm": 1.3359375, |
| "learning_rate": 1.4848096202463373e-05, |
| "loss": 0.4543, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 406, |
| "tokens_per_second_per_gpu": 16321.01, |
| "total_tokens": 18334899 |
| }, |
| { |
| "epoch": 0.04065324876392149, |
| "grad_norm": 1.03125, |
| "learning_rate": 1.4817536741017153e-05, |
| "loss": 0.5023, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 407, |
| "tokens_per_second_per_gpu": 20600.84, |
| "total_tokens": 18384811 |
| }, |
| { |
| "epoch": 0.040753133896019575, |
| "grad_norm": 0.96875, |
| "learning_rate": 1.478691857940607e-05, |
| "loss": 0.4377, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 408, |
| "tokens_per_second_per_gpu": 16604.43, |
| "total_tokens": 18426521 |
| }, |
| { |
| "epoch": 0.040853019028117665, |
| "grad_norm": 0.9453125, |
| "learning_rate": 1.4756242090702756e-05, |
| "loss": 0.5098, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 409, |
| "tokens_per_second_per_gpu": 19356.84, |
| "total_tokens": 18475111 |
| }, |
| { |
| "epoch": 0.04095290416021575, |
| "grad_norm": 0.9609375, |
| "learning_rate": 1.4725507648690542e-05, |
| "loss": 0.4828, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 410, |
| "tokens_per_second_per_gpu": 20213.24, |
| "total_tokens": 18523782 |
| }, |
| { |
| "epoch": 0.04105278929231384, |
| "grad_norm": 0.99609375, |
| "learning_rate": 1.469471562785891e-05, |
| "loss": 0.499, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 411, |
| "tokens_per_second_per_gpu": 19154.22, |
| "total_tokens": 18569782 |
| }, |
| { |
| "epoch": 0.04115267442441193, |
| "grad_norm": 0.97265625, |
| "learning_rate": 1.4663866403398915e-05, |
| "loss": 0.4652, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 412, |
| "tokens_per_second_per_gpu": 17546.97, |
| "total_tokens": 18613362 |
| }, |
| { |
| "epoch": 0.04125255955651001, |
| "grad_norm": 0.984375, |
| "learning_rate": 1.463296035119862e-05, |
| "loss": 0.4443, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 413, |
| "tokens_per_second_per_gpu": 17136.09, |
| "total_tokens": 18656132 |
| }, |
| { |
| "epoch": 0.0413524446886081, |
| "grad_norm": 0.96484375, |
| "learning_rate": 1.4601997847838518e-05, |
| "loss": 0.495, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 414, |
| "tokens_per_second_per_gpu": 19173.21, |
| "total_tokens": 18703756 |
| }, |
| { |
| "epoch": 0.04145232982070619, |
| "grad_norm": 1.0, |
| "learning_rate": 1.4570979270586944e-05, |
| "loss": 0.481, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 415, |
| "tokens_per_second_per_gpu": 16812.7, |
| "total_tokens": 18745722 |
| }, |
| { |
| "epoch": 0.04155221495280428, |
| "grad_norm": 1.015625, |
| "learning_rate": 1.4539904997395468e-05, |
| "loss": 0.4299, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 416, |
| "tokens_per_second_per_gpu": 16677.75, |
| "total_tokens": 18786807 |
| }, |
| { |
| "epoch": 0.04165210008490236, |
| "grad_norm": 1.0234375, |
| "learning_rate": 1.4508775406894308e-05, |
| "loss": 0.4857, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 417, |
| "tokens_per_second_per_gpu": 17607.47, |
| "total_tokens": 18830485 |
| }, |
| { |
| "epoch": 0.04175198521700045, |
| "grad_norm": 0.94140625, |
| "learning_rate": 1.4477590878387697e-05, |
| "loss": 0.4711, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 418, |
| "tokens_per_second_per_gpu": 20045.44, |
| "total_tokens": 18878778 |
| }, |
| { |
| "epoch": 0.041851870349098534, |
| "grad_norm": 1.0, |
| "learning_rate": 1.4446351791849276e-05, |
| "loss": 0.4933, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 419, |
| "tokens_per_second_per_gpu": 18986.08, |
| "total_tokens": 18924651 |
| }, |
| { |
| "epoch": 0.041951755481196625, |
| "grad_norm": 0.96484375, |
| "learning_rate": 1.4415058527917454e-05, |
| "loss": 0.505, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 420, |
| "tokens_per_second_per_gpu": 19508.66, |
| "total_tokens": 18972299 |
| }, |
| { |
| "epoch": 0.04205164061329471, |
| "grad_norm": 0.99609375, |
| "learning_rate": 1.4383711467890776e-05, |
| "loss": 0.4733, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 421, |
| "tokens_per_second_per_gpu": 18690.55, |
| "total_tokens": 19018039 |
| }, |
| { |
| "epoch": 0.0421515257453928, |
| "grad_norm": 0.94921875, |
| "learning_rate": 1.4352310993723277e-05, |
| "loss": 0.4814, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 422, |
| "tokens_per_second_per_gpu": 19967.53, |
| "total_tokens": 19066992 |
| }, |
| { |
| "epoch": 0.04225141087749089, |
| "grad_norm": 0.953125, |
| "learning_rate": 1.4320857488019826e-05, |
| "loss": 0.5123, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 423, |
| "tokens_per_second_per_gpu": 20665.01, |
| "total_tokens": 19116238 |
| }, |
| { |
| "epoch": 0.04235129600958897, |
| "grad_norm": 1.015625, |
| "learning_rate": 1.4289351334031461e-05, |
| "loss": 0.4845, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 424, |
| "tokens_per_second_per_gpu": 18633.95, |
| "total_tokens": 19161635 |
| }, |
| { |
| "epoch": 0.04245118114168706, |
| "grad_norm": 1.0546875, |
| "learning_rate": 1.4257792915650728e-05, |
| "loss": 0.4953, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 425, |
| "tokens_per_second_per_gpu": 19149.34, |
| "total_tokens": 19207721 |
| }, |
| { |
| "epoch": 0.042551066273785146, |
| "grad_norm": 0.96875, |
| "learning_rate": 1.4226182617406996e-05, |
| "loss": 0.5075, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 426, |
| "tokens_per_second_per_gpu": 20615.55, |
| "total_tokens": 19257403 |
| }, |
| { |
| "epoch": 0.042650951405883236, |
| "grad_norm": 1.03125, |
| "learning_rate": 1.4194520824461773e-05, |
| "loss": 0.4779, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 427, |
| "tokens_per_second_per_gpu": 18010.14, |
| "total_tokens": 19300710 |
| }, |
| { |
| "epoch": 0.04275083653798132, |
| "grad_norm": 1.015625, |
| "learning_rate": 1.4162807922604014e-05, |
| "loss": 0.437, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 428, |
| "tokens_per_second_per_gpu": 16253.78, |
| "total_tokens": 19340967 |
| }, |
| { |
| "epoch": 0.04285072167007941, |
| "grad_norm": 0.91015625, |
| "learning_rate": 1.413104429824542e-05, |
| "loss": 0.477, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 429, |
| "tokens_per_second_per_gpu": 20601.91, |
| "total_tokens": 19390987 |
| }, |
| { |
| "epoch": 0.04295060680217749, |
| "grad_norm": 0.98828125, |
| "learning_rate": 1.4099230338415728e-05, |
| "loss": 0.4954, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 430, |
| "tokens_per_second_per_gpu": 19146.66, |
| "total_tokens": 19437852 |
| }, |
| { |
| "epoch": 0.043050491934275584, |
| "grad_norm": 1.0234375, |
| "learning_rate": 1.4067366430758004e-05, |
| "loss": 0.4818, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 431, |
| "tokens_per_second_per_gpu": 17260.63, |
| "total_tokens": 19480645 |
| }, |
| { |
| "epoch": 0.04315037706637367, |
| "grad_norm": 0.921875, |
| "learning_rate": 1.4035452963523903e-05, |
| "loss": 0.4795, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 432, |
| "tokens_per_second_per_gpu": 19292.06, |
| "total_tokens": 19529326 |
| }, |
| { |
| "epoch": 0.04325026219847176, |
| "grad_norm": 0.98828125, |
| "learning_rate": 1.4003490325568953e-05, |
| "loss": 0.527, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 433, |
| "tokens_per_second_per_gpu": 20906.46, |
| "total_tokens": 19580256 |
| }, |
| { |
| "epoch": 0.04335014733056985, |
| "grad_norm": 1.0390625, |
| "learning_rate": 1.3971478906347806e-05, |
| "loss": 0.4926, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 434, |
| "tokens_per_second_per_gpu": 16518.25, |
| "total_tokens": 19621920 |
| }, |
| { |
| "epoch": 0.04345003246266793, |
| "grad_norm": 0.93359375, |
| "learning_rate": 1.3939419095909513e-05, |
| "loss": 0.4155, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 435, |
| "tokens_per_second_per_gpu": 17755.02, |
| "total_tokens": 19666530 |
| }, |
| { |
| "epoch": 0.04354991759476602, |
| "grad_norm": 1.0234375, |
| "learning_rate": 1.3907311284892737e-05, |
| "loss": 0.4568, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 436, |
| "tokens_per_second_per_gpu": 17222.48, |
| "total_tokens": 19709675 |
| }, |
| { |
| "epoch": 0.043649802726864105, |
| "grad_norm": 1.0078125, |
| "learning_rate": 1.3875155864521031e-05, |
| "loss": 0.4477, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 437, |
| "tokens_per_second_per_gpu": 16412.82, |
| "total_tokens": 19750839 |
| }, |
| { |
| "epoch": 0.043749687858962195, |
| "grad_norm": 0.984375, |
| "learning_rate": 1.3842953226598036e-05, |
| "loss": 0.4441, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 438, |
| "tokens_per_second_per_gpu": 17774.0, |
| "total_tokens": 19794275 |
| }, |
| { |
| "epoch": 0.04384957299106028, |
| "grad_norm": 0.984375, |
| "learning_rate": 1.3810703763502744e-05, |
| "loss": 0.4864, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 439, |
| "tokens_per_second_per_gpu": 19258.91, |
| "total_tokens": 19841649 |
| }, |
| { |
| "epoch": 0.04394945812315837, |
| "grad_norm": 1.0, |
| "learning_rate": 1.3778407868184674e-05, |
| "loss": 0.4498, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 440, |
| "tokens_per_second_per_gpu": 18522.25, |
| "total_tokens": 19887636 |
| }, |
| { |
| "epoch": 0.04404934325525645, |
| "grad_norm": 1.078125, |
| "learning_rate": 1.3746065934159123e-05, |
| "loss": 0.4497, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 441, |
| "tokens_per_second_per_gpu": 15927.53, |
| "total_tokens": 19927026 |
| }, |
| { |
| "epoch": 0.04414922838735454, |
| "grad_norm": 1.0078125, |
| "learning_rate": 1.371367835550235e-05, |
| "loss": 0.4638, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 442, |
| "tokens_per_second_per_gpu": 18635.1, |
| "total_tokens": 19972837 |
| }, |
| { |
| "epoch": 0.044249113519452626, |
| "grad_norm": 0.96875, |
| "learning_rate": 1.3681245526846782e-05, |
| "loss": 0.4623, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 443, |
| "tokens_per_second_per_gpu": 18977.89, |
| "total_tokens": 20019104 |
| }, |
| { |
| "epoch": 0.04434899865155072, |
| "grad_norm": 1.015625, |
| "learning_rate": 1.3648767843376196e-05, |
| "loss": 0.4301, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 444, |
| "tokens_per_second_per_gpu": 15652.52, |
| "total_tokens": 20059232 |
| }, |
| { |
| "epoch": 0.04444888378364881, |
| "grad_norm": 1.0234375, |
| "learning_rate": 1.3616245700820922e-05, |
| "loss": 0.4843, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 445, |
| "tokens_per_second_per_gpu": 16862.53, |
| "total_tokens": 20100364 |
| }, |
| { |
| "epoch": 0.04454876891574689, |
| "grad_norm": 0.94921875, |
| "learning_rate": 1.3583679495453e-05, |
| "loss": 0.5102, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 446, |
| "tokens_per_second_per_gpu": 20530.87, |
| "total_tokens": 20150727 |
| }, |
| { |
| "epoch": 0.04464865404784498, |
| "grad_norm": 1.015625, |
| "learning_rate": 1.3551069624081372e-05, |
| "loss": 0.4832, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 447, |
| "tokens_per_second_per_gpu": 19254.04, |
| "total_tokens": 20196714 |
| }, |
| { |
| "epoch": 0.044748539179943064, |
| "grad_norm": 1.015625, |
| "learning_rate": 1.3518416484047018e-05, |
| "loss": 0.4832, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 448, |
| "tokens_per_second_per_gpu": 18247.57, |
| "total_tokens": 20241670 |
| }, |
| { |
| "epoch": 0.044848424312041155, |
| "grad_norm": 0.98046875, |
| "learning_rate": 1.3485720473218153e-05, |
| "loss": 0.4366, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 449, |
| "tokens_per_second_per_gpu": 16882.38, |
| "total_tokens": 20283970 |
| }, |
| { |
| "epoch": 0.04494830944413924, |
| "grad_norm": 0.9375, |
| "learning_rate": 1.3452981989985347e-05, |
| "loss": 0.4708, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 450, |
| "tokens_per_second_per_gpu": 21541.68, |
| "total_tokens": 20335291 |
| }, |
| { |
| "epoch": 0.04504819457623733, |
| "grad_norm": 0.98828125, |
| "learning_rate": 1.342020143325669e-05, |
| "loss": 0.5028, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 451, |
| "tokens_per_second_per_gpu": 19648.41, |
| "total_tokens": 20383351 |
| }, |
| { |
| "epoch": 0.04514807970833541, |
| "grad_norm": 0.984375, |
| "learning_rate": 1.3387379202452917e-05, |
| "loss": 0.4838, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 452, |
| "tokens_per_second_per_gpu": 19749.18, |
| "total_tokens": 20430693 |
| }, |
| { |
| "epoch": 0.0452479648404335, |
| "grad_norm": 0.98046875, |
| "learning_rate": 1.3354515697502552e-05, |
| "loss": 0.4566, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 453, |
| "tokens_per_second_per_gpu": 19588.54, |
| "total_tokens": 20478883 |
| }, |
| { |
| "epoch": 0.045347849972531586, |
| "grad_norm": 1.0234375, |
| "learning_rate": 1.3321611318837033e-05, |
| "loss": 0.4549, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 454, |
| "tokens_per_second_per_gpu": 18174.67, |
| "total_tokens": 20523444 |
| }, |
| { |
| "epoch": 0.045447735104629676, |
| "grad_norm": 1.0078125, |
| "learning_rate": 1.3288666467385834e-05, |
| "loss": 0.442, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 455, |
| "tokens_per_second_per_gpu": 16575.21, |
| "total_tokens": 20564379 |
| }, |
| { |
| "epoch": 0.045547620236727766, |
| "grad_norm": 0.93359375, |
| "learning_rate": 1.3255681544571568e-05, |
| "loss": 0.4501, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 456, |
| "tokens_per_second_per_gpu": 19453.63, |
| "total_tokens": 20611507 |
| }, |
| { |
| "epoch": 0.04564750536882585, |
| "grad_norm": 0.9765625, |
| "learning_rate": 1.3222656952305113e-05, |
| "loss": 0.4896, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 457, |
| "tokens_per_second_per_gpu": 18982.61, |
| "total_tokens": 20657825 |
| }, |
| { |
| "epoch": 0.04574739050092394, |
| "grad_norm": 0.984375, |
| "learning_rate": 1.3189593092980701e-05, |
| "loss": 0.4279, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 458, |
| "tokens_per_second_per_gpu": 18418.04, |
| "total_tokens": 20702133 |
| }, |
| { |
| "epoch": 0.045847275633022024, |
| "grad_norm": 1.0390625, |
| "learning_rate": 1.3156490369471026e-05, |
| "loss": 0.4607, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 459, |
| "tokens_per_second_per_gpu": 18412.72, |
| "total_tokens": 20747114 |
| }, |
| { |
| "epoch": 0.045947160765120114, |
| "grad_norm": 0.984375, |
| "learning_rate": 1.3123349185122328e-05, |
| "loss": 0.4671, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 460, |
| "tokens_per_second_per_gpu": 17640.8, |
| "total_tokens": 20790822 |
| }, |
| { |
| "epoch": 0.0460470458972182, |
| "grad_norm": 1.0234375, |
| "learning_rate": 1.3090169943749475e-05, |
| "loss": 0.5004, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 461, |
| "tokens_per_second_per_gpu": 18665.44, |
| "total_tokens": 20836379 |
| }, |
| { |
| "epoch": 0.04614693102931629, |
| "grad_norm": 0.953125, |
| "learning_rate": 1.3056953049631059e-05, |
| "loss": 0.4387, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 462, |
| "tokens_per_second_per_gpu": 19129.47, |
| "total_tokens": 20883986 |
| }, |
| { |
| "epoch": 0.04624681616141437, |
| "grad_norm": 1.1484375, |
| "learning_rate": 1.3023698907504447e-05, |
| "loss": 0.4174, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 463, |
| "tokens_per_second_per_gpu": 16146.06, |
| "total_tokens": 20923457 |
| }, |
| { |
| "epoch": 0.04634670129351246, |
| "grad_norm": 0.96875, |
| "learning_rate": 1.2990407922560869e-05, |
| "loss": 0.5199, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 464, |
| "tokens_per_second_per_gpu": 21859.89, |
| "total_tokens": 20975561 |
| }, |
| { |
| "epoch": 0.046446586425610545, |
| "grad_norm": 0.98828125, |
| "learning_rate": 1.2957080500440469e-05, |
| "loss": 0.4812, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 465, |
| "tokens_per_second_per_gpu": 19141.51, |
| "total_tokens": 21022847 |
| }, |
| { |
| "epoch": 0.046546471557708635, |
| "grad_norm": 1.046875, |
| "learning_rate": 1.2923717047227368e-05, |
| "loss": 0.4277, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 466, |
| "tokens_per_second_per_gpu": 16949.78, |
| "total_tokens": 21064452 |
| }, |
| { |
| "epoch": 0.046646356689806726, |
| "grad_norm": 1.0625, |
| "learning_rate": 1.2890317969444716e-05, |
| "loss": 0.4635, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 467, |
| "tokens_per_second_per_gpu": 17184.9, |
| "total_tokens": 21106486 |
| }, |
| { |
| "epoch": 0.04674624182190481, |
| "grad_norm": 0.98046875, |
| "learning_rate": 1.2856883674049736e-05, |
| "loss": 0.4734, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 468, |
| "tokens_per_second_per_gpu": 20115.21, |
| "total_tokens": 21154512 |
| }, |
| { |
| "epoch": 0.0468461269540029, |
| "grad_norm": 1.2734375, |
| "learning_rate": 1.2823414568428767e-05, |
| "loss": 0.412, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 469, |
| "tokens_per_second_per_gpu": 15311.39, |
| "total_tokens": 21193038 |
| }, |
| { |
| "epoch": 0.04694601208610098, |
| "grad_norm": 0.9765625, |
| "learning_rate": 1.2789911060392295e-05, |
| "loss": 0.4683, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 470, |
| "tokens_per_second_per_gpu": 19043.56, |
| "total_tokens": 21240442 |
| }, |
| { |
| "epoch": 0.04704589721819907, |
| "grad_norm": 1.0078125, |
| "learning_rate": 1.2756373558169992e-05, |
| "loss": 0.5045, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 471, |
| "tokens_per_second_per_gpu": 18973.02, |
| "total_tokens": 21286255 |
| }, |
| { |
| "epoch": 0.047145782350297157, |
| "grad_norm": 0.98046875, |
| "learning_rate": 1.2722802470405744e-05, |
| "loss": 0.4847, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 472, |
| "tokens_per_second_per_gpu": 19835.41, |
| "total_tokens": 21334376 |
| }, |
| { |
| "epoch": 0.04724566748239525, |
| "grad_norm": 0.94921875, |
| "learning_rate": 1.2689198206152657e-05, |
| "loss": 0.4578, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 473, |
| "tokens_per_second_per_gpu": 19532.41, |
| "total_tokens": 21382963 |
| }, |
| { |
| "epoch": 0.04734555261449333, |
| "grad_norm": 0.9921875, |
| "learning_rate": 1.265556117486809e-05, |
| "loss": 0.4629, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 474, |
| "tokens_per_second_per_gpu": 18454.21, |
| "total_tokens": 21428429 |
| }, |
| { |
| "epoch": 0.04744543774659142, |
| "grad_norm": 0.9609375, |
| "learning_rate": 1.2621891786408648e-05, |
| "loss": 0.4086, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 475, |
| "tokens_per_second_per_gpu": 17434.94, |
| "total_tokens": 21472056 |
| }, |
| { |
| "epoch": 0.047545322878689504, |
| "grad_norm": 1.0390625, |
| "learning_rate": 1.2588190451025209e-05, |
| "loss": 0.4546, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 476, |
| "tokens_per_second_per_gpu": 18143.91, |
| "total_tokens": 21516053 |
| }, |
| { |
| "epoch": 0.047645208010787594, |
| "grad_norm": 1.5234375, |
| "learning_rate": 1.2554457579357906e-05, |
| "loss": 0.4865, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 477, |
| "tokens_per_second_per_gpu": 17698.1, |
| "total_tokens": 21559736 |
| }, |
| { |
| "epoch": 0.047745093142885685, |
| "grad_norm": 1.0234375, |
| "learning_rate": 1.252069358243114e-05, |
| "loss": 0.4796, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 478, |
| "tokens_per_second_per_gpu": 18941.25, |
| "total_tokens": 21606119 |
| }, |
| { |
| "epoch": 0.04784497827498377, |
| "grad_norm": 1.0, |
| "learning_rate": 1.2486898871648552e-05, |
| "loss": 0.4479, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 479, |
| "tokens_per_second_per_gpu": 17602.65, |
| "total_tokens": 21649683 |
| }, |
| { |
| "epoch": 0.04794486340708186, |
| "grad_norm": 1.0234375, |
| "learning_rate": 1.2453073858788027e-05, |
| "loss": 0.4305, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 480, |
| "tokens_per_second_per_gpu": 16582.51, |
| "total_tokens": 21690429 |
| }, |
| { |
| "epoch": 0.04804474853917994, |
| "grad_norm": 1.03125, |
| "learning_rate": 1.2419218955996677e-05, |
| "loss": 0.5023, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 481, |
| "tokens_per_second_per_gpu": 19550.42, |
| "total_tokens": 21738074 |
| }, |
| { |
| "epoch": 0.04814463367127803, |
| "grad_norm": 0.9921875, |
| "learning_rate": 1.238533457578581e-05, |
| "loss": 0.4985, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 482, |
| "tokens_per_second_per_gpu": 19261.78, |
| "total_tokens": 21784804 |
| }, |
| { |
| "epoch": 0.048244518803376116, |
| "grad_norm": 0.984375, |
| "learning_rate": 1.23514211310259e-05, |
| "loss": 0.4767, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 483, |
| "tokens_per_second_per_gpu": 19732.31, |
| "total_tokens": 21832012 |
| }, |
| { |
| "epoch": 0.048344403935474206, |
| "grad_norm": 0.9921875, |
| "learning_rate": 1.2317479034941572e-05, |
| "loss": 0.4619, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 484, |
| "tokens_per_second_per_gpu": 20400.51, |
| "total_tokens": 21880113 |
| }, |
| { |
| "epoch": 0.04844428906757229, |
| "grad_norm": 0.9609375, |
| "learning_rate": 1.2283508701106559e-05, |
| "loss": 0.4387, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 485, |
| "tokens_per_second_per_gpu": 18048.0, |
| "total_tokens": 21924247 |
| }, |
| { |
| "epoch": 0.04854417419967038, |
| "grad_norm": 1.0078125, |
| "learning_rate": 1.2249510543438652e-05, |
| "loss": 0.4826, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 486, |
| "tokens_per_second_per_gpu": 17412.13, |
| "total_tokens": 21967561 |
| }, |
| { |
| "epoch": 0.04864405933176846, |
| "grad_norm": 1.0234375, |
| "learning_rate": 1.2215484976194675e-05, |
| "loss": 0.5, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 487, |
| "tokens_per_second_per_gpu": 19866.72, |
| "total_tokens": 22014791 |
| }, |
| { |
| "epoch": 0.048743944463866554, |
| "grad_norm": 0.98046875, |
| "learning_rate": 1.2181432413965428e-05, |
| "loss": 0.4481, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 488, |
| "tokens_per_second_per_gpu": 17658.1, |
| "total_tokens": 22058505 |
| }, |
| { |
| "epoch": 0.048843829595964644, |
| "grad_norm": 0.95703125, |
| "learning_rate": 1.2147353271670634e-05, |
| "loss": 0.4992, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 489, |
| "tokens_per_second_per_gpu": 21458.73, |
| "total_tokens": 22109615 |
| }, |
| { |
| "epoch": 0.04894371472806273, |
| "grad_norm": 1.0234375, |
| "learning_rate": 1.211324796455389e-05, |
| "loss": 0.451, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 490, |
| "tokens_per_second_per_gpu": 18087.11, |
| "total_tokens": 22153686 |
| }, |
| { |
| "epoch": 0.04904359986016082, |
| "grad_norm": 1.2265625, |
| "learning_rate": 1.2079116908177592e-05, |
| "loss": 0.483, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 491, |
| "tokens_per_second_per_gpu": 17660.42, |
| "total_tokens": 22197018 |
| }, |
| { |
| "epoch": 0.0491434849922589, |
| "grad_norm": 1.8359375, |
| "learning_rate": 1.2044960518417902e-05, |
| "loss": 0.4741, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 492, |
| "tokens_per_second_per_gpu": 17381.69, |
| "total_tokens": 22240331 |
| }, |
| { |
| "epoch": 0.04924337012435699, |
| "grad_norm": 0.953125, |
| "learning_rate": 1.2010779211459649e-05, |
| "loss": 0.4283, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 493, |
| "tokens_per_second_per_gpu": 17621.55, |
| "total_tokens": 22283909 |
| }, |
| { |
| "epoch": 0.049343255256455075, |
| "grad_norm": 0.9609375, |
| "learning_rate": 1.1976573403791263e-05, |
| "loss": 0.4636, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 494, |
| "tokens_per_second_per_gpu": 19438.53, |
| "total_tokens": 22332531 |
| }, |
| { |
| "epoch": 0.049443140388553165, |
| "grad_norm": 1.03125, |
| "learning_rate": 1.194234351219972e-05, |
| "loss": 0.464, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 495, |
| "tokens_per_second_per_gpu": 18317.53, |
| "total_tokens": 22376999 |
| }, |
| { |
| "epoch": 0.04954302552065125, |
| "grad_norm": 1.015625, |
| "learning_rate": 1.190808995376545e-05, |
| "loss": 0.509, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 496, |
| "tokens_per_second_per_gpu": 19293.59, |
| "total_tokens": 22424618 |
| }, |
| { |
| "epoch": 0.04964291065274934, |
| "grad_norm": 0.99609375, |
| "learning_rate": 1.187381314585725e-05, |
| "loss": 0.4516, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 497, |
| "tokens_per_second_per_gpu": 18270.3, |
| "total_tokens": 22469423 |
| }, |
| { |
| "epoch": 0.04974279578484742, |
| "grad_norm": 0.98046875, |
| "learning_rate": 1.1839513506127202e-05, |
| "loss": 0.4849, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 498, |
| "tokens_per_second_per_gpu": 19430.08, |
| "total_tokens": 22516368 |
| }, |
| { |
| "epoch": 0.04984268091694551, |
| "grad_norm": 1.09375, |
| "learning_rate": 1.1805191452505602e-05, |
| "loss": 0.4667, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 499, |
| "tokens_per_second_per_gpu": 16036.97, |
| "total_tokens": 22556525 |
| }, |
| { |
| "epoch": 0.0499425660490436, |
| "grad_norm": 1.015625, |
| "learning_rate": 1.1770847403195836e-05, |
| "loss": 0.4213, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 500, |
| "tokens_per_second_per_gpu": 17013.59, |
| "total_tokens": 22598669 |
| }, |
| { |
| "epoch": 0.05004245118114169, |
| "grad_norm": 1.015625, |
| "learning_rate": 1.1736481776669307e-05, |
| "loss": 0.4727, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 501, |
| "tokens_per_second_per_gpu": 19157.82, |
| "total_tokens": 22644756 |
| }, |
| { |
| "epoch": 0.05014233631323978, |
| "grad_norm": 0.8984375, |
| "learning_rate": 1.1702094991660326e-05, |
| "loss": 0.4344, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 502, |
| "tokens_per_second_per_gpu": 19863.48, |
| "total_tokens": 22694880 |
| }, |
| { |
| "epoch": 0.05024222144533786, |
| "grad_norm": 1.125, |
| "learning_rate": 1.1667687467161025e-05, |
| "loss": 0.4611, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 503, |
| "tokens_per_second_per_gpu": 18555.46, |
| "total_tokens": 22740472 |
| }, |
| { |
| "epoch": 0.05034210657743595, |
| "grad_norm": 1.03125, |
| "learning_rate": 1.1633259622416224e-05, |
| "loss": 0.4185, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 504, |
| "tokens_per_second_per_gpu": 16048.54, |
| "total_tokens": 22780599 |
| }, |
| { |
| "epoch": 0.050441991709534034, |
| "grad_norm": 0.96875, |
| "learning_rate": 1.159881187691835e-05, |
| "loss": 0.4438, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 505, |
| "tokens_per_second_per_gpu": 18750.75, |
| "total_tokens": 22826251 |
| }, |
| { |
| "epoch": 0.050541876841632125, |
| "grad_norm": 0.96484375, |
| "learning_rate": 1.156434465040231e-05, |
| "loss": 0.4806, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 506, |
| "tokens_per_second_per_gpu": 19927.32, |
| "total_tokens": 22875342 |
| }, |
| { |
| "epoch": 0.05064176197373021, |
| "grad_norm": 1.0078125, |
| "learning_rate": 1.1529858362840383e-05, |
| "loss": 0.4731, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 507, |
| "tokens_per_second_per_gpu": 17839.7, |
| "total_tokens": 22918851 |
| }, |
| { |
| "epoch": 0.0507416471058283, |
| "grad_norm": 0.9921875, |
| "learning_rate": 1.1495353434437098e-05, |
| "loss": 0.4712, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 508, |
| "tokens_per_second_per_gpu": 17955.09, |
| "total_tokens": 22963103 |
| }, |
| { |
| "epoch": 0.05084153223792638, |
| "grad_norm": 1.0625, |
| "learning_rate": 1.1460830285624119e-05, |
| "loss": 0.4683, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 509, |
| "tokens_per_second_per_gpu": 17845.55, |
| "total_tokens": 23007258 |
| }, |
| { |
| "epoch": 0.05094141737002447, |
| "grad_norm": 0.9765625, |
| "learning_rate": 1.1426289337055119e-05, |
| "loss": 0.4843, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 510, |
| "tokens_per_second_per_gpu": 19624.71, |
| "total_tokens": 23055166 |
| }, |
| { |
| "epoch": 0.05104130250212256, |
| "grad_norm": 1.0390625, |
| "learning_rate": 1.1391731009600655e-05, |
| "loss": 0.4588, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 511, |
| "tokens_per_second_per_gpu": 15869.37, |
| "total_tokens": 23095363 |
| }, |
| { |
| "epoch": 0.051141187634220646, |
| "grad_norm": 1.125, |
| "learning_rate": 1.1357155724343046e-05, |
| "loss": 0.4484, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 512, |
| "tokens_per_second_per_gpu": 17790.97, |
| "total_tokens": 23138552 |
| }, |
| { |
| "epoch": 0.051241072766318736, |
| "grad_norm": 0.94921875, |
| "learning_rate": 1.1322563902571227e-05, |
| "loss": 0.4843, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 513, |
| "tokens_per_second_per_gpu": 18872.7, |
| "total_tokens": 23186121 |
| }, |
| { |
| "epoch": 0.05134095789841682, |
| "grad_norm": 1.0078125, |
| "learning_rate": 1.128795596577563e-05, |
| "loss": 0.4933, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 514, |
| "tokens_per_second_per_gpu": 18641.21, |
| "total_tokens": 23231758 |
| }, |
| { |
| "epoch": 0.05144084303051491, |
| "grad_norm": 0.97265625, |
| "learning_rate": 1.1253332335643043e-05, |
| "loss": 0.4605, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 515, |
| "tokens_per_second_per_gpu": 19688.1, |
| "total_tokens": 23278872 |
| }, |
| { |
| "epoch": 0.05154072816261299, |
| "grad_norm": 0.9921875, |
| "learning_rate": 1.1218693434051475e-05, |
| "loss": 0.5027, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 516, |
| "tokens_per_second_per_gpu": 20087.61, |
| "total_tokens": 23327833 |
| }, |
| { |
| "epoch": 0.051640613294711084, |
| "grad_norm": 1.1328125, |
| "learning_rate": 1.1184039683065014e-05, |
| "loss": 0.4815, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 517, |
| "tokens_per_second_per_gpu": 18899.57, |
| "total_tokens": 23373189 |
| }, |
| { |
| "epoch": 0.05174049842680917, |
| "grad_norm": 0.98046875, |
| "learning_rate": 1.1149371504928667e-05, |
| "loss": 0.4454, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 518, |
| "tokens_per_second_per_gpu": 19441.21, |
| "total_tokens": 23420039 |
| }, |
| { |
| "epoch": 0.05184038355890726, |
| "grad_norm": 0.97265625, |
| "learning_rate": 1.1114689322063255e-05, |
| "loss": 0.4678, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 519, |
| "tokens_per_second_per_gpu": 18903.71, |
| "total_tokens": 23465844 |
| }, |
| { |
| "epoch": 0.05194026869100534, |
| "grad_norm": 1.078125, |
| "learning_rate": 1.1079993557060228e-05, |
| "loss": 0.4813, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 520, |
| "tokens_per_second_per_gpu": 17792.12, |
| "total_tokens": 23509432 |
| }, |
| { |
| "epoch": 0.05204015382310343, |
| "grad_norm": 0.98046875, |
| "learning_rate": 1.1045284632676535e-05, |
| "loss": 0.5014, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 521, |
| "tokens_per_second_per_gpu": 19237.45, |
| "total_tokens": 23556465 |
| }, |
| { |
| "epoch": 0.05214003895520152, |
| "grad_norm": 1.0703125, |
| "learning_rate": 1.1010562971829464e-05, |
| "loss": 0.4765, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 522, |
| "tokens_per_second_per_gpu": 17247.23, |
| "total_tokens": 23598540 |
| }, |
| { |
| "epoch": 0.052239924087299605, |
| "grad_norm": 1.0625, |
| "learning_rate": 1.0975828997591496e-05, |
| "loss": 0.4712, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 523, |
| "tokens_per_second_per_gpu": 18469.09, |
| "total_tokens": 23643815 |
| }, |
| { |
| "epoch": 0.052339809219397695, |
| "grad_norm": 1.0, |
| "learning_rate": 1.0941083133185146e-05, |
| "loss": 0.4282, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 524, |
| "tokens_per_second_per_gpu": 17815.45, |
| "total_tokens": 23686989 |
| }, |
| { |
| "epoch": 0.05243969435149578, |
| "grad_norm": 0.98828125, |
| "learning_rate": 1.0906325801977804e-05, |
| "loss": 0.4625, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 525, |
| "tokens_per_second_per_gpu": 17453.89, |
| "total_tokens": 23730806 |
| }, |
| { |
| "epoch": 0.05253957948359387, |
| "grad_norm": 0.9765625, |
| "learning_rate": 1.0871557427476585e-05, |
| "loss": 0.4538, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 526, |
| "tokens_per_second_per_gpu": 18837.53, |
| "total_tokens": 23776963 |
| }, |
| { |
| "epoch": 0.05263946461569195, |
| "grad_norm": 1.0078125, |
| "learning_rate": 1.083677843332316e-05, |
| "loss": 0.4792, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 527, |
| "tokens_per_second_per_gpu": 19107.38, |
| "total_tokens": 23824252 |
| }, |
| { |
| "epoch": 0.05273934974779004, |
| "grad_norm": 1.015625, |
| "learning_rate": 1.0801989243288588e-05, |
| "loss": 0.4795, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 528, |
| "tokens_per_second_per_gpu": 18085.61, |
| "total_tokens": 23869316 |
| }, |
| { |
| "epoch": 0.052839234879888126, |
| "grad_norm": 1.0234375, |
| "learning_rate": 1.0767190281268187e-05, |
| "loss": 0.4145, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 529, |
| "tokens_per_second_per_gpu": 16406.74, |
| "total_tokens": 23910350 |
| }, |
| { |
| "epoch": 0.05293912001198622, |
| "grad_norm": 1.0, |
| "learning_rate": 1.0732381971276318e-05, |
| "loss": 0.4572, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 530, |
| "tokens_per_second_per_gpu": 17710.36, |
| "total_tokens": 23954773 |
| }, |
| { |
| "epoch": 0.0530390051440843, |
| "grad_norm": 0.984375, |
| "learning_rate": 1.0697564737441254e-05, |
| "loss": 0.4703, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 531, |
| "tokens_per_second_per_gpu": 18903.27, |
| "total_tokens": 24000771 |
| }, |
| { |
| "epoch": 0.05313889027618239, |
| "grad_norm": 0.9921875, |
| "learning_rate": 1.0662739004000005e-05, |
| "loss": 0.4402, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 532, |
| "tokens_per_second_per_gpu": 18233.74, |
| "total_tokens": 24045859 |
| }, |
| { |
| "epoch": 0.053238775408280474, |
| "grad_norm": 0.98828125, |
| "learning_rate": 1.0627905195293135e-05, |
| "loss": 0.4951, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 533, |
| "tokens_per_second_per_gpu": 19692.99, |
| "total_tokens": 24093700 |
| }, |
| { |
| "epoch": 0.053338660540378564, |
| "grad_norm": 0.9453125, |
| "learning_rate": 1.0593063735759619e-05, |
| "loss": 0.4596, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 534, |
| "tokens_per_second_per_gpu": 18803.61, |
| "total_tokens": 24139870 |
| }, |
| { |
| "epoch": 0.053438545672476655, |
| "grad_norm": 0.96875, |
| "learning_rate": 1.055821504993164e-05, |
| "loss": 0.4681, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 535, |
| "tokens_per_second_per_gpu": 19578.83, |
| "total_tokens": 24186517 |
| }, |
| { |
| "epoch": 0.05353843080457474, |
| "grad_norm": 0.96875, |
| "learning_rate": 1.0523359562429441e-05, |
| "loss": 0.5205, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 536, |
| "tokens_per_second_per_gpu": 20269.17, |
| "total_tokens": 24236114 |
| }, |
| { |
| "epoch": 0.05363831593667283, |
| "grad_norm": 0.95703125, |
| "learning_rate": 1.0488497697956134e-05, |
| "loss": 0.4709, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 537, |
| "tokens_per_second_per_gpu": 18848.83, |
| "total_tokens": 24282417 |
| }, |
| { |
| "epoch": 0.05373820106877091, |
| "grad_norm": 0.953125, |
| "learning_rate": 1.0453629881292537e-05, |
| "loss": 0.4237, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 538, |
| "tokens_per_second_per_gpu": 18133.2, |
| "total_tokens": 24327150 |
| }, |
| { |
| "epoch": 0.053838086200869, |
| "grad_norm": 0.9921875, |
| "learning_rate": 1.0418756537291996e-05, |
| "loss": 0.3724, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 539, |
| "tokens_per_second_per_gpu": 15680.58, |
| "total_tokens": 24365728 |
| }, |
| { |
| "epoch": 0.053937971332967086, |
| "grad_norm": 0.96484375, |
| "learning_rate": 1.03838780908752e-05, |
| "loss": 0.428, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 540, |
| "tokens_per_second_per_gpu": 18189.04, |
| "total_tokens": 24410190 |
| }, |
| { |
| "epoch": 0.054037856465065176, |
| "grad_norm": 1.0703125, |
| "learning_rate": 1.0348994967025012e-05, |
| "loss": 0.4759, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 541, |
| "tokens_per_second_per_gpu": 17332.7, |
| "total_tokens": 24451894 |
| }, |
| { |
| "epoch": 0.05413774159716326, |
| "grad_norm": 0.9921875, |
| "learning_rate": 1.0314107590781284e-05, |
| "loss": 0.4739, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 542, |
| "tokens_per_second_per_gpu": 20634.37, |
| "total_tokens": 24502320 |
| }, |
| { |
| "epoch": 0.05423762672926135, |
| "grad_norm": 1.046875, |
| "learning_rate": 1.0279216387235691e-05, |
| "loss": 0.4531, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 543, |
| "tokens_per_second_per_gpu": 17057.94, |
| "total_tokens": 24545125 |
| }, |
| { |
| "epoch": 0.05433751186135943, |
| "grad_norm": 1.03125, |
| "learning_rate": 1.0244321781526533e-05, |
| "loss": 0.4646, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 544, |
| "tokens_per_second_per_gpu": 17576.91, |
| "total_tokens": 24587955 |
| }, |
| { |
| "epoch": 0.054437396993457524, |
| "grad_norm": 1.015625, |
| "learning_rate": 1.0209424198833571e-05, |
| "loss": 0.4518, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 545, |
| "tokens_per_second_per_gpu": 17794.97, |
| "total_tokens": 24632457 |
| }, |
| { |
| "epoch": 0.054537282125555614, |
| "grad_norm": 0.94921875, |
| "learning_rate": 1.0174524064372837e-05, |
| "loss": 0.4674, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 546, |
| "tokens_per_second_per_gpu": 21668.28, |
| "total_tokens": 24684174 |
| }, |
| { |
| "epoch": 0.0546371672576537, |
| "grad_norm": 1.0, |
| "learning_rate": 1.0139621803391454e-05, |
| "loss": 0.4574, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 547, |
| "tokens_per_second_per_gpu": 18638.05, |
| "total_tokens": 24731619 |
| }, |
| { |
| "epoch": 0.05473705238975179, |
| "grad_norm": 0.94921875, |
| "learning_rate": 1.010471784116246e-05, |
| "loss": 0.4437, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 548, |
| "tokens_per_second_per_gpu": 18738.17, |
| "total_tokens": 24777474 |
| }, |
| { |
| "epoch": 0.05483693752184987, |
| "grad_norm": 0.953125, |
| "learning_rate": 1.0069812602979617e-05, |
| "loss": 0.4953, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 549, |
| "tokens_per_second_per_gpu": 20376.59, |
| "total_tokens": 24827482 |
| }, |
| { |
| "epoch": 0.05493682265394796, |
| "grad_norm": 0.9296875, |
| "learning_rate": 1.0034906514152239e-05, |
| "loss": 0.4572, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 550, |
| "tokens_per_second_per_gpu": 19513.4, |
| "total_tokens": 24876158 |
| }, |
| { |
| "epoch": 0.055036707786046045, |
| "grad_norm": 0.9765625, |
| "learning_rate": 1e-05, |
| "loss": 0.4804, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 551, |
| "tokens_per_second_per_gpu": 19312.89, |
| "total_tokens": 24924673 |
| }, |
| { |
| "epoch": 0.055136592918144135, |
| "grad_norm": 0.953125, |
| "learning_rate": 9.965093485847766e-06, |
| "loss": 0.4502, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 552, |
| "tokens_per_second_per_gpu": 19002.55, |
| "total_tokens": 24970279 |
| }, |
| { |
| "epoch": 0.05523647805024222, |
| "grad_norm": 0.95703125, |
| "learning_rate": 9.930187397020385e-06, |
| "loss": 0.451, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 553, |
| "tokens_per_second_per_gpu": 19493.12, |
| "total_tokens": 25017731 |
| }, |
| { |
| "epoch": 0.05533636318234031, |
| "grad_norm": 0.94921875, |
| "learning_rate": 9.895282158837545e-06, |
| "loss": 0.4229, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 554, |
| "tokens_per_second_per_gpu": 17975.71, |
| "total_tokens": 25062533 |
| }, |
| { |
| "epoch": 0.05543624831443839, |
| "grad_norm": 1.0, |
| "learning_rate": 9.860378196608549e-06, |
| "loss": 0.4934, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 555, |
| "tokens_per_second_per_gpu": 18552.65, |
| "total_tokens": 25108295 |
| }, |
| { |
| "epoch": 0.05553613344653648, |
| "grad_norm": 1.0, |
| "learning_rate": 9.825475935627165e-06, |
| "loss": 0.3945, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 556, |
| "tokens_per_second_per_gpu": 16296.72, |
| "total_tokens": 25147946 |
| }, |
| { |
| "epoch": 0.05563601857863457, |
| "grad_norm": 0.96875, |
| "learning_rate": 9.790575801166432e-06, |
| "loss": 0.4595, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 557, |
| "tokens_per_second_per_gpu": 18774.27, |
| "total_tokens": 25194998 |
| }, |
| { |
| "epoch": 0.05573590371073266, |
| "grad_norm": 0.9453125, |
| "learning_rate": 9.75567821847347e-06, |
| "loss": 0.4485, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 558, |
| "tokens_per_second_per_gpu": 19033.71, |
| "total_tokens": 25242866 |
| }, |
| { |
| "epoch": 0.05583578884283075, |
| "grad_norm": 0.9921875, |
| "learning_rate": 9.720783612764314e-06, |
| "loss": 0.4339, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 559, |
| "tokens_per_second_per_gpu": 17784.22, |
| "total_tokens": 25286127 |
| }, |
| { |
| "epoch": 0.05593567397492883, |
| "grad_norm": 1.0, |
| "learning_rate": 9.685892409218718e-06, |
| "loss": 0.4305, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 560, |
| "tokens_per_second_per_gpu": 16979.23, |
| "total_tokens": 25327149 |
| }, |
| { |
| "epoch": 0.05603555910702692, |
| "grad_norm": 0.94921875, |
| "learning_rate": 9.651005032974994e-06, |
| "loss": 0.5061, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 561, |
| "tokens_per_second_per_gpu": 19737.19, |
| "total_tokens": 25375816 |
| }, |
| { |
| "epoch": 0.056135444239125004, |
| "grad_norm": 0.9921875, |
| "learning_rate": 9.616121909124801e-06, |
| "loss": 0.42, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 562, |
| "tokens_per_second_per_gpu": 17275.51, |
| "total_tokens": 25417750 |
| }, |
| { |
| "epoch": 0.056235329371223094, |
| "grad_norm": 0.98828125, |
| "learning_rate": 9.581243462708007e-06, |
| "loss": 0.475, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 563, |
| "tokens_per_second_per_gpu": 18029.63, |
| "total_tokens": 25463172 |
| }, |
| { |
| "epoch": 0.05633521450332118, |
| "grad_norm": 1.0, |
| "learning_rate": 9.546370118707463e-06, |
| "loss": 0.3855, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 564, |
| "tokens_per_second_per_gpu": 15926.85, |
| "total_tokens": 25501577 |
| }, |
| { |
| "epoch": 0.05643509963541927, |
| "grad_norm": 1.25, |
| "learning_rate": 9.511502302043867e-06, |
| "loss": 0.4714, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 565, |
| "tokens_per_second_per_gpu": 20540.94, |
| "total_tokens": 25550938 |
| }, |
| { |
| "epoch": 0.05653498476751735, |
| "grad_norm": 0.953125, |
| "learning_rate": 9.476640437570562e-06, |
| "loss": 0.4812, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 566, |
| "tokens_per_second_per_gpu": 18593.41, |
| "total_tokens": 25596861 |
| }, |
| { |
| "epoch": 0.05663486989961544, |
| "grad_norm": 1.0390625, |
| "learning_rate": 9.441784950068362e-06, |
| "loss": 0.4998, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 567, |
| "tokens_per_second_per_gpu": 19689.28, |
| "total_tokens": 25645516 |
| }, |
| { |
| "epoch": 0.05673475503171353, |
| "grad_norm": 1.0078125, |
| "learning_rate": 9.406936264240386e-06, |
| "loss": 0.4486, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 568, |
| "tokens_per_second_per_gpu": 17352.5, |
| "total_tokens": 25687800 |
| }, |
| { |
| "epoch": 0.056834640163811616, |
| "grad_norm": 1.0390625, |
| "learning_rate": 9.372094804706867e-06, |
| "loss": 0.4391, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 569, |
| "tokens_per_second_per_gpu": 17140.78, |
| "total_tokens": 25730650 |
| }, |
| { |
| "epoch": 0.056934525295909706, |
| "grad_norm": 1.0234375, |
| "learning_rate": 9.337260996000002e-06, |
| "loss": 0.4701, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 570, |
| "tokens_per_second_per_gpu": 16625.41, |
| "total_tokens": 25772206 |
| }, |
| { |
| "epoch": 0.05703441042800779, |
| "grad_norm": 0.95703125, |
| "learning_rate": 9.302435262558748e-06, |
| "loss": 0.467, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 571, |
| "tokens_per_second_per_gpu": 19677.5, |
| "total_tokens": 25821005 |
| }, |
| { |
| "epoch": 0.05713429556010588, |
| "grad_norm": 1.015625, |
| "learning_rate": 9.267618028723687e-06, |
| "loss": 0.4443, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 572, |
| "tokens_per_second_per_gpu": 17615.31, |
| "total_tokens": 25863849 |
| }, |
| { |
| "epoch": 0.05723418069220396, |
| "grad_norm": 0.96875, |
| "learning_rate": 9.232809718731815e-06, |
| "loss": 0.4633, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 573, |
| "tokens_per_second_per_gpu": 18647.94, |
| "total_tokens": 25909625 |
| }, |
| { |
| "epoch": 0.057334065824302054, |
| "grad_norm": 0.97265625, |
| "learning_rate": 9.198010756711413e-06, |
| "loss": 0.4333, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 574, |
| "tokens_per_second_per_gpu": 18315.95, |
| "total_tokens": 25954200 |
| }, |
| { |
| "epoch": 0.05743395095640014, |
| "grad_norm": 0.984375, |
| "learning_rate": 9.163221566676847e-06, |
| "loss": 0.5012, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 575, |
| "tokens_per_second_per_gpu": 20415.32, |
| "total_tokens": 26003168 |
| }, |
| { |
| "epoch": 0.05753383608849823, |
| "grad_norm": 0.99609375, |
| "learning_rate": 9.128442572523418e-06, |
| "loss": 0.4842, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 576, |
| "tokens_per_second_per_gpu": 18732.91, |
| "total_tokens": 26048948 |
| }, |
| { |
| "epoch": 0.05763372122059631, |
| "grad_norm": 0.93359375, |
| "learning_rate": 9.093674198022201e-06, |
| "loss": 0.4333, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 577, |
| "tokens_per_second_per_gpu": 19008.14, |
| "total_tokens": 26096480 |
| }, |
| { |
| "epoch": 0.0577336063526944, |
| "grad_norm": 0.97265625, |
| "learning_rate": 9.058916866814857e-06, |
| "loss": 0.4946, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 578, |
| "tokens_per_second_per_gpu": 20926.04, |
| "total_tokens": 26147269 |
| }, |
| { |
| "epoch": 0.05783349148479249, |
| "grad_norm": 1.015625, |
| "learning_rate": 9.024171002408507e-06, |
| "loss": 0.4802, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 579, |
| "tokens_per_second_per_gpu": 19004.89, |
| "total_tokens": 26194908 |
| }, |
| { |
| "epoch": 0.057933376616890575, |
| "grad_norm": 0.9765625, |
| "learning_rate": 8.989437028170537e-06, |
| "loss": 0.4595, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 580, |
| "tokens_per_second_per_gpu": 18815.85, |
| "total_tokens": 26241318 |
| }, |
| { |
| "epoch": 0.058033261748988665, |
| "grad_norm": 0.984375, |
| "learning_rate": 8.954715367323468e-06, |
| "loss": 0.4884, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 581, |
| "tokens_per_second_per_gpu": 20409.23, |
| "total_tokens": 26289950 |
| }, |
| { |
| "epoch": 0.05813314688108675, |
| "grad_norm": 1.0234375, |
| "learning_rate": 8.920006442939772e-06, |
| "loss": 0.4935, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 582, |
| "tokens_per_second_per_gpu": 19855.08, |
| "total_tokens": 26339613 |
| }, |
| { |
| "epoch": 0.05823303201318484, |
| "grad_norm": 0.984375, |
| "learning_rate": 8.885310677936746e-06, |
| "loss": 0.4837, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 583, |
| "tokens_per_second_per_gpu": 18973.04, |
| "total_tokens": 26385041 |
| }, |
| { |
| "epoch": 0.05833291714528292, |
| "grad_norm": 0.99609375, |
| "learning_rate": 8.850628495071336e-06, |
| "loss": 0.4396, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 584, |
| "tokens_per_second_per_gpu": 17273.93, |
| "total_tokens": 26427549 |
| }, |
| { |
| "epoch": 0.05843280227738101, |
| "grad_norm": 0.94140625, |
| "learning_rate": 8.815960316934991e-06, |
| "loss": 0.4523, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 585, |
| "tokens_per_second_per_gpu": 19089.53, |
| "total_tokens": 26473743 |
| }, |
| { |
| "epoch": 0.058532687409479096, |
| "grad_norm": 0.9921875, |
| "learning_rate": 8.781306565948528e-06, |
| "loss": 0.3876, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 586, |
| "tokens_per_second_per_gpu": 16731.9, |
| "total_tokens": 26514808 |
| }, |
| { |
| "epoch": 0.05863257254157719, |
| "grad_norm": 1.0390625, |
| "learning_rate": 8.746667664356957e-06, |
| "loss": 0.4639, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 587, |
| "tokens_per_second_per_gpu": 18593.43, |
| "total_tokens": 26560648 |
| }, |
| { |
| "epoch": 0.05873245767367527, |
| "grad_norm": 0.98046875, |
| "learning_rate": 8.712044034224374e-06, |
| "loss": 0.4661, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 588, |
| "tokens_per_second_per_gpu": 19767.89, |
| "total_tokens": 26607406 |
| }, |
| { |
| "epoch": 0.05883234280577336, |
| "grad_norm": 0.97265625, |
| "learning_rate": 8.677436097428775e-06, |
| "loss": 0.4576, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 589, |
| "tokens_per_second_per_gpu": 18286.53, |
| "total_tokens": 26652688 |
| }, |
| { |
| "epoch": 0.05893222793787145, |
| "grad_norm": 0.9609375, |
| "learning_rate": 8.642844275656957e-06, |
| "loss": 0.4869, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 590, |
| "tokens_per_second_per_gpu": 20452.72, |
| "total_tokens": 26701540 |
| }, |
| { |
| "epoch": 0.059032113069969534, |
| "grad_norm": 0.9765625, |
| "learning_rate": 8.60826899039935e-06, |
| "loss": 0.4703, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 591, |
| "tokens_per_second_per_gpu": 19659.34, |
| "total_tokens": 26748739 |
| }, |
| { |
| "epoch": 0.059131998202067625, |
| "grad_norm": 1.078125, |
| "learning_rate": 8.573710662944884e-06, |
| "loss": 0.4692, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 592, |
| "tokens_per_second_per_gpu": 15527.22, |
| "total_tokens": 26787153 |
| }, |
| { |
| "epoch": 0.05923188333416571, |
| "grad_norm": 1.0078125, |
| "learning_rate": 8.539169714375885e-06, |
| "loss": 0.4582, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 593, |
| "tokens_per_second_per_gpu": 18806.46, |
| "total_tokens": 26832537 |
| }, |
| { |
| "epoch": 0.0593317684662638, |
| "grad_norm": 0.93359375, |
| "learning_rate": 8.504646565562907e-06, |
| "loss": 0.4949, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 594, |
| "tokens_per_second_per_gpu": 20047.32, |
| "total_tokens": 26881583 |
| }, |
| { |
| "epoch": 0.05943165359836188, |
| "grad_norm": 0.96484375, |
| "learning_rate": 8.47014163715962e-06, |
| "loss": 0.4277, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 595, |
| "tokens_per_second_per_gpu": 17635.6, |
| "total_tokens": 26924740 |
| }, |
| { |
| "epoch": 0.05953153873045997, |
| "grad_norm": 1.0703125, |
| "learning_rate": 8.43565534959769e-06, |
| "loss": 0.4941, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 596, |
| "tokens_per_second_per_gpu": 16559.95, |
| "total_tokens": 26964699 |
| }, |
| { |
| "epoch": 0.059631423862558056, |
| "grad_norm": 1.0546875, |
| "learning_rate": 8.401188123081653e-06, |
| "loss": 0.503, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 597, |
| "tokens_per_second_per_gpu": 19523.28, |
| "total_tokens": 27013061 |
| }, |
| { |
| "epoch": 0.059731308994656146, |
| "grad_norm": 1.046875, |
| "learning_rate": 8.366740377583781e-06, |
| "loss": 0.4505, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 598, |
| "tokens_per_second_per_gpu": 16031.96, |
| "total_tokens": 27053346 |
| }, |
| { |
| "epoch": 0.05983119412675423, |
| "grad_norm": 1.828125, |
| "learning_rate": 8.332312532838978e-06, |
| "loss": 0.4278, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 599, |
| "tokens_per_second_per_gpu": 18183.46, |
| "total_tokens": 27097289 |
| }, |
| { |
| "epoch": 0.05993107925885232, |
| "grad_norm": 0.96484375, |
| "learning_rate": 8.297905008339677e-06, |
| "loss": 0.4328, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 600, |
| "tokens_per_second_per_gpu": 18745.84, |
| "total_tokens": 27142719 |
| }, |
| { |
| "epoch": 0.06003096439095041, |
| "grad_norm": 0.96875, |
| "learning_rate": 8.263518223330698e-06, |
| "loss": 0.4855, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 601, |
| "tokens_per_second_per_gpu": 19117.69, |
| "total_tokens": 27189704 |
| }, |
| { |
| "epoch": 0.060130849523048494, |
| "grad_norm": 0.9765625, |
| "learning_rate": 8.22915259680417e-06, |
| "loss": 0.4138, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 602, |
| "tokens_per_second_per_gpu": 17316.95, |
| "total_tokens": 27233327 |
| }, |
| { |
| "epoch": 0.060230734655146584, |
| "grad_norm": 0.9921875, |
| "learning_rate": 8.194808547494401e-06, |
| "loss": 0.4416, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 603, |
| "tokens_per_second_per_gpu": 18794.71, |
| "total_tokens": 27278463 |
| }, |
| { |
| "epoch": 0.06033061978724467, |
| "grad_norm": 0.9765625, |
| "learning_rate": 8.1604864938728e-06, |
| "loss": 0.4301, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 604, |
| "tokens_per_second_per_gpu": 17557.98, |
| "total_tokens": 27321255 |
| }, |
| { |
| "epoch": 0.06043050491934276, |
| "grad_norm": 0.984375, |
| "learning_rate": 8.126186854142752e-06, |
| "loss": 0.4919, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 605, |
| "tokens_per_second_per_gpu": 19766.28, |
| "total_tokens": 27368920 |
| }, |
| { |
| "epoch": 0.06053039005144084, |
| "grad_norm": 0.9765625, |
| "learning_rate": 8.091910046234552e-06, |
| "loss": 0.4667, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 606, |
| "tokens_per_second_per_gpu": 18862.83, |
| "total_tokens": 27415583 |
| }, |
| { |
| "epoch": 0.06063027518353893, |
| "grad_norm": 0.96484375, |
| "learning_rate": 8.057656487800283e-06, |
| "loss": 0.4607, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 607, |
| "tokens_per_second_per_gpu": 18179.15, |
| "total_tokens": 27459995 |
| }, |
| { |
| "epoch": 0.060730160315637015, |
| "grad_norm": 0.94140625, |
| "learning_rate": 8.023426596208739e-06, |
| "loss": 0.444, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 608, |
| "tokens_per_second_per_gpu": 19382.04, |
| "total_tokens": 27506679 |
| }, |
| { |
| "epoch": 0.060830045447735105, |
| "grad_norm": 1.015625, |
| "learning_rate": 7.989220788540356e-06, |
| "loss": 0.4386, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 609, |
| "tokens_per_second_per_gpu": 17295.19, |
| "total_tokens": 27549153 |
| }, |
| { |
| "epoch": 0.06092993057983319, |
| "grad_norm": 0.9453125, |
| "learning_rate": 7.955039481582098e-06, |
| "loss": 0.4129, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 610, |
| "tokens_per_second_per_gpu": 16833.12, |
| "total_tokens": 27591645 |
| }, |
| { |
| "epoch": 0.06102981571193128, |
| "grad_norm": 0.96484375, |
| "learning_rate": 7.92088309182241e-06, |
| "loss": 0.45, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 611, |
| "tokens_per_second_per_gpu": 19610.45, |
| "total_tokens": 27638598 |
| }, |
| { |
| "epoch": 0.06112970084402937, |
| "grad_norm": 0.9921875, |
| "learning_rate": 7.886752035446116e-06, |
| "loss": 0.4748, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 612, |
| "tokens_per_second_per_gpu": 19465.45, |
| "total_tokens": 27685299 |
| }, |
| { |
| "epoch": 0.06122958597612745, |
| "grad_norm": 0.9609375, |
| "learning_rate": 7.852646728329368e-06, |
| "loss": 0.4291, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 613, |
| "tokens_per_second_per_gpu": 17706.05, |
| "total_tokens": 27729722 |
| }, |
| { |
| "epoch": 0.06132947110822554, |
| "grad_norm": 0.9609375, |
| "learning_rate": 7.818567586034578e-06, |
| "loss": 0.4414, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 614, |
| "tokens_per_second_per_gpu": 19520.29, |
| "total_tokens": 27776771 |
| }, |
| { |
| "epoch": 0.061429356240323627, |
| "grad_norm": 0.96484375, |
| "learning_rate": 7.784515023805328e-06, |
| "loss": 0.4513, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 615, |
| "tokens_per_second_per_gpu": 19684.59, |
| "total_tokens": 27824101 |
| }, |
| { |
| "epoch": 0.06152924137242172, |
| "grad_norm": 1.0390625, |
| "learning_rate": 7.750489456561351e-06, |
| "loss": 0.4747, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 616, |
| "tokens_per_second_per_gpu": 17967.76, |
| "total_tokens": 27868858 |
| }, |
| { |
| "epoch": 0.0616291265045198, |
| "grad_norm": 0.96875, |
| "learning_rate": 7.716491298893443e-06, |
| "loss": 0.434, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 617, |
| "tokens_per_second_per_gpu": 16676.99, |
| "total_tokens": 27910582 |
| }, |
| { |
| "epoch": 0.06172901163661789, |
| "grad_norm": 1.015625, |
| "learning_rate": 7.68252096505843e-06, |
| "loss": 0.4833, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 618, |
| "tokens_per_second_per_gpu": 17930.32, |
| "total_tokens": 27955552 |
| }, |
| { |
| "epoch": 0.061828896768715974, |
| "grad_norm": 0.9921875, |
| "learning_rate": 7.6485788689741e-06, |
| "loss": 0.4148, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 619, |
| "tokens_per_second_per_gpu": 17313.44, |
| "total_tokens": 27997912 |
| }, |
| { |
| "epoch": 0.061928781900814064, |
| "grad_norm": 1.0078125, |
| "learning_rate": 7.6146654242141935e-06, |
| "loss": 0.3938, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 620, |
| "tokens_per_second_per_gpu": 16905.12, |
| "total_tokens": 28039699 |
| }, |
| { |
| "epoch": 0.06202866703291215, |
| "grad_norm": 0.9921875, |
| "learning_rate": 7.580781044003324e-06, |
| "loss": 0.466, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 621, |
| "tokens_per_second_per_gpu": 19457.54, |
| "total_tokens": 28085977 |
| }, |
| { |
| "epoch": 0.06212855216501024, |
| "grad_norm": 1.0234375, |
| "learning_rate": 7.546926141211975e-06, |
| "loss": 0.466, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 622, |
| "tokens_per_second_per_gpu": 17933.92, |
| "total_tokens": 28130081 |
| }, |
| { |
| "epoch": 0.06222843729710833, |
| "grad_norm": 1.0234375, |
| "learning_rate": 7.513101128351454e-06, |
| "loss": 0.4754, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 623, |
| "tokens_per_second_per_gpu": 18196.03, |
| "total_tokens": 28175362 |
| }, |
| { |
| "epoch": 0.06232832242920641, |
| "grad_norm": 0.93359375, |
| "learning_rate": 7.4793064175688635e-06, |
| "loss": 0.5095, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 624, |
| "tokens_per_second_per_gpu": 20367.97, |
| "total_tokens": 28225060 |
| }, |
| { |
| "epoch": 0.0624282075613045, |
| "grad_norm": 0.99609375, |
| "learning_rate": 7.445542420642097e-06, |
| "loss": 0.4894, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 625, |
| "tokens_per_second_per_gpu": 19088.22, |
| "total_tokens": 28272613 |
| }, |
| { |
| "epoch": 0.06252809269340259, |
| "grad_norm": 0.9609375, |
| "learning_rate": 7.411809548974792e-06, |
| "loss": 0.4331, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 626, |
| "tokens_per_second_per_gpu": 17644.45, |
| "total_tokens": 28318026 |
| }, |
| { |
| "epoch": 0.06262797782550067, |
| "grad_norm": 0.9609375, |
| "learning_rate": 7.378108213591355e-06, |
| "loss": 0.441, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 627, |
| "tokens_per_second_per_gpu": 18800.17, |
| "total_tokens": 28363693 |
| }, |
| { |
| "epoch": 0.06272786295759876, |
| "grad_norm": 0.984375, |
| "learning_rate": 7.344438825131912e-06, |
| "loss": 0.4471, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 628, |
| "tokens_per_second_per_gpu": 18055.68, |
| "total_tokens": 28407964 |
| }, |
| { |
| "epoch": 0.06282774808969685, |
| "grad_norm": 1.015625, |
| "learning_rate": 7.310801793847344e-06, |
| "loss": 0.4504, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 629, |
| "tokens_per_second_per_gpu": 17227.74, |
| "total_tokens": 28451011 |
| }, |
| { |
| "epoch": 0.06292763322179494, |
| "grad_norm": 1.0390625, |
| "learning_rate": 7.277197529594257e-06, |
| "loss": 0.407, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 630, |
| "tokens_per_second_per_gpu": 16007.4, |
| "total_tokens": 28490050 |
| }, |
| { |
| "epoch": 0.06302751835389302, |
| "grad_norm": 1.0, |
| "learning_rate": 7.243626441830009e-06, |
| "loss": 0.4627, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 631, |
| "tokens_per_second_per_gpu": 18154.11, |
| "total_tokens": 28534404 |
| }, |
| { |
| "epoch": 0.06312740348599111, |
| "grad_norm": 0.953125, |
| "learning_rate": 7.210088939607709e-06, |
| "loss": 0.4687, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 632, |
| "tokens_per_second_per_gpu": 19973.58, |
| "total_tokens": 28583112 |
| }, |
| { |
| "epoch": 0.0632272886180892, |
| "grad_norm": 0.98828125, |
| "learning_rate": 7.176585431571235e-06, |
| "loss": 0.4917, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 633, |
| "tokens_per_second_per_gpu": 19270.85, |
| "total_tokens": 28630682 |
| }, |
| { |
| "epoch": 0.06332717375018729, |
| "grad_norm": 1.015625, |
| "learning_rate": 7.143116325950266e-06, |
| "loss": 0.4501, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 634, |
| "tokens_per_second_per_gpu": 17027.02, |
| "total_tokens": 28672763 |
| }, |
| { |
| "epoch": 0.06342705888228538, |
| "grad_norm": 0.9375, |
| "learning_rate": 7.109682030555283e-06, |
| "loss": 0.4657, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 635, |
| "tokens_per_second_per_gpu": 18996.37, |
| "total_tokens": 28718962 |
| }, |
| { |
| "epoch": 0.06352694401438345, |
| "grad_norm": 0.99609375, |
| "learning_rate": 7.076282952772634e-06, |
| "loss": 0.462, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 636, |
| "tokens_per_second_per_gpu": 19421.75, |
| "total_tokens": 28766632 |
| }, |
| { |
| "epoch": 0.06362682914648154, |
| "grad_norm": 0.98828125, |
| "learning_rate": 7.042919499559538e-06, |
| "loss": 0.468, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 637, |
| "tokens_per_second_per_gpu": 18377.34, |
| "total_tokens": 28811631 |
| }, |
| { |
| "epoch": 0.06372671427857964, |
| "grad_norm": 0.984375, |
| "learning_rate": 7.009592077439135e-06, |
| "loss": 0.4868, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 638, |
| "tokens_per_second_per_gpu": 19772.77, |
| "total_tokens": 28859019 |
| }, |
| { |
| "epoch": 0.06382659941067773, |
| "grad_norm": 1.0078125, |
| "learning_rate": 6.976301092495556e-06, |
| "loss": 0.4605, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 639, |
| "tokens_per_second_per_gpu": 19948.77, |
| "total_tokens": 28907903 |
| }, |
| { |
| "epoch": 0.0639264845427758, |
| "grad_norm": 0.96484375, |
| "learning_rate": 6.943046950368944e-06, |
| "loss": 0.4336, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 640, |
| "tokens_per_second_per_gpu": 18449.02, |
| "total_tokens": 28952689 |
| }, |
| { |
| "epoch": 0.06402636967487389, |
| "grad_norm": 1.0546875, |
| "learning_rate": 6.909830056250527e-06, |
| "loss": 0.3581, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 641, |
| "tokens_per_second_per_gpu": 13364.83, |
| "total_tokens": 28986620 |
| }, |
| { |
| "epoch": 0.06412625480697198, |
| "grad_norm": 1.0390625, |
| "learning_rate": 6.876650814877675e-06, |
| "loss": 0.4532, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 642, |
| "tokens_per_second_per_gpu": 18207.0, |
| "total_tokens": 29030761 |
| }, |
| { |
| "epoch": 0.06422613993907007, |
| "grad_norm": 0.99609375, |
| "learning_rate": 6.843509630528977e-06, |
| "loss": 0.4339, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 643, |
| "tokens_per_second_per_gpu": 19715.45, |
| "total_tokens": 29077383 |
| }, |
| { |
| "epoch": 0.06432602507116815, |
| "grad_norm": 0.9921875, |
| "learning_rate": 6.8104069070193e-06, |
| "loss": 0.4657, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 644, |
| "tokens_per_second_per_gpu": 17513.0, |
| "total_tokens": 29120675 |
| }, |
| { |
| "epoch": 0.06442591020326624, |
| "grad_norm": 1.015625, |
| "learning_rate": 6.777343047694891e-06, |
| "loss": 0.4571, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 645, |
| "tokens_per_second_per_gpu": 17560.1, |
| "total_tokens": 29164085 |
| }, |
| { |
| "epoch": 0.06452579533536433, |
| "grad_norm": 1.03125, |
| "learning_rate": 6.744318455428436e-06, |
| "loss": 0.4452, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 646, |
| "tokens_per_second_per_gpu": 16974.5, |
| "total_tokens": 29205333 |
| }, |
| { |
| "epoch": 0.06462568046746242, |
| "grad_norm": 0.953125, |
| "learning_rate": 6.711333532614168e-06, |
| "loss": 0.4409, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 647, |
| "tokens_per_second_per_gpu": 18172.28, |
| "total_tokens": 29250861 |
| }, |
| { |
| "epoch": 0.06472556559956051, |
| "grad_norm": 1.1484375, |
| "learning_rate": 6.67838868116297e-06, |
| "loss": 0.4604, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 648, |
| "tokens_per_second_per_gpu": 17922.54, |
| "total_tokens": 29294375 |
| }, |
| { |
| "epoch": 0.06482545073165859, |
| "grad_norm": 0.9453125, |
| "learning_rate": 6.645484302497452e-06, |
| "loss": 0.4123, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 649, |
| "tokens_per_second_per_gpu": 19412.3, |
| "total_tokens": 29342090 |
| }, |
| { |
| "epoch": 0.06492533586375668, |
| "grad_norm": 0.98828125, |
| "learning_rate": 6.612620797547087e-06, |
| "loss": 0.4664, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 650, |
| "tokens_per_second_per_gpu": 20085.46, |
| "total_tokens": 29390943 |
| }, |
| { |
| "epoch": 0.06502522099585477, |
| "grad_norm": 0.953125, |
| "learning_rate": 6.579798566743314e-06, |
| "loss": 0.4643, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 651, |
| "tokens_per_second_per_gpu": 19639.71, |
| "total_tokens": 29438908 |
| }, |
| { |
| "epoch": 0.06512510612795286, |
| "grad_norm": 0.96875, |
| "learning_rate": 6.547018010014654e-06, |
| "loss": 0.5143, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 652, |
| "tokens_per_second_per_gpu": 20348.98, |
| "total_tokens": 29487963 |
| }, |
| { |
| "epoch": 0.06522499126005094, |
| "grad_norm": 1.140625, |
| "learning_rate": 6.5142795267818505e-06, |
| "loss": 0.4578, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 653, |
| "tokens_per_second_per_gpu": 21329.4, |
| "total_tokens": 29539358 |
| }, |
| { |
| "epoch": 0.06532487639214903, |
| "grad_norm": 0.9765625, |
| "learning_rate": 6.481583515952983e-06, |
| "loss": 0.4758, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 654, |
| "tokens_per_second_per_gpu": 20649.82, |
| "total_tokens": 29589170 |
| }, |
| { |
| "epoch": 0.06542476152424712, |
| "grad_norm": 1.0234375, |
| "learning_rate": 6.448930375918632e-06, |
| "loss": 0.4418, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 655, |
| "tokens_per_second_per_gpu": 15975.75, |
| "total_tokens": 29628635 |
| }, |
| { |
| "epoch": 0.0655246466563452, |
| "grad_norm": 0.94140625, |
| "learning_rate": 6.4163205045469975e-06, |
| "loss": 0.4991, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 656, |
| "tokens_per_second_per_gpu": 20504.51, |
| "total_tokens": 29680577 |
| }, |
| { |
| "epoch": 0.0656245317884433, |
| "grad_norm": 1.03125, |
| "learning_rate": 6.383754299179079e-06, |
| "loss": 0.4592, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 657, |
| "tokens_per_second_per_gpu": 17970.07, |
| "total_tokens": 29725176 |
| }, |
| { |
| "epoch": 0.06572441692054137, |
| "grad_norm": 0.99609375, |
| "learning_rate": 6.351232156623803e-06, |
| "loss": 0.4616, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 658, |
| "tokens_per_second_per_gpu": 18193.26, |
| "total_tokens": 29769698 |
| }, |
| { |
| "epoch": 0.06582430205263946, |
| "grad_norm": 1.0078125, |
| "learning_rate": 6.318754473153221e-06, |
| "loss": 0.4671, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 659, |
| "tokens_per_second_per_gpu": 18415.02, |
| "total_tokens": 29814583 |
| }, |
| { |
| "epoch": 0.06592418718473755, |
| "grad_norm": 0.9765625, |
| "learning_rate": 6.286321644497655e-06, |
| "loss": 0.4223, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 660, |
| "tokens_per_second_per_gpu": 17227.34, |
| "total_tokens": 29857378 |
| }, |
| { |
| "epoch": 0.06602407231683564, |
| "grad_norm": 0.92578125, |
| "learning_rate": 6.25393406584088e-06, |
| "loss": 0.392, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 661, |
| "tokens_per_second_per_gpu": 18097.88, |
| "total_tokens": 29901353 |
| }, |
| { |
| "epoch": 0.06612395744893372, |
| "grad_norm": 1.0703125, |
| "learning_rate": 6.22159213181533e-06, |
| "loss": 0.4632, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 662, |
| "tokens_per_second_per_gpu": 18450.85, |
| "total_tokens": 29946306 |
| }, |
| { |
| "epoch": 0.06622384258103181, |
| "grad_norm": 0.96875, |
| "learning_rate": 6.18929623649726e-06, |
| "loss": 0.4411, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 663, |
| "tokens_per_second_per_gpu": 17681.94, |
| "total_tokens": 29989951 |
| }, |
| { |
| "epoch": 0.0663237277131299, |
| "grad_norm": 0.9375, |
| "learning_rate": 6.157046773401964e-06, |
| "loss": 0.4539, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 664, |
| "tokens_per_second_per_gpu": 19703.0, |
| "total_tokens": 30038267 |
| }, |
| { |
| "epoch": 0.06642361284522799, |
| "grad_norm": 0.98046875, |
| "learning_rate": 6.124844135478971e-06, |
| "loss": 0.4563, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 665, |
| "tokens_per_second_per_gpu": 17008.48, |
| "total_tokens": 30081026 |
| }, |
| { |
| "epoch": 0.06652349797732607, |
| "grad_norm": 0.96484375, |
| "learning_rate": 6.092688715107265e-06, |
| "loss": 0.4441, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 666, |
| "tokens_per_second_per_gpu": 19151.18, |
| "total_tokens": 30127625 |
| }, |
| { |
| "epoch": 0.06662338310942416, |
| "grad_norm": 0.96484375, |
| "learning_rate": 6.06058090409049e-06, |
| "loss": 0.412, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 667, |
| "tokens_per_second_per_gpu": 16757.9, |
| "total_tokens": 30168887 |
| }, |
| { |
| "epoch": 0.06672326824152225, |
| "grad_norm": 0.9375, |
| "learning_rate": 6.028521093652195e-06, |
| "loss": 0.4656, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 668, |
| "tokens_per_second_per_gpu": 20171.11, |
| "total_tokens": 30218601 |
| }, |
| { |
| "epoch": 0.06682315337362034, |
| "grad_norm": 0.984375, |
| "learning_rate": 5.996509674431053e-06, |
| "loss": 0.4596, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 669, |
| "tokens_per_second_per_gpu": 18319.13, |
| "total_tokens": 30264663 |
| }, |
| { |
| "epoch": 0.06692303850571843, |
| "grad_norm": 0.98046875, |
| "learning_rate": 5.9645470364761e-06, |
| "loss": 0.4339, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 670, |
| "tokens_per_second_per_gpu": 17872.78, |
| "total_tokens": 30308631 |
| }, |
| { |
| "epoch": 0.0670229236378165, |
| "grad_norm": 0.95703125, |
| "learning_rate": 5.932633569242e-06, |
| "loss": 0.4562, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 671, |
| "tokens_per_second_per_gpu": 19672.95, |
| "total_tokens": 30355498 |
| }, |
| { |
| "epoch": 0.0671228087699146, |
| "grad_norm": 0.95703125, |
| "learning_rate": 5.900769661584273e-06, |
| "loss": 0.4612, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 672, |
| "tokens_per_second_per_gpu": 19593.86, |
| "total_tokens": 30403625 |
| }, |
| { |
| "epoch": 0.06722269390201269, |
| "grad_norm": 0.96484375, |
| "learning_rate": 5.868955701754584e-06, |
| "loss": 0.485, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 673, |
| "tokens_per_second_per_gpu": 21161.74, |
| "total_tokens": 30455157 |
| }, |
| { |
| "epoch": 0.06732257903411078, |
| "grad_norm": 0.984375, |
| "learning_rate": 5.83719207739599e-06, |
| "loss": 0.4095, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 674, |
| "tokens_per_second_per_gpu": 17658.46, |
| "total_tokens": 30498641 |
| }, |
| { |
| "epoch": 0.06742246416620885, |
| "grad_norm": 0.984375, |
| "learning_rate": 5.8054791755382286e-06, |
| "loss": 0.4319, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 675, |
| "tokens_per_second_per_gpu": 17117.92, |
| "total_tokens": 30540987 |
| }, |
| { |
| "epoch": 0.06752234929830694, |
| "grad_norm": 1.046875, |
| "learning_rate": 5.773817382593008e-06, |
| "loss": 0.4071, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 676, |
| "tokens_per_second_per_gpu": 14920.45, |
| "total_tokens": 30578470 |
| }, |
| { |
| "epoch": 0.06762223443040503, |
| "grad_norm": 1.265625, |
| "learning_rate": 5.742207084349274e-06, |
| "loss": 0.4675, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 677, |
| "tokens_per_second_per_gpu": 18108.81, |
| "total_tokens": 30623728 |
| }, |
| { |
| "epoch": 0.06772211956250312, |
| "grad_norm": 1.0078125, |
| "learning_rate": 5.710648665968543e-06, |
| "loss": 0.4302, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 678, |
| "tokens_per_second_per_gpu": 16796.52, |
| "total_tokens": 30665187 |
| }, |
| { |
| "epoch": 0.06782200469460122, |
| "grad_norm": 0.921875, |
| "learning_rate": 5.679142511980176e-06, |
| "loss": 0.4784, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 679, |
| "tokens_per_second_per_gpu": 20095.37, |
| "total_tokens": 30715056 |
| }, |
| { |
| "epoch": 0.06792188982669929, |
| "grad_norm": 1.0078125, |
| "learning_rate": 5.647689006276727e-06, |
| "loss": 0.4255, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 680, |
| "tokens_per_second_per_gpu": 16664.02, |
| "total_tokens": 30755522 |
| }, |
| { |
| "epoch": 0.06802177495879738, |
| "grad_norm": 1.0625, |
| "learning_rate": 5.616288532109225e-06, |
| "loss": 0.4976, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 681, |
| "tokens_per_second_per_gpu": 18576.48, |
| "total_tokens": 30801413 |
| }, |
| { |
| "epoch": 0.06812166009089547, |
| "grad_norm": 0.9765625, |
| "learning_rate": 5.584941472082549e-06, |
| "loss": 0.4152, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 682, |
| "tokens_per_second_per_gpu": 17569.33, |
| "total_tokens": 30844380 |
| }, |
| { |
| "epoch": 0.06822154522299356, |
| "grad_norm": 0.9765625, |
| "learning_rate": 5.553648208150728e-06, |
| "loss": 0.4134, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 683, |
| "tokens_per_second_per_gpu": 17369.95, |
| "total_tokens": 30887278 |
| }, |
| { |
| "epoch": 0.06832143035509164, |
| "grad_norm": 1.0078125, |
| "learning_rate": 5.522409121612304e-06, |
| "loss": 0.4959, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 684, |
| "tokens_per_second_per_gpu": 20798.58, |
| "total_tokens": 30937699 |
| }, |
| { |
| "epoch": 0.06842131548718973, |
| "grad_norm": 0.98046875, |
| "learning_rate": 5.491224593105695e-06, |
| "loss": 0.5075, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 685, |
| "tokens_per_second_per_gpu": 19341.32, |
| "total_tokens": 30985491 |
| }, |
| { |
| "epoch": 0.06852120061928782, |
| "grad_norm": 0.9375, |
| "learning_rate": 5.460095002604533e-06, |
| "loss": 0.4622, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 686, |
| "tokens_per_second_per_gpu": 20051.25, |
| "total_tokens": 31034575 |
| }, |
| { |
| "epoch": 0.06862108575138591, |
| "grad_norm": 0.9375, |
| "learning_rate": 5.429020729413062e-06, |
| "loss": 0.4696, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 687, |
| "tokens_per_second_per_gpu": 19922.24, |
| "total_tokens": 31084550 |
| }, |
| { |
| "epoch": 0.06872097088348399, |
| "grad_norm": 0.90625, |
| "learning_rate": 5.398002152161484e-06, |
| "loss": 0.4259, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 688, |
| "tokens_per_second_per_gpu": 19415.35, |
| "total_tokens": 31132179 |
| }, |
| { |
| "epoch": 0.06882085601558208, |
| "grad_norm": 1.015625, |
| "learning_rate": 5.367039648801386e-06, |
| "loss": 0.4633, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 689, |
| "tokens_per_second_per_gpu": 16543.47, |
| "total_tokens": 31173186 |
| }, |
| { |
| "epoch": 0.06892074114768017, |
| "grad_norm": 0.98046875, |
| "learning_rate": 5.336133596601089e-06, |
| "loss": 0.4457, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 690, |
| "tokens_per_second_per_gpu": 17956.14, |
| "total_tokens": 31217511 |
| }, |
| { |
| "epoch": 0.06902062627977826, |
| "grad_norm": 0.9765625, |
| "learning_rate": 5.305284372141095e-06, |
| "loss": 0.4806, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 691, |
| "tokens_per_second_per_gpu": 20201.6, |
| "total_tokens": 31266754 |
| }, |
| { |
| "epoch": 0.06912051141187635, |
| "grad_norm": 1.1796875, |
| "learning_rate": 5.274492351309462e-06, |
| "loss": 0.4487, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 692, |
| "tokens_per_second_per_gpu": 17138.32, |
| "total_tokens": 31310363 |
| }, |
| { |
| "epoch": 0.06922039654397442, |
| "grad_norm": 0.9375, |
| "learning_rate": 5.243757909297247e-06, |
| "loss": 0.409, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 693, |
| "tokens_per_second_per_gpu": 19496.06, |
| "total_tokens": 31357301 |
| }, |
| { |
| "epoch": 0.06932028167607251, |
| "grad_norm": 0.9140625, |
| "learning_rate": 5.213081420593933e-06, |
| "loss": 0.4808, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 694, |
| "tokens_per_second_per_gpu": 21254.7, |
| "total_tokens": 31408105 |
| }, |
| { |
| "epoch": 0.0694201668081706, |
| "grad_norm": 1.015625, |
| "learning_rate": 5.1824632589828465e-06, |
| "loss": 0.4155, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 695, |
| "tokens_per_second_per_gpu": 15527.07, |
| "total_tokens": 31447034 |
| }, |
| { |
| "epoch": 0.0695200519402687, |
| "grad_norm": 1.0, |
| "learning_rate": 5.151903797536631e-06, |
| "loss": 0.4171, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 696, |
| "tokens_per_second_per_gpu": 16471.4, |
| "total_tokens": 31486678 |
| }, |
| { |
| "epoch": 0.06961993707236677, |
| "grad_norm": 0.9921875, |
| "learning_rate": 5.121403408612672e-06, |
| "loss": 0.4642, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 697, |
| "tokens_per_second_per_gpu": 19309.6, |
| "total_tokens": 31533685 |
| }, |
| { |
| "epoch": 0.06971982220446486, |
| "grad_norm": 1.0234375, |
| "learning_rate": 5.090962463848592e-06, |
| "loss": 0.4628, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 698, |
| "tokens_per_second_per_gpu": 18263.08, |
| "total_tokens": 31578085 |
| }, |
| { |
| "epoch": 0.06981970733656295, |
| "grad_norm": 0.96484375, |
| "learning_rate": 5.060581334157693e-06, |
| "loss": 0.4488, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 699, |
| "tokens_per_second_per_gpu": 19174.1, |
| "total_tokens": 31624026 |
| }, |
| { |
| "epoch": 0.06991959246866104, |
| "grad_norm": 1.015625, |
| "learning_rate": 5.030260389724447e-06, |
| "loss": 0.4883, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 700, |
| "tokens_per_second_per_gpu": 20237.01, |
| "total_tokens": 31672429 |
| }, |
| { |
| "epoch": 0.07001947760075913, |
| "grad_norm": 0.96484375, |
| "learning_rate": 5.000000000000003e-06, |
| "loss": 0.4821, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 701, |
| "tokens_per_second_per_gpu": 19608.0, |
| "total_tokens": 31720678 |
| }, |
| { |
| "epoch": 0.07011936273285721, |
| "grad_norm": 0.99609375, |
| "learning_rate": 4.96980053369765e-06, |
| "loss": 0.4297, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 702, |
| "tokens_per_second_per_gpu": 16657.01, |
| "total_tokens": 31762205 |
| }, |
| { |
| "epoch": 0.0702192478649553, |
| "grad_norm": 0.9375, |
| "learning_rate": 4.939662358788364e-06, |
| "loss": 0.3997, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 703, |
| "tokens_per_second_per_gpu": 17215.9, |
| "total_tokens": 31805553 |
| }, |
| { |
| "epoch": 0.07031913299705339, |
| "grad_norm": 0.9375, |
| "learning_rate": 4.909585842496287e-06, |
| "loss": 0.417, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 704, |
| "tokens_per_second_per_gpu": 18258.52, |
| "total_tokens": 31849565 |
| }, |
| { |
| "epoch": 0.07041901812915148, |
| "grad_norm": 1.0546875, |
| "learning_rate": 4.879571351294287e-06, |
| "loss": 0.4359, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 705, |
| "tokens_per_second_per_gpu": 16860.81, |
| "total_tokens": 31890442 |
| }, |
| { |
| "epoch": 0.07051890326124956, |
| "grad_norm": 1.0, |
| "learning_rate": 4.849619250899458e-06, |
| "loss": 0.4084, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 706, |
| "tokens_per_second_per_gpu": 16599.91, |
| "total_tokens": 31931257 |
| }, |
| { |
| "epoch": 0.07061878839334765, |
| "grad_norm": 0.921875, |
| "learning_rate": 4.8197299062687e-06, |
| "loss": 0.4197, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 707, |
| "tokens_per_second_per_gpu": 18363.15, |
| "total_tokens": 31976058 |
| }, |
| { |
| "epoch": 0.07071867352544574, |
| "grad_norm": 1.2421875, |
| "learning_rate": 4.78990368159424e-06, |
| "loss": 0.432, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 708, |
| "tokens_per_second_per_gpu": 18281.44, |
| "total_tokens": 32020676 |
| }, |
| { |
| "epoch": 0.07081855865754383, |
| "grad_norm": 1.15625, |
| "learning_rate": 4.76014094029921e-06, |
| "loss": 0.4821, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 709, |
| "tokens_per_second_per_gpu": 17574.88, |
| "total_tokens": 32064101 |
| }, |
| { |
| "epoch": 0.0709184437896419, |
| "grad_norm": 1.0390625, |
| "learning_rate": 4.7304420450332244e-06, |
| "loss": 0.4496, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 710, |
| "tokens_per_second_per_gpu": 17647.12, |
| "total_tokens": 32107600 |
| }, |
| { |
| "epoch": 0.07101832892174, |
| "grad_norm": 0.953125, |
| "learning_rate": 4.700807357667953e-06, |
| "loss": 0.507, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 711, |
| "tokens_per_second_per_gpu": 21211.22, |
| "total_tokens": 32158910 |
| }, |
| { |
| "epoch": 0.07111821405383809, |
| "grad_norm": 0.96484375, |
| "learning_rate": 4.671237239292699e-06, |
| "loss": 0.3837, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 712, |
| "tokens_per_second_per_gpu": 17423.28, |
| "total_tokens": 32202315 |
| }, |
| { |
| "epoch": 0.07121809918593618, |
| "grad_norm": 0.98046875, |
| "learning_rate": 4.641732050210032e-06, |
| "loss": 0.4286, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 713, |
| "tokens_per_second_per_gpu": 17394.81, |
| "total_tokens": 32245312 |
| }, |
| { |
| "epoch": 0.07131798431803427, |
| "grad_norm": 0.9453125, |
| "learning_rate": 4.612292149931369e-06, |
| "loss": 0.3988, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 714, |
| "tokens_per_second_per_gpu": 16734.28, |
| "total_tokens": 32286805 |
| }, |
| { |
| "epoch": 0.07141786945013234, |
| "grad_norm": 0.93359375, |
| "learning_rate": 4.582917897172603e-06, |
| "loss": 0.4663, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 715, |
| "tokens_per_second_per_gpu": 19969.21, |
| "total_tokens": 32336588 |
| }, |
| { |
| "epoch": 0.07151775458223043, |
| "grad_norm": 1.0078125, |
| "learning_rate": 4.5536096498497295e-06, |
| "loss": 0.462, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 716, |
| "tokens_per_second_per_gpu": 17908.97, |
| "total_tokens": 32379697 |
| }, |
| { |
| "epoch": 0.07161763971432852, |
| "grad_norm": 0.9140625, |
| "learning_rate": 4.524367765074499e-06, |
| "loss": 0.4429, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 717, |
| "tokens_per_second_per_gpu": 20401.38, |
| "total_tokens": 32430056 |
| }, |
| { |
| "epoch": 0.07171752484642661, |
| "grad_norm": 1.0625, |
| "learning_rate": 4.495192599150045e-06, |
| "loss": 0.4642, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 718, |
| "tokens_per_second_per_gpu": 16419.87, |
| "total_tokens": 32470376 |
| }, |
| { |
| "epoch": 0.07181740997852469, |
| "grad_norm": 0.94921875, |
| "learning_rate": 4.46608450756656e-06, |
| "loss": 0.4466, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 719, |
| "tokens_per_second_per_gpu": 19404.36, |
| "total_tokens": 32518031 |
| }, |
| { |
| "epoch": 0.07191729511062278, |
| "grad_norm": 0.99609375, |
| "learning_rate": 4.437043844996952e-06, |
| "loss": 0.4127, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 720, |
| "tokens_per_second_per_gpu": 15533.69, |
| "total_tokens": 32557117 |
| }, |
| { |
| "epoch": 0.07201718024272087, |
| "grad_norm": 1.0234375, |
| "learning_rate": 4.408070965292534e-06, |
| "loss": 0.4562, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 721, |
| "tokens_per_second_per_gpu": 17043.35, |
| "total_tokens": 32598621 |
| }, |
| { |
| "epoch": 0.07211706537481896, |
| "grad_norm": 0.984375, |
| "learning_rate": 4.379166221478697e-06, |
| "loss": 0.4657, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 722, |
| "tokens_per_second_per_gpu": 18217.55, |
| "total_tokens": 32642986 |
| }, |
| { |
| "epoch": 0.07221695050691705, |
| "grad_norm": 0.98046875, |
| "learning_rate": 4.350329965750622e-06, |
| "loss": 0.4085, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 723, |
| "tokens_per_second_per_gpu": 17567.62, |
| "total_tokens": 32685773 |
| }, |
| { |
| "epoch": 0.07231683563901513, |
| "grad_norm": 0.98046875, |
| "learning_rate": 4.321562549468991e-06, |
| "loss": 0.4135, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 724, |
| "tokens_per_second_per_gpu": 16458.0, |
| "total_tokens": 32726348 |
| }, |
| { |
| "epoch": 0.07241672077111322, |
| "grad_norm": 1.0234375, |
| "learning_rate": 4.292864323155684e-06, |
| "loss": 0.4623, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 725, |
| "tokens_per_second_per_gpu": 18086.79, |
| "total_tokens": 32771186 |
| }, |
| { |
| "epoch": 0.07251660590321131, |
| "grad_norm": 0.96484375, |
| "learning_rate": 4.264235636489542e-06, |
| "loss": 0.5004, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 726, |
| "tokens_per_second_per_gpu": 20477.83, |
| "total_tokens": 32821377 |
| }, |
| { |
| "epoch": 0.0726164910353094, |
| "grad_norm": 0.96484375, |
| "learning_rate": 4.235676838302069e-06, |
| "loss": 0.4673, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 727, |
| "tokens_per_second_per_gpu": 19149.83, |
| "total_tokens": 32868608 |
| }, |
| { |
| "epoch": 0.07271637616740748, |
| "grad_norm": 1.015625, |
| "learning_rate": 4.207188276573214e-06, |
| "loss": 0.441, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 728, |
| "tokens_per_second_per_gpu": 17952.4, |
| "total_tokens": 32912104 |
| }, |
| { |
| "epoch": 0.07281626129950557, |
| "grad_norm": 1.0078125, |
| "learning_rate": 4.178770298427107e-06, |
| "loss": 0.4795, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 729, |
| "tokens_per_second_per_gpu": 18644.2, |
| "total_tokens": 32958514 |
| }, |
| { |
| "epoch": 0.07291614643160366, |
| "grad_norm": 1.0234375, |
| "learning_rate": 4.150423250127846e-06, |
| "loss": 0.4647, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 730, |
| "tokens_per_second_per_gpu": 17645.44, |
| "total_tokens": 33002672 |
| }, |
| { |
| "epoch": 0.07301603156370175, |
| "grad_norm": 1.0078125, |
| "learning_rate": 4.12214747707527e-06, |
| "loss": 0.4294, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 731, |
| "tokens_per_second_per_gpu": 17261.14, |
| "total_tokens": 33045478 |
| }, |
| { |
| "epoch": 0.07311591669579982, |
| "grad_norm": 1.0703125, |
| "learning_rate": 4.093943323800746e-06, |
| "loss": 0.4318, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 732, |
| "tokens_per_second_per_gpu": 18671.66, |
| "total_tokens": 33092420 |
| }, |
| { |
| "epoch": 0.07321580182789791, |
| "grad_norm": 0.96484375, |
| "learning_rate": 4.065811133962987e-06, |
| "loss": 0.4281, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 733, |
| "tokens_per_second_per_gpu": 18318.77, |
| "total_tokens": 33137357 |
| }, |
| { |
| "epoch": 0.073315686959996, |
| "grad_norm": 0.98046875, |
| "learning_rate": 4.037751250343841e-06, |
| "loss": 0.3983, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 734, |
| "tokens_per_second_per_gpu": 16528.32, |
| "total_tokens": 33178233 |
| }, |
| { |
| "epoch": 0.0734155720920941, |
| "grad_norm": 1.6015625, |
| "learning_rate": 4.009764014844143e-06, |
| "loss": 0.4226, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 735, |
| "tokens_per_second_per_gpu": 18004.58, |
| "total_tokens": 33222630 |
| }, |
| { |
| "epoch": 0.07351545722419218, |
| "grad_norm": 0.96484375, |
| "learning_rate": 3.981849768479516e-06, |
| "loss": 0.4552, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 736, |
| "tokens_per_second_per_gpu": 18603.91, |
| "total_tokens": 33268228 |
| }, |
| { |
| "epoch": 0.07361534235629026, |
| "grad_norm": 0.98046875, |
| "learning_rate": 3.954008851376252e-06, |
| "loss": 0.4387, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 737, |
| "tokens_per_second_per_gpu": 19583.86, |
| "total_tokens": 33315124 |
| }, |
| { |
| "epoch": 0.07371522748838835, |
| "grad_norm": 0.99609375, |
| "learning_rate": 3.9262416027671354e-06, |
| "loss": 0.4538, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 738, |
| "tokens_per_second_per_gpu": 16932.53, |
| "total_tokens": 33357686 |
| }, |
| { |
| "epoch": 0.07381511262048644, |
| "grad_norm": 0.98046875, |
| "learning_rate": 3.898548360987325e-06, |
| "loss": 0.4558, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 739, |
| "tokens_per_second_per_gpu": 19851.47, |
| "total_tokens": 33405913 |
| }, |
| { |
| "epoch": 0.07391499775258453, |
| "grad_norm": 0.9453125, |
| "learning_rate": 3.8709294634702374e-06, |
| "loss": 0.4522, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 740, |
| "tokens_per_second_per_gpu": 19943.32, |
| "total_tokens": 33452722 |
| }, |
| { |
| "epoch": 0.07401488288468261, |
| "grad_norm": 1.0078125, |
| "learning_rate": 3.8433852467434175e-06, |
| "loss": 0.4498, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 741, |
| "tokens_per_second_per_gpu": 17826.87, |
| "total_tokens": 33496275 |
| }, |
| { |
| "epoch": 0.0741147680167807, |
| "grad_norm": 1.0, |
| "learning_rate": 3.81591604642446e-06, |
| "loss": 0.4784, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 742, |
| "tokens_per_second_per_gpu": 19558.59, |
| "total_tokens": 33543831 |
| }, |
| { |
| "epoch": 0.07421465314887879, |
| "grad_norm": 1.0078125, |
| "learning_rate": 3.7885221972168974e-06, |
| "loss": 0.4276, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 743, |
| "tokens_per_second_per_gpu": 16763.51, |
| "total_tokens": 33584229 |
| }, |
| { |
| "epoch": 0.07431453828097688, |
| "grad_norm": 0.95703125, |
| "learning_rate": 3.7612040329061405e-06, |
| "loss": 0.4796, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 744, |
| "tokens_per_second_per_gpu": 19724.55, |
| "total_tokens": 33633472 |
| }, |
| { |
| "epoch": 0.07441442341307497, |
| "grad_norm": 0.953125, |
| "learning_rate": 3.7339618863553983e-06, |
| "loss": 0.4666, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 745, |
| "tokens_per_second_per_gpu": 19000.4, |
| "total_tokens": 33680629 |
| }, |
| { |
| "epoch": 0.07451430854517305, |
| "grad_norm": 0.9453125, |
| "learning_rate": 3.7067960895016277e-06, |
| "loss": 0.4235, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 746, |
| "tokens_per_second_per_gpu": 19174.22, |
| "total_tokens": 33726158 |
| }, |
| { |
| "epoch": 0.07461419367727114, |
| "grad_norm": 0.90625, |
| "learning_rate": 3.679706973351491e-06, |
| "loss": 0.4475, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 747, |
| "tokens_per_second_per_gpu": 19844.29, |
| "total_tokens": 33775074 |
| }, |
| { |
| "epoch": 0.07471407880936923, |
| "grad_norm": 0.96484375, |
| "learning_rate": 3.6526948679773256e-06, |
| "loss": 0.4764, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 748, |
| "tokens_per_second_per_gpu": 18010.15, |
| "total_tokens": 33819103 |
| }, |
| { |
| "epoch": 0.07481396394146732, |
| "grad_norm": 0.9453125, |
| "learning_rate": 3.625760102513103e-06, |
| "loss": 0.4354, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 749, |
| "tokens_per_second_per_gpu": 18519.92, |
| "total_tokens": 33864876 |
| }, |
| { |
| "epoch": 0.0749138490735654, |
| "grad_norm": 0.984375, |
| "learning_rate": 3.598903005150444e-06, |
| "loss": 0.4649, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 750, |
| "tokens_per_second_per_gpu": 20241.23, |
| "total_tokens": 33913338 |
| }, |
| { |
| "epoch": 0.07501373420566348, |
| "grad_norm": 0.921875, |
| "learning_rate": 3.5721239031346067e-06, |
| "loss": 0.4566, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 751, |
| "tokens_per_second_per_gpu": 19538.75, |
| "total_tokens": 33960877 |
| }, |
| { |
| "epoch": 0.07511361933776158, |
| "grad_norm": 0.96875, |
| "learning_rate": 3.545423122760493e-06, |
| "loss": 0.4134, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 752, |
| "tokens_per_second_per_gpu": 18120.67, |
| "total_tokens": 34005425 |
| }, |
| { |
| "epoch": 0.07521350446985967, |
| "grad_norm": 0.96484375, |
| "learning_rate": 3.5188009893686916e-06, |
| "loss": 0.4733, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 753, |
| "tokens_per_second_per_gpu": 20333.77, |
| "total_tokens": 34055315 |
| }, |
| { |
| "epoch": 0.07531338960195774, |
| "grad_norm": 0.96875, |
| "learning_rate": 3.492257827341492e-06, |
| "loss": 0.4752, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 754, |
| "tokens_per_second_per_gpu": 18882.58, |
| "total_tokens": 34101132 |
| }, |
| { |
| "epoch": 0.07541327473405583, |
| "grad_norm": 1.0859375, |
| "learning_rate": 3.4657939600989453e-06, |
| "loss": 0.4387, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 755, |
| "tokens_per_second_per_gpu": 16251.93, |
| "total_tokens": 34141329 |
| }, |
| { |
| "epoch": 0.07551315986615392, |
| "grad_norm": 0.984375, |
| "learning_rate": 3.4394097100949286e-06, |
| "loss": 0.4814, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 756, |
| "tokens_per_second_per_gpu": 19121.99, |
| "total_tokens": 34187810 |
| }, |
| { |
| "epoch": 0.07561304499825201, |
| "grad_norm": 1.0546875, |
| "learning_rate": 3.4131053988131947e-06, |
| "loss": 0.4285, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 757, |
| "tokens_per_second_per_gpu": 15534.14, |
| "total_tokens": 34226967 |
| }, |
| { |
| "epoch": 0.0757129301303501, |
| "grad_norm": 0.9296875, |
| "learning_rate": 3.3868813467634833e-06, |
| "loss": 0.5004, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 758, |
| "tokens_per_second_per_gpu": 20540.89, |
| "total_tokens": 34278318 |
| }, |
| { |
| "epoch": 0.07581281526244818, |
| "grad_norm": 0.92578125, |
| "learning_rate": 3.360737873477584e-06, |
| "loss": 0.478, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 759, |
| "tokens_per_second_per_gpu": 18996.35, |
| "total_tokens": 34326382 |
| }, |
| { |
| "epoch": 0.07591270039454627, |
| "grad_norm": 0.92578125, |
| "learning_rate": 3.3346752975054763e-06, |
| "loss": 0.4768, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 760, |
| "tokens_per_second_per_gpu": 20186.09, |
| "total_tokens": 34376644 |
| }, |
| { |
| "epoch": 0.07601258552664436, |
| "grad_norm": 1.0234375, |
| "learning_rate": 3.308693936411421e-06, |
| "loss": 0.4029, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 761, |
| "tokens_per_second_per_gpu": 16115.93, |
| "total_tokens": 34416708 |
| }, |
| { |
| "epoch": 0.07611247065874245, |
| "grad_norm": 1.0, |
| "learning_rate": 3.2827941067700996e-06, |
| "loss": 0.4444, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 762, |
| "tokens_per_second_per_gpu": 16823.28, |
| "total_tokens": 34458692 |
| }, |
| { |
| "epoch": 0.07621235579084053, |
| "grad_norm": 0.9921875, |
| "learning_rate": 3.2569761241627694e-06, |
| "loss": 0.468, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 763, |
| "tokens_per_second_per_gpu": 19727.4, |
| "total_tokens": 34506469 |
| }, |
| { |
| "epoch": 0.07631224092293862, |
| "grad_norm": 1.015625, |
| "learning_rate": 3.2312403031733943e-06, |
| "loss": 0.4906, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 764, |
| "tokens_per_second_per_gpu": 19245.83, |
| "total_tokens": 34553520 |
| }, |
| { |
| "epoch": 0.07641212605503671, |
| "grad_norm": 1.03125, |
| "learning_rate": 3.2055869573848374e-06, |
| "loss": 0.4225, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 765, |
| "tokens_per_second_per_gpu": 17663.42, |
| "total_tokens": 34595781 |
| }, |
| { |
| "epoch": 0.0765120111871348, |
| "grad_norm": 0.97265625, |
| "learning_rate": 3.1800163993750166e-06, |
| "loss": 0.469, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 766, |
| "tokens_per_second_per_gpu": 18474.22, |
| "total_tokens": 34640570 |
| }, |
| { |
| "epoch": 0.07661189631923289, |
| "grad_norm": 0.92578125, |
| "learning_rate": 3.1545289407131128e-06, |
| "loss": 0.4653, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 767, |
| "tokens_per_second_per_gpu": 19435.84, |
| "total_tokens": 34687529 |
| }, |
| { |
| "epoch": 0.07671178145133097, |
| "grad_norm": 1.2421875, |
| "learning_rate": 3.1291248919557717e-06, |
| "loss": 0.4393, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 768, |
| "tokens_per_second_per_gpu": 18703.97, |
| "total_tokens": 34734118 |
| }, |
| { |
| "epoch": 0.07681166658342906, |
| "grad_norm": 0.95703125, |
| "learning_rate": 3.103804562643302e-06, |
| "loss": 0.4458, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 769, |
| "tokens_per_second_per_gpu": 19402.21, |
| "total_tokens": 34781930 |
| }, |
| { |
| "epoch": 0.07691155171552715, |
| "grad_norm": 0.98828125, |
| "learning_rate": 3.0785682612959334e-06, |
| "loss": 0.4941, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 770, |
| "tokens_per_second_per_gpu": 19548.47, |
| "total_tokens": 34830220 |
| }, |
| { |
| "epoch": 0.07701143684762524, |
| "grad_norm": 1.015625, |
| "learning_rate": 3.0534162954100264e-06, |
| "loss": 0.4643, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 771, |
| "tokens_per_second_per_gpu": 17562.18, |
| "total_tokens": 34874221 |
| }, |
| { |
| "epoch": 0.07711132197972331, |
| "grad_norm": 0.93359375, |
| "learning_rate": 3.028348971454356e-06, |
| "loss": 0.4263, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 772, |
| "tokens_per_second_per_gpu": 18395.88, |
| "total_tokens": 34919409 |
| }, |
| { |
| "epoch": 0.0772112071118214, |
| "grad_norm": 0.93359375, |
| "learning_rate": 3.003366594866345e-06, |
| "loss": 0.4913, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 773, |
| "tokens_per_second_per_gpu": 20334.84, |
| "total_tokens": 34968740 |
| }, |
| { |
| "epoch": 0.0773110922439195, |
| "grad_norm": 0.92578125, |
| "learning_rate": 2.978469470048376e-06, |
| "loss": 0.4351, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 774, |
| "tokens_per_second_per_gpu": 19515.04, |
| "total_tokens": 35015775 |
| }, |
| { |
| "epoch": 0.07741097737601758, |
| "grad_norm": 0.953125, |
| "learning_rate": 2.953657900364053e-06, |
| "loss": 0.4445, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 775, |
| "tokens_per_second_per_gpu": 18606.56, |
| "total_tokens": 35061754 |
| }, |
| { |
| "epoch": 0.07751086250811566, |
| "grad_norm": 0.9453125, |
| "learning_rate": 2.9289321881345257e-06, |
| "loss": 0.4226, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 776, |
| "tokens_per_second_per_gpu": 17852.59, |
| "total_tokens": 35106349 |
| }, |
| { |
| "epoch": 0.07761074764021375, |
| "grad_norm": 1.0078125, |
| "learning_rate": 2.9042926346347932e-06, |
| "loss": 0.4751, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 777, |
| "tokens_per_second_per_gpu": 20021.82, |
| "total_tokens": 35155865 |
| }, |
| { |
| "epoch": 0.07771063277231184, |
| "grad_norm": 0.95703125, |
| "learning_rate": 2.8797395400900362e-06, |
| "loss": 0.4197, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 778, |
| "tokens_per_second_per_gpu": 18516.27, |
| "total_tokens": 35200487 |
| }, |
| { |
| "epoch": 0.07781051790440993, |
| "grad_norm": 0.97265625, |
| "learning_rate": 2.855273203671969e-06, |
| "loss": 0.472, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 779, |
| "tokens_per_second_per_gpu": 20415.64, |
| "total_tokens": 35249366 |
| }, |
| { |
| "epoch": 0.07791040303650802, |
| "grad_norm": 0.96484375, |
| "learning_rate": 2.830893923495173e-06, |
| "loss": 0.444, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 780, |
| "tokens_per_second_per_gpu": 18200.26, |
| "total_tokens": 35294028 |
| }, |
| { |
| "epoch": 0.0780102881686061, |
| "grad_norm": 0.9765625, |
| "learning_rate": 2.8066019966134907e-06, |
| "loss": 0.4302, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 781, |
| "tokens_per_second_per_gpu": 17184.21, |
| "total_tokens": 35336675 |
| }, |
| { |
| "epoch": 0.07811017330070419, |
| "grad_norm": 0.9375, |
| "learning_rate": 2.7823977190163788e-06, |
| "loss": 0.4132, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 782, |
| "tokens_per_second_per_gpu": 18695.96, |
| "total_tokens": 35381758 |
| }, |
| { |
| "epoch": 0.07821005843280228, |
| "grad_norm": 0.94921875, |
| "learning_rate": 2.7582813856253276e-06, |
| "loss": 0.4294, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 783, |
| "tokens_per_second_per_gpu": 18864.72, |
| "total_tokens": 35427540 |
| }, |
| { |
| "epoch": 0.07830994356490037, |
| "grad_norm": 0.94140625, |
| "learning_rate": 2.7342532902902418e-06, |
| "loss": 0.443, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 784, |
| "tokens_per_second_per_gpu": 18334.86, |
| "total_tokens": 35473265 |
| }, |
| { |
| "epoch": 0.07840982869699845, |
| "grad_norm": 0.94140625, |
| "learning_rate": 2.7103137257858867e-06, |
| "loss": 0.4663, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 785, |
| "tokens_per_second_per_gpu": 20488.92, |
| "total_tokens": 35522599 |
| }, |
| { |
| "epoch": 0.07850971382909654, |
| "grad_norm": 0.95703125, |
| "learning_rate": 2.6864629838082957e-06, |
| "loss": 0.486, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 786, |
| "tokens_per_second_per_gpu": 19238.21, |
| "total_tokens": 35569885 |
| }, |
| { |
| "epoch": 0.07860959896119463, |
| "grad_norm": 0.94140625, |
| "learning_rate": 2.6627013549712355e-06, |
| "loss": 0.4398, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 787, |
| "tokens_per_second_per_gpu": 20147.3, |
| "total_tokens": 35618490 |
| }, |
| { |
| "epoch": 0.07870948409329272, |
| "grad_norm": 0.9609375, |
| "learning_rate": 2.639029128802657e-06, |
| "loss": 0.4521, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 788, |
| "tokens_per_second_per_gpu": 19362.36, |
| "total_tokens": 35664805 |
| }, |
| { |
| "epoch": 0.07880936922539081, |
| "grad_norm": 0.98828125, |
| "learning_rate": 2.615446593741161e-06, |
| "loss": 0.4338, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 789, |
| "tokens_per_second_per_gpu": 17103.85, |
| "total_tokens": 35707639 |
| }, |
| { |
| "epoch": 0.07890925435748888, |
| "grad_norm": 1.0234375, |
| "learning_rate": 2.5919540371325005e-06, |
| "loss": 0.4768, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 790, |
| "tokens_per_second_per_gpu": 16934.67, |
| "total_tokens": 35749944 |
| }, |
| { |
| "epoch": 0.07900913948958697, |
| "grad_norm": 0.9609375, |
| "learning_rate": 2.5685517452260566e-06, |
| "loss": 0.486, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 791, |
| "tokens_per_second_per_gpu": 19332.58, |
| "total_tokens": 35797939 |
| }, |
| { |
| "epoch": 0.07910902462168506, |
| "grad_norm": 1.0234375, |
| "learning_rate": 2.5452400031713786e-06, |
| "loss": 0.4516, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 792, |
| "tokens_per_second_per_gpu": 16832.4, |
| "total_tokens": 35839936 |
| }, |
| { |
| "epoch": 0.07920890975378315, |
| "grad_norm": 1.015625, |
| "learning_rate": 2.522019095014683e-06, |
| "loss": 0.481, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 793, |
| "tokens_per_second_per_gpu": 20197.47, |
| "total_tokens": 35888927 |
| }, |
| { |
| "epoch": 0.07930879488588123, |
| "grad_norm": 0.96484375, |
| "learning_rate": 2.4988893036954045e-06, |
| "loss": 0.4085, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 794, |
| "tokens_per_second_per_gpu": 16791.69, |
| "total_tokens": 35930456 |
| }, |
| { |
| "epoch": 0.07940868001797932, |
| "grad_norm": 0.94921875, |
| "learning_rate": 2.4758509110427576e-06, |
| "loss": 0.4307, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 795, |
| "tokens_per_second_per_gpu": 18229.64, |
| "total_tokens": 35975399 |
| }, |
| { |
| "epoch": 0.07950856515007741, |
| "grad_norm": 0.9296875, |
| "learning_rate": 2.45290419777228e-06, |
| "loss": 0.4409, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 796, |
| "tokens_per_second_per_gpu": 19402.09, |
| "total_tokens": 36022487 |
| }, |
| { |
| "epoch": 0.0796084502821755, |
| "grad_norm": 0.9921875, |
| "learning_rate": 2.4300494434824373e-06, |
| "loss": 0.4787, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 797, |
| "tokens_per_second_per_gpu": 18897.54, |
| "total_tokens": 36067845 |
| }, |
| { |
| "epoch": 0.07970833541427358, |
| "grad_norm": 0.9609375, |
| "learning_rate": 2.407286926651192e-06, |
| "loss": 0.4174, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 798, |
| "tokens_per_second_per_gpu": 17302.79, |
| "total_tokens": 36110406 |
| }, |
| { |
| "epoch": 0.07980822054637167, |
| "grad_norm": 0.98046875, |
| "learning_rate": 2.3846169246326345e-06, |
| "loss": 0.4533, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 799, |
| "tokens_per_second_per_gpu": 18365.7, |
| "total_tokens": 36154887 |
| }, |
| { |
| "epoch": 0.07990810567846976, |
| "grad_norm": 0.9765625, |
| "learning_rate": 2.362039713653581e-06, |
| "loss": 0.4402, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 800, |
| "tokens_per_second_per_gpu": 17734.24, |
| "total_tokens": 36199208 |
| }, |
| { |
| "epoch": 0.08000799081056785, |
| "grad_norm": 1.0234375, |
| "learning_rate": 2.339555568810221e-06, |
| "loss": 0.448, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 801, |
| "tokens_per_second_per_gpu": 16035.94, |
| "total_tokens": 36238446 |
| }, |
| { |
| "epoch": 0.08010787594266594, |
| "grad_norm": 0.97265625, |
| "learning_rate": 2.317164764064769e-06, |
| "loss": 0.4579, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 802, |
| "tokens_per_second_per_gpu": 18881.91, |
| "total_tokens": 36285159 |
| }, |
| { |
| "epoch": 0.08020776107476402, |
| "grad_norm": 1.0, |
| "learning_rate": 2.2948675722421086e-06, |
| "loss": 0.4404, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 803, |
| "tokens_per_second_per_gpu": 16918.36, |
| "total_tokens": 36327565 |
| }, |
| { |
| "epoch": 0.08030764620686211, |
| "grad_norm": 0.9765625, |
| "learning_rate": 2.27266426502649e-06, |
| "loss": 0.4996, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 804, |
| "tokens_per_second_per_gpu": 19541.82, |
| "total_tokens": 36375461 |
| }, |
| { |
| "epoch": 0.0804075313389602, |
| "grad_norm": 1.0, |
| "learning_rate": 2.2505551129582047e-06, |
| "loss": 0.5001, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 805, |
| "tokens_per_second_per_gpu": 19301.34, |
| "total_tokens": 36422902 |
| }, |
| { |
| "epoch": 0.08050741647105829, |
| "grad_norm": 0.921875, |
| "learning_rate": 2.2285403854302912e-06, |
| "loss": 0.4725, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 806, |
| "tokens_per_second_per_gpu": 20864.66, |
| "total_tokens": 36473161 |
| }, |
| { |
| "epoch": 0.08060730160315636, |
| "grad_norm": 0.93359375, |
| "learning_rate": 2.206620350685257e-06, |
| "loss": 0.4983, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 807, |
| "tokens_per_second_per_gpu": 20616.17, |
| "total_tokens": 36523271 |
| }, |
| { |
| "epoch": 0.08070718673525445, |
| "grad_norm": 0.9375, |
| "learning_rate": 2.1847952758118118e-06, |
| "loss": 0.4219, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 808, |
| "tokens_per_second_per_gpu": 18241.16, |
| "total_tokens": 36568183 |
| }, |
| { |
| "epoch": 0.08080707186735255, |
| "grad_norm": 1.0078125, |
| "learning_rate": 2.163065426741603e-06, |
| "loss": 0.4266, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 809, |
| "tokens_per_second_per_gpu": 16330.06, |
| "total_tokens": 36608756 |
| }, |
| { |
| "epoch": 0.08090695699945064, |
| "grad_norm": 0.9296875, |
| "learning_rate": 2.1414310682459805e-06, |
| "loss": 0.4323, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 810, |
| "tokens_per_second_per_gpu": 20630.5, |
| "total_tokens": 36657416 |
| }, |
| { |
| "epoch": 0.08100684213154873, |
| "grad_norm": 1.0390625, |
| "learning_rate": 2.119892463932781e-06, |
| "loss": 0.4131, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 811, |
| "tokens_per_second_per_gpu": 14407.44, |
| "total_tokens": 36693932 |
| }, |
| { |
| "epoch": 0.0811067272636468, |
| "grad_norm": 0.93359375, |
| "learning_rate": 2.098449876243096e-06, |
| "loss": 0.4454, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 812, |
| "tokens_per_second_per_gpu": 19878.7, |
| "total_tokens": 36742759 |
| }, |
| { |
| "epoch": 0.08120661239574489, |
| "grad_norm": 0.96875, |
| "learning_rate": 2.0771035664480944e-06, |
| "loss": 0.416, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 813, |
| "tokens_per_second_per_gpu": 17948.12, |
| "total_tokens": 36786121 |
| }, |
| { |
| "epoch": 0.08130649752784298, |
| "grad_norm": 0.984375, |
| "learning_rate": 2.0558537946458177e-06, |
| "loss": 0.4597, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 814, |
| "tokens_per_second_per_gpu": 19428.04, |
| "total_tokens": 36833543 |
| }, |
| { |
| "epoch": 0.08140638265994107, |
| "grad_norm": 1.0625, |
| "learning_rate": 2.0347008197580376e-06, |
| "loss": 0.4773, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 815, |
| "tokens_per_second_per_gpu": 16797.44, |
| "total_tokens": 36875562 |
| }, |
| { |
| "epoch": 0.08150626779203915, |
| "grad_norm": 0.91015625, |
| "learning_rate": 2.013644899527074e-06, |
| "loss": 0.3689, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 816, |
| "tokens_per_second_per_gpu": 17852.4, |
| "total_tokens": 36920264 |
| }, |
| { |
| "epoch": 0.08160615292413724, |
| "grad_norm": 0.984375, |
| "learning_rate": 1.9926862905126663e-06, |
| "loss": 0.4804, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 817, |
| "tokens_per_second_per_gpu": 19627.76, |
| "total_tokens": 36968604 |
| }, |
| { |
| "epoch": 0.08170603805623533, |
| "grad_norm": 0.953125, |
| "learning_rate": 1.9718252480888567e-06, |
| "loss": 0.4784, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 818, |
| "tokens_per_second_per_gpu": 19471.58, |
| "total_tokens": 37017512 |
| }, |
| { |
| "epoch": 0.08180592318833342, |
| "grad_norm": 0.98046875, |
| "learning_rate": 1.95106202644086e-06, |
| "loss": 0.4654, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 819, |
| "tokens_per_second_per_gpu": 18562.62, |
| "total_tokens": 37063802 |
| }, |
| { |
| "epoch": 0.0819058083204315, |
| "grad_norm": 0.97265625, |
| "learning_rate": 1.930396878561983e-06, |
| "loss": 0.4418, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 820, |
| "tokens_per_second_per_gpu": 19439.85, |
| "total_tokens": 37111193 |
| }, |
| { |
| "epoch": 0.08200569345252959, |
| "grad_norm": 0.9296875, |
| "learning_rate": 1.9098300562505266e-06, |
| "loss": 0.4499, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 821, |
| "tokens_per_second_per_gpu": 19463.13, |
| "total_tokens": 37157706 |
| }, |
| { |
| "epoch": 0.08210557858462768, |
| "grad_norm": 0.95703125, |
| "learning_rate": 1.8893618101067357e-06, |
| "loss": 0.4992, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 822, |
| "tokens_per_second_per_gpu": 19661.97, |
| "total_tokens": 37206493 |
| }, |
| { |
| "epoch": 0.08220546371672577, |
| "grad_norm": 1.03125, |
| "learning_rate": 1.8689923895297247e-06, |
| "loss": 0.4788, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 823, |
| "tokens_per_second_per_gpu": 17584.68, |
| "total_tokens": 37250267 |
| }, |
| { |
| "epoch": 0.08230534884882386, |
| "grad_norm": 0.97265625, |
| "learning_rate": 1.848722042714457e-06, |
| "loss": 0.4918, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 824, |
| "tokens_per_second_per_gpu": 19572.39, |
| "total_tokens": 37297746 |
| }, |
| { |
| "epoch": 0.08240523398092194, |
| "grad_norm": 0.95703125, |
| "learning_rate": 1.8285510166487154e-06, |
| "loss": 0.4801, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 825, |
| "tokens_per_second_per_gpu": 18921.89, |
| "total_tokens": 37345014 |
| }, |
| { |
| "epoch": 0.08250511911302003, |
| "grad_norm": 0.9453125, |
| "learning_rate": 1.808479557110081e-06, |
| "loss": 0.4642, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 826, |
| "tokens_per_second_per_gpu": 19139.52, |
| "total_tokens": 37391906 |
| }, |
| { |
| "epoch": 0.08260500424511812, |
| "grad_norm": 0.9765625, |
| "learning_rate": 1.7885079086629598e-06, |
| "loss": 0.5035, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 827, |
| "tokens_per_second_per_gpu": 19019.06, |
| "total_tokens": 37438925 |
| }, |
| { |
| "epoch": 0.0827048893772162, |
| "grad_norm": 1.0390625, |
| "learning_rate": 1.7686363146555807e-06, |
| "loss": 0.4424, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 828, |
| "tokens_per_second_per_gpu": 16617.06, |
| "total_tokens": 37479544 |
| }, |
| { |
| "epoch": 0.08280477450931428, |
| "grad_norm": 0.984375, |
| "learning_rate": 1.7488650172170496e-06, |
| "loss": 0.4608, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 829, |
| "tokens_per_second_per_gpu": 17764.13, |
| "total_tokens": 37523449 |
| }, |
| { |
| "epoch": 0.08290465964141237, |
| "grad_norm": 1.0, |
| "learning_rate": 1.7291942572543806e-06, |
| "loss": 0.3988, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 830, |
| "tokens_per_second_per_gpu": 16056.31, |
| "total_tokens": 37562434 |
| }, |
| { |
| "epoch": 0.08300454477351046, |
| "grad_norm": 0.9921875, |
| "learning_rate": 1.709624274449584e-06, |
| "loss": 0.4472, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 831, |
| "tokens_per_second_per_gpu": 17246.78, |
| "total_tokens": 37603863 |
| }, |
| { |
| "epoch": 0.08310442990560855, |
| "grad_norm": 1.0078125, |
| "learning_rate": 1.6901553072567189e-06, |
| "loss": 0.4265, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 832, |
| "tokens_per_second_per_gpu": 16067.93, |
| "total_tokens": 37643783 |
| }, |
| { |
| "epoch": 0.08320431503770664, |
| "grad_norm": 1.0546875, |
| "learning_rate": 1.6707875928990059e-06, |
| "loss": 0.4405, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 833, |
| "tokens_per_second_per_gpu": 18527.37, |
| "total_tokens": 37688766 |
| }, |
| { |
| "epoch": 0.08330420016980472, |
| "grad_norm": 0.9453125, |
| "learning_rate": 1.651521367365936e-06, |
| "loss": 0.4489, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 834, |
| "tokens_per_second_per_gpu": 19398.06, |
| "total_tokens": 37735797 |
| }, |
| { |
| "epoch": 0.08340408530190281, |
| "grad_norm": 0.9609375, |
| "learning_rate": 1.6323568654103838e-06, |
| "loss": 0.4791, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 835, |
| "tokens_per_second_per_gpu": 19898.65, |
| "total_tokens": 37784821 |
| }, |
| { |
| "epoch": 0.0835039704340009, |
| "grad_norm": 1.0625, |
| "learning_rate": 1.6132943205457607e-06, |
| "loss": 0.5056, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 836, |
| "tokens_per_second_per_gpu": 18456.36, |
| "total_tokens": 37830582 |
| }, |
| { |
| "epoch": 0.08360385556609899, |
| "grad_norm": 0.99609375, |
| "learning_rate": 1.5943339650431578e-06, |
| "loss": 0.415, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 837, |
| "tokens_per_second_per_gpu": 17436.82, |
| "total_tokens": 37873914 |
| }, |
| { |
| "epoch": 0.08370374069819707, |
| "grad_norm": 0.99609375, |
| "learning_rate": 1.5754760299285255e-06, |
| "loss": 0.5159, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 838, |
| "tokens_per_second_per_gpu": 20230.5, |
| "total_tokens": 37922986 |
| }, |
| { |
| "epoch": 0.08380362583029516, |
| "grad_norm": 1.0546875, |
| "learning_rate": 1.5567207449798517e-06, |
| "loss": 0.4642, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 839, |
| "tokens_per_second_per_gpu": 16996.21, |
| "total_tokens": 37964539 |
| }, |
| { |
| "epoch": 0.08390351096239325, |
| "grad_norm": 0.9609375, |
| "learning_rate": 1.538068338724361e-06, |
| "loss": 0.4508, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 840, |
| "tokens_per_second_per_gpu": 18701.87, |
| "total_tokens": 38009445 |
| }, |
| { |
| "epoch": 0.08400339609449134, |
| "grad_norm": 1.0078125, |
| "learning_rate": 1.5195190384357405e-06, |
| "loss": 0.4494, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 841, |
| "tokens_per_second_per_gpu": 19390.21, |
| "total_tokens": 38056971 |
| }, |
| { |
| "epoch": 0.08410328122658942, |
| "grad_norm": 1.0078125, |
| "learning_rate": 1.5010730701313626e-06, |
| "loss": 0.456, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 842, |
| "tokens_per_second_per_gpu": 17713.22, |
| "total_tokens": 38099722 |
| }, |
| { |
| "epoch": 0.0842031663586875, |
| "grad_norm": 0.94921875, |
| "learning_rate": 1.4827306585695234e-06, |
| "loss": 0.4127, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 843, |
| "tokens_per_second_per_gpu": 18723.15, |
| "total_tokens": 38145587 |
| }, |
| { |
| "epoch": 0.0843030514907856, |
| "grad_norm": 0.97265625, |
| "learning_rate": 1.4644920272467245e-06, |
| "loss": 0.4542, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 844, |
| "tokens_per_second_per_gpu": 18546.4, |
| "total_tokens": 38191632 |
| }, |
| { |
| "epoch": 0.08440293662288369, |
| "grad_norm": 1.0546875, |
| "learning_rate": 1.446357398394934e-06, |
| "loss": 0.4214, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 845, |
| "tokens_per_second_per_gpu": 16288.16, |
| "total_tokens": 38232383 |
| }, |
| { |
| "epoch": 0.08450282175498178, |
| "grad_norm": 1.0078125, |
| "learning_rate": 1.4283269929788779e-06, |
| "loss": 0.4211, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 846, |
| "tokens_per_second_per_gpu": 15736.11, |
| "total_tokens": 38270618 |
| }, |
| { |
| "epoch": 0.08460270688707985, |
| "grad_norm": 0.953125, |
| "learning_rate": 1.4104010306933558e-06, |
| "loss": 0.5127, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 847, |
| "tokens_per_second_per_gpu": 20861.17, |
| "total_tokens": 38322839 |
| }, |
| { |
| "epoch": 0.08470259201917794, |
| "grad_norm": 0.93359375, |
| "learning_rate": 1.3925797299605649e-06, |
| "loss": 0.453, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 848, |
| "tokens_per_second_per_gpu": 19225.22, |
| "total_tokens": 38369114 |
| }, |
| { |
| "epoch": 0.08480247715127603, |
| "grad_norm": 0.9609375, |
| "learning_rate": 1.3748633079274254e-06, |
| "loss": 0.4638, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 849, |
| "tokens_per_second_per_gpu": 19435.5, |
| "total_tokens": 38415895 |
| }, |
| { |
| "epoch": 0.08490236228337412, |
| "grad_norm": 0.9609375, |
| "learning_rate": 1.3572519804629537e-06, |
| "loss": 0.4142, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 850, |
| "tokens_per_second_per_gpu": 17504.54, |
| "total_tokens": 38458330 |
| }, |
| { |
| "epoch": 0.0850022474154722, |
| "grad_norm": 0.9609375, |
| "learning_rate": 1.339745962155613e-06, |
| "loss": 0.4124, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 851, |
| "tokens_per_second_per_gpu": 18266.31, |
| "total_tokens": 38502745 |
| }, |
| { |
| "epoch": 0.08510213254757029, |
| "grad_norm": 0.921875, |
| "learning_rate": 1.322345466310717e-06, |
| "loss": 0.4658, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 852, |
| "tokens_per_second_per_gpu": 20271.47, |
| "total_tokens": 38552471 |
| }, |
| { |
| "epoch": 0.08520201767966838, |
| "grad_norm": 0.94140625, |
| "learning_rate": 1.30505070494781e-06, |
| "loss": 0.4, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 853, |
| "tokens_per_second_per_gpu": 17890.06, |
| "total_tokens": 38597351 |
| }, |
| { |
| "epoch": 0.08530190281176647, |
| "grad_norm": 0.9296875, |
| "learning_rate": 1.2878618887981064e-06, |
| "loss": 0.4398, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 854, |
| "tokens_per_second_per_gpu": 18875.61, |
| "total_tokens": 38643445 |
| }, |
| { |
| "epoch": 0.08540178794386455, |
| "grad_norm": 1.5859375, |
| "learning_rate": 1.2707792273019049e-06, |
| "loss": 0.4428, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 855, |
| "tokens_per_second_per_gpu": 16558.46, |
| "total_tokens": 38684289 |
| }, |
| { |
| "epoch": 0.08550167307596264, |
| "grad_norm": 0.93359375, |
| "learning_rate": 1.2538029286060428e-06, |
| "loss": 0.4257, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 856, |
| "tokens_per_second_per_gpu": 17797.76, |
| "total_tokens": 38728057 |
| }, |
| { |
| "epoch": 0.08560155820806073, |
| "grad_norm": 1.0, |
| "learning_rate": 1.2369331995613664e-06, |
| "loss": 0.4436, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 857, |
| "tokens_per_second_per_gpu": 16844.23, |
| "total_tokens": 38769231 |
| }, |
| { |
| "epoch": 0.08570144334015882, |
| "grad_norm": 0.95703125, |
| "learning_rate": 1.2201702457201948e-06, |
| "loss": 0.4174, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 858, |
| "tokens_per_second_per_gpu": 18295.02, |
| "total_tokens": 38813479 |
| }, |
| { |
| "epoch": 0.08580132847225691, |
| "grad_norm": 0.96875, |
| "learning_rate": 1.2035142713338366e-06, |
| "loss": 0.4569, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 859, |
| "tokens_per_second_per_gpu": 19348.96, |
| "total_tokens": 38861170 |
| }, |
| { |
| "epoch": 0.08590121360435499, |
| "grad_norm": 1.03125, |
| "learning_rate": 1.1869654793500784e-06, |
| "loss": 0.4735, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 860, |
| "tokens_per_second_per_gpu": 19254.08, |
| "total_tokens": 38907760 |
| }, |
| { |
| "epoch": 0.08600109873645308, |
| "grad_norm": 1.0546875, |
| "learning_rate": 1.1705240714107301e-06, |
| "loss": 0.4271, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 861, |
| "tokens_per_second_per_gpu": 15999.45, |
| "total_tokens": 38946661 |
| }, |
| { |
| "epoch": 0.08610098386855117, |
| "grad_norm": 0.97265625, |
| "learning_rate": 1.1541902478491607e-06, |
| "loss": 0.4408, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 862, |
| "tokens_per_second_per_gpu": 18010.12, |
| "total_tokens": 38991339 |
| }, |
| { |
| "epoch": 0.08620086900064926, |
| "grad_norm": 0.9296875, |
| "learning_rate": 1.1379642076878528e-06, |
| "loss": 0.4487, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 863, |
| "tokens_per_second_per_gpu": 20454.89, |
| "total_tokens": 39040982 |
| }, |
| { |
| "epoch": 0.08630075413274733, |
| "grad_norm": 0.9453125, |
| "learning_rate": 1.1218461486359878e-06, |
| "loss": 0.4596, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 864, |
| "tokens_per_second_per_gpu": 18759.82, |
| "total_tokens": 39087467 |
| }, |
| { |
| "epoch": 0.08640063926484542, |
| "grad_norm": 0.9453125, |
| "learning_rate": 1.1058362670870248e-06, |
| "loss": 0.4461, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 865, |
| "tokens_per_second_per_gpu": 19638.49, |
| "total_tokens": 39134944 |
| }, |
| { |
| "epoch": 0.08650052439694352, |
| "grad_norm": 0.984375, |
| "learning_rate": 1.0899347581163222e-06, |
| "loss": 0.3956, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 866, |
| "tokens_per_second_per_gpu": 15727.83, |
| "total_tokens": 39173576 |
| }, |
| { |
| "epoch": 0.0866004095290416, |
| "grad_norm": 0.9453125, |
| "learning_rate": 1.0741418154787443e-06, |
| "loss": 0.5174, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 867, |
| "tokens_per_second_per_gpu": 21853.95, |
| "total_tokens": 39225174 |
| }, |
| { |
| "epoch": 0.0867002946611397, |
| "grad_norm": 0.96875, |
| "learning_rate": 1.058457631606319e-06, |
| "loss": 0.42, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 868, |
| "tokens_per_second_per_gpu": 17794.46, |
| "total_tokens": 39269152 |
| }, |
| { |
| "epoch": 0.08680017979323777, |
| "grad_norm": 1.03125, |
| "learning_rate": 1.042882397605871e-06, |
| "loss": 0.4497, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 869, |
| "tokens_per_second_per_gpu": 17295.14, |
| "total_tokens": 39311626 |
| }, |
| { |
| "epoch": 0.08690006492533586, |
| "grad_norm": 1.0, |
| "learning_rate": 1.0274163032567165e-06, |
| "loss": 0.4458, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 870, |
| "tokens_per_second_per_gpu": 19380.27, |
| "total_tokens": 39358741 |
| }, |
| { |
| "epoch": 0.08699995005743395, |
| "grad_norm": 0.92578125, |
| "learning_rate": 1.012059537008332e-06, |
| "loss": 0.4043, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 871, |
| "tokens_per_second_per_gpu": 17965.13, |
| "total_tokens": 39403437 |
| }, |
| { |
| "epoch": 0.08709983518953204, |
| "grad_norm": 0.96484375, |
| "learning_rate": 9.968122859780648e-07, |
| "loss": 0.4402, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 872, |
| "tokens_per_second_per_gpu": 18130.63, |
| "total_tokens": 39446971 |
| }, |
| { |
| "epoch": 0.08719972032163012, |
| "grad_norm": 1.2265625, |
| "learning_rate": 9.816747359488632e-07, |
| "loss": 0.4494, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 873, |
| "tokens_per_second_per_gpu": 17179.87, |
| "total_tokens": 39489129 |
| }, |
| { |
| "epoch": 0.08729960545372821, |
| "grad_norm": 0.95703125, |
| "learning_rate": 9.666470713669918e-07, |
| "loss": 0.4177, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 874, |
| "tokens_per_second_per_gpu": 17429.74, |
| "total_tokens": 39531879 |
| }, |
| { |
| "epoch": 0.0873994905858263, |
| "grad_norm": 1.0390625, |
| "learning_rate": 9.517294753398066e-07, |
| "loss": 0.4372, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 875, |
| "tokens_per_second_per_gpu": 16985.12, |
| "total_tokens": 39573717 |
| }, |
| { |
| "epoch": 0.08749937571792439, |
| "grad_norm": 0.96875, |
| "learning_rate": 9.369221296335007e-07, |
| "loss": 0.5002, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 876, |
| "tokens_per_second_per_gpu": 19949.48, |
| "total_tokens": 39622256 |
| }, |
| { |
| "epoch": 0.08759926085002247, |
| "grad_norm": 0.9609375, |
| "learning_rate": 9.222252146709143e-07, |
| "loss": 0.4295, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 877, |
| "tokens_per_second_per_gpu": 18473.63, |
| "total_tokens": 39667518 |
| }, |
| { |
| "epoch": 0.08769914598212056, |
| "grad_norm": 0.9375, |
| "learning_rate": 9.076389095293148e-07, |
| "loss": 0.4607, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 878, |
| "tokens_per_second_per_gpu": 20108.44, |
| "total_tokens": 39715924 |
| }, |
| { |
| "epoch": 0.08779903111421865, |
| "grad_norm": 1.0546875, |
| "learning_rate": 8.931633919382299e-07, |
| "loss": 0.5037, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 879, |
| "tokens_per_second_per_gpu": 18996.83, |
| "total_tokens": 39762377 |
| }, |
| { |
| "epoch": 0.08789891624631674, |
| "grad_norm": 0.99609375, |
| "learning_rate": 8.787988382772705e-07, |
| "loss": 0.4251, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 880, |
| "tokens_per_second_per_gpu": 18702.25, |
| "total_tokens": 39808110 |
| }, |
| { |
| "epoch": 0.08799880137841483, |
| "grad_norm": 1.0390625, |
| "learning_rate": 8.645454235739903e-07, |
| "loss": 0.4113, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 881, |
| "tokens_per_second_per_gpu": 15478.48, |
| "total_tokens": 39846683 |
| }, |
| { |
| "epoch": 0.0880986865105129, |
| "grad_norm": 0.9375, |
| "learning_rate": 8.504033215017527e-07, |
| "loss": 0.4558, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 882, |
| "tokens_per_second_per_gpu": 19692.77, |
| "total_tokens": 39894219 |
| }, |
| { |
| "epoch": 0.088198571642611, |
| "grad_norm": 0.9296875, |
| "learning_rate": 8.363727043776037e-07, |
| "loss": 0.4684, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 883, |
| "tokens_per_second_per_gpu": 20459.8, |
| "total_tokens": 39943380 |
| }, |
| { |
| "epoch": 0.08829845677470909, |
| "grad_norm": 0.9765625, |
| "learning_rate": 8.224537431601886e-07, |
| "loss": 0.4421, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 884, |
| "tokens_per_second_per_gpu": 18059.44, |
| "total_tokens": 39987487 |
| }, |
| { |
| "epoch": 0.08839834190680718, |
| "grad_norm": 0.98828125, |
| "learning_rate": 8.086466074476562e-07, |
| "loss": 0.4255, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 885, |
| "tokens_per_second_per_gpu": 17447.58, |
| "total_tokens": 40031018 |
| }, |
| { |
| "epoch": 0.08849822703890525, |
| "grad_norm": 0.93359375, |
| "learning_rate": 7.949514654755963e-07, |
| "loss": 0.4196, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 886, |
| "tokens_per_second_per_gpu": 18557.05, |
| "total_tokens": 40076790 |
| }, |
| { |
| "epoch": 0.08859811217100334, |
| "grad_norm": 0.96484375, |
| "learning_rate": 7.81368484114996e-07, |
| "loss": 0.4456, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 887, |
| "tokens_per_second_per_gpu": 18626.23, |
| "total_tokens": 40123066 |
| }, |
| { |
| "epoch": 0.08869799730310143, |
| "grad_norm": 1.125, |
| "learning_rate": 7.678978288701911e-07, |
| "loss": 0.4572, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 888, |
| "tokens_per_second_per_gpu": 17197.3, |
| "total_tokens": 40165096 |
| }, |
| { |
| "epoch": 0.08879788243519952, |
| "grad_norm": 1.015625, |
| "learning_rate": 7.545396638768698e-07, |
| "loss": 0.4906, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 889, |
| "tokens_per_second_per_gpu": 18299.48, |
| "total_tokens": 40209850 |
| }, |
| { |
| "epoch": 0.08889776756729761, |
| "grad_norm": 0.9609375, |
| "learning_rate": 7.412941519000527e-07, |
| "loss": 0.407, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 890, |
| "tokens_per_second_per_gpu": 17714.45, |
| "total_tokens": 40253574 |
| }, |
| { |
| "epoch": 0.08899765269939569, |
| "grad_norm": 0.96875, |
| "learning_rate": 7.281614543321269e-07, |
| "loss": 0.4417, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 891, |
| "tokens_per_second_per_gpu": 18392.38, |
| "total_tokens": 40299387 |
| }, |
| { |
| "epoch": 0.08909753783149378, |
| "grad_norm": 0.93359375, |
| "learning_rate": 7.151417311908648e-07, |
| "loss": 0.4439, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 892, |
| "tokens_per_second_per_gpu": 18968.63, |
| "total_tokens": 40345374 |
| }, |
| { |
| "epoch": 0.08919742296359187, |
| "grad_norm": 0.97265625, |
| "learning_rate": 7.022351411174866e-07, |
| "loss": 0.4761, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 893, |
| "tokens_per_second_per_gpu": 18196.28, |
| "total_tokens": 40390659 |
| }, |
| { |
| "epoch": 0.08929730809568996, |
| "grad_norm": 1.8984375, |
| "learning_rate": 6.894418413747183e-07, |
| "loss": 0.457, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 894, |
| "tokens_per_second_per_gpu": 20565.68, |
| "total_tokens": 40440395 |
| }, |
| { |
| "epoch": 0.08939719322778804, |
| "grad_norm": 0.91796875, |
| "learning_rate": 6.767619878448783e-07, |
| "loss": 0.4651, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 895, |
| "tokens_per_second_per_gpu": 20380.49, |
| "total_tokens": 40489844 |
| }, |
| { |
| "epoch": 0.08949707835988613, |
| "grad_norm": 1.0, |
| "learning_rate": 6.641957350279838e-07, |
| "loss": 0.4452, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 896, |
| "tokens_per_second_per_gpu": 16799.21, |
| "total_tokens": 40531712 |
| }, |
| { |
| "epoch": 0.08959696349198422, |
| "grad_norm": 1.0390625, |
| "learning_rate": 6.517432360398556e-07, |
| "loss": 0.486, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 897, |
| "tokens_per_second_per_gpu": 16152.11, |
| "total_tokens": 40572211 |
| }, |
| { |
| "epoch": 0.08969684862408231, |
| "grad_norm": 0.9921875, |
| "learning_rate": 6.394046426102673e-07, |
| "loss": 0.4418, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 898, |
| "tokens_per_second_per_gpu": 16569.92, |
| "total_tokens": 40613429 |
| }, |
| { |
| "epoch": 0.08979673375618039, |
| "grad_norm": 0.96484375, |
| "learning_rate": 6.271801050810856e-07, |
| "loss": 0.4513, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 899, |
| "tokens_per_second_per_gpu": 20112.44, |
| "total_tokens": 40662338 |
| }, |
| { |
| "epoch": 0.08989661888827848, |
| "grad_norm": 0.9140625, |
| "learning_rate": 6.150697724044407e-07, |
| "loss": 0.4388, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 900, |
| "tokens_per_second_per_gpu": 19881.68, |
| "total_tokens": 40709871 |
| }, |
| { |
| "epoch": 0.08999650402037657, |
| "grad_norm": 1.1171875, |
| "learning_rate": 6.030737921409169e-07, |
| "loss": 0.4688, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 901, |
| "tokens_per_second_per_gpu": 19500.12, |
| "total_tokens": 40756767 |
| }, |
| { |
| "epoch": 0.09009638915247466, |
| "grad_norm": 0.98828125, |
| "learning_rate": 5.911923104577455e-07, |
| "loss": 0.4264, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 902, |
| "tokens_per_second_per_gpu": 17913.22, |
| "total_tokens": 40801044 |
| }, |
| { |
| "epoch": 0.09019627428457275, |
| "grad_norm": 1.0234375, |
| "learning_rate": 5.794254721270331e-07, |
| "loss": 0.4401, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 903, |
| "tokens_per_second_per_gpu": 17718.96, |
| "total_tokens": 40844019 |
| }, |
| { |
| "epoch": 0.09029615941667082, |
| "grad_norm": 0.984375, |
| "learning_rate": 5.677734205239904e-07, |
| "loss": 0.4382, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 904, |
| "tokens_per_second_per_gpu": 16599.4, |
| "total_tokens": 40885158 |
| }, |
| { |
| "epoch": 0.09039604454876891, |
| "grad_norm": 0.9453125, |
| "learning_rate": 5.562362976251901e-07, |
| "loss": 0.4395, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 905, |
| "tokens_per_second_per_gpu": 19135.68, |
| "total_tokens": 40931536 |
| }, |
| { |
| "epoch": 0.090495929680867, |
| "grad_norm": 4.21875, |
| "learning_rate": 5.448142440068316e-07, |
| "loss": 0.4645, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 906, |
| "tokens_per_second_per_gpu": 21138.2, |
| "total_tokens": 40982884 |
| }, |
| { |
| "epoch": 0.0905958148129651, |
| "grad_norm": 0.98828125, |
| "learning_rate": 5.335073988430373e-07, |
| "loss": 0.4415, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 907, |
| "tokens_per_second_per_gpu": 17495.75, |
| "total_tokens": 41026347 |
| }, |
| { |
| "epoch": 0.09069569994506317, |
| "grad_norm": 1.0234375, |
| "learning_rate": 5.223158999041444e-07, |
| "loss": 0.4052, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 908, |
| "tokens_per_second_per_gpu": 15924.71, |
| "total_tokens": 41066086 |
| }, |
| { |
| "epoch": 0.09079558507716126, |
| "grad_norm": 0.9140625, |
| "learning_rate": 5.112398835550348e-07, |
| "loss": 0.4484, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 909, |
| "tokens_per_second_per_gpu": 20056.15, |
| "total_tokens": 41114943 |
| }, |
| { |
| "epoch": 0.09089547020925935, |
| "grad_norm": 0.93359375, |
| "learning_rate": 5.002794847534765e-07, |
| "loss": 0.448, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 910, |
| "tokens_per_second_per_gpu": 20648.43, |
| "total_tokens": 41163509 |
| }, |
| { |
| "epoch": 0.09099535534135744, |
| "grad_norm": 1.0078125, |
| "learning_rate": 4.894348370484648e-07, |
| "loss": 0.4369, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 911, |
| "tokens_per_second_per_gpu": 18606.75, |
| "total_tokens": 41208775 |
| }, |
| { |
| "epoch": 0.09109524047345553, |
| "grad_norm": 0.94921875, |
| "learning_rate": 4.787060725786141e-07, |
| "loss": 0.466, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 912, |
| "tokens_per_second_per_gpu": 20156.85, |
| "total_tokens": 41256862 |
| }, |
| { |
| "epoch": 0.09119512560555361, |
| "grad_norm": 0.98046875, |
| "learning_rate": 4.6809332207053083e-07, |
| "loss": 0.4929, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 913, |
| "tokens_per_second_per_gpu": 21484.71, |
| "total_tokens": 41309518 |
| }, |
| { |
| "epoch": 0.0912950107376517, |
| "grad_norm": 0.94921875, |
| "learning_rate": 4.575967148372318e-07, |
| "loss": 0.4791, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 914, |
| "tokens_per_second_per_gpu": 20365.56, |
| "total_tokens": 41358382 |
| }, |
| { |
| "epoch": 0.09139489586974979, |
| "grad_norm": 1.0078125, |
| "learning_rate": 4.4721637877656377e-07, |
| "loss": 0.4525, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 915, |
| "tokens_per_second_per_gpu": 16982.25, |
| "total_tokens": 41399708 |
| }, |
| { |
| "epoch": 0.09149478100184788, |
| "grad_norm": 1.015625, |
| "learning_rate": 4.3695244036964567e-07, |
| "loss": 0.438, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 916, |
| "tokens_per_second_per_gpu": 18562.94, |
| "total_tokens": 41443914 |
| }, |
| { |
| "epoch": 0.09159466613394596, |
| "grad_norm": 0.99609375, |
| "learning_rate": 4.268050246793276e-07, |
| "loss": 0.4601, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 917, |
| "tokens_per_second_per_gpu": 17697.46, |
| "total_tokens": 41486925 |
| }, |
| { |
| "epoch": 0.09169455126604405, |
| "grad_norm": 0.94921875, |
| "learning_rate": 4.167742553486676e-07, |
| "loss": 0.3925, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 918, |
| "tokens_per_second_per_gpu": 16335.77, |
| "total_tokens": 41527017 |
| }, |
| { |
| "epoch": 0.09179443639814214, |
| "grad_norm": 0.98046875, |
| "learning_rate": 4.068602545994249e-07, |
| "loss": 0.4612, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 919, |
| "tokens_per_second_per_gpu": 17681.21, |
| "total_tokens": 41569225 |
| }, |
| { |
| "epoch": 0.09189432153024023, |
| "grad_norm": 1.0, |
| "learning_rate": 3.9706314323056936e-07, |
| "loss": 0.4546, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 920, |
| "tokens_per_second_per_gpu": 17947.92, |
| "total_tokens": 41612776 |
| }, |
| { |
| "epoch": 0.0919942066623383, |
| "grad_norm": 0.9609375, |
| "learning_rate": 3.8738304061681107e-07, |
| "loss": 0.4736, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 921, |
| "tokens_per_second_per_gpu": 19814.19, |
| "total_tokens": 41661306 |
| }, |
| { |
| "epoch": 0.0920940917944364, |
| "grad_norm": 1.0078125, |
| "learning_rate": 3.7782006470714614e-07, |
| "loss": 0.422, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 922, |
| "tokens_per_second_per_gpu": 17699.83, |
| "total_tokens": 41703850 |
| }, |
| { |
| "epoch": 0.09219397692653448, |
| "grad_norm": 0.93359375, |
| "learning_rate": 3.68374332023419e-07, |
| "loss": 0.4534, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 923, |
| "tokens_per_second_per_gpu": 19623.71, |
| "total_tokens": 41751685 |
| }, |
| { |
| "epoch": 0.09229386205863258, |
| "grad_norm": 0.9609375, |
| "learning_rate": 3.590459576589e-07, |
| "loss": 0.4743, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 924, |
| "tokens_per_second_per_gpu": 18825.06, |
| "total_tokens": 41798183 |
| }, |
| { |
| "epoch": 0.09239374719073067, |
| "grad_norm": 1.0078125, |
| "learning_rate": 3.498350552768859e-07, |
| "loss": 0.4961, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 925, |
| "tokens_per_second_per_gpu": 19234.73, |
| "total_tokens": 41845106 |
| }, |
| { |
| "epoch": 0.09249363232282874, |
| "grad_norm": 0.9375, |
| "learning_rate": 3.4074173710931804e-07, |
| "loss": 0.458, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 926, |
| "tokens_per_second_per_gpu": 20158.76, |
| "total_tokens": 41894753 |
| }, |
| { |
| "epoch": 0.09259351745492683, |
| "grad_norm": 0.9296875, |
| "learning_rate": 3.3176611395540625e-07, |
| "loss": 0.4375, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 927, |
| "tokens_per_second_per_gpu": 19702.47, |
| "total_tokens": 41941859 |
| }, |
| { |
| "epoch": 0.09269340258702492, |
| "grad_norm": 0.953125, |
| "learning_rate": 3.2290829518028867e-07, |
| "loss": 0.4647, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 928, |
| "tokens_per_second_per_gpu": 18735.78, |
| "total_tokens": 41987824 |
| }, |
| { |
| "epoch": 0.09279328771912301, |
| "grad_norm": 1.0234375, |
| "learning_rate": 3.1416838871368925e-07, |
| "loss": 0.5122, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 929, |
| "tokens_per_second_per_gpu": 19608.28, |
| "total_tokens": 42034861 |
| }, |
| { |
| "epoch": 0.09289317285122109, |
| "grad_norm": 1.15625, |
| "learning_rate": 3.0554650104861137e-07, |
| "loss": 0.5212, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 930, |
| "tokens_per_second_per_gpu": 18235.48, |
| "total_tokens": 42080690 |
| }, |
| { |
| "epoch": 0.09299305798331918, |
| "grad_norm": 1.1171875, |
| "learning_rate": 2.970427372400353e-07, |
| "loss": 0.4358, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 931, |
| "tokens_per_second_per_gpu": 17479.6, |
| "total_tokens": 42123904 |
| }, |
| { |
| "epoch": 0.09309294311541727, |
| "grad_norm": 0.953125, |
| "learning_rate": 2.8865720090364037e-07, |
| "loss": 0.4614, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 932, |
| "tokens_per_second_per_gpu": 19294.48, |
| "total_tokens": 42171026 |
| }, |
| { |
| "epoch": 0.09319282824751536, |
| "grad_norm": 0.96484375, |
| "learning_rate": 2.8038999421453827e-07, |
| "loss": 0.4582, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 933, |
| "tokens_per_second_per_gpu": 18575.01, |
| "total_tokens": 42217647 |
| }, |
| { |
| "epoch": 0.09329271337961345, |
| "grad_norm": 0.921875, |
| "learning_rate": 2.7224121790603517e-07, |
| "loss": 0.4831, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 934, |
| "tokens_per_second_per_gpu": 20795.01, |
| "total_tokens": 42267921 |
| }, |
| { |
| "epoch": 0.09339259851171153, |
| "grad_norm": 0.96875, |
| "learning_rate": 2.6421097126839714e-07, |
| "loss": 0.4428, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 935, |
| "tokens_per_second_per_gpu": 18775.03, |
| "total_tokens": 42314148 |
| }, |
| { |
| "epoch": 0.09349248364380962, |
| "grad_norm": 0.984375, |
| "learning_rate": 2.5629935214764866e-07, |
| "loss": 0.4708, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 936, |
| "tokens_per_second_per_gpu": 19374.28, |
| "total_tokens": 42361852 |
| }, |
| { |
| "epoch": 0.09359236877590771, |
| "grad_norm": 0.9453125, |
| "learning_rate": 2.4850645694436736e-07, |
| "loss": 0.4287, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 937, |
| "tokens_per_second_per_gpu": 18463.91, |
| "total_tokens": 42406661 |
| }, |
| { |
| "epoch": 0.0936922539080058, |
| "grad_norm": 1.1484375, |
| "learning_rate": 2.4083238061252565e-07, |
| "loss": 0.494, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 938, |
| "tokens_per_second_per_gpu": 18323.56, |
| "total_tokens": 42451040 |
| }, |
| { |
| "epoch": 0.09379213904010388, |
| "grad_norm": 0.9453125, |
| "learning_rate": 2.332772166583208e-07, |
| "loss": 0.4761, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 939, |
| "tokens_per_second_per_gpu": 19643.91, |
| "total_tokens": 42498816 |
| }, |
| { |
| "epoch": 0.09389202417220197, |
| "grad_norm": 0.953125, |
| "learning_rate": 2.2584105713904126e-07, |
| "loss": 0.4444, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 940, |
| "tokens_per_second_per_gpu": 18834.15, |
| "total_tokens": 42544756 |
| }, |
| { |
| "epoch": 0.09399190930430006, |
| "grad_norm": 1.0078125, |
| "learning_rate": 2.1852399266194312e-07, |
| "loss": 0.3685, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 941, |
| "tokens_per_second_per_gpu": 14666.73, |
| "total_tokens": 42581678 |
| }, |
| { |
| "epoch": 0.09409179443639815, |
| "grad_norm": 0.97265625, |
| "learning_rate": 2.1132611238315004e-07, |
| "loss": 0.5325, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 942, |
| "tokens_per_second_per_gpu": 19959.54, |
| "total_tokens": 42631711 |
| }, |
| { |
| "epoch": 0.09419167956849622, |
| "grad_norm": 1.1640625, |
| "learning_rate": 2.0424750400655947e-07, |
| "loss": 0.4282, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 943, |
| "tokens_per_second_per_gpu": 17580.27, |
| "total_tokens": 42674486 |
| }, |
| { |
| "epoch": 0.09429156470059431, |
| "grad_norm": 0.98828125, |
| "learning_rate": 1.9728825378278248e-07, |
| "loss": 0.4102, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 944, |
| "tokens_per_second_per_gpu": 17182.11, |
| "total_tokens": 42716862 |
| }, |
| { |
| "epoch": 0.0943914498326924, |
| "grad_norm": 0.96875, |
| "learning_rate": 1.9044844650808468e-07, |
| "loss": 0.4571, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 945, |
| "tokens_per_second_per_gpu": 18865.72, |
| "total_tokens": 42762569 |
| }, |
| { |
| "epoch": 0.0944913349647905, |
| "grad_norm": 0.9921875, |
| "learning_rate": 1.8372816552336025e-07, |
| "loss": 0.4254, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 946, |
| "tokens_per_second_per_gpu": 18081.22, |
| "total_tokens": 42806582 |
| }, |
| { |
| "epoch": 0.09459122009688858, |
| "grad_norm": 0.98828125, |
| "learning_rate": 1.7712749271311392e-07, |
| "loss": 0.4432, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 947, |
| "tokens_per_second_per_gpu": 18913.25, |
| "total_tokens": 42853391 |
| }, |
| { |
| "epoch": 0.09469110522898666, |
| "grad_norm": 0.99609375, |
| "learning_rate": 1.706465085044584e-07, |
| "loss": 0.4443, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 948, |
| "tokens_per_second_per_gpu": 16852.31, |
| "total_tokens": 42895962 |
| }, |
| { |
| "epoch": 0.09479099036108475, |
| "grad_norm": 0.94140625, |
| "learning_rate": 1.6428529186614195e-07, |
| "loss": 0.4545, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 949, |
| "tokens_per_second_per_gpu": 17871.59, |
| "total_tokens": 42940970 |
| }, |
| { |
| "epoch": 0.09489087549318284, |
| "grad_norm": 0.98046875, |
| "learning_rate": 1.580439203075812e-07, |
| "loss": 0.4492, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 950, |
| "tokens_per_second_per_gpu": 18162.74, |
| "total_tokens": 42985294 |
| }, |
| { |
| "epoch": 0.09499076062528093, |
| "grad_norm": 0.98046875, |
| "learning_rate": 1.519224698779198e-07, |
| "loss": 0.4666, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 951, |
| "tokens_per_second_per_gpu": 18369.89, |
| "total_tokens": 43031965 |
| }, |
| { |
| "epoch": 0.09509064575737901, |
| "grad_norm": 0.9453125, |
| "learning_rate": 1.4592101516509916e-07, |
| "loss": 0.4825, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 952, |
| "tokens_per_second_per_gpu": 19654.91, |
| "total_tokens": 43080342 |
| }, |
| { |
| "epoch": 0.0951905308894771, |
| "grad_norm": 0.9921875, |
| "learning_rate": 1.400396292949513e-07, |
| "loss": 0.4514, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 953, |
| "tokens_per_second_per_gpu": 19029.22, |
| "total_tokens": 43126884 |
| }, |
| { |
| "epoch": 0.09529041602157519, |
| "grad_norm": 0.96875, |
| "learning_rate": 1.3427838393030634e-07, |
| "loss": 0.4483, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 954, |
| "tokens_per_second_per_gpu": 18592.71, |
| "total_tokens": 43172457 |
| }, |
| { |
| "epoch": 0.09539030115367328, |
| "grad_norm": 0.984375, |
| "learning_rate": 1.2863734927012094e-07, |
| "loss": 0.4691, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 955, |
| "tokens_per_second_per_gpu": 17402.82, |
| "total_tokens": 43219624 |
| }, |
| { |
| "epoch": 0.09549018628577137, |
| "grad_norm": 0.953125, |
| "learning_rate": 1.231165940486234e-07, |
| "loss": 0.4377, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 956, |
| "tokens_per_second_per_gpu": 17255.75, |
| "total_tokens": 43262874 |
| }, |
| { |
| "epoch": 0.09559007141786945, |
| "grad_norm": 0.9765625, |
| "learning_rate": 1.1771618553447217e-07, |
| "loss": 0.4813, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 957, |
| "tokens_per_second_per_gpu": 19253.02, |
| "total_tokens": 43309217 |
| }, |
| { |
| "epoch": 0.09568995654996754, |
| "grad_norm": 0.98046875, |
| "learning_rate": 1.1243618952994195e-07, |
| "loss": 0.484, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 958, |
| "tokens_per_second_per_gpu": 20471.01, |
| "total_tokens": 43358319 |
| }, |
| { |
| "epoch": 0.09578984168206563, |
| "grad_norm": 1.0, |
| "learning_rate": 1.0727667037011668e-07, |
| "loss": 0.4488, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 959, |
| "tokens_per_second_per_gpu": 19270.76, |
| "total_tokens": 43404675 |
| }, |
| { |
| "epoch": 0.09588972681416372, |
| "grad_norm": 0.98828125, |
| "learning_rate": 1.0223769092211012e-07, |
| "loss": 0.4487, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 960, |
| "tokens_per_second_per_gpu": 17859.57, |
| "total_tokens": 43448494 |
| }, |
| { |
| "epoch": 0.0959896119462618, |
| "grad_norm": 0.94921875, |
| "learning_rate": 9.731931258429638e-08, |
| "loss": 0.5092, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 961, |
| "tokens_per_second_per_gpu": 22155.32, |
| "total_tokens": 43501117 |
| }, |
| { |
| "epoch": 0.09608949707835988, |
| "grad_norm": 0.95703125, |
| "learning_rate": 9.252159528556404e-08, |
| "loss": 0.4117, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 962, |
| "tokens_per_second_per_gpu": 17370.88, |
| "total_tokens": 43543709 |
| }, |
| { |
| "epoch": 0.09618938221045797, |
| "grad_norm": 0.99609375, |
| "learning_rate": 8.784459748458318e-08, |
| "loss": 0.4376, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 963, |
| "tokens_per_second_per_gpu": 16865.09, |
| "total_tokens": 43585580 |
| }, |
| { |
| "epoch": 0.09628926734255606, |
| "grad_norm": 1.4296875, |
| "learning_rate": 8.328837616909612e-08, |
| "loss": 0.4473, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 964, |
| "tokens_per_second_per_gpu": 16405.92, |
| "total_tokens": 43626251 |
| }, |
| { |
| "epoch": 0.09638915247465414, |
| "grad_norm": 1.046875, |
| "learning_rate": 7.885298685522235e-08, |
| "loss": 0.4357, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 965, |
| "tokens_per_second_per_gpu": 16632.25, |
| "total_tokens": 43666658 |
| }, |
| { |
| "epoch": 0.09648903760675223, |
| "grad_norm": 1.0703125, |
| "learning_rate": 7.453848358678018e-08, |
| "loss": 0.4606, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 966, |
| "tokens_per_second_per_gpu": 15540.97, |
| "total_tokens": 43705183 |
| }, |
| { |
| "epoch": 0.09658892273885032, |
| "grad_norm": 0.8984375, |
| "learning_rate": 7.034491893463059e-08, |
| "loss": 0.4517, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 967, |
| "tokens_per_second_per_gpu": 21108.92, |
| "total_tokens": 43756112 |
| }, |
| { |
| "epoch": 0.09668880787094841, |
| "grad_norm": 0.9765625, |
| "learning_rate": 6.627234399603554e-08, |
| "loss": 0.4383, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 968, |
| "tokens_per_second_per_gpu": 18476.27, |
| "total_tokens": 43801144 |
| }, |
| { |
| "epoch": 0.0967886930030465, |
| "grad_norm": 1.21875, |
| "learning_rate": 6.232080839403631e-08, |
| "loss": 0.3901, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 969, |
| "tokens_per_second_per_gpu": 16945.39, |
| "total_tokens": 43843371 |
| }, |
| { |
| "epoch": 0.09688857813514458, |
| "grad_norm": 0.98046875, |
| "learning_rate": 5.849036027684607e-08, |
| "loss": 0.4401, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 970, |
| "tokens_per_second_per_gpu": 16730.65, |
| "total_tokens": 43883917 |
| }, |
| { |
| "epoch": 0.09698846326724267, |
| "grad_norm": 1.015625, |
| "learning_rate": 5.4781046317267103e-08, |
| "loss": 0.4496, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 971, |
| "tokens_per_second_per_gpu": 18073.27, |
| "total_tokens": 43927712 |
| }, |
| { |
| "epoch": 0.09708834839934076, |
| "grad_norm": 0.984375, |
| "learning_rate": 5.119291171211793e-08, |
| "loss": 0.4021, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 972, |
| "tokens_per_second_per_gpu": 15942.93, |
| "total_tokens": 43967262 |
| }, |
| { |
| "epoch": 0.09718823353143885, |
| "grad_norm": 1.078125, |
| "learning_rate": 4.772600018168816e-08, |
| "loss": 0.4618, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 973, |
| "tokens_per_second_per_gpu": 17579.52, |
| "total_tokens": 44009445 |
| }, |
| { |
| "epoch": 0.09728811866353693, |
| "grad_norm": 0.9296875, |
| "learning_rate": 4.438035396920004e-08, |
| "loss": 0.4217, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 974, |
| "tokens_per_second_per_gpu": 18951.32, |
| "total_tokens": 44056001 |
| }, |
| { |
| "epoch": 0.09738800379563502, |
| "grad_norm": 0.91015625, |
| "learning_rate": 4.115601384029666e-08, |
| "loss": 0.4659, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 975, |
| "tokens_per_second_per_gpu": 20514.73, |
| "total_tokens": 44104705 |
| }, |
| { |
| "epoch": 0.09748788892773311, |
| "grad_norm": 0.9921875, |
| "learning_rate": 3.805301908254455e-08, |
| "loss": 0.4325, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 976, |
| "tokens_per_second_per_gpu": 16668.21, |
| "total_tokens": 44145662 |
| }, |
| { |
| "epoch": 0.0975877740598312, |
| "grad_norm": 0.97265625, |
| "learning_rate": 3.50714075049563e-08, |
| "loss": 0.4782, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 977, |
| "tokens_per_second_per_gpu": 18495.56, |
| "total_tokens": 44191132 |
| }, |
| { |
| "epoch": 0.09768765919192929, |
| "grad_norm": 0.95703125, |
| "learning_rate": 3.22112154375287e-08, |
| "loss": 0.4653, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 978, |
| "tokens_per_second_per_gpu": 19483.13, |
| "total_tokens": 44239031 |
| }, |
| { |
| "epoch": 0.09778754432402736, |
| "grad_norm": 0.9609375, |
| "learning_rate": 2.947247773079753e-08, |
| "loss": 0.4519, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 979, |
| "tokens_per_second_per_gpu": 19120.96, |
| "total_tokens": 44285415 |
| }, |
| { |
| "epoch": 0.09788742945612545, |
| "grad_norm": 0.94921875, |
| "learning_rate": 2.6855227755419046e-08, |
| "loss": 0.4424, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 980, |
| "tokens_per_second_per_gpu": 17814.06, |
| "total_tokens": 44330574 |
| }, |
| { |
| "epoch": 0.09798731458822355, |
| "grad_norm": 0.9453125, |
| "learning_rate": 2.4359497401758026e-08, |
| "loss": 0.4192, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 981, |
| "tokens_per_second_per_gpu": 19316.62, |
| "total_tokens": 44376868 |
| }, |
| { |
| "epoch": 0.09808719972032164, |
| "grad_norm": 0.9453125, |
| "learning_rate": 2.1985317079500358e-08, |
| "loss": 0.4946, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 982, |
| "tokens_per_second_per_gpu": 20238.67, |
| "total_tokens": 44425601 |
| }, |
| { |
| "epoch": 0.09818708485241971, |
| "grad_norm": 0.94921875, |
| "learning_rate": 1.973271571728441e-08, |
| "loss": 0.4305, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 983, |
| "tokens_per_second_per_gpu": 18249.19, |
| "total_tokens": 44469533 |
| }, |
| { |
| "epoch": 0.0982869699845178, |
| "grad_norm": 1.4296875, |
| "learning_rate": 1.7601720762346895e-08, |
| "loss": 0.4433, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 984, |
| "tokens_per_second_per_gpu": 17378.35, |
| "total_tokens": 44512930 |
| }, |
| { |
| "epoch": 0.09838685511661589, |
| "grad_norm": 0.99609375, |
| "learning_rate": 1.5592358180189782e-08, |
| "loss": 0.3867, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 985, |
| "tokens_per_second_per_gpu": 16166.91, |
| "total_tokens": 44552078 |
| }, |
| { |
| "epoch": 0.09848674024871398, |
| "grad_norm": 1.015625, |
| "learning_rate": 1.370465245426167e-08, |
| "loss": 0.4447, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 986, |
| "tokens_per_second_per_gpu": 19431.46, |
| "total_tokens": 44599621 |
| }, |
| { |
| "epoch": 0.09858662538081206, |
| "grad_norm": 0.9453125, |
| "learning_rate": 1.1938626585660252e-08, |
| "loss": 0.4789, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 987, |
| "tokens_per_second_per_gpu": 21446.01, |
| "total_tokens": 44651008 |
| }, |
| { |
| "epoch": 0.09868651051291015, |
| "grad_norm": 0.984375, |
| "learning_rate": 1.0294302092853647e-08, |
| "loss": 0.4412, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 988, |
| "tokens_per_second_per_gpu": 16777.56, |
| "total_tokens": 44692261 |
| }, |
| { |
| "epoch": 0.09878639564500824, |
| "grad_norm": 0.98828125, |
| "learning_rate": 8.771699011416169e-09, |
| "loss": 0.4817, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 989, |
| "tokens_per_second_per_gpu": 19002.23, |
| "total_tokens": 44738601 |
| }, |
| { |
| "epoch": 0.09888628077710633, |
| "grad_norm": 0.984375, |
| "learning_rate": 7.370835893788508e-09, |
| "loss": 0.4094, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 990, |
| "tokens_per_second_per_gpu": 16810.32, |
| "total_tokens": 44779354 |
| }, |
| { |
| "epoch": 0.09898616590920442, |
| "grad_norm": 0.94921875, |
| "learning_rate": 6.091729809042379e-09, |
| "loss": 0.4621, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 991, |
| "tokens_per_second_per_gpu": 19308.29, |
| "total_tokens": 44826450 |
| }, |
| { |
| "epoch": 0.0990860510413025, |
| "grad_norm": 0.9609375, |
| "learning_rate": 4.9343963426840006e-09, |
| "loss": 0.4263, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 992, |
| "tokens_per_second_per_gpu": 17706.22, |
| "total_tokens": 44869736 |
| }, |
| { |
| "epoch": 0.09918593617340059, |
| "grad_norm": 0.92578125, |
| "learning_rate": 3.898849596456477e-09, |
| "loss": 0.4745, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 993, |
| "tokens_per_second_per_gpu": 20667.54, |
| "total_tokens": 44920499 |
| }, |
| { |
| "epoch": 0.09928582130549868, |
| "grad_norm": 1.0234375, |
| "learning_rate": 2.9851021881688314e-09, |
| "loss": 0.4705, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 994, |
| "tokens_per_second_per_gpu": 17161.8, |
| "total_tokens": 44963646 |
| }, |
| { |
| "epoch": 0.09938570643759677, |
| "grad_norm": 0.94921875, |
| "learning_rate": 2.193165251545004e-09, |
| "loss": 0.4023, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 995, |
| "tokens_per_second_per_gpu": 16653.71, |
| "total_tokens": 45005897 |
| }, |
| { |
| "epoch": 0.09948559156969485, |
| "grad_norm": 0.984375, |
| "learning_rate": 1.5230484360873043e-09, |
| "loss": 0.4059, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 996, |
| "tokens_per_second_per_gpu": 16316.94, |
| "total_tokens": 45046570 |
| }, |
| { |
| "epoch": 0.09958547670179294, |
| "grad_norm": 1.0234375, |
| "learning_rate": 9.74759906957612e-10, |
| "loss": 0.4281, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 997, |
| "tokens_per_second_per_gpu": 16667.99, |
| "total_tokens": 45087565 |
| }, |
| { |
| "epoch": 0.09968536183389103, |
| "grad_norm": 0.9765625, |
| "learning_rate": 5.483063448785686e-10, |
| "loss": 0.516, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 998, |
| "tokens_per_second_per_gpu": 18744.23, |
| "total_tokens": 45134425 |
| }, |
| { |
| "epoch": 0.09978524696598912, |
| "grad_norm": 0.94921875, |
| "learning_rate": 2.436929460525317e-10, |
| "loss": 0.4697, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 999, |
| "tokens_per_second_per_gpu": 21031.15, |
| "total_tokens": 45185167 |
| }, |
| { |
| "epoch": 0.0998851320980872, |
| "grad_norm": 0.92578125, |
| "learning_rate": 6.092342209607083e-11, |
| "loss": 0.4754, |
| "memory/device_reserved (GiB)": 101.86, |
| "memory/max_active (GiB)": 91.58, |
| "memory/max_allocated (GiB)": 91.58, |
| "step": 1000, |
| "tokens_per_second_per_gpu": 20383.74, |
| "total_tokens": 45235592 |
| } |
| ], |
| "logging_steps": 1, |
| "max_steps": 1000, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 1.428755029426176e+18, |
| "train_batch_size": 8, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|