| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 2.956175298804781, | |
| "eval_steps": 500, | |
| "global_step": 186, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.01593625498007968, | |
| "grad_norm": 3.265625, | |
| "learning_rate": 0.0, | |
| "loss": 1.1802, | |
| "memory/device_reserved (GiB)": 62.1, | |
| "memory/max_active (GiB)": 50.46, | |
| "memory/max_allocated (GiB)": 50.46, | |
| "step": 1, | |
| "tokens_per_second_per_gpu": 4078.9, | |
| "total_tokens": 187960 | |
| }, | |
| { | |
| "epoch": 0.03187250996015936, | |
| "grad_norm": 3.53125, | |
| "learning_rate": 1.111111111111111e-07, | |
| "loss": 1.2461, | |
| "memory/device_reserved (GiB)": 77.62, | |
| "memory/max_active (GiB)": 65.77, | |
| "memory/max_allocated (GiB)": 65.77, | |
| "step": 2, | |
| "tokens_per_second_per_gpu": 5600.28, | |
| "total_tokens": 380007 | |
| }, | |
| { | |
| "epoch": 0.04780876494023904, | |
| "grad_norm": 3.5625, | |
| "learning_rate": 2.222222222222222e-07, | |
| "loss": 1.3145, | |
| "memory/device_reserved (GiB)": 77.63, | |
| "memory/max_active (GiB)": 65.77, | |
| "memory/max_allocated (GiB)": 65.77, | |
| "step": 3, | |
| "tokens_per_second_per_gpu": 5504.25, | |
| "total_tokens": 566579 | |
| }, | |
| { | |
| "epoch": 0.06374501992031872, | |
| "grad_norm": 3.625, | |
| "learning_rate": 3.333333333333333e-07, | |
| "loss": 1.2505, | |
| "memory/device_reserved (GiB)": 77.63, | |
| "memory/max_active (GiB)": 65.77, | |
| "memory/max_allocated (GiB)": 65.77, | |
| "step": 4, | |
| "tokens_per_second_per_gpu": 5678.77, | |
| "total_tokens": 754678 | |
| }, | |
| { | |
| "epoch": 0.0796812749003984, | |
| "grad_norm": 3.46875, | |
| "learning_rate": 4.444444444444444e-07, | |
| "loss": 1.2344, | |
| "memory/device_reserved (GiB)": 77.63, | |
| "memory/max_active (GiB)": 65.77, | |
| "memory/max_allocated (GiB)": 65.77, | |
| "step": 5, | |
| "tokens_per_second_per_gpu": 5644.44, | |
| "total_tokens": 935084 | |
| }, | |
| { | |
| "epoch": 0.09561752988047809, | |
| "grad_norm": 3.71875, | |
| "learning_rate": 5.555555555555555e-07, | |
| "loss": 1.334, | |
| "memory/device_reserved (GiB)": 77.63, | |
| "memory/max_active (GiB)": 65.77, | |
| "memory/max_allocated (GiB)": 65.77, | |
| "step": 6, | |
| "tokens_per_second_per_gpu": 5488.37, | |
| "total_tokens": 1114037 | |
| }, | |
| { | |
| "epoch": 0.11155378486055777, | |
| "grad_norm": 3.4375, | |
| "learning_rate": 6.666666666666666e-07, | |
| "loss": 1.1704, | |
| "memory/device_reserved (GiB)": 77.63, | |
| "memory/max_active (GiB)": 65.77, | |
| "memory/max_allocated (GiB)": 65.77, | |
| "step": 7, | |
| "tokens_per_second_per_gpu": 5468.62, | |
| "total_tokens": 1302175 | |
| }, | |
| { | |
| "epoch": 0.12749003984063745, | |
| "grad_norm": 3.484375, | |
| "learning_rate": 7.777777777777778e-07, | |
| "loss": 1.2471, | |
| "memory/device_reserved (GiB)": 77.63, | |
| "memory/max_active (GiB)": 65.77, | |
| "memory/max_allocated (GiB)": 65.77, | |
| "step": 8, | |
| "tokens_per_second_per_gpu": 5412.64, | |
| "total_tokens": 1483342 | |
| }, | |
| { | |
| "epoch": 0.14342629482071714, | |
| "grad_norm": 3.375, | |
| "learning_rate": 8.888888888888888e-07, | |
| "loss": 1.2354, | |
| "memory/device_reserved (GiB)": 77.63, | |
| "memory/max_active (GiB)": 65.77, | |
| "memory/max_allocated (GiB)": 65.77, | |
| "step": 9, | |
| "tokens_per_second_per_gpu": 5455.54, | |
| "total_tokens": 1667332 | |
| }, | |
| { | |
| "epoch": 0.1593625498007968, | |
| "grad_norm": 3.546875, | |
| "learning_rate": 1e-06, | |
| "loss": 1.3232, | |
| "memory/device_reserved (GiB)": 77.63, | |
| "memory/max_active (GiB)": 65.77, | |
| "memory/max_allocated (GiB)": 65.77, | |
| "step": 10, | |
| "tokens_per_second_per_gpu": 5574.32, | |
| "total_tokens": 1849357 | |
| }, | |
| { | |
| "epoch": 0.1752988047808765, | |
| "grad_norm": 3.671875, | |
| "learning_rate": 1.111111111111111e-06, | |
| "loss": 1.3232, | |
| "memory/device_reserved (GiB)": 77.63, | |
| "memory/max_active (GiB)": 65.77, | |
| "memory/max_allocated (GiB)": 65.77, | |
| "step": 11, | |
| "tokens_per_second_per_gpu": 5321.58, | |
| "total_tokens": 2026853 | |
| }, | |
| { | |
| "epoch": 0.19123505976095617, | |
| "grad_norm": 3.359375, | |
| "learning_rate": 1.2222222222222223e-06, | |
| "loss": 1.2529, | |
| "memory/device_reserved (GiB)": 77.63, | |
| "memory/max_active (GiB)": 65.77, | |
| "memory/max_allocated (GiB)": 65.77, | |
| "step": 12, | |
| "tokens_per_second_per_gpu": 5847.85, | |
| "total_tokens": 2213285 | |
| }, | |
| { | |
| "epoch": 0.20717131474103587, | |
| "grad_norm": 3.328125, | |
| "learning_rate": 1.3333333333333332e-06, | |
| "loss": 1.2559, | |
| "memory/device_reserved (GiB)": 77.63, | |
| "memory/max_active (GiB)": 65.77, | |
| "memory/max_allocated (GiB)": 65.77, | |
| "step": 13, | |
| "tokens_per_second_per_gpu": 5613.77, | |
| "total_tokens": 2400076 | |
| }, | |
| { | |
| "epoch": 0.22310756972111553, | |
| "grad_norm": 3.15625, | |
| "learning_rate": 1.4444444444444443e-06, | |
| "loss": 1.2129, | |
| "memory/device_reserved (GiB)": 77.63, | |
| "memory/max_active (GiB)": 65.77, | |
| "memory/max_allocated (GiB)": 65.77, | |
| "step": 14, | |
| "tokens_per_second_per_gpu": 5571.51, | |
| "total_tokens": 2593112 | |
| }, | |
| { | |
| "epoch": 0.23904382470119523, | |
| "grad_norm": 3.125, | |
| "learning_rate": 1.5555555555555556e-06, | |
| "loss": 1.2153, | |
| "memory/device_reserved (GiB)": 77.63, | |
| "memory/max_active (GiB)": 65.77, | |
| "memory/max_allocated (GiB)": 65.77, | |
| "step": 15, | |
| "tokens_per_second_per_gpu": 5626.56, | |
| "total_tokens": 2782327 | |
| }, | |
| { | |
| "epoch": 0.2549800796812749, | |
| "grad_norm": 3.3125, | |
| "learning_rate": 1.6666666666666667e-06, | |
| "loss": 1.2598, | |
| "memory/device_reserved (GiB)": 77.63, | |
| "memory/max_active (GiB)": 65.77, | |
| "memory/max_allocated (GiB)": 65.77, | |
| "step": 16, | |
| "tokens_per_second_per_gpu": 5731.24, | |
| "total_tokens": 2966947 | |
| }, | |
| { | |
| "epoch": 0.27091633466135456, | |
| "grad_norm": 3.09375, | |
| "learning_rate": 1.7777777777777775e-06, | |
| "loss": 1.1714, | |
| "memory/device_reserved (GiB)": 77.63, | |
| "memory/max_active (GiB)": 65.77, | |
| "memory/max_allocated (GiB)": 65.77, | |
| "step": 17, | |
| "tokens_per_second_per_gpu": 6088.78, | |
| "total_tokens": 3161163 | |
| }, | |
| { | |
| "epoch": 0.2868525896414343, | |
| "grad_norm": 3.234375, | |
| "learning_rate": 1.8888888888888888e-06, | |
| "loss": 1.2402, | |
| "memory/device_reserved (GiB)": 77.63, | |
| "memory/max_active (GiB)": 65.77, | |
| "memory/max_allocated (GiB)": 65.77, | |
| "step": 18, | |
| "tokens_per_second_per_gpu": 5499.3, | |
| "total_tokens": 3343301 | |
| }, | |
| { | |
| "epoch": 0.30278884462151395, | |
| "grad_norm": 3.171875, | |
| "learning_rate": 2e-06, | |
| "loss": 1.2158, | |
| "memory/device_reserved (GiB)": 77.63, | |
| "memory/max_active (GiB)": 65.77, | |
| "memory/max_allocated (GiB)": 65.77, | |
| "step": 19, | |
| "tokens_per_second_per_gpu": 5505.1, | |
| "total_tokens": 3527203 | |
| }, | |
| { | |
| "epoch": 0.3187250996015936, | |
| "grad_norm": 3.21875, | |
| "learning_rate": 1.9998251609127463e-06, | |
| "loss": 1.2446, | |
| "memory/device_reserved (GiB)": 77.63, | |
| "memory/max_active (GiB)": 65.77, | |
| "memory/max_allocated (GiB)": 65.77, | |
| "step": 20, | |
| "tokens_per_second_per_gpu": 5441.06, | |
| "total_tokens": 3710228 | |
| }, | |
| { | |
| "epoch": 0.3346613545816733, | |
| "grad_norm": 2.875, | |
| "learning_rate": 1.9993007047883984e-06, | |
| "loss": 1.1895, | |
| "memory/device_reserved (GiB)": 77.63, | |
| "memory/max_active (GiB)": 65.77, | |
| "memory/max_allocated (GiB)": 65.77, | |
| "step": 21, | |
| "tokens_per_second_per_gpu": 5817.48, | |
| "total_tokens": 3903090 | |
| }, | |
| { | |
| "epoch": 0.350597609561753, | |
| "grad_norm": 3.078125, | |
| "learning_rate": 1.9984268150178167e-06, | |
| "loss": 1.209, | |
| "memory/device_reserved (GiB)": 77.63, | |
| "memory/max_active (GiB)": 65.77, | |
| "memory/max_allocated (GiB)": 65.77, | |
| "step": 22, | |
| "tokens_per_second_per_gpu": 5823.26, | |
| "total_tokens": 4085213 | |
| }, | |
| { | |
| "epoch": 0.3665338645418327, | |
| "grad_norm": 2.71875, | |
| "learning_rate": 1.9972037971811797e-06, | |
| "loss": 1.1602, | |
| "memory/device_reserved (GiB)": 77.63, | |
| "memory/max_active (GiB)": 65.77, | |
| "memory/max_allocated (GiB)": 65.77, | |
| "step": 23, | |
| "tokens_per_second_per_gpu": 5595.44, | |
| "total_tokens": 4271930 | |
| }, | |
| { | |
| "epoch": 0.38247011952191234, | |
| "grad_norm": 2.625, | |
| "learning_rate": 1.9956320789411338e-06, | |
| "loss": 1.1587, | |
| "memory/device_reserved (GiB)": 77.63, | |
| "memory/max_active (GiB)": 65.77, | |
| "memory/max_allocated (GiB)": 65.77, | |
| "step": 24, | |
| "tokens_per_second_per_gpu": 5470.25, | |
| "total_tokens": 4458282 | |
| }, | |
| { | |
| "epoch": 0.398406374501992, | |
| "grad_norm": 2.96875, | |
| "learning_rate": 1.9937122098932426e-06, | |
| "loss": 1.2295, | |
| "memory/device_reserved (GiB)": 77.63, | |
| "memory/max_active (GiB)": 65.77, | |
| "memory/max_allocated (GiB)": 65.77, | |
| "step": 25, | |
| "tokens_per_second_per_gpu": 5381.09, | |
| "total_tokens": 4638920 | |
| }, | |
| { | |
| "epoch": 0.41434262948207173, | |
| "grad_norm": 2.765625, | |
| "learning_rate": 1.9914448613738106e-06, | |
| "loss": 1.2695, | |
| "memory/device_reserved (GiB)": 77.63, | |
| "memory/max_active (GiB)": 65.77, | |
| "memory/max_allocated (GiB)": 65.77, | |
| "step": 26, | |
| "tokens_per_second_per_gpu": 5413.01, | |
| "total_tokens": 4823256 | |
| }, | |
| { | |
| "epoch": 0.4302788844621514, | |
| "grad_norm": 2.703125, | |
| "learning_rate": 1.9888308262251284e-06, | |
| "loss": 1.2412, | |
| "memory/device_reserved (GiB)": 77.63, | |
| "memory/max_active (GiB)": 65.77, | |
| "memory/max_allocated (GiB)": 65.77, | |
| "step": 27, | |
| "tokens_per_second_per_gpu": 5555.69, | |
| "total_tokens": 5004546 | |
| }, | |
| { | |
| "epoch": 0.44621513944223107, | |
| "grad_norm": 2.703125, | |
| "learning_rate": 1.9858710185182355e-06, | |
| "loss": 1.2666, | |
| "memory/device_reserved (GiB)": 77.63, | |
| "memory/max_active (GiB)": 65.77, | |
| "memory/max_allocated (GiB)": 65.77, | |
| "step": 28, | |
| "tokens_per_second_per_gpu": 5321.57, | |
| "total_tokens": 5180916 | |
| }, | |
| { | |
| "epoch": 0.46215139442231074, | |
| "grad_norm": 2.5625, | |
| "learning_rate": 1.9825664732332882e-06, | |
| "loss": 1.207, | |
| "memory/device_reserved (GiB)": 77.63, | |
| "memory/max_active (GiB)": 65.77, | |
| "memory/max_allocated (GiB)": 65.77, | |
| "step": 29, | |
| "tokens_per_second_per_gpu": 5507.08, | |
| "total_tokens": 5362870 | |
| }, | |
| { | |
| "epoch": 0.47808764940239046, | |
| "grad_norm": 2.671875, | |
| "learning_rate": 1.9789183458976484e-06, | |
| "loss": 1.1904, | |
| "memory/device_reserved (GiB)": 77.63, | |
| "memory/max_active (GiB)": 65.77, | |
| "memory/max_allocated (GiB)": 65.77, | |
| "step": 30, | |
| "tokens_per_second_per_gpu": 5788.71, | |
| "total_tokens": 5548692 | |
| }, | |
| { | |
| "epoch": 0.4940239043824701, | |
| "grad_norm": 2.4375, | |
| "learning_rate": 1.9749279121818236e-06, | |
| "loss": 1.1865, | |
| "memory/device_reserved (GiB)": 77.63, | |
| "memory/max_active (GiB)": 65.77, | |
| "memory/max_allocated (GiB)": 65.77, | |
| "step": 31, | |
| "tokens_per_second_per_gpu": 5411.35, | |
| "total_tokens": 5734611 | |
| }, | |
| { | |
| "epoch": 0.5099601593625498, | |
| "grad_norm": 2.5, | |
| "learning_rate": 1.970596567453391e-06, | |
| "loss": 1.1953, | |
| "memory/device_reserved (GiB)": 77.63, | |
| "memory/max_active (GiB)": 65.77, | |
| "memory/max_allocated (GiB)": 65.77, | |
| "step": 32, | |
| "tokens_per_second_per_gpu": 5568.61, | |
| "total_tokens": 5915483 | |
| }, | |
| { | |
| "epoch": 0.5258964143426295, | |
| "grad_norm": 2.328125, | |
| "learning_rate": 1.965925826289068e-06, | |
| "loss": 1.1885, | |
| "memory/device_reserved (GiB)": 77.63, | |
| "memory/max_active (GiB)": 65.77, | |
| "memory/max_allocated (GiB)": 65.77, | |
| "step": 33, | |
| "tokens_per_second_per_gpu": 5527.28, | |
| "total_tokens": 6103887 | |
| }, | |
| { | |
| "epoch": 0.5418326693227091, | |
| "grad_norm": 2.390625, | |
| "learning_rate": 1.9609173219450997e-06, | |
| "loss": 1.2578, | |
| "memory/device_reserved (GiB)": 77.63, | |
| "memory/max_active (GiB)": 65.77, | |
| "memory/max_allocated (GiB)": 65.77, | |
| "step": 34, | |
| "tokens_per_second_per_gpu": 5786.79, | |
| "total_tokens": 6291053 | |
| }, | |
| { | |
| "epoch": 0.5577689243027888, | |
| "grad_norm": 2.390625, | |
| "learning_rate": 1.955572805786141e-06, | |
| "loss": 1.2656, | |
| "memory/device_reserved (GiB)": 77.63, | |
| "memory/max_active (GiB)": 65.77, | |
| "memory/max_allocated (GiB)": 65.77, | |
| "step": 35, | |
| "tokens_per_second_per_gpu": 5616.39, | |
| "total_tokens": 6476080 | |
| }, | |
| { | |
| "epoch": 0.5737051792828686, | |
| "grad_norm": 2.40625, | |
| "learning_rate": 1.9498941466728456e-06, | |
| "loss": 1.2285, | |
| "memory/device_reserved (GiB)": 77.63, | |
| "memory/max_active (GiB)": 65.77, | |
| "memory/max_allocated (GiB)": 65.77, | |
| "step": 36, | |
| "tokens_per_second_per_gpu": 5446.88, | |
| "total_tokens": 6654173 | |
| }, | |
| { | |
| "epoch": 0.5896414342629482, | |
| "grad_norm": 2.1875, | |
| "learning_rate": 1.9438833303083674e-06, | |
| "loss": 1.2314, | |
| "memory/device_reserved (GiB)": 77.63, | |
| "memory/max_active (GiB)": 65.77, | |
| "memory/max_allocated (GiB)": 65.77, | |
| "step": 37, | |
| "tokens_per_second_per_gpu": 5411.99, | |
| "total_tokens": 6838496 | |
| }, | |
| { | |
| "epoch": 0.6055776892430279, | |
| "grad_norm": 2.140625, | |
| "learning_rate": 1.937542458543999e-06, | |
| "loss": 1.1685, | |
| "memory/device_reserved (GiB)": 77.63, | |
| "memory/max_active (GiB)": 65.77, | |
| "memory/max_allocated (GiB)": 65.77, | |
| "step": 38, | |
| "tokens_per_second_per_gpu": 5285.57, | |
| "total_tokens": 7020101 | |
| }, | |
| { | |
| "epoch": 0.6215139442231076, | |
| "grad_norm": 2.09375, | |
| "learning_rate": 1.930873748644204e-06, | |
| "loss": 1.1489, | |
| "memory/device_reserved (GiB)": 77.63, | |
| "memory/max_active (GiB)": 65.77, | |
| "memory/max_allocated (GiB)": 65.77, | |
| "step": 39, | |
| "tokens_per_second_per_gpu": 5909.97, | |
| "total_tokens": 7209728 | |
| }, | |
| { | |
| "epoch": 0.6374501992031872, | |
| "grad_norm": 2.21875, | |
| "learning_rate": 1.9238795325112867e-06, | |
| "loss": 1.1924, | |
| "memory/device_reserved (GiB)": 77.63, | |
| "memory/max_active (GiB)": 65.77, | |
| "memory/max_allocated (GiB)": 65.77, | |
| "step": 40, | |
| "tokens_per_second_per_gpu": 5668.7, | |
| "total_tokens": 7392542 | |
| }, | |
| { | |
| "epoch": 0.6533864541832669, | |
| "grad_norm": 2.1875, | |
| "learning_rate": 1.916562255869976e-06, | |
| "loss": 1.21, | |
| "memory/device_reserved (GiB)": 77.63, | |
| "memory/max_active (GiB)": 65.77, | |
| "memory/max_allocated (GiB)": 65.77, | |
| "step": 41, | |
| "tokens_per_second_per_gpu": 5187.81, | |
| "total_tokens": 7570380 | |
| }, | |
| { | |
| "epoch": 0.6693227091633466, | |
| "grad_norm": 2.03125, | |
| "learning_rate": 1.908924477412211e-06, | |
| "loss": 1.1787, | |
| "memory/device_reserved (GiB)": 77.63, | |
| "memory/max_active (GiB)": 65.77, | |
| "memory/max_allocated (GiB)": 65.77, | |
| "step": 42, | |
| "tokens_per_second_per_gpu": 5502.78, | |
| "total_tokens": 7753287 | |
| }, | |
| { | |
| "epoch": 0.6852589641434262, | |
| "grad_norm": 1.90625, | |
| "learning_rate": 1.9009688679024189e-06, | |
| "loss": 1.1504, | |
| "memory/device_reserved (GiB)": 77.63, | |
| "memory/max_active (GiB)": 65.77, | |
| "memory/max_allocated (GiB)": 65.77, | |
| "step": 43, | |
| "tokens_per_second_per_gpu": 5664.86, | |
| "total_tokens": 7942863 | |
| }, | |
| { | |
| "epoch": 0.701195219123506, | |
| "grad_norm": 1.9921875, | |
| "learning_rate": 1.8926982092436114e-06, | |
| "loss": 1.2075, | |
| "memory/device_reserved (GiB)": 77.63, | |
| "memory/max_active (GiB)": 65.77, | |
| "memory/max_allocated (GiB)": 65.77, | |
| "step": 44, | |
| "tokens_per_second_per_gpu": 5493.62, | |
| "total_tokens": 8127398 | |
| }, | |
| { | |
| "epoch": 0.7171314741035857, | |
| "grad_norm": 1.9140625, | |
| "learning_rate": 1.8841153935046096e-06, | |
| "loss": 1.1484, | |
| "memory/device_reserved (GiB)": 77.63, | |
| "memory/max_active (GiB)": 65.77, | |
| "memory/max_allocated (GiB)": 65.77, | |
| "step": 45, | |
| "tokens_per_second_per_gpu": 5847.74, | |
| "total_tokens": 8312603 | |
| }, | |
| { | |
| "epoch": 0.7330677290836654, | |
| "grad_norm": 1.90625, | |
| "learning_rate": 1.8752234219087537e-06, | |
| "loss": 1.167, | |
| "memory/device_reserved (GiB)": 77.63, | |
| "memory/max_active (GiB)": 65.77, | |
| "memory/max_allocated (GiB)": 65.77, | |
| "step": 46, | |
| "tokens_per_second_per_gpu": 5502.76, | |
| "total_tokens": 8497383 | |
| }, | |
| { | |
| "epoch": 0.749003984063745, | |
| "grad_norm": 1.8828125, | |
| "learning_rate": 1.8660254037844386e-06, | |
| "loss": 1.147, | |
| "memory/device_reserved (GiB)": 77.63, | |
| "memory/max_active (GiB)": 65.77, | |
| "memory/max_allocated (GiB)": 65.77, | |
| "step": 47, | |
| "tokens_per_second_per_gpu": 5615.32, | |
| "total_tokens": 8688166 | |
| }, | |
| { | |
| "epoch": 0.7649402390438247, | |
| "grad_norm": 1.9453125, | |
| "learning_rate": 1.8565245554778515e-06, | |
| "loss": 1.1992, | |
| "memory/device_reserved (GiB)": 77.63, | |
| "memory/max_active (GiB)": 65.77, | |
| "memory/max_allocated (GiB)": 65.77, | |
| "step": 48, | |
| "tokens_per_second_per_gpu": 5767.9, | |
| "total_tokens": 8874386 | |
| }, | |
| { | |
| "epoch": 0.7808764940239044, | |
| "grad_norm": 1.90625, | |
| "learning_rate": 1.8467241992282841e-06, | |
| "loss": 1.1475, | |
| "memory/device_reserved (GiB)": 77.63, | |
| "memory/max_active (GiB)": 65.77, | |
| "memory/max_allocated (GiB)": 65.77, | |
| "step": 49, | |
| "tokens_per_second_per_gpu": 5789.02, | |
| "total_tokens": 9059229 | |
| }, | |
| { | |
| "epoch": 0.796812749003984, | |
| "grad_norm": 1.75, | |
| "learning_rate": 1.8366277620064197e-06, | |
| "loss": 1.0986, | |
| "memory/device_reserved (GiB)": 77.63, | |
| "memory/max_active (GiB)": 65.77, | |
| "memory/max_allocated (GiB)": 65.77, | |
| "step": 50, | |
| "tokens_per_second_per_gpu": 5192.74, | |
| "total_tokens": 9255532 | |
| }, | |
| { | |
| "epoch": 0.8127490039840638, | |
| "grad_norm": 1.7578125, | |
| "learning_rate": 1.8262387743159948e-06, | |
| "loss": 1.0908, | |
| "memory/device_reserved (GiB)": 77.63, | |
| "memory/max_active (GiB)": 65.77, | |
| "memory/max_allocated (GiB)": 65.77, | |
| "step": 51, | |
| "tokens_per_second_per_gpu": 5522.43, | |
| "total_tokens": 9446438 | |
| }, | |
| { | |
| "epoch": 0.8286852589641435, | |
| "grad_norm": 1.7109375, | |
| "learning_rate": 1.8155608689592601e-06, | |
| "loss": 1.1084, | |
| "memory/device_reserved (GiB)": 77.63, | |
| "memory/max_active (GiB)": 65.77, | |
| "memory/max_allocated (GiB)": 65.77, | |
| "step": 52, | |
| "tokens_per_second_per_gpu": 5467.0, | |
| "total_tokens": 9632510 | |
| }, | |
| { | |
| "epoch": 0.8446215139442231, | |
| "grad_norm": 1.6171875, | |
| "learning_rate": 1.8045977797666683e-06, | |
| "loss": 1.0781, | |
| "memory/device_reserved (GiB)": 77.63, | |
| "memory/max_active (GiB)": 65.77, | |
| "memory/max_allocated (GiB)": 65.77, | |
| "step": 53, | |
| "tokens_per_second_per_gpu": 5831.19, | |
| "total_tokens": 9824144 | |
| }, | |
| { | |
| "epoch": 0.8605577689243028, | |
| "grad_norm": 1.78125, | |
| "learning_rate": 1.7933533402912351e-06, | |
| "loss": 1.1533, | |
| "memory/device_reserved (GiB)": 77.63, | |
| "memory/max_active (GiB)": 65.77, | |
| "memory/max_allocated (GiB)": 65.77, | |
| "step": 54, | |
| "tokens_per_second_per_gpu": 5705.42, | |
| "total_tokens": 10010140 | |
| }, | |
| { | |
| "epoch": 0.8764940239043825, | |
| "grad_norm": 1.7734375, | |
| "learning_rate": 1.7818314824680298e-06, | |
| "loss": 1.1689, | |
| "memory/device_reserved (GiB)": 77.63, | |
| "memory/max_active (GiB)": 65.77, | |
| "memory/max_allocated (GiB)": 65.77, | |
| "step": 55, | |
| "tokens_per_second_per_gpu": 5696.79, | |
| "total_tokens": 10195855 | |
| }, | |
| { | |
| "epoch": 0.8924302788844621, | |
| "grad_norm": 1.6796875, | |
| "learning_rate": 1.770036235239263e-06, | |
| "loss": 1.1182, | |
| "memory/device_reserved (GiB)": 77.63, | |
| "memory/max_active (GiB)": 65.77, | |
| "memory/max_allocated (GiB)": 65.77, | |
| "step": 56, | |
| "tokens_per_second_per_gpu": 6045.85, | |
| "total_tokens": 10381589 | |
| }, | |
| { | |
| "epoch": 0.9083665338645418, | |
| "grad_norm": 1.6875, | |
| "learning_rate": 1.7579717231454529e-06, | |
| "loss": 1.1738, | |
| "memory/device_reserved (GiB)": 77.63, | |
| "memory/max_active (GiB)": 65.77, | |
| "memory/max_allocated (GiB)": 65.77, | |
| "step": 57, | |
| "tokens_per_second_per_gpu": 5658.53, | |
| "total_tokens": 10573779 | |
| }, | |
| { | |
| "epoch": 0.9243027888446215, | |
| "grad_norm": 1.65625, | |
| "learning_rate": 1.7456421648831654e-06, | |
| "loss": 1.1553, | |
| "memory/device_reserved (GiB)": 77.63, | |
| "memory/max_active (GiB)": 65.77, | |
| "memory/max_allocated (GiB)": 65.77, | |
| "step": 58, | |
| "tokens_per_second_per_gpu": 5601.69, | |
| "total_tokens": 10762764 | |
| }, | |
| { | |
| "epoch": 0.9402390438247012, | |
| "grad_norm": 1.7109375, | |
| "learning_rate": 1.733051871829826e-06, | |
| "loss": 1.1416, | |
| "memory/device_reserved (GiB)": 77.63, | |
| "memory/max_active (GiB)": 65.77, | |
| "memory/max_allocated (GiB)": 65.77, | |
| "step": 59, | |
| "tokens_per_second_per_gpu": 5873.82, | |
| "total_tokens": 10948310 | |
| }, | |
| { | |
| "epoch": 0.9561752988047809, | |
| "grad_norm": 1.5859375, | |
| "learning_rate": 1.7202052465361266e-06, | |
| "loss": 1.1514, | |
| "memory/device_reserved (GiB)": 77.63, | |
| "memory/max_active (GiB)": 65.77, | |
| "memory/max_allocated (GiB)": 65.77, | |
| "step": 60, | |
| "tokens_per_second_per_gpu": 5552.06, | |
| "total_tokens": 11134379 | |
| }, | |
| { | |
| "epoch": 0.9721115537848606, | |
| "grad_norm": 1.5859375, | |
| "learning_rate": 1.7071067811865474e-06, | |
| "loss": 1.1243, | |
| "memory/device_reserved (GiB)": 77.63, | |
| "memory/max_active (GiB)": 65.77, | |
| "memory/max_allocated (GiB)": 65.77, | |
| "step": 61, | |
| "tokens_per_second_per_gpu": 5016.43, | |
| "total_tokens": 11320025 | |
| }, | |
| { | |
| "epoch": 0.9880478087649402, | |
| "grad_norm": 1.4921875, | |
| "learning_rate": 1.6937610560285416e-06, | |
| "loss": 1.0957, | |
| "memory/device_reserved (GiB)": 77.63, | |
| "memory/max_active (GiB)": 65.77, | |
| "memory/max_allocated (GiB)": 65.77, | |
| "step": 62, | |
| "tokens_per_second_per_gpu": 5350.32, | |
| "total_tokens": 11504140 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "grad_norm": 1.609375, | |
| "learning_rate": 1.6801727377709191e-06, | |
| "loss": 1.1582, | |
| "memory/device_reserved (GiB)": 77.63, | |
| "memory/max_active (GiB)": 65.77, | |
| "memory/max_allocated (GiB)": 65.77, | |
| "step": 63, | |
| "tokens_per_second_per_gpu": 4173.22, | |
| "total_tokens": 11627371 | |
| }, | |
| { | |
| "epoch": 1.0159362549800797, | |
| "grad_norm": 1.4609375, | |
| "learning_rate": 1.6663465779520037e-06, | |
| "loss": 1.0864, | |
| "memory/device_reserved (GiB)": 77.63, | |
| "memory/max_active (GiB)": 65.77, | |
| "memory/max_allocated (GiB)": 65.77, | |
| "step": 64, | |
| "tokens_per_second_per_gpu": 5530.73, | |
| "total_tokens": 11815331 | |
| }, | |
| { | |
| "epoch": 1.0318725099601593, | |
| "grad_norm": 1.6015625, | |
| "learning_rate": 1.6522874112781212e-06, | |
| "loss": 1.1445, | |
| "memory/device_reserved (GiB)": 77.63, | |
| "memory/max_active (GiB)": 65.77, | |
| "memory/max_allocated (GiB)": 65.77, | |
| "step": 65, | |
| "tokens_per_second_per_gpu": 5941.29, | |
| "total_tokens": 12007378 | |
| }, | |
| { | |
| "epoch": 1.047808764940239, | |
| "grad_norm": 1.546875, | |
| "learning_rate": 1.6380001539330085e-06, | |
| "loss": 1.209, | |
| "memory/device_reserved (GiB)": 77.63, | |
| "memory/max_active (GiB)": 65.77, | |
| "memory/max_allocated (GiB)": 65.77, | |
| "step": 66, | |
| "tokens_per_second_per_gpu": 5598.11, | |
| "total_tokens": 12193950 | |
| }, | |
| { | |
| "epoch": 1.0637450199203187, | |
| "grad_norm": 1.578125, | |
| "learning_rate": 1.6234898018587336e-06, | |
| "loss": 1.1421, | |
| "memory/device_reserved (GiB)": 77.63, | |
| "memory/max_active (GiB)": 65.77, | |
| "memory/max_allocated (GiB)": 65.77, | |
| "step": 67, | |
| "tokens_per_second_per_gpu": 5671.24, | |
| "total_tokens": 12382049 | |
| }, | |
| { | |
| "epoch": 1.0796812749003983, | |
| "grad_norm": 1.5, | |
| "learning_rate": 1.6087614290087205e-06, | |
| "loss": 1.1323, | |
| "memory/device_reserved (GiB)": 77.63, | |
| "memory/max_active (GiB)": 65.77, | |
| "memory/max_allocated (GiB)": 65.77, | |
| "step": 68, | |
| "tokens_per_second_per_gpu": 5627.89, | |
| "total_tokens": 12562455 | |
| }, | |
| { | |
| "epoch": 1.095617529880478, | |
| "grad_norm": 1.5859375, | |
| "learning_rate": 1.5938201855735014e-06, | |
| "loss": 1.2207, | |
| "memory/device_reserved (GiB)": 77.63, | |
| "memory/max_active (GiB)": 65.77, | |
| "memory/max_allocated (GiB)": 65.77, | |
| "step": 69, | |
| "tokens_per_second_per_gpu": 5368.43, | |
| "total_tokens": 12741408 | |
| }, | |
| { | |
| "epoch": 1.1115537848605577, | |
| "grad_norm": 1.46875, | |
| "learning_rate": 1.578671296179806e-06, | |
| "loss": 1.0659, | |
| "memory/device_reserved (GiB)": 77.63, | |
| "memory/max_active (GiB)": 65.77, | |
| "memory/max_allocated (GiB)": 65.77, | |
| "step": 70, | |
| "tokens_per_second_per_gpu": 5466.98, | |
| "total_tokens": 12929546 | |
| }, | |
| { | |
| "epoch": 1.1274900398406373, | |
| "grad_norm": 1.421875, | |
| "learning_rate": 1.563320058063622e-06, | |
| "loss": 1.1416, | |
| "memory/device_reserved (GiB)": 77.63, | |
| "memory/max_active (GiB)": 65.77, | |
| "memory/max_allocated (GiB)": 65.77, | |
| "step": 71, | |
| "tokens_per_second_per_gpu": 5509.23, | |
| "total_tokens": 13110713 | |
| }, | |
| { | |
| "epoch": 1.1434262948207172, | |
| "grad_norm": 1.34375, | |
| "learning_rate": 1.5477718392178713e-06, | |
| "loss": 1.1318, | |
| "memory/device_reserved (GiB)": 77.63, | |
| "memory/max_active (GiB)": 65.77, | |
| "memory/max_allocated (GiB)": 65.77, | |
| "step": 72, | |
| "tokens_per_second_per_gpu": 5553.33, | |
| "total_tokens": 13294703 | |
| }, | |
| { | |
| "epoch": 1.159362549800797, | |
| "grad_norm": 1.484375, | |
| "learning_rate": 1.5320320765153365e-06, | |
| "loss": 1.2129, | |
| "memory/device_reserved (GiB)": 77.63, | |
| "memory/max_active (GiB)": 65.77, | |
| "memory/max_allocated (GiB)": 65.77, | |
| "step": 73, | |
| "tokens_per_second_per_gpu": 5457.5, | |
| "total_tokens": 13476728 | |
| }, | |
| { | |
| "epoch": 1.1752988047808766, | |
| "grad_norm": 1.5, | |
| "learning_rate": 1.5161062738075065e-06, | |
| "loss": 1.2109, | |
| "memory/device_reserved (GiB)": 77.63, | |
| "memory/max_active (GiB)": 65.77, | |
| "memory/max_allocated (GiB)": 65.77, | |
| "step": 74, | |
| "tokens_per_second_per_gpu": 5229.94, | |
| "total_tokens": 13654224 | |
| }, | |
| { | |
| "epoch": 1.1912350597609562, | |
| "grad_norm": 1.3671875, | |
| "learning_rate": 1.5e-06, | |
| "loss": 1.1475, | |
| "memory/device_reserved (GiB)": 77.63, | |
| "memory/max_active (GiB)": 65.77, | |
| "memory/max_allocated (GiB)": 65.77, | |
| "step": 75, | |
| "tokens_per_second_per_gpu": 5856.14, | |
| "total_tokens": 13840656 | |
| }, | |
| { | |
| "epoch": 1.207171314741036, | |
| "grad_norm": 1.3984375, | |
| "learning_rate": 1.4837188871052397e-06, | |
| "loss": 1.1494, | |
| "memory/device_reserved (GiB)": 77.63, | |
| "memory/max_active (GiB)": 65.77, | |
| "memory/max_allocated (GiB)": 65.77, | |
| "step": 76, | |
| "tokens_per_second_per_gpu": 5621.93, | |
| "total_tokens": 14027447 | |
| }, | |
| { | |
| "epoch": 1.2231075697211156, | |
| "grad_norm": 1.3125, | |
| "learning_rate": 1.467268628273062e-06, | |
| "loss": 1.1133, | |
| "memory/device_reserved (GiB)": 77.63, | |
| "memory/max_active (GiB)": 65.77, | |
| "memory/max_allocated (GiB)": 65.77, | |
| "step": 77, | |
| "tokens_per_second_per_gpu": 5678.9, | |
| "total_tokens": 14220483 | |
| }, | |
| { | |
| "epoch": 1.2390438247011952, | |
| "grad_norm": 1.2734375, | |
| "learning_rate": 1.4506549757999453e-06, | |
| "loss": 1.1182, | |
| "memory/device_reserved (GiB)": 77.63, | |
| "memory/max_active (GiB)": 65.77, | |
| "memory/max_allocated (GiB)": 65.77, | |
| "step": 78, | |
| "tokens_per_second_per_gpu": 5610.92, | |
| "total_tokens": 14409698 | |
| }, | |
| { | |
| "epoch": 1.254980079681275, | |
| "grad_norm": 1.4296875, | |
| "learning_rate": 1.433883739117558e-06, | |
| "loss": 1.1582, | |
| "memory/device_reserved (GiB)": 77.63, | |
| "memory/max_active (GiB)": 65.77, | |
| "memory/max_allocated (GiB)": 65.77, | |
| "step": 79, | |
| "tokens_per_second_per_gpu": 5624.06, | |
| "total_tokens": 14594318 | |
| }, | |
| { | |
| "epoch": 1.2709163346613546, | |
| "grad_norm": 1.328125, | |
| "learning_rate": 1.4169607827613282e-06, | |
| "loss": 1.0762, | |
| "memory/device_reserved (GiB)": 77.63, | |
| "memory/max_active (GiB)": 65.77, | |
| "memory/max_allocated (GiB)": 65.77, | |
| "step": 80, | |
| "tokens_per_second_per_gpu": 6081.02, | |
| "total_tokens": 14788534 | |
| }, | |
| { | |
| "epoch": 1.2868525896414342, | |
| "grad_norm": 1.3125, | |
| "learning_rate": 1.3998920243197408e-06, | |
| "loss": 1.1372, | |
| "memory/device_reserved (GiB)": 77.63, | |
| "memory/max_active (GiB)": 65.77, | |
| "memory/max_allocated (GiB)": 65.77, | |
| "step": 81, | |
| "tokens_per_second_per_gpu": 5511.47, | |
| "total_tokens": 14970672 | |
| }, | |
| { | |
| "epoch": 1.302788844621514, | |
| "grad_norm": 1.3671875, | |
| "learning_rate": 1.3826834323650898e-06, | |
| "loss": 1.1167, | |
| "memory/device_reserved (GiB)": 77.63, | |
| "memory/max_active (GiB)": 65.77, | |
| "memory/max_allocated (GiB)": 65.77, | |
| "step": 82, | |
| "tokens_per_second_per_gpu": 5614.12, | |
| "total_tokens": 15154574 | |
| }, | |
| { | |
| "epoch": 1.3187250996015936, | |
| "grad_norm": 1.390625, | |
| "learning_rate": 1.3653410243663951e-06, | |
| "loss": 1.1455, | |
| "memory/device_reserved (GiB)": 77.63, | |
| "memory/max_active (GiB)": 65.77, | |
| "memory/max_allocated (GiB)": 65.77, | |
| "step": 83, | |
| "tokens_per_second_per_gpu": 5438.76, | |
| "total_tokens": 15337599 | |
| }, | |
| { | |
| "epoch": 1.3346613545816732, | |
| "grad_norm": 1.234375, | |
| "learning_rate": 1.347870864585227e-06, | |
| "loss": 1.103, | |
| "memory/device_reserved (GiB)": 77.63, | |
| "memory/max_active (GiB)": 65.77, | |
| "memory/max_allocated (GiB)": 65.77, | |
| "step": 84, | |
| "tokens_per_second_per_gpu": 5795.88, | |
| "total_tokens": 15530461 | |
| }, | |
| { | |
| "epoch": 1.3505976095617531, | |
| "grad_norm": 1.3515625, | |
| "learning_rate": 1.3302790619551672e-06, | |
| "loss": 1.1162, | |
| "memory/device_reserved (GiB)": 77.63, | |
| "memory/max_active (GiB)": 65.77, | |
| "memory/max_allocated (GiB)": 65.77, | |
| "step": 85, | |
| "tokens_per_second_per_gpu": 5723.72, | |
| "total_tokens": 15712584 | |
| }, | |
| { | |
| "epoch": 1.3665338645418328, | |
| "grad_norm": 1.2265625, | |
| "learning_rate": 1.3125717679456444e-06, | |
| "loss": 1.0786, | |
| "memory/device_reserved (GiB)": 77.63, | |
| "memory/max_active (GiB)": 65.77, | |
| "memory/max_allocated (GiB)": 65.77, | |
| "step": 86, | |
| "tokens_per_second_per_gpu": 5598.4, | |
| "total_tokens": 15899301 | |
| }, | |
| { | |
| "epoch": 1.3824701195219125, | |
| "grad_norm": 1.1875, | |
| "learning_rate": 1.2947551744109043e-06, | |
| "loss": 1.082, | |
| "memory/device_reserved (GiB)": 77.63, | |
| "memory/max_active (GiB)": 65.77, | |
| "memory/max_allocated (GiB)": 65.77, | |
| "step": 87, | |
| "tokens_per_second_per_gpu": 5458.74, | |
| "total_tokens": 16085653 | |
| }, | |
| { | |
| "epoch": 1.3984063745019921, | |
| "grad_norm": 1.3515625, | |
| "learning_rate": 1.2768355114248492e-06, | |
| "loss": 1.1436, | |
| "memory/device_reserved (GiB)": 77.63, | |
| "memory/max_active (GiB)": 65.77, | |
| "memory/max_allocated (GiB)": 65.77, | |
| "step": 88, | |
| "tokens_per_second_per_gpu": 5579.84, | |
| "total_tokens": 16266291 | |
| }, | |
| { | |
| "epoch": 1.4143426294820718, | |
| "grad_norm": 1.2734375, | |
| "learning_rate": 1.2588190451025207e-06, | |
| "loss": 1.1914, | |
| "memory/device_reserved (GiB)": 77.63, | |
| "memory/max_active (GiB)": 65.77, | |
| "memory/max_allocated (GiB)": 65.77, | |
| "step": 89, | |
| "tokens_per_second_per_gpu": 5336.85, | |
| "total_tokens": 16450627 | |
| }, | |
| { | |
| "epoch": 1.4302788844621515, | |
| "grad_norm": 1.25, | |
| "learning_rate": 1.240712075408973e-06, | |
| "loss": 1.167, | |
| "memory/device_reserved (GiB)": 77.63, | |
| "memory/max_active (GiB)": 65.77, | |
| "memory/max_allocated (GiB)": 65.77, | |
| "step": 90, | |
| "tokens_per_second_per_gpu": 5445.98, | |
| "total_tokens": 16631917 | |
| }, | |
| { | |
| "epoch": 1.4462151394422311, | |
| "grad_norm": 1.265625, | |
| "learning_rate": 1.2225209339563143e-06, | |
| "loss": 1.1895, | |
| "memory/device_reserved (GiB)": 77.63, | |
| "memory/max_active (GiB)": 65.77, | |
| "memory/max_allocated (GiB)": 65.77, | |
| "step": 91, | |
| "tokens_per_second_per_gpu": 5330.92, | |
| "total_tokens": 16808287 | |
| }, | |
| { | |
| "epoch": 1.4621513944223108, | |
| "grad_norm": 1.25, | |
| "learning_rate": 1.2042519817896804e-06, | |
| "loss": 1.1357, | |
| "memory/device_reserved (GiB)": 77.63, | |
| "memory/max_active (GiB)": 65.77, | |
| "memory/max_allocated (GiB)": 65.77, | |
| "step": 92, | |
| "tokens_per_second_per_gpu": 5515.76, | |
| "total_tokens": 16990241 | |
| }, | |
| { | |
| "epoch": 1.4780876494023905, | |
| "grad_norm": 1.328125, | |
| "learning_rate": 1.1859116071629147e-06, | |
| "loss": 1.1187, | |
| "memory/device_reserved (GiB)": 77.63, | |
| "memory/max_active (GiB)": 65.77, | |
| "memory/max_allocated (GiB)": 65.77, | |
| "step": 93, | |
| "tokens_per_second_per_gpu": 5890.54, | |
| "total_tokens": 17176063 | |
| }, | |
| { | |
| "epoch": 1.4940239043824701, | |
| "grad_norm": 1.2265625, | |
| "learning_rate": 1.1675062233047363e-06, | |
| "loss": 1.1211, | |
| "memory/device_reserved (GiB)": 77.63, | |
| "memory/max_active (GiB)": 65.77, | |
| "memory/max_allocated (GiB)": 65.77, | |
| "step": 94, | |
| "tokens_per_second_per_gpu": 5413.95, | |
| "total_tokens": 17361982 | |
| }, | |
| { | |
| "epoch": 1.5099601593625498, | |
| "grad_norm": 1.2734375, | |
| "learning_rate": 1.1490422661761743e-06, | |
| "loss": 1.1294, | |
| "memory/device_reserved (GiB)": 77.63, | |
| "memory/max_active (GiB)": 65.77, | |
| "memory/max_allocated (GiB)": 65.77, | |
| "step": 95, | |
| "tokens_per_second_per_gpu": 5447.34, | |
| "total_tokens": 17542854 | |
| }, | |
| { | |
| "epoch": 1.5258964143426295, | |
| "grad_norm": 1.1875, | |
| "learning_rate": 1.1305261922200517e-06, | |
| "loss": 1.125, | |
| "memory/device_reserved (GiB)": 77.63, | |
| "memory/max_active (GiB)": 65.77, | |
| "memory/max_allocated (GiB)": 65.77, | |
| "step": 96, | |
| "tokens_per_second_per_gpu": 5531.41, | |
| "total_tokens": 17731258 | |
| }, | |
| { | |
| "epoch": 1.5418326693227091, | |
| "grad_norm": 1.2109375, | |
| "learning_rate": 1.1119644761033077e-06, | |
| "loss": 1.1963, | |
| "memory/device_reserved (GiB)": 77.63, | |
| "memory/max_active (GiB)": 65.77, | |
| "memory/max_allocated (GiB)": 65.77, | |
| "step": 97, | |
| "tokens_per_second_per_gpu": 5812.79, | |
| "total_tokens": 17918424 | |
| }, | |
| { | |
| "epoch": 1.5577689243027888, | |
| "grad_norm": 1.203125, | |
| "learning_rate": 1.0933636084529506e-06, | |
| "loss": 1.2041, | |
| "memory/device_reserved (GiB)": 77.63, | |
| "memory/max_active (GiB)": 65.77, | |
| "memory/max_allocated (GiB)": 65.77, | |
| "step": 98, | |
| "tokens_per_second_per_gpu": 5613.31, | |
| "total_tokens": 18103451 | |
| }, | |
| { | |
| "epoch": 1.5737051792828685, | |
| "grad_norm": 1.2578125, | |
| "learning_rate": 1.0747300935864243e-06, | |
| "loss": 1.168, | |
| "memory/device_reserved (GiB)": 77.63, | |
| "memory/max_active (GiB)": 65.77, | |
| "memory/max_allocated (GiB)": 65.77, | |
| "step": 99, | |
| "tokens_per_second_per_gpu": 5555.4, | |
| "total_tokens": 18281544 | |
| }, | |
| { | |
| "epoch": 1.5896414342629481, | |
| "grad_norm": 1.171875, | |
| "learning_rate": 1.0560704472371917e-06, | |
| "loss": 1.1768, | |
| "memory/device_reserved (GiB)": 77.63, | |
| "memory/max_active (GiB)": 65.77, | |
| "memory/max_allocated (GiB)": 65.77, | |
| "step": 100, | |
| "tokens_per_second_per_gpu": 5413.52, | |
| "total_tokens": 18465867 | |
| }, | |
| { | |
| "epoch": 1.6055776892430278, | |
| "grad_norm": 1.15625, | |
| "learning_rate": 1.037391194276326e-06, | |
| "loss": 1.1162, | |
| "memory/device_reserved (GiB)": 77.63, | |
| "memory/max_active (GiB)": 65.77, | |
| "memory/max_allocated (GiB)": 65.77, | |
| "step": 101, | |
| "tokens_per_second_per_gpu": 5209.56, | |
| "total_tokens": 18647472 | |
| }, | |
| { | |
| "epoch": 1.6215139442231075, | |
| "grad_norm": 1.1328125, | |
| "learning_rate": 1.0186988664309022e-06, | |
| "loss": 1.0986, | |
| "memory/device_reserved (GiB)": 77.63, | |
| "memory/max_active (GiB)": 65.77, | |
| "memory/max_allocated (GiB)": 65.77, | |
| "step": 102, | |
| "tokens_per_second_per_gpu": 5913.8, | |
| "total_tokens": 18837099 | |
| }, | |
| { | |
| "epoch": 1.6374501992031871, | |
| "grad_norm": 1.21875, | |
| "learning_rate": 1e-06, | |
| "loss": 1.1392, | |
| "memory/device_reserved (GiB)": 77.63, | |
| "memory/max_active (GiB)": 65.77, | |
| "memory/max_allocated (GiB)": 65.77, | |
| "step": 103, | |
| "tokens_per_second_per_gpu": 5669.98, | |
| "total_tokens": 19019913 | |
| }, | |
| { | |
| "epoch": 1.6533864541832668, | |
| "grad_norm": 1.2109375, | |
| "learning_rate": 9.81301133569098e-07, | |
| "loss": 1.1582, | |
| "memory/device_reserved (GiB)": 77.63, | |
| "memory/max_active (GiB)": 65.77, | |
| "memory/max_allocated (GiB)": 65.77, | |
| "step": 104, | |
| "tokens_per_second_per_gpu": 5370.41, | |
| "total_tokens": 19197751 | |
| }, | |
| { | |
| "epoch": 1.6693227091633465, | |
| "grad_norm": 1.1796875, | |
| "learning_rate": 9.626088057236744e-07, | |
| "loss": 1.1318, | |
| "memory/device_reserved (GiB)": 77.63, | |
| "memory/max_active (GiB)": 65.77, | |
| "memory/max_allocated (GiB)": 65.77, | |
| "step": 105, | |
| "tokens_per_second_per_gpu": 5391.77, | |
| "total_tokens": 19380658 | |
| }, | |
| { | |
| "epoch": 1.6852589641434261, | |
| "grad_norm": 1.09375, | |
| "learning_rate": 9.43929552762808e-07, | |
| "loss": 1.1084, | |
| "memory/device_reserved (GiB)": 77.63, | |
| "memory/max_active (GiB)": 65.77, | |
| "memory/max_allocated (GiB)": 65.77, | |
| "step": 106, | |
| "tokens_per_second_per_gpu": 5564.12, | |
| "total_tokens": 19570234 | |
| }, | |
| { | |
| "epoch": 1.701195219123506, | |
| "grad_norm": 1.1640625, | |
| "learning_rate": 9.252699064135758e-07, | |
| "loss": 1.1616, | |
| "memory/device_reserved (GiB)": 77.63, | |
| "memory/max_active (GiB)": 65.77, | |
| "memory/max_allocated (GiB)": 65.77, | |
| "step": 107, | |
| "tokens_per_second_per_gpu": 5485.18, | |
| "total_tokens": 19754769 | |
| }, | |
| { | |
| "epoch": 1.7171314741035857, | |
| "grad_norm": 1.1328125, | |
| "learning_rate": 9.066363915470494e-07, | |
| "loss": 1.106, | |
| "memory/device_reserved (GiB)": 77.63, | |
| "memory/max_active (GiB)": 65.77, | |
| "memory/max_allocated (GiB)": 65.77, | |
| "step": 108, | |
| "tokens_per_second_per_gpu": 5839.07, | |
| "total_tokens": 19939974 | |
| }, | |
| { | |
| "epoch": 1.7330677290836654, | |
| "grad_norm": 1.1328125, | |
| "learning_rate": 8.880355238966921e-07, | |
| "loss": 1.125, | |
| "memory/device_reserved (GiB)": 77.63, | |
| "memory/max_active (GiB)": 65.77, | |
| "memory/max_allocated (GiB)": 65.77, | |
| "step": 109, | |
| "tokens_per_second_per_gpu": 5601.59, | |
| "total_tokens": 20124754 | |
| }, | |
| { | |
| "epoch": 1.749003984063745, | |
| "grad_norm": 1.15625, | |
| "learning_rate": 8.694738077799486e-07, | |
| "loss": 1.1084, | |
| "memory/device_reserved (GiB)": 77.63, | |
| "memory/max_active (GiB)": 65.77, | |
| "memory/max_allocated (GiB)": 65.77, | |
| "step": 110, | |
| "tokens_per_second_per_gpu": 5601.31, | |
| "total_tokens": 20315537 | |
| }, | |
| { | |
| "epoch": 1.7649402390438247, | |
| "grad_norm": 1.1640625, | |
| "learning_rate": 8.509577338238254e-07, | |
| "loss": 1.1602, | |
| "memory/device_reserved (GiB)": 77.63, | |
| "memory/max_active (GiB)": 65.77, | |
| "memory/max_allocated (GiB)": 65.77, | |
| "step": 111, | |
| "tokens_per_second_per_gpu": 5767.0, | |
| "total_tokens": 20501757 | |
| }, | |
| { | |
| "epoch": 1.7808764940239044, | |
| "grad_norm": 1.1796875, | |
| "learning_rate": 8.324937766952636e-07, | |
| "loss": 1.1094, | |
| "memory/device_reserved (GiB)": 77.63, | |
| "memory/max_active (GiB)": 65.77, | |
| "memory/max_allocated (GiB)": 65.77, | |
| "step": 112, | |
| "tokens_per_second_per_gpu": 5679.05, | |
| "total_tokens": 20686600 | |
| }, | |
| { | |
| "epoch": 1.796812749003984, | |
| "grad_norm": 1.1015625, | |
| "learning_rate": 8.140883928370854e-07, | |
| "loss": 1.0659, | |
| "memory/device_reserved (GiB)": 77.63, | |
| "memory/max_active (GiB)": 65.77, | |
| "memory/max_allocated (GiB)": 65.77, | |
| "step": 113, | |
| "tokens_per_second_per_gpu": 5176.17, | |
| "total_tokens": 20882903 | |
| }, | |
| { | |
| "epoch": 1.812749003984064, | |
| "grad_norm": 1.109375, | |
| "learning_rate": 7.957480182103197e-07, | |
| "loss": 1.0562, | |
| "memory/device_reserved (GiB)": 77.63, | |
| "memory/max_active (GiB)": 65.77, | |
| "memory/max_allocated (GiB)": 65.77, | |
| "step": 114, | |
| "tokens_per_second_per_gpu": 5609.67, | |
| "total_tokens": 21073809 | |
| }, | |
| { | |
| "epoch": 1.8286852589641436, | |
| "grad_norm": 1.09375, | |
| "learning_rate": 7.774790660436857e-07, | |
| "loss": 1.0747, | |
| "memory/device_reserved (GiB)": 77.63, | |
| "memory/max_active (GiB)": 65.77, | |
| "memory/max_allocated (GiB)": 65.77, | |
| "step": 115, | |
| "tokens_per_second_per_gpu": 5480.8, | |
| "total_tokens": 21259881 | |
| }, | |
| { | |
| "epoch": 1.8446215139442232, | |
| "grad_norm": 1.0390625, | |
| "learning_rate": 7.592879245910272e-07, | |
| "loss": 1.0459, | |
| "memory/device_reserved (GiB)": 77.63, | |
| "memory/max_active (GiB)": 65.77, | |
| "memory/max_allocated (GiB)": 65.77, | |
| "step": 116, | |
| "tokens_per_second_per_gpu": 5854.74, | |
| "total_tokens": 21451515 | |
| }, | |
| { | |
| "epoch": 1.860557768924303, | |
| "grad_norm": 1.1484375, | |
| "learning_rate": 7.411809548974791e-07, | |
| "loss": 1.1191, | |
| "memory/device_reserved (GiB)": 77.63, | |
| "memory/max_active (GiB)": 65.77, | |
| "memory/max_allocated (GiB)": 65.77, | |
| "step": 117, | |
| "tokens_per_second_per_gpu": 5607.88, | |
| "total_tokens": 21637511 | |
| }, | |
| { | |
| "epoch": 1.8764940239043826, | |
| "grad_norm": 1.1328125, | |
| "learning_rate": 7.231644885751507e-07, | |
| "loss": 1.1377, | |
| "memory/device_reserved (GiB)": 77.63, | |
| "memory/max_active (GiB)": 65.77, | |
| "memory/max_allocated (GiB)": 65.77, | |
| "step": 118, | |
| "tokens_per_second_per_gpu": 5697.72, | |
| "total_tokens": 21823226 | |
| }, | |
| { | |
| "epoch": 1.8924302788844622, | |
| "grad_norm": 1.1171875, | |
| "learning_rate": 7.052448255890957e-07, | |
| "loss": 1.0869, | |
| "memory/device_reserved (GiB)": 77.63, | |
| "memory/max_active (GiB)": 65.77, | |
| "memory/max_allocated (GiB)": 65.77, | |
| "step": 119, | |
| "tokens_per_second_per_gpu": 6034.6, | |
| "total_tokens": 22008960 | |
| }, | |
| { | |
| "epoch": 1.908366533864542, | |
| "grad_norm": 1.1171875, | |
| "learning_rate": 6.874282320543556e-07, | |
| "loss": 1.144, | |
| "memory/device_reserved (GiB)": 77.63, | |
| "memory/max_active (GiB)": 65.77, | |
| "memory/max_allocated (GiB)": 65.77, | |
| "step": 120, | |
| "tokens_per_second_per_gpu": 5668.36, | |
| "total_tokens": 22201150 | |
| }, | |
| { | |
| "epoch": 1.9243027888446216, | |
| "grad_norm": 1.0859375, | |
| "learning_rate": 6.697209380448332e-07, | |
| "loss": 1.1255, | |
| "memory/device_reserved (GiB)": 77.63, | |
| "memory/max_active (GiB)": 65.77, | |
| "memory/max_allocated (GiB)": 65.77, | |
| "step": 121, | |
| "tokens_per_second_per_gpu": 5720.74, | |
| "total_tokens": 22390135 | |
| }, | |
| { | |
| "epoch": 1.9402390438247012, | |
| "grad_norm": 1.15625, | |
| "learning_rate": 6.521291354147726e-07, | |
| "loss": 1.1104, | |
| "memory/device_reserved (GiB)": 77.63, | |
| "memory/max_active (GiB)": 65.77, | |
| "memory/max_allocated (GiB)": 65.77, | |
| "step": 122, | |
| "tokens_per_second_per_gpu": 5870.28, | |
| "total_tokens": 22575681 | |
| }, | |
| { | |
| "epoch": 1.956175298804781, | |
| "grad_norm": 1.0625, | |
| "learning_rate": 6.34658975633605e-07, | |
| "loss": 1.1221, | |
| "memory/device_reserved (GiB)": 77.63, | |
| "memory/max_active (GiB)": 65.77, | |
| "memory/max_allocated (GiB)": 65.77, | |
| "step": 123, | |
| "tokens_per_second_per_gpu": 5453.27, | |
| "total_tokens": 22761750 | |
| }, | |
| { | |
| "epoch": 1.9721115537848606, | |
| "grad_norm": 1.0859375, | |
| "learning_rate": 6.173165676349102e-07, | |
| "loss": 1.0967, | |
| "memory/device_reserved (GiB)": 77.63, | |
| "memory/max_active (GiB)": 65.77, | |
| "memory/max_allocated (GiB)": 65.77, | |
| "step": 124, | |
| "tokens_per_second_per_gpu": 4989.67, | |
| "total_tokens": 22947396 | |
| }, | |
| { | |
| "epoch": 1.9880478087649402, | |
| "grad_norm": 1.0390625, | |
| "learning_rate": 6.001079756802592e-07, | |
| "loss": 1.0703, | |
| "memory/device_reserved (GiB)": 77.63, | |
| "memory/max_active (GiB)": 65.77, | |
| "memory/max_allocated (GiB)": 65.77, | |
| "step": 125, | |
| "tokens_per_second_per_gpu": 5385.24, | |
| "total_tokens": 23131511 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "grad_norm": 1.140625, | |
| "learning_rate": 5.830392172386722e-07, | |
| "loss": 1.1328, | |
| "memory/device_reserved (GiB)": 77.63, | |
| "memory/max_active (GiB)": 65.77, | |
| "memory/max_allocated (GiB)": 65.77, | |
| "step": 126, | |
| "tokens_per_second_per_gpu": 4540.27, | |
| "total_tokens": 23254742 | |
| }, | |
| { | |
| "epoch": 2.0159362549800797, | |
| "grad_norm": 1.03125, | |
| "learning_rate": 5.661162608824419e-07, | |
| "loss": 1.061, | |
| "memory/device_reserved (GiB)": 77.63, | |
| "memory/max_active (GiB)": 65.77, | |
| "memory/max_allocated (GiB)": 65.77, | |
| "step": 127, | |
| "tokens_per_second_per_gpu": 5523.13, | |
| "total_tokens": 23442702 | |
| }, | |
| { | |
| "epoch": 2.0318725099601593, | |
| "grad_norm": 1.140625, | |
| "learning_rate": 5.493450242000546e-07, | |
| "loss": 1.1201, | |
| "memory/device_reserved (GiB)": 77.63, | |
| "memory/max_active (GiB)": 65.77, | |
| "memory/max_allocated (GiB)": 65.77, | |
| "step": 128, | |
| "tokens_per_second_per_gpu": 5699.3, | |
| "total_tokens": 23634749 | |
| }, | |
| { | |
| "epoch": 2.047808764940239, | |
| "grad_norm": 1.109375, | |
| "learning_rate": 5.327313717269379e-07, | |
| "loss": 1.1875, | |
| "memory/device_reserved (GiB)": 77.63, | |
| "memory/max_active (GiB)": 65.77, | |
| "memory/max_allocated (GiB)": 65.77, | |
| "step": 129, | |
| "tokens_per_second_per_gpu": 5607.25, | |
| "total_tokens": 23821321 | |
| }, | |
| { | |
| "epoch": 2.0637450199203187, | |
| "grad_norm": 1.15625, | |
| "learning_rate": 5.162811128947602e-07, | |
| "loss": 1.1191, | |
| "memory/device_reserved (GiB)": 77.63, | |
| "memory/max_active (GiB)": 65.77, | |
| "memory/max_allocated (GiB)": 65.77, | |
| "step": 130, | |
| "tokens_per_second_per_gpu": 5781.65, | |
| "total_tokens": 24009420 | |
| }, | |
| { | |
| "epoch": 2.0796812749003983, | |
| "grad_norm": 1.1015625, | |
| "learning_rate": 5.000000000000002e-07, | |
| "loss": 1.1104, | |
| "memory/device_reserved (GiB)": 77.63, | |
| "memory/max_active (GiB)": 65.77, | |
| "memory/max_allocated (GiB)": 65.77, | |
| "step": 131, | |
| "tokens_per_second_per_gpu": 5505.55, | |
| "total_tokens": 24189826 | |
| }, | |
| { | |
| "epoch": 2.095617529880478, | |
| "grad_norm": 1.1796875, | |
| "learning_rate": 4.838937261924933e-07, | |
| "loss": 1.1973, | |
| "memory/device_reserved (GiB)": 77.63, | |
| "memory/max_active (GiB)": 65.77, | |
| "memory/max_allocated (GiB)": 65.77, | |
| "step": 132, | |
| "tokens_per_second_per_gpu": 5512.31, | |
| "total_tokens": 24368779 | |
| }, | |
| { | |
| "epoch": 2.1115537848605577, | |
| "grad_norm": 1.1015625, | |
| "learning_rate": 4.6796792348466353e-07, | |
| "loss": 1.0444, | |
| "memory/device_reserved (GiB)": 77.63, | |
| "memory/max_active (GiB)": 65.77, | |
| "memory/max_allocated (GiB)": 65.77, | |
| "step": 133, | |
| "tokens_per_second_per_gpu": 5465.08, | |
| "total_tokens": 24556917 | |
| }, | |
| { | |
| "epoch": 2.1274900398406373, | |
| "grad_norm": 1.1015625, | |
| "learning_rate": 4.522281607821288e-07, | |
| "loss": 1.1206, | |
| "memory/device_reserved (GiB)": 77.63, | |
| "memory/max_active (GiB)": 65.77, | |
| "memory/max_allocated (GiB)": 65.77, | |
| "step": 134, | |
| "tokens_per_second_per_gpu": 5419.8, | |
| "total_tokens": 24738084 | |
| }, | |
| { | |
| "epoch": 2.143426294820717, | |
| "grad_norm": 1.03125, | |
| "learning_rate": 4.366799419363779e-07, | |
| "loss": 1.1143, | |
| "memory/device_reserved (GiB)": 77.63, | |
| "memory/max_active (GiB)": 65.77, | |
| "memory/max_allocated (GiB)": 65.77, | |
| "step": 135, | |
| "tokens_per_second_per_gpu": 5539.41, | |
| "total_tokens": 24922074 | |
| }, | |
| { | |
| "epoch": 2.1593625498007967, | |
| "grad_norm": 1.1328125, | |
| "learning_rate": 4.2132870382019427e-07, | |
| "loss": 1.1924, | |
| "memory/device_reserved (GiB)": 77.63, | |
| "memory/max_active (GiB)": 65.77, | |
| "memory/max_allocated (GiB)": 65.77, | |
| "step": 136, | |
| "tokens_per_second_per_gpu": 5444.42, | |
| "total_tokens": 25104099 | |
| }, | |
| { | |
| "epoch": 2.1752988047808763, | |
| "grad_norm": 1.1328125, | |
| "learning_rate": 4.061798144264985e-07, | |
| "loss": 1.1885, | |
| "memory/device_reserved (GiB)": 77.63, | |
| "memory/max_active (GiB)": 65.77, | |
| "memory/max_allocated (GiB)": 65.77, | |
| "step": 137, | |
| "tokens_per_second_per_gpu": 5332.72, | |
| "total_tokens": 25281595 | |
| }, | |
| { | |
| "epoch": 2.191235059760956, | |
| "grad_norm": 1.0703125, | |
| "learning_rate": 3.912385709912793e-07, | |
| "loss": 1.1318, | |
| "memory/device_reserved (GiB)": 77.63, | |
| "memory/max_active (GiB)": 65.77, | |
| "memory/max_allocated (GiB)": 65.77, | |
| "step": 138, | |
| "tokens_per_second_per_gpu": 5857.46, | |
| "total_tokens": 25468027 | |
| }, | |
| { | |
| "epoch": 2.2071713147410357, | |
| "grad_norm": 1.1015625, | |
| "learning_rate": 3.765101981412665e-07, | |
| "loss": 1.1328, | |
| "memory/device_reserved (GiB)": 77.63, | |
| "memory/max_active (GiB)": 65.77, | |
| "memory/max_allocated (GiB)": 65.77, | |
| "step": 139, | |
| "tokens_per_second_per_gpu": 5613.35, | |
| "total_tokens": 25654818 | |
| }, | |
| { | |
| "epoch": 2.2231075697211153, | |
| "grad_norm": 1.0390625, | |
| "learning_rate": 3.6199984606699153e-07, | |
| "loss": 1.0981, | |
| "memory/device_reserved (GiB)": 77.63, | |
| "memory/max_active (GiB)": 65.77, | |
| "memory/max_allocated (GiB)": 65.77, | |
| "step": 140, | |
| "tokens_per_second_per_gpu": 5586.6, | |
| "total_tokens": 25847854 | |
| }, | |
| { | |
| "epoch": 2.239043824701195, | |
| "grad_norm": 1.015625, | |
| "learning_rate": 3.477125887218791e-07, | |
| "loss": 1.105, | |
| "memory/device_reserved (GiB)": 77.63, | |
| "memory/max_active (GiB)": 65.77, | |
| "memory/max_allocated (GiB)": 65.77, | |
| "step": 141, | |
| "tokens_per_second_per_gpu": 5723.95, | |
| "total_tokens": 26037069 | |
| }, | |
| { | |
| "epoch": 2.2549800796812747, | |
| "grad_norm": 1.15625, | |
| "learning_rate": 3.3365342204799606e-07, | |
| "loss": 1.1416, | |
| "memory/device_reserved (GiB)": 77.63, | |
| "memory/max_active (GiB)": 65.77, | |
| "memory/max_allocated (GiB)": 65.77, | |
| "step": 142, | |
| "tokens_per_second_per_gpu": 5609.66, | |
| "total_tokens": 26221689 | |
| }, | |
| { | |
| "epoch": 2.2709163346613543, | |
| "grad_norm": 1.0703125, | |
| "learning_rate": 3.198272622290804e-07, | |
| "loss": 1.0625, | |
| "memory/device_reserved (GiB)": 77.63, | |
| "memory/max_active (GiB)": 65.77, | |
| "memory/max_allocated (GiB)": 65.77, | |
| "step": 143, | |
| "tokens_per_second_per_gpu": 6065.87, | |
| "total_tokens": 26415905 | |
| }, | |
| { | |
| "epoch": 2.2868525896414345, | |
| "grad_norm": 1.0703125, | |
| "learning_rate": 3.0623894397145833e-07, | |
| "loss": 1.123, | |
| "memory/device_reserved (GiB)": 77.63, | |
| "memory/max_active (GiB)": 65.77, | |
| "memory/max_allocated (GiB)": 65.77, | |
| "step": 144, | |
| "tokens_per_second_per_gpu": 5513.26, | |
| "total_tokens": 26598043 | |
| }, | |
| { | |
| "epoch": 2.302788844621514, | |
| "grad_norm": 1.078125, | |
| "learning_rate": 2.9289321881345254e-07, | |
| "loss": 1.103, | |
| "memory/device_reserved (GiB)": 77.63, | |
| "memory/max_active (GiB)": 65.77, | |
| "memory/max_allocated (GiB)": 65.77, | |
| "step": 145, | |
| "tokens_per_second_per_gpu": 5505.93, | |
| "total_tokens": 26781945 | |
| }, | |
| { | |
| "epoch": 2.318725099601594, | |
| "grad_norm": 1.140625, | |
| "learning_rate": 2.797947534638736e-07, | |
| "loss": 1.1348, | |
| "memory/device_reserved (GiB)": 77.63, | |
| "memory/max_active (GiB)": 65.77, | |
| "memory/max_allocated (GiB)": 65.77, | |
| "step": 146, | |
| "tokens_per_second_per_gpu": 5529.92, | |
| "total_tokens": 26964970 | |
| }, | |
| { | |
| "epoch": 2.3346613545816735, | |
| "grad_norm": 1.0234375, | |
| "learning_rate": 2.6694812817017387e-07, | |
| "loss": 1.0938, | |
| "memory/device_reserved (GiB)": 77.63, | |
| "memory/max_active (GiB)": 65.77, | |
| "memory/max_allocated (GiB)": 65.77, | |
| "step": 147, | |
| "tokens_per_second_per_gpu": 5703.33, | |
| "total_tokens": 27157832 | |
| }, | |
| { | |
| "epoch": 2.350597609561753, | |
| "grad_norm": 1.1640625, | |
| "learning_rate": 2.543578351168344e-07, | |
| "loss": 1.1045, | |
| "memory/device_reserved (GiB)": 77.63, | |
| "memory/max_active (GiB)": 65.77, | |
| "memory/max_allocated (GiB)": 65.77, | |
| "step": 148, | |
| "tokens_per_second_per_gpu": 5833.69, | |
| "total_tokens": 27339955 | |
| }, | |
| { | |
| "epoch": 2.366533864541833, | |
| "grad_norm": 1.03125, | |
| "learning_rate": 2.4202827685454687e-07, | |
| "loss": 1.0674, | |
| "memory/device_reserved (GiB)": 77.63, | |
| "memory/max_active (GiB)": 65.77, | |
| "memory/max_allocated (GiB)": 65.77, | |
| "step": 149, | |
| "tokens_per_second_per_gpu": 5586.43, | |
| "total_tokens": 27526672 | |
| }, | |
| { | |
| "epoch": 2.3824701195219125, | |
| "grad_norm": 1.03125, | |
| "learning_rate": 2.299637647607372e-07, | |
| "loss": 1.0728, | |
| "memory/device_reserved (GiB)": 77.63, | |
| "memory/max_active (GiB)": 65.77, | |
| "memory/max_allocated (GiB)": 65.77, | |
| "step": 150, | |
| "tokens_per_second_per_gpu": 5369.71, | |
| "total_tokens": 27713024 | |
| }, | |
| { | |
| "epoch": 2.398406374501992, | |
| "grad_norm": 1.1640625, | |
| "learning_rate": 2.181685175319702e-07, | |
| "loss": 1.1318, | |
| "memory/device_reserved (GiB)": 77.63, | |
| "memory/max_active (GiB)": 65.77, | |
| "memory/max_allocated (GiB)": 65.77, | |
| "step": 151, | |
| "tokens_per_second_per_gpu": 5560.0, | |
| "total_tokens": 27893662 | |
| }, | |
| { | |
| "epoch": 2.414342629482072, | |
| "grad_norm": 1.1015625, | |
| "learning_rate": 2.0664665970876495e-07, | |
| "loss": 1.1807, | |
| "memory/device_reserved (GiB)": 77.63, | |
| "memory/max_active (GiB)": 65.77, | |
| "memory/max_allocated (GiB)": 65.77, | |
| "step": 152, | |
| "tokens_per_second_per_gpu": 5346.34, | |
| "total_tokens": 28077998 | |
| }, | |
| { | |
| "epoch": 2.4302788844621515, | |
| "grad_norm": 1.078125, | |
| "learning_rate": 1.9540222023333163e-07, | |
| "loss": 1.1572, | |
| "memory/device_reserved (GiB)": 77.63, | |
| "memory/max_active (GiB)": 65.77, | |
| "memory/max_allocated (GiB)": 65.77, | |
| "step": 153, | |
| "tokens_per_second_per_gpu": 5554.36, | |
| "total_tokens": 28259288 | |
| }, | |
| { | |
| "epoch": 2.446215139442231, | |
| "grad_norm": 1.09375, | |
| "learning_rate": 1.8443913104073982e-07, | |
| "loss": 1.1807, | |
| "memory/device_reserved (GiB)": 77.63, | |
| "memory/max_active (GiB)": 65.77, | |
| "memory/max_allocated (GiB)": 65.77, | |
| "step": 154, | |
| "tokens_per_second_per_gpu": 5321.03, | |
| "total_tokens": 28435658 | |
| }, | |
| { | |
| "epoch": 2.462151394422311, | |
| "grad_norm": 1.1015625, | |
| "learning_rate": 1.737612256840053e-07, | |
| "loss": 1.127, | |
| "memory/device_reserved (GiB)": 77.63, | |
| "memory/max_active (GiB)": 65.77, | |
| "memory/max_allocated (GiB)": 65.77, | |
| "step": 155, | |
| "tokens_per_second_per_gpu": 5503.74, | |
| "total_tokens": 28617612 | |
| }, | |
| { | |
| "epoch": 2.4780876494023905, | |
| "grad_norm": 1.1640625, | |
| "learning_rate": 1.6337223799358024e-07, | |
| "loss": 1.1099, | |
| "memory/device_reserved (GiB)": 77.63, | |
| "memory/max_active (GiB)": 65.77, | |
| "memory/max_allocated (GiB)": 65.77, | |
| "step": 156, | |
| "tokens_per_second_per_gpu": 5737.94, | |
| "total_tokens": 28803434 | |
| }, | |
| { | |
| "epoch": 2.49402390438247, | |
| "grad_norm": 1.0859375, | |
| "learning_rate": 1.5327580077171588e-07, | |
| "loss": 1.1143, | |
| "memory/device_reserved (GiB)": 77.63, | |
| "memory/max_active (GiB)": 65.77, | |
| "memory/max_allocated (GiB)": 65.77, | |
| "step": 157, | |
| "tokens_per_second_per_gpu": 5419.26, | |
| "total_tokens": 28989353 | |
| }, | |
| { | |
| "epoch": 2.50996015936255, | |
| "grad_norm": 1.140625, | |
| "learning_rate": 1.4347544452214867e-07, | |
| "loss": 1.1216, | |
| "memory/device_reserved (GiB)": 77.63, | |
| "memory/max_active (GiB)": 65.77, | |
| "memory/max_allocated (GiB)": 65.77, | |
| "step": 158, | |
| "tokens_per_second_per_gpu": 5564.03, | |
| "total_tokens": 29170225 | |
| }, | |
| { | |
| "epoch": 2.5258964143426295, | |
| "grad_norm": 1.0703125, | |
| "learning_rate": 1.3397459621556128e-07, | |
| "loss": 1.1182, | |
| "memory/device_reserved (GiB)": 77.63, | |
| "memory/max_active (GiB)": 65.77, | |
| "memory/max_allocated (GiB)": 65.77, | |
| "step": 159, | |
| "tokens_per_second_per_gpu": 5533.6, | |
| "total_tokens": 29358629 | |
| }, | |
| { | |
| "epoch": 2.541832669322709, | |
| "grad_norm": 1.09375, | |
| "learning_rate": 1.2477657809124632e-07, | |
| "loss": 1.1895, | |
| "memory/device_reserved (GiB)": 77.63, | |
| "memory/max_active (GiB)": 65.77, | |
| "memory/max_allocated (GiB)": 65.77, | |
| "step": 160, | |
| "tokens_per_second_per_gpu": 5801.01, | |
| "total_tokens": 29545795 | |
| }, | |
| { | |
| "epoch": 2.557768924302789, | |
| "grad_norm": 1.09375, | |
| "learning_rate": 1.1588460649539034e-07, | |
| "loss": 1.1973, | |
| "memory/device_reserved (GiB)": 77.63, | |
| "memory/max_active (GiB)": 65.77, | |
| "memory/max_allocated (GiB)": 65.77, | |
| "step": 161, | |
| "tokens_per_second_per_gpu": 5496.74, | |
| "total_tokens": 29730822 | |
| }, | |
| { | |
| "epoch": 2.5737051792828685, | |
| "grad_norm": 1.1484375, | |
| "learning_rate": 1.0730179075638868e-07, | |
| "loss": 1.1611, | |
| "memory/device_reserved (GiB)": 77.63, | |
| "memory/max_active (GiB)": 65.77, | |
| "memory/max_allocated (GiB)": 65.77, | |
| "step": 162, | |
| "tokens_per_second_per_gpu": 5661.86, | |
| "total_tokens": 29908915 | |
| }, | |
| { | |
| "epoch": 2.589641434262948, | |
| "grad_norm": 1.078125, | |
| "learning_rate": 9.903113209758096e-08, | |
| "loss": 1.1709, | |
| "memory/device_reserved (GiB)": 77.63, | |
| "memory/max_active (GiB)": 65.77, | |
| "memory/max_allocated (GiB)": 65.77, | |
| "step": 163, | |
| "tokens_per_second_per_gpu": 5334.67, | |
| "total_tokens": 30093238 | |
| }, | |
| { | |
| "epoch": 2.605577689243028, | |
| "grad_norm": 1.0703125, | |
| "learning_rate": 9.107552258778905e-08, | |
| "loss": 1.1099, | |
| "memory/device_reserved (GiB)": 77.63, | |
| "memory/max_active (GiB)": 65.77, | |
| "memory/max_allocated (GiB)": 65.77, | |
| "step": 164, | |
| "tokens_per_second_per_gpu": 5276.84, | |
| "total_tokens": 30274843 | |
| }, | |
| { | |
| "epoch": 2.6215139442231075, | |
| "grad_norm": 1.046875, | |
| "learning_rate": 8.34377441300238e-08, | |
| "loss": 1.0933, | |
| "memory/device_reserved (GiB)": 77.63, | |
| "memory/max_active (GiB)": 65.77, | |
| "memory/max_allocated (GiB)": 65.77, | |
| "step": 165, | |
| "tokens_per_second_per_gpu": 5920.44, | |
| "total_tokens": 30464470 | |
| }, | |
| { | |
| "epoch": 2.637450199203187, | |
| "grad_norm": 1.1328125, | |
| "learning_rate": 7.612046748871326e-08, | |
| "loss": 1.1348, | |
| "memory/device_reserved (GiB)": 77.63, | |
| "memory/max_active (GiB)": 65.77, | |
| "memory/max_allocated (GiB)": 65.77, | |
| "step": 166, | |
| "tokens_per_second_per_gpu": 5562.69, | |
| "total_tokens": 30647284 | |
| }, | |
| { | |
| "epoch": 2.653386454183267, | |
| "grad_norm": 1.1328125, | |
| "learning_rate": 6.912625135579586e-08, | |
| "loss": 1.1553, | |
| "memory/device_reserved (GiB)": 77.63, | |
| "memory/max_active (GiB)": 65.77, | |
| "memory/max_allocated (GiB)": 65.77, | |
| "step": 167, | |
| "tokens_per_second_per_gpu": 5381.44, | |
| "total_tokens": 30825122 | |
| }, | |
| { | |
| "epoch": 2.6693227091633465, | |
| "grad_norm": 1.078125, | |
| "learning_rate": 6.245754145600091e-08, | |
| "loss": 1.126, | |
| "memory/device_reserved (GiB)": 77.63, | |
| "memory/max_active (GiB)": 65.77, | |
| "memory/max_allocated (GiB)": 65.77, | |
| "step": 168, | |
| "tokens_per_second_per_gpu": 5392.77, | |
| "total_tokens": 31008029 | |
| }, | |
| { | |
| "epoch": 2.685258964143426, | |
| "grad_norm": 1.03125, | |
| "learning_rate": 5.611666969163242e-08, | |
| "loss": 1.1025, | |
| "memory/device_reserved (GiB)": 77.63, | |
| "memory/max_active (GiB)": 65.77, | |
| "memory/max_allocated (GiB)": 65.77, | |
| "step": 169, | |
| "tokens_per_second_per_gpu": 5636.12, | |
| "total_tokens": 31197605 | |
| }, | |
| { | |
| "epoch": 2.7011952191235062, | |
| "grad_norm": 1.09375, | |
| "learning_rate": 5.0105853327154004e-08, | |
| "loss": 1.1572, | |
| "memory/device_reserved (GiB)": 77.63, | |
| "memory/max_active (GiB)": 65.77, | |
| "memory/max_allocated (GiB)": 65.77, | |
| "step": 170, | |
| "tokens_per_second_per_gpu": 5442.72, | |
| "total_tokens": 31382140 | |
| }, | |
| { | |
| "epoch": 2.717131474103586, | |
| "grad_norm": 1.0703125, | |
| "learning_rate": 4.442719421385921e-08, | |
| "loss": 1.1011, | |
| "memory/device_reserved (GiB)": 77.63, | |
| "memory/max_active (GiB)": 65.77, | |
| "memory/max_allocated (GiB)": 65.77, | |
| "step": 171, | |
| "tokens_per_second_per_gpu": 5829.11, | |
| "total_tokens": 31567345 | |
| }, | |
| { | |
| "epoch": 2.7330677290836656, | |
| "grad_norm": 1.078125, | |
| "learning_rate": 3.908267805490051e-08, | |
| "loss": 1.123, | |
| "memory/device_reserved (GiB)": 77.63, | |
| "memory/max_active (GiB)": 65.77, | |
| "memory/max_allocated (GiB)": 65.77, | |
| "step": 172, | |
| "tokens_per_second_per_gpu": 5509.97, | |
| "total_tokens": 31752125 | |
| }, | |
| { | |
| "epoch": 2.7490039840637452, | |
| "grad_norm": 1.1015625, | |
| "learning_rate": 3.4074173710931796e-08, | |
| "loss": 1.105, | |
| "memory/device_reserved (GiB)": 77.63, | |
| "memory/max_active (GiB)": 65.77, | |
| "memory/max_allocated (GiB)": 65.77, | |
| "step": 173, | |
| "tokens_per_second_per_gpu": 5609.04, | |
| "total_tokens": 31942908 | |
| }, | |
| { | |
| "epoch": 2.764940239043825, | |
| "grad_norm": 1.109375, | |
| "learning_rate": 2.9403432546609043e-08, | |
| "loss": 1.1533, | |
| "memory/device_reserved (GiB)": 77.63, | |
| "memory/max_active (GiB)": 65.77, | |
| "memory/max_allocated (GiB)": 65.77, | |
| "step": 174, | |
| "tokens_per_second_per_gpu": 5753.07, | |
| "total_tokens": 32129128 | |
| }, | |
| { | |
| "epoch": 2.7808764940239046, | |
| "grad_norm": 1.1328125, | |
| "learning_rate": 2.507208781817638e-08, | |
| "loss": 1.1074, | |
| "memory/device_reserved (GiB)": 77.63, | |
| "memory/max_active (GiB)": 65.77, | |
| "memory/max_allocated (GiB)": 65.77, | |
| "step": 175, | |
| "tokens_per_second_per_gpu": 5763.07, | |
| "total_tokens": 32313971 | |
| }, | |
| { | |
| "epoch": 2.7968127490039842, | |
| "grad_norm": 1.0625, | |
| "learning_rate": 2.1081654102351632e-08, | |
| "loss": 1.0635, | |
| "memory/device_reserved (GiB)": 77.63, | |
| "memory/max_active (GiB)": 65.77, | |
| "memory/max_allocated (GiB)": 65.77, | |
| "step": 176, | |
| "tokens_per_second_per_gpu": 5200.76, | |
| "total_tokens": 32510274 | |
| }, | |
| { | |
| "epoch": 2.812749003984064, | |
| "grad_norm": 1.078125, | |
| "learning_rate": 1.7433526766711725e-08, | |
| "loss": 1.0547, | |
| "memory/device_reserved (GiB)": 77.63, | |
| "memory/max_active (GiB)": 65.77, | |
| "memory/max_allocated (GiB)": 65.77, | |
| "step": 177, | |
| "tokens_per_second_per_gpu": 5511.76, | |
| "total_tokens": 32701180 | |
| }, | |
| { | |
| "epoch": 2.8286852589641436, | |
| "grad_norm": 1.0546875, | |
| "learning_rate": 1.4128981481764113e-08, | |
| "loss": 1.0728, | |
| "memory/device_reserved (GiB)": 77.63, | |
| "memory/max_active (GiB)": 65.77, | |
| "memory/max_allocated (GiB)": 65.77, | |
| "step": 178, | |
| "tokens_per_second_per_gpu": 5453.23, | |
| "total_tokens": 32887252 | |
| }, | |
| { | |
| "epoch": 2.8446215139442232, | |
| "grad_norm": 1.0, | |
| "learning_rate": 1.1169173774871477e-08, | |
| "loss": 1.0454, | |
| "memory/device_reserved (GiB)": 77.63, | |
| "memory/max_active (GiB)": 65.77, | |
| "memory/max_allocated (GiB)": 65.77, | |
| "step": 179, | |
| "tokens_per_second_per_gpu": 5831.37, | |
| "total_tokens": 33078886 | |
| }, | |
| { | |
| "epoch": 2.860557768924303, | |
| "grad_norm": 1.109375, | |
| "learning_rate": 8.555138626189618e-09, | |
| "loss": 1.1182, | |
| "memory/device_reserved (GiB)": 77.63, | |
| "memory/max_active (GiB)": 65.77, | |
| "memory/max_allocated (GiB)": 65.77, | |
| "step": 180, | |
| "tokens_per_second_per_gpu": 5712.45, | |
| "total_tokens": 33264882 | |
| }, | |
| { | |
| "epoch": 2.8764940239043826, | |
| "grad_norm": 1.09375, | |
| "learning_rate": 6.2877901067573955e-09, | |
| "loss": 1.1357, | |
| "memory/device_reserved (GiB)": 77.63, | |
| "memory/max_active (GiB)": 65.77, | |
| "memory/max_allocated (GiB)": 65.77, | |
| "step": 181, | |
| "tokens_per_second_per_gpu": 5709.15, | |
| "total_tokens": 33450597 | |
| }, | |
| { | |
| "epoch": 2.8924302788844622, | |
| "grad_norm": 1.1015625, | |
| "learning_rate": 4.367921058866186e-09, | |
| "loss": 1.085, | |
| "memory/device_reserved (GiB)": 77.63, | |
| "memory/max_active (GiB)": 65.77, | |
| "memory/max_allocated (GiB)": 65.77, | |
| "step": 182, | |
| "tokens_per_second_per_gpu": 5923.67, | |
| "total_tokens": 33636331 | |
| }, | |
| { | |
| "epoch": 2.908366533864542, | |
| "grad_norm": 1.0859375, | |
| "learning_rate": 2.7962028188198706e-09, | |
| "loss": 1.1416, | |
| "memory/device_reserved (GiB)": 77.63, | |
| "memory/max_active (GiB)": 65.77, | |
| "memory/max_allocated (GiB)": 65.77, | |
| "step": 183, | |
| "tokens_per_second_per_gpu": 5643.13, | |
| "total_tokens": 33828521 | |
| }, | |
| { | |
| "epoch": 2.9243027888446216, | |
| "grad_norm": 1.0625, | |
| "learning_rate": 1.5731849821833953e-09, | |
| "loss": 1.1255, | |
| "memory/device_reserved (GiB)": 77.63, | |
| "memory/max_active (GiB)": 65.77, | |
| "memory/max_allocated (GiB)": 65.77, | |
| "step": 184, | |
| "tokens_per_second_per_gpu": 5703.08, | |
| "total_tokens": 34017506 | |
| }, | |
| { | |
| "epoch": 2.9402390438247012, | |
| "grad_norm": 1.1328125, | |
| "learning_rate": 6.992952116013917e-10, | |
| "loss": 1.1094, | |
| "memory/device_reserved (GiB)": 77.63, | |
| "memory/max_active (GiB)": 65.77, | |
| "memory/max_allocated (GiB)": 65.77, | |
| "step": 185, | |
| "tokens_per_second_per_gpu": 5872.3, | |
| "total_tokens": 34203052 | |
| }, | |
| { | |
| "epoch": 2.956175298804781, | |
| "grad_norm": 1.046875, | |
| "learning_rate": 1.7483908725357543e-10, | |
| "loss": 1.1221, | |
| "memory/device_reserved (GiB)": 77.63, | |
| "memory/max_active (GiB)": 65.77, | |
| "memory/max_allocated (GiB)": 65.77, | |
| "step": 186, | |
| "tokens_per_second_per_gpu": 5556.53, | |
| "total_tokens": 34389121 | |
| } | |
| ], | |
| "logging_steps": 1, | |
| "max_steps": 186, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 3, | |
| "save_steps": 62, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 2.3756016575819284e+18, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |