| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 2.956175298804781, |
| "eval_steps": 500, |
| "global_step": 186, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.01593625498007968, |
| "grad_norm": 4.5625, |
| "learning_rate": 0.0, |
| "loss": 1.4053, |
| "memory/device_reserved (GiB)": 61.34, |
| "memory/max_active (GiB)": 49.6, |
| "memory/max_allocated (GiB)": 49.6, |
| "step": 1, |
| "tokens_per_second_per_gpu": 4706.79, |
| "total_tokens": 180518 |
| }, |
| { |
| "epoch": 0.03187250996015936, |
| "grad_norm": 4.34375, |
| "learning_rate": 1.111111111111111e-06, |
| "loss": 1.3369, |
| "memory/device_reserved (GiB)": 76.38, |
| "memory/max_active (GiB)": 64.91, |
| "memory/max_allocated (GiB)": 64.91, |
| "step": 2, |
| "tokens_per_second_per_gpu": 5826.72, |
| "total_tokens": 363757 |
| }, |
| { |
| "epoch": 0.04780876494023904, |
| "grad_norm": 4.15625, |
| "learning_rate": 2.222222222222222e-06, |
| "loss": 1.3623, |
| "memory/device_reserved (GiB)": 76.38, |
| "memory/max_active (GiB)": 64.91, |
| "memory/max_allocated (GiB)": 64.91, |
| "step": 3, |
| "tokens_per_second_per_gpu": 5939.96, |
| "total_tokens": 558043 |
| }, |
| { |
| "epoch": 0.06374501992031872, |
| "grad_norm": 4.34375, |
| "learning_rate": 3.3333333333333333e-06, |
| "loss": 1.3643, |
| "memory/device_reserved (GiB)": 76.38, |
| "memory/max_active (GiB)": 64.91, |
| "memory/max_allocated (GiB)": 64.91, |
| "step": 4, |
| "tokens_per_second_per_gpu": 5941.47, |
| "total_tokens": 743276 |
| }, |
| { |
| "epoch": 0.0796812749003984, |
| "grad_norm": 3.90625, |
| "learning_rate": 4.444444444444444e-06, |
| "loss": 1.2998, |
| "memory/device_reserved (GiB)": 76.38, |
| "memory/max_active (GiB)": 64.91, |
| "memory/max_allocated (GiB)": 64.91, |
| "step": 5, |
| "tokens_per_second_per_gpu": 5380.38, |
| "total_tokens": 929761 |
| }, |
| { |
| "epoch": 0.09561752988047809, |
| "grad_norm": 3.546875, |
| "learning_rate": 5.555555555555557e-06, |
| "loss": 1.3018, |
| "memory/device_reserved (GiB)": 76.38, |
| "memory/max_active (GiB)": 64.91, |
| "memory/max_allocated (GiB)": 64.91, |
| "step": 6, |
| "tokens_per_second_per_gpu": 5949.69, |
| "total_tokens": 1118316 |
| }, |
| { |
| "epoch": 0.11155378486055777, |
| "grad_norm": 3.171875, |
| "learning_rate": 6.666666666666667e-06, |
| "loss": 1.2793, |
| "memory/device_reserved (GiB)": 76.38, |
| "memory/max_active (GiB)": 64.91, |
| "memory/max_allocated (GiB)": 64.91, |
| "step": 7, |
| "tokens_per_second_per_gpu": 5785.23, |
| "total_tokens": 1301615 |
| }, |
| { |
| "epoch": 0.12749003984063745, |
| "grad_norm": 2.96875, |
| "learning_rate": 7.77777777777778e-06, |
| "loss": 1.3115, |
| "memory/device_reserved (GiB)": 76.38, |
| "memory/max_active (GiB)": 64.91, |
| "memory/max_allocated (GiB)": 64.91, |
| "step": 8, |
| "tokens_per_second_per_gpu": 5941.21, |
| "total_tokens": 1490474 |
| }, |
| { |
| "epoch": 0.14342629482071714, |
| "grad_norm": 2.296875, |
| "learning_rate": 8.888888888888888e-06, |
| "loss": 1.2588, |
| "memory/device_reserved (GiB)": 76.38, |
| "memory/max_active (GiB)": 64.91, |
| "memory/max_allocated (GiB)": 64.91, |
| "step": 9, |
| "tokens_per_second_per_gpu": 5534.49, |
| "total_tokens": 1667576 |
| }, |
| { |
| "epoch": 0.1593625498007968, |
| "grad_norm": 1.5625, |
| "learning_rate": 1e-05, |
| "loss": 1.1992, |
| "memory/device_reserved (GiB)": 76.38, |
| "memory/max_active (GiB)": 64.91, |
| "memory/max_allocated (GiB)": 64.91, |
| "step": 10, |
| "tokens_per_second_per_gpu": 6154.87, |
| "total_tokens": 1857807 |
| }, |
| { |
| "epoch": 0.1752988047808765, |
| "grad_norm": 1.0703125, |
| "learning_rate": 1.1111111111111113e-05, |
| "loss": 1.1436, |
| "memory/device_reserved (GiB)": 76.38, |
| "memory/max_active (GiB)": 64.91, |
| "memory/max_allocated (GiB)": 64.91, |
| "step": 11, |
| "tokens_per_second_per_gpu": 5715.6, |
| "total_tokens": 2041489 |
| }, |
| { |
| "epoch": 0.19123505976095617, |
| "grad_norm": 0.9765625, |
| "learning_rate": 1.2222222222222224e-05, |
| "loss": 1.2402, |
| "memory/device_reserved (GiB)": 76.38, |
| "memory/max_active (GiB)": 64.91, |
| "memory/max_allocated (GiB)": 64.91, |
| "step": 12, |
| "tokens_per_second_per_gpu": 5749.37, |
| "total_tokens": 2216014 |
| }, |
| { |
| "epoch": 0.20717131474103587, |
| "grad_norm": 0.9609375, |
| "learning_rate": 1.3333333333333333e-05, |
| "loss": 1.2051, |
| "memory/device_reserved (GiB)": 76.38, |
| "memory/max_active (GiB)": 64.91, |
| "memory/max_allocated (GiB)": 64.91, |
| "step": 13, |
| "tokens_per_second_per_gpu": 5748.94, |
| "total_tokens": 2397131 |
| }, |
| { |
| "epoch": 0.22310756972111553, |
| "grad_norm": 0.921875, |
| "learning_rate": 1.4444444444444446e-05, |
| "loss": 1.1211, |
| "memory/device_reserved (GiB)": 76.38, |
| "memory/max_active (GiB)": 64.91, |
| "memory/max_allocated (GiB)": 64.91, |
| "step": 14, |
| "tokens_per_second_per_gpu": 6171.1, |
| "total_tokens": 2590472 |
| }, |
| { |
| "epoch": 0.23904382470119523, |
| "grad_norm": 0.8984375, |
| "learning_rate": 1.555555555555556e-05, |
| "loss": 1.1777, |
| "memory/device_reserved (GiB)": 76.38, |
| "memory/max_active (GiB)": 64.91, |
| "memory/max_allocated (GiB)": 64.91, |
| "step": 15, |
| "tokens_per_second_per_gpu": 6160.5, |
| "total_tokens": 2780711 |
| }, |
| { |
| "epoch": 0.2549800796812749, |
| "grad_norm": 0.80078125, |
| "learning_rate": 1.6666666666666667e-05, |
| "loss": 1.1025, |
| "memory/device_reserved (GiB)": 76.38, |
| "memory/max_active (GiB)": 64.91, |
| "memory/max_allocated (GiB)": 64.91, |
| "step": 16, |
| "tokens_per_second_per_gpu": 5706.58, |
| "total_tokens": 2968588 |
| }, |
| { |
| "epoch": 0.27091633466135456, |
| "grad_norm": 0.65234375, |
| "learning_rate": 1.7777777777777777e-05, |
| "loss": 1.2041, |
| "memory/device_reserved (GiB)": 76.38, |
| "memory/max_active (GiB)": 64.91, |
| "memory/max_allocated (GiB)": 64.91, |
| "step": 17, |
| "tokens_per_second_per_gpu": 5569.19, |
| "total_tokens": 3148691 |
| }, |
| { |
| "epoch": 0.2868525896414343, |
| "grad_norm": 0.59765625, |
| "learning_rate": 1.888888888888889e-05, |
| "loss": 1.168, |
| "memory/device_reserved (GiB)": 76.38, |
| "memory/max_active (GiB)": 64.91, |
| "memory/max_allocated (GiB)": 64.91, |
| "step": 18, |
| "tokens_per_second_per_gpu": 5894.91, |
| "total_tokens": 3332398 |
| }, |
| { |
| "epoch": 0.30278884462151395, |
| "grad_norm": 0.5625, |
| "learning_rate": 2e-05, |
| "loss": 1.0977, |
| "memory/device_reserved (GiB)": 76.38, |
| "memory/max_active (GiB)": 64.91, |
| "memory/max_allocated (GiB)": 64.91, |
| "step": 19, |
| "tokens_per_second_per_gpu": 6092.09, |
| "total_tokens": 3526610 |
| }, |
| { |
| "epoch": 0.3187250996015936, |
| "grad_norm": 0.54296875, |
| "learning_rate": 1.9998251609127465e-05, |
| "loss": 1.1372, |
| "memory/device_reserved (GiB)": 76.38, |
| "memory/max_active (GiB)": 64.91, |
| "memory/max_allocated (GiB)": 64.91, |
| "step": 20, |
| "tokens_per_second_per_gpu": 5971.19, |
| "total_tokens": 3711042 |
| }, |
| { |
| "epoch": 0.3346613545816733, |
| "grad_norm": 0.5078125, |
| "learning_rate": 1.9993007047883988e-05, |
| "loss": 1.0659, |
| "memory/device_reserved (GiB)": 76.38, |
| "memory/max_active (GiB)": 64.91, |
| "memory/max_allocated (GiB)": 64.91, |
| "step": 21, |
| "tokens_per_second_per_gpu": 5750.71, |
| "total_tokens": 3890841 |
| }, |
| { |
| "epoch": 0.350597609561753, |
| "grad_norm": 0.50390625, |
| "learning_rate": 1.998426815017817e-05, |
| "loss": 1.124, |
| "memory/device_reserved (GiB)": 76.38, |
| "memory/max_active (GiB)": 64.91, |
| "memory/max_allocated (GiB)": 64.91, |
| "step": 22, |
| "tokens_per_second_per_gpu": 5968.21, |
| "total_tokens": 4074024 |
| }, |
| { |
| "epoch": 0.3665338645418327, |
| "grad_norm": 0.4609375, |
| "learning_rate": 1.9972037971811802e-05, |
| "loss": 1.064, |
| "memory/device_reserved (GiB)": 76.38, |
| "memory/max_active (GiB)": 64.91, |
| "memory/max_allocated (GiB)": 64.91, |
| "step": 23, |
| "tokens_per_second_per_gpu": 5672.93, |
| "total_tokens": 4261426 |
| }, |
| { |
| "epoch": 0.38247011952191234, |
| "grad_norm": 0.458984375, |
| "learning_rate": 1.9956320789411338e-05, |
| "loss": 1.0977, |
| "memory/device_reserved (GiB)": 76.38, |
| "memory/max_active (GiB)": 64.91, |
| "memory/max_allocated (GiB)": 64.91, |
| "step": 24, |
| "tokens_per_second_per_gpu": 5947.63, |
| "total_tokens": 4448221 |
| }, |
| { |
| "epoch": 0.398406374501992, |
| "grad_norm": 0.400390625, |
| "learning_rate": 1.9937122098932428e-05, |
| "loss": 0.9438, |
| "memory/device_reserved (GiB)": 76.38, |
| "memory/max_active (GiB)": 64.91, |
| "memory/max_allocated (GiB)": 64.91, |
| "step": 25, |
| "tokens_per_second_per_gpu": 5830.3, |
| "total_tokens": 4643418 |
| }, |
| { |
| "epoch": 0.41434262948207173, |
| "grad_norm": 0.451171875, |
| "learning_rate": 1.9914448613738107e-05, |
| "loss": 1.0786, |
| "memory/device_reserved (GiB)": 76.38, |
| "memory/max_active (GiB)": 64.91, |
| "memory/max_allocated (GiB)": 64.91, |
| "step": 26, |
| "tokens_per_second_per_gpu": 5753.23, |
| "total_tokens": 4826564 |
| }, |
| { |
| "epoch": 0.4302788844621514, |
| "grad_norm": 0.41796875, |
| "learning_rate": 1.9888308262251286e-05, |
| "loss": 1.1084, |
| "memory/device_reserved (GiB)": 76.38, |
| "memory/max_active (GiB)": 64.91, |
| "memory/max_allocated (GiB)": 64.91, |
| "step": 27, |
| "tokens_per_second_per_gpu": 5786.21, |
| "total_tokens": 5008617 |
| }, |
| { |
| "epoch": 0.44621513944223107, |
| "grad_norm": 0.392578125, |
| "learning_rate": 1.985871018518236e-05, |
| "loss": 1.0488, |
| "memory/device_reserved (GiB)": 76.38, |
| "memory/max_active (GiB)": 64.91, |
| "memory/max_allocated (GiB)": 64.91, |
| "step": 28, |
| "tokens_per_second_per_gpu": 5935.98, |
| "total_tokens": 5194550 |
| }, |
| { |
| "epoch": 0.46215139442231074, |
| "grad_norm": 0.37109375, |
| "learning_rate": 1.9825664732332886e-05, |
| "loss": 1.0894, |
| "memory/device_reserved (GiB)": 76.38, |
| "memory/max_active (GiB)": 64.91, |
| "memory/max_allocated (GiB)": 64.91, |
| "step": 29, |
| "tokens_per_second_per_gpu": 5927.93, |
| "total_tokens": 5380376 |
| }, |
| { |
| "epoch": 0.47808764940239046, |
| "grad_norm": 0.35546875, |
| "learning_rate": 1.9789183458976485e-05, |
| "loss": 1.0869, |
| "memory/device_reserved (GiB)": 76.38, |
| "memory/max_active (GiB)": 64.91, |
| "memory/max_allocated (GiB)": 64.91, |
| "step": 30, |
| "tokens_per_second_per_gpu": 6097.05, |
| "total_tokens": 5567310 |
| }, |
| { |
| "epoch": 0.4940239043824701, |
| "grad_norm": 0.37109375, |
| "learning_rate": 1.9749279121818235e-05, |
| "loss": 1.0181, |
| "memory/device_reserved (GiB)": 76.38, |
| "memory/max_active (GiB)": 64.91, |
| "memory/max_allocated (GiB)": 64.91, |
| "step": 31, |
| "tokens_per_second_per_gpu": 6055.18, |
| "total_tokens": 5750982 |
| }, |
| { |
| "epoch": 0.5099601593625498, |
| "grad_norm": 0.380859375, |
| "learning_rate": 1.970596567453391e-05, |
| "loss": 1.0552, |
| "memory/device_reserved (GiB)": 76.38, |
| "memory/max_active (GiB)": 64.91, |
| "memory/max_allocated (GiB)": 64.91, |
| "step": 32, |
| "tokens_per_second_per_gpu": 6008.39, |
| "total_tokens": 5937332 |
| }, |
| { |
| "epoch": 0.5258964143426295, |
| "grad_norm": 0.376953125, |
| "learning_rate": 1.9659258262890683e-05, |
| "loss": 1.0439, |
| "memory/device_reserved (GiB)": 76.38, |
| "memory/max_active (GiB)": 64.91, |
| "memory/max_allocated (GiB)": 64.91, |
| "step": 33, |
| "tokens_per_second_per_gpu": 6030.07, |
| "total_tokens": 6120851 |
| }, |
| { |
| "epoch": 0.5418326693227091, |
| "grad_norm": 0.369140625, |
| "learning_rate": 1.9609173219450998e-05, |
| "loss": 1.0835, |
| "memory/device_reserved (GiB)": 76.38, |
| "memory/max_active (GiB)": 64.91, |
| "memory/max_allocated (GiB)": 64.91, |
| "step": 34, |
| "tokens_per_second_per_gpu": 5726.88, |
| "total_tokens": 6297402 |
| }, |
| { |
| "epoch": 0.5577689243027888, |
| "grad_norm": 0.396484375, |
| "learning_rate": 1.955572805786141e-05, |
| "loss": 1.1074, |
| "memory/device_reserved (GiB)": 76.38, |
| "memory/max_active (GiB)": 64.91, |
| "memory/max_allocated (GiB)": 64.91, |
| "step": 35, |
| "tokens_per_second_per_gpu": 5816.12, |
| "total_tokens": 6480316 |
| }, |
| { |
| "epoch": 0.5737051792828686, |
| "grad_norm": 0.357421875, |
| "learning_rate": 1.9498941466728462e-05, |
| "loss": 1.0391, |
| "memory/device_reserved (GiB)": 76.38, |
| "memory/max_active (GiB)": 64.91, |
| "memory/max_allocated (GiB)": 64.91, |
| "step": 36, |
| "tokens_per_second_per_gpu": 5765.66, |
| "total_tokens": 6665052 |
| }, |
| { |
| "epoch": 0.5896414342629482, |
| "grad_norm": 0.345703125, |
| "learning_rate": 1.9438833303083677e-05, |
| "loss": 1.0371, |
| "memory/device_reserved (GiB)": 76.38, |
| "memory/max_active (GiB)": 64.91, |
| "memory/max_allocated (GiB)": 64.91, |
| "step": 37, |
| "tokens_per_second_per_gpu": 5749.09, |
| "total_tokens": 6849283 |
| }, |
| { |
| "epoch": 0.6055776892430279, |
| "grad_norm": 0.34375, |
| "learning_rate": 1.9375424585439994e-05, |
| "loss": 1.0503, |
| "memory/device_reserved (GiB)": 76.38, |
| "memory/max_active (GiB)": 64.91, |
| "memory/max_allocated (GiB)": 64.91, |
| "step": 38, |
| "tokens_per_second_per_gpu": 5927.3, |
| "total_tokens": 7032513 |
| }, |
| { |
| "epoch": 0.6215139442231076, |
| "grad_norm": 0.330078125, |
| "learning_rate": 1.9308737486442045e-05, |
| "loss": 1.0479, |
| "memory/device_reserved (GiB)": 76.38, |
| "memory/max_active (GiB)": 64.91, |
| "memory/max_allocated (GiB)": 64.91, |
| "step": 39, |
| "tokens_per_second_per_gpu": 5905.92, |
| "total_tokens": 7214561 |
| }, |
| { |
| "epoch": 0.6374501992031872, |
| "grad_norm": 0.3359375, |
| "learning_rate": 1.9238795325112867e-05, |
| "loss": 1.0098, |
| "memory/device_reserved (GiB)": 76.38, |
| "memory/max_active (GiB)": 64.91, |
| "memory/max_allocated (GiB)": 64.91, |
| "step": 40, |
| "tokens_per_second_per_gpu": 5853.3, |
| "total_tokens": 7400854 |
| }, |
| { |
| "epoch": 0.6533864541832669, |
| "grad_norm": 0.36328125, |
| "learning_rate": 1.9165622558699763e-05, |
| "loss": 1.106, |
| "memory/device_reserved (GiB)": 76.38, |
| "memory/max_active (GiB)": 64.91, |
| "memory/max_allocated (GiB)": 64.91, |
| "step": 41, |
| "tokens_per_second_per_gpu": 5556.45, |
| "total_tokens": 7577263 |
| }, |
| { |
| "epoch": 0.6693227091633466, |
| "grad_norm": 0.396484375, |
| "learning_rate": 1.908924477412211e-05, |
| "loss": 1.0498, |
| "memory/device_reserved (GiB)": 76.38, |
| "memory/max_active (GiB)": 64.91, |
| "memory/max_allocated (GiB)": 64.91, |
| "step": 42, |
| "tokens_per_second_per_gpu": 5928.26, |
| "total_tokens": 7763586 |
| }, |
| { |
| "epoch": 0.6852589641434262, |
| "grad_norm": 0.32421875, |
| "learning_rate": 1.900968867902419e-05, |
| "loss": 1.0171, |
| "memory/device_reserved (GiB)": 76.38, |
| "memory/max_active (GiB)": 64.91, |
| "memory/max_allocated (GiB)": 64.91, |
| "step": 43, |
| "tokens_per_second_per_gpu": 6102.36, |
| "total_tokens": 7953595 |
| }, |
| { |
| "epoch": 0.701195219123506, |
| "grad_norm": 0.36328125, |
| "learning_rate": 1.8926982092436117e-05, |
| "loss": 1.0688, |
| "memory/device_reserved (GiB)": 76.38, |
| "memory/max_active (GiB)": 64.91, |
| "memory/max_allocated (GiB)": 64.91, |
| "step": 44, |
| "tokens_per_second_per_gpu": 6058.46, |
| "total_tokens": 8135608 |
| }, |
| { |
| "epoch": 0.7171314741035857, |
| "grad_norm": 0.359375, |
| "learning_rate": 1.8841153935046098e-05, |
| "loss": 0.978, |
| "memory/device_reserved (GiB)": 76.38, |
| "memory/max_active (GiB)": 64.91, |
| "memory/max_allocated (GiB)": 64.91, |
| "step": 45, |
| "tokens_per_second_per_gpu": 5806.45, |
| "total_tokens": 8328038 |
| }, |
| { |
| "epoch": 0.7330677290836654, |
| "grad_norm": 0.333984375, |
| "learning_rate": 1.8752234219087538e-05, |
| "loss": 1.0435, |
| "memory/device_reserved (GiB)": 76.38, |
| "memory/max_active (GiB)": 64.91, |
| "memory/max_allocated (GiB)": 64.91, |
| "step": 46, |
| "tokens_per_second_per_gpu": 5940.85, |
| "total_tokens": 8517629 |
| }, |
| { |
| "epoch": 0.749003984063745, |
| "grad_norm": 0.400390625, |
| "learning_rate": 1.866025403784439e-05, |
| "loss": 1.0317, |
| "memory/device_reserved (GiB)": 76.38, |
| "memory/max_active (GiB)": 64.91, |
| "memory/max_allocated (GiB)": 64.91, |
| "step": 47, |
| "tokens_per_second_per_gpu": 5929.8, |
| "total_tokens": 8700619 |
| }, |
| { |
| "epoch": 0.7649402390438247, |
| "grad_norm": 0.328125, |
| "learning_rate": 1.8565245554778516e-05, |
| "loss": 0.9819, |
| "memory/device_reserved (GiB)": 76.38, |
| "memory/max_active (GiB)": 64.91, |
| "memory/max_allocated (GiB)": 64.91, |
| "step": 48, |
| "tokens_per_second_per_gpu": 5991.89, |
| "total_tokens": 8886726 |
| }, |
| { |
| "epoch": 0.7808764940239044, |
| "grad_norm": 0.34765625, |
| "learning_rate": 1.8467241992282842e-05, |
| "loss": 1.0396, |
| "memory/device_reserved (GiB)": 76.38, |
| "memory/max_active (GiB)": 64.91, |
| "memory/max_allocated (GiB)": 64.91, |
| "step": 49, |
| "tokens_per_second_per_gpu": 5941.59, |
| "total_tokens": 9074210 |
| }, |
| { |
| "epoch": 0.796812749003984, |
| "grad_norm": 0.345703125, |
| "learning_rate": 1.83662776200642e-05, |
| "loss": 1.0703, |
| "memory/device_reserved (GiB)": 76.38, |
| "memory/max_active (GiB)": 64.91, |
| "memory/max_allocated (GiB)": 64.91, |
| "step": 50, |
| "tokens_per_second_per_gpu": 5856.71, |
| "total_tokens": 9253264 |
| }, |
| { |
| "epoch": 0.8127490039840638, |
| "grad_norm": 0.33203125, |
| "learning_rate": 1.826238774315995e-05, |
| "loss": 1.0078, |
| "memory/device_reserved (GiB)": 76.38, |
| "memory/max_active (GiB)": 64.91, |
| "memory/max_allocated (GiB)": 64.91, |
| "step": 51, |
| "tokens_per_second_per_gpu": 5883.97, |
| "total_tokens": 9437019 |
| }, |
| { |
| "epoch": 0.8286852589641435, |
| "grad_norm": 0.326171875, |
| "learning_rate": 1.8155608689592604e-05, |
| "loss": 1.0352, |
| "memory/device_reserved (GiB)": 76.38, |
| "memory/max_active (GiB)": 64.91, |
| "memory/max_allocated (GiB)": 64.91, |
| "step": 52, |
| "tokens_per_second_per_gpu": 6284.45, |
| "total_tokens": 9624777 |
| }, |
| { |
| "epoch": 0.8446215139442231, |
| "grad_norm": 0.34375, |
| "learning_rate": 1.8045977797666685e-05, |
| "loss": 1.0015, |
| "memory/device_reserved (GiB)": 76.38, |
| "memory/max_active (GiB)": 64.91, |
| "memory/max_allocated (GiB)": 64.91, |
| "step": 53, |
| "tokens_per_second_per_gpu": 6227.78, |
| "total_tokens": 9816093 |
| }, |
| { |
| "epoch": 0.8605577689243028, |
| "grad_norm": 0.32421875, |
| "learning_rate": 1.7933533402912354e-05, |
| "loss": 1.0205, |
| "memory/device_reserved (GiB)": 76.38, |
| "memory/max_active (GiB)": 64.91, |
| "memory/max_allocated (GiB)": 64.91, |
| "step": 54, |
| "tokens_per_second_per_gpu": 5562.75, |
| "total_tokens": 10003875 |
| }, |
| { |
| "epoch": 0.8764940239043825, |
| "grad_norm": 0.3125, |
| "learning_rate": 1.78183148246803e-05, |
| "loss": 0.9985, |
| "memory/device_reserved (GiB)": 76.38, |
| "memory/max_active (GiB)": 64.91, |
| "memory/max_allocated (GiB)": 64.91, |
| "step": 55, |
| "tokens_per_second_per_gpu": 6029.45, |
| "total_tokens": 10195261 |
| }, |
| { |
| "epoch": 0.8924302788844621, |
| "grad_norm": 0.328125, |
| "learning_rate": 1.7700362352392632e-05, |
| "loss": 1.0151, |
| "memory/device_reserved (GiB)": 76.38, |
| "memory/max_active (GiB)": 64.91, |
| "memory/max_allocated (GiB)": 64.91, |
| "step": 56, |
| "tokens_per_second_per_gpu": 5824.93, |
| "total_tokens": 10378607 |
| }, |
| { |
| "epoch": 0.9083665338645418, |
| "grad_norm": 0.345703125, |
| "learning_rate": 1.757971723145453e-05, |
| "loss": 1.0737, |
| "memory/device_reserved (GiB)": 76.38, |
| "memory/max_active (GiB)": 64.91, |
| "memory/max_allocated (GiB)": 64.91, |
| "step": 57, |
| "tokens_per_second_per_gpu": 5758.69, |
| "total_tokens": 10565102 |
| }, |
| { |
| "epoch": 0.9243027888446215, |
| "grad_norm": 0.330078125, |
| "learning_rate": 1.7456421648831658e-05, |
| "loss": 1.0444, |
| "memory/device_reserved (GiB)": 76.38, |
| "memory/max_active (GiB)": 64.91, |
| "memory/max_allocated (GiB)": 64.91, |
| "step": 58, |
| "tokens_per_second_per_gpu": 5699.09, |
| "total_tokens": 10743645 |
| }, |
| { |
| "epoch": 0.9402390438247012, |
| "grad_norm": 0.337890625, |
| "learning_rate": 1.7330518718298263e-05, |
| "loss": 0.998, |
| "memory/device_reserved (GiB)": 76.38, |
| "memory/max_active (GiB)": 64.91, |
| "memory/max_allocated (GiB)": 64.91, |
| "step": 59, |
| "tokens_per_second_per_gpu": 5772.72, |
| "total_tokens": 10926325 |
| }, |
| { |
| "epoch": 0.9561752988047809, |
| "grad_norm": 0.361328125, |
| "learning_rate": 1.7202052465361268e-05, |
| "loss": 1.0659, |
| "memory/device_reserved (GiB)": 76.38, |
| "memory/max_active (GiB)": 64.91, |
| "memory/max_allocated (GiB)": 64.91, |
| "step": 60, |
| "tokens_per_second_per_gpu": 5781.99, |
| "total_tokens": 11105741 |
| }, |
| { |
| "epoch": 0.9721115537848606, |
| "grad_norm": 0.326171875, |
| "learning_rate": 1.7071067811865477e-05, |
| "loss": 1.0024, |
| "memory/device_reserved (GiB)": 76.38, |
| "memory/max_active (GiB)": 64.91, |
| "memory/max_allocated (GiB)": 64.91, |
| "step": 61, |
| "tokens_per_second_per_gpu": 5416.25, |
| "total_tokens": 11283752 |
| }, |
| { |
| "epoch": 0.9880478087649402, |
| "grad_norm": 0.314453125, |
| "learning_rate": 1.693761056028542e-05, |
| "loss": 0.9429, |
| "memory/device_reserved (GiB)": 76.38, |
| "memory/max_active (GiB)": 64.91, |
| "memory/max_allocated (GiB)": 64.91, |
| "step": 62, |
| "tokens_per_second_per_gpu": 6080.81, |
| "total_tokens": 11476891 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 1.03125, |
| "learning_rate": 1.6801727377709195e-05, |
| "loss": 0.8979, |
| "memory/device_reserved (GiB)": 76.38, |
| "memory/max_active (GiB)": 64.91, |
| "memory/max_allocated (GiB)": 64.91, |
| "step": 63, |
| "tokens_per_second_per_gpu": 4586.33, |
| "total_tokens": 11600559 |
| }, |
| { |
| "epoch": 1.0159362549800797, |
| "grad_norm": 0.33203125, |
| "learning_rate": 1.6663465779520042e-05, |
| "loss": 1.0391, |
| "memory/device_reserved (GiB)": 76.38, |
| "memory/max_active (GiB)": 64.91, |
| "memory/max_allocated (GiB)": 64.91, |
| "step": 64, |
| "tokens_per_second_per_gpu": 5765.65, |
| "total_tokens": 11781077 |
| }, |
| { |
| "epoch": 1.0318725099601593, |
| "grad_norm": 0.328125, |
| "learning_rate": 1.6522874112781213e-05, |
| "loss": 0.9893, |
| "memory/device_reserved (GiB)": 76.38, |
| "memory/max_active (GiB)": 64.91, |
| "memory/max_allocated (GiB)": 64.91, |
| "step": 65, |
| "tokens_per_second_per_gpu": 5812.65, |
| "total_tokens": 11964316 |
| }, |
| { |
| "epoch": 1.047808764940239, |
| "grad_norm": 0.33203125, |
| "learning_rate": 1.6380001539330088e-05, |
| "loss": 1.019, |
| "memory/device_reserved (GiB)": 76.38, |
| "memory/max_active (GiB)": 64.91, |
| "memory/max_allocated (GiB)": 64.91, |
| "step": 66, |
| "tokens_per_second_per_gpu": 5958.35, |
| "total_tokens": 12158602 |
| }, |
| { |
| "epoch": 1.0637450199203187, |
| "grad_norm": 0.318359375, |
| "learning_rate": 1.6234898018587336e-05, |
| "loss": 1.0098, |
| "memory/device_reserved (GiB)": 76.38, |
| "memory/max_active (GiB)": 64.91, |
| "memory/max_allocated (GiB)": 64.91, |
| "step": 67, |
| "tokens_per_second_per_gpu": 5947.9, |
| "total_tokens": 12343835 |
| }, |
| { |
| "epoch": 1.0796812749003983, |
| "grad_norm": 0.31640625, |
| "learning_rate": 1.608761429008721e-05, |
| "loss": 0.959, |
| "memory/device_reserved (GiB)": 76.38, |
| "memory/max_active (GiB)": 64.91, |
| "memory/max_allocated (GiB)": 64.91, |
| "step": 68, |
| "tokens_per_second_per_gpu": 5410.16, |
| "total_tokens": 12530320 |
| }, |
| { |
| "epoch": 1.095617529880478, |
| "grad_norm": 0.337890625, |
| "learning_rate": 1.5938201855735017e-05, |
| "loss": 0.998, |
| "memory/device_reserved (GiB)": 76.38, |
| "memory/max_active (GiB)": 64.91, |
| "memory/max_allocated (GiB)": 64.91, |
| "step": 69, |
| "tokens_per_second_per_gpu": 5950.0, |
| "total_tokens": 12718875 |
| }, |
| { |
| "epoch": 1.1115537848605577, |
| "grad_norm": 0.31640625, |
| "learning_rate": 1.578671296179806e-05, |
| "loss": 0.9834, |
| "memory/device_reserved (GiB)": 76.38, |
| "memory/max_active (GiB)": 64.91, |
| "memory/max_allocated (GiB)": 64.91, |
| "step": 70, |
| "tokens_per_second_per_gpu": 5806.97, |
| "total_tokens": 12902174 |
| }, |
| { |
| "epoch": 1.1274900398406373, |
| "grad_norm": 0.322265625, |
| "learning_rate": 1.563320058063622e-05, |
| "loss": 1.02, |
| "memory/device_reserved (GiB)": 76.38, |
| "memory/max_active (GiB)": 64.91, |
| "memory/max_allocated (GiB)": 64.91, |
| "step": 71, |
| "tokens_per_second_per_gpu": 5964.93, |
| "total_tokens": 13091033 |
| }, |
| { |
| "epoch": 1.1434262948207172, |
| "grad_norm": 0.328125, |
| "learning_rate": 1.5477718392178716e-05, |
| "loss": 1.001, |
| "memory/device_reserved (GiB)": 76.38, |
| "memory/max_active (GiB)": 64.91, |
| "memory/max_allocated (GiB)": 64.91, |
| "step": 72, |
| "tokens_per_second_per_gpu": 5543.95, |
| "total_tokens": 13268135 |
| }, |
| { |
| "epoch": 1.159362549800797, |
| "grad_norm": 0.322265625, |
| "learning_rate": 1.5320320765153367e-05, |
| "loss": 0.9868, |
| "memory/device_reserved (GiB)": 76.38, |
| "memory/max_active (GiB)": 64.91, |
| "memory/max_allocated (GiB)": 64.91, |
| "step": 73, |
| "tokens_per_second_per_gpu": 6161.46, |
| "total_tokens": 13458366 |
| }, |
| { |
| "epoch": 1.1752988047808766, |
| "grad_norm": 0.33203125, |
| "learning_rate": 1.5161062738075068e-05, |
| "loss": 0.9404, |
| "memory/device_reserved (GiB)": 76.38, |
| "memory/max_active (GiB)": 64.91, |
| "memory/max_allocated (GiB)": 64.91, |
| "step": 74, |
| "tokens_per_second_per_gpu": 5781.1, |
| "total_tokens": 13642048 |
| }, |
| { |
| "epoch": 1.1912350597609562, |
| "grad_norm": 0.423828125, |
| "learning_rate": 1.5000000000000002e-05, |
| "loss": 1.0273, |
| "memory/device_reserved (GiB)": 76.38, |
| "memory/max_active (GiB)": 64.91, |
| "memory/max_allocated (GiB)": 64.91, |
| "step": 75, |
| "tokens_per_second_per_gpu": 5754.51, |
| "total_tokens": 13816573 |
| }, |
| { |
| "epoch": 1.207171314741036, |
| "grad_norm": 0.376953125, |
| "learning_rate": 1.4837188871052399e-05, |
| "loss": 0.999, |
| "memory/device_reserved (GiB)": 76.38, |
| "memory/max_active (GiB)": 64.91, |
| "memory/max_allocated (GiB)": 64.91, |
| "step": 76, |
| "tokens_per_second_per_gpu": 5745.16, |
| "total_tokens": 13997690 |
| }, |
| { |
| "epoch": 1.2231075697211156, |
| "grad_norm": 0.30859375, |
| "learning_rate": 1.4672686282730622e-05, |
| "loss": 0.9365, |
| "memory/device_reserved (GiB)": 76.38, |
| "memory/max_active (GiB)": 64.91, |
| "memory/max_allocated (GiB)": 64.91, |
| "step": 77, |
| "tokens_per_second_per_gpu": 6187.18, |
| "total_tokens": 14191031 |
| }, |
| { |
| "epoch": 1.2390438247011952, |
| "grad_norm": 0.310546875, |
| "learning_rate": 1.4506549757999456e-05, |
| "loss": 0.9932, |
| "memory/device_reserved (GiB)": 76.38, |
| "memory/max_active (GiB)": 64.91, |
| "memory/max_allocated (GiB)": 64.91, |
| "step": 78, |
| "tokens_per_second_per_gpu": 6189.26, |
| "total_tokens": 14381270 |
| }, |
| { |
| "epoch": 1.254980079681275, |
| "grad_norm": 0.361328125, |
| "learning_rate": 1.4338837391175582e-05, |
| "loss": 0.9253, |
| "memory/device_reserved (GiB)": 76.38, |
| "memory/max_active (GiB)": 64.91, |
| "memory/max_allocated (GiB)": 64.91, |
| "step": 79, |
| "tokens_per_second_per_gpu": 5694.92, |
| "total_tokens": 14569147 |
| }, |
| { |
| "epoch": 1.2709163346613546, |
| "grad_norm": 0.349609375, |
| "learning_rate": 1.4169607827613284e-05, |
| "loss": 1.0249, |
| "memory/device_reserved (GiB)": 76.38, |
| "memory/max_active (GiB)": 64.91, |
| "memory/max_allocated (GiB)": 64.91, |
| "step": 80, |
| "tokens_per_second_per_gpu": 5574.13, |
| "total_tokens": 14749250 |
| }, |
| { |
| "epoch": 1.2868525896414342, |
| "grad_norm": 0.33984375, |
| "learning_rate": 1.3998920243197408e-05, |
| "loss": 1.0044, |
| "memory/device_reserved (GiB)": 76.38, |
| "memory/max_active (GiB)": 64.91, |
| "memory/max_allocated (GiB)": 64.91, |
| "step": 81, |
| "tokens_per_second_per_gpu": 5892.86, |
| "total_tokens": 14932957 |
| }, |
| { |
| "epoch": 1.302788844621514, |
| "grad_norm": 0.31640625, |
| "learning_rate": 1.3826834323650899e-05, |
| "loss": 0.9443, |
| "memory/device_reserved (GiB)": 76.38, |
| "memory/max_active (GiB)": 64.91, |
| "memory/max_allocated (GiB)": 64.91, |
| "step": 82, |
| "tokens_per_second_per_gpu": 6084.92, |
| "total_tokens": 15127169 |
| }, |
| { |
| "epoch": 1.3187250996015936, |
| "grad_norm": 0.328125, |
| "learning_rate": 1.3653410243663953e-05, |
| "loss": 0.9878, |
| "memory/device_reserved (GiB)": 76.38, |
| "memory/max_active (GiB)": 64.91, |
| "memory/max_allocated (GiB)": 64.91, |
| "step": 83, |
| "tokens_per_second_per_gpu": 5984.25, |
| "total_tokens": 15311601 |
| }, |
| { |
| "epoch": 1.3346613545816732, |
| "grad_norm": 0.32421875, |
| "learning_rate": 1.3478708645852272e-05, |
| "loss": 0.9248, |
| "memory/device_reserved (GiB)": 76.38, |
| "memory/max_active (GiB)": 64.91, |
| "memory/max_allocated (GiB)": 64.91, |
| "step": 84, |
| "tokens_per_second_per_gpu": 5744.73, |
| "total_tokens": 15491400 |
| }, |
| { |
| "epoch": 1.3505976095617531, |
| "grad_norm": 0.33203125, |
| "learning_rate": 1.3302790619551673e-05, |
| "loss": 0.9824, |
| "memory/device_reserved (GiB)": 76.38, |
| "memory/max_active (GiB)": 64.91, |
| "memory/max_allocated (GiB)": 64.91, |
| "step": 85, |
| "tokens_per_second_per_gpu": 6009.5, |
| "total_tokens": 15674583 |
| }, |
| { |
| "epoch": 1.3665338645418328, |
| "grad_norm": 0.314453125, |
| "learning_rate": 1.3125717679456447e-05, |
| "loss": 0.9404, |
| "memory/device_reserved (GiB)": 76.38, |
| "memory/max_active (GiB)": 64.91, |
| "memory/max_allocated (GiB)": 64.91, |
| "step": 86, |
| "tokens_per_second_per_gpu": 5690.82, |
| "total_tokens": 15861985 |
| }, |
| { |
| "epoch": 1.3824701195219125, |
| "grad_norm": 0.34765625, |
| "learning_rate": 1.2947551744109044e-05, |
| "loss": 0.9731, |
| "memory/device_reserved (GiB)": 76.38, |
| "memory/max_active (GiB)": 64.91, |
| "memory/max_allocated (GiB)": 64.91, |
| "step": 87, |
| "tokens_per_second_per_gpu": 5962.66, |
| "total_tokens": 16048780 |
| }, |
| { |
| "epoch": 1.3984063745019921, |
| "grad_norm": 0.318359375, |
| "learning_rate": 1.2768355114248493e-05, |
| "loss": 0.8406, |
| "memory/device_reserved (GiB)": 76.38, |
| "memory/max_active (GiB)": 64.91, |
| "memory/max_allocated (GiB)": 64.91, |
| "step": 88, |
| "tokens_per_second_per_gpu": 5796.13, |
| "total_tokens": 16243977 |
| }, |
| { |
| "epoch": 1.4143426294820718, |
| "grad_norm": 0.337890625, |
| "learning_rate": 1.2588190451025209e-05, |
| "loss": 0.9692, |
| "memory/device_reserved (GiB)": 76.38, |
| "memory/max_active (GiB)": 64.91, |
| "memory/max_allocated (GiB)": 64.91, |
| "step": 89, |
| "tokens_per_second_per_gpu": 5748.01, |
| "total_tokens": 16427123 |
| }, |
| { |
| "epoch": 1.4302788844621515, |
| "grad_norm": 0.345703125, |
| "learning_rate": 1.2407120754089733e-05, |
| "loss": 0.998, |
| "memory/device_reserved (GiB)": 76.38, |
| "memory/max_active (GiB)": 64.91, |
| "memory/max_allocated (GiB)": 64.91, |
| "step": 90, |
| "tokens_per_second_per_gpu": 5897.24, |
| "total_tokens": 16609176 |
| }, |
| { |
| "epoch": 1.4462151394422311, |
| "grad_norm": 0.33203125, |
| "learning_rate": 1.2225209339563144e-05, |
| "loss": 0.9507, |
| "memory/device_reserved (GiB)": 76.38, |
| "memory/max_active (GiB)": 64.91, |
| "memory/max_allocated (GiB)": 64.91, |
| "step": 91, |
| "tokens_per_second_per_gpu": 5936.81, |
| "total_tokens": 16795109 |
| }, |
| { |
| "epoch": 1.4621513944223108, |
| "grad_norm": 0.328125, |
| "learning_rate": 1.2042519817896805e-05, |
| "loss": 0.9912, |
| "memory/device_reserved (GiB)": 76.38, |
| "memory/max_active (GiB)": 64.91, |
| "memory/max_allocated (GiB)": 64.91, |
| "step": 92, |
| "tokens_per_second_per_gpu": 5949.43, |
| "total_tokens": 16980935 |
| }, |
| { |
| "epoch": 1.4780876494023905, |
| "grad_norm": 0.333984375, |
| "learning_rate": 1.1859116071629148e-05, |
| "loss": 0.9888, |
| "memory/device_reserved (GiB)": 76.38, |
| "memory/max_active (GiB)": 64.91, |
| "memory/max_allocated (GiB)": 64.91, |
| "step": 93, |
| "tokens_per_second_per_gpu": 6095.07, |
| "total_tokens": 17167869 |
| }, |
| { |
| "epoch": 1.4940239043824701, |
| "grad_norm": 0.322265625, |
| "learning_rate": 1.1675062233047365e-05, |
| "loss": 0.9219, |
| "memory/device_reserved (GiB)": 76.38, |
| "memory/max_active (GiB)": 64.91, |
| "memory/max_allocated (GiB)": 64.91, |
| "step": 94, |
| "tokens_per_second_per_gpu": 6067.77, |
| "total_tokens": 17351541 |
| }, |
| { |
| "epoch": 1.5099601593625498, |
| "grad_norm": 0.3828125, |
| "learning_rate": 1.1490422661761744e-05, |
| "loss": 0.9648, |
| "memory/device_reserved (GiB)": 76.38, |
| "memory/max_active (GiB)": 64.91, |
| "memory/max_allocated (GiB)": 64.91, |
| "step": 95, |
| "tokens_per_second_per_gpu": 6008.67, |
| "total_tokens": 17537891 |
| }, |
| { |
| "epoch": 1.5258964143426295, |
| "grad_norm": 0.328125, |
| "learning_rate": 1.130526192220052e-05, |
| "loss": 0.9556, |
| "memory/device_reserved (GiB)": 76.38, |
| "memory/max_active (GiB)": 64.91, |
| "memory/max_allocated (GiB)": 64.91, |
| "step": 96, |
| "tokens_per_second_per_gpu": 5955.34, |
| "total_tokens": 17721410 |
| }, |
| { |
| "epoch": 1.5418326693227091, |
| "grad_norm": 0.3359375, |
| "learning_rate": 1.1119644761033079e-05, |
| "loss": 0.9951, |
| "memory/device_reserved (GiB)": 76.38, |
| "memory/max_active (GiB)": 64.91, |
| "memory/max_allocated (GiB)": 64.91, |
| "step": 97, |
| "tokens_per_second_per_gpu": 5732.24, |
| "total_tokens": 17897961 |
| }, |
| { |
| "epoch": 1.5577689243027888, |
| "grad_norm": 0.330078125, |
| "learning_rate": 1.0933636084529507e-05, |
| "loss": 1.02, |
| "memory/device_reserved (GiB)": 76.38, |
| "memory/max_active (GiB)": 64.91, |
| "memory/max_allocated (GiB)": 64.91, |
| "step": 98, |
| "tokens_per_second_per_gpu": 5813.99, |
| "total_tokens": 18080875 |
| }, |
| { |
| "epoch": 1.5737051792828685, |
| "grad_norm": 0.330078125, |
| "learning_rate": 1.0747300935864245e-05, |
| "loss": 0.958, |
| "memory/device_reserved (GiB)": 76.38, |
| "memory/max_active (GiB)": 64.91, |
| "memory/max_allocated (GiB)": 64.91, |
| "step": 99, |
| "tokens_per_second_per_gpu": 5769.52, |
| "total_tokens": 18265611 |
| }, |
| { |
| "epoch": 1.5896414342629481, |
| "grad_norm": 0.326171875, |
| "learning_rate": 1.0560704472371919e-05, |
| "loss": 0.9561, |
| "memory/device_reserved (GiB)": 76.38, |
| "memory/max_active (GiB)": 64.91, |
| "memory/max_allocated (GiB)": 64.91, |
| "step": 100, |
| "tokens_per_second_per_gpu": 5726.38, |
| "total_tokens": 18449842 |
| }, |
| { |
| "epoch": 1.6055776892430278, |
| "grad_norm": 0.326171875, |
| "learning_rate": 1.037391194276326e-05, |
| "loss": 0.9707, |
| "memory/device_reserved (GiB)": 76.38, |
| "memory/max_active (GiB)": 64.91, |
| "memory/max_allocated (GiB)": 64.91, |
| "step": 101, |
| "tokens_per_second_per_gpu": 5944.93, |
| "total_tokens": 18633072 |
| }, |
| { |
| "epoch": 1.6215139442231075, |
| "grad_norm": 0.3203125, |
| "learning_rate": 1.0186988664309023e-05, |
| "loss": 0.9707, |
| "memory/device_reserved (GiB)": 76.38, |
| "memory/max_active (GiB)": 64.91, |
| "memory/max_allocated (GiB)": 64.91, |
| "step": 102, |
| "tokens_per_second_per_gpu": 5897.0, |
| "total_tokens": 18815120 |
| }, |
| { |
| "epoch": 1.6374501992031871, |
| "grad_norm": 0.328125, |
| "learning_rate": 1e-05, |
| "loss": 0.9385, |
| "memory/device_reserved (GiB)": 76.38, |
| "memory/max_active (GiB)": 64.91, |
| "memory/max_allocated (GiB)": 64.91, |
| "step": 103, |
| "tokens_per_second_per_gpu": 5841.82, |
| "total_tokens": 19001413 |
| }, |
| { |
| "epoch": 1.6533864541832668, |
| "grad_norm": 0.341796875, |
| "learning_rate": 9.81301133569098e-06, |
| "loss": 1.0303, |
| "memory/device_reserved (GiB)": 76.38, |
| "memory/max_active (GiB)": 64.91, |
| "memory/max_allocated (GiB)": 64.91, |
| "step": 104, |
| "tokens_per_second_per_gpu": 5567.16, |
| "total_tokens": 19177822 |
| }, |
| { |
| "epoch": 1.6693227091633465, |
| "grad_norm": 0.330078125, |
| "learning_rate": 9.626088057236745e-06, |
| "loss": 0.9814, |
| "memory/device_reserved (GiB)": 76.38, |
| "memory/max_active (GiB)": 64.91, |
| "memory/max_allocated (GiB)": 64.91, |
| "step": 105, |
| "tokens_per_second_per_gpu": 5850.82, |
| "total_tokens": 19364145 |
| }, |
| { |
| "epoch": 1.6852589641434261, |
| "grad_norm": 0.31640625, |
| "learning_rate": 9.439295527628083e-06, |
| "loss": 0.9531, |
| "memory/device_reserved (GiB)": 76.38, |
| "memory/max_active (GiB)": 64.91, |
| "memory/max_allocated (GiB)": 64.91, |
| "step": 106, |
| "tokens_per_second_per_gpu": 6142.09, |
| "total_tokens": 19554154 |
| }, |
| { |
| "epoch": 1.701195219123506, |
| "grad_norm": 0.333984375, |
| "learning_rate": 9.252699064135759e-06, |
| "loss": 0.998, |
| "memory/device_reserved (GiB)": 76.38, |
| "memory/max_active (GiB)": 64.91, |
| "memory/max_allocated (GiB)": 64.91, |
| "step": 107, |
| "tokens_per_second_per_gpu": 6059.46, |
| "total_tokens": 19736167 |
| }, |
| { |
| "epoch": 1.7171314741035857, |
| "grad_norm": 0.33203125, |
| "learning_rate": 9.066363915470494e-06, |
| "loss": 0.9204, |
| "memory/device_reserved (GiB)": 76.38, |
| "memory/max_active (GiB)": 64.91, |
| "memory/max_allocated (GiB)": 64.91, |
| "step": 108, |
| "tokens_per_second_per_gpu": 5807.52, |
| "total_tokens": 19928597 |
| }, |
| { |
| "epoch": 1.7330677290836654, |
| "grad_norm": 0.3515625, |
| "learning_rate": 8.880355238966923e-06, |
| "loss": 0.978, |
| "memory/device_reserved (GiB)": 76.38, |
| "memory/max_active (GiB)": 64.91, |
| "memory/max_allocated (GiB)": 64.91, |
| "step": 109, |
| "tokens_per_second_per_gpu": 5987.33, |
| "total_tokens": 20118188 |
| }, |
| { |
| "epoch": 1.749003984063745, |
| "grad_norm": 0.3359375, |
| "learning_rate": 8.694738077799487e-06, |
| "loss": 0.9702, |
| "memory/device_reserved (GiB)": 76.38, |
| "memory/max_active (GiB)": 64.91, |
| "memory/max_allocated (GiB)": 64.91, |
| "step": 110, |
| "tokens_per_second_per_gpu": 5888.18, |
| "total_tokens": 20301178 |
| }, |
| { |
| "epoch": 1.7649402390438247, |
| "grad_norm": 0.357421875, |
| "learning_rate": 8.509577338238255e-06, |
| "loss": 0.9253, |
| "memory/device_reserved (GiB)": 76.38, |
| "memory/max_active (GiB)": 64.91, |
| "memory/max_allocated (GiB)": 64.91, |
| "step": 111, |
| "tokens_per_second_per_gpu": 5972.68, |
| "total_tokens": 20487285 |
| }, |
| { |
| "epoch": 1.7808764940239044, |
| "grad_norm": 0.337890625, |
| "learning_rate": 8.324937766952638e-06, |
| "loss": 0.9814, |
| "memory/device_reserved (GiB)": 76.38, |
| "memory/max_active (GiB)": 64.91, |
| "memory/max_allocated (GiB)": 64.91, |
| "step": 112, |
| "tokens_per_second_per_gpu": 5932.16, |
| "total_tokens": 20674769 |
| }, |
| { |
| "epoch": 1.796812749003984, |
| "grad_norm": 0.341796875, |
| "learning_rate": 8.140883928370855e-06, |
| "loss": 1.0088, |
| "memory/device_reserved (GiB)": 76.38, |
| "memory/max_active (GiB)": 64.91, |
| "memory/max_allocated (GiB)": 64.91, |
| "step": 113, |
| "tokens_per_second_per_gpu": 5830.81, |
| "total_tokens": 20853823 |
| }, |
| { |
| "epoch": 1.812749003984064, |
| "grad_norm": 0.322265625, |
| "learning_rate": 7.957480182103198e-06, |
| "loss": 0.9487, |
| "memory/device_reserved (GiB)": 76.38, |
| "memory/max_active (GiB)": 64.91, |
| "memory/max_allocated (GiB)": 64.91, |
| "step": 114, |
| "tokens_per_second_per_gpu": 5865.22, |
| "total_tokens": 21037578 |
| }, |
| { |
| "epoch": 1.8286852589641436, |
| "grad_norm": 0.328125, |
| "learning_rate": 7.774790660436857e-06, |
| "loss": 0.9819, |
| "memory/device_reserved (GiB)": 76.38, |
| "memory/max_active (GiB)": 64.91, |
| "memory/max_allocated (GiB)": 64.91, |
| "step": 115, |
| "tokens_per_second_per_gpu": 6252.84, |
| "total_tokens": 21225336 |
| }, |
| { |
| "epoch": 1.8446215139442232, |
| "grad_norm": 0.33203125, |
| "learning_rate": 7.592879245910273e-06, |
| "loss": 0.9482, |
| "memory/device_reserved (GiB)": 76.38, |
| "memory/max_active (GiB)": 64.91, |
| "memory/max_allocated (GiB)": 64.91, |
| "step": 116, |
| "tokens_per_second_per_gpu": 6223.23, |
| "total_tokens": 21416652 |
| }, |
| { |
| "epoch": 1.860557768924303, |
| "grad_norm": 0.322265625, |
| "learning_rate": 7.411809548974792e-06, |
| "loss": 0.9697, |
| "memory/device_reserved (GiB)": 76.38, |
| "memory/max_active (GiB)": 64.91, |
| "memory/max_allocated (GiB)": 64.91, |
| "step": 117, |
| "tokens_per_second_per_gpu": 5561.76, |
| "total_tokens": 21604434 |
| }, |
| { |
| "epoch": 1.8764940239043826, |
| "grad_norm": 0.30859375, |
| "learning_rate": 7.2316448857515076e-06, |
| "loss": 0.9468, |
| "memory/device_reserved (GiB)": 76.38, |
| "memory/max_active (GiB)": 64.91, |
| "memory/max_allocated (GiB)": 64.91, |
| "step": 118, |
| "tokens_per_second_per_gpu": 6026.88, |
| "total_tokens": 21795820 |
| }, |
| { |
| "epoch": 1.8924302788844622, |
| "grad_norm": 0.32421875, |
| "learning_rate": 7.052448255890958e-06, |
| "loss": 0.9624, |
| "memory/device_reserved (GiB)": 76.38, |
| "memory/max_active (GiB)": 64.91, |
| "memory/max_allocated (GiB)": 64.91, |
| "step": 119, |
| "tokens_per_second_per_gpu": 5817.1, |
| "total_tokens": 21979166 |
| }, |
| { |
| "epoch": 1.908366533864542, |
| "grad_norm": 0.33984375, |
| "learning_rate": 6.874282320543557e-06, |
| "loss": 1.022, |
| "memory/device_reserved (GiB)": 76.38, |
| "memory/max_active (GiB)": 64.91, |
| "memory/max_allocated (GiB)": 64.91, |
| "step": 120, |
| "tokens_per_second_per_gpu": 5653.71, |
| "total_tokens": 22165661 |
| }, |
| { |
| "epoch": 1.9243027888446216, |
| "grad_norm": 0.32421875, |
| "learning_rate": 6.697209380448333e-06, |
| "loss": 0.9961, |
| "memory/device_reserved (GiB)": 76.38, |
| "memory/max_active (GiB)": 64.91, |
| "memory/max_allocated (GiB)": 64.91, |
| "step": 121, |
| "tokens_per_second_per_gpu": 5699.1, |
| "total_tokens": 22344204 |
| }, |
| { |
| "epoch": 1.9402390438247012, |
| "grad_norm": 0.33203125, |
| "learning_rate": 6.521291354147727e-06, |
| "loss": 0.9521, |
| "memory/device_reserved (GiB)": 76.38, |
| "memory/max_active (GiB)": 64.91, |
| "memory/max_allocated (GiB)": 64.91, |
| "step": 122, |
| "tokens_per_second_per_gpu": 5765.79, |
| "total_tokens": 22526884 |
| }, |
| { |
| "epoch": 1.956175298804781, |
| "grad_norm": 0.349609375, |
| "learning_rate": 6.34658975633605e-06, |
| "loss": 1.0171, |
| "memory/device_reserved (GiB)": 76.38, |
| "memory/max_active (GiB)": 64.91, |
| "memory/max_allocated (GiB)": 64.91, |
| "step": 123, |
| "tokens_per_second_per_gpu": 5780.38, |
| "total_tokens": 22706300 |
| }, |
| { |
| "epoch": 1.9721115537848606, |
| "grad_norm": 0.318359375, |
| "learning_rate": 6.173165676349103e-06, |
| "loss": 0.957, |
| "memory/device_reserved (GiB)": 76.38, |
| "memory/max_active (GiB)": 64.91, |
| "memory/max_allocated (GiB)": 64.91, |
| "step": 124, |
| "tokens_per_second_per_gpu": 5399.07, |
| "total_tokens": 22884311 |
| }, |
| { |
| "epoch": 1.9880478087649402, |
| "grad_norm": 0.357421875, |
| "learning_rate": 6.001079756802592e-06, |
| "loss": 0.9028, |
| "memory/device_reserved (GiB)": 76.38, |
| "memory/max_active (GiB)": 64.91, |
| "memory/max_allocated (GiB)": 64.91, |
| "step": 125, |
| "tokens_per_second_per_gpu": 5850.46, |
| "total_tokens": 23077450 |
| }, |
| { |
| "epoch": 2.0, |
| "grad_norm": 0.39453125, |
| "learning_rate": 5.830392172386723e-06, |
| "loss": 0.8589, |
| "memory/device_reserved (GiB)": 76.38, |
| "memory/max_active (GiB)": 64.91, |
| "memory/max_allocated (GiB)": 64.91, |
| "step": 126, |
| "tokens_per_second_per_gpu": 4564.2, |
| "total_tokens": 23201118 |
| }, |
| { |
| "epoch": 2.0159362549800797, |
| "grad_norm": 0.32421875, |
| "learning_rate": 5.66116260882442e-06, |
| "loss": 0.9985, |
| "memory/device_reserved (GiB)": 76.38, |
| "memory/max_active (GiB)": 64.91, |
| "memory/max_allocated (GiB)": 64.91, |
| "step": 127, |
| "tokens_per_second_per_gpu": 5832.26, |
| "total_tokens": 23381636 |
| }, |
| { |
| "epoch": 2.0318725099601593, |
| "grad_norm": 0.328125, |
| "learning_rate": 5.493450242000546e-06, |
| "loss": 0.9521, |
| "memory/device_reserved (GiB)": 76.38, |
| "memory/max_active (GiB)": 64.91, |
| "memory/max_allocated (GiB)": 64.91, |
| "step": 128, |
| "tokens_per_second_per_gpu": 5774.6, |
| "total_tokens": 23564875 |
| }, |
| { |
| "epoch": 2.047808764940239, |
| "grad_norm": 0.328125, |
| "learning_rate": 5.32731371726938e-06, |
| "loss": 0.98, |
| "memory/device_reserved (GiB)": 76.38, |
| "memory/max_active (GiB)": 64.91, |
| "memory/max_allocated (GiB)": 64.91, |
| "step": 129, |
| "tokens_per_second_per_gpu": 5972.38, |
| "total_tokens": 23759161 |
| }, |
| { |
| "epoch": 2.0637450199203187, |
| "grad_norm": 0.328125, |
| "learning_rate": 5.1628111289476025e-06, |
| "loss": 0.9746, |
| "memory/device_reserved (GiB)": 76.38, |
| "memory/max_active (GiB)": 64.91, |
| "memory/max_allocated (GiB)": 64.91, |
| "step": 130, |
| "tokens_per_second_per_gpu": 5919.54, |
| "total_tokens": 23944394 |
| }, |
| { |
| "epoch": 2.0796812749003983, |
| "grad_norm": 0.31640625, |
| "learning_rate": 5.000000000000003e-06, |
| "loss": 0.9229, |
| "memory/device_reserved (GiB)": 76.38, |
| "memory/max_active (GiB)": 64.91, |
| "memory/max_allocated (GiB)": 64.91, |
| "step": 131, |
| "tokens_per_second_per_gpu": 5414.05, |
| "total_tokens": 24130879 |
| }, |
| { |
| "epoch": 2.095617529880478, |
| "grad_norm": 0.33203125, |
| "learning_rate": 4.838937261924933e-06, |
| "loss": 0.9639, |
| "memory/device_reserved (GiB)": 76.38, |
| "memory/max_active (GiB)": 64.91, |
| "memory/max_allocated (GiB)": 64.91, |
| "step": 132, |
| "tokens_per_second_per_gpu": 5968.88, |
| "total_tokens": 24319434 |
| }, |
| { |
| "epoch": 2.1115537848605577, |
| "grad_norm": 0.31640625, |
| "learning_rate": 4.679679234846636e-06, |
| "loss": 0.9502, |
| "memory/device_reserved (GiB)": 76.38, |
| "memory/max_active (GiB)": 64.91, |
| "memory/max_allocated (GiB)": 64.91, |
| "step": 133, |
| "tokens_per_second_per_gpu": 5802.86, |
| "total_tokens": 24502733 |
| }, |
| { |
| "epoch": 2.1274900398406373, |
| "grad_norm": 0.318359375, |
| "learning_rate": 4.522281607821288e-06, |
| "loss": 0.9854, |
| "memory/device_reserved (GiB)": 76.38, |
| "memory/max_active (GiB)": 64.91, |
| "memory/max_allocated (GiB)": 64.91, |
| "step": 134, |
| "tokens_per_second_per_gpu": 5970.96, |
| "total_tokens": 24691592 |
| }, |
| { |
| "epoch": 2.143426294820717, |
| "grad_norm": 0.373046875, |
| "learning_rate": 4.3667994193637794e-06, |
| "loss": 0.9683, |
| "memory/device_reserved (GiB)": 76.38, |
| "memory/max_active (GiB)": 64.91, |
| "memory/max_allocated (GiB)": 64.91, |
| "step": 135, |
| "tokens_per_second_per_gpu": 5528.1, |
| "total_tokens": 24868694 |
| }, |
| { |
| "epoch": 2.1593625498007967, |
| "grad_norm": 0.318359375, |
| "learning_rate": 4.213287038201943e-06, |
| "loss": 0.9561, |
| "memory/device_reserved (GiB)": 76.38, |
| "memory/max_active (GiB)": 64.91, |
| "memory/max_allocated (GiB)": 64.91, |
| "step": 136, |
| "tokens_per_second_per_gpu": 6105.7, |
| "total_tokens": 25058925 |
| }, |
| { |
| "epoch": 2.1752988047808763, |
| "grad_norm": 0.322265625, |
| "learning_rate": 4.061798144264986e-06, |
| "loss": 0.9116, |
| "memory/device_reserved (GiB)": 76.38, |
| "memory/max_active (GiB)": 64.91, |
| "memory/max_allocated (GiB)": 64.91, |
| "step": 137, |
| "tokens_per_second_per_gpu": 5771.48, |
| "total_tokens": 25242607 |
| }, |
| { |
| "epoch": 2.191235059760956, |
| "grad_norm": 0.3359375, |
| "learning_rate": 3.912385709912794e-06, |
| "loss": 0.9966, |
| "memory/device_reserved (GiB)": 76.38, |
| "memory/max_active (GiB)": 64.91, |
| "memory/max_allocated (GiB)": 64.91, |
| "step": 138, |
| "tokens_per_second_per_gpu": 5723.02, |
| "total_tokens": 25417132 |
| }, |
| { |
| "epoch": 2.2071713147410357, |
| "grad_norm": 0.318359375, |
| "learning_rate": 3.7651019814126656e-06, |
| "loss": 0.9712, |
| "memory/device_reserved (GiB)": 76.38, |
| "memory/max_active (GiB)": 64.91, |
| "memory/max_allocated (GiB)": 64.91, |
| "step": 139, |
| "tokens_per_second_per_gpu": 5739.14, |
| "total_tokens": 25598249 |
| }, |
| { |
| "epoch": 2.2231075697211153, |
| "grad_norm": 0.306640625, |
| "learning_rate": 3.619998460669916e-06, |
| "loss": 0.9106, |
| "memory/device_reserved (GiB)": 76.38, |
| "memory/max_active (GiB)": 64.91, |
| "memory/max_allocated (GiB)": 64.91, |
| "step": 140, |
| "tokens_per_second_per_gpu": 6168.5, |
| "total_tokens": 25791590 |
| }, |
| { |
| "epoch": 2.239043824701195, |
| "grad_norm": 0.31640625, |
| "learning_rate": 3.4771258872187917e-06, |
| "loss": 0.9673, |
| "memory/device_reserved (GiB)": 76.38, |
| "memory/max_active (GiB)": 64.91, |
| "memory/max_allocated (GiB)": 64.91, |
| "step": 141, |
| "tokens_per_second_per_gpu": 6156.05, |
| "total_tokens": 25981829 |
| }, |
| { |
| "epoch": 2.2549800796812747, |
| "grad_norm": 0.33203125, |
| "learning_rate": 3.3365342204799613e-06, |
| "loss": 0.9019, |
| "memory/device_reserved (GiB)": 76.38, |
| "memory/max_active (GiB)": 64.91, |
| "memory/max_allocated (GiB)": 64.91, |
| "step": 142, |
| "tokens_per_second_per_gpu": 5766.33, |
| "total_tokens": 26169706 |
| }, |
| { |
| "epoch": 2.2709163346613543, |
| "grad_norm": 0.50390625, |
| "learning_rate": 3.1982726222908046e-06, |
| "loss": 0.9995, |
| "memory/device_reserved (GiB)": 76.38, |
| "memory/max_active (GiB)": 64.91, |
| "memory/max_allocated (GiB)": 64.91, |
| "step": 143, |
| "tokens_per_second_per_gpu": 5566.19, |
| "total_tokens": 26349809 |
| }, |
| { |
| "epoch": 2.2868525896414345, |
| "grad_norm": 0.359375, |
| "learning_rate": 3.0623894397145837e-06, |
| "loss": 0.9805, |
| "memory/device_reserved (GiB)": 76.38, |
| "memory/max_active (GiB)": 64.91, |
| "memory/max_allocated (GiB)": 64.91, |
| "step": 144, |
| "tokens_per_second_per_gpu": 5897.52, |
| "total_tokens": 26533516 |
| }, |
| { |
| "epoch": 2.302788844621514, |
| "grad_norm": 0.375, |
| "learning_rate": 2.9289321881345257e-06, |
| "loss": 0.9219, |
| "memory/device_reserved (GiB)": 76.38, |
| "memory/max_active (GiB)": 64.91, |
| "memory/max_allocated (GiB)": 64.91, |
| "step": 145, |
| "tokens_per_second_per_gpu": 6065.68, |
| "total_tokens": 26727728 |
| }, |
| { |
| "epoch": 2.318725099601594, |
| "grad_norm": 0.3359375, |
| "learning_rate": 2.7979475346387363e-06, |
| "loss": 0.9639, |
| "memory/device_reserved (GiB)": 76.38, |
| "memory/max_active (GiB)": 64.91, |
| "memory/max_allocated (GiB)": 64.91, |
| "step": 146, |
| "tokens_per_second_per_gpu": 5976.74, |
| "total_tokens": 26912160 |
| }, |
| { |
| "epoch": 2.3346613545816735, |
| "grad_norm": 0.34765625, |
| "learning_rate": 2.669481281701739e-06, |
| "loss": 0.9038, |
| "memory/device_reserved (GiB)": 76.38, |
| "memory/max_active (GiB)": 64.91, |
| "memory/max_allocated (GiB)": 64.91, |
| "step": 147, |
| "tokens_per_second_per_gpu": 5726.82, |
| "total_tokens": 27091959 |
| }, |
| { |
| "epoch": 2.350597609561753, |
| "grad_norm": 0.341796875, |
| "learning_rate": 2.5435783511683444e-06, |
| "loss": 0.9614, |
| "memory/device_reserved (GiB)": 76.38, |
| "memory/max_active (GiB)": 64.91, |
| "memory/max_allocated (GiB)": 64.91, |
| "step": 148, |
| "tokens_per_second_per_gpu": 5973.36, |
| "total_tokens": 27275142 |
| }, |
| { |
| "epoch": 2.366533864541833, |
| "grad_norm": 0.33203125, |
| "learning_rate": 2.420282768545469e-06, |
| "loss": 0.9219, |
| "memory/device_reserved (GiB)": 76.38, |
| "memory/max_active (GiB)": 64.91, |
| "memory/max_allocated (GiB)": 64.91, |
| "step": 149, |
| "tokens_per_second_per_gpu": 5654.39, |
| "total_tokens": 27462544 |
| }, |
| { |
| "epoch": 2.3824701195219125, |
| "grad_norm": 0.322265625, |
| "learning_rate": 2.2996376476073724e-06, |
| "loss": 0.9526, |
| "memory/device_reserved (GiB)": 76.38, |
| "memory/max_active (GiB)": 64.91, |
| "memory/max_allocated (GiB)": 64.91, |
| "step": 150, |
| "tokens_per_second_per_gpu": 5952.85, |
| "total_tokens": 27649339 |
| }, |
| { |
| "epoch": 2.398406374501992, |
| "grad_norm": 0.3203125, |
| "learning_rate": 2.1816851753197023e-06, |
| "loss": 0.8235, |
| "memory/device_reserved (GiB)": 76.38, |
| "memory/max_active (GiB)": 64.91, |
| "memory/max_allocated (GiB)": 64.91, |
| "step": 151, |
| "tokens_per_second_per_gpu": 5837.21, |
| "total_tokens": 27844536 |
| }, |
| { |
| "epoch": 2.414342629482072, |
| "grad_norm": 0.333984375, |
| "learning_rate": 2.0664665970876496e-06, |
| "loss": 0.9521, |
| "memory/device_reserved (GiB)": 76.38, |
| "memory/max_active (GiB)": 64.91, |
| "memory/max_allocated (GiB)": 64.91, |
| "step": 152, |
| "tokens_per_second_per_gpu": 5754.33, |
| "total_tokens": 28027682 |
| }, |
| { |
| "epoch": 2.4302788844621515, |
| "grad_norm": 0.326171875, |
| "learning_rate": 1.9540222023333165e-06, |
| "loss": 0.9805, |
| "memory/device_reserved (GiB)": 76.38, |
| "memory/max_active (GiB)": 64.91, |
| "memory/max_allocated (GiB)": 64.91, |
| "step": 153, |
| "tokens_per_second_per_gpu": 5902.39, |
| "total_tokens": 28209735 |
| }, |
| { |
| "epoch": 2.446215139442231, |
| "grad_norm": 0.32421875, |
| "learning_rate": 1.8443913104073984e-06, |
| "loss": 0.9321, |
| "memory/device_reserved (GiB)": 76.38, |
| "memory/max_active (GiB)": 64.91, |
| "memory/max_allocated (GiB)": 64.91, |
| "step": 154, |
| "tokens_per_second_per_gpu": 5930.45, |
| "total_tokens": 28395668 |
| }, |
| { |
| "epoch": 2.462151394422311, |
| "grad_norm": 0.3203125, |
| "learning_rate": 1.7376122568400533e-06, |
| "loss": 0.9756, |
| "memory/device_reserved (GiB)": 76.38, |
| "memory/max_active (GiB)": 64.91, |
| "memory/max_allocated (GiB)": 64.91, |
| "step": 155, |
| "tokens_per_second_per_gpu": 5945.66, |
| "total_tokens": 28581494 |
| }, |
| { |
| "epoch": 2.4780876494023905, |
| "grad_norm": 0.322265625, |
| "learning_rate": 1.6337223799358025e-06, |
| "loss": 0.9736, |
| "memory/device_reserved (GiB)": 76.38, |
| "memory/max_active (GiB)": 64.91, |
| "memory/max_allocated (GiB)": 64.91, |
| "step": 156, |
| "tokens_per_second_per_gpu": 6107.11, |
| "total_tokens": 28768428 |
| }, |
| { |
| "epoch": 2.49402390438247, |
| "grad_norm": 0.31640625, |
| "learning_rate": 1.5327580077171589e-06, |
| "loss": 0.9067, |
| "memory/device_reserved (GiB)": 76.38, |
| "memory/max_active (GiB)": 64.91, |
| "memory/max_allocated (GiB)": 64.91, |
| "step": 157, |
| "tokens_per_second_per_gpu": 6059.61, |
| "total_tokens": 28952100 |
| }, |
| { |
| "epoch": 2.50996015936255, |
| "grad_norm": 0.326171875, |
| "learning_rate": 1.4347544452214869e-06, |
| "loss": 0.9512, |
| "memory/device_reserved (GiB)": 76.38, |
| "memory/max_active (GiB)": 64.91, |
| "memory/max_allocated (GiB)": 64.91, |
| "step": 158, |
| "tokens_per_second_per_gpu": 6010.47, |
| "total_tokens": 29138450 |
| }, |
| { |
| "epoch": 2.5258964143426295, |
| "grad_norm": 0.376953125, |
| "learning_rate": 1.339745962155613e-06, |
| "loss": 0.9409, |
| "memory/device_reserved (GiB)": 76.38, |
| "memory/max_active (GiB)": 64.91, |
| "memory/max_allocated (GiB)": 64.91, |
| "step": 159, |
| "tokens_per_second_per_gpu": 6038.89, |
| "total_tokens": 29321969 |
| }, |
| { |
| "epoch": 2.541832669322709, |
| "grad_norm": 0.330078125, |
| "learning_rate": 1.2477657809124632e-06, |
| "loss": 0.9824, |
| "memory/device_reserved (GiB)": 76.38, |
| "memory/max_active (GiB)": 64.91, |
| "memory/max_allocated (GiB)": 64.91, |
| "step": 160, |
| "tokens_per_second_per_gpu": 5740.71, |
| "total_tokens": 29498520 |
| }, |
| { |
| "epoch": 2.557768924302789, |
| "grad_norm": 0.328125, |
| "learning_rate": 1.1588460649539036e-06, |
| "loss": 1.0068, |
| "memory/device_reserved (GiB)": 76.38, |
| "memory/max_active (GiB)": 64.91, |
| "memory/max_allocated (GiB)": 64.91, |
| "step": 161, |
| "tokens_per_second_per_gpu": 5804.04, |
| "total_tokens": 29681434 |
| }, |
| { |
| "epoch": 2.5737051792828685, |
| "grad_norm": 0.337890625, |
| "learning_rate": 1.073017907563887e-06, |
| "loss": 0.9453, |
| "memory/device_reserved (GiB)": 76.38, |
| "memory/max_active (GiB)": 64.91, |
| "memory/max_allocated (GiB)": 64.91, |
| "step": 162, |
| "tokens_per_second_per_gpu": 5741.44, |
| "total_tokens": 29866170 |
| }, |
| { |
| "epoch": 2.589641434262948, |
| "grad_norm": 0.3203125, |
| "learning_rate": 9.903113209758098e-07, |
| "loss": 0.9443, |
| "memory/device_reserved (GiB)": 76.38, |
| "memory/max_active (GiB)": 64.91, |
| "memory/max_allocated (GiB)": 64.91, |
| "step": 163, |
| "tokens_per_second_per_gpu": 5756.27, |
| "total_tokens": 30050401 |
| }, |
| { |
| "epoch": 2.605577689243028, |
| "grad_norm": 0.32421875, |
| "learning_rate": 9.107552258778907e-07, |
| "loss": 0.9585, |
| "memory/device_reserved (GiB)": 76.38, |
| "memory/max_active (GiB)": 64.91, |
| "memory/max_allocated (GiB)": 64.91, |
| "step": 164, |
| "tokens_per_second_per_gpu": 5959.88, |
| "total_tokens": 30233631 |
| }, |
| { |
| "epoch": 2.6215139442231075, |
| "grad_norm": 0.3203125, |
| "learning_rate": 8.343774413002382e-07, |
| "loss": 0.9604, |
| "memory/device_reserved (GiB)": 76.38, |
| "memory/max_active (GiB)": 64.91, |
| "memory/max_allocated (GiB)": 64.91, |
| "step": 165, |
| "tokens_per_second_per_gpu": 5899.57, |
| "total_tokens": 30415679 |
| }, |
| { |
| "epoch": 2.637450199203187, |
| "grad_norm": 0.326171875, |
| "learning_rate": 7.612046748871327e-07, |
| "loss": 0.9277, |
| "memory/device_reserved (GiB)": 76.38, |
| "memory/max_active (GiB)": 64.91, |
| "memory/max_allocated (GiB)": 64.91, |
| "step": 166, |
| "tokens_per_second_per_gpu": 5845.3, |
| "total_tokens": 30601972 |
| }, |
| { |
| "epoch": 2.653386454183267, |
| "grad_norm": 0.33984375, |
| "learning_rate": 6.912625135579587e-07, |
| "loss": 1.022, |
| "memory/device_reserved (GiB)": 76.38, |
| "memory/max_active (GiB)": 64.91, |
| "memory/max_allocated (GiB)": 64.91, |
| "step": 167, |
| "tokens_per_second_per_gpu": 5574.11, |
| "total_tokens": 30778381 |
| }, |
| { |
| "epoch": 2.6693227091633465, |
| "grad_norm": 0.31640625, |
| "learning_rate": 6.245754145600091e-07, |
| "loss": 0.9707, |
| "memory/device_reserved (GiB)": 76.38, |
| "memory/max_active (GiB)": 64.91, |
| "memory/max_allocated (GiB)": 64.91, |
| "step": 168, |
| "tokens_per_second_per_gpu": 5930.11, |
| "total_tokens": 30964704 |
| }, |
| { |
| "epoch": 2.685258964143426, |
| "grad_norm": 0.318359375, |
| "learning_rate": 5.611666969163243e-07, |
| "loss": 0.9448, |
| "memory/device_reserved (GiB)": 76.38, |
| "memory/max_active (GiB)": 64.91, |
| "memory/max_allocated (GiB)": 64.91, |
| "step": 169, |
| "tokens_per_second_per_gpu": 6128.63, |
| "total_tokens": 31154713 |
| }, |
| { |
| "epoch": 2.7011952191235062, |
| "grad_norm": 0.333984375, |
| "learning_rate": 5.010585332715401e-07, |
| "loss": 0.9883, |
| "memory/device_reserved (GiB)": 76.38, |
| "memory/max_active (GiB)": 64.91, |
| "memory/max_allocated (GiB)": 64.91, |
| "step": 170, |
| "tokens_per_second_per_gpu": 6064.77, |
| "total_tokens": 31336726 |
| }, |
| { |
| "epoch": 2.717131474103586, |
| "grad_norm": 0.328125, |
| "learning_rate": 4.4427194213859216e-07, |
| "loss": 0.9131, |
| "memory/device_reserved (GiB)": 76.38, |
| "memory/max_active (GiB)": 64.91, |
| "memory/max_allocated (GiB)": 64.91, |
| "step": 171, |
| "tokens_per_second_per_gpu": 5798.36, |
| "total_tokens": 31529156 |
| }, |
| { |
| "epoch": 2.7330677290836656, |
| "grad_norm": 0.318359375, |
| "learning_rate": 3.908267805490051e-07, |
| "loss": 0.9697, |
| "memory/device_reserved (GiB)": 76.38, |
| "memory/max_active (GiB)": 64.91, |
| "memory/max_allocated (GiB)": 64.91, |
| "step": 172, |
| "tokens_per_second_per_gpu": 5977.29, |
| "total_tokens": 31718747 |
| }, |
| { |
| "epoch": 2.7490039840637452, |
| "grad_norm": 0.328125, |
| "learning_rate": 3.4074173710931804e-07, |
| "loss": 0.9619, |
| "memory/device_reserved (GiB)": 76.38, |
| "memory/max_active (GiB)": 64.91, |
| "memory/max_allocated (GiB)": 64.91, |
| "step": 173, |
| "tokens_per_second_per_gpu": 5934.4, |
| "total_tokens": 31901737 |
| }, |
| { |
| "epoch": 2.764940239043825, |
| "grad_norm": 0.322265625, |
| "learning_rate": 2.940343254660905e-07, |
| "loss": 0.9185, |
| "memory/device_reserved (GiB)": 76.38, |
| "memory/max_active (GiB)": 64.91, |
| "memory/max_allocated (GiB)": 64.91, |
| "step": 174, |
| "tokens_per_second_per_gpu": 5978.89, |
| "total_tokens": 32087844 |
| }, |
| { |
| "epoch": 2.7808764940239046, |
| "grad_norm": 0.328125, |
| "learning_rate": 2.507208781817638e-07, |
| "loss": 0.9751, |
| "memory/device_reserved (GiB)": 76.38, |
| "memory/max_active (GiB)": 64.91, |
| "memory/max_allocated (GiB)": 64.91, |
| "step": 175, |
| "tokens_per_second_per_gpu": 5946.29, |
| "total_tokens": 32275328 |
| }, |
| { |
| "epoch": 2.7968127490039842, |
| "grad_norm": 0.337890625, |
| "learning_rate": 2.1081654102351634e-07, |
| "loss": 1.0015, |
| "memory/device_reserved (GiB)": 76.38, |
| "memory/max_active (GiB)": 64.91, |
| "memory/max_allocated (GiB)": 64.91, |
| "step": 176, |
| "tokens_per_second_per_gpu": 5871.96, |
| "total_tokens": 32454382 |
| }, |
| { |
| "epoch": 2.812749003984064, |
| "grad_norm": 0.318359375, |
| "learning_rate": 1.7433526766711727e-07, |
| "loss": 0.9429, |
| "memory/device_reserved (GiB)": 76.38, |
| "memory/max_active (GiB)": 64.91, |
| "memory/max_allocated (GiB)": 64.91, |
| "step": 177, |
| "tokens_per_second_per_gpu": 5872.24, |
| "total_tokens": 32638137 |
| }, |
| { |
| "epoch": 2.8286852589641436, |
| "grad_norm": 0.3203125, |
| "learning_rate": 1.4128981481764115e-07, |
| "loss": 0.9746, |
| "memory/device_reserved (GiB)": 76.38, |
| "memory/max_active (GiB)": 64.91, |
| "memory/max_allocated (GiB)": 64.91, |
| "step": 178, |
| "tokens_per_second_per_gpu": 6173.8, |
| "total_tokens": 32825895 |
| }, |
| { |
| "epoch": 2.8446215139442232, |
| "grad_norm": 0.322265625, |
| "learning_rate": 1.1169173774871478e-07, |
| "loss": 0.9434, |
| "memory/device_reserved (GiB)": 76.38, |
| "memory/max_active (GiB)": 64.91, |
| "memory/max_allocated (GiB)": 64.91, |
| "step": 179, |
| "tokens_per_second_per_gpu": 6232.85, |
| "total_tokens": 33017211 |
| }, |
| { |
| "epoch": 2.860557768924303, |
| "grad_norm": 0.3203125, |
| "learning_rate": 8.555138626189619e-08, |
| "loss": 0.9644, |
| "memory/device_reserved (GiB)": 76.38, |
| "memory/max_active (GiB)": 64.91, |
| "memory/max_allocated (GiB)": 64.91, |
| "step": 180, |
| "tokens_per_second_per_gpu": 5578.64, |
| "total_tokens": 33204993 |
| }, |
| { |
| "epoch": 2.8764940239043826, |
| "grad_norm": 0.310546875, |
| "learning_rate": 6.287790106757396e-08, |
| "loss": 0.9429, |
| "memory/device_reserved (GiB)": 76.38, |
| "memory/max_active (GiB)": 64.91, |
| "memory/max_allocated (GiB)": 64.91, |
| "step": 181, |
| "tokens_per_second_per_gpu": 6034.66, |
| "total_tokens": 33396379 |
| }, |
| { |
| "epoch": 2.8924302788844622, |
| "grad_norm": 0.318359375, |
| "learning_rate": 4.367921058866187e-08, |
| "loss": 0.959, |
| "memory/device_reserved (GiB)": 76.38, |
| "memory/max_active (GiB)": 64.91, |
| "memory/max_allocated (GiB)": 64.91, |
| "step": 182, |
| "tokens_per_second_per_gpu": 5841.56, |
| "total_tokens": 33579725 |
| }, |
| { |
| "epoch": 2.908366533864542, |
| "grad_norm": 0.33984375, |
| "learning_rate": 2.796202818819871e-08, |
| "loss": 1.0166, |
| "memory/device_reserved (GiB)": 76.38, |
| "memory/max_active (GiB)": 64.91, |
| "memory/max_allocated (GiB)": 64.91, |
| "step": 183, |
| "tokens_per_second_per_gpu": 5736.29, |
| "total_tokens": 33766220 |
| }, |
| { |
| "epoch": 2.9243027888446216, |
| "grad_norm": 0.322265625, |
| "learning_rate": 1.5731849821833955e-08, |
| "loss": 0.9907, |
| "memory/device_reserved (GiB)": 76.38, |
| "memory/max_active (GiB)": 64.91, |
| "memory/max_allocated (GiB)": 64.91, |
| "step": 184, |
| "tokens_per_second_per_gpu": 5704.78, |
| "total_tokens": 33944763 |
| }, |
| { |
| "epoch": 2.9402390438247012, |
| "grad_norm": 0.326171875, |
| "learning_rate": 6.992952116013918e-09, |
| "loss": 0.9478, |
| "memory/device_reserved (GiB)": 76.38, |
| "memory/max_active (GiB)": 64.91, |
| "memory/max_allocated (GiB)": 64.91, |
| "step": 185, |
| "tokens_per_second_per_gpu": 5773.31, |
| "total_tokens": 34127443 |
| }, |
| { |
| "epoch": 2.956175298804781, |
| "grad_norm": 0.349609375, |
| "learning_rate": 1.7483908725357546e-09, |
| "loss": 1.0122, |
| "memory/device_reserved (GiB)": 76.38, |
| "memory/max_active (GiB)": 64.91, |
| "memory/max_allocated (GiB)": 64.91, |
| "step": 186, |
| "tokens_per_second_per_gpu": 5785.44, |
| "total_tokens": 34306859 |
| } |
| ], |
| "logging_steps": 1, |
| "max_steps": 186, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 3, |
| "save_steps": 62, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 2.2082055574021734e+18, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|