| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 1.0, | |
| "eval_steps": 500, | |
| "global_step": 416, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.024067388688327317, | |
| "grad_norm": 4.96875, | |
| "learning_rate": 3.6000000000000003e-06, | |
| "loss": 2.9962, | |
| "memory/device_reserved (GiB)": 32.53, | |
| "memory/max_active (GiB)": 31.83, | |
| "memory/max_allocated (GiB)": 31.83, | |
| "step": 10, | |
| "tokens_per_second_per_gpu": 2526.79 | |
| }, | |
| { | |
| "epoch": 0.048134777376654635, | |
| "grad_norm": 4.375, | |
| "learning_rate": 7.600000000000001e-06, | |
| "loss": 3.0077, | |
| "memory/device_reserved (GiB)": 32.54, | |
| "memory/max_active (GiB)": 31.81, | |
| "memory/max_allocated (GiB)": 31.81, | |
| "step": 20, | |
| "tokens_per_second_per_gpu": 2746.26 | |
| }, | |
| { | |
| "epoch": 0.07220216606498195, | |
| "grad_norm": 4.21875, | |
| "learning_rate": 1.16e-05, | |
| "loss": 2.8836, | |
| "memory/device_reserved (GiB)": 32.54, | |
| "memory/max_active (GiB)": 31.81, | |
| "memory/max_allocated (GiB)": 31.81, | |
| "step": 30, | |
| "tokens_per_second_per_gpu": 2704.02 | |
| }, | |
| { | |
| "epoch": 0.09626955475330927, | |
| "grad_norm": 4.15625, | |
| "learning_rate": 1.5600000000000003e-05, | |
| "loss": 2.7347, | |
| "memory/device_reserved (GiB)": 32.54, | |
| "memory/max_active (GiB)": 31.81, | |
| "memory/max_allocated (GiB)": 31.81, | |
| "step": 40, | |
| "tokens_per_second_per_gpu": 2751.94 | |
| }, | |
| { | |
| "epoch": 0.12033694344163658, | |
| "grad_norm": 13.8125, | |
| "learning_rate": 1.9600000000000002e-05, | |
| "loss": 2.6617, | |
| "memory/device_reserved (GiB)": 32.55, | |
| "memory/max_active (GiB)": 31.8, | |
| "memory/max_allocated (GiB)": 31.8, | |
| "step": 50, | |
| "tokens_per_second_per_gpu": 12.16 | |
| }, | |
| { | |
| "epoch": 0.1444043321299639, | |
| "grad_norm": 2.953125, | |
| "learning_rate": 1.9970175264485268e-05, | |
| "loss": 2.6474, | |
| "memory/device_reserved (GiB)": 32.9, | |
| "memory/max_active (GiB)": 32.02, | |
| "memory/max_allocated (GiB)": 32.02, | |
| "step": 60, | |
| "tokens_per_second_per_gpu": 3420.31 | |
| }, | |
| { | |
| "epoch": 0.1684717208182912, | |
| "grad_norm": 3.359375, | |
| "learning_rate": 1.9867305793119814e-05, | |
| "loss": 2.5291, | |
| "memory/device_reserved (GiB)": 32.9, | |
| "memory/max_active (GiB)": 31.81, | |
| "memory/max_allocated (GiB)": 31.81, | |
| "step": 70, | |
| "tokens_per_second_per_gpu": 2842.63 | |
| }, | |
| { | |
| "epoch": 0.19253910950661854, | |
| "grad_norm": 3.53125, | |
| "learning_rate": 1.9691780654392538e-05, | |
| "loss": 2.5769, | |
| "memory/device_reserved (GiB)": 32.9, | |
| "memory/max_active (GiB)": 31.81, | |
| "memory/max_allocated (GiB)": 31.81, | |
| "step": 80, | |
| "tokens_per_second_per_gpu": 2713.62 | |
| }, | |
| { | |
| "epoch": 0.21660649819494585, | |
| "grad_norm": 4.0, | |
| "learning_rate": 1.9444892287836614e-05, | |
| "loss": 2.509, | |
| "memory/device_reserved (GiB)": 32.9, | |
| "memory/max_active (GiB)": 31.81, | |
| "memory/max_allocated (GiB)": 31.81, | |
| "step": 90, | |
| "tokens_per_second_per_gpu": 2652.2 | |
| }, | |
| { | |
| "epoch": 0.24067388688327315, | |
| "grad_norm": 9.6875, | |
| "learning_rate": 1.9128458599921358e-05, | |
| "loss": 2.3728, | |
| "memory/device_reserved (GiB)": 32.9, | |
| "memory/max_active (GiB)": 31.81, | |
| "memory/max_allocated (GiB)": 31.81, | |
| "step": 100, | |
| "tokens_per_second_per_gpu": 15.95 | |
| }, | |
| { | |
| "epoch": 0.2647412755716005, | |
| "grad_norm": 3.25, | |
| "learning_rate": 1.8744809578310398e-05, | |
| "loss": 2.5744, | |
| "memory/device_reserved (GiB)": 32.76, | |
| "memory/max_active (GiB)": 31.92, | |
| "memory/max_allocated (GiB)": 31.92, | |
| "step": 110, | |
| "tokens_per_second_per_gpu": 3061.35 | |
| }, | |
| { | |
| "epoch": 0.2888086642599278, | |
| "grad_norm": 3.203125, | |
| "learning_rate": 1.829677013552619e-05, | |
| "loss": 2.4197, | |
| "memory/device_reserved (GiB)": 32.76, | |
| "memory/max_active (GiB)": 31.81, | |
| "memory/max_allocated (GiB)": 31.81, | |
| "step": 120, | |
| "tokens_per_second_per_gpu": 2742.16 | |
| }, | |
| { | |
| "epoch": 0.3128760529482551, | |
| "grad_norm": 3.6875, | |
| "learning_rate": 1.778763930834761e-05, | |
| "loss": 2.3961, | |
| "memory/device_reserved (GiB)": 32.76, | |
| "memory/max_active (GiB)": 31.81, | |
| "memory/max_allocated (GiB)": 31.81, | |
| "step": 130, | |
| "tokens_per_second_per_gpu": 2790.74 | |
| }, | |
| { | |
| "epoch": 0.3369434416365824, | |
| "grad_norm": 4.15625, | |
| "learning_rate": 1.7221165966101163e-05, | |
| "loss": 2.4885, | |
| "memory/device_reserved (GiB)": 32.76, | |
| "memory/max_active (GiB)": 31.81, | |
| "memory/max_allocated (GiB)": 31.81, | |
| "step": 140, | |
| "tokens_per_second_per_gpu": 2710.33 | |
| }, | |
| { | |
| "epoch": 0.36101083032490977, | |
| "grad_norm": 8.5, | |
| "learning_rate": 1.660152120671232e-05, | |
| "loss": 2.3189, | |
| "memory/device_reserved (GiB)": 32.76, | |
| "memory/max_active (GiB)": 31.8, | |
| "memory/max_allocated (GiB)": 31.8, | |
| "step": 150, | |
| "tokens_per_second_per_gpu": 13.9 | |
| }, | |
| { | |
| "epoch": 0.3850782190132371, | |
| "grad_norm": 3.109375, | |
| "learning_rate": 1.593326764377232e-05, | |
| "loss": 2.4858, | |
| "memory/device_reserved (GiB)": 32.82, | |
| "memory/max_active (GiB)": 31.96, | |
| "memory/max_allocated (GiB)": 31.96, | |
| "step": 160, | |
| "tokens_per_second_per_gpu": 3034.65 | |
| }, | |
| { | |
| "epoch": 0.4091456077015644, | |
| "grad_norm": 3.296875, | |
| "learning_rate": 1.5221325810768251e-05, | |
| "loss": 2.3984, | |
| "memory/device_reserved (GiB)": 32.82, | |
| "memory/max_active (GiB)": 31.81, | |
| "memory/max_allocated (GiB)": 31.81, | |
| "step": 170, | |
| "tokens_per_second_per_gpu": 2794.98 | |
| }, | |
| { | |
| "epoch": 0.4332129963898917, | |
| "grad_norm": 3.765625, | |
| "learning_rate": 1.4470937929851142e-05, | |
| "loss": 2.4386, | |
| "memory/device_reserved (GiB)": 32.82, | |
| "memory/max_active (GiB)": 31.81, | |
| "memory/max_allocated (GiB)": 31.81, | |
| "step": 180, | |
| "tokens_per_second_per_gpu": 2640.62 | |
| }, | |
| { | |
| "epoch": 0.457280385078219, | |
| "grad_norm": 3.9375, | |
| "learning_rate": 1.3687629311922604e-05, | |
| "loss": 2.3784, | |
| "memory/device_reserved (GiB)": 32.82, | |
| "memory/max_active (GiB)": 31.81, | |
| "memory/max_allocated (GiB)": 31.81, | |
| "step": 190, | |
| "tokens_per_second_per_gpu": 2688.2 | |
| }, | |
| { | |
| "epoch": 0.4813477737665463, | |
| "grad_norm": 6.375, | |
| "learning_rate": 1.287716767226167e-05, | |
| "loss": 2.254, | |
| "memory/device_reserved (GiB)": 32.82, | |
| "memory/max_active (GiB)": 31.8, | |
| "memory/max_allocated (GiB)": 31.8, | |
| "step": 200, | |
| "tokens_per_second_per_gpu": 16.53 | |
| }, | |
| { | |
| "epoch": 0.5054151624548736, | |
| "grad_norm": 3.125, | |
| "learning_rate": 1.2045520661262011e-05, | |
| "loss": 2.5127, | |
| "memory/device_reserved (GiB)": 32.94, | |
| "memory/max_active (GiB)": 31.96, | |
| "memory/max_allocated (GiB)": 31.96, | |
| "step": 210, | |
| "tokens_per_second_per_gpu": 3069.32 | |
| }, | |
| { | |
| "epoch": 0.529482551143201, | |
| "grad_norm": 3.328125, | |
| "learning_rate": 1.1198811922992274e-05, | |
| "loss": 2.4208, | |
| "memory/device_reserved (GiB)": 32.94, | |
| "memory/max_active (GiB)": 31.81, | |
| "memory/max_allocated (GiB)": 31.81, | |
| "step": 220, | |
| "tokens_per_second_per_gpu": 2758.53 | |
| }, | |
| { | |
| "epoch": 0.5535499398315282, | |
| "grad_norm": 3.390625, | |
| "learning_rate": 1.0343276005132436e-05, | |
| "loss": 2.3724, | |
| "memory/device_reserved (GiB)": 32.94, | |
| "memory/max_active (GiB)": 31.81, | |
| "memory/max_allocated (GiB)": 31.81, | |
| "step": 230, | |
| "tokens_per_second_per_gpu": 2799.61 | |
| }, | |
| { | |
| "epoch": 0.5776173285198556, | |
| "grad_norm": 3.96875, | |
| "learning_rate": 9.485212452296535e-06, | |
| "loss": 2.3658, | |
| "memory/device_reserved (GiB)": 32.94, | |
| "memory/max_active (GiB)": 31.81, | |
| "memory/max_allocated (GiB)": 31.81, | |
| "step": 240, | |
| "tokens_per_second_per_gpu": 2669.67 | |
| }, | |
| { | |
| "epoch": 0.601684717208183, | |
| "grad_norm": 7.875, | |
| "learning_rate": 8.630939420765247e-06, | |
| "loss": 2.2695, | |
| "memory/device_reserved (GiB)": 32.94, | |
| "memory/max_active (GiB)": 31.81, | |
| "memory/max_allocated (GiB)": 31.81, | |
| "step": 250, | |
| "tokens_per_second_per_gpu": 16.97 | |
| }, | |
| { | |
| "epoch": 0.6257521058965102, | |
| "grad_norm": 3.0625, | |
| "learning_rate": 7.786747156175675e-06, | |
| "loss": 2.4657, | |
| "memory/device_reserved (GiB)": 32.64, | |
| "memory/max_active (GiB)": 31.99, | |
| "memory/max_allocated (GiB)": 31.99, | |
| "step": 260, | |
| "tokens_per_second_per_gpu": 3091.19 | |
| }, | |
| { | |
| "epoch": 0.6498194945848376, | |
| "grad_norm": 3.234375, | |
| "learning_rate": 6.958851676724823e-06, | |
| "loss": 2.4131, | |
| "memory/device_reserved (GiB)": 32.64, | |
| "memory/max_active (GiB)": 31.81, | |
| "memory/max_allocated (GiB)": 31.81, | |
| "step": 270, | |
| "tokens_per_second_per_gpu": 2802.37 | |
| }, | |
| { | |
| "epoch": 0.6738868832731648, | |
| "grad_norm": 3.421875, | |
| "learning_rate": 6.153349002929988e-06, | |
| "loss": 2.3716, | |
| "memory/device_reserved (GiB)": 32.64, | |
| "memory/max_active (GiB)": 31.81, | |
| "memory/max_allocated (GiB)": 31.81, | |
| "step": 280, | |
| "tokens_per_second_per_gpu": 2645.5 | |
| }, | |
| { | |
| "epoch": 0.6979542719614922, | |
| "grad_norm": 4.125, | |
| "learning_rate": 5.3761702709648555e-06, | |
| "loss": 2.3805, | |
| "memory/device_reserved (GiB)": 32.64, | |
| "memory/max_active (GiB)": 31.81, | |
| "memory/max_allocated (GiB)": 31.81, | |
| "step": 290, | |
| "tokens_per_second_per_gpu": 2761.33 | |
| }, | |
| { | |
| "epoch": 0.7220216606498195, | |
| "grad_norm": 11.75, | |
| "learning_rate": 4.633038060083996e-06, | |
| "loss": 2.1915, | |
| "memory/device_reserved (GiB)": 32.65, | |
| "memory/max_active (GiB)": 31.8, | |
| "memory/max_allocated (GiB)": 31.8, | |
| "step": 300, | |
| "tokens_per_second_per_gpu": 14.43 | |
| }, | |
| { | |
| "epoch": 0.7460890493381468, | |
| "grad_norm": 3.078125, | |
| "learning_rate": 3.929424255708999e-06, | |
| "loss": 2.4542, | |
| "memory/device_reserved (GiB)": 33.33, | |
| "memory/max_active (GiB)": 31.99, | |
| "memory/max_allocated (GiB)": 31.99, | |
| "step": 310, | |
| "tokens_per_second_per_gpu": 3503.84 | |
| }, | |
| { | |
| "epoch": 0.7701564380264742, | |
| "grad_norm": 3.28125, | |
| "learning_rate": 3.2705097584416712e-06, | |
| "loss": 2.4018, | |
| "memory/device_reserved (GiB)": 33.33, | |
| "memory/max_active (GiB)": 31.81, | |
| "memory/max_allocated (GiB)": 31.81, | |
| "step": 320, | |
| "tokens_per_second_per_gpu": 2889.9 | |
| }, | |
| { | |
| "epoch": 0.7942238267148014, | |
| "grad_norm": 3.53125, | |
| "learning_rate": 2.66114633567801e-06, | |
| "loss": 2.4133, | |
| "memory/device_reserved (GiB)": 33.33, | |
| "memory/max_active (GiB)": 31.81, | |
| "memory/max_allocated (GiB)": 31.81, | |
| "step": 330, | |
| "tokens_per_second_per_gpu": 2775.32 | |
| }, | |
| { | |
| "epoch": 0.8182912154031288, | |
| "grad_norm": 4.09375, | |
| "learning_rate": 2.1058208967198046e-06, | |
| "loss": 2.3389, | |
| "memory/device_reserved (GiB)": 33.33, | |
| "memory/max_active (GiB)": 31.81, | |
| "memory/max_allocated (GiB)": 31.81, | |
| "step": 340, | |
| "tokens_per_second_per_gpu": 2850.91 | |
| }, | |
| { | |
| "epoch": 0.8423586040914561, | |
| "grad_norm": 12.9375, | |
| "learning_rate": 1.6086224544360617e-06, | |
| "loss": 2.2469, | |
| "memory/device_reserved (GiB)": 33.33, | |
| "memory/max_active (GiB)": 31.8, | |
| "memory/max_allocated (GiB)": 31.8, | |
| "step": 350, | |
| "tokens_per_second_per_gpu": 13.75 | |
| }, | |
| { | |
| "epoch": 0.8664259927797834, | |
| "grad_norm": 3.015625, | |
| "learning_rate": 1.1732120167445248e-06, | |
| "loss": 2.4261, | |
| "memory/device_reserved (GiB)": 32.94, | |
| "memory/max_active (GiB)": 31.92, | |
| "memory/max_allocated (GiB)": 31.92, | |
| "step": 360, | |
| "tokens_per_second_per_gpu": 3009.01 | |
| }, | |
| { | |
| "epoch": 0.8904933814681107, | |
| "grad_norm": 3.28125, | |
| "learning_rate": 8.027956296105355e-07, | |
| "loss": 2.411, | |
| "memory/device_reserved (GiB)": 32.94, | |
| "memory/max_active (GiB)": 31.81, | |
| "memory/max_allocated (GiB)": 31.81, | |
| "step": 370, | |
| "tokens_per_second_per_gpu": 2749.41 | |
| }, | |
| { | |
| "epoch": 0.914560770156438, | |
| "grad_norm": 3.453125, | |
| "learning_rate": 5.001007700549898e-07, | |
| "loss": 2.3689, | |
| "memory/device_reserved (GiB)": 32.94, | |
| "memory/max_active (GiB)": 31.81, | |
| "memory/max_allocated (GiB)": 31.81, | |
| "step": 380, | |
| "tokens_per_second_per_gpu": 2769.43 | |
| }, | |
| { | |
| "epoch": 0.9386281588447654, | |
| "grad_norm": 3.84375, | |
| "learning_rate": 2.6735626299617456e-07, | |
| "loss": 2.3496, | |
| "memory/device_reserved (GiB)": 32.94, | |
| "memory/max_active (GiB)": 31.81, | |
| "memory/max_allocated (GiB)": 31.81, | |
| "step": 390, | |
| "tokens_per_second_per_gpu": 2799.6 | |
| }, | |
| { | |
| "epoch": 0.9626955475330926, | |
| "grad_norm": 10.625, | |
| "learning_rate": 1.0627586980317073e-07, | |
| "loss": 2.1982, | |
| "memory/device_reserved (GiB)": 32.94, | |
| "memory/max_active (GiB)": 31.8, | |
| "memory/max_allocated (GiB)": 31.8, | |
| "step": 400, | |
| "tokens_per_second_per_gpu": 14.1 | |
| }, | |
| { | |
| "epoch": 0.98676293622142, | |
| "grad_norm": 3.75, | |
| "learning_rate": 1.8045669402859678e-08, | |
| "loss": 2.473, | |
| "memory/device_reserved (GiB)": 32.76, | |
| "memory/max_active (GiB)": 31.86, | |
| "memory/max_allocated (GiB)": 31.86, | |
| "step": 410, | |
| "tokens_per_second_per_gpu": 3564.04 | |
| } | |
| ], | |
| "logging_steps": 10, | |
| "max_steps": 416, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 50, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 2.6853339156578304e+16, | |
| "train_batch_size": 2, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |