{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 2.0,
  "eval_steps": 500,
  "global_step": 366,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0547945205479452,
      "grad_norm": 1.181526780128479,
      "learning_rate": 4.736842105263158e-05,
      "loss": 1.799,
      "step": 10
    },
    {
      "epoch": 0.1095890410958904,
      "grad_norm": 0.522590160369873,
      "learning_rate": 0.0001,
      "loss": 1.2094,
      "step": 20
    },
    {
      "epoch": 0.1643835616438356,
      "grad_norm": 0.3870140314102173,
      "learning_rate": 9.979522160511282e-05,
      "loss": 0.8021,
      "step": 30
    },
    {
      "epoch": 0.2191780821917808,
      "grad_norm": 0.3272338807582855,
      "learning_rate": 9.918256378809177e-05,
      "loss": 0.7125,
      "step": 40
    },
    {
      "epoch": 0.273972602739726,
      "grad_norm": 0.39106306433677673,
      "learning_rate": 9.816704491231226e-05,
      "loss": 0.6694,
      "step": 50
    },
    {
      "epoch": 0.3287671232876712,
      "grad_norm": 0.5298248529434204,
      "learning_rate": 9.675698323078865e-05,
      "loss": 0.6647,
      "step": 60
    },
    {
      "epoch": 0.3835616438356164,
      "grad_norm": 0.33846038579940796,
      "learning_rate": 9.496392875023432e-05,
      "loss": 0.612,
      "step": 70
    },
    {
      "epoch": 0.4383561643835616,
      "grad_norm": 0.3002408444881439,
      "learning_rate": 9.280256862338821e-05,
      "loss": 0.5868,
      "step": 80
    },
    {
      "epoch": 0.4931506849315068,
      "grad_norm": 0.27812713384628296,
      "learning_rate": 9.029060684455229e-05,
      "loss": 0.599,
      "step": 90
    },
    {
      "epoch": 0.547945205479452,
      "grad_norm": 0.3248140513896942,
      "learning_rate": 8.744861923377001e-05,
      "loss": 0.5933,
      "step": 100
    },
    {
      "epoch": 0.6027397260273972,
      "grad_norm": 0.36872419714927673,
      "learning_rate": 8.429988489749046e-05,
      "loss": 0.5952,
      "step": 110
    },
    {
      "epoch": 0.6575342465753424,
      "grad_norm": 0.3348616659641266,
      "learning_rate": 8.087019554624595e-05,
      "loss": 0.5813,
      "step": 120
    },
    {
      "epoch": 0.7123287671232876,
      "grad_norm": 0.33727309107780457,
      "learning_rate": 7.718764423124892e-05,
      "loss": 0.5511,
      "step": 130
    },
    {
      "epoch": 0.7671232876712328,
      "grad_norm": 0.35852885246276855,
      "learning_rate": 7.32823952303943e-05,
      "loss": 0.5837,
      "step": 140
    },
    {
      "epoch": 0.821917808219178,
      "grad_norm": 0.37122035026550293,
      "learning_rate": 6.918643696856333e-05,
      "loss": 0.5604,
      "step": 150
    },
    {
      "epoch": 0.8767123287671232,
      "grad_norm": 0.3272222578525543,
      "learning_rate": 6.493331999609131e-05,
      "loss": 0.5318,
      "step": 160
    },
    {
      "epoch": 0.9315068493150684,
      "grad_norm": 0.3521849513053894,
      "learning_rate": 6.055788217165383e-05,
      "loss": 0.5598,
      "step": 170
    },
    {
      "epoch": 0.9863013698630136,
      "grad_norm": 0.3393417000770569,
      "learning_rate": 5.609596330063558e-05,
      "loss": 0.5626,
      "step": 180
    },
    {
      "epoch": 1.0383561643835617,
      "grad_norm": 0.29794371128082275,
      "learning_rate": 5.1584111566417515e-05,
      "loss": 0.5401,
      "step": 190
    },
    {
      "epoch": 1.093150684931507,
      "grad_norm": 0.40294867753982544,
      "learning_rate": 4.705928415924373e-05,
      "loss": 0.4977,
      "step": 200
    },
    {
      "epoch": 1.1479452054794521,
      "grad_norm": 0.3701549768447876,
      "learning_rate": 4.255854455485753e-05,
      "loss": 0.4893,
      "step": 210
    },
    {
      "epoch": 1.2027397260273973,
      "grad_norm": 0.41681772470474243,
      "learning_rate": 3.811875892253853e-05,
      "loss": 0.509,
      "step": 220
    },
    {
      "epoch": 1.2575342465753425,
      "grad_norm": 0.47252005338668823,
      "learning_rate": 3.377629414930396e-05,
      "loss": 0.4866,
      "step": 230
    },
    {
      "epoch": 1.3123287671232877,
      "grad_norm": 0.5522735118865967,
      "learning_rate": 2.9566719953798473e-05,
      "loss": 0.4788,
      "step": 240
    },
    {
      "epoch": 1.367123287671233,
      "grad_norm": 0.3663751780986786,
      "learning_rate": 2.552451752989865e-05,
      "loss": 0.5067,
      "step": 250
    },
    {
      "epoch": 1.4219178082191781,
      "grad_norm": 0.3987874388694763,
      "learning_rate": 2.168279710657149e-05,
      "loss": 0.4634,
      "step": 260
    },
    {
      "epoch": 1.4767123287671233,
      "grad_norm": 0.5115519762039185,
      "learning_rate": 1.8073026737492782e-05,
      "loss": 0.4912,
      "step": 270
    },
    {
      "epoch": 1.5315068493150685,
      "grad_norm": 0.5251284241676331,
      "learning_rate": 1.4724774541946146e-05,
      "loss": 0.5087,
      "step": 280
    },
    {
      "epoch": 1.5863013698630137,
      "grad_norm": 0.5367189049720764,
      "learning_rate": 1.1665466508342877e-05,
      "loss": 0.462,
      "step": 290
    },
    {
      "epoch": 1.641095890410959,
      "grad_norm": 0.6373235583305359,
      "learning_rate": 8.920161844226415e-06,
      "loss": 0.482,
      "step": 300
    },
    {
      "epoch": 1.6958904109589041,
      "grad_norm": 0.5205274820327759,
      "learning_rate": 6.511347712900545e-06,
      "loss": 0.4816,
      "step": 310
    },
    {
      "epoch": 1.7506849315068493,
      "grad_norm": 0.42115122079849243,
      "learning_rate": 4.458755038021029e-06,
      "loss": 0.4663,
      "step": 320
    },
    {
      "epoch": 1.8054794520547945,
      "grad_norm": 0.5167638659477234,
      "learning_rate": 2.7791968849206428e-06,
      "loss": 0.4938,
      "step": 330
    },
    {
      "epoch": 1.8602739726027397,
      "grad_norm": 0.544147253036499,
      "learning_rate": 1.4864307425078327e-06,
      "loss": 0.4824,
      "step": 340
    },
    {
      "epoch": 1.915068493150685,
      "grad_norm": 0.4460655748844147,
      "learning_rate": 5.910458338069191e-07,
      "loss": 0.4946,
      "step": 350
    },
    {
      "epoch": 1.9698630136986301,
      "grad_norm": 0.5717807412147522,
      "learning_rate": 1.0037637819431123e-07,
      "loss": 0.4736,
      "step": 360
    }
  ],
  "logging_steps": 10,
  "max_steps": 366,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 2,
  "save_steps": 200,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 1.27377858339115e+17,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}