{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 1.093150684931507, "eval_steps": 500, "global_step": 200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0547945205479452, "grad_norm": 1.181526780128479, "learning_rate": 4.736842105263158e-05, "loss": 1.799, "step": 10 }, { "epoch": 0.1095890410958904, "grad_norm": 0.522590160369873, "learning_rate": 0.0001, "loss": 1.2094, "step": 20 }, { "epoch": 0.1643835616438356, "grad_norm": 0.3870140314102173, "learning_rate": 9.979522160511282e-05, "loss": 0.8021, "step": 30 }, { "epoch": 0.2191780821917808, "grad_norm": 0.3272338807582855, "learning_rate": 9.918256378809177e-05, "loss": 0.7125, "step": 40 }, { "epoch": 0.273972602739726, "grad_norm": 0.39106306433677673, "learning_rate": 9.816704491231226e-05, "loss": 0.6694, "step": 50 }, { "epoch": 0.3287671232876712, "grad_norm": 0.5298248529434204, "learning_rate": 9.675698323078865e-05, "loss": 0.6647, "step": 60 }, { "epoch": 0.3835616438356164, "grad_norm": 0.33846038579940796, "learning_rate": 9.496392875023432e-05, "loss": 0.612, "step": 70 }, { "epoch": 0.4383561643835616, "grad_norm": 0.3002408444881439, "learning_rate": 9.280256862338821e-05, "loss": 0.5868, "step": 80 }, { "epoch": 0.4931506849315068, "grad_norm": 0.27812713384628296, "learning_rate": 9.029060684455229e-05, "loss": 0.599, "step": 90 }, { "epoch": 0.547945205479452, "grad_norm": 0.3248140513896942, "learning_rate": 8.744861923377001e-05, "loss": 0.5933, "step": 100 }, { "epoch": 0.6027397260273972, "grad_norm": 0.36872419714927673, "learning_rate": 8.429988489749046e-05, "loss": 0.5952, "step": 110 }, { "epoch": 0.6575342465753424, "grad_norm": 0.3348616659641266, "learning_rate": 8.087019554624595e-05, "loss": 0.5813, "step": 120 }, { "epoch": 0.7123287671232876, "grad_norm": 0.33727309107780457, "learning_rate": 7.718764423124892e-05, "loss": 0.5511, "step": 130 }, { "epoch": 0.7671232876712328, "grad_norm": 0.35852885246276855, "learning_rate": 7.32823952303943e-05, "loss": 0.5837, "step": 140 }, { "epoch": 0.821917808219178, "grad_norm": 0.37122035026550293, "learning_rate": 6.918643696856333e-05, "loss": 0.5604, "step": 150 }, { "epoch": 0.8767123287671232, "grad_norm": 0.3272222578525543, "learning_rate": 6.493331999609131e-05, "loss": 0.5318, "step": 160 }, { "epoch": 0.9315068493150684, "grad_norm": 0.3521849513053894, "learning_rate": 6.055788217165383e-05, "loss": 0.5598, "step": 170 }, { "epoch": 0.9863013698630136, "grad_norm": 0.3393417000770569, "learning_rate": 5.609596330063558e-05, "loss": 0.5626, "step": 180 }, { "epoch": 1.0383561643835617, "grad_norm": 0.29794371128082275, "learning_rate": 5.1584111566417515e-05, "loss": 0.5401, "step": 190 }, { "epoch": 1.093150684931507, "grad_norm": 0.40294867753982544, "learning_rate": 4.705928415924373e-05, "loss": 0.4977, "step": 200 } ], "logging_steps": 10, "max_steps": 366, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 200, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 6.928581998343168e+16, "train_batch_size": 2, "trial_name": null, "trial_params": null }