{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 2.998719590268886, "eval_steps": 1000, "global_step": 9366, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00032030749519538755, "grad_norm": 6.058427333831787, "learning_rate": 0.0, "loss": 2.3526, "step": 1 }, { "epoch": 0.3203074951953876, "grad_norm": 3.9029946327209473, "learning_rate": 2.977933325424131e-05, "loss": 1.6965, "step": 1000 }, { "epoch": 0.3203074951953876, "eval_cosine_accuracy": 0.9661373496055603, "eval_loss": 0.7820664644241333, "eval_runtime": 28.1656, "eval_samples_per_second": 337.61, "eval_steps_per_second": 1.349, "step": 1000 }, { "epoch": 0.6406149903907752, "grad_norm": 3.5058798789978027, "learning_rate": 2.6220192193617276e-05, "loss": 1.1157, "step": 2000 }, { "epoch": 0.6406149903907752, "eval_cosine_accuracy": 0.9707645177841187, "eval_loss": 0.7116907835006714, "eval_runtime": 27.4286, "eval_samples_per_second": 346.681, "eval_steps_per_second": 1.385, "step": 2000 }, { "epoch": 0.9609224855861627, "grad_norm": 3.5930891036987305, "learning_rate": 2.2661051132993238e-05, "loss": 1.5779, "step": 3000 }, { "epoch": 0.9609224855861627, "eval_cosine_accuracy": 0.9699232578277588, "eval_loss": 0.7110825777053833, "eval_runtime": 27.7293, "eval_samples_per_second": 342.922, "eval_steps_per_second": 1.37, "step": 3000 }, { "epoch": 1.2810499359795133, "grad_norm": 4.003444671630859, "learning_rate": 1.9101910072369202e-05, "loss": 1.2526, "step": 4000 }, { "epoch": 1.2810499359795133, "eval_cosine_accuracy": 0.9773898124694824, "eval_loss": 0.6422452330589294, "eval_runtime": 27.4782, "eval_samples_per_second": 346.057, "eval_steps_per_second": 1.383, "step": 4000 }, { "epoch": 1.6011523687580027, "grad_norm": 3.0391690731048584, "learning_rate": 1.5549887293866415e-05, "loss": 1.1009, "step": 5000 }, { "epoch": 1.6011523687580027, "eval_cosine_accuracy": 0.9776001572608948, "eval_loss": 0.6362274885177612, "eval_runtime": 29.0916, "eval_samples_per_second": 326.864, "eval_steps_per_second": 1.306, "step": 5000 }, { "epoch": 1.9212548015364916, "grad_norm": 3.6326820850372314, "learning_rate": 1.1990746233242378e-05, "loss": 1.0505, "step": 6000 }, { "epoch": 1.9212548015364916, "eval_cosine_accuracy": 0.9786518216133118, "eval_loss": 0.6349849104881287, "eval_runtime": 28.408, "eval_samples_per_second": 334.73, "eval_steps_per_second": 1.338, "step": 6000 }, { "epoch": 2.2413572343149806, "grad_norm": 2.998448133468628, "learning_rate": 8.431605172618341e-06, "loss": 0.9656, "step": 7000 }, { "epoch": 2.2413572343149806, "eval_cosine_accuracy": 0.9782311320304871, "eval_loss": 0.6253951191902161, "eval_runtime": 29.1172, "eval_samples_per_second": 326.577, "eval_steps_per_second": 1.305, "step": 7000 }, { "epoch": 2.56145966709347, "grad_norm": 3.0235984325408936, "learning_rate": 4.876023253054929e-06, "loss": 0.9874, "step": 8000 }, { "epoch": 2.56145966709347, "eval_cosine_accuracy": 0.9779156446456909, "eval_loss": 0.6308476328849792, "eval_runtime": 29.1512, "eval_samples_per_second": 326.195, "eval_steps_per_second": 1.304, "step": 8000 }, { "epoch": 2.881562099871959, "grad_norm": 3.0960497856140137, "learning_rate": 1.3168821924308935e-06, "loss": 0.9667, "step": 9000 }, { "epoch": 2.881562099871959, "eval_cosine_accuracy": 0.9782311320304871, "eval_loss": 0.627689778804779, "eval_runtime": 28.5859, "eval_samples_per_second": 332.647, "eval_steps_per_second": 1.329, "step": 9000 } ], "logging_steps": 1000, "max_steps": 9366, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 256, "trial_name": null, "trial_params": null }