{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 2.0, "eval_steps": 1000, "global_step": 8284, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0002414875633904854, "grad_norm": 7.886749267578125, "learning_rate": 0.0, "loss": 3.6713, "step": 1 }, { "epoch": 0.24148756339048538, "grad_norm": 6.218736171722412, "learning_rate": 9.647513278609368e-06, "loss": 2.8183, "step": 1000 }, { "epoch": 0.24148756339048538, "eval_cosine_accuracy": 0.9434220194816589, "eval_loss": 0.585781455039978, "eval_runtime": 34.9645, "eval_samples_per_second": 271.962, "eval_steps_per_second": 2.145, "step": 1000 }, { "epoch": 0.48297512678097076, "grad_norm": 6.299063682556152, "learning_rate": 1.9304683727667796e-05, "loss": 2.1179, "step": 2000 }, { "epoch": 0.48297512678097076, "eval_cosine_accuracy": 0.9497318267822266, "eval_loss": 0.5328050851821899, "eval_runtime": 34.2691, "eval_samples_per_second": 277.48, "eval_steps_per_second": 2.189, "step": 2000 }, { "epoch": 0.7244626901714561, "grad_norm": 9.58321475982666, "learning_rate": 1.9003971235376196e-05, "loss": 1.4826, "step": 3000 }, { "epoch": 0.7244626901714561, "eval_cosine_accuracy": 0.9538332223892212, "eval_loss": 0.49323785305023193, "eval_runtime": 34.0684, "eval_samples_per_second": 279.115, "eval_steps_per_second": 2.201, "step": 3000 }, { "epoch": 0.9659502535619415, "grad_norm": 15.6340970993042, "learning_rate": 1.79317376838038e-05, "loss": 0.949, "step": 4000 }, { "epoch": 0.9659502535619415, "eval_cosine_accuracy": 0.9546745419502258, "eval_loss": 0.4723624587059021, "eval_runtime": 33.7991, "eval_samples_per_second": 281.339, "eval_steps_per_second": 2.219, "step": 4000 }, { "epoch": 1.2073376780111031, "grad_norm": 6.6801862716674805, "learning_rate": 1.6858430825372976e-05, "loss": 1.1823, "step": 5000 }, { "epoch": 1.2073376780111031, "eval_cosine_accuracy": 0.960037887096405, "eval_loss": 0.4632853865623474, "eval_runtime": 34.2938, "eval_samples_per_second": 277.28, "eval_steps_per_second": 2.187, "step": 5000 }, { "epoch": 1.4487086652184407, "grad_norm": 6.39561653137207, "learning_rate": 1.578619727380058e-05, "loss": 1.1665, "step": 6000 }, { "epoch": 1.4487086652184407, "eval_cosine_accuracy": 0.9617204666137695, "eval_loss": 0.4432311952114105, "eval_runtime": 35.5061, "eval_samples_per_second": 267.813, "eval_steps_per_second": 2.112, "step": 6000 }, { "epoch": 1.6900796524257784, "grad_norm": 6.977872371673584, "learning_rate": 1.4712890415369757e-05, "loss": 1.1042, "step": 7000 }, { "epoch": 1.6900796524257784, "eval_cosine_accuracy": 0.9625617861747742, "eval_loss": 0.43880006670951843, "eval_runtime": 34.1311, "eval_samples_per_second": 278.603, "eval_steps_per_second": 2.197, "step": 7000 }, { "epoch": 1.9314506396331161, "grad_norm": 7.896651268005371, "learning_rate": 1.3640656863797362e-05, "loss": 1.0525, "step": 8000 }, { "epoch": 1.9314506396331161, "eval_cosine_accuracy": 0.9643495678901672, "eval_loss": 0.4344501495361328, "eval_runtime": 34.0153, "eval_samples_per_second": 279.55, "eval_steps_per_second": 2.205, "step": 8000 } ], "logging_steps": 1000, "max_steps": 20705, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 128, "trial_name": null, "trial_params": null }