{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 1000, "global_step": 5515, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00018132366273798732, "grad_norm": 6.366542339324951, "learning_rate": 0.0, "loss": 1.2337, "step": 1 }, { "epoch": 0.1813236627379873, "grad_norm": 6.652426719665527, "learning_rate": 1.2072507552870091e-05, "loss": 1.2987, "step": 1000 }, { "epoch": 0.1813236627379873, "eval_cosine_accuracy": 0.9657166600227356, "eval_loss": 0.4266870319843292, "eval_runtime": 35.8127, "eval_samples_per_second": 265.52, "eval_steps_per_second": 2.094, "step": 1000 }, { "epoch": 0.3626473254759746, "grad_norm": 6.257086753845215, "learning_rate": 1.9539288112827402e-05, "loss": 1.2594, "step": 2000 }, { "epoch": 0.3626473254759746, "eval_cosine_accuracy": 0.9685561060905457, "eval_loss": 0.42614081501960754, "eval_runtime": 35.5879, "eval_samples_per_second": 267.198, "eval_steps_per_second": 2.107, "step": 2000 }, { "epoch": 0.543970988213962, "grad_norm": 6.342113494873047, "learning_rate": 1.8196104768300877e-05, "loss": 1.1928, "step": 3000 }, { "epoch": 0.543970988213962, "eval_cosine_accuracy": 0.9664528369903564, "eval_loss": 0.42114901542663574, "eval_runtime": 35.7842, "eval_samples_per_second": 265.732, "eval_steps_per_second": 2.096, "step": 3000 }, { "epoch": 0.7252946509519492, "grad_norm": 7.883905410766602, "learning_rate": 1.6852921423774348e-05, "loss": 0.9213, "step": 4000 }, { "epoch": 0.7252946509519492, "eval_cosine_accuracy": 0.9636133909225464, "eval_loss": 0.40882474184036255, "eval_runtime": 35.2731, "eval_samples_per_second": 269.582, "eval_steps_per_second": 2.126, "step": 4000 }, { "epoch": 0.9066183136899365, "grad_norm": 9.094715118408203, "learning_rate": 1.550973807924782e-05, "loss": 0.7272, "step": 5000 }, { "epoch": 0.9066183136899365, "eval_cosine_accuracy": 0.9598275423049927, "eval_loss": 0.41818034648895264, "eval_runtime": 35.7134, "eval_samples_per_second": 266.259, "eval_steps_per_second": 2.1, "step": 5000 } ], "logging_steps": 1000, "max_steps": 16545, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 128, "trial_name": null, "trial_params": null }