{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 2000, "global_step": 9497, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00010529640939243972, "grad_norm": 6.751945495605469, "learning_rate": 0.0, "loss": 2.8442, "step": 1 }, { "epoch": 0.21059281878487943, "grad_norm": 4.618383884429932, "learning_rate": 6.315290648694187e-06, "loss": 2.5015, "step": 2000 }, { "epoch": 0.21059281878487943, "eval_cosine_accuracy": 0.9469176530838013, "eval_loss": 0.7108581066131592, "eval_runtime": 24.9028, "eval_samples_per_second": 379.756, "eval_steps_per_second": 1.486, "step": 2000 }, { "epoch": 0.42118563756975885, "grad_norm": 4.100494861602783, "learning_rate": 1.2630581297388374e-05, "loss": 1.8564, "step": 4000 }, { "epoch": 0.42118563756975885, "eval_cosine_accuracy": 0.9481865167617798, "eval_loss": 0.6932559013366699, "eval_runtime": 25.2131, "eval_samples_per_second": 375.083, "eval_steps_per_second": 1.467, "step": 4000 }, { "epoch": 0.6317784563546383, "grad_norm": 4.006742477416992, "learning_rate": 1.8949031171019377e-05, "loss": 1.504, "step": 6000 }, { "epoch": 0.6317784563546383, "eval_cosine_accuracy": 0.9519932270050049, "eval_loss": 0.6632474064826965, "eval_runtime": 25.5415, "eval_samples_per_second": 370.261, "eval_steps_per_second": 1.449, "step": 6000 }, { "epoch": 0.8423712751395177, "grad_norm": 3.7652177810668945, "learning_rate": 2.5264321819713565e-05, "loss": 1.2874, "step": 8000 }, { "epoch": 0.8423712751395177, "eval_cosine_accuracy": 0.9577032923698425, "eval_loss": 0.6434259414672852, "eval_runtime": 25.6225, "eval_samples_per_second": 369.089, "eval_steps_per_second": 1.444, "step": 8000 } ], "logging_steps": 2000, "max_steps": 18994, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 256, "trial_name": null, "trial_params": null }