{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.992, "eval_steps": 28, "global_step": 93, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.09, "grad_norm": 6.5, "learning_rate": 1.8681318681318682e-05, "loss": 0.338, "step": 8 }, { "epoch": 0.17, "grad_norm": 6.59375, "learning_rate": 1.6923076923076924e-05, "loss": 0.2964, "step": 16 }, { "epoch": 0.26, "grad_norm": 4.5625, "learning_rate": 1.5164835164835166e-05, "loss": 0.3441, "step": 24 }, { "epoch": 0.3, "eval_accuracy": 0.86, "eval_f1_score": 0.8597171717171715, "eval_gmean": 0.8574842430349027, "eval_loss": 0.36518555879592896, "eval_precision": 0.8601298701298701, "eval_recall": 0.86, "eval_runtime": 150.657, "eval_samples_per_second": 0.332, "eval_steps_per_second": 0.046, "step": 28 }, { "epoch": 0.34, "grad_norm": 5.0625, "learning_rate": 1.3406593406593406e-05, "loss": 0.3407, "step": 32 }, { "epoch": 0.43, "grad_norm": 7.21875, "learning_rate": 1.164835164835165e-05, "loss": 0.3418, "step": 40 }, { "epoch": 0.51, "grad_norm": 9.75, "learning_rate": 9.890109890109892e-06, "loss": 0.3868, "step": 48 }, { "epoch": 0.6, "grad_norm": 6.15625, "learning_rate": 8.131868131868132e-06, "loss": 0.3252, "step": 56 }, { "epoch": 0.6, "eval_accuracy": 0.86, "eval_f1_score": 0.8597171717171715, "eval_gmean": 0.8574842430349027, "eval_loss": 0.36469727754592896, "eval_precision": 0.8601298701298701, "eval_recall": 0.86, "eval_runtime": 150.6949, "eval_samples_per_second": 0.332, "eval_steps_per_second": 0.046, "step": 56 }, { "epoch": 0.68, "grad_norm": 5.875, "learning_rate": 6.373626373626373e-06, "loss": 0.3562, "step": 64 }, { "epoch": 0.77, "grad_norm": 7.34375, "learning_rate": 4.615384615384616e-06, "loss": 0.3252, "step": 72 }, { "epoch": 0.85, "grad_norm": 6.25, "learning_rate": 2.8571428571428573e-06, "loss": 0.3177, "step": 80 }, { "epoch": 0.9, "eval_accuracy": 0.86, "eval_f1_score": 0.8597171717171715, "eval_gmean": 0.8574842430349027, "eval_loss": 0.3646777272224426, "eval_precision": 0.8601298701298701, "eval_recall": 0.86, "eval_runtime": 150.7321, "eval_samples_per_second": 0.332, "eval_steps_per_second": 0.046, "step": 84 }, { "epoch": 0.94, "grad_norm": 6.9375, "learning_rate": 1.098901098901099e-06, "loss": 0.3777, "step": 88 }, { "epoch": 0.99, "step": 93, "total_flos": 1.1914693337677824e+17, "train_loss": 0.34092466292842744, "train_runtime": 18751.1569, "train_samples_per_second": 0.32, "train_steps_per_second": 0.005 } ], "logging_steps": 8, "max_steps": 93, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 500, "total_flos": 1.1914693337677824e+17, "train_batch_size": 16, "trial_name": null, "trial_params": null }