{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 3.0, "eval_steps": 1000, "global_step": 11298, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0002655337227827934, "grad_norm": 5.798187732696533, "learning_rate": 0.0, "loss": 1.8907, "step": 1 }, { "epoch": 0.2655337227827934, "grad_norm": 3.392871618270874, "learning_rate": 2.652681890600106e-06, "loss": 1.46, "step": 1000 }, { "epoch": 0.2655337227827934, "eval_cosine_accuracy": 0.9434220194816589, "eval_loss": 0.5881128311157227, "eval_runtime": 37.2445, "eval_samples_per_second": 255.313, "eval_steps_per_second": 1.02, "step": 1000 }, { "epoch": 0.5310674455655868, "grad_norm": 4.811636924743652, "learning_rate": 5.30801911842804e-06, "loss": 1.005, "step": 2000 }, { "epoch": 0.5310674455655868, "eval_cosine_accuracy": 0.9501525163650513, "eval_loss": 0.5147020816802979, "eval_runtime": 34.2592, "eval_samples_per_second": 277.56, "eval_steps_per_second": 1.109, "step": 2000 }, { "epoch": 0.7966011683483802, "grad_norm": 4.881891250610352, "learning_rate": 7.963356346255975e-06, "loss": 0.7199, "step": 3000 }, { "epoch": 0.7966011683483802, "eval_cosine_accuracy": 0.9585655927658081, "eval_loss": 0.4466570317745209, "eval_runtime": 35.3151, "eval_samples_per_second": 269.262, "eval_steps_per_second": 1.076, "step": 3000 }, { "epoch": 1.0621348911311737, "grad_norm": 3.386178970336914, "learning_rate": 9.84599044078598e-06, "loss": 0.6439, "step": 4000 }, { "epoch": 1.0621348911311737, "eval_cosine_accuracy": 0.9612998366355896, "eval_loss": 0.42167210578918457, "eval_runtime": 34.503, "eval_samples_per_second": 275.599, "eval_steps_per_second": 1.101, "step": 4000 }, { "epoch": 1.327668613913967, "grad_norm": 3.1094956398010254, "learning_rate": 9.182156133828996e-06, "loss": 0.7827, "step": 5000 }, { "epoch": 1.327668613913967, "eval_cosine_accuracy": 0.967714786529541, "eval_loss": 0.3992333710193634, "eval_runtime": 34.5803, "eval_samples_per_second": 274.984, "eval_steps_per_second": 1.099, "step": 5000 }, { "epoch": 1.5932023366967605, "grad_norm": 4.082334518432617, "learning_rate": 8.518321826872013e-06, "loss": 0.5518, "step": 6000 }, { "epoch": 1.5932023366967605, "eval_cosine_accuracy": 0.9695025682449341, "eval_loss": 0.3816065192222595, "eval_runtime": 35.1269, "eval_samples_per_second": 270.704, "eval_steps_per_second": 1.082, "step": 6000 }, { "epoch": 1.858736059479554, "grad_norm": 3.973569631576538, "learning_rate": 7.85448751991503e-06, "loss": 0.4181, "step": 7000 }, { "epoch": 1.858736059479554, "eval_cosine_accuracy": 0.9710800051689148, "eval_loss": 0.36719873547554016, "eval_runtime": 34.8837, "eval_samples_per_second": 272.592, "eval_steps_per_second": 1.089, "step": 7000 }, { "epoch": 2.1242697822623473, "grad_norm": 2.694537401199341, "learning_rate": 7.1906532129580465e-06, "loss": 0.5191, "step": 8000 }, { "epoch": 2.1242697822623473, "eval_cosine_accuracy": 0.9718161821365356, "eval_loss": 0.37118107080459595, "eval_runtime": 34.5413, "eval_samples_per_second": 275.294, "eval_steps_per_second": 1.1, "step": 8000 }, { "epoch": 2.3898035050451405, "grad_norm": 2.9365272521972656, "learning_rate": 6.526818906001062e-06, "loss": 0.5758, "step": 9000 }, { "epoch": 2.3898035050451405, "eval_cosine_accuracy": 0.9725522994995117, "eval_loss": 0.3617595136165619, "eval_runtime": 34.1585, "eval_samples_per_second": 278.379, "eval_steps_per_second": 1.112, "step": 9000 }, { "epoch": 2.655337227827934, "grad_norm": 3.451385498046875, "learning_rate": 5.863648433351036e-06, "loss": 0.4162, "step": 10000 }, { "epoch": 2.655337227827934, "eval_cosine_accuracy": 0.9733936190605164, "eval_loss": 0.3557143807411194, "eval_runtime": 35.0722, "eval_samples_per_second": 271.126, "eval_steps_per_second": 1.083, "step": 10000 }, { "epoch": 2.9208709506107278, "grad_norm": 3.8668599128723145, "learning_rate": 5.199814126394053e-06, "loss": 0.3282, "step": 11000 }, { "epoch": 2.9208709506107278, "eval_cosine_accuracy": 0.9744452834129333, "eval_loss": 0.34901732206344604, "eval_runtime": 35.1373, "eval_samples_per_second": 270.624, "eval_steps_per_second": 1.081, "step": 11000 } ], "logging_steps": 1000, "max_steps": 18830, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 256, "trial_name": null, "trial_params": null }