| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 3.0, | |
| "eval_steps": 500, | |
| "global_step": 6495, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.00046189376443418013, | |
| "grad_norm": 12.800962448120117, | |
| "learning_rate": 0.0, | |
| "loss": 4.0583, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 0.23094688221709006, | |
| "eval_cosine_accuracy": 0.9421356916427612, | |
| "eval_loss": 1.4610942602157593, | |
| "eval_runtime": 21.999, | |
| "eval_samples_per_second": 432.064, | |
| "eval_steps_per_second": 1.727, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.4618937644341801, | |
| "eval_cosine_accuracy": 0.9457128047943115, | |
| "eval_loss": 1.3612146377563477, | |
| "eval_runtime": 21.8898, | |
| "eval_samples_per_second": 434.22, | |
| "eval_steps_per_second": 1.736, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.6928406466512702, | |
| "eval_cosine_accuracy": 0.9528669118881226, | |
| "eval_loss": 1.288307547569275, | |
| "eval_runtime": 21.8186, | |
| "eval_samples_per_second": 435.638, | |
| "eval_steps_per_second": 1.742, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.9237875288683602, | |
| "eval_cosine_accuracy": 0.9522356390953064, | |
| "eval_loss": 1.2684112787246704, | |
| "eval_runtime": 21.8066, | |
| "eval_samples_per_second": 435.877, | |
| "eval_steps_per_second": 1.743, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "grad_norm": 18.849971771240234, | |
| "learning_rate": 1.6664098613251156e-05, | |
| "loss": 2.6124, | |
| "step": 2165 | |
| }, | |
| { | |
| "epoch": 1.1547344110854503, | |
| "eval_cosine_accuracy": 0.9541293978691101, | |
| "eval_loss": 1.2559542655944824, | |
| "eval_runtime": 21.9828, | |
| "eval_samples_per_second": 432.383, | |
| "eval_steps_per_second": 1.729, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 1.3856812933025404, | |
| "eval_cosine_accuracy": 0.956233561038971, | |
| "eval_loss": 1.1885266304016113, | |
| "eval_runtime": 23.3704, | |
| "eval_samples_per_second": 406.71, | |
| "eval_steps_per_second": 1.626, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 1.6166281755196303, | |
| "eval_cosine_accuracy": 0.9557075500488281, | |
| "eval_loss": 1.187910556793213, | |
| "eval_runtime": 21.9291, | |
| "eval_samples_per_second": 433.442, | |
| "eval_steps_per_second": 1.733, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 1.8475750577367207, | |
| "eval_cosine_accuracy": 0.9580221176147461, | |
| "eval_loss": 1.1555284261703491, | |
| "eval_runtime": 21.9378, | |
| "eval_samples_per_second": 433.27, | |
| "eval_steps_per_second": 1.732, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "grad_norm": 17.494279861450195, | |
| "learning_rate": 1.1120800205180818e-05, | |
| "loss": 1.986, | |
| "step": 4330 | |
| }, | |
| { | |
| "epoch": 2.0785219399538106, | |
| "eval_cosine_accuracy": 0.9582325220108032, | |
| "eval_loss": 1.154712438583374, | |
| "eval_runtime": 22.0846, | |
| "eval_samples_per_second": 430.391, | |
| "eval_steps_per_second": 1.721, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 2.3094688221709005, | |
| "eval_cosine_accuracy": 0.9584429264068604, | |
| "eval_loss": 1.145609974861145, | |
| "eval_runtime": 22.0404, | |
| "eval_samples_per_second": 431.254, | |
| "eval_steps_per_second": 1.724, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 2.540415704387991, | |
| "eval_cosine_accuracy": 0.9585481286048889, | |
| "eval_loss": 1.135787844657898, | |
| "eval_runtime": 23.3896, | |
| "eval_samples_per_second": 406.377, | |
| "eval_steps_per_second": 1.625, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 2.771362586605081, | |
| "eval_cosine_accuracy": 0.9596002101898193, | |
| "eval_loss": 1.1278640031814575, | |
| "eval_runtime": 21.8218, | |
| "eval_samples_per_second": 435.573, | |
| "eval_steps_per_second": 1.741, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "grad_norm": 19.504749298095703, | |
| "learning_rate": 2.0518081559374197e-08, | |
| "loss": 1.8005, | |
| "step": 6495 | |
| } | |
| ], | |
| "logging_steps": 500, | |
| "max_steps": 6495, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 3, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 0.0, | |
| "train_batch_size": 256, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |