| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 3.998059508408797, | |
| "eval_steps": 1000, | |
| "global_step": 12360, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.00032362459546925567, | |
| "grad_norm": 6.455742835998535, | |
| "learning_rate": 0.0, | |
| "loss": 3.5189, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 0.32362459546925565, | |
| "grad_norm": 4.869843482971191, | |
| "learning_rate": 2.020631067961165e-05, | |
| "loss": 2.5841, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.32362459546925565, | |
| "eval_cosine_accuracy": 0.9525712728500366, | |
| "eval_loss": 0.5323272943496704, | |
| "eval_runtime": 35.0009, | |
| "eval_samples_per_second": 271.679, | |
| "eval_steps_per_second": 1.086, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.6472491909385113, | |
| "grad_norm": 6.113117694854736, | |
| "learning_rate": 4.04126213592233e-05, | |
| "loss": 1.5199, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.6472491909385113, | |
| "eval_cosine_accuracy": 0.9623514413833618, | |
| "eval_loss": 0.47361451387405396, | |
| "eval_runtime": 34.0145, | |
| "eval_samples_per_second": 279.557, | |
| "eval_steps_per_second": 1.117, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.970873786407767, | |
| "grad_norm": 6.593084812164307, | |
| "learning_rate": 4.7340210355987055e-05, | |
| "loss": 1.0615, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.970873786407767, | |
| "eval_cosine_accuracy": 0.9607740044593811, | |
| "eval_loss": 0.45996761322021484, | |
| "eval_runtime": 33.7873, | |
| "eval_samples_per_second": 281.437, | |
| "eval_steps_per_second": 1.125, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 1.2943078913324708, | |
| "grad_norm": 3.9310717582702637, | |
| "learning_rate": 4.228357605177994e-05, | |
| "loss": 1.1183, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 1.2943078913324708, | |
| "eval_cosine_accuracy": 0.9660322070121765, | |
| "eval_loss": 0.43745797872543335, | |
| "eval_runtime": 33.8541, | |
| "eval_samples_per_second": 280.882, | |
| "eval_steps_per_second": 1.122, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 1.6177231565329884, | |
| "grad_norm": 4.16636848449707, | |
| "learning_rate": 3.722694174757282e-05, | |
| "loss": 1.0372, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 1.6177231565329884, | |
| "eval_cosine_accuracy": 0.9657166600227356, | |
| "eval_loss": 0.4422674775123596, | |
| "eval_runtime": 33.8581, | |
| "eval_samples_per_second": 280.849, | |
| "eval_steps_per_second": 1.122, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 1.941138421733506, | |
| "grad_norm": 4.619586944580078, | |
| "learning_rate": 3.21703074433657e-05, | |
| "loss": 0.9562, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 1.941138421733506, | |
| "eval_cosine_accuracy": 0.9676096439361572, | |
| "eval_loss": 0.4301552176475525, | |
| "eval_runtime": 35.1089, | |
| "eval_samples_per_second": 270.843, | |
| "eval_steps_per_second": 1.082, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 2.2645536869340233, | |
| "grad_norm": 3.7584993839263916, | |
| "learning_rate": 2.711367313915858e-05, | |
| "loss": 0.8554, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 2.2645536869340233, | |
| "eval_cosine_accuracy": 0.9667683243751526, | |
| "eval_loss": 0.43318116664886475, | |
| "eval_runtime": 33.7513, | |
| "eval_samples_per_second": 281.738, | |
| "eval_steps_per_second": 1.126, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 2.5879689521345406, | |
| "grad_norm": 4.243616580963135, | |
| "learning_rate": 2.2062095469255663e-05, | |
| "loss": 0.812, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 2.5879689521345406, | |
| "eval_cosine_accuracy": 0.9670838117599487, | |
| "eval_loss": 0.4286963939666748, | |
| "eval_runtime": 34.2095, | |
| "eval_samples_per_second": 277.964, | |
| "eval_steps_per_second": 1.111, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 2.9113842173350584, | |
| "grad_norm": 4.7793097496032715, | |
| "learning_rate": 1.7005461165048546e-05, | |
| "loss": 0.7919, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 2.9113842173350584, | |
| "eval_cosine_accuracy": 0.9696077108383179, | |
| "eval_loss": 0.4297306537628174, | |
| "eval_runtime": 33.8897, | |
| "eval_samples_per_second": 280.586, | |
| "eval_steps_per_second": 1.121, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 3.2347994825355757, | |
| "grad_norm": 3.795403003692627, | |
| "learning_rate": 1.1948826860841425e-05, | |
| "loss": 0.7476, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 3.2347994825355757, | |
| "eval_cosine_accuracy": 0.9684509634971619, | |
| "eval_loss": 0.42872345447540283, | |
| "eval_runtime": 33.5535, | |
| "eval_samples_per_second": 283.398, | |
| "eval_steps_per_second": 1.133, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 3.558214747736093, | |
| "grad_norm": 3.640136957168579, | |
| "learning_rate": 6.892192556634304e-06, | |
| "loss": 0.7199, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 3.558214747736093, | |
| "eval_cosine_accuracy": 0.9697129130363464, | |
| "eval_loss": 0.4244835376739502, | |
| "eval_runtime": 35.2497, | |
| "eval_samples_per_second": 269.761, | |
| "eval_steps_per_second": 1.078, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 3.8816300129366104, | |
| "grad_norm": 4.36401891708374, | |
| "learning_rate": 1.8406148867313916e-06, | |
| "loss": 0.7133, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 3.8816300129366104, | |
| "eval_cosine_accuracy": 0.9703438878059387, | |
| "eval_loss": 0.4268113672733307, | |
| "eval_runtime": 33.7961, | |
| "eval_samples_per_second": 281.364, | |
| "eval_steps_per_second": 1.124, | |
| "step": 12000 | |
| } | |
| ], | |
| "logging_steps": 1000, | |
| "max_steps": 12360, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 4, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 0.0, | |
| "train_batch_size": 256, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |