| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 0.022943673282092462, | |
| "eval_steps": 10, | |
| "global_step": 200, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.001147183664104623, | |
| "eval_loss": 1.873344898223877, | |
| "eval_runtime": 12.7647, | |
| "eval_samples_per_second": 515.797, | |
| "eval_steps_per_second": 8.069, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.002294367328209246, | |
| "eval_loss": 1.8726389408111572, | |
| "eval_runtime": 12.8667, | |
| "eval_samples_per_second": 511.709, | |
| "eval_steps_per_second": 8.005, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.0034415509923138693, | |
| "eval_loss": 1.8714078664779663, | |
| "eval_runtime": 12.9103, | |
| "eval_samples_per_second": 509.979, | |
| "eval_steps_per_second": 7.978, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.004588734656418492, | |
| "eval_loss": 1.8696790933609009, | |
| "eval_runtime": 12.947, | |
| "eval_samples_per_second": 508.534, | |
| "eval_steps_per_second": 7.955, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.0057359183205231154, | |
| "eval_loss": 1.8675329685211182, | |
| "eval_runtime": 12.9458, | |
| "eval_samples_per_second": 508.582, | |
| "eval_steps_per_second": 7.956, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.006883101984627739, | |
| "eval_loss": 1.8649154901504517, | |
| "eval_runtime": 13.0432, | |
| "eval_samples_per_second": 504.785, | |
| "eval_steps_per_second": 7.897, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.008030285648732363, | |
| "eval_loss": 1.8619294166564941, | |
| "eval_runtime": 13.0638, | |
| "eval_samples_per_second": 503.988, | |
| "eval_steps_per_second": 7.884, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.009177469312836984, | |
| "eval_loss": 1.8583979606628418, | |
| "eval_runtime": 13.0482, | |
| "eval_samples_per_second": 504.592, | |
| "eval_steps_per_second": 7.894, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.010324652976941608, | |
| "eval_loss": 1.85438871383667, | |
| "eval_runtime": 13.0615, | |
| "eval_samples_per_second": 504.075, | |
| "eval_steps_per_second": 7.886, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.011471836641046231, | |
| "grad_norm": 9.938580513000488, | |
| "learning_rate": 3.8226299694189603e-07, | |
| "loss": 3.1046, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.011471836641046231, | |
| "eval_loss": 1.849947214126587, | |
| "eval_runtime": 13.0663, | |
| "eval_samples_per_second": 503.89, | |
| "eval_steps_per_second": 7.883, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.012619020305150854, | |
| "eval_loss": 1.8451412916183472, | |
| "eval_runtime": 12.9771, | |
| "eval_samples_per_second": 507.357, | |
| "eval_steps_per_second": 7.937, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.013766203969255477, | |
| "eval_loss": 1.8399487733840942, | |
| "eval_runtime": 13.0209, | |
| "eval_samples_per_second": 505.648, | |
| "eval_steps_per_second": 7.91, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.0149133876333601, | |
| "eval_loss": 1.8342881202697754, | |
| "eval_runtime": 13.0369, | |
| "eval_samples_per_second": 505.028, | |
| "eval_steps_per_second": 7.901, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.016060571297464726, | |
| "eval_loss": 1.8283486366271973, | |
| "eval_runtime": 13.0149, | |
| "eval_samples_per_second": 505.88, | |
| "eval_steps_per_second": 7.914, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.017207754961569347, | |
| "eval_loss": 1.822334885597229, | |
| "eval_runtime": 13.0213, | |
| "eval_samples_per_second": 505.632, | |
| "eval_steps_per_second": 7.91, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.01835493862567397, | |
| "eval_loss": 1.8158738613128662, | |
| "eval_runtime": 13.0599, | |
| "eval_samples_per_second": 504.14, | |
| "eval_steps_per_second": 7.887, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.019502122289778594, | |
| "eval_loss": 1.8090614080429077, | |
| "eval_runtime": 13.034, | |
| "eval_samples_per_second": 505.14, | |
| "eval_steps_per_second": 7.902, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.020649305953883215, | |
| "eval_loss": 1.8015782833099365, | |
| "eval_runtime": 13.0665, | |
| "eval_samples_per_second": 503.885, | |
| "eval_steps_per_second": 7.883, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.02179648961798784, | |
| "eval_loss": 1.793796420097351, | |
| "eval_runtime": 13.0555, | |
| "eval_samples_per_second": 504.31, | |
| "eval_steps_per_second": 7.889, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.022943673282092462, | |
| "grad_norm": 4.906337738037109, | |
| "learning_rate": 7.645259938837921e-07, | |
| "loss": 3.0303, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.022943673282092462, | |
| "eval_loss": 1.785815715789795, | |
| "eval_runtime": 12.9925, | |
| "eval_samples_per_second": 506.754, | |
| "eval_steps_per_second": 7.928, | |
| "step": 200 | |
| } | |
| ], | |
| "logging_steps": 100, | |
| "max_steps": 26151, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 3, | |
| "save_steps": 10, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 0.0, | |
| "train_batch_size": 64, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |