{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.0, "eval_steps": 500, "global_step": 66, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.030303030303030304, "grad_norm": 0.12914219144102593, "learning_rate": 1.4285714285714286e-06, "loss": 0.5151, "step": 1 }, { "epoch": 0.15151515151515152, "grad_norm": 0.10294084507814112, "learning_rate": 7.1428571428571436e-06, "loss": 0.5084, "step": 5 }, { "epoch": 0.30303030303030304, "grad_norm": 0.11430775946527363, "learning_rate": 9.936341773606723e-06, "loss": 0.5085, "step": 10 }, { "epoch": 0.45454545454545453, "grad_norm": 0.08588455579355678, "learning_rate": 9.553173864274567e-06, "loss": 0.4765, "step": 15 }, { "epoch": 0.6060606060606061, "grad_norm": 0.0720040180861239, "learning_rate": 8.849169917149532e-06, "loss": 0.4394, "step": 20 }, { "epoch": 0.7575757575757576, "grad_norm": 0.06453236041824466, "learning_rate": 7.873937051072037e-06, "loss": 0.4322, "step": 25 }, { "epoch": 0.9090909090909091, "grad_norm": 0.05882814905970625, "learning_rate": 6.6961943305901515e-06, "loss": 0.4058, "step": 30 }, { "epoch": 1.0, "eval_loss": 0.42501434683799744, "eval_runtime": 28.7091, "eval_samples_per_second": 18.984, "eval_steps_per_second": 4.772, "step": 33 }, { "epoch": 1.0606060606060606, "grad_norm": 0.056958863910414055, "learning_rate": 5.398930527765416e-06, "loss": 0.3821, "step": 35 }, { "epoch": 1.2121212121212122, "grad_norm": 0.05279260475294177, "learning_rate": 4.073556379564429e-06, "loss": 0.3697, "step": 40 }, { "epoch": 1.3636363636363638, "grad_norm": 0.05128712340945032, "learning_rate": 2.8134633977057236e-06, "loss": 0.3503, "step": 45 }, { "epoch": 1.5151515151515151, "grad_norm": 0.060058454006873215, "learning_rate": 1.7074431046748075e-06, "loss": 0.3674, "step": 50 }, { "epoch": 1.6666666666666665, "grad_norm": 0.0483607238694913, "learning_rate": 8.334304045874248e-07, "loss": 0.3641, "step": 55 }, { "epoch": 1.8181818181818183, "grad_norm": 0.05802115776370139, "learning_rate": 2.530119576580936e-07, "loss": 0.3614, "step": 60 }, { "epoch": 1.9696969696969697, "grad_norm": 0.05938652178950297, "learning_rate": 7.0865216161902785e-09, "loss": 0.3696, "step": 65 }, { "epoch": 2.0, "eval_loss": 0.4221389591693878, "eval_runtime": 28.8226, "eval_samples_per_second": 18.909, "eval_steps_per_second": 4.753, "step": 66 }, { "epoch": 2.0, "step": 66, "total_flos": 1.681359576886149e+17, "train_loss": 0.4086628511096492, "train_runtime": 982.585, "train_samples_per_second": 3.224, "train_steps_per_second": 0.067 } ], "logging_steps": 5, "max_steps": 66, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.681359576886149e+17, "train_batch_size": 1, "trial_name": null, "trial_params": null }