{ "best_metric": 0.014762421138584614, "best_model_checkpoint": "saves/chess/both/checkpoint-1000", "epoch": 1.6260162601626016, "eval_steps": 1000, "global_step": 1000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.16260162601626016, "grad_norm": 1.6060084762973008, "learning_rate": 1.0162601626016261e-06, "loss": 1.0073, "step": 100 }, { "epoch": 0.3252032520325203, "grad_norm": 3.0440875723802594, "learning_rate": 2.0325203252032523e-06, "loss": 0.0302, "step": 200 }, { "epoch": 0.4878048780487805, "grad_norm": 0.9455772942098933, "learning_rate": 3.0487804878048782e-06, "loss": 0.0229, "step": 300 }, { "epoch": 0.6504065040650406, "grad_norm": 0.3831361794753519, "learning_rate": 4.0650406504065046e-06, "loss": 0.0197, "step": 400 }, { "epoch": 0.8130081300813008, "grad_norm": 0.5841201203132319, "learning_rate": 4.999959730768458e-06, "loss": 0.0183, "step": 500 }, { "epoch": 0.975609756097561, "grad_norm": 0.36973884839665605, "learning_rate": 4.992664502959351e-06, "loss": 0.0172, "step": 600 }, { "epoch": 1.1382113821138211, "grad_norm": 0.5308905808120226, "learning_rate": 4.9728272933003704e-06, "loss": 0.0149, "step": 700 }, { "epoch": 1.3008130081300813, "grad_norm": 0.3199255336490814, "learning_rate": 4.940547913829274e-06, "loss": 0.015, "step": 800 }, { "epoch": 1.4634146341463414, "grad_norm": 0.4781960951795096, "learning_rate": 4.89598878006206e-06, "loss": 0.0147, "step": 900 }, { "epoch": 1.6260162601626016, "grad_norm": 0.5141788877979915, "learning_rate": 4.839374093790139e-06, "loss": 0.0144, "step": 1000 }, { "epoch": 1.6260162601626016, "eval_loss": 0.014762421138584614, "eval_runtime": 232.6693, "eval_samples_per_second": 150.265, "eval_steps_per_second": 0.589, "step": 1000 } ], "logging_steps": 100, "max_steps": 4920, "num_input_tokens_seen": 0, "num_train_epochs": 8, "save_steps": 1000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 197337053921280.0, "train_batch_size": 64, "trial_name": null, "trial_params": null }