| { | |
| "best_metric": 0.014762421138584614, | |
| "best_model_checkpoint": "saves/chess/both/checkpoint-1000", | |
| "epoch": 1.6260162601626016, | |
| "eval_steps": 1000, | |
| "global_step": 1000, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.16260162601626016, | |
| "grad_norm": 1.6060084762973008, | |
| "learning_rate": 1.0162601626016261e-06, | |
| "loss": 1.0073, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.3252032520325203, | |
| "grad_norm": 3.0440875723802594, | |
| "learning_rate": 2.0325203252032523e-06, | |
| "loss": 0.0302, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.4878048780487805, | |
| "grad_norm": 0.9455772942098933, | |
| "learning_rate": 3.0487804878048782e-06, | |
| "loss": 0.0229, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.6504065040650406, | |
| "grad_norm": 0.3831361794753519, | |
| "learning_rate": 4.0650406504065046e-06, | |
| "loss": 0.0197, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.8130081300813008, | |
| "grad_norm": 0.5841201203132319, | |
| "learning_rate": 4.999959730768458e-06, | |
| "loss": 0.0183, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.975609756097561, | |
| "grad_norm": 0.36973884839665605, | |
| "learning_rate": 4.992664502959351e-06, | |
| "loss": 0.0172, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 1.1382113821138211, | |
| "grad_norm": 0.5308905808120226, | |
| "learning_rate": 4.9728272933003704e-06, | |
| "loss": 0.0149, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 1.3008130081300813, | |
| "grad_norm": 0.3199255336490814, | |
| "learning_rate": 4.940547913829274e-06, | |
| "loss": 0.015, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 1.4634146341463414, | |
| "grad_norm": 0.4781960951795096, | |
| "learning_rate": 4.89598878006206e-06, | |
| "loss": 0.0147, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 1.6260162601626016, | |
| "grad_norm": 0.5141788877979915, | |
| "learning_rate": 4.839374093790139e-06, | |
| "loss": 0.0144, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 1.6260162601626016, | |
| "eval_loss": 0.014762421138584614, | |
| "eval_runtime": 232.6693, | |
| "eval_samples_per_second": 150.265, | |
| "eval_steps_per_second": 0.589, | |
| "step": 1000 | |
| } | |
| ], | |
| "logging_steps": 100, | |
| "max_steps": 4920, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 8, | |
| "save_steps": 1000, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 197337053921280.0, | |
| "train_batch_size": 64, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |