| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 2.918918918918919, | |
| "eval_steps": 500, | |
| "global_step": 81, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.36036036036036034, | |
| "grad_norm": 24.162766925724526, | |
| "learning_rate": 5e-06, | |
| "loss": 1.2352, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.7207207207207207, | |
| "grad_norm": 2.460851975181821, | |
| "learning_rate": 5e-06, | |
| "loss": 1.129, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.972972972972973, | |
| "eval_loss": 1.0856622457504272, | |
| "eval_runtime": 20.0808, | |
| "eval_samples_per_second": 37.249, | |
| "eval_steps_per_second": 0.598, | |
| "step": 27 | |
| }, | |
| { | |
| "epoch": 1.0810810810810811, | |
| "grad_norm": 1.5697438122223701, | |
| "learning_rate": 5e-06, | |
| "loss": 1.1105, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 1.4414414414414414, | |
| "grad_norm": 1.4204201478767204, | |
| "learning_rate": 5e-06, | |
| "loss": 1.0347, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 1.8018018018018018, | |
| "grad_norm": 1.4140162476830664, | |
| "learning_rate": 5e-06, | |
| "loss": 1.015, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 1.981981981981982, | |
| "eval_loss": 1.0293084383010864, | |
| "eval_runtime": 19.208, | |
| "eval_samples_per_second": 38.942, | |
| "eval_steps_per_second": 0.625, | |
| "step": 55 | |
| }, | |
| { | |
| "epoch": 2.1621621621621623, | |
| "grad_norm": 2.291804896225563, | |
| "learning_rate": 5e-06, | |
| "loss": 1.0103, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 2.5225225225225225, | |
| "grad_norm": 1.4769259269396882, | |
| "learning_rate": 5e-06, | |
| "loss": 0.9502, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 2.8828828828828827, | |
| "grad_norm": 1.4935265215970417, | |
| "learning_rate": 5e-06, | |
| "loss": 0.939, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 2.918918918918919, | |
| "eval_loss": 1.0073624849319458, | |
| "eval_runtime": 17.8833, | |
| "eval_samples_per_second": 41.827, | |
| "eval_steps_per_second": 0.671, | |
| "step": 81 | |
| }, | |
| { | |
| "epoch": 2.918918918918919, | |
| "step": 81, | |
| "total_flos": 135468637224960.0, | |
| "train_loss": 1.051608243106324, | |
| "train_runtime": 2954.7854, | |
| "train_samples_per_second": 14.423, | |
| "train_steps_per_second": 0.027 | |
| } | |
| ], | |
| "logging_steps": 10, | |
| "max_steps": 81, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 3, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 135468637224960.0, | |
| "train_batch_size": 8, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |