{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 3.0, "eval_steps": 500, "global_step": 36, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.43956043956043955, "grad_norm": 9.493675231933594, "learning_rate": 4.849231551964771e-05, "loss": 5.4762, "num_input_tokens_seen": 13272, "step": 5, "train_runtime": 12.8278, "train_tokens_per_second": 1034.626 }, { "epoch": 0.8791208791208791, "grad_norm": 4.911149024963379, "learning_rate": 4.267766952966369e-05, "loss": 3.9653, "num_input_tokens_seen": 25368, "step": 10, "train_runtime": 24.6003, "train_tokens_per_second": 1031.207 }, { "epoch": 1.2637362637362637, "grad_norm": 3.035015821456909, "learning_rate": 3.355050358314172e-05, "loss": 3.2543, "num_input_tokens_seen": 36568, "step": 15, "train_runtime": 35.5974, "train_tokens_per_second": 1027.266 }, { "epoch": 1.7032967032967035, "grad_norm": 2.246847152709961, "learning_rate": 2.2821106431308544e-05, "loss": 2.9243, "num_input_tokens_seen": 49360, "step": 20, "train_runtime": 49.3421, "train_tokens_per_second": 1000.362 }, { "epoch": 2.087912087912088, "grad_norm": 1.6257575750350952, "learning_rate": 1.2500000000000006e-05, "loss": 2.6278, "num_input_tokens_seen": 60992, "step": 25, "train_runtime": 60.8949, "train_tokens_per_second": 1001.594 }, { "epoch": 2.5274725274725274, "grad_norm": 1.944761872291565, "learning_rate": 4.521198892775203e-06, "loss": 2.6988, "num_input_tokens_seen": 72872, "step": 30, "train_runtime": 74.2765, "train_tokens_per_second": 981.091 }, { "epoch": 2.967032967032967, "grad_norm": 1.752797245979309, "learning_rate": 3.7980617469479953e-07, "loss": 2.7677, "num_input_tokens_seen": 85304, "step": 35, "train_runtime": 87.3084, "train_tokens_per_second": 977.042 }, { "epoch": 3.0, "num_input_tokens_seen": 85968, "step": 36, "total_flos": 363344926666752.0, "train_loss": 3.3720982670783997, "train_runtime": 88.7993, "train_samples_per_second": 9.155, "train_steps_per_second": 0.405 } ], "logging_steps": 5, "max_steps": 36, "num_input_tokens_seen": 85968, "num_train_epochs": 3, "save_steps": 100, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 363344926666752.0, "train_batch_size": 3, "trial_name": null, "trial_params": null }