| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 17.999285459092533, | |
| "global_step": 5038, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 0.0002, | |
| "loss": 1.2306, | |
| "step": 128 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "learning_rate": 0.0002, | |
| "loss": 1.0373, | |
| "step": 256 | |
| }, | |
| { | |
| "epoch": 1.37, | |
| "learning_rate": 0.0002, | |
| "loss": 0.983, | |
| "step": 384 | |
| }, | |
| { | |
| "epoch": 1.83, | |
| "learning_rate": 0.0002, | |
| "loss": 0.97, | |
| "step": 512 | |
| }, | |
| { | |
| "epoch": 2.29, | |
| "learning_rate": 0.0002, | |
| "loss": 0.9417, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 2.74, | |
| "learning_rate": 0.0002, | |
| "loss": 0.9256, | |
| "step": 768 | |
| }, | |
| { | |
| "epoch": 3.2, | |
| "learning_rate": 0.0002, | |
| "loss": 0.9053, | |
| "step": 896 | |
| }, | |
| { | |
| "epoch": 3.66, | |
| "learning_rate": 0.0002, | |
| "loss": 0.8893, | |
| "step": 1024 | |
| }, | |
| { | |
| "epoch": 4.12, | |
| "learning_rate": 0.0002, | |
| "loss": 0.8817, | |
| "step": 1152 | |
| }, | |
| { | |
| "epoch": 4.57, | |
| "learning_rate": 0.0002, | |
| "loss": 0.8662, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 5.03, | |
| "learning_rate": 0.0002, | |
| "loss": 0.8612, | |
| "step": 1408 | |
| }, | |
| { | |
| "epoch": 5.49, | |
| "learning_rate": 0.0002, | |
| "loss": 0.8391, | |
| "step": 1536 | |
| }, | |
| { | |
| "epoch": 5.94, | |
| "learning_rate": 0.0002, | |
| "loss": 0.8434, | |
| "step": 1664 | |
| }, | |
| { | |
| "epoch": 6.4, | |
| "learning_rate": 0.0002, | |
| "loss": 0.8167, | |
| "step": 1792 | |
| }, | |
| { | |
| "epoch": 6.86, | |
| "learning_rate": 0.0002, | |
| "loss": 0.8239, | |
| "step": 1920 | |
| }, | |
| { | |
| "epoch": 7.32, | |
| "learning_rate": 0.0002, | |
| "loss": 0.8046, | |
| "step": 2048 | |
| }, | |
| { | |
| "epoch": 7.77, | |
| "learning_rate": 0.0002, | |
| "loss": 0.8053, | |
| "step": 2176 | |
| }, | |
| { | |
| "epoch": 8.23, | |
| "learning_rate": 0.0002, | |
| "loss": 0.7903, | |
| "step": 2304 | |
| }, | |
| { | |
| "epoch": 8.69, | |
| "learning_rate": 0.0002, | |
| "loss": 0.7868, | |
| "step": 2432 | |
| }, | |
| { | |
| "epoch": 9.15, | |
| "learning_rate": 0.0002, | |
| "loss": 0.7763, | |
| "step": 2560 | |
| }, | |
| { | |
| "epoch": 9.6, | |
| "learning_rate": 0.0002, | |
| "loss": 0.7658, | |
| "step": 2688 | |
| }, | |
| { | |
| "epoch": 10.06, | |
| "learning_rate": 0.0002, | |
| "loss": 0.7669, | |
| "step": 2816 | |
| }, | |
| { | |
| "epoch": 10.52, | |
| "learning_rate": 0.0002, | |
| "loss": 0.7443, | |
| "step": 2944 | |
| }, | |
| { | |
| "epoch": 10.98, | |
| "learning_rate": 0.0002, | |
| "loss": 0.7597, | |
| "step": 3072 | |
| }, | |
| { | |
| "epoch": 11.43, | |
| "learning_rate": 0.0002, | |
| "loss": 0.7313, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 11.89, | |
| "learning_rate": 0.0002, | |
| "loss": 0.7388, | |
| "step": 3328 | |
| }, | |
| { | |
| "epoch": 12.35, | |
| "learning_rate": 0.0002, | |
| "loss": 0.7202, | |
| "step": 3456 | |
| }, | |
| { | |
| "epoch": 12.8, | |
| "learning_rate": 0.0002, | |
| "loss": 0.7231, | |
| "step": 3584 | |
| }, | |
| { | |
| "epoch": 13.26, | |
| "learning_rate": 0.0002, | |
| "loss": 0.711, | |
| "step": 3712 | |
| }, | |
| { | |
| "epoch": 13.72, | |
| "learning_rate": 0.0002, | |
| "loss": 0.7066, | |
| "step": 3840 | |
| }, | |
| { | |
| "epoch": 14.18, | |
| "learning_rate": 0.0002, | |
| "loss": 0.7015, | |
| "step": 3968 | |
| }, | |
| { | |
| "epoch": 14.63, | |
| "learning_rate": 0.0002, | |
| "loss": 0.6885, | |
| "step": 4096 | |
| }, | |
| { | |
| "epoch": 15.09, | |
| "learning_rate": 0.0002, | |
| "loss": 0.6938, | |
| "step": 4224 | |
| }, | |
| { | |
| "epoch": 15.55, | |
| "learning_rate": 0.0002, | |
| "loss": 0.6753, | |
| "step": 4352 | |
| }, | |
| { | |
| "epoch": 16.01, | |
| "learning_rate": 0.0002, | |
| "loss": 0.6863, | |
| "step": 4480 | |
| }, | |
| { | |
| "epoch": 16.46, | |
| "learning_rate": 0.0002, | |
| "loss": 0.6546, | |
| "step": 4608 | |
| }, | |
| { | |
| "epoch": 16.92, | |
| "learning_rate": 0.0002, | |
| "loss": 0.6739, | |
| "step": 4736 | |
| }, | |
| { | |
| "epoch": 17.38, | |
| "learning_rate": 0.0002, | |
| "loss": 0.6436, | |
| "step": 4864 | |
| }, | |
| { | |
| "epoch": 17.83, | |
| "learning_rate": 0.0002, | |
| "loss": 0.6589, | |
| "step": 4992 | |
| } | |
| ], | |
| "max_steps": 5580, | |
| "num_train_epochs": 20, | |
| "total_flos": 2.989931152539648e+16, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |