{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 19.935691318327976,
  "global_step": 5580,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.46,
      "learning_rate": 0.0002,
      "loss": 1.2054,
      "step": 128
    },
    {
      "epoch": 0.91,
      "learning_rate": 0.0002,
      "loss": 1.0285,
      "step": 256
    },
    {
      "epoch": 1.37,
      "learning_rate": 0.0002,
      "loss": 0.976,
      "step": 384
    },
    {
      "epoch": 1.83,
      "learning_rate": 0.0002,
      "loss": 0.9643,
      "step": 512
    },
    {
      "epoch": 2.29,
      "learning_rate": 0.0002,
      "loss": 0.9371,
      "step": 640
    },
    {
      "epoch": 2.74,
      "learning_rate": 0.0002,
      "loss": 0.9214,
      "step": 768
    },
    {
      "epoch": 3.2,
      "learning_rate": 0.0002,
      "loss": 0.9016,
      "step": 896
    },
    {
      "epoch": 3.66,
      "learning_rate": 0.0002,
      "loss": 0.8862,
      "step": 1024
    },
    {
      "epoch": 4.12,
      "learning_rate": 0.0002,
      "loss": 0.8792,
      "step": 1152
    },
    {
      "epoch": 4.57,
      "learning_rate": 0.0002,
      "loss": 0.864,
      "step": 1280
    },
    {
      "epoch": 5.03,
      "learning_rate": 0.0002,
      "loss": 0.8588,
      "step": 1408
    },
    {
      "epoch": 5.49,
      "learning_rate": 0.0002,
      "loss": 0.8374,
      "step": 1536
    },
    {
      "epoch": 5.94,
      "learning_rate": 0.0002,
      "loss": 0.8414,
      "step": 1664
    },
    {
      "epoch": 6.4,
      "learning_rate": 0.0002,
      "loss": 0.8152,
      "step": 1792
    },
    {
      "epoch": 6.86,
      "learning_rate": 0.0002,
      "loss": 0.8216,
      "step": 1920
    },
    {
      "epoch": 7.32,
      "learning_rate": 0.0002,
      "loss": 0.8031,
      "step": 2048
    },
    {
      "epoch": 7.77,
      "learning_rate": 0.0002,
      "loss": 0.8031,
      "step": 2176
    },
    {
      "epoch": 8.23,
      "learning_rate": 0.0002,
      "loss": 0.7887,
      "step": 2304
    },
    {
      "epoch": 8.69,
      "learning_rate": 0.0002,
      "loss": 0.7847,
      "step": 2432
    },
    {
      "epoch": 9.15,
      "learning_rate": 0.0002,
      "loss": 0.7746,
      "step": 2560
    },
    {
      "epoch": 9.6,
      "learning_rate": 0.0002,
      "loss": 0.764,
      "step": 2688
    },
    {
      "epoch": 10.06,
      "learning_rate": 0.0002,
      "loss": 0.7652,
      "step": 2816
    },
    {
      "epoch": 10.52,
      "learning_rate": 0.0002,
      "loss": 0.7425,
      "step": 2944
    },
    {
      "epoch": 10.98,
      "learning_rate": 0.0002,
      "loss": 0.7584,
      "step": 3072
    },
    {
      "epoch": 11.43,
      "learning_rate": 0.0002,
      "loss": 0.7302,
      "step": 3200
    },
    {
      "epoch": 11.89,
      "learning_rate": 0.0002,
      "loss": 0.7374,
      "step": 3328
    },
    {
      "epoch": 12.35,
      "learning_rate": 0.0002,
      "loss": 0.7187,
      "step": 3456
    },
    {
      "epoch": 12.8,
      "learning_rate": 0.0002,
      "loss": 0.7222,
      "step": 3584
    },
    {
      "epoch": 13.26,
      "learning_rate": 0.0002,
      "loss": 0.7096,
      "step": 3712
    },
    {
      "epoch": 13.72,
      "learning_rate": 0.0002,
      "loss": 0.7057,
      "step": 3840
    },
    {
      "epoch": 14.18,
      "learning_rate": 0.0002,
      "loss": 0.7003,
      "step": 3968
    },
    {
      "epoch": 14.63,
      "learning_rate": 0.0002,
      "loss": 0.6879,
      "step": 4096
    },
    {
      "epoch": 15.09,
      "learning_rate": 0.0002,
      "loss": 0.6928,
      "step": 4224
    },
    {
      "epoch": 15.55,
      "learning_rate": 0.0002,
      "loss": 0.6749,
      "step": 4352
    },
    {
      "epoch": 16.01,
      "learning_rate": 0.0002,
      "loss": 0.6857,
      "step": 4480
    },
    {
      "epoch": 16.46,
      "learning_rate": 0.0002,
      "loss": 0.6545,
      "step": 4608
    },
    {
      "epoch": 16.92,
      "learning_rate": 0.0002,
      "loss": 0.6724,
      "step": 4736
    },
    {
      "epoch": 17.38,
      "learning_rate": 0.0002,
      "loss": 0.6434,
      "step": 4864
    },
    {
      "epoch": 17.83,
      "learning_rate": 0.0002,
      "loss": 0.6581,
      "step": 4992
    },
    {
      "epoch": 18.29,
      "learning_rate": 0.0002,
      "loss": 0.644,
      "step": 5120
    },
    {
      "epoch": 18.75,
      "learning_rate": 0.0002,
      "loss": 0.6392,
      "step": 5248
    },
    {
      "epoch": 19.21,
      "learning_rate": 0.0002,
      "loss": 0.6346,
      "step": 5376
    },
    {
      "epoch": 19.66,
      "learning_rate": 0.0002,
      "loss": 0.6286,
      "step": 5504
    }
  ],
  "max_steps": 5580,
  "num_train_epochs": 20,
  "total_flos": 3.311540504834867e+16,
  "trial_name": null,
  "trial_params": null
}