| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 3.0, | |
| "global_step": 4806, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 0.00013305613305613308, | |
| "loss": 1.0196, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 0.00019264739884393065, | |
| "loss": 0.8615, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 0.00017784971098265897, | |
| "loss": 0.8536, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "learning_rate": 0.0001630520231213873, | |
| "loss": 0.8487, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "learning_rate": 0.0001482543352601156, | |
| "loss": 0.8546, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 1.2, | |
| "learning_rate": 0.00013345664739884393, | |
| "loss": 0.826, | |
| "step": 1920 | |
| }, | |
| { | |
| "epoch": 1.4, | |
| "learning_rate": 0.00011865895953757227, | |
| "loss": 0.8328, | |
| "step": 2240 | |
| }, | |
| { | |
| "epoch": 1.6, | |
| "learning_rate": 0.00010386127167630057, | |
| "loss": 0.8243, | |
| "step": 2560 | |
| }, | |
| { | |
| "epoch": 1.8, | |
| "learning_rate": 8.906358381502891e-05, | |
| "loss": 0.824, | |
| "step": 2880 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "learning_rate": 7.426589595375722e-05, | |
| "loss": 0.8275, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 2.2, | |
| "learning_rate": 5.946820809248556e-05, | |
| "loss": 0.8031, | |
| "step": 3520 | |
| }, | |
| { | |
| "epoch": 2.4, | |
| "learning_rate": 4.467052023121387e-05, | |
| "loss": 0.7995, | |
| "step": 3840 | |
| }, | |
| { | |
| "epoch": 2.6, | |
| "learning_rate": 2.98728323699422e-05, | |
| "loss": 0.8027, | |
| "step": 4160 | |
| }, | |
| { | |
| "epoch": 2.8, | |
| "learning_rate": 1.5075144508670521e-05, | |
| "loss": 0.8074, | |
| "step": 4480 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "learning_rate": 2.7745664739884395e-07, | |
| "loss": 0.801, | |
| "step": 4800 | |
| } | |
| ], | |
| "max_steps": 4806, | |
| "num_train_epochs": 3, | |
| "total_flos": 3.989512556547932e+17, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |