{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 3.0,
  "global_step": 252,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.19,
      "learning_rate": 0.0001230769230769231,
      "loss": 1.8283,
      "step": 16
    },
    {
      "epoch": 0.38,
      "learning_rate": 0.00019469026548672567,
      "loss": 1.3401,
      "step": 32
    },
    {
      "epoch": 0.57,
      "learning_rate": 0.00018053097345132742,
      "loss": 1.2389,
      "step": 48
    },
    {
      "epoch": 0.76,
      "learning_rate": 0.0001663716814159292,
      "loss": 1.0655,
      "step": 64
    },
    {
      "epoch": 0.95,
      "learning_rate": 0.00015221238938053098,
      "loss": 0.9853,
      "step": 80
    },
    {
      "epoch": 1.14,
      "learning_rate": 0.00013805309734513276,
      "loss": 1.0283,
      "step": 96
    },
    {
      "epoch": 1.33,
      "learning_rate": 0.0001238938053097345,
      "loss": 0.9138,
      "step": 112
    },
    {
      "epoch": 1.52,
      "learning_rate": 0.00010973451327433629,
      "loss": 0.997,
      "step": 128
    },
    {
      "epoch": 1.71,
      "learning_rate": 9.557522123893806e-05,
      "loss": 0.9827,
      "step": 144
    },
    {
      "epoch": 1.9,
      "learning_rate": 8.141592920353983e-05,
      "loss": 0.9958,
      "step": 160
    },
    {
      "epoch": 2.1,
      "learning_rate": 6.725663716814161e-05,
      "loss": 0.9319,
      "step": 176
    },
    {
      "epoch": 2.29,
      "learning_rate": 5.309734513274337e-05,
      "loss": 0.9466,
      "step": 192
    },
    {
      "epoch": 2.48,
      "learning_rate": 3.893805309734514e-05,
      "loss": 0.955,
      "step": 208
    },
    {
      "epoch": 2.67,
      "learning_rate": 2.4778761061946905e-05,
      "loss": 0.9503,
      "step": 224
    },
    {
      "epoch": 2.86,
      "learning_rate": 1.0619469026548673e-05,
      "loss": 0.9037,
      "step": 240
    }
  ],
  "max_steps": 252,
  "num_train_epochs": 3,
  "total_flos": 2.079427580972237e+16,
  "trial_name": null,
  "trial_params": null
}