{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 3.0,
  "global_step": 15969,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.09,
      "learning_rate": 4.843446677938506e-05,
      "loss": 1.9037,
      "step": 500
    },
    {
      "epoch": 0.19,
      "learning_rate": 4.686893355877012e-05,
      "loss": 1.7424,
      "step": 1000
    },
    {
      "epoch": 0.28,
      "learning_rate": 4.530340033815518e-05,
      "loss": 1.6677,
      "step": 1500
    },
    {
      "epoch": 0.38,
      "learning_rate": 4.373786711754024e-05,
      "loss": 1.6062,
      "step": 2000
    },
    {
      "epoch": 0.47,
      "learning_rate": 4.2172333896925295e-05,
      "loss": 1.5738,
      "step": 2500
    },
    {
      "epoch": 0.56,
      "learning_rate": 4.060680067631035e-05,
      "loss": 1.5341,
      "step": 3000
    },
    {
      "epoch": 0.66,
      "learning_rate": 3.9041267455695416e-05,
      "loss": 1.4979,
      "step": 3500
    },
    {
      "epoch": 0.75,
      "learning_rate": 3.7475734235080466e-05,
      "loss": 1.4711,
      "step": 4000
    },
    {
      "epoch": 0.85,
      "learning_rate": 3.591020101446553e-05,
      "loss": 1.4475,
      "step": 4500
    },
    {
      "epoch": 0.94,
      "learning_rate": 3.434466779385059e-05,
      "loss": 1.4165,
      "step": 5000
    },
    {
      "epoch": 1.03,
      "learning_rate": 3.277913457323565e-05,
      "loss": 1.3643,
      "step": 5500
    },
    {
      "epoch": 1.13,
      "learning_rate": 3.12136013526207e-05,
      "loss": 1.286,
      "step": 6000
    },
    {
      "epoch": 1.22,
      "learning_rate": 2.9648068132005762e-05,
      "loss": 1.2671,
      "step": 6500
    },
    {
      "epoch": 1.32,
      "learning_rate": 2.8082534911390823e-05,
      "loss": 1.2576,
      "step": 7000
    },
    {
      "epoch": 1.41,
      "learning_rate": 2.6517001690775877e-05,
      "loss": 1.2502,
      "step": 7500
    },
    {
      "epoch": 1.5,
      "learning_rate": 2.4951468470160937e-05,
      "loss": 1.2262,
      "step": 8000
    },
    {
      "epoch": 1.6,
      "learning_rate": 2.3385935249545994e-05,
      "loss": 1.223,
      "step": 8500
    },
    {
      "epoch": 1.69,
      "learning_rate": 2.1820402028931055e-05,
      "loss": 1.214,
      "step": 9000
    },
    {
      "epoch": 1.78,
      "learning_rate": 2.0254868808316112e-05,
      "loss": 1.1949,
      "step": 9500
    },
    {
      "epoch": 1.88,
      "learning_rate": 1.868933558770117e-05,
      "loss": 1.1977,
      "step": 10000
    },
    {
      "epoch": 1.97,
      "learning_rate": 1.712380236708623e-05,
      "loss": 1.1708,
      "step": 10500
    },
    {
      "epoch": 2.07,
      "learning_rate": 1.5558269146471287e-05,
      "loss": 1.0994,
      "step": 11000
    },
    {
      "epoch": 2.16,
      "learning_rate": 1.3992735925856349e-05,
      "loss": 1.0833,
      "step": 11500
    },
    {
      "epoch": 2.25,
      "learning_rate": 1.2427202705241406e-05,
      "loss": 1.0865,
      "step": 12000
    },
    {
      "epoch": 2.35,
      "learning_rate": 1.0861669484626465e-05,
      "loss": 1.073,
      "step": 12500
    },
    {
      "epoch": 2.44,
      "learning_rate": 9.296136264011522e-06,
      "loss": 1.072,
      "step": 13000
    },
    {
      "epoch": 2.54,
      "learning_rate": 7.730603043396581e-06,
      "loss": 1.071,
      "step": 13500
    },
    {
      "epoch": 2.63,
      "learning_rate": 6.16506982278164e-06,
      "loss": 1.0711,
      "step": 14000
    },
    {
      "epoch": 2.72,
      "learning_rate": 4.599536602166698e-06,
      "loss": 1.0627,
      "step": 14500
    },
    {
      "epoch": 2.82,
      "learning_rate": 3.034003381551757e-06,
      "loss": 1.06,
      "step": 15000
    },
    {
      "epoch": 2.91,
      "learning_rate": 1.4684701609368152e-06,
      "loss": 1.0543,
      "step": 15500
    },
    {
      "epoch": 3.0,
      "step": 15969,
      "total_flos": 2.977670544895181e+16,
      "train_loss": 1.2911211150314672,
      "train_runtime": 9784.2831,
      "train_samples_per_second": 1.632,
      "train_steps_per_second": 1.632
    }
  ],
  "max_steps": 15969,
  "num_train_epochs": 3,
  "total_flos": 2.977670544895181e+16,
  "trial_name": null,
  "trial_params": null
}