| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 0.7072135785007072, | |
| "eval_steps": 100, | |
| "global_step": 300, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 8.987653272773745e-06, | |
| "loss": 0.5127, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 8.950680842949738e-06, | |
| "loss": 0.2702, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 8.88928559430839e-06, | |
| "loss": 0.2002, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 8.80380442924435e-06, | |
| "loss": 0.1616, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 8.694706420036745e-06, | |
| "loss": 0.1754, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 8.562590234845845e-06, | |
| "loss": 0.184, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 8.408180852560864e-06, | |
| "loss": 0.141, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 8.232325584525908e-06, | |
| "loss": 0.1414, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 8.035989424974636e-06, | |
| "loss": 0.1235, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 7.820249755687773e-06, | |
| "loss": 0.1389, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "eval_accuracy": 0.0, | |
| "eval_loss": 0.035761188715696335, | |
| "eval_runtime": 32.8918, | |
| "eval_samples_per_second": 85.979, | |
| "eval_steps_per_second": 14.35, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 7.586290433931348e-06, | |
| "loss": 0.0967, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 7.3353952961177e-06, | |
| "loss": 0.1373, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 7.068941112837489e-06, | |
| "loss": 0.0957, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 6.788390033921493e-06, | |
| "loss": 0.0884, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 6.495281564989449e-06, | |
| "loss": 0.0918, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 6.191224119514071e-06, | |
| "loss": 0.085, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 5.877886192757761e-06, | |
| "loss": 0.1101, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 5.556987206014427e-06, | |
| "loss": 0.1185, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "learning_rate": 5.23028807139807e-06, | |
| "loss": 0.1112, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 4.899581528953207e-06, | |
| "loss": 0.104, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "eval_accuracy": 0.0, | |
| "eval_loss": 0.028301144018769264, | |
| "eval_runtime": 33.4946, | |
| "eval_samples_per_second": 84.431, | |
| "eval_steps_per_second": 14.092, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 4.566682309111617e-06, | |
| "loss": 0.0972, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "learning_rate": 4.233417174478286e-06, | |
| "loss": 0.0902, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "learning_rate": 3.901614895591586e-06, | |
| "loss": 0.0726, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "learning_rate": 3.573096215665058e-06, | |
| "loss": 0.0724, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "learning_rate": 3.249663859378603e-06, | |
| "loss": 0.0974, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "learning_rate": 2.9330926405452062e-06, | |
| "loss": 0.0793, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "learning_rate": 2.6251197229366963e-06, | |
| "loss": 0.0901, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "learning_rate": 2.3274350877116486e-06, | |
| "loss": 0.0731, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 2.0416722597547573e-06, | |
| "loss": 0.0836, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "learning_rate": 1.7693993438163312e-06, | |
| "loss": 0.0881, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "eval_accuracy": 0.0, | |
| "eval_loss": 0.01625123806297779, | |
| "eval_runtime": 33.119, | |
| "eval_samples_per_second": 85.389, | |
| "eval_steps_per_second": 14.252, | |
| "step": 300 | |
| } | |
| ], | |
| "logging_steps": 10, | |
| "max_steps": 424, | |
| "num_train_epochs": 1, | |
| "save_steps": 100, | |
| "total_flos": 0.0, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |