| { | |
| "best_metric": 2.0136280059814453, | |
| "best_model_checkpoint": "cdetr-cd45rb-s/checkpoint-2420", | |
| "epoch": 10.0, | |
| "global_step": 2420, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 1.0, | |
| "learning_rate": 5e-05, | |
| "loss": 2.6006, | |
| "step": 242 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "eval_loss": 2.214585542678833, | |
| "eval_runtime": 17.9919, | |
| "eval_samples_per_second": 5.558, | |
| "eval_steps_per_second": 0.723, | |
| "step": 242 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "learning_rate": 0.0, | |
| "loss": 2.2409, | |
| "step": 484 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_loss": 2.110588788986206, | |
| "eval_runtime": 17.3999, | |
| "eval_samples_per_second": 5.747, | |
| "eval_steps_per_second": 0.747, | |
| "step": 484 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "learning_rate": 4e-05, | |
| "loss": 2.2027, | |
| "step": 726 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "eval_loss": 2.145615339279175, | |
| "eval_runtime": 17.7242, | |
| "eval_samples_per_second": 5.642, | |
| "eval_steps_per_second": 0.733, | |
| "step": 726 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "learning_rate": 2e-05, | |
| "loss": 2.183, | |
| "step": 968 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "eval_loss": 2.0863144397735596, | |
| "eval_runtime": 17.5156, | |
| "eval_samples_per_second": 5.709, | |
| "eval_steps_per_second": 0.742, | |
| "step": 968 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "learning_rate": 0.0, | |
| "loss": 2.0628, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "eval_loss": 2.0293631553649902, | |
| "eval_runtime": 17.5041, | |
| "eval_samples_per_second": 5.713, | |
| "eval_steps_per_second": 0.743, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "learning_rate": 4e-05, | |
| "loss": 2.1588, | |
| "step": 1452 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "eval_loss": 2.0859251022338867, | |
| "eval_runtime": 17.1118, | |
| "eval_samples_per_second": 5.844, | |
| "eval_steps_per_second": 0.76, | |
| "step": 1452 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "learning_rate": 3e-05, | |
| "loss": 2.1731, | |
| "step": 1694 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "eval_loss": 2.073065757751465, | |
| "eval_runtime": 16.6383, | |
| "eval_samples_per_second": 6.01, | |
| "eval_steps_per_second": 0.781, | |
| "step": 1694 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "learning_rate": 2e-05, | |
| "loss": 2.1041, | |
| "step": 1936 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "eval_loss": 2.1051650047302246, | |
| "eval_runtime": 16.6295, | |
| "eval_samples_per_second": 6.013, | |
| "eval_steps_per_second": 0.782, | |
| "step": 1936 | |
| }, | |
| { | |
| "epoch": 9.0, | |
| "learning_rate": 1e-05, | |
| "loss": 2.0383, | |
| "step": 2178 | |
| }, | |
| { | |
| "epoch": 9.0, | |
| "eval_loss": 2.037771463394165, | |
| "eval_runtime": 17.1339, | |
| "eval_samples_per_second": 5.836, | |
| "eval_steps_per_second": 0.759, | |
| "step": 2178 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "learning_rate": 0.0, | |
| "loss": 1.967, | |
| "step": 2420 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "eval_loss": 2.0136280059814453, | |
| "eval_runtime": 16.8066, | |
| "eval_samples_per_second": 5.95, | |
| "eval_steps_per_second": 0.774, | |
| "step": 2420 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "step": 2420, | |
| "total_flos": 4.8214727269632e+18, | |
| "train_loss": 1.0441367535551718, | |
| "train_runtime": 1297.7653, | |
| "train_samples_per_second": 7.444, | |
| "train_steps_per_second": 1.865 | |
| } | |
| ], | |
| "max_steps": 2420, | |
| "num_train_epochs": 10, | |
| "total_flos": 4.8214727269632e+18, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |