| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 4.0, | |
| "eval_steps": 500, | |
| "global_step": 128, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 1.9230769230769231e-07, | |
| "loss": 2.3131, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 3.8461538461538463e-07, | |
| "loss": 2.3205, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 4.99626950870707e-07, | |
| "loss": 2.1741, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 4.954429235188896e-07, | |
| "loss": 1.9213, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "learning_rate": 4.866867588977609e-07, | |
| "loss": 1.8027, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "learning_rate": 4.7352156778691276e-07, | |
| "loss": 1.5125, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "eval_loss": 1.4529484510421753, | |
| "eval_runtime": 16.1705, | |
| "eval_samples_per_second": 7.05, | |
| "eval_steps_per_second": 0.928, | |
| "step": 32 | |
| }, | |
| { | |
| "epoch": 1.09, | |
| "learning_rate": 4.561925927872421e-07, | |
| "loss": 1.4467, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 1.25, | |
| "learning_rate": 4.3502263991531294e-07, | |
| "loss": 1.3768, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 1.41, | |
| "learning_rate": 4.104060653380402e-07, | |
| "loss": 1.2423, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 1.56, | |
| "learning_rate": 3.828014292634508e-07, | |
| "loss": 1.1306, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 1.72, | |
| "learning_rate": 3.527229538316371e-07, | |
| "loss": 1.0165, | |
| "step": 55 | |
| }, | |
| { | |
| "epoch": 1.88, | |
| "learning_rate": 3.207309441292325e-07, | |
| "loss": 0.9656, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_loss": 0.9357134699821472, | |
| "eval_runtime": 15.8011, | |
| "eval_samples_per_second": 7.215, | |
| "eval_steps_per_second": 0.949, | |
| "step": 64 | |
| }, | |
| { | |
| "epoch": 2.03, | |
| "learning_rate": 2.8742135076578607e-07, | |
| "loss": 0.9752, | |
| "step": 65 | |
| }, | |
| { | |
| "epoch": 2.19, | |
| "learning_rate": 2.5341466844148774e-07, | |
| "loss": 0.8735, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 2.34, | |
| "learning_rate": 2.1934437730492543e-07, | |
| "loss": 0.8911, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 2.5, | |
| "learning_rate": 1.8584514241650663e-07, | |
| "loss": 0.8266, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 2.66, | |
| "learning_rate": 1.5354099113921612e-07, | |
| "loss": 0.8245, | |
| "step": 85 | |
| }, | |
| { | |
| "epoch": 2.81, | |
| "learning_rate": 1.2303368868954847e-07, | |
| "loss": 0.7816, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 2.97, | |
| "learning_rate": 9.489152839010797e-08, | |
| "loss": 0.7992, | |
| "step": 95 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "eval_loss": 0.8069692850112915, | |
| "eval_runtime": 15.7689, | |
| "eval_samples_per_second": 7.229, | |
| "eval_steps_per_second": 0.951, | |
| "step": 96 | |
| }, | |
| { | |
| "epoch": 3.12, | |
| "learning_rate": 6.963874544026108e-08, | |
| "loss": 0.7874, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 3.28, | |
| "learning_rate": 4.774575140626316e-08, | |
| "loss": 0.7714, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 3.44, | |
| "learning_rate": 2.9620371343832106e-08, | |
| "loss": 0.7559, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 3.59, | |
| "learning_rate": 1.5600246788994938e-08, | |
| "loss": 0.7602, | |
| "step": 115 | |
| }, | |
| { | |
| "epoch": 3.75, | |
| "learning_rate": 5.946546135113861e-09, | |
| "loss": 0.7548, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 3.91, | |
| "learning_rate": 8.390995598676065e-10, | |
| "loss": 0.7865, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "eval_loss": 0.7899278998374939, | |
| "eval_runtime": 15.7827, | |
| "eval_samples_per_second": 7.223, | |
| "eval_steps_per_second": 0.95, | |
| "step": 128 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "step": 128, | |
| "total_flos": 89583526084608.0, | |
| "train_loss": 1.1804139949381351, | |
| "train_runtime": 4681.6725, | |
| "train_samples_per_second": 0.871, | |
| "train_steps_per_second": 0.027 | |
| } | |
| ], | |
| "logging_steps": 5, | |
| "max_steps": 128, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 4, | |
| "save_steps": 500, | |
| "total_flos": 89583526084608.0, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |