| { |
| "best_global_step": 98, |
| "best_metric": 0.3496828079223633, |
| "best_model_checkpoint": "saves/test/checkpoint-98", |
| "epoch": 1.0, |
| "eval_steps": 7, |
| "global_step": 125, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.04, |
| "grad_norm": 396.0, |
| "learning_rate": 0.009230769230769232, |
| "loss": 3.6273, |
| "num_input_tokens_seen": 2144, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.056, |
| "eval_loss": 6.932018280029297, |
| "eval_runtime": 1.4196, |
| "eval_samples_per_second": 39.449, |
| "eval_steps_per_second": 9.862, |
| "num_input_tokens_seen": 2880, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.08, |
| "grad_norm": 22.5, |
| "learning_rate": 0.02076923076923077, |
| "loss": 6.3813, |
| "num_input_tokens_seen": 4128, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.112, |
| "eval_loss": 1.6227623224258423, |
| "eval_runtime": 1.3856, |
| "eval_samples_per_second": 40.417, |
| "eval_steps_per_second": 10.104, |
| "num_input_tokens_seen": 5920, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.12, |
| "grad_norm": 9.9375, |
| "learning_rate": 0.029994099395745794, |
| "loss": 2.5342, |
| "num_input_tokens_seen": 6240, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.16, |
| "grad_norm": 0.9140625, |
| "learning_rate": 0.029788065277773536, |
| "loss": 1.4507, |
| "num_input_tokens_seen": 8096, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.168, |
| "eval_loss": 0.4040253460407257, |
| "eval_runtime": 1.3702, |
| "eval_samples_per_second": 40.869, |
| "eval_steps_per_second": 10.217, |
| "num_input_tokens_seen": 8416, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.2, |
| "grad_norm": 1.140625, |
| "learning_rate": 0.02929162684084344, |
| "loss": 1.7771, |
| "num_input_tokens_seen": 10112, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.224, |
| "eval_loss": 3.618654489517212, |
| "eval_runtime": 1.3779, |
| "eval_samples_per_second": 40.642, |
| "eval_steps_per_second": 10.161, |
| "num_input_tokens_seen": 11264, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.24, |
| "grad_norm": 103.5, |
| "learning_rate": 0.028514533018536285, |
| "loss": 3.7935, |
| "num_input_tokens_seen": 12032, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.28, |
| "grad_norm": 0.12060546875, |
| "learning_rate": 0.02747204418453818, |
| "loss": 0.7848, |
| "num_input_tokens_seen": 13824, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.28, |
| "eval_loss": 0.36673951148986816, |
| "eval_runtime": 1.3899, |
| "eval_samples_per_second": 40.29, |
| "eval_steps_per_second": 10.072, |
| "num_input_tokens_seen": 13824, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.32, |
| "grad_norm": 0.11083984375, |
| "learning_rate": 0.026184632473247482, |
| "loss": 0.4314, |
| "num_input_tokens_seen": 15840, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.336, |
| "eval_loss": 0.3661668598651886, |
| "eval_runtime": 1.3878, |
| "eval_samples_per_second": 40.353, |
| "eval_steps_per_second": 10.088, |
| "num_input_tokens_seen": 16672, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.36, |
| "grad_norm": 0.251953125, |
| "learning_rate": 0.024677579753131316, |
| "loss": 0.4096, |
| "num_input_tokens_seen": 17920, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.392, |
| "eval_loss": 0.5265085101127625, |
| "eval_runtime": 1.3816, |
| "eval_samples_per_second": 40.532, |
| "eval_steps_per_second": 10.133, |
| "num_input_tokens_seen": 19296, |
| "step": 49 |
| }, |
| { |
| "epoch": 0.4, |
| "grad_norm": 0.07373046875, |
| "learning_rate": 0.022980481147730047, |
| "loss": 0.2423, |
| "num_input_tokens_seen": 19712, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.44, |
| "grad_norm": 0.150390625, |
| "learning_rate": 0.021126663854039943, |
| "loss": 0.5554, |
| "num_input_tokens_seen": 21952, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.448, |
| "eval_loss": 0.39251771569252014, |
| "eval_runtime": 1.3876, |
| "eval_samples_per_second": 40.359, |
| "eval_steps_per_second": 10.09, |
| "num_input_tokens_seen": 22432, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.48, |
| "grad_norm": 0.1513671875, |
| "learning_rate": 0.01915253267137274, |
| "loss": 0.4968, |
| "num_input_tokens_seen": 24160, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.504, |
| "eval_loss": 2.652535915374756, |
| "eval_runtime": 1.3806, |
| "eval_samples_per_second": 40.562, |
| "eval_steps_per_second": 10.141, |
| "num_input_tokens_seen": 25504, |
| "step": 63 |
| }, |
| { |
| "epoch": 0.52, |
| "grad_norm": 10.0625, |
| "learning_rate": 0.017096855093032493, |
| "loss": 0.7728, |
| "num_input_tokens_seen": 26112, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.56, |
| "grad_norm": 0.189453125, |
| "learning_rate": 0.015, |
| "loss": 0.3298, |
| "num_input_tokens_seen": 28064, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.56, |
| "eval_loss": 0.37763792276382446, |
| "eval_runtime": 1.3741, |
| "eval_samples_per_second": 40.753, |
| "eval_steps_per_second": 10.188, |
| "num_input_tokens_seen": 28064, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.6, |
| "grad_norm": 0.12060546875, |
| "learning_rate": 0.012903144906967513, |
| "loss": 0.3663, |
| "num_input_tokens_seen": 29824, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.616, |
| "eval_loss": 0.36274200677871704, |
| "eval_runtime": 1.4269, |
| "eval_samples_per_second": 39.245, |
| "eval_steps_per_second": 9.811, |
| "num_input_tokens_seen": 30720, |
| "step": 77 |
| }, |
| { |
| "epoch": 0.64, |
| "grad_norm": 0.142578125, |
| "learning_rate": 0.01084746732862726, |
| "loss": 0.3654, |
| "num_input_tokens_seen": 31904, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.672, |
| "eval_loss": 0.35259029269218445, |
| "eval_runtime": 1.4343, |
| "eval_samples_per_second": 39.044, |
| "eval_steps_per_second": 9.761, |
| "num_input_tokens_seen": 33504, |
| "step": 84 |
| }, |
| { |
| "epoch": 0.68, |
| "grad_norm": 0.08349609375, |
| "learning_rate": 0.008873336145960059, |
| "loss": 0.3696, |
| "num_input_tokens_seen": 33984, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.72, |
| "grad_norm": 0.039306640625, |
| "learning_rate": 0.007019518852269953, |
| "loss": 0.3495, |
| "num_input_tokens_seen": 35776, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.728, |
| "eval_loss": 0.3546493351459503, |
| "eval_runtime": 1.4075, |
| "eval_samples_per_second": 39.786, |
| "eval_steps_per_second": 9.946, |
| "num_input_tokens_seen": 36128, |
| "step": 91 |
| }, |
| { |
| "epoch": 0.76, |
| "grad_norm": 0.017578125, |
| "learning_rate": 0.005322420246868689, |
| "loss": 0.412, |
| "num_input_tokens_seen": 37472, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.784, |
| "eval_loss": 0.3496828079223633, |
| "eval_runtime": 1.4395, |
| "eval_samples_per_second": 38.902, |
| "eval_steps_per_second": 9.726, |
| "num_input_tokens_seen": 38592, |
| "step": 98 |
| }, |
| { |
| "epoch": 0.8, |
| "grad_norm": 0.02734375, |
| "learning_rate": 0.003815367526752516, |
| "loss": 0.3676, |
| "num_input_tokens_seen": 39328, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.84, |
| "grad_norm": 0.047607421875, |
| "learning_rate": 0.0025279558154618197, |
| "loss": 0.349, |
| "num_input_tokens_seen": 41280, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.84, |
| "eval_loss": 0.35384148359298706, |
| "eval_runtime": 1.4061, |
| "eval_samples_per_second": 39.827, |
| "eval_steps_per_second": 9.957, |
| "num_input_tokens_seen": 41280, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.88, |
| "grad_norm": 0.037841796875, |
| "learning_rate": 0.0014854669814637145, |
| "loss": 0.3482, |
| "num_input_tokens_seen": 43552, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.896, |
| "eval_loss": 0.3565780818462372, |
| "eval_runtime": 1.4483, |
| "eval_samples_per_second": 38.666, |
| "eval_steps_per_second": 9.666, |
| "num_input_tokens_seen": 44160, |
| "step": 112 |
| }, |
| { |
| "epoch": 0.92, |
| "grad_norm": 0.040283203125, |
| "learning_rate": 0.000708373159156555, |
| "loss": 0.3258, |
| "num_input_tokens_seen": 45216, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.952, |
| "eval_loss": 0.35846200585365295, |
| "eval_runtime": 1.4597, |
| "eval_samples_per_second": 38.364, |
| "eval_steps_per_second": 9.591, |
| "num_input_tokens_seen": 46944, |
| "step": 119 |
| }, |
| { |
| "epoch": 0.96, |
| "grad_norm": 0.0189208984375, |
| "learning_rate": 0.0002119347222264617, |
| "loss": 0.3217, |
| "num_input_tokens_seen": 47360, |
| "step": 120 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 0.0859375, |
| "learning_rate": 5.90060425420702e-06, |
| "loss": 0.3608, |
| "num_input_tokens_seen": 49376, |
| "step": 125 |
| }, |
| { |
| "epoch": 1.0, |
| "num_input_tokens_seen": 49376, |
| "step": 125, |
| "total_flos": 2223378963628032.0, |
| "train_loss": 1.100914571762085, |
| "train_runtime": 86.254, |
| "train_samples_per_second": 5.774, |
| "train_steps_per_second": 1.449 |
| } |
| ], |
| "logging_steps": 5, |
| "max_steps": 125, |
| "num_input_tokens_seen": 49376, |
| "num_train_epochs": 1, |
| "save_steps": 7, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 2223378963628032.0, |
| "train_batch_size": 4, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|