| { |
| "best_global_step": 112, |
| "best_metric": 0.34589245915412903, |
| "best_model_checkpoint": "saves/test/checkpoint-112", |
| "epoch": 1.0, |
| "eval_steps": 7, |
| "global_step": 125, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.04, |
| "grad_norm": 556.8270263671875, |
| "learning_rate": 1.5384615384615387e-05, |
| "loss": 10.9709, |
| "num_input_tokens_seen": 2144, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.056, |
| "eval_loss": 6.5227251052856445, |
| "eval_runtime": 0.7016, |
| "eval_samples_per_second": 79.812, |
| "eval_steps_per_second": 19.953, |
| "num_input_tokens_seen": 2880, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.08, |
| "grad_norm": 166.4779815673828, |
| "learning_rate": 3.461538461538462e-05, |
| "loss": 6.4075, |
| "num_input_tokens_seen": 4128, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.112, |
| "eval_loss": 1.382468581199646, |
| "eval_runtime": 0.7422, |
| "eval_samples_per_second": 75.454, |
| "eval_steps_per_second": 18.864, |
| "num_input_tokens_seen": 5920, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.12, |
| "grad_norm": 137.21327209472656, |
| "learning_rate": 4.999016565957633e-05, |
| "loss": 2.5338, |
| "num_input_tokens_seen": 6240, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.16, |
| "grad_norm": 15.133822441101074, |
| "learning_rate": 4.96467754629559e-05, |
| "loss": 0.5326, |
| "num_input_tokens_seen": 8096, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.168, |
| "eval_loss": 0.4987373352050781, |
| "eval_runtime": 0.7195, |
| "eval_samples_per_second": 77.827, |
| "eval_steps_per_second": 19.457, |
| "num_input_tokens_seen": 8416, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.2, |
| "grad_norm": 18.45602798461914, |
| "learning_rate": 4.881937806807241e-05, |
| "loss": 0.4144, |
| "num_input_tokens_seen": 10112, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.224, |
| "eval_loss": 0.4531269073486328, |
| "eval_runtime": 0.7843, |
| "eval_samples_per_second": 71.406, |
| "eval_steps_per_second": 17.851, |
| "num_input_tokens_seen": 11264, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.24, |
| "grad_norm": 42.76151657104492, |
| "learning_rate": 4.752422169756048e-05, |
| "loss": 0.4563, |
| "num_input_tokens_seen": 12032, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.28, |
| "grad_norm": 4.994872570037842, |
| "learning_rate": 4.5786740307563636e-05, |
| "loss": 0.4802, |
| "num_input_tokens_seen": 13824, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.28, |
| "eval_loss": 0.36931881308555603, |
| "eval_runtime": 0.8719, |
| "eval_samples_per_second": 64.225, |
| "eval_steps_per_second": 16.056, |
| "num_input_tokens_seen": 13824, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.32, |
| "grad_norm": 8.647699356079102, |
| "learning_rate": 4.364105412207914e-05, |
| "loss": 0.3809, |
| "num_input_tokens_seen": 15840, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.336, |
| "eval_loss": 0.3872639238834381, |
| "eval_runtime": 0.7387, |
| "eval_samples_per_second": 75.812, |
| "eval_steps_per_second": 18.953, |
| "num_input_tokens_seen": 16672, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.36, |
| "grad_norm": 12.718330383300781, |
| "learning_rate": 4.1129299588552193e-05, |
| "loss": 0.3844, |
| "num_input_tokens_seen": 17920, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.392, |
| "eval_loss": 0.3777945637702942, |
| "eval_runtime": 0.7665, |
| "eval_samples_per_second": 73.06, |
| "eval_steps_per_second": 18.265, |
| "num_input_tokens_seen": 19296, |
| "step": 49 |
| }, |
| { |
| "epoch": 0.4, |
| "grad_norm": 12.110234260559082, |
| "learning_rate": 3.830080191288342e-05, |
| "loss": 0.2817, |
| "num_input_tokens_seen": 19712, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.44, |
| "grad_norm": 10.657136917114258, |
| "learning_rate": 3.521110642339991e-05, |
| "loss": 0.3831, |
| "num_input_tokens_seen": 21952, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.448, |
| "eval_loss": 0.4436803460121155, |
| "eval_runtime": 0.7702, |
| "eval_samples_per_second": 72.71, |
| "eval_steps_per_second": 18.178, |
| "num_input_tokens_seen": 22432, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.48, |
| "grad_norm": 10.894862174987793, |
| "learning_rate": 3.1920887785621235e-05, |
| "loss": 0.5576, |
| "num_input_tokens_seen": 24160, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.504, |
| "eval_loss": 0.35032057762145996, |
| "eval_runtime": 0.9256, |
| "eval_samples_per_second": 60.502, |
| "eval_steps_per_second": 15.125, |
| "num_input_tokens_seen": 25504, |
| "step": 63 |
| }, |
| { |
| "epoch": 0.52, |
| "grad_norm": 7.415125370025635, |
| "learning_rate": 2.849475848838749e-05, |
| "loss": 0.4013, |
| "num_input_tokens_seen": 26112, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.56, |
| "grad_norm": 9.572220802307129, |
| "learning_rate": 2.5e-05, |
| "loss": 0.3242, |
| "num_input_tokens_seen": 28064, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.56, |
| "eval_loss": 0.37164703011512756, |
| "eval_runtime": 0.816, |
| "eval_samples_per_second": 68.627, |
| "eval_steps_per_second": 17.157, |
| "num_input_tokens_seen": 28064, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.6, |
| "grad_norm": 11.036535263061523, |
| "learning_rate": 2.1505241511612522e-05, |
| "loss": 0.3963, |
| "num_input_tokens_seen": 29824, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.616, |
| "eval_loss": 0.3748786747455597, |
| "eval_runtime": 0.7992, |
| "eval_samples_per_second": 70.066, |
| "eval_steps_per_second": 17.516, |
| "num_input_tokens_seen": 30720, |
| "step": 77 |
| }, |
| { |
| "epoch": 0.64, |
| "grad_norm": 2.2476918697357178, |
| "learning_rate": 1.8079112214378768e-05, |
| "loss": 0.3946, |
| "num_input_tokens_seen": 31904, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.672, |
| "eval_loss": 0.3603578209877014, |
| "eval_runtime": 0.7982, |
| "eval_samples_per_second": 70.16, |
| "eval_steps_per_second": 17.54, |
| "num_input_tokens_seen": 33504, |
| "step": 84 |
| }, |
| { |
| "epoch": 0.68, |
| "grad_norm": 2.605731248855591, |
| "learning_rate": 1.4788893576600099e-05, |
| "loss": 0.3496, |
| "num_input_tokens_seen": 33984, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.72, |
| "grad_norm": 2.532665967941284, |
| "learning_rate": 1.1699198087116589e-05, |
| "loss": 0.337, |
| "num_input_tokens_seen": 35776, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.728, |
| "eval_loss": 0.35710158944129944, |
| "eval_runtime": 0.8602, |
| "eval_samples_per_second": 65.102, |
| "eval_steps_per_second": 16.276, |
| "num_input_tokens_seen": 36128, |
| "step": 91 |
| }, |
| { |
| "epoch": 0.76, |
| "grad_norm": 2.5953240394592285, |
| "learning_rate": 8.870700411447816e-06, |
| "loss": 0.4315, |
| "num_input_tokens_seen": 37472, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.784, |
| "eval_loss": 0.3520326614379883, |
| "eval_runtime": 0.7911, |
| "eval_samples_per_second": 70.792, |
| "eval_steps_per_second": 17.698, |
| "num_input_tokens_seen": 38592, |
| "step": 98 |
| }, |
| { |
| "epoch": 0.8, |
| "grad_norm": 2.179095506668091, |
| "learning_rate": 6.358945877920861e-06, |
| "loss": 0.38, |
| "num_input_tokens_seen": 39328, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.84, |
| "grad_norm": 5.5090203285217285, |
| "learning_rate": 4.213259692436367e-06, |
| "loss": 0.371, |
| "num_input_tokens_seen": 41280, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.84, |
| "eval_loss": 0.34758228063583374, |
| "eval_runtime": 0.8072, |
| "eval_samples_per_second": 69.373, |
| "eval_steps_per_second": 17.343, |
| "num_input_tokens_seen": 41280, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.88, |
| "grad_norm": 1.8210620880126953, |
| "learning_rate": 2.475778302439524e-06, |
| "loss": 0.364, |
| "num_input_tokens_seen": 43552, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.896, |
| "eval_loss": 0.34589245915412903, |
| "eval_runtime": 0.822, |
| "eval_samples_per_second": 68.123, |
| "eval_steps_per_second": 17.031, |
| "num_input_tokens_seen": 44160, |
| "step": 112 |
| }, |
| { |
| "epoch": 0.92, |
| "grad_norm": 5.644977569580078, |
| "learning_rate": 1.180621931927592e-06, |
| "loss": 0.3554, |
| "num_input_tokens_seen": 45216, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.952, |
| "eval_loss": 0.3492301404476166, |
| "eval_runtime": 0.8757, |
| "eval_samples_per_second": 63.945, |
| "eval_steps_per_second": 15.986, |
| "num_input_tokens_seen": 46944, |
| "step": 119 |
| }, |
| { |
| "epoch": 0.96, |
| "grad_norm": 1.6824895143508911, |
| "learning_rate": 3.5322453704410286e-07, |
| "loss": 0.3494, |
| "num_input_tokens_seen": 47360, |
| "step": 120 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 10.473832130432129, |
| "learning_rate": 9.834340423678368e-09, |
| "loss": 0.3588, |
| "num_input_tokens_seen": 49376, |
| "step": 125 |
| }, |
| { |
| "epoch": 1.0, |
| "num_input_tokens_seen": 49376, |
| "step": 125, |
| "total_flos": 497127920369664.0, |
| "train_loss": 1.1438678817749024, |
| "train_runtime": 264.1495, |
| "train_samples_per_second": 1.885, |
| "train_steps_per_second": 0.473 |
| } |
| ], |
| "logging_steps": 5, |
| "max_steps": 125, |
| "num_input_tokens_seen": 49376, |
| "num_train_epochs": 1, |
| "save_steps": 7, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 497127920369664.0, |
| "train_batch_size": 4, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|