| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 5.0, |
| "eval_steps": 500, |
| "global_step": 40, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.125, |
| "grad_norm": 5.211779927637621, |
| "learning_rate": 2e-05, |
| "loss": 0.9857, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.25, |
| "grad_norm": 5.042557082573154, |
| "learning_rate": 4e-05, |
| "loss": 1.0105, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.375, |
| "grad_norm": 1.6016497975651993, |
| "learning_rate": 6.000000000000001e-05, |
| "loss": 0.8315, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.5, |
| "grad_norm": 4.25741909777284, |
| "learning_rate": 8e-05, |
| "loss": 0.8376, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.625, |
| "grad_norm": 4.221010570287036, |
| "learning_rate": 7.984778792366983e-05, |
| "loss": 0.8615, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.75, |
| "grad_norm": 2.7341942887765978, |
| "learning_rate": 7.939231012048833e-05, |
| "loss": 0.8683, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.875, |
| "grad_norm": 2.869440032757376, |
| "learning_rate": 7.863703305156273e-05, |
| "loss": 0.8362, |
| "step": 7 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 2.122066806364661, |
| "learning_rate": 7.758770483143634e-05, |
| "loss": 0.784, |
| "step": 8 |
| }, |
| { |
| "epoch": 1.125, |
| "grad_norm": 1.439471039607689, |
| "learning_rate": 7.625231148146601e-05, |
| "loss": 0.7544, |
| "step": 9 |
| }, |
| { |
| "epoch": 1.25, |
| "grad_norm": 1.6534051953010969, |
| "learning_rate": 7.464101615137756e-05, |
| "loss": 0.7631, |
| "step": 10 |
| }, |
| { |
| "epoch": 1.375, |
| "grad_norm": 1.8137551875928488, |
| "learning_rate": 7.276608177155968e-05, |
| "loss": 0.7403, |
| "step": 11 |
| }, |
| { |
| "epoch": 1.5, |
| "grad_norm": 1.3142504730332583, |
| "learning_rate": 7.064177772475912e-05, |
| "loss": 0.7002, |
| "step": 12 |
| }, |
| { |
| "epoch": 1.625, |
| "grad_norm": 5.925202262290306, |
| "learning_rate": 6.828427124746191e-05, |
| "loss": 0.6861, |
| "step": 13 |
| }, |
| { |
| "epoch": 1.75, |
| "grad_norm": 1.7227726654448416, |
| "learning_rate": 6.571150438746157e-05, |
| "loss": 0.7334, |
| "step": 14 |
| }, |
| { |
| "epoch": 1.875, |
| "grad_norm": 1.0449895112458119, |
| "learning_rate": 6.294305745404185e-05, |
| "loss": 0.6708, |
| "step": 15 |
| }, |
| { |
| "epoch": 2.0, |
| "grad_norm": 5.096810621766211, |
| "learning_rate": 6.000000000000001e-05, |
| "loss": 0.6817, |
| "step": 16 |
| }, |
| { |
| "epoch": 2.125, |
| "grad_norm": 1.8744800353306919, |
| "learning_rate": 5.6904730469627985e-05, |
| "loss": 0.6989, |
| "step": 17 |
| }, |
| { |
| "epoch": 2.25, |
| "grad_norm": 1.230587778975542, |
| "learning_rate": 5.368080573302676e-05, |
| "loss": 0.6331, |
| "step": 18 |
| }, |
| { |
| "epoch": 2.375, |
| "grad_norm": 8.839539018981332, |
| "learning_rate": 5.0352761804100835e-05, |
| "loss": 0.6351, |
| "step": 19 |
| }, |
| { |
| "epoch": 2.5, |
| "grad_norm": 5.01265204416041, |
| "learning_rate": 4.694592710667723e-05, |
| "loss": 0.661, |
| "step": 20 |
| }, |
| { |
| "epoch": 2.625, |
| "grad_norm": 2.099879032906444, |
| "learning_rate": 4.348622970990634e-05, |
| "loss": 0.6938, |
| "step": 21 |
| }, |
| { |
| "epoch": 2.75, |
| "grad_norm": 1.4954684606615343, |
| "learning_rate": 4e-05, |
| "loss": 0.6499, |
| "step": 22 |
| }, |
| { |
| "epoch": 2.875, |
| "grad_norm": 0.742639587291529, |
| "learning_rate": 3.6513770290093674e-05, |
| "loss": 0.6396, |
| "step": 23 |
| }, |
| { |
| "epoch": 3.0, |
| "grad_norm": 0.7952668726605489, |
| "learning_rate": 3.305407289332279e-05, |
| "loss": 0.6316, |
| "step": 24 |
| }, |
| { |
| "epoch": 3.125, |
| "grad_norm": 0.9332649605547626, |
| "learning_rate": 2.9647238195899168e-05, |
| "loss": 0.6314, |
| "step": 25 |
| }, |
| { |
| "epoch": 3.25, |
| "grad_norm": 0.8319077523070111, |
| "learning_rate": 2.6319194266973256e-05, |
| "loss": 0.5775, |
| "step": 26 |
| }, |
| { |
| "epoch": 3.375, |
| "grad_norm": 0.6043436502068867, |
| "learning_rate": 2.3095269530372032e-05, |
| "loss": 0.5838, |
| "step": 27 |
| }, |
| { |
| "epoch": 3.5, |
| "grad_norm": 0.48007057002472814, |
| "learning_rate": 2.0000000000000012e-05, |
| "loss": 0.5709, |
| "step": 28 |
| }, |
| { |
| "epoch": 3.625, |
| "grad_norm": 0.4637812767567033, |
| "learning_rate": 1.7056942545958167e-05, |
| "loss": 0.5556, |
| "step": 29 |
| }, |
| { |
| "epoch": 3.75, |
| "grad_norm": 0.46472503204848403, |
| "learning_rate": 1.4288495612538427e-05, |
| "loss": 0.5443, |
| "step": 30 |
| }, |
| { |
| "epoch": 3.875, |
| "grad_norm": 0.46363908296159384, |
| "learning_rate": 1.1715728752538103e-05, |
| "loss": 0.557, |
| "step": 31 |
| }, |
| { |
| "epoch": 4.0, |
| "grad_norm": 0.41103161754751094, |
| "learning_rate": 9.358222275240884e-06, |
| "loss": 0.5874, |
| "step": 32 |
| }, |
| { |
| "epoch": 4.125, |
| "grad_norm": 0.3604630063954317, |
| "learning_rate": 7.233918228440324e-06, |
| "loss": 0.5322, |
| "step": 33 |
| }, |
| { |
| "epoch": 4.25, |
| "grad_norm": 0.3152024677309081, |
| "learning_rate": 5.358983848622452e-06, |
| "loss": 0.5663, |
| "step": 34 |
| }, |
| { |
| "epoch": 4.375, |
| "grad_norm": 0.2782735119012474, |
| "learning_rate": 3.747688518534003e-06, |
| "loss": 0.5334, |
| "step": 35 |
| }, |
| { |
| "epoch": 4.5, |
| "grad_norm": 0.2517890470284474, |
| "learning_rate": 2.4122951685636674e-06, |
| "loss": 0.5227, |
| "step": 36 |
| }, |
| { |
| "epoch": 4.625, |
| "grad_norm": 0.2307830936984302, |
| "learning_rate": 1.3629669484372722e-06, |
| "loss": 0.5605, |
| "step": 37 |
| }, |
| { |
| "epoch": 4.75, |
| "grad_norm": 0.2120443064521722, |
| "learning_rate": 6.076898795116792e-07, |
| "loss": 0.5255, |
| "step": 38 |
| }, |
| { |
| "epoch": 4.875, |
| "grad_norm": 0.21220571438664002, |
| "learning_rate": 1.522120763301782e-07, |
| "loss": 0.5098, |
| "step": 39 |
| }, |
| { |
| "epoch": 5.0, |
| "grad_norm": 0.20885459665653022, |
| "learning_rate": 0.0, |
| "loss": 0.5482, |
| "step": 40 |
| }, |
| { |
| "epoch": 5.0, |
| "step": 40, |
| "total_flos": 671045690327040.0, |
| "train_loss": 0.0, |
| "train_runtime": 7.8738, |
| "train_samples_per_second": 2427.029, |
| "train_steps_per_second": 5.08 |
| } |
| ], |
| "logging_steps": 1, |
| "max_steps": 40, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 5, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 671045690327040.0, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|