{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.034584125886218224,
  "eval_steps": 500,
  "global_step": 50,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0006916825177243646,
      "grad_norm": 0.2954893708229065,
      "learning_rate": 0.0,
      "loss": 2.3812,
      "step": 1
    },
    {
      "epoch": 0.0013833650354487291,
      "grad_norm": 0.3372870683670044,
      "learning_rate": 1.3698630136986302e-06,
      "loss": 2.6562,
      "step": 2
    },
    {
      "epoch": 0.0020750475531730937,
      "grad_norm": 0.41708794236183167,
      "learning_rate": 2.7397260273972604e-06,
      "loss": 2.779,
      "step": 3
    },
    {
      "epoch": 0.0027667300708974583,
      "grad_norm": 0.48284393548965454,
      "learning_rate": 4.10958904109589e-06,
      "loss": 2.83,
      "step": 4
    },
    {
      "epoch": 0.0034584125886218224,
      "grad_norm": 0.328521728515625,
      "learning_rate": 5.479452054794521e-06,
      "loss": 2.3607,
      "step": 5
    },
    {
      "epoch": 0.004150095106346187,
      "grad_norm": 0.3413754403591156,
      "learning_rate": 6.849315068493151e-06,
      "loss": 1.9744,
      "step": 6
    },
    {
      "epoch": 0.0048417776240705515,
      "grad_norm": 0.43425995111465454,
      "learning_rate": 8.21917808219178e-06,
      "loss": 1.915,
      "step": 7
    },
    {
      "epoch": 0.0055334601417949165,
      "grad_norm": 0.45184195041656494,
      "learning_rate": 9.589041095890411e-06,
      "loss": 2.0002,
      "step": 8
    },
    {
      "epoch": 0.006225142659519281,
      "grad_norm": 0.3451146185398102,
      "learning_rate": 1.0958904109589042e-05,
      "loss": 2.5739,
      "step": 9
    },
    {
      "epoch": 0.006916825177243645,
      "grad_norm": 0.39400503039360046,
      "learning_rate": 1.2328767123287671e-05,
      "loss": 2.6139,
      "step": 10
    },
    {
      "epoch": 0.00760850769496801,
      "grad_norm": 0.4555888772010803,
      "learning_rate": 1.3698630136986302e-05,
      "loss": 2.4346,
      "step": 11
    },
    {
      "epoch": 0.008300190212692375,
      "grad_norm": 0.5475619435310364,
      "learning_rate": 1.5068493150684931e-05,
      "loss": 1.485,
      "step": 12
    },
    {
      "epoch": 0.008991872730416739,
      "grad_norm": 1.219076156616211,
      "learning_rate": 1.643835616438356e-05,
      "loss": 2.0555,
      "step": 13
    },
    {
      "epoch": 0.009683555248141103,
      "grad_norm": 0.3271433711051941,
      "learning_rate": 1.780821917808219e-05,
      "loss": 2.8418,
      "step": 14
    },
    {
      "epoch": 0.010375237765865467,
      "grad_norm": 0.3261394500732422,
      "learning_rate": 1.9178082191780822e-05,
      "loss": 2.287,
      "step": 15
    },
    {
      "epoch": 0.011066920283589833,
      "grad_norm": 0.39713796973228455,
      "learning_rate": 2.0547945205479453e-05,
      "loss": 2.7331,
      "step": 16
    },
    {
      "epoch": 0.011758602801314197,
      "grad_norm": 0.3226444125175476,
      "learning_rate": 2.1917808219178083e-05,
      "loss": 2.7307,
      "step": 17
    },
    {
      "epoch": 0.012450285319038561,
      "grad_norm": 0.4153122007846832,
      "learning_rate": 2.328767123287671e-05,
      "loss": 2.3229,
      "step": 18
    },
    {
      "epoch": 0.013141967836762926,
      "grad_norm": 0.29302462935447693,
      "learning_rate": 2.4657534246575342e-05,
      "loss": 2.2093,
      "step": 19
    },
    {
      "epoch": 0.01383365035448729,
      "grad_norm": 0.42535701394081116,
      "learning_rate": 2.6027397260273973e-05,
      "loss": 2.3698,
      "step": 20
    },
    {
      "epoch": 0.014525332872211656,
      "grad_norm": 0.33567023277282715,
      "learning_rate": 2.7397260273972603e-05,
      "loss": 2.6456,
      "step": 21
    },
    {
      "epoch": 0.01521701538993602,
      "grad_norm": 0.37375620007514954,
      "learning_rate": 2.8767123287671234e-05,
      "loss": 2.4148,
      "step": 22
    },
    {
      "epoch": 0.015908697907660384,
      "grad_norm": 0.2704203128814697,
      "learning_rate": 3.0136986301369862e-05,
      "loss": 1.6722,
      "step": 23
    },
    {
      "epoch": 0.01660038042538475,
      "grad_norm": 0.3946782946586609,
      "learning_rate": 3.1506849315068496e-05,
      "loss": 2.6759,
      "step": 24
    },
    {
      "epoch": 0.017292062943109112,
      "grad_norm": 0.368335098028183,
      "learning_rate": 3.287671232876712e-05,
      "loss": 2.6882,
      "step": 25
    },
    {
      "epoch": 0.017983745460833478,
      "grad_norm": 0.38029783964157104,
      "learning_rate": 3.424657534246575e-05,
      "loss": 2.6915,
      "step": 26
    },
    {
      "epoch": 0.01867542797855784,
      "grad_norm": 0.36253222823143005,
      "learning_rate": 3.561643835616438e-05,
      "loss": 2.1426,
      "step": 27
    },
    {
      "epoch": 0.019367110496282206,
      "grad_norm": 0.35769322514533997,
      "learning_rate": 3.698630136986301e-05,
      "loss": 2.6634,
      "step": 28
    },
    {
      "epoch": 0.020058793014006572,
      "grad_norm": 0.44577229022979736,
      "learning_rate": 3.8356164383561644e-05,
      "loss": 2.5996,
      "step": 29
    },
    {
      "epoch": 0.020750475531730934,
      "grad_norm": 0.5225628614425659,
      "learning_rate": 3.9726027397260274e-05,
      "loss": 2.5993,
      "step": 30
    },
    {
      "epoch": 0.0214421580494553,
      "grad_norm": 0.45649707317352295,
      "learning_rate": 4.1095890410958905e-05,
      "loss": 2.3344,
      "step": 31
    },
    {
      "epoch": 0.022133840567179666,
      "grad_norm": 0.31408172845840454,
      "learning_rate": 4.2465753424657536e-05,
      "loss": 2.343,
      "step": 32
    },
    {
      "epoch": 0.02282552308490403,
      "grad_norm": 0.3498693108558655,
      "learning_rate": 4.383561643835617e-05,
      "loss": 1.7196,
      "step": 33
    },
    {
      "epoch": 0.023517205602628394,
      "grad_norm": 0.5255292654037476,
      "learning_rate": 4.520547945205479e-05,
      "loss": 1.5354,
      "step": 34
    },
    {
      "epoch": 0.024208888120352757,
      "grad_norm": 0.3460487425327301,
      "learning_rate": 4.657534246575342e-05,
      "loss": 2.1435,
      "step": 35
    },
    {
      "epoch": 0.024900570638077123,
      "grad_norm": 0.47466233372688293,
      "learning_rate": 4.794520547945205e-05,
      "loss": 2.7025,
      "step": 36
    },
    {
      "epoch": 0.02559225315580149,
      "grad_norm": 0.39667466282844543,
      "learning_rate": 4.9315068493150684e-05,
      "loss": 2.2736,
      "step": 37
    },
    {
      "epoch": 0.02628393567352585,
      "grad_norm": 0.40798133611679077,
      "learning_rate": 5.068493150684932e-05,
      "loss": 1.4074,
      "step": 38
    },
    {
      "epoch": 0.026975618191250217,
      "grad_norm": 0.39603471755981445,
      "learning_rate": 5.2054794520547945e-05,
      "loss": 2.4354,
      "step": 39
    },
    {
      "epoch": 0.02766730070897458,
      "grad_norm": 0.5650593042373657,
      "learning_rate": 5.342465753424658e-05,
      "loss": 1.7307,
      "step": 40
    },
    {
      "epoch": 0.028358983226698945,
      "grad_norm": 0.7711919546127319,
      "learning_rate": 5.479452054794521e-05,
      "loss": 1.2067,
      "step": 41
    },
    {
      "epoch": 0.02905066574442331,
      "grad_norm": 0.46603119373321533,
      "learning_rate": 5.616438356164384e-05,
      "loss": 1.0539,
      "step": 42
    },
    {
      "epoch": 0.029742348262147673,
      "grad_norm": 0.2997550666332245,
      "learning_rate": 5.753424657534247e-05,
      "loss": 1.9997,
      "step": 43
    },
    {
      "epoch": 0.03043403077987204,
      "grad_norm": 0.4058617651462555,
      "learning_rate": 5.89041095890411e-05,
      "loss": 2.3323,
      "step": 44
    },
    {
      "epoch": 0.0311257132975964,
      "grad_norm": 0.3763635754585266,
      "learning_rate": 6.0273972602739724e-05,
      "loss": 1.8749,
      "step": 45
    },
    {
      "epoch": 0.03181739581532077,
      "grad_norm": 0.3507993221282959,
      "learning_rate": 6.164383561643835e-05,
      "loss": 2.3828,
      "step": 46
    },
    {
      "epoch": 0.03250907833304513,
      "grad_norm": 0.34240859746932983,
      "learning_rate": 6.301369863013699e-05,
      "loss": 2.3809,
      "step": 47
    },
    {
      "epoch": 0.0332007608507695,
      "grad_norm": 0.4183844029903412,
      "learning_rate": 6.438356164383562e-05,
      "loss": 2.2065,
      "step": 48
    },
    {
      "epoch": 0.03389244336849386,
      "grad_norm": 0.5209120512008667,
      "learning_rate": 6.575342465753424e-05,
      "loss": 2.061,
      "step": 49
    },
    {
      "epoch": 0.034584125886218224,
      "grad_norm": 0.6568111181259155,
      "learning_rate": 6.712328767123288e-05,
      "loss": 2.1576,
      "step": 50
    }
  ],
  "logging_steps": 1,
  "max_steps": 1446,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 50,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 1666678748626944.0,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}