| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 24.881889763779526, |
| "eval_steps": 25, |
| "global_step": 175, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.6299212598425197, |
| "grad_norm": 5.632317132462053, |
| "learning_rate": 2.222222222222222e-06, |
| "loss": 0.5498, |
| "step": 5 |
| }, |
| { |
| "epoch": 1.3779527559055118, |
| "grad_norm": 2.7309533902724237, |
| "learning_rate": 5e-06, |
| "loss": 0.5156, |
| "step": 10 |
| }, |
| { |
| "epoch": 2.1259842519685037, |
| "grad_norm": 1.2157903681282713, |
| "learning_rate": 7.77777777777778e-06, |
| "loss": 0.1575, |
| "step": 15 |
| }, |
| { |
| "epoch": 2.7559055118110236, |
| "grad_norm": 0.4902690514544274, |
| "learning_rate": 9.998999018714264e-06, |
| "loss": 0.0449, |
| "step": 20 |
| }, |
| { |
| "epoch": 3.5039370078740157, |
| "grad_norm": 0.4746290087026016, |
| "learning_rate": 9.964006738212574e-06, |
| "loss": 0.0385, |
| "step": 25 |
| }, |
| { |
| "epoch": 3.5039370078740157, |
| "eval_loss": 0.049805961549282074, |
| "eval_runtime": 3.8791, |
| "eval_samples_per_second": 7.476, |
| "eval_steps_per_second": 3.867, |
| "step": 25 |
| }, |
| { |
| "epoch": 4.251968503937007, |
| "grad_norm": 0.2525712314895859, |
| "learning_rate": 9.879365458117678e-06, |
| "loss": 0.0247, |
| "step": 30 |
| }, |
| { |
| "epoch": 4.881889763779528, |
| "grad_norm": 0.31313562383003646, |
| "learning_rate": 9.745921743533653e-06, |
| "loss": 0.0157, |
| "step": 35 |
| }, |
| { |
| "epoch": 5.6299212598425195, |
| "grad_norm": 0.21515126665369913, |
| "learning_rate": 9.565010271724353e-06, |
| "loss": 0.0125, |
| "step": 40 |
| }, |
| { |
| "epoch": 6.377952755905512, |
| "grad_norm": 0.34914670734943754, |
| "learning_rate": 9.338440482939146e-06, |
| "loss": 0.0069, |
| "step": 45 |
| }, |
| { |
| "epoch": 7.125984251968504, |
| "grad_norm": 0.14477627700807097, |
| "learning_rate": 9.068478482754532e-06, |
| "loss": 0.0047, |
| "step": 50 |
| }, |
| { |
| "epoch": 7.125984251968504, |
| "eval_loss": 0.05980484187602997, |
| "eval_runtime": 3.7853, |
| "eval_samples_per_second": 7.661, |
| "eval_steps_per_second": 3.963, |
| "step": 50 |
| }, |
| { |
| "epoch": 7.755905511811024, |
| "grad_norm": 0.3479903618887373, |
| "learning_rate": 8.757824376940748e-06, |
| "loss": 0.0019, |
| "step": 55 |
| }, |
| { |
| "epoch": 8.503937007874015, |
| "grad_norm": 0.5106959959750706, |
| "learning_rate": 8.409585265545509e-06, |
| "loss": 0.0023, |
| "step": 60 |
| }, |
| { |
| "epoch": 9.251968503937007, |
| "grad_norm": 0.14853492351867037, |
| "learning_rate": 8.027244166302641e-06, |
| "loss": 0.002, |
| "step": 65 |
| }, |
| { |
| "epoch": 9.881889763779528, |
| "grad_norm": 0.16542609484128554, |
| "learning_rate": 7.614625178187402e-06, |
| "loss": 0.0016, |
| "step": 70 |
| }, |
| { |
| "epoch": 10.62992125984252, |
| "grad_norm": 0.1706483384230398, |
| "learning_rate": 7.175855233545669e-06, |
| "loss": 0.001, |
| "step": 75 |
| }, |
| { |
| "epoch": 10.62992125984252, |
| "eval_loss": 0.06815612316131592, |
| "eval_runtime": 3.9081, |
| "eval_samples_per_second": 7.42, |
| "eval_steps_per_second": 3.838, |
| "step": 75 |
| }, |
| { |
| "epoch": 11.377952755905511, |
| "grad_norm": 0.12734977176199208, |
| "learning_rate": 6.715322821344495e-06, |
| "loss": 0.0007, |
| "step": 80 |
| }, |
| { |
| "epoch": 12.125984251968504, |
| "grad_norm": 0.055045406105188224, |
| "learning_rate": 6.237634094385814e-06, |
| "loss": 0.0004, |
| "step": 85 |
| }, |
| { |
| "epoch": 12.755905511811024, |
| "grad_norm": 0.24933655723456083, |
| "learning_rate": 5.7475667994901316e-06, |
| "loss": 0.0002, |
| "step": 90 |
| }, |
| { |
| "epoch": 13.503937007874015, |
| "grad_norm": 0.08486520999135673, |
| "learning_rate": 5.250022491431259e-06, |
| "loss": 0.0002, |
| "step": 95 |
| }, |
| { |
| "epoch": 14.251968503937007, |
| "grad_norm": 0.03626525027130318, |
| "learning_rate": 4.749977508568742e-06, |
| "loss": 0.0002, |
| "step": 100 |
| }, |
| { |
| "epoch": 14.251968503937007, |
| "eval_loss": 0.08573012053966522, |
| "eval_runtime": 3.9129, |
| "eval_samples_per_second": 7.411, |
| "eval_steps_per_second": 3.833, |
| "step": 100 |
| }, |
| { |
| "epoch": 14.881889763779528, |
| "grad_norm": 0.005000292928153053, |
| "learning_rate": 4.252433200509869e-06, |
| "loss": 0.0001, |
| "step": 105 |
| }, |
| { |
| "epoch": 15.62992125984252, |
| "grad_norm": 0.0070081305200095844, |
| "learning_rate": 3.762365905614187e-06, |
| "loss": 0.0001, |
| "step": 110 |
| }, |
| { |
| "epoch": 16.37795275590551, |
| "grad_norm": 0.023387552639621518, |
| "learning_rate": 3.2846771786555075e-06, |
| "loss": 0.0001, |
| "step": 115 |
| }, |
| { |
| "epoch": 17.125984251968504, |
| "grad_norm": 0.008314297434967715, |
| "learning_rate": 2.824144766454333e-06, |
| "loss": 0.0, |
| "step": 120 |
| }, |
| { |
| "epoch": 17.755905511811022, |
| "grad_norm": 0.0028401414939199423, |
| "learning_rate": 2.3853748218126e-06, |
| "loss": 0.0, |
| "step": 125 |
| }, |
| { |
| "epoch": 17.755905511811022, |
| "eval_loss": 0.08820342272520065, |
| "eval_runtime": 3.889, |
| "eval_samples_per_second": 7.457, |
| "eval_steps_per_second": 3.857, |
| "step": 125 |
| }, |
| { |
| "epoch": 18.503937007874015, |
| "grad_norm": 0.002171915506295924, |
| "learning_rate": 1.9727558336973594e-06, |
| "loss": 0.0, |
| "step": 130 |
| }, |
| { |
| "epoch": 19.251968503937007, |
| "grad_norm": 0.002247395436703256, |
| "learning_rate": 1.5904147344544928e-06, |
| "loss": 0.0, |
| "step": 135 |
| }, |
| { |
| "epoch": 19.881889763779526, |
| "grad_norm": 0.020207725975457932, |
| "learning_rate": 1.2421756230592535e-06, |
| "loss": 0.0, |
| "step": 140 |
| }, |
| { |
| "epoch": 20.62992125984252, |
| "grad_norm": 0.002114011246814878, |
| "learning_rate": 9.315215172454689e-07, |
| "loss": 0.0, |
| "step": 145 |
| }, |
| { |
| "epoch": 21.37795275590551, |
| "grad_norm": 0.0025524711331419707, |
| "learning_rate": 6.615595170608541e-07, |
| "loss": 0.0, |
| "step": 150 |
| }, |
| { |
| "epoch": 21.37795275590551, |
| "eval_loss": 0.09042119234800339, |
| "eval_runtime": 3.8398, |
| "eval_samples_per_second": 7.552, |
| "eval_steps_per_second": 3.906, |
| "step": 150 |
| }, |
| { |
| "epoch": 22.125984251968504, |
| "grad_norm": 0.003056620821824751, |
| "learning_rate": 4.349897282756488e-07, |
| "loss": 0.0, |
| "step": 155 |
| }, |
| { |
| "epoch": 22.755905511811022, |
| "grad_norm": 0.0022376804480244176, |
| "learning_rate": 2.54078256466348e-07, |
| "loss": 0.0, |
| "step": 160 |
| }, |
| { |
| "epoch": 23.503937007874015, |
| "grad_norm": 0.0023153643544619597, |
| "learning_rate": 1.206345418823235e-07, |
| "loss": 0.0, |
| "step": 165 |
| }, |
| { |
| "epoch": 24.251968503937007, |
| "grad_norm": 0.0024824470278455323, |
| "learning_rate": 3.599326178742535e-08, |
| "loss": 0.0, |
| "step": 170 |
| }, |
| { |
| "epoch": 24.881889763779526, |
| "grad_norm": 0.002034088863587326, |
| "learning_rate": 1.0009812857370016e-09, |
| "loss": 0.0, |
| "step": 175 |
| }, |
| { |
| "epoch": 24.881889763779526, |
| "eval_loss": 0.0903918594121933, |
| "eval_runtime": 3.9075, |
| "eval_samples_per_second": 7.422, |
| "eval_steps_per_second": 3.839, |
| "step": 175 |
| }, |
| { |
| "epoch": 24.881889763779526, |
| "step": 175, |
| "total_flos": 8801054883840.0, |
| "train_loss": 0.039481030342618136, |
| "train_runtime": 3513.9873, |
| "train_samples_per_second": 1.807, |
| "train_steps_per_second": 0.05 |
| } |
| ], |
| "logging_steps": 5, |
| "max_steps": 175, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 25, |
| "save_steps": 100, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 8801054883840.0, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|