| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 1.9487179487179487, |
| "eval_steps": 500, |
| "global_step": 38, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.05128205128205128, |
| "grad_norm": 6.611382418524136, |
| "learning_rate": 5e-05, |
| "loss": 0.9748, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.10256410256410256, |
| "grad_norm": 6.255827210322037, |
| "learning_rate": 4.9909937213563165e-05, |
| "loss": 0.9645, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.15384615384615385, |
| "grad_norm": 26.37694268451028, |
| "learning_rate": 4.9640397758692715e-05, |
| "loss": 1.8679, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.20512820512820512, |
| "grad_norm": 109.62841403244713, |
| "learning_rate": 4.9193323673337476e-05, |
| "loss": 3.1823, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.2564102564102564, |
| "grad_norm": 38.27279611720776, |
| "learning_rate": 4.857193613652711e-05, |
| "loss": 1.8243, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.3076923076923077, |
| "grad_norm": 52.16666128950054, |
| "learning_rate": 4.77807122597034e-05, |
| "loss": 2.3408, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.358974358974359, |
| "grad_norm": 61.865694873216064, |
| "learning_rate": 4.6825352829029705e-05, |
| "loss": 1.668, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.41025641025641024, |
| "grad_norm": 20.412323559915432, |
| "learning_rate": 4.571274123109606e-05, |
| "loss": 1.2475, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.46153846153846156, |
| "grad_norm": 6.613572126150075, |
| "learning_rate": 4.445089385796099e-05, |
| "loss": 0.937, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.5128205128205128, |
| "grad_norm": 6.2563622738947045, |
| "learning_rate": 4.3048902348863116e-05, |
| "loss": 0.8008, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.5641025641025641, |
| "grad_norm": 7.2077871299442755, |
| "learning_rate": 4.151686808475204e-05, |
| "loss": 0.7078, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.6153846153846154, |
| "grad_norm": 2.815837188824988, |
| "learning_rate": 3.986582940760717e-05, |
| "loss": 0.5816, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.6666666666666666, |
| "grad_norm": 3.486576911271355, |
| "learning_rate": 3.8107682088930794e-05, |
| "loss": 0.4899, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.717948717948718, |
| "grad_norm": 2.149756304629691, |
| "learning_rate": 3.6255093620441834e-05, |
| "loss": 0.491, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.7692307692307693, |
| "grad_norm": 2.053499289405444, |
| "learning_rate": 3.432141194450772e-05, |
| "loss": 0.4575, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.8205128205128205, |
| "grad_norm": 1.395636933291467, |
| "learning_rate": 3.232056928191376e-05, |
| "loss": 0.3981, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.8717948717948718, |
| "grad_norm": 1.017073513763627, |
| "learning_rate": 3.0266981749893157e-05, |
| "loss": 0.3686, |
| "step": 17 |
| }, |
| { |
| "epoch": 0.9230769230769231, |
| "grad_norm": 1.4739148894658656, |
| "learning_rate": 2.8175445493671972e-05, |
| "loss": 0.4269, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.9743589743589743, |
| "grad_norm": 1.3136690374942346, |
| "learning_rate": 2.606103007990371e-05, |
| "loss": 0.3809, |
| "step": 19 |
| }, |
| { |
| "epoch": 1.0256410256410255, |
| "grad_norm": 1.5835385453547504, |
| "learning_rate": 2.39389699200963e-05, |
| "loss": 0.5439, |
| "step": 20 |
| }, |
| { |
| "epoch": 1.0769230769230769, |
| "grad_norm": 0.8971814869025673, |
| "learning_rate": 2.182455450632803e-05, |
| "loss": 0.3104, |
| "step": 21 |
| }, |
| { |
| "epoch": 1.1282051282051282, |
| "grad_norm": 0.8492410740355226, |
| "learning_rate": 1.973301825010685e-05, |
| "loss": 0.2981, |
| "step": 22 |
| }, |
| { |
| "epoch": 1.1794871794871795, |
| "grad_norm": 0.740051068350938, |
| "learning_rate": 1.7679430718086243e-05, |
| "loss": 0.2724, |
| "step": 23 |
| }, |
| { |
| "epoch": 1.2307692307692308, |
| "grad_norm": 0.6880281330130831, |
| "learning_rate": 1.567858805549229e-05, |
| "loss": 0.2526, |
| "step": 24 |
| }, |
| { |
| "epoch": 1.282051282051282, |
| "grad_norm": 0.6787402589568466, |
| "learning_rate": 1.3744906379558165e-05, |
| "loss": 0.2757, |
| "step": 25 |
| }, |
| { |
| "epoch": 1.3333333333333333, |
| "grad_norm": 0.67758309834235, |
| "learning_rate": 1.1892317911069212e-05, |
| "loss": 0.2696, |
| "step": 26 |
| }, |
| { |
| "epoch": 1.3846153846153846, |
| "grad_norm": 0.674443102358302, |
| "learning_rate": 1.0134170592392836e-05, |
| "loss": 0.2805, |
| "step": 27 |
| }, |
| { |
| "epoch": 1.435897435897436, |
| "grad_norm": 0.5633358189387688, |
| "learning_rate": 8.483131915247968e-06, |
| "loss": 0.2503, |
| "step": 28 |
| }, |
| { |
| "epoch": 1.4871794871794872, |
| "grad_norm": 0.6197473329771566, |
| "learning_rate": 6.951097651136889e-06, |
| "loss": 0.2578, |
| "step": 29 |
| }, |
| { |
| "epoch": 1.5384615384615383, |
| "grad_norm": 0.5580527713592528, |
| "learning_rate": 5.549106142039018e-06, |
| "loss": 0.2553, |
| "step": 30 |
| }, |
| { |
| "epoch": 1.5897435897435899, |
| "grad_norm": 0.5149565392248769, |
| "learning_rate": 4.2872587689039484e-06, |
| "loss": 0.2405, |
| "step": 31 |
| }, |
| { |
| "epoch": 1.641025641025641, |
| "grad_norm": 0.5766230877779212, |
| "learning_rate": 3.1746471709702964e-06, |
| "loss": 0.2889, |
| "step": 32 |
| }, |
| { |
| "epoch": 1.6923076923076923, |
| "grad_norm": 0.5729826484566335, |
| "learning_rate": 2.219287740296605e-06, |
| "loss": 0.257, |
| "step": 33 |
| }, |
| { |
| "epoch": 1.7435897435897436, |
| "grad_norm": 0.5287896884964262, |
| "learning_rate": 1.428063863472895e-06, |
| "loss": 0.2621, |
| "step": 34 |
| }, |
| { |
| "epoch": 1.7948717948717947, |
| "grad_norm": 0.4759737147838452, |
| "learning_rate": 8.066763266625282e-07, |
| "loss": 0.2433, |
| "step": 35 |
| }, |
| { |
| "epoch": 1.8461538461538463, |
| "grad_norm": 0.5048702889164741, |
| "learning_rate": 3.5960224130728857e-07, |
| "loss": 0.2462, |
| "step": 36 |
| }, |
| { |
| "epoch": 1.8974358974358974, |
| "grad_norm": 0.4819356178933713, |
| "learning_rate": 9.006278643683696e-08, |
| "loss": 0.2415, |
| "step": 37 |
| }, |
| { |
| "epoch": 1.9487179487179487, |
| "grad_norm": 0.43218352237890395, |
| "learning_rate": 0.0, |
| "loss": 0.2067, |
| "step": 38 |
| }, |
| { |
| "epoch": 1.9487179487179487, |
| "step": 38, |
| "total_flos": 6321437491200.0, |
| "train_loss": 0.6674525247592675, |
| "train_runtime": 535.3552, |
| "train_samples_per_second": 1.143, |
| "train_steps_per_second": 0.071 |
| } |
| ], |
| "logging_steps": 1, |
| "max_steps": 38, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 2, |
| "save_steps": 50, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 6321437491200.0, |
| "train_batch_size": 2, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|