| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 400.0, |
| "eval_steps": 50, |
| "global_step": 2000, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 10.0, |
| "grad_norm": 0.33985191583633423, |
| "learning_rate": 0.00029265, |
| "loss": 0.6013, |
| "step": 50 |
| }, |
| { |
| "epoch": 20.0, |
| "grad_norm": 0.21591724455356598, |
| "learning_rate": 0.00028514999999999997, |
| "loss": 0.0457, |
| "step": 100 |
| }, |
| { |
| "epoch": 30.0, |
| "grad_norm": 0.10208525508642197, |
| "learning_rate": 0.00027764999999999995, |
| "loss": 0.0193, |
| "step": 150 |
| }, |
| { |
| "epoch": 40.0, |
| "grad_norm": 0.11739509552717209, |
| "learning_rate": 0.00027015, |
| "loss": 0.0127, |
| "step": 200 |
| }, |
| { |
| "epoch": 50.0, |
| "grad_norm": 0.056288138031959534, |
| "learning_rate": 0.00026264999999999996, |
| "loss": 0.0119, |
| "step": 250 |
| }, |
| { |
| "epoch": 60.0, |
| "grad_norm": 0.036820005625486374, |
| "learning_rate": 0.00025515, |
| "loss": 0.0098, |
| "step": 300 |
| }, |
| { |
| "epoch": 70.0, |
| "grad_norm": 0.0192593764513731, |
| "learning_rate": 0.00024765, |
| "loss": 0.0091, |
| "step": 350 |
| }, |
| { |
| "epoch": 80.0, |
| "grad_norm": 0.018456174060702324, |
| "learning_rate": 0.00024014999999999998, |
| "loss": 0.009, |
| "step": 400 |
| }, |
| { |
| "epoch": 90.0, |
| "grad_norm": 0.018791042268276215, |
| "learning_rate": 0.00023264999999999996, |
| "loss": 0.009, |
| "step": 450 |
| }, |
| { |
| "epoch": 100.0, |
| "grad_norm": 0.016796967014670372, |
| "learning_rate": 0.00022514999999999997, |
| "loss": 0.0089, |
| "step": 500 |
| }, |
| { |
| "epoch": 110.0, |
| "grad_norm": 0.019207874312996864, |
| "learning_rate": 0.00021764999999999998, |
| "loss": 0.0089, |
| "step": 550 |
| }, |
| { |
| "epoch": 120.0, |
| "grad_norm": 0.020437780767679214, |
| "learning_rate": 0.00021014999999999999, |
| "loss": 0.0089, |
| "step": 600 |
| }, |
| { |
| "epoch": 130.0, |
| "grad_norm": 0.014155570417642593, |
| "learning_rate": 0.00020264999999999997, |
| "loss": 0.0088, |
| "step": 650 |
| }, |
| { |
| "epoch": 140.0, |
| "grad_norm": 0.012345368973910809, |
| "learning_rate": 0.00019514999999999997, |
| "loss": 0.0088, |
| "step": 700 |
| }, |
| { |
| "epoch": 150.0, |
| "grad_norm": 0.013084391131997108, |
| "learning_rate": 0.00018764999999999998, |
| "loss": 0.0088, |
| "step": 750 |
| }, |
| { |
| "epoch": 160.0, |
| "grad_norm": 0.01149736251682043, |
| "learning_rate": 0.00018015, |
| "loss": 0.0088, |
| "step": 800 |
| }, |
| { |
| "epoch": 170.0, |
| "grad_norm": 0.014810753054916859, |
| "learning_rate": 0.00017265, |
| "loss": 0.0088, |
| "step": 850 |
| }, |
| { |
| "epoch": 180.0, |
| "grad_norm": 0.013843956403434277, |
| "learning_rate": 0.00016514999999999998, |
| "loss": 0.0088, |
| "step": 900 |
| }, |
| { |
| "epoch": 190.0, |
| "grad_norm": 0.014220188371837139, |
| "learning_rate": 0.00015764999999999998, |
| "loss": 0.0087, |
| "step": 950 |
| }, |
| { |
| "epoch": 200.0, |
| "grad_norm": 0.010835015214979649, |
| "learning_rate": 0.00015014999999999996, |
| "loss": 0.0087, |
| "step": 1000 |
| }, |
| { |
| "epoch": 210.0, |
| "grad_norm": 0.014992893673479557, |
| "learning_rate": 0.00014264999999999997, |
| "loss": 0.0087, |
| "step": 1050 |
| }, |
| { |
| "epoch": 220.0, |
| "grad_norm": 0.014827009290456772, |
| "learning_rate": 0.00013514999999999998, |
| "loss": 0.0087, |
| "step": 1100 |
| }, |
| { |
| "epoch": 230.0, |
| "grad_norm": 0.013166485354304314, |
| "learning_rate": 0.00012764999999999999, |
| "loss": 0.0087, |
| "step": 1150 |
| }, |
| { |
| "epoch": 240.0, |
| "grad_norm": 0.014551304280757904, |
| "learning_rate": 0.00012014999999999999, |
| "loss": 0.0087, |
| "step": 1200 |
| }, |
| { |
| "epoch": 250.0, |
| "grad_norm": 0.013928817585110664, |
| "learning_rate": 0.00011264999999999999, |
| "loss": 0.0087, |
| "step": 1250 |
| }, |
| { |
| "epoch": 260.0, |
| "grad_norm": 0.010355178266763687, |
| "learning_rate": 0.00010514999999999998, |
| "loss": 0.0087, |
| "step": 1300 |
| }, |
| { |
| "epoch": 270.0, |
| "grad_norm": 0.010520271956920624, |
| "learning_rate": 9.764999999999999e-05, |
| "loss": 0.0087, |
| "step": 1350 |
| }, |
| { |
| "epoch": 280.0, |
| "grad_norm": 0.010308249853551388, |
| "learning_rate": 9.014999999999998e-05, |
| "loss": 0.0087, |
| "step": 1400 |
| }, |
| { |
| "epoch": 290.0, |
| "grad_norm": 0.011182799004018307, |
| "learning_rate": 8.265e-05, |
| "loss": 0.0087, |
| "step": 1450 |
| }, |
| { |
| "epoch": 300.0, |
| "grad_norm": 0.015329057350754738, |
| "learning_rate": 7.515e-05, |
| "loss": 0.0087, |
| "step": 1500 |
| }, |
| { |
| "epoch": 310.0, |
| "grad_norm": 0.011985884048044682, |
| "learning_rate": 6.764999999999999e-05, |
| "loss": 0.0087, |
| "step": 1550 |
| }, |
| { |
| "epoch": 320.0, |
| "grad_norm": 0.011977328918874264, |
| "learning_rate": 6.015e-05, |
| "loss": 0.0087, |
| "step": 1600 |
| }, |
| { |
| "epoch": 330.0, |
| "grad_norm": 0.011549632996320724, |
| "learning_rate": 5.264999999999999e-05, |
| "loss": 0.0086, |
| "step": 1650 |
| }, |
| { |
| "epoch": 340.0, |
| "grad_norm": 0.012495314702391624, |
| "learning_rate": 4.514999999999999e-05, |
| "loss": 0.0086, |
| "step": 1700 |
| }, |
| { |
| "epoch": 350.0, |
| "grad_norm": 0.01033444982022047, |
| "learning_rate": 3.7649999999999994e-05, |
| "loss": 0.0086, |
| "step": 1750 |
| }, |
| { |
| "epoch": 360.0, |
| "grad_norm": 0.011466676369309425, |
| "learning_rate": 3.0149999999999998e-05, |
| "loss": 0.0086, |
| "step": 1800 |
| }, |
| { |
| "epoch": 370.0, |
| "grad_norm": 0.013934852555394173, |
| "learning_rate": 2.2649999999999998e-05, |
| "loss": 0.0086, |
| "step": 1850 |
| }, |
| { |
| "epoch": 380.0, |
| "grad_norm": 0.01214936189353466, |
| "learning_rate": 1.5149999999999999e-05, |
| "loss": 0.0086, |
| "step": 1900 |
| }, |
| { |
| "epoch": 390.0, |
| "grad_norm": 0.011638245545327663, |
| "learning_rate": 7.65e-06, |
| "loss": 0.0086, |
| "step": 1950 |
| }, |
| { |
| "epoch": 400.0, |
| "grad_norm": 0.012530920095741749, |
| "learning_rate": 1.5e-07, |
| "loss": 0.0086, |
| "step": 2000 |
| } |
| ], |
| "logging_steps": 50, |
| "max_steps": 2000, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 400, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 6.45453456605184e+17, |
| "train_batch_size": 4, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|