| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 500.0, |
| "eval_steps": 50, |
| "global_step": 2000, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 12.571428571428571, |
| "grad_norm": 0.26636427640914917, |
| "learning_rate": 0.00029265, |
| "loss": 0.5904, |
| "step": 50 |
| }, |
| { |
| "epoch": 25.0, |
| "grad_norm": 0.16996383666992188, |
| "learning_rate": 0.00028514999999999997, |
| "loss": 0.034, |
| "step": 100 |
| }, |
| { |
| "epoch": 37.57142857142857, |
| "grad_norm": 0.0670333057641983, |
| "learning_rate": 0.00027764999999999995, |
| "loss": 0.016, |
| "step": 150 |
| }, |
| { |
| "epoch": 50.0, |
| "grad_norm": 0.03331753611564636, |
| "learning_rate": 0.00027015, |
| "loss": 0.0105, |
| "step": 200 |
| }, |
| { |
| "epoch": 62.57142857142857, |
| "grad_norm": 0.014663901180028915, |
| "learning_rate": 0.00026264999999999996, |
| "loss": 0.0086, |
| "step": 250 |
| }, |
| { |
| "epoch": 75.0, |
| "grad_norm": 0.02259993739426136, |
| "learning_rate": 0.00025515, |
| "loss": 0.0083, |
| "step": 300 |
| }, |
| { |
| "epoch": 87.57142857142857, |
| "grad_norm": 0.01287161372601986, |
| "learning_rate": 0.00024765, |
| "loss": 0.0082, |
| "step": 350 |
| }, |
| { |
| "epoch": 100.0, |
| "grad_norm": 0.02039289101958275, |
| "learning_rate": 0.00024014999999999998, |
| "loss": 0.0082, |
| "step": 400 |
| }, |
| { |
| "epoch": 112.57142857142857, |
| "grad_norm": 0.012268965132534504, |
| "learning_rate": 0.00023264999999999996, |
| "loss": 0.0081, |
| "step": 450 |
| }, |
| { |
| "epoch": 125.0, |
| "grad_norm": 0.023380041122436523, |
| "learning_rate": 0.00022514999999999997, |
| "loss": 0.0081, |
| "step": 500 |
| }, |
| { |
| "epoch": 137.57142857142858, |
| "grad_norm": 0.009437249973416328, |
| "learning_rate": 0.00021764999999999998, |
| "loss": 0.008, |
| "step": 550 |
| }, |
| { |
| "epoch": 150.0, |
| "grad_norm": 0.015519071370363235, |
| "learning_rate": 0.00021014999999999999, |
| "loss": 0.008, |
| "step": 600 |
| }, |
| { |
| "epoch": 162.57142857142858, |
| "grad_norm": 0.008696269243955612, |
| "learning_rate": 0.00020264999999999997, |
| "loss": 0.008, |
| "step": 650 |
| }, |
| { |
| "epoch": 175.0, |
| "grad_norm": 0.018794188275933266, |
| "learning_rate": 0.00019514999999999997, |
| "loss": 0.008, |
| "step": 700 |
| }, |
| { |
| "epoch": 187.57142857142858, |
| "grad_norm": 0.010213403962552547, |
| "learning_rate": 0.00018764999999999998, |
| "loss": 0.008, |
| "step": 750 |
| }, |
| { |
| "epoch": 200.0, |
| "grad_norm": 0.01870943419635296, |
| "learning_rate": 0.00018015, |
| "loss": 0.008, |
| "step": 800 |
| }, |
| { |
| "epoch": 212.57142857142858, |
| "grad_norm": 0.007811392657458782, |
| "learning_rate": 0.00017265, |
| "loss": 0.008, |
| "step": 850 |
| }, |
| { |
| "epoch": 225.0, |
| "grad_norm": 0.014662496745586395, |
| "learning_rate": 0.00016514999999999998, |
| "loss": 0.008, |
| "step": 900 |
| }, |
| { |
| "epoch": 237.57142857142858, |
| "grad_norm": 0.008589034900069237, |
| "learning_rate": 0.00015764999999999998, |
| "loss": 0.008, |
| "step": 950 |
| }, |
| { |
| "epoch": 250.0, |
| "grad_norm": 0.015834109857678413, |
| "learning_rate": 0.00015014999999999996, |
| "loss": 0.008, |
| "step": 1000 |
| }, |
| { |
| "epoch": 262.57142857142856, |
| "grad_norm": 0.007818593643605709, |
| "learning_rate": 0.00014264999999999997, |
| "loss": 0.0079, |
| "step": 1050 |
| }, |
| { |
| "epoch": 275.0, |
| "grad_norm": 0.01577523909509182, |
| "learning_rate": 0.00013514999999999998, |
| "loss": 0.0079, |
| "step": 1100 |
| }, |
| { |
| "epoch": 287.57142857142856, |
| "grad_norm": 0.010020360350608826, |
| "learning_rate": 0.00012764999999999999, |
| "loss": 0.0079, |
| "step": 1150 |
| }, |
| { |
| "epoch": 300.0, |
| "grad_norm": 0.013038736768066883, |
| "learning_rate": 0.00012014999999999999, |
| "loss": 0.0079, |
| "step": 1200 |
| }, |
| { |
| "epoch": 312.57142857142856, |
| "grad_norm": 0.007050866261124611, |
| "learning_rate": 0.00011264999999999999, |
| "loss": 0.0079, |
| "step": 1250 |
| }, |
| { |
| "epoch": 325.0, |
| "grad_norm": 0.01592850126326084, |
| "learning_rate": 0.00010514999999999998, |
| "loss": 0.0079, |
| "step": 1300 |
| }, |
| { |
| "epoch": 337.57142857142856, |
| "grad_norm": 0.0075027476996183395, |
| "learning_rate": 9.764999999999999e-05, |
| "loss": 0.0079, |
| "step": 1350 |
| }, |
| { |
| "epoch": 350.0, |
| "grad_norm": 0.011593643575906754, |
| "learning_rate": 9.014999999999998e-05, |
| "loss": 0.0079, |
| "step": 1400 |
| }, |
| { |
| "epoch": 362.57142857142856, |
| "grad_norm": 0.007643813267350197, |
| "learning_rate": 8.265e-05, |
| "loss": 0.0079, |
| "step": 1450 |
| }, |
| { |
| "epoch": 375.0, |
| "grad_norm": 0.013792463578283787, |
| "learning_rate": 7.515e-05, |
| "loss": 0.0079, |
| "step": 1500 |
| }, |
| { |
| "epoch": 387.57142857142856, |
| "grad_norm": 0.00813667569309473, |
| "learning_rate": 6.764999999999999e-05, |
| "loss": 0.0079, |
| "step": 1550 |
| }, |
| { |
| "epoch": 400.0, |
| "grad_norm": 0.011745907366275787, |
| "learning_rate": 6.015e-05, |
| "loss": 0.0079, |
| "step": 1600 |
| }, |
| { |
| "epoch": 412.57142857142856, |
| "grad_norm": 0.006887589581310749, |
| "learning_rate": 5.264999999999999e-05, |
| "loss": 0.0079, |
| "step": 1650 |
| }, |
| { |
| "epoch": 425.0, |
| "grad_norm": 0.012646987102925777, |
| "learning_rate": 4.514999999999999e-05, |
| "loss": 0.0079, |
| "step": 1700 |
| }, |
| { |
| "epoch": 437.57142857142856, |
| "grad_norm": 0.0065942066721618176, |
| "learning_rate": 3.7649999999999994e-05, |
| "loss": 0.0079, |
| "step": 1750 |
| }, |
| { |
| "epoch": 450.0, |
| "grad_norm": 0.010179451666772366, |
| "learning_rate": 3.0149999999999998e-05, |
| "loss": 0.0079, |
| "step": 1800 |
| }, |
| { |
| "epoch": 462.57142857142856, |
| "grad_norm": 0.008461065590381622, |
| "learning_rate": 2.2649999999999998e-05, |
| "loss": 0.0079, |
| "step": 1850 |
| }, |
| { |
| "epoch": 475.0, |
| "grad_norm": 0.014076176099479198, |
| "learning_rate": 1.5149999999999999e-05, |
| "loss": 0.0079, |
| "step": 1900 |
| }, |
| { |
| "epoch": 487.57142857142856, |
| "grad_norm": 0.006570025812834501, |
| "learning_rate": 7.65e-06, |
| "loss": 0.0078, |
| "step": 1950 |
| }, |
| { |
| "epoch": 500.0, |
| "grad_norm": 0.0165297482162714, |
| "learning_rate": 1.5e-07, |
| "loss": 0.0078, |
| "step": 2000 |
| } |
| ], |
| "logging_steps": 50, |
| "max_steps": 2000, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 667, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 5.5468656427008e+17, |
| "train_batch_size": 4, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|