{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.0,
  "eval_steps": 500,
  "global_step": 234,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.042735042735042736,
      "grad_norm": 25.421951293945312,
      "learning_rate": 3.888888888888889e-05,
      "loss": 14.74276123046875,
      "step": 10
    },
    {
      "epoch": 0.08547008547008547,
      "grad_norm": 12.622594833374023,
      "learning_rate": 9.444444444444444e-05,
      "loss": 9.163805389404297,
      "step": 20
    },
    {
      "epoch": 0.1282051282051282,
      "grad_norm": 8.006195068359375,
      "learning_rate": 0.00015000000000000001,
      "loss": 5.28853759765625,
      "step": 30
    },
    {
      "epoch": 0.17094017094017094,
      "grad_norm": 3.498350143432617,
      "learning_rate": 0.00019999888744757143,
      "loss": 4.076284027099609,
      "step": 40
    },
    {
      "epoch": 0.21367521367521367,
      "grad_norm": 4.330902576446533,
      "learning_rate": 0.00019986541110764565,
      "loss": 3.210728073120117,
      "step": 50
    },
    {
      "epoch": 0.2564102564102564,
      "grad_norm": 5.267370700836182,
      "learning_rate": 0.0001995097645450266,
      "loss": 2.6838237762451174,
      "step": 60
    },
    {
      "epoch": 0.29914529914529914,
      "grad_norm": 3.2072625160217285,
      "learning_rate": 0.00019893273896534936,
      "loss": 2.4369382858276367,
      "step": 70
    },
    {
      "epoch": 0.3418803418803419,
      "grad_norm": 3.1016528606414795,
      "learning_rate": 0.00019813561807535598,
      "loss": 2.205874443054199,
      "step": 80
    },
    {
      "epoch": 0.38461538461538464,
      "grad_norm": 3.8450214862823486,
      "learning_rate": 0.00019712017522703764,
      "loss": 1.9279813766479492,
      "step": 90
    },
    {
      "epoch": 0.42735042735042733,
      "grad_norm": 2.348071575164795,
      "learning_rate": 0.00019588866947246498,
      "loss": 1.8235645294189453,
      "step": 100
    },
    {
      "epoch": 0.4700854700854701,
      "grad_norm": 3.2652463912963867,
      "learning_rate": 0.00019444384053808288,
      "loss": 1.8220790863037108,
      "step": 110
    },
    {
      "epoch": 0.5128205128205128,
      "grad_norm": 2.6423192024230957,
      "learning_rate": 0.00019278890272965096,
      "loss": 1.7959518432617188,
      "step": 120
    },
    {
      "epoch": 0.5555555555555556,
      "grad_norm": 2.6279354095458984,
      "learning_rate": 0.00019092753778138886,
      "loss": 1.7804344177246094,
      "step": 130
    },
    {
      "epoch": 0.5982905982905983,
      "grad_norm": 2.6313953399658203,
      "learning_rate": 0.0001888638866652356,
      "loss": 1.642679214477539,
      "step": 140
    },
    {
      "epoch": 0.6410256410256411,
      "grad_norm": 2.1009438037872314,
      "learning_rate": 0.00018660254037844388,
      "loss": 1.545415496826172,
      "step": 150
    },
    {
      "epoch": 0.6837606837606838,
      "grad_norm": 2.672374963760376,
      "learning_rate": 0.00018414852973000503,
      "loss": 1.5645628929138184,
      "step": 160
    },
    {
      "epoch": 0.7264957264957265,
      "grad_norm": 2.6783759593963623,
      "learning_rate": 0.00018150731414862622,
      "loss": 1.5343215942382813,
      "step": 170
    },
    {
      "epoch": 0.7692307692307693,
      "grad_norm": 2.3677117824554443,
      "learning_rate": 0.000178684769537159,
      "loss": 1.5453574180603027,
      "step": 180
    },
    {
      "epoch": 0.811965811965812,
      "grad_norm": 2.3082728385925293,
      "learning_rate": 0.0001756871752004992,
      "loss": 1.5324308395385742,
      "step": 190
    },
    {
      "epoch": 0.8547008547008547,
      "grad_norm": 1.969205617904663,
      "learning_rate": 0.00017252119987603973,
      "loss": 1.5409900665283203,
      "step": 200
    },
    {
      "epoch": 0.8974358974358975,
      "grad_norm": 2.5397582054138184,
      "learning_rate": 0.00016919388689775464,
      "loss": 1.4344990730285645,
      "step": 210
    },
    {
      "epoch": 0.9401709401709402,
      "grad_norm": 2.0636305809020996,
      "learning_rate": 0.00016571263852691888,
      "loss": 1.4311028480529786,
      "step": 220
    },
    {
      "epoch": 0.9829059829059829,
      "grad_norm": 2.4687087535858154,
      "learning_rate": 0.0001620851994843244,
      "loss": 1.461498737335205,
      "step": 230
    }
  ],
  "logging_steps": 10,
  "max_steps": 702,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 2.961214772268672e+16,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}