{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.2058706862356208,
  "eval_steps": 500,
  "global_step": 190,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0317334391114637,
      "grad_norm": 0.060546875,
      "learning_rate": 0.0001,
      "loss": 0.6421,
      "step": 5
    },
    {
      "epoch": 0.0634668782229274,
      "grad_norm": 0.11572265625,
      "learning_rate": 0.0001,
      "loss": 0.5213,
      "step": 10
    },
    {
      "epoch": 0.09520031733439112,
      "grad_norm": 0.08251953125,
      "learning_rate": 0.0001,
      "loss": 0.2925,
      "step": 15
    },
    {
      "epoch": 0.1269337564458548,
      "grad_norm": 0.0634765625,
      "learning_rate": 0.0001,
      "loss": 0.1978,
      "step": 20
    },
    {
      "epoch": 0.15866719555731854,
      "grad_norm": 0.08251953125,
      "learning_rate": 0.0001,
      "loss": 0.1538,
      "step": 25
    },
    {
      "epoch": 0.19040063466878224,
      "grad_norm": 0.10888671875,
      "learning_rate": 0.0001,
      "loss": 0.106,
      "step": 30
    },
    {
      "epoch": 0.22213407378024594,
      "grad_norm": 0.049560546875,
      "learning_rate": 0.0001,
      "loss": 0.0454,
      "step": 35
    },
    {
      "epoch": 0.2538675128917096,
      "grad_norm": 0.310546875,
      "learning_rate": 0.0001,
      "loss": 0.1215,
      "step": 40
    },
    {
      "epoch": 0.28560095200317337,
      "grad_norm": 0.06494140625,
      "learning_rate": 0.0001,
      "loss": 0.2476,
      "step": 45
    },
    {
      "epoch": 0.31733439111463707,
      "grad_norm": 0.40234375,
      "learning_rate": 0.0001,
      "loss": 0.1073,
      "step": 50
    },
    {
      "epoch": 0.3490678302261008,
      "grad_norm": 0.04052734375,
      "learning_rate": 0.0001,
      "loss": 0.0863,
      "step": 55
    },
    {
      "epoch": 0.3808012693375645,
      "grad_norm": 0.03369140625,
      "learning_rate": 0.0001,
      "loss": 0.0671,
      "step": 60
    },
    {
      "epoch": 0.4125347084490282,
      "grad_norm": 0.0274658203125,
      "learning_rate": 0.0001,
      "loss": 0.0493,
      "step": 65
    },
    {
      "epoch": 0.4442681475604919,
      "grad_norm": 0.0277099609375,
      "learning_rate": 0.0001,
      "loss": 0.0311,
      "step": 70
    },
    {
      "epoch": 0.4760015866719556,
      "grad_norm": 0.01275634765625,
      "learning_rate": 0.0001,
      "loss": 0.0125,
      "step": 75
    },
    {
      "epoch": 0.5077350257834192,
      "grad_norm": 0.06787109375,
      "learning_rate": 0.0001,
      "loss": 0.1307,
      "step": 80
    },
    {
      "epoch": 0.539468464894883,
      "grad_norm": 0.050048828125,
      "learning_rate": 0.0001,
      "loss": 0.171,
      "step": 85
    },
    {
      "epoch": 0.5712019040063467,
      "grad_norm": 0.060791015625,
      "learning_rate": 0.0001,
      "loss": 0.0818,
      "step": 90
    },
    {
      "epoch": 0.6029353431178104,
      "grad_norm": 0.033203125,
      "learning_rate": 0.0001,
      "loss": 0.0658,
      "step": 95
    },
    {
      "epoch": 0.6346687822292741,
      "grad_norm": 0.0235595703125,
      "learning_rate": 0.0001,
      "loss": 0.046,
      "step": 100
    },
    {
      "epoch": 0.6664022213407378,
      "grad_norm": 0.0299072265625,
      "learning_rate": 0.0001,
      "loss": 0.0384,
      "step": 105
    },
    {
      "epoch": 0.6981356604522015,
      "grad_norm": 0.0181884765625,
      "learning_rate": 0.0001,
      "loss": 0.0187,
      "step": 110
    },
    {
      "epoch": 0.7298690995636652,
      "grad_norm": 0.019775390625,
      "learning_rate": 0.0001,
      "loss": 0.0095,
      "step": 115
    },
    {
      "epoch": 0.761602538675129,
      "grad_norm": 0.060791015625,
      "learning_rate": 0.0001,
      "loss": 0.1381,
      "step": 120
    },
    {
      "epoch": 0.7933359777865926,
      "grad_norm": 0.038818359375,
      "learning_rate": 0.0001,
      "loss": 0.1125,
      "step": 125
    },
    {
      "epoch": 0.8250694168980564,
      "grad_norm": 0.032958984375,
      "learning_rate": 0.0001,
      "loss": 0.062,
      "step": 130
    },
    {
      "epoch": 0.85680285600952,
      "grad_norm": 0.03173828125,
      "learning_rate": 0.0001,
      "loss": 0.0526,
      "step": 135
    },
    {
      "epoch": 0.8885362951209838,
      "grad_norm": 0.02392578125,
      "learning_rate": 0.0001,
      "loss": 0.0382,
      "step": 140
    },
    {
      "epoch": 0.9202697342324474,
      "grad_norm": 0.027099609375,
      "learning_rate": 0.0001,
      "loss": 0.027,
      "step": 145
    },
    {
      "epoch": 0.9520031733439112,
      "grad_norm": 0.02294921875,
      "learning_rate": 0.0001,
      "loss": 0.0115,
      "step": 150
    },
    {
      "epoch": 0.9837366124553748,
      "grad_norm": 0.02099609375,
      "learning_rate": 0.0001,
      "loss": 0.005,
      "step": 155
    },
    {
      "epoch": 1.0154700515668384,
      "grad_norm": 0.0703125,
      "learning_rate": 0.0001,
      "loss": 0.1291,
      "step": 160
    },
    {
      "epoch": 1.0472034906783023,
      "grad_norm": 0.04052734375,
      "learning_rate": 0.0001,
      "loss": 0.1033,
      "step": 165
    },
    {
      "epoch": 1.078936929789766,
      "grad_norm": 0.03173828125,
      "learning_rate": 0.0001,
      "loss": 0.0539,
      "step": 170
    },
    {
      "epoch": 1.1106703689012296,
      "grad_norm": 0.0299072265625,
      "learning_rate": 0.0001,
      "loss": 0.043,
      "step": 175
    },
    {
      "epoch": 1.1424038080126935,
      "grad_norm": 0.0262451171875,
      "learning_rate": 0.0001,
      "loss": 0.0303,
      "step": 180
    },
    {
      "epoch": 1.1741372471241571,
      "grad_norm": 0.060791015625,
      "learning_rate": 0.0001,
      "loss": 0.0239,
      "step": 185
    },
    {
      "epoch": 1.2058706862356208,
      "grad_norm": 0.015625,
      "learning_rate": 0.0001,
      "loss": 0.0095,
      "step": 190
    },
    {
      "epoch": 1.2058706862356208,
      "step": 190,
      "total_flos": 1.216645538039931e+18,
      "train_loss": 0.10745409297707834,
      "train_runtime": 37043.3755,
      "train_samples_per_second": 0.657,
      "train_steps_per_second": 0.005
    }
  ],
  "logging_steps": 5,
  "max_steps": 190,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 2,
  "save_steps": 90,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 1.216645538039931e+18,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}