{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 30.0,
  "eval_steps": 500,
  "global_step": 240,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.6666666666666666,
      "grad_norm": 0.8689901232719421,
      "learning_rate": 1.6666666666666667e-05,
      "loss": 2.3654,
      "step": 5
    },
    {
      "epoch": 1.2666666666666666,
      "grad_norm": 0.7568275928497314,
      "learning_rate": 3.7500000000000003e-05,
      "loss": 1.8161,
      "step": 10
    },
    {
      "epoch": 1.9333333333333333,
      "grad_norm": 0.7656272053718567,
      "learning_rate": 5.833333333333334e-05,
      "loss": 1.4996,
      "step": 15
    },
    {
      "epoch": 2.533333333333333,
      "grad_norm": 0.7185885906219482,
      "learning_rate": 7.916666666666666e-05,
      "loss": 1.1473,
      "step": 20
    },
    {
      "epoch": 3.1333333333333333,
      "grad_norm": 1.1223357915878296,
      "learning_rate": 0.0001,
      "loss": 0.8881,
      "step": 25
    },
    {
      "epoch": 3.8,
      "grad_norm": 0.852708637714386,
      "learning_rate": 9.986784583502862e-05,
      "loss": 0.4632,
      "step": 30
    },
    {
      "epoch": 4.4,
      "grad_norm": 0.6442459225654602,
      "learning_rate": 9.947208192904722e-05,
      "loss": 0.2705,
      "step": 35
    },
    {
      "epoch": 5.0,
      "grad_norm": 0.8184784054756165,
      "learning_rate": 9.881480035599667e-05,
      "loss": 0.1617,
      "step": 40
    },
    {
      "epoch": 5.666666666666667,
      "grad_norm": 0.5641859769821167,
      "learning_rate": 9.789947561577445e-05,
      "loss": 0.0784,
      "step": 45
    },
    {
      "epoch": 6.266666666666667,
      "grad_norm": 0.3222081661224365,
      "learning_rate": 9.673094626744942e-05,
      "loss": 0.0533,
      "step": 50
    },
    {
      "epoch": 6.933333333333334,
      "grad_norm": 0.34017637372016907,
      "learning_rate": 9.53153893518325e-05,
      "loss": 0.0341,
      "step": 55
    },
    {
      "epoch": 7.533333333333333,
      "grad_norm": 0.375735342502594,
      "learning_rate": 9.36602877386098e-05,
      "loss": 0.0245,
      "step": 60
    },
    {
      "epoch": 8.133333333333333,
      "grad_norm": 0.14678707718849182,
      "learning_rate": 9.177439057064683e-05,
      "loss": 0.0208,
      "step": 65
    },
    {
      "epoch": 8.8,
      "grad_norm": 0.22148066759109497,
      "learning_rate": 8.966766701456177e-05,
      "loss": 0.0176,
      "step": 70
    },
    {
      "epoch": 9.4,
      "grad_norm": 0.21415655314922333,
      "learning_rate": 8.73512535620498e-05,
      "loss": 0.0102,
      "step": 75
    },
    {
      "epoch": 10.0,
      "grad_norm": 0.2622195780277252,
      "learning_rate": 8.483739516053276e-05,
      "loss": 0.0087,
      "step": 80
    },
    {
      "epoch": 10.666666666666666,
      "grad_norm": 0.16748470067977905,
      "learning_rate": 8.213938048432697e-05,
      "loss": 0.0055,
      "step": 85
    },
    {
      "epoch": 11.266666666666667,
      "grad_norm": 0.16798754036426544,
      "learning_rate": 7.927147168849704e-05,
      "loss": 0.0058,
      "step": 90
    },
    {
      "epoch": 11.933333333333334,
      "grad_norm": 0.14203017950057983,
      "learning_rate": 7.6248829016728e-05,
      "loss": 0.0052,
      "step": 95
    },
    {
      "epoch": 12.533333333333333,
      "grad_norm": 0.18258453905582428,
      "learning_rate": 7.308743066175172e-05,
      "loss": 0.0022,
      "step": 100
    },
    {
      "epoch": 13.133333333333333,
      "grad_norm": 0.015843752771615982,
      "learning_rate": 6.980398830195785e-05,
      "loss": 0.0022,
      "step": 105
    },
    {
      "epoch": 13.8,
      "grad_norm": 0.07418923079967499,
      "learning_rate": 6.641585876067807e-05,
      "loss": 0.0017,
      "step": 110
    },
    {
      "epoch": 14.4,
      "grad_norm": 0.023140624165534973,
      "learning_rate": 6.294095225512603e-05,
      "loss": 0.0012,
      "step": 115
    },
    {
      "epoch": 15.0,
      "grad_norm": 0.25409796833992004,
      "learning_rate": 5.9397637720005595e-05,
      "loss": 0.0032,
      "step": 120
    },
    {
      "epoch": 15.666666666666666,
      "grad_norm": 0.05325435474514961,
      "learning_rate": 5.5804645706261514e-05,
      "loss": 0.0007,
      "step": 125
    },
    {
      "epoch": 16.266666666666666,
      "grad_norm": 0.023507924750447273,
      "learning_rate": 5.218096936826681e-05,
      "loss": 0.0012,
      "step": 130
    },
    {
      "epoch": 16.933333333333334,
      "grad_norm": 0.10688629001379013,
      "learning_rate": 4.854576406284443e-05,
      "loss": 0.0022,
      "step": 135
    },
    {
      "epoch": 17.533333333333335,
      "grad_norm": 0.014077894389629364,
      "learning_rate": 4.491824609085991e-05,
      "loss": 0.0011,
      "step": 140
    },
    {
      "epoch": 18.133333333333333,
      "grad_norm": 0.01724964752793312,
      "learning_rate": 4.131759111665349e-05,
      "loss": 0.0009,
      "step": 145
    },
    {
      "epoch": 18.8,
      "grad_norm": 0.07156306505203247,
      "learning_rate": 3.776283280228381e-05,
      "loss": 0.0012,
      "step": 150
    },
    {
      "epoch": 19.4,
      "grad_norm": 0.0061793080531060696,
      "learning_rate": 3.427276219241933e-05,
      "loss": 0.0005,
      "step": 155
    },
    {
      "epoch": 20.0,
      "grad_norm": 0.007031604181975126,
      "learning_rate": 3.086582838174551e-05,
      "loss": 0.0005,
      "step": 160
    },
    {
      "epoch": 20.666666666666668,
      "grad_norm": 0.006758078932762146,
      "learning_rate": 2.7560040989976892e-05,
      "loss": 0.0003,
      "step": 165
    },
    {
      "epoch": 21.266666666666666,
      "grad_norm": 0.0022400650195777416,
      "learning_rate": 2.4372874960006743e-05,
      "loss": 0.0002,
      "step": 170
    },
    {
      "epoch": 21.933333333333334,
      "grad_norm": 0.005241368897259235,
      "learning_rate": 2.132117818244771e-05,
      "loss": 0.0003,
      "step": 175
    },
    {
      "epoch": 22.533333333333335,
      "grad_norm": 0.005505857989192009,
      "learning_rate": 1.842108243487513e-05,
      "loss": 0.0002,
      "step": 180
    },
    {
      "epoch": 23.133333333333333,
      "grad_norm": 0.0022109781857579947,
      "learning_rate": 1.5687918106563326e-05,
      "loss": 0.0002,
      "step": 185
    },
    {
      "epoch": 23.8,
      "grad_norm": 0.002861538203433156,
      "learning_rate": 1.3136133159493802e-05,
      "loss": 0.0002,
      "step": 190
    },
    {
      "epoch": 24.4,
      "grad_norm": 0.001845506951212883,
      "learning_rate": 1.0779216754021215e-05,
      "loss": 0.0001,
      "step": 195
    },
    {
      "epoch": 25.0,
      "grad_norm": 0.0022635224740952253,
      "learning_rate": 8.629627942924473e-06,
      "loss": 0.0002,
      "step": 200
    },
    {
      "epoch": 25.666666666666668,
      "grad_norm": 0.0018658298067748547,
      "learning_rate": 6.698729810778065e-06,
      "loss": 0.0001,
      "step": 205
    },
    {
      "epoch": 26.266666666666666,
      "grad_norm": 0.002295706421136856,
      "learning_rate": 4.996729406793943e-06,
      "loss": 0.0002,
      "step": 210
    },
    {
      "epoch": 26.933333333333334,
      "grad_norm": 0.00466426694765687,
      "learning_rate": 3.5326237886588732e-06,
      "loss": 0.0001,
      "step": 215
    },
    {
      "epoch": 27.533333333333335,
      "grad_norm": 0.0056126974523067474,
      "learning_rate": 2.314152462588659e-06,
      "loss": 0.0002,
      "step": 220
    },
    {
      "epoch": 28.133333333333333,
      "grad_norm": 0.005923236720263958,
      "learning_rate": 1.3477564710088098e-06,
      "loss": 0.0001,
      "step": 225
    },
    {
      "epoch": 28.8,
      "grad_norm": 0.0023947993759065866,
      "learning_rate": 6.385443441312978e-07,
      "loss": 0.0001,
      "step": 230
    },
    {
      "epoch": 29.4,
      "grad_norm": 0.001765914843417704,
      "learning_rate": 1.9026509541272275e-07,
      "loss": 0.0001,
      "step": 235
    },
    {
      "epoch": 30.0,
      "grad_norm": 0.0020649449434131384,
      "learning_rate": 5.2884036446265714e-09,
      "loss": 0.0001,
      "step": 240
    },
    {
      "epoch": 30.0,
      "step": 240,
      "total_flos": 1.6497175945019392e+17,
      "train_loss": 0.18536781801400745,
      "train_runtime": 7707.7803,
      "train_samples_per_second": 0.93,
      "train_steps_per_second": 0.031
    }
  ],
  "logging_steps": 5,
  "max_steps": 240,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 30,
  "save_steps": 1000,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 1.6497175945019392e+17,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}