{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.0,
  "eval_steps": 500,
  "global_step": 271,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.018484288354898338,
      "grad_norm": 1.1482020616531372,
      "learning_rate": 1.7647058823529412e-06,
      "loss": 1.4088,
      "step": 5
    },
    {
      "epoch": 0.036968576709796676,
      "grad_norm": 0.8084781169891357,
      "learning_rate": 3.970588235294118e-06,
      "loss": 1.3425,
      "step": 10
    },
    {
      "epoch": 0.05545286506469501,
      "grad_norm": 0.7077063322067261,
      "learning_rate": 6.176470588235294e-06,
      "loss": 1.3099,
      "step": 15
    },
    {
      "epoch": 0.07393715341959335,
      "grad_norm": 0.6323762536048889,
      "learning_rate": 8.382352941176472e-06,
      "loss": 1.3226,
      "step": 20
    },
    {
      "epoch": 0.09242144177449169,
      "grad_norm": 0.6048110127449036,
      "learning_rate": 1.0588235294117648e-05,
      "loss": 1.3228,
      "step": 25
    },
    {
      "epoch": 0.11090573012939002,
      "grad_norm": 0.5438778400421143,
      "learning_rate": 1.2794117647058824e-05,
      "loss": 1.2997,
      "step": 30
    },
    {
      "epoch": 0.12939001848428835,
      "grad_norm": 0.48258763551712036,
      "learning_rate": 1.5e-05,
      "loss": 1.2228,
      "step": 35
    },
    {
      "epoch": 0.1478743068391867,
      "grad_norm": 0.5334736108779907,
      "learning_rate": 1.7205882352941175e-05,
      "loss": 1.252,
      "step": 40
    },
    {
      "epoch": 0.16635859519408502,
      "grad_norm": 0.4233540892601013,
      "learning_rate": 1.9411764705882355e-05,
      "loss": 1.238,
      "step": 45
    },
    {
      "epoch": 0.18484288354898337,
      "grad_norm": 0.44563496112823486,
      "learning_rate": 2.161764705882353e-05,
      "loss": 1.2182,
      "step": 50
    },
    {
      "epoch": 0.2033271719038817,
      "grad_norm": 0.4069574475288391,
      "learning_rate": 2.3823529411764704e-05,
      "loss": 1.2401,
      "step": 55
    },
    {
      "epoch": 0.22181146025878004,
      "grad_norm": 0.4704144597053528,
      "learning_rate": 2.6029411764705883e-05,
      "loss": 1.1739,
      "step": 60
    },
    {
      "epoch": 0.24029574861367836,
      "grad_norm": 0.5614204406738281,
      "learning_rate": 2.823529411764706e-05,
      "loss": 1.2586,
      "step": 65
    },
    {
      "epoch": 0.2587800369685767,
      "grad_norm": 0.44872087240219116,
      "learning_rate": 2.9999955310684845e-05,
      "loss": 1.1245,
      "step": 70
    },
    {
      "epoch": 0.27726432532347506,
      "grad_norm": 0.49408602714538574,
      "learning_rate": 2.999839121261416e-05,
      "loss": 1.2028,
      "step": 75
    },
    {
      "epoch": 0.2957486136783734,
      "grad_norm": 0.5130935907363892,
      "learning_rate": 2.999459291506328e-05,
      "loss": 1.1916,
      "step": 80
    },
    {
      "epoch": 0.3142329020332717,
      "grad_norm": 0.5284329652786255,
      "learning_rate": 2.9988560983836527e-05,
      "loss": 1.152,
      "step": 85
    },
    {
      "epoch": 0.33271719038817005,
      "grad_norm": 0.5931246876716614,
      "learning_rate": 2.99802963174661e-05,
      "loss": 1.1045,
      "step": 90
    },
    {
      "epoch": 0.3512014787430684,
      "grad_norm": 0.5653843283653259,
      "learning_rate": 2.9969800147078265e-05,
      "loss": 1.091,
      "step": 95
    },
    {
      "epoch": 0.36968576709796674,
      "grad_norm": 0.5574877262115479,
      "learning_rate": 2.9957074036209947e-05,
      "loss": 1.0728,
      "step": 100
    },
    {
      "epoch": 0.38817005545286504,
      "grad_norm": 0.5665938258171082,
      "learning_rate": 2.994211988057582e-05,
      "loss": 1.0265,
      "step": 105
    },
    {
      "epoch": 0.4066543438077634,
      "grad_norm": 0.6560489535331726,
      "learning_rate": 2.9924939907785906e-05,
      "loss": 1.066,
      "step": 110
    },
    {
      "epoch": 0.42513863216266173,
      "grad_norm": 0.6012698411941528,
      "learning_rate": 2.9905536677013782e-05,
      "loss": 1.0484,
      "step": 115
    },
    {
      "epoch": 0.4436229205175601,
      "grad_norm": 0.5735741853713989,
      "learning_rate": 2.9883913078615306e-05,
      "loss": 1.0043,
      "step": 120
    },
    {
      "epoch": 0.46210720887245843,
      "grad_norm": 0.5683181881904602,
      "learning_rate": 2.9860072333698115e-05,
      "loss": 1.0437,
      "step": 125
    },
    {
      "epoch": 0.4805914972273567,
      "grad_norm": 0.668404757976532,
      "learning_rate": 2.9834017993641756e-05,
      "loss": 1.0245,
      "step": 130
    },
    {
      "epoch": 0.49907578558225507,
      "grad_norm": 0.5659777522087097,
      "learning_rate": 2.980575393956869e-05,
      "loss": 1.004,
      "step": 135
    },
    {
      "epoch": 0.5175600739371534,
      "grad_norm": 0.7540903091430664,
      "learning_rate": 2.977528438176615e-05,
      "loss": 0.9866,
      "step": 140
    },
    {
      "epoch": 0.5360443622920518,
      "grad_norm": 0.7011395692825317,
      "learning_rate": 2.974261385905894e-05,
      "loss": 0.9349,
      "step": 145
    },
    {
      "epoch": 0.5545286506469501,
      "grad_norm": 0.7044267654418945,
      "learning_rate": 2.9707747238133358e-05,
      "loss": 0.956,
      "step": 150
    },
    {
      "epoch": 0.5730129390018485,
      "grad_norm": 0.7586350440979004,
      "learning_rate": 2.9670689712812195e-05,
      "loss": 0.9521,
      "step": 155
    },
    {
      "epoch": 0.5914972273567468,
      "grad_norm": 0.8364260792732239,
      "learning_rate": 2.963144680328111e-05,
      "loss": 0.9237,
      "step": 160
    },
    {
      "epoch": 0.609981515711645,
      "grad_norm": 0.7463046908378601,
      "learning_rate": 2.959002435526626e-05,
      "loss": 0.9384,
      "step": 165
    },
    {
      "epoch": 0.6284658040665434,
      "grad_norm": 0.7540715336799622,
      "learning_rate": 2.9546428539163568e-05,
      "loss": 0.9072,
      "step": 170
    },
    {
      "epoch": 0.6469500924214417,
      "grad_norm": 0.8799229860305786,
      "learning_rate": 2.9500665849119523e-05,
      "loss": 0.9326,
      "step": 175
    },
    {
      "epoch": 0.6654343807763401,
      "grad_norm": 0.8631930351257324,
      "learning_rate": 2.945274310206382e-05,
      "loss": 0.8823,
      "step": 180
    },
    {
      "epoch": 0.6839186691312384,
      "grad_norm": 0.7702048420906067,
      "learning_rate": 2.9402667436693852e-05,
      "loss": 0.8832,
      "step": 185
    },
    {
      "epoch": 0.7024029574861368,
      "grad_norm": 0.9274978637695312,
      "learning_rate": 2.935044631241138e-05,
      "loss": 0.8156,
      "step": 190
    },
    {
      "epoch": 0.7208872458410351,
      "grad_norm": 0.9201264381408691,
      "learning_rate": 2.929608750821129e-05,
      "loss": 0.8671,
      "step": 195
    },
    {
      "epoch": 0.7393715341959335,
      "grad_norm": 0.9688053131103516,
      "learning_rate": 2.923959912152287e-05,
      "loss": 0.9017,
      "step": 200
    },
    {
      "epoch": 0.7578558225508318,
      "grad_norm": 0.977288544178009,
      "learning_rate": 2.9180989567003547e-05,
      "loss": 0.8301,
      "step": 205
    },
    {
      "epoch": 0.7763401109057301,
      "grad_norm": 0.966156542301178,
      "learning_rate": 2.9120267575285458e-05,
      "loss": 0.8573,
      "step": 210
    },
    {
      "epoch": 0.7948243992606284,
      "grad_norm": 0.9321315884590149,
      "learning_rate": 2.905744219167489e-05,
      "loss": 0.8063,
      "step": 215
    },
    {
      "epoch": 0.8133086876155268,
      "grad_norm": 0.9106122255325317,
      "learning_rate": 2.899252277480487e-05,
      "loss": 0.8396,
      "step": 220
    },
    {
      "epoch": 0.8317929759704251,
      "grad_norm": 1.001532793045044,
      "learning_rate": 2.892551899524109e-05,
      "loss": 0.8269,
      "step": 225
    },
    {
      "epoch": 0.8502772643253235,
      "grad_norm": 0.9747412800788879,
      "learning_rate": 2.885644083404134e-05,
      "loss": 0.8071,
      "step": 230
    },
    {
      "epoch": 0.8687615526802218,
      "grad_norm": 0.8949458599090576,
      "learning_rate": 2.8785298581268704e-05,
      "loss": 0.8024,
      "step": 235
    },
    {
      "epoch": 0.8872458410351202,
      "grad_norm": 1.0015259981155396,
      "learning_rate": 2.871210283445875e-05,
      "loss": 0.8123,
      "step": 240
    },
    {
      "epoch": 0.9057301293900185,
      "grad_norm": 1.0993518829345703,
      "learning_rate": 2.8636864497040856e-05,
      "loss": 0.7778,
      "step": 245
    },
    {
      "epoch": 0.9242144177449169,
      "grad_norm": 1.0659856796264648,
      "learning_rate": 2.8559594776714034e-05,
      "loss": 0.7372,
      "step": 250
    },
    {
      "epoch": 0.9426987060998152,
      "grad_norm": 1.0356419086456299,
      "learning_rate": 2.848030518377739e-05,
      "loss": 0.77,
      "step": 255
    },
    {
      "epoch": 0.9611829944547134,
      "grad_norm": 1.22687828540802,
      "learning_rate": 2.8399007529415527e-05,
      "loss": 0.7909,
      "step": 260
    },
    {
      "epoch": 0.9796672828096118,
      "grad_norm": 0.8971600532531738,
      "learning_rate": 2.8315713923939113e-05,
      "loss": 0.663,
      "step": 265
    },
    {
      "epoch": 0.9981515711645101,
      "grad_norm": 1.0294069051742554,
      "learning_rate": 2.82304367749809e-05,
      "loss": 0.7002,
      "step": 270
    }
  ],
  "logging_steps": 5,
  "max_steps": 1355,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 5,
  "save_steps": 2000,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 3.798461404025979e+17,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}