{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.9986996098829649,
  "eval_steps": 500,
  "global_step": 384,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.002600780234070221,
      "grad_norm": 0.02962934412062168,
      "learning_rate": 1.282051282051282e-06,
      "loss": 0.619,
      "step": 1
    },
    {
      "epoch": 0.02600780234070221,
      "grad_norm": 0.06379027664661407,
      "learning_rate": 1.282051282051282e-05,
      "loss": 0.6962,
      "step": 10
    },
    {
      "epoch": 0.05201560468140442,
      "grad_norm": 0.0363883376121521,
      "learning_rate": 2.564102564102564e-05,
      "loss": 0.7759,
      "step": 20
    },
    {
      "epoch": 0.07802340702210664,
      "grad_norm": 0.03419478237628937,
      "learning_rate": 3.846153846153846e-05,
      "loss": 0.8087,
      "step": 30
    },
    {
      "epoch": 0.10403120936280884,
      "grad_norm": 0.04424262419342995,
      "learning_rate": 4.985507246376812e-05,
      "loss": 0.7775,
      "step": 40
    },
    {
      "epoch": 0.13003901170351106,
      "grad_norm": 0.22272075712680817,
      "learning_rate": 4.840579710144928e-05,
      "loss": 0.7476,
      "step": 50
    },
    {
      "epoch": 0.15604681404421328,
      "grad_norm": 0.049193304032087326,
      "learning_rate": 4.695652173913044e-05,
      "loss": 0.6617,
      "step": 60
    },
    {
      "epoch": 0.18205461638491546,
      "grad_norm": 0.04189423844218254,
      "learning_rate": 4.5507246376811595e-05,
      "loss": 0.7254,
      "step": 70
    },
    {
      "epoch": 0.20806241872561768,
      "grad_norm": 0.033223457634449005,
      "learning_rate": 4.405797101449275e-05,
      "loss": 0.7454,
      "step": 80
    },
    {
      "epoch": 0.2340702210663199,
      "grad_norm": 0.023022688925266266,
      "learning_rate": 4.2608695652173916e-05,
      "loss": 0.7263,
      "step": 90
    },
    {
      "epoch": 0.26007802340702213,
      "grad_norm": 0.1517011970281601,
      "learning_rate": 4.115942028985507e-05,
      "loss": 0.7241,
      "step": 100
    },
    {
      "epoch": 0.28608582574772434,
      "grad_norm": 0.041623640805482864,
      "learning_rate": 3.971014492753624e-05,
      "loss": 0.647,
      "step": 110
    },
    {
      "epoch": 0.31209362808842656,
      "grad_norm": 0.03412195295095444,
      "learning_rate": 3.8260869565217395e-05,
      "loss": 0.6991,
      "step": 120
    },
    {
      "epoch": 0.3381014304291287,
      "grad_norm": 0.02426602691411972,
      "learning_rate": 3.681159420289855e-05,
      "loss": 0.7115,
      "step": 130
    },
    {
      "epoch": 0.3641092327698309,
      "grad_norm": 0.023634808138012886,
      "learning_rate": 3.536231884057971e-05,
      "loss": 0.6992,
      "step": 140
    },
    {
      "epoch": 0.39011703511053314,
      "grad_norm": 0.1857312172651291,
      "learning_rate": 3.3913043478260867e-05,
      "loss": 0.7133,
      "step": 150
    },
    {
      "epoch": 0.41612483745123535,
      "grad_norm": 0.057914506644010544,
      "learning_rate": 3.246376811594203e-05,
      "loss": 0.637,
      "step": 160
    },
    {
      "epoch": 0.44213263979193757,
      "grad_norm": 0.0314478725194931,
      "learning_rate": 3.1014492753623195e-05,
      "loss": 0.69,
      "step": 170
    },
    {
      "epoch": 0.4681404421326398,
      "grad_norm": 0.02375701256096363,
      "learning_rate": 2.9565217391304352e-05,
      "loss": 0.7052,
      "step": 180
    },
    {
      "epoch": 0.494148244473342,
      "grad_norm": 0.017046812921762466,
      "learning_rate": 2.811594202898551e-05,
      "loss": 0.6963,
      "step": 190
    },
    {
      "epoch": 0.5201560468140443,
      "grad_norm": 0.14757999777793884,
      "learning_rate": 2.6666666666666667e-05,
      "loss": 0.699,
      "step": 200
    },
    {
      "epoch": 0.5461638491547465,
      "grad_norm": 0.03953570872545242,
      "learning_rate": 2.5217391304347827e-05,
      "loss": 0.6362,
      "step": 210
    },
    {
      "epoch": 0.5721716514954487,
      "grad_norm": 0.031761154532432556,
      "learning_rate": 2.3768115942028988e-05,
      "loss": 0.6929,
      "step": 220
    },
    {
      "epoch": 0.5981794538361509,
      "grad_norm": 0.019830092787742615,
      "learning_rate": 2.2318840579710145e-05,
      "loss": 0.6936,
      "step": 230
    },
    {
      "epoch": 0.6241872561768531,
      "grad_norm": 0.017688650637865067,
      "learning_rate": 2.0869565217391303e-05,
      "loss": 0.692,
      "step": 240
    },
    {
      "epoch": 0.6501950585175552,
      "grad_norm": 0.18702688813209534,
      "learning_rate": 1.9420289855072467e-05,
      "loss": 0.7103,
      "step": 250
    },
    {
      "epoch": 0.6762028608582574,
      "grad_norm": 0.03623680770397186,
      "learning_rate": 1.7971014492753624e-05,
      "loss": 0.6185,
      "step": 260
    },
    {
      "epoch": 0.7022106631989596,
      "grad_norm": 0.026319777593016624,
      "learning_rate": 1.652173913043478e-05,
      "loss": 0.7065,
      "step": 270
    },
    {
      "epoch": 0.7282184655396619,
      "grad_norm": 0.018396981060504913,
      "learning_rate": 1.5072463768115944e-05,
      "loss": 0.6869,
      "step": 280
    },
    {
      "epoch": 0.7542262678803641,
      "grad_norm": 0.016413649544119835,
      "learning_rate": 1.3623188405797103e-05,
      "loss": 0.6865,
      "step": 290
    },
    {
      "epoch": 0.7802340702210663,
      "grad_norm": 0.1341114193201065,
      "learning_rate": 1.2173913043478261e-05,
      "loss": 0.7022,
      "step": 300
    },
    {
      "epoch": 0.8062418725617685,
      "grad_norm": 0.03741007670760155,
      "learning_rate": 1.072463768115942e-05,
      "loss": 0.6272,
      "step": 310
    },
    {
      "epoch": 0.8322496749024707,
      "grad_norm": 0.024399157613515854,
      "learning_rate": 9.27536231884058e-06,
      "loss": 0.6793,
      "step": 320
    },
    {
      "epoch": 0.8582574772431729,
      "grad_norm": 0.016972342506051064,
      "learning_rate": 7.82608695652174e-06,
      "loss": 0.7078,
      "step": 330
    },
    {
      "epoch": 0.8842652795838751,
      "grad_norm": 0.014587855897843838,
      "learning_rate": 6.376811594202898e-06,
      "loss": 0.7041,
      "step": 340
    },
    {
      "epoch": 0.9102730819245773,
      "grad_norm": 0.13855686783790588,
      "learning_rate": 4.927536231884058e-06,
      "loss": 0.6831,
      "step": 350
    },
    {
      "epoch": 0.9362808842652796,
      "grad_norm": 0.03484239801764488,
      "learning_rate": 3.4782608695652175e-06,
      "loss": 0.6321,
      "step": 360
    },
    {
      "epoch": 0.9622886866059818,
      "grad_norm": 0.022825093939900398,
      "learning_rate": 2.028985507246377e-06,
      "loss": 0.6889,
      "step": 370
    },
    {
      "epoch": 0.988296488946684,
      "grad_norm": 0.019488025456666946,
      "learning_rate": 5.797101449275362e-07,
      "loss": 0.6797,
      "step": 380
    }
  ],
  "logging_steps": 10,
  "max_steps": 384,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 8.415557240450187e+18,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}