{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.0,
  "eval_steps": 500,
  "global_step": 250,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.02004008016032064,
      "grad_norm": 0.9890378713607788,
      "learning_rate": 1.9047619047619047e-06,
      "loss": 1.4301,
      "step": 5
    },
    {
      "epoch": 0.04008016032064128,
      "grad_norm": 0.863508939743042,
      "learning_rate": 4.2857142857142855e-06,
      "loss": 1.4641,
      "step": 10
    },
    {
      "epoch": 0.06012024048096192,
      "grad_norm": 0.6787753701210022,
      "learning_rate": 6.666666666666667e-06,
      "loss": 1.4197,
      "step": 15
    },
    {
      "epoch": 0.08016032064128256,
      "grad_norm": 0.6681222319602966,
      "learning_rate": 9.047619047619047e-06,
      "loss": 1.4312,
      "step": 20
    },
    {
      "epoch": 0.10020040080160321,
      "grad_norm": 0.5663285255432129,
      "learning_rate": 1.1428571428571429e-05,
      "loss": 1.3627,
      "step": 25
    },
    {
      "epoch": 0.12024048096192384,
      "grad_norm": 0.5837746858596802,
      "learning_rate": 1.380952380952381e-05,
      "loss": 1.3997,
      "step": 30
    },
    {
      "epoch": 0.1402805611222445,
      "grad_norm": 0.5583487153053284,
      "learning_rate": 1.619047619047619e-05,
      "loss": 1.3404,
      "step": 35
    },
    {
      "epoch": 0.16032064128256512,
      "grad_norm": 0.4913857877254486,
      "learning_rate": 1.8571428571428572e-05,
      "loss": 1.3386,
      "step": 40
    },
    {
      "epoch": 0.18036072144288579,
      "grad_norm": 0.573939323425293,
      "learning_rate": 2.095238095238095e-05,
      "loss": 1.3201,
      "step": 45
    },
    {
      "epoch": 0.20040080160320642,
      "grad_norm": 0.508334219455719,
      "learning_rate": 2.3333333333333336e-05,
      "loss": 1.2503,
      "step": 50
    },
    {
      "epoch": 0.22044088176352705,
      "grad_norm": 0.5239691734313965,
      "learning_rate": 2.5714285714285714e-05,
      "loss": 1.2509,
      "step": 55
    },
    {
      "epoch": 0.24048096192384769,
      "grad_norm": 0.7545399069786072,
      "learning_rate": 2.8095238095238096e-05,
      "loss": 1.2877,
      "step": 60
    },
    {
      "epoch": 0.2605210420841683,
      "grad_norm": 0.4638131260871887,
      "learning_rate": 2.9999947463720068e-05,
      "loss": 1.219,
      "step": 65
    },
    {
      "epoch": 0.280561122244489,
      "grad_norm": 0.5463809370994568,
      "learning_rate": 2.9998108732563026e-05,
      "loss": 1.2131,
      "step": 70
    },
    {
      "epoch": 0.30060120240480964,
      "grad_norm": 0.5909023284912109,
      "learning_rate": 2.9993643555405922e-05,
      "loss": 1.1617,
      "step": 75
    },
    {
      "epoch": 0.32064128256513025,
      "grad_norm": 0.4846280515193939,
      "learning_rate": 2.9986552714183782e-05,
      "loss": 1.2422,
      "step": 80
    },
    {
      "epoch": 0.3406813627254509,
      "grad_norm": 0.6445205807685852,
      "learning_rate": 2.9976837450633944e-05,
      "loss": 1.1748,
      "step": 85
    },
    {
      "epoch": 0.36072144288577157,
      "grad_norm": 0.6564697623252869,
      "learning_rate": 2.996449946607859e-05,
      "loss": 1.1738,
      "step": 90
    },
    {
      "epoch": 0.3807615230460922,
      "grad_norm": 0.7153516411781311,
      "learning_rate": 2.9949540921126824e-05,
      "loss": 1.1438,
      "step": 95
    },
    {
      "epoch": 0.40080160320641284,
      "grad_norm": 0.6607214212417603,
      "learning_rate": 2.9931964435296292e-05,
      "loss": 1.1229,
      "step": 100
    },
    {
      "epoch": 0.42084168336673344,
      "grad_norm": 0.6068851351737976,
      "learning_rate": 2.991177308655447e-05,
      "loss": 1.1227,
      "step": 105
    },
    {
      "epoch": 0.4408817635270541,
      "grad_norm": 0.6417985558509827,
      "learning_rate": 2.988897041077966e-05,
      "loss": 1.1068,
      "step": 110
    },
    {
      "epoch": 0.46092184368737477,
      "grad_norm": 0.6902046203613281,
      "learning_rate": 2.9863560401141773e-05,
      "loss": 1.1214,
      "step": 115
    },
    {
      "epoch": 0.48096192384769537,
      "grad_norm": 0.6481944918632507,
      "learning_rate": 2.9835547507403067e-05,
      "loss": 1.0765,
      "step": 120
    },
    {
      "epoch": 0.501002004008016,
      "grad_norm": 0.7215368747711182,
      "learning_rate": 2.980493663513891e-05,
      "loss": 1.0368,
      "step": 125
    },
    {
      "epoch": 0.5210420841683366,
      "grad_norm": 0.7066978216171265,
      "learning_rate": 2.9771733144878706e-05,
      "loss": 0.9668,
      "step": 130
    },
    {
      "epoch": 0.5410821643286573,
      "grad_norm": 0.7489072680473328,
      "learning_rate": 2.9735942851167202e-05,
      "loss": 1.0239,
      "step": 135
    },
    {
      "epoch": 0.561122244488978,
      "grad_norm": 0.7932925224304199,
      "learning_rate": 2.9697572021546216e-05,
      "loss": 0.9795,
      "step": 140
    },
    {
      "epoch": 0.5811623246492986,
      "grad_norm": 0.9287444949150085,
      "learning_rate": 2.9656627375457102e-05,
      "loss": 0.9915,
      "step": 145
    },
    {
      "epoch": 0.6012024048096193,
      "grad_norm": 0.8538782596588135,
      "learning_rate": 2.961311608306403e-05,
      "loss": 1.0117,
      "step": 150
    },
    {
      "epoch": 0.6212424849699398,
      "grad_norm": 0.773285984992981,
      "learning_rate": 2.956704576399838e-05,
      "loss": 0.9697,
      "step": 155
    },
    {
      "epoch": 0.6412825651302605,
      "grad_norm": 0.9210479855537415,
      "learning_rate": 2.9518424486024382e-05,
      "loss": 0.9589,
      "step": 160
    },
    {
      "epoch": 0.6613226452905812,
      "grad_norm": 0.7253521680831909,
      "learning_rate": 2.9467260763626323e-05,
      "loss": 0.9559,
      "step": 165
    },
    {
      "epoch": 0.6813627254509018,
      "grad_norm": 1.1397440433502197,
      "learning_rate": 2.9413563556517483e-05,
      "loss": 0.9394,
      "step": 170
    },
    {
      "epoch": 0.7014028056112225,
      "grad_norm": 0.9524412751197815,
      "learning_rate": 2.935734226807114e-05,
      "loss": 0.9114,
      "step": 175
    },
    {
      "epoch": 0.7214428857715431,
      "grad_norm": 0.8871064186096191,
      "learning_rate": 2.9298606743673854e-05,
      "loss": 0.865,
      "step": 180
    },
    {
      "epoch": 0.7414829659318637,
      "grad_norm": 0.9255661368370056,
      "learning_rate": 2.9237367269001362e-05,
      "loss": 0.9108,
      "step": 185
    },
    {
      "epoch": 0.7615230460921844,
      "grad_norm": 0.9735074639320374,
      "learning_rate": 2.9173634568217366e-05,
      "loss": 0.8465,
      "step": 190
    },
    {
      "epoch": 0.781563126252505,
      "grad_norm": 0.9207878112792969,
      "learning_rate": 2.9107419802095546e-05,
      "loss": 0.8566,
      "step": 195
    },
    {
      "epoch": 0.8016032064128257,
      "grad_norm": 0.9188950657844543,
      "learning_rate": 2.9038734566065068e-05,
      "loss": 0.815,
      "step": 200
    },
    {
      "epoch": 0.8216432865731463,
      "grad_norm": 1.1056013107299805,
      "learning_rate": 2.8967590888180052e-05,
      "loss": 0.8299,
      "step": 205
    },
    {
      "epoch": 0.8416833667334669,
      "grad_norm": 1.0484192371368408,
      "learning_rate": 2.8894001227013213e-05,
      "loss": 0.8266,
      "step": 210
    },
    {
      "epoch": 0.8617234468937875,
      "grad_norm": 0.9629917144775391,
      "learning_rate": 2.8817978469474137e-05,
      "loss": 0.8386,
      "step": 215
    },
    {
      "epoch": 0.8817635270541082,
      "grad_norm": 0.9744315147399902,
      "learning_rate": 2.873953592855255e-05,
      "loss": 0.7977,
      "step": 220
    },
    {
      "epoch": 0.9018036072144289,
      "grad_norm": 1.0901851654052734,
      "learning_rate": 2.8658687340986962e-05,
      "loss": 0.7714,
      "step": 225
    },
    {
      "epoch": 0.9218436873747495,
      "grad_norm": 1.034933090209961,
      "learning_rate": 2.8575446864859115e-05,
      "loss": 0.8256,
      "step": 230
    },
    {
      "epoch": 0.9418837675350702,
      "grad_norm": 1.0317702293395996,
      "learning_rate": 2.8489829077114644e-05,
      "loss": 0.7881,
      "step": 235
    },
    {
      "epoch": 0.9619238476953907,
      "grad_norm": 1.095956802368164,
      "learning_rate": 2.8401848971010376e-05,
      "loss": 0.7371,
      "step": 240
    },
    {
      "epoch": 0.9819639278557114,
      "grad_norm": 1.2121639251708984,
      "learning_rate": 2.8311521953488738e-05,
      "loss": 0.7487,
      "step": 245
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.322547197341919,
      "learning_rate": 2.8218863842479712e-05,
      "loss": 0.7738,
      "step": 250
    }
  ],
  "logging_steps": 5,
  "max_steps": 1250,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 5,
  "save_steps": 2000,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 3.345331284690862e+17,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}