{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.0,
  "eval_steps": 500,
  "global_step": 269,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0186219739292365,
      "grad_norm": 1.397485375404358,
      "learning_rate": 1.7647058823529412e-06,
      "loss": 1.3045,
      "step": 5
    },
    {
      "epoch": 0.037243947858473,
      "grad_norm": 0.9164593815803528,
      "learning_rate": 3.970588235294118e-06,
      "loss": 1.3238,
      "step": 10
    },
    {
      "epoch": 0.055865921787709494,
      "grad_norm": 0.7285172343254089,
      "learning_rate": 6.176470588235294e-06,
      "loss": 1.3402,
      "step": 15
    },
    {
      "epoch": 0.074487895716946,
      "grad_norm": 0.7771405577659607,
      "learning_rate": 8.382352941176472e-06,
      "loss": 1.2444,
      "step": 20
    },
    {
      "epoch": 0.0931098696461825,
      "grad_norm": 0.590904951095581,
      "learning_rate": 1.0588235294117648e-05,
      "loss": 1.2908,
      "step": 25
    },
    {
      "epoch": 0.11173184357541899,
      "grad_norm": 0.68792724609375,
      "learning_rate": 1.2794117647058824e-05,
      "loss": 1.2953,
      "step": 30
    },
    {
      "epoch": 0.1303538175046555,
      "grad_norm": 0.6625350117683411,
      "learning_rate": 1.5e-05,
      "loss": 1.192,
      "step": 35
    },
    {
      "epoch": 0.148975791433892,
      "grad_norm": 0.5564359426498413,
      "learning_rate": 1.7205882352941175e-05,
      "loss": 1.1406,
      "step": 40
    },
    {
      "epoch": 0.16759776536312848,
      "grad_norm": 0.4591367542743683,
      "learning_rate": 1.9411764705882355e-05,
      "loss": 1.181,
      "step": 45
    },
    {
      "epoch": 0.186219739292365,
      "grad_norm": 0.5679731369018555,
      "learning_rate": 2.161764705882353e-05,
      "loss": 1.1935,
      "step": 50
    },
    {
      "epoch": 0.2048417132216015,
      "grad_norm": 0.4359203577041626,
      "learning_rate": 2.3823529411764704e-05,
      "loss": 1.1527,
      "step": 55
    },
    {
      "epoch": 0.22346368715083798,
      "grad_norm": 0.49340277910232544,
      "learning_rate": 2.6029411764705883e-05,
      "loss": 1.1635,
      "step": 60
    },
    {
      "epoch": 0.24208566108007448,
      "grad_norm": 0.4740353226661682,
      "learning_rate": 2.823529411764706e-05,
      "loss": 1.1883,
      "step": 65
    },
    {
      "epoch": 0.260707635009311,
      "grad_norm": 0.6028391122817993,
      "learning_rate": 2.9999954608033783e-05,
      "loss": 1.1328,
      "step": 70
    },
    {
      "epoch": 0.27932960893854747,
      "grad_norm": 0.5162932872772217,
      "learning_rate": 2.9998365918062082e-05,
      "loss": 1.0934,
      "step": 75
    },
    {
      "epoch": 0.297951582867784,
      "grad_norm": 0.5382624864578247,
      "learning_rate": 2.9994507904496206e-05,
      "loss": 1.1019,
      "step": 80
    },
    {
      "epoch": 0.3165735567970205,
      "grad_norm": 0.5565810799598694,
      "learning_rate": 2.998838115107183e-05,
      "loss": 1.0906,
      "step": 85
    },
    {
      "epoch": 0.33519553072625696,
      "grad_norm": 0.5212501287460327,
      "learning_rate": 2.997998658479568e-05,
      "loss": 1.0724,
      "step": 90
    },
    {
      "epoch": 0.3538175046554935,
      "grad_norm": 0.5489919185638428,
      "learning_rate": 2.9969325475805274e-05,
      "loss": 1.0658,
      "step": 95
    },
    {
      "epoch": 0.37243947858473,
      "grad_norm": 0.6241554021835327,
      "learning_rate": 2.995639943717676e-05,
      "loss": 1.059,
      "step": 100
    },
    {
      "epoch": 0.39106145251396646,
      "grad_norm": 0.563462495803833,
      "learning_rate": 2.9941210424680813e-05,
      "loss": 1.057,
      "step": 105
    },
    {
      "epoch": 0.409683426443203,
      "grad_norm": 0.5747496485710144,
      "learning_rate": 2.9923760736486766e-05,
      "loss": 1.0564,
      "step": 110
    },
    {
      "epoch": 0.42830540037243947,
      "grad_norm": 0.5738973021507263,
      "learning_rate": 2.9904053012814848e-05,
      "loss": 1.0316,
      "step": 115
    },
    {
      "epoch": 0.44692737430167595,
      "grad_norm": 0.6159545183181763,
      "learning_rate": 2.988209023553672e-05,
      "loss": 0.995,
      "step": 120
    },
    {
      "epoch": 0.4655493482309125,
      "grad_norm": 0.6447931528091431,
      "learning_rate": 2.9857875727724304e-05,
      "loss": 0.9692,
      "step": 125
    },
    {
      "epoch": 0.48417132216014896,
      "grad_norm": 0.7060043215751648,
      "learning_rate": 2.9831413153146988e-05,
      "loss": 0.9569,
      "step": 130
    },
    {
      "epoch": 0.5027932960893855,
      "grad_norm": 0.7400388717651367,
      "learning_rate": 2.9802706515717272e-05,
      "loss": 0.9378,
      "step": 135
    },
    {
      "epoch": 0.521415270018622,
      "grad_norm": 0.7127872705459595,
      "learning_rate": 2.9771760158884972e-05,
      "loss": 0.8923,
      "step": 140
    },
    {
      "epoch": 0.5400372439478585,
      "grad_norm": 0.8123269081115723,
      "learning_rate": 2.9738578764980025e-05,
      "loss": 0.9285,
      "step": 145
    },
    {
      "epoch": 0.5586592178770949,
      "grad_norm": 0.8044330477714539,
      "learning_rate": 2.9703167354504027e-05,
      "loss": 0.885,
      "step": 150
    },
    {
      "epoch": 0.5772811918063314,
      "grad_norm": 0.7073454856872559,
      "learning_rate": 2.966553128537062e-05,
      "loss": 0.9022,
      "step": 155
    },
    {
      "epoch": 0.595903165735568,
      "grad_norm": 0.725287914276123,
      "learning_rate": 2.9625676252094797e-05,
      "loss": 0.8863,
      "step": 160
    },
    {
      "epoch": 0.6145251396648045,
      "grad_norm": 0.843565046787262,
      "learning_rate": 2.9583608284931317e-05,
      "loss": 0.928,
      "step": 165
    },
    {
      "epoch": 0.633147113594041,
      "grad_norm": 0.8186341524124146,
      "learning_rate": 2.953933374896227e-05,
      "loss": 0.8967,
      "step": 170
    },
    {
      "epoch": 0.6517690875232774,
      "grad_norm": 0.8052655458450317,
      "learning_rate": 2.949285934313405e-05,
      "loss": 0.8766,
      "step": 175
    },
    {
      "epoch": 0.6703910614525139,
      "grad_norm": 1.142082691192627,
      "learning_rate": 2.9444192099243733e-05,
      "loss": 0.8402,
      "step": 180
    },
    {
      "epoch": 0.6890130353817505,
      "grad_norm": 0.9169373512268066,
      "learning_rate": 2.939333938087515e-05,
      "loss": 0.8426,
      "step": 185
    },
    {
      "epoch": 0.707635009310987,
      "grad_norm": 0.8522142171859741,
      "learning_rate": 2.9340308882284747e-05,
      "loss": 0.8288,
      "step": 190
    },
    {
      "epoch": 0.7262569832402235,
      "grad_norm": 0.8226320147514343,
      "learning_rate": 2.92851086272374e-05,
      "loss": 0.8252,
      "step": 195
    },
    {
      "epoch": 0.74487895716946,
      "grad_norm": 0.840149462223053,
      "learning_rate": 2.9227746967792392e-05,
      "loss": 0.7888,
      "step": 200
    },
    {
      "epoch": 0.7635009310986964,
      "grad_norm": 1.0344595909118652,
      "learning_rate": 2.916823258303968e-05,
      "loss": 0.7889,
      "step": 205
    },
    {
      "epoch": 0.7821229050279329,
      "grad_norm": 0.9850447177886963,
      "learning_rate": 2.9106574477786748e-05,
      "loss": 0.7634,
      "step": 210
    },
    {
      "epoch": 0.8007448789571695,
      "grad_norm": 0.9077547192573547,
      "learning_rate": 2.9042781981196095e-05,
      "loss": 0.7372,
      "step": 215
    },
    {
      "epoch": 0.819366852886406,
      "grad_norm": 0.8616479635238647,
      "learning_rate": 2.897686474537373e-05,
      "loss": 0.7238,
      "step": 220
    },
    {
      "epoch": 0.8379888268156425,
      "grad_norm": 0.9395949244499207,
      "learning_rate": 2.890883274390872e-05,
      "loss": 0.6952,
      "step": 225
    },
    {
      "epoch": 0.8566108007448789,
      "grad_norm": 0.9580796957015991,
      "learning_rate": 2.8838696270364183e-05,
      "loss": 0.6983,
      "step": 230
    },
    {
      "epoch": 0.8752327746741154,
      "grad_norm": 0.8633882403373718,
      "learning_rate": 2.8766465936719785e-05,
      "loss": 0.7479,
      "step": 235
    },
    {
      "epoch": 0.8938547486033519,
      "grad_norm": 1.0677725076675415,
      "learning_rate": 2.869215267176612e-05,
      "loss": 0.7132,
      "step": 240
    },
    {
      "epoch": 0.9124767225325885,
      "grad_norm": 1.0318955183029175,
      "learning_rate": 2.8615767719451125e-05,
      "loss": 0.6744,
      "step": 245
    },
    {
      "epoch": 0.931098696461825,
      "grad_norm": 0.8933286070823669,
      "learning_rate": 2.8537322637178816e-05,
      "loss": 0.705,
      "step": 250
    },
    {
      "epoch": 0.9497206703910615,
      "grad_norm": 0.8829292058944702,
      "learning_rate": 2.8456829294060608e-05,
      "loss": 0.7091,
      "step": 255
    },
    {
      "epoch": 0.9683426443202979,
      "grad_norm": 1.1101434230804443,
      "learning_rate": 2.837429986911944e-05,
      "loss": 0.6532,
      "step": 260
    },
    {
      "epoch": 0.9869646182495344,
      "grad_norm": 1.070557713508606,
      "learning_rate": 2.828974684944707e-05,
      "loss": 0.6493,
      "step": 265
    }
  ],
  "logging_steps": 5,
  "max_steps": 1345,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 5,
  "save_steps": 2000,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 3.85525873404543e+17,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}