{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 3.1008142690965492,
  "eval_steps": 500,
  "global_step": 2000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.07754943776657619,
      "grad_norm": 0.41543584930406274,
      "learning_rate": 1.984472049689441e-05,
      "loss": 0.3571,
      "step": 50
    },
    {
      "epoch": 0.15509887553315238,
      "grad_norm": 0.2360007761618504,
      "learning_rate": 1.9689440993788823e-05,
      "loss": 0.1481,
      "step": 100
    },
    {
      "epoch": 0.23264831329972857,
      "grad_norm": 0.2459469865789585,
      "learning_rate": 1.9534161490683232e-05,
      "loss": 0.1413,
      "step": 150
    },
    {
      "epoch": 0.31019775106630476,
      "grad_norm": 0.1836701486527724,
      "learning_rate": 1.937888198757764e-05,
      "loss": 0.139,
      "step": 200
    },
    {
      "epoch": 0.38774718883288095,
      "grad_norm": 0.2053715213473303,
      "learning_rate": 1.922360248447205e-05,
      "loss": 0.1359,
      "step": 250
    },
    {
      "epoch": 0.46529662659945714,
      "grad_norm": 0.20210517044823975,
      "learning_rate": 1.906832298136646e-05,
      "loss": 0.1343,
      "step": 300
    },
    {
      "epoch": 0.5428460643660333,
      "grad_norm": 0.17133333163173686,
      "learning_rate": 1.891304347826087e-05,
      "loss": 0.1336,
      "step": 350
    },
    {
      "epoch": 0.6203955021326095,
      "grad_norm": 0.16513031282114732,
      "learning_rate": 1.875776397515528e-05,
      "loss": 0.1323,
      "step": 400
    },
    {
      "epoch": 0.6979449398991857,
      "grad_norm": 0.16414324471781971,
      "learning_rate": 1.8602484472049693e-05,
      "loss": 0.1318,
      "step": 450
    },
    {
      "epoch": 0.7754943776657619,
      "grad_norm": 0.16672201846671922,
      "learning_rate": 1.84472049689441e-05,
      "loss": 0.1307,
      "step": 500
    },
    {
      "epoch": 0.8530438154323381,
      "grad_norm": 0.1588831815209266,
      "learning_rate": 1.829192546583851e-05,
      "loss": 0.1301,
      "step": 550
    },
    {
      "epoch": 0.9305932531989143,
      "grad_norm": 0.17229438485787515,
      "learning_rate": 1.8136645962732923e-05,
      "loss": 0.13,
      "step": 600
    },
    {
      "epoch": 1.0077549437766575,
      "grad_norm": 0.1626649495069495,
      "learning_rate": 1.798136645962733e-05,
      "loss": 0.1284,
      "step": 650
    },
    {
      "epoch": 1.0853043815432337,
      "grad_norm": 0.16302373242598406,
      "learning_rate": 1.782608695652174e-05,
      "loss": 0.1256,
      "step": 700
    },
    {
      "epoch": 1.16285381930981,
      "grad_norm": 0.15938032051749196,
      "learning_rate": 1.767080745341615e-05,
      "loss": 0.1252,
      "step": 750
    },
    {
      "epoch": 1.240403257076386,
      "grad_norm": 0.19138770209482472,
      "learning_rate": 1.751552795031056e-05,
      "loss": 0.1244,
      "step": 800
    },
    {
      "epoch": 1.3179526948429623,
      "grad_norm": 0.15984339587089894,
      "learning_rate": 1.736024844720497e-05,
      "loss": 0.1253,
      "step": 850
    },
    {
      "epoch": 1.3955021326095385,
      "grad_norm": 0.1502502706654219,
      "learning_rate": 1.720496894409938e-05,
      "loss": 0.1248,
      "step": 900
    },
    {
      "epoch": 1.4730515703761147,
      "grad_norm": 0.13661135477508957,
      "learning_rate": 1.704968944099379e-05,
      "loss": 0.125,
      "step": 950
    },
    {
      "epoch": 1.5506010081426909,
      "grad_norm": 0.24839381800982097,
      "learning_rate": 1.68944099378882e-05,
      "loss": 0.1253,
      "step": 1000
    },
    {
      "epoch": 1.628150445909267,
      "grad_norm": 0.12815184233515442,
      "learning_rate": 1.673913043478261e-05,
      "loss": 0.1243,
      "step": 1050
    },
    {
      "epoch": 1.7056998836758432,
      "grad_norm": 0.13153520379094585,
      "learning_rate": 1.658385093167702e-05,
      "loss": 0.1237,
      "step": 1100
    },
    {
      "epoch": 1.7832493214424194,
      "grad_norm": 0.1189084339669079,
      "learning_rate": 1.642857142857143e-05,
      "loss": 0.1245,
      "step": 1150
    },
    {
      "epoch": 1.8607987592089956,
      "grad_norm": 0.15491708781159905,
      "learning_rate": 1.627329192546584e-05,
      "loss": 0.1235,
      "step": 1200
    },
    {
      "epoch": 1.9383481969755718,
      "grad_norm": 0.12739351593431672,
      "learning_rate": 1.611801242236025e-05,
      "loss": 0.1243,
      "step": 1250
    },
    {
      "epoch": 2.015509887553315,
      "grad_norm": 0.12465194041174449,
      "learning_rate": 1.596273291925466e-05,
      "loss": 0.1219,
      "step": 1300
    },
    {
      "epoch": 2.0930593253198913,
      "grad_norm": 0.1404295274618665,
      "learning_rate": 1.580745341614907e-05,
      "loss": 0.1186,
      "step": 1350
    },
    {
      "epoch": 2.1706087630864674,
      "grad_norm": 0.1359342551816161,
      "learning_rate": 1.565217391304348e-05,
      "loss": 0.1179,
      "step": 1400
    },
    {
      "epoch": 2.2481582008530436,
      "grad_norm": 0.15332233562241915,
      "learning_rate": 1.549689440993789e-05,
      "loss": 0.1185,
      "step": 1450
    },
    {
      "epoch": 2.32570763861962,
      "grad_norm": 0.11859966428735469,
      "learning_rate": 1.5341614906832298e-05,
      "loss": 0.1185,
      "step": 1500
    },
    {
      "epoch": 2.403257076386196,
      "grad_norm": 0.1493931915889296,
      "learning_rate": 1.5186335403726709e-05,
      "loss": 0.1186,
      "step": 1550
    },
    {
      "epoch": 2.480806514152772,
      "grad_norm": 0.1319324405407719,
      "learning_rate": 1.5031055900621118e-05,
      "loss": 0.1189,
      "step": 1600
    },
    {
      "epoch": 2.5583559519193484,
      "grad_norm": 0.12024679968154829,
      "learning_rate": 1.4875776397515529e-05,
      "loss": 0.1187,
      "step": 1650
    },
    {
      "epoch": 2.6359053896859246,
      "grad_norm": 0.11739796193835754,
      "learning_rate": 1.472049689440994e-05,
      "loss": 0.1187,
      "step": 1700
    },
    {
      "epoch": 2.7134548274525008,
      "grad_norm": 0.13178912063786213,
      "learning_rate": 1.456521739130435e-05,
      "loss": 0.1187,
      "step": 1750
    },
    {
      "epoch": 2.791004265219077,
      "grad_norm": 0.11853885559187641,
      "learning_rate": 1.4409937888198759e-05,
      "loss": 0.119,
      "step": 1800
    },
    {
      "epoch": 2.868553702985653,
      "grad_norm": 0.12827528414635014,
      "learning_rate": 1.425465838509317e-05,
      "loss": 0.1187,
      "step": 1850
    },
    {
      "epoch": 2.9461031407522293,
      "grad_norm": 0.13153122815676796,
      "learning_rate": 1.409937888198758e-05,
      "loss": 0.1188,
      "step": 1900
    },
    {
      "epoch": 3.023264831329973,
      "grad_norm": 0.15315655570709347,
      "learning_rate": 1.3944099378881988e-05,
      "loss": 0.1156,
      "step": 1950
    },
    {
      "epoch": 3.1008142690965492,
      "grad_norm": 0.11997442542975086,
      "learning_rate": 1.3788819875776398e-05,
      "loss": 0.1105,
      "step": 2000
    }
  ],
  "logging_steps": 50,
  "max_steps": 6440,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 10,
  "save_steps": 1000,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 2.0334530111995904e+16,
  "train_batch_size": 16,
  "trial_name": null,
  "trial_params": null
}