{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 4.924302788844622,
  "eval_steps": 500,
  "global_step": 310,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.1593625498007968,
      "grad_norm": 1.439923644065857,
      "learning_rate": 6.25e-06,
      "loss": 0.8005,
      "step": 10
    },
    {
      "epoch": 0.3187250996015936,
      "grad_norm": 1.1607290506362915,
      "learning_rate": 9.995433337085492e-06,
      "loss": 0.6192,
      "step": 20
    },
    {
      "epoch": 0.47808764940239046,
      "grad_norm": 0.735303521156311,
      "learning_rate": 9.944154131125643e-06,
      "loss": 0.5349,
      "step": 30
    },
    {
      "epoch": 0.6374501992031872,
      "grad_norm": 1.0546119213104248,
      "learning_rate": 9.836474315195148e-06,
      "loss": 0.5105,
      "step": 40
    },
    {
      "epoch": 0.796812749003984,
      "grad_norm": 0.655317485332489,
      "learning_rate": 9.673622250534155e-06,
      "loss": 0.4992,
      "step": 50
    },
    {
      "epoch": 0.9561752988047809,
      "grad_norm": 0.8014914393424988,
      "learning_rate": 9.457455677726447e-06,
      "loss": 0.4943,
      "step": 60
    },
    {
      "epoch": 1.1115537848605577,
      "grad_norm": 0.8364565372467041,
      "learning_rate": 9.190440524459203e-06,
      "loss": 0.4301,
      "step": 70
    },
    {
      "epoch": 1.2709163346613546,
      "grad_norm": 0.6313614249229431,
      "learning_rate": 8.87562277536726e-06,
      "loss": 0.3869,
      "step": 80
    },
    {
      "epoch": 1.4302788844621515,
      "grad_norm": 0.7729827761650085,
      "learning_rate": 8.516593724857598e-06,
      "loss": 0.3895,
      "step": 90
    },
    {
      "epoch": 1.5896414342629481,
      "grad_norm": 0.5305516123771667,
      "learning_rate": 8.117449009293668e-06,
      "loss": 0.3809,
      "step": 100
    },
    {
      "epoch": 1.749003984063745,
      "grad_norm": 0.6976670026779175,
      "learning_rate": 7.682741885881314e-06,
      "loss": 0.3707,
      "step": 110
    },
    {
      "epoch": 1.908366533864542,
      "grad_norm": 0.5881310701370239,
      "learning_rate": 7.217431291229068e-06,
      "loss": 0.3831,
      "step": 120
    },
    {
      "epoch": 2.0637450199203187,
      "grad_norm": 0.5917549729347229,
      "learning_rate": 6.726825272106539e-06,
      "loss": 0.3343,
      "step": 130
    },
    {
      "epoch": 2.2231075697211153,
      "grad_norm": 0.6392484903335571,
      "learning_rate": 6.216520433716544e-06,
      "loss": 0.2776,
      "step": 140
    },
    {
      "epoch": 2.3824701195219125,
      "grad_norm": 0.5469350814819336,
      "learning_rate": 5.69233809622687e-06,
      "loss": 0.2751,
      "step": 150
    },
    {
      "epoch": 2.541832669322709,
      "grad_norm": 0.5329071879386902,
      "learning_rate": 5.160257887858278e-06,
      "loss": 0.2758,
      "step": 160
    },
    {
      "epoch": 2.7011952191235062,
      "grad_norm": 0.608709454536438,
      "learning_rate": 4.626349532067879e-06,
      "loss": 0.2711,
      "step": 170
    },
    {
      "epoch": 2.860557768924303,
      "grad_norm": 0.5087049603462219,
      "learning_rate": 4.096703606968007e-06,
      "loss": 0.2685,
      "step": 180
    },
    {
      "epoch": 3.0159362549800797,
      "grad_norm": 0.7022324800491333,
      "learning_rate": 3.5773620668448384e-06,
      "loss": 0.2626,
      "step": 190
    },
    {
      "epoch": 3.1752988047808763,
      "grad_norm": 0.5048023462295532,
      "learning_rate": 3.074249318355046e-06,
      "loss": 0.1978,
      "step": 200
    },
    {
      "epoch": 3.3346613545816735,
      "grad_norm": 0.4734826385974884,
      "learning_rate": 2.5931046376510875e-06,
      "loss": 0.1886,
      "step": 210
    },
    {
      "epoch": 3.49402390438247,
      "grad_norm": 0.6656137108802795,
      "learning_rate": 2.139416699389153e-06,
      "loss": 0.1918,
      "step": 220
    },
    {
      "epoch": 3.653386454183267,
      "grad_norm": 0.4610200524330139,
      "learning_rate": 1.7183609644824096e-06,
      "loss": 0.1908,
      "step": 230
    },
    {
      "epoch": 3.812749003984064,
      "grad_norm": 0.5110896229743958,
      "learning_rate": 1.3347406408508695e-06,
      "loss": 0.1758,
      "step": 240
    },
    {
      "epoch": 3.9721115537848606,
      "grad_norm": 0.4129928946495056,
      "learning_rate": 9.929318906602176e-07,
      "loss": 0.1944,
      "step": 250
    },
    {
      "epoch": 4.127490039840637,
      "grad_norm": 0.39533188939094543,
      "learning_rate": 6.968339090999188e-07,
      "loss": 0.1561,
      "step": 260
    },
    {
      "epoch": 4.286852589641434,
      "grad_norm": 0.4790317118167877,
      "learning_rate": 4.4982444417866753e-07,
      "loss": 0.1381,
      "step": 270
    },
    {
      "epoch": 4.446215139442231,
      "grad_norm": 0.39792048931121826,
      "learning_rate": 2.547212649466568e-07,
      "loss": 0.1532,
      "step": 280
    },
    {
      "epoch": 4.605577689243028,
      "grad_norm": 0.4457632899284363,
      "learning_rate": 1.1375001769728e-07,
      "loss": 0.153,
      "step": 290
    },
    {
      "epoch": 4.764940239043825,
      "grad_norm": 0.36862707138061523,
      "learning_rate": 2.8518836829732332e-08,
      "loss": 0.1556,
      "step": 300
    },
    {
      "epoch": 4.924302788844622,
      "grad_norm": 0.44045692682266235,
      "learning_rate": 0.0,
      "loss": 0.1487,
      "step": 310
    },
    {
      "epoch": 4.924302788844622,
      "step": 310,
      "total_flos": 238832327327744.0,
      "train_loss": 0.3164117013254473,
      "train_runtime": 47203.2069,
      "train_samples_per_second": 0.212,
      "train_steps_per_second": 0.007
    }
  ],
  "logging_steps": 10,
  "max_steps": 310,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 5,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": false,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 238832327327744.0,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}