{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.766671573678787,
  "eval_steps": 500,
  "global_step": 1500,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.014721036360959812,
      "grad_norm": 1.0893694162368774,
      "learning_rate": 4.808635917566242e-06,
      "loss": 1.1444,
      "step": 50
    },
    {
      "epoch": 0.029442072721919624,
      "grad_norm": 1.118213415145874,
      "learning_rate": 9.715407262021591e-06,
      "loss": 0.9497,
      "step": 100
    },
    {
      "epoch": 0.04416310908287943,
      "grad_norm": 1.3216179609298706,
      "learning_rate": 1.4622178606476939e-05,
      "loss": 0.8346,
      "step": 150
    },
    {
      "epoch": 0.05888414544383925,
      "grad_norm": 1.292870044708252,
      "learning_rate": 1.9528949950932288e-05,
      "loss": 0.7699,
      "step": 200
    },
    {
      "epoch": 0.07360518180479905,
      "grad_norm": 1.4046356678009033,
      "learning_rate": 2.4435721295387637e-05,
      "loss": 0.7252,
      "step": 250
    },
    {
      "epoch": 0.08832621816575886,
      "grad_norm": 1.4438663721084595,
      "learning_rate": 2.9342492639842983e-05,
      "loss": 0.6996,
      "step": 300
    },
    {
      "epoch": 0.10304725452671869,
      "grad_norm": 1.3257337808609009,
      "learning_rate": 3.424926398429833e-05,
      "loss": 0.686,
      "step": 350
    },
    {
      "epoch": 0.1177682908876785,
      "grad_norm": 1.3639789819717407,
      "learning_rate": 3.9156035328753685e-05,
      "loss": 0.6601,
      "step": 400
    },
    {
      "epoch": 0.1324893272486383,
      "grad_norm": 1.4148070812225342,
      "learning_rate": 4.406280667320903e-05,
      "loss": 0.6371,
      "step": 450
    },
    {
      "epoch": 0.1472103636095981,
      "grad_norm": 1.3940412998199463,
      "learning_rate": 4.8969578017664384e-05,
      "loss": 0.6417,
      "step": 500
    },
    {
      "epoch": 0.6477255998822317,
      "grad_norm": 0.6072946190834045,
      "learning_rate": 9.599483839268026e-05,
      "loss": 0.6122,
      "step": 550
    },
    {
      "epoch": 0.7066097453260709,
      "grad_norm": 0.6030572652816772,
      "learning_rate": 9.454410179022932e-05,
      "loss": 0.5809,
      "step": 600
    },
    {
      "epoch": 0.7654938907699103,
      "grad_norm": 0.5781008005142212,
      "learning_rate": 9.288422825194501e-05,
      "loss": 0.5446,
      "step": 650
    },
    {
      "epoch": 0.8243780362137495,
      "grad_norm": 0.5412103533744812,
      "learning_rate": 9.102301097269974e-05,
      "loss": 0.5339,
      "step": 700
    },
    {
      "epoch": 0.8832621816575887,
      "grad_norm": 0.5678456425666809,
      "learning_rate": 8.896918846697821e-05,
      "loss": 0.5296,
      "step": 750
    },
    {
      "epoch": 0.942146327101428,
      "grad_norm": 0.525556206703186,
      "learning_rate": 8.673240354108538e-05,
      "loss": 0.5176,
      "step": 800
    },
    {
      "epoch": 1.0011776829088768,
      "grad_norm": 1.9685856103897095,
      "learning_rate": 8.432315801965616e-05,
      "loss": 0.5104,
      "step": 850
    },
    {
      "epoch": 1.0600618283527161,
      "grad_norm": 0.6006094217300415,
      "learning_rate": 8.175276343902802e-05,
      "loss": 0.4685,
      "step": 900
    },
    {
      "epoch": 1.1189459737965552,
      "grad_norm": 0.5228903889656067,
      "learning_rate": 7.903328793897418e-05,
      "loss": 0.473,
      "step": 950
    },
    {
      "epoch": 1.1778301192403946,
      "grad_norm": 0.5006899237632751,
      "learning_rate": 7.6177499602143e-05,
      "loss": 0.4679,
      "step": 1000
    },
    {
      "epoch": 1.1778301192403946,
      "eval_loss": 0.4844963848590851,
      "eval_runtime": 2172.4438,
      "eval_samples_per_second": 1.39,
      "eval_steps_per_second": 0.695,
      "step": 1000
    },
    {
      "epoch": 1.2367142646842337,
      "grad_norm": 0.5041179060935974,
      "learning_rate": 7.319880650722838e-05,
      "loss": 0.4541,
      "step": 1050
    },
    {
      "epoch": 1.295598410128073,
      "grad_norm": 0.5369197726249695,
      "learning_rate": 7.01111937773246e-05,
      "loss": 0.4576,
      "step": 1100
    },
    {
      "epoch": 1.3544825555719122,
      "grad_norm": 0.5211925506591797,
      "learning_rate": 6.692915791902665e-05,
      "loss": 0.4472,
      "step": 1150
    },
    {
      "epoch": 1.4133667010157516,
      "grad_norm": 0.5664705038070679,
      "learning_rate": 6.366763876055806e-05,
      "loss": 0.4427,
      "step": 1200
    },
    {
      "epoch": 1.4722508464595907,
      "grad_norm": 0.5420666337013245,
      "learning_rate": 6.034194930847975e-05,
      "loss": 0.4395,
      "step": 1250
    },
    {
      "epoch": 1.53113499190343,
      "grad_norm": 0.558952271938324,
      "learning_rate": 5.6967703852306786e-05,
      "loss": 0.4305,
      "step": 1300
    },
    {
      "epoch": 1.5900191373472694,
      "grad_norm": 0.510136067867279,
      "learning_rate": 5.356074465458553e-05,
      "loss": 0.428,
      "step": 1350
    },
    {
      "epoch": 1.6489032827911085,
      "grad_norm": 0.506799578666687,
      "learning_rate": 5.013706757062534e-05,
      "loss": 0.4251,
      "step": 1400
    },
    {
      "epoch": 1.7077874282349477,
      "grad_norm": 0.5179591178894043,
      "learning_rate": 4.671274694710388e-05,
      "loss": 0.4188,
      "step": 1450
    },
    {
      "epoch": 1.766671573678787,
      "grad_norm": 0.531908392906189,
      "learning_rate": 4.3303860152151445e-05,
      "loss": 0.4177,
      "step": 1500
    },
    {
      "epoch": 1.766671573678787,
      "eval_loss": 0.4321376383304596,
      "eval_runtime": 2174.1694,
      "eval_samples_per_second": 1.389,
      "eval_steps_per_second": 0.695,
      "step": 1500
    }
  ],
  "logging_steps": 50,
  "max_steps": 2547,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 2.011350882666414e+17,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}