{
  "best_metric": 0.5491589903831482,
  "best_model_checkpoint": "checkpoints/checkpoint-200",
  "epoch": 2.9723076923076923,
  "eval_steps": 100,
  "global_step": 243,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.12307692307692308,
      "gpu_memory": 4887.19873046875,
      "grad_norm": 0.4532247483730316,
      "learning_rate": 0.00023076923076923076,
      "loss": 2.0289,
      "step": 10
    },
    {
      "epoch": 0.24615384615384617,
      "gpu_memory": 4887.19873046875,
      "grad_norm": 0.4620782732963562,
      "learning_rate": 0.00029931487386844626,
      "loss": 1.6411,
      "step": 20
    },
    {
      "epoch": 0.36923076923076925,
      "gpu_memory": 4887.19873046875,
      "grad_norm": 0.3020133078098297,
      "learning_rate": 0.0002959742119362563,
      "loss": 1.3515,
      "step": 30
    },
    {
      "epoch": 0.49230769230769234,
      "gpu_memory": 4887.19873046875,
      "grad_norm": 0.31100866198539734,
      "learning_rate": 0.0002899143266295095,
      "loss": 1.1845,
      "step": 40
    },
    {
      "epoch": 0.6153846153846154,
      "gpu_memory": 4887.19873046875,
      "grad_norm": 0.3017350733280182,
      "learning_rate": 0.00028124810214572737,
      "loss": 1.1433,
      "step": 50
    },
    {
      "epoch": 0.7384615384615385,
      "gpu_memory": 4887.19873046875,
      "grad_norm": 0.3388933837413788,
      "learning_rate": 0.0002701369738499162,
      "loss": 1.0192,
      "step": 60
    },
    {
      "epoch": 0.8615384615384616,
      "gpu_memory": 4887.19873046875,
      "grad_norm": 0.36166566610336304,
      "learning_rate": 0.00025678792103916504,
      "loss": 0.9971,
      "step": 70
    },
    {
      "epoch": 0.9846153846153847,
      "gpu_memory": 4887.19873046875,
      "grad_norm": 0.34558528661727905,
      "learning_rate": 0.00024144961130996017,
      "loss": 0.9646,
      "step": 80
    },
    {
      "epoch": 1.0984615384615384,
      "gpu_memory": 4887.19873046875,
      "grad_norm": 0.3525680601596832,
      "learning_rate": 0.0002244077683513602,
      "loss": 0.9099,
      "step": 90
    },
    {
      "epoch": 1.2215384615384615,
      "gpu_memory": 4887.19873046875,
      "grad_norm": 0.43674904108047485,
      "learning_rate": 0.0002059798494532787,
      "loss": 0.8937,
      "step": 100
    },
    {
      "epoch": 1.2215384615384615,
      "eval_loss": 0.6234937310218811,
      "eval_runtime": 0.2002,
      "eval_samples_per_second": 4.994,
      "eval_steps_per_second": 4.994,
      "gpu_memory": 4887.19873046875,
      "learning_rate": 0.0002059798494532787,
      "step": 100
    },
    {
      "epoch": 1.3446153846153845,
      "gpu_memory": 4887.19873046875,
      "grad_norm": 0.3604443073272705,
      "learning_rate": 0.00018650913187782535,
      "loss": 0.8791,
      "step": 110
    },
    {
      "epoch": 1.4676923076923076,
      "gpu_memory": 4887.19873046875,
      "grad_norm": 0.349542498588562,
      "learning_rate": 0.00016635831825341846,
      "loss": 0.8584,
      "step": 120
    },
    {
      "epoch": 1.5907692307692307,
      "gpu_memory": 4887.19873046875,
      "grad_norm": 0.3813496232032776,
      "learning_rate": 0.00014590278011107714,
      "loss": 0.8552,
      "step": 130
    },
    {
      "epoch": 1.7138461538461538,
      "gpu_memory": 4887.19873046875,
      "grad_norm": 0.3789571225643158,
      "learning_rate": 0.00012552356542302868,
      "loss": 0.8731,
      "step": 140
    },
    {
      "epoch": 1.8369230769230769,
      "gpu_memory": 4887.19873046875,
      "grad_norm": 0.3803671896457672,
      "learning_rate": 0.00010560030039995649,
      "loss": 0.8411,
      "step": 150
    },
    {
      "epoch": 1.96,
      "gpu_memory": 4887.19873046875,
      "grad_norm": 0.4134896397590637,
      "learning_rate": 8.650411777297534e-05,
      "loss": 0.8157,
      "step": 160
    },
    {
      "epoch": 2.0738461538461537,
      "gpu_memory": 4887.19873046875,
      "grad_norm": 0.39426007866859436,
      "learning_rate": 6.859074329306077e-05,
      "loss": 0.8023,
      "step": 170
    },
    {
      "epoch": 2.1969230769230768,
      "gpu_memory": 4887.19873046875,
      "grad_norm": 0.3811410367488861,
      "learning_rate": 5.2193869233367433e-05,
      "loss": 0.7673,
      "step": 180
    },
    {
      "epoch": 2.32,
      "gpu_memory": 4887.19873046875,
      "grad_norm": 0.37296849489212036,
      "learning_rate": 3.761893833355035e-05,
      "loss": 0.7864,
      "step": 190
    },
    {
      "epoch": 2.443076923076923,
      "gpu_memory": 4887.19873046875,
      "grad_norm": 0.45706120133399963,
      "learning_rate": 2.5137453979444762e-05,
      "loss": 0.7803,
      "step": 200
    },
    {
      "epoch": 2.443076923076923,
      "eval_loss": 0.5491589903831482,
      "eval_runtime": 0.1999,
      "eval_samples_per_second": 5.002,
      "eval_steps_per_second": 5.002,
      "gpu_memory": 4887.19873046875,
      "learning_rate": 2.5137453979444762e-05,
      "step": 200
    },
    {
      "epoch": 2.566153846153846,
      "gpu_memory": 4887.19873046875,
      "grad_norm": 0.4115428924560547,
      "learning_rate": 1.4981922608692365e-05,
      "loss": 0.7901,
      "step": 210
    },
    {
      "epoch": 2.689230769230769,
      "gpu_memory": 4887.19873046875,
      "grad_norm": 0.404224157333374,
      "learning_rate": 7.34152255572697e-06,
      "loss": 0.7795,
      "step": 220
    },
    {
      "epoch": 2.812307692307692,
      "gpu_memory": 4887.19873046875,
      "grad_norm": 0.382841020822525,
      "learning_rate": 2.3585800173432813e-06,
      "loss": 0.7933,
      "step": 230
    },
    {
      "epoch": 2.9353846153846153,
      "gpu_memory": 4887.19873046875,
      "grad_norm": 0.4079365134239197,
      "learning_rate": 1.259177849420312e-07,
      "loss": 0.828,
      "step": 240
    }
  ],
  "logging_steps": 10,
  "max_steps": 243,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 100,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 3.675188391365837e+16,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}