{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 3.0,
  "eval_steps": 500,
  "global_step": 525,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.05730659025787966,
      "grad_norm": 2.0758858092387618,
      "learning_rate": 1.6981132075471698e-06,
      "loss": 0.5683,
      "step": 10
    },
    {
      "epoch": 0.11461318051575932,
      "grad_norm": 1.032745783560496,
      "learning_rate": 3.5849056603773586e-06,
      "loss": 0.511,
      "step": 20
    },
    {
      "epoch": 0.17191977077363896,
      "grad_norm": 0.6337965187055514,
      "learning_rate": 5.4716981132075475e-06,
      "loss": 0.4697,
      "step": 30
    },
    {
      "epoch": 0.22922636103151864,
      "grad_norm": 0.3341583249899249,
      "learning_rate": 7.358490566037736e-06,
      "loss": 0.4383,
      "step": 40
    },
    {
      "epoch": 0.28653295128939826,
      "grad_norm": 0.24105124040802722,
      "learning_rate": 9.245283018867926e-06,
      "loss": 0.415,
      "step": 50
    },
    {
      "epoch": 0.3438395415472779,
      "grad_norm": 0.1979988380418763,
      "learning_rate": 9.996013419506035e-06,
      "loss": 0.4052,
      "step": 60
    },
    {
      "epoch": 0.40114613180515757,
      "grad_norm": 0.2042121825495575,
      "learning_rate": 9.971674001050687e-06,
      "loss": 0.4027,
      "step": 70
    },
    {
      "epoch": 0.4584527220630373,
      "grad_norm": 0.20586101864992135,
      "learning_rate": 9.925317587058516e-06,
      "loss": 0.3908,
      "step": 80
    },
    {
      "epoch": 0.5157593123209169,
      "grad_norm": 0.19753266562175947,
      "learning_rate": 9.85714946632355e-06,
      "loss": 0.3824,
      "step": 90
    },
    {
      "epoch": 0.5730659025787965,
      "grad_norm": 0.2014408335897292,
      "learning_rate": 9.767471520507713e-06,
      "loss": 0.3843,
      "step": 100
    },
    {
      "epoch": 0.6303724928366762,
      "grad_norm": 0.21455878509840362,
      "learning_rate": 9.656680887261693e-06,
      "loss": 0.3797,
      "step": 110
    },
    {
      "epoch": 0.6876790830945558,
      "grad_norm": 0.19677946400066818,
      "learning_rate": 9.52526820150588e-06,
      "loss": 0.3763,
      "step": 120
    },
    {
      "epoch": 0.7449856733524355,
      "grad_norm": 0.18157944977618248,
      "learning_rate": 9.373815422659806e-06,
      "loss": 0.3756,
      "step": 130
    },
    {
      "epoch": 0.8022922636103151,
      "grad_norm": 0.20079167101829795,
      "learning_rate": 9.202993257442216e-06,
      "loss": 0.3735,
      "step": 140
    },
    {
      "epoch": 0.8595988538681948,
      "grad_norm": 0.22524995002273018,
      "learning_rate": 9.013558189654819e-06,
      "loss": 0.3704,
      "step": 150
    },
    {
      "epoch": 0.9169054441260746,
      "grad_norm": 0.20366626027514875,
      "learning_rate": 8.806349130103334e-06,
      "loss": 0.3649,
      "step": 160
    },
    {
      "epoch": 0.9742120343839542,
      "grad_norm": 0.20483109220703685,
      "learning_rate": 8.582283701491576e-06,
      "loss": 0.3726,
      "step": 170
    },
    {
      "epoch": 1.0286532951289398,
      "grad_norm": 0.1861813989832312,
      "learning_rate": 8.342354174740904e-06,
      "loss": 0.3613,
      "step": 180
    },
    {
      "epoch": 1.0859598853868195,
      "grad_norm": 0.18944792927613582,
      "learning_rate": 8.08762307473096e-06,
      "loss": 0.3477,
      "step": 190
    },
    {
      "epoch": 1.143266475644699,
      "grad_norm": 0.19241082013181077,
      "learning_rate": 7.81921847492168e-06,
      "loss": 0.3528,
      "step": 200
    },
    {
      "epoch": 1.2005730659025788,
      "grad_norm": 0.2154742217299199,
      "learning_rate": 7.5383290016942e-06,
      "loss": 0.351,
      "step": 210
    },
    {
      "epoch": 1.2578796561604584,
      "grad_norm": 0.20901304048619337,
      "learning_rate": 7.246198570533944e-06,
      "loss": 0.351,
      "step": 220
    },
    {
      "epoch": 1.3151862464183381,
      "grad_norm": 0.19073549086926014,
      "learning_rate": 6.944120877366605e-06,
      "loss": 0.3557,
      "step": 230
    },
    {
      "epoch": 1.3724928366762177,
      "grad_norm": 0.17646170009433357,
      "learning_rate": 6.633433669442066e-06,
      "loss": 0.35,
      "step": 240
    },
    {
      "epoch": 1.4297994269340975,
      "grad_norm": 0.19599623413727513,
      "learning_rate": 6.315512821137606e-06,
      "loss": 0.3473,
      "step": 250
    },
    {
      "epoch": 1.487106017191977,
      "grad_norm": 0.1780821708268571,
      "learning_rate": 5.9917662409155896e-06,
      "loss": 0.3516,
      "step": 260
    },
    {
      "epoch": 1.5444126074498568,
      "grad_norm": 0.17861634863865428,
      "learning_rate": 5.663627636418611e-06,
      "loss": 0.3501,
      "step": 270
    },
    {
      "epoch": 1.6017191977077365,
      "grad_norm": 0.19324138154633005,
      "learning_rate": 5.332550165313312e-06,
      "loss": 0.3482,
      "step": 280
    },
    {
      "epoch": 1.659025787965616,
      "grad_norm": 0.1756126120758286,
      "learning_rate": 5e-06,
      "loss": 0.3438,
      "step": 290
    },
    {
      "epoch": 1.7163323782234956,
      "grad_norm": 0.17188834689286137,
      "learning_rate": 4.667449834686689e-06,
      "loss": 0.3452,
      "step": 300
    },
    {
      "epoch": 1.7736389684813754,
      "grad_norm": 0.1837742901594931,
      "learning_rate": 4.336372363581391e-06,
      "loss": 0.3473,
      "step": 310
    },
    {
      "epoch": 1.8309455587392551,
      "grad_norm": 0.1672330512230618,
      "learning_rate": 4.00823375908441e-06,
      "loss": 0.3497,
      "step": 320
    },
    {
      "epoch": 1.8882521489971347,
      "grad_norm": 0.17943679283278077,
      "learning_rate": 3.6844871788623946e-06,
      "loss": 0.3422,
      "step": 330
    },
    {
      "epoch": 1.9455587392550142,
      "grad_norm": 0.17561326371630695,
      "learning_rate": 3.366566330557935e-06,
      "loss": 0.3434,
      "step": 340
    },
    {
      "epoch": 2.0,
      "grad_norm": 0.21808711950541695,
      "learning_rate": 3.0558791226333974e-06,
      "loss": 0.3411,
      "step": 350
    },
    {
      "epoch": 2.0573065902578795,
      "grad_norm": 0.16805885653283237,
      "learning_rate": 2.7538014294660564e-06,
      "loss": 0.3338,
      "step": 360
    },
    {
      "epoch": 2.1146131805157595,
      "grad_norm": 0.1571105011704873,
      "learning_rate": 2.461670998305802e-06,
      "loss": 0.3365,
      "step": 370
    },
    {
      "epoch": 2.171919770773639,
      "grad_norm": 0.15487232581993202,
      "learning_rate": 2.1807815250783194e-06,
      "loss": 0.3266,
      "step": 380
    },
    {
      "epoch": 2.2292263610315186,
      "grad_norm": 0.15903611974395213,
      "learning_rate": 1.912376925269041e-06,
      "loss": 0.3306,
      "step": 390
    },
    {
      "epoch": 2.286532951289398,
      "grad_norm": 0.1602582517852452,
      "learning_rate": 1.6576458252590988e-06,
      "loss": 0.3338,
      "step": 400
    },
    {
      "epoch": 2.343839541547278,
      "grad_norm": 0.153892591517447,
      "learning_rate": 1.4177162985084242e-06,
      "loss": 0.3391,
      "step": 410
    },
    {
      "epoch": 2.4011461318051577,
      "grad_norm": 0.1670245531773294,
      "learning_rate": 1.1936508698966664e-06,
      "loss": 0.3368,
      "step": 420
    },
    {
      "epoch": 2.458452722063037,
      "grad_norm": 0.15231997853829518,
      "learning_rate": 9.86441810345183e-07,
      "loss": 0.3366,
      "step": 430
    },
    {
      "epoch": 2.5157593123209168,
      "grad_norm": 0.16138543102686964,
      "learning_rate": 7.970067425577849e-07,
      "loss": 0.3345,
      "step": 440
    },
    {
      "epoch": 2.5730659025787963,
      "grad_norm": 0.16815491218506493,
      "learning_rate": 6.261845773401936e-07,
      "loss": 0.3308,
      "step": 450
    },
    {
      "epoch": 2.6303724928366763,
      "grad_norm": 0.1645142556469246,
      "learning_rate": 4.747317984941213e-07,
      "loss": 0.3291,
      "step": 460
    },
    {
      "epoch": 2.687679083094556,
      "grad_norm": 0.1554646689171431,
      "learning_rate": 3.433191127383079e-07,
      "loss": 0.3341,
      "step": 470
    },
    {
      "epoch": 2.7449856733524354,
      "grad_norm": 0.14919476246429758,
      "learning_rate": 2.325284794922883e-07,
      "loss": 0.3337,
      "step": 480
    },
    {
      "epoch": 2.8022922636103154,
      "grad_norm": 0.1381795881550593,
      "learning_rate": 1.4285053367645074e-07,
      "loss": 0.3353,
      "step": 490
    },
    {
      "epoch": 2.859598853868195,
      "grad_norm": 0.1417249974490203,
      "learning_rate": 7.468241294148471e-08,
      "loss": 0.3307,
      "step": 500
    },
    {
      "epoch": 2.9169054441260744,
      "grad_norm": 0.14380681025290207,
      "learning_rate": 2.8325998949314536e-08,
      "loss": 0.3309,
      "step": 510
    },
    {
      "epoch": 2.974212034383954,
      "grad_norm": 0.14738174330263265,
      "learning_rate": 3.9865804939659414e-09,
      "loss": 0.3333,
      "step": 520
    }
  ],
  "logging_steps": 10,
  "max_steps": 525,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 10000000,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 5069159780057088.0,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}