{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.03203640500568828,
  "eval_steps": 500,
  "global_step": 44,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0029124004550625713,
      "importance_ratio": 1.0,
      "kl_div_avg": 6.202674558153376e-05,
      "learning_rate": 0.0,
      "loss_func": "stage2",
      "step": 1,
      "total_loss": -0.30124378204345703
    },
    {
      "epoch": 0.0029124004550625713,
      "importance_ratio": 0.9999282956123352,
      "kl_div_avg": 0.0013916800962761045,
      "learning_rate": 2.153382790366965e-07,
      "loss_func": "stage2",
      "step": 2,
      "total_loss": -0.36082643270492554
    },
    {
      "epoch": 0.0029124004550625713,
      "importance_ratio": 0.9998849630355835,
      "kl_div_avg": 2.4411560843873303e-06,
      "learning_rate": 3.4130309724299266e-07,
      "loss_func": "stage2",
      "step": 3,
      "total_loss": -1.0000114440917969
    },
    {
      "epoch": 0.0029124004550625713,
      "importance_ratio": 1.0000627040863037,
      "kl_div_avg": 5.5745240388205275e-05,
      "learning_rate": 4.30676558073393e-07,
      "loss_func": "stage2",
      "step": 4,
      "total_loss": 0.44314149022102356
    },
    {
      "epoch": 0.0058248009101251426,
      "importance_ratio": 1.0,
      "kl_div_avg": 0.0006268564611673355,
      "learning_rate": 5e-07,
      "loss_func": "stage2",
      "step": 5,
      "total_loss": 0.060124993324279785
    },
    {
      "epoch": 0.0058248009101251426,
      "importance_ratio": 1.0000600814819336,
      "kl_div_avg": 0.0005689397221431136,
      "learning_rate": 5e-07,
      "loss_func": "stage2",
      "step": 6,
      "total_loss": 0.058914512395858765
    },
    {
      "epoch": 0.0058248009101251426,
      "importance_ratio": 0.9999627470970154,
      "kl_div_avg": 0.0004537358181551099,
      "learning_rate": 4.999271455631648e-07,
      "loss_func": "stage2",
      "step": 7,
      "total_loss": 0.19876566529273987
    },
    {
      "epoch": 0.0058248009101251426,
      "importance_ratio": 1.0000057220458984,
      "kl_div_avg": 0.000579233281314373,
      "learning_rate": 4.998542911263296e-07,
      "loss_func": "stage2",
      "step": 8,
      "total_loss": 0.05980253219604492
    },
    {
      "epoch": 0.008737201365187713,
      "importance_ratio": 1.0,
      "kl_div_avg": 0.0005276197334751487,
      "learning_rate": 4.997814366894943e-07,
      "loss_func": "stage2",
      "step": 9,
      "total_loss": 0.3613969683647156
    },
    {
      "epoch": 0.008737201365187713,
      "importance_ratio": 1.000020146369934,
      "kl_div_avg": 0.000614392978604883,
      "learning_rate": 4.997085822526592e-07,
      "loss_func": "stage2",
      "step": 10,
      "total_loss": -0.7252892255783081
    },
    {
      "epoch": 0.008737201365187713,
      "importance_ratio": 0.9998898506164551,
      "kl_div_avg": 0.0005164016038179398,
      "learning_rate": 4.99635727815824e-07,
      "loss_func": "stage2",
      "step": 11,
      "total_loss": 0.32872042059898376
    },
    {
      "epoch": 0.008737201365187713,
      "importance_ratio": 1.0001676082611084,
      "kl_div_avg": 0.000510960235260427,
      "learning_rate": 4.995628733789887e-07,
      "loss_func": "stage2",
      "step": 12,
      "total_loss": -0.8001887798309326
    },
    {
      "epoch": 0.011649601820250285,
      "importance_ratio": 0.999980092048645,
      "kl_div_avg": 0.0013118372298777103,
      "learning_rate": 4.994900189421535e-07,
      "loss_func": "stage2",
      "step": 13,
      "total_loss": 0.4664098024368286
    },
    {
      "epoch": 0.011649601820250285,
      "importance_ratio": 1.0000075101852417,
      "kl_div_avg": 0.0005553055088967085,
      "learning_rate": 4.994171645053183e-07,
      "loss_func": "stage2",
      "step": 14,
      "total_loss": -0.3387294113636017
    },
    {
      "epoch": 0.011649601820250285,
      "importance_ratio": 0.9999791979789734,
      "kl_div_avg": 0.0006344152498058975,
      "learning_rate": 4.993443100684832e-07,
      "loss_func": "stage2",
      "step": 15,
      "total_loss": -0.7034344673156738
    },
    {
      "epoch": 0.011649601820250285,
      "importance_ratio": 0.9996992349624634,
      "kl_div_avg": 0.0013188815210014582,
      "learning_rate": 4.99271455631648e-07,
      "loss_func": "stage2",
      "step": 16,
      "total_loss": 0.6023236513137817
    },
    {
      "epoch": 0.014562002275312855,
      "importance_ratio": 1.0,
      "kl_div_avg": 0.0015903799794614315,
      "learning_rate": 4.991986011948127e-07,
      "loss_func": "stage2",
      "step": 17,
      "total_loss": 0.9130043983459473
    },
    {
      "epoch": 0.014562002275312855,
      "importance_ratio": 1.0000156164169312,
      "kl_div_avg": 0.0008485906291753054,
      "learning_rate": 4.991257467579775e-07,
      "loss_func": "stage2",
      "step": 18,
      "total_loss": 0.05914050340652466
    },
    {
      "epoch": 0.014562002275312855,
      "importance_ratio": 1.0000320672988892,
      "kl_div_avg": 0.0009148549288511276,
      "learning_rate": 4.990528923211423e-07,
      "loss_func": "stage2",
      "step": 19,
      "total_loss": -0.6230961084365845
    },
    {
      "epoch": 0.014562002275312855,
      "importance_ratio": 0.9999111890792847,
      "kl_div_avg": 0.0016965724062174559,
      "learning_rate": 4.989800378843072e-07,
      "loss_func": "stage2",
      "step": 20,
      "total_loss": 0.5814720988273621
    },
    {
      "epoch": 0.017474402730375427,
      "importance_ratio": 1.0,
      "kl_div_avg": 0.0012479191645979881,
      "learning_rate": 4.98907183447472e-07,
      "loss_func": "stage2",
      "step": 21,
      "total_loss": 0.291412353515625
    },
    {
      "epoch": 0.017474402730375427,
      "importance_ratio": 0.9998515248298645,
      "kl_div_avg": 0.0008866681600920856,
      "learning_rate": 4.988343290106367e-07,
      "loss_func": "stage2",
      "step": 22,
      "total_loss": 0.069973886013031
    },
    {
      "epoch": 0.017474402730375427,
      "importance_ratio": 0.9997187852859497,
      "kl_div_avg": 0.0029276064597070217,
      "learning_rate": 4.987614745738015e-07,
      "loss_func": "stage2",
      "step": 23,
      "total_loss": 0.14661093056201935
    },
    {
      "epoch": 0.017474402730375427,
      "importance_ratio": 0.9996069669723511,
      "kl_div_avg": 0.0028409322258085012,
      "learning_rate": 4.986886201369663e-07,
      "loss_func": "stage2",
      "step": 24,
      "total_loss": 0.23758070170879364
    },
    {
      "epoch": 0.020386803185437997,
      "importance_ratio": 1.0000464916229248,
      "kl_div_avg": 0.003729865886271,
      "learning_rate": 4.986157657001312e-07,
      "loss_func": "stage2",
      "step": 25,
      "total_loss": 0.3280088007450104
    },
    {
      "epoch": 0.020386803185437997,
      "importance_ratio": 0.9997869729995728,
      "kl_div_avg": 0.0012685225810855627,
      "learning_rate": 4.985429112632959e-07,
      "loss_func": "stage2",
      "step": 26,
      "total_loss": 0.5303494930267334
    },
    {
      "epoch": 0.020386803185437997,
      "importance_ratio": 0.9990458488464355,
      "kl_div_avg": 0.0042372471652925014,
      "learning_rate": 4.984700568264607e-07,
      "loss_func": "stage2",
      "step": 27,
      "total_loss": 0.6485586166381836
    },
    {
      "epoch": 0.020386803185437997,
      "importance_ratio": 0.9997897148132324,
      "kl_div_avg": 0.000950633198954165,
      "learning_rate": 4.983972023896255e-07,
      "loss_func": "stage2",
      "step": 28,
      "total_loss": 0.9602090120315552
    },
    {
      "epoch": 0.02329920364050057,
      "importance_ratio": 1.0,
      "kl_div_avg": 0.00110217509791255,
      "learning_rate": 4.983243479527903e-07,
      "loss_func": "stage2",
      "step": 29,
      "total_loss": 0.056760966777801514
    },
    {
      "epoch": 0.02329920364050057,
      "importance_ratio": 0.9995359778404236,
      "kl_div_avg": 0.002028511371463537,
      "learning_rate": 4.98251493515955e-07,
      "loss_func": "stage2",
      "step": 30,
      "total_loss": -0.34580060839653015
    },
    {
      "epoch": 0.02329920364050057,
      "importance_ratio": 0.999755859375,
      "kl_div_avg": 0.0011557539692148566,
      "learning_rate": 4.981786390791199e-07,
      "loss_func": "stage2",
      "step": 31,
      "total_loss": 0.05999104678630829
    },
    {
      "epoch": 0.02329920364050057,
      "importance_ratio": 0.999870240688324,
      "kl_div_avg": 0.0008296141168102622,
      "learning_rate": 4.981057846422847e-07,
      "loss_func": "stage2",
      "step": 32,
      "total_loss": -0.2040196657180786
    },
    {
      "epoch": 0.02621160409556314,
      "importance_ratio": 1.0,
      "kl_div_avg": 0.0052886055782437325,
      "learning_rate": 4.980329302054495e-07,
      "loss_func": "stage2",
      "step": 33,
      "total_loss": 0.3385615050792694
    },
    {
      "epoch": 0.02621160409556314,
      "importance_ratio": 0.9999160766601562,
      "kl_div_avg": 0.00105857546441257,
      "learning_rate": 4.979600757686143e-07,
      "loss_func": "stage2",
      "step": 34,
      "total_loss": 0.8041456937789917
    },
    {
      "epoch": 0.02621160409556314,
      "importance_ratio": 1.0000337362289429,
      "kl_div_avg": 0.001299469848163426,
      "learning_rate": 4.97887221331779e-07,
      "loss_func": "stage2",
      "step": 35,
      "total_loss": 0.059677302837371826
    },
    {
      "epoch": 0.02621160409556314,
      "importance_ratio": 1.0000333786010742,
      "kl_div_avg": 0.0009728098521009088,
      "learning_rate": 4.978143668949439e-07,
      "loss_func": "stage2",
      "step": 36,
      "total_loss": 0.9722646474838257
    },
    {
      "epoch": 0.02912400455062571,
      "importance_ratio": 0.9999969005584717,
      "kl_div_avg": 0.004802822135388851,
      "learning_rate": 4.977415124581087e-07,
      "loss_func": "stage2",
      "step": 37,
      "total_loss": -0.5127509236335754
    },
    {
      "epoch": 0.02912400455062571,
      "importance_ratio": 0.9998223781585693,
      "kl_div_avg": 0.001794470939785242,
      "learning_rate": 4.976686580212735e-07,
      "loss_func": "stage2",
      "step": 38,
      "total_loss": 0.29614874720573425
    },
    {
      "epoch": 0.02912400455062571,
      "importance_ratio": 0.9999631643295288,
      "kl_div_avg": 0.0012322800466790795,
      "learning_rate": 4.975958035844383e-07,
      "loss_func": "stage2",
      "step": 39,
      "total_loss": -0.6838780045509338
    },
    {
      "epoch": 0.02912400455062571,
      "importance_ratio": 1.0000174045562744,
      "kl_div_avg": 0.0013172460021451116,
      "learning_rate": 4.97522949147603e-07,
      "loss_func": "stage2",
      "step": 40,
      "total_loss": 0.31415513157844543
    },
    {
      "epoch": 0.03203640500568828,
      "importance_ratio": 1.0,
      "kl_div_avg": 0.0018465688917785883,
      "learning_rate": 4.974500947107679e-07,
      "loss_func": "stage2",
      "step": 41,
      "total_loss": -0.6754190921783447
    },
    {
      "epoch": 0.03203640500568828,
      "importance_ratio": 0.9999680519104004,
      "kl_div_avg": 0.001666294177994132,
      "learning_rate": 4.973772402739327e-07,
      "loss_func": "stage2",
      "step": 42,
      "total_loss": -0.6812421083450317
    },
    {
      "epoch": 0.03203640500568828,
      "importance_ratio": 1.000089168548584,
      "kl_div_avg": 0.0016971167642623186,
      "learning_rate": 4.973043858370975e-07,
      "loss_func": "stage2",
      "step": 43,
      "total_loss": -0.764133095741272
    },
    {
      "epoch": 0.03203640500568828,
      "importance_ratio": 0.9997596740722656,
      "kl_div_avg": 0.001568423816934228,
      "learning_rate": 4.972315314002623e-07,
      "loss_func": "stage2",
      "step": 44,
      "total_loss": -0.6741525530815125
    }
  ],
  "logging_steps": 1.0,
  "max_steps": 6868,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 20.0,
  "save_steps": 50,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": false,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 0,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}