File size: 2,612 Bytes

9003bbb
 
 
 
 
 
 
 
 
 
 
 
 
00fcad7
9003bbb
00fcad7
9003bbb
 
 
 
00fcad7
9003bbb
00fcad7
9003bbb
 
 
 
00fcad7
9003bbb
00fcad7
9003bbb
 
 
 
00fcad7
9003bbb
00fcad7
9003bbb
 
 
 
00fcad7
9003bbb
00fcad7
9003bbb
 
 
 
00fcad7
9003bbb
00fcad7
9003bbb
 
 
 
00fcad7
9003bbb
00fcad7
9003bbb
 
 
 
00fcad7
9003bbb
00fcad7
9003bbb
 
 
 
00fcad7
9003bbb
00fcad7
9003bbb
 
 
 
00fcad7
9003bbb
00fcad7
9003bbb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
00fcad7
9003bbb

{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 2.4271844660194173,
  "eval_steps": 500,
  "global_step": 500,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.24271844660194175,
      "grad_norm": 2.8362834453582764,
      "learning_rate": 4.960355987055016e-05,
      "loss": 6.0742,
      "step": 50
    },
    {
      "epoch": 0.4854368932038835,
      "grad_norm": 2.9484496116638184,
      "learning_rate": 4.9199029126213595e-05,
      "loss": 5.7218,
      "step": 100
    },
    {
      "epoch": 0.7281553398058253,
      "grad_norm": 3.542572259902954,
      "learning_rate": 4.879449838187702e-05,
      "loss": 5.4227,
      "step": 150
    },
    {
      "epoch": 0.970873786407767,
      "grad_norm": 4.634098052978516,
      "learning_rate": 4.8389967637540455e-05,
      "loss": 5.1655,
      "step": 200
    },
    {
      "epoch": 1.2135922330097086,
      "grad_norm": 5.699330806732178,
      "learning_rate": 4.798543689320388e-05,
      "loss": 4.8009,
      "step": 250
    },
    {
      "epoch": 1.4563106796116505,
      "grad_norm": 5.711933135986328,
      "learning_rate": 4.7580906148867315e-05,
      "loss": 4.607,
      "step": 300
    },
    {
      "epoch": 1.6990291262135924,
      "grad_norm": 5.113691806793213,
      "learning_rate": 4.717637540453075e-05,
      "loss": 4.462,
      "step": 350
    },
    {
      "epoch": 1.941747572815534,
      "grad_norm": 5.632521152496338,
      "learning_rate": 4.6771844660194174e-05,
      "loss": 4.3695,
      "step": 400
    },
    {
      "epoch": 2.1844660194174756,
      "grad_norm": 5.428906440734863,
      "learning_rate": 4.636731391585761e-05,
      "loss": 4.1549,
      "step": 450
    },
    {
      "epoch": 2.4271844660194173,
      "grad_norm": 5.037013530731201,
      "learning_rate": 4.596278317152104e-05,
      "loss": 4.0921,
      "step": 500
    }
  ],
  "logging_steps": 50,
  "max_steps": 6180,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 30,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 232176746496000.0,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}