File size: 3,731 Bytes

8c1a1e3
 
 
98ad7f4
8c1a1e3
98ad7f4
8c1a1e3
 
 
 
 
98ad7f4
 
 
 
8c1a1e3
 
 
 
98ad7f4
8c1a1e3
98ad7f4
 
8c1a1e3
 
 
98ad7f4
 
 
 
8c1a1e3
 
 
98ad7f4
 
 
 
8c1a1e3
 
 
98ad7f4
 
 
 
8c1a1e3
 
 
98ad7f4
 
 
 
8c1a1e3
 
 
98ad7f4
 
 
 
8c1a1e3
 
 
98ad7f4
 
 
 
8c1a1e3
 
 
98ad7f4
 
 
 
8c1a1e3
 
 
98ad7f4
 
 
 
8c1a1e3
 
 
98ad7f4
 
 
 
8c1a1e3
 
 
98ad7f4
 
 
 
8c1a1e3
 
 
98ad7f4
 
 
 
8c1a1e3
 
 
98ad7f4
 
 
 
8c1a1e3
 
 
98ad7f4
 
 
 
8c1a1e3
 
98ad7f4
 
 
 
 
8c1a1e3
 
98ad7f4
 
 
7c80437
 
 
 
8c1a1e3
 
 
98ad7f4
8c1a1e3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
98ad7f4
8c1a1e3

{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.9933774834437086,
  "eval_steps": 500,
  "global_step": 75,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.013245033112582781,
      "grad_norm": 0.4943664073944092,
      "learning_rate": 2.5e-05,
      "loss": 1.4272,
      "step": 1
    },
    {
      "epoch": 0.06622516556291391,
      "grad_norm": 0.20984387397766113,
      "learning_rate": 0.000125,
      "loss": 1.3101,
      "step": 5
    },
    {
      "epoch": 0.13245033112582782,
      "grad_norm": 0.2290477156639099,
      "learning_rate": 0.00019956059820218982,
      "loss": 1.2917,
      "step": 10
    },
    {
      "epoch": 0.1986754966887417,
      "grad_norm": 0.15163910388946533,
      "learning_rate": 0.00019466156752904343,
      "loss": 1.2823,
      "step": 15
    },
    {
      "epoch": 0.26490066225165565,
      "grad_norm": 0.1627238243818283,
      "learning_rate": 0.00018458320592590975,
      "loss": 1.1889,
      "step": 20
    },
    {
      "epoch": 0.33112582781456956,
      "grad_norm": 0.15383219718933105,
      "learning_rate": 0.00016987694277788417,
      "loss": 1.198,
      "step": 25
    },
    {
      "epoch": 0.3973509933774834,
      "grad_norm": 0.1501755714416504,
      "learning_rate": 0.0001513474193514842,
      "loss": 1.1762,
      "step": 30
    },
    {
      "epoch": 0.46357615894039733,
      "grad_norm": 0.14539840817451477,
      "learning_rate": 0.0001300084635000341,
      "loss": 1.2176,
      "step": 35
    },
    {
      "epoch": 0.5298013245033113,
      "grad_norm": 0.12844280898571014,
      "learning_rate": 0.0001070276188945293,
      "loss": 1.1942,
      "step": 40
    },
    {
      "epoch": 0.5960264900662252,
      "grad_norm": 0.13806107640266418,
      "learning_rate": 8.366226381814697e-05,
      "loss": 1.2928,
      "step": 45
    },
    {
      "epoch": 0.6622516556291391,
      "grad_norm": 0.13188520073890686,
      "learning_rate": 6.119081473277501e-05,
      "loss": 1.1959,
      "step": 50
    },
    {
      "epoch": 0.7284768211920529,
      "grad_norm": 0.12824179232120514,
      "learning_rate": 4.084277875864776e-05,
      "loss": 1.1188,
      "step": 55
    },
    {
      "epoch": 0.7947019867549668,
      "grad_norm": 0.14250224828720093,
      "learning_rate": 2.3731482188961818e-05,
      "loss": 1.2076,
      "step": 60
    },
    {
      "epoch": 0.8609271523178808,
      "grad_norm": 0.14001749455928802,
      "learning_rate": 1.0793155744261351e-05,
      "loss": 1.1352,
      "step": 65
    },
    {
      "epoch": 0.9271523178807947,
      "grad_norm": 0.15154731273651123,
      "learning_rate": 2.735709467518699e-06,
      "loss": 1.1486,
      "step": 70
    },
    {
      "epoch": 0.9933774834437086,
      "grad_norm": 0.14987458288669586,
      "learning_rate": 0.0,
      "loss": 1.2385,
      "step": 75
    },
    {
      "epoch": 0.9933774834437086,
      "step": 75,
      "total_flos": 5.564154814608179e+16,
      "train_loss": 0.0,
      "train_runtime": 0.9326,
      "train_samples_per_second": 645.502,
      "train_steps_per_second": 80.42
    }
  ],
  "logging_steps": 5,
  "max_steps": 75,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 100,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 5.564154814608179e+16,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}