File size: 1,586 Bytes
4ee5ebf
 
 
 
 
5f40123
4ee5ebf
 
 
 
 
5f40123
25e6086
5f40123
25e6086
4ee5ebf
 
 
 
25e6086
4ee5ebf
25e6086
5f40123
4ee5ebf
 
 
25e6086
4ee5ebf
25e6086
5f40123
4ee5ebf
 
 
25e6086
4ee5ebf
25e6086
5f40123
4ee5ebf
 
 
5f40123
 
25e6086
 
 
 
4ee5ebf
 
 
5f40123
4ee5ebf
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5f40123
4ee5ebf
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 3.0,
  "eval_steps": 500,
  "global_step": 6,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.5,
      "grad_norm": 0.2234506756067276,
      "learning_rate": 4.166666666666667e-05,
      "loss": 0.0322,
      "step": 1
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.1784297227859497,
      "learning_rate": 3.3333333333333335e-05,
      "loss": 0.1032,
      "step": 2
    },
    {
      "epoch": 2.0,
      "grad_norm": 0.40786346793174744,
      "learning_rate": 1.6666666666666667e-05,
      "loss": 0.0438,
      "step": 4
    },
    {
      "epoch": 3.0,
      "grad_norm": 0.3481805622577667,
      "learning_rate": 0.0,
      "loss": 0.0325,
      "step": 6
    },
    {
      "epoch": 3.0,
      "step": 6,
      "total_flos": 43469958021120.0,
      "train_loss": 0.04801289985577265,
      "train_runtime": 6.6043,
      "train_samples_per_second": 2.271,
      "train_steps_per_second": 0.909
    }
  ],
  "logging_steps": 500,
  "max_steps": 6,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": false,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 43469958021120.0,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}