File size: 2,516 Bytes
481edb5
 
 
3674b21
 
 
481edb5
 
 
 
 
3674b21
 
 
 
481edb5
 
 
3674b21
 
 
 
481edb5
 
 
3674b21
 
 
 
 
 
481edb5
 
3674b21
 
 
 
481edb5
 
 
3674b21
 
 
 
 
481edb5
 
 
3674b21
 
 
 
481edb5
 
 
3674b21
 
 
 
 
 
481edb5
 
3674b21
 
481edb5
3674b21
 
481edb5
 
3674b21
 
 
 
 
 
481edb5
 
3674b21
 
 
 
 
 
 
481edb5
 
 
3674b21
481edb5
3674b21
481edb5
 
 
 
 
 
 
 
 
 
 
 
 
3674b21
481edb5
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.6,
  "eval_steps": 5,
  "global_step": 20,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.08,
      "grad_norm": 0.7334153552614141,
      "learning_rate": 0.0001,
      "loss": 1.115,
      "step": 1
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.5975179564042915,
      "learning_rate": 0.00018660254037844388,
      "loss": 0.9142,
      "step": 5
    },
    {
      "epoch": 0.4,
      "eval_loss": 0.722756564617157,
      "eval_runtime": 3.0808,
      "eval_samples_per_second": 6.492,
      "eval_steps_per_second": 1.623,
      "step": 5
    },
    {
      "epoch": 0.8,
      "grad_norm": 0.4159151923493577,
      "learning_rate": 0.00011736481776669306,
      "loss": 0.6834,
      "step": 10
    },
    {
      "epoch": 0.8,
      "eval_loss": 0.6278184056282043,
      "eval_runtime": 1.8619,
      "eval_samples_per_second": 10.742,
      "eval_steps_per_second": 2.685,
      "step": 10
    },
    {
      "epoch": 1.2,
      "grad_norm": 0.40842948448715766,
      "learning_rate": 3.5721239031346066e-05,
      "loss": 0.5454,
      "step": 15
    },
    {
      "epoch": 1.2,
      "eval_loss": 0.6100303530693054,
      "eval_runtime": 1.8606,
      "eval_samples_per_second": 10.749,
      "eval_steps_per_second": 2.687,
      "step": 15
    },
    {
      "epoch": 1.6,
      "grad_norm": 0.3737806540766406,
      "learning_rate": 0.0,
      "loss": 0.5243,
      "step": 20
    },
    {
      "epoch": 1.6,
      "eval_loss": 0.6069896221160889,
      "eval_runtime": 1.8906,
      "eval_samples_per_second": 10.579,
      "eval_steps_per_second": 2.645,
      "step": 20
    },
    {
      "epoch": 1.6,
      "step": 20,
      "total_flos": 1591906369536.0,
      "train_loss": 0.6768446683883667,
      "train_runtime": 82.5503,
      "train_samples_per_second": 1.938,
      "train_steps_per_second": 0.242
    }
  ],
  "logging_steps": 5,
  "max_steps": 20,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 2,
  "save_steps": 1000,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 1591906369536.0,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}