File size: 6,083 Bytes
ad2e619
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 5.0,
  "eval_steps": 500,
  "global_step": 80,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.31746031746031744,
      "grad_norm": 3.1160410477431384,
      "learning_rate": 2e-05,
      "loss": 0.743,
      "loss_nan_ranks": 0,
      "loss_rank_avg": 0.18026351928710938,
      "step": 5,
      "valid_targets_mean": 4387.2,
      "valid_targets_min": 740
    },
    {
      "epoch": 0.6349206349206349,
      "grad_norm": 0.9623025342413126,
      "learning_rate": 3.998096443163716e-05,
      "loss": 0.6426,
      "loss_nan_ranks": 0,
      "loss_rank_avg": 0.15755076706409454,
      "step": 10,
      "valid_targets_mean": 3791.7,
      "valid_targets_min": 695
    },
    {
      "epoch": 0.9523809523809523,
      "grad_norm": 0.5400417708688194,
      "learning_rate": 3.931851652578137e-05,
      "loss": 0.5922,
      "loss_nan_ranks": 0,
      "loss_rank_avg": 0.1631372570991516,
      "step": 15,
      "valid_targets_mean": 4048.3,
      "valid_targets_min": 1151
    },
    {
      "epoch": 1.253968253968254,
      "grad_norm": 0.4346375139896992,
      "learning_rate": 3.774021666356444e-05,
      "loss": 0.5513,
      "loss_nan_ranks": 0,
      "loss_rank_avg": 0.13641172647476196,
      "step": 20,
      "valid_targets_mean": 3890.5,
      "valid_targets_min": 1004
    },
    {
      "epoch": 1.5714285714285714,
      "grad_norm": 0.3583194659422831,
      "learning_rate": 3.532088886237956e-05,
      "loss": 0.5268,
      "loss_nan_ranks": 0,
      "loss_rank_avg": 0.157705619931221,
      "step": 25,
      "valid_targets_mean": 4477.9,
      "valid_targets_min": 656
    },
    {
      "epoch": 1.8888888888888888,
      "grad_norm": 0.2998700734251974,
      "learning_rate": 3.217522858017442e-05,
      "loss": 0.5194,
      "loss_nan_ranks": 0,
      "loss_rank_avg": 0.12757813930511475,
      "step": 30,
      "valid_targets_mean": 4071.6,
      "valid_targets_min": 773
    },
    {
      "epoch": 2.1904761904761907,
      "grad_norm": 0.354322163099566,
      "learning_rate": 2.8452365234813992e-05,
      "loss": 0.4999,
      "loss_nan_ranks": 0,
      "loss_rank_avg": 0.1116921454668045,
      "step": 35,
      "valid_targets_mean": 3261.8,
      "valid_targets_min": 695
    },
    {
      "epoch": 2.507936507936508,
      "grad_norm": 0.2736861113965854,
      "learning_rate": 2.4328792278762058e-05,
      "loss": 0.476,
      "loss_nan_ranks": 0,
      "loss_rank_avg": 0.11490871757268906,
      "step": 40,
      "valid_targets_mean": 3865.1,
      "valid_targets_min": 746
    },
    {
      "epoch": 2.825396825396825,
      "grad_norm": 0.26810971560154995,
      "learning_rate": 2e-05,
      "loss": 0.4743,
      "loss_nan_ranks": 0,
      "loss_rank_avg": 0.09590233117341995,
      "step": 45,
      "valid_targets_mean": 3056.7,
      "valid_targets_min": 726
    },
    {
      "epoch": 3.126984126984127,
      "grad_norm": 0.26045065338871903,
      "learning_rate": 1.5671207721237945e-05,
      "loss": 0.4803,
      "loss_nan_ranks": 0,
      "loss_rank_avg": 0.1086278185248375,
      "step": 50,
      "valid_targets_mean": 3327.7,
      "valid_targets_min": 746
    },
    {
      "epoch": 3.4444444444444446,
      "grad_norm": 0.2646056254599043,
      "learning_rate": 1.1547634765186016e-05,
      "loss": 0.4741,
      "loss_nan_ranks": 0,
      "loss_rank_avg": 0.10994982719421387,
      "step": 55,
      "valid_targets_mean": 3503.4,
      "valid_targets_min": 569
    },
    {
      "epoch": 3.761904761904762,
      "grad_norm": 0.23864309413170337,
      "learning_rate": 7.824771419825588e-06,
      "loss": 0.4612,
      "loss_nan_ranks": 0,
      "loss_rank_avg": 0.09653226286172867,
      "step": 60,
      "valid_targets_mean": 3644.4,
      "valid_targets_min": 1042
    },
    {
      "epoch": 4.063492063492063,
      "grad_norm": 0.2652889879375569,
      "learning_rate": 4.679111137620442e-06,
      "loss": 0.4606,
      "loss_nan_ranks": 0,
      "loss_rank_avg": 0.1475735604763031,
      "step": 65,
      "valid_targets_mean": 4244.4,
      "valid_targets_min": 2068
    },
    {
      "epoch": 4.380952380952381,
      "grad_norm": 0.28736207202393793,
      "learning_rate": 2.259783336435566e-06,
      "loss": 0.467,
      "loss_nan_ranks": 0,
      "loss_rank_avg": 0.10715197026729584,
      "step": 70,
      "valid_targets_mean": 3558.8,
      "valid_targets_min": 733
    },
    {
      "epoch": 4.698412698412699,
      "grad_norm": 0.26387185108781563,
      "learning_rate": 6.814834742186361e-07,
      "loss": 0.459,
      "loss_nan_ranks": 0,
      "loss_rank_avg": 0.10606744885444641,
      "step": 75,
      "valid_targets_mean": 3355.8,
      "valid_targets_min": 761
    },
    {
      "epoch": 5.0,
      "grad_norm": 0.2624385775505968,
      "learning_rate": 1.9035568362844037e-08,
      "loss": 0.4442,
      "loss_nan_ranks": 0,
      "loss_rank_avg": 0.11343716084957123,
      "step": 80,
      "valid_targets_mean": 3113.4,
      "valid_targets_min": 587
    },
    {
      "epoch": 5.0,
      "loss_nan_ranks": 0,
      "loss_rank_avg": 0.11343716084957123,
      "step": 80,
      "total_flos": 1.3290499486829773e+17,
      "train_loss": 0.5170090794563293,
      "train_runtime": 1718.7633,
      "train_samples_per_second": 2.909,
      "train_steps_per_second": 0.047,
      "valid_targets_mean": 3113.4,
      "valid_targets_min": 587
    }
  ],
  "logging_steps": 5,
  "max_steps": 80,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 5,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": false,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 1.3290499486829773e+17,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}