Nadav committed on
Commit
d46d300
·
1 Parent(s): deea266

Training in progress, step 20000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b7965023fb90058fbcca08a10a8e9e39d8cf72c5b9f3d0eb2b25bc715dfb7185
3
  size 893438545
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3ab89fbcd5cd837ff38661f8e6562d1e556debad1cf94429f314679699e56c75
3
  size 893438545
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5abd226a9e0cc358d3b07162345d74dc2fab3322999fae6a6de0ab8221263680
3
  size 449471589
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:650de0629283aadbd2ed9c9c4e4fa6910cc48450c6a7d6491a018378e49a8f08
3
  size 449471589
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8217c6a790474956dac035c8fd3d42f3d276f02a592aa421e5eed682cc08b9be
3
  size 15523
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5075119d983cedc61f9e794107c876257cfc00cbcd125777dcdfcea5b38678a0
3
  size 15523
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:46171f332d1e5155156d4999fec5406fc9f03857ff686d0152b9c4ca38519697
3
  size 559
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f62cdfaeb53c638813006cc5b9f0699031a6115d20311906da58b054af32fe51
3
  size 559
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:611657e4e82b38f099b149471e6aaaa5a577926bec8385cc0f6dd743ea4cfd9b
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4fb733c4a27965716bd87114bf2e4699ec7082883b2e3998dcc83abb3d40f744
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.870852564660803,
5
- "global_step": 10000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -142,11 +142,147 @@
142
  "eval_samples_per_second": 67.475,
143
  "eval_steps_per_second": 1.066,
144
  "step": 10000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
145
  }
146
  ],
147
  "max_steps": 100000,
148
  "num_train_epochs": 9,
149
- "total_flos": 4.709861347295232e+20,
150
  "trial_name": null,
151
  "trial_params": null
152
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 1.741705129321606,
5
+ "global_step": 20000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
142
  "eval_samples_per_second": 67.475,
143
  "eval_steps_per_second": 1.066,
144
  "step": 10000
145
+ },
146
+ {
147
+ "epoch": 0.91,
148
+ "learning_rate": 9.833495446081683e-05,
149
+ "loss": 0.4477,
150
+ "step": 10500
151
+ },
152
+ {
153
+ "epoch": 0.96,
154
+ "learning_rate": 9.809875714179663e-05,
155
+ "loss": 0.4472,
156
+ "step": 11000
157
+ },
158
+ {
159
+ "epoch": 1.0,
160
+ "learning_rate": 9.785202223022593e-05,
161
+ "loss": 0.4474,
162
+ "step": 11500
163
+ },
164
+ {
165
+ "epoch": 1.05,
166
+ "learning_rate": 9.759481060425221e-05,
167
+ "loss": 0.4452,
168
+ "step": 12000
169
+ },
170
+ {
171
+ "epoch": 1.09,
172
+ "learning_rate": 9.732718572699541e-05,
173
+ "loss": 0.4443,
174
+ "step": 12500
175
+ },
176
+ {
177
+ "epoch": 1.13,
178
+ "learning_rate": 9.704921363088947e-05,
179
+ "loss": 0.4439,
180
+ "step": 13000
181
+ },
182
+ {
183
+ "epoch": 1.18,
184
+ "learning_rate": 9.676096290138969e-05,
185
+ "loss": 0.4432,
186
+ "step": 13500
187
+ },
188
+ {
189
+ "epoch": 1.22,
190
+ "learning_rate": 9.646311171510121e-05,
191
+ "loss": 0.4444,
192
+ "step": 14000
193
+ },
194
+ {
195
+ "epoch": 1.26,
196
+ "learning_rate": 9.615453979461317e-05,
197
+ "loss": 0.443,
198
+ "step": 14500
199
+ },
200
+ {
201
+ "epoch": 1.31,
202
+ "learning_rate": 9.583590998811311e-05,
203
+ "loss": 0.4414,
204
+ "step": 15000
205
+ },
206
+ {
207
+ "epoch": 1.31,
208
+ "eval_loss": 0.4205426871776581,
209
+ "eval_runtime": 80.228,
210
+ "eval_samples_per_second": 62.322,
211
+ "eval_steps_per_second": 0.985,
212
+ "step": 15000
213
+ },
214
+ {
215
+ "epoch": 1.35,
216
+ "learning_rate": 9.550796803671621e-05,
217
+ "loss": 0.44,
218
+ "step": 15500
219
+ },
220
+ {
221
+ "epoch": 1.39,
222
+ "learning_rate": 9.516948048567349e-05,
223
+ "loss": 0.44,
224
+ "step": 16000
225
+ },
226
+ {
227
+ "epoch": 1.44,
228
+ "learning_rate": 9.482117809726346e-05,
229
+ "loss": 0.4386,
230
+ "step": 16500
231
+ },
232
+ {
233
+ "epoch": 1.48,
234
+ "learning_rate": 9.446314680988872e-05,
235
+ "loss": 0.438,
236
+ "step": 17000
237
+ },
238
+ {
239
+ "epoch": 1.52,
240
+ "learning_rate": 9.409547496241208e-05,
241
+ "loss": 0.4381,
242
+ "step": 17500
243
+ },
244
+ {
245
+ "epoch": 1.57,
246
+ "learning_rate": 9.371901718501017e-05,
247
+ "loss": 0.4364,
248
+ "step": 18000
249
+ },
250
+ {
251
+ "epoch": 1.61,
252
+ "learning_rate": 9.333235754528592e-05,
253
+ "loss": 0.4352,
254
+ "step": 18500
255
+ },
256
+ {
257
+ "epoch": 1.65,
258
+ "learning_rate": 9.293633635079047e-05,
259
+ "loss": 0.436,
260
+ "step": 19000
261
+ },
262
+ {
263
+ "epoch": 1.7,
264
+ "learning_rate": 9.253105131382774e-05,
265
+ "loss": 0.4357,
266
+ "step": 19500
267
+ },
268
+ {
269
+ "epoch": 1.74,
270
+ "learning_rate": 9.211660243241619e-05,
271
+ "loss": 0.4339,
272
+ "step": 20000
273
+ },
274
+ {
275
+ "epoch": 1.74,
276
+ "eval_loss": 0.413540244102478,
277
+ "eval_runtime": 78.0207,
278
+ "eval_samples_per_second": 64.086,
279
+ "eval_steps_per_second": 1.013,
280
+ "step": 20000
281
  }
282
  ],
283
  "max_steps": 100000,
284
  "num_train_epochs": 9,
285
+ "total_flos": 9.419374931572379e+20,
286
  "trial_name": null,
287
  "trial_params": null
288
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5abd226a9e0cc358d3b07162345d74dc2fab3322999fae6a6de0ab8221263680
3
  size 449471589
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:650de0629283aadbd2ed9c9c4e4fa6910cc48450c6a7d6491a018378e49a8f08
3
  size 449471589