Nadav commited on
Commit
1ae31d6
·
1 Parent(s): 0b01633

Training in progress, step 15000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d1d7732285dbc0e8407195393ce8d0f6d70acf3e85a7b4ef3f39c8609146bc0b
3
  size 893438545
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:74b1c9f89c10be34ba07b732580d1baf51bcc9628d8b3575e180ec708de685c5
3
  size 893438545
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d2b9950baa41cad72de72ff958a3f0b3caf4baaab2ba0b7beec4ae2bb74a2a62
3
  size 449471589
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ee2ea420baca9d2eefcc8dae7100675a88105c3d9e9c3b79b36e00445ccc4ead
3
  size 449471589
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a5d6b04968c3393a8d3fcd8486073945737f454dd868c26cd51d9e51b95e0e1f
3
  size 15523
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7226249c7c6eefd4928af13e408fa71684763a62cddcee1deb41b978d9e6dc31
3
  size 15523
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:eec59b704dede5fd2cb3547af4da160bc4d245b31d03184f1306b83b75cd8e19
3
  size 559
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a580bf3efb9f384c93dd5a88430e23ca8962352ab2092b0d242ef512ae2fd496
3
  size 559
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:68906a5e3db4424f18aed3a659657b33e5a4636dccdcb491f81ba7b3cc4c516d
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b55c5d2c495ac51faf1cace6b3a1c40da89146b1ba80937cb1f531c05bc48c91
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.8506294658046955,
5
- "global_step": 10000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -206,11 +206,111 @@
206
  "eval_samples_per_second": 32.547,
207
  "eval_steps_per_second": 1.042,
208
  "step": 10000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
209
  }
210
  ],
211
  "max_steps": 1000000,
212
  "num_train_epochs": 86,
213
- "total_flos": 4.600329688055808e+20,
214
  "trial_name": null,
215
  "trial_params": null
216
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 1.2759441987070432,
5
+ "global_step": 15000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
206
  "eval_samples_per_second": 32.547,
207
  "eval_steps_per_second": 1.042,
208
  "step": 10000
209
+ },
210
+ {
211
+ "epoch": 0.89,
212
+ "learning_rate": 9.999999999999999e-06,
213
+ "loss": 0.4437,
214
+ "step": 10500
215
+ },
216
+ {
217
+ "epoch": 0.94,
218
+ "learning_rate": 9.999999999999999e-06,
219
+ "loss": 0.4433,
220
+ "step": 11000
221
+ },
222
+ {
223
+ "epoch": 0.94,
224
+ "eval_loss": 0.4144207835197449,
225
+ "eval_runtime": 30.4128,
226
+ "eval_samples_per_second": 16.44,
227
+ "eval_steps_per_second": 0.526,
228
+ "step": 11000
229
+ },
230
+ {
231
+ "epoch": 0.98,
232
+ "learning_rate": 9.999999999999999e-06,
233
+ "loss": 0.4431,
234
+ "step": 11500
235
+ },
236
+ {
237
+ "epoch": 1.02,
238
+ "learning_rate": 9.999999999999999e-06,
239
+ "loss": 0.4413,
240
+ "step": 12000
241
+ },
242
+ {
243
+ "epoch": 1.02,
244
+ "eval_loss": 0.4120546877384186,
245
+ "eval_runtime": 14.9708,
246
+ "eval_samples_per_second": 33.398,
247
+ "eval_steps_per_second": 1.069,
248
+ "step": 12000
249
+ },
250
+ {
251
+ "epoch": 1.06,
252
+ "learning_rate": 9.999999999999999e-06,
253
+ "loss": 0.4401,
254
+ "step": 12500
255
+ },
256
+ {
257
+ "epoch": 1.11,
258
+ "learning_rate": 9.999999999999999e-06,
259
+ "loss": 0.4395,
260
+ "step": 13000
261
+ },
262
+ {
263
+ "epoch": 1.11,
264
+ "eval_loss": 0.40858784317970276,
265
+ "eval_runtime": 16.4691,
266
+ "eval_samples_per_second": 30.36,
267
+ "eval_steps_per_second": 0.972,
268
+ "step": 13000
269
+ },
270
+ {
271
+ "epoch": 1.15,
272
+ "learning_rate": 9.999999999999999e-06,
273
+ "loss": 0.4411,
274
+ "step": 13500
275
+ },
276
+ {
277
+ "epoch": 1.19,
278
+ "learning_rate": 9.999999999999999e-06,
279
+ "loss": 0.4391,
280
+ "step": 14000
281
+ },
282
+ {
283
+ "epoch": 1.19,
284
+ "eval_loss": 0.40859168767929077,
285
+ "eval_runtime": 16.6715,
286
+ "eval_samples_per_second": 29.991,
287
+ "eval_steps_per_second": 0.96,
288
+ "step": 14000
289
+ },
290
+ {
291
+ "epoch": 1.23,
292
+ "learning_rate": 9.999999999999999e-06,
293
+ "loss": 0.4385,
294
+ "step": 14500
295
+ },
296
+ {
297
+ "epoch": 1.28,
298
+ "learning_rate": 9.999999999999999e-06,
299
+ "loss": 0.4371,
300
+ "step": 15000
301
+ },
302
+ {
303
+ "epoch": 1.28,
304
+ "eval_loss": 0.4050961434841156,
305
+ "eval_runtime": 14.6709,
306
+ "eval_samples_per_second": 34.081,
307
+ "eval_steps_per_second": 1.091,
308
+ "step": 15000
309
  }
310
  ],
311
  "max_steps": 1000000,
312
  "num_train_epochs": 86,
313
+ "total_flos": 6.900332972886334e+20,
314
  "trial_name": null,
315
  "trial_params": null
316
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d2b9950baa41cad72de72ff958a3f0b3caf4baaab2ba0b7beec4ae2bb74a2a62
3
  size 449471589
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ee2ea420baca9d2eefcc8dae7100675a88105c3d9e9c3b79b36e00445ccc4ead
3
  size 449471589