Nadav commited on
Commit
7c21c36
·
1 Parent(s): 1ae31d6

Training in progress, step 20000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:74b1c9f89c10be34ba07b732580d1baf51bcc9628d8b3575e180ec708de685c5
3
  size 893438545
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:077c4e17d5d6659383f86009d5ba0492ce1a7fb2300e1f9721574b2a12b23519
3
  size 893438545
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ee2ea420baca9d2eefcc8dae7100675a88105c3d9e9c3b79b36e00445ccc4ead
3
  size 449471589
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f93686a66224f35c3db268b2af688ad304cc6ecd7a83f195fac8d19065d16100
3
  size 449471589
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7226249c7c6eefd4928af13e408fa71684763a62cddcee1deb41b978d9e6dc31
3
  size 15523
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:57142c5a60897ef91d5d8c936729fa6ceded5442a5d10db587b6b7e76f18ca4a
3
  size 15523
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a580bf3efb9f384c93dd5a88430e23ca8962352ab2092b0d242ef512ae2fd496
3
  size 559
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:87d518c8dc6f08eacade9bd6abba5d643113786a574d03ca28503afbbe7268e3
3
  size 559
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b55c5d2c495ac51faf1cace6b3a1c40da89146b1ba80937cb1f531c05bc48c91
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f6828d085699bdbc936d5bbc612e9724cd129de0bf65d2e3da37a24ed4eacca1
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 1.2759441987070432,
5
- "global_step": 15000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -306,11 +306,111 @@
306
  "eval_samples_per_second": 34.081,
307
  "eval_steps_per_second": 1.091,
308
  "step": 15000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
309
  }
310
  ],
311
  "max_steps": 1000000,
312
  "num_train_epochs": 86,
313
- "total_flos": 6.900332972886334e+20,
314
  "trial_name": null,
315
  "trial_params": null
316
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 1.701258931609391,
5
+ "global_step": 20000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
306
  "eval_samples_per_second": 34.081,
307
  "eval_steps_per_second": 1.091,
308
  "step": 15000
309
+ },
310
+ {
311
+ "epoch": 1.32,
312
+ "learning_rate": 9.999999999999999e-06,
313
+ "loss": 0.4358,
314
+ "step": 15500
315
+ },
316
+ {
317
+ "epoch": 1.36,
318
+ "learning_rate": 9.999999999999999e-06,
319
+ "loss": 0.4363,
320
+ "step": 16000
321
+ },
322
+ {
323
+ "epoch": 1.36,
324
+ "eval_loss": 0.4048325717449188,
325
+ "eval_runtime": 16.0756,
326
+ "eval_samples_per_second": 31.103,
327
+ "eval_steps_per_second": 0.995,
328
+ "step": 16000
329
+ },
330
+ {
331
+ "epoch": 1.4,
332
+ "learning_rate": 9.999999999999999e-06,
333
+ "loss": 0.4352,
334
+ "step": 16500
335
+ },
336
+ {
337
+ "epoch": 1.45,
338
+ "learning_rate": 9.999999999999999e-06,
339
+ "loss": 0.4346,
340
+ "step": 17000
341
+ },
342
+ {
343
+ "epoch": 1.45,
344
+ "eval_loss": 0.4037468731403351,
345
+ "eval_runtime": 16.4235,
346
+ "eval_samples_per_second": 30.444,
347
+ "eval_steps_per_second": 0.974,
348
+ "step": 17000
349
+ },
350
+ {
351
+ "epoch": 1.49,
352
+ "learning_rate": 9.999999999999999e-06,
353
+ "loss": 0.4336,
354
+ "step": 17500
355
+ },
356
+ {
357
+ "epoch": 1.53,
358
+ "learning_rate": 9.999999999999999e-06,
359
+ "loss": 0.4335,
360
+ "step": 18000
361
+ },
362
+ {
363
+ "epoch": 1.53,
364
+ "eval_loss": 0.402103453874588,
365
+ "eval_runtime": 28.6118,
366
+ "eval_samples_per_second": 17.475,
367
+ "eval_steps_per_second": 0.559,
368
+ "step": 18000
369
+ },
370
+ {
371
+ "epoch": 1.57,
372
+ "learning_rate": 9.999999999999999e-06,
373
+ "loss": 0.4325,
374
+ "step": 18500
375
+ },
376
+ {
377
+ "epoch": 1.62,
378
+ "learning_rate": 9.999999999999999e-06,
379
+ "loss": 0.4319,
380
+ "step": 19000
381
+ },
382
+ {
383
+ "epoch": 1.62,
384
+ "eval_loss": 0.4030299186706543,
385
+ "eval_runtime": 16.452,
386
+ "eval_samples_per_second": 30.391,
387
+ "eval_steps_per_second": 0.973,
388
+ "step": 19000
389
+ },
390
+ {
391
+ "epoch": 1.66,
392
+ "learning_rate": 9.999999999999999e-06,
393
+ "loss": 0.4311,
394
+ "step": 19500
395
+ },
396
+ {
397
+ "epoch": 1.7,
398
+ "learning_rate": 9.999999999999999e-06,
399
+ "loss": 0.4317,
400
+ "step": 20000
401
+ },
402
+ {
403
+ "epoch": 1.7,
404
+ "eval_loss": 0.40188169479370117,
405
+ "eval_runtime": 15.416,
406
+ "eval_samples_per_second": 32.434,
407
+ "eval_steps_per_second": 1.038,
408
+ "step": 20000
409
  }
410
  ],
411
  "max_steps": 1000000,
412
  "num_train_epochs": 86,
413
+ "total_flos": 9.200497816914238e+20,
414
  "trial_name": null,
415
  "trial_params": null
416
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ee2ea420baca9d2eefcc8dae7100675a88105c3d9e9c3b79b36e00445ccc4ead
3
  size 449471589
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f93686a66224f35c3db268b2af688ad304cc6ecd7a83f195fac8d19065d16100
3
  size 449471589