Nadav commited on
Commit
21e32f8
·
1 Parent(s): 7c21c36

Training in progress, step 25000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:077c4e17d5d6659383f86009d5ba0492ce1a7fb2300e1f9721574b2a12b23519
3
  size 893438545
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f0d203a8c7bacd9049d0a9a6ba66771bad7db3da1b0e849bcf26a2083ccab635
3
  size 893438545
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f93686a66224f35c3db268b2af688ad304cc6ecd7a83f195fac8d19065d16100
3
  size 449471589
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:83bcb89c6daff571003c7df9aae49dc4a313a7404638fcb7d95f82b950d2e5af
3
  size 449471589
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:57142c5a60897ef91d5d8c936729fa6ceded5442a5d10db587b6b7e76f18ca4a
3
  size 15523
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3271ed48861b853ff2a93ab2d113124282a36f76af112eacd53eeaa11994564a
3
  size 15523
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:87d518c8dc6f08eacade9bd6abba5d643113786a574d03ca28503afbbe7268e3
3
  size 559
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:865ba51ccd1f35f320c7110ccb893ffd337376d4652e722731792c01668ba190
3
  size 559
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f6828d085699bdbc936d5bbc612e9724cd129de0bf65d2e3da37a24ed4eacca1
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3041a56ac9f847b3d8ba49ecd2e74fdd80acf3c5d07444653f5e498839336c44
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 1.701258931609391,
5
- "global_step": 20000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -406,11 +406,111 @@
406
  "eval_samples_per_second": 32.434,
407
  "eval_steps_per_second": 1.038,
408
  "step": 20000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
409
  }
410
  ],
411
  "max_steps": 1000000,
412
  "num_train_epochs": 86,
413
- "total_flos": 9.200497816914238e+20,
414
  "trial_name": null,
415
  "trial_params": null
416
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 2.1265736645117386,
5
+ "global_step": 25000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
406
  "eval_samples_per_second": 32.434,
407
  "eval_steps_per_second": 1.038,
408
  "step": 20000
409
+ },
410
+ {
411
+ "epoch": 1.74,
412
+ "learning_rate": 9.999999999999999e-06,
413
+ "loss": 0.4313,
414
+ "step": 20500
415
+ },
416
+ {
417
+ "epoch": 1.79,
418
+ "learning_rate": 9.999999999999999e-06,
419
+ "loss": 0.4296,
420
+ "step": 21000
421
+ },
422
+ {
423
+ "epoch": 1.79,
424
+ "eval_loss": 0.39878711104393005,
425
+ "eval_runtime": 16.1844,
426
+ "eval_samples_per_second": 30.894,
427
+ "eval_steps_per_second": 0.989,
428
+ "step": 21000
429
+ },
430
+ {
431
+ "epoch": 1.83,
432
+ "learning_rate": 9.999999999999999e-06,
433
+ "loss": 0.4288,
434
+ "step": 21500
435
+ },
436
+ {
437
+ "epoch": 1.87,
438
+ "learning_rate": 9.999999999999999e-06,
439
+ "loss": 0.4278,
440
+ "step": 22000
441
+ },
442
+ {
443
+ "epoch": 1.87,
444
+ "eval_loss": 0.3984658718109131,
445
+ "eval_runtime": 17.0912,
446
+ "eval_samples_per_second": 29.255,
447
+ "eval_steps_per_second": 0.936,
448
+ "step": 22000
449
+ },
450
+ {
451
+ "epoch": 1.91,
452
+ "learning_rate": 9.999999999999999e-06,
453
+ "loss": 0.4278,
454
+ "step": 22500
455
+ },
456
+ {
457
+ "epoch": 1.96,
458
+ "learning_rate": 9.999999999999999e-06,
459
+ "loss": 0.4276,
460
+ "step": 23000
461
+ },
462
+ {
463
+ "epoch": 1.96,
464
+ "eval_loss": 0.3981262743473053,
465
+ "eval_runtime": 16.5906,
466
+ "eval_samples_per_second": 30.138,
467
+ "eval_steps_per_second": 0.964,
468
+ "step": 23000
469
+ },
470
+ {
471
+ "epoch": 2.0,
472
+ "learning_rate": 9.999999999999999e-06,
473
+ "loss": 0.428,
474
+ "step": 23500
475
+ },
476
+ {
477
+ "epoch": 2.04,
478
+ "learning_rate": 9.999999999999999e-06,
479
+ "loss": 0.4264,
480
+ "step": 24000
481
+ },
482
+ {
483
+ "epoch": 2.04,
484
+ "eval_loss": 0.39774054288864136,
485
+ "eval_runtime": 24.4452,
486
+ "eval_samples_per_second": 20.454,
487
+ "eval_steps_per_second": 0.655,
488
+ "step": 24000
489
+ },
490
+ {
491
+ "epoch": 2.08,
492
+ "learning_rate": 9.999999999999999e-06,
493
+ "loss": 0.427,
494
+ "step": 24500
495
+ },
496
+ {
497
+ "epoch": 2.13,
498
+ "learning_rate": 9.999999999999999e-06,
499
+ "loss": 0.4267,
500
+ "step": 25000
501
+ },
502
+ {
503
+ "epoch": 2.13,
504
+ "eval_loss": 0.3962687849998474,
505
+ "eval_runtime": 16.5048,
506
+ "eval_samples_per_second": 30.294,
507
+ "eval_steps_per_second": 0.969,
508
+ "step": 25000
509
  }
510
  ],
511
  "max_steps": 1000000,
512
  "num_train_epochs": 86,
513
+ "total_flos": 1.1500501101744764e+21,
514
  "trial_name": null,
515
  "trial_params": null
516
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f93686a66224f35c3db268b2af688ad304cc6ecd7a83f195fac8d19065d16100
3
  size 449471589
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:83bcb89c6daff571003c7df9aae49dc4a313a7404638fcb7d95f82b950d2e5af
3
  size 449471589