Nadav commited on
Commit
9cae425
·
1 Parent(s): 63659f2

Training in progress, step 50000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:21e0468f83252fbd857b959ddace7a9f33b4b96fb87a6baf73ead5e873aabb0c
3
  size 893438545
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f6f460e22729b026f332d2bbd58361fd7555bb5ce353e6bf9e9337dd07a16fd2
3
  size 893438545
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8bc7e4def826ce0471f39109f9fdb3314c8bc2030700c1f3907e774f1c01f478
3
  size 449471589
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:79523338e928e9a7fd3dc7b6a94546daade275fb1ce8b2909431e7af5e11c453
3
  size 449471589
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:85d1c2df1a9b702c1ab654e9e5e408f92b9742bb1e3ca9287f5761ef0b320b84
3
  size 15523
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5c8a9a1cf72e4f2a5fde4519f00b51f996087fefce4d88bad27c5094268cbceb
3
  size 15523
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c80c9fc8381c1a21d068329ef7b88e8e389828d1de4a65d282cc16135a4f7204
3
  size 559
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d419d70f318fa2e30e9d316c6fcb2475e360580d97334e925184e9f3c7f37a2d
3
  size 559
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3321618f4e6ed92cd7e76689475cb3f46497297ac6a6a8342c9bad133a001350
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ad72afbc8c0bb1254e98f3b0c83e6599b2d9a0f1bddb89ae821c17bf6f39f9af
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 3.483410258643212,
5
- "global_step": 40000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -550,11 +550,147 @@
550
  "eval_samples_per_second": 81.507,
551
  "eval_steps_per_second": 1.288,
552
  "step": 40000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
553
  }
554
  ],
555
  "max_steps": 100000,
556
  "num_train_epochs": 9,
557
- "total_flos": 1.8838402100126674e+21,
558
  "trial_name": null,
559
  "trial_params": null
560
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 4.354262823304015,
5
+ "global_step": 50000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
550
  "eval_samples_per_second": 81.507,
551
  "eval_steps_per_second": 1.288,
552
  "step": 40000
553
+ },
554
+ {
555
+ "epoch": 3.53,
556
+ "learning_rate": 6.875501637243646e-05,
557
+ "loss": 0.4131,
558
+ "step": 40500
559
+ },
560
+ {
561
+ "epoch": 3.57,
562
+ "learning_rate": 6.807200639723347e-05,
563
+ "loss": 0.4117,
564
+ "step": 41000
565
+ },
566
+ {
567
+ "epoch": 3.61,
568
+ "learning_rate": 6.738586748092632e-05,
569
+ "loss": 0.4111,
570
+ "step": 41500
571
+ },
572
+ {
573
+ "epoch": 3.66,
574
+ "learning_rate": 6.669953098278655e-05,
575
+ "loss": 0.4112,
576
+ "step": 42000
577
+ },
578
+ {
579
+ "epoch": 3.7,
580
+ "learning_rate": 6.600765361692337e-05,
581
+ "loss": 0.4111,
582
+ "step": 42500
583
+ },
584
+ {
585
+ "epoch": 3.74,
586
+ "learning_rate": 6.531315665811498e-05,
587
+ "loss": 0.4101,
588
+ "step": 43000
589
+ },
590
+ {
591
+ "epoch": 3.79,
592
+ "learning_rate": 6.461621146309398e-05,
593
+ "loss": 0.4106,
594
+ "step": 43500
595
+ },
596
+ {
597
+ "epoch": 3.83,
598
+ "learning_rate": 6.39169899926586e-05,
599
+ "loss": 0.409,
600
+ "step": 44000
601
+ },
602
+ {
603
+ "epoch": 3.88,
604
+ "learning_rate": 6.321706940430336e-05,
605
+ "loss": 0.4103,
606
+ "step": 44500
607
+ },
608
+ {
609
+ "epoch": 3.92,
610
+ "learning_rate": 6.25138171578181e-05,
611
+ "loss": 0.4076,
612
+ "step": 45000
613
+ },
614
+ {
615
+ "epoch": 3.92,
616
+ "eval_loss": 0.3885731101036072,
617
+ "eval_runtime": 62.3283,
618
+ "eval_samples_per_second": 80.22,
619
+ "eval_steps_per_second": 1.267,
620
+ "step": 45000
621
+ },
622
+ {
623
+ "epoch": 3.96,
624
+ "learning_rate": 6.18088073702545e-05,
625
+ "loss": 0.4084,
626
+ "step": 45500
627
+ },
628
+ {
629
+ "epoch": 4.01,
630
+ "learning_rate": 6.110221399222838e-05,
631
+ "loss": 0.4087,
632
+ "step": 46000
633
+ },
634
+ {
635
+ "epoch": 4.05,
636
+ "learning_rate": 6.0394211365082756e-05,
637
+ "loss": 0.4069,
638
+ "step": 46500
639
+ },
640
+ {
641
+ "epoch": 4.09,
642
+ "learning_rate": 5.9686393768070895e-05,
643
+ "loss": 0.4067,
644
+ "step": 47000
645
+ },
646
+ {
647
+ "epoch": 4.14,
648
+ "learning_rate": 5.897609895876622e-05,
649
+ "loss": 0.4059,
650
+ "step": 47500
651
+ },
652
+ {
653
+ "epoch": 4.18,
654
+ "learning_rate": 5.826491948741281e-05,
655
+ "loss": 0.4064,
656
+ "step": 48000
657
+ },
658
+ {
659
+ "epoch": 4.22,
660
+ "learning_rate": 5.755303082690363e-05,
661
+ "loss": 0.4065,
662
+ "step": 48500
663
+ },
664
+ {
665
+ "epoch": 4.27,
666
+ "learning_rate": 5.6840608625113427e-05,
667
+ "loss": 0.4072,
668
+ "step": 49000
669
+ },
670
+ {
671
+ "epoch": 4.31,
672
+ "learning_rate": 5.61278286615603e-05,
673
+ "loss": 0.4054,
674
+ "step": 49500
675
+ },
676
+ {
677
+ "epoch": 4.35,
678
+ "learning_rate": 5.541486680403469e-05,
679
+ "loss": 0.4061,
680
+ "step": 50000
681
+ },
682
+ {
683
+ "epoch": 4.35,
684
+ "eval_loss": 0.38421276211738586,
685
+ "eval_runtime": 77.975,
686
+ "eval_samples_per_second": 64.123,
687
+ "eval_steps_per_second": 1.013,
688
+ "step": 50000
689
  }
690
  ],
691
  "max_steps": 100000,
692
  "num_train_epochs": 9,
693
+ "total_flos": 2.354791568440382e+21,
694
  "trial_name": null,
695
  "trial_params": null
696
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8bc7e4def826ce0471f39109f9fdb3314c8bc2030700c1f3907e774f1c01f478
3
  size 449471589
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:79523338e928e9a7fd3dc7b6a94546daade275fb1ce8b2909431e7af5e11c453
3
  size 449471589