Rakhman16 commited on
Commit
3653556
·
verified ·
1 Parent(s): ae4dc37

Training in progress, step 3500, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c8050d0ba4cc80419a583c93968e6bf55216baf5e1371c2c9e7133fa0b1464ed
3
  size 891558696
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f9397f132a6123749318f2cec2de3795c2cecb21b04af496e60060bdf559d882
3
  size 891558696
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9b05d6d3a797d87d258d9bcbc672b81e5538c72ed7f79e2144c48b29830d47b1
3
  size 1783272762
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2c4d354f40628a2bf76efa3fc41baf5125cffd8c92ac7a1e648705f2d017dfe1
3
  size 1783272762
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ee21e9bc6023190a0db96d08050b5e5b20632d2971675b4236e71d6b2aa60903
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c628fd47930868a3626bfd463d6fa585c5249cd4b2ad88dfb998ebdaeffc2454
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e4a50e6bc9278ed49513d0c8109e953279561a58261b90ebf3bd479594596325
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b542bbc256ffe03bc3de81e397affacfdab8368eb5fbeffaeab7a4d3289a6f9f
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 0.2037852257490158,
3
- "best_model_checkpoint": "./fine-tuned/checkpoint-3000",
4
- "epoch": 2.107481559536354,
5
  "eval_steps": 100,
6
- "global_step": 3000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -667,6 +667,116 @@
667
  "eval_samples_per_second": 66.328,
668
  "eval_steps_per_second": 2.082,
669
  "step": 3000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
670
  }
671
  ],
672
  "logging_steps": 50,
@@ -686,7 +796,7 @@
686
  "attributes": {}
687
  }
688
  },
689
- "total_flos": 2.92254115627008e+16,
690
  "train_batch_size": 32,
691
  "trial_name": null,
692
  "trial_params": null
 
1
  {
2
+ "best_metric": 0.2032385915517807,
3
+ "best_model_checkpoint": "./fine-tuned/checkpoint-3500",
4
+ "epoch": 2.4587284861257466,
5
  "eval_steps": 100,
6
+ "global_step": 3500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
667
  "eval_samples_per_second": 66.328,
668
  "eval_steps_per_second": 2.082,
669
  "step": 3000
670
+ },
671
+ {
672
+ "epoch": 2.1426062521952933,
673
+ "grad_norm": 26514.828125,
674
+ "learning_rate": 1.3924806746310612e-05,
675
+ "loss": 0.2032,
676
+ "step": 3050
677
+ },
678
+ {
679
+ "epoch": 2.1777309448542326,
680
+ "grad_norm": 22808.0234375,
681
+ "learning_rate": 1.3661278988053408e-05,
682
+ "loss": 0.1944,
683
+ "step": 3100
684
+ },
685
+ {
686
+ "epoch": 2.1777309448542326,
687
+ "eval_loss": 0.20371171832084656,
688
+ "eval_runtime": 67.0231,
689
+ "eval_samples_per_second": 66.544,
690
+ "eval_steps_per_second": 2.089,
691
+ "step": 3100
692
+ },
693
+ {
694
+ "epoch": 2.212855637513172,
695
+ "grad_norm": 24228.18359375,
696
+ "learning_rate": 1.3397751229796205e-05,
697
+ "loss": 0.2056,
698
+ "step": 3150
699
+ },
700
+ {
701
+ "epoch": 2.247980330172111,
702
+ "grad_norm": 20969.25390625,
703
+ "learning_rate": 1.3134223471539003e-05,
704
+ "loss": 0.1948,
705
+ "step": 3200
706
+ },
707
+ {
708
+ "epoch": 2.247980330172111,
709
+ "eval_loss": 0.20387396216392517,
710
+ "eval_runtime": 66.9567,
711
+ "eval_samples_per_second": 66.61,
712
+ "eval_steps_per_second": 2.091,
713
+ "step": 3200
714
+ },
715
+ {
716
+ "epoch": 2.2831050228310503,
717
+ "grad_norm": 42587.73046875,
718
+ "learning_rate": 1.28706957132818e-05,
719
+ "loss": 0.2072,
720
+ "step": 3250
721
+ },
722
+ {
723
+ "epoch": 2.3182297154899896,
724
+ "grad_norm": 22174.130859375,
725
+ "learning_rate": 1.2607167955024596e-05,
726
+ "loss": 0.2023,
727
+ "step": 3300
728
+ },
729
+ {
730
+ "epoch": 2.3182297154899896,
731
+ "eval_loss": 0.20358328521251678,
732
+ "eval_runtime": 67.1207,
733
+ "eval_samples_per_second": 66.447,
734
+ "eval_steps_per_second": 2.086,
735
+ "step": 3300
736
+ },
737
+ {
738
+ "epoch": 2.353354408148929,
739
+ "grad_norm": 28607.568359375,
740
+ "learning_rate": 1.2343640196767393e-05,
741
+ "loss": 0.1964,
742
+ "step": 3350
743
+ },
744
+ {
745
+ "epoch": 2.388479100807868,
746
+ "grad_norm": 27227.3203125,
747
+ "learning_rate": 1.208011243851019e-05,
748
+ "loss": 0.2075,
749
+ "step": 3400
750
+ },
751
+ {
752
+ "epoch": 2.388479100807868,
753
+ "eval_loss": 0.20336925983428955,
754
+ "eval_runtime": 67.2613,
755
+ "eval_samples_per_second": 66.309,
756
+ "eval_steps_per_second": 2.081,
757
+ "step": 3400
758
+ },
759
+ {
760
+ "epoch": 2.4236037934668073,
761
+ "grad_norm": 24440.291015625,
762
+ "learning_rate": 1.1816584680252988e-05,
763
+ "loss": 0.1999,
764
+ "step": 3450
765
+ },
766
+ {
767
+ "epoch": 2.4587284861257466,
768
+ "grad_norm": 23327.6328125,
769
+ "learning_rate": 1.1553056921995784e-05,
770
+ "loss": 0.2041,
771
+ "step": 3500
772
+ },
773
+ {
774
+ "epoch": 2.4587284861257466,
775
+ "eval_loss": 0.2032385915517807,
776
+ "eval_runtime": 67.0192,
777
+ "eval_samples_per_second": 66.548,
778
+ "eval_steps_per_second": 2.089,
779
+ "step": 3500
780
  }
781
  ],
782
  "logging_steps": 50,
 
796
  "attributes": {}
797
  }
798
  },
799
+ "total_flos": 3.40970746871808e+16,
800
  "train_batch_size": 32,
801
  "trial_name": null,
802
  "trial_params": null