NairaRahim commited on
Commit
2eab144
·
verified ·
1 Parent(s): bc02520

Training in progress, epoch 8, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ed8ae4ab42781745b2126513149381b5b6ff7214663e4449b01b2359d8311e3a
3
  size 1227009528
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:baf95e715d14feeecbf6a32137141a40e7c6ebb37fd4ced245a4b228e0e760c6
3
  size 1227009528
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ac9434c970128034aa8822b8a0401c794db85187656338f45cc845953009b5b3
3
  size 2454133690
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:57865d434d83d8a261c59663a9b74528ff7f76e6caeaff8ebd9b87b92d21935b
3
  size 2454133690
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f7565598da56842799ee28845bb7d5540de84da2eb38da30890faa373e17c3ad
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eae9ed662108a7f0520e7e0fb77f777896ccf85b0df0ce63cdb5500ab80485e4
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5c0cd9d4d4eb26e88d5f90eed6823e5f94040581f03708c992959bc0b84c560f
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dce0295becb66b2a3531818b76729e598e30825db8f70c059ee3261485288885
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 34.818748474121094,
3
  "best_model_checkpoint": "/kaggle/working/output/checkpoint-9135",
4
- "epoch": 7.0,
5
  "eval_steps": 500,
6
- "global_step": 9135,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -700,6 +700,105 @@
700
  "eval_samples_per_second": 26.469,
701
  "eval_steps_per_second": 3.326,
702
  "step": 9135
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
703
  }
704
  ],
705
  "logging_steps": 100,
@@ -714,7 +813,7 @@
714
  "early_stopping_threshold": 0.0
715
  },
716
  "attributes": {
717
- "early_stopping_patience_counter": 0
718
  }
719
  },
720
  "TrainerControl": {
@@ -728,7 +827,7 @@
728
  "attributes": {}
729
  }
730
  },
731
- "total_flos": 9851176321330176.0,
732
  "train_batch_size": 8,
733
  "trial_name": null,
734
  "trial_params": null
 
1
  {
2
  "best_metric": 34.818748474121094,
3
  "best_model_checkpoint": "/kaggle/working/output/checkpoint-9135",
4
+ "epoch": 8.0,
5
  "eval_steps": 500,
6
+ "global_step": 10440,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
700
  "eval_samples_per_second": 26.469,
701
  "eval_steps_per_second": 3.326,
702
  "step": 9135
703
+ },
704
+ {
705
+ "epoch": 7.049808429118774,
706
+ "grad_norm": 3.117553472518921,
707
+ "learning_rate": 4.5595785440613034e-05,
708
+ "loss": 34.1218,
709
+ "step": 9200
710
+ },
711
+ {
712
+ "epoch": 7.126436781609195,
713
+ "grad_norm": 2.5572612285614014,
714
+ "learning_rate": 4.5547892720306515e-05,
715
+ "loss": 33.662,
716
+ "step": 9300
717
+ },
718
+ {
719
+ "epoch": 7.203065134099617,
720
+ "grad_norm": 3.5347042083740234,
721
+ "learning_rate": 4.55e-05,
722
+ "loss": 34.4668,
723
+ "step": 9400
724
+ },
725
+ {
726
+ "epoch": 7.2796934865900385,
727
+ "grad_norm": 1.9216647148132324,
728
+ "learning_rate": 4.545210727969349e-05,
729
+ "loss": 33.4468,
730
+ "step": 9500
731
+ },
732
+ {
733
+ "epoch": 7.35632183908046,
734
+ "grad_norm": 4.242152214050293,
735
+ "learning_rate": 4.5404214559386975e-05,
736
+ "loss": 33.5805,
737
+ "step": 9600
738
+ },
739
+ {
740
+ "epoch": 7.432950191570881,
741
+ "grad_norm": 2.9310567378997803,
742
+ "learning_rate": 4.535632183908046e-05,
743
+ "loss": 34.0603,
744
+ "step": 9700
745
+ },
746
+ {
747
+ "epoch": 7.509578544061303,
748
+ "grad_norm": 2.6573023796081543,
749
+ "learning_rate": 4.530842911877395e-05,
750
+ "loss": 33.8766,
751
+ "step": 9800
752
+ },
753
+ {
754
+ "epoch": 7.586206896551724,
755
+ "grad_norm": 2.7849409580230713,
756
+ "learning_rate": 4.5260536398467436e-05,
757
+ "loss": 33.6309,
758
+ "step": 9900
759
+ },
760
+ {
761
+ "epoch": 7.662835249042145,
762
+ "grad_norm": 2.7377357482910156,
763
+ "learning_rate": 4.521264367816092e-05,
764
+ "loss": 33.3621,
765
+ "step": 10000
766
+ },
767
+ {
768
+ "epoch": 7.739463601532567,
769
+ "grad_norm": 2.106233835220337,
770
+ "learning_rate": 4.516475095785441e-05,
771
+ "loss": 33.4172,
772
+ "step": 10100
773
+ },
774
+ {
775
+ "epoch": 7.816091954022989,
776
+ "grad_norm": 2.1989126205444336,
777
+ "learning_rate": 4.5116858237547896e-05,
778
+ "loss": 33.5937,
779
+ "step": 10200
780
+ },
781
+ {
782
+ "epoch": 7.89272030651341,
783
+ "grad_norm": 2.903721570968628,
784
+ "learning_rate": 4.5068965517241377e-05,
785
+ "loss": 33.7935,
786
+ "step": 10300
787
+ },
788
+ {
789
+ "epoch": 7.969348659003831,
790
+ "grad_norm": 2.061602830886841,
791
+ "learning_rate": 4.5021072796934863e-05,
792
+ "loss": 33.3289,
793
+ "step": 10400
794
+ },
795
+ {
796
+ "epoch": 8.0,
797
+ "eval_loss": 34.95075607299805,
798
+ "eval_runtime": 49.3237,
799
+ "eval_samples_per_second": 26.458,
800
+ "eval_steps_per_second": 3.325,
801
+ "step": 10440
802
  }
803
  ],
804
  "logging_steps": 100,
 
813
  "early_stopping_threshold": 0.0
814
  },
815
  "attributes": {
816
+ "early_stopping_patience_counter": 1
817
  }
818
  },
819
  "TrainerControl": {
 
827
  "attributes": {}
828
  }
829
  },
830
+ "total_flos": 1.1258487224377344e+16,
831
  "train_batch_size": 8,
832
  "trial_name": null,
833
  "trial_params": null