NairaRahim commited on
Commit
f611722
·
verified ·
1 Parent(s): e03d80a

Training in progress, epoch 9, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:baf95e715d14feeecbf6a32137141a40e7c6ebb37fd4ced245a4b228e0e760c6
3
  size 1227009528
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:43bc4ea709fdacfcad21a864c7ef0120d87fb2ee3ef66e8da659ffa444583421
3
  size 1227009528
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:57865d434d83d8a261c59663a9b74528ff7f76e6caeaff8ebd9b87b92d21935b
3
  size 2454133690
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e223992d46bbe9bb512da1c3a6b7b0881dd041a1208b6f386d718e1eec40e519
3
  size 2454133690
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:eae9ed662108a7f0520e7e0fb77f777896ccf85b0df0ce63cdb5500ab80485e4
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e454a17a07c6bd0c2c4504c81830e5f67d059b3b8b8072bf0bd70406421b40e4
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:dce0295becb66b2a3531818b76729e598e30825db8f70c059ee3261485288885
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8666902435ebb9a3dd0fa595302755da0514dc6e613fbd4ffe9dc07747cb90f1
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 34.818748474121094,
3
- "best_model_checkpoint": "/kaggle/working/output/checkpoint-9135",
4
- "epoch": 8.0,
5
  "eval_steps": 500,
6
- "global_step": 10440,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -799,6 +799,105 @@
799
  "eval_samples_per_second": 26.458,
800
  "eval_steps_per_second": 3.325,
801
  "step": 10440
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
802
  }
803
  ],
804
  "logging_steps": 100,
@@ -813,7 +912,7 @@
813
  "early_stopping_threshold": 0.0
814
  },
815
  "attributes": {
816
- "early_stopping_patience_counter": 1
817
  }
818
  },
819
  "TrainerControl": {
@@ -827,7 +926,7 @@
827
  "attributes": {}
828
  }
829
  },
830
- "total_flos": 1.1258487224377344e+16,
831
  "train_batch_size": 8,
832
  "trial_name": null,
833
  "trial_params": null
 
1
  {
2
+ "best_metric": 34.75983428955078,
3
+ "best_model_checkpoint": "/kaggle/working/output/checkpoint-11745",
4
+ "epoch": 9.0,
5
  "eval_steps": 500,
6
+ "global_step": 11745,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
799
  "eval_samples_per_second": 26.458,
800
  "eval_steps_per_second": 3.325,
801
  "step": 10440
802
+ },
803
+ {
804
+ "epoch": 8.045977011494253,
805
+ "grad_norm": 1.8656938076019287,
806
+ "learning_rate": 4.497318007662836e-05,
807
+ "loss": 33.8404,
808
+ "step": 10500
809
+ },
810
+ {
811
+ "epoch": 8.122605363984674,
812
+ "grad_norm": 2.783926486968994,
813
+ "learning_rate": 4.4925287356321844e-05,
814
+ "loss": 33.9544,
815
+ "step": 10600
816
+ },
817
+ {
818
+ "epoch": 8.199233716475096,
819
+ "grad_norm": 2.175081968307495,
820
+ "learning_rate": 4.487739463601533e-05,
821
+ "loss": 33.6405,
822
+ "step": 10700
823
+ },
824
+ {
825
+ "epoch": 8.275862068965518,
826
+ "grad_norm": 4.121524333953857,
827
+ "learning_rate": 4.482950191570882e-05,
828
+ "loss": 33.568,
829
+ "step": 10800
830
+ },
831
+ {
832
+ "epoch": 8.352490421455938,
833
+ "grad_norm": 3.978410482406616,
834
+ "learning_rate": 4.4781609195402305e-05,
835
+ "loss": 33.6659,
836
+ "step": 10900
837
+ },
838
+ {
839
+ "epoch": 8.42911877394636,
840
+ "grad_norm": 3.0454840660095215,
841
+ "learning_rate": 4.473419540229885e-05,
842
+ "loss": 33.2689,
843
+ "step": 11000
844
+ },
845
+ {
846
+ "epoch": 8.505747126436782,
847
+ "grad_norm": 3.169114828109741,
848
+ "learning_rate": 4.4686302681992336e-05,
849
+ "loss": 33.6227,
850
+ "step": 11100
851
+ },
852
+ {
853
+ "epoch": 8.582375478927203,
854
+ "grad_norm": 2.5880959033966064,
855
+ "learning_rate": 4.463840996168582e-05,
856
+ "loss": 33.3022,
857
+ "step": 11200
858
+ },
859
+ {
860
+ "epoch": 8.659003831417625,
861
+ "grad_norm": 2.1367762088775635,
862
+ "learning_rate": 4.459051724137932e-05,
863
+ "loss": 33.2851,
864
+ "step": 11300
865
+ },
866
+ {
867
+ "epoch": 8.735632183908045,
868
+ "grad_norm": 3.0278782844543457,
869
+ "learning_rate": 4.4542624521072804e-05,
870
+ "loss": 33.922,
871
+ "step": 11400
872
+ },
873
+ {
874
+ "epoch": 8.812260536398467,
875
+ "grad_norm": 2.6361653804779053,
876
+ "learning_rate": 4.4494731800766284e-05,
877
+ "loss": 33.1482,
878
+ "step": 11500
879
+ },
880
+ {
881
+ "epoch": 8.88888888888889,
882
+ "grad_norm": 2.7836809158325195,
883
+ "learning_rate": 4.444683908045977e-05,
884
+ "loss": 34.1345,
885
+ "step": 11600
886
+ },
887
+ {
888
+ "epoch": 8.96551724137931,
889
+ "grad_norm": 2.519681453704834,
890
+ "learning_rate": 4.439894636015326e-05,
891
+ "loss": 34.0642,
892
+ "step": 11700
893
+ },
894
+ {
895
+ "epoch": 9.0,
896
+ "eval_loss": 34.75983428955078,
897
+ "eval_runtime": 49.3463,
898
+ "eval_samples_per_second": 26.446,
899
+ "eval_steps_per_second": 3.323,
900
+ "step": 11745
901
  }
902
  ],
903
  "logging_steps": 100,
 
912
  "early_stopping_threshold": 0.0
913
  },
914
  "attributes": {
915
+ "early_stopping_patience_counter": 0
916
  }
917
  },
918
  "TrainerControl": {
 
926
  "attributes": {}
927
  }
928
  },
929
+ "total_flos": 1.2665798127424512e+16,
930
  "train_batch_size": 8,
931
  "trial_name": null,
932
  "trial_params": null