FredericFan commited on
Commit
430d498
·
verified ·
1 Parent(s): 334fade

Training in progress, step 5000, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d92451ae8471118d0f4ff3201506d5a90f2bd77bc4421e8d24fa92669e67129e
3
  size 891558696
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e1e91e1bdef8cc188ea4d189cd5523f60cddadc3613fe79933294461a702e8a8
3
  size 891558696
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a1139382effd26c75058f4fa1f776505299c54924e4b6db23f70ae7f64e41724
3
  size 1783272762
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:89290f193b6b7457d80f97bce6f0ec93a64c4a9bc3d593d8d12aeb4cde9c3c6d
3
  size 1783272762
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4a72f59ef3242e6b962c12852e52500422626ceac4df8fb1a56d6379db9b8a98
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0ea7477084e9e4ebf5f677e60167e6c2deee97d19a5a21299c6eb4ed40d31928
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:94a107a53df948b9265998ab0f5b874dabd9db8a133cddea9279d7b4c6c999e9
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:69f50b15ae17c7257c9fed364b38e68d62581d1ae697e8e4bb273e2e41cb49e9
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 0.08653330057859421,
3
- "best_model_checkpoint": "./fine-tuned/checkpoint-4500",
4
- "epoch": 0.36,
5
  "eval_steps": 500,
6
- "global_step": 4500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -709,6 +709,84 @@
709
  "eval_samples_per_second": 22.784,
710
  "eval_steps_per_second": 5.696,
711
  "step": 4500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
712
  }
713
  ],
714
  "logging_steps": 50,
@@ -728,7 +806,7 @@
728
  "attributes": {}
729
  }
730
  },
731
- "total_flos": 1.096124203008e+16,
732
  "train_batch_size": 4,
733
  "trial_name": null,
734
  "trial_params": null
 
1
  {
2
+ "best_metric": 0.0860014408826828,
3
+ "best_model_checkpoint": "./fine-tuned/checkpoint-5000",
4
+ "epoch": 0.4,
5
  "eval_steps": 500,
6
+ "global_step": 5000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
709
  "eval_samples_per_second": 22.784,
710
  "eval_steps_per_second": 5.696,
711
  "step": 4500
712
+ },
713
+ {
714
+ "epoch": 0.364,
715
+ "grad_norm": 0.09011202305555344,
716
+ "learning_rate": 2.45424e-05,
717
+ "loss": 0.0699,
718
+ "step": 4550
719
+ },
720
+ {
721
+ "epoch": 0.368,
722
+ "grad_norm": 0.1441943347454071,
723
+ "learning_rate": 2.44824e-05,
724
+ "loss": 0.0634,
725
+ "step": 4600
726
+ },
727
+ {
728
+ "epoch": 0.372,
729
+ "grad_norm": 0.17736917734146118,
730
+ "learning_rate": 2.4422400000000002e-05,
731
+ "loss": 0.0688,
732
+ "step": 4650
733
+ },
734
+ {
735
+ "epoch": 0.376,
736
+ "grad_norm": 0.16143649816513062,
737
+ "learning_rate": 2.4362400000000002e-05,
738
+ "loss": 0.0575,
739
+ "step": 4700
740
+ },
741
+ {
742
+ "epoch": 0.38,
743
+ "grad_norm": 0.1789257675409317,
744
+ "learning_rate": 2.43024e-05,
745
+ "loss": 0.067,
746
+ "step": 4750
747
+ },
748
+ {
749
+ "epoch": 0.384,
750
+ "grad_norm": 0.13589045405387878,
751
+ "learning_rate": 2.42424e-05,
752
+ "loss": 0.0713,
753
+ "step": 4800
754
+ },
755
+ {
756
+ "epoch": 0.388,
757
+ "grad_norm": 0.12760789692401886,
758
+ "learning_rate": 2.41824e-05,
759
+ "loss": 0.0644,
760
+ "step": 4850
761
+ },
762
+ {
763
+ "epoch": 0.392,
764
+ "grad_norm": 0.17507490515708923,
765
+ "learning_rate": 2.41224e-05,
766
+ "loss": 0.0634,
767
+ "step": 4900
768
+ },
769
+ {
770
+ "epoch": 0.396,
771
+ "grad_norm": 0.15503354370594025,
772
+ "learning_rate": 2.4062400000000002e-05,
773
+ "loss": 0.0654,
774
+ "step": 4950
775
+ },
776
+ {
777
+ "epoch": 0.4,
778
+ "grad_norm": 0.12309867143630981,
779
+ "learning_rate": 2.4002400000000002e-05,
780
+ "loss": 0.0698,
781
+ "step": 5000
782
+ },
783
+ {
784
+ "epoch": 0.4,
785
+ "eval_loss": 0.0860014408826828,
786
+ "eval_runtime": 87.7287,
787
+ "eval_samples_per_second": 22.798,
788
+ "eval_steps_per_second": 5.699,
789
+ "step": 5000
790
  }
791
  ],
792
  "logging_steps": 50,
 
806
  "attributes": {}
807
  }
808
  },
809
+ "total_flos": 1.21791578112e+16,
810
  "train_batch_size": 4,
811
  "trial_name": null,
812
  "trial_params": null