irishprancer commited on
Commit
816aaac
·
verified ·
1 Parent(s): 6b83fe6

Training in progress, step 5100, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f80ba914040554f1d90e5449fb288b4da5b6e06b0d76aa98ce7dddde9700b42e
3
  size 527048968
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0b8241d490cf7c6d73daa6e58d1e953d37a78b29a116a5819c07ede09f2a18a6
3
  size 527048968
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1b5a85cf2dc26270c152d94b20c59b9328d1c1900e3688d967f5caa95b453a21
3
  size 1054135994
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8a28d22fdc04db2c35d665a1ac5cb8cd349b54d05fc00d81df4548b08481678c
3
  size 1054135994
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:db84a3afeb4f95e4ac798b091171126770595698eb1a4e09a848baf2239c44e8
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:67c6f49cb50d8b09b0f2e9704dcb4986f8fc63f53d3b695322fdb8756b868c02
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1fad51fab4121c7172f2200154af188e4ded1ceaffc87bf0a38752fa2cf99398
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:736f4102e80c412e5bd9ae55e7c4ee4195aa9541999b56cf808f798e57d982a7
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 0.7166205048561096,
3
  "best_model_checkpoint": "./output/checkpoint-450",
4
- "epoch": 215.2173913043478,
5
  "eval_steps": 150,
6
- "global_step": 4950,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -4792,6 +4792,151 @@
4792
  "EMA_steps_per_second": 23.17,
4793
  "epoch": 215.2173913043478,
4794
  "step": 4950
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4795
  }
4796
  ],
4797
  "logging_steps": 10,
@@ -4811,7 +4956,7 @@
4811
  "attributes": {}
4812
  }
4813
  },
4814
- "total_flos": 1.2725269073480909e+17,
4815
  "train_batch_size": 4,
4816
  "trial_name": null,
4817
  "trial_params": null
 
1
  {
2
  "best_metric": 0.7166205048561096,
3
  "best_model_checkpoint": "./output/checkpoint-450",
4
+ "epoch": 221.7391304347826,
5
  "eval_steps": 150,
6
+ "global_step": 5100,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
4792
  "EMA_steps_per_second": 23.17,
4793
  "epoch": 215.2173913043478,
4794
  "step": 4950
4795
+ },
4796
+ {
4797
+ "epoch": 215.65217391304347,
4798
+ "grad_norm": 2.2454140186309814,
4799
+ "learning_rate": 1.5293145627308224e-06,
4800
+ "loss": 0.2412,
4801
+ "step": 4960
4802
+ },
4803
+ {
4804
+ "epoch": 216.08695652173913,
4805
+ "grad_norm": 2.0021111965179443,
4806
+ "learning_rate": 1.52929440471819e-06,
4807
+ "loss": 0.2214,
4808
+ "step": 4970
4809
+ },
4810
+ {
4811
+ "epoch": 216.52173913043478,
4812
+ "grad_norm": 1.6608623266220093,
4813
+ "learning_rate": 1.5292739437586956e-06,
4814
+ "loss": 0.215,
4815
+ "step": 4980
4816
+ },
4817
+ {
4818
+ "epoch": 216.95652173913044,
4819
+ "grad_norm": 1.7904845476150513,
4820
+ "learning_rate": 1.5292531798604489e-06,
4821
+ "loss": 0.2533,
4822
+ "step": 4990
4823
+ },
4824
+ {
4825
+ "epoch": 217.3913043478261,
4826
+ "grad_norm": 2.007638931274414,
4827
+ "learning_rate": 1.52923211303168e-06,
4828
+ "loss": 0.2257,
4829
+ "step": 5000
4830
+ },
4831
+ {
4832
+ "epoch": 217.82608695652175,
4833
+ "grad_norm": 2.0071678161621094,
4834
+ "learning_rate": 1.5292107432807391e-06,
4835
+ "loss": 0.2142,
4836
+ "step": 5010
4837
+ },
4838
+ {
4839
+ "epoch": 218.2608695652174,
4840
+ "grad_norm": 1.6132997274398804,
4841
+ "learning_rate": 1.5291890706160969e-06,
4842
+ "loss": 0.2562,
4843
+ "step": 5020
4844
+ },
4845
+ {
4846
+ "epoch": 218.69565217391303,
4847
+ "grad_norm": 1.4705387353897095,
4848
+ "learning_rate": 1.5291670950463434e-06,
4849
+ "loss": 0.2503,
4850
+ "step": 5030
4851
+ },
4852
+ {
4853
+ "epoch": 219.1304347826087,
4854
+ "grad_norm": 2.801940679550171,
4855
+ "learning_rate": 1.529144816580189e-06,
4856
+ "loss": 0.1838,
4857
+ "step": 5040
4858
+ },
4859
+ {
4860
+ "epoch": 219.56521739130434,
4861
+ "grad_norm": 2.1510982513427734,
4862
+ "learning_rate": 1.5291222352264644e-06,
4863
+ "loss": 0.2212,
4864
+ "step": 5050
4865
+ },
4866
+ {
4867
+ "epoch": 220.0,
4868
+ "grad_norm": 3.703181028366089,
4869
+ "learning_rate": 1.5290993509941199e-06,
4870
+ "loss": 0.2361,
4871
+ "step": 5060
4872
+ },
4873
+ {
4874
+ "epoch": 220.43478260869566,
4875
+ "grad_norm": 2.0578255653381348,
4876
+ "learning_rate": 1.5290761638922261e-06,
4877
+ "loss": 0.1747,
4878
+ "step": 5070
4879
+ },
4880
+ {
4881
+ "epoch": 220.8695652173913,
4882
+ "grad_norm": 2.1845853328704834,
4883
+ "learning_rate": 1.5290526739299738e-06,
4884
+ "loss": 0.2639,
4885
+ "step": 5080
4886
+ },
4887
+ {
4888
+ "epoch": 221.30434782608697,
4889
+ "grad_norm": 2.506991386413574,
4890
+ "learning_rate": 1.5290288811166734e-06,
4891
+ "loss": 0.2194,
4892
+ "step": 5090
4893
+ },
4894
+ {
4895
+ "epoch": 221.7391304347826,
4896
+ "grad_norm": 2.7278904914855957,
4897
+ "learning_rate": 1.529004785461756e-06,
4898
+ "loss": 0.2112,
4899
+ "step": 5100
4900
+ },
4901
+ {
4902
+ "epoch": 221.7391304347826,
4903
+ "eval_loss": 0.9890514612197876,
4904
+ "eval_runtime": 0.4992,
4905
+ "eval_samples_per_second": 20.032,
4906
+ "eval_steps_per_second": 20.032,
4907
+ "step": 5100
4908
+ },
4909
+ {
4910
+ "Start_State_loss": 0.8609819412231445,
4911
+ "Start_State_runtime": 0.4385,
4912
+ "Start_State_samples_per_second": 22.807,
4913
+ "Start_State_steps_per_second": 22.807,
4914
+ "epoch": 221.7391304347826,
4915
+ "step": 5100
4916
+ },
4917
+ {
4918
+ "Raw_Model_loss": 0.9890514612197876,
4919
+ "Raw_Model_runtime": 0.4364,
4920
+ "Raw_Model_samples_per_second": 22.915,
4921
+ "Raw_Model_steps_per_second": 22.915,
4922
+ "epoch": 221.7391304347826,
4923
+ "step": 5100
4924
+ },
4925
+ {
4926
+ "SWA_loss": 0.8240174055099487,
4927
+ "SWA_runtime": 0.4349,
4928
+ "SWA_samples_per_second": 22.995,
4929
+ "SWA_steps_per_second": 22.995,
4930
+ "epoch": 221.7391304347826,
4931
+ "step": 5100
4932
+ },
4933
+ {
4934
+ "EMA_loss": 0.8594372868537903,
4935
+ "EMA_runtime": 0.4421,
4936
+ "EMA_samples_per_second": 22.619,
4937
+ "EMA_steps_per_second": 22.619,
4938
+ "epoch": 221.7391304347826,
4939
+ "step": 5100
4940
  }
4941
  ],
4942
  "logging_steps": 10,
 
4956
  "attributes": {}
4957
  }
4958
  },
4959
+ "total_flos": 1.3106424730691174e+17,
4960
  "train_batch_size": 4,
4961
  "trial_name": null,
4962
  "trial_params": null