irishprancer commited on
Commit
f2f0c05
·
verified ·
1 Parent(s): 9dd7b4e

Training in progress, step 4950, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:73320e76344a133855f71d58f5599b5bab5bec21149e32ab22ea8639c81b6efa
3
  size 527048968
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f80ba914040554f1d90e5449fb288b4da5b6e06b0d76aa98ce7dddde9700b42e
3
  size 527048968
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:df0f0d4523a1748c8d67b29c00bcdc95ba64d1d0d15e03f3aaf492af944d8a42
3
  size 1054135994
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1b5a85cf2dc26270c152d94b20c59b9328d1c1900e3688d967f5caa95b453a21
3
  size 1054135994
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:790c395b0a5037d7be1e3d6aec472475bfb03f273b97c2103b3eaeeee641cbe6
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:db84a3afeb4f95e4ac798b091171126770595698eb1a4e09a848baf2239c44e8
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0301af88bd95ce6b3924705dcc39f92acccc19dd6a0525d5021e46ffe9ebde47
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1fad51fab4121c7172f2200154af188e4ded1ceaffc87bf0a38752fa2cf99398
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 0.7166205048561096,
3
  "best_model_checkpoint": "./output/checkpoint-450",
4
- "epoch": 208.69565217391303,
5
  "eval_steps": 150,
6
- "global_step": 4800,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -4647,6 +4647,151 @@
4647
  "EMA_steps_per_second": 21.511,
4648
  "epoch": 208.69565217391303,
4649
  "step": 4800
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4650
  }
4651
  ],
4652
  "logging_steps": 10,
@@ -4666,7 +4811,7 @@
4666
  "attributes": {}
4667
  }
4668
  },
4669
- "total_flos": 1.2352177659543552e+17,
4670
  "train_batch_size": 4,
4671
  "trial_name": null,
4672
  "trial_params": null
 
1
  {
2
  "best_metric": 0.7166205048561096,
3
  "best_model_checkpoint": "./output/checkpoint-450",
4
+ "epoch": 215.2173913043478,
5
  "eval_steps": 150,
6
+ "global_step": 4950,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
4647
  "EMA_steps_per_second": 21.511,
4648
  "epoch": 208.69565217391303,
4649
  "step": 4800
4650
+ },
4651
+ {
4652
+ "epoch": 209.1304347826087,
4653
+ "grad_norm": 2.1667871475219727,
4654
+ "learning_rate": 1.5295805741494488e-06,
4655
+ "loss": 0.222,
4656
+ "step": 4810
4657
+ },
4658
+ {
4659
+ "epoch": 209.56521739130434,
4660
+ "grad_norm": 1.5375043153762817,
4661
+ "learning_rate": 1.5295649612312986e-06,
4662
+ "loss": 0.2039,
4663
+ "step": 4820
4664
+ },
4665
+ {
4666
+ "epoch": 210.0,
4667
+ "grad_norm": 4.258355617523193,
4668
+ "learning_rate": 1.5295490452590467e-06,
4669
+ "loss": 0.2327,
4670
+ "step": 4830
4671
+ },
4672
+ {
4673
+ "epoch": 210.43478260869566,
4674
+ "grad_norm": 3.2269175052642822,
4675
+ "learning_rate": 1.5295328262390007e-06,
4676
+ "loss": 0.2222,
4677
+ "step": 4840
4678
+ },
4679
+ {
4680
+ "epoch": 210.8695652173913,
4681
+ "grad_norm": 1.8290541172027588,
4682
+ "learning_rate": 1.52951630417759e-06,
4683
+ "loss": 0.2446,
4684
+ "step": 4850
4685
+ },
4686
+ {
4687
+ "epoch": 211.30434782608697,
4688
+ "grad_norm": 1.5938493013381958,
4689
+ "learning_rate": 1.529499479081363e-06,
4690
+ "loss": 0.2348,
4691
+ "step": 4860
4692
+ },
4693
+ {
4694
+ "epoch": 211.7391304347826,
4695
+ "grad_norm": 3.224069118499756,
4696
+ "learning_rate": 1.529482350956989e-06,
4697
+ "loss": 0.2,
4698
+ "step": 4870
4699
+ },
4700
+ {
4701
+ "epoch": 212.17391304347825,
4702
+ "grad_norm": 1.7785706520080566,
4703
+ "learning_rate": 1.5294649198112566e-06,
4704
+ "loss": 0.2512,
4705
+ "step": 4880
4706
+ },
4707
+ {
4708
+ "epoch": 212.6086956521739,
4709
+ "grad_norm": 1.4701004028320312,
4710
+ "learning_rate": 1.5294471856510751e-06,
4711
+ "loss": 0.2113,
4712
+ "step": 4890
4713
+ },
4714
+ {
4715
+ "epoch": 213.04347826086956,
4716
+ "grad_norm": 1.8032009601593018,
4717
+ "learning_rate": 1.5294291484834741e-06,
4718
+ "loss": 0.2194,
4719
+ "step": 4900
4720
+ },
4721
+ {
4722
+ "epoch": 213.47826086956522,
4723
+ "grad_norm": 1.9518096446990967,
4724
+ "learning_rate": 1.5294108083156023e-06,
4725
+ "loss": 0.2541,
4726
+ "step": 4910
4727
+ },
4728
+ {
4729
+ "epoch": 213.91304347826087,
4730
+ "grad_norm": 1.6218771934509277,
4731
+ "learning_rate": 1.5293921651547298e-06,
4732
+ "loss": 0.2186,
4733
+ "step": 4920
4734
+ },
4735
+ {
4736
+ "epoch": 214.34782608695653,
4737
+ "grad_norm": 2.399592399597168,
4738
+ "learning_rate": 1.529373219008246e-06,
4739
+ "loss": 0.2156,
4740
+ "step": 4930
4741
+ },
4742
+ {
4743
+ "epoch": 214.7826086956522,
4744
+ "grad_norm": 1.714321494102478,
4745
+ "learning_rate": 1.52935396988366e-06,
4746
+ "loss": 0.2162,
4747
+ "step": 4940
4748
+ },
4749
+ {
4750
+ "epoch": 215.2173913043478,
4751
+ "grad_norm": 2.624946355819702,
4752
+ "learning_rate": 1.5293344177886024e-06,
4753
+ "loss": 0.1922,
4754
+ "step": 4950
4755
+ },
4756
+ {
4757
+ "epoch": 215.2173913043478,
4758
+ "eval_loss": 0.9804704785346985,
4759
+ "eval_runtime": 0.404,
4760
+ "eval_samples_per_second": 24.75,
4761
+ "eval_steps_per_second": 24.75,
4762
+ "step": 4950
4763
+ },
4764
+ {
4765
+ "Start_State_loss": 0.8609819412231445,
4766
+ "Start_State_runtime": 0.3989,
4767
+ "Start_State_samples_per_second": 25.072,
4768
+ "Start_State_steps_per_second": 25.072,
4769
+ "epoch": 215.2173913043478,
4770
+ "step": 4950
4771
+ },
4772
+ {
4773
+ "Raw_Model_loss": 0.9804704785346985,
4774
+ "Raw_Model_runtime": 0.402,
4775
+ "Raw_Model_samples_per_second": 24.873,
4776
+ "Raw_Model_steps_per_second": 24.873,
4777
+ "epoch": 215.2173913043478,
4778
+ "step": 4950
4779
+ },
4780
+ {
4781
+ "SWA_loss": 0.8207080960273743,
4782
+ "SWA_runtime": 0.3968,
4783
+ "SWA_samples_per_second": 25.201,
4784
+ "SWA_steps_per_second": 25.201,
4785
+ "epoch": 215.2173913043478,
4786
+ "step": 4950
4787
+ },
4788
+ {
4789
+ "EMA_loss": 0.8598450422286987,
4790
+ "EMA_runtime": 0.4316,
4791
+ "EMA_samples_per_second": 23.17,
4792
+ "EMA_steps_per_second": 23.17,
4793
+ "epoch": 215.2173913043478,
4794
+ "step": 4950
4795
  }
4796
  ],
4797
  "logging_steps": 10,
 
4811
  "attributes": {}
4812
  }
4813
  },
4814
+ "total_flos": 1.2725269073480909e+17,
4815
  "train_batch_size": 4,
4816
  "trial_name": null,
4817
  "trial_params": null