mohammadmahdinouri commited on
Commit
b19a023
·
verified ·
1 Parent(s): b7c5d3d

Training in progress, step 7000, checkpoint

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:025bd3895676bb45f85e978709ee3100916e8b661a4acf118d66da1a34578c20
3
  size 487156538
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a619d173a81b959c06c6819e63784a3964cf704234614a53b41a95f8c4ce423b
3
  size 487156538
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9cf7ac773382c8d1aee6b04a9257745ebc6433c31bf32a35f2c28ca8a787ce38
3
  size 1059459406
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fc07de972c404bd9d03e65d6a0a8bb8a57f33213d57e57b51c19d276698a2990
3
  size 1059459406
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:05cc36ea509ae3d9ed977bcbbb89394adc9cfa825eece71f5a8c15d91b056c25
3
  size 14960
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fde34039d7b04934a891fddf8651f7147686cc194dd14ef9c544d9f194e3db54
3
  size 14960
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7625b80d9de0d6ccbc24d3a4c5243fa59067b74a09e5adcbf41abd0b3dc345bd
3
  size 14960
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c5d095629c4afecfa399dffed86284dc4231689f617f0e254b3490299c477dd5
3
  size 14960
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9e54e106208b222d163a78aeaa2cd5bd8e56e84cd4e12d099c444853b53df5a7
3
  size 14960
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e6404ff16418ff06858ba815c4899c94a4c015e7870eab3f1b01051d9d511b73
3
  size 14960
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c3e76718220132de3d23a940da7a48146836d61db9316b76dbeaedf3227d328d
3
  size 14960
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b933704c82ebaf750aa2519cd157aa39099844e58ed4ac2bed0623c91353a70d
3
  size 14960
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ab2e7c5d1b7b91b754d6d981240bbb0789c029bfc4c8a94a02e7d3189581b5fd
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cfcd8a09e8e46c589c8638cc20283a9b31e9d60ec45a6122361751489d45607f
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.012674827305477964,
6
  "eval_steps": 500,
7
- "global_step": 6500,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -4558,6 +4558,356 @@
4558
  "learning_rate": 0.0004980497158287391,
4559
  "loss": 17.1964,
4560
  "step": 6500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4561
  }
4562
  ],
4563
  "logging_steps": 10,
@@ -4577,7 +4927,7 @@
4577
  "attributes": {}
4578
  }
4579
  },
4580
- "total_flos": 1.4462433847435854e+19,
4581
  "train_batch_size": 48,
4582
  "trial_name": null,
4583
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.01364981402128396,
6
  "eval_steps": 500,
7
+ "global_step": 7000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
4558
  "learning_rate": 0.0004980497158287391,
4559
  "loss": 17.1964,
4560
  "step": 6500
4561
+ },
4562
+ {
4563
+ "epoch": 0.012694327039794083,
4564
+ "grad_norm": 6.15625,
4565
+ "learning_rate": 0.0004980464648132845,
4566
+ "loss": 17.1415,
4567
+ "step": 6510
4568
+ },
4569
+ {
4570
+ "epoch": 0.012713826774110202,
4571
+ "grad_norm": 8.5,
4572
+ "learning_rate": 0.0004980432137978297,
4573
+ "loss": 17.2665,
4574
+ "step": 6520
4575
+ },
4576
+ {
4577
+ "epoch": 0.012733326508426323,
4578
+ "grad_norm": 8.875,
4579
+ "learning_rate": 0.000498039962782375,
4580
+ "loss": 17.1203,
4581
+ "step": 6530
4582
+ },
4583
+ {
4584
+ "epoch": 0.012752826242742443,
4585
+ "grad_norm": 9.1875,
4586
+ "learning_rate": 0.0004980367117669203,
4587
+ "loss": 17.1298,
4588
+ "step": 6540
4589
+ },
4590
+ {
4591
+ "epoch": 0.012772325977058562,
4592
+ "grad_norm": 9.875,
4593
+ "learning_rate": 0.0004980334607514657,
4594
+ "loss": 17.1376,
4595
+ "step": 6550
4596
+ },
4597
+ {
4598
+ "epoch": 0.012791825711374683,
4599
+ "grad_norm": 8.5625,
4600
+ "learning_rate": 0.000498030209736011,
4601
+ "loss": 17.1568,
4602
+ "step": 6560
4603
+ },
4604
+ {
4605
+ "epoch": 0.012811325445690802,
4606
+ "grad_norm": 7.34375,
4607
+ "learning_rate": 0.0004980269587205563,
4608
+ "loss": 17.1515,
4609
+ "step": 6570
4610
+ },
4611
+ {
4612
+ "epoch": 0.012830825180006923,
4613
+ "grad_norm": 7.28125,
4614
+ "learning_rate": 0.0004980237077051017,
4615
+ "loss": 17.2512,
4616
+ "step": 6580
4617
+ },
4618
+ {
4619
+ "epoch": 0.012850324914323042,
4620
+ "grad_norm": 6.65625,
4621
+ "learning_rate": 0.000498020456689647,
4622
+ "loss": 17.1904,
4623
+ "step": 6590
4624
+ },
4625
+ {
4626
+ "epoch": 0.012869824648639162,
4627
+ "grad_norm": 8.75,
4628
+ "learning_rate": 0.0004980172056741923,
4629
+ "loss": 17.1924,
4630
+ "step": 6600
4631
+ },
4632
+ {
4633
+ "epoch": 0.012889324382955283,
4634
+ "grad_norm": 8.6875,
4635
+ "learning_rate": 0.0004980139546587376,
4636
+ "loss": 17.179,
4637
+ "step": 6610
4638
+ },
4639
+ {
4640
+ "epoch": 0.012908824117271402,
4641
+ "grad_norm": 6.65625,
4642
+ "learning_rate": 0.000498010703643283,
4643
+ "loss": 17.1094,
4644
+ "step": 6620
4645
+ },
4646
+ {
4647
+ "epoch": 0.012928323851587523,
4648
+ "grad_norm": 8.1875,
4649
+ "learning_rate": 0.0004980074526278283,
4650
+ "loss": 17.2043,
4651
+ "step": 6630
4652
+ },
4653
+ {
4654
+ "epoch": 0.012947823585903642,
4655
+ "grad_norm": 9.5,
4656
+ "learning_rate": 0.0004980042016123736,
4657
+ "loss": 17.3089,
4658
+ "step": 6640
4659
+ },
4660
+ {
4661
+ "epoch": 0.012967323320219762,
4662
+ "grad_norm": 7.375,
4663
+ "learning_rate": 0.000498000950596919,
4664
+ "loss": 17.1986,
4665
+ "step": 6650
4666
+ },
4667
+ {
4668
+ "epoch": 0.012986823054535883,
4669
+ "grad_norm": 6.78125,
4670
+ "learning_rate": 0.0004979976995814643,
4671
+ "loss": 17.2439,
4672
+ "step": 6660
4673
+ },
4674
+ {
4675
+ "epoch": 0.013006322788852002,
4676
+ "grad_norm": 6.8125,
4677
+ "learning_rate": 0.0004979944485660096,
4678
+ "loss": 17.2103,
4679
+ "step": 6670
4680
+ },
4681
+ {
4682
+ "epoch": 0.013025822523168121,
4683
+ "grad_norm": 7.4375,
4684
+ "learning_rate": 0.0004979911975505549,
4685
+ "loss": 17.1663,
4686
+ "step": 6680
4687
+ },
4688
+ {
4689
+ "epoch": 0.013045322257484242,
4690
+ "grad_norm": 7.15625,
4691
+ "learning_rate": 0.0004979879465351003,
4692
+ "loss": 17.2314,
4693
+ "step": 6690
4694
+ },
4695
+ {
4696
+ "epoch": 0.013064821991800361,
4697
+ "grad_norm": 7.09375,
4698
+ "learning_rate": 0.0004979846955196456,
4699
+ "loss": 17.1084,
4700
+ "step": 6700
4701
+ },
4702
+ {
4703
+ "epoch": 0.013084321726116482,
4704
+ "grad_norm": 8.1875,
4705
+ "learning_rate": 0.0004979814445041909,
4706
+ "loss": 17.2277,
4707
+ "step": 6710
4708
+ },
4709
+ {
4710
+ "epoch": 0.013103821460432602,
4711
+ "grad_norm": 6.90625,
4712
+ "learning_rate": 0.0004979781934887363,
4713
+ "loss": 17.2464,
4714
+ "step": 6720
4715
+ },
4716
+ {
4717
+ "epoch": 0.013123321194748721,
4718
+ "grad_norm": 10.0625,
4719
+ "learning_rate": 0.0004979749424732816,
4720
+ "loss": 17.1748,
4721
+ "step": 6730
4722
+ },
4723
+ {
4724
+ "epoch": 0.013142820929064842,
4725
+ "grad_norm": 10.5625,
4726
+ "learning_rate": 0.0004979716914578268,
4727
+ "loss": 17.3005,
4728
+ "step": 6740
4729
+ },
4730
+ {
4731
+ "epoch": 0.013162320663380961,
4732
+ "grad_norm": 9.75,
4733
+ "learning_rate": 0.0004979684404423721,
4734
+ "loss": 17.2685,
4735
+ "step": 6750
4736
+ },
4737
+ {
4738
+ "epoch": 0.013181820397697082,
4739
+ "grad_norm": 13.25,
4740
+ "learning_rate": 0.0004979651894269175,
4741
+ "loss": 17.1753,
4742
+ "step": 6760
4743
+ },
4744
+ {
4745
+ "epoch": 0.013201320132013201,
4746
+ "grad_norm": 11.0,
4747
+ "learning_rate": 0.0004979619384114628,
4748
+ "loss": 17.1454,
4749
+ "step": 6770
4750
+ },
4751
+ {
4752
+ "epoch": 0.01322081986632932,
4753
+ "grad_norm": 8.25,
4754
+ "learning_rate": 0.0004979586873960081,
4755
+ "loss": 17.0365,
4756
+ "step": 6780
4757
+ },
4758
+ {
4759
+ "epoch": 0.013240319600645442,
4760
+ "grad_norm": 7.0,
4761
+ "learning_rate": 0.0004979554363805534,
4762
+ "loss": 17.2339,
4763
+ "step": 6790
4764
+ },
4765
+ {
4766
+ "epoch": 0.013259819334961561,
4767
+ "grad_norm": 13.0625,
4768
+ "learning_rate": 0.0004979521853650988,
4769
+ "loss": 17.1252,
4770
+ "step": 6800
4771
+ },
4772
+ {
4773
+ "epoch": 0.01327931906927768,
4774
+ "grad_norm": 7.53125,
4775
+ "learning_rate": 0.0004979489343496441,
4776
+ "loss": 17.1759,
4777
+ "step": 6810
4778
+ },
4779
+ {
4780
+ "epoch": 0.013298818803593801,
4781
+ "grad_norm": 6.4375,
4782
+ "learning_rate": 0.0004979456833341894,
4783
+ "loss": 17.1725,
4784
+ "step": 6820
4785
+ },
4786
+ {
4787
+ "epoch": 0.01331831853790992,
4788
+ "grad_norm": 8.1875,
4789
+ "learning_rate": 0.0004979424323187348,
4790
+ "loss": 17.1098,
4791
+ "step": 6830
4792
+ },
4793
+ {
4794
+ "epoch": 0.013337818272226042,
4795
+ "grad_norm": 7.9375,
4796
+ "learning_rate": 0.0004979391813032801,
4797
+ "loss": 17.2499,
4798
+ "step": 6840
4799
+ },
4800
+ {
4801
+ "epoch": 0.013357318006542161,
4802
+ "grad_norm": 10.75,
4803
+ "learning_rate": 0.0004979359302878254,
4804
+ "loss": 17.3004,
4805
+ "step": 6850
4806
+ },
4807
+ {
4808
+ "epoch": 0.01337681774085828,
4809
+ "grad_norm": 6.90625,
4810
+ "learning_rate": 0.0004979326792723707,
4811
+ "loss": 17.1902,
4812
+ "step": 6860
4813
+ },
4814
+ {
4815
+ "epoch": 0.013396317475174401,
4816
+ "grad_norm": 7.65625,
4817
+ "learning_rate": 0.0004979294282569161,
4818
+ "loss": 17.0263,
4819
+ "step": 6870
4820
+ },
4821
+ {
4822
+ "epoch": 0.01341581720949052,
4823
+ "grad_norm": 9.375,
4824
+ "learning_rate": 0.0004979261772414614,
4825
+ "loss": 17.2367,
4826
+ "step": 6880
4827
+ },
4828
+ {
4829
+ "epoch": 0.013435316943806641,
4830
+ "grad_norm": 7.6875,
4831
+ "learning_rate": 0.0004979229262260067,
4832
+ "loss": 17.2496,
4833
+ "step": 6890
4834
+ },
4835
+ {
4836
+ "epoch": 0.01345481667812276,
4837
+ "grad_norm": 7.0,
4838
+ "learning_rate": 0.0004979196752105521,
4839
+ "loss": 17.0661,
4840
+ "step": 6900
4841
+ },
4842
+ {
4843
+ "epoch": 0.01347431641243888,
4844
+ "grad_norm": 8.0625,
4845
+ "learning_rate": 0.0004979164241950974,
4846
+ "loss": 17.1662,
4847
+ "step": 6910
4848
+ },
4849
+ {
4850
+ "epoch": 0.013493816146755001,
4851
+ "grad_norm": 7.40625,
4852
+ "learning_rate": 0.0004979131731796427,
4853
+ "loss": 17.2432,
4854
+ "step": 6920
4855
+ },
4856
+ {
4857
+ "epoch": 0.01351331588107112,
4858
+ "grad_norm": 9.5,
4859
+ "learning_rate": 0.000497909922164188,
4860
+ "loss": 17.2159,
4861
+ "step": 6930
4862
+ },
4863
+ {
4864
+ "epoch": 0.01353281561538724,
4865
+ "grad_norm": 7.6875,
4866
+ "learning_rate": 0.0004979066711487334,
4867
+ "loss": 17.1567,
4868
+ "step": 6940
4869
+ },
4870
+ {
4871
+ "epoch": 0.01355231534970336,
4872
+ "grad_norm": 6.8125,
4873
+ "learning_rate": 0.0004979034201332787,
4874
+ "loss": 17.2054,
4875
+ "step": 6950
4876
+ },
4877
+ {
4878
+ "epoch": 0.01357181508401948,
4879
+ "grad_norm": 8.3125,
4880
+ "learning_rate": 0.000497900169117824,
4881
+ "loss": 17.1396,
4882
+ "step": 6960
4883
+ },
4884
+ {
4885
+ "epoch": 0.0135913148183356,
4886
+ "grad_norm": 7.875,
4887
+ "learning_rate": 0.0004978969181023694,
4888
+ "loss": 17.2691,
4889
+ "step": 6970
4890
+ },
4891
+ {
4892
+ "epoch": 0.01361081455265172,
4893
+ "grad_norm": 9.0,
4894
+ "learning_rate": 0.0004978936670869146,
4895
+ "loss": 17.1819,
4896
+ "step": 6980
4897
+ },
4898
+ {
4899
+ "epoch": 0.01363031428696784,
4900
+ "grad_norm": 7.75,
4901
+ "learning_rate": 0.0004978904160714599,
4902
+ "loss": 17.2082,
4903
+ "step": 6990
4904
+ },
4905
+ {
4906
+ "epoch": 0.01364981402128396,
4907
+ "grad_norm": 9.375,
4908
+ "learning_rate": 0.0004978871650560052,
4909
+ "loss": 17.2572,
4910
+ "step": 7000
4911
  }
4912
  ],
4913
  "logging_steps": 10,
 
4927
  "attributes": {}
4928
  }
4929
  },
4930
+ "total_flos": 1.557487423730588e+19,
4931
  "train_batch_size": 48,
4932
  "trial_name": null,
4933
  "trial_params": null