mohammadmahdinouri commited on
Commit
b17dc67
·
verified ·
1 Parent(s): 0e8e5e5

Training in progress, step 83000, checkpoint

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6fdea62ab3807d5c83f086f5151ea04cbf9ee9578a7b8e17883bf2d371b73c59
3
  size 304481530
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:86867d7114034c5ce9cc9d029da201dff42a389f63d4b662bb9a3aaa72d02379
3
  size 304481530
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a88a14f277e372edcbef1004c41517572ec49368da6f457c37072a723ef15a5b
3
  size 402029570
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:81e0d480b36bf30d291b97e091e18788bf233399a4446897865328f68d72beb6
3
  size 402029570
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1e1adfa857ebdc86ec7fd943675fc57102e813288b2aafc927551a884f4b79c6
3
  size 14960
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a8d7f9758da2d3e14d7f42182479d86315138a7b7b34199b33bbe616fd250fd1
3
  size 14960
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:57a30f5d80cfa3dad198a9cbb5668d7cb89aba9aa41f1f44032001d4e5f9fed3
3
  size 14960
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b91440a4692b5f169135f333404f68fc858d96847193631d33bcd1a9bc277a1e
3
  size 14960
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:50a1704d8cd0ae1d1da5487260fd6a9d83621f0daf881048aad7559ce485f0af
3
  size 14960
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:07ba5f63f4711c8ab404d662c7cb13d35ae312e00a001da6fffa61922c3b4f44
3
  size 14960
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:512ff917732395bd5049da89bf880b825c8d71316cd69e7277929f7763966d5b
3
  size 14960
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6e5adf39b25c897bc63e4019ed90698924cec0f2e7d40940eb00ab5a52f2cef4
3
  size 14960
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:13c457672f739f0501828eb93166275a4b9a832449b61ea1951a9a198cb7e4de
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4e6939fa0bb635077cb363f18f1780aac2e900315e1171d36449035e74f63bb8
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.12146780510638802,
6
  "eval_steps": 500,
7
- "global_step": 82000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -28708,6 +28708,356 @@
28708
  "learning_rate": 0.0004798740867909171,
28709
  "loss": 16.2152,
28710
  "step": 82000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28711
  }
28712
  ],
28713
  "logging_steps": 20,
@@ -28727,7 +29077,7 @@
28727
  "attributes": {}
28728
  }
28729
  },
28730
- "total_flos": 6.02907206476586e+19,
28731
  "train_batch_size": 48,
28732
  "trial_name": null,
28733
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.12294911980280739,
6
  "eval_steps": 500,
7
+ "global_step": 83000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
28708
  "learning_rate": 0.0004798740867909171,
28709
  "loss": 16.2152,
28710
  "step": 82000
28711
+ },
28712
+ {
28713
+ "epoch": 0.12149743140031641,
28714
+ "grad_norm": 6.03125,
28715
+ "learning_rate": 0.00047986914785590955,
28716
+ "loss": 16.2106,
28717
+ "step": 82020
28718
+ },
28719
+ {
28720
+ "epoch": 0.1215270576942448,
28721
+ "grad_norm": 6.4375,
28722
+ "learning_rate": 0.000479864208920902,
28723
+ "loss": 16.2421,
28724
+ "step": 82040
28725
+ },
28726
+ {
28727
+ "epoch": 0.12155668398817318,
28728
+ "grad_norm": 6.6875,
28729
+ "learning_rate": 0.0004798592699858944,
28730
+ "loss": 16.2096,
28731
+ "step": 82060
28732
+ },
28733
+ {
28734
+ "epoch": 0.12158631028210157,
28735
+ "grad_norm": 6.40625,
28736
+ "learning_rate": 0.00047985433105088683,
28737
+ "loss": 16.2556,
28738
+ "step": 82080
28739
+ },
28740
+ {
28741
+ "epoch": 0.12161593657602995,
28742
+ "grad_norm": 6.75,
28743
+ "learning_rate": 0.0004798493921158793,
28744
+ "loss": 16.183,
28745
+ "step": 82100
28746
+ },
28747
+ {
28748
+ "epoch": 0.12164556286995834,
28749
+ "grad_norm": 6.21875,
28750
+ "learning_rate": 0.00047984445318087173,
28751
+ "loss": 16.232,
28752
+ "step": 82120
28753
+ },
28754
+ {
28755
+ "epoch": 0.12167518916388673,
28756
+ "grad_norm": 7.3125,
28757
+ "learning_rate": 0.0004798395142458641,
28758
+ "loss": 16.2333,
28759
+ "step": 82140
28760
+ },
28761
+ {
28762
+ "epoch": 0.12170481545781513,
28763
+ "grad_norm": 6.15625,
28764
+ "learning_rate": 0.00047983457531085657,
28765
+ "loss": 16.1709,
28766
+ "step": 82160
28767
+ },
28768
+ {
28769
+ "epoch": 0.12173444175174351,
28770
+ "grad_norm": 6.96875,
28771
+ "learning_rate": 0.000479829636375849,
28772
+ "loss": 16.243,
28773
+ "step": 82180
28774
+ },
28775
+ {
28776
+ "epoch": 0.1217640680456719,
28777
+ "grad_norm": 6.40625,
28778
+ "learning_rate": 0.00047982469744084147,
28779
+ "loss": 16.2207,
28780
+ "step": 82200
28781
+ },
28782
+ {
28783
+ "epoch": 0.12179369433960029,
28784
+ "grad_norm": 6.90625,
28785
+ "learning_rate": 0.00047981975850583386,
28786
+ "loss": 16.2444,
28787
+ "step": 82220
28788
+ },
28789
+ {
28790
+ "epoch": 0.12182332063352867,
28791
+ "grad_norm": 6.65625,
28792
+ "learning_rate": 0.0004798148195708263,
28793
+ "loss": 16.1667,
28794
+ "step": 82240
28795
+ },
28796
+ {
28797
+ "epoch": 0.12185294692745706,
28798
+ "grad_norm": 6.53125,
28799
+ "learning_rate": 0.00047980988063581876,
28800
+ "loss": 16.1988,
28801
+ "step": 82260
28802
+ },
28803
+ {
28804
+ "epoch": 0.12188257322138545,
28805
+ "grad_norm": 6.59375,
28806
+ "learning_rate": 0.0004798049417008112,
28807
+ "loss": 16.216,
28808
+ "step": 82280
28809
+ },
28810
+ {
28811
+ "epoch": 0.12191219951531383,
28812
+ "grad_norm": 7.34375,
28813
+ "learning_rate": 0.0004798000027658036,
28814
+ "loss": 16.1921,
28815
+ "step": 82300
28816
+ },
28817
+ {
28818
+ "epoch": 0.12194182580924222,
28819
+ "grad_norm": 6.65625,
28820
+ "learning_rate": 0.0004797950638307961,
28821
+ "loss": 16.2033,
28822
+ "step": 82320
28823
+ },
28824
+ {
28825
+ "epoch": 0.1219714521031706,
28826
+ "grad_norm": 5.875,
28827
+ "learning_rate": 0.0004797901248957885,
28828
+ "loss": 16.1843,
28829
+ "step": 82340
28830
+ },
28831
+ {
28832
+ "epoch": 0.12200107839709899,
28833
+ "grad_norm": 6.5625,
28834
+ "learning_rate": 0.0004797851859607809,
28835
+ "loss": 16.1888,
28836
+ "step": 82360
28837
+ },
28838
+ {
28839
+ "epoch": 0.12203070469102738,
28840
+ "grad_norm": 5.6875,
28841
+ "learning_rate": 0.00047978024702577334,
28842
+ "loss": 16.1824,
28843
+ "step": 82380
28844
+ },
28845
+ {
28846
+ "epoch": 0.12206033098495576,
28847
+ "grad_norm": 6.125,
28848
+ "learning_rate": 0.0004797753080907658,
28849
+ "loss": 16.1561,
28850
+ "step": 82400
28851
+ },
28852
+ {
28853
+ "epoch": 0.12208995727888415,
28854
+ "grad_norm": 7.25,
28855
+ "learning_rate": 0.00047977036915575823,
28856
+ "loss": 16.2548,
28857
+ "step": 82420
28858
+ },
28859
+ {
28860
+ "epoch": 0.12211958357281254,
28861
+ "grad_norm": 8.0625,
28862
+ "learning_rate": 0.0004797654302207506,
28863
+ "loss": 16.2019,
28864
+ "step": 82440
28865
+ },
28866
+ {
28867
+ "epoch": 0.12214920986674092,
28868
+ "grad_norm": 7.1875,
28869
+ "learning_rate": 0.00047976049128574307,
28870
+ "loss": 16.1525,
28871
+ "step": 82460
28872
+ },
28873
+ {
28874
+ "epoch": 0.12217883616066932,
28875
+ "grad_norm": 7.03125,
28876
+ "learning_rate": 0.0004797555523507355,
28877
+ "loss": 16.2534,
28878
+ "step": 82480
28879
+ },
28880
+ {
28881
+ "epoch": 0.12220846245459771,
28882
+ "grad_norm": 6.9375,
28883
+ "learning_rate": 0.00047975061341572797,
28884
+ "loss": 16.1858,
28885
+ "step": 82500
28886
+ },
28887
+ {
28888
+ "epoch": 0.1222380887485261,
28889
+ "grad_norm": 6.6875,
28890
+ "learning_rate": 0.00047974567448072036,
28891
+ "loss": 16.1439,
28892
+ "step": 82520
28893
+ },
28894
+ {
28895
+ "epoch": 0.12226771504245448,
28896
+ "grad_norm": 6.4375,
28897
+ "learning_rate": 0.0004797407355457128,
28898
+ "loss": 16.2245,
28899
+ "step": 82540
28900
+ },
28901
+ {
28902
+ "epoch": 0.12229734133638287,
28903
+ "grad_norm": 6.46875,
28904
+ "learning_rate": 0.00047973579661070526,
28905
+ "loss": 16.1173,
28906
+ "step": 82560
28907
+ },
28908
+ {
28909
+ "epoch": 0.12232696763031126,
28910
+ "grad_norm": 6.96875,
28911
+ "learning_rate": 0.0004797308576756977,
28912
+ "loss": 16.1917,
28913
+ "step": 82580
28914
+ },
28915
+ {
28916
+ "epoch": 0.12235659392423964,
28917
+ "grad_norm": 6.46875,
28918
+ "learning_rate": 0.0004797259187406901,
28919
+ "loss": 16.2421,
28920
+ "step": 82600
28921
+ },
28922
+ {
28923
+ "epoch": 0.12238622021816803,
28924
+ "grad_norm": 6.875,
28925
+ "learning_rate": 0.0004797209798056826,
28926
+ "loss": 16.2,
28927
+ "step": 82620
28928
+ },
28929
+ {
28930
+ "epoch": 0.12241584651209642,
28931
+ "grad_norm": 6.28125,
28932
+ "learning_rate": 0.000479716040870675,
28933
+ "loss": 16.2038,
28934
+ "step": 82640
28935
+ },
28936
+ {
28937
+ "epoch": 0.1224454728060248,
28938
+ "grad_norm": 5.96875,
28939
+ "learning_rate": 0.00047971110193566744,
28940
+ "loss": 16.2384,
28941
+ "step": 82660
28942
+ },
28943
+ {
28944
+ "epoch": 0.12247509909995319,
28945
+ "grad_norm": 6.1875,
28946
+ "learning_rate": 0.00047970616300065984,
28947
+ "loss": 16.2127,
28948
+ "step": 82680
28949
+ },
28950
+ {
28951
+ "epoch": 0.12250472539388158,
28952
+ "grad_norm": 6.28125,
28953
+ "learning_rate": 0.0004797012240656523,
28954
+ "loss": 16.235,
28955
+ "step": 82700
28956
+ },
28957
+ {
28958
+ "epoch": 0.12253435168780996,
28959
+ "grad_norm": 6.125,
28960
+ "learning_rate": 0.00047969628513064473,
28961
+ "loss": 16.1783,
28962
+ "step": 82720
28963
+ },
28964
+ {
28965
+ "epoch": 0.12256397798173835,
28966
+ "grad_norm": 7.28125,
28967
+ "learning_rate": 0.0004796913461956371,
28968
+ "loss": 16.227,
28969
+ "step": 82740
28970
+ },
28971
+ {
28972
+ "epoch": 0.12259360427566673,
28973
+ "grad_norm": 6.15625,
28974
+ "learning_rate": 0.00047968640726062957,
28975
+ "loss": 16.2354,
28976
+ "step": 82760
28977
+ },
28978
+ {
28979
+ "epoch": 0.12262323056959512,
28980
+ "grad_norm": 6.125,
28981
+ "learning_rate": 0.000479681468325622,
28982
+ "loss": 16.1881,
28983
+ "step": 82780
28984
+ },
28985
+ {
28986
+ "epoch": 0.12265285686352352,
28987
+ "grad_norm": 6.375,
28988
+ "learning_rate": 0.00047967652939061447,
28989
+ "loss": 16.2082,
28990
+ "step": 82800
28991
+ },
28992
+ {
28993
+ "epoch": 0.12268248315745191,
28994
+ "grad_norm": 6.375,
28995
+ "learning_rate": 0.00047967159045560686,
28996
+ "loss": 16.1181,
28997
+ "step": 82820
28998
+ },
28999
+ {
29000
+ "epoch": 0.1227121094513803,
29001
+ "grad_norm": 6.59375,
29002
+ "learning_rate": 0.0004796666515205993,
29003
+ "loss": 16.1534,
29004
+ "step": 82840
29005
+ },
29006
+ {
29007
+ "epoch": 0.12274173574530868,
29008
+ "grad_norm": 6.4375,
29009
+ "learning_rate": 0.00047966171258559176,
29010
+ "loss": 16.1316,
29011
+ "step": 82860
29012
+ },
29013
+ {
29014
+ "epoch": 0.12277136203923707,
29015
+ "grad_norm": 6.5,
29016
+ "learning_rate": 0.0004796567736505842,
29017
+ "loss": 16.1864,
29018
+ "step": 82880
29019
+ },
29020
+ {
29021
+ "epoch": 0.12280098833316545,
29022
+ "grad_norm": 7.15625,
29023
+ "learning_rate": 0.0004796518347155766,
29024
+ "loss": 16.1988,
29025
+ "step": 82900
29026
+ },
29027
+ {
29028
+ "epoch": 0.12283061462709384,
29029
+ "grad_norm": 6.5625,
29030
+ "learning_rate": 0.0004796468957805691,
29031
+ "loss": 16.2569,
29032
+ "step": 82920
29033
+ },
29034
+ {
29035
+ "epoch": 0.12286024092102223,
29036
+ "grad_norm": 6.40625,
29037
+ "learning_rate": 0.0004796419568455615,
29038
+ "loss": 16.1792,
29039
+ "step": 82940
29040
+ },
29041
+ {
29042
+ "epoch": 0.12288986721495061,
29043
+ "grad_norm": 6.53125,
29044
+ "learning_rate": 0.00047963701791055394,
29045
+ "loss": 16.2019,
29046
+ "step": 82960
29047
+ },
29048
+ {
29049
+ "epoch": 0.122919493508879,
29050
+ "grad_norm": 6.6875,
29051
+ "learning_rate": 0.00047963207897554634,
29052
+ "loss": 16.1815,
29053
+ "step": 82980
29054
+ },
29055
+ {
29056
+ "epoch": 0.12294911980280739,
29057
+ "grad_norm": 6.75,
29058
+ "learning_rate": 0.00047962714004053884,
29059
+ "loss": 16.2309,
29060
+ "step": 83000
29061
  }
29062
  ],
29063
  "logging_steps": 20,
 
29077
  "attributes": {}
29078
  }
29079
  },
29080
+ "total_flos": 6.102608488091877e+19,
29081
  "train_batch_size": 48,
29082
  "trial_name": null,
29083
  "trial_params": null
last-checkpoint/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fc3f551404b0d7edd833494ee70d9c95a722ebd26deaead78190bce345559dbd
3
  size 5432
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:78e73c5569e6c1326aedcb241444fa9deb29154b44bf64880d75f7e6d9e90132
3
  size 5432