mohammadmahdinouri commited on
Commit
c05bc60
·
verified ·
1 Parent(s): c7ec0df

Training in progress, step 57000, checkpoint

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5fe736c4aa2a01e7bdd450b3f5ad17d22bd6d998c21f3be88229c094c87c7e31
3
  size 304481530
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:972aa91ec388a1f2f04b57475bbe0ef1d7a488751339adb89aa78c0871d0f22b
3
  size 304481530
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f13c5595cffc9acc4fba913e67571bbfa169120e968c56adede64d35dc4a9983
3
  size 402029570
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1f588ba0d0b39a0c0daf2cb6afacca8a7aef1f4bc72fe4409ce0b2281d2e356a
3
  size 402029570
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:eefd6eabe10776e158c26b037c833f0a538e87ecc5b41f3ec5b83db2ee085222
3
  size 14960
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:93c5029373839975c8e2ce486239c3c93c8bcc84856a9726f25e6b39e80d4bdb
3
  size 14960
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:61b622224d0429fa788320c8e5bf7b4fa226b91d5779b03ff807c7a77c5801ff
3
  size 14960
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ba86940b99fa7512a6bd263e7bdaf7ba94fc8e695324bdfda4c03882f64aa78d
3
  size 14960
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:13eaed908712a1c285ee1e1812b438bbabf64c8443377b65c97ba88f1f1659c5
3
  size 14960
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cf8627c515e0a9fd4095a16f3cf6f960eebbddd06bd5667ffafe332a0150e802
3
  size 14960
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:511643e6249f4ea9212a1dfdf8dd72a9148b63815d4ef9de03948ab4598161f3
3
  size 14960
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:55937ecae83bb1b9ebb2721682f64ea1aca1aefba9e61d245b7d516977f878f9
3
  size 14960
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0989f631c4201212ca348622ae2d095f9b6b69c39f42732c5c97cef21592c5a6
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:039c09879ba9a48ef7918776fd751a67234de8e6a37518ae707982e7427ed8c9
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.0829536229994845,
6
  "eval_steps": 500,
7
- "global_step": 56000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -19608,6 +19608,356 @@
19608
  "learning_rate": 0.0004862947023007535,
19609
  "loss": 17.0628,
19610
  "step": 56000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19611
  }
19612
  ],
19613
  "logging_steps": 20,
@@ -19627,7 +19977,7 @@
19627
  "attributes": {}
19628
  }
19629
  },
19630
- "total_flos": 4.117128426295394e+19,
19631
  "train_batch_size": 48,
19632
  "trial_name": null,
19633
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.08443493769590386,
6
  "eval_steps": 500,
7
+ "global_step": 57000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
19608
  "learning_rate": 0.0004862947023007535,
19609
  "loss": 17.0628,
19610
  "step": 56000
19611
+ },
19612
+ {
19613
+ "epoch": 0.08298324929341289,
19614
+ "grad_norm": 6.6875,
19615
+ "learning_rate": 0.00048628976336574593,
19616
+ "loss": 17.0877,
19617
+ "step": 56020
19618
+ },
19619
+ {
19620
+ "epoch": 0.08301287558734127,
19621
+ "grad_norm": 6.78125,
19622
+ "learning_rate": 0.0004862848244307383,
19623
+ "loss": 17.0776,
19624
+ "step": 56040
19625
+ },
19626
+ {
19627
+ "epoch": 0.08304250188126966,
19628
+ "grad_norm": 8.25,
19629
+ "learning_rate": 0.0004862798854957308,
19630
+ "loss": 17.0322,
19631
+ "step": 56060
19632
+ },
19633
+ {
19634
+ "epoch": 0.08307212817519805,
19635
+ "grad_norm": 6.5,
19636
+ "learning_rate": 0.0004862749465607232,
19637
+ "loss": 17.0593,
19638
+ "step": 56080
19639
+ },
19640
+ {
19641
+ "epoch": 0.08310175446912645,
19642
+ "grad_norm": 7.0,
19643
+ "learning_rate": 0.00048627000762571567,
19644
+ "loss": 17.0977,
19645
+ "step": 56100
19646
+ },
19647
+ {
19648
+ "epoch": 0.08313138076305483,
19649
+ "grad_norm": 6.21875,
19650
+ "learning_rate": 0.00048626506869070806,
19651
+ "loss": 17.0193,
19652
+ "step": 56120
19653
+ },
19654
+ {
19655
+ "epoch": 0.08316100705698322,
19656
+ "grad_norm": 7.09375,
19657
+ "learning_rate": 0.00048626012975570057,
19658
+ "loss": 17.0925,
19659
+ "step": 56140
19660
+ },
19661
+ {
19662
+ "epoch": 0.0831906333509116,
19663
+ "grad_norm": 7.5,
19664
+ "learning_rate": 0.00048625519082069296,
19665
+ "loss": 17.04,
19666
+ "step": 56160
19667
+ },
19668
+ {
19669
+ "epoch": 0.08322025964483999,
19670
+ "grad_norm": 7.40625,
19671
+ "learning_rate": 0.0004862502518856854,
19672
+ "loss": 17.1382,
19673
+ "step": 56180
19674
+ },
19675
+ {
19676
+ "epoch": 0.08324988593876838,
19677
+ "grad_norm": 7.34375,
19678
+ "learning_rate": 0.0004862453129506778,
19679
+ "loss": 17.0622,
19680
+ "step": 56200
19681
+ },
19682
+ {
19683
+ "epoch": 0.08327951223269676,
19684
+ "grad_norm": 8.125,
19685
+ "learning_rate": 0.0004862403740156703,
19686
+ "loss": 17.0424,
19687
+ "step": 56220
19688
+ },
19689
+ {
19690
+ "epoch": 0.08330913852662515,
19691
+ "grad_norm": 6.96875,
19692
+ "learning_rate": 0.0004862354350806627,
19693
+ "loss": 17.0722,
19694
+ "step": 56240
19695
+ },
19696
+ {
19697
+ "epoch": 0.08333876482055354,
19698
+ "grad_norm": 6.9375,
19699
+ "learning_rate": 0.00048623049614565514,
19700
+ "loss": 17.036,
19701
+ "step": 56260
19702
+ },
19703
+ {
19704
+ "epoch": 0.08336839111448192,
19705
+ "grad_norm": 6.78125,
19706
+ "learning_rate": 0.00048622555721064754,
19707
+ "loss": 17.0758,
19708
+ "step": 56280
19709
+ },
19710
+ {
19711
+ "epoch": 0.08339801740841031,
19712
+ "grad_norm": 6.6875,
19713
+ "learning_rate": 0.00048622061827564004,
19714
+ "loss": 17.1354,
19715
+ "step": 56300
19716
+ },
19717
+ {
19718
+ "epoch": 0.0834276437023387,
19719
+ "grad_norm": 6.9375,
19720
+ "learning_rate": 0.00048621567934063243,
19721
+ "loss": 17.1577,
19722
+ "step": 56320
19723
+ },
19724
+ {
19725
+ "epoch": 0.08345726999626708,
19726
+ "grad_norm": 6.46875,
19727
+ "learning_rate": 0.0004862107404056248,
19728
+ "loss": 17.0566,
19729
+ "step": 56340
19730
+ },
19731
+ {
19732
+ "epoch": 0.08348689629019547,
19733
+ "grad_norm": 7.40625,
19734
+ "learning_rate": 0.0004862058014706173,
19735
+ "loss": 17.0633,
19736
+ "step": 56360
19737
+ },
19738
+ {
19739
+ "epoch": 0.08351652258412386,
19740
+ "grad_norm": 7.0,
19741
+ "learning_rate": 0.0004862008625356097,
19742
+ "loss": 17.0427,
19743
+ "step": 56380
19744
+ },
19745
+ {
19746
+ "epoch": 0.08354614887805224,
19747
+ "grad_norm": 6.875,
19748
+ "learning_rate": 0.00048619592360060217,
19749
+ "loss": 17.068,
19750
+ "step": 56400
19751
+ },
19752
+ {
19753
+ "epoch": 0.08357577517198064,
19754
+ "grad_norm": 7.71875,
19755
+ "learning_rate": 0.00048619098466559456,
19756
+ "loss": 17.0289,
19757
+ "step": 56420
19758
+ },
19759
+ {
19760
+ "epoch": 0.08360540146590903,
19761
+ "grad_norm": 7.21875,
19762
+ "learning_rate": 0.00048618604573058707,
19763
+ "loss": 17.0595,
19764
+ "step": 56440
19765
+ },
19766
+ {
19767
+ "epoch": 0.08363502775983742,
19768
+ "grad_norm": 6.84375,
19769
+ "learning_rate": 0.00048618110679557946,
19770
+ "loss": 17.0464,
19771
+ "step": 56460
19772
+ },
19773
+ {
19774
+ "epoch": 0.0836646540537658,
19775
+ "grad_norm": 6.875,
19776
+ "learning_rate": 0.0004861761678605719,
19777
+ "loss": 17.0377,
19778
+ "step": 56480
19779
+ },
19780
+ {
19781
+ "epoch": 0.08369428034769419,
19782
+ "grad_norm": 6.90625,
19783
+ "learning_rate": 0.0004861712289255643,
19784
+ "loss": 17.1083,
19785
+ "step": 56500
19786
+ },
19787
+ {
19788
+ "epoch": 0.08372390664162258,
19789
+ "grad_norm": 6.4375,
19790
+ "learning_rate": 0.0004861662899905568,
19791
+ "loss": 17.0156,
19792
+ "step": 56520
19793
+ },
19794
+ {
19795
+ "epoch": 0.08375353293555096,
19796
+ "grad_norm": 7.21875,
19797
+ "learning_rate": 0.0004861613510555492,
19798
+ "loss": 17.1009,
19799
+ "step": 56540
19800
+ },
19801
+ {
19802
+ "epoch": 0.08378315922947935,
19803
+ "grad_norm": 7.09375,
19804
+ "learning_rate": 0.00048615641212054164,
19805
+ "loss": 17.0541,
19806
+ "step": 56560
19807
+ },
19808
+ {
19809
+ "epoch": 0.08381278552340773,
19810
+ "grad_norm": 6.875,
19811
+ "learning_rate": 0.00048615147318553404,
19812
+ "loss": 17.0052,
19813
+ "step": 56580
19814
+ },
19815
+ {
19816
+ "epoch": 0.08384241181733612,
19817
+ "grad_norm": 6.78125,
19818
+ "learning_rate": 0.00048614653425052654,
19819
+ "loss": 17.0689,
19820
+ "step": 56600
19821
+ },
19822
+ {
19823
+ "epoch": 0.08387203811126451,
19824
+ "grad_norm": 6.65625,
19825
+ "learning_rate": 0.00048614159531551893,
19826
+ "loss": 17.0825,
19827
+ "step": 56620
19828
+ },
19829
+ {
19830
+ "epoch": 0.0839016644051929,
19831
+ "grad_norm": 6.90625,
19832
+ "learning_rate": 0.0004861366563805114,
19833
+ "loss": 17.0745,
19834
+ "step": 56640
19835
+ },
19836
+ {
19837
+ "epoch": 0.08393129069912128,
19838
+ "grad_norm": 6.375,
19839
+ "learning_rate": 0.0004861317174455038,
19840
+ "loss": 17.0943,
19841
+ "step": 56660
19842
+ },
19843
+ {
19844
+ "epoch": 0.08396091699304967,
19845
+ "grad_norm": 7.28125,
19846
+ "learning_rate": 0.0004861267785104962,
19847
+ "loss": 16.9957,
19848
+ "step": 56680
19849
+ },
19850
+ {
19851
+ "epoch": 0.08399054328697805,
19852
+ "grad_norm": 7.15625,
19853
+ "learning_rate": 0.00048612183957548867,
19854
+ "loss": 17.0411,
19855
+ "step": 56700
19856
+ },
19857
+ {
19858
+ "epoch": 0.08402016958090644,
19859
+ "grad_norm": 7.25,
19860
+ "learning_rate": 0.00048611690064048106,
19861
+ "loss": 17.0705,
19862
+ "step": 56720
19863
+ },
19864
+ {
19865
+ "epoch": 0.08404979587483484,
19866
+ "grad_norm": 7.15625,
19867
+ "learning_rate": 0.00048611196170547357,
19868
+ "loss": 17.0302,
19869
+ "step": 56740
19870
+ },
19871
+ {
19872
+ "epoch": 0.08407942216876323,
19873
+ "grad_norm": 7.25,
19874
+ "learning_rate": 0.00048610702277046596,
19875
+ "loss": 17.0697,
19876
+ "step": 56760
19877
+ },
19878
+ {
19879
+ "epoch": 0.08410904846269161,
19880
+ "grad_norm": 7.0625,
19881
+ "learning_rate": 0.0004861020838354584,
19882
+ "loss": 16.9995,
19883
+ "step": 56780
19884
+ },
19885
+ {
19886
+ "epoch": 0.08413867475662,
19887
+ "grad_norm": 6.625,
19888
+ "learning_rate": 0.0004860971449004508,
19889
+ "loss": 17.0243,
19890
+ "step": 56800
19891
+ },
19892
+ {
19893
+ "epoch": 0.08416830105054839,
19894
+ "grad_norm": 7.1875,
19895
+ "learning_rate": 0.0004860922059654433,
19896
+ "loss": 17.0269,
19897
+ "step": 56820
19898
+ },
19899
+ {
19900
+ "epoch": 0.08419792734447677,
19901
+ "grad_norm": 7.1875,
19902
+ "learning_rate": 0.0004860872670304357,
19903
+ "loss": 17.095,
19904
+ "step": 56840
19905
+ },
19906
+ {
19907
+ "epoch": 0.08422755363840516,
19908
+ "grad_norm": 6.375,
19909
+ "learning_rate": 0.00048608232809542814,
19910
+ "loss": 16.9837,
19911
+ "step": 56860
19912
+ },
19913
+ {
19914
+ "epoch": 0.08425717993233355,
19915
+ "grad_norm": 6.40625,
19916
+ "learning_rate": 0.00048607738916042054,
19917
+ "loss": 17.0325,
19918
+ "step": 56880
19919
+ },
19920
+ {
19921
+ "epoch": 0.08428680622626193,
19922
+ "grad_norm": 6.5625,
19923
+ "learning_rate": 0.00048607245022541304,
19924
+ "loss": 17.0513,
19925
+ "step": 56900
19926
+ },
19927
+ {
19928
+ "epoch": 0.08431643252019032,
19929
+ "grad_norm": 6.71875,
19930
+ "learning_rate": 0.00048606751129040543,
19931
+ "loss": 17.0624,
19932
+ "step": 56920
19933
+ },
19934
+ {
19935
+ "epoch": 0.0843460588141187,
19936
+ "grad_norm": 6.8125,
19937
+ "learning_rate": 0.0004860625723553979,
19938
+ "loss": 17.0052,
19939
+ "step": 56940
19940
+ },
19941
+ {
19942
+ "epoch": 0.08437568510804709,
19943
+ "grad_norm": 7.09375,
19944
+ "learning_rate": 0.0004860576334203903,
19945
+ "loss": 17.0705,
19946
+ "step": 56960
19947
+ },
19948
+ {
19949
+ "epoch": 0.08440531140197548,
19950
+ "grad_norm": 7.0625,
19951
+ "learning_rate": 0.0004860526944853828,
19952
+ "loss": 16.9932,
19953
+ "step": 56980
19954
+ },
19955
+ {
19956
+ "epoch": 0.08443493769590386,
19957
+ "grad_norm": 6.9375,
19958
+ "learning_rate": 0.00048604775555037517,
19959
+ "loss": 16.9712,
19960
+ "step": 57000
19961
  }
19962
  ],
19963
  "logging_steps": 20,
 
19977
  "attributes": {}
19978
  }
19979
  },
19980
+ "total_flos": 4.190665024641329e+19,
19981
  "train_batch_size": 48,
19982
  "trial_name": null,
19983
  "trial_params": null