mohammadmahdinouri commited on
Commit
f161723
·
verified ·
1 Parent(s): f90b00c

Training in progress, step 80000, checkpoint

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c756f34d6447d0a12191f2f228e5fca6325d2585555740b00f437d7c3e7004bb
3
  size 304481530
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:25eece902c4fc10f4ee2062692a9aedbe51bd7b7d97a5b7d579b674f96892276
3
  size 304481530
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3e5534657658835263ae3eba8ff79c99616131e773d428fcce31ad61af86046b
3
  size 402029570
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1d7f1d706e6cfbd7062b526c5f96351aba28490563e89cc3572dbd70ff071d52
3
  size 402029570
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f9cd212ee37c184ab654cd81424bd96e3d051626e53abebd61cf8f11452e1283
3
  size 14960
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3762fb83fd702043dec9c363ac412c392bf99ebaba36635b7ce08abde68594fe
3
  size 14960
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4ec46da082cb75a7e3753cfb221c9d642f2a32f3a83b3b478de73eadc477388c
3
  size 14960
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:62c6e6a2cde44218a43149d5222369dc44b7c914b2ad856e2e09dfb4dca020fb
3
  size 14960
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3a45666dfa471b8966a5f388cff9679b0f97f0a453b0c8aca6fb55a560f78c7c
3
  size 14960
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fb4157f68b08406d6bc17d2638ab784f508ffb332e537043a8486d779d68898e
3
  size 14960
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ba2d0810d74e907463868f0c583c57c89e9a4bad46de26ffd127e43e9b609736
3
  size 14960
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:70fc5f5dac53b26b2e075af1f8abf3943ab8de6a2ae6129d92b62d3aa9705082
3
  size 14960
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cb0dc069b89d8c308dec795d21a2ac94397c5df6c87082b4999d15bf441c0a2e
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d73d6a55f40d828827c6493d8d4e36859284046429b1cc4d61ff3be96f72f5ef
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.11702386101712993,
6
  "eval_steps": 500,
7
- "global_step": 79000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -27658,6 +27658,356 @@
27658
  "learning_rate": 0.00048061492704205204,
27659
  "loss": 16.3266,
27660
  "step": 79000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
27661
  }
27662
  ],
27663
  "logging_steps": 20,
@@ -27677,7 +28027,7 @@
27677
  "attributes": {}
27678
  }
27679
  },
27680
- "total_flos": 5.808462373236769e+19,
27681
  "train_batch_size": 48,
27682
  "trial_name": null,
27683
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.1185051757135493,
6
  "eval_steps": 500,
7
+ "global_step": 80000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
27658
  "learning_rate": 0.00048061492704205204,
27659
  "loss": 16.3266,
27660
  "step": 79000
27661
+ },
27662
+ {
27663
+ "epoch": 0.11705348731105832,
27664
+ "grad_norm": 6.46875,
27665
+ "learning_rate": 0.00048060998810704454,
27666
+ "loss": 16.2829,
27667
+ "step": 79020
27668
+ },
27669
+ {
27670
+ "epoch": 0.1170831136049867,
27671
+ "grad_norm": 6.3125,
27672
+ "learning_rate": 0.00048060504917203694,
27673
+ "loss": 16.2504,
27674
+ "step": 79040
27675
+ },
27676
+ {
27677
+ "epoch": 0.11711273989891509,
27678
+ "grad_norm": 6.59375,
27679
+ "learning_rate": 0.0004806001102370294,
27680
+ "loss": 16.2487,
27681
+ "step": 79060
27682
+ },
27683
+ {
27684
+ "epoch": 0.11714236619284348,
27685
+ "grad_norm": 6.375,
27686
+ "learning_rate": 0.0004805951713020218,
27687
+ "loss": 16.3028,
27688
+ "step": 79080
27689
+ },
27690
+ {
27691
+ "epoch": 0.11717199248677186,
27692
+ "grad_norm": 7.59375,
27693
+ "learning_rate": 0.0004805902323670143,
27694
+ "loss": 16.2891,
27695
+ "step": 79100
27696
+ },
27697
+ {
27698
+ "epoch": 0.11720161878070025,
27699
+ "grad_norm": 6.15625,
27700
+ "learning_rate": 0.0004805852934320067,
27701
+ "loss": 16.3023,
27702
+ "step": 79120
27703
+ },
27704
+ {
27705
+ "epoch": 0.11723124507462863,
27706
+ "grad_norm": 8.875,
27707
+ "learning_rate": 0.0004805803544969991,
27708
+ "loss": 16.3369,
27709
+ "step": 79140
27710
+ },
27711
+ {
27712
+ "epoch": 0.11726087136855702,
27713
+ "grad_norm": 6.875,
27714
+ "learning_rate": 0.0004805754155619915,
27715
+ "loss": 16.2611,
27716
+ "step": 79160
27717
+ },
27718
+ {
27719
+ "epoch": 0.11729049766248541,
27720
+ "grad_norm": 6.84375,
27721
+ "learning_rate": 0.000480570476626984,
27722
+ "loss": 16.3287,
27723
+ "step": 79180
27724
+ },
27725
+ {
27726
+ "epoch": 0.1173201239564138,
27727
+ "grad_norm": 6.8125,
27728
+ "learning_rate": 0.0004805655376919764,
27729
+ "loss": 16.322,
27730
+ "step": 79200
27731
+ },
27732
+ {
27733
+ "epoch": 0.11734975025034218,
27734
+ "grad_norm": 6.34375,
27735
+ "learning_rate": 0.00048056059875696886,
27736
+ "loss": 16.2184,
27737
+ "step": 79220
27738
+ },
27739
+ {
27740
+ "epoch": 0.11737937654427057,
27741
+ "grad_norm": 7.3125,
27742
+ "learning_rate": 0.00048055565982196125,
27743
+ "loss": 16.3263,
27744
+ "step": 79240
27745
+ },
27746
+ {
27747
+ "epoch": 0.11740900283819895,
27748
+ "grad_norm": 6.59375,
27749
+ "learning_rate": 0.00048055072088695375,
27750
+ "loss": 16.2757,
27751
+ "step": 79260
27752
+ },
27753
+ {
27754
+ "epoch": 0.11743862913212734,
27755
+ "grad_norm": 6.71875,
27756
+ "learning_rate": 0.00048054578195194615,
27757
+ "loss": 16.2582,
27758
+ "step": 79280
27759
+ },
27760
+ {
27761
+ "epoch": 0.11746825542605574,
27762
+ "grad_norm": 7.0,
27763
+ "learning_rate": 0.00048054084301693854,
27764
+ "loss": 16.2983,
27765
+ "step": 79300
27766
+ },
27767
+ {
27768
+ "epoch": 0.11749788171998413,
27769
+ "grad_norm": 6.3125,
27770
+ "learning_rate": 0.00048053590408193104,
27771
+ "loss": 16.314,
27772
+ "step": 79320
27773
+ },
27774
+ {
27775
+ "epoch": 0.11752750801391251,
27776
+ "grad_norm": 6.375,
27777
+ "learning_rate": 0.00048053096514692344,
27778
+ "loss": 16.2981,
27779
+ "step": 79340
27780
+ },
27781
+ {
27782
+ "epoch": 0.1175571343078409,
27783
+ "grad_norm": 7.15625,
27784
+ "learning_rate": 0.0004805260262119159,
27785
+ "loss": 16.2972,
27786
+ "step": 79360
27787
+ },
27788
+ {
27789
+ "epoch": 0.11758676060176929,
27790
+ "grad_norm": 7.5,
27791
+ "learning_rate": 0.0004805210872769083,
27792
+ "loss": 16.2882,
27793
+ "step": 79380
27794
+ },
27795
+ {
27796
+ "epoch": 0.11761638689569767,
27797
+ "grad_norm": 6.8125,
27798
+ "learning_rate": 0.0004805161483419008,
27799
+ "loss": 16.2462,
27800
+ "step": 79400
27801
+ },
27802
+ {
27803
+ "epoch": 0.11764601318962606,
27804
+ "grad_norm": 6.78125,
27805
+ "learning_rate": 0.0004805112094068932,
27806
+ "loss": 16.2808,
27807
+ "step": 79420
27808
+ },
27809
+ {
27810
+ "epoch": 0.11767563948355445,
27811
+ "grad_norm": 6.09375,
27812
+ "learning_rate": 0.0004805062704718856,
27813
+ "loss": 16.265,
27814
+ "step": 79440
27815
+ },
27816
+ {
27817
+ "epoch": 0.11770526577748283,
27818
+ "grad_norm": 6.78125,
27819
+ "learning_rate": 0.000480501331536878,
27820
+ "loss": 16.2757,
27821
+ "step": 79460
27822
+ },
27823
+ {
27824
+ "epoch": 0.11773489207141122,
27825
+ "grad_norm": 6.09375,
27826
+ "learning_rate": 0.0004804963926018705,
27827
+ "loss": 16.3099,
27828
+ "step": 79480
27829
+ },
27830
+ {
27831
+ "epoch": 0.1177645183653396,
27832
+ "grad_norm": 6.65625,
27833
+ "learning_rate": 0.0004804914536668629,
27834
+ "loss": 16.2713,
27835
+ "step": 79500
27836
+ },
27837
+ {
27838
+ "epoch": 0.11779414465926799,
27839
+ "grad_norm": 6.3125,
27840
+ "learning_rate": 0.00048048651473185536,
27841
+ "loss": 16.2622,
27842
+ "step": 79520
27843
+ },
27844
+ {
27845
+ "epoch": 0.11782377095319638,
27846
+ "grad_norm": 5.875,
27847
+ "learning_rate": 0.00048048157579684775,
27848
+ "loss": 16.2833,
27849
+ "step": 79540
27850
+ },
27851
+ {
27852
+ "epoch": 0.11785339724712476,
27853
+ "grad_norm": 6.1875,
27854
+ "learning_rate": 0.00048047663686184025,
27855
+ "loss": 16.2429,
27856
+ "step": 79560
27857
+ },
27858
+ {
27859
+ "epoch": 0.11788302354105315,
27860
+ "grad_norm": 6.6875,
27861
+ "learning_rate": 0.00048047169792683265,
27862
+ "loss": 16.3167,
27863
+ "step": 79580
27864
+ },
27865
+ {
27866
+ "epoch": 0.11791264983498154,
27867
+ "grad_norm": 6.59375,
27868
+ "learning_rate": 0.0004804667589918251,
27869
+ "loss": 16.2256,
27870
+ "step": 79600
27871
+ },
27872
+ {
27873
+ "epoch": 0.11794227612890994,
27874
+ "grad_norm": 6.71875,
27875
+ "learning_rate": 0.00048046182005681754,
27876
+ "loss": 16.2095,
27877
+ "step": 79620
27878
+ },
27879
+ {
27880
+ "epoch": 0.11797190242283832,
27881
+ "grad_norm": 7.5,
27882
+ "learning_rate": 0.00048045688112180994,
27883
+ "loss": 16.2748,
27884
+ "step": 79640
27885
+ },
27886
+ {
27887
+ "epoch": 0.11800152871676671,
27888
+ "grad_norm": 6.5,
27889
+ "learning_rate": 0.0004804519421868024,
27890
+ "loss": 16.2733,
27891
+ "step": 79660
27892
+ },
27893
+ {
27894
+ "epoch": 0.1180311550106951,
27895
+ "grad_norm": 6.46875,
27896
+ "learning_rate": 0.0004804470032517948,
27897
+ "loss": 16.2384,
27898
+ "step": 79680
27899
+ },
27900
+ {
27901
+ "epoch": 0.11806078130462348,
27902
+ "grad_norm": 6.03125,
27903
+ "learning_rate": 0.0004804420643167873,
27904
+ "loss": 16.2747,
27905
+ "step": 79700
27906
+ },
27907
+ {
27908
+ "epoch": 0.11809040759855187,
27909
+ "grad_norm": 6.59375,
27910
+ "learning_rate": 0.0004804371253817797,
27911
+ "loss": 16.2826,
27912
+ "step": 79720
27913
+ },
27914
+ {
27915
+ "epoch": 0.11812003389248026,
27916
+ "grad_norm": 6.875,
27917
+ "learning_rate": 0.0004804321864467721,
27918
+ "loss": 16.2646,
27919
+ "step": 79740
27920
+ },
27921
+ {
27922
+ "epoch": 0.11814966018640864,
27923
+ "grad_norm": 7.625,
27924
+ "learning_rate": 0.0004804272475117645,
27925
+ "loss": 16.2691,
27926
+ "step": 79760
27927
+ },
27928
+ {
27929
+ "epoch": 0.11817928648033703,
27930
+ "grad_norm": 7.0625,
27931
+ "learning_rate": 0.000480422308576757,
27932
+ "loss": 16.2096,
27933
+ "step": 79780
27934
+ },
27935
+ {
27936
+ "epoch": 0.11820891277426541,
27937
+ "grad_norm": 7.09375,
27938
+ "learning_rate": 0.0004804173696417494,
27939
+ "loss": 16.2607,
27940
+ "step": 79800
27941
+ },
27942
+ {
27943
+ "epoch": 0.1182385390681938,
27944
+ "grad_norm": 6.1875,
27945
+ "learning_rate": 0.00048041243070674186,
27946
+ "loss": 16.2072,
27947
+ "step": 79820
27948
+ },
27949
+ {
27950
+ "epoch": 0.11826816536212219,
27951
+ "grad_norm": 5.96875,
27952
+ "learning_rate": 0.00048040749177173425,
27953
+ "loss": 16.2251,
27954
+ "step": 79840
27955
+ },
27956
+ {
27957
+ "epoch": 0.11829779165605057,
27958
+ "grad_norm": 7.0,
27959
+ "learning_rate": 0.00048040255283672675,
27960
+ "loss": 16.2256,
27961
+ "step": 79860
27962
+ },
27963
+ {
27964
+ "epoch": 0.11832741794997896,
27965
+ "grad_norm": 6.84375,
27966
+ "learning_rate": 0.00048039761390171915,
27967
+ "loss": 16.3018,
27968
+ "step": 79880
27969
+ },
27970
+ {
27971
+ "epoch": 0.11835704424390735,
27972
+ "grad_norm": 6.65625,
27973
+ "learning_rate": 0.0004803926749667116,
27974
+ "loss": 16.273,
27975
+ "step": 79900
27976
+ },
27977
+ {
27978
+ "epoch": 0.11838667053783573,
27979
+ "grad_norm": 7.1875,
27980
+ "learning_rate": 0.00048038773603170404,
27981
+ "loss": 16.2277,
27982
+ "step": 79920
27983
+ },
27984
+ {
27985
+ "epoch": 0.11841629683176413,
27986
+ "grad_norm": 6.34375,
27987
+ "learning_rate": 0.0004803827970966965,
27988
+ "loss": 16.2374,
27989
+ "step": 79940
27990
+ },
27991
+ {
27992
+ "epoch": 0.11844592312569252,
27993
+ "grad_norm": 6.25,
27994
+ "learning_rate": 0.0004803778581616889,
27995
+ "loss": 16.2584,
27996
+ "step": 79960
27997
+ },
27998
+ {
27999
+ "epoch": 0.1184755494196209,
28000
+ "grad_norm": 6.5,
28001
+ "learning_rate": 0.0004803729192266813,
28002
+ "loss": 16.2437,
28003
+ "step": 79980
28004
+ },
28005
+ {
28006
+ "epoch": 0.1185051757135493,
28007
+ "grad_norm": 7.375,
28008
+ "learning_rate": 0.0004803679802916738,
28009
+ "loss": 16.2565,
28010
+ "step": 80000
28011
  }
28012
  ],
28013
  "logging_steps": 20,
 
28027
  "attributes": {}
28028
  }
28029
  },
28030
+ "total_flos": 5.881999294671618e+19,
28031
  "train_batch_size": 48,
28032
  "trial_name": null,
28033
  "trial_params": null