mohammadmahdinouri commited on
Commit
bd053b6
·
verified ·
1 Parent(s): a0dc5a7

Training in progress, step 77000, checkpoint

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cc06a7f089f926af24a4dafd2fc5c68a00957b0501ae37664b3613577e08b3af
3
  size 304481530
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:456f25e6949b3d7d11abfc7016f20c519ec2224b5939a593f25e94ab538895d0
3
  size 304481530
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9513e34a098ccfc4d0eeceb95099c5472b4ff0a71cffb25d876aad974cab2486
3
  size 402029570
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:583ce6afe4697cbc9bee02b6ce9c574f4dc14c85f97be8a32cab3b7f02347cff
3
  size 402029570
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c3691082114682896d0f28ee5b4c8f41d4639d4efe6c895c755146048ab7c832
3
  size 14960
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9d61a2372056f2b32f6ae2b2c7745d9d5c6ac967a32622f315e73c700a55b59c
3
  size 14960
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6c20256e223141e6700101ae515de5a6287d380eaea8a4346e1c56536ce67dcb
3
  size 14960
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0305d043d3c89d1352e924dd8f0e87b43a3b6eaaaf9859b3bc689a1146bd169b
3
  size 14960
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a5392970d44236e7a431111e07b6640793728800da16c52d461401ef3040338a
3
  size 14960
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:974b9922d6267aa5fa0a64e6a68535833054f08cce85f66dc5aaf99a834c1951
3
  size 14960
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:756f7cda01c1bba0353fe356cfd74ccb32f9626ff6708219372ea8a4c1ba35dc
3
  size 14960
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0cc97b8ced844bab48e2e6688594701ea9aba44b688ce227a136172614ec21f5
3
  size 14960
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:178aedfc2920966f379dc01376957ddc00b6df6f84cf67e8abd741361412b63d
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fd8038700a783d3dbbe90c65ffd9f9176aad3cbeda38c8a7508ae0b5dcd99468
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.11257991692787182,
6
  "eval_steps": 500,
7
- "global_step": 76000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -26608,6 +26608,356 @@
26608
  "learning_rate": 0.00048135576729318704,
26609
  "loss": 16.3442,
26610
  "step": 76000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
26611
  }
26612
  ],
26613
  "logging_steps": 20,
@@ -26627,7 +26977,7 @@
26627
  "attributes": {}
26628
  }
26629
  },
26630
- "total_flos": 5.587853722915124e+19,
26631
  "train_batch_size": 48,
26632
  "trial_name": null,
26633
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.11406123162429119,
6
  "eval_steps": 500,
7
+ "global_step": 77000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
26608
  "learning_rate": 0.00048135576729318704,
26609
  "loss": 16.3442,
26610
  "step": 76000
26611
+ },
26612
+ {
26613
+ "epoch": 0.11260954322180021,
26614
+ "grad_norm": 6.1875,
26615
+ "learning_rate": 0.0004813508283581795,
26616
+ "loss": 16.3857,
26617
+ "step": 76020
26618
+ },
26619
+ {
26620
+ "epoch": 0.1126391695157286,
26621
+ "grad_norm": 6.8125,
26622
+ "learning_rate": 0.00048134588942317193,
26623
+ "loss": 16.4228,
26624
+ "step": 76040
26625
+ },
26626
+ {
26627
+ "epoch": 0.11266879580965698,
26628
+ "grad_norm": 6.6875,
26629
+ "learning_rate": 0.0004813409504881643,
26630
+ "loss": 16.4156,
26631
+ "step": 76060
26632
+ },
26633
+ {
26634
+ "epoch": 0.11269842210358537,
26635
+ "grad_norm": 7.40625,
26636
+ "learning_rate": 0.0004813360115531568,
26637
+ "loss": 16.3843,
26638
+ "step": 76080
26639
+ },
26640
+ {
26641
+ "epoch": 0.11272804839751376,
26642
+ "grad_norm": 6.53125,
26643
+ "learning_rate": 0.0004813310726181492,
26644
+ "loss": 16.3459,
26645
+ "step": 76100
26646
+ },
26647
+ {
26648
+ "epoch": 0.11275767469144214,
26649
+ "grad_norm": 7.09375,
26650
+ "learning_rate": 0.00048132613368314167,
26651
+ "loss": 16.3973,
26652
+ "step": 76120
26653
+ },
26654
+ {
26655
+ "epoch": 0.11278730098537054,
26656
+ "grad_norm": 8.1875,
26657
+ "learning_rate": 0.00048132119474813406,
26658
+ "loss": 16.3245,
26659
+ "step": 76140
26660
+ },
26661
+ {
26662
+ "epoch": 0.11281692727929893,
26663
+ "grad_norm": 7.65625,
26664
+ "learning_rate": 0.0004813162558131265,
26665
+ "loss": 16.3705,
26666
+ "step": 76160
26667
+ },
26668
+ {
26669
+ "epoch": 0.11284655357322732,
26670
+ "grad_norm": 6.8125,
26671
+ "learning_rate": 0.00048131131687811896,
26672
+ "loss": 16.3244,
26673
+ "step": 76180
26674
+ },
26675
+ {
26676
+ "epoch": 0.1128761798671557,
26677
+ "grad_norm": 7.0625,
26678
+ "learning_rate": 0.0004813063779431114,
26679
+ "loss": 16.3466,
26680
+ "step": 76200
26681
+ },
26682
+ {
26683
+ "epoch": 0.11290580616108409,
26684
+ "grad_norm": 6.28125,
26685
+ "learning_rate": 0.0004813014390081038,
26686
+ "loss": 16.345,
26687
+ "step": 76220
26688
+ },
26689
+ {
26690
+ "epoch": 0.11293543245501247,
26691
+ "grad_norm": 6.46875,
26692
+ "learning_rate": 0.00048129650007309625,
26693
+ "loss": 16.3821,
26694
+ "step": 76240
26695
+ },
26696
+ {
26697
+ "epoch": 0.11296505874894086,
26698
+ "grad_norm": 6.78125,
26699
+ "learning_rate": 0.0004812915611380887,
26700
+ "loss": 16.3541,
26701
+ "step": 76260
26702
+ },
26703
+ {
26704
+ "epoch": 0.11299468504286925,
26705
+ "grad_norm": 7.0,
26706
+ "learning_rate": 0.0004812866222030811,
26707
+ "loss": 16.2916,
26708
+ "step": 76280
26709
+ },
26710
+ {
26711
+ "epoch": 0.11302431133679763,
26712
+ "grad_norm": 6.59375,
26713
+ "learning_rate": 0.00048128168326807354,
26714
+ "loss": 16.2938,
26715
+ "step": 76300
26716
+ },
26717
+ {
26718
+ "epoch": 0.11305393763072602,
26719
+ "grad_norm": 6.5,
26720
+ "learning_rate": 0.000481276744333066,
26721
+ "loss": 16.4026,
26722
+ "step": 76320
26723
+ },
26724
+ {
26725
+ "epoch": 0.1130835639246544,
26726
+ "grad_norm": 6.34375,
26727
+ "learning_rate": 0.00048127180539805843,
26728
+ "loss": 16.3102,
26729
+ "step": 76340
26730
+ },
26731
+ {
26732
+ "epoch": 0.1131131902185828,
26733
+ "grad_norm": 6.71875,
26734
+ "learning_rate": 0.0004812668664630508,
26735
+ "loss": 16.338,
26736
+ "step": 76360
26737
+ },
26738
+ {
26739
+ "epoch": 0.11314281651251118,
26740
+ "grad_norm": 6.0,
26741
+ "learning_rate": 0.0004812619275280433,
26742
+ "loss": 16.3304,
26743
+ "step": 76380
26744
+ },
26745
+ {
26746
+ "epoch": 0.11317244280643957,
26747
+ "grad_norm": 6.4375,
26748
+ "learning_rate": 0.0004812569885930357,
26749
+ "loss": 16.353,
26750
+ "step": 76400
26751
+ },
26752
+ {
26753
+ "epoch": 0.11320206910036795,
26754
+ "grad_norm": 7.03125,
26755
+ "learning_rate": 0.00048125204965802817,
26756
+ "loss": 16.3146,
26757
+ "step": 76420
26758
+ },
26759
+ {
26760
+ "epoch": 0.11323169539429635,
26761
+ "grad_norm": 7.125,
26762
+ "learning_rate": 0.00048124711072302056,
26763
+ "loss": 16.3556,
26764
+ "step": 76440
26765
+ },
26766
+ {
26767
+ "epoch": 0.11326132168822474,
26768
+ "grad_norm": 7.03125,
26769
+ "learning_rate": 0.000481242171788013,
26770
+ "loss": 16.4044,
26771
+ "step": 76460
26772
+ },
26773
+ {
26774
+ "epoch": 0.11329094798215313,
26775
+ "grad_norm": 6.125,
26776
+ "learning_rate": 0.00048123723285300546,
26777
+ "loss": 16.2708,
26778
+ "step": 76480
26779
+ },
26780
+ {
26781
+ "epoch": 0.11332057427608151,
26782
+ "grad_norm": 6.96875,
26783
+ "learning_rate": 0.0004812322939179979,
26784
+ "loss": 16.3705,
26785
+ "step": 76500
26786
+ },
26787
+ {
26788
+ "epoch": 0.1133502005700099,
26789
+ "grad_norm": 6.46875,
26790
+ "learning_rate": 0.0004812273549829903,
26791
+ "loss": 16.3579,
26792
+ "step": 76520
26793
+ },
26794
+ {
26795
+ "epoch": 0.11337982686393829,
26796
+ "grad_norm": 9.4375,
26797
+ "learning_rate": 0.00048122241604798275,
26798
+ "loss": 16.3504,
26799
+ "step": 76540
26800
+ },
26801
+ {
26802
+ "epoch": 0.11340945315786667,
26803
+ "grad_norm": 6.84375,
26804
+ "learning_rate": 0.0004812174771129752,
26805
+ "loss": 16.3338,
26806
+ "step": 76560
26807
+ },
26808
+ {
26809
+ "epoch": 0.11343907945179506,
26810
+ "grad_norm": 7.75,
26811
+ "learning_rate": 0.00048121253817796764,
26812
+ "loss": 16.337,
26813
+ "step": 76580
26814
+ },
26815
+ {
26816
+ "epoch": 0.11346870574572344,
26817
+ "grad_norm": 6.1875,
26818
+ "learning_rate": 0.00048120759924296004,
26819
+ "loss": 16.3507,
26820
+ "step": 76600
26821
+ },
26822
+ {
26823
+ "epoch": 0.11349833203965183,
26824
+ "grad_norm": 6.0,
26825
+ "learning_rate": 0.0004812026603079525,
26826
+ "loss": 16.311,
26827
+ "step": 76620
26828
+ },
26829
+ {
26830
+ "epoch": 0.11352795833358022,
26831
+ "grad_norm": 6.15625,
26832
+ "learning_rate": 0.00048119772137294493,
26833
+ "loss": 16.3014,
26834
+ "step": 76640
26835
+ },
26836
+ {
26837
+ "epoch": 0.1135575846275086,
26838
+ "grad_norm": 7.1875,
26839
+ "learning_rate": 0.00048119278243793733,
26840
+ "loss": 16.3813,
26841
+ "step": 76660
26842
+ },
26843
+ {
26844
+ "epoch": 0.11358721092143699,
26845
+ "grad_norm": 6.375,
26846
+ "learning_rate": 0.0004811878435029298,
26847
+ "loss": 16.3528,
26848
+ "step": 76680
26849
+ },
26850
+ {
26851
+ "epoch": 0.11361683721536538,
26852
+ "grad_norm": 7.34375,
26853
+ "learning_rate": 0.0004811829045679222,
26854
+ "loss": 16.308,
26855
+ "step": 76700
26856
+ },
26857
+ {
26858
+ "epoch": 0.11364646350929376,
26859
+ "grad_norm": 6.28125,
26860
+ "learning_rate": 0.00048117796563291467,
26861
+ "loss": 16.3711,
26862
+ "step": 76720
26863
+ },
26864
+ {
26865
+ "epoch": 0.11367608980322215,
26866
+ "grad_norm": 6.90625,
26867
+ "learning_rate": 0.00048117302669790706,
26868
+ "loss": 16.2906,
26869
+ "step": 76740
26870
+ },
26871
+ {
26872
+ "epoch": 0.11370571609715055,
26873
+ "grad_norm": 6.8125,
26874
+ "learning_rate": 0.0004811680877628995,
26875
+ "loss": 16.3626,
26876
+ "step": 76760
26877
+ },
26878
+ {
26879
+ "epoch": 0.11373534239107894,
26880
+ "grad_norm": 5.75,
26881
+ "learning_rate": 0.00048116314882789196,
26882
+ "loss": 16.2853,
26883
+ "step": 76780
26884
+ },
26885
+ {
26886
+ "epoch": 0.11376496868500732,
26887
+ "grad_norm": 7.0625,
26888
+ "learning_rate": 0.0004811582098928844,
26889
+ "loss": 16.3697,
26890
+ "step": 76800
26891
+ },
26892
+ {
26893
+ "epoch": 0.11379459497893571,
26894
+ "grad_norm": 6.28125,
26895
+ "learning_rate": 0.0004811532709578768,
26896
+ "loss": 16.3249,
26897
+ "step": 76820
26898
+ },
26899
+ {
26900
+ "epoch": 0.1138242212728641,
26901
+ "grad_norm": 7.59375,
26902
+ "learning_rate": 0.00048114833202286925,
26903
+ "loss": 16.3022,
26904
+ "step": 76840
26905
+ },
26906
+ {
26907
+ "epoch": 0.11385384756679248,
26908
+ "grad_norm": 7.375,
26909
+ "learning_rate": 0.0004811433930878617,
26910
+ "loss": 16.3372,
26911
+ "step": 76860
26912
+ },
26913
+ {
26914
+ "epoch": 0.11388347386072087,
26915
+ "grad_norm": 7.25,
26916
+ "learning_rate": 0.00048113845415285414,
26917
+ "loss": 16.3621,
26918
+ "step": 76880
26919
+ },
26920
+ {
26921
+ "epoch": 0.11391310015464925,
26922
+ "grad_norm": 6.4375,
26923
+ "learning_rate": 0.00048113351521784654,
26924
+ "loss": 16.3544,
26925
+ "step": 76900
26926
+ },
26927
+ {
26928
+ "epoch": 0.11394272644857764,
26929
+ "grad_norm": 6.40625,
26930
+ "learning_rate": 0.00048112857628283904,
26931
+ "loss": 16.3275,
26932
+ "step": 76920
26933
+ },
26934
+ {
26935
+ "epoch": 0.11397235274250603,
26936
+ "grad_norm": 6.90625,
26937
+ "learning_rate": 0.00048112363734783143,
26938
+ "loss": 16.359,
26939
+ "step": 76940
26940
+ },
26941
+ {
26942
+ "epoch": 0.11400197903643441,
26943
+ "grad_norm": 6.6875,
26944
+ "learning_rate": 0.00048111869841282383,
26945
+ "loss": 16.3528,
26946
+ "step": 76960
26947
+ },
26948
+ {
26949
+ "epoch": 0.1140316053303628,
26950
+ "grad_norm": 7.28125,
26951
+ "learning_rate": 0.0004811137594778163,
26952
+ "loss": 16.3342,
26953
+ "step": 76980
26954
+ },
26955
+ {
26956
+ "epoch": 0.11406123162429119,
26957
+ "grad_norm": 6.53125,
26958
+ "learning_rate": 0.0004811088205428087,
26959
+ "loss": 16.3042,
26960
+ "step": 77000
26961
  }
26962
  ],
26963
  "logging_steps": 20,
 
26977
  "attributes": {}
26978
  }
26979
  },
26980
+ "total_flos": 5.661389989152712e+19,
26981
  "train_batch_size": 48,
26982
  "trial_name": null,
26983
  "trial_params": null