mohammadmahdinouri commited on
Commit
a4ca969
·
verified ·
1 Parent(s): 6961084

Training in progress, step 74000, checkpoint

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:82e96b382e85cf4f91a0957df390eab642f1a5b90594b054112e585987e922fb
3
  size 304481530
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5c14f92422cc30c9605f95654d62c250bad463581bd3da10bb7b17093206005e
3
  size 304481530
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:86831135ab2a33d7609f755ab5e685a1ac6602cf0ed6e3f717ff3cd6a64064f2
3
  size 402029570
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cdbe93c9686a0a02ecdcba702915ad1389c2bb261f4103c48b737864febba412
3
  size 402029570
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:dbf5a8e94cdeb9d71543994044a1496c0b99dc653812727d1f2b5879319264c4
3
  size 14960
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cf863b0b895309e73d9088642dd8d00845be8fee481352073f05fd0bd67029a2
3
  size 14960
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e585ae00a418f8315b98a87df365e3f31023ec6747db05d48bdc24ed26af3666
3
  size 14960
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7f0942e1e9569ddb210dcd2d42bc92e339bbd2239990fd3cc546265bee775d39
3
  size 14960
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c65df296955d0ea7a8b7df67d30426101d0bc72ddcf4935d0366aeb81991dd30
3
  size 14960
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d2a7d2488bf1d4b76628b506fc6b6fb862cbf4396985e4c9e2f16e4262ba5085
3
  size 14960
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:75f3a690a6b3c19beeba0982e2eceaedb3e05582e018ecc3f8710afa643876ad
3
  size 14960
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:115e6df582159f803bd87cdfeee2a6c991779cf09357b4ef2537b502b04c878f
3
  size 14960
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1971585f96833288fec52d3fdc773fe9f57b50e9c45dc3d75ed2e10f5ab3dca7
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9391a0b437930e5697a6d0905f7bf157b3a70a9ca0d6fddfd220757077049906
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.10813597283861373,
6
  "eval_steps": 500,
7
- "global_step": 73000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -25558,6 +25558,356 @@
25558
  "learning_rate": 0.000482096607544322,
25559
  "loss": 16.4235,
25560
  "step": 73000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
25561
  }
25562
  ],
25563
  "logging_steps": 20,
@@ -25577,7 +25927,7 @@
25577
  "attributes": {}
25578
  }
25579
  },
25580
- "total_flos": 5.367243712484711e+19,
25581
  "train_batch_size": 48,
25582
  "trial_name": null,
25583
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.1096172875350331,
6
  "eval_steps": 500,
7
+ "global_step": 74000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
25558
  "learning_rate": 0.000482096607544322,
25559
  "loss": 16.4235,
25560
  "step": 73000
25561
+ },
25562
+ {
25563
+ "epoch": 0.10816559913254212,
25564
+ "grad_norm": 7.0625,
25565
+ "learning_rate": 0.0004820916686093145,
25566
+ "loss": 16.4474,
25567
+ "step": 73020
25568
+ },
25569
+ {
25570
+ "epoch": 0.1081952254264705,
25571
+ "grad_norm": 6.75,
25572
+ "learning_rate": 0.0004820867296743069,
25573
+ "loss": 16.4285,
25574
+ "step": 73040
25575
+ },
25576
+ {
25577
+ "epoch": 0.10822485172039889,
25578
+ "grad_norm": 6.40625,
25579
+ "learning_rate": 0.0004820817907392993,
25580
+ "loss": 16.5063,
25581
+ "step": 73060
25582
+ },
25583
+ {
25584
+ "epoch": 0.10825447801432728,
25585
+ "grad_norm": 6.3125,
25586
+ "learning_rate": 0.0004820768518042917,
25587
+ "loss": 16.421,
25588
+ "step": 73080
25589
+ },
25590
+ {
25591
+ "epoch": 0.10828410430825566,
25592
+ "grad_norm": 6.78125,
25593
+ "learning_rate": 0.0004820719128692842,
25594
+ "loss": 16.5147,
25595
+ "step": 73100
25596
+ },
25597
+ {
25598
+ "epoch": 0.10831373060218405,
25599
+ "grad_norm": 7.125,
25600
+ "learning_rate": 0.0004820669739342766,
25601
+ "loss": 16.4131,
25602
+ "step": 73120
25603
+ },
25604
+ {
25605
+ "epoch": 0.10834335689611244,
25606
+ "grad_norm": 6.375,
25607
+ "learning_rate": 0.00048206203499926906,
25608
+ "loss": 16.3763,
25609
+ "step": 73140
25610
+ },
25611
+ {
25612
+ "epoch": 0.10837298319004082,
25613
+ "grad_norm": 6.75,
25614
+ "learning_rate": 0.00048205709606426145,
25615
+ "loss": 16.402,
25616
+ "step": 73160
25617
+ },
25618
+ {
25619
+ "epoch": 0.10840260948396921,
25620
+ "grad_norm": 6.90625,
25621
+ "learning_rate": 0.00048205215712925396,
25622
+ "loss": 16.4398,
25623
+ "step": 73180
25624
+ },
25625
+ {
25626
+ "epoch": 0.1084322357778976,
25627
+ "grad_norm": 7.34375,
25628
+ "learning_rate": 0.00048204721819424635,
25629
+ "loss": 16.4245,
25630
+ "step": 73200
25631
+ },
25632
+ {
25633
+ "epoch": 0.10846186207182598,
25634
+ "grad_norm": 6.71875,
25635
+ "learning_rate": 0.00048204227925923874,
25636
+ "loss": 16.4691,
25637
+ "step": 73220
25638
+ },
25639
+ {
25640
+ "epoch": 0.10849148836575437,
25641
+ "grad_norm": 6.375,
25642
+ "learning_rate": 0.0004820373403242312,
25643
+ "loss": 16.4718,
25644
+ "step": 73240
25645
+ },
25646
+ {
25647
+ "epoch": 0.10852111465968275,
25648
+ "grad_norm": 6.90625,
25649
+ "learning_rate": 0.00048203240138922364,
25650
+ "loss": 16.4101,
25651
+ "step": 73260
25652
+ },
25653
+ {
25654
+ "epoch": 0.10855074095361116,
25655
+ "grad_norm": 6.875,
25656
+ "learning_rate": 0.0004820274624542161,
25657
+ "loss": 16.4164,
25658
+ "step": 73280
25659
+ },
25660
+ {
25661
+ "epoch": 0.10858036724753954,
25662
+ "grad_norm": 6.6875,
25663
+ "learning_rate": 0.0004820225235192085,
25664
+ "loss": 16.4198,
25665
+ "step": 73300
25666
+ },
25667
+ {
25668
+ "epoch": 0.10860999354146793,
25669
+ "grad_norm": 7.25,
25670
+ "learning_rate": 0.000482017584584201,
25671
+ "loss": 16.3898,
25672
+ "step": 73320
25673
+ },
25674
+ {
25675
+ "epoch": 0.10863961983539631,
25676
+ "grad_norm": 6.59375,
25677
+ "learning_rate": 0.0004820126456491934,
25678
+ "loss": 16.4542,
25679
+ "step": 73340
25680
+ },
25681
+ {
25682
+ "epoch": 0.1086692461293247,
25683
+ "grad_norm": 6.5625,
25684
+ "learning_rate": 0.0004820077067141858,
25685
+ "loss": 16.4689,
25686
+ "step": 73360
25687
+ },
25688
+ {
25689
+ "epoch": 0.10869887242325309,
25690
+ "grad_norm": 7.8125,
25691
+ "learning_rate": 0.0004820027677791782,
25692
+ "loss": 16.3908,
25693
+ "step": 73380
25694
+ },
25695
+ {
25696
+ "epoch": 0.10872849871718147,
25697
+ "grad_norm": 7.625,
25698
+ "learning_rate": 0.0004819978288441707,
25699
+ "loss": 16.4518,
25700
+ "step": 73400
25701
+ },
25702
+ {
25703
+ "epoch": 0.10875812501110986,
25704
+ "grad_norm": 5.65625,
25705
+ "learning_rate": 0.0004819928899091631,
25706
+ "loss": 16.4547,
25707
+ "step": 73420
25708
+ },
25709
+ {
25710
+ "epoch": 0.10878775130503825,
25711
+ "grad_norm": 7.21875,
25712
+ "learning_rate": 0.00048198795097415556,
25713
+ "loss": 16.4573,
25714
+ "step": 73440
25715
+ },
25716
+ {
25717
+ "epoch": 0.10881737759896663,
25718
+ "grad_norm": 6.59375,
25719
+ "learning_rate": 0.00048198301203914795,
25720
+ "loss": 16.4254,
25721
+ "step": 73460
25722
+ },
25723
+ {
25724
+ "epoch": 0.10884700389289502,
25725
+ "grad_norm": 6.53125,
25726
+ "learning_rate": 0.00048197807310414046,
25727
+ "loss": 16.4479,
25728
+ "step": 73480
25729
+ },
25730
+ {
25731
+ "epoch": 0.1088766301868234,
25732
+ "grad_norm": 6.59375,
25733
+ "learning_rate": 0.00048197313416913285,
25734
+ "loss": 16.374,
25735
+ "step": 73500
25736
+ },
25737
+ {
25738
+ "epoch": 0.10890625648075179,
25739
+ "grad_norm": 7.0625,
25740
+ "learning_rate": 0.0004819681952341253,
25741
+ "loss": 16.4854,
25742
+ "step": 73520
25743
+ },
25744
+ {
25745
+ "epoch": 0.10893588277468018,
25746
+ "grad_norm": 6.5625,
25747
+ "learning_rate": 0.0004819632562991177,
25748
+ "loss": 16.373,
25749
+ "step": 73540
25750
+ },
25751
+ {
25752
+ "epoch": 0.10896550906860857,
25753
+ "grad_norm": 5.6875,
25754
+ "learning_rate": 0.00048195831736411014,
25755
+ "loss": 16.4403,
25756
+ "step": 73560
25757
+ },
25758
+ {
25759
+ "epoch": 0.10899513536253697,
25760
+ "grad_norm": 7.28125,
25761
+ "learning_rate": 0.0004819533784291026,
25762
+ "loss": 16.4399,
25763
+ "step": 73580
25764
+ },
25765
+ {
25766
+ "epoch": 0.10902476165646535,
25767
+ "grad_norm": 6.0,
25768
+ "learning_rate": 0.000481948439494095,
25769
+ "loss": 16.4257,
25770
+ "step": 73600
25771
+ },
25772
+ {
25773
+ "epoch": 0.10905438795039374,
25774
+ "grad_norm": 7.0,
25775
+ "learning_rate": 0.0004819435005590875,
25776
+ "loss": 16.3843,
25777
+ "step": 73620
25778
+ },
25779
+ {
25780
+ "epoch": 0.10908401424432213,
25781
+ "grad_norm": 6.5625,
25782
+ "learning_rate": 0.0004819385616240799,
25783
+ "loss": 16.4221,
25784
+ "step": 73640
25785
+ },
25786
+ {
25787
+ "epoch": 0.10911364053825051,
25788
+ "grad_norm": 6.90625,
25789
+ "learning_rate": 0.0004819336226890723,
25790
+ "loss": 16.4464,
25791
+ "step": 73660
25792
+ },
25793
+ {
25794
+ "epoch": 0.1091432668321789,
25795
+ "grad_norm": 6.875,
25796
+ "learning_rate": 0.0004819286837540647,
25797
+ "loss": 16.4325,
25798
+ "step": 73680
25799
+ },
25800
+ {
25801
+ "epoch": 0.10917289312610728,
25802
+ "grad_norm": 6.09375,
25803
+ "learning_rate": 0.0004819237448190572,
25804
+ "loss": 16.4222,
25805
+ "step": 73700
25806
+ },
25807
+ {
25808
+ "epoch": 0.10920251942003567,
25809
+ "grad_norm": 6.9375,
25810
+ "learning_rate": 0.0004819188058840496,
25811
+ "loss": 16.4236,
25812
+ "step": 73720
25813
+ },
25814
+ {
25815
+ "epoch": 0.10923214571396406,
25816
+ "grad_norm": 6.0625,
25817
+ "learning_rate": 0.00048191386694904206,
25818
+ "loss": 16.4719,
25819
+ "step": 73740
25820
+ },
25821
+ {
25822
+ "epoch": 0.10926177200789244,
25823
+ "grad_norm": 6.78125,
25824
+ "learning_rate": 0.00048190892801403445,
25825
+ "loss": 16.4062,
25826
+ "step": 73760
25827
+ },
25828
+ {
25829
+ "epoch": 0.10929139830182083,
25830
+ "grad_norm": 7.0,
25831
+ "learning_rate": 0.00048190398907902696,
25832
+ "loss": 16.4468,
25833
+ "step": 73780
25834
+ },
25835
+ {
25836
+ "epoch": 0.10932102459574922,
25837
+ "grad_norm": 6.6875,
25838
+ "learning_rate": 0.00048189905014401935,
25839
+ "loss": 16.4426,
25840
+ "step": 73800
25841
+ },
25842
+ {
25843
+ "epoch": 0.1093506508896776,
25844
+ "grad_norm": 6.625,
25845
+ "learning_rate": 0.0004818941112090118,
25846
+ "loss": 16.4042,
25847
+ "step": 73820
25848
+ },
25849
+ {
25850
+ "epoch": 0.10938027718360599,
25851
+ "grad_norm": 6.53125,
25852
+ "learning_rate": 0.0004818891722740042,
25853
+ "loss": 16.489,
25854
+ "step": 73840
25855
+ },
25856
+ {
25857
+ "epoch": 0.10940990347753438,
25858
+ "grad_norm": 7.25,
25859
+ "learning_rate": 0.0004818842333389967,
25860
+ "loss": 16.4129,
25861
+ "step": 73860
25862
+ },
25863
+ {
25864
+ "epoch": 0.10943952977146276,
25865
+ "grad_norm": 6.59375,
25866
+ "learning_rate": 0.0004818792944039891,
25867
+ "loss": 16.4828,
25868
+ "step": 73880
25869
+ },
25870
+ {
25871
+ "epoch": 0.10946915606539116,
25872
+ "grad_norm": 6.03125,
25873
+ "learning_rate": 0.0004818743554689815,
25874
+ "loss": 16.4081,
25875
+ "step": 73900
25876
+ },
25877
+ {
25878
+ "epoch": 0.10949878235931955,
25879
+ "grad_norm": 6.40625,
25880
+ "learning_rate": 0.000481869416533974,
25881
+ "loss": 16.441,
25882
+ "step": 73920
25883
+ },
25884
+ {
25885
+ "epoch": 0.10952840865324794,
25886
+ "grad_norm": 6.96875,
25887
+ "learning_rate": 0.0004818644775989664,
25888
+ "loss": 16.4013,
25889
+ "step": 73940
25890
+ },
25891
+ {
25892
+ "epoch": 0.10955803494717632,
25893
+ "grad_norm": 7.46875,
25894
+ "learning_rate": 0.0004818595386639588,
25895
+ "loss": 16.4102,
25896
+ "step": 73960
25897
+ },
25898
+ {
25899
+ "epoch": 0.10958766124110471,
25900
+ "grad_norm": 7.09375,
25901
+ "learning_rate": 0.0004818545997289512,
25902
+ "loss": 16.3879,
25903
+ "step": 73980
25904
+ },
25905
+ {
25906
+ "epoch": 0.1096172875350331,
25907
+ "grad_norm": 7.0,
25908
+ "learning_rate": 0.0004818496607939437,
25909
+ "loss": 16.3994,
25910
+ "step": 74000
25911
  }
25912
  ],
25913
  "logging_steps": 20,
 
25927
  "attributes": {}
25928
  }
25929
  },
25930
+ "total_flos": 5.440780396085746e+19,
25931
  "train_batch_size": 48,
25932
  "trial_name": null,
25933
  "trial_params": null