mohammadmahdinouri commited on
Commit
a6bc753
·
verified ·
1 Parent(s): 2da1393

Training in progress, step 69000, checkpoint

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:855097e18de16f85c46f8b027e1873d375c3a4edc034e8bed8a7f0b58970ad94
3
  size 304481530
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e12ef6a026db88916d2bccaa887c346b617f8bf524f61c49a560d4c1854fb6f1
3
  size 304481530
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f6210b328c6e30eb767412099efb2004508322ff25c3e6056826eba5d995bc2b
3
  size 402029570
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:287d41aa21a4eb242c8834811d8cbeecb6b0fd5e8162f8a93804fb2ec7aa6398
3
  size 402029570
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b1fe05f5b470f95761cfc3fed3146b8c8e8a912646d05e70e539792b7f745a3f
3
  size 14960
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c9b12b8c5c5c31953b64891eb7a5a87fe3243666cbd4801ead4f6238d85d2c9c
3
  size 14960
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:49ad9d6f5fe6b13eeb9343f8fae928ab75997e82b569c4a8977d808cdc884b1e
3
  size 14960
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:17944b85b9d02378f311e5505f3d2beb901e13fa7a7306f1d0d6ef90c3394bf6
3
  size 14960
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4711ff133c23ad6d8a7643a31e0e727444cc5280990eabd826bfc8c92e7cdf77
3
  size 14960
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c35b076a46134f931f65ad614d3a133b44af15affbef2c6984eacb0867534788
3
  size 14960
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d66ecdc5ab3f9e8ebc655822c33c54e4023463dd04074044db32f0a8095e3378
3
  size 14960
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:00652d787b0217457a14651c8e87f0d8ab4c5f0af3727292f0c3f9d4e718cb0a
3
  size 14960
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:730a17924aec965fee0684191a1f8a93d017e71268086042298dd7299e09c6f3
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dc0d2189df36213ee36dacfa0f47fda988de8257ffa315d320b6c0176d420bf1
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.1007293993565169,
6
  "eval_steps": 500,
7
- "global_step": 68000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -23808,6 +23808,356 @@
23808
  "learning_rate": 0.00048333134129621366,
23809
  "loss": 16.5557,
23810
  "step": 68000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
23811
  }
23812
  ],
23813
  "logging_steps": 20,
@@ -23827,7 +24177,7 @@
23827
  "attributes": {}
23828
  }
23829
  },
23830
- "total_flos": 4.999562170735998e+19,
23831
  "train_batch_size": 48,
23832
  "trial_name": null,
23833
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.10221071405293626,
6
  "eval_steps": 500,
7
+ "global_step": 69000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
23808
  "learning_rate": 0.00048333134129621366,
23809
  "loss": 16.5557,
23810
  "step": 68000
23811
+ },
23812
+ {
23813
+ "epoch": 0.10075902565044528,
23814
+ "grad_norm": 7.46875,
23815
+ "learning_rate": 0.00048332640236120605,
23816
+ "loss": 16.6099,
23817
+ "step": 68020
23818
+ },
23819
+ {
23820
+ "epoch": 0.10078865194437367,
23821
+ "grad_norm": 6.84375,
23822
+ "learning_rate": 0.0004833214634261985,
23823
+ "loss": 16.6517,
23824
+ "step": 68040
23825
+ },
23826
+ {
23827
+ "epoch": 0.10081827823830206,
23828
+ "grad_norm": 7.4375,
23829
+ "learning_rate": 0.0004833165244911909,
23830
+ "loss": 16.5766,
23831
+ "step": 68060
23832
+ },
23833
+ {
23834
+ "epoch": 0.10084790453223044,
23835
+ "grad_norm": 6.71875,
23836
+ "learning_rate": 0.0004833115855561834,
23837
+ "loss": 16.606,
23838
+ "step": 68080
23839
+ },
23840
+ {
23841
+ "epoch": 0.10087753082615883,
23842
+ "grad_norm": 6.59375,
23843
+ "learning_rate": 0.0004833066466211758,
23844
+ "loss": 16.5779,
23845
+ "step": 68100
23846
+ },
23847
+ {
23848
+ "epoch": 0.10090715712008722,
23849
+ "grad_norm": 6.8125,
23850
+ "learning_rate": 0.00048330170768616824,
23851
+ "loss": 16.6219,
23852
+ "step": 68120
23853
+ },
23854
+ {
23855
+ "epoch": 0.1009367834140156,
23856
+ "grad_norm": 7.15625,
23857
+ "learning_rate": 0.0004832967687511607,
23858
+ "loss": 16.5954,
23859
+ "step": 68140
23860
+ },
23861
+ {
23862
+ "epoch": 0.10096640970794399,
23863
+ "grad_norm": 7.0,
23864
+ "learning_rate": 0.0004832918298161531,
23865
+ "loss": 16.5884,
23866
+ "step": 68160
23867
+ },
23868
+ {
23869
+ "epoch": 0.10099603600187239,
23870
+ "grad_norm": 6.78125,
23871
+ "learning_rate": 0.00048328689088114553,
23872
+ "loss": 16.5813,
23873
+ "step": 68180
23874
+ },
23875
+ {
23876
+ "epoch": 0.10102566229580077,
23877
+ "grad_norm": 6.46875,
23878
+ "learning_rate": 0.0004832819519461379,
23879
+ "loss": 16.6294,
23880
+ "step": 68200
23881
+ },
23882
+ {
23883
+ "epoch": 0.10105528858972916,
23884
+ "grad_norm": 7.34375,
23885
+ "learning_rate": 0.0004832770130111304,
23886
+ "loss": 16.627,
23887
+ "step": 68220
23888
+ },
23889
+ {
23890
+ "epoch": 0.10108491488365755,
23891
+ "grad_norm": 7.0,
23892
+ "learning_rate": 0.0004832720740761228,
23893
+ "loss": 16.5516,
23894
+ "step": 68240
23895
+ },
23896
+ {
23897
+ "epoch": 0.10111454117758593,
23898
+ "grad_norm": 7.40625,
23899
+ "learning_rate": 0.00048326713514111526,
23900
+ "loss": 16.6234,
23901
+ "step": 68260
23902
+ },
23903
+ {
23904
+ "epoch": 0.10114416747151432,
23905
+ "grad_norm": 7.1875,
23906
+ "learning_rate": 0.00048326219620610766,
23907
+ "loss": 16.5566,
23908
+ "step": 68280
23909
+ },
23910
+ {
23911
+ "epoch": 0.10117379376544271,
23912
+ "grad_norm": 7.1875,
23913
+ "learning_rate": 0.00048325725727110016,
23914
+ "loss": 16.6184,
23915
+ "step": 68300
23916
+ },
23917
+ {
23918
+ "epoch": 0.1012034200593711,
23919
+ "grad_norm": 7.46875,
23920
+ "learning_rate": 0.00048325231833609255,
23921
+ "loss": 16.6634,
23922
+ "step": 68320
23923
+ },
23924
+ {
23925
+ "epoch": 0.10123304635329948,
23926
+ "grad_norm": 7.75,
23927
+ "learning_rate": 0.000483247379401085,
23928
+ "loss": 16.5374,
23929
+ "step": 68340
23930
+ },
23931
+ {
23932
+ "epoch": 0.10126267264722787,
23933
+ "grad_norm": 7.40625,
23934
+ "learning_rate": 0.0004832424404660774,
23935
+ "loss": 16.6118,
23936
+ "step": 68360
23937
+ },
23938
+ {
23939
+ "epoch": 0.10129229894115625,
23940
+ "grad_norm": 7.03125,
23941
+ "learning_rate": 0.0004832375015310699,
23942
+ "loss": 16.5952,
23943
+ "step": 68380
23944
+ },
23945
+ {
23946
+ "epoch": 0.10132192523508464,
23947
+ "grad_norm": 7.0,
23948
+ "learning_rate": 0.0004832325625960623,
23949
+ "loss": 16.5866,
23950
+ "step": 68400
23951
+ },
23952
+ {
23953
+ "epoch": 0.10135155152901303,
23954
+ "grad_norm": 7.09375,
23955
+ "learning_rate": 0.00048322762366105474,
23956
+ "loss": 16.5999,
23957
+ "step": 68420
23958
+ },
23959
+ {
23960
+ "epoch": 0.10138117782294141,
23961
+ "grad_norm": 6.9375,
23962
+ "learning_rate": 0.0004832226847260472,
23963
+ "loss": 16.553,
23964
+ "step": 68440
23965
+ },
23966
+ {
23967
+ "epoch": 0.1014108041168698,
23968
+ "grad_norm": 6.4375,
23969
+ "learning_rate": 0.00048321774579103963,
23970
+ "loss": 16.6224,
23971
+ "step": 68460
23972
+ },
23973
+ {
23974
+ "epoch": 0.10144043041079819,
23975
+ "grad_norm": 7.09375,
23976
+ "learning_rate": 0.00048321280685603203,
23977
+ "loss": 16.5789,
23978
+ "step": 68480
23979
+ },
23980
+ {
23981
+ "epoch": 0.10147005670472659,
23982
+ "grad_norm": 6.96875,
23983
+ "learning_rate": 0.0004832078679210244,
23984
+ "loss": 16.6379,
23985
+ "step": 68500
23986
+ },
23987
+ {
23988
+ "epoch": 0.10149968299865497,
23989
+ "grad_norm": 6.0625,
23990
+ "learning_rate": 0.0004832029289860169,
23991
+ "loss": 16.5628,
23992
+ "step": 68520
23993
+ },
23994
+ {
23995
+ "epoch": 0.10152930929258336,
23996
+ "grad_norm": 6.875,
23997
+ "learning_rate": 0.0004831979900510093,
23998
+ "loss": 16.5675,
23999
+ "step": 68540
24000
+ },
24001
+ {
24002
+ "epoch": 0.10155893558651174,
24003
+ "grad_norm": 6.96875,
24004
+ "learning_rate": 0.00048319305111600176,
24005
+ "loss": 16.591,
24006
+ "step": 68560
24007
+ },
24008
+ {
24009
+ "epoch": 0.10158856188044013,
24010
+ "grad_norm": 6.8125,
24011
+ "learning_rate": 0.00048318811218099416,
24012
+ "loss": 16.5641,
24013
+ "step": 68580
24014
+ },
24015
+ {
24016
+ "epoch": 0.10161818817436852,
24017
+ "grad_norm": 6.5625,
24018
+ "learning_rate": 0.00048318317324598666,
24019
+ "loss": 16.5867,
24020
+ "step": 68600
24021
+ },
24022
+ {
24023
+ "epoch": 0.1016478144682969,
24024
+ "grad_norm": 6.4375,
24025
+ "learning_rate": 0.00048317823431097905,
24026
+ "loss": 16.5886,
24027
+ "step": 68620
24028
+ },
24029
+ {
24030
+ "epoch": 0.10167744076222529,
24031
+ "grad_norm": 7.625,
24032
+ "learning_rate": 0.0004831732953759715,
24033
+ "loss": 16.5648,
24034
+ "step": 68640
24035
+ },
24036
+ {
24037
+ "epoch": 0.10170706705615368,
24038
+ "grad_norm": 6.46875,
24039
+ "learning_rate": 0.0004831683564409639,
24040
+ "loss": 16.5727,
24041
+ "step": 68660
24042
+ },
24043
+ {
24044
+ "epoch": 0.10173669335008206,
24045
+ "grad_norm": 6.75,
24046
+ "learning_rate": 0.0004831634175059564,
24047
+ "loss": 16.5931,
24048
+ "step": 68680
24049
+ },
24050
+ {
24051
+ "epoch": 0.10176631964401045,
24052
+ "grad_norm": 6.6875,
24053
+ "learning_rate": 0.0004831584785709488,
24054
+ "loss": 16.6137,
24055
+ "step": 68700
24056
+ },
24057
+ {
24058
+ "epoch": 0.10179594593793884,
24059
+ "grad_norm": 7.15625,
24060
+ "learning_rate": 0.00048315353963594124,
24061
+ "loss": 16.5733,
24062
+ "step": 68720
24063
+ },
24064
+ {
24065
+ "epoch": 0.10182557223186722,
24066
+ "grad_norm": 7.15625,
24067
+ "learning_rate": 0.0004831486007009337,
24068
+ "loss": 16.5379,
24069
+ "step": 68740
24070
+ },
24071
+ {
24072
+ "epoch": 0.10185519852579561,
24073
+ "grad_norm": 5.59375,
24074
+ "learning_rate": 0.00048314366176592613,
24075
+ "loss": 16.6031,
24076
+ "step": 68760
24077
+ },
24078
+ {
24079
+ "epoch": 0.101884824819724,
24080
+ "grad_norm": 6.6875,
24081
+ "learning_rate": 0.00048313872283091853,
24082
+ "loss": 16.5457,
24083
+ "step": 68780
24084
+ },
24085
+ {
24086
+ "epoch": 0.10191445111365238,
24087
+ "grad_norm": 6.125,
24088
+ "learning_rate": 0.000483133783895911,
24089
+ "loss": 16.5053,
24090
+ "step": 68800
24091
+ },
24092
+ {
24093
+ "epoch": 0.10194407740758078,
24094
+ "grad_norm": 7.1875,
24095
+ "learning_rate": 0.0004831288449609034,
24096
+ "loss": 16.576,
24097
+ "step": 68820
24098
+ },
24099
+ {
24100
+ "epoch": 0.10197370370150917,
24101
+ "grad_norm": 6.875,
24102
+ "learning_rate": 0.0004831239060258958,
24103
+ "loss": 16.6092,
24104
+ "step": 68840
24105
+ },
24106
+ {
24107
+ "epoch": 0.10200332999543756,
24108
+ "grad_norm": 6.5,
24109
+ "learning_rate": 0.00048311896709088827,
24110
+ "loss": 16.598,
24111
+ "step": 68860
24112
+ },
24113
+ {
24114
+ "epoch": 0.10203295628936594,
24115
+ "grad_norm": 6.3125,
24116
+ "learning_rate": 0.00048311402815588066,
24117
+ "loss": 16.5953,
24118
+ "step": 68880
24119
+ },
24120
+ {
24121
+ "epoch": 0.10206258258329433,
24122
+ "grad_norm": 6.34375,
24123
+ "learning_rate": 0.00048310908922087316,
24124
+ "loss": 16.5686,
24125
+ "step": 68900
24126
+ },
24127
+ {
24128
+ "epoch": 0.10209220887722271,
24129
+ "grad_norm": 7.3125,
24130
+ "learning_rate": 0.00048310415028586555,
24131
+ "loss": 16.5649,
24132
+ "step": 68920
24133
+ },
24134
+ {
24135
+ "epoch": 0.1021218351711511,
24136
+ "grad_norm": 7.15625,
24137
+ "learning_rate": 0.000483099211350858,
24138
+ "loss": 16.5833,
24139
+ "step": 68940
24140
+ },
24141
+ {
24142
+ "epoch": 0.10215146146507949,
24143
+ "grad_norm": 7.0,
24144
+ "learning_rate": 0.0004830942724158504,
24145
+ "loss": 16.5813,
24146
+ "step": 68960
24147
+ },
24148
+ {
24149
+ "epoch": 0.10218108775900787,
24150
+ "grad_norm": 7.125,
24151
+ "learning_rate": 0.0004830893334808429,
24152
+ "loss": 16.6435,
24153
+ "step": 68980
24154
+ },
24155
+ {
24156
+ "epoch": 0.10221071405293626,
24157
+ "grad_norm": 6.6875,
24158
+ "learning_rate": 0.0004830843945458353,
24159
+ "loss": 16.6299,
24160
+ "step": 69000
24161
  }
24162
  ],
24163
  "logging_steps": 20,
 
24177
  "attributes": {}
24178
  }
24179
  },
24180
+ "total_flos": 5.073098616395845e+19,
24181
  "train_batch_size": 48,
24182
  "trial_name": null,
24183
  "trial_params": null
last-checkpoint/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1ae6fe7865a6680f0788decd4b8035db04ae39b0ae4392f872489469c00e7d58
3
  size 5432
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fc3f551404b0d7edd833494ee70d9c95a722ebd26deaead78190bce345559dbd
3
  size 5432