mohammadmahdinouri commited on
Commit
a2e61db
·
verified ·
1 Parent(s): 3109545

Training in progress, step 75000, checkpoint

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5c14f92422cc30c9605f95654d62c250bad463581bd3da10bb7b17093206005e
3
  size 304481530
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a67b2c60c5b42d0ad22d6b38771528b94fc53ceec628d0597d6fa521952a684c
3
  size 304481530
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cdbe93c9686a0a02ecdcba702915ad1389c2bb261f4103c48b737864febba412
3
  size 402029570
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b6916c88bb66f81e6f1308f6aadeffdb932cc73012f17c967d2f81582f0d6ec4
3
  size 402029570
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cf863b0b895309e73d9088642dd8d00845be8fee481352073f05fd0bd67029a2
3
  size 14960
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1caa66015d3956d30ec507257de058a8c2fd4bde8e3572a38d393062e23e25fa
3
  size 14960
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7f0942e1e9569ddb210dcd2d42bc92e339bbd2239990fd3cc546265bee775d39
3
  size 14960
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:43374ebce165dffb63c7f0a02b8a1fb69d9d2182c0805086854a706ff35de8db
3
  size 14960
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d2a7d2488bf1d4b76628b506fc6b6fb862cbf4396985e4c9e2f16e4262ba5085
3
  size 14960
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:23f5704701def73bff9de54ed2bc9c44e464b4fd7bf79cf9e15b571b97700de5
3
  size 14960
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:115e6df582159f803bd87cdfeee2a6c991779cf09357b4ef2537b502b04c878f
3
  size 14960
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d2ec98dcfff897ba38371ec424fd9cb0533d296496a8ad5f5af6ba3e2b631320
3
  size 14960
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9391a0b437930e5697a6d0905f7bf157b3a70a9ca0d6fddfd220757077049906
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:83d418122fbb0fa369cfecb2f66848d24fc6c35ef433b91965b2ecce9163409e
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.1096172875350331,
6
  "eval_steps": 500,
7
- "global_step": 74000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -25908,6 +25908,356 @@
25908
  "learning_rate": 0.0004818496607939437,
25909
  "loss": 16.3994,
25910
  "step": 74000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
25911
  }
25912
  ],
25913
  "logging_steps": 20,
@@ -25927,7 +26277,7 @@
25927
  "attributes": {}
25928
  }
25929
  },
25930
- "total_flos": 5.440780396085746e+19,
25931
  "train_batch_size": 48,
25932
  "trial_name": null,
25933
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.11109860223145246,
6
  "eval_steps": 500,
7
+ "global_step": 75000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
25908
  "learning_rate": 0.0004818496607939437,
25909
  "loss": 16.3994,
25910
  "step": 74000
25911
+ },
25912
+ {
25913
+ "epoch": 0.10964691382896148,
25914
+ "grad_norm": 6.34375,
25915
+ "learning_rate": 0.0004818447218589361,
25916
+ "loss": 16.3548,
25917
+ "step": 74020
25918
+ },
25919
+ {
25920
+ "epoch": 0.10967654012288987,
25921
+ "grad_norm": 6.84375,
25922
+ "learning_rate": 0.00048183978292392856,
25923
+ "loss": 16.4066,
25924
+ "step": 74040
25925
+ },
25926
+ {
25927
+ "epoch": 0.10970616641681825,
25928
+ "grad_norm": 7.34375,
25929
+ "learning_rate": 0.00048183484398892096,
25930
+ "loss": 16.4287,
25931
+ "step": 74060
25932
+ },
25933
+ {
25934
+ "epoch": 0.10973579271074664,
25935
+ "grad_norm": 6.34375,
25936
+ "learning_rate": 0.00048182990505391346,
25937
+ "loss": 16.4596,
25938
+ "step": 74080
25939
+ },
25940
+ {
25941
+ "epoch": 0.10976541900467503,
25942
+ "grad_norm": 7.625,
25943
+ "learning_rate": 0.00048182496611890585,
25944
+ "loss": 16.4221,
25945
+ "step": 74100
25946
+ },
25947
+ {
25948
+ "epoch": 0.10979504529860341,
25949
+ "grad_norm": 6.9375,
25950
+ "learning_rate": 0.0004818200271838983,
25951
+ "loss": 16.4392,
25952
+ "step": 74120
25953
+ },
25954
+ {
25955
+ "epoch": 0.1098246715925318,
25956
+ "grad_norm": 6.5625,
25957
+ "learning_rate": 0.0004818150882488907,
25958
+ "loss": 16.4181,
25959
+ "step": 74140
25960
+ },
25961
+ {
25962
+ "epoch": 0.10985429788646019,
25963
+ "grad_norm": 7.40625,
25964
+ "learning_rate": 0.0004818101493138832,
25965
+ "loss": 16.4593,
25966
+ "step": 74160
25967
+ },
25968
+ {
25969
+ "epoch": 0.10988392418038857,
25970
+ "grad_norm": 6.8125,
25971
+ "learning_rate": 0.0004818052103788756,
25972
+ "loss": 16.4166,
25973
+ "step": 74180
25974
+ },
25975
+ {
25976
+ "epoch": 0.10991355047431696,
25977
+ "grad_norm": 6.90625,
25978
+ "learning_rate": 0.00048180027144386804,
25979
+ "loss": 16.422,
25980
+ "step": 74200
25981
+ },
25982
+ {
25983
+ "epoch": 0.10994317676824536,
25984
+ "grad_norm": 6.84375,
25985
+ "learning_rate": 0.0004817953325088605,
25986
+ "loss": 16.3946,
25987
+ "step": 74220
25988
+ },
25989
+ {
25990
+ "epoch": 0.10997280306217375,
25991
+ "grad_norm": 6.5,
25992
+ "learning_rate": 0.0004817903935738529,
25993
+ "loss": 16.4022,
25994
+ "step": 74240
25995
+ },
25996
+ {
25997
+ "epoch": 0.11000242935610213,
25998
+ "grad_norm": 7.25,
25999
+ "learning_rate": 0.0004817854546388453,
26000
+ "loss": 16.4411,
26001
+ "step": 74260
26002
+ },
26003
+ {
26004
+ "epoch": 0.11003205565003052,
26005
+ "grad_norm": 7.28125,
26006
+ "learning_rate": 0.0004817805157038377,
26007
+ "loss": 16.3874,
26008
+ "step": 74280
26009
+ },
26010
+ {
26011
+ "epoch": 0.1100616819439589,
26012
+ "grad_norm": 6.40625,
26013
+ "learning_rate": 0.0004817755767688302,
26014
+ "loss": 16.4294,
26015
+ "step": 74300
26016
+ },
26017
+ {
26018
+ "epoch": 0.11009130823788729,
26019
+ "grad_norm": 6.8125,
26020
+ "learning_rate": 0.0004817706378338226,
26021
+ "loss": 16.4261,
26022
+ "step": 74320
26023
+ },
26024
+ {
26025
+ "epoch": 0.11012093453181568,
26026
+ "grad_norm": 7.34375,
26027
+ "learning_rate": 0.00048176569889881506,
26028
+ "loss": 16.4238,
26029
+ "step": 74340
26030
+ },
26031
+ {
26032
+ "epoch": 0.11015056082574406,
26033
+ "grad_norm": 7.125,
26034
+ "learning_rate": 0.00048176075996380746,
26035
+ "loss": 16.3817,
26036
+ "step": 74360
26037
+ },
26038
+ {
26039
+ "epoch": 0.11018018711967245,
26040
+ "grad_norm": 6.65625,
26041
+ "learning_rate": 0.00048175582102879996,
26042
+ "loss": 16.3883,
26043
+ "step": 74380
26044
+ },
26045
+ {
26046
+ "epoch": 0.11020981341360084,
26047
+ "grad_norm": 6.71875,
26048
+ "learning_rate": 0.00048175088209379235,
26049
+ "loss": 16.4262,
26050
+ "step": 74400
26051
+ },
26052
+ {
26053
+ "epoch": 0.11023943970752922,
26054
+ "grad_norm": 7.0,
26055
+ "learning_rate": 0.0004817459431587848,
26056
+ "loss": 16.4212,
26057
+ "step": 74420
26058
+ },
26059
+ {
26060
+ "epoch": 0.11026906600145761,
26061
+ "grad_norm": 6.90625,
26062
+ "learning_rate": 0.0004817410042237772,
26063
+ "loss": 16.4718,
26064
+ "step": 74440
26065
+ },
26066
+ {
26067
+ "epoch": 0.110298692295386,
26068
+ "grad_norm": 7.8125,
26069
+ "learning_rate": 0.0004817360652887697,
26070
+ "loss": 16.4202,
26071
+ "step": 74460
26072
+ },
26073
+ {
26074
+ "epoch": 0.11032831858931438,
26075
+ "grad_norm": 7.28125,
26076
+ "learning_rate": 0.0004817311263537621,
26077
+ "loss": 16.3904,
26078
+ "step": 74480
26079
+ },
26080
+ {
26081
+ "epoch": 0.11035794488324277,
26082
+ "grad_norm": 6.75,
26083
+ "learning_rate": 0.00048172618741875454,
26084
+ "loss": 16.4918,
26085
+ "step": 74500
26086
+ },
26087
+ {
26088
+ "epoch": 0.11038757117717116,
26089
+ "grad_norm": 7.09375,
26090
+ "learning_rate": 0.000481721248483747,
26091
+ "loss": 16.3993,
26092
+ "step": 74520
26093
+ },
26094
+ {
26095
+ "epoch": 0.11041719747109956,
26096
+ "grad_norm": 6.5,
26097
+ "learning_rate": 0.00048171630954873943,
26098
+ "loss": 16.3485,
26099
+ "step": 74540
26100
+ },
26101
+ {
26102
+ "epoch": 0.11044682376502794,
26103
+ "grad_norm": 6.59375,
26104
+ "learning_rate": 0.0004817113706137318,
26105
+ "loss": 16.3389,
26106
+ "step": 74560
26107
+ },
26108
+ {
26109
+ "epoch": 0.11047645005895633,
26110
+ "grad_norm": 6.9375,
26111
+ "learning_rate": 0.0004817064316787242,
26112
+ "loss": 16.3738,
26113
+ "step": 74580
26114
+ },
26115
+ {
26116
+ "epoch": 0.11050607635288472,
26117
+ "grad_norm": 7.5,
26118
+ "learning_rate": 0.0004817014927437167,
26119
+ "loss": 16.3881,
26120
+ "step": 74600
26121
+ },
26122
+ {
26123
+ "epoch": 0.1105357026468131,
26124
+ "grad_norm": 6.1875,
26125
+ "learning_rate": 0.0004816965538087091,
26126
+ "loss": 16.3802,
26127
+ "step": 74620
26128
+ },
26129
+ {
26130
+ "epoch": 0.11056532894074149,
26131
+ "grad_norm": 7.375,
26132
+ "learning_rate": 0.00048169161487370156,
26133
+ "loss": 16.4216,
26134
+ "step": 74640
26135
+ },
26136
+ {
26137
+ "epoch": 0.11059495523466988,
26138
+ "grad_norm": 7.1875,
26139
+ "learning_rate": 0.00048168667593869396,
26140
+ "loss": 16.4166,
26141
+ "step": 74660
26142
+ },
26143
+ {
26144
+ "epoch": 0.11062458152859826,
26145
+ "grad_norm": 7.21875,
26146
+ "learning_rate": 0.00048168173700368646,
26147
+ "loss": 16.358,
26148
+ "step": 74680
26149
+ },
26150
+ {
26151
+ "epoch": 0.11065420782252665,
26152
+ "grad_norm": 7.59375,
26153
+ "learning_rate": 0.00048167679806867885,
26154
+ "loss": 16.4844,
26155
+ "step": 74700
26156
+ },
26157
+ {
26158
+ "epoch": 0.11068383411645503,
26159
+ "grad_norm": 7.59375,
26160
+ "learning_rate": 0.0004816718591336713,
26161
+ "loss": 16.4061,
26162
+ "step": 74720
26163
+ },
26164
+ {
26165
+ "epoch": 0.11071346041038342,
26166
+ "grad_norm": 7.09375,
26167
+ "learning_rate": 0.0004816669201986637,
26168
+ "loss": 16.4073,
26169
+ "step": 74740
26170
+ },
26171
+ {
26172
+ "epoch": 0.11074308670431181,
26173
+ "grad_norm": 6.28125,
26174
+ "learning_rate": 0.0004816619812636562,
26175
+ "loss": 16.3988,
26176
+ "step": 74760
26177
+ },
26178
+ {
26179
+ "epoch": 0.1107727129982402,
26180
+ "grad_norm": 5.96875,
26181
+ "learning_rate": 0.0004816570423286486,
26182
+ "loss": 16.4417,
26183
+ "step": 74780
26184
+ },
26185
+ {
26186
+ "epoch": 0.11080233929216858,
26187
+ "grad_norm": 7.15625,
26188
+ "learning_rate": 0.00048165210339364104,
26189
+ "loss": 16.3517,
26190
+ "step": 74800
26191
+ },
26192
+ {
26193
+ "epoch": 0.11083196558609697,
26194
+ "grad_norm": 6.40625,
26195
+ "learning_rate": 0.0004816471644586335,
26196
+ "loss": 16.3409,
26197
+ "step": 74820
26198
+ },
26199
+ {
26200
+ "epoch": 0.11086159188002535,
26201
+ "grad_norm": 6.65625,
26202
+ "learning_rate": 0.00048164222552362593,
26203
+ "loss": 16.3664,
26204
+ "step": 74840
26205
+ },
26206
+ {
26207
+ "epoch": 0.11089121817395375,
26208
+ "grad_norm": 6.75,
26209
+ "learning_rate": 0.0004816372865886183,
26210
+ "loss": 16.4146,
26211
+ "step": 74860
26212
+ },
26213
+ {
26214
+ "epoch": 0.11092084446788214,
26215
+ "grad_norm": 6.90625,
26216
+ "learning_rate": 0.0004816323476536108,
26217
+ "loss": 16.3548,
26218
+ "step": 74880
26219
+ },
26220
+ {
26221
+ "epoch": 0.11095047076181053,
26222
+ "grad_norm": 6.8125,
26223
+ "learning_rate": 0.0004816274087186032,
26224
+ "loss": 16.4546,
26225
+ "step": 74900
26226
+ },
26227
+ {
26228
+ "epoch": 0.11098009705573891,
26229
+ "grad_norm": 6.59375,
26230
+ "learning_rate": 0.0004816224697835956,
26231
+ "loss": 16.3883,
26232
+ "step": 74920
26233
+ },
26234
+ {
26235
+ "epoch": 0.1110097233496673,
26236
+ "grad_norm": 7.0,
26237
+ "learning_rate": 0.00048161753084858806,
26238
+ "loss": 16.4069,
26239
+ "step": 74940
26240
+ },
26241
+ {
26242
+ "epoch": 0.11103934964359569,
26243
+ "grad_norm": 7.15625,
26244
+ "learning_rate": 0.00048161259191358046,
26245
+ "loss": 16.3556,
26246
+ "step": 74960
26247
+ },
26248
+ {
26249
+ "epoch": 0.11106897593752407,
26250
+ "grad_norm": 6.65625,
26251
+ "learning_rate": 0.00048160765297857296,
26252
+ "loss": 16.4227,
26253
+ "step": 74980
26254
+ },
26255
+ {
26256
+ "epoch": 0.11109860223145246,
26257
+ "grad_norm": 6.90625,
26258
+ "learning_rate": 0.00048160271404356535,
26259
+ "loss": 16.4454,
26260
+ "step": 75000
26261
  }
26262
  ],
26263
  "logging_steps": 20,
 
26277
  "attributes": {}
26278
  }
26279
  },
26280
+ "total_flos": 5.514317317520595e+19,
26281
  "train_batch_size": 48,
26282
  "trial_name": null,
26283
  "trial_params": null