mohammadmahdinouri commited on
Commit
05602c3
·
verified ·
1 Parent(s): 9766389

Training in progress, step 40500, checkpoint

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cbb933599d03723090d50f6ffb70296e6eebfa61eacef48a3a190ab7f15c58c2
3
  size 448472762
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d03259d2e256ae1843dd9d93c96f13ae033fa50104b7b3335fc98eda0124f7a5
3
  size 448472762
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:903023559671a4d82750f12e5ca9d151bdd8d505a4fce885d24d671e2610e186
3
  size 151589028
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:15854e4e6f02157f1526c4c8d47e876fa37ca90091d0530a30a41ceb5b08bcec
3
  size 151589028
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:81957fa12fa0c422c90aa82913486d7b1e310985e3f015d6405f86eae7e8caf3
3
  size 14960
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:30b82ff92adb88680c27be8e5a2b5c9da63ae08090ef3a7c14508b2164f79186
3
  size 14960
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:da890422df85affed3b9f319afd342babff58a471fda9ec061f701ac1b55f1c7
3
  size 14960
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cf6d32c22f27c022798bfaaaca4ba2cb9286d958e4d3a8bc21674e25ce2e9897
3
  size 14960
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8851c06e96c8092fbedd67a7750090010c0af343cfa97642b1e2f88d3465e85d
3
  size 14960
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bad3a95014e8c014b1b0e1d2c7e862e00a64dd0395000ae8357a96077edd14c5
3
  size 14960
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:96398a4c09a68bacf3ea4300ac6d599a0630a0edcecaa220a7a8972091f45d18
3
  size 14960
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ce9750971f49c876ed40cfbeda660585c3f668d39961f483bf1708ae57b0f2eb
3
  size 14960
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f1aa1b92b39f2173cd0223cc365bd686d25497911f61b1c0837c564d84b38f68
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5b16d6b3e5a3f4efdb62ade99a44d77fcf809fff9d006debbc9f917125a34ca7
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.07799893726447978,
6
  "eval_steps": 500,
7
- "global_step": 40000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -28008,6 +28008,356 @@
28008
  "learning_rate": 0.00048715881405557025,
28009
  "loss": 2.0898,
28010
  "step": 40000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28011
  }
28012
  ],
28013
  "logging_steps": 10,
@@ -28027,7 +28377,7 @@
28027
  "attributes": {}
28028
  }
28029
  },
28030
- "total_flos": 1.3080923317873934e+19,
28031
  "train_batch_size": 48,
28032
  "trial_name": null,
28033
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.07897392398028577,
6
  "eval_steps": 500,
7
+ "global_step": 40500,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
28008
  "learning_rate": 0.00048715881405557025,
28009
  "loss": 2.0898,
28010
  "step": 40000
28011
+ },
28012
+ {
28013
+ "epoch": 0.07801843699879589,
28014
+ "grad_norm": 0.37890625,
28015
+ "learning_rate": 0.0004871555630401156,
28016
+ "loss": 2.0975,
28017
+ "step": 40010
28018
+ },
28019
+ {
28020
+ "epoch": 0.07803793673311202,
28021
+ "grad_norm": 0.416015625,
28022
+ "learning_rate": 0.0004871523120246609,
28023
+ "loss": 2.0899,
28024
+ "step": 40020
28025
+ },
28026
+ {
28027
+ "epoch": 0.07805743646742813,
28028
+ "grad_norm": 0.4453125,
28029
+ "learning_rate": 0.00048714906100920624,
28030
+ "loss": 2.1008,
28031
+ "step": 40030
28032
+ },
28033
+ {
28034
+ "epoch": 0.07807693620174425,
28035
+ "grad_norm": 0.490234375,
28036
+ "learning_rate": 0.00048714580999375157,
28037
+ "loss": 2.0912,
28038
+ "step": 40040
28039
+ },
28040
+ {
28041
+ "epoch": 0.07809643593606037,
28042
+ "grad_norm": 0.39453125,
28043
+ "learning_rate": 0.0004871425589782969,
28044
+ "loss": 2.0913,
28045
+ "step": 40050
28046
+ },
28047
+ {
28048
+ "epoch": 0.07811593567037649,
28049
+ "grad_norm": 0.396484375,
28050
+ "learning_rate": 0.0004871393079628422,
28051
+ "loss": 2.0787,
28052
+ "step": 40060
28053
+ },
28054
+ {
28055
+ "epoch": 0.07813543540469262,
28056
+ "grad_norm": 0.369140625,
28057
+ "learning_rate": 0.00048713605694738755,
28058
+ "loss": 2.1002,
28059
+ "step": 40070
28060
+ },
28061
+ {
28062
+ "epoch": 0.07815493513900873,
28063
+ "grad_norm": 0.396484375,
28064
+ "learning_rate": 0.0004871328059319329,
28065
+ "loss": 2.1055,
28066
+ "step": 40080
28067
+ },
28068
+ {
28069
+ "epoch": 0.07817443487332486,
28070
+ "grad_norm": 0.392578125,
28071
+ "learning_rate": 0.00048712955491647816,
28072
+ "loss": 2.0943,
28073
+ "step": 40090
28074
+ },
28075
+ {
28076
+ "epoch": 0.07819393460764097,
28077
+ "grad_norm": 0.412109375,
28078
+ "learning_rate": 0.0004871263039010235,
28079
+ "loss": 2.096,
28080
+ "step": 40100
28081
+ },
28082
+ {
28083
+ "epoch": 0.07821343434195709,
28084
+ "grad_norm": 0.462890625,
28085
+ "learning_rate": 0.0004871230528855688,
28086
+ "loss": 2.0891,
28087
+ "step": 40110
28088
+ },
28089
+ {
28090
+ "epoch": 0.07823293407627321,
28091
+ "grad_norm": 0.3828125,
28092
+ "learning_rate": 0.00048711980187011414,
28093
+ "loss": 2.0926,
28094
+ "step": 40120
28095
+ },
28096
+ {
28097
+ "epoch": 0.07825243381058933,
28098
+ "grad_norm": 0.45703125,
28099
+ "learning_rate": 0.00048711655085465947,
28100
+ "loss": 2.0898,
28101
+ "step": 40130
28102
+ },
28103
+ {
28104
+ "epoch": 0.07827193354490546,
28105
+ "grad_norm": 0.37109375,
28106
+ "learning_rate": 0.00048711329983920474,
28107
+ "loss": 2.0886,
28108
+ "step": 40140
28109
+ },
28110
+ {
28111
+ "epoch": 0.07829143327922157,
28112
+ "grad_norm": 0.373046875,
28113
+ "learning_rate": 0.00048711004882375007,
28114
+ "loss": 2.0855,
28115
+ "step": 40150
28116
+ },
28117
+ {
28118
+ "epoch": 0.07831093301353768,
28119
+ "grad_norm": 0.55859375,
28120
+ "learning_rate": 0.0004871067978082954,
28121
+ "loss": 2.1028,
28122
+ "step": 40160
28123
+ },
28124
+ {
28125
+ "epoch": 0.07833043274785381,
28126
+ "grad_norm": 0.42578125,
28127
+ "learning_rate": 0.00048710354679284073,
28128
+ "loss": 2.0743,
28129
+ "step": 40170
28130
+ },
28131
+ {
28132
+ "epoch": 0.07834993248216993,
28133
+ "grad_norm": 0.53125,
28134
+ "learning_rate": 0.00048710029577738606,
28135
+ "loss": 2.0989,
28136
+ "step": 40180
28137
+ },
28138
+ {
28139
+ "epoch": 0.07836943221648605,
28140
+ "grad_norm": 0.39453125,
28141
+ "learning_rate": 0.0004870970447619314,
28142
+ "loss": 2.0705,
28143
+ "step": 40190
28144
+ },
28145
+ {
28146
+ "epoch": 0.07838893195080217,
28147
+ "grad_norm": 0.3515625,
28148
+ "learning_rate": 0.0004870937937464767,
28149
+ "loss": 2.0708,
28150
+ "step": 40200
28151
+ },
28152
+ {
28153
+ "epoch": 0.0784084316851183,
28154
+ "grad_norm": 0.48828125,
28155
+ "learning_rate": 0.00048709054273102204,
28156
+ "loss": 2.0809,
28157
+ "step": 40210
28158
+ },
28159
+ {
28160
+ "epoch": 0.07842793141943441,
28161
+ "grad_norm": 0.43359375,
28162
+ "learning_rate": 0.00048708729171556737,
28163
+ "loss": 2.0944,
28164
+ "step": 40220
28165
+ },
28166
+ {
28167
+ "epoch": 0.07844743115375052,
28168
+ "grad_norm": 0.349609375,
28169
+ "learning_rate": 0.0004870840407001127,
28170
+ "loss": 2.0886,
28171
+ "step": 40230
28172
+ },
28173
+ {
28174
+ "epoch": 0.07846693088806665,
28175
+ "grad_norm": 0.71875,
28176
+ "learning_rate": 0.00048708078968465803,
28177
+ "loss": 2.0778,
28178
+ "step": 40240
28179
+ },
28180
+ {
28181
+ "epoch": 0.07848643062238277,
28182
+ "grad_norm": 0.39453125,
28183
+ "learning_rate": 0.00048707753866920336,
28184
+ "loss": 2.0816,
28185
+ "step": 40250
28186
+ },
28187
+ {
28188
+ "epoch": 0.0785059303566989,
28189
+ "grad_norm": 0.38671875,
28190
+ "learning_rate": 0.0004870742876537487,
28191
+ "loss": 2.095,
28192
+ "step": 40260
28193
+ },
28194
+ {
28195
+ "epoch": 0.07852543009101501,
28196
+ "grad_norm": 0.357421875,
28197
+ "learning_rate": 0.000487071036638294,
28198
+ "loss": 2.0847,
28199
+ "step": 40270
28200
+ },
28201
+ {
28202
+ "epoch": 0.07854492982533114,
28203
+ "grad_norm": 0.3984375,
28204
+ "learning_rate": 0.00048706778562283934,
28205
+ "loss": 2.095,
28206
+ "step": 40280
28207
+ },
28208
+ {
28209
+ "epoch": 0.07856442955964725,
28210
+ "grad_norm": 0.408203125,
28211
+ "learning_rate": 0.00048706453460738467,
28212
+ "loss": 2.0868,
28213
+ "step": 40290
28214
+ },
28215
+ {
28216
+ "epoch": 0.07858392929396336,
28217
+ "grad_norm": 0.375,
28218
+ "learning_rate": 0.00048706128359192995,
28219
+ "loss": 2.0935,
28220
+ "step": 40300
28221
+ },
28222
+ {
28223
+ "epoch": 0.07860342902827949,
28224
+ "grad_norm": 0.43359375,
28225
+ "learning_rate": 0.0004870580325764753,
28226
+ "loss": 2.0762,
28227
+ "step": 40310
28228
+ },
28229
+ {
28230
+ "epoch": 0.0786229287625956,
28231
+ "grad_norm": 0.384765625,
28232
+ "learning_rate": 0.0004870547815610206,
28233
+ "loss": 2.0939,
28234
+ "step": 40320
28235
+ },
28236
+ {
28237
+ "epoch": 0.07864242849691173,
28238
+ "grad_norm": 0.431640625,
28239
+ "learning_rate": 0.0004870515305455659,
28240
+ "loss": 2.091,
28241
+ "step": 40330
28242
+ },
28243
+ {
28244
+ "epoch": 0.07866192823122785,
28245
+ "grad_norm": 0.44921875,
28246
+ "learning_rate": 0.0004870482795301112,
28247
+ "loss": 2.1065,
28248
+ "step": 40340
28249
+ },
28250
+ {
28251
+ "epoch": 0.07868142796554398,
28252
+ "grad_norm": 0.42578125,
28253
+ "learning_rate": 0.00048704502851465653,
28254
+ "loss": 2.0833,
28255
+ "step": 40350
28256
+ },
28257
+ {
28258
+ "epoch": 0.07870092769986009,
28259
+ "grad_norm": 0.390625,
28260
+ "learning_rate": 0.00048704177749920186,
28261
+ "loss": 2.0815,
28262
+ "step": 40360
28263
+ },
28264
+ {
28265
+ "epoch": 0.0787204274341762,
28266
+ "grad_norm": 0.431640625,
28267
+ "learning_rate": 0.0004870385264837472,
28268
+ "loss": 2.0821,
28269
+ "step": 40370
28270
+ },
28271
+ {
28272
+ "epoch": 0.07873992716849233,
28273
+ "grad_norm": 0.416015625,
28274
+ "learning_rate": 0.0004870352754682925,
28275
+ "loss": 2.0873,
28276
+ "step": 40380
28277
+ },
28278
+ {
28279
+ "epoch": 0.07875942690280845,
28280
+ "grad_norm": 0.404296875,
28281
+ "learning_rate": 0.00048703202445283785,
28282
+ "loss": 2.0744,
28283
+ "step": 40390
28284
+ },
28285
+ {
28286
+ "epoch": 0.07877892663712457,
28287
+ "grad_norm": 0.392578125,
28288
+ "learning_rate": 0.0004870287734373832,
28289
+ "loss": 2.0897,
28290
+ "step": 40400
28291
+ },
28292
+ {
28293
+ "epoch": 0.07879842637144069,
28294
+ "grad_norm": 0.46484375,
28295
+ "learning_rate": 0.0004870255224219285,
28296
+ "loss": 2.0874,
28297
+ "step": 40410
28298
+ },
28299
+ {
28300
+ "epoch": 0.0788179261057568,
28301
+ "grad_norm": 0.435546875,
28302
+ "learning_rate": 0.00048702227140647383,
28303
+ "loss": 2.0957,
28304
+ "step": 40420
28305
+ },
28306
+ {
28307
+ "epoch": 0.07883742584007293,
28308
+ "grad_norm": 0.419921875,
28309
+ "learning_rate": 0.00048701902039101916,
28310
+ "loss": 2.0726,
28311
+ "step": 40430
28312
+ },
28313
+ {
28314
+ "epoch": 0.07885692557438904,
28315
+ "grad_norm": 0.4140625,
28316
+ "learning_rate": 0.0004870157693755645,
28317
+ "loss": 2.0802,
28318
+ "step": 40440
28319
+ },
28320
+ {
28321
+ "epoch": 0.07887642530870517,
28322
+ "grad_norm": 0.53515625,
28323
+ "learning_rate": 0.00048701251836010976,
28324
+ "loss": 2.0906,
28325
+ "step": 40450
28326
+ },
28327
+ {
28328
+ "epoch": 0.07889592504302129,
28329
+ "grad_norm": 0.384765625,
28330
+ "learning_rate": 0.0004870092673446551,
28331
+ "loss": 2.0789,
28332
+ "step": 40460
28333
+ },
28334
+ {
28335
+ "epoch": 0.07891542477733741,
28336
+ "grad_norm": 0.421875,
28337
+ "learning_rate": 0.0004870060163292004,
28338
+ "loss": 2.0869,
28339
+ "step": 40470
28340
+ },
28341
+ {
28342
+ "epoch": 0.07893492451165353,
28343
+ "grad_norm": 0.396484375,
28344
+ "learning_rate": 0.00048700276531374575,
28345
+ "loss": 2.0973,
28346
+ "step": 40480
28347
+ },
28348
+ {
28349
+ "epoch": 0.07895442424596964,
28350
+ "grad_norm": 0.39453125,
28351
+ "learning_rate": 0.0004869995142982911,
28352
+ "loss": 2.0851,
28353
+ "step": 40490
28354
+ },
28355
+ {
28356
+ "epoch": 0.07897392398028577,
28357
+ "grad_norm": 0.40234375,
28358
+ "learning_rate": 0.0004869962632828364,
28359
+ "loss": 2.0964,
28360
+ "step": 40500
28361
  }
28362
  ],
28363
  "logging_steps": 10,
 
28377
  "attributes": {}
28378
  }
28379
  },
28380
+ "total_flos": 1.3244470339766845e+19,
28381
  "train_batch_size": 48,
28382
  "trial_name": null,
28383
  "trial_params": null