mohammadmahdinouri commited on
Commit
210cd0f
·
verified ·
1 Parent(s): eac8210

Training in progress, step 30000, checkpoint

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:746096b767c4c47c0c49b66fcf9e67e43d00132964e1a14503d0dc54e61a88ce
3
  size 304481530
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:900bfec8bbaaf11e60a02269eb248da56555b23515aaabb66939f602b61f46df
3
  size 304481530
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7ff72e1a59e706ba21c0c5fc5faf4ff560d04a9269b480a240031d2014cadf01
3
  size 402029570
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ad04fd573c8b774660467779874e5c0f0b9b095f3d589b4be06b47c308b832ba
3
  size 402029570
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:46ed9fc518619ac92c06b536cae3d8dd21e3799906ab806f17d4dd1aa6e8dd9d
3
  size 14960
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a6f630813deaef9a1cc0ef7719a65b543a4823a01fe785bec01a6960bb31bfbb
3
  size 14960
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f0bcf7c080583def4d92e63cc47df57eaf4cf519a6a214957e6214d525864a6a
3
  size 14960
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9528f5a47024c1363ae9c5890f76104c675c25e9bd6dd5b9caad572671227bb3
3
  size 14960
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bbfa9291779333cc6de79bd13fa6c586039654ce156817d635f2b7564e084805
3
  size 14960
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:61faea55b1645e9f10d0f2c6256d4246f7ad721e1bf026c127a0990af1d5a698
3
  size 14960
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a2d36fdfb70bf9082281ebe37b706d22a6591594718aa46603291c3e49697116
3
  size 14960
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9360d3b90f33742b1ed5c074ec111cfe7589e324c03bf98699924f1d744a8fb4
3
  size 14960
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1cd39fe9272798d41cdbf7f22a06af7a14c62772e1b67733185e58a79e1dfc7e
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8cd080e8e812ad045a5fa8d9a10ea8e545aad48f1dc638ca2ffca8a435029247
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.05654922951674783,
6
  "eval_steps": 500,
7
- "global_step": 29000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -10158,6 +10158,356 @@
10158
  "learning_rate": 0.0004907349310557152,
10159
  "loss": 18.2915,
10160
  "step": 29000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10161
  }
10162
  ],
10163
  "logging_steps": 20,
@@ -10177,7 +10527,7 @@
10177
  "attributes": {}
10178
  }
10179
  },
10180
- "total_flos": 2.1319717710265844e+19,
10181
  "train_batch_size": 48,
10182
  "trial_name": null,
10183
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.05849920294835983,
6
  "eval_steps": 500,
7
+ "global_step": 30000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
10158
  "learning_rate": 0.0004907349310557152,
10159
  "loss": 18.2915,
10160
  "step": 29000
10161
+ },
10162
+ {
10163
+ "epoch": 0.056588228985380074,
10164
+ "grad_norm": 7.875,
10165
+ "learning_rate": 0.0004907284290248059,
10166
+ "loss": 18.3218,
10167
+ "step": 29020
10168
+ },
10169
+ {
10170
+ "epoch": 0.056627228454012316,
10171
+ "grad_norm": 8.4375,
10172
+ "learning_rate": 0.0004907219269938965,
10173
+ "loss": 18.2289,
10174
+ "step": 29040
10175
+ },
10176
+ {
10177
+ "epoch": 0.05666622792264455,
10178
+ "grad_norm": 9.125,
10179
+ "learning_rate": 0.0004907154249629872,
10180
+ "loss": 18.2586,
10181
+ "step": 29060
10182
+ },
10183
+ {
10184
+ "epoch": 0.05670522739127679,
10185
+ "grad_norm": 8.25,
10186
+ "learning_rate": 0.0004907089229320778,
10187
+ "loss": 18.2226,
10188
+ "step": 29080
10189
+ },
10190
+ {
10191
+ "epoch": 0.056744226859909035,
10192
+ "grad_norm": 6.90625,
10193
+ "learning_rate": 0.0004907024209011685,
10194
+ "loss": 18.2936,
10195
+ "step": 29100
10196
+ },
10197
+ {
10198
+ "epoch": 0.05678322632854128,
10199
+ "grad_norm": 9.3125,
10200
+ "learning_rate": 0.0004906959188702592,
10201
+ "loss": 18.3193,
10202
+ "step": 29120
10203
+ },
10204
+ {
10205
+ "epoch": 0.05682222579717351,
10206
+ "grad_norm": 9.0,
10207
+ "learning_rate": 0.0004906894168393497,
10208
+ "loss": 18.2579,
10209
+ "step": 29140
10210
+ },
10211
+ {
10212
+ "epoch": 0.056861225265805754,
10213
+ "grad_norm": 8.6875,
10214
+ "learning_rate": 0.0004906829148084404,
10215
+ "loss": 18.3944,
10216
+ "step": 29160
10217
+ },
10218
+ {
10219
+ "epoch": 0.056900224734437996,
10220
+ "grad_norm": 8.25,
10221
+ "learning_rate": 0.000490676412777531,
10222
+ "loss": 18.2326,
10223
+ "step": 29180
10224
+ },
10225
+ {
10226
+ "epoch": 0.05693922420307023,
10227
+ "grad_norm": 8.1875,
10228
+ "learning_rate": 0.0004906699107466217,
10229
+ "loss": 18.2686,
10230
+ "step": 29200
10231
+ },
10232
+ {
10233
+ "epoch": 0.05697822367170247,
10234
+ "grad_norm": 7.96875,
10235
+ "learning_rate": 0.0004906634087157123,
10236
+ "loss": 18.2266,
10237
+ "step": 29220
10238
+ },
10239
+ {
10240
+ "epoch": 0.057017223140334715,
10241
+ "grad_norm": 7.90625,
10242
+ "learning_rate": 0.000490656906684803,
10243
+ "loss": 18.2404,
10244
+ "step": 29240
10245
+ },
10246
+ {
10247
+ "epoch": 0.05705622260896695,
10248
+ "grad_norm": 8.75,
10249
+ "learning_rate": 0.0004906504046538936,
10250
+ "loss": 18.2013,
10251
+ "step": 29260
10252
+ },
10253
+ {
10254
+ "epoch": 0.05709522207759919,
10255
+ "grad_norm": 7.375,
10256
+ "learning_rate": 0.0004906439026229843,
10257
+ "loss": 18.2322,
10258
+ "step": 29280
10259
+ },
10260
+ {
10261
+ "epoch": 0.057134221546231434,
10262
+ "grad_norm": 8.125,
10263
+ "learning_rate": 0.000490637400592075,
10264
+ "loss": 18.2848,
10265
+ "step": 29300
10266
+ },
10267
+ {
10268
+ "epoch": 0.05717322101486367,
10269
+ "grad_norm": 8.0,
10270
+ "learning_rate": 0.0004906308985611656,
10271
+ "loss": 18.2395,
10272
+ "step": 29320
10273
+ },
10274
+ {
10275
+ "epoch": 0.05721222048349591,
10276
+ "grad_norm": 8.875,
10277
+ "learning_rate": 0.0004906243965302563,
10278
+ "loss": 18.2619,
10279
+ "step": 29340
10280
+ },
10281
+ {
10282
+ "epoch": 0.05725121995212815,
10283
+ "grad_norm": 7.5,
10284
+ "learning_rate": 0.0004906178944993469,
10285
+ "loss": 18.2287,
10286
+ "step": 29360
10287
+ },
10288
+ {
10289
+ "epoch": 0.057290219420760395,
10290
+ "grad_norm": 8.125,
10291
+ "learning_rate": 0.0004906113924684375,
10292
+ "loss": 18.2756,
10293
+ "step": 29380
10294
+ },
10295
+ {
10296
+ "epoch": 0.05732921888939263,
10297
+ "grad_norm": 8.8125,
10298
+ "learning_rate": 0.0004906048904375281,
10299
+ "loss": 18.2243,
10300
+ "step": 29400
10301
+ },
10302
+ {
10303
+ "epoch": 0.05736821835802487,
10304
+ "grad_norm": 7.96875,
10305
+ "learning_rate": 0.0004905983884066188,
10306
+ "loss": 18.202,
10307
+ "step": 29420
10308
+ },
10309
+ {
10310
+ "epoch": 0.057407217826657114,
10311
+ "grad_norm": 8.0625,
10312
+ "learning_rate": 0.0004905918863757094,
10313
+ "loss": 18.2796,
10314
+ "step": 29440
10315
+ },
10316
+ {
10317
+ "epoch": 0.05744621729528935,
10318
+ "grad_norm": 8.0625,
10319
+ "learning_rate": 0.0004905853843448001,
10320
+ "loss": 18.2503,
10321
+ "step": 29460
10322
+ },
10323
+ {
10324
+ "epoch": 0.05748521676392159,
10325
+ "grad_norm": 9.25,
10326
+ "learning_rate": 0.0004905788823138908,
10327
+ "loss": 18.2193,
10328
+ "step": 29480
10329
+ },
10330
+ {
10331
+ "epoch": 0.05752421623255383,
10332
+ "grad_norm": 9.0,
10333
+ "learning_rate": 0.0004905723802829814,
10334
+ "loss": 18.2248,
10335
+ "step": 29500
10336
+ },
10337
+ {
10338
+ "epoch": 0.05756321570118607,
10339
+ "grad_norm": 8.25,
10340
+ "learning_rate": 0.0004905658782520721,
10341
+ "loss": 18.249,
10342
+ "step": 29520
10343
+ },
10344
+ {
10345
+ "epoch": 0.05760221516981831,
10346
+ "grad_norm": 8.0625,
10347
+ "learning_rate": 0.0004905593762211627,
10348
+ "loss": 18.261,
10349
+ "step": 29540
10350
+ },
10351
+ {
10352
+ "epoch": 0.05764121463845055,
10353
+ "grad_norm": 8.0625,
10354
+ "learning_rate": 0.0004905528741902534,
10355
+ "loss": 18.1378,
10356
+ "step": 29560
10357
+ },
10358
+ {
10359
+ "epoch": 0.05768021410708279,
10360
+ "grad_norm": 7.96875,
10361
+ "learning_rate": 0.000490546372159344,
10362
+ "loss": 18.2145,
10363
+ "step": 29580
10364
+ },
10365
+ {
10366
+ "epoch": 0.05771921357571503,
10367
+ "grad_norm": 9.125,
10368
+ "learning_rate": 0.0004905398701284347,
10369
+ "loss": 18.2221,
10370
+ "step": 29600
10371
+ },
10372
+ {
10373
+ "epoch": 0.05775821304434727,
10374
+ "grad_norm": 8.9375,
10375
+ "learning_rate": 0.0004905333680975253,
10376
+ "loss": 18.2222,
10377
+ "step": 29620
10378
+ },
10379
+ {
10380
+ "epoch": 0.057797212512979514,
10381
+ "grad_norm": 8.75,
10382
+ "learning_rate": 0.0004905268660666159,
10383
+ "loss": 18.1924,
10384
+ "step": 29640
10385
+ },
10386
+ {
10387
+ "epoch": 0.05783621198161175,
10388
+ "grad_norm": 8.25,
10389
+ "learning_rate": 0.0004905203640357066,
10390
+ "loss": 18.2808,
10391
+ "step": 29660
10392
+ },
10393
+ {
10394
+ "epoch": 0.05787521145024399,
10395
+ "grad_norm": 7.65625,
10396
+ "learning_rate": 0.0004905138620047972,
10397
+ "loss": 18.1663,
10398
+ "step": 29680
10399
+ },
10400
+ {
10401
+ "epoch": 0.05791421091887623,
10402
+ "grad_norm": 8.0625,
10403
+ "learning_rate": 0.0004905073599738879,
10404
+ "loss": 18.3324,
10405
+ "step": 29700
10406
+ },
10407
+ {
10408
+ "epoch": 0.05795321038750847,
10409
+ "grad_norm": 8.0,
10410
+ "learning_rate": 0.0004905008579429785,
10411
+ "loss": 18.1949,
10412
+ "step": 29720
10413
+ },
10414
+ {
10415
+ "epoch": 0.05799220985614071,
10416
+ "grad_norm": 8.4375,
10417
+ "learning_rate": 0.0004904943559120692,
10418
+ "loss": 18.2462,
10419
+ "step": 29740
10420
+ },
10421
+ {
10422
+ "epoch": 0.05803120932477295,
10423
+ "grad_norm": 9.1875,
10424
+ "learning_rate": 0.0004904878538811597,
10425
+ "loss": 18.2457,
10426
+ "step": 29760
10427
+ },
10428
+ {
10429
+ "epoch": 0.05807020879340519,
10430
+ "grad_norm": 9.3125,
10431
+ "learning_rate": 0.0004904813518502504,
10432
+ "loss": 18.2479,
10433
+ "step": 29780
10434
+ },
10435
+ {
10436
+ "epoch": 0.05810920826203743,
10437
+ "grad_norm": 7.75,
10438
+ "learning_rate": 0.0004904748498193411,
10439
+ "loss": 18.2079,
10440
+ "step": 29800
10441
+ },
10442
+ {
10443
+ "epoch": 0.05814820773066967,
10444
+ "grad_norm": 8.75,
10445
+ "learning_rate": 0.0004904683477884317,
10446
+ "loss": 18.1499,
10447
+ "step": 29820
10448
+ },
10449
+ {
10450
+ "epoch": 0.058187207199301906,
10451
+ "grad_norm": 7.15625,
10452
+ "learning_rate": 0.0004904618457575224,
10453
+ "loss": 18.1579,
10454
+ "step": 29840
10455
+ },
10456
+ {
10457
+ "epoch": 0.05822620666793415,
10458
+ "grad_norm": 8.125,
10459
+ "learning_rate": 0.000490455343726613,
10460
+ "loss": 18.2203,
10461
+ "step": 29860
10462
+ },
10463
+ {
10464
+ "epoch": 0.05826520613656639,
10465
+ "grad_norm": 8.25,
10466
+ "learning_rate": 0.0004904488416957037,
10467
+ "loss": 18.2209,
10468
+ "step": 29880
10469
+ },
10470
+ {
10471
+ "epoch": 0.05830420560519863,
10472
+ "grad_norm": 7.96875,
10473
+ "learning_rate": 0.0004904423396647943,
10474
+ "loss": 18.142,
10475
+ "step": 29900
10476
+ },
10477
+ {
10478
+ "epoch": 0.05834320507383087,
10479
+ "grad_norm": 7.5625,
10480
+ "learning_rate": 0.0004904358376338849,
10481
+ "loss": 18.1117,
10482
+ "step": 29920
10483
+ },
10484
+ {
10485
+ "epoch": 0.05838220454246311,
10486
+ "grad_norm": 7.9375,
10487
+ "learning_rate": 0.0004904293356029755,
10488
+ "loss": 18.119,
10489
+ "step": 29940
10490
+ },
10491
+ {
10492
+ "epoch": 0.05842120401109535,
10493
+ "grad_norm": 9.0625,
10494
+ "learning_rate": 0.0004904228335720662,
10495
+ "loss": 18.1707,
10496
+ "step": 29960
10497
+ },
10498
+ {
10499
+ "epoch": 0.058460203479727586,
10500
+ "grad_norm": 8.375,
10501
+ "learning_rate": 0.0004904163315411569,
10502
+ "loss": 18.1383,
10503
+ "step": 29980
10504
+ },
10505
+ {
10506
+ "epoch": 0.05849920294835983,
10507
+ "grad_norm": 8.875,
10508
+ "learning_rate": 0.0004904098295102475,
10509
+ "loss": 18.1583,
10510
+ "step": 30000
10511
  }
10512
  ],
10513
  "logging_steps": 20,
 
10527
  "attributes": {}
10528
  }
10529
  },
10530
+ "total_flos": 2.2054844912870883e+19,
10531
  "train_batch_size": 48,
10532
  "trial_name": null,
10533
  "trial_params": null