mohammadmahdinouri commited on
Commit
2616bbb
·
verified ·
1 Parent(s): b251457

Training in progress, step 16500, checkpoint

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:281d4ef0a8e7b7872e39b497a3a30d1eab74ceb3607386521e444e38cb3a1999
3
  size 487156538
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:289545fc1552428b0e12aeeecd55b134e9a52241f44036de8d8f204e35e20afb
3
  size 487156538
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:08bfdaa6e85ad39297909804012faf4615d0b2ab8a37ba9d564ba4b20c5afe3b
3
  size 1059459406
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:99059f4bf0ce62b572ac5e7aed5f529d09c121705516bcb6c43b763dcdea026d
3
  size 1059459406
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:914d2ab3f587e800d17f7d196d6f0092d0a87493b41d0a671e21b5adb1f2d2a0
3
  size 14960
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:82d0cb560e8cc88d37b6fdb38283c527bf386371741e3fb12423b76b412c4d30
3
  size 14960
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:344eca6c5ec13dc95525cab835b1c86942a9064531b87da2a47c4c1d44c791cc
3
  size 14960
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:139282925be08220983826aa431aa288b1fc5afb82a768d6f91bc4f11be56858
3
  size 14960
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:dfc05f18aa518e0b225107b64c8d08b389e8623e906d941f8628b36cccf0462d
3
  size 14960
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:495800332f29bfc930f2d135466505a7115bb60583302cd925a56ab992bb542c
3
  size 14960
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c4c006f6e9030f012e3a14c721b6fec459b16163302ae533134c5c4431765d62
3
  size 14960
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8384800dd0611fe7ff52172a3c0ca3c74f64cf34436f28d688acecfccf0334f6
3
  size 14960
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ccc2a52ae0327def30cc40f7f273a4a1537961b9b580753fe57ec7ecdab69b35
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e1fa44d83f1ea27212c9079f128b8147324741571d792587433ce7cd41805e05
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.031199574905791908,
6
  "eval_steps": 500,
7
- "global_step": 16000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -11208,6 +11208,356 @@
11208
  "learning_rate": 0.0004949612511467957,
11209
  "loss": 16.847,
11210
  "step": 16000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11211
  }
11212
  ],
11213
  "logging_steps": 10,
@@ -11227,7 +11577,7 @@
11227
  "attributes": {}
11228
  }
11229
  },
11230
- "total_flos": 3.5599099377236312e+19,
11231
  "train_batch_size": 48,
11232
  "trial_name": null,
11233
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.0321745616215979,
6
  "eval_steps": 500,
7
+ "global_step": 16500,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
11208
  "learning_rate": 0.0004949612511467957,
11209
  "loss": 16.847,
11210
  "step": 16000
11211
+ },
11212
+ {
11213
+ "epoch": 0.03121907464010803,
11214
+ "grad_norm": 7.65625,
11215
+ "learning_rate": 0.000494958000131341,
11216
+ "loss": 17.0239,
11217
+ "step": 16010
11218
+ },
11219
+ {
11220
+ "epoch": 0.03123857437442415,
11221
+ "grad_norm": 10.125,
11222
+ "learning_rate": 0.0004949547491158863,
11223
+ "loss": 16.8909,
11224
+ "step": 16020
11225
+ },
11226
+ {
11227
+ "epoch": 0.03125807410874027,
11228
+ "grad_norm": 10.75,
11229
+ "learning_rate": 0.0004949514981004317,
11230
+ "loss": 16.8678,
11231
+ "step": 16030
11232
+ },
11233
+ {
11234
+ "epoch": 0.03127757384305639,
11235
+ "grad_norm": 6.4375,
11236
+ "learning_rate": 0.000494948247084977,
11237
+ "loss": 16.7921,
11238
+ "step": 16040
11239
+ },
11240
+ {
11241
+ "epoch": 0.031297073577372506,
11242
+ "grad_norm": 6.875,
11243
+ "learning_rate": 0.0004949449960695223,
11244
+ "loss": 16.8111,
11245
+ "step": 16050
11246
+ },
11247
+ {
11248
+ "epoch": 0.03131657331168863,
11249
+ "grad_norm": 8.1875,
11250
+ "learning_rate": 0.0004949417450540676,
11251
+ "loss": 16.8964,
11252
+ "step": 16060
11253
+ },
11254
+ {
11255
+ "epoch": 0.03133607304600475,
11256
+ "grad_norm": 7.96875,
11257
+ "learning_rate": 0.000494938494038613,
11258
+ "loss": 16.9569,
11259
+ "step": 16070
11260
+ },
11261
+ {
11262
+ "epoch": 0.03135557278032087,
11263
+ "grad_norm": 8.5625,
11264
+ "learning_rate": 0.0004949352430231583,
11265
+ "loss": 16.9247,
11266
+ "step": 16080
11267
+ },
11268
+ {
11269
+ "epoch": 0.03137507251463699,
11270
+ "grad_norm": 9.6875,
11271
+ "learning_rate": 0.0004949319920077036,
11272
+ "loss": 16.9235,
11273
+ "step": 16090
11274
+ },
11275
+ {
11276
+ "epoch": 0.03139457224895311,
11277
+ "grad_norm": 6.5625,
11278
+ "learning_rate": 0.000494928740992249,
11279
+ "loss": 17.061,
11280
+ "step": 16100
11281
+ },
11282
+ {
11283
+ "epoch": 0.031414071983269225,
11284
+ "grad_norm": 7.1875,
11285
+ "learning_rate": 0.0004949254899767943,
11286
+ "loss": 16.9691,
11287
+ "step": 16110
11288
+ },
11289
+ {
11290
+ "epoch": 0.031433571717585346,
11291
+ "grad_norm": 8.4375,
11292
+ "learning_rate": 0.0004949222389613396,
11293
+ "loss": 16.9493,
11294
+ "step": 16120
11295
+ },
11296
+ {
11297
+ "epoch": 0.03145307145190147,
11298
+ "grad_norm": 7.625,
11299
+ "learning_rate": 0.0004949189879458849,
11300
+ "loss": 16.8725,
11301
+ "step": 16130
11302
+ },
11303
+ {
11304
+ "epoch": 0.03147257118621759,
11305
+ "grad_norm": 8.125,
11306
+ "learning_rate": 0.0004949157369304303,
11307
+ "loss": 16.9221,
11308
+ "step": 16140
11309
+ },
11310
+ {
11311
+ "epoch": 0.03149207092053371,
11312
+ "grad_norm": 8.1875,
11313
+ "learning_rate": 0.0004949124859149756,
11314
+ "loss": 16.964,
11315
+ "step": 16150
11316
+ },
11317
+ {
11318
+ "epoch": 0.03151157065484983,
11319
+ "grad_norm": 7.03125,
11320
+ "learning_rate": 0.0004949092348995209,
11321
+ "loss": 16.9567,
11322
+ "step": 16160
11323
+ },
11324
+ {
11325
+ "epoch": 0.03153107038916595,
11326
+ "grad_norm": 7.40625,
11327
+ "learning_rate": 0.0004949059838840663,
11328
+ "loss": 16.8704,
11329
+ "step": 16170
11330
+ },
11331
+ {
11332
+ "epoch": 0.031550570123482065,
11333
+ "grad_norm": 9.0625,
11334
+ "learning_rate": 0.0004949027328686116,
11335
+ "loss": 16.8933,
11336
+ "step": 16180
11337
+ },
11338
+ {
11339
+ "epoch": 0.031570069857798186,
11340
+ "grad_norm": 7.625,
11341
+ "learning_rate": 0.0004948994818531569,
11342
+ "loss": 17.0769,
11343
+ "step": 16190
11344
+ },
11345
+ {
11346
+ "epoch": 0.03158956959211431,
11347
+ "grad_norm": 7.375,
11348
+ "learning_rate": 0.0004948962308377022,
11349
+ "loss": 16.9815,
11350
+ "step": 16200
11351
+ },
11352
+ {
11353
+ "epoch": 0.03160906932643043,
11354
+ "grad_norm": 143.0,
11355
+ "learning_rate": 0.0004948929798222476,
11356
+ "loss": 16.9903,
11357
+ "step": 16210
11358
+ },
11359
+ {
11360
+ "epoch": 0.03162856906074655,
11361
+ "grad_norm": 7.1875,
11362
+ "learning_rate": 0.0004948897288067928,
11363
+ "loss": 16.9974,
11364
+ "step": 16220
11365
+ },
11366
+ {
11367
+ "epoch": 0.03164806879506267,
11368
+ "grad_norm": 7.21875,
11369
+ "learning_rate": 0.0004948864777913381,
11370
+ "loss": 16.8849,
11371
+ "step": 16230
11372
+ },
11373
+ {
11374
+ "epoch": 0.031667568529378784,
11375
+ "grad_norm": 10.125,
11376
+ "learning_rate": 0.0004948832267758834,
11377
+ "loss": 16.9787,
11378
+ "step": 16240
11379
+ },
11380
+ {
11381
+ "epoch": 0.031687068263694905,
11382
+ "grad_norm": 10.4375,
11383
+ "learning_rate": 0.0004948799757604288,
11384
+ "loss": 16.941,
11385
+ "step": 16250
11386
+ },
11387
+ {
11388
+ "epoch": 0.031706567998011026,
11389
+ "grad_norm": 9.0625,
11390
+ "learning_rate": 0.0004948767247449741,
11391
+ "loss": 16.7498,
11392
+ "step": 16260
11393
+ },
11394
+ {
11395
+ "epoch": 0.03172606773232715,
11396
+ "grad_norm": 9.3125,
11397
+ "learning_rate": 0.0004948734737295194,
11398
+ "loss": 16.8968,
11399
+ "step": 16270
11400
+ },
11401
+ {
11402
+ "epoch": 0.03174556746664327,
11403
+ "grad_norm": 23.25,
11404
+ "learning_rate": 0.0004948702227140648,
11405
+ "loss": 16.9446,
11406
+ "step": 16280
11407
+ },
11408
+ {
11409
+ "epoch": 0.03176506720095939,
11410
+ "grad_norm": 6.4375,
11411
+ "learning_rate": 0.0004948669716986101,
11412
+ "loss": 16.9886,
11413
+ "step": 16290
11414
+ },
11415
+ {
11416
+ "epoch": 0.03178456693527551,
11417
+ "grad_norm": 6.59375,
11418
+ "learning_rate": 0.0004948637206831554,
11419
+ "loss": 16.8989,
11420
+ "step": 16300
11421
+ },
11422
+ {
11423
+ "epoch": 0.031804066669591624,
11424
+ "grad_norm": 7.125,
11425
+ "learning_rate": 0.0004948604696677007,
11426
+ "loss": 16.9622,
11427
+ "step": 16310
11428
+ },
11429
+ {
11430
+ "epoch": 0.031823566403907745,
11431
+ "grad_norm": 10.6875,
11432
+ "learning_rate": 0.0004948572186522461,
11433
+ "loss": 16.9232,
11434
+ "step": 16320
11435
+ },
11436
+ {
11437
+ "epoch": 0.031843066138223866,
11438
+ "grad_norm": 8.4375,
11439
+ "learning_rate": 0.0004948539676367914,
11440
+ "loss": 17.0506,
11441
+ "step": 16330
11442
+ },
11443
+ {
11444
+ "epoch": 0.03186256587253999,
11445
+ "grad_norm": 8.875,
11446
+ "learning_rate": 0.0004948507166213367,
11447
+ "loss": 16.8868,
11448
+ "step": 16340
11449
+ },
11450
+ {
11451
+ "epoch": 0.03188206560685611,
11452
+ "grad_norm": 6.71875,
11453
+ "learning_rate": 0.0004948474656058821,
11454
+ "loss": 16.9945,
11455
+ "step": 16350
11456
+ },
11457
+ {
11458
+ "epoch": 0.03190156534117223,
11459
+ "grad_norm": 8.25,
11460
+ "learning_rate": 0.0004948442145904274,
11461
+ "loss": 16.832,
11462
+ "step": 16360
11463
+ },
11464
+ {
11465
+ "epoch": 0.03192106507548834,
11466
+ "grad_norm": 6.53125,
11467
+ "learning_rate": 0.0004948409635749726,
11468
+ "loss": 17.0248,
11469
+ "step": 16370
11470
+ },
11471
+ {
11472
+ "epoch": 0.031940564809804464,
11473
+ "grad_norm": 7.03125,
11474
+ "learning_rate": 0.0004948377125595179,
11475
+ "loss": 17.0616,
11476
+ "step": 16380
11477
+ },
11478
+ {
11479
+ "epoch": 0.031960064544120585,
11480
+ "grad_norm": 6.34375,
11481
+ "learning_rate": 0.0004948344615440633,
11482
+ "loss": 16.9359,
11483
+ "step": 16390
11484
+ },
11485
+ {
11486
+ "epoch": 0.031979564278436706,
11487
+ "grad_norm": 18.0,
11488
+ "learning_rate": 0.0004948312105286086,
11489
+ "loss": 16.9874,
11490
+ "step": 16400
11491
+ },
11492
+ {
11493
+ "epoch": 0.03199906401275283,
11494
+ "grad_norm": 9.25,
11495
+ "learning_rate": 0.0004948279595131539,
11496
+ "loss": 16.9289,
11497
+ "step": 16410
11498
+ },
11499
+ {
11500
+ "epoch": 0.03201856374706895,
11501
+ "grad_norm": 6.5,
11502
+ "learning_rate": 0.0004948247084976992,
11503
+ "loss": 16.8866,
11504
+ "step": 16420
11505
+ },
11506
+ {
11507
+ "epoch": 0.03203806348138507,
11508
+ "grad_norm": 7.5625,
11509
+ "learning_rate": 0.0004948214574822446,
11510
+ "loss": 16.8968,
11511
+ "step": 16430
11512
+ },
11513
+ {
11514
+ "epoch": 0.03205756321570118,
11515
+ "grad_norm": 14.0,
11516
+ "learning_rate": 0.0004948182064667899,
11517
+ "loss": 16.8328,
11518
+ "step": 16440
11519
+ },
11520
+ {
11521
+ "epoch": 0.032077062950017304,
11522
+ "grad_norm": 8.125,
11523
+ "learning_rate": 0.0004948149554513352,
11524
+ "loss": 16.9513,
11525
+ "step": 16450
11526
+ },
11527
+ {
11528
+ "epoch": 0.032096562684333425,
11529
+ "grad_norm": 6.3125,
11530
+ "learning_rate": 0.0004948117044358806,
11531
+ "loss": 16.9399,
11532
+ "step": 16460
11533
+ },
11534
+ {
11535
+ "epoch": 0.032116062418649546,
11536
+ "grad_norm": 8.4375,
11537
+ "learning_rate": 0.0004948084534204259,
11538
+ "loss": 16.9612,
11539
+ "step": 16470
11540
+ },
11541
+ {
11542
+ "epoch": 0.03213556215296567,
11543
+ "grad_norm": 7.40625,
11544
+ "learning_rate": 0.0004948052024049712,
11545
+ "loss": 16.9981,
11546
+ "step": 16480
11547
+ },
11548
+ {
11549
+ "epoch": 0.03215506188728179,
11550
+ "grad_norm": 8.1875,
11551
+ "learning_rate": 0.0004948019513895165,
11552
+ "loss": 16.9472,
11553
+ "step": 16490
11554
+ },
11555
+ {
11556
+ "epoch": 0.0321745616215979,
11557
+ "grad_norm": 7.3125,
11558
+ "learning_rate": 0.0004947987003740619,
11559
+ "loss": 16.9264,
11560
+ "step": 16500
11561
  }
11562
  ],
11563
  "logging_steps": 10,
 
11577
  "attributes": {}
11578
  }
11579
  },
11580
+ "total_flos": 3.6711547362756e+19,
11581
  "train_batch_size": 48,
11582
  "trial_name": null,
11583
  "trial_params": null