mohammadmahdinouri commited on
Commit
c67bb7e
·
verified ·
1 Parent(s): e7aaa07

Training in progress, step 82000, checkpoint

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:823eb63012823ec707c6d7f902c74957999bf78b1aa0d0d2d5c9d70a5734d17f
3
  size 304481530
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6fdea62ab3807d5c83f086f5151ea04cbf9ee9578a7b8e17883bf2d371b73c59
3
  size 304481530
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c0d63d0bb38931ba004527cf05d519d8436a05de32795acdca357558484b1ec2
3
  size 402029570
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a88a14f277e372edcbef1004c41517572ec49368da6f457c37072a723ef15a5b
3
  size 402029570
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b90fc3ec29902b44c5dab65db1810b8b8fc6323fbc44dcf350f56c46e57c5fd8
3
  size 14960
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1e1adfa857ebdc86ec7fd943675fc57102e813288b2aafc927551a884f4b79c6
3
  size 14960
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d6f3de7c18cee2c7f621fa79e4f4fdd03056bf89759c48b8200641917be9bbea
3
  size 14960
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:57a30f5d80cfa3dad198a9cbb5668d7cb89aba9aa41f1f44032001d4e5f9fed3
3
  size 14960
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:41254897ccfcc1492b57dfce97c4bfb1f85783664d284f06a33696e33eceee23
3
  size 14960
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:50a1704d8cd0ae1d1da5487260fd6a9d83621f0daf881048aad7559ce485f0af
3
  size 14960
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e8dca7e7f5e1df0d3354cf3bcc9ab680db704c8e243044bbcbe0b559be11e528
3
  size 14960
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:512ff917732395bd5049da89bf880b825c8d71316cd69e7277929f7763966d5b
3
  size 14960
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:38edd545bc4f01de3f608883af1908fbe14efdd33931e3adde347eb4fa00e55f
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:13c457672f739f0501828eb93166275a4b9a832449b61ea1951a9a198cb7e4de
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.11998649040996866,
6
  "eval_steps": 500,
7
- "global_step": 81000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -28358,6 +28358,356 @@
28358
  "learning_rate": 0.0004801210335412954,
28359
  "loss": 16.2376,
28360
  "step": 81000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28361
  }
28362
  ],
28363
  "logging_steps": 20,
@@ -28377,7 +28727,7 @@
28377
  "attributes": {}
28378
  }
28379
  },
28380
- "total_flos": 5.955535421644892e+19,
28381
  "train_batch_size": 48,
28382
  "trial_name": null,
28383
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.12146780510638802,
6
  "eval_steps": 500,
7
+ "global_step": 82000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
28358
  "learning_rate": 0.0004801210335412954,
28359
  "loss": 16.2376,
28360
  "step": 81000
28361
+ },
28362
+ {
28363
+ "epoch": 0.12001611670389704,
28364
+ "grad_norm": 6.78125,
28365
+ "learning_rate": 0.00048011609460628786,
28366
+ "loss": 16.2201,
28367
+ "step": 81020
28368
+ },
28369
+ {
28370
+ "epoch": 0.12004574299782543,
28371
+ "grad_norm": 7.0625,
28372
+ "learning_rate": 0.00048011115567128025,
28373
+ "loss": 16.2081,
28374
+ "step": 81040
28375
+ },
28376
+ {
28377
+ "epoch": 0.12007536929175382,
28378
+ "grad_norm": 6.40625,
28379
+ "learning_rate": 0.00048010621673627276,
28380
+ "loss": 16.1728,
28381
+ "step": 81060
28382
+ },
28383
+ {
28384
+ "epoch": 0.1201049955856822,
28385
+ "grad_norm": 5.96875,
28386
+ "learning_rate": 0.00048010127780126515,
28387
+ "loss": 16.2446,
28388
+ "step": 81080
28389
+ },
28390
+ {
28391
+ "epoch": 0.12013462187961059,
28392
+ "grad_norm": 6.8125,
28393
+ "learning_rate": 0.0004800963388662576,
28394
+ "loss": 16.2341,
28395
+ "step": 81100
28396
+ },
28397
+ {
28398
+ "epoch": 0.12016424817353898,
28399
+ "grad_norm": 7.21875,
28400
+ "learning_rate": 0.00048009139993125004,
28401
+ "loss": 16.2402,
28402
+ "step": 81120
28403
+ },
28404
+ {
28405
+ "epoch": 0.12019387446746736,
28406
+ "grad_norm": 6.3125,
28407
+ "learning_rate": 0.0004800864609962425,
28408
+ "loss": 16.2117,
28409
+ "step": 81140
28410
+ },
28411
+ {
28412
+ "epoch": 0.12022350076139575,
28413
+ "grad_norm": 6.09375,
28414
+ "learning_rate": 0.0004800815220612349,
28415
+ "loss": 16.2817,
28416
+ "step": 81160
28417
+ },
28418
+ {
28419
+ "epoch": 0.12025312705532414,
28420
+ "grad_norm": 6.59375,
28421
+ "learning_rate": 0.00048007658312622733,
28422
+ "loss": 16.2479,
28423
+ "step": 81180
28424
+ },
28425
+ {
28426
+ "epoch": 0.12028275334925252,
28427
+ "grad_norm": 7.03125,
28428
+ "learning_rate": 0.0004800716441912198,
28429
+ "loss": 16.2402,
28430
+ "step": 81200
28431
+ },
28432
+ {
28433
+ "epoch": 0.12031237964318092,
28434
+ "grad_norm": 6.59375,
28435
+ "learning_rate": 0.00048006670525621223,
28436
+ "loss": 16.2495,
28437
+ "step": 81220
28438
+ },
28439
+ {
28440
+ "epoch": 0.12034200593710931,
28441
+ "grad_norm": 6.75,
28442
+ "learning_rate": 0.0004800617663212046,
28443
+ "loss": 16.2373,
28444
+ "step": 81240
28445
+ },
28446
+ {
28447
+ "epoch": 0.1203716322310377,
28448
+ "grad_norm": 7.0625,
28449
+ "learning_rate": 0.00048005682738619707,
28450
+ "loss": 16.1696,
28451
+ "step": 81260
28452
+ },
28453
+ {
28454
+ "epoch": 0.12040125852496608,
28455
+ "grad_norm": 6.375,
28456
+ "learning_rate": 0.0004800518884511895,
28457
+ "loss": 16.1894,
28458
+ "step": 81280
28459
+ },
28460
+ {
28461
+ "epoch": 0.12043088481889447,
28462
+ "grad_norm": 7.0,
28463
+ "learning_rate": 0.00048004694951618197,
28464
+ "loss": 16.214,
28465
+ "step": 81300
28466
+ },
28467
+ {
28468
+ "epoch": 0.12046051111282285,
28469
+ "grad_norm": 7.125,
28470
+ "learning_rate": 0.00048004201058117436,
28471
+ "loss": 16.2855,
28472
+ "step": 81320
28473
+ },
28474
+ {
28475
+ "epoch": 0.12049013740675124,
28476
+ "grad_norm": 6.84375,
28477
+ "learning_rate": 0.00048003707164616675,
28478
+ "loss": 16.2033,
28479
+ "step": 81340
28480
+ },
28481
+ {
28482
+ "epoch": 0.12051976370067963,
28483
+ "grad_norm": 7.25,
28484
+ "learning_rate": 0.00048003213271115926,
28485
+ "loss": 16.1961,
28486
+ "step": 81360
28487
+ },
28488
+ {
28489
+ "epoch": 0.12054938999460801,
28490
+ "grad_norm": 9.8125,
28491
+ "learning_rate": 0.00048002719377615165,
28492
+ "loss": 16.1959,
28493
+ "step": 81380
28494
+ },
28495
+ {
28496
+ "epoch": 0.1205790162885364,
28497
+ "grad_norm": 6.0625,
28498
+ "learning_rate": 0.0004800222548411441,
28499
+ "loss": 16.2231,
28500
+ "step": 81400
28501
+ },
28502
+ {
28503
+ "epoch": 0.12060864258246479,
28504
+ "grad_norm": 6.6875,
28505
+ "learning_rate": 0.00048001731590613655,
28506
+ "loss": 16.206,
28507
+ "step": 81420
28508
+ },
28509
+ {
28510
+ "epoch": 0.12063826887639317,
28511
+ "grad_norm": 6.375,
28512
+ "learning_rate": 0.000480012376971129,
28513
+ "loss": 16.2459,
28514
+ "step": 81440
28515
+ },
28516
+ {
28517
+ "epoch": 0.12066789517032156,
28518
+ "grad_norm": 5.90625,
28519
+ "learning_rate": 0.0004800074380361214,
28520
+ "loss": 16.2108,
28521
+ "step": 81460
28522
+ },
28523
+ {
28524
+ "epoch": 0.12069752146424995,
28525
+ "grad_norm": 6.3125,
28526
+ "learning_rate": 0.00048000249910111383,
28527
+ "loss": 16.2263,
28528
+ "step": 81480
28529
+ },
28530
+ {
28531
+ "epoch": 0.12072714775817833,
28532
+ "grad_norm": 6.65625,
28533
+ "learning_rate": 0.0004799975601661063,
28534
+ "loss": 16.2835,
28535
+ "step": 81500
28536
+ },
28537
+ {
28538
+ "epoch": 0.12075677405210672,
28539
+ "grad_norm": 6.625,
28540
+ "learning_rate": 0.00047999262123109873,
28541
+ "loss": 16.246,
28542
+ "step": 81520
28543
+ },
28544
+ {
28545
+ "epoch": 0.12078640034603512,
28546
+ "grad_norm": 7.71875,
28547
+ "learning_rate": 0.0004799876822960911,
28548
+ "loss": 16.2665,
28549
+ "step": 81540
28550
+ },
28551
+ {
28552
+ "epoch": 0.1208160266399635,
28553
+ "grad_norm": 6.46875,
28554
+ "learning_rate": 0.00047998274336108357,
28555
+ "loss": 16.2341,
28556
+ "step": 81560
28557
+ },
28558
+ {
28559
+ "epoch": 0.12084565293389189,
28560
+ "grad_norm": 6.84375,
28561
+ "learning_rate": 0.000479977804426076,
28562
+ "loss": 16.2343,
28563
+ "step": 81580
28564
+ },
28565
+ {
28566
+ "epoch": 0.12087527922782028,
28567
+ "grad_norm": 6.375,
28568
+ "learning_rate": 0.00047997286549106847,
28569
+ "loss": 16.1953,
28570
+ "step": 81600
28571
+ },
28572
+ {
28573
+ "epoch": 0.12090490552174867,
28574
+ "grad_norm": 8.125,
28575
+ "learning_rate": 0.00047996792655606086,
28576
+ "loss": 16.2518,
28577
+ "step": 81620
28578
+ },
28579
+ {
28580
+ "epoch": 0.12093453181567705,
28581
+ "grad_norm": 8.0625,
28582
+ "learning_rate": 0.0004799629876210533,
28583
+ "loss": 16.1682,
28584
+ "step": 81640
28585
+ },
28586
+ {
28587
+ "epoch": 0.12096415810960544,
28588
+ "grad_norm": 6.59375,
28589
+ "learning_rate": 0.00047995804868604576,
28590
+ "loss": 16.2499,
28591
+ "step": 81660
28592
+ },
28593
+ {
28594
+ "epoch": 0.12099378440353382,
28595
+ "grad_norm": 9.5,
28596
+ "learning_rate": 0.00047995310975103815,
28597
+ "loss": 16.2143,
28598
+ "step": 81680
28599
+ },
28600
+ {
28601
+ "epoch": 0.12102341069746221,
28602
+ "grad_norm": 6.59375,
28603
+ "learning_rate": 0.0004799481708160306,
28604
+ "loss": 16.178,
28605
+ "step": 81700
28606
+ },
28607
+ {
28608
+ "epoch": 0.1210530369913906,
28609
+ "grad_norm": 7.03125,
28610
+ "learning_rate": 0.00047994323188102305,
28611
+ "loss": 16.2018,
28612
+ "step": 81720
28613
+ },
28614
+ {
28615
+ "epoch": 0.12108266328531898,
28616
+ "grad_norm": 6.9375,
28617
+ "learning_rate": 0.0004799382929460155,
28618
+ "loss": 16.2976,
28619
+ "step": 81740
28620
+ },
28621
+ {
28622
+ "epoch": 0.12111228957924737,
28623
+ "grad_norm": 6.4375,
28624
+ "learning_rate": 0.0004799333540110079,
28625
+ "loss": 16.2988,
28626
+ "step": 81760
28627
+ },
28628
+ {
28629
+ "epoch": 0.12114191587317576,
28630
+ "grad_norm": 8.8125,
28631
+ "learning_rate": 0.00047992841507600033,
28632
+ "loss": 16.2317,
28633
+ "step": 81780
28634
+ },
28635
+ {
28636
+ "epoch": 0.12117154216710414,
28637
+ "grad_norm": 7.6875,
28638
+ "learning_rate": 0.0004799234761409928,
28639
+ "loss": 16.1929,
28640
+ "step": 81800
28641
+ },
28642
+ {
28643
+ "epoch": 0.12120116846103253,
28644
+ "grad_norm": 6.1875,
28645
+ "learning_rate": 0.00047991853720598523,
28646
+ "loss": 16.1955,
28647
+ "step": 81820
28648
+ },
28649
+ {
28650
+ "epoch": 0.12123079475496092,
28651
+ "grad_norm": 6.09375,
28652
+ "learning_rate": 0.0004799135982709776,
28653
+ "loss": 16.2418,
28654
+ "step": 81840
28655
+ },
28656
+ {
28657
+ "epoch": 0.12126042104888932,
28658
+ "grad_norm": 6.90625,
28659
+ "learning_rate": 0.00047990865933597007,
28660
+ "loss": 16.218,
28661
+ "step": 81860
28662
+ },
28663
+ {
28664
+ "epoch": 0.1212900473428177,
28665
+ "grad_norm": 9.5625,
28666
+ "learning_rate": 0.0004799037204009625,
28667
+ "loss": 16.265,
28668
+ "step": 81880
28669
+ },
28670
+ {
28671
+ "epoch": 0.12131967363674609,
28672
+ "grad_norm": 6.78125,
28673
+ "learning_rate": 0.00047989878146595497,
28674
+ "loss": 16.2145,
28675
+ "step": 81900
28676
+ },
28677
+ {
28678
+ "epoch": 0.12134929993067448,
28679
+ "grad_norm": 6.59375,
28680
+ "learning_rate": 0.00047989384253094736,
28681
+ "loss": 16.1485,
28682
+ "step": 81920
28683
+ },
28684
+ {
28685
+ "epoch": 0.12137892622460286,
28686
+ "grad_norm": 6.40625,
28687
+ "learning_rate": 0.0004798889035959398,
28688
+ "loss": 16.2334,
28689
+ "step": 81940
28690
+ },
28691
+ {
28692
+ "epoch": 0.12140855251853125,
28693
+ "grad_norm": 7.5625,
28694
+ "learning_rate": 0.00047988396466093226,
28695
+ "loss": 16.1556,
28696
+ "step": 81960
28697
+ },
28698
+ {
28699
+ "epoch": 0.12143817881245964,
28700
+ "grad_norm": 6.9375,
28701
+ "learning_rate": 0.0004798790257259247,
28702
+ "loss": 16.1977,
28703
+ "step": 81980
28704
+ },
28705
+ {
28706
+ "epoch": 0.12146780510638802,
28707
+ "grad_norm": 6.625,
28708
+ "learning_rate": 0.0004798740867909171,
28709
+ "loss": 16.2152,
28710
+ "step": 82000
28711
  }
28712
  ],
28713
  "logging_steps": 20,
 
28727
  "attributes": {}
28728
  }
28729
  },
28730
+ "total_flos": 6.02907206476586e+19,
28731
  "train_batch_size": 48,
28732
  "trial_name": null,
28733
  "trial_params": null