mohammadmahdinouri commited on
Commit
0cf3eca
·
verified ·
1 Parent(s): 468e23d

Training in progress, step 39000, checkpoint

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:85198d44150dd14dc748ac40b8bc5114b47ca08dc1aa2350d3a07d27b2e60109
3
  size 319352826
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fa8d092167263760751be7d64325984a75b7703a340e78d50675e80c3268d124
3
  size 319352826
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d0510082ca4b5d270ea3f0da8ac5e584aa078b6cb148f402cd1ad606f52c0a03
3
  size 900372486
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d61ec51d8d7118e96f0dea2c3a25213ed4312020f0758f383f9360478751ced5
3
  size 900372486
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2f9cea07efef388aaa4d845d75160c41a0dd68d3421684c54fac1c07520d2735
3
  size 14960
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fa7750a42bfcb857f2a8e98481152c1819f20fb160a29916eafdca1fe7427f50
3
  size 14960
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ebdc426ed788887085056988e2767e2a6aa14d71ec9f52eb10bf22f7d91d20f9
3
  size 14960
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:50dcdff9fd2af9b5bad6141fb049dadfa5202e5b29d58dd20b462f210d858734
3
  size 14960
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bac533bc34534beac3208835cd7735cfe0091b41649f1553a7021e09465ecf25
3
  size 14960
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:21f1502aa78044a36b5816c5c55d8e465c4014bfb315f8f493433e3ca45e7ae7
3
  size 14960
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:adbdc7018bdeed82712d18c65f68c2bcdc595394c33ad7aba1ba358ae13f0a71
3
  size 14960
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5a17f91aa15ca7b8a7e9989ef3ca40593acdf2c68462ffb083c0202e8a25a1ee
3
  size 14960
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f7cbf3f1b2d5b56d9f5b71f70765a3b4d09f29112fad8f5bfc47e27378749620
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fa403d369b1ca2463cb498c59fd6da0c0d97bf23ae0774f6dd0f5ad165ffa16e
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.05628995846393591,
6
  "eval_steps": 500,
7
- "global_step": 38000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -13308,6 +13308,356 @@
13308
  "learning_rate": 4.907397438075633e-05,
13309
  "loss": 22.7662,
13310
  "step": 38000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13311
  }
13312
  ],
13313
  "logging_steps": 20,
@@ -13327,7 +13677,7 @@
13327
  "attributes": {}
13328
  }
13329
  },
13330
- "total_flos": 7.030978338202059e+19,
13331
  "train_batch_size": 48,
13332
  "trial_name": null,
13333
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.057771273160355276,
6
  "eval_steps": 500,
7
+ "global_step": 39000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
13308
  "learning_rate": 4.907397438075633e-05,
13309
  "loss": 22.7662,
13310
  "step": 38000
13311
+ },
13312
+ {
13313
+ "epoch": 0.0563195847578643,
13314
+ "grad_norm": 7.8125,
13315
+ "learning_rate": 4.9073480487255576e-05,
13316
+ "loss": 22.8013,
13317
+ "step": 38020
13318
+ },
13319
+ {
13320
+ "epoch": 0.056349211051792684,
13321
+ "grad_norm": 8.375,
13322
+ "learning_rate": 4.907298659375481e-05,
13323
+ "loss": 22.7253,
13324
+ "step": 38040
13325
+ },
13326
+ {
13327
+ "epoch": 0.05637883734572107,
13328
+ "grad_norm": 8.0,
13329
+ "learning_rate": 4.907249270025406e-05,
13330
+ "loss": 22.8673,
13331
+ "step": 38060
13332
+ },
13333
+ {
13334
+ "epoch": 0.056408463639649464,
13335
+ "grad_norm": 8.5,
13336
+ "learning_rate": 4.907199880675331e-05,
13337
+ "loss": 22.773,
13338
+ "step": 38080
13339
+ },
13340
+ {
13341
+ "epoch": 0.05643808993357785,
13342
+ "grad_norm": 8.3125,
13343
+ "learning_rate": 4.907150491325255e-05,
13344
+ "loss": 22.7193,
13345
+ "step": 38100
13346
+ },
13347
+ {
13348
+ "epoch": 0.05646771622750624,
13349
+ "grad_norm": 8.1875,
13350
+ "learning_rate": 4.907101101975179e-05,
13351
+ "loss": 22.8162,
13352
+ "step": 38120
13353
+ },
13354
+ {
13355
+ "epoch": 0.056497342521434624,
13356
+ "grad_norm": 8.9375,
13357
+ "learning_rate": 4.907051712625104e-05,
13358
+ "loss": 22.7384,
13359
+ "step": 38140
13360
+ },
13361
+ {
13362
+ "epoch": 0.05652696881536301,
13363
+ "grad_norm": 8.6875,
13364
+ "learning_rate": 4.907002323275028e-05,
13365
+ "loss": 22.7555,
13366
+ "step": 38160
13367
+ },
13368
+ {
13369
+ "epoch": 0.0565565951092914,
13370
+ "grad_norm": 7.3125,
13371
+ "learning_rate": 4.906952933924952e-05,
13372
+ "loss": 22.7421,
13373
+ "step": 38180
13374
+ },
13375
+ {
13376
+ "epoch": 0.05658622140321978,
13377
+ "grad_norm": 8.75,
13378
+ "learning_rate": 4.906903544574876e-05,
13379
+ "loss": 22.7838,
13380
+ "step": 38200
13381
+ },
13382
+ {
13383
+ "epoch": 0.056615847697148176,
13384
+ "grad_norm": 8.4375,
13385
+ "learning_rate": 4.906854155224801e-05,
13386
+ "loss": 22.7588,
13387
+ "step": 38220
13388
+ },
13389
+ {
13390
+ "epoch": 0.05664547399107656,
13391
+ "grad_norm": 7.375,
13392
+ "learning_rate": 4.9068047658747254e-05,
13393
+ "loss": 22.7246,
13394
+ "step": 38240
13395
+ },
13396
+ {
13397
+ "epoch": 0.05667510028500495,
13398
+ "grad_norm": 8.3125,
13399
+ "learning_rate": 4.906755376524649e-05,
13400
+ "loss": 22.7522,
13401
+ "step": 38260
13402
+ },
13403
+ {
13404
+ "epoch": 0.056704726578933336,
13405
+ "grad_norm": 8.875,
13406
+ "learning_rate": 4.906705987174574e-05,
13407
+ "loss": 22.7606,
13408
+ "step": 38280
13409
+ },
13410
+ {
13411
+ "epoch": 0.05673435287286172,
13412
+ "grad_norm": 12.25,
13413
+ "learning_rate": 4.9066565978244984e-05,
13414
+ "loss": 22.7347,
13415
+ "step": 38300
13416
+ },
13417
+ {
13418
+ "epoch": 0.05676397916679011,
13419
+ "grad_norm": 8.8125,
13420
+ "learning_rate": 4.906607208474423e-05,
13421
+ "loss": 22.7115,
13422
+ "step": 38320
13423
+ },
13424
+ {
13425
+ "epoch": 0.056793605460718495,
13426
+ "grad_norm": 9.0,
13427
+ "learning_rate": 4.9065578191243465e-05,
13428
+ "loss": 22.6453,
13429
+ "step": 38340
13430
+ },
13431
+ {
13432
+ "epoch": 0.05682323175464688,
13433
+ "grad_norm": 12.875,
13434
+ "learning_rate": 4.9065084297742715e-05,
13435
+ "loss": 22.7791,
13436
+ "step": 38360
13437
+ },
13438
+ {
13439
+ "epoch": 0.056852858048575275,
13440
+ "grad_norm": 9.75,
13441
+ "learning_rate": 4.906459040424196e-05,
13442
+ "loss": 22.6924,
13443
+ "step": 38380
13444
+ },
13445
+ {
13446
+ "epoch": 0.05688248434250366,
13447
+ "grad_norm": 9.1875,
13448
+ "learning_rate": 4.9064096510741195e-05,
13449
+ "loss": 22.6704,
13450
+ "step": 38400
13451
+ },
13452
+ {
13453
+ "epoch": 0.05691211063643205,
13454
+ "grad_norm": 7.5625,
13455
+ "learning_rate": 4.906360261724044e-05,
13456
+ "loss": 22.7041,
13457
+ "step": 38420
13458
+ },
13459
+ {
13460
+ "epoch": 0.056941736930360434,
13461
+ "grad_norm": 7.59375,
13462
+ "learning_rate": 4.906310872373969e-05,
13463
+ "loss": 22.7381,
13464
+ "step": 38440
13465
+ },
13466
+ {
13467
+ "epoch": 0.05697136322428882,
13468
+ "grad_norm": 8.25,
13469
+ "learning_rate": 4.906261483023893e-05,
13470
+ "loss": 22.8227,
13471
+ "step": 38460
13472
+ },
13473
+ {
13474
+ "epoch": 0.05700098951821721,
13475
+ "grad_norm": 8.4375,
13476
+ "learning_rate": 4.906212093673817e-05,
13477
+ "loss": 22.7251,
13478
+ "step": 38480
13479
+ },
13480
+ {
13481
+ "epoch": 0.057030615812145594,
13482
+ "grad_norm": 7.6875,
13483
+ "learning_rate": 4.906162704323741e-05,
13484
+ "loss": 22.6936,
13485
+ "step": 38500
13486
+ },
13487
+ {
13488
+ "epoch": 0.05706024210607398,
13489
+ "grad_norm": 8.8125,
13490
+ "learning_rate": 4.906113314973666e-05,
13491
+ "loss": 22.7815,
13492
+ "step": 38520
13493
+ },
13494
+ {
13495
+ "epoch": 0.05708986840000237,
13496
+ "grad_norm": 8.875,
13497
+ "learning_rate": 4.90606392562359e-05,
13498
+ "loss": 22.7159,
13499
+ "step": 38540
13500
+ },
13501
+ {
13502
+ "epoch": 0.05711949469393076,
13503
+ "grad_norm": 8.875,
13504
+ "learning_rate": 4.906014536273514e-05,
13505
+ "loss": 22.7325,
13506
+ "step": 38560
13507
+ },
13508
+ {
13509
+ "epoch": 0.057149120987859146,
13510
+ "grad_norm": 8.375,
13511
+ "learning_rate": 4.905965146923439e-05,
13512
+ "loss": 22.7178,
13513
+ "step": 38580
13514
+ },
13515
+ {
13516
+ "epoch": 0.05717874728178753,
13517
+ "grad_norm": 8.3125,
13518
+ "learning_rate": 4.9059157575733636e-05,
13519
+ "loss": 22.7357,
13520
+ "step": 38600
13521
+ },
13522
+ {
13523
+ "epoch": 0.05720837357571592,
13524
+ "grad_norm": 8.0,
13525
+ "learning_rate": 4.905866368223287e-05,
13526
+ "loss": 22.7835,
13527
+ "step": 38620
13528
+ },
13529
+ {
13530
+ "epoch": 0.057237999869644306,
13531
+ "grad_norm": 8.0,
13532
+ "learning_rate": 4.9058169788732116e-05,
13533
+ "loss": 22.7622,
13534
+ "step": 38640
13535
+ },
13536
+ {
13537
+ "epoch": 0.05726762616357269,
13538
+ "grad_norm": 11.0,
13539
+ "learning_rate": 4.9057675895231366e-05,
13540
+ "loss": 22.7288,
13541
+ "step": 38660
13542
+ },
13543
+ {
13544
+ "epoch": 0.05729725245750108,
13545
+ "grad_norm": 8.0,
13546
+ "learning_rate": 4.90571820017306e-05,
13547
+ "loss": 22.8271,
13548
+ "step": 38680
13549
+ },
13550
+ {
13551
+ "epoch": 0.05732687875142947,
13552
+ "grad_norm": 8.8125,
13553
+ "learning_rate": 4.9056688108229846e-05,
13554
+ "loss": 22.8034,
13555
+ "step": 38700
13556
+ },
13557
+ {
13558
+ "epoch": 0.05735650504535786,
13559
+ "grad_norm": 7.75,
13560
+ "learning_rate": 4.905619421472909e-05,
13561
+ "loss": 22.7918,
13562
+ "step": 38720
13563
+ },
13564
+ {
13565
+ "epoch": 0.057386131339286245,
13566
+ "grad_norm": 8.375,
13567
+ "learning_rate": 4.905570032122834e-05,
13568
+ "loss": 22.7912,
13569
+ "step": 38740
13570
+ },
13571
+ {
13572
+ "epoch": 0.05741575763321463,
13573
+ "grad_norm": 7.25,
13574
+ "learning_rate": 4.9055206427727576e-05,
13575
+ "loss": 22.6727,
13576
+ "step": 38760
13577
+ },
13578
+ {
13579
+ "epoch": 0.05744538392714302,
13580
+ "grad_norm": 8.0,
13581
+ "learning_rate": 4.905471253422682e-05,
13582
+ "loss": 22.799,
13583
+ "step": 38780
13584
+ },
13585
+ {
13586
+ "epoch": 0.057475010221071404,
13587
+ "grad_norm": 7.9375,
13588
+ "learning_rate": 4.905421864072606e-05,
13589
+ "loss": 22.7086,
13590
+ "step": 38800
13591
+ },
13592
+ {
13593
+ "epoch": 0.05750463651499979,
13594
+ "grad_norm": 8.6875,
13595
+ "learning_rate": 4.9053724747225314e-05,
13596
+ "loss": 22.7619,
13597
+ "step": 38820
13598
+ },
13599
+ {
13600
+ "epoch": 0.05753426280892818,
13601
+ "grad_norm": 9.5,
13602
+ "learning_rate": 4.905323085372455e-05,
13603
+ "loss": 22.7321,
13604
+ "step": 38840
13605
+ },
13606
+ {
13607
+ "epoch": 0.05756388910285657,
13608
+ "grad_norm": 7.03125,
13609
+ "learning_rate": 4.9052736960223794e-05,
13610
+ "loss": 22.7381,
13611
+ "step": 38860
13612
+ },
13613
+ {
13614
+ "epoch": 0.05759351539678496,
13615
+ "grad_norm": 7.84375,
13616
+ "learning_rate": 4.9052243066723044e-05,
13617
+ "loss": 22.6783,
13618
+ "step": 38880
13619
+ },
13620
+ {
13621
+ "epoch": 0.05762314169071334,
13622
+ "grad_norm": 7.75,
13623
+ "learning_rate": 4.905174917322228e-05,
13624
+ "loss": 22.7077,
13625
+ "step": 38900
13626
+ },
13627
+ {
13628
+ "epoch": 0.05765276798464173,
13629
+ "grad_norm": 8.1875,
13630
+ "learning_rate": 4.9051255279721524e-05,
13631
+ "loss": 22.7116,
13632
+ "step": 38920
13633
+ },
13634
+ {
13635
+ "epoch": 0.057682394278570116,
13636
+ "grad_norm": 8.375,
13637
+ "learning_rate": 4.905076138622077e-05,
13638
+ "loss": 22.6759,
13639
+ "step": 38940
13640
+ },
13641
+ {
13642
+ "epoch": 0.0577120205724985,
13643
+ "grad_norm": 9.25,
13644
+ "learning_rate": 4.905026749272002e-05,
13645
+ "loss": 22.7974,
13646
+ "step": 38960
13647
+ },
13648
+ {
13649
+ "epoch": 0.05774164686642689,
13650
+ "grad_norm": 8.625,
13651
+ "learning_rate": 4.9049773599219254e-05,
13652
+ "loss": 22.7558,
13653
+ "step": 38980
13654
+ },
13655
+ {
13656
+ "epoch": 0.057771273160355276,
13657
+ "grad_norm": 10.5625,
13658
+ "learning_rate": 4.90492797057185e-05,
13659
+ "loss": 22.7169,
13660
+ "step": 39000
13661
  }
13662
  ],
13663
  "logging_steps": 20,
 
13677
  "attributes": {}
13678
  }
13679
  },
13680
+ "total_flos": 7.216003991715447e+19,
13681
  "train_batch_size": 48,
13682
  "trial_name": null,
13683
  "trial_params": null