mohammadmahdinouri commited on
Commit
97bb743
·
verified ·
1 Parent(s): f9af054

Training in progress, step 56000, checkpoint

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7e053ce558edbe337e73306d7fbe26e01c3b3739e9654841d2ecf6ce0bd40f9e
3
  size 304481530
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5fe736c4aa2a01e7bdd450b3f5ad17d22bd6d998c21f3be88229c094c87c7e31
3
  size 304481530
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:dff8d75ea38f47739184ddb4f15d59881c040611b60a73f3dec49872f53b5d63
3
  size 402029570
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f13c5595cffc9acc4fba913e67571bbfa169120e968c56adede64d35dc4a9983
3
  size 402029570
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d696ac112fd37df04d162968bc7673857914fcc279bf6c80c805e522e1b12140
3
  size 14960
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eefd6eabe10776e158c26b037c833f0a538e87ecc5b41f3ec5b83db2ee085222
3
  size 14960
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:19cc7a4a8979e8130433cf8829e3378f90d0f833f68939500ebb226cc11a1bb7
3
  size 14960
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:61b622224d0429fa788320c8e5bf7b4fa226b91d5779b03ff807c7a77c5801ff
3
  size 14960
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9f437dbdcbf6f4690950881f29edcf36177f37c3662f7c0fbeeb5b9a736d2d9b
3
  size 14960
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:13eaed908712a1c285ee1e1812b438bbabf64c8443377b65c97ba88f1f1659c5
3
  size 14960
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:34e8b215e64db84cfc27164b1a2e2325c8274d4b1e175ee122414e5295198a22
3
  size 14960
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:511643e6249f4ea9212a1dfdf8dd72a9148b63815d4ef9de03948ab4598161f3
3
  size 14960
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e1887d8c6d2dc250cfb0b7c57e61e4fa0abc40fda0dbe8977a6841b90daceb70
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0989f631c4201212ca348622ae2d095f9b6b69c39f42732c5c97cef21592c5a6
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.08147230830306514,
6
  "eval_steps": 500,
7
- "global_step": 55000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -19258,6 +19258,356 @@
19258
  "learning_rate": 0.0004865416490511318,
19259
  "loss": 17.1327,
19260
  "step": 55000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19261
  }
19262
  ],
19263
  "logging_steps": 20,
@@ -19277,7 +19627,7 @@
19277
  "attributes": {}
19278
  }
19279
  },
19280
- "total_flos": 4.043591890870731e+19,
19281
  "train_batch_size": 48,
19282
  "trial_name": null,
19283
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.0829536229994845,
6
  "eval_steps": 500,
7
+ "global_step": 56000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
19258
  "learning_rate": 0.0004865416490511318,
19259
  "loss": 17.1327,
19260
  "step": 55000
19261
+ },
19262
+ {
19263
+ "epoch": 0.08150193459699352,
19264
+ "grad_norm": 6.9375,
19265
+ "learning_rate": 0.00048653671011612425,
19266
+ "loss": 17.1478,
19267
+ "step": 55020
19268
+ },
19269
+ {
19270
+ "epoch": 0.08153156089092191,
19271
+ "grad_norm": 7.28125,
19272
+ "learning_rate": 0.0004865317711811167,
19273
+ "loss": 17.1158,
19274
+ "step": 55040
19275
+ },
19276
+ {
19277
+ "epoch": 0.0815611871848503,
19278
+ "grad_norm": 6.96875,
19279
+ "learning_rate": 0.0004865268322461091,
19280
+ "loss": 17.1082,
19281
+ "step": 55060
19282
+ },
19283
+ {
19284
+ "epoch": 0.08159081347877868,
19285
+ "grad_norm": 7.34375,
19286
+ "learning_rate": 0.00048652189331110154,
19287
+ "loss": 17.1178,
19288
+ "step": 55080
19289
+ },
19290
+ {
19291
+ "epoch": 0.08162043977270707,
19292
+ "grad_norm": 7.125,
19293
+ "learning_rate": 0.000486516954376094,
19294
+ "loss": 17.0626,
19295
+ "step": 55100
19296
+ },
19297
+ {
19298
+ "epoch": 0.08165006606663545,
19299
+ "grad_norm": 7.40625,
19300
+ "learning_rate": 0.00048651201544108643,
19301
+ "loss": 17.1654,
19302
+ "step": 55120
19303
+ },
19304
+ {
19305
+ "epoch": 0.08167969236056384,
19306
+ "grad_norm": 6.375,
19307
+ "learning_rate": 0.0004865070765060788,
19308
+ "loss": 17.1095,
19309
+ "step": 55140
19310
+ },
19311
+ {
19312
+ "epoch": 0.08170931865449224,
19313
+ "grad_norm": 7.21875,
19314
+ "learning_rate": 0.0004865021375710713,
19315
+ "loss": 17.1073,
19316
+ "step": 55160
19317
+ },
19318
+ {
19319
+ "epoch": 0.08173894494842063,
19320
+ "grad_norm": 7.15625,
19321
+ "learning_rate": 0.0004864971986360637,
19322
+ "loss": 17.1573,
19323
+ "step": 55180
19324
+ },
19325
+ {
19326
+ "epoch": 0.08176857124234901,
19327
+ "grad_norm": 7.15625,
19328
+ "learning_rate": 0.00048649225970105617,
19329
+ "loss": 17.1434,
19330
+ "step": 55200
19331
+ },
19332
+ {
19333
+ "epoch": 0.0817981975362774,
19334
+ "grad_norm": 6.59375,
19335
+ "learning_rate": 0.00048648732076604856,
19336
+ "loss": 17.0566,
19337
+ "step": 55220
19338
+ },
19339
+ {
19340
+ "epoch": 0.08182782383020579,
19341
+ "grad_norm": 7.71875,
19342
+ "learning_rate": 0.00048648238183104106,
19343
+ "loss": 17.1302,
19344
+ "step": 55240
19345
+ },
19346
+ {
19347
+ "epoch": 0.08185745012413417,
19348
+ "grad_norm": 6.9375,
19349
+ "learning_rate": 0.00048647744289603346,
19350
+ "loss": 17.1324,
19351
+ "step": 55260
19352
+ },
19353
+ {
19354
+ "epoch": 0.08188707641806256,
19355
+ "grad_norm": 12.125,
19356
+ "learning_rate": 0.0004864725039610259,
19357
+ "loss": 17.0792,
19358
+ "step": 55280
19359
+ },
19360
+ {
19361
+ "epoch": 0.08191670271199095,
19362
+ "grad_norm": 7.90625,
19363
+ "learning_rate": 0.0004864675650260183,
19364
+ "loss": 17.1382,
19365
+ "step": 55300
19366
+ },
19367
+ {
19368
+ "epoch": 0.08194632900591933,
19369
+ "grad_norm": 6.78125,
19370
+ "learning_rate": 0.00048646262609101075,
19371
+ "loss": 17.1347,
19372
+ "step": 55320
19373
+ },
19374
+ {
19375
+ "epoch": 0.08197595529984772,
19376
+ "grad_norm": 7.25,
19377
+ "learning_rate": 0.0004864576871560032,
19378
+ "loss": 17.0667,
19379
+ "step": 55340
19380
+ },
19381
+ {
19382
+ "epoch": 0.0820055815937761,
19383
+ "grad_norm": 7.0,
19384
+ "learning_rate": 0.0004864527482209956,
19385
+ "loss": 17.0842,
19386
+ "step": 55360
19387
+ },
19388
+ {
19389
+ "epoch": 0.08203520788770449,
19390
+ "grad_norm": 7.71875,
19391
+ "learning_rate": 0.00048644780928598804,
19392
+ "loss": 17.1114,
19393
+ "step": 55380
19394
+ },
19395
+ {
19396
+ "epoch": 0.08206483418163288,
19397
+ "grad_norm": 7.34375,
19398
+ "learning_rate": 0.0004864428703509805,
19399
+ "loss": 17.089,
19400
+ "step": 55400
19401
+ },
19402
+ {
19403
+ "epoch": 0.08209446047556127,
19404
+ "grad_norm": 7.875,
19405
+ "learning_rate": 0.00048643793141597293,
19406
+ "loss": 17.1292,
19407
+ "step": 55420
19408
+ },
19409
+ {
19410
+ "epoch": 0.08212408676948965,
19411
+ "grad_norm": 6.96875,
19412
+ "learning_rate": 0.0004864329924809653,
19413
+ "loss": 17.1258,
19414
+ "step": 55440
19415
+ },
19416
+ {
19417
+ "epoch": 0.08215371306341804,
19418
+ "grad_norm": 6.28125,
19419
+ "learning_rate": 0.0004864280535459578,
19420
+ "loss": 17.1261,
19421
+ "step": 55460
19422
+ },
19423
+ {
19424
+ "epoch": 0.08218333935734644,
19425
+ "grad_norm": 6.6875,
19426
+ "learning_rate": 0.0004864231146109502,
19427
+ "loss": 17.0684,
19428
+ "step": 55480
19429
+ },
19430
+ {
19431
+ "epoch": 0.08221296565127482,
19432
+ "grad_norm": 7.625,
19433
+ "learning_rate": 0.00048641817567594267,
19434
+ "loss": 17.0862,
19435
+ "step": 55500
19436
+ },
19437
+ {
19438
+ "epoch": 0.08224259194520321,
19439
+ "grad_norm": 6.53125,
19440
+ "learning_rate": 0.00048641323674093506,
19441
+ "loss": 17.1307,
19442
+ "step": 55520
19443
+ },
19444
+ {
19445
+ "epoch": 0.0822722182391316,
19446
+ "grad_norm": 6.46875,
19447
+ "learning_rate": 0.00048640829780592756,
19448
+ "loss": 17.0831,
19449
+ "step": 55540
19450
+ },
19451
+ {
19452
+ "epoch": 0.08230184453305998,
19453
+ "grad_norm": 7.0,
19454
+ "learning_rate": 0.00048640335887091996,
19455
+ "loss": 17.0549,
19456
+ "step": 55560
19457
+ },
19458
+ {
19459
+ "epoch": 0.08233147082698837,
19460
+ "grad_norm": 6.625,
19461
+ "learning_rate": 0.0004863984199359124,
19462
+ "loss": 17.1213,
19463
+ "step": 55580
19464
+ },
19465
+ {
19466
+ "epoch": 0.08236109712091676,
19467
+ "grad_norm": 7.09375,
19468
+ "learning_rate": 0.0004863934810009048,
19469
+ "loss": 17.1498,
19470
+ "step": 55600
19471
+ },
19472
+ {
19473
+ "epoch": 0.08239072341484514,
19474
+ "grad_norm": 6.25,
19475
+ "learning_rate": 0.0004863885420658973,
19476
+ "loss": 17.0782,
19477
+ "step": 55620
19478
+ },
19479
+ {
19480
+ "epoch": 0.08242034970877353,
19481
+ "grad_norm": 6.4375,
19482
+ "learning_rate": 0.0004863836031308897,
19483
+ "loss": 17.1037,
19484
+ "step": 55640
19485
+ },
19486
+ {
19487
+ "epoch": 0.08244997600270192,
19488
+ "grad_norm": 7.15625,
19489
+ "learning_rate": 0.0004863786641958821,
19490
+ "loss": 17.1475,
19491
+ "step": 55660
19492
+ },
19493
+ {
19494
+ "epoch": 0.0824796022966303,
19495
+ "grad_norm": 6.90625,
19496
+ "learning_rate": 0.00048637372526087454,
19497
+ "loss": 17.1295,
19498
+ "step": 55680
19499
+ },
19500
+ {
19501
+ "epoch": 0.08250922859055869,
19502
+ "grad_norm": 6.21875,
19503
+ "learning_rate": 0.000486368786325867,
19504
+ "loss": 17.0528,
19505
+ "step": 55700
19506
+ },
19507
+ {
19508
+ "epoch": 0.08253885488448708,
19509
+ "grad_norm": 6.21875,
19510
+ "learning_rate": 0.00048636384739085943,
19511
+ "loss": 17.0928,
19512
+ "step": 55720
19513
+ },
19514
+ {
19515
+ "epoch": 0.08256848117841546,
19516
+ "grad_norm": 7.09375,
19517
+ "learning_rate": 0.0004863589084558518,
19518
+ "loss": 17.0391,
19519
+ "step": 55740
19520
+ },
19521
+ {
19522
+ "epoch": 0.08259810747234385,
19523
+ "grad_norm": 7.21875,
19524
+ "learning_rate": 0.0004863539695208443,
19525
+ "loss": 17.1124,
19526
+ "step": 55760
19527
+ },
19528
+ {
19529
+ "epoch": 0.08262773376627225,
19530
+ "grad_norm": 6.65625,
19531
+ "learning_rate": 0.0004863490305858367,
19532
+ "loss": 17.1316,
19533
+ "step": 55780
19534
+ },
19535
+ {
19536
+ "epoch": 0.08265736006020064,
19537
+ "grad_norm": 6.8125,
19538
+ "learning_rate": 0.00048634409165082917,
19539
+ "loss": 17.081,
19540
+ "step": 55800
19541
+ },
19542
+ {
19543
+ "epoch": 0.08268698635412902,
19544
+ "grad_norm": 7.625,
19545
+ "learning_rate": 0.00048633915271582156,
19546
+ "loss": 17.064,
19547
+ "step": 55820
19548
+ },
19549
+ {
19550
+ "epoch": 0.08271661264805741,
19551
+ "grad_norm": 5.96875,
19552
+ "learning_rate": 0.00048633421378081406,
19553
+ "loss": 17.0937,
19554
+ "step": 55840
19555
+ },
19556
+ {
19557
+ "epoch": 0.0827462389419858,
19558
+ "grad_norm": 7.75,
19559
+ "learning_rate": 0.00048632927484580646,
19560
+ "loss": 17.0401,
19561
+ "step": 55860
19562
+ },
19563
+ {
19564
+ "epoch": 0.08277586523591418,
19565
+ "grad_norm": 6.84375,
19566
+ "learning_rate": 0.0004863243359107989,
19567
+ "loss": 17.0646,
19568
+ "step": 55880
19569
+ },
19570
+ {
19571
+ "epoch": 0.08280549152984257,
19572
+ "grad_norm": 6.3125,
19573
+ "learning_rate": 0.0004863193969757913,
19574
+ "loss": 17.0864,
19575
+ "step": 55900
19576
+ },
19577
+ {
19578
+ "epoch": 0.08283511782377095,
19579
+ "grad_norm": 7.90625,
19580
+ "learning_rate": 0.0004863144580407838,
19581
+ "loss": 17.0813,
19582
+ "step": 55920
19583
+ },
19584
+ {
19585
+ "epoch": 0.08286474411769934,
19586
+ "grad_norm": 8.0625,
19587
+ "learning_rate": 0.0004863095191057762,
19588
+ "loss": 17.0242,
19589
+ "step": 55940
19590
+ },
19591
+ {
19592
+ "epoch": 0.08289437041162773,
19593
+ "grad_norm": 6.78125,
19594
+ "learning_rate": 0.00048630458017076864,
19595
+ "loss": 17.1233,
19596
+ "step": 55960
19597
+ },
19598
+ {
19599
+ "epoch": 0.08292399670555611,
19600
+ "grad_norm": 6.3125,
19601
+ "learning_rate": 0.00048629964123576104,
19602
+ "loss": 17.0682,
19603
+ "step": 55980
19604
+ },
19605
+ {
19606
+ "epoch": 0.0829536229994845,
19607
+ "grad_norm": 7.375,
19608
+ "learning_rate": 0.0004862947023007535,
19609
+ "loss": 17.0628,
19610
+ "step": 56000
19611
  }
19612
  ],
19613
  "logging_steps": 20,
 
19627
  "attributes": {}
19628
  }
19629
  },
19630
+ "total_flos": 4.117128426295394e+19,
19631
  "train_batch_size": 48,
19632
  "trial_name": null,
19633
  "trial_params": null