Training in progress, step 82000, checkpoint

Browse files

Files changed (8) hide show

last-checkpoint/optimizer.pt +1 -1
last-checkpoint/pytorch_model.bin +1 -1
last-checkpoint/rng_state_0.pth +1 -1
last-checkpoint/rng_state_1.pth +1 -1
last-checkpoint/rng_state_2.pth +1 -1
last-checkpoint/rng_state_3.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +353 -3

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:823eb63012823ec707c6d7f902c74957999bf78b1aa0d0d2d5c9d70a5734d17f
 size 304481530

 version https://git-lfs.github.com/spec/v1
+oid sha256:6fdea62ab3807d5c83f086f5151ea04cbf9ee9578a7b8e17883bf2d371b73c59
 size 304481530

last-checkpoint/pytorch_model.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c0d63d0bb38931ba004527cf05d519d8436a05de32795acdca357558484b1ec2
 size 402029570

 version https://git-lfs.github.com/spec/v1
+oid sha256:a88a14f277e372edcbef1004c41517572ec49368da6f457c37072a723ef15a5b
 size 402029570

last-checkpoint/rng_state_0.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b90fc3ec29902b44c5dab65db1810b8b8fc6323fbc44dcf350f56c46e57c5fd8
 size 14960

 version https://git-lfs.github.com/spec/v1
+oid sha256:1e1adfa857ebdc86ec7fd943675fc57102e813288b2aafc927551a884f4b79c6
 size 14960

last-checkpoint/rng_state_1.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d6f3de7c18cee2c7f621fa79e4f4fdd03056bf89759c48b8200641917be9bbea
 size 14960

 version https://git-lfs.github.com/spec/v1
+oid sha256:57a30f5d80cfa3dad198a9cbb5668d7cb89aba9aa41f1f44032001d4e5f9fed3
 size 14960

last-checkpoint/rng_state_2.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:41254897ccfcc1492b57dfce97c4bfb1f85783664d284f06a33696e33eceee23
 size 14960

 version https://git-lfs.github.com/spec/v1
+oid sha256:50a1704d8cd0ae1d1da5487260fd6a9d83621f0daf881048aad7559ce485f0af
 size 14960

last-checkpoint/rng_state_3.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e8dca7e7f5e1df0d3354cf3bcc9ab680db704c8e243044bbcbe0b559be11e528
 size 14960

 version https://git-lfs.github.com/spec/v1
+oid sha256:512ff917732395bd5049da89bf880b825c8d71316cd69e7277929f7763966d5b
 size 14960

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:38edd545bc4f01de3f608883af1908fbe14efdd33931e3adde347eb4fa00e55f
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:13c457672f739f0501828eb93166275a4b9a832449b61ea1951a9a198cb7e4de
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -2,9 +2,9 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.11998649040996866,
   "eval_steps": 500,
-  "global_step": 81000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -28358,6 +28358,356 @@
       "learning_rate": 0.0004801210335412954,
       "loss": 16.2376,
       "step": 81000
     }
   ],
   "logging_steps": 20,
@@ -28377,7 +28727,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 5.955535421644892e+19,
   "train_batch_size": 48,
   "trial_name": null,
   "trial_params": null

   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.12146780510638802,
   "eval_steps": 500,
+  "global_step": 82000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 0.0004801210335412954,
       "loss": 16.2376,
       "step": 81000
+    },
+    {
+      "epoch": 0.12001611670389704,
+      "grad_norm": 6.78125,
+      "learning_rate": 0.00048011609460628786,
+      "loss": 16.2201,
+      "step": 81020
+    },
+    {
+      "epoch": 0.12004574299782543,
+      "grad_norm": 7.0625,
+      "learning_rate": 0.00048011115567128025,
+      "loss": 16.2081,
+      "step": 81040
+    },
+    {
+      "epoch": 0.12007536929175382,
+      "grad_norm": 6.40625,
+      "learning_rate": 0.00048010621673627276,
+      "loss": 16.1728,
+      "step": 81060
+    },
+    {
+      "epoch": 0.1201049955856822,
+      "grad_norm": 5.96875,
+      "learning_rate": 0.00048010127780126515,
+      "loss": 16.2446,
+      "step": 81080
+    },
+    {
+      "epoch": 0.12013462187961059,
+      "grad_norm": 6.8125,
+      "learning_rate": 0.0004800963388662576,
+      "loss": 16.2341,
+      "step": 81100
+    },
+    {
+      "epoch": 0.12016424817353898,
+      "grad_norm": 7.21875,
+      "learning_rate": 0.00048009139993125004,
+      "loss": 16.2402,
+      "step": 81120
+    },
+    {
+      "epoch": 0.12019387446746736,
+      "grad_norm": 6.3125,
+      "learning_rate": 0.0004800864609962425,
+      "loss": 16.2117,
+      "step": 81140
+    },
+    {
+      "epoch": 0.12022350076139575,
+      "grad_norm": 6.09375,
+      "learning_rate": 0.0004800815220612349,
+      "loss": 16.2817,
+      "step": 81160
+    },
+    {
+      "epoch": 0.12025312705532414,
+      "grad_norm": 6.59375,
+      "learning_rate": 0.00048007658312622733,
+      "loss": 16.2479,
+      "step": 81180
+    },
+    {
+      "epoch": 0.12028275334925252,
+      "grad_norm": 7.03125,
+      "learning_rate": 0.0004800716441912198,
+      "loss": 16.2402,
+      "step": 81200
+    },
+    {
+      "epoch": 0.12031237964318092,
+      "grad_norm": 6.59375,
+      "learning_rate": 0.00048006670525621223,
+      "loss": 16.2495,
+      "step": 81220
+    },
+    {
+      "epoch": 0.12034200593710931,
+      "grad_norm": 6.75,
+      "learning_rate": 0.0004800617663212046,
+      "loss": 16.2373,
+      "step": 81240
+    },
+    {
+      "epoch": 0.1203716322310377,
+      "grad_norm": 7.0625,
+      "learning_rate": 0.00048005682738619707,
+      "loss": 16.1696,
+      "step": 81260
+    },
+    {
+      "epoch": 0.12040125852496608,
+      "grad_norm": 6.375,
+      "learning_rate": 0.0004800518884511895,
+      "loss": 16.1894,
+      "step": 81280
+    },
+    {
+      "epoch": 0.12043088481889447,
+      "grad_norm": 7.0,
+      "learning_rate": 0.00048004694951618197,
+      "loss": 16.214,
+      "step": 81300
+    },
+    {
+      "epoch": 0.12046051111282285,
+      "grad_norm": 7.125,
+      "learning_rate": 0.00048004201058117436,
+      "loss": 16.2855,
+      "step": 81320
+    },
+    {
+      "epoch": 0.12049013740675124,
+      "grad_norm": 6.84375,
+      "learning_rate": 0.00048003707164616675,
+      "loss": 16.2033,
+      "step": 81340
+    },
+    {
+      "epoch": 0.12051976370067963,
+      "grad_norm": 7.25,
+      "learning_rate": 0.00048003213271115926,
+      "loss": 16.1961,
+      "step": 81360
+    },
+    {
+      "epoch": 0.12054938999460801,
+      "grad_norm": 9.8125,
+      "learning_rate": 0.00048002719377615165,
+      "loss": 16.1959,
+      "step": 81380
+    },
+    {
+      "epoch": 0.1205790162885364,
+      "grad_norm": 6.0625,
+      "learning_rate": 0.0004800222548411441,
+      "loss": 16.2231,
+      "step": 81400
+    },
+    {
+      "epoch": 0.12060864258246479,
+      "grad_norm": 6.6875,
+      "learning_rate": 0.00048001731590613655,
+      "loss": 16.206,
+      "step": 81420
+    },
+    {
+      "epoch": 0.12063826887639317,
+      "grad_norm": 6.375,
+      "learning_rate": 0.000480012376971129,
+      "loss": 16.2459,
+      "step": 81440
+    },
+    {
+      "epoch": 0.12066789517032156,
+      "grad_norm": 5.90625,
+      "learning_rate": 0.0004800074380361214,
+      "loss": 16.2108,
+      "step": 81460
+    },
+    {
+      "epoch": 0.12069752146424995,
+      "grad_norm": 6.3125,
+      "learning_rate": 0.00048000249910111383,
+      "loss": 16.2263,
+      "step": 81480
+    },
+    {
+      "epoch": 0.12072714775817833,
+      "grad_norm": 6.65625,
+      "learning_rate": 0.0004799975601661063,
+      "loss": 16.2835,
+      "step": 81500
+    },
+    {
+      "epoch": 0.12075677405210672,
+      "grad_norm": 6.625,
+      "learning_rate": 0.00047999262123109873,
+      "loss": 16.246,
+      "step": 81520
+    },
+    {
+      "epoch": 0.12078640034603512,
+      "grad_norm": 7.71875,
+      "learning_rate": 0.0004799876822960911,
+      "loss": 16.2665,
+      "step": 81540
+    },
+    {
+      "epoch": 0.1208160266399635,
+      "grad_norm": 6.46875,
+      "learning_rate": 0.00047998274336108357,
+      "loss": 16.2341,
+      "step": 81560
+    },
+    {
+      "epoch": 0.12084565293389189,
+      "grad_norm": 6.84375,
+      "learning_rate": 0.000479977804426076,
+      "loss": 16.2343,
+      "step": 81580
+    },
+    {
+      "epoch": 0.12087527922782028,
+      "grad_norm": 6.375,
+      "learning_rate": 0.00047997286549106847,
+      "loss": 16.1953,
+      "step": 81600
+    },
+    {
+      "epoch": 0.12090490552174867,
+      "grad_norm": 8.125,
+      "learning_rate": 0.00047996792655606086,
+      "loss": 16.2518,
+      "step": 81620
+    },
+    {
+      "epoch": 0.12093453181567705,
+      "grad_norm": 8.0625,
+      "learning_rate": 0.0004799629876210533,
+      "loss": 16.1682,
+      "step": 81640
+    },
+    {
+      "epoch": 0.12096415810960544,
+      "grad_norm": 6.59375,
+      "learning_rate": 0.00047995804868604576,
+      "loss": 16.2499,
+      "step": 81660
+    },
+    {
+      "epoch": 0.12099378440353382,
+      "grad_norm": 9.5,
+      "learning_rate": 0.00047995310975103815,
+      "loss": 16.2143,
+      "step": 81680
+    },
+    {
+      "epoch": 0.12102341069746221,
+      "grad_norm": 6.59375,
+      "learning_rate": 0.0004799481708160306,
+      "loss": 16.178,
+      "step": 81700
+    },
+    {
+      "epoch": 0.1210530369913906,
+      "grad_norm": 7.03125,
+      "learning_rate": 0.00047994323188102305,
+      "loss": 16.2018,
+      "step": 81720
+    },
+    {
+      "epoch": 0.12108266328531898,
+      "grad_norm": 6.9375,
+      "learning_rate": 0.0004799382929460155,
+      "loss": 16.2976,
+      "step": 81740
+    },
+    {
+      "epoch": 0.12111228957924737,
+      "grad_norm": 6.4375,
+      "learning_rate": 0.0004799333540110079,
+      "loss": 16.2988,
+      "step": 81760
+    },
+    {
+      "epoch": 0.12114191587317576,
+      "grad_norm": 8.8125,
+      "learning_rate": 0.00047992841507600033,
+      "loss": 16.2317,
+      "step": 81780
+    },
+    {
+      "epoch": 0.12117154216710414,
+      "grad_norm": 7.6875,
+      "learning_rate": 0.0004799234761409928,
+      "loss": 16.1929,
+      "step": 81800
+    },
+    {
+      "epoch": 0.12120116846103253,
+      "grad_norm": 6.1875,
+      "learning_rate": 0.00047991853720598523,
+      "loss": 16.1955,
+      "step": 81820
+    },
+    {
+      "epoch": 0.12123079475496092,
+      "grad_norm": 6.09375,
+      "learning_rate": 0.0004799135982709776,
+      "loss": 16.2418,
+      "step": 81840
+    },
+    {
+      "epoch": 0.12126042104888932,
+      "grad_norm": 6.90625,
+      "learning_rate": 0.00047990865933597007,
+      "loss": 16.218,
+      "step": 81860
+    },
+    {
+      "epoch": 0.1212900473428177,
+      "grad_norm": 9.5625,
+      "learning_rate": 0.0004799037204009625,
+      "loss": 16.265,
+      "step": 81880
+    },
+    {
+      "epoch": 0.12131967363674609,
+      "grad_norm": 6.78125,
+      "learning_rate": 0.00047989878146595497,
+      "loss": 16.2145,
+      "step": 81900
+    },
+    {
+      "epoch": 0.12134929993067448,
+      "grad_norm": 6.59375,
+      "learning_rate": 0.00047989384253094736,
+      "loss": 16.1485,
+      "step": 81920
+    },
+    {
+      "epoch": 0.12137892622460286,
+      "grad_norm": 6.40625,
+      "learning_rate": 0.0004798889035959398,
+      "loss": 16.2334,
+      "step": 81940
+    },
+    {
+      "epoch": 0.12140855251853125,
+      "grad_norm": 7.5625,
+      "learning_rate": 0.00047988396466093226,
+      "loss": 16.1556,
+      "step": 81960
+    },
+    {
+      "epoch": 0.12143817881245964,
+      "grad_norm": 6.9375,
+      "learning_rate": 0.0004798790257259247,
+      "loss": 16.1977,
+      "step": 81980
+    },
+    {
+      "epoch": 0.12146780510638802,
+      "grad_norm": 6.625,
+      "learning_rate": 0.0004798740867909171,
+      "loss": 16.2152,
+      "step": 82000
     }
   ],
   "logging_steps": 20,
       "attributes": {}
     }
   },
+  "total_flos": 6.02907206476586e+19,
   "train_batch_size": 48,
   "trial_name": null,
   "trial_params": null