Training in progress, step 83000, checkpoint

Browse files

Files changed (9) hide show

last-checkpoint/optimizer.pt +1 -1
last-checkpoint/pytorch_model.bin +1 -1
last-checkpoint/rng_state_0.pth +1 -1
last-checkpoint/rng_state_1.pth +1 -1
last-checkpoint/rng_state_2.pth +1 -1
last-checkpoint/rng_state_3.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +353 -3
last-checkpoint/training_args.bin +1 -1

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6fdea62ab3807d5c83f086f5151ea04cbf9ee9578a7b8e17883bf2d371b73c59
 size 304481530

 version https://git-lfs.github.com/spec/v1
+oid sha256:86867d7114034c5ce9cc9d029da201dff42a389f63d4b662bb9a3aaa72d02379
 size 304481530

last-checkpoint/pytorch_model.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a88a14f277e372edcbef1004c41517572ec49368da6f457c37072a723ef15a5b
 size 402029570

 version https://git-lfs.github.com/spec/v1
+oid sha256:81e0d480b36bf30d291b97e091e18788bf233399a4446897865328f68d72beb6
 size 402029570

last-checkpoint/rng_state_0.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1e1adfa857ebdc86ec7fd943675fc57102e813288b2aafc927551a884f4b79c6
 size 14960

 version https://git-lfs.github.com/spec/v1
+oid sha256:a8d7f9758da2d3e14d7f42182479d86315138a7b7b34199b33bbe616fd250fd1
 size 14960

last-checkpoint/rng_state_1.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:57a30f5d80cfa3dad198a9cbb5668d7cb89aba9aa41f1f44032001d4e5f9fed3
 size 14960

 version https://git-lfs.github.com/spec/v1
+oid sha256:b91440a4692b5f169135f333404f68fc858d96847193631d33bcd1a9bc277a1e
 size 14960

last-checkpoint/rng_state_2.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:50a1704d8cd0ae1d1da5487260fd6a9d83621f0daf881048aad7559ce485f0af
 size 14960

 version https://git-lfs.github.com/spec/v1
+oid sha256:07ba5f63f4711c8ab404d662c7cb13d35ae312e00a001da6fffa61922c3b4f44
 size 14960

last-checkpoint/rng_state_3.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:512ff917732395bd5049da89bf880b825c8d71316cd69e7277929f7763966d5b
 size 14960

 version https://git-lfs.github.com/spec/v1
+oid sha256:6e5adf39b25c897bc63e4019ed90698924cec0f2e7d40940eb00ab5a52f2cef4
 size 14960

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:13c457672f739f0501828eb93166275a4b9a832449b61ea1951a9a198cb7e4de
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:4e6939fa0bb635077cb363f18f1780aac2e900315e1171d36449035e74f63bb8
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -2,9 +2,9 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.12146780510638802,
   "eval_steps": 500,
-  "global_step": 82000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -28708,6 +28708,356 @@
       "learning_rate": 0.0004798740867909171,
       "loss": 16.2152,
       "step": 82000
     }
   ],
   "logging_steps": 20,
@@ -28727,7 +29077,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 6.02907206476586e+19,
   "train_batch_size": 48,
   "trial_name": null,
   "trial_params": null

   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.12294911980280739,
   "eval_steps": 500,
+  "global_step": 83000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 0.0004798740867909171,
       "loss": 16.2152,
       "step": 82000
+    },
+    {
+      "epoch": 0.12149743140031641,
+      "grad_norm": 6.03125,
+      "learning_rate": 0.00047986914785590955,
+      "loss": 16.2106,
+      "step": 82020
+    },
+    {
+      "epoch": 0.1215270576942448,
+      "grad_norm": 6.4375,
+      "learning_rate": 0.000479864208920902,
+      "loss": 16.2421,
+      "step": 82040
+    },
+    {
+      "epoch": 0.12155668398817318,
+      "grad_norm": 6.6875,
+      "learning_rate": 0.0004798592699858944,
+      "loss": 16.2096,
+      "step": 82060
+    },
+    {
+      "epoch": 0.12158631028210157,
+      "grad_norm": 6.40625,
+      "learning_rate": 0.00047985433105088683,
+      "loss": 16.2556,
+      "step": 82080
+    },
+    {
+      "epoch": 0.12161593657602995,
+      "grad_norm": 6.75,
+      "learning_rate": 0.0004798493921158793,
+      "loss": 16.183,
+      "step": 82100
+    },
+    {
+      "epoch": 0.12164556286995834,
+      "grad_norm": 6.21875,
+      "learning_rate": 0.00047984445318087173,
+      "loss": 16.232,
+      "step": 82120
+    },
+    {
+      "epoch": 0.12167518916388673,
+      "grad_norm": 7.3125,
+      "learning_rate": 0.0004798395142458641,
+      "loss": 16.2333,
+      "step": 82140
+    },
+    {
+      "epoch": 0.12170481545781513,
+      "grad_norm": 6.15625,
+      "learning_rate": 0.00047983457531085657,
+      "loss": 16.1709,
+      "step": 82160
+    },
+    {
+      "epoch": 0.12173444175174351,
+      "grad_norm": 6.96875,
+      "learning_rate": 0.000479829636375849,
+      "loss": 16.243,
+      "step": 82180
+    },
+    {
+      "epoch": 0.1217640680456719,
+      "grad_norm": 6.40625,
+      "learning_rate": 0.00047982469744084147,
+      "loss": 16.2207,
+      "step": 82200
+    },
+    {
+      "epoch": 0.12179369433960029,
+      "grad_norm": 6.90625,
+      "learning_rate": 0.00047981975850583386,
+      "loss": 16.2444,
+      "step": 82220
+    },
+    {
+      "epoch": 0.12182332063352867,
+      "grad_norm": 6.65625,
+      "learning_rate": 0.0004798148195708263,
+      "loss": 16.1667,
+      "step": 82240
+    },
+    {
+      "epoch": 0.12185294692745706,
+      "grad_norm": 6.53125,
+      "learning_rate": 0.00047980988063581876,
+      "loss": 16.1988,
+      "step": 82260
+    },
+    {
+      "epoch": 0.12188257322138545,
+      "grad_norm": 6.59375,
+      "learning_rate": 0.0004798049417008112,
+      "loss": 16.216,
+      "step": 82280
+    },
+    {
+      "epoch": 0.12191219951531383,
+      "grad_norm": 7.34375,
+      "learning_rate": 0.0004798000027658036,
+      "loss": 16.1921,
+      "step": 82300
+    },
+    {
+      "epoch": 0.12194182580924222,
+      "grad_norm": 6.65625,
+      "learning_rate": 0.0004797950638307961,
+      "loss": 16.2033,
+      "step": 82320
+    },
+    {
+      "epoch": 0.1219714521031706,
+      "grad_norm": 5.875,
+      "learning_rate": 0.0004797901248957885,
+      "loss": 16.1843,
+      "step": 82340
+    },
+    {
+      "epoch": 0.12200107839709899,
+      "grad_norm": 6.5625,
+      "learning_rate": 0.0004797851859607809,
+      "loss": 16.1888,
+      "step": 82360
+    },
+    {
+      "epoch": 0.12203070469102738,
+      "grad_norm": 5.6875,
+      "learning_rate": 0.00047978024702577334,
+      "loss": 16.1824,
+      "step": 82380
+    },
+    {
+      "epoch": 0.12206033098495576,
+      "grad_norm": 6.125,
+      "learning_rate": 0.0004797753080907658,
+      "loss": 16.1561,
+      "step": 82400
+    },
+    {
+      "epoch": 0.12208995727888415,
+      "grad_norm": 7.25,
+      "learning_rate": 0.00047977036915575823,
+      "loss": 16.2548,
+      "step": 82420
+    },
+    {
+      "epoch": 0.12211958357281254,
+      "grad_norm": 8.0625,
+      "learning_rate": 0.0004797654302207506,
+      "loss": 16.2019,
+      "step": 82440
+    },
+    {
+      "epoch": 0.12214920986674092,
+      "grad_norm": 7.1875,
+      "learning_rate": 0.00047976049128574307,
+      "loss": 16.1525,
+      "step": 82460
+    },
+    {
+      "epoch": 0.12217883616066932,
+      "grad_norm": 7.03125,
+      "learning_rate": 0.0004797555523507355,
+      "loss": 16.2534,
+      "step": 82480
+    },
+    {
+      "epoch": 0.12220846245459771,
+      "grad_norm": 6.9375,
+      "learning_rate": 0.00047975061341572797,
+      "loss": 16.1858,
+      "step": 82500
+    },
+    {
+      "epoch": 0.1222380887485261,
+      "grad_norm": 6.6875,
+      "learning_rate": 0.00047974567448072036,
+      "loss": 16.1439,
+      "step": 82520
+    },
+    {
+      "epoch": 0.12226771504245448,
+      "grad_norm": 6.4375,
+      "learning_rate": 0.0004797407355457128,
+      "loss": 16.2245,
+      "step": 82540
+    },
+    {
+      "epoch": 0.12229734133638287,
+      "grad_norm": 6.46875,
+      "learning_rate": 0.00047973579661070526,
+      "loss": 16.1173,
+      "step": 82560
+    },
+    {
+      "epoch": 0.12232696763031126,
+      "grad_norm": 6.96875,
+      "learning_rate": 0.0004797308576756977,
+      "loss": 16.1917,
+      "step": 82580
+    },
+    {
+      "epoch": 0.12235659392423964,
+      "grad_norm": 6.46875,
+      "learning_rate": 0.0004797259187406901,
+      "loss": 16.2421,
+      "step": 82600
+    },
+    {
+      "epoch": 0.12238622021816803,
+      "grad_norm": 6.875,
+      "learning_rate": 0.0004797209798056826,
+      "loss": 16.2,
+      "step": 82620
+    },
+    {
+      "epoch": 0.12241584651209642,
+      "grad_norm": 6.28125,
+      "learning_rate": 0.000479716040870675,
+      "loss": 16.2038,
+      "step": 82640
+    },
+    {
+      "epoch": 0.1224454728060248,
+      "grad_norm": 5.96875,
+      "learning_rate": 0.00047971110193566744,
+      "loss": 16.2384,
+      "step": 82660
+    },
+    {
+      "epoch": 0.12247509909995319,
+      "grad_norm": 6.1875,
+      "learning_rate": 0.00047970616300065984,
+      "loss": 16.2127,
+      "step": 82680
+    },
+    {
+      "epoch": 0.12250472539388158,
+      "grad_norm": 6.28125,
+      "learning_rate": 0.0004797012240656523,
+      "loss": 16.235,
+      "step": 82700
+    },
+    {
+      "epoch": 0.12253435168780996,
+      "grad_norm": 6.125,
+      "learning_rate": 0.00047969628513064473,
+      "loss": 16.1783,
+      "step": 82720
+    },
+    {
+      "epoch": 0.12256397798173835,
+      "grad_norm": 7.28125,
+      "learning_rate": 0.0004796913461956371,
+      "loss": 16.227,
+      "step": 82740
+    },
+    {
+      "epoch": 0.12259360427566673,
+      "grad_norm": 6.15625,
+      "learning_rate": 0.00047968640726062957,
+      "loss": 16.2354,
+      "step": 82760
+    },
+    {
+      "epoch": 0.12262323056959512,
+      "grad_norm": 6.125,
+      "learning_rate": 0.000479681468325622,
+      "loss": 16.1881,
+      "step": 82780
+    },
+    {
+      "epoch": 0.12265285686352352,
+      "grad_norm": 6.375,
+      "learning_rate": 0.00047967652939061447,
+      "loss": 16.2082,
+      "step": 82800
+    },
+    {
+      "epoch": 0.12268248315745191,
+      "grad_norm": 6.375,
+      "learning_rate": 0.00047967159045560686,
+      "loss": 16.1181,
+      "step": 82820
+    },
+    {
+      "epoch": 0.1227121094513803,
+      "grad_norm": 6.59375,
+      "learning_rate": 0.0004796666515205993,
+      "loss": 16.1534,
+      "step": 82840
+    },
+    {
+      "epoch": 0.12274173574530868,
+      "grad_norm": 6.4375,
+      "learning_rate": 0.00047966171258559176,
+      "loss": 16.1316,
+      "step": 82860
+    },
+    {
+      "epoch": 0.12277136203923707,
+      "grad_norm": 6.5,
+      "learning_rate": 0.0004796567736505842,
+      "loss": 16.1864,
+      "step": 82880
+    },
+    {
+      "epoch": 0.12280098833316545,
+      "grad_norm": 7.15625,
+      "learning_rate": 0.0004796518347155766,
+      "loss": 16.1988,
+      "step": 82900
+    },
+    {
+      "epoch": 0.12283061462709384,
+      "grad_norm": 6.5625,
+      "learning_rate": 0.0004796468957805691,
+      "loss": 16.2569,
+      "step": 82920
+    },
+    {
+      "epoch": 0.12286024092102223,
+      "grad_norm": 6.40625,
+      "learning_rate": 0.0004796419568455615,
+      "loss": 16.1792,
+      "step": 82940
+    },
+    {
+      "epoch": 0.12288986721495061,
+      "grad_norm": 6.53125,
+      "learning_rate": 0.00047963701791055394,
+      "loss": 16.2019,
+      "step": 82960
+    },
+    {
+      "epoch": 0.122919493508879,
+      "grad_norm": 6.6875,
+      "learning_rate": 0.00047963207897554634,
+      "loss": 16.1815,
+      "step": 82980
+    },
+    {
+      "epoch": 0.12294911980280739,
+      "grad_norm": 6.75,
+      "learning_rate": 0.00047962714004053884,
+      "loss": 16.2309,
+      "step": 83000
     }
   ],
   "logging_steps": 20,
       "attributes": {}
     }
   },
+  "total_flos": 6.102608488091877e+19,
   "train_batch_size": 48,
   "trial_name": null,
   "trial_params": null

last-checkpoint/training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:fc3f551404b0d7edd833494ee70d9c95a722ebd26deaead78190bce345559dbd
 size 5432

 version https://git-lfs.github.com/spec/v1
+oid sha256:78e73c5569e6c1326aedcb241444fa9deb29154b44bf64880d75f7e6d9e90132
 size 5432