Training in progress, step 20000, checkpoint

Browse files

Files changed (8) hide show

last-checkpoint/optimizer.pt +1 -1
last-checkpoint/pytorch_model.bin +1 -1
last-checkpoint/rng_state_0.pth +1 -1
last-checkpoint/rng_state_1.pth +1 -1
last-checkpoint/rng_state_2.pth +1 -1
last-checkpoint/rng_state_3.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +353 -3

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:01e4827000f30108c5db6d9ab6168d6e7dfecf37eef3edc1465363ee9ea8e490
 size 304481530

 version https://git-lfs.github.com/spec/v1
+oid sha256:db7432141f78eaf89762dbfa7cd270e9a33828df0e033550b34c9481463227a6
 size 304481530

last-checkpoint/pytorch_model.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:795bb5905ce658a665a647e1035b68562ea8227998cfd6cdd93e835459408e5d
 size 402029570

 version https://git-lfs.github.com/spec/v1
+oid sha256:5277a64b453be07385d84e9f80db45a50e37f4890167c3e4c572e1a6fb7bdaaa
 size 402029570

last-checkpoint/rng_state_0.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6872023f654a65ebb855f875663f2550ec7c7270f37183aedc09afdf3151f71c
 size 14960

 version https://git-lfs.github.com/spec/v1
+oid sha256:baf78593c218b20d298480993c7fbaf9b2ea100e2a22749e83c5c1aba18f3f4c
 size 14960

last-checkpoint/rng_state_1.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9d749134b574c8d566f1f7b1e5e174cfc46c406c32210d882ffb530c2f402814
 size 14960

 version https://git-lfs.github.com/spec/v1
+oid sha256:65689efd51e6068aa6422e7737ef0148b7583a59986d4d53a6a0a02103bfcb11
 size 14960

last-checkpoint/rng_state_2.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a7b2317285b7aac6485bde8423b9bd42301b29e0cd0b6a3f299d06ddf3270099
 size 14960

 version https://git-lfs.github.com/spec/v1
+oid sha256:90981e208c884dfa861b8ec3fc9badb69e05a78f261183a623615ac5a97c3c95
 size 14960

last-checkpoint/rng_state_3.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6a512863aeac154eb9ea09654b5c57fb002e6788836adf8be9c2844cb710adf1
 size 14960

 version https://git-lfs.github.com/spec/v1
+oid sha256:c76af8694f8d37feee42992c1a0000126f33879d8755e31713c98eb2fdb7b48c
 size 14960

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b73090e5ff4d77e40aae33305c58d2deda13e4f4510f1c076acf40a9f8a97bef
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:5601fb269352a3de217d5b9fa42e25567fee4127194adcf0f48431818665f1d8
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -2,9 +2,9 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.03704949520062789,
   "eval_steps": 500,
-  "global_step": 19000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -6658,6 +6658,356 @@
       "learning_rate": 0.0004939859465103925,
       "loss": 19.6594,
       "step": 19000
     }
   ],
   "logging_steps": 20,
@@ -6677,7 +7027,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 1.3968203395446604e+19,
   "train_batch_size": 48,
   "trial_name": null,
   "trial_params": null

   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.03899946863223989,
   "eval_steps": 500,
+  "global_step": 20000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 0.0004939859465103925,
       "loss": 19.6594,
       "step": 19000
+    },
+    {
+      "epoch": 0.03708849466926013,
+      "grad_norm": 11.0625,
+      "learning_rate": 0.0004939794444794831,
+      "loss": 19.7557,
+      "step": 19020
+    },
+    {
+      "epoch": 0.03712749413789237,
+      "grad_norm": 8.5,
+      "learning_rate": 0.0004939729424485738,
+      "loss": 19.6508,
+      "step": 19040
+    },
+    {
+      "epoch": 0.03716649360652461,
+      "grad_norm": 12.375,
+      "learning_rate": 0.0004939664404176644,
+      "loss": 19.6447,
+      "step": 19060
+    },
+    {
+      "epoch": 0.03720549307515685,
+      "grad_norm": 9.5625,
+      "learning_rate": 0.0004939599383867551,
+      "loss": 19.5852,
+      "step": 19080
+    },
+    {
+      "epoch": 0.037244492543789094,
+      "grad_norm": 9.5625,
+      "learning_rate": 0.0004939534363558458,
+      "loss": 19.6278,
+      "step": 19100
+    },
+    {
+      "epoch": 0.03728349201242133,
+      "grad_norm": 11.25,
+      "learning_rate": 0.0004939469343249364,
+      "loss": 19.7012,
+      "step": 19120
+    },
+    {
+      "epoch": 0.03732249148105357,
+      "grad_norm": 10.9375,
+      "learning_rate": 0.0004939404322940271,
+      "loss": 19.6584,
+      "step": 19140
+    },
+    {
+      "epoch": 0.03736149094968581,
+      "grad_norm": 10.375,
+      "learning_rate": 0.0004939339302631176,
+      "loss": 19.6233,
+      "step": 19160
+    },
+    {
+      "epoch": 0.03740049041831805,
+      "grad_norm": 11.625,
+      "learning_rate": 0.0004939274282322083,
+      "loss": 19.5714,
+      "step": 19180
+    },
+    {
+      "epoch": 0.03743948988695029,
+      "grad_norm": 10.0,
+      "learning_rate": 0.0004939209262012989,
+      "loss": 19.5819,
+      "step": 19200
+    },
+    {
+      "epoch": 0.03747848935558253,
+      "grad_norm": 10.375,
+      "learning_rate": 0.0004939144241703896,
+      "loss": 19.605,
+      "step": 19220
+    },
+    {
+      "epoch": 0.03751748882421477,
+      "grad_norm": 10.6875,
+      "learning_rate": 0.0004939079221394803,
+      "loss": 19.6348,
+      "step": 19240
+    },
+    {
+      "epoch": 0.03755648829284701,
+      "grad_norm": 9.3125,
+      "learning_rate": 0.0004939014201085709,
+      "loss": 19.6152,
+      "step": 19260
+    },
+    {
+      "epoch": 0.03759548776147925,
+      "grad_norm": 10.1875,
+      "learning_rate": 0.0004938949180776616,
+      "loss": 19.6556,
+      "step": 19280
+    },
+    {
+      "epoch": 0.03763448723011149,
+      "grad_norm": 9.125,
+      "learning_rate": 0.0004938884160467522,
+      "loss": 19.603,
+      "step": 19300
+    },
+    {
+      "epoch": 0.03767348669874373,
+      "grad_norm": 10.0625,
+      "learning_rate": 0.0004938819140158429,
+      "loss": 19.6083,
+      "step": 19320
+    },
+    {
+      "epoch": 0.03771248616737597,
+      "grad_norm": 11.8125,
+      "learning_rate": 0.0004938754119849335,
+      "loss": 19.6945,
+      "step": 19340
+    },
+    {
+      "epoch": 0.03775148563600821,
+      "grad_norm": 10.125,
+      "learning_rate": 0.0004938689099540242,
+      "loss": 19.5438,
+      "step": 19360
+    },
+    {
+      "epoch": 0.03779048510464045,
+      "grad_norm": 9.5625,
+      "learning_rate": 0.0004938624079231149,
+      "loss": 19.6158,
+      "step": 19380
+    },
+    {
+      "epoch": 0.03782948457327269,
+      "grad_norm": 10.1875,
+      "learning_rate": 0.0004938559058922054,
+      "loss": 19.6023,
+      "step": 19400
+    },
+    {
+      "epoch": 0.03786848404190493,
+      "grad_norm": 9.75,
+      "learning_rate": 0.0004938494038612961,
+      "loss": 19.6143,
+      "step": 19420
+    },
+    {
+      "epoch": 0.03790748351053717,
+      "grad_norm": 9.875,
+      "learning_rate": 0.0004938429018303867,
+      "loss": 19.5367,
+      "step": 19440
+    },
+    {
+      "epoch": 0.03794648297916941,
+      "grad_norm": 9.375,
+      "learning_rate": 0.0004938363997994774,
+      "loss": 19.5761,
+      "step": 19460
+    },
+    {
+      "epoch": 0.03798548244780165,
+      "grad_norm": 10.1875,
+      "learning_rate": 0.000493829897768568,
+      "loss": 19.5939,
+      "step": 19480
+    },
+    {
+      "epoch": 0.038024481916433886,
+      "grad_norm": 10.0,
+      "learning_rate": 0.0004938233957376587,
+      "loss": 19.5595,
+      "step": 19500
+    },
+    {
+      "epoch": 0.03806348138506613,
+      "grad_norm": 12.75,
+      "learning_rate": 0.0004938168937067493,
+      "loss": 19.5722,
+      "step": 19520
+    },
+    {
+      "epoch": 0.03810248085369837,
+      "grad_norm": 10.375,
+      "learning_rate": 0.00049381039167584,
+      "loss": 19.5889,
+      "step": 19540
+    },
+    {
+      "epoch": 0.038141480322330605,
+      "grad_norm": 10.5,
+      "learning_rate": 0.0004938038896449307,
+      "loss": 19.5379,
+      "step": 19560
+    },
+    {
+      "epoch": 0.03818047979096285,
+      "grad_norm": 9.875,
+      "learning_rate": 0.0004937973876140213,
+      "loss": 19.5243,
+      "step": 19580
+    },
+    {
+      "epoch": 0.03821947925959509,
+      "grad_norm": 9.1875,
+      "learning_rate": 0.0004937908855831119,
+      "loss": 19.6067,
+      "step": 19600
+    },
+    {
+      "epoch": 0.03825847872822733,
+      "grad_norm": 10.25,
+      "learning_rate": 0.0004937843835522025,
+      "loss": 19.6051,
+      "step": 19620
+    },
+    {
+      "epoch": 0.038297478196859566,
+      "grad_norm": 10.5,
+      "learning_rate": 0.0004937778815212932,
+      "loss": 19.5555,
+      "step": 19640
+    },
+    {
+      "epoch": 0.03833647766549181,
+      "grad_norm": 10.75,
+      "learning_rate": 0.0004937713794903838,
+      "loss": 19.601,
+      "step": 19660
+    },
+    {
+      "epoch": 0.03837547713412405,
+      "grad_norm": 9.5,
+      "learning_rate": 0.0004937648774594745,
+      "loss": 19.5818,
+      "step": 19680
+    },
+    {
+      "epoch": 0.038414476602756285,
+      "grad_norm": 10.625,
+      "learning_rate": 0.0004937583754285651,
+      "loss": 19.5565,
+      "step": 19700
+    },
+    {
+      "epoch": 0.03845347607138853,
+      "grad_norm": 9.9375,
+      "learning_rate": 0.0004937518733976558,
+      "loss": 19.5719,
+      "step": 19720
+    },
+    {
+      "epoch": 0.03849247554002077,
+      "grad_norm": 10.3125,
+      "learning_rate": 0.0004937453713667465,
+      "loss": 19.5583,
+      "step": 19740
+    },
+    {
+      "epoch": 0.038531475008653004,
+      "grad_norm": 10.4375,
+      "learning_rate": 0.000493738869335837,
+      "loss": 19.5279,
+      "step": 19760
+    },
+    {
+      "epoch": 0.038570474477285246,
+      "grad_norm": 9.4375,
+      "learning_rate": 0.0004937323673049277,
+      "loss": 19.4711,
+      "step": 19780
+    },
+    {
+      "epoch": 0.03860947394591749,
+      "grad_norm": 11.9375,
+      "learning_rate": 0.0004937258652740183,
+      "loss": 19.5244,
+      "step": 19800
+    },
+    {
+      "epoch": 0.038648473414549724,
+      "grad_norm": 9.8125,
+      "learning_rate": 0.000493719363243109,
+      "loss": 19.5652,
+      "step": 19820
+    },
+    {
+      "epoch": 0.038687472883181966,
+      "grad_norm": 11.4375,
+      "learning_rate": 0.0004937128612121996,
+      "loss": 19.5002,
+      "step": 19840
+    },
+    {
+      "epoch": 0.03872647235181421,
+      "grad_norm": 10.1875,
+      "learning_rate": 0.0004937063591812903,
+      "loss": 19.4699,
+      "step": 19860
+    },
+    {
+      "epoch": 0.03876547182044645,
+      "grad_norm": 10.8125,
+      "learning_rate": 0.000493699857150381,
+      "loss": 19.486,
+      "step": 19880
+    },
+    {
+      "epoch": 0.038804471289078685,
+      "grad_norm": 9.0625,
+      "learning_rate": 0.0004936933551194716,
+      "loss": 19.5072,
+      "step": 19900
+    },
+    {
+      "epoch": 0.03884347075771093,
+      "grad_norm": 9.625,
+      "learning_rate": 0.0004936868530885622,
+      "loss": 19.434,
+      "step": 19920
+    },
+    {
+      "epoch": 0.03888247022634317,
+      "grad_norm": 9.5,
+      "learning_rate": 0.0004936803510576528,
+      "loss": 19.4787,
+      "step": 19940
+    },
+    {
+      "epoch": 0.038921469694975404,
+      "grad_norm": 10.0625,
+      "learning_rate": 0.0004936738490267435,
+      "loss": 19.4656,
+      "step": 19960
+    },
+    {
+      "epoch": 0.038960469163607646,
+      "grad_norm": 9.125,
+      "learning_rate": 0.0004936673469958341,
+      "loss": 19.5858,
+      "step": 19980
+    },
+    {
+      "epoch": 0.03899946863223989,
+      "grad_norm": 8.875,
+      "learning_rate": 0.0004936608449649248,
+      "loss": 19.5272,
+      "step": 20000
     }
   ],
   "logging_steps": 20,
       "attributes": {}
     }
   },
+  "total_flos": 1.4703345011961889e+19,
   "train_batch_size": 48,
   "trial_name": null,
   "trial_params": null